join: join over lb if available (#2348)
* join: join over lb if available
3u13r authored Sep 25, 2023
1 parent df77696 commit 2776e40
Showing 12 changed files with 140 additions and 60 deletions.
1 change: 0 additions & 1 deletion bootstrapper/cmd/bootstrapper/run.go
@@ -99,5 +99,4 @@ type clusterInitJoiner interface {
type metadataAPI interface {
joinclient.MetadataAPI
initserver.MetadataAPI
- GetLoadBalancerEndpoint(ctx context.Context) (host, port string, err error)
}
29 changes: 20 additions & 9 deletions bootstrapper/internal/joinclient/joinclient.go
@@ -186,17 +186,29 @@ func (c *JoinClient) Stop() {
}

func (c *JoinClient) tryJoinWithAvailableServices() error {
- ips, err := c.getControlPlaneIPs()
ctx, cancel := c.timeoutCtx()
defer cancel()

var endpoints []string

ip, _, err := c.metadataAPI.GetLoadBalancerEndpoint(ctx)
if err != nil {
- return err
return fmt.Errorf("failed to get load balancer endpoint: %w", err)
}
endpoints = append(endpoints, ip)

- if len(ips) == 0 {
ips, err := c.getControlPlaneIPs(ctx)
if err != nil {
return fmt.Errorf("failed to get control plane IPs: %w", err)
}
endpoints = append(endpoints, ips...)

if len(endpoints) == 0 {
return errors.New("no control plane IPs found")
}

- for _, ip := range ips {
- err = c.join(net.JoinHostPort(ip, strconv.Itoa(constants.JoinServiceNodePort)))
for _, endpoint := range endpoints {
err = c.join(net.JoinHostPort(endpoint, strconv.Itoa(constants.JoinServiceNodePort)))
if err == nil {
return nil
}
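For clarity, here is a minimal, self-contained sketch (not part of this commit) of the candidate order the new code produces: the load balancer host is tried first, then the control-plane VPC IPs, each dialed on the join-service node port. The constant value below is an assumption taken from the ports_join = "30090" entries added in the Terraform files further down.

```go
package main

import (
	"fmt"
	"net"
	"strconv"
)

// joinServiceNodePort stands in for constants.JoinServiceNodePort; 30090 is
// assumed here, matching the ports_join value added in the Terraform files.
const joinServiceNodePort = 30090

// joinEndpointCandidates mirrors the assembly above: load balancer host first,
// then the control-plane VPC IPs, each paired with the join-service node port.
func joinEndpointCandidates(lbHost string, controlPlaneIPs []string) []string {
	hosts := append([]string{lbHost}, controlPlaneIPs...)
	endpoints := make([]string, 0, len(hosts))
	for _, host := range hosts {
		endpoints = append(endpoints, net.JoinHostPort(host, strconv.Itoa(joinServiceNodePort)))
	}
	return endpoints
}

func main() {
	fmt.Println(joinEndpointCandidates("203.0.113.10", []string{"192.168.178.2", "192.168.178.3"}))
	// Output: [203.0.113.10:30090 192.168.178.2:30090 192.168.178.3:30090]
}
```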
@@ -357,10 +369,7 @@ func (c *JoinClient) getDiskUUID() (string, error) {
return c.disk.UUID()
}

- func (c *JoinClient) getControlPlaneIPs() ([]string, error) {
- ctx, cancel := c.timeoutCtx()
- defer cancel()

func (c *JoinClient) getControlPlaneIPs(ctx context.Context) ([]string, error) {
instances, err := c.metadataAPI.List(ctx)
if err != nil {
c.log.With(zap.Error(err)).Errorf("Failed to list instances from metadata API")
@@ -425,6 +434,8 @@ type MetadataAPI interface {
List(ctx context.Context) ([]metadata.InstanceMetadata, error)
// Self retrieves the current instance.
Self(ctx context.Context) (metadata.InstanceMetadata, error)
// GetLoadBalancerEndpoint retrieves the load balancer endpoint.
GetLoadBalancerEndpoint(ctx context.Context) (host, port string, err error)
}

type encryptedDisk interface {
8 changes: 8 additions & 0 deletions bootstrapper/internal/joinclient/joinclient_test.go
@@ -330,6 +330,10 @@ func (s *stubRepeaterMetadataAPI) List(_ context.Context) ([]metadata.InstanceMe
return s.listInstances, s.listErr
}

func (s *stubRepeaterMetadataAPI) GetLoadBalancerEndpoint(_ context.Context) (string, string, error) {
return "", "", nil
}

type stubMetadataAPI struct {
selfAnswerC chan selfAnswer
listAnswerC chan listAnswer
@@ -352,6 +356,10 @@ func (s *stubMetadataAPI) List(_ context.Context) ([]metadata.InstanceMetadata,
return answer.instances, answer.err
}

func (s *stubMetadataAPI) GetLoadBalancerEndpoint(_ context.Context) (string, string, error) {
return "", "", nil
}
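The stubs above satisfy the extended interface with empty values, which keeps the existing tests compiling. A test that wants to drive the new load-balancer branch could use a configurable stub along the lines of the hypothetical sketch below (reusing the test file's existing context and metadata imports):

```go
// stubLBMetadataAPI is a hypothetical, configurable stub (not part of this
// commit) covering the metadata methods visible in this diff.
type stubLBMetadataAPI struct {
	lbHost, lbPort string
	lbErr          error
	instances      []metadata.InstanceMetadata
}

func (s *stubLBMetadataAPI) Self(_ context.Context) (metadata.InstanceMetadata, error) {
	return metadata.InstanceMetadata{}, nil
}

func (s *stubLBMetadataAPI) List(_ context.Context) ([]metadata.InstanceMetadata, error) {
	return s.instances, nil
}

func (s *stubLBMetadataAPI) GetLoadBalancerEndpoint(_ context.Context) (string, string, error) {
	return s.lbHost, s.lbPort, s.lbErr
}
```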

type selfAnswer struct {
instance metadata.InstanceMetadata
err error
17 changes: 17 additions & 0 deletions cli/internal/terraform/terraform/aws/main.tf
@@ -27,13 +27,15 @@ locals {
ports_verify = "30081"
ports_recovery = "9999"
ports_debugd = "4000"
ports_join = "30090"
target_group_arns = {
control-plane : flatten([
module.load_balancer_target_bootstrapper.target_group_arn,
module.load_balancer_target_kubernetes.target_group_arn,
module.load_balancer_target_verify.target_group_arn,
module.load_balancer_target_recovery.target_group_arn,
module.load_balancer_target_konnectivity.target_group_arn,
module.load_balancer_target_join.target_group_arn,
var.debug ? [module.load_balancer_target_debugd[0].target_group_arn] : [],
])
worker : []
@@ -96,6 +98,7 @@ resource "aws_lb" "front_end" {
internal = false
load_balancer_type = "network"
tags = local.tags
security_groups = [aws_security_group.security_group.id]

dynamic "subnet_mapping" {
# TODO(malt3): use for_each = toset(module.public_private_subnet.all_zones)
@@ -111,6 +114,10 @@
}
}
enable_cross_zone_load_balancing = true

lifecycle {
ignore_changes = [security_groups]
}
}

resource "aws_security_group" "security_group" {
@@ -255,6 +262,16 @@ module "load_balancer_target_konnectivity" {
healthcheck_protocol = "TCP"
}

module "load_balancer_target_join" {
source = "./modules/load_balancer_target"
name = "${local.name}-join"
vpc_id = aws_vpc.vpc.id
lb_arn = aws_lb.front_end.arn
port = local.ports_join
tags = local.tags
healthcheck_protocol = "TCP"
}

module "instance_group" {
source = "./modules/instance_group"
for_each = var.node_groups
12 changes: 10 additions & 2 deletions cli/internal/terraform/terraform/azure/main.tf
@@ -32,6 +32,7 @@ locals {
ports_konnectivity = "8132"
ports_verify = "30081"
ports_recovery = "9999"
ports_join = "30090"
ports_debugd = "4000"
cidr_vpc_subnet_nodes = "192.168.178.0/24"
cidr_vpc_subnet_pods = "10.10.0.0/16"
@@ -182,6 +183,12 @@ module "loadbalancer_backend_control_plane" {
protocol = "Tcp",
path = null
},
{
name = "join",
port = local.ports_join,
protocol = "Tcp",
path = null
},
var.debug ? [{
name = "debugd",
port = local.ports_debugd,
@@ -231,8 +238,9 @@ resource "azurerm_network_security_group" "security_group" {
{ name = "kubernetes", priority = 101, dest_port_range = local.ports_kubernetes },
{ name = "bootstrapper", priority = 102, dest_port_range = local.ports_bootstrapper },
{ name = "konnectivity", priority = 103, dest_port_range = local.ports_konnectivity },
- { name = "recovery", priority = 104, dest_port_range = local.ports_recovery },
- var.debug ? [{ name = "debugd", priority = 105, dest_port_range = local.ports_debugd }] : [],
{ name = "join", priority = 104, dest_port_range = local.ports_join },
{ name = "recovery", priority = 105, dest_port_range = local.ports_recovery },
var.debug ? [{ name = "debugd", priority = 106, dest_port_range = local.ports_debugd }] : [],
])
content {
name = security_rule.value.name
14 changes: 14 additions & 0 deletions cli/internal/terraform/terraform/gcp/main.tf
@@ -42,6 +42,7 @@ locals {
ports_konnectivity = "8132"
ports_verify = "30081"
ports_recovery = "9999"
ports_join = "30090"
ports_debugd = "4000"
cidr_vpc_subnet_nodes = "192.168.178.0/24"
cidr_vpc_subnet_pods = "10.10.0.0/16"
@@ -52,6 +53,7 @@
{ name = "verify", port = local.ports_verify },
{ name = "konnectivity", port = local.ports_konnectivity },
{ name = "recovery", port = local.ports_recovery },
{ name = "join", port = local.ports_join },
var.debug ? [{ name = "debugd", port = local.ports_debugd }] : [],
])
node_groups_by_role = {
@@ -120,6 +122,7 @@ resource "google_compute_firewall" "firewall_external" {
local.ports_kubernetes,
local.ports_konnectivity,
local.ports_recovery,
local.ports_join,
var.debug ? [local.ports_debugd] : [],
])
}
@@ -234,6 +237,17 @@ module "loadbalancer_recovery" {
frontend_labels = merge(local.labels, { constellation-use = "recovery" })
}

module "loadbalancer_join" {
source = "./modules/loadbalancer"
name = local.name
health_check = "TCP"
backend_port_name = "join"
backend_instance_groups = local.control_plane_instance_groups
ip_address = google_compute_global_address.loadbalancer_ip.self_link
port = local.ports_join
frontend_labels = merge(local.labels, { constellation-use = "join" })
}

module "loadbalancer_debugd" {
count = var.debug ? 1 : 0 // only deploy debugd in debug mode
source = "./modules/loadbalancer"
1 change: 1 addition & 0 deletions disk-mapper/internal/rejoinclient/BUILD.bazel
@@ -8,6 +8,7 @@ go_library(
visibility = ["//disk-mapper:__subpackages__"],
deps = [
"//internal/cloud/metadata",
"//internal/constants",
"//internal/logger",
"//internal/role",
"//joinservice/joinproto",
39 changes: 32 additions & 7 deletions disk-mapper/internal/rejoinclient/rejoinclient.go
@@ -14,10 +14,13 @@ package rejoinclient
import (
"context"
"errors"
"fmt"
"net"
"strconv"
"time"

"github.com/edgelesssys/constellation/v2/internal/cloud/metadata"
"github.com/edgelesssys/constellation/v2/internal/constants"
"github.com/edgelesssys/constellation/v2/internal/logger"
"github.com/edgelesssys/constellation/v2/internal/role"
"github.com/edgelesssys/constellation/v2/joinservice/joinproto"
@@ -75,7 +78,7 @@ func (c *RejoinClient) Start(ctx context.Context, diskUUID string) (diskKey, mea
defer c.log.Infof("RejoinClient stopped")

for {
- endpoints, err := c.getControlPlaneEndpoints()
endpoints, err := c.getJoinEndpoints()
if err != nil {
c.log.With(zap.Error(err)).Errorf("Failed to get control-plane endpoints")
} else {
@@ -130,19 +133,39 @@ func (c *RejoinClient) requestRejoinTicket(endpoint string) (*joinproto.IssueRej
return joinproto.NewAPIClient(conn).IssueRejoinTicket(ctx, &joinproto.IssueRejoinTicketRequest{DiskUuid: c.diskUUID})
}

- // getControlPlaneEndpoints requests the available control-plane endpoints from the metadata API.
// getJoinEndpoints requests the available control-plane endpoints from the metadata API.
// The list is filtered to remove *this* node if it is a restarting control-plane node.
- func (c *RejoinClient) getControlPlaneEndpoints() ([]string, error) {
// Furthermore, the load balancer's endpoint is added.
func (c *RejoinClient) getJoinEndpoints() ([]string, error) {
ctx, cancel := c.timeoutCtx()
defer cancel()
- endpoints, err := metadata.JoinServiceEndpoints(ctx, c.metadataAPI)

joinEndpoints := []string{}

lbEndpoint, _, err := c.metadataAPI.GetLoadBalancerEndpoint(ctx)
if err != nil {
- return nil, err
return nil, fmt.Errorf("retrieving load balancer endpoint from cloud provider: %w", err)
}
joinEndpoints = append(joinEndpoints, net.JoinHostPort(lbEndpoint, strconv.Itoa(constants.JoinServiceNodePort)))

instances, err := c.metadataAPI.List(ctx)
if err != nil {
return nil, fmt.Errorf("retrieving instances list from cloud provider: %w", err)
}

for _, instance := range instances {
if instance.Role == role.ControlPlane {
if instance.VPCIP != "" {
joinEndpoints = append(joinEndpoints, net.JoinHostPort(instance.VPCIP, strconv.Itoa(constants.JoinServiceNodePort)))
}
}
}

if c.nodeInfo.Role == role.ControlPlane {
- return removeSelfFromEndpoints(c.nodeInfo.VPCIP, endpoints), nil
return removeSelfFromEndpoints(c.nodeInfo.VPCIP, joinEndpoints), nil
}
- return endpoints, nil

return joinEndpoints, nil
}

// removeSelfFromEndpoints removes *this* node from the list of endpoints.
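removeSelfFromEndpoints itself is outside this hunk; the hedged sketch below illustrates the filtering it is documented to perform on the host:port entries built above. Names and details are illustrative, not the repository's implementation. The load balancer entry would normally survive the filter, since its host is not the node's own VPC IP.

```go
// removeSelfFromEndpointsSketch is a hypothetical stand-in: it drops every
// endpoint whose host part equals this node's VPC IP.
func removeSelfFromEndpointsSketch(selfVPCIP string, endpoints []string) []string {
	filtered := make([]string, 0, len(endpoints))
	for _, endpoint := range endpoints {
		host, _, err := net.SplitHostPort(endpoint)
		if err == nil && host == selfVPCIP {
			continue // skip *this* node
		}
		filtered = append(filtered, endpoint)
	}
	return filtered
}
```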
@@ -169,4 +192,6 @@ type grpcDialer interface {
type metadataAPI interface {
// List retrieves all instances belonging to the current constellation.
List(ctx context.Context) ([]metadata.InstanceMetadata, error)
// GetLoadBalancerEndpoint retrieves the load balancer endpoint.
GetLoadBalancerEndpoint(ctx context.Context) (host, port string, err error)
}