Skip to content

Commit

Permalink
fix: workload service reconciler
Browse files Browse the repository at this point in the history
Most of the code is simplifying/refactoring, but there are a few fixes:

* increase LB upstream healthcheck interval to 1 minute
* pass a logger to the LB (as otherwise it creates its own)
* shut down the LB by waiting for it to shut down
* close the LB even when it fails to start to avoid leaking health check goroutines

Additionally, add an integration test for workload proxying.

Co-authored-by: Utku Ozdemir <[email protected]>
Signed-off-by: Andrey Smirnov <[email protected]>
Signed-off-by: Utku Ozdemir <[email protected]>
  • Loading branch information
smira and utkuozdemir committed Aug 18, 2024
1 parent a173c8a commit dcd123d
Show file tree
Hide file tree
Showing 13 changed files with 668 additions and 108 deletions.
80 changes: 79 additions & 1 deletion .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# THIS FILE WAS AUTOMATICALLY GENERATED, PLEASE DO NOT EDIT.
#
# Generated on 2024-07-23T13:55:08Z by kres faf91e3.
# Generated on 2024-08-16T15:47:13Z by kres 7be2a05.

name: default
concurrency:
Expand Down Expand Up @@ -596,3 +596,81 @@ jobs:
!~/.talos/clusters/**/swtpm.log
retention-days: "5"
continue-on-error: true
# Job: runs the workload proxy integration test on a self-hosted runner
# carrying the `omni` label.
e2e-workload-proxy:
runs-on:
- self-hosted
- omni
# Only runs when the labels output of the `default` job contains either
# `integration/e2e` or `integration/e2e-workload-proxy`.
if: contains(fromJSON(needs.default.outputs.labels), 'integration/e2e') || contains(fromJSON(needs.default.outputs.labels), 'integration/e2e-workload-proxy')
# Waits for the `default` job, whose outputs are read by the condition above.
needs:
- default
steps:
# Collects runner details for the diagnostic print below; failures here are
# tolerated (continue-on-error) so they cannot fail the job.
# NOTE(review): the `uses:` value below looks mangled by e-mail obfuscation in
# this capture (the part around `@` was replaced); confirm the real
# `owner/action@version` reference against the repository.
- name: gather-system-info
id: system-info
uses: kenchan0130/[email protected]
continue-on-error: true
# Prints the gathered system information, one entry per line. The total memory
# output is divided by 1024 three times (presumably bytes to GB - confirm the
# unit of the action's `totalmem` output). NODE_NAME is read from the
# environment; it is not set anywhere in this job.
- name: print-system-info
run: |
MEMORY_GB=$((${{ steps.system-info.outputs.totalmem }}/1024/1024/1024))
OUTPUTS=(
"CPU Core: ${{ steps.system-info.outputs.cpu-core }}"
"CPU Model: ${{ steps.system-info.outputs.cpu-model }}"
"Hostname: ${{ steps.system-info.outputs.hostname }}"
"NodeName: ${NODE_NAME}"
"Kernel release: ${{ steps.system-info.outputs.kernel-release }}"
"Kernel version: ${{ steps.system-info.outputs.kernel-version }}"
"Name: ${{ steps.system-info.outputs.name }}"
"Platform: ${{ steps.system-info.outputs.platform }}"
"Release: ${{ steps.system-info.outputs.release }}"
"Total memory: ${MEMORY_GB} GB"
)
for OUTPUT in "${OUTPUTS[@]}";do
echo "${OUTPUT}"
done
continue-on-error: true
- name: checkout
uses: actions/checkout@v4
# Converts the shallow checkout into a full clone (fetches complete history and
# prunes stale remote-tracking refs).
- name: Unshallow
run: |
git fetch --prune --unshallow
# Configures Buildx with the `remote` driver pointing at the in-cluster
# BuildKit endpoint; the step is limited to 10 minutes.
- name: Set up Docker Buildx
id: setup-buildx
uses: docker/setup-buildx-action@v3
with:
driver: remote
endpoint: tcp://buildkit-amd64.ci.svc.cluster.local:1234
timeout-minutes: 10
# Decrypts .secrets.yaml with sops and emits one `::add-mask::` workflow command
# per value under `.secrets`, so those values are masked in the logs.
- name: Mask secrets
run: |
echo -e "$(sops -d .secrets.yaml | yq -e '.secrets | to_entries[] | "::add-mask::" + .value')"
# Appends every entry under `.secrets` as a KEY=VALUE line to the file named by
# GITHUB_ENV, exposing them as environment variables to the following steps.
- name: Set secrets for job
run: |
sops -d .secrets.yaml | yq -e '.secrets | to_entries[] | .key + "=" + .value' >> "$GITHUB_ENV"
# Fetches the artifact named `artifacts` into the `_out` directory.
- name: Download artifacts
uses: actions/download-artifact@v4
with:
name: artifacts
path: _out
# Marks every path listed in _out/executable-artifacts as executable
# (presumably because the executable bit is lost in the artifact round-trip -
# confirm).
- name: Fix artifact permissions
run: |
xargs -a _out/executable-artifacts -I {} chmod +x {}
# Runs the integration test make target as root while keeping the environment
# (`sudo -E`). INTEGRATION_TEST_ARGS passes a `--test.run` pattern matching
# `CleanState/` or `WorkloadProxy`; the E2E and Talemu toggles are set to "false".
- name: run-integration-test
env:
INTEGRATION_RUN_E2E_TEST: "false"
INTEGRATION_TEST_ARGS: --test.run CleanState/|WorkloadProxy
RUN_TALEMU_TESTS: "false"
TALEMU_TEST_ARGS: --test.run ImmediateClusterDestruction/|EncryptedCluster/|SinglenodeCluster/|ScaleUpAndDown/|ScaleUpAndDownMachineClassBasedMachineSets/|TalosUpgrades/|KubernetesUpgrades/|MaintenanceDowngrade/|ClusterTemplate/
WITH_DEBUG: "true"
run: |
sudo -E make run-integration-test
# Uploads the cluster log files even when earlier steps failed (`always()`),
# excluding swtpm.log; the artifact is kept for 5 days and an upload failure
# does not fail the job.
- name: save-talos-logs-artifacts
if: always()
uses: actions/upload-artifact@v4
with:
name: talos-logs-e2e-workload-proxy
path: |-
~/.talos/clusters/**/*.log
!~/.talos/clusters/**/swtpm.log
retention-days: "5"
continue-on-error: true
78 changes: 78 additions & 0 deletions .github/workflows/e2e-workload-proxy-cron.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
# THIS FILE WAS AUTOMATICALLY GENERATED, PLEASE DO NOT EDIT.
#
# Generated on 2024-08-15T23:44:03Z by kres 7be2a05.

# Scheduled workflow that runs the workload proxy integration test on a
# self-hosted runner carrying the `omni` label.
name: e2e-workload-proxy-cron
# Runs sharing a group key cancel each other; the key is the head ref when set,
# otherwise the run id.
concurrency:
group: ${{ github.head_ref || github.run_id }}
cancel-in-progress: true
# The key is quoted so that generic YAML 1.1 parsers do not read `on` as a boolean.
"on":
schedule:
# Every day at 01:30 (GitHub Actions evaluates cron schedules in UTC).
- cron: 30 1 * * *
jobs:
default:
runs-on:
- self-hosted
- omni
steps:
# Collects runner details for the diagnostic print below; failures here are
# tolerated (continue-on-error) so they cannot fail the job.
# NOTE(review): the `uses:` value below looks mangled by e-mail obfuscation in
# this capture (the part around `@` was replaced); confirm the real
# `owner/action@version` reference against the repository.
- name: gather-system-info
id: system-info
uses: kenchan0130/[email protected]
continue-on-error: true
# Prints the gathered system information, one entry per line. The total memory
# output is divided by 1024 three times (presumably bytes to GB - confirm the
# unit of the action's `totalmem` output). NODE_NAME is read from the
# environment; it is not set anywhere in this workflow.
- name: print-system-info
run: |
MEMORY_GB=$((${{ steps.system-info.outputs.totalmem }}/1024/1024/1024))
OUTPUTS=(
"CPU Core: ${{ steps.system-info.outputs.cpu-core }}"
"CPU Model: ${{ steps.system-info.outputs.cpu-model }}"
"Hostname: ${{ steps.system-info.outputs.hostname }}"
"NodeName: ${NODE_NAME}"
"Kernel release: ${{ steps.system-info.outputs.kernel-release }}"
"Kernel version: ${{ steps.system-info.outputs.kernel-version }}"
"Name: ${{ steps.system-info.outputs.name }}"
"Platform: ${{ steps.system-info.outputs.platform }}"
"Release: ${{ steps.system-info.outputs.release }}"
"Total memory: ${MEMORY_GB} GB"
)
for OUTPUT in "${OUTPUTS[@]}";do
echo "${OUTPUT}"
done
continue-on-error: true
- name: checkout
uses: actions/checkout@v4
# Converts the shallow checkout into a full clone (fetches complete history and
# prunes stale remote-tracking refs).
- name: Unshallow
run: |
git fetch --prune --unshallow
# Configures Buildx with the `remote` driver pointing at the in-cluster
# BuildKit endpoint; the step is limited to 10 minutes.
- name: Set up Docker Buildx
id: setup-buildx
uses: docker/setup-buildx-action@v3
with:
driver: remote
endpoint: tcp://buildkit-amd64.ci.svc.cluster.local:1234
timeout-minutes: 10
# Decrypts .secrets.yaml with sops and emits one `::add-mask::` workflow command
# per value under `.secrets`, so those values are masked in the logs.
- name: Mask secrets
run: |
echo -e "$(sops -d .secrets.yaml | yq -e '.secrets | to_entries[] | "::add-mask::" + .value')"
# Appends every entry under `.secrets` as a KEY=VALUE line to the file named by
# GITHUB_ENV, exposing them as environment variables to the following steps.
- name: Set secrets for job
run: |
sops -d .secrets.yaml | yq -e '.secrets | to_entries[] | .key + "=" + .value' >> "$GITHUB_ENV"
# Runs the integration test make target as root while keeping the environment
# (`sudo -E`). INTEGRATION_TEST_ARGS passes a `--test.run` pattern matching
# `CleanState/` or `WorkloadProxy`; the E2E and Talemu toggles are set to "false".
# NOTE(review): unlike the pull-request job of the same name in ci.yaml, this
# workflow has no artifact download step before the make target - presumably
# the target builds what it needs; confirm.
- name: run-integration-test
env:
INTEGRATION_RUN_E2E_TEST: "false"
INTEGRATION_TEST_ARGS: --test.run CleanState/|WorkloadProxy
RUN_TALEMU_TESTS: "false"
TALEMU_TEST_ARGS: --test.run ImmediateClusterDestruction/|EncryptedCluster/|SinglenodeCluster/|ScaleUpAndDown/|ScaleUpAndDownMachineClassBasedMachineSets/|TalosUpgrades/|KubernetesUpgrades/|MaintenanceDowngrade/|ClusterTemplate/
WITH_DEBUG: "true"
run: |
sudo -E make run-integration-test
# Uploads the cluster log files even when earlier steps failed (`always()`),
# excluding swtpm.log; the artifact is kept for 5 days.
- name: save-talos-logs-artifacts
if: always()
uses: actions/upload-artifact@v4
with:
name: talos-logs
path: |-
~/.talos/clusters/**/*.log
!~/.talos/clusters/**/swtpm.log
retention-days: "5"
3 changes: 2 additions & 1 deletion .github/workflows/slack-notify.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# THIS FILE WAS AUTOMATICALLY GENERATED, PLEASE DO NOT EDIT.
#
# Generated on 2023-11-23T16:03:37Z by kres latest.
# Generated on 2024-08-15T23:44:03Z by kres 7be2a05.

name: slack-notify
"on":
Expand All @@ -12,6 +12,7 @@ name: slack-notify
- e2e-upgrades-cron
- e2e-templates-cron
- e2e-backups-cron
- e2e-workload-proxy-cron
types:
- completed
jobs:
Expand Down
12 changes: 12 additions & 0 deletions .kres.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -238,6 +238,18 @@ spec:
INTEGRATION_RUN_E2E_TEST: "false"
INTEGRATION_TEST_ARGS: "--test.run CleanState/|EtcdBackupAndRestore"
RUN_TALEMU_TESTS: false
# Job definition for the workload proxy integration test. The generated
# workflows in this commit carry the same name, cron expression, runner label
# and trigger labels, so kres presumably derives them from this entry - confirm
# against the kres documentation.
- name: e2e-workload-proxy
# Schedule for the cron variant of the job: 01:30 every day.
crons:
- '30 1 * * *'
runnerLabels:
- omni
# Pull-request labels that enable the job.
triggerLabels:
- integration/e2e
- integration/e2e-workload-proxy
# Environment values for the test step: only the `CleanState/` and
# `WorkloadProxy` tests are selected, and the E2E and Talemu suites are off.
# NOTE(review): RUN_TALEMU_TESTS is an unquoted boolean here while the generated
# workflows carry the string "false"; this matches the neighbouring entries, so
# presumably kres stringifies it - confirm.
environmentOverride:
INTEGRATION_RUN_E2E_TEST: "false"
INTEGRATION_TEST_ARGS: "--test.run CleanState/|WorkloadProxy"
RUN_TALEMU_TESTS: false
---
kind: common.Build
spec:
Expand Down
25 changes: 25 additions & 0 deletions cmd/integration-test/pkg/clientconfig/clientconfig.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ package clientconfig

import (
"context"
"encoding/base64"
"fmt"
"net/http"
"os"
Expand Down Expand Up @@ -145,6 +146,30 @@ func SignHTTPRequestWithEmail(ctx context.Context, client *client.Client, req *h
return msg.Sign(email, newKey)
}

// RegisterKeyGetIDSignatureBase64 generates a new PGP key for the default test email,
// registers it through the client's auth service, and returns the key ID together
// with a base64-encoded signature of that same ID.
//
// The returned id is the fingerprint of the generated key. idSignatureBase64 is the
// standard (padded) base64 encoding of the bytes produced by signing the fingerprint
// with the newly generated key.
//
// An error is returned when key generation, key registration or signing fails; the
// underlying error is wrapped, so it remains available via errors.Is / errors.As.
func RegisterKeyGetIDSignatureBase64(ctx context.Context, client *client.Client) (id, idSignatureBase64 string, err error) {
	// The 4-hour duration is passed straight to the key generator
	// (presumably the key lifetime — confirm against pgp.GenerateKey).
	newKey, err := pgp.GenerateKey("", "", defaultEmail, 4*time.Hour)
	if err != nil {
		return "", "", fmt.Errorf("failed to generate key: %w", err)
	}

	if err = registerKey(ctx, client.Auth(), newKey, defaultEmail); err != nil {
		return "", "", fmt.Errorf("failed to register key: %w", err)
	}

	id = newKey.Fingerprint()

	// Sign the ID with the key it identifies.
	signedIDBytes, err := newKey.Sign([]byte(id))
	if err != nil {
		return "", "", fmt.Errorf("failed to sign key ID: %w", err)
	}

	idSignatureBase64 = base64.StdEncoding.EncodeToString(signedIDBytes)

	return id, idSignatureBase64, nil
}

var talosAPIKeyMutex sync.Mutex

// TalosAPIKeyPrepare prepares a public key to be used with tests interacting via Talos API client using the default test email.
Expand Down
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
42 changes: 42 additions & 0 deletions cmd/integration-test/pkg/tests/tests.go
Original file line number Diff line number Diff line change
Expand Up @@ -1097,6 +1097,48 @@ Test flow of cluster creation and scaling using cluster templates.`,
},
Finalizer: DestroyCluster(ctx, rootClient.Omni().State(), "tmpl-cluster"),
},
{
Name: "WorkloadProxy",
Description: "Test workload service proxying feature",
Parallel: true,
MachineClaim: 1,
Subtests: subTests(
subTest{
"ClusterShouldBeCreated",
CreateCluster(ctx, rootClient, ClusterOptions{
Name: "integration-workload-proxy",
ControlPlanes: 1,
Workers: 0,

Features: &specs.ClusterSpec_Features{
EnableWorkloadProxy: true,
},

MachineOptions: options.MachineOptions,
},
),
},
).Append(
TestBlockClusterAndTalosAPIAndKubernetesShouldBeReady(
ctx, rootClient,
"integration-workload-proxy",
options.MachineOptions.TalosVersion,
options.MachineOptions.KubernetesVersion,
talosAPIKeyPrepare,
)...,
).Append(
subTest{
"WorkloadProxyShouldBeTested",
AssertWorkloadProxy(ctx, rootClient, "integration-workload-proxy"),
},
).Append(
subTest{
"ClusterShouldBeDestroyed",
AssertDestroyCluster(ctx, rootClient.Omni().State(), "integration-workload-proxy"),
},
),
Finalizer: DestroyCluster(ctx, rootClient.Omni().State(), "integration-workload-proxy"),
},
}

var re *regexp.Regexp
Expand Down
Loading

0 comments on commit dcd123d

Please sign in to comment.