plugin: Remove ComputeUnit from config and PluginResponse (#743)
... and everywhere else that it filters into.

This is part of the process of moving the ComputeUnit source of truth
from the scheduler to the autoscaler-agent (#706).
We've already redefined the autoscaler-agent's own config as its source
of truth, so this is mostly just following through on the previous
deprecation.
sharnoff authored Feb 21, 2024
1 parent 977e188 commit b7243b0
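
For orientation before the per-file diffs: a condensed before/after sketch of the wire types this commit touches (the full diff is in pkg/api/types.go below). `Resources` and `MigrateResponse` are stand-ins for the real types, and the `permit` JSON tag is an assumption; the `computeUnit` and `resourceUnit` tags are taken from the diff.

```go
package api

// Stand-ins only; the real field layouts differ.
type Resources struct{ VCPU, Mem uint64 }
type MigrateResponse struct{}

// Before: the scheduler announced its compute unit in every response, and
// agents were expected to follow the most recent value they had received.
type pluginResponseBefore struct {
	Permit      Resources        `json:"permit"` // assumed tag
	Migrate     *MigrateResponse `json:"migrate,omitempty"`
	ComputeUnit *Resources       `json:"resourceUnit,omitempty"` // deprecated by #706
}

// After: the response field is gone; the agent always sends its own unit,
// so AgentRequest.ComputeUnit becomes a required plain value.
type pluginResponseAfter struct {
	Permit  Resources        `json:"permit"` // assumed tag
	Migrate *MigrateResponse `json:"migrate,omitempty"`
}

type agentRequestAfter struct {
	ComputeUnit Resources `json:"computeUnit"` // was *Resources
	// ... other AgentRequest fields unchanged
}
```

This is also why pkg/agent/runner.go below drops an address-of operator: the request field now takes a value, not a pointer.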
Showing 10 changed files with 56 additions and 169 deletions.
1 change: 0 additions & 1 deletion deploy/scheduler/config_map.yaml
@@ -25,7 +25,6 @@ metadata:
 data:
   autoscale-enforcer-config.json: |
     {
-      "computeUnit": { "vCPUs": 0.25, "mem": "1Gi" },
       "nodeConfig": {
         "cpu": { "watermark": 0.9 },
         "memory": { "watermark": 0.9 },
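
The default deleted above does not disappear: the same value now comes from the autoscaler-agent's own config (note `r.global.config.Scaling.ComputeUnit` in pkg/agent/runner.go below). As a sense-check of what a 0.25 vCPU / 1Gi unit means, here is a self-contained sketch assuming a milli-vCPU/bytes representation (the real api.Resources layout differs):

```go
package main

import "fmt"

// Hypothetical mirror of the scheduler default deleted above.
type resources struct {
	VCPUMilli uint32 // 250 == 0.25 vCPU
	MemBytes  uint64
}

// mul scales one compute unit to n units, preserving the CPU:memory ratio.
func (r resources) mul(n uint32) resources {
	return resources{VCPUMilli: r.VCPUMilli * n, MemBytes: r.MemBytes * uint64(n)}
}

func main() {
	unit := resources{VCPUMilli: 250, MemBytes: 1 << 30} // 0.25 vCPU, 1Gi
	fmt.Println(unit.mul(4)) // {1000 4294967296}: a 4-CU VM gets 1 vCPU and 4Gi
}
```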
90 changes: 36 additions & 54 deletions pkg/agent/core/state_test.go
@@ -136,9 +136,8 @@ func Test_DesiredResourcesFromMetricsOrRequestedUpscaling(t *testing.T) {
     // set lastApproved by simulating a scheduler request/response
     state.Plugin().StartingRequest(now, c.schedulerApproved)
     err := state.Plugin().RequestSuccessful(now, api.PluginResponse{
-        Permit:      c.schedulerApproved,
-        Migrate:     nil,
-        ComputeUnit: nil,
+        Permit:  c.schedulerApproved,
+        Migrate: nil,
     })
     if err != nil {
         t.Errorf("state.Plugin().RequestSuccessful() failed: %s", err)
@@ -218,9 +217,8 @@ func doInitialPluginRequest(
     a.Do(state.Plugin().StartingRequest, clock.Now(), resources)
     clock.Inc(requestTime)
     a.NoError(state.Plugin().RequestSuccessful, clock.Now(), api.PluginResponse{
-        Permit:      resources,
-        Migrate:     nil,
-        ComputeUnit: nil,
+        Permit:  resources,
+        Migrate: nil,
     })
 }

@@ -287,9 +285,8 @@ func TestBasicScaleUpAndDownFlow(t *testing.T) {
     // should have nothing more to do; waiting on plugin request to come back
     a.Call(nextActions).Equals(core.ActionSet{})
     a.NoError(state.Plugin().RequestSuccessful, clock.Now(), api.PluginResponse{
-        Permit:      resForCU(2),
-        Migrate:     nil,
-        ComputeUnit: nil,
+        Permit:  resForCU(2),
+        Migrate: nil,
     })
 
     // Scheduler approval is done, now we should be making the request to NeonVM
@@ -396,9 +393,8 @@ func TestBasicScaleUpAndDownFlow(t *testing.T) {
     // should have nothing more to do; waiting on plugin request to come back
     a.Call(nextActions).Equals(core.ActionSet{})
     a.NoError(state.Plugin().RequestSuccessful, clock.Now(), api.PluginResponse{
-        Permit:      resForCU(1),
-        Migrate:     nil,
-        ComputeUnit: nil,
+        Permit:  resForCU(1),
+        Migrate: nil,
     })
 
     // Finally, check there's no leftover actions:
@@ -460,9 +456,8 @@ func TestPeriodicPluginRequest(t *testing.T) {
     clock.Inc(reqDuration)
     a.Call(state.NextActions, clock.Now()).Equals(core.ActionSet{})
     a.NoError(state.Plugin().RequestSuccessful, clock.Now(), api.PluginResponse{
-        Permit:      resources,
-        Migrate:     nil,
-        ComputeUnit: nil,
+        Permit:  resources,
+        Migrate: nil,
     })
     clock.Inc(clockTick - reqDuration)
 }
@@ -617,9 +612,8 @@ func TestDeniedDownscalingIncreaseAndRetry(t *testing.T) {
     })
     clockTick()
     a.NoError(state.Plugin().RequestSuccessful, clock.Now(), api.PluginResponse{
-        Permit:      resForCU(3),
-        Migrate:     nil,
-        ComputeUnit: nil,
+        Permit:  resForCU(3),
+        Migrate: nil,
     })
     // ... And *now* there's nothing left to do but wait until downscale wait expires:
     a.Call(nextActions).Equals(core.ActionSet{
@@ -661,9 +655,8 @@ func TestDeniedDownscalingIncreaseAndRetry(t *testing.T) {
     })
     clockTick()
     a.NoError(state.Plugin().RequestSuccessful, clock.Now(), api.PluginResponse{
-        Permit:      resForCU(3),
-        Migrate:     nil,
-        ComputeUnit: nil,
+        Permit:  resForCU(3),
+        Migrate: nil,
     })
     a.Call(nextActions).Equals(core.ActionSet{
         Wait: &core.ActionWait{Duration: duration("0.9s")}, // yep, still waiting on retrying vm-monitor downscaling
@@ -728,9 +721,8 @@ func TestDeniedDownscalingIncreaseAndRetry(t *testing.T) {
     })
     clockTick()
     a.NoError(state.Plugin().RequestSuccessful, clock.Now(), api.PluginResponse{
-        Permit:      resForCU(1),
-        Migrate:     nil,
-        ComputeUnit: nil,
+        Permit:  resForCU(1),
+        Migrate: nil,
     })
     // And now there's truly nothing left to do. Back to waiting on plugin request tick :)
     a.Call(nextActions).Equals(core.ActionSet{
@@ -795,9 +787,8 @@ func TestRequestedUpscale(t *testing.T) {
         Wait: &core.ActionWait{Duration: duration("5.9s")}, // same waiting for requested upscale expiring
     })
     a.NoError(state.Plugin().RequestSuccessful, clock.Now(), api.PluginResponse{
-        Permit:      resForCU(2),
-        Migrate:     nil,
-        ComputeUnit: nil,
+        Permit:  resForCU(2),
+        Migrate: nil,
     })
 
     // After approval from the scheduler plugin, now need to make NeonVM request:
@@ -847,9 +838,8 @@ func TestRequestedUpscale(t *testing.T) {
         Wait: &core.ActionWait{Duration: duration("0.9s")}, // waiting for requested upscale expiring
     })
     a.NoError(state.Plugin().RequestSuccessful, clock.Now(), api.PluginResponse{
-        Permit:      resForCU(2),
-        Migrate:     nil,
-        ComputeUnit: nil,
+        Permit:  resForCU(2),
+        Migrate: nil,
     })
 
     // Still should just be waiting on vm-monitor upscale expiring
@@ -992,9 +982,8 @@ func TestDownscalePivotBack(t *testing.T) {
         *pluginWait = duration("4.9s") // reset because we just made a request
         t.Log(" > finish plugin downscale")
         a.NoError(state.Plugin().RequestSuccessful, clock.Now(), api.PluginResponse{
-            Permit:      resForCU(1),
-            Migrate:     nil,
-            ComputeUnit: nil,
+            Permit:  resForCU(1),
+            Migrate: nil,
         })
     },
     post: func(pluginWait *time.Duration) {
@@ -1011,9 +1000,8 @@ func TestDownscalePivotBack(t *testing.T) {
         *pluginWait = duration("4.9s") // reset because we just made a request
         t.Log(" > finish plugin upscale")
         a.NoError(state.Plugin().RequestSuccessful, clock.Now(), api.PluginResponse{
-            Permit:      resForCU(2),
-            Migrate:     nil,
-            ComputeUnit: nil,
+            Permit:  resForCU(2),
+            Migrate: nil,
         })
     },
 },
@@ -1150,9 +1138,8 @@ func TestBoundsChangeRequiresDownsale(t *testing.T) {
     a.Do(state.Plugin().StartingRequest, clock.Now(), resForCU(1))
     clockTick()
     a.NoError(state.Plugin().RequestSuccessful, clock.Now(), api.PluginResponse{
-        Permit:      resForCU(1),
-        Migrate:     nil,
-        ComputeUnit: nil,
+        Permit:  resForCU(1),
+        Migrate: nil,
     })
     // And then, we shouldn't need to do anything else:
     a.Call(nextActions).Equals(core.ActionSet{
@@ -1222,9 +1209,8 @@ func TestBoundsChangeRequiresUpscale(t *testing.T) {
    a.Do(state.Plugin().StartingRequest, clock.Now(), resForCU(3))
     clockTick()
     a.NoError(state.Plugin().RequestSuccessful, clock.Now(), api.PluginResponse{
-        Permit:      resForCU(3),
-        Migrate:     nil,
-        ComputeUnit: nil,
+        Permit:  resForCU(3),
+        Migrate: nil,
     })
     // Do NeonVM request for the upscaling
     a.Call(nextActions).Equals(core.ActionSet{
@@ -1322,9 +1308,8 @@ func TestFailedRequestRetry(t *testing.T) {
     a.Do(state.Plugin().StartingRequest, clock.Now(), resForCU(2))
     clockTick()
     a.NoError(state.Plugin().RequestSuccessful, clock.Now(), api.PluginResponse{
-        Permit:      resForCU(2),
-        Migrate:     nil,
-        ComputeUnit: nil,
+        Permit:  resForCU(2),
+        Migrate: nil,
     })
 
     // Now, after plugin request is successful, we should be making a request to NeonVM.
@@ -1407,9 +1392,8 @@ func TestMetricsConcurrentUpdatedDuringDownscale(t *testing.T) {
     a.Do(state.Plugin().StartingRequest, clock.Now(), resForCU(3))
     clockTick()
     a.NoError(state.Plugin().RequestSuccessful, clock.Now(), api.PluginResponse{
-        Permit:      resForCU(3),
-        Migrate:     nil,
-        ComputeUnit: nil,
+        Permit:  resForCU(3),
+        Migrate: nil,
     })
 
     clockTick()
@@ -1502,9 +1486,8 @@ func TestMetricsConcurrentUpdatedDuringDownscale(t *testing.T) {
     clockTick()
 
     a.NoError(state.Plugin().RequestSuccessful, clock.Now(), api.PluginResponse{
-        Permit:      resForCU(2),
-        Migrate:     nil,
-        ComputeUnit: nil,
+        Permit:  resForCU(2),
+        Migrate: nil,
     })
     // Still waiting for NeonVM request to complete
     a.Call(nextActions).Equals(core.ActionSet{
@@ -1528,9 +1511,8 @@ func TestMetricsConcurrentUpdatedDuringDownscale(t *testing.T) {
     clockTick()
 
     a.NoError(state.Plugin().RequestSuccessful, clock.Now(), api.PluginResponse{
-        Permit:      resForCU(1),
-        Migrate:     nil,
-        ComputeUnit: nil,
+        Permit:  resForCU(1),
+        Migrate: nil,
     })
     // Nothing left to do
     a.Call(nextActions).Equals(core.ActionSet{
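
Every hunk above makes the same substitution, dropping `ComputeUnit: nil` from the response literal. A hypothetical helper (not part of this commit) shows how the remaining repetition could be collapsed:

```go
// Hypothetical test helper; the tests in this commit spell the literal out.
func pluginResponseFor(permit api.Resources) api.PluginResponse {
	return api.PluginResponse{
		Permit:  permit,
		Migrate: nil,
	}
}
```

With it, the calls above would read `a.NoError(state.Plugin().RequestSuccessful, clock.Now(), pluginResponseFor(resForCU(2)))`.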
2 changes: 1 addition & 1 deletion pkg/agent/runner.go
@@ -669,7 +669,7 @@ func (r *Runner) DoSchedulerRequest(
     reqData := &api.AgentRequest{
         ProtoVersion: PluginProtocolVersion,
         Pod:          r.podName,
-        ComputeUnit:  &r.global.config.Scaling.ComputeUnit,
+        ComputeUnit:  r.global.config.Scaling.ComputeUnit,
         Resources:    resources,
         LastPermit:   lastPermit,
         Metrics:      metrics,
2 changes: 1 addition & 1 deletion pkg/api/VERSIONING.md
@@ -38,7 +38,7 @@ number.
 
 | Release | autoscaler-agent | Scheduler plugin |
 |---------|------------------|------------------|
-| _Current_ | v4.0 only | v1.0-v4.0 |
+| _Current_ | v4.0 only | v3.0-v4.0 |
 | v0.25.0 | v4.0 only | v1.0-v4.0 |
 | v0.24.0 | v4.0 only | v1.0-v4.0 |
 | v0.23.0 | **v4.0 only** | **v1.0-v4.0** |
13 changes: 1 addition & 12 deletions pkg/api/types.go
@@ -170,7 +170,7 @@ type AgentRequest struct {
     // If the requested resources are not a multiple of ComputeUnit, the scheduler plugin will make
     // a best-effort attempt to return a value satisfying the request. Any approved increases will
     // be a multiple of ComputeUnit, but otherwise the plugin does not check.
-    ComputeUnit *Resources `json:"computeUnit"`
+    ComputeUnit Resources `json:"computeUnit"`
     // Resources gives a requested or notified change in resources allocated to the VM.
     //
     // The requested amount MAY be equal to the current amount, in which case it serves as a
@@ -390,17 +390,6 @@ type PluginResponse struct {
     // Migrate, if present, notifies the autoscaler-agent that its VM will be migrated away,
     // alongside whatever other information may be useful.
     Migrate *MigrateResponse `json:"migrate,omitempty"`
-
-    // ComputeUnit is the minimum unit of resources that the scheduler is expecting to work with
-    //
-    // For example, if ComputeUnit is Resources{ VCPU: 2, Mem: 4Gi }, then all VMs are expected to
-    // have allocated vCPUs that are a multiple of two, with 2 GiB of memory per vCPU.
-    //
-    // This value may be different across nodes, and the scheduler expects that all AgentRequests
-    // will abide by the most recent ComputeUnit they've received.
-    //
-    // THIS FIELD IS DEPRECATED: See https://github.com/neondatabase/autoscaling/issues/706
-    ComputeUnit *Resources `json:"resourceUnit,omitempty"`
 }
 
 // MigrateResponse, when provided, is a notification to the autoscaler-agent that it will migrate
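
The "multiple of ComputeUnit" contract in the AgentRequest comment above is easy to picture with some arithmetic. A self-contained sketch, not the plugin's actual algorithm: round a request up to the smallest whole number of agent-supplied compute units that covers it.

```go
package main

import "fmt"

type resources struct {
	vCPUMilli uint32
	memBytes  uint64
}

// roundUpToUnit returns the smallest multiple of unit that covers req, taking
// the ceiling over both dimensions so neither CPU nor memory falls short.
func roundUpToUnit(req, unit resources) resources {
	n := (req.vCPUMilli + unit.vCPUMilli - 1) / unit.vCPUMilli
	if m := uint32((req.memBytes + unit.memBytes - 1) / unit.memBytes); m > n {
		n = m
	}
	return resources{vCPUMilli: unit.vCPUMilli * n, memBytes: unit.memBytes * uint64(n)}
}

func main() {
	unit := resources{vCPUMilli: 250, memBytes: 1 << 30} // 0.25 vCPU, 1Gi per CU
	req := resources{vCPUMilli: 600, memBytes: 2 << 30}  // not a unit multiple
	fmt.Println(roundUpToUnit(req, unit))                // {750 3221225472}: 3 CUs
}
```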
9 changes: 0 additions & 9 deletions pkg/plugin/config.go
@@ -19,11 +19,6 @@ import (
 //////////////////
 
 type Config struct {
-    // ComputeUnit is the desired ratio between CPU and memory that autoscaler-agents should uphold
-    //
-    // This value is sent to autoscaler-agents in every response, as part of api.PluginResponse.
-    ComputeUnit api.Resources `json:"computeUnit"`
-
     // NodeConfig defines our policies around node resources and scoring
     NodeConfig nodeConfig `json:"nodeConfig"`

@@ -121,10 +116,6 @@ func (c *Config) migrationEnabled() bool {
 
 // if the returned error is not nil, the string is a JSON path to the invalid value
 func (c *Config) validate() (string, error) {
-    if err := c.ComputeUnit.ValidateNonZero(); err != nil {
-        return "computeUnit", err
-    }
-
     if path, err := c.NodeConfig.validate(); err != nil {
         return fmt.Sprintf("nodeConfig.%s", path), err
     }
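
With the scheduler no longer owning a computeUnit, the removed ValidateNonZero check has no scheduler-side replacement; presumably the agent validates its own configured unit instead (that code is not in this diff). A hypothetical agent-side counterpart, reusing the same api.Resources method the scheduler used above:

```go
// Hypothetical: the JSON path "scaling.computeUnit" is a guess based on
// r.global.config.Scaling.ComputeUnit in pkg/agent/runner.go.
func (c *Config) validate() (string, error) {
	if err := c.Scaling.ComputeUnit.ValidateNonZero(); err != nil {
		return "scaling.computeUnit", err
	}
	// ... other agent config checks
	return "", nil
}
```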
7 changes: 0 additions & 7 deletions pkg/plugin/dumpstate.go
@@ -140,7 +140,6 @@ type podStateDump struct {
 type vmPodStateDump struct {
     Name                     util.NamespacedName    `json:"name"`
     TestingOnlyAlwaysMigrate bool                   `json:"testingOnlyAlwaysMigrate"`
-    MostRecentComputeUnit    *api.Resources         `json:"mostRecentComputeUnit"`
     Metrics                  *api.Metrics           `json:"metrics"`
     MqIndex                  int                    `json:"mqIndex"`
     MigrationState           *podMigrationStateDump `json:"migrationState"`
@@ -246,11 +245,6 @@ func (s *podState) dump() podStateDump {
 
 func (s *vmPodState) dump() vmPodStateDump {
     // Copy some of the "may be nil" pointer fields
-    var mostRecentComputeUnit *api.Resources
-    if s.mostRecentComputeUnit != nil {
-        mrcu := *s.mostRecentComputeUnit
-        mostRecentComputeUnit = &mrcu
-    }
     var metrics *api.Metrics
     if s.metrics != nil {
         m := *s.metrics
@@ -266,7 +260,6 @@ func (s *vmPodState) dump() vmPodStateDump {
     return vmPodStateDump{
         Name:                     s.name,
         TestingOnlyAlwaysMigrate: s.testingOnlyAlwaysMigrate,
-        MostRecentComputeUnit:    mostRecentComputeUnit,
         Metrics:                  metrics,
         MqIndex:                  s.mqIndex,
         MigrationState:           migrationState,
(diffs for the 3 remaining changed files not shown)
