Skip to content

Commit

Permalink
Handle phantom public-config change (#2359)
Browse files Browse the repository at this point in the history
Sometimes nodes reboot randomly because they receive
phantom public-config changes.

This was a code bug: we didn't handle ctx cancellation, which
caused the channel to receive a "zero" event that contains no data.
That in turn caused the node to remove its public config and then reboot.

This happens only with nodes that have a public config.
  • Loading branch information
muhamadazmy authored Jun 12, 2024
1 parent 160dfc6 commit 93640c5
Show file tree
Hide file tree
Showing 3 changed files with 5 additions and 5 deletions.
2 changes: 1 addition & 1 deletion cmds/modules/noded/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,7 @@ func action(cli *cli.Context) error {

go func() {
for {
if err := public(ctx, node, env, redis, consumer); err != nil {
if err := public(ctx, node, redis, consumer); err != nil {
log.Error().Err(err).Msg("setting public config failed")
<-time.After(10 * time.Second)
}
Expand Down
6 changes: 3 additions & 3 deletions cmds/modules/noded/public.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ import (
substrate "github.com/threefoldtech/tfchain/clients/tfchain-client-go"
"github.com/threefoldtech/zbus"
"github.com/threefoldtech/zos/pkg"
"github.com/threefoldtech/zos/pkg/environment"
"github.com/threefoldtech/zos/pkg/events"
"github.com/threefoldtech/zos/pkg/stubs"
)
Expand All @@ -31,7 +30,7 @@ func setPublicConfig(ctx context.Context, cl zbus.Client, cfg *substrate.PublicC
}

// public sets and watches changes to public config on chain and tries to apply the provided setup
func public(ctx context.Context, nodeID uint32, env environment.Environment, cl zbus.Client, events *events.RedisConsumer) error {
func public(ctx context.Context, nodeID uint32, cl zbus.Client, events *events.RedisConsumer) error {
ch, err := events.PublicConfig(ctx)
if err != nil {
return errors.Wrap(err, "failed to subscribe to node events")
Expand All @@ -57,8 +56,9 @@ reapply:

for {
select {
case <-ctx.Done():
return nil
case event := <-ch:

log.Info().Msgf("got a public config update: %+v", event.PublicConfig)
var cfg *substrate.PublicConfig
if event.PublicConfig.HasValue {
Expand Down
2 changes: 1 addition & 1 deletion pkg/events/redis.go
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,7 @@ func (r *RedisConsumer) consumer(ctx context.Context, stream string, ch reflect.
if chosen == 0 {
return
}
} else if err != nil {
} else {
logger.Error().Err(err).Str("id", message.ID).Msg("failed to handle message")
}

Expand Down

0 comments on commit 93640c5

Please sign in to comment.