
message: account for duplicates across a recovered checkpoint boundary
Sequencer's runtime assertion checks were slightly too strong.
A scenario we encountered which tripped a runtime panic:

- CONTINUE_TXN messages were read from a journal, and were then part of
  a checkpoint. The checkpoint included the producer's earlier last ACK
  clock and a begin offset at the first of these messages.

- The messages were then duplicated within the journal, and ACKed.

- A new Sequencer recovers from the checkpoint.

- It reads the later, duplicated messages and adds them to the ring,
  followed by an ACK which begins a dequeue with a replay-read of the
  earlier messages.

- The earlier messages are dequeued first, and then the replay ends and
  hands off to the ring.

- Sequencer blows up because the first ring message is not strictly
  larger than the largest Clock dequeued from the replay read.

The fix is simply to remove the runtime assertion and discard duplicates,
as this is a valid thing that can happen. Also add a new test case
covering this scenario.
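
To make the failing invariant concrete, here is a minimal runnable sketch
using illustrative clock values; the variable names and printed message are
stand-ins for exposition, not the Sequencer's actual fields or API:

package main

import "fmt"

func main() {
    // Journal contents, in order (illustrative clocks only):
    //   CONTINUE(clock=1..3)            <- included in the checkpoint
    //   CONTINUE(clock=1..3) duplicates <- read into the ring after recovery
    //   ACK(clock=4)                    <- begins a dequeue with a replay-read
    var replayMaxClock uint64 = 3 // largest clock dequeued from the replay read
    var firstRingClock uint64 = 1 // first (duplicated) message handed off by the ring

    // Old behavior: assert firstRingClock > replayMaxClock and panic otherwise.
    // New behavior: recognize the message as a duplicate and discard it.
    if firstRingClock <= replayMaxClock {
        fmt.Println("duplicate straddling the checkpoint boundary: discard, don't panic")
    }
}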
jgraettinger committed Oct 17, 2024
1 parent 4818a4b commit 719adef
Showing 2 changed files with 60 additions and 34 deletions.
49 changes: 15 additions & 34 deletions message/sequencer.go
@@ -401,40 +401,21 @@ func (w *Sequencer) Step() error {
w.dequeuedClock = GetClock(uuid)

if w.dequeuedClock != 0 && w.dequeuedClock <= w.emit.minClock {
if w.replayIt != nil {
continue // These can happen during replays.
} else {
// We don't allow duplicates within the ring in the first place,
// with one exception: messages with zero-valued Clocks are not
// expected to be consistently ordered on clock.
// In QueueUncommitted we synthetically assigned a clock value.
// Log a bunch of diagnostics using separate log statements.
log.WithFields(log.Fields{
"uuid": uuid,
"dequeuedClock": w.dequeuedClock,
"emit": fmt.Sprintf("%+v", w.emit),
"partialsCount": len(w.partials),
"pendingCount": len(w.pending),
}).Error("ring clock <= emit.minClock (will log diagnostics then panic)")
log.WithField("offsets", w.offsets).Error("sequencer offsets")
log.WithField("dequeued", w.Dequeued).Error("dequeued message")

for producer, partial := range w.partials {
log.WithFields(log.Fields{
"producer": producer,
"partial": partial,
}).Error("partials")
}
var i = 0
for pending, _ := range w.pending {
log.WithFields(log.Fields{
"i": i,
"pending": pending,
}).Error("pending")
i++
}
panic("ring clock <= emit.minClock")
}
// We don't allow duplicates within the ring with one exception:
// messages with zero-valued Clocks are not expected to be
// consistently ordered on clock (in QueueUncommitted we
// synthetically assigned a clock value).
//
// However:
// - Duplicated sequences CAN happen during replays.
// - They can ALSO happen if we dequeued during replay,
// and then observe the sequence duplicated again in the ring.
//
// While ordinarily we would discard such duplicates during ring
// maintenance operations, if the duplication straddles a recovered
// checkpoint boundary then all bets are off because checkpoints
// track only the last ACK and not the partial minClock.
continue
} else if w.dequeuedClock > w.emit.maxClock {
continue // ACK'd clock tells us not to commit.
}
45 changes: 45 additions & 0 deletions message/sequencer_test.go
@@ -614,6 +614,51 @@ func TestSequencerProducerStatesRoundTripDuringDequeue(t *testing.T) {
expectDeque(t, seq2, a5, a6ACK)
}

func TestSequencerProducerStatesStraddleDuplicate(t *testing.T) {
var (
generate = newTestMsgGenerator()
seq1 = NewSequencer(nil, nil, 12)
A = NewProducerID()
a1 = generate(A, 1, Flag_CONTINUE_TXN)
a2 = generate(A, 2, Flag_CONTINUE_TXN)
a3 = generate(A, 3, Flag_CONTINUE_TXN)
a1Dup = generate(A, 1, Flag_CONTINUE_TXN)
a2Dup = generate(A, 2, Flag_CONTINUE_TXN)
a3Dup = generate(A, 3, Flag_CONTINUE_TXN)
a4ACK = generate(A, 4, Flag_ACK_TXN)
)

require.Equal(t, []QueueOutcome{
QueueContinueBeginSpan,
QueueContinueExtendSpan,
QueueContinueExtendSpan,
}, queue(seq1, a1, a2, a3))
require.Nil(t, seq1.emit) // No messages to dequeue.

// Take a checkpoint and then restore it.
var offsets, states = seq1.Checkpoint(0)
var seq2 = NewSequencer(offsets, states, 12)

require.Equal(t, []QueueOutcome{
QueueContinueTxnClockLarger,
QueueContinueTxnClockLarger,
QueueContinueTxnClockLarger,
QueueAckCommitRing,
}, queue(seq1, a1Dup, a2Dup, a3Dup, a4ACK))

require.Equal(t, []QueueOutcome{
QueueContinueExtendSpan,
QueueContinueExtendSpan,
QueueContinueExtendSpan,
QueueAckCommitReplay,
}, queue(seq2, a1Dup, a2Dup, a3Dup, a4ACK))

expectReplay(t, seq2, a1.Begin, a1Dup.Begin, a1, a2, a3)

expectDeque(t, seq1, a1, a2, a3, a4ACK)
expectDeque(t, seq2, a1, a2, a3, a4ACK)
}

func TestSequencerProducerPruning(t *testing.T) {
var (
generate = newTestMsgGenerator()
