Skip to content

Commit

Permalink
Add client info to SHUTDOWN / CLUSTER FAILOVER logs (#875)
Browse files Browse the repository at this point in the history
Print the full client info using catClientInfoString. This
info is useful when we want to identify the source of a request.

Signed-off-by: Binbin <[email protected]>
  • Loading branch information
enjoy-binbin authored Sep 8, 2024
1 parent 6478526 commit c642cf0
Show file tree
Hide file tree
Showing 5 changed files with 27 additions and 14 deletions.
8 changes: 5 additions & 3 deletions src/cluster_legacy.c
Original file line number Diff line number Diff line change
Expand Up @@ -6656,25 +6656,27 @@ int clusterCommandSpecial(client *c) {
}
resetManualFailover();
server.cluster->mf_end = mstime() + CLUSTER_MF_TIMEOUT;
sds client = catClientInfoString(sdsempty(), c, server.hide_user_data_from_log);

if (takeover) {
/* A takeover does not perform any initial check. It just
* generates a new configuration epoch for this node without
* consensus, claims the primary's slots, and broadcast the new
* configuration. */
serverLog(LL_NOTICE, "Taking over the primary (user request).");
serverLog(LL_NOTICE, "Taking over the primary (user request from '%s').", client);
clusterBumpConfigEpochWithoutConsensus();
clusterFailoverReplaceYourPrimary();
} else if (force) {
/* If this is a forced failover, we don't need to talk with our
* primary to agree about the offset. We just failover taking over
* it without coordination. */
serverLog(LL_NOTICE, "Forced failover user request accepted.");
serverLog(LL_NOTICE, "Forced failover user request accepted (user request from '%s').", client);
server.cluster->mf_can_start = 1;
} else {
serverLog(LL_NOTICE, "Manual failover user request accepted.");
serverLog(LL_NOTICE, "Manual failover user request accepted (user request from '%s').", client);
clusterSendMFStart(myself->replicaof);
}
sdsfree(client);
addReply(c, shared.ok);
} else if (!strcasecmp(c->argv[1]->ptr, "set-config-epoch") && c->argc == 3) {
/* CLUSTER SET-CONFIG-EPOCH <epoch>
Expand Down
2 changes: 1 addition & 1 deletion src/db.c
Original file line number Diff line number Diff line change
Expand Up @@ -1278,7 +1278,7 @@ void shutdownCommand(client *c) {
}

blockClientShutdown(c);
if (prepareForShutdown(flags) == C_OK) exit(0);
if (prepareForShutdown(c, flags) == C_OK) exit(0);
/* If we're here, then shutdown is ongoing (the client is still blocked) or
* failed (the client has received an error). */
}
Expand Down
2 changes: 1 addition & 1 deletion src/debug.c
Original file line number Diff line number Diff line change
Expand Up @@ -522,7 +522,7 @@ void debugCommand(client *c) {
int flags = !strcasecmp(c->argv[1]->ptr, "restart")
? (RESTART_SERVER_GRACEFULLY | RESTART_SERVER_CONFIG_REWRITE)
: RESTART_SERVER_NONE;
restartServer(flags, delay);
restartServer(c, flags, delay);
addReplyError(c, "failed to restart the server. Check server logs.");
} else if (!strcasecmp(c->argv[1]->ptr, "oom")) {
void *ptr = zmalloc(SIZE_MAX / 2); /* Should trigger an out of memory. */
Expand Down
25 changes: 18 additions & 7 deletions src/server.c
Original file line number Diff line number Diff line change
Expand Up @@ -1328,7 +1328,7 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
else if (server.last_sig_received == SIGTERM && server.shutdown_on_sigterm)
shutdownFlags = server.shutdown_on_sigterm;

if (prepareForShutdown(shutdownFlags) == C_OK) exit(0);
if (prepareForShutdown(NULL, shutdownFlags) == C_OK) exit(0);
} else if (isShutdownInitiated()) {
if (server.mstime >= server.shutdown_mstime || isReadyToShutdown()) {
if (finishShutdown() == C_OK) exit(0);
Expand Down Expand Up @@ -1560,7 +1560,7 @@ void whileBlockedCron(void) {
/* We received a SIGTERM during loading, shutting down here in a safe way,
* as it isn't ok doing so inside the signal handler. */
if (server.shutdown_asap && server.loading) {
if (prepareForShutdown(SHUTDOWN_NOSAVE) == C_OK) exit(0);
if (prepareForShutdown(NULL, SHUTDOWN_NOSAVE) == C_OK) exit(0);
serverLog(LL_WARNING,
"SIGTERM received but errors trying to shut down the server, check the logs for more information");
server.shutdown_asap = 0;
Expand Down Expand Up @@ -2139,7 +2139,7 @@ extern char **environ;
*
* On success the function does not return, because the process turns into
* a different process. On error C_ERR is returned. */
int restartServer(int flags, mstime_t delay) {
int restartServer(client *c, int flags, mstime_t delay) {
int j;

/* Check if we still have accesses to the executable that started this
Expand All @@ -2162,7 +2162,7 @@ int restartServer(int flags, mstime_t delay) {
}

/* Perform a proper shutdown. We don't wait for lagging replicas though. */
if (flags & RESTART_SERVER_GRACEFULLY && prepareForShutdown(SHUTDOWN_NOW) != C_OK) {
if (flags & RESTART_SERVER_GRACEFULLY && prepareForShutdown(c, SHUTDOWN_NOW) != C_OK) {
serverLog(LL_WARNING, "Can't restart: error preparing for shutdown");
return C_ERR;
}
Expand Down Expand Up @@ -4189,7 +4189,12 @@ void closeListeningSockets(int unlink_unix_socket) {
}
}

/* Prepare for shutting down the server. Flags:
/* Prepare for shutting down the server.
*
* The client *c can be NULL, for example when the shutdown is triggered by a
* signal. If a client is passed in, it is used to log the client info.
*
* Flags:
*
* - SHUTDOWN_SAVE: Save a database dump even if the server is configured not to
* save any dump.
Expand All @@ -4212,7 +4217,7 @@ void closeListeningSockets(int unlink_unix_socket) {
* errors are logged but ignored and C_OK is returned.
*
* On success, this function returns C_OK and then it's OK to call exit(0). */
int prepareForShutdown(int flags) {
int prepareForShutdown(client *c, int flags) {
if (isShutdownInitiated()) return C_ERR;

/* When SHUTDOWN is called while the server is loading a dataset in
Expand All @@ -4225,7 +4230,13 @@ int prepareForShutdown(int flags) {

server.shutdown_flags = flags;

serverLog(LL_NOTICE, "User requested shutdown...");
if (c != NULL) {
sds client = catClientInfoString(sdsempty(), c, server.hide_user_data_from_log);
serverLog(LL_NOTICE, "User requested shutdown... (user request from '%s')", client);
sdsfree(client);
} else {
serverLog(LL_NOTICE, "User requested shutdown...");
}
if (server.supervised_mode == SUPERVISED_SYSTEMD) serverCommunicateSystemd("STOPPING=1\n");

/* If we have any replicas, let them catch up the replication offset before
Expand Down
4 changes: 2 additions & 2 deletions src/server.h
Original file line number Diff line number Diff line change
Expand Up @@ -3296,7 +3296,7 @@ void preventCommandAOF(client *c);
void preventCommandReplication(client *c);
void slowlogPushCurrentCommand(client *c, struct serverCommand *cmd, ustime_t duration);
void updateCommandLatencyHistogram(struct hdr_histogram **latency_histogram, int64_t duration_hist);
int prepareForShutdown(int flags);
int prepareForShutdown(client *c, int flags);
void replyToClientsBlockedOnShutdown(void);
int abortShutdown(void);
void afterCommand(client *c);
Expand Down Expand Up @@ -3341,7 +3341,7 @@ void dismissMemoryInChild(void);
#define RESTART_SERVER_NONE 0
#define RESTART_SERVER_GRACEFULLY (1 << 0) /* Do proper shutdown. */
#define RESTART_SERVER_CONFIG_REWRITE (1 << 1) /* CONFIG REWRITE before restart.*/
int restartServer(int flags, mstime_t delay);
int restartServer(client *c, int flags, mstime_t delay);
int getKeySlot(sds key);
int calculateKeySlot(sds key);

Expand Down

0 comments on commit c642cf0

Please sign in to comment.