From 671e4f412646f6a04e7b5a153f0270584531a110 Mon Sep 17 00:00:00 2001 From: Will Donnelly Date: Tue, 15 Oct 2024 15:48:14 -0500 Subject: [PATCH] source-mysql: Fix some benign error/warnings that get logged The collation families `utf8_whatever` and `ascii_whatever` are explicitly added to the charsets table so we stop logging errors reading `unknown charset for collation, assuming UTF-8` and just do that without complaining because UTF-8 is in fact correct. (This requires another minor tweak, because the way we match up a collation name to a character set is by simple prefix matching, and `utf8` is a prefix of the `utf8mb3` and `utf8mb4` collations. But a collation name is actually `<charset>_<whatever>` with an underscore, so to fix that we just have to do a prefix match including that trailing underscore. This should still work the same for all collation names that actually exist.) The `replication connected without TLS` log message is downgraded from WARN to INFO to match the main (non-replication) connection logging. Between these fixes, that's all the main sources of errors and warnings in our production MySQL tasks, so hopefully that will be much less noisy to keep an eye on in the future. This fixes https://github.com/estuary/connectors/issues/2056 --- source-mysql/discovery.go | 4 +++- source-mysql/replication.go | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/source-mysql/discovery.go b/source-mysql/discovery.go index 0977f355e..a98c4d762 100644 --- a/source-mysql/discovery.go +++ b/source-mysql/discovery.go @@ -478,7 +478,7 @@ func charsetFromCollation(name string) string { // // We rely on this assumption to identify known charsets based on the decoders table here. 
for charset := range mysqlStringDecoders { - if strings.HasPrefix(name, charset) { + if strings.HasPrefix(name, charset+"_") { return charset } } @@ -702,6 +702,8 @@ func decodeBytesToString(charset string, bs []byte) (string, error) { } var mysqlStringDecoders = map[string]func([]byte) (string, error){ + "utf8": decodeUTF8, // MariaDB alias for utf8mb3 or utf8mb4 depending on config. We don't care, it's all UTF-8 text to us. + "ascii": decodeUTF8, // Guaranteed only ASCII characters (8-bit clean), meaning we can still treat it as UTF-8. "utf8mb3": decodeUTF8, "utf8mb4": decodeUTF8, "latin1": decodeLatin1, diff --git a/source-mysql/replication.go b/source-mysql/replication.go index 3cdbc6ae7..9936d1999 100644 --- a/source-mysql/replication.go +++ b/source-mysql/replication.go @@ -108,7 +108,7 @@ func (db *mysqlDatabase) ReplicationStream(ctx context.Context, startCursor stri syncConfig.TLSConfig = nil syncer = replication.NewBinlogSyncer(syncConfig) if streamer, err = syncer.StartSync(pos); err == nil { - logrus.Warn("replication connected without TLS") + logrus.Info("replication connected without TLS") } else { return nil, fmt.Errorf("error starting binlog sync: %w", err) }