diff --git a/source-mysql/.snapshots/TestAddLegacyTextColumn b/source-mysql/.snapshots/TestAddLegacyTextColumn index 7a8aa9515..af3fcde8a 100644 --- a/source-mysql/.snapshots/TestAddLegacyTextColumn +++ b/source-mysql/.snapshots/TestAddLegacyTextColumn @@ -1,14 +1,17 @@ # ================================ -# Collection "acmeCo/test/test_addlegacytextcolumn_30621561": 6 Documents +# Collection "acmeCo/test/test_addlegacytextcolumn_30621561": 9 Documents # ================================ {"_meta":{"op":"c","source":{"schema":"test","snapshot":true,"table":"AddLegacyTextColumn_30621561","cursor":"backfill:0"}},"id":1} {"_meta":{"op":"c","source":{"schema":"test","snapshot":true,"table":"AddLegacyTextColumn_30621561","cursor":"backfill:1"}},"id":2} {"_meta":{"op":"c","source":{"schema":"test","snapshot":true,"table":"AddLegacyTextColumn_30621561","cursor":"backfill:2"}},"id":3} -{"_meta":{"op":"c","source":{"ts_ms":1111111111111,"schema":"test","table":"AddLegacyTextColumn_30621561","cursor":"binlog.000123:56789:123","txid":"11111111-1111-1111-1111-111111111111:111"}},"data":"Heizölrückstoßabdämpfung","id":5} {"_meta":{"op":"c","source":{"ts_ms":1111111111111,"schema":"test","table":"AddLegacyTextColumn_30621561","cursor":"binlog.000123:56789:123","txid":"11111111-1111-1111-1111-111111111111:111"}},"data":"four","id":4} +{"_meta":{"op":"c","source":{"ts_ms":1111111111111,"schema":"test","table":"AddLegacyTextColumn_30621561","cursor":"binlog.000123:56789:123","txid":"11111111-1111-1111-1111-111111111111:111"}},"data":"Heizölrückstoßabdämpfung","id":5} {"_meta":{"op":"c","source":{"ts_ms":1111111111111,"schema":"test","table":"AddLegacyTextColumn_30621561","cursor":"binlog.000123:56789:123","txid":"11111111-1111-1111-1111-111111111111:111"}},"data":"six","id":6} +{"_meta":{"op":"c","source":{"ts_ms":1111111111111,"schema":"test","table":"AddLegacyTextColumn_30621561","cursor":"binlog.000123:56789:123","txid":"11111111-1111-1111-1111-111111111111:111"}},"data":"777","data_ucs":"seven","id":7} +{"_meta":{"op":"c","source":{"ts_ms":1111111111111,"schema":"test","table":"AddLegacyTextColumn_30621561","cursor":"binlog.000123:56789:123","txid":"11111111-1111-1111-1111-111111111111:111"}},"data":"888","data_ucs":"次常用字","id":8} +{"_meta":{"op":"c","source":{"ts_ms":1111111111111,"schema":"test","table":"AddLegacyTextColumn_30621561","cursor":"binlog.000123:56789:123","txid":"11111111-1111-1111-1111-111111111111:111"}},"data":"999","data_ucs":"nine","id":9} # ================================ # Final State Checkpoint # ================================ -{"bindingStateV1":{"test%2FAddLegacyTextColumn_30621561":{"backfilled":3,"key_columns":["id"],"metadata":{"charset":"latin1","schema":{"columns":["id","data"],"types":{"data":{"charset":"latin1","type":"text"},"id":{"type":"int"}}}},"mode":"Active"}},"cursor":"binlog.000123:56789"} +{"bindingStateV1":{"test%2FAddLegacyTextColumn_30621561":{"backfilled":3,"key_columns":["id"],"metadata":{"charset":"latin1","schema":{"columns":["id","data","data_ucs"],"types":{"data":{"charset":"latin1","type":"text"},"data_ucs":{"charset":"ucs2","type":"text"},"id":{"type":"int"}}}},"mode":"Active"}},"cursor":"binlog.000123:56789"} diff --git a/source-mysql/.snapshots/TestBackfillLegacyTextKey b/source-mysql/.snapshots/TestBackfillLegacyTextKey index 9534d5b28..0113e124b 100644 --- a/source-mysql/.snapshots/TestBackfillLegacyTextKey +++ b/source-mysql/.snapshots/TestBackfillLegacyTextKey @@ -8,7 +8,7 @@ # ================================ # Final State Checkpoint # ================================ -{"bindingStateV1":{"test%2FBackfillLegacyTextKey_83451544":{"backfilled":1,"key_columns":["id"],"metadata":{"schema":{"columns":["id","data"],"types":{"data":{"charset":"latin1","type":"text"},"id":{"charset":"latin1","type":"varchar"}}}},"mode":"UnfilteredBackfill","scanned":"AWFvw7t0AA=="}},"cursor":"binlog.000123:56789"} +{"bindingStateV1":{"test%2FBackfillLegacyTextKey_83451544":{"backfilled":1,"key_columns":["id"],"metadata":{"charset":"latin1","schema":{"columns":["id","data"],"types":{"data":{"charset":"latin1","type":"text"},"id":{"charset":"latin1","type":"varchar"}}}},"mode":"UnfilteredBackfill","scanned":"AWFvw7t0AA=="}},"cursor":"binlog.000123:56789"} #################################### @@ -21,7 +21,7 @@ # ================================ # Final State Checkpoint # ================================ -{"bindingStateV1":{"test%2FBackfillLegacyTextKey_83451544":{"backfilled":2,"key_columns":["id"],"metadata":{"schema":{"columns":["id","data"],"types":{"data":{"charset":"latin1","type":"text"},"id":{"charset":"latin1","type":"varchar"}}}},"mode":"UnfilteredBackfill","scanned":"AWZvcsOqdAA="}},"cursor":"binlog.000123:56789"} +{"bindingStateV1":{"test%2FBackfillLegacyTextKey_83451544":{"backfilled":2,"key_columns":["id"],"metadata":{"charset":"latin1","schema":{"columns":["id","data"],"types":{"data":{"charset":"latin1","type":"text"},"id":{"charset":"latin1","type":"varchar"}}}},"mode":"UnfilteredBackfill","scanned":"AWZvcsOqdAA="}},"cursor":"binlog.000123:56789"} #################################### @@ -34,7 +34,7 @@ # ================================ # Final State Checkpoint # ================================ -{"bindingStateV1":{"test%2FBackfillLegacyTextKey_83451544":{"backfilled":3,"key_columns":["id"],"metadata":{"schema":{"columns":["id","data"],"types":{"data":{"charset":"latin1","type":"text"},"id":{"charset":"latin1","type":"varchar"}}}},"mode":"UnfilteredBackfill","scanned":"AW/DoG8A"}},"cursor":"binlog.000123:56789"} +{"bindingStateV1":{"test%2FBackfillLegacyTextKey_83451544":{"backfilled":3,"key_columns":["id"],"metadata":{"charset":"latin1","schema":{"columns":["id","data"],"types":{"data":{"charset":"latin1","type":"text"},"id":{"charset":"latin1","type":"varchar"}}}},"mode":"UnfilteredBackfill","scanned":"AW/DoG8A"}},"cursor":"binlog.000123:56789"} #################################### @@ -47,7 +47,7 @@ # ================================ # Final State Checkpoint # ================================ -{"bindingStateV1":{"test%2FBackfillLegacyTextKey_83451544":{"backfilled":4,"key_columns":["id"],"metadata":{"schema":{"columns":["id","data"],"types":{"data":{"charset":"latin1","type":"text"},"id":{"charset":"latin1","type":"varchar"}}}},"mode":"UnfilteredBackfill","scanned":"AW/DqG8A"}},"cursor":"binlog.000123:56789"} +{"bindingStateV1":{"test%2FBackfillLegacyTextKey_83451544":{"backfilled":4,"key_columns":["id"],"metadata":{"charset":"latin1","schema":{"columns":["id","data"],"types":{"data":{"charset":"latin1","type":"text"},"id":{"charset":"latin1","type":"varchar"}}}},"mode":"UnfilteredBackfill","scanned":"AW/DqG8A"}},"cursor":"binlog.000123:56789"} #################################### @@ -60,7 +60,7 @@ # ================================ # Final State Checkpoint # ================================ -{"bindingStateV1":{"test%2FBackfillLegacyTextKey_83451544":{"backfilled":5,"key_columns":["id"],"metadata":{"schema":{"columns":["id","data"],"types":{"data":{"charset":"latin1","type":"text"},"id":{"charset":"latin1","type":"varchar"}}}},"mode":"UnfilteredBackfill","scanned":"AW/Dsm8A"}},"cursor":"binlog.000123:56789"} +{"bindingStateV1":{"test%2FBackfillLegacyTextKey_83451544":{"backfilled":5,"key_columns":["id"],"metadata":{"charset":"latin1","schema":{"columns":["id","data"],"types":{"data":{"charset":"latin1","type":"text"},"id":{"charset":"latin1","type":"varchar"}}}},"mode":"UnfilteredBackfill","scanned":"AW/Dsm8A"}},"cursor":"binlog.000123:56789"} #################################### @@ -73,7 +73,7 @@ # ================================ # Final State Checkpoint # ================================ -{"bindingStateV1":{"test%2FBackfillLegacyTextKey_83451544":{"backfilled":6,"key_columns":["id"],"metadata":{"schema":{"columns":["id","data"],"types":{"data":{"charset":"latin1","type":"text"},"id":{"charset":"latin1","type":"varchar"}}}},"mode":"UnfilteredBackfill","scanned":"AXLDqXN1bcOpAA=="}},"cursor":"binlog.000123:56789"} +{"bindingStateV1":{"test%2FBackfillLegacyTextKey_83451544":{"backfilled":6,"key_columns":["id"],"metadata":{"charset":"latin1","schema":{"columns":["id","data"],"types":{"data":{"charset":"latin1","type":"text"},"id":{"charset":"latin1","type":"varchar"}}}},"mode":"UnfilteredBackfill","scanned":"AXLDqXN1bcOpAA=="}},"cursor":"binlog.000123:56789"} #################################### @@ -82,7 +82,7 @@ # ================================ # Final State Checkpoint # ================================ -{"bindingStateV1":{"test%2FBackfillLegacyTextKey_83451544":{"backfilled":6,"key_columns":["id"],"metadata":{"schema":{"columns":["id","data"],"types":{"data":{"charset":"latin1","type":"text"},"id":{"charset":"latin1","type":"varchar"}}}},"mode":"Active"}},"cursor":"binlog.000123:56789"} +{"bindingStateV1":{"test%2FBackfillLegacyTextKey_83451544":{"backfilled":6,"key_columns":["id"],"metadata":{"charset":"latin1","schema":{"columns":["id","data"],"types":{"data":{"charset":"latin1","type":"text"},"id":{"charset":"latin1","type":"varchar"}}}},"mode":"Active"}},"cursor":"binlog.000123:56789"} diff --git a/source-mysql/capture_test.go b/source-mysql/capture_test.go index 419ad54f6..b9fc5985a 100644 --- a/source-mysql/capture_test.go +++ b/source-mysql/capture_test.go @@ -389,6 +389,7 @@ func TestAddLegacyTextColumn(t *testing.T) { tb.Insert(ctx, t, table, [][]any{{1}, {2}, {3}}) var cs = tb.CaptureSpec(ctx, t, regexp.MustCompile(uniqueID)) + cs.Validator = &st.OrderedCaptureValidator{} sqlcapture.TestShutdownAfterCaughtUp = true t.Cleanup(func() { sqlcapture.TestShutdownAfterCaughtUp = false }) @@ -400,6 +401,14 @@ func TestAddLegacyTextColumn(t *testing.T) { {6, "six"}, }) cs.Capture(ctx, t, nil) + tb.Query(ctx, t, fmt.Sprintf("ALTER TABLE %s ADD COLUMN data_ucs TEXT COLLATE ucs2_general_ci;", table)) + tb.Insert(ctx, t, table, [][]any{ + {7, "777", "seven"}, + {8, "888", "次常用字"}, + {9, "999", "nine"}, + }) + cs.Capture(ctx, t, nil) + cupaloy.SnapshotT(t, cs.Summary()) } diff --git a/source-mysql/replication.go b/source-mysql/replication.go index d00417611..193472efe 100644 --- a/source-mysql/replication.go +++ b/source-mysql/replication.go @@ -766,12 +766,15 @@ func translateDataType(meta *mysqlTableMetadata, t sqlparser.ColumnType) any { case "tinyint", "smallint", "mediumint", "int", "bigint": return &mysqlColumnType{Type: typeName, Unsigned: t.Unsigned} case "char", "varchar", "tinytext", "text", "mediumtext", "longtext": - var charset = t.Charset.Name - if charset == "" { - charset = meta.DefaultCharset // If not explicitly specified, use the default charset of the table - } - if charset == "" { - charset = mysqlDefaultCharset // If the default charset is also not known, fall back to UTF-8 + var charset string + if t.Charset.Name != "" { + charset = t.Charset.Name // If explicitly specified, the declared charset wins + } else if t.Options.Collate != "" { + charset = charsetFromCollation(t.Options.Collate) // If only a collation is declared, figure out what charset that implies + } else if meta.DefaultCharset != "" { + charset = meta.DefaultCharset // In the absence of a column-specific declaration, use the default table charset if known + } else { + charset = mysqlDefaultCharset // Finally fall back to UTF-8 if nothing else supersedes that } return &mysqlColumnType{Type: typeName, Charset: charset} default: