Skip to content

Commit

Permalink
source-mysql: Fix DDL with COLLATE but not CHARSET
Browse files Browse the repository at this point in the history
When a column definition in a DDL statement declares a COLLATE but no
CHARSET, we want to apply the same "charset from collation" mapping
function that we use during discovery. The order of precedence for
determining a column's charset is now:

1. Explicit CHARSET declaration
2. Explicit COLLATE declaration
3. Default charset of the table, if known (it is omitted in some cases when it is utf8mb4)
4. Default to utf8mb4 as the last resort
  • Loading branch information
willdonnelly committed Sep 24, 2024
1 parent 55ca5a0 commit 2dfcec0
Show file tree
Hide file tree
Showing 4 changed files with 31 additions and 16 deletions.
9 changes: 6 additions & 3 deletions source-mysql/.snapshots/TestAddLegacyTextColumn
Original file line number Diff line number Diff line change
@@ -1,14 +1,17 @@
# ================================
# Collection "acmeCo/test/test_addlegacytextcolumn_30621561": 6 Documents
# Collection "acmeCo/test/test_addlegacytextcolumn_30621561": 9 Documents
# ================================
{"_meta":{"op":"c","source":{"schema":"test","snapshot":true,"table":"AddLegacyTextColumn_30621561","cursor":"backfill:0"}},"id":1}
{"_meta":{"op":"c","source":{"schema":"test","snapshot":true,"table":"AddLegacyTextColumn_30621561","cursor":"backfill:1"}},"id":2}
{"_meta":{"op":"c","source":{"schema":"test","snapshot":true,"table":"AddLegacyTextColumn_30621561","cursor":"backfill:2"}},"id":3}
{"_meta":{"op":"c","source":{"ts_ms":1111111111111,"schema":"test","table":"AddLegacyTextColumn_30621561","cursor":"binlog.000123:56789:123","txid":"11111111-1111-1111-1111-111111111111:111"}},"data":"Heizölrückstoßabdämpfung","id":5}
{"_meta":{"op":"c","source":{"ts_ms":1111111111111,"schema":"test","table":"AddLegacyTextColumn_30621561","cursor":"binlog.000123:56789:123","txid":"11111111-1111-1111-1111-111111111111:111"}},"data":"four","id":4}
{"_meta":{"op":"c","source":{"ts_ms":1111111111111,"schema":"test","table":"AddLegacyTextColumn_30621561","cursor":"binlog.000123:56789:123","txid":"11111111-1111-1111-1111-111111111111:111"}},"data":"Heizölrückstoßabdämpfung","id":5}
{"_meta":{"op":"c","source":{"ts_ms":1111111111111,"schema":"test","table":"AddLegacyTextColumn_30621561","cursor":"binlog.000123:56789:123","txid":"11111111-1111-1111-1111-111111111111:111"}},"data":"six","id":6}
{"_meta":{"op":"c","source":{"ts_ms":1111111111111,"schema":"test","table":"AddLegacyTextColumn_30621561","cursor":"binlog.000123:56789:123","txid":"11111111-1111-1111-1111-111111111111:111"}},"data":"777","data_ucs":"seven","id":7}
{"_meta":{"op":"c","source":{"ts_ms":1111111111111,"schema":"test","table":"AddLegacyTextColumn_30621561","cursor":"binlog.000123:56789:123","txid":"11111111-1111-1111-1111-111111111111:111"}},"data":"888","data_ucs":"次常用字","id":8}
{"_meta":{"op":"c","source":{"ts_ms":1111111111111,"schema":"test","table":"AddLegacyTextColumn_30621561","cursor":"binlog.000123:56789:123","txid":"11111111-1111-1111-1111-111111111111:111"}},"data":"999","data_ucs":"nine","id":9}
# ================================
# Final State Checkpoint
# ================================
{"bindingStateV1":{"test%2FAddLegacyTextColumn_30621561":{"backfilled":3,"key_columns":["id"],"metadata":{"charset":"latin1","schema":{"columns":["id","data"],"types":{"data":{"charset":"latin1","type":"text"},"id":{"type":"int"}}}},"mode":"Active"}},"cursor":"binlog.000123:56789"}
{"bindingStateV1":{"test%2FAddLegacyTextColumn_30621561":{"backfilled":3,"key_columns":["id"],"metadata":{"charset":"latin1","schema":{"columns":["id","data","data_ucs"],"types":{"data":{"charset":"latin1","type":"text"},"data_ucs":{"charset":"ucs2","type":"text"},"id":{"type":"int"}}}},"mode":"Active"}},"cursor":"binlog.000123:56789"}

14 changes: 7 additions & 7 deletions source-mysql/.snapshots/TestBackfillLegacyTextKey
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
# ================================
# Final State Checkpoint
# ================================
{"bindingStateV1":{"test%2FBackfillLegacyTextKey_83451544":{"backfilled":1,"key_columns":["id"],"metadata":{"schema":{"columns":["id","data"],"types":{"data":{"charset":"latin1","type":"text"},"id":{"charset":"latin1","type":"varchar"}}}},"mode":"UnfilteredBackfill","scanned":"AWFvw7t0AA=="}},"cursor":"binlog.000123:56789"}
{"bindingStateV1":{"test%2FBackfillLegacyTextKey_83451544":{"backfilled":1,"key_columns":["id"],"metadata":{"charset":"latin1","schema":{"columns":["id","data"],"types":{"data":{"charset":"latin1","type":"text"},"id":{"charset":"latin1","type":"varchar"}}}},"mode":"UnfilteredBackfill","scanned":"AWFvw7t0AA=="}},"cursor":"binlog.000123:56789"}


####################################
Expand All @@ -21,7 +21,7 @@
# ================================
# Final State Checkpoint
# ================================
{"bindingStateV1":{"test%2FBackfillLegacyTextKey_83451544":{"backfilled":2,"key_columns":["id"],"metadata":{"schema":{"columns":["id","data"],"types":{"data":{"charset":"latin1","type":"text"},"id":{"charset":"latin1","type":"varchar"}}}},"mode":"UnfilteredBackfill","scanned":"AWZvcsOqdAA="}},"cursor":"binlog.000123:56789"}
{"bindingStateV1":{"test%2FBackfillLegacyTextKey_83451544":{"backfilled":2,"key_columns":["id"],"metadata":{"charset":"latin1","schema":{"columns":["id","data"],"types":{"data":{"charset":"latin1","type":"text"},"id":{"charset":"latin1","type":"varchar"}}}},"mode":"UnfilteredBackfill","scanned":"AWZvcsOqdAA="}},"cursor":"binlog.000123:56789"}


####################################
Expand All @@ -34,7 +34,7 @@
# ================================
# Final State Checkpoint
# ================================
{"bindingStateV1":{"test%2FBackfillLegacyTextKey_83451544":{"backfilled":3,"key_columns":["id"],"metadata":{"schema":{"columns":["id","data"],"types":{"data":{"charset":"latin1","type":"text"},"id":{"charset":"latin1","type":"varchar"}}}},"mode":"UnfilteredBackfill","scanned":"AW/DoG8A"}},"cursor":"binlog.000123:56789"}
{"bindingStateV1":{"test%2FBackfillLegacyTextKey_83451544":{"backfilled":3,"key_columns":["id"],"metadata":{"charset":"latin1","schema":{"columns":["id","data"],"types":{"data":{"charset":"latin1","type":"text"},"id":{"charset":"latin1","type":"varchar"}}}},"mode":"UnfilteredBackfill","scanned":"AW/DoG8A"}},"cursor":"binlog.000123:56789"}


####################################
Expand All @@ -47,7 +47,7 @@
# ================================
# Final State Checkpoint
# ================================
{"bindingStateV1":{"test%2FBackfillLegacyTextKey_83451544":{"backfilled":4,"key_columns":["id"],"metadata":{"schema":{"columns":["id","data"],"types":{"data":{"charset":"latin1","type":"text"},"id":{"charset":"latin1","type":"varchar"}}}},"mode":"UnfilteredBackfill","scanned":"AW/DqG8A"}},"cursor":"binlog.000123:56789"}
{"bindingStateV1":{"test%2FBackfillLegacyTextKey_83451544":{"backfilled":4,"key_columns":["id"],"metadata":{"charset":"latin1","schema":{"columns":["id","data"],"types":{"data":{"charset":"latin1","type":"text"},"id":{"charset":"latin1","type":"varchar"}}}},"mode":"UnfilteredBackfill","scanned":"AW/DqG8A"}},"cursor":"binlog.000123:56789"}


####################################
Expand All @@ -60,7 +60,7 @@
# ================================
# Final State Checkpoint
# ================================
{"bindingStateV1":{"test%2FBackfillLegacyTextKey_83451544":{"backfilled":5,"key_columns":["id"],"metadata":{"schema":{"columns":["id","data"],"types":{"data":{"charset":"latin1","type":"text"},"id":{"charset":"latin1","type":"varchar"}}}},"mode":"UnfilteredBackfill","scanned":"AW/Dsm8A"}},"cursor":"binlog.000123:56789"}
{"bindingStateV1":{"test%2FBackfillLegacyTextKey_83451544":{"backfilled":5,"key_columns":["id"],"metadata":{"charset":"latin1","schema":{"columns":["id","data"],"types":{"data":{"charset":"latin1","type":"text"},"id":{"charset":"latin1","type":"varchar"}}}},"mode":"UnfilteredBackfill","scanned":"AW/Dsm8A"}},"cursor":"binlog.000123:56789"}


####################################
Expand All @@ -73,7 +73,7 @@
# ================================
# Final State Checkpoint
# ================================
{"bindingStateV1":{"test%2FBackfillLegacyTextKey_83451544":{"backfilled":6,"key_columns":["id"],"metadata":{"schema":{"columns":["id","data"],"types":{"data":{"charset":"latin1","type":"text"},"id":{"charset":"latin1","type":"varchar"}}}},"mode":"UnfilteredBackfill","scanned":"AXLDqXN1bcOpAA=="}},"cursor":"binlog.000123:56789"}
{"bindingStateV1":{"test%2FBackfillLegacyTextKey_83451544":{"backfilled":6,"key_columns":["id"],"metadata":{"charset":"latin1","schema":{"columns":["id","data"],"types":{"data":{"charset":"latin1","type":"text"},"id":{"charset":"latin1","type":"varchar"}}}},"mode":"UnfilteredBackfill","scanned":"AXLDqXN1bcOpAA=="}},"cursor":"binlog.000123:56789"}


####################################
Expand All @@ -82,7 +82,7 @@
# ================================
# Final State Checkpoint
# ================================
{"bindingStateV1":{"test%2FBackfillLegacyTextKey_83451544":{"backfilled":6,"key_columns":["id"],"metadata":{"schema":{"columns":["id","data"],"types":{"data":{"charset":"latin1","type":"text"},"id":{"charset":"latin1","type":"varchar"}}}},"mode":"Active"}},"cursor":"binlog.000123:56789"}
{"bindingStateV1":{"test%2FBackfillLegacyTextKey_83451544":{"backfilled":6,"key_columns":["id"],"metadata":{"charset":"latin1","schema":{"columns":["id","data"],"types":{"data":{"charset":"latin1","type":"text"},"id":{"charset":"latin1","type":"varchar"}}}},"mode":"Active"}},"cursor":"binlog.000123:56789"}



9 changes: 9 additions & 0 deletions source-mysql/capture_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -389,6 +389,7 @@ func TestAddLegacyTextColumn(t *testing.T) {
tb.Insert(ctx, t, table, [][]any{{1}, {2}, {3}})

var cs = tb.CaptureSpec(ctx, t, regexp.MustCompile(uniqueID))
cs.Validator = &st.OrderedCaptureValidator{}
sqlcapture.TestShutdownAfterCaughtUp = true
t.Cleanup(func() { sqlcapture.TestShutdownAfterCaughtUp = false })

Expand All @@ -400,6 +401,14 @@ func TestAddLegacyTextColumn(t *testing.T) {
{6, "six"},
})
cs.Capture(ctx, t, nil)
tb.Query(ctx, t, fmt.Sprintf("ALTER TABLE %s ADD COLUMN data_ucs TEXT COLLATE ucs2_general_ci;", table))
tb.Insert(ctx, t, table, [][]any{
{7, "777", "seven"},
{8, "888", "次常用字"},
{9, "999", "nine"},
})
cs.Capture(ctx, t, nil)

cupaloy.SnapshotT(t, cs.Summary())
}

Expand Down
15 changes: 9 additions & 6 deletions source-mysql/replication.go
Original file line number Diff line number Diff line change
Expand Up @@ -766,12 +766,15 @@ func translateDataType(meta *mysqlTableMetadata, t sqlparser.ColumnType) any {
case "tinyint", "smallint", "mediumint", "int", "bigint":
return &mysqlColumnType{Type: typeName, Unsigned: t.Unsigned}
case "char", "varchar", "tinytext", "text", "mediumtext", "longtext":
var charset = t.Charset.Name
if charset == "" {
charset = meta.DefaultCharset // If not explicitly specified, use the default charset of the table
}
if charset == "" {
charset = mysqlDefaultCharset // If the default charset is also not known, fall back to UTF-8
var charset string
if t.Charset.Name != "" {
charset = t.Charset.Name // If explicitly specified, the declared charset wins
} else if t.Options.Collate != "" {
charset = charsetFromCollation(t.Options.Collate) // If only a collation is declared, figure out what charset that implies
} else if meta.DefaultCharset != "" {
charset = meta.DefaultCharset // In the absence of a column-specific declaration, use the default table charset if known
} else {
charset = mysqlDefaultCharset // Finally fall back to UTF-8 if nothing else supersedes that
}
return &mysqlColumnType{Type: typeName, Charset: charset}
default:
Expand Down

0 comments on commit 2dfcec0

Please sign in to comment.