From 51f0a7b27079f4f9c23ed215dcb715004bf6e0b8 Mon Sep 17 00:00:00 2001 From: Tianzhou Date: Fri, 30 Jan 2026 10:07:48 -0800 Subject: [PATCH 1/2] fix: varchar IN check constraints generating invalid SQL (#269) When a varchar column has a CHECK constraint with IN (...), PostgreSQL stores it internally as `col::text = ANY (ARRAY['a'::varchar, ...]::text[])`. The old code failed to handle the `::text[]` cast at the end, producing invalid SQL. This fix: - Fixes findArrayClose() to handle optional array type casts like ::text[] - Updates convertAnyArrayToIn() to properly find closing paren after optional type casts - Preserves type information (e.g., ::character varying, ::text) in output Co-Authored-By: Claude Opus 4.5 --- ir/normalize.go | 31 +++++++++++++------ testdata/diff/online/add_constraint/diff.sql | 2 ++ testdata/diff/online/add_constraint/new.sql | 4 ++- testdata/diff/online/add_constraint/old.sql | 1 + testdata/diff/online/add_constraint/plan.json | 14 ++++++++- testdata/diff/online/add_constraint/plan.sql | 5 +++ testdata/diff/online/add_constraint/plan.txt | 6 ++++ 7 files changed, 52 insertions(+), 11 deletions(-) diff --git a/ir/normalize.go b/ir/normalize.go index c7d8f964..27b76e3f 100644 --- a/ir/normalize.go +++ b/ir/normalize.go @@ -921,7 +921,8 @@ func normalizeCheckClause(checkClause string) string { // applyLegacyCheckNormalizations applies the existing normalization patterns func applyLegacyCheckNormalizations(clause string) string { - // Convert PostgreSQL's "= ANY (ARRAY[...])" format to "IN (...)" format + // Convert PostgreSQL's "= ANY (ARRAY[...])" format to "IN (...)" format. + // Type casts are preserved to maintain accuracy with PostgreSQL's internal representation. if strings.Contains(clause, "= ANY (ARRAY[") { return convertAnyArrayToIn(clause) } @@ -1072,6 +1073,7 @@ func IsTextLikeType(typeName string) bool { // - "status = ANY (ARRAY['active'::public.status_type])" → "status IN ('active'::public.status_type)" // - "gender = ANY (ARRAY['M'::text, 'F'::text])" → "gender IN ('M'::text, 'F'::text)" // - "(col = ANY (ARRAY[...])) AND (other)" → "(col IN (...)) AND (other)" +// - "col::text = ANY (ARRAY['a'::varchar]::text[])" → "col::text IN ('a'::varchar)" func convertAnyArrayToIn(expr string) string { const marker = " = ANY (ARRAY[" idx := strings.Index(expr, marker) @@ -1082,16 +1084,28 @@ func convertAnyArrayToIn(expr string) string { // Extract the part before the marker (column name with possible leading content) prefix := expr[:idx] - // Find the closing "])" for ARRAY[...] starting after the marker + // Find the closing "]" for ARRAY[...] starting after the marker startIdx := idx + len(marker) arrayEnd := findArrayClose(expr, startIdx) if arrayEnd == -1 { return expr } - // Extract array contents and any trailing expression + // Extract array contents arrayContents := expr[startIdx:arrayEnd] - suffix := expr[arrayEnd+2:] // skip "])" + + // Find the closing ")" for ANY(...), which may be after an optional type cast like "::text[]" + // Pattern after "]": optional "::type[]" followed by ")" + closeParenIdx := arrayEnd + 1 + for closeParenIdx < len(expr) && expr[closeParenIdx] != ')' { + closeParenIdx++ + } + if closeParenIdx >= len(expr) { + return expr // No closing paren found + } + + // Everything after the closing ")" is the suffix + suffix := expr[closeParenIdx+1:] // Split values and preserve them as-is, including all type casts values := strings.Split(arrayContents, ", ") @@ -1105,8 +1119,9 @@ func convertAnyArrayToIn(expr string) string { return fmt.Sprintf("%s IN (%s)%s", prefix, strings.Join(cleanValues, ", "), suffix) } -// findArrayClose finds the position of the closing "])" for an ARRAY literal, +// findArrayClose finds the position of the closing "]" for an ARRAY literal, // handling nested brackets and quoted strings properly. +// Returns the position of the "]" that closes the ARRAY. func findArrayClose(expr string, startIdx int) int { bracketDepth := 1 // We're already inside ARRAY[ inQuote := false @@ -1134,10 +1149,8 @@ func findArrayClose(expr string, startIdx int) int { case ']': bracketDepth-- if bracketDepth == 0 { - // Found the closing ], verify next char is ) - if i+1 < len(expr) && expr[i+1] == ')' { - return i // Return position of ] - } + // Found the closing ] for the ARRAY + return i } } } diff --git a/testdata/diff/online/add_constraint/diff.sql b/testdata/diff/online/add_constraint/diff.sql index bf8fed43..3bbfd5aa 100644 --- a/testdata/diff/online/add_constraint/diff.sql +++ b/testdata/diff/online/add_constraint/diff.sql @@ -1,2 +1,4 @@ ALTER TABLE orders ADD CONSTRAINT check_amount_positive CHECK (amount > 0::numeric); +ALTER TABLE orders +ADD CONSTRAINT check_valid_status CHECK (status::text IN ('pending'::character varying, 'shipped'::character varying, 'delivered'::character varying)); diff --git a/testdata/diff/online/add_constraint/new.sql b/testdata/diff/online/add_constraint/new.sql index 414c8837..cdabbe86 100644 --- a/testdata/diff/online/add_constraint/new.sql +++ b/testdata/diff/online/add_constraint/new.sql @@ -2,6 +2,8 @@ CREATE TABLE public.orders ( id integer NOT NULL, customer_id integer NOT NULL, amount numeric(10,2), + status varchar(10) NOT NULL, created_at timestamp with time zone DEFAULT now() NOT NULL, - CONSTRAINT check_amount_positive CHECK (amount > 0) + CONSTRAINT check_amount_positive CHECK (amount > 0), + CONSTRAINT check_valid_status CHECK (status IN ('pending', 'shipped', 'delivered')) ); \ No newline at end of file diff --git a/testdata/diff/online/add_constraint/old.sql b/testdata/diff/online/add_constraint/old.sql index bd2b6263..1a18398a 100644 --- a/testdata/diff/online/add_constraint/old.sql +++ b/testdata/diff/online/add_constraint/old.sql @@ -2,5 +2,6 @@ CREATE TABLE public.orders ( id integer NOT NULL, customer_id integer NOT NULL, amount numeric(10,2), + status varchar(10) NOT NULL, created_at timestamp with time zone DEFAULT now() NOT NULL ); \ No newline at end of file diff --git a/testdata/diff/online/add_constraint/plan.json b/testdata/diff/online/add_constraint/plan.json index f6011a9a..00cf3b16 100644 --- a/testdata/diff/online/add_constraint/plan.json +++ b/testdata/diff/online/add_constraint/plan.json @@ -3,7 +3,7 @@ "pgschema_version": "1.6.1", "created_at": "1970-01-01T00:00:00Z", "source_fingerprint": { - "hash": "563f1deff597a3640689858d007209bb87e776772a637d6ae4dc151558f8af38" + "hash": "a3bcb075d28a8e3584a89f8644ecdc54b7f0861216ae959392e9078128545620" }, "groups": [ { @@ -19,6 +19,18 @@ "type": "table.constraint", "operation": "create", "path": "public.orders.check_amount_positive" + }, + { + "sql": "ALTER TABLE orders\nADD CONSTRAINT check_valid_status CHECK (status::text IN ('pending'::character varying, 'shipped'::character varying, 'delivered'::character varying)) NOT VALID;", + "type": "table.constraint", + "operation": "create", + "path": "public.orders.check_valid_status" + }, + { + "sql": "ALTER TABLE orders VALIDATE CONSTRAINT check_valid_status;", + "type": "table.constraint", + "operation": "create", + "path": "public.orders.check_valid_status" } ] } diff --git a/testdata/diff/online/add_constraint/plan.sql b/testdata/diff/online/add_constraint/plan.sql index 1be25342..741fb559 100644 --- a/testdata/diff/online/add_constraint/plan.sql +++ b/testdata/diff/online/add_constraint/plan.sql @@ -2,3 +2,8 @@ ALTER TABLE orders ADD CONSTRAINT check_amount_positive CHECK (amount > 0::numeric) NOT VALID; ALTER TABLE orders VALIDATE CONSTRAINT check_amount_positive; + +ALTER TABLE orders +ADD CONSTRAINT check_valid_status CHECK (status::text IN ('pending'::character varying, 'shipped'::character varying, 'delivered'::character varying)) NOT VALID; + +ALTER TABLE orders VALIDATE CONSTRAINT check_valid_status; diff --git a/testdata/diff/online/add_constraint/plan.txt b/testdata/diff/online/add_constraint/plan.txt index d31693c7..c143a07e 100644 --- a/testdata/diff/online/add_constraint/plan.txt +++ b/testdata/diff/online/add_constraint/plan.txt @@ -6,6 +6,7 @@ Summary by type: Tables: ~ orders + check_amount_positive (constraint) + + check_valid_status (constraint) DDL to be executed: -------------------------------------------------- @@ -14,3 +15,8 @@ ALTER TABLE orders ADD CONSTRAINT check_amount_positive CHECK (amount > 0::numeric) NOT VALID; ALTER TABLE orders VALIDATE CONSTRAINT check_amount_positive; + +ALTER TABLE orders +ADD CONSTRAINT check_valid_status CHECK (status::text IN ('pending'::character varying, 'shipped'::character varying, 'delivered'::character varying)) NOT VALID; + +ALTER TABLE orders VALIDATE CONSTRAINT check_valid_status; From 8888e6bf72e7e9739a29938f4fb230541b1fe90a Mon Sep 17 00:00:00 2001 From: Tianzhou Date: Fri, 30 Jan 2026 22:00:33 -0800 Subject: [PATCH 2/2] fix: normalize varchar IN check constraints for idempotency MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When user writes CHECK (status IN ('pending', ...)) on a varchar column, PostgreSQL stores it with single casts: 'pending'::character varying. But when pgschema generates CHECK (status::text IN ('pending'::character varying, ...)), PostgreSQL stores it with double casts: 'pending'::character varying::text. This caused idempotency failures on second apply because the normalized forms differed. Add two normalizations: 1. (column)::type → column::type (remove unnecessary parens) 2. ::character varying::text → ::character varying (normalize double casts) Both forms now normalize to the same output while preserving type casts. Co-Authored-By: Claude Opus 4.5 --- ir/normalize.go | 27 +++++++++++++++++++++++++++ ir/normalize_test.go | 41 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+) create mode 100644 ir/normalize_test.go diff --git a/ir/normalize.go b/ir/normalize.go index 27b76e3f..0d3a2409 100644 --- a/ir/normalize.go +++ b/ir/normalize.go @@ -921,6 +921,33 @@ func normalizeCheckClause(checkClause string) string { // applyLegacyCheckNormalizations applies the existing normalization patterns func applyLegacyCheckNormalizations(clause string) string { + // Normalize unnecessary parentheses around simple identifiers before type casts. + // PostgreSQL sometimes stores "(column)::type" but the generated SQL uses "column::type". + // These are semantically equivalent, so normalize to the simpler form for idempotency. + // Pattern: (identifier)::type → identifier::type + // Examples: + // - "(status)::text" → "status::text" + // - "((name))::varchar" → "name::varchar" + parenCastRe := regexp.MustCompile(`\(([a-zA-Z_][a-zA-Z0-9_]*)\)::`) + for { + newClause := parenCastRe.ReplaceAllString(clause, `$1::`) + if newClause == clause { + break + } + clause = newClause + } + + // Normalize redundant double type casts. + // When pgschema generates CHECK (status::text IN ('value'::character varying, ...)), + // PostgreSQL stores it with double casts: 'value'::character varying::text + // But when the user writes CHECK (status IN ('value', ...)), PostgreSQL stores + // just 'value'::character varying with ::text[] on the whole array. + // Normalize the double cast to single cast for idempotent comparison. + // Pattern: ::character varying::text → ::character varying + // Pattern: ::varchar::text → ::varchar + doubleCastRe := regexp.MustCompile(`::(character varying|varchar)::text\b`) + clause = doubleCastRe.ReplaceAllString(clause, "::$1") + // Convert PostgreSQL's "= ANY (ARRAY[...])" format to "IN (...)" format. // Type casts are preserved to maintain accuracy with PostgreSQL's internal representation. if strings.Contains(clause, "= ANY (ARRAY[") { diff --git a/ir/normalize_test.go b/ir/normalize_test.go new file mode 100644 index 00000000..528de4d5 --- /dev/null +++ b/ir/normalize_test.go @@ -0,0 +1,41 @@ +package ir + +import ( + "testing" +) + +func TestNormalizeCheckClause(t *testing.T) { + tests := []struct { + name string + input string + expected string + }{ + { + name: "varchar IN with ::text cast - user form (has extra parens around column)", + input: "CHECK ((status)::text = ANY (ARRAY['pending'::character varying, 'shipped'::character varying, 'delivered'::character varying]::text[]))", + expected: "CHECK (status::text IN ('pending'::character varying, 'shipped'::character varying, 'delivered'::character varying))", + }, + { + name: "varchar IN without explicit cast - user form (no extra parens)", + input: "CHECK (status::text = ANY (ARRAY['pending'::character varying, 'shipped'::character varying, 'delivered'::character varying]::text[]))", + expected: "CHECK (status::text IN ('pending'::character varying, 'shipped'::character varying, 'delivered'::character varying))", + }, + { + name: "varchar IN with double cast - applied form (pgschema-generated SQL stored by PostgreSQL)", + input: "CHECK (status::text = ANY (ARRAY['pending'::character varying::text, 'shipped'::character varying::text, 'delivered'::character varying::text]))", + expected: "CHECK (status::text IN ('pending'::character varying, 'shipped'::character varying, 'delivered'::character varying))", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := normalizeCheckClause(tt.input) + t.Logf("Input: %s", tt.input) + t.Logf("Output: %s", result) + t.Logf("Expected: %s", tt.expected) + if result != tt.expected { + t.Errorf("normalizeCheckClause() = %v, want %v", result, tt.expected) + } + }) + } +}