@@ -34,6 +34,7 @@ const (
3434 checkInlinePattern = `(?i)\bCHECK\s*\(`
3535 primaryKeyPattern = `(?i)\bPRIMARY\s+KEY\b`
3636 uniquePattern = `(?i)\bUNIQUE\b`
37+ computedPattern = `(?i)\b(AS)\b`
3738
3839 // Patterns for table-level constraints
3940 tablePrimaryKeyPattern = `\((.*?)\)`
6061 checkInlineRe = regexp .MustCompile (checkInlinePattern )
6162 primaryKeyRe = regexp .MustCompile (primaryKeyPattern )
6263 uniqueRe = regexp .MustCompile (uniquePattern )
64+ computedRe = regexp .MustCompile (computedPattern )
6365 //following regexes are for table constraint handling
6466 tablePrimaryKeyRe = regexp .MustCompile (tablePrimaryKeyPattern )
6567 tableUniqueRe = regexp .MustCompile (tableUniquePattern )
@@ -352,6 +354,7 @@ func generateDDLFromCSV(
352354// It parses the table name, columns, and constraints (primary keys, unique constraints,
353355// foreign keys, and check constraints) from the DDL statement and returns a structured
354356// representation of the table schema.
357+ // TODO: (@nameisbhaskar) use sql parser instead of regexes - https://github.com/cockroachdb/cockroach/issues/155173
355358func ParseDDL (ddl string ) (* TableSchema , error ) {
356359 tableMatch := tablePattern .FindStringSubmatch (ddl )
357360 if tableMatch == nil {
@@ -439,7 +442,8 @@ func hasConstrainingPrefix(up string) bool {
439442 sqlFamily ,
440443 }
441444 for _ , p := range prefixes {
442- if strings .HasPrefix (up , p ) {
445+ // a space is added after the prefix to avoid matches like "checksum" as a field name.
446+ if strings .HasPrefix (up , p + " " ) {
443447 return true
444448 }
445449 }
@@ -453,7 +457,7 @@ func processColumnDefs(table *TableSchema, columnDefs []string) {
453457 // Process each column definition
454458 for _ , columnDef := range columnDefs {
455459 colMatch := colPattern .FindStringSubmatch (columnDef )
456- if colMatch == nil {
460+ if colMatch == nil || computedRe . MatchString ( columnDef ) {
457461 continue // Skip if pattern doesn't match
458462 }
459463 // Extract column properties from regex matches
@@ -659,6 +663,120 @@ func openCreateStatementsTSV(zipDir string) (*os.File, error) {
659663 return f , nil
660664}
661665
666+ // removeComputedColumns removes computed columns (columns with AS clause) from a CREATE TABLE statement.
667+ // It parses the statement, identifies computed columns, and reconstructs the statement without them.
668+ // It also removes any indexes or constraints that reference the computed columns.
669+ func removeComputedColumns (stmt string ) string {
670+ columnBlockMatch := bodyRe .FindStringSubmatch (stmt )
671+ if columnBlockMatch == nil {
672+ return stmt
673+ }
674+
675+ body := columnBlockMatch [1 ]
676+ suffix := columnBlockMatch [2 ]
677+
678+ // column definitions and table constraints are split based on commas
679+ var partsList []string
680+ buf := ""
681+ depth := 0
682+ for _ , ch := range body {
683+ switch ch {
684+ case '(' :
685+ depth ++
686+ buf += string (ch )
687+ case ')' :
688+ depth --
689+ buf += string (ch )
690+ case ',' :
691+ if depth == 0 {
692+ partsList = append (partsList , strings .TrimSpace (buf ))
693+ buf = ""
694+ } else {
695+ buf += string (ch )
696+ }
697+ default :
698+ buf += string (ch )
699+ }
700+ }
701+ if strings .TrimSpace (buf ) != "" {
702+ partsList = append (partsList , strings .TrimSpace (buf ))
703+ }
704+
705+ // computed column names are collected for reference
706+ computedColumnNames := make (map [string ]struct {})
707+ for _ , p := range partsList {
708+ // Computed column (contains AS keyword) is identified using the computedRe regex
709+ // Example: "haserror BOOL NULL AS (errordetail != '':::STRING) VIRTUAL"
710+ // This will match because it contains "AS"
711+ // Note: This is a simple heuristic and may need to be improved for complex cases
712+ // where "AS" might appear in other contexts.
713+ // The regex is case-insensitive to match "AS", "as", "As", etc.
714+ // It looks for the word "AS" surrounded by word boundaries to avoid partial matches.
715+ // This approach assumes that computed columns are defined with the "AS" keyword.
716+ // If the SQL dialect changes or has different syntax, this may need to be adjusted.
717+ // The regex is applied to each part of the column block to identify computed columns.
718+ // If a part matches, the column name is extracted and added to the computedColumnNames map.
719+ if computedRe .MatchString (p ) {
720+ colMatch := colPattern .FindStringSubmatch (p )
721+ if len (colMatch ) > 1 {
722+ colName := strings .Trim (colMatch [1 ], `"` )
723+ computedColumnNames [colName ] = struct {}{}
724+ }
725+ }
726+ }
727+
728+ // Computed columns and constraints/indexes that reference them are filtered out
729+ var filteredParts []string
730+ for _ , p := range partsList {
731+ // Skip computed columns
732+ if computedRe .MatchString (p ) {
733+ continue
734+ }
735+
736+ // If this is an index or constraint that references a computed column, it is skipped.
737+ shouldSkip := false
738+ pUpper := strings .ToUpper (p )
739+ if strings .HasPrefix (pUpper , "INDEX " ) || strings .HasPrefix (pUpper , "UNIQUE INDEX " ) {
740+ // Computed column names are checked in the index definition
741+ // If any computed column is found, the index is skipped
742+ // This is a simple substring check and may need to be improved for complex cases
743+ // where column names might be part of other identifiers.
744+ // For example, if a computed column is "col1", an index on "col10" should not be skipped.
745+ for colName := range computedColumnNames {
746+ // A regex pattern to match the column name in the index definition is created.
747+ pattern := regexp .MustCompile (`\b` + regexp .QuoteMeta (colName ) + `\b` )
748+ if pattern .MatchString (p ) {
749+ shouldSkip = true
750+ break
751+ }
752+ }
753+ }
754+
755+ if ! shouldSkip {
756+ filteredParts = append (filteredParts , p )
757+ }
758+ }
759+
760+ // If all parts were filtered out, return original statement
761+ if len (filteredParts ) == 0 {
762+ return stmt
763+ }
764+
765+ // The statement is reconstructed without computed columns and related constraints/indexes
766+ // The original formatting (indentation, line breaks) is preserved as much as possible
767+ // by joining the filtered parts with commas and new lines.
768+ tableMatch := tablePattern .FindStringSubmatch (stmt )
769+ if tableMatch == nil {
770+ return stmt
771+ }
772+
773+ // The CREATE TABLE is the part before the column block
774+ createTablePart := stmt [:strings .Index (stmt , "(" )]
775+ reconstructed := createTablePart + "(\n \t " + strings .Join (filteredParts , ",\n \t " ) + "\n )" + suffix
776+
777+ return reconstructed
778+ }
779+
662780// processDDLRecord inspects one TSV row and, if it represents a public table
663781// in dbName, normalizes its CREATE TABLE stmt and appends it to order/statements.
664782// It returns the fully qualified table name and table statements.
@@ -673,6 +791,9 @@ func processDDLRecord(
673791 stmt = createTableRe .ReplaceAllString (stmt , "${1}IF NOT EXISTS " )
674792 }
675793
794+ // 5) Remove computed columns from the statement
795+ stmt = removeComputedColumns (stmt )
796+
676797 return fmt .Sprintf ("%s.%s.%s" , dbName , schemaName , tableName ), stmt
677798}
678799
0 commit comments