9898// TODO: The "anonymize" parameter is unused for now.
9999func generateDDLs (
100100 zipDir ,
101- dbName string , anonymize bool ,
101+ dbName , ddlFile string , anonymize bool ,
102102) (allSchemas map [string ]* TableSchema , createStmts map [string ]string , retErr error ) {
103103
104+ if ddlFile != "" {
105+ // DDL file location is present. We will use this instead of the debug zip.
106+ f , err := os .Open (ddlFile )
107+ if err != nil {
108+ return nil , nil , errors .Wrap (err , "failed to open DDL file" )
109+ }
110+ defer func () {
111+ if cerr := f .Close (); cerr != nil && retErr == nil {
112+ retErr = errors .Wrap (cerr , "failed to close input DDL file" )
113+ }
114+ }()
115+ return generateDDLFromDDLFile (bufio .NewReader (f ), dbName , anonymize )
116+ }
104117 f , err := openCreateStatementsTSV (zipDir )
105118 if err != nil {
106119 return nil , nil , errors .Wrap (err , "failed to open TSV file" )
@@ -111,14 +124,158 @@ func generateDDLs(
111124 }
112125 }()
113126
114- return generateDDLFromReader (bufio .NewReader (f ), dbName , anonymize )
127+ return generateDDLFromCSV (bufio .NewReader (f ), dbName , anonymize )
128+ }
129+
130+ // generateDDLFromDDLFile reads DDL statements from a SQL dump file
131+ // and returns a map of table names to their schemas and a map of
132+ // short table names to their CREATE TABLE statements.
133+ // The file can be generated by running the following:
134+ //
135+ // cockroach sql --url='postgresql://<url>/<db name>' --execute="SHOW CREATE ALL TABLES;" > ddl_file.sql
136+ func generateDDLFromDDLFile (
137+ reader * bufio.Reader , dbName string , anonymize bool ,
138+ ) (map [string ]* TableSchema , map [string ]string , error ) {
139+ // the results are stored in these Maps.
140+ tableStatements := make (map [string ]string )
141+ order := make ([]string , 0 )
142+ seen := make (map [string ]struct {})
143+
144+ // Buffer accumulates the SQL statements
145+ var currentStmt strings.Builder
146+ // inStatement helps handling multi line statements
147+ inStatement := false
148+
149+ // The file is read line by line
150+ for {
151+ line , err := reader .ReadString ('\n' )
152+ if err != nil {
153+ if err .Error () == "EOF" {
154+ break
155+ }
156+ return nil , nil , errors .Wrap (err , "failed while reading SQL file" )
157+ }
158+
159+ // Empty lines and comments are skipped
160+ trimmedLine := strings .TrimSpace (line )
161+ if ! inStatement {
162+ // The generated statement has a quote at the start of the statement. This is trimmed.
163+ trimmedLine = strings .TrimLeft (trimmedLine , "\" " )
164+ }
165+ if trimmedLine == "" || strings .HasPrefix (trimmedLine , "--" ) ||
166+ strings .HasPrefix (trimmedLine , "create_statement" ) {
167+ continue
168+ }
169+
170+ // A new statement is expected to start with CREATE TABLE.
171+ if strings .HasPrefix (strings .ToUpper (trimmedLine ), "CREATE TABLE" ) {
172+ // If we were already in a statement, the previous statement is processed
173+ if inStatement {
174+ tableStatements , order , seen = processStatement (currentStmt .String (), tableStatements , order , seen , dbName )
175+ }
176+
177+ // A new statement is started.
178+ currentStmt .Reset ()
179+ currentStmt .WriteString (trimmedLine )
180+ inStatement = true
181+ } else if strings .HasPrefix (strings .ToUpper (trimmedLine ), "ALTER TABLE" ) {
182+ // If we were in a CREATE TABLE statement, the statement is processed
183+ if inStatement {
184+ tableStatements , order , seen = processStatement (currentStmt .String (), tableStatements , order , seen , dbName )
185+ }
186+
187+ // A new ALTER TABLE statement is started.
188+ currentStmt .Reset ()
189+ currentStmt .WriteString (trimmedLine )
190+ inStatement = true
191+ } else if inStatement {
192+ if strings .HasSuffix (trimmedLine , ";\" " ) {
193+ // The generated statement has a quote at the end of the statement. This needs to be trimmed.
194+ trimmedLine = strings .TrimRight (trimmedLine , "\" " )
195+ }
196+ // The current statement is accumulated.
197+ currentStmt .WriteString (trimmedLine )
198+
199+ // if the statement is complete (ends with semicolon or has closing parenthesis followed by options), it is processed.
200+ if strings .HasSuffix (trimmedLine , ";" ) ||
201+ (strings .Contains (trimmedLine , ");" ) && ! strings .HasPrefix (trimmedLine , "--" )) {
202+ tableStatements , order , seen = processStatement (currentStmt .String (), tableStatements , order , seen , dbName )
203+ inStatement = false
204+ }
205+ }
206+ }
207+
208+ // Any remaining statement is processed.
209+ if inStatement {
210+ tableStatements , order , _ = processStatement (currentStmt .String (), tableStatements , order , seen , dbName )
211+ }
212+
213+ return buildSchemas (order , tableStatements ), buildCreateStmts (tableStatements ), nil
214+ }
215+
216+ // processStatement processes a single SQL statement and adds it to the tableStatements map if it's a CREATE TABLE statement
217+ // It returns the updated tableStatements, order, and seen maps
218+ func processStatement (
219+ stmt string ,
220+ tableStatements map [string ]string ,
221+ order []string ,
222+ seen map [string ]struct {},
223+ dbName string ,
224+ ) (map [string ]string , []string , map [string ]struct {}) {
225+ // Only process CREATE TABLE statements
226+ if ! strings .HasPrefix (strings .ToUpper (strings .TrimSpace (stmt )), "CREATE TABLE" ) {
227+ return tableStatements , order , seen
228+ }
229+
230+ // Extract the table name using the tablePattern regex
231+ tableMatch := tablePattern .FindStringSubmatch (stmt )
232+ if tableMatch == nil {
233+ return tableStatements , order , seen
234+ }
235+
236+ // Extract and normalize the table name
237+ tableName := tableMatch [1 ]
238+ parts := strings .Split (tableName , "." )
239+ for i := range parts {
240+ parts [i ] = strings .Trim (parts [i ], `"` ) // Remove quotes from parts
241+ }
242+
243+ // If the table name doesn't have a schema, assume it's "public"
244+ var schemaName string
245+ var simpleTableName string
246+
247+ if len (parts ) == 1 {
248+ schemaName = "public"
249+ simpleTableName = parts [0 ]
250+ } else if len (parts ) == 2 {
251+ schemaName = parts [0 ]
252+ simpleTableName = parts [1 ]
253+ } else {
254+ // Skip tables with more complex names
255+ return tableStatements , order , seen
256+ }
257+
258+ // Create a regex for the schema name
259+ schemaPattern := regexp .MustCompile (`\b` + regexp .QuoteMeta (schemaName ) + `\.` )
260+
261+ // Process the DDL record
262+ fullTableName , statement := processDDLRecord (dbName , schemaName , simpleTableName , stmt , schemaPattern )
263+
264+ // Add to the maps if not seen before
265+ if _ , ok := seen [fullTableName ]; ! ok && fullTableName != "" {
266+ tableStatements [fullTableName ] = statement
267+ order = append (order , fullTableName )
268+ seen [fullTableName ] = struct {}{}
269+ }
270+
271+ return tableStatements , order , seen
115272}
116273
117- // generateDDLFromReader takes a reader for a TSV file containing DDL statements,
274+ // generateDDLFromCSV takes a reader for a TSV file containing DDL statements,
118275// parses the statements, and returns a map of table names to their schemas
119276// and a map of short table names to their CREATE TABLE statements.
120277// It has been designed this way to make it unit-testable.
121- func generateDDLFromReader (
278+ func generateDDLFromCSV (
122279 r io.Reader , dbName string , anonymize bool ,
123280) (map [string ]* TableSchema , map [string ]string , error ) {
124281 reader := csv .NewReader (r )
0 commit comments