@@ -27,9 +27,10 @@ use sqlparser_derive::{Visit, VisitMut};
2727use crate :: display_utils:: { indented_list, Indent , SpaceOrNewline } ;
2828
2929use super :: {
30- display_comma_separated, query:: InputFormatClause , Assignment , Expr , FromTable , Ident ,
31- InsertAliases , MysqlInsertPriority , ObjectName , OnInsert , OrderByExpr , Query , SelectItem ,
32- Setting , SqliteOnConflict , TableObject , TableWithJoins , UpdateTableFromKind ,
30+ display_comma_separated, display_separated, query:: InputFormatClause , Assignment ,
31+ CopyLegacyCsvOption , CopyLegacyOption , CopyOption , CopySource , CopyTarget , Expr , FromTable ,
32+ Ident , InsertAliases , MysqlInsertPriority , ObjectName , OnInsert , OrderByExpr , Query ,
33+ SelectItem , Setting , SqliteOnConflict , TableObject , TableWithJoins , UpdateTableFromKind ,
3334} ;
3435
3536/// INSERT statement.
@@ -303,3 +304,290 @@ impl Display for Update {
303304 Ok ( ( ) )
304305 }
305306}
307+
/// CSV formatting options extracted from COPY options.
///
/// Bundles the delimiter, quote, escape and NULL-marker settings used when
/// formatting the inline data of a COPY statement. The settings can be
/// derived from both modern [`CopyOption`] and legacy [`CopyLegacyOption`]
/// lists.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CsvFormatOptions {
    /// The field delimiter character (default: tab)
    pub(crate) delimiter: char,
    /// The quote character used to enclose fields (default: `"`)
    pub(crate) quote: char,
    /// The escape character (default: backslash)
    pub(crate) escape: char,
    /// The string representing NULL values (default: `\N`, i.e. a backslash
    /// followed by an upper-case `N`)
    pub(crate) null_symbol: String,
}

impl Default for CsvFormatOptions {
    /// Returns the settings used when a COPY statement specifies none:
    /// tab delimiter, `"` quote, backslash escape and `\N` as the NULL marker.
    fn default() -> Self {
        Self {
            delimiter: '\t',
            quote: '"',
            escape: '\\',
            // Exactly two characters: a backslash and `N`, with no whitespace
            // in between.
            null_symbol: "\\N".to_string(),
        }
    }
}
335+
336+ impl CsvFormatOptions {
337+ /// Extract CSV format options from CopyOption and CopyLegacyOption lists.
338+ ///
339+ /// This method processes both modern and legacy COPY options to determine
340+ /// the CSV formatting settings. Later options in the lists override earlier ones.
341+ ///
342+ /// # Arguments
343+ ///
344+ /// * `options` - Modern COPY options (PostgreSQL 9.0+)
345+ /// * `legacy_options` - Legacy COPY options (pre-PostgreSQL 9.0)
346+ ///
347+ /// # Returns
348+ ///
349+ /// A `CsvFormatOptions` instance with the extracted settings, using defaults
350+ /// for any options not specified.
351+ pub ( crate ) fn from_copy_options (
352+ options : & [ CopyOption ] ,
353+ legacy_options : & [ CopyLegacyOption ] ,
354+ ) -> Self {
355+ let mut csv_options = Self :: default ( ) ;
356+
357+ // Apply options
358+ for option in options {
359+ match option {
360+ CopyOption :: Delimiter ( c) => {
361+ csv_options. delimiter = * c;
362+ }
363+ CopyOption :: Quote ( c) => {
364+ csv_options. quote = * c;
365+ }
366+ CopyOption :: Escape ( c) => {
367+ csv_options. escape = * c;
368+ }
369+ CopyOption :: Null ( null) => {
370+ csv_options. null_symbol = null. clone ( ) ;
371+ }
372+ // These options don't affect CSV formatting
373+ CopyOption :: Format ( _)
374+ | CopyOption :: Freeze ( _)
375+ | CopyOption :: Header ( _)
376+ | CopyOption :: ForceQuote ( _)
377+ | CopyOption :: ForceNotNull ( _)
378+ | CopyOption :: ForceNull ( _)
379+ | CopyOption :: Encoding ( _) => { }
380+ }
381+ }
382+
383+ // Apply legacy options
384+ for option in legacy_options {
385+ match option {
386+ CopyLegacyOption :: Delimiter ( c) => {
387+ csv_options. delimiter = * c;
388+ }
389+ CopyLegacyOption :: Null ( null) => {
390+ csv_options. null_symbol = null. clone ( ) ;
391+ }
392+ CopyLegacyOption :: Csv ( csv_opts) => {
393+ for csv_option in csv_opts {
394+ match csv_option {
395+ CopyLegacyCsvOption :: Quote ( c) => {
396+ csv_options. quote = * c;
397+ }
398+ CopyLegacyCsvOption :: Escape ( c) => {
399+ csv_options. escape = * c;
400+ }
401+ // These CSV options don't affect CSV formatting
402+ CopyLegacyCsvOption :: Header
403+ | CopyLegacyCsvOption :: ForceQuote ( _)
404+ | CopyLegacyCsvOption :: ForceNotNull ( _) => { }
405+ }
406+ }
407+ }
408+ // These legacy options don't affect CSV formatting
409+ CopyLegacyOption :: AcceptAnyDate
410+ | CopyLegacyOption :: AcceptInvChars ( _)
411+ | CopyLegacyOption :: AddQuotes
412+ | CopyLegacyOption :: AllowOverwrite
413+ | CopyLegacyOption :: Binary
414+ | CopyLegacyOption :: BlankAsNull
415+ | CopyLegacyOption :: Bzip2
416+ | CopyLegacyOption :: CleanPath
417+ | CopyLegacyOption :: CompUpdate { .. }
418+ | CopyLegacyOption :: DateFormat ( _)
419+ | CopyLegacyOption :: EmptyAsNull
420+ | CopyLegacyOption :: Encrypted { .. }
421+ | CopyLegacyOption :: Escape
422+ | CopyLegacyOption :: Extension ( _)
423+ | CopyLegacyOption :: FixedWidth ( _)
424+ | CopyLegacyOption :: Gzip
425+ | CopyLegacyOption :: Header
426+ | CopyLegacyOption :: IamRole ( _)
427+ | CopyLegacyOption :: IgnoreHeader ( _)
428+ | CopyLegacyOption :: Json
429+ | CopyLegacyOption :: Manifest { .. }
430+ | CopyLegacyOption :: MaxFileSize ( _)
431+ | CopyLegacyOption :: Parallel ( _)
432+ | CopyLegacyOption :: Parquet
433+ | CopyLegacyOption :: PartitionBy ( _)
434+ | CopyLegacyOption :: Region ( _)
435+ | CopyLegacyOption :: RemoveQuotes
436+ | CopyLegacyOption :: RowGroupSize ( _)
437+ | CopyLegacyOption :: StatUpdate ( _)
438+ | CopyLegacyOption :: TimeFormat ( _)
439+ | CopyLegacyOption :: TruncateColumns
440+ | CopyLegacyOption :: Zstd => { }
441+ }
442+ }
443+
444+ csv_options
445+ }
446+
447+ /// Format a single CSV field, adding quotes and escaping if necessary.
448+ ///
449+ /// This method handles CSV field formatting according to the configured options:
450+ /// - Writes NULL values using the configured `null_symbol`
451+ /// - Adds quotes around fields containing delimiters, quotes, or newlines
452+ /// - Escapes quote characters by doubling them
453+ /// - Escapes escape characters
454+ ///
455+ /// # Arguments
456+ ///
457+ /// * `f` - The formatter to write to
458+ /// * `field` - The field value to format, or `None` for NULL
459+ ///
460+ /// # Returns
461+ ///
462+ /// A `fmt::Result` indicating success or failure of the write operation.
463+ fn format_csv_field ( & self , f : & mut fmt:: Formatter , field : Option < & str > ) -> fmt:: Result {
464+ let field_value = field. unwrap_or ( & self . null_symbol ) ;
465+
466+ // Check if field needs quoting
467+ let needs_quoting = field_value. contains ( self . delimiter )
468+ || field_value. contains ( self . quote )
469+ || field_value. contains ( '\n' )
470+ || field_value. contains ( '\r' ) ;
471+
472+ if needs_quoting {
473+ write ! ( f, "{}" , self . quote) ?;
474+ for ch in field_value. chars ( ) {
475+ if ch == self . quote {
476+ // Escape quote by doubling it
477+ write ! ( f, "{}{}" , self . quote, self . quote) ?;
478+ } else if ch == self . escape {
479+ // Escape escape character
480+ write ! ( f, "{}{}" , self . escape, self . escape) ?;
481+ } else {
482+ write ! ( f, "{}" , ch) ?;
483+ }
484+ }
485+ write ! ( f, "{}" , self . quote) ?;
486+ } else {
487+ write ! ( f, "{}" , field_value) ?;
488+ }
489+ Ok ( ( ) )
490+ }
491+ }
492+
493+ /// COPY statement.
494+ ///
495+ /// Represents a PostgreSQL COPY statement for bulk data transfer between
496+ /// a file and a table. The statement can copy data FROM a file to a table
497+ /// or TO a file from a table or query.
498+ ///
499+ /// # Syntax
500+ ///
501+ /// ```sql
502+ /// COPY table_name [(column_list)] FROM { 'filename' | STDIN | PROGRAM 'command' }
503+ /// COPY { table_name [(column_list)] | (query) } TO { 'filename' | STDOUT | PROGRAM 'command' }
504+ /// ```
505+ ///
506+ /// # Examples
507+ ///
508+ /// ```
509+ /// # use sqlparser::ast::{Copy, CopySource, CopyTarget, ObjectName};
510+ /// # use sqlparser::dialect::PostgreSqlDialect;
511+ /// # use sqlparser::parser::Parser;
512+ /// let sql = "COPY users FROM 'data.csv'";
513+ /// let dialect = PostgreSqlDialect {};
514+ /// let ast = Parser::parse_sql(&dialect, sql).unwrap();
515+ /// ```
516+ ///
517+ /// See [PostgreSQL documentation](https://www.postgresql.org/docs/current/sql-copy.html)
518+ #[ derive( Debug , Clone , PartialEq , PartialOrd , Eq , Ord , Hash ) ]
519+ #[ cfg_attr( feature = "serde" , derive( Serialize , Deserialize ) ) ]
520+ #[ cfg_attr( feature = "visitor" , derive( Visit , VisitMut ) ) ]
521+ pub struct Copy {
522+ /// The source of 'COPY TO', or the target of 'COPY FROM'.
523+ /// Can be a table name with optional column list, or a query (for COPY TO only).
524+ pub source : CopySource ,
525+ /// Direction of the copy operation.
526+ /// - `true` for COPY TO (table/query to file)
527+ /// - `false` for COPY FROM (file to table)
528+ pub to : bool ,
529+ /// The target of 'COPY TO', or the source of 'COPY FROM'.
530+ /// Can be a file, STDIN, STDOUT, or a PROGRAM command.
531+ pub target : CopyTarget ,
532+ /// Modern COPY options (PostgreSQL 9.0+), specified within parentheses.
533+ /// Examples: FORMAT, DELIMITER, NULL, HEADER, QUOTE, ESCAPE, etc.
534+ pub options : Vec < CopyOption > ,
535+ /// Legacy COPY options (pre-PostgreSQL 9.0), specified without parentheses.
536+ /// Also used by AWS Redshift extensions like IAM_ROLE, MANIFEST, etc.
537+ pub legacy_options : Vec < CopyLegacyOption > ,
538+ /// CSV data rows for COPY FROM STDIN statements.
539+ /// Each row is a vector of optional strings (None represents NULL).
540+ /// Populated only when copying from STDIN with inline data.
541+ pub values : Vec < Vec < Option < String > > > ,
542+ }
543+
544+ impl Display for Copy {
545+ fn fmt ( & self , f : & mut fmt:: Formatter ) -> fmt:: Result {
546+ write ! ( f, "COPY" ) ?;
547+ match & self . source {
548+ CopySource :: Query ( query) => write ! ( f, " ({query})" ) ?,
549+ CopySource :: Table {
550+ table_name,
551+ columns,
552+ } => {
553+ write ! ( f, " {table_name}" ) ?;
554+ if !columns. is_empty ( ) {
555+ write ! ( f, " ({})" , display_comma_separated( columns) ) ?;
556+ }
557+ }
558+ }
559+ write ! (
560+ f,
561+ " {} {}" ,
562+ if self . to { "TO" } else { "FROM" } ,
563+ self . target
564+ ) ?;
565+ if !self . options . is_empty ( ) {
566+ write ! ( f, " ({})" , display_comma_separated( & self . options) ) ?;
567+ }
568+ if !self . legacy_options . is_empty ( ) {
569+ write ! ( f, " {}" , display_separated( & self . legacy_options, " " ) ) ?;
570+ }
571+
572+ if !self . values . is_empty ( ) {
573+ writeln ! ( f, ";" ) ?;
574+
575+ let csv_options =
576+ CsvFormatOptions :: from_copy_options ( & self . options , & self . legacy_options ) ;
577+
578+ // Write CSV data
579+ for row in & self . values {
580+ for ( idx, column) in row. iter ( ) . enumerate ( ) {
581+ if idx > 0 {
582+ write ! ( f, "{}" , csv_options. delimiter) ?;
583+ }
584+ csv_options. format_csv_field ( f, column. as_deref ( ) ) ?;
585+ }
586+ writeln ! ( f) ?;
587+ }
588+
589+ write ! ( f, "\\ ." ) ?;
590+ }
591+ Ok ( ( ) )
592+ }
593+ }
0 commit comments