Skip to content

Commit d050b1c

Browse files
authored
fix: support e-strings for PostgreSQL (#607)
PostgreSQL by default follows the SQL standard, which says that a quote inside a quoted string must be escaped by writing the quote twice, like this:

```sql
'It''s a valid string'
```

PostgreSQL also supports escaping a quote with a backslash. To enable this, the string must be prefixed with an e or E:

```sql
e'It\'s a valid string'
```
1 parent d052774 commit d050b1c

File tree

2 files changed

+45
-1
lines changed

2 files changed

+45
-1
lines changed

parser/statement_parser.go

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -216,6 +216,13 @@ func (p *StatementParser) supportsBackslashEscape() bool {
216216
return p.Dialect != databasepb.DatabaseDialect_POSTGRESQL
217217
}
218218

219+
// supportsEscapeStrings returns true if the dialect supports enabling escaping using backslashes
220+
// by prepending an e or E to the string. Example:
221+
// e'It\'s a valid string' => This is the string "It's a valid string".
222+
func (p *StatementParser) supportsEscapeStrings() bool {
223+
return p.Dialect == databasepb.DatabaseDialect_POSTGRESQL
224+
}
225+
219226
// supportsEscapeQuoteWithQuote returns true if the dialect supports escaping a quote within a quoted
220227
// literal by repeating the quote twice. Example (note that the way that two single quotes are written in the following
221228
// examples is something that is enforced by gofmt):
@@ -402,6 +409,17 @@ func (p *StatementParser) skipMultiLineComment(sql []byte, pos int) int {
402409
// could not be read.
403410
// The quote length is either 1 for normal quoted strings or 3 for triple-quoted strings.
404411
func (p *StatementParser) skipQuoted(sql []byte, pos int, quote byte) (int, int, error) {
412+
isEscapeString := false
413+
if p.supportsEscapeStrings() && pos > 0 {
414+
// TODO: Also implement support for the standard_conforming_strings property in PostgreSQL.
415+
// See https://www.postgresql.org/docs/current/runtime-config-compatible.html#GUC-STANDARD-CONFORMING-STRINGS
416+
// Check if it is an escape-string. This enables the use of a backslash to start an escape sequence, even if
417+
// the dialect normally does not support that. Escape strings start with an e or E, e.g. "e'It\'s valid'".
418+
// The second part of the check is to verify that the e or E is not part of a keyword, e.g. WHERE.
419+
// The following is valid SQL, but does not designate an escape-string:
420+
// SELECT * FROM my_table WHERE'test'=col1;
421+
isEscapeString = (sql[pos-1] == 'e' || sql[pos-1] == 'E') && (pos == 1 || !isLatinLetter(sql[pos-2]))
422+
}
405423
isTripleQuoted := p.supportsTripleQuotedLiterals() && len(sql) > pos+2 && sql[pos+1] == quote && sql[pos+2] == quote
406424
if isTripleQuoted && (isMultibyte(sql[pos+1]) || isMultibyte(sql[pos+2])) {
407425
isTripleQuoted = false
@@ -434,7 +452,7 @@ func (p *StatementParser) skipQuoted(sql []byte, pos int, quote byte) (int, int,
434452
// This was the end quote.
435453
return pos + 1, quoteLength, nil
436454
}
437-
} else if p.supportsBackslashEscape() && len(sql) > pos+1 && c == '\\' && sql[pos+1] == quote {
455+
} else if (p.supportsBackslashEscape() || isEscapeString) && len(sql) > pos+1 && c == '\\' && sql[pos+1] == quote {
438456
// This is an escaped quote (e.g. 'foo\'bar').
439457
// Note that in raw strings, the \ officially does not start an
440458
// escape sequence, but the result is still the same, as in a raw

parser/statement_parser_test.go

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -897,6 +897,32 @@ SELECT * FROM PersonsTable WHERE id=@id`,
897897
?it\'?s'?`)),
898898
},
899899
},
900+
"e-string": {
901+
input: `SELECT e'ab\'c?'`,
902+
wantSQL: map[databasepb.DatabaseDialect]string{
903+
databasepb.DatabaseDialect_GOOGLE_STANDARD_SQL: `SELECT e'ab\'c?'`,
904+
databasepb.DatabaseDialect_POSTGRESQL: `SELECT e'ab\'c?'`,
905+
},
906+
want: map[databasepb.DatabaseDialect][]string{
907+
databasepb.DatabaseDialect_GOOGLE_STANDARD_SQL: {},
908+
databasepb.DatabaseDialect_POSTGRESQL: {},
909+
},
910+
},
911+
"not an e-string": {
912+
input: `SELECT * from my_table WHERE'ab\'c?' = col1`,
913+
wantSQL: map[databasepb.DatabaseDialect]string{
914+
databasepb.DatabaseDialect_GOOGLE_STANDARD_SQL: `SELECT * from my_table WHERE'ab\'c?' = col1`,
915+
databasepb.DatabaseDialect_POSTGRESQL: ``,
916+
},
917+
want: map[databasepb.DatabaseDialect][]string{
918+
databasepb.DatabaseDialect_GOOGLE_STANDARD_SQL: {},
919+
databasepb.DatabaseDialect_POSTGRESQL: nil,
920+
},
921+
wantErr: map[databasepb.DatabaseDialect]error{
922+
databasepb.DatabaseDialect_GOOGLE_STANDARD_SQL: nil,
923+
databasepb.DatabaseDialect_POSTGRESQL: spanner.ToSpannerError(status.Error(codes.InvalidArgument, "SQL statement contains an unclosed literal: SELECT * from my_table WHERE'ab\\'c?' = col1")),
924+
},
925+
},
900926
}
901927
for _, dialect := range []databasepb.DatabaseDialect{databasepb.DatabaseDialect_GOOGLE_STANDARD_SQL, databasepb.DatabaseDialect_POSTGRESQL} {
902928
parser, err := NewStatementParser(dialect, 1000)

0 commit comments

Comments
 (0)