1+ // Copyright 2025 Google LLC
2+ //
3+ // Licensed under the Apache License, Version 2.0 (the "License");
4+ // you may not use this file except in compliance with the License.
5+ // You may obtain a copy of the License at
6+ //
7+ // https://www.apache.org/licenses/LICENSE-2.0
8+ //
9+ // Unless required by applicable law or agreed to in writing, software
10+ // distributed under the License is distributed on an "AS IS" BASIS,
11+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+ // See the License for the specific language governing permissions and
13+ // limitations under the License.
14+
115package parser
216
317import (
@@ -50,6 +64,13 @@ func union(m1 map[string]bool, m2 map[string]bool) map[string]bool {
5064 return res
5165}
5266
67+ type statementsCacheEntry struct {
68+ sql string
69+ params []string
70+ info * StatementInfo
71+ parsedStatement ParsedStatement
72+ }
73+
var (
	// createParserLock is a package-level mutex.
	// NOTE(review): presumably serializes the creation of new parsers in
	// getStatementParser - confirm against that function.
	createParserLock sync.Mutex
	// statementParsers is a package-level concurrent map. Its zero value is
	// an empty map that is ready for use.
	// NOTE(review): presumably holds the shared parser per dialect and cache
	// size combination - confirm against getStatementParser.
	statementParsers sync.Map
)
5576
@@ -72,12 +93,22 @@ func getStatementParser(dialect databasepb.DatabaseDialect, cacheSize int) (*Sta
7293 }
7394}
7495
96+ // StatementParser is a simple, dialect-aware SQL statement parser for Spanner.
97+ // It can be used to determine the type of SQL statement (e.g. DQL/DML/DDL), and
98+ // extract further information from the statement, such as the query parameters.
99+ //
100+ // This is an internal type that can receive breaking changes without prior notice.
75101type StatementParser struct {
76102 Dialect databasepb.DatabaseDialect
77103 useCache bool
78104 statementsCache * lru.Cache [string , * statementsCacheEntry ]
79105}
80106
107+ // NewStatementParser creates a new parser for the given SQL dialect and with the given
108+ // cache size. Parsers can be shared among multiple database connections. The Spanner
109+ // database/sql driver will only create one parser per database dialect and cache size combination.
110+ //
111+ // This is an internal function that can receive breaking changes without prior notice.
81112func NewStatementParser (dialect databasepb.DatabaseDialect , cacheSize int ) (* StatementParser , error ) {
82113 if cacheSize > 0 {
83114 cache , err := lru.New [string , * statementsCacheEntry ](cacheSize )
@@ -89,32 +120,54 @@ func NewStatementParser(dialect databasepb.DatabaseDialect, cacheSize int) (*Sta
89120 return & StatementParser {Dialect : dialect }, nil
90121}
91122
123+ // CacheSize returns the current size of the statement cache of this StatementParser.
92124func (p * StatementParser ) CacheSize () int {
93125 if p .useCache {
94126 return p .statementsCache .Len ()
95127 }
96128 return 0
97129}
98130
131+ // UseCache returns true if this StatementParser uses a cache.
99132func (p * StatementParser ) UseCache () bool {
100133 return p .useCache
101134}
102135
136+ // supportsHashSingleLineComments returns true if the database dialect of this parser supports
137+ // comments of the following form:
138+ //
139+ // # This is a single-line comment.
140+ //
141+ // GoogleSQL supports this type of comment.
142+ // PostgreSQL does not support this type of comment.
103143func (p * StatementParser ) supportsHashSingleLineComments () bool {
104144 return p .Dialect != databasepb .DatabaseDialect_POSTGRESQL
105145}
106146
147+ // supportsNestedComments returns true if the database dialect of this parser supports
148+ // nested comments. Nested comments means that comments of this style are supported:
149+ //
150+ // /* This is a comment. /* This is a nested comment. */ This is still a comment. */
151+ //
152+ // GoogleSQL does not support nested comments.
153+ // PostgreSQL supports nested comments.
107154func (p * StatementParser ) supportsNestedComments () bool {
108155 return p .Dialect == databasepb .DatabaseDialect_POSTGRESQL
109156}
110157
158+ // identifierQuoteToken returns the token that is used for quoted identifiers.
159+ // GoogleSQL uses ` (backtick) for quoted identifiers.
160+ // PostgreSQL uses " (double quotes) for quoted identifiers.
111161func (p * StatementParser ) identifierQuoteToken () byte {
112162 if p .Dialect == databasepb .DatabaseDialect_POSTGRESQL {
113163 return '"'
114164 }
115165 return '`'
116166}
117167
168+ // supportsBacktickQuotes returns true if the dialect supports backticks as a valid quote token.
169+ // GoogleSQL supports backtick quotes for identifiers.
170+ // PostgreSQL does not support backticks as a valid quote token.
118171func (p * StatementParser ) supportsBacktickQuotes () bool {
119172 return p .Dialect != databasepb .DatabaseDialect_POSTGRESQL
120173}
@@ -125,10 +178,26 @@ func (p *StatementParser) supportsDoubleQuotedStringLiterals() bool {
125178 return p .Dialect != databasepb .DatabaseDialect_POSTGRESQL
126179}
127180
181+ // supportsTripleQuotedLiterals returns true if the dialect supports quoted strings and identifiers
182+ // that start with three occurrences of the same quote token. Triple-quoted strings and identifiers
183+ // are allowed to contain linefeeds and single occurrences of the quote token. Example:
184+ //
185+ // ”'This is a string, and it's allowed to use a single quote inside the string”'
186+ //
187+ // GoogleSQL supports triple-quoted literals.
188+ // PostgreSQL does not support triple-quoted literals.
128189func (p * StatementParser ) supportsTripleQuotedLiterals () bool {
129190 return p .Dialect != databasepb .DatabaseDialect_POSTGRESQL
130191}
131192
193+ // supportsDollarQuotedStrings returns true if the dialect supports strings that use double dollar signs
194+ // to mark the start and end of a string. The two dollar signs can optionally contain a tag. Examples:
195+ //
196+ // $$ This is a dollar-quoted string without a tag $$
197+ // $my_tag$ This is a dollar-quoted string with a tag $my_tag$
198+ //
199+ // GoogleSQL does not support dollar-quoted strings.
200+ // PostgreSQL supports dollar-quoted strings.
132201func (p * StatementParser ) supportsDollarQuotedStrings () bool {
133202 return p .Dialect == databasepb .DatabaseDialect_POSTGRESQL
134203}
@@ -492,7 +561,7 @@ func (p *StatementParser) calculateFindParamsResult(sql string) (string, []strin
492561func (p * StatementParser ) ParseClientSideStatement (query string ) (ParsedStatement , error ) {
493562 if p .useCache {
494563 if val , ok := p .statementsCache .Get (query ); ok {
495- if val .info .StatementType == statementTypeClientSide {
564+ if val .info .StatementType == StatementTypeClientSide {
496565 return val .parsedStatement , nil
497566 }
498567 return nil , nil
@@ -513,7 +582,7 @@ func (p *StatementParser) ParseClientSideStatement(query string) (ParsedStatemen
513582 sql : query ,
514583 parsedStatement : stmt ,
515584 info : & StatementInfo {
516- StatementType : statementTypeClientSide ,
585+ StatementType : StatementTypeClientSide ,
517586 },
518587 }
519588 p .statementsCache .Add (query , cacheEntry )
@@ -552,7 +621,7 @@ func isDmlKeyword(keyword string) bool {
552621// of the sql string have been removed.
553622func (p * StatementParser ) isQuery (query string ) bool {
554623 info := p .DetectStatementType (query )
555- return info .StatementType == statementTypeQuery
624+ return info .StatementType == StatementTypeQuery
556625}
557626
558627func isCreateKeyword (keyword string ) bool {
@@ -596,28 +665,46 @@ func isStatementKeyword(keyword string, keywords map[string]bool) bool {
596665 return ok
597666}
598667
// StatementType identifies the kind of a SQL statement.
type StatementType int

const (
	// StatementTypeUnknown means that the parser could not determine the type
	// of the SQL statement. The SQL string may be invalid, or it may use a
	// syntax that the parser does not (yet) support.
	StatementTypeUnknown StatementType = iota
	// StatementTypeQuery means that the statement is a query: it returns rows
	// from Spanner and does not modify the database.
	StatementTypeQuery
	// StatementTypeDml means that the statement is a data modification language
	// (DML) statement that modifies data in the database. Whether it returns
	// rows depends on the presence of a THEN RETURN (GoogleSQL) or RETURNING
	// (PostgreSQL) clause.
	StatementTypeDml
	// StatementTypeDdl means that the statement is a data definition language
	// (DDL) statement that modifies the schema of the database. It never
	// returns rows.
	StatementTypeDdl
	// StatementTypeClientSide means that the statement is handled client-side
	// by the database/sql driver and is not sent to Spanner. Examples of this
	// include SHOW and SET statements.
	StatementTypeClientSide
)
608692
// DmlType identifies the kind of modification that a DML statement performs.
type DmlType int

const (
	// DmlTypeUnknown is the zero value. It is used when the DML type of a
	// statement could not be determined, or when the statement is not DML.
	DmlTypeUnknown DmlType = iota
	// DmlTypeInsert marks a statement whose keyword is an insert keyword.
	DmlTypeInsert
	// DmlTypeUpdate marks a statement whose keyword is an update keyword.
	DmlTypeUpdate
	// DmlTypeDelete marks a statement whose keyword is a delete keyword.
	DmlTypeDelete
)
617702
703+ // StatementInfo contains the type of SQL statement, and in case of a DML statement,
704+ // the type of DML command.
618705type StatementInfo struct {
619706 StatementType StatementType
620- DmlType dmlType
707+ DmlType DmlType
621708}
622709
623710// DetectStatementType returns the type of SQL statement based on the first
@@ -643,7 +730,7 @@ func (p *StatementParser) calculateDetectStatementType(sql string) *StatementInf
643730 _ = parser .skipStatementHint ()
644731 keyword := strings .ToUpper (parser .readKeyword ())
645732 if isQueryKeyword (keyword ) {
646- return & StatementInfo {StatementType : statementTypeQuery }
733+ return & StatementInfo {StatementType : StatementTypeQuery }
647734 } else if isDmlKeyword (keyword ) {
648735 return & StatementInfo {
649736 StatementType : StatementTypeDml ,
@@ -652,16 +739,16 @@ func (p *StatementParser) calculateDetectStatementType(sql string) *StatementInf
652739 } else if isDDLKeyword (keyword ) {
653740 return & StatementInfo {StatementType : StatementTypeDdl }
654741 }
655- return & StatementInfo {StatementType : statementTypeUnknown }
742+ return & StatementInfo {StatementType : StatementTypeUnknown }
656743}
657744
658- func detectDmlKeyword (keyword string ) dmlType {
745+ func detectDmlKeyword (keyword string ) DmlType {
659746 if isStatementKeyword (keyword , insertStatements ) {
660747 return DmlTypeInsert
661748 } else if isStatementKeyword (keyword , updateStatements ) {
662- return dmlTypeUpdate
749+ return DmlTypeUpdate
663750 } else if isStatementKeyword (keyword , deleteStatements ) {
664- return dmlTypeDelete
751+ return DmlTypeDelete
665752 }
666- return dmlTypeUnknown
753+ return DmlTypeUnknown
667754}
0 commit comments