diff --git a/docker/antlr4-profiling.Dockerfile b/docker/antlr4-profiling.Dockerfile
new file mode 100644
index 00000000..abc751a7
--- /dev/null
+++ b/docker/antlr4-profiling.Dockerfile
@@ -0,0 +1,51 @@
+FROM registry.cn-hangzhou.aliyuncs.com/liuxy0551/eclipse-temurin:17-jdk-jammy
+
+# Absolute path of the ANTLR tool/runtime jar inside the image.
+ENV ANTLR_JAR=/usr/local/lib/antlr-4.13.1-complete.jar
+
+# Expose the jar on the classpath through the image environment so that every
+# process sees it, including non-interactive `docker exec <container> java ...`
+# calls that never source ~/.bashrc.
+ENV CLASSPATH=.:${ANTLR_JAR}
+
+# Install the required tools and drop the apt package lists afterwards.
+RUN apt-get update \
+    && apt-get install -y curl unzip vim \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+
+# Download the ANTLR jar. -f makes an HTTP error fail the build instead of
+# saving the error page as the jar; -L follows redirects.
+RUN curl -fsSL -o "${ANTLR_JAR}" https://www.antlr.org/download/antlr-4.13.1-complete.jar
+
+# Convenience aliases for interactive shells.
+RUN echo 'alias antlr4="java -jar /usr/local/lib/antlr-4.13.1-complete.jar"' >> ~/.bashrc \
+    && echo 'alias grun="java org.antlr.v4.gui.TestRig"' >> ~/.bashrc
+
+# Working directory.
+WORKDIR /grammar
+
+# Keep bash as the default command.
+CMD ["bash"]
+
+
+
+########################## Usage ##########################
+# 1. Build the image
+#    docker build -f ./docker/antlr4-profiling.Dockerfile -t antlr4-profiling .
+# 2. Start the container (absolute host path: older Docker CLIs reject relative bind mounts)
+#    docker run -d -it --name antlr4-profiling -v "$(pwd)/src/grammar:/grammar" antlr4-profiling
+# 3. Open a shell in the container
+#    docker exec -it antlr4-profiling bash
+
+# Note: run the java commands below from this directory, otherwise the generated classes are not found.
+#    cd /grammar/spark
+
+# 4. Inside the container, generate the Java parser
+#    antlr4 -Dlanguage=Java -visitor -no-listener ./SparkSqlLexer.g4 ./SparkSqlParser.g4
+
+# 5. Compile the Java sources
+#    javac -cp .:/usr/local/lib/antlr-4.13.1-complete.jar SparkSqlProfiling.java
+
+# 6. Run the Java program
+#    java -cp .:/usr/local/lib/antlr-4.13.1-complete.jar SparkSqlProfiling "SELECT * FROM a WHERE b = 1"
diff --git a/src/grammar/spark/SparkSqlLexer.g4 b/src/grammar/spark/SparkSqlLexer.g4 index 69058ced..556a356f 100644 --- a/src/grammar/spark/SparkSqlLexer.g4 +++ b/src/grammar/spark/SparkSqlLexer.g4 @@ -29,23 +29,6 @@ options { caseInsensitive= true; } -@members { - /** - * When true, parser should throw ParseException for unclosed bracketed comment. 
- */ - public has_unclosed_bracketed_comment = false; - - /** - * This method will be called when the character stream ends and try to find out the - * unclosed bracketed comment. - * If the method be called, it means the end of the entire character stream match, - * and we set the flag and fail later. - */ - public markUnclosedComment() { - this.has_unclosed_bracketed_comment = true; - } -} - SEMICOLON: ';'; LEFT_PAREN : '('; @@ -478,8 +461,7 @@ fragment LETTER: [A-Z]; LINE_COMMENT: '--' ('\\\n' | ~[\r\n])* '\r'? '\n'? -> channel(HIDDEN); -BRACKETED_COMMENT: - '/*' (BRACKETED_COMMENT | .)*? ('*/' | {this.markUnclosedComment();} EOF) -> channel(HIDDEN); +BRACKETED_COMMENT: '/*' (BRACKETED_COMMENT | .)*? ('*/' | EOF) -> channel(HIDDEN); WHITE_SPACE: (' ' | '\t' | '\r' | '\n') -> channel(HIDDEN); diff --git a/src/grammar/spark/SparkSqlParser.g4 b/src/grammar/spark/SparkSqlParser.g4 index 2dad67ea..ebb4cfc2 100644 --- a/src/grammar/spark/SparkSqlParser.g4 +++ b/src/grammar/spark/SparkSqlParser.g4 @@ -27,11 +27,6 @@ parser grammar SparkSqlParser; options { tokenVocab=SparkSqlLexer; caseInsensitive= true; - superClass=SQLParserBase; -} - -@header { -import { SQLParserBase } from '../SQLParserBase'; } program @@ -415,7 +410,6 @@ viewName columnName : multipartIdentifier - | {this.shouldMatchEmpty()}? 
; columnNamePath
diff --git a/src/grammar/spark/SparkSqlProfiling.java b/src/grammar/spark/SparkSqlProfiling.java
new file mode 100644
index 00000000..d1a95de6
--- /dev/null
+++ b/src/grammar/spark/SparkSqlProfiling.java
@@ -0,0 +1,69 @@
+import org.antlr.v4.runtime.CharStreams;
+import org.antlr.v4.runtime.CommonTokenStream;
+import org.antlr.v4.runtime.DiagnosticErrorListener;
+import org.antlr.v4.runtime.atn.DecisionInfo;
+import org.antlr.v4.runtime.atn.ParseInfo;
+import org.antlr.v4.runtime.atn.PredictionMode;
+
+/**
+ * Command-line harness that parses one Spark SQL statement with the generated Java
+ * lexer/parser and prints ANTLR's prediction profiling statistics.
+ *
+ * <p>Usage: {@code java SparkSqlProfiling "SELECT * FROM a WHERE b = 1"}. All
+ * arguments are joined with single spaces to form the SQL text.
+ */
+public class SparkSqlProfiling {
+    /**
+     * Parses the SQL given on the command line and prints the profiling report.
+     *
+     * @param args the SQL text, possibly split across several arguments
+     * @throws Exception declared for compatibility with existing callers
+     */
+    public static void main(String[] args) throws Exception {
+        if (args.length == 0) {
+            System.out.println("请传入 SQL 测试语句,例如: java SparkSqlProfiling \"SELECT * FROM a WHERE b = 1\"");
+            return;
+        }
+
+        String sql = String.join(" ", args);
+
+        // Build the lexer -> token stream -> parser pipeline.
+        SparkSqlLexer lexer = new SparkSqlLexer(CharStreams.fromString(sql));
+        CommonTokenStream tokens = new CommonTokenStream(lexer);
+        SparkSqlParser parser = new SparkSqlParser(tokens);
+
+        // getParseInfo() returns null unless the parser runs on the profiling
+        // interpreter, which setProfile(true) installs.
+        parser.setProfile(true);
+
+        // Full LL prediction with exact ambiguity detection; the diagnostic
+        // listener reports the ambiguities it finds while parsing.
+        parser.getInterpreter().setPredictionMode(PredictionMode.LL_EXACT_AMBIG_DETECTION);
+        parser.addErrorListener(new DiagnosticErrorListener(true));
+
+        // Entry rule.
+        parser.singleStatement();
+
+        printReport(parser);
+    }
+
+    /** Prints overall totals followed by one line per decision that was actually exercised. */
+    private static void printReport(SparkSqlParser parser) {
+        ParseInfo info = parser.getParseInfo();
+        System.out.println("syntax errors: " + parser.getNumberOfSyntaxErrors());
+        System.out.println("time in prediction (ns): " + info.getTotalTimeInPrediction());
+        System.out.println("SLL lookahead ops: " + info.getTotalSLLLookaheadOps());
+        System.out.println("LL lookahead ops: " + info.getTotalLLLookaheadOps());
+        System.out.println("decisions that fell back to LL: " + info.getLLDecisions());
+
+        String[] ruleNames = parser.getRuleNames();
+        for (DecisionInfo decision : info.getDecisionInfo()) {
+            if (decision.invocations == 0) {
+                continue; // decision never reached for this input
+            }
+            int ruleIndex = parser.getATN().getDecisionState(decision.decision).ruleIndex;
+            System.out.println(ruleNames[ruleIndex] + ": invocations=" + decision.invocations
+                    + ", timeInPrediction(ns)=" + decision.timeInPrediction + ", " + decision);
+        }
+    }
+}