Skip to content

Commit 9beac25

Browse files
committed
contain every character and skip comment tokens in strings
1 parent d05152f commit 9beac25

File tree

1 file changed

+38
-24
lines changed

1 file changed

+38
-24
lines changed

src/lexer.ts

Lines changed: 38 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -69,35 +69,42 @@ function tpr(start: Position, end: Position): Range {
6969
* @param {string} source Source string.
7070
* @returns A list of tokens generated from source string.
7171
* @author efekos
72-
* @version 1.0.7
73-
* @since 0.0.1-alpha
74-
* @throws LexerError if an error occurs.
72+
* @version 1.0.8
73+
* @since 0.0.2-alpha
7574
*/
7675
export function tokenizeSyx(source: string): Token[] {
7776
const tokens: Token[] = [];
7877
const src = source.split('');
78+
let lastString = 'n';
79+
let inString = false;
80+
/**
 * Updates the string-tracking state after a quote character has been consumed.
 *
 * `lastString` holds the quote character that opened the current string, or
 * `'n'` when no string is open. `inString` mirrors that state as a boolean.
 *
 * The branches are mutually exclusive on purpose: with independent `if`
 * statements, a closing quote reset `lastString` to `'n'` and then immediately
 * matched the "no string open" check, re-opening the string in the same call.
 *
 * @param s Quote character that was just consumed (`'` or `"`).
 */
function t(s: string): void {
    if (lastString === 'n') {
        // No string is open: this quote starts one.
        lastString = s;
        inString = true;
    } else if (lastString === s) {
        // Same quote kind as the opener: the string ends here.
        lastString = 'n';
        inString = false;
    }
    // Any other quote kind inside an open string is plain content; state is unchanged.
}
7985
let curPos = 1;
8086
let curLine = 1;
8187

8288
while (src.length > 0) {
83-
if (src[0] === '/' && src[1] === '/') {
89+
if (src[0] === '/' && src[1] === '/'&&!inString) {
8490
while (src.length > 0 && src[0] as string !== '\n') {
8591
src.shift();
92+
curPos++;
8693
}
8794
}
88-
if (src[0] === '(') tokens.push({ type: TokenType.OpenParen, value: src.shift(), range: opr(curLine, curPos++) });
89-
else if (src[0] === ')') tokens.push({ type: TokenType.CloseParen, value: src.shift(), range: opr(curLine, curPos++) });
90-
else if (src[0] === '{') tokens.push({ type: TokenType.OpenBrace, value: src.shift(), range: opr(curLine, curPos++) });
91-
else if (src[0] === '}') tokens.push({ type: TokenType.CloseBrace, value: src.shift(), range: opr(curLine, curPos++) });
92-
else if (src[0] === '[') tokens.push({ type: TokenType.OpenSquare, value: src.shift(), range: opr(curLine, curPos++) });
93-
else if (src[0] === ']') tokens.push({ type: TokenType.CloseSquare, value: src.shift(), range: opr(curLine, curPos++) });
94-
else if (src[0] === ',') tokens.push({ type: TokenType.Comma, value: src.shift(), range: opr(curLine, curPos++) });
95-
else if (src[0] === ';') tokens.push({ type: TokenType.Semicolon, value: src.shift(), range: opr(curLine, curPos++) });
96-
else if (src[0] === '<') tokens.push({ type: TokenType.OpenDiamond, value: src.shift(), range: opr(curLine, curPos++) });
97-
else if (src[0] === '>') tokens.push({ type: TokenType.CloseDiamond, value: src.shift(), range: opr(curLine, curPos++) });
98-
else if (src[0] === '\'') tokens.push({ type: TokenType.SingleQuote, value: src.shift(), range: opr(curLine, curPos++) });
99-
else if (src[0] === '"') tokens.push({ type: TokenType.DoubleQuote, value: src.shift(), range: opr(curLine, curPos++) });
100-
else if (src[0] === '|') tokens.push({ type: TokenType.VarSeperator, value: src.shift(), range: opr(curLine, curPos++) });
95+
if (src[0] === '(') tokens.push({ type: inString?20:TokenType.OpenParen, value: src.shift(), range: opr(curLine, curPos++) });
96+
else if (src[0] === ')') tokens.push({ type: inString?20:TokenType.CloseParen, value: src.shift(), range: opr(curLine, curPos++) });
97+
else if (src[0] === '{') tokens.push({ type: inString?20:TokenType.OpenBrace, value: src.shift(), range: opr(curLine, curPos++) });
98+
else if (src[0] === '}') tokens.push({ type: inString?20:TokenType.CloseBrace, value: src.shift(), range: opr(curLine, curPos++) });
99+
else if (src[0] === '[') tokens.push({ type: inString?20:TokenType.OpenSquare, value: src.shift(), range: opr(curLine, curPos++) });
100+
else if (src[0] === ']') tokens.push({ type: inString?20:TokenType.CloseSquare, value: src.shift(), range: opr(curLine, curPos++) });
101+
else if (src[0] === ',') tokens.push({ type: inString?20:TokenType.Comma, value: src.shift(), range: opr(curLine, curPos++) });
102+
else if (src[0] === ';') tokens.push({ type: inString?20:TokenType.Semicolon, value: src.shift(), range: opr(curLine, curPos++) });
103+
else if (src[0] === '<') tokens.push({ type: inString?20:TokenType.OpenDiamond, value: src.shift(), range: opr(curLine, curPos++) });
104+
else if (src[0] === '>') tokens.push({ type: inString?20:TokenType.CloseDiamond, value: src.shift(), range: opr(curLine, curPos++) });
105+
else if (src[0] === '\'') {tokens.push({ type: TokenType.SingleQuote, value: src.shift(), range: opr(curLine, curPos++) });t('\'');}
106+
else if (src[0] === '"') {tokens.push({ type: TokenType.DoubleQuote, value: src.shift(), range: opr(curLine, curPos++) });t('"');}
107+
else if (src[0] === '|') tokens.push({ type: inString?20:TokenType.VarSeperator, value: src.shift(), range: opr(curLine, curPos++) });
101108
else if (src[0] === '+' && chars.includes(src[1])) {
102109
if (src[1] === 's') tokens.push({ type: TokenType.WhitespaceIdentifier, value: '+s', range: tpr(pos(curLine, curPos), pos(curLine, curPos + 2)) });
103110
curPos += 2;
@@ -121,7 +128,7 @@ export function tokenizeSyx(source: string): Token[] {
121128

122129
const reserved = keywords[ident];
123130
tokens.push({ type: reserved ?? TokenType.Identifier, value: ident, range: tpr(pos(curLine, startPos), pos(curLine, curPos)) });
124-
} else if (isSkippable(src[0])) {
131+
} else if (isSkippable(src[0])&&!inString) {
125132
src.shift();
126133
curPos++;
127134
if (src[0] === '\n') { curLine++; curPos = 0; };
@@ -138,20 +145,27 @@ export function tokenizeSyx(source: string): Token[] {
138145
* @param {string} source Source string.
139146
* @returns A list of tokens generated from the source file.
140147
* @author efekos
141-
* @version 1.0.4
142-
* @since 0.0.1-alpha
148+
* @version 1.0.5
149+
* @since 0.0.2-alpha
143150
*/
144151
export function tokenizeSys(source: string): Token[] {
145152
const src = source.split('');
146153
const tokens: Token[] = [];
154+
let lastString = 'n';
155+
let inString = false;
156+
/**
 * Updates the string-tracking state after a quote character has been consumed.
 *
 * `lastString` holds the quote character that opened the current string, or
 * `'n'` when no string is open. `inString` mirrors that state as a boolean.
 *
 * The branches are mutually exclusive on purpose: with independent `if`
 * statements, a closing quote reset `lastString` to `'n'` and then immediately
 * matched the "no string open" check, re-opening the string in the same call.
 *
 * @param s Quote character that was just consumed (`'` or `"`).
 */
function t(s: string): void {
    if (lastString === 'n') {
        // No string is open: this quote starts one.
        lastString = s;
        inString = true;
    } else if (lastString === s) {
        // Same quote kind as the opener: the string ends here.
        lastString = 'n';
        inString = false;
    }
    // Any other quote kind inside an open string is plain content; state is unchanged.
}
147161

148162
let curPos = 0;
149163
let curLine = 1;
150164

151165
while (src.length > 0 && `${src[0]}${src[1]}${src[2]}` !== ':::') {
152-
if (src[0] === ';') tokens.push({ type: TokenType.Semicolon, value: src.shift(), range: opr(curLine, curPos++) });
153-
else if (src[0] === '\'') tokens.push({ type: TokenType.SingleQuote, value: src.shift(), range: opr(curLine, curPos++) });
154-
else if (src[0] === '"') tokens.push({ type: TokenType.DoubleQuote, value: src.shift(), range: opr(curLine, curPos++) });
166+
if (src[0] === ';') tokens.push({ type: inString?20:TokenType.Semicolon, value: src.shift(), range: opr(curLine, curPos++) });
167+
else if (src[0] === '\'') {tokens.push({ type: TokenType.SingleQuote, value: src.shift(), range: opr(curLine, curPos++) });t('\'');}
168+
else if (src[0] === '"') {tokens.push({ type: TokenType.DoubleQuote, value: src.shift(), range: opr(curLine, curPos++) });t('"');}
155169
else if (isAlphabetic(src[0])) {
156170
let ident = '';
157171
const startPos = curPos;
@@ -162,7 +176,7 @@ export function tokenizeSys(source: string): Token[] {
162176

163177
const reserved = keywords[ident];
164178
tokens.push({ type: reserved ?? TokenType.Identifier, value: ident, range: tpr(pos(curLine, startPos), pos(curLine, curPos)) });
165-
} else if (isSkippable(src[0])) {
179+
} else if (isSkippable(src[0])&&!inString) {
166180
src.shift();
167181
curPos++;
168182
if (src[0] === '\n') curLine++;

0 commit comments

Comments
 (0)