@@ -172,6 +172,46 @@ public ASTTerm removeOuterTag()
172172 return null;
173173 }
174174
175+ public ASTTerm removeWhitespaceTerms()
176+ { ASTTerm res = null;
177+ Vector newterms = new Vector();
178+ for (int i = 0; i < terms.size(); i++)
179+ { ASTTerm trm = (ASTTerm) terms.get(i);
180+ ASTTerm ntrm = trm.removeWhitespaceTerms();
181+ if (ntrm == null)
182+ { continue; }
183+ if (trm instanceof ASTSymbolTerm)
184+ { String str = ntrm.literalForm();
185+ String strim = str.trim();
186+ // System.out.println(">--- old term: " + str);
187+ if ("\\r\\n".equals(strim) ||
188+ "\\n\\r".equals(strim)) { }
189+ else if (strim.endsWith("\\r\\n") ||
190+ strim.endsWith("\\n\\r"))
191+ { str = strim.substring(0,strim.length()-4);
192+ ntrm = new ASTSymbolTerm(str);
193+ // System.out.println(">--- new term: " + str);
194+ newterms.add(ntrm);
195+ }
196+ else
197+ { newterms.add(ntrm); }
198+ }
199+ else
200+ { newterms.add(ntrm); }
201+ }
202+
203+ if (newterms.size() > 1)
204+ { res = new ASTCompositeTerm(tag,newterms); }
205+ else if (newterms.size() == 1)
206+ { ASTTerm tt = (ASTTerm) newterms.get(0);
207+ if (tt instanceof ASTSymbolTerm)
208+ { res = new ASTBasicTerm(tag,tt.literalForm()); }
209+ else
210+ { res = new ASTCompositeTerm(tag,newterms); }
211+ }
212+ return res;
213+ }
214+
175215 public ASTTerm getTerm(int i)
176216 { if (terms.size() > i)
177217 { return (ASTTerm) terms.get(i); }
@@ -317,6 +357,8 @@ public String cgRules(CGSpec cgs, Vector rules)
317357 }
318358 else if (vars.contains("_*") && terms.size() >= tokens.size())
319359 { } // ok
360+ else if (vars.contains("_+") && terms.size() >= tokens.size())
361+ { } // ok
320362 else if (tokens.size() == terms.size())
321363 { } // ok
322364 else
@@ -329,14 +371,22 @@ else if (tokens.size() == terms.size())
329371 Vector eargs = new Vector();
330372 // the actual terms[k]
331373
332- int k = 0;
374+ int k = 0; // terms position.
333375 boolean failed = false;
376+
334377 for (int j = 0; j < tokens.size() &&
335378 k < terms.size() && !failed; j++)
336379 { String tok = (String) tokens.get(j);
337380 ASTTerm tm = (ASTTerm) terms.get(k);
338381
339- if ("_*".equals(tok) && vars.contains(tok))
382+ System.out.println("$$$ matching token " + tok + " and term " + tm);
383+
384+ if (tok.equals(tm.literalForm()))
385+ { System.out.println(">> Matched token " + tok +
386+ " and term " + tm);
387+ k++;
388+ }
389+ else if ("_*".equals(tok) && vars.contains(tok))
340390 { // remainder of terms is processed as a list
341391 // _* should be the last token, or terminated by
342392 // nextTok
@@ -345,29 +395,68 @@ else if (tokens.size() == terms.size())
345395 if (tokens.size() > j+1)
346396 { nextTok = (String) tokens.get(j+1); }
347397
348- // System.out.println(">> End token for _* is: " + nextTok);
398+ System.out.println(">> Terminator token for _* is: " + nextTok);
349399 int remainingTokens = tokens.size() - (j+1);
350400
351401 boolean finished = false;
352402
353403 Vector rem = new Vector();
354- for (int p = j ; p < terms.size() && !finished; p++)
404+ for (int p = k ; p < terms.size() && !finished; p++)
355405 { ASTTerm pterm = (ASTTerm) terms.get(p);
356406 int remainingTerms = terms.size() - (k+1);
357407
358408 if (nextTok != null &&
359409 pterm.literalForm().equals(nextTok))
360- { finished = true; }
410+ { System.out.println("$$$ Matched terminator token " +
411+ nextTok +
412+ " for _* and term " + pterm);
413+ finished = true;
414+ // k++; // next term after terminator
415+ // j++; // Next lhs token after _*
416+ // j++; // Next lhs token after terminator
417+ }
361418 else if (remainingTokens > remainingTerms)
362419 { finished = true; }
363420 else
364421 { rem.add(pterm);
365422 k++;
366423 }
367- // System.out.println(">>> Terms for _* are: " + rem);
424+ System.out.println(">>> Terms for _* are: " + rem);
368425 }
369426 eargs.add(rem); // corresponds to _* variable
370427 }
428+ else if ("_+".equals(tok) && vars.contains(tok))
429+ { // remainder of terms is processed as a list
430+ // _+ should be the last token, or terminated by
431+ // nextTok
432+
433+ String nextTok = null;
434+ if (tokens.size() > j+1)
435+ { nextTok = (String) tokens.get(j+1); }
436+
437+ System.out.println(">> Terminator token for _+ is: " + nextTok);
438+ int remainingTokens = tokens.size() - (j+1);
439+
440+ boolean finished = false;
441+
442+ Vector rem = new Vector();
443+ for (int p = k ; p < terms.size() && !finished; p++)
444+ { ASTTerm pterm = (ASTTerm) terms.get(p);
445+ int remainingTerms = terms.size() - (k+1);
446+
447+ if (nextTok != null &&
448+ pterm.literalForm().equals(nextTok))
449+ { finished = true; }
450+ else if (remainingTokens > remainingTerms)
451+ { finished = true; }
452+ else
453+ { rem.add(pterm);
454+ k++;
455+ }
456+ System.out.println(">>> Terms for _+ are: " + rem);
457+ }
458+ eargs.add(rem); // corresponds to _+ variable
459+ }
371460 else if (vars.contains(tok))
372461 { // allocate terms(j) to tok
373462
@@ -393,11 +482,6 @@ else if (oldterm.equals(tm))
393482 failed = true;
394483 }
395484 }
396- else if (tok.equals(tm.literalForm()))
397- { System.out.println(">> Matched token " + tok +
398- " and term " + tm);
399- k++;
400- }
401485 else
402486 { // System.out.println("> " + tag + " rule " + r + " does not match " + this);
403487 // System.out.println(tok + " /= " + tm.literalForm());
@@ -39750,13 +39834,15 @@ public String antlr2cstl()
3975039834 { String res = "";
3975139835
3975239836 Vector conditions = new Vector();
39837+ Vector rulerefs = new Vector();
3975339838 for (int i = 0; i < terms.size(); i++)
3975439839 { ASTTerm tt = (ASTTerm) terms.get(i);
3975539840 String tg = tt.getTag();
3975639841 if ("element".equals(tg) || "atom".equals(tg) ||
3975739842 "lexerElement".equals(tg) ||
3975839843 "lexerAtom".equals(tg))
39759- { String elem = tt.antlrElement2cstl(i,conditions);
39844+ { String elem =
39845+ tt.antlrElement2cstl(rulerefs,conditions);
3976039846 res = res + elem;
3976139847 }
3976239848 }
@@ -39778,11 +39864,12 @@ public String antlr2cstl()
3977839864 return "";
3977939865 }
3978039866
39781- public String antlrElement2cstl(int i, Vector conditions)
39867+ public String antlrElement2cstl(Vector rulerefs,
39868+ Vector conditions)
3978239869 { if ("atom".equals(tag) ||
3978339870 "lexerAtom".equals(tag))
3978439871 { ASTTerm t1 = (ASTTerm) terms.get(0);
39785- return t1.antlrElement2cstl(i ,conditions);
39872+ return t1.antlrElement2cstl(rulerefs ,conditions);
3978639873 }
3978739874
3978839875 if ("terminal".equals(tag))
@@ -39791,23 +39878,29 @@ public String antlrElement2cstl(int i, Vector conditions)
3979139878 }
3979239879
3979339880 if ("ruleref".equals(tag))
39794- { ASTTerm t1 = (ASTTerm) terms.get(0);
39795- conditions.add("_" + (i+1) + " " + t1.literalForm());
39881+ { ASTTerm t1 = (ASTTerm) terms.get(0);
39882+ int i = rulerefs.size();
39883+ String t1lit = t1.literalForm();
39884+ conditions.add("_" + (i+1) + " " + t1lit);
39885+ rulerefs.add(t1lit);
3979639886 return "_" + (i+1) + " ";
3979739887 }
3979839888
3979939889 if (("element".equals(tag) ||
3980039890 "lexerElement".equals(tag)) && terms.size() == 2)
3980139891 { ASTTerm trm = (ASTTerm) terms.get(0);
3980239892 ASTTerm suffix = (ASTTerm) terms.get(1);
39803- conditions.add("_" + (i+1) + " " + trm.literalForm());
39893+ int i = rulerefs.size();
39894+ String t1lit = trm.literalForm();
39895+ conditions.add("_" + (i+1) + " " + t1lit);
39896+ rulerefs.add(t1lit);
3980439897 return "_" + (i+1) + suffix.antlr2cstl() + " ";
3980539898 }
3980639899
3980739900 if ("element".equals(tag) ||
3980839901 "lexerElement".equals(tag))
3980939902 { ASTTerm t1 = (ASTTerm) terms.get(0);
39810- return t1.antlrElement2cstl(i ,conditions);
39903+ return t1.antlrElement2cstl(rulerefs ,conditions);
3981139904 }
3981239905
3981339906 return "";
@@ -39844,30 +39937,36 @@ public Vector normaliseAntlr()
3984439937 return alternatives;
3984539938 }
3984639939 else if ("+".equals(suf))
39847- { Vector res = new Vector();
39940+ { Vector res = new Vector();
39941+
3984839942 for (int k = 0; k < innerpaths.size(); k++)
3984939943 { Vector path = (Vector) innerpaths.get(k);
39850- ASTBasicTerm ts1 =
39944+ res.add(path); /* one occurrence */
39945+ }
39946+ ASTBasicTerm ts1 =
3985139947 new ASTBasicTerm("terminal", "'_*'");
39852- ASTCompositeTerm terminalStar =
39948+ ASTCompositeTerm terminalStar =
3985339949 new ASTCompositeTerm("atom", ts1);
39854- path.add(terminalStar);
39855- res.add(path);
39856- }
39950+ Vector iterations = new Vector();
39951+ iterations.add(terminalStar);
39952+ res.add(iterations);
39953+
3985739954 return res;
3985839955 }
3985939956 else if ("*".equals(suf))
3986039957 { Vector res = new Vector();
39958+ res.add(new Vector());
3986139959 for (int k = 0; k < innerpaths.size(); k++)
39862- { Vector path = (Vector) innerpaths.get(k);
39863- ASTBasicTerm ts1 =
39960+ { Vector path = (Vector) innerpaths.get(k);
39961+ res.add(path);
39962+ }
39963+ ASTBasicTerm ts1 =
3986439964 new ASTBasicTerm("terminal", "'_*'");
39865- ASTCompositeTerm terminalStar =
39965+ ASTCompositeTerm terminalStar =
3986639966 new ASTCompositeTerm("atom", ts1);
39867- path.add(terminalStar);
39868- res.add(path);
39869- }
39870- res.add(new Vector());
39967+ Vector iterations = new Vector();
39968+ iterations.add(terminalStar);
39969+ res.add(iterations);
3987139970 return res;
3987239971 }
3987339972 }
@@ -39955,18 +40054,22 @@ else if ("+".equals(suf))
3995540054 ASTBasicTerm ts1 = new ASTBasicTerm("terminal", "'_*'");
3995640055 ASTCompositeTerm terminalStar =
3995740056 new ASTCompositeTerm("atom", ts1);
39958- emptyAlt.add(terminalStar);
40057+ Vector newEmpty = new Vector();
40058+ newEmpty.add(terminalStar);
40059+ alternatives.add(newEmpty);
3995940060 }
3996040061 else if ("*".equals(suf))
3996140062 { Vector newalts = new Vector();
3996240063 Vector newEmpty = new Vector();
3996340064 newalts.add(newEmpty);
3996440065 newalts.addAll(alternatives);
39965- // emptyAlt.add(trm);
40066+ emptyAlt.add(trm);
3996640067 ASTBasicTerm ts1 = new ASTBasicTerm("terminal", "'_*'");
3996740068 ASTCompositeTerm terminalStar =
39968- new ASTCompositeTerm("atom", ts1);
39969- emptyAlt.add(terminalStar);
40069+ new ASTCompositeTerm("atom", ts1);
40070+ Vector iterations = new Vector();
40071+ iterations.add(terminalStar);
40072+ newalts.add(iterations);
3997040073 alternatives = newalts;
3997140074 }
3997240075
0 commit comments