88
99
1010class ParseError (NamedTuple ):
11- command_ordinal : int
11+ ordinal : int
1212 message : str
1313 line : int
1414 column : int
@@ -17,10 +17,12 @@ class ParseError(NamedTuple):
1717 def __str__ (self ):
1818 line_msg = f'; LINE #{ self .line } ' if self .line else ''
1919 col_msg = f'; COLUMN #{ self .column } ' if self .column else ''
20+ cmd_or_block = 'BLOCK' if self .line else 'COMMAND'
21+ error_type = 'Source Parsing' if self .line else 'CST Parsing'
2022 suggestion_msg = f'{ self .suggestion } ' if self .suggestion else ''
2123 return (
22- f"<error-details><error-location>COMMAND #{ self .command_ordinal } { line_msg } { col_msg } </error-location>"
23- f"<type>PARSING (no commands were applied at all)</type><description>{ self .message } </description>"
24+ f"<error-details><error-location>{ cmd_or_block } #{ self .ordinal } { line_msg } { col_msg } </error-location>"
25+ f"<type>{ error_type } (no commands were applied at all)</type><description>{ self .message } </description>"
2426 f"<suggestion>{ suggestion_msg } "
2527 "(NEVER apologize; just take a deep breath, re-read grammar rules (enclosed by <grammar.js> tags) "
2628 "and fix you CEDARScript syntax)</suggestion></error-details>"
@@ -60,7 +62,9 @@ class Marker(MarkerCompatible):
6062 type : MarkerType
6163 value : str
6264 offset : int | None = None
63- marker_subtype : str | None = None # 'REGEX', 'PREFIX', 'SUFFIX' for LINE type
65+
66+ # See `line_base`
67+ marker_subtype : str | None = None
6468
6569 @property
6670 def as_marker (self ) -> 'Marker' :
@@ -71,10 +75,13 @@ def __str__(self):
7175 match self .marker_subtype :
7276 case 'string' | None :
7377 pass
78+ case 'empty' :
79+ result = 'empty line'
7480 case _:
75- result += self .marker_subtype
81+ result += f' { self .marker_subtype } '
7682
77- result += f" '{ self .value .strip ()} '"
83+ if self .marker_subtype != 'empty' :
84+ result += f" '{ self .value .strip ()} '"
7885 if self .offset is not None :
7986 result += f" at offset { self .offset } "
8087 return result
@@ -243,11 +250,12 @@ class LoopControl(StrEnum):
243250class CaseWhen :
244251 """Represents a WHEN condition in a CASE statement"""
245252 empty : bool = False
253+ indent_level : int | None = None
254+ line_number : int | None = None
255+ line_matcher : str | None = None
246256 regex : str | None = None
247257 prefix : str | None = None
248258 suffix : str | None = None
249- indent_level : int | None = None
250- line_number : int | None = None
251259
252260
253261@dataclass
@@ -360,7 +368,7 @@ def parse_script(self, code_text: str) -> tuple[Sequence[Command], Sequence[Pars
360368 for child in root_node .children :
361369 node_type = child .type .casefold ()
362370 if node_type == 'comment' :
363- print ("(COMMENT) " + self .parse_string (child ).removeprefix ("--" ).strip ())
371+ print ("(COMMENT) " + self .parse_string (child ).removeprefix ("--" ).removeprefix ( "/*" ). strip ())
364372 if not node_type .endswith ('_command' ):
365373 continue
366374 commands .append (self .parse_command (child ))
@@ -371,7 +379,7 @@ def parse_script(self, code_text: str) -> tuple[Sequence[Command], Sequence[Pars
371379 # Handle any unexpected exceptions during parsing
372380 error_message = str (e )
373381 error = ParseError (
374- command_ordinal = command_ordinal ,
382+ ordinal = command_ordinal ,
375383 message = error_message ,
376384 line = 0 ,
377385 column = 0 ,
@@ -399,7 +407,7 @@ def _collect_parse_errors(self, node, code_text, command_ordinal: int) -> list[P
399407 suggestion = _generate_suggestion (node , code_text )
400408
401409 error = ParseError (
402- command_ordinal = command_ordinal ,
410+ ordinal = command_ordinal ,
403411 message = message ,
404412 line = line ,
405413 column = column ,
@@ -474,18 +482,18 @@ def parse_update_target(self, node):
474482 raise ValueError (f"[parse_update_target] Invalid target: { invalid } " )
475483
476484 def parse_identifier_from_file (self , node ):
477- identifier_marker = self .find_first_by_type (node .named_children , 'identifierMarker ' )
478- identifier_type = MarkerType (identifier_marker .children [0 ].type .casefold ())
479- name = self .parse_string (identifier_marker .named_children [0 ])
480- offset_clause = self .find_first_by_type (identifier_marker .named_children , 'offset_clause' )
485+ identifier_matcher = self .find_first_by_type (node .named_children , 'identifier_matcher ' )
486+ identifier_type = MarkerType (identifier_matcher .children [0 ].type .casefold ())
487+ name = self .parse_string (identifier_matcher .named_children [0 ])
488+ offset_clause = self .find_first_by_type (identifier_matcher .named_children , 'offset_clause' )
481489 file_clause = self .find_first_by_type (node .named_children , 'singlefile_clause' )
482490 where_clause = self .find_first_by_type (node .named_children , 'where_clause' )
483491
484492 if not file_clause or not name :
485493 raise ValueError ("Invalid identifier_from_file clause" )
486494
487495 file_path = self .parse_singlefile_clause (file_clause ).file_path
488- offset = self .parse_offset_clause (offset_clause ) if offset_clause else None
496+ offset = self .find_primitive (offset_clause ) if offset_clause else None
489497 where = self .parse_where_clause (where_clause )
490498
491499 return IdentifierFromFile (file_path = file_path ,
@@ -579,7 +587,7 @@ def parse_region(self, node) -> Region:
579587 node = node .named_children [0 ]
580588
581589 match node .type .casefold ():
582- case 'marker' | 'linemarker ' | 'identifiermarker ' :
590+ case 'marker' | 'line_matcher ' | 'identifier_matcher ' :
583591 result = self .parse_marker (node )
584592 case 'segment' :
585593 result = self .parse_segment (node )
@@ -596,27 +604,25 @@ def parse_marker(self, node) -> Marker:
596604 if node .type .casefold () == 'marker' :
597605 node = node .named_children [0 ]
598606
599- marker_type = node .children [0 ].type # LINE, VARIABLE, FUNCTION, METHOD or CLASS
607+ marker_type = node .named_children [0 ].type
608+ # LINE, VARIABLE, FUNCTION, METHOD or CLASS
600609 marker_subtype = None
601610 value = None
602611
603- if marker_type != 'LINE' : # VARIABLE, FUNCTION, METHOD or CLASS
604- value = self .parse_string (self .find_first_by_type (node .named_children , 'string' ))
605612 # Handle the different marker types
606- else :
607- # Get the second child which is either a string/number or a subtype specifier
608- second_child = node .children [1 ]
609- marker_subtype = second_child .type
610- if second_child .type in ['string' , 'number' ]:
611- match second_child .type :
612- case 'string' :
613- value = self .parse_string (second_child )
614- case _:
615- value = second_child .text .decode ('utf8' )
616- else : # REGEX, PREFIX, or SUFFIX
617- value = self .parse_string (node .children [2 ])
618-
619- offset = self .parse_offset_clause (self .find_first_by_type (node .named_children , 'offset_clause' ))
613+ if marker_type == 'line_base' :
614+ marker_type = 'line'
615+ # subtype: None, number, EMPTY, REGEX, PREFIX, SUFFIX, INDENT-LEVEL
616+ line_base_node = node .named_children [0 ] # line_base
617+ marker_subtype = [n .type .casefold () for n in line_base_node .children if n .type .casefold () != 'line' ][0 ]
618+ value = self .find_primitive (line_base_node )
619+
620+ else : # identifier_matcher
621+ marker_type = node .children [0 ].type .casefold ()
622+ value = self .find_primitive (node )
623+
624+ node1 = self .find_first_by_type (node .named_children , 'offset_clause' )
625+ offset = self .find_primitive (node1 )
620626 return Marker (
621627 type = MarkerType (marker_type .casefold ()),
622628 marker_subtype = marker_subtype ,
@@ -631,16 +637,9 @@ def parse_segment(self, node) -> Segment:
631637 end : RelativeMarker = self .parse_region (relpos_end )
632638 return Segment (start = start , end = end )
633639
634- def parse_offset_clause (self , node ):
635- if node is None :
636- return None
637- return int (self .find_first_by_type (node .children , 'number' ).text )
638-
639640 def parse_relative_indentation (self , node ) -> int | None :
640641 node = self .find_first_by_type (node .named_children , 'relative_indentation' )
641- if node is None :
642- return None
643- return int (self .find_first_by_type (node .named_children , 'number' ).text )
642+ return self .find_primitive (node )
644643
645644 def parse_content (self , node ) -> str | tuple [Region , int | None ] | None :
646645 content = self .find_first_by_type (node .named_children , [
@@ -668,7 +667,7 @@ def parse_case_stmt(self, node) -> CaseStatement:
668667 current_when = None
669668 for child in node .children :
670669 match child .type :
671- case 'case_when ' :
670+ case 'line_base ' :
672671 current_when = self .parse_case_when (child )
673672 case 'case_action' if current_when is not None :
674673 action = self .parse_case_action (child )
@@ -686,20 +685,28 @@ def parse_case_stmt(self, node) -> CaseStatement:
686685 def parse_case_when (self , node ) -> CaseWhen :
687686 """Parse a WHEN clause in a CASE statement"""
688687 when = CaseWhen ()
689-
688+
690689 if self .find_first_by_field_name (node , 'empty' ):
691690 when .empty = True
691+
692+ elif indent := self .find_first_by_field_name (node , 'indent_level' ):
693+ when .indent_level = int (indent .text )
694+
695+ elif line_num := self .find_first_by_field_name (node , 'line_number' ):
696+ when .line_number = int (line_num .text )
697+
698+ elif line_str := self .find_first_by_field_name (node , 'line_matcher' ):
699+ when .line_matcher = self .parse_string (line_str )
700+
692701 elif regex := self .find_first_by_field_name (node , 'regex' ):
693702 when .regex = re .compile (self .parse_string (regex ))
703+
694704 elif prefix := self .find_first_by_field_name (node , 'prefix' ):
695705 when .prefix = self .parse_string (prefix )
706+
696707 elif suffix := self .find_first_by_field_name (node , 'suffix' ):
697708 when .suffix = self .parse_string (suffix )
698- elif indent := self .find_first_by_field_name (node , 'indent_level' ):
699- when .indent_level = int (indent .text )
700- elif line_num := self .find_first_by_field_name (node , 'line_number' ):
701- when .line_number = int (line_num .text )
702-
709+
703710 return when
704711
705712 def parse_case_action (self , node ) -> CaseAction :
@@ -775,6 +782,20 @@ def parse_to_value_clause(self, node):
775782 raise ValueError ("No value found in to_value_clause" )
776783 return self .parse_string (value_node )
777784
785+ def find_primitive (self , node ):
786+ if node is None :
787+ return None
788+ node = self .find_first_by_type (node .named_children , ['string' , 'number' ])
789+ if node is None :
790+ return None
791+ match node .type .casefold ():
792+ case 'string' :
793+ return self .parse_string (node )
794+ case 'number' :
795+ return int (node .text )
796+ case _:
797+ raise ValueError (f"[find_primitive] Invalid primitive: { node .type } ({ node .text } )" )
798+
778799 @staticmethod
779800 def parse_string (node ):
780801 match node .type .casefold ():
0 commit comments