|
3 | 3 | def find_enum_or_set_definition_end(line: str) -> Tuple[int, str, str]: |
4 | 4 | """ |
5 | 5 | Find the end of an enum or set definition in a DDL line |
6 | | - |
| 6 | +
|
7 | 7 | Args: |
8 | 8 | line: The DDL line containing an enum or set definition |
9 | | - |
| 9 | +
|
10 | 10 | Returns: |
11 | 11 | Tuple containing (end_position, field_type, field_parameters) |
12 | 12 | """ |
@@ -34,56 +34,68 @@ def find_enum_or_set_definition_end(line: str) -> Tuple[int, str, str]: |
34 | 34 | field_type = line[:end_pos] |
35 | 35 | field_parameters = line[end_pos:].strip() |
36 | 36 | return end_pos, field_type, field_parameters |
37 | | - |
38 | | - # Fallback to splitting by space if we can't find the end |
39 | | - # Use split() instead of split(' ') to handle multiple consecutive spaces |
40 | | - definition = line.split() |
41 | | - field_type = definition[0] if definition else "" |
42 | | - field_parameters = ' '.join(definition[1:]) if len(definition) > 1 else '' |
43 | | - |
44 | | - return -1, field_type, field_parameters |
| 37 | + |
| 38 | + # If we couldn't find the end, raise an error with detailed information |
| 39 | + # instead of silently falling back to incorrect parsing |
| 40 | + raise ValueError( |
| 41 | + f"Could not find end of enum/set definition in line. " |
| 42 | + f"Input line: {line!r}, " |
| 43 | + f"open_parens={open_parens}, " |
| 44 | + f"in_quotes={in_quotes}, " |
| 45 | + f"quote_char={quote_char!r}" |
| 46 | + ) |
45 | 47 |
|
46 | 48 |
|
47 | 49 | def parse_enum_or_set_field(line: str, field_name: str, is_backtick_quoted: bool = False) -> Tuple[str, str, str]: |
48 | 50 | """ |
49 | 51 | Parse a field definition line containing an enum or set type |
50 | | - |
| 52 | +
|
51 | 53 | Args: |
52 | 54 | line: The line to parse |
53 | 55 | field_name: The name of the field (already extracted) |
54 | 56 | is_backtick_quoted: Whether the field name was backtick quoted |
55 | | - |
| 57 | +
|
56 | 58 | Returns: |
57 | 59 | Tuple containing (field_name, field_type, field_parameters) |
58 | 60 | """ |
59 | | - # If the field name was backtick quoted, it's already been extracted |
60 | | - if is_backtick_quoted: |
61 | | - line = line.strip() |
62 | | - # Don't split by space for enum and set types that might contain spaces |
63 | | - if line.lower().startswith('enum(') or line.lower().startswith('set('): |
64 | | - end_pos, field_type, field_parameters = find_enum_or_set_definition_end(line) |
| 61 | + try: |
| 62 | + # If the field name was backtick quoted, it's already been extracted |
| 63 | + if is_backtick_quoted: |
| 64 | + line = line.strip() |
| 65 | + # Don't split by space for enum and set types that might contain spaces |
| 66 | + if line.lower().startswith('enum(') or line.lower().startswith('set('): |
| 67 | + end_pos, field_type, field_parameters = find_enum_or_set_definition_end(line) |
| 68 | + else: |
| 69 | + # Use split() instead of split(' ') to handle multiple consecutive spaces |
| 70 | + definition = line.split() |
| 71 | + field_type = definition[0] if definition else "" |
| 72 | + field_parameters = ' '.join(definition[1:]) if len(definition) > 1 else '' |
65 | 73 | else: |
| 74 | + # For non-backtick quoted fields |
66 | 75 | # Use split() instead of split(' ') to handle multiple consecutive spaces |
67 | 76 | definition = line.split() |
68 | | - field_type = definition[0] if definition else "" |
69 | | - field_parameters = ' '.join(definition[1:]) if len(definition) > 1 else '' |
70 | | - else: |
71 | | - # For non-backtick quoted fields |
72 | | - # Use split() instead of split(' ') to handle multiple consecutive spaces |
73 | | - definition = line.split() |
74 | | - definition = definition[1:] # Skip the field name which was already extracted |
75 | | - |
76 | | - if definition and ( |
77 | | - definition[0].lower().startswith('enum(') |
78 | | - or definition[0].lower().startswith('set(') |
79 | | - ): |
80 | | - line = ' '.join(definition) |
81 | | - end_pos, field_type, field_parameters = find_enum_or_set_definition_end(line) |
82 | | - else: |
83 | | - field_type = definition[0] if definition else "" |
84 | | - field_parameters = ' '.join(definition[1:]) if len(definition) > 1 else '' |
85 | | - |
86 | | - return field_name, field_type, field_parameters |
| 77 | + definition = definition[1:] # Skip the field name which was already extracted |
| 78 | + |
| 79 | + if definition and ( |
| 80 | + definition[0].lower().startswith('enum(') |
| 81 | + or definition[0].lower().startswith('set(') |
| 82 | + ): |
| 83 | + line = ' '.join(definition) |
| 84 | + end_pos, field_type, field_parameters = find_enum_or_set_definition_end(line) |
| 85 | + else: |
| 86 | + field_type = definition[0] if definition else "" |
| 87 | + field_parameters = ' '.join(definition[1:]) if len(definition) > 1 else '' |
| 88 | + |
| 89 | + return field_name, field_type, field_parameters |
| 90 | + except ValueError as e: |
| 91 | + # Enhanced error reporting with full context |
| 92 | + raise ValueError( |
| 93 | + f"Failed to parse field definition. " |
| 94 | + f"field_name={field_name!r}, " |
| 95 | + f"line={line!r}, " |
| 96 | + f"is_backtick_quoted={is_backtick_quoted}, " |
| 97 | + f"Original error: {e}" |
| 98 | + ) from e |
87 | 99 |
|
88 | 100 |
|
89 | 101 | def extract_enum_or_set_values(field_type: str, from_parser_func=None) -> Optional[List[str]]: |
|
0 commit comments