|
/**
 * Shape of the mhchem parser object: an entry point, an array helper,
 * the pattern table with its matching helpers, the action table and the
 * state machines.
 */
interface MhchemParser {
  /**
   * Takes the input text (which may be `undefined`) and the name of the
   * state machine to use; returns a list of parsed items.
   */
  go: (input: string | undefined, stateMachine: StateMachineName) => Parsed[];

  /**
   * Takes an array and one further value and returns nothing.
   * NOTE(review): the name suggests `b` is appended to `a` in place — confirm
   * against the implementation.
   */
  concatArray: (a: any[], b: any) => void;

  patterns: {
    /**
     * Optional entry per pattern name. Each entry is either a `RegExp` or a
     * matcher function; names from `PatternNameReturningString` use matchers
     * producing `string`, names from `PatternNameReturningArray` use matchers
     * producing `string[]`.
     */
    patterns: Partial<Record<PatternNameReturningString, RegExp | PatternFunction<string>>> &
      Partial<Record<PatternNameReturningArray, RegExp | PatternFunction<string[]>>>;

    /**
     * NOTE(review): the parameter names suggest begin/end delimiters that are
     * excluded from (`Excl`) or included in (`Incl`) the match, with an
     * optional second delimiter set and a `combine` flag — confirm against
     * the implementation.
     */
    findObserveGroups: (
      input: string,
      begExcl: string | RegExp,
      begIncl: string | RegExp,
      endIncl: string | RegExp,
      endExcl: string | RegExp,
      beg2Excl?: string | RegExp,
      beg2Incl?: string | RegExp,
      end2Incl?: string | RegExp,
      end2Excl?: string | RegExp,
      combine?: boolean
    ) => MatchResult<string | string[]>;

    /** Takes a pattern name and the input; returns a match result or `null`. */
    match_: (m: PatternName, input: string) => MatchResult<string | string[]> | null;
  };

  /** Table of actions, keyed by action name. */
  actions: ActionList;

  /** One state machine for every `StateMachineName` (all names are required). */
  stateMachines: Record<StateMachineName, StateMachine>;
}
| 31 | + |
/**
 * Signature of a matcher function usable in place of a `RegExp`: it receives
 * the input string and returns a `MatchResult<T>` or `null`.
 */
type PatternFunction<T> = {
  (input: string): MatchResult<T> | null;
};
/** Names of the available state machines. */
type StateMachineName =
  | "tex"
  | "ce"
  | "a"
  | "o"
  | "text"
  | "pq"
  | "bd"
  | "oxidation"
  | "tex-math"
  | "tex-math tight"
  | "9,9"
  | "pu"
  | "pu-2"
  | "pu-9,9";
/**
 * Result of a pattern match: the matched value (`match_`) together with the
 * `remainder` string.
 */
type MatchResult<T> = { match_: T; remainder: string };
/**
 * Names of the individual parser states.
 *
 * These names are used as keys of `Transitions`, as `nextState` of an
 * `Action`, and in `stateArray` of the raw transition table.
 */
type StateName =
  | "0" // begin of main part (arrow/operator unlikely)
  | "1" // next entity
  | "2" // next entity (arrow/operator unlikely)
  | "3" // next atom
  | "c" // macro
  | "a" // amount
  | "o" // element
  | "b" // left-side superscript
  | "p" // left-side subscript
  | "q" // right-side subscript
  | "d" | "D" // right-side superscript
  | "r" // arrow
  | "rd" // arrow, script above
  | "f"
  | "*"
  // The states below appear as components of the `|`-joined keys in
  // StateNameCombined (e.g. "a|as", "b|p|bp", "r|rt|rd|rdt|rdq", "/|q") but
  // were missing from this union, so they could not be expressed wherever a
  // StateName is required. Their individual meanings are not documented here.
  | "as"
  | "bp"
  | "c0"
  | "qd"
  | "qD"
  | "dq"
  | "rt"
  | "rdt"
  | "rdq"
  | "/";
/**
 * Keys accepted by the raw transition table: either a single `StateName` or
 * one of the listed literals, which consist of state names joined with `|`.
 */
type StateNameCombined =
  | StateName
  | "0|1|2"
  | "0|1|2|3"
  | "0|1|2|3|a|as|b|p|bp"
  | "0|1|2|3|a|as|b|p|bp|o"
  | "0|1|2|3|a|as|b|p|bp|o|c0"
  | "0|1|2|3|a|as|o"
  | "0|1|2|3|a|as|o|q|d|D|qd|qD|dq"
  | "0|1|2|3|as|b|p|bp|o"
  | "0|1|2|3|b|p|bp|o"
  | "0|1|2|a|as"
  | "0|1|2|as"
  | "0|2"
  | "0|a"
  | "0|d"
  | "1|2"
  | "1|3"
  | "3|o"
  | "a|as"
  | "a|as|d|D|q|qd|qD|dq"
  | "a|as|o"
  | "as"
  | "as|o"
  | "b|p|bp"
  | "d|D"
  | "d|D|q|qd|qD|dq"
  | "d|D|qd|qD"
  | "d|D|qd|qD|dq"
  | "d|qd|D|qD"
  | "d|qd|dq"
  | "D|qD|p"
  | "dq"
  | "o|d|D|dq|qd|qD"
  | "o|d|D|q|qd|qD|dq"
  | "o|q"
  | "q|d|D|qd|qD|dq"
  | "q|dq"
  | "q|qd|qD|dq"
  | "qd"
  | "qD|dq"
  | "qd|qD"
  | "r|rt"
  | "r|rt|rd|rdt|rdq"
  | "rd|rdt"
  | "/|q";
| 54 | + |
/**
 * Table of action functions keyed by action name; every entry is optional.
 * Names from the three "MNone"/"MString"/"MStringOption" groups map to
 * functions taking a `string` match, names from the "MArray" group map to
 * functions taking a `string[]` match.
 */
type ActionList = Partial<Record<ActionNameUsingMNone, ActionFunction<string>>> &
  Partial<Record<ActionNameUsingMString, ActionFunction<string>>> &
  Partial<Record<ActionNameUsingMStringOption, ActionFunction<string>>> &
  Partial<Record<ActionNameUsingMArray, ActionFunction<string[]>>>;
/**
 * Call signature of an action.
 *
 * Receives the buffer, optionally the matched value `m` and an `option`, and
 * returns `undefined`, a single parsed item, or a list of parsed items.
 */
interface ActionFunction<T> {
  (
    buffer: Buffer,
    m?: T,
    option?: string | number | boolean
  ): Parsed | Parsed[] | undefined;
}
| 67 | + |
/**
 * One task of the raw transition table. `TAction` is the permitted type of
 * `action_`; the remaining fields are optional.
 */
type TransitionsRawTask<TAction> = {
  action_: TAction;
  nextState?: string;
  revisit?: boolean;
  toContinue?: boolean;
  stateArray?: StateName[];
};

/**
 * Part of the raw transition table covering the patterns in `TPattern`:
 * optional entry per pattern, each holding optional tasks keyed by a
 * (combined) state name.
 */
type TransitionsRawSection<TPattern extends string, TAction> = Partial<
  Record<TPattern, Partial<Record<StateNameCombined, TransitionsRawTask<TAction>>>>
>;

/** A single action accepted for patterns in `PatternNameReturningString`. */
type TransitionsRawStringAction =
  | ActionNameUsingMString
  | ActionNameUsingMStringOption
  | ActionNameUsingMNone
  | ActionNameWithParameter<ActionNameUsingMString>
  | ActionNameWithParameter<ActionNameUsingMStringOption>
  | ActionNameWithParameter<ActionNameUsingMNone>;

/**
 * `action_` type accepted for patterns in `PatternNameReturningArray2`.
 * ... list is not complete
 */
type TransitionsRawArray2Action =
  | ActionNameUsingMArray2
  | (
      | ActionNameUsingMArray2
      | ActionNameUsingMString
      | ActionNameUsingMNone
      | ActionNameWithParameter<ActionNameUsingMNone>
    )[];

/**
 * Raw transition table: pattern name -> (combined) state key -> task.
 * Which actions a task may name depends on the group the pattern belongs to.
 */
type TransitionsRaw = TransitionsRawSection<
  PatternNameReturningString,
  TransitionsRawStringAction | TransitionsRawStringAction[]
> &
  TransitionsRawSection<PatternNameReturningArray2, TransitionsRawArray2Action> &
  TransitionsRawSection<PatternNameReturningArray3, ActionNameUsingMArray3 | ActionNameUsingMArray3[]> &
  TransitionsRawSection<PatternNameReturningArray6, ActionNameUsingMArray6 | ActionNameUsingMArray6[]>;
/** Pattern names whose match value is typed as `string`. */
type PatternNameReturningString =
  | "empty"
  | "else"
  | "else2"
  | "space"
  | "space A"
  | "space$"
  | "a-z"
  | "x"
  | "x$"
  | "i$"
  | "letters"
  | "\\greek"
  | "one lowercase latin letter $"
  | "$one lowercase latin letter$ $"
  | "one lowercase greek letter $"
  | "digits"
  | "-9.,9"
  | "-9.,9 no missing 0"
  | "state of aggregation $"
  | "{[("
  | ")]}"
  | ", "
  | ","
  | "."
  | ". __* "
  | "..."
  | "^{(...)}"
  | "^($...$)"
  | "^a"
  | "^\\x{}"
  | "^\\x"
  | "^(-1)"
  | "\'"
  | "_{(...)}"
  | "_($...$)"
  | "_9"
  | "_\\x{}{}"
  | "_\\x{}"
  | "_\\x"
  | "^_"
  | "{}"
  | "{...}"
  | "{(...)}"
  | "$...$"
  | "${(...)}$__$(...)$"
  | "=<>"
  | "#"
  | "+"
  | "-$"
  | "-9"
  | "- orbital overlap"
  | "-"
  | "pm-operator"
  | "operator"
  | "arrowUpDown"
  | "\\bond{(...)}"
  | "->"
  | "CMT"
  | "[(...)]"
  | "1st-level escape"
  | "\\,"
  | "\\x{}"
  | "\\ca"
  | "\\x"
  | "orbital"
  | "others"
  | "\\color{(...)}"
  | "\\ce{(...)}"
  | "\\pu{(...)}"
  | "oxidation$"
  | "d-oxidation$"
  | "roman numeral"
  | "1/2$"
  | "amount"
  | "amount2"
  | "(KV letters),"
  | "formula$"
  | "uprightEntities"
  | "/"
  | "//"
  | "*"
  | "\\x{}{}"
  | "^\\x{}{}"
  // Names made of several of the names above joined with `|`.
  | "^{(...)}|^($...$)"
  | "^a|^\\x{}{}|^\\x{}|^\\x|\'"
  | "_{(...)}|_($...$)|_9|_\\x{}{}|_\\x{}|_\\x"
  | "\\,|\\x{}{}|\\x{}|\\x"
  | "{...}|\\,|\\x{}{}|\\x{}|\\x"
  | "\\x{}{}|\\x{}|\\x"
  | "-|+"
  | "{[(|)]}"
  | "{...}|else"
  | "^{(...)}|^(-1)";

/** Pattern names in the "Array2" group. */
type PatternNameReturningArray2 =
  | "_{(state of aggregation)}$"
  | "\\frac{(...)}"
  | "\\overset{(...)}"
  | "\\underset{(...)}"
  | "\\underbrace{(...)}"
  | "\\color{(...)}{(...)}";

/** Pattern names in the "Array3" group. */
type PatternNameReturningArray3 = "(-)(9)^(-9)";

/** Pattern names in the "Array6" group. */
type PatternNameReturningArray6 = "(-)(9.,9)(e)(99)";

/** Every pattern name. */
type PatternName =
  | PatternNameReturningString
  | PatternNameReturningArray2
  | PatternNameReturningArray3
  | PatternNameReturningArray6;

/** Pattern names whose match value is typed as `string[]`. */
type PatternNameReturningArray =
  | PatternNameReturningArray2
  | PatternNameReturningArray3
  | PatternNameReturningArray6;
/** Action names in the "MNone" group. */
type ActionNameUsingMNone =
  | "a to o"
  | "sb=true"
  | "sb=false"
  | "beginsWithBond=true"
  | "beginsWithBond=false"
  | "parenthesisLevel++"
  | "parenthesisLevel--"
  | "output"
  | "space"
  | "cdot"
  | "output-0"
  | "output-o"
  | "tight operator";

/** Action names in the "MString" group. */
type ActionNameUsingMString =
  | "a="
  | "b="
  | "p="
  | "o="
  | "q="
  | "d="
  | "rm="
  | "text="
  | "copy"
  | "rm"
  | "text"
  | "tex-math"
  | "tex-math tight"
  | "ce"
  | "pu"
  | "1/2"
  | "9,9"
  | "o after d"
  | "d= kv"
  | "charge or bond"
  | "state of aggregation"
  | "comma"
  | "oxidation-output"
  | "r="
  | "rdt="
  | "rd="
  | "rqt="
  | "rq="
  | "operator"
  | "bond"
  | "color0-output"
  | "roman-numeral"
  | "^(-1)";

/** Action names in the "MStringOption" group ("bond" is also listed in the "MString" group). */
type ActionNameUsingMStringOption =
  | "insert"
  | "insert+p1"
  | "write"
  | "bond"
  | "- after o/d";

/** Action names in the "MArray2" group. */
type ActionNameUsingMArray2 =
  | "insert+p1+p2"
  | "frac-output"
  | "overset-output"
  | "underset-output"
  | "underbrace-output"
  | "color-output";

/** Action names in the "MArray3" group. */
type ActionNameUsingMArray3 = "number^";

/** Action names in the "MArray6" group. */
type ActionNameUsingMArray6 = "enumber";

/** Every action name. */
type ActionName =
  | ActionNameUsingMNone
  | ActionNameUsingMString
  | ActionNameUsingMStringOption
  | ActionNameUsingMArray2
  | ActionNameUsingMArray3
  | ActionNameUsingMArray6;

/** Action names of the three "MArray" groups. */
type ActionNameUsingMArray =
  | ActionNameUsingMArray2
  | ActionNameUsingMArray3
  | ActionNameUsingMArray6;
| 130 | + |
/** A state machine: its transition table plus its own action table. */
type StateMachine = { transitions: Transitions; actions: ActionList };
/** Transition table: for each state (optional), the list of its transitions. */
type Transitions = Partial<Record<StateName, Transition[]>>;
/**
 * A single transition: the pattern it is tied to and the task attached to it.
 *
 * e.g. `{ pattern: 'letters', task: { action_: [{ type_: 'output' }], nextState: '1' } }`
 */
interface Transition {
  pattern: PatternName;
  task: Action;
}
/**
 * Task of a transition: a list of actions (each an action name with an
 * optional parameter) plus optional `nextState`, `revisit` and `toContinue`
 * settings.
 */
interface Action {
  action_: ActionNameWithParameter<ActionName>[];
  nextState?: StateName;
  revisit?: boolean;
  toContinue?: boolean;
}
/**
 * An action reference: the action name in `type_` and, optionally, an
 * `option` value (string, number or boolean).
 */
interface ActionNameWithParameter<T> {
  type_: T;
  option?: string | number | boolean;
}
| 152 | + |
| 153 | + |
| 154 | + |
| 155 | + |
/** Working buffer handed to action functions; every field is optional. */
interface Buffer {
  /** amount */
  a?: string;
  /** element */
  o?: string;
  /** left-side superscript */
  b?: string;
  /** left-side subscript */
  p?: string;
  /** right-side subscript */
  q?: string;
  /** right-side superscript */
  d?: string;
  dType?: string;

  /** arrow */
  r?: ArrowName;
  /** arrow, script above, type */
  rdt?: string;
  /** arrow, script above, content */
  rd?: string;
  /** arrow, script below, type */
  rqt?: string;
  /** arrow, script below, content */
  rq?: string;

  text_?: string;
  rm?: string;

  /** starting at 0 */
  parenthesisLevel?: number;
  /** space before */
  sb?: boolean;
  beginsWithBond?: boolean;
}
| 178 | + |
| 179 | + |
| 180 | + |
| 181 | + |
/**
 * Every object-shaped parsed item, discriminated by its `type_` tag.
 * Plain strings are covered separately by `Parsed`.
 */
type ParsedWithoutString =
  | {
      type_: "chemfive";
      a: Parsed[];
      b: Parsed[];
      p: Parsed[];
      o: Parsed[];
      q: Parsed[];
      d: Parsed[];
      dType: string;
    }
  | { type_: "rm"; p1: string }
  | { type_: "text"; p1: string }
  | { type_: "roman numeral"; p1: string }
  | { type_: "state of aggregation"; p1: Parsed[] }
  | { type_: "state of aggregation subscript"; p1: Parsed[] }
  | { type_: "bond"; kind_: BondName }
  | { type_: "frac"; p1: string; p2: string }
  | { type_: "pu-frac"; p1: Parsed[]; p2: Parsed[] }
  | { type_: "tex-math"; p1: string }
  | { type_: "frac-ce"; p1: Parsed[]; p2: Parsed[] }
  | { type_: "overset"; p1: Parsed[]; p2: Parsed[] }
  | { type_: "underset"; p1: Parsed[]; p2: Parsed[] }
  | { type_: "underbrace"; p1: Parsed[]; p2: Parsed[] }
  | { type_: "color"; color1: string; color2: Parsed[] }
  | { type_: "color0"; color: string }
  | { type_: "arrow"; r: ArrowName; rd?: Parsed[]; rq?: Parsed[] }
  | { type_: "operator"; kind_: OperatorName }
  | { type_: "1st-level escape"; p1: string }
  | { type_: "space" }
  | { type_: "entitySkip" }
  | { type_: "pu-space-1" }
  | { type_: "pu-space-2" }
  | { type_: "1000 separator" }
  | { type_: "commaDecimal" }
  | { type_: "comma enumeration L"; p1: string }
  | { type_: "comma enumeration M"; p1: string }
  | { type_: "comma enumeration S"; p1: string }
  | { type_: "hyphen" }
  | { type_: "addition compound" }
  | { type_: "electron dot" }
  | { type_: "KV x" }
  | { type_: "prime" }
  | { type_: "cdot" }
  | { type_: "tight cdot" }
  | { type_: "times" }
  | { type_: "circa" }
  | { type_: "^" }
  | { type_: "v" }
  | { type_: "ellipsis" }
  | { type_: "/" }
  | { type_: " / " };

/** A parsed item: a tagged object or a plain string. */
type Parsed = ParsedWithoutString | string;

/** Arrow kinds. Keep aligned with definition of pattern '->'. */
type ArrowName =
  | "->"
  | "\u2192"
  | "\u27F6"
  | "<-"
  | "<->"
  | "<-->"
  | "<=>"
  | "\u21CC"
  | "<=>>"
  | "<<=>";

/** Bond kinds allowed in a "bond" item. */
type BondName =
  | "-"
  | "1"
  | "="
  | "2"
  | "#"
  | "3"
  | "~"
  | "~-"
  | "~="
  | "~--"
  | "-~-"
  | "..."
  | "...."
  | "->"
  | "<-"
  | "<"
  | ">";

/** Operator kinds allowed in an "operator" item. */
type OperatorName =
  | "+"
  | "-"
  | "="
  | "<"
  | ">"
  | "<<"
  | ">>"
  | "\\pm"
  | "\\approx"
  | "$\\approx$"
  | "v"
  | "(v)"
  | "^"
  | "(^)";
| 239 | + |
| 240 | + |
| 241 | + |
| 242 | + |
/**
 * Shape of the object that turns parsed items into a string.
 * NOTE(review): the name suggests the output is TeX source — confirm against
 * the implementation.
 */
interface MhchemTexify {
  /** Takes a list of parsed items (may be `undefined`) and an optional `isInner` flag; returns a string. */
  go: (input: Parsed[] | undefined, isInner?: boolean) => string;
  /** Takes a list of parsed items; returns a string. */
  _goInner: (input: Parsed[]) => string;
  /** Takes a single object-shaped parsed item; returns a string. */
  _go2: (input: ParsedWithoutString) => string;
  /** Takes an arrow name; returns a string. */
  _getArrow: (input: ArrowName) => string;
  /** Takes a bond name; returns a string. */
  _getBond: (input: BondName) => string;
  /** Takes an operator name; returns a string. */
  _getOperator: (input: OperatorName) => string;
}
0 commit comments