@@ -139,237 +139,47 @@ public static void load() {
139139 Tsurgeon .parseOperation ("relabel target " + newOp [1 ])));
140140
141141 }
142- String newLine = System .lineSeparator ();
143- String rawPattern = String .join (newLine ,
144- // ------------------------------
145- // 1 to 1 mappings
146- // ------------------------------
147- // CC -> CCONJ
148- "CC=target <... {/.*/}" ,
149- "" ,
150- "relabel target CCONJ" ,
151- "" ,
152-
153- // CD -> NUM
154- "CD=target <... {/.*/}" ,
155- "" ,
156- "relabel target NUM" ,
157- "" ,
158-
159- // EX -> PRON
160- "EX=target <... {/.*/}" ,
161- "" ,
162- "relabel target PRON" ,
163- "" ,
164-
165- // FW -> X
166- "FW=target <... {/.*/}" ,
167- "" ,
168- "relabel target X" ,
169- "" ,
170-
171- // JJ.* -> ADJ
172- "/^JJ.*$/=target < __" ,
173- "" ,
174- "relabel target ADJ" ,
175- "" ,
176-
177- // LS -> X
178- "LS=target <... {/.*/}" ,
179- "" ,
180- "relabel target X" ,
181- "" ,
182-
183- // MD -> AUX
184- "MD=target <... {/.*/}" ,
185- "" ,
186- "relabel target AUX" ,
187- "" ,
188-
189- // NNS -> NOUN
190- "NNS=target <... {/.*/}" ,
191- "" ,
192- "relabel target NOUN" ,
193- "" ,
194-
195- // NNP -> PROPN
196- "NNP=target <... {/.*/}" ,
197- "" ,
198- "relabel target PROPN" ,
199- "" ,
200-
201- // NNPS -> PROPN
202- "NNPS=target <... {/.*/}" ,
203- "" ,
204- "relabel target PROPN" ,
205- "" ,
206-
207- // PDT -> DET
208- "PDT=target <... {/.*/}" ,
209- "" ,
210- "relabel target DET" ,
211- "" ,
212-
213- // POS -> PART
214- "POS=target <... {/.*/}" ,
215- "" ,
216- "relabel target PART" ,
217- "" ,
218-
219- // PRP -> PRON
220- "PRP=target <... {/.*/}" ,
221- "" ,
222- "relabel target PRON" ,
223- "" ,
224-
225- // PRP$ -> PRON
226- "/^PRP\\ $$/=target <... {/.*/}" ,
227- "" ,
228- "relabel target PRON" ,
229- "" ,
230-
231- // RBR -> ADV
232- "RBR=target <... {/.*/}" ,
233- "" ,
234- "relabel target ADV" ,
235- "" ,
236-
237- // RBS -> ADV
238- "RBS=target <... {/.*/}" ,
239- "" ,
240- "relabel target ADV" ,
241- "" ,
242-
243- // RP -> ADP
244- "RP=target <... {/.*/}" ,
245- "" ,
246- "relabel target ADP" ,
247- "" ,
248-
249- // UH -> INTJ
250- "UH=target <... {/.*/}" ,
251- "" ,
252- "relabel target INTJ" ,
253- "" ,
254-
255- // WP -> PRON
256- "WP=target <... {/.*/}" ,
257- "" ,
258- "relabel target PRON" ,
259- "" ,
260-
261- // WP$ -> PRON
262- "/^WP\\ $$/=target <... {/.*/}" ,
263- "" ,
264- "relabel target PRON" ,
265- "" ,
266-
267- // WRB -> ADV
268- "WRB=target <... {/.*/}" ,
269- "" ,
270- "relabel target ADV" ,
271- "" ,
272-
273- // `` -> PUNCT
274- "/^``$/=target <... {/.*/}" ,
275- "" ,
276- "relabel target PUNCT" ,
277- "" ,
278-
279- // '' -> PUNCT
280- "/^''$/=target < __" ,
281- "" ,
282- "relabel target PUNCT" ,
283- "" ,
284-
285- // ( -> PUNCT
286- "/^\\ ($/=target <... {/.*/}" ,
287- "" ,
288- "relabel target PUNCT" ,
289- "" ,
290-
291- // ) -> PUNCT
292- "/^\\ )$/=target <... {/.*/}" ,
293- "" ,
294- "relabel target PUNCT" ,
295- "" ,
296-
297- // -LRB- -> PUNCT
298- "/^-LRB-$/=target <... {/.*/}" ,
299- "" ,
300- "relabel target PUNCT" ,
301- "" ,
302-
303- // -RRB- -> PUNCT
304- "/^-RRB-$/=target <... {/.*/}" ,
305- "" ,
306- "relabel target PUNCT" ,
307- "" ,
308-
309- // , -> PUNCT
310- "/^,$/=target <... {/.*/}" ,
311- "" ,
312- "relabel target PUNCT" ,
313- "" ,
314-
315- // . -> PUNCT
316- "/^\\ .$/=target <... {/.*/}" ,
317- "" ,
318- "relabel target PUNCT" ,
319- "" ,
320-
321- // : -> PUNCT
322- "/^:$/=target <... {/.*/}" ,
323- "" ,
324- "relabel target PUNCT" ,
325- "" ,
326-
327- // HYPH -> PUNCT
328- "HYPH=target <... {/.*/}" ,
329- "" ,
330- "relabel target PUNCT" ,
331- "" ,
332-
333- // # -> SYM
334- "/^#$/=target <... {/.*/}" ,
335- "" ,
336- "relabel target SYM" ,
337- "" ,
338-
339- // $ -> SYM. Also note that there is a no-op rule of SYM -> SYM!
340- "/^\\ $$/=target <... {/.*/}" ,
341- "" ,
342- "relabel target SYM" ,
343- "" ,
344-
345- // ADD -> X
346- "ADD=target <... {/.*/}" ,
347- "" ,
348- "relabel target X" ,
349- "" ,
350-
351- // AFX -> X
352- "AFX=target <... {/.*/}" ,
353- "" ,
354- "relabel target X" ,
355- "" ,
356-
357- // GW -> X
358- "GW=target <... {/.*/}" ,
359- "" ,
360- "relabel target X" ,
361- "" ,
362-
363- // XX -> X
364- "XX=target <... {/.*/}" ,
365- "" ,
366- "relabel target X" );
367- StringReader reader = new StringReader (rawPattern );
368- try (BufferedReader buffered = new BufferedReader (reader )) {
369- List <Pair <TregexPattern , TsurgeonPattern >> newOperations = Tsurgeon .getOperationsFromReader (buffered , new TregexPatternCompiler ());
370- operations .addAll (newOperations );
371- } catch (IOException e ) {
372- throw new RuntimeIOException (e );
142+
143+
144+ String [][] one2oneMappings = new String [][] {
145+ {"CC" , "CCONJ" },
146+ {"CD" , "NUM" },
147+ {"EX" , "PRON" },
148+ {"FW" , "X" },
149+ {"/^JJ.*$/" , "ADJ" },
150+ {"LS" , "X" },
151+ {"MD" , "AUX" },
152+ {"NNS" , "NOUN" },
153+ {"NNP" , "PROPN" },
154+ {"NNPS" , "PROPN" },
155+ {"PDT" , "DET" },
156+ {"POS" , "PART" },
157+ {"PRP" , "PRON" },
158+ {"/^PRP[$]$/" , "PRON" },
159+ {"RBR" , "ADV" },
160+ {"RBS" , "ADV" },
161+ {"RP" , "ADP" },
162+ {"UH" , "INTJ" },
163+ {"WP" , "PRON" },
164+ {"/^WP[$]$/" , "PRON" },
165+ {"WRB" , "ADV" },
166+ {"/^``$/" , "PUNCT" },
167+ {"/^''$/" , "PUNCT" },
168+ {"/^[()]$/" , "PUNCT" },
169+ {"/^-[RL]RB-$/" , "PUNCT" },
170+ {"/^[,.:]$/" , "PUNCT" },
171+ {"HYPH" , "PUNCT" },
172+ // Also note that there is a no-op rule of SYM -> SYM!
173+ {"/^[#$]$/" , "SYM" },
174+ {"ADD" , "X" },
175+ {"AFX" , "X" },
176+ {"GW" , "X" },
177+ {"XX" , "X" },
178+ };
179+ for (String [] newOp : one2oneMappings ) {
180+ operations .add (new Pair <>(TregexPattern .compile (newOp [0 ] + "=target <: __" ),
181+ Tsurgeon .parseOperation ("relabel target " + newOp [1 ])));
182+
373183 }
374184 loaded = true ;
375185 }
0 commit comments