@@ -60,14 +60,25 @@ def __init__(self, check_paired_punct_upos=False, copy_to_enhanced=False, **kwar
6060 self .check_paired_punct_upos = check_paired_punct_upos
6161 self .copy_to_enhanced = copy_to_enhanced
6262
def _is_punct(self, node):
    """Return True if `node` should be treated as punctuation.

    A node whose UPOS is 'PUNCT' always qualifies. Any other node
    qualifies only when `self.check_paired_punct_upos` is falsy and its
    form occurs in PAIRED_PUNCT, either as a key or as a value. The plain
    apostrophe is the one exception: it never qualifies on its form alone.
    """
    if node.upos == 'PUNCT':
        return True
    # With check_paired_punct_upos set, only the UPOS tag is trusted;
    # the bare apostrophe is rejected regardless of PAIRED_PUNCT content.
    if self.check_paired_punct_upos or node.form == "'":
        return False
    form = node.form
    return form in PAIRED_PUNCT or form in PAIRED_PUNCT.values()
73+
6374 def process_tree (self , root ):
6475 # First, make sure no PUNCT has children.
6576 # This may introduce multiple subroots, which will be fixed later on
6677 # (preventing to temporarily create multiple subroots here would prevent fixing some errors).
6778 for node in root .descendants :
68- while node .parent . upos == 'PUNCT' :
79+ while self . _is_punct ( node .parent ) :
6980 node .parent = node .parent .parent
70-
81+ root . draw ()
7182 # Second, fix paired punctuations: quotes and brackets, marking them in _punct_type.
7283 # This should be done before handling the subordinate punctuation,
7384 # in order to prevent non-projectivities e.g. in dot-before-closing-quote style sentences:
@@ -77,7 +88,7 @@ def process_tree(self, root):
7788 self ._punct_type = [None ] * (1 + len (root .descendants ))
7889 for node in root .descendants :
7990 if self ._punct_type [node .ord ] != 'closing' :
80- closing_punct = PAIRED_PUNCT .get (node .form , None )
91+ closing_punct = PAIRED_PUNCT .get (node .form )
8192 if closing_punct is not None :
8293 self ._fix_paired_punct (root , node , closing_punct )
8394
@@ -236,12 +247,11 @@ def _fix_pair(self, root, opening_node, closing_node):
236247 # they also must not cause non-projectivity of other relations. This could
237248 # happen if an outside node is attached to an inside node. To account for
238249 # this, mark the inside parent as a head, too.
239- else :
240- if node .parent > opening_node and node .parent < closing_node :
241- if node .parent .upos == 'PUNCT' :
242- punct_heads .append (node .parent )
243- else :
244- heads .append (node .parent )
250+ elif node .parent > opening_node and node .parent < closing_node :
251+ if node .parent .upos == 'PUNCT' :
252+ punct_heads .append (node .parent )
253+ else :
254+ heads .append (node .parent )
245255
246256 # Punctuation should not have children, but if there is no other head candidate,
247257 # let's break this rule.
0 commit comments