Skip to content

Commit 38880e7

Browse files
committed
add recursive automaton annotations
1 parent 33d0895 commit 38880e7

File tree

2 files changed

+79
-83
lines changed

2 files changed

+79
-83
lines changed

pyformlang/rsa/box.py

Lines changed: 42 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
11
"""
22
Representation of a box for recursive automaton
33
"""
4-
from typing import Union, Any
4+
5+
from typing import Set, Hashable, Any
56

67
from pyformlang.finite_automaton import DeterministicFiniteAutomaton
7-
from pyformlang.finite_automaton.symbol import Symbol
8+
from pyformlang.finite_automaton import State, Symbol
89
from pyformlang.finite_automaton.utils import to_symbol
910

1011

@@ -24,58 +25,27 @@ class Box:
2425

2526
def __init__(self,
2627
dfa: DeterministicFiniteAutomaton,
27-
nonterminal: Union[Symbol, str]):
28+
nonterminal: Hashable) -> None:
2829
self._dfa = dfa
29-
30-
nonterminal = to_symbol(nonterminal)
31-
self._nonterminal = nonterminal
32-
33-
def to_subgraph_dot(self):
34-
"""Creates a named subgraph representing a box"""
35-
graph = self._dfa.to_networkx()
36-
strange_nodes = []
37-
nonterminal = self.nonterminal.value.replace('"', '').replace("'", "").replace(".", "")
38-
dot_string = (f'subgraph cluster_{nonterminal}\n{{ label="{nonterminal}"\n'
39-
f'fontname="Helvetica,Arial,sans-serif"\n'
40-
f'node [fontname="Helvetica,Arial,sans-serif"]\n'
41-
f'edge [fontname="Helvetica,Arial,sans-serif"]\nrankdir=LR;\n'
42-
f'node [shape = circle style=filled fillcolor=white]')
43-
for node, data in graph.nodes(data=True):
44-
node = node.replace('"', '').replace("'", "")
45-
if 'is_start' not in data.keys() or 'is_final' not in data.keys():
46-
strange_nodes.append(node)
47-
continue
48-
if data['is_start']:
49-
dot_string += f'\n"{node}" [fillcolor = green];'
50-
if data['is_final']:
51-
dot_string += f'\n"{node}" [shape = doublecircle];'
52-
for strange_node in strange_nodes:
53-
graph.remove_node(strange_node)
54-
for node_from, node_to, data in graph.edges(data=True):
55-
node_from = node_from.replace('"', '').replace("'", "")
56-
node_to = node_to.replace('"', '').replace("'", "")
57-
label = data['label'].replace('"', '').replace("'", "")
58-
dot_string += f'\n"{node_from}" -> "{node_to}" [label = "{label}"];'
59-
dot_string += "\n}"
60-
return dot_string
30+
self._nonterminal = to_symbol(nonterminal)
6131

6232
@property
63-
def dfa(self):
33+
def dfa(self) -> DeterministicFiniteAutomaton:
6434
""" Box's dfa """
6535
return self._dfa
6636

6737
@property
68-
def nonterminal(self):
38+
def nonterminal(self) -> Symbol:
6939
""" Box's nonterminal """
7040
return self._nonterminal
7141

7242
@property
73-
def start_states(self):
43+
def start_states(self) -> Set[State]:
7444
""" The start states """
7545
return self._dfa.start_states
7646

7747
@property
78-
def final_states(self):
48+
def final_states(self) -> Set[State]:
7949
""" The final states """
8050
return self._dfa.final_states
8151

@@ -100,5 +70,36 @@ def __eq__(self, other: Any) -> bool:
10070
return False
10171
return self.is_equivalent_to(other)
10272

103-
def __hash__(self):
104-
return self._nonterminal.__hash__()
73+
def __hash__(self) -> int:
74+
return hash(self.nonterminal)
75+
76+
def to_subgraph_dot(self) -> str:
77+
"""Creates a named subgraph representing a box"""
78+
graph = self._dfa.to_networkx()
79+
strange_nodes = []
80+
nonterminal = str(self.nonterminal) \
81+
.replace('"', '').replace("'", "").replace(".", "")
82+
dot_string = \
83+
(f'subgraph cluster_{nonterminal}\n{{ label="{nonterminal}"\n'
84+
f'fontname="Helvetica,Arial,sans-serif"\n'
85+
f'node [fontname="Helvetica,Arial,sans-serif"]\n'
86+
f'edge [fontname="Helvetica,Arial,sans-serif"]\nrankdir=LR;\n'
87+
f'node [shape = circle style=filled fillcolor=white]')
88+
for node, data in graph.nodes(data=True):
89+
node = node.replace('"', '').replace("'", "")
90+
if 'is_start' not in data.keys() or 'is_final' not in data.keys():
91+
strange_nodes.append(node)
92+
continue
93+
if data['is_start']:
94+
dot_string += f'\n"{node}" [fillcolor = green];'
95+
if data['is_final']:
96+
dot_string += f'\n"{node}" [shape = doublecircle];'
97+
for strange_node in strange_nodes:
98+
graph.remove_node(strange_node)
99+
for node_from, node_to, data in graph.edges(data=True):
100+
node_from = node_from.replace('"', '').replace("'", "")
101+
node_to = node_to.replace('"', '').replace("'", "")
102+
label = data['label'].replace('"', '').replace("'", "")
103+
dot_string += f'\n"{node_from}" -> "{node_to}" [label = "{label}"];'
104+
dot_string += "\n}"
105+
return dot_string

pyformlang/rsa/recursive_automaton.py

Lines changed: 37 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -2,15 +2,14 @@
22
Representation of a recursive automaton
33
"""
44

5-
from typing import AbstractSet, Union
5+
from typing import Dict, Set, AbstractSet, Optional, Hashable, Any
66

7-
from pyformlang.finite_automaton import DeterministicFiniteAutomaton
8-
from pyformlang.finite_automaton.symbol import Symbol
7+
from pyformlang.finite_automaton import Symbol
98
from pyformlang.finite_automaton.utils import to_symbol
109
from pyformlang.regular_expression import Regex
1110
from pyformlang.cfg import Epsilon
1211

13-
from pyformlang.rsa.box import Box
12+
from .box import Box
1413

1514

1615
class RecursiveAutomaton:
@@ -29,15 +28,15 @@ class RecursiveAutomaton:
2928

3029
def __init__(self,
3130
start_box: Box,
32-
boxes: AbstractSet[Box]):
33-
self._nonterminal_to_box = {}
31+
boxes: AbstractSet[Box]) -> None:
32+
self._nonterminal_to_box: Dict[Symbol, Box] = {}
33+
self._start_nonterminal = start_box.nonterminal
3434
if start_box not in boxes:
35-
self._nonterminal_to_box[to_symbol(start_box.nonterminal)] = start_box
36-
self._start_nonterminal = to_symbol(start_box.nonterminal)
35+
self._nonterminal_to_box[start_box.nonterminal] = start_box
3736
for box in boxes:
38-
self._nonterminal_to_box[to_symbol(box.nonterminal)] = box
37+
self._nonterminal_to_box[box.nonterminal] = box
3938

40-
def get_box_by_nonterminal(self, nonterminal: Union[Symbol, str]):
39+
def get_box_by_nonterminal(self, nonterminal: Hashable) -> Optional[Box]:
4140
"""
4241
Box by nonterminal
4342
@@ -53,50 +52,35 @@ def get_box_by_nonterminal(self, nonterminal: Union[Symbol, str]):
5352
"""
5453

5554
nonterminal = to_symbol(nonterminal)
56-
if nonterminal in self._nonterminal_to_box:
57-
return self._nonterminal_to_box[nonterminal]
55+
return self._nonterminal_to_box.get(nonterminal, None)
5856

59-
return None
60-
61-
def get_number_boxes(self):
57+
def get_number_boxes(self) -> int:
6258
""" Size of set of boxes """
63-
6459
return len(self._nonterminal_to_box)
6560

66-
def to_dot(self):
67-
""" Create dot representation of recursive automaton """
68-
dot_string = 'digraph "" {'
69-
for box in self._nonterminal_to_box.values():
70-
dot_string += f'\n{box.to_subgraph_dot()}'
71-
dot_string += "\n}"
72-
return dot_string
73-
7461
@property
75-
def nonterminals(self) -> set:
62+
def nonterminals(self) -> Set[Symbol]:
7663
""" The set of nonterminals """
77-
7864
return set(self._nonterminal_to_box.keys())
7965

8066
@property
81-
def boxes(self) -> dict:
67+
def boxes(self) -> Set[Box]:
8268
""" The set of boxes """
83-
84-
return self._nonterminal_to_box
69+
return set(self._nonterminal_to_box.values())
8570

8671
@property
8772
def start_nonterminal(self) -> Symbol:
8873
""" The start nonterminal """
89-
9074
return self._start_nonterminal
9175

9276
@property
93-
def start_box(self):
77+
def start_box(self) -> Box:
9478
""" The start box """
95-
96-
return self.boxes[self.start_nonterminal]
79+
return self._nonterminal_to_box[self.start_nonterminal]
9780

9881
@classmethod
99-
def from_regex(cls, regex: Regex, start_nonterminal: Union[Symbol, str]):
82+
def from_regex(cls, regex: Regex, start_nonterminal: Hashable) \
83+
-> "RecursiveAutomaton":
10084
""" Create a recursive automaton from regular expression
10185
10286
Parameters
@@ -116,14 +100,17 @@ def from_regex(cls, regex: Regex, start_nonterminal: Union[Symbol, str]):
116100
return RecursiveAutomaton(box, {box})
117101

118102
@classmethod
119-
def from_ebnf(cls, text, start_nonterminal: Union[Symbol, str] = Symbol("S")):
120-
""" Create a recursive automaton from ebnf (ebnf = Extended Backus-Naur Form)
103+
def from_ebnf(cls, text: str, start_nonterminal: Hashable = Symbol("S")) \
104+
-> "RecursiveAutomaton":
105+
""" Create a recursive automaton from ebnf \
106+
(ebnf = Extended Backus-Naur Form)
121107
122108
Parameters
123109
-----------
124110
text : str
125111
The text to transform
126-
start_nonterminal : :class:`~pyformlang.finite_automaton.Symbol` | str, optional
112+
start_nonterminal : \
113+
:class:`~pyformlang.finite_automaton.Symbol` | str, optional
127114
The start nonterminal, S by default
128115
129116
Returns
@@ -132,7 +119,7 @@ def from_ebnf(cls, text, start_nonterminal: Union[Symbol, str] = Symbol("S")):
132119
The new recursive automaton built from context-free grammar
133120
"""
134121
start_nonterminal = to_symbol(start_nonterminal)
135-
productions = {}
122+
productions: Dict[Hashable, str] = {}
136123
boxes = set()
137124
nonterminals = set()
138125
for production in text.splitlines():
@@ -160,7 +147,7 @@ def from_ebnf(cls, text, start_nonterminal: Union[Symbol, str] = Symbol("S")):
160147
start_box = Box(start_box_dfa, start_nonterminal)
161148
return RecursiveAutomaton(start_box, boxes)
162149

163-
def is_equals_to(self, other):
150+
def is_equal_to(self, other: "RecursiveAutomaton") -> bool:
164151
"""
165152
Check whether two recursive automata are equal by boxes.
166153
Not equivalence in terms of formal language theory, just mapping boxes
@@ -175,9 +162,17 @@ def is_equals_to(self, other):
175162
are_equivalent : bool
176163
Whether the two recursive automata are equals or not
177164
"""
165+
return self.boxes == other.boxes
166+
167+
def __eq__(self, other: Any) -> bool:
178168
if not isinstance(other, RecursiveAutomaton):
179169
return False
180-
return self.boxes == other.boxes
170+
return self.is_equal_to(other)
181171

182-
def __eq__(self, other):
183-
return self.is_equals_to(other)
172+
def to_dot(self) -> str:
173+
""" Create dot representation of recursive automaton """
174+
dot_string = 'digraph "" {'
175+
for box in self._nonterminal_to_box.values():
176+
dot_string += f'\n{box.to_subgraph_dot()}'
177+
dot_string += "\n}"
178+
return dot_string

0 commit comments

Comments
 (0)