Skip to content

Commit 7fcec6c

Browse files
authored
Merge pull request #12 from FormalLanguageConstrainedPathQuerying/rework_finite_automaton
Rework finite_automaton
2 parents 0c9930d + b83c71a commit 7fcec6c

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

42 files changed

+2056
-1993
lines changed

pyformlang/cfg/cfg.py

Lines changed: 13 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -7,10 +7,9 @@
77

88
# pylint: disable=cyclic-import
99
from pyformlang import pda
10-
from pyformlang.finite_automaton import FiniteAutomaton
10+
from pyformlang.finite_automaton import DeterministicFiniteAutomaton
1111
# pylint: disable=cyclic-import
1212
from pyformlang.pda import cfg_variable_converter as cvc
13-
from pyformlang import regular_expression
1413
from .cfg_object import CFGObject
1514
# pylint: disable=cyclic-import
1615
from .cyk_table import CYKTable, DerivationDoesNotExist
@@ -788,7 +787,7 @@ def to_pda(self) -> "pda.PDA":
788787
state, [])
789788
return new_pda
790789

791-
def intersection(self, other: Any) -> "CFG":
790+
def intersection(self, other: DeterministicFiniteAutomaton) -> "CFG":
792791
""" Gives the intersection of the current CFG with another object
793792
794793
Equivalent to:
@@ -810,13 +809,6 @@ def intersection(self, other: Any) -> "CFG":
810809
When trying to intersect with something other than a regex or a
811810
finite automaton
812811
"""
813-
if isinstance(other, regular_expression.Regex):
814-
other = other.to_epsilon_nfa().to_deterministic()
815-
elif isinstance(other, FiniteAutomaton):
816-
if not other.is_deterministic():
817-
other = other.to_deterministic()
818-
else:
819-
raise NotImplementedError
820812
if other.is_empty():
821813
return CFG()
822814
generate_empty = self.contains([]) and other.accepts([])
@@ -845,10 +837,12 @@ def intersection(self, other: Any) -> "CFG":
845837
return res_cfg
846838

847839
@staticmethod
848-
def _intersection_starting_rules(cfg, other, cv_converter):
840+
def _intersection_starting_rules(cfg: "CFG",
841+
other: DeterministicFiniteAutomaton,
842+
cv_converter):
849843
start = Variable("Start")
850844
productions_temp = []
851-
start_other = list(other.start_states)[0] # it is deterministic
845+
start_other = other.start_state
852846
for final_state in other.final_states:
853847
new_body = [
854848
cv_converter.to_cfg_combined_variable(
@@ -860,15 +854,17 @@ def _intersection_starting_rules(cfg, other, cv_converter):
860854
return productions_temp
861855

862856
@staticmethod
863-
def _intersection_when_terminal(other_fst, production,
857+
def _intersection_when_terminal(other: DeterministicFiniteAutomaton,
858+
production,
864859
cv_converter, states):
865860
productions_temp = []
866861
for state_p in states:
867-
next_states = other_fst(state_p, production.body[0].value)
868-
if next_states:
862+
next_state = other.get_next_state(
863+
state_p, production.body[0].value)
864+
if next_state:
869865
new_head = \
870866
cv_converter.to_cfg_combined_variable(
871-
state_p, production.head, next_states[0])
867+
state_p, production.head, next_state)
872868
productions_temp.append(
873869
Production(new_head,
874870
[production.body[0]],
@@ -904,7 +900,7 @@ def _get_all_bodies(production, state_p, state_r, states, cv_converter):
904900
state_r)]
905901
for state_q in states]
906902

907-
def __and__(self, other):
903+
def __and__(self, other: DeterministicFiniteAutomaton) -> "CFG":
908904
""" Gives the intersection of the current CFG with another object
909905
910906
Parameters

pyformlang/cfg/tests/test_cfg.py

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -516,7 +516,8 @@ def test_finite(self):
516516
def test_intersection(self):
517517
""" Tests the intersection with a regex """
518518
regex = Regex("a*b*")
519-
dfa = regex.to_epsilon_nfa()
519+
enfa = regex.to_epsilon_nfa()
520+
dfa = DeterministicFiniteAutomaton.from_epsilon_nfa(enfa)
520521
symb_a = Symbol("a")
521522
symb_b = Symbol("b")
522523
assert dfa.accepts([symb_a, symb_a, symb_b, symb_b])
@@ -530,7 +531,7 @@ def test_intersection(self):
530531
cfg = CFG(productions=productions, start_symbol=var_s)
531532
assert cfg.contains([ter_a, ter_a, ter_b, ter_b])
532533
assert not cfg.contains([ter_a, ter_a, ter_b])
533-
cfg_i = cfg.intersection(regex)
534+
cfg_i = cfg.intersection(regex.to_minimal_dfa())
534535
assert cfg_i.contains([ter_a, ter_a, ter_b, ter_b])
535536
assert not cfg_i.contains([ter_a, ter_a, ter_b])
536537
assert cfg_i.contains([])
@@ -548,7 +549,7 @@ def test_intersection_empty(self):
548549
Production(var_s, [ter_b, var_s, ter_a]),
549550
Production(var_s, [])}
550551
cfg = CFG(productions=productions, start_symbol=var_s)
551-
cfg_i = cfg & regex
552+
cfg_i = cfg & regex.to_minimal_dfa()
552553
assert not cfg_i
553554

554555
def test_intersection_dfa(self):

pyformlang/cfg/tests/test_llone_parser.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -250,7 +250,7 @@ def test_sentence_cfg(self):
250250
N -> gorilla | sky | carrots
251251
""")
252252
regex = Regex("georges touches (a|an) (sky|gorilla) !")
253-
cfg_inter = cfg.intersection(regex)
253+
cfg_inter = cfg.intersection(regex.to_minimal_dfa())
254254
assert not cfg_inter.is_empty()
255255
assert cfg_inter.is_finite()
256256
assert not cfg_inter.contains(["georges", "sees", "a", "gorilla", "."])

pyformlang/finite_automaton/__init__.py

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -41,9 +41,10 @@
4141
from .state import State
4242
from .symbol import Symbol
4343
from .epsilon import Epsilon
44-
from .transition_function import (TransitionFunction,
45-
DuplicateTransitionError,
46-
InvalidEpsilonTransition)
44+
from .deterministic_transition_function import \
45+
(DeterministicTransitionFunction,
46+
DuplicateTransitionError,
47+
InvalidEpsilonTransition)
4748
from .nondeterministic_transition_function import \
4849
NondeterministicTransitionFunction
4950

@@ -54,7 +55,7 @@
5455
"State",
5556
"Symbol",
5657
"Epsilon",
57-
"TransitionFunction",
58+
"DeterministicTransitionFunction",
5859
"NondeterministicTransitionFunction",
5960
"DuplicateTransitionError",
6061
"InvalidEpsilonTransition"]

0 commit comments

Comments
 (0)