Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 57 additions & 16 deletions src/pydna/alphabet.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,8 @@

"""

from collections import namedtuple
import re as _re
from dataclasses import dataclass

# An alias for whitespace
emptyspace = chr(32)
Expand Down Expand Up @@ -452,16 +452,61 @@
annealing_dict_w_holes.update(mixed_case_dict)


def get_parts(datastring: str) -> namedtuple:
@dataclass
class DseqParts:
sticky_left5: str
sticky_left3: str
middle: str
sticky_right3: str
sticky_right5: str
single_watson: str
single_crick: str

def __iter__(self):
"""
Allow unpacking DseqParts instances.
>>> from pydna.alphabet import get_parts
>>> sticky_left5, sticky_left3, middle, sticky_right3, sticky_right5, single_watson, single_crick = get_parts("eeATCGuggCCGgg")
>>> sticky_left5
'ee'
>>> middle
'ATCGuggCCGgg'
"""
return iter(
(
self.sticky_left5,
self.sticky_left3,
self.middle,
self.sticky_right3,
self.sticky_right5,
self.single_watson,
self.single_crick,
)
)

def __getitem__(self, index: int) -> str:
"""
Allow indexing DseqParts instances.
>>> from pydna.alphabet import get_parts
>>> parts = get_parts("eeATCGuggCCGgg")
>>> parts[0]
'ee'
>>> parts[2]
'ATCGuggCCGgg'
"""
return tuple(self)[index]


def get_parts(datastring: str) -> DseqParts:
"""
A namedtuple containing the parts of a dsDNA sequence.
Returns a DseqParts instance containing the parts of a dsDNA sequence.

The datastring should contain a string with dscode symbols.
A regex is used to capture the single stranded regions at the ends as
well as the region in the middle.

The figure below numbers the regex capture groups and what they capture
as well as the namedtuple field name.
as well as the DseqParts instance field name.

::

Expand Down Expand Up @@ -552,20 +597,16 @@ def get_parts(datastring: str) -> namedtuple:

result = ["" if e is None else e for e in result]

field_names = (
"sticky_left5",
"sticky_left3",
"middle",
"sticky_right3",
"sticky_right5",
"single_watson",
"single_crick",
return DseqParts(
sticky_left5=result[0],
sticky_left3=result[1],
middle=result[2],
sticky_right3=result[3],
sticky_right5=result[4],
single_watson=result[5],
single_crick=result[6],
)

fragment = namedtuple("fragment", field_names)

return fragment(*result)


def dsbreaks(data: str):

Expand Down
12 changes: 4 additions & 8 deletions tests/test_module_assembly2.py
Original file line number Diff line number Diff line change
Expand Up @@ -1868,7 +1868,7 @@ def test_assemble_function():
assembly_plan = [
(1, 2, loc_end, loc_start),
]
# FIXME: The assert below fails in the Sanity check on line 770 in assembly2, but gives the expected result.

assert (fragments[0] + fragments[1]).seq == assembly.assemble(
fragments, assembly_plan
).seq
Expand All @@ -1878,7 +1878,7 @@ def test_assemble_function():
(1, 2, loc_end, loc_start),
(2, 1, loc_end, loc_start),
]
# FIXME: The assert below fails in the Sanity check on line 770 in assembly2, but gives the expected result.

assert (fragments[0] + fragments[1]).looped().seq == assembly.assemble(
fragments, assembly_plan
).seq
Expand Down Expand Up @@ -2156,9 +2156,7 @@ def test_ligation_assembly():

# Blunt ligation combined with sticky end
fragments = Dseqrecord("AAAGAATTCAAA").cut(EcoRI)
result = assembly.ligation_assembly(
fragments, allow_blunt=True
) # FIXME: The assert below fails in the Sanity check on line 770 in assembly2, but gives the expected result.
result = assembly.ligation_assembly(fragments, allow_blunt=True)
result_str = [str(x.seq) for x in result]
assert sorted(result_str) == sorted(["AAAGAATTCAAA"])
assert result[0].circular
Expand All @@ -2180,9 +2178,7 @@ def test_blunt_assembly():
use_fragment_order=False,
)

assert dseqrecord_list_to_dseq_list(asm.assemble_linear()) == [
(b + a).seq
] # FIXME: The assert below fails in the Sanity check on line 770 in assembly2, but gives the expected result.
assert dseqrecord_list_to_dseq_list(asm.assemble_linear()) == [(b + a).seq]
assert asm.assemble_circular() == []

# Circular assembly
Expand Down
Loading