Source code for moclo.core.vectors

# coding: utf-8
"""MoClo vector classes.

A vector is a plasmidic DNA sequence that can hold a combination of modules of
the same level to create a single module of the following level. Vectors
contain a placeholder sequence that is replaced by the concatenation of the
modules during the Golden Gate assembly.
"""

import typing

from Bio.Seq import Seq
from property_cached import cached_property

from .. import errors
from ._assembly import AssemblyManager
from ._utils import cutter_check, add_as_source
from ._structured import StructuredRecord

if typing.TYPE_CHECKING:
    from typing import Any, MutableMapping, Union  # noqa: F401
    from Bio.SeqRecord import SeqRecord  # noqa: F401
    from Bio.Restriction.Restriction import RestrictionType  # noqa: F401
    from .modules import AbstractModule  # noqa: F401


class AbstractVector(StructuredRecord):
    """An abstract modular cloning vector.

    The class-level `cutter` attribute is passed to `cutter_check` every
    time an instance is created; subclasses are presumably expected to
    replace the ``NotImplemented`` default with a restriction enzyme.
    """

    _level = None  # type: Union[None, int]
    cutter = NotImplemented  # type: Union[NotImplemented, RestrictionType]

    def __new__(cls, *args, **kwargs):
        """Check the class cutter, then allocate the instance.

        Positional and keyword arguments are accepted but not forwarded
        to the parent ``__new__``.
        """
        cutter_check(cls.cutter, name=cls.__name__)
        return super(AbstractVector, cls).__new__(cls)

    @classmethod
    def structure(cls):
        # type: () -> str
        """Get the vector structure, as a DNA regex pattern.

        Warning:
            If overloading this method, the returned pattern must include 3
            capture groups to capture the following features:

            1. The downstream (3') overhang sequence
            2. The vector placeholder sequence
            3. The upstream (5') overhang sequence

        """
        downstream = cls.cutter.elucidate()
        upstream = str(Seq(downstream).reverse_complement())
        # The ``^`` and ``_`` markers found in the elucidated site are
        # rewritten into parentheses, so that the joined pattern ends up
        # with the three capture groups described above.
        return "".join(
            [
                upstream.replace("^", ")(").replace("_", "("),
                "N*",
                downstream.replace("^", ")(").replace("_", ")"),
            ]
        )

    def overhang_start(self):
        # type: () -> Seq
        """Get the upstream overhang of the vector sequence.
        """
        return self._match.group(3).seq

    def overhang_end(self):
        # type: () -> Seq
        """Get the downstream overhang of the vector sequence.
        """
        return self._match.group(1).seq

    def placeholder_sequence(self):
        # type: () -> SeqRecord
        """Get the placeholder sequence in the vector.

        The placeholder sequence is replaced by the concatenation of modules
        during the assembly. It often contains a dropout sequence, such as a
        GFP expression cassette that can be used to measure the progress of
        the assembly.
        """
        # Which overhang is attached to the placeholder depends on the
        # kind of overhang produced by the cutter.
        if self.cutter.is_3overhang():
            return self._match.group(2) + self.overhang_end()
        else:
            return self.overhang_start() + self._match.group(2)

    def target_sequence(self):
        # type: () -> SeqRecord
        """Get the target sequence in the vector.

        The target sequence is the part of the plasmid that is not discarded
        during the assembly (everything except the placeholder sequence).
        """
        if self.cutter.is_3overhang():
            start, end = self._match.span(2)[0], self._match.span(3)[1]
        else:
            start, end = self._match.span(1)[0], self._match.span(2)[1]
        # Shift the record by `start`, then drop the first `end - start`
        # positions (the span selected above) and keep the remainder.
        return add_as_source(self.record, (self.record << start)[end - start :])

    @cached_property
    def _match(self):
        """Get the structure match of the parent class, after a site check.

        Raises:
            `~moclo.errors.IllegalSite`: when ``cutter.catalyse`` returns
                more than 3 elements for the matched region.

        """
        _match = super(AbstractVector, self)._match
        if len(self.cutter.catalyse(_match.group(0).seq)) > 3:
            raise errors.IllegalSite(self.seq)
        return _match

    def assemble(self, module, *modules, **kwargs):
        # type: (AbstractModule, *AbstractModule, **Any) -> SeqRecord
        """Assemble the provided modules into the vector.

        Arguments:
            module (`~moclo.core.modules.AbstractModule`): a module to insert
                in the vector.
            modules (`~moclo.core.modules.AbstractModule`, optional):
                additional modules to insert in the vector. The order of the
                parameters is not important, since modules will be sorted by
                their start overhang in the function.

        Keyword Arguments:
            name (`str`): passed to the assembly manager as ``name``;
                defaults to ``"assembly"``.
            id (`str`): passed to the assembly manager as ``id_``;
                defaults to ``"assembly"``.

        Returns:
            `~Bio.SeqRecord.SeqRecord`: the assembled sequence with sequence
            annotations inherited from the vector and the modules.

        Raises:
            `~moclo.errors.DuplicateModules`: when two different modules
                share the same start overhang, leading to possibly
                non-deterministic constructs.
            `~moclo.errors.MissingModule`: when a module has an end overhang
                that is not shared by any other module, leading to a partial
                construct only.
            `~moclo.errors.InvalidSequence`: when one of the modules does not
                match the required module structure (missing site, wrong
                overhang, etc.).
            `~moclo.errors.UnusedModules`: when some modules were not used
                during the assembly (mostly caused by duplicate parts).

        """
        mgr = AssemblyManager(
            vector=self,
            modules=[module] + list(modules),
            name=kwargs.get("name", "assembly"),
            id_=kwargs.get("id", "assembly"),
        )
        return mgr.assemble()
class EntryVector(AbstractVector):
    """Level 0 vector.

    Only `_level` is set here; `cutter` is inherited unchanged from
    `AbstractVector`.
    """

    _level = 0
class CassetteVector(AbstractVector):
    """Level 1 vector.

    Only `_level` is set here; `cutter` is inherited unchanged from
    `AbstractVector`.
    """

    _level = 1
class DeviceVector(AbstractVector):
    """Level 2 vector.

    Only `_level` is set here; `cutter` is inherited unchanged from
    `AbstractVector`.
    """

    _level = 2