Source code for moclo.core.modules

# coding: utf-8
"""Moclo module classes.

A module is a sequence of DNA that contains a sequence of interest, such as a
promoter, a CDS, a protein binding site, etc., organised so that it can be
combined with other modules to create an assembly. This involves flanking that
target sequence with Type IIS restriction sites, which depend on the level of
the module, as well as the chosen MoClo protocol.
"""

import typing

from Bio.Seq import Seq
from property_cached import cached_property

from .. import errors
from ._structured import StructuredRecord
from ._utils import cutter_check, add_as_source

if typing.TYPE_CHECKING:
    from typing import Union, Text  # noqa: F401
    from Bio.SeqRecord import SeqRecord  # noqa: F401


class AbstractModule(StructuredRecord):
    """Base class shared by every modular cloning module level.

    Attributes:
        cutter (`~Bio.Restriction.Restriction.RestrictionType`): the enzyme
            used to cut the target sequence from the backbone plasmid during
            Golden Gate assembly.

    """

    _level = None  # type: Union[None, int]
    cutter = NotImplemented  # type: Union[NotImplemented, RestrictionType]

    def __new__(cls, *args, **kwargs):
        """Allocate a new module after checking the class-level cutter.

        The positional and keyword arguments are accepted but not forwarded
        to the parent ``__new__``.
        """
        # The check runs on the class attribute, before any instance exists.
        cutter_check(cls.cutter, name=cls.__name__)
        instance = super(AbstractModule, cls).__new__(cls)
        return instance

    @classmethod
    def structure(cls):
        # type: () -> Text
        """Build the DNA regex pattern describing the module structure.

        The pattern is derived from the elucidated site of `cutter`: the
        site itself on the upstream side, its reverse complement on the
        downstream side, and ``N*`` in between.

        Warning:
            If overloading this method, the returned pattern must include 3
            capture groups to capture the following features:

            1. The upstream (5') overhang sequence
            2. The module target sequence
            3. The downstream (3') overhang sequence

        """
        site = cls.cutter.elucidate()
        mirrored_site = str(Seq(site).reverse_complement())
        # The ``^`` and ``_`` markers of the elucidated site are turned into
        # group delimiters; note that ``^`` opens a group upstream but closes
        # one downstream.
        head = site.replace("^", "(").replace("_", ")(")
        tail = mirrored_site.replace("^", ")").replace("_", ")(")
        return head + "N*" + tail

    def overhang_start(self):
        # type: () -> Seq
        """Get the upstream overhang of the target sequence.

        Returns:
            `~Bio.Seq.Seq`: the upstream overhang (first capture group).

        """
        found = self._match
        return found.group(1).seq

    def overhang_end(self):
        # type: () -> Seq
        """Get the downstream overhang of the target sequence.

        Returns:
            `~Bio.Seq.Seq`: the downstream overhang (third capture group).

        """
        found = self._match
        return found.group(3).seq

    def target_sequence(self):
        # type: () -> SeqRecord
        """Extract the target sequence of the module.

        A stored module usually holds more than the sequence of interest
        (an antibiotic marker, for instance); that extra material is not
        part of the assembly, and only the target sequence ends up inserted
        in the vector.

        Returns:
            `~Bio.SeqRecord.SeqRecord`: the target sequence with annotations.

        Note:
            Depending on the cutting direction of the restriction enzyme
            used during assembly, the overhang is kept at the beginning or
            at the end, so the returned record is exactly the sequence the
            enzyme produces during restriction.

        """
        # Pick which capture groups delimit the extracted region: target plus
        # downstream overhang for a 3' overhang cutter, upstream overhang
        # plus target otherwise.
        if self.cutter.is_3overhang():
            first_group, last_group = 2, 3
        else:
            first_group, last_group = 1, 2
        start = self._match.span(first_group)[0]
        end = self._match.span(last_group)[1]
        # Shift the record by ``start`` and keep the first ``end - start``
        # positions.
        length = end - start
        shifted = self.record << start
        return add_as_source(self.record, shifted[:length])

    @cached_property
    def _match(self):
        """Return the parent's structure match, rejecting over-cut sequences.

        Raises:
            `~moclo.errors.IllegalSite`: when cutting the matched region with
                `cutter` gives more than 3 fragments.

        """
        found = super(AbstractModule, self)._match
        fragments = self.cutter.catalyse(found.group(0).seq)
        # NOTE(review): more than three fragments presumably means an extra
        # recognition site inside the matched region - confirm.
        if len(fragments) > 3:
            raise errors.IllegalSite(self.seq)
        return found
class Product(AbstractModule):
    """A level -1 module, typically a PCR product.

    Products are the lowest-level components of the MoClo system. They are
    not practical to work with directly, and are first assembled in a
    standard vector to obtain *entries*.
    """

    # MoClo level of this module type.
    _level = -1
class Entry(AbstractModule):
    """A level 0 module, often taken from the official toolkit plasmids.

    An entry is the result of assembling products into a standard vector
    suitable for selection and storage.
    """

    # MoClo level of this module type.
    _level = 0
class Cassette(AbstractModule):
    """A level 1 module, also referred to as a Transcriptional Unit.

    Depending on the cassette vector chosen during level 0 assembly, a
    cassette can either express genes in its target organism, or be
    assembled into *multigene* modules that express many genes at once.
    """

    # MoClo level of this module type.
    _level = 1
class Device(AbstractModule):
    """A level 2 module, also referred to as a Multigene plasmid.

    A device is assembled from several transcriptional units, so it carries
    several genes that can all be expressed at once. Most MoClo
    implementations are designed so that multiple devices can be assembled
    into a module that is also a valid level 1 module, as the **Golden
    Braid** system does with its **α** and **Ω** plasmids.
    """

    # MoClo level of this module type.
    _level = 2