# coding: utf-8
from __future__ import absolute_import
from __future__ import unicode_literals
import copy
import functools
import typing
import six
from Bio.SeqFeature import SeqFeature, FeatureLocation, CompoundLocation
from Bio.SeqRecord import SeqRecord
if typing.TYPE_CHECKING:
from typing import Dict, List, Text, Union # noqa: F401
from Bio.Seq import Seq # noqa: F401
def _ambiguous(func):
@functools.wraps(func)
def newfunc(self, *args, **kwargs):
raise TypeError("ambiguous operation: '{}'".format(func.__name__))
return newfunc
[docs]class CircularRecord(SeqRecord):
"""A derived ``SeqRecord`` that contains a circular DNA sequence.
It handles the `in` operator as expected, and removes the implementation
of the `+` operator since circular DNA sequence do not have an end to
append more nucleotides to. In addition, it overloads the `>>` and `<<`
operators to allow rotating the sequence and its annotations, effectively
changing the *0* position.
See Also:
``Bio.SeqRecord.SeqRecord`` documentation on the `Biopython wiki
<https://biopython.org/wiki/SeqRecord>`_.
"""
# Patch constructor to allow constructing a CircularRecord from
# a SeqRecord argument (other arguments are ignored).
[docs] def __init__(
self,
seq, # type: Union[Seq, SeqRecord]
id="<unknown id>", # type: Text
name="<unknown name>", # type: Text
description="<unknown description>", # type: Text
dbxrefs=None, # type: List[Text]
features=None, # type: List[SeqFeature]
annotations=None, # type: Dict[Text, Any]
letter_annotations=None,
):
# type: (...) -> None
"""Create a new `CircularRecord` instance.
If given a `SeqRecord` as the first argument, it will simply copy all
attributes of the record. This allows using `Bio.SeqIO.read` to open
records, then loading them into a `CircularRecord`.
"""
if isinstance(seq, SeqRecord):
self.__init__( # noqa: T484
seq.seq,
seq.id,
seq.name,
seq.description,
copy.deepcopy(seq.dbxrefs),
copy.deepcopy(seq.features),
copy.deepcopy(seq.annotations),
copy.deepcopy(seq.letter_annotations),
)
else:
if annotations is not None:
topology = annotations.get("topology", "circular")
if topology.lower() != "circular":
raise ValueError("record does not describe a circular sequence")
super(CircularRecord, self).__init__(
seq,
id,
name,
description,
dbxrefs,
features,
annotations,
letter_annotations,
)
# Patch methods that are ambiguous with a non-linear sequence.
[docs] @_ambiguous
def __add__(self, other):
"""Add another sequence or string to this sequence.
Since adding an arbitrary sequence to a plasmid is ambiguous (there is
no sequence end), trying to add a sequence to a ``CircularRecord``
will raise a `TypeError`.
"""
return NotImplemented
[docs] @_ambiguous
def __radd__(self, other): # noqa: D105
"""Add another sequence or string to this sequence (from the left).
Since adding an arbitrary sequence to a plasmid is ambiguous (there is
no sequence end), trying to add a sequence to a ``CircularRecord``
will raise a `TypeError`.
"""
return NotImplemented
# Patch other methods to work as intended
[docs] def __contains__(self, char): # noqa: D105
"""Implement the `in` keyword, searches the sequence.
"""
return len(char) <= len(self) and char in str(self.seq) * 2
[docs] def __getitem__(self, index): # noqa: D105
"""Return a sub-sequence or an individual letter.
The sub-sequence is always returned as a ``SeqRecord``, since it is
probably not circular anymore.
"""
rec = super(CircularRecord, self).__getitem__(index)
if isinstance(index, slice):
annotations = copy.deepcopy(rec.annotations)
if "topology" in annotations:
annotations["topology"] = "linear"
return SeqRecord(
rec.seq,
rec.id,
rec.name,
rec.description,
copy.deepcopy(rec.dbxrefs),
copy.deepcopy(rec.features),
annotations,
copy.deepcopy(rec.letter_annotations),
)
else:
return rec
[docs] def reverse_complement(
self,
id=False,
name=False,
description=False,
features=True,
annotations=False,
letter_annotations=True,
dbxrefs=False,
):
"""Return a new ``CircularRecord`` with reverse complement sequence.
"""
return type(self)(
super(CircularRecord, self).reverse_complement(
id=id,
name=name,
description=description,
features=features,
annotations=annotations,
letter_annotations=letter_annotations,
dbxrefs=dbxrefs,
)
)
# Additional methods
[docs] def __lshift__(self, index):
"""Rotate the sequence counter-clockwise, preserving annotations.
"""
return self >> (-index % len(self.seq))
[docs] def __rshift__(self, index):
"""Rotate the sequence clockwise, preserving annotations.
"""
index %= len(self.seq) # avoid unnecessary cycles
if index == 0:
return self
elif index < 0:
return self << -index
newseq = self.seq[-index:] + self.seq[:-index]
newfeats = []
newletan = {
k: v[index:] + v[:index] for k, v in six.iteritems(self.letter_annotations)
}
for feature in self.features:
loc = feature.location
if loc is None:
newloc = None
elif feature.type == "source" and loc.start == 0 and loc.end == len(self):
newloc = loc
else:
_newloc = []
for part in (loc + index).parts:
if part.end >= len(newseq) and part.start >= len(newseq):
r = part.start // len(newseq) # remainder is used to
_newloc.append(
FeatureLocation( # make sure that part.end
start=part.start
- r * len(newseq), # is always after part.start
end=part.end
- r * len(newseq), # even on additional end
strand=part.strand, # overlap
ref=part.ref,
ref_db=part.ref_db,
)
)
else:
_newloc.append(part)
newloc = _newloc[0] if len(_newloc) == 1 else CompoundLocation(_newloc)
newfeats.append(
SeqFeature(
location=newloc,
type=feature.type,
id=feature.id,
qualifiers=feature.qualifiers,
)
)
return type(self)(
seq=newseq,
id=self.id,
name=self.name,
description=self.description,
dbxrefs=self.dbxrefs,
features=newfeats,
annotations=self.annotations,
letter_annotations=newletan,
)