Compare commits

...

5 Commits

Author SHA1 Message Date
Damien Goutte-Gattat 4078e160ab Write Features segment to SnapGene file. 2 years ago
Damien Goutte-Gattat b4ef6e4ded Write SnapGene 'Notes' segment. 2 years ago
Damien Goutte-Gattat ff4bdb943b Force SnapGene file type to 'DNA'. 2 years ago
Damien Goutte-Gattat 64e91d6a5e Write sequence type and flags in SnapGene export. 2 years ago
Damien Goutte-Gattat ec6a498253 Add basic write support for SnapGene. 2 years ago
  1. 132
      incenp/bio/seqio/SnapGeneIO.py
  2. 3
      incenp/bio/seqio/__init__.py

132
incenp/bio/seqio/SnapGeneIO.py

@ -26,11 +26,12 @@ Provide read support for the binary format used by SnapGene.
from datetime import datetime
from re import sub
from struct import unpack
from struct import pack, unpack
from xml.dom.minidom import parseString
from Bio import Alphabet
from Bio.Seq import Seq
from Bio.SeqIO.Interfaces import SequenceWriter
from Bio.SeqFeature import SeqFeature, FeatureLocation
from Bio.SeqRecord import SeqRecord
@ -88,6 +89,21 @@ _months = [
'JUL', 'AUG', 'SEP', 'OCT', 'NOV', 'DEC'
]
_months_rev = {
'JAN': 1,
'FEB': 2,
'MAR': 3,
'APR': 4,
'MAY': 5,
'JUN': 6,
'JUL': 7,
'AUG': 8,
'SEP': 9,
'OCT': 10,
'NOV': 11,
'DEC': 12
}
def _parse_notes_segment(length, data, record):
xml = parseString(data.decode('ASCII'))
@ -181,12 +197,19 @@ def _parse_primers_segment(length, data, record):
record.features.append(feature)
# Numeric type codes identifying the kinds of segment handled by this module.
_DNA_SEGMENT = 0x00
_PRIMERS_SEGMENT = 0x05
_NOTES_SEGMENT = 0x06
_FILEDESC_SEGMENT = 0x09
_FEATURES_SEGMENT = 0x0A
# Dispatch table: segment type code -> function that parses that segment.
_segment_handlers = {
# Entries keyed by the raw numeric codes.
0x00: _parse_dna_segment,
0x05: _parse_primers_segment,
0x06: _parse_notes_segment,
0x09: _parse_file_description_segment,
0x0A: _parse_features_segment
# NOTE(review): the five entries below repeat the mapping above using the
# named constants; in this compare view the literal-keyed lines look like
# the pre-change version of the same entries -- confirm against the file.
_DNA_SEGMENT: _parse_dna_segment,
_PRIMERS_SEGMENT: _parse_primers_segment,
_NOTES_SEGMENT: _parse_notes_segment,
_FILEDESC_SEGMENT: _parse_file_description_segment,
_FEATURES_SEGMENT: _parse_features_segment
}
@ -222,7 +245,7 @@ def SnapGeneIterator(handle):
n = 0
for type, length, data in _SegmentIterator(handle):
if n == 0 and type != 0x09:
if n == 0 and type != _FILEDESC_SEGMENT:
raise ValueError("The file does not start with a File Description Segment")
if type in _segment_handlers:
@ -234,3 +257,98 @@ def SnapGeneIterator(handle):
raise ValueError("No DNA Segment in file")
yield record
class SnapGeneWriter(SequenceWriter):
    """Write a single sequence record as a SnapGene binary file.

    The output is a series of segments, each made of a one-byte type
    code, a four-byte big-endian length and the segment payload. The
    writer emits, in order: a File Description segment, a DNA segment,
    a Notes segment (XML) and, when the record has non-primer features,
    a Features segment (XML).
    """

    def write_file(self, records):
        """Write the only record of *records* to the output handle.

        :param records: an iterable yielding exactly one record
        :return: the number of records written (always 1)
        :raises ValueError: if *records* does not contain exactly one
            record
        """
        # Materialize first: callers may pass a generator, on which
        # len() would raise a TypeError.
        records = list(records)
        if len(records) != 1:
            raise ValueError("A SnapGene file can only contain one record")
        record = records[0]

        self._write_segment(_FILEDESC_SEGMENT, pack('>8sHHH',
                                                    'SnapGene'.encode('ASCII'),
                                                    0x01,  # File type (DNA)
                                                    0x0d,  # Export version
                                                    0x0b   # Import version
                                                    ))

        seqflags = 0  # Linear sequence
        if record.annotations.get('topology', 'linear') == 'circular':
            seqflags = 1
        self._write_segment(_DNA_SEGMENT, pack('>B%ds' % (len(record)),
                                               seqflags,
                                               str(record.seq).encode('ASCII')))

        notes = []
        if record.annotations.get('data_file_division', 'UNC') == 'SYN':
            notes.append('<Type>Synthetic</Type>')
        else:
            notes.append('<Type>Natural</Type>')
        if 'date' in record.annotations:
            # The annotation is expected as 'DD-MMM-YYYY'; it is written
            # out as 'YYYY.M.DD'.
            day, month, year = record.annotations['date'].split('-')
            notes.append('<Created>%s.%d.%s</Created>'
                         % (self._xml_escape(year), _months_rev[month],
                            self._xml_escape(day)))
        # Free text must be escaped, otherwise '&' or '<' in the
        # description or identifier yields a malformed Notes segment.
        notes.append('<Description>%s</Description>'
                     % self._xml_escape(record.description))
        notes.append('<AccessionNumber>%s</AccessionNumber>'
                     % self._xml_escape(record.id))
        self._write_xml_segment(_NOTES_SEGMENT, 'Notes', notes)

        # Split the features in a single pass: primer binding sites go
        # to the Primers segment, everything else to the Features segment.
        features = []
        primers = []
        for feature in record.features:
            if feature.type == 'primer_bind':
                primers.extend(self._primer_to_snapgene_xml(feature))
            else:
                features.extend(self._feature_to_snapgene_xml(feature))
        if features:
            self._write_xml_segment(_FEATURES_SEGMENT, 'Features', features)
        if primers:
            self._write_xml_segment(_PRIMERS_SEGMENT, 'Primers', primers)

        return 1

    @staticmethod
    def _xml_escape(value):
        """Return *value* as text safe for XML content and attributes.

        The value is converted to a string, then '&', '<', '>' and '"'
        are replaced with their entity references.
        """
        # Local import so the block does not depend on a file-level import.
        from xml.sax.saxutils import escape
        return escape('%s' % (value,), {'"': '&quot;'})

    def _write_xml_segment(self, type, topnode, nodes):
        """Write a segment whose payload is a UTF-8 encoded XML element.

        :param type: the segment type code
        :param topnode: the name of the enclosing XML element
        :param nodes: a list of already serialized child fragments
        """
        xml = '<' + topnode + '>' + ''.join(nodes) + '</' + topnode + '>'
        self._write_segment(type, xml.encode('utf-8'))

    def _write_segment(self, type, data):
        """Write one segment: type byte, big-endian length, then payload.

        :param type: the segment type code (one unsigned byte)
        :param data: the segment payload, as bytes
        """
        self.handle.write(pack('>BI', type, len(data)))
        self.handle.write(data)

    def _feature_to_snapgene_xml(self, feature):
        """Serialize a feature as a list of SnapGene XML fragments.

        The feature name is taken from the first available qualifier
        among 'label', 'gene' and 'note', defaulting to
        'Unnamed feature'. All names and values are XML-escaped.

        :param feature: the feature to serialize
        :return: a list of strings forming a <Feature> element
        """
        name = 'Unnamed feature'
        for key in ('label', 'gene', 'note'):
            if key in feature.qualifiers:
                name = feature.qualifiers[key][0]
                break

        # Strand 1 maps to directionality 1, strand -1 to 2, and any
        # other strand value to 0.
        directionality = {1: 1, -1: 2}.get(feature.strand, 0)

        xml = [
            '<Feature name="%s" directionality="%d" type="%s">'
            % (self._xml_escape(name), directionality,
               self._xml_escape(feature.type)),
            # The range is written 1-based and inclusive, hence start + 1.
            '<Segment range="%d-%d" type="standard" />'
            % (feature.location.start + 1, feature.location.end),
        ]
        # Only the first value of each qualifier is exported.
        for qualifier, values in feature.qualifiers.items():
            xml.append('<Q name="%s"><V text="%s"/></Q>'
                       % (self._xml_escape(qualifier),
                          self._xml_escape(values[0])))
        xml.append('</Feature>')

        return xml

    def _primer_to_snapgene_xml(self, record):
        """Serialize a primer binding site as SnapGene XML fragments.

        Placeholder: always returns an empty list, so no Primers
        segment is ever written.
        """
        return []

3
incenp/bio/seqio/__init__.py

@ -43,5 +43,8 @@ if not 'xdna' in SeqIO._BinaryFormats:
# Register the SnapGene reader, writer and binary-format flag in SeqIO's
# format tables, leaving any entry that already exists untouched.
if 'snapgene' not in SeqIO._FormatToIterator:
    SeqIO._FormatToIterator['snapgene'] = SnapGeneIO.SnapGeneIterator

if 'snapgene' not in SeqIO._FormatToWriter:
    SeqIO._FormatToWriter['snapgene'] = SnapGeneIO.SnapGeneWriter

if 'snapgene' not in SeqIO._BinaryFormats:
    SeqIO._BinaryFormats.append('snapgene')
Loading…
Cancel
Save