Browse Source

Apply changes from the Biopython pull request.

I intend to abandon this project as soon as the parsers have
been merged into Biopython itself. But until that happens, I
will make sure to duplicate here any changes I make to the Biopython
version so that the parsers always behave identically no matter
which project they come from.
master
Damien Goutte-Gattat 2 years ago
parent
commit
17265f4884
  1. 16
      incenp/bio/seqio/GckIO.py
  2. 7
      incenp/bio/seqio/SnapGeneIO.py
  3. 40
      incenp/bio/seqio/XdnaIO.py

16
incenp/bio/seqio/GckIO.py

@ -107,11 +107,10 @@ def GckIterator(handle):
if strand == 1: # Reverse strand
strand = -1
elif strand == 2: # Forward strand
strand = 1
elif strand == 3: # Both strands
# Treated the same as a forward strand as BioPython does
# not seem to support dual-stranded features.
else:
# Other possible values are 0 (no strand specified),
# 2 (forward strand), and 3 (both strands). All are
# treated as a forward strand.
strand = 1
location = FeatureLocation(start, end, strand=strand)
@ -192,11 +191,8 @@ def GckIterator(handle):
# Read the construct's name
name = _read_pstring(handle)
if len(name) > 16 or ' ' in name:
# Store that as the record's description
record.description = name
else:
record.name = name
record.name = record.id = name.split(' ')[0]
record.description = name
# Circularity byte
# There may be other flags in that block, but their meaning

7
incenp/bio/seqio/SnapGeneIO.py

@ -106,6 +106,13 @@ def _parse_notes_segment(length, data, record):
if acc:
record.id = acc
comment = _get_child_value(xml, 'Comments')
if comment:
record.name = comment.split(' ', 1)[0]
record.description = comment
if not acc:
record.id = record.name
def _parse_file_description_segment(length, data, record):
cookie, seq_type, exp_version, imp_version = unpack('>8sHHH', data)

40
incenp/bio/seqio/XdnaIO.py

@ -144,13 +144,11 @@ def XdnaIterator(handle):
comment = _read(handle, com_length).decode('ASCII')
# Try to derive a name from the first "word" of the comment.
name = comment.split(' ', 2)[0]
if len(name) > 16:
name = None
name = comment.split(' ')[0]
# Create record object
record = SeqRecord(Seq(sequence, _seq_types[type]),
description=comment, name=name)
description=comment, name=name, id=name)
if topology in _seq_topologies:
record.annotations['topology'] = _seq_topologies[topology]
@ -196,31 +194,41 @@ class XdnaWriter(SequenceWriter):
else:
topology = 0
# We store the record's id and description in the comment field.
# Make sure to avoid duplicating the id if it is already
# contained in the description.
if record.description.startswith(record.id):
comment = record.description
else:
comment = '{} {}'.format(record.id, record.description)
# Write header
self.handle.write(pack('>BBB25xII60xI11xB',
0, # version
seqtype, topology, len(record),
0, # negative length
len(record.description),
len(comment),
255 # end of header
))
# Actual sequence and comment
self.handle.write(str(record.seq))
self.handle.write(record.description)
self.handle.write(str(record.seq).encode('ASCII'))
self.handle.write(comment.encode('ASCII'))
self.handle.write(pack('>B', 0)) # Annotation section marker
self._write_pstring('0') # right-side overhang
self._write_pstring('0') # left-side overhang
# Write features
self.handle.write(pack('>B', len(record.features)))
for feature in record.features:
if type(feature.location.start) != ExactPosition or type(feature.location.end) != ExactPosition:
# Cannot store fuzzy locations, skip feature
continue
# We must skip features with fuzzy locations as they cannot be
# represented in the Xdna format
features = [f for f in record.features if type(f.location.start) == ExactPosition and type(f.location.end) == ExactPosition]
# We also cannot store more than 255 features as the number of
# features is stored on a single byte...
if len(features) > 255:
features = features[:255]
self.handle.write(pack('>B', len(features)))
for feature in features:
self._write_pstring(feature.qualifiers.get('label', [''])[0])
description = ''
@ -252,5 +260,7 @@ class XdnaWriter(SequenceWriter):
def _write_pstring(self, s):
if len(s) > 255:
s = s[:255]
self.handle.write(pack('>B', len(s)))
self.handle.write(s)
self.handle.write(s.encode('ASCII'))
Loading…
Cancel
Save