Switch to Black-enforced code style.

master
Damien Goutte-Gattat 3 months ago
parent fa20c968af
commit 0e801b94ea
  1. 33
      incenp/bio/modelling/cc3d.py
  2. 137
      incenp/bio/seq/databases.py
  3. 72
      incenp/bio/seq/plasmidmap.py
  4. 136
      incenp/bio/seq/seqtool.py
  5. 83
      incenp/bio/seq/seqvault.py
  6. 46
      incenp/bio/seq/usa.py
  7. 118
      incenp/bio/seq/utils.py
  8. 45
      incenp/bio/seq/vault.py
  9. 8
      incenp/bio/seq/wrappers.py
  10. 2
      pyproject.toml
  11. 45
      setup.py

@ -28,16 +28,24 @@ import click
@click.command()
@click.argument('simfile', type=click.Path(exists=True))
@click.option('--cc3d-home', '-C', default='/opt/cc3d',
type=click.Path(file_okay=False, dir_okay=True, exists=True),
help="Path to the CC3D install directory.")
@click.option('--output-dir', '-o',
default='{}/cc3d-runs'.format(getenv('HOME', default='.')),
type=click.Path(file_okay=False, dir_okay=True),
help="Path to the output directory.")
@click.option(
'--cc3d-home',
'-C',
default='/opt/cc3d',
type=click.Path(file_okay=False, dir_okay=True, exists=True),
help="Path to the CC3D install directory.",
)
@click.option(
'--output-dir',
'-o',
default='{}/cc3d-runs'.format(getenv('HOME', default='.')),
type=click.Path(file_okay=False, dir_okay=True),
help="Path to the output directory.",
)
@click.option('--runs', '-n', default=10, help="Number of runs to perform.")
@click.option('--parameters', '-p', type=click.Path(exists=True),
help="Path to a parameter file.")
@click.option(
'--parameters', '-p', type=click.Path(exists=True), help="Path to a parameter file."
)
def main(simfile, cc3d_home, output_dir, runs, parameters):
"""Run CC3D simulations."""
@ -48,12 +56,7 @@ def main(simfile, cc3d_home, output_dir, runs, parameters):
if not exists(cc3dbin):
raise click.ClickException(f"CC3d script runner {cc3dbin} not found.")
command = [
cc3dbin,
'-i', simfile,
'--current-dir', simdir,
'-o', output_dir
]
command = [cc3dbin, '-i', simfile, '--current-dir', simdir, '-o', output_dir]
if parameters:
with open(parameters, 'r') as f:

@ -37,75 +37,75 @@ from incenp.bio import Error
class DatabaseProvider(object):
"""Provides database adapters for biological databases.
This object provides a single way to access a user-configured
set of sequence databases.
The databases this object gives access to should be described in a
INI-style configuration file, located in
INI-style configuration file, located in
`$XDG_CONFIG_HOME/bioutils/databases.ini`.
Each section in the configuration file describes a database. Within a
section, the mandatory ``type`` parameter indicates the type of
database and therefore the type of database adapter to use to query
that database. Other parameters are dependent on the database type.
Supported database types:
BioSQL database (``type: biosql``)
Any database following the BioSQL schema. Parameters for this type
of database are:
* ``driver`` indicating the SQL driver to use;
* ``host`` for the hostname of the SQL server;
* ``user`` for the user account to connect to the server with;
* ``password`` for the associated password;
* ``database`` for the SQL database name;
* ``subdb`` for the name of the BioSQL subdatabase, if any.
ExPASy database (``type: expasy``)
The ExPASy server. This type expects no parameter.
Entrez database (``type: entrez``)
One of the NCBI Entrez databases. Parameters for this type of
database are:
* ``email`` for the email address to send to the NCBI server along
with each query;
* ``database`` for the name of the Entrez database (can be
``nuccore`` or ``protein``, for the DNA/RNA or protein database
respectively).
Here is an example configuration file::
[db1]
type: biosql
host: localhost
user: bioutils
database: mydb
[uniprot]
type: expasy
[genbank]
type: entrez
email: bioutils@example.org
database: nuccore
With such a configuration file, the database provider can either be
used directly::
# Create the object and parse the configuration file
dbprovider = DatabaseProvider()
# Query the ExPASy server
dbprovider['uniprot'].fetch('NP_001800')
or through a USA::
# Query the ExPASy server
usa.read_usa('uniprot::NP_001800', databases=dbprovider)
"""
def __init__(self):
@ -114,8 +114,11 @@ class DatabaseProvider(object):
self.biosql_servers = {}
cfg_file = '{}/bioutils/databases.ini'.format(
getenv('XDG_CONFIG_HOME', default='{}/.config'.format(
getenv('HOME', default='.'))))
getenv(
'XDG_CONFIG_HOME',
default='{}/.config'.format(getenv('HOME', default='.')),
)
)
self.cfg.read(cfg_file)
def __contains__(self, database):
@ -153,8 +156,7 @@ class DatabaseProvider(object):
if subdb:
if not subdb in server:
raise Error(f"No subdatabase {subdb} on server")
adapter = BioSqlAdapter(server[subdb].adaptor,
server[subdb].dbid)
adapter = BioSqlAdapter(server[subdb].adaptor, server[subdb].dbid)
else:
adapter = BioSqlAdapter(server.adaptor)
@ -166,8 +168,7 @@ class DatabaseProvider(object):
email = self.cfg.get(database, 'email')
dbname = self.cfg.get(database, 'database')
except NoOptionError:
raise Error("Incomplete configuration for database "
f"{database!r}")
raise Error("Incomplete configuration for database " f"{database!r}")
if dbname not in ['nuccore', 'protein']:
raise Error(f"Invalid database for {database!r}: {dbname}")
@ -196,8 +197,9 @@ class DatabaseProvider(object):
conn_settings['host'] = self.cfg.get(name, 'host')
conn_settings['user'] = self.cfg.get(name, 'user')
conn_settings['database'] = self.cfg.get(name, 'database')
conn_settings['password'] = self.cfg.get(name, 'password',
fallback=None)
conn_settings['password'] = self.cfg.get(
name, 'password', fallback=None
)
except NoOptionError:
raise Error(f"Incomplete configuration for database {name!r}")
@ -213,19 +215,19 @@ class DatabaseProvider(object):
class DatabaseAdapter(object):
"""Base class for database-specific access providers.
This class defines the common interface shared by all the database
adapters.
"""
def query(self, field, pattern):
"""Gets records matching the specified query pattern.
This method queries the underlying database for all records
matching the indicated pattern in the specified field.
The *field* argument can take the following values:
* ``acc`` to search for an accession number;
* ``id`` to search for a record name;
* ``sv`` to search for a versioned accession number or a
@ -233,14 +235,14 @@ class DatabaseAdapter(object):
* ``des`` to search for words in a record's description;
* ``org`` to search for an organism;
* ``key`` to search for words in a record's keywords.
Not all database adapters may support all those types of
queries.
The *pattern* argument may contain wildcards: ``?`` stands for
any character, and ``*`` stands for any number of characters.
Not all database adapters may support wildcards.
:param field: the database field to search against
:param pattern: the pattern to search for
:return: the matching records, as a list of
@ -253,16 +255,16 @@ class DatabaseAdapter(object):
def fetch(self, identifier):
"""Gets records matching the specified identifier.
This method queries the underlying database for all records
whose name or accession number matches the specified pattern.
The identifier may contain wildcards: ``?`` stands for any
character, and ``*`` stands for any number of characters. Not
all database adapters may support wildcards.
:param identifier: the pattern to look for
:return: the matching records, as a list of
:return: the matching records, as a list of
:class:`Bio.SeqRecord.SeqRecord` objects (or objects with a
compatible interface, such as
:class:`BioSQL.BioSeq.DBSeqRecord`)
@ -272,22 +274,21 @@ class DatabaseAdapter(object):
def fetchall(self):
"""Gets all records in the database.
This method returns *all* the records contained in the
underlying database.
Not all database adapters may support this method. In
particular, it is expected that adapters for online databases
will most likely not support it.
:return: the database records, as a list of
:class:`Bio.SeqRecord.SeqRecord` objects (or objects with a
compatible interface, such as
:class:`BioSQL.BioSeq.DBSeqRecord`)
"""
raise Error("Fetching all records from this database is not "
"supported")
raise Error("Fetching all records from this database is not " "supported")
def close(self):
"""Frees resources associated with the database."""
@ -312,20 +313,20 @@ def _pattern_to_sql_pattern(pattern):
class BioSqlAdapter(DatabaseAdapter):
"""Adapter for BioSQL-based sequence databases.
This adapter provides access to any biological database following
the BioSQL schema, as supported by Biopython's ``Bio.BioSQL``
module.
Usage::
from BioSQL.BioSeqDatabase import open_database
server = open_database(...)
# For a server-wide adapter
adapter = BioSqlAdapter(server.adaptor)
# For an adapter restricted to a subdatabase
database = server[database_name]
adapter = BioSqlAdapter(database.adaptor, database.dbid)
@ -334,7 +335,7 @@ class BioSqlAdapter(DatabaseAdapter):
def __init__(self, adaptor, dbid=None):
"""Creates a new BioSQL database adapter.
:param adaptor: a :class:`BioSQL.BioSeqDatabase.Adaptor` object
connected to the target database
:param dbid: a BioSQL subdatabase identifier; if ``None``,
@ -350,13 +351,10 @@ class BioSqlAdapter(DatabaseAdapter):
sql = (
'SELECT bioentry_id FROM bioentry '
'WHERE accession LIKE %s ESCAPE \'\\\''
)
)
args = [_pattern_to_sql_pattern(keyword)]
elif field == 'id':
sql = (
'SELECT bioentry_id FROM bioentry '
'WHERE name LIKE %s ESCAPE \'\\\''
)
sql = 'SELECT bioentry_id FROM bioentry ' 'WHERE name LIKE %s ESCAPE \'\\\''
args = [_pattern_to_sql_pattern(keyword)]
elif field == 'sv':
if '.' in keyword:
@ -368,21 +366,21 @@ class BioSqlAdapter(DatabaseAdapter):
sql = (
'SELECT bioentry_id FROM bioentry '
'WHERE accession LIKE %s ESCAPE \'\\\''
)
)
args = [_pattern_to_sql_pattern(accession)]
else:
sql = (
'SELECT bioentry_id FROM bioentry '
'WHERE (accession LIKE %s ESCAPE \'\\\' '
' AND version = %s)'
)
)
args = [_pattern_to_sql_pattern(accession), version]
else:
sql = (
'SELECT bioentry_id FROM bioentry '
'WHERE (accession LIKE %s ESCAPE \'\\\' '
' OR identifier LIKE %s ESCAPE \'\\\')'
)
)
pattern = _pattern_to_sql_pattern(keyword)
args = [pattern, pattern]
else:
@ -401,7 +399,7 @@ class BioSqlAdapter(DatabaseAdapter):
'SELECT bioentry_id FROM bioentry '
'WHERE (accession LIKE %s ESCAPE \'\\\' '
' OR name LIKE %s ESCAPE \'\\\')'
)
)
args = [pattern, pattern]
if self.dbid is not None:
@ -417,13 +415,13 @@ class BioSqlAdapter(DatabaseAdapter):
'SELECT max(bioentry_id) FROM bioentry '
'WHERE biodatabase_id = %s '
'GROUP BY accession ORDER BY accession'
)
)
args = [self.dbid]
else:
sql = (
'SELECT max(bioentry_id) FROM bioentry '
'GROUP BY accession ORDER BY accession'
)
)
args = []
res = self.adaptor.execute_and_fetchall(sql, args)
return [DBSeqRecord(self.adaptor, r[0]) for r in res]
@ -431,10 +429,10 @@ class BioSqlAdapter(DatabaseAdapter):
class ExpasyAdapter(DatabaseAdapter):
"""Adapter for the ExPASy sequence server.
This adapter queries the ExPASy server to fetch sequences directly
over the Internet.
Only queries by identifier (without wildcards) are supported.
"""
@ -451,16 +449,16 @@ class ExpasyAdapter(DatabaseAdapter):
class EntrezAdapter(DatabaseAdapter):
"""Adapter for the NCBI E-Utilities.
This adapter queries the NCBI server to fetch sequences directly
over the Internet.
Only queries by identifier (without wildcards) are supported.
"""
def __init__(self, email, database):
"""Creates a new Entrez adapter.
:param email: the email address to pass on to the NCBI server
along with any query
:param database: the name of the Entrez database to query; can
@ -472,8 +470,9 @@ class EntrezAdapter(DatabaseAdapter):
def fetch(self, identifier):
try:
handle = Entrez.efetch(db=self.database, id=identifier,
rettype='gb', retmode='text')
handle = Entrez.efetch(
db=self.database, id=identifier, rettype='gb', retmode='text'
)
except HTTPError as e:
raise Error(f"Cannot fetch sequence {identifier}", e)

@ -51,22 +51,44 @@ _text_width = _page_width - 2 * _page_margin
_text_vert_offset = _page_height - 30
# Arbitrary selection of default enzymes
_enzymes = RestrictionBatch(['AflII', 'ApaBI', 'ApaLI', 'AscI', 'AvaI',
'BamHI', 'BglII', 'BstBI',
'ClaI',
'EcoRI', 'EcoRV',
'HindIII',
'KpnI',
'MluI',
'NcoI', 'NdeI', 'NheI', 'NotI',
'PstI', 'PvuI', 'PvuII',
'SacI', 'SacII', 'SalI', 'ScaI', 'SnaBI', 'SpeI',
'XbaI', 'XhoI'])
_enzymes = RestrictionBatch(
[
'AflII',
'ApaBI',
'ApaLI',
'AscI',
'AvaI',
'BamHI',
'BglII',
'BstBI',
'ClaI',
'EcoRI',
'EcoRV',
'HindIII',
'KpnI',
'MluI',
'NcoI',
'NdeI',
'NheI',
'NotI',
'PstI',
'PvuI',
'PvuII',
'SacI',
'SacII',
'SalI',
'ScaI',
'SnaBI',
'SpeI',
'XbaI',
'XhoI',
]
)
def generate_map(vector, size=(500, 500)):
"""Create a circular map from a plasmid sequence.
:param vector: the annotated plasmid sequence, as a
:class:`Bio.SeqRecord.SeqRecord` object
:param size: the size of the map to generate
@ -99,14 +121,20 @@ def generate_map(vector, size=(500, 500)):
feat.strand = 1
if feat.type == 'CDS':
cds_set.add_feature(feat, label=True, sigil='ARROW',
color=colors.green, label_size=8)
cds_set.add_feature(
feat, label=True, sigil='ARROW', color=colors.green, label_size=8
)
elif feat.type in ('promoter', 'LTR'):
cds_set.add_feature(feat, sigil='ARROW', color=colors.lightgreen)
elif feat.type == 'primer_bind':
primer_set.add_feature(feat, name="primer", label=True,
color=colors.lightblue, label_size=4,
sigil='ARROW')
primer_set.add_feature(
feat,
name="primer",
label=True,
color=colors.lightblue,
label_size=4,
sigil='ARROW',
)
rest_track = diagram.new_track(4, name='Enzymes')
rest_track.scale = False
@ -126,7 +154,7 @@ def generate_map(vector, size=(500, 500)):
def summarize_vector(canvas, vector):
"""Create a report describing a plasmid.
:param canvas: the canvas where the report is to be drawn on, as a
:class:`reportlab.pdfgen.canvas.Canvas` object
:param vector: the annotated plasmid sequence, as a
@ -153,10 +181,10 @@ def summarize_vector(canvas, vector):
if feat.strand == -1:
strand = '-'
label = _get_feature_label(feat)
flist.textLine("%d-%d (%s) [%s] %s" % (
feat.location.start + 1, feat.location.end, strand,
feat.type, label
))
flist.textLine(
"%d-%d (%s) [%s] %s"
% (feat.location.start + 1, feat.location.end, strand, feat.type, label)
)
canvas.drawText(flist)
canvas.showPage()

@ -51,28 +51,60 @@ def seqtool(ctx):
@seqtool.command()
@click.argument('sequences', nargs=-1)
@click.option('--output', '-o', default='genbank::stdout', metavar="USA",
help="""Write to the specified USA instead of standard
output.""")
@click.option(
'--output',
'-o',
default='genbank::stdout',
metavar="USA",
help="""Write to the specified USA instead of standard
output.""",
)
@click.option('--name', '-n', metavar="NAME", help="Set the sequence name.")
@click.option('--accession', '-a', metavar="ACC", help="Set the sequence ID.")
@click.option('--description', '-d', help="Set the sequence description.")
@click.option('--circular', '-c', 'topology', flag_value='circular',
help="Force circular topology.")
@click.option('--linear', 'topology', flag_value='linear', default=True,
help="Force linear topology.")
@click.option('--division', '-D', metavar="DIV",
help="""The data file division to use if none is already
assigned to the sequence.""")
@click.option('--clean', '-C', is_flag=True,
help="Clean the output sequence.")
@click.option('--remove-external-features', '-r', is_flag=True,
help="Remove features referring to an external sequence.")
@click.option(
'--circular',
'-c',
'topology',
flag_value='circular',
help="Force circular topology.",
)
@click.option(
'--linear',
'topology',
flag_value='linear',
default=True,
help="Force linear topology.",
)
@click.option(
'--division',
'-D',
metavar="DIV",
help="""The data file division to use if none is already
assigned to the sequence.""",
)
@click.option('--clean', '-C', is_flag=True, help="Clean the output sequence.")
@click.option(
'--remove-external-features',
'-r',
is_flag=True,
help="Remove features referring to an external sequence.",
)
@click.pass_context
def cat(ctx, sequences, output, name, accession, description, topology,
division, clean, remove_external_features):
def cat(
ctx,
sequences,
output,
name,
accession,
description,
topology,
division,
clean,
remove_external_features,
):
"""Read and write sequences.
This tool reads the specified input SEQUENCES and catenates them into
a single output written to standard output.
"""
@ -117,13 +149,18 @@ def cat(ctx, sequences, output, name, accession, description, topology,
@seqtool.command()
@click.argument('source')
@click.option('--output', '-o', metavar="USA", default='fasta::stdout',
help="""Write to the specified USA instead of standard
output.""")
@click.option(
'--output',
'-o',
metavar="USA",
default='fasta::stdout',
help="""Write to the specified USA instead of standard
output.""",
)
@click.pass_context
def siresist(ctx, source, output):
"""Silently mutate a CDS.
Creates a variant of the SOURCE sequence with silent mutations.
"""
@ -143,19 +180,28 @@ def siresist(ctx, source, output):
@seqtool.command()
@click.argument('source')
@click.argument('destination')
@click.option('--output', '-o', metavar="USA",
help="""Write to the specified USA instead of standard
output.""")
@click.option('--reaction', '-r', type=click.Choice(['BP', 'LR']),
default='LR',
help="Specify the type of Gateway reaction.")
@click.option(
'--output',
'-o',
metavar="USA",
help="""Write to the specified USA instead of standard
output.""",
)
@click.option(
'--reaction',
'-r',
type=click.Choice(['BP', 'LR']),
default='LR',
help="Specify the type of Gateway reaction.",
)
@click.option('--name', '-n', metavar="NAME", help="Set the sequence name.")
@click.option('--accession', '-a', metavar="ACC", help="Set the sequence ID.")
@click.option('--description', '-d', help="Set the sequence description.")
@click.option('--clean', '-c', is_flag=True, help="Clean the output sequence.")
@click.pass_context
def gateway(ctx, source, destination, output, reaction, name, accession,
description, clean):
def gateway(
ctx, source, destination, output, reaction, name, accession, description, clean
):
"""Perform in-silico Gateway cloning."""
try:
@ -184,8 +230,14 @@ def gateway(ctx, source, destination, output, reaction, name, accession,
@seqtool.command()
@click.argument('sequences', nargs=-1)
@click.option('--output', '-o', metavar="FILE", default='plasmm.pdf',
help="Write to the specified file.", show_default=True)
@click.option(
'--output',
'-o',
metavar="FILE",
default='plasmm.pdf',
help="Write to the specified file.",
show_default=True,
)
@click.option('--enzymes', '-e', help="Specify the enzymes to display.")
@click.pass_context
def plasmm(ctx, sequences, output, enzymes):
@ -218,18 +270,22 @@ def plasmm(ctx, sequences, output, enzymes):
@seqtool.command()
@click.argument('subject')
@click.argument('query')
@click.option('--type', '-t', 'blast_type', default='blastn',
type=click.Choice(['blastn', 'blastp', 'blastx', 'tblastn',
'tblastx']),
help="The type of alignment to perform.")
@click.option('--database', '-d', is_flag=True,
help="Treat SUBJECT as a database name.")
@click.option('--short', '-s', is_flag=True,
help="Optimize BLAST for short matches.")
@click.option(
'--type',
'-t',
'blast_type',
default='blastn',
type=click.Choice(['blastn', 'blastp', 'blastx', 'tblastn', 'tblastx']),
help="The type of alignment to perform.",
)
@click.option(
'--database', '-d', is_flag=True, help="Treat SUBJECT as a database name."
)
@click.option('--short', '-s', is_flag=True, help="Optimize BLAST for short matches.")
@click.pass_context
def blast(ctx, subject, query, blast_type, database, short):
"""Wrapper for the BLAST programs.
SUBJECT and QUERY should be USAs representing the subject and query
sequences, respectively.
"""

@ -71,14 +71,24 @@ def _check_config_file(ctx, param, value):
return value
@shell(context_settings={'help_option_names': ['-h', '--help']},
prompt=f"{prog_name}> ")
@click.option('--config', '-c', type=click.Path(), default=default_config,
callback=_check_config_file,
help="Path to the configuration file.")
@click.option('--server', '-s', metavar="SERVER",
help="""The server section to use in the configuration
file (defaults to the first section).""")
@shell(
context_settings={'help_option_names': ['-h', '--help']}, prompt=f"{prog_name}> "
)
@click.option(
'--config',
'-c',
type=click.Path(),
default=default_config,
callback=_check_config_file,
help="Path to the configuration file.",
)
@click.option(
'--server',
'-s',
metavar="SERVER",
help="""The server section to use in the configuration file
(defaults to the first section).""",
)
@click.version_option(version=__version__, message=prog_notice)
@click.pass_context
def seqvault(ctx, config, server):
@ -88,8 +98,7 @@ def seqvault(ctx, config, server):
cfg.read(config)
if server and not cfg.has_section(server):
raise click.ClickException(f"No server {server!r} in "
"configuration file")
raise click.ClickException(f"No server {server!r} in " "configuration file")
if not server:
sections = cfg.sections()
@ -98,14 +107,12 @@ def seqvault(ctx, config, server):
server = sections[0]
if cfg.get(server, 'type', fallback=None) != 'biosql':
raise click.ClickException(f"Server {server!r} is not a BioSQL "
"server")
raise click.ClickException(f"Server {server!r} is not a BioSQL " "server")
if cfg.has_option(server, 'server'):
server = cfg.get(server, 'server')
if not cfg.has_section(server):
raise click.ClickException(f"Missing referred server "
f"{server!r}")
raise click.ClickException(f"Missing referred server " f"{server!r}")
conn_settings = {}
try:
@ -118,8 +125,7 @@ def seqvault(ctx, config, server):
conn_settings['host'] = cfg.get(server, 'host')
conn_settings['user'] = cfg.get(server, 'user')
conn_settings['database'] = cfg.get(server, 'database')
conn_settings['password'] = cfg.get(server, 'password',
fallback=None)
conn_settings['password'] = cfg.get(server, 'password', fallback=None)
except NoOptionError:
raise click.ClickException(f"Incomplete configuration for {server!r}")
@ -153,7 +159,7 @@ def listdb(server):
@click.pass_obj
def newdb(server, name, prefix, description):
"""Create a new database.
This command creates a new database on the server.
"""
@ -165,9 +171,14 @@ def newdb(server, name, prefix, description):
@seqvault.command()
@click.argument('database', callback=_get_database)
@click.option('--output', '-o', metavar="USA", default='fasta::stdout',
callback=_get_usa,
help="Write to the specified USA instead of standard output.")
@click.option(
'--output',
'-o',
metavar="USA",
default='fasta::stdout',
callback=_get_usa,
help="Write to the specified USA instead of standard output.",
)
def export(database, output):
"""Export sequences from a database.
@ -180,8 +191,9 @@ def export(database, output):
@seqvault.command('list')
@click.argument('database', callback=_get_database)
@click.option('--all', '-a', 'show_all', is_flag=True,
help="Include obsolete sequences.")
@click.option(
'--all', '-a', 'show_all', is_flag=True, help="Include obsolete sequences."
)
def list_records(database, show_all):
"""List database contents.
@ -199,9 +211,14 @@ def list_records(database, show_all):
@seqvault.command()
@click.argument('accessions', nargs=-1)
@click.option('--output', '-o', metavar="USA", default='fasta::stdout',
callback=_get_usa,
help="Write to the specified USA instead of standard output.")
@click.option(
'--output',
'-o',
metavar="USA",
default='fasta::stdout',
callback=_get_usa,
help="Write to the specified USA instead of standard output.",
)
@click.pass_obj
def get(server, accessions, output):
"""Extract sequences from a database.
@ -254,10 +271,18 @@ def add(server, database, sequences):
@seqvault.command()
@click.argument('accession')
@click.option('--editor', '-e', default='/usr/bin/gvim --nofork',
help="The editor command to use.")
@click.option('--read-only', '-r', is_flag=True,
help="View the record only, do not store back any change.")
@click.option(
'--editor',
'-e',
default='/usr/bin/gvim --nofork',
help="The editor command to use.",
)
@click.option(
'--read-only',
'-r',
is_flag=True,
help="View the record only, do not store back any change.",
)
@click.pass_obj
def edit(server, accession, editor, read_only):
"""Edit a record.

@ -50,14 +50,7 @@ from incenp.bio import Error
_range_spec = re.compile('^([^]]+)\[(-?\d+)?:(-?\d+)?(:r)?\]$')
_search_fields = [
'acc',
'des',
'id',
'key',
'org',
'sv'
]
_search_fields = ['acc', 'des', 'id', 'key', 'org', 'sv']
class FragmentSpec:
@ -71,7 +64,7 @@ class FragmentSpec:
class USA:
"""Represents a parsed Uniform Sequence Address.
This is the compiled form of a USA, as returned by
:func:`parse_usa`.
"""
@ -82,7 +75,7 @@ class USA:
def read(self):
"""Get the sequence records referred to by this USA.
:return: a list of :class:`Bio.SeqRecord.SeqRecord` objects
"""
@ -93,7 +86,7 @@ class USA:
def write(self, records):
"""Write sequence records to the backend referred to by this USA.
:param records: the :class:`Bio.SeqRecord.SeqRecord` to write
"""
@ -328,8 +321,7 @@ def _parse_database(usa, databases):
return DatabaseUSA(databases[dbname])
elif len(parts) == 2:
if dbfield is not None:
return DatabaseUSA(databases[dbname], field=dbfield,
keyword=parts[1])
return DatabaseUSA(databases[dbname], field=dbfield, keyword=parts[1])
else:
return DatabaseUSA(databases[dbname], identifier=parts[1])
else:
@ -343,8 +335,8 @@ _format_extensions = {
'.xdna': 'xdna',
'.dna': 'snapgene',
'.ab1': 'abi',
'.gck': 'gck'
}
'.gck': 'gck',
}
def _parse_file(usa, guess_format=False, extensions_map={}):
@ -370,11 +362,11 @@ def _parse_file(usa, guess_format=False, extensions_map={}):
return ret
def parse_usa(usa, fmt='fasta', fragment=None,
extensions_map=_format_extensions,
databases=None):
def parse_usa(
usa, fmt='fasta', fragment=None, extensions_map=_format_extensions, databases=None
):
"""Parses a Uniform Sequence Address into a compiled form.
:param usa: the Uniform Sequence Address to parse
:param fmt: the default format to use
:param fragment: the default fragment specification, as a
@ -438,15 +430,15 @@ def parse_usa(usa, fmt='fasta', fragment=None,
def read_usa(usa, databases=None):
"""Read sequences referred to by a USA.
This function parses the provided USA and then fetches the corresponding
sequences.
This is a convenience function, equivalent to::
compiled = parse_usa(usa)
compiled.read()
:param usa: the Uniform Sequence Address to parse
:param databases: a dictionary mapping database names to
:class:`incenp.bio.seq.databases.DatabaseAdapter` objects
@ -460,15 +452,15 @@ def read_usa(usa, databases=None):
def write_usa(records, usa):
"""Write sequence records to a USA.
This function parses the provided USA and then writes the records to
the location referred to by it.
This is a convenience function, equivalent to::
compiled = parse_usa(usa)
compiled.write(records)
:param records: the sequence records to write, as a list of
:class:`Bio.SeqRecord.SeqRecord` objects
:param usa: the Uniform Sequence Address to write to

@ -46,10 +46,10 @@ def _build_reverse_codon_table(code):
def silently_mutate(sequence, code=None):
"""Mutate a CDS without changing its translation.
Generate a mutated sequence by altering all possible codons
without introducing any changes in the amino acid translation.
:param sequence: the sequence to mutate, as a :class:`Bio.Seq.Seq`
object
:param code: the codon table to use, as a
@ -66,7 +66,7 @@ def silently_mutate(sequence, code=None):
output = MutableSeq('')
for i in range(int(len(sequence) / 3)):
codon = str(sequence[i * 3:(i * 3) + 3])
codon = str(sequence[i * 3 : (i * 3) + 3])
if codon in code.forward_table:
residue = code.forward_table[codon]
possible_codons = rt[residue]
@ -92,18 +92,20 @@ def remove_external_features(record):
def clean(record, div='UNK', topology='linear'):
"""Clean up a sequence record.
Fix various small issues that can be found in a record depending on
its origin, such as:
* missing division or topology;
* missing translation for CDS features
* presence of non-standard qualifiers added by some programs
"""
# Set the data division if needed
if (not 'data_file_division' in record.annotations
or record.annotations['data_file_division'].isspace()):
if (
not 'data_file_division' in record.annotations
or record.annotations['data_file_division'].isspace()
):
record.annotations['data_file_division'] = div
# Fix topology
@ -120,7 +122,7 @@ def clean(record, div='UNK', topology='linear'):
continue
if record.annotations['molecule_type'] == 'protein':
continue
seq = record.seq[cds.location.start.position:cds.location.end.position]
seq = record.seq[cds.location.start.position : cds.location.end.position]
if cds.strand == -1:
seq = seq.reverse_complement()
cds.qualifiers['translation'] = str(seq.translate())
@ -132,7 +134,10 @@ def clean(record, div='UNK', topology='linear'):
record.features.remove(feature)
# Fragments
if 'ugene_group' in feature.qualifiers and 'fragments' in feature.qualifiers['ugene_group']:
if (
'ugene_group' in feature.qualifiers
and 'fragments' in feature.qualifiers['ugene_group']
):
record.features.remove(feature)
# Source
@ -153,8 +158,8 @@ def clean(record, div='UNK', topology='linear'):
_alphabets = {
'DNA': 'ACGTRYSWKMBDHVN',
'RNA': 'ACGURYSWKMBDHVN',
'protein': 'ACDEFGHIKLMNPQRSTVWY*'
}
'protein': 'ACDEFGHIKLMNPQRSTVWY*',
}
def guess_molecule_type(sequence, alphabets=_alphabets):
@ -169,40 +174,40 @@ def guess_molecule_type(sequence, alphabets=_alphabets):
_gateway_sequences = {
'attB1': 'ACAAGTTTGTACAAAAAAGCAGGCT',
'attB2': 'ACCCAGCTTTCTTGTACAAAGTGGT',
'attL1': 'AAATAATGATTTTATTTTGACTGATAGTGACCTGTTCGTTGCAACACATTGATGAGCAAT'
'GCTTTTTTATAATGCCAACTTTGTACAAAAAAGCAGGCT',
'attL2': 'ACCCAGCTTTCTTGTACAAAGTTGGCATTATAAGAAAGCATTGCTTATCAATTTGTTGCA'
'ACGAACAGGTCACTATCAGTCAAAATAAAATCATTATTT',
'attP1': 'AAATAATGATTTTATTTTGACTGATAGTGACCTGTTCGTTGCAACACATTGATGAGCAAT'
'GCTTTTTTATAATGCCAACTTTGTACAAAAAAGCTGAACGAGAAACGTAAAATGATATAA'
'ATATCAATATATTAAATTAGATTTTGCATAAAAAACAGACTACATAATACTGTAAAACAC'
'AACATATCCAGTCACTATGAATCAACTACTTAGATGGTATTAGTGACCTGTA',
'attP2': 'TACAGGTCACTAATACCATCTAAGTAGTTGATTCATAGTGACTGGATATGTTGTGTTTTA'
'CAGTATTATGTAGTCTGTTTTTTATGCAAAATCTAATTTAATATATTGATATTTATATCA'
'TTTTACGTTTCTCGTTCAGCTTTCTTGTACAAAGTTGGCATTATAAGAAAGCATTGCTTA'
'TCAATTTGTTGCAACGAACAGGTCACTATCAGTCAAAATAAAATCATTATTT',
'attR1': 'ACAAGTTTGTACAAAAAAGCTGAACGAGAAACGTAAAATGATATAAATATCAATATATTA'
'AATTAGATTTTGCATAAAAAACAGACTACATAATACTGTAAAACACAACATATCCAGTCA'
'CTATG',
'attR2': 'ATAGTGACTGGATATGTTGTGTTTTACAGTATTATGTAGTCTGTTTTTTATGCAAAATCT'
'AATTTAATATATTGATATTTATATCATTTTACGTTTCTCGTTCAGCTTTCTTGTACAAAG'
'TGGT'
}
'attB1': 'ACAAGTTTGTACAAAAAAGCAGGCT',
'attB2': 'ACCCAGCTTTCTTGTACAAAGTGGT',
'attL1': 'AAATAATGATTTTATTTTGACTGATAGTGACCTGTTCGTTGCAACACATTGATGAGCAAT'
'GCTTTTTTATAATGCCAACTTTGTACAAAAAAGCAGGCT',
'attL2': 'ACCCAGCTTTCTTGTACAAAGTTGGCATTATAAGAAAGCATTGCTTATCAATTTGTTGCA'
'ACGAACAGGTCACTATCAGTCAAAATAAAATCATTATTT',
'attP1': 'AAATAATGATTTTATTTTGACTGATAGTGACCTGTTCGTTGCAACACATTGATGAGCAAT'
'GCTTTTTTATAATGCCAACTTTGTACAAAAAAGCTGAACGAGAAACGTAAAATGATATAA'
'ATATCAATATATTAAATTAGATTTTGCATAAAAAACAGACTACATAATACTGTAAAACAC'
'AACATATCCAGTCACTATGAATCAACTACTTAGATGGTATTAGTGACCTGTA',
'attP2': 'TACAGGTCACTAATACCATCTAAGTAGTTGATTCATAGTGACTGGATATGTTGTGTTTTA'
'CAGTATTATGTAGTCTGTTTTTTATGCAAAATCTAATTTAATATATTGATATTTATATCA'
'TTTTACGTTTCTCGTTCAGCTTTCTTGTACAAAGTTGGCATTATAAGAAAGCATTGCTTA'
'TCAATTTGTTGCAACGAACAGGTCACTATCAGTCAAAATAAAATCATTATTT',
'attR1': 'ACAAGTTTGTACAAAAAAGCTGAACGAGAAACGTAAAATGATATAAATATCAATATATTA'
'AATTAGATTTTGCATAAAAAACAGACTACATAATACTGTAAAACACAACATATCCAGTCA'
'CTATG',
'attR2': 'ATAGTGACTGGATATGTTGTGTTTTACAGTATTATGTAGTCTGTTTTTTATGCAAAATCT'
'AATTTAATATATTGATATTTATATCATTTTACGTTTCTCGTTCAGCTTTCTTGTACAAAG'
'TGGT',
}
_gateway_reactions = {
'BP': {
'source': ['attB1', 'attB2'],
'target': ['attP1', 'attP2'],
'result': ['attL1', 'attL2']
},
'LR': {
'source': ['attL1', 'attL2'],
'target': ['attR1', 'attR2'],
'result': ['attB1', 'attB2']
}
}
'BP': {
'source': ['attB1', 'attB2'],
'target': ['attP1', 'attP2'],
'result': ['attL1', 'attL2'],
},
'LR': {
'source': ['attL1', 'attL2'],
'target': ['attR1', 'attR2'],
'result': ['attB1', 'attB2'],
},
}
def _find_gateway_sites(sequence, reaction='LR', kind='source', log=None):
@ -223,7 +228,7 @@ def _find_gateway_sites(sequence, reaction='LR', kind='source', log=None):
def gateway(source, destination, reaction='LR', log=None):
"""Perform a Gateway reaction between two sequences.
:param source: the source sequence
:param destination: the destination sequence
:param reaction: the type of Gateway reaction to perform (can be
@ -239,16 +244,20 @@ def gateway(source, destination, reaction='LR', log=None):
src_indexes = _find_gateway_sites(source, reaction, 'source', log)
if len(src_indexes) != 2:
return
source_part = source[src_indexes[0]:src_indexes[1]]
source_part = source[src_indexes[0] : src_indexes[1]]
if log:
log.write("Found source region: {}..{}\n".format(src_indexes[0], src_indexes[1]))
log.write(
"Found source region: {}..{}\n".format(src_indexes[0], src_indexes[1])
)
dst_indexes = _find_gateway_sites(destination, reaction, 'target', log)
if len(dst_indexes) != 2:
return
dest_parts = [destination[:dst_indexes[0]], destination[dst_indexes[1]:]]
dest_parts = [destination[: dst_indexes[0]], destination[dst_indexes[1] :]]
if log:
log.write("Found target region: {}..{}\n".format(dst_indexes[0], dst_indexes[1]))
log.write(
"Found target region: {}..{}\n".format(dst_indexes[0], dst_indexes[1])
)
result_sites = []
for i in [0, 1]:
@ -259,11 +268,16 @@ def gateway(source, destination, reaction='LR', log=None):
if i == 1:
strand = -1
rec.features.append(
SeqFeature(FeatureLocation(0, len(seq), strand=strand),
type='misc_recomb',
qualifiers={'note': [name + ' recombination site']}))
SeqFeature(
FeatureLocation(0, len(seq), strand=strand),
type='misc_recomb',
qualifiers={'note': [name + ' recombination site']},
)
)
result_sites.append(rec)
clone = dest_parts[0] + result_sites[0] + source_part + result_sites[1] + dest_parts[1]
clone = (
dest_parts[0] + result_sites[0] + source_part + result_sites[1] + dest_parts[1]
)
return clone

@ -41,7 +41,6 @@ from BioSQL.Loader import DatabaseLoader
class Server(DBServer):
def __getitem__(self, name):
"""Get the specified sub-database."""
@ -54,13 +53,10 @@ class Server(DBServer):