Browse Source

Rework the seqvault command.

Move the seqvault command to a separate Click-based module.
master
Damien Goutte-Gattat 2 years ago
parent
commit
ab6e617215
  1. 238
      incenp/bio/seq/seqvault.py
  2. 230
      incenp/bio/seq/vault.py
  3. 6
      setup.py

238
incenp/bio/seq/seqvault.py

@ -0,0 +1,238 @@
# -*- coding: utf-8 -*-
# Incenp.Bioutils - Incenp.org's utilities for computational biology
# Copyright © 2020 Damien Goutte-Gattat
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
"""A tool to access a BioSQL-based sequence database."""
import sys
from configparser import ConfigParser
from hashlib import md5
from subprocess import run
from tempfile import NamedTemporaryFile
from Bio import SeqIO
from BioSQL.BioSeqDatabase import open_database
import click
from click_shell import shell
from incenp.bio import __version__
from incenp.bio.seq import vault
from incenp.bio.seq.usa import read_usa, write_usa
# Name of the command-line program; reused in the version notice below
# and in the interactive prompt of the top-level command.
prog_name = "seqvault"
# Text handed to click.version_option() as the --version message:
# program name and package version, followed by the copyright and
# licensing notice.
prog_notice = f"""\
{prog_name} {__version__}
Copyright © 2020 Damien Goutte-Gattat
This program is released under the GNU General Public License.
See the COPYING file or <http://www.gnu.org/licenses/gpl.html>.
"""
def _get_database(ctx, _, value):
    """Resolve a database name given on the command line.

    Used as a Click parameter callback: the name is looked up in the
    object attached to the context.

    :param ctx: the Click context; ``ctx.obj`` is indexed by database name
    :param _: the parameter being processed (ignored)
    :param value: the database name to look up
    :return: whatever ``ctx.obj`` holds under *value*
    :raises click.BadParameter: if the lookup raises ``KeyError``
    """
    try:
        database = ctx.obj[value]
    except KeyError:
        raise click.BadParameter(f"No {value!r} database on the server")
    else:
        return database
# Top-level command group, declared with click_shell's `shell` decorator
# (prompt "seqvault> ").
#
# Connection settings are resolved from, in increasing order of priority:
#   1. the built-in defaults below,
#   2. the [Server] section of the configuration file,
#   3. the --driver/--host/--user/--name options.
# The resulting server object is stored in ``ctx.obj`` for the subcommands.
@shell(context_settings={'help_option_names': ['-h', '--help']},
       prompt=f"{prog_name}> ")
@click.option('--config', '-c', type=click.Path(exists=True),
              default=None,
              help="Path to the configuration file.")
@click.option('--driver', metavar="DRIVER",
              help="Specify the database driver.")
@click.option('--host', '-H', metavar="HOST",
              help="Specify the database host.")
@click.option('--user', '-u', metavar="USER",
              help="Specify the database user name.")
@click.option('--name', '-n', metavar="NAME",
              help="Specify the database name.")
@click.version_option(version=__version__, message=prog_notice)
@click.pass_context
def seqvault(ctx, config, driver, host, user, name):
    """Access a BioSQL sequence database."""
    cfg = ConfigParser()
    cfg.add_section('Server')
    builtin_defaults = (
        ('driver', 'psycopg2'),
        ('host', 'localhost'),
        ('user', 'seqvault'),
        ('database', 'seqvault'),
    )
    for key, value in builtin_defaults:
        cfg.set('Server', key, value)

    if config is None:
        # The per-user file is optional. It is resolved here rather than
        # as the option's default, because click.Path(exists=True) also
        # validates defaults and would reject a missing file before the
        # built-in settings could be used. An explicit --config path is
        # still required to exist.
        config = '{}/seqvault.rc'.format(click.get_app_dir('seqvault'))
    # ConfigParser.read() silently skips files it cannot open.
    cfg.read(config)

    # Command-line options take precedence over the configuration file.
    overrides = (
        ('driver', driver),
        ('host', host),
        ('user', user),
        ('database', name),
    )
    for key, value in overrides:
        if value:
            cfg.set('Server', key, value)

    server = open_database(**dict(cfg.items('Server')))
    # Re-class the connection object as the project's Server type.
    server.__class__ = vault.Server
    ctx.obj = server
@seqvault.command()
@click.pass_obj
def listdb(server):
    """List databases.

    This command prints information about the databases available on the
    server.
    """
    # Pad the header cells to the same widths as the row cells below
    # (16 and 8 characters) so that the columns line up.
    print(f"{'NAME':16}{'PREFIX':8}ENTRIES")
    for db in server.values():
        print(f"{db.name:16}{db.get_prefix():8}{len(db)}")
@seqvault.command()
@click.argument('database', callback=_get_database)
@click.option('--output', '-o', metavar="USA", default='fasta::stdout',
              help="Write to the specified USA instead of standard output.")
def export(database, output):
    """Export sequences from a database.

    This command exports all the sequences contained in the specified
    DATABASE.
    """
    # `database` has already been resolved by the _get_database callback.
    sequences = database.get_unique_Seqs()
    write_usa(sequences, output)
@seqvault.command('list')
@click.argument('database', callback=_get_database)
@click.option('--all', '-a', 'show_all', is_flag=True,
              help="Include obsolete sequences.")
def list_records(database, show_all):
    """List database contents.

    This command lists all the sequences contained in the specified
    DATABASE.
    """
    # With --all every entry is listed; otherwise only what
    # get_unique_seqs() returns.
    entries = database.values() if show_all else database.get_unique_seqs()
    for entry in entries:
        print(f"{entry.name:17}{entry.id:15}{entry.description}")
@seqvault.command()
@click.argument('accessions', nargs=-1)
@click.option('--output', '-o', metavar="USA", default='fasta::stdout',
              help="Write to the specified USA instead of standard output.")
@click.pass_obj
def get(server, accessions, output):
    """Extract sequences from a database.

    This command extracts sequences with the specified ACCESSIONS from
    any database on the server.
    """
    records = [server.get_Seq_by_accession(acc) for acc in accessions]
    # ACCESSIONS may be empty (nargs=-1); nothing is written in that case.
    if records:
        write_usa(records, output)
@seqvault.command()
@click.argument('database', callback=_get_database)
@click.argument('sequences', nargs=-1)
@click.pass_obj
def add(server, database, sequences):
    """Add sequences to a database.

    This command imports the specified SEQUENCES (as USAs) into the
    specified DATABASE.
    """
    # Step 1: read every record from every USA given on the command line.
    try:
        records = [record for usa in sequences for record in read_usa(usa)]
    except Exception as e:
        raise click.ClickException(f"Cannot read sequences: {e}")

    # Step 2: store the records and commit the transaction.
    try:
        database.load(records)
        server.commit()
    except Exception as e:
        raise click.ClickException(f"Cannot load sequences: {e}")

    # Step 3: fetch the newly inserted records back (by their 'gi'
    # annotation) and write them out in GenBank format.
    try:
        extracted = [database.lookup(gi=str(record.annotations['gi']))
                     for record in records]
        write_usa(extracted, 'genbank::stdout')
    except Exception as e:
        raise click.ClickException(f"Cannot write sequences: {e}")
@seqvault.command()
@click.argument('accession')
@click.option('--editor', '-e', default='/usr/bin/gvim --nofork',
              help="The editor command to use.")
@click.option('--read-only', '-r', is_flag=True,
              help="View the record only, do not store back any change.")
@click.pass_obj
def edit(server, accession, editor, read_only):
    """Edit a record.

    This command extracts the sequence with the specified ACCESSION
    number and fires up an external editor to view and edit the
    sequence before saving any changes back to the database.
    """
    # Local import: only needed to remove the temporary file.
    import os

    def _digest(path):
        # Hash the file contents, closing the handle once it has been read.
        with open(path, 'rb') as stream:
            return md5(stream.read()).hexdigest()

    record = server.get_Seq_by_accession(accession)

    # delete=False because the file must outlive its handle: the editor
    # reopens it by name. It is removed explicitly in the finally clause.
    tmpfile = NamedTemporaryFile(mode='w', delete=False)
    keep_file = False
    try:
        with tmpfile:
            SeqIO.write(record, tmpfile, 'genbank')

        before = None if read_only else _digest(tmpfile.name)

        command = editor.split()
        command.append(tmpfile.name)
        run(command)

        # Changes are detected by comparing content digests taken before
        # and after the editor ran.
        if not read_only and _digest(tmpfile.name) != before:
            # From here on the file holds the user's edits; keep it if
            # storing them fails so the work is not lost.
            keep_file = True
            new_record = SeqIO.read(tmpfile.name, 'genbank')
            # The first three characters of the record ID select the
            # target database.
            db = server.get_database_by_prefix(new_record.id[:3])
            db.load([new_record])
            server.commit()
            extracted = db.lookup(gi=str(new_record.annotations['gi']))
            write_usa([extracted], 'genbank::stdout')
            keep_file = False
    finally:
        if keep_file:
            click.echo(f"Edited record left in {tmpfile.name}", err=True)
        else:
            os.unlink(tmpfile.name)
if __name__ == '__main__':
    # Last-resort handler for errors Click did not already report.
    try:
        seqvault()
    except Exception as e:
        print(f"seqvault: Unexpected error: {e}", file=sys.stderr)
        # Report the failure to the caller through the exit status too.
        sys.exit(1)

230
incenp/bio/seq/vault.py

@ -17,20 +17,9 @@
"""Access a BioSQL-based sequence vault."""
from argparse import ArgumentParser
from configparser import ConfigParser
from hashlib import md5
from os import getenv
from subprocess import run
from tempfile import NamedTemporaryFile
from Bio import SeqIO
from BioSQL.BioSeq import DBSeqRecord
from BioSQL.BioSeqDatabase import open_database, BioSeqDatabase, DBServer
from BioSQL.BioSeqDatabase import BioSeqDatabase, DBServer
from BioSQL.Loader import DatabaseLoader
from IPython import embed
from incenp.bio.seq.usa import read_usa, write_usa
from incenp.helpers.subcommands import Command, CommandList
class Server(DBServer):
@ -216,220 +205,3 @@ class Database(BioSeqDatabase):
db_loader.load_seqrecord(record)
return num_records
class ListDatabaseCommand(Command):
    """Subcommand ``listdb``: list all databases, or summarise one."""

    def __init__(self):
        super().__init__('listdb', "list databases")

    def prepare_parser(self, subparser):
        """Declare the optional ``database`` positional argument."""
        subparser.add_argument('database', nargs='?', default=None, help="show only the specified database")

    def execute(self, args):
        """Print a table of all databases, or the size of the named one."""
        if not args.database:
            print("{:16s}{:8s}{:8s}".format('NAME', 'PREFIX', 'ENTRIES'))
            for database in args.server.values():
                print("{:16s}{:8s}{:d}".format(database.name, database.get_prefix(), len(database)))
            return

        selected = args.server[args.database]
        print("{:16s}{:d} sequences".format(selected.name, len(selected)))
class GetRecordCommand(Command):
    """Subcommand ``get``: look up records by accession number."""

    def __init__(self):
        super().__init__('get', "extract records from a database")

    def prepare_parser(self, subparser):
        """Declare the ``accessions`` positional argument (one or more)."""
        subparser.add_argument('accessions', nargs='+', help="accession number(s)")

    def execute(self, args):
        """Fetch every requested record, then print its name and length."""
        # All lookups are done before anything is printed.
        found = [args.server.get_Seq_by_accession(acc) for acc in args.accessions]
        for item in found:
            print("{} - {}".format(item.name, len(item)))
class ListRecordCommand(Command):
    """Subcommand ``list``: print the entries of one database."""

    def __init__(self):
        super().__init__('list', "list database contents")

    def prepare_parser(self, subparser):
        """Declare the ``database`` argument and the ``-a/--all`` flag."""
        subparser.add_argument('database', help="the database to list")
        subparser.add_argument('-a', '--all', dest='show_all', action='store_true',
                               help="include obsolete sequences")

    def execute(self, args):
        """Print name, ID and description for each selected entry."""
        database = args.server[args.database]
        # With --all every entry is listed; otherwise only what
        # get_unique_seqs() returns.
        selection = database.values() if args.show_all else database.get_unique_seqs()
        for item in selection:
            print("{:17s}{:15s}{}".format(item.name, item.id, item.description))
class AddRecordCommand(Command):
    """Subcommand ``add``: load records into a database and echo them back."""

    def __init__(self):
        super().__init__('add', "add records to a database")

    def prepare_parser(self, subparser):
        """Declare the ``database`` and ``records`` positional arguments."""
        subparser.add_argument('database', help="the database to add records to")
        subparser.add_argument('records', nargs='+', help="the records to add, as USAs")

    def execute(self, args):
        """Read the given USAs, store their records, and write them out.

        Each of the three stages (read, load, write) re-raises any failure
        as a plain ``Exception`` with a stage-specific message.
        """
        target = args.server[args.database]

        # Stage 1: read the records, announcing each USA before reading it.
        try:
            loaded = []
            for usa in args.records:
                print("Reading usa {}".format(usa))
                loaded += read_usa(usa)
        except Exception as e:
            raise Exception("cannot read sequences: {}".format(e))

        # Stage 2: store the records and commit.
        try:
            target.load(loaded)
            args.server.commit()
        except Exception as e:
            raise Exception("cannot load sequences: {}".format(e))

        # Stage 3: fetch the newly inserted records back (by their 'gi'
        # annotation) and write them out in GenBank format.
        try:
            stored = [target.lookup(gi=str(item.annotations['gi'])) for item in loaded]
            write_usa(stored, 'genbank::stdout')
        except Exception as e:
            raise Exception("cannot write sequences: {}".format(e))
class EditRecordCommand(Command):
    """Subcommand ``edit``: edit one record in an external editor."""

    def __init__(self):
        Command.__init__(self, 'edit', "edit a record")

    def prepare_parser(self, subparser):
        """Declare the ``record`` argument and the editor/view-only options."""
        subparser.add_argument('record', help="accession number of the record to edit")
        subparser.add_argument('--editor', '-e', default='/usr/bin/gvim --nofork',
                               help="the editor command to use")
        subparser.add_argument('--view-only', '-v', action='store_true', dest='readonly',
                               help="view the record only, do not store back any change")

    @staticmethod
    def _digest(path):
        """Return the MD5 hex digest of the file at *path*."""
        # The handle is closed as soon as the contents have been read.
        with open(path, 'rb') as stream:
            return md5(stream.read()).hexdigest()

    def execute(self, args):
        """Dump the record to a temporary GenBank file, run the editor on
        it and, unless in view-only mode, store the record back if the
        file contents changed.

        The temporary file is removed afterwards, except when storing the
        edited record fails: it is then kept and its path is printed to
        standard error.
        """
        # Local imports: only needed for cleanup and error reporting.
        import os
        import sys

        record = args.server.get_Seq_by_accession(args.record)

        # delete=False because the file must outlive its handle: the
        # editor reopens it by name. It is removed in the finally clause.
        tmpfile = NamedTemporaryFile(mode='w', delete=False)
        keep_file = False
        try:
            with tmpfile:
                SeqIO.write(record, tmpfile, 'genbank')

            before = None if args.readonly else self._digest(tmpfile.name)

            command = args.editor.split()
            command.append(tmpfile.name)
            run(command)

            # Changes are detected by comparing content digests taken
            # before and after the editor ran.
            if not args.readonly and self._digest(tmpfile.name) != before:
                # The file now holds the user's edits; keep it if storing
                # them fails.
                keep_file = True
                new_record = SeqIO.read(tmpfile.name, 'genbank')
                # The first three characters of the record ID select the
                # target database.
                db = args.server.get_database_by_prefix(new_record.id[:3])
                db.load([new_record])
                args.server.commit()
                extracted_record = db.lookup(gi=str(new_record.annotations['gi']))
                write_usa([extracted_record], 'genbank::stdout')
                keep_file = False
        finally:
            if keep_file:
                print("edited record left in {}".format(tmpfile.name), file=sys.stderr)
            else:
                os.unlink(tmpfile.name)
class ExportCommand(Command):
    """Subcommand ``export``: write out the sequences of one database."""

    def __init__(self):
        super().__init__('export', "export all sequences from a database")

    def prepare_parser(self, subparser):
        """Declare the ``database`` argument and the ``-o/--output`` option."""
        subparser.add_argument('database', help="the database to export")
        subparser.add_argument('--output', '-o', default='fasta::stdout',
                               help="where to write the exported sequences, as a USA")

    def execute(self, args):
        """Write what ``get_unique_Seqs()`` returns to the output USA."""
        source = args.server[args.database]
        sequences = source.get_unique_Seqs()
        write_usa(sequences, args.output)
class ShellCommand(Command):
    """Subcommand ``shell``: drop into an embedded IPython session."""

    def __init__(self):
        super().__init__('shell', "open a IPython shell")

    def execute(self, args):
        """Bind the server to a local name and start the embedded shell."""
        # Do not remove or rename this binding even though nothing below
        # reads it: presumably it exists so that the embedded session can
        # refer to `server` by name -- TODO confirm against IPython.embed.
        server = args.server
        embed()
def main():
    """Command-line entry point.

    Parses the arguments, resolves the connection settings (built-in
    defaults, then the ``[Server]`` section of the configuration file,
    then the ``--driver/--host/--user/--name`` options), opens the
    database and runs the selected subcommand. Any exception raised by
    the subcommand is reported on standard error and the program exits
    with status 1.
    """
    home_dir = getenv('HOME', default='')
    # An empty XDG_CONFIG_HOME is treated like an unset one, so the
    # configuration is never looked up under the filesystem root.
    config_dir = getenv('XDG_CONFIG_HOME') or '{}/.config'.format(home_dir)
    config_file = '{}/seqvault/seqvault.rc'.format(config_dir)

    parser = ArgumentParser(description="access a BioSQL sequence vault")
    parser.add_argument('--config', '-c', default=config_file,
                        help="path to the configuration file")

    db_group = parser.add_argument_group("database options")
    db_group.add_argument('--driver', default=None, help="database driver")
    db_group.add_argument('--host', default=None, help="database host")
    db_group.add_argument('--user', default=None, help="database user")
    db_group.add_argument('--name', default=None, help="database name")

    CommandList(parser.add_subparsers(dest='command', required=True),
                [ListDatabaseCommand(),
                 GetRecordCommand(),
                 AddRecordCommand(),
                 ListRecordCommand(),
                 EditRecordCommand(),
                 ShellCommand(),
                 ExportCommand()
                 ])

    args = parser.parse_args()

    config = ConfigParser()
    config.add_section('Server')
    config.set('Server', 'driver', 'psycopg2')
    config.set('Server', 'host', 'localhost')
    config.set('Server', 'user', 'seqvault')
    config.set('Server', 'database', 'seqvault')
    # ConfigParser.read() silently skips files it cannot open.
    config.read(args.config)

    # Command-line options take precedence over the configuration file.
    if args.driver:
        config.set('Server', 'driver', args.driver)
    if args.host:
        config.set('Server', 'host', args.host)
    if args.user:
        config.set('Server', 'user', args.user)
    if args.name:
        config.set('Server', 'database', args.name)

    server = open_database(**dict(config.items('Server')))
    # Re-class the connection object as this module's Server type.
    server.__class__ = Server
    args.server = server

    try:
        args.func(args)
    except Exception as e:
        # parser.exit() writes the message verbatim, so the newline has
        # to be supplied here.
        parser.exit(1, "{}: unknown error: {}\n".format(parser.prog, e))


if __name__ == '__main__':
    main()

6
setup.py

@ -38,7 +38,8 @@ setup(
],
install_requires=[
'click'
'click',
'click_shell'
],
packages=[
@ -50,7 +51,8 @@ setup(
entry_points={
'console_scripts': [
'seqtool = incenp.bio.seq.seqtool:main',
'seqtool = incenp.bio.seq.seqtool:seqtool',
'seqvault = incenp.bio.seq.seqvault:seqvault',
'cc3d-runner = incenp.bio.modelling.cc3d:main'
]
}

Loading…
Cancel
Save