Source code for aiida.cmdline.commands.exportfile

# -*- coding: utf-8 -*-
###########################################################################
# Copyright (c), The AiiDA team. All rights reserved.                     #
# This file is part of the AiiDA code.                                    #
#                                                                         #
# The code is hosted on GitHub at https://github.com/aiidateam/aiida_core #
# For further information on the license, see the LICENSE.txt file        #
# For further information please visit http://www.aiida.net               #
###########################################################################
import click
from aiida.cmdline.commands import verdi, export
from aiida.cmdline.baseclass import VerdiCommandWithSubcommands

CONTEXT_SETTINGS = dict(help_option_names=['-h', '--help'])


class DanglingLinkError(Exception):
    pass


class Export(VerdiCommandWithSubcommands):
    """
    Create and manage AiiDA export archives
    """

    def __init__(self):
        self.valid_subcommands = {
            'create': (self.cli, self.complete_none),
            'migrate': (self.cli, self.complete_none)
        }

    def cli(self, *args):
        verdi()

@export.command('create', context_settings=CONTEXT_SETTINGS)
@click.argument('outfile', type=click.Path())
@click.option('-n', '--nodes', multiple=True, type=int,
    help='Export the given nodes by pk')
@click.option('-c', '--computers', multiple=True, type=int,
    help='Export the given computers by pk')
@click.option('-G', '--groups', multiple=True, type=int,
    help='Export the given groups by pk')
@click.option('-g', '--group_names', multiple=True, type=str,
    help='Export the given groups by group name')
@click.option('-P', '--no-parents', is_flag=True, default=False,
    help='Store only the nodes that are explicitly given, without exporting the parents')
@click.option('-O', '--no-calc-outputs', is_flag=True, default=False,
    help='If a calculation is included in the list of nodes to export, do not export its outputs')
@click.option('-f', '--overwrite', is_flag=True, default=False,
    help='Overwrite the output file, if it exists')
@click.option('-a', '--archive-format', type=click.Choice(['zip', 'zip-uncompressed', 'tar.gz']), default='zip')
def create(outfile, computers, groups, nodes, group_names, no_parents, no_calc_outputs, overwrite, archive_format):
    """
    Export nodes and groups of nodes to an archive file for backup or sharing purposes
    """
    import sys

    from aiida.backends.utils import load_dbenv
    load_dbenv()

    from aiida.orm import Group, Node, Computer
    from aiida.orm.querybuilder import QueryBuilder
    from aiida.orm.importexport import export, export_zip

    node_id_set = set(nodes)
    group_dict = dict()

    if group_names:
        qb = QueryBuilder()
        qb.append(Group, tag='group', project=['*'], filters={'name': {'in': group_names}})
        qb.append(Node, tag='node', member_of='group', project=['id'])
        res = qb.dict()

        group_dict.update({group['group']['*'].name: group['group']['*'].dbgroup for group in res})
        node_id_set.update([node['node']['id'] for node in res])

    if groups:
        qb = QueryBuilder()
        qb.append(Group, tag='group', project=['*'], filters={'id': {'in': groups}})
        qb.append(Node, tag='node', member_of='group', project=['id'])
        res = qb.dict()

        group_dict.update({group['group']['*'].name: group['group']['*'].dbgroup for group in res})
        node_id_set.update([node['node']['id'] for node in res])

    # The db_groups that correspond to what was searched above
    dbgroups_list = group_dict.values()

    # Getting the nodes that correspond to the ids that were found above
    if len(node_id_set) > 0:
        qb = QueryBuilder()
        qb.append(Node, tag='node', project=['*'], filters={'id': {'in': node_id_set}})
        node_list = [node[0] for node in qb.all()]
    else:
        node_list = list()

    # Check if any of the nodes wasn't found in the database.
    missing_nodes = node_id_set.difference(node.id for node in node_list)
    for node_id in missing_nodes:
        print >> sys.stderr, ('WARNING! Node with pk={} not found, skipping'.format(node_id))

    # The dbnodes of the above node list
    dbnode_list = [node.dbnode for node in node_list]

    if computers:
        qb = QueryBuilder()
        qb.append(Computer, tag='comp', project=['*'], filters={'id': {'in': set(computers)}})
        computer_list = [computer[0] for computer in qb.all()]
        missing_computers = set(computers).difference(computer.id for computer in computer_list)
        for computer_id in missing_computers:
            print >> sys.stderr, ('WARNING! Computer with pk={} not found, skipping'.format(computer_id))
    else:
        computer_list = []

    # The dbcomputers of the above computer list
    dbcomputer_list = [computer.dbcomputer for computer in computer_list]

    what_list = dbnode_list + dbcomputer_list + dbgroups_list

    additional_kwargs = dict()
    if archive_format == 'zip':
        export_function = export_zip
        additional_kwargs.update({'use_compression': True})
    elif archive_format == 'zip-uncompressed':
        export_function = export_zip
        additional_kwargs.update({'use_compression': False})
    elif archive_format == 'tar.gz':
        export_function = export
    else:
        print >> sys.stderr, 'invalid --archive-format value {}'.format(archive_format)
        sys.exit(1)

    try:
        export_function(
            what=what_list, also_parents=not no_parents,
            also_calc_outputs=not no_calc_outputs,
            outfile=outfile, overwrite=overwrite,
            **additional_kwargs
        )
    except IOError as e:
        print >> sys.stderr, 'IOError: {}'.format(e.message)
        sys.exit(1)


@export.command('migrate', context_settings=CONTEXT_SETTINGS)
@click.argument('file_input', type=click.Path(exists=True))
@click.argument('file_output', type=click.Path())
@click.option('-f', '--force', is_flag=True, default=False,
    help='overwrite output file if it already exists')
@click.option('-s', '--silent', is_flag=True, default=False,
    help='suppress output')
@click.option('-a', '--archive-format', type=click.Choice(['zip', 'zip-uncompressed', 'tar.gz']), default='zip')
def migrate(file_input, file_output, force, silent, archive_format):
    """
    An entry point to migrate existing AiiDA export archives between version numbers
    """
    import os, json, sys
    import tarfile, zipfile

    from aiida.common.folders import SandboxFolder
    from aiida.common.archive import extract_zip, extract_tar

    if os.path.exists(file_output) and not force:
        print >> sys.stderr, 'Error: the output file already exists'
        sys.exit(2)

    with SandboxFolder(sandbox_in_repo=False) as folder:

        if zipfile.is_zipfile(file_input):
            extract_zip(file_input, folder, silent=silent)
        elif tarfile.is_tarfile(file_input):
            extract_tar(file_input, folder, silent=silent)
        else:
            print >> sys.stderr, 'Error: invalid file format, expected either a zip archive or gzipped tarball'
            sys.exit(2)

        try:
            with open(folder.get_abs_path('data.json')) as f:
                data = json.load(f)
            with open(folder.get_abs_path('metadata.json')) as f:
                metadata = json.load(f)
        except IOError as e:
            raise ValueError('export archive does not contain the required file {}'.format(e.filename))

        old_version = verify_metadata_version(metadata)

        try:
            if old_version == '0.1':
                migrate_v1_to_v2(metadata, data)
            elif old_version == '0.2':
                try:
                    migrate_v2_to_v3(metadata, data)
                except DanglingLinkError as e:
                    print "An exception occurred!"
                    print e
                    raise RuntimeError("Your export file is broken because it contains dangling links")
            else:
                raise ValueError('cannot migrate from version {}'.format(old_version))
        except ValueError as exception:
            print >> sys.stderr, 'Error:', exception
            sys.exit(2)

        new_version = verify_metadata_version(metadata)

        with open(folder.get_abs_path('data.json'), 'w') as f:
            json.dump(data, f)

        with open(folder.get_abs_path('metadata.json'), 'w') as f:
            json.dump(metadata, f)

        if archive_format == 'zip' or archive_format == 'zip-uncompressed':
            compression = zipfile.ZIP_DEFLATED if archive_format == 'zip' else zipfile.ZIP_STORED
            with zipfile.ZipFile(file_output, mode='w', compression=compression, allowZip64=True) as archive:
                src = folder.abspath
                for dirpath, dirnames, filenames in os.walk(src):
                    relpath = os.path.relpath(dirpath, src)
                    for fn in dirnames + filenames:
                        real_src = os.path.join(dirpath, fn)
                        real_dest = os.path.join(relpath, fn)
                        archive.write(real_src, real_dest)
        elif archive_format == 'tar.gz':
            with tarfile.open(file_output, 'w:gz', format=tarfile.PAX_FORMAT, dereference=True) as archive:
                archive.add(folder.abspath, arcname='')

        if not silent:
            print 'Successfully migrated the archive from version {} to {}'.format(old_version, new_version)

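# Usage sketch (illustrative only; assumes the commands are exposed through the
# ``verdi export`` group and that the pks and file names below exist):
#
#   verdi export create -G 20 -n 1234 -a zip-uncompressed export.aiida
#   verdi export migrate -a tar.gz old_export.aiida migrated_export.aiida
#
# The first call exports group pk=20 and node pk=1234 together with their parents
# and calculation outputs (suppress these with -P and -O); the second migrates an
# existing archive to the newest export version handled by this module.
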
def verify_metadata_version(metadata, version=None):
    """
    Utility function to verify that the metadata has the correct version number.
    If no version number is passed, it will just extract the version number and return it.

    :param metadata: the content of an export archive metadata.json file
    :param version: string version number that the metadata is expected to have
    """
    try:
        metadata_version = metadata['export_version']
    except KeyError:
        raise ValueError('could not find the export_version field in the metadata')

    if version is None:
        return metadata_version

    if metadata_version != version:
        raise ValueError('expected export file with version {} but found version {}'
            .format(version, metadata_version))

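# Minimal behaviour sketch for verify_metadata_version (values are illustrative):
#
#   metadata = {'export_version': '0.2'}
#   verify_metadata_version(metadata)         # returns '0.2'
#   verify_metadata_version(metadata, '0.2')  # version matches, returns None
#   verify_metadata_version(metadata, '0.3')  # raises ValueError
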
def update_metadata(metadata, version):
    """
    Update the metadata with a new version number and a notification of the
    conversion that was executed

    :param metadata: the content of an export archive metadata.json file
    :param version: string version number that the updated metadata should get
    """
    import aiida
    old_version = metadata['export_version']
    conversion_info = metadata.get('conversion_info', [])

    conversion_message = 'Converted from version {} to {} with external script'.format(old_version, version)
    conversion_info.append(conversion_message)

    metadata['aiida_version'] = aiida.get_version()
    metadata['export_version'] = version
    metadata['conversion_info'] = conversion_info

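# Sketch of the effect of update_metadata (values are illustrative):
#
#   metadata = {'export_version': '0.1'}
#   update_metadata(metadata, '0.2')
#   # metadata['export_version']  == '0.2'
#   # metadata['aiida_version']   == aiida.get_version()
#   # metadata['conversion_info'] == ['Converted from version 0.1 to 0.2 with external script']
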
def migrate_v1_to_v2(metadata, data):
    """
    Migration of export files from v0.1 to v0.2, which means generalizing the
    field names with respect to the database backend

    :param data: the content of an export archive data.json file
    :param metadata: the content of an export archive metadata.json file
    """
    old_version = '0.1'
    new_version = '0.2'

    old_start = "aiida.djsite"
    new_start = "aiida.backends.djsite"

    try:
        verify_metadata_version(metadata, old_version)
        update_metadata(metadata, new_version)
    except ValueError:
        raise

    def get_new_string(old_string):
        if old_string.startswith(old_start):
            return "{}{}".format(new_start, old_string[len(old_start):])
        else:
            return old_string

    def replace_requires(data):
        if isinstance(data, dict):
            new_data = {}
            for k, v in data.iteritems():
                if k == 'requires' and v.startswith(old_start):
                    new_data[k] = get_new_string(v)
                else:
                    new_data[k] = replace_requires(v)
            return new_data
        else:
            return data

    for field in ['export_data']:
        for k in list(data[field]):
            if k.startswith(old_start):
                new_k = get_new_string(k)
                data[field][new_k] = data[field][k]
                del data[field][k]

    for field in ['unique_identifiers', 'all_fields_info']:
        for k in list(metadata[field].keys()):
            if k.startswith(old_start):
                new_k = get_new_string(k)
                metadata[field][new_k] = metadata[field][k]
                del metadata[field][k]

    metadata['all_fields_info'] = replace_requires(metadata['all_fields_info'])

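# Illustration of the v0.1 -> v0.2 renaming performed above (example key only):
#
#   'aiida.djsite.db.models.DbNode'  ->  'aiida.backends.djsite.db.models.DbNode'
#
# Every entity key and 'requires' value starting with 'aiida.djsite' is rewritten
# to start with 'aiida.backends.djsite'; all other strings are left untouched.
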
def migrate_v2_to_v3(metadata, data):
    """
    Migration of export files from v0.2 to v0.3, which means adding the link
    types to the link entries and making the entity key names backend agnostic
    by effectively removing the prefix 'aiida.backends.djsite.db.models'

    :param data: the content of an export archive data.json file
    :param metadata: the content of an export archive metadata.json file
    """
    import json
    import enum
    from aiida.common.links import LinkType

    old_version = '0.2'
    new_version = '0.3'

    class NodeType(enum.Enum):
        """
        A simple enum of relevant node types
        """
        NONE = 'none'
        CALC = 'calculation'
        CODE = 'code'
        DATA = 'data'
        WORK = 'work'

    entity_map = {
        'aiida.backends.djsite.db.models.DbNode': 'Node',
        'aiida.backends.djsite.db.models.DbLink': 'Link',
        'aiida.backends.djsite.db.models.DbGroup': 'Group',
        'aiida.backends.djsite.db.models.DbComputer': 'Computer',
        'aiida.backends.djsite.db.models.DbUser': 'User',
        'aiida.backends.djsite.db.models.DbAttribute': 'Attribute'
    }

    try:
        verify_metadata_version(metadata, old_version)
        update_metadata(metadata, new_version)
    except ValueError:
        raise

    # Create a mapping from node uuid to node type
    mapping = {}
    for category, nodes in data['export_data'].iteritems():
        for pk, node in nodes.iteritems():

            try:
                node_uuid = node['uuid']
                node_type_string = node['type']
            except KeyError:
                continue

            if node_type_string.startswith('calculation.job.'):
                node_type = NodeType.CALC
            elif node_type_string.startswith('calculation.inline.'):
                node_type = NodeType.CALC
            elif node_type_string.startswith('code.Code'):
                node_type = NodeType.CODE
            elif node_type_string.startswith('data.'):
                node_type = NodeType.DATA
            elif node_type_string.startswith('calculation.work.'):
                node_type = NodeType.WORK
            else:
                node_type = NodeType.NONE

            mapping[node_uuid] = node_type

    # For each link, deduce the link type and insert it in place
    for link in data['links_uuid']:

        try:
            input_type = NodeType(mapping[link['input']])
            output_type = NodeType(mapping[link['output']])
        except KeyError:
            raise DanglingLinkError('Unknown node UUID {} or {}'.format(link['input'], link['output']))

        # The following table demonstrates the logic for inferring the link type
        # (CODE, DATA) -> (WORK, CALC) : INPUT
        # (CALC)       -> (DATA)       : CREATE
        # (WORK)       -> (DATA)       : RETURN
        # (WORK)       -> (CALC, WORK) : CALL
        if (input_type == NodeType.CODE or input_type == NodeType.DATA) \
                and (output_type == NodeType.CALC or output_type == NodeType.WORK):
            link['type'] = LinkType.INPUT.value
        elif input_type == NodeType.CALC and output_type == NodeType.DATA:
            link['type'] = LinkType.CREATE.value
        elif input_type == NodeType.WORK and output_type == NodeType.DATA:
            link['type'] = LinkType.RETURN.value
        elif input_type == NodeType.WORK \
                and (output_type == NodeType.CALC or output_type == NodeType.WORK):
            link['type'] = LinkType.CALL.value
        else:
            link['type'] = LinkType.UNSPECIFIED.value

    # Now we migrate the entity key names, i.e. removing the
    # 'aiida.backends.djsite.db.models' prefix
    for field in ['unique_identifiers', 'all_fields_info']:
        for old_key, new_key in entity_map.iteritems():
            if old_key in metadata[field]:
                metadata[field][new_key] = metadata[field][old_key]
                del metadata[field][old_key]

    # Replace the 'requires' keys in the nested dictionaries in 'all_fields_info'
    for entity in metadata['all_fields_info'].values():
        for prop in entity.values():
            for key, value in prop.iteritems():
                if key == 'requires' and value in entity_map:
                    prop[key] = entity_map[value]

    # Replace any present keys in the data.json
    for field in ['export_data']:
        for old_key, new_key in entity_map.iteritems():
            if old_key in data[field]:
                data[field][new_key] = data[field][old_key]
                del data[field][old_key]

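# Worked example of the v0.2 -> v0.3 link-type inference above (UUIDs are placeholders):
#
#   mapping = {'uuid-code': NodeType.CODE, 'uuid-calc': NodeType.CALC, 'uuid-data': NodeType.DATA}
#   link {'input': 'uuid-code', 'output': 'uuid-calc'}  gets  link['type'] = LinkType.INPUT.value
#   link {'input': 'uuid-calc', 'output': 'uuid-data'}  gets  link['type'] = LinkType.CREATE.value
#
# and of the entity key renaming applied to metadata and data:
#
#   'aiida.backends.djsite.db.models.DbNode' -> 'Node'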