Source code for aiida.common.datastructures

# -*- coding: utf-8 -*-
###########################################################################
# Copyright (c), The AiiDA team. All rights reserved.                     #
# This file is part of the AiiDA code.                                    #
#                                                                         #
# The code is hosted on GitHub at https://github.com/aiidateam/aiida_core #
# For further information on the license, see the LICENSE.txt file        #
# For further information please visit http://www.aiida.net               #
###########################################################################
"""
This module defines the main data structures used by the Calculation.
"""
from aiida.common.extendeddicts import DefaultFieldsAttributeDict, Enumerate

class CalcState(Enumerate):
    """
    Enumeration type for the states of a calculation.

    It is instantiated once at module level as ``calc_states``.
    """

# Names of all calculation states. Each entry is preceded by a short
# description of the state it stands for.
_sorted_datastates = (
    # Just created
    'NEW',
    # Used by the execution manager to submit new calculations that are
    # scheduled to be submitted
    'TOSUBMIT',
    # Being submitted to the cluster
    'SUBMITTING',
    # On the scheduler, in any unfinished status
    # (QUEUED, QUEUED_HELD, SUSPENDED, RUNNING)
    'WITHSCHEDULER',
    # Calculation finished on the scheduler, not yet retrieved
    # (both DONE and FAILED)
    'COMPUTED',
    # While retrieving data
    'RETRIEVING',
    # While parsing data
    'PARSING',
    # Final state of the calculation: data retrieved and eventually parsed
    'FINISHED',
    # An error occurred during the submission phase
    'SUBMISSIONFAILED',
    # An error occurred during the retrieval phase
    'RETRIEVALFAILED',
    # An error occurred during the parsing phase, due to a problem in the
    # parser
    'PARSINGFAILED',
    # The parser recognized the calculation as failed
    'FAILED',
    # The calculation was imported from another DB
    'IMPORTED',
)

# The order of the states in _sorted_datastates is not random: it is their
# order of precedence.
# It is used to verify that calculations always proceed in the correct order.
# calc_states, instead, has a random order.
calc_states = CalcState(_sorted_datastates)


def sort_states(list_states, use_key=False):
    """
    Given a list of state names, return a sorted list of states (the first
    is the most recent) sorted according to their logical appearance in
    the DB (i.e., FINISHED is returned before SUBMITTING, which is returned
    before NEW).

    .. note:: The order of the internal variable _sorted_datastates is
      used. Entries sharing the same state keep the relative order they
      had in ``list_states``.

    :param list_states: a list (or tuple) of state strings.

    :param use_key: if True, expects that each element is not
        just a string, but a pair (someobject, string).
        Only string is used to sort (should be the state string),
        and only someobject is returned in the final list.

    :return: a sorted list of the given data states (or of the
        ``someobject`` items, if ``use_key`` is True).

    :raise ValueError: if any of the given states is not a valid state.
    """
    # Rank of each state: its position inside _sorted_datastates
    datastates_order_dict = {state: idx for idx, state in enumerate(
        _sorted_datastates)}

    try:
        if use_key:
            list_to_sort = [(datastates_order_dict[st[1]], st[0])
                            for st in list_states]
        else:
            list_to_sort = [(datastates_order_dict[st], st)
                            for st in list_states]
    except KeyError as e:
        # e.args[0] is the offending state; unlike e.message it is available
        # on both Python 2 and Python 3
        raise ValueError("At least one of the provided states is not "
                         "valid ({})".format(e.args[0]))

    # Sort on the rank only, highest rank first. The payload is deliberately
    # left out of the comparison: it may be an arbitrary, non-orderable
    # object when use_key is True.
    list_to_sort.sort(key=lambda pair: pair[0], reverse=True)

    return [item for _, item in list_to_sort]


class CalcInfo(DefaultFieldsAttributeDict):
    """
    This object will store the data returned by the calculation plugin and to be
    passed to the ExecManager
    """
    # Note: some of the variables might have never been used in AiiDA
    #       one might want to clean all this stuff in a future revision
    # Note: probably some of the fields below are not used anymore inside
    #       calcinfo, but are rather directly set from calculation attributes to
    #       the JobInfo to be passed to the ExecManager
    #       (see, for instance, 'queue_name').

    _default_fields = (
        'job_environment',  # TODO UNDERSTAND THIS!
        'email',
        'email_on_started',
        'email_on_terminated',
        'uuid',
        'prepend_text',
        'append_text',
        # The following 5 fields are no longer part of CalcInfo: they are
        # now in CodeInfo.
        #   'cmdline_params' (as a list of strings), 'stdin_name',
        #   'stdout_name', 'stderr_name', 'join_files'
        # 'queue_name' is not used in CalcInfo either: it is automatically
        # set from calculation attributes to JobInfo.
        'num_machines',
        'num_mpiprocs_per_machine',
        'priority',
        'max_wallclock_seconds',
        'max_memory_kb',
        'rerunnable',
        # A list of files or patterns to retrieve. Each item has one of two
        # possible formats:
        #   'remotepath'
        #       just the name of the file to retrieve. It will be put in '.'
        #       of the repository, with name os.path.split(item)[1]
        #   ['remotepath', 'localpath', depth]
        #       copies the remotepath file/folder to localpath.
        #       If remotepath is a file/folder, localpath will be its local
        #       name. If remotepath has file patterns, localpath should only
        #       be '.'.
        #       depth is an integer to decide the local name: it will be
        #       os.path.join(localpath, filename), where filename takes
        #       remotepath.split() and joins the last #depth elements.
        # Use the second format if you are using file patterns
        # (*, [0-9], ...).
        # ALL PATHS ARE RELATIVE!
        'retrieve_list',
        # A list of length-two tuples with (localabspath, relativedestpath)
        'local_copy_list',
        # A list of length-three tuples with
        # (remotemachinename, remoteabspath, relativedestpath)
        'remote_copy_list',
        # A list of length-three tuples with
        # (remotemachinename, remoteabspath, relativedestpath)
        'remote_symlink_list',
        # A list of files that will be retrieved from the cluster and saved
        # in SinglefileData nodes, in the following format:
        #   ["linkname_from calc to singlefile", "subclass of singlefile",
        #    "filename"]
        # filename remote = filename local
        'retrieve_singlefile_list',
        # A list of dictionaries used to pass the info of the execution of a
        # code
        'codes_info',
        # A string used to specify the order in which multi codes can be
        # executed
        'codes_run_mode',
    )


class CodeRunmode(Enumerate):
    """
    Enumeration type for the ways in which more than one code can be
    executed in the same scheduling job.

    It is instantiated once at module level as ``code_run_modes``.
    """

# Possible ways to execute more than one code in the same scheduling job.
#
# PARALLEL: the codes will be executed as something like:
#   code1.x &
#   code2.x &
#   wait
#
# SERIAL: the codes will be executed as:
#   code1.x
#   code2.x
code_run_modes = CodeRunmode(('PARALLEL', 'SERIAL'))


class CodeInfo(DefaultFieldsAttributeDict):
    """
    This attribute-dictionary contains the information needed to execute a code.
    Possible attributes are:

    * ``cmdline_params``: a list of strings, containing parameters to be written on
      the command line right after the call to the code, as for example::

        code.x cmdline_params[0] cmdline_params[1] ... < stdin > stdout

    * ``stdin_name``: (optional) the name of the standard input file. Note, it is
      only possible to use the stdin with the syntax::

        code.x < stdin_name

      If no stdin_name is specified, the string "< stdin_name" will not be
      passed to the code.
      Note: it is not possible to substitute/remove the '<' if stdin_name is specified;
      if that is needed, avoid stdin_name and use instead the cmdline_params to
      specify a suitable syntax.
    * ``stdout_name``: (optional) the name of the standard output file. Note, it is
      only possible to pass output to stdout_name with the syntax::

        code.x ... > stdout_name

      If no stdout_name is specified, the string "> stdout_name" will not be
      passed to the code.
      Note: it is not possible to substitute/remove the '>' if stdout_name is specified;
      if that is needed, avoid stdout_name and use instead the cmdline_params to
      specify a suitable syntax.
    * ``stderr_name``: (optional) a string, the name of the error file of the code.
    * ``join_files``: (optional) if True, redirects the error to the output file.
      If join_files=True, the code will be called as::

        code.x ... > stdout_name 2>&1

      otherwise, if join_files=False and stderr is passed::

        code.x ... > stdout_name 2> stderr_name

    * ``withmpi``: if True, executes the code with mpirun (or another MPI installed
      on the remote computer)
    * ``code_uuid``: the uuid of the code associated to the CodeInfo
    """
    # Each field is described in the class docstring above.
    _default_fields = (
        'cmdline_params',  # as a list of strings
        'stdin_name',
        'stdout_name',
        'stderr_name',
        'join_files',
        'withmpi',
        'code_uuid',
    )


class WorkflowState(Enumerate):
    """
    Enumeration type for workflow states.

    It is instantiated once at module level as ``wf_states``.
    """


# The workflow state names, exposed through a WorkflowState enumeration.
wf_states = WorkflowState(
    ('CREATED', 'INITIALIZED', 'RUNNING', 'FINISHED', 'SLEEP', 'ERROR'))


class WorkflowDataType(Enumerate):
    """
    Enumeration type for the kinds of workflow data.

    It is instantiated once at module level as ``wf_data_types``.
    """


# The workflow data type names, exposed through a WorkflowDataType
# enumeration.
wf_data_types = WorkflowDataType(('PARAMETER', 'RESULT', 'ATTRIBUTE'))


class WorkflowDataValueType(Enumerate):
    """
    Enumeration type for the kinds of value a workflow data entry can hold.

    It is instantiated once at module level as ``wf_data_value_types``.
    """


# The workflow data value type names, exposed through a
# WorkflowDataValueType enumeration.
wf_data_value_types = WorkflowDataValueType(('NONE', 'JSON', 'AIIDA'))

# Labels for the workflow "start", "exit" and default calls
# (presumably step names used by the workflow engine -- TODO confirm).
wf_start_call = 'start'
wf_exit_call = 'exit'
wf_default_call = 'none'

# TODO Improve/implement this!
# class DynResourcesInfo(AttributeDict):
#    """
#    This object will contain a list of 'dynamical' resources to be
#    passed from the code plugin to the ExecManager, containing
#    things like
#    * resources in the permanent repository, that will be simply
#      linked locally (but copied remotely on the remote computer)
#      to avoid a waste of permanent repository space
#    * remote resources to be directly copied over only remotely
#    """
#    pass