Source code for aiida.utils.delete_nodes

# -*- coding: utf-8 -*-
###########################################################################
# Copyright (c), The AiiDA team. All rights reserved.                     #
# This file is part of the AiiDA code.                                    #
#                                                                         #
# The code is hosted on GitHub at https://github.com/aiidateam/aiida_core #
# For further information on the license, see the LICENSE.txt file        #
# For further information please visit http://www.aiida.net               #
###########################################################################



def delete_nodes(pks, follow_calls=False, follow_returns=False,
                 dry_run=False, force=False, disable_checks=False,
                 verbosity=0):
    """
    Delete nodes by a list of pks.

    Starting from the given nodes, the provenance graph is followed along
    CREATE and INPUT links (and, optionally, CALL and RETURN links); every
    node reached this way is deleted together with the requested ones.

    :note: The script will also delete all children calculations generated
        from the specified nodes.

    :param pks: a list of the PKs of the nodes to delete
    :param bool follow_calls: Follow calls
    :param bool follow_returns: Follow returns. This is a very dangerous
        option, since anything returned by a workflow might have been used as
        input in many other calculations. Use with care, and never combine
        with force.
    :param bool dry_run: Do not delete, a dry run, with statistics printed
        according to verbosity levels.
    :param bool force: Do not ask for confirmation to delete nodes.
    :param bool disable_checks: If True, will not check whether calculations
        are losing created data or called instances. If checks are disabled,
        also logging is disabled.
    :param int verbosity: The verbosity levels, 0 prints nothing, 1 prints
        just sums and total, 2 prints individual nodes.
    :return: None
    """
    if not pks:
        # Nothing was requested: return before importing the backend or
        # touching the database. Checked explicitly because an empty set
        # might be problematic for the 'in' filters of the queries below.
        if verbosity:
            print("Nothing to delete")
        return

    from aiida.orm.querybuilder import QueryBuilder
    from aiida.common.links import LinkType
    from aiida.orm.node import Node
    from aiida.orm.calculation import Calculation
    from aiida.orm.data import Data
    from aiida.orm import load_node
    from aiida.backends.utils import delete_nodes_and_connections
    from aiida.backends.utils import get_automatic_user

    # Fetched before anything is deleted, so that a failure in the lookup
    # cannot interrupt the function halfway through the deletion.
    user_email = get_automatic_user().email

    # 'would' for a dry run, 'will' for a real deletion; used in all messages.
    modal = 'would' if dry_run else 'will'

    # The following code is just for the querying of downwards provenance.
    # Ideally, there should be a module to interface with, but this is the
    # solution for now. By only dealing with ids, and keeping track of what
    # has been already visited in the query, there's good performance and no
    # infinite loops.
    link_types_to_follow = [LinkType.CREATE.value, LinkType.INPUT.value]
    if follow_calls:
        link_types_to_follow.append(LinkType.CALL.value)
    if follow_returns:
        link_types_to_follow.append(LinkType.RETURN.value)
    edge_filters = {'type': {'in': link_types_to_follow}}

    # The operational set always holds the nodes added in the last iteration.
    # Two independent copies, so the caller's collection is never mutated.
    operational_set = set(pks)
    pks_set_to_delete = set(pks)
    while operational_set:
        # new_pks_set are the pks of all nodes that are connected to the
        # operational node set with the links specified.
        reach_qb = QueryBuilder()
        reach_qb.append(Node, filters={'id': {'in': operational_set}})
        reach_qb.append(Node, project='id', edge_filters=edge_filters)
        new_pks_set = {pk for pk, in reach_qb.iterall()}
        # Only pks not seen before need to be expanded in the next iteration.
        operational_set = new_pks_set.difference(pks_set_to_delete)
        pks_set_to_delete = pks_set_to_delete.union(new_pks_set)

    if verbosity > 0:
        print("I {} delete {} node{}".format(
            modal,
            len(pks_set_to_delete),
            's' if len(pks_set_to_delete) > 1 else ''))
        if verbosity > 1:
            qb = QueryBuilder().append(
                Node,
                filters={'id': {'in': pks_set_to_delete}},
                project=('uuid', 'id', 'type', 'label'))
            print("The nodes I {} delete:".format(modal))
            for uuid, pk, type_string, label in qb.iterall():
                try:
                    # Second-to-last dotted component of the type string.
                    short_type_string = type_string.split('.')[-2]
                except IndexError:
                    short_type_string = type_string
                print(" {} {} {} {}".format(uuid, pk, short_type_string, label))

    # Here I am checking whether I am deleting
    ## A data instance without also deleting the creator, which breaks the
    ## relationship between a calculation and its data
    ## A calculation instance that was called, without also deleting the
    ## caller.
    if not disable_checks:
        # Calculations that survive but called a calculation being deleted.
        called_qb = QueryBuilder()
        called_qb.append(
            Calculation,
            filters={'id': {'!in': pks_set_to_delete}},
            project='id')
        called_qb.append(
            Calculation,
            project='type',
            edge_project='label',
            filters={'id': {'in': pks_set_to_delete}},
            edge_filters={'type': {'==': LinkType.CALL.value}})
        caller_to_called2delete = called_qb.all()

        if verbosity > 0 and caller_to_called2delete:
            # First column of every row is the pk of the surviving caller.
            calculation_pks_losing_called = {
                row[0] for row in caller_to_called2delete}
            print("\n{} calculation{} {} lose at least one called instance".format(
                len(calculation_pks_losing_called),
                's' if len(calculation_pks_losing_called) > 1 else '',
                modal))
            if verbosity > 1:
                print("These are the calculations that {} lose a called instance:".format(modal))
                for calc_losing_called_pk in calculation_pks_losing_called:
                    print("  {}".format(load_node(calc_losing_called_pk)))

        # Calculations that survive but created data being deleted.
        created_qb = QueryBuilder()
        created_qb.append(
            Calculation,
            filters={'id': {'!in': pks_set_to_delete}},
            project='id')
        created_qb.append(
            Data,
            project='type',
            edge_project='label',
            filters={'id': {'in': pks_set_to_delete}},
            edge_filters={'type': {'==': LinkType.CREATE.value}})
        creator_to_created2delete = created_qb.all()

        if verbosity > 0 and creator_to_created2delete:
            # First column of every row is the pk of the surviving creator.
            calculation_pks_losing_created = {
                row[0] for row in creator_to_created2delete}
            print("\n{} calculation{} {} lose at least one created data-instance".format(
                len(calculation_pks_losing_created),
                's' if len(calculation_pks_losing_created) > 1 else '',
                modal))
            if verbosity > 1:
                print("These are the calculations that {} lose a created data-instance:".format(modal))
                for calc_losing_created_pk in calculation_pks_losing_created:
                    print("  {}".format(load_node(calc_losing_created_pk)))

    if dry_run:
        if verbosity > 0:
            print("\nThis was a dry run, exiting without deleting anything")
        return

    # Asking for user confirmation here
    if not force:
        print("YOU ARE ABOUT TO DELETE {} NODES! THIS CANNOT BE UNDONE!".format(
            len(pks_set_to_delete)))
        try:
            read_answer = raw_input  # Python 2
        except NameError:
            read_answer = input  # Python 3
        # Surrounding whitespace in the answer is ignored.
        if read_answer("Shall I continue? [Y/N] ").strip().lower() != 'y':
            print("Exiting without deleting")
            return

    # Recover the list of folders to delete before actually deleting
    # the nodes. I will delete the folders only later, so that if
    # there is a problem during the deletion of the nodes in
    # the DB, I don't delete the folders
    folders = [load_node(pk).folder for pk in pks_set_to_delete]

    delete_nodes_and_connections(pks_set_to_delete)

    # From here on the entries are gone from the DB, so the folders must be
    # erased even if writing the log messages below fails; otherwise they
    # would be left behind as orphans.
    try:
        if not disable_checks:
            # I pass now to the log the information for calculations losing
            # created data or called instances
            for calc_pk, calc_type_string, link_label in caller_to_called2delete:
                calc = load_node(calc_pk)
                calc.logger.warning(
                    "User {} deleted "
                    "an instance of type {} "
                    "called with the label {} "
                    "by this calculation".format(
                        user_email, calc_type_string, link_label))

            for calc_pk, data_type_string, link_label in creator_to_created2delete:
                calc = load_node(calc_pk)
                calc.logger.warning(
                    "User {} deleted "
                    "an instance of type {} "
                    "created with the label {} "
                    "by this calculation".format(
                        user_email, data_type_string, link_label))
    finally:
        for folder in folders:
            folder.erase()