# Source code for aiida.parsers.plugins.quantumespresso.raw_parser_cp

# -*- coding: utf-8 -*-
###########################################################################
# Copyright (c), The AiiDA team. All rights reserved.                     #
# This file is part of the AiiDA code.                                    #
#                                                                         #
# The code is hosted on GitHub at https://github.com/aiidateam/aiida_core #
# For further information on the license, see the LICENSE.txt file        #
# For further information please visit http://www.aiida.net               #
###########################################################################
from aiida.parsers.plugins.quantumespresso import QEOutputParsingError
from xml.dom.minidom import parseString
from aiida.parsers.plugins.quantumespresso.raw_parser_pw import (read_xml_card,
                   parse_xml_child_integer,xml_card_header,parse_xml_child_bool,
                   parse_xml_child_str,parse_xml_child_float,
                   parse_xml_child_attribute_str,xml_card_cell,xml_card_ions,
                   xml_card_exchangecorrelation,xml_card_spin,xml_card_planewaves)



def parse_cp_traj_stanzas(num_elements, splitlines, prepend_name, rescale=1.):
    """
    Parse the content of a CP trajectory file organised in stanzas.

    Every stanza starts with a two-element line (step number and time)
    and is followed by exactly ``num_elements`` three-element lines.

    :param num_elements: Number of lines (with three elements) between lines
        with two only elements (containing step number and time in ps).
        num_elements is 3 for cell, and the number of atoms for coordinates
        and positions.
    :param splitlines: a list of lines of the file, already split in pieces
        using string.split
    :param prepend_name: a string to be prepended to the name of keys
        returned in the return dictionary.
    :param rescale: the values in each stanza are multiplied by this factor,
        for units conversion
    :return: a dictionary with the keys ``<prepend_name>_steps`` (list of
        int), ``<prepend_name>_times`` (list of float) and
        ``<prepend_name>_data`` (list of stanzas, each a list of
        three-float rows).
    :raises ValueError: if a line has neither two nor three elements, if a
        line is in the wrong position, if the last stanza is incomplete or
        if a value cannot be converted to a number. The message is prefixed
        with the (1-based) number of the offending line.
    """
    steps = []
    times = []
    stanzas = []
    this_stanza = []
    start_stanza = False
    linenum = -1
    try:
        for linenum, pieces in enumerate(splitlines):
            if len(pieces) == 2:
                steps.append(int(pieces[0]))
                times.append(float(pieces[1]))
                start_stanza = True
                # A header line is only legal when the previous stanza
                # has been completed (and therefore flushed).
                if this_stanza:
                    raise ValueError("Wrong position of short line.")
            elif len(pieces) == 3:
                # A data line must follow a header or another data line of
                # a stanza that is still being filled.
                if not this_stanza and not start_stanza:
                    raise ValueError("Wrong position of long line.")
                start_stanza = False
                this_stanza.append([float(piece) * rescale for piece in pieces])
                if len(this_stanza) == num_elements:
                    stanzas.append(this_stanza)
                    this_stanza = []
            else:
                raise ValueError("Wrong line length ({})".format(len(pieces)))
        if this_stanza:
            raise ValueError("Wrong length of last block ({} lines instead of 0)."
                             .format(len(this_stanza)))
        if len(steps) != len(stanzas):
            raise ValueError("Length mismatch between number of steps and number of defined stanzas.")
        return {
            '{}_steps'.format(prepend_name): steps,
            '{}_times'.format(prepend_name): times,
            '{}_data'.format(prepend_name): stanzas,
        }
    except Exception as exc:
        # Exceptions have no ``message`` attribute on Python 3, and on
        # Python 2 changing it does not alter ``str(exc)``: rewrite ``args``
        # so that the line number really shows up, keeping the exception type.
        message = "At line {}: {}".format(linenum + 1, exc)
        exc.args = (message,)
        # Kept for legacy callers that still read ``.message``.
        exc.message = message
        raise
def parse_cp_text_output(data, xml_data):
    """
    Parse the standard output of a CP run.

    :param data: a list of strings, one for each line, as returned by
        readlines().
    :param xml_data: not read by this function; kept so that the signature
        stays unchanged for existing callers.
    :return: a dictionary with the parsed values. It always contains the
        key ``warnings`` (list of strings); ``wall_time`` and the energy and
        temperature columns (``ekinc``, ``temph``, ``tempp``, ``etot``,
        ``enthal``, ``econs``, ``econt``, ``vnhh``, ``xnhh0``, ``vnhp``,
        ``xnhp0``) are added only when they can be read. Each column value
        is stored as a one-element list of floats.
    :raises QEOutputParsingError: if a line containing both 'CP' and 'WALL'
        does not contain 'CPU'.
    """
    # TODO: uniform readlines() and read() usage for passing input to the parser

    parsed_data = {'warnings': []}

    for line in data:
        if 'warning' in line.lower():
            parsed_data['warnings'].append(line)
        elif 'bananas' in line:
            parsed_data['warnings'].append('Bananas from the ortho.')
        elif 'CP' in line and 'WALL' in line:
            try:
                wall_time = line.split('CPU')[1].split('WALL')[0]
            except IndexError:
                # The line has no 'CPU' token to split on.
                raise QEOutputParsingError('Error while parsing wall time.')
            parsed_data['wall_time'] = wall_time

    # Column 0 of the values line is the step index (nfi); the names below
    # are stored from column 1 onwards, in this order.
    column_names = ('ekinc', 'temph', 'tempp', 'etot', 'enthal', 'econs',
                    'econt', 'vnhh', 'xnhh0', 'vnhp', 'xnhp0')

    num_lines = len(data)
    # NOTE: there is no ``break``: every header found in the file is visited,
    # from the last one to the first one, and each readable value overwrites
    # the one stored previously.
    for count, line in enumerate(reversed(data)):
        if 'nfi' in line and 'ekinc' in line and 'econs' in line:
            # ``count`` counts from the end of the file, so this is the index
            # of the line that FOLLOWS the header, i.e. the one with values.
            values_index = num_lines - count
            if values_index >= num_lines:
                # The header is the very last line (e.g. truncated output):
                # there is no values line to read.
                continue
            fields = data[values_index].split()
            for column, name in enumerate(column_names, 1):
                try:
                    parsed_data[name] = [float(fields[column])]
                except (ValueError, IndexError):
                    # Unreadable or missing column: skip it.
                    pass

    return parsed_data
def parse_cp_xml_counter_output(data):
    """
    Parse the content of the xml file print_counter.xml.

    :param data: a single string, as returned by file.read() (notice the
        difference with parse_text_output!)
    :return: a dictionary with one key, ``last_successful_printout``, holding
        the integer read from the STEP tag of the LAST_SUCCESSFUL_PRINTOUT
        card, which is looked up inside the top-level PRINT_COUNTER node.
    """
    card_name = 'LAST_SUCCESSFUL_PRINTOUT'
    document = parseString(data)

    # Only direct children are inspected at both levels; the first match wins.
    counter_nodes = [node for node in document.childNodes
                     if node.nodeName == 'PRINT_COUNTER']
    counter_card = counter_nodes[0]
    printout_nodes = [node for node in counter_card.childNodes
                      if node.nodeName == 'LAST_SUCCESSFUL_PRINTOUT']
    printout_card = printout_nodes[0]

    result_key = card_name.lower().replace('-', '_')
    return {result_key: parse_xml_child_integer('STEP', printout_card)}
def parse_cp_raw_output(out_file, xml_file=None, xml_counter_file=None):
    """
    Parse the raw output files of a CP calculation.

    :param out_file: path of the standard output file.
    :param xml_file: path of the xml data file; if None its parsing is
        skipped.
    :param xml_counter_file: path of the xml counter file, which keeps info
        on the steps; if None its parsing is skipped.
    :return: a tuple ``(final_data, job_successful)``: ``final_data`` is the
        dictionary merging the values parsed from the xml file, the standard
        output and the counter file; ``job_successful`` is True if a line of
        the standard output contains 'JOB DONE'.
    :raises QEOutputParsingError: if one of the given files cannot be opened.
    :raises AssertionError: if a key parsed from the standard output is also
        present in the xml data or in the xml counter data.
    """
    parser_version = '0.1'
    # NOTE(review): parser_info is filled in but it is not part of the
    # returned data.
    parser_info = {}
    parser_info['parser_warnings'] = []
    parser_info['parser_info'] = 'AiiDA QE Parser v{}'.format(parser_version)

    # analyze the xml
    if xml_file is not None:
        try:
            with open(xml_file, 'r') as f:
                xml_lines = f.read()
        except IOError:
            raise QEOutputParsingError("Failed to open xml file: {}.".format(xml_file))
        # TODO: this function should probably be the same of pw.
        # after all, the parser was fault-tolerant
        xml_data = parse_cp_xml_output(xml_lines)
    else:
        parser_info['parser_warnings'].append('Skipping the parsing of the xml file.')
        xml_data = {}

    # analyze the counter file, which keeps info on the steps
    if xml_counter_file is not None:
        try:
            with open(xml_counter_file, 'r') as f:
                xml_counter_lines = f.read()
        except IOError:
            raise QEOutputParsingError("Failed to open xml counter file: {}."
                                       .format(xml_counter_file))
        xml_counter_data = parse_cp_xml_counter_output(xml_counter_lines)
    else:
        xml_counter_data = {}

    # analyze the standard output
    try:
        with open(out_file, 'r') as f:
            out_lines = f.readlines()
    except IOError:
        raise QEOutputParsingError("Failed to open output file: %s." % out_file)

    # understand if the job ended smoothly
    job_successful = any('JOB DONE' in line for line in reversed(out_lines))

    out_data = parse_cp_text_output(out_lines, xml_data)

    for key in out_data:
        if key in xml_data:
            raise AssertionError('%s found in both dictionaries' % key)
        if key in xml_counter_data:
            raise AssertionError('%s found in both dictionaries' % key)

    # Merge in the order xml -> standard output -> counter: for a shared key
    # the later dictionary wins (only xml/counter clashes can get this far,
    # all the others have been rejected by the check above).
    final_data = {}
    final_data.update(xml_data)
    final_data.update(out_data)
    final_data.update(xml_counter_data)

    # TODO: parse the trajectory and save them in a reasonable format

    return final_data, job_successful


# TODO: the xml has a lot in common with pw, maybe I should avoid duplication of code
# or maybe should I wait for the new version of data-file.xml ?
def _cp_key(*names):
    """Build a result key: join ``names`` with '_', turn '-' into '_', lowercase."""
    return '_'.join(names).replace('-', '_').lower()


def _cp_text_floats(parent, tagname):
    """Return the text of the first ``tagname`` element under ``parent`` as a list of floats."""
    node = parent.getElementsByTagName(tagname)[0]
    return [float(item) for item in node.childNodes[0].data.rstrip().split()]


def _cp_text_float(parent, tagname):
    """Return the text of the first ``tagname`` element under ``parent`` as a single float."""
    node = parent.getElementsByTagName(tagname)[0]
    return float(node.childNodes[0].data)


def _cp_rows_of_three(values):
    """Group ``values`` in consecutive rows of three; a trailing incomplete row is dropped."""
    usable = len(values) - len(values) % 3
    return [values[start:start + 3] for start in range(0, usable, 3)]


def _cp_text_matrix(parent, tagname):
    """Return the floats stored in the first ``tagname`` element under ``parent`` as rows of three."""
    return _cp_rows_of_three(_cp_text_floats(parent, tagname))


def _cp_store_optional(parsed_data, key, reader, parent, tagname):
    """Store ``reader(parent, tagname)`` under ``key``; leave ``parsed_data`` untouched if reading fails."""
    try:
        parsed_data[key] = reader(parent, tagname)
    except Exception:
        # Optional quantity: absent or unreadable, nothing is stored.
        pass


def parse_cp_xml_output(data):
    """
    Parse xml data

    Democratically, we have decided to use picoseconds as units of time, eV
    for energies, Angstrom for lengths.

    :param data: a single string, as returned by file.read() (notice the
        difference with parse_text_output!)
    :return: a dictionary with parsed values.
    :raises QEOutputParsingError: if the units of the TIME tag or of the
        k-points are not the supported ones, if the Monkhorst-Pack tags or
        the k-points cannot be read, or if the TIMESTEPS card cannot be
        parsed. Errors raised by the card helpers imported from the pw raw
        parser are not caught here.
    """
    import copy

    dom = parseString(data)

    parsed_data = {}

    # CARD HEADER
    parsed_data = copy.deepcopy(xml_card_header(parsed_data, dom))

    # CARD CONTROL

    cardname = 'CONTROL'
    target_tags = read_xml_card(dom, cardname)

    tagname = 'PP_CHECK_FLAG'
    parsed_data[tagname.lower()] = parse_xml_child_bool(tagname, target_tags)

    # CARD STATUS

    cardname = 'STATUS'
    target_tags = read_xml_card(dom, cardname)

    tagname = 'STEP'
    attrname = 'ITERATION'
    parsed_data[(tagname + '_' + attrname).lower()] = int(
        parse_xml_child_attribute_str(tagname, attrname, target_tags))

    tagname = 'TIME'
    attrname = 'UNITS'
    value = parse_xml_child_float(tagname, target_tags)
    units = parse_xml_child_attribute_str(tagname, attrname, target_tags)
    if units not in ['pico-seconds']:
        raise QEOutputParsingError("Units {} are not supported by parser".format(units))
    parsed_data[tagname.lower()] = value

    tagname = 'TITLE'
    parsed_data[tagname.lower()] = parse_xml_child_str(tagname, target_tags)

    # CARD CELL
    parsed_data, lattice_vectors, volume = copy.deepcopy(xml_card_cell(parsed_data, dom))

    # CARD IONS
    parsed_data = copy.deepcopy(xml_card_ions(parsed_data, dom, lattice_vectors, volume))

    # CARD PLANE WAVES
    parsed_data = copy.deepcopy(xml_card_planewaves(parsed_data, dom, 'cp'))

    # CARD SPIN
    parsed_data = copy.deepcopy(xml_card_spin(parsed_data, dom))

    # CARD EXCHANGE_CORRELATION
    parsed_data = copy.deepcopy(xml_card_exchangecorrelation(parsed_data, dom))

    # TODO CARD OCCUPATIONS

    # CARD BRILLOUIN ZONE
    # TODO: k points are saved for CP... Why?

    cardname = 'BRILLOUIN_ZONE'
    target_tags = read_xml_card(dom, cardname)

    tagname = 'NUMBER_OF_K-POINTS'
    parsed_data[tagname.replace('-', '_').lower()] = parse_xml_child_integer(tagname, target_tags)

    tagname = 'UNITS_FOR_K-POINTS'
    attrname = 'UNITS'
    metric = parse_xml_child_attribute_str(tagname, attrname, target_tags)
    if metric not in ['2 pi / a']:
        raise QEOutputParsingError('Error parsing attribute %s, tag %s inside %s, units unknown' %
                                   (attrname, tagname, target_tags.tagName))
    parsed_data[tagname.replace('-', '_').lower()] = metric

    # TODO: check what happens if one does not use the monkhorst pack in the code
    # The grid is read from the attributes nk1..nk3, the offset from k1..k3.
    for tagname, attr_prefix in [('MONKHORST_PACK_GRID', 'nk'),
                                 ('MONKHORST_PACK_OFFSET', 'k')]:
        try:
            pack_tag = target_tags.getElementsByTagName(tagname)[0]
            value = [int(pack_tag.getAttribute(attr_prefix + str(i + 1))) for i in range(3)]
            parsed_data[tagname.replace('-', '_').lower()] = value
        except Exception:
            raise QEOutputParsingError('Error parsing tag %s inside %s.' %
                                       (tagname, target_tags.tagName))

    try:
        kpoints = []
        for i in range(parsed_data['number_of_k_points']):
            tagname = 'K-POINT.' + str(i + 1)
            kpoint_tag = target_tags.getElementsByTagName(tagname)[0]
            coordinates = kpoint_tag.getAttribute('XYZ').replace('\n', '').rsplit()
            value = [float(s) for s in coordinates]

            metric = parsed_data['units_for_k_points']
            if metric == '2 pi / a':
                # Rescale the coordinates with the lattice parameter.
                value = [float(s) / parsed_data['lattice_parameter'] for s in value]

            weight = float(kpoint_tag.getAttribute('WEIGHT'))

            kpoints.append([value, weight])

        parsed_data['k_point'] = kpoints
    except Exception:
        raise QEOutputParsingError('Error parsing tag K-POINT.# inside %s.' %
                                   (target_tags.tagName))

    tagname = 'NORM-OF-Q'
    # TODO decide if save this parameter
    parsed_data[tagname.replace('-', '_').lower()] = parse_xml_child_float(tagname, target_tags)

    # CARD PARALLELISM
    # can be optional

    try:
        cardname = 'PARALLELISM'
        target_tags = read_xml_card(dom, cardname)

        # A single try wraps the whole card: the first tag that cannot be
        # read stops the parsing of the remaining ones.
        for tagname in ['GRANULARITY_OF_K-POINTS_DISTRIBUTION',
                        'NUMBER_OF_PROCESSORS',
                        'NUMBER_OF_PROCESSORS_PER_POOL',
                        'NUMBER_OF_PROCESSORS_PER_IMAGE',
                        'NUMBER_OF_PROCESSORS_PER_TASKGROUP',
                        'NUMBER_OF_PROCESSORS_PER_POT',
                        'NUMBER_OF_PROCESSORS_PER_BAND_GROUP',
                        'NUMBER_OF_PROCESSORS_PER_DIAGONALIZATION']:
            parsed_data[tagname.lower().replace('-', '_')] = parse_xml_child_integer(tagname, target_tags)
    except Exception:
        pass

    # CARD TIMESTEPS

    cardname = 'TIMESTEPS'
    target_tags = read_xml_card(dom, cardname)

    # Both steps write to the same keys: what is read from STEPM overwrites
    # what was read from STEP0.
    for tagname in ['STEP0', 'STEPM']:
        try:
            tag = target_tags.getElementsByTagName(tagname)[0]

            _cp_store_optional(parsed_data, _cp_key('ACCUMULATORS'),
                               _cp_text_floats, tag, 'ACCUMULATORS')

            second_tagname = 'IONS_POSITIONS'
            second_tag = tag.getElementsByTagName(second_tagname)[0]
            # mandatory: a failure here invalidates the whole card
            for third_tagname in ['stau', 'svel']:
                parsed_data[_cp_key(second_tagname, third_tagname)] = _cp_text_matrix(
                    second_tag, third_tagname)
            _cp_store_optional(parsed_data, _cp_key(second_tagname, 'taui'),
                               _cp_text_matrix, second_tag, 'taui')
            # cdmi is stored as a flat list, not converted to a matrix
            _cp_store_optional(parsed_data, _cp_key(second_tagname, 'cdmi'),
                               _cp_text_floats, second_tag, 'cdmi')
            _cp_store_optional(parsed_data, _cp_key(second_tagname, 'force'),
                               _cp_text_matrix, second_tag, 'force')

            second_tagname = 'IONS_NOSE'
            second_tag = tag.getElementsByTagName(second_tagname)[0]
            # mandatory: a failure here invalidates the whole card
            for third_tagname in ['nhpcl', 'nhpdim', 'xnhp']:
                parsed_data[_cp_key(second_tagname, third_tagname)] = _cp_text_float(
                    second_tag, third_tagname)
            _cp_store_optional(parsed_data, _cp_key(second_tagname, 'vnhp'),
                               _cp_text_float, second_tag, 'vnhp')

            _cp_store_optional(parsed_data, _cp_key('ekincm'),
                               _cp_text_float, tag, 'ekincm')

            # For the following cards the container tag is mandatory, while
            # each of its children is optional.
            second_tagname = 'ELECTRONS_NOSE'
            second_tag = tag.getElementsByTagName(second_tagname)[0]
            for third_tagname in ['xnhe', 'vnhe']:
                _cp_store_optional(parsed_data, _cp_key(second_tagname, third_tagname),
                                   _cp_text_float, second_tag, third_tagname)

            second_tagname = 'CELL_PARAMETERS'
            second_tag = tag.getElementsByTagName(second_tagname)[0]
            for third_tagname in ['ht', 'htvel', 'gvel']:
                _cp_store_optional(parsed_data, _cp_key(second_tagname, third_tagname),
                                   _cp_text_matrix, second_tag, third_tagname)

            second_tagname = 'CELL_NOSE'
            second_tag = tag.getElementsByTagName(second_tagname)[0]
            for third_tagname in ['xnhh', 'vnhh']:
                _cp_store_optional(parsed_data, _cp_key(second_tagname, third_tagname),
                                   _cp_text_matrix, second_tag, third_tagname)
        except Exception:
            raise QEOutputParsingError('Error parsing CARD {}'.format(cardname))

    # CARD BAND_STRUCTURE_INFO

    cardname = 'BAND_STRUCTURE_INFO'
    target_tags = read_xml_card(dom, cardname)

    tagname = 'NUMBER_OF_ATOMIC_WFC'
    parsed_data[tagname.lower().replace('-', '_')] = parse_xml_child_integer(tagname, target_tags)

    tagname = 'NUMBER_OF_ELECTRONS'
    # read as a float and then truncated to an integer
    parsed_data[tagname.lower().replace('-', '_')] = int(parse_xml_child_float(tagname, target_tags))

    tagname = 'NUMBER_OF_BANDS'
    parsed_data[tagname.lower().replace('-', '_')] = parse_xml_child_integer(tagname, target_tags)

    tagname = 'NUMBER_OF_SPIN_COMPONENTS'
    parsed_data[tagname.lower().replace('-', '_')] = parse_xml_child_integer(tagname, target_tags)

    # TODO
    # - EIGENVALUES (that actually just contains occupations)
    #   Why should I be interested in that, if CP works for insulators only?
    # - EIGENVECTORS
    # - others TODO are written in the function

    return parsed_data