# Source code for masci_tools.io.parsers.fleur.fleur_outxml_parser

# -*- coding: utf-8 -*-
###############################################################################
# Copyright (c), Forschungszentrum Jülich GmbH, IAS-1/PGI-1, Germany.         #
#                All rights reserved.                                         #
# This file is part of the Masci-tools package.                               #
# (Material science tools)                                                    #
#                                                                             #
# The code is hosted on GitHub at https://github.com/judftteam/masci-tools.   #
# For further information on the license, see the LICENSE.txt file.           #
# For further information please visit http://judft.de/.                      #
#                                                                             #
###############################################################################
"""
This module contains functions to load a Fleur out.xml file, validate it against a schema
and convert its content to a dict, based on the given parse tasks
"""
from masci_tools.util.parse_tasks import ParseTasks
from masci_tools.util.schema_dict_util import tag_exists, read_constants, eval_simple_xpath, evaluate_attribute
from masci_tools.util.xml.common_functions import eval_xpath, clear_xml, validate_xml
from masci_tools.io.parsers.fleur.fleur_schema.schema_dict import OutputSchemaDict
from masci_tools.util.logging_util import DictHandler, OutParserLogAdapter
from lxml import etree
import copy
import warnings
import logging


def outxml_parser(outxmlfile,
                  version=None,
                  parser_info_out=None,
                  iteration_to_parse=None,
                  strict=False,
                  debug=False,
                  **kwargs):
    """
    Parses the out.xml file to a dictionary based on the version and the given tasks

    :param outxmlfile: either path to the out.xml file, opened file handle or a xml etree to be parsed
    :param version: version string to enforce that a given schema is used
    :param parser_info_out: dict, with warnings, info, errors, ...
    :param iteration_to_parse: either str or int, (optional, default 'last')
                               determines which iteration should be parsed.
                               Accepted are 'all', 'first', 'last' or an index for the iteration
    :param strict: bool if True no logger is used while parsing; problems are raised
                   (or emitted via the warnings module) instead of being logged
    :param debug: bool if True additional information is printed out in the logs

    Kwargs:
        :param ignore_validation: bool, if True schema validation errors are only logged
        :param minimal_mode: bool, if True only total Energy, iteration number and distances are parsed
        :param list_return: bool, if True one-item lists in the output dict are not converted to simple values
        :param additional_tasks: dict to define custom parsing tasks. For detailed explanation
                                 See :py:mod:`~masci_tools.io.parsers.fleur.default_parse_tasks`.
        :param overwrite: bool, if True and keys in additional_tasks collide with defaults
                          The defaults will be overwritten
        :param append: bool, if True and keys in additional_tasks collide with defaults
                       The inner tasks will be written into the dict. If inner keys collide
                       they are overwritten

    :return: python dictionary with the information parsed from the out.xml. An empty dict is
             returned if the file could not be repaired and ``strict`` is False. If no iteration
             is available for ``'first'``/``'last'`` only the general information is returned.

    :raises ValueError: If the validation against the schema failed, or an irrecoverable error
                        occurred during parsing
    :raises FileNotFoundError: If no Schema file for the given version was found
    :raises KeyError: If an unknown task is encountered

    """

    __parser_version__ = '0.5.0'

    # Keep a separate reference to the module logger: `logger` may be set to None
    # below, but the handler still has to be detached from the real logger.
    module_logger = logging.getLogger(__name__)
    logger = module_logger

    parser_log_handler = None
    if parser_info_out is not None or not strict:
        if parser_info_out is None:
            parser_info_out = {}

        logging_level = logging.DEBUG if debug else logging.INFO
        module_logger.setLevel(logging_level)

        parser_log_handler = DictHandler(parser_info_out,
                                         WARNING='parser_warnings',
                                         ERROR='parser_errors',
                                         INFO='parser_info',
                                         DEBUG='parser_debug',
                                         CRITICAL='parser_critical',
                                         ignore_unknown_levels=True,
                                         level=logging_level)

        module_logger.addHandler(parser_log_handler)

    if strict:
        logger = None

    try:
        if logger is not None:
            logger.info('Masci-Tools Fleur out.xml Parser v%s', __parser_version__)
        # kwargs is handed over as a plain dict so that no user supplied key can
        # collide with the helper's own parameter names
        return _parse_outxml(outxmlfile, version, iteration_to_parse, logger, kwargs)
    finally:
        # Always detach the handler (early return, exception, strict mode);
        # otherwise every call would leave one more handler on the module logger.
        if parser_log_handler is not None:
            module_logger.removeHandler(parser_log_handler)


def _xpath_result_missing(value):
    """Return True if an xpath evaluation produced no usable value."""
    # NOTE(review): eval_xpath presumably returns an empty list when nothing
    # matches - confirm; None is checked as well to keep the original intent.
    return value is None or (isinstance(value, list) and not value)


def _load_xmltree(outxmlfile, logger):
    """
    Load the out.xml into an etree, falling back to a recovering parser for broken files

    :param outxmlfile: path, opened file handle or already parsed etree
    :param logger: logger or None; with None problems are raised/warned instead of logged

    :return: tuple (xmltree, outfile_broken); xmltree is None if even the recovering
             parser failed and a logger is available to record this

    :raises lxml.etree.XMLSyntaxError: if repairing failed and no logger is given
    """
    if isinstance(outxmlfile, etree._ElementTree):
        return outxmlfile, False

    parser = etree.XMLParser(attribute_defaults=True, recover=False, encoding='utf-8')
    try:
        return etree.parse(outxmlfile, parser), False
    except etree.XMLSyntaxError:
        if logger is None:
            warnings.warn('The out.xml file is broken I try to repair it.')
        else:
            logger.warning('The out.xml file is broken I try to repair it.')

    # The failed attempt has consumed (part of) an opened file handle;
    # rewind it so the recovering parser sees the whole document again.
    if hasattr(outxmlfile, 'seek'):
        try:
            outxmlfile.seek(0)
        except (OSError, ValueError):
            # not seekable: best effort, continue with whatever is left to read
            pass

    # repair xmlfile and try to parse what is possible.
    parser = etree.XMLParser(attribute_defaults=True, recover=True, encoding='utf-8')
    try:
        return etree.parse(outxmlfile, parser), True
    except etree.XMLSyntaxError:
        if logger is None:
            raise
        logger.exception('Skipping the parsing of the xml file. Repairing was not possible.')
        return None, True


def _determine_versions(xmltree, version, logger):
    """
    Determine the output/input schema versions to use for the given out.xml

    :param xmltree: etree of the out.xml
    :param version: version string to enforce or None to read it from the file
    :param logger: logger or None

    :return: tuple (out_version, inp_version, ignore_validation)

    :raises ValueError: if a version could not be extracted or the combination of
                        file version '0.27' and program version is unknown
    """
    if version is None:
        out_version = eval_xpath(xmltree, '//@fleurOutputVersion', logger=logger)
        # Check before converting to str, afterwards the value can never be None
        if _xpath_result_missing(out_version):
            if logger is not None:
                logger.error('Failed to extract outputVersion')
            raise ValueError('Failed to extract outputVersion')
        out_version = str(out_version)
    else:
        out_version = version

    if out_version != '0.27':
        inp_version = eval_xpath(xmltree, '//@fleurInputVersion', logger=logger)
        if _xpath_result_missing(inp_version):
            if logger is not None:
                logger.error('Failed to extract inputVersion')
            raise ValueError('Failed to extract inputVersion')
        return out_version, str(inp_version), False

    # Several releases wrote '0.27' as the output version; the program version
    # is used to pick the schema. Validation errors are not fatal for these files.
    program_version = eval_xpath(xmltree, '//programVersion/@version', logger=logger)

    # (program version, schema version to use, release name)
    max_releases = (
        ('fleur 32', '0.33', 'MaX5.0'),  #Max5 release (before bugfix)
        ('fleur 31', '0.31', 'MaX4.0'),
        ('fleur 30', '0.30', 'MaX3.1'),
    )
    # compared with == (no dict lookup) since the xpath result is not guaranteed to be hashable
    for known_program, schema_version, release in max_releases:
        if program_version == known_program:
            msg = f"Ignoring '0.27' outputVersion for {release} release"
            if logger is not None:
                logger.warning(msg)
            else:
                warnings.warn(msg)
            return schema_version, schema_version, True

    if program_version == 'fleur 27':
        #Before the MaX3.1 release
        if logger is not None:
            logger.warning("Found version before MaX3.1 release falling back to file version '0.29'")
        warnings.warn(
            'out.xml files before the MaX3.1 release are not explicitely supported.'
            ' No guarantee is given that the parser will work without error', UserWarning)
        return '0.29', '0.29', True

    if logger is not None:
        logger.error("Unknown fleur version: File-version '%s' Program-version '%s'", out_version, program_version)
    raise ValueError(f"Unknown fleur version: File-version '{out_version}' Program-version '{program_version}'")


def _parse_outxml(outxmlfile, version, iteration_to_parse, logger, kwargs):
    """
    Load, validate and parse the out.xml; logging setup/teardown is done by the caller

    :param outxmlfile: path, opened file handle or etree of the out.xml
    :param version: version string to enforce or None
    :param iteration_to_parse: 'all', 'first', 'last', an int index or None (meaning 'last')
    :param logger: logger or None (strict mode)
    :param kwargs: dict with the keyword arguments given to :py:func:`outxml_parser`

    :return: dict with the parsed information
    """
    xmltree, outfile_broken = _load_xmltree(outxmlfile, logger)
    if xmltree is None:
        return {}

    out_version, inp_version, ignore_validation = _determine_versions(xmltree, version, logger)
    ignore_validation = kwargs.get('ignore_validation', ignore_validation)

    #Load schema_dict (inp and out)
    outschema_dict = OutputSchemaDict.fromVersion(out_version, inp_version=inp_version, logger=logger)

    # If the loaded schema reports other versions than requested,
    # validation failures are only logged and the schema's versions are used
    if outschema_dict['out_version'] != out_version or \
       outschema_dict['inp_version'] != inp_version:
        ignore_validation = True
        out_version = outschema_dict['out_version']
        inp_version = outschema_dict['inp_version']

    if logger is not None:
        logger.info('Found fleur out file with the versions out: %s; inp: %s', out_version, inp_version)

    xmltree, _ = clear_xml(xmltree)
    root = xmltree.getroot()

    errmsg = ''
    try:
        validate_xml(xmltree, outschema_dict.xmlschema, error_header='Output file does not validate against the schema')
    except etree.DocumentInvalid as err:
        errmsg = str(err)
        if logger is not None:
            logger.warning(errmsg)
        if not ignore_validation:
            if logger is not None:
                logger.exception(errmsg)
            raise ValueError(errmsg) from err

    if not outschema_dict.xmlschema.validate(xmltree) and errmsg == '':
        msg = 'Output file does not validate against the schema: Reason is unknown'
        if logger is not None:
            logger.warning(msg)
        if not ignore_validation:
            if logger is not None:
                # no active exception here, so error() instead of exception()
                logger.error(msg)
            raise ValueError(msg)

    parser = ParseTasks(out_version)
    additional_tasks = kwargs.pop('additional_tasks', {})
    for task_name, task_definition in additional_tasks.items():
        parser.add_task(task_name, task_definition, **kwargs)

    out_dict, constants = parse_general_information(root,
                                                    parser,
                                                    outschema_dict,
                                                    logger=logger,
                                                    iteration_to_parse=iteration_to_parse,
                                                    **kwargs)

    out_dict['input_file_version'] = outschema_dict['inp_version']
    # get all iterations in out.xml file
    iteration_nodes = eval_simple_xpath(root, outschema_dict, 'iteration', logger=logger, list_return=True)
    n_iters = len(iteration_nodes)

    # parse only last stable iteration
    # (if modes (dos and co) maybe parse anyway if broken?)
    if n_iters == 0:
        # there was no iteration found.
        # only the starting charge density could be generated
        msg = 'There was no iteration found in the outfile, either just a ' \
              'starting density was generated or something went wrong.'
        if logger is None:
            raise ValueError(msg)
        logger.error(msg)
    elif outfile_broken:
        if n_iters >= 2:
            # drop the trailing iterations of a repaired file
            iteration_nodes = iteration_nodes[:-2]
            if logger is not None:
                logger.info('The last parsed iteration is %s', n_iters - 2)
        else:
            iteration_nodes = [iteration_nodes[0]]
            if logger is not None:
                logger.info('The last parsed iteration is %s', n_iters)

    if iteration_to_parse is None:
        iteration_to_parse = 'last'  #This is the default from the aiida_fleur parser

    # Slices are used for 'first'/'last' so that an empty list of iterations
    # (nothing found, or everything dropped above) results in no iteration
    # being parsed instead of an IndexError
    if iteration_to_parse == 'all':
        pass
    elif iteration_to_parse == 'last':
        iteration_nodes = iteration_nodes[-1:]
    elif iteration_to_parse == 'first':
        iteration_nodes = iteration_nodes[:1]
    elif isinstance(iteration_to_parse, int):
        try:
            iteration_nodes = [iteration_nodes[iteration_to_parse]]
        except IndexError as exc:
            if logger is not None:
                logger.exception(exc)
            raise ValueError(f"Invalid value for iteration_to_parse: Got '{iteration_to_parse}'"
                             f"; but only '{len(iteration_nodes)}' iterations are available") from exc
    else:
        if logger is not None:
            logger.error(
                "Invalid value for iteration_to_parse: Got '%s' "
                "Valid values are: 'first', 'last', 'all', or int", iteration_to_parse)
        raise ValueError(f"Invalid value for iteration_to_parse: Got '{iteration_to_parse}' "
                         "Valid values are: 'first', 'last', 'all', or int")

    # The adapter reads the current iteration from this dict, which is updated in the loop.
    # Without a logger None is passed on: parse_iteration only logs if its logger is not None,
    # wrapping None in an adapter would bypass that check.
    logger_info = {'iteration': 'unknown'}
    iteration_logger = None
    if logger is not None:
        iteration_logger = OutParserLogAdapter(logger, logger_info)

    for node in iteration_nodes:
        iteration_number = evaluate_attribute(node, outschema_dict, 'numberForCurrentRun', optional=True)

        if iteration_number is not None:
            logger_info['iteration'] = iteration_number

        out_dict = parse_iteration(node, parser, outschema_dict, out_dict, constants, logger=iteration_logger, **kwargs)

        logger_info['iteration'] = 'unknown'

    if not kwargs.get('list_return', False):
        #Convert one item lists to simple values (top level and one level of nested dicts)
        for key, value in out_dict.items():
            if isinstance(value, list):
                if len(value) == 1:
                    out_dict[key] = value[0]
            elif isinstance(value, dict):
                for subkey, subvalue in value.items():
                    if isinstance(subvalue, list) and len(subvalue) == 1:
                        value[subkey] = subvalue[0]

    return out_dict


def parse_general_information(root, parser, outschema_dict, logger, iteration_to_parse=None, **kwargs):
    """
    Collects everything from the out.xml that is located outside of the scf iterations

    The defined constants and the fleur calculation modes are read first; the modes
    decide which tasks the parser performs. Afterwards all general tasks of the
    parser are performed on the root.

    Args:
        :param root: etree Element for the root of the out.xml
        :param parser: ParseTasks object with all defined tasks
        :param outschema_dict: dict with the information parsed from the OutputSchema
        :param logger: logger object for messages, if None nothing is logged here
        :param iteration_to_parse: which iteration will be parsed later on (None means 'last').
                                   For relaxations with 'last' the distance tasks are dropped

    Kwargs:
        :param minimal_mode: bool, if True only total Energy, iteration number and distances are parsed

    :return: tuple of the dict with the parsed results (including the key 'fleur_modes')
             and the constants that were read in
    """
    from masci_tools.util.xml.xml_getters import get_fleur_modes

    only_minimal = kwargs.get('minimal_mode', False)
    selected_iteration = 'last' if iteration_to_parse is None else iteration_to_parse
    has_logger = logger is not None

    constants = read_constants(root, outschema_dict, logger=logger)
    if has_logger:
        logger.info('The following defined constants were found: %s', constants)

    modes = get_fleur_modes(root, outschema_dict, logger=logger)
    if has_logger:
        logger.info('The following Fleur modes were found: %s', modes)

    parser.determine_tasks(modes, minimal=only_minimal)

    #DOS and band calculations only get a reduced set of iteration tasks
    if modes['dos'] or modes['band']:
        dos_band_tasks = ['iteration_number', 'fermi_energy']
        parser.iteration_tasks = dos_band_tasks
        if modes['bz_integration'] == 'hist':
            parser.iteration_tasks = dos_band_tasks + ['bandgap']

    #The distance tasks are not performed for the last iteration of a relaxation
    if modes['relax'] and selected_iteration == 'last':
        for distance_task in ('distances', 'magnetic_distances'):
            if distance_task in parser.iteration_tasks:
                parser.iteration_tasks.remove(distance_task)

    if has_logger:
        logger.debug('The following tasks are performed on the root: %s', parser.general_tasks)

    results = {'fleur_modes': modes}
    for task_name in parser.general_tasks:
        if has_logger:
            logger.debug('Performing task: %s', task_name)
        results = parser.perform_task(task_name,
                                      root,
                                      results,
                                      outschema_dict,
                                      constants,
                                      logger=logger,
                                      use_lists=False)

    return results, constants


def parse_iteration(iteration_node, parser, outschema_dict, out_dict, constants, logger, **kwargs):
    """
    Performs the tasks stored in parser.iteration_tasks on one scf iteration node

    If the iteration contains one of the forcetheorem tags, the tasks are replaced
    by the single task named after the lowercased tag (or by no task at all in minimal mode).

    Args:
        :param iteration_node: etree Element for a scf iteration
        :param parser: ParseTasks object with all defined tasks
        :param outschema_dict: dict with the information parsed from the OutputSchema
        :param out_dict: dict with the parsed results
        :param constants: dict with all the defined mathematical constants
        :param logger: logger object for messages, if None nothing is logged here

    Kwargs:
        :param minimal_mode: bool, if True only total Energy, iteration number and distances are parsed

    :return: the dict with the parsed results after all tasks were performed

    :raises KeyError: a KeyError raised while performing a task is logged and re-raised
    """

    only_minimal = kwargs.get('minimal_mode', False)

    #Work on a copy, the task list of the parser is not modified
    tasks = copy.deepcopy(parser.iteration_tasks)

    #Only the first forcetheorem tag that is found is taken into account
    for forcetheorem_tag in ('Forcetheorem_DMI', 'Forcetheorem_SSDISP', 'Forcetheorem_JIJ', 'Forcetheorem_MAE'):
        if not tag_exists(iteration_node, outschema_dict, forcetheorem_tag):
            continue
        tasks = [] if only_minimal else [forcetheorem_tag.lower()]
        break

    has_logger = logger is not None
    if has_logger:
        logger.debug('The following tasks are performed for the iteration: %s', tasks)

    for task_name in tasks:
        if has_logger:
            logger.debug('Performing task: %s', task_name)

        try:
            out_dict = parser.perform_task(task_name,
                                           iteration_node,
                                           out_dict,
                                           outschema_dict,
                                           constants,
                                           logger=logger)
        except KeyError:
            if has_logger:
                logger.exception("Unknown task: '%s'. Skipping this one", task_name)
            raise

    return out_dict