# Source code for masci_tools.io.parsers.fleur.fleur_outxml_parser

###############################################################################
# Copyright (c), Forschungszentrum Jülich GmbH, IAS-1/PGI-1, Germany.         #
#                All rights reserved.                                         #
# This file is part of the Masci-tools package.                               #
# (Material science tools)                                                    #
#                                                                             #
# The code is hosted on GitHub at https://github.com/judftteam/masci-tools.   #
# For further information on the license, see the LICENSE.txt file.           #
# For further information please visit http://judft.de/.                      #
#                                                                             #
###############################################################################
"""
This module contains functions to load a Fleur out.xml file, parse it with a schema
and convert its content to a dict, based on the tasks given
"""
from __future__ import annotations
from functools import partial

from masci_tools.util.xml.common_functions import clear_xml
from masci_tools.util.xml.converters import convert_str_version_number
from masci_tools.util.xml import xml_getters
from masci_tools.util.xml.xpathbuilder import FilterType
from masci_tools.util.parse_utils import Conversion
from masci_tools.io.fleur_xml import FleurXMLContext, load_outxml_and_check_for_broken_xml, _EvalContext
from masci_tools.util.logging_util import DictHandler, OutParserLogAdapter
from masci_tools.util.typing import XMLFileLike
import copy
import warnings
import logging
from typing import Any, Callable, Iterable, TypeVar
try:
    from typing import Literal
except ImportError:
    from typing_extensions import Literal  #type:ignore
import sys
if sys.version_info >= (3, 10):
    from typing import TypeAlias
else:
    from typing_extensions import TypeAlias

__all__ = ('outxml_parser', 'conversion_function', 'register_migration')


def outxml_parser(outxmlfile: XMLFileLike,
                  parser_info_out: dict[str, Any] | None = None,
                  iteration_to_parse: Literal['all', 'last', 'first'] | int = 'last',
                  minimal_mode: bool = False,
                  additional_tasks: dict[str, dict[str, Any]] | None = None,
                  optional_tasks: Iterable[str] | None = None,
                  overwrite: bool = False,
                  append: bool = False,
                  list_return: bool = False,
                  strict: bool = False,
                  debug: bool = False,
                  ignore_validation: bool = False,
                  base_url: str | None = None) -> dict[str, Any]:
    """
    Parses the out.xml file to a dictionary based on the version and the given tasks

    :param outxmlfile: either path to the out.xml file, opened file handle (in bytes modes i.e. rb)
                       or a xml etree to be parsed
    :param parser_info_out: dict, with warnings, info, errors, ...
    :param iteration_to_parse: either str or int, (optional, default 'last')
                               determines which iteration should be parsed.
                               Accepted are 'all', 'first', 'last' or an index for the iteration
    :param minimal_mode: bool, if True only total Energy, iteration number and distances are parsed
    :param additional_tasks: dict to define custom parsing tasks. For detailed explanation
                             See :py:mod:`~masci_tools.io.parsers.fleur.default_parse_tasks`.
    :param overwrite: bool, if True and keys in additional_tasks collide with defaults
                      The defaults will be overwritten
    :param append: bool, if True and keys in additional_tasks collide with defaults
                   The inner tasks will be written into the dict. If inner keys collide
                   they are overwritten
    :param optional_tasks: Iterable of strings, defines additional tasks to perform.
                           See :py:mod:`~masci_tools.io.parsers.fleur.default_parse_tasks` for examples.
    :param list_return: bool, if True one-item lists in the output dict are not converted to simple values
    :param strict: bool, if True no logger is used (nothing is collected in ``parser_info_out``),
                   so errors that would otherwise only be logged are raised instead
    :param debug: bool if True additional information is printed out in the logs
    :param ignore_validation: bool, if True schema validation errors are only logged
    :param base_url: str, optional. Passed on unchanged to
                     ``load_outxml_and_check_for_broken_xml`` when loading the file

    :return: python dictionary with the information parsed from the out.xml

    :raises ValueError: If the validation against the schema failed, or an irrecoverable error
                        occurred during parsing
    :raises FileNotFoundError: If no Schema file for the given version was found
    :raises KeyError: If an unknown task is encountered
    """

    __parser_version__ = '0.7.1'

    logger: logging.Logger | None = logging.getLogger(__name__)
    if strict:
        logger = None

    parser_log_handler = None
    if logger is not None:
        if parser_info_out is None:
            parser_info_out = {}

        logging_level = logging.DEBUG if debug else logging.INFO
        logger.setLevel(logging_level)

        parser_log_handler = DictHandler(parser_info_out,
                                         WARNING='parser_warnings',
                                         ERROR='parser_errors',
                                         INFO='parser_info',
                                         DEBUG='parser_debug',
                                         CRITICAL='parser_critical',
                                         ignore_unknown_levels=True,
                                         level=logging_level)

        logger.addHandler(parser_log_handler)

    #The handler is attached to the module level logger. It has to be removed on
    #every exit path (normal return, early return and exceptions); otherwise it
    #stays attached and later calls keep writing into this call's parser_info_out
    try:
        if logger is not None:
            logger.info('Masci-Tools Fleur out.xml Parser v%s', __parser_version__)

        try:
            xmltree, schema_dict, outfile_broken = load_outxml_and_check_for_broken_xml(outxmlfile,
                                                                                        logger=logger,
                                                                                        base_url=base_url)
        except ValueError as err:
            if logger is not None:
                logger.error(str(err))
            if 'Skipping the parsing of the XML file' in str(err):
                return {}
            raise
        xmltree, _ = clear_xml(xmltree)

        #If the iteration is a forcetheorem calculation all iteration tasks
        #are replaced by the single task named after the (lowercased) tag
        forcetheorem_tags = ['Forcetheorem_DMI', 'Forcetheorem_SSDISP', 'Forcetheorem_JIJ', 'Forcetheorem_MAE']

        with FleurXMLContext(xmltree, schema_dict, logger=logger) as root:

            out_version = root.attribute('fleurOutputVersion')
            if out_version == '0.27':
                inp_version = out_version
            else:
                inp_version = root.attribute('fleurInputVersion')

            #The loaded schema is for a different version than the file states
            #In this case validation errors are expected and only logged
            if schema_dict['out_version'] != out_version or \
               schema_dict['inp_version'] != inp_version:
                ignore_validation = True
                out_version = schema_dict['out_version']
                inp_version = schema_dict['inp_version']

            if logger is not None:
                logger.info('Found fleur out file with the versions out: %s; inp: %s', out_version, inp_version)

            try:
                schema_dict.validate(xmltree, logger=logger)
            except ValueError as err:
                if not ignore_validation:
                    if logger is not None:
                        logger.exception(err)
                    raise

            parser = _TaskParser(out_version)
            if additional_tasks is None:
                additional_tasks = {}
            for task_name, task_definition in additional_tasks.items():
                parser.add_task(task_name, task_definition, overwrite=overwrite, append=append)

            if logger is not None:
                logger.info('The following defined constants were found: %s', root.constants)

            fleur_modes = xml_getters.get_fleur_modes(xmltree, schema_dict, logger=logger)
            if logger is not None:
                logger.info('The following Fleur modes were found: %s', fleur_modes)
            parser.determine_tasks(fleur_modes,
                                   optional_tasks,
                                   minimal=minimal_mode,
                                   iteration_to_parse=iteration_to_parse)

            out_dict = {'input_file_version': schema_dict['inp_version'], 'fleur_modes': fleur_modes}
            if logger is not None:
                logger.debug('The following tasks are performed on the root: %s', parser.general_tasks)
            for task in parser.general_tasks:

                if logger is not None:
                    logger.debug('Performing task: %s', task)
                out_dict = parser.perform_task(task, root, out_dict, use_lists=False)

            iteration_filter = _determine_iteration_condition(iteration_to_parse, root.number_nodes('iteration'),
                                                              outfile_broken, logger)

            logger_info: dict[str, Any] = {}
            iteration_logger: logging.LoggerAdapter | None = None
            if logger is not None:
                iteration_logger = OutParserLogAdapter(logger, logger_info)

            for iteration in root.iter('iteration', filters=iteration_filter):
                iteration.logger = iteration_logger  #type:ignore[assignment] #TODO: Should this be allowed to be overwritten in iter?
                logger_info['iteration'] = iteration.attribute('numberForCurrentRun', default='unknown')

                iteration_tasks = parser.iteration_tasks
                for tag in forcetheorem_tags:
                    if iteration.tag_exists(tag):
                        if minimal_mode:
                            iteration_tasks = []
                        else:
                            iteration_tasks = [tag.lower()]
                        break

                if iteration.logger is not None:
                    iteration.logger.debug('The following tasks are performed for the iteration: %s',
                                           iteration_tasks)

                for task in iteration_tasks:

                    if iteration.logger is not None:
                        iteration.logger.debug('Performing task: %s', task)

                    try:
                        out_dict = parser.perform_task(task, iteration, out_dict)
                    except KeyError:
                        if logger is not None:
                            logger.exception("Unknown task: '%s'. Skipping this one", task)
                        raise

        if not list_return:
            #Convert one item lists to simple values
            for key, value in out_dict.items():
                if isinstance(value, list) and len(value) == 1:
                    out_dict[key] = value[0]
                elif isinstance(value, dict):
                    for subkey, subvalue in value.items():
                        if isinstance(subvalue, list) and len(subvalue) == 1:
                            out_dict[key][subkey] = subvalue[0]

        return out_dict
    finally:
        if logger is not None and parser_log_handler is not None:
            logger.removeHandler(parser_log_handler)


def _determine_iteration_condition(iteration_to_parse: Literal['all', 'first', 'last'] | int, n_iters: int,
                                   broken: bool, logger: logging.Logger | None) -> FilterType:
    """
    Determine which iterations should be parsed
    If the XML file is broken the last stable iteration is parsed if possible

    :param iteration_to_parse: either str or int, (optional, default 'last')
                               determines which iteration should be parsed.
                               Accepted are 'all', 'first', 'last' or an index for the iteration
    :param n_iters: How many iterations are in the file
    :param broken: if True the last iteration is assumed to be broken
    :param logger: logger for warnings
    """
    if n_iters == 0:
        # there was no iteration found.
        # only the starting charge density could be generated
        # Or fleur was started with -check or similar flags
        msg = 'There was no iteration found in the outfile, either just a ' \
              'starting density was generated or something went wrong.'
        if logger is None:
            raise ValueError(msg)
        logger.error(msg)
        return {}

    if logger is not None and broken and n_iters == 1:
        logger.info('The last parsed iteration is %s', n_iters)

    index_condition: int | dict[str, int] | None = None
    if iteration_to_parse == 'last':
        index_condition = -1
        if broken and n_iters >= 2:
            index_condition = -2
            if logger is not None:
                logger.info('The last parsed iteration is %s', n_iters - 2)
    elif iteration_to_parse == 'first':
        index_condition = 1
    elif isinstance(iteration_to_parse, int):
        if iteration_to_parse >= n_iters or iteration_to_parse < -n_iters:
            if logger is not None:
                logger.error("Invalid value for iteration_to_parse: Got '%s'", iteration_to_parse)
            raise ValueError(f"Invalid value for iteration_to_parse: Got '{iteration_to_parse}'"
                             f"; but only '{n_iters}' iterations are available")
        index_condition = iteration_to_parse if iteration_to_parse < 0 else iteration_to_parse + 1  #1-based indexing in XPaths
    elif iteration_to_parse == 'all':
        if broken and n_iters >= 2:
            index_condition = {'<=': -2}
            if logger is not None:
                logger.info('The last parsed iteration is %s', n_iters - 2)
    else:
        if logger is not None:
            logger.error(
                "Invalid value for iteration_to_parse: Got '%s' "
                "Valid values are: 'first', 'last', 'all', or int", iteration_to_parse)
        raise ValueError(f"Invalid value for iteration_to_parse: Got '{iteration_to_parse}' "
                         "Valid values are: 'first', 'last', 'all', or int")

    filters = {}
    if index_condition is not None:
        filters['iteration'] = {'index': index_condition}
    return filters


# Nested mapping ``{start_version: {target_version: migration}}``.
# ``migration`` is either the literal string 'compatible' (no change of the task
# definitions is needed to go from start to target) or a callable that is applied
# to the dict of task definitions and returns the migrated dict.
MigrationDict: TypeAlias = "dict[str, dict[str, Literal['compatible'] | Callable]]"
"""
Type describing the dictionary defining the migration pathways
"""


def _find_migration(start: str, target: str, migrations: MigrationDict) -> list[Callable] | None:
    """
    Tries to find a migration path from the start to the target version
    via the defined migration functions

    :param start: str of the starting version
    :param target: str of the target version
    :param migrations: dict of funcs registered via the register_migration_function decorator

    :returns: list of migration functions to be called to go from start to target
    """

    if start == target:
        return []

    if start not in migrations:
        return None

    possible_migrations = migrations[start]
    if target in possible_migrations:
        migration = possible_migrations[target]
        if isinstance(migration, str) and migration == 'compatible':
            return []
        return [migration]

    for migrated_version, migration in possible_migrations.items():
        new_call_list = _find_migration(migrated_version, target, migrations)
        if new_call_list is None:
            #Cannot migrate to target from this version
            continue

        if isinstance(migration, str) and migration == 'compatible':
            call_list = []
        else:
            call_list = [migration]
        call_list += new_call_list
        return call_list
    return None


class _TaskParser:
    """
    Representation of all known parsing tasks for the out.xml file

    When set up it will initialize the known default tasks and check if they work
    for the given output version

    Accessing definition of task example

    .. code-block:: python

        from masci_tools.io.parsers.fleur.fleur_outxml_parser import _TaskParser

        p = _TaskParser('0.33')
        totE_definition = p.tasks['total_energy']
    """

    PARSE_FUNCTIONS = {
        'attrib', 'text', 'allAttribs', 'parentAttribs', 'singleValue', 'exists', 'attrib_exists', 'numberNodes'
    }
    ALL_ATTRIBS_FUNCTIONS = {'allAttribs', 'parentAttribs', 'singleValue'}

    CONTROL_KEYS = {'_general', '_modes', '_minimal', '_special', '_conversions', '_optional', '_minimum_version'}
    REQUIRED_KEYS = {'parse_type'}
    REQUIRED_KEYS_XML_GETTER = {'parse_type', 'name'}
    REQUIRED_KEYS_UTIL = {'parse_type', 'path_spec'}
    ALLOWED_KEYS = {'parse_type', 'path_spec', 'subdict', 'overwrite_last', 'kwargs'}
    ALLOWED_KEYS_ALLATTRIBS = {'parse_type', 'path_spec', 'subdict', 'base_value', 'overwrite', 'flat', 'kwargs'}
    ALLOWED_KEYS_XML_GETTER = {'parse_type', 'name', 'kwargs', 'result_names'}

    _version = '0.4.0'
    migrations: MigrationDict = {}
    conversion_functions: dict[str, Callable] = {}

    def __init__(self, version: str, validate_defaults: bool = False) -> None:
        """
        Initialize the default parse tasks
        Terminates if the version is not marked as working with the default tasks

        :param version: str of the wanted output version
        :param task_file: optional, file to override default_parse_tasks
        :param validate_defaults: bool, if True all tasks from the default tasks
                                  are added one by one and are checked for
                                  inconsistent keys
        """
        from . import default_parse_tasks as tasks

        self.iteration_tasks: list[str] = []
        self.general_tasks: list[str] = []
        self.version = convert_str_version_number(version)

        tasks_dict: dict[str, dict[str, Any]] = copy.deepcopy(tasks.TASKS_DEFINITION)  #type: ignore[arg-type]
        if validate_defaults:
            #Manually add each task to make sure that there are no typos/inconsitencies in the keys
            self.tasks = {}
            for task_name, task in tasks_dict.items():
                self.add_task(task_name, task)
        else:
            self.tasks = tasks_dict

        working: set[str] = tasks.__working_out_versions__
        #Look if the base version is compatible if not look for a migration
        if version not in working:

            working_version_tuples = {convert_str_version_number(v) for v in working}
            if all(working_version < self.version for working_version in working_version_tuples):
                warnings.warn(
                    f"Output version '{version}' is not explicitly stated as 'working'\n"
                    'with the current version of the outxml_parser.\n'
                    'Since the given version is newer than the latest working version\n'
                    'I will continue. Errors and warnings can occur!', UserWarning)
            else:
                base: str = tasks.__base_version__
                migration_list = _find_migration(base, version, self.migrations)

                if migration_list is None:
                    raise ValueError(f'Unsupported output version: {version}')

                for migration in migration_list:
                    self.tasks = migration(self.tasks)

    @property
    def optional_tasks(self) -> set[str]:
        """
        Return a set of the available optional defined tasks
        """
        return {key for key, val in self.tasks.items() if val.get('_optional', False)}

    def add_task(self,
                 task_name: str,
                 task_definition: dict[str, Any],
                 append: bool = False,
                 overwrite: bool = False) -> None:
        """
        Add a new task definition to the tasks dictionary

        Will first check if the definition has all the required keys

        :param task_name: str, key in the tasks dict
        :param task_definition: dict with the defined tasks
        :param overwrite: bool (optional), if True and the key is present in the dictionary it will be
                          overwritten with the new definition
        :param append: bool (optional), if True and the key is present in the dictionary the new definitions
                       will be inserted into this dictionary (inner keys WILL BE OVERWRITTEN). Additionally
                       if an inner key is overwritten with an empty dict the inner key will be removed

        The following keys are expected in each entry of the task_definition dictionary:
            :param parse_type: str, defines which methods to use when extracting the information
            :param path_spec: dict with all the arguments that should be passed to tag_xpath
                              or attrib_xpath to get the correct path
            :param subdict: str, if present the parsed values are put into this key in the output dictionary
            :param overwrite_last: bool, if True no list is inserted and each entry overwrites the last

        For the allAttribs parse_type there are more keys that can appear:
            :param base_value: str, optional. If given the attribute
                               with this name will be inserted into the key from the task_definition
                               all other keys are formatted as {task_key}_{attribute_name}
            :param ignore: list of str, these attributes will be ignored
            :param overwrite: list of str, these attributes will not create a list and overwrite any value
                              that might be there
            :param flat: bool, if False the dict parsed from the tag is inserted as a dict into the correspondin key
                               if True the values will be extracted and put into the output dictionary with the
                               format {task_key}_{attribute_name}

        """

        if task_name in self.tasks and not (append or overwrite):
            raise ValueError(f"Task '{task_name}' is already defined."
                             'Use append=True to append them (conflicting keys are overwritten)'
                             'or overwrite=True to remove all existing tasks')

        for task_key, definition in task_definition.items():

            if task_key.startswith('_'):
                if task_key not in self.CONTROL_KEYS:
                    raise ValueError(f'Unknown control key: {task_key}')
                continue

            task_keys = set(definition.keys())

            if not task_keys and task_key in self.tasks[task_name]:
                continue

            parse_type = definition['parse_type']
            if parse_type not in self.PARSE_FUNCTIONS | {'xmlGetter'}:
                raise ValueError(f'Unknown parse_type: {parse_type}')

            required = self.REQUIRED_KEYS
            allowed = self.ALLOWED_KEYS
            if parse_type == 'xmlGetter':
                required = required | self.REQUIRED_KEYS_XML_GETTER
                allowed = self.ALLOWED_KEYS_XML_GETTER
            else:
                required = required | self.REQUIRED_KEYS_UTIL
                if parse_type in self.ALL_ATTRIBS_FUNCTIONS:
                    allowed = self.ALLOWED_KEYS_ALLATTRIBS

            missing_required = required.difference(task_keys)
            if missing_required:
                raise ValueError(f'Reqired Keys missing: {missing_required}')

            extra_keys = task_keys.difference(allowed)
            if extra_keys:
                raise ValueError(f'Got extra Keys: {extra_keys}')

        if append:
            self.tasks.setdefault(task_name, {})
            for key, definition in task_definition.items():
                if definition:
                    self.tasks[task_name][key] = definition
                elif key in self.tasks[task_name]:
                    self.tasks[task_name].pop(key)
        else:
            self.tasks[task_name] = task_definition

    def determine_tasks(self,
                        fleurmodes: dict[str, Any],
                        optional_tasks: Iterable[str] | None = None,
                        minimal: bool = False,
                        iteration_to_parse: Literal['all', 'last', 'first'] | int = 'last') -> None:
        """
        Determine, which tasks to perform based on the fleur_modes

        :param fleurmodes: dict with the calculation modes
        :param minimal: bool, whether to only perform minimal tasks
        """

        if optional_tasks is None:
            optional_tasks = set()

        unknown = {name for name in optional_tasks if name not in self.optional_tasks}
        if unknown:
            raise ValueError(f"Unknown optional task(s): '{unknown}'\n"
                             f'The following are available: {self.optional_tasks}')

        for task_name, definition in self.tasks.items():

            if '_minimum_version' in definition:
                min_version = convert_str_version_number(definition['_minimum_version'])
                if self.version < min_version:
                    continue

            optional = definition.get('_optional', False)
            if optional and task_name not in optional_tasks:
                continue

            if minimal and not definition.get('_minimal', False):
                continue

            #These tasks are always added manually
            if definition.get('_special', False):
                continue

            requirements = definition.get('_modes', [])
            check = [fleurmodes[mode] == required_value for mode, required_value in requirements]
            if not all(check):
                continue

            if definition.get('_general', False):
                self.general_tasks.append(task_name)
            else:
                self.iteration_tasks.append(task_name)

        #Manual overrides for certain fleur modes
        if fleurmodes['dos'] or fleurmodes['band'] or fleurmodes['cf_coeff']:
            self.iteration_tasks = ['iteration_number', 'fermi_energy']
            if fleurmodes['bz_integration'] == 'hist':
                self.iteration_tasks = ['iteration_number', 'fermi_energy', 'bandgap']

        if fleurmodes['plot']:
            self.iteration_tasks = []  #In this case there are multiple possibilities where fleur terminates
            #So we discard all the iteration tasks

        if fleurmodes['relax'] and iteration_to_parse == 'last':
            if 'distances' in self.iteration_tasks:
                self.iteration_tasks.remove('distances')
            if 'magnetic_distances' in self.iteration_tasks:
                self.iteration_tasks.remove('magnetic_distances')
            if 'nmmp_distances' in self.iteration_tasks:
                self.iteration_tasks.remove('nmmp_distances')

    def perform_task(self, task_name: str, context: _EvalContext, out_dict: dict, use_lists: bool = True) -> dict:
        """
        Evaluates the task given in the tasks_definition dict

        :param task_name: str, specifies the task to perform
        :param node: etree.Element, the xpath expressions are evaluated from this node
        :param out_dict: dict, output will be put in this dictionary
        :param schema_dict: dict, here all paths and attributes are stored according to the
                            outputschema
        :param constants: dict with all the defined mathematical constants
        :param logger: logger object for logging warnings, errors
        :param root_tag: str, this string will be appended in front of any xpath before it is evaluated
        :param use_lists: bool, if True lists are created for each key if not otherwise specified

        """
        from masci_tools.io.common_functions import camel_to_snake

        #TODO: Could be moved into _EvalContext.__getitem__
        parse_functions = {
            'attrib': context.attribute,
            'text': context.text,
            'allAttribs': context.all_attributes,
            'parentAttribs': context.parent_attributes,
            'singleValue': context.single_value,
            'exists': context.tag_exists,
            'attrib_exists': context.attribute_exists,
            'numberNodes': context.number_nodes
        }

        try:
            tasks_definition = self.tasks[task_name]
        except KeyError as exc:
            raise KeyError(f'Unknown Task: {task_name}') from exc

        for task_key, spec in tasks_definition.items():

            if task_key.startswith('_'):
                continue

            if spec['parse_type'] == 'xmlGetter':
                action = getattr(xml_getters, spec['name'])
                action = partial(action,
                                 context.node,
                                 context.schema_dict,
                                 logger=context.logger,
                                 constants=context.constants)
                args = spec.get('kwargs', {}).copy()
            else:
                action = parse_functions[spec['parse_type']]
                args = spec['path_spec'].copy()
                args = {**args, **spec.get('kwargs', {})}

            if spec['parse_type'] == 'singleValue':
                args.setdefault('ignore', []).append('comment')

            parsed_dict = out_dict
            if 'subdict' in spec:
                parsed_dict = out_dict.setdefault(spec['subdict'], {})

            parsed_value = action(**args)

            if spec['parse_type'] == 'xmlGetter' and 'result_names' in spec:
                if isinstance(parsed_value, tuple):
                    if len(spec['result_names']) != len(parsed_value):
                        raise ValueError('Wrong number of result names given.'
                                         f"Got {len(parsed_value)} values and {len(spec['result_names'])} names")
                    parsed_value = dict(zip(spec['result_names'], parsed_value))
                else:
                    task_key = spec['result_names'][0]

            if isinstance(parsed_value, dict):

                if spec['parse_type'] == 'singleValue':
                    base_value = 'value'
                    no_list = ['units']
                    flat = True
                elif spec['parse_type'] in ['allAttribs', 'parentAttribs']:
                    base_value = spec.get('base_value', '')
                    no_list = spec.get('overwrite', [])
                    flat = spec.get('flat', True)

                if flat:
                    for key, val in parsed_value.items():
                        current_key = f'{task_key}_{camel_to_snake(key)}' if key != base_value else task_key
                        if key in no_list or not use_lists:
                            parsed_dict[current_key] = val
                        else:
                            parsed_dict.setdefault(current_key, []).append(val)

                else:
                    parsed_dict[task_key] = {camel_to_snake(key): val for key, val in parsed_value.items()}

            else:
                overwrite = spec.get('overwrite_last', False)
                if use_lists and not overwrite:
                    parsed_dict.setdefault(task_key, []).append(parsed_value)
                else:
                    parsed_dict[task_key] = parsed_value if parsed_value is not None else parsed_dict.get(task_key)

        conversions = tasks_definition.get('_conversions', [])
        for conversion in conversions:
            if not isinstance(conversion, Conversion):
                warnings.warn(
                    'Providing the _conversions as a list of strings is deprecated'
                    'Use the Conversion namedtuple from masci_tools.util.parse_utils instead', DeprecationWarning)
                conversion = Conversion(name=conversion)

            action = self.conversion_functions[conversion.name]
            out_dict = action(out_dict, *conversion.args, logger=context.logger, **conversion.kwargs)

        return out_dict


# Type variable used by the decorators in this module, so that the
# decorated function keeps its own type
F = TypeVar('F', bound=Callable[..., Any])
"""Generic Callable type"""


def register_migration(base_version: str, target_version: str | list[str]) -> Callable[[F], F]:
    """
    Decorator to add migration for task definition dictionary to the _TaskParser class
    The function should only take the dict of task definitions as an argument

    If multiple target versions are given, they are additionally marked as
    'compatible' with each other, always pointing from the version with the
    higher second version component to the one with the lower.

    :param base_version: str of the version, from which the migration starts
    :param target_version: str or list of str with the versions that work after
                           the migration has been performed

    :returns: decorator, which registers the function and returns it unchanged
    """

    def migration_decorator(func: F) -> F:
        """
        Insert the function into the migrations dict of the _TaskParser class
        for all target versions and return the function unchanged
        """

        target_version_list = target_version
        if not isinstance(target_version_list, list):
            target_version_list = [target_version_list]

        for valid_version in target_version_list:
            _TaskParser.migrations.setdefault(base_version, {})[valid_version] = func

            for valid_version_2 in target_version_list:
                if valid_version == valid_version_2:
                    continue
                #Only the second component of the version strings is compared
                if int(valid_version.split('.')[1]) > int(valid_version_2.split('.')[1]):
                    _TaskParser.migrations.setdefault(valid_version, {})[valid_version_2] = 'compatible'
                else:
                    _TaskParser.migrations.setdefault(valid_version_2, {})[valid_version] = 'compatible'

        return func

    return migration_decorator


def conversion_function(func: F) -> F:
    """
    Marks a function as a conversion function, which can be called after
    performing a parsing task. The function can be specified via the _conversions
    control key in the task definitions.

    The function is registered under its ``__name__`` and is returned unchanged.

    A conversion function is called with the following arguments:
        :param out_dict: dict with the previously parsed information (first positional argument)
        :param logger: keyword argument, the logger of the current evaluation context (can be None)

    followed by the additional positional/keyword arguments given in the
    conversion definition. It has to return only the modified output dict
    """
    _TaskParser.conversion_functions[func.__name__] = func

    return func