# SPDX-License-Identifier: BSD-3-Clause
# Copyright(c) 2024 Arm Limited

"""Parsing utility module.

This module provides :class:`~TextParser` which can be used to model any dataclass to a block of
text.
"""

import re
from abc import ABC
from dataclasses import MISSING, dataclass, fields
from functools import partial
from typing import Any, Callable, TypedDict, cast

from typing_extensions import Self

from framework.exception import InternalError


class ParserFn(TypedDict):
    """Parser function in a dict compatible with the :func:`dataclasses.field` metadata param."""

    #:
    TextParser_fn: Callable[[str], Any]


@dataclass
class TextParser(ABC):
    r"""Helper abstract dataclass that parses a text according to the fields' rules.

    In order to enable text parsing in a dataclass, subclass it with :class:`TextParser`.

    The provided `parse` method is a factory which parses the supplied text and creates an instance
    with populated dataclass fields. This takes text as an argument and for each field in the
    dataclass, the field's parser function is run against the whole text. The returned value is then
    assigned to the field of the new instance. If the field does not have a parser function its
    default value or factory is used instead. If no default is available either, an exception is
    raised.

    This class provides a selection of parser functions and a function to wrap parser functions with
    generic functions. Parser functions are designed to be passed to the fields' metadata param. The
    most commonly used parser function is expected to be the `find` method, which runs a regular
    expression against the text to find matches.

    Example:
        The following example makes use of and demonstrates every parser function available:

        .. code:: python

            from dataclasses import dataclass, field
            from enum import Enum
            from framework.parser import TextParser

            class Colour(Enum):
                BLACK = 1
                WHITE = 2

                @classmethod
                def from_str(cls, text: str):
                    match text:
                        case "black":
                            return cls.BLACK
                        case "white":
                            return cls.WHITE
                        case _:
                            return None # unsupported colour

                @classmethod
                def make_parser(cls):
                    # make a parser function that finds a match and
                    # then makes it a Colour object through Colour.from_str
                    return TextParser.wrap(TextParser.find(r"is a (\w+)"), cls.from_str)

            @dataclass
            class Animal(TextParser):
                kind: str = field(metadata=TextParser.find(r"is a \w+ (\w+)"))
                name: str = field(metadata=TextParser.find(r"^(\w+)"))
                colour: Colour = field(metadata=Colour.make_parser())
                age: int = field(metadata=TextParser.find_int(r"aged (\d+)"))

            steph = Animal.parse("Stephanie is a white cat aged 10")
            print(steph)  # Animal(kind='cat', name='Stephanie', colour=<Colour.WHITE: 2>, age=10)
    """

    # ============ BEGIN PARSER FUNCTIONS ============

    @staticmethod
    def wrap(parser_fn: ParserFn, wrapper_fn: Callable[[Any], Any]) -> ParserFn:
        """Makes a wrapped parser function.

        `parser_fn` is called and if a non-None value is returned, `wrapper_fn` is called with
        it. Otherwise the function returns early with None. In pseudo-code::

            intermediate_value := parser_fn(input)
            if intermediate_value is None then
                output := None
            else
                output := wrapper_fn(intermediate_value)

        Args:
            parser_fn: The dictionary storing the parser function to be wrapped.
            wrapper_fn: The function that wraps `parser_fn`.

        Returns:
            ParserFn: A dictionary for the `dataclasses.field` metadata argument containing the
                newly wrapped parser function.
        """
        inner_fn = parser_fn["TextParser_fn"]

        def _composite_parser_fn(text: str) -> Any:
            intermediate_value = inner_fn(text)
            # None means "no match"; it is propagated untouched so that `parse` can fall back
            # to the field's default instead of handing None to `wrapper_fn`.
            if intermediate_value is None:
                return None
            return wrapper_fn(intermediate_value)

        return ParserFn(TextParser_fn=_composite_parser_fn)

    @staticmethod
    def find(
        pattern: str | re.Pattern[str],
        flags: re.RegexFlag = re.RegexFlag(0),
        named: bool = False,
    ) -> ParserFn:
        """Makes a parser function that finds a regular expression match in the text.

        If the pattern has any capturing groups, it returns None if no match was found, otherwise a
        tuple containing the values per each group is returned. If the pattern has only one
        capturing group and a match was found, its value is returned. If the pattern has no
        capturing groups then either True or False is returned if the pattern had a match or not.

        Args:
            pattern: The regular expression pattern.
            flags: The regular expression flags. Ignored if the given pattern is already compiled.
            named: If set to True only the named capturing groups will be returned, as a dictionary.

        Returns:
            ParserFn: A dictionary for the `dataclasses.field` metadata argument containing the find
                parser function.
        """
        if isinstance(pattern, str):
            pattern = re.compile(pattern, flags)

        def _find(text: str) -> Any:
            m = pattern.search(text)
            if m is None:
                # With no capturing groups the parser acts as a boolean test, so a miss is
                # reported as False rather than None.
                return None if pattern.groups > 0 else False

            if pattern.groups == 0:
                return True

            if named:
                return m.groupdict()

            matches = m.groups()
            if len(matches) == 1:
                # A single group is unwrapped so callers get the value, not a 1-tuple.
                return matches[0]

            return matches

        return ParserFn(TextParser_fn=_find)

    @staticmethod
    def find_int(
        pattern: str | re.Pattern[str],
        flags: re.RegexFlag = re.RegexFlag(0),
        int_base: int = 0,
    ) -> ParserFn:
        """Makes a parser function that converts the match of :meth:`~find` to int.

        This function is compatible only with a pattern containing one capturing group.

        The conversion is done by the :class:`int` built-in when the parser function runs. With
        the default `int_base` of 0 the matched text is interpreted like a Python integer literal:
        a ``0x``, ``0o`` or ``0b`` prefix selects the base, and a non-zero decimal number with
        leading zeros (e.g. ``010``) is rejected by :class:`int` with a :class:`ValueError`. Pass
        ``int_base=10`` if such input must be accepted.

        Args:
            pattern: The regular expression pattern.
            flags: The regular expression flags. Ignored if the given pattern is already compiled.
            int_base: The base of the number to convert from.

        Raises:
            InternalError: If the pattern does not have exactly one capturing group.

        Returns:
            ParserFn: A dictionary for the `dataclasses.field` metadata argument containing the
                :meth:`~find` parser function wrapped by the int built-in.
        """
        if isinstance(pattern, str):
            pattern = re.compile(pattern, flags)

        # Checked eagerly, when the parser function is created, so a bad pattern fails at class
        # definition time rather than on the first parse.
        if pattern.groups != 1:
            raise InternalError("only one capturing group is allowed with this parser function")

        return TextParser.wrap(TextParser.find(pattern), partial(int, base=int_base))

    # ============ END PARSER FUNCTIONS ============

    @classmethod
    def parse(cls, text: str) -> Self:
        """Creates a new instance of the class from the given text.

        A new class instance is created with all the fields that have a parser function in their
        metadata. Fields without one are ignored and are expected to have a default value, otherwise
        the class initialization will fail.

        A field is populated with the value returned by its corresponding parser function. If the
        parser function returns None and the field has a default value or default factory, the
        field is left out of the constructor call so that the default applies.

        Args:
            text: the text to parse

        Raises:
            InternalError: if the parser did not find a match and the field does not have a default
                value or default factory.

        Returns:
            A new instance of the class.
        """
        fields_values = {}
        for field in fields(cls):
            parser_fn = cast(ParserFn, field.metadata).get("TextParser_fn")
            if parser_fn is None:
                continue

            value = parser_fn(text)
            if value is not None:
                fields_values[field.name] = value
            elif field.default is MISSING and field.default_factory is MISSING:
                raise InternalError(
                    f"parser for field {field.name} returned None, but the field has no default"
                )

        return cls(**fields_values)