# SPDX-License-Identifier: BSD-3-Clause
# Copyright(c) 2024 Arm Limited

"""Parsing utility module.

This module provides :class:`~TextParser`, which can be used to model any dataclass on a block of
text.
"""

import re
from abc import ABC
from dataclasses import MISSING, dataclass, fields
from functools import partial
from typing import Any, Callable, TypedDict, cast

from typing_extensions import Self

from framework.exception import InternalError


class ParserFn(TypedDict):
    """Parser function in a dict compatible with the :func:`dataclasses.field` metadata param."""

    #: The callable that receives the whole text and returns the parsed value.
    TextParser_fn: Callable[[str], Any]


@dataclass
class TextParser(ABC):
    r"""Helper abstract dataclass that parses a text according to the fields' rules.

    To enable text parsing in a dataclass, make it a subclass of :class:`TextParser`.

    The provided `parse` method is a factory: it takes the text as an argument and, for each field
    of the dataclass, runs the field's parser function against the whole text. The value returned
    by the parser function is assigned to that field of the new instance. If a field has no parser
    function, or its parser function returns None, the field's default value or default factory is
    used instead. If a parser function returns None and no default is available, an exception is
    raised.

    This class provides a selection of parser functions and a function to wrap parser functions
    with generic functions. Parser functions are designed to be passed to the fields' metadata
    param. The most commonly used parser function is expected to be the `find` method, which runs
    a regular expression against the text to find matches.

    Example:
        The following example makes use of and demonstrates every parser function available:

        .. code:: python

            from dataclasses import dataclass, field
            from enum import Enum
            from framework.parser import TextParser

            class Colour(Enum):
                BLACK = 1
                WHITE = 2

                @classmethod
                def from_str(cls, text: str):
                    match text:
                        case "black":
                            return cls.BLACK
                        case "white":
                            return cls.WHITE
                        case _:
                            return None # unsupported colour

                @classmethod
                def make_parser(cls):
                    # make a parser function that finds a match and
                    # then makes it a Colour object through Colour.from_str
                    return TextParser.wrap(TextParser.find(r"is a (\w+)"), cls.from_str)

            @dataclass
            class Animal(TextParser):
                kind: str = field(metadata=TextParser.find(r"is a \w+ (\w+)"))
                name: str = field(metadata=TextParser.find(r"^(\w+)"))
                colour: Colour = field(metadata=Colour.make_parser())
                age: int = field(metadata=TextParser.find_int(r"aged (\d+)"))

            steph = Animal.parse("Stephanie is a white cat aged 10")
            print(steph) # Animal(kind='cat', name='Stephanie', colour=<Colour.WHITE: 2>, age=10)
    """

    """============ BEGIN PARSER FUNCTIONS ============"""

    @staticmethod
    def wrap(parser_fn: ParserFn, wrapper_fn: Callable) -> ParserFn:
        """Makes a wrapped parser function.

        `parser_fn` is called first. If it returns a non-None value, `wrapper_fn` is called with
        that value and its result is returned. Otherwise None is returned without calling
        `wrapper_fn`. In pseudo-code::

            intermediate_value := parser_fn(input)
            if intermediate_value is None then
                output := None
            else
                output := wrapper_fn(intermediate_value)

        Args:
            parser_fn: The dictionary storing the parser function to be wrapped.
            wrapper_fn: The function that wraps `parser_fn`.

        Returns:
            ParserFn: A dictionary for the `dataclasses.field` metadata argument containing the
                newly wrapped parser function.
        """
        wrapped = parser_fn["TextParser_fn"]

        def _composite_parser_fn(text: str) -> Any:
            parsed = wrapped(text)
            # A None result means "nothing found": propagate it and skip the wrapper.
            return None if parsed is None else wrapper_fn(parsed)

        return ParserFn(TextParser_fn=_composite_parser_fn)

    @staticmethod
    def find(
        pattern: str | re.Pattern[str],
        flags: re.RegexFlag = re.RegexFlag(0),
        named: bool = False,
    ) -> ParserFn:
        """Makes a parser function that finds a regular expression match in the text.

        The value produced by the parser function depends on the pattern's capturing groups:

        * no capturing groups: True if the pattern matched, False otherwise;
        * one or more capturing groups and no match: None;
        * exactly one capturing group and a match: the value of that group;
        * several capturing groups and a match: a tuple with the value of each group.

        Args:
            pattern: The regular expression pattern.
            flags: The regular expression flags. Ignored if the given pattern is already compiled.
            named: If set to True and the pattern has capturing groups, a match is returned as a
                dictionary holding only the named capturing groups.

        Returns:
            ParserFn: A dictionary for the `dataclasses.field` metadata argument containing the find
                parser function.
        """
        regex = re.compile(pattern, flags) if isinstance(pattern, str) else pattern
        has_groups = regex.groups > 0

        def _find(text: str) -> Any:
            found = regex.search(text)

            # Without capturing groups there is nothing to extract, only whether it matched.
            if not has_groups:
                return found is not None

            if found is None:
                return None

            if named:
                return found.groupdict()

            captured = found.groups()
            # Unwrap the tuple when there is only a single group to return.
            return captured[0] if len(captured) == 1 else captured

        return ParserFn(TextParser_fn=_find)

    @staticmethod
    def find_int(
        pattern: str | re.Pattern[str],
        flags: re.RegexFlag = re.RegexFlag(0),
        int_base: int = 0,
    ) -> ParserFn:
        """Makes a parser function that converts the match of :meth:`~find` to int.

        This function is compatible only with a pattern containing one capturing group.

        Args:
            pattern: The regular expression pattern.
            flags: The regular expression flags. Ignored if the given pattern is already compiled.
            int_base: The base of the number to convert from, passed as `base` to the int
                built-in.

        Raises:
            InternalError: If the pattern does not have exactly one capturing group.

        Returns:
            ParserFn: A dictionary for the `dataclasses.field` metadata argument containing the
                :meth:`~find` parser function wrapped by the int built-in.
        """
        regex = re.compile(pattern, flags) if isinstance(pattern, str) else pattern

        if regex.groups != 1:
            raise InternalError("only one capturing group is allowed with this parser function")

        to_int = partial(int, base=int_base)
        return TextParser.wrap(TextParser.find(regex), to_int)

    """============ END PARSER FUNCTIONS ============"""

    @classmethod
    def parse(cls, text: str) -> Self:
        """Creates a new instance of the class from the given text.

        Every field that carries a parser function in its metadata is populated with the value
        returned by that function when run against `text`. Fields without a parser function are
        skipped and are expected to have a default value, otherwise the class initialization will
        fail. A field whose parser function returns None is left to its default as well.

        Args:
            text: The text to parse.

        Raises:
            InternalError: If a parser function returned None and the field has neither a default
                value nor a default factory.

        Returns:
            A new instance of the class.
        """
        parsed_values: dict[str, Any] = {}

        for dc_field in fields(cls):
            parser = cast(ParserFn, dc_field.metadata).get("TextParser_fn")
            if parser is None:
                continue

            result = parser(text)
            if result is None:
                # Nothing was parsed: only acceptable when the dataclass can supply a default.
                if dc_field.default is MISSING and dc_field.default_factory is MISSING:
                    raise InternalError(
                        f"parser for field {dc_field.name} returned None, but the field has no default"
                    )
                continue

            parsed_values[dc_field.name] = result

        return cls(**parsed_values)