xref: /dpdk/dts/framework/parser.py (revision 1e4f18558427c0aa876851d72609b302c2f6b224)
1818fe14eSLuca Vizzarro# SPDX-License-Identifier: BSD-3-Clause
2818fe14eSLuca Vizzarro# Copyright(c) 2024 Arm Limited
3818fe14eSLuca Vizzarro
"""Parsing utility module.

This module provides :class:`~TextParser`, which can be used to populate the fields of any
dataclass from a block of text.
"""
9818fe14eSLuca Vizzarro
10818fe14eSLuca Vizzarroimport re
11818fe14eSLuca Vizzarrofrom abc import ABC
12818fe14eSLuca Vizzarrofrom dataclasses import MISSING, dataclass, fields
13818fe14eSLuca Vizzarrofrom functools import partial
14818fe14eSLuca Vizzarrofrom typing import Any, Callable, TypedDict, cast
15818fe14eSLuca Vizzarro
16818fe14eSLuca Vizzarrofrom typing_extensions import Self
17818fe14eSLuca Vizzarro
18818fe14eSLuca Vizzarrofrom framework.exception import InternalError
19818fe14eSLuca Vizzarro
20818fe14eSLuca Vizzarro
class ParserFn(TypedDict):
    """Dictionary carrying a parser function, suitable for :func:`dataclasses.field` metadata.

    The single key is what :meth:`TextParser.parse` looks up in each field's metadata to find
    the function it has to run against the text.
    """

    #: The parser function; it receives the whole text and returns the parsed value.
    TextParser_fn: Callable[[str], Any]
26818fe14eSLuca Vizzarro
27818fe14eSLuca Vizzarro
@dataclass
class TextParser(ABC):
    r"""Abstract dataclass base that builds instances by parsing a block of text.

    Subclass a dataclass from :class:`TextParser` to give it text parsing capabilities.

    The `parse` class method is a factory: it takes a text and, for every dataclass field that
    carries a parser function in its metadata, runs that function against the whole text and uses
    the returned value to populate the field of the new instance. A field without a parser
    function is left to its default value or default factory; if it has neither, creating the
    instance fails.

    A few ready-made parser function builders are provided, together with `wrap`, which chains an
    arbitrary function after a parser function. The builders return dictionaries meant to be
    passed straight to the ``metadata`` argument of :func:`dataclasses.field`. `find`, which
    searches the text with a regular expression, is expected to be the most commonly used one.

    Example:
        The following example makes use of and demonstrates every parser function available:

        .. code:: python

            from dataclasses import dataclass, field
            from enum import Enum
            from framework.parser import TextParser

            class Colour(Enum):
                BLACK = 1
                WHITE = 2

                @classmethod
                def from_str(cls, text: str):
                    match text:
                        case "black":
                            return cls.BLACK
                        case "white":
                            return cls.WHITE
                        case _:
                            return None # unsupported colour

                @classmethod
                def make_parser(cls):
                    # make a parser function that finds a match and
                    # then makes it a Colour object through Colour.from_str
                    return TextParser.wrap(TextParser.find(r"is a (\w+)"), cls.from_str)

            @dataclass
            class Animal(TextParser):
                kind: str = field(metadata=TextParser.find(r"is a \w+ (\w+)"))
                name: str = field(metadata=TextParser.find(r"^(\w+)"))
                colour: Colour = field(metadata=Colour.make_parser())
                age: int = field(metadata=TextParser.find_int(r"aged (\d+)"))

            steph = Animal.parse("Stephanie is a white cat aged 10")
            print(steph) # Animal(kind='cat', name='Stephanie', colour=<Colour.WHITE: 2>, age=10)
    """

    """============ BEGIN PARSER FUNCTIONS ============"""

    @staticmethod
    def wrap(parser_fn: ParserFn, wrapper_fn: Callable) -> ParserFn:
        """Chains `wrapper_fn` after the parser function stored in `parser_fn`.

        The resulting parser function first runs the original parser function. When that yields
        None, None is returned straight away and `wrapper_fn` is never called; any other value is
        handed to `wrapper_fn` and its result is returned. In pseudo-code::

            intermediate_value := parser_fn(input)
            if intermediate_value is None then
                output := None
            else
                output := wrapper_fn(intermediate_value)

        Args:
            parser_fn: The dictionary storing the parser function to be wrapped.
            wrapper_fn: The function applied to the non-None result of the parser function.

        Returns:
            ParserFn: A dictionary for the `dataclasses.field` metadata argument containing the
                newly wrapped parser function.
        """
        parse_text = parser_fn["TextParser_fn"]

        def _wrapped_parser_fn(text: str) -> Any:
            parsed = parse_text(text)
            # None means "nothing found"; it must not reach the wrapper.
            return None if parsed is None else wrapper_fn(parsed)

        return ParserFn(TextParser_fn=_wrapped_parser_fn)

    @staticmethod
    def find(
        pattern: str | re.Pattern[str],
        flags: re.RegexFlag = re.RegexFlag(0),
        named: bool = False,
    ) -> ParserFn:
        """Builds a parser function that searches the text with a regular expression.

        What the parser function returns depends on the capturing groups of the pattern:

        * no capturing groups: True if the pattern matched anywhere in the text, False otherwise;
        * one capturing group: the value of that group, or None if there was no match;
        * several capturing groups: a tuple with the value of each group, or None if there was
          no match.

        Args:
            pattern: The regular expression pattern.
            flags: The regular expression flags. Ignored if the given pattern is already compiled.
            named: If set to True and the pattern has capturing groups, a match is returned as a
                dictionary of the named capturing groups instead.

        Returns:
            ParserFn: A dictionary for the `dataclasses.field` metadata argument containing the find
                parser function.
        """
        regex = re.compile(pattern, flags) if isinstance(pattern, str) else pattern

        def _search(text: str) -> Any:
            found = regex.search(text)
            has_groups = regex.groups > 0

            if found is None:
                # Group-less patterns act as a boolean test, the others signal "no value".
                return None if has_groups else False
            if not has_groups:
                return True
            if named:
                return found.groupdict()

            captured = found.groups()
            # A lone group is unpacked so callers get the value rather than a 1-tuple.
            return captured[0] if len(captured) == 1 else captured

        return ParserFn(TextParser_fn=_search)

    @staticmethod
    def find_int(
        pattern: str | re.Pattern[str],
        flags: re.RegexFlag = re.RegexFlag(0),
        int_base: int = 0,
    ) -> ParserFn:
        """Builds a parser function that turns the match of :meth:`~find` into an int.

        The pattern must contain exactly one capturing group; the text it captures is what gets
        converted.

        Args:
            pattern: The regular expression pattern.
            flags: The regular expression flags. Ignored if the given pattern is already compiled.
            int_base: The base handed to the int built-in for the conversion.

        Raises:
            InternalError: If the pattern does not have exactly one capturing group.

        Returns:
            ParserFn: A dictionary for the `dataclasses.field` metadata argument containing the
                :meth:`~find` parser function wrapped by the int built-in.
        """
        regex = re.compile(pattern, flags) if isinstance(pattern, str) else pattern

        if regex.groups != 1:
            raise InternalError("only one capturing group is allowed with this parser function")

        to_int = partial(int, base=int_base)
        return TextParser.wrap(TextParser.find(regex), to_int)

    """============ END PARSER FUNCTIONS ============"""

    @classmethod
    def parse(cls, text: str) -> Self:
        """Builds a new instance of the class out of the given text.

        Every field with a parser function in its metadata is run against `text`, and each
        non-None result is passed to the class constructor under the field's name. A field whose
        parser function returns None is left out, so that its default value or default factory
        applies. Fields without a parser function are skipped entirely and are expected to have a
        default, otherwise the class initialization will fail.

        Args:
            text: the text to parse

        Raises:
            InternalError: if the parser did not find a match and the field does not have a default
                value or default factory.

        Returns:
            A new instance of the class.
        """
        init_kwargs = {}
        for field in fields(cls):
            parser = cast(ParserFn, field.metadata).get("TextParser_fn")
            if parser is None:
                continue

            parsed = parser(text)
            if parsed is None:
                # Nothing was parsed: only acceptable when the dataclass can fill the field itself.
                if field.default is MISSING and field.default_factory is MISSING:
                    raise InternalError(
                        f"parser for field {field.name} returned None, but the field has no default"
                    )
                continue

            init_kwargs[field.name] = parsed

        return cls(**init_kwargs)
230