xref: /freebsd-src/contrib/llvm-project/llvm/utils/TableGen/DFAEmitter.h (revision 06c3fb2749bda94cb5201f81ffdb8fa6c3161b2e)
18bcb0991SDimitry Andric //===--------------------- DfaEmitter.h -----------------------------------===//
28bcb0991SDimitry Andric //
38bcb0991SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
48bcb0991SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
58bcb0991SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
68bcb0991SDimitry Andric //
78bcb0991SDimitry Andric //===----------------------------------------------------------------------===//
88bcb0991SDimitry Andric // Defines a generic automaton builder. This takes a set of transitions and
98bcb0991SDimitry Andric // states that represent a nondeterministic finite state automaton (NFA) and
108bcb0991SDimitry Andric // emits a determinized DFA in a form that include/llvm/Support/Automaton.h can
118bcb0991SDimitry Andric // drive.
128bcb0991SDimitry Andric //
138bcb0991SDimitry Andric // See file llvm/TableGen/Automaton.td for the TableGen API definition.
148bcb0991SDimitry Andric //
158bcb0991SDimitry Andric //===----------------------------------------------------------------------===//
168bcb0991SDimitry Andric 
178bcb0991SDimitry Andric #ifndef LLVM_UTILS_TABLEGEN_DFAEMITTER_H
188bcb0991SDimitry Andric #define LLVM_UTILS_TABLEGEN_DFAEMITTER_H
198bcb0991SDimitry Andric 
205ffd83dbSDimitry Andric #include "llvm/ADT/SmallVector.h"
218bcb0991SDimitry Andric #include "llvm/ADT/UniqueVector.h"
225ffd83dbSDimitry Andric #include <map>
238bcb0991SDimitry Andric #include <set>
24*06c3fb27SDimitry Andric #include <utility>
25*06c3fb27SDimitry Andric #include <vector>
268bcb0991SDimitry Andric 
278bcb0991SDimitry Andric namespace llvm {
288bcb0991SDimitry Andric 
298bcb0991SDimitry Andric class raw_ostream;
305ffd83dbSDimitry Andric class StringRef;
315ffd83dbSDimitry Andric 
328bcb0991SDimitry Andric /// Construct a deterministic finite state automaton from possible
338bcb0991SDimitry Andric /// nondeterministic state and transition data.
348bcb0991SDimitry Andric ///
358bcb0991SDimitry Andric /// The state type is a 64-bit unsigned integer. The generated automaton is
368bcb0991SDimitry Andric /// invariant to the sparsity of the state representation - its size is only
378bcb0991SDimitry Andric /// a function of the cardinality of the set of states.
388bcb0991SDimitry Andric ///
398bcb0991SDimitry Andric /// The inputs to this emitter are considered to define a nondeterministic
408bcb0991SDimitry Andric /// finite state automaton (NFA). This is then converted to a DFA during
418bcb0991SDimitry Andric /// emission. The emitted tables can be used to by
428bcb0991SDimitry Andric /// include/llvm/Support/Automaton.h.
438bcb0991SDimitry Andric class DfaEmitter {
448bcb0991SDimitry Andric public:
458bcb0991SDimitry Andric   // The type of an NFA state. The initial state is always zero.
468bcb0991SDimitry Andric   using state_type = uint64_t;
478bcb0991SDimitry Andric   // The type of an action.
488bcb0991SDimitry Andric   using action_type = uint64_t;
498bcb0991SDimitry Andric 
508bcb0991SDimitry Andric   DfaEmitter() = default;
518bcb0991SDimitry Andric   virtual ~DfaEmitter() = default;
528bcb0991SDimitry Andric 
538bcb0991SDimitry Andric   void addTransition(state_type From, state_type To, action_type A);
548bcb0991SDimitry Andric   void emit(StringRef Name, raw_ostream &OS);
558bcb0991SDimitry Andric 
568bcb0991SDimitry Andric protected:
578bcb0991SDimitry Andric   /// Emit the C++ type of an action to OS.
588bcb0991SDimitry Andric   virtual void printActionType(raw_ostream &OS);
598bcb0991SDimitry Andric   /// Emit the C++ value of an action A to OS.
608bcb0991SDimitry Andric   virtual void printActionValue(action_type A, raw_ostream &OS);
618bcb0991SDimitry Andric 
628bcb0991SDimitry Andric private:
638bcb0991SDimitry Andric   /// The state type of deterministic states. These are only used internally to
648bcb0991SDimitry Andric   /// this class. This is an ID into the DfaStates UniqueVector.
658bcb0991SDimitry Andric   using dfa_state_type = unsigned;
668bcb0991SDimitry Andric 
678bcb0991SDimitry Andric   /// The actual representation of a DFA state, which is a union of one or more
688bcb0991SDimitry Andric   /// NFA states.
698bcb0991SDimitry Andric   using DfaState = SmallVector<state_type, 4>;
708bcb0991SDimitry Andric 
718bcb0991SDimitry Andric   /// A DFA transition consists of a set of NFA states transitioning to a
728bcb0991SDimitry Andric   /// new set of NFA states. The DfaTransitionInfo tracks, for every
738bcb0991SDimitry Andric   /// transitioned-from NFA state, a set of valid transitioned-to states.
748bcb0991SDimitry Andric   ///
758bcb0991SDimitry Andric   /// Emission of this transition relation allows algorithmic determination of
768bcb0991SDimitry Andric   /// the possible candidate NFA paths taken under a given input sequence to
778bcb0991SDimitry Andric   /// reach a given DFA state.
788bcb0991SDimitry Andric   using DfaTransitionInfo = SmallVector<std::pair<state_type, state_type>, 4>;
798bcb0991SDimitry Andric 
808bcb0991SDimitry Andric   /// The set of all possible actions.
818bcb0991SDimitry Andric   std::set<action_type> Actions;
828bcb0991SDimitry Andric 
838bcb0991SDimitry Andric   /// The set of nondeterministic transitions. A state-action pair can
848bcb0991SDimitry Andric   /// transition to multiple target states.
858bcb0991SDimitry Andric   std::map<std::pair<state_type, action_type>, std::vector<state_type>>
868bcb0991SDimitry Andric       NfaTransitions;
878bcb0991SDimitry Andric   std::set<state_type> NfaStates;
888bcb0991SDimitry Andric   unsigned NumNfaTransitions = 0;
898bcb0991SDimitry Andric 
908bcb0991SDimitry Andric   /// The set of deterministic states. DfaStates.getId(DfaState) returns an ID,
918bcb0991SDimitry Andric   /// which is dfa_state_type. Note that because UniqueVector reserves state
928bcb0991SDimitry Andric   /// zero, the initial DFA state is always 1.
938bcb0991SDimitry Andric   UniqueVector<DfaState> DfaStates;
948bcb0991SDimitry Andric   /// The set of deterministic transitions. A state-action pair has only a
958bcb0991SDimitry Andric   /// single target state.
968bcb0991SDimitry Andric   std::map<std::pair<dfa_state_type, action_type>,
978bcb0991SDimitry Andric            std::pair<dfa_state_type, DfaTransitionInfo>>
988bcb0991SDimitry Andric       DfaTransitions;
998bcb0991SDimitry Andric 
1008bcb0991SDimitry Andric   /// Visit all NFA states and construct the DFA.
1018bcb0991SDimitry Andric   void constructDfa();
1028bcb0991SDimitry Andric   /// Visit a single DFA state and construct all possible transitions to new DFA
1038bcb0991SDimitry Andric   /// states.
10447395794SDimitry Andric   void visitDfaState(const DfaState &DS);
1058bcb0991SDimitry Andric };
1068bcb0991SDimitry Andric 
1078bcb0991SDimitry Andric } // namespace llvm
1088bcb0991SDimitry Andric 
1098bcb0991SDimitry Andric #endif
110