18bcb0991SDimitry Andric //===--------------------- DfaEmitter.h -----------------------------------===// 28bcb0991SDimitry Andric // 38bcb0991SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 48bcb0991SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 58bcb0991SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 68bcb0991SDimitry Andric // 78bcb0991SDimitry Andric //===----------------------------------------------------------------------===// 88bcb0991SDimitry Andric // Defines a generic automaton builder. This takes a set of transitions and 98bcb0991SDimitry Andric // states that represent a nondeterministic finite state automaton (NFA) and 108bcb0991SDimitry Andric // emits a determinized DFA in a form that include/llvm/Support/Automaton.h can 118bcb0991SDimitry Andric // drive. 128bcb0991SDimitry Andric // 138bcb0991SDimitry Andric // See file llvm/TableGen/Automaton.td for the TableGen API definition. 148bcb0991SDimitry Andric // 158bcb0991SDimitry Andric //===----------------------------------------------------------------------===// 168bcb0991SDimitry Andric 178bcb0991SDimitry Andric #ifndef LLVM_UTILS_TABLEGEN_DFAEMITTER_H 188bcb0991SDimitry Andric #define LLVM_UTILS_TABLEGEN_DFAEMITTER_H 198bcb0991SDimitry Andric 205ffd83dbSDimitry Andric #include "llvm/ADT/SmallVector.h" 218bcb0991SDimitry Andric #include "llvm/ADT/UniqueVector.h" 225ffd83dbSDimitry Andric #include <map> 238bcb0991SDimitry Andric #include <set> 24*06c3fb27SDimitry Andric #include <utility> 25*06c3fb27SDimitry Andric #include <vector> 268bcb0991SDimitry Andric 278bcb0991SDimitry Andric namespace llvm { 288bcb0991SDimitry Andric 298bcb0991SDimitry Andric class raw_ostream; 305ffd83dbSDimitry Andric class StringRef; 315ffd83dbSDimitry Andric 328bcb0991SDimitry Andric /// Construct a deterministic finite state automaton from possible 338bcb0991SDimitry Andric /// nondeterministic state and transition data. 348bcb0991SDimitry Andric /// 358bcb0991SDimitry Andric /// The state type is a 64-bit unsigned integer. The generated automaton is 368bcb0991SDimitry Andric /// invariant to the sparsity of the state representation - its size is only 378bcb0991SDimitry Andric /// a function of the cardinality of the set of states. 388bcb0991SDimitry Andric /// 398bcb0991SDimitry Andric /// The inputs to this emitter are considered to define a nondeterministic 408bcb0991SDimitry Andric /// finite state automaton (NFA). This is then converted to a DFA during 418bcb0991SDimitry Andric /// emission. The emitted tables can be used to by 428bcb0991SDimitry Andric /// include/llvm/Support/Automaton.h. 438bcb0991SDimitry Andric class DfaEmitter { 448bcb0991SDimitry Andric public: 458bcb0991SDimitry Andric // The type of an NFA state. The initial state is always zero. 468bcb0991SDimitry Andric using state_type = uint64_t; 478bcb0991SDimitry Andric // The type of an action. 488bcb0991SDimitry Andric using action_type = uint64_t; 498bcb0991SDimitry Andric 508bcb0991SDimitry Andric DfaEmitter() = default; 518bcb0991SDimitry Andric virtual ~DfaEmitter() = default; 528bcb0991SDimitry Andric 538bcb0991SDimitry Andric void addTransition(state_type From, state_type To, action_type A); 548bcb0991SDimitry Andric void emit(StringRef Name, raw_ostream &OS); 558bcb0991SDimitry Andric 568bcb0991SDimitry Andric protected: 578bcb0991SDimitry Andric /// Emit the C++ type of an action to OS. 588bcb0991SDimitry Andric virtual void printActionType(raw_ostream &OS); 598bcb0991SDimitry Andric /// Emit the C++ value of an action A to OS. 608bcb0991SDimitry Andric virtual void printActionValue(action_type A, raw_ostream &OS); 618bcb0991SDimitry Andric 628bcb0991SDimitry Andric private: 638bcb0991SDimitry Andric /// The state type of deterministic states. These are only used internally to 648bcb0991SDimitry Andric /// this class. This is an ID into the DfaStates UniqueVector. 658bcb0991SDimitry Andric using dfa_state_type = unsigned; 668bcb0991SDimitry Andric 678bcb0991SDimitry Andric /// The actual representation of a DFA state, which is a union of one or more 688bcb0991SDimitry Andric /// NFA states. 698bcb0991SDimitry Andric using DfaState = SmallVector<state_type, 4>; 708bcb0991SDimitry Andric 718bcb0991SDimitry Andric /// A DFA transition consists of a set of NFA states transitioning to a 728bcb0991SDimitry Andric /// new set of NFA states. The DfaTransitionInfo tracks, for every 738bcb0991SDimitry Andric /// transitioned-from NFA state, a set of valid transitioned-to states. 748bcb0991SDimitry Andric /// 758bcb0991SDimitry Andric /// Emission of this transition relation allows algorithmic determination of 768bcb0991SDimitry Andric /// the possible candidate NFA paths taken under a given input sequence to 778bcb0991SDimitry Andric /// reach a given DFA state. 788bcb0991SDimitry Andric using DfaTransitionInfo = SmallVector<std::pair<state_type, state_type>, 4>; 798bcb0991SDimitry Andric 808bcb0991SDimitry Andric /// The set of all possible actions. 818bcb0991SDimitry Andric std::set<action_type> Actions; 828bcb0991SDimitry Andric 838bcb0991SDimitry Andric /// The set of nondeterministic transitions. A state-action pair can 848bcb0991SDimitry Andric /// transition to multiple target states. 858bcb0991SDimitry Andric std::map<std::pair<state_type, action_type>, std::vector<state_type>> 868bcb0991SDimitry Andric NfaTransitions; 878bcb0991SDimitry Andric std::set<state_type> NfaStates; 888bcb0991SDimitry Andric unsigned NumNfaTransitions = 0; 898bcb0991SDimitry Andric 908bcb0991SDimitry Andric /// The set of deterministic states. DfaStates.getId(DfaState) returns an ID, 918bcb0991SDimitry Andric /// which is dfa_state_type. Note that because UniqueVector reserves state 928bcb0991SDimitry Andric /// zero, the initial DFA state is always 1. 938bcb0991SDimitry Andric UniqueVector<DfaState> DfaStates; 948bcb0991SDimitry Andric /// The set of deterministic transitions. A state-action pair has only a 958bcb0991SDimitry Andric /// single target state. 968bcb0991SDimitry Andric std::map<std::pair<dfa_state_type, action_type>, 978bcb0991SDimitry Andric std::pair<dfa_state_type, DfaTransitionInfo>> 988bcb0991SDimitry Andric DfaTransitions; 998bcb0991SDimitry Andric 1008bcb0991SDimitry Andric /// Visit all NFA states and construct the DFA. 1018bcb0991SDimitry Andric void constructDfa(); 1028bcb0991SDimitry Andric /// Visit a single DFA state and construct all possible transitions to new DFA 1038bcb0991SDimitry Andric /// states. 10447395794SDimitry Andric void visitDfaState(const DfaState &DS); 1058bcb0991SDimitry Andric }; 1068bcb0991SDimitry Andric 1078bcb0991SDimitry Andric } // namespace llvm 1088bcb0991SDimitry Andric 1098bcb0991SDimitry Andric #endif 110