1 // Copyright (c) 1994 James Clark
2 // See the file COPYING for copying permission.
3 #pragma ident "%Z%%M% %I% %E% SMI"
4
5 #ifdef __GNUG__
6 #pragma implementation
7 #endif
8 #include "splib.h"
9 #include "types.h"
10 #include "macros.h"
11 #include "StringOf.h"
12 #include "Trie.h"
13 #include "TrieBuilder.h"
14 #include "Priority.h"
15 #include <stdlib.h>
16
17 #ifdef SP_NAMESPACE
18 namespace SP_NAMESPACE {
19 #endif
20
~Trie()21 Trie::~Trie()
22 {
23 if (next_)
24 delete [] next_;
25 }
26
Trie(const Trie & t)27 Trie::Trie(const Trie &t)
28 : nCodes_(t.nCodes_),
29 token_(t.token_),
30 tokenLength_(t.tokenLength_),
31 priority_(t.priority_),
32 blank_(t.blank_)
33 {
34 if (t.next_) {
35 next_ = new Trie[nCodes_];
36 for (int i = 0; i < nCodes_; i++)
37 next_[i] = t.next_[i];
38 }
39 else
40 next_ = 0;
41 }
42
operator =(const Trie & t)43 Trie &Trie::operator=(const Trie &t)
44 {
45 if (next_)
46 delete [] next_;
47 nCodes_ = t.nCodes_;
48 token_ = t.token_;
49 tokenLength_ = t.tokenLength_;
50 priority_ = t.priority_;
51 blank_ = t.blank_;
52 if (t.next_) {
53 next_ = new Trie[nCodes_];
54 for (int i = 0; i < nCodes_; i++)
55 next_[i] = t.next_[i];
56 }
57 else
58 next_ = 0;
59 return *this;
60 }
61
TrieBuilder(int nCodes)62 TrieBuilder::TrieBuilder(int nCodes)
63 : nCodes_(nCodes), root_(new Trie)
64 {
65 root_->token_ = 0;
66 root_->tokenLength_ = 0;
67 root_->priority_ = Priority::data;
68 root_->nCodes_ = nCodes;
69 }
70
recognize(const String<EquivCode> & chars,Token t,Priority::Type priority,TokenVector & ambiguities)71 void TrieBuilder::recognize(const String<EquivCode> &chars,
72 Token t,
73 Priority::Type priority,
74 TokenVector &ambiguities)
75 {
76 setToken(extendTrie(root_.pointer(), chars), chars.size(), t, priority,
77 ambiguities);
78 }
79
recognize(const String<EquivCode> & chars,const String<EquivCode> & set,Token t,Priority::Type priority,TokenVector & ambiguities)80 void TrieBuilder::recognize(const String<EquivCode> &chars,
81 const String<EquivCode> &set,
82 Token t,
83 Priority::Type priority,
84 TokenVector &ambiguities)
85 {
86 Trie *trie = extendTrie(root_.pointer(), chars);
87
88 for (size_t i = 0; i < set.size(); i++)
89 setToken(forceNext(trie, set[i]), chars.size() + 1, t, priority,
90 ambiguities);
91 }
92
recognizeB(const String<EquivCode> & chars,int bSequenceLength,size_t maxBlankSequence,const String<EquivCode> & blankCodes,const String<EquivCode> & chars2,Token token,TokenVector & ambiguities)93 void TrieBuilder::recognizeB(const String<EquivCode> &chars,
94 int bSequenceLength,
95 size_t maxBlankSequence,
96 const String<EquivCode> &blankCodes,
97 const String<EquivCode> &chars2,
98 Token token,
99 TokenVector &ambiguities)
100 {
101 doB(extendTrie(root_.pointer(), chars),
102 chars.size(),
103 bSequenceLength,
104 maxBlankSequence,
105 blankCodes,
106 chars2,
107 token,
108 Priority::blank(bSequenceLength),
109 ambiguities);
110 }
111
recognizeEE(EquivCode code,Token t)112 void TrieBuilder::recognizeEE(EquivCode code, Token t)
113 {
114 Trie *trie = forceNext(root_.pointer(), code);
115 trie->tokenLength_ = 0; // it has length 0 in the buffer
116 trie->token_ = t;
117 trie->priority_ = Priority::data;
118 }
119
doB(Trie * trie,int tokenLength,int minBLength,size_t maxLength,const String<EquivCode> & blankCodes,const String<EquivCode> & chars2,Token token,Priority::Type pri,TokenVector & ambiguities)120 void TrieBuilder::doB(Trie *trie,
121 int tokenLength,
122 int minBLength,
123 size_t maxLength,
124 const String<EquivCode> &blankCodes,
125 const String<EquivCode> &chars2,
126 Token token,
127 Priority::Type pri,
128 TokenVector &ambiguities)
129 {
130 if (minBLength == 0 && trie->next_ == 0) {
131 if (!trie->blank_) {
132 BlankTrie *b = new BlankTrie;
133 trie->blank_ = b;
134 b->maxBlanksToScan_ = maxLength;
135 b->additionalLength_ = tokenLength;
136 b->codeIsBlank_.assign(nCodes_, 0);
137 for (size_t i = 0; i < blankCodes.size(); i++)
138 b->codeIsBlank_[blankCodes[i]] = 1;
139 b->tokenLength_ = 0;
140 b->token_ = 0;
141 b->priority_ = Priority::data;
142 b->nCodes_ = nCodes_;
143 }
144 else {
145 // A B sequence is not allowed to be adjacent to a character
146 // that can occur in a blank sequence, so maxLength will be
147 // the same at a node, no matter how we got there.
148 ASSERT(trie->blank_->maxBlanksToScan_ == maxLength);
149 ASSERT(trie->blank_->additionalLength_ == tokenLength);
150 }
151 if (chars2.size() == 0)
152 setToken(trie, tokenLength, token, pri, ambiguities);
153 else
154 setToken(extendTrie(trie->blank_.pointer(), chars2),
155 chars2.size(),
156 token,
157 pri,
158 ambiguities);
159 }
160 else {
161 if (minBLength == 0)
162 setToken(extendTrie(trie, chars2), tokenLength + chars2.size(),
163 token, pri, ambiguities);
164 for (size_t i = 0; i < blankCodes.size(); i++)
165 doB(forceNext(trie, blankCodes[i]),
166 tokenLength + 1,
167 minBLength == 0 ? 0 : minBLength - 1,
168 maxLength - 1,
169 blankCodes,
170 chars2,
171 token,
172 pri,
173 ambiguities);
174 }
175 }
176
extendTrie(Trie * trie,const String<EquivCode> & s)177 Trie *TrieBuilder::extendTrie(Trie *trie, const String<EquivCode> &s)
178 {
179 for (size_t i = 0; i < s.size(); i++)
180 trie = forceNext(trie, s[i]);
181 return trie;
182 }
183
setToken(Trie * trie,int tokenLength,Token token,Priority::Type pri,TokenVector & ambiguities)184 void TrieBuilder::setToken(Trie *trie,
185 int tokenLength,
186 Token token,
187 Priority::Type pri,
188 TokenVector &ambiguities)
189 {
190 if (tokenLength > trie->tokenLength_
191 || (tokenLength == trie->tokenLength_
192 && pri > trie->priority_)) {
193 trie->tokenLength_ = tokenLength;
194 trie->token_ = token;
195 trie->priority_ = pri;
196 }
197 else if (trie->tokenLength_ == tokenLength
198 && trie->priority_ == pri
199 && trie->token_ != token
200 && trie->token_ != 0) {
201 ambiguities.push_back(Token(trie->token_));
202 ambiguities.push_back(token);
203 }
204 if (trie->hasNext()) {
205 for (int i = 0; i < nCodes_; i++)
206 setToken(&trie->next_[i], tokenLength, token, pri, ambiguities);
207 }
208 }
209
copyInto(Trie * into,const Trie * from,int additionalLength)210 void TrieBuilder::copyInto(Trie *into, const Trie *from, int additionalLength)
211 {
212 if (from->token_ != 0) {
213 TokenVector ambiguities;
214 setToken(into, from->tokenLength_ + additionalLength, from->token_,
215 from->priority_, ambiguities);
216 ASSERT(ambiguities.size() == 0);
217 }
218 if (from->hasNext())
219 for (int i = 0; i < nCodes_; i++)
220 copyInto(forceNext(into, i), &from->next_[i], additionalLength);
221 }
222
forceNext(Trie * trie,EquivCode c)223 Trie *TrieBuilder::forceNext(Trie *trie, EquivCode c)
224 {
225 if (!trie->hasNext()) {
226 trie->next_ = new Trie[nCodes_];
227 if (trie->blank_) {
228 trie->blank_->additionalLength_ += 1;
229 trie->blank_->maxBlanksToScan_ -= 1;
230 }
231 Owner<BlankTrie> blankOwner(trie->blank_.extract());
232 const BlankTrie *b = blankOwner.pointer();
233 for (int i = 0; i < nCodes_; i++) {
234 Trie *p = &trie->next_[i];
235 if (b && b->codeIsBlank(i))
236 trie->next_[i].blank_ = (blankOwner
237 ? blankOwner.extract()
238 : new BlankTrie(*b));
239 p->token_ = trie->token_;
240 p->tokenLength_ = trie->tokenLength_;
241 p->priority_ = trie->priority_;
242 p->nCodes_ = nCodes_;
243 }
244 if (b)
245 // -1 because 1 was added above
246 copyInto(trie, b, b->additionalLength_ - 1);
247 }
248 return &trie->next_[c];
249 }
250
251 #ifdef SP_NAMESPACE
252 }
253 #endif
254