1 /* $Id: Trie.cpp,v 1.11 2011/01/08 19:07:36 sarrazip Exp $
2 Trie.cpp - Tree structure for string storage
4 verbiste - French conjugation system
5 Copyright (C) 2003-2005 Pierre Sarrazin <http://sarrazip.com/>
7 This program is free software; you can redistribute it and/or
8 modify it under the terms of the GNU General Public License
9 as published by the Free Software Foundation; either version 2
10 of the License, or (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
35 Trie<T>::Trie(bool _userDataFromNew)
38 userDataFromNew(_userDataFromNew)
46 firstRow->recursiveDelete(userDataFromNew);
52 Trie<T>::Descriptor::Descriptor(Row *inferior /*= NULL*/)
53 : inferiorRow(inferior),
60 Trie<T>::Descriptor::~Descriptor()
67 Trie<T>::Descriptor::recursiveDelete(bool deleteUserData)
74 if (inferiorRow != NULL)
76 inferiorRow->recursiveDelete(deleteUserData);
85 Trie<T>::Descriptor::computeMemoryConsumption() const
87 return sizeof(*this) + (inferiorRow != NULL ? inferiorRow->computeMemoryConsumption() : 0);
93 Trie<T>::CharDesc::computeMemoryConsumption() const
95 return (sizeof(*this) - sizeof(desc)) + desc.computeMemoryConsumption();
101 Trie<T>::Row::computeMemoryConsumption() const
104 for (typename std::vector<CharDesc>::const_iterator it = elements.begin(); it != elements.end(); ++it)
105 sum += it->computeMemoryConsumption();
106 return sizeof(*this) + sum;
112 Trie<T>::Row::recursiveDelete(bool deleteUserData)
114 for (typename std::vector<CharDesc>::iterator it = elements.begin();
115 it != elements.end(); it++)
116 it->desc.recursiveDelete(deleteUserData);
122 typename Trie<T>::Descriptor *
123 Trie<T>::Row::find(wchar_t unichar)
125 for (typename std::vector<CharDesc>::iterator it = elements.begin();
126 it != elements.end(); it++)
127 if (it->unichar == unichar)
135 typename Trie<T>::Descriptor &
136 Trie<T>::Row::operator [] (wchar_t unichar)
138 Descriptor *pd = find(unichar);
142 elements.push_back(CharDesc(unichar));
143 assert(elements.back().unichar == unichar);
144 return elements.back().desc;
148 ///////////////////////////////////////////////////////////////////////////////
153 Trie<T>::add(const std::wstring &key, T *userData)
162 Descriptor *d = getDesc(firstRow, key, 0, true, false);
164 T *old = d->userData;
165 d->userData = userData;
172 Trie<T>::get(const std::wstring &key) const
175 onFoundPrefixWithUserData(key, 0, lambda);
180 Descriptor *d = const_cast<Trie<T> *>(this)->getDesc(firstRow, key, 0, false, true);
181 return (d != NULL ? d->userData : NULL);
187 Trie<T>::getWithDefault(const std::wstring &key, T *deFault)
196 Descriptor *d = getDesc(firstRow, key, 0, true, false);
198 if (d->userData == NULL)
199 d->userData = deFault;
206 Trie<T>::getUserDataPointer(const std::wstring &key)
211 // Get descriptor associated with 'key' (and create a new entry
212 // if the key is not known).
214 Descriptor *d = getDesc(firstRow, key, 0, true, false);
221 typename Trie<T>::Descriptor *
222 Trie<T>::getDesc(Row *row,
223 const std::wstring &key,
224 std::wstring::size_type index,
226 bool callFoundPrefixCallback)
229 assert(index < key.length());
231 wchar_t unichar = key[index]; // the "expected" character
232 assert(unichar != '\0');
234 Descriptor *pd = row->find(unichar);
236 static bool trieTrace = getenv("TRACE") != NULL;
238 std::wcout << "getDesc(row=" << row
239 << ", key='" << key << "' (len=" << key.length()
240 << "), index=" << index
241 << ", create=" << create
242 << ", call=" << callFoundPrefixCallback
243 << "): unichar=" << unichar << ", pd=" << pd << "\n";
245 if (pd == NULL) // if expected character not found
250 Descriptor &newDesc = (*row)[unichar];
251 assert(row->find(unichar) != NULL);
252 assert(row->find(unichar) == &newDesc);
254 if (index + 1 == key.length()) // if last char of string
257 // Create new descriptor that points to a new inferior row:
258 newDesc.inferiorRow = new Row();
259 assert(row->find(unichar)->inferiorRow == newDesc.inferiorRow);
261 return getDesc(newDesc.inferiorRow,
262 key, index + 1, create, callFoundPrefixCallback);
266 std::wcout << "getDesc: userData=" << pd->userData
267 << ", inferiorRow=" << pd->inferiorRow
270 if (callFoundPrefixCallback && pd->userData != NULL)
271 onFoundPrefixWithUserData(key, index + 1, pd->userData); // virtual call
273 if (index + 1 == key.length()) // if reached end of key
276 std::wcout << "getDesc: reached end of key\n";
280 if (pd->inferiorRow == NULL) // if pd is a leaf:
283 return NULL; // not found
285 pd->inferiorRow = new Row();
288 return getDesc(pd->inferiorRow,
289 key, index + 1, create, callFoundPrefixCallback);
295 Trie<T>::computeMemoryConsumption() const
297 return sizeof(*this) + (firstRow != NULL ? firstRow->computeMemoryConsumption() : 0);
301 } // namespace verbiste