Initial commit with Aho-Corasick code.
[aho.git] / src / trie.h
1 #ifndef AHO_TRIE_H\r
2 #define AHO_TRIE_H\r
3 \r
4 #include <vector>\r
5 #include <map>\r
6 \r
7 #include "typedef.h"\r
8 \r
9 namespace aho {\r
10         class trie {\r
11         public:\r
12                 typedef trie    _Myt;\r
13                 typedef tstring _string;\r
14         private:\r
15                 _string value;\r
16                 class aho::trie * parent;\r
17 \r
18                 std::map<_string, class aho::trie *> suffixes;\r
19                 std::vector<class aho::trie *> children;\r
20 \r
21                 bool pattern;\r
22                 \r
23                 /**\r
24                  * Checks if a trie node should be nested in current node.\r
25                  * @param node Trie node which will be compared with current node.\r
26                  * @return returns true if node should be nested. False otherwise.\r
27                  */\r
28                 bool should_be_nested(class aho::trie * node) {\r
29                         return this->value.size() < node->get_value().size() ? true : false;\r
30                 }\r
31         protected:\r
32         public:\r
33                 /**\r
34                  * Trie class constructor. Initializes variables with default values.\r
35                  * @param value text value of trie's node.\r
36                  */\r
37                 trie(_string _value) : parent(NULL), pattern(false), value(_value) { }\r
38                 \r
39                 /**\r
40                  * Sets value of trie node.\r
41                  * @param value Value which will be used to set the trie node value.\r
42                  */\r
43                 void set_value(_string value) {\r
44                         this->value = value;\r
45                 }\r
46 \r
47                 /**\r
48                  * Sets parent node of this trie node.\r
49                  * @param parent Parent node for current trie node.\r
50                  */\r
51                 void set_parent(class aho::trie * node) {\r
52                         this->parent = node;\r
53                 }\r
54                 \r
55                 /**\r
56                  * Adds node to the trie.\r
57                  * @param node Trie node which will be added to the current node.\r
58                  */\r
59                 void add_child(class aho::trie * node) {\r
60                         //Je¿eli powinien byæ zagnie¿d¿ony (bo d³u¿szy)\r
61                         if(this->should_be_nested(node)) {\r
62                                 uint length = this->value.size() + 1;\r
63 \r
64                                 //Je¿eli nie znalaz³ wpisu o danym fragmencie nazwy...\r
65                                 class aho::trie * tmp = this->get_child(node->get_value().substr(0, length));\r
66                                 if(tmp == NULL) {\r
67                                         //Je¿eli to jest nadal fragment (nie jest to pe³na nazwa).\r
68                                         if(length < node->get_value().size()) {\r
69                                                 tmp = new aho::trie(node->get_value().substr(0, length + 1));\r
70                                                 tmp->set_parent(this);\r
71                                                 node->set_parent(tmp);\r
72                                                 tmp->add_child(node);\r
73                                                 this->children.push_back(tmp);\r
74                                                 //this->children[tmp->get_value()] = tmp;\r
75                                         }\r
76                                         //WARNING!\r
77                                         //Je¿eli to ju¿ jest pe³na nazwa...\r
78                                         else {\r
79                                                 node->set_parent(this);\r
80                                                 this->children.push_back(node);\r
81                                                 //this->children[node->get_value()] = node;\r
82                                         }\r
83                                 }\r
84                                 //Je¿eli znalaz³ wpis o takim fragmencie nazwy, to rekursywnie...\r
85                                 else {\r
86                                         if(length < node->get_value().size()) {\r
87                                                 node->set_parent(tmp);\r
88                                                 tmp->add_child(node);\r
89                                         }\r
90                                 }\r
91                         }\r
92                         //TODO: (?)\r
93                         //Je¿eli ma nie byæ zagnie¿d¿ony...(Czyli, je¿eli powinien byæ dok³adnie na tym poziomie...)\r
94                         else {\r
95                                 if(this->get_value() == node->get_value()) {\r
96                                         delete node;\r
97                                 }\r
98                         }\r
99                 }\r
100 \r
101                 /**\r
102                  * Sets suffix link of the current node with a given node.\r
103                  * @param node Pointer to node, which will be linked as a suffix.\r
104                  */\r
105                 void add_suffix(class aho::trie * node) {\r
106                         this->suffixes[node->get_value()] = node;\r
107                 }\r
108                 \r
109                 /**\r
110                  * Gets value of current node.\r
111                  * @return Returns node value.\r
112                  */\r
113                 _string get_value() {\r
114                         return this->value;\r
115                 }\r
116                 \r
117                 /**\r
118                  * Gets specific child node of the current one.\r
119                  * @param value Specifies node which has argument's value set in value.\r
120                  * @param recursive If true, longest passing node is searched recursively. Otherwise child node of current one with exactly same value is searched.\r
121                  * @return Returns pointer to node, if the node has been found. NULL otherwise.\r
122                  */\r
123                 class aho::trie * get_child(_string value, bool recursive = false) {\r
124                         for(uint i = 0; i < this->children.size(); i++) {\r
125                                 //Je¿eli znalaz³, to zwróæ\r
126                                 if(this->children[i]->get_value() == value) {\r
127                                         return this->children[i];\r
128                                 }\r
129                                 else if(recursive && (this->children[i]->get_value().compare(value.substr(0, this->children[i]->get_value().size())) == 0)) {\r
130                                         return this->children[i]->get_child(value, recursive);\r
131                                 }\r
132                         }\r
133 \r
134                         return NULL;\r
135                 }\r
136                 \r
137                 /**\r
138                  * Gets node's suffix\r
139                  * @param value text value of suffix.\r
140                  * @return Returns pointer to suffix node, if the node has been found. NULL otherwise.\r
141                  */\r
142                 class aho::trie * get_suffix(_string value = TXT("")) {\r
143                         return this->suffixes[value];\r
144                 }\r
145 \r
146                 /**\r
147                  * Gets parent node of the current trie node.\r
148                  */\r
149                 class aho::trie * get_parent(){\r
150                         return this->parent;\r
151                 }\r
152                 \r
153                 /**\r
154                  * Checks if current node is one of patterns to search.\r
155                  * @return Returns true, if node is one of patterns. False otherwise.\r
156                  */\r
157                 bool is_pattern() {\r
158                         return this->pattern;\r
159                 }\r
160                 \r
161                 /**\r
162                  * Sets node to be a pattern to search.\r
163                  * @param pattern Specifies if node is a pattern.\r
164                  */\r
165                 void is_pattern(bool pattern) {\r
166                         this->pattern = pattern;\r
167                 }\r
168         };\r
169 };\r
170 \r
171 #endif