6 #include "algorithm.h"
\r
10 * Adds patterns to trie.
\r
12 void aho::algorithm::add_patterns_to_trie() {
\r
13 uint max_size_of_patterns = 0;
\r
15 for(uint i = 0; i < this->patterns.size(); i++) {
\r
16 max_size_of_patterns = max_size_of_patterns < this->patterns[i].size() ? this->patterns[i].size() : max_size_of_patterns;
\r
19 for(uint letter_iterator = 0; letter_iterator < max_size_of_patterns; letter_iterator++) {
\r
20 for(uint pattern_iterator = 0; pattern_iterator < this->patterns.size(); pattern_iterator++) {
\r
21 tstring current_pattern = this->patterns[pattern_iterator];
\r
23 if(letter_iterator < current_pattern.size()) {
\r
24 this->trie->add_child(new aho::trie(current_pattern.substr(0, letter_iterator + 1)));
\r
30 void aho::algorithm::set_patterns_in_trie() {
\r
31 //Iterate over all patterns
\r
32 for(uint i = 0; i < this->patterns.size(); i++) {
\r
33 //Return to root node.
\r
34 class aho::trie * node = this->trie;
\r
35 tstring pattern = this->patterns[i];
\r
37 //Search for pattern in trie
\r
38 for(uint j = 0; j < pattern.size(); j++) {
\r
39 node = node->get_child(pattern.substr(0, j + 1).c_str());
\r
41 //Set found node as pattern
\r
42 node->is_pattern(true);
\r
46 void aho::algorithm::set_suffix_link() {
\r
47 for(uint j = 0; j < this->list.size(); j++) {
\r
48 class aho::trie * node = this->list[j];
\r
50 for(uint k = 1; k < node->get_value().size(); k++) {
\r
51 tstring suffix = (node->get_value().substr(k));
\r
53 class aho::trie * found = NULL;
\r
54 if((found = this->trie->get_child(suffix, true)) != NULL) {
\r
55 if(node->get_suffix(found->get_value()) == NULL) {
\r
56 node->add_suffix(found);
\r
66 void aho::algorithm::set_nodes_list() {
\r
67 for(uint i = 0; i < this->patterns.size(); i++) {
\r
68 for(uint j = 1; j <= this->patterns[i].size(); j++) {
\r
69 class aho::trie * node = this->get_node(this->patterns[i].substr(0, j));
\r
70 //TODO: Pozbyæ siê powtarzania
\r
71 if(!this->is_on_nodes_list(node)) {
\r
72 this->list.push_back(node);
\r
79 * Checks if node exists on node list.
\r
81 bool aho::algorithm::is_on_nodes_list(class aho::trie * node) {
\r
82 for(uint i = 0; i < this->list.size(); i++) {
\r
83 if(this->list[i] == node) {
\r
91 * Adds pattern to search in a subject text.
\r
93 void aho::algorithm::add_search_pattern(tstring pattern) {
\r
94 this->patterns.push_back(pattern);
\r
96 for(uint i = 0; i < pattern.size(); i++) {
\r
97 if(this->letters.find(pattern[i]) == tstring::npos) {
\r
98 this->letters.push_back(pattern[i]);
\r
106 void aho::algorithm::generate_trie() {
\r
107 this->add_patterns_to_trie();
\r
108 this->set_patterns_in_trie();
\r
109 this->set_nodes_list();
\r
110 this->set_suffix_link();
\r
114 * Search subject text for patterns.
\r
116 void aho::algorithm::parse() {
\r
117 uint length = this->subject.size();
\r
118 class aho::trie * current_node = this->trie;
\r
120 for(uint j = 0; j < length; j++) {
\r
122 letter = this->subject[j];
\r
124 tstring value(current_node->get_value() + letter);
\r
126 class aho::trie * node = current_node->get_child(value);
\r
127 class aho::trie * suffix_node = NULL;
\r
131 //szukaj najd³u¿szego sufixu w node'zie
\r
133 for(uint k = 1; k < value.size(); k++) {
\r
134 // tstring suffix(current_node->get_value().substr(k));
\r
135 if((suffix_node = current_node->get_suffix(value.substr(k))) != NULL) {
\r
136 node = suffix_node;
\r
139 else if((suffix_node = current_node->get_suffix(current_node->get_value().substr(k))) != NULL) {
\r
140 node = suffix_node;
\r
147 if(this->is_pattern(node)) {
\r
149 this->save_result(node->get_value(), j+2-node->get_value().size());
\r
152 W przypadku natrafienia na wêze³
\r
153 oznaczony jako koniec s³owa, algorytm wypisuje je jako znalezione w tek
\9ccie oraz sprawdza, czy czasem w tym
\r
154 miejscu nie koñczy siê wiêcej wzorców przechodz¹c krawêdziami fail a¿ do korzenia sprawdzaj¹c, czy nie
\r
155 przechodzi po wierzcho³ku bêd¹cym koñcem wzorca.
\r
157 //Sprawd
\9f suffixy je
\9cli istniej¹
\r
158 class aho::trie * other = node->get_suffix();
\r
159 while(other != NULL) {
\r
160 if(this->is_pattern(other)) {
\r
161 this->save_result(other->get_value(), j+2-other->get_value().size());
\r
163 if(other->get_child(other->get_value() + this->subject[j+1])) {
\r
166 other = other->get_suffix();
\r
169 other = this->trie;
\r
171 //Zgubi³ w¹tek, bo natrafi³ na literê, która nie jest dzieckiem.
\r
172 //Przejecha³ po suffixach...
\r
173 //Pozosta³o sprawdziæ, czy na podstawie
\r
174 if(!node->get_child(node->get_value() + this->subject[j+1])) {
\r
178 current_node = node;
\r
180 current_node = node ? node : this->trie;
\r