Aho–Corasick algorithm
(include/emthrm/string/aho-corasick.hpp)
トライ木 を用いてパターンマッチングを行うアルゴリズムである。
時間計算量
検索する文字列の長さを $N$, トライ木に追加する文字列の長さを $M$ とおくと $\langle O(\sum{M}), O(N + \sum{M}) \rangle$
仕様
template < int Sigma = 26 , bool IS_FULL_VER = false >
struct AhoCorasick : Trie < Sigma + 1 > ;
Sigma
:アルファベットサイズ
IS_FULL_VER
:完全版かを表す変数
メンバ変数
名前
説明
std::vector<int> nums
各ノードが保有する文字列の数
メンバ関数
名前
効果・戻り値
要件
継承コンストラクタ
void build();
オートマトンを構築する。
int move(char c, int pos) const;
$\mathrm{pos}$ 番目のノードから見たときに、文字 $c$ に対応するノードのインデックス
int match(const std::string& t, int pos = 0) const;
$\mathrm{pos}$ 番目のノードをから見たときに、文字列 $T$ とマッチする回数
std::map<int, int> match_fully(const std::string& t, int pos = 0) const;
$\mathrm{pos}$ 番目のノードから見たときに、文字列 $T$ とそれぞれの文字列がマッチする回数
完全版
参考文献
Alfred V. Aho and Margaret J. Corasick: Efficient string matching: an aid to bibliographic search, Communications of the ACM , Vol. 18, No. 6, pp. 333–340 (1975). https://doi.org/10.1145/360825.360855
https://naoya-2.hatenadiary.org/entry/20090405/aho_corasick
https://ei1333.github.io/luzhiled/snippets/string/aho-corasick.html
https://github.com/beet-aizu/library/blob/02578ddfa9d2a46e3c724df82e84797dd41eabac/string/ahocorasick.cpp
Submissons
https://yukicoder.me/submissions/575927
Depends on
Verified with
Code
#ifndef EMTHRM_STRING_AHO_CORASICK_HPP_
#define EMTHRM_STRING_AHO_CORASICK_HPP_
#include <algorithm>
#include <cassert>
#include <iterator>
#include <map>
#include <queue>
#include <string>
#include <vector>
#include "emthrm/string/trie.hpp"
namespace emthrm {
template < int Sigma = 26 , bool IS_FULL_VER = false >
struct AhoCorasick : Trie < Sigma + 1 > {
using Trie < Sigma + 1 >:: Trie ;
std :: vector < int > nums ;
void build () {
auto & vertices = this -> nodes ;
const int n = vertices . size ();
nums . resize ( n );
for ( int i = 0 ; i < n ; ++ i ) {
if constexpr ( IS_FULL_VER ) {
std :: sort ( vertices [ i ]. tails . begin (), vertices [ i ]. tails . end ());
}
nums [ i ] = vertices [ i ]. tails . size ();
}
std :: queue < int > que ;
for ( int i = 0 ; i < Sigma ; ++ i ) {
if ( vertices . front (). nxt [ i ] == - 1 ) {
vertices . front (). nxt [ i ] = 0 ;
} else {
vertices [ vertices . front (). nxt [ i ]]. nxt [ Sigma ] = 0 ;
que . emplace ( vertices . front (). nxt [ i ]);
}
}
while ( ! que . empty ()) {
const auto & node = vertices [ que . front ()];
nums [ que . front ()] += nums [ node . nxt [ Sigma ]];
que . pop ();
for ( int i = 0 ; i < Sigma ; ++ i ) {
if ( node . nxt [ i ] == - 1 ) continue ;
int on_failure = node . nxt [ Sigma ];
while ( vertices [ on_failure ]. nxt [ i ] == - 1 ) {
on_failure = vertices [ on_failure ]. nxt [ Sigma ];
}
vertices [ node . nxt [ i ]]. nxt [ Sigma ] = vertices [ on_failure ]. nxt [ i ];
if constexpr ( IS_FULL_VER ) {
std :: vector < int >& ids = vertices [ node . nxt [ i ]]. tails ;
std :: vector < int > tmp ;
std :: set_union ( ids . begin (), ids . end (),
vertices [ vertices [ on_failure ]. nxt [ i ]]. tails . begin (),
vertices [ vertices [ on_failure ]. nxt [ i ]]. tails . end (),
std :: back_inserter ( tmp ));
ids . resize ( tmp . size ());
std :: copy ( tmp . begin (), tmp . end (), ids . begin ());
}
que . emplace ( node . nxt [ i ]);
}
}
}
int move ( char c , int pos ) const {
const int c_int = this -> convert ( c );
while ( this -> nodes [ pos ]. nxt [ c_int ] == - 1 ) pos = this -> nodes [ pos ]. nxt [ Sigma ];
return pos = this -> nodes [ pos ]. nxt [ c_int ];
}
int match ( const std :: string & t , int pos = 0 ) const {
int total = 0 ;
for ( const char c : t ) {
pos = move ( c , pos );
total += nums [ pos ];
}
return total ;
}
std :: map < int , int > match_fully ( const std :: string & t , int pos = 0 ) const {
static_assert ( IS_FULL_VER );
std :: map < int , int > mp ;
for ( const char c : t ) {
pos = move ( c , pos );
for ( const int id : this -> nodes [ pos ]. tails ) ++ mp [ id ];
}
return mp ;
}
};
} // namespace emthrm
#endif // EMTHRM_STRING_AHO_CORASICK_HPP_
#line 1 "include/emthrm/string/aho-corasick.hpp"
#include <algorithm>
#include <cassert>
#include <iterator>
#include <map>
#include <queue>
#include <string>
#include <vector>
#line 1 "include/emthrm/string/trie.hpp"
#line 5 "include/emthrm/string/trie.hpp"
#include <array>
#include <functional>
#line 9 "include/emthrm/string/trie.hpp"
namespace emthrm {
template < int Sigma = 26 >
struct Trie {
struct Node {
char c ;
std :: array < int , Sigma > nxt ;
std :: vector < int > tails ;
explicit Node ( const char c ) : c ( c ) {
std :: fill ( nxt . begin (), nxt . end (), - 1 );
}
};
const std :: function < int ( const char ) > convert ;
std :: vector < Node > nodes ;
explicit Trie ( const std :: function < int ( const char ) > convert =
[]( const char c ) -> int { return c - 'a' ; })
: convert ( convert ) { nodes . emplace_back ( '$' ); }
void add ( const std :: string & s , const int id = - 1 , int pos = 0 ) {
for ( const char c : s ) {
const int c_int = convert ( c );
if ( nodes [ pos ]. nxt [ c_int ] == - 1 ) {
const int nxt_pos = nodes . size ();
nodes [ pos ]. nxt [ c_int ] = nxt_pos ;
nodes . emplace_back ( c );
pos = nxt_pos ;
} else {
pos = nodes [ pos ]. nxt [ c_int ];
}
}
nodes [ pos ]. tails . emplace_back ( id );
}
int find ( const std :: string & t , int pos = 0 ) const {
for ( const char c : t ) {
const int c_int = convert ( c );
if ( nodes [ pos ]. nxt [ c_int ] == - 1 ) return - 1 ;
pos = nodes [ pos ]. nxt [ c_int ];
}
return pos ;
}
};
} // namespace emthrm
#line 13 "include/emthrm/string/aho-corasick.hpp"
namespace emthrm {
template < int Sigma = 26 , bool IS_FULL_VER = false >
struct AhoCorasick : Trie < Sigma + 1 > {
using Trie < Sigma + 1 >:: Trie ;
std :: vector < int > nums ;
void build () {
auto & vertices = this -> nodes ;
const int n = vertices . size ();
nums . resize ( n );
for ( int i = 0 ; i < n ; ++ i ) {
if constexpr ( IS_FULL_VER ) {
std :: sort ( vertices [ i ]. tails . begin (), vertices [ i ]. tails . end ());
}
nums [ i ] = vertices [ i ]. tails . size ();
}
std :: queue < int > que ;
for ( int i = 0 ; i < Sigma ; ++ i ) {
if ( vertices . front (). nxt [ i ] == - 1 ) {
vertices . front (). nxt [ i ] = 0 ;
} else {
vertices [ vertices . front (). nxt [ i ]]. nxt [ Sigma ] = 0 ;
que . emplace ( vertices . front (). nxt [ i ]);
}
}
while ( ! que . empty ()) {
const auto & node = vertices [ que . front ()];
nums [ que . front ()] += nums [ node . nxt [ Sigma ]];
que . pop ();
for ( int i = 0 ; i < Sigma ; ++ i ) {
if ( node . nxt [ i ] == - 1 ) continue ;
int on_failure = node . nxt [ Sigma ];
while ( vertices [ on_failure ]. nxt [ i ] == - 1 ) {
on_failure = vertices [ on_failure ]. nxt [ Sigma ];
}
vertices [ node . nxt [ i ]]. nxt [ Sigma ] = vertices [ on_failure ]. nxt [ i ];
if constexpr ( IS_FULL_VER ) {
std :: vector < int >& ids = vertices [ node . nxt [ i ]]. tails ;
std :: vector < int > tmp ;
std :: set_union ( ids . begin (), ids . end (),
vertices [ vertices [ on_failure ]. nxt [ i ]]. tails . begin (),
vertices [ vertices [ on_failure ]. nxt [ i ]]. tails . end (),
std :: back_inserter ( tmp ));
ids . resize ( tmp . size ());
std :: copy ( tmp . begin (), tmp . end (), ids . begin ());
}
que . emplace ( node . nxt [ i ]);
}
}
}
int move ( char c , int pos ) const {
const int c_int = this -> convert ( c );
while ( this -> nodes [ pos ]. nxt [ c_int ] == - 1 ) pos = this -> nodes [ pos ]. nxt [ Sigma ];
return pos = this -> nodes [ pos ]. nxt [ c_int ];
}
int match ( const std :: string & t , int pos = 0 ) const {
int total = 0 ;
for ( const char c : t ) {
pos = move ( c , pos );
total += nums [ pos ];
}
return total ;
}
std :: map < int , int > match_fully ( const std :: string & t , int pos = 0 ) const {
static_assert ( IS_FULL_VER );
std :: map < int , int > mp ;
for ( const char c : t ) {
pos = move ( c , pos );
for ( const int id : this -> nodes [ pos ]. tails ) ++ mp [ id ];
}
return mp ;
}
};
} // namespace emthrm
Back to top page