cp-library

C++ Library for Competitive Programming

View the Project on GitHub emthrm/cp-library

:heavy_check_mark: 接尾辞配列 (suffix array)
(include/emthrm/string/suffix_array.hpp)

文字列の全接尾辞を辞書順に並べた配列である。

時間計算量

テキスト長を $N$、パターン長を $M$ とおくと $\langle O(N\log{N}), O(M\log{N}) \rangle$

仕様

template <typename T = std::string>
requires requires { typename T::value_type; }
struct SuffixArray;

メンバ変数

名前 説明
std::vector<int> sa 接尾辞配列
std::vector<int> rank $\mathrm{rank}_i$ は S[i:] の接尾辞配列中での位置を表す。

メンバ関数

名前 効果・戻り値
explicit SuffixArray(const T& s_, const typename T::value_type sentinel = 0); $S$ に対してオブジェクトを構築する。
std::vector<int> match(T* t) const; $T$ が出現する文字列 $S$ 中の位置

参考文献

TODO

Submissons

https://onlinejudge.u-aizu.ac.jp/solutions/problem/ALDS1_14_B/review/4972832/emthrm/C++17

Required by

Verified with

Code

#ifndef EMTHRM_STRING_SUFFIX_ARRAY_HPP_
#define EMTHRM_STRING_SUFFIX_ARRAY_HPP_

#include <algorithm>
#include <numeric>
#include <string>
#include <vector>

namespace emthrm {

template <typename T = std::string>
requires requires { typename T::value_type; }
struct SuffixArray {
  std::vector<int> sa, rank;

  explicit SuffixArray(const T& s_, const typename T::value_type sentinel = 0)
      : s(s_) {
    const int n = s.size();
    s.push_back(sentinel);
    sa.resize(n + 1);
    std::iota(sa.rbegin(), sa.rend(), 0);
    std::ranges::stable_sort(
        sa, {}, [this](const int index) -> int { return s[index]; });
    rank.resize(n + 1);
    for (int i = 0; i <= n; ++i) {
      rank[i] = s[i];
    }
    std::vector<int> tmp(n + 1), prev_sa(n + 1);
    for (int len = 1; len <= n; len <<= 1) {
      tmp[sa[0]] = 0;
      for (int i = 1; i <= n; ++i) {
        if (rank[sa[i - 1]] == rank[sa[i]] && sa[i - 1] + len <= n &&
            rank[sa[i - 1] + (len >> 1)] == rank[sa[i] + (len >> 1)]) {
          tmp[sa[i]] = tmp[sa[i - 1]];
        } else {
          tmp[sa[i]] = i;
        }
      }
      rank.swap(tmp);
      std::iota(tmp.begin(), tmp.end(), 0);
      std::copy(sa.begin(), sa.end(), prev_sa.begin());
      for (int i = 0; i <= n; ++i) {
        const int idx = prev_sa[i] - len;
        if (idx >= 0) sa[tmp[rank[idx]]++] = idx;
      }
    }
    for (int i = 0; i <= n; ++i) {
      rank[sa[i]] = i;
    }
  }

  std::vector<int> match(T* t) const {
    const int lb = lower_bound(t);
    ++t->back();
    const int ub = lower_bound(t);
    --t->back();
    std::vector<int> res(ub - lb);
    std::copy(sa.begin() + lb, sa.begin() + ub, res.begin());
    std::sort(res.begin(), res.end());
    return res;
  }

 private:
  T s;

  int lower_bound(const T* t) const {
    const int s_size = s.size(), t_size = t->size();
    int lb = 0, ub = s_size;
    while (ub - lb > 1) {
      const int mid = std::midpoint(lb, ub);
      int s_idx = sa[mid], t_idx = 0;
      bool finished = false;
      for (; s_idx < s_size && t_idx < t_size; ++s_idx, ++t_idx) {
        if (s[s_idx] != (*t)[t_idx]) {
          (s[s_idx] < (*t)[t_idx] ? lb : ub) = mid;
          finished = true;
          break;
        }
      }
      if (!finished) (s_idx == s_size && t_idx < t_size ? lb : ub) = mid;
    }
    return ub;
  }
};

}  // namespace emthrm

#endif  // EMTHRM_STRING_SUFFIX_ARRAY_HPP_
#line 1 "include/emthrm/string/suffix_array.hpp"



#include <algorithm>
#include <numeric>
#include <string>
#include <vector>

namespace emthrm {

template <typename T = std::string>
requires requires { typename T::value_type; }
struct SuffixArray {
  std::vector<int> sa, rank;

  explicit SuffixArray(const T& s_, const typename T::value_type sentinel = 0)
      : s(s_) {
    const int n = s.size();
    s.push_back(sentinel);
    sa.resize(n + 1);
    std::iota(sa.rbegin(), sa.rend(), 0);
    std::ranges::stable_sort(
        sa, {}, [this](const int index) -> int { return s[index]; });
    rank.resize(n + 1);
    for (int i = 0; i <= n; ++i) {
      rank[i] = s[i];
    }
    std::vector<int> tmp(n + 1), prev_sa(n + 1);
    for (int len = 1; len <= n; len <<= 1) {
      tmp[sa[0]] = 0;
      for (int i = 1; i <= n; ++i) {
        if (rank[sa[i - 1]] == rank[sa[i]] && sa[i - 1] + len <= n &&
            rank[sa[i - 1] + (len >> 1)] == rank[sa[i] + (len >> 1)]) {
          tmp[sa[i]] = tmp[sa[i - 1]];
        } else {
          tmp[sa[i]] = i;
        }
      }
      rank.swap(tmp);
      std::iota(tmp.begin(), tmp.end(), 0);
      std::copy(sa.begin(), sa.end(), prev_sa.begin());
      for (int i = 0; i <= n; ++i) {
        const int idx = prev_sa[i] - len;
        if (idx >= 0) sa[tmp[rank[idx]]++] = idx;
      }
    }
    for (int i = 0; i <= n; ++i) {
      rank[sa[i]] = i;
    }
  }

  std::vector<int> match(T* t) const {
    const int lb = lower_bound(t);
    ++t->back();
    const int ub = lower_bound(t);
    --t->back();
    std::vector<int> res(ub - lb);
    std::copy(sa.begin() + lb, sa.begin() + ub, res.begin());
    std::sort(res.begin(), res.end());
    return res;
  }

 private:
  T s;

  int lower_bound(const T* t) const {
    const int s_size = s.size(), t_size = t->size();
    int lb = 0, ub = s_size;
    while (ub - lb > 1) {
      const int mid = std::midpoint(lb, ub);
      int s_idx = sa[mid], t_idx = 0;
      bool finished = false;
      for (; s_idx < s_size && t_idx < t_size; ++s_idx, ++t_idx) {
        if (s[s_idx] != (*t)[t_idx]) {
          (s[s_idx] < (*t)[t_idx] ? lb : ub) = mid;
          finished = true;
          break;
        }
      }
      if (!finished) (s_idx == s_size && t_idx < t_size ? lb : ub) = mid;
    }
    return ub;
  }
};

}  // namespace emthrm
Back to top page