IOI 2006 - Deciphering the Mayan Writing
Aho-Corasick Multi-Pattern Matching Build a trie from all patterns, recording which pattern ends at each terminal node. Compute failure links via BFS from the root: for each node u with child v on character c, the fai...
Source-first archive entry
This page is built from the copied files in competitive_programming/ioi/2006/writing. Edit
competitive_programming/ioi/2006/writing/solution.tex to update the written solution and
competitive_programming/ioi/2006/writing/solution.cpp to update the implementation.
The website does not replace those files with hand-maintained HTML. It reads the copied source tree during the build and exposes the exact files below.
Problem Statement
Rendered from the "Problem Statement" section inside solution.tex because this entry has no separate statement file.
Given a text string $T$ and $K$ pattern strings $P_1, \ldots, P_K$, count how many times each pattern occurs as a substring of $T$. Patterns may overlap in the text.
Editorial
Rendered from the copied solution.tex file. The original TeX source remains
available below.
Solution
Aho-Corasick Multi-Pattern Matching
Build a trie from all patterns, recording which pattern ends at each terminal node.
Compute failure links via BFS from the root: for each node $u$ with child $v$ on character $c$, the failure link of $v$ points to $\delta(\text{fail}(u), c)$.
Compute dictionary suffix links: for each node $u$, the dictionary suffix link points to the nearest ancestor (via the failure chain) that is a pattern endpoint. This allows efficient enumeration of all patterns ending at a given position.
Search: Scan the text character by character, advancing through the automaton. At each position, follow dictionary suffix links to report all matching patterns.
Complexity
Time: $O(|T| + \sum |P_i| + Z)$ where $Z$ is the total number of matches reported.
Space: $O(\sum |P_i| \cdot |\Sigma|)$ for the automaton.
C++ Solution
#include <bits/stdc++.h>
using namespace std;
struct AhoCorasick {
static const int ALPHA = 26;
struct Node {
int ch[26];
int fail;
int dictSuffix;
int patternId;
Node() {
memset(ch, -1, sizeof(ch));
fail = 0;
dictSuffix = -1;
patternId = -1;
}
};
vector<Node> trie;
AhoCorasick() {
trie.emplace_back();
}
void insert(const string& s, int id) {
int cur = 0;
for (char c : s) {
int ci = c - 'a';
if (trie[cur].ch[ci] == -1) {
trie[cur].ch[ci] = trie.size();
trie.emplace_back();
}
cur = trie[cur].ch[ci];
}
trie[cur].patternId = id;
}
void build() {
queue<int> q;
for (int c = 0; c < ALPHA; c++) {
if (trie[0].ch[c] == -1) {
trie[0].ch[c] = 0;
} else {
trie[trie[0].ch[c]].fail = 0;
q.push(trie[0].ch[c]);
}
}
while (!q.empty()) {
int u = q.front(); q.pop();
trie[u].dictSuffix =
(trie[trie[u].fail].patternId != -1)
? trie[u].fail
: trie[trie[u].fail].dictSuffix;
for (int c = 0; c < ALPHA; c++) {
if (trie[u].ch[c] == -1) {
trie[u].ch[c] = trie[trie[u].fail].ch[c];
} else {
trie[trie[u].ch[c]].fail = trie[trie[u].fail].ch[c];
q.push(trie[u].ch[c]);
}
}
}
}
vector<int> search(const string& text, int numPatterns) {
vector<int> count(numPatterns, 0);
int cur = 0;
for (char c : text) {
cur = trie[cur].ch[c - 'a'];
// Report all patterns ending at this position
if (trie[cur].patternId != -1)
count[trie[cur].patternId]++;
int tmp = trie[cur].dictSuffix;
while (tmp != -1) {
count[trie[tmp].patternId]++;
tmp = trie[tmp].dictSuffix;
}
}
return count;
}
};
int main(){
ios::sync_with_stdio(false);
cin.tie(nullptr);
int K;
cin >> K;
AhoCorasick ac;
for (int i = 0; i < K; i++) {
string p;
cin >> p;
ac.insert(p, i);
}
ac.build();
string text;
cin >> text;
vector<int> counts = ac.search(text, K);
for (int i = 0; i < K; i++)
cout << counts[i] << "\n";
return 0;
}
Notes
The dictionary suffix link is the key to efficient multi-pattern reporting. Without it, following the full failure chain at every text position would degrade to $O(|T| \cdot \max|P_i|)$ in the worst case. The dictionary suffix link skips directly to the next pattern-ending node, so the total work for reporting is proportional to the number of matches.
The alphabet size should be adjusted to match the problem's character set. For Mayan hieroglyphics, the alphabet may be larger than 26; replace the ALPHA constant and the index mapping accordingly.
Code
Exact copied C++ implementation from solution.cpp.
#include <bits/stdc++.h>
using namespace std;
struct AhoCorasick {
static const int ALPHA = 26; // adjust for problem's alphabet
struct Node {
int ch[26];
int fail;
int dictSuffix; // nearest pattern-ending node via fail chain
int patternId; // -1 if not end of pattern
Node() {
memset(ch, -1, sizeof(ch));
fail = 0;
dictSuffix = -1;
patternId = -1;
}
};
vector<Node> trie;
AhoCorasick() {
trie.emplace_back(); // root = node 0
}
void insert(const string& s, int id) {
int cur = 0;
for (char c : s) {
int ci = c - 'a'; // adjust for alphabet
if (trie[cur].ch[ci] == -1) {
trie[cur].ch[ci] = trie.size();
trie.emplace_back();
}
cur = trie[cur].ch[ci];
}
trie[cur].patternId = id;
}
void build() {
queue<int> q;
// Initialize root's children
for (int c = 0; c < ALPHA; c++) {
if (trie[0].ch[c] == -1) {
trie[0].ch[c] = 0; // loop back to root
} else {
trie[trie[0].ch[c]].fail = 0;
q.push(trie[0].ch[c]);
}
}
while (!q.empty()) {
int u = q.front(); q.pop();
// Dictionary suffix link
if (trie[trie[u].fail].patternId != -1) {
trie[u].dictSuffix = trie[u].fail;
} else {
trie[u].dictSuffix = trie[trie[u].fail].dictSuffix;
}
for (int c = 0; c < ALPHA; c++) {
if (trie[u].ch[c] == -1) {
trie[u].ch[c] = trie[trie[u].fail].ch[c];
} else {
trie[trie[u].ch[c]].fail = trie[trie[u].fail].ch[c];
q.push(trie[u].ch[c]);
}
}
}
}
// Search text and count occurrences of each pattern
vector<int> search(const string& text, int numPatterns) {
vector<int> count(numPatterns, 0);
int cur = 0;
for (char c : text) {
cur = trie[cur].ch[c - 'a'];
// Report all patterns ending here
int tmp = cur;
while (tmp > 0) {
if (trie[tmp].patternId != -1) {
count[trie[tmp].patternId]++;
}
tmp = trie[tmp].dictSuffix;
if (tmp == -1) break;
}
}
return count;
}
};
int main(){
ios::sync_with_stdio(false);
cin.tie(nullptr);
int K;
cin >> K;
AhoCorasick ac;
for (int i = 0; i < K; i++) {
string p;
cin >> p;
ac.insert(p, i);
}
ac.build();
string text;
cin >> text;
vector<int> counts = ac.search(text, K);
for (int i = 0; i < K; i++) {
cout << counts[i] << "\n";
}
return 0;
}
Source Files
Exact copied source-of-truth files. Edit solution.tex for the write-up and solution.cpp for the implementation.
\documentclass[11pt,a4paper]{article}
\usepackage[utf8]{inputenc}
\usepackage{amsmath,amssymb,amsthm}
\usepackage{listings}
\usepackage{geometry}
\usepackage{hyperref}
\geometry{margin=2.5cm}
\lstset{
language=C++,
basicstyle=\ttfamily\small,
keywordstyle=\bfseries,
breaklines=true,
frame=single,
numbers=left,
numberstyle=\tiny,
tabsize=4,
showstringspaces=false
}
\title{IOI 2006 -- Deciphering the Mayan Writing}
\author{Solution}
\date{}
\begin{document}
\maketitle
\section{Problem Statement}
Given a text string $T$ and $K$ pattern strings $P_1, \ldots, P_K$, count how many times each pattern occurs as a substring of $T$. Patterns may overlap in the text.
\section{Solution}
\subsection{Aho-Corasick Multi-Pattern Matching}
\begin{enumerate}
\item \textbf{Build a trie} from all patterns, recording which pattern ends at each terminal node.
\item \textbf{Compute failure links} via BFS from the root: for each node $u$ with child $v$ on character $c$, the failure link of $v$ points to $\delta(\text{fail}(u), c)$.
\item \textbf{Compute dictionary suffix links}: for each node $u$, the dictionary suffix link points to the nearest ancestor (via the failure chain) that is a pattern endpoint. This allows efficient enumeration of all patterns ending at a given position.
\item \textbf{Search}: Scan the text character by character, advancing through the automaton. At each position, follow dictionary suffix links to report all matching patterns.
\end{enumerate}
\subsection{Complexity}
\begin{itemize}
\item \textbf{Time:} $O(|T| + \sum |P_i| + Z)$ where $Z$ is the total number of matches reported.
\item \textbf{Space:} $O(\sum |P_i| \cdot |\Sigma|)$ for the automaton.
\end{itemize}
\section{C++ Solution}
\begin{lstlisting}
#include <bits/stdc++.h>
using namespace std;
struct AhoCorasick {
static const int ALPHA = 26;
struct Node {
int ch[26];
int fail;
int dictSuffix;
int patternId;
Node() {
memset(ch, -1, sizeof(ch));
fail = 0;
dictSuffix = -1;
patternId = -1;
}
};
vector<Node> trie;
AhoCorasick() {
trie.emplace_back();
}
void insert(const string& s, int id) {
int cur = 0;
for (char c : s) {
int ci = c - 'a';
if (trie[cur].ch[ci] == -1) {
trie[cur].ch[ci] = trie.size();
trie.emplace_back();
}
cur = trie[cur].ch[ci];
}
trie[cur].patternId = id;
}
void build() {
queue<int> q;
for (int c = 0; c < ALPHA; c++) {
if (trie[0].ch[c] == -1) {
trie[0].ch[c] = 0;
} else {
trie[trie[0].ch[c]].fail = 0;
q.push(trie[0].ch[c]);
}
}
while (!q.empty()) {
int u = q.front(); q.pop();
trie[u].dictSuffix =
(trie[trie[u].fail].patternId != -1)
? trie[u].fail
: trie[trie[u].fail].dictSuffix;
for (int c = 0; c < ALPHA; c++) {
if (trie[u].ch[c] == -1) {
trie[u].ch[c] = trie[trie[u].fail].ch[c];
} else {
trie[trie[u].ch[c]].fail = trie[trie[u].fail].ch[c];
q.push(trie[u].ch[c]);
}
}
}
}
vector<int> search(const string& text, int numPatterns) {
vector<int> count(numPatterns, 0);
int cur = 0;
for (char c : text) {
cur = trie[cur].ch[c - 'a'];
// Report all patterns ending at this position
if (trie[cur].patternId != -1)
count[trie[cur].patternId]++;
int tmp = trie[cur].dictSuffix;
while (tmp != -1) {
count[trie[tmp].patternId]++;
tmp = trie[tmp].dictSuffix;
}
}
return count;
}
};
int main(){
ios::sync_with_stdio(false);
cin.tie(nullptr);
int K;
cin >> K;
AhoCorasick ac;
for (int i = 0; i < K; i++) {
string p;
cin >> p;
ac.insert(p, i);
}
ac.build();
string text;
cin >> text;
vector<int> counts = ac.search(text, K);
for (int i = 0; i < K; i++)
cout << counts[i] << "\n";
return 0;
}
\end{lstlisting}
\section{Notes}
The dictionary suffix link is the key to efficient multi-pattern reporting. Without it, following the full failure chain at every text position would degrade to $O(|T| \cdot \max|P_i|)$ in the worst case. The dictionary suffix link skips directly to the next pattern-ending node, so the total work for reporting is proportional to the number of matches.
The alphabet size should be adjusted to match the problem's character set. For Mayan hieroglyphics, the alphabet may be larger than 26; replace the \texttt{ALPHA} constant and the index mapping accordingly.
\end{document}
#include <bits/stdc++.h>
using namespace std;
struct AhoCorasick {
static const int ALPHA = 26; // adjust for problem's alphabet
struct Node {
int ch[26];
int fail;
int dictSuffix; // nearest pattern-ending node via fail chain
int patternId; // -1 if not end of pattern
Node() {
memset(ch, -1, sizeof(ch));
fail = 0;
dictSuffix = -1;
patternId = -1;
}
};
vector<Node> trie;
AhoCorasick() {
trie.emplace_back(); // root = node 0
}
void insert(const string& s, int id) {
int cur = 0;
for (char c : s) {
int ci = c - 'a'; // adjust for alphabet
if (trie[cur].ch[ci] == -1) {
trie[cur].ch[ci] = trie.size();
trie.emplace_back();
}
cur = trie[cur].ch[ci];
}
trie[cur].patternId = id;
}
void build() {
queue<int> q;
// Initialize root's children
for (int c = 0; c < ALPHA; c++) {
if (trie[0].ch[c] == -1) {
trie[0].ch[c] = 0; // loop back to root
} else {
trie[trie[0].ch[c]].fail = 0;
q.push(trie[0].ch[c]);
}
}
while (!q.empty()) {
int u = q.front(); q.pop();
// Dictionary suffix link
if (trie[trie[u].fail].patternId != -1) {
trie[u].dictSuffix = trie[u].fail;
} else {
trie[u].dictSuffix = trie[trie[u].fail].dictSuffix;
}
for (int c = 0; c < ALPHA; c++) {
if (trie[u].ch[c] == -1) {
trie[u].ch[c] = trie[trie[u].fail].ch[c];
} else {
trie[trie[u].ch[c]].fail = trie[trie[u].fail].ch[c];
q.push(trie[u].ch[c]);
}
}
}
}
// Search text and count occurrences of each pattern
vector<int> search(const string& text, int numPatterns) {
vector<int> count(numPatterns, 0);
int cur = 0;
for (char c : text) {
cur = trie[cur].ch[c - 'a'];
// Report all patterns ending here
int tmp = cur;
while (tmp > 0) {
if (trie[tmp].patternId != -1) {
count[trie[tmp].patternId]++;
}
tmp = trie[tmp].dictSuffix;
if (tmp == -1) break;
}
}
return count;
}
};
int main(){
ios::sync_with_stdio(false);
cin.tie(nullptr);
int K;
cin >> K;
AhoCorasick ac;
for (int i = 0; i < K; i++) {
string p;
cin >> p;
ac.insert(p, i);
}
ac.build();
string text;
cin >> text;
vector<int> counts = ac.search(text, K);
for (int i = 0; i < K; i++) {
cout << counts[i] << "\n";
}
return 0;
}