Competitive Programming

IOI 2006 - Deciphering the Mayan Writing

Aho-Corasick Multi-Pattern Matching Build a trie from all patterns, recording which pattern ends at each terminal node. Compute failure links via BFS from the root: for each node u with child v on character c, the fai...

Source sync Apr 19, 2026

Track IOI

Year 2006

Files TeX and C++

Folder competitive_programming/ioi/2006/writing

IOI2006TeXC++

This page is built from the copied files in competitive_programming/ioi/2006/writing. Edit competitive_programming/ioi/2006/writing/solution.tex to update the written solution and competitive_programming/ioi/2006/writing/solution.cpp to update the implementation.

The website does not replace those files with hand-maintained HTML. It reads the copied source tree during the build and exposes the exact files below.

Raw TeX Raw C++

Problem statement

Problem Statement

Rendered from the "Problem Statement" section inside solution.tex because this entry has no separate statement file.

Raw TeX

Given a text string $T$ and $K$ pattern strings $P_1, \ldots, P_K$, count how many times each pattern occurs as a substring of $T$. Patterns may overlap in the text.

Editorial

Rendered from the copied solution.tex file. The original TeX source remains available below.

Raw TeX

Solution

Aho-Corasick Multi-Pattern Matching

Build a trie from all patterns, recording which pattern ends at each terminal node.
Compute failure links via BFS from the root: for each node $u$ with child $v$ on character $c$, the failure link of $v$ points to $\delta(\text{fail}(u), c)$.
Compute dictionary suffix links: for each node $u$, the dictionary suffix link points to the nearest ancestor (via the failure chain) that is a pattern endpoint. This allows efficient enumeration of all patterns ending at a given position.
Search: Scan the text character by character, advancing through the automaton. At each position, follow dictionary suffix links to report all matching patterns.

Complexity

Time: $O(|T| + \sum |P_i| + Z)$ where $Z$ is the total number of matches reported.
Space: $O(\sum |P_i| \cdot |\Sigma|)$ for the automaton.

C++ Solution

#include <bits/stdc++.h>
using namespace std;

struct AhoCorasick {
    static const int ALPHA = 26;
    struct Node {
        int ch[26];
        int fail;
        int dictSuffix;
        int patternId;
        Node() {
            memset(ch, -1, sizeof(ch));
            fail = 0;
            dictSuffix = -1;
            patternId = -1;
        }
    };

    vector<Node> trie;

    AhoCorasick() {
        trie.emplace_back();
    }

    void insert(const string& s, int id) {
        int cur = 0;
        for (char c : s) {
            int ci = c - 'a';
            if (trie[cur].ch[ci] == -1) {
                trie[cur].ch[ci] = trie.size();
                trie.emplace_back();
            }
            cur = trie[cur].ch[ci];
        }
        trie[cur].patternId = id;
    }

    void build() {
        queue<int> q;
        for (int c = 0; c < ALPHA; c++) {
            if (trie[0].ch[c] == -1) {
                trie[0].ch[c] = 0;
            } else {
                trie[trie[0].ch[c]].fail = 0;
                q.push(trie[0].ch[c]);
            }
        }
        while (!q.empty()) {
            int u = q.front(); q.pop();
            trie[u].dictSuffix =
                (trie[trie[u].fail].patternId != -1)
                    ? trie[u].fail
                    : trie[trie[u].fail].dictSuffix;
            for (int c = 0; c < ALPHA; c++) {
                if (trie[u].ch[c] == -1) {
                    trie[u].ch[c] = trie[trie[u].fail].ch[c];
                } else {
                    trie[trie[u].ch[c]].fail = trie[trie[u].fail].ch[c];
                    q.push(trie[u].ch[c]);
                }
            }
        }
    }

    vector<int> search(const string& text, int numPatterns) {
        vector<int> count(numPatterns, 0);
        int cur = 0;
        for (char c : text) {
            cur = trie[cur].ch[c - 'a'];
            // Report all patterns ending at this position
            if (trie[cur].patternId != -1)
                count[trie[cur].patternId]++;
            int tmp = trie[cur].dictSuffix;
            while (tmp != -1) {
                count[trie[tmp].patternId]++;
                tmp = trie[tmp].dictSuffix;
            }
        }
        return count;
    }
};

int main(){
    ios::sync_with_stdio(false);
    cin.tie(nullptr);

    int K;
    cin >> K;

    AhoCorasick ac;
    for (int i = 0; i < K; i++) {
        string p;
        cin >> p;
        ac.insert(p, i);
    }
    ac.build();

    string text;
    cin >> text;

    vector<int> counts = ac.search(text, K);
    for (int i = 0; i < K; i++)
        cout << counts[i] << "\n";

    return 0;
}

Notes

The dictionary suffix link is the key to efficient multi-pattern reporting. Without it, following the full failure chain at every text position would degrade to $O(|T| \cdot \max|P_i|)$ in the worst case. The dictionary suffix link skips directly to the next pattern-ending node, so the total work for reporting is proportional to the number of matches.

The alphabet size should be adjusted to match the problem's character set. For Mayan hieroglyphics, the alphabet may be larger than 26; replace the ALPHA constant and the index mapping accordingly.

C++ competitive_programming/ioi/2006/writing/solution.cpp

Exact copied implementation source.

Raw file

#include <bits/stdc++.h>
using namespace std;

struct AhoCorasick {
    static const int ALPHA = 26; // adjust for problem's alphabet
    struct Node {
        int ch[26];
        int fail;
        int dictSuffix; // nearest pattern-ending node via fail chain
        int patternId;  // -1 if not end of pattern
        Node() {
            memset(ch, -1, sizeof(ch));
            fail = 0;
            dictSuffix = -1;
            patternId = -1;
        }
    };

    vector<Node> trie;

    AhoCorasick() {
        trie.emplace_back(); // root = node 0
    }

    void insert(const string& s, int id) {
        int cur = 0;
        for (char c : s) {
            int ci = c - 'a'; // adjust for alphabet
            if (trie[cur].ch[ci] == -1) {
                trie[cur].ch[ci] = trie.size();
                trie.emplace_back();
            }
            cur = trie[cur].ch[ci];
        }
        trie[cur].patternId = id;
    }

    void build() {
        queue<int> q;
        // Initialize root's children
        for (int c = 0; c < ALPHA; c++) {
            if (trie[0].ch[c] == -1) {
                trie[0].ch[c] = 0; // loop back to root
            } else {
                trie[trie[0].ch[c]].fail = 0;
                q.push(trie[0].ch[c]);
            }
        }

        while (!q.empty()) {
            int u = q.front(); q.pop();
            // Dictionary suffix link
            if (trie[trie[u].fail].patternId != -1) {
                trie[u].dictSuffix = trie[u].fail;
            } else {
                trie[u].dictSuffix = trie[trie[u].fail].dictSuffix;
            }

            for (int c = 0; c < ALPHA; c++) {
                if (trie[u].ch[c] == -1) {
                    trie[u].ch[c] = trie[trie[u].fail].ch[c];
                } else {
                    trie[trie[u].ch[c]].fail = trie[trie[u].fail].ch[c];
                    q.push(trie[u].ch[c]);
                }
            }
        }
    }

    // Search text and count occurrences of each pattern
    vector<int> search(const string& text, int numPatterns) {
        vector<int> count(numPatterns, 0);
        int cur = 0;
        for (char c : text) {
            cur = trie[cur].ch[c - 'a'];
            // Report all patterns ending here
            int tmp = cur;
            while (tmp > 0) {
                if (trie[tmp].patternId != -1) {
                    count[trie[tmp].patternId]++;
                }
                tmp = trie[tmp].dictSuffix;
                if (tmp == -1) break;
            }
        }
        return count;
    }
};

int main(){
    ios::sync_with_stdio(false);
    cin.tie(nullptr);

    int K;
    cin >> K;

    AhoCorasick ac;
    for (int i = 0; i < K; i++) {
        string p;
        cin >> p;
        ac.insert(p, i);
    }
    ac.build();

    string text;
    cin >> text;

    vector<int> counts = ac.search(text, K);

    for (int i = 0; i < K; i++) {
        cout << counts[i] << "\n";
    }

    return 0;
}

TeX write-up competitive_programming/ioi/2006/writing/solution.tex

Exact copied write-up source.

Raw file

\documentclass[11pt,a4paper]{article}
\usepackage[utf8]{inputenc}
\usepackage{amsmath,amssymb,amsthm}
\usepackage{listings}
\usepackage{geometry}
\usepackage{hyperref}
\geometry{margin=2.5cm}

\lstset{
  language=C++,
  basicstyle=\ttfamily\small,
  keywordstyle=\bfseries,
  breaklines=true,
  frame=single,
  numbers=left,
  numberstyle=\tiny,
  tabsize=4,
  showstringspaces=false
}

\title{IOI 2006 -- Deciphering the Mayan Writing}
\author{Solution}
\date{}

\begin{document}
\maketitle

\section{Problem Statement}
Given a text string $T$ and $K$ pattern strings $P_1, \ldots, P_K$, count how many times each pattern occurs as a substring of $T$. Patterns may overlap in the text.

\section{Solution}

\subsection{Aho-Corasick Multi-Pattern Matching}
\begin{enumerate}
    \item \textbf{Build a trie} from all patterns, recording which pattern ends at each terminal node.
    \item \textbf{Compute failure links} via BFS from the root: for each node $u$ with child $v$ on character $c$, the failure link of $v$ points to $\delta(\text{fail}(u), c)$.
    \item \textbf{Compute dictionary suffix links}: for each node $u$, the dictionary suffix link points to the nearest ancestor (via the failure chain) that is a pattern endpoint. This allows efficient enumeration of all patterns ending at a given position.
    \item \textbf{Search}: Scan the text character by character, advancing through the automaton. At each position, follow dictionary suffix links to report all matching patterns.
\end{enumerate}

\subsection{Complexity}
\begin{itemize}
    \item \textbf{Time:} $O(|T| + \sum |P_i| + Z)$ where $Z$ is the total number of matches reported.
    \item \textbf{Space:} $O(\sum |P_i| \cdot |\Sigma|)$ for the automaton.
\end{itemize}

\section{C++ Solution}

\begin{lstlisting}
#include <bits/stdc++.h>
using namespace std;

struct AhoCorasick {
    static const int ALPHA = 26;
    struct Node {
        int ch[26];
        int fail;
        int dictSuffix;
        int patternId;
        Node() {
            memset(ch, -1, sizeof(ch));
            fail = 0;
            dictSuffix = -1;
            patternId = -1;
        }
    };

    vector<Node> trie;

    AhoCorasick() {
        trie.emplace_back();
    }

    void insert(const string& s, int id) {
        int cur = 0;
        for (char c : s) {
            int ci = c - 'a';
            if (trie[cur].ch[ci] == -1) {
                trie[cur].ch[ci] = trie.size();
                trie.emplace_back();
            }
            cur = trie[cur].ch[ci];
        }
        trie[cur].patternId = id;
    }

    void build() {
        queue<int> q;
        for (int c = 0; c < ALPHA; c++) {
            if (trie[0].ch[c] == -1) {
                trie[0].ch[c] = 0;
            } else {
                trie[trie[0].ch[c]].fail = 0;
                q.push(trie[0].ch[c]);
            }
        }
        while (!q.empty()) {
            int u = q.front(); q.pop();
            trie[u].dictSuffix =
                (trie[trie[u].fail].patternId != -1)
                    ? trie[u].fail
                    : trie[trie[u].fail].dictSuffix;
            for (int c = 0; c < ALPHA; c++) {
                if (trie[u].ch[c] == -1) {
                    trie[u].ch[c] = trie[trie[u].fail].ch[c];
                } else {
                    trie[trie[u].ch[c]].fail = trie[trie[u].fail].ch[c];
                    q.push(trie[u].ch[c]);
                }
            }
        }
    }

    vector<int> search(const string& text, int numPatterns) {
        vector<int> count(numPatterns, 0);
        int cur = 0;
        for (char c : text) {
            cur = trie[cur].ch[c - 'a'];
            // Report all patterns ending at this position
            if (trie[cur].patternId != -1)
                count[trie[cur].patternId]++;
            int tmp = trie[cur].dictSuffix;
            while (tmp != -1) {
                count[trie[tmp].patternId]++;
                tmp = trie[tmp].dictSuffix;
            }
        }
        return count;
    }
};

int main(){
    ios::sync_with_stdio(false);
    cin.tie(nullptr);

    int K;
    cin >> K;

    AhoCorasick ac;
    for (int i = 0; i < K; i++) {
        string p;
        cin >> p;
        ac.insert(p, i);
    }
    ac.build();

    string text;
    cin >> text;

    vector<int> counts = ac.search(text, K);
    for (int i = 0; i < K; i++)
        cout << counts[i] << "\n";

    return 0;
}
\end{lstlisting}

\section{Notes}
The dictionary suffix link is the key to efficient multi-pattern reporting. Without it, following the full failure chain at every text position would degrade to $O(|T| \cdot \max|P_i|)$ in the worst case. The dictionary suffix link skips directly to the next pattern-ending node, so the total work for reporting is proportional to the number of matches.

The alphabet size should be adjusted to match the problem's character set. For Mayan hieroglyphics, the alphabet may be larger than 26; replace the \texttt{ALPHA} constant and the index mapping accordingly.

\end{document}

C++ implementation competitive_programming/ioi/2006/writing/solution.cpp

Exact copied implementation source.

Raw file

#include <bits/stdc++.h>
using namespace std;

struct AhoCorasick {
    static const int ALPHA = 26; // adjust for problem's alphabet
    struct Node {
        int ch[26];
        int fail;
        int dictSuffix; // nearest pattern-ending node via fail chain
        int patternId;  // -1 if not end of pattern
        Node() {
            memset(ch, -1, sizeof(ch));
            fail = 0;
            dictSuffix = -1;
            patternId = -1;
        }
    };

    vector<Node> trie;

    AhoCorasick() {
        trie.emplace_back(); // root = node 0
    }

    void insert(const string& s, int id) {
        int cur = 0;
        for (char c : s) {
            int ci = c - 'a'; // adjust for alphabet
            if (trie[cur].ch[ci] == -1) {
                trie[cur].ch[ci] = trie.size();
                trie.emplace_back();
            }
            cur = trie[cur].ch[ci];
        }
        trie[cur].patternId = id;
    }

    void build() {
        queue<int> q;
        // Initialize root's children
        for (int c = 0; c < ALPHA; c++) {
            if (trie[0].ch[c] == -1) {
                trie[0].ch[c] = 0; // loop back to root
            } else {
                trie[trie[0].ch[c]].fail = 0;
                q.push(trie[0].ch[c]);
            }
        }

        while (!q.empty()) {
            int u = q.front(); q.pop();
            // Dictionary suffix link
            if (trie[trie[u].fail].patternId != -1) {
                trie[u].dictSuffix = trie[u].fail;
            } else {
                trie[u].dictSuffix = trie[trie[u].fail].dictSuffix;
            }

            for (int c = 0; c < ALPHA; c++) {
                if (trie[u].ch[c] == -1) {
                    trie[u].ch[c] = trie[trie[u].fail].ch[c];
                } else {
                    trie[trie[u].ch[c]].fail = trie[trie[u].fail].ch[c];
                    q.push(trie[u].ch[c]);
                }
            }
        }
    }

    // Search text and count occurrences of each pattern
    vector<int> search(const string& text, int numPatterns) {
        vector<int> count(numPatterns, 0);
        int cur = 0;
        for (char c : text) {
            cur = trie[cur].ch[c - 'a'];
            // Report all patterns ending here
            int tmp = cur;
            while (tmp > 0) {
                if (trie[tmp].patternId != -1) {
                    count[trie[tmp].patternId]++;
                }
                tmp = trie[tmp].dictSuffix;
                if (tmp == -1) break;
            }
        }
        return count;
    }
};

int main(){
    ios::sync_with_stdio(false);
    cin.tie(nullptr);

    int K;
    cin >> K;

    AhoCorasick ac;
    for (int i = 0; i < K; i++) {
        string p;
        cin >> p;
        ac.insert(p, i);
    }
    ac.build();

    string text;
    cin >> text;

    vector<int> counts = ac.search(text, K);

    for (int i = 0; i < K; i++) {
        cout << counts[i] << "\n";
    }

    return 0;
}

IOI 2006 - Deciphering the Mayan Writing

Source-first archive entry

Problem Statement

Editorial

Solution

Aho-Corasick Multi-Pattern Matching

Complexity

C++ Solution

Notes

Code

Source Files