All IOI entries
Competitive Programming

IOI 2003 - Comparing Substrings

Problem Statement Summary Given a string S of length N (N 10^5) and Q queries (Q 10^5), each query specifies two substrings S[a a + -1] and S[b b + -1] and asks for their lexicographic ordering (equal, less than, or g...

Source sync Apr 19, 2026
Track IOI
Year 2003
Files TeX and C++
Folder competitive_programming/ioi/2003/comparing_substring
IOI2003TeXC++

Source-first archive entry

This page is built from the copied files in competitive_programming/ioi/2003/comparing_substring. Edit competitive_programming/ioi/2003/comparing_substring/solution.tex to update the written solution and competitive_programming/ioi/2003/comparing_substring/solution.cpp to update the implementation.

The website does not replace those files with hand-maintained HTML. It reads the copied source tree during the build and exposes the exact files below.

Problem Statement

This archive entry does not include a standalone statement file or a dedicated statement section in solution.tex.

The statement is not mirrored as a separate asset in this archive. The editorial and source files remain available below.

Editorial

Rendered from the copied solution.tex file. The original TeX source remains available below.

Problem Statement Summary

Given a string $S$ of length $N$ ($N \le 10^5$) and $Q$ queries ($Q \le 10^5$), each query specifies two substrings $S[a \ldots a{+}\ell{-}1]$ and $S[b \ldots b{+}\ell{-}1]$ and asks for their lexicographic ordering (equal, less than, or greater than).

Solution: Suffix Array + LCP + Sparse Table

Suffix Array

Build the suffix array $\mathit{sa}$ using the $O(N \log^2 N)$ prefix-doubling algorithm. The rank array $\mathit{rank}[i]$ gives the position of suffix $i$ in the sorted order.

LCP Array (Kasai's Algorithm)

The LCP (longest common prefix) array $\mathit{lcp}[i]$ stores the length of the longest common prefix between $\mathit{sa}[i-1]$ and $\mathit{sa}[i]$ in the sorted order. Kasai's algorithm computes this in $O(N)$.

Sparse Table for RMQ

The LCP of two arbitrary suffixes starting at positions $a$ and $b$ equals the minimum value in $\mathit{lcp}[\mathit{rank}[a]+1 \ldots \mathit{rank}[b]]$ (assuming $\mathit{rank}[a] < \mathit{rank}[b]$). A sparse table provides $O(1)$ range-minimum queries after $O(N \log N)$ preprocessing.

Answering Queries

For substrings $S[a \ldots a{+}\ell{-}1]$ and $S[b \ldots b{+}\ell{-}1]$:

  1. Compute $L = \mathrm{LCP}(a, b)$ via the sparse table.

  2. If $L \ge \ell$: the substrings are equal.

  3. Otherwise: compare $S[a + L]$ and $S[b + L]$.

C++ Implementation

#include <bits/stdc++.h>
using namespace std;

struct SuffixArray {
    int n;
    string s;
    vector<int> sa, inv, lcp;
    vector<vector<int>> sparse;

    SuffixArray(const string& str) : s(str), n(str.size()) {
        buildSA();
        buildLCP();
        buildSparse();
    }

    void buildSA() {
        sa.resize(n);
        inv.resize(n);
        vector<int> tmp(n);
        iota(sa.begin(), sa.end(), 0);
        for (int i = 0; i < n; i++) inv[i] = s[i];

        for (int gap = 1; gap < n; gap <<= 1) {
            auto cmp = [&](int a, int b) -> bool {
                if (inv[a] != inv[b]) return inv[a] < inv[b];
                int ra = (a + gap < n) ? inv[a + gap] : -1;
                int rb = (b + gap < n) ? inv[b + gap] : -1;
                return ra < rb;
            };
            sort(sa.begin(), sa.end(), cmp);
            tmp[sa[0]] = 0;
            for (int i = 1; i < n; i++)
                tmp[sa[i]] = tmp[sa[i - 1]] + (cmp(sa[i - 1], sa[i]) ? 1 : 0);
            inv = tmp;
            if (inv[sa[n - 1]] == n - 1) break;
        }
    }

    void buildLCP() {
        lcp.assign(n, 0);
        // inv already stores the inverse suffix array
        int k = 0;
        for (int i = 0; i < n; i++) {
            if (inv[i] == 0) { k = 0; continue; }
            int j = sa[inv[i] - 1];
            while (i + k < n && j + k < n && s[i + k] == s[j + k]) k++;
            lcp[inv[i]] = k;
            if (k > 0) k--;
        }
    }

    void buildSparse() {
        int LOG = 1;
        while ((1 << LOG) <= n) LOG++;
        sparse.assign(LOG, vector<int>(n, 0));
        for (int i = 0; i < n; i++) sparse[0][i] = lcp[i];
        for (int j = 1; j < LOG; j++)
            for (int i = 0; i + (1 << j) <= n; i++)
                sparse[j][i] = min(sparse[j - 1][i],
                                   sparse[j - 1][i + (1 << (j - 1))]);
    }

    int rmq(int l, int r) {
        if (l > r) return 0;
        int k = __lg(r - l + 1);
        return min(sparse[k][l], sparse[k][r - (1 << k) + 1]);
    }

    // LCP of suffixes starting at positions a and b.
    int lcpQuery(int a, int b) {
        if (a == b) return n - a;
        int ra = inv[a], rb = inv[b];
        if (ra > rb) swap(ra, rb);
        return rmq(ra + 1, rb);
    }
};

int main() {
    ios::sync_with_stdio(false);
    cin.tie(nullptr);

    string S;
    cin >> S;

    SuffixArray sa(S);

    int Q;
    cin >> Q;
    while (Q--) {
        int a, b, len;
        cin >> a >> b >> len;
        a--; b--; // convert to 0-indexed

        int commonLen = sa.lcpQuery(a, b);
        if (commonLen >= len) {
            cout << "=\n";
        } else if (S[a + commonLen] < S[b + commonLen]) {
            cout << "<\n";
        } else {
            cout << ">\n";
        }
    }

    return 0;
}

Complexity Analysis

  • Suffix array construction: $O(N \log^2 N)$.

  • LCP array (Kasai): $O(N)$.

  • Sparse table: $O(N \log N)$ build, $O(1)$ per query.

  • Per query: $O(1)$.

  • Total: $O(N \log^2 N + Q)$.

  • Space: $O(N \log N)$.

Code

Exact copied C++ implementation from solution.cpp.

C++ competitive_programming/ioi/2003/comparing_substring/solution.cpp

Exact copied implementation source.

Raw file
// IOI 2003 - Comparing Substrings
// Given string S and Q queries (a, b, len), compare substrings
// S[a..a+len-1] vs S[b..b+len-1] lexicographically.
// Uses suffix array + LCP array + sparse table for O(1) per query.
// SA construction: O(N log^2 N). Queries: O(1) each.

#include <bits/stdc++.h>
using namespace std;

struct SuffixArray {
    int n;
    string s;
    vector<int> sa, rank_, lcp, inv;
    vector<vector<int>> sparse;

    SuffixArray(const string& str) : s(str), n((int)str.size()) {
        buildSA();
        buildLCP();
        buildInverse();
        buildSparse();
    }

    void buildSA() {
        sa.resize(n);
        rank_.resize(n);
        vector<int> tmp(n);
        iota(sa.begin(), sa.end(), 0);

        // Initial ranking by single character
        for (int i = 0; i < n; i++) rank_[i] = s[i];

        for (int gap = 1; gap < n; gap <<= 1) {
            auto cmp = [&](int a, int b) -> bool {
                if (rank_[a] != rank_[b]) return rank_[a] < rank_[b];
                int ra = (a + gap < n) ? rank_[a + gap] : -1;
                int rb = (b + gap < n) ? rank_[b + gap] : -1;
                return ra < rb;
            };
            sort(sa.begin(), sa.end(), cmp);

            tmp[sa[0]] = 0;
            for (int i = 1; i < n; i++) {
                tmp[sa[i]] = tmp[sa[i - 1]] + (cmp(sa[i - 1], sa[i]) ? 1 : 0);
            }
            rank_ = tmp;
            if (rank_[sa[n - 1]] == n - 1) break;
        }
    }

    // Kasai's algorithm for LCP array
    void buildLCP() {
        lcp.resize(n, 0);
        vector<int> inv_tmp(n);
        for (int i = 0; i < n; i++) inv_tmp[sa[i]] = i;

        int k = 0;
        for (int i = 0; i < n; i++) {
            if (inv_tmp[i] == 0) { k = 0; continue; }
            int j = sa[inv_tmp[i] - 1];
            while (i + k < n && j + k < n && s[i + k] == s[j + k]) k++;
            lcp[inv_tmp[i]] = k;
            if (k > 0) k--;
        }
    }

    void buildInverse() {
        inv.resize(n);
        for (int i = 0; i < n; i++) inv[sa[i]] = i;
    }

    // Sparse table for range minimum query on LCP array
    void buildSparse() {
        int LOG = 1;
        while ((1 << LOG) <= n) LOG++;
        sparse.assign(LOG, vector<int>(n, 0));
        for (int i = 0; i < n; i++) sparse[0][i] = lcp[i];
        for (int j = 1; j < LOG; j++) {
            for (int i = 0; i + (1 << j) <= n; i++) {
                sparse[j][i] = min(sparse[j - 1][i],
                                   sparse[j - 1][i + (1 << (j - 1))]);
            }
        }
    }

    int rmq(int l, int r) {
        if (l > r) return 0;
        int k = __lg(r - l + 1);
        return min(sparse[k][l], sparse[k][r - (1 << k) + 1]);
    }

    // LCP of suffixes starting at positions a and b
    int lcpOf(int a, int b) {
        if (a == b) return n - a;
        int ra = inv[a], rb = inv[b];
        if (ra > rb) swap(ra, rb);
        return rmq(ra + 1, rb);
    }
};

int main() {
    ios::sync_with_stdio(false);
    cin.tie(nullptr);

    string S;
    cin >> S;

    // Edge case: empty string
    if (S.empty()) {
        int Q; cin >> Q;
        while (Q--) cout << "=\n";
        return 0;
    }

    SuffixArray sa(S);

    int Q;
    cin >> Q;
    while (Q--) {
        int a, b, len;
        cin >> a >> b >> len;
        a--; b--; // convert to 0-indexed

        int commonLen = sa.lcpOf(a, b);
        if (commonLen >= len) {
            cout << "=\n";
        } else {
            if (S[a + commonLen] < S[b + commonLen])
                cout << "<\n";
            else
                cout << ">\n";
        }
    }

    return 0;
}

Source Files

Exact copied source-of-truth files. Edit solution.tex for the write-up and solution.cpp for the implementation.

TeX write-up competitive_programming/ioi/2003/comparing_substring/solution.tex

Exact copied write-up source.

Raw file
\documentclass[11pt,a4paper]{article}
\usepackage[utf8]{inputenc}
\usepackage{amsmath,amssymb,amsthm}
\usepackage{listings}
\usepackage{xcolor}
\usepackage[margin=1in]{geometry}
\usepackage{hyperref}

\newtheorem{theorem}{Theorem}

\lstset{
  language=C++,
  basicstyle=\ttfamily\small,
  keywordstyle=\color{blue}\bfseries,
  commentstyle=\color{green!60!black},
  stringstyle=\color{red},
  numbers=left,
  numberstyle=\tiny\color{gray},
  breaklines=true,
  frame=single,
  tabsize=4,
  showstringspaces=false
}

\title{IOI 2003 -- Comparing Substrings}
\author{Solution}
\date{}

\begin{document}
\maketitle

\section{Problem Statement Summary}
Given a string $S$ of length $N$ ($N \le 10^5$) and $Q$ queries
($Q \le 10^5$), each query specifies two substrings
$S[a \ldots a{+}\ell{-}1]$ and $S[b \ldots b{+}\ell{-}1]$ and asks
for their lexicographic ordering (equal, less than, or greater than).

\section{Solution: Suffix Array + LCP + Sparse Table}

\subsection{Suffix Array}
Build the suffix array $\mathit{sa}$ using the $O(N \log^2 N)$
prefix-doubling algorithm. The rank array
$\mathit{rank}[i]$ gives the position of suffix $i$ in the sorted
order.

\subsection{LCP Array (Kasai's Algorithm)}
The LCP (longest common prefix) array $\mathit{lcp}[i]$ stores the
length of the longest common prefix between $\mathit{sa}[i-1]$ and
$\mathit{sa}[i]$ in the sorted order. Kasai's algorithm computes this
in $O(N)$.

\subsection{Sparse Table for RMQ}
The LCP of two arbitrary suffixes starting at positions $a$ and $b$
equals the minimum value in $\mathit{lcp}[\mathit{rank}[a]+1 \ldots
\mathit{rank}[b]]$ (assuming $\mathit{rank}[a] < \mathit{rank}[b]$).
A sparse table provides $O(1)$ range-minimum queries after $O(N \log N)$
preprocessing.

\subsection{Answering Queries}
For substrings $S[a \ldots a{+}\ell{-}1]$ and $S[b \ldots b{+}\ell{-}1]$:
\begin{enumerate}
  \item Compute $L = \mathrm{LCP}(a, b)$ via the sparse table.
  \item If $L \ge \ell$: the substrings are equal.
  \item Otherwise: compare $S[a + L]$ and $S[b + L]$.
\end{enumerate}

\section{C++ Implementation}

\begin{lstlisting}
#include <bits/stdc++.h>
using namespace std;

struct SuffixArray {
    int n;
    string s;
    vector<int> sa, inv, lcp;
    vector<vector<int>> sparse;

    SuffixArray(const string& str) : s(str), n(str.size()) {
        buildSA();
        buildLCP();
        buildSparse();
    }

    void buildSA() {
        sa.resize(n);
        inv.resize(n);
        vector<int> tmp(n);
        iota(sa.begin(), sa.end(), 0);
        for (int i = 0; i < n; i++) inv[i] = s[i];

        for (int gap = 1; gap < n; gap <<= 1) {
            auto cmp = [&](int a, int b) -> bool {
                if (inv[a] != inv[b]) return inv[a] < inv[b];
                int ra = (a + gap < n) ? inv[a + gap] : -1;
                int rb = (b + gap < n) ? inv[b + gap] : -1;
                return ra < rb;
            };
            sort(sa.begin(), sa.end(), cmp);
            tmp[sa[0]] = 0;
            for (int i = 1; i < n; i++)
                tmp[sa[i]] = tmp[sa[i - 1]] + (cmp(sa[i - 1], sa[i]) ? 1 : 0);
            inv = tmp;
            if (inv[sa[n - 1]] == n - 1) break;
        }
    }

    void buildLCP() {
        lcp.assign(n, 0);
        // inv already stores the inverse suffix array
        int k = 0;
        for (int i = 0; i < n; i++) {
            if (inv[i] == 0) { k = 0; continue; }
            int j = sa[inv[i] - 1];
            while (i + k < n && j + k < n && s[i + k] == s[j + k]) k++;
            lcp[inv[i]] = k;
            if (k > 0) k--;
        }
    }

    void buildSparse() {
        int LOG = 1;
        while ((1 << LOG) <= n) LOG++;
        sparse.assign(LOG, vector<int>(n, 0));
        for (int i = 0; i < n; i++) sparse[0][i] = lcp[i];
        for (int j = 1; j < LOG; j++)
            for (int i = 0; i + (1 << j) <= n; i++)
                sparse[j][i] = min(sparse[j - 1][i],
                                   sparse[j - 1][i + (1 << (j - 1))]);
    }

    int rmq(int l, int r) {
        if (l > r) return 0;
        int k = __lg(r - l + 1);
        return min(sparse[k][l], sparse[k][r - (1 << k) + 1]);
    }

    // LCP of suffixes starting at positions a and b.
    int lcpQuery(int a, int b) {
        if (a == b) return n - a;
        int ra = inv[a], rb = inv[b];
        if (ra > rb) swap(ra, rb);
        return rmq(ra + 1, rb);
    }
};

int main() {
    ios::sync_with_stdio(false);
    cin.tie(nullptr);

    string S;
    cin >> S;

    SuffixArray sa(S);

    int Q;
    cin >> Q;
    while (Q--) {
        int a, b, len;
        cin >> a >> b >> len;
        a--; b--; // convert to 0-indexed

        int commonLen = sa.lcpQuery(a, b);
        if (commonLen >= len) {
            cout << "=\n";
        } else if (S[a + commonLen] < S[b + commonLen]) {
            cout << "<\n";
        } else {
            cout << ">\n";
        }
    }

    return 0;
}
\end{lstlisting}

\section{Complexity Analysis}
\begin{itemize}
  \item \textbf{Suffix array construction:} $O(N \log^2 N)$.
  \item \textbf{LCP array (Kasai):} $O(N)$.
  \item \textbf{Sparse table:} $O(N \log N)$ build, $O(1)$ per query.
  \item \textbf{Per query:} $O(1)$.
  \item \textbf{Total:} $O(N \log^2 N + Q)$.
  \item \textbf{Space:} $O(N \log N)$.
\end{itemize}

\end{document}