Skip to main content
🐘

PostgreSQL FTS

kham-pg is a PostgreSQL text-search parser extension. It tokenises Thai text for full-text search, replacing the built-in parser for Thai-script documents.

1

Prerequisites

PostgreSQL 14 or later, Rust toolchain (1.85+), and pg_config in your PATH.

pg_config --version   # PostgreSQL 14 or later
rustup target add x86_64-unknown-linux-gnu  # if cross-compiling
2

Build and install

# From the kham monorepo root
cargo build -p kham-pg --release
make -C kham-pg install

# Verify the .so is in place
ls "$(pg_config --pkglibdir)"/kham*
3

Load and configure

Load the parser, create a text search configuration, and add mappings for Thai and other token types.

-- Load the shared library
LOAD 'kham';

-- Create a text search parser backed by kham
CREATE TEXT SEARCH CONFIGURATION kham_cfg (PARSER = kham);

-- Map token types to dictionaries
ALTER TEXT SEARCH CONFIGURATION kham_cfg
  ADD MAPPING FOR thai    WITH simple;
ALTER TEXT SEARCH CONFIGURATION kham_cfg
  ADD MAPPING FOR latin   WITH english_stem;
ALTER TEXT SEARCH CONFIGURATION kham_cfg
  ADD MAPPING FOR integer WITH simple;
4

Index documents

CREATE TABLE articles (
  id    SERIAL PRIMARY KEY,
  title TEXT,
  body  TEXT,
  tsv   TSVECTOR
);

-- Populate the tsvector column
UPDATE articles
SET tsv = to_tsvector('kham_cfg', coalesce(title, '') || ' ' || coalesce(body, ''));

-- GIN index for fast search
CREATE INDEX articles_tsv_idx ON articles USING GIN(tsv);

-- Keep it up-to-date automatically
CREATE TRIGGER articles_tsv_update
  BEFORE INSERT OR UPDATE ON articles
  FOR EACH ROW EXECUTE FUNCTION
    tsvector_update_trigger(tsv, 'public.kham_cfg', title, body);
5

Search with ts_headline

SELECT
  id,
  ts_headline(
    'kham_cfg',
    body,
    query,
    'StartSel=<mark>, StopSel=</mark>, MaxWords=20'
  ) AS snippet
FROM articles,
     to_tsquery('kham_cfg', 'ข้าว & ปลา') AS query
WHERE tsv @@ query
ORDER BY ts_rank(tsv, query) DESC
LIMIT 10;