🗃️
SQLite FTS5
kham-sqlite is a loadable SQLite extension that provides a custom FTS5 tokenizer. It splits Thai text correctly so that FTS5 MATCH queries work on Thai documents.
1
Prerequisites
You need a SQLite build with FTS5 enabled and the ability to load extensions. On macOS, the system sqlite3 disables load_extension, so install SQLite via Homebrew instead.
brew install sqlite
# Use /opt/homebrew/opt/sqlite/bin/sqlite3, not /usr/bin/sqlite3
2
Build the extension
# From the kham monorepo root
cargo build -p kham-sqlite --release
# The shared library is at:
# Linux: target/release/libkham_sqlite.so
# macOS: target/release/libkham_sqlite.dylib
3
Load and create an FTS5 table
-- Load the compiled extension. The file suffix is omitted on purpose: when
-- the bare path cannot be loaded, SQLite retries with platform-specific
-- suffixes such as .so or .dylib.
.load ./target/release/libkham_sqlite
-- Full-text index over two text columns, tokenized by the extension's
-- 'kham' tokenizer rather than the FTS5 default tokenizer.
CREATE VIRTUAL TABLE docs USING fts5(
    title,
    body,
    tokenize = 'kham'
);
4
Insert and search
-- Sample documents. Columns are named explicitly so the statement does not
-- depend on the order in which the table's columns were declared.
INSERT INTO docs (title, body) VALUES
    ('อาหารไทย', 'กินข้าวกับปลาและผัก'),
    ('เมืองหลวง', 'กรุงเทพมหานครเป็นเมืองหลวงของประเทศไทย');
-- Simple MATCH
SELECT
    title,
    body
FROM docs
WHERE docs MATCH 'ปลา';
-- BM25 ranking: bm25() assigns numerically lower scores to better matches,
-- so the default ascending order lists the most relevant rows first.
SELECT
    title,
    bm25(docs) AS score
FROM docs
WHERE docs MATCH 'เมือง'
ORDER BY score;
-- Snippet highlight: column 1 is body (columns are numbered from 0), matches
-- are wrapped in <b> and </b>, '…' marks truncated text, and the excerpt is
-- limited to at most 10 tokens.
SELECT snippet(docs, 1, '<b>', '</b>', '…', 10)
FROM docs
WHERE docs MATCH 'กรุงเทพ';