Skip to main content

Getting Started

kham v0.5.0 — pick your target and be up and running in minutes.

🦀

Rust

Native crate — zero-copy, no_std core

Install

[dependencies]
kham-core = "0.5"

Usage

use kham_core::Segmenter;

/// Demonstrates the two segmentation entry points of `Segmenter`.
fn main() {
    let segmenter = Segmenter::new();

    // Plain word list: each element is a &str slice (zero-copy)
    let words = segmenter.segment("กินข้าวกับปลา");
    println!("{:?}", words);
    // ["กิน", "ข้าว", "กับ", "ปลา"]

    // Rich tokens carry a kind plus byte/char spans; print text, kind and char span
    let rich_tokens = segmenter.segment_tokens("Hello กรุงเทพ 2024");
    for token in rich_tokens {
        println!(
            "{:?}  kind={:?}  chars={}..{}",
            token.text, token.kind, token.char_span.start, token.char_span.end
        );
    }
}
Full integration guide →
🐍

Python

PyO3 bindings — segment() and segment_tokens()

Install

pip install kham

Usage

import kham

# Plain segmentation: the result is a list of strings
words = kham.segment("กินข้าวกับปลา")
print(words)
# ['กิน', 'ข้าว', 'กับ', 'ปลา']

# Rich segmentation: print text, kind, char_start and char_end of each Token
for token in kham.segment_tokens("Hello กรุงเทพ 2024"):
    print(token.text, token.kind, token.char_start, token.char_end)
Full integration guide →
🌐

WebAssembly / npm

Runs in browser and Node.js — no server needed

Install

npm install kham-wasm

Usage

import init, { segment, segment_tokens } from 'kham-wasm';

// One-time initialisation (fetches the .wasm file)
await init();

// Plain segmentation
const wordList = segment("กินข้าวกับปลา");
console.log(wordList);
// ["กิน", "ข้าว", "กับ", "ปลา"]

// Rich tokens: log text, kind, char_start and char_end of each one
const richTokens = segment_tokens("Hello กรุงเทพ 2024");
richTokens.forEach((token) => {
  console.log(token.text, token.kind, token.char_start, token.char_end);
});
Full integration guide →
💻

CLI

Command-line Thai segmenter

Install

cargo install kham-cli

Usage

# Segment Thai text; the words are printed on one line separated by `|`
kham "กินข้าวกับปลา"
# กิน|ข้าว|กับ|ปลา

# FTS mode — kind, POS, NE, stop, syn per token
# NOTE(review): presumably part-of-speech, named-entity, stop-word and synonym
# fields — confirm against the CLI's help output
kham --fts "กินข้าวกับปลา"

# FTS + phonetic code in the syn= field (`--soundex lk82` selects the code)
kham --fts --soundex lk82 "กินข้าวกับปลา"
🐘

PostgreSQL FTS

Full-text search parser extension for PostgreSQL 14+

Install

# Build and install (requires pg_config in PATH)
# Step 1: compile the kham-pg package in release mode
cargo build -p kham-pg --release
# Step 2: run the `install` target from inside the kham-pg directory
make -C kham-pg install

Usage

-- Load the kham shared library into the current session
-- NOTE(review): LOAD only loads a library file; it does not by itself create
-- catalog objects. Confirm that the `kham` parser referenced below is
-- registered after this step (extensions usually do so via CREATE EXTENSION).
LOAD 'kham';

-- Create a text search configuration that uses the kham parser
CREATE TEXT SEARCH CONFIGURATION kham_cfg (PARSER = kham);
-- Route tokens of type `thai` to the built-in `simple` dictionary
-- NOTE(review): `thai` must be a token type reported by the kham parser — confirm
ALTER TEXT SEARCH CONFIGURATION kham_cfg
  ADD MAPPING FOR thai WITH simple;

-- Index and search
-- `tsv` holds the precomputed tsvector for `body`
CREATE TABLE docs (id SERIAL, body TEXT, tsv TSVECTOR);
-- Fill `tsv` for the rows currently in the table (run after inserting data)
UPDATE docs SET tsv = to_tsvector('kham_cfg', body);
-- GIN index over `tsv` for the `@@` match below
CREATE INDEX docs_tsv ON docs USING GIN(tsv);

-- Return each matching row with a highlighted snippet of `body`
SELECT id, ts_headline('kham_cfg', body, query) AS snippet
FROM docs, to_tsquery('kham_cfg', 'ข้าว') query
WHERE tsv @@ query;
Full integration guide →
🗃️

SQLite FTS5

Loadable tokenizer extension for SQLite FTS5

Install

# Build the kham-sqlite package in release mode (requires SQLite headers)
cargo build -p kham-sqlite --release
# macOS: install SQLite with `brew install sqlite`, because the system sqlite3
# disables load_extension

Usage

-- Load the compiled tokenizer extension (sqlite3 shell dot-command)
.load ./target/release/libkham_sqlite

-- FTS5 table with a single `body` column, tokenized by the `kham` tokenizer
CREATE VIRTUAL TABLE docs USING fts5(
  body,
  tokenize = 'kham'
);

-- Sample Thai documents
INSERT INTO docs VALUES ('กินข้าวกับปลา');
INSERT INTO docs VALUES ('กรุงเทพมหานครเป็นเมืองหลวง');

-- Full-text query for the word 'ปลา'
SELECT * FROM docs WHERE docs MATCH 'ปลา';
Full integration guide →