From 4c418dc39aac4978a6f99a66d823916422c3b596 Mon Sep 17 00:00:00 2001 From: Shinwoo PARK Date: Mon, 11 May 2026 12:06:48 +0900 Subject: [PATCH] docs: document topic alias and semantic search APIs --- README.md | 67 +++++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 58 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 26e0e87..092a073 100644 --- a/README.md +++ b/README.md @@ -12,12 +12,15 @@ IdentityDB stores memory as a graph made of: A single fact like `I have worked with TypeScript since 2025.` can connect the topics `I`, `TypeScript`, and `2025` at the same time. -## Current foundation capabilities +## Current capabilities - SQLite, PostgreSQL, MySQL, and MariaDB connection adapters -- Automatic schema initialization for `topics`, `facts`, and `fact_topics` +- Automatic schema initialization for `topics`, `facts`, `fact_topics`, `topic_relations`, `topic_aliases`, and `fact_embeddings` - High-level APIs for adding topics and facts -- Query APIs for listing topics, loading topic-scoped facts, and finding connected facts/topics +- Topic hierarchy APIs for parent/child traversal and lineage lookup +- Topic alias and canonical resolution APIs so facts and queries can resolve alternate names +- Semantic fact indexing and search APIs built around provider-agnostic embeddings +- Dedup-aware ingestion hooks that can reuse an existing fact when a semantic near-duplicate is detected - Pluggable fact extraction so callers can use a small LLM or a deterministic extractor ## Install @@ -29,7 +32,7 @@ bun install ## Quick start ```ts -import { IdentityDB, NaiveExtractor } from 'identitydb'; +import { IdentityDB, NaiveExtractor, type EmbeddingProvider } from 'identitydb'; const db = await IdentityDB.connect({ client: 'sqlite', @@ -58,15 +61,58 @@ await db.addFact({ ], }); -const topic = await db.getTopicByName('TypeScript', { includeFacts: true }); -const connected = await db.findConnectedTopics('TypeScript'); +await db.linkTopics({ 
+ parentName: 'programming language', + childName: 'TypeScript', +}); -console.log(topic?.facts.map((fact) => fact.statement)); +await db.addTopicAlias('TypeScript', 'TS'); + +const provider: EmbeddingProvider = { + model: 'example-embedding-v1', + dimensions: 3, + async embed(input) { + if (input.toLowerCase().includes('typescript')) { + return [1, 0, 0]; + } + + return [0, 1, 0]; + }, +}; + +await db.indexFactEmbeddings({ provider }); + +const topic = await db.getTopicByName('TS', { includeFacts: true }); +const children = await db.getTopicChildren('programming language'); +const lineage = await db.getTopicLineage('TS'); +const connected = await db.findConnectedTopics('TypeScript'); +const matches = await db.searchFacts({ + query: 'TypeScript experience', + provider, + limit: 5, +}); + +console.log(topic?.name); +console.log(children.map((entry) => entry.name)); +console.log(lineage.map((entry) => entry.name)); console.log(connected.map((entry) => [entry.name, entry.sharedFactCount])); +console.log(matches.map((entry) => [entry.statement, entry.score])); await db.close(); ``` +## Semantic ingestion and duplicate detection + +If you provide an embedding provider during ingestion, IdentityDB can index the new fact automatically and reuse an existing fact when a semantic near-duplicate is already present. + +```ts +await db.ingestStatement('Bun makes TypeScript tooling fast.', { + extractor: new NaiveExtractor(), + embeddingProvider: provider, + duplicateThreshold: 0.95, +}); +``` + ## Development ```bash @@ -77,6 +123,9 @@ bun run build ## Current status -This repository is in active foundation development. +This repository is under active development, currently in the MVP expansion phase. -See `docs/plans/2026-05-11-identitydb-foundation.md` for the current implementation plan. +See these implementation plans for the current roadmap: + +- `docs/plans/2026-05-11-identitydb-foundation.md` +- `docs/plans/2026-05-11-identitydb-memory-expansion.md`