import { afterEach, beforeEach, describe, expect, it } from "vitest";

import { IdentityDB } from "../src/core/identity-db";
import { LlmFactExtractor } from "../src/ingestion/llm-extractor";
import { NaiveExtractor } from "../src/ingestion/naive-extractor";
import type {
  FactExtractor,
  LlmTextGenerationModelInput,
} from "../src/ingestion/types";

/**
 * Exercises `IdentityDB.ingestStatement` with three extractor flavours:
 * a hand-written stub, the bundled `NaiveExtractor`, and the
 * `LlmFactExtractor` adapter driven by a stubbed model.
 *
 * Every test gets its own in-memory SQLite database, so no state is shared
 * between cases.
 */
describe("IdentityDB ingestion", () => {
  let db: IdentityDB;

  beforeEach(async () => {
    db = await IdentityDB.connect({ client: "sqlite", filename: ":memory:" });
    await db.initialize();
  });

  afterEach(async () => {
    await db.close();
  });

  it("ingests a statement using a provided extractor", async () => {
    const statement = "I have worked with TypeScript since 2025.";

    // Stub extractor: echoes the input back as the fact statement and
    // attaches a fixed subject / object / time topic triple.
    const extractor: FactExtractor = {
      async extract(input) {
        return [
          {
            statement: input,
            topics: [
              {
                name: "I",
                category: "entity",
                granularity: "concrete",
                role: "subject",
              },
              {
                name: "TypeScript",
                category: "entity",
                granularity: "concrete",
                role: "object",
              },
              {
                name: "2025",
                category: "temporal",
                granularity: "concrete",
                role: "time",
              },
            ],
          },
        ];
      },
    };

    const fact = await db.ingestStatement(statement, { extractor });

    expect(fact.topics.map((topic) => topic.name)).toEqual([
      "I",
      "TypeScript",
      "2025",
    ]);

    // The ingested fact must be reachable through the link between two of
    // its topics.
    const linkedFacts = await db.getTopicFactsLinkedTo("TypeScript", "2025");

    expect(linkedFacts).toHaveLength(1);
    expect(linkedFacts[0]?.statement).toBe(statement);
  });

  it("ships a deterministic naive extractor for local usage", async () => {
    const statement = "I have worked with TypeScript since 2025.";

    const fact = await db.ingestStatement(statement, {
      extractor: new NaiveExtractor(),
    });

    expect(fact.topics.map((topic) => topic.name)).toEqual([
      "I",
      "TypeScript",
      "2025",
    ]);

    const topic = await db.getTopicByName("TypeScript", { includeFacts: true });

    expect(topic?.facts).toHaveLength(1);
  });

  it("ships an LLM extractor adapter that returns structured facts from the model", async () => {
    const statement = "I have worked with Bun and TypeScript since 2025.";

    // Captures whatever input the adapter hands to the stubbed model.
    let prompt: LlmTextGenerationModelInput | undefined = undefined;

    const extractor = new LlmFactExtractor({
      model: {
        // Stub model: records its input and replies with one canned fact.
        async generateText(input) {
          prompt = input;

          return [
            {
              statement: "I have worked with Bun and TypeScript since 2025.",
              summary: "The speaker has Bun and TypeScript experience.",
              source: "chat",
              confidence: 0.91,
              metadata: { channel: "telegram" },
              topics: [
                {
                  name: "I",
                  category: "entity",
                  granularity: "concrete",
                  role: "subject",
                },
                {
                  name: "Bun",
                  category: "entity",
                  granularity: "concrete",
                  role: "object",
                },
                {
                  name: "TypeScript",
                  category: "entity",
                  granularity: "concrete",
                  role: "object",
                },
                {
                  name: "2025",
                  category: "temporal",
                  granularity: "concrete",
                  role: "time",
                },
              ],
            },
          ];
        },
      },
      additionalInstructions: "Prefer technology and time topics.",
    });

    const fact = await db.ingestStatement(statement, { extractor });

    // The adapter must have called the model with an input. Without this
    // check the captured `prompt` was assigned but never verified.
    expect(prompt).toBeDefined();

    // Fields produced by the model must survive ingestion unchanged.
    expect(fact.summary).toBe("The speaker has Bun and TypeScript experience.");
    expect(fact.source).toBe("chat");
    expect(fact.confidence).toBe(0.91);
    expect(fact.metadata).toEqual({ channel: "telegram" });
    expect(fact.topics.map((topic) => topic.name)).toEqual([
      "I",
      "Bun",
      "TypeScript",
      "2025",
    ]);
  });
});