diff --git a/tests/ingestion.test.ts b/tests/ingestion.test.ts index db60918..c5e21b9 100644 --- a/tests/ingestion.test.ts +++ b/tests/ingestion.test.ts @@ -1,15 +1,18 @@ -import { afterEach, beforeEach, describe, expect, it } from 'vitest'; +import { afterEach, beforeEach, describe, expect, it } from "vitest"; -import { IdentityDB } from '../src/core/identity-db'; -import { LlmFactExtractor } from '../src/ingestion/llm-extractor'; -import { NaiveExtractor } from '../src/ingestion/naive-extractor'; -import type { FactExtractor } from '../src/ingestion/types'; +import { IdentityDB } from "../src/core/identity-db"; +import { LlmFactExtractor } from "../src/ingestion/llm-extractor"; +import { NaiveExtractor } from "../src/ingestion/naive-extractor"; +import type { + FactExtractor, + LlmTextGenerationModelInput, +} from "../src/ingestion/types"; -describe('IdentityDB ingestion', () => { +describe("IdentityDB ingestion", () => { let db: IdentityDB; beforeEach(async () => { - db = await IdentityDB.connect({ client: 'sqlite', filename: ':memory:' }); + db = await IdentityDB.connect({ client: "sqlite", filename: ":memory:" }); await db.initialize(); }); @@ -17,44 +20,75 @@ describe('IdentityDB ingestion', () => { await db.close(); }); - it('ingests a statement using a provided extractor', async () => { + it("ingests a statement using a provided extractor", async () => { const extractor: FactExtractor = { async extract(input) { return { statement: input, topics: [ - { name: 'I', category: 'entity', granularity: 'concrete', role: 'subject' }, - { name: 'TypeScript', category: 'entity', granularity: 'concrete', role: 'object' }, - { name: '2025', category: 'temporal', granularity: 'concrete', role: 'time' }, + { + name: "I", + category: "entity", + granularity: "concrete", + role: "subject", + }, + { + name: "TypeScript", + category: "entity", + granularity: "concrete", + role: "object", + }, + { + name: "2025", + category: "temporal", + granularity: "concrete", + role: "time", + }, ], }; }, }; - const fact = await db.ingestStatement('I have worked with TypeScript since 2025.', { - extractor, - }); + const fact = await db.ingestStatement( + "I have worked with TypeScript since 2025.", + { + extractor, + }, + ); - expect(fact.topics.map((topic) => topic.name)).toEqual(['I', 'TypeScript', '2025']); + expect(fact.topics.map((topic) => topic.name)).toEqual([ + "I", + "TypeScript", + "2025", + ]); - const linkedFacts = await db.getTopicFactsLinkedTo('TypeScript', '2025'); + const linkedFacts = await db.getTopicFactsLinkedTo("TypeScript", "2025"); expect(linkedFacts).toHaveLength(1); - expect(linkedFacts[0]?.statement).toBe('I have worked with TypeScript since 2025.'); + expect(linkedFacts[0]?.statement).toBe( + "I have worked with TypeScript since 2025.", + ); }); - it('ships a deterministic naive extractor for local usage', async () => { - const fact = await db.ingestStatement('I have worked with TypeScript since 2025.', { - extractor: new NaiveExtractor(), - }); + it("ships a deterministic naive extractor for local usage", async () => { + const fact = await db.ingestStatement( + "I have worked with TypeScript since 2025.", + { + extractor: new NaiveExtractor(), + }, + ); - expect(fact.topics.map((topic) => topic.name)).toEqual(['I', 'TypeScript', '2025']); + expect(fact.topics.map((topic) => topic.name)).toEqual([ + "I", + "TypeScript", + "2025", + ]); - const topic = await db.getTopicByName('TypeScript', { includeFacts: true }); + const topic = await db.getTopicByName("TypeScript", { includeFacts: true }); expect(topic?.facts).toHaveLength(1); }); - it('ships an LLM extractor adapter that returns structured facts from the model', async () => { - let prompt = ''; + it("ships an LLM extractor adapter that returns structured facts from the model", async () => { + let prompt: LlmTextGenerationModelInput | undefined = undefined; const extractor = new LlmFactExtractor({ model: { @@ -62,33 +96,64 @@ describe('IdentityDB ingestion', () => { prompt = input; return { - statement: 'I have worked with Bun and TypeScript since 2025.', - summary: 'The speaker has Bun and TypeScript experience.', - source: 'chat', + statement: "I have worked with Bun and TypeScript since 2025.", + summary: "The speaker has Bun and TypeScript experience.", + source: "chat", confidence: 0.91, - metadata: { channel: 'telegram' }, + metadata: { channel: "telegram" }, topics: [ - { name: 'I', category: 'entity', granularity: 'concrete', role: 'subject' }, - { name: 'Bun', category: 'entity', granularity: 'concrete', role: 'object' }, - { name: 'TypeScript', category: 'entity', granularity: 'concrete', role: 'object' }, - { name: '2025', category: 'temporal', granularity: 'concrete', role: 'time' }, + { + name: "I", + category: "entity", + granularity: "concrete", + role: "subject", + }, + { + name: "Bun", + category: "entity", + granularity: "concrete", + role: "object", + }, + { + name: "TypeScript", + category: "entity", + granularity: "concrete", + role: "object", + }, + { + name: "2025", + category: "temporal", + granularity: "concrete", + role: "time", + }, ], }; }, }, - instructions: 'Prefer technology and time topics.', + additionalInstructions: "Prefer technology and time topics.", }); - const fact = await db.ingestStatement('I have worked with Bun and TypeScript since 2025.', { - extractor, - }); + const fact = await db.ingestStatement( + "I have worked with Bun and TypeScript since 2025.", + { + extractor, + }, + ); - expect(prompt).toContain('Prefer technology and time topics.'); - expect(prompt).toContain('I have worked with Bun and TypeScript since 2025.'); - expect(fact.summary).toBe('The speaker has Bun and TypeScript experience.'); - expect(fact.source).toBe('chat'); + expect(prompt).toEqual({ + instruction: expect.stringContaining("Extract one structured fact from the user input."), + input: "I have worked with Bun and TypeScript since 2025.", + additionalInstruction: "Prefer technology and time topics.", + }); + expect(fact.summary).toBe("The speaker has Bun and TypeScript experience."); + expect(fact.source).toBe("chat"); expect(fact.confidence).toBe(0.91); - expect(fact.metadata).toEqual({ channel: 'telegram' }); - expect(fact.topics.map((topic) => topic.name)).toEqual(['I', 'Bun', 'TypeScript', '2025']); + expect(fact.metadata).toEqual({ channel: "telegram" }); + expect(fact.topics.map((topic) => topic.name)).toEqual([ + "I", + "Bun", + "TypeScript", + "2025", + ]); }); });