import { afterEach, beforeEach, describe, expect, it } from 'vitest';

import { IdentityDB } from '../src/core/identity-db';
import { LlmFactExtractor } from '../src/ingestion/llm-extractor';
import { NaiveExtractor } from '../src/ingestion/naive-extractor';
import type { FactExtractor } from '../src/ingestion/types';

/** Projects a list of named items (e.g. a fact's topics) onto their `name` values, in order. */
const namesOf = <T extends { name: unknown }>(items: readonly T[]): T['name'][] =>
  items.map((item) => item.name);

/**
 * Ingestion tests for `IdentityDB.ingestStatement`.
 *
 * Every test gets its own in-memory SQLite database (opened and initialized in
 * `beforeEach`, closed in `afterEach`), then ingests a statement through one of
 * three extractor kinds: a hand-written `FactExtractor`, the `NaiveExtractor`,
 * or an `LlmFactExtractor` backed by a stubbed model.
 */
describe('IdentityDB ingestion', () => {
  let db: IdentityDB;

  beforeEach(async () => {
    db = await IdentityDB.connect({ client: 'sqlite', filename: ':memory:' });
    await db.initialize();
  });

  afterEach(async () => {
    await db.close();
  });

  it('ingests a statement using a provided extractor', async () => {
    const statement = 'I have worked with TypeScript since 2025.';

    // Stub extractor: echoes the input statement and always reports the same three topics.
    const extractor: FactExtractor = {
      extract: async (input) => ({
        statement: input,
        topics: [
          { name: 'I', category: 'entity', granularity: 'concrete', role: 'subject' },
          { name: 'TypeScript', category: 'entity', granularity: 'concrete', role: 'object' },
          { name: '2025', category: 'temporal', granularity: 'concrete', role: 'time' },
        ],
      }),
    };

    const fact = await db.ingestStatement(statement, { extractor });
    expect(namesOf(fact.topics)).toEqual(['I', 'TypeScript', '2025']);

    // The ingested fact must be reachable through the link between two of its topics.
    const linkedFacts = await db.getTopicFactsLinkedTo('TypeScript', '2025');
    expect(linkedFacts).toHaveLength(1);

    const [onlyFact] = linkedFacts;
    expect(onlyFact?.statement).toBe(statement);
  });

  it('ships a deterministic naive extractor for local usage', async () => {
    const naive = new NaiveExtractor();

    const fact = await db.ingestStatement('I have worked with TypeScript since 2025.', {
      extractor: naive,
    });
    expect(namesOf(fact.topics)).toEqual(['I', 'TypeScript', '2025']);

    const stored = await db.getTopicByName('TypeScript', { includeFacts: true });
    expect(stored?.facts).toHaveLength(1);
  });

  it('ships an LLM extractor adapter that turns structured JSON responses into facts', async () => {
    const statement = 'I have worked with Bun and TypeScript since 2025.';
    const summary = 'The speaker has Bun and TypeScript experience.';
    const instructions = 'Prefer technology and time topics.';

    // Canned structured response that the stubbed model serializes on every call.
    const modelReply = {
      statement,
      summary,
      source: 'chat',
      confidence: 0.91,
      metadata: { channel: 'telegram' },
      topics: [
        { name: 'I', category: 'entity', granularity: 'concrete', role: 'subject' },
        { name: 'Bun', category: 'entity', granularity: 'concrete', role: 'object' },
        { name: 'TypeScript', category: 'entity', granularity: 'concrete', role: 'object' },
        { name: '2025', category: 'temporal', granularity: 'concrete', role: 'time' },
      ],
    };

    // Records the prompt handed to the model so its contents can be inspected afterwards.
    let capturedPrompt = '';
    const extractor = new LlmFactExtractor({
      model: {
        generateText: async (input) => {
          capturedPrompt = input;
          return JSON.stringify(modelReply);
        },
      },
      instructions,
    });

    const fact = await db.ingestStatement(statement, { extractor });

    // The prompt must carry both the custom instructions and the raw statement.
    expect(capturedPrompt).toContain(instructions);
    expect(capturedPrompt).toContain(statement);

    // Fields from the model's JSON must surface on the resulting fact.
    expect(fact.summary).toBe(summary);
    expect(fact.source).toBe('chat');
    expect(fact.confidence).toBe(0.91);
    expect(fact.metadata).toEqual({ channel: 'telegram' });
    expect(namesOf(fact.topics)).toEqual(['I', 'Bun', 'TypeScript', '2025']);
  });

  it('parses JSON responses wrapped in markdown code fences', async () => {
    const statement = 'Bun powers TypeScript tooling.';

    const jsonBody = JSON.stringify({
      statement,
      topics: [
        { name: 'Bun', category: 'entity', granularity: 'concrete' },
        { name: 'TypeScript', category: 'entity', granularity: 'concrete' },
      ],
    });

    // Model reply: a prose lead-in line followed by the JSON inside a ```json fence.
    const fencedReply = ['Here is the extracted fact:', '```json', jsonBody, '```'].join('\n');

    const extractor = new LlmFactExtractor({
      model: {
        generateText: async () => fencedReply,
      },
    });

    const fact = await db.ingestStatement(statement, { extractor });
    expect(namesOf(fact.topics)).toEqual(['Bun', 'TypeScript']);
  });

  it('rejects invalid LLM responses before writing facts', async () => {
    const extractor = new LlmFactExtractor({
      model: {
        generateText: async () => 'not json at all',
      },
    });

    // NOTE(review): only the rejection is asserted here; nothing checks that the
    // database stayed empty afterwards — confirm whether that should be covered.
    const attempt = db.ingestStatement('Bun powers TypeScript tooling.', { extractor });
    await expect(attempt).rejects.toThrow('LLM extractor returned invalid JSON.');
  });
});