// TypeScript · 138 lines · 4.7 KiB
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
|
|
|
|
import { IdentityDB } from '../src/core/identity-db';
|
|
import { LlmFactExtractor } from '../src/ingestion/llm-extractor';
|
|
import { NaiveExtractor } from '../src/ingestion/naive-extractor';
|
|
import type { FactExtractor } from '../src/ingestion/types';
|
|
|
|
/**
 * Tests for statement ingestion through `IdentityDB.ingestStatement`.
 *
 * Each test runs against its own in-memory SQLite connection, which is opened
 * and initialized in `beforeEach` and closed again in `afterEach`.
 */
describe('IdentityDB ingestion', () => {
  let db: IdentityDB;

  // Connect to a fresh in-memory SQLite database and initialize it.
  beforeEach(async () => {
    db = await IdentityDB.connect({ client: 'sqlite', filename: ':memory:' });
    await db.initialize();
  });

  // Close the connection opened for the test that just ran.
  afterEach(async () => {
    await db.close();
  });

  it('ingests a statement using a provided extractor', async () => {
    const statement = 'I have worked with TypeScript since 2025.';

    // Hand-written extractor: echoes the input and returns a fixed topic list.
    const extractor: FactExtractor = {
      extract: async (input) => ({
        statement: input,
        topics: [
          { name: 'I', category: 'entity', granularity: 'concrete', role: 'subject' },
          { name: 'TypeScript', category: 'entity', granularity: 'concrete', role: 'object' },
          { name: '2025', category: 'temporal', granularity: 'concrete', role: 'time' },
        ],
      }),
    };

    const fact = await db.ingestStatement(statement, { extractor });

    const topicNames = fact.topics.map(({ name }) => name);
    expect(topicNames).toEqual(['I', 'TypeScript', '2025']);

    // The stored fact must be retrievable via the TypeScript/2025 topic pair.
    const linkedFacts = await db.getTopicFactsLinkedTo('TypeScript', '2025');
    expect(linkedFacts).toHaveLength(1);

    const [onlyLinkedFact] = linkedFacts;
    expect(onlyLinkedFact?.statement).toBe(statement);
  });

  it('ships a deterministic naive extractor for local usage', async () => {
    const statement = 'I have worked with TypeScript since 2025.';
    const extractor = new NaiveExtractor();

    const fact = await db.ingestStatement(statement, { extractor });

    const topicNames = fact.topics.map(({ name }) => name);
    expect(topicNames).toEqual(['I', 'TypeScript', '2025']);

    // Looking the topic up with its facts included should surface the new fact.
    const typescriptTopic = await db.getTopicByName('TypeScript', { includeFacts: true });
    expect(typescriptTopic?.facts).toHaveLength(1);
  });

  it('ships an LLM extractor adapter that turns structured JSON responses into facts', async () => {
    const statement = 'I have worked with Bun and TypeScript since 2025.';
    const instructions = 'Prefer technology and time topics.';

    // Canned model output: a JSON document carrying every field asserted below.
    const modelResponse = JSON.stringify({
      statement,
      summary: 'The speaker has Bun and TypeScript experience.',
      source: 'chat',
      confidence: 0.91,
      metadata: { channel: 'telegram' },
      topics: [
        { name: 'I', category: 'entity', granularity: 'concrete', role: 'subject' },
        { name: 'Bun', category: 'entity', granularity: 'concrete', role: 'object' },
        { name: 'TypeScript', category: 'entity', granularity: 'concrete', role: 'object' },
        { name: '2025', category: 'temporal', granularity: 'concrete', role: 'time' },
      ],
    });

    // Records whatever text the extractor hands to the fake model.
    let capturedPrompt = '';

    const extractor = new LlmFactExtractor({
      model: {
        generateText: async (input) => {
          capturedPrompt = input;
          return modelResponse;
        },
      },
      instructions,
    });

    const fact = await db.ingestStatement(statement, { extractor });

    // The prompt sent to the model must carry both the instructions and the statement.
    expect(capturedPrompt).toContain(instructions);
    expect(capturedPrompt).toContain(statement);

    // Every field of the model's JSON must land on the ingested fact.
    expect(fact.summary).toBe('The speaker has Bun and TypeScript experience.');
    expect(fact.source).toBe('chat');
    expect(fact.confidence).toBe(0.91);
    expect(fact.metadata).toEqual({ channel: 'telegram' });

    const topicNames = fact.topics.map(({ name }) => name);
    expect(topicNames).toEqual(['I', 'Bun', 'TypeScript', '2025']);
  });

  it('parses JSON responses wrapped in markdown code fences', async () => {
    const statement = 'Bun powers TypeScript tooling.';

    const jsonPayload = JSON.stringify({
      statement,
      topics: [
        { name: 'Bun', category: 'entity', granularity: 'concrete' },
        { name: 'TypeScript', category: 'entity', granularity: 'concrete' },
      ],
    });

    // Model reply: a prose preamble followed by the JSON inside a ```json fence.
    const fencedReply = ['Here is the extracted fact:', '```json', jsonPayload, '```'].join('\n');

    const extractor = new LlmFactExtractor({
      model: {
        generateText: async () => fencedReply,
      },
    });

    const fact = await db.ingestStatement(statement, { extractor });

    const topicNames = fact.topics.map(({ name }) => name);
    expect(topicNames).toEqual(['Bun', 'TypeScript']);
  });

  it('rejects invalid LLM responses before writing facts', async () => {
    const extractor = new LlmFactExtractor({
      model: {
        generateText: async () => 'not json at all',
      },
    });

    // NOTE(review): only the rejection and its message are asserted here; nothing
    // checks that the store is still empty afterwards, despite the test title.
    const ingestion = db.ingestStatement('Bun powers TypeScript tooling.', { extractor });

    await expect(ingestion).rejects.toThrow('LLM extractor returned invalid JSON.');
  });
});
|