test: update test of LlmExtractor

This commit is contained in:
2026-05-19 22:28:09 +09:00
parent 518264c467
commit 0e595e6f60

View File

@@ -1,15 +1,18 @@
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
import { afterEach, beforeEach, describe, expect, it } from "vitest";
import { IdentityDB } from '../src/core/identity-db';
import { LlmFactExtractor } from '../src/ingestion/llm-extractor';
import { NaiveExtractor } from '../src/ingestion/naive-extractor';
import type { FactExtractor } from '../src/ingestion/types';
import { IdentityDB } from "../src/core/identity-db";
import { LlmFactExtractor } from "../src/ingestion/llm-extractor";
import { NaiveExtractor } from "../src/ingestion/naive-extractor";
import type {
FactExtractor,
LlmTextGenerationModelInput,
} from "../src/ingestion/types";
describe('IdentityDB ingestion', () => {
describe("IdentityDB ingestion", () => {
let db: IdentityDB;
beforeEach(async () => {
db = await IdentityDB.connect({ client: 'sqlite', filename: ':memory:' });
db = await IdentityDB.connect({ client: "sqlite", filename: ":memory:" });
await db.initialize();
});
@@ -17,44 +20,75 @@ describe('IdentityDB ingestion', () => {
await db.close();
});
it('ingests a statement using a provided extractor', async () => {
it("ingests a statement using a provided extractor", async () => {
const extractor: FactExtractor = {
async extract(input) {
return {
statement: input,
topics: [
{ name: 'I', category: 'entity', granularity: 'concrete', role: 'subject' },
{ name: 'TypeScript', category: 'entity', granularity: 'concrete', role: 'object' },
{ name: '2025', category: 'temporal', granularity: 'concrete', role: 'time' },
{
name: "I",
category: "entity",
granularity: "concrete",
role: "subject",
},
{
name: "TypeScript",
category: "entity",
granularity: "concrete",
role: "object",
},
{
name: "2025",
category: "temporal",
granularity: "concrete",
role: "time",
},
],
};
},
};
const fact = await db.ingestStatement('I have worked with TypeScript since 2025.', {
extractor,
});
const fact = await db.ingestStatement(
"I have worked with TypeScript since 2025.",
{
extractor,
},
);
expect(fact.topics.map((topic) => topic.name)).toEqual(['I', 'TypeScript', '2025']);
expect(fact.topics.map((topic) => topic.name)).toEqual([
"I",
"TypeScript",
"2025",
]);
const linkedFacts = await db.getTopicFactsLinkedTo('TypeScript', '2025');
const linkedFacts = await db.getTopicFactsLinkedTo("TypeScript", "2025");
expect(linkedFacts).toHaveLength(1);
expect(linkedFacts[0]?.statement).toBe('I have worked with TypeScript since 2025.');
expect(linkedFacts[0]?.statement).toBe(
"I have worked with TypeScript since 2025.",
);
});
it('ships a deterministic naive extractor for local usage', async () => {
const fact = await db.ingestStatement('I have worked with TypeScript since 2025.', {
extractor: new NaiveExtractor(),
});
it("ships a deterministic naive extractor for local usage", async () => {
const fact = await db.ingestStatement(
"I have worked with TypeScript since 2025.",
{
extractor: new NaiveExtractor(),
},
);
expect(fact.topics.map((topic) => topic.name)).toEqual(['I', 'TypeScript', '2025']);
expect(fact.topics.map((topic) => topic.name)).toEqual([
"I",
"TypeScript",
"2025",
]);
const topic = await db.getTopicByName('TypeScript', { includeFacts: true });
const topic = await db.getTopicByName("TypeScript", { includeFacts: true });
expect(topic?.facts).toHaveLength(1);
});
it('ships an LLM extractor adapter that returns structured facts from the model', async () => {
let prompt = '';
it("ships an LLM extractor adapter that returns structured facts from the model", async () => {
let prompt: LlmTextGenerationModelInput | undefined = undefined;
const extractor = new LlmFactExtractor({
model: {
@@ -62,33 +96,64 @@ describe('IdentityDB ingestion', () => {
prompt = input;
return {
statement: 'I have worked with Bun and TypeScript since 2025.',
summary: 'The speaker has Bun and TypeScript experience.',
source: 'chat',
statement: "I have worked with Bun and TypeScript since 2025.",
summary: "The speaker has Bun and TypeScript experience.",
source: "chat",
confidence: 0.91,
metadata: { channel: 'telegram' },
metadata: { channel: "telegram" },
topics: [
{ name: 'I', category: 'entity', granularity: 'concrete', role: 'subject' },
{ name: 'Bun', category: 'entity', granularity: 'concrete', role: 'object' },
{ name: 'TypeScript', category: 'entity', granularity: 'concrete', role: 'object' },
{ name: '2025', category: 'temporal', granularity: 'concrete', role: 'time' },
{
name: "I",
category: "entity",
granularity: "concrete",
role: "subject",
},
{
name: "Bun",
category: "entity",
granularity: "concrete",
role: "object",
},
{
name: "TypeScript",
category: "entity",
granularity: "concrete",
role: "object",
},
{
name: "2025",
category: "temporal",
granularity: "concrete",
role: "time",
},
],
};
},
},
instructions: 'Prefer technology and time topics.',
additionalInstructions: "Prefer technology and time topics.",
});
const fact = await db.ingestStatement('I have worked with Bun and TypeScript since 2025.', {
extractor,
});
const fact = await db.ingestStatement(
"I have worked with Bun and TypeScript since 2025.",
{
extractor,
},
);
expect(prompt).toContain('Prefer technology and time topics.');
expect(prompt).toContain('I have worked with Bun and TypeScript since 2025.');
expect(fact.summary).toBe('The speaker has Bun and TypeScript experience.');
expect(fact.source).toBe('chat');
expect(prompt).toEqual({
instruction: expect.stringContaining("Extract one structured fact from the user input."),
input: "I have worked with Bun and TypeScript since 2025.",
additionalInstruction: "Prefer technology and time topics.",
});
expect(fact.summary).toBe("The speaker has Bun and TypeScript experience.");
expect(fact.source).toBe("chat");
expect(fact.confidence).toBe(0.91);
expect(fact.metadata).toEqual({ channel: 'telegram' });
expect(fact.topics.map((topic) => topic.name)).toEqual(['I', 'Bun', 'TypeScript', '2025']);
expect(fact.metadata).toEqual({ channel: "telegram" });
expect(fact.topics.map((topic) => topic.name)).toEqual([
"I",
"Bun",
"TypeScript",
"2025",
]);
});
});