// Tests for IdentityDB statement ingestion with custom, naive, and LLM-backed fact extractors.
import { afterEach, beforeEach, describe, expect, it } from "vitest";

import { IdentityDB } from "../src/core/identity-db";
import { LlmFactExtractor } from "../src/ingestion/llm-extractor";
import { NaiveExtractor } from "../src/ingestion/naive-extractor";
import type {
  FactExtractor,
  LlmTextGenerationModelInput,
} from "../src/ingestion/types";

/**
 * Exercises `IdentityDB.ingestStatement` with three extractor flavours:
 * a hand-written stub, the bundled `NaiveExtractor`, and `LlmFactExtractor`
 * driven by a stubbed text-generation model.
 */
describe("IdentityDB ingestion", () => {
  // A new connection is opened before each test and closed after it.
  let db: IdentityDB;

  beforeEach(async () => {
    db = await IdentityDB.connect({ client: "sqlite", filename: ":memory:" });
    await db.initialize();
  });

  afterEach(async () => {
    await db.close();
  });

  it("ingests a statement using a provided extractor", async () => {
    const statement = "I have worked with TypeScript since 2025.";

    // Stub extractor: echoes the input as the fact statement and attaches a
    // fixed subject / object / time topic triple.
    const extractor: FactExtractor = {
      async extract(input) {
        return [
          {
            statement: input,
            topics: [
              {
                name: "I",
                category: "entity",
                granularity: "concrete",
                role: "subject",
              },
              {
                name: "TypeScript",
                category: "entity",
                granularity: "concrete",
                role: "object",
              },
              {
                name: "2025",
                category: "temporal",
                granularity: "concrete",
                role: "time",
              },
            ],
          },
        ];
      },
    };

    const fact = await db.ingestStatement(statement, { extractor });

    // Topics come back in the order the extractor produced them.
    expect(fact.topics.map((topic) => topic.name)).toEqual([
      "I",
      "TypeScript",
      "2025",
    ]);

    // The ingested fact must be reachable through the pair of topics it links.
    const linkedFacts = await db.getTopicFactsLinkedTo("TypeScript", "2025");
    expect(linkedFacts).toHaveLength(1);
    expect(linkedFacts[0]?.statement).toBe(statement);
  });

  it("ships a deterministic naive extractor for local usage", async () => {
    const statement = "I have worked with TypeScript since 2025.";

    const fact = await db.ingestStatement(statement, {
      extractor: new NaiveExtractor(),
    });

    expect(fact.topics.map((topic) => topic.name)).toEqual([
      "I",
      "TypeScript",
      "2025",
    ]);

    const topic = await db.getTopicByName("TypeScript", { includeFacts: true });
    expect(topic?.facts).toHaveLength(1);
  });

  it("ships an LLM extractor adapter that returns structured facts from the model", async () => {
    const statement = "I have worked with Bun and TypeScript since 2025.";

    // Records what the adapter hands to the model so the test can verify the
    // model was actually invoked (previously this was captured but never read).
    let prompt: LlmTextGenerationModelInput | undefined;

    const extractor = new LlmFactExtractor({
      // Stubbed model: remembers its input and replies with one canned fact.
      model: {
        async generateText(input) {
          prompt = input;

          return [
            {
              statement: "I have worked with Bun and TypeScript since 2025.",
              summary: "The speaker has Bun and TypeScript experience.",
              source: "chat",
              confidence: 0.91,
              metadata: { channel: "telegram" },
              topics: [
                {
                  name: "I",
                  category: "entity",
                  granularity: "concrete",
                  role: "subject",
                },
                {
                  name: "Bun",
                  category: "entity",
                  granularity: "concrete",
                  role: "object",
                },
                {
                  name: "TypeScript",
                  category: "entity",
                  granularity: "concrete",
                  role: "object",
                },
                {
                  name: "2025",
                  category: "temporal",
                  granularity: "concrete",
                  role: "time",
                },
              ],
            },
          ];
        },
      },
      additionalInstructions: "Prefer technology and time topics.",
    });

    const fact = await db.ingestStatement(statement, { extractor });

    // The adapter must have called the model with some input.
    expect(prompt).toBeDefined();

    // Fields supplied only by the model's reply must survive ingestion.
    expect(fact.summary).toBe("The speaker has Bun and TypeScript experience.");
    expect(fact.source).toBe("chat");
    expect(fact.confidence).toBe(0.91);
    expect(fact.metadata).toEqual({ channel: "telegram" });
    expect(fact.topics.map((topic) => topic.name)).toEqual([
      "I",
      "Bun",
      "TypeScript",
      "2025",
    ]);
  });
});