Compare commits
5 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 1172c63db7 | |||
| 0e595e6f60 | |||
| 518264c467 | |||
| cc8b3dfb14 | |||
| 56e17dab49 |
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "identitydb",
|
||||
"version": "0.3.0",
|
||||
"version": "0.4.0",
|
||||
"description": "TypeScript memory graph database wrapper for topics, facts, and AI-assisted ingestion.",
|
||||
"license": "MIT",
|
||||
"type": "module",
|
||||
|
||||
@@ -15,25 +15,10 @@ export class LlmFactExtractor implements FactExtractor {
|
||||
constructor(private readonly options: LlmFactExtractorOptions) {}
|
||||
|
||||
async extract(input: string): Promise<ExtractedFact> {
|
||||
const prompt = this.buildPrompt(input);
|
||||
return this.options.model.generateText(prompt);
|
||||
}
|
||||
|
||||
private buildPrompt(input: string): string {
|
||||
if (this.options.promptBuilder) {
|
||||
return this.options.promptBuilder(input, this.options.instructions);
|
||||
}
|
||||
|
||||
const instructions = this.options.instructions?.trim();
|
||||
|
||||
return [
|
||||
DEFAULT_INSTRUCTIONS,
|
||||
instructions && instructions.length > 0
|
||||
? `Additional instructions:\n${instructions}`
|
||||
: null,
|
||||
`Input:\n${input.trim()}`,
|
||||
]
|
||||
.filter((value): value is string => value !== null)
|
||||
.join("\n\n");
|
||||
return this.options.model.generateText({
|
||||
instruction: DEFAULT_INSTRUCTIONS,
|
||||
input,
|
||||
additionalInstruction: this.options.additionalInstructions,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,14 +2,14 @@ import type {
|
||||
AddFactInput,
|
||||
EmbeddingProvider,
|
||||
TopicLinkInput,
|
||||
} from '../types/api';
|
||||
} from "../types/api";
|
||||
|
||||
export interface ExtractedFact {
|
||||
statement?: string;
|
||||
summary?: string | null;
|
||||
source?: string | null;
|
||||
confidence?: number | null;
|
||||
metadata?: AddFactInput['metadata'];
|
||||
metadata?: AddFactInput["metadata"];
|
||||
topics: TopicLinkInput[];
|
||||
}
|
||||
|
||||
@@ -17,14 +17,19 @@ export interface FactExtractor {
|
||||
extract(input: string): Promise<ExtractedFact>;
|
||||
}
|
||||
|
||||
export interface LlmTextGenerationModelInput {
|
||||
instruction: string;
|
||||
input: string;
|
||||
additionalInstruction?: string | undefined;
|
||||
}
|
||||
|
||||
export interface LlmTextGenerationModel {
|
||||
generateText(prompt: string): Promise<ExtractedFact>;
|
||||
generateText(prompt: LlmTextGenerationModelInput): Promise<ExtractedFact>;
|
||||
}
|
||||
|
||||
export interface LlmFactExtractorOptions {
|
||||
model: LlmTextGenerationModel;
|
||||
instructions?: string;
|
||||
promptBuilder?: (input: string, instructions?: string) => string;
|
||||
additionalInstructions?: string | undefined;
|
||||
}
|
||||
|
||||
export interface IngestStatementOptions {
|
||||
|
||||
@@ -1,15 +1,18 @@
|
||||
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
|
||||
import { afterEach, beforeEach, describe, expect, it } from "vitest";
|
||||
|
||||
import { IdentityDB } from '../src/core/identity-db';
|
||||
import { LlmFactExtractor } from '../src/ingestion/llm-extractor';
|
||||
import { NaiveExtractor } from '../src/ingestion/naive-extractor';
|
||||
import type { FactExtractor } from '../src/ingestion/types';
|
||||
import { IdentityDB } from "../src/core/identity-db";
|
||||
import { LlmFactExtractor } from "../src/ingestion/llm-extractor";
|
||||
import { NaiveExtractor } from "../src/ingestion/naive-extractor";
|
||||
import type {
|
||||
FactExtractor,
|
||||
LlmTextGenerationModelInput,
|
||||
} from "../src/ingestion/types";
|
||||
|
||||
describe('IdentityDB ingestion', () => {
|
||||
describe("IdentityDB ingestion", () => {
|
||||
let db: IdentityDB;
|
||||
|
||||
beforeEach(async () => {
|
||||
db = await IdentityDB.connect({ client: 'sqlite', filename: ':memory:' });
|
||||
db = await IdentityDB.connect({ client: "sqlite", filename: ":memory:" });
|
||||
await db.initialize();
|
||||
});
|
||||
|
||||
@@ -17,44 +20,75 @@ describe('IdentityDB ingestion', () => {
|
||||
await db.close();
|
||||
});
|
||||
|
||||
it('ingests a statement using a provided extractor', async () => {
|
||||
it("ingests a statement using a provided extractor", async () => {
|
||||
const extractor: FactExtractor = {
|
||||
async extract(input) {
|
||||
return {
|
||||
statement: input,
|
||||
topics: [
|
||||
{ name: 'I', category: 'entity', granularity: 'concrete', role: 'subject' },
|
||||
{ name: 'TypeScript', category: 'entity', granularity: 'concrete', role: 'object' },
|
||||
{ name: '2025', category: 'temporal', granularity: 'concrete', role: 'time' },
|
||||
{
|
||||
name: "I",
|
||||
category: "entity",
|
||||
granularity: "concrete",
|
||||
role: "subject",
|
||||
},
|
||||
{
|
||||
name: "TypeScript",
|
||||
category: "entity",
|
||||
granularity: "concrete",
|
||||
role: "object",
|
||||
},
|
||||
{
|
||||
name: "2025",
|
||||
category: "temporal",
|
||||
granularity: "concrete",
|
||||
role: "time",
|
||||
},
|
||||
],
|
||||
};
|
||||
},
|
||||
};
|
||||
|
||||
const fact = await db.ingestStatement('I have worked with TypeScript since 2025.', {
|
||||
extractor,
|
||||
});
|
||||
const fact = await db.ingestStatement(
|
||||
"I have worked with TypeScript since 2025.",
|
||||
{
|
||||
extractor,
|
||||
},
|
||||
);
|
||||
|
||||
expect(fact.topics.map((topic) => topic.name)).toEqual(['I', 'TypeScript', '2025']);
|
||||
expect(fact.topics.map((topic) => topic.name)).toEqual([
|
||||
"I",
|
||||
"TypeScript",
|
||||
"2025",
|
||||
]);
|
||||
|
||||
const linkedFacts = await db.getTopicFactsLinkedTo('TypeScript', '2025');
|
||||
const linkedFacts = await db.getTopicFactsLinkedTo("TypeScript", "2025");
|
||||
expect(linkedFacts).toHaveLength(1);
|
||||
expect(linkedFacts[0]?.statement).toBe('I have worked with TypeScript since 2025.');
|
||||
expect(linkedFacts[0]?.statement).toBe(
|
||||
"I have worked with TypeScript since 2025.",
|
||||
);
|
||||
});
|
||||
|
||||
it('ships a deterministic naive extractor for local usage', async () => {
|
||||
const fact = await db.ingestStatement('I have worked with TypeScript since 2025.', {
|
||||
extractor: new NaiveExtractor(),
|
||||
});
|
||||
it("ships a deterministic naive extractor for local usage", async () => {
|
||||
const fact = await db.ingestStatement(
|
||||
"I have worked with TypeScript since 2025.",
|
||||
{
|
||||
extractor: new NaiveExtractor(),
|
||||
},
|
||||
);
|
||||
|
||||
expect(fact.topics.map((topic) => topic.name)).toEqual(['I', 'TypeScript', '2025']);
|
||||
expect(fact.topics.map((topic) => topic.name)).toEqual([
|
||||
"I",
|
||||
"TypeScript",
|
||||
"2025",
|
||||
]);
|
||||
|
||||
const topic = await db.getTopicByName('TypeScript', { includeFacts: true });
|
||||
const topic = await db.getTopicByName("TypeScript", { includeFacts: true });
|
||||
expect(topic?.facts).toHaveLength(1);
|
||||
});
|
||||
|
||||
it('ships an LLM extractor adapter that returns structured facts from the model', async () => {
|
||||
let prompt = '';
|
||||
it("ships an LLM extractor adapter that returns structured facts from the model", async () => {
|
||||
let prompt: LlmTextGenerationModelInput | undefined = undefined;
|
||||
|
||||
const extractor = new LlmFactExtractor({
|
||||
model: {
|
||||
@@ -62,33 +96,64 @@ describe('IdentityDB ingestion', () => {
|
||||
prompt = input;
|
||||
|
||||
return {
|
||||
statement: 'I have worked with Bun and TypeScript since 2025.',
|
||||
summary: 'The speaker has Bun and TypeScript experience.',
|
||||
source: 'chat',
|
||||
statement: "I have worked with Bun and TypeScript since 2025.",
|
||||
summary: "The speaker has Bun and TypeScript experience.",
|
||||
source: "chat",
|
||||
confidence: 0.91,
|
||||
metadata: { channel: 'telegram' },
|
||||
metadata: { channel: "telegram" },
|
||||
topics: [
|
||||
{ name: 'I', category: 'entity', granularity: 'concrete', role: 'subject' },
|
||||
{ name: 'Bun', category: 'entity', granularity: 'concrete', role: 'object' },
|
||||
{ name: 'TypeScript', category: 'entity', granularity: 'concrete', role: 'object' },
|
||||
{ name: '2025', category: 'temporal', granularity: 'concrete', role: 'time' },
|
||||
{
|
||||
name: "I",
|
||||
category: "entity",
|
||||
granularity: "concrete",
|
||||
role: "subject",
|
||||
},
|
||||
{
|
||||
name: "Bun",
|
||||
category: "entity",
|
||||
granularity: "concrete",
|
||||
role: "object",
|
||||
},
|
||||
{
|
||||
name: "TypeScript",
|
||||
category: "entity",
|
||||
granularity: "concrete",
|
||||
role: "object",
|
||||
},
|
||||
{
|
||||
name: "2025",
|
||||
category: "temporal",
|
||||
granularity: "concrete",
|
||||
role: "time",
|
||||
},
|
||||
],
|
||||
};
|
||||
},
|
||||
},
|
||||
instructions: 'Prefer technology and time topics.',
|
||||
additionalInstructions: "Prefer technology and time topics.",
|
||||
});
|
||||
|
||||
const fact = await db.ingestStatement('I have worked with Bun and TypeScript since 2025.', {
|
||||
extractor,
|
||||
});
|
||||
const fact = await db.ingestStatement(
|
||||
"I have worked with Bun and TypeScript since 2025.",
|
||||
{
|
||||
extractor,
|
||||
},
|
||||
);
|
||||
|
||||
expect(prompt).toContain('Prefer technology and time topics.');
|
||||
expect(prompt).toContain('I have worked with Bun and TypeScript since 2025.');
|
||||
expect(fact.summary).toBe('The speaker has Bun and TypeScript experience.');
|
||||
expect(fact.source).toBe('chat');
|
||||
expect(prompt).toEqual({
|
||||
instruction: expect.stringContaining("Extract one structured fact from the user input."),
|
||||
input: "I have worked with Bun and TypeScript since 2025.",
|
||||
additionalInstruction: "Prefer technology and time topics.",
|
||||
});
|
||||
expect(fact.summary).toBe("The speaker has Bun and TypeScript experience.");
|
||||
expect(fact.source).toBe("chat");
|
||||
expect(fact.confidence).toBe(0.91);
|
||||
expect(fact.metadata).toEqual({ channel: 'telegram' });
|
||||
expect(fact.topics.map((topic) => topic.name)).toEqual(['I', 'Bun', 'TypeScript', '2025']);
|
||||
expect(fact.metadata).toEqual({ channel: "telegram" });
|
||||
expect(fact.topics.map((topic) => topic.name)).toEqual([
|
||||
"I",
|
||||
"Bun",
|
||||
"TypeScript",
|
||||
"2025",
|
||||
]);
|
||||
});
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user