5 Commits

Author SHA1 Message Date
1172c63db7 v0.4.0
All checks were successful
npm release / verify (push) Successful in 12s
npm release / publish to npm (push) Successful in 11s
2026-05-19 22:30:27 +09:00
0e595e6f60 test: update test of LlmExtractor 2026-05-19 22:28:09 +09:00
518264c467 v0.3.1
Some checks failed
npm release / verify (push) Failing after 9s
npm release / publish to npm (push) Has been skipped
2026-05-19 22:19:30 +09:00
cc8b3dfb14 vv0.3.1 2026-05-19 22:18:51 +09:00
56e17dab49 feat: make extract input structured 2026-05-19 22:18:42 +09:00
4 changed files with 124 additions and 69 deletions

View File

@@ -1,6 +1,6 @@
{ {
"name": "identitydb", "name": "identitydb",
"version": "0.3.0", "version": "0.4.0",
"description": "TypeScript memory graph database wrapper for topics, facts, and AI-assisted ingestion.", "description": "TypeScript memory graph database wrapper for topics, facts, and AI-assisted ingestion.",
"license": "MIT", "license": "MIT",
"type": "module", "type": "module",

View File

@@ -15,25 +15,10 @@ export class LlmFactExtractor implements FactExtractor {
constructor(private readonly options: LlmFactExtractorOptions) {} constructor(private readonly options: LlmFactExtractorOptions) {}
async extract(input: string): Promise<ExtractedFact> { async extract(input: string): Promise<ExtractedFact> {
const prompt = this.buildPrompt(input); return this.options.model.generateText({
return this.options.model.generateText(prompt); instruction: DEFAULT_INSTRUCTIONS,
} input,
additionalInstruction: this.options.additionalInstructions,
private buildPrompt(input: string): string { });
if (this.options.promptBuilder) {
return this.options.promptBuilder(input, this.options.instructions);
}
const instructions = this.options.instructions?.trim();
return [
DEFAULT_INSTRUCTIONS,
instructions && instructions.length > 0
? `Additional instructions:\n${instructions}`
: null,
`Input:\n${input.trim()}`,
]
.filter((value): value is string => value !== null)
.join("\n\n");
} }
} }

View File

@@ -2,14 +2,14 @@ import type {
AddFactInput, AddFactInput,
EmbeddingProvider, EmbeddingProvider,
TopicLinkInput, TopicLinkInput,
} from '../types/api'; } from "../types/api";
export interface ExtractedFact { export interface ExtractedFact {
statement?: string; statement?: string;
summary?: string | null; summary?: string | null;
source?: string | null; source?: string | null;
confidence?: number | null; confidence?: number | null;
metadata?: AddFactInput['metadata']; metadata?: AddFactInput["metadata"];
topics: TopicLinkInput[]; topics: TopicLinkInput[];
} }
@@ -17,14 +17,19 @@ export interface FactExtractor {
extract(input: string): Promise<ExtractedFact>; extract(input: string): Promise<ExtractedFact>;
} }
export interface LlmTextGenerationModelInput {
instruction: string;
input: string;
additionalInstruction?: string | undefined;
}
export interface LlmTextGenerationModel { export interface LlmTextGenerationModel {
generateText(prompt: string): Promise<ExtractedFact>; generateText(prompt: LlmTextGenerationModelInput): Promise<ExtractedFact>;
} }
export interface LlmFactExtractorOptions { export interface LlmFactExtractorOptions {
model: LlmTextGenerationModel; model: LlmTextGenerationModel;
instructions?: string; additionalInstructions?: string | undefined;
promptBuilder?: (input: string, instructions?: string) => string;
} }
export interface IngestStatementOptions { export interface IngestStatementOptions {

View File

@@ -1,15 +1,18 @@
import { afterEach, beforeEach, describe, expect, it } from 'vitest'; import { afterEach, beforeEach, describe, expect, it } from "vitest";
import { IdentityDB } from '../src/core/identity-db'; import { IdentityDB } from "../src/core/identity-db";
import { LlmFactExtractor } from '../src/ingestion/llm-extractor'; import { LlmFactExtractor } from "../src/ingestion/llm-extractor";
import { NaiveExtractor } from '../src/ingestion/naive-extractor'; import { NaiveExtractor } from "../src/ingestion/naive-extractor";
import type { FactExtractor } from '../src/ingestion/types'; import type {
FactExtractor,
LlmTextGenerationModelInput,
} from "../src/ingestion/types";
describe('IdentityDB ingestion', () => { describe("IdentityDB ingestion", () => {
let db: IdentityDB; let db: IdentityDB;
beforeEach(async () => { beforeEach(async () => {
db = await IdentityDB.connect({ client: 'sqlite', filename: ':memory:' }); db = await IdentityDB.connect({ client: "sqlite", filename: ":memory:" });
await db.initialize(); await db.initialize();
}); });
@@ -17,44 +20,75 @@ describe('IdentityDB ingestion', () => {
await db.close(); await db.close();
}); });
it('ingests a statement using a provided extractor', async () => { it("ingests a statement using a provided extractor", async () => {
const extractor: FactExtractor = { const extractor: FactExtractor = {
async extract(input) { async extract(input) {
return { return {
statement: input, statement: input,
topics: [ topics: [
{ name: 'I', category: 'entity', granularity: 'concrete', role: 'subject' }, {
{ name: 'TypeScript', category: 'entity', granularity: 'concrete', role: 'object' }, name: "I",
{ name: '2025', category: 'temporal', granularity: 'concrete', role: 'time' }, category: "entity",
granularity: "concrete",
role: "subject",
},
{
name: "TypeScript",
category: "entity",
granularity: "concrete",
role: "object",
},
{
name: "2025",
category: "temporal",
granularity: "concrete",
role: "time",
},
], ],
}; };
}, },
}; };
const fact = await db.ingestStatement('I have worked with TypeScript since 2025.', { const fact = await db.ingestStatement(
extractor, "I have worked with TypeScript since 2025.",
}); {
extractor,
},
);
expect(fact.topics.map((topic) => topic.name)).toEqual(['I', 'TypeScript', '2025']); expect(fact.topics.map((topic) => topic.name)).toEqual([
"I",
"TypeScript",
"2025",
]);
const linkedFacts = await db.getTopicFactsLinkedTo('TypeScript', '2025'); const linkedFacts = await db.getTopicFactsLinkedTo("TypeScript", "2025");
expect(linkedFacts).toHaveLength(1); expect(linkedFacts).toHaveLength(1);
expect(linkedFacts[0]?.statement).toBe('I have worked with TypeScript since 2025.'); expect(linkedFacts[0]?.statement).toBe(
"I have worked with TypeScript since 2025.",
);
}); });
it('ships a deterministic naive extractor for local usage', async () => { it("ships a deterministic naive extractor for local usage", async () => {
const fact = await db.ingestStatement('I have worked with TypeScript since 2025.', { const fact = await db.ingestStatement(
extractor: new NaiveExtractor(), "I have worked with TypeScript since 2025.",
}); {
extractor: new NaiveExtractor(),
},
);
expect(fact.topics.map((topic) => topic.name)).toEqual(['I', 'TypeScript', '2025']); expect(fact.topics.map((topic) => topic.name)).toEqual([
"I",
"TypeScript",
"2025",
]);
const topic = await db.getTopicByName('TypeScript', { includeFacts: true }); const topic = await db.getTopicByName("TypeScript", { includeFacts: true });
expect(topic?.facts).toHaveLength(1); expect(topic?.facts).toHaveLength(1);
}); });
it('ships an LLM extractor adapter that returns structured facts from the model', async () => { it("ships an LLM extractor adapter that returns structured facts from the model", async () => {
let prompt = ''; let prompt: LlmTextGenerationModelInput | undefined = undefined;
const extractor = new LlmFactExtractor({ const extractor = new LlmFactExtractor({
model: { model: {
@@ -62,33 +96,64 @@ describe('IdentityDB ingestion', () => {
prompt = input; prompt = input;
return { return {
statement: 'I have worked with Bun and TypeScript since 2025.', statement: "I have worked with Bun and TypeScript since 2025.",
summary: 'The speaker has Bun and TypeScript experience.', summary: "The speaker has Bun and TypeScript experience.",
source: 'chat', source: "chat",
confidence: 0.91, confidence: 0.91,
metadata: { channel: 'telegram' }, metadata: { channel: "telegram" },
topics: [ topics: [
{ name: 'I', category: 'entity', granularity: 'concrete', role: 'subject' }, {
{ name: 'Bun', category: 'entity', granularity: 'concrete', role: 'object' }, name: "I",
{ name: 'TypeScript', category: 'entity', granularity: 'concrete', role: 'object' }, category: "entity",
{ name: '2025', category: 'temporal', granularity: 'concrete', role: 'time' }, granularity: "concrete",
role: "subject",
},
{
name: "Bun",
category: "entity",
granularity: "concrete",
role: "object",
},
{
name: "TypeScript",
category: "entity",
granularity: "concrete",
role: "object",
},
{
name: "2025",
category: "temporal",
granularity: "concrete",
role: "time",
},
], ],
}; };
}, },
}, },
instructions: 'Prefer technology and time topics.', additionalInstructions: "Prefer technology and time topics.",
}); });
const fact = await db.ingestStatement('I have worked with Bun and TypeScript since 2025.', { const fact = await db.ingestStatement(
extractor, "I have worked with Bun and TypeScript since 2025.",
}); {
extractor,
},
);
expect(prompt).toContain('Prefer technology and time topics.'); expect(prompt).toEqual({
expect(prompt).toContain('I have worked with Bun and TypeScript since 2025.'); instruction: expect.stringContaining("Extract one structured fact from the user input."),
expect(fact.summary).toBe('The speaker has Bun and TypeScript experience.'); input: "I have worked with Bun and TypeScript since 2025.",
expect(fact.source).toBe('chat'); additionalInstruction: "Prefer technology and time topics.",
});
expect(fact.summary).toBe("The speaker has Bun and TypeScript experience.");
expect(fact.source).toBe("chat");
expect(fact.confidence).toBe(0.91); expect(fact.confidence).toBe(0.91);
expect(fact.metadata).toEqual({ channel: 'telegram' }); expect(fact.metadata).toEqual({ channel: "telegram" });
expect(fact.topics.map((topic) => topic.name)).toEqual(['I', 'Bun', 'TypeScript', '2025']); expect(fact.topics.map((topic) => topic.name)).toEqual([
"I",
"Bun",
"TypeScript",
"2025",
]);
}); });
}); });