Compare commits
9 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 1172c63db7 | |||
| 0e595e6f60 | |||
| 518264c467 | |||
| cc8b3dfb14 | |||
| 56e17dab49 | |||
| cc2e9110cc | |||
| 0480ea182f | |||
| 185edfdae8 | |||
| a33fd61c97 |
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "identitydb",
|
"name": "identitydb",
|
||||||
"version": "0.2.1",
|
"version": "0.4.0",
|
||||||
"description": "TypeScript memory graph database wrapper for topics, facts, and AI-assisted ingestion.",
|
"description": "TypeScript memory graph database wrapper for topics, facts, and AI-assisted ingestion.",
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
"type": "module",
|
"type": "module",
|
||||||
|
|||||||
@@ -1,273 +1,24 @@
|
|||||||
import { IdentityDBError } from '../core/errors';
|
|
||||||
import type { TopicCategory, TopicGranularity } from '../types/domain';
|
|
||||||
import type {
|
import type {
|
||||||
ExtractedFact,
|
ExtractedFact,
|
||||||
FactExtractor,
|
FactExtractor,
|
||||||
LlmFactExtractorOptions,
|
LlmFactExtractorOptions,
|
||||||
} from './types';
|
} from "./types";
|
||||||
|
|
||||||
const DEFAULT_INSTRUCTIONS = [
|
const DEFAULT_INSTRUCTIONS = [
|
||||||
'Extract one structured fact from the user input.',
|
"Extract one structured fact from the user input.",
|
||||||
'Return JSON only. Do not include markdown, explanations, or prose outside the JSON object.',
|
"Return JSON only. Do not include markdown, explanations, or prose outside the JSON object.",
|
||||||
'Use this shape: {"statement": string?, "summary": string|null, "source": string|null, "confidence": number|null, "metadata": object|null, "topics": Array<{"name": string, "category": "entity"|"concept"|"temporal"|"custom"?, "granularity": "abstract"|"concrete"|"mixed"?, "role": string|null, "description": string|null, "metadata": object|null}>}.',
|
'Use this shape: {"statement": string?, "summary": string|null, "source": string|null, "confidence": number|null, "metadata": object|null, "topics": Array<{"name": string, "category": "entity"|"concept"|"temporal"|"custom"?, "granularity": "abstract"|"concrete"|"mixed"?, "role": string|null, "description": string|null, "metadata": object|null}>}.',
|
||||||
'Only include topics that are explicitly supported by the input.',
|
'Only include topics that are explicitly in the input as-is. For example, topic "I started TypeScript since 2015" can be "I", "TypeScript", "2015".',
|
||||||
].join('\n');
|
].join("\n");
|
||||||
|
|
||||||
export class LlmFactExtractor implements FactExtractor {
|
export class LlmFactExtractor implements FactExtractor {
|
||||||
constructor(private readonly options: LlmFactExtractorOptions) {}
|
constructor(private readonly options: LlmFactExtractorOptions) {}
|
||||||
|
|
||||||
async extract(input: string): Promise<ExtractedFact> {
|
async extract(input: string): Promise<ExtractedFact> {
|
||||||
const prompt = this.buildPrompt(input);
|
return this.options.model.generateText({
|
||||||
const response = await this.options.model.generateText(prompt);
|
instruction: DEFAULT_INSTRUCTIONS,
|
||||||
return parseLlmExtractedFactResponse(response);
|
input,
|
||||||
}
|
additionalInstruction: this.options.additionalInstructions,
|
||||||
|
});
|
||||||
private buildPrompt(input: string): string {
|
|
||||||
if (this.options.promptBuilder) {
|
|
||||||
return this.options.promptBuilder(input, this.options.instructions);
|
|
||||||
}
|
|
||||||
|
|
||||||
const instructions = this.options.instructions?.trim();
|
|
||||||
|
|
||||||
return [
|
|
||||||
DEFAULT_INSTRUCTIONS,
|
|
||||||
instructions && instructions.length > 0 ? `Additional instructions:\n${instructions}` : null,
|
|
||||||
`Input:\n${input.trim()}`,
|
|
||||||
]
|
|
||||||
.filter((value): value is string => value !== null)
|
|
||||||
.join('\n\n');
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
export function parseLlmExtractedFactResponse(response: string): ExtractedFact {
|
|
||||||
const payload = parseJsonCandidate(response);
|
|
||||||
|
|
||||||
if (!isRecord(payload)) {
|
|
||||||
throw new IdentityDBError('LLM extractor response must be a JSON object.');
|
|
||||||
}
|
|
||||||
|
|
||||||
const topics = parseTopics(payload.topics);
|
|
||||||
const extracted: ExtractedFact = { topics };
|
|
||||||
|
|
||||||
const statement = optionalString(payload.statement);
|
|
||||||
if (statement !== undefined) {
|
|
||||||
extracted.statement = statement;
|
|
||||||
}
|
|
||||||
|
|
||||||
const summary = optionalNullableString(payload.summary);
|
|
||||||
if (summary !== undefined) {
|
|
||||||
extracted.summary = summary;
|
|
||||||
}
|
|
||||||
|
|
||||||
const source = optionalNullableString(payload.source);
|
|
||||||
if (source !== undefined) {
|
|
||||||
extracted.source = source;
|
|
||||||
}
|
|
||||||
|
|
||||||
const confidence = optionalNullableNumber(payload.confidence);
|
|
||||||
if (confidence !== undefined) {
|
|
||||||
extracted.confidence = confidence;
|
|
||||||
}
|
|
||||||
|
|
||||||
const metadata = optionalMetadata(payload.metadata);
|
|
||||||
if (metadata !== undefined) {
|
|
||||||
extracted.metadata = metadata;
|
|
||||||
}
|
|
||||||
|
|
||||||
return extracted;
|
|
||||||
}
|
|
||||||
|
|
||||||
function parseJsonCandidate(response: string): unknown {
|
|
||||||
const trimmed = response.trim();
|
|
||||||
|
|
||||||
for (const candidate of collectJsonCandidates(trimmed)) {
|
|
||||||
try {
|
|
||||||
return JSON.parse(candidate);
|
|
||||||
} catch {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
throw new IdentityDBError('LLM extractor returned invalid JSON.');
|
|
||||||
}
|
|
||||||
|
|
||||||
function collectJsonCandidates(response: string): string[] {
|
|
||||||
const candidates = new Set<string>();
|
|
||||||
candidates.add(response);
|
|
||||||
|
|
||||||
const fencePattern = /```(?:json)?\s*([\s\S]*?)```/gi;
|
|
||||||
let match: RegExpExecArray | null = fencePattern.exec(response);
|
|
||||||
|
|
||||||
while (match) {
|
|
||||||
const candidate = match[1]?.trim();
|
|
||||||
if (candidate) {
|
|
||||||
candidates.add(candidate);
|
|
||||||
}
|
|
||||||
|
|
||||||
match = fencePattern.exec(response);
|
|
||||||
}
|
|
||||||
|
|
||||||
const firstBrace = response.indexOf('{');
|
|
||||||
const lastBrace = response.lastIndexOf('}');
|
|
||||||
if (firstBrace >= 0 && lastBrace > firstBrace) {
|
|
||||||
candidates.add(response.slice(firstBrace, lastBrace + 1));
|
|
||||||
}
|
|
||||||
|
|
||||||
return Array.from(candidates);
|
|
||||||
}
|
|
||||||
|
|
||||||
function parseTopics(value: unknown): ExtractedFact['topics'] {
|
|
||||||
if (!Array.isArray(value)) {
|
|
||||||
throw new IdentityDBError('LLM extractor response must include a topics array.');
|
|
||||||
}
|
|
||||||
|
|
||||||
return value.map((entry) => parseTopic(entry));
|
|
||||||
}
|
|
||||||
|
|
||||||
function parseTopic(value: unknown): ExtractedFact['topics'][number] {
|
|
||||||
if (!isRecord(value)) {
|
|
||||||
throw new IdentityDBError('LLM extractor topics must be JSON objects.');
|
|
||||||
}
|
|
||||||
|
|
||||||
const name = optionalString(value.name)?.trim();
|
|
||||||
if (!name) {
|
|
||||||
throw new IdentityDBError('LLM extractor topics must include a non-empty name.');
|
|
||||||
}
|
|
||||||
|
|
||||||
const topic: ExtractedFact['topics'][number] = { name };
|
|
||||||
|
|
||||||
const category = optionalTopicCategory(value.category);
|
|
||||||
if (category !== undefined) {
|
|
||||||
topic.category = category;
|
|
||||||
}
|
|
||||||
|
|
||||||
const granularity = optionalTopicGranularity(value.granularity);
|
|
||||||
if (granularity !== undefined) {
|
|
||||||
topic.granularity = granularity;
|
|
||||||
}
|
|
||||||
|
|
||||||
const role = optionalNullableString(value.role);
|
|
||||||
if (role !== undefined) {
|
|
||||||
topic.role = role;
|
|
||||||
}
|
|
||||||
|
|
||||||
const description = optionalNullableString(value.description);
|
|
||||||
if (description !== undefined) {
|
|
||||||
topic.description = description;
|
|
||||||
}
|
|
||||||
|
|
||||||
const metadata = optionalMetadata(value.metadata);
|
|
||||||
if (metadata !== undefined) {
|
|
||||||
topic.metadata = metadata;
|
|
||||||
}
|
|
||||||
|
|
||||||
return topic;
|
|
||||||
}
|
|
||||||
|
|
||||||
function optionalString(value: unknown): string | undefined {
|
|
||||||
if (value === undefined) {
|
|
||||||
return undefined;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (typeof value !== 'string') {
|
|
||||||
throw new IdentityDBError('LLM extractor expected a string field.');
|
|
||||||
}
|
|
||||||
|
|
||||||
return value;
|
|
||||||
}
|
|
||||||
|
|
||||||
function optionalNullableString(value: unknown): string | null | undefined {
|
|
||||||
if (value === undefined) {
|
|
||||||
return undefined;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (value === null) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (typeof value !== 'string') {
|
|
||||||
throw new IdentityDBError('LLM extractor expected a nullable string field.');
|
|
||||||
}
|
|
||||||
|
|
||||||
return value;
|
|
||||||
}
|
|
||||||
|
|
||||||
function optionalNullableNumber(value: unknown): number | null | undefined {
|
|
||||||
if (value === undefined) {
|
|
||||||
return undefined;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (value === null) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (typeof value !== 'number' || Number.isNaN(value)) {
|
|
||||||
throw new IdentityDBError('LLM extractor expected confidence to be a number or null.');
|
|
||||||
}
|
|
||||||
|
|
||||||
return value;
|
|
||||||
}
|
|
||||||
|
|
||||||
function optionalMetadata(value: unknown): ExtractedFact['metadata'] | undefined {
|
|
||||||
if (value === undefined) {
|
|
||||||
return undefined;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (value === null) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!isJsonLike(value)) {
|
|
||||||
throw new IdentityDBError('LLM extractor metadata must be valid JSON-compatible data.');
|
|
||||||
}
|
|
||||||
|
|
||||||
return value as ExtractedFact['metadata'];
|
|
||||||
}
|
|
||||||
|
|
||||||
function optionalTopicCategory(value: unknown): TopicCategory | undefined {
|
|
||||||
if (value === undefined) {
|
|
||||||
return undefined;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (value === 'entity' || value === 'concept' || value === 'temporal' || value === 'custom') {
|
|
||||||
return value;
|
|
||||||
}
|
|
||||||
|
|
||||||
throw new IdentityDBError('LLM extractor returned an unsupported topic category.');
|
|
||||||
}
|
|
||||||
|
|
||||||
function optionalTopicGranularity(value: unknown): TopicGranularity | undefined {
|
|
||||||
if (value === undefined) {
|
|
||||||
return undefined;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (value === 'abstract' || value === 'concrete' || value === 'mixed') {
|
|
||||||
return value;
|
|
||||||
}
|
|
||||||
|
|
||||||
throw new IdentityDBError('LLM extractor returned an unsupported topic granularity.');
|
|
||||||
}
|
|
||||||
|
|
||||||
function isRecord(value: unknown): value is Record<string, unknown> {
|
|
||||||
return typeof value === 'object' && value !== null && !Array.isArray(value);
|
|
||||||
}
|
|
||||||
|
|
||||||
function isJsonLike(value: unknown): boolean {
|
|
||||||
if (value === null) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (typeof value === 'string' || typeof value === 'number' || typeof value === 'boolean') {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (Array.isArray(value)) {
|
|
||||||
return value.every((entry) => isJsonLike(entry));
|
|
||||||
}
|
|
||||||
|
|
||||||
if (isRecord(value)) {
|
|
||||||
return Object.values(value).every((entry) => isJsonLike(entry));
|
|
||||||
}
|
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -2,14 +2,14 @@ import type {
|
|||||||
AddFactInput,
|
AddFactInput,
|
||||||
EmbeddingProvider,
|
EmbeddingProvider,
|
||||||
TopicLinkInput,
|
TopicLinkInput,
|
||||||
} from '../types/api';
|
} from "../types/api";
|
||||||
|
|
||||||
export interface ExtractedFact {
|
export interface ExtractedFact {
|
||||||
statement?: string;
|
statement?: string;
|
||||||
summary?: string | null;
|
summary?: string | null;
|
||||||
source?: string | null;
|
source?: string | null;
|
||||||
confidence?: number | null;
|
confidence?: number | null;
|
||||||
metadata?: AddFactInput['metadata'];
|
metadata?: AddFactInput["metadata"];
|
||||||
topics: TopicLinkInput[];
|
topics: TopicLinkInput[];
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -17,14 +17,19 @@ export interface FactExtractor {
|
|||||||
extract(input: string): Promise<ExtractedFact>;
|
extract(input: string): Promise<ExtractedFact>;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
export interface LlmTextGenerationModelInput {
|
||||||
|
instruction: string;
|
||||||
|
input: string;
|
||||||
|
additionalInstruction?: string | undefined;
|
||||||
|
}
|
||||||
|
|
||||||
export interface LlmTextGenerationModel {
|
export interface LlmTextGenerationModel {
|
||||||
generateText(prompt: string): Promise<string>;
|
generateText(prompt: LlmTextGenerationModelInput): Promise<ExtractedFact>;
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface LlmFactExtractorOptions {
|
export interface LlmFactExtractorOptions {
|
||||||
model: LlmTextGenerationModel;
|
model: LlmTextGenerationModel;
|
||||||
instructions?: string;
|
additionalInstructions?: string | undefined;
|
||||||
promptBuilder?: (input: string, instructions?: string) => string;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface IngestStatementOptions {
|
export interface IngestStatementOptions {
|
||||||
|
|||||||
@@ -1,15 +1,18 @@
|
|||||||
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
|
import { afterEach, beforeEach, describe, expect, it } from "vitest";
|
||||||
|
|
||||||
import { IdentityDB } from '../src/core/identity-db';
|
import { IdentityDB } from "../src/core/identity-db";
|
||||||
import { LlmFactExtractor } from '../src/ingestion/llm-extractor';
|
import { LlmFactExtractor } from "../src/ingestion/llm-extractor";
|
||||||
import { NaiveExtractor } from '../src/ingestion/naive-extractor';
|
import { NaiveExtractor } from "../src/ingestion/naive-extractor";
|
||||||
import type { FactExtractor } from '../src/ingestion/types';
|
import type {
|
||||||
|
FactExtractor,
|
||||||
|
LlmTextGenerationModelInput,
|
||||||
|
} from "../src/ingestion/types";
|
||||||
|
|
||||||
describe('IdentityDB ingestion', () => {
|
describe("IdentityDB ingestion", () => {
|
||||||
let db: IdentityDB;
|
let db: IdentityDB;
|
||||||
|
|
||||||
beforeEach(async () => {
|
beforeEach(async () => {
|
||||||
db = await IdentityDB.connect({ client: 'sqlite', filename: ':memory:' });
|
db = await IdentityDB.connect({ client: "sqlite", filename: ":memory:" });
|
||||||
await db.initialize();
|
await db.initialize();
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -17,121 +20,140 @@ describe('IdentityDB ingestion', () => {
|
|||||||
await db.close();
|
await db.close();
|
||||||
});
|
});
|
||||||
|
|
||||||
it('ingests a statement using a provided extractor', async () => {
|
it("ingests a statement using a provided extractor", async () => {
|
||||||
const extractor: FactExtractor = {
|
const extractor: FactExtractor = {
|
||||||
async extract(input) {
|
async extract(input) {
|
||||||
return {
|
return {
|
||||||
statement: input,
|
statement: input,
|
||||||
topics: [
|
topics: [
|
||||||
{ name: 'I', category: 'entity', granularity: 'concrete', role: 'subject' },
|
{
|
||||||
{ name: 'TypeScript', category: 'entity', granularity: 'concrete', role: 'object' },
|
name: "I",
|
||||||
{ name: '2025', category: 'temporal', granularity: 'concrete', role: 'time' },
|
category: "entity",
|
||||||
|
granularity: "concrete",
|
||||||
|
role: "subject",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "TypeScript",
|
||||||
|
category: "entity",
|
||||||
|
granularity: "concrete",
|
||||||
|
role: "object",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "2025",
|
||||||
|
category: "temporal",
|
||||||
|
granularity: "concrete",
|
||||||
|
role: "time",
|
||||||
|
},
|
||||||
],
|
],
|
||||||
};
|
};
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
const fact = await db.ingestStatement('I have worked with TypeScript since 2025.', {
|
const fact = await db.ingestStatement(
|
||||||
|
"I have worked with TypeScript since 2025.",
|
||||||
|
{
|
||||||
extractor,
|
extractor,
|
||||||
});
|
},
|
||||||
|
);
|
||||||
|
|
||||||
expect(fact.topics.map((topic) => topic.name)).toEqual(['I', 'TypeScript', '2025']);
|
expect(fact.topics.map((topic) => topic.name)).toEqual([
|
||||||
|
"I",
|
||||||
|
"TypeScript",
|
||||||
|
"2025",
|
||||||
|
]);
|
||||||
|
|
||||||
const linkedFacts = await db.getTopicFactsLinkedTo('TypeScript', '2025');
|
const linkedFacts = await db.getTopicFactsLinkedTo("TypeScript", "2025");
|
||||||
expect(linkedFacts).toHaveLength(1);
|
expect(linkedFacts).toHaveLength(1);
|
||||||
expect(linkedFacts[0]?.statement).toBe('I have worked with TypeScript since 2025.');
|
expect(linkedFacts[0]?.statement).toBe(
|
||||||
|
"I have worked with TypeScript since 2025.",
|
||||||
|
);
|
||||||
});
|
});
|
||||||
|
|
||||||
it('ships a deterministic naive extractor for local usage', async () => {
|
it("ships a deterministic naive extractor for local usage", async () => {
|
||||||
const fact = await db.ingestStatement('I have worked with TypeScript since 2025.', {
|
const fact = await db.ingestStatement(
|
||||||
|
"I have worked with TypeScript since 2025.",
|
||||||
|
{
|
||||||
extractor: new NaiveExtractor(),
|
extractor: new NaiveExtractor(),
|
||||||
});
|
},
|
||||||
|
);
|
||||||
|
|
||||||
expect(fact.topics.map((topic) => topic.name)).toEqual(['I', 'TypeScript', '2025']);
|
expect(fact.topics.map((topic) => topic.name)).toEqual([
|
||||||
|
"I",
|
||||||
|
"TypeScript",
|
||||||
|
"2025",
|
||||||
|
]);
|
||||||
|
|
||||||
const topic = await db.getTopicByName('TypeScript', { includeFacts: true });
|
const topic = await db.getTopicByName("TypeScript", { includeFacts: true });
|
||||||
expect(topic?.facts).toHaveLength(1);
|
expect(topic?.facts).toHaveLength(1);
|
||||||
});
|
});
|
||||||
|
|
||||||
it('ships an LLM extractor adapter that turns structured JSON responses into facts', async () => {
|
it("ships an LLM extractor adapter that returns structured facts from the model", async () => {
|
||||||
let prompt = '';
|
let prompt: LlmTextGenerationModelInput | undefined = undefined;
|
||||||
|
|
||||||
const extractor = new LlmFactExtractor({
|
const extractor = new LlmFactExtractor({
|
||||||
model: {
|
model: {
|
||||||
async generateText(input) {
|
async generateText(input) {
|
||||||
prompt = input;
|
prompt = input;
|
||||||
|
|
||||||
return JSON.stringify({
|
return {
|
||||||
statement: 'I have worked with Bun and TypeScript since 2025.',
|
statement: "I have worked with Bun and TypeScript since 2025.",
|
||||||
summary: 'The speaker has Bun and TypeScript experience.',
|
summary: "The speaker has Bun and TypeScript experience.",
|
||||||
source: 'chat',
|
source: "chat",
|
||||||
confidence: 0.91,
|
confidence: 0.91,
|
||||||
metadata: { channel: 'telegram' },
|
metadata: { channel: "telegram" },
|
||||||
topics: [
|
topics: [
|
||||||
{ name: 'I', category: 'entity', granularity: 'concrete', role: 'subject' },
|
{
|
||||||
{ name: 'Bun', category: 'entity', granularity: 'concrete', role: 'object' },
|
name: "I",
|
||||||
{ name: 'TypeScript', category: 'entity', granularity: 'concrete', role: 'object' },
|
category: "entity",
|
||||||
{ name: '2025', category: 'temporal', granularity: 'concrete', role: 'time' },
|
granularity: "concrete",
|
||||||
|
role: "subject",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "Bun",
|
||||||
|
category: "entity",
|
||||||
|
granularity: "concrete",
|
||||||
|
role: "object",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "TypeScript",
|
||||||
|
category: "entity",
|
||||||
|
granularity: "concrete",
|
||||||
|
role: "object",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "2025",
|
||||||
|
category: "temporal",
|
||||||
|
granularity: "concrete",
|
||||||
|
role: "time",
|
||||||
|
},
|
||||||
],
|
],
|
||||||
});
|
};
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
instructions: 'Prefer technology and time topics.',
|
additionalInstructions: "Prefer technology and time topics.",
|
||||||
});
|
});
|
||||||
|
|
||||||
const fact = await db.ingestStatement('I have worked with Bun and TypeScript since 2025.', {
|
const fact = await db.ingestStatement(
|
||||||
|
"I have worked with Bun and TypeScript since 2025.",
|
||||||
|
{
|
||||||
extractor,
|
extractor,
|
||||||
});
|
},
|
||||||
|
);
|
||||||
|
|
||||||
expect(prompt).toContain('Prefer technology and time topics.');
|
expect(prompt).toEqual({
|
||||||
expect(prompt).toContain('I have worked with Bun and TypeScript since 2025.');
|
instruction: expect.stringContaining("Extract one structured fact from the user input."),
|
||||||
expect(fact.summary).toBe('The speaker has Bun and TypeScript experience.');
|
input: "I have worked with Bun and TypeScript since 2025.",
|
||||||
expect(fact.source).toBe('chat');
|
additionalInstruction: "Prefer technology and time topics.",
|
||||||
|
});
|
||||||
|
expect(fact.summary).toBe("The speaker has Bun and TypeScript experience.");
|
||||||
|
expect(fact.source).toBe("chat");
|
||||||
expect(fact.confidence).toBe(0.91);
|
expect(fact.confidence).toBe(0.91);
|
||||||
expect(fact.metadata).toEqual({ channel: 'telegram' });
|
expect(fact.metadata).toEqual({ channel: "telegram" });
|
||||||
expect(fact.topics.map((topic) => topic.name)).toEqual(['I', 'Bun', 'TypeScript', '2025']);
|
expect(fact.topics.map((topic) => topic.name)).toEqual([
|
||||||
});
|
"I",
|
||||||
|
"Bun",
|
||||||
it('parses JSON responses wrapped in markdown code fences', async () => {
|
"TypeScript",
|
||||||
const extractor = new LlmFactExtractor({
|
"2025",
|
||||||
model: {
|
]);
|
||||||
async generateText() {
|
|
||||||
return [
|
|
||||||
'Here is the extracted fact:',
|
|
||||||
'```json',
|
|
||||||
JSON.stringify({
|
|
||||||
statement: 'Bun powers TypeScript tooling.',
|
|
||||||
topics: [
|
|
||||||
{ name: 'Bun', category: 'entity', granularity: 'concrete' },
|
|
||||||
{ name: 'TypeScript', category: 'entity', granularity: 'concrete' },
|
|
||||||
],
|
|
||||||
}),
|
|
||||||
'```',
|
|
||||||
].join('\n');
|
|
||||||
},
|
|
||||||
},
|
|
||||||
});
|
|
||||||
|
|
||||||
const fact = await db.ingestStatement('Bun powers TypeScript tooling.', {
|
|
||||||
extractor,
|
|
||||||
});
|
|
||||||
|
|
||||||
expect(fact.topics.map((topic) => topic.name)).toEqual(['Bun', 'TypeScript']);
|
|
||||||
});
|
|
||||||
|
|
||||||
it('rejects invalid LLM responses before writing facts', async () => {
|
|
||||||
const extractor = new LlmFactExtractor({
|
|
||||||
model: {
|
|
||||||
async generateText() {
|
|
||||||
return 'not json at all';
|
|
||||||
},
|
|
||||||
},
|
|
||||||
});
|
|
||||||
|
|
||||||
await expect(
|
|
||||||
db.ingestStatement('Bun powers TypeScript tooling.', {
|
|
||||||
extractor,
|
|
||||||
}),
|
|
||||||
).rejects.toThrow('LLM extractor returned invalid JSON.');
|
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|||||||
Reference in New Issue
Block a user