refactor: make generateText model return ExtractedFact
This commit is contained in:
@@ -1,5 +1,3 @@
|
||||
import { IdentityDBError } from "../core/errors";
|
||||
import type { TopicCategory, TopicGranularity } from "../types/domain";
|
||||
import type {
|
||||
ExtractedFact,
|
||||
FactExtractor,
|
||||
@@ -18,8 +16,7 @@ export class LlmFactExtractor implements FactExtractor {
|
||||
|
||||
async extract(input: string): Promise<ExtractedFact> {
|
||||
const prompt = this.buildPrompt(input);
|
||||
const response = await this.options.model.generateText(prompt);
|
||||
return parseLlmExtractedFactResponse(response);
|
||||
return this.options.model.generateText(prompt);
|
||||
}
|
||||
|
||||
private buildPrompt(input: string): string {
|
||||
@@ -40,263 +37,3 @@ export class LlmFactExtractor implements FactExtractor {
|
||||
.join("\n\n");
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Turns a raw LLM text response into a validated `ExtractedFact`.
 *
 * The response is decoded with `parseJsonCandidate`, must be a JSON object,
 * and must carry a `topics` array. The remaining fields (`statement`,
 * `summary`, `source`, `confidence`, `metadata`) are validated when present
 * and copied onto the result only when they are not `undefined`.
 *
 * @param response - Raw text returned by the model.
 * @returns The validated fact; optional keys are omitted when absent.
 * @throws IdentityDBError when the payload is not a JSON object or any field
 *   fails validation in one of the helper parsers.
 */
export function parseLlmExtractedFactResponse(response: string): ExtractedFact {
  const decoded = parseJsonCandidate(response);
  if (!isRecord(decoded)) {
    throw new IdentityDBError("LLM extractor response must be a JSON object.");
  }

  // Validate in a fixed order so the first invalid field determines the error.
  const topics = parseTopics(decoded.topics);
  const statement = optionalString(decoded.statement);
  const summary = optionalNullableString(decoded.summary);
  const source = optionalNullableString(decoded.source);
  const confidence = optionalNullableNumber(decoded.confidence);
  const metadata = optionalMetadata(decoded.metadata);

  // Absent fields are left off the result instead of being set to undefined.
  return {
    topics,
    ...(statement !== undefined ? { statement } : {}),
    ...(summary !== undefined ? { summary } : {}),
    ...(source !== undefined ? { source } : {}),
    ...(confidence !== undefined ? { confidence } : {}),
    ...(metadata !== undefined ? { metadata } : {}),
  };
}
|
||||
|
||||
/**
 * Decodes the first parseable JSON candidate found in an LLM response.
 *
 * The response is trimmed, candidates are gathered with
 * `collectJsonCandidates`, and each is tried with `JSON.parse` in order.
 *
 * @param response - Raw text returned by the model.
 * @returns The value produced by the first candidate that parses.
 * @throws IdentityDBError when no candidate is valid JSON.
 */
function parseJsonCandidate(response: string): unknown {
  type Attempt = { ok: true; value: unknown } | { ok: false };

  // Parse failures are expected for most candidates, so they are not reported.
  const attempt = (text: string): Attempt => {
    try {
      return { ok: true, value: JSON.parse(text) };
    } catch {
      return { ok: false };
    }
  };

  for (const text of collectJsonCandidates(response.trim())) {
    const outcome = attempt(text);
    if (outcome.ok) {
      return outcome.value;
    }
  }

  throw new IdentityDBError("LLM extractor returned invalid JSON.");
}
|
||||
|
||||
/**
 * Lists the substrings of a response that might be a JSON document.
 *
 * Candidates are returned in this order, without duplicates:
 * 1. the response itself,
 * 2. the trimmed contents of each triple-backtick fence (optionally tagged
 *    `json`), skipping fences that are empty after trimming,
 * 3. the span from the first `{` to the last `}`, when both exist in order.
 *
 * @param response - Text to scan; it is used as given, without trimming.
 * @returns Candidate strings in the order described above.
 */
function collectJsonCandidates(response: string): string[] {
  const ordered: string[] = [response];
  const remember = (text: string): void => {
    if (!ordered.includes(text)) {
      ordered.push(text);
    }
  };

  for (const fenced of response.matchAll(/```(?:json)?\s*([\s\S]*?)```/gi)) {
    const inner = fenced[1]?.trim();
    if (inner) {
      remember(inner);
    }
  }

  const open = response.indexOf("{");
  const close = response.lastIndexOf("}");
  if (open !== -1 && close > open) {
    remember(response.slice(open, close + 1));
  }

  return ordered;
}
|
||||
|
||||
/**
 * Validates the `topics` field of an extractor payload.
 *
 * @param value - The raw `topics` value from the decoded payload.
 * @returns Each entry passed through `parseTopic`, in the original order.
 * @throws IdentityDBError when `value` is not an array, or when `parseTopic`
 *   rejects an entry.
 */
function parseTopics(value: unknown): ExtractedFact["topics"] {
  if (Array.isArray(value)) {
    const rawTopics: unknown[] = value;
    return rawTopics.map((rawTopic) => parseTopic(rawTopic));
  }

  throw new IdentityDBError(
    "LLM extractor response must include a topics array.",
  );
}
|
||||
|
||||
/**
 * Validates a single topic entry from an extractor payload.
 *
 * The entry must be a JSON object with a `name` that is non-empty after
 * trimming. `category`, `granularity`, `role`, `description`, and `metadata`
 * are validated when present and copied only when they are not `undefined`.
 *
 * @param value - One raw element of the payload's `topics` array.
 * @returns The validated topic, with its name trimmed.
 * @throws IdentityDBError when the entry is not an object, the name is
 *   missing or blank, or any optional field fails validation.
 */
function parseTopic(value: unknown): ExtractedFact["topics"][number] {
  if (!isRecord(value)) {
    throw new IdentityDBError("LLM extractor topics must be JSON objects.");
  }

  const rawName = optionalString(value.name);
  const name = rawName === undefined ? "" : rawName.trim();
  if (name.length === 0) {
    throw new IdentityDBError(
      "LLM extractor topics must include a non-empty name.",
    );
  }

  // Validate in a fixed order so the first invalid field determines the error.
  const category = optionalTopicCategory(value.category);
  const granularity = optionalTopicGranularity(value.granularity);
  const role = optionalNullableString(value.role);
  const description = optionalNullableString(value.description);
  const metadata = optionalMetadata(value.metadata);

  // Absent fields are left off the topic instead of being set to undefined.
  return {
    name,
    ...(category !== undefined ? { category } : {}),
    ...(granularity !== undefined ? { granularity } : {}),
    ...(role !== undefined ? { role } : {}),
    ...(description !== undefined ? { description } : {}),
    ...(metadata !== undefined ? { metadata } : {}),
  };
}
|
||||
|
||||
/**
 * Accepts a field that may be absent but must otherwise be a string.
 *
 * @param value - Raw field value.
 * @returns `undefined` when the field is absent, otherwise the string as is.
 * @throws IdentityDBError for any other value, including `null`.
 */
function optionalString(value: unknown): string | undefined {
  if (value === undefined || typeof value === "string") {
    return value;
  }

  throw new IdentityDBError("LLM extractor expected a string field.");
}
|
||||
|
||||
/**
 * Accepts a field that may be absent, `null`, or a string.
 *
 * @param value - Raw field value.
 * @returns `undefined` or `null` unchanged, otherwise the string as is.
 * @throws IdentityDBError for any other value.
 */
function optionalNullableString(value: unknown): string | null | undefined {
  if (value === undefined || value === null || typeof value === "string") {
    return value;
  }

  throw new IdentityDBError(
    "LLM extractor expected a nullable string field.",
  );
}
|
||||
|
||||
/**
 * Accepts a field that may be absent, `null`, or a finite number.
 *
 * Non-finite numbers (`NaN`, `Infinity`, `-Infinity`) are rejected: they
 * cannot be represented in JSON and are not meaningful confidence values.
 *
 * @param value - Raw field value.
 * @returns `undefined` or `null` unchanged, otherwise the number as is.
 * @throws IdentityDBError when the value is not a finite number.
 */
function optionalNullableNumber(value: unknown): number | null | undefined {
  if (value === undefined || value === null) {
    return value;
  }

  // Number.isFinite is false for NaN and both infinities, and never coerces.
  if (typeof value !== "number" || !Number.isFinite(value)) {
    throw new IdentityDBError(
      "LLM extractor expected confidence to be a number or null.",
    );
  }

  return value;
}
|
||||
|
||||
/**
 * Accepts a metadata field that may be absent, `null`, or JSON-like data.
 *
 * @param value - Raw field value.
 * @returns `undefined` or `null` unchanged, otherwise the value itself once
 *   `isJsonLike` has accepted it.
 * @throws IdentityDBError when `isJsonLike` rejects the value.
 */
function optionalMetadata(
  value: unknown,
): ExtractedFact["metadata"] | undefined {
  if (value === undefined || value === null) {
    return value;
  }

  if (isJsonLike(value)) {
    // isJsonLike is a plain boolean check, so the type must be asserted here.
    return value as ExtractedFact["metadata"];
  }

  throw new IdentityDBError(
    "LLM extractor metadata must be valid JSON-compatible data.",
  );
}
|
||||
|
||||
/**
 * Accepts a topic category that may be absent but must otherwise be one of
 * `"entity"`, `"concept"`, `"temporal"`, or `"custom"`.
 *
 * @param value - Raw field value.
 * @returns `undefined` when absent, otherwise the matching category.
 * @throws IdentityDBError for any other value, including `null`.
 */
function optionalTopicCategory(value: unknown): TopicCategory | undefined {
  if (value === undefined) {
    return undefined;
  }

  const supported: readonly TopicCategory[] = [
    "entity",
    "concept",
    "temporal",
    "custom",
  ];
  const matched = supported.find((candidate) => candidate === value);
  if (matched === undefined) {
    throw new IdentityDBError(
      "LLM extractor returned an unsupported topic category.",
    );
  }

  return matched;
}
|
||||
|
||||
/**
 * Accepts a topic granularity that may be absent but must otherwise be one of
 * `"abstract"`, `"concrete"`, or `"mixed"`.
 *
 * @param value - Raw field value.
 * @returns `undefined` when absent, otherwise the matching granularity.
 * @throws IdentityDBError for any other value, including `null`.
 */
function optionalTopicGranularity(
  value: unknown,
): TopicGranularity | undefined {
  if (value === undefined) {
    return undefined;
  }

  const supported: readonly TopicGranularity[] = [
    "abstract",
    "concrete",
    "mixed",
  ];
  const matched = supported.find((candidate) => candidate === value);
  if (matched === undefined) {
    throw new IdentityDBError(
      "LLM extractor returned an unsupported topic granularity.",
    );
  }

  return matched;
}
|
||||
|
||||
/**
 * Type guard for non-null, non-array objects.
 *
 * @param value - Value to inspect.
 * @returns `true` when `typeof value` is `"object"` and the value is neither
 *   `null` nor an array.
 */
function isRecord(value: unknown): value is Record<string, unknown> {
  if (value === null || Array.isArray(value)) {
    return false;
  }

  return typeof value === "object";
}
|
||||
|
||||
/**
 * Recursively checks whether a value is made only of JSON-representable data.
 *
 * Accepted: `null`, strings, booleans, finite numbers, arrays whose entries
 * all pass, and records (per `isRecord`) whose `Object.values` all pass.
 * Everything else is rejected, including `undefined`, functions, symbols,
 * bigints, and the non-finite numbers `NaN` and `±Infinity`, which JSON has
 * no literal for.
 *
 * @param value - Value to inspect.
 * @returns `true` when the whole structure is JSON-representable.
 */
function isJsonLike(value: unknown): boolean {
  if (value === null) {
    return true;
  }

  if (typeof value === "string" || typeof value === "boolean") {
    return true;
  }

  if (typeof value === "number") {
    // JSON.stringify would write NaN and the infinities as null.
    return Number.isFinite(value);
  }

  if (Array.isArray(value)) {
    return value.every((entry) => isJsonLike(entry));
  }

  if (isRecord(value)) {
    return Object.values(value).every((entry) => isJsonLike(entry));
  }

  return false;
}
|
||||
|
||||
@@ -18,7 +18,7 @@ export interface FactExtractor {
|
||||
}
|
||||
|
||||
export interface LlmTextGenerationModel {
|
||||
generateText(prompt: string): Promise<string>;
|
||||
generateText(prompt: string): Promise<ExtractedFact>;
|
||||
}
|
||||
|
||||
export interface LlmFactExtractorOptions {
|
||||
|
||||
@@ -53,7 +53,7 @@ describe('IdentityDB ingestion', () => {
|
||||
expect(topic?.facts).toHaveLength(1);
|
||||
});
|
||||
|
||||
it('ships an LLM extractor adapter that turns structured JSON responses into facts', async () => {
|
||||
it('ships an LLM extractor adapter that returns structured facts from the model', async () => {
|
||||
let prompt = '';
|
||||
|
||||
const extractor = new LlmFactExtractor({
|
||||
@@ -61,7 +61,7 @@ describe('IdentityDB ingestion', () => {
|
||||
async generateText(input) {
|
||||
prompt = input;
|
||||
|
||||
return JSON.stringify({
|
||||
return {
|
||||
statement: 'I have worked with Bun and TypeScript since 2025.',
|
||||
summary: 'The speaker has Bun and TypeScript experience.',
|
||||
source: 'chat',
|
||||
@@ -73,7 +73,7 @@ describe('IdentityDB ingestion', () => {
|
||||
{ name: 'TypeScript', category: 'entity', granularity: 'concrete', role: 'object' },
|
||||
{ name: '2025', category: 'temporal', granularity: 'concrete', role: 'time' },
|
||||
],
|
||||
});
|
||||
};
|
||||
},
|
||||
},
|
||||
instructions: 'Prefer technology and time topics.',
|
||||
@@ -91,47 +91,4 @@ describe('IdentityDB ingestion', () => {
|
||||
expect(fact.metadata).toEqual({ channel: 'telegram' });
|
||||
expect(fact.topics.map((topic) => topic.name)).toEqual(['I', 'Bun', 'TypeScript', '2025']);
|
||||
});
|
||||
|
||||
it('parses JSON responses wrapped in markdown code fences', async () => {
|
||||
const extractor = new LlmFactExtractor({
|
||||
model: {
|
||||
async generateText() {
|
||||
return [
|
||||
'Here is the extracted fact:',
|
||||
'```json',
|
||||
JSON.stringify({
|
||||
statement: 'Bun powers TypeScript tooling.',
|
||||
topics: [
|
||||
{ name: 'Bun', category: 'entity', granularity: 'concrete' },
|
||||
{ name: 'TypeScript', category: 'entity', granularity: 'concrete' },
|
||||
],
|
||||
}),
|
||||
'```',
|
||||
].join('\n');
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
const fact = await db.ingestStatement('Bun powers TypeScript tooling.', {
|
||||
extractor,
|
||||
});
|
||||
|
||||
expect(fact.topics.map((topic) => topic.name)).toEqual(['Bun', 'TypeScript']);
|
||||
});
|
||||
|
||||
it('rejects invalid LLM responses before writing facts', async () => {
|
||||
const extractor = new LlmFactExtractor({
|
||||
model: {
|
||||
async generateText() {
|
||||
return 'not json at all';
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
await expect(
|
||||
db.ingestStatement('Bun powers TypeScript tooling.', {
|
||||
extractor,
|
||||
}),
|
||||
).rejects.toThrow('LLM extractor returned invalid JSON.');
|
||||
});
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user