|
|
|
|
@@ -1,17 +1,17 @@
|
|
|
|
|
import { IdentityDBError } from '../core/errors';
|
|
|
|
|
import type { TopicCategory, TopicGranularity } from '../types/domain';
|
|
|
|
|
import { IdentityDBError } from "../core/errors";
|
|
|
|
|
import type { TopicCategory, TopicGranularity } from "../types/domain";
|
|
|
|
|
import type {
|
|
|
|
|
ExtractedFact,
|
|
|
|
|
FactExtractor,
|
|
|
|
|
LlmFactExtractorOptions,
|
|
|
|
|
} from './types';
|
|
|
|
|
} from "./types";
|
|
|
|
|
|
|
|
|
|
const DEFAULT_INSTRUCTIONS = [
|
|
|
|
|
'Extract one structured fact from the user input.',
|
|
|
|
|
'Return JSON only. Do not include markdown, explanations, or prose outside the JSON object.',
|
|
|
|
|
"Extract one structured fact from the user input.",
|
|
|
|
|
"Return JSON only. Do not include markdown, explanations, or prose outside the JSON object.",
|
|
|
|
|
'Use this shape: {"statement": string?, "summary": string|null, "source": string|null, "confidence": number|null, "metadata": object|null, "topics": Array<{"name": string, "category": "entity"|"concept"|"temporal"|"custom"?, "granularity": "abstract"|"concrete"|"mixed"?, "role": string|null, "description": string|null, "metadata": object|null}>}.',
|
|
|
|
|
'Only include topics that are explicitly supported by the input.',
|
|
|
|
|
].join('\n');
|
|
|
|
|
'Only include topics that are explicitly in the input as-is. For example, topic "I started TypeScript since 2015" can be "I", "TypeScript", "2015".',
|
|
|
|
|
].join("\n");
|
|
|
|
|
|
|
|
|
|
export class LlmFactExtractor implements FactExtractor {
|
|
|
|
|
constructor(private readonly options: LlmFactExtractorOptions) {}
|
|
|
|
|
@@ -31,11 +31,13 @@ export class LlmFactExtractor implements FactExtractor {
|
|
|
|
|
|
|
|
|
|
return [
|
|
|
|
|
DEFAULT_INSTRUCTIONS,
|
|
|
|
|
instructions && instructions.length > 0 ? `Additional instructions:\n${instructions}` : null,
|
|
|
|
|
instructions && instructions.length > 0
|
|
|
|
|
? `Additional instructions:\n${instructions}`
|
|
|
|
|
: null,
|
|
|
|
|
`Input:\n${input.trim()}`,
|
|
|
|
|
]
|
|
|
|
|
.filter((value): value is string => value !== null)
|
|
|
|
|
.join('\n\n');
|
|
|
|
|
.join("\n\n");
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@@ -43,7 +45,7 @@ export function parseLlmExtractedFactResponse(response: string): ExtractedFact {
|
|
|
|
|
const payload = parseJsonCandidate(response);
|
|
|
|
|
|
|
|
|
|
if (!isRecord(payload)) {
|
|
|
|
|
throw new IdentityDBError('LLM extractor response must be a JSON object.');
|
|
|
|
|
throw new IdentityDBError("LLM extractor response must be a JSON object.");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
const topics = parseTopics(payload.topics);
|
|
|
|
|
@@ -88,7 +90,7 @@ function parseJsonCandidate(response: string): unknown {
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
throw new IdentityDBError('LLM extractor returned invalid JSON.');
|
|
|
|
|
throw new IdentityDBError("LLM extractor returned invalid JSON.");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
function collectJsonCandidates(response: string): string[] {
|
|
|
|
|
@@ -107,8 +109,8 @@ function collectJsonCandidates(response: string): string[] {
|
|
|
|
|
match = fencePattern.exec(response);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
const firstBrace = response.indexOf('{');
|
|
|
|
|
const lastBrace = response.lastIndexOf('}');
|
|
|
|
|
const firstBrace = response.indexOf("{");
|
|
|
|
|
const lastBrace = response.lastIndexOf("}");
|
|
|
|
|
if (firstBrace >= 0 && lastBrace > firstBrace) {
|
|
|
|
|
candidates.add(response.slice(firstBrace, lastBrace + 1));
|
|
|
|
|
}
|
|
|
|
|
@@ -116,25 +118,29 @@ function collectJsonCandidates(response: string): string[] {
|
|
|
|
|
return Array.from(candidates);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
function parseTopics(value: unknown): ExtractedFact['topics'] {
|
|
|
|
|
function parseTopics(value: unknown): ExtractedFact["topics"] {
|
|
|
|
|
if (!Array.isArray(value)) {
|
|
|
|
|
throw new IdentityDBError('LLM extractor response must include a topics array.');
|
|
|
|
|
throw new IdentityDBError(
|
|
|
|
|
"LLM extractor response must include a topics array.",
|
|
|
|
|
);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return value.map((entry) => parseTopic(entry));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
function parseTopic(value: unknown): ExtractedFact['topics'][number] {
|
|
|
|
|
function parseTopic(value: unknown): ExtractedFact["topics"][number] {
|
|
|
|
|
if (!isRecord(value)) {
|
|
|
|
|
throw new IdentityDBError('LLM extractor topics must be JSON objects.');
|
|
|
|
|
throw new IdentityDBError("LLM extractor topics must be JSON objects.");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
const name = optionalString(value.name)?.trim();
|
|
|
|
|
if (!name) {
|
|
|
|
|
throw new IdentityDBError('LLM extractor topics must include a non-empty name.');
|
|
|
|
|
throw new IdentityDBError(
|
|
|
|
|
"LLM extractor topics must include a non-empty name.",
|
|
|
|
|
);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
const topic: ExtractedFact['topics'][number] = { name };
|
|
|
|
|
const topic: ExtractedFact["topics"][number] = { name };
|
|
|
|
|
|
|
|
|
|
const category = optionalTopicCategory(value.category);
|
|
|
|
|
if (category !== undefined) {
|
|
|
|
|
@@ -169,8 +175,8 @@ function optionalString(value: unknown): string | undefined {
|
|
|
|
|
return undefined;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (typeof value !== 'string') {
|
|
|
|
|
throw new IdentityDBError('LLM extractor expected a string field.');
|
|
|
|
|
if (typeof value !== "string") {
|
|
|
|
|
throw new IdentityDBError("LLM extractor expected a string field.");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return value;
|
|
|
|
|
@@ -185,8 +191,10 @@ function optionalNullableString(value: unknown): string | null | undefined {
|
|
|
|
|
return null;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (typeof value !== 'string') {
|
|
|
|
|
throw new IdentityDBError('LLM extractor expected a nullable string field.');
|
|
|
|
|
if (typeof value !== "string") {
|
|
|
|
|
throw new IdentityDBError(
|
|
|
|
|
"LLM extractor expected a nullable string field.",
|
|
|
|
|
);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return value;
|
|
|
|
|
@@ -201,14 +209,18 @@ function optionalNullableNumber(value: unknown): number | null | undefined {
|
|
|
|
|
return null;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (typeof value !== 'number' || Number.isNaN(value)) {
|
|
|
|
|
throw new IdentityDBError('LLM extractor expected confidence to be a number or null.');
|
|
|
|
|
if (typeof value !== "number" || Number.isNaN(value)) {
|
|
|
|
|
throw new IdentityDBError(
|
|
|
|
|
"LLM extractor expected confidence to be a number or null.",
|
|
|
|
|
);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return value;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
function optionalMetadata(value: unknown): ExtractedFact['metadata'] | undefined {
|
|
|
|
|
function optionalMetadata(
|
|
|
|
|
value: unknown,
|
|
|
|
|
): ExtractedFact["metadata"] | undefined {
|
|
|
|
|
if (value === undefined) {
|
|
|
|
|
return undefined;
|
|
|
|
|
}
|
|
|
|
|
@@ -218,10 +230,12 @@ function optionalMetadata(value: unknown): ExtractedFact['metadata'] | undefined
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (!isJsonLike(value)) {
|
|
|
|
|
throw new IdentityDBError('LLM extractor metadata must be valid JSON-compatible data.');
|
|
|
|
|
throw new IdentityDBError(
|
|
|
|
|
"LLM extractor metadata must be valid JSON-compatible data.",
|
|
|
|
|
);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return value as ExtractedFact['metadata'];
|
|
|
|
|
return value as ExtractedFact["metadata"];
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
function optionalTopicCategory(value: unknown): TopicCategory | undefined {
|
|
|
|
|
@@ -229,27 +243,38 @@ function optionalTopicCategory(value: unknown): TopicCategory | undefined {
|
|
|
|
|
return undefined;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (value === 'entity' || value === 'concept' || value === 'temporal' || value === 'custom') {
|
|
|
|
|
if (
|
|
|
|
|
value === "entity" ||
|
|
|
|
|
value === "concept" ||
|
|
|
|
|
value === "temporal" ||
|
|
|
|
|
value === "custom"
|
|
|
|
|
) {
|
|
|
|
|
return value;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
throw new IdentityDBError('LLM extractor returned an unsupported topic category.');
|
|
|
|
|
throw new IdentityDBError(
|
|
|
|
|
"LLM extractor returned an unsupported topic category.",
|
|
|
|
|
);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
function optionalTopicGranularity(value: unknown): TopicGranularity | undefined {
|
|
|
|
|
function optionalTopicGranularity(
|
|
|
|
|
value: unknown,
|
|
|
|
|
): TopicGranularity | undefined {
|
|
|
|
|
if (value === undefined) {
|
|
|
|
|
return undefined;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (value === 'abstract' || value === 'concrete' || value === 'mixed') {
|
|
|
|
|
if (value === "abstract" || value === "concrete" || value === "mixed") {
|
|
|
|
|
return value;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
throw new IdentityDBError('LLM extractor returned an unsupported topic granularity.');
|
|
|
|
|
throw new IdentityDBError(
|
|
|
|
|
"LLM extractor returned an unsupported topic granularity.",
|
|
|
|
|
);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
function isRecord(value: unknown): value is Record<string, unknown> {
|
|
|
|
|
return typeof value === 'object' && value !== null && !Array.isArray(value);
|
|
|
|
|
return typeof value === "object" && value !== null && !Array.isArray(value);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
function isJsonLike(value: unknown): boolean {
|
|
|
|
|
@@ -257,7 +282,11 @@ function isJsonLike(value: unknown): boolean {
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (typeof value === 'string' || typeof value === 'number' || typeof value === 'boolean') {
|
|
|
|
|
if (
|
|
|
|
|
typeof value === "string" ||
|
|
|
|
|
typeof value === "number" ||
|
|
|
|
|
typeof value === "boolean"
|
|
|
|
|
) {
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|