refactor: make generateText model return ExtractedFact

This commit is contained in:
2026-05-19 22:06:54 +09:00
parent 185edfdae8
commit 0480ea182f
3 changed files with 5 additions and 311 deletions

View File

@@ -1,5 +1,3 @@
import { IdentityDBError } from "../core/errors";
import type { TopicCategory, TopicGranularity } from "../types/domain";
import type {
ExtractedFact,
FactExtractor,
@@ -18,8 +16,7 @@ export class LlmFactExtractor implements FactExtractor {
/**
 * Extracts a fact from the given input text.
 *
 * Builds a prompt with `buildPrompt`, sends it to the configured model's
 * `generateText`, and returns the result of `parseLlmExtractedFactResponse`
 * applied to the model response.
 *
 * @param input - Text handed to `buildPrompt`.
 * @returns The extracted fact.
 */
async extract(input: string): Promise<ExtractedFact> {
const prompt = this.buildPrompt(input);
const response = await this.options.model.generateText(prompt);
return parseLlmExtractedFactResponse(response);
// NOTE(review): this second return is unreachable as written. The surrounding
// text looks like a rendered diff, so this line is presumably the replacement
// for the two statements above rather than additional logic — confirm.
return this.options.model.generateText(prompt);
}
private buildPrompt(input: string): string {
@@ -40,263 +37,3 @@ export class LlmFactExtractor implements FactExtractor {
.join("\n\n");
}
}
/**
 * Converts a raw LLM text response into an `ExtractedFact`.
 *
 * The response is decoded with `parseJsonCandidate` and must be a JSON object.
 * `topics` is always parsed; `statement`, `summary`, `source`, `confidence`
 * and `metadata` are copied onto the result only when their helper returns
 * something other than `undefined`.
 *
 * @param response - Raw text returned by the model.
 * @returns The validated fact.
 * @throws IdentityDBError when the decoded payload is not a JSON object.
 *   Errors raised by the field helpers propagate unchanged.
 */
export function parseLlmExtractedFactResponse(response: string): ExtractedFact {
  const raw = parseJsonCandidate(response);
  if (!isRecord(raw)) {
    throw new IdentityDBError("LLM extractor response must be a JSON object.");
  }

  // Validate every field up front, in a fixed order, so the first invalid
  // field determines which error surfaces.
  const topics = parseTopics(raw.topics);
  const statementValue = optionalString(raw.statement);
  const summaryValue = optionalNullableString(raw.summary);
  const sourceValue = optionalNullableString(raw.source);
  const confidenceValue = optionalNullableNumber(raw.confidence);
  const metadataValue = optionalMetadata(raw.metadata);

  // Absent fields are left off the result entirely rather than set to undefined.
  const fact: ExtractedFact = { topics };
  if (statementValue !== undefined) {
    fact.statement = statementValue;
  }
  if (summaryValue !== undefined) {
    fact.summary = summaryValue;
  }
  if (sourceValue !== undefined) {
    fact.source = sourceValue;
  }
  if (confidenceValue !== undefined) {
    fact.confidence = confidenceValue;
  }
  if (metadataValue !== undefined) {
    fact.metadata = metadataValue;
  }
  return fact;
}
/**
 * Decodes the first parseable JSON candidate found in a model response.
 *
 * The response is trimmed, expanded into candidate substrings by
 * `collectJsonCandidates`, and each candidate is tried in order.
 *
 * @param response - Raw text returned by the model.
 * @returns The value produced by the first candidate that `JSON.parse` accepts.
 * @throws IdentityDBError when no candidate parses.
 */
function parseJsonCandidate(response: string): unknown {
  const attempts = collectJsonCandidates(response.trim());

  for (const text of attempts) {
    try {
      const decoded: unknown = JSON.parse(text);
      return decoded;
    } catch {
      // Not valid JSON; move on to the next candidate.
    }
  }

  throw new IdentityDBError("LLM extractor returned invalid JSON.");
}
/**
 * Lists the substrings of a response that might hold a JSON document.
 *
 * Candidates are returned without duplicates, in this order:
 * 1. the response exactly as given,
 * 2. the trimmed, non-empty contents of each triple-backtick fence
 *    (optionally tagged `json`, case-insensitive),
 * 3. the span from the first `{` to the last `}`.
 *
 * @param response - Text to scan.
 * @returns The unique candidates in discovery order.
 */
function collectJsonCandidates(response: string): string[] {
  const seen = new Set<string>([response]);

  for (const fenced of response.matchAll(/```(?:json)?\s*([\s\S]*?)```/gi)) {
    const inner = fenced[1]?.trim();
    if (inner) {
      seen.add(inner);
    }
  }

  // Fall back to the outermost brace-delimited span for prose-wrapped objects.
  const open = response.indexOf("{");
  const close = response.lastIndexOf("}");
  if (open >= 0 && close > open) {
    seen.add(response.slice(open, close + 1));
  }

  return [...seen];
}
/**
 * Validates the `topics` field of an extractor payload.
 *
 * @param value - The raw `topics` value from the decoded payload.
 * @returns Each array entry converted by `parseTopic`.
 * @throws IdentityDBError when `value` is not an array. Errors from
 *   `parseTopic` propagate unchanged.
 */
function parseTopics(value: unknown): ExtractedFact["topics"] {
  if (Array.isArray(value)) {
    return value.map((item: unknown) => parseTopic(item));
  }

  throw new IdentityDBError(
    "LLM extractor response must include a topics array.",
  );
}
/**
 * Validates a single topic entry from an extractor payload.
 *
 * `name` is required and is trimmed. `category`, `granularity`, `role`,
 * `description` and `metadata` are copied only when their helper returns
 * something other than `undefined`.
 *
 * @param value - One raw entry of the `topics` array.
 * @returns The validated topic.
 * @throws IdentityDBError when the entry is not a JSON object or its name is
 *   missing or blank. Errors from the field helpers propagate unchanged.
 */
function parseTopic(value: unknown): ExtractedFact["topics"][number] {
  if (!isRecord(value)) {
    throw new IdentityDBError("LLM extractor topics must be JSON objects.");
  }

  // A missing name and a whitespace-only name are rejected the same way.
  const name = optionalString(value.name)?.trim() ?? "";
  if (name.length === 0) {
    throw new IdentityDBError(
      "LLM extractor topics must include a non-empty name.",
    );
  }

  // Validate the optional fields in a fixed order before building the result.
  const categoryValue = optionalTopicCategory(value.category);
  const granularityValue = optionalTopicGranularity(value.granularity);
  const roleValue = optionalNullableString(value.role);
  const descriptionValue = optionalNullableString(value.description);
  const metadataValue = optionalMetadata(value.metadata);

  const parsed: ExtractedFact["topics"][number] = { name };
  if (categoryValue !== undefined) {
    parsed.category = categoryValue;
  }
  if (granularityValue !== undefined) {
    parsed.granularity = granularityValue;
  }
  if (roleValue !== undefined) {
    parsed.role = roleValue;
  }
  if (descriptionValue !== undefined) {
    parsed.description = descriptionValue;
  }
  if (metadataValue !== undefined) {
    parsed.metadata = metadataValue;
  }
  return parsed;
}
/**
 * Reads an optional string field.
 *
 * @param value - Raw field value.
 * @returns `undefined` when the field is absent, otherwise the string itself.
 * @throws IdentityDBError when the value is present but not a string
 *   (`null` included).
 */
function optionalString(value: unknown): string | undefined {
  switch (typeof value) {
    case "undefined":
      return undefined;
    case "string":
      return value;
    default:
      throw new IdentityDBError("LLM extractor expected a string field.");
  }
}
/**
 * Reads an optional string field that may also be explicitly `null`.
 *
 * @param value - Raw field value.
 * @returns `undefined` when absent, `null` when null, otherwise the string.
 * @throws IdentityDBError when the value is any other type.
 */
function optionalNullableString(value: unknown): string | null | undefined {
  switch (typeof value) {
    case "undefined":
      return undefined;
    case "string":
      return value;
    default:
      // `typeof null` is "object", so null is handled here before rejecting.
      if (value === null) {
        return null;
      }
      throw new IdentityDBError(
        "LLM extractor expected a nullable string field.",
      );
  }
}
/**
 * Reads an optional numeric field that may also be explicitly `null`.
 *
 * @param value - Raw field value.
 * @returns `undefined` when absent, `null` when null, otherwise the number.
 * @throws IdentityDBError when the value is not a number, or is `NaN`.
 */
function optionalNullableNumber(value: unknown): number | null | undefined {
  if (value == null) {
    // Loose equality matches only null and undefined; keep them distinct.
    return value === null ? null : undefined;
  }

  if (typeof value === "number" && !Number.isNaN(value)) {
    return value;
  }

  throw new IdentityDBError(
    "LLM extractor expected confidence to be a number or null.",
  );
}
/**
 * Reads an optional metadata field.
 *
 * @param value - Raw field value.
 * @returns `undefined` when absent, `null` when null, otherwise the value
 *   itself once `isJsonLike` accepts it.
 * @throws IdentityDBError when `isJsonLike` rejects the value.
 */
function optionalMetadata(
  value: unknown,
): ExtractedFact["metadata"] | undefined {
  if (value === undefined) {
    return undefined;
  }

  const acceptable = value === null || isJsonLike(value);
  if (!acceptable) {
    throw new IdentityDBError(
      "LLM extractor metadata must be valid JSON-compatible data.",
    );
  }

  // The runtime check above is what justifies this assertion.
  return value as ExtractedFact["metadata"];
}
/**
 * Reads an optional topic category.
 *
 * @param value - Raw field value.
 * @returns `undefined` when absent, otherwise one of `"entity"`, `"concept"`,
 *   `"temporal"` or `"custom"`.
 * @throws IdentityDBError for any other value (`null` included).
 */
function optionalTopicCategory(value: unknown): TopicCategory | undefined {
  switch (value) {
    case undefined:
      return undefined;
    case "entity":
    case "concept":
    case "temporal":
    case "custom":
      return value;
    default:
      throw new IdentityDBError(
        "LLM extractor returned an unsupported topic category.",
      );
  }
}
/**
 * Reads an optional topic granularity.
 *
 * @param value - Raw field value.
 * @returns `undefined` when absent, otherwise one of `"abstract"`,
 *   `"concrete"` or `"mixed"`.
 * @throws IdentityDBError for any other value (`null` included).
 */
function optionalTopicGranularity(
  value: unknown,
): TopicGranularity | undefined {
  switch (value) {
    case undefined:
      return undefined;
    case "abstract":
    case "concrete":
    case "mixed":
      return value;
    default:
      throw new IdentityDBError(
        "LLM extractor returned an unsupported topic granularity.",
      );
  }
}
/**
 * Type guard for non-null, non-array objects.
 *
 * @param value - Value to test.
 * @returns `true` when `typeof value` is `"object"` and the value is neither
 *   `null` nor an array.
 */
function isRecord(value: unknown): value is Record<string, unknown> {
  if (value === null || Array.isArray(value)) {
    return false;
  }
  return typeof value === "object";
}
/**
 * Recursively checks that a value is built only from `null`, strings,
 * numbers, booleans, arrays, and objects whose own enumerable values satisfy
 * the same rule.
 *
 * @param value - Value to test.
 * @returns `true` when every nested value passes, `false` as soon as an
 *   `undefined`, function, symbol or bigint is encountered.
 */
function isJsonLike(value: unknown): boolean {
  switch (typeof value) {
    case "string":
    case "number":
    case "boolean":
      return true;
    case "object": {
      if (value === null) {
        return true;
      }
      // Arrays are checked element-wise, other objects by their own values.
      const children: unknown[] = Array.isArray(value)
        ? value
        : Object.values(value);
      return children.every((child) => isJsonLike(child));
    }
    default:
      return false;
  }
}