feat: make FactExtractor extract multiple facts per input

2026-05-20 22:59:35 +09:00
parent 188f03e8e8
commit 7602c92046
7 changed files with 157 additions and 127 deletions
--- a/src/ingestion/llm-extractor.ts
+++ b/src/ingestion/llm-extractor.ts
@@ -5,16 +5,18 @@ import type {
 } from "./types";

 const DEFAULT_INSTRUCTIONS = [
-  "Extract one structured fact from the user input.",
-  "Return JSON only. Do not include markdown, explanations, or prose outside the JSON object.",
-  'Use this shape: {"statement": string?, "summary": string|null, "source": string|null, "confidence": number|null, "metadata": object|null, "topics": Array<{"name": string, "category": "entity"|"concept"|"temporal"|"custom"?, "granularity": "abstract"|"concrete"|"mixed"?, "role": string|null, "description": string|null, "metadata": object|null}>}.',
-  'Only include topics that are explicitly in the input as-is. For example, topic "I started TypeScript since 2015" can be "I", "TypeScript", "2015".',
+  "Extract structured facts from the user input.",
+  "Return a JSON array of fact objects. Do not include markdown, explanations, or prose outside the JSON array.",
+  'Each fact object must have a "statement", "summary", "source", "confidence", and "topics" array.',
+  'Each topic in "topics" must have a "name", and may include "category", "granularity", and "role".',
+  "Only include topics that are explicitly in the input.",
+  "If the input contains multiple distinct facts, return them as separate objects in the array.",
 ].join("\n");

 export class LlmFactExtractor implements FactExtractor {
  constructor(private readonly options: LlmFactExtractorOptions) {}

-  async extract(input: string): Promise<ExtractedFact> {
+  async extract(input: string): Promise<ExtractedFact[]> {
    return this.options.model.generateText({
      instruction: DEFAULT_INSTRUCTIONS,
      input,