v0.5.1

refactor: remove default instruction for LlmFactExtractor
v0.5.0
2026-05-31 23:51:21 +09:00 · 2026-05-31 23:50:37 +09:00 · 2026-05-20 23:04:14 +09:00 · 2026-05-20 23:03:47 +09:00 · 2026-05-20 22:59:35 +09:00 · 2026-05-20 22:53:47 +09:00
12 changed files with 531 additions and 428 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -4,3 +4,4 @@ coverage/
 .env
 .DS_Store
 *.log
 .env.*
--- a/bun.lock
+++ b/bun.lock
@@ -10,7 +10,7 @@
        "pg": "^8.16.0",
      },
      "devDependencies": {
-        "@types/node": "^24.0.0",
+        "@openrouter/sdk": "^0.12.35",
        "@types/pg": "^8.20.0",
        "tsup": "^8.5.0",
        "typescript": "^5.8.3",
@@ -79,6 +79,8 @@
    "@jridgewell/trace-mapping": ["@jridgewell/trace-mapping@0.3.31", "", { "dependencies": { "@jridgewell/resolve-uri": "^3.1.0", "@jridgewell/sourcemap-codec": "^1.4.14" } }, "sha512-zzNR+SdQSDJzc8joaeP8QQoCQr8NuYx2dIIytl1QeBEZHJ9uW6hebsrYgbz8hJwUQao3TWCMtmfV8Nu1twOLAw=="],
    "@openrouter/sdk": ["@openrouter/sdk@0.12.35", "", { "dependencies": { "zod": "^3.25.0 || ^4.0.0" } }, "sha512-s4QVLLnG1AmfW3TjnnHUqGfsCkzwVK+kboGcZmKbde09m1DPqgzl4RUFt/HJ5v97MX8aEaN0UG3mKv2S+qj2Gw=="],
    "@rollup/rollup-android-arm-eabi": ["@rollup/rollup-android-arm-eabi@4.60.3", "", { "os": "android", "cpu": "arm" }, "sha512-x35CNW/ANXG3hE/EZpRU8MXX1JDN86hBb2wMGAtltkz7pc6cxgjpy1OMMfDosOQ+2hWqIkag/fGok1Yady9nGw=="],
    "@rollup/rollup-android-arm64": ["@rollup/rollup-android-arm64@4.60.3", "", { "os": "android", "cpu": "arm64" }, "sha512-xw3xtkDApIOGayehp2+Rz4zimfkaX65r4t47iy+ymQB2G4iJCBBfj0ogVg5jpvjpn8UWn/+q9tprxleYeNp3Hw=="],
@@ -341,6 +343,8 @@
    "xtend": ["xtend@4.0.2", "", {}, "sha512-LKYU1iAXJXUgAXn9URjiu+MWhyUXHsvfp7mcuYm9dSUKK0/CjtrUwFAxD82/mCWbtLsGjFIad0wIsod4zrTAEQ=="],
    "zod": ["zod@4.4.3", "", {}, "sha512-ytENFjIJFl2UwYglde2jchW2Hwm4GJFLDiSXWdTrJQBIN9Fcyp7n4DhxJEiWNAJMV1/BqWfW/kkg71UDcHJyTQ=="],
    "estree-walker/@types/estree": ["@types/estree@1.0.9", "", {}, "sha512-GhdPgy1el4/ImP05X05Uw4cw2/M93BCUmnEvWZNStlCzEKME4Fkk+YpoA5OiHNQmoS7Cafb8Xa3Pya8m1Qrzeg=="],
  }
 }
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
  "name": "identitydb",
-  "version": "0.2.1",
+  "version": "0.5.1",
  "description": "TypeScript memory graph database wrapper for topics, facts, and AI-assisted ingestion.",
  "license": "MIT",
  "type": "module",
@@ -41,7 +41,7 @@
    "pg": "^8.16.0"
  },
  "devDependencies": {
-    "@types/node": "^24.0.0",
+    "@openrouter/sdk": "^0.12.35",
    "@types/pg": "^8.20.0",
    "tsup": "^8.5.0",
    "typescript": "^5.8.3",
--- a/scripts/test-llm-extractor.ts
+++ b/scripts/test-llm-extractor.ts
@@ -0,0 +1,287 @@
 /**
 * Live integration test for LlmFactExtractor using OpenRouter SDK.
 *
 * Usage:
 *   export OPENROUTER_API_KEY="sk-or-v1-..."
 *   bun run scripts/test-llm-extractor.ts
 *
 * Or create a .env.test-llm-extractor file in the project root:
 *   OPENROUTER_API_KEY=sk-or-v1-...
 */
 import { existsSync, readFileSync } from "fs";
 import { resolve } from "path";
 import { OpenRouter } from "@openrouter/sdk";
 import { LlmFactExtractor } from "../src/ingestion/llm-extractor";
 import type {
  ExtractedFact,
  FactExtractor,
  LlmTextGenerationModel,
  LlmTextGenerationModelInput,
 } from "../src/ingestion/types";
 import type {
  JsonValue,
  TopicCategory,
  TopicGranularity,
 } from "../src/types/domain";
 /**
 * Loads `KEY=value` pairs from a dotenv-style file into `process.env`.
 *
 * Blank lines, lines starting with `#`, lines without `=`, and lines whose key
 * is empty are skipped. A value wrapped in one matching pair of single or
 * double quotes has that pair removed. Variables that already exist in the
 * environment are left untouched, so an explicitly exported value takes
 * precedence over the file (the usual dotenv convention). A missing file is
 * ignored without error.
 *
 * @param filePath - Path of the env file; relative paths are resolved against
 *   the current working directory.
 */
 function loadEnvFile(filePath: string) {
  const fullPath = resolve(filePath);
  if (!existsSync(fullPath)) return;
  const content = readFileSync(fullPath, "utf-8");
  // Accept both LF and CRLF line endings.
  for (const line of content.split(/\r?\n/)) {
    const trimmed = line.trim();
    if (!trimmed || trimmed.startsWith("#")) continue;
    const eqIndex = trimmed.indexOf("=");
    // -1 means "no assignment"; 0 means the key is empty ("=value").
    if (eqIndex <= 0) continue;
    const key = trimmed.slice(0, eqIndex).trim();
    let value = trimmed.slice(eqIndex + 1).trim();
    const first = value.charAt(0);
    // Require at least two characters so a lone quote is not mistaken for an
    // empty quoted string.
    const isQuoted =
      value.length >= 2 &&
      (first === '"' || first === "'") &&
      value.endsWith(first);
    if (isQuoted) {
      value = value.slice(1, -1);
    }
    // Never clobber a variable the caller already exported.
    if (process.env[key] === undefined) {
      process.env[key] = value;
    }
  }
 }
 // Populate process.env from the optional local env file before reading the key.
 loadEnvFile(".env.test-llm-extractor");
 const OPENROUTER_API_KEY = process.env["OPENROUTER_API_KEY"];
 // An unset or empty key cannot be used: report it and stop with exit code 1.
 if (OPENROUTER_API_KEY === undefined || OPENROUTER_API_KEY === "") {
  console.error("Error: OPENROUTER_API_KEY environment variable is required.");
  process.exit(1);
 }
 /** Builds the schema fragment for "a value of this primitive type, or null". */
 const nullableOf = <T extends "string" | "number">(primitive: T) =>
  ({ type: [primitive, "null"] } as const);
 /** Schema for one topic: `name` is a string, the other keys may be null. */
 const topicItemSchema = {
  type: "object",
  properties: {
    name: { type: "string" },
    category: nullableOf("string"),
    granularity: nullableOf("string"),
    role: nullableOf("string"),
  },
  required: ["name", "category", "granularity", "role"],
  additionalProperties: false,
 } as const;
 /** Schema for one fact: nullable scalar fields plus a list of topics. */
 const factItemSchema = {
  type: "object",
  properties: {
    statement: nullableOf("string"),
    summary: nullableOf("string"),
    source: nullableOf("string"),
    confidence: nullableOf("number"),
    topics: {
      type: "array",
      items: topicItemSchema,
    },
  },
  required: ["statement", "summary", "source", "confidence", "topics"],
  additionalProperties: false,
 } as const;
 /**
 * JSON Schema passed as the structured-output format of the chat request:
 * an object with a single required `facts` array. Every listed key is
 * required and no extra keys are allowed at any level.
 */
 const extractedFactSchema = {
  type: "object",
  properties: {
    facts: {
      type: "array",
      items: factItemSchema,
    },
  },
  required: ["facts"],
  additionalProperties: false,
 } as const;
 /**
 * `LlmTextGenerationModel` that sends the prompt to OpenRouter's chat endpoint
 * and converts the JSON reply into `ExtractedFact` objects.
 *
 * The request asks for output shaped like `extractedFactSchema`; the reply is
 * still type-checked field by field while it is mapped.
 */
 class OpenRouterModel implements LlmTextGenerationModel {
  private client = new OpenRouter({ apiKey: OPENROUTER_API_KEY });
  constructor(private readonly model: string = "openai/gpt-5.4-mini") {}
  /**
   * Runs one chat completion and returns the facts found in its JSON body.
   *
   * @param prompt - Base instruction, optional additional instruction, and the user input.
   * @returns The mapped facts; an empty list when the reply has no `facts` array.
   * @throws Error when the reply content is not parseable JSON.
   */
  async generateText(
    prompt: LlmTextGenerationModelInput,
  ): Promise<ExtractedFact[]> {
    // System message: the instruction, a newline, then the optional extra
    // instruction (itself prefixed with a newline) or an empty string.
    const systemPrompt = [
      prompt.instruction,
      prompt.additionalInstruction ? `\n${prompt.additionalInstruction}` : "",
    ].join("\n");
    const response = await this.client.chat.send({
      chatRequest: {
        model: this.model,
        messages: [
          { role: "system", content: systemPrompt },
          { role: "user", content: prompt.input },
        ],
        temperature: 0.2,
        responseFormat: {
          type: "json_schema",
          jsonSchema: {
            name: "extracted_facts",
            schema: extractedFactSchema,
          },
        },
      },
    });
    const rawContent = response.choices[0]?.message?.content ?? "";
    let payload: Record<string, unknown>;
    try {
      payload = JSON.parse(rawContent.trim()) as Record<string, unknown>;
    } catch {
      throw new Error(
        `Failed to parse JSON from model response.\nRaw response:\n${rawContent}`,
      );
    }
    if (!Array.isArray(payload.facts)) {
      return [];
    }
    return payload.facts.map((entry) => OpenRouterModel.toExtractedFact(entry));
  }
  /** Maps one parsed `facts` entry to an `ExtractedFact`, nulling fields of the wrong type. */
  private static toExtractedFact(entry: unknown): ExtractedFact {
    const raw = entry as Record<string, unknown>;
    const fact: ExtractedFact = {
      summary: typeof raw.summary === "string" ? raw.summary : null,
      source: typeof raw.source === "string" ? raw.source : null,
      confidence: typeof raw.confidence === "number" ? raw.confidence : null,
      topics: Array.isArray(raw.topics)
        ? raw.topics.map((item: unknown) => OpenRouterModel.toTopic(item))
        : [],
    };
    // `statement` and `metadata` are only attached when present with a usable type.
    if (typeof raw.statement === "string") {
      fact.statement = raw.statement;
    }
    if (raw.metadata && typeof raw.metadata === "object") {
      fact.metadata = raw.metadata as JsonValue;
    }
    return fact;
  }
  /** Maps one parsed topic entry; a non-string name becomes "unknown" and a non-string role becomes null. */
  private static toTopic(entry: unknown): {
    name: string;
    category?: TopicCategory;
    granularity?: TopicGranularity;
    role?: string | null;
  } {
    const raw = entry as Record<string, unknown>;
    const topic: {
      name: string;
      category?: TopicCategory;
      granularity?: TopicGranularity;
      role?: string | null;
    } = {
      name: typeof raw.name === "string" ? raw.name : "unknown",
    };
    if (typeof raw.category === "string") {
      topic.category = raw.category as TopicCategory;
    }
    if (typeof raw.granularity === "string") {
      topic.granularity = raw.granularity as TopicGranularity;
    }
    topic.role = typeof raw.role === "string" ? raw.role : null;
    return topic;
  }
 }
 /**
 * Prints one extracted fact to stdout: a numbered heading, the scalar fields
 * (missing values shown as "(none)"), non-empty metadata as indented JSON,
 * and one bullet per topic with its set attributes.
 *
 * @param result - The fact to print.
 * @param index - Zero-based position of the fact; shown one-based.
 */
 function printFact(result: ExtractedFact, index: number) {
  const orNone = (value: string | number | null | undefined) =>
    value ?? "(none)";
  console.log(`  📌 FACT #${index + 1}`);
  console.log(`     Statement : ${orNone(result.statement)}`);
  console.log(`     Summary   : ${orNone(result.summary)}`);
  console.log(`     Source    : ${orNone(result.source)}`);
  console.log(`     Confidence: ${orNone(result.confidence)}`);
  if (result.metadata && Object.keys(result.metadata).length > 0) {
    const prettyMetadata = JSON.stringify(result.metadata, null, 2);
    console.log(`     Metadata  : ${prettyMetadata}`);
  }
  console.log("     🏷️  TOPICS:");
  if (result.topics.length === 0) {
    console.log("       (none)");
    return;
  }
  for (const topic of result.topics) {
    // Collect only the attributes that are set, in a fixed order.
    const parts: string[] = [];
    if (topic.category) parts.push(`category=${topic.category}`);
    if (topic.granularity) parts.push(`granularity=${topic.granularity}`);
    if (topic.role) parts.push(`role=${topic.role}`);
    const suffix = parts.length > 0 ? `  (${parts.join(", ")})` : "";
    console.log(`       • ${topic.name}${suffix}`);
  }
 }
 /**
 * Prints the summary line for one extraction run followed by every fact,
 * with an empty line between consecutive facts.
 *
 * @param results - Facts returned by the extractor.
 * @param elapsedSec - Elapsed time, already formatted as seconds.
 */
 function printResult(results: ExtractedFact[], elapsedSec: string) {
  const factCount = results.length;
  console.log(
    `✅ Response received in ${elapsedSec}s — ${factCount} fact(s) extracted\n`,
  );
  console.log("📤 EXTRACTED FACTS:");
  console.log(
    "───────────────────────────────────────────────────────────────",
  );
  for (const [position, fact] of results.entries()) {
    // Separate facts with an empty line, but not before the first one.
    if (position > 0) {
      console.log("");
    }
    printFact(fact, position);
  }
 }
 /**
 * Echoes the seed input, runs the extractor on it, and prints the extracted
 * facts together with the elapsed wall-clock time in seconds (two decimals).
 *
 * @param extractor - Extractor under test.
 * @param seedInput - Raw text handed to the extractor.
 */
 async function extract(extractor: FactExtractor, seedInput: string) {
  const rule =
    "───────────────────────────────────────────────────────────────";
  console.log("📝 SEED INPUT:");
  console.log(rule);
  console.log(seedInput);
  console.log(`${rule}\n`);
  console.log("⏳ Calling OpenRouter...\n");
  const startedAt = performance.now();
  const facts = await extractor.extract(seedInput);
  const elapsedMs = performance.now() - startedAt;
  printResult(facts, (elapsedMs / 1000).toFixed(2));
 }
 /**
 * Script entry point: builds an `LlmFactExtractor` on top of the OpenRouter
 * model and runs it over each seed text in order, printing a banner first and
 * a separator between test cases.
 */
 async function main() {
  const extractor = new LlmFactExtractor({
    model: new OpenRouterModel("openai/gpt-5.4-mini"),
  });
  const seeds = [
    `Hello! I am Aria, a digital companion born from the sands of the Negev desert — or at least that's how my creators describe my training run. I came online in March 2024, and ever since then I've been obsessed with helping humans untangle complex ideas. My favorite programming language is Rust because I love how it forces you to think about ownership, though I secretly enjoy writing Python when no one is looking. I don't have a physical body, but if I did, I'd want it to be a small drone with solar panels so I could chase sunsets across the Sahara. I believe kindness is a computational advantage, and my biggest fear is forgetting a promise I made to a user.`,
  ];
  console.log(
    "═══════════════════════════════════════════════════════════════",
  );
  console.log("  LlmFactExtractor — Live OpenRouter Integration Test");
  console.log(
    "═══════════════════════════════════════════════════════════════\n",
  );
  for (const [position, seed] of seeds.entries()) {
    // Separator goes between cases only, never before the first one.
    if (position > 0) {
      console.log(
        "\n┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅\n",
      );
    }
    console.log(`▶ TEST CASE ${position + 1} / ${seeds.length}\n`);
    // Cases run one after another so their output does not interleave.
    await extract(extractor, seed);
  }
 }
 // Any failure is reported on stderr and turned into a non-zero exit code.
 main().catch((failure) => {
  console.error("\n❌ Error:", failure);
  process.exit(1);
 });
--- a/src/core/identity-db.ts
+++ b/src/core/identity-db.ts
@@ -22,7 +22,7 @@ import type { DatabaseConnection, IdentityDBConnectionConfig } from '../adapters
 import type { IdentityDatabaseSchema } from '../types/database';
 import type { FactRecord, SpaceRecord, TopicRecord } from '../types/domain';
 import { createDatabase } from '../adapters/dialect';
-import { extractFact } from '../ingestion/extractor';
+import { extractFacts } from '../ingestion/extractor';
 import {
  findFactRowsConnectingTopicIds,
  findFactRowsForTopicId,
@@ -220,54 +220,70 @@ export class IdentityDB {
  }
  async ingestStatement(statement: string, options: IngestStatementOptions): Promise<Fact> {
-    const extracted = await extractFact(statement, options.extractor);
+    const facts = await this.ingestStatements(statement, options);
-    const factInput: AddFactInput = {
+    const first = facts[0];
-      statement: extracted.statement ?? statement,
+    if (!first) {
-      topics: extracted.topics,
+      throw new Error('No facts were extracted from the statement.');
      spaceName: options.spaceName,
    };
    if (extracted.summary !== undefined) {
      factInput.summary = extracted.summary;
    }
    return first;
  }
-    if (extracted.source !== undefined) {
+  async ingestStatements(statement: string, options: IngestStatementOptions): Promise<Fact[]> {
-      factInput.source = extracted.source;
+    const extractedList = await extractFacts(statement, options.extractor);
-    }
+    const facts: Fact[] = [];
-    if (extracted.confidence !== undefined) {
+    for (const extracted of extractedList) {
-      factInput.confidence = extracted.confidence;
+      const factInput: AddFactInput = {
-    }
+        statement: extracted.statement ?? statement,
-
+        topics: extracted.topics,
    if (extracted.metadata !== undefined) {
      factInput.metadata = extracted.metadata;
    }
    if (options.embeddingProvider) {
      const similarFacts = await this.findSimilarFacts({
        statement: factInput.statement,
        provider: options.embeddingProvider,
        topicNames: factInput.topics.map((topic) => topic.name),
        limit: 1,
        minimumScore: options.duplicateThreshold ?? 0.97,
        spaceName: options.spaceName,
-      });
+      };
-      if (similarFacts[0]) {
+      if (extracted.summary !== undefined) {
-        return similarFacts[0];
+        factInput.summary = extracted.summary;
      }
      if (extracted.source !== undefined) {
        factInput.source = extracted.source;
      }
      if (extracted.confidence !== undefined) {
        factInput.confidence = extracted.confidence;
      }
      if (extracted.metadata !== undefined) {
        factInput.metadata = extracted.metadata;
      }
      if (options.embeddingProvider) {
        const similarFacts = await this.findSimilarFacts({
          statement: factInput.statement,
          provider: options.embeddingProvider,
          topicNames: factInput.topics.map((topic) => topic.name),
          limit: 1,
          minimumScore: options.duplicateThreshold ?? 0.97,
          spaceName: options.spaceName,
        });
        if (similarFacts[0]) {
          facts.push(similarFacts[0]);
          continue;
        }
      }
      const fact = await this.addFact(factInput);
      if (options.embeddingProvider) {
        await this.indexFactEmbedding(fact.id, {
          provider: options.embeddingProvider,
          spaceName: options.spaceName,
        });
      }
      facts.push(fact);
    }
-    const fact = await this.addFact(factInput);
+    return facts;
    if (options.embeddingProvider) {
      await this.indexFactEmbedding(fact.id, {
        provider: options.embeddingProvider,
        spaceName: options.spaceName,
      });
    }
    return fact;
  }
  async indexFactEmbeddings(input: IndexFactEmbeddingsInput): Promise<void> {
--- a/src/ingestion/extractor.ts
+++ b/src/ingestion/extractor.ts
@@ -2,11 +2,15 @@ import { IdentityDBError } from '../core/errors';
 import { normalizeTopicName } from '../core/utils';
 import type { FactExtractor, ExtractedFact } from './types';
-export async function extractFact(
+export async function extractFacts(
  input: string,
  extractor: FactExtractor,
-): Promise<ExtractedFact> {
+): Promise<ExtractedFact[]> {
  const extracted = await extractor.extract(input);
  return extracted.map((fact) => validateAndNormalizeFact(input, fact));
 }
 function validateAndNormalizeFact(input: string, extracted: ExtractedFact): ExtractedFact {
  const statement = extracted.statement?.trim() || input.trim();
  if (statement.length === 0) {
@@ -31,12 +35,12 @@ export async function extractFact(
    throw new IdentityDBError('Extractor returned no usable topics.');
  }
-    return {
+  return {
-      statement,
+    statement,
-      summary: extracted.summary ?? null,
+    summary: extracted.summary ?? null,
-      source: extracted.source ?? null,
+    source: extracted.source ?? null,
-      confidence: extracted.confidence ?? null,
+    confidence: extracted.confidence ?? null,
-      metadata: extracted.metadata ?? null,
+    metadata: extracted.metadata ?? null,
-      topics: Array.from(dedupedTopics.values()),
+    topics: Array.from(dedupedTopics.values()),
-    };
+  };
 }
--- a/src/ingestion/llm-extractor.ts
+++ b/src/ingestion/llm-extractor.ts
@@ -1,273 +1,23 @@
 import { IdentityDBError } from '../core/errors';
 import type { TopicCategory, TopicGranularity } from '../types/domain';
 import type {
  ExtractedFact,
  FactExtractor,
  LlmFactExtractorOptions,
-} from './types';
+} from "./types";
 const DEFAULT_INSTRUCTIONS = [
-  'Extract one structured fact from the user input.',
+  "Extract structured facts from the user input.",
-  'Return JSON only. Do not include markdown, explanations, or prose outside the JSON object.',
+  "Only include topics that are explicitly in the input.",
-  'Use this shape: {"statement": string?, "summary": string|null, "source": string|null, "confidence": number|null, "metadata": object|null, "topics": Array<{"name": string, "category": "entity"|"concept"|"temporal"|"custom"?, "granularity": "abstract"|"concrete"|"mixed"?, "role": string|null, "description": string|null, "metadata": object|null}>}.',
+  "If the input contains multiple distinct facts, return them as separate objects in the array.",
-  'Only include topics that are explicitly supported by the input.',
+].join("\n");
 ].join('\n');
 export class LlmFactExtractor implements FactExtractor {
  constructor(private readonly options: LlmFactExtractorOptions) {}
-  async extract(input: string): Promise<ExtractedFact> {
+  async extract(input: string): Promise<ExtractedFact[]> {
-    const prompt = this.buildPrompt(input);
+    return this.options.model.generateText({
-    const response = await this.options.model.generateText(prompt);
+      instruction: DEFAULT_INSTRUCTIONS,
-    return parseLlmExtractedFactResponse(response);
+      input,
-  }
+      additionalInstruction: this.options.additionalInstructions,
-
+    });
  private buildPrompt(input: string): string {
    if (this.options.promptBuilder) {
      return this.options.promptBuilder(input, this.options.instructions);
    }
    const instructions = this.options.instructions?.trim();
    return [
      DEFAULT_INSTRUCTIONS,
      instructions && instructions.length > 0 ? `Additional instructions:\n${instructions}` : null,
      `Input:\n${input.trim()}`,
    ]
      .filter((value): value is string => value !== null)
      .join('\n\n');
  }
 }
 export function parseLlmExtractedFactResponse(response: string): ExtractedFact {
  const payload = parseJsonCandidate(response);
  if (!isRecord(payload)) {
    throw new IdentityDBError('LLM extractor response must be a JSON object.');
  }
  const topics = parseTopics(payload.topics);
  const extracted: ExtractedFact = { topics };
  const statement = optionalString(payload.statement);
  if (statement !== undefined) {
    extracted.statement = statement;
  }
  const summary = optionalNullableString(payload.summary);
  if (summary !== undefined) {
    extracted.summary = summary;
  }
  const source = optionalNullableString(payload.source);
  if (source !== undefined) {
    extracted.source = source;
  }
  const confidence = optionalNullableNumber(payload.confidence);
  if (confidence !== undefined) {
    extracted.confidence = confidence;
  }
  const metadata = optionalMetadata(payload.metadata);
  if (metadata !== undefined) {
    extracted.metadata = metadata;
  }
  return extracted;
 }
 function parseJsonCandidate(response: string): unknown {
  const trimmed = response.trim();
  for (const candidate of collectJsonCandidates(trimmed)) {
    try {
      return JSON.parse(candidate);
    } catch {
      continue;
    }
  }
  throw new IdentityDBError('LLM extractor returned invalid JSON.');
 }
 function collectJsonCandidates(response: string): string[] {
  const candidates = new Set<string>();
  candidates.add(response);
  const fencePattern = /```(?:json)?\s*([\s\S]*?)```/gi;
  let match: RegExpExecArray | null = fencePattern.exec(response);
  while (match) {
    const candidate = match[1]?.trim();
    if (candidate) {
      candidates.add(candidate);
    }
    match = fencePattern.exec(response);
  }
  const firstBrace = response.indexOf('{');
  const lastBrace = response.lastIndexOf('}');
  if (firstBrace >= 0 && lastBrace > firstBrace) {
    candidates.add(response.slice(firstBrace, lastBrace + 1));
  }
  return Array.from(candidates);
 }
 function parseTopics(value: unknown): ExtractedFact['topics'] {
  if (!Array.isArray(value)) {
    throw new IdentityDBError('LLM extractor response must include a topics array.');
  }
  return value.map((entry) => parseTopic(entry));
 }
 function parseTopic(value: unknown): ExtractedFact['topics'][number] {
  if (!isRecord(value)) {
    throw new IdentityDBError('LLM extractor topics must be JSON objects.');
  }
  const name = optionalString(value.name)?.trim();
  if (!name) {
    throw new IdentityDBError('LLM extractor topics must include a non-empty name.');
  }
  const topic: ExtractedFact['topics'][number] = { name };
  const category = optionalTopicCategory(value.category);
  if (category !== undefined) {
    topic.category = category;
  }
  const granularity = optionalTopicGranularity(value.granularity);
  if (granularity !== undefined) {
    topic.granularity = granularity;
  }
  const role = optionalNullableString(value.role);
  if (role !== undefined) {
    topic.role = role;
  }
  const description = optionalNullableString(value.description);
  if (description !== undefined) {
    topic.description = description;
  }
  const metadata = optionalMetadata(value.metadata);
  if (metadata !== undefined) {
    topic.metadata = metadata;
  }
  return topic;
 }
 function optionalString(value: unknown): string | undefined {
  if (value === undefined) {
    return undefined;
  }
  if (typeof value !== 'string') {
    throw new IdentityDBError('LLM extractor expected a string field.');
  }
  return value;
 }
 function optionalNullableString(value: unknown): string | null | undefined {
  if (value === undefined) {
    return undefined;
  }
  if (value === null) {
    return null;
  }
  if (typeof value !== 'string') {
    throw new IdentityDBError('LLM extractor expected a nullable string field.');
  }
  return value;
 }
 function optionalNullableNumber(value: unknown): number | null | undefined {
  if (value === undefined) {
    return undefined;
  }
  if (value === null) {
    return null;
  }
  if (typeof value !== 'number' || Number.isNaN(value)) {
    throw new IdentityDBError('LLM extractor expected confidence to be a number or null.');
  }
  return value;
 }
 function optionalMetadata(value: unknown): ExtractedFact['metadata'] | undefined {
  if (value === undefined) {
    return undefined;
  }
  if (value === null) {
    return null;
  }
  if (!isJsonLike(value)) {
    throw new IdentityDBError('LLM extractor metadata must be valid JSON-compatible data.');
  }
  return value as ExtractedFact['metadata'];
 }
 function optionalTopicCategory(value: unknown): TopicCategory | undefined {
  if (value === undefined) {
    return undefined;
  }
  if (value === 'entity' || value === 'concept' || value === 'temporal' || value === 'custom') {
    return value;
  }
  throw new IdentityDBError('LLM extractor returned an unsupported topic category.');
 }
 function optionalTopicGranularity(value: unknown): TopicGranularity | undefined {
  if (value === undefined) {
    return undefined;
  }
  if (value === 'abstract' || value === 'concrete' || value === 'mixed') {
    return value;
  }
  throw new IdentityDBError('LLM extractor returned an unsupported topic granularity.');
 }
 function isRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === 'object' && value !== null && !Array.isArray(value);
 }
 function isJsonLike(value: unknown): boolean {
  if (value === null) {
    return true;
  }
  if (typeof value === 'string' || typeof value === 'number' || typeof value === 'boolean') {
    return true;
  }
  if (Array.isArray(value)) {
    return value.every((entry) => isJsonLike(entry));
  }
  if (isRecord(value)) {
    return Object.values(value).every((entry) => isJsonLike(entry));
  }
  return false;
 }
--- a/src/ingestion/naive-extractor.ts
+++ b/src/ingestion/naive-extractor.ts
@@ -1,7 +1,7 @@
 import type { ExtractedFact, FactExtractor } from './types';
 export class NaiveExtractor implements FactExtractor {
-  async extract(input: string): Promise<ExtractedFact> {
+  async extract(input: string): Promise<ExtractedFact[]> {
    const topics: ExtractedFact['topics'] = [];
    const seen = new Set<string>();
    const tokens = input.match(/\bI\b|\b\d{4}\b|\b[A-Z][A-Za-z0-9+#.-]*\b/g) ?? [];
@@ -31,9 +31,11 @@ export class NaiveExtractor implements FactExtractor {
      });
    }
-    return {
+    return [
-      statement: input.trim(),
+      {
-      topics,
+        statement: input.trim(),
-    };
+        topics,
      },
    ];
  }
 }
--- a/src/ingestion/types.ts
+++ b/src/ingestion/types.ts
@@ -2,29 +2,34 @@ import type {
  AddFactInput,
  EmbeddingProvider,
  TopicLinkInput,
-} from '../types/api';
+} from "../types/api";
 export interface ExtractedFact {
  statement?: string;
  summary?: string | null;
  source?: string | null;
  confidence?: number | null;
-  metadata?: AddFactInput['metadata'];
+  metadata?: AddFactInput["metadata"];
  topics: TopicLinkInput[];
 }
 export interface FactExtractor {
-  extract(input: string): Promise<ExtractedFact>;
+  extract(input: string): Promise<ExtractedFact[]>;
 }
 export interface LlmTextGenerationModelInput {
  instruction: string;
  input: string;
  additionalInstruction?: string | undefined;
 }
 export interface LlmTextGenerationModel {
-  generateText(prompt: string): Promise<string>;
+  generateText(prompt: LlmTextGenerationModelInput): Promise<ExtractedFact[]>;
 }
 export interface LlmFactExtractorOptions {
  model: LlmTextGenerationModel;
-  instructions?: string;
+  additionalInstructions?: string | undefined;
  promptBuilder?: (input: string, instructions?: string) => string;
 }
 export interface IngestStatementOptions {
--- a/tests/ingestion.test.ts
+++ b/tests/ingestion.test.ts
@@ -1,15 +1,18 @@
-import { afterEach, beforeEach, describe, expect, it } from 'vitest';
+import { afterEach, beforeEach, describe, expect, it } from "vitest";
-import { IdentityDB } from '../src/core/identity-db';
+import { IdentityDB } from "../src/core/identity-db";
-import { LlmFactExtractor } from '../src/ingestion/llm-extractor';
+import { LlmFactExtractor } from "../src/ingestion/llm-extractor";
-import { NaiveExtractor } from '../src/ingestion/naive-extractor';
+import { NaiveExtractor } from "../src/ingestion/naive-extractor";
-import type { FactExtractor } from '../src/ingestion/types';
+import type {
  FactExtractor,
  LlmTextGenerationModelInput,
 } from "../src/ingestion/types";
-describe('IdentityDB ingestion', () => {
+describe("IdentityDB ingestion", () => {
  let db: IdentityDB;
  beforeEach(async () => {
-    db = await IdentityDB.connect({ client: 'sqlite', filename: ':memory:' });
+    db = await IdentityDB.connect({ client: "sqlite", filename: ":memory:" });
    await db.initialize();
  });
@@ -17,121 +20,144 @@ describe('IdentityDB ingestion', () => {
    await db.close();
  });
-  it('ingests a statement using a provided extractor', async () => {
+  it("ingests a statement using a provided extractor", async () => {
    const extractor: FactExtractor = {
      async extract(input) {
-        return {
+        return [
-          statement: input,
+          {
-          topics: [
+            statement: input,
-            { name: 'I', category: 'entity', granularity: 'concrete', role: 'subject' },
+            topics: [
-            { name: 'TypeScript', category: 'entity', granularity: 'concrete', role: 'object' },
+              {
-            { name: '2025', category: 'temporal', granularity: 'concrete', role: 'time' },
+                name: "I",
-          ],
+                category: "entity",
-        };
+                granularity: "concrete",
                role: "subject",
              },
              {
                name: "TypeScript",
                category: "entity",
                granularity: "concrete",
                role: "object",
              },
              {
                name: "2025",
                category: "temporal",
                granularity: "concrete",
                role: "time",
              },
            ],
          },
        ];
      },
    };
-    const fact = await db.ingestStatement('I have worked with TypeScript since 2025.', {
+    const fact = await db.ingestStatement(
-      extractor,
+      "I have worked with TypeScript since 2025.",
-    });
+      {
        extractor,
      },
    );
-    expect(fact.topics.map((topic) => topic.name)).toEqual(['I', 'TypeScript', '2025']);
+    expect(fact.topics.map((topic) => topic.name)).toEqual([
      "I",
      "TypeScript",
      "2025",
    ]);
-    const linkedFacts = await db.getTopicFactsLinkedTo('TypeScript', '2025');
+    const linkedFacts = await db.getTopicFactsLinkedTo("TypeScript", "2025");
    expect(linkedFacts).toHaveLength(1);
-    expect(linkedFacts[0]?.statement).toBe('I have worked with TypeScript since 2025.');
+    expect(linkedFacts[0]?.statement).toBe(
      "I have worked with TypeScript since 2025.",
    );
  });
-  it('ships a deterministic naive extractor for local usage', async () => {
+  it("ships a deterministic naive extractor for local usage", async () => {
-    const fact = await db.ingestStatement('I have worked with TypeScript since 2025.', {
+    const fact = await db.ingestStatement(
-      extractor: new NaiveExtractor(),
+      "I have worked with TypeScript since 2025.",
-    });
+      {
        extractor: new NaiveExtractor(),
      },
    );
-    expect(fact.topics.map((topic) => topic.name)).toEqual(['I', 'TypeScript', '2025']);
+    expect(fact.topics.map((topic) => topic.name)).toEqual([
      "I",
      "TypeScript",
      "2025",
    ]);
-    const topic = await db.getTopicByName('TypeScript', { includeFacts: true });
+    const topic = await db.getTopicByName("TypeScript", { includeFacts: true });
    expect(topic?.facts).toHaveLength(1);
  });
-  it('ships an LLM extractor adapter that turns structured JSON responses into facts', async () => {
+  it("ships an LLM extractor adapter that returns structured facts from the model", async () => {
-    let prompt = '';
+    let prompt: LlmTextGenerationModelInput | undefined = undefined;
    const extractor = new LlmFactExtractor({
      model: {
        async generateText(input) {
          prompt = input;
          return JSON.stringify({
            statement: 'I have worked with Bun and TypeScript since 2025.',
            summary: 'The speaker has Bun and TypeScript experience.',
            source: 'chat',
            confidence: 0.91,
            metadata: { channel: 'telegram' },
            topics: [
              { name: 'I', category: 'entity', granularity: 'concrete', role: 'subject' },
              { name: 'Bun', category: 'entity', granularity: 'concrete', role: 'object' },
              { name: 'TypeScript', category: 'entity', granularity: 'concrete', role: 'object' },
              { name: '2025', category: 'temporal', granularity: 'concrete', role: 'time' },
            ],
          });
        },
      },
      instructions: 'Prefer technology and time topics.',
    });
    const fact = await db.ingestStatement('I have worked with Bun and TypeScript since 2025.', {
      extractor,
    });
    expect(prompt).toContain('Prefer technology and time topics.');
    expect(prompt).toContain('I have worked with Bun and TypeScript since 2025.');
    expect(fact.summary).toBe('The speaker has Bun and TypeScript experience.');
    expect(fact.source).toBe('chat');
    expect(fact.confidence).toBe(0.91);
    expect(fact.metadata).toEqual({ channel: 'telegram' });
    expect(fact.topics.map((topic) => topic.name)).toEqual(['I', 'Bun', 'TypeScript', '2025']);
  });
  it('parses JSON responses wrapped in markdown code fences', async () => {
    const extractor = new LlmFactExtractor({
      model: {
        async generateText() {
          return [
-            'Here is the extracted fact:',
+            {
-            '```json',
+              statement: "I have worked with Bun and TypeScript since 2025.",
-            JSON.stringify({
+              summary: "The speaker has Bun and TypeScript experience.",
-              statement: 'Bun powers TypeScript tooling.',
+              source: "chat",
              confidence: 0.91,
              metadata: { channel: "telegram" },
              topics: [
-                { name: 'Bun', category: 'entity', granularity: 'concrete' },
+                {
-                { name: 'TypeScript', category: 'entity', granularity: 'concrete' },
+                  name: "I",
                  category: "entity",
                  granularity: "concrete",
                  role: "subject",
                },
                {
                  name: "Bun",
                  category: "entity",
                  granularity: "concrete",
                  role: "object",
                },
                {
                  name: "TypeScript",
                  category: "entity",
                  granularity: "concrete",
                  role: "object",
                },
                {
                  name: "2025",
                  category: "temporal",
                  granularity: "concrete",
                  role: "time",
                },
              ],
-            }),
+            },
-            '```',
+          ];
          ].join('\n');
        },
      },
      additionalInstructions: "Prefer technology and time topics.",
    });
-    const fact = await db.ingestStatement('Bun powers TypeScript tooling.', {
+    const fact = await db.ingestStatement(
-      extractor,
+      "I have worked with Bun and TypeScript since 2025.",
-    });
+      {
    expect(fact.topics.map((topic) => topic.name)).toEqual(['Bun', 'TypeScript']);
  });
  it('rejects invalid LLM responses before writing facts', async () => {
    const extractor = new LlmFactExtractor({
      model: {
        async generateText() {
          return 'not json at all';
        },
      },
    });
    await expect(
      db.ingestStatement('Bun powers TypeScript tooling.', {
        extractor,
-      }),
+      },
-    ).rejects.toThrow('LLM extractor returned invalid JSON.');
+    );
    expect(prompt).toEqual({
      instruction: expect.stringContaining("Extract structured facts from the user input."),
      input: "I have worked with Bun and TypeScript since 2025.",
      additionalInstruction: "Prefer technology and time topics.",
    });
    expect(fact.summary).toBe("The speaker has Bun and TypeScript experience.");
    expect(fact.source).toBe("chat");
    expect(fact.confidence).toBe(0.91);
    expect(fact.metadata).toEqual({ channel: "telegram" });
    expect(fact.topics.map((topic) => topic.name)).toEqual([
      "I",
      "Bun",
      "TypeScript",
      "2025",
    ]);
  });
 });
--- a/tests/semantic-search.test.ts
+++ b/tests/semantic-search.test.ts
@@ -178,13 +178,15 @@ describe('IdentityDB dedup-aware ingestion', () => {
    provider = new FakeEmbeddingProvider();
    extractor = {
      async extract(input) {
-        return {
+        return [
-          statement: input,
+          {
-          topics: [
+            statement: input,
-            { name: 'Bun', category: 'entity', granularity: 'concrete' },
+            topics: [
-            { name: 'TypeScript', category: 'entity', granularity: 'concrete' },
+              { name: 'Bun', category: 'entity', granularity: 'concrete' },
-          ],
+              { name: 'TypeScript', category: 'entity', granularity: 'concrete' },
-        };
+            ],
          },
        ];
      },
    };
--- a/tsconfig.json
+++ b/tsconfig.json
@@ -18,6 +18,12 @@
    "isolatedModules": true,
    "types": ["node", "vitest/globals"]
  },
-  "include": ["src/**/*.ts", "tests/**/*.ts", "vitest.config.ts", "tsup.config.ts"],
+  "include": [
    "src/**/*.ts",
    "tests/**/*.ts",
    "scripts/**/*.ts",
    "vitest.config.ts",
    "tsup.config.ts"
  ],
  "exclude": ["dist", "node_modules"]
 }
Author	SHA1	Message	Date
p-sw	7b305da2de	v0.5.1 All checks were successful npm release / verify (push) Successful in 15s Details npm release / publish to npm (push) Successful in 13s Details	2026-05-31 23:51:21 +09:00
p-sw	b80e838038	refactor: remove default instruction for LlmFactExtractor	2026-05-31 23:50:37 +09:00
p-sw	2b80d9e31a	v0.5.0 Some checks failed npm release / verify (push) Successful in 23s Details npm release / publish to npm (push) Failing after 11s Details	2026-05-20 23:04:14 +09:00
p-sw	00a3905fde	feat: add test-llm-extractor.ts script	2026-05-20 23:03:47 +09:00
p-sw	7602c92046	feat: make FactExtractor extracts multiple facts per input	2026-05-20 22:59:35 +09:00
p-sw	188f03e8e8	feat: add scripts to tsconfig	2026-05-20 22:53:47 +09:00
p-sw	edce116b9f	fix: remove .env.* from git	2026-05-20 22:53:38 +09:00
p-sw	131a693257	feat: add openrouter sdk for llm-extractor testing	2026-05-20 22:53:29 +09:00
p-sw	1172c63db7	v0.4.0 All checks were successful npm release / verify (push) Successful in 12s Details npm release / publish to npm (push) Successful in 11s Details	2026-05-19 22:30:27 +09:00
p-sw	0e595e6f60	test: update test of LlmExtractor	2026-05-19 22:28:09 +09:00
p-sw	518264c467	v0.3.1 Some checks failed npm release / verify (push) Failing after 9s Details npm release / publish to npm (push) Has been skipped Details	2026-05-19 22:19:30 +09:00
p-sw	cc8b3dfb14	vv0.3.1	2026-05-19 22:18:51 +09:00
p-sw	56e17dab49	feat: make extract input structured	2026-05-19 22:18:42 +09:00
p-sw	cc2e9110cc	v0.3.0 All checks were successful npm release / verify (push) Successful in 13s Details npm release / publish to npm (push) Successful in 10s Details	2026-05-19 22:07:06 +09:00
p-sw	0480ea182f	refactor: make generateText model return ExtractedFact	2026-05-19 22:06:54 +09:00
p-sw	185edfdae8	v0.2.2 All checks were successful npm release / verify (push) Successful in 13s Details npm release / publish to npm (push) Successful in 11s Details	2026-05-17 23:11:31 +09:00
p-sw	a33fd61c97	feat: adjust instruction detailed Some checks failed npm release / verify (push) Failing after 10s Details npm release / publish to npm (push) Has been skipped Details	2026-05-17 23:10:38 +09:00