v0.4.0

test: update test of LlmExtractor
v0.3.1
2026-05-19 22:30:27 +09:00 · 2026-05-19 22:28:09 +09:00 · 2026-05-19 22:19:30 +09:00 · 2026-05-19 22:18:51 +09:00 · 2026-05-19 22:18:42 +09:00 · 2026-05-19 22:07:06 +09:00
4 changed files with 132 additions and 354 deletions
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
  "name": "identitydb",
-  "version": "0.2.1",
+  "version": "0.4.0",
  "description": "TypeScript memory graph database wrapper for topics, facts, and AI-assisted ingestion.",
  "license": "MIT",
  "type": "module",
--- a/src/ingestion/llm-extractor.ts
+++ b/src/ingestion/llm-extractor.ts
@@ -1,273 +1,24 @@
-import { IdentityDBError } from '../core/errors';
-import type { TopicCategory, TopicGranularity } from '../types/domain';
 import type {
  ExtractedFact,
  FactExtractor,
  LlmFactExtractorOptions,
-} from './types';
+} from "./types";

 const DEFAULT_INSTRUCTIONS = [
-  'Extract one structured fact from the user input.',
-  'Return JSON only. Do not include markdown, explanations, or prose outside the JSON object.',
+  "Extract one structured fact from the user input.",
+  "Return JSON only. Do not include markdown, explanations, or prose outside the JSON object.",
  'Use this shape: {"statement": string?, "summary": string|null, "source": string|null, "confidence": number|null, "metadata": object|null, "topics": Array<{"name": string, "category": "entity"|"concept"|"temporal"|"custom"?, "granularity": "abstract"|"concrete"|"mixed"?, "role": string|null, "description": string|null, "metadata": object|null}>}.',
-  'Only include topics that are explicitly supported by the input.',
-].join('\n');
+  'Only include topics that are explicitly in the input as-is. For example, topic "I started TypeScript since 2015" can be "I", "TypeScript", "2015".',
+].join("\n");

 export class LlmFactExtractor implements FactExtractor {
  constructor(private readonly options: LlmFactExtractorOptions) {}

  async extract(input: string): Promise<ExtractedFact> {
-    const prompt = this.buildPrompt(input);
-    const response = await this.options.model.generateText(prompt);
-    return parseLlmExtractedFactResponse(response);
-  }
-
-  private buildPrompt(input: string): string {
-    if (this.options.promptBuilder) {
-      return this.options.promptBuilder(input, this.options.instructions);
-    }
-
-    const instructions = this.options.instructions?.trim();
-
-    return [
-      DEFAULT_INSTRUCTIONS,
-      instructions && instructions.length > 0 ? `Additional instructions:\n${instructions}` : null,
-      `Input:\n${input.trim()}`,
-    ]
-      .filter((value): value is string => value !== null)
-      .join('\n\n');
+    return this.options.model.generateText({
+      instruction: DEFAULT_INSTRUCTIONS,
+      input,
+      additionalInstruction: this.options.additionalInstructions,
+    });
  }
 }
-
-export function parseLlmExtractedFactResponse(response: string): ExtractedFact {
-  const payload = parseJsonCandidate(response);
-
-  if (!isRecord(payload)) {
-    throw new IdentityDBError('LLM extractor response must be a JSON object.');
-  }
-
-  const topics = parseTopics(payload.topics);
-  const extracted: ExtractedFact = { topics };
-
-  const statement = optionalString(payload.statement);
-  if (statement !== undefined) {
-    extracted.statement = statement;
-  }
-
-  const summary = optionalNullableString(payload.summary);
-  if (summary !== undefined) {
-    extracted.summary = summary;
-  }
-
-  const source = optionalNullableString(payload.source);
-  if (source !== undefined) {
-    extracted.source = source;
-  }
-
-  const confidence = optionalNullableNumber(payload.confidence);
-  if (confidence !== undefined) {
-    extracted.confidence = confidence;
-  }
-
-  const metadata = optionalMetadata(payload.metadata);
-  if (metadata !== undefined) {
-    extracted.metadata = metadata;
-  }
-
-  return extracted;
-}
-
-function parseJsonCandidate(response: string): unknown {
-  const trimmed = response.trim();
-
-  for (const candidate of collectJsonCandidates(trimmed)) {
-    try {
-      return JSON.parse(candidate);
-    } catch {
-      continue;
-    }
-  }
-
-  throw new IdentityDBError('LLM extractor returned invalid JSON.');
-}
-
-function collectJsonCandidates(response: string): string[] {
-  const candidates = new Set<string>();
-  candidates.add(response);
-
-  const fencePattern = /```(?:json)?\s*([\s\S]*?)```/gi;
-  let match: RegExpExecArray | null = fencePattern.exec(response);
-
-  while (match) {
-    const candidate = match[1]?.trim();
-    if (candidate) {
-      candidates.add(candidate);
-    }
-
-    match = fencePattern.exec(response);
-  }
-
-  const firstBrace = response.indexOf('{');
-  const lastBrace = response.lastIndexOf('}');
-  if (firstBrace >= 0 && lastBrace > firstBrace) {
-    candidates.add(response.slice(firstBrace, lastBrace + 1));
-  }
-
-  return Array.from(candidates);
-}
-
-function parseTopics(value: unknown): ExtractedFact['topics'] {
-  if (!Array.isArray(value)) {
-    throw new IdentityDBError('LLM extractor response must include a topics array.');
-  }
-
-  return value.map((entry) => parseTopic(entry));
-}
-
-function parseTopic(value: unknown): ExtractedFact['topics'][number] {
-  if (!isRecord(value)) {
-    throw new IdentityDBError('LLM extractor topics must be JSON objects.');
-  }
-
-  const name = optionalString(value.name)?.trim();
-  if (!name) {
-    throw new IdentityDBError('LLM extractor topics must include a non-empty name.');
-  }
-
-  const topic: ExtractedFact['topics'][number] = { name };
-
-  const category = optionalTopicCategory(value.category);
-  if (category !== undefined) {
-    topic.category = category;
-  }
-
-  const granularity = optionalTopicGranularity(value.granularity);
-  if (granularity !== undefined) {
-    topic.granularity = granularity;
-  }
-
-  const role = optionalNullableString(value.role);
-  if (role !== undefined) {
-    topic.role = role;
-  }
-
-  const description = optionalNullableString(value.description);
-  if (description !== undefined) {
-    topic.description = description;
-  }
-
-  const metadata = optionalMetadata(value.metadata);
-  if (metadata !== undefined) {
-    topic.metadata = metadata;
-  }
-
-  return topic;
-}
-
-function optionalString(value: unknown): string | undefined {
-  if (value === undefined) {
-    return undefined;
-  }
-
-  if (typeof value !== 'string') {
-    throw new IdentityDBError('LLM extractor expected a string field.');
-  }
-
-  return value;
-}
-
-function optionalNullableString(value: unknown): string | null | undefined {
-  if (value === undefined) {
-    return undefined;
-  }
-
-  if (value === null) {
-    return null;
-  }
-
-  if (typeof value !== 'string') {
-    throw new IdentityDBError('LLM extractor expected a nullable string field.');
-  }
-
-  return value;
-}
-
-function optionalNullableNumber(value: unknown): number | null | undefined {
-  if (value === undefined) {
-    return undefined;
-  }
-
-  if (value === null) {
-    return null;
-  }
-
-  if (typeof value !== 'number' || Number.isNaN(value)) {
-    throw new IdentityDBError('LLM extractor expected confidence to be a number or null.');
-  }
-
-  return value;
-}
-
-function optionalMetadata(value: unknown): ExtractedFact['metadata'] | undefined {
-  if (value === undefined) {
-    return undefined;
-  }
-
-  if (value === null) {
-    return null;
-  }
-
-  if (!isJsonLike(value)) {
-    throw new IdentityDBError('LLM extractor metadata must be valid JSON-compatible data.');
-  }
-
-  return value as ExtractedFact['metadata'];
-}
-
-function optionalTopicCategory(value: unknown): TopicCategory | undefined {
-  if (value === undefined) {
-    return undefined;
-  }
-
-  if (value === 'entity' || value === 'concept' || value === 'temporal' || value === 'custom') {
-    return value;
-  }
-
-  throw new IdentityDBError('LLM extractor returned an unsupported topic category.');
-}
-
-function optionalTopicGranularity(value: unknown): TopicGranularity | undefined {
-  if (value === undefined) {
-    return undefined;
-  }
-
-  if (value === 'abstract' || value === 'concrete' || value === 'mixed') {
-    return value;
-  }
-
-  throw new IdentityDBError('LLM extractor returned an unsupported topic granularity.');
-}
-
-function isRecord(value: unknown): value is Record<string, unknown> {
-  return typeof value === 'object' && value !== null && !Array.isArray(value);
-}
-
-function isJsonLike(value: unknown): boolean {
-  if (value === null) {
-    return true;
-  }
-
-  if (typeof value === 'string' || typeof value === 'number' || typeof value === 'boolean') {
-    return true;
-  }
-
-  if (Array.isArray(value)) {
-    return value.every((entry) => isJsonLike(entry));
-  }
-
-  if (isRecord(value)) {
-    return Object.values(value).every((entry) => isJsonLike(entry));
-  }
-
-  return false;
-}
--- a/src/ingestion/types.ts
+++ b/src/ingestion/types.ts
@@ -2,14 +2,14 @@ import type {
  AddFactInput,
  EmbeddingProvider,
  TopicLinkInput,
-} from '../types/api';
+} from "../types/api";

 export interface ExtractedFact {
  statement?: string;
  summary?: string | null;
  source?: string | null;
  confidence?: number | null;
-  metadata?: AddFactInput['metadata'];
+  metadata?: AddFactInput["metadata"];
  topics: TopicLinkInput[];
 }

@@ -17,14 +17,19 @@ export interface FactExtractor {
  extract(input: string): Promise<ExtractedFact>;
 }

+export interface LlmTextGenerationModelInput {
+  instruction: string;
+  input: string;
+  additionalInstruction?: string | undefined;
+}
+
 export interface LlmTextGenerationModel {
-  generateText(prompt: string): Promise<string>;
+  generateText(prompt: LlmTextGenerationModelInput): Promise<ExtractedFact>;
 }

 export interface LlmFactExtractorOptions {
  model: LlmTextGenerationModel;
-  instructions?: string;
-  promptBuilder?: (input: string, instructions?: string) => string;
+  additionalInstructions?: string | undefined;
 }

 export interface IngestStatementOptions {
--- a/tests/ingestion.test.ts
+++ b/tests/ingestion.test.ts
@@ -1,15 +1,18 @@
-import { afterEach, beforeEach, describe, expect, it } from 'vitest';
+import { afterEach, beforeEach, describe, expect, it } from "vitest";

-import { IdentityDB } from '../src/core/identity-db';
-import { LlmFactExtractor } from '../src/ingestion/llm-extractor';
-import { NaiveExtractor } from '../src/ingestion/naive-extractor';
-import type { FactExtractor } from '../src/ingestion/types';
+import { IdentityDB } from "../src/core/identity-db";
+import { LlmFactExtractor } from "../src/ingestion/llm-extractor";
+import { NaiveExtractor } from "../src/ingestion/naive-extractor";
+import type {
+  FactExtractor,
+  LlmTextGenerationModelInput,
+} from "../src/ingestion/types";

-describe('IdentityDB ingestion', () => {
+describe("IdentityDB ingestion", () => {
  let db: IdentityDB;

  beforeEach(async () => {
-    db = await IdentityDB.connect({ client: 'sqlite', filename: ':memory:' });
+    db = await IdentityDB.connect({ client: "sqlite", filename: ":memory:" });
    await db.initialize();
  });

@@ -17,121 +20,140 @@ describe('IdentityDB ingestion', () => {
    await db.close();
  });

-  it('ingests a statement using a provided extractor', async () => {
+  it("ingests a statement using a provided extractor", async () => {
    const extractor: FactExtractor = {
      async extract(input) {
        return {
          statement: input,
          topics: [
-            { name: 'I', category: 'entity', granularity: 'concrete', role: 'subject' },
-            { name: 'TypeScript', category: 'entity', granularity: 'concrete', role: 'object' },
-            { name: '2025', category: 'temporal', granularity: 'concrete', role: 'time' },
+            {
+              name: "I",
+              category: "entity",
+              granularity: "concrete",
+              role: "subject",
+            },
+            {
+              name: "TypeScript",
+              category: "entity",
+              granularity: "concrete",
+              role: "object",
+            },
+            {
+              name: "2025",
+              category: "temporal",
+              granularity: "concrete",
+              role: "time",
+            },
          ],
        };
      },
    };

-    const fact = await db.ingestStatement('I have worked with TypeScript since 2025.', {
-      extractor,
-    });
+    const fact = await db.ingestStatement(
+      "I have worked with TypeScript since 2025.",
+      {
+        extractor,
+      },
+    );

-    expect(fact.topics.map((topic) => topic.name)).toEqual(['I', 'TypeScript', '2025']);
+    expect(fact.topics.map((topic) => topic.name)).toEqual([
+      "I",
+      "TypeScript",
+      "2025",
+    ]);

-    const linkedFacts = await db.getTopicFactsLinkedTo('TypeScript', '2025');
+    const linkedFacts = await db.getTopicFactsLinkedTo("TypeScript", "2025");
    expect(linkedFacts).toHaveLength(1);
-    expect(linkedFacts[0]?.statement).toBe('I have worked with TypeScript since 2025.');
+    expect(linkedFacts[0]?.statement).toBe(
+      "I have worked with TypeScript since 2025.",
+    );
  });

-  it('ships a deterministic naive extractor for local usage', async () => {
-    const fact = await db.ingestStatement('I have worked with TypeScript since 2025.', {
-      extractor: new NaiveExtractor(),
-    });
+  it("ships a deterministic naive extractor for local usage", async () => {
+    const fact = await db.ingestStatement(
+      "I have worked with TypeScript since 2025.",
+      {
+        extractor: new NaiveExtractor(),
+      },
+    );

-    expect(fact.topics.map((topic) => topic.name)).toEqual(['I', 'TypeScript', '2025']);
+    expect(fact.topics.map((topic) => topic.name)).toEqual([
+      "I",
+      "TypeScript",
+      "2025",
+    ]);

-    const topic = await db.getTopicByName('TypeScript', { includeFacts: true });
+    const topic = await db.getTopicByName("TypeScript", { includeFacts: true });
    expect(topic?.facts).toHaveLength(1);
  });

-  it('ships an LLM extractor adapter that turns structured JSON responses into facts', async () => {
-    let prompt = '';
+  it("ships an LLM extractor adapter that returns structured facts from the model", async () => {
+    let prompt: LlmTextGenerationModelInput | undefined = undefined;

    const extractor = new LlmFactExtractor({
      model: {
        async generateText(input) {
          prompt = input;

-          return JSON.stringify({
-            statement: 'I have worked with Bun and TypeScript since 2025.',
-            summary: 'The speaker has Bun and TypeScript experience.',
-            source: 'chat',
+          return {
+            statement: "I have worked with Bun and TypeScript since 2025.",
+            summary: "The speaker has Bun and TypeScript experience.",
+            source: "chat",
            confidence: 0.91,
-            metadata: { channel: 'telegram' },
+            metadata: { channel: "telegram" },
            topics: [
-              { name: 'I', category: 'entity', granularity: 'concrete', role: 'subject' },
-              { name: 'Bun', category: 'entity', granularity: 'concrete', role: 'object' },
-              { name: 'TypeScript', category: 'entity', granularity: 'concrete', role: 'object' },
-              { name: '2025', category: 'temporal', granularity: 'concrete', role: 'time' },
+              {
+                name: "I",
+                category: "entity",
+                granularity: "concrete",
+                role: "subject",
+              },
+              {
+                name: "Bun",
+                category: "entity",
+                granularity: "concrete",
+                role: "object",
+              },
+              {
+                name: "TypeScript",
+                category: "entity",
+                granularity: "concrete",
+                role: "object",
+              },
+              {
+                name: "2025",
+                category: "temporal",
+                granularity: "concrete",
+                role: "time",
+              },
            ],
-          });
+          };
        },
      },
-      instructions: 'Prefer technology and time topics.',
+      additionalInstructions: "Prefer technology and time topics.",
    });

-    const fact = await db.ingestStatement('I have worked with Bun and TypeScript since 2025.', {
-      extractor,
-    });
-
-    expect(prompt).toContain('Prefer technology and time topics.');
-    expect(prompt).toContain('I have worked with Bun and TypeScript since 2025.');
-    expect(fact.summary).toBe('The speaker has Bun and TypeScript experience.');
-    expect(fact.source).toBe('chat');
-    expect(fact.confidence).toBe(0.91);
-    expect(fact.metadata).toEqual({ channel: 'telegram' });
-    expect(fact.topics.map((topic) => topic.name)).toEqual(['I', 'Bun', 'TypeScript', '2025']);
-  });
-
-  it('parses JSON responses wrapped in markdown code fences', async () => {
-    const extractor = new LlmFactExtractor({
-      model: {
-        async generateText() {
-          return [
-            'Here is the extracted fact:',
-            '```json',
-            JSON.stringify({
-              statement: 'Bun powers TypeScript tooling.',
-              topics: [
-                { name: 'Bun', category: 'entity', granularity: 'concrete' },
-                { name: 'TypeScript', category: 'entity', granularity: 'concrete' },
-              ],
-            }),
-            '```',
-          ].join('\n');
-        },
-      },
-    });
-
-    const fact = await db.ingestStatement('Bun powers TypeScript tooling.', {
-      extractor,
-    });
-
-    expect(fact.topics.map((topic) => topic.name)).toEqual(['Bun', 'TypeScript']);
-  });
-
-  it('rejects invalid LLM responses before writing facts', async () => {
-    const extractor = new LlmFactExtractor({
-      model: {
-        async generateText() {
-          return 'not json at all';
-        },
-      },
-    });
-
-    await expect(
-      db.ingestStatement('Bun powers TypeScript tooling.', {
+    const fact = await db.ingestStatement(
+      "I have worked with Bun and TypeScript since 2025.",
+      {
        extractor,
-      }),
-    ).rejects.toThrow('LLM extractor returned invalid JSON.');
+      },
+    );
+
+    expect(prompt).toEqual({
+      instruction: expect.stringContaining("Extract one structured fact from the user input."),
+      input: "I have worked with Bun and TypeScript since 2025.",
+      additionalInstruction: "Prefer technology and time topics.",
+    });
+    expect(fact.summary).toBe("The speaker has Bun and TypeScript experience.");
+    expect(fact.source).toBe("chat");
+    expect(fact.confidence).toBe(0.91);
+    expect(fact.metadata).toEqual({ channel: "telegram" });
+    expect(fact.topics.map((topic) => topic.name)).toEqual([
+      "I",
+      "Bun",
+      "TypeScript",
+      "2025",
+    ]);
  });
 });
Author	SHA1	Message	Date
p-sw	1172c63db7	v0.4.0 (checks: all successful; npm release / verify (push): successful in 12s; npm release / publish to npm (push): successful in 11s)	2026-05-19 22:30:27 +09:00
p-sw	0e595e6f60	test: update test of LlmExtractor	2026-05-19 22:28:09 +09:00
p-sw	518264c467	v0.3.1 (checks: some failed; npm release / verify (push): failing after 9s; npm release / publish to npm (push): skipped)	2026-05-19 22:19:30 +09:00
p-sw	cc8b3dfb14	vv0.3.1	2026-05-19 22:18:51 +09:00
p-sw	56e17dab49	feat: make extract input structured	2026-05-19 22:18:42 +09:00
p-sw	cc2e9110cc	v0.3.0 (checks: all successful; npm release / verify (push): successful in 13s; npm release / publish to npm (push): successful in 10s)	2026-05-19 22:07:06 +09:00
p-sw	0480ea182f	refactor: make generateText model return ExtractedFact	2026-05-19 22:06:54 +09:00
p-sw	185edfdae8	v0.2.2 (checks: all successful; npm release / verify (push): successful in 13s; npm release / publish to npm (push): successful in 11s)	2026-05-17 23:11:31 +09:00
p-sw	a33fd61c97	feat: adjust instruction detailed (checks: some failed; npm release / verify (push): failing after 10s; npm release / publish to npm (push): skipped)	2026-05-17 23:10:38 +09:00