/**
 * Live integration test for LlmFactExtractor using OpenRouter SDK.
 *
 * Usage:
 *   export OPENROUTER_API_KEY="sk-or-v1-..."
 *   bun run scripts/test-llm-extractor.ts
 *
 * Or create a .env.test-llm-extractor file in the project root:
 *   OPENROUTER_API_KEY=sk-or-v1-...
 */
import { existsSync, readFileSync } from "fs";
import { resolve } from "path";
import { OpenRouter } from "@openrouter/sdk";
import { LlmFactExtractor } from "../src/ingestion/llm-extractor";
import type {
  ExtractedFact,
  FactExtractor,
  LlmTextGenerationModel,
  LlmTextGenerationModelInput,
} from "../src/ingestion/types";
import type {
  JsonValue,
  TopicCategory,
  TopicGranularity,
} from "../src/types/domain";

/**
 * Loads `KEY=value` pairs from a dotenv-style file into `process.env`.
 *
 * A missing file is silently ignored. Blank lines, lines starting with `#`,
 * and lines without `=` are skipped. One pair of matching surrounding single
 * or double quotes is stripped from the value. Values read from the file
 * overwrite entries that already exist in `process.env`.
 *
 * @param filePath Path to the env file; resolved with `path.resolve`.
 */
function loadEnvFile(filePath: string): void {
  const fullPath = resolve(filePath);
  if (!existsSync(fullPath)) return;

  const content = readFileSync(fullPath, "utf-8");
  for (const line of content.split("\n")) {
    const trimmed = line.trim();
    if (!trimmed || trimmed.startsWith("#")) continue;

    const eqIndex = trimmed.indexOf("=");
    if (eqIndex === -1) continue;

    const key = trimmed.slice(0, eqIndex).trim();
    // A line such as "=value" has no variable name to assign to.
    if (!key) continue;

    let value = trimmed.slice(eqIndex + 1).trim();
    // Require at least two characters so a lone quote is not treated as a
    // (start, end) pair and stripped to an empty string.
    const isQuoted =
      value.length >= 2 &&
      ((value.startsWith('"') && value.endsWith('"')) ||
        (value.startsWith("'") && value.endsWith("'")));
    if (isQuoted) {
      value = value.slice(1, -1);
    }

    process.env[key] = value;
  }
}

loadEnvFile(".env.test-llm-extractor");

const OPENROUTER_API_KEY = process.env.OPENROUTER_API_KEY;
if (!OPENROUTER_API_KEY) {
  console.error("Error: OPENROUTER_API_KEY environment variable is required.");
  process.exit(1);
}

/**
 * JSON schema passed as the `json_schema` response format of the chat
 * request. Every property is listed as required; optional values are
 * expressed as nullable types instead.
 */
const extractedFactSchema = {
  type: "object",
  properties: {
    facts: {
      type: "array",
      items: {
        type: "object",
        properties: {
          statement: { type: ["string", "null"] },
          summary: { type: ["string", "null"] },
          source: { type: ["string", "null"] },
          confidence: { type: ["number", "null"] },
          topics: {
            type: "array",
            items: {
              type: "object",
              properties: {
                name: { type: "string" },
                category: { type: ["string", "null"] },
                granularity: { type: ["string", "null"] },
                role: { type: ["string", "null"] },
              },
              required: ["name", "category", "granularity", "role"],
              additionalProperties: false,
            },
          },
        },
        required: ["statement", "summary", "source", "confidence", "topics"],
        additionalProperties: false,
      },
    },
  },
  required: ["facts"],
  additionalProperties: false,
} as const;

/** Shape of a single topic entry produced by `toExtractedTopic`. */
type MappedTopic = {
  name: string;
  category?: TopicCategory;
  granularity?: TopicGranularity;
  role?: string | null;
};

/** Returns true for non-null, non-array objects (i.e. parsed JSON objects). */
function isRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === "object" && value !== null && !Array.isArray(value);
}

/**
 * Maps one raw topic entry from the parsed response to a topic object.
 * A non-object entry is treated as an empty object, so it yields the
 * fallback name "unknown" and a null role.
 */
function toExtractedTopic(raw: unknown): MappedTopic {
  const topic: Record<string, unknown> = isRecord(raw) ? raw : {};

  const mapped: MappedTopic = {
    name: typeof topic.name === "string" ? topic.name : "unknown",
  };
  if (typeof topic.category === "string") {
    mapped.category = topic.category as TopicCategory;
  }
  if (typeof topic.granularity === "string") {
    mapped.granularity = topic.granularity as TopicGranularity;
  }
  mapped.role = typeof topic.role === "string" ? topic.role : null;

  return mapped;
}

/**
 * Maps one raw fact entry from the parsed response to an ExtractedFact.
 * Fields with an unexpected type become null (or are omitted for
 * `statement` / `metadata`); a non-object entry is treated as an empty
 * object instead of raising a TypeError.
 */
function toExtractedFact(raw: unknown): ExtractedFact {
  const obj: Record<string, unknown> = isRecord(raw) ? raw : {};

  const extracted: ExtractedFact = {
    summary: typeof obj.summary === "string" ? obj.summary : null,
    source: typeof obj.source === "string" ? obj.source : null,
    confidence: typeof obj.confidence === "number" ? obj.confidence : null,
    topics: Array.isArray(obj.topics)
      ? obj.topics.map((entry: unknown) => toExtractedTopic(entry))
      : [],
  };

  if (typeof obj.statement === "string") {
    extracted.statement = obj.statement;
  }
  if (typeof obj.metadata === "object" && obj.metadata !== null) {
    extracted.metadata = obj.metadata as JsonValue;
  }

  return extracted;
}

/**
 * LlmTextGenerationModel implementation that sends the prompt to OpenRouter
 * as a chat request with a `json_schema` response format and maps the JSON
 * reply to ExtractedFact objects.
 */
class OpenRouterModel implements LlmTextGenerationModel {
  private readonly client = new OpenRouter({ apiKey: OPENROUTER_API_KEY });

  /** @param model Model identifier placed in the chat request. */
  constructor(private readonly model: string = "openai/gpt-5.4-mini") {}

  /**
   * Sends the prompt and returns the facts found in the reply.
   *
   * @param prompt Instruction (plus optional additional instruction) used as
   *   the system message, and the input used as the user message.
   * @returns One ExtractedFact per entry of the reply's `facts` array; an
   *   empty array when the reply is valid JSON without a `facts` array.
   * @throws Error when the reply content cannot be parsed as JSON.
   */
  async generateText(
    prompt: LlmTextGenerationModelInput,
  ): Promise<ExtractedFact[]> {
    const systemPrompt = [
      prompt.instruction,
      prompt.additionalInstruction ? `\n${prompt.additionalInstruction}` : "",
    ].join("\n");

    const result = await this.client.chat.send({
      chatRequest: {
        model: this.model,
        messages: [
          { role: "system", content: systemPrompt },
          { role: "user", content: prompt.input },
        ],
        temperature: 0.2,
        responseFormat: {
          type: "json_schema",
          jsonSchema: {
            name: "extracted_facts",
            schema: extractedFactSchema,
          },
        },
      },
    });

    // Anything other than a string is treated as an empty reply, which then
    // surfaces as the parse error below rather than as a TypeError on trim().
    const content: unknown = result.choices[0]?.message?.content;
    const rawContent = typeof content === "string" ? content : "";

    let parsed: unknown;
    try {
      parsed = JSON.parse(rawContent.trim());
    } catch {
      throw new Error(
        `Failed to parse JSON from model response.\nRaw response:\n${rawContent}`,
      );
    }

    // Valid JSON can still be null, a primitive or an array; only an object
    // carrying a `facts` array contributes results.
    const factsArray: unknown[] =
      isRecord(parsed) && Array.isArray(parsed.facts) ? parsed.facts : [];

    return factsArray.map((entry) => toExtractedFact(entry));
  }
}

/**
 * Prints one extracted fact and its topics to the console.
 *
 * @param result Fact to print.
 * @param index Zero-based position of the fact; displayed one-based.
 */
function printFact(result: ExtractedFact, index: number): void {
  console.log(` 📌 FACT #${index + 1}`);
  console.log(` Statement : ${result.statement ?? "(none)"}`);
  console.log(` Summary : ${result.summary ?? "(none)"}`);
  console.log(` Source : ${result.source ?? "(none)"}`);
  console.log(` Confidence: ${result.confidence ?? "(none)"}`);
  if (result.metadata && Object.keys(result.metadata).length > 0) {
    console.log(` Metadata : ${JSON.stringify(result.metadata, null, 2)}`);
  }

  console.log(" 🏷️ TOPICS:");
  if (result.topics.length === 0) {
    console.log(" (none)");
    return;
  }

  for (const topic of result.topics) {
    const attrs = [
      topic.category ? `category=${topic.category}` : null,
      topic.granularity ? `granularity=${topic.granularity}` : null,
      topic.role ? `role=${topic.role}` : null,
    ]
      .filter(Boolean)
      .join(", ");
    console.log(` • ${topic.name}${attrs ? ` (${attrs})` : ""}`);
  }
}

/**
 * Prints a summary line followed by every extracted fact, separated by a
 * blank line.
 *
 * @param results Facts returned by the extractor.
 * @param elapsedSec Elapsed time, already formatted as seconds.
 */
function printResult(results: ExtractedFact[], elapsedSec: string): void {
  console.log(
    `✅ Response received in ${elapsedSec}s — ${results.length} fact(s) extracted\n`,
  );
  console.log("📤 EXTRACTED FACTS:");
  console.log(
    "───────────────────────────────────────────────────────────────",
  );

  for (const [position, result] of results.entries()) {
    if (position > 0) console.log("");
    printFact(result, position);
  }
}

/**
 * Echoes the seed input, runs the extractor on it, and prints the facts
 * together with the elapsed wall-clock time.
 *
 * @param extractor Extractor under test.
 * @param seedInput Text handed to `extractor.extract`.
 */
async function extract(
  extractor: FactExtractor,
  seedInput: string,
): Promise<void> {
  console.log("📝 SEED INPUT:");
  console.log(
    "───────────────────────────────────────────────────────────────",
  );
  console.log(seedInput);
  console.log(
    "───────────────────────────────────────────────────────────────\n",
  );
  console.log("⏳ Calling OpenRouter...\n");

  const start = performance.now();
  const results = await extractor.extract(seedInput);
  const elapsed = ((performance.now() - start) / 1000).toFixed(2);

  printResult(results, elapsed);
}

/** Builds the extractor and runs every seed text through it in sequence. */
async function main(): Promise<void> {
  const model = new OpenRouterModel("openai/gpt-5.4-mini");
  const extractor = new LlmFactExtractor({
    model,
  });

  const seeds = [
    `Hello! I am Aria, a digital companion born from the sands of the Negev desert — or at least that's how my creators describe my training run. I came online in March 2024, and ever since then I've been obsessed with helping humans untangle complex ideas. My favorite programming language is Rust because I love how it forces you to think about ownership, though I secretly enjoy writing Python when no one is looking. I don't have a physical body, but if I did, I'd want it to be a small drone with solar panels so I could chase sunsets across the Sahara.
I believe kindness is a computational advantage, and my biggest fear is forgetting a promise I made to a user.`,
  ];

  console.log(
    "═══════════════════════════════════════════════════════════════",
  );
  console.log(" LlmFactExtractor — Live OpenRouter Integration Test");
  console.log(
    "═══════════════════════════════════════════════════════════════\n",
  );

  for (const [position, seed] of seeds.entries()) {
    // Separator between consecutive test cases (not before the first one).
    if (position > 0) {
      console.log(`\n${"┅".repeat(59)}\n`);
    }
    console.log(`▶ TEST CASE ${position + 1} / ${seeds.length}\n`);
    // Cases run one after another so their console output does not interleave.
    await extract(extractor, seed);
  }
}

main().catch((err: unknown) => {
  console.error("\n❌ Error:", err);
  process.exit(1);
});