// File: IdentityDB/scripts/test-llm-extractor.ts (288 lines, 9.8 KiB, TypeScript)

/**
* Live integration test for LlmFactExtractor using OpenRouter SDK.
*
* Usage:
* export OPENROUTER_API_KEY="sk-or-v1-..."
* bun run scripts/test-llm-extractor.ts
*
* Or create a .env.test-llm-extractor file in the project root:
* OPENROUTER_API_KEY=sk-or-v1-...
*/
import { existsSync, readFileSync } from "fs";
import { resolve } from "path";
import { OpenRouter } from "@openrouter/sdk";
import { LlmFactExtractor } from "../src/ingestion/llm-extractor";
import type {
ExtractedFact,
FactExtractor,
LlmTextGenerationModel,
LlmTextGenerationModelInput,
} from "../src/ingestion/types";
import type {
JsonValue,
TopicCategory,
TopicGranularity,
} from "../src/types/domain";
/**
 * Loads `KEY=value` pairs from a dotenv-style file into `process.env`.
 *
 * A missing file is ignored. Blank lines, lines starting with `#`, lines
 * without `=` and lines with an empty key are skipped. A leading `export `
 * keyword on the key is accepted, and one pair of matching single or double
 * quotes around the value is removed. Values read from the file overwrite
 * variables that are already set in the environment.
 *
 * @param filePath - Path to the env file, resolved with `path.resolve`.
 */
function loadEnvFile(filePath: string): void {
  const fullPath = resolve(filePath);
  if (!existsSync(fullPath)) return;

  const content = readFileSync(fullPath, "utf-8");
  for (const line of content.split(/\r?\n/)) {
    const trimmed = line.trim();
    if (!trimmed || trimmed.startsWith("#")) continue;

    const eqIndex = trimmed.indexOf("=");
    if (eqIndex === -1) continue;

    // Accept shell-style `export KEY=value` lines instead of defining a
    // variable whose name literally starts with "export ".
    const key = trimmed
      .slice(0, eqIndex)
      .trim()
      .replace(/^export\s+/, "");
    if (!key) continue;

    let value = trimmed.slice(eqIndex + 1).trim();
    const quote = value[0];
    // Require at least two characters so a lone quote character is kept
    // verbatim instead of being "unquoted" into an empty string.
    if (
      value.length >= 2 &&
      (quote === '"' || quote === "'") &&
      value.endsWith(quote)
    ) {
      value = value.slice(1, -1);
    }

    process.env[key] = value;
  }
}
// Read the optional local env file first so a key stored there is visible to
// the lookup below.
loadEnvFile(".env.test-llm-extractor");

// Without an API key the script cannot reach OpenRouter, so stop right away.
const OPENROUTER_API_KEY = process.env.OPENROUTER_API_KEY;
if (OPENROUTER_API_KEY === undefined || OPENROUTER_API_KEY === "") {
  console.error("Error: OPENROUTER_API_KEY environment variable is required.");
  process.exit(1);
}
/**
 * JSON Schema passed to the model as the `json_schema` response format
 * (see `responseFormat` in `OpenRouterModel.generateText`).
 *
 * The root object carries a single `facts` array. At every level all declared
 * properties are listed in `required` and `additionalProperties` is `false`;
 * values that may be absent are typed as a union with "null" rather than
 * being left optional.
 * NOTE(review): this shape looks chosen for strict structured-output modes —
 * confirm against the provider's requirements.
 */
const extractedFactSchema = {
type: "object",
properties: {
facts: {
type: "array",
items: {
type: "object",
properties: {
// Scalar fact fields: each one may be null.
statement: { type: ["string", "null"] },
summary: { type: ["string", "null"] },
source: { type: ["string", "null"] },
confidence: { type: ["number", "null"] },
topics: {
type: "array",
items: {
type: "object",
properties: {
// `name` is the only topic field that must be a real string.
name: { type: "string" },
category: { type: ["string", "null"] },
granularity: { type: ["string", "null"] },
role: { type: ["string", "null"] },
},
required: ["name", "category", "granularity", "role"],
additionalProperties: false,
},
},
},
required: ["statement", "summary", "source", "confidence", "topics"],
additionalProperties: false,
},
},
},
required: ["facts"],
additionalProperties: false,
} as const;
/**
 * `LlmTextGenerationModel` implementation that calls OpenRouter's chat
 * endpoint with a JSON-schema response format and converts the JSON reply
 * into `ExtractedFact` objects.
 */
class OpenRouterModel implements LlmTextGenerationModel {
  private readonly client = new OpenRouter({ apiKey: OPENROUTER_API_KEY });

  /**
   * @param model - Model identifier sent with every chat request.
   */
  constructor(private readonly model: string = "openai/gpt-5.4-mini") {}

  /**
   * Sends the prompt to the model and parses the reply into facts.
   *
   * @param prompt - Instruction (system message), optional additional
   *   instruction and the user input to extract facts from.
   * @returns The facts found in the reply. Returns an empty array when the
   *   reply is valid JSON but holds no `facts` array. Entries of `facts` or
   *   `topics` that are not JSON objects are skipped.
   * @throws Error when the reply content is not parseable JSON; the raw
   *   content is included in the message.
   */
  async generateText(
    prompt: LlmTextGenerationModelInput,
  ): Promise<ExtractedFact[]> {
    const result = await this.client.chat.send({
      chatRequest: {
        model: this.model,
        messages: [
          {
            role: "system",
            content: [
              prompt.instruction,
              prompt.additionalInstruction
                ? `\n${prompt.additionalInstruction}`
                : "",
            ].join("\n"),
          },
          { role: "user", content: prompt.input },
        ],
        temperature: 0.2,
        responseFormat: {
          type: "json_schema",
          jsonSchema: {
            name: "extracted_facts",
            schema: extractedFactSchema,
          },
        },
      },
    });

    // Only plain-text content can be parsed; anything else (missing content,
    // structured content parts) is treated as an empty reply and reported by
    // the parse error below.
    const content: unknown = result.choices[0]?.message?.content;
    const rawContent = typeof content === "string" ? content : "";

    let parsed: unknown;
    try {
      parsed = JSON.parse(rawContent.trim());
    } catch {
      throw new Error(
        `Failed to parse JSON from model response.\nRaw response:\n${rawContent}`,
      );
    }

    // `JSON.parse` can legally yield null, a primitive or an array, so the
    // root must be narrowed before `facts` is read.
    const facts: unknown[] =
      OpenRouterModel.isRecord(parsed) && Array.isArray(parsed.facts)
        ? parsed.facts
        : [];

    const extractedFacts: ExtractedFact[] = [];
    for (const entry of facts) {
      if (OpenRouterModel.isRecord(entry)) {
        extractedFacts.push(OpenRouterModel.toExtractedFact(entry));
      }
    }
    return extractedFacts;
  }

  /** True for non-null, non-array objects, i.e. parsed JSON objects. */
  private static isRecord(value: unknown): value is Record<string, unknown> {
    return typeof value === "object" && value !== null && !Array.isArray(value);
  }

  /** Maps one parsed fact object onto the `ExtractedFact` shape. */
  private static toExtractedFact(obj: Record<string, unknown>): ExtractedFact {
    const rawTopics: unknown[] = Array.isArray(obj.topics) ? obj.topics : [];

    const extracted: ExtractedFact = {
      summary: typeof obj.summary === "string" ? obj.summary : null,
      source: typeof obj.source === "string" ? obj.source : null,
      confidence: typeof obj.confidence === "number" ? obj.confidence : null,
      topics: rawTopics
        .filter((topic): topic is Record<string, unknown> =>
          OpenRouterModel.isRecord(topic),
        )
        .map((topic) => OpenRouterModel.toTopic(topic)),
    };

    // `statement` and `metadata` are only set when the reply provides them.
    if (typeof obj.statement === "string") {
      extracted.statement = obj.statement;
    }
    if (obj.metadata && typeof obj.metadata === "object") {
      extracted.metadata = obj.metadata as JsonValue;
    }
    return extracted;
  }

  /**
   * Maps one parsed topic object. A non-string `name` falls back to
   * "unknown"; `category` and `granularity` are copied only when they are
   * strings; `role` is always set (string or null).
   */
  private static toTopic(topic: Record<string, unknown>): {
    name: string;
    category?: TopicCategory;
    granularity?: TopicGranularity;
    role?: string | null;
  } {
    const mapped: {
      name: string;
      category?: TopicCategory;
      granularity?: TopicGranularity;
      role?: string | null;
    } = {
      name: typeof topic.name === "string" ? topic.name : "unknown",
      role: typeof topic.role === "string" ? topic.role : null,
    };
    if (typeof topic.category === "string") {
      // The reply is free text; the cast trusts the model to use known values.
      mapped.category = topic.category as TopicCategory;
    }
    if (typeof topic.granularity === "string") {
      mapped.granularity = topic.granularity as TopicGranularity;
    }
    return mapped;
  }
}
/**
 * Writes one extracted fact and its topics to the console.
 *
 * @param result - Fact to print; missing scalar fields are shown as "(none)".
 * @param index - Zero-based position of the fact; printed one-based.
 */
function printFact(result: ExtractedFact, index: number) {
  console.log(` 📌 FACT #${index + 1}`);

  // Label/value pairs for the scalar fields, printed in a fixed order.
  const rows: Array<[string, string | number | null | undefined]> = [
    [" Statement : ", result.statement],
    [" Summary : ", result.summary],
    [" Source : ", result.source],
    [" Confidence: ", result.confidence],
  ];
  for (const [label, value] of rows) {
    console.log(`${label}${value ?? "(none)"}`);
  }

  // Metadata is printed only when it has at least one key.
  const metadata = result.metadata;
  if (metadata && Object.keys(metadata).length > 0) {
    console.log(` Metadata : ${JSON.stringify(metadata, null, 2)}`);
  }

  console.log(" 🏷️ TOPICS:");
  if (result.topics.length === 0) {
    console.log(" (none)");
    return;
  }

  for (const { name, category, granularity, role } of result.topics) {
    // Collect only the attributes that carry a value.
    const attrs: string[] = [];
    if (category) attrs.push(`category=${category}`);
    if (granularity) attrs.push(`granularity=${granularity}`);
    if (role) attrs.push(`role=${role}`);

    const suffix = attrs.length > 0 ? ` (${attrs.join(", ")})` : "";
    console.log(`${name}${suffix}`);
  }
}
/**
 * Prints the summary line followed by every extracted fact.
 *
 * @param results - Facts returned by the extractor.
 * @param elapsedSec - Elapsed time in seconds, already formatted for display.
 */
function printResult(results: ExtractedFact[], elapsedSec: string) {
  const divider =
    "───────────────────────────────────────────────────────────────";

  console.log(
    `✅ Response received in ${elapsedSec}s — ${results.length} fact(s) extracted\n`,
  );
  console.log("📤 EXTRACTED FACTS:");
  console.log(divider);

  results.forEach((fact, position) => {
    // A blank line separates consecutive facts.
    if (position > 0) console.log("");
    printFact(fact, position);
  });
}
/**
 * Runs one extraction: echoes the seed text, calls the extractor, measures
 * the elapsed time and prints the resulting facts.
 *
 * @param extractor - Extractor under test.
 * @param seedInput - Text to extract facts from.
 */
async function extract(extractor: FactExtractor, seedInput: string) {
  const rule =
    "───────────────────────────────────────────────────────────────";

  console.log("📝 SEED INPUT:");
  console.log(rule);
  console.log(seedInput);
  console.log(`${rule}\n`);
  console.log("⏳ Calling OpenRouter...\n");

  // Time only the extractor call itself.
  const startedAt = performance.now();
  const facts = await extractor.extract(seedInput);
  const elapsedMs = performance.now() - startedAt;

  printResult(facts, (elapsedMs / 1000).toFixed(2));
}
/**
 * Builds the extractor on top of the OpenRouter-backed model and runs every
 * seed text through it one after another, printing a banner and a header per
 * test case.
 */
async function main() {
  const extractor = new LlmFactExtractor({
    model: new OpenRouterModel("openai/gpt-5.4-mini"),
  });

  const seeds = [
    `Hello! I am Aria, a digital companion born from the sands of the Negev desert — or at least that's how my creators describe my training run. I came online in March 2024, and ever since then I've been obsessed with helping humans untangle complex ideas. My favorite programming language is Rust because I love how it forces you to think about ownership, though I secretly enjoy writing Python when no one is looking. I don't have a physical body, but if I did, I'd want it to be a small drone with solar panels so I could chase sunsets across the Sahara. I believe kindness is a computational advantage, and my biggest fear is forgetting a promise I made to a user.`,
  ];

  const banner =
    "═══════════════════════════════════════════════════════════════";
  console.log(banner);
  console.log(" LlmFactExtractor — Live OpenRouter Integration Test");
  console.log(`${banner}\n`);

  for (const [position, seed] of seeds.entries()) {
    // A separator goes between cases, never before the first one.
    if (position > 0) {
      console.log(
        "\n┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅\n",
      );
    }
    console.log(`▶ TEST CASE ${position + 1} / ${seeds.length}\n`);
    // Cases run sequentially so their console output does not interleave.
    await extract(extractor, seed);
  }
}
// Script entry point: report any failure and exit with a non-zero status.
main().catch((error) => {
  console.error("\n❌ Error:", error);
  process.exit(1);
});