288 lines
9.8 KiB
TypeScript
288 lines
9.8 KiB
TypeScript
/**
 * Live integration test for LlmFactExtractor using the OpenRouter SDK.
 *
 * Usage:
 *   export OPENROUTER_API_KEY="sk-or-v1-..."
 *   bun run scripts/test-llm-extractor.ts
 *
 * Or create a .env.test-llm-extractor file in the project root:
 *   OPENROUTER_API_KEY=sk-or-v1-...
 */
|
|
|
|
import { existsSync, readFileSync } from "fs";
|
|
import { resolve } from "path";
|
|
import { OpenRouter } from "@openrouter/sdk";
|
|
import { LlmFactExtractor } from "../src/ingestion/llm-extractor";
|
|
import type {
|
|
ExtractedFact,
|
|
FactExtractor,
|
|
LlmTextGenerationModel,
|
|
LlmTextGenerationModelInput,
|
|
} from "../src/ingestion/types";
|
|
import type {
|
|
JsonValue,
|
|
TopicCategory,
|
|
TopicGranularity,
|
|
} from "../src/types/domain";
|
|
|
|
/**
 * Loads `KEY=value` pairs from a dotenv-style file into `process.env`.
 *
 * The path is resolved against the current working directory. A missing file
 * is ignored so the script can also run with variables exported in the shell.
 * Blank lines, `#` comment lines, lines without `=`, and lines whose key is
 * empty (e.g. `=value`) are skipped. One pair of matching surrounding quotes
 * (single or double) is removed from the value. Values read from the file
 * overwrite variables that are already set.
 *
 * @param filePath - Path to the env file, absolute or relative to the cwd.
 */
function loadEnvFile(filePath: string): void {
  const fullPath = resolve(filePath);
  if (!existsSync(fullPath)) return;

  const content = readFileSync(fullPath, "utf-8");
  for (const line of content.split(/\r?\n/)) {
    const trimmed = line.trim();
    if (!trimmed || trimmed.startsWith("#")) continue;

    const eqIndex = trimmed.indexOf("=");
    // -1: no assignment on this line; 0: "=value" has no key to assign to.
    if (eqIndex <= 0) continue;

    const key = trimmed.slice(0, eqIndex).trim();
    let value = trimmed.slice(eqIndex + 1).trim();

    // Strip quotes only when there is a real opening/closing pair. Without the
    // length check a value consisting of a single quote character would match
    // itself as both opener and closer and be reduced to an empty string.
    const quote = value.charAt(0);
    if (
      value.length >= 2 &&
      (quote === '"' || quote === "'") &&
      value.endsWith(quote)
    ) {
      value = value.slice(1, -1);
    }

    process.env[key] = value;
  }
}
|
|
|
|
// Populate process.env from the optional local env file before reading the key.
loadEnvFile(".env.test-llm-extractor");

// API key handed to the OpenRouter client; the script aborts when it is
// missing or empty.
const OPENROUTER_API_KEY = process.env.OPENROUTER_API_KEY;
if (OPENROUTER_API_KEY === undefined || OPENROUTER_API_KEY === "") {
  console.error("Error: OPENROUTER_API_KEY environment variable is required.");
  process.exit(1);
}
|
|
|
|
/** Schema fragment for a field that is either a string or null. */
const nullableStringSchema = { type: ["string", "null"] } as const;

/** Schema for one topic attached to a fact; every listed key is required. */
const topicSchema = {
  type: "object",
  properties: {
    name: { type: "string" },
    category: nullableStringSchema,
    granularity: nullableStringSchema,
    role: nullableStringSchema,
  },
  required: ["name", "category", "granularity", "role"],
  additionalProperties: false,
} as const;

/** Schema for a single extracted fact and its list of topics. */
const factSchema = {
  type: "object",
  properties: {
    statement: nullableStringSchema,
    summary: nullableStringSchema,
    source: nullableStringSchema,
    confidence: { type: ["number", "null"] },
    topics: { type: "array", items: topicSchema },
  },
  required: ["statement", "summary", "source", "confidence", "topics"],
  additionalProperties: false,
} as const;

/**
 * JSON Schema for the structured model response: an object whose only
 * property, `facts`, is an array of fact objects.
 */
const extractedFactSchema = {
  type: "object",
  properties: {
    facts: { type: "array", items: factSchema },
  },
  required: ["facts"],
  additionalProperties: false,
} as const;
|
|
|
|
/** Narrows an unknown JSON value to a non-null, non-array object. */
function isJsonObject(value: unknown): value is Record<string, unknown> {
  return typeof value === "object" && value !== null && !Array.isArray(value);
}

/** Converts one raw topic object from the model into the ExtractedFact topic shape. */
function toExtractedTopic(topic: Record<string, unknown>) {
  const mapped: {
    name: string;
    category?: TopicCategory;
    granularity?: TopicGranularity;
    role?: string | null;
  } = {
    name: typeof topic.name === "string" ? topic.name : "unknown",
    // A non-string role is normalised to null rather than left unset.
    role: typeof topic.role === "string" ? topic.role : null,
  };
  // category/granularity are only set when present so they stay truly optional.
  if (typeof topic.category === "string") {
    mapped.category = topic.category as TopicCategory;
  }
  if (typeof topic.granularity === "string") {
    mapped.granularity = topic.granularity as TopicGranularity;
  }
  return mapped;
}

/** Converts one raw fact object from the model into an ExtractedFact. */
function toExtractedFact(raw: Record<string, unknown>): ExtractedFact {
  const extracted: ExtractedFact = {
    summary: typeof raw.summary === "string" ? raw.summary : null,
    source: typeof raw.source === "string" ? raw.source : null,
    confidence: typeof raw.confidence === "number" ? raw.confidence : null,
    // Entries that are not objects (e.g. null) are dropped instead of crashing.
    topics: Array.isArray(raw.topics)
      ? raw.topics.filter(isJsonObject).map(toExtractedTopic)
      : [],
  };

  if (typeof raw.statement === "string") {
    extracted.statement = raw.statement;
  }
  if (raw.metadata && typeof raw.metadata === "object") {
    extracted.metadata = raw.metadata as JsonValue;
  }
  return extracted;
}

/**
 * Parses the raw model response text into ExtractedFact objects.
 *
 * @throws Error when the text is not valid JSON; the message includes the raw text.
 */
function parseExtractedFacts(rawContent: string): ExtractedFact[] {
  let parsed: unknown;
  try {
    parsed = JSON.parse(rawContent.trim());
  } catch {
    throw new Error(
      `Failed to parse JSON from model response.\nRaw response:\n${rawContent}`,
    );
  }

  // A root that is not an object, or has no `facts` array, yields no facts.
  if (!isJsonObject(parsed) || !Array.isArray(parsed.facts)) return [];

  // Skip malformed entries (null, primitives, nested arrays) rather than
  // throwing on them or emitting empty facts.
  return parsed.facts.filter(isJsonObject).map(toExtractedFact);
}

/**
 * LlmTextGenerationModel implementation that sends the prompt to OpenRouter's
 * chat endpoint with a JSON-schema response format and maps the reply into
 * ExtractedFact objects.
 */
class OpenRouterModel implements LlmTextGenerationModel {
  private readonly client = new OpenRouter({ apiKey: OPENROUTER_API_KEY });

  /**
   * @param model - OpenRouter model identifier used for every request.
   */
  constructor(private readonly model: string = "openai/gpt-5.4-mini") {}

  /**
   * Sends the prompt to the model and returns the facts found in its reply.
   *
   * The system message is the instruction followed by the optional additional
   * instruction; the user message is the prompt input.
   *
   * @param prompt - Instruction text, optional additional instruction, and input.
   * @returns The facts parsed from the first choice; empty when the reply
   *   contains no `facts` array.
   * @throws Error when the reply is not valid JSON.
   */
  async generateText(
    prompt: LlmTextGenerationModelInput,
  ): Promise<ExtractedFact[]> {
    const systemContent = [
      prompt.instruction,
      prompt.additionalInstruction ? `\n${prompt.additionalInstruction}` : "",
    ].join("\n");

    const result = await this.client.chat.send({
      chatRequest: {
        model: this.model,
        messages: [
          { role: "system", content: systemContent },
          { role: "user", content: prompt.input },
        ],
        temperature: 0.2,
        responseFormat: {
          type: "json_schema",
          jsonSchema: {
            name: "extracted_facts",
            schema: extractedFactSchema,
          },
        },
      },
    });

    // A missing choice or message content is treated as an empty reply, which
    // then fails JSON parsing with a descriptive error.
    const rawContent = result.choices[0]?.message?.content ?? "";
    return parseExtractedFacts(rawContent);
  }
}
|
|
|
|
/**
 * Writes one extracted fact to stdout: a numbered header, its scalar fields,
 * the metadata when it has at least one key, and the list of topics.
 *
 * @param result - The fact to print.
 * @param index - Zero-based position of the fact; shown one-based.
 */
function printFact(result: ExtractedFact, index: number) {
  const { statement, summary, source, confidence, metadata, topics } = result;

  const headerLines = [
    ` 📌 FACT #${index + 1}`,
    ` Statement : ${statement ?? "(none)"}`,
    ` Summary : ${summary ?? "(none)"}`,
    ` Source : ${source ?? "(none)"}`,
    ` Confidence: ${confidence ?? "(none)"}`,
  ];
  for (const headerLine of headerLines) {
    console.log(headerLine);
  }

  // Metadata is only shown when present and non-empty.
  const metadataKeyCount = metadata ? Object.keys(metadata).length : 0;
  if (metadataKeyCount > 0) {
    console.log(` Metadata : ${JSON.stringify(metadata, null, 2)}`);
  }

  console.log(" 🏷️ TOPICS:");
  if (topics.length === 0) {
    console.log(" (none)");
    return;
  }

  for (const { name, category, granularity, role } of topics) {
    // Collect only the attributes that carry a truthy value.
    const details: string[] = [];
    if (category) details.push(`category=${category}`);
    if (granularity) details.push(`granularity=${granularity}`);
    if (role) details.push(`role=${role}`);

    const suffix = details.length > 0 ? ` (${details.join(", ")})` : "";
    console.log(` • ${name}${suffix}`);
  }
}
|
|
|
|
/**
 * Prints the summary line for one extraction run followed by every extracted
 * fact, separating consecutive facts with a blank line.
 *
 * @param results - Facts returned by the extractor.
 * @param elapsedSec - Elapsed time, already formatted as seconds.
 */
function printResult(results: ExtractedFact[], elapsedSec: string) {
  const factCount = results.length;
  console.log(
    `✅ Response received in ${elapsedSec}s — ${factCount} fact(s) extracted\n`,
  );

  console.log("📤 EXTRACTED FACTS:");
  console.log(
    "───────────────────────────────────────────────────────────────",
  );

  for (const [position, fact] of results.entries()) {
    // Blank line between facts, but not before the first one.
    if (position > 0) console.log("");
    printFact(fact, position);
  }
}
|
|
|
|
/**
 * Runs one extraction: echoes the seed text, calls the extractor while timing
 * the call, and prints the returned facts with the elapsed seconds.
 *
 * @param extractor - Extractor under test.
 * @param seedInput - Text to extract facts from.
 */
async function extract(extractor: FactExtractor, seedInput: string) {
  console.log("📝 SEED INPUT:");
  console.log(
    "───────────────────────────────────────────────────────────────",
  );
  console.log(seedInput);
  console.log(
    "───────────────────────────────────────────────────────────────\n",
  );
  console.log("⏳ Calling OpenRouter...\n");

  // Time only the extractor call itself.
  const startedAt = performance.now();
  const facts = await extractor.extract(seedInput);
  const elapsedMs = performance.now() - startedAt;

  const elapsedSeconds = (elapsedMs / 1000).toFixed(2);
  printResult(facts, elapsedSeconds);
}
|
|
|
|
/**
 * Builds an LlmFactExtractor backed by OpenRouterModel and runs it against
 * each seed text in order, printing a banner and a numbered heading per case.
 */
async function main() {
  const extractor = new LlmFactExtractor({
    model: new OpenRouterModel("openai/gpt-5.4-mini"),
  });

  const seeds = [
    `Hello! I am Aria, a digital companion born from the sands of the Negev desert — or at least that's how my creators describe my training run. I came online in March 2024, and ever since then I've been obsessed with helping humans untangle complex ideas. My favorite programming language is Rust because I love how it forces you to think about ownership, though I secretly enjoy writing Python when no one is looking. I don't have a physical body, but if I did, I'd want it to be a small drone with solar panels so I could chase sunsets across the Sahara. I believe kindness is a computational advantage, and my biggest fear is forgetting a promise I made to a user.`,
  ];

  console.log(
    "═══════════════════════════════════════════════════════════════",
  );
  console.log(" LlmFactExtractor — Live OpenRouter Integration Test");
  console.log(
    "═══════════════════════════════════════════════════════════════\n",
  );

  // Cases run sequentially so their output is not interleaved.
  for (const [seedIndex, seed] of seeds.entries()) {
    if (seedIndex > 0) {
      console.log(
        "\n┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅\n",
      );
    }
    console.log(`▶ TEST CASE ${seedIndex + 1} / ${seeds.length}\n`);
    await extract(extractor, seed);
  }
}
|
|
|
|
// Script entry point: any rejection from main() is logged and turned into a
// non-zero exit code.
main().catch((failure) => {
  console.error("\n❌ Error:", failure);
  process.exit(1);
});
|