47 lines
1.3 KiB
TypeScript
47 lines
1.3 KiB
TypeScript
import { IdentityDBError } from '../core/errors';
|
|
import { normalizeTopicName } from '../core/utils';
|
|
import type { FactExtractor, ExtractedFact } from './types';
|
|
|
|
/**
 * Runs the given extractor over `input` and post-processes every fact it yields.
 *
 * @param input - Raw text handed to the extractor; it is also forwarded to the
 *   per-fact validation step.
 * @param extractor - Object whose `extract` method produces the candidate facts.
 * @returns The extractor's facts, each passed through `validateAndNormalizeFact`,
 *   in the order the extractor returned them.
 */
export async function extractFacts(
  input: string,
  extractor: FactExtractor,
): Promise<ExtractedFact[]> {
  const rawFacts = await extractor.extract(input);

  // Bind `input` once so the mapping step reads as a plain per-fact transform.
  const normalize = (rawFact: ExtractedFact): ExtractedFact =>
    validateAndNormalizeFact(input, rawFact);

  return rawFacts.map(normalize);
}
|
|
|
|
/**
 * Validates a single extractor result and returns a cleaned-up copy of it.
 *
 * - The statement is trimmed; when the extractor supplied none (or only
 *   whitespace), the trimmed `input` is used instead.
 * - Topics are de-duplicated by their normalized name: the first topic seen for
 *   each normalized name is kept unchanged, and topics whose normalized name is
 *   empty are dropped.
 * - `summary`, `source`, `confidence` and `metadata` are returned as `null`
 *   when they are nullish on the extracted fact.
 *
 * @param input - Original text given to the extractor; fallback for the statement.
 * @param extracted - Fact as returned by the extractor.
 * @returns A new fact object holding the normalized values.
 * @throws IdentityDBError when both the statement and `input` are blank, or when
 *   no topic survives normalization (including when `topics` is missing).
 */
function validateAndNormalizeFact(input: string, extracted: ExtractedFact): ExtractedFact {
  // `||` (not `??`) is deliberate: an empty trimmed statement must also fall back to the input.
  const statement = extracted.statement?.trim() || input.trim();

  if (statement.length === 0) {
    throw new IdentityDBError('Extractor returned an empty statement.');
  }

  const dedupedTopics = new Map<string, ExtractedFact['topics'][number]>();

  // Extractor output is not guaranteed to match the declared type at runtime.
  // A missing topic list is treated as empty so the failure surfaces as the
  // IdentityDBError below rather than a raw "not iterable" TypeError.
  for (const topic of extracted.topics ?? []) {
    const normalizedName = normalizeTopicName(topic.name);

    if (normalizedName.length === 0) {
      continue;
    }

    // First occurrence wins; later topics with the same normalized name are ignored.
    if (!dedupedTopics.has(normalizedName)) {
      dedupedTopics.set(normalizedName, topic);
    }
  }

  if (dedupedTopics.size === 0) {
    throw new IdentityDBError('Extractor returned no usable topics.');
  }

  return {
    statement,
    summary: extracted.summary ?? null,
    source: extracted.source ?? null,
    confidence: extracted.confidence ?? null,
    metadata: extracted.metadata ?? null,
    topics: Array.from(dedupedTopics.values()),
  };
}
|