Files
IdentityDB/src/ingestion/extractor.ts

43 lines
1.2 KiB
TypeScript

import { IdentityDBError } from '../core/errors';
import { normalizeTopicName } from '../core/utils';
import type { FactExtractor, ExtractedFact } from './types';
/**
 * Runs `extractor` over `input` and returns a validated, de-duplicated fact.
 *
 * The statement is the extractor's trimmed statement, falling back to the
 * trimmed `input` when the extractor supplies none (or only whitespace).
 * Topics are de-duplicated by their normalized name; the first topic seen for
 * a given normalized name is kept as-is (the returned topic objects are the
 * extractor's originals, not normalized copies). Topics whose normalized name
 * is empty are dropped.
 *
 * @param input - Raw text handed to the extractor; also the statement fallback.
 * @param extractor - Extractor whose `extract` method produces the raw fact.
 * @returns The fact with `summary`, `source`, `confidence` and `metadata`
 *   defaulted to `null` when the extractor left them unset.
 * @throws IdentityDBError if the resulting statement is empty, or if no topic
 *   with a non-empty normalized name remains (including when the extractor
 *   returned no topic list at all).
 */
export async function extractFact(
  input: string,
  extractor: FactExtractor,
): Promise<ExtractedFact> {
  const extracted = await extractor.extract(input);

  // `||` rather than `??` on purpose: a whitespace-only statement trims to ''
  // and must fall back to the input just like a missing one.
  const statement = extracted.statement?.trim() || input.trim();
  if (statement.length === 0) {
    throw new IdentityDBError('Extractor returned an empty statement.');
  }

  // Keyed by normalized name so differently-spelled duplicates collapse.
  const dedupedTopics = new Map<string, ExtractedFact['topics'][number]>();

  // Tolerate a missing topic list so malformed extractor output is reported
  // through the IdentityDBError below instead of a raw "not iterable" TypeError.
  for (const topic of extracted.topics ?? []) {
    const normalizedName = normalizeTopicName(topic.name);
    if (normalizedName.length === 0) {
      continue;
    }

    // First occurrence wins; later duplicates are ignored.
    if (!dedupedTopics.has(normalizedName)) {
      dedupedTopics.set(normalizedName, topic);
    }
  }

  if (dedupedTopics.size === 0) {
    throw new IdentityDBError('Extractor returned no usable topics.');
  }

  return {
    statement,
    summary: extracted.summary ?? null,
    source: extracted.source ?? null,
    confidence: extracted.confidence ?? null,
    metadata: extracted.metadata ?? null,
    topics: Array.from(dedupedTopics.values()),
  };
}