From 4baf056cd9bf1872bb0b57b315f01ff0d563d5b3 Mon Sep 17 00:00:00 2001 From: Shinwoo PARK Date: Mon, 11 May 2026 23:12:02 +0900 Subject: [PATCH] feat: add conversation memory pipeline --- CHANGELOG.md | 12 ++ README.md | 11 ++ package.json | 3 +- src/conversation/index.ts | 274 ++++++++++++++++++++++++++++++++++++- tests/conversation.test.ts | 208 ++++++++++++++++++++++++++++ 5 files changed, 506 insertions(+), 2 deletions(-) create mode 100644 CHANGELOG.md diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..9085360 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,12 @@ +# Changelog + +## 0.1.1 + +- add an optional two-stage conversation memory pipeline with classifier and extractor models +- store approved inbound and first-pass outbound memories back into the persona IdentityDB space with turn-trace metadata +- skip re-persisting exact duplicate extracted memories by domain and statement +- expose the new conversation memory pipeline through the public `conversation` module exports + +## 0.1.0 + +- initial public BoxBrain framework release diff --git a/README.md b/README.md index 3e070d7..1564798 100644 --- a/README.md +++ b/README.md @@ -16,6 +16,7 @@ The project is framework-first rather than product-first. The current core libra - availability snapshots with current + next transition calculation - DM-style conversation orchestration for inbound replies and proactive openings - delegated mandatory/contextual memory retrieval pipelines for conversation turns +- optional two-stage conversation memory extraction pipeline for durable inbound/outbound memories - human-like first-reply delay and typing delay utilities - farewell-style refusal flows that can trigger availability-changing tool calls @@ -49,6 +50,16 @@ The library is now grouped by domain under `src/`: Each domain now exposes a class-based service API in addition to the existing functional helpers so consumers can organize stateful integrations more cleanly. 
+## Conversation memory pipeline + +Conversation turns can now optionally run a two-stage (classify, then extract) durable-memory pipeline and persist its output: + +1. a `classifierModel` decides whether each inbound or first-pass outbound message is worth remembering +2. an `extractorModel` converts only approved messages into IdentityDB-ready fact drafts +3. extracted facts are stored back into the persona space with conversation-turn trace metadata + +The optional `memoryPipeline` input is available on both `replyToConversation(...)` and `startConversation(...)`, so app integrations can enable long-term relationship memory without changing their storage layer. + ## Release Tagging `vX.Y.Z` or `X.Y.Z` triggers the Gitea npm release workflow under `.gitea/workflows/npm-release.yml`. diff --git a/package.json b/package.json index a232a1b..8cd083c 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "boxbrain", - "version": "0.1.0", + "version": "0.1.1", "description": "IdentityDB-backed framework for synthetic human-like personas and DM-style LLM harnesses.", "license": "MIT", "type": "module", @@ -16,6 +16,7 @@ "files": [ "dist", "README.md", + "CHANGELOG.md", "LICENSE" ], "engines": { diff --git a/src/conversation/index.ts b/src/conversation/index.ts index 82da346..5abfa4c 100644 --- a/src/conversation/index.ts +++ b/src/conversation/index.ts @@ -9,6 +9,8 @@ import type { BoxBrainAvailabilityMode, BoxBrainConversationDirection, BoxBrainConversationEntry, + BoxBrainFactDomain, + BoxBrainFactDraft, BoxBrainMemoryReference, BoxBrainMessage, BoxBrainToolCall, @@ -18,6 +20,25 @@ export interface ConversationMemorySelectionResult { memoryIds: string[]; } +export interface ConversationMemoryClassificationResult { + shouldRemember: boolean; + reason?: string | undefined; +} + +export interface ConversationMemoryExtractedFactDraft extends BoxBrainFactDraft { + domain?: BoxBrainFactDomain | undefined; +} + +export interface ConversationMemoryExtractionResult { + facts: 
ConversationMemoryExtractedFactDraft[]; +} + +export interface ConversationMemoryPipelineOptions { + classifierModel: StructuredModelAdapter; + extractorModel: StructuredModelAdapter; + source?: string | undefined; +} + export interface SetAvailabilityToolArguments extends Record { mode: BoxBrainAvailabilityMode; reason?: string; @@ -43,6 +64,7 @@ interface BaseConversationInput { mandatoryMemoryModel: StructuredModelAdapter; contextualMemoryModel: StructuredModelAdapter; responseModel: StructuredModelAdapter; + memoryPipeline?: ConversationMemoryPipelineOptions | undefined; rng?: (() => number) | undefined; activeExchangeWindowSeconds?: number | undefined; isFirstReplyInExchange?: boolean | undefined; @@ -165,7 +187,14 @@ async function generateConversationTurn( spaceName: input.spaceName, at: input.currentTime, }); + const persona = await resolvePersonaProfile(db, input.spaceName); + const memoryCandidates: ConversationMemoryCandidate[] = []; + if (input.inboundMessage) { + memoryCandidates.push(createConversationMemoryCandidate(input, 'inbound', input.inboundMessage)); + } + if (availability.current.mode === 'offline') { + await maybePersistConversationMemories(db, input.spaceName, persona.displayName, memoryCandidates, input.memoryPipeline); return { blocked: true, blockedReason: availability.current.reason, @@ -176,7 +205,6 @@ async function generateConversationTurn( }; } - const persona = await resolvePersonaProfile(db, input.spaceName); const candidateMemories = await buildMemoryCandidates(db, input.spaceName, input.counterpartId, input.counterpartDisplayName, input.currentTime); const candidateMap = new Map( candidateMemories.map((memory, index) => [`m${index + 1}`, memory]), @@ -228,6 +256,7 @@ async function generateConversationTurn( }); if (replyDelaySeconds === null) { + await maybePersistConversationMemories(db, input.spaceName, persona.displayName, memoryCandidates, input.memoryPipeline); return { blocked: true, blockedReason: 
availability.current.reason, @@ -261,8 +290,10 @@ async function generateConversationTurn( turnId: input.turnId, source: `${input.responseModel.provider}:${input.responseModel.model}`, }); + memoryCandidates.push(createConversationMemoryCandidate(input, 'outbound', message.text)); } + await maybePersistConversationMemories(db, input.spaceName, persona.displayName, memoryCandidates, input.memoryPipeline); const toolCallsExecuted = await executeToolCalls(db, input.spaceName, input.currentTime, plan.toolCalls ?? []); return { @@ -395,6 +426,198 @@ function buildConversationPrompt( .join('\n'); } +interface ConversationMemoryCandidate { + turnId: string; + direction: BoxBrainConversationDirection; + text: string; + occurredAt: string; + counterpartId: string; + counterpartDisplayName?: string | undefined; + proactive: boolean; +} + +function createConversationMemoryCandidate( + input: BaseConversationInput & { proactive: boolean; turnId: string }, + direction: BoxBrainConversationDirection, + text: string, +): ConversationMemoryCandidate { + return { + turnId: input.turnId, + direction, + text, + occurredAt: input.currentTime, + counterpartId: input.counterpartId, + counterpartDisplayName: input.counterpartDisplayName, + proactive: input.proactive, + }; +} + +async function maybePersistConversationMemories( + db: IdentityDB, + spaceName: string, + personaDisplayName: string, + candidates: ConversationMemoryCandidate[], + pipeline?: ConversationMemoryPipelineOptions, +): Promise { + if (!pipeline || candidates.length === 0) { + return; + } + + const existingFacts = await listFactsInSpace(db, spaceName); + const dedupeKeys = new Set( + existingFacts + .map((fact) => buildConversationMemoryDedupKey(getFactDomain(fact), fact.statement)) + .filter((key): key is string => Boolean(key)), + ); + + for (const candidate of candidates) { + const classification = assertConversationMemoryClassificationResult( + await pipeline.classifierModel.generateObject({ + prompt: 
buildConversationMemoryClassificationPrompt(personaDisplayName, candidate), + schema: { type: 'object', required: ['shouldRemember'] }, + metadata: { + boxbrainTask: 'persona.conversation.classify_memory', + counterpartId: candidate.counterpartId, + direction: candidate.direction, + }, + }), + ); + + if (!classification.shouldRemember) { + continue; + } + + const extraction = assertConversationMemoryExtractionResult( + await pipeline.extractorModel.generateObject({ + prompt: buildConversationMemoryExtractionPrompt(personaDisplayName, candidate, classification.reason), + schema: { type: 'object', required: ['facts'] }, + metadata: { + boxbrainTask: 'persona.conversation.extract_memory', + counterpartId: candidate.counterpartId, + direction: candidate.direction, + }, + }), + ); + + const factsByDomain = new Map(); + for (const extractedFact of extraction.facts) { + const domain = normalizeConversationMemoryDomain(extractedFact.domain, candidate.direction); + const dedupeKey = buildConversationMemoryDedupKey(domain, extractedFact.statement); + if (dedupeKeys.has(dedupeKey)) { + continue; + } + dedupeKeys.add(dedupeKey); + + const draft = toConversationMemoryDraft(extractedFact, candidate, classification.reason); + const existingDrafts = factsByDomain.get(domain) ?? []; + existingDrafts.push(draft); + factsByDomain.set(domain, existingDrafts); + } + + for (const [domain, facts] of Array.from(factsByDomain.entries())) { + await persistFactDrafts(db, { + spaceName, + domain, + source: pipeline.source ?? 
`${pipeline.extractorModel.provider}:${pipeline.extractorModel.model}`, + facts, + }); + } + } +} + +function buildConversationMemoryClassificationPrompt( + personaDisplayName: string, + candidate: ConversationMemoryCandidate, +): string { + return [ + `Decide whether this DM message is worth storing as durable memory for ${personaDisplayName}.`, + 'Approve only if the message contains a stable preference, biography detail, relationship fact, recurring routine, or other future-useful memory.', + 'Reject ephemeral small talk, filler, acknowledgements, and one-off chatter.', + `Occurred at: ${candidate.occurredAt}`, + `Counterpart: ${candidate.counterpartDisplayName ?? candidate.counterpartId}`, + `Direction: ${candidate.direction}`, + `Proactive: ${candidate.proactive}`, + `Message: ${candidate.text}`, + 'Return { shouldRemember, reason? }.', + ].join('\n'); +} + +function buildConversationMemoryExtractionPrompt( + personaDisplayName: string, + candidate: ConversationMemoryCandidate, + classifierReason?: string, +): string { + const defaultDomain = normalizeConversationMemoryDomain(undefined, candidate.direction); + return [ + `Extract IdentityDB-ready durable facts from this approved DM message for ${personaDisplayName}.`, + 'Each fact must have a concise statement and at least one topic.', + 'Use domain persona.relationship for durable facts about the counterpart or the relationship.', + 'Use domain persona.biography for durable facts about the persona.', + `Default domain for this message if unsure: ${defaultDomain}`, + `Occurred at: ${candidate.occurredAt}`, + `Counterpart: ${candidate.counterpartDisplayName ?? candidate.counterpartId}`, + `Direction: ${candidate.direction}`, + `Proactive: ${candidate.proactive}`, + classifierReason ? 
`Approved because: ${classifierReason}` : undefined, + `Message: ${candidate.text}`, + 'Return { facts } where facts is an array of { domain?, statement, summary?, confidence?, topics }.', + ] + .filter((line): line is string => Boolean(line)) + .join('\n'); +} + +function normalizeConversationMemoryDomain( + domain: BoxBrainFactDomain | undefined, + direction: BoxBrainConversationDirection, +): BoxBrainFactDomain { + if (typeof domain === 'string' && domain.trim().length > 0) { + return domain; + } + + return direction === 'inbound' ? 'persona.relationship' : 'persona.biography'; +} + +function buildConversationMemoryDedupKey(domain: BoxBrainFactDomain | null, statement: string): string { + return `${domain ?? 'unknown'}::${statement.trim().toLowerCase()}`; +} + +function toConversationMemoryDraft( + extractedFact: ConversationMemoryExtractedFactDraft, + candidate: ConversationMemoryCandidate, + classifierReason?: string, +): BoxBrainFactDraft { + const metadata = getJsonObject(extractedFact.metadata); + const draft: BoxBrainFactDraft = { + statement: extractedFact.statement, + topics: extractedFact.topics, + metadata: jsonObject({ + ...(metadata ?? 
{}), + conversationMemory: jsonObject({ + turnId: candidate.turnId, + direction: candidate.direction, + occurredAt: candidate.occurredAt, + counterpartId: candidate.counterpartId, + counterpartDisplayName: candidate.counterpartDisplayName, + proactive: candidate.proactive, + sourceMessage: candidate.text, + classifierReason, + }), + }), + }; + + if (extractedFact.summary !== undefined) { + draft.summary = extractedFact.summary; + } + if (extractedFact.source !== undefined) { + draft.source = extractedFact.source; + } + if (extractedFact.confidence !== undefined) { + draft.confidence = extractedFact.confidence; + } + + return draft; +} + function renderCandidateMemories(candidateMap: Map): string { return Array.from(candidateMap.entries()) .map(([id, memory]) => `${id}: [${memory.domain}] ${memory.summary}`) @@ -409,6 +632,55 @@ function assertConversationMemorySelectionResult(value: ConversationMemorySelect return value; } +function assertConversationMemoryClassificationResult( + value: ConversationMemoryClassificationResult, +): ConversationMemoryClassificationResult { + if (!value || typeof value.shouldRemember !== 'boolean') { + throw new Error('Conversation memory classification output must include a shouldRemember boolean.'); + } + if (value.reason !== undefined && typeof value.reason !== 'string') { + throw new Error('Conversation memory classification reason must be a string when provided.'); + } + + return value; +} + +function assertConversationMemoryExtractionResult( + value: ConversationMemoryExtractionResult, +): ConversationMemoryExtractionResult { + if (!value || !Array.isArray(value.facts)) { + throw new Error('Conversation memory extraction output must include a facts array.'); + } + + for (const fact of value.facts) { + if (!fact || typeof fact.statement !== 'string' || fact.statement.trim().length === 0) { + throw new Error('Extracted conversation memory facts must include a non-empty statement.'); + } + if (fact.domain !== undefined && (typeof 
fact.domain !== 'string' || fact.domain.trim().length === 0)) { + throw new Error('Extracted conversation memory fact domains must be non-empty strings when provided.'); + } + if (!Array.isArray(fact.topics) || fact.topics.length === 0) { + throw new Error('Extracted conversation memory facts must include at least one topic.'); + } + for (const topic of fact.topics) { + if (!topic || typeof topic.name !== 'string' || topic.name.trim().length === 0) { + throw new Error('Extracted conversation memory fact topics must include a non-empty name.'); + } + } + if (fact.summary !== undefined && typeof fact.summary !== 'string') { + throw new Error('Extracted conversation memory fact summaries must be strings when provided.'); + } + if (fact.source !== undefined && typeof fact.source !== 'string') { + throw new Error('Extracted conversation memory fact sources must be strings when provided.'); + } + if (fact.confidence !== undefined && typeof fact.confidence !== 'number') { + throw new Error('Extracted conversation memory fact confidence values must be numbers when provided.'); + } + } + + return value; +} + function assertConversationTurnPlan(value: ConversationTurnPlan): ConversationTurnPlan { if (!value || (value.mode !== 'reply' && value.mode !== 'refuse')) { throw new Error('Conversation turn plan must include a mode of reply or refuse.'); diff --git a/tests/conversation.test.ts b/tests/conversation.test.ts index c27708e..9adc513 100644 --- a/tests/conversation.test.ts +++ b/tests/conversation.test.ts @@ -1,4 +1,5 @@ import { afterEach, describe, expect, it } from 'vitest'; +import type { Fact } from 'identitydb'; import { generateSchedule, getAvailabilitySnapshot, @@ -6,6 +7,8 @@ import { replyToConversation, setAvailabilityStatus, startConversation, + type ConversationMemoryClassificationResult, + type ConversationMemoryExtractionResult, type ConversationMemorySelectionResult, type ConversationTurnPlan, type ScheduleGenerationResult, @@ -118,6 +121,162 @@ 
describe('conversation APIs', () => { expect(history[0]?.proactive).toBe(true); }); + it('classifies inbound and outbound conversation messages, extracts only approved memories, and stores trace metadata', async () => { + const db = await createDb(); + await seedPersonaMemory(db); + const classifierPrompts: string[] = []; + const extractorPrompts: string[] = []; + + await replyToConversation(db, { + spaceName: 'persona:minji', + counterpartId: 'user:shinwoo', + counterpartDisplayName: 'Shinwoo', + message: '이번주말에 등산 가고 싶어', + currentTime: '2026-05-12T12:00:00.000Z', + mandatoryMemoryModel: createSelectionModel(['m1']), + contextualMemoryModel: createSelectionModel([]), + responseModel: createResponseModel({ + mode: 'reply', + messages: ['좋다 나도 산 좋아해', '저녁엔가족이랑먹어'], + }), + rng: () => 0, + memoryPipeline: { + classifierModel: createMemoryClassifier([ + { shouldRemember: true, reason: 'stores a durable user preference' }, + { shouldRemember: false, reason: 'small talk only' }, + { shouldRemember: true, reason: 'reveals a stable persona routine' }, + ], classifierPrompts), + extractorModel: createMemoryExtractor([ + { + facts: [ + { + domain: 'persona.relationship', + statement: 'Shinwoo wants to go hiking on weekends.', + topics: [{ name: 'Shinwoo' }, { name: 'hiking' }], + }, + ], + }, + { + facts: [ + { + domain: 'persona.biography', + statement: 'Minji often has family dinner in the evening.', + topics: [{ name: 'Minji' }, { name: 'family dinner' }], + }, + ], + }, + ], extractorPrompts), + }, + }); + + expect(classifierPrompts).toHaveLength(3); + expect(classifierPrompts[0]).toContain('Direction: inbound'); + expect(classifierPrompts[0]).toContain('이번주말에 등산 가고 싶어'); + expect(classifierPrompts[1]).toContain('Direction: outbound'); + expect(extractorPrompts).toHaveLength(2); + expect(extractorPrompts[0]).toContain('stores a durable user preference'); + expect(extractorPrompts[1]).toContain('reveals a stable persona routine'); + + const facts = await 
listFactsForSpace(db, 'persona:minji'); + const hikingFact = facts.find((fact) => fact.statement === 'Shinwoo wants to go hiking on weekends.'); + const dinnerFact = facts.find((fact) => fact.statement === 'Minji often has family dinner in the evening.'); + + expect(hikingFact?.metadata).toMatchObject({ + boxbrain: { + domain: 'persona.relationship', + }, + conversationMemory: { + turnId: expect.any(String), + direction: 'inbound', + counterpartId: 'user:shinwoo', + counterpartDisplayName: 'Shinwoo', + occurredAt: '2026-05-12T12:00:00.000Z', + proactive: false, + sourceMessage: '이번주말에 등산 가고 싶어', + classifierReason: 'stores a durable user preference', + }, + }); + expect(dinnerFact?.metadata).toMatchObject({ + boxbrain: { + domain: 'persona.biography', + }, + conversationMemory: { + turnId: expect.any(String), + direction: 'outbound', + counterpartId: 'user:shinwoo', + counterpartDisplayName: 'Shinwoo', + occurredAt: '2026-05-12T12:00:00.000Z', + proactive: false, + sourceMessage: '저녁엔가족이랑먹어', + classifierReason: 'reveals a stable persona routine', + }, + }); + }); + + it('deduplicates repeated extracted conversation memories by statement and domain', async () => { + const db = await createDb(); + await seedPersonaMemory(db); + + await replyToConversation(db, { + spaceName: 'persona:minji', + counterpartId: 'user:shinwoo', + counterpartDisplayName: 'Shinwoo', + message: '나는 민트초코 좋아해', + currentTime: '2026-05-12T12:00:00.000Z', + mandatoryMemoryModel: createSelectionModel(['m1']), + contextualMemoryModel: createSelectionModel([]), + responseModel: createResponseModel({ mode: 'reply', messages: ['오 진짜?'] }), + memoryPipeline: { + classifierModel: createMemoryClassifier([ + { shouldRemember: true, reason: 'stable preference' }, + { shouldRemember: false, reason: 'reply is not worth storing' }, + ]), + extractorModel: createMemoryExtractor([ + { + facts: [ + { + domain: 'persona.relationship', + statement: 'Shinwoo likes mint chocolate.', + topics: [{ name: 'Shinwoo' }, 
{ name: 'mint chocolate' }], + }, + ], + }, + ]), + }, + }); + + await replyToConversation(db, { + spaceName: 'persona:minji', + counterpartId: 'user:shinwoo', + counterpartDisplayName: 'Shinwoo', + message: '나 아직도 민트초코 좋아해', + currentTime: '2026-05-12T13:00:00.000Z', + mandatoryMemoryModel: createSelectionModel(['m1']), + contextualMemoryModel: createSelectionModel([]), + responseModel: createResponseModel({ mode: 'reply', messages: ['기억하고있어'] }), + memoryPipeline: { + classifierModel: createMemoryClassifier([ + { shouldRemember: true, reason: 'same stable preference' }, + { shouldRemember: false, reason: 'reply is not worth storing' }, + ]), + extractorModel: createMemoryExtractor([ + { + facts: [ + { + domain: 'persona.relationship', + statement: 'Shinwoo likes mint chocolate.', + topics: [{ name: 'Shinwoo' }, { name: 'mint chocolate' }], + }, + ], + }, + ]), + }, + }); + + const facts = await listFactsForSpace(db, 'persona:minji'); + expect(facts.filter((fact) => fact.statement === 'Shinwoo likes mint chocolate.')).toHaveLength(1); + }); + it('executes availability tool calls after farewell-style refusal messages', async () => { const db = await createDb(); await seedPersonaMemory(db); @@ -208,6 +367,55 @@ function createResponseModel(plan: ConversationTurnPlan, prompts: string[] = []) }; } +function createMemoryClassifier( + results: ConversationMemoryClassificationResult[], + prompts: string[] = [], +): StructuredModelAdapter { + const queue = [...results]; + return { + provider: 'fake-structured', + model: 'memory-classifier', + async generateObject(request: { prompt: string }): Promise { + prompts.push(request.prompt); + const result = queue.shift(); + if (!result) { + throw new Error('No queued conversation memory classification result.'); + } + return result as TOutput; + }, + }; +} + +function createMemoryExtractor( + results: ConversationMemoryExtractionResult[], + prompts: string[] = [], +): StructuredModelAdapter { + const queue = [...results]; + 
return { + provider: 'fake-structured', + model: 'memory-extractor', + async generateObject(request: { prompt: string }): Promise { + prompts.push(request.prompt); + const result = queue.shift(); + if (!result) { + throw new Error('No queued conversation memory extraction result.'); + } + return result as TOutput; + }, + }; +} + +async function listFactsForSpace(db: Awaited>, spaceName: string): Promise { + const topics = await db.listTopics({ includeFacts: true, spaceName }); + const byId = new Map(); + for (const topic of topics) { + for (const fact of topic.facts) { + byId.set(fact.id, fact); + } + } + return Array.from(byId.values()); +} + async function seedPersonaMemory(db: Awaited>) { await db.upsertSpace({ name: 'persona:minji',