Compare commits
13 Commits
4c418dc39a
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
| b77e8eea40 | |||
| cea45a552a | |||
| d83fc31c59 | |||
| b908bc0bd9 | |||
| 283f91ed91 | |||
| 5991e4f1f0 | |||
| 0dc657c97b | |||
| 96d0568197 | |||
| e8adccfbbf | |||
| 1c82b63e7a | |||
| 3e39d3bbd5 | |||
| 4f877a40fb | |||
| 7a02621e40 |
117
.gitea/workflows/npm-release.yml
Normal file
117
.gitea/workflows/npm-release.yml
Normal file
@@ -0,0 +1,117 @@
|
||||
# Release pipeline: when a version tag is pushed, verify the tagged source
# (tag/version match, tests, type check, build) and then publish it to npm.
name: npm release

on:
  push:
    tags:
      - 'v*'
      - '[0-9]*'

permissions:
  contents: read

defaults:
  run:
    shell: bash

jobs:
  # Gate job: the release job only runs if every step here succeeds.
  verify:
    name: verify
    runs-on: ubuntu-latest
    container:
      image: node:20-bookworm
    timeout-minutes: 20

    steps:
      - name: Install release tools
        run: |
          set -euo pipefail
          apt-get update
          apt-get install -y git curl ca-certificates
          curl -fsSL https://bun.sh/install | bash -s -- bun-v1.3.13
          install -m 0755 /root/.bun/bin/bun /usr/local/bin/bun
          node --version
          npm --version
          bun --version

      - name: Clone tagged source
        # Context values are handed to the script through env instead of being
        # interpolated into the script text, so a crafted tag name or a token
        # containing quote characters cannot alter the shell command.
        env:
          REPO_URL: ${{ gitea.server_url }}/${{ gitea.repository }}.git
          REF_NAME: ${{ gitea.ref_name }}
          GIT_USER: ${{ gitea.actor }}
          GIT_TOKEN: ${{ secrets.GITEA_TOKEN }}
        run: |
          set -euo pipefail
          AUTH_HEADER="$(printf '%s:%s' "$GIT_USER" "$GIT_TOKEN" | base64 -w0)"
          git -c http.extraHeader="Authorization: Basic $AUTH_HEADER" clone --depth 1 --branch "$REF_NAME" "$REPO_URL" repo
          git -C repo rev-parse HEAD

      - name: Verify release tag matches package version
        working-directory: repo
        shell: bash
        # The tag name is attacker-influenced input; pass it via env rather than
        # expanding the expression inside the script.
        env:
          TAG_NAME: ${{ gitea.ref_name }}
        run: |
          set -euo pipefail
          PACKAGE_VERSION="$(node -p "require('./package.json').version")"

          # Accept both "v1.2.3" and "1.2.3" style tags.
          if [ "$TAG_NAME" = "v$PACKAGE_VERSION" ] || [ "$TAG_NAME" = "$PACKAGE_VERSION" ]; then
            echo "Release tag $TAG_NAME matches package version $PACKAGE_VERSION"
            exit 0
          fi

          echo "Tag $TAG_NAME does not match package.json version $PACKAGE_VERSION" >&2
          exit 1

      - name: Run verify pipeline
        working-directory: repo
        run: |
          set -euo pipefail
          bun install --frozen-lockfile
          bun run test
          bun run check
          bun run build

  # Publishes the package; runs in a fresh container, so it re-clones and rebuilds.
  release:
    name: publish to npm
    runs-on: ubuntu-latest
    container:
      image: node:20-bookworm
    timeout-minutes: 20
    needs:
      - verify

    steps:
      - name: Install release tools
        run: |
          set -euo pipefail
          apt-get update
          apt-get install -y git curl ca-certificates
          curl -fsSL https://bun.sh/install | bash -s -- bun-v1.3.13
          install -m 0755 /root/.bun/bin/bun /usr/local/bin/bun
          node --version
          npm --version
          bun --version

      - name: Clone tagged source
        # Same env-based hand-off as in the verify job, to avoid script injection.
        env:
          REPO_URL: ${{ gitea.server_url }}/${{ gitea.repository }}.git
          REF_NAME: ${{ gitea.ref_name }}
          GIT_USER: ${{ gitea.actor }}
          GIT_TOKEN: ${{ secrets.GITEA_TOKEN }}
        run: |
          set -euo pipefail
          AUTH_HEADER="$(printf '%s:%s' "$GIT_USER" "$GIT_TOKEN" | base64 -w0)"
          git -c http.extraHeader="Authorization: Basic $AUTH_HEADER" clone --depth 1 --branch "$REF_NAME" "$REPO_URL" repo
          git -C repo rev-parse HEAD

      - name: Install dependencies
        working-directory: repo
        run: |
          set -euo pipefail
          bun install --frozen-lockfile

      - name: Build package
        working-directory: repo
        run: |
          set -euo pipefail
          bun run build

      - name: Publish package to npm
        working-directory: repo
        env:
          NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
        run: |
          set -euo pipefail
          # Write the registry credential to the user-level npmrc so `npm publish` can authenticate.
          printf '//registry.npmjs.org/:_authToken=%s\n' "$NODE_AUTH_TOKEN" > ~/.npmrc
          npm publish
|
||||
55
README.md
55
README.md
@@ -15,9 +15,9 @@ A single fact like `I have worked with TypeScript since 2025.` can connect the t
|
||||
## Current capabilities
|
||||
|
||||
- SQLite, PostgreSQL, MySQL, and MariaDB connection adapters
|
||||
- Automatic schema initialization for `topics`, `facts`, `fact_topics`, `topic_relations`, `topic_aliases`, and `fact_embeddings`
|
||||
- Automatic schema initialization for `spaces`, `topics`, `facts`, `fact_topics`, `topic_relations`, `topic_aliases`, and `fact_embeddings`
|
||||
- High-level APIs for adding topics and facts
|
||||
- Topic hierarchy APIs for parent/child traversal and lineage lookup
|
||||
- Hard space isolation so independent memory graphs can coexist without cross-linking
|
||||
- Topic alias and canonical resolution APIs so facts and queries can resolve alternate names
|
||||
- Semantic fact indexing and search APIs built around provider-agnostic embeddings
|
||||
- Dedup-aware ingestion hooks that can reuse an existing fact when a semantic near-duplicate is detected
|
||||
@@ -101,6 +101,34 @@ console.log(matches.map((entry) => [entry.statement, entry.score]));
|
||||
await db.close();
|
||||
```
|
||||
|
||||
## Memory spaces
|
||||
|
||||
IdentityDB now supports hard isolation via spaces. If you write facts into `spaceName: 'A'` and `spaceName: 'B'`, they behave like separate dimensions:
|
||||
|
||||
- the same topic name can exist in both spaces
|
||||
- aliases resolve only inside the requested space
|
||||
- hierarchy, connected-topic traversal, semantic search, and duplicate detection stay inside the same space
|
||||
|
||||
```ts
|
||||
await db.upsertSpace({ name: 'A' });
|
||||
await db.upsertSpace({ name: 'B' });
|
||||
|
||||
await db.addFact({
|
||||
spaceName: 'A',
|
||||
statement: 'TypeScript belongs to A.',
|
||||
topics: [{ name: 'TypeScript', category: 'entity', granularity: 'concrete' }],
|
||||
});
|
||||
|
||||
await db.addFact({
|
||||
spaceName: 'B',
|
||||
statement: 'TypeScript belongs to B.',
|
||||
topics: [{ name: 'TypeScript', category: 'entity', granularity: 'concrete' }],
|
||||
});
|
||||
|
||||
const alphaFacts = await db.getTopicFacts('TypeScript', { spaceName: 'A' });
|
||||
const betaFacts = await db.getTopicFacts('TypeScript', { spaceName: 'B' });
|
||||
```
|
||||
|
||||
## Semantic ingestion and duplicate detection
|
||||
|
||||
If you provide an embedding provider during ingestion, IdentityDB can index the new fact automatically and reuse an existing fact when a semantic near-duplicate is already present.
|
||||
@@ -113,6 +141,29 @@ await db.ingestStatement('Bun makes TypeScript tooling fast.', {
|
||||
});
|
||||
```
|
||||
|
||||
## LLM-backed extraction
|
||||
|
||||
You can bridge any text-generating model into IdentityDB by wrapping it with `LlmFactExtractor`.
|
||||
|
||||
```ts
|
||||
import { LlmFactExtractor } from 'identitydb';
|
||||
|
||||
const extractor = new LlmFactExtractor({
|
||||
model: {
|
||||
async generateText(prompt) {
|
||||
return callYourFavoriteLlm(prompt);
|
||||
},
|
||||
},
|
||||
instructions: 'Prefer technology, product, and time topics over generic nouns.',
|
||||
});
|
||||
|
||||
await db.ingestStatement('I have worked with Bun and TypeScript since 2025.', {
|
||||
extractor,
|
||||
});
|
||||
```
|
||||
|
||||
The adapter expects the model to return JSON and will validate the structured response before IdentityDB writes a fact.
|
||||
|
||||
## Development
|
||||
|
||||
```bash
|
||||
|
||||
87
docs/plans/2026-05-11-identitydb-llm-extractor-adapter.md
Normal file
87
docs/plans/2026-05-11-identitydb-llm-extractor-adapter.md
Normal file
@@ -0,0 +1,87 @@
|
||||
# IdentityDB LLM Extractor Adapter Implementation Plan
|
||||
|
||||
> **For Hermes:** Use the `subagent-driven-development` skill to execute this plan task-by-task. Enforce strict TDD for every production behavior.
|
||||
|
||||
**Goal:** Add a provider-agnostic LLM-backed fact extractor adapter so callers can plug a small language model into IdentityDB ingestion without coupling the package to a specific SDK.
|
||||
|
||||
**Architecture:** Keep `FactExtractor` as the stable ingestion contract, then add an `LlmFactExtractor` adapter that delegates prompting and text generation to a narrow model interface. The adapter should build a deterministic JSON-only extraction prompt, parse structured JSON from the model response, validate the shape, and return `ExtractedFact` objects that flow through the existing ingestion validation path.
|
||||
|
||||
**Tech Stack:** TypeScript, Bun, Node.js, Kysely, Vitest, tsup.
|
||||
|
||||
---
|
||||
|
||||
## Scope and interpretation
|
||||
|
||||
- The new adapter must remain provider-agnostic and must not depend on OpenAI, Anthropic, or any other SDK.
|
||||
- The adapter should accept a minimal language-model interface that returns text so package consumers can bridge any LLM client they want.
|
||||
- Structured output must be validated in the adapter before returning it to `extractFact()`.
|
||||
- The adapter should tolerate common model formatting noise such as fenced ```json blocks around the payload.
|
||||
- Initial release should focus on correctness and predictable integration, not prompt-optimization or retries.
|
||||
|
||||
---
|
||||
|
||||
## Public API additions
|
||||
|
||||
```ts
|
||||
const extractor = new LlmFactExtractor({
|
||||
model: {
|
||||
async generateText(prompt) {
|
||||
return jsonStringFromSomeLlm(prompt);
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
const fact = await db.ingestStatement('I have worked with Bun and TypeScript since 2025.', {
|
||||
extractor,
|
||||
});
|
||||
```
|
||||
|
||||
Optional customization:
|
||||
|
||||
```ts
|
||||
const extractor = new LlmFactExtractor({
|
||||
model,
|
||||
instructions: 'Prefer product and technology topics over generic nouns.',
|
||||
});
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Execution plan
|
||||
|
||||
### Task 1: Lock the adapter behavior with failing tests
|
||||
|
||||
**Objective:** Define the LLM adapter contract before implementation.
|
||||
|
||||
**Files:**
|
||||
- Modify: `tests/ingestion.test.ts`
|
||||
- Modify: `src/ingestion/types.ts`
|
||||
- Modify: `src/index.ts`
|
||||
|
||||
**Verification:**
|
||||
- Run focused ingestion tests and confirm they fail for the missing adapter behavior.
|
||||
|
||||
### Task 2: Implement the LLM adapter and response parsing
|
||||
|
||||
**Objective:** Add a reusable `LlmFactExtractor` implementation plus robust JSON extraction helpers.
|
||||
|
||||
**Files:**
|
||||
- Create: `src/ingestion/llm-extractor.ts`
|
||||
- Modify: `src/ingestion/types.ts`
|
||||
- Modify: `src/ingestion/extractor.ts`
|
||||
- Modify: `src/index.ts`
|
||||
|
||||
**Verification:**
|
||||
- Run the focused ingestion tests until green.
|
||||
|
||||
### Task 3: Document the adapter and run the full suite
|
||||
|
||||
**Objective:** Expose the new adapter in docs and ensure the whole package still passes verification.
|
||||
|
||||
**Files:**
|
||||
- Modify: `README.md`
|
||||
- Modify: `src/index.ts`
|
||||
|
||||
**Verification:**
|
||||
- Run `bun run test && bun run check && bun run build`
|
||||
- Confirm the README shows how to bridge an arbitrary LLM client into the adapter.
|
||||
157
docs/plans/2026-05-11-identitydb-space-isolation.md
Normal file
157
docs/plans/2026-05-11-identitydb-space-isolation.md
Normal file
@@ -0,0 +1,157 @@
|
||||
# IdentityDB Space Isolation Implementation Plan
|
||||
|
||||
> **For Hermes:** Use the `writing-plans` and `test-driven-development` skills. Implement this feature in small TDD steps with meaningful commits.
|
||||
|
||||
**Goal:** Add first-class memory spaces so callers can keep unrelated topic/fact graphs isolated from each other while still using one physical database.
|
||||
|
||||
**Architecture:** Introduce a `spaces` table plus `space_id` scoping on the canonical topic/fact records that define graph ownership. Treat the unnamed/default behavior as a built-in `default` space so existing API usage keeps working, while allowing any write/query path to target a named space explicitly.
|
||||
|
||||
**Tech Stack:** TypeScript, Bun, Vitest, Kysely, SQLite/PostgreSQL/MySQL/MariaDB-compatible schema primitives.
|
||||
|
||||
---
|
||||
|
||||
## Requirements
|
||||
|
||||
1. A caller must be able to write memory into independent spaces such as `A` and `B`.
|
||||
2. Topic lookups, alias resolution, hierarchy traversal, connected-topic queries, fact queries, ingestion, and semantic search must only see data from the requested space.
|
||||
3. Existing callers that do not specify a space must continue to work inside a built-in `default` space.
|
||||
4. Space names must be normalized and canonicalized, similar to how topic names are handled.
|
||||
5. Documentation must explain both the isolation model and the default-space compatibility behavior.
|
||||
|
||||
---
|
||||
|
||||
## Task 1: Add failing tests for schema-level space support
|
||||
|
||||
**Objective:** Define the storage contract for spaces and per-space uniqueness before implementation.
|
||||
|
||||
**Files:**
|
||||
- Modify: `tests/migrations.test.ts`
|
||||
|
||||
**Step 1: Write failing tests**
|
||||
- Assert that initialization creates a `spaces` table.
|
||||
- Assert that `topics`, `facts`, and `topic_aliases` now include `space_id`.
|
||||
- Assert that `spaces` includes `id`, `name`, `normalized_name`, `description`, `metadata`, `created_at`, `updated_at`.
|
||||
|
||||
**Step 2: Run red test**
|
||||
- Run: `bun run test tests/migrations.test.ts`
|
||||
- Expect: failure because the schema does not yet contain space support.
|
||||
|
||||
**Step 3: Commit later with implementation**
|
||||
- Do not commit yet; continue only after implementation turns the test green.
|
||||
|
||||
---
|
||||
|
||||
## Task 2: Add failing behavioral tests for isolated spaces
|
||||
|
||||
**Objective:** Lock in the public API behavior for separate memory spaces.
|
||||
|
||||
**Files:**
|
||||
- Modify: `tests/identity-db.test.ts`
|
||||
- Modify: `tests/queries.test.ts`
|
||||
- Modify: `tests/semantic-search.test.ts`
|
||||
- Optionally create: `tests/spaces.test.ts` if separation makes the scenarios clearer.
|
||||
|
||||
**Step 1: Write failing tests**
|
||||
- Verify the same topic name can exist in two spaces without deduplicating together.
|
||||
- Verify facts added in `spaceName: 'A'` are invisible from `spaceName: 'B'`.
|
||||
- Verify alias resolution only resolves inside the same space.
|
||||
- Verify hierarchy parent/child traversal only stays within the same space.
|
||||
- Verify semantic search and duplicate-aware ingestion only search within the same space.
|
||||
- Verify callers that omit `spaceName` still operate in the `default` space.
|
||||
|
||||
**Step 2: Run red tests**
|
||||
- Run the most targeted test files first, then the full set of affected test files together.
|
||||
- Example: `bun run test tests/identity-db.test.ts tests/queries.test.ts tests/semantic-search.test.ts`
|
||||
- Expect: failure due to missing API fields and missing isolation logic.
|
||||
|
||||
---
|
||||
|
||||
## Task 3: Implement schema and type support
|
||||
|
||||
**Objective:** Add the underlying data model required for spaces.
|
||||
|
||||
**Files:**
|
||||
- Modify: `src/core/schema.ts`
|
||||
- Modify: `src/types/domain.ts`
|
||||
- Modify: `src/types/database.ts`
|
||||
- Modify: `src/core/migrations.ts`
|
||||
|
||||
**Implementation notes:**
|
||||
- Add `SPACES_TABLE` and `SPACE_COLUMNS` constants.
|
||||
- Add `SpaceRecord` domain type.
|
||||
- Add `spaces` to `IdentityDatabaseSchema`.
|
||||
- Create the `spaces` table.
|
||||
- Add `space_id` columns to `topics`, `facts`, and `topic_aliases`.
|
||||
- Make topic uniqueness per-space, not global.
|
||||
- Make alias uniqueness per-space, not global.
|
||||
- Seed or upsert a built-in `default` space during initialization via normal application flow, not hard-coded SQL assumptions.
|
||||
|
||||
**Verification:**
|
||||
- Re-run: `bun run test tests/migrations.test.ts`
|
||||
- Expect: green.
|
||||
|
||||
---
|
||||
|
||||
## Task 4: Implement space-aware API contracts and query helpers
|
||||
|
||||
**Objective:** Thread `spaceName` through the high-level API and low-level query layer.
|
||||
|
||||
**Files:**
|
||||
- Modify: `src/types/api.ts`
|
||||
- Modify: `src/ingestion/types.ts`
|
||||
- Modify: `src/queries/topics.ts`
|
||||
- Modify: `src/queries/facts.ts`
|
||||
- Modify: `src/core/identity-db.ts`
|
||||
- Modify: `src/index.ts` if new exported types are added
|
||||
|
||||
**Implementation notes:**
|
||||
- Add public `Space` and `UpsertSpaceInput` types if needed.
|
||||
- Add optional `spaceName` on write/query inputs where the caller targets a graph.
|
||||
- Add helpers to resolve or create the requested space inside transactions.
|
||||
- Ensure all existing topic lookup helpers filter by `space_id`.
|
||||
- Ensure semantic search candidates are restricted to facts in the requested space.
|
||||
- Preserve existing no-space API calls by mapping them to `default`.
|
||||
|
||||
**Verification:**
|
||||
- Re-run the failing behavior tests.
|
||||
- Expect: green for the new targeted tests.
|
||||
|
||||
---
|
||||
|
||||
## Task 5: Refine ergonomics and update documentation
|
||||
|
||||
**Objective:** Make the feature understandable and safe to use.
|
||||
|
||||
**Files:**
|
||||
- Modify: `README.md`
|
||||
- Optionally modify: wiki docs later if requested
|
||||
|
||||
**Implementation notes:**
|
||||
- Document the default space behavior.
|
||||
- Add examples for `spaceName: 'A'` and `spaceName: 'B'`.
|
||||
- Explain that spaces are hard isolation boundaries for topic/fact traversal and semantic retrieval.
|
||||
|
||||
**Verification:**
|
||||
- Run: `bun run test && bun run check && bun run build`
|
||||
- Confirm docs/examples align with the final public API.
|
||||
|
||||
---
|
||||
|
||||
## Suggested commit boundaries
|
||||
|
||||
1. `docs: add IdentityDB space isolation plan`
|
||||
2. `test: specify isolated memory spaces`
|
||||
3. `feat: add space-aware memory graph isolation`
|
||||
4. `docs: document space-scoped memory usage`
|
||||
|
||||
---
|
||||
|
||||
## Acceptance checklist
|
||||
|
||||
- [ ] `spaces` table exists.
|
||||
- [ ] Topics with the same normalized name can exist in different spaces.
|
||||
- [ ] Facts from one space do not appear in another space’s queries.
|
||||
- [ ] Alias and hierarchy traversal are space-aware.
|
||||
- [ ] Semantic search and duplicate detection are space-aware.
|
||||
- [ ] Existing callers still work via the `default` space.
|
||||
- [ ] Full test/build/typecheck suite passes.
|
||||
65
docs/plans/2026-05-11-identitydb-wiki-docs.md
Normal file
65
docs/plans/2026-05-11-identitydb-wiki-docs.md
Normal file
@@ -0,0 +1,65 @@
|
||||
# IdentityDB Wiki Documentation Implementation Plan
|
||||
|
||||
> **For Hermes:** Execute this plan step-by-step. Prefer concrete repository inspection over assumptions, and verify the wiki remote after each major write.
|
||||
|
||||
**Goal:** Verify the IdentityDB wiki repository state, create or clone it as needed, and publish concrete wiki documentation covering the project's purpose, usage, and extractor choices including `NaiveExtractor`.
|
||||
|
||||
**Architecture:** Treat the Gitea wiki as a separate Git repository. First verify whether the wiki feature is enabled and whether the `.wiki.git` remote already exists. If the remote does not exist yet, bootstrap it with a minimal `Home.md`, then clone the wiki repo into a local working directory and author Markdown pages there. Keep the documentation practical, using the package README and current source files as the canonical content source.
|
||||
|
||||
**Tech Stack:** Gitea, tea CLI, Git, Markdown, Bun/TypeScript project docs.
|
||||
|
||||
---
|
||||
|
||||
## Execution plan
|
||||
|
||||
### Task 1: Inspect wiki availability and remote state
|
||||
|
||||
**Objective:** Confirm that the repository has wiki support enabled and determine whether the Git-backed wiki repo already exists.
|
||||
|
||||
**Files:**
|
||||
- Inspect: `https://git.psw.kr/p-sw/IdentityDB`
|
||||
- Read: `/home/hermes-agent/IdentityDB/README.md`
|
||||
- Read: `/home/hermes-agent/IdentityDB/src/ingestion/naive-extractor.ts`
|
||||
- Read: `/home/hermes-agent/IdentityDB/src/ingestion/llm-extractor.ts`
|
||||
|
||||
**Verification:**
|
||||
- Check Gitea repo metadata for `has_wiki=true`.
|
||||
- Check whether `https://git.psw.kr/p-sw/IdentityDB.wiki.git` is readable.
|
||||
|
||||
### Task 2: Bootstrap the wiki repo if missing
|
||||
|
||||
**Objective:** Create the Git-backed wiki repository if it has not been materialized yet.
|
||||
|
||||
**Files:**
|
||||
- Create temporarily: `/home/hermes-agent/IdentityDB-wiki-bootstrap/Home.md`
|
||||
|
||||
**Verification:**
|
||||
- Push a first commit to `https://git.psw.kr/p-sw/IdentityDB.wiki.git`.
|
||||
- Confirm the remote becomes cloneable afterward.
|
||||
|
||||
### Task 3: Clone the wiki repo and author concrete pages
|
||||
|
||||
**Objective:** Write practical docs explaining why IdentityDB exists, how to use it, and where `NaiveExtractor` fits.
|
||||
|
||||
**Files:**
|
||||
- Clone to: `/home/hermes-agent/IdentityDB.wiki`
|
||||
- Create/modify: `/home/hermes-agent/IdentityDB.wiki/Home.md`
|
||||
- Create/modify: `/home/hermes-agent/IdentityDB.wiki/Getting-Started.md`
|
||||
- Create/modify: `/home/hermes-agent/IdentityDB.wiki/Extractors.md`
|
||||
- Create/modify: `/home/hermes-agent/IdentityDB.wiki/_Sidebar.md`
|
||||
|
||||
**Verification:**
|
||||
- Review the generated Markdown files locally.
|
||||
- Ensure internal wiki links resolve by page name.
|
||||
|
||||
### Task 4: Commit, push, and verify the published wiki state
|
||||
|
||||
**Objective:** Publish the wiki docs and verify the remote history reflects the changes.
|
||||
|
||||
**Files:**
|
||||
- Commit within: `/home/hermes-agent/IdentityDB.wiki`
|
||||
|
||||
**Verification:**
|
||||
- Run `git status --short` and `git log --oneline -n 3` in the wiki repo.
|
||||
- Push to the remote wiki repo.
|
||||
- Confirm the wiki is cloneable and the latest commit is visible remotely.
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "identitydb",
|
||||
"version": "0.1.0",
|
||||
"version": "0.2.0",
|
||||
"description": "TypeScript memory graph database wrapper for topics, facts, and AI-assisted ingestion.",
|
||||
"license": "MIT",
|
||||
"type": "module",
|
||||
|
||||
@@ -1,39 +1,27 @@
|
||||
import {
|
||||
type AddFactInput,
|
||||
type ConnectedTopic,
|
||||
type Fact,
|
||||
type FactTopic,
|
||||
type FindSimilarFactsInput,
|
||||
type IndexFactEmbeddingsInput,
|
||||
type LinkTopicsInput,
|
||||
type ListTopicsOptions,
|
||||
type ScoredFact,
|
||||
type SearchFactsInput,
|
||||
type Space,
|
||||
type SpaceScopedInput,
|
||||
type Topic,
|
||||
type TopicLookupOptions,
|
||||
type TopicWithFacts,
|
||||
type UpsertSpaceInput,
|
||||
type UpsertTopicInput,
|
||||
type AddFactInput,
|
||||
type LinkTopicsInput,
|
||||
} from '../types/api';
|
||||
import type { IngestStatementOptions } from '../ingestion/types';
|
||||
import type { DatabaseConnection, IdentityDBConnectionConfig } from '../adapters/dialect';
|
||||
import type { IdentityDatabaseSchema } from '../types/database';
|
||||
import type { FactRecord, TopicRecord } from '../types/domain';
|
||||
import type { FactRecord, SpaceRecord, TopicRecord } from '../types/domain';
|
||||
import { createDatabase } from '../adapters/dialect';
|
||||
import { IdentityDBError } from './errors';
|
||||
import { initializeSchema } from './migrations';
|
||||
import {
|
||||
canonicalizeTopicName,
|
||||
cosineSimilarity,
|
||||
createContentHash,
|
||||
createId,
|
||||
deserializeEmbedding,
|
||||
mapFactRow,
|
||||
mapTopicRow,
|
||||
normalizeTopicName,
|
||||
nowIsoString,
|
||||
serializeEmbedding,
|
||||
serializeMetadata,
|
||||
} from './utils';
|
||||
import { extractFact } from '../ingestion/extractor';
|
||||
import {
|
||||
findFactRowsConnectingTopicIds,
|
||||
@@ -41,16 +29,37 @@ import {
|
||||
findTopicLinksForFactIds,
|
||||
} from '../queries/facts';
|
||||
import {
|
||||
type DatabaseExecutor,
|
||||
findChildTopicRows,
|
||||
findConnectedTopicRows,
|
||||
findParentTopicRows,
|
||||
findSpaceRowByNormalizedName,
|
||||
findTopicRowByNameOrAlias,
|
||||
findTopicRowByNormalizedAlias,
|
||||
findTopicRowByNormalizedName,
|
||||
listTopicAliasRowsForTopicId,
|
||||
listTopicRows,
|
||||
findChildTopicRows,
|
||||
findParentTopicRows,
|
||||
type DatabaseExecutor,
|
||||
} from '../queries/topics';
|
||||
import { IdentityDBError } from './errors';
|
||||
import { initializeSchema } from './migrations';
|
||||
import {
|
||||
canonicalizeSpaceName,
|
||||
canonicalizeTopicName,
|
||||
cosineSimilarity,
|
||||
createContentHash,
|
||||
createId,
|
||||
deserializeEmbedding,
|
||||
mapFactRow,
|
||||
mapSpaceRow,
|
||||
mapTopicRow,
|
||||
normalizeSpaceName,
|
||||
normalizeTopicName,
|
||||
nowIsoString,
|
||||
serializeEmbedding,
|
||||
serializeMetadata,
|
||||
} from './utils';
|
||||
|
||||
// Name of the built-in space. NOTE(review): presumably the fallback used when a caller
// omits `spaceName`; the code that reads this constant is outside this view — confirm.
const DEFAULT_SPACE_NAME = 'default';
|
||||
|
||||
export class IdentityDB {
|
||||
private constructor(private readonly connection: DatabaseConnection) {}
|
||||
@@ -68,6 +77,72 @@ export class IdentityDB {
|
||||
await this.connection.destroy();
|
||||
}
|
||||
|
||||
/**
 * Creates the space named by `input.name`, or updates the existing space that
 * shares its normalized name.
 *
 * On update, `name` is always rewritten to the canonical form of `input.name`;
 * `description` and `metadata` are only overwritten when the caller supplies
 * them (anything other than `undefined`), otherwise the stored values are kept.
 *
 * @param input - Space name plus optional description and metadata.
 * @returns The stored space, mapped through `mapSpaceRow`.
 * @throws IdentityDBError when the name normalizes to an empty string.
 */
async upsertSpace(input: UpsertSpaceInput): Promise<Space> {
  // Validate before opening a transaction so invalid input never touches the database.
  const normalizedName = normalizeSpaceName(input.name);
  if (normalizedName.length === 0) {
    throw new IdentityDBError('Space name cannot be empty.');
  }

  return this.connection.db.transaction().execute(async (trx) => {
    const now = nowIsoString();
    const existing = await findSpaceRowByNormalizedName(trx, normalizedName);

    if (existing) {
      await trx
        .updateTable('spaces')
        .set({
          name: canonicalizeSpaceName(input.name),
          description: input.description !== undefined ? input.description : existing.description,
          metadata: input.metadata !== undefined ? serializeMetadata(input.metadata) : existing.metadata,
          updated_at: now,
        })
        .where('id', '=', existing.id)
        .execute();

      // Re-read the row so the returned value reflects what is actually stored.
      const updated = await trx
        .selectFrom('spaces')
        .selectAll()
        .where('id', '=', existing.id)
        .executeTakeFirstOrThrow();

      return mapSpaceRow(updated);
    }

    // No space with this normalized name yet: insert a new row.
    const createdRow: SpaceRecord = {
      id: createId(),
      name: canonicalizeSpaceName(input.name),
      normalized_name: normalizedName,
      description: input.description ?? null,
      metadata: serializeMetadata(input.metadata),
      created_at: now,
      updated_at: now,
    };

    await trx.insertInto('spaces').values(createdRow).execute();
    return mapSpaceRow(createdRow);
  });
}
|
||||
|
||||
/**
 * Looks up a space by name.
 *
 * @param name - Space name; it is normalized before the lookup.
 * @returns The matching space, or `null` when the name normalizes to an empty
 *   string or no row with that normalized name is found.
 */
async getSpaceByName(name: string): Promise<Space | null> {
  const lookupKey = normalizeSpaceName(name);

  if (lookupKey.length !== 0) {
    const match = await findSpaceRowByNormalizedName(this.connection.db, lookupKey);
    if (match) {
      return mapSpaceRow(match);
    }
  }

  return null;
}
|
||||
|
||||
/**
 * Lists every row of the `spaces` table, ordered by `normalized_name` ascending.
 *
 * @returns The rows mapped through `mapSpaceRow`.
 */
async listSpaces(): Promise<Space[]> {
  const orderedSpaces = this.connection.db
    .selectFrom('spaces')
    .selectAll()
    .orderBy('normalized_name', 'asc');

  const spaceRows = await orderedSpaces.execute();
  return spaceRows.map(mapSpaceRow);
}
|
||||
|
||||
/**
 * Upserts a topic by delegating to `upsertTopicInExecutor` with the
 * connection's database handle as the executor.
 *
 * @param input - Topic to create or update.
 * @returns The topic produced by `upsertTopicInExecutor`.
 */
async upsertTopic(input: UpsertTopicInput): Promise<Topic> {
  const executor = this.connection.db;
  return this.upsertTopicInExecutor(executor, input);
}
|
||||
@@ -82,6 +157,7 @@ export class IdentityDB {
|
||||
}
|
||||
|
||||
return this.connection.db.transaction().execute(async (trx) => {
|
||||
const space = await this.getOrCreateSpaceInExecutor(trx, input.spaceName);
|
||||
const createdAt = nowIsoString();
|
||||
const factId = createId();
|
||||
|
||||
@@ -89,6 +165,7 @@ export class IdentityDB {
|
||||
.insertInto('facts')
|
||||
.values({
|
||||
id: factId,
|
||||
space_id: space.id,
|
||||
statement: input.statement.trim(),
|
||||
summary: input.summary ?? null,
|
||||
source: input.source ?? null,
|
||||
@@ -103,7 +180,11 @@ export class IdentityDB {
|
||||
|
||||
for (let index = 0; index < input.topics.length; index += 1) {
|
||||
const topicInput = input.topics[index]!;
|
||||
const topic = await this.upsertTopicInExecutor(trx, topicInput);
|
||||
this.assertScopedTopicInput(space, topicInput.spaceName);
|
||||
const topic = await this.upsertTopicInExecutor(trx, {
|
||||
...topicInput,
|
||||
spaceName: space.name,
|
||||
});
|
||||
|
||||
await trx
|
||||
.insertInto('fact_topics')
|
||||
@@ -125,6 +206,7 @@ export class IdentityDB {
|
||||
|
||||
return {
|
||||
id: factId,
|
||||
spaceId: space.id,
|
||||
statement: input.statement.trim(),
|
||||
summary: input.summary ?? null,
|
||||
source: input.source ?? null,
|
||||
@@ -137,14 +219,12 @@ export class IdentityDB {
|
||||
});
|
||||
}
|
||||
|
||||
async ingestStatement(
|
||||
statement: string,
|
||||
options: IngestStatementOptions,
|
||||
): Promise<Fact> {
|
||||
async ingestStatement(statement: string, options: IngestStatementOptions): Promise<Fact> {
|
||||
const extracted = await extractFact(statement, options.extractor);
|
||||
const factInput: AddFactInput = {
|
||||
statement: extracted.statement ?? statement,
|
||||
topics: extracted.topics,
|
||||
spaceName: options.spaceName,
|
||||
};
|
||||
|
||||
if (extracted.summary !== undefined) {
|
||||
@@ -170,6 +250,7 @@ export class IdentityDB {
|
||||
topicNames: factInput.topics.map((topic) => topic.name),
|
||||
limit: 1,
|
||||
minimumScore: options.duplicateThreshold ?? 0.97,
|
||||
spaceName: options.spaceName,
|
||||
});
|
||||
|
||||
if (similarFacts[0]) {
|
||||
@@ -180,15 +261,27 @@ export class IdentityDB {
|
||||
const fact = await this.addFact(factInput);
|
||||
|
||||
if (options.embeddingProvider) {
|
||||
await this.indexFactEmbedding(fact.id, { provider: options.embeddingProvider });
|
||||
await this.indexFactEmbedding(fact.id, {
|
||||
provider: options.embeddingProvider,
|
||||
spaceName: options.spaceName,
|
||||
});
|
||||
}
|
||||
|
||||
return fact;
|
||||
}
|
||||
|
||||
async indexFactEmbeddings(input: IndexFactEmbeddingsInput): Promise<void> {
|
||||
const factRows = await this.connection.db.selectFrom('facts').selectAll().orderBy('created_at', 'asc').execute();
|
||||
const space = await this.getSpaceForRead(input.spaceName);
|
||||
if (input.spaceName && !space) {
|
||||
return;
|
||||
}
|
||||
|
||||
let factQuery = this.connection.db.selectFrom('facts').selectAll().orderBy('created_at', 'asc');
|
||||
if (space) {
|
||||
factQuery = factQuery.where('space_id', '=', space.id);
|
||||
}
|
||||
|
||||
const factRows = await factQuery.execute();
|
||||
if (factRows.length === 0) {
|
||||
return;
|
||||
}
|
||||
@@ -222,6 +315,13 @@ export class IdentityDB {
|
||||
throw new IdentityDBError(`Fact not found: ${factId}`);
|
||||
}
|
||||
|
||||
if (input.spaceName) {
|
||||
const space = await this.getSpaceForRead(input.spaceName);
|
||||
if (!space || space.id !== factRow.space_id) {
|
||||
throw new IdentityDBError(`Fact ${factId} does not belong to space ${canonicalizeSpaceName(input.spaceName)}.`);
|
||||
}
|
||||
}
|
||||
|
||||
const embedding = await input.provider.embed(factRow.statement);
|
||||
this.assertEmbeddingShape(embedding, input.provider.dimensions);
|
||||
|
||||
@@ -236,6 +336,11 @@ export class IdentityDB {
|
||||
return [];
|
||||
}
|
||||
|
||||
const space = await this.getSpaceForRead(input.spaceName);
|
||||
if (input.spaceName && !space) {
|
||||
return [];
|
||||
}
|
||||
|
||||
const queryEmbedding = await input.provider.embed(queryText);
|
||||
this.assertEmbeddingShape(queryEmbedding, input.provider.dimensions);
|
||||
|
||||
@@ -245,6 +350,7 @@ export class IdentityDB {
|
||||
topicNames: input.topicNames,
|
||||
limit: input.limit,
|
||||
minimumScore: input.minimumScore,
|
||||
spaceId: space?.id,
|
||||
});
|
||||
}
|
||||
|
||||
@@ -254,6 +360,11 @@ export class IdentityDB {
|
||||
return [];
|
||||
}
|
||||
|
||||
const space = await this.getSpaceForRead(input.spaceName);
|
||||
if (input.spaceName && !space) {
|
||||
return [];
|
||||
}
|
||||
|
||||
const queryEmbedding = await input.provider.embed(statement);
|
||||
this.assertEmbeddingShape(queryEmbedding, input.provider.dimensions);
|
||||
|
||||
@@ -263,6 +374,7 @@ export class IdentityDB {
|
||||
topicNames: input.topicNames,
|
||||
limit: input.limit,
|
||||
minimumScore: input.minimumScore,
|
||||
spaceId: space?.id,
|
||||
});
|
||||
}
|
||||
|
||||
@@ -279,12 +391,15 @@ export class IdentityDB {
|
||||
}
|
||||
|
||||
await this.connection.db.transaction().execute(async (trx) => {
|
||||
const space = await this.getOrCreateSpaceInExecutor(trx, input.spaceName);
|
||||
const parentTopic = await this.upsertTopicInExecutor(trx, {
|
||||
name: input.parentName,
|
||||
granularity: 'abstract',
|
||||
spaceName: space.name,
|
||||
});
|
||||
const childTopic = await this.upsertTopicInExecutor(trx, {
|
||||
name: input.childName,
|
||||
spaceName: space.name,
|
||||
});
|
||||
|
||||
const existing = await trx
|
||||
@@ -309,7 +424,7 @@ export class IdentityDB {
|
||||
});
|
||||
}
|
||||
|
||||
async addTopicAlias(canonicalName: string, alias: string): Promise<void> {
|
||||
async addTopicAlias(canonicalName: string, alias: string, options?: SpaceScopedInput): Promise<void> {
|
||||
const normalizedAlias = normalizeTopicName(alias);
|
||||
|
||||
if (normalizedAlias.length === 0) {
|
||||
@@ -317,18 +432,22 @@ export class IdentityDB {
|
||||
}
|
||||
|
||||
await this.connection.db.transaction().execute(async (trx) => {
|
||||
const canonicalTopic = await this.upsertTopicInExecutor(trx, { name: canonicalName });
|
||||
const space = await this.getOrCreateSpaceInExecutor(trx, options?.spaceName);
|
||||
const canonicalTopic = await this.upsertTopicInExecutor(trx, {
|
||||
name: canonicalName,
|
||||
spaceName: space.name,
|
||||
});
|
||||
|
||||
if (normalizedAlias === canonicalTopic.normalizedName) {
|
||||
return;
|
||||
}
|
||||
|
||||
const exactTopicMatch = await findTopicRowByNormalizedName(trx, normalizedAlias);
|
||||
const exactTopicMatch = await findTopicRowByNormalizedName(trx, space.id, normalizedAlias);
|
||||
if (exactTopicMatch && exactTopicMatch.id !== canonicalTopic.id) {
|
||||
throw new IdentityDBError('Cannot assign an alias that already belongs to another canonical topic.');
|
||||
}
|
||||
|
||||
const aliasMatch = await findTopicRowByNormalizedAlias(trx, normalizedAlias);
|
||||
const aliasMatch = await findTopicRowByNormalizedAlias(trx, space.id, normalizedAlias);
|
||||
if (aliasMatch) {
|
||||
if (aliasMatch.id !== canonicalTopic.id) {
|
||||
throw new IdentityDBError('Cannot assign an alias that already resolves to another topic.');
|
||||
@@ -341,6 +460,7 @@ export class IdentityDB {
|
||||
.insertInto('topic_aliases')
|
||||
.values({
|
||||
id: createId(),
|
||||
space_id: space.id,
|
||||
topic_id: canonicalTopic.id,
|
||||
alias: canonicalizeTopicName(alias),
|
||||
normalized_alias: normalizedAlias,
|
||||
@@ -352,47 +472,43 @@ export class IdentityDB {
|
||||
});
|
||||
}
|
||||
|
||||
async resolveTopic(name: string): Promise<Topic | null> {
|
||||
const topicRow = await this.getRequiredTopicRow(name);
|
||||
async resolveTopic(name: string, options?: SpaceScopedInput): Promise<Topic | null> {
|
||||
const topicRow = await this.getRequiredTopicRow(name, options?.spaceName);
|
||||
return topicRow ? mapTopicRow(topicRow) : null;
|
||||
}
|
||||
|
||||
async getTopicAliases(name: string): Promise<string[]> {
|
||||
const topicRow = await this.getRequiredTopicRow(name);
|
||||
|
||||
async getTopicAliases(name: string, options?: SpaceScopedInput): Promise<string[]> {
|
||||
const topicRow = await this.getRequiredTopicRow(name, options?.spaceName);
|
||||
if (!topicRow) {
|
||||
return [];
|
||||
}
|
||||
|
||||
const aliasRows = await listTopicAliasRowsForTopicId(this.connection.db, topicRow.id);
|
||||
const aliasRows = await listTopicAliasRowsForTopicId(this.connection.db, topicRow.space_id, topicRow.id);
|
||||
return aliasRows.map((aliasRow) => aliasRow.alias);
|
||||
}
|
||||
|
||||
async getTopicChildren(name: string): Promise<Topic[]> {
|
||||
const topicRow = await this.getRequiredTopicRow(name);
|
||||
|
||||
async getTopicChildren(name: string, options?: SpaceScopedInput): Promise<Topic[]> {
|
||||
const topicRow = await this.getRequiredTopicRow(name, options?.spaceName);
|
||||
if (!topicRow) {
|
||||
return [];
|
||||
}
|
||||
|
||||
const childRows = await findChildTopicRows(this.connection.db, topicRow.id);
|
||||
const childRows = await findChildTopicRows(this.connection.db, topicRow.space_id, topicRow.id);
|
||||
return childRows.map(mapTopicRow);
|
||||
}
|
||||
|
||||
async getTopicParents(name: string): Promise<Topic[]> {
|
||||
const topicRow = await this.getRequiredTopicRow(name);
|
||||
|
||||
async getTopicParents(name: string, options?: SpaceScopedInput): Promise<Topic[]> {
|
||||
const topicRow = await this.getRequiredTopicRow(name, options?.spaceName);
|
||||
if (!topicRow) {
|
||||
return [];
|
||||
}
|
||||
|
||||
const parentRows = await findParentTopicRows(this.connection.db, topicRow.id);
|
||||
const parentRows = await findParentTopicRows(this.connection.db, topicRow.space_id, topicRow.id);
|
||||
return parentRows.map(mapTopicRow);
|
||||
}
|
||||
|
||||
async getTopicLineage(name: string): Promise<Topic[]> {
|
||||
const topicRow = await this.getRequiredTopicRow(name);
|
||||
|
||||
async getTopicLineage(name: string, options?: SpaceScopedInput): Promise<Topic[]> {
|
||||
const topicRow = await this.getRequiredTopicRow(name, options?.spaceName);
|
||||
if (!topicRow) {
|
||||
return [];
|
||||
}
|
||||
@@ -405,8 +521,7 @@ export class IdentityDB {
|
||||
const nextLevelIds: string[] = [];
|
||||
|
||||
for (const currentId of currentLevelIds) {
|
||||
const parentRows = await findParentTopicRows(this.connection.db, currentId);
|
||||
|
||||
const parentRows = await findParentTopicRows(this.connection.db, topicRow.space_id, currentId);
|
||||
for (const parentRow of parentRows) {
|
||||
if (visitedTopicIds.has(parentRow.id)) {
|
||||
continue;
|
||||
@@ -424,97 +539,97 @@ export class IdentityDB {
|
||||
return lineage;
|
||||
}
|
||||
|
||||
async getTopicFacts(name: string): Promise<Fact[]> {
|
||||
const topicRow = await this.getRequiredTopicRow(name);
|
||||
|
||||
async getTopicFacts(name: string, options?: SpaceScopedInput): Promise<Fact[]> {
|
||||
const topicRow = await this.getRequiredTopicRow(name, options?.spaceName);
|
||||
if (!topicRow) {
|
||||
return [];
|
||||
}
|
||||
|
||||
const factRows = await findFactRowsForTopicId(this.connection.db, topicRow.id);
|
||||
return this.hydrateFacts(factRows);
|
||||
const factRows = await findFactRowsForTopicId(this.connection.db, topicRow.space_id, topicRow.id);
|
||||
return this.hydrateFacts(factRows, topicRow.space_id);
|
||||
}
|
||||
|
||||
async getTopicFactsLinkedTo(name: string, linkedTopicName: string): Promise<Fact[]> {
|
||||
return this.findFactsConnectingTopics([name, linkedTopicName]);
|
||||
async getTopicFactsLinkedTo(name: string, linkedTopicName: string, options?: SpaceScopedInput): Promise<Fact[]> {
|
||||
return this.findFactsConnectingTopics([name, linkedTopicName], options);
|
||||
}
|
||||
|
||||
async findFactsConnectingTopics(names: string[]): Promise<Fact[]> {
|
||||
async findFactsConnectingTopics(names: string[], options?: SpaceScopedInput): Promise<Fact[]> {
|
||||
if (names.length === 0) {
|
||||
return [];
|
||||
}
|
||||
|
||||
const topicRows = await Promise.all(names.map((name) => this.getRequiredTopicRow(name)));
|
||||
const space = await this.getSpaceForRead(options?.spaceName);
|
||||
if (options?.spaceName && !space) {
|
||||
return [];
|
||||
}
|
||||
|
||||
const topicRows = await Promise.all(names.map((name) => this.getRequiredTopicRow(name, options?.spaceName)));
|
||||
if (topicRows.some((topicRow) => topicRow === undefined)) {
|
||||
return [];
|
||||
}
|
||||
|
||||
const topicIds = topicRows.map((topicRow) => topicRow!.id);
|
||||
const factRows = await findFactRowsConnectingTopicIds(this.connection.db, topicIds);
|
||||
const spaceId = topicRows[0]!.space_id ?? space?.id;
|
||||
const factRows = await findFactRowsConnectingTopicIds(this.connection.db, spaceId, topicIds);
|
||||
|
||||
return this.hydrateFacts(factRows);
|
||||
return this.hydrateFacts(factRows, spaceId);
|
||||
}
|
||||
|
||||
async getTopicByName(
|
||||
name: string,
|
||||
options: { includeFacts: true },
|
||||
): Promise<TopicWithFacts | null>;
|
||||
async getTopicByName(name: string, options: { includeFacts: true; spaceName?: string }): Promise<TopicWithFacts | null>;
|
||||
async getTopicByName(name: string, options?: TopicLookupOptions): Promise<Topic | null>;
|
||||
async getTopicByName(
|
||||
name: string,
|
||||
options?: TopicLookupOptions,
|
||||
): Promise<Topic | TopicWithFacts | null> {
|
||||
const topicRow = await this.getRequiredTopicRow(name);
|
||||
|
||||
async getTopicByName(name: string, options?: TopicLookupOptions): Promise<Topic | TopicWithFacts | null> {
|
||||
const topicRow = await this.getRequiredTopicRow(name, options?.spaceName);
|
||||
if (!topicRow) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const topic = mapTopicRow(topicRow);
|
||||
|
||||
if (options?.includeFacts) {
|
||||
return {
|
||||
...topic,
|
||||
facts: await this.getTopicFacts(name),
|
||||
facts: await this.getTopicFacts(name, { spaceName: options.spaceName }),
|
||||
};
|
||||
}
|
||||
|
||||
return topic;
|
||||
}
|
||||
|
||||
async listTopics(options: { includeFacts: true; limit?: number }): Promise<TopicWithFacts[]>;
|
||||
async listTopics(options: { includeFacts: true; limit?: number; spaceName?: string }): Promise<TopicWithFacts[]>;
|
||||
async listTopics(options?: ListTopicsOptions): Promise<Topic[]>;
|
||||
async listTopics(
|
||||
options?: ListTopicsOptions,
|
||||
): Promise<Topic[] | TopicWithFacts[]> {
|
||||
const rows = await listTopicRows(this.connection.db, options?.limit);
|
||||
async listTopics(options?: ListTopicsOptions): Promise<Topic[] | TopicWithFacts[]> {
|
||||
const space = await this.getSpaceForRead(options?.spaceName);
|
||||
if (options?.spaceName && !space) {
|
||||
return [];
|
||||
}
|
||||
|
||||
const spaceId = space?.id ?? await this.getDefaultSpaceIdForRead();
|
||||
if (!spaceId) {
|
||||
return [];
|
||||
}
|
||||
|
||||
const rows = await listTopicRows(this.connection.db, spaceId, options?.limit);
|
||||
if (!options?.includeFacts) {
|
||||
return rows.map(mapTopicRow);
|
||||
}
|
||||
|
||||
const topicsWithFacts: TopicWithFacts[] = [];
|
||||
|
||||
for (const row of rows) {
|
||||
topicsWithFacts.push({
|
||||
...mapTopicRow(row),
|
||||
facts: await this.getTopicFacts(row.name),
|
||||
facts: await this.getTopicFacts(row.name, { spaceName: options?.spaceName }),
|
||||
});
|
||||
}
|
||||
|
||||
return topicsWithFacts;
|
||||
}
|
||||
|
||||
async findConnectedTopics(name: string): Promise<ConnectedTopic[]> {
|
||||
const topicRow = await this.getRequiredTopicRow(name);
|
||||
|
||||
async findConnectedTopics(name: string, options?: SpaceScopedInput): Promise<ConnectedTopic[]> {
|
||||
const topicRow = await this.getRequiredTopicRow(name, options?.spaceName);
|
||||
if (!topicRow) {
|
||||
return [];
|
||||
}
|
||||
|
||||
const rows = await findConnectedTopicRows(this.connection.db, topicRow.id);
|
||||
|
||||
const rows = await findConnectedTopicRows(this.connection.db, topicRow.space_id, topicRow.id);
|
||||
return rows.map((row) => ({
|
||||
...mapTopicRow(row),
|
||||
sharedFactCount: row.shared_fact_count,
|
||||
@@ -527,18 +642,25 @@ export class IdentityDB {
|
||||
topicNames?: string[] | undefined;
|
||||
limit?: number | undefined;
|
||||
minimumScore?: number | undefined;
|
||||
spaceId?: string | undefined;
|
||||
}): Promise<ScoredFact[]> {
|
||||
const topicIds = await this.resolveTopicIds(input.topicNames);
|
||||
const effectiveSpaceId = input.spaceId ?? await this.getDefaultSpaceIdForRead();
|
||||
if (!effectiveSpaceId) {
|
||||
return [];
|
||||
}
|
||||
|
||||
const topicIds = await this.resolveTopicIds(input.topicNames, effectiveSpaceId);
|
||||
if (topicIds === null) {
|
||||
return [];
|
||||
}
|
||||
|
||||
const factRows = topicIds.length > 0
|
||||
? await findFactRowsConnectingTopicIds(this.connection.db, topicIds)
|
||||
? await findFactRowsConnectingTopicIds(this.connection.db, effectiveSpaceId, topicIds)
|
||||
: await this.connection.db
|
||||
.selectFrom('facts')
|
||||
.innerJoin('fact_embeddings', 'fact_embeddings.fact_id', 'facts.id')
|
||||
.selectAll('facts')
|
||||
.where('facts.space_id', '=', effectiveSpaceId)
|
||||
.where('fact_embeddings.model', '=', input.providerModel)
|
||||
.orderBy('facts.created_at', 'asc')
|
||||
.execute();
|
||||
@@ -547,14 +669,14 @@ export class IdentityDB {
|
||||
return [];
|
||||
}
|
||||
|
||||
const embeddingRowsQuery = this.connection.db
|
||||
const embeddingRows = await this.connection.db
|
||||
.selectFrom('fact_embeddings')
|
||||
.selectAll()
|
||||
.where('model', '=', input.providerModel);
|
||||
|
||||
const embeddingRows = factRows.length > 0
|
||||
? await embeddingRowsQuery.where('fact_id', 'in', factRows.map((factRow) => factRow.id)).execute()
|
||||
: [];
|
||||
.innerJoin('facts', 'facts.id', 'fact_embeddings.fact_id')
|
||||
.selectAll('fact_embeddings')
|
||||
.where('facts.space_id', '=', effectiveSpaceId)
|
||||
.where('fact_embeddings.model', '=', input.providerModel)
|
||||
.where('fact_embeddings.fact_id', 'in', factRows.map((factRow) => factRow.id))
|
||||
.execute();
|
||||
|
||||
const embeddingsByFactId = new Map(
|
||||
embeddingRows.map((embeddingRow) => [embeddingRow.fact_id, deserializeEmbedding(embeddingRow.embedding)]),
|
||||
@@ -578,7 +700,7 @@ export class IdentityDB {
|
||||
return [];
|
||||
}
|
||||
|
||||
const hydratedFacts = await this.hydrateFacts(scoredRows.map((entry) => entry.factRow));
|
||||
const hydratedFacts = await this.hydrateFacts(scoredRows.map((entry) => entry.factRow), effectiveSpaceId);
|
||||
const factsById = new Map(hydratedFacts.map((fact) => [fact.id, fact]));
|
||||
|
||||
return scoredRows.map((entry) => ({
|
||||
@@ -587,12 +709,12 @@ export class IdentityDB {
|
||||
}));
|
||||
}
|
||||
|
||||
private async resolveTopicIds(topicNames?: string[]): Promise<string[] | null> {
|
||||
private async resolveTopicIds(topicNames: string[] | undefined, spaceId: string): Promise<string[] | null> {
|
||||
if (!topicNames || topicNames.length === 0) {
|
||||
return [];
|
||||
}
|
||||
|
||||
const topicRows = await Promise.all(topicNames.map((topicName) => this.getRequiredTopicRow(topicName)));
|
||||
const topicRows = await Promise.all(topicNames.map((topicName) => this.getRequiredTopicRowInSpaceId(topicName, spaceId)));
|
||||
if (topicRows.some((topicRow) => !topicRow)) {
|
||||
return null;
|
||||
}
|
||||
@@ -637,30 +759,28 @@ export class IdentityDB {
|
||||
}
|
||||
}
|
||||
|
||||
private async upsertTopicInExecutor(
|
||||
executor: DatabaseExecutor,
|
||||
input: UpsertTopicInput,
|
||||
): Promise<Topic> {
|
||||
private async upsertTopicInExecutor(executor: DatabaseExecutor, input: UpsertTopicInput): Promise<Topic> {
|
||||
const normalizedName = normalizeTopicName(input.name);
|
||||
|
||||
if (normalizedName.length === 0) {
|
||||
throw new IdentityDBError('Topic name cannot be empty.');
|
||||
}
|
||||
|
||||
const existing = await findTopicRowByNormalizedName(executor, normalizedName);
|
||||
const space = await this.getOrCreateSpaceInExecutor(executor, input.spaceName);
|
||||
const existing = await findTopicRowByNormalizedName(executor, space.id, normalizedName);
|
||||
const now = nowIsoString();
|
||||
|
||||
if (existing) {
|
||||
return this.updateTopicRowInExecutor(executor, existing, input, now, true);
|
||||
}
|
||||
|
||||
const aliasedTopic = await findTopicRowByNormalizedAlias(executor, normalizedName);
|
||||
const aliasedTopic = await findTopicRowByNormalizedAlias(executor, space.id, normalizedName);
|
||||
if (aliasedTopic) {
|
||||
return this.updateTopicRowInExecutor(executor, aliasedTopic, input, now, false);
|
||||
}
|
||||
|
||||
const createdRow: TopicRecord = {
|
||||
id: createId(),
|
||||
space_id: space.id,
|
||||
name: canonicalizeTopicName(input.name),
|
||||
normalized_name: normalizedName,
|
||||
category: input.category ?? 'custom',
|
||||
@@ -672,7 +792,6 @@ export class IdentityDB {
|
||||
};
|
||||
|
||||
await executor.insertInto('topics').values(createdRow).execute();
|
||||
|
||||
return mapTopicRow(createdRow);
|
||||
}
|
||||
|
||||
@@ -705,22 +824,39 @@ export class IdentityDB {
|
||||
return mapTopicRow(updated);
|
||||
}
|
||||
|
||||
private async getRequiredTopicRow(name: string): Promise<TopicRecord | undefined> {
|
||||
const normalizedName = normalizeTopicName(name);
|
||||
private async getRequiredTopicRow(name: string, spaceName?: string): Promise<TopicRecord | undefined> {
|
||||
const space = await this.getSpaceForRead(spaceName);
|
||||
if (spaceName && !space) {
|
||||
return undefined;
|
||||
}
|
||||
|
||||
const spaceId = space?.id ?? await this.getDefaultSpaceIdForRead();
|
||||
if (!spaceId) {
|
||||
return undefined;
|
||||
}
|
||||
|
||||
return this.getRequiredTopicRowInSpaceId(name, spaceId);
|
||||
}
|
||||
|
||||
private async getRequiredTopicRowInSpaceId(name: string, spaceId: string): Promise<TopicRecord | undefined> {
|
||||
const normalizedName = normalizeTopicName(name);
|
||||
if (normalizedName.length === 0) {
|
||||
return undefined;
|
||||
}
|
||||
|
||||
return findTopicRowByNameOrAlias(this.connection.db, normalizedName);
|
||||
return findTopicRowByNameOrAlias(this.connection.db, spaceId, normalizedName);
|
||||
}
|
||||
|
||||
private async hydrateFacts(factRows: FactRecord[]): Promise<Fact[]> {
|
||||
private async hydrateFacts(factRows: FactRecord[], spaceId?: string): Promise<Fact[]> {
|
||||
if (factRows.length === 0) {
|
||||
return [];
|
||||
}
|
||||
|
||||
const effectiveSpaceId = spaceId ?? factRows[0]!.space_id;
|
||||
const factIds = factRows.map((fact) => fact.id);
|
||||
const topicLinks = await findTopicLinksForFactIds(this.connection.db, factIds);
|
||||
const topicLinks = await findTopicLinksForFactIds(this.connection.db, effectiveSpaceId, factIds);
|
||||
|
||||
const topicsByFactId = new Map<string, FactTopic[]>();
|
||||
|
||||
for (const topicLink of topicLinks) {
|
||||
const topics = topicsByFactId.get(topicLink.fact_id) ?? [];
|
||||
topics.push({
|
||||
@@ -733,4 +869,57 @@ export class IdentityDB {
|
||||
|
||||
return factRows.map((factRow) => mapFactRow(factRow, topicsByFactId.get(factRow.id) ?? []));
|
||||
}
|
||||
|
||||
/**
 * Resolves the space to write into, inserting its row when none exists yet.
 *
 * A missing or blank (empty / whitespace-only) `requestedSpaceName` resolves to
 * `DEFAULT_SPACE_NAME`. Blank names must not create a space of their own:
 * `getSpaceForRead` treats an empty normalized name as "no space", so a row
 * stored under '' could never be resolved by name again.
 *
 * @param executor Database handle (or open transaction) used for the lookup and the insert.
 * @param requestedSpaceName Optional caller-supplied space name.
 * @returns The existing space row, or the row that was just inserted.
 */
private async getOrCreateSpaceInExecutor(executor: DatabaseExecutor, requestedSpaceName?: string): Promise<SpaceRecord> {
  // Normalization starts by trimming, so "trims to empty" is exactly "normalizes to empty".
  const trimmedRequest = requestedSpaceName?.trim() ?? '';
  const effectiveName = trimmedRequest.length > 0 ? trimmedRequest : DEFAULT_SPACE_NAME;
  const normalizedName = normalizeSpaceName(effectiveName);

  const existing = await findSpaceRowByNormalizedName(executor, normalizedName);
  if (existing) {
    return existing;
  }

  const now = nowIsoString();
  const createdRow: SpaceRecord = {
    id: createId(),
    name: canonicalizeSpaceName(effectiveName),
    normalized_name: normalizedName,
    description: null,
    metadata: null,
    created_at: now,
    updated_at: now,
  };

  await executor.insertInto('spaces').values(createdRow).execute();
  return createdRow;
}
|
||||
|
||||
/**
 * Looks up an existing space by name without ever creating one.
 *
 * @param spaceName Optional space name supplied by the caller.
 * @returns The matching space row, or `undefined` when no name was given,
 *          the name normalizes to an empty string, or the lookup finds nothing.
 */
private async getSpaceForRead(spaceName?: string): Promise<SpaceRecord | undefined> {
  // Skip normalization entirely for a missing/empty name.
  const lookupKey = spaceName ? normalizeSpaceName(spaceName) : '';

  return lookupKey.length === 0
    ? undefined
    : findSpaceRowByNormalizedName(this.connection.db, lookupKey);
}
|
||||
|
||||
/**
 * Fetches the id of the space stored under `DEFAULT_SPACE_NAME`.
 *
 * @returns The default space's id, or `undefined` when that row does not exist.
 */
private async getDefaultSpaceIdForRead(): Promise<string | undefined> {
  const defaultKey = normalizeSpaceName(DEFAULT_SPACE_NAME);
  const row = await findSpaceRowByNormalizedName(this.connection.db, defaultKey);

  return row ? row.id : undefined;
}
|
||||
|
||||
/**
 * Rejects a topic whose explicit space differs from the space of the fact it is attached to.
 *
 * @param space Space row the fact belongs to.
 * @param topicSpaceName Space name given on the topic input; a missing/empty value is accepted as-is.
 * @throws IdentityDBError when the topic's normalized space name differs from `space.normalized_name`.
 */
private assertScopedTopicInput(space: SpaceRecord, topicSpaceName?: string): void {
  if (topicSpaceName && normalizeSpaceName(topicSpaceName) !== space.normalized_name) {
    throw new IdentityDBError(
      `Fact topics cannot point to a different space than the fact itself (${space.name}).`,
    );
  }
}
|
||||
}
|
||||
|
||||
@@ -4,6 +4,7 @@ import {
|
||||
FACTS_TABLE,
|
||||
FACT_EMBEDDINGS_TABLE,
|
||||
FACT_TOPICS_TABLE,
|
||||
SPACES_TABLE,
|
||||
TOPIC_ALIASES_TABLE,
|
||||
TOPIC_RELATIONS_TABLE,
|
||||
TOPICS_TABLE,
|
||||
@@ -14,23 +15,42 @@ export async function initializeSchema(
|
||||
db: Kysely<IdentityDatabaseSchema>,
|
||||
): Promise<void> {
|
||||
await db.schema
|
||||
.createTable(TOPICS_TABLE)
|
||||
.createTable(SPACES_TABLE)
|
||||
.ifNotExists()
|
||||
.addColumn('id', 'text', (column) => column.primaryKey())
|
||||
.addColumn('name', 'text', (column) => column.notNull())
|
||||
.addColumn('normalized_name', 'text', (column) => column.notNull().unique())
|
||||
.addColumn('category', 'text', (column) => column.notNull())
|
||||
.addColumn('granularity', 'text', (column) => column.notNull())
|
||||
.addColumn('description', 'text')
|
||||
.addColumn('metadata', 'text')
|
||||
.addColumn('created_at', 'text', (column) => column.notNull())
|
||||
.addColumn('updated_at', 'text', (column) => column.notNull())
|
||||
.execute();
|
||||
|
||||
await db.schema
|
||||
.createTable(TOPICS_TABLE)
|
||||
.ifNotExists()
|
||||
.addColumn('id', 'text', (column) => column.primaryKey())
|
||||
.addColumn('space_id', 'text', (column) =>
|
||||
column.notNull().references(`${SPACES_TABLE}.id`).onDelete('cascade'),
|
||||
)
|
||||
.addColumn('name', 'text', (column) => column.notNull())
|
||||
.addColumn('normalized_name', 'text', (column) => column.notNull())
|
||||
.addColumn('category', 'text', (column) => column.notNull())
|
||||
.addColumn('granularity', 'text', (column) => column.notNull())
|
||||
.addColumn('description', 'text')
|
||||
.addColumn('metadata', 'text')
|
||||
.addColumn('created_at', 'text', (column) => column.notNull())
|
||||
.addColumn('updated_at', 'text', (column) => column.notNull())
|
||||
.addUniqueConstraint('topics_space_normalized_name_key', ['space_id', 'normalized_name'])
|
||||
.execute();
|
||||
|
||||
await db.schema
|
||||
.createTable(FACTS_TABLE)
|
||||
.ifNotExists()
|
||||
.addColumn('id', 'text', (column) => column.primaryKey())
|
||||
.addColumn('space_id', 'text', (column) =>
|
||||
column.notNull().references(`${SPACES_TABLE}.id`).onDelete('cascade'),
|
||||
)
|
||||
.addColumn('statement', 'text', (column) => column.notNull())
|
||||
.addColumn('summary', 'text')
|
||||
.addColumn('source', 'text')
|
||||
@@ -88,14 +108,32 @@ export async function initializeSchema(
|
||||
.createTable(TOPIC_ALIASES_TABLE)
|
||||
.ifNotExists()
|
||||
.addColumn('id', 'text', (column) => column.primaryKey())
|
||||
.addColumn('space_id', 'text', (column) =>
|
||||
column.notNull().references(`${SPACES_TABLE}.id`).onDelete('cascade'),
|
||||
)
|
||||
.addColumn('topic_id', 'text', (column) =>
|
||||
column.notNull().references(`${TOPICS_TABLE}.id`).onDelete('cascade'),
|
||||
)
|
||||
.addColumn('alias', 'text', (column) => column.notNull())
|
||||
.addColumn('normalized_alias', 'text', (column) => column.notNull().unique())
|
||||
.addColumn('normalized_alias', 'text', (column) => column.notNull())
|
||||
.addColumn('is_primary', 'integer', (column) => column.notNull())
|
||||
.addColumn('created_at', 'text', (column) => column.notNull())
|
||||
.addColumn('updated_at', 'text', (column) => column.notNull())
|
||||
.addUniqueConstraint('topic_aliases_space_normalized_alias_key', ['space_id', 'normalized_alias'])
|
||||
.execute();
|
||||
|
||||
await db.schema
|
||||
.createIndex('topics_space_id_idx')
|
||||
.ifNotExists()
|
||||
.on(TOPICS_TABLE)
|
||||
.column('space_id')
|
||||
.execute();
|
||||
|
||||
await db.schema
|
||||
.createIndex('facts_space_id_idx')
|
||||
.ifNotExists()
|
||||
.on(FACTS_TABLE)
|
||||
.column('space_id')
|
||||
.execute();
|
||||
|
||||
await db.schema
|
||||
@@ -133,6 +171,13 @@ export async function initializeSchema(
|
||||
.column('child_topic_id')
|
||||
.execute();
|
||||
|
||||
await db.schema
|
||||
.createIndex('topic_aliases_space_id_idx')
|
||||
.ifNotExists()
|
||||
.on(TOPIC_ALIASES_TABLE)
|
||||
.column('space_id')
|
||||
.execute();
|
||||
|
||||
await db.schema
|
||||
.createIndex('topic_aliases_topic_id_idx')
|
||||
.ifNotExists()
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
export const SPACES_TABLE = 'spaces';
|
||||
export const TOPICS_TABLE = 'topics';
|
||||
export const FACTS_TABLE = 'facts';
|
||||
export const FACT_TOPICS_TABLE = 'fact_topics';
|
||||
@@ -5,8 +6,19 @@ export const TOPIC_RELATIONS_TABLE = 'topic_relations';
|
||||
export const TOPIC_ALIASES_TABLE = 'topic_aliases';
|
||||
export const FACT_EMBEDDINGS_TABLE = 'fact_embeddings';
|
||||
|
||||
/** Column names of a space row: identity, display/lookup names, optional details, timestamps. */
export const SPACE_COLUMNS = [
  'id', 'name', 'normalized_name',
  'description', 'metadata',
  'created_at', 'updated_at',
] as const;
|
||||
|
||||
export const TOPIC_COLUMNS = [
|
||||
'id',
|
||||
'space_id',
|
||||
'name',
|
||||
'normalized_name',
|
||||
'category',
|
||||
@@ -19,6 +31,7 @@ export const TOPIC_COLUMNS = [
|
||||
|
||||
export const FACT_COLUMNS = [
|
||||
'id',
|
||||
'space_id',
|
||||
'statement',
|
||||
'summary',
|
||||
'source',
|
||||
@@ -45,6 +58,7 @@ export const TOPIC_RELATION_COLUMNS = [
|
||||
|
||||
export const TOPIC_ALIAS_COLUMNS = [
|
||||
'id',
|
||||
'space_id',
|
||||
'topic_id',
|
||||
'alias',
|
||||
'normalized_alias',
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import { createHash, randomUUID } from 'node:crypto';
|
||||
|
||||
import type { Fact, FactTopic, Topic } from '../types/api';
|
||||
import type { FactRecord, TopicRecord } from '../types/domain';
|
||||
import type { Fact, FactTopic, Space, Topic } from '../types/api';
|
||||
import type { FactRecord, SpaceRecord, TopicRecord } from '../types/domain';
|
||||
|
||||
export function normalizeTopicName(name: string): string {
|
||||
return name.trim().replace(/\s+/g, ' ').toLowerCase();
|
||||
@@ -11,6 +11,14 @@ export function canonicalizeTopicName(name: string): string {
|
||||
return name.trim().replace(/\s+/g, ' ');
|
||||
}
|
||||
|
||||
/**
 * Produces the lookup key for a space name.
 *
 * Space names share the topic-name normalization (trim, collapse whitespace
 * runs to a single space, lower-case) by delegating to `normalizeTopicName`.
 */
export function normalizeSpaceName(name: string): string {
  const lookupKey = normalizeTopicName(name);
  return lookupKey;
}
|
||||
|
||||
/**
 * Produces the display form of a space name.
 *
 * Delegates to `canonicalizeTopicName`, which trims the name and collapses
 * whitespace runs to a single space while keeping the original letter case.
 */
export function canonicalizeSpaceName(name: string): string {
  const displayName = canonicalizeTopicName(name);
  return displayName;
}
|
||||
|
||||
/** Returns the current time as an ISO-8601 string (`Date.prototype.toISOString` format). */
export function nowIsoString(): string {
  const currentInstant = new Date();
  return currentInstant.toISOString();
}
|
||||
@@ -71,9 +79,22 @@ export function cosineSimilarity(left: number[], right: number[]): number {
|
||||
return dot / (Math.sqrt(leftMagnitude) * Math.sqrt(rightMagnitude));
|
||||
}
|
||||
|
||||
/**
 * Converts a stored space row (snake_case columns) into the API-facing `Space` shape.
 *
 * @param record Row as read from the spaces table.
 * @returns The camelCase `Space`, with `metadata` passed through `deserializeMetadata`.
 */
export function mapSpaceRow(record: SpaceRecord): Space {
  const { id, name, description } = record;
  const metadata = deserializeMetadata(record.metadata) as Space['metadata'];

  return {
    id,
    name,
    normalizedName: record.normalized_name,
    description,
    metadata,
    createdAt: record.created_at,
    updatedAt: record.updated_at,
  };
}
|
||||
|
||||
export function mapTopicRow(record: TopicRecord): Topic {
|
||||
return {
|
||||
id: record.id,
|
||||
spaceId: record.space_id,
|
||||
name: record.name,
|
||||
normalizedName: record.normalized_name,
|
||||
category: record.category,
|
||||
@@ -88,6 +109,7 @@ export function mapTopicRow(record: TopicRecord): Topic {
|
||||
export function mapFactRow(record: FactRecord, topics: FactTopic[]): Fact {
|
||||
return {
|
||||
id: record.id,
|
||||
spaceId: record.space_id,
|
||||
statement: record.statement,
|
||||
summary: record.summary,
|
||||
source: record.source,
|
||||
|
||||
@@ -2,6 +2,7 @@ export * from './adapters';
|
||||
export * from './core/identity-db';
|
||||
export * from './core/migrations';
|
||||
export * from './ingestion/extractor';
|
||||
export * from './ingestion/llm-extractor';
|
||||
export * from './ingestion/naive-extractor';
|
||||
export * from './ingestion/types';
|
||||
export * from './types/api';
|
||||
|
||||
273
src/ingestion/llm-extractor.ts
Normal file
273
src/ingestion/llm-extractor.ts
Normal file
@@ -0,0 +1,273 @@
|
||||
import { IdentityDBError } from '../core/errors';
|
||||
import type { TopicCategory, TopicGranularity } from '../types/domain';
|
||||
import type {
|
||||
ExtractedFact,
|
||||
FactExtractor,
|
||||
LlmFactExtractorOptions,
|
||||
} from './types';
|
||||
|
||||
// Base prompt sent ahead of every input: four newline-separated instructions
// describing the JSON object the model is expected to return.
const DEFAULT_INSTRUCTIONS = `Extract one structured fact from the user input.
Return JSON only. Do not include markdown, explanations, or prose outside the JSON object.
Use this shape: {"statement": string?, "summary": string|null, "source": string|null, "confidence": number|null, "metadata": object|null, "topics": Array<{"name": string, "category": "entity"|"concept"|"temporal"|"custom"?, "granularity": "abstract"|"concrete"|"mixed"?, "role": string|null, "description": string|null, "metadata": object|null}>}.
Only include topics that are explicitly supported by the input.`;
|
||||
|
||||
/**
 * Fact extractor that delegates to a text-generation model and parses its JSON reply.
 *
 * The prompt is either produced by `options.promptBuilder` or assembled from
 * the default instructions, any extra `options.instructions`, and the input.
 */
export class LlmFactExtractor implements FactExtractor {
  constructor(private readonly options: LlmFactExtractorOptions) {}

  /**
   * Sends the prompt for `input` to the model and parses the reply.
   *
   * @param input Raw text to extract a fact from.
   * @returns The structured fact parsed by `parseLlmExtractedFactResponse`.
   */
  async extract(input: string): Promise<ExtractedFact> {
    const rawReply = await this.options.model.generateText(this.buildPrompt(input));
    return parseLlmExtractedFactResponse(rawReply);
  }

  /** Builds the prompt text; a custom builder, when configured, takes full control. */
  private buildPrompt(input: string): string {
    if (this.options.promptBuilder) {
      // The custom builder receives the instructions untrimmed.
      return this.options.promptBuilder(input, this.options.instructions);
    }

    const sections: string[] = [DEFAULT_INSTRUCTIONS];

    const extraInstructions = this.options.instructions?.trim();
    if (extraInstructions) {
      sections.push(`Additional instructions:\n${extraInstructions}`);
    }

    sections.push(`Input:\n${input.trim()}`);
    return sections.join('\n\n');
  }
}
|
||||
|
||||
/**
 * Turns a raw model reply into an `ExtractedFact`.
 *
 * `topics` is always set; `statement`, `summary`, `source`, `confidence` and
 * `metadata` are copied only when their helper returns something other than
 * `undefined`, so absent fields stay absent on the result.
 *
 * @param response Raw text returned by the model.
 * @throws IdentityDBError when the parsed payload is not a JSON object
 *         (the parsing helpers called here may raise errors of their own).
 */
export function parseLlmExtractedFactResponse(response: string): ExtractedFact {
  const parsed = parseJsonCandidate(response);
  if (!isRecord(parsed)) {
    throw new IdentityDBError('LLM extractor response must be a JSON object.');
  }

  // Topics are validated first, then the optional fields in declaration order.
  const result: ExtractedFact = { topics: parseTopics(parsed.topics) };

  const statementValue = optionalString(parsed.statement);
  if (statementValue !== undefined) {
    result.statement = statementValue;
  }

  const summaryValue = optionalNullableString(parsed.summary);
  if (summaryValue !== undefined) {
    result.summary = summaryValue;
  }

  const sourceValue = optionalNullableString(parsed.source);
  if (sourceValue !== undefined) {
    result.source = sourceValue;
  }

  const confidenceValue = optionalNullableNumber(parsed.confidence);
  if (confidenceValue !== undefined) {
    result.confidence = confidenceValue;
  }

  const metadataValue = optionalMetadata(parsed.metadata);
  if (metadataValue !== undefined) {
    result.metadata = metadataValue;
  }

  return result;
}
|
||||
|
||||
/**
 * Returns the value of the first candidate substring that parses as JSON.
 *
 * @param response - Raw model reply; surrounding whitespace is ignored.
 * @returns The parsed JSON value (not necessarily an object).
 * @throws IdentityDBError when no candidate is valid JSON.
 */
function parseJsonCandidate(response: string): unknown {
  const candidates = collectJsonCandidates(response.trim());

  for (const text of candidates) {
    try {
      return JSON.parse(text);
    } catch {
      // Not valid JSON; fall through to the next candidate.
    }
  }

  throw new IdentityDBError('LLM extractor returned invalid JSON.');
}
|
||||
|
||||
/**
 * Lists the substrings of a reply that might hold the JSON payload.
 *
 * Candidates are returned in the order they are tried, without duplicates:
 * the whole reply, the trimmed body of each markdown code fence, and the
 * span from the first `{` to the last `}`.
 *
 * @param response - Model reply text.
 * @returns Distinct candidate strings in priority order.
 */
function collectJsonCandidates(response: string): string[] {
  const seen = new Set<string>([response]);

  const fencePattern = /```(?:json)?\s*([\s\S]*?)```/gi;
  for (
    let hit: RegExpExecArray | null = fencePattern.exec(response);
    hit !== null;
    hit = fencePattern.exec(response)
  ) {
    const body = hit[1]?.trim();
    // Empty fences carry nothing worth parsing.
    if (body) {
      seen.add(body);
    }
  }

  const start = response.indexOf('{');
  const end = response.lastIndexOf('}');
  if (start >= 0 && end > start) {
    seen.add(response.slice(start, end + 1));
  }

  return [...seen];
}
|
||||
|
||||
/**
 * Validates the `topics` field of the payload.
 *
 * @param value - Raw `topics` value from the parsed reply.
 * @returns One validated topic per array entry, in the original order.
 * @throws IdentityDBError when the value is not an array or an entry is invalid.
 */
function parseTopics(value: unknown): ExtractedFact['topics'] {
  if (Array.isArray(value)) {
    const topics: ExtractedFact['topics'] = [];

    for (const entry of value) {
      topics.push(parseTopic(entry));
    }

    return topics;
  }

  throw new IdentityDBError('LLM extractor response must include a topics array.');
}
|
||||
|
||||
/**
 * Validates a single topic entry.
 *
 * `name` is required and is stored trimmed. `category`, `granularity`,
 * `role`, `description` and `metadata` are copied only when present.
 *
 * @param value - Raw topic entry from the parsed reply.
 * @returns The validated topic.
 * @throws IdentityDBError when the entry is not an object, has no non-empty
 *   name, or a field fails validation.
 */
function parseTopic(value: unknown): ExtractedFact['topics'][number] {
  if (!isRecord(value)) {
    throw new IdentityDBError('LLM extractor topics must be JSON objects.');
  }

  const rawName = optionalString(value.name);
  const name = rawName === undefined ? '' : rawName.trim();
  if (name.length === 0) {
    throw new IdentityDBError('LLM extractor topics must include a non-empty name.');
  }

  // Validate in a fixed order so the first invalid field decides the error.
  const category = optionalTopicCategory(value.category);
  const granularity = optionalTopicGranularity(value.granularity);
  const role = optionalNullableString(value.role);
  const description = optionalNullableString(value.description);
  const metadata = optionalMetadata(value.metadata);

  return {
    name,
    ...(category !== undefined ? { category } : {}),
    ...(granularity !== undefined ? { granularity } : {}),
    ...(role !== undefined ? { role } : {}),
    ...(description !== undefined ? { description } : {}),
    ...(metadata !== undefined ? { metadata } : {}),
  };
}
|
||||
|
||||
/**
 * Accepts a string or an absent value.
 *
 * @param value - Raw field value.
 * @returns The string unchanged, or `undefined` when the field is absent.
 * @throws IdentityDBError for any other type, including `null`.
 */
function optionalString(value: unknown): string | undefined {
  if (typeof value === 'string' || value === undefined) {
    return value;
  }

  throw new IdentityDBError('LLM extractor expected a string field.');
}
|
||||
|
||||
/**
 * Accepts a string, an explicit `null`, or an absent value.
 *
 * @param value - Raw field value.
 * @returns The value unchanged when it is a string, `null` or `undefined`.
 * @throws IdentityDBError for any other type.
 */
function optionalNullableString(value: unknown): string | null | undefined {
  if (value === undefined || value === null || typeof value === 'string') {
    return value;
  }

  throw new IdentityDBError('LLM extractor expected a nullable string field.');
}
|
||||
|
||||
/**
 * Accepts a finite number, an explicit `null`, or an absent value.
 *
 * @param value - Raw field value (used for `confidence`).
 * @returns The number, `null`, or `undefined` when the field is absent.
 * @throws IdentityDBError when the value is of another type or is not a
 *   finite number.
 */
function optionalNullableNumber(value: unknown): number | null | undefined {
  if (value === undefined || value === null) {
    return value;
  }

  // JSON.parse turns out-of-range literals such as 1e999 into Infinity, so a
  // NaN check alone is not enough to guarantee a usable number.
  if (typeof value !== 'number' || !Number.isFinite(value)) {
    throw new IdentityDBError('LLM extractor expected confidence to be a number or null.');
  }

  return value;
}
|
||||
|
||||
/**
 * Accepts JSON-compatible metadata, an explicit `null`, or an absent value.
 *
 * @param value - Raw `metadata` value.
 * @returns The value unchanged when it is absent, `null`, or passes `isJsonLike`.
 * @throws IdentityDBError when the value is not JSON-compatible.
 */
function optionalMetadata(value: unknown): ExtractedFact['metadata'] | undefined {
  if (value === undefined || value === null) {
    return value;
  }

  if (isJsonLike(value)) {
    return value as ExtractedFact['metadata'];
  }

  throw new IdentityDBError('LLM extractor metadata must be valid JSON-compatible data.');
}
|
||||
|
||||
/**
 * Accepts one of the known topic categories or an absent value.
 *
 * @param value - Raw `category` value.
 * @returns The category, or `undefined` when the field is absent.
 * @throws IdentityDBError for any other value, including `null`.
 */
function optionalTopicCategory(value: unknown): TopicCategory | undefined {
  switch (value) {
    case undefined:
      return undefined;
    case 'entity':
    case 'concept':
    case 'temporal':
    case 'custom':
      return value as TopicCategory;
    default:
      throw new IdentityDBError('LLM extractor returned an unsupported topic category.');
  }
}
|
||||
|
||||
/**
 * Accepts one of the known topic granularities or an absent value.
 *
 * @param value - Raw `granularity` value.
 * @returns The granularity, or `undefined` when the field is absent.
 * @throws IdentityDBError for any other value, including `null`.
 */
function optionalTopicGranularity(value: unknown): TopicGranularity | undefined {
  switch (value) {
    case undefined:
      return undefined;
    case 'abstract':
    case 'concrete':
    case 'mixed':
      return value as TopicGranularity;
    default:
      throw new IdentityDBError('LLM extractor returned an unsupported topic granularity.');
  }
}
|
||||
|
||||
/**
 * Type guard for non-null, non-array objects.
 *
 * @param value - Any value.
 * @returns `true` when the value is an object other than `null` or an array.
 */
function isRecord(value: unknown): value is Record<string, unknown> {
  if (value === null || Array.isArray(value)) {
    return false;
  }

  return typeof value === 'object';
}
|
||||
|
||||
/**
 * Checks recursively that a value can be represented as JSON.
 *
 * Accepted: `null`, strings, booleans, finite numbers, arrays whose entries
 * are all JSON-like, and objects whose property values are all JSON-like.
 *
 * @param value - Any value.
 * @returns `true` when the whole structure is JSON-compatible.
 */
function isJsonLike(value: unknown): boolean {
  if (value === null) {
    return true;
  }

  if (typeof value === 'number') {
    // NaN and ±Infinity have no JSON representation (they serialize as null),
    // so they are rejected rather than silently altered on storage.
    return Number.isFinite(value);
  }

  if (typeof value === 'string' || typeof value === 'boolean') {
    return true;
  }

  if (Array.isArray(value)) {
    return value.every((entry) => isJsonLike(entry));
  }

  if (isRecord(value)) {
    return Object.values(value).every((entry) => isJsonLike(entry));
  }

  // undefined, functions, symbols and bigints are not JSON values.
  return false;
}
|
||||
@@ -17,8 +17,19 @@ export interface FactExtractor {
|
||||
extract(input: string): Promise<ExtractedFact>;
|
||||
}
|
||||
|
||||
export interface LlmTextGenerationModel {
|
||||
generateText(prompt: string): Promise<string>;
|
||||
}
|
||||
|
||||
export interface LlmFactExtractorOptions {
|
||||
model: LlmTextGenerationModel;
|
||||
instructions?: string;
|
||||
promptBuilder?: (input: string, instructions?: string) => string;
|
||||
}
|
||||
|
||||
export interface IngestStatementOptions {
|
||||
extractor: FactExtractor;
|
||||
embeddingProvider?: EmbeddingProvider;
|
||||
duplicateThreshold?: number;
|
||||
spaceName?: string;
|
||||
}
|
||||
|
||||
@@ -13,12 +13,14 @@ export interface FactTopicJoinRow extends TopicRecord {
|
||||
|
||||
export async function findFactRowsForTopicId(
|
||||
executor: DatabaseExecutor,
|
||||
spaceId: string,
|
||||
topicId: string,
|
||||
): Promise<FactRecord[]> {
|
||||
return executor
|
||||
.selectFrom('facts')
|
||||
.innerJoin('fact_topics', 'fact_topics.fact_id', 'facts.id')
|
||||
.selectAll('facts')
|
||||
.where('facts.space_id', '=', spaceId)
|
||||
.where('fact_topics.topic_id', '=', topicId)
|
||||
.orderBy('facts.created_at', 'asc')
|
||||
.execute();
|
||||
@@ -26,6 +28,7 @@ export async function findFactRowsForTopicId(
|
||||
|
||||
export async function findFactRowsConnectingTopicIds(
|
||||
executor: DatabaseExecutor,
|
||||
spaceId: string,
|
||||
topicIds: string[],
|
||||
): Promise<FactRecord[]> {
|
||||
if (topicIds.length === 0) {
|
||||
@@ -36,6 +39,7 @@ export async function findFactRowsConnectingTopicIds(
|
||||
.selectFrom('facts')
|
||||
.innerJoin('fact_topics', 'fact_topics.fact_id', 'facts.id')
|
||||
.selectAll('facts')
|
||||
.where('facts.space_id', '=', spaceId)
|
||||
.where('fact_topics.topic_id', 'in', topicIds)
|
||||
.groupBy('facts.id')
|
||||
.having((eb) => eb.fn.count<number>('fact_topics.topic_id'), '=', topicIds.length)
|
||||
@@ -45,6 +49,7 @@ export async function findFactRowsConnectingTopicIds(
|
||||
|
||||
export async function findTopicLinksForFactIds(
|
||||
executor: DatabaseExecutor,
|
||||
spaceId: string,
|
||||
factIds: string[],
|
||||
): Promise<FactTopicJoinRow[]> {
|
||||
if (factIds.length === 0) {
|
||||
@@ -60,6 +65,7 @@ export async function findTopicLinksForFactIds(
|
||||
'fact_topics.role as role',
|
||||
'fact_topics.position as position',
|
||||
])
|
||||
.where('topics.space_id', '=', spaceId)
|
||||
.where('fact_topics.fact_id', 'in', factIds)
|
||||
.orderBy('fact_topics.position', 'asc')
|
||||
.execute() as Promise<FactTopicJoinRow[]>;
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import type { Kysely, Transaction } from 'kysely';
|
||||
|
||||
import type { IdentityDatabaseSchema } from '../types/database';
|
||||
import type { TopicAliasRecord, TopicRecord } from '../types/domain';
|
||||
import type { SpaceRecord, TopicAliasRecord, TopicRecord } from '../types/domain';
|
||||
|
||||
export type DatabaseExecutor = Kysely<IdentityDatabaseSchema> | Transaction<IdentityDatabaseSchema>;
|
||||
|
||||
@@ -9,48 +9,66 @@ export interface ConnectedTopicRow extends TopicRecord {
|
||||
shared_fact_count: number;
|
||||
}
|
||||
|
||||
export async function findSpaceRowByNormalizedName(
|
||||
executor: DatabaseExecutor,
|
||||
normalizedName: string,
|
||||
): Promise<SpaceRecord | undefined> {
|
||||
return executor
|
||||
.selectFrom('spaces')
|
||||
.selectAll()
|
||||
.where('normalized_name', '=', normalizedName)
|
||||
.executeTakeFirst();
|
||||
}
|
||||
|
||||
export async function findTopicRowByNormalizedName(
|
||||
executor: DatabaseExecutor,
|
||||
spaceId: string,
|
||||
normalizedName: string,
|
||||
): Promise<TopicRecord | undefined> {
|
||||
return executor
|
||||
.selectFrom('topics')
|
||||
.selectAll()
|
||||
.where('space_id', '=', spaceId)
|
||||
.where('normalized_name', '=', normalizedName)
|
||||
.executeTakeFirst();
|
||||
}
|
||||
|
||||
export async function findTopicRowByNormalizedAlias(
|
||||
executor: DatabaseExecutor,
|
||||
spaceId: string,
|
||||
normalizedAlias: string,
|
||||
): Promise<TopicRecord | undefined> {
|
||||
return executor
|
||||
.selectFrom('topic_aliases')
|
||||
.innerJoin('topics', 'topics.id', 'topic_aliases.topic_id')
|
||||
.selectAll('topics')
|
||||
.where('topic_aliases.space_id', '=', spaceId)
|
||||
.where('topic_aliases.normalized_alias', '=', normalizedAlias)
|
||||
.executeTakeFirst();
|
||||
}
|
||||
|
||||
export async function findTopicRowByNameOrAlias(
|
||||
executor: DatabaseExecutor,
|
||||
spaceId: string,
|
||||
normalizedName: string,
|
||||
): Promise<TopicRecord | undefined> {
|
||||
const directMatch = await findTopicRowByNormalizedName(executor, normalizedName);
|
||||
const directMatch = await findTopicRowByNormalizedName(executor, spaceId, normalizedName);
|
||||
if (directMatch) {
|
||||
return directMatch;
|
||||
}
|
||||
|
||||
return findTopicRowByNormalizedAlias(executor, normalizedName);
|
||||
return findTopicRowByNormalizedAlias(executor, spaceId, normalizedName);
|
||||
}
|
||||
|
||||
export async function listTopicAliasRowsForTopicId(
|
||||
executor: DatabaseExecutor,
|
||||
spaceId: string,
|
||||
topicId: string,
|
||||
): Promise<TopicAliasRecord[]> {
|
||||
return executor
|
||||
.selectFrom('topic_aliases')
|
||||
.selectAll()
|
||||
.where('space_id', '=', spaceId)
|
||||
.where('topic_id', '=', topicId)
|
||||
.orderBy('is_primary', 'desc')
|
||||
.orderBy('normalized_alias', 'asc')
|
||||
@@ -59,9 +77,14 @@ export async function listTopicAliasRowsForTopicId(
|
||||
|
||||
export async function listTopicRows(
|
||||
executor: DatabaseExecutor,
|
||||
spaceId: string,
|
||||
limit?: number,
|
||||
): Promise<TopicRecord[]> {
|
||||
let query = executor.selectFrom('topics').selectAll().orderBy('normalized_name', 'asc');
|
||||
let query = executor
|
||||
.selectFrom('topics')
|
||||
.selectAll()
|
||||
.where('space_id', '=', spaceId)
|
||||
.orderBy('normalized_name', 'asc');
|
||||
|
||||
if (limit !== undefined) {
|
||||
query = query.limit(limit);
|
||||
@@ -72,14 +95,18 @@ export async function listTopicRows(
|
||||
|
||||
export async function findConnectedTopicRows(
|
||||
executor: DatabaseExecutor,
|
||||
spaceId: string,
|
||||
topicId: string,
|
||||
): Promise<ConnectedTopicRow[]> {
|
||||
return executor
|
||||
.selectFrom('fact_topics as source_link')
|
||||
.innerJoin('facts', 'facts.id', 'source_link.fact_id')
|
||||
.innerJoin('fact_topics as related_link', 'related_link.fact_id', 'source_link.fact_id')
|
||||
.innerJoin('topics', 'topics.id', 'related_link.topic_id')
|
||||
.selectAll('topics')
|
||||
.select((eb) => eb.fn.count<number>('related_link.fact_id').as('shared_fact_count'))
|
||||
.where('facts.space_id', '=', spaceId)
|
||||
.where('topics.space_id', '=', spaceId)
|
||||
.where('source_link.topic_id', '=', topicId)
|
||||
.whereRef('related_link.topic_id', '!=', 'source_link.topic_id')
|
||||
.groupBy('topics.id')
|
||||
@@ -90,12 +117,14 @@ export async function findConnectedTopicRows(
|
||||
|
||||
export async function findChildTopicRows(
|
||||
executor: DatabaseExecutor,
|
||||
spaceId: string,
|
||||
parentTopicId: string,
|
||||
): Promise<TopicRecord[]> {
|
||||
return executor
|
||||
.selectFrom('topic_relations')
|
||||
.innerJoin('topics', 'topics.id', 'topic_relations.child_topic_id')
|
||||
.selectAll('topics')
|
||||
.where('topics.space_id', '=', spaceId)
|
||||
.where('topic_relations.parent_topic_id', '=', parentTopicId)
|
||||
.where('topic_relations.relation', '=', 'parent_of')
|
||||
.orderBy('topics.normalized_name', 'asc')
|
||||
@@ -104,12 +133,14 @@ export async function findChildTopicRows(
|
||||
|
||||
export async function findParentTopicRows(
|
||||
executor: DatabaseExecutor,
|
||||
spaceId: string,
|
||||
childTopicId: string,
|
||||
): Promise<TopicRecord[]> {
|
||||
return executor
|
||||
.selectFrom('topic_relations')
|
||||
.innerJoin('topics', 'topics.id', 'topic_relations.parent_topic_id')
|
||||
.selectAll('topics')
|
||||
.where('topics.space_id', '=', spaceId)
|
||||
.where('topic_relations.child_topic_id', '=', childTopicId)
|
||||
.where('topic_relations.relation', '=', 'parent_of')
|
||||
.orderBy('topics.normalized_name', 'asc')
|
||||
|
||||
@@ -1,6 +1,26 @@
|
||||
import type { JsonValue, TopicCategory, TopicGranularity } from './domain';
|
||||
|
||||
export interface UpsertTopicInput {
|
||||
export interface SpaceScopedInput {
|
||||
spaceName?: string | undefined;
|
||||
}
|
||||
|
||||
export interface UpsertSpaceInput {
|
||||
name: string;
|
||||
description?: string | null;
|
||||
metadata?: JsonValue | null;
|
||||
}
|
||||
|
||||
export interface Space {
|
||||
id: string;
|
||||
name: string;
|
||||
normalizedName: string;
|
||||
description: string | null;
|
||||
metadata: JsonValue | null;
|
||||
createdAt: string;
|
||||
updatedAt: string;
|
||||
}
|
||||
|
||||
export interface UpsertTopicInput extends SpaceScopedInput {
|
||||
name: string;
|
||||
category?: TopicCategory;
|
||||
granularity?: TopicGranularity;
|
||||
@@ -12,7 +32,7 @@ export interface TopicLinkInput extends UpsertTopicInput {
|
||||
role?: string | null;
|
||||
}
|
||||
|
||||
export interface AddFactInput {
|
||||
export interface AddFactInput extends SpaceScopedInput {
|
||||
statement: string;
|
||||
summary?: string | null;
|
||||
source?: string | null;
|
||||
@@ -21,13 +41,14 @@ export interface AddFactInput {
|
||||
topics: TopicLinkInput[];
|
||||
}
|
||||
|
||||
export interface LinkTopicsInput {
|
||||
export interface LinkTopicsInput extends SpaceScopedInput {
|
||||
parentName: string;
|
||||
childName: string;
|
||||
}
|
||||
|
||||
export interface Topic {
|
||||
id: string;
|
||||
spaceId: string;
|
||||
name: string;
|
||||
normalizedName: string;
|
||||
category: TopicCategory;
|
||||
@@ -45,6 +66,7 @@ export interface FactTopic extends Topic {
|
||||
|
||||
export interface Fact {
|
||||
id: string;
|
||||
spaceId: string;
|
||||
statement: string;
|
||||
summary: string | null;
|
||||
source: string | null;
|
||||
@@ -63,11 +85,11 @@ export interface ConnectedTopic extends Topic {
|
||||
sharedFactCount: number;
|
||||
}
|
||||
|
||||
export interface TopicLookupOptions {
|
||||
export interface TopicLookupOptions extends SpaceScopedInput {
|
||||
includeFacts?: boolean;
|
||||
}
|
||||
|
||||
export interface ListTopicsOptions {
|
||||
export interface ListTopicsOptions extends SpaceScopedInput {
|
||||
includeFacts?: boolean;
|
||||
limit?: number;
|
||||
}
|
||||
@@ -79,11 +101,11 @@ export interface EmbeddingProvider {
|
||||
embedMany?(inputs: string[]): Promise<number[][]>;
|
||||
}
|
||||
|
||||
export interface IndexFactEmbeddingsInput {
|
||||
export interface IndexFactEmbeddingsInput extends SpaceScopedInput {
|
||||
provider: EmbeddingProvider;
|
||||
}
|
||||
|
||||
export interface SearchFactsInput {
|
||||
export interface SearchFactsInput extends SpaceScopedInput {
|
||||
query: string;
|
||||
provider: EmbeddingProvider;
|
||||
topicNames?: string[];
|
||||
@@ -91,7 +113,7 @@ export interface SearchFactsInput {
|
||||
minimumScore?: number;
|
||||
}
|
||||
|
||||
export interface FindSimilarFactsInput {
|
||||
export interface FindSimilarFactsInput extends SpaceScopedInput {
|
||||
statement: string;
|
||||
provider: EmbeddingProvider;
|
||||
topicNames?: string[];
|
||||
|
||||
@@ -2,12 +2,14 @@ import type {
|
||||
FactEmbeddingRecord,
|
||||
FactRecord,
|
||||
FactTopicRecord,
|
||||
SpaceRecord,
|
||||
TopicAliasRecord,
|
||||
TopicRecord,
|
||||
TopicRelationRecord,
|
||||
} from './domain';
|
||||
|
||||
export interface IdentityDatabaseSchema {
|
||||
spaces: SpaceRecord;
|
||||
topics: TopicRecord;
|
||||
facts: FactRecord;
|
||||
fact_topics: FactTopicRecord;
|
||||
|
||||
@@ -5,8 +5,19 @@ export type TopicGranularity = 'abstract' | 'concrete' | 'mixed';
|
||||
export type JsonPrimitive = string | number | boolean | null;
|
||||
export type JsonValue = JsonPrimitive | JsonValue[] | { [key: string]: JsonValue };
|
||||
|
||||
export interface SpaceRecord {
|
||||
id: string;
|
||||
name: string;
|
||||
normalized_name: string;
|
||||
description: string | null;
|
||||
metadata: string | null;
|
||||
created_at: string;
|
||||
updated_at: string;
|
||||
}
|
||||
|
||||
export interface TopicRecord {
|
||||
id: string;
|
||||
space_id: string;
|
||||
name: string;
|
||||
normalized_name: string;
|
||||
category: TopicCategory;
|
||||
@@ -19,6 +30,7 @@ export interface TopicRecord {
|
||||
|
||||
export interface FactRecord {
|
||||
id: string;
|
||||
space_id: string;
|
||||
statement: string;
|
||||
summary: string | null;
|
||||
source: string | null;
|
||||
@@ -45,6 +57,7 @@ export interface TopicRelationRecord {
|
||||
|
||||
export interface TopicAliasRecord {
|
||||
id: string;
|
||||
space_id: string;
|
||||
topic_id: string;
|
||||
alias: string;
|
||||
normalized_alias: string;
|
||||
|
||||
@@ -34,6 +34,58 @@ describe('IdentityDB topic and fact writes', () => {
|
||||
expect(topics).toHaveLength(1);
|
||||
});
|
||||
|
||||
it('keeps same normalized topic names isolated across spaces', async () => {
|
||||
const alpha = await db.upsertTopic({
|
||||
name: 'TypeScript',
|
||||
category: 'entity',
|
||||
granularity: 'concrete',
|
||||
spaceName: 'A',
|
||||
});
|
||||
|
||||
const beta = await db.upsertTopic({
|
||||
name: 'TypeScript',
|
||||
category: 'entity',
|
||||
granularity: 'concrete',
|
||||
spaceName: 'B',
|
||||
});
|
||||
|
||||
expect(beta.id).not.toBe(alpha.id);
|
||||
|
||||
const alphaTopics = await db.listTopics({ includeFacts: false, spaceName: 'A' });
|
||||
const betaTopics = await db.listTopics({ includeFacts: false, spaceName: 'B' });
|
||||
const defaultTopics = await db.listTopics({ includeFacts: false });
|
||||
|
||||
expect(alphaTopics.map((topic) => topic.name)).toEqual(['TypeScript']);
|
||||
expect(betaTopics.map((topic) => topic.name)).toEqual(['TypeScript']);
|
||||
expect(defaultTopics).toHaveLength(0);
|
||||
});
|
||||
|
||||
it('keeps alias resolution scoped to the requested space', async () => {
|
||||
await db.upsertTopic({
|
||||
name: 'TypeScript',
|
||||
category: 'entity',
|
||||
granularity: 'concrete',
|
||||
spaceName: 'A',
|
||||
});
|
||||
await db.upsertTopic({
|
||||
name: 'TeamSpeak',
|
||||
category: 'entity',
|
||||
granularity: 'concrete',
|
||||
spaceName: 'B',
|
||||
});
|
||||
|
||||
await db.addTopicAlias('TypeScript', 'TS', { spaceName: 'A' });
|
||||
await db.addTopicAlias('TeamSpeak', 'TS', { spaceName: 'B' });
|
||||
|
||||
const alphaResolved = await db.resolveTopic('ts', { spaceName: 'A' });
|
||||
const betaResolved = await db.resolveTopic('ts', { spaceName: 'B' });
|
||||
const defaultResolved = await db.resolveTopic('ts');
|
||||
|
||||
expect(alphaResolved?.name).toBe('TypeScript');
|
||||
expect(betaResolved?.name).toBe('TeamSpeak');
|
||||
expect(defaultResolved).toBeNull();
|
||||
});
|
||||
|
||||
it('adds one fact that links multiple topics', async () => {
|
||||
const fact = await db.addFact({
|
||||
statement: 'I have worked with TypeScript since 2025.',
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
|
||||
|
||||
import { IdentityDB } from '../src/core/identity-db';
|
||||
import { LlmFactExtractor } from '../src/ingestion/llm-extractor';
|
||||
import { NaiveExtractor } from '../src/ingestion/naive-extractor';
|
||||
import type { FactExtractor } from '../src/ingestion/types';
|
||||
|
||||
@@ -51,4 +52,86 @@ describe('IdentityDB ingestion', () => {
|
||||
const topic = await db.getTopicByName('TypeScript', { includeFacts: true });
|
||||
expect(topic?.facts).toHaveLength(1);
|
||||
});
|
||||
|
||||
it('ships an LLM extractor adapter that turns structured JSON responses into facts', async () => {
|
||||
let prompt = '';
|
||||
|
||||
const extractor = new LlmFactExtractor({
|
||||
model: {
|
||||
async generateText(input) {
|
||||
prompt = input;
|
||||
|
||||
return JSON.stringify({
|
||||
statement: 'I have worked with Bun and TypeScript since 2025.',
|
||||
summary: 'The speaker has Bun and TypeScript experience.',
|
||||
source: 'chat',
|
||||
confidence: 0.91,
|
||||
metadata: { channel: 'telegram' },
|
||||
topics: [
|
||||
{ name: 'I', category: 'entity', granularity: 'concrete', role: 'subject' },
|
||||
{ name: 'Bun', category: 'entity', granularity: 'concrete', role: 'object' },
|
||||
{ name: 'TypeScript', category: 'entity', granularity: 'concrete', role: 'object' },
|
||||
{ name: '2025', category: 'temporal', granularity: 'concrete', role: 'time' },
|
||||
],
|
||||
});
|
||||
},
|
||||
},
|
||||
instructions: 'Prefer technology and time topics.',
|
||||
});
|
||||
|
||||
const fact = await db.ingestStatement('I have worked with Bun and TypeScript since 2025.', {
|
||||
extractor,
|
||||
});
|
||||
|
||||
expect(prompt).toContain('Prefer technology and time topics.');
|
||||
expect(prompt).toContain('I have worked with Bun and TypeScript since 2025.');
|
||||
expect(fact.summary).toBe('The speaker has Bun and TypeScript experience.');
|
||||
expect(fact.source).toBe('chat');
|
||||
expect(fact.confidence).toBe(0.91);
|
||||
expect(fact.metadata).toEqual({ channel: 'telegram' });
|
||||
expect(fact.topics.map((topic) => topic.name)).toEqual(['I', 'Bun', 'TypeScript', '2025']);
|
||||
});
|
||||
|
||||
it('parses JSON responses wrapped in markdown code fences', async () => {
|
||||
const extractor = new LlmFactExtractor({
|
||||
model: {
|
||||
async generateText() {
|
||||
return [
|
||||
'Here is the extracted fact:',
|
||||
'```json',
|
||||
JSON.stringify({
|
||||
statement: 'Bun powers TypeScript tooling.',
|
||||
topics: [
|
||||
{ name: 'Bun', category: 'entity', granularity: 'concrete' },
|
||||
{ name: 'TypeScript', category: 'entity', granularity: 'concrete' },
|
||||
],
|
||||
}),
|
||||
'```',
|
||||
].join('\n');
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
const fact = await db.ingestStatement('Bun powers TypeScript tooling.', {
|
||||
extractor,
|
||||
});
|
||||
|
||||
expect(fact.topics.map((topic) => topic.name)).toEqual(['Bun', 'TypeScript']);
|
||||
});
|
||||
|
||||
it('rejects invalid LLM responses before writing facts', async () => {
|
||||
const extractor = new LlmFactExtractor({
|
||||
model: {
|
||||
async generateText() {
|
||||
return 'not json at all';
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
await expect(
|
||||
db.ingestStatement('Bun powers TypeScript tooling.', {
|
||||
extractor,
|
||||
}),
|
||||
).rejects.toThrow('LLM extractor returned invalid JSON.');
|
||||
});
|
||||
});
|
||||
|
||||
@@ -16,7 +16,7 @@ afterEach(async () => {
|
||||
});
|
||||
|
||||
describe('initializeSchema', () => {
|
||||
it('creates the topics, facts, fact_embeddings, fact_topics, topic_relations, and topic_aliases tables', async () => {
|
||||
it('creates the spaces, topics, facts, fact_embeddings, fact_topics, topic_relations, and topic_aliases tables', async () => {
|
||||
const connection = await createDatabase({ client: 'sqlite', filename: ':memory:' });
|
||||
openConnections.push(connection.destroy);
|
||||
|
||||
@@ -31,6 +31,7 @@ describe('initializeSchema', () => {
|
||||
|
||||
const tableNames = tables.rows.map((row) => row.name);
|
||||
|
||||
expect(tableNames).toContain('spaces');
|
||||
expect(tableNames).toContain('topics');
|
||||
expect(tableNames).toContain('facts');
|
||||
expect(tableNames).toContain('fact_embeddings');
|
||||
@@ -45,6 +46,7 @@ describe('initializeSchema', () => {
|
||||
|
||||
await initializeSchema(connection.db);
|
||||
|
||||
const spaceColumns = await sql<{ name: string }>`PRAGMA table_info(spaces)`.execute(connection.db);
|
||||
const topicsColumns = await sql<{ name: string }>`PRAGMA table_info(topics)`.execute(connection.db);
|
||||
const factsColumns = await sql<{ name: string }>`PRAGMA table_info(facts)`.execute(connection.db);
|
||||
const factEmbeddingsColumns = await sql<{ name: string }>`PRAGMA table_info(fact_embeddings)`.execute(connection.db);
|
||||
@@ -52,8 +54,19 @@ describe('initializeSchema', () => {
|
||||
const topicRelationsColumns = await sql<{ name: string }>`PRAGMA table_info(topic_relations)`.execute(connection.db);
|
||||
const topicAliasesColumns = await sql<{ name: string }>`PRAGMA table_info(topic_aliases)`.execute(connection.db);
|
||||
|
||||
expect(spaceColumns.rows.map((row) => row.name)).toEqual([
|
||||
'id',
|
||||
'name',
|
||||
'normalized_name',
|
||||
'description',
|
||||
'metadata',
|
||||
'created_at',
|
||||
'updated_at',
|
||||
]);
|
||||
|
||||
expect(topicsColumns.rows.map((row) => row.name)).toEqual([
|
||||
'id',
|
||||
'space_id',
|
||||
'name',
|
||||
'normalized_name',
|
||||
'category',
|
||||
@@ -66,6 +79,7 @@ describe('initializeSchema', () => {
|
||||
|
||||
expect(factsColumns.rows.map((row) => row.name)).toEqual([
|
||||
'id',
|
||||
'space_id',
|
||||
'statement',
|
||||
'summary',
|
||||
'source',
|
||||
@@ -102,6 +116,7 @@ describe('initializeSchema', () => {
|
||||
|
||||
expect(topicAliasesColumns.rows.map((row) => row.name)).toEqual([
|
||||
'id',
|
||||
'space_id',
|
||||
'topic_id',
|
||||
'alias',
|
||||
'normalized_alias',
|
||||
|
||||
@@ -2,9 +2,10 @@ import { afterEach, beforeEach, describe, expect, it } from 'vitest';
|
||||
|
||||
import { IdentityDB } from '../src/core/identity-db';
|
||||
|
||||
async function seedMemoryGraph(db: IdentityDB): Promise<void> {
|
||||
async function seedMemoryGraph(db: IdentityDB, spaceName?: string): Promise<void> {
|
||||
await db.addFact({
|
||||
statement: 'I have worked with TypeScript since 2025.',
|
||||
spaceName,
|
||||
topics: [
|
||||
{ name: 'I', category: 'entity', granularity: 'concrete', role: 'subject' },
|
||||
{ name: 'TypeScript', category: 'entity', granularity: 'concrete', role: 'object' },
|
||||
@@ -14,6 +15,7 @@ async function seedMemoryGraph(db: IdentityDB): Promise<void> {
|
||||
|
||||
await db.addFact({
|
||||
statement: 'TypeScript is a programming language.',
|
||||
spaceName,
|
||||
topics: [
|
||||
{ name: 'TypeScript', category: 'entity', granularity: 'concrete', role: 'subject' },
|
||||
{ name: 'programming language', category: 'concept', granularity: 'abstract', role: 'classification' },
|
||||
@@ -23,11 +25,13 @@ async function seedMemoryGraph(db: IdentityDB): Promise<void> {
|
||||
await db.linkTopics({
|
||||
parentName: 'software technology',
|
||||
childName: 'programming language',
|
||||
spaceName,
|
||||
});
|
||||
|
||||
await db.linkTopics({
|
||||
parentName: 'programming language',
|
||||
childName: 'TypeScript',
|
||||
spaceName,
|
||||
});
|
||||
}
|
||||
|
||||
@@ -114,6 +118,56 @@ describe('IdentityDB queries', () => {
|
||||
]);
|
||||
});
|
||||
|
||||
it('keeps hierarchy and fact queries isolated per space', async () => {
|
||||
const isolatedDb = await IdentityDB.connect({ client: 'sqlite', filename: ':memory:' });
|
||||
try {
|
||||
await isolatedDb.initialize();
|
||||
await seedMemoryGraph(isolatedDb, 'A');
|
||||
|
||||
await isolatedDb.addFact({
|
||||
statement: 'TypeScript is a typed superset.',
|
||||
spaceName: 'B',
|
||||
topics: [
|
||||
{ name: 'TypeScript', category: 'entity', granularity: 'concrete', role: 'subject' },
|
||||
{ name: 'superset', category: 'concept', granularity: 'abstract', role: 'classification' },
|
||||
],
|
||||
});
|
||||
|
||||
await isolatedDb.linkTopics({
|
||||
parentName: 'language family',
|
||||
childName: 'TypeScript',
|
||||
spaceName: 'B',
|
||||
});
|
||||
|
||||
const alphaTopic = await isolatedDb.getTopicByName('TypeScript', {
|
||||
includeFacts: true,
|
||||
spaceName: 'A',
|
||||
});
|
||||
const betaTopic = await isolatedDb.getTopicByName('TypeScript', {
|
||||
includeFacts: true,
|
||||
spaceName: 'B',
|
||||
});
|
||||
const alphaParents = await isolatedDb.getTopicParents('TypeScript', { spaceName: 'A' });
|
||||
const betaParents = await isolatedDb.getTopicParents('TypeScript', { spaceName: 'B' });
|
||||
const alphaConnected = await isolatedDb.findConnectedTopics('TypeScript', { spaceName: 'A' });
|
||||
const betaConnected = await isolatedDb.findConnectedTopics('TypeScript', { spaceName: 'B' });
|
||||
|
||||
expect(alphaTopic?.facts.map((fact) => fact.statement)).toEqual([
|
||||
'I have worked with TypeScript since 2025.',
|
||||
'TypeScript is a programming language.',
|
||||
]);
|
||||
expect(betaTopic?.facts.map((fact) => fact.statement)).toEqual([
|
||||
'TypeScript is a typed superset.',
|
||||
]);
|
||||
expect(alphaParents.map((topic) => topic.name)).toEqual(['programming language']);
|
||||
expect(betaParents.map((topic) => topic.name)).toEqual(['language family']);
|
||||
expect(alphaConnected.map((topic) => topic.name)).toEqual(['2025', 'I', 'programming language']);
|
||||
expect(betaConnected.map((topic) => topic.name)).toEqual(['superset']);
|
||||
} finally {
|
||||
await isolatedDb.close();
|
||||
}
|
||||
});
|
||||
|
||||
it('resolves alias names in topic lookups', async () => {
|
||||
await db.addTopicAlias('TypeScript', 'TS');
|
||||
|
||||
|
||||
@@ -120,6 +120,53 @@ describe('IdentityDB semantic search', () => {
|
||||
expect(matches[0]?.statement).toBe('Bun runs TypeScript tooling quickly.');
|
||||
expect(matches[0]!.score).toBeGreaterThan(matches[1]!.score);
|
||||
});
|
||||
|
||||
it('keeps semantic search isolated per space', async () => {
|
||||
const isolatedDb = await IdentityDB.connect({ client: 'sqlite', filename: ':memory:' });
|
||||
try {
|
||||
await isolatedDb.initialize();
|
||||
|
||||
await isolatedDb.addFact({
|
||||
statement: 'Bun runs TypeScript tooling quickly.',
|
||||
spaceName: 'A',
|
||||
topics: [
|
||||
{ name: 'Bun', category: 'entity', granularity: 'concrete' },
|
||||
{ name: 'TypeScript', category: 'entity', granularity: 'concrete' },
|
||||
],
|
||||
});
|
||||
|
||||
await isolatedDb.addFact({
|
||||
statement: 'TypeScript runtime tooling belongs to another tenant.',
|
||||
spaceName: 'B',
|
||||
topics: [
|
||||
{ name: 'TypeScript', category: 'entity', granularity: 'concrete' },
|
||||
],
|
||||
});
|
||||
|
||||
await isolatedDb.indexFactEmbeddings({ provider, spaceName: 'A' });
|
||||
await isolatedDb.indexFactEmbeddings({ provider, spaceName: 'B' });
|
||||
|
||||
const alphaMatches = await isolatedDb.searchFacts({
|
||||
query: 'TypeScript runtime tooling',
|
||||
provider,
|
||||
spaceName: 'A',
|
||||
});
|
||||
const betaMatches = await isolatedDb.searchFacts({
|
||||
query: 'TypeScript runtime tooling',
|
||||
provider,
|
||||
spaceName: 'B',
|
||||
});
|
||||
|
||||
expect(alphaMatches.map((match) => match.statement)).toEqual([
|
||||
'Bun runs TypeScript tooling quickly.',
|
||||
]);
|
||||
expect(betaMatches.map((match) => match.statement)).toEqual([
|
||||
'TypeScript runtime tooling belongs to another tenant.',
|
||||
]);
|
||||
} finally {
|
||||
await isolatedDb.close();
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
describe('IdentityDB dedup-aware ingestion', () => {
|
||||
@@ -167,4 +214,26 @@ describe('IdentityDB dedup-aware ingestion', () => {
|
||||
expect(facts).toHaveLength(1);
|
||||
expect(facts[0]?.statement).toBe('Bun runs TypeScript tooling quickly.');
|
||||
});
|
||||
|
||||
it('does not reuse a semantic duplicate from another space', async () => {
|
||||
const first = await db.ingestStatement('Bun runs TypeScript tooling quickly.', {
|
||||
extractor,
|
||||
embeddingProvider: provider,
|
||||
spaceName: 'A',
|
||||
});
|
||||
|
||||
const second = await db.ingestStatement('Bun makes TypeScript tooling fast.', {
|
||||
extractor,
|
||||
embeddingProvider: provider,
|
||||
duplicateThreshold: 0.95,
|
||||
spaceName: 'B',
|
||||
});
|
||||
|
||||
const alphaFacts = await db.getTopicFacts('TypeScript', { spaceName: 'A' });
|
||||
const betaFacts = await db.getTopicFacts('TypeScript', { spaceName: 'B' });
|
||||
|
||||
expect(second.id).not.toBe(first.id);
|
||||
expect(alphaFacts).toHaveLength(1);
|
||||
expect(betaFacts).toHaveLength(1);
|
||||
});
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user