Compare commits

...

10 Commits

Author SHA1 Message Date
b77e8eea40 chore: release 0.2.0
All checks were successful
npm release / verify (push) Successful in 13s
npm release / publish to npm (push) Successful in 11s
2026-05-11 19:13:52 +09:00
cea45a552a docs: document space isolation usage 2026-05-11 14:45:35 +09:00
d83fc31c59 feat: add isolated memory spaces 2026-05-11 14:45:28 +09:00
b908bc0bd9 docs: add IdentityDB space isolation plan 2026-05-11 14:28:08 +09:00
283f91ed91 ci: configure npm auth for release publish
All checks were successful
npm release / verify (push) Successful in 13s
npm release / publish to npm (push) Successful in 12s
2026-05-11 13:59:29 +09:00
5991e4f1f0 ci: run Gitea release steps with bash
Some checks failed
npm release / verify (push) Successful in 13s
npm release / publish to npm (push) Failing after 11s
2026-05-11 13:57:45 +09:00
0dc657c97b ci: make Gitea release workflow self-contained
Some checks failed
npm release / verify (push) Failing after 10s
npm release / publish to npm (push) Has been skipped
2026-05-11 13:56:39 +09:00
96d0568197 ci: set writable HOME for Gitea release workflow
Some checks failed
npm release / verify (push) Failing after 2s
npm release / publish to npm (push) Has been skipped
2026-05-11 13:53:04 +09:00
e8adccfbbf ci: add tag-gated npm release workflow
Some checks failed
npm release / verify (push) Failing after 13s
npm release / publish to npm (push) Has been skipped
2026-05-11 13:36:07 +09:00
1c82b63e7a docs: add IdentityDB wiki documentation plan 2026-05-11 12:27:12 +09:00
19 changed files with 1037 additions and 135 deletions

View File

@@ -0,0 +1,117 @@
name: npm release
on:
push:
tags:
- 'v*'
- '[0-9]*'
permissions:
contents: read
defaults:
run:
shell: bash
jobs:
verify:
name: verify
runs-on: ubuntu-latest
container:
image: node:20-bookworm
timeout-minutes: 20
steps:
- name: Install release tools
run: |
set -euo pipefail
apt-get update
apt-get install -y git curl ca-certificates
curl -fsSL https://bun.sh/install | bash -s -- bun-v1.3.13
install -m 0755 /root/.bun/bin/bun /usr/local/bin/bun
node --version
npm --version
bun --version
- name: Clone tagged source
run: |
set -euo pipefail
REPO_URL="${{ gitea.server_url }}/${{ gitea.repository }}.git"
AUTH_HEADER="$(printf '%s' '${{ gitea.actor }}:${{ secrets.GITEA_TOKEN }}' | base64 -w0)"
git -c http.extraHeader="Authorization: Basic $AUTH_HEADER" clone --depth 1 --branch "${{ gitea.ref_name }}" "$REPO_URL" repo
git -C repo rev-parse HEAD
- name: Verify release tag matches package version
working-directory: repo
shell: bash
run: |
set -euo pipefail
TAG_NAME="${{ gitea.ref_name }}"
PACKAGE_VERSION="$(node -p "require('./package.json').version")"
if [ "$TAG_NAME" = "v$PACKAGE_VERSION" ] || [ "$TAG_NAME" = "$PACKAGE_VERSION" ]; then
echo "Release tag $TAG_NAME matches package version $PACKAGE_VERSION"
exit 0
fi
echo "Tag $TAG_NAME does not match package.json version $PACKAGE_VERSION" >&2
exit 1
- name: Run verify pipeline
working-directory: repo
run: |
set -euo pipefail
bun install --frozen-lockfile
bun run test
bun run check
bun run build
release:
name: publish to npm
runs-on: ubuntu-latest
container:
image: node:20-bookworm
timeout-minutes: 20
needs:
- verify
steps:
- name: Install release tools
run: |
set -euo pipefail
apt-get update
apt-get install -y git curl ca-certificates
curl -fsSL https://bun.sh/install | bash -s -- bun-v1.3.13
install -m 0755 /root/.bun/bin/bun /usr/local/bin/bun
node --version
npm --version
bun --version
- name: Clone tagged source
run: |
set -euo pipefail
REPO_URL="${{ gitea.server_url }}/${{ gitea.repository }}.git"
AUTH_HEADER="$(printf '%s' '${{ gitea.actor }}:${{ secrets.GITEA_TOKEN }}' | base64 -w0)"
git -c http.extraHeader="Authorization: Basic $AUTH_HEADER" clone --depth 1 --branch "${{ gitea.ref_name }}" "$REPO_URL" repo
git -C repo rev-parse HEAD
- name: Install dependencies
working-directory: repo
run: |
set -euo pipefail
bun install --frozen-lockfile
- name: Build package
working-directory: repo
run: |
set -euo pipefail
bun run build
- name: Publish package to npm
working-directory: repo
env:
NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
run: |
set -euo pipefail
printf '//registry.npmjs.org/:_authToken=%s\n' "$NODE_AUTH_TOKEN" > ~/.npmrc
npm publish

View File

@@ -15,9 +15,9 @@ A single fact like `I have worked with TypeScript since 2025.` can connect the t
## Current capabilities
- SQLite, PostgreSQL, MySQL, and MariaDB connection adapters
- Automatic schema initialization for `topics`, `facts`, `fact_topics`, `topic_relations`, `topic_aliases`, and `fact_embeddings`
- Automatic schema initialization for `spaces`, `topics`, `facts`, `fact_topics`, `topic_relations`, `topic_aliases`, and `fact_embeddings`
- High-level APIs for adding topics and facts
- Topic hierarchy APIs for parent/child traversal and lineage lookup
- Hard space isolation so independent memory graphs can coexist without cross-linking
- Topic alias and canonical resolution APIs so facts and queries can resolve alternate names
- Semantic fact indexing and search APIs built around provider-agnostic embeddings
- Dedup-aware ingestion hooks that can reuse an existing fact when a semantic near-duplicate is detected
@@ -101,6 +101,34 @@ console.log(matches.map((entry) => [entry.statement, entry.score]));
await db.close();
```
## Memory spaces
IdentityDB now supports hard isolation via spaces. If you write facts into `spaceName: 'A'` and `spaceName: 'B'`, they behave like separate dimensions:
- the same topic name can exist in both spaces
- aliases resolve only inside the requested space
- hierarchy, connected-topic traversal, semantic search, and duplicate detection stay inside the same space
```ts
await db.upsertSpace({ name: 'A' });
await db.upsertSpace({ name: 'B' });
await db.addFact({
spaceName: 'A',
statement: 'TypeScript belongs to A.',
topics: [{ name: 'TypeScript', category: 'entity', granularity: 'concrete' }],
});
await db.addFact({
spaceName: 'B',
statement: 'TypeScript belongs to B.',
topics: [{ name: 'TypeScript', category: 'entity', granularity: 'concrete' }],
});
const alphaFacts = await db.getTopicFacts('TypeScript', { spaceName: 'A' });
const betaFacts = await db.getTopicFacts('TypeScript', { spaceName: 'B' });
```
## Semantic ingestion and duplicate detection
If you provide an embedding provider during ingestion, IdentityDB can index the new fact automatically and reuse an existing fact when a semantic near-duplicate is already present.

View File

@@ -0,0 +1,157 @@
# IdentityDB Space Isolation Implementation Plan
> **For Hermes:** Use the `writing-plans` and `test-driven-development` skills. Implement this feature in small TDD steps with meaningful commits.
**Goal:** Add first-class memory spaces so callers can keep unrelated topic/fact graphs isolated from each other while still using one physical database.
**Architecture:** Introduce a `spaces` table plus `space_id` scoping on the canonical topic/fact records that define graph ownership. Treat the unnamed/default behavior as a built-in `default` space so existing API usage keeps working, while allowing any write/query path to target a named space explicitly.
**Tech Stack:** TypeScript, Bun, Vitest, Kysely, SQLite/PostgreSQL/MySQL/MariaDB-compatible schema primitives.
---
## Requirements
1. A caller must be able to write memory into independent spaces such as `A` and `B`.
2. Topic lookups, alias resolution, hierarchy traversal, connected-topic queries, fact queries, ingestion, and semantic search must only see data from the requested space.
3. Existing callers that do not specify a space must continue to work inside a built-in `default` space.
4. Space names must be normalized/canonicalized similarly to topics.
5. Documentation must explain both the isolation model and the default-space compatibility behavior.
---
## Task 1: Add failing tests for schema-level space support
**Objective:** Define the storage contract for spaces and per-space uniqueness before implementation.
**Files:**
- Modify: `tests/migrations.test.ts`
**Step 1: Write failing tests**
- Assert that initialization creates a `spaces` table.
- Assert that `topics`, `facts`, and `topic_aliases` now include `space_id`.
- Assert that `spaces` includes `id`, `name`, `normalized_name`, `description`, `metadata`, `created_at`, `updated_at`.
**Step 2: Run red test**
- Run: `bun run test tests/migrations.test.ts`
- Expect: failure because the schema does not yet contain space support.
**Step 3: Commit later with implementation**
- Do not commit yet; continue only after implementation turns the test green.
---
## Task 2: Add failing behavioral tests for isolated spaces
**Objective:** Lock in the public API behavior for separate memory spaces.
**Files:**
- Modify: `tests/identity-db.test.ts`
- Modify: `tests/queries.test.ts`
- Modify: `tests/semantic-search.test.ts`
- Optionally create: `tests/spaces.test.ts` if separation makes the scenarios clearer.
**Step 1: Write failing tests**
- Verify the same topic name can exist in two spaces without deduplicating together.
- Verify facts added in `spaceName: 'A'` are invisible from `spaceName: 'B'`.
- Verify alias resolution only resolves inside the same space.
- Verify hierarchy parent/child traversal only stays within the same space.
- Verify semantic search and duplicate-aware ingestion only search within the same space.
- Verify callers that omit `spaceName` still operate in the `default` space.
**Step 2: Run red tests**
- Run the most targeted files first, then the whole suite slice.
- Example: `bun run test tests/identity-db.test.ts tests/queries.test.ts tests/semantic-search.test.ts`
- Expect: failure due to missing API fields and missing isolation logic.
---
## Task 3: Implement schema and type support
**Objective:** Add the underlying data model required for spaces.
**Files:**
- Modify: `src/core/schema.ts`
- Modify: `src/types/domain.ts`
- Modify: `src/types/database.ts`
- Modify: `src/core/migrations.ts`
**Implementation notes:**
- Add `SPACES_TABLE` and `SPACE_COLUMNS` constants.
- Add `SpaceRecord` domain type.
- Add `spaces` to `IdentityDatabaseSchema`.
- Create the `spaces` table.
- Add `space_id` columns to `topics`, `facts`, and `topic_aliases`.
- Make topic uniqueness per-space, not global.
- Make alias uniqueness per-space, not global.
- Seed or upsert a built-in `default` space during initialization via normal application flow, not hard-coded SQL assumptions.
**Verification:**
- Re-run: `bun run test tests/migrations.test.ts`
- Expect: green.
---
## Task 4: Implement space-aware API contracts and query helpers
**Objective:** Thread `spaceName` through the high-level API and low-level query layer.
**Files:**
- Modify: `src/types/api.ts`
- Modify: `src/ingestion/types.ts`
- Modify: `src/queries/topics.ts`
- Modify: `src/queries/facts.ts`
- Modify: `src/core/identity-db.ts`
- Modify: `src/index.ts` if new exported types are added
**Implementation notes:**
- Add public `Space` and `UpsertSpaceInput` types if needed.
- Add optional `spaceName` on write/query inputs where the caller targets a graph.
- Add helpers to resolve or create the requested space inside transactions.
- Ensure all existing topic lookup helpers filter by `space_id`.
- Ensure semantic search candidates are restricted to facts in the requested space.
- Preserve existing no-space API calls by mapping them to `default`.
**Verification:**
- Re-run the failing behavior tests.
- Expect: green for the new targeted tests.
---
## Task 5: Refine ergonomics and update documentation
**Objective:** Make the feature understandable and safe to use.
**Files:**
- Modify: `README.md`
- Optionally modify: wiki docs later if requested
**Implementation notes:**
- Document the default space behavior.
- Add examples for `spaceName: 'A'` and `spaceName: 'B'`.
- Explain that spaces are hard isolation boundaries for topic/fact traversal and semantic retrieval.
**Verification:**
- Run: `bun run test && bun run check && bun run build`
- Confirm docs/examples align with the final public API.
---
## Suggested commit boundaries
1. `docs: add IdentityDB space isolation plan`
2. `test: specify isolated memory spaces`
3. `feat: add space-aware memory graph isolation`
4. `docs: document space-scoped memory usage`
---
## Acceptance checklist
- [ ] `spaces` table exists.
- [ ] Topics with the same normalized name can exist in different spaces.
- [ ] Facts from one space do not appear in another space's queries.
- [ ] Alias and hierarchy traversal are space-aware.
- [ ] Semantic search and duplicate detection are space-aware.
- [ ] Existing callers still work via the `default` space.
- [ ] Full test/build/typecheck suite passes.

View File

@@ -0,0 +1,65 @@
# IdentityDB Wiki Documentation Implementation Plan
> **For Hermes:** Execute this plan step-by-step. Prefer concrete repository inspection over assumptions, and verify the wiki remote after each major write.
**Goal:** Verify the IdentityDB wiki repository state, create or clone it as needed, and publish concrete wiki documentation covering the project's purpose, usage, and extractor choices including `NaiveExtractor`.
**Architecture:** Treat the Gitea wiki as a separate Git repository. First verify whether the wiki feature is enabled and whether the `.wiki.git` remote already exists. If the remote does not exist yet, bootstrap it with a minimal `Home.md`, then clone the wiki repo into a local working directory and author Markdown pages there. Keep the documentation practical, using the package README and current source files as the canonical content source.
**Tech Stack:** Gitea, tea CLI, Git, Markdown, Bun/TypeScript project docs.
---
## Execution plan
### Task 1: Inspect wiki availability and remote state
**Objective:** Confirm that the repository has wiki support enabled and determine whether the Git-backed wiki repo already exists.
**Files:**
- Inspect: `https://git.psw.kr/p-sw/IdentityDB`
- Read: `/home/hermes-agent/IdentityDB/README.md`
- Read: `/home/hermes-agent/IdentityDB/src/ingestion/naive-extractor.ts`
- Read: `/home/hermes-agent/IdentityDB/src/ingestion/llm-extractor.ts`
**Verification:**
- Check Gitea repo metadata for `has_wiki=true`.
- Check whether `https://git.psw.kr/p-sw/IdentityDB.wiki.git` is readable.
### Task 2: Bootstrap the wiki repo if missing
**Objective:** Create the Git-backed wiki repository if it has not been materialized yet.
**Files:**
- Create temporarily: `/home/hermes-agent/IdentityDB-wiki-bootstrap/Home.md`
**Verification:**
- Push a first commit to `https://git.psw.kr/p-sw/IdentityDB.wiki.git`.
- Confirm the remote becomes cloneable afterward.
### Task 3: Clone the wiki repo and author concrete pages
**Objective:** Write practical docs explaining why IdentityDB exists, how to use it, and where `NaiveExtractor` fits.
**Files:**
- Clone to: `/home/hermes-agent/IdentityDB.wiki`
- Create/modify: `/home/hermes-agent/IdentityDB.wiki/Home.md`
- Create/modify: `/home/hermes-agent/IdentityDB.wiki/Getting-Started.md`
- Create/modify: `/home/hermes-agent/IdentityDB.wiki/Extractors.md`
- Create/modify: `/home/hermes-agent/IdentityDB.wiki/_Sidebar.md`
**Verification:**
- Review the generated Markdown files locally.
- Ensure internal wiki links resolve by page name.
### Task 4: Commit, push, and verify the published wiki state
**Objective:** Publish the wiki docs and verify the remote history reflects the changes.
**Files:**
- Commit within: `/home/hermes-agent/IdentityDB.wiki`
**Verification:**
- Run `git status --short` and `git log --oneline -n 3` in the wiki repo.
- Push to the remote wiki repo.
- Confirm the wiki is cloneable and the latest commit is visible remotely.

View File

@@ -1,6 +1,6 @@
{
"name": "identitydb",
"version": "0.1.0",
"version": "0.2.0",
"description": "TypeScript memory graph database wrapper for topics, facts, and AI-assisted ingestion.",
"license": "MIT",
"type": "module",

View File

@@ -1,39 +1,27 @@
import {
type AddFactInput,
type ConnectedTopic,
type Fact,
type FactTopic,
type FindSimilarFactsInput,
type IndexFactEmbeddingsInput,
type LinkTopicsInput,
type ListTopicsOptions,
type ScoredFact,
type SearchFactsInput,
type Space,
type SpaceScopedInput,
type Topic,
type TopicLookupOptions,
type TopicWithFacts,
type UpsertSpaceInput,
type UpsertTopicInput,
type AddFactInput,
type LinkTopicsInput,
} from '../types/api';
import type { IngestStatementOptions } from '../ingestion/types';
import type { DatabaseConnection, IdentityDBConnectionConfig } from '../adapters/dialect';
import type { IdentityDatabaseSchema } from '../types/database';
import type { FactRecord, TopicRecord } from '../types/domain';
import type { FactRecord, SpaceRecord, TopicRecord } from '../types/domain';
import { createDatabase } from '../adapters/dialect';
import { IdentityDBError } from './errors';
import { initializeSchema } from './migrations';
import {
canonicalizeTopicName,
cosineSimilarity,
createContentHash,
createId,
deserializeEmbedding,
mapFactRow,
mapTopicRow,
normalizeTopicName,
nowIsoString,
serializeEmbedding,
serializeMetadata,
} from './utils';
import { extractFact } from '../ingestion/extractor';
import {
findFactRowsConnectingTopicIds,
@@ -41,16 +29,37 @@ import {
findTopicLinksForFactIds,
} from '../queries/facts';
import {
type DatabaseExecutor,
findChildTopicRows,
findConnectedTopicRows,
findParentTopicRows,
findSpaceRowByNormalizedName,
findTopicRowByNameOrAlias,
findTopicRowByNormalizedAlias,
findTopicRowByNormalizedName,
listTopicAliasRowsForTopicId,
listTopicRows,
findChildTopicRows,
findParentTopicRows,
type DatabaseExecutor,
} from '../queries/topics';
import { IdentityDBError } from './errors';
import { initializeSchema } from './migrations';
import {
canonicalizeSpaceName,
canonicalizeTopicName,
cosineSimilarity,
createContentHash,
createId,
deserializeEmbedding,
mapFactRow,
mapSpaceRow,
mapTopicRow,
normalizeSpaceName,
normalizeTopicName,
nowIsoString,
serializeEmbedding,
serializeMetadata,
} from './utils';
const DEFAULT_SPACE_NAME = 'default';
export class IdentityDB {
private constructor(private readonly connection: DatabaseConnection) {}
@@ -68,6 +77,72 @@ export class IdentityDB {
await this.connection.destroy();
}
async upsertSpace(input: UpsertSpaceInput): Promise<Space> {
return this.connection.db.transaction().execute(async (trx) => {
const normalizedName = normalizeSpaceName(input.name);
if (normalizedName.length === 0) {
throw new IdentityDBError('Space name cannot be empty.');
}
const now = nowIsoString();
const existing = await findSpaceRowByNormalizedName(trx, normalizedName);
if (existing) {
await trx
.updateTable('spaces')
.set({
name: canonicalizeSpaceName(input.name),
description: input.description !== undefined ? input.description : existing.description,
metadata: input.metadata !== undefined ? serializeMetadata(input.metadata) : existing.metadata,
updated_at: now,
})
.where('id', '=', existing.id)
.execute();
const updated = await trx
.selectFrom('spaces')
.selectAll()
.where('id', '=', existing.id)
.executeTakeFirstOrThrow();
return mapSpaceRow(updated);
}
const createdRow: SpaceRecord = {
id: createId(),
name: canonicalizeSpaceName(input.name),
normalized_name: normalizedName,
description: input.description ?? null,
metadata: serializeMetadata(input.metadata),
created_at: now,
updated_at: now,
};
await trx.insertInto('spaces').values(createdRow).execute();
return mapSpaceRow(createdRow);
});
}
async getSpaceByName(name: string): Promise<Space | null> {
const normalizedName = normalizeSpaceName(name);
if (normalizedName.length === 0) {
return null;
}
const row = await findSpaceRowByNormalizedName(this.connection.db, normalizedName);
return row ? mapSpaceRow(row) : null;
}
async listSpaces(): Promise<Space[]> {
const rows = await this.connection.db
.selectFrom('spaces')
.selectAll()
.orderBy('normalized_name', 'asc')
.execute();
return rows.map(mapSpaceRow);
}
async upsertTopic(input: UpsertTopicInput): Promise<Topic> {
return this.upsertTopicInExecutor(this.connection.db, input);
}
@@ -82,6 +157,7 @@ export class IdentityDB {
}
return this.connection.db.transaction().execute(async (trx) => {
const space = await this.getOrCreateSpaceInExecutor(trx, input.spaceName);
const createdAt = nowIsoString();
const factId = createId();
@@ -89,6 +165,7 @@ export class IdentityDB {
.insertInto('facts')
.values({
id: factId,
space_id: space.id,
statement: input.statement.trim(),
summary: input.summary ?? null,
source: input.source ?? null,
@@ -103,7 +180,11 @@ export class IdentityDB {
for (let index = 0; index < input.topics.length; index += 1) {
const topicInput = input.topics[index]!;
const topic = await this.upsertTopicInExecutor(trx, topicInput);
this.assertScopedTopicInput(space, topicInput.spaceName);
const topic = await this.upsertTopicInExecutor(trx, {
...topicInput,
spaceName: space.name,
});
await trx
.insertInto('fact_topics')
@@ -125,6 +206,7 @@ export class IdentityDB {
return {
id: factId,
spaceId: space.id,
statement: input.statement.trim(),
summary: input.summary ?? null,
source: input.source ?? null,
@@ -137,14 +219,12 @@ export class IdentityDB {
});
}
async ingestStatement(
statement: string,
options: IngestStatementOptions,
): Promise<Fact> {
async ingestStatement(statement: string, options: IngestStatementOptions): Promise<Fact> {
const extracted = await extractFact(statement, options.extractor);
const factInput: AddFactInput = {
statement: extracted.statement ?? statement,
topics: extracted.topics,
spaceName: options.spaceName,
};
if (extracted.summary !== undefined) {
@@ -170,6 +250,7 @@ export class IdentityDB {
topicNames: factInput.topics.map((topic) => topic.name),
limit: 1,
minimumScore: options.duplicateThreshold ?? 0.97,
spaceName: options.spaceName,
});
if (similarFacts[0]) {
@@ -180,15 +261,27 @@ export class IdentityDB {
const fact = await this.addFact(factInput);
if (options.embeddingProvider) {
await this.indexFactEmbedding(fact.id, { provider: options.embeddingProvider });
await this.indexFactEmbedding(fact.id, {
provider: options.embeddingProvider,
spaceName: options.spaceName,
});
}
return fact;
}
async indexFactEmbeddings(input: IndexFactEmbeddingsInput): Promise<void> {
const factRows = await this.connection.db.selectFrom('facts').selectAll().orderBy('created_at', 'asc').execute();
const space = await this.getSpaceForRead(input.spaceName);
if (input.spaceName && !space) {
return;
}
let factQuery = this.connection.db.selectFrom('facts').selectAll().orderBy('created_at', 'asc');
if (space) {
factQuery = factQuery.where('space_id', '=', space.id);
}
const factRows = await factQuery.execute();
if (factRows.length === 0) {
return;
}
@@ -222,6 +315,13 @@ export class IdentityDB {
throw new IdentityDBError(`Fact not found: ${factId}`);
}
if (input.spaceName) {
const space = await this.getSpaceForRead(input.spaceName);
if (!space || space.id !== factRow.space_id) {
throw new IdentityDBError(`Fact ${factId} does not belong to space ${canonicalizeSpaceName(input.spaceName)}.`);
}
}
const embedding = await input.provider.embed(factRow.statement);
this.assertEmbeddingShape(embedding, input.provider.dimensions);
@@ -236,6 +336,11 @@ export class IdentityDB {
return [];
}
const space = await this.getSpaceForRead(input.spaceName);
if (input.spaceName && !space) {
return [];
}
const queryEmbedding = await input.provider.embed(queryText);
this.assertEmbeddingShape(queryEmbedding, input.provider.dimensions);
@@ -245,6 +350,7 @@ export class IdentityDB {
topicNames: input.topicNames,
limit: input.limit,
minimumScore: input.minimumScore,
spaceId: space?.id,
});
}
@@ -254,6 +360,11 @@ export class IdentityDB {
return [];
}
const space = await this.getSpaceForRead(input.spaceName);
if (input.spaceName && !space) {
return [];
}
const queryEmbedding = await input.provider.embed(statement);
this.assertEmbeddingShape(queryEmbedding, input.provider.dimensions);
@@ -263,6 +374,7 @@ export class IdentityDB {
topicNames: input.topicNames,
limit: input.limit,
minimumScore: input.minimumScore,
spaceId: space?.id,
});
}
@@ -279,12 +391,15 @@ export class IdentityDB {
}
await this.connection.db.transaction().execute(async (trx) => {
const space = await this.getOrCreateSpaceInExecutor(trx, input.spaceName);
const parentTopic = await this.upsertTopicInExecutor(trx, {
name: input.parentName,
granularity: 'abstract',
spaceName: space.name,
});
const childTopic = await this.upsertTopicInExecutor(trx, {
name: input.childName,
spaceName: space.name,
});
const existing = await trx
@@ -309,7 +424,7 @@ export class IdentityDB {
});
}
async addTopicAlias(canonicalName: string, alias: string): Promise<void> {
async addTopicAlias(canonicalName: string, alias: string, options?: SpaceScopedInput): Promise<void> {
const normalizedAlias = normalizeTopicName(alias);
if (normalizedAlias.length === 0) {
@@ -317,18 +432,22 @@ export class IdentityDB {
}
await this.connection.db.transaction().execute(async (trx) => {
const canonicalTopic = await this.upsertTopicInExecutor(trx, { name: canonicalName });
const space = await this.getOrCreateSpaceInExecutor(trx, options?.spaceName);
const canonicalTopic = await this.upsertTopicInExecutor(trx, {
name: canonicalName,
spaceName: space.name,
});
if (normalizedAlias === canonicalTopic.normalizedName) {
return;
}
const exactTopicMatch = await findTopicRowByNormalizedName(trx, normalizedAlias);
const exactTopicMatch = await findTopicRowByNormalizedName(trx, space.id, normalizedAlias);
if (exactTopicMatch && exactTopicMatch.id !== canonicalTopic.id) {
throw new IdentityDBError('Cannot assign an alias that already belongs to another canonical topic.');
}
const aliasMatch = await findTopicRowByNormalizedAlias(trx, normalizedAlias);
const aliasMatch = await findTopicRowByNormalizedAlias(trx, space.id, normalizedAlias);
if (aliasMatch) {
if (aliasMatch.id !== canonicalTopic.id) {
throw new IdentityDBError('Cannot assign an alias that already resolves to another topic.');
@@ -341,6 +460,7 @@ export class IdentityDB {
.insertInto('topic_aliases')
.values({
id: createId(),
space_id: space.id,
topic_id: canonicalTopic.id,
alias: canonicalizeTopicName(alias),
normalized_alias: normalizedAlias,
@@ -352,47 +472,43 @@ export class IdentityDB {
});
}
async resolveTopic(name: string): Promise<Topic | null> {
const topicRow = await this.getRequiredTopicRow(name);
async resolveTopic(name: string, options?: SpaceScopedInput): Promise<Topic | null> {
const topicRow = await this.getRequiredTopicRow(name, options?.spaceName);
return topicRow ? mapTopicRow(topicRow) : null;
}
async getTopicAliases(name: string): Promise<string[]> {
const topicRow = await this.getRequiredTopicRow(name);
async getTopicAliases(name: string, options?: SpaceScopedInput): Promise<string[]> {
const topicRow = await this.getRequiredTopicRow(name, options?.spaceName);
if (!topicRow) {
return [];
}
const aliasRows = await listTopicAliasRowsForTopicId(this.connection.db, topicRow.id);
const aliasRows = await listTopicAliasRowsForTopicId(this.connection.db, topicRow.space_id, topicRow.id);
return aliasRows.map((aliasRow) => aliasRow.alias);
}
async getTopicChildren(name: string): Promise<Topic[]> {
const topicRow = await this.getRequiredTopicRow(name);
async getTopicChildren(name: string, options?: SpaceScopedInput): Promise<Topic[]> {
const topicRow = await this.getRequiredTopicRow(name, options?.spaceName);
if (!topicRow) {
return [];
}
const childRows = await findChildTopicRows(this.connection.db, topicRow.id);
const childRows = await findChildTopicRows(this.connection.db, topicRow.space_id, topicRow.id);
return childRows.map(mapTopicRow);
}
async getTopicParents(name: string): Promise<Topic[]> {
const topicRow = await this.getRequiredTopicRow(name);
async getTopicParents(name: string, options?: SpaceScopedInput): Promise<Topic[]> {
const topicRow = await this.getRequiredTopicRow(name, options?.spaceName);
if (!topicRow) {
return [];
}
const parentRows = await findParentTopicRows(this.connection.db, topicRow.id);
const parentRows = await findParentTopicRows(this.connection.db, topicRow.space_id, topicRow.id);
return parentRows.map(mapTopicRow);
}
async getTopicLineage(name: string): Promise<Topic[]> {
const topicRow = await this.getRequiredTopicRow(name);
async getTopicLineage(name: string, options?: SpaceScopedInput): Promise<Topic[]> {
const topicRow = await this.getRequiredTopicRow(name, options?.spaceName);
if (!topicRow) {
return [];
}
@@ -405,8 +521,7 @@ export class IdentityDB {
const nextLevelIds: string[] = [];
for (const currentId of currentLevelIds) {
const parentRows = await findParentTopicRows(this.connection.db, currentId);
const parentRows = await findParentTopicRows(this.connection.db, topicRow.space_id, currentId);
for (const parentRow of parentRows) {
if (visitedTopicIds.has(parentRow.id)) {
continue;
@@ -424,97 +539,97 @@ export class IdentityDB {
return lineage;
}
async getTopicFacts(name: string): Promise<Fact[]> {
const topicRow = await this.getRequiredTopicRow(name);
async getTopicFacts(name: string, options?: SpaceScopedInput): Promise<Fact[]> {
const topicRow = await this.getRequiredTopicRow(name, options?.spaceName);
if (!topicRow) {
return [];
}
const factRows = await findFactRowsForTopicId(this.connection.db, topicRow.id);
return this.hydrateFacts(factRows);
const factRows = await findFactRowsForTopicId(this.connection.db, topicRow.space_id, topicRow.id);
return this.hydrateFacts(factRows, topicRow.space_id);
}
async getTopicFactsLinkedTo(name: string, linkedTopicName: string): Promise<Fact[]> {
return this.findFactsConnectingTopics([name, linkedTopicName]);
async getTopicFactsLinkedTo(name: string, linkedTopicName: string, options?: SpaceScopedInput): Promise<Fact[]> {
return this.findFactsConnectingTopics([name, linkedTopicName], options);
}
async findFactsConnectingTopics(names: string[]): Promise<Fact[]> {
async findFactsConnectingTopics(names: string[], options?: SpaceScopedInput): Promise<Fact[]> {
if (names.length === 0) {
return [];
}
const topicRows = await Promise.all(names.map((name) => this.getRequiredTopicRow(name)));
const space = await this.getSpaceForRead(options?.spaceName);
if (options?.spaceName && !space) {
return [];
}
const topicRows = await Promise.all(names.map((name) => this.getRequiredTopicRow(name, options?.spaceName)));
if (topicRows.some((topicRow) => topicRow === undefined)) {
return [];
}
const topicIds = topicRows.map((topicRow) => topicRow!.id);
const factRows = await findFactRowsConnectingTopicIds(this.connection.db, topicIds);
const spaceId = topicRows[0]!.space_id ?? space?.id;
const factRows = await findFactRowsConnectingTopicIds(this.connection.db, spaceId, topicIds);
return this.hydrateFacts(factRows);
return this.hydrateFacts(factRows, spaceId);
}
async getTopicByName(
name: string,
options: { includeFacts: true },
): Promise<TopicWithFacts | null>;
async getTopicByName(name: string, options: { includeFacts: true; spaceName?: string }): Promise<TopicWithFacts | null>;
async getTopicByName(name: string, options?: TopicLookupOptions): Promise<Topic | null>;
async getTopicByName(
name: string,
options?: TopicLookupOptions,
): Promise<Topic | TopicWithFacts | null> {
const topicRow = await this.getRequiredTopicRow(name);
async getTopicByName(name: string, options?: TopicLookupOptions): Promise<Topic | TopicWithFacts | null> {
const topicRow = await this.getRequiredTopicRow(name, options?.spaceName);
if (!topicRow) {
return null;
}
const topic = mapTopicRow(topicRow);
if (options?.includeFacts) {
return {
...topic,
facts: await this.getTopicFacts(name),
facts: await this.getTopicFacts(name, { spaceName: options.spaceName }),
};
}
return topic;
}
async listTopics(options: { includeFacts: true; limit?: number }): Promise<TopicWithFacts[]>;
async listTopics(options: { includeFacts: true; limit?: number; spaceName?: string }): Promise<TopicWithFacts[]>;
async listTopics(options?: ListTopicsOptions): Promise<Topic[]>;
async listTopics(
options?: ListTopicsOptions,
): Promise<Topic[] | TopicWithFacts[]> {
const rows = await listTopicRows(this.connection.db, options?.limit);
async listTopics(options?: ListTopicsOptions): Promise<Topic[] | TopicWithFacts[]> {
const space = await this.getSpaceForRead(options?.spaceName);
if (options?.spaceName && !space) {
return [];
}
const spaceId = space?.id ?? await this.getDefaultSpaceIdForRead();
if (!spaceId) {
return [];
}
const rows = await listTopicRows(this.connection.db, spaceId, options?.limit);
if (!options?.includeFacts) {
return rows.map(mapTopicRow);
}
const topicsWithFacts: TopicWithFacts[] = [];
for (const row of rows) {
topicsWithFacts.push({
...mapTopicRow(row),
facts: await this.getTopicFacts(row.name),
facts: await this.getTopicFacts(row.name, { spaceName: options?.spaceName }),
});
}
return topicsWithFacts;
}
async findConnectedTopics(name: string): Promise<ConnectedTopic[]> {
const topicRow = await this.getRequiredTopicRow(name);
async findConnectedTopics(name: string, options?: SpaceScopedInput): Promise<ConnectedTopic[]> {
const topicRow = await this.getRequiredTopicRow(name, options?.spaceName);
if (!topicRow) {
return [];
}
const rows = await findConnectedTopicRows(this.connection.db, topicRow.id);
const rows = await findConnectedTopicRows(this.connection.db, topicRow.space_id, topicRow.id);
return rows.map((row) => ({
...mapTopicRow(row),
sharedFactCount: row.shared_fact_count,
@@ -527,18 +642,25 @@ export class IdentityDB {
topicNames?: string[] | undefined;
limit?: number | undefined;
minimumScore?: number | undefined;
spaceId?: string | undefined;
}): Promise<ScoredFact[]> {
const topicIds = await this.resolveTopicIds(input.topicNames);
const effectiveSpaceId = input.spaceId ?? await this.getDefaultSpaceIdForRead();
if (!effectiveSpaceId) {
return [];
}
const topicIds = await this.resolveTopicIds(input.topicNames, effectiveSpaceId);
if (topicIds === null) {
return [];
}
const factRows = topicIds.length > 0
? await findFactRowsConnectingTopicIds(this.connection.db, topicIds)
? await findFactRowsConnectingTopicIds(this.connection.db, effectiveSpaceId, topicIds)
: await this.connection.db
.selectFrom('facts')
.innerJoin('fact_embeddings', 'fact_embeddings.fact_id', 'facts.id')
.selectAll('facts')
.where('facts.space_id', '=', effectiveSpaceId)
.where('fact_embeddings.model', '=', input.providerModel)
.orderBy('facts.created_at', 'asc')
.execute();
@@ -547,14 +669,14 @@ export class IdentityDB {
return [];
}
const embeddingRowsQuery = this.connection.db
const embeddingRows = await this.connection.db
.selectFrom('fact_embeddings')
.selectAll()
.where('model', '=', input.providerModel);
const embeddingRows = factRows.length > 0
? await embeddingRowsQuery.where('fact_id', 'in', factRows.map((factRow) => factRow.id)).execute()
: [];
.innerJoin('facts', 'facts.id', 'fact_embeddings.fact_id')
.selectAll('fact_embeddings')
.where('facts.space_id', '=', effectiveSpaceId)
.where('fact_embeddings.model', '=', input.providerModel)
.where('fact_embeddings.fact_id', 'in', factRows.map((factRow) => factRow.id))
.execute();
const embeddingsByFactId = new Map(
embeddingRows.map((embeddingRow) => [embeddingRow.fact_id, deserializeEmbedding(embeddingRow.embedding)]),
@@ -578,7 +700,7 @@ export class IdentityDB {
return [];
}
const hydratedFacts = await this.hydrateFacts(scoredRows.map((entry) => entry.factRow));
const hydratedFacts = await this.hydrateFacts(scoredRows.map((entry) => entry.factRow), effectiveSpaceId);
const factsById = new Map(hydratedFacts.map((fact) => [fact.id, fact]));
return scoredRows.map((entry) => ({
@@ -587,12 +709,12 @@ export class IdentityDB {
}));
}
private async resolveTopicIds(topicNames?: string[]): Promise<string[] | null> {
private async resolveTopicIds(topicNames: string[] | undefined, spaceId: string): Promise<string[] | null> {
if (!topicNames || topicNames.length === 0) {
return [];
}
const topicRows = await Promise.all(topicNames.map((topicName) => this.getRequiredTopicRow(topicName)));
const topicRows = await Promise.all(topicNames.map((topicName) => this.getRequiredTopicRowInSpaceId(topicName, spaceId)));
if (topicRows.some((topicRow) => !topicRow)) {
return null;
}
@@ -637,30 +759,28 @@ export class IdentityDB {
}
}
private async upsertTopicInExecutor(
executor: DatabaseExecutor,
input: UpsertTopicInput,
): Promise<Topic> {
private async upsertTopicInExecutor(executor: DatabaseExecutor, input: UpsertTopicInput): Promise<Topic> {
const normalizedName = normalizeTopicName(input.name);
if (normalizedName.length === 0) {
throw new IdentityDBError('Topic name cannot be empty.');
}
const existing = await findTopicRowByNormalizedName(executor, normalizedName);
const space = await this.getOrCreateSpaceInExecutor(executor, input.spaceName);
const existing = await findTopicRowByNormalizedName(executor, space.id, normalizedName);
const now = nowIsoString();
if (existing) {
return this.updateTopicRowInExecutor(executor, existing, input, now, true);
}
const aliasedTopic = await findTopicRowByNormalizedAlias(executor, normalizedName);
const aliasedTopic = await findTopicRowByNormalizedAlias(executor, space.id, normalizedName);
if (aliasedTopic) {
return this.updateTopicRowInExecutor(executor, aliasedTopic, input, now, false);
}
const createdRow: TopicRecord = {
id: createId(),
space_id: space.id,
name: canonicalizeTopicName(input.name),
normalized_name: normalizedName,
category: input.category ?? 'custom',
@@ -672,7 +792,6 @@ export class IdentityDB {
};
await executor.insertInto('topics').values(createdRow).execute();
return mapTopicRow(createdRow);
}
@@ -705,22 +824,39 @@ export class IdentityDB {
return mapTopicRow(updated);
}
private async getRequiredTopicRow(name: string): Promise<TopicRecord | undefined> {
const normalizedName = normalizeTopicName(name);
private async getRequiredTopicRow(name: string, spaceName?: string): Promise<TopicRecord | undefined> {
const space = await this.getSpaceForRead(spaceName);
if (spaceName && !space) {
return undefined;
}
const spaceId = space?.id ?? await this.getDefaultSpaceIdForRead();
if (!spaceId) {
return undefined;
}
return this.getRequiredTopicRowInSpaceId(name, spaceId);
}
private async getRequiredTopicRowInSpaceId(name: string, spaceId: string): Promise<TopicRecord | undefined> {
const normalizedName = normalizeTopicName(name);
if (normalizedName.length === 0) {
return undefined;
}
return findTopicRowByNameOrAlias(this.connection.db, normalizedName);
return findTopicRowByNameOrAlias(this.connection.db, spaceId, normalizedName);
}
private async hydrateFacts(factRows: FactRecord[]): Promise<Fact[]> {
private async hydrateFacts(factRows: FactRecord[], spaceId?: string): Promise<Fact[]> {
if (factRows.length === 0) {
return [];
}
const effectiveSpaceId = spaceId ?? factRows[0]!.space_id;
const factIds = factRows.map((fact) => fact.id);
const topicLinks = await findTopicLinksForFactIds(this.connection.db, factIds);
const topicLinks = await findTopicLinksForFactIds(this.connection.db, effectiveSpaceId, factIds);
const topicsByFactId = new Map<string, FactTopic[]>();
for (const topicLink of topicLinks) {
const topics = topicsByFactId.get(topicLink.fact_id) ?? [];
topics.push({
@@ -733,4 +869,57 @@ export class IdentityDB {
return factRows.map((factRow) => mapFactRow(factRow, topicsByFactId.get(factRow.id) ?? []));
}
private async getOrCreateSpaceInExecutor(executor: DatabaseExecutor, requestedSpaceName?: string): Promise<SpaceRecord> {
const normalizedName = normalizeSpaceName(requestedSpaceName ?? DEFAULT_SPACE_NAME);
const canonicalName = canonicalizeSpaceName(requestedSpaceName ?? DEFAULT_SPACE_NAME);
const existing = await findSpaceRowByNormalizedName(executor, normalizedName);
if (existing) {
return existing;
}
const now = nowIsoString();
const createdRow: SpaceRecord = {
id: createId(),
name: canonicalName,
normalized_name: normalizedName,
description: null,
metadata: null,
created_at: now,
updated_at: now,
};
await executor.insertInto('spaces').values(createdRow).execute();
return createdRow;
}
private async getSpaceForRead(spaceName?: string): Promise<SpaceRecord | undefined> {
if (!spaceName) {
return undefined;
}
const normalizedName = normalizeSpaceName(spaceName);
if (normalizedName.length === 0) {
return undefined;
}
return findSpaceRowByNormalizedName(this.connection.db, normalizedName);
}
private async getDefaultSpaceIdForRead(): Promise<string | undefined> {
const defaultSpace = await findSpaceRowByNormalizedName(this.connection.db, normalizeSpaceName(DEFAULT_SPACE_NAME));
return defaultSpace?.id;
}
private assertScopedTopicInput(space: SpaceRecord, topicSpaceName?: string): void {
if (!topicSpaceName) {
return;
}
if (normalizeSpaceName(topicSpaceName) !== space.normalized_name) {
throw new IdentityDBError(
`Fact topics cannot point to a different space than the fact itself (${space.name}).`,
);
}
}
}

View File

@@ -4,6 +4,7 @@ import {
FACTS_TABLE,
FACT_EMBEDDINGS_TABLE,
FACT_TOPICS_TABLE,
SPACES_TABLE,
TOPIC_ALIASES_TABLE,
TOPIC_RELATIONS_TABLE,
TOPICS_TABLE,
@@ -14,23 +15,42 @@ export async function initializeSchema(
db: Kysely<IdentityDatabaseSchema>,
): Promise<void> {
await db.schema
.createTable(TOPICS_TABLE)
.createTable(SPACES_TABLE)
.ifNotExists()
.addColumn('id', 'text', (column) => column.primaryKey())
.addColumn('name', 'text', (column) => column.notNull())
.addColumn('normalized_name', 'text', (column) => column.notNull().unique())
.addColumn('category', 'text', (column) => column.notNull())
.addColumn('granularity', 'text', (column) => column.notNull())
.addColumn('description', 'text')
.addColumn('metadata', 'text')
.addColumn('created_at', 'text', (column) => column.notNull())
.addColumn('updated_at', 'text', (column) => column.notNull())
.execute();
await db.schema
.createTable(TOPICS_TABLE)
.ifNotExists()
.addColumn('id', 'text', (column) => column.primaryKey())
.addColumn('space_id', 'text', (column) =>
column.notNull().references(`${SPACES_TABLE}.id`).onDelete('cascade'),
)
.addColumn('name', 'text', (column) => column.notNull())
.addColumn('normalized_name', 'text', (column) => column.notNull())
.addColumn('category', 'text', (column) => column.notNull())
.addColumn('granularity', 'text', (column) => column.notNull())
.addColumn('description', 'text')
.addColumn('metadata', 'text')
.addColumn('created_at', 'text', (column) => column.notNull())
.addColumn('updated_at', 'text', (column) => column.notNull())
.addUniqueConstraint('topics_space_normalized_name_key', ['space_id', 'normalized_name'])
.execute();
await db.schema
.createTable(FACTS_TABLE)
.ifNotExists()
.addColumn('id', 'text', (column) => column.primaryKey())
.addColumn('space_id', 'text', (column) =>
column.notNull().references(`${SPACES_TABLE}.id`).onDelete('cascade'),
)
.addColumn('statement', 'text', (column) => column.notNull())
.addColumn('summary', 'text')
.addColumn('source', 'text')
@@ -88,14 +108,32 @@ export async function initializeSchema(
.createTable(TOPIC_ALIASES_TABLE)
.ifNotExists()
.addColumn('id', 'text', (column) => column.primaryKey())
.addColumn('space_id', 'text', (column) =>
column.notNull().references(`${SPACES_TABLE}.id`).onDelete('cascade'),
)
.addColumn('topic_id', 'text', (column) =>
column.notNull().references(`${TOPICS_TABLE}.id`).onDelete('cascade'),
)
.addColumn('alias', 'text', (column) => column.notNull())
.addColumn('normalized_alias', 'text', (column) => column.notNull().unique())
.addColumn('normalized_alias', 'text', (column) => column.notNull())
.addColumn('is_primary', 'integer', (column) => column.notNull())
.addColumn('created_at', 'text', (column) => column.notNull())
.addColumn('updated_at', 'text', (column) => column.notNull())
.addUniqueConstraint('topic_aliases_space_normalized_alias_key', ['space_id', 'normalized_alias'])
.execute();
await db.schema
.createIndex('topics_space_id_idx')
.ifNotExists()
.on(TOPICS_TABLE)
.column('space_id')
.execute();
await db.schema
.createIndex('facts_space_id_idx')
.ifNotExists()
.on(FACTS_TABLE)
.column('space_id')
.execute();
await db.schema
@@ -133,6 +171,13 @@ export async function initializeSchema(
.column('child_topic_id')
.execute();
await db.schema
.createIndex('topic_aliases_space_id_idx')
.ifNotExists()
.on(TOPIC_ALIASES_TABLE)
.column('space_id')
.execute();
await db.schema
.createIndex('topic_aliases_topic_id_idx')
.ifNotExists()

View File

@@ -1,3 +1,4 @@
export const SPACES_TABLE = 'spaces';
export const TOPICS_TABLE = 'topics';
export const FACTS_TABLE = 'facts';
export const FACT_TOPICS_TABLE = 'fact_topics';
@@ -5,8 +6,19 @@ export const TOPIC_RELATIONS_TABLE = 'topic_relations';
export const TOPIC_ALIASES_TABLE = 'topic_aliases';
export const FACT_EMBEDDINGS_TABLE = 'fact_embeddings';
export const SPACE_COLUMNS = [
'id',
'name',
'normalized_name',
'description',
'metadata',
'created_at',
'updated_at',
] as const;
export const TOPIC_COLUMNS = [
'id',
'space_id',
'name',
'normalized_name',
'category',
@@ -19,6 +31,7 @@ export const TOPIC_COLUMNS = [
export const FACT_COLUMNS = [
'id',
'space_id',
'statement',
'summary',
'source',
@@ -45,6 +58,7 @@ export const TOPIC_RELATION_COLUMNS = [
export const TOPIC_ALIAS_COLUMNS = [
'id',
'space_id',
'topic_id',
'alias',
'normalized_alias',

View File

@@ -1,7 +1,7 @@
import { createHash, randomUUID } from 'node:crypto';
import type { Fact, FactTopic, Topic } from '../types/api';
import type { FactRecord, TopicRecord } from '../types/domain';
import type { Fact, FactTopic, Space, Topic } from '../types/api';
import type { FactRecord, SpaceRecord, TopicRecord } from '../types/domain';
export function normalizeTopicName(name: string): string {
return name.trim().replace(/\s+/g, ' ').toLowerCase();
@@ -11,6 +11,14 @@ export function canonicalizeTopicName(name: string): string {
return name.trim().replace(/\s+/g, ' ');
}
export function normalizeSpaceName(name: string): string {
return normalizeTopicName(name);
}
export function canonicalizeSpaceName(name: string): string {
return canonicalizeTopicName(name);
}
export function nowIsoString(): string {
return new Date().toISOString();
}
@@ -71,9 +79,22 @@ export function cosineSimilarity(left: number[], right: number[]): number {
return dot / (Math.sqrt(leftMagnitude) * Math.sqrt(rightMagnitude));
}
export function mapSpaceRow(record: SpaceRecord): Space {
return {
id: record.id,
name: record.name,
normalizedName: record.normalized_name,
description: record.description,
metadata: deserializeMetadata(record.metadata) as Space['metadata'],
createdAt: record.created_at,
updatedAt: record.updated_at,
};
}
export function mapTopicRow(record: TopicRecord): Topic {
return {
id: record.id,
spaceId: record.space_id,
name: record.name,
normalizedName: record.normalized_name,
category: record.category,
@@ -88,6 +109,7 @@ export function mapTopicRow(record: TopicRecord): Topic {
export function mapFactRow(record: FactRecord, topics: FactTopic[]): Fact {
return {
id: record.id,
spaceId: record.space_id,
statement: record.statement,
summary: record.summary,
source: record.source,

View File

@@ -31,4 +31,5 @@ export interface IngestStatementOptions {
extractor: FactExtractor;
embeddingProvider?: EmbeddingProvider;
duplicateThreshold?: number;
spaceName?: string;
}

View File

@@ -13,12 +13,14 @@ export interface FactTopicJoinRow extends TopicRecord {
export async function findFactRowsForTopicId(
executor: DatabaseExecutor,
spaceId: string,
topicId: string,
): Promise<FactRecord[]> {
return executor
.selectFrom('facts')
.innerJoin('fact_topics', 'fact_topics.fact_id', 'facts.id')
.selectAll('facts')
.where('facts.space_id', '=', spaceId)
.where('fact_topics.topic_id', '=', topicId)
.orderBy('facts.created_at', 'asc')
.execute();
@@ -26,6 +28,7 @@ export async function findFactRowsForTopicId(
export async function findFactRowsConnectingTopicIds(
executor: DatabaseExecutor,
spaceId: string,
topicIds: string[],
): Promise<FactRecord[]> {
if (topicIds.length === 0) {
@@ -36,6 +39,7 @@ export async function findFactRowsConnectingTopicIds(
.selectFrom('facts')
.innerJoin('fact_topics', 'fact_topics.fact_id', 'facts.id')
.selectAll('facts')
.where('facts.space_id', '=', spaceId)
.where('fact_topics.topic_id', 'in', topicIds)
.groupBy('facts.id')
.having((eb) => eb.fn.count<number>('fact_topics.topic_id'), '=', topicIds.length)
@@ -45,6 +49,7 @@ export async function findFactRowsConnectingTopicIds(
export async function findTopicLinksForFactIds(
executor: DatabaseExecutor,
spaceId: string,
factIds: string[],
): Promise<FactTopicJoinRow[]> {
if (factIds.length === 0) {
@@ -60,6 +65,7 @@ export async function findTopicLinksForFactIds(
'fact_topics.role as role',
'fact_topics.position as position',
])
.where('topics.space_id', '=', spaceId)
.where('fact_topics.fact_id', 'in', factIds)
.orderBy('fact_topics.position', 'asc')
.execute() as Promise<FactTopicJoinRow[]>;

View File

@@ -1,7 +1,7 @@
import type { Kysely, Transaction } from 'kysely';
import type { IdentityDatabaseSchema } from '../types/database';
import type { TopicAliasRecord, TopicRecord } from '../types/domain';
import type { SpaceRecord, TopicAliasRecord, TopicRecord } from '../types/domain';
export type DatabaseExecutor = Kysely<IdentityDatabaseSchema> | Transaction<IdentityDatabaseSchema>;
@@ -9,48 +9,66 @@ export interface ConnectedTopicRow extends TopicRecord {
shared_fact_count: number;
}
export async function findSpaceRowByNormalizedName(
executor: DatabaseExecutor,
normalizedName: string,
): Promise<SpaceRecord | undefined> {
return executor
.selectFrom('spaces')
.selectAll()
.where('normalized_name', '=', normalizedName)
.executeTakeFirst();
}
export async function findTopicRowByNormalizedName(
executor: DatabaseExecutor,
spaceId: string,
normalizedName: string,
): Promise<TopicRecord | undefined> {
return executor
.selectFrom('topics')
.selectAll()
.where('space_id', '=', spaceId)
.where('normalized_name', '=', normalizedName)
.executeTakeFirst();
}
export async function findTopicRowByNormalizedAlias(
executor: DatabaseExecutor,
spaceId: string,
normalizedAlias: string,
): Promise<TopicRecord | undefined> {
return executor
.selectFrom('topic_aliases')
.innerJoin('topics', 'topics.id', 'topic_aliases.topic_id')
.selectAll('topics')
.where('topic_aliases.space_id', '=', spaceId)
.where('topic_aliases.normalized_alias', '=', normalizedAlias)
.executeTakeFirst();
}
export async function findTopicRowByNameOrAlias(
executor: DatabaseExecutor,
spaceId: string,
normalizedName: string,
): Promise<TopicRecord | undefined> {
const directMatch = await findTopicRowByNormalizedName(executor, normalizedName);
const directMatch = await findTopicRowByNormalizedName(executor, spaceId, normalizedName);
if (directMatch) {
return directMatch;
}
return findTopicRowByNormalizedAlias(executor, normalizedName);
return findTopicRowByNormalizedAlias(executor, spaceId, normalizedName);
}
export async function listTopicAliasRowsForTopicId(
executor: DatabaseExecutor,
spaceId: string,
topicId: string,
): Promise<TopicAliasRecord[]> {
return executor
.selectFrom('topic_aliases')
.selectAll()
.where('space_id', '=', spaceId)
.where('topic_id', '=', topicId)
.orderBy('is_primary', 'desc')
.orderBy('normalized_alias', 'asc')
@@ -59,9 +77,14 @@ export async function listTopicAliasRowsForTopicId(
export async function listTopicRows(
executor: DatabaseExecutor,
spaceId: string,
limit?: number,
): Promise<TopicRecord[]> {
let query = executor.selectFrom('topics').selectAll().orderBy('normalized_name', 'asc');
let query = executor
.selectFrom('topics')
.selectAll()
.where('space_id', '=', spaceId)
.orderBy('normalized_name', 'asc');
if (limit !== undefined) {
query = query.limit(limit);
@@ -72,14 +95,18 @@ export async function listTopicRows(
export async function findConnectedTopicRows(
executor: DatabaseExecutor,
spaceId: string,
topicId: string,
): Promise<ConnectedTopicRow[]> {
return executor
.selectFrom('fact_topics as source_link')
.innerJoin('facts', 'facts.id', 'source_link.fact_id')
.innerJoin('fact_topics as related_link', 'related_link.fact_id', 'source_link.fact_id')
.innerJoin('topics', 'topics.id', 'related_link.topic_id')
.selectAll('topics')
.select((eb) => eb.fn.count<number>('related_link.fact_id').as('shared_fact_count'))
.where('facts.space_id', '=', spaceId)
.where('topics.space_id', '=', spaceId)
.where('source_link.topic_id', '=', topicId)
.whereRef('related_link.topic_id', '!=', 'source_link.topic_id')
.groupBy('topics.id')
@@ -90,12 +117,14 @@ export async function findConnectedTopicRows(
export async function findChildTopicRows(
executor: DatabaseExecutor,
spaceId: string,
parentTopicId: string,
): Promise<TopicRecord[]> {
return executor
.selectFrom('topic_relations')
.innerJoin('topics', 'topics.id', 'topic_relations.child_topic_id')
.selectAll('topics')
.where('topics.space_id', '=', spaceId)
.where('topic_relations.parent_topic_id', '=', parentTopicId)
.where('topic_relations.relation', '=', 'parent_of')
.orderBy('topics.normalized_name', 'asc')
@@ -104,12 +133,14 @@ export async function findChildTopicRows(
export async function findParentTopicRows(
executor: DatabaseExecutor,
spaceId: string,
childTopicId: string,
): Promise<TopicRecord[]> {
return executor
.selectFrom('topic_relations')
.innerJoin('topics', 'topics.id', 'topic_relations.parent_topic_id')
.selectAll('topics')
.where('topics.space_id', '=', spaceId)
.where('topic_relations.child_topic_id', '=', childTopicId)
.where('topic_relations.relation', '=', 'parent_of')
.orderBy('topics.normalized_name', 'asc')

View File

@@ -1,6 +1,26 @@
import type { JsonValue, TopicCategory, TopicGranularity } from './domain';
export interface UpsertTopicInput {
export interface SpaceScopedInput {
spaceName?: string | undefined;
}
export interface UpsertSpaceInput {
name: string;
description?: string | null;
metadata?: JsonValue | null;
}
export interface Space {
id: string;
name: string;
normalizedName: string;
description: string | null;
metadata: JsonValue | null;
createdAt: string;
updatedAt: string;
}
export interface UpsertTopicInput extends SpaceScopedInput {
name: string;
category?: TopicCategory;
granularity?: TopicGranularity;
@@ -12,7 +32,7 @@ export interface TopicLinkInput extends UpsertTopicInput {
role?: string | null;
}
export interface AddFactInput {
export interface AddFactInput extends SpaceScopedInput {
statement: string;
summary?: string | null;
source?: string | null;
@@ -21,13 +41,14 @@ export interface AddFactInput {
topics: TopicLinkInput[];
}
export interface LinkTopicsInput {
export interface LinkTopicsInput extends SpaceScopedInput {
parentName: string;
childName: string;
}
export interface Topic {
id: string;
spaceId: string;
name: string;
normalizedName: string;
category: TopicCategory;
@@ -45,6 +66,7 @@ export interface FactTopic extends Topic {
export interface Fact {
id: string;
spaceId: string;
statement: string;
summary: string | null;
source: string | null;
@@ -63,11 +85,11 @@ export interface ConnectedTopic extends Topic {
sharedFactCount: number;
}
export interface TopicLookupOptions {
export interface TopicLookupOptions extends SpaceScopedInput {
includeFacts?: boolean;
}
export interface ListTopicsOptions {
export interface ListTopicsOptions extends SpaceScopedInput {
includeFacts?: boolean;
limit?: number;
}
@@ -79,11 +101,11 @@ export interface EmbeddingProvider {
embedMany?(inputs: string[]): Promise<number[][]>;
}
export interface IndexFactEmbeddingsInput {
export interface IndexFactEmbeddingsInput extends SpaceScopedInput {
provider: EmbeddingProvider;
}
export interface SearchFactsInput {
export interface SearchFactsInput extends SpaceScopedInput {
query: string;
provider: EmbeddingProvider;
topicNames?: string[];
@@ -91,7 +113,7 @@ export interface SearchFactsInput {
minimumScore?: number;
}
export interface FindSimilarFactsInput {
export interface FindSimilarFactsInput extends SpaceScopedInput {
statement: string;
provider: EmbeddingProvider;
topicNames?: string[];

View File

@@ -2,12 +2,14 @@ import type {
FactEmbeddingRecord,
FactRecord,
FactTopicRecord,
SpaceRecord,
TopicAliasRecord,
TopicRecord,
TopicRelationRecord,
} from './domain';
export interface IdentityDatabaseSchema {
spaces: SpaceRecord;
topics: TopicRecord;
facts: FactRecord;
fact_topics: FactTopicRecord;

View File

@@ -5,8 +5,19 @@ export type TopicGranularity = 'abstract' | 'concrete' | 'mixed';
export type JsonPrimitive = string | number | boolean | null;
export type JsonValue = JsonPrimitive | JsonValue[] | { [key: string]: JsonValue };
export interface SpaceRecord {
id: string;
name: string;
normalized_name: string;
description: string | null;
metadata: string | null;
created_at: string;
updated_at: string;
}
export interface TopicRecord {
id: string;
space_id: string;
name: string;
normalized_name: string;
category: TopicCategory;
@@ -19,6 +30,7 @@ export interface TopicRecord {
export interface FactRecord {
id: string;
space_id: string;
statement: string;
summary: string | null;
source: string | null;
@@ -45,6 +57,7 @@ export interface TopicRelationRecord {
export interface TopicAliasRecord {
id: string;
space_id: string;
topic_id: string;
alias: string;
normalized_alias: string;

View File

@@ -34,6 +34,58 @@ describe('IdentityDB topic and fact writes', () => {
expect(topics).toHaveLength(1);
});
it('keeps same normalized topic names isolated across spaces', async () => {
const alpha = await db.upsertTopic({
name: 'TypeScript',
category: 'entity',
granularity: 'concrete',
spaceName: 'A',
});
const beta = await db.upsertTopic({
name: 'TypeScript',
category: 'entity',
granularity: 'concrete',
spaceName: 'B',
});
expect(beta.id).not.toBe(alpha.id);
const alphaTopics = await db.listTopics({ includeFacts: false, spaceName: 'A' });
const betaTopics = await db.listTopics({ includeFacts: false, spaceName: 'B' });
const defaultTopics = await db.listTopics({ includeFacts: false });
expect(alphaTopics.map((topic) => topic.name)).toEqual(['TypeScript']);
expect(betaTopics.map((topic) => topic.name)).toEqual(['TypeScript']);
expect(defaultTopics).toHaveLength(0);
});
it('keeps alias resolution scoped to the requested space', async () => {
await db.upsertTopic({
name: 'TypeScript',
category: 'entity',
granularity: 'concrete',
spaceName: 'A',
});
await db.upsertTopic({
name: 'TeamSpeak',
category: 'entity',
granularity: 'concrete',
spaceName: 'B',
});
await db.addTopicAlias('TypeScript', 'TS', { spaceName: 'A' });
await db.addTopicAlias('TeamSpeak', 'TS', { spaceName: 'B' });
const alphaResolved = await db.resolveTopic('ts', { spaceName: 'A' });
const betaResolved = await db.resolveTopic('ts', { spaceName: 'B' });
const defaultResolved = await db.resolveTopic('ts');
expect(alphaResolved?.name).toBe('TypeScript');
expect(betaResolved?.name).toBe('TeamSpeak');
expect(defaultResolved).toBeNull();
});
it('adds one fact that links multiple topics', async () => {
const fact = await db.addFact({
statement: 'I have worked with TypeScript since 2025.',

View File

@@ -16,7 +16,7 @@ afterEach(async () => {
});
describe('initializeSchema', () => {
it('creates the topics, facts, fact_embeddings, fact_topics, topic_relations, and topic_aliases tables', async () => {
it('creates the spaces, topics, facts, fact_embeddings, fact_topics, topic_relations, and topic_aliases tables', async () => {
const connection = await createDatabase({ client: 'sqlite', filename: ':memory:' });
openConnections.push(connection.destroy);
@@ -31,6 +31,7 @@ describe('initializeSchema', () => {
const tableNames = tables.rows.map((row) => row.name);
expect(tableNames).toContain('spaces');
expect(tableNames).toContain('topics');
expect(tableNames).toContain('facts');
expect(tableNames).toContain('fact_embeddings');
@@ -45,6 +46,7 @@ describe('initializeSchema', () => {
await initializeSchema(connection.db);
const spaceColumns = await sql<{ name: string }>`PRAGMA table_info(spaces)`.execute(connection.db);
const topicsColumns = await sql<{ name: string }>`PRAGMA table_info(topics)`.execute(connection.db);
const factsColumns = await sql<{ name: string }>`PRAGMA table_info(facts)`.execute(connection.db);
const factEmbeddingsColumns = await sql<{ name: string }>`PRAGMA table_info(fact_embeddings)`.execute(connection.db);
@@ -52,8 +54,19 @@ describe('initializeSchema', () => {
const topicRelationsColumns = await sql<{ name: string }>`PRAGMA table_info(topic_relations)`.execute(connection.db);
const topicAliasesColumns = await sql<{ name: string }>`PRAGMA table_info(topic_aliases)`.execute(connection.db);
expect(spaceColumns.rows.map((row) => row.name)).toEqual([
'id',
'name',
'normalized_name',
'description',
'metadata',
'created_at',
'updated_at',
]);
expect(topicsColumns.rows.map((row) => row.name)).toEqual([
'id',
'space_id',
'name',
'normalized_name',
'category',
@@ -66,6 +79,7 @@ describe('initializeSchema', () => {
expect(factsColumns.rows.map((row) => row.name)).toEqual([
'id',
'space_id',
'statement',
'summary',
'source',
@@ -102,6 +116,7 @@ describe('initializeSchema', () => {
expect(topicAliasesColumns.rows.map((row) => row.name)).toEqual([
'id',
'space_id',
'topic_id',
'alias',
'normalized_alias',

View File

@@ -2,9 +2,10 @@ import { afterEach, beforeEach, describe, expect, it } from 'vitest';
import { IdentityDB } from '../src/core/identity-db';
async function seedMemoryGraph(db: IdentityDB): Promise<void> {
async function seedMemoryGraph(db: IdentityDB, spaceName?: string): Promise<void> {
await db.addFact({
statement: 'I have worked with TypeScript since 2025.',
spaceName,
topics: [
{ name: 'I', category: 'entity', granularity: 'concrete', role: 'subject' },
{ name: 'TypeScript', category: 'entity', granularity: 'concrete', role: 'object' },
@@ -14,6 +15,7 @@ async function seedMemoryGraph(db: IdentityDB): Promise<void> {
await db.addFact({
statement: 'TypeScript is a programming language.',
spaceName,
topics: [
{ name: 'TypeScript', category: 'entity', granularity: 'concrete', role: 'subject' },
{ name: 'programming language', category: 'concept', granularity: 'abstract', role: 'classification' },
@@ -23,11 +25,13 @@ async function seedMemoryGraph(db: IdentityDB): Promise<void> {
await db.linkTopics({
parentName: 'software technology',
childName: 'programming language',
spaceName,
});
await db.linkTopics({
parentName: 'programming language',
childName: 'TypeScript',
spaceName,
});
}
@@ -114,6 +118,56 @@ describe('IdentityDB queries', () => {
]);
});
it('keeps hierarchy and fact queries isolated per space', async () => {
const isolatedDb = await IdentityDB.connect({ client: 'sqlite', filename: ':memory:' });
try {
await isolatedDb.initialize();
await seedMemoryGraph(isolatedDb, 'A');
await isolatedDb.addFact({
statement: 'TypeScript is a typed superset.',
spaceName: 'B',
topics: [
{ name: 'TypeScript', category: 'entity', granularity: 'concrete', role: 'subject' },
{ name: 'superset', category: 'concept', granularity: 'abstract', role: 'classification' },
],
});
await isolatedDb.linkTopics({
parentName: 'language family',
childName: 'TypeScript',
spaceName: 'B',
});
const alphaTopic = await isolatedDb.getTopicByName('TypeScript', {
includeFacts: true,
spaceName: 'A',
});
const betaTopic = await isolatedDb.getTopicByName('TypeScript', {
includeFacts: true,
spaceName: 'B',
});
const alphaParents = await isolatedDb.getTopicParents('TypeScript', { spaceName: 'A' });
const betaParents = await isolatedDb.getTopicParents('TypeScript', { spaceName: 'B' });
const alphaConnected = await isolatedDb.findConnectedTopics('TypeScript', { spaceName: 'A' });
const betaConnected = await isolatedDb.findConnectedTopics('TypeScript', { spaceName: 'B' });
expect(alphaTopic?.facts.map((fact) => fact.statement)).toEqual([
'I have worked with TypeScript since 2025.',
'TypeScript is a programming language.',
]);
expect(betaTopic?.facts.map((fact) => fact.statement)).toEqual([
'TypeScript is a typed superset.',
]);
expect(alphaParents.map((topic) => topic.name)).toEqual(['programming language']);
expect(betaParents.map((topic) => topic.name)).toEqual(['language family']);
expect(alphaConnected.map((topic) => topic.name)).toEqual(['2025', 'I', 'programming language']);
expect(betaConnected.map((topic) => topic.name)).toEqual(['superset']);
} finally {
await isolatedDb.close();
}
});
it('resolves alias names in topic lookups', async () => {
await db.addTopicAlias('TypeScript', 'TS');

View File

@@ -120,6 +120,53 @@ describe('IdentityDB semantic search', () => {
expect(matches[0]?.statement).toBe('Bun runs TypeScript tooling quickly.');
expect(matches[0]!.score).toBeGreaterThan(matches[1]!.score);
});
it('keeps semantic search isolated per space', async () => {
  // Fresh in-memory database so embeddings from other tests cannot leak in.
  const tenantDb = await IdentityDB.connect({ client: 'sqlite', filename: ':memory:' });
  try {
    await tenantDb.initialize();

    // One near-identical statement per space; search must never cross over.
    await tenantDb.addFact({
      statement: 'Bun runs TypeScript tooling quickly.',
      spaceName: 'A',
      topics: [
        { name: 'Bun', category: 'entity', granularity: 'concrete' },
        { name: 'TypeScript', category: 'entity', granularity: 'concrete' },
      ],
    });
    await tenantDb.addFact({
      statement: 'TypeScript runtime tooling belongs to another tenant.',
      spaceName: 'B',
      topics: [{ name: 'TypeScript', category: 'entity', granularity: 'concrete' }],
    });
    await tenantDb.indexFactEmbeddings({ provider, spaceName: 'A' });
    await tenantDb.indexFactEmbeddings({ provider, spaceName: 'B' });

    // Run the same query against each space independently.
    const searchIn = (spaceName) =>
      tenantDb.searchFacts({ query: 'TypeScript runtime tooling', provider, spaceName });

    const hitsInA = await searchIn('A');
    const hitsInB = await searchIn('B');

    expect(hitsInA.map((match) => match.statement)).toEqual([
      'Bun runs TypeScript tooling quickly.',
    ]);
    expect(hitsInB.map((match) => match.statement)).toEqual([
      'TypeScript runtime tooling belongs to another tenant.',
    ]);
  } finally {
    await tenantDb.close();
  }
});
});
describe('IdentityDB dedup-aware ingestion', () => {
@@ -167,4 +214,26 @@ describe('IdentityDB dedup-aware ingestion', () => {
expect(facts).toHaveLength(1);
expect(facts[0]?.statement).toBe('Bun runs TypeScript tooling quickly.');
});
it('does not reuse a semantic duplicate from another space', async () => {
  // Ingest two statements that are semantic near-duplicates, but into
  // different spaces: dedup must only match within the target space.
  const first = await db.ingestStatement('Bun runs TypeScript tooling quickly.', {
    extractor,
    embeddingProvider: provider,
    spaceName: 'A',
  });
  const second = await db.ingestStatement('Bun makes TypeScript tooling fast.', {
    extractor,
    embeddingProvider: provider,
    duplicateThreshold: 0.95,
    spaceName: 'B',
  });

  const alphaFacts = await db.getTopicFacts('TypeScript', { spaceName: 'A' });
  const betaFacts = await db.getTopicFacts('TypeScript', { spaceName: 'B' });

  // A cross-space dedup hit would hand back the existing fact's id.
  expect(second.id).not.toBe(first.id);
  expect(alphaFacts).toHaveLength(1);
  expect(betaFacts).toHaveLength(1);
  // Also pin WHICH statement landed in which space: counting alone would
  // not catch writes that were swapped between spaces or shared.
  expect(alphaFacts[0]?.statement).toBe('Bun runs TypeScript tooling quickly.');
  expect(betaFacts[0]?.statement).toBe('Bun makes TypeScript tooling fast.');
});
});