-- File: leaudit-platform-backend/scripts/schema_add_rag_chat.sql
-- Language: PL/pgSQL (PostgreSQL)

-- The whole migration runs in one transaction; the matching COMMIT is the
-- last statement of the file.
BEGIN;

-- rag_dataset: one row per dataset, holding its embedding/chunking settings,
-- aggregate counters and audit columns.
-- NOTE(review): the meaning of the status codes and the unit of the chunk
-- sizes are not defined in this script -- confirm with the application code.
CREATE TABLE IF NOT EXISTS rag_dataset (
    id               BIGSERIAL     PRIMARY KEY,
    name             VARCHAR(255)  NOT NULL,
    description      TEXT          NOT NULL DEFAULT '',
    area             VARCHAR(50)   NOT NULL DEFAULT '',
    is_public        BOOLEAN       NOT NULL DEFAULT FALSE,
    is_default       BOOLEAN       NOT NULL DEFAULT FALSE,
    collection_name  VARCHAR(100)  NOT NULL,
    embedding_model  VARCHAR(100)  NOT NULL DEFAULT 'text-embedding-v4',
    embedding_dim    INTEGER       NOT NULL DEFAULT 1024,
    chunk_max_size   INTEGER       NOT NULL DEFAULT 800,
    chunk_min_size   INTEGER       NOT NULL DEFAULT 20,
    document_count   INTEGER       NOT NULL DEFAULT 0,
    total_chunks     INTEGER       NOT NULL DEFAULT 0,
    retrieval_model  JSONB         NOT NULL DEFAULT '{}'::jsonb,
    sort_order       INTEGER       NOT NULL DEFAULT 0,
    status           SMALLINT      NOT NULL DEFAULT 1,
    created_by       BIGINT,
    created_at       TIMESTAMPTZ   NOT NULL DEFAULT NOW(),
    updated_by       BIGINT,
    updated_at       TIMESTAMPTZ   NOT NULL DEFAULT NOW(),
    -- Nullable on purpose; the indexes in this file filter on
    -- "deleted_at IS NULL" (presumably a soft-delete marker).
    deleted_at       TIMESTAMPTZ,

    -- Unnamed on purpose: PostgreSQL generates the same constraint name as
    -- for a column-level UNIQUE (rag_dataset_collection_name_key).
    UNIQUE (collection_name)
);
-- rag_document: one row per file attached to a dataset, with its storage
-- path, indexing progress and usage counters.
-- NOTE(review): the set of valid indexing_status values besides the
-- 'pending' default is not defined here -- confirm with the application.
CREATE TABLE IF NOT EXISTS rag_document (
    id                     BIGSERIAL      PRIMARY KEY,
    dataset_id             BIGINT         NOT NULL,
    filename               VARCHAR(500)   NOT NULL,
    original_name          VARCHAR(500)   NOT NULL,
    minio_path             VARCHAR(1000)  NOT NULL,
    file_type              VARCHAR(20)    NOT NULL,
    file_size              BIGINT         NOT NULL DEFAULT 0,
    chunk_count            INTEGER        NOT NULL DEFAULT 0,
    indexing_status        VARCHAR(20)    NOT NULL DEFAULT 'pending',
    indexing_error         TEXT,
    indexing_started_at    TIMESTAMPTZ,
    indexing_completed_at  TIMESTAMPTZ,
    enabled                BOOLEAN        NOT NULL DEFAULT TRUE,
    hit_count              INTEGER        NOT NULL DEFAULT 0,
    created_by             BIGINT,
    created_at             TIMESTAMPTZ    NOT NULL DEFAULT NOW(),
    updated_at             TIMESTAMPTZ    NOT NULL DEFAULT NOW(),
    deleted_at             TIMESTAMPTZ,

    -- Every document must belong to an existing dataset. No ON DELETE
    -- clause, so the default (NO ACTION) applies. Left unnamed so the
    -- generated name matches a column-level REFERENCES
    -- (rag_document_dataset_id_fkey).
    FOREIGN KEY (dataset_id) REFERENCES rag_dataset (id)
);
-- rag_chat_app: one row per chat application, with its prompt/LLM settings
-- and an optional link to a dataset.
CREATE TABLE IF NOT EXISTS rag_chat_app (
    id                   BIGSERIAL     PRIMARY KEY,
    name                 VARCHAR(255)  NOT NULL,
    description          TEXT          NOT NULL DEFAULT '',
    area                 VARCHAR(50)   NOT NULL DEFAULT '',
    -- Nullable: an app may exist without a dataset.
    dataset_id           BIGINT,
    system_prompt        TEXT          NOT NULL DEFAULT '',
    llm_model            VARCHAR(100)  NOT NULL DEFAULT '',
    temperature          REAL          NOT NULL DEFAULT 0.3,
    max_tokens           INTEGER       NOT NULL DEFAULT 2048,
    opening_statement    TEXT          NOT NULL DEFAULT '',
    -- Stored as TEXT, not JSONB; the default is the literal string '[]'.
    -- NOTE(review): presumably a JSON-encoded list -- confirm with the caller.
    suggested_questions  TEXT          NOT NULL DEFAULT '[]',
    is_default           BOOLEAN       NOT NULL DEFAULT FALSE,
    sort_order           INTEGER       NOT NULL DEFAULT 0,
    status               SMALLINT      NOT NULL DEFAULT 1,
    created_by           BIGINT,
    created_at           TIMESTAMPTZ   NOT NULL DEFAULT NOW(),
    updated_by           BIGINT,
    updated_at           TIMESTAMPTZ   NOT NULL DEFAULT NOW(),
    deleted_at           TIMESTAMPTZ,

    -- Unnamed so the generated name equals the column-level form
    -- (rag_chat_app_dataset_id_fkey); default NO ACTION on delete.
    FOREIGN KEY (dataset_id) REFERENCES rag_dataset (id)
);
-- rag_conversation: one row per chat conversation. Besides the numeric
-- surrogate key it carries a unique string conversation_id, which is the
-- column rag_message references.
CREATE TABLE IF NOT EXISTS rag_conversation (
    id               BIGSERIAL     PRIMARY KEY,
    conversation_id  VARCHAR(100)  NOT NULL,
    -- No foreign key is declared for user_id in this script.
    user_id          BIGINT        NOT NULL,
    -- Nullable: a conversation is not required to be tied to a chat app.
    app_id           BIGINT,
    name             VARCHAR(500)  NOT NULL DEFAULT '新对话',
    introduction     TEXT          NOT NULL DEFAULT '',
    created_at       TIMESTAMPTZ   NOT NULL DEFAULT NOW(),
    updated_at       TIMESTAMPTZ   NOT NULL DEFAULT NOW(),
    deleted_at       TIMESTAMPTZ,

    -- Both constraints are unnamed so PostgreSQL generates the same names
    -- as the column-level forms (rag_conversation_conversation_id_key and
    -- rag_conversation_app_id_fkey).
    UNIQUE (conversation_id),
    FOREIGN KEY (app_id) REFERENCES rag_chat_app (id)
);
-- rag_message: one row per message in a conversation. This table has no
-- updated_at/deleted_at columns, unlike the other rag_* tables.
-- NOTE(review): allowed values for role and feedback are not constrained
-- here -- confirm with the application code.
CREATE TABLE IF NOT EXISTS rag_message (
    id               BIGSERIAL     PRIMARY KEY,
    message_id       VARCHAR(100)  NOT NULL,
    conversation_id  VARCHAR(100)  NOT NULL,
    role             VARCHAR(20)   NOT NULL,
    content          TEXT          NOT NULL DEFAULT '',
    sources          JSONB         NOT NULL DEFAULT '[]'::jsonb,
    metadata         JSONB         NOT NULL DEFAULT '{}'::jsonb,
    feedback         VARCHAR(20),
    created_at       TIMESTAMPTZ   NOT NULL DEFAULT NOW(),

    -- Unnamed constraints keep the auto-generated names of the
    -- column-level forms (rag_message_message_id_key and
    -- rag_message_conversation_id_fkey).
    UNIQUE (message_id),
    -- Points at the string key of rag_conversation, not its numeric id.
    FOREIGN KEY (conversation_id) REFERENCES rag_conversation (conversation_id)
);
-- Upgrade path for a rag_dataset table that existed before this script:
-- add whichever of these columns are missing. IF NOT EXISTS only checks the
-- column name, so a column that is already present keeps its current type,
-- nullability and default. Clause order is kept as-is because it determines
-- the column order on tables that receive the new columns.
ALTER TABLE rag_dataset
    ADD COLUMN IF NOT EXISTS retrieval_model  JSONB         NOT NULL DEFAULT '{}'::jsonb,
    ADD COLUMN IF NOT EXISTS embedding_model  VARCHAR(100)  NOT NULL DEFAULT 'text-embedding-v4',
    ADD COLUMN IF NOT EXISTS embedding_dim    INTEGER       NOT NULL DEFAULT 1024,
    ADD COLUMN IF NOT EXISTS chunk_max_size   INTEGER       NOT NULL DEFAULT 800,
    ADD COLUMN IF NOT EXISTS chunk_min_size   INTEGER       NOT NULL DEFAULT 20,
    ADD COLUMN IF NOT EXISTS document_count   INTEGER       NOT NULL DEFAULT 0,
    ADD COLUMN IF NOT EXISTS total_chunks     INTEGER       NOT NULL DEFAULT 0,
    ADD COLUMN IF NOT EXISTS sort_order       INTEGER       NOT NULL DEFAULT 0,
    ADD COLUMN IF NOT EXISTS status           SMALLINT      NOT NULL DEFAULT 1,
    ADD COLUMN IF NOT EXISTS updated_by       BIGINT,
    ADD COLUMN IF NOT EXISTS updated_at       TIMESTAMPTZ   NOT NULL DEFAULT NOW(),
    ADD COLUMN IF NOT EXISTS deleted_at       TIMESTAMPTZ;
-- Upgrade path for a pre-existing rag_document table: add the columns below
-- when they are missing. Columns that already exist are left untouched
-- (IF NOT EXISTS matches on name only).
ALTER TABLE rag_document
    ADD COLUMN IF NOT EXISTS enabled                BOOLEAN      NOT NULL DEFAULT TRUE,
    ADD COLUMN IF NOT EXISTS hit_count              INTEGER      NOT NULL DEFAULT 0,
    ADD COLUMN IF NOT EXISTS indexing_started_at    TIMESTAMPTZ,
    ADD COLUMN IF NOT EXISTS indexing_completed_at  TIMESTAMPTZ,
    ADD COLUMN IF NOT EXISTS updated_at             TIMESTAMPTZ  NOT NULL DEFAULT NOW(),
    ADD COLUMN IF NOT EXISTS deleted_at             TIMESTAMPTZ;
-- Upgrade path for a pre-existing rag_chat_app table: add the columns below
-- when they are missing. The REFERENCES clause stays inline because the
-- foreign key must only be created together with a newly added dataset_id
-- column; an already existing dataset_id is left exactly as it is.
ALTER TABLE rag_chat_app
    ADD COLUMN IF NOT EXISTS dataset_id           BIGINT        REFERENCES rag_dataset(id),
    ADD COLUMN IF NOT EXISTS system_prompt        TEXT          NOT NULL DEFAULT '',
    ADD COLUMN IF NOT EXISTS llm_model            VARCHAR(100)  NOT NULL DEFAULT '',
    ADD COLUMN IF NOT EXISTS temperature          REAL          NOT NULL DEFAULT 0.3,
    ADD COLUMN IF NOT EXISTS max_tokens           INTEGER       NOT NULL DEFAULT 2048,
    ADD COLUMN IF NOT EXISTS opening_statement    TEXT          NOT NULL DEFAULT '',
    ADD COLUMN IF NOT EXISTS suggested_questions  TEXT          NOT NULL DEFAULT '[]',
    ADD COLUMN IF NOT EXISTS is_default           BOOLEAN       NOT NULL DEFAULT FALSE,
    ADD COLUMN IF NOT EXISTS sort_order           INTEGER       NOT NULL DEFAULT 0,
    ADD COLUMN IF NOT EXISTS status               SMALLINT      NOT NULL DEFAULT 1,
    ADD COLUMN IF NOT EXISTS updated_by           BIGINT,
    ADD COLUMN IF NOT EXISTS updated_at           TIMESTAMPTZ   NOT NULL DEFAULT NOW(),
    ADD COLUMN IF NOT EXISTS deleted_at           TIMESTAMPTZ;
-- Upgrade path for a pre-existing rag_conversation table: add the app link,
-- the introduction text and the updated/deleted timestamps when missing.
-- Existing columns of the same name are not modified.
ALTER TABLE rag_conversation
    ADD COLUMN IF NOT EXISTS app_id        BIGINT       REFERENCES rag_chat_app(id),
    ADD COLUMN IF NOT EXISTS introduction  TEXT         NOT NULL DEFAULT '',
    ADD COLUMN IF NOT EXISTS updated_at    TIMESTAMPTZ  NOT NULL DEFAULT NOW(),
    ADD COLUMN IF NOT EXISTS deleted_at    TIMESTAMPTZ;
-- Upgrade path for a pre-existing rag_message table: add the JSONB payload
-- columns, the feedback field and the creation timestamp when missing.
-- Existing columns of the same name are not modified.
ALTER TABLE rag_message
    ADD COLUMN IF NOT EXISTS sources     JSONB        NOT NULL DEFAULT '[]'::jsonb,
    ADD COLUMN IF NOT EXISTS metadata    JSONB        NOT NULL DEFAULT '{}'::jsonb,
    ADD COLUMN IF NOT EXISTS feedback    VARCHAR(20),
    ADD COLUMN IF NOT EXISTS created_at  TIMESTAMPTZ  NOT NULL DEFAULT NOW();
-- Backfill, then enforce, the NOT NULL / DEFAULT contract on legacy columns.
--
-- ADD COLUMN IF NOT EXISTS is skipped when a column of that name already
-- exists, so a column that was created earlier as nullable keeps its NULLs,
-- its old default and its nullability. Backfilling alone would let new NULLs
-- appear again and would leave upgraded databases with a different schema
-- than fresh installs. Each column is therefore also given the DEFAULT and
-- NOT NULL that the CREATE TABLE statements in this file declare. On a
-- database that already matches, the UPDATEs touch no rows and SET NOT NULL
-- is a no-op.

-- rag_dataset.retrieval_model
UPDATE rag_dataset
SET retrieval_model = '{}'::jsonb
WHERE retrieval_model IS NULL;

ALTER TABLE rag_dataset
    ALTER COLUMN retrieval_model SET DEFAULT '{}'::jsonb,
    ALTER COLUMN retrieval_model SET NOT NULL;

-- rag_document.enabled / hit_count (one pass; COALESCE keeps non-NULL values)
UPDATE rag_document
SET enabled   = COALESCE(enabled, TRUE),
    hit_count = COALESCE(hit_count, 0)
WHERE enabled IS NULL
   OR hit_count IS NULL;

ALTER TABLE rag_document
    ALTER COLUMN enabled   SET DEFAULT TRUE,
    ALTER COLUMN enabled   SET NOT NULL,
    ALTER COLUMN hit_count SET DEFAULT 0,
    ALTER COLUMN hit_count SET NOT NULL;

-- rag_chat_app.suggested_questions (TEXT column; default is the string '[]')
UPDATE rag_chat_app
SET suggested_questions = '[]'
WHERE suggested_questions IS NULL;

ALTER TABLE rag_chat_app
    ALTER COLUMN suggested_questions SET DEFAULT '[]',
    ALTER COLUMN suggested_questions SET NOT NULL;

-- rag_message.sources / metadata (one pass; COALESCE keeps non-NULL values)
UPDATE rag_message
SET sources  = COALESCE(sources,  '[]'::jsonb),
    metadata = COALESCE(metadata, '{}'::jsonb)
WHERE sources IS NULL
   OR metadata IS NULL;

ALTER TABLE rag_message
    ALTER COLUMN sources  SET DEFAULT '[]'::jsonb,
    ALTER COLUMN sources  SET NOT NULL,
    ALTER COLUMN metadata SET DEFAULT '{}'::jsonb,
    ALTER COLUMN metadata SET NOT NULL;
-- Secondary indexes. All are created with IF NOT EXISTS so the script can be
-- re-run. Except for the unique index and the message index, every index is
-- partial on "deleted_at IS NULL", i.e. it only covers rows whose deleted_at
-- is unset.

-- rag_dataset
-- NOTE(review): collection_name is already declared UNIQUE in this file's
-- CREATE TABLE, so on a freshly created table this is a second unique index
-- on the same column; presumably it is here for tables that were created
-- before that constraint existed -- confirm before removing.
CREATE UNIQUE INDEX IF NOT EXISTS uq_rag_dataset_collection_name
    ON rag_dataset (collection_name);

CREATE INDEX IF NOT EXISTS idx_rag_dataset_area
    ON rag_dataset (area)
    WHERE deleted_at IS NULL;

CREATE INDEX IF NOT EXISTS idx_rag_dataset_status
    ON rag_dataset (status)
    WHERE deleted_at IS NULL;

-- rag_document
CREATE INDEX IF NOT EXISTS idx_rag_document_dataset
    ON rag_document (dataset_id)
    WHERE deleted_at IS NULL;

CREATE INDEX IF NOT EXISTS idx_rag_document_status
    ON rag_document (indexing_status)
    WHERE deleted_at IS NULL;

CREATE INDEX IF NOT EXISTS idx_rag_document_original_name
    ON rag_document (dataset_id, original_name)
    WHERE deleted_at IS NULL;

-- rag_chat_app
CREATE INDEX IF NOT EXISTS idx_rag_chat_app_area
    ON rag_chat_app (area)
    WHERE deleted_at IS NULL;

CREATE INDEX IF NOT EXISTS idx_rag_chat_app_status
    ON rag_chat_app (status)
    WHERE deleted_at IS NULL;

-- rag_conversation
CREATE INDEX IF NOT EXISTS idx_rag_conversation_user
    ON rag_conversation (user_id)
    WHERE deleted_at IS NULL;

CREATE INDEX IF NOT EXISTS idx_rag_conversation_app
    ON rag_conversation (app_id)
    WHERE deleted_at IS NULL;

-- rag_message: composite index on (conversation_id, created_at); not
-- partial, since rag_message has no deleted_at column.
CREATE INDEX IF NOT EXISTS idx_rag_message_conversation
    ON rag_message (conversation_id, created_at);
-- Trigger function shared by the rag_* tables: overwrites updated_at on the
-- row being written with NOW() (the start time of the current transaction)
-- and returns the modified row. It only has an effect when attached as a
-- BEFORE ... FOR EACH ROW trigger on a table that has an updated_at column.
CREATE OR REPLACE FUNCTION update_rag_updated_at()
RETURNS TRIGGER
LANGUAGE plpgsql
AS $func$
BEGIN
    NEW.updated_at := NOW();
    RETURN NEW;
END;
$func$;
-- (Re)create the trg_<table>_updated_at trigger on every table listed below.
-- The trigger is dropped first, so re-running the script replaces it instead
-- of failing on a duplicate name. rag_message is not in the list.
DO $$
DECLARE
    -- Tables that get the BEFORE UPDATE trigger.
    managed_tables CONSTANT TEXT[] := ARRAY['rag_dataset', 'rag_document', 'rag_chat_app', 'rag_conversation'];
    target_table   TEXT;
BEGIN
    FOREACH target_table IN ARRAY managed_tables
    LOOP
        -- %s splices the table name into the trigger name; %I quotes it as
        -- an identifier where it is used as the table itself.
        EXECUTE format('DROP TRIGGER IF EXISTS trg_%s_updated_at ON %I', target_table, target_table);

        -- The continuation lines of the literal are kept flush-left because
        -- whitespace inside the quotes is part of the statement text.
        EXECUTE format(
            'CREATE TRIGGER trg_%s_updated_at
BEFORE UPDATE ON %I
FOR EACH ROW EXECUTE FUNCTION update_rag_updated_at()',
            target_table, target_table
        );
    END LOOP;
END;
$$;

-- Closes the transaction opened by BEGIN at the top of the file.
COMMIT;