import { CodeTabs } from '../views/docs/CodeTabs';
import { CodeBlock } from '../views/docs/CodeBlock';
import { Callout, NextLink } from '../views/docs/prose';
export const DOCUMENT_QA_SIG_PY = [
  // Python constructor signature shown in the "Constructor" code tabs.
  // One array entry per displayed line; the inline `#` notes stay column-aligned.
  'from guava.helpers.rag import DocumentQA',
  '',
  'DocumentQA(',
  '    store=None,             # VectorStore for local mode; omit for server mode',
  '    documents=None,         # str or list[str] — documents to index',
  '    ids=None,               # list[str] — stable IDs for upsert/delete',
  '    chunk_size=5000,        # max chars per chunk (local mode only)',
  '    chunk_overlap=200,      # overlap between chunks (local mode only)',
  '    instructions=None,      # system instruction override',
  '    *,',
  '    generation_model=None,  # GenerationModel (required for local mode)',
  '    namespace=None,         # server-mode namespace for concurrent instances',
  ')',
].join('\n');

export const DOCUMENT_QA_SIG_TS = [
  // Placeholder for the TypeScript signature tab: the helper is not available
  // in TypeScript yet, so readers are pointed at the legacy page instead.
  '// The new guava.helpers.rag.DocumentQA is not yet available in TypeScript.',
  '// For TypeScript projects, see the DocumentQA (Legacy) page.',
].join('\n');

export const DOCUMENT_QA_EX_PY = [
  // Import shared by every scenario below.
  `from guava.helpers.rag import DocumentQA`,
  // Scenario 1: a single server-mode instance.
  `# Server mode (default) — documents stored and queried on Guava's server
qa = DocumentQA(documents=[policy_text, faq_text], namespace="policy_faq")
answer = qa.ask("What is the deductible?")`,
  // Scenario 2: two server-mode instances kept apart by namespace.
  `# Server mode — multiple concurrent instances (use namespace to isolate)
dental = DocumentQA(documents=dental_docs, namespace="dental")
restaurant = DocumentQA(documents=restaurant_docs, namespace="restaurant")
dental.ask("What is the copay?")       # only searches dental docs
restaurant.ask("Do you have vegan options?")  # only searches restaurant docs`,
  // Scenario 3: local mode with a caller-supplied store and generation model.
  `# Local mode — LanceDB + Vertex AI
from google import genai
from guava.helpers.lancedb import LanceDBStore
from guava.helpers.vertexai import VertexAIEmbedding, VertexAIGeneration

client = genai.Client(project="my-project", location="us-central1")
store = LanceDBStore("gs://my-bucket/lancedb", embedding_model=VertexAIEmbedding(client=client))
qa = DocumentQA(store=store, generation_model=VertexAIGeneration(client=client))
qa.upsert_document("policy", my_text)
answer = qa.ask("What is the deductible?")`,
  // Scenario 4: answering an Agent's on_question hook from a DocumentQA.
  `# Wiring into an Agent
import guava
from guava import Agent

agent = Agent(name="Support", organization="Acme Corp", purpose="Answer customer questions.")
document_qa = DocumentQA(documents=some_text)

@agent.on_question
def on_question(call: guava.Call, question: str) -> str:
    return document_qa.ask(question)`,
  // Scenarios are rendered as one file, separated by a single blank line.
].join('\n\n');

export const DOCUMENT_QA_EX_TS =
  // TypeScript tab of the Examples section. The helper is not available in
  // TypeScript yet, so this matches the signature placeholder and points
  // readers at the legacy page instead of leaving the tab as a dead end.
  `// guava.helpers.rag.DocumentQA is not yet available in TypeScript.
// For TypeScript projects, see the DocumentQA (Legacy) page.`;

export const DOCUMENT_QA_MGMT_EX_PY = [
  // Import needed by the walkthrough.
  `from guava.helpers.rag import DocumentQA`,
  // Step 1: construct with explicit IDs so later steps can address documents.
  `# Load initial documents with stable IDs
qa = DocumentQA(
    documents=[policy_v1, faq_v1, terms_v1],
    ids=["policy", "faq", "terms"],
    namespace="insurance",
)`,
  // Step 2: replace an existing document under the same key.
  `# Later: policy was updated — replace it in-place
qa.upsert_document("policy", policy_v2)`,
  // Step 3: add a document without choosing a key.
  `# Add a new document without a pre-assigned ID
qa.add_document(new_bulletin_text)`,
  // Step 4: remove one document by key.
  `# Remove a document that's no longer relevant
qa.delete_document("terms")`,
  // Step 5: remove everything.
  `# Wipe everything and start fresh
qa.clear()`,
  // Steps are rendered as one file, separated by a single blank line.
].join('\n\n');

## DocumentQA

`DocumentQA` answers caller questions against documents using retrieval-augmented generation (RAG). It operates in one of two modes:

- **Server mode (default):** Documents are uploaded to the Guava server and questions are answered server-side. Intended for simple use cases with few documents.
- **Local mode:** Bring your own vector store and generation model for full control over the RAG pipeline. Guava provides ready-made backends for ChromaDB, LanceDB, pgvector, and Pinecone.

### Constructor

<CodeTabs
  python={{ code: DOCUMENT_QA_SIG_PY, filename: "signature" }}
  typescript={{ code: DOCUMENT_QA_SIG_TS, filename: "signature" }}
/>

| Parameter | Type | Required | Default | Description |
|-----------|------|----------|---------|-------------|
| `store` | `VectorStore \| None` | No | `None` | Vector store for local mode. When omitted, server mode is used automatically. |
| `documents` | `list[str] \| str \| None` | No | `None` | Documents to index at construction time. Accepts a single string or a list. |
| `ids` | `list[str] \| None` | No | `None` | IDs you supply for each document, enabling later `upsert_document` / `delete_document`. Length must match `documents` if provided. |
| `chunk_size` | `int` | No | `5000` | Maximum characters per chunk (local mode only). |
| `chunk_overlap` | `int` | No | `200` | Overlap between consecutive chunks in characters (local mode only). |
| `instructions` | `str \| None` | No | `None` | System instruction for the generation model. Overrides the built-in default. |
| `generation_model` | `GenerationModel \| None` | Local mode | `None` | Generation model for producing answers. Required when `store` is provided. |
| `namespace` | `str \| None` | Server mode | `None` | Stable string to scope this instance's documents on the server. Required when multiple `DocumentQA` instances run concurrently. |

<Callout>
  <span className="text-primary font-semibold">namespace requirement:</span> In server mode, `namespace` is required when running multiple `DocumentQA` instances concurrently — even across different files. Without a namespace, concurrent instances may interfere with each other's document stores.
</Callout>

### Methods

**`ask(question: str, k: int = 5) -> str`** — Retrieve relevant chunks and generate an answer. In server mode, `k` is ignored (the server uses full document context).

**`upsert_document(key: str, text: str) -> None`** — Add or replace a document by key. Stale chunks from a previously longer document are deleted automatically.

**`add_document(text: str) -> None`** — Add a document without specifying a key. In server mode, the key is derived from the document's content (SHA-256 hash).

**`delete_document(key: str) -> None`** — Delete a previously upserted document by key.

**`clear() -> None`** — Remove all documents from the store.

### Available VectorStore Backends (Local Mode)

| Class | Import | Install | Default Embedding |
|-------|--------|---------|-------------------|
| `ChromaVectorStore` | `guava.helpers.chromadb` | `pip install 'gridspace-guava[chromadb]'` | Built-in `all-MiniLM-L6-v2` (no API needed) |
| `LanceDBStore` | `guava.helpers.lancedb` | `pip install 'gridspace-guava[lancedb]'` | Required — pass an `EmbeddingModel` |
| `PgVectorStore` | `guava.helpers.pgvector` | `pip install 'gridspace-guava[pgvector]'` | Required — pass an `EmbeddingModel` |
| `PineconeVectorStore` | `guava.helpers.pinecone` | `pip install 'gridspace-guava[pinecone]'` | `multilingual-e5-large` via Pinecone Inference |

See the <a href="/docs/vector-stores">Vector Stores</a> reference for full constructor details and backend-specific options.

### Examples

<CodeTabs
  python={{ code: DOCUMENT_QA_EX_PY, filename: "document_qa_examples.py" }}
  typescript={{ code: DOCUMENT_QA_EX_TS, filename: "document_qa_examples.ts" }}
/>

### Incremental Document Management

Use `ids` to assign stable keys to documents at construction time, then use `upsert_document`, `add_document`, `delete_document`, and `clear` to manage documents without re-creating the `DocumentQA` instance.

<CodeBlock code={DOCUMENT_QA_MGMT_EX_PY} filename="document_management.py" language="python" />

<NextLink section="datetime-filter" label="DatetimeFilter" />
