From 8435561dc3946eef6aeafa524ac36d63262ea1f8 Mon Sep 17 00:00:00 2001 From: are-ces <195810094+are-ces@users.noreply.github.com> Date: Tue, 2 Jun 2026 10:46:36 +0200 Subject: [PATCH 1/2] LCORE-1037: update BYOK and RAG guides to use lightspeed-stack config MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace all run.yaml references with lightspeed-stack.yaml byok_rag configuration. Users should no longer edit run.yaml directly — the Lightspeed Stack service auto-generates Llama Stack config at startup from the byok_rag and rag sections in lightspeed-stack.yaml. - Rewrite BYOK guide Step 4 to use byok_rag as primary config path - Add field reference table for all byok_rag options - Rewrite RAG guide vector store sections with byok_rag examples - Replace full run.yaml config examples with lightspeed-stack.yaml format - Add embedding_model field to BYOK example config - Update Step 3 to reference byok_rag embedding_model field Co-Authored-By: Claude Opus 4.6 --- docs/byok_guide.md | 444 ++++++-------------- docs/rag_guide.md | 261 +++--------- examples/lightspeed-stack-byok-okp-rag.yaml | 2 + 3 files changed, 201 insertions(+), 506 deletions(-) diff --git a/docs/byok_guide.md b/docs/byok_guide.md index dd7382f9a..489152538 100644 --- a/docs/byok_guide.md +++ b/docs/byok_guide.md @@ -15,7 +15,7 @@ The BYOK (Bring Your Own Knowledge) feature in Lightspeed Core enables users to * [Step 1: Prepare Your Knowledge Sources](#step-1-prepare-your-knowledge-sources) * [Step 2: Create Vector Database](#step-2-create-vector-database) * [Step 3: Configure Embedding Model](#step-3-configure-embedding-model) - * [Step 4: Configure Llama Stack](#step-4-configure-llama-stack) + * [Step 4: Configure BYOK Knowledge Sources](#step-4-configure-byok-knowledge-sources) * [Step 5: Configure RAG Strategy](#step-5-configure-rag-strategy) * [Supported Vector Database Types](#supported-vector-database-types) * [Configuration 
Examples](#configuration-examples) @@ -156,7 +156,7 @@ class CustomMetadataProcessor(MetadataProcessor): ### Step 3: Configure Embedding Model -You have two options for configuring your embedding model: +You have two options for obtaining your embedding model: #### Option 1: Use rag-content Download Script (Optional) You can use the embedding generation step mentioned in the rag-content repo: @@ -171,131 +171,62 @@ Alternatively, you can download your own embedding model and update the path in 1. **Download your preferred embedding model** from Hugging Face or other sources 2. **Place the model** in your desired directory (e.g., `/path/to/your/embedding_models/`) -3. **Update the YAML configuration** to point to your model path: + +The embedding model is specified per knowledge source in the `byok_rag` section of `lightspeed-stack.yaml` via the `embedding_model` field. The default is `sentence-transformers/all-mpnet-base-v2` with a dimension of `768`. + +**Note**: Ensure the same embedding model is used for both vector database creation and querying. + +### Step 4: Configure BYOK Knowledge Sources + +Declare your knowledge sources in the `byok_rag` section of your `lightspeed-stack.yaml`. The Lightspeed Stack service automatically generates the required Llama Stack configuration at startup. 
```yaml -models: - - model_id: sentence-transformers/all-mpnet-base-v2 - metadata: - embedding_dimension: 768 - model_type: embedding - provider_id: sentence-transformers - provider_model_id: /path/to/your/embedding_models/all-mpnet-base-v2 +byok_rag: + - rag_id: my-docs # Unique identifier for this knowledge source + rag_type: inline::faiss # Vector store type (default: inline::faiss) + embedding_model: sentence-transformers/all-mpnet-base-v2 # Embedding model (default) + embedding_dimension: 768 # Must match your embedding model's output + vector_db_id: your-index-id # Llama Stack vector store ID (from index generation) + db_path: /path/to/vector_db/faiss_store.db # Path to the vector database file + score_multiplier: 1.0 # Weight for Inline RAG result ranking (default: 1.0) ``` -**Note**: Ensure the same embedding model is used for both vector database creation and querying. +**`byok_rag` field reference:** + +| Field | Required | Default | Description | +|-------|----------|---------|-------------| +| `rag_id` | Yes | — | Unique identifier for the knowledge source | +| `rag_type` | No | `inline::faiss` | Vector store provider type | +| `embedding_model` | No | `sentence-transformers/all-mpnet-base-v2` | Embedding model identifier or path | +| `embedding_dimension` | No | `768` | Embedding vector dimensionality | +| `vector_db_id` | Yes | — | Vector store ID (must match the ID from index generation) | +| `db_path` | Yes | — | Path to the vector database file | +| `score_multiplier` | No | `1.0` | Weight for Inline RAG ranking (values > 1.0 boost; < 1.0 reduce) | -### Step 4: Configure Llama Stack +**Multiple knowledge sources:** -Edit your `run.yaml` file to include BYOK configuration: +You can configure multiple BYOK sources. 
When using Inline RAG, `score_multiplier` adjusts the relative importance of each store's results: ```yaml -version: 2 -image_name: byok-configuration - -# Required APIs for BYOK -apis: -- agents -- inference -- vector_io -- tool_runtime -- safety - -providers: - inference: - - provider_id: sentence-transformers - provider_type: inline::sentence-transformers - config: {} - - provider_id: openai - provider_type: remote::openai - config: - api_key: ${env.OPENAI_API_KEY} - - agents: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - persistence: - agent_state: - namespace: agents_state - backend: kv_default - responses: - table_name: agents_responses - backend: sql_default - - safety: - - provider_id: llama-guard - provider_type: inline::llama-guard - config: - excluded_categories: [] - - vector_io: - - provider_id: your-knowledge-base - provider_type: inline::faiss - config: - persistence: - namespace: vector_io::faiss - backend: byok_backend # References storage.backends - - tool_runtime: - - provider_id: rag-runtime - provider_type: inline::rag-runtime - config: {} - -storage: - backends: - kv_default: - type: kv_sqlite - db_path: ~/.llama/storage/kv_store.db - sql_default: - type: sql_sqlite - db_path: ~/.llama/storage/sql_store.db - byok_backend: - type: kv_sqlite - db_path: /path/to/vector_db/faiss_store.db - -registered_resources: - models: - - model_id: your-llm-model - provider_id: openai - model_type: llm - provider_model_id: gpt-4o-mini - - model_id: sentence-transformers/all-mpnet-base-v2 - model_type: embedding - provider_id: sentence-transformers - provider_model_id: /path/to/embedding_models/all-mpnet-base-v2 - metadata: - embedding_dimension: 768 - vector_stores: - - vector_store_id: your-index-id # ID used during index generation - provider_id: your-knowledge-base +byok_rag: + - rag_id: ocp-docs + rag_type: inline::faiss embedding_model: sentence-transformers/all-mpnet-base-v2 embedding_dimension: 768 - tool_groups: - 
- toolgroup_id: builtin::rag - provider_id: rag-runtime -``` + vector_db_id: ocp-index + db_path: /data/vector_dbs/ocp_docs/faiss_store.db + score_multiplier: 1.0 -**⚠️ Important**: The `vector_store_id` value must exactly match the ID you provided when creating the vector database using the rag-content tool. This identifier links your Llama Stack configuration to the specific vector database index you created. + - rag_id: internal-kb + rag_type: inline::faiss + embedding_model: sentence-transformers/all-mpnet-base-v2 + embedding_dimension: 768 + vector_db_id: kb-index + db_path: /data/vector_dbs/internal_kb/faiss_store.db + score_multiplier: 1.2 # Boost results from this store +``` -> [!TIP] -> Instead of manually editing `run.yaml`, you can declare your knowledge sources in the `byok_rag` -> section of `lightspeed-stack.yaml`. The lightspeed-stack service automatically generates the required configuration -> at startup. -> -> ```yaml -> byok_rag: -> - rag_id: my-docs # Unique identifier for this knowledge source -> rag_type: inline::faiss -> embedding_model: sentence-transformers/all-mpnet-base-v2 -> embedding_dimension: 768 -> vector_db_id: your-index-id # Llama Stack vector store ID (from index generation) -> db_path: /path/to/vector_db/faiss_store.db -> score_multiplier: 1.0 # Optional: weight results when mixing multiple sources -> ``` -> -> When multiple BYOK sources are configured, `score_multiplier` adjusts the relative importance of -> each store's results during Inline RAG retrieval. Values above 1.0 boost a store; below 1.0 reduce it. +**⚠️ Important**: The `vector_db_id` value must exactly match the ID you provided when creating the vector database using the rag-content tool. This identifier links your configuration to the specific vector database index you created. ### Step 5: Configure RAG Strategy @@ -338,47 +269,41 @@ Both modes can be enabled simultaneously. Choose based on your latency and contr ### 1. 
FAISS (Recommended) - **Type**: Local vector database with SQLite metadata - **Best for**: Small to medium-sized knowledge bases -- **Configuration**: `inline::faiss` +- **Configuration**: `rag_type: inline::faiss` - **Storage**: SQLite database file ```yaml -providers: - vector_io: - - provider_id: faiss-knowledge - provider_type: inline::faiss - config: - persistence: - namespace: vector_io::faiss - backend: faiss_backend - -storage: - backends: - faiss_backend: - type: kv_sqlite - db_path: /path/to/faiss_store.db +byok_rag: + - rag_id: faiss-knowledge + rag_type: inline::faiss + embedding_model: sentence-transformers/all-mpnet-base-v2 + embedding_dimension: 768 + vector_db_id: your-index-id + db_path: /path/to/faiss_store.db ``` ### 2. pgvector (PostgreSQL) - **Type**: PostgreSQL with pgvector extension - **Best for**: Large-scale deployments, shared knowledge bases -- **Configuration**: `remote::pgvector` +- **Configuration**: `rag_type: remote::pgvector` - **Requirements**: PostgreSQL with pgvector extension ```yaml -vector_io: -- provider_id: pgvector-knowledge - provider_type: remote::pgvector - config: - host: localhost - port: 5432 - db: knowledge_db - user: lightspeed_user - password: ${env.DB_PASSWORD} - kvstore: - type: sqlite - db_path: .llama/distributions/pgvector/registry.db +byok_rag: + - rag_id: pgvector-knowledge + rag_type: remote::pgvector + embedding_model: sentence-transformers/all-mpnet-base-v2 + embedding_dimension: 768 + vector_db_id: enterprise-docs + db_path: .llama/distributions/pgvector/registry.db ``` +> [!NOTE] +> For pgvector, `db_path` points to the local registry database used by Llama Stack to track the vector store metadata. +> The PostgreSQL connection details (host, port, database, user, password) are configured +> in the Llama Stack provider configuration. Use environment variables for credentials +> (e.g., `POSTGRES_PASSWORD`). 
+ **pgvector Table Schema:** - `id` (text): UUID identifier of the chunk - `document` (jsonb): JSON containing content and metadata @@ -388,182 +313,81 @@ vector_io: ## Configuration Examples -### Example 1: OpenAI + FAISS -Complete configuration for OpenAI LLM with local FAISS knowledge base: +### Example 1: FAISS Knowledge Base + +A minimal `lightspeed-stack.yaml` configuration with a FAISS-based BYOK knowledge source: ```yaml -version: 2 -image_name: openai-faiss-byok - -apis: -- agents -- inference -- vector_io -- tool_runtime -- safety - -providers: - inference: - - provider_id: sentence-transformers - provider_type: inline::sentence-transformers - config: {} - - provider_id: openai - provider_type: remote::openai - config: - api_key: ${env.OPENAI_API_KEY} - - agents: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - persistence: - agent_state: - namespace: agents_state - backend: kv_default - responses: - table_name: agents_responses - backend: sql_default - - safety: - - provider_id: llama-guard - provider_type: inline::llama-guard - config: - excluded_categories: [] - - vector_io: - - provider_id: company-docs - provider_type: inline::faiss - config: - persistence: - namespace: vector_io::faiss - backend: company_docs_backend - - tool_runtime: - - provider_id: rag-runtime - provider_type: inline::rag-runtime - config: {} - -storage: - backends: - kv_default: - type: kv_sqlite - db_path: ~/.llama/storage/kv_store.db - sql_default: - type: sql_sqlite - db_path: ~/.llama/storage/sql_store.db - company_docs_backend: - type: kv_sqlite - db_path: /home/user/vector_dbs/company_docs/faiss_store.db - -registered_resources: - models: - - model_id: gpt-4o-mini - provider_id: openai - model_type: llm - provider_model_id: gpt-4o-mini - - model_id: sentence-transformers/all-mpnet-base-v2 - model_type: embedding - provider_id: sentence-transformers - provider_model_id: /home/user/embedding_models/all-mpnet-base-v2 - metadata: - 
embedding_dimension: 768 - vector_stores: - - vector_store_id: company-knowledge-index - provider_id: company-docs +name: Lightspeed Core Service (LCS) +service: + host: localhost + port: 8080 + auth_enabled: false + +byok_rag: + - rag_id: company-docs + rag_type: inline::faiss embedding_model: sentence-transformers/all-mpnet-base-v2 embedding_dimension: 768 - tool_groups: - - toolgroup_id: builtin::rag - provider_id: rag-runtime + vector_db_id: company-knowledge-index + db_path: /home/user/vector_dbs/company_docs/faiss_store.db + +rag: + inline: + - company-docs + tool: + - company-docs ``` -### Example 2: vLLM + pgvector -Configuration for local vLLM inference with PostgreSQL knowledge base: +> [!NOTE] +> Your LLM inference provider (e.g., OpenAI, vLLM) must also be configured. +> For OpenAI, set the `OPENAI_API_KEY` environment variable. + +### Example 2: Multiple Knowledge Sources with pgvector + +A configuration combining a local FAISS store with a remote pgvector store: ```yaml -version: 2 -image_name: vllm-pgvector-byok - -apis: -- agents -- inference -- vector_io -- tool_runtime -- safety - -models: -- model_id: meta-llama/Llama-3.1-8B-Instruct - provider_id: vllm - model_type: llm - provider_model_id: null - -- model_id: sentence-transformers/all-mpnet-base-v2 - metadata: - embedding_dimension: 768 - model_type: embedding - provider_id: sentence-transformers - provider_model_id: sentence-transformers/all-mpnet-base-v2 - -providers: - inference: - - provider_id: sentence-transformers - provider_type: inline::sentence-transformers - config: {} - - provider_id: vllm - provider_type: remote::vllm - config: - url: http://localhost:8000/v1/ - api_token: your-token-here - - agents: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - persistence: - agent_state: - namespace: agents_state - backend: kv_default - responses: - table_name: agents_responses - backend: sql_default - - safety: - - provider_id: llama-guard - provider_type: 
inline::llama-guard - config: - excluded_categories: [] - - vector_io: - - provider_id: enterprise-knowledge - provider_type: remote::pgvector - config: - host: postgres.company.com - port: 5432 - db: enterprise_kb - user: rag_user - password: ${env.POSTGRES_PASSWORD} - kvstore: - type: sqlite - db_path: .llama/distributions/pgvector/registry.db - - tool_runtime: - - provider_id: rag-runtime - provider_type: inline::rag-runtime - config: {} - -tool_groups: -- provider_id: rag-runtime - toolgroup_id: builtin::rag - args: null - mcp_endpoint: null - -vector_stores: -- embedding_dimension: 768 - embedding_model: sentence-transformers/all-mpnet-base-v2 - provider_id: enterprise-knowledge - vector_store_id: enterprise-docs +name: Lightspeed Core Service (LCS) +service: + host: localhost + port: 8080 + auth_enabled: false + +byok_rag: + - rag_id: local-docs + rag_type: inline::faiss + embedding_model: sentence-transformers/all-mpnet-base-v2 + embedding_dimension: 768 + vector_db_id: local-index + db_path: /data/vector_dbs/local/faiss_store.db + score_multiplier: 1.0 + + - rag_id: enterprise-kb + rag_type: remote::pgvector + embedding_model: sentence-transformers/all-mpnet-base-v2 + embedding_dimension: 768 + vector_db_id: enterprise-docs + db_path: .llama/distributions/pgvector/registry.db + score_multiplier: 1.2 + +rag: + inline: + - local-docs + - enterprise-kb + tool: + - local-docs + - enterprise-kb ``` +> [!NOTE] +> For pgvector, ensure your PostgreSQL credentials are available via environment variables +> (e.g., `POSTGRES_PASSWORD`). + +> [!TIP] +> A complete working example combining BYOK and OKP is available at +> [`examples/lightspeed-stack-byok-okp-rag.yaml`](../examples/lightspeed-stack-byok-okp-rag.yaml). 
+ --- ## Conclusion @@ -575,4 +399,4 @@ For additional support and advanced configurations, refer to: - [Llama Stack Documentation](https://llama-stack.readthedocs.io/) - [rag-content Tool Repository](https://github.com/lightspeed-core/rag-content) -Remember to regularly update your knowledge sources and monitor system performance to maintain optimal BYOK functionality. \ No newline at end of file +Remember to regularly update your knowledge sources and monitor system performance to maintain optimal BYOK functionality. diff --git a/docs/rag_guide.md b/docs/rag_guide.md index 16c7ab48b..eef29918d 100644 --- a/docs/rag_guide.md +++ b/docs/rag_guide.md @@ -1,6 +1,6 @@ # RAG Configuration Guide -This document explains how to configure and customize your RAG pipeline using the `llama-stack` configuration YAML file. You will: +This document explains how to configure and customize your RAG pipeline. You will: * Initialize a vector store * Download and point to a local embedding model @@ -15,8 +15,7 @@ This document explains how to configure and customize your RAG pipeline using th * [Prerequisites](#prerequisites) * [Set Up the Vector Database](#set-up-the-vector-database) * [Download an Embedding Model](#download-an-embedding-model) -* [Automatic Configuration Enrichment](#automatic-configuration-enrichment) -* [Manual Configuration](#manual-configuration) +* [Configure BYOK Knowledge Sources](#configure-byok-knowledge-sources) * [Add an Inference Model (LLM)](#add-an-inference-model-llm) * [Complete Configuration Reference](#complete-configuration-reference) * [System Prompt Guidance for RAG (as a tool)](#system-prompt-guidance-for-rag-as-a-tool) @@ -58,90 +57,32 @@ Use the [`rag-content`](https://github.com/lightspeed-core/rag-content) reposito Download a local embedding model such as `sentence-transformers/all-mpnet-base-v2` by using the script in [`rag-content`](https://github.com/lightspeed-core/rag-content) or manually download and place in your desired path. 
> [!NOTE] -> Llama Stack can also download a model for you, which will make the first start-up slower. In the YAML configuration file `run.yaml` specify a supported model name as `provider_model_id` instead of a path. LLama Stack will then download the model to the `~/.cache/huggingface/hub` folder. +> Llama Stack can also download a model for you, which will make the first start-up slower. In the `byok_rag` section of `lightspeed-stack.yaml`, specify a supported model name as `embedding_model` instead of a local path. Llama Stack will then download the model to the `~/.cache/huggingface/hub` folder. --- -## Automatic Configuration Enrichment +## Configure BYOK Knowledge Sources -For users with BYOK or OKP configurations, you can automatically enrich your `run.yaml` file using the `llama_stack_configuration.py` script: - -```bash -# Enrich run.yaml with BYOK and/or OKP configurations from lightspeed-stack.yaml -uv run src/llama_stack_configuration.py -c lightspeed-stack.yaml -i run.yaml -o run_enriched.yaml -``` - -This script automatically adds the necessary: -- **Storage backends** for BYOK vector databases -- **Vector IO providers** for BYOK and OKP -- **Vector stores** and **embedding models** registration -- **OKP provider configuration** when `okp` is enabled in your RAG configuration - -The script reads your `lightspeed-stack.yaml` configuration and enriches a base `run.yaml` file with all required Llama Stack sections, eliminating the need to manually configure complex vector store setups. - -**Command line options:** -- `-c, --config`: Lightspeed config file (default: `lightspeed-stack.yaml`) -- `-i, --input`: Input Llama Stack config (default: `run.yaml`) -- `-o, --output`: Output enriched config (default: `run_.yaml`) - -> [!TIP] -> Use this script to generate your initial `run.yaml` configuration, then manually customize as needed for your specific setup. 
- ---- - -## Manual Configuration - -If you prefer to manually configure your `run.yaml` file, update it to point to: - -* Your downloaded **embedding model** -* Your generated **vector database** +BYOK knowledge sources are configured in the `byok_rag` section of `lightspeed-stack.yaml`. The Lightspeed Stack service automatically generates the required Llama Stack configuration at startup — no manual enrichment is needed. ### FAISS example ```yaml -providers: - inference: - - provider_id: sentence-transformers - provider_type: inline::sentence-transformers - config: {} - - # FAISS vector store - vector_io: - - provider_id: custom-index - provider_type: inline::faiss - config: - persistence: - namespace: vector_io::faiss - backend: rag_backend # References storage.backends.rag_backend - -storage: - backends: - rag_backend: - type: kv_sqlite - db_path: # e.g. /home/USER/vector_db/faiss_store.db - -registered_resources: - models: - - model_id: # e.g. sentence-transformers/all-mpnet-base-v2 - metadata: - embedding_dimension: # e.g. 768 - model_type: embedding - provider_id: sentence-transformers - provider_model_id: # e.g. /home/USER/embedding_model - - vector_stores: - - embedding_dimension: # e.g. 768 - embedding_model: # e.g. sentence-transformers/all-mpnet-base-v2 - provider_id: custom-index - vector_store_id: +byok_rag: + - rag_id: custom-index + rag_type: inline::faiss + embedding_model: sentence-transformers/all-mpnet-base-v2 # or path to local model + embedding_dimension: 768 + vector_db_id: <your-index-id> # ID used during index generation + db_path: <path-to-vector-db> # e.g.
/home/USER/vector_db/faiss_store.db ``` Where: -- `provider_model_id` is the path to the folder of the embedding model (or alternatively, the supported embedding model to download) +- `embedding_model` is the embedding model identifier or path to the local model folder - `db_path` is the path to the vector index (.db file in this case) -- `vector_store_id` is the index ID used to generate the db +- `vector_db_id` is the index ID used to generate the db -See the full working [config example](examples/run.yaml) for more details. +See the full working [config example](../examples/lightspeed-stack-byok-okp-rag.yaml) for more details. ### pgvector example @@ -152,8 +93,6 @@ This example shows how to configure a remote PostgreSQL database with the [pgvec > CREATE EXTENSION IF NOT EXISTS vector; > ``` -Update the connection details (`host`, `port`, `db`, `user`, `password`) to match your PostgreSQL setup. - Each pgvector-backed table follows this schema: - `id` (`text`): UUID identifier of the chunk @@ -161,37 +100,26 @@ Each pgvector-backed table follows this schema: - `embedding` (`vector(n)`): the embedding vector, where `n` is the embedding dimension and will match the model's output size (e.g. 768 for `all-mpnet-base-v2`) > [!NOTE] -> The `vector_store_id` (e.g. `rhdocs`) is used to point to the table named `vector_store_rhdocs` in the specified database, which stores the vector embeddings. - +> The `vector_db_id` (e.g. `rhdocs`) is used to point to the table named `vector_store_rhdocs` in the specified database, which stores the vector embeddings. ```yaml -[...] -providers: - [...] 
- vector_io: - - provider_id: pgvector-example - provider_type: remote::pgvector - config: - host: localhost - port: 5432 - db: pgvector_example # PostgreSQL database (psql -d pgvector_example) - user: lightspeed # PostgreSQL user - password: password123 - kvstore: - type: sqlite - db_path: .llama/distributions/pgvector/pgvector_registry.db - -vector_stores: -- embedding_dimension: 768 - embedding_model: sentence-transformers/all-mpnet-base-v2 - provider_id: pgvector-example - # A unique ID that becomes the PostgreSQL table name, prefixed with 'vector_store_'. - # e.g., 'rhdocs' will create the table 'vector_store_rhdocs'. - # If the table was already created, this value must match the ID used at creation. - vector_store_id: rhdocs +byok_rag: + - rag_id: pgvector-example + rag_type: remote::pgvector + embedding_model: sentence-transformers/all-mpnet-base-v2 + embedding_dimension: 768 + # A unique ID that becomes the PostgreSQL table name, prefixed with 'vector_store_'. + # e.g., 'rhdocs' will create the table 'vector_store_rhdocs'. + # If the table was already created, this value must match the ID used at creation. + vector_db_id: rhdocs + db_path: .llama/distributions/pgvector/pgvector_registry.db ``` -See the full working [config example](examples/openai-pgvector-run.yaml) for more details. +> [!NOTE] +> For pgvector, the PostgreSQL connection details (host, port, database, user, password) are configured +> in the Llama Stack provider configuration. Use environment variables for credentials. + +See the full working [config example](../examples/lightspeed-stack-byok-okp-rag.yaml) for more details. --- @@ -223,7 +151,7 @@ podman run \ > For other supported models and configuration options, see the vLLM documentation: > [vLLM: Tool Calling](https://docs.vllm.ai/en/stable/features/tool_calling.html) -After starting the container edit your `run.yaml` file, matching `model_id` with the model provided in the `podman run` command. 
+After starting the container, configure the vLLM provider in your Llama Stack configuration file, matching `model_id` with the model provided in the `podman run` command. ```yaml [...] @@ -244,7 +172,9 @@ providers: api_token: # if any ``` -See the full working [config example](examples/vllm-llama-faiss-run.yaml) for more details. +> [!TIP] +> The Llama Stack configuration file path is set in `lightspeed-stack.yaml` under +> `llama_stack.library_client_config_path` when using library mode. ### OpenAI example @@ -277,8 +207,6 @@ export OPENAI_API_KEY= > When experimenting with different `models`, `providers` and `vector_dbs`, you might need to manually unregister the old ones with the Llama Stack client CLI (e.g. `llama-stack-client vector_dbs list`) -See the full working [config example](examples/openai-faiss-run.yaml) for more details. - ### Azure OpenAI Not yet supported. @@ -321,7 +249,7 @@ Set `rhokp_url` to the base URL of your OKP server. Use `${env.RH_SERVER_OKP}` t > [!NOTE] > When `okp` is listed in `rag.inline` or `rag.tool`, Lightspeed Stack automatically enriches -> the Llama Stack `run.yaml` at startup with the required `vector_io` provider and `registered_resources` +> the Llama Stack configuration at startup with the required `vector_io` provider and `registered_resources` > entries for the OKP vector store. No manual registration is needed. **Query Request Example:** @@ -397,104 +325,45 @@ the number of retrieved chunks, set the constants in `src/constants.py`: # Complete Configuration Reference -To enable RAG functionality, make sure the `agents`, `tool_runtime`, and `safety` APIs are included and properly configured in your YAML. +To enable RAG functionality, configure the `byok_rag` and `rag` sections in your `lightspeed-stack.yaml`, and ensure the Llama Stack configuration includes `agents`, `tool_runtime`, and `safety` APIs. 
-Below is a real example of a working config, with: +Below is an example of a working `lightspeed-stack.yaml` configuration with: * A local `all-mpnet-base-v2` embedding model * A `FAISS`-based vector store -* `OpenAI` as the inference provider -* Agent-based RAG setup +* Inline and Tool RAG enabled > [!TIP] -> We recommend starting with a minimal working configuration (one is automatically generated by the `rag-content` tool when generating the database) and extending it as needed by adding more APIs and providers. +> We recommend starting with a minimal working configuration and extending it as needed. ```yaml -version: 2 -image_name: rag-configuration - -apis: -- agents -- inference -- vector_io -- tool_runtime -- safety - -providers: - inference: - - provider_id: sentence-transformers - provider_type: inline::sentence-transformers - config: {} - - provider_id: openai - provider_type: remote::openai - config: - api_key: ${env.OPENAI_API_KEY} - - agents: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - persistence: - agent_state: - namespace: agents_state - backend: kv_default - responses: - table_name: agents_responses - backend: sql_default - - safety: - - provider_id: llama-guard - provider_type: inline::llama-guard - config: - excluded_categories: [] - - vector_io: - - provider_id: ocp-docs - provider_type: inline::faiss - config: - persistence: - namespace: vector_io::faiss - backend: ocp_docs_backend # References storage.backends - - tool_runtime: - - provider_id: rag-runtime - provider_type: inline::rag-runtime - config: {} - -storage: - backends: - kv_default: - type: kv_sqlite - db_path: ~/.llama/storage/kv_store.db - sql_default: - type: sql_sqlite - db_path: ~/.llama/storage/sql_store.db - ocp_docs_backend: - type: kv_sqlite - db_path: /home/USER/lightspeed-stack/vector_dbs/ocp_docs/faiss_store.db - -registered_resources: - models: - - model_id: gpt-test - provider_id: openai - model_type: llm - provider_model_id: 
gpt-4o-mini - - model_id: sentence-transformers/all-mpnet-base-v2 - model_type: embedding - provider_id: sentence-transformers - provider_model_id: /home/USER/lightspeed-stack/embedding_models/all-mpnet-base-v2 - metadata: - embedding_dimension: 768 - vector_stores: - - vector_store_id: openshift-index # This ID was defined during index generation - provider_id: ocp-docs # References providers.vector_io +name: Lightspeed Core Service (LCS) +service: + host: localhost + port: 8080 + auth_enabled: false + +llama_stack: + use_as_library_client: true + library_client_config_path: /path/to/llama-stack-config.yaml + +byok_rag: + - rag_id: ocp-docs + rag_type: inline::faiss embedding_model: sentence-transformers/all-mpnet-base-v2 embedding_dimension: 768 - tool_groups: - - toolgroup_id: builtin::rag - provider_id: rag-runtime + vector_db_id: openshift-index + db_path: /home/USER/lightspeed-stack/vector_dbs/ocp_docs/faiss_store.db + +rag: + inline: + - ocp-docs + tool: + - ocp-docs ``` +The Llama Stack configuration file (referenced by `library_client_config_path`) should include your inference provider, agents, safety, and tool runtime configuration. The BYOK vector store providers and registered resources are automatically generated by Lightspeed Stack at startup from the `byok_rag` entries above. + --- # System Prompt Guidance for RAG (as a tool) @@ -509,12 +378,12 @@ You are a helpful assistant with access to a 'knowledge_search' tool. When users --- # Llama Stack RAG annotations -The top-level `vector_stores` block in Llama Stack configuration may include `annotation_prompt_params` to control whether Llama Stack injects extra RAG annotation instructions into the model prompt (for example, citation-style markers). The [`run.yaml`](../run.yaml) in this repository sets `enable_annotations: false` under that block to avoid unwanted annotations. 
For a configuration that enables annotations and customizes the instruction template, see [`examples/run.yaml`](../examples/run.yaml). +The top-level `vector_stores` block in Llama Stack configuration may include `annotation_prompt_params` to control whether Llama Stack injects extra RAG annotation instructions into the model prompt (for example, citation-style markers). The default Llama Stack configuration sets `enable_annotations: false` under that block to avoid unwanted annotations. --- # References * [Llama Stack - RAG](https://llama-stack.readthedocs.io/en/latest/building_applications/rag.html) -* [Llama Stack - Configuring a “Stack"](https://llama-stack.readthedocs.io/en/latest/distributions/configuration.html) +* [Llama Stack - Configuring a "Stack"](https://llama-stack.readthedocs.io/en/latest/distributions/configuration.html) * [Llama Stack - Sample configurations](https://github.com/meta-llama/llama-stack/tree/main/llama_stack/distributions) diff --git a/examples/lightspeed-stack-byok-okp-rag.yaml b/examples/lightspeed-stack-byok-okp-rag.yaml index 4a6b433b8..a7550ff9a 100644 --- a/examples/lightspeed-stack-byok-okp-rag.yaml +++ b/examples/lightspeed-stack-byok-okp-rag.yaml @@ -37,12 +37,14 @@ quota_handlers: byok_rag: - rag_id: ocp-docs # referenced in rag.inline / rag.tool rag_type: inline::faiss + embedding_model: sentence-transformers/all-roberta-large-v1 embedding_dimension: 1024 vector_db_id: vs_123 # Llama-stack vector_store_id db_path: /tmp/ocp.faiss score_multiplier: 1.0 # Weight for this vector store's results (Inline RAG only) - rag_id: knowledge-base # referenced in rag.inline / rag.tool rag_type: inline::faiss + embedding_model: sentence-transformers/all-MiniLM-L6-v2 embedding_dimension: 384 vector_db_id: vs_456 # Llama-stack vector_store_id db_path: /tmp/kb.faiss From 82ddcf08e66ff211841ceabf35efc5587dd716e6 Mon Sep 17 00:00:00 2001 From: are-ces <195810094+are-ces@users.noreply.github.com> Date: Tue, 2 Jun 2026 11:50:08 +0200 Subject:
[PATCH 2/2] LCORE-1037: remove all llama-stack references from BYOK and RAG guides MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace remaining Llama Stack mentions with generic terms — users should not need to know about the underlying Llama Stack layer. Co-Authored-By: Claude Opus 4.6 --- docs/byok_guide.md | 91 ++++++++++++--------- docs/rag_guide.md | 87 ++++++++++---------- examples/lightspeed-stack-byok-okp-rag.yaml | 4 +- examples/run.yaml | 24 +----- 4 files changed, 102 insertions(+), 104 deletions(-) diff --git a/docs/byok_guide.md b/docs/byok_guide.md index 489152538..86acc1c80 100644 --- a/docs/byok_guide.md +++ b/docs/byok_guide.md @@ -148,10 +148,8 @@ class CustomMetadataProcessor(MetadataProcessor): ``` **Important Notes:** -- The vector database must be compatible with Llama Stack - Supported formats: - - Llama-Stack Faiss Vector-IO - - Llama-Stack SQLite-vec Vector-IO + - Faiss Vector-IO - The same embedding model must be used for both creation and querying ### Step 3: Configure Embedding Model @@ -178,7 +176,7 @@ The embedding model is specified per knowledge source in the `byok_rag` section ### Step 4: Configure BYOK Knowledge Sources -Declare your knowledge sources in the `byok_rag` section of your `lightspeed-stack.yaml`. The Lightspeed Stack service automatically generates the required Llama Stack configuration at startup. +Declare your knowledge sources in the `byok_rag` section of your `lightspeed-stack.yaml`. The required configuration is automatically generated at startup when using `make run`, `make run-stack`, `docker-compose`, or library mode. 
```yaml byok_rag: @@ -186,7 +184,7 @@ byok_rag: rag_type: inline::faiss # Vector store type (default: inline::faiss) embedding_model: sentence-transformers/all-mpnet-base-v2 # Embedding model (default) embedding_dimension: 768 # Must match your embedding model's output - vector_db_id: your-index-id # Llama Stack vector store ID (from index generation) + vector_db_id: vs_8c94967b-81cc-4028-a294-9cfac6fd9ae2 # Generated by rag-content during index creation db_path: /path/to/vector_db/faiss_store.db # Path to the vector database file score_multiplier: 1.0 # Weight for Inline RAG result ranking (default: 1.0) ``` @@ -199,7 +197,7 @@ byok_rag: | `rag_type` | No | `inline::faiss` | Vector store provider type | | `embedding_model` | No | `sentence-transformers/all-mpnet-base-v2` | Embedding model identifier or path | | `embedding_dimension` | No | `768` | Embedding vector dimensionality | -| `vector_db_id` | Yes | — | Vector store ID (must match the ID from index generation) | +| `vector_db_id` | Yes | — | Vector store ID generated by rag-content (e.g. 
`vs_8c94967b-81cc-4028-a294-9cfac6fd9ae2`) | | `db_path` | Yes | — | Path to the vector database file | | `score_multiplier` | No | `1.0` | Weight for Inline RAG ranking (values > 1.0 boost; < 1.0 reduce) | @@ -213,7 +211,7 @@ byok_rag: rag_type: inline::faiss embedding_model: sentence-transformers/all-mpnet-base-v2 embedding_dimension: 768 - vector_db_id: ocp-index + vector_db_id: vs_3a7f9b2e-45dc-4e1a-b8f2-1c9d0e3f5a6b db_path: /data/vector_dbs/ocp_docs/faiss_store.db score_multiplier: 1.0 @@ -221,12 +219,12 @@ byok_rag: rag_type: inline::faiss embedding_model: sentence-transformers/all-mpnet-base-v2 embedding_dimension: 768 - vector_db_id: kb-index + vector_db_id: vs_d4c8e1f0-92ab-4d3c-a5e7-6b8f0c2d1e3a db_path: /data/vector_dbs/internal_kb/faiss_store.db score_multiplier: 1.2 # Boost results from this store ``` -**⚠️ Important**: The `vector_db_id` value must exactly match the ID you provided when creating the vector database using the rag-content tool. This identifier links your configuration to the specific vector database index you created. +**⚠️ Important**: The `vector_db_id` value must exactly match the ID generated by the rag-content tool during index creation (e.g. `vs_8c94967b-81cc-4028-a294-9cfac6fd9ae2`). This identifier links your configuration to the specific vector database index. ### Step 5: Configure RAG Strategy @@ -278,32 +276,35 @@ byok_rag: rag_type: inline::faiss embedding_model: sentence-transformers/all-mpnet-base-v2 embedding_dimension: 768 - vector_db_id: your-index-id + vector_db_id: vs_8c94967b-81cc-4028-a294-9cfac6fd9ae2 db_path: /path/to/faiss_store.db ``` ### 2. 
pgvector (PostgreSQL) - **Type**: PostgreSQL with pgvector extension - **Best for**: Large-scale deployments, shared knowledge bases -- **Configuration**: `rag_type: remote::pgvector` +- **Configuration**: `remote::pgvector` - **Requirements**: PostgreSQL with pgvector extension +> [!NOTE] +> pgvector is not yet supported via `byok_rag` in `lightspeed-stack.yaml` (see [LCORE-2437](https://redhat.atlassian.net/browse/LCORE-2437)). +> It must be configured directly in the Llama Stack configuration file. + ```yaml -byok_rag: - - rag_id: pgvector-knowledge - rag_type: remote::pgvector - embedding_model: sentence-transformers/all-mpnet-base-v2 - embedding_dimension: 768 - vector_db_id: enterprise-docs - db_path: .llama/distributions/pgvector/registry.db +vector_io: +- provider_id: pgvector-knowledge + provider_type: remote::pgvector + config: + host: localhost + port: 5432 + db: knowledge_db + user: lightspeed_user + password: ${env.POSTGRES_PASSWORD} + kvstore: + type: sqlite + db_path: .llama/distributions/pgvector/registry.db ``` -> [!NOTE] -> For pgvector, `db_path` points to the local registry database used by Llama Stack to track the vector store metadata. -> The PostgreSQL connection details (host, port, database, user, password) are configured -> in the Llama Stack provider configuration. Use environment variables for credentials -> (e.g., `POSTGRES_PASSWORD`). - **pgvector Table Schema:** - `id` (text): UUID identifier of the chunk - `document` (jsonb): JSON containing content and metadata @@ -329,7 +330,7 @@ byok_rag: rag_type: inline::faiss embedding_model: sentence-transformers/all-mpnet-base-v2 embedding_dimension: 768 - vector_db_id: company-knowledge-index + vector_db_id: vs_f1a2b3c4-56de-4f78-90ab-cdef12345678 db_path: /home/user/vector_dbs/company_docs/faiss_store.db rag: @@ -340,12 +341,18 @@ rag: ``` > [!NOTE] -> Your LLM inference provider (e.g., OpenAI, vLLM) must also be configured. 
+> Your LLM inference provider (e.g., OpenAI, vLLM) must also be configured in your `run.yaml`. > For OpenAI, set the `OPENAI_API_KEY` environment variable. ### Example 2: Multiple Knowledge Sources with pgvector -A configuration combining a local FAISS store with a remote pgvector store: +A configuration combining a local FAISS store (via `byok_rag`) with a remote pgvector store (configured directly in the Llama Stack configuration file): + +> [!NOTE] +> pgvector is not yet supported via `byok_rag` in `lightspeed-stack.yaml` (see [LCORE-2437](https://redhat.atlassian.net/browse/LCORE-2437)). +> The pgvector provider must be configured directly in the Llama Stack configuration file. + +**`lightspeed-stack.yaml`** — FAISS store and RAG strategy: ```yaml name: Lightspeed Core Service (LCS) @@ -359,25 +366,32 @@ byok_rag: rag_type: inline::faiss embedding_model: sentence-transformers/all-mpnet-base-v2 embedding_dimension: 768 - vector_db_id: local-index + vector_db_id: vs_e9d8c7b6-43af-4b2d-8e1f-0a9b8c7d6e5f db_path: /data/vector_dbs/local/faiss_store.db score_multiplier: 1.0 - - rag_id: enterprise-kb - rag_type: remote::pgvector - embedding_model: sentence-transformers/all-mpnet-base-v2 - embedding_dimension: 768 - vector_db_id: enterprise-docs - db_path: .llama/distributions/pgvector/registry.db - score_multiplier: 1.2 - rag: inline: - local-docs - - enterprise-kb tool: - local-docs - - enterprise-kb +``` + +**Llama Stack configuration file** — pgvector provider: + +```yaml +vector_io: +- provider_id: enterprise-kb + provider_type: remote::pgvector + config: + host: localhost + port: 5432 + db: knowledge_db + user: lightspeed_user + password: ${env.POSTGRES_PASSWORD} + kvstore: + type: sqlite + db_path: .llama/distributions/pgvector/registry.db ``` > [!NOTE] @@ -396,7 +410,6 @@ The BYOK (Bring Your Own Knowledge) feature in Lightspeed Core provides powerful For additional support and advanced configurations, refer to: - [RAG Configuration Guide](rag_guide.md) -- 
[Llama Stack Documentation](https://llama-stack.readthedocs.io/) - [rag-content Tool Repository](https://github.com/lightspeed-core/rag-content) Remember to regularly update your knowledge sources and monitor system performance to maintain optimal BYOK functionality. diff --git a/docs/rag_guide.md b/docs/rag_guide.md index eef29918d..740e34c47 100644 --- a/docs/rag_guide.md +++ b/docs/rag_guide.md @@ -19,7 +19,7 @@ This document explains how to configure and customize your RAG pipeline. You wil * [Add an Inference Model (LLM)](#add-an-inference-model-llm) * [Complete Configuration Reference](#complete-configuration-reference) * [System Prompt Guidance for RAG (as a tool)](#system-prompt-guidance-for-rag-as-a-tool) -* [Llama Stack RAG annotations](#llama-stack-rag-annotations) +* [RAG annotations](#rag-annotations) * [References](#references) @@ -48,7 +48,7 @@ The **Embedding Model** is used to convert queries and documents into vector rep Use the [`rag-content`](https://github.com/lightspeed-core/rag-content) repository to build a compatible vector database. > [!IMPORTANT] -> The resulting DB must be compatible with Llama Stack (e.g., FAISS with SQLite metadata, SQLite-vec). This can be configured when using the tool to generate the index. +> The resulting DB must be in a supported format (e.g., FAISS with SQLite metadata). This can be configured when using the tool to generate the index. --- @@ -57,13 +57,13 @@ Use the [`rag-content`](https://github.com/lightspeed-core/rag-content) reposito Download a local embedding model such as `sentence-transformers/all-mpnet-base-v2` by using the script in [`rag-content`](https://github.com/lightspeed-core/rag-content) or manually download and place in your desired path. > [!NOTE] -> Llama Stack can also download a model for you, which will make the first start-up slower. In the `byok_rag` section of `lightspeed-stack.yaml`, specify a supported model name as `embedding_model` instead of a local path. 
Llama Stack will then download the model to the `~/.cache/huggingface/hub` folder. +> The embedding model can also be downloaded automatically at first start-up (which will be slower). In the `byok_rag` section of `lightspeed-stack.yaml`, specify a supported model name as `embedding_model` instead of a local path. The model will be downloaded to the `~/.cache/huggingface/hub` folder. --- ## Configure BYOK Knowledge Sources -BYOK knowledge sources are configured in the `byok_rag` section of `lightspeed-stack.yaml`. The Lightspeed Stack service automatically generates the required Llama Stack configuration at startup — no manual enrichment is needed. +BYOK knowledge sources are configured in the `byok_rag` section of `lightspeed-stack.yaml`. The required configuration is automatically generated at startup when using `make run`, `make run-stack`, `docker-compose`, or library mode — no manual enrichment is needed. ### FAISS example @@ -73,14 +73,14 @@ byok_rag: rag_type: inline::faiss embedding_model: sentence-transformers/all-mpnet-base-v2 # or path to local model embedding_dimension: 768 - vector_db_id: # ID used during index generation + vector_db_id: vs_8c94967b-81cc-4028-a294-9cfac6fd9ae2 # Generated by rag-content during index creation db_path: # e.g. /home/USER/vector_db/faiss_store.db ``` Where: - `embedding_model` is the embedding model identifier or path to the local model folder - `db_path` is the path to the vector index (.db file in this case) -- `vector_db_id` is the index ID used to generate the db +- `vector_db_id` is the ID generated by rag-content during index creation (e.g. `vs_8c94967b-81cc-4028-a294-9cfac6fd9ae2`) See the full working [config example](../examples/lightspeed-stack-byok-okp-rag.yaml) for more details. @@ -88,6 +88,10 @@ See the full working [config example](../examples/lightspeed-stack-byok-okp-rag. 
This example shows how to configure a remote PostgreSQL database with the [pgvector](https://github.com/pgvector/pgvector) extension for storing embeddings. +> [!NOTE] +> pgvector is not yet supported via `byok_rag` in `lightspeed-stack.yaml` (see [LCORE-2437](https://redhat.atlassian.net/browse/LCORE-2437)). +> It must be configured directly in the Llama Stack configuration file. + > You will need to install PostgreSQL with a matching version to pgvector, then log in with `psql` and enable the extension with: > ```sql > CREATE EXTENSION IF NOT EXISTS vector; @@ -100,26 +104,36 @@ Each pgvector-backed table follows this schema: - `embedding` (`vector(n)`): the embedding vector, where `n` is the embedding dimension and will match the model's output size (e.g. 768 for `all-mpnet-base-v2`) > [!NOTE] -> The `vector_db_id` (e.g. `rhdocs`) is used to point to the table named `vector_store_rhdocs` in the specified database, which stores the vector embeddings. +> The `vector_store_id` (e.g. `rhdocs`) is used to point to the table named `vector_store_rhdocs` in the specified database, which stores the vector embeddings. ```yaml -byok_rag: - - rag_id: pgvector-example - rag_type: remote::pgvector - embedding_model: sentence-transformers/all-mpnet-base-v2 - embedding_dimension: 768 - # A unique ID that becomes the PostgreSQL table name, prefixed with 'vector_store_'. - # e.g., 'rhdocs' will create the table 'vector_store_rhdocs'. - # If the table was already created, this value must match the ID used at creation. - vector_db_id: rhdocs - db_path: .llama/distributions/pgvector/pgvector_registry.db +providers: + [...] 
+ vector_io: + - provider_id: pgvector-example + provider_type: remote::pgvector + config: + host: localhost + port: 5432 + db: pgvector_example # PostgreSQL database (psql -d pgvector_example) + user: lightspeed # PostgreSQL user + password: ${env.POSTGRES_PASSWORD} + kvstore: + type: sqlite + db_path: .llama/distributions/pgvector/pgvector_registry.db +vector_stores: +- embedding_dimension: 768 + embedding_model: sentence-transformers/all-mpnet-base-v2 + provider_id: pgvector-example + # A unique ID that becomes the PostgreSQL table name, prefixed with 'vector_store_'. + # e.g., 'rhdocs' will create the table 'vector_store_rhdocs'. + # If the table was already created, this value must match the ID used at creation. + vector_store_id: rhdocs ``` > [!NOTE] > For pgvector, the PostgreSQL connection details (host, port, database, user, password) are configured -> in the Llama Stack provider configuration. Use environment variables for credentials. - -See the full working [config example](../examples/lightspeed-stack-byok-okp-rag.yaml) for more details. +> in the provider configuration. Use environment variables for credentials. --- @@ -151,7 +165,7 @@ podman run \ > For other supported models and configuration options, see the vLLM documentation: > [vLLM: Tool Calling](https://docs.vllm.ai/en/stable/features/tool_calling.html) -After starting the container, configure the vLLM provider in your Llama Stack configuration file, matching `model_id` with the model provided in the `podman run` command. +After starting the container, configure the vLLM provider in your `run.yaml`, matching `model_id` with the model provided in the `podman run` command. ```yaml [...] @@ -172,13 +186,9 @@ providers: api_token: # if any ``` -> [!TIP] -> The Llama Stack configuration file path is set in `lightspeed-stack.yaml` under -> `llama_stack.library_client_config_path` when using library mode.
- ### OpenAI example -Add a provider for your language model (e.g., OpenAI): +Add a provider for your language model in your `run.yaml` (e.g., OpenAI): ```yaml models: @@ -204,7 +214,7 @@ export OPENAI_API_KEY= ``` > [!NOTE] -> When experimenting with different `models`, `providers` and `vector_dbs`, you might need to manually unregister the old ones with the Llama Stack client CLI (e.g. `llama-stack-client vector_dbs list`) +> When experimenting with different `models`, `providers` and `vector_dbs`, you might need to manually unregister the old ones via the CLI. ### Azure OpenAI @@ -214,9 +224,9 @@ Not yet supported. ### Ollama The `remote::ollama` provider can be used for inference. However, it does not support tool calling, including RAG. -While Ollama also exposes an OpenAI compatible endpoint that supports tool calling, it cannot be used with `llama-stack` due to current limitations in the `remote::openai` provider. +While Ollama also exposes an OpenAI compatible endpoint that supports tool calling, it cannot currently be used due to limitations in the `remote::openai` provider. -There is an [ongoing discussion](https://github.com/meta-llama/llama-stack/discussions/3034) about enabling tool calling with Ollama. +Tool calling with Ollama is not yet supported. Currently, tool calling is not supported out of the box. Some experimental patches exist (including internal workarounds), but these are not officially released. ### vLLM Mistral @@ -249,7 +259,7 @@ Set `rhokp_url` to the base URL of your OKP server. Use `${env.RH_SERVER_OKP}` t > [!NOTE] > When `okp` is listed in `rag.inline` or `rag.tool`, Lightspeed Stack automatically enriches -> the Llama Stack configuration at startup with the required `vector_io` provider and `registered_resources` +> the underlying configuration at startup with the required `vector_io` provider and `registered_resources` > entries for the OKP vector store. No manual registration is needed. 
**Query Request Example:** @@ -325,7 +335,7 @@ the number of retrieved chunks, set the constants in `src/constants.py`: # Complete Configuration Reference -To enable RAG functionality, configure the `byok_rag` and `rag` sections in your `lightspeed-stack.yaml`, and ensure the Llama Stack configuration includes `agents`, `tool_runtime`, and `safety` APIs. +To enable RAG functionality, configure the `byok_rag` and `rag` sections in your `lightspeed-stack.yaml`. Below is an example of a working `lightspeed-stack.yaml` configuration with: @@ -343,16 +353,12 @@ service: port: 8080 auth_enabled: false -llama_stack: - use_as_library_client: true - library_client_config_path: /path/to/llama-stack-config.yaml - byok_rag: - rag_id: ocp-docs rag_type: inline::faiss embedding_model: sentence-transformers/all-mpnet-base-v2 embedding_dimension: 768 - vector_db_id: openshift-index + vector_db_id: vs_3a7f9b2e-45dc-4e1a-b8f2-1c9d0e3f5a6b db_path: /home/USER/lightspeed-stack/vector_dbs/ocp_docs/faiss_store.db rag: @@ -362,7 +368,7 @@ rag: - ocp-docs ``` -The Llama Stack configuration file (referenced by `library_client_config_path`) should include your inference provider, agents, safety, and tool runtime configuration. The BYOK vector store providers and registered resources are automatically generated by Lightspeed Stack at startup from the `byok_rag` entries above. +The BYOK vector store providers and registered resources are automatically generated at startup from the `byok_rag` entries above. Models and inference providers must be configured separately in your `run.yaml`. --- @@ -376,14 +382,11 @@ You are a helpful assistant with access to a 'knowledge_search' tool. When users ``` --- -# Llama Stack RAG annotations +# RAG annotations -The top-level `vector_stores` block in Llama Stack configuration may include `annotation_prompt_params` to control whether Llama Stack injects extra RAG annotation instructions into the model prompt (for example, citation-style markers). 
The default Llama Stack configuration sets `enable_annotations: false` under that block to avoid unwanted annotations. +The top-level `vector_stores` block in [`run.yaml`](../examples/run.yaml) may include `annotation_prompt_params` to control whether extra RAG annotation instructions are injected into the model prompt (for example, citation-style markers). The default configuration sets `enable_annotations: false` under that block to avoid unwanted annotations. --- # References -* [Llama Stack - RAG](https://llama-stack.readthedocs.io/en/latest/building_applications/rag.html) -* [Llama Stack - Configuring a "Stack"](https://llama-stack.readthedocs.io/en/latest/distributions/configuration.html) -* [Llama Stack - Sample configurations](https://github.com/meta-llama/llama-stack/tree/main/llama_stack/distributions) diff --git a/examples/lightspeed-stack-byok-okp-rag.yaml b/examples/lightspeed-stack-byok-okp-rag.yaml index a7550ff9a..3e2fb18b8 100644 --- a/examples/lightspeed-stack-byok-okp-rag.yaml +++ b/examples/lightspeed-stack-byok-okp-rag.yaml @@ -39,14 +39,14 @@ byok_rag: rag_type: inline::faiss embedding_model: sentence-transformers/all-mpnet-base-v2 embedding_dimension: 1024 - vector_db_id: vs_123 # Llama-stack vector_store_id + vector_db_id: vs_123 # Vector store ID (from index generation) db_path: /tmp/ocp.faiss score_multiplier: 1.0 # Weight for this vector store's results (Inline RAG only) - rag_id: knowledge-base # referenced in rag.inline / rag.tool rag_type: inline::faiss embedding_model: sentence-transformers/all-mpnet-base-v2 embedding_dimension: 384 - vector_db_id: vs_456 # Llama-stack vector_store_id + vector_db_id: vs_456 # Vector store ID (from index generation) db_path: /tmp/kb.faiss score_multiplier: 1.2 # Weight for this vector store's results (Inline RAG only) diff --git a/examples/run.yaml b/examples/run.yaml index cf6fcc4df..5b330b2fc 100644 --- a/examples/run.yaml +++ b/examples/run.yaml @@ -64,13 +64,6 @@ providers: - config: {} # Enable 
the MCP tool provider_id: model-context-protocol provider_type: remote::model-context-protocol - vector_io: - - config: # Define the storage backend for RAG - persistence: - namespace: vector_io::faiss - backend: kv_default - provider_id: faiss - provider_type: inline::faiss agents: - config: persistence: @@ -113,10 +106,10 @@ scoring_fns: [] server: port: 8321 storage: - backends: - kv_default: # Define the storage backend type for RAG, in this case registry and RAG are unified i.e. information on registered resources (e.g. models, vector_stores) are saved together with the RAG chunks + backends: # Define the storage backend types used to store metadata of registered resources (e.g. models, vector_stores) + kv_default: type: kv_sqlite - db_path: ${env.KV_STORE_PATH:=~/.llama/storage/rag/kv_store.db} + db_path: ${env.KV_STORE_PATH:=~/.llama/storage/kv_store.db} sql_default: type: sql_sqlite db_path: ${env.SQL_STORE_PATH:=~/.llama/storage/sql_store.db} @@ -141,17 +134,6 @@ registered_resources: provider_id: openai model_type: llm provider_model_id: gpt-4o-mini - - model_id: sentence-transformers/all-mpnet-base-v2 - model_type: embedding - provider_id: sentence-transformers - provider_model_id: sentence-transformers/all-mpnet-base-v2 - metadata: - embedding_dimension: 768 - vector_stores: - - embedding_dimension: 768 - embedding_model: sentence-transformers/nomic-ai/nomic-embed-text-v1.5 - provider_id: faiss - vector_store_id: vs_503a2261-c256-45ff-90aa-580a80de64b8 shields: - shield_id: llama-guard provider_id: llama-guard