Initial setup: Knowledge base RAG system with LlamaIndex and ChromaDB

- Add Python project with uv package manager
- Implement LlamaIndex + ChromaDB RAG pipeline
- Add sentence-transformers for local embeddings (all-MiniLM-L6-v2)
- Create MCP server with semantic search, indexing, and stats tools
- Add Markdown chunker with heading/wikilink/frontmatter support
- Add Dockerfile and docker-compose.yaml for self-hosted deployment
- Include sample Obsidian vault files for testing
- Add .gitignore and .env.example
2026-03-03 20:42:42 -05:00
parent 94dd158d1c
commit 11c3f705ce
11 changed files with 5319 additions and 0 deletions
--- a/.env.example
+++ b/.env.example
@@ -0,0 +1,15 @@
+# Knowledge RAG Configuration
+
+# Path to your Obsidian vault (must contain markdown files)
+# This should be an absolute path or relative to where you run docker-compose
+VAULT_PATH=./knowledge
+
+# Embedding model to use
+# Default: all-MiniLM-L6-v2 (fast, good quality, ~90MB)
+# Other options:
+#   - all-mpnet-base-v2 (higher quality, slower, ~420MB)
+#   - BAAI/bge-small-en-v1.5 (good quality, ~130MB)
+EMBEDDING_MODEL=all-MiniLM-L6-v2
+
+# Optional: Log level (DEBUG, INFO, WARNING, ERROR)
+LOG_LEVEL=INFO
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,47 @@
+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# Virtual environments
+venv/
+.venv/
+env/
+.env/
+
+# IDEs
+.vscode/
+.idea/
+*.swp
+*.swo
+*~
+
+# Tool caches (ruff, mypy, pytest)
+.ruff_cache/
+.mypy_cache/
+.pytest_cache/
+
+# Data directories (should be mounted externally)
+data/
+knowledge/
+
+# Environment
+.env
+.env.local
--- a/Dockerfile
+++ b/Dockerfile
@@ -0,0 +1,33 @@
+FROM python:3.11-slim
+
+# Install system dependencies for sentence-transformers
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    gcc \
+    g++ \
+    && rm -rf /var/lib/apt/lists/*
+
+# Set working directory
+WORKDIR /app
+
+# Install uv
+RUN pip install --no-cache-dir uv
+
+# Copy the dependency manifests; `uv sync --frozen` needs uv.lock next to
+# pyproject.toml and fails without it.
+COPY pyproject.toml uv.lock ./
+
+# Install the locked runtime dependencies into /app/.venv.
+# The project itself is not installed at this point (its sources are copied
+# afterwards and exposed through PYTHONPATH below).
+RUN uv sync --frozen --no-dev --no-install-project
+
+# Copy source code
+COPY src/ ./src/
+
+# Create data directories
+RUN mkdir -p /data/vault /data/chroma_db /data/embeddings_cache
+
+# Set environment variables
+# - PATH puts the uv-managed virtualenv first so `python` sees the synced deps
+# - PYTHONPATH makes the src-layout package `knowledge_rag` importable
+ENV PYTHONUNBUFFERED=1 \
+    PATH="/app/.venv/bin:${PATH}" \
+    PYTHONPATH=/app/src \
+    VAULT_PATH=/data/vault \
+    EMBEDDINGS_CACHE_DIR=/data/embeddings_cache
+
+# Default command runs the MCP server
+CMD ["python", "-m", "knowledge_rag.server"]
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -0,0 +1,32 @@
+version: "3.8"
+
+services:
+  knowledge-rag:
+    build:
+      context: .
+      dockerfile: Dockerfile
+    container_name: knowledge-rag
+    volumes:
+      # Mount your obsidian vault here
+      - ${VAULT_PATH:-./knowledge}:/data/vault
+      # Persist ChromaDB vector store
+      - ./data/chroma_db:/data/chroma_db
+      # Persist embeddings cache
+      - ./data/embeddings_cache:/data/embeddings_cache
+    environment:
+      - VAULT_PATH=/data/vault
+      - EMBEDDING_MODEL=${EMBEDDING_MODEL:-all-MiniLM-L6-v2}
+      - EMBEDDINGS_CACHE_DIR=/data/embeddings_cache
+    restart: unless-stopped
+
+  # Optional: Watchtower for auto-updates
+  # watchtower:
+  #   image: containrrr/watchtower
+  #   container_name: watchtower
+  #   volumes:
+  #     - /var/run/docker.sock:/var/run/docker.sock
+  #   environment:
+  #     - WATCHTOWER_CLEANUP=true
+  #     - WATCHTOWER_INCLUDE_STOPPED=true
+  #   command: --interval 3600 knowledge-rag
+  #   restart: unless-stopped
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -0,0 +1,36 @@
+[project]
+name = "knowledge-rag"
+version = "0.1.0"
+description = "RAG system for Obsidian vault knowledge base with MCP server"
+readme = "README.md"
+requires-python = ">=3.11"
+dependencies = [
+    "llama-index>=0.10.0",
+    "llama-index-vector-stores-chroma>=0.1.0",
+    "chromadb>=0.4.0",
+    "sentence-transformers>=2.2.0",
+    "mcp>=1.0.0",
+    "python-dotenv>=1.0.0",
+    "pydantic>=2.0.0",
+    "watchdog>=3.0.0",
+    "httpx>=0.25.0",
+]
+
+[project.optional-dependencies]
+dev = [
+    "pytest>=7.0.0",
+    "pytest-asyncio>=0.21.0",
+    "ruff>=0.1.0",
+]
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[tool.ruff]
+line-length = 100
+target-version = "py311"
+
+[tool.ruff.lint]
+select = ["E", "F", "I", "N", "W"]
+ignore = ["E501"]
--- a/src/knowledge_rag/__init__.py
+++ b/src/knowledge_rag/__init__.py
@@ -0,0 +1,3 @@
+"""Knowledge RAG - RAG system for Obsidian vault knowledge base."""
+
+__version__ = "0.1.0"
--- a/src/knowledge_rag/chunker.py
+++ b/src/knowledge_rag/chunker.py
@@ -0,0 +1,181 @@
+"""Markdown-aware document chunking for Obsidian vault."""
+
+import os
+import re
+from pathlib import Path
+from typing import List, Optional
+
+from llama_index.core.schema import TextNode
+
+
+class MarkdownChunker:
+    """Intelligent markdown chunker for Obsidian vaults.
+
+    Chunks markdown files while preserving:
+    - Document/folder structure context
+    - Code blocks as atomic units
+    - Heading hierarchy
+    - Wiki links as metadata
+    """
+
+    # Default chunk settings
+    DEFAULT_CHUNK_SIZE = 512
+    DEFAULT_CHUNK_OVERLAP = 50
+
+    def __init__(
+        self,
+        chunk_size: int = DEFAULT_CHUNK_SIZE,
+        chunk_overlap: int = DEFAULT_CHUNK_OVERLAP,
+    ):
+        self.chunk_size = chunk_size
+        self.chunk_overlap = chunk_overlap
+
+    def chunk_file(self, file_path: str, content: str) -> List[TextNode]:
+        """Chunk a single markdown file.
+
+        Args:
+            file_path: Path to the markdown file
+            content: Raw markdown content
+
+        Returns:
+            List of TextNode chunks with metadata
+        """
+        # Extract frontmatter if present
+        frontmatter, body = self._extract_frontmatter(content)
+
+        # Extract wiki links for metadata
+        wiki_links = self._extract_wiki_links(body)
+
+        # Get relative path for context
+        rel_path = os.path.relpath(file_path)
+
+        # Split into sections based on headings
+        sections = self._split_by_headings(body)
+
+        chunks = []
+        for i, section in enumerate(sections):
+            if not section["content"].strip():
+                continue
+
+            # Create chunk with metadata
+            # Note: wiki_links must be a string for ChromaDB compatibility
+            node = TextNode(
+                text=section["content"],
+                metadata={
+                    "source": rel_path,
+                    "file_name": os.path.basename(file_path),
+                    "heading": section.get("heading", ""),
+                    "section_index": i,
+                    "wiki_links": ",".join(wiki_links) if wiki_links else "",
+                    "has_frontmatter": frontmatter is not None,
+                },
+                excluded_embed_metadata_keys=["wiki_links"],
+                excluded_search_metadata_keys=["wiki_links"],
+            )
+            chunks.append(node)
+
+        return chunks
+
+    def chunk_directory(self, dir_path: str) -> List[TextNode]:
+        """Chunk all markdown files in a directory recursively.
+
+        Args:
+            dir_path: Root directory containing markdown files
+
+        Returns:
+            List of all TextNode chunks
+        """
+        all_chunks = []
+        dir_path = Path(dir_path)
+
+        if not dir_path.exists():
+            raise FileNotFoundError(f"Directory not found: {dir_path}")
+
+        # Find all .md files
+        md_files = list(dir_path.rglob("*.md"))
+
+        for md_file in md_files:
+            try:
+                content = md_file.read_text(encoding="utf-8")
+                chunks = self.chunk_file(str(md_file), content)
+                all_chunks.extend(chunks)
+            except Exception as e:
+                print(f"Error chunking {md_file}: {e}")
+                continue
+
+        return all_chunks
+
+    def _extract_frontmatter(
+        self, content: str
+    ) -> tuple[Optional[dict], str]:
+        """Extract YAML frontmatter from markdown."""
+        if not content.startswith("---"):
+            return None, content
+
+        # Find closing ---
+        lines = content.split("\n")
+        if len(lines) < 3:
+            return None, content
+
+        frontmatter_lines = []
+        body_start = 2
+
+        for i in range(1, len(lines)):
+            if lines[i].strip() == "---":
+                body_start = i + 1
+                break
+            frontmatter_lines.append(lines[i])
+
+        # Parse simple key-value frontmatter
+        frontmatter = {}
+        for line in frontmatter_lines:
+            if ":" in line:
+                key, value = line.split(":", 1)
+                frontmatter[key.strip()] = value.strip()
+
+        body = "\n".join(lines[body_start:])
+        return frontmatter, body
+
+    def _extract_wiki_links(self, content: str) -> List[str]:
+        """Extract [[wiki links]] from markdown content."""
+        wiki_link_pattern = r"\[\[([^\]|]+)(?:\|[^\]]+)?\]]"
+        return re.findall(wiki_link_pattern, content)
+
+    def _split_by_headings(self, content: str) -> List[dict]:
+        """Split content by markdown headings while preserving context."""
+        # Split by heading lines (# ## ### etc)
+        heading_pattern = r"^(#{1,6})\s+(.+)$"
+
+        sections = []
+        current_section = {
+            "heading": "",
+            "content": "",
+        }
+
+        lines = content.split("\n")
+        for line in lines:
+            match = re.match(heading_pattern, line)
+            if match:
+                # Save current section if non-empty
+                if current_section["content"].strip():
+                    sections.append(current_section)
+
+                # Start new section
+                level = len(match.group(1))
+                heading_text = match.group(2).strip()
+                current_section = {
+                    "heading": heading_text,
+                    "content": line + "\n",
+                }
+            else:
+                current_section["content"] += line + "\n"
+
+        # Don't forget the last section
+        if current_section["content"].strip():
+            sections.append(current_section)
+
+        # If no headings found, treat entire content as one section
+        if not sections:
+            sections = [{"heading": "", "content": content}]
+
+        return sections
--- a/src/knowledge_rag/embeddings.py
+++ b/src/knowledge_rag/embeddings.py
@@ -0,0 +1,75 @@
+"""Embedding model wrapper using sentence-transformers."""
+
+import os
+from typing import List, Any
+
+from llama_index.core.embeddings import BaseEmbedding
+from sentence_transformers import SentenceTransformer
+
+
+class LocalEmbeddingModel(BaseEmbedding):
+    """Local embedding model using sentence-transformers.
+
+    Uses a lightweight, high-quality model for semantic similarity.
+    Default model: 'all-MiniLM-L6-v2' - fast and good quality.
+    """
+
+    def __init__(
+        self,
+        model_name: str = "all-MiniLM-L6-v2",
+        cache_folder: str | None = None,
+        **kwargs,
+    ):
+        # Store model name before super init
+        self._model_name = model_name
+        
+        # Use persistent cache directory for Docker, or local cache for development
+        if cache_folder is None:
+            if os.path.exists("/data"):
+                cache_folder = "/data/embeddings_cache"
+            else:
+                cache_folder = None
+        
+        # Load model first
+        model = SentenceTransformer(model_name, cache_folder=cache_folder)
+        embed_dim = model.get_sentence_embedding_dimension()
+        
+        # Initialize pydantic model with required fields
+        super().__init__(
+            embed_dim=embed_dim,
+            model_name=model_name,
+            **kwargs,
+        )
+        
+        # Now set the model after pydantic init
+        object.__setattr__(self, '_model', model)
+
+    def _get_text_embedding(self, text: str) -> List[float]:
+        """Get embedding for a single text."""
+        return self._model.encode(text, convert_to_numpy=True).tolist()
+
+    async def _aget_text_embedding(self, text: str) -> List[float]:
+        """Async get embedding - synchronous for local model."""
+        return self._get_text_embedding(text)
+
+    def _get_query_embedding(self, query: str) -> List[float]:
+        """Get embedding for a query."""
+        return self._model.encode(query, convert_to_numpy=True).tolist()
+
+    async def _aget_query_embedding(self, query: str) -> List[float]:
+        """Async get query embedding - synchronous for local model."""
+        return self._get_query_embedding(query)
+
+    def _get_text_embeddings(self, texts: List[str]) -> List[List[float]]:
+        """Get embeddings for multiple texts."""
+        return self._model.encode(texts, convert_to_numpy=True).tolist()
+
+    async def _aget_text_embeddings(self, texts: List[str]) -> List[List[float]]:
+        """Async get embeddings - synchronous for local model."""
+        return self._get_text_embeddings(texts)
+
+
+def get_embedding_model() -> LocalEmbeddingModel:
+    """Factory function to create the embedding model."""
+    model_name = os.environ.get("EMBEDDING_MODEL", "all-MiniLM-L6-v2")
+    return LocalEmbeddingModel(model_name=model_name)
--- a/src/knowledge_rag/server.py
+++ b/src/knowledge_rag/server.py
@@ -0,0 +1,282 @@
+"""MCP server for knowledge base RAG system."""
+
+import os
+import sys
+import logging
+from pathlib import Path
+from typing import Any
+
+from mcp.server import Server
+from mcp.server.stdio import stdio_server
+from mcp.types import Tool, TextContent
+from pydantic import AnyUrl
+
+from .chunker import MarkdownChunker
+from .embeddings import get_embedding_model
+from .vector_store import KnowledgeVectorStore
+
+
+# Configure logging
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
+)
+logger = logging.getLogger(__name__)
+
+
+class KnowledgeMCPServer:
+    """MCP server for semantic search in Obsidian vault.
+
+    Provides tools to:
+    - Search the knowledge base semantically
+    - Index/update the knowledge base
+    - Get statistics about indexed content
+    """
+
+    def __init__(self, vault_path: str | None = None):
+        # Get vault path from environment or use default
+        self.vault_path = vault_path or os.environ.get(
+            "VAULT_PATH", "/data/vault"
+        )
+
+        # Ensure vault path exists
+        if not Path(self.vault_path).exists():
+            logger.warning(f"Vault path does not exist: {self.vault_path}")
+
+        # Initialize components
+        self.embedding_model = get_embedding_model()
+        self.vector_store = KnowledgeVectorStore(
+            embedding_model=self.embedding_model
+        )
+        self.chunker = MarkdownChunker()
+
+        # Track indexing status
+        self._indexed = False
+
+        # Create MCP server
+        self.server = Server("knowledge-rag")
+
+        # Register handlers
+        self._register_handlers()
+
+    def _register_handlers(self):
+        """Register MCP request handlers."""
+
+        @self.server.list_tools()
+        async def list_tools() -> list[Tool]:
+            """List available MCP tools."""
+            return [
+                Tool(
+                    name="search_knowledge",
+                    description="Semantic search through the knowledge base. "
+                                "Uses embeddings to find relevant content based on meaning, "
+                                "not just keywords. Best for answering questions or finding "
+                                "related concepts.",
+                    inputSchema={
+                        "type": "object",
+                        "properties": {
+                            "query": {
+                                "type": "string",
+                                "description": "The search query in natural language",
+                            },
+                            "top_k": {
+                                "type": "integer",
+                                "description": "Number of results to return",
+                                "default": 5,
+                            },
+                        },
+                        "required": ["query"],
+                    },
+                ),
+                Tool(
+                    name="index_knowledge",
+                    description="Index or re-index the knowledge base. "
+                                "Run this after adding new files to the vault. "
+                                "Scans all markdown files and builds the search index.",
+                    inputSchema={
+                        "type": "object",
+                        "properties": {
+                            "force": {
+                                "type": "boolean",
+                                "description": "Force re-index (clear existing index first)",
+                                "default": False,
+                            },
+                        },
+                    },
+                ),
+                Tool(
+                    name="get_knowledge_stats",
+                    description="Get statistics about the indexed knowledge base.",
+                    inputSchema={
+                        "type": "object",
+                        "properties": {},
+                    },
+                ),
+            ]
+
+        @self.server.call_tool()
+        async def call_tool(
+            name: str, arguments: dict | None
+        ) -> list[TextContent]:
+            """Handle tool calls."""
+            if name == "search_knowledge":
+                return await self._search_knowledge(arguments or {})
+            elif name == "index_knowledge":
+                return await self._index_knowledge(arguments or {})
+            elif name == "get_knowledge_stats":
+                return await self._get_stats()
+            else:
+                raise ValueError(f"Unknown tool: {name}")
+
+    async def _search_knowledge(
+        self, arguments: dict[str, Any]
+    ) -> list[TextContent]:
+        """Search the knowledge base semantically."""
+        query = arguments.get("query", "")
+        top_k = arguments.get("top_k", 5)
+
+        if not query:
+            return [TextContent(type="text", text="Query cannot be empty.")]
+
+        # Ensure we've indexed
+        if not self._indexed:
+            await self._index_knowledge({})
+
+        try:
+            # Search with embeddings
+            results = self.vector_store.search(
+                query=query,
+                top_k=top_k,
+            )
+
+            if not results:
+                return [
+                    TextContent(
+                        type="text",
+                        text="No results found. Try indexing your knowledge base first."
+                    )
+                ]
+
+            # Format results
+            output = []
+            for i, result in enumerate(results, 1):
+                source = result["metadata"].get("file_name", "unknown")
+                heading = result["metadata"].get("heading", "")
+                score = result.get("score", 0)
+
+                text = result["text"][:500]  # Truncate long text
+                if len(result["text"]) > 500:
+                    text += "..."
+
+                output.append(
+                    f"--- Result {i} ---\n"
+                    f"Source: {source}"
+                    + (f" > {heading}" if heading else "")
+                    + f"\nRelevance: {score:.2f}\n\n{text}\n"
+                )
+
+            return [TextContent(type="text", text="\n".join(output))]
+
+        except Exception as e:
+            logger.exception("Search error")
+            return [TextContent(type="text", text=f"Search error: {str(e)}")]
+
+    async def _index_knowledge(
+        self, arguments: dict[str, Any]
+    ) -> list[TextContent]:
+        """Index the knowledge base."""
+        force = arguments.get("force", False)
+
+        vault_path = Path(self.vault_path)
+
+        if not vault_path.exists():
+            return [
+                TextContent(
+                    type="text",
+                    text=f"Vault path does not exist: {self.vault_path}"
+                )
+            ]
+
+        try:
+            # Clear existing index if forced
+            if force:
+                logger.info("Force re-indexing...")
+                self.vector_store.clear()
+            else:
+                logger.info("Indexing knowledge base...")
+
+            # Chunk all markdown files
+            chunks = self.chunker.chunk_directory(str(vault_path))
+
+            if not chunks:
+                return [
+                    TextContent(
+                        type="text",
+                        text="No markdown files found in vault."
+                    )
+                ]
+
+            logger.info(f"Created {len(chunks)} chunks, adding to vector store...")
+
+            # Add to vector store (this embeds them)
+            self.vector_store.add_nodes(chunks, embedding_model=self.embedding_model)
+
+            self._indexed = True
+
+            stats = self.vector_store.get_stats()
+            return [
+                TextContent(
+                    type="text",
+                    text=f"Successfully indexed {len(chunks)} chunks from the knowledge base.\n"
+                         f"Total chunks in index: {stats['total_chunks']}"
+                )
+            ]
+
+        except Exception as e:
+            logger.exception("Indexing error")
+            return [TextContent(type="text", text=f"Indexing error: {str(e)}")]
+
+    async def _get_stats(self) -> list[TextContent]:
+        """Get knowledge base statistics."""
+        stats = self.vector_store.get_stats()
+
+        vault_path = Path(self.vault_path)
+        md_files = list(vault_path.rglob("*.md")) if vault_path.exists() else []
+
+        return [
+            TextContent(
+                type="text",
+                text=f"Knowledge Base Statistics:\n"
+                     f"- Vault path: {self.vault_path}\n"
+                     f"- Markdown files: {len(md_files)}\n"
+                     f"- Indexed chunks: {stats['total_chunks']}\n"
+                     f"- Index status: {'Ready' if self._indexed else 'Not indexed'}"
+            )
+        ]
+
+    async def run(self):
+        """Run the MCP server."""
+        logger.info(f"Starting Knowledge RAG MCP Server")
+        logger.info(f"Vault path: {self.vault_path}")
+
+        # Auto-index on startup
+        await self._index_knowledge({})
+
+        # Run stdio server
+        async with stdio_server() as (read_stream, write_stream):
+            await self.server.run(
+                read_stream,
+                write_stream,
+                self.server.create_initialization_options(),
+            )
+
+
+async def main():
+    """Main entry point."""
+    server = KnowledgeMCPServer()
+    await server.run()
+
+
+if __name__ == "__main__":
+    import asyncio
+    asyncio.run(main())
--- a/src/knowledge_rag/vector_store.py
+++ b/src/knowledge_rag/vector_store.py
@@ -0,0 +1,137 @@
+"""ChromaDB vector store wrapper for knowledge base."""
+
+import os
+from typing import TYPE_CHECKING, Any, List, Optional
+
+from llama_index.core.schema import TextNode
+from llama_index.vector_stores.chroma import ChromaVectorStore
+import chromadb
+
+if TYPE_CHECKING:
+    from llama_index.core.embeddings import BaseEmbedding
+
+
+class KnowledgeVectorStore:
+    """ChromaDB vector store for the knowledge base.
+
+    Handles persistence of embeddings and semantic search.
+    """
+
+    def __init__(
+        self,
+        persist_dir: str | None = None,
+        collection_name: str = "knowledge_base",
+        embedding_model: "BaseEmbedding | None" = None,
+    ):
+        self._collection_name = collection_name
+        self._embedding_model = embedding_model
+
+        # Use Docker path if available, otherwise use local data dir
+        if persist_dir is None:
+            if os.path.exists("/data"):
+                persist_dir = "/data/chroma_db"
+            else:
+                persist_dir = "./data/chroma_db"
+
+        self._persist_dir = persist_dir
+
+        # Ensure persist directory exists
+        os.makedirs(persist_dir, exist_ok=True)
+
+        # Initialize ChromaDB client
+        self._client = chromadb.PersistentClient(path=persist_dir)
+
+        # Get or create collection
+        self._collection = self._client.get_or_create_collection(
+            name=collection_name,
+            metadata={"description": "Knowledge base embeddings"}
+        )
+
+        # Wrap in LlamaIndex vector store
+        # Pass the chroma_collection directly for PersistentClient
+        self._vector_store = ChromaVectorStore(
+            chroma_collection=self._collection,
+        )
+
+    def set_embedding_model(self, embedding_model: "BaseEmbedding") -> None:
+        """Set the embedding model for query embedding."""
+        self._embedding_model = embedding_model
+
+    @property
+    def vector_store(self) -> ChromaVectorStore:
+        """Get the LlamaIndex ChromaVectorStore."""
+        return self._vector_store
+
+    def add_nodes(self, nodes: List[TextNode], embedding_model: "BaseEmbedding | None" = None) -> None:
+        """Add nodes to the vector store."""
+        from llama_index.core import VectorStoreIndex, StorageContext
+        
+        # Use provided embedding model or the stored one
+        model = embedding_model or self._embedding_model
+        
+        if model is None:
+            raise ValueError("No embedding model provided")
+        
+        # First embed the nodes
+        for node in nodes:
+            node.embedding = model.get_text_embedding(node.text)
+        
+        # Then add to vector store
+        self._vector_store.add(nodes)
+
+    def search(
+        self,
+        query: str,
+        top_k: int = 5,
+        filter: Optional[dict[str, Any]] = None,
+    ) -> List[dict[str, Any]]:
+        """Semantic search for similar chunks.
+
+        Args:
+            query: The search query
+            top_k: Number of results to return
+            filter: Optional metadata filters
+
+        Returns:
+            List of search results with text and metadata
+        """
+        from llama_index.core import VectorStoreIndex
+
+        # Use embedding model if provided, otherwise use the one from storage
+        embed_model = self._embedding_model
+
+        index = VectorStoreIndex.from_vector_store(
+            self._vector_store,
+            embed_model=embed_model,
+        )
+
+        query_engine = index.as_retriever(
+            similarity_top_k=top_k,
+            filters=filter,
+        )
+
+        results = query_engine.retrieve(query)
+
+        return [
+            {
+                "text": node.text,
+                "score": node.score,
+                "metadata": node.metadata,
+            }
+            for node in results
+        ]
+
+    def clear(self) -> None:
+        """Clear all embeddings from the store."""
+        self._client.delete_collection(self._collection_name)
+        self._collection = self._client.get_or_create_collection(
+            name=self._collection_name,
+            metadata={"description": "Knowledge base embeddings"}
+        )
+
+    def get_stats(self) -> dict[str, Any]:
+        """Get vector store statistics."""
+        return {
+            "total_chunks": self._collection.count(),
+            "collection_name": self._collection_name,
+        }
--- a/uv.lock
+++ b/uv.lock