From c69202453a88b1adc7f7779c084d0975c98b9366 Mon Sep 17 00:00:00 2001
From: nacharki
Date: Wed, 6 Aug 2025 11:05:30 +0200
Subject: [PATCH] Using ZeroEntropy for Semantic Search

---
 README.md | 2 +
 .../.env.example | 7 +
 .../semantic_search_over_articles/.gitignore | 90 ++++
 .../semantic_search_over_articles/Dockerfile | 47 ++
 .../semantic_search_over_articles/README.md | 112 ++++
 .../backend/__main__.py | 220 ++++++++
 .../backend/indexer_ze.py | 178 +++++++
 .../backend/logger/__init__.py | 1 +
 .../backend/logger/logging.py | 121 +++++
 .../backend/search_ze.py | 359 +++++++++++++
 .../backend/utils_ze.py | 371 ++++++++++++++
 .../demo_notebook.ipynb | 478 ++++++++++++++++++
 .../docker-compose.yml | 38 ++
 .../frontend/streamlit_app_ze.py | 352 +++++++++++++
 14 files changed, 2376 insertions(+)
 create mode 100644 guides/semantic_search_over_articles/.env.example
 create mode 100644 guides/semantic_search_over_articles/.gitignore
 create mode 100644 guides/semantic_search_over_articles/Dockerfile
 create mode 100644 guides/semantic_search_over_articles/README.md
 create mode 100644 guides/semantic_search_over_articles/backend/__main__.py
 create mode 100644 guides/semantic_search_over_articles/backend/indexer_ze.py
 create mode 100644 guides/semantic_search_over_articles/backend/logger/__init__.py
 create mode 100644 guides/semantic_search_over_articles/backend/logger/logging.py
 create mode 100644 guides/semantic_search_over_articles/backend/search_ze.py
 create mode 100644 guides/semantic_search_over_articles/backend/utils_ze.py
 create mode 100644 guides/semantic_search_over_articles/demo_notebook.ipynb
 create mode 100644 guides/semantic_search_over_articles/docker-compose.yml
 create mode 100644 guides/semantic_search_over_articles/frontend/streamlit_app_ze.py

diff --git a/README.md b/README.md
index 2330694..77c84da 100644
--- a/README.md
+++ b/README.md
@@ -21,6 +21,8 @@ As of now, the guides in this cookbook are written in Python, but the same conce
 Learn how to use ZeroEntropy as an Agent's search tool to access a knowledge base when responding to user queries.
 8. **[Use LlamaParse in combination with ZeroEntropy to search and rerank PDFs](guides/rerank_llamaparsed_pdfs)**
 Learn how to use ZeroEntropy and LlamaParse to parse, search, and rerank complex PDF documents.
+9. **[Use ZeroEntropy for Semantic Search over articles (French Gossip Media)](guides/semantic_search_over_articles)**
+ Learn how to use ZeroEntropy in a semantic search RAG to scrape, index, search, and rerank media articles.
*(More guides coming soon...)* diff --git a/guides/semantic_search_over_articles/.env.example b/guides/semantic_search_over_articles/.env.example new file mode 100644 index 0000000..f8497ac --- /dev/null +++ b/guides/semantic_search_over_articles/.env.example @@ -0,0 +1,7 @@ +# AI Provider Configuration +OPENAI_API_KEY="" +ZEROENTROPY_API_KEY="" + +# Environment +ENVIRONMENT=development +EOF < /dev/null diff --git a/guides/semantic_search_over_articles/.gitignore b/guides/semantic_search_over_articles/.gitignore new file mode 100644 index 0000000..2499ae3 --- /dev/null +++ b/guides/semantic_search_over_articles/.gitignore @@ -0,0 +1,90 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt +*.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ +.xml + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyPI configuration file +.pypirc diff --git a/guides/semantic_search_over_articles/Dockerfile b/guides/semantic_search_over_articles/Dockerfile new file mode 100644 index 0000000..2f53512 --- /dev/null +++ b/guides/semantic_search_over_articles/Dockerfile @@ -0,0 +1,47 @@ +# Set python version +FROM python:3.10-slim + +# Set environment variables +ENV PYTHONUNBUFFERED=1 +ENV PYTHONDONTWRITEBYTECODE=1 +ENV PYTHONPATH=/app + +# Set working directory +WORKDIR /app + +# Install system dependencies +RUN apt-get update && apt-get install -y \ + build-essential \ + git \ + curl \ + && rm -rf /var/lib/apt/lists/* + +# Copy requirements file +COPY requirements.txt . 
+
+# Install Python dependencies using uv
+RUN pip install --no-cache-dir uv
+RUN uv pip install --system --no-cache -r requirements.txt
+
+# Copy application code
+COPY backend/ ./backend/
+COPY frontend/ ./frontend/
+
+# Copy environment files (if any)
+COPY .env* ./
+
+# Create necessary directories
+RUN mkdir -p /app/data /app/logs
+
+# Set proper permissions
+RUN chmod -R 755 /app
+
+# Expose Streamlit port
+EXPOSE 8501
+
+# Health check
+HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
+    CMD curl -f http://localhost:8501/_stcore/health || exit 1
+
+# Set the entrypoint to run Streamlit
+CMD ["streamlit", "run", "frontend/streamlit_app_ze.py", "--server.port=8501", "--server.address=0.0.0.0", "--server.headless=true"]
\ No newline at end of file
diff --git a/guides/semantic_search_over_articles/README.md b/guides/semantic_search_over_articles/README.md
new file mode 100644
index 0000000..04ad5f9
--- /dev/null
+++ b/guides/semantic_search_over_articles/README.md
@@ -0,0 +1,112 @@
+# French Gossip Semantic Search with ZeroEntropy
+
+This guide builds production-ready semantic search over French gossip articles from **vsd.fr** and **public.fr** using ZeroEntropy.
+
+[![Python](https://img.shields.io/badge/python-3.10+-blue.svg)](https://www.python.org/downloads/)
+[![ZeroEntropy](https://img.shields.io/badge/zeroentropy-latest-purple.svg)](https://zeroentropy.dev/)
+[![Streamlit](https://img.shields.io/badge/streamlit-1.30+-green.svg)](https://streamlit.io/)
+[![Docker](https://img.shields.io/badge/docker-ready-blue.svg)](https://docker.com/)
+
+## Features
+
+- **Advanced AI Retrieval**: Powered by ZeroEntropy's state-of-the-art search & reranking
+- **Multiple Search Types**: Documents, snippets, pages, and advanced reranked results
+- **Real-time RSS Scraping**: Automatically indexes articles from gossip websites
+- **Interactive Web UI**: Beautiful Streamlit interface with advanced filtering
+- **Smart Reranking**: Uses the `zerank-1-small` model for improved relevance
+
+## Quick Start
+
+### 1. Setup Environment
+```bash
+# Clone repository
+git clone 
+cd guides/semantic_search_over_articles
+
+# Install dependencies
+pip install -r requirements.txt
+
+# Copy the environment template, then add your ZeroEntropy credentials
+cp .env.example .env
+
+```
+
+### 2. Index Articles
+```bash
+# Scrape RSS feeds and index articles
+python backend scrape --collection my_articles
+```
+
+### 3. Search Articles
+```bash
+# Search for articles (CLI)
+python backend search "TPMP" --k 5 --collection my_articles
+python backend search "famille royale" --search-type snippets
+python backend search "célébrités" --search-type advanced --k 10
+```
+
+### 4. Web Interface
+```bash
+# Launch Streamlit app
+streamlit run frontend/streamlit_app_ze.py
+```
+Access at: `http://localhost:8501`
+
+## Docker Deployment
+
+```bash
+# Build and run with Docker Compose
+docker-compose up --build
+
+# Or build the Docker image, start a container, and run commands inside it
+docker build -t gossip-search .
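+
+# NOTE (assumption): a container must be running before the docker exec commands below can work;
+# the container name matches those commands, while the port mapping and --env-file flag are illustrative.
+docker run -d --name gossip-search-container --env-file .env -p 8501:8501 gossip-search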
+
+docker exec -it gossip-search-container python backend scrape --collection my_articles
+docker exec -it gossip-search-container python backend search "TPMP" --k 5
+```
+
+## Project Structure
+
+```
+├── backend/
+│   ├── __main__.py       # Main CLI interface
+│   ├── indexer_ze.py     # RSS scraping & indexing
+│   ├── search_ze.py      # Search functionality
+│   ├── utils_ze.py       # Advanced utilities & reranking
+│   └── logger/           # Logging configuration
+├── frontend/
+│   └── streamlit_app_ze.py  # Web interface
+├── demo_notebook.ipynb   # Interactive demo
+├── docker-compose.yml    # Container orchestration
+├── requirements.txt      # Dependencies
+└── README.md             # This file
+```
+
+## Usage Examples
+
+### CLI Commands
+```bash
+# Collection management
+python backend manage list
+python backend manage status --collection my_articles
+
+# Advanced search with filters
+python backend search "mode" --filter-creator "Public" --reranker zerank-1-small
+
+# Different search types
+python backend search "actualité" --search-type documents --k 10
+python backend search "télé" --search-type snippets --k 5
+python backend search "people" --search-type advanced --k 8
+```
+
+
+## Demo Notebook
+
+A Jupyter notebook is also available to explore the code and run it step by step:
+```bash
+jupyter notebook demo_notebook.ipynb
+```
+
+---
+## Author & Contribution
+
+**Created by [Naoufal Acharki](https://github.com/nacharki)**: This project demonstrates ZeroEntropy-powered RAG over French gossip content.
diff --git a/guides/semantic_search_over_articles/backend/__main__.py b/guides/semantic_search_over_articles/backend/__main__.py
new file mode 100644
index 0000000..e54768d
--- /dev/null
+++ b/guides/semantic_search_over_articles/backend/__main__.py
@@ -0,0 +1,220 @@
+# __main__.py
+import asyncio
+import json
+import argparse
+from dotenv import load_dotenv
+
+# Internal imports
+from indexer_ze import ZeroEntropyArticleIndexer
+from search_ze import ZeroEntropyArticleSearcher
+from utils_ze import ZeroEntropyUtils
+from logger import getLogger
+
+# Load environment variables
+load_dotenv()
+
+# Configure logger to display log messages
+logger = getLogger()
+
+
+class ZeroEntropyArticleManager:
+    """
+    Main class that orchestrates RSS scraping, indexing, and searching using ZeroEntropy.
+ """ + + def __init__(self, collection_name: str = "articles"): + self.collection_name = collection_name + self.indexer = ZeroEntropyArticleIndexer(collection_name) + self.searcher = ZeroEntropyArticleSearcher(collection_name) + self.utils = ZeroEntropyUtils(collection_name) + + async def scrape_and_index(self): + """Scrape RSS feeds and index articles""" + # Initialize RSS feed URLs + rss_public_urls = [ + "https://www.public.fr/feed", + "https://www.public.fr/people/feed", + "https://www.public.fr/tele/feed", + "https://www.public.fr/mode/feed", + "https://www.public.fr/people/familles-royales/feed", + ] + + rss_vsd_urls = [ + "https://vsd.fr/actu-people/feed/", + "https://vsd.fr/tele/feed/", + "https://vsd.fr/societe/feed/", + "https://vsd.fr/culture/feed/", + "https://vsd.fr/loisirs/feed/", + ] + + # Initialize collection + await self.indexer.initialize_collection() + + # Extract content from RSS feeds + articles = [] + for url in rss_public_urls + rss_vsd_urls: + content = self.indexer.get_rss_feed_content(url) + if content: + articles.extend(content) + logger.info("Successfully extracted content from %s", url) + else: + logger.warning("Failed to extract content from %s", url) + + # Save all content to a JSON file for backup + with open("articles.json", "w", encoding="utf-8") as f: + json.dump(articles, f, ensure_ascii=False, indent=4) + + logger.info(f"Extracted {len(articles)} articles total") + + # Index articles in ZeroEntropy + if articles: + await self.indexer.index_articles(articles) + logger.info("Successfully scraped and indexed articles in ZeroEntropy.") + else: + logger.warning("No articles to index.") + + async def search_articles( + self, + query: str, + search_type: str = "documents", + k: int = 10, + filter_creator: str = None, + filter_category: str = None, + reranker: str = "zerank-1-small", + show_status: bool = False, + ): + """Search for articles""" + # Show status if requested + if show_status: + await self.searcher.get_collection_status() + + # Prepare filter if specified + filter_dict = {} + if filter_creator: + filter_dict["creator"] = {"$eq": filter_creator} + if filter_category: + filter_dict["categories"] = {"$eq": filter_category} + + filter_dict = filter_dict if filter_dict else None + + # Perform search based on type + if search_type == "documents": + results = await self.searcher.search_documents( + query=query, + k=k, + filter_dict=filter_dict, + reranker=reranker, + ) + self.searcher.display_document_results(results, query) + + elif search_type == "snippets": + results = await self.searcher.search_snippets( + query=query, + k=k, + filter_dict=filter_dict, + reranker=reranker, + ) + self.searcher.display_snippet_results(results, query) + + elif search_type == "pages": + results = await self.searcher.search_pages( + query=query, k=k, filter_dict=filter_dict + ) + self.searcher.display_page_results(results, query) + + elif search_type == "advanced": + results = await self.utils.search_and_rerank( + query=query, k=k * 2, rerank_top_n=k + ) + self.utils.display_advanced_results(results, query) + + return results + + async def manage_collections(self, action: str, collection_name: str = None): + """Manage collections (list, delete, status)""" + if action == "list": + collections = await self.utils.list_all_collections() + logger.info(f"Available collections: {collections}") + return collections + + elif action == "delete" and collection_name: + success = await self.utils.delete_collection(collection_name) + if success: + logger.info(f"Successfully deleted 
collection: {collection_name}") + else: + logger.info(f"Failed to delete collection: {collection_name}") + return success + + elif action == "status": + status = await self.searcher.get_collection_status() + return status + + else: + logger.info("Invalid action or missing collection name") + return None + + +async def main(): + # Set up argument parser + parser = argparse.ArgumentParser(description="ZeroEntropy RSS Article Manager") + subparsers = parser.add_subparsers(dest="command", help="Available commands") + + # Scrape command + scrape_parser = subparsers.add_parser("scrape", help="Scrape RSS feeds and index articles") + scrape_parser.add_argument("--collection", type=str, default="articles", + help="Collection name to use") + + # Search command + search_parser = subparsers.add_parser("search", help="Search for articles") + search_parser.add_argument("query", type=str, help="Search query string") + search_parser.add_argument("--k", type=int, default=10, help="Number of results to return") + search_parser.add_argument("--search-type", choices=["documents", "snippets", "pages", "advanced"], + default="documents", help="Type of search to perform") + search_parser.add_argument("--collection", type=str, default="articles", + help="Collection name to search in") + search_parser.add_argument("--filter-creator", type=str, help="Filter by creator/author") + search_parser.add_argument("--filter-category", type=str, help="Filter by category") + search_parser.add_argument("--status", action="store_true", + help="Show collection status before searching") + search_parser.add_argument("--reranker", type=str, default="zerank-1-small", + help="Reranker model to use") + + # Collection management command + manage_parser = subparsers.add_parser("manage", help="Manage collections") + manage_parser.add_argument("action", choices=["list", "delete", "status"], + help="Management action to perform") + manage_parser.add_argument("--collection", type=str, help="Collection name (required for delete)") + + # Parse arguments + args = parser.parse_args() + + if args.command == "scrape": + # Initialize manager and scrape + manager = ZeroEntropyArticleManager(args.collection) + await manager.scrape_and_index() + + elif args.command == "search": + # Initialize manager and search + manager = ZeroEntropyArticleManager(args.collection) + await manager.search_articles( + query=args.query, + search_type=args.search_type, + k=args.k, + filter_creator=args.filter_creator, + filter_category=args.filter_category, + reranker=args.reranker, + show_status=args.status + ) + + elif args.command == "manage": + # Initialize manager and manage collections + manager = ZeroEntropyArticleManager() + await manager.manage_collections( + action=args.action, + collection_name=args.collection + ) + + +if __name__ == "__main__": + # Run the main async function + asyncio.run(main()) diff --git a/guides/semantic_search_over_articles/backend/indexer_ze.py b/guides/semantic_search_over_articles/backend/indexer_ze.py new file mode 100644 index 0000000..b2f5563 --- /dev/null +++ b/guides/semantic_search_over_articles/backend/indexer_ze.py @@ -0,0 +1,178 @@ +# indexer_ze.py +import requests +from typing import List, Dict +from bs4 import BeautifulSoup +from dotenv import load_dotenv +from zeroentropy import AsyncZeroEntropy, ConflictError + +# Logger import +from logger import getLogger + +# Load environment variables +load_dotenv() + +# Configure logger to display log messages +logger = getLogger() + + +class ZeroEntropyArticleIndexer: + """ + 
ZeroEntropyArticleIndexer handles RSS feed scraping and article indexing using ZeroEntropy API. + """ + def __init__(self, collection_name: str = "articles"): + self.collection_name = collection_name + self.zclient = AsyncZeroEntropy() + + async def initialize_collection(self): + """ + Initialize ZeroEntropy collection. Creates a new collection in ZeroEntropy if it doesn't exist. + If the collection already exists, logs a message and continues. + + Raises + ------ + ConflictError + If there's a conflict during collection creation (handled gracefully) + """ + try: + await self.zclient.collections.add(collection_name=self.collection_name) + logger.info(f"Created new collection: {self.collection_name}") + except ConflictError: + logger.error(f"Collection '{self.collection_name}' already exists") + + @staticmethod + def get_rss_feed_content(url: str) -> List[Dict]: + """ + Extract content from an RSS feed URL and returns a list of dictionaries containing the extracted content. + + Parameters + ---------- + url : str + The RSS feed URL to extract content from + + Returns + ------- + List[Dict] + A list of dictionaries, each containing article information with keys: + - title : str + The article title + - creator : str + The article author/creator + - categories : List[str] + List of article categories + - description : str + Brief description of the article + - pub_date : str + Publication date of the article + - content : str + Full cleaned text content of the article + - source_url : str + The original RSS feed URL + + Raises + ------ + requests.exceptions.HTTPError + If the HTTP request fails + requests.exceptions.RequestException + If there's a general request error + """ + headers = { + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36" # noqa: E501 + } + + response = requests.get(url, headers=headers) + response.raise_for_status() + response.encoding = "utf-8" + + soup = BeautifulSoup(response.content, "xml") + items = soup.find_all("item") + content_list = [] + + for item in items: + title = item.find("title").get_text(strip=True) if item.find("title") else "N/A" + creator = item.find("dc:creator").get_text(strip=True) if item.find("dc:creator") else "N/A" + categories = [category.get_text(strip=True) for category in item.find_all("category")] + description = item.find("description").get_text(strip=True) if item.find("description") else "N/A" + publication_date = item.find("pubDate").get_text(strip=True) if item.find("pubDate") else "N/A" + content_encoded = ( + item.find("content:encoded").get_text(strip=True) if item.find("content:encoded") else "N/A" + ) + + # Clean up HTML content + content_text = "" + if content_encoded != "N/A": + content_encoded_soup = BeautifulSoup(content_encoded, "html.parser") + for script_or_style in content_encoded_soup(["script", "style"]): + script_or_style.decompose() + content_text = content_encoded_soup.get_text(separator=" ", strip=True).replace("\n", " ") + else: + description_soup = BeautifulSoup(description, "html.parser") + for script_or_style in description_soup(["script", "style"]): + script_or_style.decompose() + content_text = description_soup.get_text(separator=" ", strip=True).replace("\n", " ") + + content_list.append({ + "title": title, + "creator": creator, + "categories": categories, + "description": description, + "pub_date": publication_date, + "content": content_text, + "source_url": url + }) + + return content_list + + async def index_articles(self, articles: 
List[Dict]): + """ + Index articles in ZeroEntropy using the documents API. + + Takes a list of article dictionaries and indexes them in the ZeroEntropy + collection. Creates unique document paths and prepares metadata for each article. + + Parameters + ---------- + articles : List[Dict] + List of article dictionaries containing article information. + Each dictionary should have keys: title, creator, categories, + description, pub_date, content, source_url + """ + indexed_count = 0 + failed_count = 0 + + for idx, article in enumerate(articles): + try: + # Create unique document path + doc_path = f"article_{idx}_{hash(article['title'][:50])}" + + # Prepare content for indexing - combine title, description, and content + full_content = f"Title: {article['title']}\n\n" + full_content += f"Description: {article['description']}\n\n" + full_content += f"Content: {article['content']}" + + # Prepare metadata + metadata = { + "title": article["title"][:500], # Limit length for metadata + "creator": article["creator"][:200], + "categories": ", ".join(article["categories"][:5])[:300], # Limit categories + "pub_date": article["pub_date"][:100], + "source_url": article["source_url"][:300], + "type": "rss_article", + } + + # Add document to ZeroEntropy + await self.zclient.documents.add( + collection_name=self.collection_name, + path=doc_path, + content={"type": "text", "text": full_content}, + metadata=metadata, + ) + + indexed_count += 1 + if indexed_count % 10 == 0: + logger.info(f"Indexed {indexed_count} articles...") + + except ConflictError: + logger.warning(f"Article {idx} already exists, skipping...") + continue + + logger.info(f"Indexing complete. Success: {indexed_count}, Failed: {failed_count}") diff --git a/guides/semantic_search_over_articles/backend/logger/__init__.py b/guides/semantic_search_over_articles/backend/logger/__init__.py new file mode 100644 index 0000000..ae07c67 --- /dev/null +++ b/guides/semantic_search_over_articles/backend/logger/__init__.py @@ -0,0 +1 @@ +from .logging import CustomLogRecord, getLogger # noqa: F401 diff --git a/guides/semantic_search_over_articles/backend/logger/logging.py b/guides/semantic_search_over_articles/backend/logger/logging.py new file mode 100644 index 0000000..5d4a26d --- /dev/null +++ b/guides/semantic_search_over_articles/backend/logger/logging.py @@ -0,0 +1,121 @@ +from coloredlogs import ColoredFormatter +import logging +import socket +import os + + +class CustomLogRecord(logging.LogRecord): + """ + A custom log record class that extends logging.LogRecord. + + Attributes + ---------- + hostname : str + The hostname where the log record originated. + """ + + def __init__(self, *args, **kwargs): + """ + Initialize the object with the hostname of the system. + + Parameters + ---------- + *args : Variable length argument list. + **kwargs : Keyword arguments. + + Returns + ------- + None + + Notes + ----- + This function initializes the object with the hostname of the system using the socket module. + + Raises + ------ + None + """ + super().__init__(*args, **kwargs) + self.hostname = socket.gethostname() + + +def getLogger(name: str = "main", loglevel: str = "INFO", logdir: str = "./logs", stream=None, color_logs=True): + """ + Get a logger object with custom settings and formatters. + + Parameters: + name (str): Name of the logger. + loglevel (str): Log level for the logger. + logdir (str): Directory to store log files. + stream (stream): Stream to write logs to. + color_logs (bool): Whether to colorize logs. 
+ + Returns: + logger: A logger object with custom settings and formatters. + """ + logging.setLogRecordFactory(CustomLogRecord) + logger = logging.getLogger(name) + logger.propagate = False + + if logger.handlers: + return logger + else: + loglevel = getattr(logging, loglevel.upper(), logging.INFO) + logger.setLevel(loglevel) + + log_format = "%(asctime)s | %(module)s | %(levelname)-8s | %(message)s [%(filename)s:%(lineno)s]" + + simple_formatter = logging.Formatter(log_format, datefmt="%Y-%m-%d %H:%M:%S") + + colored_formatter = ColoredFormatter( + log_format, + datefmt="%Y-%m-%d %H:%M:%S", + level_styles={ + "critical": {"bold": True, "color": "red"}, + "debug": {"color": "green"}, + "error": {"color": "red"}, + "info": {"color": "white"}, + "notice": {"color": "magenta"}, + "spam": {"color": "green", "faint": True}, + "success": {"bold": True, "color": "green"}, + "verbose": {"color": "blue"}, + "warning": {"color": "yellow"}, + }, + field_styles={ + "asctime": {"color": "green"}, + "hostname": {"color": "magenta"}, + "levelname": {"bold": True, "color": "magenta"}, + "module": {"color": "blue"}, + "programname": {"color": "cyan"}, + "username": {"color": "yellow"}, + }, + ) + + if not os.path.isdir(logdir): + os.mkdir(logdir) + + fileHandler = logging.FileHandler(os.path.join(logdir, "logs.txt")) + fileHandler.setLevel(logging.DEBUG) + fileHandler.setFormatter(simple_formatter) + + streamHandler = logging.StreamHandler(stream=stream) + streamHandler.setLevel(loglevel) + if color_logs: + streamHandler.setFormatter(colored_formatter) + else: + streamHandler.setFormatter(simple_formatter) + + logger.addHandler(fileHandler) + logger.addHandler(streamHandler) + + return logger + + +# a simple usecase +if __name__ == "__main__": + logger = getLogger(loglevel="DEBUG") + logger.debug("A message only developers care about") + logger.info("Curious users might want to know this") + logger.warning("Something is wrong and any user should be informed") + logger.error("Serious stuff, this is red for a reason") + logger.critical("!OH NO everything is on fire") diff --git a/guides/semantic_search_over_articles/backend/search_ze.py b/guides/semantic_search_over_articles/backend/search_ze.py new file mode 100644 index 0000000..7058ff8 --- /dev/null +++ b/guides/semantic_search_over_articles/backend/search_ze.py @@ -0,0 +1,359 @@ +# search_ze.py +from dotenv import load_dotenv +from zeroentropy import AsyncZeroEntropy + +# Logger import +from logger import getLogger + +# Load environment variables +load_dotenv() + +# Configure logger to display log messages +logger = getLogger() + + +class ZeroEntropyArticleSearcher: + """ + ZeroEntropyArticleSearcher handles article searching using ZeroEntropy's advanced retrieval capabilities. + + Parameters + ---------- + collection_name : str, optional + Name of the ZeroEntropy collection to search in, by default "articles" + + Attributes + ---------- + collection_name : str + The name of the ZeroEntropy collection + zclient : AsyncZeroEntropy + The ZeroEntropy async client instance + """ + + def __init__(self, collection_name: str = "articles"): + self.collection_name = collection_name + self.zclient = AsyncZeroEntropy() + + async def search_documents( + self, + query: str, + k: int = 10, + filter_dict: dict = None, + include_metadata: bool = True, + reranker: str = "zerank-1-small", + ) -> list: + """Search for documents using ZeroEntropy's top_documents query. 
+ + Parameters + ---------- + query : str + The search query string + k : int, optional + Number of top results to return, by default 10 + filter_dict : dict, optional + Dictionary containing filters for metadata fields, by default None + include_metadata : bool, optional + Whether to include document metadata in results, by default True + reranker : str, optional + Name of the reranker model to use, by default "zerank-1-small" + + Returns + ------- + List: + List of document result dictionaries, each containing: + - path : str + Unique document path identifier + - score : float + Relevance score for the document + - file_url : str or None + URL to access the full document file + - metadata : Dict + Document metadata including title, creator, categories, etc. + """ + response = await self.zclient.queries.top_documents( + collection_name=self.collection_name, + query=query, + k=k, + filter=filter_dict, + include_metadata=include_metadata, + reranker=reranker, + latency_mode="low", + ) + + # Convert Result objects to dictionaries + results = [] + for result in response.results: + result_dict = { + 'path': result.path, + 'score': result.score, + 'file_url': getattr(result, 'file_url', None), + 'metadata': getattr(result, 'metadata', {}) + } + results.append(result_dict) + + return results + + async def search_snippets( + self, + query: str, + k: int = 10, + filter_dict: dict = None, + precise_responses: bool = True, + reranker: str = "zerank-1-small", + ) -> list: + """ + Search for specific snippets using ZeroEntropy's top_snippets query. + + Parameters + ---------- + query : str + The search query string + k : int, optional + Number of top snippets to return, by default 10 + filter_dict : dict, optional + Dictionary containing filters for metadata fields, by default None + precise_responses : bool, optional + Whether to return precise snippet boundaries, by default True + reranker : str, optional + Name of the reranker model to use, by default "zerank-1-small" + + Returns + ------- + List: + List of snippet result dictionaries, each containing: + - path : str + Document path where the snippet was found + - score : float + Relevance score for the snippet + - start_index : int + Starting character index of the snippet in the document + - end_index : int + Ending character index of the snippet in the document + - page_span : List[int] + List indicating which pages the snippet spans + - content : str + The actual text content of the snippet + - metadata : Dict + Document metadata including title, creator, categories, etc. + """ + response = await self.zclient.queries.top_snippets( + collection_name=self.collection_name, + query=query, + k=k, + filter=filter_dict, + precise_responses=precise_responses, + include_document_metadata=True, + reranker=reranker, + ) + + # Convert Result objects to dictionaries + results = [] + for result in response.results: + result_dict = { + 'path': result.path, + 'score': result.score, + 'start_index': getattr(result, 'start_index', 0), + 'end_index': getattr(result, 'end_index', 0), + 'page_span': getattr(result, 'page_span', []), + 'content': getattr(result, 'content', ''), + 'metadata': getattr(result, 'metadata', {}) + } + results.append(result_dict) + + return results + + async def search_pages( + self, + query: str, + k: int = 10, + filter_dict: dict = None, + include_content: bool = True, + ) -> list: + """ + Search for pages using ZeroEntropy's top_pages query. 
+ + Parameters + ---------- + query : str + The search query string + k : int, optional + Number of top pages to return, by default 10 + filter_dict : dict, optional + Dictionary containing filters for metadata fields, by default None + include_content : bool, optional + Whether to include page content in results, by default True + + Returns + ------- + List: + List of page result dictionaries, each containing: + - path : str + Document path containing the page + - score : float + Relevance score for the page + - page_index : int + Index of the page within the document (0-based) + - content : str + The text content of the page (if include_content=True) + - metadata : Dict + Document metadata including title, creator, categories, etc. + """ + response = await self.zclient.queries.top_pages( + collection_name=self.collection_name, + query=query, + k=k, + filter=filter_dict, + include_content=include_content, + latency_mode="low", + ) + + # Convert Result objects to dictionaries + results = [] + for result in response.results: + result_dict = { + 'path': result.path, + 'score': result.score, + 'page_index': getattr(result, 'page_index', 0), + 'content': getattr(result, 'content', ''), + 'metadata': getattr(result, 'metadata', {}) + } + results.append(result_dict) + + return results + + @staticmethod + def display_document_results(results: list, query: str) -> None: + """ + Display document search results in a formatted way. + + Parameters + ---------- + results : list + List of document result dictionaries from search_documents() + query : str + The original search query string for display purposes + """ + if not results: + print(f"No results found for query: '{query}'") + return + + print(f"\n{'=' * 60}") + print(f"DOCUMENT SEARCH RESULTS FOR: '{query}'") + print(f"Found {len(results)} results") + print(f"{'=' * 60}\n") + + for i, result in enumerate(results, 1): + print(f"Result {i}") + print(f"Document Path: {result['path']}") + print(f"Relevance Score: {result['score']:.4f}") + + if result.get("metadata"): + metadata = result["metadata"] + print(f"Title: {metadata.get('title', 'N/A')}") + print(f"Author: {metadata.get('creator', 'N/A')}") + print(f"Publication Date: {metadata.get('pub_date', 'N/A')}") + print(f"Categories: {metadata.get('categories', 'N/A')}") + print(f"Source URL: {metadata.get('source_url', 'N/A')}") + + # NOTE: file_url contains sensitive tokens - UNCOMMENT ONLY FOR DEBUGGING/TESTING + if result.get("file_url"): + # print(f"File URL: {result['file_url']}") + print("File URL: [Available - hidden for security]") + + print("\n" + "-" * 50 + "\n") + + @staticmethod + def display_snippet_results(results: list, query: str): + """Display snippet search results in a formatted way. 
+ + Parameters + ---------- + results : list + List of snippet result dictionaries from search_snippets() + query : str + The original search query string for display purposes + """ + if not results: + print(f"No snippets found for query: '{query}'") + return + + print(f"\n{'=' * 60}") + print(f"SNIPPET SEARCH RESULTS FOR: '{query}'") + print(f"Found {len(results)} snippets") + print(f"{'=' * 60}\n") + + for i, result in enumerate(results, 1): + print(f"Snippet {i}") + print(f"Document Path: {result['path']}") + print(f"Relevance Score: {result['score']:.4f}") + print(f"Character Range: {result['start_index']}-{result['end_index']}") + print(f"Page Span: {result['page_span']}") + + if result.get("content"): + print(f"Content: {result['content'][:300]}...") + + if result.get("metadata"): + metadata = result["metadata"] + print(f"Title: {metadata.get('title', 'N/A')}") + print(f"Author: {metadata.get('creator', 'N/A')}") + + print("\n" + "-" * 40 + "\n") + + @staticmethod + def display_page_results(results: list, query: str): + """ + Display page search results in a formatted way. + + Parameters + ---------- + results : list + List of page result dictionaries from search_pages() + query : str + The original search query string for display purposes + """ + if not results: + print(f"No page results found for query: '{query}'") + return + + print(f"\n{'=' * 60}") + print(f"PAGE SEARCH RESULTS FOR: '{query}'") + print(f"Found {len(results)} page results") + print(f"{'=' * 60}\n") + + for i, result in enumerate(results, 1): + print(f"Page {i}") + print(f"Document Path: {result['path']}") + print(f"Page Index: {result['page_index']}") + print(f"Relevance Score: {result['score']:.4f}") + + if result.get("content"): + print(f"Content: {result['content'][:300]}...") + + if result.get("metadata"): + metadata = result["metadata"] + print(f"Title: {metadata.get('title', 'N/A')}") + print(f"Author: {metadata.get('creator', 'N/A')}") + + print("\n" + "-" * 40 + "\n") + + async def get_collection_status(self): + """Get and display collection status information. + + Returns + ------- + status : object + ZeroEntropy status response object containing collection statistics + + Notes + ----- + Prints collection status including total documents, indexed documents, + documents currently being parsed/indexed, and failed documents. + This is useful for monitoring the health and progress of document indexing. 
+ """ + status = await self.zclient.status.get(collection_name=self.collection_name) + print(f"\nCollection Status for '{self.collection_name}':") + print(f"Total Documents: {status.num_documents}") + print(f"Indexed Documents: {status.num_indexed_documents}") + print(f"Parsing Documents: {status.num_parsing_documents}") + print(f"Indexing Documents: {status.num_indexing_documents}") + print(f"Failed Documents: {status.num_failed_documents}") + return status diff --git a/guides/semantic_search_over_articles/backend/utils_ze.py b/guides/semantic_search_over_articles/backend/utils_ze.py new file mode 100644 index 0000000..6122510 --- /dev/null +++ b/guides/semantic_search_over_articles/backend/utils_ze.py @@ -0,0 +1,371 @@ +# utils_ze.py +from dotenv import load_dotenv +from zeroentropy import AsyncZeroEntropy + +# Logger import +from logger import getLogger + +# Load environment variables +load_dotenv() + +# Configure logger to display log messages +logger = getLogger() + + +class ZeroEntropyUtils: + """ + Utility class for advanced ZeroEntropy operations including reranking, collection management, and batch operations. + + Parameters + ---------- + collection_name : str, optional + Name of the default ZeroEntropy collection to work with, by default "articles" + + Attributes + ---------- + collection_name : str + The default collection name for operations + zclient : AsyncZeroEntropy + The ZeroEntropy async client instance + """ + def __init__(self, collection_name: str = "articles"): + self.collection_name = collection_name + self.zclient = AsyncZeroEntropy() + + async def rerank_documents( + self, + query: str, + document_texts: list, + model: str = "zerank-1-small", + top_n: int = 10, + ) -> list: + """ + Use ZeroEntropy's reranking model to rerank a list of documents. + + Parameters + ---------- + query : str + The search query to use for reranking + document_texts : list + List of document text strings to be reranked + model : str, optional + Name of the reranking model to use, by default "zerank-1-small" + top_n : int, optional + Number of top reranked results to return, by default 10 + + Returns + ------- + list + List of reranked result dictionaries, each containing: + - index : int + Original index of the document in the input list + - relevance_score : float + Reranking relevance score for the document + """ + response = await self.zclient.models.rerank( + query=query, documents=document_texts, model=model, top_n=top_n + ) + return response.results + + async def list_all_collections(self) -> list: + """ + List all available collections in the ZeroEntropy instance. + + Returns + ------- + list + List of collection names available in the ZeroEntropy instance + """ + response = await self.zclient.collections.get_list() + return response.collection_names + + async def delete_collection(self, collection_name: str) -> bool: + """ + Delete a collection from ZeroEntropy. + + Parameters + ---------- + collection_name : str + Name of the collection to delete + + Returns + ------- + bool + True if deletion was successful + """ + await self.zclient.collections.delete(collection_name=collection_name) + logger.info(f"Successfully deleted collection: {collection_name}") + return True + + async def list_documents_in_collection( + self, collection_name: str = None, limit: int = 100 + ) -> list: + """ + List all documents in a collection with pagination support. + + Parameters + ---------- + collection_name : str, optional + Name of the collection to list documents from. 
If None, uses the default + collection name, by default None + limit : int, optional + Maximum number of documents to return, by default 100 + + Returns + ------- + list + List of document metadata dictionaries containing information about + each document in the collection + """ + if collection_name is None: + collection_name = self.collection_name + + response = await self.zclient.documents.get_info_list( + collection_name=collection_name, limit=limit + ) + return response.documents + + async def get_document_info( + self, + path: str, + collection_name: str = None, + include_content: bool = False, + ) -> dict: + """ + Get detailed information about a specific document. + + Parameters + ---------- + path : str + Unique path identifier of the document + collection_name : str, optional + Name of the collection containing the document. If None, uses the default + collection name, by default None + include_content : bool, optional + Whether to include the full document content in the response, by default False + + Returns + ------- + Optional[Dict] + Dictionary containing document information including metadata, status, + and optionally the full content. Returns None if document not found. + """ + if collection_name is None: + collection_name = self.collection_name + + response = await self.zclient.documents.get_info( + collection_name=collection_name, + path=path, + include_content=include_content, + ) + return response.document + + async def update_document_metadata( + self, path: str, metadata: dict, collection_name: str = None + ) -> bool: + """ + Update metadata for a specific document. + + Parameters + ---------- + path : str + Unique path identifier of the document to update + metadata : Dict[str, str] + Dictionary of metadata keys and values to update + collection_name : str, optional + Name of the collection containing the document. If None, uses the default + collection name, by default None + + Returns + ------- + bool + True if the metadata update was successful + """ + if collection_name is None: + collection_name = self.collection_name + + await self.zclient.documents.update( + collection_name=collection_name, path=path, metadata=metadata + ) + logger.info(f"Successfully updated metadata for document: {path}") + return True + + async def delete_document( + self, path: str, collection_name: str = None + ) -> bool: + """ + Delete a specific document from a collection. + + Parameters + ---------- + path : str + Unique path identifier of the document to delete + collection_name : str, optional + Name of the collection containing the document. If None, uses the default + collection name, by default None + + Returns + ------- + bool + True if the document deletion was successful + """ + if collection_name is None: + collection_name = self.collection_name + + await self.zclient.documents.delete( + collection_name=collection_name, path=path + ) + logger.info(f"Successfully deleted document: {path}") + return True + + async def batch_delete_documents( + self, paths: list, collection_name: str = None + ) -> dict: + """ + Delete multiple documents in batch operation. + + Parameters + ---------- + paths : list + List of document path identifiers to delete + collection_name : str, optional + Name of the collection containing the documents. If None, uses the default + collection name, by default None + + Returns + ------- + Dict[str, bool] + Dictionary mapping each document path to its deletion success status. + True indicates successful deletion, False indicates failure. 
+ """ + if collection_name is None: + collection_name = self.collection_name + + results = {} + for path in paths: + success = await self.delete_document(path, collection_name) + results[path] = success + + return results + + async def search_and_rerank( + self, + query: str, + k: int = 20, + rerank_top_n: int = 10, + collection_name: str = None, + ) -> list: + """ + Advanced search that first retrieves more documents, then reranks them for improved relevance. + + Parameters + ---------- + query : str + The search query string + k : int, optional + Number of initial documents to retrieve before reranking, by default 20 + rerank_top_n : int, optional + Number of top documents to return after reranking, by default 10 + collection_name : str, optional + Name of the collection to search in. If None, uses the default + collection name, by default None + + Returns + ------- + list + List of reranked document result dictionaries, each containing: + - All original document fields (path, metadata, etc.) + - original_score : float + The original search relevance score + - rerank_score : float + The improved relevance score from reranking + """ + if collection_name is None: + collection_name = self.collection_name + + # First, get more documents than needed + search_results = await self.zclient.queries.top_documents( + collection_name=collection_name, + query=query, + k=k, + include_metadata=True, + latency_mode="low", + ) + + if not search_results.results: + return [] + + # Extract document texts for reranking + document_texts = [] + for result in search_results.results: + # Get full document content + doc_info = await self.get_document_info( + path=result["path"], + collection_name=collection_name, + include_content=True, + ) + if doc_info and doc_info.get("content"): + document_texts.append(doc_info["content"]) + else: + # Fallback to using metadata if content not available + metadata = result.get("metadata", {}) + fallback_text = ( + f"{metadata.get('title', '')} {metadata.get('description', '')}" + ) + document_texts.append(fallback_text) + + # Rerank the documents + rerank_results = await self.rerank_documents( + query=query, + document_texts=document_texts, + top_n=min(rerank_top_n, len(document_texts)), + ) + + # Combine rerank results with original metadata + final_results = [] + for rerank_result in rerank_results: + original_result = search_results.results[rerank_result["index"]] + final_results.append( + { + **original_result, + "rerank_score": rerank_result["relevance_score"], + "original_score": original_result["score"], + } + ) + + return final_results + + def display_advanced_results(self, results: list, query: str): + """ + Display results with both original and rerank scores in a formatted way. 
+ + Parameters + ---------- + results : list + List of advanced search result dictionaries from search_and_rerank() + query : str + The original search query string for display purposes + """ + if not results: + print(f"No results found for query: '{query}'") + return + + print(f"\n{'='*70}") + print(f"ADVANCED SEARCH & RERANK RESULTS FOR: '{query}'") + print(f"Found {len(results)} results") + print(f"{'='*70}\n") + + for i, result in enumerate(results, 1): + print(f"Result {i}") + print(f"Document Path: {result['path']}") + print(f"Original Score: {result['original_score']:.4f}") + print(f"Rerank Score: {result['rerank_score']:.4f}") + + if result.get("metadata"): + metadata = result["metadata"] + print(f"Title: {metadata.get('title', 'N/A')}") + print(f"Author: {metadata.get('creator', 'N/A')}") + print(f"Publication Date: {metadata.get('pub_date', 'N/A')}") + print(f"Categories: {metadata.get('categories', 'N/A')}") + + print("\n" + "-" * 60 + "\n") diff --git a/guides/semantic_search_over_articles/demo_notebook.ipynb b/guides/semantic_search_over_articles/demo_notebook.ipynb new file mode 100644 index 0000000..4d31b4d --- /dev/null +++ b/guides/semantic_search_over_articles/demo_notebook.ipynb @@ -0,0 +1,478 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 7, + "id": "ff59aa32", + "metadata": {}, + "outputs": [], + "source": [ + "# main.py\n", + "import json\n", + "from dotenv import load_dotenv\n", + "\n", + "# Internal imports\n", + "from indexer_ze import ZeroEntropyArticleIndexer\n", + "from search_ze import ZeroEntropyArticleSearcher\n", + "from utils_ze import ZeroEntropyUtils\n", + "from logger import getLogger\n", + "\n", + "# Load environment variables\n", + "load_dotenv()\n", + "\n", + "# Configure logger to display log messages\n", + "logger = getLogger()\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "c6413325", + "metadata": {}, + "outputs": [], + "source": [ + "class ZeroEntropyArticleManager:\n", + " \"\"\"\n", + " Main class that orchestrates RSS scraping, indexing, and searching using ZeroEntropy.\n", + " \"\"\"\n", + "\n", + " def __init__(self, collection_name: str = \"articles\"):\n", + " self.collection_name = collection_name\n", + " self.indexer = ZeroEntropyArticleIndexer(collection_name)\n", + " self.searcher = ZeroEntropyArticleSearcher(collection_name)\n", + " self.utils = ZeroEntropyUtils(collection_name)\n", + "\n", + " async def scrape_and_index(self):\n", + " \"\"\"Scrape RSS feeds and index articles\"\"\"\n", + " # Initialize RSS feed URLs\n", + " rss_public_urls = [\n", + " \"https://www.public.fr/feed\",\n", + " \"https://www.public.fr/people/feed\",\n", + " \"https://www.public.fr/tele/feed\",\n", + " \"https://www.public.fr/mode/feed\",\n", + " \"https://www.public.fr/people/familles-royales/feed\",\n", + " ]\n", + "\n", + " rss_vsd_urls = [\n", + " \"https://vsd.fr/actu-people/feed/\",\n", + " \"https://vsd.fr/tele/feed/\",\n", + " \"https://vsd.fr/societe/feed/\",\n", + " \"https://vsd.fr/culture/feed/\",\n", + " \"https://vsd.fr/loisirs/feed/\",\n", + " ]\n", + "\n", + " # Initialize collection\n", + " await self.indexer.initialize_collection()\n", + "\n", + " # Extract content from RSS feeds\n", + " articles = []\n", + " for url in rss_public_urls + rss_vsd_urls:\n", + " content = self.indexer.get_rss_feed_content(url)\n", + " if content:\n", + " articles.extend(content)\n", + " logger.info(\"Successfully extracted content from %s\", url)\n", + " else:\n", + " logger.warning(\"Failed to extract 
content from %s\", url)\n", + "\n", + " # Save all content to a JSON file for backup\n", + " with open(\"articles.json\", \"w\", encoding=\"utf-8\") as f:\n", + " json.dump(articles, f, ensure_ascii=False, indent=4)\n", + "\n", + " logger.info(f\"Extracted {len(articles)} articles total\")\n", + "\n", + " # Index articles in ZeroEntropy\n", + " if articles:\n", + " await self.indexer.index_articles(articles)\n", + " logger.info(\"Successfully scraped and indexed articles in ZeroEntropy.\")\n", + " else:\n", + " logger.warning(\"No articles to index.\")\n", + "\n", + " async def search_articles(\n", + " self,\n", + " query: str,\n", + " search_type: str = \"documents\",\n", + " k: int = 10,\n", + " filter_creator: str = None,\n", + " filter_category: str = None,\n", + " reranker: str = \"zerank-1-small\",\n", + " show_status: bool = False,\n", + " ):\n", + " \"\"\"Search for articles\"\"\"\n", + " # Show status if requested\n", + " if show_status:\n", + " await self.searcher.get_collection_status()\n", + "\n", + " # Prepare filter if specified\n", + " filter_dict = {}\n", + " if filter_creator:\n", + " filter_dict[\"creator\"] = {\"$eq\": filter_creator}\n", + " if filter_category:\n", + " filter_dict[\"categories\"] = {\"$eq\": filter_category}\n", + "\n", + " filter_dict = filter_dict if filter_dict else None\n", + "\n", + " # Perform search based on type\n", + " if search_type == \"documents\":\n", + " results = await self.searcher.search_documents(\n", + " query=query,\n", + " k=k,\n", + " filter_dict=filter_dict,\n", + " reranker=reranker,\n", + " )\n", + " self.searcher.display_document_results(results, query)\n", + "\n", + " elif search_type == \"snippets\":\n", + " results = await self.searcher.search_snippets(\n", + " query=query,\n", + " k=k,\n", + " filter_dict=filter_dict,\n", + " reranker=reranker,\n", + " )\n", + " self.searcher.display_snippet_results(results, query)\n", + "\n", + " elif search_type == \"pages\":\n", + " results = await self.searcher.search_pages(\n", + " query=query, k=k, filter_dict=filter_dict\n", + " )\n", + " self.searcher.display_page_results(results, query)\n", + "\n", + " elif search_type == \"advanced\":\n", + " results = await self.utils.search_and_rerank(\n", + " query=query, k=k * 2, rerank_top_n=k\n", + " )\n", + " self.utils.display_advanced_results(results, query)\n", + "\n", + " return results\n", + "\n", + " async def manage_collections(self, action: str, collection_name: str = None):\n", + " \"\"\"Manage collections (list, delete, status)\"\"\"\n", + " if action == \"list\":\n", + " collections = await self.utils.list_all_collections()\n", + " print(f\"Available collections: {collections}\")\n", + " return collections\n", + "\n", + " elif action == \"delete\" and collection_name:\n", + " success = await self.utils.delete_collection(collection_name)\n", + " if success:\n", + " print(f\"Successfully deleted collection: {collection_name}\")\n", + " else:\n", + " print(f\"Failed to delete collection: {collection_name}\")\n", + " return success\n", + "\n", + " elif action == \"status\":\n", + " status = await self.searcher.get_collection_status()\n", + " return status\n", + "\n", + " else:\n", + " print(\"Invalid action or missing collection name\")\n", + " return None\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "3e0d6bbe", + "metadata": {}, + "outputs": [], + "source": [ + "collection = \"my_articles\"\n", + "manager = ZeroEntropyArticleManager(collection)" + ] + }, + { + "cell_type": "code", + "execution_count": null, 
+ "id": "ae95b984", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2025-08-06 10:30:52\u001b[0m | \u001b[34mindexer_ze\u001b[0m | \u001b[1;35mERROR \u001b[0m | \u001b[31mCollection 'my_articles' already exists\u001b[0m [indexer_ze.py:40]\n", + "\u001b[32m2025-08-06 10:31:04\u001b[0m | \u001b[34m1820264054\u001b[0m | \u001b[1;35mINFO \u001b[0m | \u001b[37mSuccessfully extracted content from https://www.public.fr/feed\u001b[0m [1820264054.py:40]\n", + "\u001b[32m2025-08-06 10:31:06\u001b[0m | \u001b[34m1820264054\u001b[0m | \u001b[1;35mINFO \u001b[0m | \u001b[37mSuccessfully extracted content from https://www.public.fr/people/feed\u001b[0m [1820264054.py:40]\n" + ] + } + ], + "source": [ + "await manager.scrape_and_index()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4c271f12", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "============================================================\n", + "DOCUMENT SEARCH RESULTS FOR: 'famille royale'\n", + "Found 10 results\n", + "============================================================\n", + "\n", + "Result 1\n", + "Document Path: article_204_2420503972517589745\n", + "Relevance Score: 1.1434\n", + "Title: Prince Harry et Prince William : leur cousine de 20 ans retrouvée morte à proximité d’une arme à feu\n", + "Author: Elisabeth Sall\n", + "Publication Date: Tue, 22 Jul 2025 07:45:00 +0000\n", + "Categories: People, Royauté, Famille royale britannique, Mort, Prince Harry\n", + "Source URL: https://www.public.fr/people/familles-royales/feed\n", + "File URL: [Available - hidden for security]\n", + "\n", + "--------------------------------------------------\n", + "\n", + "Result 2\n", + "Document Path: article_244_4511532550014461900\n", + "Relevance Score: 1.0142\n", + "Title: Prince Harry: \"Les médecins ont recommandé au roi de ne plus parler à son fils\", Stéphane Bern s’en mêle\n", + "Author: Clément Garin\n", + "Publication Date: Sat, 03 May 2025 07:50:00 +0000\n", + "Categories: People, Royauté, Cancer, Clash, Famille royale britannique\n", + "Source URL: https://www.public.fr/people/familles-royales/feed\n", + "File URL: [Available - hidden for security]\n", + "\n", + "--------------------------------------------------\n", + "\n", + "Result 3\n", + "Document Path: article_204_5642323426295626288\n", + "Relevance Score: 0.8656\n", + "Title: Prince Harry et Prince William : leur cousine de 20 ans retrouvée morte à proximité d’une arme à feu\n", + "Author: Elisabeth Sall\n", + "Publication Date: Tue, 22 Jul 2025 07:45:00 +0000\n", + "Categories: People, Royauté, Famille royale britannique, Mort, Prince Harry\n", + "Source URL: https://www.public.fr/people/familles-royales/feed\n", + "File URL: [Available - hidden for security]\n", + "\n", + "--------------------------------------------------\n", + "\n", + "Result 4\n", + "Document Path: article_244_-4945558259102855923\n", + "Relevance Score: 0.7137\n", + "Title: Prince Harry: \"Les médecins ont recommandé au roi de ne plus parler à son fils\", Stéphane Bern s’en mêle\n", + "Author: Clément Garin\n", + "Publication Date: Sat, 03 May 2025 07:50:00 +0000\n", + "Categories: People, Royauté, Cancer, Clash, Famille royale britannique\n", + "Source URL: https://www.public.fr/people/familles-royales/feed\n", + "File URL: [Available - hidden for security]\n", + "\n", + "--------------------------------------------------\n", + "\n", + "Result 5\n", + "Document 
Path: article_204_-5586505152836586607\n", + "Relevance Score: 0.5834\n", + "Title: Prince Harry et Prince William : leur cousine de 20 ans retrouvée morte à proximité d’une arme à feu\n", + "Author: Elisabeth Sall\n", + "Publication Date: Tue, 22 Jul 2025 07:45:00 +0000\n", + "Categories: People, Royauté, Famille royale britannique, Mort, Prince Harry\n", + "Source URL: https://www.public.fr/people/familles-royales/feed\n", + "File URL: [Available - hidden for security]\n", + "\n", + "--------------------------------------------------\n", + "\n", + "Result 6\n", + "Document Path: article_204_4431767949249707685\n", + "Relevance Score: 0.5344\n", + "Title: Prince Harry et Prince William : leur cousine de 20 ans retrouvée morte à proximité d’une arme à feu\n", + "Author: Elisabeth Sall\n", + "Publication Date: Tue, 22 Jul 2025 07:45:00 +0000\n", + "Categories: People, Royauté, Famille royale britannique, Mort, Prince Harry\n", + "Source URL: https://www.public.fr/people/familles-royales/feed\n", + "File URL: [Available - hidden for security]\n", + "\n", + "--------------------------------------------------\n", + "\n", + "Result 7\n", + "Document Path: article_204_-7449109881057781290\n", + "Relevance Score: 0.4924\n", + "Title: Prince Harry et Prince William : leur cousine de 20 ans retrouvée morte à proximité d’une arme à feu\n", + "Author: Elisabeth Sall\n", + "Publication Date: Tue, 22 Jul 2025 07:45:00 +0000\n", + "Categories: People, Royauté, Famille royale britannique, Mort, Prince Harry\n", + "Source URL: https://www.public.fr/people/familles-royales/feed\n", + "File URL: [Available - hidden for security]\n", + "\n", + "--------------------------------------------------\n", + "\n", + "Result 8\n", + "Document Path: article_244_1906632056819888007\n", + "Relevance Score: 0.4643\n", + "Title: Prince Harry: \"Les médecins ont recommandé au roi de ne plus parler à son fils\", Stéphane Bern s’en mêle\n", + "Author: Clément Garin\n", + "Publication Date: Sat, 03 May 2025 07:50:00 +0000\n", + "Categories: People, Royauté, Cancer, Clash, Famille royale britannique\n", + "Source URL: https://www.public.fr/people/familles-royales/feed\n", + "File URL: [Available - hidden for security]\n", + "\n", + "--------------------------------------------------\n", + "\n", + "Result 9\n", + "Document Path: article_244_522793638848069728\n", + "Relevance Score: 0.4395\n", + "Title: Prince Harry: \"Les médecins ont recommandé au roi de ne plus parler à son fils\", Stéphane Bern s’en mêle\n", + "Author: Clément Garin\n", + "Publication Date: Sat, 03 May 2025 07:50:00 +0000\n", + "Categories: People, Royauté, Cancer, Clash, Famille royale britannique\n", + "Source URL: https://www.public.fr/people/familles-royales/feed\n", + "File URL: [Available - hidden for security]\n", + "\n", + "--------------------------------------------------\n", + "\n", + "Result 10\n", + "Document Path: article_204_5853657413749066345\n", + "Relevance Score: 0.4209\n", + "Title: Prince Harry et Prince William : leur cousine de 20 ans retrouvée morte à proximité d’une arme à feu\n", + "Author: Elisabeth Sall\n", + "Publication Date: Tue, 22 Jul 2025 07:45:00 +0000\n", + "Categories: People, Royauté, Famille royale britannique, Mort, Prince Harry\n", + "Source URL: https://www.public.fr/people/familles-royales/feed\n", + "File URL: [Available - hidden for security]\n", + "\n", + "--------------------------------------------------\n", + "\n" + ] + }, + { + "data": { + "text/plain": [ + "[{'path': 'article_204_2420503972517589745',\n", 
+ " 'score': 1.1434071251552826,\n", + " 'file_url': '[Available - hidden for security]',\n", + " 'metadata': {'type': 'rss_article',\n", + " 'title': 'Prince Harry et Prince William : leur cousine de 20 ans retrouvée morte à proximité d’une arme à feu',\n", + " 'creator': 'Elisabeth Sall',\n", + " 'pub_date': 'Tue, 22 Jul 2025 07:45:00 +0000',\n", + " 'categories': 'People, Royauté, Famille royale britannique, Mort, Prince Harry',\n", + " 'source_url': 'https://www.public.fr/people/familles-royales/feed'}},\n", + " {'path': 'article_244_4511532550014461900',\n", + " 'score': 1.0141609646223106,\n", + " 'file_url': 'Available - hidden for security',\n", + " 'metadata': {'type': 'rss_article',\n", + " 'title': 'Prince Harry: \"Les médecins ont recommandé au roi de ne plus parler à son fils\", Stéphane Bern s’en mêle',\n", + " 'creator': 'Clément Garin',\n", + " 'pub_date': 'Sat, 03 May 2025 07:50:00 +0000',\n", + " 'categories': 'People, Royauté, Cancer, Clash, Famille royale britannique',\n", + " 'source_url': 'https://www.public.fr/people/familles-royales/feed'}},\n", + " {'path': 'article_204_5642323426295626288',\n", + " 'score': 0.865620797153619,\n", + " 'file_url': 'Available - hidden for security',\n", + " 'metadata': {'type': 'rss_article',\n", + " 'title': 'Prince Harry et Prince William : leur cousine de 20 ans retrouvée morte à proximité d’une arme à feu',\n", + " 'creator': 'Elisabeth Sall',\n", + " 'pub_date': 'Tue, 22 Jul 2025 07:45:00 +0000',\n", + " 'categories': 'People, Royauté, Famille royale britannique, Mort, Prince Harry',\n", + " 'source_url': 'https://www.public.fr/people/familles-royales/feed'}},\n", + " {'path': 'article_244_-4945558259102855923',\n", + " 'score': 0.7137316794271147,\n", + " 'file_url': 'Available - hidden for security',\n", + " 'metadata': {'type': 'rss_article',\n", + " 'title': 'Prince Harry: \"Les médecins ont recommandé au roi de ne plus parler à son fils\", Stéphane Bern s’en mêle',\n", + " 'creator': 'Clément Garin',\n", + " 'pub_date': 'Sat, 03 May 2025 07:50:00 +0000',\n", + " 'categories': 'People, Royauté, Cancer, Clash, Famille royale britannique',\n", + " 'source_url': 'https://www.public.fr/people/familles-royales/feed'}},\n", + " {'path': 'article_204_-5586505152836586607',\n", + " 'score': 0.5834117259799423,\n", + " 'file_url': 'Available - hidden for security',\n", + " 'metadata': {'type': 'rss_article',\n", + " 'title': 'Prince Harry et Prince William : leur cousine de 20 ans retrouvée morte à proximité d’une arme à feu',\n", + " 'creator': 'Elisabeth Sall',\n", + " 'pub_date': 'Tue, 22 Jul 2025 07:45:00 +0000',\n", + " 'categories': 'People, Royauté, Famille royale britannique, Mort, Prince Harry',\n", + " 'source_url': 'https://www.public.fr/people/familles-royales/feed'}},\n", + " {'path': 'article_204_4431767949249707685',\n", + " 'score': 0.5343611770621242,\n", + " 'file_url': 'Available - hidden for security',\n", + " 'metadata': {'type': 'rss_article',\n", + " 'title': 'Prince Harry et Prince William : leur cousine de 20 ans retrouvée morte à proximité d’une arme à feu',\n", + " 'creator': 'Elisabeth Sall',\n", + " 'pub_date': 'Tue, 22 Jul 2025 07:45:00 +0000',\n", + " 'categories': 'People, Royauté, Famille royale britannique, Mort, Prince Harry',\n", + " 'source_url': 'https://www.public.fr/people/familles-royales/feed'}},\n", + " {'path': 'article_204_-7449109881057781290',\n", + " 'score': 0.49244116519499875,\n", + " 'file_url': 'Available - hidden for security',\n", + " 'metadata': {'type': 'rss_article',\n", + " 
'title': 'Prince Harry et Prince William : leur cousine de 20 ans retrouvée morte à proximité d’une arme à feu',\n", + " 'creator': 'Elisabeth Sall',\n", + " 'pub_date': 'Tue, 22 Jul 2025 07:45:00 +0000',\n", + " 'categories': 'People, Royauté, Famille royale britannique, Mort, Prince Harry',\n", + " 'source_url': 'https://www.public.fr/people/familles-royales/feed'}},\n", + " {'path': 'article_244_1906632056819888007',\n", + " 'score': 0.46429313152067825,\n", + " 'file_url': 'Available - hidden for security',\n", + " 'metadata': {'type': 'rss_article',\n", + " 'title': 'Prince Harry: \"Les médecins ont recommandé au roi de ne plus parler à son fils\", Stéphane Bern s’en mêle',\n", + " 'creator': 'Clément Garin',\n", + " 'pub_date': 'Sat, 03 May 2025 07:50:00 +0000',\n", + " 'categories': 'People, Royauté, Cancer, Clash, Famille royale britannique',\n", + " 'source_url': 'https://www.public.fr/people/familles-royales/feed'}},\n", + " {'path': 'article_244_522793638848069728',\n", + " 'score': 0.43949404810417025,\n", + " 'file_url': 'Available - hidden for security',\n", + " 'metadata': {'type': 'rss_article',\n", + " 'title': 'Prince Harry: \"Les médecins ont recommandé au roi de ne plus parler à son fils\", Stéphane Bern s’en mêle',\n", + " 'creator': 'Clément Garin',\n", + " 'pub_date': 'Sat, 03 May 2025 07:50:00 +0000',\n", + " 'categories': 'People, Royauté, Cancer, Clash, Famille royale britannique',\n", + " 'source_url': 'https://www.public.fr/people/familles-royales/feed'}},\n", + " {'path': 'article_204_5853657413749066345',\n", + " 'score': 0.42092474401628305,\n", + " 'file_url': 'Available - hidden for security',\n", + " 'metadata': {'type': 'rss_article',\n", + " 'title': 'Prince Harry et Prince William : leur cousine de 20 ans retrouvée morte à proximité d’une arme à feu',\n", + " 'creator': 'Elisabeth Sall',\n", + " 'pub_date': 'Tue, 22 Jul 2025 07:45:00 +0000',\n", + " 'categories': 'People, Royauté, Famille royale britannique, Mort, Prince Harry',\n", + " 'source_url': 'https://www.public.fr/people/familles-royales/feed'}}]" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "query = \"famille royale\"\n", + "search_type = \"documents\"\n", + "k = 10\n", + "reranker = \"zerank-1-small\"\n", + "\n", + "manager = ZeroEntropyArticleManager(collection)\n", + "await manager.search_articles(\n", + " query=query,\n", + " search_type=search_type,\n", + " k=k,\n", + " reranker=reranker,\n", + ")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.7" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/guides/semantic_search_over_articles/docker-compose.yml b/guides/semantic_search_over_articles/docker-compose.yml new file mode 100644 index 0000000..4a7b061 --- /dev/null +++ b/guides/semantic_search_over_articles/docker-compose.yml @@ -0,0 +1,38 @@ +# docker-compose.yml +version: '3.8' + +services: + gossip-search: + build: + context: . 
+ dockerfile: Dockerfile + container_name: zeroentropy-gossip-search + ports: + - "8501:8501" + environment: + - ZEROENTROPY_API_KEY=${ZEROENTROPY_API_KEY} + - PYTHONPATH=/app + env_file: + - .env + volumes: + - ./data:/app/data + - ./logs:/app/logs + restart: unless-stopped + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:8501/_stcore/health"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 40s + + # Add a reverse proxy + nginx: + image: nginx:alpine + container_name: gossip-search-proxy + ports: + - "80:80" + volumes: + - ./nginx.conf:/etc/nginx/conf.d/default.conf + depends_on: + - gossip-search + restart: unless-stopped \ No newline at end of file diff --git a/guides/semantic_search_over_articles/frontend/streamlit_app_ze.py b/guides/semantic_search_over_articles/frontend/streamlit_app_ze.py new file mode 100644 index 0000000..f73e593 --- /dev/null +++ b/guides/semantic_search_over_articles/frontend/streamlit_app_ze.py @@ -0,0 +1,352 @@ +# streamlit_app.py +import os +import sys +import asyncio +import streamlit as st +from dotenv import load_dotenv + +# Load environment variables +load_dotenv() + +# Set the working directory to the backend +current_dir = os.path.dirname(os.path.abspath(__file__)) +backend_path = os.path.abspath(os.path.join(current_dir, "../..", "backend")) +sys.path.append(backend_path) + +# Internal imports +from backend.search_ze import ZeroEntropyArticleSearcher # noqa: E402 +from backend.utils_ze import ZeroEntropyUtils # noqa: E402 + + +# Cache the searcher and utils to avoid reinitializing +@st.cache_resource +def get_searcher_and_utils(collection_name="articles"): + """Initialize and cache ZeroEntropy searcher and utils""" + searcher = ZeroEntropyArticleSearcher(collection_name) + utils = ZeroEntropyUtils(collection_name) + return searcher, utils + + +async def get_search_results( + query: str, + search_type: str, + k: int, + collection_name: str, + filter_creator: str = None, + filter_category: str = None, + reranker: str = "zerank-1-small", +): + """Async search function that uses ZeroEntropy backend functionality""" + searcher, utils = get_searcher_and_utils(collection_name) + + # Prepare filter if specified + filter_dict = {} + if filter_creator: + filter_dict["creator"] = {"$eq": filter_creator} + if filter_category: + filter_dict["categories"] = {"$eq": filter_category} + filter_dict = filter_dict if filter_dict else None + + # Perform search based on type + if search_type == "documents": + results = await searcher.search_documents( + query=query, + k=k, + filter_dict=filter_dict, + reranker=reranker, + ) + elif search_type == "snippets": + results = await searcher.search_snippets( + query=query, + k=k, + filter_dict=filter_dict, + reranker=reranker, + ) + elif search_type == "pages": + results = await searcher.search_pages( + query=query, k=k, filter_dict=filter_dict + ) + elif search_type == "advanced": + results = await utils.search_and_rerank( + query=query, k=k * 2, rerank_top_n=k # Get more documents initially + ) + else: + results = [] + + return results, search_type + + +def run_async_search( + query: str, + search_type: str, + k: int, + collection_name: str, + filter_creator: str = None, + filter_category: str = None, + reranker: str = "zerank-1-small", +): + """Wrapper to run async search in Streamlit""" + # Create new event loop for this thread + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + + # Run the async function + results, search_type = loop.run_until_complete( + get_search_results( + query, + 
search_type, + k, + collection_name, + filter_creator, + filter_category, + reranker, + ) + ) + + loop.close() + return results, search_type + + +def display_document_results(results): + """Display document search results""" + for i, result in enumerate(results, 1): + with st.container(): + col1, col2 = st.columns([3, 1]) + + with col1: + metadata = result.get("metadata", {}) + title = metadata.get("title", "N/A") + + # Make title clickable if file_url exists + if result.get("file_url"): + st.markdown(f"### [{title}]({result['file_url']})") + else: + st.markdown(f"### {title}") + + st.markdown(f"**Author:** {metadata.get('creator', 'N/A')}") + st.markdown(f"**Publication Date:** {metadata.get('pub_date', 'N/A')}") + st.markdown(f"**Categories:** {metadata.get('categories', 'N/A')}") + + if metadata.get("source_url"): + st.markdown(f"**Source:** {metadata.get('source_url', 'N/A')}") + + with col2: + st.metric("Relevance Score", f"{result['score']:.3f}") + if result.get("rerank_score"): + st.metric("Rerank Score", f"{result['rerank_score']:.3f}") + + st.markdown("---") + + +def display_snippet_results(results): + """Display snippet search results""" + for i, result in enumerate(results, 1): + with st.container(): + col1, col2 = st.columns([3, 1]) + + with col1: + metadata = result.get("metadata", {}) + st.markdown(f"### Snippet {i}") + st.markdown(f"**From:** {metadata.get('title', 'N/A')}") + st.markdown(f"**Author:** {metadata.get('creator', 'N/A')}") + + # Show snippet content + if result.get("content"): + with st.expander("Show Content", expanded=True): + st.write(result["content"]) + + with col2: + st.metric("Relevance Score", f"{result['score']:.3f}") + st.write( + f"**Char Range:** {result.get('start_index', 0)}-{result.get('end_index', 0)}" + ) + st.write(f"**Page Span:** {result.get('page_span', [])}") + + st.markdown("---") + + +def display_page_results(results): + """Display page search results""" + for i, result in enumerate(results, 1): + with st.container(): + col1, col2 = st.columns([3, 1]) + + with col1: + metadata = result.get("metadata", {}) + st.markdown(f"### Page {i}") + st.markdown(f"**From:** {metadata.get('title', 'N/A')}") + st.markdown(f"**Page Index:** {result.get('page_index', 0)}") + + # Show page content + if result.get("content"): + with st.expander("Show Content", expanded=False): + st.write( + result["content"][:500] + "..." 
+ if len(result["content"]) > 500 + else result["content"] + ) + + with col2: + st.metric("Relevance Score", f"{result['score']:.3f}") + + st.markdown("---") + + +def display_advanced_results(results): + """Display advanced search results with reranking""" + for i, result in enumerate(results, 1): + with st.container(): + col1, col2 = st.columns([3, 1]) + + with col1: + metadata = result.get("metadata", {}) + title = metadata.get("title", "N/A") + + st.markdown(f"### {title}") + st.markdown(f"**Author:** {metadata.get('creator', 'N/A')}") + st.markdown(f"**Publication Date:** {metadata.get('pub_date', 'N/A')}") + st.markdown(f"**Categories:** {metadata.get('categories', 'N/A')}") + + with col2: + st.metric("Original Score", f"{result.get('original_score', 0):.3f}") + st.metric("Rerank Score", f"{result.get('rerank_score', 0):.3f}") + + st.markdown("---") + + +async def get_collection_status(collection_name): + """Get collection status""" + try: + searcher, _ = get_searcher_and_utils(collection_name) + status = await searcher.get_collection_status() + return status + except Exception as e: + st.error(f"Error getting status: {str(e)}") + return None + + +def run_async_status(collection_name): + """Wrapper to run async status check in Streamlit""" + try: + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + status = loop.run_until_complete(get_collection_status(collection_name)) + loop.close() + return status + except Exception as e: + st.error(f"Error getting status: {str(e)}") + return None + + +def main(): + st.set_page_config( + page_title="ZeroEntropy Gossip Search", layout="wide", page_icon="📰" + ) + + st.title("📰 RAG Gossip Semantic Search with ZeroEntropy") + st.write( + "Search for the latest articles from VSD and Public using advanced AI-powered retrieval." 
+ ) + + # Sidebar for configuration + with st.sidebar: + st.header("🔧 Search Configuration") + + collection_name = st.text_input("Collection Name", value="my_articles") + + search_type = st.selectbox( + "Search Type", + options=["documents", "snippets", "pages", "advanced"], + index=0, + help="Choose the type of search to perform", + ) + + k = st.slider("Number of results", min_value=1, max_value=20, value=5) + + # Advanced options + with st.expander("🔍 Advanced Options"): + reranker = st.selectbox( + "Reranker Model", options=["zerank-1-small"], index=0 + ) + + filter_creator = st.text_input( + "Filter by Creator", placeholder="e.g., Arno Crampont" + ) + filter_category = st.text_input( + "Filter by Category", placeholder="e.g., TPMP" + ) + + # Collection status + if st.button("📊 Check Collection Status"): + with st.spinner("Getting status..."): + status = run_async_status(collection_name) + if status: + st.success("✅ Collection Status") + st.write(f"**Total Documents:** {status.num_documents}") + st.write(f"**Indexed:** {status.num_indexed_documents}") + st.write(f"**Parsing:** {status.num_parsing_documents}") + st.write(f"**Indexing:** {status.num_indexing_documents}") + st.write(f"**Failed:** {status.num_failed_documents}") + + # Main search interface + query = st.text_input( + "🔍 Enter your search query:", + value="", + placeholder="e.g., TPMP, famille royale, célébrités...", + ) + + if st.button("Search for a keyword", type="primary"): + if query.strip(): + with st.spinner(f"Searching with {search_type} mode..."): + results, result_type = run_async_search( + query=query, + search_type=search_type, + k=k, + collection_name=collection_name, + filter_creator=filter_creator if filter_creator else None, + filter_category=filter_category if filter_category else None, + reranker=reranker, + ) + + if results: + st.success(f"✅ Found {len(results)} results for '{query}'") + + # Display results based on search type + if result_type == "documents": + display_document_results(results) + elif result_type == "snippets": + display_snippet_results(results) + elif result_type == "pages": + display_page_results(results) + elif result_type == "advanced": + display_advanced_results(results) + + else: + st.warning("❌ No results found for your query.") + else: + st.error("⚠️ Please enter a valid query.") + + # Help section + with st.expander("ℹ️ How to use this app"): + st.markdown( + """ + ### Search Types: + - **Documents**: Search entire articles for the most relevant matches + - **Snippets**: Find specific text snippets within articles + - **Pages**: Search individual pages of documents + - **Advanced**: Uses reranking for improved relevance + + ### Filters: + - **Creator**: Filter by article author + - **Category**: Filter by article category + + ### Tips: + - Use specific keywords for better results + - Try different search types for different use cases + - Use filters to narrow down results + """ + ) + + +if __name__ == "__main__": + main()
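
For readers who want to exercise the backend searcher that the Streamlit frontend above wraps, here is a minimal standalone sketch. It is not part of the patch: it assumes it is run from the guide's root directory (guides/semantic_search_over_articles/), that ZEROENTROPY_API_KEY is available via the environment or .env, and that the "my_articles" collection has already been indexed by the demo notebook. It reuses the same ZeroEntropyArticleSearcher class, search_documents() signature, and metadata filter format that streamlit_app_ze.py calls.

    # sketch_search.py -- illustrative only, not included in this patch
    import asyncio

    from dotenv import load_dotenv

    # Same backend import the Streamlit frontend uses
    from backend.search_ze import ZeroEntropyArticleSearcher


    async def main() -> None:
        load_dotenv()  # pick up ZEROENTROPY_API_KEY, as the frontend does
        searcher = ZeroEntropyArticleSearcher("my_articles")

        # Same call signature and filter format used in streamlit_app_ze.py;
        # pass filter_dict=None for an unfiltered query like the notebook's.
        results = await searcher.search_documents(
            query="famille royale",
            k=5,
            filter_dict={"creator": {"$eq": "Elisabeth Sall"}},
            reranker="zerank-1-small",
        )

        # Result shape as shown in the demo notebook output: path, score, metadata
        for rank, result in enumerate(results, 1):
            metadata = result.get("metadata", {})
            print(f"{rank}. [{result['score']:.3f}] {metadata.get('title', 'N/A')}")


    if __name__ == "__main__":
        asyncio.run(main())

Swapping search_documents for search_snippets or search_pages mirrors the other search modes exposed in the app's sidebar.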