<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:atom="http://www.w3.org/2005/Atom">
  <channel>
    <title><![CDATA[BigData Boutique Blog]]></title>
    <description><![CDATA[Expert insights on Elasticsearch, OpenSearch, Flink, Spark and big data technologies]]></description>
    <link>https://bigdataboutique.com</link>
    <image>
      <url>https://bigdataboutique.com/images/og.png</url>
      <title><![CDATA[BigData Boutique Blog]]></title>
      <link>https://bigdataboutique.com</link>
    </image>
    <generator>BigData Boutique RSS Generator</generator>
    <lastBuildDate>Tue, 10 Mar 2026 00:23:19 GMT</lastBuildDate>
    <atom:link href="https://bigdataboutique.com/blog/rss.xml" rel="self" type="application/rss+xml" />
    <copyright><![CDATA[Copyright 2026 BigData Boutique]]></copyright>
    <language><![CDATA[en]]></language>
    <item>
      <title><![CDATA[How to Run LLMs Locally: A Practical Guide for Developers]]></title>
      <description><![CDATA[A practical guide to running LLMs on your own hardware - covering the tools (Ollama, LM Studio, Jan), hardware requirements by VRAM tier, model selection, quantization formats, and how to integrate local inference into your dev workflow.]]></description>
      <link>https://bigdataboutique.com/blog/how-to-run-llms-locally-a-practical-guide-for-developers</link>
      <guid isPermaLink="false">https://bigdataboutique.com/blog/how-to-run-llms-locally-a-practical-guide-for-developers</guid>
      <category><![CDATA[LLM]]></category>
      <category><![CDATA[GenAI]]></category>
      <dc:creator><![CDATA[Rafał Kuć]]></dc:creator>
      <pubDate>Mon, 09 Mar 2026 14:00:00 GMT</pubDate>
      <enclosure url="https://bigdataboutique.com/blog-images/running-llms-locally.webp" length="0" type="image/webp" />
    </item>
    <item>
      <title><![CDATA[BigData Boutique Launches Enterprise-Grade OpenSearch Distribution]]></title>
      <description><![CDATA[BigData Boutique launches its Enterprise-Hardened OpenSearch Distribution with Long-Term Support (LTS) - a rigorously tested, production-ready platform built for organizations running OpenSearch in business-critical environments.]]></description>
      <link>https://bigdataboutique.com/blog/bigdata-boutique-launches-enterprise-grade-opensearch-distribution</link>
      <guid isPermaLink="false">https://bigdataboutique.com/blog/bigdata-boutique-launches-enterprise-grade-opensearch-distribution</guid>
      <category><![CDATA[OpenSearch]]></category>
      <category><![CDATA[Press Release]]></category>
      <dc:creator><![CDATA[Zevi Reinitz]]></dc:creator>
      <pubDate>Fri, 06 Mar 2026 09:00:00 GMT</pubDate>
      <enclosure url="https://bigdataboutique.com/images/os-enterprise-og.webp" length="0" type="image/webp" />
    </item>
    <item>
      <title><![CDATA[Sparse vs Dense Vectors: How Lexical and Semantic Search Actually Work]]></title>
      <description><![CDATA[Dense vectors dominate the conversation around semantic search, but sparse vectors still solve problems dense embeddings can't. This guide breaks down how each works, where each fails, and why understanding both is non-negotiable for modern retrieval systems.]]></description>
      <link>https://bigdataboutique.com/blog/sparse-vs-dense-vectors-how-lexical-and-semantic-search-actually-work</link>
      <guid isPermaLink="false">https://bigdataboutique.com/blog/sparse-vs-dense-vectors-how-lexical-and-semantic-search-actually-work</guid>
      <category><![CDATA[vector search]]></category>
      <category><![CDATA[information retrieval]]></category>
      <category><![CDATA[hybrid search]]></category>
      <category><![CDATA[BM25]]></category>
      <category><![CDATA[embeddings]]></category>
      <dc:creator><![CDATA[Shai Greenberg]]></dc:creator>
      <pubDate>Wed, 04 Mar 2026 00:00:00 GMT</pubDate>
      <enclosure url="https://bigdataboutique.com/blog-images/sparse-vs-dense-vectors.webp" length="0" type="image/webp" />
    </item>
    <item>
      <title><![CDATA[Managing Mutations in ClickHouse]]></title>
      <description><![CDATA[Understand why ALTER TABLE UPDATE and DELETE operations can grind your ClickHouse cluster to a halt, and learn practical alternatives like insert-select-rename and partition-level operations.]]></description>
      <link>https://bigdataboutique.com/blog/clickhouse-managing-mutations</link>
      <guid isPermaLink="false">https://bigdataboutique.com/blog/clickhouse-managing-mutations</guid>
      <category><![CDATA[ClickHouse]]></category>
      <dc:creator><![CDATA[Lior Friedler]]></dc:creator>
      <pubDate>Tue, 03 Mar 2026 14:00:00 GMT</pubDate>
      <enclosure url="https://bigdataboutique.com/blog-images/7e1d6b.webp" length="0" type="image/webp" />
    </item>
    <item>
      <title><![CDATA[Complete Guide to AWS Glue]]></title>
      <description><![CDATA[A practical engineering guide to AWS Glue covering its architecture, job types, worker sizing, Data Catalog, cost optimization, and where Glue fits in a modern data platform.]]></description>
      <link>https://bigdataboutique.com/blog/complete-guide-to-aws-glue</link>
      <guid isPermaLink="false">https://bigdataboutique.com/blog/complete-guide-to-aws-glue</guid>
      <category><![CDATA[AWS]]></category>
      <category><![CDATA[AWS Glue]]></category>
      <category><![CDATA[Data Engineering]]></category>
      <category><![CDATA[ETL]]></category>
      <dc:creator><![CDATA[Kobi Lemberg]]></dc:creator>
      <pubDate>Sun, 01 Mar 2026 00:00:00 GMT</pubDate>
      <enclosure url="https://bigdataboutique.com/blog-images/introduction-amazon-glue.webp" length="0" type="image/webp" />
    </item>
    <item>
      <title><![CDATA[AI-Powered Search with Elasticsearch]]></title>
      <description><![CDATA[A practical guide to building AI-powered search with Elasticsearch - inference endpoints, semantic_text, hybrid retrieval with RRF, embedding strategy choices, and production considerations.]]></description>
      <link>https://bigdataboutique.com/blog/ai-powered-search-with-elasticsearch</link>
      <guid isPermaLink="false">https://bigdataboutique.com/blog/ai-powered-search-with-elasticsearch</guid>
      <category><![CDATA[Elasticsearch]]></category>
      <category><![CDATA[AI]]></category>
      <category><![CDATA[Vector Search]]></category>
      <category><![CDATA[Semantic Search]]></category>
      <dc:creator><![CDATA[Itamar Syn-Hershko]]></dc:creator>
      <pubDate>Thu, 26 Feb 2026 00:00:00 GMT</pubDate>
      <enclosure url="https://bigdataboutique.com/assets-blog/elasticsearch-ai.webp" length="0" type="image/webp" />
    </item>
    <item>
      <title><![CDATA[Memory Pressure and Query Optimization in ClickHouse]]></title>
      <description><![CDATA[Identify memory-hungry queries in ClickHouse using normalized query hashes and system.query_log analysis. Learn to spot common culprits like groupArray, uniqExact, and inefficient joins.]]></description>
      <link>https://bigdataboutique.com/blog/clickhouse-memory-pressure-query-optimization</link>
      <guid isPermaLink="false">https://bigdataboutique.com/blog/clickhouse-memory-pressure-query-optimization</guid>
      <category><![CDATA[ClickHouse]]></category>
      <dc:creator><![CDATA[Lior Friedler]]></dc:creator>
      <pubDate>Tue, 24 Feb 2026 14:00:00 GMT</pubDate>
      <enclosure url="https://bigdataboutique.com/blog-images/5c9f3a.webp" length="0" type="image/webp" />
    </item>
    <item>
      <title><![CDATA[Building Agentic RAG with LangGraph and OpenSearch]]></title>
      <description><![CDATA[A practical LangGraph tutorial on building self-correcting RAG with LangGraph agents. Learn how to use LangGraph for document grading, query rewriting loops, and hybrid search in production retrieval systems.]]></description>
      <link>https://bigdataboutique.com/blog/building-agentic-rag-with-langgraph-opensearch</link>
      <guid isPermaLink="false">https://bigdataboutique.com/blog/building-agentic-rag-with-langgraph-opensearch</guid>
      <category><![CDATA[GenAI]]></category>
      <category><![CDATA[RAG]]></category>
      <category><![CDATA[LangGraph]]></category>
      <category><![CDATA[OpenSearch]]></category>
      <dc:creator><![CDATA[Itamar Syn-Hershko]]></dc:creator>
      <pubDate>Mon, 23 Feb 2026 00:00:00 GMT</pubDate>
      <enclosure url="https://bigdataboutique.com/assets-blog/4f8e2a.webp" length="0" type="image/webp" />
    </item>
    <item>
      <title><![CDATA[Apache Iceberg Architecture Deep Dive]]></title>
      <description><![CDATA[The Apache Iceberg Table Format explained - a layer-by-layer walkthrough of catalogs, metadata files, manifest lists, manifests, and data files, with a concrete example of what happens on disk when you create and write to a table.]]></description>
      <link>https://bigdataboutique.com/blog/apache-iceberg-architecture-deep-dive</link>
      <guid isPermaLink="false">https://bigdataboutique.com/blog/apache-iceberg-architecture-deep-dive</guid>
      <category><![CDATA[Apache Iceberg]]></category>
      <category><![CDATA[Data Lakes]]></category>
      <category><![CDATA[Big Data]]></category>
      <dc:creator><![CDATA[Itamar Syn-Hershko]]></dc:creator>
      <pubDate>Sun, 22 Feb 2026 00:00:00 GMT</pubDate>
      <enclosure url="https://bigdataboutique.com/blog-images/huia9q.webp" length="0" type="image/webp" />
    </item>
    <item>
      <title><![CDATA[Kafka MirrorMaker 2: Deployment, Gotchas, and Disaster Recovery Failback Playbook]]></title>
      <description><![CDATA[How to set up Kafka cluster replication with MirrorMaker 2 - covering deployment model, connector tuning, topic drift gotchas, and a step-by-step failback playbook for Kafka disaster recovery.]]></description>
      <link>https://bigdataboutique.com/blog/kafka-mirrormaker-2-deployment-gotchas-disaster-recovery</link>
      <guid isPermaLink="false">https://bigdataboutique.com/blog/kafka-mirrormaker-2-deployment-gotchas-disaster-recovery</guid>
      <category><![CDATA[Apache Kafka]]></category>
      <category><![CDATA[Disaster Recovery]]></category>
      <category><![CDATA[Mirror Maker]]></category>
      <dc:creator><![CDATA[Kobi Lemberg]]></dc:creator>
      <pubDate>Fri, 20 Feb 2026 00:00:00 GMT</pubDate>
      <enclosure url="https://bigdataboutique.com/assets-blog/kafka-replication.webp" length="0" type="image/webp" />
    </item>
    <item>
      <title><![CDATA[The KFC Architecture Blueprint: Kafka, Flink, and ClickHouse]]></title>
      <description><![CDATA[We often see data platforms combining Kafka, Flink, and ClickHouse. Let's discuss when it works, when it's overkill, and where the boundary lies.]]></description>
      <link>https://bigdataboutique.com/blog/kfc-architecture-blueprint-kafka-flink-and-clickhouse</link>
      <guid isPermaLink="false">https://bigdataboutique.com/blog/kfc-architecture-blueprint-kafka-flink-and-clickhouse</guid>
      <category><![CDATA[Apache Kafka]]></category>
      <category><![CDATA[Apache Flink]]></category>
      <category><![CDATA[ClickHouse]]></category>
      <category><![CDATA[Data Architecture]]></category>
      <category><![CDATA[BigData]]></category>
      <dc:creator><![CDATA[Itamar Syn-Hershko]]></dc:creator>
      <pubDate>Thu, 19 Feb 2026 13:00:00 GMT</pubDate>
      <enclosure url="https://bigdataboutique.com/assets-blog/kafka-flink-clickhouse-kfc.webp" length="0" type="image/webp" />
    </item>
    <item>
      <title><![CDATA[The 'Too Many Parts' Problem in ClickHouse]]></title>
      <description><![CDATA[When small inserts cause part accumulation, the most common ClickHouse performance issue surfaces. Learn how to detect it, and how batch inserts and async inserts can prevent it.]]></description>
      <link>https://bigdataboutique.com/blog/clickhouse-too-many-parts</link>
      <guid isPermaLink="false">https://bigdataboutique.com/blog/clickhouse-too-many-parts</guid>
      <category><![CDATA[ClickHouse]]></category>
      <dc:creator><![CDATA[Lior Friedler]]></dc:creator>
      <pubDate>Tue, 17 Feb 2026 14:00:00 GMT</pubDate>
      <enclosure url="https://bigdataboutique.com/blog-images/8d2e4f.webp" length="0" type="image/webp" />
    </item>
    <item>
      <title><![CDATA[ClickHouse vs PostgreSQL: Choosing the Right Database for Analytics]]></title>
      <description><![CDATA[PostgreSQL excels at transactional workloads and small-scale analytics, but for real-world large-scale analytical queries ClickHouse is purpose-built to be faster, more efficient, and significantly more economical.]]></description>
      <link>https://bigdataboutique.com/blog/clickhouse-vs-postgresql</link>
      <guid isPermaLink="false">https://bigdataboutique.com/blog/clickhouse-vs-postgresql</guid>
      <category><![CDATA[ClickHouse]]></category>
      <category><![CDATA[PostgreSQL]]></category>
      <dc:creator><![CDATA[Zevi Reinitz]]></dc:creator>
      <pubDate>Mon, 16 Feb 2026 00:00:00 GMT</pubDate>
      <enclosure url="https://bigdataboutique.com/assets-blog/clickhouse-vs-postgresql.webp" length="0" type="image/webp" />
    </item>
    <item>
      <title><![CDATA[Everything You Need to Know Before Building AI Agents]]></title>
      <description><![CDATA[AI agents are more than chatbots with tools. Before you build one, understand the core components - memory, planning, and tools - the levels of autonomy, and the pitfalls that derail most projects.]]></description>
      <link>https://bigdataboutique.com/blog/everything-you-need-to-know-before-building-ai-agents</link>
      <guid isPermaLink="false">https://bigdataboutique.com/blog/everything-you-need-to-know-before-building-ai-agents</guid>
      <category><![CDATA[GenAI]]></category>
      <category><![CDATA[AI Agents]]></category>
      <category><![CDATA[RAG]]></category>
      <dc:creator><![CDATA[Itamar Syn-Hershko]]></dc:creator>
      <pubDate>Sat, 14 Feb 2026 00:00:00 GMT</pubDate>
      <enclosure url="https://bigdataboutique.com/assets-blog/building-ai-agents.webp" length="0" type="image/webp" />
    </item>
    <item>
      <title><![CDATA[Real-Time Data Analysis Tools: An Engineer's Guide to Choosing the Right Stack]]></title>
      <description><![CDATA[A practitioner's guide to choosing real-time data analysis tools. Covers ClickHouse, Elasticsearch, OpenSearch, Druid, Pinot, and the pipeline and visualization tools that connect them - with honest trade-offs from engineers who operate these systems in production.]]></description>
      <link>https://bigdataboutique.com/blog/real-time-data-analysis-tools-engineers-guide</link>
      <guid isPermaLink="false">https://bigdataboutique.com/blog/real-time-data-analysis-tools-engineers-guide</guid>
      <category><![CDATA[ClickHouse]]></category>
      <category><![CDATA[Elasticsearch]]></category>
      <category><![CDATA[OpenSearch]]></category>
      <category><![CDATA[BigData]]></category>
      <dc:creator><![CDATA[Kobi Lemberg]]></dc:creator>
      <pubDate>Wed, 11 Feb 2026 00:00:00 GMT</pubDate>
      <enclosure url="https://bigdataboutique.com/assets-blog/real-time-data-analysis.webp" length="0" type="image/webp" />
    </item>
    <item>
      <title><![CDATA[Understanding the ClickHouse MergeTree Engine]]></title>
      <description><![CDATA[A deep dive into the MergeTree engine - parts, granules, sparse indexes, merges, mutations, data skipping indices, compression codecs, TTL, and the key settings that control it all.]]></description>
      <link>https://bigdataboutique.com/blog/clickhouse-mergetree-engine</link>
      <guid isPermaLink="false">https://bigdataboutique.com/blog/clickhouse-mergetree-engine</guid>
      <category><![CDATA[ClickHouse]]></category>
      <dc:creator><![CDATA[Lior Friedler]]></dc:creator>
      <pubDate>Tue, 10 Feb 2026 00:00:00 GMT</pubDate>
      <enclosure url="https://bigdataboutique.com/blog-images/3a7b1c.webp" length="0" type="image/webp" />
    </item>
    <item>
      <title><![CDATA[ClickHouse: Production Monitoring & Optimization Tips]]></title>
      <description><![CDATA[Key insights from our webinar on monitoring and optimizing ClickHouse in production - covering part management, memory pressure, stuck mutations, schema efficiency, and query log analysis.]]></description>
      <link>https://bigdataboutique.com/blog/clickhouse-production-monitoring-and-optimization-tips-9b26bc</link>
      <guid isPermaLink="false">https://bigdataboutique.com/blog/clickhouse-production-monitoring-and-optimization-tips-9b26bc</guid>
      <category><![CDATA[ClickHouse]]></category>
      <category><![CDATA[Webinar]]></category>
      <dc:creator><![CDATA[Lior Friedler]]></dc:creator>
      <pubDate>Fri, 06 Feb 2026 00:00:00 GMT</pubDate>
      <enclosure url="https://bigdataboutique.com/assets-blog/clickhouse-webinar.webp" length="0" type="image/webp" />
    </item>
    <item>
      <title><![CDATA[AWS Kinesis vs Kafka: Choosing the Right Streaming Platform]]></title>
      <description><![CDATA[A practical comparison of Amazon Kinesis and Apache Kafka for stream processing. Learn when each platform makes sense based on scale, operational requirements, and cost.]]></description>
      <link>https://bigdataboutique.com/blog/aws-kinesis-vs-kafka-choosing-the-right-streaming-platform-93206a</link>
      <guid isPermaLink="false">https://bigdataboutique.com/blog/aws-kinesis-vs-kafka-choosing-the-right-streaming-platform-93206a</guid>
      <category><![CDATA[AWS Kinesis]]></category>
      <category><![CDATA[Apache Kafka]]></category>
      <category><![CDATA[Data Streaming]]></category>
      <dc:creator><![CDATA[Lior Friedler]]></dc:creator>
      <pubDate>Thu, 05 Feb 2026 07:29:03 GMT</pubDate>
      <enclosure url="https://bigdataboutique.com/blog-images/93206a.webp" length="0" type="image/webp" />
    </item>
    <item>
      <title><![CDATA[Databricks vs Snowflake - 2026 Comparison]]></title>
      <description><![CDATA[A practical comparison of Databricks and Snowflake for engineers - architecture differences, workload fit, pricing realities, and where each platform excels.]]></description>
      <link>https://bigdataboutique.com/blog/databricks-vs-snowflake-2026-comparison-d731b5</link>
      <guid isPermaLink="false">https://bigdataboutique.com/blog/databricks-vs-snowflake-2026-comparison-d731b5</guid>
      <category><![CDATA[Databricks]]></category>
      <category><![CDATA[Snowflake]]></category>
      <category><![CDATA[Data Lakes]]></category>
      <dc:creator><![CDATA[Lior Friedler]]></dc:creator>
      <pubDate>Mon, 02 Feb 2026 12:42:00 GMT</pubDate>
      <enclosure url="https://bigdataboutique.com/blog-images/d731b5.webp" length="0" type="image/webp" />
    </item>
    <item>
      <title><![CDATA[Scaling Vector Search with OpenSearch]]></title>
      <description><![CDATA[Master scaling vector search in OpenSearch. Learn how to optimize HNSW parameters, leverage quantization (SQ, BQ, PQ), and architect systems for billion-vector datasets.]]></description>
      <link>https://bigdataboutique.com/blog/scaling-vector-search-with-opensearch-c0cdfc</link>
      <guid isPermaLink="false">https://bigdataboutique.com/blog/scaling-vector-search-with-opensearch-c0cdfc</guid>
      <category><![CDATA[OpenSearch]]></category>
      <category><![CDATA[Amazon OpenSearch Service]]></category>
      <category><![CDATA[Vector Search]]></category>
      <dc:creator><![CDATA[Itamar Syn-Hershko]]></dc:creator>
      <pubDate>Sun, 01 Feb 2026 13:47:47 GMT</pubDate>
      <enclosure url="https://bigdataboutique.com/blog-images/c0cdfc.webp" length="0" type="image/webp" />
    </item>
  </channel>
</rss>