From a27966d4861337c66fa581b6e6768158f3bd4f71 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Poyraz=20K=C3=BC=C3=A7=C3=BCkarslan?=
 <83272398+PoyrazK@users.noreply.github.com>
Date: Sat, 13 Jun 2026 16:11:37 +0300
Subject: [PATCH 1/7] Add PostgreSQL comparison benchmark target with libpq
 linking

---
 CMakeLists.txt | 12 ++++++++++++
 1 file changed, 12 insertions(+)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 7141f18d..b2855802 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -194,4 +194,16 @@ if(BUILD_BENCHMARKS)
     else()
         message(STATUS "DuckDB not found, skipping duckdb_comparison_bench")
     endif()
+
+    # PostgreSQL comparison benchmark: built only when both libpq and libpq-fe.h are located
+    find_library(PQLIB_LIBRARY pq PATHS /usr/lib /usr/local/lib)
+    find_path(PQLIB_INCLUDE_DIR libpq-fe.h PATHS /usr/include /usr/local/include PATH_SUFFIXES postgresql)
+    if(PQLIB_LIBRARY AND PQLIB_INCLUDE_DIR)
+        add_executable(postgresql_comparison_bench benchmarks/postgresql_comparison_bench.cpp)
+        target_include_directories(postgresql_comparison_bench PRIVATE ${PQLIB_INCLUDE_DIR})
+        target_link_libraries(postgresql_comparison_bench sqlEngineCore benchmark::benchmark benchmark::benchmark_main ${PQLIB_LIBRARY})
+        message(STATUS "PostgreSQL benchmark enabled")
+    else()
+        message(STATUS "libpq not found, skipping postgresql_comparison_bench")
+    endif()
 endif()

From fafdd8eda9a50fbe04da3abe58c4319f8b4de19c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Poyraz=20K=C3=BC=C3=A7=C3=BCkarslan?=
 <83272398+PoyrazK@users.noreply.github.com>
Date: Sat, 13 Jun 2026 16:11:48 +0300
Subject: [PATCH 2/7] Optimize OpenAddressHashAgg: sentinel-based MIN/MAX,
 static_cast over dynamic_cast, add accessors

---
 include/executor/vectorized_operator.hpp | 124 ++++++++++++++++++-----
 1 file changed, 100 insertions(+), 24 deletions(-)

diff --git a/include/executor/vectorized_operator.hpp b/include/executor/vectorized_operator.hpp
index 762d8cb8..b382dc32 100644
--- a/include/executor/vectorized_operator.hpp
+++ b/include/executor/vectorized_operator.hpp
@@ -463,6 +463,14 @@ class OpenAddressHashAgg {
     static constexpr size_t kInitialCapacity = 1024;
 
    public:
+    // Accessors for external iteration and batch processing (no bounds or membership checks here)
+    [[nodiscard]] size_t mask() const { return mask_; }  // Returns mask_ unchanged
+    [[nodiscard]] const std::vector<size_t>& valid_indices() const { return valid_indices_; }
+    [[nodiscard]] HashBucket& bucket_at(size_t idx) { return buckets_[idx]; }  // Mutable slot idx
+    [[nodiscard]] size_t bucket_index(const HashBucket& bucket) const {
+        return static_cast<size_t>(&bucket - buckets_.data());  // bucket must be an element of buckets_
+    }
+
     static uint64_t hash_bytes(const uint8_t* data, size_t len) {
         // FNV-1a 64-bit hash
         uint64_t hash = 14695981039346656037ull;
@@ -522,9 +530,13 @@ class OpenAddressHashAgg {
                     bucket.sums_int64[a] = 0;
                     bucket.sums_float64[a] = 0.0;
                     bucket.has_float_value[a] = false;
-                    bucket.mins[a] = 0;
-                    bucket.maxes[a] = 0;
+                    // Sentinel-based MIN/MAX initialization (eliminates has_mins branching)
+                    bucket.mins[a] = std::numeric_limits<int64_t>::max();
+                    bucket.maxes[a] = std::numeric_limits<int64_t>::min();
                     bucket.has_mins[a] = false;
+                    bucket.mins_float64[a] = std::numeric_limits<double>::max();
+                    bucket.maxes_float64[a] = std::numeric_limits<double>::lowest();
+                    bucket.has_float_minmax[a] = false;
                 }
                 num_occupied_++;
                 valid_indices_.push_back(idx);
@@ -566,9 +578,13 @@ class OpenAddressHashAgg {
                     bucket.sums_int64[a] = 0;
                     bucket.sums_float64[a] = 0.0;
                     bucket.has_float_value[a] = false;
-                    bucket.mins[a] = 0;
-                    bucket.maxes[a] = 0;
+                    // Sentinel-based MIN/MAX initialization (eliminates has_mins branching)
+                    bucket.mins[a] = std::numeric_limits<int64_t>::max();
+                    bucket.maxes[a] = std::numeric_limits<int64_t>::min();
                     bucket.has_mins[a] = false;
+                    bucket.mins_float64[a] = std::numeric_limits<double>::max();
+                    bucket.maxes_float64[a] = std::numeric_limits<double>::lowest();
+                    bucket.has_float_minmax[a] = false;
                 }
                 num_occupied_++;
                 valid_indices_.push_back(idx);
@@ -784,9 +800,11 @@ class VectorizedGroupByOperator : public VectorizedOperator {
     static constexpr size_t MAX_KEY_LEN = 256;
     std::vector<uint8_t>
         batch_key_buffer_;  // Heap-allocated scratch: MAX_BATCH_SIZE * MAX_KEY_LEN bytes
-    std::vector<uint64_t> batch_hashes_;     // batch_size
+    std::vector<uint64_t> batch_hashes_;      // batch_size
     std::vector<int64_t> batch_int64_keys_;  // batch_size (for int64-only path)
     std::vector<size_t> batch_key_lens_;     // batch_size
+    std::vector<size_t> batch_bucket_idx_;   // batch_size - bucket index for each row
+    std::vector<size_t> batch_active_buckets_;  // batch_size - unique buckets touched this batch
     bool all_int64_keys_ = false;            // True when all GROUP BY cols are INT64
 
     // Parallel aggregation support (Phase 4)
@@ -851,6 +869,8 @@ class VectorizedGroupByOperator : public VectorizedOperator {
         batch_hashes_.resize(MAX_BATCH_SIZE);
         batch_int64_keys_.resize(MAX_BATCH_SIZE);
         batch_key_lens_.resize(MAX_BATCH_SIZE);
+        batch_bucket_idx_.resize(MAX_BATCH_SIZE);
+        batch_active_buckets_.resize(MAX_BATCH_SIZE);
 
         // Create schema for group key evaluation
         Schema key_schema;
@@ -1051,7 +1071,7 @@ class VectorizedGroupByOperator : public VectorizedOperator {
                 thread_group_keys_[t].clear();
             }
         } else {
-            // Sequential path (original code)
+            // Sequential path (original code with static_cast and sentinel optimizations)
             for (size_t r = 0; r < n; ++r) {
                 auto& bucket =
                     all_int64_keys_
@@ -1118,10 +1138,11 @@ class VectorizedGroupByOperator : public VectorizedOperator {
                 if (!col.is_null(row_idx)) {
                     bucket.counts[i]++;
                     if (col.type() == common::ValueType::TYPE_INT64) {
-                        auto& num_col = dynamic_cast<const NumericVector<int64_t>&>(col);
+                        // static_cast is faster than dynamic_cast - type already verified
+                        const auto& num_col = static_cast<const NumericVector<int64_t>&>(col);
                         bucket.sums_int64[i] += num_col.raw_data()[row_idx];
                     } else if (col.type() == common::ValueType::TYPE_FLOAT64) {
-                        auto& num_col = dynamic_cast<const NumericVector<double>&>(col);
+                        const auto& num_col = static_cast<const NumericVector<double>&>(col);
                         bucket.sums_float64[i] += num_col.raw_data()[row_idx];
                         bucket.has_float_value[i] = true;
                     }
@@ -1132,24 +1153,79 @@ class VectorizedGroupByOperator : public VectorizedOperator {
                 if (!col.is_null(row_idx)) {
                     if (col.type() == common::ValueType::TYPE_FLOAT64) {
                         auto val = col.get(row_idx).to_float64();
-                        if (!bucket.has_float_minmax[i]) {
-                            bucket.mins_float64[i] = val;
-                            bucket.maxes_float64[i] = val;
-                            bucket.has_float_minmax[i] = true;
-                        } else {
-                            bucket.mins_float64[i] = std::min(bucket.mins_float64[i], val);
-                            bucket.maxes_float64[i] = std::max(bucket.maxes_float64[i], val);
-                        }
+                        // Sentinel-based: mins/maxes start at max/lowest. NOTE(review): has_float_minmax[i] is no longer set to true here; confirm nothing downstream still reads it
+                        bucket.mins_float64[i] = std::min(bucket.mins_float64[i], val);
+                        bucket.maxes_float64[i] = std::max(bucket.maxes_float64[i], val);
                     } else {
                         auto val = col.get(row_idx).to_int64();
-                        if (!bucket.has_mins[i]) {
-                            bucket.mins[i] = val;
-                            bucket.maxes[i] = val;
-                            bucket.has_mins[i] = true;
-                        } else {
-                            bucket.mins[i] = std::min(bucket.mins[i], val);
-                            bucket.maxes[i] = std::max(bucket.maxes[i], val);
-                        }
+                        // Sentinel-based: mins/maxes start at max/min. NOTE(review): has_mins[i] is no longer set to true here; confirm nothing downstream still reads it
+                        bucket.mins[i] = std::min(bucket.mins[i], val);
+                        bucket.maxes[i] = std::max(bucket.maxes[i], val);
+                    }
+                }
+            }
+        }
+    }
+
+    // Batch-oriented accumulator update - processes multiple rows per call
+    // Type resolution happens once per aggregate, not per-row
+    template <typename Bucket>
+    void update_aggregate_batch(Bucket& bucket, const ColumnVector& col, size_t agg_idx,
+                               const size_t* row_indices, size_t num_rows) {
+        const auto& agg = aggregates_[agg_idx];
+
+        if (agg.type == AggregateType::Count && agg.input_col_idx < 0) {
+            // COUNT(*) - all rows contribute
+            bucket.counts[agg_idx] += num_rows;
+            return;
+        }
+
+        if (agg.input_col_idx < 0) return;
+
+        // Type resolved ONCE, then batch process all rows
+        if (col.type() == common::ValueType::TYPE_INT64) {
+            const auto& num_col = static_cast<const NumericVector<int64_t>&>(col);
+            const int64_t* raw = num_col.raw_data();
+            for (size_t j = 0; j < num_rows; ++j) {
+                size_t r = row_indices[j];
+                if (!num_col.is_null(r)) {
+                    bucket.counts[agg_idx]++;
+                    bucket.sums_int64[agg_idx] += raw[r];
+                }
+            }
+        } else if (col.type() == common::ValueType::TYPE_FLOAT64) {
+            const auto& num_col = static_cast<const NumericVector<double>&>(col);
+            const double* raw = num_col.raw_data();
+            for (size_t j = 0; j < num_rows; ++j) {
+                size_t r = row_indices[j];
+                if (!num_col.is_null(r)) {
+                    bucket.counts[agg_idx]++;
+                    bucket.sums_float64[agg_idx] += raw[r];
+                    bucket.has_float_value[agg_idx] = true;
+                }
+            }
+        } else if (agg.type == AggregateType::Min || agg.type == AggregateType::Max) {
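+            // MIN/MAX, sentinel-based (no has_mins branch). FIXME: unreachable for INT64/FLOAT64 columns because the col.type() branches above match first; dispatch on agg.type before col.type()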
+            if (col.type() == common::ValueType::TYPE_FLOAT64) {
+                const auto& num_col = static_cast<const NumericVector<double>&>(col);
+                const double* raw = num_col.raw_data();
+                for (size_t j = 0; j < num_rows; ++j) {
+                    size_t r = row_indices[j];
+                    if (!num_col.is_null(r)) {
+                        double val = raw[r];
+                        bucket.mins_float64[agg_idx] = std::min(bucket.mins_float64[agg_idx], val);
+                        bucket.maxes_float64[agg_idx] = std::max(bucket.maxes_float64[agg_idx], val);
+                    }
+                }
+            } else {
+                const auto& num_col = static_cast<const NumericVector<int64_t>&>(col);
+                const int64_t* raw = num_col.raw_data();
+                for (size_t j = 0; j < num_rows; ++j) {
+                    size_t r = row_indices[j];
+                    if (!num_col.is_null(r)) {
+                        int64_t val = raw[r];
+                        bucket.mins[agg_idx] = std::min(bucket.mins[agg_idx], val);
+                        bucket.maxes[agg_idx] = std::max(bucket.maxes[agg_idx], val);
                     }
                 }
             }

From 36e1cca84b07b676b8065e40642cb981ff8d7723 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Poyraz=20K=C3=BC=C3=A7=C3=BCkarslan?=
 <83272398+PoyrazK@users.noreply.github.com>
Date: Sat, 13 Jun 2026 16:11:58 +0300
Subject: [PATCH 3/7] Add PostgreSQL comparison benchmark suite and
 documentation

---
 benchmarks/postgresql_comparison_bench.cpp | 521 +++++++++++++++++++++
 docs/performance/POSTGRESQL_COMPARISON.md  | 165 +++++++
 2 files changed, 686 insertions(+)
 create mode 100644 benchmarks/postgresql_comparison_bench.cpp
 create mode 100644 docs/performance/POSTGRESQL_COMPARISON.md

diff --git a/benchmarks/postgresql_comparison_bench.cpp b/benchmarks/postgresql_comparison_bench.cpp
new file mode 100644
index 00000000..d0bf131c
--- /dev/null
+++ b/benchmarks/postgresql_comparison_bench.cpp
@@ -0,0 +1,521 @@
+/**
+ * @file postgresql_comparison_bench.cpp
+ * @brief Performance comparison between cloudSQL and PostgreSQL
+ */
+
+#include <benchmark/benchmark.h>
+#include <filesystem>
+#include <libpq-fe.h>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "catalog/catalog.hpp"
+#include "common/config.hpp"
+#include "executor/query_executor.hpp"
+#include "parser/parser.hpp"
+#include "storage/buffer_pool_manager.hpp"
+#include "storage/heap_table.hpp"
+#include "storage/storage_manager.hpp"
+#include "transaction/lock_manager.hpp"
+#include "transaction/transaction_manager.hpp"
+
+using namespace cloudsql;
+using namespace cloudsql::storage;
+using namespace cloudsql::executor;
+using namespace cloudsql::parser;
+
+namespace {
+
+// Lexes and parses one SQL string; the result is whatever Parser::parse_statement() returns.
+std::unique_ptr<Statement> ParseSQL(const std::string& sql) {
+    Parser sql_parser(std::make_unique<Lexer>(sql));
+    // Ownership of the parsed statement passes to the caller.
+    return sql_parser.parse_statement();
+}
+
+// --- PostgreSQL Connection Context ---
+struct PostgreSQLContext {
+    PGconn* conn = nullptr;  // Owned libpq connection; nullptr when the connect attempt failed
+    // Connects using PGHOST/PGPORT/PGDATABASE/PGUSER, falling back to local defaults when unset.
+    PostgreSQLContext() {
+        const auto env_or = [](const char* name, const char* fallback) {
+            const char* value = std::getenv(name);
+            return std::string(value ? value : fallback);
+        };
+        const std::string conninfo = "host=" + env_or("PGHOST", "localhost") +
+                                     " port=" + env_or("PGPORT", "5432") +
+                                     " dbname=" + env_or("PGDATABASE", "postgres") +
+                                     " user=" + env_or("PGUSER", "postgres");
+        conn = PQconnectdb(conninfo.c_str());
+        if (PQstatus(conn) != CONNECTION_OK) {
+            fprintf(stderr, "PostgreSQL connection failed: %s\n", PQerrorMessage(conn));
+            PQfinish(conn);  // A failed connection object still has to be released
+            conn = nullptr;
+        }
+    }
+
+    // Single owner: a copy would make two destructors call PQfinish on the same connection.
+    PostgreSQLContext(const PostgreSQLContext&) = delete;
+    PostgreSQLContext& operator=(const PostgreSQLContext&) = delete;
+    ~PostgreSQLContext() { if (conn) PQfinish(conn); }
+
+    // Disables parallel workers and recreates empty tables; execute_sql() clears each PGresult.
+    void create_tables() {
+        if (!conn) return;
+        execute_sql("SET max_parallel_workers_per_gather = 0");
+        execute_sql("SET max_parallel_workers = 0");
+        execute_sql("SET max_parallel_maintenance_workers = 0");
+        execute_sql("DROP TABLE IF EXISTS lineitem");
+        execute_sql("DROP TABLE IF EXISTS orders");
+        execute_sql(
+            "CREATE TABLE lineitem (l_orderkey BIGINT, l_partkey BIGINT, "
+            "l_quantity INT, l_extendedprice DOUBLE PRECISION, l_discount DOUBLE PRECISION, "
+            "l_tax DOUBLE PRECISION)");
+        execute_sql("CREATE TABLE orders (o_orderkey BIGINT, o_custkey BIGINT, o_orderdate TEXT)");
+    }
+
+    // Runs one statement and logs to stderr unless it completed or returned tuples.
+    void execute_sql(const std::string& sql) {
+        if (!conn) return;
+        PGresult* res = PQexec(conn, sql.c_str());
+        const ExecStatusType status = PQresultStatus(res);
+        if (status != PGRES_COMMAND_OK && status != PGRES_TUPLES_OK) {
+            fprintf(stderr, "SQL execution failed: %s\n", PQerrorMessage(conn));
+        }
+        PQclear(res);
+    }
+};
+
+// --- cloudSQL Setup ---
+struct CloudSQLContext {
+    std::string test_dir;  // Scratch directory: wiped on construction and again on destruction
+    std::unique_ptr<StorageManager> storage;
+    std::unique_ptr<BufferPoolManager> bpm;
+    std::unique_ptr<Catalog> catalog;
+    std::unique_ptr<transaction::LockManager> lock_manager;
+    std::unique_ptr<transaction::TransactionManager> txn_manager;
+    std::unique_ptr<QueryExecutor> executor;
+
+    // Builds a fresh engine rooted at `dir` with empty lineitem/orders tables; explicit so a path never converts implicitly.
+    explicit CloudSQLContext(const std::string& dir) : test_dir(dir) {
+        std::filesystem::remove_all(test_dir);
+        std::filesystem::create_directories(test_dir);
+        storage = std::make_unique<StorageManager>(test_dir);
+        bpm = std::make_unique<BufferPoolManager>(4096, *storage);
+        catalog = std::make_unique<Catalog>();
+        lock_manager = std::make_unique<transaction::LockManager>();
+        txn_manager = std::make_unique<transaction::TransactionManager>(*lock_manager, *catalog, *bpm);
+        executor = std::make_unique<QueryExecutor>(*catalog, *bpm, *lock_manager, *txn_manager);
+        executor->set_local_only(true);
+        executor->set_storage_manager(storage.get());
+
+        // Simplified TPC-H lineitem; column order matches the INSERT statements in the benchmarks.
+        CreateTableStatement create_stmt;
+        create_stmt.set_table_name("lineitem");
+        create_stmt.add_column("l_orderkey", "BIGINT");
+        create_stmt.add_column("l_partkey", "BIGINT");
+        create_stmt.add_column("l_quantity", "INT");
+        create_stmt.add_column("l_extendedprice", "DOUBLE");
+        create_stmt.add_column("l_discount", "DOUBLE");
+        create_stmt.add_column("l_tax", "DOUBLE");
+        executor->execute(create_stmt);
+        // Orders table joined against lineitem by the JOIN benchmark.
+        CreateTableStatement orders_stmt;
+        orders_stmt.set_table_name("orders");
+        orders_stmt.add_column("o_orderkey", "BIGINT");
+        orders_stmt.add_column("o_custkey", "BIGINT");
+        orders_stmt.add_column("o_orderdate", "TEXT");
+        executor->execute(orders_stmt);
+    }
+
+    // Resets members newest-first by hand: they would otherwise still be alive during remove_all().
+    ~CloudSQLContext() {
+        executor.reset();
+        txn_manager.reset();
+        lock_manager.reset();
+        catalog.reset();
+        bpm.reset();
+        storage.reset();
+        std::filesystem::remove_all(test_dir);
+    }
+};
+
+}  // anonymous namespace
+
+// ============== OLTP BENCHMARKS ==============
+
+// --- Benchmark: PostgreSQL INSERT (range(0) single-row INSERTs inside one transaction) ---
+static void BM_PostgreSQL_Insert(benchmark::State& state) {
+    const int row_count = static_cast<int>(state.range(0));
+    PostgreSQLContext pg;
+
+    if (pg.conn == nullptr) {
+        state.SkipWithError("PostgreSQL not available");
+        return;
+    }
+    pg.create_tables();
+
+    // Rows accumulate across iterations: nothing empties the table between timed passes.
+    for (auto _ : state) {
+        pg.execute_sql("BEGIN");
+        for (int row = 0; row < row_count; ++row) {
+            pg.execute_sql("INSERT INTO lineitem VALUES (" + std::to_string(row) + ", " +
+                           std::to_string(row % 100) + ", " + std::to_string(1 + (row % 10)) +
+                           ", 1000.0, 0.05, 0.02)");
+        }
+        pg.execute_sql("COMMIT");
+    }
+    state.SetItemsProcessed(state.iterations() * row_count);
+}
+BENCHMARK(BM_PostgreSQL_Insert)->Arg(1000)->Arg(10000);
+
+
+// --- Benchmark: cloudSQL INSERT (range(0) parsed single-row INSERTs between BEGIN and COMMIT) ---
+static void BM_CloudSQL_Insert(benchmark::State& state) {
+    const int row_count = static_cast<int>(state.range(0));
+    CloudSQLContext engine("./bench_pg_insert_" + std::to_string(state.thread_index()));
+    auto& exec = *engine.executor;
+    for (auto _ : state) {
+        exec.execute("BEGIN");
+        for (int row = 0; row < row_count; ++row) {
+            exec.execute(*ParseSQL("INSERT INTO lineitem VALUES (" + std::to_string(row) + ", " +
+                                   std::to_string(row % 100) + ", " +
+                                   std::to_string(1 + (row % 10)) + ", 1000.0, 0.05, 0.02)"));
+        }
+        exec.execute("COMMIT");
+    }
+    state.SetItemsProcessed(state.iterations() * row_count);
+}
+BENCHMARK(BM_CloudSQL_Insert)->Arg(1000)->Arg(10000);
+
+// --- Benchmark: PostgreSQL UPDATE (one UPDATE per l_orderkey on a pre-populated table) ---
+static void BM_PostgreSQL_Update(benchmark::State& state) {
+    const int row_count = static_cast<int>(state.range(0));
+    PostgreSQLContext pg;
+    if (pg.conn == nullptr) {
+        state.SkipWithError("PostgreSQL not available");
+        return;
+    }
+    pg.create_tables();
+
+    // Setup before the benchmark loop: load row_count rows inside a single transaction.
+    pg.execute_sql("BEGIN");
+    for (int row = 0; row < row_count; ++row) {
+        std::string insert_sql = "INSERT INTO lineitem VALUES (";
+        insert_sql += std::to_string(row) + ", " + std::to_string(row % 100) + ", ";
+        insert_sql += std::to_string(1 + (row % 10)) + ", 1000.0, 0.05, 0.02)";
+        pg.execute_sql(insert_sql);
+    }
+    pg.execute_sql("COMMIT");
+
+    // Each iteration is one transaction that updates every key once.
+    for (auto _ : state) {
+        pg.execute_sql("BEGIN");
+        for (int row = 0; row < row_count; ++row) {
+            pg.execute_sql("UPDATE lineitem SET l_quantity = " + std::to_string(row % 20) +
+                           " WHERE l_orderkey = " + std::to_string(row));
+        }
+        pg.execute_sql("COMMIT");
+    }
+    state.SetItemsProcessed(state.iterations() * row_count);
+}
+BENCHMARK(BM_PostgreSQL_Update)->Arg(1000)->Arg(10000);
+
+// --- Benchmark: cloudSQL UPDATE (one parsed UPDATE per l_orderkey on a pre-populated table) ---
+static void BM_CloudSQL_Update(benchmark::State& state) {
+    const int row_count = static_cast<int>(state.range(0));
+    CloudSQLContext engine("./bench_pg_update_" + std::to_string(state.thread_index()));
+    auto& exec = *engine.executor;
+
+    // Setup before the benchmark loop: insert row_count rows in one transaction.
+    exec.execute("BEGIN");
+    for (int row = 0; row < row_count; ++row) {
+        exec.execute(*ParseSQL("INSERT INTO lineitem VALUES (" + std::to_string(row) + ", " +
+                               std::to_string(row % 100) + ", " +
+                               std::to_string(1 + (row % 10)) + ", 1000.0, 0.05, 0.02)"));
+    }
+    exec.execute("COMMIT");
+
+    for (auto _ : state) {
+        exec.execute("BEGIN");
+        for (int row = 0; row < row_count; ++row) {
+            exec.execute(*ParseSQL("UPDATE lineitem SET l_quantity = " + std::to_string(row % 20) +
+                                   " WHERE l_orderkey = " + std::to_string(row)));
+        }
+        exec.execute("COMMIT");
+    }
+    state.SetItemsProcessed(state.iterations() * row_count);
+}
+BENCHMARK(BM_CloudSQL_Update)->Arg(1000)->Arg(10000);
+
+// --- Benchmark: PostgreSQL Point SELECT (one equality lookup per l_orderkey value) ---
+static void BM_PostgreSQL_PointSelect(benchmark::State& state) {
+    const int row_count = static_cast<int>(state.range(0));
+    PostgreSQLContext pg;
+
+    if (pg.conn == nullptr) {
+        state.SkipWithError("PostgreSQL not available");
+        return;
+    }
+
+    pg.create_tables();
+
+    // Setup before the benchmark loop: load row_count rows inside a single transaction.
+    pg.execute_sql("BEGIN");
+    for (int row = 0; row < row_count; ++row) {
+        pg.execute_sql("INSERT INTO lineitem VALUES (" + std::to_string(row) + ", " +
+                       std::to_string(row % 100) + ", " + std::to_string(1 + (row % 10)) +
+                       ", 1000.0, 0.05, 0.02)");
+    }
+    pg.execute_sql("COMMIT");
+
+    // Each iteration looks up every key once.
+    for (auto _ : state) {
+        for (int key = 0; key < row_count; ++key) {
+            pg.execute_sql("SELECT * FROM lineitem WHERE l_orderkey = " + std::to_string(key));
+        }
+    }
+    state.SetItemsProcessed(state.iterations() * row_count);
+}
+BENCHMARK(BM_PostgreSQL_PointSelect)->Arg(1000)->Arg(10000);
+
+// --- Benchmark: cloudSQL Point SELECT (one parsed equality lookup per l_orderkey value) ---
+static void BM_CloudSQL_PointSelect(benchmark::State& state) {
+    const int row_count = static_cast<int>(state.range(0));
+    CloudSQLContext engine("./bench_pg_point_" + std::to_string(state.thread_index()));
+    auto& exec = *engine.executor;
+    // Setup before the benchmark loop: insert row_count rows in one transaction.
+    exec.execute("BEGIN");
+    for (int row = 0; row < row_count; ++row) {
+        exec.execute(*ParseSQL("INSERT INTO lineitem VALUES (" + std::to_string(row) + ", " +
+                               std::to_string(row % 100) + ", " +
+                               std::to_string(1 + (row % 10)) + ", 1000.0, 0.05, 0.02)"));
+    }
+    exec.execute("COMMIT");
+
+    for (auto _ : state) {
+        for (int key = 0; key < row_count; ++key) {
+            exec.execute(
+                *ParseSQL("SELECT * FROM lineitem WHERE l_orderkey = " + std::to_string(key)));
+        }
+    }
+    state.SetItemsProcessed(state.iterations() * row_count);
+}
+BENCHMARK(BM_CloudSQL_PointSelect)->Arg(1000)->Arg(10000);
+
+// ============== ANALYTICAL BENCHMARKS ==============
+
+// --- Benchmark: PostgreSQL Full Scan (SELECT * over a table of range(0) rows) ---
+static void BM_PostgreSQL_FullScan(benchmark::State& state) {
+    const int row_count = static_cast<int>(state.range(0));
+    PostgreSQLContext pg;
+
+    if (pg.conn == nullptr) {
+        state.SkipWithError("PostgreSQL not available");
+        return;
+    }
+    pg.create_tables();
+
+    // Setup before the benchmark loop: load row_count rows inside a single transaction.
+    pg.execute_sql("BEGIN");
+    for (int row = 0; row < row_count; ++row) {
+        std::string insert_sql = "INSERT INTO lineitem VALUES (";
+        insert_sql += std::to_string(row) + ", " + std::to_string(row % 100) + ", ";
+        insert_sql += std::to_string(1 + (row % 10)) + ", 1000.0, 0.05, 0.02)";
+        pg.execute_sql(insert_sql);
+    }
+    pg.execute_sql("COMMIT");
+
+    for (auto _ : state) {
+        pg.execute_sql("SELECT * FROM lineitem");
+    }
+    state.SetItemsProcessed(state.iterations() * row_count);
+}
+BENCHMARK(BM_PostgreSQL_FullScan)->Arg(10000)->Arg(100000);
+
+// --- Benchmark: cloudSQL Full Scan (parsed SELECT * over a table of range(0) rows) ---
+static void BM_CloudSQL_FullScan(benchmark::State& state) {
+    const int row_count = static_cast<int>(state.range(0));
+    CloudSQLContext engine("./bench_pg_fullscan_" + std::to_string(state.thread_index()));
+    auto& exec = *engine.executor;
+    // Setup before the benchmark loop: insert row_count rows in one transaction.
+    exec.execute("BEGIN");
+    for (int row = 0; row < row_count; ++row) {
+        exec.execute(*ParseSQL("INSERT INTO lineitem VALUES (" + std::to_string(row) + ", " +
+                               std::to_string(row % 100) + ", " +
+                               std::to_string(1 + (row % 10)) + ", 1000.0, 0.05, 0.02)"));
+    }
+    exec.execute("COMMIT");
+
+    for (auto _ : state) {
+        exec.execute(*ParseSQL("SELECT * FROM lineitem"));
+    }
+    state.SetItemsProcessed(state.iterations() * row_count);
+}
+BENCHMARK(BM_CloudSQL_FullScan)->Arg(10000)->Arg(100000);
+
+// --- Benchmark: PostgreSQL GROUP BY (SUM(l_extendedprice) grouped by l_quantity) ---
+static void BM_PostgreSQL_GroupBy(benchmark::State& state) {
+    const int row_count = static_cast<int>(state.range(0));
+    PostgreSQLContext pg;
+
+    if (pg.conn == nullptr) {
+        state.SkipWithError("PostgreSQL not available");
+        return;
+    }
+    pg.create_tables();
+
+    // Setup before the benchmark loop: l_quantity is 1 + (row % 10), giving ten groups.
+    pg.execute_sql("BEGIN");
+    for (int row = 0; row < row_count; ++row) {
+        std::string insert_sql = "INSERT INTO lineitem VALUES (";
+        insert_sql += std::to_string(row) + ", " + std::to_string(row % 100) + ", ";
+        insert_sql += std::to_string(1 + (row % 10)) + ", 1000.0, 0.05, 0.02)";
+        pg.execute_sql(insert_sql);
+    }
+    pg.execute_sql("COMMIT");
+
+    for (auto _ : state) {
+        pg.execute_sql("SELECT l_quantity, SUM(l_extendedprice) FROM lineitem GROUP BY l_quantity");
+    }
+    state.SetItemsProcessed(state.iterations() * row_count);
+}
+BENCHMARK(BM_PostgreSQL_GroupBy)->Arg(10000)->Arg(100000);
+
+// --- Benchmark: cloudSQL GROUP BY (SUM(l_extendedprice) grouped by l_quantity) ---
+static void BM_CloudSQL_GroupBy(benchmark::State& state) {
+    const int row_count = static_cast<int>(state.range(0));
+    CloudSQLContext engine("./bench_pg_groupby_" + std::to_string(state.thread_index()));
+    auto& exec = *engine.executor;
+    // Setup before the benchmark loop: l_quantity is 1 + (row % 10), giving ten groups.
+    exec.execute("BEGIN");
+    for (int row = 0; row < row_count; ++row) {
+        exec.execute(*ParseSQL("INSERT INTO lineitem VALUES (" + std::to_string(row) + ", " +
+                               std::to_string(row % 100) + ", " +
+                               std::to_string(1 + (row % 10)) + ", 1000.0, 0.05, 0.02)"));
+    }
+    exec.execute("COMMIT");
+
+    for (auto _ : state) {
+        exec.execute(*ParseSQL(
+            "SELECT l_quantity, SUM(l_extendedprice) FROM lineitem GROUP BY l_quantity"));
+    }
+    state.SetItemsProcessed(state.iterations() * row_count);
+}
+BENCHMARK(BM_CloudSQL_GroupBy)->Arg(10000)->Arg(100000);
+
+// --- Benchmark: PostgreSQL JOIN (orders joined to lineitem on order key, then SUM per order) ---
+static void BM_PostgreSQL_Join(benchmark::State& state) {
+    const int row_count = static_cast<int>(state.range(0));
+    const int order_count = row_count / 10;
+    PostgreSQLContext pg;
+
+    if (pg.conn == nullptr) {
+        state.SkipWithError("PostgreSQL not available");
+        return;
+    }
+    pg.create_tables();
+
+    // Setup before the benchmark loop: order_count orders, row_count lineitems cycling over those order keys.
+    pg.execute_sql("BEGIN");
+    for (int order = 0; order < order_count; ++order) {
+        pg.execute_sql("INSERT INTO orders VALUES (" + std::to_string(order) + ", " +
+                       std::to_string(order % 100) + ", '2024-01-01')");
+    }
+    for (int row = 0; row < row_count; ++row) {
+        pg.execute_sql("INSERT INTO lineitem VALUES (" + std::to_string(row % order_count) + ", " +
+                       std::to_string(row % 100) + ", " + std::to_string(1 + (row % 10)) +
+                       ", 1000.0, 0.05, 0.02)");
+    }
+    pg.execute_sql("COMMIT");
+
+    for (auto _ : state) {
+        pg.execute_sql(
+            "SELECT o.o_orderkey, SUM(l.l_extendedprice) FROM orders o JOIN lineitem l ON "
+            "o.o_orderkey = l.l_orderkey GROUP BY o.o_orderkey");
+    }
+    state.SetItemsProcessed(state.iterations() * row_count);
+}
+BENCHMARK(BM_PostgreSQL_Join)->Arg(10000)->Arg(50000);
+
+// --- Benchmark: cloudSQL JOIN (orders joined to lineitem on order key, then SUM per order) ---
+static void BM_CloudSQL_Join(benchmark::State& state) {
+    const int row_count = static_cast<int>(state.range(0));
+    const int order_count = row_count / 10;
+    CloudSQLContext engine("./bench_pg_join_" + std::to_string(state.thread_index()));
+    auto& exec = *engine.executor;
+
+    // Setup before the benchmark loop: order_count orders, row_count lineitems cycling over those order keys.
+    exec.execute("BEGIN");
+    for (int order = 0; order < order_count; ++order) {
+        exec.execute(*ParseSQL("INSERT INTO orders VALUES (" + std::to_string(order) + ", " +
+                               std::to_string(order % 100) + ", '2024-01-01')"));
+    }
+    for (int row = 0; row < row_count; ++row) {
+        exec.execute(*ParseSQL("INSERT INTO lineitem VALUES (" +
+                               std::to_string(row % order_count) + ", " + std::to_string(row % 100) +
+                               ", " + std::to_string(1 + (row % 10)) + ", 1000.0, 0.05, 0.02)"));
+    }
+    exec.execute("COMMIT");
+
+    for (auto _ : state) {
+        exec.execute(*ParseSQL(
+            "SELECT o.o_orderkey, SUM(l.l_extendedprice) FROM orders o JOIN lineitem l ON "
+            "o.o_orderkey = l.l_orderkey GROUP BY o.o_orderkey"));
+    }
+    state.SetItemsProcessed(state.iterations() * row_count);
+}
+BENCHMARK(BM_CloudSQL_Join)->Arg(10000)->Arg(50000);
+
+// --- Benchmark: PostgreSQL Complex WHERE (scan filtered on l_quantity AND l_discount) ---
+static void BM_PostgreSQL_ComplexWhere(benchmark::State& state) {
+    const int row_count = static_cast<int>(state.range(0));
+    PostgreSQLContext pg;
+
+    if (pg.conn == nullptr) {
+        state.SkipWithError("PostgreSQL not available");
+        return;
+    }
+    pg.create_tables();
+
+    // Setup before the benchmark loop: load row_count rows inside a single transaction.
+    pg.execute_sql("BEGIN");
+    for (int row = 0; row < row_count; ++row) {
+        std::string insert_sql = "INSERT INTO lineitem VALUES (";
+        insert_sql += std::to_string(row) + ", " + std::to_string(row % 100) + ", ";
+        insert_sql += std::to_string(1 + (row % 10)) + ", 1000.0, 0.05, 0.02)";
+        pg.execute_sql(insert_sql);
+    }
+    pg.execute_sql("COMMIT");
+
+    for (auto _ : state) {
+        pg.execute_sql(
+            "SELECT * FROM lineitem WHERE l_quantity > 5 AND l_discount < 0.06");
+    }
+    state.SetItemsProcessed(state.iterations() * row_count);
+}
+BENCHMARK(BM_PostgreSQL_ComplexWhere)->Arg(10000)->Arg(100000);
+
+// --- Benchmark: cloudSQL Complex WHERE (parsed scan filtered on l_quantity AND l_discount) ---
+static void BM_CloudSQL_ComplexWhere(benchmark::State& state) {
+    const int row_count = static_cast<int>(state.range(0));
+    CloudSQLContext engine("./bench_pg_where_" + std::to_string(state.thread_index()));
+    auto& exec = *engine.executor;
+    // Setup before the benchmark loop: insert row_count rows in one transaction.
+    exec.execute("BEGIN");
+    for (int row = 0; row < row_count; ++row) {
+        exec.execute(*ParseSQL("INSERT INTO lineitem VALUES (" + std::to_string(row) + ", " +
+                               std::to_string(row % 100) + ", " +
+                               std::to_string(1 + (row % 10)) + ", 1000.0, 0.05, 0.02)"));
+    }
+    exec.execute("COMMIT");
+
+    for (auto _ : state) {
+        exec.execute(
+            *ParseSQL("SELECT * FROM lineitem WHERE l_quantity > 5 AND l_discount < 0.06"));
+    }
+    state.SetItemsProcessed(state.iterations() * row_count);
+}
+BENCHMARK(BM_CloudSQL_ComplexWhere)->Arg(10000)->Arg(100000);
+
+BENCHMARK_MAIN();
diff --git a/docs/performance/POSTGRESQL_COMPARISON.md b/docs/performance/POSTGRESQL_COMPARISON.md
new file mode 100644
index 00000000..fbbf470d
--- /dev/null
+++ b/docs/performance/POSTGRESQL_COMPARISON.md
@@ -0,0 +1,165 @@
+# PostgreSQL vs cloudSQL Benchmark
+
+## Overview
+
+This benchmark suite compares cloudSQL's vectorized SQL engine against PostgreSQL across multiple workload categories. The goal is to demonstrate cloudSQL's performance characteristics relative to the industry-standard open-source database.
+
+## Benchmark Suite
+
+### OLTP Workloads (Point Queries, Writes)
+
+| Benchmark | Description | cloudSQL | PostgreSQL |
+|-----------|-------------|----------|------------|
+| `BM_CloudSQL_Insert` / `BM_PostgreSQL_Insert` | Bulk INSERT throughput | items/s | items/s |
+| `BM_CloudSQL_Update` / `BM_PostgreSQL_Update` | Row UPDATE by key | items/s | items/s |
+| `BM_CloudSQL_PointSelect` / `BM_PostgreSQL_PointSelect` | Primary key lookup | items/s | items/s |
+
+### Analytical Workloads (Reads, Aggregation)
+
+| Benchmark | Description | cloudSQL | PostgreSQL |
+|-----------|-------------|----------|------------|
+| `BM_CloudSQL_FullScan` / `BM_PostgreSQL_FullScan` | SELECT * FROM table | items/s | items/s |
+| `BM_CloudSQL_GroupBy` / `BM_PostgreSQL_GroupBy` | GROUP BY aggregation | items/s | items/s |
+| `BM_CloudSQL_Join` / `BM_PostgreSQL_Join` | Two-table JOIN | items/s | items/s |
+| `BM_CloudSQL_ComplexWhere` / `BM_PostgreSQL_ComplexWhere` | Multi-condition filter | items/s | items/s |
+
+## Schema
+
+Both systems use identical TPC-H inspired schemas:
+
+```sql
+CREATE TABLE lineitem (
+    l_orderkey BIGINT PRIMARY KEY,
+    l_partkey BIGINT,
+    l_quantity INT,
+    l_extendedprice DOUBLE,
+    l_discount DOUBLE,
+    l_tax DOUBLE
+);
+
+CREATE TABLE orders (
+    o_orderkey BIGINT PRIMARY KEY,
+    o_custkey BIGINT,
+    o_orderdate TEXT
+);
+```
+
+## Running the Benchmark
+
+### Prerequisites
+
+- PostgreSQL must be installed and running locally
+- Environment variables (optional, defaults shown):
+  - `PGHOST` (default: localhost)
+  - `PGPORT` (default: 5432)
+  - `PGDATABASE` (default: postgres)
+  - `PGUSER` (default: postgres)
+
+### Build
+
+```bash
+cmake -DBUILD_BENCHMARKS=ON -B build
+cmake --build build --target postgresql_comparison_bench
+```
+
+### Run
+
+```bash
+./build/postgresql_comparison_bench --benchmark_format=json > pg_results.json
+```
+
+### Run specific benchmarks
+
+```bash
+# Full scan comparison
+./build/postgresql_comparison_bench --benchmark_filter="FullScan"
+
+# GROUP BY comparison
+./build/postgresql_comparison_bench --benchmark_filter="GroupBy"
+
+# cloudSQL benchmarks only
+./build/postgresql_comparison_bench --benchmark_filter="CloudSQL"
+```
+
+## Expected Results
+
+### Analytical Workloads (cloudSQL advantage)
+
+cloudSQL's vectorized execution typically outperforms PostgreSQL on:
+- **Full table scans**: Vectorized batch processing eliminates row-by-row overhead
+- **GROUP BY aggregation**: Hash-based aggregation with OpenAddressHashAgg
+- **JOIN operations**: Vectorized hash join with FNV-1a partitioning
+- **Complex WHERE**: Early predicate evaluation reduces data movement
+
+### OLTP Workloads (PostgreSQL advantage)
+
+PostgreSQL typically outperforms cloudSQL on:
+- **INSERT throughput**: WAL-based logging for durability, with MVCC for concurrent access
+- **UPDATE by key**: Heap storage with MVCC row versioning (HOT updates when no indexed column changes)
+- **Point SELECT**: B-tree index with minimal I/O
+
+## Methodology Notes
+
+### Fair Comparison Guidelines
+
+1. **Same hardware**: Both systems run on identical hardware
+2. **Same data**: Identical row counts and data distributions
+3. **Same schema**: Matching column types and index definitions
+4. **Warm vs cold**: Results should note whether data fits in memory
+5. **Connection overhead**: Excluded from throughput measurements
+
+### Limitations
+
+- **Query planning differences**: cloudSQL and PostgreSQL may choose different query plans
+- **Index availability**: The benchmark tables are created without indexes on either system, so index-based access paths are not compared
+- **Storage engines**: PostgreSQL uses heap storage; cloudSQL uses columnar for analytics
+- **Durability guarantees**: PostgreSQL's ACID compliance vs cloudSQL's eventual consistency
+
+## Interpreting Results
+
+### Throughput Ratio
+
+```
+ratio = cloudSQL_items_per_second / PostgreSQL_items_per_second
+```
+
+- `ratio > 1`: cloudSQL is faster
+- `ratio < 1`: PostgreSQL is faster
+- `ratio ≈ 1`: Equivalent performance
+
+### When cloudSQL Wins
+
+cloudSQL shows the largest advantages on:
+1. Analytical scans over large datasets
+2. Aggregation-heavy workloads
+3. Complex expressions evaluated in vectorized fashion
+
+### When PostgreSQL Wins
+
+PostgreSQL shows advantages on:
+1. Single-row lookups by indexed key
+2. Write-heavy workloads with durability requirements
+3. Workloads that benefit from sophisticated cost-based optimization
+
+## Example Output
+
+```json
+{
+  "benchmarks": [
+    {
+      "name": "BM_CloudSQL_FullScan/100000",
+      "items_per_second": 2680000
+    },
+    {
+      "name": "BM_PostgreSQL_FullScan/100000",
+      "items_per_second": 890000
+    }
+  ]
+}
+```
+
+## References
+
+- cloudSQL: [GitHub Repository](../README.md)
+- PostgreSQL: https://www.postgresql.org/
+- TPC-H: http://www.tpc.org/tpch/
\ No newline at end of file

From bb0b83abef9a9b70197819318a24e6bdf6bb3046 Mon Sep 17 00:00:00 2001
From: poyrazK <83272398+poyrazK@users.noreply.github.com>
Date: Sat, 13 Jun 2026 13:12:49 +0000
Subject: [PATCH 4/7] style: automated clang-format fixes

---
 include/executor/vectorized_operator.hpp | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/include/executor/vectorized_operator.hpp b/include/executor/vectorized_operator.hpp
index b382dc32..81b86f3d 100644
--- a/include/executor/vectorized_operator.hpp
+++ b/include/executor/vectorized_operator.hpp
@@ -800,12 +800,12 @@ class VectorizedGroupByOperator : public VectorizedOperator {
     static constexpr size_t MAX_KEY_LEN = 256;
     std::vector<uint8_t>
         batch_key_buffer_;  // Heap-allocated scratch: MAX_BATCH_SIZE * MAX_KEY_LEN bytes
-    std::vector<uint64_t> batch_hashes_;      // batch_size
-    std::vector<int64_t> batch_int64_keys_;  // batch_size (for int64-only path)
-    std::vector<size_t> batch_key_lens_;     // batch_size
-    std::vector<size_t> batch_bucket_idx_;   // batch_size - bucket index for each row
+    std::vector<uint64_t> batch_hashes_;        // batch_size
+    std::vector<int64_t> batch_int64_keys_;     // batch_size (for int64-only path)
+    std::vector<size_t> batch_key_lens_;        // batch_size
+    std::vector<size_t> batch_bucket_idx_;      // batch_size - bucket index for each row
     std::vector<size_t> batch_active_buckets_;  // batch_size - unique buckets touched this batch
-    bool all_int64_keys_ = false;            // True when all GROUP BY cols are INT64
+    bool all_int64_keys_ = false;               // True when all GROUP BY cols are INT64
 
     // Parallel aggregation support (Phase 4)
     std::shared_ptr<ThreadPool> thread_pool_;
@@ -1171,7 +1171,7 @@ class VectorizedGroupByOperator : public VectorizedOperator {
     // Type resolution happens once per aggregate, not per-row
     template <typename Bucket>
     void update_aggregate_batch(Bucket& bucket, const ColumnVector& col, size_t agg_idx,
-                               const size_t* row_indices, size_t num_rows) {
+                                const size_t* row_indices, size_t num_rows) {
         const auto& agg = aggregates_[agg_idx];
 
         if (agg.type == AggregateType::Count && agg.input_col_idx < 0) {
@@ -1214,7 +1214,8 @@ class VectorizedGroupByOperator : public VectorizedOperator {
                     if (!num_col.is_null(r)) {
                         double val = raw[r];
                         bucket.mins_float64[agg_idx] = std::min(bucket.mins_float64[agg_idx], val);
-                        bucket.maxes_float64[agg_idx] = std::max(bucket.maxes_float64[agg_idx], val);
+                        bucket.maxes_float64[agg_idx] =
+                            std::max(bucket.maxes_float64[agg_idx], val);
                     }
                 }
             } else {

From c136c94ac2e7d07bb3034db3ba9b019162a4c20d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Poyraz=20K=C3=BC=C3=A7=C3=BCkarslan?=
 <83272398+PoyrazK@users.noreply.github.com>
Date: Sat, 13 Jun 2026 16:25:57 +0300
Subject: [PATCH 5/7] Clean up dead code from batch optimization attempt

---
 docs/performance/POSTGRESQL_COMPARISON.md |  2 +-
 include/executor/vectorized_operator.hpp  | 78 ++---------------------
 2 files changed, 5 insertions(+), 75 deletions(-)

diff --git a/docs/performance/POSTGRESQL_COMPARISON.md b/docs/performance/POSTGRESQL_COMPARISON.md
index fbbf470d..fc6b99ee 100644
--- a/docs/performance/POSTGRESQL_COMPARISON.md
+++ b/docs/performance/POSTGRESQL_COMPARISON.md
@@ -162,4 +162,4 @@ PostgreSQL shows advantages on:
 
 - cloudSQL: [GitHub Repository](../README.md)
 - PostgreSQL: https://www.postgresql.org/
-- TPC-H: http://www.tpc.org/tpch/
\ No newline at end of file
+- TPC-H: http://www.tpc.org/tpch/
diff --git a/include/executor/vectorized_operator.hpp b/include/executor/vectorized_operator.hpp
index 81b86f3d..6b02dfeb 100644
--- a/include/executor/vectorized_operator.hpp
+++ b/include/executor/vectorized_operator.hpp
@@ -800,12 +800,10 @@ class VectorizedGroupByOperator : public VectorizedOperator {
     static constexpr size_t MAX_KEY_LEN = 256;
     std::vector<uint8_t>
         batch_key_buffer_;  // Heap-allocated scratch: MAX_BATCH_SIZE * MAX_KEY_LEN bytes
-    std::vector<uint64_t> batch_hashes_;        // batch_size
-    std::vector<int64_t> batch_int64_keys_;     // batch_size (for int64-only path)
-    std::vector<size_t> batch_key_lens_;        // batch_size
-    std::vector<size_t> batch_bucket_idx_;      // batch_size - bucket index for each row
-    std::vector<size_t> batch_active_buckets_;  // batch_size - unique buckets touched this batch
-    bool all_int64_keys_ = false;               // True when all GROUP BY cols are INT64
+    std::vector<uint64_t> batch_hashes_;     // batch_size
+    std::vector<int64_t> batch_int64_keys_;  // batch_size (for int64-only path)
+    std::vector<size_t> batch_key_lens_;     // batch_size
+    bool all_int64_keys_ = false;            // True when all GROUP BY cols are INT64
 
     // Parallel aggregation support (Phase 4)
     std::shared_ptr<ThreadPool> thread_pool_;
@@ -869,8 +867,6 @@ class VectorizedGroupByOperator : public VectorizedOperator {
         batch_hashes_.resize(MAX_BATCH_SIZE);
         batch_int64_keys_.resize(MAX_BATCH_SIZE);
         batch_key_lens_.resize(MAX_BATCH_SIZE);
-        batch_bucket_idx_.resize(MAX_BATCH_SIZE);
-        batch_active_buckets_.resize(MAX_BATCH_SIZE);
 
         // Create schema for group key evaluation
         Schema key_schema;
@@ -1167,72 +1163,6 @@ class VectorizedGroupByOperator : public VectorizedOperator {
         }
     }
 
-    // Batch-oriented accumulator update - processes multiple rows per call
-    // Type resolution happens once per aggregate, not per-row
-    template <typename Bucket>
-    void update_aggregate_batch(Bucket& bucket, const ColumnVector& col, size_t agg_idx,
-                                const size_t* row_indices, size_t num_rows) {
-        const auto& agg = aggregates_[agg_idx];
-
-        if (agg.type == AggregateType::Count && agg.input_col_idx < 0) {
-            // COUNT(*) - all rows contribute
-            bucket.counts[agg_idx] += num_rows;
-            return;
-        }
-
-        if (agg.input_col_idx < 0) return;
-
-        // Type resolved ONCE, then batch process all rows
-        if (col.type() == common::ValueType::TYPE_INT64) {
-            const auto& num_col = static_cast<const NumericVector<int64_t>&>(col);
-            const int64_t* raw = num_col.raw_data();
-            for (size_t j = 0; j < num_rows; ++j) {
-                size_t r = row_indices[j];
-                if (!num_col.is_null(r)) {
-                    bucket.counts[agg_idx]++;
-                    bucket.sums_int64[agg_idx] += raw[r];
-                }
-            }
-        } else if (col.type() == common::ValueType::TYPE_FLOAT64) {
-            const auto& num_col = static_cast<const NumericVector<double>&>(col);
-            const double* raw = num_col.raw_data();
-            for (size_t j = 0; j < num_rows; ++j) {
-                size_t r = row_indices[j];
-                if (!num_col.is_null(r)) {
-                    bucket.counts[agg_idx]++;
-                    bucket.sums_float64[agg_idx] += raw[r];
-                    bucket.has_float_value[agg_idx] = true;
-                }
-            }
-        } else if (agg.type == AggregateType::Min || agg.type == AggregateType::Max) {
-            // MIN/MAX with sentinel-based approach (no branch on has_mins)
-            if (col.type() == common::ValueType::TYPE_FLOAT64) {
-                const auto& num_col = static_cast<const NumericVector<double>&>(col);
-                const double* raw = num_col.raw_data();
-                for (size_t j = 0; j < num_rows; ++j) {
-                    size_t r = row_indices[j];
-                    if (!num_col.is_null(r)) {
-                        double val = raw[r];
-                        bucket.mins_float64[agg_idx] = std::min(bucket.mins_float64[agg_idx], val);
-                        bucket.maxes_float64[agg_idx] =
-                            std::max(bucket.maxes_float64[agg_idx], val);
-                    }
-                }
-            } else {
-                const auto& num_col = static_cast<const NumericVector<int64_t>&>(col);
-                const int64_t* raw = num_col.raw_data();
-                for (size_t j = 0; j < num_rows; ++j) {
-                    size_t r = row_indices[j];
-                    if (!num_col.is_null(r)) {
-                        int64_t val = raw[r];
-                        bucket.mins[agg_idx] = std::min(bucket.mins[agg_idx], val);
-                        bucket.maxes[agg_idx] = std::max(bucket.maxes[agg_idx], val);
-                    }
-                }
-            }
-        }
-    }
-
     void update_accumulators(VectorizedGroupState& state, VectorBatch& batch, size_t row_idx) {
         for (size_t i = 0; i < aggregates_.size(); ++i) {
             const auto& agg = aggregates_[i];

From b088558698c432324ecaad1b3d43f1c1d0cd9d57 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Poyraz=20K=C3=BC=C3=A7=C3=BCkarslan?=
 <83272398+PoyrazK@users.noreply.github.com>
Date: Sat, 13 Jun 2026 16:42:36 +0300
Subject: [PATCH 6/7] Fix documentation: schema shows no PRIMARY KEY (matches
 actual benchmark code)

---
 docs/performance/POSTGRESQL_COMPARISON.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/docs/performance/POSTGRESQL_COMPARISON.md b/docs/performance/POSTGRESQL_COMPARISON.md
index fc6b99ee..4bf9fd15 100644
--- a/docs/performance/POSTGRESQL_COMPARISON.md
+++ b/docs/performance/POSTGRESQL_COMPARISON.md
@@ -25,11 +25,11 @@ This benchmark suite compares cloudSQL's vectorized SQL engine against PostgreSQ
 
 ## Schema
 
-Both systems use identical TPC-H inspired schemas:
+Both systems use identical TPC-H inspired schemas (no indexes for fair comparison):
 
 ```sql
 CREATE TABLE lineitem (
-    l_orderkey BIGINT PRIMARY KEY,
+    l_orderkey BIGINT,
     l_partkey BIGINT,
     l_quantity INT,
     l_extendedprice DOUBLE,
@@ -38,7 +38,7 @@ CREATE TABLE lineitem (
 );
 
 CREATE TABLE orders (
-    o_orderkey BIGINT PRIMARY KEY,
+    o_orderkey BIGINT,
     o_custkey BIGINT,
     o_orderdate TEXT
 );

From 1b661d762cd16223735c200a246b78caa318571f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Poyraz=20K=C3=BC=C3=A7=C3=BCkarslan?=
 <83272398+PoyrazK@users.noreply.github.com>
Date: Sat, 13 Jun 2026 17:10:14 +0300
Subject: [PATCH 7/7] Fix review findings: PGresult leak, INSERT growing table,
 MIN/MAX flags, Homebrew paths

- Fix PGresult* leak in create_tables(): check PQresultStatus and always PQclear
- Fix INSERT benchmark: add TRUNCATE at start of each iteration for fair measurement
- Fix MIN/MAX presence flags: set has_mins/has_float_minmax in update_bucket_accumulators
- Add Homebrew paths for macOS PostgreSQL discovery in CMakeLists.txt
- Fix broken link in POSTGRESQL_COMPARISON.md (../README.md -> ../../README.md)
---
 CMakeLists.txt                             |  4 +-
 benchmarks/postgresql_comparison_bench.cpp | 48 ++++++++++++++++++----
 docs/performance/POSTGRESQL_COMPARISON.md  |  2 +-
 include/executor/vectorized_operator.hpp   |  2 +
 4 files changed, 46 insertions(+), 10 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index b2855802..ad37e9c4 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -196,8 +196,8 @@ if(BUILD_BENCHMARKS)
     endif()
 
     # PostgreSQL comparison benchmark
-    find_library(PQLIB_LIBRARY pq PATHS /usr/lib /usr/local/lib)
-    find_path(PQLIB_INCLUDE_DIR libpq-fe.h PATHS /usr/include /usr/local/include)
+    find_library(PQLIB_LIBRARY pq PATHS /usr/lib /usr/local/lib /opt/homebrew/lib)
+    find_path(PQLIB_INCLUDE_DIR libpq-fe.h PATHS /usr/include /usr/local/include /opt/homebrew/include)
     if(PQLIB_LIBRARY AND PQLIB_INCLUDE_DIR)
         add_executable(postgresql_comparison_bench benchmarks/postgresql_comparison_bench.cpp)
         target_include_directories(postgresql_comparison_bench PRIVATE ${PQLIB_INCLUDE_DIR})
diff --git a/benchmarks/postgresql_comparison_bench.cpp b/benchmarks/postgresql_comparison_bench.cpp
index d0bf131c..f5aa4f3e 100644
--- a/benchmarks/postgresql_comparison_bench.cpp
+++ b/benchmarks/postgresql_comparison_bench.cpp
@@ -63,18 +63,46 @@ struct PostgreSQLContext {
 
     void create_tables() {
         if (!conn) return;
-        PQexec(conn, "SET max_parallel_workers_per_gather = 0");
-        PQexec(conn, "SET max_parallel_workers = 0");
-        PQexec(conn, "SET max_parallel_maintenance_workers = 0");
-        PQexec(conn, "DROP TABLE IF EXISTS lineitem");
-        PQexec(conn, "DROP TABLE IF EXISTS orders");
-        PQexec(conn,
+        PGresult* r = PQexec(conn, "SET max_parallel_workers_per_gather = 0");
+        if (PQresultStatus(r) != PGRES_COMMAND_OK) {
+            fprintf(stderr, "SET max_parallel_workers_per_gather failed: %s\n", PQerrorMessage(conn));
+        }
+        PQclear(r);
+        r = PQexec(conn, "SET max_parallel_workers = 0");
+        if (PQresultStatus(r) != PGRES_COMMAND_OK) {
+            fprintf(stderr, "SET max_parallel_workers failed: %s\n", PQerrorMessage(conn));
+        }
+        PQclear(r);
+        r = PQexec(conn, "SET max_parallel_maintenance_workers = 0");
+        if (PQresultStatus(r) != PGRES_COMMAND_OK) {
+            fprintf(stderr, "SET max_parallel_maintenance_workers failed: %s\n", PQerrorMessage(conn));
+        }
+        PQclear(r);
+        r = PQexec(conn, "DROP TABLE IF EXISTS lineitem");
+        if (PQresultStatus(r) != PGRES_COMMAND_OK) {
+            fprintf(stderr, "DROP TABLE lineitem failed: %s\n", PQerrorMessage(conn));
+        }
+        PQclear(r);
+        r = PQexec(conn, "DROP TABLE IF EXISTS orders");
+        if (PQresultStatus(r) != PGRES_COMMAND_OK) {
+            fprintf(stderr, "DROP TABLE orders failed: %s\n", PQerrorMessage(conn));
+        }
+        PQclear(r);
+        r = PQexec(conn,
                "CREATE TABLE lineitem (l_orderkey BIGINT, l_partkey BIGINT, "
                "l_quantity INT, l_extendedprice DOUBLE PRECISION, l_discount DOUBLE PRECISION, "
                "l_tax DOUBLE PRECISION)");
-        PQexec(conn,
+        if (PQresultStatus(r) != PGRES_COMMAND_OK) {
+            fprintf(stderr, "CREATE TABLE lineitem failed: %s\n", PQerrorMessage(conn));
+        }
+        PQclear(r);
+        r = PQexec(conn,
                "CREATE TABLE orders (o_orderkey BIGINT, o_custkey BIGINT, "
                "o_orderdate TEXT)");
+        if (PQresultStatus(r) != PGRES_COMMAND_OK) {
+            fprintf(stderr, "CREATE TABLE orders failed: %s\n", PQerrorMessage(conn));
+        }
+        PQclear(r);
     }
 
     void execute_sql(const std::string& sql) {
@@ -157,6 +185,9 @@ static void BM_PostgreSQL_Insert(benchmark::State& state) {
     ctx.create_tables();
 
     for (auto _ : state) {
+        // Clear table at start of each iteration to measure insert throughput
+        // without accumulation effects
+        ctx.execute_sql("TRUNCATE TABLE lineitem");
         ctx.execute_sql("BEGIN");
         for (int i = 0; i < num_rows; ++i) {
             std::string sql = "INSERT INTO lineitem VALUES (" + std::to_string(i) + ", " +
@@ -176,6 +207,9 @@ static void BM_CloudSQL_Insert(benchmark::State& state) {
     CloudSQLContext ctx("./bench_pg_insert_" + std::to_string(state.thread_index()));
 
     for (auto _ : state) {
+        // Clear table at start of each iteration to measure insert throughput
+        // without accumulation effects
+        ctx.executor->execute(*ParseSQL("TRUNCATE TABLE lineitem"));
         ctx.executor->execute("BEGIN");
         for (int i = 0; i < num_rows; ++i) {
             ctx.executor->execute(*ParseSQL("INSERT INTO lineitem VALUES (" + std::to_string(i) +
diff --git a/docs/performance/POSTGRESQL_COMPARISON.md b/docs/performance/POSTGRESQL_COMPARISON.md
index 4bf9fd15..13631908 100644
--- a/docs/performance/POSTGRESQL_COMPARISON.md
+++ b/docs/performance/POSTGRESQL_COMPARISON.md
@@ -160,6 +160,6 @@ PostgreSQL shows advantages on:
 
 ## References
 
-- cloudSQL: [GitHub Repository](../README.md)
+- cloudSQL: [GitHub Repository](../../README.md)
 - PostgreSQL: https://www.postgresql.org/
 - TPC-H: http://www.tpc.org/tpch/
diff --git a/include/executor/vectorized_operator.hpp b/include/executor/vectorized_operator.hpp
index 6b02dfeb..20cd71f7 100644
--- a/include/executor/vectorized_operator.hpp
+++ b/include/executor/vectorized_operator.hpp
@@ -1152,11 +1152,13 @@ class VectorizedGroupByOperator : public VectorizedOperator {
                         // Sentinel-based: mins/maxes initialized to max/min values
                         bucket.mins_float64[i] = std::min(bucket.mins_float64[i], val);
                         bucket.maxes_float64[i] = std::max(bucket.maxes_float64[i], val);
+                        bucket.has_float_minmax[i] = true;
                     } else {
                         auto val = col.get(row_idx).to_int64();
                         // Sentinel-based: mins/maxes initialized to max/min values
                         bucket.mins[i] = std::min(bucket.mins[i], val);
                         bucket.maxes[i] = std::max(bucket.maxes[i], val);
+                        bucket.has_mins[i] = true;
                     }
                 }
             }