diff --git a/.circleci/config.yml b/.circleci/config.yml
index 4fc6d2e..f15d224 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -73,6 +73,80 @@ jobs:
       - after_failure:
           when : "on_fail"
 
+  perf_bench:
+    description: "Run insert/select performance benchmarks on master and on this branch, print the diff"
+    docker:
+      - image: nuodb/nuodb:latest
+        user: root
+    resource_class: xlarge
+    environment:
+      TZ : America/New_York
+      NUO_SET_TLS : disable
+      NUOCMD_CLIENT_KEY : ""
+      NUOCMD_VERIFY_SERVER : ""
+      NUOCMD_PLUGINS : ""
+    steps:
+      - checkout
+      - run:
+          name: Install build tools
+          command: |
+            PYVER=$(python3 -c 'import sys; print(f"{sys.version_info.major}.{sys.version_info.minor}")')
+            dnf install -y git make gcc "python${PYVER}-devel"
+      - run:  # curl -f: an HTTP error must fail this step, not be saved as get-pip.py
+          name: Install pip
+          command: |
+            curl -fsSL https://bootstrap.pypa.io/get-pip.py -o /tmp/get-pip.py
+            python3 /tmp/get-pip.py --user
+      - run:
+          name: Make artifact directories
+          command: mkdir -p artifacts results
+      - run:
+          name: Start NuoDB Admin
+          command: |
+            sudo -u nuodb /opt/nuodb/etc/nuoadmin tls $NUO_SET_TLS
+            sudo -u nuodb /opt/nuodb/etc/nuoadmin tls status
+            sudo -u nuodb /opt/nuodb/etc/nuoadmin start
+            sudo -u nuodb /opt/nuodb/bin/nuocmd --show-json get effective-license
+      # Run master then this branch on the same runner and let compare.py
+      # print a delta table.  Hardware noise mostly cancels because both
+      # runs share the container; xlarge gives us dedicated cores.
+      - run:
+          name: Baseline benchmarks on master
+          command: |
+            GIT_SSH_COMMAND="ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null" \
+                git fetch --no-tags --depth=1 origin master
+            git worktree add /tmp/base FETCH_HEAD
+            # Copy the perf suite + conftest hooks from this branch so the
+            # master worktree's driver is exercised by the same benchmarks.
+            cp -a tests/perf /tmp/base/tests/
+            cp tests/conftest.py /tmp/base/tests/conftest.py
+            cd /tmp/base
+            make PYTHON=python3 install
+            $HOME/.local/bin/pip install pytest-benchmark
+            python3 -m pytest tests/perf --run-perf --benchmark-only \
+                --benchmark-json=/tmp/master.json \
+                --benchmark-columns=min,mean,median,stddev,rounds
+      - run:
+          name: Reset DB
+          command: |
+            sudo -u nuodb /opt/nuodb/bin/nuocmd shutdown database \
+                --db-name pynuodb_test 2>/dev/null || true
+      - run:
+          name: Branch benchmarks + diff vs master
+          command: |
+            make PYTHON=python3 install
+            $HOME/.local/bin/pip install pytest-benchmark
+            python3 -m pytest tests/perf --run-perf --benchmark-only \
+                --benchmark-json=artifacts/branch.json \
+                --benchmark-columns=min,mean,median,stddev,rounds
+            python3 tests/perf/compare.py \
+                /tmp/master.json artifacts/branch.json \
+                | tee artifacts/perf_diff.txt
+      - store_artifacts:
+          path: artifacts
+      - after_failure:
+          when : "on_fail"
+
 workflows:
   build-project:
     jobs:
@@ -80,3 +154,7 @@ workflows:
           name: "Build and run regression tests"
           context:
             - common-config
+      - perf_bench:
+          name: "Run performance benchmarks, comparing results to master"
+          context:
+            - common-config
diff --git a/test-performance/timesInsert.py b/test-performance/timesInsert.py
deleted file mode 100644
index 9ee32bd..0000000
--- a/test-performance/timesInsert.py
+++ /dev/null
@@ -1,82 +0,0 @@
-# A database named test with user dba / password dba must be created first
-
-import os
-import time
-
-import pynuodb
-
-smallIterations = 100
-largeIterations = smallIterations * 1000
-
-
-def gettime():
-    return time.time()
-
-
-def insert(count):
-    for i in range(count):
-        cursor.execute("INSERT INTO perf_test (a,b ) VALUES (%d,'A')" % i)
-    connection.commit()
-
-
-def select():
-    cursor.execute("select * from perf_test")
-    cursor.fetchall()
-
-
-dropTable = "drop table perf_test cascade if exists"
-createTable = "create table perf_test (a int,b char)"
-
-port = os.environ.get('NUODB_PORT')
-if not port:
-    port = '48004'
-
-options = {}
-trustStore = os.environ.get('NUOCMD_VERIFY_SERVER')
-if trustStore:
-    options = {'trustStore': trustStore, 'verifyHostname': 'False'}
-
-connection = pynuodb.connect("test", "localhost:" + port, "dba", "dba",
-                             options=options)
-cursor = connection.cursor()
-cursor.execute("use test")
-
-# Begin SMALL_INSERT_ITERATIONS test
-cursor.execute(dropTable)
-cursor.execute(createTable)
-start = gettime()
-insert(smallIterations)
-smallInsertElapsed = gettime() - start
-
-print("Elapse time of SMALL_INSERT_ITERATIONS = %.4fs" % (smallInsertElapsed))
-
-# Begin SMALL_SELECT_ITERATIONS test
-start = gettime()
-select()
-smallSelectElapsed = gettime() - start
-print("Elapse time of SMALL_SELECT_ITERATIONS = %.4fs" % (smallSelectElapsed))
-
-# Begin LARGE_INSERT_ITERATIONS test
-cursor.execute(dropTable)
-cursor.execute(createTable)
-
-start = gettime()
-insert(largeIterations)
-largeInsertElapsed = gettime() - start
-
-print("Elapse time of LARGE_INSERT_ITERATIONS = %.4fs" % (largeInsertElapsed))
-
-# Begin LARGE_SELECT_ITERATIONS test
-start = gettime()
-select()
-largeSelectElapsed = gettime() - start
-
-print("Elapse time of LARGE_SELECT_ITERATIONS = %.4fs" % (largeSelectElapsed))
-
-if largeInsertElapsed > smallInsertElapsed * 1000:
-    print("Insert is too slow!")
-
-if largeSelectElapsed > smallSelectElapsed * 1000:
-    print("Select is too slow!")
-
-print("\n")
diff --git a/test_requirements.txt b/test_requirements.txt
index ef90a4a..2328d32 100644
--- a/test_requirements.txt
+++ b/test_requirements.txt
@@ -1,6 +1,7 @@
 mock>=1.0
 nose>=1.3
 pytest>=2.7
+pytest-benchmark>=4.0
 coverage>=3.7
 pytest-cov>=1.8.1
 coveralls>=0.5
diff --git a/tests/conftest.py b/tests/conftest.py
index fb7cd90..180e014 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -30,6 +30,26 @@
 
 from . import nuocmd, cvtjson
 
+
+def pytest_addoption(parser):  # pytest hook: registers the opt-in --run-perf flag
+    parser.addoption("--run-perf", action="store_true", default=False,
+                     help="run performance benchmarks under tests/perf")
+
+
+def pytest_configure(config):
+    """Register the ``perf`` marker line under pytest's ``markers`` ini key."""
+    perf_marker = "perf: performance benchmark; skipped unless --run-perf is passed"
+    config.addinivalue_line("markers", perf_marker)
+
+
+def pytest_collection_modifyitems(config, items):
+    """Attach a skip marker to every ``perf`` item unless --run-perf was given."""
+    if not config.getoption("--run-perf"):
+        skip_perf = pytest.mark.skip(
+            reason="need --run-perf to run performance tests")
+        for perf_item in (it for it in items if "perf" in it.keywords):
+            perf_item.add_marker(skip_perf)
+
 _log = logging.getLogger("pynuodbtest")
 
 DB_OPTIONS = []  # type: List[str]
diff --git a/tests/perf/__init__.py b/tests/perf/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/perf/compare.py b/tests/perf/compare.py
new file mode 100644
index 0000000..82d83e7
--- /dev/null
+++ b/tests/perf/compare.py
@@ -0,0 +1,65 @@
+# -*- coding: utf-8 -*-
+"""Compare two pytest-benchmark JSON files (master vs branch).
+
+Prints a table of master min, branch min, and the absolute + percentage
+delta per test.  Exits non-zero if any test regressed by more than
+--fail-threshold (default 15%), so CI turns a real regression into a
+failed build.  Improvements never fail the build.
+"""
+from __future__ import print_function
+
+import argparse
+import json
+import sys
+
+
+def _load(path):
+    """Return {benchmark name: stats['min']} read from a pytest-benchmark JSON file."""
+    with open(path) as fh:
+        return {rec['name']: rec['stats']['min'] for rec in json.load(fh)['benchmarks']}
+
+
+def main(args):
+    """Print the master-vs-branch min-time table; sys.exit(1) if any common
+    test slowed down by more than ``args.fail_threshold`` percent."""
+    master, branch = _load(args.master), _load(args.branch)
+    print("%-40s %16s %16s %14s %10s" % ("Test", "master min (ms)", "branch min (ms)", "delta (ms)", "delta %"))
+    print("-" * 100)
+
+    regressed = []
+    for name in sorted(set(master) & set(branch)):
+        m, b = master[name] * 1000.0, branch[name] * 1000.0  # scale to the ms shown in the header
+        d = b - m
+        # Zero baseline: any slowdown is +inf (NaN would never exceed the threshold).
+        p = (d / m) * 100.0 if m else (float('inf') if d > 0 else 0.0)
+        print("%-40s %16.3f %16.3f %+14.3f %+9.2f%%" % (name, m, b, d, p))
+        if p > args.fail_threshold:
+            regressed.append((name, p))
+
+    only_master = sorted(set(master) - set(branch))
+    only_branch = sorted(set(branch) - set(master))
+    if only_master:
+        print("\nOnly in master: %s" % ", ".join(only_master))
+    if only_branch:
+        print("Only in branch: %s" % ", ".join(only_branch))
+
+    print()
+    if regressed:
+        print("FAIL: %d test(s) regressed by more than %.2f%%:" % (len(regressed), args.fail_threshold))
+        for name, p in regressed:
+            print("  %s: %+.2f%%" % (name, p))
+        sys.exit(1)
+    print("OK: no test regressed by more than %.2f%%" % args.fail_threshold)
+
+
+def _parse_args():  # CLI: <master.json> <branch.json> [--fail-threshold PERCENT]
+    cli = argparse.ArgumentParser()
+    cli.add_argument('master', help='pytest-benchmark JSON for master')
+    cli.add_argument('branch', help='pytest-benchmark JSON for this branch')
+    cli.add_argument('--fail-threshold', default=15.0, type=float,
+                     help='percent slowdown that fails the build (default 15)')
+    return cli.parse_args()
+
+
+if __name__ == '__main__':
+    main(_parse_args())
diff --git a/tests/perf/test_insert_select_bench.py b/tests/perf/test_insert_select_bench.py
new file mode 100644
index 0000000..ab3d8b9
--- /dev/null
+++ b/tests/perf/test_insert_select_bench.py
@@ -0,0 +1,258 @@
+# -*- coding: utf-8 -*-
+"""Insert / select micro-benchmarks.
+
+(C) Copyright 2025 Dassault Systemes SE.  All Rights Reserved.
+
+This software is licensed under a BSD 3-Clause License.
+See the LICENSE file provided with this software.
+
+Ported from test-performance/timesInsert.py so the numbers live alongside
+the correctness suite and can be run via `pytest --run-perf --benchmark-only`.
+
+Each test measures one operation the driver's hot paths care about:
+
+    * bulk INSERT via executemany (encode path, session send)
+    * fetchall over a small result set (decode + per-row dispatch)
+    * fetchall over a large result set (batched decode + refill loop)
+
+pytest-benchmark auto-repeats each function and reports min / mean /
+median / stddev.  Numbers move meaningfully with the crypt, session,
+cursor and Cython PRs; that's the point.
+"""
+
+import math
+import time
+
+import pytest
+
+from tests import nuodb_base
+
+
+# Small tests have tiny absolute times (sub-ms to a few ms), so per-round
+# jitter dominates unless we run *both* many rounds *and* for a long enough
+# total wall-time to average out kernel/network noise.  This helper probes
+# a single call to size `rounds` so that rounds * per_call_time >= min_seconds,
+# with a floor of `min_rounds`.
+def _rounds_for(target, min_rounds, min_seconds, setup=None, probes=5,
+                iterations=1):
+    """Return a round count, never below ``min_rounds``, sized from probe timings."""
+    elapsed = 0.0
+    for _ in range(probes):
+        if setup is not None:
+            setup()
+        started = time.perf_counter()
+        target()
+        elapsed += time.perf_counter() - started
+    mean_call = elapsed / probes
+    if mean_call <= 0:
+        return min_rounds
+    return max(min_rounds, int(math.ceil(min_seconds / (mean_call * iterations))))
+
+
+# Skip this whole module unless `--run-perf` is passed on the pytest
+# command line.  We don't want `make fulltest` to sit through a 20k-row
+# insert on every commit.
+pytestmark = pytest.mark.perf
+
+
+_DDL_DROP     = "DROP TABLE IF EXISTS perf_bench"
+_DDL_CREATE   = "CREATE TABLE perf_bench (a INT, b VARCHAR(64))"
+_DDL_TRUNCATE = "TRUNCATE TABLE perf_bench"
+
+_SMALL = 1000
+_LARGE = 20_000
+
+
+def _rows(n):  # n (int, str) tuples matching perf_bench's (a, b) columns
+    return [(seq, 'A dark and stormy night %d' % seq) for seq in range(n)]
+
+
+class TestInsertSelectPerf(nuodb_base.NuoBase):
+
+    def _reset(self, con):  # leave an empty, committed perf_bench table
+        ddl = con.cursor()
+        for stmt in (_DDL_DROP, _DDL_CREATE):
+            ddl.execute(stmt)
+        con.commit()
+
+    def _seed(self, con, n):  # recreate perf_bench, then bulk-insert and commit _rows(n)
+        self._reset(con)
+        con.cursor().executemany("INSERT INTO perf_bench (a, b) VALUES (?, ?)", _rows(n))
+        con.commit()
+
+    # -- INSERT ---------------------------------------------------------
+
+    def test_insert_small(self, benchmark):
+        """Insert 1000 rows via executemany into a just-truncated table.  Sensitive to per-row putValue cost."""
+        con = self._connect()
+        try:
+            self._reset(con)
+            cur = con.cursor()
+            rows = _rows(_SMALL)
+
+            def target():
+                cur.executemany(
+                    "INSERT INTO perf_bench (a, b) VALUES (?, ?)", rows)
+                con.commit()
+
+            def setup():
+                cur.execute(_DDL_TRUNCATE)
+                con.commit()
+
+            rounds = _rounds_for(target, min_rounds=500, min_seconds=10.0, setup=setup)
+            benchmark.pedantic(target, setup=setup, warmup_rounds=5, rounds=rounds, iterations=1)
+        finally:
+            con.close()
+
+    def test_insert_large(self, benchmark):
+        """Insert 20k rows via executemany into a just-truncated table, 100 timed rounds."""
+        con = self._connect()
+        try:
+            self._reset(con)
+            cur = con.cursor()
+            rows = _rows(_LARGE)
+
+            def target():
+                cur.executemany( "INSERT INTO perf_bench (a, b) VALUES (?, ?)", rows)
+                con.commit()
+
+            def setup():
+                cur.execute(_DDL_TRUNCATE)
+                con.commit()
+
+            benchmark.pedantic(target, setup=setup, warmup_rounds=2, rounds=100, iterations=1)
+        finally:
+            con.close()
+
+    # -- SELECT ---------------------------------------------------------
+
+    def test_fetchall_small(self, benchmark):
+        """SELECT + fetchall over 1000 seeded rows; round count sized by _rounds_for."""
+        con = self._connect()
+        try:
+            self._seed(con, _SMALL)
+            cur = con.cursor()
+
+            def target():
+                cur.execute("SELECT a, b FROM perf_bench")
+                return cur.fetchall()
+
+            rounds = _rounds_for(target, min_rounds=500, min_seconds=10.0)
+            rows = benchmark.pedantic(target, warmup_rounds=5, rounds=rounds, iterations=1)
+            assert len(rows) == _SMALL
+        finally:
+            con.close()
+
+    def test_fetchall_large(self, benchmark):
+        """SELECT + fetchall over 20k seeded rows, 100 timed rounds."""
+        con = self._connect()
+        try:
+            self._seed(con, _LARGE)
+            cur = con.cursor()
+
+            def target():
+                cur.execute("SELECT a, b FROM perf_bench")
+                return cur.fetchall()
+
+            rows = benchmark.pedantic(target, warmup_rounds=2, rounds=100, iterations=1)
+            assert len(rows) == _LARGE
+        finally:
+            con.close()
+
+    def test_fetchmany_large(self, benchmark):
+        """SELECT, then drain 20k seeded rows in fetchmany(1000) batches."""
+        con = self._connect()
+        try:
+            self._seed(con, _LARGE)
+            cur = con.cursor()
+
+            def target():
+                cur.execute("SELECT a, b FROM perf_bench")
+                total = 0
+                while True:
+                    batch = cur.fetchmany(1000)
+                    if not batch:
+                        break
+                    total += len(batch)
+                return total
+
+            total = benchmark.pedantic(target, warmup_rounds=2, rounds=100, iterations=1)
+            assert total == _LARGE
+        finally:
+            con.close()
+
+    def test_fetchone_loop_large(self, benchmark):
+        """SELECT, then fetchone() in a loop over 20k seeded rows.  Isolates per-row overhead."""
+        con = self._connect()
+        try:
+            self._seed(con, _LARGE)
+            cur = con.cursor()
+
+            def target():
+                cur.execute("SELECT a, b FROM perf_bench")
+                n = 0
+                while True:
+                    row = cur.fetchone()
+                    if row is None:
+                        break
+                    n += 1
+                return n
+
+            n = benchmark.pedantic(target, warmup_rounds=2, rounds=100, iterations=1)
+            assert n == _LARGE
+        finally:
+            con.close()
+
+    # -- Wide rows / mixed types ---------------------------------------
+
+    _WIDE_COLS = 50
+    _WIDE_ROWS = 1000
+
+    def test_fetchall_wide(self, benchmark):
+        """SELECT + fetchall over 1000 rows of 50 INT columns (table perf_wide)."""
+        cols = ["c%d INT" % i for i in range(self._WIDE_COLS)]
+        col_names = ", ".join("c%d" % i for i in range(self._WIDE_COLS))
+        placeholders = ", ".join(["?"] * self._WIDE_COLS)
+
+        con = self._connect()
+        try:
+            cur = con.cursor()
+            cur.execute("DROP TABLE IF EXISTS perf_wide")
+            cur.execute("CREATE TABLE perf_wide (%s)" % (", ".join(cols),))
+            con.commit()
+            rows = [tuple(range(self._WIDE_COLS)) for _ in range(self._WIDE_ROWS)]
+            cur.executemany( "INSERT INTO perf_wide (%s) VALUES (%s)" % (col_names, placeholders), rows)
+            con.commit()
+
+            def target():
+                cur.execute("SELECT %s FROM perf_wide" % col_names)
+                return cur.fetchall()
+
+            result = benchmark.pedantic(target, warmup_rounds=2, rounds=100, iterations=1)
+            assert len(result) == self._WIDE_ROWS
+            assert len(result[0]) == self._WIDE_COLS
+        finally:
+            con.close()
+
+    def test_fetchall_mixed_types(self, benchmark):
+        """fetchall over 20k rows mixing int / decimal / double / timestamp / bool / varchar / all-NULL int."""
+        con = self._connect()
+        try:
+            cur = con.cursor()
+            cur.execute("DROP TABLE IF EXISTS perf_mixed")
+            cur.execute(
+                "CREATE TABLE perf_mixed (i INT,  d DECIMAL(12, 4), f DOUBLE,  ts TIMESTAMP,"
+                "bl BOOLEAN, s VARCHAR(64), n INT)")
+            con.commit()
+            rows = [ (i, i * 1.25, i / 3.0, '2024-01-01 12:34:56', bool(i & 1), 'row #%d' % i, None) for i in range(_LARGE) ]
+            cur.executemany( "INSERT INTO perf_mixed (i, d, f, ts, bl, s, n)" " VALUES (?, ?, ?, ?, ?, ?, ?)", rows)
+            con.commit()
+
+            def target():
+                cur.execute("SELECT i, d, f, ts, bl, s, n FROM perf_mixed")
+                return cur.fetchall()
+
+            result = benchmark.pedantic(target, warmup_rounds=2, rounds=100, iterations=1)
+            assert len(result) == _LARGE
+        finally:
+            con.close()