From 815e156e860e49b6d4ecb263f96762eeea82b7c2 Mon Sep 17 00:00:00 2001 From: Bonelli Date: Mon, 26 Jan 2026 06:54:19 -0500 Subject: [PATCH 01/11] test: improve benchmarks, use codspeed --- .github/workflows/benchmark.yml | 138 --- .github/workflows/codspeed.yml | 40 + DEVELOPER.md | 84 +- autotest/benchmarks/__init__.py | 0 autotest/benchmarks/benchmark_arrays.py | 93 ++ .../benchmarks/benchmark_cellbudgetfile.py | 54 + autotest/benchmarks/benchmark_endpointfile.py | 40 + autotest/benchmarks/benchmark_export.py | 45 + .../benchmarks/benchmark_formattedfile.py | 69 ++ .../benchmarks/benchmark_gridintersect.py | 183 ++++ autotest/benchmarks/benchmark_grids.py | 63 ++ autotest/benchmarks/benchmark_headfile.py | 80 ++ autotest/benchmarks/benchmark_headufile.py | 32 + autotest/benchmarks/benchmark_mf6_io.py | 70 ++ .../benchmarks/benchmark_mf6listbudget.py | 48 + autotest/benchmarks/benchmark_mf_io.py | 62 ++ autotest/benchmarks/benchmark_mflistbudget.py | 15 + .../benchmarks/benchmark_mfusglistbudget.py | 19 + autotest/benchmarks/benchmark_mtlistfile.py | 24 + autotest/benchmarks/benchmark_pathlinefile.py | 56 ++ .../benchmarks/benchmark_postprocessing.py | 60 ++ autotest/benchmarks/benchmark_rasters.py | 134 +++ .../benchmarks/benchmark_sfroutputfile.py | 34 + autotest/benchmarks/benchmark_ucnfile.py | 32 + autotest/benchmarks/benchmark_zonebudget.py | 89 ++ autotest/benchmarks/conftest.py | 54 + autotest/pytest.ini | 23 - autotest/test_grid_cases.py | 24 + docs/benchmarking_plan.md | 924 ++++++++++++++++++ pyproject.toml | 27 + 30 files changed, 2431 insertions(+), 185 deletions(-) delete mode 100644 .github/workflows/benchmark.yml create mode 100644 .github/workflows/codspeed.yml create mode 100644 autotest/benchmarks/__init__.py create mode 100644 autotest/benchmarks/benchmark_arrays.py create mode 100644 autotest/benchmarks/benchmark_cellbudgetfile.py create mode 100644 autotest/benchmarks/benchmark_endpointfile.py create mode 100644 
autotest/benchmarks/benchmark_export.py create mode 100644 autotest/benchmarks/benchmark_formattedfile.py create mode 100644 autotest/benchmarks/benchmark_gridintersect.py create mode 100644 autotest/benchmarks/benchmark_grids.py create mode 100644 autotest/benchmarks/benchmark_headfile.py create mode 100644 autotest/benchmarks/benchmark_headufile.py create mode 100644 autotest/benchmarks/benchmark_mf6_io.py create mode 100644 autotest/benchmarks/benchmark_mf6listbudget.py create mode 100644 autotest/benchmarks/benchmark_mf_io.py create mode 100644 autotest/benchmarks/benchmark_mflistbudget.py create mode 100644 autotest/benchmarks/benchmark_mfusglistbudget.py create mode 100644 autotest/benchmarks/benchmark_mtlistfile.py create mode 100644 autotest/benchmarks/benchmark_pathlinefile.py create mode 100644 autotest/benchmarks/benchmark_postprocessing.py create mode 100644 autotest/benchmarks/benchmark_rasters.py create mode 100644 autotest/benchmarks/benchmark_sfroutputfile.py create mode 100644 autotest/benchmarks/benchmark_ucnfile.py create mode 100644 autotest/benchmarks/benchmark_zonebudget.py create mode 100644 autotest/benchmarks/conftest.py delete mode 100644 autotest/pytest.ini create mode 100644 docs/benchmarking_plan.md diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml deleted file mode 100644 index 8f1af21105..0000000000 --- a/.github/workflows/benchmark.yml +++ /dev/null @@ -1,138 +0,0 @@ -name: Benchmarks - -on: - schedule: - - cron: '0 8 * * *' # run at 8 AM UTC (12 am PST) - -jobs: - benchmark: - name: Benchmarks - runs-on: ${{ matrix.os }} - strategy: - fail-fast: false - matrix: - os: [ ubuntu-latest, macos-latest, windows-latest ] - python-version: [ "3.10", "3.11", "3.12" ] - defaults: - run: - shell: bash -l {0} - timeout-minutes: 90 - - steps: - - name: Checkout repo - uses: actions/checkout@v6 - - - name: Setup Python ${{ matrix.python-version }} - uses: astral-sh/setup-uv@v7 - with: - cache-dependency-glob: 
"**/pyproject.toml" - python-version: ${{ matrix.python-version }} - - - name: Install FloPy - run: uv sync --all-extras - - - name: Install Modflow executables - uses: modflowpy/install-modflow-action@v1 - - - name: Install triangle (macOS workaround) - if: runner.os == 'macOS' - uses: modflowpy/install-modflow-action@v1 - with: - repo: executables - ostag: mac - subset: triangle - - - name: Run benchmarks - working-directory: autotest - run: | - mkdir -p .benchmarks - uv run pytest -v --durations=0 --benchmark-only --benchmark-json .benchmarks/${{ matrix.os }}_python${{ matrix.python-version }}.json --keep-failed=.failed - ls .benchmarks - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - - name: Upload failed benchmark artifact - uses: actions/upload-artifact@v6 - if: failure() - with: - name: failed-benchmark-${{ matrix.os }}-${{ matrix.python-version }}-${{ github.run_id }} - path: autotest/.failed/** - - - name: Upload benchmark result artifact - uses: actions/upload-artifact@v6 - with: - name: benchmarks-${{ matrix.os }}-${{ matrix.python-version }}-${{ github.run_id }} - path: autotest/.benchmarks/*.json - include-hidden-files: true - - post_benchmark: - needs: - - benchmark - name: Process benchmark results - runs-on: ubuntu-latest - defaults: - run: - shell: bash - timeout-minutes: 10 - - steps: - - name: Checkout repo - uses: actions/checkout@v6 - - - name: Setup Python - uses: astral-sh/setup-uv@v7 - with: - cache-dependency-glob: "**/pyproject.toml" - - - name: Install FloPy - run: uv sync - - - name: Install seaborn - run: uv pip install seaborn - - - name: Download all artifacts - uses: actions/download-artifact@v7 - with: - path: autotest/.benchmarks - - - name: Process benchmark results - run: | - repo="${{ github.repository }}" - path="autotest/.benchmarks" - - # list benchmark artifacts - artifact_json=$(gh api -X GET -H "Accept: application/vnd.github+json" /repos/$repo/actions/artifacts) - - # get artifact ids and download artifacts - 
get_artifact_ids=" - import json - import sys - from os import linesep - - artifacts = json.load(sys.stdin, strict=False)['artifacts'] - artifacts = [a for a in artifacts if a['name'].startswith('benchmarks-') and a['name'].split('-')[-1].isdigit()] - - print(linesep.join([str(a['id']) for a in artifacts])) - " - echo $artifact_json \ - | python -c "$get_artifact_ids" \ - | xargs -I@ bash -c "gh api -H 'Accept: application/vnd.github+json' /repos/$repo/actions/artifacts/@/zip >> $path/@.zip" - - # unzip artifacts - zipfiles=( $path/*.zip ) - if (( ${#zipfiles[@]} )); then - unzip -o "$path/*.zip" -d $path - fi - - # process benchmarks - uv run scripts/process_benchmarks.py $path $path - env: - GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - - name: Upload benchmark results - uses: actions/upload-artifact@v6 - with: - name: benchmarks-${{ github.run_id }} - path: | - autotest/.benchmarks/*.csv - autotest/.benchmarks/*.png diff --git a/.github/workflows/codspeed.yml b/.github/workflows/codspeed.yml new file mode 100644 index 0000000000..e8e7029ee2 --- /dev/null +++ b/.github/workflows/codspeed.yml @@ -0,0 +1,40 @@ +name: CodSpeed Benchmarks + +on: + push: + branches: + - develop + - main + pull_request: + workflow_dispatch: + +# Required for OIDC authentication +permissions: + contents: read + actions: read + id-token: write + +jobs: + benchmarks: + runs-on: ubuntu-latest + steps: + - name: Checkout repo + uses: actions/checkout@v4 + + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: "3.12" + + - name: Install uv + uses: astral-sh/setup-uv@v4 + with: + cache-dependency-glob: "**/pyproject.toml" + + - name: Install FloPy with test dependencies + run: uv sync --all-extras + + - name: Run benchmarks with CodSpeed + uses: CodSpeedHQ/action@v3 + with: + run: uv run pytest autotest/benchmarks --codspeed diff --git a/DEVELOPER.md b/DEVELOPER.md index 0f0943a75e..f7a456bace 100644 --- a/DEVELOPER.md +++ b/DEVELOPER.md @@ -370,45 +370,81 @@ To allow 
optional separation of performance from correctness concerns, performan #### Benchmarking -Any test function can be turned into a benchmark by requesting the `benchmark` fixture (i.e. declaring a `benchmark` argument), which can be used to wrap any function call. For instance: +FloPy includes a benchmark suite to track performance over time and validate optimization efforts. Benchmarking focuses on I/O operations, data structure manipulations, and utility functions. -```python -def test_benchmark(benchmark): - def sleep_1s(): - import time - time.sleep(1) - return True +**Note**: Benchmarks test FloPy code performance only, not the runtime of the various executables FloPy drives. + +Benchmarks use [CodSpeed](https://codspeed.io) to automatically track performance in CI. Benchmarks written using `pytest-benchmark` syntax are compatible. Benchmarks are organized in `autotest/benchmarks/` by functional area. + +##### Running Benchmarks + +```bash +# Run all benchmarks +pytest autotest/benchmarks --benchmark-only + +# Run specific benchmark file +pytest autotest/benchmarks/benchmark_mf6_io.py --benchmark-only - assert benchmark(sleep_1s) +# Run benchmarks by marker +pytest -m "benchmark and not slow" --benchmark-only + +# Save results to file +pytest autotest/benchmarks --benchmark-only --benchmark-autosave + +# Compare against saved baseline +pytest autotest/benchmarks --benchmark-only --benchmark-compare ``` -Arguments can be provided to the function as well: +##### Writing Benchmarks + +Any test function can be turned into a benchmark by requesting the `benchmark` fixture: ```python -def test_benchmark(benchmark): - def sleep_s(s): - import time - time.sleep(s) - return True +@pytest.mark.benchmark +def test_model_load_time(benchmark, function_tmpdir): + """ + Benchmark model loading time. + + Measures time to load a MODFLOW model from disk. 
+ """ + model = create_test_model(function_tmpdir) + model.write_input() - assert benchmark(sleep_s, 1) + benchmark(lambda: Modflow.load(f"{model.name}.nam", model_ws=function_tmpdir)) ``` -Rather than alter an existing function call to use this syntax, a lambda can be used to wrap the call unmodified: +**Best Practices:** +- Use descriptive test names (e.g., `test_mf6_sim_load_large`, not `test_load1`) +- Include docstrings explaining what is benchmarked and why +- Use fixtures for setup (not timed) +- Mark all benchmarks `@pytest.mark.benchmark` +- Mark slow benchmarks with `@pytest.mark.slow` + +##### Advanced Usage + +Arguments can be provided to benchmark functions: ```python -def test_benchmark(benchmark): - def sleep_s(s): - import time - time.sleep(s) - return True +def test_benchmark_with_args(benchmark): + benchmark(some_function, arg1, arg2) +``` + +For fine-grained control over iterations and rounds: - assert benchmark(lambda: sleep_s(1)) +```python +def test_benchmark_controlled(benchmark): + benchmark.pedantic(some_function, iterations=10, rounds=5) ``` -This can be convenient when the function call is complicated or passes many arguments. +Lambda functions are convenient for wrapping complex calls: + +```python +def test_complex_benchmark(benchmark): + result = benchmark(lambda: complex_function(many, different, args)) + assert result is not None +``` -Benchmarked functions are repeated several times (the number of iterations depending on the test's runtime, with faster tests generally getting more reps) to compute summary statistics. To control the number of repetitions and rounds (repetitions of repetitions) use `benchmark.pedantic`, e.g. `benchmark.pedantic(some_function(), iterations=1, rounds=1)`. +##### Configuration Benchmarking is incompatible with `pytest-xdist` and is disabled automatically when tests are run in parallel. When tests are not run in parallel, benchmarking is enabled by default. 
Benchmarks can be disabled with the `--benchmark-disable` flag. diff --git a/autotest/benchmarks/__init__.py b/autotest/benchmarks/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/autotest/benchmarks/benchmark_arrays.py b/autotest/benchmarks/benchmark_arrays.py new file mode 100644 index 0000000000..bf68cc2167 --- /dev/null +++ b/autotest/benchmarks/benchmark_arrays.py @@ -0,0 +1,93 @@ +""" +Benchmarks for flopy.utils.Util2d and Util3d operations including: +- Array creation +- External file I/O +- get_file_entry() performance +""" + +import numpy as np +import pytest + +from flopy.utils import Util2d, Util3d + +SIZES = { + "small": {"nlay": 3, "nrow": 10, "ncol": 10}, + "medium": {"nlay": 10, "nrow": 1000, "ncol": 1000}, + "large": {"nlay": 20, "nrow": 2000, "ncol": 2000}, +} + + +@pytest.mark.benchmark +@pytest.mark.slow +@pytest.mark.parametrize("size", ["small", "medium", "large"]) +def test_util2d_create(benchmark, size): + dims = SIZES[size] + shape = (dims["nrow"], dims["ncol"]) + data = np.random.random(shape) + benchmark(lambda: Util2d(None, shape, np.float32, data.copy(), "test")) + + +@pytest.mark.benchmark +@pytest.mark.slow +@pytest.mark.parametrize("size", ["small", "medium", "large"]) +def test_util3d_create(benchmark, size): + dims = SIZES[size] + shape = (dims["nlay"], dims["nrow"], dims["ncol"]) + data = np.random.random(shape) + benchmark(lambda: Util3d(None, shape, np.float32, data.copy(), "test")) + + +@pytest.mark.benchmark +@pytest.mark.slow +@pytest.mark.parametrize("size", ["small", "medium", "large"]) +def test_util2d_external_write(benchmark, function_tmpdir, size): + dims = SIZES[size] + shape = (dims["nrow"], dims["ncol"]) + data = np.random.random(shape) + u2d = Util2d(None, shape, np.float32, data, "test") + fpath = function_tmpdir / "test_array.dat" + + def write_external(): + u2d.write(str(fpath)) + return u2d + + benchmark(write_external) + + +@pytest.mark.benchmark +@pytest.mark.slow 
+@pytest.mark.parametrize("size", ["small", "medium", "large"]) +def test_util3d_external_write(benchmark, function_tmpdir, size): + dims = SIZES[size] + shape = (dims["nlay"], dims["nrow"], dims["ncol"]) + data = np.random.random(shape) + u3d = Util3d(None, shape, np.float32, data, "test") + fpath = function_tmpdir / "test_array3d.dat" + + def write_external(): + u3d.write(str(fpath)) + return u3d + + benchmark(write_external) + + +@pytest.mark.benchmark +@pytest.mark.slow +@pytest.mark.parametrize("size", ["small", "medium", "large"]) +def test_util2d_array_copy(benchmark, size): + dims = SIZES[size] + shape = (dims["nrow"], dims["ncol"]) + data = np.random.random(shape) + u2d = Util2d(None, shape, np.float32, data, "test") + benchmark(lambda: u2d.array.copy()) + + +@pytest.mark.benchmark +@pytest.mark.slow +@pytest.mark.parametrize("size", ["small", "medium", "large"]) +def test_util3d_array_copy(benchmark, size): + dims = SIZES[size] + shape = (dims["nlay"], dims["nrow"], dims["ncol"]) + data = np.random.random(shape) + u3d = Util3d(None, shape, np.float32, data, "test") + benchmark(lambda: u3d.array.copy()) diff --git a/autotest/benchmarks/benchmark_cellbudgetfile.py b/autotest/benchmarks/benchmark_cellbudgetfile.py new file mode 100644 index 0000000000..eea641101e --- /dev/null +++ b/autotest/benchmarks/benchmark_cellbudgetfile.py @@ -0,0 +1,54 @@ +from pathlib import Path + +import pytest + +from flopy.utils import CellBudgetFile + + +@pytest.fixture +def cbcf(example_data_path) -> CellBudgetFile: + return CellBudgetFile( + example_data_path + / "mf6" + / "create_tests" + / "test021_twri" + / "expected_output" + / "twri.cbc" + ) + + +@pytest.mark.benchmark +def test_cellbudgetfile_load(benchmark, cbcf): + benchmark(lambda: CellBudgetFile(cbcf.fname)) + + +@pytest.mark.benchmark +def test_cellbudgetfile_get_data_all(benchmark, cbcf): + records = cbcf.list_unique_records() + term = records[0] + benchmark(lambda: cbcf.get_data(text=term)) + + 
+@pytest.mark.benchmark +def test_cellbudgetfile_get_data_one(benchmark, cbcf): + benchmark(lambda: cbcf.get_data(kstpkper=(0, 1))) + + +@pytest.mark.benchmark +def test_cellbudgetfile_list_records(benchmark, cbcf): + benchmark(cbcf.list_records) + + +@pytest.mark.benchmark +def test_cellbudgetfile_list_unique_records(benchmark, cbcf): + benchmark(cbcf.list_unique_records) + + +@pytest.mark.benchmark +def test_cellbudgetfile_get_times(benchmark, cbcf): + benchmark(cbcf.get_times) + + +@pytest.mark.benchmark +def test_cellbudgetfile_get_kstpkper(benchmark, cbcf): + benchmark(cbcf.get_kstpkper) diff --git a/autotest/benchmarks/benchmark_endpointfile.py b/autotest/benchmarks/benchmark_endpointfile.py new file mode 100644 index 0000000000..b50a40bd2a --- /dev/null +++ b/autotest/benchmarks/benchmark_endpointfile.py @@ -0,0 +1,40 @@ +import pytest + +from autotest.benchmarks.benchmark_pathlinefile import ex01_mp7_model, plf +from flopy.utils.modpathfile import EndpointFile + + +@pytest.fixture(scope="module") +def epf(ex01_mp7_model) -> EndpointFile: + mp, ws = ex01_mp7_model + mp.write_input() + success, buff = mp.run_model() + assert success + return EndpointFile(ws / f"{mp.name}.mpend") + + +@pytest.mark.benchmark +def test_endpointfile_load(benchmark, epf): + benchmark(lambda: EndpointFile(epf.fname)) + + +@pytest.mark.benchmark +def test_endpointfile_get_data(benchmark, epf): + benchmark(epf.get_data) + + +@pytest.mark.benchmark +def test_endpointfile_get_alldata(benchmark, epf): + benchmark(epf.get_alldata) + + +@pytest.mark.benchmark +def test_pathlinefile_to_geodataframe(benchmark, plf): + pytest.importorskip("geopandas") + benchmark(plf.to_geodataframe) + + +@pytest.mark.benchmark +def test_endpointfile_to_geodataframe(benchmark, epf): + pytest.importorskip("geopandas") + benchmark(epf.to_geodataframe) diff --git a/autotest/benchmarks/benchmark_export.py b/autotest/benchmarks/benchmark_export.py new file mode 100644 index 0000000000..be8d94442f --- /dev/null +++ 
b/autotest/benchmarks/benchmark_export.py @@ -0,0 +1,45 @@ +import pytest +from modflow_devtools.misc import has_pkg + +from autotest.conftest import load_mf6_sim, load_mf2005_model + + +@pytest.mark.benchmark +def test_mf6_export_shapefile(benchmark, function_tmpdir): + sim = load_mf6_sim(function_tmpdir, model_key="freyberg") + gwf = sim.get_model() + output_path = function_tmpdir / "export.shp" + benchmark(lambda: gwf.export(str(output_path))) + + +@pytest.mark.benchmark +def test_mf2005_export_shapefile(benchmark, function_tmpdir): + model = load_mf2005_model(function_tmpdir, model_key="freyberg") + output_path = function_tmpdir / "export_mf2005.shp" + benchmark(lambda: model.export(str(output_path))) + + +@pytest.mark.benchmark +@pytest.mark.skipif(not has_pkg("netCDF4"), reason="requires netCDF4") +def test_mf6_export_netcdf(benchmark, function_tmpdir): + sim = load_mf6_sim(function_tmpdir, model_key="freyberg") + gwf = sim.get_model() + output_path = function_tmpdir / "export.nc" + benchmark(lambda: gwf.export(str(output_path), fmt="netcdf")) + + +@pytest.mark.benchmark +@pytest.mark.skipif(not has_pkg("geopandas"), reason="requires geopandas") +def test_mf6_modelgrid_to_geodataframe(benchmark, function_tmpdir): + sim = load_mf6_sim(function_tmpdir, model_key="freyberg") + gwf = sim.get_model() + benchmark(gwf.modelgrid.to_geodataframe) + + +@pytest.mark.benchmark +@pytest.mark.skipif(not has_pkg("vtk"), reason="requires vtk") +def test_mf6_export_vtk(benchmark, function_tmpdir): + sim = load_mf6_sim(function_tmpdir, model_key="freyberg") + gwf = sim.get_model() + output_path = function_tmpdir / "export.vtk" + benchmark(lambda: gwf.export(str(output_path), fmt="vtk")) diff --git a/autotest/benchmarks/benchmark_formattedfile.py b/autotest/benchmarks/benchmark_formattedfile.py new file mode 100644 index 0000000000..74ddf1232b --- /dev/null +++ b/autotest/benchmarks/benchmark_formattedfile.py @@ -0,0 +1,69 @@ +""" +Benchmarks for 
flopy.utils.formattedfile.FormattedHeadFile operations including: +- Formatted (ASCII) head file loading +- Data extraction for single stress period +- Time series extraction +- Full data retrieval +""" + +import numpy as np +import pytest + +from flopy.utils.formattedfile import FormattedHeadFile + + +@pytest.mark.benchmark +@pytest.mark.slow +def test_formattedfile_load(benchmark, example_data_path): + pth = example_data_path / "mf2005_test" / "test1tr.githds" + benchmark(lambda: FormattedHeadFile(pth)) + + +@pytest.fixture +def fhd(example_data_path) -> FormattedHeadFile: + return FormattedHeadFile(str(example_data_path / "mf2005_test" / "test1tr.githds")) + + +@pytest.mark.benchmark +def test_formattedfile_get_data_totim(benchmark, fhd): + times = fhd.get_times() + mid_time = times[len(times) // 2] + benchmark(lambda: fhd.get_data(totim=mid_time)) + + +@pytest.mark.benchmark +def test_formattedfile_get_data_first(benchmark, fhd): + benchmark(lambda: fhd.get_data(idx=0)) + + +@pytest.mark.benchmark +def test_formattedfile_get_data_last(benchmark, fhd): + times = fhd.get_times() + benchmark(lambda: fhd.get_data(totim=times[-1])) + + +@pytest.mark.benchmark +@pytest.mark.slow +def test_formattedfile_get_alldata(benchmark, fhd): + benchmark(fhd.get_alldata) + + +@pytest.mark.benchmark +def test_formattedfile_get_ts(benchmark, fhd): + benchmark(lambda: fhd.get_ts((2, 25, 25))) + + +@pytest.mark.benchmark +def test_formattedfile_get_times(benchmark, fhd): + benchmark(fhd.get_times) + + +@pytest.mark.benchmark +def test_formattedfile_get_kstpkper(benchmark, fhd): + benchmark(fhd.get_kstpkper) + + +@pytest.mark.benchmark +@pytest.mark.slow +def test_formattedfile_many_stress_periods(benchmark, fhd): + benchmark(fhd.get_alldata) diff --git a/autotest/benchmarks/benchmark_gridintersect.py b/autotest/benchmarks/benchmark_gridintersect.py new file mode 100644 index 0000000000..b4de7a902a --- /dev/null +++ b/autotest/benchmarks/benchmark_gridintersect.py @@ -0,0 +1,183 @@ 
+""" +Benchmarks for spatial intersection operations including: + +1. GridIntersect class (flopy.utils.gridintersect.GridIntersect): + - Initialization (with/without STR-tree spatial index) + - Point, LineString, and Polygon intersections + - Spatial query performance + +2. Grid.intersect() method (direct coordinate-based intersection): + - Single point lookup + - Batch point operations + - 3D coordinate intersection +""" + +import numpy as np +import pytest + +from autotest.test_grid_cases import GridCases +from flopy.utils.geometry import LineString, Point, Polygon +from flopy.utils.gridintersect import GridIntersect + +STRUCTURED_GRIDS = { + "small": GridCases.structured_small(), + "medium": GridCases.structured_medium(), + "large": GridCases.structured_large(), +} + + +# GridIntersect class benchmarks + + +@pytest.mark.benchmark +@pytest.mark.slow +@pytest.mark.parametrize("grid", STRUCTURED_GRIDS.values(), ids=STRUCTURED_GRIDS.keys()) +@pytest.mark.parametrize("rtree", [True, False], ids=["rtree", "no_rtree"]) +def test_init(benchmark, grid, rtree): + benchmark(lambda: GridIntersect(grid, rtree=rtree)) + + +@pytest.mark.benchmark +@pytest.mark.slow +@pytest.mark.parametrize("grid", STRUCTURED_GRIDS.values(), ids=STRUCTURED_GRIDS.keys()) +@pytest.mark.parametrize("rtree", [True, False], ids=["rtree", "no_rtree"]) +def test_intersect_point(benchmark, grid, rtree): + gi = GridIntersect(grid, rtree=rtree) + xmin, xmax, ymin, ymax = grid.extent + point = Point((xmin + xmax) / 2, (ymin + ymax) / 2) + benchmark(lambda: gi.intersect(point, "point")) + + +def make_line(grid, line_type) -> LineString: + xmin, xmax, ymin, ymax = grid.extent + if line_type == "diagonal": + return LineString([(xmin, ymin), (xmax, ymax)]) + elif line_type == "horizontal": + y_mid = (ymin + ymax) / 2 + return LineString([(xmin, y_mid), (xmax, y_mid)]) + elif line_type == "complex": + x = np.linspace(xmin, xmax, 20) + y_mid = (ymin + ymax) / 2 + y_range = (ymax - ymin) * 0.2 + y = y_mid + 
y_range * np.sin(x / (xmax - xmin) * 10) + coords = list(zip(x, y)) + return LineString(coords) + + +@pytest.mark.benchmark +@pytest.mark.slow +@pytest.mark.parametrize("grid", STRUCTURED_GRIDS.values(), ids=STRUCTURED_GRIDS.keys()) +@pytest.mark.parametrize("line", ["diagonal", "horizontal", "complex"]) +@pytest.mark.parametrize("rtree", [True, False], ids=["rtree", "no_rtree"]) +def test_intersect_linestring_diagonal(benchmark, grid, line, rtree): + gi = GridIntersect(grid, rtree=rtree) + line = make_line(grid, line) + benchmark(lambda: gi.intersect(line, "linestring")) + + +def make_poly(grid, poly_type) -> Polygon: + xmin, xmax, ymin, ymax = grid.extent + x_center = (xmin + xmax) / 2 + y_center = (ymin + ymax) / 2 + x_range = xmax - xmin + y_range = ymax - ymin + + if poly_type == "small": + # 10% of grid extent centered + dx = x_range * 0.05 + dy = y_range * 0.05 + coords = [ + (x_center - dx, y_center - dy), + (x_center + dx, y_center - dy), + (x_center + dx, y_center + dy), + (x_center - dx, y_center + dy), + ] + elif poly_type == "medium": + # 50% of grid extent centered + dx = x_range * 0.25 + dy = y_range * 0.25 + coords = [ + (x_center - dx, y_center - dy), + (x_center + dx, y_center - dy), + (x_center + dx, y_center + dy), + (x_center - dx, y_center + dy), + ] + elif poly_type == "large": + # 90% of grid extent centered + dx = x_range * 0.45 + dy = y_range * 0.45 + coords = [ + (x_center - dx, y_center - dy), + (x_center + dx, y_center - dy), + (x_center + dx, y_center + dy), + (x_center - dx, y_center + dy), + ] + elif poly_type == "irregular": + # Irregular star-like polygon + n_points = 20 + theta = np.linspace(0, 2 * np.pi, n_points, endpoint=False) + r_base = min(x_range, y_range) * 0.3 + r_var = r_base * 0.33 + r = r_base + r_var * np.sin(5 * theta) + x = x_center + r * np.cos(theta) + y = y_center + r * np.sin(theta) + coords = list(zip(x, y)) + + return Polygon(coords) + + +@pytest.mark.benchmark +@pytest.mark.slow 
+@pytest.mark.parametrize("grid", STRUCTURED_GRIDS.values(), ids=STRUCTURED_GRIDS.keys()) +@pytest.mark.parametrize("poly", ["small", "medium", "large", "irregular"]) +@pytest.mark.parametrize("rtree", [True, False], ids=["rtree", "no_rtree"]) +def test_intersect_polygon(benchmark, grid, poly, rtree): + gi = GridIntersect(grid, rtree=rtree) + polygon = make_poly(grid, poly) + benchmark(lambda: gi.intersect(polygon, "polygon")) + + +@pytest.mark.benchmark +@pytest.mark.slow +@pytest.mark.parametrize("grid", STRUCTURED_GRIDS.values(), ids=STRUCTURED_GRIDS.keys()) +@pytest.mark.parametrize("poly", ["small", "medium", "large", "irregular"]) +@pytest.mark.parametrize("rtree", [True, False], ids=["rtree", "no_rtree"]) +def test_query_grid_polygon(benchmark, grid, poly, rtree): + gi = GridIntersect(grid, rtree=rtree) + polygon = make_poly(grid, poly) + benchmark(lambda: gi.query_grid(polygon)) + + +# Grid.intersect() method benchmarks (coordinate-based intersection) + + +@pytest.mark.benchmark +@pytest.mark.parametrize("grid", STRUCTURED_GRIDS.values(), ids=STRUCTURED_GRIDS.keys()) +def test_grid_intersect_single_point(benchmark, grid): + xmin, xmax, ymin, ymax = grid.extent + x_center = (xmin + xmax) / 2 + y_center = (ymin + ymax) / 2 + benchmark(lambda: grid.intersect(x_center, y_center)) + + +@pytest.mark.benchmark +@pytest.mark.slow +@pytest.mark.parametrize("grid", STRUCTURED_GRIDS.values(), ids=STRUCTURED_GRIDS.keys()) +def test_grid_intersect_batch_points(benchmark, grid): + xmin, xmax, ymin, ymax = grid.extent + x = np.linspace(xmin + 0.1 * (xmax - xmin), xmax - 0.1 * (xmax - xmin), 100) + y = np.linspace(ymin + 0.1 * (ymax - ymin), ymax - 0.1 * (ymax - ymin), 100) + xx, yy = np.meshgrid(x, y) + benchmark(lambda: grid.intersect(xx.ravel(), yy.ravel())) + + +@pytest.mark.benchmark +@pytest.mark.slow +@pytest.mark.parametrize("grid", STRUCTURED_GRIDS.values(), ids=STRUCTURED_GRIDS.keys()) +def test_grid_intersect_3d(benchmark, grid): + xmin, xmax, ymin, ymax = 
grid.extent + x = np.linspace(xmin + 0.1 * (xmax - xmin), xmax - 0.1 * (xmax - xmin), 50) + y = np.linspace(ymin + 0.1 * (ymax - ymin), ymax - 0.1 * (ymax - ymin), 50) + xx, yy = np.meshgrid(x, y) + zz = np.ones_like(xx) * 5.0 + benchmark(lambda: grid.intersect(xx.ravel(), yy.ravel(), zz.ravel())) diff --git a/autotest/benchmarks/benchmark_grids.py b/autotest/benchmarks/benchmark_grids.py new file mode 100644 index 0000000000..c2e09e02fb --- /dev/null +++ b/autotest/benchmarks/benchmark_grids.py @@ -0,0 +1,63 @@ +""" +Benchmarks for flopy.discretization grid operations including: +- cellid <-> node number conversions +- grid intersection operations +- grid geometry properties +""" + +import pytest + +from autotest.test_grid_cases import GridCases +from flopy.utils.geometry import LineString, Point + +STRUCTURED_GRIDS = { + "small": GridCases.structured_small(), + "medium": GridCases.structured_medium(), + "large": GridCases.structured_large(), +} + + +@pytest.mark.benchmark +@pytest.mark.slow +@pytest.mark.parametrize("grid", STRUCTURED_GRIDS.values(), ids=STRUCTURED_GRIDS.keys()) +def test_structured_grid_get_lrc(benchmark, grid): + nodes = list(range(grid.nnodes)) + benchmark(lambda: grid.get_lrc(nodes)) + + +@pytest.mark.benchmark +@pytest.mark.parametrize("grid", STRUCTURED_GRIDS.values(), ids=STRUCTURED_GRIDS.keys()) +def test_structured_grid_get_node(benchmark, grid): + cellids = [ + (lay, row, col) + for lay in range(grid.nlay) + for row in range(0, grid.nrow) + for col in range(0, grid.ncol) + ] + benchmark(lambda: grid.get_node(cellids=cellids)) + + +@pytest.mark.benchmark +@pytest.mark.parametrize("grid", STRUCTURED_GRIDS.values(), ids=STRUCTURED_GRIDS.keys()) +def test_structured_grid_intersect_linestring(benchmark, grid): + line = LineString([(0, 0), (grid.ncol, grid.nrow)]) + benchmark(lambda: grid.intersect(line, return_all_intersections=True)) + + +@pytest.mark.benchmark +@pytest.mark.parametrize("grid", STRUCTURED_GRIDS.values(), 
ids=STRUCTURED_GRIDS.keys()) +def test_structured_grid_intersect_point(benchmark, grid): + point = Point(50, 50) + benchmark(lambda: grid.intersect(point)) + + +@pytest.mark.benchmark +@pytest.mark.parametrize("grid", STRUCTURED_GRIDS.values(), ids=STRUCTURED_GRIDS.keys()) +def test_structured_grid_extent(benchmark, grid): + benchmark(lambda: grid.extent) + + +@pytest.mark.benchmark +@pytest.mark.parametrize("grid", STRUCTURED_GRIDS.values(), ids=STRUCTURED_GRIDS.keys()) +def test_structured_grid_xyzcellcenters(benchmark, grid): + benchmark(lambda: grid.xyzcellcenters) diff --git a/autotest/benchmarks/benchmark_headfile.py b/autotest/benchmarks/benchmark_headfile.py new file mode 100644 index 0000000000..109dca340f --- /dev/null +++ b/autotest/benchmarks/benchmark_headfile.py @@ -0,0 +1,80 @@ +""" +Benchmarks for flopy.utils.HeadFile operations including: +- HeadFile initialization +- get_data() for single time steps +- get_alldata() for full file reading +- get_ts() for time series extraction +""" + +from pathlib import Path + +import pytest + +from flopy.utils import HeadFile + + +@pytest.mark.benchmark +def test_headfile_load(benchmark, example_data_path): + pth = ( + example_data_path + / "mf6" + / "create_tests" + / "test021_twri" + / "expected_output" + / "twri.hds" + ) + benchmark(lambda: HeadFile(pth)) + + +@pytest.fixture +def hdsf(example_data_path) -> HeadFile: + return HeadFile( + example_data_path + / "mf6" + / "create_tests" + / "test021_twri" + / "expected_output" + / "twri.hds" + ) + + +@pytest.mark.benchmark +def test_headfile_get_data_single(benchmark, hdsf): + times = hdsf.get_times() + mid_time = times[len(times) // 2] if len(times) > 1 else times[0] + benchmark(lambda: hdsf.get_data(totim=mid_time)) + + +@pytest.mark.benchmark +def test_headfile_get_alldata(benchmark, hdsf): + benchmark(hdsf.get_alldata) + + +@pytest.mark.benchmark +def test_headfile_get_ts(benchmark, hdsf): + benchmark(lambda: hdsf.get_ts((0, 10, 10))) + + 
+@pytest.mark.benchmark +def test_headfile_get_kstpkper(benchmark, hdsf): + benchmark(lambda: hdsf.get_data(kstpkper=(0, 1))) + + +@pytest.mark.benchmark +def test_headfile_list_records(benchmark, hdsf): + benchmark(hdsf.list_records) + + +@pytest.mark.benchmark +def test_headfile_get_times(benchmark, hdsf): + benchmark(hdsf.get_times) + + +@pytest.mark.benchmark +def test_headfile_get_kstpkper_list(benchmark, hdsf): + benchmark(hdsf.get_kstpkper) + + +@pytest.mark.benchmark +def test_headfile_get_alldata_mf6(benchmark, hdsf): + benchmark(hdsf.get_alldata) diff --git a/autotest/benchmarks/benchmark_headufile.py b/autotest/benchmarks/benchmark_headufile.py new file mode 100644 index 0000000000..f1efa814f7 --- /dev/null +++ b/autotest/benchmarks/benchmark_headufile.py @@ -0,0 +1,32 @@ +import pytest + +from flopy.utils import HeadUFile + + +@pytest.mark.benchmark +def test_headufile_load(benchmark, example_data_path): + hds_file = example_data_path / "unstructured" / "headu.githds" + benchmark(lambda: HeadUFile(str(hds_file))) + + +@pytest.fixture +def huf(example_data_path) -> HeadUFile: + return HeadUFile(str(example_data_path / "unstructured" / "headu.githds")) + + +@pytest.mark.benchmark +def test_headufile_get_data(benchmark, huf): + times = huf.get_times() + mid_time = times[len(times) // 2] if len(times) > 0 else times[0] + benchmark(lambda: huf.get_data(totim=mid_time)) + + +@pytest.mark.benchmark +@pytest.mark.slow +def test_headufile_get_alldata(benchmark, huf): + benchmark(huf.get_alldata) + + +@pytest.mark.benchmark +def test_headufile_get_ts(benchmark, huf): + benchmark(lambda: huf.get_ts(0)) diff --git a/autotest/benchmarks/benchmark_mf6_io.py b/autotest/benchmarks/benchmark_mf6_io.py new file mode 100644 index 0000000000..03f0cda267 --- /dev/null +++ b/autotest/benchmarks/benchmark_mf6_io.py @@ -0,0 +1,70 @@ +""" +Benchmarks for MODFLOW 6 I/O operations using models from: +- examples/data (local models) +- modflow-devtools registry (downloaded 
on-demand) +""" + +from pathlib import Path +from shutil import copytree + +import pytest +from modflow_devtools.models import DEFAULT_REGISTRY, LocalRegistry + +from autotest.conftest import get_examples_path +from flopy.mf6 import MFSimulation + +# prefixes into the model registry +PREFIXES = ["mf6/test", "mf6/large", "mf2005"] + + +def pytest_generate_tests(metafunc): + # Use the --models-path command line option once or more to specify + # model directories. If at least one --models_path is provided, + # external tests (i.e. those using models from an external repo) + # will run against model input files found in the given location + # on the local filesystem rather than model input files from the + # official model registry. This is useful for testing changes to + # test model input files during MF6 development. See conftest.py + # for the models_path fixture and CLI argument definitions. + if "model_name" in metafunc.fixturenames: + models_paths = metafunc.config.getoption("--models-path") + models_paths = [ + Path(p).expanduser().resolve().absolute() for p in models_paths or [] + ] + registry = LocalRegistry() if any(models_paths) else DEFAULT_REGISTRY + registry_type = type(registry).__name__.lower().replace("registry", "") + metafunc.parametrize("registry", [registry], ids=[registry_type]) + models = [] + if "local" in registry_type: + namefile_pattern = ( + metafunc.config.getoption("--namefile-pattern") or "mfsim.nam" + ) + for path in models_paths: + registry.index(path, namefile=namefile_pattern) + models.extend(registry.models.keys()) + else: + for model_prefix in PREFIXES: + models.extend( + [m for m in registry.models.keys() if m.startswith(model_prefix)] + ) + models = sorted(models) + metafunc.parametrize("model_name", models, ids=models) + + +@pytest.mark.benchmark +@pytest.mark.slow +@pytest.mark.external +@pytest.mark.parametrize("use_pandas", [True, False], ids=["pandas", "nopandas"]) +def test_load_simulation(function_tmpdir, benchmark, 
registry, model_name, use_pandas): + registry.copy_to(function_tmpdir, model_name) + benchmark(lambda: MFSimulation.load(sim_ws=function_tmpdir, use_pandas=use_pandas)) + + +@pytest.mark.benchmark +@pytest.mark.external +@pytest.mark.slow +@pytest.mark.parametrize("use_pandas", [True, False], ids=["pandas", "nopandas"]) +def test_write_simulation(function_tmpdir, benchmark, registry, model_name, use_pandas): + registry.copy_to(function_tmpdir, model_name) + sim = MFSimulation.load(sim_ws=function_tmpdir, use_pandas=use_pandas) + benchmark(sim.write_simulation) diff --git a/autotest/benchmarks/benchmark_mf6listbudget.py b/autotest/benchmarks/benchmark_mf6listbudget.py new file mode 100644 index 0000000000..406923526a --- /dev/null +++ b/autotest/benchmarks/benchmark_mf6listbudget.py @@ -0,0 +1,48 @@ +import pytest + +from flopy.utils.mflistfile import Mf6ListBudget + + +@pytest.fixture +def mf6_lbf(example_data_path) -> Mf6ListBudget: + return Mf6ListBudget( + example_data_path / "mf6" / "test001a_Tharmonic" / "flow15.lst" + ) + + +@pytest.mark.benchmark +def test_mf6listbudget_load(benchmark, mf6_lbf): + benchmark(lambda: Mf6ListBudget(mf6_lbf.fname)) + + +@pytest.mark.benchmark +@pytest.mark.parametrize( + "incremental", [True, False], ids=["incremental", "no_incremental"] +) +def test_mf6listbudget_get_data(benchmark, mf6_lbf, incremental): + benchmark(lambda: mf6_lbf.get_data(incremental=incremental)) + + +@pytest.mark.benchmark +def test_mf6listbudget_get_budget(benchmark, mf6_lbf): + benchmark(mf6_lbf.get_budget) + + +@pytest.mark.benchmark +def test_mf6listbudget_get_time_series(benchmark, mf6_lbf): + budget = mf6_lbf.get_budget() + if budget and len(budget) > 0: + term = ( + next(iter(budget[0].dtype.names)) + if hasattr(budget[0], "dtype") + else "STORAGE" + ) + else: + term = "STORAGE" + + benchmark(lambda: mf6_lbf.get_time_series(term)) + + +@pytest.mark.benchmark +def test_mf6listbudget_to_dataframe(benchmark, mf6_lbf): + 
benchmark(mf6_lbf.get_dataframes) diff --git a/autotest/benchmarks/benchmark_mf_io.py b/autotest/benchmarks/benchmark_mf_io.py new file mode 100644 index 0000000000..95a11c81d2 --- /dev/null +++ b/autotest/benchmarks/benchmark_mf_io.py @@ -0,0 +1,62 @@ +from shutil import copytree + +import pytest +from modflow_devtools.misc import get_namefile_paths + +from flopy.modflow import Modflow + +from .conftest import get_examples_path + +# Model directories from examples/data +MODEL_DIRS = [ + "freyberg_multilayer_transient", + "mf2005_test", +] + + +def pytest_generate_tests(metafunc): + """Dynamically parametrize tests with model names.""" + if "model_name" in metafunc.fixturenames: + examples_path = get_examples_path() + models = [] + + # Only include models that exist + for model_dir in MODEL_DIRS: + if (examples_path / model_dir).exists(): + models.append(model_dir) + + metafunc.parametrize("model_name", models, ids=models) + + +def _load_model(ws, model_name): + """Load MODFLOW-2005 model from examples.""" + examples_path = get_examples_path() + source_path = examples_path / model_name + copytree(source_path, ws, dirs_exist_ok=True) + + nam_files = get_namefile_paths(source_path, namefile="*.nam") + assert nam_files, f"No .nam file found in {source_path}" + nam_file = nam_files[0].name + + return Modflow.load(nam_file, model_ws=ws, check=False) + + +@pytest.mark.benchmark +def test_mf2005_load(benchmark, function_tmpdir, model_name): + benchmark(lambda: _load_model(function_tmpdir, model_name)) + + +@pytest.mark.benchmark +def test_mf2005_write_freyberg(benchmark, function_tmpdir): + ml = _load_model(function_tmpdir, "freyberg_multilayer_transient") + benchmark(ml.write_input) + + +@pytest.mark.benchmark +def test_mf2005_round_trip_freyberg(benchmark, function_tmpdir): + def round_trip(): + ml = _load_model(function_tmpdir, "freyberg_multilayer_transient") + ml.write_input() + return Modflow.load("freyberg.nam", model_ws=function_tmpdir, check=False) + + 
benchmark(round_trip) diff --git a/autotest/benchmarks/benchmark_mflistbudget.py b/autotest/benchmarks/benchmark_mflistbudget.py new file mode 100644 index 0000000000..634df7c869 --- /dev/null +++ b/autotest/benchmarks/benchmark_mflistbudget.py @@ -0,0 +1,15 @@ +import pytest + +from flopy.utils.mflistfile import MfListBudget + + +@pytest.fixture +def mf_lbf(example_data_path) -> MfListBudget: + return MfListBudget( + example_data_path / "freyberg_multilayer_transient" / "freyberg.list" + ) + + +@pytest.mark.benchmark +def test_mflistbudget_load(benchmark, mf_lbf): + benchmark(lambda: MfListBudget(mf_lbf.fname)) diff --git a/autotest/benchmarks/benchmark_mfusglistbudget.py b/autotest/benchmarks/benchmark_mfusglistbudget.py new file mode 100644 index 0000000000..08ce89e7ec --- /dev/null +++ b/autotest/benchmarks/benchmark_mfusglistbudget.py @@ -0,0 +1,19 @@ +import pytest + +from flopy.utils.mflistfile import MfusgListBudget + + +@pytest.fixture +def mfusg_lbf(example_data_path) -> MfusgListBudget: + return MfusgListBudget( + example_data_path + / "mfusg_test" + / "03A_conduit_unconfined" + / "output" + / "ex3A.lst" + ) + + +@pytest.mark.benchmark +def test_mfusglistbudget_load(benchmark, mfusg_lbf): + benchmark(lambda: MfusgListBudget(mfusg_lbf.fname)) diff --git a/autotest/benchmarks/benchmark_mtlistfile.py b/autotest/benchmarks/benchmark_mtlistfile.py new file mode 100644 index 0000000000..6b164a6b15 --- /dev/null +++ b/autotest/benchmarks/benchmark_mtlistfile.py @@ -0,0 +1,24 @@ +import pytest + +from flopy.utils.mtlistfile import MtListBudget + + +@pytest.mark.benchmark +def test_mtlistfile_load(benchmark, example_data_path): + list_file = example_data_path / "mt3d_test" / "mf2kmt3d" / "mnw" / "t5.lst" + benchmark(lambda: MtListBudget(str(list_file))) + + +@pytest.fixture +def mtlf(example_data_path) -> MtListBudget: + return MtListBudget(example_data_path / "mt3d_test" / "mf2kmt3d" / "mnw" / "t5.lst") + + +@pytest.mark.benchmark +def 
test_mtlistfile_get_budget(benchmark, mtlf): + benchmark(lambda: mtlf.get_budget()) + + +@pytest.mark.benchmark +def test_mtlistfile_get_data(benchmark, mtlf): + benchmark(lambda: mtlf.get_data()) diff --git a/autotest/benchmarks/benchmark_pathlinefile.py b/autotest/benchmarks/benchmark_pathlinefile.py new file mode 100644 index 0000000000..e3603ad156 --- /dev/null +++ b/autotest/benchmarks/benchmark_pathlinefile.py @@ -0,0 +1,56 @@ +import pytest + +from autotest.test_mp7 import ex01_mf6_model_name +from flopy.modpath.mp7 import Modpath7 +from flopy.utils.modpathfile import PathlineFile + + +@pytest.fixture(scope="module") +def ex01_mp7_model(ex01_mf6_model): + sim, function_tmpdir = ex01_mf6_model + success, buff = sim.run_simulation() + assert success, buff + gwf = sim.get_model(ex01_mf6_model_name) + mpnam = f"{ex01_mf6_model_name}_mp" + mp_ws = function_tmpdir / "mp7" + mp_ws.mkdir() + return Modpath7.create_mp7( + modelname=mpnam, + trackdir="forward", + flowmodel=gwf, + exe_name="mp7", + model_ws=mp_ws, + rowcelldivisions=1, + columncelldivisions=1, + layercelldivisions=1, + ), mp_ws + + +@pytest.fixture(scope="module") +def plf(ex01_mp7_model) -> PathlineFile: + mp, ws = ex01_mp7_model + mp.write_input() + success, buff = mp.run_model() + assert success + return PathlineFile(ws / f"{mp.name}.mppth") + + +@pytest.mark.benchmark +def test_pathlinefile_load(benchmark, plf): + benchmark(lambda: PathlineFile(plf.fname)) + + +@pytest.mark.benchmark +def test_pathlinefile_get_data(benchmark, plf): + benchmark(plf.get_data) + + +@pytest.mark.benchmark +def test_pathlinefile_get_alldata(benchmark, plf): + benchmark(plf.get_alldata) + + +@pytest.mark.benchmark +def test_pathlinefile_get_destination_data(benchmark, plf): + dest_cells = list(range(0, 100)) + benchmark(lambda: plf.get_destination_pathline_data(dest_cells)) diff --git a/autotest/benchmarks/benchmark_postprocessing.py b/autotest/benchmarks/benchmark_postprocessing.py new file mode 100644 index 
0000000000..bbcc132d56 --- /dev/null +++ b/autotest/benchmarks/benchmark_postprocessing.py @@ -0,0 +1,60 @@ +from pathlib import Path + +import pytest + +from autotest.conftest import load_mf6_sim +from flopy.utils import CellBudgetFile, HeadFile +from flopy.utils.postprocessing import ( + get_gradients, + get_specific_discharge, + get_transmissivities, + get_water_table, +) + + +@pytest.mark.benchmark +@pytest.mark.parametrize( + "row_col", + [lambda m: (None, None), lambda m: (m.dis.nrow // 2, m.dis.ncol // 2)], + ids=["everywhere", "center"], +) +def test_get_transmissivities(benchmark, function_tmpdir, row_col): + sim = load_mf6_sim(function_tmpdir, model_key="freyberg") + gwf = sim.get_model() + hds_path = Path(function_tmpdir) / "freyberg.hds" + hds = HeadFile(hds_path) + heads = hds.get_data(totim=hds.get_times()[-1]) + r, c = row_col(gwf) + benchmark(lambda: get_transmissivities(heads, gwf, r=r, c=c)) + + +@pytest.mark.benchmark +def test_get_water_table(benchmark, function_tmpdir): + sim = load_mf6_sim(function_tmpdir, model_key="freyberg") + hds_path = Path(function_tmpdir) / "freyberg.hds" + hds = HeadFile(hds_path) + heads = hds.get_data(totim=hds.get_times()[-1]) + benchmark(lambda: get_water_table(heads)) + + +@pytest.mark.benchmark +def test_get_gradients(benchmark, function_tmpdir): + sim = load_mf6_sim(function_tmpdir, model_key="freyberg") + gwf = sim.get_model() + hds_path = Path(function_tmpdir) / "freyberg.hds" + hds = HeadFile(hds_path) + heads = hds.get_data(totim=hds.get_times()[-1]) + benchmark(lambda: get_gradients(heads, gwf)) + + +@pytest.mark.benchmark +def test_get_specific_discharge(benchmark, function_tmpdir): + sim = load_mf6_sim(function_tmpdir, model_key="freyberg") + gwf = sim.get_model() + hds_path = Path(function_tmpdir) / "freyberg.hds" + bud_path = Path(function_tmpdir) / "freyberg.cbc" + hds = HeadFile(hds_path) + bud = CellBudgetFile(bud_path) + heads = hds.get_data(totim=hds.get_times()[-1]) + spdis = 
bud.get_data(text="DATA-SPDIS")[0] + benchmark(lambda: get_specific_discharge(spdis, gwf, heads)) diff --git a/autotest/benchmarks/benchmark_rasters.py b/autotest/benchmarks/benchmark_rasters.py new file mode 100644 index 0000000000..f60bfc21d6 --- /dev/null +++ b/autotest/benchmarks/benchmark_rasters.py @@ -0,0 +1,134 @@ +import numpy as np +import pytest +from modflow_devtools.misc import has_pkg + +pytest.importorskip("rasterio") +pytest.importorskip("affine") + +from autotest.conftest import get_example_data_path +from flopy.discretization.structuredgrid import StructuredGrid +from flopy.utils.geometry import Polygon +from flopy.utils.rasters import Raster + + +@pytest.fixture(scope="module") +def raster_path(example_data_path): + return example_data_path / "options" / "dem" / "dem.img" + + +@pytest.fixture(scope="module") +def raster(raster_path): + return Raster.load(raster_path) + + +def origin_and_extent(raster) -> tuple[float, float, float, float]: + x0, x1, y0, y1 = raster.bounds + # central region + cx, cy = (x0 + x1) / 2, (y0 + y1) / 2 + extent_x, extent_y = (x1 - x0) * 0.6, (y1 - y0) * 0.6 + grid_x0, grid_y0 = cx - extent_x / 2, cy - extent_y / 2 + return grid_x0, grid_y0, extent_x, extent_y + + +def make_grid(raster, nrow, ncol) -> StructuredGrid: + grid_x0, grid_y0, extent_x, extent_y = origin_and_extent(raster) + delr = np.full(ncol, extent_x / ncol) + delc = np.full(nrow, extent_y / nrow) + return StructuredGrid( + delc=delc, delr=delr, xoff=grid_x0, yoff=grid_y0, crs=raster.crs + ) + + +@pytest.mark.benchmark +def test_raster_load(benchmark, raster_path): + benchmark(lambda: Raster.load(raster_path)) + + +RASTER_PATH = get_example_data_path() / "options" / "dem" / "dem.img" +GRIDS = [ + make_grid(Raster.load(RASTER_PATH), 10, 10), + make_grid(Raster.load(RASTER_PATH), 50, 50), + make_grid(Raster.load(RASTER_PATH), 200, 200), +] + + +@pytest.mark.benchmark +@pytest.mark.parametrize("grid", GRIDS, ids=["small", "medium", "large"]) 
+@pytest.mark.parametrize( + "method", ["linear", "nearest", "cubic", "mean", "median", "min", "max"] +) +def test_raster_resample(benchmark, raster, grid, method): + benchmark(lambda: raster.resample_to_grid(grid, band=1, method=method)) + + +@pytest.mark.benchmark +@pytest.mark.skipif(not has_pkg("pyproj"), reason="requires pyproj") +def test_raster_to_crs_transform(benchmark, raster): + benchmark(lambda: raster.to_crs(epsg=4326)) + + +def small_poly(raster): + # small central polygon, ~20% of extent + x0, x1, y0, y1 = raster.bounds + cx, cy = (x0 + x1) / 2, (y0 + y1) / 2 + dx, dy = (x1 - x0) * 0.1, (y1 - y0) * 0.1 + return Polygon( + [(cx - dx, cy - dy), (cx + dx, cy - dy), (cx + dx, cy + dy), (cx - dx, cy + dy)] + ) + + +def medium_poly(raster): + # ~50% of extent + x0, x1, y0, y1 = raster.bounds + cx, cy = (x0 + x1) / 2, (y0 + y1) / 2 + dx, dy = (x1 - x0) * 0.25, (y1 - y0) * 0.25 + return Polygon( + [(cx - dx, cy - dy), (cx + dx, cy - dy), (cx + dx, cy + dy), (cx - dx, cy + dy)] + ) + + +def large_poly(raster): + # 80% of extent + x0, x1, y0, y1 = raster.bounds + dx, dy = (x1 - x0) * 0.1, (y1 - y0) * 0.1 + return Polygon( + [(x0 + dx, y0 + dy), (x1 - dx, y0 + dy), (x1 - dx, y1 - dy), (x0 + dx, y1 - dy)] + ) + + +POLYGONS = [ + small_poly(Raster.load(RASTER_PATH)), + medium_poly(Raster.load(RASTER_PATH)), + large_poly(Raster.load(RASTER_PATH)), +] + + +@pytest.mark.benchmark +@pytest.mark.parametrize("poly", POLYGONS, ids=["small", "medium", "large"]) +def test_raster_crop(benchmark, raster, poly): + benchmark(lambda: raster.crop(poly)) + + +@pytest.mark.benchmark +@pytest.mark.parametrize("poly", POLYGONS, ids=["small", "medium", "large"]) +def test_raster_sample(benchmark, raster, poly): + benchmark(lambda: raster.sample_polygon(poly, band=1)) + + +@pytest.mark.benchmark +def test_raster_sample_point(benchmark, raster): + x0, x1, y0, y1 = raster.bounds + x, y = (x0 + x1) / 2, (y0 + y1) / 2 # center point + benchmark(lambda: raster.sample_point(x, y, band=1)) 
+ + +@pytest.mark.benchmark +@pytest.mark.parametrize("masked", [True, False], ids=["masked", "unmasked"]) +def test_raster_get_array_masked(benchmark, raster, masked): + benchmark(lambda: raster.get_array(band=1, masked=masked)) + + +@pytest.mark.benchmark +def test_raster_write(benchmark, raster, function_tmpdir): + output_path = function_tmpdir / "output_raster.tif" + benchmark(lambda: raster.write(str(output_path))) diff --git a/autotest/benchmarks/benchmark_sfroutputfile.py b/autotest/benchmarks/benchmark_sfroutputfile.py new file mode 100644 index 0000000000..f3ba1415e4 --- /dev/null +++ b/autotest/benchmarks/benchmark_sfroutputfile.py @@ -0,0 +1,34 @@ +import pytest + +from flopy.utils.sfroutputfile import SfrFile + + +@pytest.mark.benchmark +def test_sfrfile_load(benchmark, example_data_path): + sfr_file = example_data_path / "freyberg_usg" / "freyberg.usg.sfr" + benchmark(lambda: SfrFile(str(sfr_file))) + + +@pytest.fixture +def sfrf(example_data_path) -> SfrFile: + return SfrFile(str(example_data_path / "freyberg_usg" / "freyberg.usg.sfr")) + + +@pytest.mark.benchmark +def test_sfrfile_get_nstrm(benchmark, sfrf): + benchmark(sfrf.get_nstrm) + + +@pytest.mark.benchmark +def test_sfrfile_get_results(benchmark, sfrf): + benchmark(sfrf.get_results) + + +@pytest.mark.benchmark +def test_sfrfile_get_times(benchmark, sfrf): + benchmark(sfrf.get_times) + + +@pytest.mark.benchmark +def test_sfrfile_get_dataframe(benchmark, sfrf): + benchmark(sfrf.get_dataframe) diff --git a/autotest/benchmarks/benchmark_ucnfile.py b/autotest/benchmarks/benchmark_ucnfile.py new file mode 100644 index 0000000000..f1efa814f7 --- /dev/null +++ b/autotest/benchmarks/benchmark_ucnfile.py @@ -0,0 +1,32 @@ +import pytest + +from flopy.utils import HeadUFile + + +@pytest.mark.benchmark +def test_headufile_load(benchmark, example_data_path): + hds_file = example_data_path / "unstructured" / "headu.githds" + benchmark(lambda: HeadUFile(str(hds_file))) + + +@pytest.fixture +def 
huf(example_data_path) -> HeadUFile: + return HeadUFile(str(example_data_path / "unstructured" / "headu.githds")) + + +@pytest.mark.benchmark +def test_headufile_get_data(benchmark, huf): + times = huf.get_times() + mid_time = times[len(times) // 2] if len(times) > 0 else times[0] + benchmark(lambda: huf.get_data(totim=mid_time)) + + +@pytest.mark.benchmark +@pytest.mark.slow +def test_headufile_get_alldata(benchmark, huf): + benchmark(huf.get_alldata) + + +@pytest.mark.benchmark +def test_headufile_get_ts(benchmark, huf): + benchmark(lambda: huf.get_ts(0)) diff --git a/autotest/benchmarks/benchmark_zonebudget.py b/autotest/benchmarks/benchmark_zonebudget.py new file mode 100644 index 0000000000..f6c0672adc --- /dev/null +++ b/autotest/benchmarks/benchmark_zonebudget.py @@ -0,0 +1,89 @@ +import numpy as np +import pytest + +from flopy.mf6.modflow.mfsimulation import MFSimulation +from flopy.modflow.mf import Modflow +from flopy.utils.zonbud import ZoneBudget, ZoneBudget6 + + +def create_zone_array(nlay, nrow, ncol, n_zones=5): + zones = np.zeros((nlay, nrow, ncol), dtype=np.int32) + + # Create simple zoning pattern + # Divide grid into roughly equal zones + zone_width = ncol // n_zones + + for i in range(n_zones): + start_col = i * zone_width + end_col = (i + 1) * zone_width if i < n_zones - 1 else ncol + zones[:, :, start_col:end_col] = i + 1 + + return zones + + +@pytest.mark.benchmark +@pytest.mark.parametrize("nzones", [3, 10, 50]) +def test_zonebudget_load(benchmark, example_data_path, nzones): + model_path = example_data_path / "freyberg_multilayer_transient" + model = Modflow.load("freyberg.nam", model_ws=model_path, version="mf2005", check=False) + cbc_path = model_path / "freyberg.cbc" + zones = create_zone_array(model.nlay, model.nrow, model.ncol, n_zones=nzones) + benchmark(lambda: ZoneBudget(str(cbc_path), zones, verbose=False)) + + +@pytest.mark.benchmark +@pytest.mark.parametrize("nzones", [3, 10, 50]) +def test_zonebudget_get_budget(benchmark, example_data_path, nzones): + model_path = 
example_data_path / "freyberg_multilayer_transient" + model = Modflow.load("freyberg.nam", model_ws=model_path, version="mf2005", check=False) + cbc_path = model_path / "freyberg.cbc" + zones = create_zone_array(model.nlay, model.nrow, model.ncol, n_zones=nzones) + zb = ZoneBudget(str(cbc_path), zones, verbose=False) + benchmark(zb.get_budget) + + +@pytest.mark.benchmark +@pytest.mark.parametrize("nzones", [3, 10, 50]) +def test_zonebudget_get_volumetric_budget(benchmark, example_data_path, nzones): + model_path = example_data_path / "freyberg_multilayer_transient" + model = Modflow.load("freyberg.nam", model_ws=model_path, version="mf2005", check=False) + cbc_path = model_path / "freyberg.cbc" + zones = create_zone_array(model.nlay, model.nrow, model.ncol, n_zones=nzones) + zb = ZoneBudget(str(cbc_path), zones, verbose=False) + benchmark(zb.get_volumetric_budget) + + +@pytest.mark.benchmark +@pytest.mark.parametrize("nzones", [3, 10, 50]) +def test_zonebudget_get_dataframes(benchmark, example_data_path, nzones): + model_path = example_data_path / "freyberg_multilayer_transient" + model = Modflow.load("freyberg.nam", model_ws=model_path, version="mf2005", check=False) + cbc_path = model_path / "freyberg.cbc" + zones = create_zone_array(model.nlay, model.nrow, model.ncol, n_zones=nzones) + zb = ZoneBudget(str(cbc_path), zones, verbose=False) + benchmark(zb.get_dataframes) + + +@pytest.mark.benchmark +@pytest.mark.parametrize("nzones", [3, 10, 50]) +def test_zonebudget6_load(benchmark, example_data_path, nzones): + sim = MFSimulation.load(sim_ws=example_data_path / "mf6-freyberg") + gwf = sim.get_model() + zones = create_zone_array( + gwf.modelgrid.nlay, gwf.modelgrid.nrow, gwf.modelgrid.ncol, n_zones=nzones + ) + cbc_path = sim.sim_path / f"{gwf.name}.cbc" + benchmark(lambda: ZoneBudget6(str(cbc_path), zones, verbose=False)) + + +@pytest.mark.benchmark +@pytest.mark.parametrize("nzones", [3, 10, 50]) +def test_zonebudget6_get_budget(benchmark, example_data_path, nzones): + sim = MFSimulation.load(sim_ws=example_data_path / "mf6-freyberg") + gwf = sim.get_model() + zones = 
create_zone_array( + gwf.modelgrid.nlay, gwf.modelgrid.nrow, gwf.modelgrid.ncol, n_zones=nzones + ) + cbc_path = sim.sim_path / f"{gwf.name}.cbc" + zb = ZoneBudget6(str(cbc_path), zones, verbose=False) + benchmark(zb.get_budget) diff --git a/autotest/benchmarks/conftest.py b/autotest/benchmarks/conftest.py new file mode 100644 index 0000000000..1f04dcd1c3 --- /dev/null +++ b/autotest/benchmarks/conftest.py @@ -0,0 +1,54 @@ +from pathlib import Path + +import pytest + + +def get_examples_path(): + """Get path to examples/data directory.""" + return Path(__file__).parent.parent.parent / "examples" / "data" + + +@pytest.fixture(scope="session") +def benchmark_config(): + """Configure pytest-benchmark settings.""" + return { + "warmup": True, + "warmup_iterations": 5, + "min_rounds": 5, + "disable_gc": True, + } + + +@pytest.fixture(scope="session") +def models_path(request) -> list[Path]: + """ + A directories containing model subdirectories. Use + the --models-path command line option once or more to specify + model directories. If at least one --models_path is provided, + external tests (i.e. those using models from an external repo) + will run against model input files found in the given location + on the local filesystem rather than model input files from the + official model registry. This is useful for testing changes to + test model input files during MF6 development. + """ + paths = request.config.getoption("--models-path") or [] + return [Path(p).expanduser().resolve().absolute() for p in paths] + + +def pytest_addoption(parser): + parser.addoption( + "--models-path", + action="append", + type=str, + help="directory containing model subdirectories. set this to run external " + "tests (i.e. 
those using models from an external repo) against local model " + "input files rather than input files from the official model registry.", + ) + parser.addoption( + "--namefile-pattern", + action="store", + type=str, + default="mfsim.nam", + help="namefile pattern to use when indexing models when --models-path is set." + "does nothing otherwise. default is 'mfsim.nam'.", + ) diff --git a/autotest/pytest.ini b/autotest/pytest.ini deleted file mode 100644 index e867703f14..0000000000 --- a/autotest/pytest.ini +++ /dev/null @@ -1,23 +0,0 @@ -[pytest] -addopts = -ra --color=yes -python_files = - test_*.py - profile_*.py - benchmark_*.py - *_test*.py - *_profile*.py - *_benchmark*.py -env_files = - .env -markers = - example: exercise scripts, tutorials, notebooks - generation: tests for code generation utilities - meta: tests run by other tests - mf6: tests for MODFLOW 6 support - regression: tests comparing multiple versions - slow: tests not completing in a few seconds -filterwarnings = - # from python-dateutil, used by arrow, jupyter_client, matplotlib, pandas - ignore:datetime.datetime.utcfromtimestamp - # from pandas, see https://github.com/pandas-dev/pandas/issues/54466 - ignore:\n.*Pyarrow diff --git a/autotest/test_grid_cases.py b/autotest/test_grid_cases.py index 9415a237bd..24392eedaa 100644 --- a/autotest/test_grid_cases.py +++ b/autotest/test_grid_cases.py @@ -58,6 +58,30 @@ def structured_cbd_small(): laycbd=laycbd, ) + @staticmethod + def structured_medium(): + nlay, nrow, ncol = 5, 50, 50 + return StructuredGrid( + delc=np.ones(nrow), + delr=np.ones(ncol), + top=np.ones((nrow, ncol)) * 100.0, + botm=np.array( + [np.ones((nrow, ncol)) * (100.0 - (i + 1) * 10.0) for i in range(nlay)] + ), + ) + + @staticmethod + def structured_large(): + nlay, nrow, ncol = 10, 100, 100 + return StructuredGrid( + delc=np.ones(nrow), + delr=np.ones(ncol), + top=np.ones((nrow, ncol)) * 100.0, + botm=np.array( + [np.ones((nrow, ncol)) * (100.0 - (i + 1) * 10.0) for i in 
range(nlay)] + ), + ) + @staticmethod def vertex_small(): nlay, ncpl = 3, 5 diff --git a/docs/benchmarking_plan.md b/docs/benchmarking_plan.md new file mode 100644 index 0000000000..0bb75931cc --- /dev/null +++ b/docs/benchmarking_plan.md @@ -0,0 +1,924 @@ +# FloPy Comprehensive Benchmarking Plan + +## Executive Summary + +This document outlines a plan to expand FloPy's benchmarking capabilities to systematically track performance improvements during ongoing development, particularly the pandas-based I/O refactoring effort. The plan builds upon the existing `pytest-benchmark` infrastructure while addressing current limitations in coverage and tooling. + +**Important**: These benchmarks test **FloPy code performance only**, not the runtime of MODFLOW/MODPATH executables that FloPy drives. Benchmarks focus on FloPy's I/O operations, data structure manipulations, and utility functions. + +### Key Goals + +1. **Quantify pandas I/O refactor impact** - Measure FloPy's file I/O performance gains +2. **Prevent performance regressions** - Automated detection of FloPy performance degradation in CI/CD +3. **Expand coverage** - Benchmark all major FloPy operations (load/write, utilities, grids, exports) +4. 
**Streamline workflow** - Reduce ad-hoc scripting, improve automation and reporting + +## Current State Analysis + +### Existing Infrastructure + +**Benchmarks** (as of 2026-01-25): +- Location: `autotest/test_modflow.py:1334-1353` +- Count: 3 benchmarks + - `test_model_init_time` - MODFLOW-2005 model initialization + - `test_model_write_time` - Model file writing + - `test_model_load_time` - Model file loading + +**Tooling**: +- `pytest-benchmark` plugin (pyproject.toml dependency) +- Daily CI workflow (`.github/workflows/benchmark.yml`) + - Matrix: 3 OS × 3 Python versions = 9 configurations + - Runs: Daily at 8 AM UTC +- Post-processing: `scripts/process_benchmarks.py` + - Generates time-series plots using seaborn + - Outputs CSV data and PNG visualizations + +### Current Limitations + +1. **Narrow Coverage** + - Only MODFLOW-2005 tested + - No MF6, MT3D, SEAWAT coverage + - Missing utility benchmarks (HeadFile, BudgetFile, grids, exports) + +2. **Limited Visibility** + - Results only stored as GitHub Actions artifacts + - No historical trend tracking + - No automated regression detection + - Manual comparison required + +3. **Workflow Issues** + - Ad-hoc scripting for result processing + - No integration with PR review process + - Missing baseline comparisons + +## Proposed Solution + +### 1. 
Tooling Strategy + +**Decision: Continue with pytest-benchmark + Add Codspeed Integration** + +#### Rationale + +- **ASV (Airspeed Velocity)**: Originally considered but now appears unmaintained + - Major projects (NumPy, others) migrating away + - Limited recent activity on repository + +- **pytest-benchmark**: Currently working well + - Integrated with existing test suite + - Familiar to developers + - Good CI integration + +- **Codspeed** (RECOMMENDED ADDITION): + - Seamless `pytest-codspeed` plugin compatibility + - Zero-config migration from `pytest-benchmark` + - Automated performance regression detection + - Historical trend visualization + - PR-based performance impact reports + - Free for open-source projects + +#### Implementation Steps + +1. Add `pytest-codspeed` to test dependencies: + ```toml + [project.optional-dependencies] + test = [ + # ... existing deps + "pytest-benchmark", + "pytest-codspeed", + ] + ``` + +2. Update benchmark workflow to use Codspeed action: + ```yaml + - uses: CodSpeedHQ/action@v3 + with: + token: ${{ secrets.CODSPEED_TOKEN }} + run: pytest autotest/benchmarks --codspeed + ``` + +3. Optionally refactor existing benchmarks to use decorator pattern: + ```python + @pytest.mark.benchmark + def test_model_load_time(function_tmpdir): + model = get_perftest_model(ws=function_tmpdir, name=name) + model.write_input() + Modflow.load(f"{name}.nam", model_ws=function_tmpdir, check=False) + ``` + +### 2. Benchmark Coverage Expansion + +#### 2.1 Core I/O Benchmarks + +Expand model load/write/init benchmarks across all major simulators. 
+ +**MODFLOW 6** (Highest Priority): + +```python +# autotest/benchmarks/benchmark_io_mf6.py + +def test_mf6_sim_init_small(benchmark, function_tmpdir): + """Benchmark MF6 simulation initialization - small model.""" + benchmark(lambda: create_small_mf6_sim(function_tmpdir)) + +def test_mf6_sim_init_large(benchmark, function_tmpdir): + """Benchmark MF6 simulation initialization - large model.""" + benchmark(lambda: create_large_mf6_sim(function_tmpdir)) + +def test_mf6_sim_write(benchmark, function_tmpdir): + """Benchmark MF6 simulation write.""" + sim = create_test_mf6_sim(function_tmpdir) + benchmark(sim.write_simulation) + +def test_mf6_sim_load(benchmark, function_tmpdir): + """Benchmark MF6 simulation load.""" + sim = create_test_mf6_sim(function_tmpdir) + sim.write_simulation() + sim_ws = function_tmpdir + benchmark(lambda: MFSimulation.load(simulation_ws=sim_ws)) + +def test_mf6_package_write_large_arrays(benchmark, function_tmpdir): + """Benchmark writing packages with large arrays (e.g., NPF K).""" + sim = create_large_array_sim(function_tmpdir) + benchmark(sim.write_simulation) + +def test_mf6_multimodel_sim(benchmark, function_tmpdir): + """Benchmark multi-model simulation I/O.""" + benchmark(lambda: create_multimodel_sim(function_tmpdir)) + +def test_mf6_exchange_load(benchmark, function_tmpdir): + """Benchmark loading simulations with exchanges.""" + sim = create_exchange_sim(function_tmpdir) + sim.write_simulation() + benchmark(lambda: MFSimulation.load(simulation_ws=function_tmpdir)) +``` + +**Legacy MODFLOW Variants**: + +```python +# autotest/benchmarks/benchmark_io_legacy.py + +@pytest.mark.parametrize("variant", ["mfnwt", "mfusg", "seawat", "mt3dms"]) +def test_legacy_model_init(benchmark, function_tmpdir, variant): + """Benchmark initialization across legacy MODFLOW variants.""" + benchmark(lambda: create_legacy_model(variant, function_tmpdir)) + +@pytest.mark.parametrize("grid_type", ["structured", "unstructured"]) +def 
test_modflow_grid_types(benchmark, function_tmpdir, grid_type): + """Benchmark I/O for different grid types.""" + benchmark(lambda: create_model_with_grid(grid_type, function_tmpdir)) + +@pytest.mark.parametrize("temporal", ["steady", "transient_small", "transient_large"]) +def test_modflow_temporal(benchmark, function_tmpdir, temporal): + """Benchmark I/O for different temporal discretizations.""" + benchmark(lambda: create_temporal_model(temporal, function_tmpdir)) +``` + +#### 2.2 Post-Processing Utilities + +Benchmark common workflow operations. + +**HeadFile Operations**: + +Note: HeadFile benchmarks use pre-existing files from examples/data directory to test FloPy's file parsing performance only, not MODFLOW runtime. + +```python +# autotest/benchmarks/benchmark_utils_heads.py + +from pathlib import Path +from flopy.utils import HeadFile + +FREYBERG_HDS = Path("examples/data/freyberg_multilayer_transient/freyberg.hds") + +@pytest.mark.skipif(not FREYBERG_HDS.exists(), reason="Example data not available") +def test_headfile_init_freyberg(benchmark): + """Benchmark FloPy's HeadFile initialization.""" + benchmark(lambda: HeadFile(FREYBERG_HDS)) + +@pytest.mark.skipif(not FREYBERG_HDS.exists(), reason="Example data not available") +def test_headfile_get_data_single(benchmark): + """Benchmark FloPy's head data extraction for single time step.""" + hds = HeadFile(FREYBERG_HDS) + times = hds.get_times() + mid_time = times[len(times) // 2] + benchmark(lambda: hds.get_data(totim=mid_time)) + +def test_headfile_get_alldata(benchmark, function_tmpdir): + """Benchmark reading entire head file.""" + hds = create_and_open_headfile(function_tmpdir) + benchmark(hds.get_alldata) + +def test_headfile_get_ts(benchmark, function_tmpdir): + """Benchmark time series extraction.""" + hds = create_and_open_headfile(function_tmpdir) + benchmark(lambda: hds.get_ts((0, 10, 10))) + +@pytest.mark.parametrize("size", ["small", "medium", "large"]) +def test_headfile_scaling(benchmark, 
function_tmpdir, size): + """Benchmark HeadFile operations at different scales.""" + hds = create_headfile_with_size(function_tmpdir, size) + benchmark(hds.get_alldata) +``` + +**CellBudgetFile Operations**: + +Note: BudgetFile benchmarks use pre-existing files from examples/data directory to test FloPy's file parsing performance only, not MODFLOW runtime. + +```python +# autotest/benchmarks/benchmark_utils_budget.py + +from pathlib import Path +from flopy.utils import CellBudgetFile + +FREYBERG_CBC = Path("examples/data/freyberg_multilayer_transient/freyberg.cbc") + +@pytest.mark.skipif(not FREYBERG_CBC.exists(), reason="Example data not available") +def test_budgetfile_init_freyberg(benchmark): + """Benchmark FloPy's CellBudgetFile initialization.""" + benchmark(lambda: CellBudgetFile(FREYBERG_CBC)) + +def test_budgetfile_get_data(benchmark, function_tmpdir): + """Benchmark budget data extraction.""" + cbc = create_and_open_budgetfile(function_tmpdir) + benchmark(lambda: cbc.get_data(text="FLOW RIGHT FACE")) + +def test_budgetfile_list_records(benchmark, function_tmpdir): + """Benchmark record listing.""" + cbc = create_and_open_budgetfile(function_tmpdir) + benchmark(cbc.list_records) +``` + +**MODPATH Utilities**: + +```python +# autotest/benchmarks/benchmark_utils_modpath.py + +def test_pathlinefile_load(benchmark, function_tmpdir): + """Benchmark PathlineFile loading.""" + pth_file = create_pathlinefile(function_tmpdir) + benchmark(lambda: PathlineFile(pth_file)) + +def test_endpointfile_load(benchmark, function_tmpdir): + """Benchmark EndpointFile loading.""" + ept_file = create_endpointfile(function_tmpdir) + benchmark(lambda: EndpointFile(ept_file)) + +def test_pathline_to_dataframe(benchmark, function_tmpdir): + """Benchmark pathline conversion to DataFrame.""" + pth = create_and_open_pathlinefile(function_tmpdir) + benchmark(lambda: pth.get_destination_pathline_data(range(100))) +``` + +#### 2.3 Grid Operations + +```python +# 
autotest/benchmarks/benchmark_grids.py + +@pytest.mark.parametrize("grid_class", [ + StructuredGrid, + VertexGrid, + UnstructuredGrid, +]) +def test_grid_init(benchmark, grid_class): + """Benchmark grid initialization.""" + params = get_grid_params(grid_class) + benchmark(lambda: grid_class(**params)) + +def test_grid_intersect_structured(benchmark): + """Benchmark structured grid intersection.""" + grid = create_test_structured_grid() + line = create_test_linestring() + benchmark(lambda: grid.intersect(line)) + +def test_grid_get_lrc_large(benchmark): + """Benchmark get_lrc for large models.""" + grid = create_large_structured_grid() + nodes = range(0, grid.nnodes, 100) # Sample every 100th node + benchmark(lambda: [grid.get_lrc(node) for node in nodes]) + +def test_grid_get_node_large(benchmark): + """Benchmark get_node for large models.""" + grid = create_large_structured_grid() + lrc_tuples = [(0, i, j) for i in range(0, grid.nrow, 10) + for j in range(0, grid.ncol, 10)] + benchmark(lambda: [grid.get_node(lrc) for lrc in lrc_tuples]) +``` + +#### 2.4 Export Operations + +```python +# autotest/benchmarks/benchmark_export.py + +def test_export_shapefile_small(benchmark, function_tmpdir): + """Benchmark shapefile export - small model.""" + model = create_small_test_model(function_tmpdir) + output_path = function_tmpdir / "export.shp" + benchmark(lambda: model.export(output_path)) + +def test_export_shapefile_large(benchmark, function_tmpdir): + """Benchmark shapefile export - large model.""" + model = create_large_test_model(function_tmpdir) + output_path = function_tmpdir / "export.shp" + benchmark(lambda: model.export(output_path)) + +@pytest.mark.skipif(not has_pkg("geopandas"), reason="requires geopandas") +def test_export_geodataframe(benchmark, function_tmpdir): + """Benchmark GeoDataFrame export (issue #2671).""" + model = create_test_model(function_tmpdir) + benchmark(lambda: model.to_gdf()) + +@pytest.mark.skipif(not has_pkg("netCDF4"), reason="requires 
netCDF4") +def test_export_netcdf(benchmark, function_tmpdir): + """Benchmark NetCDF export.""" + model = create_test_model(function_tmpdir) + output_path = function_tmpdir / "export.nc" + benchmark(lambda: model.export(output_path, fmt="netcdf")) + +@pytest.mark.skipif(not has_pkg("vtk"), reason="requires vtk") +def test_export_vtk(benchmark, function_tmpdir): + """Benchmark VTK export.""" + model = create_test_model(function_tmpdir) + output_path = function_tmpdir / "export.vtk" + benchmark(lambda: model.export(output_path, fmt="vtk")) +``` + +#### 2.5 Array and Data Structure Benchmarks + +```python +# autotest/benchmarks/benchmark_arrays.py + +def test_util2d_create_large(benchmark): + """Benchmark Util2d creation with large arrays.""" + shape = (100, 100) + data = np.random.random(shape) + benchmark(lambda: Util2d(None, shape, data)) + +def test_util2d_external_io(benchmark, function_tmpdir): + """Benchmark Util2d external file I/O.""" + u2d = create_util2d_with_external(function_tmpdir) + benchmark(u2d.get_file_entry) + +def test_util3d_create_large(benchmark): + """Benchmark Util3d creation with large arrays.""" + shape = (10, 100, 100) + data = np.random.random(shape) + benchmark(lambda: Util3d(None, shape, data)) +``` + +#### 2.6 Pandas Integration Benchmarks + +**Critical for validating ongoing refactor efforts:** + +```python +# autotest/benchmarks/benchmark_pandas_io.py + +def test_pandas_array_read(benchmark, function_tmpdir): + """Benchmark pandas-based array reading.""" + # Compare pandas vs traditional approaches + file_path = create_test_array_file(function_tmpdir) + benchmark(lambda: read_array_pandas(file_path)) + +def test_pandas_list_read(benchmark, function_tmpdir): + """Benchmark pandas-based list reading.""" + file_path = create_test_list_file(function_tmpdir) + benchmark(lambda: read_list_pandas(file_path)) + +def test_pandas_array_write(benchmark, function_tmpdir): + """Benchmark pandas-based array writing.""" + data = 
create_test_array_data() + file_path = function_tmpdir / "test.dat" + benchmark(lambda: write_array_pandas(data, file_path)) + +def test_mflist_pandas_performance(benchmark, function_tmpdir): + """Benchmark MFList with pandas backend.""" + stress_period_data = create_large_stress_period_data() + benchmark(lambda: MFList(stress_period_data)) + +def test_recarray_to_dataframe(benchmark): + """Benchmark recarray to DataFrame conversion.""" + rec = create_large_recarray() + benchmark(lambda: pd.DataFrame(rec)) +``` + +### 3. Integration with modflow-devtools Models API + +**Already Available**: The `modflow-devtools` package provides a models API with 442 models including: +- 242 MF6 test models (`mf6/test/*`) +- MF6 examples (`mf6/example/*`) +- MF6 large models (`mf6/large/*`) +- MODFLOW-2005 models (`mf2005/*`) + +**Implementation**: + +```python +# autotest/benchmarks/benchmark_models_api.py + +from modflow_devtools.models import DEFAULT_REGISTRY +from flopy.mf6 import MFSimulation + +# Select diverse models for benchmarking +BENCHMARK_MODELS = [ + "mf6/test/test001a_Tharmonic", + "mf6/test/test006_gwf3", # Multi-model + "mf6/test/test006_gwf3_disv", # DISV grid + "mf6/test/test021_twri", # Classic problem + "mf6/test/test045_lake1ss_table", # LAK package +] + +@pytest.mark.parametrize("model_name", BENCHMARK_MODELS) +def test_mf6_load_from_registry(benchmark, function_tmpdir, model_name): + """Benchmark FloPy loading models from devtools registry.""" + # Copy model to temp directory (setup, not benchmarked) + DEFAULT_REGISTRY.copy_to(function_tmpdir, model_name) + + # Benchmark FloPy loading the model + benchmark(lambda: MFSimulation.load(simulation_ws=function_tmpdir)) +``` + +**Benefits**: +- 442 models available immediately (no waiting for issue #1872) +- Reproducible benchmarks across development environments +- Tests FloPy loading against diverse, real-world model inputs +- Community-standard test cases from MODFLOW 6 test suite +- On-demand download via 
Pooch (models cached locally)
+
+### 4. Benchmark Organization
+
+#### Proposed Directory Structure
+
+```
+autotest/
+├── benchmarks/                         # NEW: Dedicated benchmark directory
+│   ├── __init__.py
+│   ├── conftest.py                     # Shared fixtures, model builders
+│   ├── benchmark_io_mf6.py             # MF6 I/O benchmarks
+│   ├── benchmark_io_legacy.py          # Legacy MODFLOW variants
+│   ├── benchmark_utils_heads.py        # HeadFile operations (uses example data)
+│   ├── benchmark_utils_budget.py       # CellBudgetFile operations (uses example data)
+│   ├── benchmark_grids.py              # Grid operations
+│   ├── benchmark_export.py             # Export operations
+│   ├── benchmark_arrays.py             # Util2d/Util3d benchmarks
+│   ├── benchmark_pandas_io.py          # Pandas refactor validation
+│   └── benchmark_models_api.py         # Models API integration (future)
+├── test_*.py                           # Existing test files
+└── conftest.py                         # Global test configuration
+```
+
+#### Benchmark Markers
+
+Define custom markers in `pyproject.toml`:
+
+```toml
+[tool.pytest.ini_options]
+markers = [
+    "benchmark: performance benchmarks",
+    "slow: marks tests as slow (deselect with '-m \"not slow\"')",
+]
+```
+
+Usage:
+```python
+@pytest.mark.benchmark
+def test_mf6_sim_load(benchmark, function_tmpdir):
+    ...
+```
+
+### 5. 
CI/CD Strategy + +#### Tiered Approach + +**PR-Level (Fast Feedback)**: +```yaml +# .github/workflows/commit.yml - ADD TO EXISTING WORKFLOW +- name: Run fast benchmarks + run: | + pytest autotest/benchmarks \ + -m "benchmark and not slow" \ + --benchmark-only \ + --benchmark-disable-gc \ + --benchmark-warmup=on +``` + +**Daily Full Suite**: +```yaml +# .github/workflows/benchmark.yml - EXISTING, ENHANCED +- name: Run all benchmarks + run: | + pytest autotest/benchmarks \ + --benchmark-only \ + --benchmark-autosave \ + --codspeed +``` + +**Branch Comparisons**: +```yaml +# NEW: .github/workflows/benchmark-compare.yml +- name: Compare with develop branch + run: | + # Checkout develop + git fetch origin develop + git checkout develop + pytest autotest/benchmarks --benchmark-only --benchmark-autosave + + # Checkout feature branch + git checkout ${{ github.head_ref }} + pytest autotest/benchmarks --benchmark-only --benchmark-compare +``` + +#### Codspeed Integration + +**Benefits**: +- Automatic regression detection on PRs +- Performance trend visualization +- Historical comparison +- No manual artifact management + +**Setup**: +1. Register FloPy repository at [codspeed.io](https://codspeed.io) +2. Add `CODSPEED_TOKEN` to repository secrets +3. Update workflow: + ```yaml + - uses: CodSpeedHQ/action@v3 + with: + token: ${{ secrets.CODSPEED_TOKEN }} + run: pytest autotest/benchmarks --codspeed + ``` + +### 6. Performance Regression Tracking + +#### Key Metrics to Monitor + +1. **Pandas I/O Refactor Impact** (Primary Goal) + - Array reading: Traditional vs pandas-based + - List reading: Traditional vs pandas-based + - Array writing: Traditional vs pandas-based + - Expected: 10-50% improvement in most cases + +2. **Model I/O by Size Category** + - Small models (< 10k cells): Target < 100ms load + - Medium models (10k-100k cells): Target < 1s load + - Large models (> 100k cells): Monitor for regressions + +3. 
**Package-Level Performance** + - Identify expensive packages (large arrays: DIS, NPF, IC) + - Track improvements over time + +4. **Memory Usage** + - Enable memory profiling for large model operations + - Track memory efficiency of refactored code + +#### Regression Thresholds + +Configure pytest-benchmark comparison thresholds: + +```python +# autotest/benchmarks/conftest.py + +@pytest.fixture(scope="session") +def benchmark_config(): + return { + "warmup": True, + "warmup_iterations": 5, + "max_time": 1.0, + "min_rounds": 5, + "timer": time.perf_counter, + "disable_gc": True, + "compare": { + "func": "mean", + "group": "fullname", + "threshold": 1.05, # 5% tolerance + }, + } +``` + +**Alert Criteria**: +- > 5% slowdown: Warning (review required) +- > 10% slowdown: Failure (block merge) +- > 20% improvement: Document and celebrate! + +### 7. Documentation + +#### Developer Documentation Updates + +Add comprehensive benchmarking section to `DEVELOPER.md`: + +````markdown +#### Writing Benchmarks + +Benchmarks follow standard pytest conventions with the `benchmark` fixture: + +```python +# autotest/benchmarks/benchmark_example.py + +def test_my_operation(benchmark, function_tmpdir): + """Clear description of what is being benchmarked and why.""" + + # Setup (not timed) + model = create_test_model(function_tmpdir) + + # Benchmark the operation + result = benchmark(model.write_input) + + # Optional assertions (not timed) + assert result is not None +``` + +**Best Practices**: +- Use descriptive names: `test_mf6_large_model_load`, not `test_load1` +- Include docstrings explaining rationale +- Use fixtures for setup/teardown (not timed) +- Focus on one operation per benchmark +- Use parametrize for testing variations + +**Running Benchmarks Locally**: + +```bash +# Run all benchmarks +pytest autotest/benchmarks --benchmark-only + +# Run specific benchmark file +pytest autotest/benchmarks/benchmark_io_mf6.py --benchmark-only + +# Run with specific markers +pytest -m 
"benchmark and not slow" --benchmark-only + +# Compare against saved baseline +pytest autotest/benchmarks --benchmark-only --benchmark-compare + +# Save results +pytest autotest/benchmarks --benchmark-only --benchmark-autosave + +# View statistics +pytest autotest/benchmarks --benchmark-only --benchmark-columns=mean,stddev,min,max +``` + +**Interpreting Results**: + +Codspeed provides automated analysis, but for local runs: +- **Mean**: Primary metric (average execution time) +- **StdDev**: Consistency (lower is better) +- **Min**: Best-case performance +- **Iterations**: Number of runs (more = higher confidence) + +**When to Add Benchmarks**: + +1. Implementing performance-critical features +2. Refactoring I/O operations (e.g., pandas migration) +3. Optimizing existing code paths +4. Adding new model types or utilities +5. When performance is a key requirement +```` + +#### Template for New Benchmarks + +Provide a template in `autotest/benchmarks/TEMPLATE.py`: + +```python +""" +Benchmark template for FloPy operations. + +Copy this template when creating new benchmark files. +""" +import pytest + + +# Fixtures for test data creation (setup not timed) +@pytest.fixture +def test_model(function_tmpdir): + """Create a test model for benchmarking.""" + # Create and return model + pass + + +# Basic benchmark +def test_operation_basic(benchmark, test_model): + """ + Benchmark [operation description]. + + This benchmark measures [what is being measured] to [why it matters]. + Expected baseline: [X]ms on [reference hardware]. + """ + result = benchmark(test_model.some_operation) + assert result is not None + + +# Parametrized benchmark +@pytest.mark.parametrize("size", ["small", "medium", "large"]) +def test_operation_scaling(benchmark, function_tmpdir, size): + """ + Benchmark [operation] at different scales. + + Measures how [operation] scales with [dimension]. 
+ """ + model = create_model_with_size(function_tmpdir, size) + benchmark(model.some_operation) + + +# Slow benchmark (excluded from PR checks) +@pytest.mark.slow +@pytest.mark.benchmark +def test_operation_large_dataset(benchmark, function_tmpdir): + """ + Benchmark [operation] with realistic large dataset. + + Only run in daily benchmark suite due to runtime. + """ + large_model = create_large_realistic_model(function_tmpdir) + benchmark(large_model.some_operation) +``` + +### 8. Implementation Roadmap + +#### Phase 1: Foundation (Weeks 1-2) + +**Goals**: Set up infrastructure, reorganize existing benchmarks + +- [ ] Create `autotest/benchmarks/` directory structure +- [ ] Add `pytest-codspeed` to dependencies +- [ ] Set up Codspeed integration (register, add token) +- [ ] Migrate existing 3 benchmarks from `test_modflow.py` +- [ ] Create shared fixtures in `benchmarks/conftest.py` +- [ ] Add benchmark markers to `pyproject.toml` +- [ ] Update `.github/workflows/benchmark.yml` for Codspeed +- [ ] Document new structure in `DEVELOPER.md` + +**Deliverables**: +- Working Codspeed integration +- Reorganized benchmarks with clear structure +- Updated documentation + +#### Phase 2: Core Coverage (Weeks 3-4) + +**Goals**: Add essential MF6 and utility benchmarks + +- [ ] Implement `test_io_mf6.py` (10-15 benchmarks) + - Sim init (small, medium, large) + - Sim write/load + - Package-level operations + - Multi-model simulations +- [ ] Implement `test_utils_heads.py` (8-10 benchmarks) + - HeadFile init, get_data, get_alldata, get_ts + - Scaling tests (small/medium/large) +- [ ] Implement `test_utils_budget.py` (5-8 benchmarks) + - CellBudgetFile operations +- [ ] Establish baseline measurements for regression tracking + +**Deliverables**: +- 25-35 new benchmarks +- Baseline performance data +- Initial regression thresholds set + +#### Phase 3: Extended Coverage (Weeks 5-6) + +**Goals**: Legacy models, grids, exports + +- [ ] Implement `test_io_legacy.py` (10-12 benchmarks) 
+ - MODFLOW-NWT, MFUSG, SEAWAT, MT3DMS + - Structured vs unstructured + - Steady vs transient +- [ ] Implement `test_grids.py` (8-10 benchmarks) + - Grid initialization + - Intersection operations + - get_lrc/get_node conversions +- [ ] Implement `test_export.py` (8-10 benchmarks) + - Shapefile, GeoDataFrame, NetCDF, VTK + +**Deliverables**: +- 25-30 additional benchmarks +- Comprehensive coverage of major FloPy operations +- Performance characterization across all model types + +#### Phase 4: Pandas Validation & Polish (Weeks 7-8) + +**Goals**: Validate refactor, finalize infrastructure + +- [ ] Implement `test_pandas_io.py` (15-20 benchmarks) + - Head-to-head pandas vs traditional + - Array read/write + - List operations + - MFList performance + - Recarray conversions +- [ ] Implement `test_arrays.py` (5-8 benchmarks) + - Util2d/Util3d operations +- [ ] Add PR-level fast benchmark checks +- [ ] Create branch comparison workflow +- [ ] Generate performance improvement report for pandas refactor +- [ ] Polish documentation with examples and best practices +- [ ] Create benchmark template + +**Deliverables**: +- Quantified pandas refactor performance gains +- Complete benchmark suite (80-120 total benchmarks) +- Full CI/CD integration +- Comprehensive documentation + +#### Phase 5: Models API Integration (Available Now!) + +**Goals**: Leverage modflow-devtools models registry + +**Status**: ✅ Available - modflow-devtools already provides 442 models + +- [ ] Implement `test_models_api.py` with representative sample (~10-15 models) +- [ ] Parametrize benchmarks across diverse model types + - Multi-model simulations + - Different grid types (DIS, DISV, DISU) + - Various packages (LAK, SFR, UZF, MAW, etc.) 
+  - Transport models (GWT, GWE)
+- [ ] Establish performance baselines for standard test suite
+- [ ] Document model selection rationale
+
+**Deliverables**:
+- 10-15 benchmarks using modflow-devtools registry
+- Coverage of diverse model complexity
+- Community-standard performance baselines
+- Validation of FloPy loading across official test suite
+
+### 9. Success Criteria
+
+#### Quantitative Metrics
+
+1. **Coverage**: 80-120 benchmarks across all major FloPy operations
+2. **CI Runtime**:
+   - PR-level fast benchmarks: < 5 minutes
+   - Daily full suite: < 30 minutes
+3. **Pandas Refactor**: Demonstrate 10-50% improvement in I/O operations
+4. **Regression Detection**: 100% of PRs receive automated performance feedback
+5. **Historical Tracking**: 6+ months of continuous performance data
+
+#### Qualitative Goals
+
+1. **Developer Awareness**: Performance is a first-class consideration in PRs
+2. **Confidence**: No unintended performance regressions in releases
+3. **Documentation**: Clear guidelines for writing and interpreting benchmarks
+4. **Community**: Performance data available for external analysis and comparison
+
+### 10. Maintenance and Evolution
+
+#### Ongoing Responsibilities
+
+1. **Regular Review**: Quarterly review of benchmark relevance and thresholds
+2. **Baseline Updates**: Reset baselines after intentional performance changes
+3. **New Features**: All performance-critical features include benchmarks
+4. **Cleanup**: Remove obsolete benchmarks when code is removed
+5. **Reporting**: Annual performance report summarizing trends and improvements
+
+#### Future Enhancements
+
+1. **Memory Profiling**: Integrate memory usage tracking
+2. **Parallel Benchmarks**: Test parallel performance (if applicable)
+3. **Real-World Scenarios**: Benchmark complete workflows (load → modify → run → postprocess)
+4. **Hardware Diversity**: Track performance across different CPU/memory configurations
+5. 
**Comparison Reports**: Generate before/after reports for major refactors + +## References + +- [Issue #1989: Expand benchmarking](https://github.com/modflowpy/flopy/issues/1989) +- [Issue #1872: Models API](https://github.com/modflowpy/flopy/issues/1872) +- [pytest-benchmark documentation](https://pytest-benchmark.readthedocs.io/) +- [Codspeed documentation](https://docs.codspeed.io/) +- [FloPy DEVELOPER.md](../DEVELOPER.md) + +## Appendix A: Example Benchmark Output + +### pytest-benchmark Console Output + +``` +---------------------------- benchmark: 3 tests ---------------------------- +Name (time in ms) Min Max Mean StdDev Rounds +--------------------------------------------------------------------------- +test_model_init_time 12.34 15.67 13.21 0.89 50 +test_model_write_time 45.23 52.11 47.89 2.34 20 +test_model_load_time 78.90 89.12 82.45 3.12 15 +--------------------------------------------------------------------------- +``` + +### Codspeed PR Comment Example + +```markdown +## ⚡ CodSpeed Performance Report + +Performance changes detected in this PR: + +| Benchmark | Status | Base | PR | Change | +|-----------|--------|------|----|---------| +| test_mf6_sim_load | 🔴 Slower | 145ms | 167ms | +15.2% | +| test_pandas_array_read | 🟢 Faster | 234ms | 187ms | -20.1% | +| test_headfile_get_data | ⚪ Unchanged | 56ms | 57ms | +1.8% | + +[View full results on CodSpeed →](https://app.codspeed.io/...) 
+``` + +## Appendix B: Glossary + +- **Benchmark**: Repeatable performance test measuring execution time +- **Baseline**: Reference performance measurement for comparison +- **Regression**: Unintended performance degradation +- **Round**: Single execution of benchmarked code +- **Warmup**: Initial executions discarded to account for JIT/caching effects +- **Fixture**: pytest test setup/teardown function (not timed) +- **Parametrize**: Run same benchmark with multiple input variations + +--- + +**Document Version**: 1.0 +**Authors**: FloPy Development Team +**License**: CC0 1.0 Universal diff --git a/pyproject.toml b/pyproject.toml index 0324b2d054..50d7ca42f1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -54,6 +54,7 @@ test = [ "modflow-devtools>=1.7.0", "pytest !=8.1.0", "pytest-benchmark", + "pytest-codspeed", "pytest-cov", "pytest-dotenv", "pytest-xdist", @@ -218,3 +219,29 @@ ignore-words-list = [ "vertx", "nd", ] + +[tool.pytest.ini_options] +addopts = "-ra --color=yes" +python_files = [ + "test_*.py", + "profile_*.py", + "benchmark_*.py", + "*_test*.py", + "*_profile*.py", + "*_benchmark*.py", +] +env_files = [".env"] +markers = [ + "benchmark: performance benchmarks", + "external: uses models from external repositories", + "example: exercise scripts, tutorials, notebooks", + "generation: tests for code generation utilities", + "meta: tests run by other tests", + "mf6: tests for MODFLOW 6 support", + "regression: tests comparing multiple versions", + "slow: tests not completing in a few seconds", +] +filterwarnings = [ + "ignore:datetime.datetime.utcfromtimestamp", + "ignore:\n.*Pyarrow", +] From 8114d807c06bd4f221b66b054707a2cf2fed58ee Mon Sep 17 00:00:00 2001 From: Bonelli Date: Tue, 27 Jan 2026 19:11:15 -0500 Subject: [PATCH 02/11] fixes --- autotest/benchmarks/benchmark_arrays.py | 59 ++++++---- .../benchmarks/benchmark_cellbudgetfile.py | 7 +- autotest/benchmarks/benchmark_endpointfile.py | 9 +- autotest/benchmarks/benchmark_export.py | 13 ++- 
.../benchmarks/benchmark_formattedfile.py | 3 +- autotest/benchmarks/benchmark_grids.py | 16 ++- autotest/benchmarks/benchmark_headfile.py | 5 +- autotest/benchmarks/benchmark_mf6_io.py | 23 ++-- autotest/benchmarks/benchmark_pathlinefile.py | 10 +- .../benchmarks/benchmark_postprocessing.py | 3 +- autotest/benchmarks/conftest.py | 106 ++++++++++++++++++ 11 files changed, 198 insertions(+), 56 deletions(-) diff --git a/autotest/benchmarks/benchmark_arrays.py b/autotest/benchmarks/benchmark_arrays.py index bf68cc2167..8678093676 100644 --- a/autotest/benchmarks/benchmark_arrays.py +++ b/autotest/benchmarks/benchmark_arrays.py @@ -1,40 +1,51 @@ """ Benchmarks for flopy.utils.Util2d and Util3d operations including: - Array creation -- External file I/O -- get_file_entry() performance +- Binary file I/O +- Array access performance """ import numpy as np import pytest +from flopy.modflow import Modflow from flopy.utils import Util2d, Util3d SIZES = { "small": {"nlay": 3, "nrow": 10, "ncol": 10}, - "medium": {"nlay": 10, "nrow": 1000, "ncol": 1000}, - "large": {"nlay": 20, "nrow": 2000, "ncol": 2000}, + "medium": {"nlay": 10, "nrow": 100, "ncol": 100}, + "large": {"nlay": 20, "nrow": 200, "ncol": 200}, } @pytest.mark.benchmark @pytest.mark.slow @pytest.mark.parametrize("size", ["small", "medium", "large"]) -def test_util2d_create(benchmark, size): +def test_util2d_create(benchmark, function_tmpdir, size): dims = SIZES[size] shape = (dims["nrow"], dims["ncol"]) data = np.random.random(shape) - benchmark(lambda: Util2d(None, shape, np.float32, data.copy(), "test")) + ml = Modflow(model_ws=function_tmpdir) + + def create_util2d(): + return Util2d(ml, shape, np.float32, data.copy(), "test") + + benchmark(create_util2d) @pytest.mark.benchmark @pytest.mark.slow @pytest.mark.parametrize("size", ["small", "medium", "large"]) -def test_util3d_create(benchmark, size): +def test_util3d_create(benchmark, function_tmpdir, size): dims = SIZES[size] shape = (dims["nlay"], dims["nrow"], 
dims["ncol"]) data = np.random.random(shape) - benchmark(lambda: Util3d(None, shape, np.float32, data.copy(), "test")) + ml = Modflow(model_ws=function_tmpdir) + + def create_util3d(): + return Util3d(ml, shape, np.float32, data.copy(), "test") + + benchmark(create_util3d) @pytest.mark.benchmark @@ -43,15 +54,13 @@ def test_util3d_create(benchmark, size): def test_util2d_external_write(benchmark, function_tmpdir, size): dims = SIZES[size] shape = (dims["nrow"], dims["ncol"]) - data = np.random.random(shape) - u2d = Util2d(None, shape, np.float32, data, "test") + data = np.random.random(shape).astype(np.float32) fpath = function_tmpdir / "test_array.dat" - def write_external(): - u2d.write(str(fpath)) - return u2d + def write_bin(): + Util2d.write_bin(shape, fpath, data, bintype="head") - benchmark(write_external) + benchmark(write_bin) @pytest.mark.benchmark @@ -60,34 +69,36 @@ def write_external(): def test_util3d_external_write(benchmark, function_tmpdir, size): dims = SIZES[size] shape = (dims["nlay"], dims["nrow"], dims["ncol"]) - data = np.random.random(shape) - u3d = Util3d(None, shape, np.float32, data, "test") + data = np.random.random(shape).astype(np.float32) fpath = function_tmpdir / "test_array3d.dat" - def write_external(): - u3d.write(str(fpath)) - return u3d + def write_bin(): + for i in range(shape[0]): + layer_path = function_tmpdir / f"test_array3d_lay{i}.dat" + Util2d.write_bin((shape[1], shape[2]), layer_path, data[i], bintype="head") - benchmark(write_external) + benchmark(write_bin) @pytest.mark.benchmark @pytest.mark.slow @pytest.mark.parametrize("size", ["small", "medium", "large"]) -def test_util2d_array_copy(benchmark, size): +def test_util2d_array_copy(benchmark, function_tmpdir, size): dims = SIZES[size] shape = (dims["nrow"], dims["ncol"]) data = np.random.random(shape) - u2d = Util2d(None, shape, np.float32, data, "test") + ml = Modflow(model_ws=function_tmpdir) + u2d = Util2d(ml, shape, np.float32, data, "test") benchmark(lambda: 
u2d.array.copy()) @pytest.mark.benchmark @pytest.mark.slow @pytest.mark.parametrize("size", ["small", "medium", "large"]) -def test_util3d_array_copy(benchmark, size): +def test_util3d_array_copy(benchmark, function_tmpdir, size): dims = SIZES[size] shape = (dims["nlay"], dims["nrow"], dims["ncol"]) data = np.random.random(shape) - u3d = Util3d(None, shape, np.float32, data, "test") + ml = Modflow(model_ws=function_tmpdir) + u3d = Util3d(ml, shape, np.float32, data, "test") benchmark(lambda: u3d.array.copy()) diff --git a/autotest/benchmarks/benchmark_cellbudgetfile.py b/autotest/benchmarks/benchmark_cellbudgetfile.py index eea641101e..d764c37d93 100644 --- a/autotest/benchmarks/benchmark_cellbudgetfile.py +++ b/autotest/benchmarks/benchmark_cellbudgetfile.py @@ -19,13 +19,14 @@ def cbcf(example_data_path) -> CellBudgetFile: @pytest.mark.benchmark def test_cellbudgetfile_load(benchmark, cbcf): - benchmark(lambda: CellBudgetFile(cbcf.fname)) + benchmark(lambda: CellBudgetFile(cbcf.filename)) @pytest.mark.benchmark def test_cellbudgetfile_get_data_all(benchmark, cbcf): - records = cbcf.list_unique_records() - term = records[0] + # Use the new API to get unique records + unique_records = cbcf.headers[["text", "imeth"]].drop_duplicates() + term = unique_records.iloc[0]["text"] benchmark(lambda: cbcf.get_data(text=term)) diff --git a/autotest/benchmarks/benchmark_endpointfile.py b/autotest/benchmarks/benchmark_endpointfile.py index b50a40bd2a..0f85720541 100644 --- a/autotest/benchmarks/benchmark_endpointfile.py +++ b/autotest/benchmarks/benchmark_endpointfile.py @@ -1,6 +1,7 @@ import pytest from autotest.benchmarks.benchmark_pathlinefile import ex01_mp7_model +from autotest.test_mp7 import ex01_mf6_model from flopy.utils.modpathfile import EndpointFile @@ -15,7 +16,7 @@ def epf(ex01_mp7_model) -> EndpointFile: @pytest.mark.benchmark def test_endpointfile_load(benchmark, epf): - benchmark(lambda: EndpointFile(epf.fname)) + benchmark(lambda: EndpointFile(epf.filename)) 
@pytest.mark.benchmark @@ -28,12 +29,6 @@ def test_endpointfile_get_alldata(benchmark, epf): benchmark(epf.get_alldata) -@pytest.mark.benchmark -def test_pathlinefile_to_geodataframe(benchmark, plf): - pytest.importorskip("geopandas") - benchmark(plf.to_geodataframe) - - @pytest.mark.benchmark def test_endpointfile_to_geodataframe(benchmark, epf): pytest.importorskip("geopandas") diff --git a/autotest/benchmarks/benchmark_export.py b/autotest/benchmarks/benchmark_export.py index be8d94442f..bcdbe44bfe 100644 --- a/autotest/benchmarks/benchmark_export.py +++ b/autotest/benchmarks/benchmark_export.py @@ -1,7 +1,7 @@ import pytest from modflow_devtools.misc import has_pkg -from autotest.conftest import load_mf6_sim, load_mf2005_model +from .conftest import load_mf6_sim, load_mf2005_model @pytest.mark.benchmark @@ -22,10 +22,17 @@ def test_mf2005_export_shapefile(benchmark, function_tmpdir): @pytest.mark.benchmark @pytest.mark.skipif(not has_pkg("netCDF4"), reason="requires netCDF4") def test_mf6_export_netcdf(benchmark, function_tmpdir): + import uuid + sim = load_mf6_sim(function_tmpdir, model_key="freyberg") gwf = sim.get_model() - output_path = function_tmpdir / "export.nc" - benchmark(lambda: gwf.export(str(output_path), fmt="netcdf")) + + def export_netcdf(): + # Use unique filename for each iteration to avoid file locking issues on Windows + output_path = function_tmpdir / f"export_{uuid.uuid4().hex[:8]}.nc" + gwf.export(str(output_path), fmt="netcdf") + + benchmark(export_netcdf) @pytest.mark.benchmark diff --git a/autotest/benchmarks/benchmark_formattedfile.py b/autotest/benchmarks/benchmark_formattedfile.py index 74ddf1232b..b57aff8d8e 100644 --- a/autotest/benchmarks/benchmark_formattedfile.py +++ b/autotest/benchmarks/benchmark_formattedfile.py @@ -50,7 +50,8 @@ def test_formattedfile_get_alldata(benchmark, fhd): @pytest.mark.benchmark def test_formattedfile_get_ts(benchmark, fhd): - benchmark(lambda: fhd.get_ts((2, 25, 25))) + # Use a valid cell index 
based on test file dimensions (1, 15, 10) + benchmark(lambda: fhd.get_ts((0, 7, 5))) @pytest.mark.benchmark diff --git a/autotest/benchmarks/benchmark_grids.py b/autotest/benchmarks/benchmark_grids.py index c2e09e02fb..924ec1474f 100644 --- a/autotest/benchmarks/benchmark_grids.py +++ b/autotest/benchmarks/benchmark_grids.py @@ -5,10 +5,10 @@ - grid geometry properties """ +import numpy as np import pytest from autotest.test_grid_cases import GridCases -from flopy.utils.geometry import LineString, Point STRUCTURED_GRIDS = { "small": GridCases.structured_small(), @@ -34,21 +34,25 @@ def test_structured_grid_get_node(benchmark, grid): for row in range(0, grid.nrow) for col in range(0, grid.ncol) ] - benchmark(lambda: grid.get_node(cellids=cellids)) + benchmark(lambda: grid.get_node(cellids)) @pytest.mark.benchmark @pytest.mark.parametrize("grid", STRUCTURED_GRIDS.values(), ids=STRUCTURED_GRIDS.keys()) def test_structured_grid_intersect_linestring(benchmark, grid): - line = LineString([(0, 0), (grid.ncol, grid.nrow)]) - benchmark(lambda: grid.intersect(line, return_all_intersections=True)) + # Create x, y coordinates along a diagonal line across the grid + x = np.linspace(0, grid.ncol, 100) + y = np.linspace(0, grid.nrow, 100) + benchmark(lambda: grid.intersect(x, y, forgive=True)) @pytest.mark.benchmark @pytest.mark.parametrize("grid", STRUCTURED_GRIDS.values(), ids=STRUCTURED_GRIDS.keys()) def test_structured_grid_intersect_point(benchmark, grid): - point = Point(50, 50) - benchmark(lambda: grid.intersect(point)) + # Use grid center point + x = grid.ncol / 2.0 + y = grid.nrow / 2.0 + benchmark(lambda: grid.intersect(x, y)) @pytest.mark.benchmark diff --git a/autotest/benchmarks/benchmark_headfile.py b/autotest/benchmarks/benchmark_headfile.py index 109dca340f..c38805a3d5 100644 --- a/autotest/benchmarks/benchmark_headfile.py +++ b/autotest/benchmarks/benchmark_headfile.py @@ -57,7 +57,10 @@ def test_headfile_get_ts(benchmark, hdsf): @pytest.mark.benchmark def 
test_headfile_get_kstpkper(benchmark, hdsf): - benchmark(lambda: hdsf.get_data(kstpkper=(0, 1))) + # Use the first available kstpkper from the file + kstpkpers = hdsf.get_kstpkper() + kstpkper = kstpkpers[0] if kstpkpers else (0, 0) + benchmark(lambda: hdsf.get_data(kstpkper=kstpkper)) @pytest.mark.benchmark diff --git a/autotest/benchmarks/benchmark_mf6_io.py b/autotest/benchmarks/benchmark_mf6_io.py index 03f0cda267..2380470e4c 100644 --- a/autotest/benchmarks/benchmark_mf6_io.py +++ b/autotest/benchmarks/benchmark_mf6_io.py @@ -10,9 +10,10 @@ import pytest from modflow_devtools.models import DEFAULT_REGISTRY, LocalRegistry -from autotest.conftest import get_examples_path from flopy.mf6 import MFSimulation +from .conftest import get_examples_path + # prefixes into the model registry PREFIXES = ["mf6/test", "mf6/large", "mf2005"] @@ -27,18 +28,24 @@ def pytest_generate_tests(metafunc): # test model input files during MF6 development. See conftest.py # for the models_path fixture and CLI argument definitions. 
if "model_name" in metafunc.fixturenames: - models_paths = metafunc.config.getoption("--models-path") - models_paths = [ - Path(p).expanduser().resolve().absolute() for p in models_paths or [] - ] + # Try to get the models-path option, default to None if not available + try: + models_paths = metafunc.config.getoption("--models-path") or [] + except ValueError: + models_paths = [] + + models_paths = [Path(p).expanduser().resolve().absolute() for p in models_paths] registry = LocalRegistry() if any(models_paths) else DEFAULT_REGISTRY registry_type = type(registry).__name__.lower().replace("registry", "") metafunc.parametrize("registry", [registry], ids=[registry_type]) models = [] if "local" in registry_type: - namefile_pattern = ( - metafunc.config.getoption("--namefile-pattern") or "mfsim.nam" - ) + try: + namefile_pattern = ( + metafunc.config.getoption("--namefile-pattern") or "mfsim.nam" + ) + except ValueError: + namefile_pattern = "mfsim.nam" for path in models_paths: registry.index(path, namefile=namefile_pattern) models.extend(registry.models.keys()) diff --git a/autotest/benchmarks/benchmark_pathlinefile.py b/autotest/benchmarks/benchmark_pathlinefile.py index e3603ad156..f533abff68 100644 --- a/autotest/benchmarks/benchmark_pathlinefile.py +++ b/autotest/benchmarks/benchmark_pathlinefile.py @@ -1,6 +1,6 @@ import pytest -from autotest.test_mp7 import ex01_mf6_model_name +from autotest.test_mp7 import ex01_mf6_model, ex01_mf6_model_name from flopy.modpath.mp7 import Modpath7 from flopy.utils.modpathfile import PathlineFile @@ -37,7 +37,13 @@ def plf(ex01_mp7_model) -> PathlineFile: @pytest.mark.benchmark def test_pathlinefile_load(benchmark, plf): - benchmark(lambda: PathlineFile(plf.fname)) + benchmark(lambda: PathlineFile(plf.filename)) + + +@pytest.mark.benchmark +def test_pathlinefile_to_geodataframe(benchmark, plf): + pytest.importorskip("geopandas") + benchmark(plf.to_geodataframe) @pytest.mark.benchmark diff --git 
a/autotest/benchmarks/benchmark_postprocessing.py b/autotest/benchmarks/benchmark_postprocessing.py index bbcc132d56..e6614a2414 100644 --- a/autotest/benchmarks/benchmark_postprocessing.py +++ b/autotest/benchmarks/benchmark_postprocessing.py @@ -2,7 +2,6 @@ import pytest -from autotest.conftest import load_mf6_sim from flopy.utils import CellBudgetFile, HeadFile from flopy.utils.postprocessing import ( get_gradients, @@ -11,6 +10,8 @@ get_water_table, ) +from .conftest import load_mf6_sim + @pytest.mark.benchmark @pytest.mark.parametrize( diff --git a/autotest/benchmarks/conftest.py b/autotest/benchmarks/conftest.py index 1f04dcd1c3..8ab49b3551 100644 --- a/autotest/benchmarks/conftest.py +++ b/autotest/benchmarks/conftest.py @@ -1,6 +1,11 @@ from pathlib import Path +from shutil import copytree import pytest +from modflow_devtools.misc import run_cmd + +from flopy.mf6 import MFSimulation +from flopy.modflow import Modflow def get_examples_path(): @@ -8,6 +13,107 @@ def get_examples_path(): return Path(__file__).parent.parent.parent / "examples" / "data" +def load_mf6_sim(tmpdir, model_key="freyberg", run=True): + """ + Load and optionally run a MODFLOW 6 simulation from examples/data. 
+ + Parameters + ---------- + tmpdir : Path + Temporary directory to copy the model to + model_key : str + Model identifier (e.g., "freyberg" for mf6-freyberg) + run : bool + Whether to run the simulation before returning + + Returns + ------- + MFSimulation + The loaded simulation + """ + examples_path = get_examples_path() + + # Map model keys to their actual directory names + model_map = { + "freyberg": "mf6-freyberg", + "test003": "mf6/test003_gwfs_disv", + "test006": "mf6/test006_gwf3", + "test045": "mf6/test045_lake2tr", + } + + model_dir = model_map.get(model_key, f"mf6-{model_key}") + source_path = examples_path / model_dir + + if not source_path.exists(): + raise FileNotFoundError(f"Model directory not found: {source_path}") + + # Copy model files to tmpdir + copytree(source_path, tmpdir, dirs_exist_ok=True) + + # Load the simulation + sim = MFSimulation.load(sim_ws=tmpdir) + + # Run if requested + if run: + run_cmd("mf6", cwd=tmpdir) + + return sim + + +def load_mf2005_model(tmpdir, model_key="freyberg", run=False): + """ + Load and optionally run a MODFLOW-2005 model from examples/data. 
+ + Parameters + ---------- + tmpdir : Path + Temporary directory to copy the model to + model_key : str + Model identifier (e.g., "freyberg") + run : bool + Whether to run the model before returning + + Returns + ------- + Modflow + The loaded model + """ + from modflow_devtools.misc import get_namefile_paths + + examples_path = get_examples_path() + + # Map model keys to their actual directory names + model_map = { + "freyberg": "freyberg_multilayer_transient", + "mf2005_test": "mf2005_test", + } + + model_dir = model_map.get(model_key, model_key) + source_path = examples_path / model_dir + + if not source_path.exists(): + raise FileNotFoundError(f"Model directory not found: {source_path}") + + # Copy model files to tmpdir + copytree(source_path, tmpdir, dirs_exist_ok=True) + + # Find the namefile + nam_files = get_namefile_paths(tmpdir, namefile="*.nam") + if not nam_files: + raise FileNotFoundError(f"No .nam file found in {tmpdir}") + + nam_file = nam_files[0].name + + # Load the model + model = Modflow.load(nam_file, model_ws=tmpdir, check=False) + + # Run if requested + if run: + model.run_model() + + return model + + @pytest.fixture(scope="session") def benchmark_config(): """Configure pytest-benchmark settings.""" From b007fe320b02abdc06c9365249f9172391209901 Mon Sep 17 00:00:00 2001 From: Bonelli Date: Wed, 28 Jan 2026 09:23:30 -0500 Subject: [PATCH 03/11] fewer rounds --- autotest/benchmarks/benchmark_arrays.py | 8 ++++---- autotest/benchmarks/benchmark_endpointfile.py | 4 ++-- autotest/benchmarks/benchmark_export.py | 10 +++++----- autotest/benchmarks/benchmark_formattedfile.py | 2 +- autotest/benchmarks/benchmark_gridintersect.py | 12 ++++++------ autotest/benchmarks/benchmark_mf6_io.py | 4 ++-- autotest/benchmarks/benchmark_mf_io.py | 6 +++--- autotest/benchmarks/benchmark_pathlinefile.py | 4 ++-- autotest/benchmarks/benchmark_rasters.py | 2 +- autotest/benchmarks/benchmark_zonebudget.py | 12 ++++++------ 10 files changed, 32 insertions(+), 32 
deletions(-) diff --git a/autotest/benchmarks/benchmark_arrays.py b/autotest/benchmarks/benchmark_arrays.py index 8678093676..ad92dccd69 100644 --- a/autotest/benchmarks/benchmark_arrays.py +++ b/autotest/benchmarks/benchmark_arrays.py @@ -18,7 +18,7 @@ } -@pytest.mark.benchmark +@pytest.mark.benchmark(min_rounds=2, warmup=False) @pytest.mark.slow @pytest.mark.parametrize("size", ["small", "medium", "large"]) def test_util2d_create(benchmark, function_tmpdir, size): @@ -33,7 +33,7 @@ def create_util2d(): benchmark(create_util2d) -@pytest.mark.benchmark +@pytest.mark.benchmark(min_rounds=2, warmup=False) @pytest.mark.slow @pytest.mark.parametrize("size", ["small", "medium", "large"]) def test_util3d_create(benchmark, function_tmpdir, size): @@ -48,7 +48,7 @@ def create_util3d(): benchmark(create_util3d) -@pytest.mark.benchmark +@pytest.mark.benchmark(min_rounds=1, warmup=False) @pytest.mark.slow @pytest.mark.parametrize("size", ["small", "medium", "large"]) def test_util2d_external_write(benchmark, function_tmpdir, size): @@ -63,7 +63,7 @@ def write_bin(): benchmark(write_bin) -@pytest.mark.benchmark +@pytest.mark.benchmark(min_rounds=1, warmup=False) @pytest.mark.slow @pytest.mark.parametrize("size", ["small", "medium", "large"]) def test_util3d_external_write(benchmark, function_tmpdir, size): diff --git a/autotest/benchmarks/benchmark_endpointfile.py b/autotest/benchmarks/benchmark_endpointfile.py index 0f85720541..01c827b480 100644 --- a/autotest/benchmarks/benchmark_endpointfile.py +++ b/autotest/benchmarks/benchmark_endpointfile.py @@ -14,7 +14,7 @@ def epf(ex01_mp7_model) -> EndpointFile: return EndpointFile(ws / f"{mp.name}.mpend") -@pytest.mark.benchmark +@pytest.mark.benchmark(min_rounds=2, warmup=False) def test_endpointfile_load(benchmark, epf): benchmark(lambda: EndpointFile(epf.filename)) @@ -29,7 +29,7 @@ def test_endpointfile_get_alldata(benchmark, epf): benchmark(epf.get_alldata) -@pytest.mark.benchmark +@pytest.mark.benchmark(min_rounds=1, 
warmup=False) def test_endpointfile_to_geodataframe(benchmark, epf): pytest.importorskip("geopandas") benchmark(epf.to_geodataframe) diff --git a/autotest/benchmarks/benchmark_export.py b/autotest/benchmarks/benchmark_export.py index bcdbe44bfe..2bb5cf863b 100644 --- a/autotest/benchmarks/benchmark_export.py +++ b/autotest/benchmarks/benchmark_export.py @@ -4,7 +4,7 @@ from .conftest import load_mf6_sim, load_mf2005_model -@pytest.mark.benchmark +@pytest.mark.benchmark(min_rounds=1, warmup=False) def test_mf6_export_shapefile(benchmark, function_tmpdir): sim = load_mf6_sim(function_tmpdir, model_key="freyberg") gwf = sim.get_model() @@ -12,14 +12,14 @@ def test_mf6_export_shapefile(benchmark, function_tmpdir): benchmark(lambda: gwf.export(str(output_path))) -@pytest.mark.benchmark +@pytest.mark.benchmark(min_rounds=1, warmup=False) def test_mf2005_export_shapefile(benchmark, function_tmpdir): model = load_mf2005_model(function_tmpdir, model_key="freyberg") output_path = function_tmpdir / "export_mf2005.shp" benchmark(lambda: model.export(str(output_path))) -@pytest.mark.benchmark +@pytest.mark.benchmark(min_rounds=1, warmup=False) @pytest.mark.skipif(not has_pkg("netCDF4"), reason="requires netCDF4") def test_mf6_export_netcdf(benchmark, function_tmpdir): import uuid @@ -35,7 +35,7 @@ def export_netcdf(): benchmark(export_netcdf) -@pytest.mark.benchmark +@pytest.mark.benchmark(min_rounds=1, warmup=False) @pytest.mark.skipif(not has_pkg("geopandas"), reason="requires geopandas") def test_mf6_modelgrid_to_geodataframe(benchmark, function_tmpdir): sim = load_mf6_sim(function_tmpdir, model_key="freyberg") @@ -43,7 +43,7 @@ def test_mf6_modelgrid_to_geodataframe(benchmark, function_tmpdir): benchmark(gwf.modelgrid.to_geodataframe) -@pytest.mark.benchmark +@pytest.mark.benchmark(min_rounds=1, warmup=False) @pytest.mark.skipif(not has_pkg("vtk"), reason="requires vtk") def test_mf6_export_vtk(benchmark, function_tmpdir): sim = load_mf6_sim(function_tmpdir, 
model_key="freyberg") diff --git a/autotest/benchmarks/benchmark_formattedfile.py b/autotest/benchmarks/benchmark_formattedfile.py index b57aff8d8e..efc04d9628 100644 --- a/autotest/benchmarks/benchmark_formattedfile.py +++ b/autotest/benchmarks/benchmark_formattedfile.py @@ -12,7 +12,7 @@ from flopy.utils.formattedfile import FormattedHeadFile -@pytest.mark.benchmark +@pytest.mark.benchmark(min_rounds=2, warmup=False) @pytest.mark.slow def test_formattedfile_load(benchmark, example_data_path): pth = example_data_path / "mf2005_test" / "test1tr.githds" diff --git a/autotest/benchmarks/benchmark_gridintersect.py b/autotest/benchmarks/benchmark_gridintersect.py index b4de7a902a..80e872c81b 100644 --- a/autotest/benchmarks/benchmark_gridintersect.py +++ b/autotest/benchmarks/benchmark_gridintersect.py @@ -29,7 +29,7 @@ # GridIntersect class benchmarks -@pytest.mark.benchmark +@pytest.mark.benchmark(min_rounds=2, warmup=False) @pytest.mark.slow @pytest.mark.parametrize("grid", STRUCTURED_GRIDS.values(), ids=STRUCTURED_GRIDS.keys()) @pytest.mark.parametrize("rtree", [True, False], ids=["rtree", "no_rtree"]) @@ -64,7 +64,7 @@ def make_line(grid, line_type) -> LineString: return LineString(coords) -@pytest.mark.benchmark +@pytest.mark.benchmark(min_rounds=2, warmup=False) @pytest.mark.slow @pytest.mark.parametrize("grid", STRUCTURED_GRIDS.values(), ids=STRUCTURED_GRIDS.keys()) @pytest.mark.parametrize("line", ["diagonal", "horizontal", "complex"]) @@ -126,7 +126,7 @@ def make_poly(grid, poly_type) -> Polygon: return Polygon(coords) -@pytest.mark.benchmark +@pytest.mark.benchmark(min_rounds=2, warmup=False) @pytest.mark.slow @pytest.mark.parametrize("grid", STRUCTURED_GRIDS.values(), ids=STRUCTURED_GRIDS.keys()) @pytest.mark.parametrize("poly", ["small", "medium", "large", "irregular"]) @@ -137,7 +137,7 @@ def test_intersect_polygon(benchmark, grid, poly, rtree): benchmark(lambda: gi.intersect(polygon, "polygon")) -@pytest.mark.benchmark 
+@pytest.mark.benchmark(min_rounds=2, warmup=False) @pytest.mark.slow @pytest.mark.parametrize("grid", STRUCTURED_GRIDS.values(), ids=STRUCTURED_GRIDS.keys()) @pytest.mark.parametrize("poly", ["small", "medium", "large", "irregular"]) @@ -160,7 +160,7 @@ def test_grid_intersect_single_point(benchmark, grid): benchmark(lambda: grid.intersect(x_center, y_center)) -@pytest.mark.benchmark +@pytest.mark.benchmark(min_rounds=2, warmup=False) @pytest.mark.slow @pytest.mark.parametrize("grid", STRUCTURED_GRIDS.values(), ids=STRUCTURED_GRIDS.keys()) def test_grid_intersect_batch_points(benchmark, grid): @@ -171,7 +171,7 @@ def test_grid_intersect_batch_points(benchmark, grid): benchmark(lambda: grid.intersect(xx.ravel(), yy.ravel())) -@pytest.mark.benchmark +@pytest.mark.benchmark(min_rounds=2, warmup=False) @pytest.mark.slow @pytest.mark.parametrize("grid", STRUCTURED_GRIDS.values(), ids=STRUCTURED_GRIDS.keys()) def test_grid_intersect_3d(benchmark, grid): diff --git a/autotest/benchmarks/benchmark_mf6_io.py b/autotest/benchmarks/benchmark_mf6_io.py index 2380470e4c..d8f43e3705 100644 --- a/autotest/benchmarks/benchmark_mf6_io.py +++ b/autotest/benchmarks/benchmark_mf6_io.py @@ -58,7 +58,7 @@ def pytest_generate_tests(metafunc): metafunc.parametrize("model_name", models, ids=models) -@pytest.mark.benchmark +@pytest.mark.benchmark(min_rounds=1, warmup=False) @pytest.mark.slow @pytest.mark.external @pytest.mark.parametrize("use_pandas", [True, False], ids=["pandas", "nopandas"]) @@ -67,7 +67,7 @@ def test_load_simulation(function_tmpdir, benchmark, registry, model_name, use_p benchmark(lambda: MFSimulation.load(sim_ws=function_tmpdir, use_pandas=use_pandas)) -@pytest.mark.benchmark +@pytest.mark.benchmark(min_rounds=1, warmup=False) @pytest.mark.external @pytest.mark.slow @pytest.mark.parametrize("use_pandas", [True, False], ids=["pandas", "nopandas"]) diff --git a/autotest/benchmarks/benchmark_mf_io.py b/autotest/benchmarks/benchmark_mf_io.py index 95a11c81d2..4d3d92da4e 
100644 --- a/autotest/benchmarks/benchmark_mf_io.py +++ b/autotest/benchmarks/benchmark_mf_io.py @@ -41,18 +41,18 @@ def _load_model(ws, model_name): return Modflow.load(nam_file, model_ws=ws, check=False) -@pytest.mark.benchmark +@pytest.mark.benchmark(min_rounds=1, warmup=False) def test_mf2005_load(benchmark, function_tmpdir, model_name): benchmark(lambda: _load_model(function_tmpdir, model_name)) -@pytest.mark.benchmark +@pytest.mark.benchmark(min_rounds=1, warmup=False) def test_mf2005_write_freyberg(benchmark, function_tmpdir): ml = _load_model(function_tmpdir, "freyberg_multilayer_transient") benchmark(ml.write_input) -@pytest.mark.benchmark +@pytest.mark.benchmark(min_rounds=1, warmup=False) def test_mf2005_round_trip_freyberg(benchmark, function_tmpdir): def round_trip(): ml = _load_model(function_tmpdir, "freyberg_multilayer_transient") diff --git a/autotest/benchmarks/benchmark_pathlinefile.py b/autotest/benchmarks/benchmark_pathlinefile.py index f533abff68..399e9474f1 100644 --- a/autotest/benchmarks/benchmark_pathlinefile.py +++ b/autotest/benchmarks/benchmark_pathlinefile.py @@ -35,12 +35,12 @@ def plf(ex01_mp7_model) -> PathlineFile: return PathlineFile(ws / f"{mp.name}.mppth") -@pytest.mark.benchmark +@pytest.mark.benchmark(min_rounds=2, warmup=False) def test_pathlinefile_load(benchmark, plf): benchmark(lambda: PathlineFile(plf.filename)) -@pytest.mark.benchmark +@pytest.mark.benchmark(min_rounds=1, warmup=False) def test_pathlinefile_to_geodataframe(benchmark, plf): pytest.importorskip("geopandas") benchmark(plf.to_geodataframe) diff --git a/autotest/benchmarks/benchmark_rasters.py b/autotest/benchmarks/benchmark_rasters.py index f60bfc21d6..9fb700e30b 100644 --- a/autotest/benchmarks/benchmark_rasters.py +++ b/autotest/benchmarks/benchmark_rasters.py @@ -128,7 +128,7 @@ def test_raster_get_array_masked(benchmark, raster, masked): benchmark(lambda: raster.get_array(band=1, masked=masked)) -@pytest.mark.benchmark 
+@pytest.mark.benchmark(min_rounds=1, warmup=False) def test_raster_write(benchmark, raster, function_tmpdir): output_path = function_tmpdir / "output_raster.tif" benchmark(lambda: raster.write(str(output_path))) diff --git a/autotest/benchmarks/benchmark_zonebudget.py b/autotest/benchmarks/benchmark_zonebudget.py index f6c0672adc..27ad512a3f 100644 --- a/autotest/benchmarks/benchmark_zonebudget.py +++ b/autotest/benchmarks/benchmark_zonebudget.py @@ -21,7 +21,7 @@ def create_zone_array(nlay, nrow, ncol, n_zones=5): return zones -@pytest.mark.benchmark +@pytest.mark.benchmark(min_rounds=2, warmup=False) @pytest.mark.parametrize("nzones", [3, 10, 50]) def test_zonebudget_load(benchmark, example_data_path, nzones): model_path = example_data_path / "freyberg_multilayer_transient" @@ -31,7 +31,7 @@ def test_zonebudget_load(benchmark, example_data_path, nzones): benchmark(lambda: ZoneBudget(str(cbc_path), zones, verbose=False)) -@pytest.mark.benchmark +@pytest.mark.benchmark(min_rounds=2, warmup=False) @pytest.mark.parametrize("nzones", [3, 10, 50]) def test_zonebudget_get_budget(benchmark, example_data_path, nzones): model_path = example_data_path / "freyberg_multilayer_transient" @@ -42,7 +42,7 @@ def test_zonebudget_get_budget(benchmark, example_data_path, nzones): benchmark(zb.get_budget) -@pytest.mark.benchmark +@pytest.mark.benchmark(min_rounds=2, warmup=False) @pytest.mark.parametrize("nzones", [3, 10, 50]) def test_zonebudget_get_volumetric_budget(benchmark, example_data_path, nzones): model_path = example_data_path / "freyberg_multilayer_transient" @@ -53,7 +53,7 @@ def test_zonebudget_get_volumetric_budget(benchmark, example_data_path, nzones): benchmark(zb.get_volumetric_budget) -@pytest.mark.benchmark +@pytest.mark.benchmark(min_rounds=2, warmup=False) @pytest.mark.parametrize("nzones", [3, 10, 50]) def test_zonebudget_get_dataframes(benchmark, example_data_path, nzones): model_path = example_data_path / "freyberg_multilayer_transient" @@ -64,7 +64,7 @@ def 
test_zonebudget_get_dataframes(benchmark, example_data_path, nzones): benchmark(zb.get_dataframes) -@pytest.mark.benchmark +@pytest.mark.benchmark(min_rounds=2, warmup=False) @pytest.mark.parametrize("nzones", [3, 10, 50]) def test_zonebudget6_load(benchmark, example_data_path, nzones): sim = MFSimulation.load(sim_ws=example_data_path / "mf6-freyberg") @@ -76,7 +76,7 @@ def test_zonebudget6_load(benchmark, example_data_path, nzones): benchmark(lambda: ZoneBudget6(str(cbc_path), zones, verbose=False)) -@pytest.mark.benchmark +@pytest.mark.benchmark(min_rounds=2, warmup=False) @pytest.mark.parametrize("nzones", [3, 10, 50]) def test_zonebudget6_get_budget(benchmark, example_data_path, nzones): sim = MFSimulation.load(sim_ws=example_data_path / "mf6-freyberg") From 069725934fd64134716ddbfd37841eebbdd4509b Mon Sep 17 00:00:00 2001 From: Bonelli Date: Wed, 28 Jan 2026 16:33:00 -0500 Subject: [PATCH 04/11] fix pathline and endpoint file benchmarks --- autotest/benchmarks/benchmark_endpointfile.py | 10 ++++++---- autotest/benchmarks/benchmark_pathlinefile.py | 12 +++++++----- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/autotest/benchmarks/benchmark_endpointfile.py b/autotest/benchmarks/benchmark_endpointfile.py index 01c827b480..fddeda014f 100644 --- a/autotest/benchmarks/benchmark_endpointfile.py +++ b/autotest/benchmarks/benchmark_endpointfile.py @@ -5,7 +5,7 @@ from flopy.utils.modpathfile import EndpointFile -@pytest.fixture(scope="module") +@pytest.fixture def epf(ex01_mp7_model) -> EndpointFile: mp, ws = ex01_mp7_model mp.write_input() @@ -16,7 +16,7 @@ def epf(ex01_mp7_model) -> EndpointFile: @pytest.mark.benchmark(min_rounds=2, warmup=False) def test_endpointfile_load(benchmark, epf): - benchmark(lambda: EndpointFile(epf.filename)) + benchmark(lambda: EndpointFile(epf.fname)) @pytest.mark.benchmark @@ -30,6 +30,8 @@ def test_endpointfile_get_alldata(benchmark, epf): @pytest.mark.benchmark(min_rounds=1, warmup=False) -def 
test_endpointfile_to_geodataframe(benchmark, epf): +def test_endpointfile_to_geodataframe(benchmark, ex01_mf6_model, epf): pytest.importorskip("geopandas") - benchmark(epf.to_geodataframe) + sim, function_tmpdir = ex01_mf6_model + gwf = sim.get_model() + benchmark(lambda: epf.to_geodataframe(gwf.modelgrid)) diff --git a/autotest/benchmarks/benchmark_pathlinefile.py b/autotest/benchmarks/benchmark_pathlinefile.py index 399e9474f1..ba4a0c1afc 100644 --- a/autotest/benchmarks/benchmark_pathlinefile.py +++ b/autotest/benchmarks/benchmark_pathlinefile.py @@ -5,7 +5,7 @@ from flopy.utils.modpathfile import PathlineFile -@pytest.fixture(scope="module") +@pytest.fixture def ex01_mp7_model(ex01_mf6_model): sim, function_tmpdir = ex01_mf6_model success, buff = sim.run_simulation() @@ -26,7 +26,7 @@ def ex01_mp7_model(ex01_mf6_model): ), mp_ws -@pytest.fixture(scope="module") +@pytest.fixture def plf(ex01_mp7_model) -> PathlineFile: mp, ws = ex01_mp7_model mp.write_input() @@ -37,13 +37,15 @@ def plf(ex01_mp7_model) -> PathlineFile: @pytest.mark.benchmark(min_rounds=2, warmup=False) def test_pathlinefile_load(benchmark, plf): - benchmark(lambda: PathlineFile(plf.filename)) + benchmark(lambda: PathlineFile(plf.fname)) @pytest.mark.benchmark(min_rounds=1, warmup=False) -def test_pathlinefile_to_geodataframe(benchmark, plf): +def test_pathlinefile_to_geodataframe(benchmark, ex01_mf6_model, plf): pytest.importorskip("geopandas") - benchmark(plf.to_geodataframe) + sim, function_tmpdir = ex01_mf6_model + gwf = sim.get_model() + benchmark(lambda: plf.to_geodataframe(gwf.modelgrid)) @pytest.mark.benchmark From ad4736c966d6610b2dc5915dae99be714db9ca54 Mon Sep 17 00:00:00 2001 From: Bonelli Date: Wed, 28 Jan 2026 20:41:50 -0500 Subject: [PATCH 05/11] various fixes, shard ci --- .github/workflows/codspeed.yml | 19 +++- .../benchmarks/benchmark_cellbudgetfile.py | 4 +- .../benchmarks/benchmark_gridintersect.py | 12 +-- autotest/benchmarks/benchmark_headfile.py | 6 +- ...ark_mf_io.py => 
benchmark_mf2005_input.py} | 0 ...hmark_mf6_io.py => benchmark_mf6_input.py} | 0 autotest/benchmarks/benchmark_mtlistfile.py | 17 +--- .../benchmarks/benchmark_postprocessing.py | 8 +- autotest/benchmarks/benchmark_rasters.py | 10 +- autotest/benchmarks/benchmark_zonebudget.py | 93 +++++++------------ 10 files changed, 72 insertions(+), 97 deletions(-) rename autotest/benchmarks/{benchmark_mf_io.py => benchmark_mf2005_input.py} (100%) rename autotest/benchmarks/{benchmark_mf6_io.py => benchmark_mf6_input.py} (100%) diff --git a/.github/workflows/codspeed.yml b/.github/workflows/codspeed.yml index e8e7029ee2..2286687aaf 100644 --- a/.github/workflows/codspeed.yml +++ b/.github/workflows/codspeed.yml @@ -16,7 +16,22 @@ permissions: jobs: benchmarks: + # Benchmarks are sharded across 3 parallel jobs by functionality: + # - input-io: 1140 tests (model/simulation input file I/O - MF6 & legacy) + # - output-io: 66 tests (model output file readers - heads, budgets, particles, etc.) + # - pre-post: 166 tests (preprocessing, postprocessing, grids, rasters, arrays, export) runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + shard: + - name: "input-io" + files: "benchmark_mf6_input.py benchmark_mf2005_input.py" + - name: "output-io" + files: "benchmark_cellbudgetfile.py benchmark_zonebudget.py benchmark_mf6listbudget.py benchmark_headfile.py benchmark_headufile.py benchmark_formattedfile.py benchmark_pathlinefile.py benchmark_endpointfile.py benchmark_mtlistfile.py benchmark_sfroutputfile.py benchmark_ucnfile.py benchmark_mflistbudget.py benchmark_mfusglistbudget.py" + - name: "pre-post" + files: "benchmark_gridintersect.py benchmark_grids.py benchmark_rasters.py benchmark_arrays.py benchmark_export.py benchmark_postprocessing.py" + name: "benchmarks (${{ matrix.shard.name }})" steps: - name: Checkout repo uses: actions/checkout@v4 @@ -37,4 +52,6 @@ jobs: - name: Run benchmarks with CodSpeed uses: CodSpeedHQ/action@v3 with: - run: uv run pytest 
autotest/benchmarks --codspeed + run: cd autotest/benchmarks && uv run pytest ${{ matrix.shard.files }} --codspeed + env: + CODSPEED_SHARD_NAME: ${{ matrix.shard.name }} diff --git a/autotest/benchmarks/benchmark_cellbudgetfile.py b/autotest/benchmarks/benchmark_cellbudgetfile.py index d764c37d93..a81d1e356c 100644 --- a/autotest/benchmarks/benchmark_cellbudgetfile.py +++ b/autotest/benchmarks/benchmark_cellbudgetfile.py @@ -17,12 +17,12 @@ def cbcf(example_data_path) -> CellBudgetFile: ) -@pytest.mark.benchmark +@pytest.mark.benchmark(min_rounds=2, warmup=False) def test_cellbudgetfile_load(benchmark, cbcf): benchmark(lambda: CellBudgetFile(cbcf.filename)) -@pytest.mark.benchmark +@pytest.mark.benchmark(min_rounds=2, warmup=False) def test_cellbudgetfile_get_data_all(benchmark, cbcf): # Use the new API to get unique records unique_records = cbcf.headers[["text", "imeth"]].drop_duplicates() diff --git a/autotest/benchmarks/benchmark_gridintersect.py b/autotest/benchmarks/benchmark_gridintersect.py index 80e872c81b..5d9531fb33 100644 --- a/autotest/benchmarks/benchmark_gridintersect.py +++ b/autotest/benchmarks/benchmark_gridintersect.py @@ -29,7 +29,7 @@ # GridIntersect class benchmarks -@pytest.mark.benchmark(min_rounds=2, warmup=False) +@pytest.mark.benchmark(min_rounds=1, warmup=False) @pytest.mark.slow @pytest.mark.parametrize("grid", STRUCTURED_GRIDS.values(), ids=STRUCTURED_GRIDS.keys()) @pytest.mark.parametrize("rtree", [True, False], ids=["rtree", "no_rtree"]) @@ -64,7 +64,7 @@ def make_line(grid, line_type) -> LineString: return LineString(coords) -@pytest.mark.benchmark(min_rounds=2, warmup=False) +@pytest.mark.benchmark(min_rounds=1, warmup=False) @pytest.mark.slow @pytest.mark.parametrize("grid", STRUCTURED_GRIDS.values(), ids=STRUCTURED_GRIDS.keys()) @pytest.mark.parametrize("line", ["diagonal", "horizontal", "complex"]) @@ -126,7 +126,7 @@ def make_poly(grid, poly_type) -> Polygon: return Polygon(coords) -@pytest.mark.benchmark(min_rounds=2, 
warmup=False) +@pytest.mark.benchmark(min_rounds=1, warmup=False) @pytest.mark.slow @pytest.mark.parametrize("grid", STRUCTURED_GRIDS.values(), ids=STRUCTURED_GRIDS.keys()) @pytest.mark.parametrize("poly", ["small", "medium", "large", "irregular"]) @@ -137,7 +137,7 @@ def test_intersect_polygon(benchmark, grid, poly, rtree): benchmark(lambda: gi.intersect(polygon, "polygon")) -@pytest.mark.benchmark(min_rounds=2, warmup=False) +@pytest.mark.benchmark(min_rounds=1, warmup=False) @pytest.mark.slow @pytest.mark.parametrize("grid", STRUCTURED_GRIDS.values(), ids=STRUCTURED_GRIDS.keys()) @pytest.mark.parametrize("poly", ["small", "medium", "large", "irregular"]) @@ -160,7 +160,7 @@ def test_grid_intersect_single_point(benchmark, grid): benchmark(lambda: grid.intersect(x_center, y_center)) -@pytest.mark.benchmark(min_rounds=2, warmup=False) +@pytest.mark.benchmark(min_rounds=1, warmup=False) @pytest.mark.slow @pytest.mark.parametrize("grid", STRUCTURED_GRIDS.values(), ids=STRUCTURED_GRIDS.keys()) def test_grid_intersect_batch_points(benchmark, grid): @@ -171,7 +171,7 @@ def test_grid_intersect_batch_points(benchmark, grid): benchmark(lambda: grid.intersect(xx.ravel(), yy.ravel())) -@pytest.mark.benchmark(min_rounds=2, warmup=False) +@pytest.mark.benchmark(min_rounds=1, warmup=False) @pytest.mark.slow @pytest.mark.parametrize("grid", STRUCTURED_GRIDS.values(), ids=STRUCTURED_GRIDS.keys()) def test_grid_intersect_3d(benchmark, grid): diff --git a/autotest/benchmarks/benchmark_headfile.py b/autotest/benchmarks/benchmark_headfile.py index c38805a3d5..e3c08ea571 100644 --- a/autotest/benchmarks/benchmark_headfile.py +++ b/autotest/benchmarks/benchmark_headfile.py @@ -13,7 +13,7 @@ from flopy.utils import HeadFile -@pytest.mark.benchmark +@pytest.mark.benchmark(min_rounds=2, warmup=False) def test_headfile_load(benchmark, example_data_path): pth = ( example_data_path @@ -45,7 +45,7 @@ def test_headfile_get_data_single(benchmark, hdsf): benchmark(lambda: 
hdsf.get_data(totim=mid_time)) -@pytest.mark.benchmark +@pytest.mark.benchmark(min_rounds=2, warmup=False) def test_headfile_get_alldata(benchmark, hdsf): benchmark(hdsf.get_alldata) @@ -78,6 +78,6 @@ def test_headfile_get_kstpkper_list(benchmark, hdsf): benchmark(hdsf.get_kstpkper) -@pytest.mark.benchmark +@pytest.mark.benchmark(min_rounds=2, warmup=False) def test_headfile_get_alldata_mf6(benchmark, hdsf): benchmark(hdsf.get_alldata) diff --git a/autotest/benchmarks/benchmark_mf_io.py b/autotest/benchmarks/benchmark_mf2005_input.py similarity index 100% rename from autotest/benchmarks/benchmark_mf_io.py rename to autotest/benchmarks/benchmark_mf2005_input.py diff --git a/autotest/benchmarks/benchmark_mf6_io.py b/autotest/benchmarks/benchmark_mf6_input.py similarity index 100% rename from autotest/benchmarks/benchmark_mf6_io.py rename to autotest/benchmarks/benchmark_mf6_input.py diff --git a/autotest/benchmarks/benchmark_mtlistfile.py b/autotest/benchmarks/benchmark_mtlistfile.py index 6b164a6b15..503b531fc2 100644 --- a/autotest/benchmarks/benchmark_mtlistfile.py +++ b/autotest/benchmarks/benchmark_mtlistfile.py @@ -6,19 +6,4 @@ @pytest.mark.benchmark def test_mtlistfile_load(benchmark, example_data_path): list_file = example_data_path / "mt3d_test" / "mf2kmt3d" / "mnw" / "t5.lst" - benchmark(lambda: MtListBudget(str(list_file))) - - -@pytest.fixture -def mtlf(example_data_path) -> MtListBudget: - return MtListBudget(example_data_path / "mt3d_test" / "mf2kmt3d" / "mnw" / "t5.lst") - - -@pytest.mark.benchmark -def test_mtlistfile_get_budget(benchmark, mtlf): - benchmark(lambda: mtlf.get_budget()) - - -@pytest.mark.benchmark -def test_mtlistfile_get_data(benchmark, mtlf): - benchmark(lambda: mtlf.get_data()) + benchmark(lambda: MtListBudget(str(list_file)).gw_data) diff --git a/autotest/benchmarks/benchmark_postprocessing.py b/autotest/benchmarks/benchmark_postprocessing.py index e6614a2414..fd271e14d1 100644 --- a/autotest/benchmarks/benchmark_postprocessing.py 
+++ b/autotest/benchmarks/benchmark_postprocessing.py @@ -13,7 +13,7 @@ from .conftest import load_mf6_sim -@pytest.mark.benchmark +@pytest.mark.benchmark(min_rounds=2, warmup=False) @pytest.mark.parametrize( "row_col", [lambda m: (None, None), lambda m: (m.dis.nrow // 2, m.dis.ncol // 2)], @@ -29,7 +29,7 @@ def test_get_transmissivities(benchmark, function_tmpdir, row_col): benchmark(lambda: get_transmissivities(heads, gwf, r=r, c=c)) -@pytest.mark.benchmark +@pytest.mark.benchmark(min_rounds=2, warmup=False) def test_get_water_table(benchmark, function_tmpdir): sim = load_mf6_sim(function_tmpdir, model_key="freyberg") hds_path = Path(function_tmpdir) / "freyberg.hds" @@ -38,7 +38,7 @@ def test_get_water_table(benchmark, function_tmpdir): benchmark(lambda: get_water_table(heads)) -@pytest.mark.benchmark +@pytest.mark.benchmark(min_rounds=2, warmup=False) def test_get_gradients(benchmark, function_tmpdir): sim = load_mf6_sim(function_tmpdir, model_key="freyberg") gwf = sim.get_model() @@ -48,7 +48,7 @@ def test_get_gradients(benchmark, function_tmpdir): benchmark(lambda: get_gradients(heads, gwf)) -@pytest.mark.benchmark +@pytest.mark.benchmark(min_rounds=2, warmup=False) def test_get_specific_discharge(benchmark, function_tmpdir): sim = load_mf6_sim(function_tmpdir, model_key="freyberg") gwf = sim.get_model() diff --git a/autotest/benchmarks/benchmark_rasters.py b/autotest/benchmarks/benchmark_rasters.py index 9fb700e30b..5590b27d5b 100644 --- a/autotest/benchmarks/benchmark_rasters.py +++ b/autotest/benchmarks/benchmark_rasters.py @@ -39,7 +39,7 @@ def make_grid(raster, nrow, ncol) -> StructuredGrid: ) -@pytest.mark.benchmark +@pytest.mark.benchmark(min_rounds=2, warmup=False) def test_raster_load(benchmark, raster_path): benchmark(lambda: Raster.load(raster_path)) @@ -52,7 +52,7 @@ def test_raster_load(benchmark, raster_path): ] -@pytest.mark.benchmark +@pytest.mark.benchmark(min_rounds=1, warmup=False) @pytest.mark.parametrize("grid", GRIDS, ids=["small", 
"medium", "large"]) @pytest.mark.parametrize( "method", ["linear", "nearest", "cubic", "mean", "median", "min", "max"] @@ -61,7 +61,7 @@ def test_raster_resample(benchmark, raster, grid, method): benchmark(lambda: raster.resample_to_grid(grid, band=1, method=method)) -@pytest.mark.benchmark +@pytest.mark.benchmark(min_rounds=1, warmup=False) @pytest.mark.skipif(not has_pkg("pyproj"), reason="requires pyproj") def test_raster_to_crs_transform(benchmark, raster): benchmark(lambda: raster.to_crs(epsg=4326)) @@ -103,13 +103,13 @@ def large_poly(raster): ] -@pytest.mark.benchmark +@pytest.mark.benchmark(min_rounds=2, warmup=False) @pytest.mark.parametrize("poly", POLYGONS, ids=["small", "medium", "large"]) def test_raster_crop(benchmark, raster, poly): benchmark(lambda: raster.crop(poly)) -@pytest.mark.benchmark +@pytest.mark.benchmark(min_rounds=2, warmup=False) @pytest.mark.parametrize("poly", POLYGONS, ids=["small", "medium", "large"]) def test_raster_sample(benchmark, raster, poly): benchmark(lambda: raster.sample_polygon(poly, band=1)) diff --git a/autotest/benchmarks/benchmark_zonebudget.py b/autotest/benchmarks/benchmark_zonebudget.py index 27ad512a3f..cd1d8f19ee 100644 --- a/autotest/benchmarks/benchmark_zonebudget.py +++ b/autotest/benchmarks/benchmark_zonebudget.py @@ -1,17 +1,16 @@ +from pathlib import Path + import numpy as np import pytest +from numpy.typing import NDArray -from flopy.mf6.modflow.mfsimulation import MFSimulation from flopy.modflow.mf import Modflow -from flopy.utils.zonbud import ZoneBudget, ZoneBudget6 +from flopy.utils.zonbud import ZoneBudget -def create_zone_array(nlay, nrow, ncol, n_zones=5): +def create_zone_array(nlay, nrow, ncol, n_zones=5) -> NDArray: zones = np.zeros((nlay, nrow, ncol), dtype=np.int32) - - # Create simple zoning pattern - # Divide grid into roughly equal zones - zone_width = ncol // n_zones + zone_width = ncol // n_zones # roughly equal zones for i in range(n_zones): start_col = i * zone_width @@ -21,69 +20,43 @@ 
def create_zone_array(nlay, nrow, ncol, n_zones=5): return zones +@pytest.fixture(scope="module", params=[2, 5]) +def case(request, example_data_path) -> tuple[Path, NDArray]: + model_path = example_data_path / "zonbud_examples" + model = Modflow.load( + example_data_path / "freyberg" / "freyberg.nam", version="mf2005" + ) + cbc_path = model_path / "freyberg.gitcbc" + zones = create_zone_array(model.nlay, model.nrow, model.ncol, n_zones=request.param) + return cbc_path, zones + + +@pytest.mark.slow @pytest.mark.benchmark(min_rounds=2, warmup=False) -@pytest.mark.parametrize("nzones", [3, 10, 50]) -def test_zonebudget_load(benchmark, example_data_path, nzones): - model_path = example_data_path / "freyberg_multilayer_transient" - model = Modflow.load(model_path, version="mf2005") - cbc_path = model_path / "freyberg.cbc" - zones = create_zone_array(model.nlay, model.nrow, model.ncol, n_zones=nzones) - benchmark(lambda: ZoneBudget(str(cbc_path), zones, verbose=False)) +def test_zonebudget_load(benchmark, case): + cbc_path, zones = case + benchmark(lambda: ZoneBudget(str(cbc_path), z=zones)) +@pytest.mark.slow @pytest.mark.benchmark(min_rounds=2, warmup=False) -@pytest.mark.parametrize("nzones", [3, 10, 50]) -def test_zonebudget_get_budget(benchmark, example_data_path, nzones): - model_path = example_data_path / "freyberg_multilayer_transient" - model = Modflow.load(model_path, version="mf2005") - cbc_path = model_path / "freyberg.cbc" - zones = create_zone_array(model.nlay, model.nrow, model.ncol, n_zones=nzones) - zb = ZoneBudget(str(cbc_path), zones, verbose=False) +def test_zonebudget_get_budget(benchmark, case): + cbc_path, zones = case + zb = ZoneBudget(str(cbc_path), z=zones) benchmark(zb.get_budget) +@pytest.mark.slow @pytest.mark.benchmark(min_rounds=2, warmup=False) -@pytest.mark.parametrize("nzones", [3, 10, 50]) -def test_zonebudget_get_volumetric_budget(benchmark, example_data_path, nzones): - model_path = example_data_path / "freyberg_multilayer_transient" 
- model = Modflow.load(model_path, version="mf2005") - cbc_path = model_path / "freyberg.cbc" - zones = create_zone_array(model.nlay, model.nrow, model.ncol, n_zones=nzones) - zb = ZoneBudget(str(cbc_path), zones, verbose=False) +def test_zonebudget_get_volumetric_budget(benchmark, case): + cbc_path, zones = case + zb = ZoneBudget(str(cbc_path), z=zones) benchmark(zb.get_volumetric_budget) +@pytest.mark.slow @pytest.mark.benchmark(min_rounds=2, warmup=False) -@pytest.mark.parametrize("nzones", [3, 10, 50]) -def test_zonebudget_get_dataframes(benchmark, example_data_path, nzones): - model_path = example_data_path / "freyberg_multilayer_transient" - model = Modflow.load(model_path, version="mf2005") - cbc_path = model_path / "freyberg.cbc" - zones = create_zone_array(model.nlay, model.nrow, model.ncol, n_zones=nzones) - zb = ZoneBudget(str(cbc_path), zones, verbose=False) +def test_zonebudget_get_dataframes(benchmark, case): + cbc_path, zones = case + zb = ZoneBudget(str(cbc_path), z=zones) benchmark(zb.get_dataframes) - - -@pytest.mark.benchmark(min_rounds=2, warmup=False) -@pytest.mark.parametrize("nzones", [3, 10, 50]) -def test_zonebudget6_load(benchmark, example_data_path, nzones): - sim = MFSimulation.load(sim_ws=example_data_path / "mf6-freyberg") - gwf = sim.get_model() - zones = create_zone_array( - gwf.modelgrid.nlay, gwf.modelgrid.nrow, gwf.modelgrid.ncol, n_zones=nzones - ) - cbc_path = sim.sim_path / f"{gwf.name}.cbc" - benchmark(lambda: ZoneBudget6(str(cbc_path), zones, verbose=False)) - - -@pytest.mark.benchmark(min_rounds=2, warmup=False) -@pytest.mark.parametrize("nzones", [3, 10, 50]) -def test_zonebudget6_get_budget(benchmark, example_data_path, nzones): - sim = MFSimulation.load(sim_ws=example_data_path / "mf6-freyberg") - gwf = sim.get_model() - zones = create_zone_array( - gwf.modelgrid.nlay, gwf.modelgrid.nrow, gwf.modelgrid.ncol, n_zones=nzones - ) - cbc_path = sim.sim_path / f"{gwf.name}.cbc" - zb = ZoneBudget6(str(cbc_path), zones, 
verbose=False) - benchmark(zb.get_budget) From cc2bd414c558502e12ffa3984fdcb60046b48696 Mon Sep 17 00:00:00 2001 From: Bonelli Date: Wed, 28 Jan 2026 20:49:35 -0500 Subject: [PATCH 06/11] better --- .github/workflows/codspeed.yml | 35 +++++++++++++++---- .../benchmarks/benchmark_cellbudgetfile.py | 1 - autotest/benchmarks/benchmark_zonebudget.py | 2 +- 3 files changed, 29 insertions(+), 9 deletions(-) diff --git a/.github/workflows/codspeed.yml b/.github/workflows/codspeed.yml index 2286687aaf..2fc0b4cfe3 100644 --- a/.github/workflows/codspeed.yml +++ b/.github/workflows/codspeed.yml @@ -16,21 +16,42 @@ permissions: jobs: benchmarks: - # Benchmarks are sharded across 3 parallel jobs by functionality: - # - input-io: 1140 tests (model/simulation input file I/O - MF6 & legacy) - # - output-io: 66 tests (model output file readers - heads, budgets, particles, etc.) - # - pre-post: 166 tests (preprocessing, postprocessing, grids, rasters, arrays, export) + # Benchmarks are sharded by functionality: + # - input-io: model/simulation input file I/O + # - output-io: model output file readers + # - pre-post: pre/postprocessing, grids, rasters, arrays, export runs-on: ubuntu-latest strategy: fail-fast: false matrix: shard: - name: "input-io" - files: "benchmark_mf6_input.py benchmark_mf2005_input.py" + files: >- + benchmark_mf6_input.py + benchmark_mf2005_input.py - name: "output-io" - files: "benchmark_cellbudgetfile.py benchmark_zonebudget.py benchmark_mf6listbudget.py benchmark_headfile.py benchmark_headufile.py benchmark_formattedfile.py benchmark_pathlinefile.py benchmark_endpointfile.py benchmark_mtlistfile.py benchmark_sfroutputfile.py benchmark_ucnfile.py benchmark_mflistbudget.py benchmark_mfusglistbudget.py" + files: >- + benchmark_cellbudgetfile.py + benchmark_zonebudget.py + benchmark_mf6listbudget.py + benchmark_headfile.py + benchmark_headufile.py + benchmark_formattedfile.py + benchmark_pathlinefile.py + benchmark_endpointfile.py + benchmark_mtlistfile.py + 
benchmark_sfroutputfile.py + benchmark_ucnfile.py + benchmark_mflistbudget.py + benchmark_mfusglistbudget.py - name: "pre-post" - files: "benchmark_gridintersect.py benchmark_grids.py benchmark_rasters.py benchmark_arrays.py benchmark_export.py benchmark_postprocessing.py" + files: >- + benchmark_gridintersect.py + benchmark_grids.py + benchmark_rasters.py + benchmark_arrays.py + benchmark_export.py + benchmark_postprocessing.py name: "benchmarks (${{ matrix.shard.name }})" steps: - name: Checkout repo diff --git a/autotest/benchmarks/benchmark_cellbudgetfile.py b/autotest/benchmarks/benchmark_cellbudgetfile.py index a81d1e356c..171b391308 100644 --- a/autotest/benchmarks/benchmark_cellbudgetfile.py +++ b/autotest/benchmarks/benchmark_cellbudgetfile.py @@ -24,7 +24,6 @@ def test_cellbudgetfile_load(benchmark, cbcf): @pytest.mark.benchmark(min_rounds=2, warmup=False) def test_cellbudgetfile_get_data_all(benchmark, cbcf): - # Use the new API to get unique records unique_records = cbcf.headers[["text", "imeth"]].drop_duplicates() term = unique_records.iloc[0]["text"] benchmark(lambda: cbcf.get_data(text=term)) diff --git a/autotest/benchmarks/benchmark_zonebudget.py b/autotest/benchmarks/benchmark_zonebudget.py index cd1d8f19ee..ec01ff9bfa 100644 --- a/autotest/benchmarks/benchmark_zonebudget.py +++ b/autotest/benchmarks/benchmark_zonebudget.py @@ -8,7 +8,7 @@ from flopy.utils.zonbud import ZoneBudget -def create_zone_array(nlay, nrow, ncol, n_zones=5) -> NDArray: +def create_zone_array(nlay, nrow, ncol, n_zones=2) -> NDArray: zones = np.zeros((nlay, nrow, ncol), dtype=np.int32) zone_width = ncol // n_zones # roughly equal zones From f7d9319b77148c35151dbccc003f0cfc00ad1320 Mon Sep 17 00:00:00 2001 From: Bonelli Date: Wed, 28 Jan 2026 20:52:30 -0500 Subject: [PATCH 07/11] clean up initial plan --- docs/benchmarking_plan.md | 924 -------------------------------------- 1 file changed, 924 deletions(-) delete mode 100644 docs/benchmarking_plan.md diff --git 
a/docs/benchmarking_plan.md b/docs/benchmarking_plan.md deleted file mode 100644 index 0bb75931cc..0000000000 --- a/docs/benchmarking_plan.md +++ /dev/null @@ -1,924 +0,0 @@ -# FloPy Comprehensive Benchmarking Plan - -## Executive Summary - -This document outlines a plan to expand FloPy's benchmarking capabilities to systematically track performance improvements during ongoing development, particularly the pandas-based I/O refactoring effort. The plan builds upon the existing `pytest-benchmark` infrastructure while addressing current limitations in coverage and tooling. - -**Important**: These benchmarks test **FloPy code performance only**, not the runtime of MODFLOW/MODPATH executables that FloPy drives. Benchmarks focus on FloPy's I/O operations, data structure manipulations, and utility functions. - -### Key Goals - -1. **Quantify pandas I/O refactor impact** - Measure FloPy's file I/O performance gains -2. **Prevent performance regressions** - Automated detection of FloPy performance degradation in CI/CD -3. **Expand coverage** - Benchmark all major FloPy operations (load/write, utilities, grids, exports) -4. **Streamline workflow** - Reduce ad-hoc scripting, improve automation and reporting - -## Current State Analysis - -### Existing Infrastructure - -**Benchmarks** (as of 2026-01-25): -- Location: `autotest/test_modflow.py:1334-1353` -- Count: 3 benchmarks - - `test_model_init_time` - MODFLOW-2005 model initialization - - `test_model_write_time` - Model file writing - - `test_model_load_time` - Model file loading - -**Tooling**: -- `pytest-benchmark` plugin (pyproject.toml dependency) -- Daily CI workflow (`.github/workflows/benchmark.yml`) - - Matrix: 3 OS × 3 Python versions = 9 configurations - - Runs: Daily at 8 AM UTC -- Post-processing: `scripts/process_benchmarks.py` - - Generates time-series plots using seaborn - - Outputs CSV data and PNG visualizations - -### Current Limitations - -1. 
**Narrow Coverage** - - Only MODFLOW-2005 tested - - No MF6, MT3D, SEAWAT coverage - - Missing utility benchmarks (HeadFile, BudgetFile, grids, exports) - -2. **Limited Visibility** - - Results only stored as GitHub Actions artifacts - - No historical trend tracking - - No automated regression detection - - Manual comparison required - -3. **Workflow Issues** - - Ad-hoc scripting for result processing - - No integration with PR review process - - Missing baseline comparisons - -## Proposed Solution - -### 1. Tooling Strategy - -**Decision: Continue with pytest-benchmark + Add Codspeed Integration** - -#### Rationale - -- **ASV (Airspeed Velocity)**: Originally considered but now appears unmaintained - - Major projects (NumPy, others) migrating away - - Limited recent activity on repository - -- **pytest-benchmark**: Currently working well - - Integrated with existing test suite - - Familiar to developers - - Good CI integration - -- **Codspeed** (RECOMMENDED ADDITION): - - Seamless `pytest-codspeed` plugin compatibility - - Zero-config migration from `pytest-benchmark` - - Automated performance regression detection - - Historical trend visualization - - PR-based performance impact reports - - Free for open-source projects - -#### Implementation Steps - -1. Add `pytest-codspeed` to test dependencies: - ```toml - [project.optional-dependencies] - test = [ - # ... existing deps - "pytest-benchmark", - "pytest-codspeed", - ] - ``` - -2. Update benchmark workflow to use Codspeed action: - ```yaml - - uses: CodSpeedHQ/action@v3 - with: - token: ${{ secrets.CODSPEED_TOKEN }} - run: pytest autotest/benchmarks --codspeed - ``` - -3. Optionally refactor existing benchmarks to use decorator pattern: - ```python - @pytest.mark.benchmark - def test_model_load_time(function_tmpdir): - model = get_perftest_model(ws=function_tmpdir, name=name) - model.write_input() - Modflow.load(f"{name}.nam", model_ws=function_tmpdir, check=False) - ``` - -### 2. 
Benchmark Coverage Expansion - -#### 2.1 Core I/O Benchmarks - -Expand model load/write/init benchmarks across all major simulators. - -**MODFLOW 6** (Highest Priority): - -```python -# autotest/benchmarks/benchmark_io_mf6.py - -def test_mf6_sim_init_small(benchmark, function_tmpdir): - """Benchmark MF6 simulation initialization - small model.""" - benchmark(lambda: create_small_mf6_sim(function_tmpdir)) - -def test_mf6_sim_init_large(benchmark, function_tmpdir): - """Benchmark MF6 simulation initialization - large model.""" - benchmark(lambda: create_large_mf6_sim(function_tmpdir)) - -def test_mf6_sim_write(benchmark, function_tmpdir): - """Benchmark MF6 simulation write.""" - sim = create_test_mf6_sim(function_tmpdir) - benchmark(sim.write_simulation) - -def test_mf6_sim_load(benchmark, function_tmpdir): - """Benchmark MF6 simulation load.""" - sim = create_test_mf6_sim(function_tmpdir) - sim.write_simulation() - sim_ws = function_tmpdir - benchmark(lambda: MFSimulation.load(simulation_ws=sim_ws)) - -def test_mf6_package_write_large_arrays(benchmark, function_tmpdir): - """Benchmark writing packages with large arrays (e.g., NPF K).""" - sim = create_large_array_sim(function_tmpdir) - benchmark(sim.write_simulation) - -def test_mf6_multimodel_sim(benchmark, function_tmpdir): - """Benchmark multi-model simulation I/O.""" - benchmark(lambda: create_multimodel_sim(function_tmpdir)) - -def test_mf6_exchange_load(benchmark, function_tmpdir): - """Benchmark loading simulations with exchanges.""" - sim = create_exchange_sim(function_tmpdir) - sim.write_simulation() - benchmark(lambda: MFSimulation.load(simulation_ws=function_tmpdir)) -``` - -**Legacy MODFLOW Variants**: - -```python -# autotest/benchmarks/benchmark_io_legacy.py - -@pytest.mark.parametrize("variant", ["mfnwt", "mfusg", "seawat", "mt3dms"]) -def test_legacy_model_init(benchmark, function_tmpdir, variant): - """Benchmark initialization across legacy MODFLOW variants.""" - benchmark(lambda: 
create_legacy_model(variant, function_tmpdir)) - -@pytest.mark.parametrize("grid_type", ["structured", "unstructured"]) -def test_modflow_grid_types(benchmark, function_tmpdir, grid_type): - """Benchmark I/O for different grid types.""" - benchmark(lambda: create_model_with_grid(grid_type, function_tmpdir)) - -@pytest.mark.parametrize("temporal", ["steady", "transient_small", "transient_large"]) -def test_modflow_temporal(benchmark, function_tmpdir, temporal): - """Benchmark I/O for different temporal discretizations.""" - benchmark(lambda: create_temporal_model(temporal, function_tmpdir)) -``` - -#### 2.2 Post-Processing Utilities - -Benchmark common workflow operations. - -**HeadFile Operations**: - -Note: HeadFile benchmarks use pre-existing files from examples/data directory to test FloPy's file parsing performance only, not MODFLOW runtime. - -```python -# autotest/benchmarks/benchmark_utils_heads.py - -from pathlib import Path -from flopy.utils import HeadFile - -FREYBERG_HDS = Path("examples/data/freyberg_multilayer_transient/freyberg.hds") - -@pytest.mark.skipif(not FREYBERG_HDS.exists(), reason="Example data not available") -def test_headfile_init_freyberg(benchmark): - """Benchmark FloPy's HeadFile initialization.""" - benchmark(lambda: HeadFile(FREYBERG_HDS)) - -@pytest.mark.skipif(not FREYBERG_HDS.exists(), reason="Example data not available") -def test_headfile_get_data_single(benchmark): - """Benchmark FloPy's head data extraction for single time step.""" - hds = HeadFile(FREYBERG_HDS) - times = hds.get_times() - mid_time = times[len(times) // 2] - benchmark(lambda: hds.get_data(totim=mid_time)) - -def test_headfile_get_alldata(benchmark, function_tmpdir): - """Benchmark reading entire head file.""" - hds = create_and_open_headfile(function_tmpdir) - benchmark(hds.get_alldata) - -def test_headfile_get_ts(benchmark, function_tmpdir): - """Benchmark time series extraction.""" - hds = create_and_open_headfile(function_tmpdir) - benchmark(lambda: 
hds.get_ts((0, 10, 10))) - -@pytest.mark.parametrize("size", ["small", "medium", "large"]) -def test_headfile_scaling(benchmark, function_tmpdir, size): - """Benchmark HeadFile operations at different scales.""" - hds = create_headfile_with_size(function_tmpdir, size) - benchmark(hds.get_alldata) -``` - -**CellBudgetFile Operations**: - -Note: BudgetFile benchmarks use pre-existing files from examples/data directory to test FloPy's file parsing performance only, not MODFLOW runtime. - -```python -# autotest/benchmarks/benchmark_utils_budget.py - -from pathlib import Path -from flopy.utils import CellBudgetFile - -FREYBERG_CBC = Path("examples/data/freyberg_multilayer_transient/freyberg.cbc") - -@pytest.mark.skipif(not FREYBERG_CBC.exists(), reason="Example data not available") -def test_budgetfile_init_freyberg(benchmark): - """Benchmark FloPy's CellBudgetFile initialization.""" - benchmark(lambda: CellBudgetFile(FREYBERG_CBC)) - -def test_budgetfile_get_data(benchmark, function_tmpdir): - """Benchmark budget data extraction.""" - cbc = create_and_open_budgetfile(function_tmpdir) - benchmark(lambda: cbc.get_data(text="FLOW RIGHT FACE")) - -def test_budgetfile_list_records(benchmark, function_tmpdir): - """Benchmark record listing.""" - cbc = create_and_open_budgetfile(function_tmpdir) - benchmark(cbc.list_records) -``` - -**MODPATH Utilities**: - -```python -# autotest/benchmarks/benchmark_utils_modpath.py - -def test_pathlinefile_load(benchmark, function_tmpdir): - """Benchmark PathlineFile loading.""" - pth_file = create_pathlinefile(function_tmpdir) - benchmark(lambda: PathlineFile(pth_file)) - -def test_endpointfile_load(benchmark, function_tmpdir): - """Benchmark EndpointFile loading.""" - ept_file = create_endpointfile(function_tmpdir) - benchmark(lambda: EndpointFile(ept_file)) - -def test_pathline_to_dataframe(benchmark, function_tmpdir): - """Benchmark pathline conversion to DataFrame.""" - pth = create_and_open_pathlinefile(function_tmpdir) - 
benchmark(lambda: pth.get_destination_pathline_data(range(100))) -``` - -#### 2.3 Grid Operations - -```python -# autotest/benchmarks/benchmark_grids.py - -@pytest.mark.parametrize("grid_class", [ - StructuredGrid, - VertexGrid, - UnstructuredGrid, -]) -def test_grid_init(benchmark, grid_class): - """Benchmark grid initialization.""" - params = get_grid_params(grid_class) - benchmark(lambda: grid_class(**params)) - -def test_grid_intersect_structured(benchmark): - """Benchmark structured grid intersection.""" - grid = create_test_structured_grid() - line = create_test_linestring() - benchmark(lambda: grid.intersect(line)) - -def test_grid_get_lrc_large(benchmark): - """Benchmark get_lrc for large models.""" - grid = create_large_structured_grid() - nodes = range(0, grid.nnodes, 100) # Sample every 100th node - benchmark(lambda: [grid.get_lrc(node) for node in nodes]) - -def test_grid_get_node_large(benchmark): - """Benchmark get_node for large models.""" - grid = create_large_structured_grid() - lrc_tuples = [(0, i, j) for i in range(0, grid.nrow, 10) - for j in range(0, grid.ncol, 10)] - benchmark(lambda: [grid.get_node(lrc) for lrc in lrc_tuples]) -``` - -#### 2.4 Export Operations - -```python -# autotest/benchmarks/benchmark_export.py - -def test_export_shapefile_small(benchmark, function_tmpdir): - """Benchmark shapefile export - small model.""" - model = create_small_test_model(function_tmpdir) - output_path = function_tmpdir / "export.shp" - benchmark(lambda: model.export(output_path)) - -def test_export_shapefile_large(benchmark, function_tmpdir): - """Benchmark shapefile export - large model.""" - model = create_large_test_model(function_tmpdir) - output_path = function_tmpdir / "export.shp" - benchmark(lambda: model.export(output_path)) - -@pytest.mark.skipif(not has_pkg("geopandas"), reason="requires geopandas") -def test_export_geodataframe(benchmark, function_tmpdir): - """Benchmark GeoDataFrame export (issue #2671).""" - model = 
create_test_model(function_tmpdir) - benchmark(lambda: model.to_gdf()) - -@pytest.mark.skipif(not has_pkg("netCDF4"), reason="requires netCDF4") -def test_export_netcdf(benchmark, function_tmpdir): - """Benchmark NetCDF export.""" - model = create_test_model(function_tmpdir) - output_path = function_tmpdir / "export.nc" - benchmark(lambda: model.export(output_path, fmt="netcdf")) - -@pytest.mark.skipif(not has_pkg("vtk"), reason="requires vtk") -def test_export_vtk(benchmark, function_tmpdir): - """Benchmark VTK export.""" - model = create_test_model(function_tmpdir) - output_path = function_tmpdir / "export.vtk" - benchmark(lambda: model.export(output_path, fmt="vtk")) -``` - -#### 2.5 Array and Data Structure Benchmarks - -```python -# autotest/benchmarks/benchmark_arrays.py - -def test_util2d_create_large(benchmark): - """Benchmark Util2d creation with large arrays.""" - shape = (100, 100) - data = np.random.random(shape) - benchmark(lambda: Util2d(None, shape, data)) - -def test_util2d_external_io(benchmark, function_tmpdir): - """Benchmark Util2d external file I/O.""" - u2d = create_util2d_with_external(function_tmpdir) - benchmark(u2d.get_file_entry) - -def test_util3d_create_large(benchmark): - """Benchmark Util3d creation with large arrays.""" - shape = (10, 100, 100) - data = np.random.random(shape) - benchmark(lambda: Util3d(None, shape, data)) -``` - -#### 2.6 Pandas Integration Benchmarks - -**Critical for validating ongoing refactor efforts:** - -```python -# autotest/benchmarks/benchmark_pandas_io.py - -def test_pandas_array_read(benchmark, function_tmpdir): - """Benchmark pandas-based array reading.""" - # Compare pandas vs traditional approaches - file_path = create_test_array_file(function_tmpdir) - benchmark(lambda: read_array_pandas(file_path)) - -def test_pandas_list_read(benchmark, function_tmpdir): - """Benchmark pandas-based list reading.""" - file_path = create_test_list_file(function_tmpdir) - benchmark(lambda: read_list_pandas(file_path)) 
- -def test_pandas_array_write(benchmark, function_tmpdir): - """Benchmark pandas-based array writing.""" - data = create_test_array_data() - file_path = function_tmpdir / "test.dat" - benchmark(lambda: write_array_pandas(data, file_path)) - -def test_mflist_pandas_performance(benchmark, function_tmpdir): - """Benchmark MFList with pandas backend.""" - stress_period_data = create_large_stress_period_data() - benchmark(lambda: MFList(stress_period_data)) - -def test_recarray_to_dataframe(benchmark): - """Benchmark recarray to DataFrame conversion.""" - rec = create_large_recarray() - benchmark(lambda: pd.DataFrame(rec)) -``` - -### 3. Integration with modflow-devtools Models API - -**Already Available**: The `modflow-devtools` package provides a models API with 442 models including: -- 242 MF6 test models (`mf6/test/*`) -- MF6 examples (`mf6/example/*`) -- MF6 large models (`mf6/large/*`) -- MODFLOW-2005 models (`mf2005/*`) - -**Implementation**: - -```python -# autotest/benchmarks/benchmark_models_api.py - -from modflow_devtools.models import DEFAULT_REGISTRY -from flopy.mf6 import MFSimulation - -# Select diverse models for benchmarking -BENCHMARK_MODELS = [ - "mf6/test/test001a_Tharmonic", - "mf6/test/test006_gwf3", # Multi-model - "mf6/test/test006_gwf3_disv", # DISV grid - "mf6/test/test021_twri", # Classic problem - "mf6/test/test045_lake1ss_table", # LAK package -] - -@pytest.mark.parametrize("model_name", BENCHMARK_MODELS) -def test_mf6_load_from_registry(benchmark, function_tmpdir, model_name): - """Benchmark FloPy loading models from devtools registry.""" - # Copy model to temp directory (setup, not benchmarked) - DEFAULT_REGISTRY.copy_to(function_tmpdir, model_name) - - # Benchmark FloPy loading the model - benchmark(lambda: MFSimulation.load(simulation_ws=function_tmpdir)) -``` - -**Benefits**: -- 442 models available immediately (no waiting for issue #1872) -- Reproducible benchmarks across development environments -- Tests FloPy loading against 
diverse, real-world model inputs -- Community-standard test cases from MODFLOW 6 test suite -- On-demand download via Pooch (models cached locally) - -### 4. Benchmark Organization - -#### Proposed Directory Structure - -``` -autotest/ -├── benchmarks/ # NEW: Dedicated benchmark directory -│ ├── __init__.py -│ ├── conftest.py # Shared fixtures, model builders -│ ├── test_io_mf6.py # MF6 I/O benchmarks -│ ├── test_io_legacy.py # Legacy MODFLOW variants -│ ├── test_utils_heads.py # HeadFile operations (uses example data) -│ ├── test_utils_budget.py # CellBudgetFile operations (uses example data) -│ ├── test_grids.py # Grid operations -│ ├── test_export.py # Export operations -│ ├── test_arrays.py # Util2d/Util3d benchmarks -│ ├── test_pandas_io.py # Pandas refactor validation -│ └── test_models_api.py # Models API integration (future) -├── test_*.py # Existing test files -└── conftest.py # Global test configuration -``` - -#### Benchmark Markers - -Define custom markers in `pyproject.toml`: - -```toml -[tool.pytest.ini_options] -markers = [ - "benchmark: performance benchmarks", - "slow: marks tests as slow (deselect with '-m \"not slow\"')", -] -``` - -Usage: -```python -@pytest.mark.benchmark -def test_mf6_sim_load(benchmark, function_tmpdir): - ... -``` - -### 5. 
CI/CD Strategy - -#### Tiered Approach - -**PR-Level (Fast Feedback)**: -```yaml -# .github/workflows/commit.yml - ADD TO EXISTING WORKFLOW -- name: Run fast benchmarks - run: | - pytest autotest/benchmarks \ - -m "benchmark and not slow" \ - --benchmark-only \ - --benchmark-disable-gc \ - --benchmark-warmup=on -``` - -**Daily Full Suite**: -```yaml -# .github/workflows/benchmark.yml - EXISTING, ENHANCED -- name: Run all benchmarks - run: | - pytest autotest/benchmarks \ - --benchmark-only \ - --benchmark-autosave \ - --codspeed -``` - -**Branch Comparisons**: -```yaml -# NEW: .github/workflows/benchmark-compare.yml -- name: Compare with develop branch - run: | - # Checkout develop - git fetch origin develop - git checkout develop - pytest autotest/benchmarks --benchmark-only --benchmark-autosave - - # Checkout feature branch - git checkout ${{ github.head_ref }} - pytest autotest/benchmarks --benchmark-only --benchmark-compare -``` - -#### Codspeed Integration - -**Benefits**: -- Automatic regression detection on PRs -- Performance trend visualization -- Historical comparison -- No manual artifact management - -**Setup**: -1. Register FloPy repository at [codspeed.io](https://codspeed.io) -2. Add `CODSPEED_TOKEN` to repository secrets -3. Update workflow: - ```yaml - - uses: CodSpeedHQ/action@v3 - with: - token: ${{ secrets.CODSPEED_TOKEN }} - run: pytest autotest/benchmarks --codspeed - ``` - -### 6. Performance Regression Tracking - -#### Key Metrics to Monitor - -1. **Pandas I/O Refactor Impact** (Primary Goal) - - Array reading: Traditional vs pandas-based - - List reading: Traditional vs pandas-based - - Array writing: Traditional vs pandas-based - - Expected: 10-50% improvement in most cases - -2. **Model I/O by Size Category** - - Small models (< 10k cells): Target < 100ms load - - Medium models (10k-100k cells): Target < 1s load - - Large models (> 100k cells): Monitor for regressions - -3. 
**Package-Level Performance** - - Identify expensive packages (large arrays: DIS, NPF, IC) - - Track improvements over time - -4. **Memory Usage** - - Enable memory profiling for large model operations - - Track memory efficiency of refactored code - -#### Regression Thresholds - -Configure pytest-benchmark comparison thresholds: - -```python -# autotest/benchmarks/conftest.py - -@pytest.fixture(scope="session") -def benchmark_config(): - return { - "warmup": True, - "warmup_iterations": 5, - "max_time": 1.0, - "min_rounds": 5, - "timer": time.perf_counter, - "disable_gc": True, - "compare": { - "func": "mean", - "group": "fullname", - "threshold": 1.05, # 5% tolerance - }, - } -``` - -**Alert Criteria**: -- > 5% slowdown: Warning (review required) -- > 10% slowdown: Failure (block merge) -- > 20% improvement: Document and celebrate! - -### 7. Documentation - -#### Developer Documentation Updates - -Add comprehensive benchmarking section to `DEVELOPER.md`: - -````markdown -#### Writing Benchmarks - -Benchmarks follow standard pytest conventions with the `benchmark` fixture: - -```python -# autotest/benchmarks/benchmark_example.py - -def test_my_operation(benchmark, function_tmpdir): - """Clear description of what is being benchmarked and why.""" - - # Setup (not timed) - model = create_test_model(function_tmpdir) - - # Benchmark the operation - result = benchmark(model.write_input) - - # Optional assertions (not timed) - assert result is not None -``` - -**Best Practices**: -- Use descriptive names: `test_mf6_large_model_load`, not `test_load1` -- Include docstrings explaining rationale -- Use fixtures for setup/teardown (not timed) -- Focus on one operation per benchmark -- Use parametrize for testing variations - -**Running Benchmarks Locally**: - -```bash -# Run all benchmarks -pytest autotest/benchmarks --benchmark-only - -# Run specific benchmark file -pytest autotest/benchmarks/benchmark_io_mf6.py --benchmark-only - -# Run with specific markers -pytest -m 
"benchmark and not slow" --benchmark-only - -# Compare against saved baseline -pytest autotest/benchmarks --benchmark-only --benchmark-compare - -# Save results -pytest autotest/benchmarks --benchmark-only --benchmark-autosave - -# View statistics -pytest autotest/benchmarks --benchmark-only --benchmark-columns=mean,stddev,min,max -``` - -**Interpreting Results**: - -Codspeed provides automated analysis, but for local runs: -- **Mean**: Primary metric (average execution time) -- **StdDev**: Consistency (lower is better) -- **Min**: Best-case performance -- **Iterations**: Number of runs (more = higher confidence) - -**When to Add Benchmarks**: - -1. Implementing performance-critical features -2. Refactoring I/O operations (e.g., pandas migration) -3. Optimizing existing code paths -4. Adding new model types or utilities -5. When performance is a key requirement -```` - -#### Template for New Benchmarks - -Provide a template in `autotest/benchmarks/TEMPLATE.py`: - -```python -""" -Benchmark template for FloPy operations. - -Copy this template when creating new benchmark files. -""" -import pytest - - -# Fixtures for test data creation (setup not timed) -@pytest.fixture -def test_model(function_tmpdir): - """Create a test model for benchmarking.""" - # Create and return model - pass - - -# Basic benchmark -def test_operation_basic(benchmark, test_model): - """ - Benchmark [operation description]. - - This benchmark measures [what is being measured] to [why it matters]. - Expected baseline: [X]ms on [reference hardware]. - """ - result = benchmark(test_model.some_operation) - assert result is not None - - -# Parametrized benchmark -@pytest.mark.parametrize("size", ["small", "medium", "large"]) -def test_operation_scaling(benchmark, function_tmpdir, size): - """ - Benchmark [operation] at different scales. - - Measures how [operation] scales with [dimension]. 
- """ - model = create_model_with_size(function_tmpdir, size) - benchmark(model.some_operation) - - -# Slow benchmark (excluded from PR checks) -@pytest.mark.slow -@pytest.mark.benchmark -def test_operation_large_dataset(benchmark, function_tmpdir): - """ - Benchmark [operation] with realistic large dataset. - - Only run in daily benchmark suite due to runtime. - """ - large_model = create_large_realistic_model(function_tmpdir) - benchmark(large_model.some_operation) -``` - -### 8. Implementation Roadmap - -#### Phase 1: Foundation (Weeks 1-2) - -**Goals**: Set up infrastructure, reorganize existing benchmarks - -- [ ] Create `autotest/benchmarks/` directory structure -- [ ] Add `pytest-codspeed` to dependencies -- [ ] Set up Codspeed integration (register, add token) -- [ ] Migrate existing 3 benchmarks from `test_modflow.py` -- [ ] Create shared fixtures in `benchmarks/conftest.py` -- [ ] Add benchmark markers to `pyproject.toml` -- [ ] Update `.github/workflows/benchmark.yml` for Codspeed -- [ ] Document new structure in `DEVELOPER.md` - -**Deliverables**: -- Working Codspeed integration -- Reorganized benchmarks with clear structure -- Updated documentation - -#### Phase 2: Core Coverage (Weeks 3-4) - -**Goals**: Add essential MF6 and utility benchmarks - -- [ ] Implement `test_io_mf6.py` (10-15 benchmarks) - - Sim init (small, medium, large) - - Sim write/load - - Package-level operations - - Multi-model simulations -- [ ] Implement `test_utils_heads.py` (8-10 benchmarks) - - HeadFile init, get_data, get_alldata, get_ts - - Scaling tests (small/medium/large) -- [ ] Implement `test_utils_budget.py` (5-8 benchmarks) - - CellBudgetFile operations -- [ ] Establish baseline measurements for regression tracking - -**Deliverables**: -- 25-35 new benchmarks -- Baseline performance data -- Initial regression thresholds set - -#### Phase 3: Extended Coverage (Weeks 5-6) - -**Goals**: Legacy models, grids, exports - -- [ ] Implement `test_io_legacy.py` (10-12 benchmarks) 
- - MODFLOW-NWT, MFUSG, SEAWAT, MT3DMS - - Structured vs unstructured - - Steady vs transient -- [ ] Implement `test_grids.py` (8-10 benchmarks) - - Grid initialization - - Intersection operations - - get_lrc/get_node conversions -- [ ] Implement `test_export.py` (8-10 benchmarks) - - Shapefile, GeoDataFrame, NetCDF, VTK - -**Deliverables**: -- 25-30 additional benchmarks -- Comprehensive coverage of major FloPy operations -- Performance characterization across all model types - -#### Phase 4: Pandas Validation & Polish (Weeks 7-8) - -**Goals**: Validate refactor, finalize infrastructure - -- [ ] Implement `test_pandas_io.py` (15-20 benchmarks) - - Head-to-head pandas vs traditional - - Array read/write - - List operations - - MFList performance - - Recarray conversions -- [ ] Implement `test_arrays.py` (5-8 benchmarks) - - Util2d/Util3d operations -- [ ] Add PR-level fast benchmark checks -- [ ] Create branch comparison workflow -- [ ] Generate performance improvement report for pandas refactor -- [ ] Polish documentation with examples and best practices -- [ ] Create benchmark template - -**Deliverables**: -- Quantified pandas refactor performance gains -- Complete benchmark suite (80-120 total benchmarks) -- Full CI/CD integration -- Comprehensive documentation - -#### Phase 5: Models API Integration (Available Now!) - -**Goals**: Leverage modflow-devtools models registry - -**Status**: ✅ Available - modflow-devtools already provides 442 models - -- [ ] Implement `test_models_api.py` with representative sample (~10-15 models) -- [ ] Parametrize benchmarks across diverse model types - - Multi-model simulations - - Different grid types (DIS, DISV, DISU) - - Various packages (LAK, SFR, UZF, MAW, etc.) 
- - Transport models (GWT, GWE) -- [ ] Establish performance baselines for standard test suite -- [ ] Document model selection rationale - -**Deliverables**: -- 10-15 benchmarks using modflow-devtools registry -- Coverage of diverse model complexity -- Community-standard performance baselines -- Validation of FloPy loading across official test suite - -### 9. Success Criteria - -#### Quantitative Metrics - -1. **Coverage**: 80-120 benchmarks across all major FloPy operations -2. **CI Runtime**: - - PR-level fast benchmarks: < 5 minutes - - Daily full suite: < 30 minutes -3. **Pandas Refactor**: Demonstrate 10-50% improvement in I/O operations -4. **Regression Detection**: 100% of PRs receive automated performance feedback -5. **Historical Tracking**: 6+ months of continuous performance data - -#### Qualitative Goals - -1. **Developer Awareness**: Performance is a first-class consideration in PRs -2. **Confidence**: No unintended performance regressions in releases -3. **Documentation**: Clear guidelines for writing and interpreting benchmarks -4. **Community**: Performance data available for external analysis and comparison - -### 10. Maintenance and Evolution - -#### Ongoing Responsibilities - -1. **Regular Review**: Quarterly review of benchmark relevance and thresholds -2. **Baseline Updates**: Reset baselines after intentional performance changes -3. **New Features**: All performance-critical features include benchmarks -4. **Cleanup**: Remove obsolete benchmarks when code is removed -5. **Reporting**: Annual performance report summarizing trends and improvements - -#### Future Enhancements - -1. **Memory Profiling**: Integrate memory usage tracking -2. **Parallel Benchmarks**: Test parallel performance (if applicable) -3. **Real-World Scenarios**: Benchmark complete workflows (load → modify → run → postprocess) -4. **Hardware Diversity**: Track performance across different CPU/memory configurations -5. 
**Comparison Reports**: Generate before/after reports for major refactors - -## References - -- [Issue #1989: Expand benchmarking](https://github.com/modflowpy/flopy/issues/1989) -- [Issue #1872: Models API](https://github.com/modflowpy/flopy/issues/1872) -- [pytest-benchmark documentation](https://pytest-benchmark.readthedocs.io/) -- [Codspeed documentation](https://docs.codspeed.io/) -- [FloPy DEVELOPER.md](../DEVELOPER.md) - -## Appendix A: Example Benchmark Output - -### pytest-benchmark Console Output - -``` ----------------------------- benchmark: 3 tests ---------------------------- -Name (time in ms) Min Max Mean StdDev Rounds ---------------------------------------------------------------------------- -test_model_init_time 12.34 15.67 13.21 0.89 50 -test_model_write_time 45.23 52.11 47.89 2.34 20 -test_model_load_time 78.90 89.12 82.45 3.12 15 ---------------------------------------------------------------------------- -``` - -### Codspeed PR Comment Example - -```markdown -## ⚡ CodSpeed Performance Report - -Performance changes detected in this PR: - -| Benchmark | Status | Base | PR | Change | -|-----------|--------|------|----|---------| -| test_mf6_sim_load | 🔴 Slower | 145ms | 167ms | +15.2% | -| test_pandas_array_read | 🟢 Faster | 234ms | 187ms | -20.1% | -| test_headfile_get_data | ⚪ Unchanged | 56ms | 57ms | +1.8% | - -[View full results on CodSpeed →](https://app.codspeed.io/...) 
-``` - -## Appendix B: Glossary - -- **Benchmark**: Repeatable performance test measuring execution time -- **Baseline**: Reference performance measurement for comparison -- **Regression**: Unintended performance degradation -- **Round**: Single execution of benchmarked code -- **Warmup**: Initial executions discarded to account for JIT/caching effects -- **Fixture**: pytest test setup/teardown function (not timed) -- **Parametrize**: Run same benchmark with multiple input variations - ---- - -**Document Version**: 1.0 -**Authors**: FloPy Development Team -**License**: CC0 1.0 Universal From 419d7106583c4f36f8a021bdb520d498044b46f9 Mon Sep 17 00:00:00 2001 From: Bonelli Date: Wed, 28 Jan 2026 20:57:11 -0500 Subject: [PATCH 08/11] one fewer --- autotest/benchmarks/benchmark_zonebudget.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/autotest/benchmarks/benchmark_zonebudget.py b/autotest/benchmarks/benchmark_zonebudget.py index ec01ff9bfa..eab60b82ba 100644 --- a/autotest/benchmarks/benchmark_zonebudget.py +++ b/autotest/benchmarks/benchmark_zonebudget.py @@ -46,14 +46,6 @@ def test_zonebudget_get_budget(benchmark, case): benchmark(zb.get_budget) -@pytest.mark.slow -@pytest.mark.benchmark(min_rounds=2, warmup=False) -def test_zonebudget_get_volumetric_budget(benchmark, case): - cbc_path, zones = case - zb = ZoneBudget(str(cbc_path), z=zones) - benchmark(zb.get_volumetric_budget) - - @pytest.mark.slow @pytest.mark.benchmark(min_rounds=2, warmup=False) def test_zonebudget_get_dataframes(benchmark, case): From a7d04d6458f8ef33401fcf40942957de5ad27e8f Mon Sep 17 00:00:00 2001 From: Bonelli Date: Thu, 29 Jan 2026 06:41:14 -0500 Subject: [PATCH 09/11] postproc fix --- .../benchmarks/benchmark_postprocessing.py | 22 ++++++++++--------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/autotest/benchmarks/benchmark_postprocessing.py b/autotest/benchmarks/benchmark_postprocessing.py index fd271e14d1..cdd4c63f60 100644 --- 
a/autotest/benchmarks/benchmark_postprocessing.py +++ b/autotest/benchmarks/benchmark_postprocessing.py @@ -10,23 +10,26 @@ get_water_table, ) -from .conftest import load_mf6_sim +from .conftest import load_mf6_sim, load_mf2005_model @pytest.mark.benchmark(min_rounds=2, warmup=False) @pytest.mark.parametrize( - "row_col", - [lambda m: (None, None), lambda m: (m.dis.nrow // 2, m.dis.ncol // 2)], - ids=["everywhere", "center"], + "rcxy", + [ + lambda m: (m.dis.nrow.array // 2, m.dis.ncol.array // 2, None, None), + lambda m: (None, None, sum(m.modelgrid.extent[:2]) / 2, sum(m.modelgrid.extent[2:4]) / 2), + ], + ids=["r c", "x y"], ) -def test_get_transmissivities(benchmark, function_tmpdir, row_col): +def test_get_transmissivities(benchmark, function_tmpdir, rcxy): sim = load_mf6_sim(function_tmpdir, model_key="freyberg") gwf = sim.get_model() hds_path = Path(function_tmpdir) / "freyberg.hds" hds = HeadFile(hds_path) heads = hds.get_data(totim=hds.get_times()[-1]) - r, c = row_col(gwf) - benchmark(lambda: get_transmissivities(heads, gwf, r=r, c=c)) + r, c, x, y = rcxy(gwf) + benchmark(lambda: get_transmissivities(heads, gwf, r=r, c=c, x=x, y=y)) @pytest.mark.benchmark(min_rounds=2, warmup=False) @@ -40,12 +43,11 @@ def test_get_water_table(benchmark, function_tmpdir): @pytest.mark.benchmark(min_rounds=2, warmup=False) def test_get_gradients(benchmark, function_tmpdir): - sim = load_mf6_sim(function_tmpdir, model_key="freyberg") - gwf = sim.get_model() + model = load_mf2005_model(function_tmpdir, model_key="freyberg") hds_path = Path(function_tmpdir) / "freyberg.hds" hds = HeadFile(hds_path) heads = hds.get_data(totim=hds.get_times()[-1]) - benchmark(lambda: get_gradients(heads, gwf)) + benchmark(lambda: get_gradients(heads, model, nodata=-999)) @pytest.mark.benchmark(min_rounds=2, warmup=False) From 7f420fd3c8b7b74c65671b9d1a35bf4ee65b597b Mon Sep 17 00:00:00 2001 From: Bonelli Date: Thu, 29 Jan 2026 08:23:54 -0500 Subject: [PATCH 10/11] fixes --- 
autotest/benchmarks/benchmark_mf6listbudget.py | 17 +---------------- autotest/benchmarks/benchmark_mflistbudget.py | 2 +- .../benchmarks/benchmark_mfusglistbudget.py | 2 +- autotest/benchmarks/benchmark_mtlistfile.py | 10 ++++++++-- autotest/benchmarks/benchmark_postprocessing.py | 7 ++++++- autotest/benchmarks/benchmark_rasters.py | 2 ++ autotest/benchmarks/benchmark_sfroutputfile.py | 11 ++++++----- 7 files changed, 25 insertions(+), 26 deletions(-) diff --git a/autotest/benchmarks/benchmark_mf6listbudget.py b/autotest/benchmarks/benchmark_mf6listbudget.py index 406923526a..7b75ac6845 100644 --- a/autotest/benchmarks/benchmark_mf6listbudget.py +++ b/autotest/benchmarks/benchmark_mf6listbudget.py @@ -12,7 +12,7 @@ def mf6_lbf(example_data_path) -> Mf6ListBudget: @pytest.mark.benchmark def test_mf6listbudget_load(benchmark, mf6_lbf): - benchmark(lambda: Mf6ListBudget(mf6_lbf.fname)) + benchmark(lambda: Mf6ListBudget(mf6_lbf.file_name)) @pytest.mark.benchmark @@ -28,21 +28,6 @@ def test_mf6listbudget_get_budget(benchmark, mf6_lbf): benchmark(mf6_lbf.get_budget) -@pytest.mark.benchmark -def test_mf6listbudget_get_time_series(benchmark, mf6_lbf): - budget = mf6_lbf.get_budget() - if budget and len(budget) > 0: - term = ( - next(iter(budget[0].dtype.names)) - if hasattr(budget[0], "dtype") - else "STORAGE" - ) - else: - term = "STORAGE" - - benchmark(lambda: mf6_lbf.get_time_series(term)) - - @pytest.mark.benchmark def test_mf6listbudget_to_dataframe(benchmark, mf6_lbf): benchmark(mf6_lbf.get_dataframes) diff --git a/autotest/benchmarks/benchmark_mflistbudget.py b/autotest/benchmarks/benchmark_mflistbudget.py index 634df7c869..4b021c648b 100644 --- a/autotest/benchmarks/benchmark_mflistbudget.py +++ b/autotest/benchmarks/benchmark_mflistbudget.py @@ -12,4 +12,4 @@ def mf_lbf(example_data_path) -> MfListBudget: @pytest.mark.benchmark def test_mflistbudget_load(benchmark, mf_lbf): - benchmark(lambda: MfListBudget(mf_lbf.fname)) + benchmark(lambda: 
MfListBudget(mf_lbf.file_name)) diff --git a/autotest/benchmarks/benchmark_mfusglistbudget.py b/autotest/benchmarks/benchmark_mfusglistbudget.py index 08ce89e7ec..0212d62d46 100644 --- a/autotest/benchmarks/benchmark_mfusglistbudget.py +++ b/autotest/benchmarks/benchmark_mfusglistbudget.py @@ -16,4 +16,4 @@ def mfusg_lbf(example_data_path) -> MfusgListBudget: @pytest.mark.benchmark def test_mfusglistbudget_load(benchmark, mfusg_lbf): - benchmark(lambda: MfusgListBudget(mfusg_lbf.fname)) + benchmark(lambda: MfusgListBudget(mfusg_lbf.file_name)) diff --git a/autotest/benchmarks/benchmark_mtlistfile.py b/autotest/benchmarks/benchmark_mtlistfile.py index 503b531fc2..a7e87b3f1e 100644 --- a/autotest/benchmarks/benchmark_mtlistfile.py +++ b/autotest/benchmarks/benchmark_mtlistfile.py @@ -5,5 +5,11 @@ @pytest.mark.benchmark def test_mtlistfile_load(benchmark, example_data_path): - list_file = example_data_path / "mt3d_test" / "mf2kmt3d" / "mnw" / "t5.lst" - benchmark(lambda: MtListBudget(str(list_file)).gw_data) + list_file = example_data_path / "mt3d_test" / "mcomp.list" + + def load_and_parse(): + mt = MtListBudget(str(list_file)) + mt.parse() + return mt.gw_data + + benchmark(load_and_parse) diff --git a/autotest/benchmarks/benchmark_postprocessing.py b/autotest/benchmarks/benchmark_postprocessing.py index cdd4c63f60..07ff32de76 100644 --- a/autotest/benchmarks/benchmark_postprocessing.py +++ b/autotest/benchmarks/benchmark_postprocessing.py @@ -18,7 +18,12 @@ "rcxy", [ lambda m: (m.dis.nrow.array // 2, m.dis.ncol.array // 2, None, None), - lambda m: (None, None, sum(m.modelgrid.extent[:2]) / 2, sum(m.modelgrid.extent[2:4]) / 2), + lambda m: ( + None, + None, + sum(m.modelgrid.extent[:2]) / 2, + sum(m.modelgrid.extent[2:4]) / 2, + ), ], ids=["r c", "x y"], ) diff --git a/autotest/benchmarks/benchmark_rasters.py b/autotest/benchmarks/benchmark_rasters.py index 5590b27d5b..374dbe00eb 100644 --- a/autotest/benchmarks/benchmark_rasters.py +++ 
b/autotest/benchmarks/benchmark_rasters.py @@ -52,6 +52,7 @@ def test_raster_load(benchmark, raster_path): ] +@pytest.mark.slow @pytest.mark.benchmark(min_rounds=1, warmup=False) @pytest.mark.parametrize("grid", GRIDS, ids=["small", "medium", "large"]) @pytest.mark.parametrize( @@ -61,6 +62,7 @@ def test_raster_resample(benchmark, raster, grid, method): benchmark(lambda: raster.resample_to_grid(grid, band=1, method=method)) +@pytest.mark.slow @pytest.mark.benchmark(min_rounds=1, warmup=False) @pytest.mark.skipif(not has_pkg("pyproj"), reason="requires pyproj") def test_raster_to_crs_transform(benchmark, raster): diff --git a/autotest/benchmarks/benchmark_sfroutputfile.py b/autotest/benchmarks/benchmark_sfroutputfile.py index f3ba1415e4..7cb5a8c5dc 100644 --- a/autotest/benchmarks/benchmark_sfroutputfile.py +++ b/autotest/benchmarks/benchmark_sfroutputfile.py @@ -5,23 +5,24 @@ @pytest.mark.benchmark def test_sfrfile_load(benchmark, example_data_path): - sfr_file = example_data_path / "freyberg_usg" / "freyberg.usg.sfr" + sfr_file = example_data_path / "sfr_examples" / "test1tr.flw" benchmark(lambda: SfrFile(str(sfr_file))) -@pytest.mark.fixture +@pytest.fixture def sfrf(example_data_path) -> SfrFile: - return SfrFile(str(example_data_path / "freyberg_usg" / "freyberg.usg.sfr")) + return SfrFile(str(example_data_path / "sfr_examples" / "test1tr.flw")) @pytest.mark.benchmark def test_sfrfile_get_nstrm(benchmark, sfrf): - benchmark(sfrf.get_nstrm) + df = sfrf.get_dataframe() + benchmark(lambda: SfrFile.get_nstrm(df)) @pytest.mark.benchmark def test_sfrfile_get_results(benchmark, sfrf): - benchmark(sfrf.get_results) + benchmark(lambda: sfrf.get_results(segment=1, reach=1)) @pytest.mark.benchmark From 11684fa5c90eb46592884843cd26e8008d063cd2 Mon Sep 17 00:00:00 2001 From: Bonelli Date: Thu, 29 Jan 2026 09:11:57 -0500 Subject: [PATCH 11/11] fix decorators --- autotest/benchmarks/benchmark_arrays.py | 8 ++++---- autotest/benchmarks/benchmark_cellbudgetfile.py | 4 ++-- 
autotest/benchmarks/benchmark_endpointfile.py | 4 ++-- autotest/benchmarks/benchmark_export.py | 10 +++++----- autotest/benchmarks/benchmark_formattedfile.py | 2 +- autotest/benchmarks/benchmark_gridintersect.py | 12 ++++++------ autotest/benchmarks/benchmark_headfile.py | 6 +++--- autotest/benchmarks/benchmark_mf2005_input.py | 6 +++--- autotest/benchmarks/benchmark_mf6_input.py | 4 ++-- autotest/benchmarks/benchmark_pathlinefile.py | 4 ++-- autotest/benchmarks/benchmark_postprocessing.py | 8 ++++---- autotest/benchmarks/benchmark_rasters.py | 12 ++++++------ autotest/benchmarks/benchmark_zonebudget.py | 6 +++--- pyproject.toml | 6 ++++++ 14 files changed, 49 insertions(+), 43 deletions(-) diff --git a/autotest/benchmarks/benchmark_arrays.py b/autotest/benchmarks/benchmark_arrays.py index ad92dccd69..8678093676 100644 --- a/autotest/benchmarks/benchmark_arrays.py +++ b/autotest/benchmarks/benchmark_arrays.py @@ -18,7 +18,7 @@ } -@pytest.mark.benchmark(min_rounds=2, warmup=False) +@pytest.mark.benchmark @pytest.mark.slow @pytest.mark.parametrize("size", ["small", "medium", "large"]) def test_util2d_create(benchmark, function_tmpdir, size): @@ -33,7 +33,7 @@ def create_util2d(): benchmark(create_util2d) -@pytest.mark.benchmark(min_rounds=2, warmup=False) +@pytest.mark.benchmark @pytest.mark.slow @pytest.mark.parametrize("size", ["small", "medium", "large"]) def test_util3d_create(benchmark, function_tmpdir, size): @@ -48,7 +48,7 @@ def create_util3d(): benchmark(create_util3d) -@pytest.mark.benchmark(min_rounds=1, warmup=False) +@pytest.mark.benchmark @pytest.mark.slow @pytest.mark.parametrize("size", ["small", "medium", "large"]) def test_util2d_external_write(benchmark, function_tmpdir, size): @@ -63,7 +63,7 @@ def write_bin(): benchmark(write_bin) -@pytest.mark.benchmark(min_rounds=1, warmup=False) +@pytest.mark.benchmark @pytest.mark.slow @pytest.mark.parametrize("size", ["small", "medium", "large"]) def test_util3d_external_write(benchmark, function_tmpdir, 
size): diff --git a/autotest/benchmarks/benchmark_cellbudgetfile.py b/autotest/benchmarks/benchmark_cellbudgetfile.py index 171b391308..1ae3226347 100644 --- a/autotest/benchmarks/benchmark_cellbudgetfile.py +++ b/autotest/benchmarks/benchmark_cellbudgetfile.py @@ -17,12 +17,12 @@ def cbcf(example_data_path) -> CellBudgetFile: ) -@pytest.mark.benchmark(min_rounds=2, warmup=False) +@pytest.mark.benchmark def test_cellbudgetfile_load(benchmark, cbcf): benchmark(lambda: CellBudgetFile(cbcf.filename)) -@pytest.mark.benchmark(min_rounds=2, warmup=False) +@pytest.mark.benchmark def test_cellbudgetfile_get_data_all(benchmark, cbcf): unique_records = cbcf.headers[["text", "imeth"]].drop_duplicates() term = unique_records.iloc[0]["text"] diff --git a/autotest/benchmarks/benchmark_endpointfile.py b/autotest/benchmarks/benchmark_endpointfile.py index fddeda014f..1f2b8f4100 100644 --- a/autotest/benchmarks/benchmark_endpointfile.py +++ b/autotest/benchmarks/benchmark_endpointfile.py @@ -14,7 +14,7 @@ def epf(ex01_mp7_model) -> EndpointFile: return EndpointFile(ws / f"{mp.name}.mpend") -@pytest.mark.benchmark(min_rounds=2, warmup=False) +@pytest.mark.benchmark def test_endpointfile_load(benchmark, epf): benchmark(lambda: EndpointFile(epf.fname)) @@ -29,7 +29,7 @@ def test_endpointfile_get_alldata(benchmark, epf): benchmark(epf.get_alldata) -@pytest.mark.benchmark(min_rounds=1, warmup=False) +@pytest.mark.benchmark def test_endpointfile_to_geodataframe(benchmark, ex01_mf6_model, epf): pytest.importorskip("geopandas") sim, function_tmpdir = ex01_mf6_model diff --git a/autotest/benchmarks/benchmark_export.py b/autotest/benchmarks/benchmark_export.py index 2bb5cf863b..bcdbe44bfe 100644 --- a/autotest/benchmarks/benchmark_export.py +++ b/autotest/benchmarks/benchmark_export.py @@ -4,7 +4,7 @@ from .conftest import load_mf6_sim, load_mf2005_model -@pytest.mark.benchmark(min_rounds=1, warmup=False) +@pytest.mark.benchmark def test_mf6_export_shapefile(benchmark, function_tmpdir): sim 
= load_mf6_sim(function_tmpdir, model_key="freyberg") gwf = sim.get_model() @@ -12,14 +12,14 @@ def test_mf6_export_shapefile(benchmark, function_tmpdir): benchmark(lambda: gwf.export(str(output_path))) -@pytest.mark.benchmark(min_rounds=1, warmup=False) +@pytest.mark.benchmark def test_mf2005_export_shapefile(benchmark, function_tmpdir): model = load_mf2005_model(function_tmpdir, model_key="freyberg") output_path = function_tmpdir / "export_mf2005.shp" benchmark(lambda: model.export(str(output_path))) -@pytest.mark.benchmark(min_rounds=1, warmup=False) +@pytest.mark.benchmark @pytest.mark.skipif(not has_pkg("netCDF4"), reason="requires netCDF4") def test_mf6_export_netcdf(benchmark, function_tmpdir): import uuid @@ -35,7 +35,7 @@ def export_netcdf(): benchmark(export_netcdf) -@pytest.mark.benchmark(min_rounds=1, warmup=False) +@pytest.mark.benchmark @pytest.mark.skipif(not has_pkg("geopandas"), reason="requires geopandas") def test_mf6_modelgrid_to_geodataframe(benchmark, function_tmpdir): sim = load_mf6_sim(function_tmpdir, model_key="freyberg") @@ -43,7 +43,7 @@ def test_mf6_modelgrid_to_geodataframe(benchmark, function_tmpdir): benchmark(gwf.modelgrid.to_geodataframe) -@pytest.mark.benchmark(min_rounds=1, warmup=False) +@pytest.mark.benchmark @pytest.mark.skipif(not has_pkg("vtk"), reason="requires vtk") def test_mf6_export_vtk(benchmark, function_tmpdir): sim = load_mf6_sim(function_tmpdir, model_key="freyberg") diff --git a/autotest/benchmarks/benchmark_formattedfile.py b/autotest/benchmarks/benchmark_formattedfile.py index efc04d9628..b57aff8d8e 100644 --- a/autotest/benchmarks/benchmark_formattedfile.py +++ b/autotest/benchmarks/benchmark_formattedfile.py @@ -12,7 +12,7 @@ from flopy.utils.formattedfile import FormattedHeadFile -@pytest.mark.benchmark(min_rounds=2, warmup=False) +@pytest.mark.benchmark @pytest.mark.slow def test_formattedfile_load(benchmark, example_data_path): pth = example_data_path / "mf2005_test" / "test1tr.githds" diff --git 
a/autotest/benchmarks/benchmark_gridintersect.py b/autotest/benchmarks/benchmark_gridintersect.py index 5d9531fb33..b4de7a902a 100644 --- a/autotest/benchmarks/benchmark_gridintersect.py +++ b/autotest/benchmarks/benchmark_gridintersect.py @@ -29,7 +29,7 @@ # GridIntersect class benchmarks -@pytest.mark.benchmark(min_rounds=1, warmup=False) +@pytest.mark.benchmark @pytest.mark.slow @pytest.mark.parametrize("grid", STRUCTURED_GRIDS.values(), ids=STRUCTURED_GRIDS.keys()) @pytest.mark.parametrize("rtree", [True, False], ids=["rtree", "no_rtree"]) @@ -64,7 +64,7 @@ def make_line(grid, line_type) -> LineString: return LineString(coords) -@pytest.mark.benchmark(min_rounds=1, warmup=False) +@pytest.mark.benchmark @pytest.mark.slow @pytest.mark.parametrize("grid", STRUCTURED_GRIDS.values(), ids=STRUCTURED_GRIDS.keys()) @pytest.mark.parametrize("line", ["diagonal", "horizontal", "complex"]) @@ -126,7 +126,7 @@ def make_poly(grid, poly_type) -> Polygon: return Polygon(coords) -@pytest.mark.benchmark(min_rounds=1, warmup=False) +@pytest.mark.benchmark @pytest.mark.slow @pytest.mark.parametrize("grid", STRUCTURED_GRIDS.values(), ids=STRUCTURED_GRIDS.keys()) @pytest.mark.parametrize("poly", ["small", "medium", "large", "irregular"]) @@ -137,7 +137,7 @@ def test_intersect_polygon(benchmark, grid, poly, rtree): benchmark(lambda: gi.intersect(polygon, "polygon")) -@pytest.mark.benchmark(min_rounds=1, warmup=False) +@pytest.mark.benchmark @pytest.mark.slow @pytest.mark.parametrize("grid", STRUCTURED_GRIDS.values(), ids=STRUCTURED_GRIDS.keys()) @pytest.mark.parametrize("poly", ["small", "medium", "large", "irregular"]) @@ -160,7 +160,7 @@ def test_grid_intersect_single_point(benchmark, grid): benchmark(lambda: grid.intersect(x_center, y_center)) -@pytest.mark.benchmark(min_rounds=1, warmup=False) +@pytest.mark.benchmark @pytest.mark.slow @pytest.mark.parametrize("grid", STRUCTURED_GRIDS.values(), ids=STRUCTURED_GRIDS.keys()) def test_grid_intersect_batch_points(benchmark, grid): @@ 
-171,7 +171,7 @@ def test_grid_intersect_batch_points(benchmark, grid): benchmark(lambda: grid.intersect(xx.ravel(), yy.ravel())) -@pytest.mark.benchmark(min_rounds=1, warmup=False) +@pytest.mark.benchmark @pytest.mark.slow @pytest.mark.parametrize("grid", STRUCTURED_GRIDS.values(), ids=STRUCTURED_GRIDS.keys()) def test_grid_intersect_3d(benchmark, grid): diff --git a/autotest/benchmarks/benchmark_headfile.py b/autotest/benchmarks/benchmark_headfile.py index e3c08ea571..c38805a3d5 100644 --- a/autotest/benchmarks/benchmark_headfile.py +++ b/autotest/benchmarks/benchmark_headfile.py @@ -13,7 +13,7 @@ from flopy.utils import HeadFile -@pytest.mark.benchmark(min_rounds=2, warmup=False) +@pytest.mark.benchmark def test_headfile_load(benchmark, example_data_path): pth = ( example_data_path @@ -45,7 +45,7 @@ def test_headfile_get_data_single(benchmark, hdsf): benchmark(lambda: hdsf.get_data(totim=mid_time)) -@pytest.mark.benchmark(min_rounds=2, warmup=False) +@pytest.mark.benchmark def test_headfile_get_alldata(benchmark, hdsf): benchmark(hdsf.get_alldata) @@ -78,6 +78,6 @@ def test_headfile_get_kstpkper_list(benchmark, hdsf): benchmark(hdsf.get_kstpkper) -@pytest.mark.benchmark(min_rounds=2, warmup=False) +@pytest.mark.benchmark def test_headfile_get_alldata_mf6(benchmark, hdsf): benchmark(hdsf.get_alldata) diff --git a/autotest/benchmarks/benchmark_mf2005_input.py b/autotest/benchmarks/benchmark_mf2005_input.py index 4d3d92da4e..95a11c81d2 100644 --- a/autotest/benchmarks/benchmark_mf2005_input.py +++ b/autotest/benchmarks/benchmark_mf2005_input.py @@ -41,18 +41,18 @@ def _load_model(ws, model_name): return Modflow.load(nam_file, model_ws=ws, check=False) -@pytest.mark.benchmark(min_rounds=1, warmup=False) +@pytest.mark.benchmark def test_mf2005_load(benchmark, function_tmpdir, model_name): benchmark(lambda: _load_model(function_tmpdir, model_name)) -@pytest.mark.benchmark(min_rounds=1, warmup=False) +@pytest.mark.benchmark def test_mf2005_write_freyberg(benchmark, 
function_tmpdir): ml = _load_model(function_tmpdir, "freyberg_multilayer_transient") benchmark(ml.write_input) -@pytest.mark.benchmark(min_rounds=1, warmup=False) +@pytest.mark.benchmark def test_mf2005_round_trip_freyberg(benchmark, function_tmpdir): def round_trip(): ml = _load_model(function_tmpdir, "freyberg_multilayer_transient") diff --git a/autotest/benchmarks/benchmark_mf6_input.py b/autotest/benchmarks/benchmark_mf6_input.py index d8f43e3705..2380470e4c 100644 --- a/autotest/benchmarks/benchmark_mf6_input.py +++ b/autotest/benchmarks/benchmark_mf6_input.py @@ -58,7 +58,7 @@ def pytest_generate_tests(metafunc): metafunc.parametrize("model_name", models, ids=models) -@pytest.mark.benchmark(min_rounds=1, warmup=False) +@pytest.mark.benchmark @pytest.mark.slow @pytest.mark.external @pytest.mark.parametrize("use_pandas", [True, False], ids=["pandas", "nopandas"]) @@ -67,7 +67,7 @@ def test_load_simulation(function_tmpdir, benchmark, registry, model_name, use_p benchmark(lambda: MFSimulation.load(sim_ws=function_tmpdir, use_pandas=use_pandas)) -@pytest.mark.benchmark(min_rounds=1, warmup=False) +@pytest.mark.benchmark @pytest.mark.external @pytest.mark.slow @pytest.mark.parametrize("use_pandas", [True, False], ids=["pandas", "nopandas"]) diff --git a/autotest/benchmarks/benchmark_pathlinefile.py b/autotest/benchmarks/benchmark_pathlinefile.py index ba4a0c1afc..d07949f8b3 100644 --- a/autotest/benchmarks/benchmark_pathlinefile.py +++ b/autotest/benchmarks/benchmark_pathlinefile.py @@ -35,12 +35,12 @@ def plf(ex01_mp7_model) -> PathlineFile: return PathlineFile(ws / f"{mp.name}.mppth") -@pytest.mark.benchmark(min_rounds=2, warmup=False) +@pytest.mark.benchmark def test_pathlinefile_load(benchmark, plf): benchmark(lambda: PathlineFile(plf.fname)) -@pytest.mark.benchmark(min_rounds=1, warmup=False) +@pytest.mark.benchmark def test_pathlinefile_to_geodataframe(benchmark, ex01_mf6_model, plf): pytest.importorskip("geopandas") sim, function_tmpdir = ex01_mf6_model diff 
--git a/autotest/benchmarks/benchmark_postprocessing.py b/autotest/benchmarks/benchmark_postprocessing.py index 07ff32de76..9153c62a63 100644 --- a/autotest/benchmarks/benchmark_postprocessing.py +++ b/autotest/benchmarks/benchmark_postprocessing.py @@ -13,7 +13,7 @@ from .conftest import load_mf6_sim, load_mf2005_model -@pytest.mark.benchmark(min_rounds=2, warmup=False) +@pytest.mark.benchmark @pytest.mark.parametrize( "rcxy", [ @@ -37,7 +37,7 @@ def test_get_transmissivities(benchmark, function_tmpdir, rcxy): benchmark(lambda: get_transmissivities(heads, gwf, r=r, c=c, x=x, y=y)) -@pytest.mark.benchmark(min_rounds=2, warmup=False) +@pytest.mark.benchmark def test_get_water_table(benchmark, function_tmpdir): sim = load_mf6_sim(function_tmpdir, model_key="freyberg") hds_path = Path(function_tmpdir) / "freyberg.hds" @@ -46,7 +46,7 @@ def test_get_water_table(benchmark, function_tmpdir): benchmark(lambda: get_water_table(heads)) -@pytest.mark.benchmark(min_rounds=2, warmup=False) +@pytest.mark.benchmark def test_get_gradients(benchmark, function_tmpdir): model = load_mf2005_model(function_tmpdir, model_key="freyberg") hds_path = Path(function_tmpdir) / "freyberg.hds" @@ -55,7 +55,7 @@ def test_get_gradients(benchmark, function_tmpdir): benchmark(lambda: get_gradients(heads, model, nodata=-999)) -@pytest.mark.benchmark(min_rounds=2, warmup=False) +@pytest.mark.benchmark def test_get_specific_discharge(benchmark, function_tmpdir): sim = load_mf6_sim(function_tmpdir, model_key="freyberg") gwf = sim.get_model() diff --git a/autotest/benchmarks/benchmark_rasters.py b/autotest/benchmarks/benchmark_rasters.py index 374dbe00eb..e8ef8e2d31 100644 --- a/autotest/benchmarks/benchmark_rasters.py +++ b/autotest/benchmarks/benchmark_rasters.py @@ -39,7 +39,7 @@ def make_grid(raster, nrow, ncol) -> StructuredGrid: ) -@pytest.mark.benchmark(min_rounds=2, warmup=False) +@pytest.mark.benchmark def test_raster_load(benchmark, raster_path): benchmark(lambda: Raster.load(raster_path)) @@ 
-53,7 +53,7 @@ def test_raster_load(benchmark, raster_path): @pytest.mark.slow -@pytest.mark.benchmark(min_rounds=1, warmup=False) +@pytest.mark.benchmark @pytest.mark.parametrize("grid", GRIDS, ids=["small", "medium", "large"]) @pytest.mark.parametrize( "method", ["linear", "nearest", "cubic", "mean", "median", "min", "max"] @@ -63,7 +63,7 @@ def test_raster_resample(benchmark, raster, grid, method): @pytest.mark.slow -@pytest.mark.benchmark(min_rounds=1, warmup=False) +@pytest.mark.benchmark @pytest.mark.skipif(not has_pkg("pyproj"), reason="requires pyproj") def test_raster_to_crs_transform(benchmark, raster): benchmark(lambda: raster.to_crs(epsg=4326)) @@ -105,13 +105,13 @@ def large_poly(raster): ] -@pytest.mark.benchmark(min_rounds=2, warmup=False) +@pytest.mark.benchmark @pytest.mark.parametrize("poly", POLYGONS, ids=["small", "medium", "large"]) def test_raster_crop(benchmark, raster, poly): benchmark(lambda: raster.crop(poly)) -@pytest.mark.benchmark(min_rounds=2, warmup=False) +@pytest.mark.benchmark @pytest.mark.parametrize("poly", POLYGONS, ids=["small", "medium", "large"]) def test_raster_sample(benchmark, raster, poly): benchmark(lambda: raster.sample_polygon(poly, band=1)) @@ -130,7 +130,7 @@ def test_raster_get_array_masked(benchmark, raster, masked): benchmark(lambda: raster.get_array(band=1, masked=masked)) -@pytest.mark.benchmark(min_rounds=1, warmup=False) +@pytest.mark.benchmark def test_raster_write(benchmark, raster, function_tmpdir): output_path = function_tmpdir / "output_raster.tif" benchmark(lambda: raster.write(str(output_path))) diff --git a/autotest/benchmarks/benchmark_zonebudget.py b/autotest/benchmarks/benchmark_zonebudget.py index eab60b82ba..da0b15b318 100644 --- a/autotest/benchmarks/benchmark_zonebudget.py +++ b/autotest/benchmarks/benchmark_zonebudget.py @@ -32,14 +32,14 @@ def case(request, example_data_path) -> tuple[Path, NDArray]: @pytest.mark.slow -@pytest.mark.benchmark(min_rounds=2, warmup=False) +@pytest.mark.benchmark 
def test_zonebudget_load(benchmark, case): cbc_path, zones = case benchmark(lambda: ZoneBudget(str(cbc_path), z=zones)) @pytest.mark.slow -@pytest.mark.benchmark(min_rounds=2, warmup=False) +@pytest.mark.benchmark def test_zonebudget_get_budget(benchmark, case): cbc_path, zones = case zb = ZoneBudget(str(cbc_path), z=zones) @@ -47,7 +47,7 @@ def test_zonebudget_get_budget(benchmark, case): @pytest.mark.slow -@pytest.mark.benchmark(min_rounds=2, warmup=False) +@pytest.mark.benchmark def test_zonebudget_get_dataframes(benchmark, case): cbc_path, zones = case zb = ZoneBudget(str(cbc_path), z=zones) diff --git a/pyproject.toml b/pyproject.toml index 50d7ca42f1..cce48522c6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -245,3 +245,9 @@ filterwarnings = [ "ignore:datetime.datetime.utcfromtimestamp", "ignore:\n.*Pyarrow", ] + +[tool.pytest.benchmark] +# Default settings for pytest-benchmark (ignored by pytest-codspeed) +min_rounds = 5 +warmup = false +disable_gc = false