
Commit 83d03c9

lots of claude work to be reviewed
1 parent e0cf2f6 commit 83d03c9

18 files changed, +1421 −1360 lines


docs/content/reference/migration/migration-0-28.md

Lines changed: 60 additions & 0 deletions

@@ -230,3 +230,63 @@ The `Schema` class and related column descriptor/selector types have moved from

| `from rerun.dataframe import IndexColumnSelector` | `from rerun.catalog import IndexColumnSelector` |

The previous import paths are still supported but will be removed in a future release.

## Python SDK: new `DatasetView` API for filtering datasets

A new `DatasetView` class has been introduced for filtering and reading from datasets. It provides a cleaner, lazily evaluated API for working with subsets of a dataset's data.

### Creating a DatasetView

Use `filter_segments()` or `filter_contents()` on a `DatasetEntry` to create a `DatasetView`:

```python
from rerun.catalog import CatalogClient

client = CatalogClient("rerun+http://localhost:51234")
dataset = client.get_dataset(name="my_dataset")

# Filter to specific segments
view = dataset.filter_segments(["recording_0", "recording_1"])

# Filter to specific entity paths
view = dataset.filter_contents(["/points/**"])

# Chain filters
view = dataset.filter_segments(["recording_0"]).filter_contents(["/points/**", "-/text/**"])
```

### Reading data

Use `reader()` to get a DataFusion DataFrame:

```python
df = view.reader(index="timeline")
```
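
The result is an ordinary DataFusion DataFrame, so the usual DataFusion calls apply once you have it. A minimal sketch, assuming the same illustrative `timeline` index as above:

```python
# The reader returns a datafusion.DataFrame, so standard DataFusion methods apply.
df = view.reader(index="timeline")

# Materialize a preview as Arrow record batches or as a pandas DataFrame.
batches = df.limit(100).collect()
pandas_df = df.limit(100).to_pandas()
```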

### Available methods

| Method | Description |
|--------|-------------|
| `filter_segments(segment_ids)` | Filter to specific segment IDs (list or DataFrame with `rerun_segment_id` column) |
| `filter_contents(exprs)` | Filter to specific entity paths (supports wildcards like `/points/**`) |
| `segment_ids()` | Get the list of segment IDs in this view |
| `segment_table()` | Get segment metadata as a DataFusion DataFrame |
| `schema()` | Get the filtered schema |
| `arrow_schema()` | Get the filtered Arrow schema |
| `reader(index=...)` | Create a DataFusion DataFrame reader |
| `get_index_ranges(index)` | Get min/max values per segment for an index |
| `download_segment(segment_id)` | Download a specific segment as a Recording |
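
As an illustration, the sketch below inspects a view before reading any data. It only uses the methods listed above; the `"timeline"` index name is an assumption for the example:

```python
view = dataset.filter_contents(["/points/**"])

# Inspect the view without reading any column data.
print(view.segment_ids())    # segment IDs remaining after filtering
print(view.schema())         # filtered schema
print(view.arrow_schema())   # filtered Arrow schema

# Per-segment min/max index values for an assumed "timeline" index.
ranges = view.get_index_ranges("timeline")

# Download one segment as a Recording for local use.
recording = view.download_segment(view.segment_ids()[0])
```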

### Deprecation of `dataframe_query_view()`

The `DatasetEntry.dataframe_query_view()` method is deprecated. Use the new `DatasetView` API instead:

```python
# Before (deprecated)
view = dataset.dataframe_query_view(index="timeline", contents={"/points": ["Position2D"]})
df = view.df()

# After
view = dataset.filter_contents(["/points/**"])
df = view.reader(index="timeline")
```

rerun_py/docs/gen_common_index.py

Lines changed: 1 addition & 1 deletion

```diff
@@ -422,8 +422,8 @@ class Section:
         show_submodules=True,
         class_list=[
             "AlreadyExistsError",
-            "DataframeQueryView",
             "DatasetEntry",
+            "DatasetView",
             "CatalogClient",
             "Entry",
             "EntryId",
```

rerun_py/rerun_bindings/rerun_bindings.pyi

Lines changed: 34 additions & 177 deletions

```diff
@@ -1,10 +1,10 @@
 from __future__ import annotations

 import os
-from collections.abc import Callable, Iterable, Sequence
+from collections.abc import Callable, Sequence
 from datetime import datetime, timedelta
 from enum import Enum
-from typing import TYPE_CHECKING, Any, Self
+from typing import TYPE_CHECKING, Any

 import datafusion as dfn
 import numpy as np
```

```diff
@@ -1285,14 +1285,6 @@ class DatasetEntryInternal:
     # ---

     def download_segment(self, segment_id: str) -> Recording: ...
-    def dataframe_query_view(
-        self,
-        *,
-        index: str | None,
-        contents: Any,
-        include_semantically_empty_columns: bool = False,
-        include_tombstone_columns: bool = False,
-    ) -> DataframeQueryView: ...

     # ---
```

```diff
@@ -1341,6 +1333,38 @@ class DatasetEntryInternal:
         unsafe_allow_recent_cleanup: bool = False,
     ) -> None: ...

+    # --- DatasetView filter methods ---
+
+    def filter_segments(self, segment_ids: list[str]) -> DatasetViewInternal: ...
+    def filter_contents(self, exprs: list[str]) -> DatasetViewInternal: ...
+
+class DatasetViewInternal:
+    """Internal Rust implementation of DatasetView."""
+
+    # Properties
+    @property
+    def dataset(self) -> DatasetEntryInternal: ...
+    @property
+    def filtered_segment_ids(self) -> set[str] | None: ...
+    @property
+    def content_filters(self) -> list[str]: ...
+
+    # Methods
+    def schema(self) -> SchemaInternal: ...
+    def arrow_schema(self) -> pa.Schema: ...
+    def segment_ids(self) -> list[str]: ...
+    def reader(
+        self,
+        *,
+        index: str | None,
+        include_semantically_empty_columns: bool = False,
+        include_tombstone_columns: bool = False,
+        fill_latest_at: bool = False,
+        using_index_values: dict[str, Any] | None = None,
+    ) -> dfn.DataFrame: ...
+    def filter_segments(self, segment_ids: list[str]) -> DatasetViewInternal: ...
+    def filter_contents(self, exprs: list[str]) -> DatasetViewInternal: ...
+
 class TableEntryInternal:
     def catalog(self) -> CatalogClientInternal: ...
     def delete(self) -> None: ...
```
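
The internal `reader()` stub above accepts `fill_latest_at` and `using_index_values`, mirroring the old view's `fill_latest_at()` and `using_index_values()` methods. A hedged sketch of how that might look through the public `DatasetView`, assuming the wrapper forwards these keyword arguments unchanged and that the index is named `timeline` (both assumptions, not confirmed by this commit):

```python
view = dataset.filter_contents(["/points/**"])

# Assumption: the public reader() forwards these kwargs to the internal reader().
df = view.reader(
    index="timeline",       # assumed index name
    fill_latest_at=True,    # fill nulls with the latest value along the index
)
```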

```diff
@@ -1388,173 +1412,6 @@ class _IndexValuesLikeInternal:
     def to_index_values(self) -> npt.NDArray[np.int64]: ...
     def len(self) -> int: ...

-class DataframeQueryView:
-    """View into a remote dataset acting as DataFusion table provider."""
-
-    def filter_segment_id(self, segment_id: str, *args: Iterable[str]) -> Self:
-        """Filter by one or more segment ids. All segment ids are included if not specified."""
-
-    def filter_range_sequence(self, start: int, end: int) -> Self:
-        """
-        Filter the view to only include data between the given index sequence numbers.
-
-        This range is inclusive and will contain both the value at the start and the value at the end.
-
-        The view must be of a sequential index type to use this method.
-
-        Parameters
-        ----------
-        start : int
-            The inclusive start of the range.
-        end : int
-            The inclusive end of the range.
-
-        Returns
-        -------
-        RecordingView
-            A new view containing only the data within the specified range.
-
-            The original view will not be modified.
-
-        """
-
-    def filter_range_secs(self, start: float, end: float) -> Self:
-        """
-        Filter the view to only include data between the given index values expressed as seconds.
-
-        This range is inclusive and will contain both the value at the start and the value at the end.
-
-        The view must be of a temporal index type to use this method.
-
-        Parameters
-        ----------
-        start : int
-            The inclusive start of the range.
-        end : int
-            The inclusive end of the range.
-
-        Returns
-        -------
-        RecordingView
-            A new view containing only the data within the specified range.
-
-            The original view will not be modified.
-
-        """
-
-    def filter_range_nanos(self, start: int, end: int) -> Self:
-        """
-        Filter the view to only include data between the given index values expressed as nanoseconds.
-
-        This range is inclusive and will contain both the value at the start and the value at the end.
-
-        The view must be of a temporal index type to use this method.
-
-        Parameters
-        ----------
-        start : int
-            The inclusive start of the range.
-        end : int
-            The inclusive end of the range.
-
-        Returns
-        -------
-        RecordingView
-            A new view containing only the data within the specified range.
-
-            The original view will not be modified.
-
-        """
-
-    def filter_index_values(self, values: IndexValuesLike) -> Self:
-        """
-        Filter the view to only include data at the provided index values.
-
-        The index values returned will be the intersection between the provided values and the
-        original index values.
-
-        This requires index values to be a precise match. Index values in Rerun are
-        represented as i64 sequence counts or nanoseconds. This API does not expose an interface
-        in floating point seconds, as the numerical conversion would risk false mismatches.
-
-        Parameters
-        ----------
-        values : IndexValuesLike
-            The index values to filter by.
-
-        Returns
-        -------
-        RecordingView
-            A new view containing only the data at the specified index values.
-
-            The original view will not be modified.
-
-        """
-
-    def filter_is_not_null(self, column: AnyComponentColumn) -> Self:
-        """
-        Filter the view to only include rows where the given component column is not null.
-
-        This corresponds to rows for index values where this component was provided to Rerun explicitly
-        via `.log()` or `.send_columns()`.
-
-        Parameters
-        ----------
-        column : AnyComponentColumn
-            The component column to filter by.
-
-        Returns
-        -------
-        RecordingView
-            A new view containing only the data where the specified component column is not null.
-
-            The original view will not be modified.
-
-        """
-
-    def using_index_values(self, values: IndexValuesLike) -> Self:
-        """
-        Create a new view that contains the provided index values.
-
-        If they exist in the original data they are selected, otherwise empty rows are added to the view.
-
-        The output view will always have the same number of rows as the provided values, even if
-        those rows are empty. Use with [`.fill_latest_at()`][rerun.dataframe.RecordingView.fill_latest_at]
-        to populate these rows with the most recent data.
-
-        Parameters
-        ----------
-        values : IndexValuesLike
-            The index values to use.
-
-        Returns
-        -------
-        RecordingView
-            A new view containing the provided index values.
-
-            The original view will not be modified.
-
-        """
-
-    def fill_latest_at(self) -> Self:
-        """
-        Populate any null values in a row with the latest valid data according to the index.
-
-        Returns
-        -------
-        RecordingView
-            A new view with the null values filled in.
-
-            The original view will not be modified.
-
-        """
-
-    def df(self) -> dfn.DataFrame:
-        """Register this view to the global DataFusion context and return a DataFrame."""
-
-    def to_arrow_reader(self) -> pa.RecordBatchReader:
-        """Convert this view to a [`pyarrow.RecordBatchReader`][]."""
-
 class IndexProperties:
     """The properties and configuration of a user-defined index."""
```
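
The removed range and null filters have no direct counterpart on `DatasetView` in this commit. One plausible migration path, sketched here under the assumption of a sequence index column named `frame` and a Rerun-style component column name (both illustrative), is to apply the equivalent predicate to the DataFusion DataFrame returned by `reader()`:

```python
from datafusion import col, lit

view = dataset.filter_contents(["/points/**"])
df = view.reader(index="frame")

# Rough equivalent of the removed filter_range_sequence(100, 200):
# chained filters keep only rows whose index falls in the inclusive range [100, 200].
in_range = df.filter(col("frame") >= lit(100)).filter(col("frame") <= lit(200))

# Rough equivalent of the removed filter_is_not_null(column): drop rows where a
# component column is null (the column name here is hypothetical).
not_null = df.filter(col("/points:Position2D").is_not_null())
```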

rerun_py/rerun_sdk/rerun/catalog/__init__.py

Lines changed: 1 addition & 2 deletions

```diff
@@ -4,7 +4,6 @@
     AlreadyExistsError as AlreadyExistsError,
     ComponentColumnDescriptor as ComponentColumnDescriptor,
     ComponentColumnSelector as ComponentColumnSelector,
-    DataframeQueryView as DataframeQueryView,
     DataFusionTable as DataFusionTable,
     EntryId as EntryId,
     EntryKind as EntryKind,
```

```diff
@@ -26,5 +25,5 @@
 )

 from ._catalog_client import CatalogClient as CatalogClient
-from ._entry import DatasetEntry as DatasetEntry, Entry as Entry, TableEntry as TableEntry
+from ._entry import DatasetEntry as DatasetEntry, DatasetView as DatasetView, Entry as Entry, TableEntry as TableEntry
 from ._schema import Schema as Schema
```
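
With `DatasetView` re-exported from `rerun.catalog`, user code can import it directly, for example as a type annotation. A small sketch (the server address and dataset name are illustrative):

```python
from rerun.catalog import CatalogClient, DatasetView

client = CatalogClient("rerun+http://localhost:51234")
dataset = client.get_dataset(name="my_dataset")

# filter_contents() now returns the publicly exported DatasetView type.
view: DatasetView = dataset.filter_contents(["/points/**"])
```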
