|
1 | 1 | from __future__ import annotations |
2 | 2 |
|
3 | 3 | import os |
4 | | -from collections.abc import Callable, Iterable, Sequence |
| 4 | +from collections.abc import Callable, Sequence |
5 | 5 | from datetime import datetime, timedelta |
6 | 6 | from enum import Enum |
7 | | -from typing import TYPE_CHECKING, Any, Self |
| 7 | +from typing import TYPE_CHECKING, Any |
8 | 8 |
|
9 | 9 | import datafusion as dfn |
10 | 10 | import numpy as np |
@@ -1285,14 +1285,6 @@ class DatasetEntryInternal: |
1285 | 1285 | # --- |
1286 | 1286 |
|
1287 | 1287 | def download_segment(self, segment_id: str) -> Recording: ... |
1288 | | - def dataframe_query_view( |
1289 | | - self, |
1290 | | - *, |
1291 | | - index: str | None, |
1292 | | - contents: Any, |
1293 | | - include_semantically_empty_columns: bool = False, |
1294 | | - include_tombstone_columns: bool = False, |
1295 | | - ) -> DataframeQueryView: ... |
1296 | 1288 |
|
1297 | 1289 | # --- |
1298 | 1290 |
|
@@ -1341,6 +1333,38 @@ class DatasetEntryInternal: |
1341 | 1333 | unsafe_allow_recent_cleanup: bool = False, |
1342 | 1334 | ) -> None: ... |
1343 | 1335 |
|
| 1336 | + # --- DatasetView filter methods --- |
| 1337 | + |
| 1338 | + def filter_segments(self, segment_ids: list[str]) -> DatasetViewInternal: ... |
| 1339 | + def filter_contents(self, exprs: list[str]) -> DatasetViewInternal: ... |
| 1340 | + |
| 1341 | +class DatasetViewInternal: |
| 1342 | + """Internal Rust implementation of DatasetView.""" |
| 1343 | + |
| 1344 | + # Properties |
| 1345 | + @property |
| 1346 | + def dataset(self) -> DatasetEntryInternal: ... |
| 1347 | + @property |
| 1348 | + def filtered_segment_ids(self) -> set[str] | None: ... |
| 1349 | + @property |
| 1350 | + def content_filters(self) -> list[str]: ... |
| 1351 | + |
| 1352 | + # Methods |
| 1353 | + def schema(self) -> SchemaInternal: ... |
| 1354 | + def arrow_schema(self) -> pa.Schema: ... |
| 1355 | + def segment_ids(self) -> list[str]: ... |
| 1356 | + def reader( |
| 1357 | + self, |
| 1358 | + *, |
| 1359 | + index: str | None, |
| 1360 | + include_semantically_empty_columns: bool = False, |
| 1361 | + include_tombstone_columns: bool = False, |
| 1362 | + fill_latest_at: bool = False, |
| 1363 | + using_index_values: dict[str, Any] | None = None, |
| 1364 | + ) -> dfn.DataFrame: ... |
| 1365 | + def filter_segments(self, segment_ids: list[str]) -> DatasetViewInternal: ... |
| 1366 | + def filter_contents(self, exprs: list[str]) -> DatasetViewInternal: ... |
| 1367 | + |
1344 | 1368 | class TableEntryInternal: |
1345 | 1369 | def catalog(self) -> CatalogClientInternal: ... |
1346 | 1370 | def delete(self) -> None: ... |
@@ -1388,173 +1412,6 @@ class _IndexValuesLikeInternal: |
1388 | 1412 | def to_index_values(self) -> npt.NDArray[np.int64]: ... |
1389 | 1413 | def len(self) -> int: ... |
1390 | 1414 |
|
1391 | | -class DataframeQueryView: |
1392 | | - """View into a remote dataset acting as DataFusion table provider.""" |
1393 | | - |
1394 | | - def filter_segment_id(self, segment_id: str, *args: Iterable[str]) -> Self: |
1395 | | - """Filter by one or more segment ids. All segment ids are included if not specified.""" |
1396 | | - |
1397 | | - def filter_range_sequence(self, start: int, end: int) -> Self: |
1398 | | - """ |
1399 | | - Filter the view to only include data between the given index sequence numbers. |
1400 | | -
|
1401 | | - This range is inclusive and will contain both the value at the start and the value at the end. |
1402 | | -
|
1403 | | - The view must be of a sequential index type to use this method. |
1404 | | -
|
1405 | | - Parameters |
1406 | | - ---------- |
1407 | | - start : int |
1408 | | - The inclusive start of the range. |
1409 | | - end : int |
1410 | | - The inclusive end of the range. |
1411 | | -
|
1412 | | - Returns |
1413 | | - ------- |
1414 | | - DataframeQueryView |
1415 | | - A new view containing only the data within the specified range. |
1416 | | -
|
1417 | | - The original view will not be modified. |
1418 | | -
|
1419 | | - """ |
1420 | | - |
1421 | | - def filter_range_secs(self, start: float, end: float) -> Self: |
1422 | | - """ |
1423 | | - Filter the view to only include data between the given index values expressed as seconds. |
1424 | | -
|
1425 | | - This range is inclusive and will contain both the value at the start and the value at the end. |
1426 | | -
|
1427 | | - The view must be of a temporal index type to use this method. |
1428 | | -
|
1429 | | - Parameters |
1430 | | - ---------- |
1431 | | - start : float |
1432 | | - The inclusive start of the range. |
1433 | | - end : float |
1434 | | - The inclusive end of the range. |
1435 | | -
|
1436 | | - Returns |
1437 | | - ------- |
1438 | | - DataframeQueryView |
1439 | | - A new view containing only the data within the specified range. |
1440 | | -
|
1441 | | - The original view will not be modified. |
1442 | | -
|
1443 | | - """ |
1444 | | - |
1445 | | - def filter_range_nanos(self, start: int, end: int) -> Self: |
1446 | | - """ |
1447 | | - Filter the view to only include data between the given index values expressed as nanoseconds. |
1448 | | -
|
1449 | | - This range is inclusive and will contain both the value at the start and the value at the end. |
1450 | | -
|
1451 | | - The view must be of a temporal index type to use this method. |
1452 | | -
|
1453 | | - Parameters |
1454 | | - ---------- |
1455 | | - start : int |
1456 | | - The inclusive start of the range. |
1457 | | - end : int |
1458 | | - The inclusive end of the range. |
1459 | | -
|
1460 | | - Returns |
1461 | | - ------- |
1462 | | - DataframeQueryView |
1463 | | - A new view containing only the data within the specified range. |
1464 | | -
|
1465 | | - The original view will not be modified. |
1466 | | -
|
1467 | | - """ |
1468 | | - |
1469 | | - def filter_index_values(self, values: IndexValuesLike) -> Self: |
1470 | | - """ |
1471 | | - Filter the view to only include data at the provided index values. |
1472 | | -
|
1473 | | - The index values returned will be the intersection between the provided values and the |
1474 | | - original index values. |
1475 | | -
|
1476 | | - This requires index values to be a precise match. Index values in Rerun are |
1477 | | - represented as i64 sequence counts or nanoseconds. This API does not expose an interface |
1478 | | - in floating point seconds, as the numerical conversion would risk false mismatches. |
1479 | | -
|
1480 | | - Parameters |
1481 | | - ---------- |
1482 | | - values : IndexValuesLike |
1483 | | - The index values to filter by. |
1484 | | -
|
1485 | | - Returns |
1486 | | - ------- |
1487 | | - RecordingView |
1488 | | - A new view containing only the data at the specified index values. |
1489 | | -
|
1490 | | - The original view will not be modified. |
1491 | | -
|
1492 | | - """ |
1493 | | - |
1494 | | - def filter_is_not_null(self, column: AnyComponentColumn) -> Self: |
1495 | | - """ |
1496 | | - Filter the view to only include rows where the given component column is not null. |
1497 | | -
|
1498 | | - This corresponds to rows for index values where this component was provided to Rerun explicitly |
1499 | | - via `.log()` or `.send_columns()`. |
1500 | | -
|
1501 | | - Parameters |
1502 | | - ---------- |
1503 | | - column : AnyComponentColumn |
1504 | | - The component column to filter by. |
1505 | | -
|
1506 | | - Returns |
1507 | | - ------- |
1508 | | - RecordingView |
1509 | | - A new view containing only the data where the specified component column is not null. |
1510 | | -
|
1511 | | - The original view will not be modified. |
1512 | | -
|
1513 | | - """ |
1514 | | - |
1515 | | - def using_index_values(self, values: IndexValuesLike) -> Self: |
1516 | | - """ |
1517 | | - Create a new view that contains the provided index values. |
1518 | | -
|
1519 | | - If they exist in the original data they are selected, otherwise empty rows are added to the view. |
1520 | | -
|
1521 | | - The output view will always have the same number of rows as the provided values, even if |
1522 | | - those rows are empty. Use with [`.fill_latest_at()`][rerun.dataframe.RecordingView.fill_latest_at] |
1523 | | - to populate these rows with the most recent data. |
1524 | | -
|
1525 | | - Parameters |
1526 | | - ---------- |
1527 | | - values : IndexValuesLike |
1528 | | - The index values to use. |
1529 | | -
|
1530 | | - Returns |
1531 | | - ------- |
1532 | | - RecordingView |
1533 | | - A new view containing the provided index values. |
1534 | | -
|
1535 | | - The original view will not be modified. |
1536 | | -
|
1537 | | - """ |
1538 | | - |
1539 | | - def fill_latest_at(self) -> Self: |
1540 | | - """ |
1541 | | - Populate any null values in a row with the latest valid data according to the index. |
1542 | | -
|
1543 | | - Returns |
1544 | | - ------- |
1545 | | - RecordingView |
1546 | | - A new view with the null values filled in. |
1547 | | -
|
1548 | | - The original view will not be modified. |
1549 | | -
|
1550 | | - """ |
1551 | | - |
1552 | | - def df(self) -> dfn.DataFrame: |
1553 | | - """Register this view to the global DataFusion context and return a DataFrame.""" |
1554 | | - |
1555 | | - def to_arrow_reader(self) -> pa.RecordBatchReader: |
1556 | | - """Convert this view to a [`pyarrow.RecordBatchReader`][].""" |
1557 | | - |
1558 | 1415 | class IndexProperties: |
1559 | 1416 | """The properties and configuration of a user-defined index.""" |
1560 | 1417 |
|
|
0 commit comments