import json
import os
from itertools import islice

import pyarrow as pa

import datasets
from datasets.builder import Key
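
# Builder for evaluation logs: each log directory contains a "samples/" folder
# holding one JSON file per (sample, epoch), named "{sample_idx}_epoch_{epoch_idx}.json".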


logger = datasets.utils.logging.get_logger(__name__)


class Eval(datasets.GeneratorBasedBuilder):
    # Number of samples to read up front when inferring the dataset's features.
    NUM_EXAMPLES_FOR_FEATURES_INFERENCE = 5

    def _info(self):
        # Features are not known in advance; they are inferred in `_split_generators`.
        return datasets.DatasetInfo()

    def _split_generators(self, dl_manager):
        """Handle str, list, and dict values in `data_files`."""
        if not self.config.data_files:
            raise ValueError(f"At least one data file must be specified, but got data_files={self.config.data_files}")
        # Extract any archives lazily, while iterating, rather than up front.
        dl_manager.download_config.extract_on_the_fly = True
        data_files = dl_manager.download_and_extract(self.config.data_files)
        splits = []
        for split_name, logs in data_files.items():
            if isinstance(logs, str):
                logs = [logs]
            # One file iterator per log directory (or archive) in this split.
            logs_files = [dl_manager.iter_files(log) for log in logs]
            splits.append(datasets.SplitGenerator(name=split_name, gen_kwargs={"logs_files": logs_files}))
            if not self.info.features:
                # Infer features once, from the first few samples of this split's first log.
                first_examples = list(
                    islice(self._iter_samples_from_log_files(logs_files[0]), self.NUM_EXAMPLES_FOR_FEATURES_INFERENCE)
                )
                pa_tables = [pa.Table.from_pylist([example]) for example in first_examples]
                inferred_arrow_schema = pa.concat_tables(pa_tables, promote_options="default").schema
                self.info.features = datasets.Features.from_arrow_schema(inferred_arrow_schema)

        return splits

    def _sort_samples_key(self, sample_path: str):
        # File names look like "{sample_idx}_epoch_{epoch_idx}.json".
        (sample_idx_str, epoch_idx_str) = os.path.splitext(os.path.basename(sample_path))[0].split("_epoch_")
        return (int(epoch_idx_str), int(sample_idx_str))
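        # e.g. "12_epoch_3.json" -> (3, 12): files sort epoch-major, then by
        # sample index within an epoch.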

    def _iter_samples_from_log_files(self, log_files: list[str]):
        # Keep only files that live directly inside a "samples" directory.
        sample_files = [log_file for log_file in log_files if os.path.basename(os.path.dirname(log_file)) == "samples"]
        sample_files.sort(key=self._sort_samples_key)
        for sample_file in sample_files:
            with open(sample_file) as f:
                sample = json.load(f)
            # JSON-encode dicts (and each element of a list) so every field
            # carries a simple, consistent type.
            for field in sample:
                if isinstance(sample[field], dict):
                    sample[field] = json.dumps(sample[field])
                elif isinstance(sample[field], list):
                    sample[field] = [json.dumps(x) for x in sample[field]]
            yield sample

    def _generate_examples(self, logs_files):
        for file_idx, log_files in enumerate(logs_files):
            for sample_idx, sample in enumerate(self._iter_samples_from_log_files(log_files)):
                # The (file index, sample index) pair keeps example keys unique across logs.
                yield Key(file_idx, sample_idx), sample
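

# --- Usage sketch (illustrative, not from the original commit) ---------------
# Assuming a log directory laid out as
#
#   logs/samples/0_epoch_0.json
#   logs/samples/1_epoch_0.json
#   logs/samples/0_epoch_1.json
#
# the builder can be driven directly. The paths below are hypothetical, and
# `data_files` is a standard `DatasetBuilder` keyword argument:
#
#   builder = Eval(data_files={"test": ["logs/samples/0_epoch_0.json",
#                                       "logs/samples/1_epoch_0.json",
#                                       "logs/samples/0_epoch_1.json"]})
#   builder.download_and_prepare()
#   ds = builder.as_dataset(split="test")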