stack lists of tensors in BatchFeature, improve error messages, add tests

yonigozlan · yonigozlan · commit e5d109258ea2 · 2025-12-09T20:30:18.000Z
diff --git a/src/transformers/feature_extraction_utils.py b/src/transformers/feature_extraction_utils.py
@@ -67,11 +67,18 @@ class BatchFeature(UserDict):
         tensor_type (`Union[None, str, TensorType]`, *optional*):
             You can give a tensor_type here to convert the lists of integers in PyTorch/Numpy Tensors at
             initialization.
+        skip_tensor_conversion (`list[str]` or `set[str]`, *optional*):
+            List or set of keys that should NOT be converted to tensors, even when `tensor_type` is specified.
     """
 
-    def __init__(self, data: Optional[dict[str, Any]] = None, tensor_type: Union[None, str, TensorType] = None):
+    def __init__(
+        self,
+        data: Optional[dict[str, Any]] = None,
+        tensor_type: Union[None, str, TensorType] = None,
+        skip_tensor_conversion: Optional[Union[list[str], set[str]]] = None,
+    ):
         super().__init__(data)
-        self.convert_to_tensors(tensor_type=tensor_type)
+        self.convert_to_tensors(tensor_type=tensor_type, skip_tensor_conversion=skip_tensor_conversion)
 
     def __getitem__(self, item: str) -> Any:
         """
@@ -110,6 +117,14 @@ def _get_is_as_tensor_fns(self, tensor_type: Optional[Union[str, TensorType]] =
             import torch
 
             def as_tensor(value):
+                if torch.is_tensor(value):
+                    return value
+
+                # stack list of tensors if tensor_type is PyTorch (torch.tensor() does not support a list of tensors)
+                if isinstance(value, (list, tuple)) and len(value) > 0 and torch.is_tensor(value[0]):
+                    return torch.stack(value)
+
+                # convert list of numpy arrays to a single numpy array (stack); note this is still the PyTorch as_tensor path
                 if isinstance(value, (list, tuple)) and len(value) > 0:
                     if isinstance(value[0], np.ndarray):
                         value = np.array(value)
@@ -138,14 +153,20 @@ def as_tensor(value, dtype=None):
             is_tensor = is_numpy_array
         return is_tensor, as_tensor
 
-    def convert_to_tensors(self, tensor_type: Optional[Union[str, TensorType]] = None):
+    def convert_to_tensors(
+        self,
+        tensor_type: Optional[Union[str, TensorType]] = None,
+        skip_tensor_conversion: Optional[Union[list[str], set[str]]] = None,
+    ):
         """
         Convert the inner content to tensors.
 
         Args:
             tensor_type (`str` or [`~utils.TensorType`], *optional*):
                 The type of tensors to use. If `str`, should be one of the values of the enum [`~utils.TensorType`]. If
                 `None`, no modification is done.
+            skip_tensor_conversion (`list[str]` or `set[str]`, *optional*):
+                List or set of keys that should NOT be converted to tensors, even when `tensor_type` is specified.
         """
         if tensor_type is None:
             return self
@@ -154,18 +175,26 @@ def convert_to_tensors(self, tensor_type: Optional[Union[str, TensorType]] = Non
 
         # Do the tensor conversion in batch
         for key, value in self.items():
+            # Skip keys explicitly marked for no conversion
+            if skip_tensor_conversion and key in skip_tensor_conversion:
+                continue
+
             try:
                 if not is_tensor(value):
                     tensor = as_tensor(value)
-
                     self[key] = tensor
-            except:  # noqa E722
+            except Exception as e:
                 if key == "overflowing_values":
-                    raise ValueError("Unable to create tensor returning overflowing values of different lengths. ")
+                    raise ValueError(
+                        f"Unable to create tensor for '{key}' with overflowing values of different lengths. "
+                        f"Original error: {str(e)}"
+                    ) from e
                 raise ValueError(
-                    "Unable to create tensor, you should probably activate padding "
-                    "with 'padding=True' to have batched tensors with the same length."
-                )
+                    f"Unable to convert output '{key}' (type: {type(value).__name__}) to tensor: {str(e)}\n"
+                    f"You can try:\n"
+                    f"  1. Use padding=True to ensure all outputs have the same shape\n"
+                    f"  2. Set return_tensors=None to return Python objects instead of tensors"
+                ) from e
 
         return self
 
diff --git a/src/transformers/image_processing_utils_fast.py b/src/transformers/image_processing_utils_fast.py
@@ -932,7 +932,6 @@ def _preprocess(
         if do_pad:
             processed_images = self.pad(processed_images, pad_size=pad_size, disable_grouping=disable_grouping)
 
-        processed_images = torch.stack(processed_images, dim=0) if return_tensors else processed_images
         return BatchFeature(data={"pixel_values": processed_images}, tensor_type=return_tensors)
 
     def to_dict(self):
diff --git a/src/transformers/models/gemma3/image_processing_gemma3_fast.py b/src/transformers/models/gemma3/image_processing_gemma3_fast.py
@@ -231,7 +231,6 @@ def _preprocess(
             processed_images_grouped[shape] = stacked_images
 
         processed_images = reorder_images(processed_images_grouped, grouped_images_index)
-        processed_images = torch.stack(processed_images, dim=0) if return_tensors else processed_images
         return BatchFeature(
             data={"pixel_values": processed_images, "num_crops": num_crops}, tensor_type=return_tensors
         )
diff --git a/tests/utils/test_feature_extraction_utils.py b/tests/utils/test_feature_extraction_utils.py
@@ -20,19 +20,156 @@
 from pathlib import Path
 
 import httpx
+import numpy as np
 
 from transformers import AutoFeatureExtractor, Wav2Vec2FeatureExtractor
-from transformers.testing_utils import TOKEN, TemporaryHubRepo, get_tests_dir, is_staging_test
+from transformers.feature_extraction_utils import BatchFeature
+from transformers.testing_utils import TOKEN, TemporaryHubRepo, get_tests_dir, is_staging_test, require_torch
+from transformers.utils import is_torch_available
 
 
 sys.path.append(str(Path(__file__).parent.parent.parent / "utils"))
 
 from test_module.custom_feature_extraction import CustomFeatureExtractor  # noqa E402
 
 
+if is_torch_available():
+    import torch
+
+
 SAMPLE_FEATURE_EXTRACTION_CONFIG_DIR = get_tests_dir("fixtures")
 
 
+class BatchFeatureTester(unittest.TestCase):
+    """Tests for the BatchFeature class and tensor conversion."""
+
+    def test_batch_feature_basic_access_and_no_conversion(self):
+        """Test basic dict/attribute access and no conversion when tensor_type=None."""
+        data = {"input_values": [[1, 2, 3], [4, 5, 6]], "labels": [0, 1]}
+        batch = BatchFeature(data)
+
+        # Dict-style and attribute-style access
+        self.assertEqual(batch["input_values"], [[1, 2, 3], [4, 5, 6]])
+        self.assertEqual(batch.labels, [0, 1])
+
+        # No conversion without tensor_type
+        self.assertIsInstance(batch["input_values"], list)
+
+    @require_torch
+    def test_batch_feature_numpy_conversion(self):
+        """Test conversion to numpy arrays from lists, numpy arrays, torch tensors, and lists of either."""
+        # From lists
+        batch = BatchFeature({"input_values": [[1, 2, 3], [4, 5, 6]]}, tensor_type="np")
+        self.assertIsInstance(batch["input_values"], np.ndarray)
+        self.assertEqual(batch["input_values"].shape, (2, 3))
+
+        # From numpy arrays (should remain numpy)
+        numpy_data = np.array([[1, 2, 3], [4, 5, 6]])
+        batch_arrays = BatchFeature({"input_values": numpy_data}, tensor_type="np")
+        np.testing.assert_array_equal(batch_arrays["input_values"], numpy_data)
+
+        # From list of numpy arrays with same shape should stack
+        numpy_data = [np.array([[1, 2, 3], [4, 5, 6]]), np.array([[7, 8, 9], [10, 11, 12]])]
+        batch_stacked = BatchFeature({"input_values": numpy_data}, tensor_type="np")
+        np.testing.assert_array_equal(
+            batch_stacked["input_values"], np.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]])
+        )
+
+        # from tensor
+        tensor = torch.tensor([[1, 2, 3], [4, 5, 6]])
+        batch_tensor = BatchFeature({"input_values": tensor}, tensor_type="np")
+        np.testing.assert_array_equal(batch_tensor["input_values"], tensor.numpy())
+
+        # from list of tensors with same shape should stack
+        tensors = [torch.tensor([[1, 2, 3], [4, 5, 6]]), torch.tensor([[7, 8, 9], [10, 11, 12]])]
+        batch_stacked = BatchFeature({"input_values": tensors}, tensor_type="np")
+        self.assertIsInstance(batch_stacked["input_values"], np.ndarray)
+        np.testing.assert_array_equal(
+            batch_stacked["input_values"], np.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]])
+        )
+
+    @require_torch
+    def test_batch_feature_pytorch_conversion(self):
+        """Test conversion to PyTorch tensors from various input types."""
+        # From lists
+        batch = BatchFeature({"input_values": [[1, 2, 3], [4, 5, 6]]}, tensor_type="pt")
+        self.assertIsInstance(batch["input_values"], torch.Tensor)
+        self.assertEqual(batch["input_values"].shape, (2, 3))
+
+        # from tensor (should be returned as-is)
+        tensor = torch.tensor([[1, 2, 3], [4, 5, 6]])
+        batch_tensor = BatchFeature({"input_values": tensor}, tensor_type="pt")
+        torch.testing.assert_close(batch_tensor["input_values"], tensor)
+
+        # From numpy arrays
+        batch_numpy = BatchFeature({"input_values": np.array([[1, 2]])}, tensor_type="pt")
+        self.assertIsInstance(batch_numpy["input_values"], torch.Tensor)
+
+        # List of same-shape tensors should stack
+        tensors = [torch.randn(3, 10, 10) for _ in range(3)]
+        batch_stacked = BatchFeature({"pixel_values": tensors}, tensor_type="pt")
+        self.assertEqual(batch_stacked["pixel_values"].shape, (3, 3, 10, 10))
+
+        # List of same-shape numpy arrays should stack
+        numpy_arrays = [np.random.randn(3, 10, 10) for _ in range(3)]
+        batch_stacked = BatchFeature({"pixel_values": numpy_arrays}, tensor_type="pt")
+        self.assertIsInstance(batch_stacked["pixel_values"], torch.Tensor)
+        self.assertEqual(batch_stacked["pixel_values"].shape, (3, 3, 10, 10))
+
+    @require_torch
+    def test_batch_feature_error_handling(self):
+        """Test clear error messages for common conversion failures."""
+        # Ragged tensors (different shapes)
+        data_ragged = {"values": [torch.randn(3, 224, 224), torch.randn(3, 448, 448)]}
+        with self.assertRaises(ValueError) as context:
+            BatchFeature(data_ragged, tensor_type="pt")
+        error_msg = str(context.exception)
+        self.assertIn("stack expects each tensor to be equal size", error_msg.lower())
+        self.assertIn("return_tensors=None", error_msg)
+
+        # Ragged numpy arrays (different shapes)
+        data_ragged = {"values": [np.random.randn(3, 224, 224), np.random.randn(3, 448, 448)]}
+        with self.assertRaises(ValueError) as context:
+            BatchFeature(data_ragged, tensor_type="np")
+        error_msg = str(context.exception)
+        self.assertIn("inhomogeneous", error_msg.lower())
+        self.assertIn("return_tensors=None", error_msg)
+
+        # Unconvertible type (dict)
+        data_dict = {"values": [[1, 2]], "metadata": {"key": "val"}}
+        with self.assertRaises(ValueError) as context:
+            BatchFeature(data_dict, tensor_type="pt")
+        self.assertIn("metadata", str(context.exception))
+
+    @require_torch
+    def test_batch_feature_skip_tensor_conversion(self):
+        """Test skip_tensor_conversion parameter for metadata fields."""
+        import torch
+
+        data = {"pixel_values": [[1, 2, 3]], "num_crops": [1, 2], "sizes": [(224, 224)]}
+        batch = BatchFeature(data, tensor_type="pt", skip_tensor_conversion=["num_crops", "sizes"])
+
+        # pixel_values should be converted
+        self.assertIsInstance(batch["pixel_values"], torch.Tensor)
+        # num_crops and sizes should remain as lists
+        self.assertIsInstance(batch["num_crops"], list)
+        self.assertIsInstance(batch["sizes"], list)
+
+    @require_torch
+    def test_batch_feature_convert_to_tensors_method(self):
+        """Test convert_to_tensors method can be called after initialization."""
+        import torch
+
+        data = {"input_values": [[1, 2, 3]], "metadata": [1, 2]}
+        batch = BatchFeature(data)  # No conversion initially
+        self.assertIsInstance(batch["input_values"], list)
+
+        # Convert with skip parameter
+        batch.convert_to_tensors(tensor_type="pt", skip_tensor_conversion=["metadata"])
+        self.assertIsInstance(batch["input_values"], torch.Tensor)
+        self.assertIsInstance(batch["metadata"], list)
+
+
 class FeatureExtractorUtilTester(unittest.TestCase):
     def test_cached_files_are_used_when_internet_is_down(self):
         # A mock response for an HTTP head request to emulate server down

Original file line number	Diff line number	Diff line change
`@@ -231,7 +231,6 @@ def _preprocess(`
`231`	`231`	`processed_images_grouped[shape] = stacked_images`
`232`	`232`
`233`	`233`	`processed_images = reorder_images(processed_images_grouped, grouped_images_index)`
`234`		`- processed_images = torch.stack(processed_images, dim=0) if return_tensors else processed_images`
`235`	`234`	`return BatchFeature(`
`236`	`235`	`data={"pixel_values": processed_images, "num_crops": num_crops}, tensor_type=return_tensors`
`237`	`236`	`)`