Add an alternative scenario to EoMT post_process_semantic_segmentation in case patch_offsets is None (#42716)

simonreise · yonigozlan · web-flow · commit 5b4d72c5061a · 2025-12-09T18:00:09.000Z
* Add an alternative scenario in case patch_offsets is None

* Fixup

* Fix an error

* Simplified the function

---------

Co-authored-by: Yoni Gozlan <74535834+yonigozlan@users.noreply.github.com>
diff --git a/src/transformers/models/eomt/image_processing_eomt.py b/src/transformers/models/eomt/image_processing_eomt.py
@@ -815,7 +815,19 @@ def post_process_semantic_segmentation(
 
         segmentation_logits = torch.einsum("bqc, bqhw -> bchw", masks_classes, masks_probs)
 
-        output_logits = self.merge_image_patches(segmentation_logits, patch_offsets, target_sizes, size)
+        if patch_offsets:
+            output_logits = self.merge_image_patches(segmentation_logits, patch_offsets, target_sizes, size)
+        else:
+            output_logits = []
+
+            for idx in range(len(segmentation_logits)):
+                resized_logits = torch.nn.functional.interpolate(
+                    segmentation_logits[idx].unsqueeze(dim=0),
+                    size=target_sizes[idx],
+                    mode="bilinear",
+                    align_corners=False,
+                )
+                output_logits.append(resized_logits[0])
 
         preds = [logit.argmax(dim=0) for logit in output_logits]
         return preds
diff --git a/src/transformers/models/eomt/image_processing_eomt_fast.py b/src/transformers/models/eomt/image_processing_eomt_fast.py
@@ -385,7 +385,19 @@ def post_process_semantic_segmentation(
 
         segmentation_logits = torch.einsum("bqc, bqhw -> bchw", masks_classes, masks_probs)
 
-        output_logits = self.merge_image_patches(segmentation_logits, patch_offsets, target_sizes, size)
+        if patch_offsets:
+            output_logits = self.merge_image_patches(segmentation_logits, patch_offsets, target_sizes, size)
+        else:
+            output_logits = []
+
+            for idx in range(len(segmentation_logits)):
+                resized_logits = torch.nn.functional.interpolate(
+                    segmentation_logits[idx].unsqueeze(dim=0),
+                    size=target_sizes[idx],
+                    mode="bilinear",
+                    align_corners=False,
+                )
+                output_logits.append(resized_logits[0])
 
         preds = [logit.argmax(dim=0) for logit in output_logits]
         return preds