Commit 13531b7

Fix OBB prediction and update Ultralytics demo notebook (#1126)

1 parent fa705eb · commit 13531b7

9 files changed: +315 -186 lines changed

README.md

Lines changed: 13 additions & 7 deletions

@@ -21,7 +21,7 @@
   <br>
   <a href="https://ieeexplore.ieee.org/document/9897990"><img src="https://img.shields.io/badge/DOI-10.1109%2FICIP46576.2022.9897990-orange.svg" alt="ci"></a>
   <br>
-  <a href="https://colab.research.google.com/github/obss/sahi/blob/main/demo/inference_for_yolov5.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"></a>
+  <a href="https://colab.research.google.com/github/obss/sahi/blob/main/demo/inference_for_ultralytics.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"></a>
   <a href="https://huggingface.co/spaces/fcakyon/sahi-yolox"><img src="https://raw.githubusercontent.com/obss/sahi/main/resources/hf_spaces_badge.svg" alt="HuggingFace Spaces"></a>

 </div>
@@ -43,7 +43,7 @@ Object detection and instance segmentation are by far the most important applica

 ## <div align="center">Quick Start Examples</div>

-[📜 List of publications that cite SAHI (currently 200+)](https://scholar.google.com/scholar?hl=en&as_sdt=2005&sciodt=0,5&cites=14065474760484865747&scipsc=&q=&scisbd=1)
+[📜 List of publications that cite SAHI (currently 300+)](https://scholar.google.com/scholar?hl=en&as_sdt=2005&sciodt=0,5&cites=14065474760484865747&scipsc=&q=&scisbd=1)

 [🏆 List of competition winners that used SAHI](https://github.com/obss/sahi/discussions/688)

@@ -55,11 +55,15 @@ Object detection and instance segmentation are by far the most important applica

 - [Pretrained weights and ICIP 2022 paper files](https://github.com/fcakyon/small-object-detection-benchmark)

-- [Visualizing and Evaluating SAHI predictions with FiftyOne](https://voxel51.com/blog/how-to-detect-small-objects/) (2024) (NEW)
+- [2025 Video Tutorial](https://www.youtube.com/watch?v=ILqMBah5ZvI) (RECOMMENDED)
+
+- [Visualizing and Evaluating SAHI predictions with FiftyOne](https://voxel51.com/blog/how-to-detect-small-objects/)

 - ['Exploring SAHI' Research Article from 'learnopencv.com'](https://learnopencv.com/slicing-aided-hyper-inference/)

-- ['VIDEO TUTORIAL: Slicing Aided Hyper Inference for Small Object Detection - SAHI'](https://www.youtube.com/watch?v=UuOjJKxn-M8&t=270s) (RECOMMENDED)
+- [Slicing Aided Hyper Inference Explained by Encord](https://encord.com/blog/slicing-aided-hyper-inference-explained/)
+
+- ['VIDEO TUTORIAL: Slicing Aided Hyper Inference for Small Object Detection - SAHI'](https://www.youtube.com/watch?v=UuOjJKxn-M8&t=270s)

 - [Video inference support is live](https://github.com/obss/sahi/discussions/626)

@@ -77,11 +81,13 @@ Object detection and instance segmentation are by far the most important applica

 - `YOLOX` + `SAHI` demo: <a href="https://huggingface.co/spaces/fcakyon/sahi-yolox"><img src="https://raw.githubusercontent.com/obss/sahi/main/resources/hf_spaces_badge.svg" alt="sahi-yolox"></a>

-- `YOLO11` + `SAHI` walkthrough: <a href="https://colab.research.google.com/github/obss/sahi/blob/main/demo/inference_for_ultralytics.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="sahi-yolov8"></a> (NEW)
+- `YOLO12` + `SAHI` walkthrough: <a href="https://colab.research.google.com/github/obss/sahi/blob/main/demo/inference_for_ultralytics.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="sahi-yolo12"></a> (NEW)

-- `RT-DETR` + `SAHI` walkthrough: <a href="https://colab.research.google.com/github/obss/sahi/blob/main/demo/inference_for_rtdetr.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="sahi-rtdetr"></a> (NEW)
+- `YOLO11-OBB` + `SAHI` walkthrough: <a href="https://colab.research.google.com/github/obss/sahi/blob/main/demo/inference_for_ultralytics.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="sahi-yolo11-obb"></a> (NEW)

-- `YOLOv8` + `SAHI` walkthrough: <a href="https://colab.research.google.com/github/obss/sahi/blob/main/demo/inference_for_ultralytics.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="sahi-yolov8"></a>
+- `YOLO11` + `SAHI` walkthrough: <a href="https://colab.research.google.com/github/obss/sahi/blob/main/demo/inference_for_ultralytics.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="sahi-yolo11"></a>
+
+- `RT-DETR` + `SAHI` walkthrough: <a href="https://colab.research.google.com/github/obss/sahi/blob/main/demo/inference_for_rtdetr.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="sahi-rtdetr"></a> (NEW)

 - `DeepSparse` + `SAHI` walkthrough: <a href="https://colab.research.google.com/github/obss/sahi/blob/main/demo/inference_for_sparse_yolov5.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="sahi-deepsparse"></a>

demo/inference_for_ultralytics.ipynb

Lines changed: 209 additions & 108 deletions
Large diffs are not rendered by default.

pyproject.toml

Lines changed: 1 addition & 1 deletion

@@ -1,6 +1,6 @@
 [project]
 name = "sahi"
-version = "0.11.21"
+version = "0.11.22"
 readme = "README.md"
 description = "A vision library for performing sliced inference on large images/small objects"
 requires-python = ">=3.8"

sahi/auto_model.py

Lines changed: 3 additions & 1 deletion

@@ -39,7 +39,7 @@ def from_pretrained(

         Args:
             model_type: str
-                Name of the detection framework (example: "yolov5", "mmdet", "detectron2")
+                Name of the detection framework (example: "ultralytics", "huggingface", "torchvision")
             model_path: str
                 Path of the detection model (ex. 'model.pt')
             config_path: str
@@ -58,8 +58,10 @@ def from_pretrained(
                 If True, automatically loads the model at initialization
             image_size: int
                 Inference input size.
+
         Returns:
             Returns an instance of a DetectionModel
+
         Raises:
             ImportError: If given {model_type} framework is not installed
         """

sahi/models/torchvision.py

Lines changed: 3 additions & 3 deletions

@@ -178,13 +178,13 @@ def _create_object_prediction_list_from_original_predictions(

         for ind in range(len(boxes)):
             if masks is not None:
-                mask = get_coco_segmentation_from_bool_mask(np.array(masks[ind]))
+                segmentation = get_coco_segmentation_from_bool_mask(np.array(masks[ind]))
             else:
-                mask = None
+                segmentation = None

             object_prediction = ObjectPrediction(
                 bbox=boxes[ind],
-                segmentation=mask,
+                segmentation=segmentation,
                 category_id=int(category_ids[ind]),
                 category_name=self.category_mapping[str(int(category_ids[ind]))],
                 shift_amount=shift_amount,

sahi/models/ultralytics.py

Lines changed: 2 additions & 2 deletions

@@ -11,7 +11,7 @@
 from sahi.models.base import DetectionModel
 from sahi.prediction import ObjectPrediction
 from sahi.utils.compatibility import fix_full_shape_list, fix_shift_amount_list
-from sahi.utils.cv import get_coco_segmentation_from_bool_mask, get_coco_segmentation_from_obb_points
+from sahi.utils.cv import get_coco_segmentation_from_bool_mask
 from sahi.utils.import_utils import check_requirements

 logger = logging.getLogger(__name__)
@@ -207,7 +207,7 @@ def _create_object_prediction_list_from_original_predictions(
                 segmentation = get_coco_segmentation_from_bool_mask(bool_mask)
             else:  # is_obb
                 obb_points = masks_or_points[pred_ind]  # Get OBB points for this prediction
-                segmentation = get_coco_segmentation_from_obb_points(obb_points)
+                segmentation = [obb_points.reshape(-1).tolist()]

             if len(segmentation) == 0:
                 continue
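The fix drops the closed-polygon helper in favor of flattening the four OBB corners directly into one open 8-value COCO segment. A standalone sketch of the same transform; the corner values are made up:

import numpy as np

# four (x, y) corners of an oriented box, as ultralytics.engine.results.OBB yields them
obb_points = np.array([[10.0, 10.0], [50.0, 12.0], [48.0, 40.0], [8.0, 38.0]])

# new behavior: flatten to a single open polygon, no repeated closing point
segmentation = [obb_points.reshape(-1).tolist()]
print(segmentation)  # [[10.0, 10.0, 50.0, 12.0, 48.0, 40.0, 8.0, 38.0]]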

sahi/predict.py

Lines changed: 8 additions & 3 deletions

@@ -113,6 +113,9 @@ def get_prediction(
     time_end = time.time() - time_start
     durations_in_seconds["prediction"] = time_end

+    if full_shape is None:
+        full_shape = [image_as_pil.height, image_as_pil.width]
+
     # process prediction
     time_start = time.time()
     # works only with 1 batch
@@ -239,19 +242,21 @@ def get_sliced_prediction(
         overlap_width_ratio=overlap_width_ratio,
         auto_slice_resolution=auto_slice_resolution,
     )
+    from sahi.models.ultralytics import UltralyticsDetectionModel

     num_slices = len(slice_image_result)
     time_end = time.time() - time_start
     durations_in_seconds["slice"] = time_end

+    if isinstance(detection_model, UltralyticsDetectionModel) and detection_model.is_obb:
+        # Only NMS is supported for OBB model outputs
+        postprocess_type = "NMS"
+
     # init match postprocess instance
     if postprocess_type not in POSTPROCESS_NAME_TO_CLASS.keys():
         raise ValueError(
             f"postprocess_type should be one of {list(POSTPROCESS_NAME_TO_CLASS.keys())} but given as {postprocess_type}"
         )
-    elif postprocess_type == "UNIONMERGE":
-        # deprecated in v0.9.3
-        raise ValueError("'UNIONMERGE' postprocess_type is deprecated, use 'GREEDYNMM' instead.")
     postprocess_constructor = POSTPROCESS_NAME_TO_CLASS[postprocess_type]
     postprocess = postprocess_constructor(
         match_threshold=postprocess_match_threshold,
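Together the two hunks let an OBB checkpoint run through sliced inference without manual tuning: get_prediction derives full_shape from the input image, and get_sliced_prediction forces NMS since only NMS is supported for OBB outputs. A hedged end-to-end sketch; "yolo11n-obb.pt" and "demo.jpg" are placeholders:

from sahi import AutoDetectionModel
from sahi.predict import get_sliced_prediction

detection_model = AutoDetectionModel.from_pretrained(
    model_type="ultralytics",
    model_path="yolo11n-obb.pt",  # placeholder OBB checkpoint
    confidence_threshold=0.25,
)

# any requested postprocess_type is overridden to "NMS" internally for OBB models
result = get_sliced_prediction(
    "demo.jpg",                   # placeholder image path
    detection_model,
    slice_height=512,
    slice_width=512,
    overlap_height_ratio=0.2,
    overlap_width_ratio=0.2,
)
result.export_visuals(export_dir="outputs/")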

sahi/utils/cv.py

Lines changed: 76 additions & 55 deletions

@@ -540,68 +540,88 @@ def visualize_object_predictions(
     # set text_size for category names
     text_size = text_size or rect_th / 3

-    # add masks to image if present
+    # add masks or obb polygons to image if present
     for object_prediction in object_prediction_list:
         # deepcopy object_prediction_list so that original is not altered
         object_prediction = object_prediction.deepcopy()
-        # visualize masks if present
-        if object_prediction.mask is not None:
-            # deepcopy mask so that original is not altered
-            mask = object_prediction.mask.bool_mask
-            # set color
-            if colors is not None:
-                color = colors(object_prediction.category.id)
-            # draw mask
-            rgb_mask = apply_color_mask(mask, color or (0, 0, 0))
-            image = cv2.addWeighted(image, 1, rgb_mask, 0.6, 0)
-
-    # add bboxes to image if present
-    for object_prediction in object_prediction_list:
-        # deepcopy object_prediction_list so that original is not altered
-        object_prediction = object_prediction.deepcopy()
-
-        bbox = object_prediction.bbox.to_xyxy()
-        category_name = object_prediction.category.name
-        score = object_prediction.score.value
-
+        # arange label to be displayed
+        label = f"{object_prediction.category.name}"
+        if not hide_conf:
+            label += f" {object_prediction.score.value:.2f}"
         # set color
         if colors is not None:
             color = colors(object_prediction.category.id)
-        # set bbox points
-        point1, point2 = (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3]))
-        # visualize boxes
-        cv2.rectangle(
-            image,
-            point1,
-            point2,
-            color=color or (0, 0, 0),
-            thickness=rect_th,
-        )
-
-        if not hide_labels:
-            # arange bounding box text location
-            label = f"{category_name}"
-
-            if not hide_conf:
-                label += f" {score:.2f}"
-
-            box_width, box_height = cv2.getTextSize(label, 0, fontScale=text_size, thickness=text_th)[
-                0
-            ]  # label width, height
-            outside = point1[1] - box_height - 3 >= 0  # label fits outside box
-            point2 = point1[0] + box_width, point1[1] - box_height - 3 if outside else point1[1] + box_height + 3
-            # add bounding box text
-            cv2.rectangle(image, point1, point2, color or (0, 0, 0), -1, cv2.LINE_AA)  # filled
-            cv2.putText(
+        # visualize masks or obb polygons if present
+        has_mask = object_prediction.mask is not None
+        is_obb_pred = False
+        if has_mask:
+            segmentation = object_prediction.mask.segmentation
+            if len(segmentation) == 1 and len(segmentation[0]) == 8:
+                is_obb_pred = True
+
+            if is_obb_pred:
+                points = np.array(segmentation).reshape((-1, 1, 2)).astype(np.int32)
+                cv2.polylines(image, [points], isClosed=True, color=color or (0, 0, 0), thickness=rect_th)
+
+                if not hide_labels:
+                    lowest_point = points[points[:, :, 1].argmax()][0]
+                    box_width, box_height = cv2.getTextSize(label, 0, fontScale=text_size, thickness=text_th)[0]
+                    outside = lowest_point[1] - box_height - 3 >= 0
+                    text_bg_point1 = (
+                        lowest_point[0],
+                        lowest_point[1] - box_height - 3 if outside else lowest_point[1] + 3,
+                    )
+                    text_bg_point2 = (lowest_point[0] + box_width, lowest_point[1])
+                    cv2.rectangle(
+                        image, text_bg_point1, text_bg_point2, color or (0, 0, 0), thickness=-1, lineType=cv2.LINE_AA
+                    )
+                    cv2.putText(
+                        image,
+                        label,
+                        (lowest_point[0], lowest_point[1] - 2 if outside else lowest_point[1] + box_height + 2),
+                        0,
+                        text_size,
+                        (255, 255, 255),
+                        thickness=text_th,
+                    )
+            else:
+                # draw mask
+                rgb_mask = apply_color_mask(object_prediction.mask.bool_mask, color or (0, 0, 0))
+                image = cv2.addWeighted(image, 1, rgb_mask, 0.6, 0)
+
+        # add bboxes to image if is_obb_pred=False
+        if not is_obb_pred:
+            bbox = object_prediction.bbox.to_xyxy()
+
+            # set bbox points
+            point1, point2 = (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3]))
+            # visualize boxes
+            cv2.rectangle(
                 image,
-                label,
-                (point1[0], point1[1] - 2 if outside else point1[1] + box_height + 2),
-                0,
-                text_size,
-                (255, 255, 255),
-                thickness=text_th,
+                point1,
+                point2,
+                color=color or (0, 0, 0),
+                thickness=rect_th,
             )

+            if not hide_labels:
+                box_width, box_height = cv2.getTextSize(label, 0, fontScale=text_size, thickness=text_th)[
+                    0
+                ]  # label width, height
+                outside = point1[1] - box_height - 3 >= 0  # label fits outside box
+                point2 = point1[0] + box_width, point1[1] - box_height - 3 if outside else point1[1] + box_height + 3
+                # add bounding box text
+                cv2.rectangle(image, point1, point2, color or (0, 0, 0), -1, cv2.LINE_AA)  # filled
+                cv2.putText(
+                    image,
+                    label,
+                    (point1[0], point1[1] - 2 if outside else point1[1] + box_height + 2),
+                    0,
+                    text_size,
+                    (255, 255, 255),
+                    thickness=text_th,
+                )
+
     # export if output_dir is present
     if output_dir is not None:
         # export image with predictions
@@ -614,7 +634,7 @@ def visualize_object_predictions(
     return {"image": image, "elapsed_time": elapsed_time}


-def get_coco_segmentation_from_bool_mask(bool_mask):
+def get_coco_segmentation_from_bool_mask(bool_mask: np.ndarray) -> List[List[float]]:
     """
     Convert boolean mask to coco segmentation format
     [
@@ -712,9 +732,10 @@ def get_coco_segmentation_from_obb_points(obb_points: np.ndarray) -> List[List[f
         obb_points: np.ndarray
             OBB points tensor from ultralytics.engine.results.OBB
             Shape: (4, 2) containing 4 points with (x,y) coordinates each
+
     Returns:
         List[List[float]]: Polygon points in COCO format
-        [[x1, y1, x2, y2, x3, y3, x4, y4, x1, y1], [...], ...]
+        [[x1, y1, x2, y2, x3, y3, x4, y4], [...], ...]
     """
     # Convert from (4,2) to [x1,y1,x2,y2,x3,y3,x4,y4] format
     points = obb_points.reshape(-1).tolist()
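The visualizer now recognizes an OBB prediction by its segmentation shape (one segment, eight values) and draws the rotated outline with cv2.polylines instead of shading a bitmap mask. A self-contained sketch of that heuristic; the canvas, color, and coordinates are made up:

import cv2
import numpy as np

image = np.zeros((100, 100, 3), dtype=np.uint8)   # placeholder canvas
segmentation = [[10, 10, 80, 15, 75, 70, 5, 65]]  # one open 4-corner polygon

# same check as visualize_object_predictions: one segment with eight values => OBB
is_obb_pred = len(segmentation) == 1 and len(segmentation[0]) == 8
if is_obb_pred:
    points = np.array(segmentation).reshape((-1, 1, 2)).astype(np.int32)
    cv2.polylines(image, [points], isClosed=True, color=(0, 255, 0), thickness=2)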

tests/test_ultralyticsmodel.py

Lines changed: 0 additions & 6 deletions

@@ -219,12 +219,6 @@ def test_yolo11_obb(self):
         # Verify segmentation is a list of points
         self.assertTrue(isinstance(coco_segmentation, list))
         self.assertGreater(len(coco_segmentation), 0)
-        # Verify each segment is a valid closed polygon
-        for segment in coco_segmentation:
-            self.assertEqual(len(segment), 10)  # 4 points + 1 closing point (x,y coordinates)
-            # Verify polygon is closed (first point equals last point)
-            self.assertEqual(segment[0], segment[-2])  # x coordinate
-            self.assertEqual(segment[1], segment[-1])  # y coordinate


 if __name__ == "__main__":
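The deleted assertions encoded the old closed 10-value polygon format. A hedged sketch of the equivalent check for the new open format, not part of this commit; the dummy coco_segmentation stands in for the model output used in test_yolo11_obb:

# dummy value standing in for the OBB segmentation produced by the model
coco_segmentation = [[10.0, 10.0, 50.0, 12.0, 48.0, 40.0, 8.0, 38.0]]
for segment in coco_segmentation:
    # each segment is now an open 4-corner polygon: 8 values, no closing point
    assert len(segment) == 8  # x1, y1, x2, y2, x3, y3, x4, y4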
