diff --git a/README.md b/README.md
index cd3d90f..6915d7a 100644
--- a/README.md
+++ b/README.md
@@ -105,7 +105,7 @@ The output files are going to be in `shared-component/output` folder.
 Currently, the following models are supported:
 
 | Model Version | Supported versions |
-| ------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+|---------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
 | `yolov5` | YOLOv5n, YOLOv5s, YOLOv5m, YOLOv5l, YOLOv5x, YOLOv5n6, YOLOv5s6, YOLOv5m6, YOLOv5l6 |
 | `yolov6r1` | **v1.0 release:** YOLOv6n, YOLOv6t, YOLOv6s |
 | `yolov6r3` | **v2.0 release:** YOLOv6n, YOLOv6t, YOLOv6s, YOLOv6m, YOLOv6l<br>**v2.1 release:** YOLOv6n, YOLOv6s, YOLOv6m, YOLOv6l<br>**v3.0 release:** YOLOv6n, YOLOv6s, YOLOv6m, YOLOv6l |
@@ -116,6 +116,7 @@ Currently, the following models are supported:
 | `yolov10` | YOLOv10n, YOLOv10s, YOLOv10m, YOLOv10b, YOLOv10l, YOLOv10x |
 | `yolov11` | **Detection, Instance Segmentation, Pose, Oriented Detection, Classification:** YOLO11n, YOLO11s, YOLO11m, YOLO11l, YOLO11x |
 | `yolov12` | **Detection:** YOLO12n, YOLO12s, YOLO12m, YOLO12l, YOLO12x |
+| `yolov26` | **Detection, Instance Segmentation, Pose, Oriented Detection, Classification:** YOLO26n, YOLO26s, YOLO26m, YOLO26l, YOLO26x |
 | `yoloe` | **Detection, Instance Segmentation:** YOLOE-11s, YOLOE-11m, YOLOE-11l; YOLOE-v8s, YOLOE-v8m, YOLOE-v8l |
 | `goldyolo` | Gold-YOLO-N, Gold-YOLO-S, Gold-YOLO-M, Gold-YOLO-L |
 
diff --git a/tests/constants.py b/tests/constants.py
index b39825b..94b4c76 100644
--- a/tests/constants.py
+++ b/tests/constants.py
@@ -69,6 +69,11 @@
     {"name": "yolov12m", "version": "v12"},
     {"name": "yolov12l", "version": "v12"},
     {"name": "yolov12x", "version": "v12"},
+    {"name": "yolov26n", "version": "v26"},
+    {"name": "yolov26s", "version": "v26"},
+    {"name": "yolov26m", "version": "v26"},
+    {"name": "yolov26l", "version": "v26"},
+    {"name": "yolov26x", "version": "v26"},
     {"name": "yolov8n-cls", "version": "v8"},
     {"name": "yolov8n-seg", "version": "v8"},
     {"name": "yolov8n-pose", "version": "v8"},
@@ -77,6 +82,10 @@
     {"name": "yolov11n-seg", "version": "v11"},
     {"name": "yolov11n-pose", "version": "v11"},
     {"name": "yolov11n-obb", "version": "v11"},
+    {"name": "yolov26n-cls", "version": "v26"},
+    {"name": "yolov26n-seg", "version": "v26"},
+    {"name": "yolov26n-pose", "version": "v26"},
+    {"name": "yolov26n-obb", "version": "v26"},
     {"name": "yoloe-11s-seg", "version": "v11"},
     {"name": "yoloe-11m-seg", "version": "v11"},
     {"name": "yoloe-11l-seg", "version": "v11"},
@@ -157,10 +166,19 @@
     "yolov12m": "https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo12m.pt",
     "yolov12l": "https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo12l.pt",
     "yolov12x": "https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo12x.pt",
+    "yolov26n": "https://github.com/ultralytics/assets/releases/download/v8.4.0/yolo26n.pt",
+    "yolov26s": "https://github.com/ultralytics/assets/releases/download/v8.4.0/yolo26s.pt",
+    "yolov26m": "https://github.com/ultralytics/assets/releases/download/v8.4.0/yolo26m.pt",
+    "yolov26l": "https://github.com/ultralytics/assets/releases/download/v8.4.0/yolo26l.pt",
+    "yolov26x": "https://github.com/ultralytics/assets/releases/download/v8.4.0/yolo26x.pt",
     "yolov11n-cls": "https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n-cls.pt",
     "yolov11n-seg": "https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n-seg.pt",
     "yolov11n-pose": "https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n-pose.pt",
     "yolov11n-obb": "https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n-obb.pt",
+    "yolov26n-cls": "https://github.com/ultralytics/assets/releases/download/v8.4.0/yolo26n-cls.pt",
+    "yolov26n-seg": "https://github.com/ultralytics/assets/releases/download/v8.4.0/yolo26n-seg.pt",
+    "yolov26n-pose": "https://github.com/ultralytics/assets/releases/download/v8.4.0/yolo26n-pose.pt",
+    "yolov26n-obb": "https://github.com/ultralytics/assets/releases/download/v8.4.0/yolo26n-obb.pt",
    "yoloe-11s-seg": "https://huggingface.co/jameslahm/yoloe/resolve/main/yoloe-11s-seg-coco.pt",
    "yoloe-11m-seg": "https://huggingface.co/jameslahm/yoloe/resolve/main/yoloe-11m-seg-coco.pt",
    "yoloe-11l-seg": "https://huggingface.co/jameslahm/yoloe/resolve/main/yoloe-11l-seg-coco.pt",
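
The `tests/constants.py` hunks above extend both the test matrix and the weight-URL map. The file's own URLs show that Ultralytics dropped the `v` from released filenames from YOLO11 onward (`yolo11n-cls.pt`, `yolo12m.pt`, and here `yolo26n.pt`), so a small consistency check is cheap insurance. Below is a hedged sketch; `MODELS` and `WEIGHT_URLS` are hypothetical stand-ins for whatever names `tests/constants.py` actually gives these structures:

```python
# Hypothetical stand-ins for the real names in tests/constants.py.
MODELS = [
    {"name": "yolov26n", "version": "v26"},
    {"name": "yolov26n-seg", "version": "v26"},
]
WEIGHT_URLS = {
    "yolov26n": "https://github.com/ultralytics/assets/releases/download/v8.4.0/yolo26n.pt",
    "yolov26n-seg": "https://github.com/ultralytics/assets/releases/download/v8.4.0/yolo26n-seg.pt",
}


def check_zoo_consistency(models, urls):
    """Every test entry needs a URL, and v26 assets drop the 'v' in filenames."""
    for entry in models:
        name = entry["name"]
        assert name in urls, f"missing weight URL for {name}"
        filename = urls[name].rsplit("/", 1)[-1]
        expected = name.replace("yolov26", "yolo26") + ".pt"
        assert filename == expected, f"{name}: got {filename}, expected {expected}"


check_zoo_consistency(MODELS, WEIGHT_URLS)
```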
"yoloe-11l-seg": "https://huggingface.co/jameslahm/yoloe/resolve/main/yoloe-11l-seg-coco.pt", diff --git a/tools/main.py b/tools/main.py index f10c6e6..b388236 100644 --- a/tools/main.py +++ b/tools/main.py @@ -27,6 +27,7 @@ YOLOV10_CONVERSION, YOLOV11_CONVERSION, YOLOV12_CONVERSION, + YOLOV26_CONVERSION, detect_version, ) @@ -48,6 +49,7 @@ YOLOV10_CONVERSION, YOLOV11_CONVERSION, YOLOV12_CONVERSION, + YOLOV26_CONVERSION, ] @@ -174,6 +176,7 @@ def convert( YOLOV9_CONVERSION, YOLOV11_CONVERSION, YOLOV12_CONVERSION, + YOLOV26_CONVERSION, ]: from tools.yolo.yolov8_exporter import YoloV8Exporter diff --git a/tools/modules/__init__.py b/tools/modules/__init__.py index dc2c8e1..bda7040 100644 --- a/tools/modules/__init__.py +++ b/tools/modules/__init__.py @@ -4,6 +4,7 @@ from .exporter import Exporter from .heads import ( OBBV8, + OBBV26, ClassifyV8, DetectV5, DetectV6R1, @@ -14,7 +15,9 @@ DetectV8, DetectV10, PoseV8, + PoseV26, SegmentV8, + SegmentV26, ) from .stage2 import Multiplier @@ -27,8 +30,11 @@ "DetectV8", "Exporter", "PoseV8", + "PoseV26", "OBBV8", + "OBBV26", "SegmentV8", + "SegmentV26", "ClassifyV8", "Multiplier", "DetectV5", diff --git a/tools/modules/heads.py b/tools/modules/heads.py index 6263ee0..2808eb3 100644 --- a/tools/modules/heads.py +++ b/tools/modules/heads.py @@ -366,11 +366,14 @@ def __init__(self, old_detect, use_rvc2: bool): self.use_rvc2 = use_rvc2 - self.proj_conv = nn.Conv2d(old_detect.dfl.c1, 1, 1, bias=False).requires_grad_( - False - ) - x = torch.arange(old_detect.dfl.c1, dtype=torch.float) - self.proj_conv.weight.data[:] = nn.Parameter(x.view(1, old_detect.dfl.c1, 1, 1)) + if isinstance(old_detect.dfl, nn.Identity): + self.proj_conv = None + else: + self.proj_conv = nn.Conv2d(old_detect.dfl.c1, 1, 1, bias=False).requires_grad_( + False + ) + x = torch.arange(old_detect.dfl.c1, dtype=torch.float) + self.proj_conv.weight.data[:] = nn.Parameter(x.view(1, old_detect.dfl.c1, 1, 1)) def forward(self, x): bs = x[0].shape[0] # batch size @@ -382,9 +385,10 @@ def forward(self, x): # ------------------------------ # DFL PART - box = box.view(bs, 4, self.reg_max, h * w).permute(0, 2, 1, 3) - box = self.proj_conv(F.softmax(box, dim=1))[:, 0] - box = box.reshape([bs, 4, h, w]) + if self.proj_conv is not None: + box = box.view(bs, 4, self.reg_max, h * w).permute(0, 2, 1, 3) + box = self.proj_conv(F.softmax(box, dim=1))[:, 0] + box = box.reshape([bs, 4, h, w]) # ------------------------------ cls = self.cv3[i](x[i]) @@ -427,6 +431,27 @@ def forward(self, x): return outputs +class OBBV26(DetectV8): + """YOLOv8 OBB detection head for detection with rotation models.""" + + def __init__(self, old_obb, use_rvc2): + super().__init__(old_obb, use_rvc2) + self.ne = old_obb.ne # number of extra parameters + self.cv4 = old_obb.cv4 + + def forward(self, x): + # Detection part + outputs = super().forward(x) + + # OBB part + bs = x[0].shape[0] # batch size + angle = torch.cat( + [self.cv4[i](x[i]).view(bs, self.ne, -1) for i in range(self.nl)], 2 + ) # OBB theta logits + # Append the angle + outputs.append(angle) + + return outputs class PoseV8(DetectV8): """YOLOv8 Pose head for keypoints models.""" @@ -467,6 +492,46 @@ def kpts_decode(self, bs, kpts, i): a = torch.cat((a, y[:, :, 2:3]), 2) return a.view(bs, self.nk, -1) +class PoseV26(DetectV8): + """YOLOv8 Pose head for keypoints models.""" + + def __init__(self, old_kpts, use_rvc2): + super().__init__(old_kpts, use_rvc2) + self.kpt_shape = ( + old_kpts.kpt_shape + ) # number of keypoints, number of dims (2 for x,y or 3 for 
x,y,visible) + self.nk = old_kpts.nk # number of keypoints total + self.cv4 = old_kpts.cv4 + self.cv4_kpts = old_kpts.cv4_kpts + self.use_rvc2 = use_rvc2 + + def forward(self, x): + """Perform forward pass through YOLO model and return predictions.""" + bs = x[0].shape[0] # batch size + if self.shape != bs: + self.anchors, self.strides = make_anchors(x, self.stride, 0.5) + self.shape = bs + + # Detection part + outputs = super().forward(x) + + # Pose part + for i in range(self.nl): + feature = self.cv4[i](x[i]) + kpt = self.cv4_kpts[i](feature).view(bs, self.nk, -1) + outputs.append(self.kpts_decode(bs, kpt, i)) + + return outputs + + def kpts_decode(self, bs, kpts, i): + """Decodes keypoints.""" + ndim = self.kpt_shape[1] + y = kpts.view(bs, *self.kpt_shape, -1) + a = (y[:, :, :2] + self.anchors[i]) * self.strides[i] + if ndim == 3: + # a = torch.cat((a, y[:, :, 2:3].sigmoid()*10), 2) + a = torch.cat((a, y[:, :, 2:3]), 2) + return a.view(bs, self.nk, -1) class SegmentV8(DetectV8): """YOLOv8 Segment head for segmentation models.""" @@ -496,6 +561,33 @@ def forward(self, x): return outputs +class SegmentV26(DetectV8): + """YOLOv26 Segment head for segmentation models.""" + + def __init__(self, old_segment, use_rvc2): + super().__init__(old_segment, use_rvc2) + self.nm = old_segment.nm # number of masks + self.npr = old_segment.npr # number of protos + self.proto = old_segment.proto # protos + self.cv4 = old_segment.cv4 + + @staticmethod + def _mask_call(layer, t): + # Support both signatures: layer(t) and layer(t, t) + try: + return layer(t) + except TypeError: + return layer(t, t) + + def forward(self, x): + # Detection part + outputs = super().forward(x) + # Masks + outputs.extend(self._mask_call(self.cv4[i], x[i]) for i in range(self.nl)) + # Mask protos + outputs.append(self.proto(x)) + + return outputs class ClassifyV8(nn.Module): """YOLOv8 classification head, i.e. 
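
The `proj_conv` guard in `DetectV8.__init__` is the crux of YOLO26 support in `heads.py`: v8-style heads predict each box side as `reg_max` DFL logits and reduce them to one distance with a frozen 1x1 convolution, whereas YOLO26 heads ship with `dfl = nn.Identity()` and regress the four values directly, so there is nothing to project. A minimal sketch of what the skipped branch computes, with illustrative shapes rather than the exporter's real tensors:

```python
import torch
import torch.nn as nn
import torch.nn.functional as F

reg_max, bs, h, w = 16, 1, 2, 2
box = torch.randn(bs, 4 * reg_max, h, w)  # raw DFL logits from the head

# Frozen 1x1 conv whose weights are [0, 1, ..., reg_max - 1]: applied after a
# softmax over the bin axis, it takes the expectation of the distribution.
proj_conv = nn.Conv2d(reg_max, 1, 1, bias=False).requires_grad_(False)
proj_conv.weight.data[:] = torch.arange(reg_max, dtype=torch.float).view(
    1, reg_max, 1, 1
)

x = box.view(bs, 4, reg_max, h * w).permute(0, 2, 1, 3)  # bins -> channel axis
decoded = proj_conv(F.softmax(x, dim=1))[:, 0].reshape(bs, 4, h, w)

# The same expectation, computed by hand:
probs = F.softmax(box.view(bs, 4, reg_max, h * w), dim=2)
bins = torch.arange(reg_max, dtype=torch.float).view(1, 1, reg_max, 1)
assert torch.allclose(decoded.view(bs, 4, h * w), (probs * bins).sum(2), atol=1e-5)
```

For a YOLO26 checkpoint, `proj_conv is None` and `box` already holds the four per-anchor distances, which is why `forward` can leave the tensor untouched.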
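
`PoseV26.kpts_decode` keeps the decode on-device-friendly: raw per-keypoint offsets are shifted by the anchor centre and scaled by the level stride, while the visibility logit passes through unchanged (the sigmoid stays commented out). A hedged sketch with illustrative shapes, not the module's real anchor bookkeeping:

```python
import torch

bs, kpt_shape, hw = 1, (17, 3), 80 * 80  # 17 keypoints with x, y, visibility
kpts = torch.randn(bs, kpt_shape[0] * kpt_shape[1], hw)  # raw level-i output
anchors_i = torch.rand(2, hw)  # anchor centres for this level (illustrative)
stride_i = 8.0                 # stride for this level (illustrative)

y = kpts.view(bs, *kpt_shape, -1)           # (bs, 17, 3, hw)
xy = (y[:, :, :2] + anchors_i) * stride_i   # grid offsets -> pixel coordinates
decoded = torch.cat((xy, y[:, :, 2:3]), 2)  # visibility logits untouched
print(decoded.view(bs, kpt_shape[0] * kpt_shape[1], -1).shape)  # (1, 51, 6400)
```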
diff --git a/tools/version_detection/__init__.py b/tools/version_detection/__init__.py
index 11e3815..0d097e4 100644
--- a/tools/version_detection/__init__.py
+++ b/tools/version_detection/__init__.py
@@ -12,6 +12,7 @@
     YOLOV10_CONVERSION,
     YOLOV11_CONVERSION,
     YOLOV12_CONVERSION,
+    YOLOV26_CONVERSION,
     detect_version,
 )
 
@@ -28,6 +29,7 @@
     "YOLOV10_CONVERSION",
     "YOLOV11_CONVERSION",
     "YOLOV12_CONVERSION",
+    "YOLOV26_CONVERSION",
     "GOLD_YOLO_CONVERSION",
     "UNRECOGNIZED",
 ]
diff --git a/tools/version_detection/version_detection.py b/tools/version_detection/version_detection.py
index cc93a44..5ad5d82 100644
--- a/tools/version_detection/version_detection.py
+++ b/tools/version_detection/version_detection.py
@@ -17,6 +17,7 @@
 YOLOV10_CONVERSION = "yolov10"
 YOLOV11_CONVERSION = "yolov11"
 YOLOV12_CONVERSION = "yolov12"
+YOLOV26_CONVERSION = "yolov26"
 GOLD_YOLO_CONVERSION = "goldyolo"
 UNRECOGNIZED = "none"
 
@@ -74,7 +75,9 @@ def detect_version(path: str, debug: bool = False) -> str:
     if debug:
         print(data.decode(errors="replace"))
     content = data.decode("latin1")
-    if "yolov12" in content:
+    if "yolo26" in content:
+        return YOLOV26_CONVERSION
+    elif "yolov12" in content:
         return YOLOV12_CONVERSION
     elif "yolo11" in content:
         return YOLOV11_CONVERSION
diff --git a/tools/yolo/ultralytics b/tools/yolo/ultralytics
index 2107aa1..25a8fd3 160000
--- a/tools/yolo/ultralytics
+++ b/tools/yolo/ultralytics
@@ -1 +1 @@
-Subproject commit 2107aa1eb9e95d93645a9febbda4a3fc75a57f21
+Subproject commit 25a8fd335b0e3ec13257edad29aa8ad1a248a7d2
diff --git a/tools/yolo/yolov8_exporter.py b/tools/yolo/yolov8_exporter.py
index 29db152..43876bc 100644
--- a/tools/yolo/yolov8_exporter.py
+++ b/tools/yolo/yolov8_exporter.py
@@ -18,12 +18,15 @@
 from tools.modules import (
     OBBV8,
+    OBBV26,
     ClassifyV8,
     DetectV8,
     Exporter,
     Multiplier,
     PoseV8,
+    PoseV26,
     SegmentV8,
+    SegmentV26,
 )
 from tools.utils import get_first_conv2d_in_channels
 from tools.utils.constants import Encoding
 
@@ -34,11 +37,15 @@
 from ultralytics.nn.modules import (  # noqa: E402
     OBB,
+    OBB26,
     Classify,
     Detect,
     Pose,
+    Pose26,
     Segment,
+    Segment26,
     YOLOESegment,
+    YOLOESegment26,
 )
 
 from ultralytics.nn.tasks import load_checkpoint  # noqa: E402
 
@@ -119,11 +126,22 @@ def __init__(
     def load_model(self):
         # load the model
         model, _ = load_checkpoint(
-            self.model_path, device="cpu", inplace=True, fuse=True
+            self.model_path, device="cpu", inplace=True, fuse=False
         )
 
         self.mode = -1
-        if isinstance(model.model[-1], (Segment)) or isinstance(
+        if isinstance(model.model[-1], (Segment26)) or isinstance(
+            model.model[-1], (YOLOESegment26)
+        ):
+            model.model[-1] = SegmentV26(model.model[-1], self.use_rvc2)
+            self.mode = SEGMENT_MODE
+        elif isinstance(model.model[-1], (OBB26)):
+            model.model[-1] = OBBV26(model.model[-1], self.use_rvc2)
+            self.mode = OBB_MODE
+        elif isinstance(model.model[-1], (Pose26)):
+            model.model[-1] = PoseV26(model.model[-1], self.use_rvc2)
+            self.mode = POSE_MODE
+        elif isinstance(model.model[-1], (Segment)) or isinstance(
             model.model[-1], (YOLOESegment)
         ):
             model.model[-1] = SegmentV8(model.model[-1], self.use_rvc2)
@@ -173,6 +191,7 @@ def load_model(self):
         if len(self.imgsz) != 2:
             raise ValueError("Image size must be of length 1 or 2.")
 
+        model.fuse()
         model.eval()
 
         self.model = model
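
In `detect_version`, ordering is what the change relies on: the checkpoint bytes are sniffed for version markers, and `"yolo26"` has to be tested before the older strings because nothing stops a newer checkpoint from also containing an older marker. A minimal sketch of that priority scheme; the marker list mirrors the strings checked above, while `sniff_version` itself is only an illustration, not the repo's API:

```python
# Markers mirror version_detection.py; newest first so the first hit wins.
VERSION_MARKERS = [
    ("yolo26", "yolov26"),
    ("yolov12", "yolov12"),
    ("yolo11", "yolov11"),
]


def sniff_version(content: str) -> str:
    """Illustrative only: return the conversion name for the first marker found."""
    for marker, conversion in VERSION_MARKERS:
        if marker in content:
            return conversion
    return "none"


assert sniff_version("model: yolo26n.yaml") == "yolov26"
assert sniff_version("backbone from yolo11, nothing newer") == "yolov11"
```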
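
The `yolov8_exporter.py` change also reorders fusion: the checkpoint is now loaded with `fuse=False`, the task head is swapped for its export wrapper while the original submodules are still unfused, and `model.fuse()` runs only once the wrapper holds its references. A hedged sketch of that sequence; `swap_head` and `head_map` are illustrative names, not the exporter's API:

```python
# Illustrative only: `model` is an Ultralytics-style model exposing .model[-1]
# and .fuse(); `head_map` pairs upstream head classes with the export wrappers
# above, e.g. {Segment26: SegmentV26, OBB26: OBBV26, Pose26: PoseV26}.
def swap_head(model, head_map, use_rvc2):
    old_head = model.model[-1]
    for head_cls, wrapper in head_map.items():
        if isinstance(old_head, head_cls):
            model.model[-1] = wrapper(old_head, use_rvc2)
            break
    model.fuse()  # fuse Conv+BN only after the swap, mirroring load_model
    model.eval()
    return model
```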