欢迎关注我的CSDN:https://spike.blog.csdn.net/
本文地址:https://spike.blog.csdn.net/article/details/141140498
免责声明:本文来源于个人知识与公开资料,仅用于学术交流,欢迎讨论,不支持转载。
在 ComfyUI 的 Impact-Pack 插件中,检测(Detection)使用 YOLO v8 系列模型,分割(Segmentation)使用 SAM 的第 1 个版本(目前尚不支持 SAM 的最新版本)。借助这两类模型,可以完成基础的检测与分割任务,实现人物与物体的分割。
ComfyUI:https://github.com/comfyanonymous/ComfyUI
1. 模型准备
Load Image 节点,上传的图像位于:
[your folder]/ComfyUI/input
SAM 模型:位于 ComfyUI/models/sams,HuggingFace 的下载路径:
wget https://huggingface.co/segments-arnaud/sam_vit_h/resolve/main/sam_vit_h_4b8939.pth
模型大小:
ComfyUI/models/sams
├── [358M] sam_vit_b_01ec64.pth
└── [2.4G] sam_vit_h_4b8939.pth
YOLO 模型位于 ComfyUI/models/ultralytics,即:
ComfyUI/models/ultralytics
├── [4.0K] bbox
│   ├── [ 50M] face_yolov8m.pt
│   └── [ 21M] hand_yolov8s.pt
└── [4.0K] segm
    ├── [ 23M] deepfashion2_yolov8s-seg.pt
    ├── [ 52M] face_yolov8m-seg_60.pt
    ├── [6.5M] hair_yolov8n-seg_60.pt
    └── [ 52M] person_yolov8m-seg.pt
下载路径:https://huggingface.co/Bingsu/adetailer/tree/main
2. 任务流程
YOLO 的检测和分割流程:
YOLO + SAM 的检测和分割流程:
两类流程的效果对比:左侧是仅使用 YOLO 的效果,右侧是结合 SAM 的效果,后者的分割轮廓更加清晰:
流程 JSON:
{
"last_node_id": 25,
"last_link_id": 30,
"nodes": [
{
"id": 1,
"type": "LoadImage",
"pos": {
"0": 186,
"1": 125
},
"size": {
"0": 315,
"1": 314
},
"flags": {},
"order": 0,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "IMAGE",
"type": "IMAGE",
"links": [
5,
6,
16,
28
],
"slot_index": 0,
"label": "IMAGE"
},
{
"name": "MASK",
"type": "MASK",
"links": null,
"label": "MASK"
}
],
"properties": {
"Node name for S&R": "LoadImage"
},
"widgets_values": [
"咝小喵生日会限定壁纸 (14).jpg",
"image"
]
},
{
"id": 5,
"type": "BboxDetectorSEGS",
"pos": {
"0": 1005,
"1": 125
},
"size": {
"0": 400,
"1": 217
},
"flags": {},
"order": 3,
"mode": 0,
"inputs": [
{
"name": "bbox_detector",
"type": "BBOX_DETECTOR",
"link": 3,
"label": "bbox_detector"
},
{
"name": "image",
"type": "IMAGE",
"link": 5,
"label": "image"
},
{
"name": "detailer_hook",
"type": "DETAILER_HOOK",
"link": null,
"shape": 7,
"label": "detailer_hook"
}
],
"outputs": [
{
"name": "SEGS",
"type": "SEGS",
"links": [
7,
12
],
"slot_index": 0,
"label": "SEGS"
}
],
"properties": {
"Node name for S&R": "BboxDetectorSEGS"
},
"widgets_values": [
0.5,
10,
3,
10,
"all",
true
]
},
{
"id": 7,
"type": "SEGSPreview",
"pos": {
"0": 1493,
"1": 125
},
"size": {
"0": 315,
"1": 314
},
"flags": {},
"order": 6,
"mode": 0,
"inputs": [
{
"name": "segs",
"type": "SEGS",
"link": 7,
"label": "segs"
},
{
"name": "fallback_image_opt",
"type": "IMAGE",
"link": null,
"shape": 7,
"label": "fallback_image_opt"
}
],
"outputs": [
{
"name": "IMAGE",
"type": "IMAGE",
"links": [],
"slot_index": 0,
"shape": 6,
"label": "IMAGE"
}
],
"properties": {
"Node name for S&R": "SEGSPreview"
},
"widgets_values": [
true,
0.2
]
},
{
"id": 6,
"type": "SegmDetectorSEGS",
"pos": {
"0": 1005,
"1": 494
},
"size": {
"0": 400,
"1": 212
},
"flags": {},
"order": 4,
"mode": 0,
"inputs": [
{
"name": "segm_detector",
"type": "SEGM_DETECTOR",
"link": 4,
"label": "segm_detector"
},
{
"name": "image",
"type": "IMAGE",
"link": 6,
"label": "image"
},
{
"name": "detailer_hook",
"type": "DETAILER_HOOK",
"link": null,
"shape": 7,
"label": "detailer_hook"
}
],
"outputs": [
{
"name": "SEGS",
"type": "SEGS",
"links": [
18
],
"slot_index": 0,
"label": "SEGS"
}
],
"properties": {
"Node name for S&R": "SegmDetectorSEGS"
},
"widgets_values": [
0.5,
10,
3,
10,
"all",
true
]
},
{
"id": 17,
"type": "SEGSPreview",
"pos": {
"0": 1493,
"1": 494
},
"size": {
"0": 315,
"1": 314
},
"flags": {},
"order": 8,
"mode": 0,
"inputs": [
{
"name": "segs",
"type": "SEGS",
"link": 18,
"label": "segs"
},
{
"name": "fallback_image_opt",
"type": "IMAGE",
"link": null,
"shape": 7,
"label": "fallback_image_opt"
}
],
"outputs": [
{
"name": "IMAGE",
"type": "IMAGE",
"links": [],
"slot_index": 0,
"shape": 6,
"label": "IMAGE"
}
],
"properties": {
"Node name for S&R": "SEGSPreview"
},
"widgets_values": [
true,
0.2
]
},
{
"id": 3,
"type": "SAMDetectorSegmented",
"pos": {
"0": 564,
"1": 782
},
"size": {
"0": 315,
"1": 218
},
"flags": {},
"order": 7,
"mode": 0,
"inputs": [
{
"name": "sam_model",
"type": "SAM_MODEL",
"link": 1,
"label": "sam_model"
},
{
"name": "segs",
"type": "SEGS",
"link": 12,
"label": "segs"
},
{
"name": "image",
"type": "IMAGE",
"link": 16,
"label": "image"
}
],
"outputs": [
{
"name": "combined_mask",
"type": "MASK",
"links": [],
"slot_index": 0,
"label": "combined_mask"
},
{
"name": "batch_masks",
"type": "MASK",
"links": [
24
],
"slot_index": 1,
"label": "batch_masks"
}
],
"properties": {
"Node name for S&R": "SAMDetectorSegmented"
},
"widgets_values": [
"center-1",
0,
0.93,
0,
0.7,
"False"
]
},
{
"id": 23,
"type": "MaskPreview+",
"pos": {
"0": 1263,
"1": 782
},
"size": {
"0": 210,
"1": 246
},
"flags": {},
"order": 11,
"mode": 0,
"inputs": [
{
"name": "mask",
"type": "MASK",
"link": 25,
"label": "mask"
}
],
"outputs": [],
"properties": {
"Node name for S&R": "MaskPreview+"
},
"widgets_values": []
},
{
"id": 22,
"type": "MasksToMaskList",
"pos": {
"0": 916,
"1": 782
},
"size": {
"0": 289.79998779296875,
"1": 26
},
"flags": {},
"order": 10,
"mode": 0,
"inputs": [
{
"name": "masks",
"type": "MASK",
"link": 24,
"label": "masks"
}
],
"outputs": [
{
"name": "MASK",
"type": "MASK",
"links": [
25
],
"slot_index": 0,
"shape": 6,
"label": "MASK"
}
],
"properties": {
"Node name for S&R": "MasksToMaskList"
},
"widgets_values": []
},
{
"id": 2,
"type": "SAMLoader",
"pos": {
"0": 192,
"1": 782
},
"size": {
"0": 315,
"1": 82
},
"flags": {},
"order": 1,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "SAM_MODEL",
"type": "SAM_MODEL",
"links": [
1,
29
],
"slot_index": 0,
"label": "SAM_MODEL"
}
],
"properties": {
"Node name for S&R": "SAMLoader"
},
"widgets_values": [
"sam_vit_h_4b8939.pth",
"AUTO"
]
},
{
"id": 24,
"type": "ImpactSimpleDetectorSEGS",
"pos": {
"0": 938,
"1": 1075
},
"size": {
"0": 315,
"1": 310
},
"flags": {},
"order": 5,
"mode": 0,
"inputs": [
{
"name": "bbox_detector",
"type": "BBOX_DETECTOR",
"link": 26,
"label": "bbox_detector"
},
{
"name": "image",
"type": "IMAGE",
"link": 28,
"label": "image"
},
{
"name": "sam_model_opt",
"type": "SAM_MODEL",
"link": 29,
"shape": 7,
"label": "sam_model_opt"
},
{
"name": "segm_detector_opt",
"type": "SEGM_DETECTOR",
"link": 27,
"shape": 7,
"label": "segm_detector_opt"
}
],
"outputs": [
{
"name": "SEGS",
"type": "SEGS",
"links": [
30
],
"slot_index": 0,
"label": "SEGS"
}
],
"properties": {
"Node name for S&R": "ImpactSimpleDetectorSEGS"
},
"widgets_values": [
0.5,
0,
3,
10,
0.5,
0,
0,
0.7,
0
]
},
{
"id": 25,
"type": "SEGSPreview",
"pos": {
"0": 1369,
"1": 1075
},
"size": {
"0": 315,
"1": 314
},
"flags": {},
"order": 9,
"mode": 0,
"inputs": [
{
"name": "segs",
"type": "SEGS",
"link": 30,
"label": "segs"
},
{
"name": "fallback_image_opt",
"type": "IMAGE",
"link": null,
"shape": 7,
"label": "fallback_image_opt"
}
],
"outputs": [
{
"name": "IMAGE",
"type": "IMAGE",
"links": null,
"shape": 6,
"label": "IMAGE"
}
],
"properties": {
"Node name for S&R": "SEGSPreview"
},
"widgets_values": [
true,
0.2
]
},
{
"id": 4,
"type": "UltralyticsDetectorProvider",
"pos": {
"0": 557,
"1": 125
},
"size": {
"0": 340.20001220703125,
"1": 78
},
"flags": {},
"order": 2,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "BBOX_DETECTOR",
"type": "BBOX_DETECTOR",
"links": [
3,
26
],
"slot_index": 0,
"label": "BBOX_DETECTOR"
},
{
"name": "SEGM_DETECTOR",
"type": "SEGM_DETECTOR",
"links": [
4,
27
],
"slot_index": 1,
"label": "SEGM_DETECTOR"
}
],
"properties": {
"Node name for S&R": "UltralyticsDetectorProvider"
},
"widgets_values": [
"segm/person_yolov8m-seg.pt"
]
}
],
"links": [
[
1,
2,
0,
3,
0,
"SAM_MODEL"
],
[
3,
4,
0,
5,
0,
"BBOX_DETECTOR"
],
[
4,
4,
1,
6,
0,
"SEGM_DETECTOR"
],
[
5,
1,
0,
5,
1,
"IMAGE"
],
[
6,
1,
0,
6,
1,
"IMAGE"
],
[
7,
5,
0,
7,
0,
"SEGS"
],
[
12,
5,
0,
3,
1,
"SEGS"
],
[
16,
1,
0,
3,
2,
"IMAGE"
],
[
18,
6,
0,
17,
0,
"SEGS"
],
[
24,
3,
1,
22,
0,
"MASK"
],
[
25,
22,
0,
23,
0,
"MASK"
],
[
26,
4,
0,
24,
0,
"BBOX_DETECTOR"
],
[
27,
4,
1,
24,
3,
"SEGM_DETECTOR"
],
[
28,
1,
0,
24,
1,
"IMAGE"
],
[
29,
2,
0,
24,
2,
"SAM_MODEL"
],
[
30,
24,
0,
25,
0,
"SEGS"
]
],
"groups": [],
"config": {},
"extra": {
"ds": {
"scale": 0.7513148009015777,
"offset": [
-212.86626630100392,
-176.90870266354517
]
}
},
"version": 0.4
}