Windows搭建MaskRCNN环境

news2025/7/15 8:59:43

环境：python3.6

1. 在miniconda上创建虚拟环境

miniconda下载地址：https://mirrors.tuna.tsinghua.edu.cn/anaconda/miniconda

# 创建环境
conda create -n maskrcnn python=3.6
# 激活 maskrcnn 环境，后续操作都在这个环境下进行
conda activate maskrcnn

2. 升级pip

python -m ensurepip
python -m pip install --upgrade pip

3. 下载Mask_RCNN代码

对于 Mask_RCNN 2.1 版本，直接下载源码，并将权重文件 mask_rcnn_coco.h5 放在代码根目录下即可；对于 2.0 版本，还需要在 PowerShell 中进入项目根目录，执行下面的命令进行安装。两个版本的区别在于是否包含 setup.py 文件。

源码地址：https://github.com/matterport/Mask_RCNN/releases
进入项目根目录：

python setup.py install

在这里插入图片描述
mask_rcnn_coco.h5模型放在项目根目录下

4. 安装依赖库

Mask_RCNN 2.0 的源码中有一个 requirements.txt 文件，可以直接按照该文件安装依赖：

pip install -r requirements.txt # 大概率会失败

若安装报错，按照如下版本手动安装

pip install numpy==1.17.0
pip install scipy==1.2.1
pip install Pillow==8.4.0
pip install cython==0.29.28
pip install matplotlib==3.3.4
pip install scikit-image==0.17.2
pip install keras==2.1.6
pip install opencv-python==4.3.0.38
pip install h5py==2.10.0
pip install imgaug==0.4.0
pip install ipython==7.16.3

5. 安装pycocotools

# windows环境
pip install pycocotools-windows

6. 测试COCO数据集

在根目录下创建mode_coco.py

'''
Author: qingqingdan 1306047688@qq.com
Date: 2024-11-22 17:48:33
LastEditTime: 2024-11-29 15:32:37
Description: 用coco模型去预测图片
'''
import os
import sys
import cv2
import numpy as np
import matplotlib.pyplot as plt
import colorsys
from skimage.measure import find_contours

from PIL import Image

# 将 Mask R-CNN 模块路径添加到系统路径
ROOT_DIR = os.path.abspath("./")  # 替换为 Mask R-CNN 文件夹路径
sys.path.append(ROOT_DIR)

from mrcnn.config import Config
from mrcnn import model as modellib
from mrcnn import visualize
from mrcnn.model import log

# 加载 COCO 类别信息
from mrcnn.config import Config
from mrcnn.utils import Dataset

# COCO class names, looked up by class id when building captions
# (display_instances does class_names[class_id]).
# Index 0 is the 'BG' entry, followed by the 80 object classes (81 entries total).
COCO_CLASS_NAMES = [
    'BG', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
    'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign',
    'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
    'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag',
    'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite',
    'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
    'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana',
    'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
    'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table',
    'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
    'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock',
    'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'
]

# Path of the weights file passed to model.load_weights (relative to the
# current working directory)
MODEL_PATH = "mask_rcnn_coco.h5"  # replace with the actual model path

INPUT_DIR = "./images/"   # directory scanned for input images
OUTPUT_DIR = "./output_results/"  # directory where annotated images are saved
# Create the output directory up front; exist_ok=True makes this a no-op
# when it already exists.
os.makedirs(OUTPUT_DIR, exist_ok=True)



def apply_mask(image, mask, color, alpha=0.5):
    """
    Blend ``color`` into ``image`` wherever ``mask`` equals 1.

    The first three channels of ``image`` are modified in place; pixels
    outside the mask keep their value.

    Args:
        image: array indexed as ``image[:, :, channel]`` with at least three
            channels.
        mask: 2-D array; entries equal to 1 (or ``True``) select the pixels
            to tint.
        color: sequence of three channel intensities, each scaled by 255
            before blending.
        alpha: weight of the colour in the blend; the original pixel is
            weighted by ``1 - alpha``.

    Returns:
        The same ``image`` object that was passed in.
    """
    selected = mask == 1
    keep = 1 - alpha
    for channel in range(3):
        plane = image[:, :, channel]
        tinted = plane * keep + alpha * color[channel] * 255
        # Assigning back into the slice casts the blend to the image's dtype.
        image[:, :, channel] = np.where(selected, tinted, plane)
    return image



def random_colors(N, bright=True):
    """
    Build ``N`` visually distinct RGB colours.

    Despite the name, the result is deterministic: hues are spaced evenly
    over the HSV colour wheel (``i / N`` for ``i`` in ``0..N-1``) at full
    saturation, then converted to RGB.

    Args:
        N: number of colours to generate; ``0`` yields an empty list.
        bright: when true the HSV value is 1.0, otherwise 0.7.

    Returns:
        A list of ``N`` ``(r, g, b)`` tuples with components in ``[0, 1]``.
    """
    if bright:
        value = 1.0
    else:
        value = 0.7
    return [colorsys.hsv_to_rgb(step / N, 1, value) for step in range(N)]


# Draw the detection results
def display_instances(image, boxes, masks, class_ids, class_names, scores=None,
                      filename="", title="",
                      figsize=(16, 16),
                      show_mask=True, show_bbox=True,
                      colors=None, captions=None):
    """
    Draw detected instances on a copy of ``image`` and optionally save it.

    For every instance the caption (class name and score), the translucent
    mask fill and the mask outline are drawn with OpenCV. Instances whose
    box coordinates are all zero are skipped. Each caption that is built
    from the class name/score is also printed to stdout.

    Args:
        image: source picture, expected in RGB channel order (the caller in
            this file converts from BGR before calling). It is copied, not
            modified.
        boxes: array with one row ``(y1, x1, y2, x2)`` per instance.
        masks: array indexed as ``masks[:, :, i]`` for instance ``i``.
        class_ids: array with one class id per instance.
        class_names: sequence indexed by class id.
        scores: optional per-instance confidence scores.
        filename: name of the file written inside ``OUTPUT_DIR``. When
            empty, nothing is written to disk.
        title: unused; kept for interface compatibility.
        figsize: unused; kept for interface compatibility.
        show_mask: whether to fill each mask with its translucent colour.
        show_bbox: kept for interface compatibility; rectangle drawing is
            currently disabled, so this flag has no visible effect.
        colors: optional per-instance ``(r, g, b)`` colours with components
            in ``[0, 1]``; generated with ``random_colors`` when omitted.
        captions: optional per-instance caption strings that replace the
            default "<class name> <score>" captions.

    Returns:
        A ``PIL.Image.Image`` holding the annotated picture (RGB).
    """
    # Number of instances
    N = boxes.shape[0]
    if not N:
        print("\n*** No instances to display *** \n")
    else:
        assert boxes.shape[0] == masks.shape[-1] == class_ids.shape[0]
    colors = colors or random_colors(N)

    # Drawing happens on a copy of the input image. To draw on an empty
    # canvas instead, replace this with an all-zero array of the same size.
    masked_image = np.array(image, np.uint8)
    font = cv2.FONT_HERSHEY_SIMPLEX

    for i in range(N):
        color = colors[i]

        # Skip instances without a usable bounding box.
        if not np.any(boxes[i]):
            continue
        y1, x1, y2, x2 = boxes[i]

        # Build the caption (class name and confidence score).
        if not captions:
            class_id = class_ids[i]
            score = scores[i] if scores is not None else None
            label = class_names[class_id]
            # Test against None so that a legitimate score of 0.0 is still
            # shown and a missing score does not break the format call.
            if score is not None:
                caption = "{} {:.3f}".format(label, score)
            else:
                caption = label
            print(caption)
        else:
            caption = captions[i]

        # Draw the caption; plain ints keep OpenCV's point parsing happy.
        cv2.putText(masked_image, caption, (int(x1), int(y1) + 8),
                    font, 0.5, (255, 255, 255), 1)

        # Fill the instance area with its translucent colour.
        mask = masks[:, :, i]
        if show_mask:
            masked_image = apply_mask(masked_image, mask, color)

        # Pad the mask by one pixel on every side so that contours of
        # instances touching the image border are closed.
        padded_mask = np.zeros(
            (mask.shape[0] + 2, mask.shape[1] + 2), dtype=np.uint8)
        padded_mask[1:-1, 1:-1] = mask
        contours = find_contours(padded_mask, 0.5)

        # Draw the mask outline.
        outline_color = (color[0] * 255, color[1] * 255, color[2] * 255)
        for verts in contours:
            # find_contours yields (row, col); flip to (x, y) and undo the
            # one-pixel padding offset.
            verts = np.fliplr(verts) - 1
            # cv2.polylines needs 32-bit integer points. np.int32 replaces
            # the removed np.int alias, which was also 64-bit on some
            # platforms.
            cv2.polylines(masked_image, [np.array([verts], np.int32)], 1,
                          outline_color, 1)

    # Save the annotated picture into the output directory.
    if filename:
        save_path = os.path.join(OUTPUT_DIR, filename)
        # cv2.imwrite expects BGR data, so convert from RGB first; otherwise
        # the saved file has its red and blue channels swapped.
        bgr_image = cv2.cvtColor(masked_image, cv2.COLOR_RGB2BGR)
        if not cv2.imwrite(save_path, bgr_image):
            print("Failed to save result image: {}".format(save_path))

    img = Image.fromarray(np.uint8(masked_image))
    return img




class InferenceConfig(Config):
    """Settings used to run the COCO-trained model for prediction.

    Overrides four attributes of the base ``Config``; everything else is
    inherited unchanged.
    """
    # One GPU with one image per GPU. The script passes a single-image list
    # to model.detect, which presumably has to match the resulting batch
    # size (confirm against mrcnn.config.Config).
    IMAGES_PER_GPU = 1
    GPU_COUNT = 1
    # One extra class plus the 80 COCO classes (81 in total)
    NUM_CLASSES = 1 + 80
    NAME = "coco"

# Instantiate the inference configuration and call its display() method
# (presumably prints the effective settings — confirm in mrcnn.config)
config = InferenceConfig()
config.display()

# Build the Mask R-CNN model in inference mode, using ROOT_DIR as model_dir
model = modellib.MaskRCNN(mode="inference", config=config, model_dir=ROOT_DIR)
# Load the weights from MODEL_PATH; by_name=True presumably matches weights
# to layers by layer name (confirm in mrcnn.model)
model.load_weights(MODEL_PATH, by_name=True)






########################## 单图片预测 ##########################
# # 加载图像预测
# # 加载图片
# image_path = "./images/1045023827_4ec3e8ba5c_z.jpg"  # 替换为图片路径
# image = cv2.imread(image_path)
# image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

# # 预测
# results = model.detect([image], verbose=1)
# r = results[0]

# # 可视化结果
# visualize.display_instances(image, r['rois'], r['masks'], r['class_ids'],
#                             COCO_CLASS_NAMES, r['scores'])


# # 提取信息
# rois = r['rois']           # 边界框
# class_ids = r['class_ids'] # 类别 ID
# scores = r['scores']       # 置信分数
# masks = r['masks']         # 掩码

# # 打印结果
# for i, box in enumerate(rois):
#     y1, x1, y2, x2 = box
#     class_id = class_ids[i]
#     score = scores[i]
#     label = COCO_CLASS_NAMES[class_id]
#     print(f"Object {i+1}: {label} (Score: {score:.2f}), Box: {x1, y1, x2, y2}")



########################## 多张图片预测 ##########################

# Run detection on every .jpg/.png file in INPUT_DIR and save the annotated
# result under the same file name in OUTPUT_DIR. Sorting makes the
# processing order deterministic (os.listdir returns arbitrary order).
for img_name in sorted(os.listdir(INPUT_DIR)):
    # Case-insensitive extension check so files such as "photo.JPG" are
    # not silently skipped.
    if not img_name.lower().endswith((".jpg", ".png")):
        continue

    img_path = os.path.join(INPUT_DIR, img_name)
    image = cv2.imread(img_path)
    if image is None:
        # cv2.imread signals an unreadable or undecodable file by returning
        # None instead of raising; skip it rather than crash in cvtColor.
        print('Skipping unreadable image: ', img_name)
        continue
    # OpenCV loads images as BGR; convert to RGB before detection and drawing.
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Run detection on this single image
    results = model.detect([image], verbose=0)
    r = results[0]

    print('FileNmame: ', img_name)

    # Draw the detections and save the annotated image
    display_instances(image, r['rois'], r['masks'], r['class_ids'],
                      COCO_CLASS_NAMES, r['scores'], img_name)