YOLOv7 instance segmentation

Environment

  • Ubuntu 18.04 64-bit
  • Python 3.8
  • PyTorch 1.8.2 + cu111

A video walkthrough is available on Bilibili.

Introduction

The previous post on YOLOv7 mentioned that, in addition to object detection, YOLOv7 would also be applied to human pose estimation and instance segmentation, but at the time the authors had only released the pose estimation model. The good news is that the YOLOv7 authors recently released an instance segmentation model as well. In this article, we will take a look at how YOLOv7 performs on instance segmentation.

Practice

The instance segmentation code lives on the mask branch, which is still under active development. Download the latest code from that branch directly:

 git clone https://github.com/WongKinYiu/yolov7.git -b mask yolov7-mask
 cd yolov7-mask

The segmentation part depends on Facebook's detectron2, and detectron2 requires torch 1.8 or newer. Since I had been using 1.7.1 until now, a new environment needs to be created here:

 # Create a new virtual environment
 conda create -n pytorch1.8 python=3.8
 conda activate pytorch1.8
 # Install torch 1.8.2 (LTS) with CUDA 11.1
 pip install torch==1.8.2 torchvision==0.9.2 torchaudio==0.8.2 --extra-index-url https://download.pytorch.org/whl/lts/1.8/cu111
 # Edit requirements.txt first and comment out the torch and torchvision lines
 pip install -r requirements.txt
 # Install detectron2
 git clone https://github.com/facebookresearch/detectron2
 cd detectron2
 python setup.py install
 cd ..
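Before moving on, it is worth making sure the new environment actually works. Here is a minimal sanity check of my own (not part of the original steps), assuming the installations above completed successfully:

 # Quick check that torch and detectron2 import and that CUDA is visible
 import torch
 import detectron2

 print(torch.__version__)          # expect 1.8.2
 print(torch.cuda.is_available())  # expect True on a CUDA 11.1 machine
 print(detectron2.__version__)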

Next, download the instance segmentation model from https://github.com/WongKinYiu/yolov7/releases/download/v0.1/yolov7-mask.pt and put it in the source code directory.
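If you prefer to fetch the weights from code rather than a browser, a small sketch using torch's built-in downloader works too (my own alternative, not a step from the repo):

 import torch

 url = 'https://github.com/WongKinYiu/yolov7/releases/download/v0.1/yolov7-mask.pt'
 # Save the release weights into the current (source code) directory
 torch.hub.download_url_to_file(url, 'yolov7-mask.pt')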

The sample code for segmentation is in tools/instance.ipynb, which can be run directly in Jupyter Notebook.

If you need to convert the ipynb file to a plain Python file, run:

 jupyter nbconvert --to python tools/instance.ipynb

Copy the generated tools/instance.py to the root directory of the source code, and then modify the display code at the end of the file so that it saves the result image instead:

 import matplotlib.pyplot as plt
 import torch
 import cv2
 import yaml
 from torchvision import transforms
 import numpy as np

 from utils.datasets import letterbox
 from utils.general import non_max_suppression_mask_conf

 from detectron2.modeling.poolers import ROIPooler
 from detectron2.structures import Boxes
 from detectron2.utils.memory import retry_if_cuda_oom
 from detectron2.layers import paste_masks_in_image

 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

 # Load the hyperparameters used by the mask model
 with open('data/hyp.scratch.mask.yaml') as f:
     hyp = yaml.load(f, Loader=yaml.FullLoader)

 # Load the model and run it in half precision
 weights = torch.load('yolov7-mask.pt')
 model = weights['model']
 model = model.half().to(device)
 _ = model.eval()

 # Preprocess: letterbox-resize so both sides are multiples of the stride (64)
 image = cv2.imread('inference/images/horses.jpg')  # 504x378 image
 image = letterbox(image, 640, stride=64, auto=True)[0]
 image_ = image.copy()
 image = transforms.ToTensor()(image)
 image = torch.tensor(np.array([image.numpy()]))
 image = image.to(device)
 image = image.half()

 output = model(image)

 inf_out, train_out, attn, mask_iou, bases, sem_output = output['test'], output['bbox_and_cls'], output['attn'], output['mask_iou'], output['bases'], output['sem']

 bases = torch.cat([bases, sem_output], dim=1)
 nb, _, height, width = image.shape
 names = model.names
 pooler_scale = model.pooler_scale
 pooler = ROIPooler(output_size=hyp['mask_resolution'], scales=(pooler_scale,), sampling_ratio=1, pooler_type='ROIAlignV2', canonical_level=2)

 # NMS that also gathers the per-detection mask coefficients
 output, output_mask, output_mask_score, output_ac, output_ab = non_max_suppression_mask_conf(inf_out, attn, bases, pooler, hyp, conf_thres=0.25, iou_thres=0.65, merge=False, mask_iou=None)

 pred, pred_masks = output[0], output_mask[0]
 base = bases[0]
 bboxes = Boxes(pred[:, :4])
 original_pred_masks = pred_masks.view(-1, hyp['mask_resolution'], hyp['mask_resolution'])
 # Paste the low-resolution ROI masks back onto the full image
 pred_masks = retry_if_cuda_oom(paste_masks_in_image)(original_pred_masks, bboxes, (height, width), threshold=0.5)
 pred_masks_np = pred_masks.detach().cpu().numpy()
 pred_cls = pred[:, 5].detach().cpu().numpy()
 pred_conf = pred[:, 4].detach().cpu().numpy()
 nimg = image[0].permute(1, 2, 0) * 255
 nimg = nimg.cpu().numpy().astype(np.uint8)
 nimg = cv2.cvtColor(nimg, cv2.COLOR_RGB2BGR)
 nbboxes = bboxes.tensor.detach().cpu().numpy().astype(int)
 pnimg = nimg.copy()

 # Draw each mask and box with a random color
 for one_mask, bbox, cls, conf in zip(pred_masks_np, nbboxes, pred_cls, pred_conf):
     if conf < 0.25:
         continue
     color = [np.random.randint(255), np.random.randint(255), np.random.randint(255)]

     pnimg[one_mask] = pnimg[one_mask] * 0.5 + np.array(color, dtype=np.uint8) * 0.5
     pnimg = cv2.rectangle(pnimg, (bbox[0], bbox[1]), (bbox[2], bbox[3]), color, 2)
     #label = '%s %.3f' % (names[int(cls)], conf)
     #t_size = cv2.getTextSize(label, 0, fontScale=0.5, thickness=1)[0]
     #c2 = bbox[0] + t_size[0], bbox[1] - t_size[1] - 3
     #pnimg = cv2.rectangle(pnimg, (bbox[0], bbox[1]), c2, color, -1, cv2.LINE_AA)  # filled
     #pnimg = cv2.putText(pnimg, label, (bbox[0], bbox[1] - 2), 0, 0.5, [255, 255, 255], thickness=1, lineType=cv2.LINE_AA)

 # Save the visualization instead of displaying it
 cv2.imwrite("instance_result.jpg", pnimg)
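One detail worth noting in the script above: the letterbox call pads the image so that both sides are multiples of the stride passed to it (64 here), which is what the model expects. A quick sketch to see what it produces, using the repo's utils.datasets.letterbox and the bundled horses.jpg:

 import cv2
 from utils.datasets import letterbox

 img = cv2.imread('inference/images/horses.jpg')  # 504x378 original
 resized = letterbox(img, 640, stride=64, auto=True)[0]
 # Both output dimensions should be divisible by 64
 print(img.shape, '->', resized.shape)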

I found two test images to check the effect.

Similarly, here is corresponding code for detection on a video file or camera:

 import torch
 import cv2
 import yaml
 from torchvision import transforms
 import numpy as np

 from utils.datasets import letterbox
 from utils.general import non_max_suppression_mask_conf

 from detectron2.modeling.poolers import ROIPooler
 from detectron2.structures import Boxes
 from detectron2.utils.memory import retry_if_cuda_oom
 from detectron2.layers import paste_masks_in_image

 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 with open('data/hyp.scratch.mask.yaml') as f:
     hyp = yaml.load(f, Loader=yaml.FullLoader)

 weights = torch.load('yolov7-mask.pt')
 model = weights['model']
 model = model.half().to(device)
 _ = model.eval()

 cap = cv2.VideoCapture('vehicle_test.mp4')
 if not cap.isOpened():
     print('open failed.')
     exit(-1)

 # Source resolution
 frame_width = int(cap.get(3))
 frame_height = int(cap.get(4))

 # Letterbox-resize the first frame to determine the output size
 vid_write_image = letterbox(cap.read()[1], frame_width, stride=64, auto=True)[0]
 resize_height, resize_width = vid_write_image.shape[:2]

 # Writer for the result video
 out = cv2.VideoWriter("result_instance.mp4", cv2.VideoWriter_fourcc(*'mp4v'), 30, (resize_width, resize_height))

 while cap.isOpened():
     flag, image = cap.read()
     if flag:
         image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
         image = letterbox(image, frame_width, stride=64, auto=True)[0]
         image_ = image.copy()
         image = transforms.ToTensor()(image)
         image = torch.tensor(np.array([image.numpy()]))
         image = image.to(device)
         image = image.half()

         with torch.no_grad():
             output = model(image)

         inf_out, train_out, attn, mask_iou, bases, sem_output = output['test'], output['bbox_and_cls'], output['attn'], output['mask_iou'], output['bases'], output['sem']
         bases = torch.cat([bases, sem_output], dim=1)
         nb, _, height, width = image.shape
         names = model.names
         pooler_scale = model.pooler_scale
         pooler = ROIPooler(output_size=hyp['mask_resolution'], scales=(pooler_scale,), sampling_ratio=1, pooler_type='ROIAlignV2', canonical_level=2)

         output, output_mask, output_mask_score, output_ac, output_ab = non_max_suppression_mask_conf(inf_out, attn, bases, pooler, hyp, conf_thres=0.25, iou_thres=0.65, merge=False, mask_iou=None)

         pred, pred_masks = output[0], output_mask[0]
         base = bases[0]
         bboxes = Boxes(pred[:, :4])
         original_pred_masks = pred_masks.view(-1, hyp['mask_resolution'], hyp['mask_resolution'])
         pred_masks = retry_if_cuda_oom(paste_masks_in_image)(original_pred_masks, bboxes, (height, width), threshold=0.5)
         pred_masks_np = pred_masks.detach().cpu().numpy()
         pred_cls = pred[:, 5].detach().cpu().numpy()
         pred_conf = pred[:, 4].detach().cpu().numpy()
         nimg = image[0].permute(1, 2, 0) * 255
         nimg = nimg.cpu().numpy().astype(np.uint8)
         nimg = cv2.cvtColor(nimg, cv2.COLOR_RGB2BGR)
         nbboxes = bboxes.tensor.detach().cpu().numpy().astype(int)
         pnimg = nimg.copy()

         # Draw each mask and box with a random color
         for one_mask, bbox, cls, conf in zip(pred_masks_np, nbboxes, pred_cls, pred_conf):
             if conf < 0.25:
                 continue
             color = [np.random.randint(255), np.random.randint(255), np.random.randint(255)]
             pnimg[one_mask] = pnimg[one_mask] * 0.5 + np.array(color, dtype=np.uint8) * 0.5
             pnimg = cv2.rectangle(pnimg, (bbox[0], bbox[1]), (bbox[2], bbox[3]), color, 2)

         cv2.imshow('YOLOv7 mask', pnimg)
         out.write(pnimg)

         if cv2.waitKey(1) & 0xFF == ord('q'):
             break
     else:
         break

 cap.release()
 out.release()
 cv2.destroyAllWindows()
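To read from a camera instead of a video file, only the capture source needs to change; for example, pass the device index (0 is usually the default webcam):

 # Read from the default camera instead of a file
 cap = cv2.VideoCapture(0)

The rest of the loop stays the same, since cap.get(3) and cap.get(4) still report the capture resolution.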

References

This article is reprinted from https://xugaoxiang.com/2022/08/15/yolov7-instance-segmentation/. It is republished here for reference only; the copyright belongs to the original author.
