Python Yolo9:YOLOV7

Python Yolo9:YOLOV7

1. 准备

新建一个虚拟环境
# 创建虚拟环境(按名称创建)
conda create -n yolov6env python=3.8
# 使用 --prefix 方式创建出来的环境没有name,环境位于指定的文件夹下
PS D:\Cumtb_Code> conda create --prefix=D:\Cumtb_Code\yolo7env python=3.8
# 进入虚拟环境,我这里将文件路径移动到了目录下
D:\Cumtb_Code\yolov6env>conda activate d:\Cumtb_Code\yolo7env

2. 拉取项目

git clone https://github.com/WongKinYiu/yolov7

3. 安装相应的库

  • 打开requirements.txt文件,启用其中的onnx相关依赖(后面需要导出onnx模型)
notion image
# 切换到项目目录 D:\Cumtb_Code\yolo7env\yolov7 下,安装对应的库
pip install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple
# 验证Torch:进入python交互环境后依次执行
python
import torch
print(torch.__version__)
print(torch.cuda.is_available())
Tips:📢📢📢:我在conda虚拟环境下,使用 pip install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple 安装之后,torch.cuda.is_available() 仍然返回 False
使用conda安装
import torch print(torch.__version__) print(torch.cuda.is_available())
PyTorch官方查看CUDA版本
 
notion image
conda install pytorch torchvision torchaudio cudatoolkit=11.6 -c pytorch -c conda-forge
notion image
 

4. 运行Demo

python detect.py --weights ./weights/yolov7.pt --conf 0.25 --img-size 640 --source inference/images/horses.jpg

5. 转换onnx模型

在yolov7下创建两个文件
  • export_onnx.py
"""Export a YOLOv7 checkpoint to ONNX with a TensorRT EfficientNMS head.

Usage (from the yolov7 project root):
    python export_onnx.py --weights ./weights/yolov7.pt
"""
import argparse
import os
import sys
import time

sys.path.append('./')  # to run '$ python *.py' files in subdirectories

import torch
import torch.nn as nn

import models
from models.experimental import attempt_load
from utils.activations import SiLU
from utils.general import set_logging, check_img_size
from utils.torch_utils import select_device
from EfficientNMS import End2End

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--weights', type=str, default='./weights/yolov7.pt', help='weights path')
    parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='image size')  # height, width
    parser.add_argument('--batch-size', type=int, default=1, help='batch size')
    parser.add_argument('--max-obj', type=int, default=100, help='topk')
    parser.add_argument('--iou-thres', type=float, default=0.45, help='nms iou threshold')
    parser.add_argument('--score-thres', type=float, default=0.25, help='nms score threshold')
    # NOTE(review): --dynamic is accepted but the export below always passes
    # dynamic_axes=None, so the flag currently has no effect.
    parser.add_argument('--dynamic', action='store_true', help='dynamic ONNX axes')
    # NOTE(review): store_true combined with default=True means this is always True.
    parser.add_argument('--grid', action='store_true', default=True, help='export Detect() layer grid')
    parser.add_argument('--device', default='cpu', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    opt = parser.parse_args()
    opt.img_size *= 2 if len(opt.img_size) == 1 else 1  # expand a single value to [size, size]
    print(opt)
    set_logging()
    t = time.time()

    # Load PyTorch model
    device = select_device(opt.device)
    model = attempt_load(opt.weights, map_location=device)  # load FP32 model

    # Checks
    gs = int(max(model.stride))  # grid size (max stride)
    opt.img_size = [check_img_size(x, gs) for x in opt.img_size]  # verify img_size are gs-multiples

    # Input: an all-zero dummy batch used for the dry run and for tracing
    img = torch.zeros(opt.batch_size, 3, *opt.img_size).to(device)

    model.eval()

    # Update model
    for k, m in model.named_modules():
        m._non_persistent_buffers_set = set()  # pytorch 1.6.0 compatibility
        if isinstance(m, (models.common.Conv, models.common.RepConv)):
            # assign export-friendly activations
            if isinstance(m.act, nn.SiLU):
                m.act = SiLU()
    model.model[-1].export = not opt.grid  # set Detect() layer grid export

    # Wrap the detector so that the exported graph ends with the NMS op
    model = End2End(model, max_obj=opt.max_obj, iou_thres=opt.iou_thres,
                    score_thres=opt.score_thres, max_wh=False, device=device)
    y = model(img)  # dry run

    # ONNX export
    try:
        import onnx

        print('\nStarting ONNX export with onnx %s...' % onnx.__version__)
        # Swap only the file extension. A plain str.replace('.pt', '.onnx')
        # would also rewrite '.pt' inside directory names and would leave the
        # name unchanged (overwriting the checkpoint) for other suffixes.
        f = os.path.splitext(opt.weights)[0] + '.onnx'  # filename
        torch.onnx.export(model, img, f, verbose=False, opset_version=12,
                          training=torch.onnx.TrainingMode.EVAL,
                          do_constant_folding=True,
                          input_names=['images'],
                          output_names=['num_dets', 'det_boxes', 'det_scores', 'det_classes'],
                          dynamic_axes=None)

        # Checks
        onnx_model = onnx.load(f)  # load onnx model
        onnx.checker.check_model(onnx_model)  # check onnx model

        # Output dimensions, flattened in output_names order:
        # num_dets (batch, 1), det_boxes (batch, max_obj, 4),
        # det_scores (batch, max_obj), det_classes (batch, max_obj)
        shapes = [opt.batch_size, 1,
                  opt.batch_size, opt.max_obj, 4,
                  opt.batch_size, opt.max_obj,
                  opt.batch_size, opt.max_obj]
        for i in onnx_model.graph.output:
            for j in i.type.tensor_type.shape.dim:
                j.dim_param = str(shapes.pop(0))
        onnx.save(onnx_model, f)
        print('ONNX export success, saved as %s' % f)
    except Exception as e:
        # Best effort: report the failure instead of aborting the script.
        print('ONNX export failure: %s' % e)

    # Finish
    print('\nExport complete (%.2fs). Visualize with https://github.com/lutzroeder/netron.' % (time.time() - t))
  • EfficientNMS.py
"""Helpers that append a TensorRT EfficientNMS op to a detector for ONNX export."""
import torch
import torch.nn as nn


class TRT_NMS(torch.autograd.Function):
    """Placeholder op that is exported as the ``TRT::EfficientNMS_TRT`` node.

    ``forward`` does not run NMS: it only returns random tensors with the
    output shapes and dtypes, so that a dry run / trace can proceed.
    ``symbolic`` is what emits the actual plugin node into the ONNX graph.
    """

    @staticmethod
    def forward(
        ctx,
        boxes,
        scores,
        background_class=-1,
        box_coding=0,
        iou_threshold=0.45,
        max_output_boxes=100,
        plugin_version="1",
        score_activation=0,
        score_threshold=0.25,
    ):
        """Return dummy ``(num_det, det_boxes, det_scores, det_classes)``.

        Shapes are ``(batch, 1)``, ``(batch, max_output_boxes, 4)``,
        ``(batch, max_output_boxes)`` and ``(batch, max_output_boxes)``,
        where ``batch`` and the class count are read from ``scores.shape``.
        """
        batch_size, num_boxes, num_classes = scores.shape
        num_det = torch.randint(0, max_output_boxes, (batch_size, 1), dtype=torch.int32)
        det_boxes = torch.randn(batch_size, max_output_boxes, 4)
        det_scores = torch.randn(batch_size, max_output_boxes)
        det_classes = torch.randint(0, num_classes, (batch_size, max_output_boxes), dtype=torch.int32)
        return num_det, det_boxes, det_scores, det_classes

    @staticmethod
    def symbolic(
        g,
        boxes,
        scores,
        background_class=-1,
        box_coding=0,
        iou_threshold=0.45,
        max_output_boxes=100,
        plugin_version="1",
        score_activation=0,
        score_threshold=0.25,
    ):
        """Emit a ``TRT::EfficientNMS_TRT`` node with four outputs."""
        out = g.op(
            "TRT::EfficientNMS_TRT",
            boxes,
            scores,
            background_class_i=background_class,
            box_coding_i=box_coding,
            iou_threshold_f=iou_threshold,
            max_output_boxes_i=max_output_boxes,
            plugin_version_s=plugin_version,
            score_activation_i=score_activation,
            score_threshold_f=score_threshold,
            outputs=4,
        )
        nums, boxes, scores, classes = out
        return nums, boxes, scores, classes


class ONNX_TRT(nn.Module):
    """Post-processing head that feeds raw predictions into ``TRT_NMS``.

    Args:
        max_obj: maximum number of boxes kept (``max_output_boxes``).
        iou_thres: IoU threshold passed to the NMS op.
        score_thres: score threshold passed to the NMS op.
        max_wh: not supported here; must be ``None`` or ``False``.
        device: device on which the conversion matrix is created.
    """

    def __init__(self, max_obj=100, iou_thres=0.45, score_thres=0.25, max_wh=False, device=None):
        super().__init__()
        # None is accepted as well because End2End defaults max_wh to None.
        assert max_wh is None or max_wh is False, 'max_wh is not supported by ONNX_TRT'
        self.device = device
        # Plain ints: a trailing comma here would turn them into 1-tuples.
        self.background_class = -1
        self.box_coding = 0
        self.iou_threshold = iou_thres
        self.max_obj = max_obj
        self.plugin_version = '1'
        self.score_activation = 0
        self.score_threshold = score_thres
        # Right-multiplying a row (a, b, c, d) by this matrix yields
        # (a - c/2, b - d/2, a + c/2, b + d/2); presumably center/size ->
        # corner coordinates, assuming the model emits (cx, cy, w, h).
        self.convert_matrix = torch.tensor(
            [[1, 0, 1, 0],
             [0, 1, 0, 1],
             [-0.5, 0, 0.5, 0],
             [0, -0.5, 0, 0.5]],
            dtype=torch.float32,
            device=self.device,
        )

    def forward(self, x):
        """Split ``x[..., :4]`` / ``x[..., 4:5]`` / ``x[..., 5:]`` and run NMS.

        Note that ``score *= conf`` works in place on a view of ``x``.
        """
        box = x[:, :, :4]
        conf = x[:, :, 4:5]
        score = x[:, :, 5:]
        score *= conf  # per-class score scaled by the column-4 confidence
        box @= self.convert_matrix
        num_det, det_boxes, det_scores, det_classes = TRT_NMS.apply(
            box,
            score,
            self.background_class,
            self.box_coding,
            self.iou_threshold,
            self.max_obj,
            self.plugin_version,
            self.score_activation,
            self.score_threshold,
        )
        return num_det, det_boxes, det_scores, det_classes


class End2End(nn.Module):
    """Wrap ``model`` so its first output is piped through ``ONNX_TRT``.

    Args:
        model: detector whose call returns a sequence; element ``[0]`` is used.
        max_obj, iou_thres, score_thres, max_wh: forwarded to ``ONNX_TRT``.
        device: target device; falls back to CPU when ``None``.
    """

    def __init__(self, model, max_obj=100, iou_thres=0.45, score_thres=0.25, max_wh=None, device=None):
        super().__init__()
        if device is None:
            device = torch.device('cpu')
        self.model = model.to(device)
        self.patch_model = ONNX_TRT
        self.end2end = self.patch_model(max_obj, iou_thres, score_thres, max_wh, device)
        self.end2end.eval()

    def forward(self, x):
        """Return ``(num_det, det_boxes, det_scores, det_classes)`` for ``x``."""
        x = self.model(x)[0]
        x = self.end2end(x)
        return x

从官网下载模型

notion image
将模型放在新建的weights文件夹下,运行命令,导出onnx模型
python export_onnx.py --weights ./weights/yolov7.pt
notion image

6. 导出engine模型

将生成的onnx模型复制到tensorrt/bin文件夹下,使用官方trtexec转化添加完EfficientNMS的onnx模型。FP32预测删除--fp16参数即可
trtexec --onnx=./yolov7.onnx --saveEngine=./yolov7_fp16.engine --fp16 --workspace=200

7. 安装tensorrt

切换到TensorRT文件夹的路径下:cd xxxxx/xxxxx/TensorRT-x.x.x.x
查看文件:dir
我们需要的是graphsurgeon, onnx-graphsurgeon, python, uff这几个文件夹下的python wheel文件

安装

安装时文件名使用tab键自动补全
a. 安装Python下whl文件
有多个支持python版本的轮子文件,查看python版本python -V ,以python3.8为例
(d:\Cumtb_Code\yolov6env) D:\TensorRT-8.4.1.5>cd python (d:\Cumtb_Code\yolov6env) D:\TensorRT-8.4.1.5\python>pip install tensorrt-8.4.1.5-cp38-none-win_amd64.whl
b. 安装uff下的whl文件(TensorFlow相关功能需要,顺手安装上)。
(d:\Cumtb_Code\yolov6env) D:\TensorRT-8.4.1.5>cd uff (d:\Cumtb_Code\yolov6env) D:\TensorRT-8.4.1.5\uff>pip install uff-0.6.9-py2.py3-none-any.whl
c. 安装graphsurgeon下的whl文件
(d:\Cumtb_Code\yolov6env) D:\TensorRT-8.4.1.5>cd graphsurgeon (d:\Cumtb_Code\yolov6env) D:\TensorRT-8.4.1.5\graphsurgeon>pip install graphsurgeon-0.4.6-py2.py3-none-any.whl
d. 安装onnx-graphsurgeon下的whl文件
(d:\Cumtb_Code\yolov6env) D:\TensorRT-8.4.1.5>cd onnx_graphsurgeon (d:\Cumtb_Code\yolov6env) D:\TensorRT-8.4.1.5\onnx_graphsurgeon>pip install onnx_graphsurgeon-0.3.12-py2.py3-none-any.whl
e. 使用Python查看验证TensorRT
(d:\Cumtb_Code\yolov6env) D:\Cumtb_Code\yolov6env>python
Python 3.8.13 (default, Mar 28 2022, 06:59:08) [MSC v.1916 64 bit (AMD64)] :: Anaconda, Inc. on win32
Type "help", "copyright", "credits" or "license" for more information.
>>> import tensorrt
>>> print(tensorrt.__version__)
8.4.1.5

8. 运行预测

将生成的engine放入项目对应文件
  • infer.py
"""Run a serialized TensorRT detection engine (EfficientNMS outputs) on one image."""
import cv2
import torch
import numpy as np
import tensorrt as trt
from collections import OrderedDict, namedtuple


class TRT_engine():
    """Thin wrapper around a serialized TensorRT engine file.

    The engine is expected to expose an input binding named ``images`` and
    output bindings named ``num_dets``, ``det_boxes``, ``det_scores`` and
    ``det_classes`` (these names are looked up in ``predict``).
    """

    def __init__(self, weight) -> None:
        self.imgsz = [640, 640]  # letterbox target size
        self.weight = weight  # path to the serialized engine
        self.device = torch.device('cuda:0')
        self.init_engine()

    def init_engine(self):
        """Deserialize the engine and allocate one CUDA tensor per binding.

        Raises:
            RuntimeError: if the engine cannot be deserialized or no
                execution context can be created.
        """
        self.Binding = namedtuple('Binding', ('name', 'dtype', 'shape', 'data', 'ptr'))
        self.logger = trt.Logger(trt.Logger.INFO)
        trt.init_libnvinfer_plugins(self.logger, namespace="")
        with open(self.weight, 'rb') as f, trt.Runtime(self.logger) as self.runtime:
            self.model = self.runtime.deserialize_cuda_engine(f.read())
        if self.model is None:
            raise RuntimeError('Failed to deserialize TensorRT engine: %s' % self.weight)

        self.bindings = OrderedDict()
        self.fp16 = False
        for index in range(self.model.num_bindings):
            name = self.model.get_binding_name(index)
            dtype = trt.nptype(self.model.get_binding_dtype(index))
            shape = tuple(self.model.get_binding_shape(index))
            data = torch.from_numpy(np.empty(shape, dtype=np.dtype(dtype))).to(self.device)
            self.bindings[name] = self.Binding(name, dtype, shape, data, int(data.data_ptr()))
            if self.model.binding_is_input(index) and dtype == np.float16:
                self.fp16 = True
        self.binding_addrs = OrderedDict((n, d.ptr) for n, d in self.bindings.items())

        self.context = self.model.create_execution_context()
        if self.context is None:
            # Without this check the failure only surfaces later as
            # "'NoneType' object has no attribute 'execute_v2'".
            raise RuntimeError('Failed to create TensorRT execution context '
                               '(see the TensorRT log output above)')

    def letterbox(self, im, color=(114, 114, 114), auto=False, scaleup=True, stride=32):
        """Resize ``im`` keeping its aspect ratio and pad it to ``self.imgsz``.

        Returns:
            ``(padded_image, ratio, dw, dh)`` where ``dw`` / ``dh`` are the
            per-side paddings. The same values are stored on ``self``.
        """
        shape = im.shape[:2]  # current shape [height, width]
        new_shape = self.imgsz
        if isinstance(new_shape, int):
            new_shape = (new_shape, new_shape)

        # Scale ratio (new / old)
        self.r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
        if not scaleup:  # only scale down, do not scale up
            self.r = min(self.r, 1.0)

        # Compute padding
        new_unpad = int(round(shape[1] * self.r)), int(round(shape[0] * self.r))
        self.dw, self.dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding
        if auto:  # minimum rectangle
            self.dw, self.dh = np.mod(self.dw, stride), np.mod(self.dh, stride)  # wh padding

        self.dw /= 2  # divide padding into 2 sides
        self.dh /= 2

        if shape[::-1] != new_unpad:  # resize
            im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
        # The -0.1 / +0.1 offsets split an odd padding between the two sides.
        top, bottom = int(round(self.dh - 0.1)), int(round(self.dh + 0.1))
        left, right = int(round(self.dw - 0.1)), int(round(self.dw + 0.1))
        self.img = cv2.copyMakeBorder(im, top, bottom, left, right,
                                      cv2.BORDER_CONSTANT, value=color)  # add border
        return self.img, self.r, self.dw, self.dh

    def preprocess(self, image):
        """Letterbox, convert BGR->RGB, HWC->NCHW, move to GPU and scale to [0, 1]."""
        self.img, self.r, self.dw, self.dh = self.letterbox(image)
        self.img = cv2.cvtColor(self.img, cv2.COLOR_BGR2RGB)
        self.img = self.img.transpose((2, 0, 1))
        self.img = np.expand_dims(self.img, 0)
        self.img = np.ascontiguousarray(self.img)
        self.img = torch.from_numpy(self.img).to(self.device)
        self.img = self.img.float()
        self.img /= 255.
        if self.fp16:
            # The engine's input binding is float16, so the buffer handed to
            # TensorRT must be float16 as well.
            self.img = self.img.half()
        return self.img

    def predict(self, img, threshold):
        """Run inference on a BGR image.

        Returns:
            A list of ``[class, score, xmin, ymin, xmax, ymax]`` with the box
            mapped back to the original image (letterbox padding and scale
            undone). Detections with ``score < threshold`` are dropped.
        """
        img = self.preprocess(img)
        self.binding_addrs['images'] = int(img.data_ptr())
        self.context.execute_v2(list(self.binding_addrs.values()))

        nums = self.bindings['num_dets'].data[0].tolist()
        boxes = self.bindings['det_boxes'].data[0].tolist()
        scores = self.bindings['det_scores'].data[0].tolist()
        classes = self.bindings['det_classes'].data[0].tolist()

        num = int(nums[0])
        new_bboxes = []
        for box, score, cls in zip(boxes[:num], scores[:num], classes[:num]):
            if score < threshold:
                continue
            xmin = (box[0] - self.dw) / self.r
            ymin = (box[1] - self.dh) / self.r
            xmax = (box[2] - self.dw) / self.r
            ymax = (box[3] - self.dh) / self.r
            new_bboxes.append([cls, score, xmin, ymin, xmax, ymax])
        return new_bboxes


def visualize(img, bbox_array):
    """Draw each ``[class, score, xmin, ymin, xmax, ymax]`` entry onto ``img``."""
    for temp in bbox_array:
        xmin = int(temp[2])
        ymin = int(temp[3])
        xmax = int(temp[4])
        ymax = int(temp[5])
        clas = int(temp[0])
        score = temp[1]
        cv2.rectangle(img, (xmin, ymin), (xmax, ymax), (105, 237, 249), 2)
        img = cv2.putText(img, "class:" + str(clas) + " " + str(round(score, 2)),
                          (xmin, int(ymin) - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                          (105, 237, 249), 1)
    return img


if __name__ == '__main__':
    image_path = "./pictures/zidane.jpg"
    trt_engine = TRT_engine("./weights/yolov7_fp16.engine")
    img1 = cv2.imread(image_path)
    if img1 is None:
        # cv2.imread returns None instead of raising when the file is unreadable.
        raise FileNotFoundError('Could not read image: %s' % image_path)
    results = trt_engine.predict(img1, threshold=0.5)
    img = visualize(img1, results)
    cv2.imshow("img", img)
    cv2.waitKey(0)
运行测试
python infer.py
 

报错

  • 我这里使用的是Torch 1.12.0,而官方要求安装的版本小于1.12.0
[07/12/2022-20:07:54] [TRT] [E] 1: [raiiMyelinGraph.h::nvinfer1::RAIIMyelinGraph::RAIIMyelinGraph::24] Error Code 1: Myelin (Compiled against cuBLASLt 11.10.1.0 but running against cuBLASLt 11.8.1.0.)
Traceback (most recent call last):
  File "inferDemo7.py", line 103, in <module>
    results = trt_engine.predict(img1,threshold=0.5)
  File "inferDemo7.py", line 72, in predict
    self.context.execute_v2(list(self.binding_addrs.values()))
AttributeError: 'NoneType' object has no attribute 'execute_v2'