Python Yolo2:YOLOV5实时监测

0. 准备 1. 虚拟环境启动 2. 安装对应的库 1. 屏幕抓取 1.1 使用pywin32对屏幕进行实时抓取 1.2 MSS 2.1 安装MSS库 2.2 使用MSS实时检测

0. 准备

虚拟环境

下载好 YOLOv5

训练好模型

准备好数据

1. 虚拟环境启动


# 1.切换到虚拟环境目录
PS D:\Cumtb_Code> cd .\yolo5env\
PS D:\Cumtb_Code\yolo5env> cd .\yolov5-master\

# 2. CMD
PS D:\Cumtb_Code\yolo5env\yolov5-master> cmd

# 3. 启动虚拟环境
D:\Cumtb_Code\yolo5env\yolov5-master>conda activate D:\Cumtb_Code\yolo5env



cd .\yolo5env\yolov5-master\
cmd

conda activate D:\Cumtb_Code\yolo5env

python autogamednf.py

2. 安装对应的库

pywin32（建议安装 303 版本；304 版本存在问题，导入 win32ui 时会报错：ImportError: DLL load failed while importing win32ui: 找不到指定的程序。）

PyAutoGUI


pip install -i https://pypi.tuna.tsinghua.edu.cn/simple pywin32==303


pip install -i https://pypi.tuna.tsinghua.edu.cn/simple PyAutoGUI

1. 屏幕抓取

1.1 使用pywin32对屏幕进行实时抓取

grabscreen.py


import cv2
import numpy as np
import win32gui
import win32ui
import win32con
import win32api
 
 
def grab_screen(region=None):
    """Capture the desktop (or a rectangle of it) and return it as an RGB image.

    Args:
        region: Optional ``(left, top, x2, y2)`` rectangle. Both corners are
            treated as inclusive, so the captured size is
            ``(x2 - left + 1, y2 - top + 1)``. When omitted (or falsy), the
            origin and size of the virtual screen reported by
            ``GetSystemMetrics`` are used.

    Returns:
        The array produced by ``cv2.cvtColor(..., cv2.COLOR_BGRA2RGB)`` from a
        ``(height, width, 4)`` ``uint8`` view of the captured bitmap bits.
    """
    hwin = win32gui.GetDesktopWindow()

    if region:
        left, top, x2, y2 = region
        width = x2 - left + 1
        height = y2 - top + 1
    else:
        width = win32api.GetSystemMetrics(win32con.SM_CXVIRTUALSCREEN)
        height = win32api.GetSystemMetrics(win32con.SM_CYVIRTUALSCREEN)
        left = win32api.GetSystemMetrics(win32con.SM_XVIRTUALSCREEN)
        top = win32api.GetSystemMetrics(win32con.SM_YVIRTUALSCREEN)

    hwindc = win32gui.GetWindowDC(hwin)
    srcdc = win32ui.CreateDCFromHandle(hwindc)
    memdc = srcdc.CreateCompatibleDC()
    bmp = win32ui.CreateBitmap()
    bmp.CreateCompatibleBitmap(srcdc, width, height)
    try:
        memdc.SelectObject(bmp)
        memdc.BitBlt((0, 0), (width, height), srcdc, (left, top), win32con.SRCCOPY)
        # Copy the pixels out before the bitmap is destroyed below.
        raw_bits = bmp.GetBitmapBits(True)
    finally:
        # Always give the GDI handles back, even when the copy above raises;
        # otherwise every failed grab would leak two DCs and a bitmap.
        # The release order is the same as it has always been.
        srcdc.DeleteDC()
        memdc.DeleteDC()
        win32gui.ReleaseDC(hwin, hwindc)
        win32gui.DeleteObject(bmp.GetHandle())

    # np.frombuffer replaces the deprecated np.fromstring for raw byte data.
    img = np.frombuffer(raw_bits, dtype='uint8')
    img.shape = (height, width, 4)

    return cv2.cvtColor(img, cv2.COLOR_BGRA2RGB)


检测结果示例（每行依次为：类别、左上角x、左上角y、右下角x、右下角y）：

['0', '770', '228', '817', '342']
['63', '214', '0', '1623', '1080']
['0', '814', '232', '911', '341']
['0', '1154', '244', '1254', '477']
['0', '1245', '250', '1448', '471']

1.2 MSS

使用mss库抓屏

2.1 安装MSS库


pip install mss
pip install -i https://pypi.tuna.tsinghua.edu.cn/simple mss

2.2 使用MSS实时检测


import random
import mss
from models.experimental import attempt_load
import torch
import numpy as np
from utils.general import non_max_suppression, scale_coords
from utils.augmentations import letterbox
import argparse
import time
import cv2 


def _str2bool(value):
    """Convert a command-line token to a bool.

    ``type=bool`` cannot be used for this: argparse would call ``bool('False')``,
    and every non-empty string is truthy, so the flag could never be turned off.
    """
    if isinstance(value, bool):
        return value
    text = str(value).strip().lower()
    if text in ('true', 't', 'yes', 'y', '1', 'on'):
        return True
    # The empty string stays False, as it was under ``bool('')``.
    if text in ('false', 'f', 'no', 'n', '0', 'off', ''):
        return False
    raise argparse.ArgumentTypeError(f'expected a boolean value, got {value!r}')


def _parse_region(value):
    """Convert ``'x,y'`` (optionally wrapped in parentheses) to a float pair.

    ``type=tuple`` would split the raw string into single characters, which
    the capture-size arithmetic cannot use. Each ratio must lie in (0, 1].
    """
    parts = value.strip().strip('()').split(',')
    try:
        # A wrong number of parts also raises ValueError during unpacking.
        x_ratio, y_ratio = (float(part) for part in parts)
    except ValueError:
        raise argparse.ArgumentTypeError(
            f"expected two numbers in the form 'x,y', got {value!r}") from None
    if not (0 < x_ratio <= 1 and 0 < y_ratio <= 1):
        raise argparse.ArgumentTypeError(
            f'region ratios must be within (0, 1], got {value!r}')
    return x_ratio, y_ratio


# Command-line configuration; parsed once at import time and read by the
# functions below through the module-level ``args``.
parser = argparse.ArgumentParser()
parser.add_argument('--model-path', type=str, default='pretrained/yolov5s.pt',
                    help='模型地址,绝对路径')
parser.add_argument('--imgsz', type=int, default=640, help='和你训练模型时imgsz一样,默认640')
parser.add_argument('--conf-thres', type=float, default=0.1, help='置信阈值')
parser.add_argument('--iou-thres', type=float, default=0.05, help='交并比阈值')
parser.add_argument('--hide_labels', type=_str2bool, default=True, help='是否隐藏标签')
parser.add_argument('--hide_conf', type=_str2bool, default=True, help='是否隐藏置信度')
parser.add_argument('--show-window', type=_str2bool, default=True, help='是否显示实时检测窗口')
parser.add_argument('--resize-window', type=float, default=1 / 2, help='缩放实时检测窗口大小')
parser.add_argument('--show-fps', type=_str2bool, default=True, help='是否显示帧数')
parser.add_argument('--region', type=_parse_region, default=(1, 1),
                    help='检测范围；分别为x，y，(1.0, 1.0)表示全屏检测，越低检测范围越小(始终保持屏幕中心为中心)；命令行格式: x,y 例如 0.5,0.5')
args = parser.parse_args()
# ------------------------------------------------------------------------------------

# Model loading
def load_model(args):
    """Load the weights at ``args.model_path`` and prepare them for inference.

    Args:
        args: Parsed options; ``model_path`` and ``imgsz`` are read.

    Returns:
        The model returned by ``attempt_load``. When CUDA is available it has
        additionally been switched to half precision and run once on an
        all-zero ``1 x 3 x imgsz x imgsz`` input as a warm-up.
    """
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model = attempt_load(args.model_path, device=device)

    # On CPU the freshly loaded model is returned untouched.
    if device == 'cpu':
        return model

    # CUDA path: use FP16, then push one dummy batch through the network.
    model.half()
    reference_param = next(model.parameters())
    warmup_input = torch.zeros(1, 3, args.imgsz, args.imgsz).to(device).type_as(reference_param)
    model(warmup_input)
    return model

# Screen capture through mss
cap = mss.mss()  # one shared mss instance, reused for every grab


def grab_screen_mss(monitor):
    """Grab the area described by ``monitor`` and return it without alpha.

    ``monitor`` is handed unchanged to ``cap.grab``; the screenshot is turned
    into a NumPy array and converted from BGRA to BGR, which drops the
    transparency channel.
    """
    screenshot = cap.grab(monitor)
    bgra_frame = np.array(screenshot)
    return cv2.cvtColor(bgra_frame, cv2.COLOR_BGRA2BGR)

# Box-drawing helper
def fun_en(aims, img0, len_x, len_y):
    """Draw a rectangle and class name on ``img0`` for every detection.

    Args:
        aims: Iterable of detections, each unpacking to
            ``(cls, conf, x1, y1, x2, y2)`` where ``(x1, y1)`` is the top-left
            and ``(x2, y2)`` the bottom-right corner in pixels of ``img0``.
            Values may be numbers or numeric strings.
        img0: Image that is drawn on in place.
        len_x: Unused; kept so existing callers keep working.
        len_y: Unused; kept so existing callers keep working.

    Reads the module-level ``names`` and ``colors`` that the ``__main__``
    section defines. Prints the label text of each detection.
    """
    font = cv2.FONT_HERSHEY_SIMPLEX
    # Local switches for the printed label; both are off, so the printed text
    # is "<class name> <confidence>".
    # NOTE(review): the --hide_labels / --hide_conf options are not consulted
    # here — confirm whether that is intended.
    hide_labels = False
    hide_conf = False

    for det in aims:
        cls, conf, x1, y1, x2, y2 = det

        # int(float(...)) instead of int(...): a coordinate arriving as text
        # such as '12.5' or '1e+03' would make a plain int() raise ValueError.
        top_left = (int(float(x1)), int(float(y1)))
        bottom_right = (int(float(x2)), int(float(y2)))

        cls = int(float(cls))
        conf = float(conf)
        label = None if hide_labels else (names[cls] if hide_conf else f'{names[cls]} {conf:.2f}')
        print(label)

        cv2.rectangle(img0, top_left, bottom_right, color=colors[cls], thickness=3)
        # The caption sits 5 px above the box's top-left corner.
        cv2.putText(img0, names[cls], (top_left[0], top_left[1] - 5), font, 1, colors[cls], 2)

# Main loop
def run():
    """Grab the screen, run the detector on every frame and show the result.

    Uses the module-level ``args``, ``model``, ``stride``, ``device``,
    ``half``, ``conf_thres`` and ``iou_thres`` that the ``__main__`` section
    sets up before calling this function.

    Loops until the preview window is no longer visible, then destroys all
    OpenCV windows and raises ``SystemExit``.
    """
    # NOTE: the screen is assumed to be 1920x1080 with its origin at (0, 0).
    top_x, top_y, x, y = 0, 0, 1920, 1080
    # Size of the captured area, as a fraction of the screen.
    len_x, len_y = int(x * args.region[0]), int(y * args.region[1])
    # Origin of the captured area, chosen so it stays centred on the screen.
    top_x = int(top_x + x // 2 * (1. - args.region[0]))
    top_y = int(top_y + y // 2 * (1. - args.region[1]))
    monitor = {'left': top_x, 'top': top_y, 'width': len_x, 'height': len_y}

    cv2.namedWindow('img', cv2.WINDOW_NORMAL)
    if args.show_window:
        cv2.resizeWindow('img', int(len_x * args.resize_window), int(len_y * args.resize_window))

    t0 = time.time()  # start of the current frame, for the FPS overlay
    while True:
        # "< 1" rather than "not ...": a closed window may be reported as a
        # negative value, which is truthy and would keep the loop alive.
        if cv2.getWindowProperty('img', cv2.WND_PROP_VISIBLE) < 1:
            cv2.destroyAllWindows()
            raise SystemExit('程序结束...')

        img0 = grab_screen_mss(monitor)
        img0 = cv2.resize(img0, (len_x, len_y))  # force the expected frame size

        # Pre-processing: letterbox, HWC -> CHW with reversed channel order,
        # contiguous memory, tensor on the target device, scaled to 0.0-1.0.
        img = letterbox(img0, args.imgsz, stride=stride)[0]
        img = img.transpose((2, 0, 1))[::-1]
        img = np.ascontiguousarray(img)
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()
        img /= 255.
        img = img[None]  # add the batch dimension

        # Inference + non-maximum suppression. no_grad() keeps autograd from
        # recording a graph for every frame.
        t1 = time.time()
        with torch.no_grad():
            pred = model(img, augment=False, visualize=False)[0]
            pred = non_max_suppression(pred, conf_thres, iou_thres, agnostic=False)
        t2 = time.time()

        print(f'推理时间 {(t2 - t1) * 1000:.2f} ms')

        # Collect every detection as a list of '%g'-formatted strings:
        # [class, confidence, x1, y1, x2, y2].
        aims = []
        for det in pred:  # one entry per image in the batch
            if not len(det):
                continue
            # Map the boxes from the letterboxed input back onto img0.
            det[:, :4] = scale_coords(img.shape[2:], det[:, :4], img0.shape).round()
            for *xyxy, conf, cls in reversed(det):
                aims.append(['%g' % float(value) for value in (cls, conf, *xyxy)])
        if aims:
            fun_en(aims, img0, len_x, len_y)

        # Display
        if args.show_window:
            if args.show_fps:
                elapsed = time.time() - t0
                # Guard against a zero interval on coarse clocks.
                fps = 1. / elapsed if elapsed > 0 else 0.
                cv2.putText(img0, "FPS:{:.1f}".format(fps), (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 2,
                            (0, 0, 235), 4)
                t0 = time.time()
            cv2.imshow('img', img0)
        cv2.waitKey(1)


if __name__ == '__main__':
    # Everything assigned here becomes a module-level global that run() and
    # fun_en() read.

    # Device and precision: FP16 whenever CUDA is available.
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    half = device != 'cpu'

    # Thresholds taken from the command line.
    conf_thres, iou_thres = args.conf_thres, args.iou_thres

    # Model and the values derived from it.
    model = load_model(args)
    stride = int(model.stride.max())
    # Class names live on model.module when that attribute exists.
    names = getattr(model, 'module', model).names
    # One random colour (three 0-255 components) per class, for the boxes.
    colors = [[random.randint(0, 255) for _channel in range(3)] for _name in names]

    run()

screen-reasoning/屏幕识别.py at main · AMXZzzz/screen-reasoning

You can't perform that action at this time. You signed in with another tab or window. You signed out in another tab or window. Reload to refresh your session. Reload to refresh your session.

https://github.com/AMXZzzz/screen-reasoning/blob/main/%E5%B1%8F%E5%B9%95%E8%AF%86%E5%88%AB.py