
YOLOv5, rtsp-server, FFmpeg, and VLC: real-time detection with video push/pull streaming

Overall workflow:

1. First, download the rtsp-server (mediamtx). If you are deploying on a virtual machine or a remote server, download the corresponding Linux build instead; I downloaded the two Windows builds shown in the release screenshot (image omitted). Once downloaded, simply run mediamtx.exe in the extracted folder. Out of the box, mediamtx also accepts RTMP pushes on port 1935, which is the port used in the code below.

            Releases · bluenviron/mediamtx (github.com)
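Before moving on, it can be worth confirming that the server is actually listening. The following is a minimal sketch (my addition, not from the original post; '127.0.0.1' is a placeholder for the host running mediamtx) that probes the RTMP port with the Python standard library:

    import socket

    # Probe the RTMP port that mediamtx listens on by default (1935).
    # Replace '127.0.0.1' with the machine running mediamtx.
    with socket.create_connection(('127.0.0.1', 1935), timeout=3) as s:
        print('mediamtx RTMP port reachable at', s.getpeername())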

          

2. Run main.py:

import argparse
import time

import cv2
import imutils
# YOLOv7 and StreamPusher are the classes defined in step 4 below

rtmp_server = 'rtmp://<your host IP>:1935/video'

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--imgpath', type=str, default='video/test.mp4', help="image path")
    parser.add_argument('--modelpath', type=str, default='models/yolov7-tiny_384x640.onnx',
                        choices=["models/yolov7_640x640.onnx", "models/yolov7-tiny_640x640.onnx",
                                 "models/yolov7_736x1280.onnx", "models/yolov7-tiny_384x640.onnx",
                                 "models/yolov7_480x640.onnx", "models/yolov7_384x640.onnx",
                                 "models/yolov7-tiny_256x480.onnx", "models/yolov7-tiny_256x320.onnx",
                                 "models/yolov7_256x320.onnx", "models/yolov7-tiny_256x640.onnx",
                                 "models/yolov7_256x640.onnx", "models/yolov7-tiny_480x640.onnx",
                                 "models/yolov7-tiny_736x1280.onnx", "models/yolov7_256x480.onnx"],
                        help="onnx filepath")
    parser.add_argument('--confThreshold', default=0.3, type=float, help='class confidence')
    parser.add_argument('--nmsThreshold', default=0.5, type=float, help='nms iou thresh')
    args = parser.parse_args()

    # Initialize YOLOv7 object detector
    yolov7_detector = YOLOv7(args.modelpath, conf_thres=args.confThreshold, iou_thres=args.nmsThreshold)
    VID_FORMATS = ['asf', 'avi', 'gif', 'm4v', 'mkv', 'mov', 'mp4', 'mpeg', 'mpg', 'wmv']  # include video suffixes
    imgpath = args.imgpath
    print(imgpath.split('.')[-1])
    if imgpath.split('.')[-1] in VID_FORMATS:
        cap = cv2.VideoCapture(0)
        pusher = StreamPusher(rtmp_server)
        while True:
            success, srcimg = cap.read()
            if not success:  # stop cleanly if the camera returns no frame
                break
            # NOTE: the pushed frame size must match StreamPusher's -video_size (640x480 here)
            srcimg = imutils.resize(srcimg, width=640)
            t1 = time.time()
            boxes, scores, class_ids = yolov7_detector.detect(srcimg)
            print(time.time() - t1)  # per-frame inference time, to gauge the model's speed
            # Draw detections
            dstimg = yolov7_detector.draw_detections(srcimg, boxes, scores, class_ids)
            print(time.time() - t1)  # inference time plus drawing time
            winName = 'Deep learning object detection in OpenCV'
            # cv2.namedWindow(winName, 0)
            # cv2.imshow(winName, dstimg)
            cv2.waitKey(1)
            pusher.streamPush(dstimg)
        cv2.destroyAllWindows()
    else:
        srcimg = cv2.imread(args.imgpath)
        # Detect Objects
        t1 = time.time()
        boxes, scores, class_ids = yolov7_detector.detect(srcimg)
        print(time.time() - t1)
        # Draw detections
        dstimg = yolov7_detector.draw_detections(srcimg, boxes, scores, class_ids)
        print(time.time() - t1)
        winName = 'Deep learning object detection in OpenCV'
        cv2.namedWindow(winName, 0)
        cv2.imshow(winName, dstimg)
        cv2.waitKey(0)
        cv2.destroyAllWindows()
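Since StreamPusher (step 4a below) hard-codes -video_size to 640x480, it is worth checking that the frames actually pushed are that size; a mismatch silently corrupts the stream. A quick sketch (my addition, assuming the default camera at index 0):

    import cv2

    # Compare the camera's native frame size against StreamPusher's -video_size.
    cap = cv2.VideoCapture(0)
    ok, frame = cap.read()
    cap.release()
    if ok:
        h, w = frame.shape[:2]
        print(f'camera frames are {w}x{h}')  # if not 640x480, resize frames or change -video_size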

3. Pull the stream with VLC: choose Media → Open Network Stream and enter the same address the code pushes to (rtmp://<your host IP>:1935/video).
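The stream can also be sanity-checked from Python before reaching for VLC. A small sketch (my addition; it assumes an OpenCV build with the FFmpeg backend, and the placeholder host must match the push URL):

    import cv2

    # Pull the published stream back and display it, much as VLC would.
    cap = cv2.VideoCapture('rtmp://<your host IP>:1935/video')
    while cap.isOpened():
        ok, frame = cap.read()
        if not ok:
            break
        cv2.imshow('pulled stream', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):  # press q to quit
            break
    cap.release()
    cv2.destroyAllWindows()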

4. Code walkthrough

a. Defining the pusher: I push with FFmpeg. Note that the class below launches the ffmpeg executable through subprocess, so the FFmpeg binary must be installed and on the PATH of the (virtual) environment you run from; a pip-installed ffmpeg package alone does not ship the executable.

import subprocess

class StreamPusher:
    def __init__(self, rtmp_url):  # rtmp_url: string giving the RTMP server address to push to
        # Build the FFmpeg command line
        ffmpeg_cmd = ['ffmpeg',
                      '-y',                      # overwrite output if it already exists
                      '-f', 'rawvideo',          # input format: raw video frames
                      '-pixel_format', 'bgr24',  # input pixel format: BGR24 (OpenCV's native layout)
                      '-video_size', '640x480',  # input frame size: 640x480 (must match the frames pushed)
                      '-i', '-',                 # read input from standard input
                      '-c:v', 'libx264',         # encode with libx264 (H.264 encoder)
                      '-preset', 'ultrafast',    # ultrafast preset for the fastest encoding
                      '-tune', 'zerolatency',    # tune for low latency
                      '-pix_fmt', 'yuv420p',     # output pixel format: yuv420p
                      '-f', 'flv',               # output container format: FLV (required for RTMP)
                      rtmp_url]                  # push target: the RTMP server address
        print('ffmpeg_cmd:', ffmpeg_cmd)
        # Start ffmpeg as a child process
        self.ffmpeg_process = subprocess.Popen(ffmpeg_cmd, stdin=subprocess.PIPE)

    def streamPush(self, frame):  # push one video frame to the ffmpeg process
        self.ffmpeg_process.stdin.write(frame.tobytes())
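The class never shuts ffmpeg down; when the Python process exits, the pipe is simply dropped. A stop() method along these lines (my addition, a sketch rather than part of the original code) would let ffmpeg flush its buffers and exit cleanly; it belongs inside StreamPusher above:

    def stop(self):
        # Closing stdin signals end-of-stream; ffmpeg then flushes and exits.
        self.ffmpeg_process.stdin.close()
        self.ffmpeg_process.wait()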

b. The YOLO weights are packaged as an ONNX file (you can grab the YOLO source from GitHub and export a .onnx from the trained weights). Because ONNX carries only the model graph and weights, the remaining pre- and post-processing components have to be written yourself, as follows:

import cv2
import numpy as np
import onnxruntime

class YOLOv7:
    def __init__(self, path, conf_thres=0.7, iou_thres=0.5):
        self.conf_threshold = conf_thres
        self.iou_threshold = iou_thres
        self.class_names = list(map(lambda x: x.strip(), open('coco.names', 'r').readlines()))
        # Initialize model
        self.session = onnxruntime.InferenceSession(path, providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
        model_inputs = self.session.get_inputs()
        self.input_names = [model_inputs[i].name for i in range(len(model_inputs))]
        self.input_shape = model_inputs[0].shape
        self.input_height = self.input_shape[2]
        self.input_width = self.input_shape[3]
        model_outputs = self.session.get_outputs()
        self.output_names = [model_outputs[i].name for i in range(len(model_outputs))]
        self.has_postprocess = 'score' in self.output_names

    def detect(self, image):
        input_tensor = self.prepare_input(image)
        # Perform inference on the image
        outputs = self.session.run(self.output_names, {self.input_names[0]: input_tensor})
        if self.has_postprocess:
            boxes, scores, class_ids = self.parse_processed_output(outputs)
        else:
            # Process output data
            boxes, scores, class_ids = self.process_output(outputs)
        return boxes, scores, class_ids

    def prepare_input(self, image):
        self.img_height, self.img_width = image.shape[:2]
        input_img = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        # Resize input image
        input_img = cv2.resize(input_img, (self.input_width, self.input_height))
        # Scale input pixel values to 0 to 1
        input_img = input_img / 255.0
        input_img = input_img.transpose(2, 0, 1)
        input_tensor = input_img[np.newaxis, :, :, :].astype(np.float32)
        return input_tensor

    def process_output(self, output):
        predictions = np.squeeze(output[0])  # (num_predictions, 5 + num_classes) array
        # Filter out object confidence scores below threshold
        obj_conf = predictions[:, 4]
        predictions = predictions[obj_conf > self.conf_threshold]
        obj_conf = obj_conf[obj_conf > self.conf_threshold]
        # Multiply class confidence with bounding box confidence
        predictions[:, 5:] *= obj_conf[:, np.newaxis]
        # Get the scores
        scores = np.max(predictions[:, 5:], axis=1)
        # Filter out the objects with a low score
        valid_scores = scores > self.conf_threshold
        predictions = predictions[valid_scores]
        scores = scores[valid_scores]
        # Get the class with the highest confidence
        class_ids = np.argmax(predictions[:, 5:], axis=1)
        # Get bounding boxes for each object
        boxes = self.extract_boxes(predictions)
        # Apply non-maxima suppression to suppress weak, overlapping bounding boxes
        # indices = nms(boxes, scores, self.iou_threshold)
        nms_indices = cv2.dnn.NMSBoxes(boxes.tolist(), scores.tolist(), self.conf_threshold, self.iou_threshold)
        indices = np.array(nms_indices).flatten().astype(int)
        return boxes[indices], scores[indices], class_ids[indices]

    def parse_processed_output(self, outputs):
        scores = np.squeeze(outputs[self.output_names.index('score')])
        predictions = outputs[self.output_names.index('batchno_classid_x1y1x2y2')]
        # Filter out object scores below threshold
        valid_scores = scores > self.conf_threshold
        predictions = predictions[valid_scores, :]
        scores = scores[valid_scores]
        # Extract the boxes and class ids
        # TODO: Separate based on batch number
        batch_number = predictions[:, 0]
        class_ids = predictions[:, 1]
        boxes = predictions[:, 2:]
        # In postprocess, the x,y are the y,x
        boxes = boxes[:, [1, 0, 3, 2]]
        # Rescale boxes to original image dimensions
        boxes = self.rescale_boxes(boxes)
        return boxes, scores, class_ids

    def extract_boxes(self, predictions):
        # Extract boxes from predictions
        boxes = predictions[:, :4]
        # Scale boxes to original image dimensions
        boxes = self.rescale_boxes(boxes)
        # Convert boxes from center-xywh to top-left-xywh format
        boxes_ = np.copy(boxes)
        boxes_[..., 0] = boxes[..., 0] - boxes[..., 2] * 0.5
        boxes_[..., 1] = boxes[..., 1] - boxes[..., 3] * 0.5
        return boxes_

    def rescale_boxes(self, boxes):
        # Rescale boxes to original image dimensions
        input_shape = np.array([self.input_width, self.input_height, self.input_width, self.input_height])
        boxes = np.divide(boxes, input_shape, dtype=np.float32)
        boxes *= np.array([self.img_width, self.img_height, self.img_width, self.img_height])
        return boxes

    def draw_detections(self, image, boxes, scores, class_ids):
        for box, score, class_id in zip(boxes, scores, class_ids):
            x, y, w, h = box.astype(int)
            # Draw rectangle
            cv2.rectangle(image, (x, y), (x + w, y + h), (0, 0, 255), thickness=2)
            label = self.class_names[class_id]
            label = f'{label} {int(score * 100)}%'
            labelSize, baseLine = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
            # top = max(y1, labelSize[1])
            # cv2.rectangle(frame, (left, top - round(1.5 * labelSize[1])), (left + round(1.5 * labelSize[0]), top + baseLine), (255, 255, 255), cv2.FILLED)
            cv2.putText(image, label, (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), thickness=2)
        return image
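Before wiring the detector to the pusher, it can be exercised on a single image. A minimal sketch (my addition; the image path is a placeholder, and coco.names must sit next to the script):

    import cv2

    # Standalone test of the YOLOv7 wrapper defined above.
    detector = YOLOv7('models/yolov7-tiny_384x640.onnx', conf_thres=0.3, iou_thres=0.5)
    img = cv2.imread('images/test.jpg')  # placeholder path
    boxes, scores, class_ids = detector.detect(img)
    out = detector.draw_detections(img, boxes, scores, class_ids)
    cv2.imwrite('result.jpg', out)
    print(len(boxes), 'objects detected')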

c. Open the local camera with cap = cv2.VideoCapture(0) (or, as in the listing below, pull an already-published stream by URL), run the detector on every frame, and push the annotated result:

if imgpath.split('.')[-1] in VID_FORMATS:
    # Here the source is a published stream; cv2.VideoCapture(0) works the same way for a webcam
    cap = cv2.VideoCapture("rtmp://:1935/stream")
    pusher = StreamPusher(rtmp_server)
    while True:
        success, srcimg = cap.read()
        if not success:  # stop cleanly when no frame arrives
            break
        srcimg = imutils.resize(srcimg, width=640)
        t1 = time.time()
        boxes, scores, class_ids = yolov7_detector.detect(srcimg)
        print(time.time() - t1)  # per-frame inference time, to gauge the model's speed/performance
        # Draw detections
        dstimg = yolov7_detector.draw_detections(srcimg, boxes, scores, class_ids)
        print(time.time() - t1)  # inference time plus drawing time
        winName = 'Deep learning object detection in OpenCV'
        # cv2.namedWindow(winName, 0)
        # cv2.imshow(winName, dstimg)
        cv2.waitKey(1)
        pusher.streamPush(dstimg)
    cv2.destroyAllWindows()
else:
    srcimg = cv2.imread(args.imgpath)
    # Detect Objects
    t1 = time.time()
    boxes, scores, class_ids = yolov7_detector.detect(srcimg)
    print(time.time() - t1)
    # Draw detections
    dstimg = yolov7_detector.draw_detections(srcimg, boxes, scores, class_ids)
    print(time.time() - t1)
    winName = 'Deep learning object detection in OpenCV'
    cv2.namedWindow(winName, 0)
    cv2.imshow(winName, dstimg)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

Result:

[screenshot omitted]

The code above borrows from many bloggers' articles; I no longer remember exactly which. If this offends anyone, I apologize!

Code:

GitHub - 23jisuper/yolov7-ffmpeg: YOLOv5-based object detection, pushing with FFmpeg and pulling with VLC

Follow-up work:

The detector currently runs inference on the CPU; GPU-accelerated inference is a natural next step. The whole pipeline is written in Python; I am also considering rewriting it in C++ to improve speed.
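Since the YOLOv7 class already lists CUDAExecutionProvider first, GPU inference mostly comes down to installing the GPU build of onnxruntime (pip install onnxruntime-gpu) together with a matching CUDA setup. A quick way to see which providers the installed build actually offers:

    import onnxruntime

    # Lists the execution providers available in this onnxruntime build.
    # If 'CUDAExecutionProvider' is missing, only the CPU package is installed.
    print(onnxruntime.get_available_providers())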
