赞
踩
GPU架构近些年也有不少的变化,具体的可以参考别的博主的介绍,都比较详细。还有一些cuda中的专有名词的含义,可以参考《详解CUDA的Context、Stream、Warp、SM、SP、Kernel、Block、Grid》
常见的NppStatus,可以看这里。
图像尺寸的resize操作。
// 指定x和y的resize尺寸 NppStatus nppiResizeSqrPixel_8u_C3R(const Npp8u *pSrc, NppiSize oSrcSize, int nSrcStep, NppiRect oSrcROI, Npp8u *pDst, int nDstStep, NppiRect oDstROI, double nXFactor, double nYFactor, double nXShift, double nYShift, int eInterpolation); // x和y的resize尺寸自动计算 NppStatus nppiResize_8u_C3R(const Npp8u *pSrc, int nSrcStep, NppiSize oSrcSize, NppiRect oSrcRectROI, Npp8u *pDst, int nDstStep, NppiSize oDstSize, NppiRect oDstRectROI, int eInterpolation); // 多batch的resize // 涉及到一个新的数据类型,NppiResizeBatchCXR NppStatus nppiResizeBatch_8u_C3R(NppiSize oSmallestSrcSize, NppiRect oSrcRectROI, NppiSize oSmallestDstSize, NppiRect oDstRectROI, int eInterpolation, NppiResizeBatchCXR *pBatchList, unsigned int nBatchSize);
#include <iostream> #include <cuda_runtime.h> #include <npp.h> #include <opencv2/opencv.hpp> #define CUDA_FREE(ptr) { if (ptr != nullptr) { cudaFree(ptr); ptr = nullptr; } } int main() { std::string directory = "../"; cv::Mat image_dog = cv::imread(directory + "dog.png"); int image_width = image_dog.cols; int image_height = image_dog.rows; int image_size = image_width * image_height; // =============== device memory =============== // input uint8_t *in_image; cudaMalloc((void**)&in_image, image_size * 3 * sizeof(uint8_t)); cudaMemcpy(in_image, image_dog.data, image_size * 3 * sizeof(uint8_t), cudaMemcpyHostToDevice); // output double scale_w = 1.0 / 4; double scale_h = 1.0 / 4; uint8_t *out_ptr1, *out_ptr2; int dst_width = image_width * scale_w; int dst_height = image_height * scale_h; cudaMalloc((void**)&out_ptr1, dst_width * dst_height * 3 * sizeof(uint8_t)); // 三通道 cudaMalloc((void**)&out_ptr2, dst_width * dst_height * 3 * sizeof(uint8_t)); // 三通道 // roi size NppiSize in_size, out_size; in_size.width = image_width; in_size.height = image_height; out_size.width = dst_width; out_size.height = dst_height; NppiRect rc1, rc2; rc1.x = 0; rc1.y = 0; rc1.width = image_width; rc1.height = image_height; rc2.x = 0; rc2.y = 0; rc2.width = dst_width; rc2.height = dst_height; cv::Mat out_image = cv::Mat::zeros(dst_height, dst_width, CV_8UC3); NppStatus status; // =============== nppiResizeSqrPixel_8u_C3R =============== // resize to half status = nppiResizeSqrPixel_8u_C3R(in_image, in_size, image_width * 3, rc1, out_ptr1, dst_width * 3, rc2, scale_w, scale_h, 10.0, 50.0, NPPI_INTER_LINEAR); if (status != NPP_SUCCESS) { std::cout << "[GPU] ERROR nppiResizeSqrPixel_8u_C3R failed, status = " << status << std::endl; return false; } cudaMemcpy(out_image.data, out_ptr1, dst_width * dst_height * 3, cudaMemcpyDeviceToHost); cv::imwrite(directory + "resize_sqr.jpg", out_image); // =============== nppiResize_8u_C3R =============== // resize to half status = nppiResize_8u_C3R(in_image, 
image_width * 3, in_size, rc1, out_ptr2, dst_width * 3, out_size, rc2, NPPI_INTER_LINEAR); if (status != NPP_SUCCESS) { std::cout << "[GPU] ERROR nppiResize_8u_C3R failed, status = " << status << std::endl; return false; } cudaMemcpy(out_image.data, out_ptr2, dst_width * dst_height * 3, cudaMemcpyDeviceToHost); cv::imwrite(directory + "resize.jpg", out_image); // free CUDA_FREE(in_image) CUDA_FREE(out_ptr1) CUDA_FREE(out_ptr2) }
cmake_minimum_required(VERSION 3.20)
project(test)

find_package(OpenCV REQUIRED)
# FindCUDAToolkit (CMake >= 3.17) replaces the deprecated FindCUDA module and
# exposes imported targets, so no hard-coded /usr/local/cuda path or glob over
# every shared library in the toolkit is needed.
find_package(CUDAToolkit REQUIRED)

add_executable(test test.cpp)
target_include_directories(test PRIVATE ${OpenCV_INCLUDE_DIRS})
# Link only what the example uses: the CUDA runtime, the NPP core library and
# the NPP image-geometry library (resize / remap / rotate / mirror).
target_link_libraries(test PRIVATE
    ${OpenCV_LIBS}
    CUDA::cudart
    CUDA::nppc
    CUDA::nppig
)
注意:
1. nppiResizeSqrPixel 接口支持在 x 和 y 方向使用不同的缩放系数,并且可以为输出添加相对于左上角的偏移量(示例结果中 x 和 y 方向分别添加了 10 像素和 50 像素的偏移)。
重映射(remap),实现的功能与 OpenCV 原生的 remap 功能一致。
// Prototype of the remap function for 8-bit unsigned, 3-channel images.
// pXMap / pYMap are float maps, each with its own step argument.
// NOTE(review): the steps are presumably row strides in bytes - the example in
// this article passes image_width * sizeof(float) for the maps and
// image_width * 3 for the images; confirm against the NPP documentation.
NppStatus nppiRemap_8u_C3R(const Npp8u *pSrc,
NppiSize oSrcSize,
int nSrcStep,
NppiRect oSrcROI,
const Npp32f *pXMap,
int nXMapStep,
const Npp32f *pYMap,
int nYMapStep,
Npp8u *pDst,
int nDstStep,
NppiSize oDstSizeROI,
int eInterpolation);
#include <iostream> #include <cuda_runtime.h> #include <npp.h> #include <opencv2/opencv.hpp> #define CUDA_FREE(ptr) { if (ptr != nullptr) { cudaFree(ptr); ptr = nullptr; } } int main() { std::string directory = "../"; cv::Mat image_dog = cv::imread(directory + "dog.png"); int image_width = image_dog.cols; int image_height = image_dog.rows; int image_size = image_width * image_height; // =============== device memory =============== // input uint8_t *in_image; cudaMalloc((void**)&in_image, image_size * 3 * sizeof(uint8_t)); cudaMemcpy(in_image, image_dog.data, image_size * 3 * sizeof(uint8_t), cudaMemcpyHostToDevice); cv::Mat mat_mapx = cv::Mat::zeros(image_height, image_width, CV_32FC1); cv::Mat mat_mapy = cv::Mat::zeros(image_height, image_width, CV_32FC1); for (int i = 0; i < image_height; ++i) { for (int j = 0; j < image_width; ++j) { mat_mapx.at<float>(i, j) = (float)j; mat_mapy.at<float>(i, j) = (float)(image_height - i - 1); } } float *mapx, *mapy; cudaMalloc((void**)&mapx, image_size * sizeof(float)); cudaMalloc((void**)&mapy, image_size * sizeof(float)); cudaMemcpy(mapx, mat_mapx.data, image_size * sizeof(float), cudaMemcpyHostToDevice); cudaMemcpy(mapy, mat_mapy.data, image_size * sizeof(float), cudaMemcpyHostToDevice); // output uint8_t *out_ptr1; cudaMalloc((void**)&out_ptr1, image_size * 3 * sizeof(uint8_t)); // 三通道 // size NppiSize in_size, out_size; in_size.width = image_width; in_size.height = image_height; out_size.width = image_width; out_size.height = image_height; NppiRect rc1; rc1.x = 0; rc1.y = 0; rc1.width = image_width; rc1.height = image_height; cv::Mat out_image = cv::Mat::zeros(image_height, image_width, CV_8UC3); NppStatus status; // =============== nppiResizeSqrPixel_8u_C3R =============== // resize to half status = nppiRemap_8u_C3R(in_image, in_size, image_width * 3, rc1, mapx, image_width * sizeof(float), mapy, image_width * sizeof(float), out_ptr1, image_width * 3, out_size, NPPI_INTER_LINEAR); if (status != NPP_SUCCESS) { std::cout 
<< "[GPU] ERROR nppiRemap_8u_C3R failed, status = " << status << std::endl; return false; } cudaMemcpy(out_image.data, out_ptr1, image_size* 3, cudaMemcpyDeviceToHost); cv::imwrite(directory + "remap.jpg", out_image); // free CUDA_FREE(in_image) CUDA_FREE(mapx) CUDA_FREE(mapy) CUDA_FREE(out_ptr1) }
cmake_minimum_required(VERSION 3.20)
project(test)

find_package(OpenCV REQUIRED)
# FindCUDAToolkit (CMake >= 3.17) replaces the deprecated FindCUDA module and
# exposes imported targets, so no hard-coded /usr/local/cuda path or glob over
# every shared library in the toolkit is needed.
find_package(CUDAToolkit REQUIRED)

add_executable(test test.cpp)
target_include_directories(test PRIVATE ${OpenCV_INCLUDE_DIRS})
# Link only what the example uses: the CUDA runtime, the NPP core library and
# the NPP image-geometry library (resize / remap / rotate / mirror).
target_link_libraries(test PRIVATE
    ${OpenCV_LIBS}
    CUDA::cudart
    CUDA::nppc
    CUDA::nppig
)
注意点:
旋转,该模块除了直接提供旋转的接口,还同步提供了根据角度和平移计算旋转矩阵的接口。
// 除了当前接口,还提供了一个返回四元素的接口 NppStatus nppiGetRotateBound(NppiRect oSrcROI, double aBoundingBox[2][2], double nAngle, double nShiftX, double nShiftY); // 旋转 NppStatus nppiRotate_8u_C3R(const Npp8u *pSrc, NppiSize oSrcSize, int nSrcStep, NppiRect oSrcROI, Npp8u *pDst, int nDstStep, NppiRect oDstROI, double nAngle, double nShiftX, double nShiftY, int eInterpolation);
#include <iostream> #include <cuda_runtime.h> #include <npp.h> #include <opencv2/opencv.hpp> #define CUDA_FREE(ptr) { if (ptr != nullptr) { cudaFree(ptr); ptr = nullptr; } } int main() { std::string directory = "../"; cv::Mat image_dog = cv::imread(directory + "dog.png"); int image_width = image_dog.cols; int image_height = image_dog.rows; int image_size = image_width * image_height; // =============== device memory =============== // input uint8_t *in_image; cudaMalloc((void**)&in_image, image_size * 3 * sizeof(uint8_t)); cudaMemcpy(in_image, image_dog.data, image_size * 3 * sizeof(uint8_t), cudaMemcpyHostToDevice); // output uint8_t *out_ptr1; cudaMalloc((void**)&out_ptr1, image_size * 3 * sizeof(uint8_t)); // 三通道 // size NppiSize in_size, out_size; in_size.width = image_width; in_size.height = image_height; out_size.width = image_width; out_size.height = image_height; NppiRect rc1; rc1.x = 0; rc1.y = 0; rc1.width = image_width; rc1.height = image_height; cv::Mat out_image = cv::Mat::zeros(image_height, image_width, CV_8UC3); NppStatus status; // =============== nppiResizeSqrPixel_8u_C3R =============== // resize to half double angle = 30.0; double shift_x = image_width / 4; double shift_y = image_height / 4; status = nppiRotate_8u_C3R(in_image, in_size, image_width * 3, rc1, out_ptr1, image_width * 3, rc1, angle, shift_x, shift_y, NPPI_INTER_LINEAR); if (status != NPP_SUCCESS) { std::cout << "[GPU] ERROR nppiRemap_8u_C3R failed, status = " << status << std::endl; return false; } cudaMemcpy(out_image.data, out_ptr1, image_size* 3, cudaMemcpyDeviceToHost); cv::imwrite(directory + "rotate.jpg", out_image); // free CUDA_FREE(in_image) CUDA_FREE(out_ptr1) }
cmake_minimum_required(VERSION 3.20)
project(test)

find_package(OpenCV REQUIRED)
# FindCUDAToolkit (CMake >= 3.17) replaces the deprecated FindCUDA module and
# exposes imported targets, so no hard-coded /usr/local/cuda path or glob over
# every shared library in the toolkit is needed.
find_package(CUDAToolkit REQUIRED)

add_executable(test test.cpp)
target_include_directories(test PRIVATE ${OpenCV_INCLUDE_DIRS})
# Link only what the example uses: the CUDA runtime, the NPP core library and
# the NPP image-geometry library (resize / remap / rotate / mirror).
target_link_libraries(test PRIVATE
    ${OpenCV_LIBS}
    CUDA::cudart
    CUDA::nppc
    CUDA::nppig
)
镜像(mirror),主要用于对图像进行镜像(翻转)操作。
// Enumeration used to select the mirror axis; it is the type of the `flip`
// argument of nppiMirror_8u_C3R below.
enum NppiAxis {
NPP_HORIZONTAL_AXIS,
NPP_VERTICAL_AXIS,
NPP_BOTH_AXIS
};
// Prototype of the mirror function for 8-bit unsigned, 3-channel images.
// NOTE(review): nSrcStep / nDstStep are presumably row strides in bytes - the
// example in this article passes image_width * 3; confirm against the NPP docs.
NppStatus nppiMirror_8u_C3R(const Npp8u *pSrc,
int nSrcStep,
Npp8u *pDst,
int nDstStep,
NppiSize oROI,
NppiAxis flip);
#include <iostream> #include <cuda_runtime.h> #include <npp.h> #include <opencv2/opencv.hpp> #define CUDA_FREE(ptr) { if (ptr != nullptr) { cudaFree(ptr); ptr = nullptr; } } int main() { std::string directory = "../"; cv::Mat image_dog = cv::imread(directory + "dog.png"); int image_width = image_dog.cols; int image_height = image_dog.rows; int image_size = image_width * image_height; // =============== device memory =============== // input uint8_t *in_image; cudaMalloc((void**)&in_image, image_size * 3 * sizeof(uint8_t)); cudaMemcpy(in_image, image_dog.data, image_size * 3 * sizeof(uint8_t), cudaMemcpyHostToDevice); // output uint8_t *out_ptr1; cudaMalloc((void**)&out_ptr1, image_size * 3 * sizeof(uint8_t)); // 三通道 NppiSize in_size; in_size.width = image_width; in_size.height = image_height; cv::Mat out_image = cv::Mat::zeros(image_height, image_width, CV_8UC3); NppStatus status; // =============== nppiMirror_8u_C3R =============== status = nppiMirror_8u_C3R(in_image, image_width * 3, out_ptr1, image_width * 3, in_size, NPP_HORIZONTAL_AXIS); if (status != NPP_SUCCESS) { std::cout << "[GPU] ERROR nppiMirror_8u_C3R failed, status = " << status << std::endl; return false; } cudaMemcpy(out_image.data, out_ptr1, image_size* 3, cudaMemcpyDeviceToHost); cv::imwrite(directory + "mirror_horizontal.jpg", out_image); // =============== nppiMirror_8u_C3R =============== status = nppiMirror_8u_C3R(in_image, image_width * 3, out_ptr1, image_width * 3, in_size, NPP_VERTICAL_AXIS); if (status != NPP_SUCCESS) { std::cout << "[GPU] ERROR nppiMirror_8u_C3R failed, status = " << status << std::endl; return false; } cudaMemcpy(out_image.data, out_ptr1, image_size* 3, cudaMemcpyDeviceToHost); cv::imwrite(directory + "mirror_vertical.jpg", out_image); // =============== nppiMirror_8u_C3R =============== status = nppiMirror_8u_C3R(in_image, image_width * 3, out_ptr1, image_width * 3, in_size, NPP_BOTH_AXIS); if (status != NPP_SUCCESS) { std::cout << "[GPU] ERROR nppiMirror_8u_C3R failed, 
status = " << status << std::endl; return false; } cudaMemcpy(out_image.data, out_ptr1, image_size* 3, cudaMemcpyDeviceToHost); cv::imwrite(directory + "mirror_both.jpg", out_image); // free CUDA_FREE(in_image) CUDA_FREE(out_ptr1) }
cmake_minimum_required(VERSION 3.20)
project(test)

find_package(OpenCV REQUIRED)
# FindCUDAToolkit (CMake >= 3.17) replaces the deprecated FindCUDA module and
# exposes imported targets, so no hard-coded /usr/local/cuda path or glob over
# every shared library in the toolkit is needed.
find_package(CUDAToolkit REQUIRED)

add_executable(test test.cpp)
target_include_directories(test PRIVATE ${OpenCV_INCLUDE_DIRS})
# Link only what the example uses: the CUDA runtime, the NPP core library and
# the NPP image-geometry library (resize / remap / rotate / mirror).
target_link_libraries(test PRIVATE
    ${OpenCV_LIBS}
    CUDA::cudart
    CUDA::nppc
    CUDA::nppig
)
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。