当前位置:   article > 正文

一个简洁的cublasSmatinvBatched应用示例

cublasSmatinvBatched

可以简单地粘贴放入一个cuBLAS sample的文件中替代运行:

  1. //一个简洁的cublasSmatinvBatched 示例:
  2. /* Includes, system */
  3. #include <stdio.h>
  4. #include <stdlib.h>
  5. #include <string.h>
  6. /* Includes, cuda */
  7. #include <cublas_v2.h>
  8. #include <cuda_runtime.h>
  9. #include <helper_cuda.h>
  10. /* Matrix size */
  11. #define N (2)
  12. #define BATCH_SIZE (1)
  13. /* Main */
  14. int main(int argc, char **argv) {
  15. cublasStatus_t status;
  16. float* h_A;
  17. float* d_A = 0;
  18. float* d_Ainv = 0;
  19. float* h_Ainv = 0;
  20. int n2 = N * N;
  21. int* info=NULL;
  22. float** A=NULL;
  23. float** Ainv=NULL;
  24. cublasHandle_t handle;
  25. printf("LL:: main()\n");
  26. int dev = findCudaDevice(argc, (const char **)argv);
  27. if (dev == -1) {
  28. return EXIT_FAILURE;
  29. }
  30. printf("simpleCUBLAS_Smatinv test running..\n");
  31. status = cublasCreate(&handle);
  32. if (status != CUBLAS_STATUS_SUCCESS) {
  33. fprintf(stderr, "!!!! CUBLAS initialization error\n");
  34. return EXIT_FAILURE;
  35. }
  36. h_A = reinterpret_cast<float *>(malloc(n2 * sizeof(h_A[0])));
  37. if (h_A == 0) {
  38. fprintf(stderr, "!!!! host memory allocation error (A)\n");
  39. return EXIT_FAILURE;
  40. }
  41. h_Ainv = reinterpret_cast<float *>(malloc(n2 * sizeof(h_Ainv[0])));
  42. if (h_Ainv == 0) {
  43. fprintf(stderr, "!!!! host memory allocation error (A)\n");
  44. return EXIT_FAILURE;
  45. }
  46. for (int i = 0; i < n2; i++) {
  47. h_A[i] = rand() / static_cast<float>(RAND_MAX);
  48. }
  49. cudaMalloc(&A, sizeof(float*));
  50. cudaMalloc(&Ainv, sizeof(float*));
  51. if (cudaMalloc(reinterpret_cast<void **>(&d_A), n2 * sizeof(d_A[0])) !=
  52. cudaSuccess) {
  53. fprintf(stderr, "!!!! device memory allocation error (allocate d_A)\n");
  54. return EXIT_FAILURE;
  55. }
  56. cudaMemcpy(A, &d_A, sizeof(float*), cudaMemcpyHostToDevice);
  57. if (cudaMalloc(reinterpret_cast<void **>(&d_Ainv), n2 * sizeof(d_Ainv[0])) !=
  58. cudaSuccess) {
  59. fprintf(stderr, "!!!! device memory allocation error (allocate d_Ainv)\n");
  60. return EXIT_FAILURE;
  61. }
  62. cudaMemcpy(Ainv, &d_Ainv, sizeof(float*), cudaMemcpyHostToDevice);
  63. if (cudaMalloc(reinterpret_cast<void **>(&info), BATCH_SIZE*sizeof(int)) !=
  64. cudaSuccess) {
  65. fprintf(stderr, "!!!! device memory allocation error (allocate A)\n");
  66. return EXIT_FAILURE;
  67. }
  68. status = cublasSetVector(n2, sizeof(h_A[0]), h_A, 1, d_A, 1);
  69. if (status != CUBLAS_STATUS_SUCCESS) {
  70. fprintf(stderr, "!!!! device access error (write A)\n");
  71. return EXIT_FAILURE;
  72. }
  73. status = cublasSmatinvBatched(handle, N, A, N,
  74. Ainv, N, info, BATCH_SIZE);
  75. if (status != CUBLAS_STATUS_SUCCESS) {
  76. fprintf(stderr, "!!!! kernel execution error.\n");
  77. return EXIT_FAILURE;
  78. }
  79. status = cublasGetVector(n2, sizeof(float), d_Ainv, 1, h_Ainv, 1);
  80. if (status != CUBLAS_STATUS_SUCCESS) {
  81. fprintf(stderr, "!!!! device access error (read C)\n");
  82. return EXIT_FAILURE;
  83. }
  84. printf("A =\n");
  85. for(int i=0; i<n2; i++){
  86. if(i%N==0)printf("\n");
  87. printf("%3.3f ",h_A[i]);
  88. }
  89. printf("\ninversion of A:\n");
  90. printf("Ainv =\n");
  91. for(int i=0; i<n2; i++){
  92. if(i%N==0) printf("\n");
  93. printf("%3.3f ",h_Ainv[i]);
  94. }
  95. printf("\n\n");
  96. free(h_A);
  97. free(h_Ainv);
  98. if(cudaFree(d_A) != cudaSuccess) {
  99. fprintf(stderr, "!!!! memory free error (d_A)\n");
  100. return EXIT_FAILURE;
  101. }
  102. if (cudaFree(d_Ainv) != cudaSuccess) {
  103. fprintf(stderr, "!!!! memory free error (d_Ainv)\n");
  104. return EXIT_FAILURE;
  105. }
  106. status = cublasDestroy(handle);
  107. if (status != CUBLAS_STATUS_SUCCESS) {
  108. fprintf(stderr, "!!!! shutdown error (A)\n");
  109. return EXIT_FAILURE;
  110. }
  111. return 0;
  112. }

不检查版:

  1. //一个简洁的cublasSmatinvBatched 示例:
  2. #include <stdio.h>
  3. #include <stdlib.h>
  4. #include <string.h>
  5. #include <cublas_v2.h>
  6. #include <cuda_runtime.h>
  7. #define N (5)
  8. #define BATCH_SIZE (1)
  9. int NV_smatinv(float* matrixA, int n2);
  10. /* cublasStatus_t cublasSmatinvBatched(cublasHandle_t handle, int n, const float *A[], int lda,
  11. float *Ainv[], int lda_inv, int *info, int batchSize); */
  12. int main(){
  13. float* matrixA;
  14. matrixA = reinterpret_cast<float *>(malloc(N*N*sizeof(matrixA[0])));
  15. for(int i=0; i<N*N; i++){
  16. matrixA[i] = rand() / static_cast<float>(RAND_MAX);
  17. }
  18. NV_smatinv(matrixA, N*N);
  19. //IX_smatinv(matrixA, N*N);
  20. free(matrixA);
  21. return 0;
  22. }
  23. int NV_smatinv(float* matrixA, int n2) {
  24. cublasStatus_t status;
  25. float* h_A;
  26. float* d_A = 0;
  27. float* d_Ainv = 0;
  28. float* h_Ainv = 0;
  29. int* info=NULL;
  30. float** A=NULL; //LL:: array of matrices in d_A
  31. float** Ainv=NULL; //LL:: array of inversion of matrices d_Ainv
  32. cublasHandle_t handle;
  33. status = cublasCreate(&handle);
  34. h_A = reinterpret_cast<float *>(malloc(n2 * sizeof(h_A[0])));
  35. h_Ainv = reinterpret_cast<float *>(malloc(n2 * sizeof(h_Ainv[0])));
  36. memcpy(h_A, matrixA, n2*sizeof(matrixA[0]));
  37. cudaMalloc(&A, sizeof(float*)); //LL:: this example only has one float Matrix
  38. cudaMalloc(&Ainv, sizeof(float*)); //LL::
  39. if (cudaMalloc(reinterpret_cast<void **>(&d_A), n2 * sizeof(d_A[0])) !=
  40. cudaSuccess) {
  41. fprintf(stderr, "!!!! device memory allocation error (allocate d_A)\n");
  42. return EXIT_FAILURE;
  43. }
  44. cudaMemcpy(A, &d_A, sizeof(float*), cudaMemcpyHostToDevice);
  45. if (cudaMalloc(reinterpret_cast<void **>(&d_Ainv), n2 * sizeof(d_Ainv[0])) !=
  46. cudaSuccess) {
  47. fprintf(stderr, "!!!! device memory allocation error (allocate d_Ainv)\n");
  48. return EXIT_FAILURE;
  49. }
  50. cudaMemcpy(Ainv, &d_Ainv, sizeof(float*), cudaMemcpyHostToDevice);
  51. if (cudaMalloc(reinterpret_cast<void **>(&info), BATCH_SIZE*sizeof(int)) !=
  52. cudaSuccess) {
  53. fprintf(stderr, "!!!! device memory allocation error (allocate A)\n");
  54. return EXIT_FAILURE;
  55. }
  56. cudaMemcpy(d_A, h_A, n2*sizeof(h_A[0]), cudaMemcpyHostToDevice);
  57. if (status != CUBLAS_STATUS_SUCCESS) {
  58. fprintf(stderr, "!!!! device access error (write A)\n");
  59. return EXIT_FAILURE;
  60. }
  61. /* cublasStatus_t cublasSmatinvBatched(cublasHandle_t handle, int n, const float *A[], int lda,
  62. float *Ainv[], int lda_inv, int *info, int batchSize); */
  63. status = cublasSmatinvBatched(handle, N, A, N,
  64. Ainv, N, info, BATCH_SIZE);
  65. if (status != CUBLAS_STATUS_SUCCESS) {
  66. fprintf(stderr, "!!!! kernel execution error.\n");
  67. return EXIT_FAILURE;
  68. }
  69. cudaMemcpy(h_Ainv, d_Ainv, n2*sizeof(h_Ainv[0]), cudaMemcpyDeviceToHost);
  70. printf("\nnew A ="); for(int i=0; i<n2; i++){ if(i%N==0)printf("\n"); printf("%3.3f ",h_A[i]); }
  71. printf("\n\nnew Ainv ="); for(int i=0; i<n2; i++){ if(i%N==0) printf("\n"); printf("%3.3f ",h_Ainv[i]); } printf("\n\n");
  72. free(h_A);
  73. free(h_Ainv);
  74. cudaFree(d_A);
  75. cudaFree(d_Ainv);
  76. cudaFree(A);
  77. cudaFree(Ainv);
  78. cudaFree(info);
  79. status = cublasDestroy(handle);
  80. return 0;
  81. }

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/盐析白兔/article/detail/245514
推荐阅读
相关标签
  

闽ICP备14008679号