赞
踩
可以简单地粘贴放入一个cuBLAS sample的文件中替代运行:
-
- //一个简洁的 cublasSmatinvBatched 示例:
-
-
-
- /* Includes, system */
- #include <stdio.h>
- #include <stdlib.h>
- #include <string.h>
-
- /* Includes, cuda */
- #include <cublas_v2.h>
- #include <cuda_runtime.h>
- #include <helper_cuda.h>
-
- /* Matrix size */
- #define N (2)
- #define BATCH_SIZE (1)
-
-
- /* Main */
- /*
-  * Inverts one random N x N single-precision matrix on the GPU with
-  * cublasSmatinvBatched and prints the matrix followed by its inverse.
-  *
-  * cublasSmatinvBatched expects device-resident arrays of device pointers
-  * (one entry per matrix in the batch), so the addresses of d_A / d_Ainv are
-  * themselves uploaded into the small device arrays A / Ainv. The routine
-  * writes one status word per matrix into `info`; a non-zero entry means the
-  * corresponding matrix is singular. Per the cuBLAS documentation the routine
-  * only supports n <= 32.
-  *
-  * This sample sets up exactly one matrix, so BATCH_SIZE must stay 1.
-  *
-  * Returns 0 on success and EXIT_FAILURE on any error. Once the cuBLAS handle
-  * exists, every exit path releases all host and device resources.
-  */
- int main(int argc, char **argv) {
-     const int n2 = N * N;
-     int exit_code = EXIT_FAILURE;
-
-     float *h_A = NULL;    /* host copy of the input matrix */
-     float *h_Ainv = NULL; /* host copy of the computed inverse */
-     float *d_A = NULL;    /* device storage of the input matrix */
-     float *d_Ainv = NULL; /* device storage of the inverse */
-     float **A = NULL;     /* device array holding the pointer d_A */
-     float **Ainv = NULL;  /* device array holding the pointer d_Ainv */
-     int *info = NULL;     /* device array of per-matrix status words */
-     int h_info[BATCH_SIZE];
-
-     cublasHandle_t handle;
-     cublasStatus_t status;
-
-     printf("LL:: main()\n");
-
-     int dev = findCudaDevice(argc, (const char **)argv);
-
-     if (dev == -1) {
-         return EXIT_FAILURE;
-     }
-
-     printf("simpleCUBLAS_Smatinv test running..\n");
-
-     status = cublasCreate(&handle);
-
-     if (status != CUBLAS_STATUS_SUCCESS) {
-         fprintf(stderr, "!!!! CUBLAS initialization error\n");
-         return EXIT_FAILURE;
-     }
-
-     /* Single-pass block: `break` jumps to the shared cleanup code below. */
-     do {
-         h_A = reinterpret_cast<float *>(malloc(n2 * sizeof(h_A[0])));
-
-         if (h_A == NULL) {
-             fprintf(stderr, "!!!! host memory allocation error (A)\n");
-             break;
-         }
-
-         h_Ainv = reinterpret_cast<float *>(malloc(n2 * sizeof(h_Ainv[0])));
-
-         if (h_Ainv == NULL) {
-             fprintf(stderr, "!!!! host memory allocation error (Ainv)\n");
-             break;
-         }
-
-         for (int i = 0; i < n2; i++) {
-             h_A[i] = rand() / static_cast<float>(RAND_MAX);
-         }
-
-         if (cudaMalloc(reinterpret_cast<void **>(&A), sizeof(float *)) !=
-             cudaSuccess) {
-             fprintf(stderr, "!!!! device memory allocation error (allocate A)\n");
-             break;
-         }
-
-         if (cudaMalloc(reinterpret_cast<void **>(&Ainv), sizeof(float *)) !=
-             cudaSuccess) {
-             fprintf(stderr, "!!!! device memory allocation error (allocate Ainv)\n");
-             break;
-         }
-
-         if (cudaMalloc(reinterpret_cast<void **>(&d_A), n2 * sizeof(d_A[0])) !=
-             cudaSuccess) {
-             fprintf(stderr, "!!!! device memory allocation error (allocate d_A)\n");
-             break;
-         }
-
-         if (cudaMalloc(reinterpret_cast<void **>(&d_Ainv), n2 * sizeof(d_Ainv[0])) !=
-             cudaSuccess) {
-             fprintf(stderr, "!!!! device memory allocation error (allocate d_Ainv)\n");
-             break;
-         }
-
-         if (cudaMalloc(reinterpret_cast<void **>(&info), BATCH_SIZE * sizeof(int)) !=
-             cudaSuccess) {
-             fprintf(stderr, "!!!! device memory allocation error (allocate info)\n");
-             break;
-         }
-
-         /* Publish the matrix addresses to the device-side pointer arrays. */
-         if (cudaMemcpy(A, &d_A, sizeof(float *), cudaMemcpyHostToDevice) !=
-                 cudaSuccess ||
-             cudaMemcpy(Ainv, &d_Ainv, sizeof(float *), cudaMemcpyHostToDevice) !=
-                 cudaSuccess) {
-             fprintf(stderr, "!!!! device access error (write pointer arrays)\n");
-             break;
-         }
-
-         status = cublasSetVector(n2, sizeof(h_A[0]), h_A, 1, d_A, 1);
-
-         if (status != CUBLAS_STATUS_SUCCESS) {
-             fprintf(stderr, "!!!! device access error (write A)\n");
-             break;
-         }
-
-         status = cublasSmatinvBatched(handle, N, A, N,
-                                       Ainv, N, info, BATCH_SIZE);
-
-         if (status != CUBLAS_STATUS_SUCCESS) {
-             fprintf(stderr, "!!!! kernel execution error.\n");
-             break;
-         }
-
-         /* A successful call status does not imply the matrix was invertible;
-            singularity is only reported through the info array. */
-         if (cudaMemcpy(h_info, info, sizeof(h_info), cudaMemcpyDeviceToHost) !=
-             cudaSuccess) {
-             fprintf(stderr, "!!!! device access error (read info)\n");
-             break;
-         }
-
-         int singular = 0;
-
-         for (int b = 0; b < BATCH_SIZE; b++) {
-             if (h_info[b] != 0) {
-                 fprintf(stderr, "!!!! matrix %d is singular (info = %d)\n",
-                         b, h_info[b]);
-                 singular = 1;
-             }
-         }
-
-         if (singular) {
-             break;
-         }
-
-         status = cublasGetVector(n2, sizeof(float), d_Ainv, 1, h_Ainv, 1);
-
-         if (status != CUBLAS_STATUS_SUCCESS) {
-             fprintf(stderr, "!!!! device access error (read Ainv)\n");
-             break;
-         }
-
-         /* The buffers are printed N values per line in storage order. */
-         printf("A =\n");
-
-         for (int i = 0; i < n2; i++) {
-             if (i % N == 0) printf("\n");
-             printf("%3.3f ", h_A[i]);
-         }
-
-         printf("\ninversion of A:\n");
-         printf("Ainv =\n");
-
-         for (int i = 0; i < n2; i++) {
-             if (i % N == 0) printf("\n");
-             printf("%3.3f ", h_Ainv[i]);
-         }
-
-         printf("\n\n");
-
-         exit_code = EXIT_SUCCESS;
-     } while (0);
-
-     /* Shared cleanup: free(NULL) and cudaFree(NULL) are both no-ops, so this
-        is safe no matter how far the setup above progressed. */
-     free(h_A);
-     free(h_Ainv);
-
-     if (cudaFree(d_A) != cudaSuccess) {
-         fprintf(stderr, "!!!! memory free error (d_A)\n");
-         exit_code = EXIT_FAILURE;
-     }
-
-     if (cudaFree(d_Ainv) != cudaSuccess) {
-         fprintf(stderr, "!!!! memory free error (d_Ainv)\n");
-         exit_code = EXIT_FAILURE;
-     }
-
-     if (cudaFree(A) != cudaSuccess) {
-         fprintf(stderr, "!!!! memory free error (A)\n");
-         exit_code = EXIT_FAILURE;
-     }
-
-     if (cudaFree(Ainv) != cudaSuccess) {
-         fprintf(stderr, "!!!! memory free error (Ainv)\n");
-         exit_code = EXIT_FAILURE;
-     }
-
-     if (cudaFree(info) != cudaSuccess) {
-         fprintf(stderr, "!!!! memory free error (info)\n");
-         exit_code = EXIT_FAILURE;
-     }
-
-     status = cublasDestroy(handle);
-
-     if (status != CUBLAS_STATUS_SUCCESS) {
-         fprintf(stderr, "!!!! shutdown error (A)\n");
-         exit_code = EXIT_FAILURE;
-     }
-
-     return exit_code;
- }
不检查版:
- //一个简洁的 cublasSmatinvBatched 示例:
- #include <stdio.h>
- #include <stdlib.h>
- #include <string.h>
-
- #include <cublas_v2.h>
- #include <cuda_runtime.h>
-
- #define N (5)
- #define BATCH_SIZE (1)
- int NV_smatinv(float* matrixA, int n2);
-
- /* cublasStatus_t cublasSmatinvBatched(cublasHandle_t handle, int n, const float *A[], int lda,
- float *Ainv[], int lda_inv, int *info, int batchSize); */
-
- /*
-  * Fills an N x N host matrix with pseudo-random values in [0, 1] and hands
-  * it to NV_smatinv, which inverts it on the GPU and prints both matrices.
-  *
-  * Returns the status reported by NV_smatinv, or EXIT_FAILURE when the host
-  * buffer cannot be allocated.
-  */
- int main(){
-     float* matrixA;
-     matrixA = reinterpret_cast<float *>(malloc(N*N*sizeof(matrixA[0])));
-
-     /* Without this check the fill loop below would write through NULL. */
-     if (matrixA == NULL) {
-         fprintf(stderr, "!!!! host memory allocation error (matrixA)\n");
-         return EXIT_FAILURE;
-     }
-
-     for(int i=0; i<N*N; i++){
-         matrixA[i] = rand() / static_cast<float>(RAND_MAX);
-     }
-
-     /* Propagate the result so a failed inversion shows up in the exit code. */
-     int result = NV_smatinv(matrixA, N*N);
-     //IX_smatinv(matrixA, N*N);
-
-     free(matrixA);
-
-     return result;
- }
-
-
-
-
-
- /*
-  * Inverts a single N x N single-precision matrix on the GPU with
-  * cublasSmatinvBatched and prints the input and the inverse to stdout.
-  *
-  * matrixA  host buffer holding the n2 matrix elements; it is not modified.
-  * n2       number of elements in matrixA. It must equal N * N, because the
-  *          cuBLAS call and the print loops use the compile-time N.
-  *
-  * cublasSmatinvBatched expects device-resident arrays of device pointers,
-  * so the addresses of d_A / d_Ainv are uploaded into the one-element device
-  * arrays A / Ainv. Singularity is reported through the `info` array rather
-  * than the call status. Per the cuBLAS documentation the routine only
-  * supports n <= 32. Only one matrix is set up, so BATCH_SIZE must stay 1.
-  *
-  * Returns 0 on success and EXIT_FAILURE on any error; all resources are
-  * released on every path.
-  */
- int NV_smatinv(float* matrixA, int n2) {
-
-     int result = EXIT_FAILURE;
-
-     float* d_A = NULL;    /* device storage of the input matrix */
-     float* d_Ainv = NULL; /* device storage of the inverse */
-     float* h_Ainv = NULL; /* host copy of the computed inverse */
-     float** A = NULL;     /* device array holding the pointer d_A */
-     float** Ainv = NULL;  /* device array holding the pointer d_Ainv */
-     int* info = NULL;     /* device array of per-matrix status words */
-     int h_info[BATCH_SIZE];
-
-     cublasHandle_t handle;
-     cublasStatus_t status;
-
-     if (matrixA == NULL || n2 != N * N) {
-         fprintf(stderr, "!!!! invalid argument (expected %d matrix elements)\n", N * N);
-         return EXIT_FAILURE;
-     }
-
-     status = cublasCreate(&handle);
-
-     if (status != CUBLAS_STATUS_SUCCESS) {
-         fprintf(stderr, "!!!! CUBLAS initialization error\n");
-         return EXIT_FAILURE;
-     }
-
-     /* Single-pass block: `break` jumps to the shared cleanup code below. */
-     do {
-         h_Ainv = reinterpret_cast<float *>(malloc(n2 * sizeof(h_Ainv[0])));
-
-         if (h_Ainv == NULL) {
-             fprintf(stderr, "!!!! host memory allocation error (Ainv)\n");
-             break;
-         }
-
-         if (cudaMalloc(reinterpret_cast<void **>(&A), sizeof(float*)) != cudaSuccess ||
-             cudaMalloc(reinterpret_cast<void **>(&Ainv), sizeof(float*)) != cudaSuccess) {
-             fprintf(stderr, "!!!! device memory allocation error (allocate pointer arrays)\n");
-             break;
-         }
-
-         if (cudaMalloc(reinterpret_cast<void **>(&d_A), n2 * sizeof(d_A[0])) !=
-             cudaSuccess) {
-             fprintf(stderr, "!!!! device memory allocation error (allocate d_A)\n");
-             break;
-         }
-
-         if (cudaMalloc(reinterpret_cast<void **>(&d_Ainv), n2 * sizeof(d_Ainv[0])) !=
-             cudaSuccess) {
-             fprintf(stderr, "!!!! device memory allocation error (allocate d_Ainv)\n");
-             break;
-         }
-
-         if (cudaMalloc(reinterpret_cast<void **>(&info), BATCH_SIZE*sizeof(int)) !=
-             cudaSuccess) {
-             fprintf(stderr, "!!!! device memory allocation error (allocate info)\n");
-             break;
-         }
-
-         /* Publish the matrix addresses to the device-side pointer arrays. */
-         if (cudaMemcpy(A, &d_A, sizeof(float*), cudaMemcpyHostToDevice) != cudaSuccess ||
-             cudaMemcpy(Ainv, &d_Ainv, sizeof(float*), cudaMemcpyHostToDevice) != cudaSuccess) {
-             fprintf(stderr, "!!!! device access error (write pointer arrays)\n");
-             break;
-         }
-
-         /* Upload straight from the caller's buffer; no staging copy needed. */
-         if (cudaMemcpy(d_A, matrixA, n2*sizeof(matrixA[0]), cudaMemcpyHostToDevice) !=
-             cudaSuccess) {
-             fprintf(stderr, "!!!! device access error (write A)\n");
-             break;
-         }
-
-         /* cublasStatus_t cublasSmatinvBatched(cublasHandle_t handle, int n, const float *A[], int lda,
-            float *Ainv[], int lda_inv, int *info, int batchSize); */
-         status = cublasSmatinvBatched(handle, N, A, N,
-                                       Ainv, N, info, BATCH_SIZE);
-
-         if (status != CUBLAS_STATUS_SUCCESS) {
-             fprintf(stderr, "!!!! kernel execution error.\n");
-             break;
-         }
-
-         /* A successful call status does not imply the matrix was invertible;
-            singularity is only reported through the info array. */
-         if (cudaMemcpy(h_info, info, sizeof(h_info), cudaMemcpyDeviceToHost) !=
-             cudaSuccess) {
-             fprintf(stderr, "!!!! device access error (read info)\n");
-             break;
-         }
-
-         int singular = 0;
-
-         for (int b = 0; b < BATCH_SIZE; b++) {
-             if (h_info[b] != 0) {
-                 fprintf(stderr, "!!!! matrix %d is singular (info = %d)\n", b, h_info[b]);
-                 singular = 1;
-             }
-         }
-
-         if (singular) {
-             break;
-         }
-
-         if (cudaMemcpy(h_Ainv, d_Ainv, n2*sizeof(h_Ainv[0]), cudaMemcpyDeviceToHost) !=
-             cudaSuccess) {
-             fprintf(stderr, "!!!! device access error (read Ainv)\n");
-             break;
-         }
-
-         /* The buffers are printed N values per line in storage order. */
-         printf("\nnew A =");
-         for (int i = 0; i < n2; i++) {
-             if (i % N == 0) printf("\n");
-             printf("%3.3f ", matrixA[i]);
-         }
-
-         printf("\n\nnew Ainv =");
-         for (int i = 0; i < n2; i++) {
-             if (i % N == 0) printf("\n");
-             printf("%3.3f ", h_Ainv[i]);
-         }
-         printf("\n\n");
-
-         result = 0;
-     } while (0);
-
-     /* Shared cleanup: free(NULL) and cudaFree(NULL) are both no-ops, so this
-        is safe no matter how far the setup above progressed. */
-     free(h_Ainv);
-
-     cudaFree(d_A);
-     cudaFree(d_Ainv);
-     cudaFree(A);
-     cudaFree(Ainv);
-     cudaFree(info);
-
-     status = cublasDestroy(handle);
-
-     if (status != CUBLAS_STATUS_SUCCESS) {
-         fprintf(stderr, "!!!! shutdown error\n");
-         result = EXIT_FAILURE;
-     }
-
-     return result;
- }
-
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。