赞
踩
- /* This example demonstrates how to use the cuBLAS library
- * by computing a batched LU factorization (cublasCgetrfBatched)
- * of a random single-precision complex matrix on the device,
- * followed by a small cuCdivf complex-division check on the host.
- */
- /* Includes, system */
- #include <stdio.h>
- #include <stdlib.h>
- #include <string.h>
-
- /* Includes, cuda */
- #include <cublas_v2.h>
- #include <cuda_runtime.h>
- //#include <helper_cuda.h>
- #include <vector>
- #include <random>
- #include <iostream>
-
- using namespace std;
-
- /* Matrix size */
- #define N (4)
-
- void printComplexMatrix(float2* A, int m, int n, int lda);
-
- /* Main */
- int main(int argc, char **argv) {
-
- int n=N;
- int batchCount=1;
-
-
- const auto kSizeN = n;
- int Pivot[kSizeN * batchCount];
- int info[batchCount];
- int Pivot_cu[kSizeN * batchCount];
- int info_cu[batchCount];
-
-
- // Creates input matrices
- auto mat_a = std::vector<float2>(kSizeN * kSizeN);
- auto mat_a_cu = std::vector<float2>(kSizeN * kSizeN);
-
- // Create a random number generator
- const auto random_seed = 12;
- std::default_random_engine generator(
- static_cast<unsigned int>(random_seed));
- std::uniform_real_distribution<float> distribution(-1.0f, 1.0f);
-
- // Populates input data structures
- for (auto &item : mat_a) {
- item.x = distribution(generator);
- item.y = distribution(generator);
- }
-
- //LL:: printComplexMatrix(mat_a.data(), kSizeN, kSizeN, kSizeN);
-
- size_t len_a = kSizeN * kSizeN * sizeof(float2);
-
- float2 *devA_cu[batchCount];
- float2 **d_Aarray_cu;
-
- for (int i = 0; i < batchCount; i++) {
- cudaMalloc((void **)&devA_cu[i], len_a);
- }
-
- for (int i = 0; i < batchCount; i++) {
- cudaMemcpy(devA_cu[i], mat_a.data(), len_a, cudaMemcpyHostToDevice);
- }
-
- cudaMalloc((void **)&d_Aarray_cu, batchCount * sizeof(float2 *));
- cudaMemcpy(d_Aarray_cu, devA_cu, batchCount * sizeof(float2 *),
- cudaMemcpyHostToDevice);
-
- int *Pivot_cu_d;
- cudaMalloc((void **)&Pivot_cu_d, kSizeN * batchCount * sizeof(int));
-
- int *info_cu_d;
- cudaMalloc((void **)&info_cu_d, batchCount * sizeof(int));
-
- cublasHandle_t cublasHandle;
- cublasStatus_t cu_status;
- cu_status = cublasCreate(&cublasHandle);
- if(CUBLAS_STATUS_SUCCESS != cu_status)
- cout<<"ERROR!"<<endl;
-
- cu_status = cublasCgetrfBatched(cublasHandle, kSizeN, d_Aarray_cu, kSizeN,
- Pivot_cu_d, info_cu_d, batchCount);
- if(CUBLAS_STATUS_SUCCESS != cu_status)
- cout<<"ERROR!"<<endl;
-
- cudaError_t ret = cudaDeviceSynchronize();
-
- cu_status = cublasDestroy(cublasHandle);
- if(CUBLAS_STATUS_SUCCESS != cu_status)
- cout<<"ERROR!"<<endl;
-
- cudaMemcpy(Pivot_cu, Pivot_cu_d, kSizeN * sizeof(int) * batchCount,
- cudaMemcpyDeviceToHost);
- cudaMemcpy(info_cu, info_cu_d, sizeof(int) * batchCount,
- cudaMemcpyDeviceToHost);
- for (int i = 0; i < batchCount; i++) {
- for (int j = 0; j < kSizeN; j++) {
- //LL:: cout<<(Pivot_cu[i * kSizeN + j])<< endl;
- }
- cudaMemcpy(mat_a_cu.data(), devA_cu[i], len_a, cudaMemcpyDeviceToHost);
- float2 *cu_result = mat_a_cu.data();
- //LL:: printComplexMatrix(cu_result, kSizeN,kSizeN,kSizeN );
-
- cudaFree(devA_cu[i]);
- }
-
- cudaFree(Pivot_cu_d);
- cudaFree(info_cu_d);
- cudaFree(d_Aarray_cu);
- //------------------------------------------------------------------------
-
- cuComplex a,b;
- a.x = -0.214;
- a.y = 0.255;
-
- b.x = -1.0000;
- b.y = 0.1570;
- cuComplex c = cuCdivf(a, b);
-
- cout<<c.x<<" + "<<c.y<<"i"<<endl;
- //------------------------------------------------------------------------
- return 0;
- }
-
-
-
- void printComplexMatrix(float2* A, int m, int n, int lda){
- for(int i=0; i<m; i++){
- for(int j=0; j<n; j++){
- printf("(%8.5f,%8.5f*I) ", A[i+j*lda].x, A[i+j*lda].y);
- }
- printf("\n\n");
- }
- printf("\n");
- }
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。