>>(); return 0;..._cuda unique 算法">
赞
踩
#include<stdio.h>
// Device entry point: each thread that runs this kernel prints a greeting
// through the device-side printf.
__global__ void kernel()
{
    printf("hello world");
}
// Launches `kernel` with a single block of a single thread and waits for it
// to finish.
//
// Returns 0 on success, 1 if the launch or the kernel execution failed.
int main() {
    kernel<<<1, 1>>>();

    // A kernel launch reports configuration errors only via cudaGetLastError().
    cudaError_t err = cudaGetLastError();

    // Kernel launches are asynchronous: without a synchronization point the
    // process may exit before the kernel runs and before the device-side
    // printf buffer is flushed to the host.
    if (err == cudaSuccess) {
        err = cudaDeviceSynchronize();
    }

    if (err != cudaSuccess) {
        fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(err));
        return 1;
    }
    return 0;
}
#include<stdio.h>
// Adds the two integers `a` and `b` and stores the sum in the int that `c`
// points to. `c` is dereferenced inside the kernel, so it must be an address
// the device can write to.
__global__ void add(int a, int b, int *c)
{
    const int sum = a + b;
    *c = sum;
}
// Computes 2 + 7 with the `add` kernel and prints the result.
//
// Allocates one int of device memory, launches `add` with one block of one
// thread, copies the result back to the host and releases the device buffer.
//
// Returns 0 on success, 1 if any CUDA call or the kernel launch failed.
int main(){
    int c = 0;
    int *dev_c = NULL;

    cudaError_t err = cudaMalloc((void**)&dev_c, sizeof(int));

    if (err == cudaSuccess) {
        add<<<1,1>>>(2,7,dev_c);
        // Launch-configuration errors are only visible through cudaGetLastError().
        err = cudaGetLastError();
    }

    if (err == cudaSuccess) {
        // Blocking copy: also waits for the kernel to finish and reports any
        // error raised while it was executing.
        err = cudaMemcpy(&c,dev_c,sizeof(int),cudaMemcpyDeviceToHost);
    }

    // Release the device buffer on every path; cudaFree(NULL) is a no-op.
    cudaFree(dev_c);

    if (err != cudaSuccess) {
        fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(err));
        return 1;
    }

    printf("2 + 7 = %d",c);
    return 0;
}
#include<stdio.h>

#define N 10

// Element-wise vector addition: c[i] = a[i] + b[i].
//
// Launch layout: one block per element (the index comes from blockIdx.x
// only), so the kernel is expected to be launched as add<<<N, 1>>>.
// Blocks whose index is not below N do nothing.
__global__ void add( int *a, int *b, int *c ) {
    int tid = blockIdx.x;    // this thread handles the data at its block index
    if (tid < N)
        c[tid] = a[tid] + b[tid];
}

// Reports a failed CUDA call on stderr.
// Returns 1 when `status` is cudaSuccess, 0 otherwise.
static int cudaOk( cudaError_t status, const char *what ) {
    if (status != cudaSuccess) {
        fprintf( stderr, "%s failed: %s\n", what, cudaGetErrorString( status ) );
        return 0;
    }
    return 1;
}

// Adds two N-element host arrays on the GPU and prints every sum.
// Returns 0 on success, 1 if any CUDA call or the kernel launch failed.
int main( void ) {
    int a[N], b[N], c[N];
    int *dev_a = NULL, *dev_b = NULL, *dev_c = NULL;
    const size_t bytes = N * sizeof(int);

    // fill the arrays 'a' and 'b' on the CPU
    for (int i=0; i<N; i++) {
        a[i] = -i;
        b[i] = i * i;
    }

    // allocate the memory on the GPU
    int ok = cudaOk( cudaMalloc( (void**)&dev_a, bytes ), "cudaMalloc(dev_a)" );
    ok = ok && cudaOk( cudaMalloc( (void**)&dev_b, bytes ), "cudaMalloc(dev_b)" );
    ok = ok && cudaOk( cudaMalloc( (void**)&dev_c, bytes ), "cudaMalloc(dev_c)" );

    // copy the arrays 'a' and 'b' to the GPU
    ok = ok && cudaOk( cudaMemcpy( dev_a, a, bytes, cudaMemcpyHostToDevice ),
                       "cudaMemcpy(a -> dev_a)" );
    ok = ok && cudaOk( cudaMemcpy( dev_b, b, bytes, cudaMemcpyHostToDevice ),
                       "cudaMemcpy(b -> dev_b)" );

    if (ok) {
        add<<<N,1>>>( dev_a, dev_b, dev_c );
        // a launch does not return a status; query it explicitly
        ok = cudaOk( cudaGetLastError(), "add kernel launch" );
    }

    // copy the array 'c' back from the GPU to the CPU
    ok = ok && cudaOk( cudaMemcpy( c, dev_c, bytes, cudaMemcpyDeviceToHost ),
                       "cudaMemcpy(dev_c -> c)" );

    // display the results
    if (ok) {
        for (int i=0; i<N; i++) {
            printf( "%d + %d = %d\n", a[i], b[i], c[i] );
        }
    }

    // free the memory allocated on the GPU (cudaFree(NULL) is a no-op,
    // so this is safe on the failure paths as well)
    cudaFree( dev_a );
    cudaFree( dev_b );
    cudaFree( dev_c );

    return ok ? 0 : 1;
}
// Element-wise sum of two int arrays: c[i] = a[i] + b[i].
// The element index is taken from blockIdx.x alone, so each block handles one
// element; blocks with an index at or beyond N exit without touching memory.
__global__ void add( int *a, int *b, int *c ) {
    const int block = blockIdx.x;
    if (block >= N) {
        return;
    }
    c[block] = a[block] + b[block];
}
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。