我參照 NVIDIA 手冊使用 surface memory,但是發生錯誤。
[
我從網站上複製貼上,結果還是一樣。手冊說 CC(Compute Capability)2.0 以上都可以使用;我的顯示卡是 GTX 650,編譯時下的指令是 -arch=sm_30。
以下是我的程式碼:
#include <stdio.h>
#include <iostream>
#include <fstream>
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
using namespace std;
// One relaxation update performed through surface objects.
//
// Parameters:
//   V   - surface object passed as the first argument (its use is in the
//         elided part of the body).
//   VN  - surface object that receives the updated value at (i, j).
//   rho - surface object from which the source term r is read at (i, j).
//
// The visible part reads r from rho, combines it with VR, VD, VL, VU and h
// (all defined in the elided part; presumably the four neighbour values and
// the grid spacing - TODO confirm) and writes the result to VN.
//
// Fix: the handle type is cudaSurfaceObject_t; `cudaSurfaceObject` is not a
// CUDA type and does not compile.
//
// NOTE(review): the CUDA programming guide specifies that the x coordinate of
// surf2Dread/surf2Dwrite is a BYTE offset (element index * sizeof(element)).
// `i` is computed in the elided part; confirm it is already scaled by
// sizeof(float), otherwise use `i * sizeof(float)` in the surface calls.
__global__ void kernel_surf(cudaSurfaceObject_t V, cudaSurfaceObject_t VN, cudaSurfaceObject_t rho)
{
... ... ...
    // Read the source term for this cell; out-of-range access traps.
    surf2Dread(&r, rho, i, j, cudaBoundaryModeTrap);
    // Average of the four values minus the scaled source term.
    temp = 0.25 * (VR + VD + VL + VU - h * h * r);
    // Store the updated value into the output surface.
    surf2Dwrite(temp, VN, i, j, cudaBoundaryModeTrap);
}
// Host driver: allocates linear device buffers and three surface-capable CUDA
// arrays (rho, V, VN), wraps each array in a surface object, launches
// kernel_surf with V and VN swapped depending on `tf`, then releases
// everything.
//
// Fixes relative to the original listing:
//   * memset used sizeof(resoureDescription), an undeclared identifier; it
//     now uses sizeof(resDesc).
//   * cudaThreadExit() is deprecated; cudaDeviceReset() is its replacement.
//
// NOTE(review): N, tf, grid, block, start and stop are defined in code that
// is elided from this listing.
// NOTE(review): no CUDA return code is checked here; wrap the calls in an
// error-checking helper so the first failing call is reported.
int main()
{
    float *V, *dev_V, *dev_VNew, *dev_rho;

    // Pinned host buffer plus three linear device buffers of N*N floats.
    cudaHostAlloc((void**)&V, N*N*sizeof(float), cudaHostAllocDefault);
    cudaMalloc((void**)&dev_V, N*N*sizeof(float));
    cudaMalloc((void**)&dev_VNew, N*N*sizeof(float));
    cudaMalloc((void**)&dev_rho, N*N*sizeof(float));

    // N x N single-channel float arrays; cudaArraySurfaceLoadStore is required
    // for an array that will back a surface object.
    cudaChannelFormatDesc chDesc = cudaCreateChannelDesc<float>();
    cudaArray *cu_rho, *cu_V, *cu_VN;
    cudaMallocArray(&cu_rho, &chDesc, N, N, cudaArraySurfaceLoadStore);
    cudaMallocArray(&cu_VN, &chDesc, N, N, cudaArraySurfaceLoadStore);
    cudaMallocArray(&cu_V, &chDesc, N, N, cudaArraySurfaceLoadStore);

    // Copy the linear device buffers into the CUDA arrays.
    // NOTE(review): nothing visible in this listing fills dev_rho, dev_VNew or
    // dev_V before these copies - confirm they are initialised beforehand.
    // NOTE(review): cudaMemcpyToArray is deprecated in recent CUDA toolkits;
    // cudaMemcpy2DToArray is the documented replacement.
    cudaMemcpyToArray(cu_rho, 0, 0, dev_rho, N*N*sizeof(float), cudaMemcpyDeviceToDevice);
    cudaMemcpyToArray(cu_VN, 0, 0, dev_VNew, N*N*sizeof(float), cudaMemcpyDeviceToDevice);
    cudaMemcpyToArray(cu_V, 0, 0, dev_V, N*N*sizeof(float), cudaMemcpyDeviceToDevice);

    // Create the CUDA resource description; zero it so unused fields are defined.
    struct cudaResourceDesc resDesc;
    memset(&resDesc, 0, sizeof(resDesc));
    resDesc.resType = cudaResourceTypeArray; // surface objects must be backed by a CUDA array

    // Create one surface object per array, reusing the same descriptor and
    // only swapping the array handle each time.
    resDesc.res.array.array = cu_rho;
    cudaSurfaceObject_t su_rho = 0;
    cudaCreateSurfaceObject(&su_rho, &resDesc);

    resDesc.res.array.array = cu_VN;
    cudaSurfaceObject_t su_VN = 0;
    cudaCreateSurfaceObject(&su_VN, &resDesc);

    resDesc.res.array.array = cu_V;
    cudaSurfaceObject_t su_V = 0;
    cudaCreateSurfaceObject(&su_V, &resDesc);
... ... ...
    // Swap the first two kernel arguments depending on tf, so V and VN
    // exchange roles between launches.
    if (tf)
    {
        kernel_surf<<<grid, block>>>(su_V, su_VN, su_rho);
    }
    else
    {
        kernel_surf<<<grid, block>>>(su_VN, su_V, su_rho);
    }
... ... ...
    // Destroy surface objects before freeing the arrays that back them.
    cudaDestroySurfaceObject(su_rho);
    cudaDestroySurfaceObject(su_VN);
    cudaDestroySurfaceObject(su_V);

    // Free device memory.
    cudaFreeArray(cu_rho);
    cudaFreeArray(cu_VN);
    cudaFreeArray(cu_V);
    cudaFree(dev_V);
    cudaFree(dev_VNew);
    cudaFree(dev_rho);

    // Free pinned host memory and the timing events.
    cudaFreeHost(V);
    cudaEventDestroy(start);
    cudaEventDestroy(stop);

    // cudaThreadExit() is deprecated; cudaDeviceReset() replaces it.
    cudaDeviceReset();
}