cuda的一个小bug请教

system · 2011 年8 月 8 日 10:26

global void DCTkernel(Matrix A){
int i;
DCT_type a,b,c,d,e,f;
a=1.387039842;
b=1.306562962;
c=1.175875600;
d=0.785694957;
e=0.541196099;
f=0.275899378;
i=threadIdx.x;
Matrix*array;
array=A.elements;

各位前辈
大家好
本人新手，刚学CUDA编程
仿照例子想实现8*8的dct变换
编译到红色部分的时候，总是通不过，搞了一天还是不行
error：expected a member name
有点崩溃了
请大家指教

vw009 · 2011 年8 月 8 日 13:09

能否贴出你的代码(删掉无关的部分)

system · 2011 年8 月 9 日 07:35

只是简单的把代码改写，没有优化，不知道问题出在哪里，请看一下谢谢了
#include<stdio.h>
#include<stdlib.h>
#include<math.h>
#include<sys/time.h>
#include"cutil_inline.h"
#define x 0.353553390
typedef double DCT_type;

typedef struct{
int width;
int height;
DCT_type*elements;
}Matrix;

global void DCT_kernel(Matrix A){
//int i;
int i=0;
i=blockIdx.x;／／在这是编译器总是指出错误 error：expected a member name
DCT_type a,b,c,d,e,f;
a=1.387039842;
b=1.306562962;
c=1.175875600;
d=0.785694957;
e=0.541196099;
f=0.275899378;

DCT_type*array;
array=A.elements;

DCT_type temp0,temp1,temp2,temp3,temp4,temp5,temp6,temp7;

temp0=(array[i*8+0]+array[i*8+7])*x;
temp2=(array[i*8+1]+array[i*8+6])*x;
temp4=(array[i*8+2]+array[i*8+5])*x;
temp6=(array[i*8+3]+array[i*8+4])*x;

temp1=(array[i*8+0]-array[i*8+7])*x;
temp3=(array[i*8+6]-array[i*8+1])*x;
temp5=(array[i*8+2]-array[i*8+5])*x;
temp7=(array[i*8+4]-array[i*8+3])*x;

array[i*8+0]=temp0+temp2+temp4+temp6;

array[i*8+2]=btemp0+etemp2-etemp4-btemp6;

array[i*8+4]=temp0-temp2-temp4+temp6;
array[i*8+6]=etemp0-btemp2+btemp4-etemp6;

array[i*8+1]=atemp1-ctemp3+dtemp5-ftemp7;

array[i*8+3]=ctemp1+ftemp3-atemp5+dtemp7;
array[i*8+5]=dtemp1+atemp3+ftemp5-ctemp7;
array[i*8+7]=ftemp1+dtemp3+ctemp5+atemp7;

__syncthreads();

temp0=(array[08+i]+array[78+i])x;
temp2=(array[18+i]+array[68+i])x;
temp4=(array[28+i]+array[58+i])x;
temp6=(array[38+i]+array[4*8+i])*x;

temp1=(array[08+i]-array[78+i])x;
temp3=(array[68+i]-array[18+i])x;
temp5=(array[28+i]-array[58+i])x;
temp7=(array[48+i]-array[3*8+i])*x;

array[0*8+i]=temp0+temp2+temp4+temp6;

array[28+i]=btemp0+etemp2-etemp4-b*temp6;

array[48+i]=temp0-temp2-temp4+temp6;
array[68+i]=etemp0-btemp2+btemp4-etemp6;

array[18+i]=atemp1-ctemp3+dtemp5-f*temp7;

array[38+i]=ctemp1+ftemp3-atemp5+dtemp7;
array[58+i]=dtemp1+atemp3+ftemp5-ctemp7;
array[78+i]=ftemp1+dtemp3+ctemp5+a*temp7;

__syncthreads();

}

main(){
int i,j,nblocks,nthreads;
nblocks=8;
nthreads=1;
size_t size;

Matrix A;
A.width=8;
A.height=8;
size=A.widthA.heightsizeof(DCT_type);
A.elements=(DCT_type*)malloc(size);
for(i=0;i<A.height;i++){
for(j=0;j<A.width;j++)
A.elements[i*A.width+j]=1;
}

//Initialize Matrix d_A,d_B,d_C
Matrix d_A;
d_A.width=A.width;
d_A.height=A.height;
size=A.widthA.heightsizeof(DCT_type);
cudaMalloc((void**)&d_A.elements,size);
cudaMemcpy(d_A.elements,A.elements,size,cudaMemcpyHostToDevice);

//Invoke kernel
//dim3 dimBlock(8,8);
//dim3 dimGrid(8,8);
DCT_kernel<<<nblocks,nthreads>>>(d_A);
//Read A from device memory
cudaMemcpy(A.elements,d_A.elements,size,cudaMemcpyDeviceToHost);
for(i=0;i<A.width*A.height;i++){
if(i%8==0)
printf(“\n”);
printf("%f ",A.elements[i]);
}
//Free device memory
cudaFree(d_A.elements);

}[/i]

system · 2011 年8 月 9 日 08:00

感觉像是不识别blockIdx呢。。
是不是规则没定义好，右键->自定义生成规则->cuda build rule
或者是不是写成.cpp文件了？

system · 2011 年8 月 10 日 03:44

谢谢楼上的解答
经过一天的排查终于找到错误了:‘(
我这个源程序是在windows下写的， CUDA是在Ubuntu下运行，应该是操作系统字符编码的问题:’(
以后要小心了，唉！
下面是程序的优化了，有什么问题再向前辈请教:)

system · 2011 年8 月 11 日 04:55

这种事最折磨人，不过也锻炼人。。呵呵。

system · 2011 年8 月 14 日 12:50

学习了，呵呵

system · 2011 年8 月 14 日 12:57

应该是写成.cu文件就行吧。