版主您好:
代码贴上了。
现在的主要问题是:
程序执行过程中,当CPU利用率降为0时,kernel会执行得很慢。如果此时通过打开网页等操作刺激一下系统,
速度就可以暂时恢复正常,不清楚是什么原因。
我在笔记本上运行该程序时没有出现这样的问题。会不会是这台机器的CPU性能不足?
// Per-cell perturbation loop. For every grid cell j:
//   1. save m_Velocity[j] in v and overwrite it with 1.1,
//   2. upload the whole velocity array to the device,
//   3. call WaveRecord once per shot position,
//   4. download the record and store 10*(record - m_Arecord)/v in
//      ADerivative[i][j] for every record sample i,
//   5. restore m_Velocity[j] on the host.
// The `*` operators were missing from the posted text (apparently stripped by
// the forum markup) and are restored here so the fragment compiles.
// NOTE(review): the perturbed value is the constant 1.1 while the divisor is
// the original value v -- confirm that 1.1 (and not 1.1*v) is intended.

// Loop-invariant byte counts; sizeof comes first so the product is size_t.
const size_t velocityBytes = sizeof(float) * m_GridnumX * m_GridnumZ;
const size_t recordBytes   = sizeof(float) * m_tnum * m_receiver;

for (j = 0; j < m_GridnumX * m_GridnumZ; j++)
{
    v = m_Velocity[j];
    m_Velocity[j] = 1.1f;
    cudaMemcpy(m_vecVelocity, m_Velocity, velocityBytes, cudaMemcpyHostToDevice);

    for (m_ShotX = 0; m_ShotX < m_GridnumX; m_ShotX++)
    {
        WaveRecord(m_ShotX, m_vecRecord); // runs the kernel launches for this shot
    }

    // Blocking copy: returns only after the preceding device work has finished.
    cudaMemcpy(m_Record, m_vecRecord, recordBytes, cudaMemcpyDeviceToHost);

    for (i = 0; i < m_receiver * m_tnum; i++)
    {
        a1 = m_Record[i];
        a2 = m_Arecord[i];
        ADerivative[i][j] = 10 * (a1 - a2) / v;
    }

    // Undo the host-side perturbation; the device copy is refreshed by the
    // full upload at the top of the next iteration.
    m_Velocity[j] = v;
}
////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////
// Runs the time-stepping loop for a single shot position.
//
// Parameters:
//   ShotX  - shot index, forwarded unchanged to c_Calc. When it is 0 the
//            record buffer is cleared before time stepping starts.
//   m_vecR - record buffer; cleared on the first shot and passed to c_Calc as
//            its first argument. NOTE(review): presumably a device pointer of
//            m_receiver*m_tnum floats -- confirm against the allocation.
//
// Every boundary and wavefield buffer is zeroed on entry, then c_Calc
// (defined elsewhere) is called once per time step for m_t in
// [2, m_tnum + 2). On even and odd steps the two sets of Past/Now and
// boundary buffers exchange argument positions.
//
// Changes relative to the posted text:
//   * the `*` operators missing from every size expression are restored;
//   * c_Calc now receives the m_vecR parameter instead of the member
//     m_vecRecord, so the buffer that is cleared is the one that is written
//     (identical behaviour for callers that pass m_vecRecord).
void WaveR::WaveRecord(int ShotX , float * m_vecR )
{
    // Byte counts; sizeof comes first so the product is evaluated in size_t.
    const size_t longBytes   = sizeof(float) * m_PML * m_Wide;
    const size_t shortBytes  = sizeof(float) * m_PML * m_GridnumZ;
    const size_t gridBytes   = sizeof(float) * m_GridWideth * m_GridHight;
    const size_t recordBytes = sizeof(float) * m_receiver * m_tnum;

    // Buffers cleared with m_PML * m_Wide elements.
    void* const longBuffers[] = {
        m_vecUpLongX,    m_vecUpLongZ,    m_vecDownLongX,  m_vecDownLongZ,
        m_vecUpLongX0,   m_vecUpLongZ0,   m_vecDownLongX0, m_vecDownLongZ0,
        m_UUpLongX,      m_UUpLongZ,      m_UDownLongX,    m_UDownLongZ
    };
    // Buffers cleared with m_PML * m_GridnumZ elements.
    void* const shortBuffers[] = {
        m_vecLeftShortX,   m_vecLeftShortZ,   m_vecRightShortX,  m_vecRightShortZ,
        m_vecLeftShortX0,  m_vecLeftShortZ0,  m_vecRightShortX0, m_vecRightShortZ0,
        m_ULeftShortX,     m_ULeftShortZ,     m_URightShortX,    m_URightShortZ
    };
    // Buffers cleared with m_GridWideth * m_GridHight elements.
    // m_vecNext and m_vecNext1 are cleared here although this function does
    // not pass them to c_Calc.
    void* const gridBuffers[] = {
        m_vecPast,  m_vecNow,  m_vecNext,
        m_vecPast1, m_vecNow1, m_vecNext1
    };

    for (size_t k = 0; k < sizeof(longBuffers) / sizeof(longBuffers[0]); ++k)
    {
        cudaMemset(longBuffers[k], 0, longBytes);
    }
    for (size_t k = 0; k < sizeof(shortBuffers) / sizeof(shortBuffers[0]); ++k)
    {
        cudaMemset(shortBuffers[k], 0, shortBytes);
    }
    for (size_t k = 0; k < sizeof(gridBuffers) / sizeof(gridBuffers[0]); ++k)
    {
        cudaMemset(gridBuffers[k], 0, gridBytes);
    }

    // The record buffer is cleared only for the first shot.
    if (ShotX == 0)
    {
        cudaMemset(m_vecR, 0, recordBytes);
    }

    // Values forwarded to c_Calc as its last two arguments; constant for
    // every step.
    int m_ThreadX = 32;
    int m_ThreadZ = 2;

    // NOTE(review): both calls pass m_numX next to m_GridnumZ -- confirm that
    // m_numX (rather than m_GridnumX) is the intended member.
    for (m_t = 2; m_t < m_tnum + 2; m_t++)
    {
        if (m_t % 2 == 0)
        {
            // Even step: Past/Now and the *0 boundary sets go first.
            c_Calc(
                m_vecR            ,m_vecVelocity ,
                m_alphax          ,m_alphaz,
                m_vecPast         ,m_vecNow,
                m_vecPast1        ,m_vecNow1,
                m_vecUpLongX0     ,m_vecUpLongZ0,
                m_vecDownLongX0   ,m_vecDownLongZ0,
                m_vecUpLongX      ,m_vecUpLongZ,
                m_vecDownLongX    ,m_vecDownLongZ,
                m_UUpLongX        ,m_UUpLongZ,
                m_UDownLongX      ,m_UDownLongZ,
                m_vecLeftShortX0  ,m_vecLeftShortZ0,
                m_vecRightShortX0 ,m_vecRightShortZ0,
                m_vecLeftShortX   ,m_vecLeftShortZ,
                m_vecRightShortX  ,m_vecRightShortZ,
                m_ULeftShortX     ,m_ULeftShortZ,
                m_URightShortX    ,m_URightShortZ,
                m_GridWideth      ,m_GridHight,
                m_numX            ,m_GridnumZ,
                m_t               ,ShotX,
                m_ThreadX         ,m_ThreadZ
            );
        }
        else
        {
            // Odd step: the same call with the two buffer sets exchanged.
            c_Calc(
                m_vecR            ,m_vecVelocity ,
                m_alphax          ,m_alphaz,
                m_vecPast1        ,m_vecNow1,
                m_vecPast         ,m_vecNow,
                m_vecUpLongX      ,m_vecUpLongZ,
                m_vecDownLongX    ,m_vecDownLongZ,
                m_vecUpLongX0     ,m_vecUpLongZ0,
                m_vecDownLongX0   ,m_vecDownLongZ0,
                m_UUpLongX        ,m_UUpLongZ,
                m_UDownLongX      ,m_UDownLongZ,
                m_vecLeftShortX   ,m_vecLeftShortZ,
                m_vecRightShortX  ,m_vecRightShortZ,
                m_vecLeftShortX0  ,m_vecLeftShortZ0,
                m_vecRightShortX0 ,m_vecRightShortZ0,
                m_ULeftShortX     ,m_ULeftShortZ,
                m_URightShortX    ,m_URightShortZ,
                m_GridWideth      ,m_GridHight,
                m_numX            ,m_GridnumZ,
                m_t               ,ShotX,
                m_ThreadX         ,m_ThreadZ
            );
        }
    }
}