在GPU上需要求解一个三次方程：$c[3]\,s^3 + c[2]\,s^2 + c[1]\,s + c[0] = 0$
图片是方法
[attach]3288[/attach][attach]3289[/attach]
下面是对应代码
/*
 * Plain triple of single-precision values ("zuobiao" is pinyin for
 * "coordinates").  SolveCubic() writes its (up to three) roots into
 * x, y and z in that order.  Because the members are float, anything stored
 * here is rounded to single precision.
 */
struct zuobiao
{
    float x;  /* first component  */
    float y;  /* second component */
    float z;  /* third component  */
};
device host int reconstruction::SolveCubic(double c,zuobiao &s)
{
int i, num;
double sub;
double A, B, C;
double sq_A, p, q;
double cb_p, D;
double s_[3];
s_[0]=-1;
s_[1]=-1;
s_[2]=-1;
/* normal form: x^3 + Ax^2 + Bx + C = 0 /
A = c[ 2 ] / c[ 3 ];
B = c[ 1 ] / c[ 3 ];
C = c[ 0 ] / c[ 3 ];
/ substitute x = y - A/3 to eliminate quadric term:
x^3 +px + q = 0 /
sq_A = A * A;
p = 1.0/3 * (- 1.0/3 * sq_A + B);
q = 1.0/2 * (2.0/27 * A * sq_A - 1.0/3 * A * B + C);
/ use Cardano’s formula /
cb_p = p * p * p;
D = q * q + cb_p;
if (IsZero(D))
{
if (IsZero(q)) / one triple solution /
{
s_[0] = 0;
num = 1;
}
else / one single and one double solution /
{
double u = cbrt(-q);
s_[0] = 2 * u;
s_[1] = - u;
num = 2;
}
}
else if (D < 0) / Casus irreducibilis: three real solutions /
{
double phi = 1.0/3 * acos(-q / sqrt(-cb_p));
double t = 2 * sqrt(-p);
s_[0] = t * cos(phi);
s_[1] = - t * cos(phi + M_PI / 3);
s_[2] = - t * cos(phi - M_PI / 3);
num = 3;
}
else / one real solution /
{
double sqrt_D = sqrt(D);
double u = cbrt(sqrt_D - q);
double v = - cbrt(sqrt_D + q);
s_[0] = u + v;
num = 1;
}
/ resubstitute /
sub = 1.0/3 * A;
for (i = 0; i < num; ++i)
s_[ i ] -= sub;
s.x=s_[0];
s.y=s_[1];
s.z=s_[2];
return num;
}
其中c[]是系数,s是解集
但这个函数在GPU和CPU上运行的结果差别很大：解出s之后，再计算 c[3]*s.x*s.x*s.x + c[2]*s.x*s.x + c[1]*s.x + c[0] 的值，在CPU上可以达到1e-7的精度，但在GPU上可能只有1e-3甚至1e-2。请问可以怎么解决，怎样提高GPU的精度呢？我用的是GT630。