在计算cusparse库时的问题,为什么第三个为0

#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <cuda_runtime.h>
#include <cusparse.h>
#include
using namespace std;

// error check macros
#define CUSPARSE_CHECK(x) {cusparseStatus_t _c=x; if (_c != CUSPARSE_STATUS_SUCCESS) {printf(“cusparse fail: %d, line: %d\n”, (int)_c, LINE); exit(-1);}}

#define cudaCheckErrors(msg)
do {
cudaError_t __err = cudaGetLastError();
if (__err != cudaSuccess) {
fprintf(stderr, “Fatal error: %s (%s at %s:%d)\n”,
msg, cudaGetErrorString(__err),
FILE, LINE);
fprintf(stderr, “*** FAILED - ABORTING\n”);
exit(1);
}
} while (0)

int main( )
{
//CSR format of matrix A and Vector b
double* h_valA;
int* h_csrRowPtrA;
int* h_csrColIndA;
double* h_b;
int n, nnzA;
n = 3; nnzA = 5;

h_valA = (double*)malloc(nnzA * sizeof(double));
h_csrRowPtrA = (int*)malloc((n + 1) * sizeof(int));
h_csrColIndA = (int*)malloc(nnzA * sizeof(int));
h_b = (double*)malloc(n * sizeof(double));

h_valA[0] = 3.0;
h_valA[1] = 2.0;
h_valA[2] = 2.0;
h_valA[3] = 2.0;
h_valA[4] = 1.0;

h_csrRowPtrA[0] = 0;
h_csrRowPtrA[1] = 2;
h_csrRowPtrA[2] = 3;
h_csrRowPtrA[3] = 5;

h_csrColIndA[0] = 0;
h_csrColIndA[1] = 2;
h_csrColIndA[2] = 1;
h_csrColIndA[3] = 0;
h_csrColIndA[4] = 2;

h_b[0] = 3.5;
h_b[1] = 1.5;
h_b[2] = 2.0;

//CSR format of matrix A and Vector b (device)
double* valA;
int* csrRowPtrA;
int* csrColIndA;
double* b;

cudaMalloc((void**)&valA, nnzA * sizeof(double));
cudaMalloc((void**)&csrRowPtrA, (n + 1) * sizeof(int));
cudaMalloc((void**)&csrColIndA, nnzA * sizeof(int));
cudaMalloc((void**)&b, n * sizeof(double));
cudaCheckErrors(“cudaMalloc fail”);

cudaMemcpy(valA, h_valA, (size_t)(nnzA * sizeof(double)), cudaMemcpyHostToDevice);
cudaMemcpy(csrRowPtrA, h_csrRowPtrA, (size_t)((n + 1) * sizeof(int)), cudaMemcpyHostToDevice);
cudaMemcpy(csrColIndA, h_csrColIndA, (size_t)(nnzA * sizeof(int)), cudaMemcpyHostToDevice);
cudaMemcpy(b, h_b, (size_t)(n * sizeof(double)), cudaMemcpyHostToDevice);
cudaCheckErrors(“cudaMemcpy fail”);

//Initialize cuSPARSE
cusparseHandle_t handle;
CUSPARSE_CHECK(cusparseCreate(&handle));
cusparseStatus_t status;

cusparseMatDescr_t descrA;
status = cusparseCreateMatDescr(&descrA);
CUSPARSE_CHECK(status);

cusparseMatDescr_t descr_L;
status = cusparseCreateMatDescr(&descr_L);
CUSPARSE_CHECK(status);
status = cusparseSetMatIndexBase(descr_L, CUSPARSE_INDEX_BASE_ZERO);
CUSPARSE_CHECK(status);
status = cusparseSetMatType(descr_L, CUSPARSE_MATRIX_TYPE_GENERAL);
CUSPARSE_CHECK(status);
status = cusparseSetMatFillMode(descr_L, CUSPARSE_FILL_MODE_LOWER);
CUSPARSE_CHECK(status);
status = cusparseSetMatDiagType(descr_L, CUSPARSE_DIAG_TYPE_UNIT);
CUSPARSE_CHECK(status);

cusparseMatDescr_t descr_U;
status = cusparseCreateMatDescr(&descr_U);
status = cusparseSetMatIndexBase(descr_U, CUSPARSE_INDEX_BASE_ZERO);
CUSPARSE_CHECK(status);
status = cusparseSetMatType(descr_U, CUSPARSE_MATRIX_TYPE_GENERAL);
CUSPARSE_CHECK(status);
status = cusparseSetMatFillMode(descr_U, CUSPARSE_FILL_MODE_UPPER);
CUSPARSE_CHECK(status);
status = cusparseSetMatDiagType(descr_U, CUSPARSE_DIAG_TYPE_NON_UNIT);
CUSPARSE_CHECK(status);

//Query space and allocate memory
csrilu02Info_t info_A = 0; CUSPARSE_CHECK(cusparseCreateCsrilu02Info(&info_A));
csrsv2Info_t info_L = 0; CUSPARSE_CHECK(cusparseCreateCsrsv2Info(&info_L));
csrsv2Info_t info_U = 0; CUSPARSE_CHECK(cusparseCreateCsrsv2Info(&info_U));

int pBufferSize_A; int pBufferSize_L; int pBufferSize_U;
status = cusparseDcsrilu02_bufferSize(handle, n, nnzA, descrA, valA, csrRowPtrA, csrColIndA, info_A, &pBufferSize_A);
CUSPARSE_CHECK(status);
status = cusparseDcsrsv2_bufferSize(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, n, nnzA, descr_L, valA, csrRowPtrA, csrColIndA, info_L, &pBufferSize_L);
CUSPARSE_CHECK(status);
status = cusparseDcsrsv2_bufferSize(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, n, nnzA, descr_U, valA, csrRowPtrA, csrColIndA, info_U, &pBufferSize_U);
CUSPARSE_CHECK(status);

int pBufferSize = max(pBufferSize_A, max(pBufferSize_L, pBufferSize_U));
void* pBuffer = 0;
cudaMalloc((void**)&pBuffer, pBufferSize);
cudaCheckErrors(“cudaMalloc fail”);

// LU decomposition analysis
int structural_zero;
status = cusparseDcsrilu02_analysis(handle, n, nnzA, descrA, valA, csrRowPtrA, csrColIndA, info_A, CUSPARSE_SOLVE_POLICY_NO_LEVEL, pBuffer);
CUSPARSE_CHECK(status);
status = cusparseXcsrilu02_zeroPivot(handle, info_A, &structural_zero);
CUSPARSE_CHECK(status);
if (CUSPARSE_STATUS_ZERO_PIVOT == status)
{
printf(“A(%d,%d) is missing\n”, structural_zero, structural_zero);
}
status = cusparseDcsrsv2_analysis(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, n, nnzA, descr_L, valA, csrRowPtrA, csrColIndA, info_L, CUSPARSE_SOLVE_POLICY_NO_LEVEL, pBuffer);
CUSPARSE_CHECK(status);
status = cusparseDcsrsv2_analysis(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, n, nnzA, descr_U, valA, csrRowPtrA, csrColIndA, info_U, CUSPARSE_SOLVE_POLICY_USE_LEVEL, pBuffer);
CUSPARSE_CHECK(status);

// A = L * U
int numerical_zero;
status = cusparseDcsrilu02(handle, n, nnzA, descrA, valA, csrRowPtrA, csrColIndA, info_A, CUSPARSE_SOLVE_POLICY_NO_LEVEL, pBuffer);
CUSPARSE_CHECK(status);
status = cusparseXcsrilu02_zeroPivot(handle, info_A, &numerical_zero);
CUSPARSE_CHECK(status);
if (CUSPARSE_STATUS_ZERO_PIVOT == status)
{
printf(“U(%d,%d) is zero\n”, numerical_zero, numerical_zero);
}

// b = L * Z
double* d_z;
cudaMalloc(&d_z, n * sizeof(double));
cudaCheckErrors(“cudaMalloc fail”);

const double alpha = 1.0;
status = cusparseDcsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, n, nnzA, &alpha, descr_L, valA, csrRowPtrA, csrColIndA, info_L, b, d_z, CUSPARSE_SOLVE_POLICY_NO_LEVEL, pBuffer);
CUSPARSE_CHECK(status);

// Z = U * X
double* d_x;
cudaMalloc((void**)&d_x, n * sizeof(double));
cudaCheckErrors(“cudaMalloc fail”);
status = cusparseDcsrsv2_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, n, nnzA, &alpha, descr_U, valA, csrRowPtrA, csrColIndA, info_U, d_z, d_x, CUSPARSE_SOLVE_POLICY_USE_LEVEL, pBuffer);
CUSPARSE_CHECK(status);

// Ax = A * x
const double beta = 0.0;
double* d_Ax;
cudaMalloc((void**)&d_Ax, n * sizeof(double));
cudaCheckErrors(“cudaMalloc fail”);
cusparseSpMatDescr_t matA;
cusparseDnVecDescr_t vecX, vecAx;
void* dBuffer = NULL;
size_t bufferSize = 0;
status = cusparseCreateCsr(&matA, n, n, nnzA, csrRowPtrA, csrColIndA, valA, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_R_64F);
CUSPARSE_CHECK(status);
status = cusparseCreateDnVec(&vecX, n, d_x, CUDA_R_64F);
CUSPARSE_CHECK(status);
status = cusparseCreateDnVec(&vecAx, n, d_Ax, CUDA_R_64F);
CUSPARSE_CHECK(status);
cusparseSpMV_bufferSize(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha, matA, vecX, &beta, vecAx, CUDA_R_64F, CUSPARSE_MV_ALG_DEFAULT, &bufferSize);
CUSPARSE_CHECK(status);
cudaMalloc(&dBuffer, bufferSize);
cudaCheckErrors(“cudaMalloc fail”);
cusparseSpMV(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha, matA, vecX, &beta, vecAx, CUDA_R_64F, CUSPARSE_MV_ALG_DEFAULT, dBuffer);
CUSPARSE_CHECK(status);

// Return the data from GPU to CPU
double* h_x = (double*)malloc(n * sizeof(double));
cudaMemcpy(h_x, d_x, n * sizeof(double), cudaMemcpyDeviceToHost);
cudaCheckErrors(“cudaMemcpy fail”);
printf(“Final result\n”);
for (int k = 0; k < n; k++)
{
printf(“x[%i] = %f\n”, k, h_x[k]);
}
double* h_Ax = (double*)malloc(n * sizeof(double));
cudaMemcpy(h_Ax, d_Ax, n * sizeof(double), cudaMemcpyDeviceToHost);
cudaCheckErrors(“cudaMalloc fail”);
printf(“Relative error analysis\n”);
for (int k = 0; k < n; k++)
{
printf(“h_Ax[%i] = %f\n”, k, h_Ax[k]);
}

if (h_valA)free(h_valA);
if (h_csrRowPtrA)free(h_csrRowPtrA);
if (h_csrColIndA)free(h_csrColIndA);
if (h_x)free(h_x);
if (valA)cudaFree(valA);
if (csrRowPtrA)cudaFree(csrRowPtrA);
if (csrColIndA)cudaFree(csrColIndA);
if (d_z)cudaFree(d_z);
if (d_x)cudaFree(d_x);
if (dBuffer)cudaFree(dBuffer);
if (d_Ax)cudaFree(d_Ax);
CUSPARSE_CHECK(cusparseDestroy(handle));
CUSPARSE_CHECK(cusparseDestroyMatDescr(descrA));
CUSPARSE_CHECK(cusparseDestroyMatDescr(descr_L));
CUSPARSE_CHECK(cusparseDestroyMatDescr(descr_U));
CUSPARSE_CHECK(cusparseDestroyCsrilu02Info(info_A));
CUSPARSE_CHECK(cusparseDestroyCsrsv2Info(info_L));
CUSPARSE_CHECK(cusparseDestroyCsrsv2Info(info_U));
CUSPARSE_CHECK(cusparseDestroySpMat(matA));
CUSPARSE_CHECK(cusparseDestroyDnVec(vecX));
CUSPARSE_CHECK(cusparseDestroyDnVec(vecAx));

return 0;
}