这个程序是 NVIDIA SDK 中的例程。
这里面,我有个地方不是很明白,
// Index of the first sub-matrix of A processed by the block
int aBegin = wA * BLOCK_SIZE * by;
这句话是在求子矩阵的起始索引,为什么要用 A 矩阵的宽度乘以每个 BLOCK 的大小,然后再乘以 block 的索引呢?
希望大家帮我解答一下
新手求帮助。
[attach]3257[/attach]
源程序在这
/*
 * Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
 *
 * Please refer to the NVIDIA end user license agreement (EULA) associated
 * with this source code for terms and conditions that govern your use of
 * this software. Any use, reproduction, disclosure, or distribution of
 * this software and related documentation outside the terms of the EULA
 * is strictly prohibited.
 */
/* Matrix multiplication: C = A * B.
 * Device code.
*/
#ifndef MATRIXMUL_KERNEL_H
#define MATRIXMUL_KERNEL_H
#include <stdio.h>
#include "matrixMul.h"
// Compile-time switch for the AS()/BS() tile accessors below.
// 0 (the value set here) selects plain 2-D array indexing; any non-zero
// value routes every access through cutilBankChecker instead.
#define CHECK_BANK_CONFLICTS 0
#if CHECK_BANK_CONFLICTS
// Checked accessors: hand cutilBankChecker the address of element [0][0]
// and the linear offset BLOCK_SIZE * i + j of the requested element.
// The macro arguments are parenthesized so that expression arguments
// (e.g. AS(ty, k + 1) or AS(a + b, k)) still produce the intended offset.
// NOTE(review): As, Bs, BLOCK_SIZE and cutilBankChecker are not declared in
// this section; they must be in scope wherever AS()/BS() are expanded.
#define AS(i, j) cutilBankChecker(((float*)&As[0][0]), (BLOCK_SIZE * (i) + (j)))
#define BS(i, j) cutilBankChecker(((float*)&Bs[0][0]), (BLOCK_SIZE * (i) + (j)))
#else
// Unchecked accessors: expand directly to an lvalue element of As / Bs.
#define AS(i, j) As[i][j]
#define BS(i, j) Bs[i][j]
#endif
////////////////////////////////////////////////////////////////////////////////
//! Matrix multiplication on the device: C = A * B
//! wA is A's width and wB is B's width
////////////////////////////////////////////////////////////////////////////////
__global__ void
matrixMul( float* C, float* A, float* B, int wA, int wB) //3个矩阵的指针,及矩阵大小
{
// Block index
int bx = blockIdx.x;
int by = blockIdx.y;
[/i][/i]