from pycuda import tools
import pycuda.driver as drv
import numpy as np
from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor
from pycuda.compiler import SourceModule
# CUDA C source of the demo kernel. <stdio.h> is included explicitly (as the
# PyCUDA printf examples do) instead of relying on whatever headers nvcc pulls
# in implicitly for device-side printf.
_KERNEL_SOURCE = '''
#include <stdio.h>
__global__ void test(int a ){
printf("%d\\n" ,a);
printf("hello world\\n");
return;
}
'''

# This file does not import pycuda.autoinit, so the driver has to be
# initialised and a context created by hand before anything can be compiled
# or launched.
drv.init()
_context = drv.Device(0).make_context()
try:
    mod = SourceModule(_KERNEL_SOURCE)
finally:
    # make_context() leaves the new context current on the creating thread.
    # Pop it so that every thread, this one included, activates the context
    # explicitly (push/pop) around the GPU work it does.
    drv.Context.pop()


def _launch_test_kernel(value):
    """Launch the ``test`` kernel once for *value* and wait for it to finish.

    The caller must have made ``_context`` current on the calling thread.
    The stream and function handle are kept local to this helper so they are
    released before the caller pops the context again.
    """
    stream = drv.Stream()
    kernel = mod.get_function("test")
    # The kernel parameter is declared as ``int``, hence np.int32.
    kernel(np.int32(value), block=(2, 1, 1), grid=(1, 1), stream=stream)
    # Kernel launches are asynchronous. Synchronising here makes sure the
    # kernel has actually run (and its printf output has been flushed) before
    # the context is deactivated or the interpreter exits.
    stream.synchronize()


def process(i):
    """Run the ``test`` kernel for the integer *i* from the calling thread.

    PyCUDA keeps a separate context stack per thread, so a worker thread has
    no current context unless it pushes one itself. The shared ``_context``
    is pushed for the duration of the launch and always popped again, even if
    the launch raises.

    Args:
        i: Integer value handed to the kernel, which prints it.
    """
    _context.push()
    try:
        _launch_test_kernel(i)
    finally:
        drv.Context.pop()
if __name__ == '__main__':
    # Leaving the with-block waits for all queued work, exactly like an
    # explicit executor.shutdown().
    with ThreadPoolExecutor(4) as executor:
        # Consume the iterator returned by map(): an exception raised inside
        # a worker is re-raised here. With bare submit() calls the Future is
        # discarded and any failure in process() goes unnoticed.
        list(executor.map(process, range(10)))
请教各位前辈:我想使用 PyCUDA 在多线程里调用 CUDA 内核,但一直不能成功,请问这个问题该如何解决?