本文共 7437 字,大约阅读时间需要 24 分钟。
纹理对象API:
纹理对象通过cudaCreateTextureObject()创建,创建时需要提供一个用于指定纹理的struct cudaResourceDesc类型的资源描述,以及一个按如下方式定义的纹理描述:
struct cudaTextureDesc
{
    enum cudaTextureAddressMode addressMode[3];
    enum cudaTextureFilterMode  filterMode;
    enum cudaTextureReadMode    readMode;
    int                         sRGB;
    int                         normalizedCoords;
    unsigned int                maxAnisotropy;
    enum cudaTextureFilterMode  mipmapFilterMode;
    float                       mipmapLevelBias;
    float                       minMipmapLevelClamp;
    float                       maxMipmapLevelClamp;
};
// Simple transformation kernel.
//
// Rotates the normalized texture coordinates of each output element by
// `theta` radians about the texture center (0.5, 0.5), fetches the texture
// at the rotated location and stores the value in row-major order in `output`.
//
// Launch layout: 2D grid of 2D blocks, one thread per output element. The
// host rounds the block count up, so the grid may overshoot the image;
// threads that fall outside width x height return without touching memory.
//
// output - device buffer holding at least width * height floats
// texObj - texture object sampled with tex2D<float>(); the host code below
//          creates it with normalized coordinates enabled
// width  - number of output columns
// height - number of output rows
// theta  - rotation angle in radians
__global__ void transformKernel(float* output,
                                cudaTextureObject_t texObj,
                                int width, int height,
                                float theta)
{
    // Calculate the element handled by this thread
    unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
    unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;

    // The grid is a ceil-division of the image size, so edge blocks contain
    // threads with no matching output element.
    if (x >= (unsigned int)width || y >= (unsigned int)height)
        return;

    // Calculate normalized texture coordinates
    float u = x / (float)width;
    float v = y / (float)height;

    // Transform coordinates: rotate about the center of the texture
    u -= 0.5f;
    v -= 0.5f;
    float tu = u * cosf(theta) - v * sinf(theta) + 0.5f;
    float tv = v * cosf(theta) + u * sinf(theta) + 0.5f;

    // Read from texture and write to global memory. The return type of a
    // texture-object fetch cannot be deduced and must be given explicitly.
    output[y * width + x] = tex2D<float>(texObj, tu, tv);
}

// Host code
//
// NOTE: `width`, `height`, `h_data`, `size` and `angle` are not declared in
// this sample; they are assumed to be provided by the surrounding program.
int main()
{
    // Allocate CUDA array in device memory (one 32-bit float channel)
    cudaChannelFormatDesc channelDesc =
        cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat);
    cudaArray* cuArray;
    cudaMallocArray(&cuArray, &channelDesc, width, height);

    // Copy to device memory some data located at address h_data
    // in host memory
    cudaMemcpyToArray(cuArray, 0, 0, h_data, size, cudaMemcpyHostToDevice);

    // Specify texture
    struct cudaResourceDesc resDesc;
    memset(&resDesc, 0, sizeof(resDesc));
    resDesc.resType = cudaResourceTypeArray;
    resDesc.res.array.array = cuArray;

    // Specify texture object parameters
    struct cudaTextureDesc texDesc;
    memset(&texDesc, 0, sizeof(texDesc));
    texDesc.addressMode[0]   = cudaAddressModeWrap;
    texDesc.addressMode[1]   = cudaAddressModeWrap;
    texDesc.filterMode       = cudaFilterModeLinear;
    texDesc.readMode         = cudaReadModeElementType;
    texDesc.normalizedCoords = 1;

    // Create texture object
    cudaTextureObject_t texObj = 0;
    cudaCreateTextureObject(&texObj, &resDesc, &texDesc, NULL);

    // Allocate result of transformation in device memory
    float* output;
    cudaMalloc(&output, width * height * sizeof(float));

    // Invoke kernel: 16x16 threads per block, block count rounded up so the
    // whole width x height image is covered
    dim3 dimBlock(16, 16);
    dim3 dimGrid((width  + dimBlock.x - 1) / dimBlock.x,
                 (height + dimBlock.y - 1) / dimBlock.y);
    transformKernel<<<dimGrid, dimBlock>>>(output, texObj,
                                           width, height, angle);

    // The launch is asynchronous: wait for the kernel to finish before
    // releasing the texture object and the memory it uses
    cudaDeviceSynchronize();

    // Destroy texture object
    cudaDestroyTextureObject(texObj);

    // Free device memory
    cudaFreeArray(cuArray);
    cudaFree(output);

    return 0;
}
纹理参考API:
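纹理引用的一些属性是不可变的,必须在编译时确定;它们在声明纹理引用时指定。纹理引用在文件作用域内声明为texture类型的变量:
texture<DataType, Type, ReadMode> texRef;
其中:
- DataType指定纹素(texel)的类型;
- Type指定纹理引用的类型,取值为cudaTextureType1D、cudaTextureType2D或cudaTextureType3D,分别对应一维、二维或三维纹理;或取值为cudaTextureType1DLayered或cudaTextureType2DLayered,分别对应一维或二维分层纹理;Type是可选参数,默认为cudaTextureType1D;
- ReadMode指定读取模式;它是可选参数,默认为cudaReadModeElementType。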
纹理引用只能被声明为静态全局变量,不能作为参数传递给函数。
纹理引用的其他属性是可变的,可以在运行时通过主机运行时更改。如参考手册中所述,运行时API具有低级C风格接口和高级C++风格接口。texture类型在高级API中被定义为一个结构体,它公开派生自低级API中定义的textureReference类型,后者定义如下:
struct textureReference
{
    int                          normalized;
    enum cudaTextureFilterMode   filterMode;
    enum cudaTextureAddressMode  addressMode[3];
    struct cudaChannelFormatDesc channelDesc;
    int                          sRGB;
    unsigned int                 maxAnisotropy;
    enum cudaTextureFilterMode   mipmapFilterMode;
    float                        mipmapLevelBias;
    float                        minMipmapLevelClamp;
    float                        maxMipmapLevelClamp;
};
struct cudaChannelFormatDesc { int x, y, z, w; enum cudaChannelFormatKind f;};
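其中x、y、z和w等于返回值各分量的位数,f为:
- cudaChannelFormatKindSigned:各分量为有符号整数类型;
- cudaChannelFormatKindUnsigned:各分量为无符号整数类型;
- cudaChannelFormatKindFloat:各分量为浮点类型。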
normalized、addressMode和filterMode可以在主机代码中直接修改。在内核能够通过纹理引用从纹理内存读取数据之前,必须先使用cudaBindTexture()或cudaBindTexture2D()将纹理引用绑定到线性内存,或使用cudaBindTextureToArray()将其绑定到CUDA数组。cudaUnbindTexture()用于解除纹理引用的绑定。纹理引用一旦解除绑定,就可以安全地重新绑定到另一个数组,即使使用先前所绑定纹理的内核尚未执行完毕。建议使用cudaMallocPitch()在线性内存中分配二维纹理,并将cudaMallocPitch()返回的pitch(行间距)作为cudaBindTexture2D()的输入参数。
以下代码示例将2D纹理引用绑定到由devPtr指向的线性内存:
使用低级API:
texture<float, cudaTextureType2D, cudaReadModeElementType> texRef;
textureReference* texRefPtr;
cudaGetTextureReference(&texRefPtr, &texRef);
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>();
size_t offset;
cudaBindTexture2D(&offset, texRefPtr, devPtr, &channelDesc, width, height, pitch);
使用高级API:
texture<float, cudaTextureType2D, cudaReadModeElementType> texRef;
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>();
size_t offset;
cudaBindTexture2D(&offset, texRef, devPtr, channelDesc, width, height, pitch);
以下代码示例将2D纹理引用绑定到CUDA数组cuArray:
使用低级API:
texture<float, cudaTextureType2D, cudaReadModeElementType> texRef;
textureReference* texRefPtr;
cudaGetTextureReference(&texRefPtr, &texRef);
cudaChannelFormatDesc channelDesc;
cudaGetChannelDesc(&channelDesc, cuArray);
cudaBindTextureToArray(texRef, cuArray, &channelDesc);
使用高级API:
texture<float, cudaTextureType2D, cudaReadModeElementType> texRef;
cudaBindTextureToArray(texRef, cuArray);
将纹理绑定到纹理引用时指定的格式,必须与声明纹理引用时指定的参数相匹配;否则,纹理拾取(texture fetch)的结果是未定义的。
如表所示,可以绑定到内核的纹理数量是有限制的。以下代码示例将一些简单的转换内核应用于纹理。
// 2D float texture
texture<float, cudaTextureType2D, cudaReadModeElementType> texRef;

// Simple transformation kernel.
//
// Rotates the normalized texture coordinates of each output element by
// `theta` radians about the texture center (0.5, 0.5), fetches the
// file-scope texture reference `texRef` at the rotated location and stores
// the value in row-major order in `output`.
//
// Launch layout: 2D grid of 2D blocks, one thread per output element. The
// host rounds the block count up, so the grid may overshoot the image;
// threads that fall outside width x height return without touching memory.
//
// output - device buffer holding at least width * height floats
// width  - number of output columns
// height - number of output rows
// theta  - rotation angle in radians
__global__ void transformKernel(float* output,
                                int width, int height,
                                float theta)
{
    // Calculate the element handled by this thread
    unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
    unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;

    // The grid is a ceil-division of the image size, so edge blocks contain
    // threads with no matching output element.
    if (x >= (unsigned int)width || y >= (unsigned int)height)
        return;

    // Calculate normalized texture coordinates
    float u = x / (float)width;
    float v = y / (float)height;

    // Transform coordinates: rotate about the center of the texture
    u -= 0.5f;
    v -= 0.5f;
    float tu = u * cosf(theta) - v * sinf(theta) + 0.5f;
    float tv = v * cosf(theta) + u * sinf(theta) + 0.5f;

    // Read from texture and write to global memory
    output[y * width + x] = tex2D(texRef, tu, tv);
}

// Host code
//
// NOTE: `width`, `height`, `h_data`, `size` and `angle` are not declared in
// this sample; they are assumed to be provided by the surrounding program.
int main()
{
    // Allocate CUDA array in device memory (one 32-bit float channel)
    cudaChannelFormatDesc channelDesc =
        cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat);
    cudaArray* cuArray;
    cudaMallocArray(&cuArray, &channelDesc, width, height);

    // Copy to device memory some data located at address h_data
    // in host memory
    cudaMemcpyToArray(cuArray, 0, 0, h_data, size, cudaMemcpyHostToDevice);

    // Set texture reference parameters
    texRef.addressMode[0] = cudaAddressModeWrap;
    texRef.addressMode[1] = cudaAddressModeWrap;
    texRef.filterMode     = cudaFilterModeLinear;
    texRef.normalized     = true;

    // Bind the array to the texture reference
    cudaBindTextureToArray(texRef, cuArray, channelDesc);

    // Allocate result of transformation in device memory
    float* output;
    cudaMalloc(&output, width * height * sizeof(float));

    // Invoke kernel: 16x16 threads per block, block count rounded up so the
    // whole width x height image is covered
    dim3 dimBlock(16, 16);
    dim3 dimGrid((width  + dimBlock.x - 1) / dimBlock.x,
                 (height + dimBlock.y - 1) / dimBlock.y);
    transformKernel<<<dimGrid, dimBlock>>>(output, width, height, angle);

    // The launch is asynchronous: wait for the kernel to finish before
    // releasing the memory it reads and writes
    cudaDeviceSynchronize();

    // Free device memory
    cudaFreeArray(cuArray);
    cudaFree(output);

    return 0;
}
转载地址:http://jmkpa.baihongyu.com/