/**
 * \file cuArrays.cu
 * \brief Implementations for cuArrays class
 *
 * NOTE(review): this file was recovered from a copy in which every piece of
 * text between '<' and '>' had been stripped (template parameter lists,
 * system header names, parts of the debuginfo loops). Items tagged
 * NOTE(review) below are reconstructions and must be confirmed against
 * cuArrays.h / version control.
 */

// dependencies
#include "cuArrays.h"
#include "cudaError.h"
// NOTE(review): the names of the three original system headers were lost;
// the headers below are the ones this file actually uses.
#include <algorithm> // std::min
#include <cstdlib>   // malloc, free
#include <fstream>   // std::ofstream
#include <iostream>  // std::cout, std::cerr
#include <new>       // std::bad_alloc
#include <string>    // std::string

// allocate arrays in device memory
// (getByteSize() bytes; CUDA errors are handled by checkCudaErrors)
template <typename T>
void cuArrays<T>::allocate()
{
    checkCudaErrors(cudaMalloc((void **)&devData, getByteSize()));
    is_allocated = 1;
}

// allocate arrays in host memory
// throws std::bad_alloc when a non-empty buffer cannot be obtained
template <typename T>
void cuArrays<T>::allocateHost()
{
    hostData = (T *)malloc(getByteSize());
    // malloc is allowed to return a null pointer for a zero-byte request,
    // so only a failed non-empty request is treated as an error
    if (!hostData && getByteSize() != 0)
        throw std::bad_alloc();
    is_allocatedHost = 1;
}

// deallocate arrays in device memory
template <typename T>
void cuArrays<T>::deallocate()
{
    checkCudaErrors(cudaFree(devData));
    is_allocated = 0;
}

// deallocate arrays in host memory
template <typename T>
void cuArrays<T>::deallocateHost()
{
    free(hostData);
    is_allocatedHost = 0;
}

// copy arrays from device to host
// use asynchronous for possible overlaps between data copying and kernel execution
// the copy is only enqueued on `stream`; callers that read hostData afterwards
// must synchronize the stream themselves
template <typename T>
void cuArrays<T>::copyToHost(cudaStream_t stream)
{
    checkCudaErrors(cudaMemcpyAsync(hostData, devData, getByteSize(), cudaMemcpyDeviceToHost, stream));
}

// copy arrays from host to device (asynchronous, enqueued on `stream`)
template <typename T>
void cuArrays<T>::copyToDevice(cudaStream_t stream)
{
    checkCudaErrors(cudaMemcpyAsync(devData, hostData, getByteSize(), cudaMemcpyHostToDevice, stream));
}

// set to 0 (byte-wise clear of the device buffer, enqueued on `stream`)
template <typename T>
void cuArrays<T>::setZero(cudaStream_t stream)
{
    checkCudaErrors(cudaMemsetAsync(devData, 0, getByteSize(), stream));
}

// output (partial) data when debugging
// prints the dimensions, then the first and (if there are more) the last
// min(10, size*count) elements of the array, copied back from the device
template <typename T>
void cuArrays<T>::debuginfo(cudaStream_t stream)
{
    // output size info
    std::cout << "Image height,width,count: " << height << "," << width << "," << count << std::endl;
    // check whether host data is allocated
    if (!is_allocatedHost)
        allocateHost();
    // copy to host
    copyToHost(stream);
    // copyToHost only enqueues the transfer; wait for it before reading hostData
    checkCudaErrors(cudaStreamSynchronize(stream));

    // set a max output range
    const int total = size * count;
    const int range = std::min(10, total);
    // first `range` data
    // NOTE(review): element print format reconstructed - confirm
    for (int i = 0; i < range; i++)
        std::cout << "(" << hostData[i] << ")";
    std::cout << std::endl;
    // last `range` data, only when they are not already covered above
    if (total > range) {
        for (int i = total - range; i < total; i++)
            std::cout << "(" << hostData[i] << ")";
        std::cout << std::endl;
    }
}

// specialization for two-component float elements
// NOTE(review): element type (float2) and print format reconstructed - confirm
template <>
void cuArrays<float2>::debuginfo(cudaStream_t stream)
{
    std::cout << "Image height,width,count: " << height << "," << width << "," << count << std::endl;
    if (!is_allocatedHost)
        allocateHost();
    copyToHost(stream);
    // copyToHost only enqueues the transfer; wait for it before reading hostData
    checkCudaErrors(cudaStreamSynchronize(stream));

    const int total = size * count;
    const int range = std::min(10, total);
    for (int i = 0; i < range; i++)
        std::cout << "(" << hostData[i].x << "," << hostData[i].y << ")";
    std::cout << std::endl;
    if (total > range) {
        for (int i = total - range; i < total; i++)
            std::cout << "(" << hostData[i].x << "," << hostData[i].y << ")";
        std::cout << std::endl;
    }
}

// specialization for three-component float elements
// NOTE(review): element type (float3) and print format reconstructed - confirm
template <>
void cuArrays<float3>::debuginfo(cudaStream_t stream)
{
    std::cout << "Image height,width,count: " << height << "," << width << "," << count << std::endl;
    if (!is_allocatedHost)
        allocateHost();
    copyToHost(stream);
    // copyToHost only enqueues the transfer; wait for it before reading hostData
    checkCudaErrors(cudaStreamSynchronize(stream));

    const int total = size * count;
    const int range = std::min(10, total);
    for (int i = 0; i < range; i++)
        std::cout << "(" << hostData[i].x << "," << hostData[i].y << "," << hostData[i].z << ")";
    std::cout << std::endl;
    if (total > range) {
        for (int i = total - range; i < total; i++)
            std::cout << "(" << hostData[i].x << "," << hostData[i].y << "," << hostData[i].z << ")";
        std::cout << std::endl;
    }
}

// specialization for two-component integer elements
// NOTE(review): element type (int2) and print format reconstructed - confirm
template <>
void cuArrays<int2>::debuginfo(cudaStream_t stream)
{
    std::cout << "Image height,width,count: " << height << "," << width << "," << count << std::endl;
    if (!is_allocatedHost)
        allocateHost();
    copyToHost(stream);
    // copyToHost only enqueues the transfer; wait for it before reading hostData
    checkCudaErrors(cudaStreamSynchronize(stream));

    const int total = size * count;
    const int range = std::min(10, total);
    for (int i = 0; i < range; i++)
        std::cout << "(" << hostData[i].x << "," << hostData[i].y << ")";
    std::cout << std::endl;
    if (total > range) {
        for (int i = total - range; i < total; i++)
            std::cout << "(" << hostData[i].x << "," << hostData[i].y << ")";
        std::cout << std::endl;
    }
}

// output to file by copying to host at first
template <typename T>
void cuArrays<T>::outputToFile(std::string fn, cudaStream_t stream)
{
    if (!is_allocatedHost)
        allocateHost();
    copyToHost(stream);
    // copyToHost only enqueues the transfer; wait for it before writing hostData out
    checkCudaErrors(cudaStreamSynchronize(stream));
    outputHostToFile(fn);
}

// save the host data to (binary) file
// writes getByteSize() raw bytes of hostData to `fn`; if the file cannot be
// opened a message is printed to std::cerr and nothing is written
template <typename T>
void cuArrays<T>::outputHostToFile(std::string fn)
{
    std::ofstream file;
    file.open(fn.c_str(), std::ios_base::binary);
    if (!file.is_open()) {
        std::cerr << "cuArrays::outputHostToFile: cannot open " << fn << " for writing" << std::endl;
        return;
    }
    file.write((char *)hostData, getByteSize());
    file.close();
}

// instantiations, required by python extensions
// NOTE(review): the five element types were lost with the stripped '<...>'
// text; this list is a reconstruction - confirm
template class cuArrays<float>;
template class cuArrays<float2>;
template class cuArrays<float3>;
template class cuArrays<int2>;
template class cuArrays<int>;

// end of file