ISCE_INSAR/contrib/PyCuAmpcor/src/GDALImage.cu

#include "GDALImage.h"
#include <iostream>
#include <stdexcept>
#include <string>

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <assert.h>
#include <cublas_v2.h>
#include "cudaError.h"
#include <errno.h>
#include <unistd.h>


/**
 * \brief Constructor
 *
 * Opens one band of a raster file through GDAL and prepares the host-side
 * storage that loadToDevice() later copies from: either a GDAL virtual-memory
 * mapping of the whole band, or a pinned host staging buffer.
 *
 * @param filename a std::string with the raster image file name
 * @param band index of the raster band to use (GDAL band numbering starts at 1)
 * @param cacheSizeInGB cache/buffer size hint; 0 keeps the GDAL default cache
 *        size in memory-map mode
 * @param useMmap non-zero to use a GDAL virtual-memory mapping, zero to use a
 *        pinned host buffer filled by RasterIO
 * @throws std::runtime_error if the file, the band, or the host storage
 *         cannot be set up; everything acquired so far is released first
 */

GDALImage::GDALImage(std::string filename, int band, int cacheSizeInGB, int useMmap)
   : _useMmap(useMmap)
{
    // start from a known state so that neither the error path below nor the
    // destructor ever looks at an indeterminate pointer
    _poDataset = nullptr;
    _poBand = nullptr;
    _poBandVirtualMem = nullptr;
    _memPtr = nullptr;

    // open the file as dataset; GDAL reports the details of a failure itself
    _poDataset = (GDALDataset *) GDALOpen(filename.c_str(), GA_ReadOnly );
    if(!_poDataset)
        throw std::runtime_error("GDALImage: cannot open raster file " + filename);

    try {
        // check the band info
        int count = _poDataset->GetRasterCount();
        if(band < 1 || band > count)
            throw std::runtime_error("GDALImage: the desired band " + std::to_string(band)
                + " is outside the range of the " + std::to_string(count)
                + " bands available in " + filename);

        // get the desired band
        _poBand = _poDataset->GetRasterBand(band);
        if(!_poBand)
            throw std::runtime_error("GDALImage: cannot access band " + std::to_string(band)
                + " of " + filename);

        // get the width(x), and height(y)
        _width = _poBand->GetXSize();
        _height = _poBand->GetYSize();

        _dataType = _poBand->GetRasterDataType();
        // determine the image type
        _isComplex = GDALDataTypeIsComplex(_dataType);
        // determine the pixel size in bytes
        // (GDALGetDataTypeSize() reports bits, which would inflate every
        //  byte count derived from _pixelSize by a factor of 8)
        _pixelSize = GDALGetDataTypeSizeBytes(_dataType);

        // NOTE(review): despite the parameter name this evaluates to
        // cacheSizeInGB * 1 MiB, not GiB -- confirm the intended unit
        _bufferSize = 1024*1024*cacheSizeInGB;

        // checking whether using memory map
        if(_useMmap) {

            char **papszOptions = NULL;
            // if cacheSizeInGB = 0, use default
            // else set the option
            if(cacheSizeInGB > 0)
                papszOptions = CSLSetNameValue( papszOptions,
                    "CACHE_SIZE",
                    std::to_string(_bufferSize).c_str());

            // space between two lines, as reported by GDAL.
            // NOTE(review): this value is not kept; loadToDevice() computes
            // row strides as _width*_pixelSize instead
            GIntBig pnLineSpace = 0;
            // set up the virtual mem buffer; GDAL stores the pixel spacing
            // of the mapping in _pixelSize
            _poBandVirtualMem =  GDALGetVirtualMemAuto(
                static_cast<GDALRasterBandH>(_poBand),
                GF_Read,
                &_pixelSize,
                &pnLineSpace,
                papszOptions);

            // the option list is owned by this function; release it
            // (CSLDestroy accepts NULL)
            CSLDestroy(papszOptions);

            // check it
            if(!_poBandVirtualMem)
                throw std::runtime_error("GDALImage: cannot create a virtual memory mapping for "
                    + filename);

            // get the starting pointer
            _memPtr = CPLVirtualMemGetAddr(_poBandVirtualMem);
        }
        else { // use a pinned host buffer
            checkCudaErrors(cudaMallocHost((void **)&_memPtr, _bufferSize));
        }

        // make sure memPtr is not Null
        if (!_memPtr)
            throw std::runtime_error("GDALImage: no host memory available for " + filename);
    }
    catch (...) {
        // the destructor does not run for a partially constructed object,
        // so release what has been acquired before passing the error on
        if(_poBandVirtualMem)
            CPLVirtualMemFree(_poBandVirtualMem);
        GDALClose(static_cast<GDALDatasetH>(_poDataset));
        _poDataset = nullptr;
        throw;
    }

    // all done
}


/// load a tile of data h_tile x w_tile from the host (mmap or file buffer) to the GPU
///
/// In memory-map mode the tile is copied straight out of the mapped band with a
/// strided 2D copy. Otherwise the tile is first read from the file into the
/// pinned host buffer with RasterIO and then copied as one contiguous block.
/// In both cases the device array receives the tile rows back to back.
///
/// @param dArray pointer for array in device memory
/// @param h_offset Down/Height offset
/// @param w_offset Across/Width offset
/// @param h_tile Down/Height tile size
/// @param w_tile Across/Width tile size
/// @param stream CUDA stream for copying
/// @throws std::runtime_error if GDAL fails to read the tile from the file
///
/// @note The copies are issued asynchronously on \p stream. In buffered mode the
/// same host buffer is overwritten (and possibly reallocated) by the next call,
/// so the caller has to make sure the previous copy has completed before
/// calling again.
void GDALImage::loadToDevice(void *dArray, size_t h_offset, size_t w_offset, size_t h_tile, size_t w_tile, cudaStream_t stream)
{
    // nothing to transfer for an empty tile
    if (h_tile == 0 || w_tile == 0)
        return;

    if (_useMmap) {
        // We assume down/across directions as rows/cols, so both the mapped
        // band and the device array are row major.
        // NOTE(review): source rows are assumed to be _width*_pixelSize bytes
        // apart; the line spacing reported by GDAL is not consulted here.
        size_t tileStartOffset = (h_offset*_width + w_offset)*_pixelSize;
        char * startPtr = (char *)_memPtr + tileStartOffset;

        checkCudaErrors(cudaMemcpy2DAsync(dArray, w_tile*_pixelSize, // dst, dst pitch
                                          startPtr, _width*_pixelSize, // src, src pitch
                                          w_tile*_pixelSize, h_tile, // row width in bytes, rows
                                          cudaMemcpyHostToDevice, stream));
        return;
    }

    // buffered mode: RasterIO packs the pixels as _dataType with default
    // spacing, so the byte count follows from the data type itself
    size_t bytesPerPixel = GDALGetDataTypeSizeBytes(_dataType);
    size_t tileSize = h_tile*w_tile*bytesPerPixel;

    // if the size is bigger than existing buffer, reallocate
    if (tileSize > _bufferSize) {
        // the buffer comes from cudaMallocHost, so it must be released with
        // cudaFreeHost (cudaFree is for device allocations)
        checkCudaErrors(cudaFreeHost(_memPtr));
        _memPtr = nullptr;
        // maybe we need to make it to fit the pagesize
        checkCudaErrors(cudaMallocHost((void **)&_memPtr, tileSize));
        _bufferSize = tileSize;
    }

    // copy from file to buffer.
    // The buffer shape must equal the window shape: a different shape asks
    // GDAL to resample the window instead of copying it row by row.
    CPLErr err = _poBand->RasterIO(GF_Read, //eRWFlag
        static_cast<int>(w_offset), static_cast<int>(h_offset),  //nXOff, nYOff
        static_cast<int>(w_tile), static_cast<int>(h_tile),  // nXSize, nYSize
        _memPtr, // pData
        static_cast<int>(w_tile), static_cast<int>(h_tile), // nBufXSize, nBufYSize
        _dataType, //eBufType
        0, 0, //nPixelSpace, nLineSpace in pData (0 = tightly packed)
        NULL //psExtraArg extra resampling callback
        );

    if(err != CE_None)
        throw std::runtime_error("GDALImage::loadToDevice: RasterIO failed to read the requested tile");

    // copy from buffer to gpu
    checkCudaErrors(cudaMemcpyAsync(dArray, _memPtr, tileSize, cudaMemcpyHostToDevice, stream));
}

/// Destructor: release the host storage that matches the access mode, then close the file
GDALImage::~GDALImage()
{
    if (_useMmap) {
        // free the virtual memory mapping of the band
        // (only created in memory-map mode; done before the dataset is closed)
        CPLVirtualMemFree(_poBandVirtualMem);
    }
    else {
        // free the pinned host buffer obtained with cudaMallocHost;
        // a destructor must not throw, so a failure is only reported
        cudaError_t err = cudaFreeHost(_memPtr);
        if (err != cudaSuccess)
            std::cerr << "GDALImage: failed to release the pinned host buffer: "
                      << cudaGetErrorString(err) << std::endl;
    }

    // free the GDAL Dataset, close the file
    // (a dataset obtained from GDALOpen is released with GDALClose)
    GDALClose(static_cast<GDALDatasetH>(_poDataset));
}

// end of file
PyCuAmpcor: updated to the most recent version with gdal input 2019-11-20 00:59:49 +00:00			`#include "GDALImage.h"`
			`#include <iostream>`

			`#include <stdio.h>`
			`#include <stdlib.h>`
			`#include <unistd.h>`
			`#include <fcntl.h>`
			`#include <assert.h>`
			`#include <cublas_v2.h>`
			`#include "cudaError.h"`
			`#include <errno.h>`
			`#include <unistd.h>`


			`/**`
			`* \brief Constructor`
			`*`
			`* @param filename a std::string with the raster image file name`
			`*/`

			`GDALImage::GDALImage(std::string filename, int band, int cacheSizeInGB, int useMmap)`
			`: _useMmap(useMmap)`
			`{`
			`// open the file as dataset`
			`_poDataset = (GDALDataset *) GDALOpen(filename.c_str(), GA_ReadOnly );`
			`// if something is wrong, throw an exception`
			`// GDAL reports the error message`
			`if(!_poDataset)`
			`throw;`

			`// check the band info`
			`int count = _poDataset->GetRasterCount();`
			`if(band > count)`
			`{`
			`std::cout << "The desired band " << band << " is greated than " << count << " bands available";`
			`throw;`
			`}`

			`// get the desired band`
			`_poBand = _poDataset->GetRasterBand(band);`
			`if(!_poBand)`
			`throw;`

			`// get the width(x), and height(y)`
			`_width = _poBand->GetXSize();`
			`_height = _poBand->GetYSize();`

			`_dataType = _poBand->GetRasterDataType();`
			`// determine the image type`
			`_isComplex = GDALDataTypeIsComplex(_dataType);`
			`// determine the pixel size in bytes`
			`_pixelSize = GDALGetDataTypeSize(_dataType);`

			`_bufferSize = 10241024cacheSizeInGB;`

			`// checking whether using memory map`
			`if(_useMmap) {`

			`char **papszOptions = NULL;`
			`// if cacheSizeInGB = 0, use default`
			`// else set the option`
			`if(cacheSizeInGB > 0)`
			`papszOptions = CSLSetNameValue( papszOptions,`
			`"CACHE_SIZE",`
			`std::to_string(_bufferSize).c_str());`

			`// space between two lines`
			`GIntBig pnLineSpace;`
			`// set up the virtual mem buffer`
			`_poBandVirtualMem = GDALGetVirtualMemAuto(`
			`static_cast<GDALRasterBandH>(_poBand),`
			`GF_Read,`
			`&_pixelSize,`
			`&pnLineSpace,`
			`papszOptions);`

			`// check it`
			`if(!_poBandVirtualMem)`
			`throw;`

			`// get the starting pointer`
			`_memPtr = CPLVirtualMemGetAddr(_poBandVirtualMem);`
			`}`
			`else { // use a buffer`
			`checkCudaErrors(cudaMallocHost((void **)&_memPtr, _bufferSize));`
			`}`

			`// make sure memPtr is not Null`
			`if (!_memPtr)`
			`throw;`

			`// all done`
			`}`


			`/// load a tile of data h_tile x w_tile from CPU (mmap) to GPU`
			`/// @param dArray pointer for array in device memory`
			`/// @param h_offset Down/Height offset`
			`/// @param w_offset Across/Width offset`
			`/// @param h_tile Down/Height tile size`
			`/// @param w_tile Across/Width tile size`
			`/// @param stream CUDA stream for copying`
			`void GDALImage::loadToDevice(void *dArray, size_t h_offset, size_t w_offset, size_t h_tile, size_t w_tile, cudaStream_t stream)`
			`{`
			`size_t tileStartOffset = (h_offset_width + w_offset)_pixelSize;`

			`char * startPtr = (char *)_memPtr ;`
			`startPtr += tileStartOffset;`

			`// @note`
			`// We assume down/across directions as rows/cols. Therefore, SLC mmap and device array are both row major.`
			`// cuBlas assumes both source and target arrays are column major.`
			`// To use cublasSetMatrix, we need to switch w_tile/h_tile for rows/cols`
			`// checkCudaErrors(cublasSetMatrixAsync(w_tile, h_tile, sizeof(float2), startPtr, width, dArray, w_tile, stream));`
			`if (_useMmap)`
			`checkCudaErrors(cudaMemcpy2DAsync(dArray, w_tile_pixelSize, startPtr, _width_pixelSize,`
			`w_tile*_pixelSize, h_tile, cudaMemcpyHostToDevice,stream));`
			`else {`
			`// get the total tile size in bytes`
			`size_t tileSize = h_tilew_tile_pixelSize;`
			`// if the size is bigger than existing buffer, reallocate`
			`if (tileSize > _bufferSize) {`
			`// maybe we need to make it to fit the pagesize`
			`_bufferSize = tileSize;`
			`checkCudaErrors(cudaFree(_memPtr));`
			`checkCudaErrors(cudaMallocHost((void **)&_memPtr, _bufferSize));`
			`}`
			`// copy from file to buffer`
			`CPLErr err = _poBand->RasterIO(GF_Read, //eRWFlag`
			`w_offset, h_offset, //nXOff, nYOff`
			`w_tile, h_tile, // nXSize, nYSize`
			`_memPtr, // pData`
			`w_tile*h_tile, 1, // nBufXSize, nBufYSize`
			`_dataType, //eBufType`
			`0, 0, //nPixelSpace, nLineSpace in pData`
			`NULL //psExtraArg extra resampling callback`
			`);`

			`if(err != CE_None)`
			`throw;`
			`// copy from buffer to gpu`
			`checkCudaErrors(cudaMemcpyAsync(dArray, _memPtr, tileSize, cudaMemcpyHostToDevice, stream));`
			`}`
			`}`

			`GDALImage::~GDALImage()`
			`{`
			`// free the virtual memory`
			`CPLVirtualMemFree(_poBandVirtualMem),`
			`// free the GDAL Dataset, close the file`
			`delete _poDataset;`
			`}`

			`// end of file`