Merge remote-tracking branch 'upstream/master' into rubbersheet
commit
f1238856e1
|
@ -54,7 +54,7 @@ class snaphu(Component):
|
|||
self.azimuthLooks = obj.insar.topo.numberAzimuthLooks
|
||||
|
||||
azres = obj.insar.masterFrame.platform.antennaLength/2.0
|
||||
azfact = obj.insar.topo.numberAzimuthLooks *azres / obj.insar.topo.azimuthSpacing
|
||||
azfact = azres / obj.insar.topo.azimuthSpacing
|
||||
|
||||
rBW = obj.insar.masterFrame.instrument.pulseLength * obj.insar.masterFrame.instrument.chirpSlope
|
||||
rgres = abs(SPEED_OF_LIGHT / (2.0 * rBW))
|
||||
|
|
|
@ -54,7 +54,7 @@ class snaphu_mcf(Component):
|
|||
self.azimuthLooks = obj.insar.topo.numberAzimuthLooks
|
||||
|
||||
azres = obj.insar.masterFrame.platform.antennaLength/2.0
|
||||
azfact = obj.insar.topo.numberAzimuthLooks *azres / obj.insar.topo.azimuthSpacing
|
||||
azfact = azres / obj.insar.topo.azimuthSpacing
|
||||
|
||||
rBW = obj.insar.masterFrame.instrument.pulseLength * obj.insar.masterFrame.instrument.chirpSlope
|
||||
rgres = abs(SPEED_OF_LIGHT / (2.0 * rBW))
|
||||
|
|
|
@ -2,19 +2,19 @@
|
|||
|
||||
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
# Copyright 2010 California Institute of Technology. ALL RIGHTS RESERVED.
|
||||
#
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
#
|
||||
# United States Government Sponsorship acknowledged. This software is subject to
|
||||
# U.S. export control laws and regulations and has been classified as 'EAR99 NLR'
|
||||
# (No [Export] License Required except when exporting to an embargoed country,
|
||||
|
@ -49,7 +49,7 @@ if envGPUampcor['GPU_ACC_ENABLED']:
|
|||
build_base += "-ccbin " + envGPUampcor['NVCC_CCBIN'] + " "
|
||||
else:
|
||||
print('Assuming default system compiler for nvcc.')
|
||||
build_base += "-arch=sm_35 -shared -Xcompiler -fPIC -O3 "
|
||||
build_base += "-shared -Xcompiler -fPIC -O3 "
|
||||
build_cmd = build_base + "-dc -m64 -o $TARGET -c $SOURCE"
|
||||
built_path = os.path.join(build, 'gpu-ampcor.o')
|
||||
linked_path = os.path.join(build, 'gpu-ampcor-linked.o')
|
||||
|
|
|
@ -1,2 +1,2 @@
|
|||
nvcc -arch=sm_35 -Xcompiler -fPIC -o gpu-topo.o -c Topo.cu
|
||||
nvcc -Xcompiler -fPIC -o gpu-topo.o -c Topo.cu
|
||||
cp -f gpu-topo.o ..
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
#!/usr/bin/env python
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
|
||||
|
@ -28,7 +28,7 @@ if envPyCuAmpcor['GPU_ACC_ENABLED']:
|
|||
|
||||
if not os.path.exists(initFile):
|
||||
with open(initFile, 'w') as fout:
|
||||
fout.write("#!/usr/bin/env python")
|
||||
fout.write("#!/usr/bin/env python3")
|
||||
|
||||
listFiles = [initFile]
|
||||
envPyCuAmpcor.Install(install, listFiles)
|
||||
|
|
|
@ -0,0 +1,63 @@
|
|||
#!/usr/bin/env python3
|
||||
#
|
||||
# Test program to run ampcor with GPU
|
||||
# For two GeoTiff images
|
||||
#
|
||||
|
||||
import argparse
|
||||
import numpy as np
|
||||
from PyCuAmpcor import PyCuAmpcor
|
||||
|
||||
|
||||
def main():
|
||||
'''
|
||||
main program
|
||||
'''
|
||||
|
||||
objOffset = PyCuAmpcor() # create the processor
|
||||
|
||||
objOffset.algorithm = 0 # cross-correlation method 0=freq 1=time
|
||||
objOffset.deviceID = 0 # GPU device id to be used
|
||||
objOffset.nStreams = 2 # cudaStreams; multiple streams to overlap data transfer with gpu calculations
|
||||
objOffset.masterImageName = "master.tif"
|
||||
objOffset.masterImageHeight = 16480 # RasterYSize
|
||||
objOffset.masterImageWidth = 17000 # RasterXSize
|
||||
objOffset.slaveImageName = "slave.tif"
|
||||
objOffset.slaveImageHeight = 16480
|
||||
objOffset.slaveImageWidth = 17000
|
||||
objOffset.windowSizeWidth = 64 # template window size
|
||||
objOffset.windowSizeHeight = 64
|
||||
objOffset.halfSearchRangeDown = 20 # search range
|
||||
objOffset.halfSearchRangeAcross = 20
|
||||
objOffset.derampMethod = 1 # deramping for complex signal, set to 1 for real images
|
||||
|
||||
objOffset.skipSampleDown = 128 # strides between windows
|
||||
objOffset.skipSampleAcross = 64
|
||||
# gpu processes several windows in one batch/Chunk
|
||||
# total windows in Chunk = numberWindowDownInChunk*numberWindowAcrossInChunk
|
||||
# the max number of windows depending on gpu memory and type
|
||||
objOffset.numberWindowDownInChunk = 1
|
||||
objOffset.numberWindowAcrossInChunk = 10
|
||||
objOffset.corrSurfaceOverSamplingFactor = 8 # oversampling factor for correlation surface
|
||||
objOffset.corrSurfaceZoomInWindow = 16 # area in correlation surface to be oversampled
|
||||
objOffset.corrSufaceOverSamplingMethod = 1 # fft or sinc oversampler
|
||||
objOffset.useMmap = 1 # default using memory map as buffer, if having troubles, set to 0
|
||||
objOffset.mmapSize = 1 # mmap or buffer size used for transferring data from file to gpu, in GB
|
||||
|
||||
objOffset.numberWindowDown = 40 # number of windows to be processed
|
||||
objOffset.numberWindowAcross = 100
|
||||
# if to process the whole image; some math needs to be done
|
||||
# margin = 0 # margins to be neglected
|
||||
#objOffset.numberWindowDown = (objOffset.slaveImageHeight - 2*margin - 2*objOffset.halfSearchRangeDown - objOffset.windowSizeHeight) // objOffset.skipSampleDown
|
||||
#objOffset.numberWindowAcross = (objOffset.slaveImageWidth - 2*margin - 2*objOffset.halfSearchRangeAcross - objOffset.windowSizeWidth) // objOffset.skipSampleAcross
|
||||
|
||||
objOffset.setupParams()
|
||||
objOffset.masterStartPixelDownStatic = objOffset.halfSearchRangeDown # starting pixel offset
|
||||
objOffset.masterStartPixelAcrossStatic = objOffset.halfSearchRangeDown
|
||||
objOffset.setConstantGrossOffset(0, 0) # gross offset between master and slave images
|
||||
objOffset.checkPixelInImageRange() # check whether there is something wrong with
|
||||
objOffset.runAmpcor()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
|
@ -1,14 +1,14 @@
|
|||
#!/usr/bin/env python3
|
||||
#
|
||||
#
|
||||
# test_cuAmpcor.py
|
||||
# Test program to run ampcor with GPU
|
||||
#
|
||||
#
|
||||
#
|
||||
|
||||
import argparse
|
||||
import numpy as np
|
||||
#from PyCuAmpcor import PyCuAmpcor
|
||||
from isce.components.contrib.PyCuAmpcor import PyCuAmpcor
|
||||
from PyCuAmpcor import PyCuAmpcor
|
||||
|
||||
|
||||
def main():
|
||||
'''
|
||||
|
@ -20,10 +20,10 @@ def main():
|
|||
objOffset.algorithm = 0
|
||||
objOffset.deviceID = 0 # -1:let system find the best GPU
|
||||
objOffset.nStreams = 2 #cudaStreams
|
||||
objOffset.masterImageName = "master.slc"
|
||||
objOffset.masterImageName = "20131213.slc.vrt"
|
||||
objOffset.masterImageHeight = 43008
|
||||
objOffset.masterImageWidth = 24320
|
||||
objOffset.slaveImageName = "slave.slc"
|
||||
objOffset.slaveImageName = "20131221.slc.vrt"
|
||||
objOffset.slaveImageHeight = 43008
|
||||
objOffset.slaveImageWidth = 24320
|
||||
objOffset.windowSizeWidth = 64
|
||||
|
@ -38,8 +38,9 @@ def main():
|
|||
objOffset.numberWindowDownInChunk = 10
|
||||
objOffset.numberWindowAcrossInChunk = 10
|
||||
objOffset.corrSurfaceOverSamplingFactor = 8
|
||||
objOffset.corrSurfaceZoomInWindow = 16
|
||||
objOffset.corrSufaceOverSamplingMethod = 1
|
||||
objOffset.corrSurfaceZoomInWindow = 16
|
||||
objOffset.corrSufaceOverSamplingMethod = 1
|
||||
objOffset.useMmap = 1
|
||||
objOffset.mmapSize = 8
|
||||
|
||||
objOffset.setupParams()
|
||||
|
@ -48,8 +49,8 @@ def main():
|
|||
objOffset.setConstantGrossOffset(642, -30)
|
||||
objOffset.checkPixelInImageRange()
|
||||
objOffset.runAmpcor()
|
||||
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
|
||||
main()
|
|
@ -1,27 +1,27 @@
|
|||
#!/usr/bin/env python3
|
||||
#
|
||||
#
|
||||
|
||||
from PyCuAmpcor import PyCuAmpcor
|
||||
import numpy as np
|
||||
|
||||
def main():
|
||||
def main():
|
||||
'''
|
||||
Set parameters manually and run ampcor
|
||||
'''
|
||||
objOffset = PyCuAmpcor()
|
||||
|
||||
|
||||
#step 1 set constant parameters
|
||||
objOffset.masterImageName = "master.slc"
|
||||
objOffset.masterImageName = "master.slc.vrt"
|
||||
objOffset.masterImageHeight = 128
|
||||
objOffset.masterImageWidth = 128
|
||||
objOffset.slaveImageName = "slave.slc"
|
||||
objOffset.slaveImageName = "slave.slc.vrt"
|
||||
objOffset.masterImageHeight = 128
|
||||
objOffset.masterImageWidth = 128
|
||||
objOffset.masterImageWidth = 128
|
||||
objOffset.skipSampleDown = 2
|
||||
objOffset.skipSampleAcross = 2
|
||||
objOffset.windowSizeHeight = 16
|
||||
objOffset.windowSizeWidth = 16
|
||||
objOffset.halfSearchRangeDown = 20
|
||||
objOffset.halfSearchRangeDown = 20
|
||||
objOffset.halfSearchRangeAcross = 20
|
||||
objOffset.numberWindowDown = 2
|
||||
objOffset.numberWindowAcross = 2
|
||||
|
@ -29,19 +29,19 @@ def main():
|
|||
objOffset.numberWindowAcrossInChunk = 2
|
||||
# 2 set other dependent parameters and allocate aray parameters
|
||||
objOffset.setupParams()
|
||||
|
||||
|
||||
#3 set gross offsets: constant or varying
|
||||
objOffset.masterStartPixelDownStatic = objOffset.halfSearchRangeDown
|
||||
objOffset.masterStartPixelDownStatic = objOffset.halfSearchRangeDown
|
||||
objOffset.masterStartPixelAcrossStatic = objOffset.halfSearchRangeAcross
|
||||
vD = np.random.randint(0, 10, size =objOffset.numberWindows, dtype=np.int32)
|
||||
vD = np.random.randint(0, 10, size =objOffset.numberWindows, dtype=np.int32)
|
||||
vA = np.random.randint(0, 1, size = objOffset.numberWindows, dtype=np.int32)
|
||||
objOffset.setVaryingGrossOffset(vD, vA)
|
||||
|
||||
|
||||
objOffset.checkPixelInImageRange()
|
||||
#4 run ampcor
|
||||
objOffset.runAmpcor()
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
|
|
|
@ -0,0 +1,154 @@
|
|||
#include "GDALImage.h"
|
||||
#include <iostream>
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
#include <fcntl.h>
|
||||
#include <assert.h>
|
||||
#include <cublas_v2.h>
|
||||
#include "cudaError.h"
|
||||
#include <errno.h>
|
||||
#include <unistd.h>
|
||||
|
||||
|
||||
/**
|
||||
* \brief Constructor
|
||||
*
|
||||
* @param filename a std::string with the raster image file name
|
||||
*/
|
||||
|
||||
GDALImage::GDALImage(std::string filename, int band, int cacheSizeInGB, int useMmap)
|
||||
: _useMmap(useMmap)
|
||||
{
|
||||
// open the file as dataset
|
||||
_poDataset = (GDALDataset *) GDALOpen(filename.c_str(), GA_ReadOnly );
|
||||
// if something is wrong, throw an exception
|
||||
// GDAL reports the error message
|
||||
if(!_poDataset)
|
||||
throw;
|
||||
|
||||
// check the band info
|
||||
int count = _poDataset->GetRasterCount();
|
||||
if(band > count)
|
||||
{
|
||||
std::cout << "The desired band " << band << " is greated than " << count << " bands available";
|
||||
throw;
|
||||
}
|
||||
|
||||
// get the desired band
|
||||
_poBand = _poDataset->GetRasterBand(band);
|
||||
if(!_poBand)
|
||||
throw;
|
||||
|
||||
// get the width(x), and height(y)
|
||||
_width = _poBand->GetXSize();
|
||||
_height = _poBand->GetYSize();
|
||||
|
||||
_dataType = _poBand->GetRasterDataType();
|
||||
// determine the image type
|
||||
_isComplex = GDALDataTypeIsComplex(_dataType);
|
||||
// determine the pixel size in bytes
|
||||
_pixelSize = GDALGetDataTypeSize(_dataType);
|
||||
|
||||
_bufferSize = 1024*1024*cacheSizeInGB;
|
||||
|
||||
// checking whether using memory map
|
||||
if(_useMmap) {
|
||||
|
||||
char **papszOptions = NULL;
|
||||
// if cacheSizeInGB = 0, use default
|
||||
// else set the option
|
||||
if(cacheSizeInGB > 0)
|
||||
papszOptions = CSLSetNameValue( papszOptions,
|
||||
"CACHE_SIZE",
|
||||
std::to_string(_bufferSize).c_str());
|
||||
|
||||
// space between two lines
|
||||
GIntBig pnLineSpace;
|
||||
// set up the virtual mem buffer
|
||||
_poBandVirtualMem = GDALGetVirtualMemAuto(
|
||||
static_cast<GDALRasterBandH>(_poBand),
|
||||
GF_Read,
|
||||
&_pixelSize,
|
||||
&pnLineSpace,
|
||||
papszOptions);
|
||||
|
||||
// check it
|
||||
if(!_poBandVirtualMem)
|
||||
throw;
|
||||
|
||||
// get the starting pointer
|
||||
_memPtr = CPLVirtualMemGetAddr(_poBandVirtualMem);
|
||||
}
|
||||
else { // use a buffer
|
||||
checkCudaErrors(cudaMallocHost((void **)&_memPtr, _bufferSize));
|
||||
}
|
||||
|
||||
// make sure memPtr is not Null
|
||||
if (!_memPtr)
|
||||
throw;
|
||||
|
||||
// all done
|
||||
}
|
||||
|
||||
|
||||
/// load a tile of data h_tile x w_tile from CPU (mmap) to GPU
|
||||
/// @param dArray pointer for array in device memory
|
||||
/// @param h_offset Down/Height offset
|
||||
/// @param w_offset Across/Width offset
|
||||
/// @param h_tile Down/Height tile size
|
||||
/// @param w_tile Across/Width tile size
|
||||
/// @param stream CUDA stream for copying
|
||||
void GDALImage::loadToDevice(void *dArray, size_t h_offset, size_t w_offset, size_t h_tile, size_t w_tile, cudaStream_t stream)
|
||||
{
|
||||
size_t tileStartOffset = (h_offset*_width + w_offset)*_pixelSize;
|
||||
|
||||
char * startPtr = (char *)_memPtr ;
|
||||
startPtr += tileStartOffset;
|
||||
|
||||
// @note
|
||||
// We assume down/across directions as rows/cols. Therefore, SLC mmap and device array are both row major.
|
||||
// cuBlas assumes both source and target arrays are column major.
|
||||
// To use cublasSetMatrix, we need to switch w_tile/h_tile for rows/cols
|
||||
// checkCudaErrors(cublasSetMatrixAsync(w_tile, h_tile, sizeof(float2), startPtr, width, dArray, w_tile, stream));
|
||||
if (_useMmap)
|
||||
checkCudaErrors(cudaMemcpy2DAsync(dArray, w_tile*_pixelSize, startPtr, _width*_pixelSize,
|
||||
w_tile*_pixelSize, h_tile, cudaMemcpyHostToDevice,stream));
|
||||
else {
|
||||
// get the total tile size in bytes
|
||||
size_t tileSize = h_tile*w_tile*_pixelSize;
|
||||
// if the size is bigger than existing buffer, reallocate
|
||||
if (tileSize > _bufferSize) {
|
||||
// maybe we need to make it to fit the pagesize
|
||||
_bufferSize = tileSize;
|
||||
checkCudaErrors(cudaFree(_memPtr));
|
||||
checkCudaErrors(cudaMallocHost((void **)&_memPtr, _bufferSize));
|
||||
}
|
||||
// copy from file to buffer
|
||||
CPLErr err = _poBand->RasterIO(GF_Read, //eRWFlag
|
||||
w_offset, h_offset, //nXOff, nYOff
|
||||
w_tile, h_tile, // nXSize, nYSize
|
||||
_memPtr, // pData
|
||||
w_tile*h_tile, 1, // nBufXSize, nBufYSize
|
||||
_dataType, //eBufType
|
||||
0, 0, //nPixelSpace, nLineSpace in pData
|
||||
NULL //psExtraArg extra resampling callback
|
||||
);
|
||||
|
||||
if(err != CE_None)
|
||||
throw;
|
||||
// copy from buffer to gpu
|
||||
checkCudaErrors(cudaMemcpyAsync(dArray, _memPtr, tileSize, cudaMemcpyHostToDevice, stream));
|
||||
}
|
||||
}
|
||||
|
||||
GDALImage::~GDALImage()
|
||||
{
|
||||
// free the virtual memory
|
||||
CPLVirtualMemFree(_poBandVirtualMem),
|
||||
// free the GDAL Dataset, close the file
|
||||
delete _poDataset;
|
||||
}
|
||||
|
||||
// end of file
|
|
@ -0,0 +1,79 @@
|
|||
// -*- c++ -*-
|
||||
/**
|
||||
* \brief Class for an image described GDAL vrt
|
||||
*
|
||||
* only complex (pixelOffset=8) or real(pixelOffset=4) images are supported, such as SLC and single-precision TIFF
|
||||
*/
|
||||
|
||||
#ifndef __GDALIMAGE_H
|
||||
#define __GDALIMAGE_H
|
||||
|
||||
#include <cublas_v2.h>
|
||||
#include <string>
|
||||
#include <gdal/gdal_priv.h>
|
||||
#include <gdal/cpl_conv.h>
|
||||
|
||||
class GDALImage{
|
||||
|
||||
public:
|
||||
using size_t = std::size_t;
|
||||
|
||||
private:
|
||||
size_t _fileSize;
|
||||
int _height;
|
||||
int _width;
|
||||
|
||||
// buffer pointer
|
||||
void * _memPtr = NULL;
|
||||
|
||||
int _pixelSize; //in bytes
|
||||
|
||||
int _isComplex;
|
||||
|
||||
size_t _bufferSize;
|
||||
int _useMmap;
|
||||
|
||||
GDALDataType _dataType;
|
||||
CPLVirtualMem * _poBandVirtualMem = NULL;
|
||||
GDALDataset * _poDataset = NULL;
|
||||
GDALRasterBand * _poBand = NULL;
|
||||
|
||||
public:
|
||||
GDALImage() = delete;
|
||||
GDALImage(std::string fn, int band=1, int cacheSizeInGB=0, int useMmap=1);
|
||||
|
||||
void * getmemPtr()
|
||||
{
|
||||
return(_memPtr);
|
||||
}
|
||||
|
||||
size_t getFileSize()
|
||||
{
|
||||
return (_fileSize);
|
||||
}
|
||||
|
||||
size_t getHeight() {
|
||||
return (_height);
|
||||
}
|
||||
|
||||
size_t getWidth()
|
||||
{
|
||||
return (_width);
|
||||
}
|
||||
|
||||
int getPixelSize()
|
||||
{
|
||||
return _pixelSize;
|
||||
}
|
||||
|
||||
bool isComplex()
|
||||
{
|
||||
return _isComplex;
|
||||
}
|
||||
|
||||
void loadToDevice(void *dArray, size_t h_offset, size_t w_offset, size_t h_tile, size_t w_tile, cudaStream_t stream);
|
||||
~GDALImage();
|
||||
|
||||
};
|
||||
|
||||
#endif //__GDALIMAGE_H
|
|
@ -3,23 +3,24 @@ PROJECT = CUAMPCOR
|
|||
LDFLAGS = -lcuda -lcudart -lcufft -lcublas
|
||||
CXXFLAGS = -std=c++11 -fpermissive -fPIC -shared
|
||||
NVCCFLAGS = -ccbin g++ -m64 \
|
||||
-gencode arch=compute_35,code=sm_35 \
|
||||
-gencode arch=compute_35,code=sm_35 \
|
||||
-gencode arch=compute_60,code=sm_60 \
|
||||
-Xcompiler -fPIC -shared -Wno-deprecated-gpu-targets \
|
||||
-ftz=false -prec-div=true -prec-sqrt=true
|
||||
|
||||
CXX=g++
|
||||
NVCC=nvcc
|
||||
|
||||
DEPS = cudaUtil.h cudaError.h cuArrays.h SlcImage.h cuAmpcorParameter.h
|
||||
OBJS = SlcImage.o cuArrays.o cuArraysCopy.o cuArraysPadding.o cuOverSampler.o \
|
||||
DEPS = cudaUtil.h cudaError.h cuArrays.h GDALImage.h cuAmpcorParameter.h
|
||||
OBJS = GDALImage.o cuArrays.o cuArraysCopy.o cuArraysPadding.o cuOverSampler.o \
|
||||
cuSincOverSampler.o cuDeramp.o cuOffset.o \
|
||||
cuCorrNormalization.o cuAmpcorParameter.o cuCorrTimeDomain.o cuCorrFrequency.o \
|
||||
cuAmpcorChunk.o cuAmpcorController.o cuEstimateStats.o
|
||||
|
||||
all: cuampcor
|
||||
all: pyampcor
|
||||
|
||||
SlcImage.o: SlcImage.cu $(DEPS)
|
||||
$(NVCC) $(NVCCFLAGS) -c -o $@ SlcImage.cu
|
||||
GDALImage.o: GDALImage.cu $(DEPS)
|
||||
$(NVCC) $(NVCCFLAGS) -c -o $@ GDALImage.cu
|
||||
|
||||
cuArrays.o: cuArrays.cu $(DEPS)
|
||||
$(NVCC) $(NVCCFLAGS) -c -o $@ cuArrays.cu
|
||||
|
@ -45,8 +46,8 @@ cuOffset.o: cuOffset.cu $(DEPS)
|
|||
cuCorrNormalization.o: cuCorrNormalization.cu $(DEPS)
|
||||
$(NVCC) $(NVCCFLAGS) -c -o $@ cuCorrNormalization.cu
|
||||
|
||||
cuAmpcorParameter.o: cuAmpcorParameter.cu
|
||||
$(NVCC) $(NVCCFLAGS) -c -o $@ cuAmpcorParameter.cu
|
||||
cuAmpcorParameter.o: cuAmpcorParameter.cu
|
||||
$(NVCC) $(NVCCFLAGS) -c -o $@ cuAmpcorParameter.cu
|
||||
|
||||
cuCorrTimeDomain.o: cuCorrTimeDomain.cu $(DEPS)
|
||||
$(NVCC) $(NVCCFLAGS) -c -o $@ cuCorrTimeDomain.cu
|
||||
|
@ -54,8 +55,8 @@ cuCorrTimeDomain.o: cuCorrTimeDomain.cu $(DEPS)
|
|||
cuCorrFrequency.o: cuCorrFrequency.cu $(DEPS) cuCorrFrequency.h
|
||||
$(NVCC) $(NVCCFLAGS) -c -o $@ cuCorrFrequency.cu
|
||||
|
||||
cuAmpcorChunk.o: cuAmpcorChunk.cu cuAmpcorUtil.h $(DEPS)
|
||||
$(NVCC) $(NVCCFLAGS) -c -o $@ cuAmpcorChunk.cu
|
||||
cuAmpcorChunk.o: cuAmpcorChunk.cu cuAmpcorUtil.h $(DEPS)
|
||||
$(NVCC) $(NVCCFLAGS) -c -o $@ cuAmpcorChunk.cu
|
||||
|
||||
cuAmpcorController.o: cuAmpcorController.cu
|
||||
$(NVCC) $(NVCCFLAGS) -c -o $@ cuAmpcorController.cu
|
||||
|
@ -64,8 +65,8 @@ cuEstimateStats.o: cuEstimateStats.cu
|
|||
$(NVCC) $(NVCCFLAGS) -c -o $@ cuEstimateStats.cu
|
||||
|
||||
|
||||
cuampcor: $(OBJS)
|
||||
pyampcor: $(OBJS)
|
||||
rm -f PyCuAmpcor.cpp && python3 setup.py build_ext --inplace
|
||||
|
||||
clean:
|
||||
rm -rf *.o *so build *~ PyCuAmpcor.cpp ctest *.dat
|
||||
rm -rf *.o *so build *~ PyCuAmpcor.cpp ctest *.dat
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
#
|
||||
#
|
||||
# PYX file to control Python module interface to underlying CUDA-Ampcor code
|
||||
#
|
||||
#
|
||||
from libcpp.string cimport string
|
||||
import numpy as np
|
||||
cimport numpy as np
|
||||
|
@ -9,13 +9,13 @@ cimport numpy as np
|
|||
cdef extern from "cudaUtil.h":
|
||||
int gpuDeviceInit(int)
|
||||
void gpuDeviceList()
|
||||
int gpuGetMaxGflopsDeviceId()
|
||||
int gpuGetMaxGflopsDeviceId()
|
||||
|
||||
def listGPU():
|
||||
gpuDeviceList()
|
||||
|
||||
def findGPU():
|
||||
return gpuGetMaxGflopsDeviceId()
|
||||
return gpuGetMaxGflopsDeviceId()
|
||||
|
||||
def setGPU(int id):
|
||||
return gpuDeviceInit(id)
|
||||
|
@ -24,90 +24,92 @@ def setGPU(int id):
|
|||
cdef extern from "cuAmpcorParameter.h":
|
||||
cdef cppclass cuAmpcorParameter:
|
||||
cuAmpcorParameter() except +
|
||||
int algorithm ## Cross-correlation algorithm: 0=freq domain 1=time domain
|
||||
int deviceID ## Targeted GPU device ID: use -1 to auto select
|
||||
int nStreams ## Number of streams to asynchonize data transfers and compute kernels
|
||||
int algorithm ## Cross-correlation algorithm: 0=freq domain 1=time domain
|
||||
int deviceID ## Targeted GPU device ID: use -1 to auto select
|
||||
int nStreams ## Number of streams to asynchonize data transfers and compute kernels
|
||||
int derampMethod ## Method for deramping 0=None, 1=average, 2=phase gradient
|
||||
|
||||
|
||||
## chip or window size for raw data
|
||||
int windowSizeHeightRaw ## Template window height (original size)
|
||||
int windowSizeWidthRaw ## Template window width (original size)
|
||||
int searchWindowSizeHeightRaw ## Search window height (original size)
|
||||
int windowSizeWidthRaw ## Template window width (original size)
|
||||
int searchWindowSizeHeightRaw ## Search window height (original size)
|
||||
int searchWindowSizeWidthRaw ## Search window width (orignal size)
|
||||
int halfSearchRangeDownRaw ##(searchWindowSizeHeightRaw-windowSizeHeightRaw)/2
|
||||
int halfSearchRangeDownRaw ##(searchWindowSizeHeightRaw-windowSizeHeightRaw)/2
|
||||
int halfSearchRangeAcrossRaw ##(searchWindowSizeWidthRaw-windowSizeWidthRaw)/2
|
||||
## chip or window size after oversampling
|
||||
int rawDataOversamplingFactor ## Raw data overampling factor (from original size to oversampled size)
|
||||
|
||||
## strides between chips/windows
|
||||
|
||||
## strides between chips/windows
|
||||
int skipSampleDownRaw ## Skip size between neighboring windows in Down direction (original size)
|
||||
int skipSampleAcrossRaw ## Skip size between neighboring windows in across direction (original size)
|
||||
|
||||
|
||||
## Zoom in region near location of max correlation
|
||||
int zoomWindowSize ## Zoom-in window size in correlation surface (same for down and across directions)
|
||||
int zoomWindowSize ## Zoom-in window size in correlation surface (same for down and across directions)
|
||||
int oversamplingFactor ## Oversampling factor for interpolating correlation surface
|
||||
int oversamplingMethod
|
||||
|
||||
float thresholdSNR ## Threshold of Signal noise ratio to remove noisy data
|
||||
|
||||
int oversamplingMethod
|
||||
|
||||
float thresholdSNR ## Threshold of Signal noise ratio to remove noisy data
|
||||
|
||||
##master image
|
||||
string masterImageName ## master SLC image name
|
||||
int imageDataType1 ## master image data type, 2=cfloat=complex=float2 1=float
|
||||
int masterImageHeight ## master image height
|
||||
int masterImageHeight ## master image height
|
||||
int masterImageWidth ## master image width
|
||||
|
||||
|
||||
##slave image
|
||||
string slaveImageName ## slave SLC image name
|
||||
int imageDataType2 ## slave image data type, 2=cfloat=complex=float2 1=float
|
||||
int slaveImageHeight ## slave image height
|
||||
int slaveImageHeight ## slave image height
|
||||
int slaveImageWidth ## slave image width
|
||||
|
||||
int mmapSizeInGB ## mmap buffer size in unit of Gigabytes
|
||||
|
||||
|
||||
int useMmap ## whether to use mmap
|
||||
int mmapSizeInGB ## mmap buffer size in unit of Gigabytes (if not mmmap, the buffer size)
|
||||
|
||||
## total number of chips/windows
|
||||
int numberWindowDown ## number of total windows (down)
|
||||
int numberWindowAcross ## number of total windows (across)
|
||||
int numberWindows ## numberWindowDown*numberWindowAcross
|
||||
|
||||
|
||||
## number of chips/windows in a batch/chunk
|
||||
int numberWindowDownInChunk ## number of windows processed in a chunk (down)
|
||||
int numberWindowAcrossInChunk ## number of windows processed in a chunk (across)
|
||||
int numberWindowsInChunk ## numberWindowDownInChunk*numberWindowAcrossInChunk
|
||||
int numberChunkDown ## number of chunks (down)
|
||||
int numberChunkAcross ## number of chunks (across)
|
||||
int numberChunks
|
||||
int numberChunks
|
||||
|
||||
int *masterStartPixelDown ## master starting pixels for each window (down)
|
||||
int *masterStartPixelDown ## master starting pixels for each window (down)
|
||||
int *masterStartPixelAcross ## master starting pixels for each window (across)
|
||||
int *slaveStartPixelDown ## slave starting pixels for each window (down)
|
||||
int *slaveStartPixelAcross ## slave starting pixels for each window (across)
|
||||
int *slaveStartPixelDown ## slave starting pixels for each window (down)
|
||||
int *slaveStartPixelAcross ## slave starting pixels for each window (across)
|
||||
int *grossOffsetDown ## Gross offsets between master and slave windows (down) : slaveStartPixel - masterStartPixel
|
||||
int *grossOffsetAcross ## Gross offsets between master and slave windows (across)
|
||||
int *grossOffsetAcross ## Gross offsets between master and slave windows (across)
|
||||
int grossOffsetDown0 ## constant gross offset (down)
|
||||
int grossOffsetAcross0 ## constant gross offset (across)
|
||||
int masterStartPixelDown0 ## the first pixel of master image (down), be adjusted with margins and gross offset
|
||||
int masterStartPixelDown0 ## the first pixel of master image (down), be adjusted with margins and gross offset
|
||||
int masterStartPixelAcross0 ## the first pixel of master image (across)
|
||||
int *masterChunkStartPixelDown ## array of starting pixels for all master chunks (down)
|
||||
int *masterChunkStartPixelAcross ## array of starting pixels for all master chunks (across)
|
||||
int *slaveChunkStartPixelDown ## array of starting pixels for all slave chunks (down)
|
||||
int *slaveChunkStartPixelAcross ## array of starting pixels for all slave chunks (across)
|
||||
int *masterChunkHeight ## array of heights of all master chunks, required when loading chunk to GPU
|
||||
int *masterChunkHeight ## array of heights of all master chunks, required when loading chunk to GPU
|
||||
int *masterChunkWidth ## array of width of all master chunks
|
||||
int *slaveChunkHeight ## array of width of all master chunks
|
||||
int *slaveChunkWidth ## array of width of all slave chunks
|
||||
int maxMasterChunkHeight ## max height for all master/slave chunks, determine the size of reading cache in GPU
|
||||
int maxMasterChunkWidth ## max width for all master chunks, determine the size of reading cache in GPU
|
||||
int maxMasterChunkHeight ## max height for all master/slave chunks, determine the size of reading cache in GPU
|
||||
int maxMasterChunkWidth ## max width for all master chunks, determine the size of reading cache in GPU
|
||||
int maxSlaveChunkHeight
|
||||
int maxSlaveChunkWidth
|
||||
|
||||
string grossOffsetImageName
|
||||
string offsetImageName ## Output Offset fields filename
|
||||
|
||||
string grossOffsetImageName
|
||||
string offsetImageName ## Output Offset fields filename
|
||||
string snrImageName ## Output SNR filename
|
||||
void setStartPixels(int*, int*, int*, int*)
|
||||
void setStartPixels(int, int, int*, int*)
|
||||
void setStartPixels(int, int, int, int)
|
||||
void checkPixelInImageRange() ## check whether
|
||||
|
||||
string covImageName ## Output COV filename
|
||||
void setStartPixels(int*, int*, int*, int*)
|
||||
void setStartPixels(int, int, int*, int*)
|
||||
void setStartPixels(int, int, int, int)
|
||||
void checkPixelInImageRange() ## check whether
|
||||
|
||||
void setupParameters() ## Process other parameters after Python Inpu
|
||||
|
||||
cdef extern from "cuAmpcorController.h":
|
||||
|
@ -115,34 +117,40 @@ cdef extern from "cuAmpcorController.h":
|
|||
cuAmpcorController() except +
|
||||
cuAmpcorParameter *param
|
||||
void runAmpcor()
|
||||
|
||||
|
||||
cdef class PyCuAmpcor(object):
|
||||
'''
|
||||
Python interface for cuda Ampcor
|
||||
Python interface for cuda Ampcor
|
||||
'''
|
||||
cdef cuAmpcorController c_cuAmpcor
|
||||
def __cinit__(self):
|
||||
return
|
||||
|
||||
return
|
||||
|
||||
@property
|
||||
def algorithm(self):
|
||||
return self.c_cuAmpcor.param.algorithm
|
||||
return self.c_cuAmpcor.param.algorithm
|
||||
@algorithm.setter
|
||||
def algorithm(self, int a):
|
||||
self.c_cuAmpcor.param.algorithm = a
|
||||
@property
|
||||
def deviceID(self):
|
||||
return self.c_cuAmpcor.param.deviceID
|
||||
return self.c_cuAmpcor.param.deviceID
|
||||
@deviceID.setter
|
||||
def deviceID(self, int a):
|
||||
self.c_cuAmpcor.param.deviceID = a
|
||||
@property
|
||||
def nStreams(self):
|
||||
return self.c_cuAmpcor.param.nStreams
|
||||
return self.c_cuAmpcor.param.nStreams
|
||||
@nStreams.setter
|
||||
def nStreams(self, int a):
|
||||
self.c_cuAmpcor.param.nStreams = a
|
||||
@property
|
||||
@property
|
||||
def useMmap(self):
|
||||
return self.c_cuAmpcor.param.useMmap
|
||||
@useMmap.setter
|
||||
def useMmap(self, int a):
|
||||
self.c_cuAmpcor.param.useMmap = a
|
||||
@property
|
||||
def mmapSize(self):
|
||||
return self.c_cuAmpcor.param.mmapSizeInGB
|
||||
@mmapSize.setter
|
||||
|
@ -150,19 +158,19 @@ cdef class PyCuAmpcor(object):
|
|||
self.c_cuAmpcor.param.mmapSizeInGB = a
|
||||
@property
|
||||
def derampMethod(self):
|
||||
return self.c_cuAmpcor.param.derampMethod
|
||||
return self.c_cuAmpcor.param.derampMethod
|
||||
@derampMethod.setter
|
||||
def derampMethod(self, int a):
|
||||
self.c_cuAmpcor.param.derampMethod = a
|
||||
@property
|
||||
def windowSizeHeight(self):
|
||||
return self.c_cuAmpcor.param.windowSizeHeightRaw
|
||||
return self.c_cuAmpcor.param.windowSizeHeightRaw
|
||||
@windowSizeHeight.setter
|
||||
def windowSizeHeight(self, int a):
|
||||
self.c_cuAmpcor.param.windowSizeHeightRaw = a
|
||||
@property
|
||||
def windowSizeWidth(self):
|
||||
return self.c_cuAmpcor.param.windowSizeWidthRaw
|
||||
return self.c_cuAmpcor.param.windowSizeWidthRaw
|
||||
@windowSizeWidth.setter
|
||||
def windowSizeWidth(self, int a):
|
||||
self.c_cuAmpcor.param.windowSizeWidthRaw = a
|
||||
|
@ -200,7 +208,7 @@ cdef class PyCuAmpcor(object):
|
|||
@skipSampleAcross.setter
|
||||
def skipSampleAcross(self, int a):
|
||||
self.c_cuAmpcor.param.skipSampleAcrossRaw = a
|
||||
|
||||
|
||||
@property
|
||||
def rawDataOversamplingFactor(self):
|
||||
"""anti-aliasing oversampling factor"""
|
||||
|
@ -229,7 +237,7 @@ cdef class PyCuAmpcor(object):
|
|||
@corrSufaceOverSamplingMethod.setter
|
||||
def corrSufaceOverSamplingMethod(self, int a):
|
||||
self.c_cuAmpcor.param.oversamplingMethod = a
|
||||
@property
|
||||
@property
|
||||
def masterImageName(self):
|
||||
return self.c_cuAmpcor.param.masterImageName
|
||||
@masterImageName.setter
|
||||
|
@ -241,12 +249,12 @@ cdef class PyCuAmpcor(object):
|
|||
@slaveImageName.setter
|
||||
def slaveImageName(self, str a):
|
||||
self.c_cuAmpcor.param.slaveImageName = <string> a.encode()
|
||||
@property
|
||||
@property
|
||||
def masterImageName(self):
|
||||
return self.c_cuAmpcor.param.masterImageName
|
||||
@masterImageName.setter
|
||||
def masterImageName(self, str a):
|
||||
self.c_cuAmpcor.param.masterImageName = <string> a.encode()
|
||||
self.c_cuAmpcor.param.masterImageName = <string> a.encode()
|
||||
@property
|
||||
def masterImageHeight(self):
|
||||
return self.c_cuAmpcor.param.masterImageHeight
|
||||
|
@ -258,7 +266,7 @@ cdef class PyCuAmpcor(object):
|
|||
return self.c_cuAmpcor.param.masterImageWidth
|
||||
@masterImageWidth.setter
|
||||
def masterImageWidth(self, int a):
|
||||
self.c_cuAmpcor.param.masterImageWidth=a
|
||||
self.c_cuAmpcor.param.masterImageWidth=a
|
||||
@property
|
||||
def slaveImageHeight(self):
|
||||
return self.c_cuAmpcor.param.slaveImageHeight
|
||||
|
@ -270,8 +278,8 @@ cdef class PyCuAmpcor(object):
|
|||
return self.c_cuAmpcor.param.slaveImageWidth
|
||||
@slaveImageWidth.setter
|
||||
def slaveImageWidth(self, int a):
|
||||
self.c_cuAmpcor.param.slaveImageWidth=a
|
||||
|
||||
self.c_cuAmpcor.param.slaveImageWidth=a
|
||||
|
||||
@property
|
||||
def numberWindowDown(self):
|
||||
return self.c_cuAmpcor.param.numberWindowDown
|
||||
|
@ -283,11 +291,11 @@ cdef class PyCuAmpcor(object):
|
|||
return self.c_cuAmpcor.param.numberWindowAcross
|
||||
@numberWindowAcross.setter
|
||||
def numberWindowAcross(self, int a):
|
||||
self.c_cuAmpcor.param.numberWindowAcross = a
|
||||
self.c_cuAmpcor.param.numberWindowAcross = a
|
||||
@property
|
||||
def numberWindows(self):
|
||||
return self.c_cuAmpcor.param.numberWindows
|
||||
|
||||
|
||||
@property
|
||||
def numberWindowDownInChunk(self):
|
||||
return self.c_cuAmpcor.param.numberWindowDownInChunk
|
||||
|
@ -299,7 +307,7 @@ cdef class PyCuAmpcor(object):
|
|||
return self.c_cuAmpcor.param.numberWindowAcrossInChunk
|
||||
@numberWindowAcrossInChunk.setter
|
||||
def numberWindowAcrossInChunk(self, int a):
|
||||
self.c_cuAmpcor.param.numberWindowAcrossInChunk = a
|
||||
self.c_cuAmpcor.param.numberWindowAcrossInChunk = a
|
||||
@property
|
||||
def numberChunkDown(self):
|
||||
return self.c_cuAmpcor.param.numberChunkDown
|
||||
|
@ -309,9 +317,9 @@ cdef class PyCuAmpcor(object):
|
|||
@property
|
||||
def numberChunks(self):
|
||||
return self.c_cuAmpcor.param.numberChunks
|
||||
|
||||
|
||||
## gross offets
|
||||
|
||||
|
||||
## gross offets
|
||||
@property
|
||||
def grossOffsetImageName(self):
|
||||
return self.c_cuAmpcor.param.grossOffsetImageName
|
||||
|
@ -324,13 +332,21 @@ cdef class PyCuAmpcor(object):
|
|||
@offsetImageName.setter
|
||||
def offsetImageName(self, str a):
|
||||
self.c_cuAmpcor.param.offsetImageName = <string> a.encode()
|
||||
|
||||
@property
|
||||
def snrImageName(self):
|
||||
return self.c_cuAmpcor.param.snrImageName
|
||||
@snrImageName.setter
|
||||
def snrImageName(self, str a):
|
||||
self.c_cuAmpcor.param.snrImageName = <string> a.encode()
|
||||
|
||||
|
||||
@property
|
||||
def covImageName(self):
|
||||
return self.c_cuAmpcor.param.covImageName
|
||||
@covImageName.setter
|
||||
def covImageName(self, str a):
|
||||
self.c_cuAmpcor.param.covImageName = <string> a.encode()
|
||||
|
||||
@property
|
||||
def masterStartPixelDownStatic(self):
|
||||
return self.c_cuAmpcor.param.masterStartPixelDown0
|
||||
|
@ -342,20 +358,20 @@ cdef class PyCuAmpcor(object):
|
|||
return self.c_cuAmpcor.param.masterStartPixelAcross0
|
||||
@masterStartPixelAcrossStatic.setter
|
||||
def masterStartPixelAcrossStatic(self, int a):
|
||||
self.c_cuAmpcor.param.masterStartPixelAcross0 = a
|
||||
self.c_cuAmpcor.param.masterStartPixelAcross0 = a
|
||||
@property
|
||||
def grossOffsetDownStatic(self):
|
||||
return self.c_cuAmpcor.param.grossOffsetDown0
|
||||
@grossOffsetDownStatic.setter
|
||||
def grossOffsetDownStatic(self, int a):
|
||||
self.c_cuAmpcor.param.grossOffsetDown0 =a
|
||||
self.c_cuAmpcor.param.grossOffsetDown0 =a
|
||||
@property
|
||||
def grossOffsetAcrossStatic(self):
|
||||
return self.c_cuAmpcor.param.grossOffsetAcross0
|
||||
@grossOffsetAcrossStatic.setter
|
||||
def grossOffsetAcrossStatic(self, int a):
|
||||
self.c_cuAmpcor.param.grossOffsetAcross0 =a
|
||||
|
||||
self.c_cuAmpcor.param.grossOffsetAcross0 =a
|
||||
|
||||
@property
|
||||
def grossOffsetDownDynamic(self):
|
||||
cdef int *c_data
|
||||
|
@ -366,12 +382,12 @@ cdef class PyCuAmpcor(object):
|
|||
return p_data
|
||||
@grossOffsetDownDynamic.setter
|
||||
def grossOffsetDownDynamic (self, np.ndarray[np.int32_t,ndim=1,mode="c"] pa):
|
||||
cdef int *c_data
|
||||
cdef int *c_data
|
||||
cdef int *p_data
|
||||
c_data = self.c_cuAmpcor.param.grossOffsetDown
|
||||
p_data = <int *> pa.data
|
||||
for i in range (self.numberWindows):
|
||||
c_data[i] = p_data[i]
|
||||
c_data[i] = p_data[i]
|
||||
@property
|
||||
def grossOffsetAcrossDynamic(self):
|
||||
cdef int *c_data
|
||||
|
@ -382,23 +398,23 @@ cdef class PyCuAmpcor(object):
|
|||
return p_data
|
||||
@grossOffsetAcrossDynamic.setter
|
||||
def grossOffsetAcrossDynamic (self, np.ndarray[np.int32_t,ndim=1,mode="c"] pa):
|
||||
cdef int *c_data
|
||||
cdef int *c_data
|
||||
cdef int *p_data
|
||||
c_data = self.c_cuAmpcor.param.grossOffsetAcross
|
||||
p_data = <int *> pa.data
|
||||
for i in range (self.numberWindows):
|
||||
c_data[i] = p_data[i]
|
||||
c_data[i] = p_data[i]
|
||||
return
|
||||
|
||||
|
||||
|
||||
def setConstantGrossOffset(self, int goDown, int goAcross):
|
||||
"""
|
||||
"""
|
||||
constant gross offsets
|
||||
param goDown gross offset in azimuth direction
|
||||
param goAcross gross offset in range direction
|
||||
"""
|
||||
self.c_cuAmpcor.param.setStartPixels(<int>self.masterStartPixelDownStatic, <int>self.masterStartPixelAcrossStatic, goDown, goAcross)
|
||||
|
||||
|
||||
def setVaryingGrossOffset(self, np.ndarray[np.int32_t,ndim=1,mode="c"] vD, np.ndarray[np.int32_t,ndim=1,mode="c"] vA):
|
||||
"""
|
||||
varying gross offsets for each window
|
||||
|
@ -411,21 +427,21 @@ cdef class PyCuAmpcor(object):
|
|||
def checkPixelInImageRange(self):
|
||||
""" check whether each window is with image range """
|
||||
self.c_cuAmpcor.param.checkPixelInImageRange()
|
||||
|
||||
|
||||
def setupParams(self):
|
||||
"""
|
||||
set up constant parameters and allocate array parameters (offsets)
|
||||
should be called after number of windows is set and before setting varying gross offsets
|
||||
"""
|
||||
self.c_cuAmpcor.param.setupParameters()
|
||||
self.c_cuAmpcor.param.setupParameters()
|
||||
|
||||
def runAmpcor(self):
|
||||
""" main procedure to run ampcor """
|
||||
self.c_cuAmpcor.runAmpcor()
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -6,7 +6,7 @@ package = envPyCuAmpcor['PACKAGE']
|
|||
project = envPyCuAmpcor['PROJECT']
|
||||
build = envPyCuAmpcor['PRJ_LIB_DIR']
|
||||
install = envPyCuAmpcor['PRJ_SCONS_INSTALL'] + '/' + package + '/' + project
|
||||
listFiles = ['SlcImage.cu', 'cuArrays.cu', 'cuArraysCopy.cu',
|
||||
listFiles = ['GDALImage.cu', 'cuArrays.cu', 'cuArraysCopy.cu',
|
||||
'cuArraysPadding.cu', 'cuOverSampler.cu',
|
||||
'cuSincOverSampler.cu', 'cuDeramp.cu',
|
||||
'cuOffset.cu', 'cuCorrNormalization.cu',
|
||||
|
|
|
@ -2,58 +2,74 @@
|
|||
#include "cuAmpcorUtil.h"
|
||||
|
||||
/**
|
||||
* Run ampcor process for a batch of images (a chunk)
|
||||
* Run ampcor process for a batch of images (a chunk)
|
||||
* @param[in] idxDown_ index oIDIVUP(i,j) ((i+j-1)/j)f the chunk along Down/Azimuth direction
|
||||
* @param[in] idxAcross_ index of the chunk along Across/Range direction
|
||||
*/
|
||||
*/
|
||||
void cuAmpcorChunk::run(int idxDown_, int idxAcross_)
|
||||
{
|
||||
// set chunk index
|
||||
setIndex(idxDown_, idxAcross_);
|
||||
|
||||
// load master image chunk
|
||||
loadMasterChunk();
|
||||
|
||||
|
||||
// load master image chunk
|
||||
loadMasterChunk();
|
||||
|
||||
//std::cout << "load master chunk ok\n";
|
||||
|
||||
|
||||
cuArraysAbs(c_masterBatchRaw, r_masterBatchRaw, stream);
|
||||
cuArraysSubtractMean(r_masterBatchRaw, stream);
|
||||
// load slave image chunk
|
||||
loadSlaveChunk();
|
||||
cuArraysAbs(c_slaveBatchRaw, r_slaveBatchRaw, stream);
|
||||
|
||||
|
||||
//std::cout << "load slave chunk ok\n";
|
||||
|
||||
|
||||
|
||||
|
||||
//cross correlation for none-oversampled data
|
||||
if(param->algorithm == 0) {
|
||||
cuCorrFreqDomain->execute(r_masterBatchRaw, r_slaveBatchRaw, r_corrBatchRaw);
|
||||
}
|
||||
else {
|
||||
cuCorrTimeDomain(r_masterBatchRaw, r_slaveBatchRaw, r_corrBatchRaw, stream); //time domain cross correlation
|
||||
}
|
||||
}
|
||||
cuCorrNormalize(r_masterBatchRaw, r_slaveBatchRaw, r_corrBatchRaw, stream);
|
||||
//find the maximum location of none-oversampled correlation
|
||||
cuArraysMaxloc2D(r_corrBatchRaw, offsetInit, stream);
|
||||
|
||||
// Estimate SNR (Minyan Zhong)
|
||||
|
||||
//std::cout<< "flag stats 1" <<std::endl;
|
||||
//cuArraysCopyExtractCorr(r_corrBatchRaw, r_corrBatchZoomIn, i_corrBatchZoomInValid, offsetInit, stream);
|
||||
// find the maximum location of none-oversampled correlation
|
||||
// 41 x 41, if halfsearchrange=20
|
||||
//cuArraysMaxloc2D(r_corrBatchRaw, offsetInit, stream);
|
||||
cuArraysMaxloc2D(r_corrBatchRaw, offsetInit, r_maxval, stream);
|
||||
|
||||
//std::cout<< "flag stats 2" <<std::endl;
|
||||
//cuArraysSumCorr(r_corrBatchZoomIn, i_corrBatchZoomInValid, r_corrBatchSum, i_corrBatchValidCount, stream);
|
||||
offsetInit->outputToFile("offsetInit1", stream);
|
||||
|
||||
//std::cout<< "flag stats 3" <<std::endl;
|
||||
//cuEstimateSnr(r_corrBatchSum, i_corrBatchValidCount, r_maxval, r_snrValue, stream);
|
||||
// Estimation of statistics
|
||||
// Author: Minyan Zhong
|
||||
// Extraction of correlation surface around the peak
|
||||
cuArraysCopyExtractCorr(r_corrBatchRaw, r_corrBatchRawZoomIn, i_corrBatchZoomInValid, offsetInit, stream);
|
||||
|
||||
//
|
||||
|
||||
cudaDeviceSynchronize();
|
||||
|
||||
// debug: output the intermediate results
|
||||
r_maxval->outputToFile("r_maxval",stream);
|
||||
r_corrBatchRaw->outputToFile("r_corrBatchRaw",stream);
|
||||
r_corrBatchRawZoomIn->outputToFile("r_corrBatchRawZoomIn",stream);
|
||||
i_corrBatchZoomInValid->outputToFile("i_corrBatchZoomInValid",stream);
|
||||
|
||||
// Summation of correlation and data point values
|
||||
cuArraysSumCorr(r_corrBatchRawZoomIn, i_corrBatchZoomInValid, r_corrBatchSum, i_corrBatchValidCount, stream);
|
||||
|
||||
// SNR
|
||||
cuEstimateSnr(r_corrBatchSum, i_corrBatchValidCount, r_maxval, r_snrValue, stream);
|
||||
|
||||
// Variance
|
||||
// cuEstimateVariance(r_corrBatchRaw, offsetInit, r_maxval, r_covValue, stream);
|
||||
|
||||
// Using the approximate estimation to adjust slave image (half search window size becomes only 4 pixels)
|
||||
//offsetInit->debuginfo(stream);
|
||||
// determine the starting pixel to extract slave images around the max location
|
||||
cuDetermineSlaveExtractOffset(offsetInit,
|
||||
cuDetermineSlaveExtractOffset(offsetInit,
|
||||
param->halfSearchRangeDownRaw, // old range
|
||||
param->halfSearchRangeAcrossRaw,
|
||||
param->halfSearchRangeAcrossRaw,
|
||||
param->halfZoomWindowSizeRaw, // new range
|
||||
param->halfZoomWindowSizeRaw,
|
||||
stream);
|
||||
|
@ -63,58 +79,67 @@ void cuAmpcorChunk::run(int idxDown_, int idxAcross_)
|
|||
masterBatchOverSampler->execute(c_masterBatchRaw, c_masterBatchOverSampled, param->derampMethod);
|
||||
cuArraysAbs(c_masterBatchOverSampled, r_masterBatchOverSampled, stream);
|
||||
cuArraysSubtractMean(r_masterBatchOverSampled, stream);
|
||||
|
||||
|
||||
// extract slave and oversample
|
||||
cuArraysCopyExtract(c_slaveBatchRaw, c_slaveBatchZoomIn, offsetInit, stream);
|
||||
slaveBatchOverSampler->execute(c_slaveBatchZoomIn, c_slaveBatchOverSampled, param->derampMethod);
|
||||
cuArraysAbs(c_slaveBatchOverSampled, r_slaveBatchOverSampled, stream);
|
||||
|
||||
|
||||
// correlate oversampled images
|
||||
if(param->algorithm == 0) {
|
||||
cuCorrFreqDomain_OverSampled->execute(r_masterBatchOverSampled, r_slaveBatchOverSampled, r_corrBatchZoomIn);
|
||||
}
|
||||
else {
|
||||
cuCorrTimeDomain(r_masterBatchOverSampled, r_slaveBatchOverSampled, r_corrBatchZoomIn, stream);
|
||||
}
|
||||
cuCorrTimeDomain(r_masterBatchOverSampled, r_slaveBatchOverSampled, r_corrBatchZoomIn, stream);
|
||||
}
|
||||
cuCorrNormalize(r_masterBatchOverSampled, r_slaveBatchOverSampled, r_corrBatchZoomIn, stream);
|
||||
|
||||
|
||||
//std::cout << "debug correlation oversample\n";
|
||||
//std::cout << r_masterBatchOverSampled->height << " " << r_masterBatchOverSampled->width << "\n";
|
||||
//std::cout << r_slaveBatchOverSampled->height << " " << r_slaveBatchOverSampled->width << "\n";
|
||||
//std::cout << r_corrBatchZoomIn->height << " " << r_corrBatchZoomIn->width << "\n";
|
||||
|
||||
// oversample the correlation surface
|
||||
|
||||
// oversample the correlation surface
|
||||
cuArraysCopyExtract(r_corrBatchZoomIn, r_corrBatchZoomInAdjust, make_int2(0,0), stream);
|
||||
|
||||
|
||||
//std::cout << "debug oversampling " << r_corrBatchZoomInAdjust << " " << r_corrBatchZoomInOverSampled << "\n";
|
||||
|
||||
|
||||
if(param->oversamplingMethod) {
|
||||
corrSincOverSampler->execute(r_corrBatchZoomInAdjust, r_corrBatchZoomInOverSampled);
|
||||
}
|
||||
else {
|
||||
corrOverSampler->execute(r_corrBatchZoomInAdjust, r_corrBatchZoomInOverSampled);
|
||||
corrOverSampler->execute(r_corrBatchZoomInAdjust, r_corrBatchZoomInOverSampled);
|
||||
}
|
||||
|
||||
|
||||
//find the max again
|
||||
|
||||
|
||||
cuArraysMaxloc2D(r_corrBatchZoomInOverSampled, offsetZoomIn, corrMaxValue, stream);
|
||||
|
||||
// determine the final offset from non-oversampled (pixel) and oversampled (sub-pixel)
|
||||
cuSubPixelOffset(offsetInit, offsetZoomIn, offsetFinal,
|
||||
param->oversamplingFactor, param->rawDataOversamplingFactor,
|
||||
|
||||
// determine the final offset from non-oversampled (pixel) and oversampled (sub-pixel)
|
||||
cuSubPixelOffset(offsetInit, offsetZoomIn, offsetFinal,
|
||||
param->oversamplingFactor, param->rawDataOversamplingFactor,
|
||||
param->halfSearchRangeDownRaw, param->halfSearchRangeAcrossRaw,
|
||||
param->halfZoomWindowSizeRaw, param->halfZoomWindowSizeRaw,
|
||||
stream);
|
||||
//offsetInit->debuginfo(stream);
|
||||
//offsetZoomIn->debuginfo(stream);
|
||||
//offsetFinal->debuginfo(stream);
|
||||
|
||||
//offsetFinal->debuginfo(stream);
|
||||
|
||||
// Do insertion.
|
||||
// Offsetfields.
|
||||
cuArraysCopyInsert(offsetFinal, offsetImage, idxDown_*param->numberWindowDownInChunk, idxAcross_*param->numberWindowAcrossInChunk,stream);
|
||||
|
||||
// Minyan Zhong
|
||||
//cuArraysCopyInsert(corrMaxValue, snrImage, idxDown_*param->numberWindowDownInChunk, idxAcross_*param->numberWindowAcrossInChunk,stream);
|
||||
//cuArraysCopyInsert(r_snrValue, snrImage, idxDown_*param->numberWindowDownInChunk, idxAcross_*param->numberWindowAcrossInChunk,stream);
|
||||
// Debugging matrix.
|
||||
cuArraysCopyInsert(r_corrBatchSum, floatImage1, idxDown_*param->numberWindowDownInChunk, idxAcross_*param->numberWindowAcrossInChunk,stream);
|
||||
cuArraysCopyInsert(i_corrBatchValidCount, intImage1, idxDown_*param->numberWindowDownInChunk, idxAcross_*param->numberWindowAcrossInChunk,stream);
|
||||
|
||||
// Old: save max correlation coefficients.
|
||||
//cuArraysCopyInsert(corrMaxValue, snrImage, idxDown_*param->numberWindowDownInChunk, idxAcross_*param->numberWindowAcrossInChunk,stream);
|
||||
// New: save SNR
|
||||
cuArraysCopyInsert(r_snrValue, snrImage, idxDown_*param->numberWindowDownInChunk, idxAcross_*param->numberWindowAcrossInChunk,stream);
|
||||
|
||||
// Variance.
|
||||
cuArraysCopyInsert(r_covValue, covImage, idxDown_*param->numberWindowDownInChunk, idxAcross_*param->numberWindowAcrossInChunk,stream);
|
||||
}
|
||||
|
||||
void cuAmpcorChunk::setIndex(int idxDown_, int idxAcross_)
|
||||
|
@ -122,14 +147,14 @@ void cuAmpcorChunk::setIndex(int idxDown_, int idxAcross_)
|
|||
idxChunkDown = idxDown_;
|
||||
idxChunkAcross = idxAcross_;
|
||||
idxChunk = idxChunkAcross + idxChunkDown*param->numberChunkAcross;
|
||||
|
||||
|
||||
if(idxChunkDown == param->numberChunkDown -1) {
|
||||
nWindowsDown = param->numberWindowDown - param->numberWindowDownInChunk*(param->numberChunkDown -1);
|
||||
}
|
||||
else {
|
||||
nWindowsDown = param->numberWindowDownInChunk;
|
||||
}
|
||||
|
||||
|
||||
if(idxChunkAcross == param->numberChunkAcross -1) {
|
||||
nWindowsAcross = param->numberWindowAcross - param->numberWindowAcrossInChunk*(param->numberChunkAcross -1);
|
||||
}
|
||||
|
@ -137,20 +162,20 @@ void cuAmpcorChunk::setIndex(int idxDown_, int idxAcross_)
|
|||
nWindowsAcross = param->numberWindowAcrossInChunk;
|
||||
}
|
||||
//std::cout << "DEBUG setIndex" << idxChunk << " " << nWindowsDown << " " << nWindowsAcross << "\n";
|
||||
|
||||
|
||||
}
|
||||
|
||||
/// obtain the starting pixels for each chip
|
||||
/// @param[in] oStartPixel
|
||||
/// @param[in] oStartPixel
|
||||
///
|
||||
void cuAmpcorChunk::getRelativeOffset(int *rStartPixel, const int *oStartPixel, int diff)
|
||||
{
|
||||
for(int i=0; i<param->numberWindowDownInChunk; ++i) {
|
||||
int iDown = i;
|
||||
if(i>=nWindowsDown) iDown = nWindowsDown-1;
|
||||
if(i>=nWindowsDown) iDown = nWindowsDown-1;
|
||||
for(int j=0; j<param->numberWindowAcrossInChunk; ++j){
|
||||
int iAcross = j;
|
||||
if(j>=nWindowsAcross) iAcross = nWindowsAcross-1;
|
||||
if(j>=nWindowsAcross) iAcross = nWindowsAcross-1;
|
||||
int idxInChunk = iDown*param->numberWindowAcrossInChunk+iAcross;
|
||||
int idxInAll = (iDown+idxChunkDown*param->numberWindowDownInChunk)*param->numberWindowAcross
|
||||
+ idxChunkAcross*param->numberWindowAcrossInChunk+iAcross;
|
||||
|
@ -158,108 +183,179 @@ void cuAmpcorChunk::getRelativeOffset(int *rStartPixel, const int *oStartPixel,
|
|||
//fprintf(stderr, "relative offset %d %d %d %d\n", i, j, rStartPixel[idxInChunk], diff);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void cuAmpcorChunk::loadMasterChunk()
|
||||
{
|
||||
//load a chunk from mmap to gpu
|
||||
int startD = param->masterChunkStartPixelDown[idxChunk];
|
||||
int startA = param->masterChunkStartPixelAcross[idxChunk];
|
||||
int height = param->masterChunkHeight[idxChunk];
|
||||
int width = param->masterChunkWidth[idxChunk];
|
||||
masterImage->loadToDevice(c_masterChunkRaw->devData, startD, startA, height, width, stream);
|
||||
std::cout << "debug load master: " << startD << " " << startA << " " << height << " " << width << "\n";
|
||||
//copy the chunk to a batch of images format (nImages, height, width)
|
||||
//use cpu for some simple math
|
||||
|
||||
// we first load the whole chunk of image from cpu to a gpu buffer c(r)_masterChunkRaw
|
||||
// then copy to a batch of windows with (nImages, height, width) (leading dimension on the right)
|
||||
|
||||
// get the chunk size to be loaded to gpu
|
||||
int startD = param->masterChunkStartPixelDown[idxChunk]; //start pixel down (along height)
|
||||
int startA = param->masterChunkStartPixelAcross[idxChunk]; // start pixel across (along width)
|
||||
int height = param->masterChunkHeight[idxChunk]; // number of pixels along height
|
||||
int width = param->masterChunkWidth[idxChunk]; // number of pixels along width
|
||||
|
||||
//use cpu to compute the starting positions for each window
|
||||
getRelativeOffset(ChunkOffsetDown->hostData, param->masterStartPixelDown, param->masterChunkStartPixelDown[idxChunk]);
|
||||
// copy the positions to gpu
|
||||
ChunkOffsetDown->copyToDevice(stream);
|
||||
// same for the across direction
|
||||
getRelativeOffset(ChunkOffsetAcross->hostData, param->masterStartPixelAcross, param->masterChunkStartPixelAcross[idxChunk]);
|
||||
ChunkOffsetAcross->copyToDevice(stream);
|
||||
// if derampMethod = 0 (no deramp), take amplitudes; otherwise, copy complex data
|
||||
if(param->derampMethod == 0) {
|
||||
cuArraysCopyToBatchAbsWithOffset(c_masterChunkRaw, param->masterChunkWidth[idxChunk],
|
||||
c_masterBatchRaw, ChunkOffsetDown->devData, ChunkOffsetAcross->devData, stream);
|
||||
|
||||
// check whether the image is complex (e.g., SLC) or real( e.g. TIFF)
|
||||
if(masterImage->isComplex())
|
||||
{
|
||||
// allocate a gpu buffer to load data from cpu/file
|
||||
// try allocate/deallocate the buffer on the fly to save gpu memory 07/09/19
|
||||
c_masterChunkRaw = new cuArrays<float2> (param->maxMasterChunkHeight, param->maxMasterChunkWidth);
|
||||
c_masterChunkRaw->allocate();
|
||||
|
||||
// load the data from cpu
|
||||
masterImage->loadToDevice((void *)c_masterChunkRaw->devData, startD, startA, height, width, stream);
|
||||
//std::cout << "debug load master: " << startD << " " << startA << " " << height << " " << width << "\n";
|
||||
|
||||
//copy the chunk to a batch format (nImages, height, width)
|
||||
// if derampMethod = 0 (no deramp), take amplitudes; otherwise, copy complex data
|
||||
if(param->derampMethod == 0) {
|
||||
cuArraysCopyToBatchAbsWithOffset(c_masterChunkRaw, param->masterChunkWidth[idxChunk],
|
||||
c_masterBatchRaw, ChunkOffsetDown->devData, ChunkOffsetAcross->devData, stream);
|
||||
}
|
||||
else {
|
||||
cuArraysCopyToBatchWithOffset(c_masterChunkRaw, param->masterChunkWidth[idxChunk],
|
||||
c_masterBatchRaw, ChunkOffsetDown->devData, ChunkOffsetAcross->devData, stream);
|
||||
}
|
||||
// deallocate the gpu buffer
|
||||
delete c_masterChunkRaw;
|
||||
}
|
||||
// if the image is real
|
||||
else {
|
||||
cuArraysCopyToBatchWithOffset(c_masterChunkRaw, param->masterChunkWidth[idxChunk],
|
||||
c_masterBatchRaw, ChunkOffsetDown->devData, ChunkOffsetAcross->devData, stream);
|
||||
r_masterChunkRaw = new cuArrays<float> (param->maxMasterChunkHeight, param->maxMasterChunkWidth);
|
||||
r_masterChunkRaw->allocate();
|
||||
|
||||
// load the data from cpu
|
||||
masterImage->loadToDevice((void *)r_masterChunkRaw->devData, startD, startA, height, width, stream);
|
||||
|
||||
// copy the chunk (real) to a batch format (complex)
|
||||
cuArraysCopyToBatchWithOffsetR2C(r_masterChunkRaw, param->masterChunkWidth[idxChunk],
|
||||
c_masterBatchRaw, ChunkOffsetDown->devData, ChunkOffsetAcross->devData, stream);
|
||||
// deallocate the gpu buffer
|
||||
delete r_masterChunkRaw;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
void cuAmpcorChunk::loadSlaveChunk()
|
||||
{
|
||||
//load a chunk from mmap to gpu
|
||||
slaveImage->loadToDevice(c_slaveChunkRaw->devData,
|
||||
param->slaveChunkStartPixelDown[idxChunk],
|
||||
param->slaveChunkStartPixelAcross[idxChunk],
|
||||
param->slaveChunkHeight[idxChunk],
|
||||
param->slaveChunkWidth[idxChunk],
|
||||
stream);
|
||||
|
||||
//copy to a batch format (nImages, height, width)
|
||||
getRelativeOffset(ChunkOffsetDown->hostData, param->slaveStartPixelDown, param->slaveChunkStartPixelDown[idxChunk]);
|
||||
ChunkOffsetDown->copyToDevice(stream);
|
||||
getRelativeOffset(ChunkOffsetAcross->hostData, param->slaveStartPixelAcross, param->slaveChunkStartPixelAcross[idxChunk]);
|
||||
ChunkOffsetAcross->copyToDevice(stream);
|
||||
if(param->derampMethod == 0) {
|
||||
cuArraysCopyToBatchAbsWithOffset(c_slaveChunkRaw, param->slaveChunkWidth[idxChunk],
|
||||
c_slaveBatchRaw, ChunkOffsetDown->devData, ChunkOffsetAcross->devData, stream);
|
||||
}
|
||||
else
|
||||
|
||||
if(slaveImage->isComplex())
|
||||
{
|
||||
cuArraysCopyToBatchWithOffset(c_slaveChunkRaw, param->slaveChunkWidth[idxChunk],
|
||||
c_slaveBatchRaw, ChunkOffsetDown->devData, ChunkOffsetAcross->devData, stream);
|
||||
}
|
||||
c_slaveChunkRaw = new cuArrays<float2> (param->maxSlaveChunkHeight, param->maxSlaveChunkWidth);
|
||||
c_slaveChunkRaw->allocate();
|
||||
|
||||
//load a chunk from mmap to gpu
|
||||
slaveImage->loadToDevice(c_slaveChunkRaw->devData,
|
||||
param->slaveChunkStartPixelDown[idxChunk],
|
||||
param->slaveChunkStartPixelAcross[idxChunk],
|
||||
param->slaveChunkHeight[idxChunk],
|
||||
param->slaveChunkWidth[idxChunk],
|
||||
stream);
|
||||
|
||||
if(param->derampMethod == 0) {
|
||||
cuArraysCopyToBatchAbsWithOffset(c_slaveChunkRaw, param->slaveChunkWidth[idxChunk],
|
||||
c_slaveBatchRaw, ChunkOffsetDown->devData, ChunkOffsetAcross->devData, stream);
|
||||
}
|
||||
else {
|
||||
cuArraysCopyToBatchWithOffset(c_slaveChunkRaw, param->slaveChunkWidth[idxChunk],
|
||||
c_slaveBatchRaw, ChunkOffsetDown->devData, ChunkOffsetAcross->devData, stream);
|
||||
}
|
||||
delete c_slaveChunkRaw;
|
||||
}
|
||||
else { //real image
|
||||
//allocate the gpu buffer
|
||||
r_slaveChunkRaw = new cuArrays<float> (param->maxSlaveChunkHeight, param->maxSlaveChunkWidth);
|
||||
r_slaveChunkRaw->allocate();
|
||||
|
||||
//load a chunk from mmap to gpu
|
||||
slaveImage->loadToDevice(r_slaveChunkRaw->devData,
|
||||
param->slaveChunkStartPixelDown[idxChunk],
|
||||
param->slaveChunkStartPixelAcross[idxChunk],
|
||||
param->slaveChunkHeight[idxChunk],
|
||||
param->slaveChunkWidth[idxChunk],
|
||||
stream);
|
||||
|
||||
// convert to the batch format
|
||||
cuArraysCopyToBatchWithOffsetR2C(r_slaveChunkRaw, param->slaveChunkWidth[idxChunk],
|
||||
c_slaveBatchRaw, ChunkOffsetDown->devData, ChunkOffsetAcross->devData, stream);
|
||||
delete r_slaveChunkRaw;
|
||||
}
|
||||
}
|
||||
|
||||
cuAmpcorChunk::cuAmpcorChunk(cuAmpcorParameter *param_, SlcImage *master_, SlcImage *slave_,
|
||||
cuArrays<float2> *offsetImage_, cuArrays<float> *snrImage_, cudaStream_t stream_)
|
||||
cuAmpcorChunk::cuAmpcorChunk(cuAmpcorParameter *param_, GDALImage *master_, GDALImage *slave_,
|
||||
cuArrays<float2> *offsetImage_, cuArrays<float> *snrImage_, cuArrays<float3> *covImage_, cuArrays<int> *intImage1_, cuArrays<float> *floatImage1_, cudaStream_t stream_)
|
||||
|
||||
{
|
||||
param = param_;
|
||||
masterImage = master_;
|
||||
slaveImage = slave_;
|
||||
slaveImage = slave_;
|
||||
offsetImage = offsetImage_;
|
||||
snrImage = snrImage_;
|
||||
covImage = covImage_;
|
||||
|
||||
intImage1 = intImage1_;
|
||||
floatImage1 = floatImage1_;
|
||||
|
||||
stream = stream_;
|
||||
|
||||
std::cout << "debug Chunk creator " << param->maxMasterChunkHeight << " " << param->maxMasterChunkWidth << "\n";
|
||||
c_masterChunkRaw = new cuArrays<float2> (param->maxMasterChunkHeight, param->maxMasterChunkWidth);
|
||||
c_masterChunkRaw->allocate();
|
||||
|
||||
c_slaveChunkRaw = new cuArrays<float2> (param->maxSlaveChunkHeight, param->maxSlaveChunkWidth);
|
||||
c_slaveChunkRaw->allocate();
|
||||
|
||||
|
||||
// std::cout << "debug Chunk creator " << param->maxMasterChunkHeight << " " << param->maxMasterChunkWidth << "\n";
|
||||
// try allocate/deallocate on the fly to save gpu memory 07/09/19
|
||||
// c_masterChunkRaw = new cuArrays<float2> (param->maxMasterChunkHeight, param->maxMasterChunkWidth);
|
||||
// c_masterChunkRaw->allocate();
|
||||
|
||||
// c_slaveChunkRaw = new cuArrays<float2> (param->maxSlaveChunkHeight, param->maxSlaveChunkWidth);
|
||||
// c_slaveChunkRaw->allocate();
|
||||
|
||||
ChunkOffsetDown = new cuArrays<int> (param->numberWindowDownInChunk, param->numberWindowAcrossInChunk);
|
||||
ChunkOffsetDown->allocate();
|
||||
ChunkOffsetDown->allocateHost();
|
||||
ChunkOffsetAcross = new cuArrays<int> (param->numberWindowDownInChunk, param->numberWindowAcrossInChunk);
|
||||
ChunkOffsetAcross->allocate();
|
||||
ChunkOffsetAcross->allocateHost();
|
||||
|
||||
|
||||
c_masterBatchRaw = new cuArrays<float2> (
|
||||
param->windowSizeHeightRaw, param->windowSizeWidthRaw,
|
||||
param->numberWindowDownInChunk, param->numberWindowAcrossInChunk);
|
||||
c_masterBatchRaw->allocate();
|
||||
|
||||
|
||||
c_slaveBatchRaw = new cuArrays<float2> (
|
||||
param->searchWindowSizeHeightRaw, param->searchWindowSizeWidthRaw,
|
||||
param->numberWindowDownInChunk, param->numberWindowAcrossInChunk);
|
||||
c_slaveBatchRaw->allocate();
|
||||
|
||||
|
||||
r_masterBatchRaw = new cuArrays<float> (
|
||||
param->windowSizeHeightRaw, param->windowSizeWidthRaw,
|
||||
param->numberWindowDownInChunk, param->numberWindowAcrossInChunk);
|
||||
r_masterBatchRaw->allocate();
|
||||
|
||||
|
||||
r_slaveBatchRaw = new cuArrays<float> (
|
||||
param->searchWindowSizeHeightRaw, param->searchWindowSizeWidthRaw,
|
||||
param->numberWindowDownInChunk, param->numberWindowAcrossInChunk);
|
||||
r_slaveBatchRaw->allocate();
|
||||
|
||||
|
||||
c_slaveBatchZoomIn = new cuArrays<float2> (
|
||||
param->searchWindowSizeHeightRawZoomIn, param->searchWindowSizeWidthRawZoomIn,
|
||||
param->numberWindowDownInChunk, param->numberWindowAcrossInChunk);
|
||||
c_slaveBatchZoomIn->allocate();
|
||||
|
||||
|
||||
c_masterBatchOverSampled = new cuArrays<float2> (
|
||||
param->windowSizeHeight, param->windowSizeWidth,
|
||||
param->numberWindowDownInChunk, param->numberWindowAcrossInChunk);
|
||||
|
@ -269,7 +365,7 @@ cuAmpcorChunk::cuAmpcorChunk(cuAmpcorParameter *param_, SlcImage *master_, SlcIm
|
|||
param->searchWindowSizeHeight, param->searchWindowSizeWidth,
|
||||
param->numberWindowDownInChunk, param->numberWindowAcrossInChunk);
|
||||
c_slaveBatchOverSampled->allocate();
|
||||
|
||||
|
||||
r_masterBatchOverSampled = new cuArrays<float> (
|
||||
param->windowSizeHeight, param->windowSizeWidth,
|
||||
param->numberWindowDownInChunk, param->numberWindowAcrossInChunk);
|
||||
|
@ -279,66 +375,114 @@ cuAmpcorChunk::cuAmpcorChunk(cuAmpcorParameter *param_, SlcImage *master_, SlcIm
|
|||
param->searchWindowSizeHeight, param->searchWindowSizeWidth,
|
||||
param->numberWindowDownInChunk, param->numberWindowAcrossInChunk);
|
||||
r_slaveBatchOverSampled->allocate();
|
||||
|
||||
|
||||
masterBatchOverSampler = new cuOverSamplerC2C(
|
||||
c_masterBatchRaw->height, c_masterBatchRaw->width, //orignal size
|
||||
c_masterBatchOverSampled->height, c_masterBatchOverSampled->width, //oversampled size
|
||||
c_masterBatchOverSampled->height, c_masterBatchOverSampled->width, //oversampled size
|
||||
c_masterBatchRaw->count, stream);
|
||||
|
||||
slaveBatchOverSampler = new cuOverSamplerC2C(c_slaveBatchZoomIn->height, c_slaveBatchZoomIn->width,
|
||||
|
||||
slaveBatchOverSampler = new cuOverSamplerC2C(c_slaveBatchZoomIn->height, c_slaveBatchZoomIn->width,
|
||||
c_slaveBatchOverSampled->height, c_slaveBatchOverSampled->width, c_slaveBatchRaw->count, stream);
|
||||
|
||||
|
||||
r_corrBatchRaw = new cuArrays<float> (
|
||||
param->searchWindowSizeHeightRaw-param->windowSizeHeightRaw+1,
|
||||
param->searchWindowSizeWidthRaw-param->windowSizeWidthRaw+1,
|
||||
param->numberWindowDownInChunk,
|
||||
param->searchWindowSizeHeightRaw-param->windowSizeHeightRaw+1,
|
||||
param->searchWindowSizeWidthRaw-param->windowSizeWidthRaw+1,
|
||||
param->numberWindowDownInChunk,
|
||||
param->numberWindowAcrossInChunk);
|
||||
r_corrBatchRaw->allocate();
|
||||
|
||||
|
||||
r_corrBatchZoomIn = new cuArrays<float> (
|
||||
param->searchWindowSizeHeight - param->windowSizeHeight+1,
|
||||
param->searchWindowSizeWidth - param->windowSizeWidth+1,
|
||||
param->numberWindowDownInChunk,
|
||||
param->searchWindowSizeHeight - param->windowSizeHeight+1,
|
||||
param->searchWindowSizeWidth - param->windowSizeWidth+1,
|
||||
param->numberWindowDownInChunk,
|
||||
param->numberWindowAcrossInChunk);
|
||||
r_corrBatchZoomIn->allocate();
|
||||
|
||||
|
||||
r_corrBatchZoomInAdjust = new cuArrays<float> (
|
||||
param->searchWindowSizeHeight - param->windowSizeHeight,
|
||||
param->searchWindowSizeWidth - param->windowSizeWidth,
|
||||
param->numberWindowDownInChunk,
|
||||
param->searchWindowSizeHeight - param->windowSizeHeight,
|
||||
param->searchWindowSizeWidth - param->windowSizeWidth,
|
||||
param->numberWindowDownInChunk,
|
||||
param->numberWindowAcrossInChunk);
|
||||
r_corrBatchZoomInAdjust->allocate();
|
||||
|
||||
|
||||
|
||||
|
||||
r_corrBatchZoomInOverSampled = new cuArrays<float> (
|
||||
param->zoomWindowSize * param->oversamplingFactor,
|
||||
param->zoomWindowSize * param->oversamplingFactor,
|
||||
param->numberWindowDownInChunk,
|
||||
param->zoomWindowSize * param->oversamplingFactor,
|
||||
param->numberWindowDownInChunk,
|
||||
param->numberWindowAcrossInChunk);
|
||||
r_corrBatchZoomInOverSampled->allocate();
|
||||
|
||||
|
||||
offsetInit = new cuArrays<int2> (param->numberWindowDownInChunk, param->numberWindowAcrossInChunk);
|
||||
offsetInit->allocate();
|
||||
|
||||
|
||||
offsetZoomIn = new cuArrays<int2> (param->numberWindowDownInChunk, param->numberWindowAcrossInChunk);
|
||||
offsetZoomIn->allocate();
|
||||
|
||||
|
||||
offsetFinal = new cuArrays<float2> (param->numberWindowDownInChunk, param->numberWindowAcrossInChunk);
|
||||
offsetFinal->allocate();
|
||||
|
||||
|
||||
corrMaxValue = new cuArrays<float> (param->numberWindowDownInChunk, param->numberWindowAcrossInChunk);
|
||||
corrMaxValue->allocate();
|
||||
|
||||
|
||||
|
||||
// new arrays due to snr estimation
|
||||
std::cout<< "corrRawZoomInHeight: " << param->corrRawZoomInHeight << "\n";
|
||||
std::cout<< "corrRawZoomInWidth: " << param->corrRawZoomInWidth << "\n";
|
||||
|
||||
r_corrBatchRawZoomIn = new cuArrays<float> (
|
||||
param->corrRawZoomInHeight,
|
||||
param->corrRawZoomInWidth,
|
||||
param->numberWindowDownInChunk,
|
||||
param->numberWindowAcrossInChunk);
|
||||
r_corrBatchRawZoomIn->allocate();
|
||||
|
||||
i_corrBatchZoomInValid = new cuArrays<int> (
|
||||
param->corrRawZoomInHeight,
|
||||
param->corrRawZoomInWidth,
|
||||
param->numberWindowDownInChunk,
|
||||
param->numberWindowAcrossInChunk);
|
||||
i_corrBatchZoomInValid->allocate();
|
||||
|
||||
|
||||
r_corrBatchSum = new cuArrays<float> (
|
||||
param->numberWindowDownInChunk,
|
||||
param->numberWindowAcrossInChunk);
|
||||
r_corrBatchSum->allocate();
|
||||
|
||||
i_corrBatchValidCount = new cuArrays<int> (
|
||||
param->numberWindowDownInChunk,
|
||||
param->numberWindowAcrossInChunk);
|
||||
i_corrBatchValidCount->allocate();
|
||||
|
||||
i_maxloc = new cuArrays<int2> (param->numberWindowDownInChunk, param->numberWindowAcrossInChunk);
|
||||
|
||||
i_maxloc->allocate();
|
||||
|
||||
r_maxval = new cuArrays<float> (param->numberWindowDownInChunk, param->numberWindowAcrossInChunk);
|
||||
|
||||
r_maxval->allocate();
|
||||
|
||||
r_snrValue = new cuArrays<float> (param->numberWindowDownInChunk, param->numberWindowAcrossInChunk);
|
||||
|
||||
r_snrValue->allocate();
|
||||
|
||||
r_covValue = new cuArrays<float3> (param->numberWindowDownInChunk, param->numberWindowAcrossInChunk);
|
||||
|
||||
r_covValue->allocate();
|
||||
|
||||
// end of new arrays
|
||||
|
||||
if(param->oversamplingMethod) {
|
||||
corrSincOverSampler = new cuSincOverSamplerR2R(param->zoomWindowSize, param->oversamplingFactor, stream);
|
||||
}
|
||||
else {
|
||||
else {
|
||||
corrOverSampler= new cuOverSamplerR2R(param->zoomWindowSize, param->zoomWindowSize,
|
||||
(param->zoomWindowSize)*param->oversamplingFactor,
|
||||
(param->zoomWindowSize)*param->oversamplingFactor,
|
||||
(param->zoomWindowSize)*param->oversamplingFactor,
|
||||
param->numberWindowDownInChunk*param->numberWindowAcrossInChunk,
|
||||
stream);
|
||||
}
|
||||
param->numberWindowDownInChunk*param->numberWindowAcrossInChunk,
|
||||
stream);
|
||||
}
|
||||
if(param->algorithm == 0) {
|
||||
cuCorrFreqDomain = new cuFreqCorrelator(
|
||||
param->searchWindowSizeHeightRaw, param->searchWindowSizeWidthRaw,
|
||||
|
@ -347,10 +491,10 @@ cuAmpcorChunk::cuAmpcorChunk(cuAmpcorParameter *param_, SlcImage *master_, SlcIm
|
|||
cuCorrFreqDomain_OverSampled = new cuFreqCorrelator(
|
||||
param->searchWindowSizeHeight, param->searchWindowSizeWidth,
|
||||
param->numberWindowDownInChunk*param->numberWindowAcrossInChunk,
|
||||
stream);
|
||||
stream);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
debugmsg("all objects in chunk are created ...\n");
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/*
|
||||
/*
|
||||
* cuAmpcorChunk.h
|
||||
* Purpose: a group of chips processed at the same time
|
||||
*/
|
||||
|
@ -6,7 +6,7 @@
|
|||
#ifndef __CUAMPCORCHUNK_H
|
||||
#define __CUAMPCORCHUNK_H
|
||||
|
||||
#include "SlcImage.h"
|
||||
#include "GDALImage.h"
|
||||
#include "cuArrays.h"
|
||||
#include "cuAmpcorParameter.h"
|
||||
#include "cuOverSampler.h"
|
||||
|
@ -22,64 +22,81 @@ private:
|
|||
int nWindowsAcross;
|
||||
|
||||
int devId;
|
||||
cudaStream_t stream;
|
||||
|
||||
SlcImage *masterImage;
|
||||
SlcImage *slaveImage;
|
||||
cudaStream_t stream;
|
||||
|
||||
GDALImage *masterImage;
|
||||
GDALImage *slaveImage;
|
||||
cuAmpcorParameter *param;
|
||||
cuArrays<float2> *offsetImage;
|
||||
cuArrays<float> *snrImage;
|
||||
|
||||
cuArrays<float2> * c_masterChunkRaw, * c_slaveChunkRaw;
|
||||
cuArrays<float3> *covImage;
|
||||
|
||||
// added for test
|
||||
cuArrays<int> *intImage1;
|
||||
cuArrays<float> *floatImage1;
|
||||
|
||||
// gpu buffer
|
||||
cuArrays<float2> * c_masterChunkRaw, * c_slaveChunkRaw;
|
||||
cuArrays<float> * r_masterChunkRaw, * r_slaveChunkRaw;
|
||||
|
||||
// gpu windows raw data
|
||||
cuArrays<float2> * c_masterBatchRaw, * c_slaveBatchRaw, * c_slaveBatchZoomIn;
|
||||
cuArrays<float> * r_masterBatchRaw, * r_slaveBatchRaw;
|
||||
cuArrays<float2> * c_masterBatchOverSampled, * c_slaveBatchOverSampled;
|
||||
|
||||
// gpu windows oversampled data
|
||||
cuArrays<float2> * c_masterBatchOverSampled, * c_slaveBatchOverSampled;
|
||||
cuArrays<float> * r_masterBatchOverSampled, * r_slaveBatchOverSampled;
|
||||
cuArrays<float> * r_corrBatchRaw, * r_corrBatchZoomIn, * r_corrBatchZoomInOverSampled, * r_corrBatchZoomInAdjust;
|
||||
|
||||
|
||||
cuArrays<int> *ChunkOffsetDown, *ChunkOffsetAcross;
|
||||
|
||||
|
||||
cuOverSamplerC2C *masterBatchOverSampler, *slaveBatchOverSampler;
|
||||
|
||||
|
||||
cuOverSamplerR2R *corrOverSampler;
|
||||
cuSincOverSamplerR2R *corrSincOverSampler;
|
||||
|
||||
cuSincOverSamplerR2R *corrSincOverSampler;
|
||||
|
||||
//for frequency domain
|
||||
cuFreqCorrelator *cuCorrFreqDomain, *cuCorrFreqDomain_OverSampled;
|
||||
|
||||
|
||||
cuArrays<int2> *offsetInit;
|
||||
cuArrays<int2> *offsetZoomIn;
|
||||
cuArrays<float2> *offsetFinal;
|
||||
cuArrays<float> *corrMaxValue;
|
||||
|
||||
|
||||
//SNR estimation
|
||||
|
||||
cuArrays<float> *r_corrBatchRawZoomIn;
|
||||
cuArrays<float> *r_corrBatchSum;
|
||||
cuArrays<int> *i_corrBatchZoomInValid, *i_corrBatchValidCount;
|
||||
|
||||
cuArrays<float> *r_snrValue;
|
||||
|
||||
//corr statistics
|
||||
cuArrays<int2> *i_maxloc;
|
||||
cuArrays<float> *r_maxval;
|
||||
|
||||
cuArrays<float> *r_corrBatchSum;
|
||||
cuArrays<int> *i_corrBatchZoomInValid, *i_corrBatchValidCount;
|
||||
|
||||
cuArrays<float> *corrMaxValue;
|
||||
cuArrays<float> *r_snrValue;
|
||||
|
||||
|
||||
// Varince estimation.
|
||||
cuArrays<float3> *r_covValue;
|
||||
|
||||
public:
|
||||
cuAmpcorChunk() {}
|
||||
//cuAmpcorChunk(cuAmpcorParameter *param_, SlcImage *master_, SlcImage *slave_);
|
||||
|
||||
|
||||
void setIndex(int idxDown_, int idxAcross_);
|
||||
|
||||
cuAmpcorChunk(cuAmpcorParameter *param_, GDALImage *master_, GDALImage *slave_, cuArrays<float2> *offsetImage_,
|
||||
cuArrays<float> *snrImage_, cuArrays<float3> *covImage_, cuArrays<int> *intImage1_, cuArrays<float> *floatImage1_, cudaStream_t stream_);
|
||||
|
||||
|
||||
cuAmpcorChunk(cuAmpcorParameter *param_, SlcImage *master_, SlcImage *slave_, cuArrays<float2> *offsetImage_,
|
||||
cuArrays<float> *snrImage_, cudaStream_t stream_);
|
||||
|
||||
void loadMasterChunk();
|
||||
void loadSlaveChunk();
|
||||
void getRelativeOffset(int *rStartPixel, const int *oStartPixel, int diff);
|
||||
|
||||
~cuAmpcorChunk();
|
||||
|
||||
void run(int, int);
|
||||
|
||||
~cuAmpcorChunk();
|
||||
|
||||
void run(int, int);
|
||||
};
|
||||
|
||||
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
|
|
@ -1,113 +1,142 @@
|
|||
// Implementation of cuAmpcorController
|
||||
|
||||
#include "cuAmpcorController.h"
|
||||
#include "SlcImage.h"
|
||||
#include "GDALImage.h"
|
||||
#include "cuArrays.h"
|
||||
#include "cudaUtil.h"
|
||||
#include "cuAmpcorChunk.h"
|
||||
#include "cuAmpcorUtil.h"
|
||||
#include <iostream>
|
||||
|
||||
cuAmpcorController::cuAmpcorController() { param = new cuAmpcorParameter();}
|
||||
cuAmpcorController::~cuAmpcorController() { delete param; }
|
||||
cuAmpcorController::cuAmpcorController() { param = new cuAmpcorParameter();}
|
||||
cuAmpcorController::~cuAmpcorController() { delete param; }
|
||||
|
||||
void cuAmpcorController::runAmpcor() {
|
||||
|
||||
void cuAmpcorController::runAmpcor() {
|
||||
|
||||
// set the gpu id
|
||||
param->deviceID = gpuDeviceInit(param->deviceID);
|
||||
SlcImage *masterImage;
|
||||
SlcImage *slaveImage;
|
||||
|
||||
// initialize the gdal driver
|
||||
GDALAllRegister();
|
||||
// master and slave images; use band=1 as default
|
||||
// TODO: selecting band
|
||||
GDALImage *masterImage = new GDALImage(param->masterImageName, 1, param->mmapSizeInGB);
|
||||
GDALImage *slaveImage = new GDALImage(param->slaveImageName, 1, param->mmapSizeInGB);
|
||||
|
||||
cuArrays<float2> *offsetImage, *offsetImageRun;
|
||||
cuArrays<float> *snrImage, *snrImageRun;
|
||||
|
||||
|
||||
// cuArrays<float> *floatImage;
|
||||
// cuArrays<int> *intImage;
|
||||
cuArrays<float3> *covImage, *covImageRun;
|
||||
|
||||
// For debugging.
|
||||
cuArrays<int> *intImage1;
|
||||
cuArrays<float> *floatImage1;
|
||||
|
||||
int nWindowsDownRun = param->numberChunkDown * param->numberWindowDownInChunk;
|
||||
int nWindowsAcrossRun = param->numberChunkAcross * param->numberWindowAcrossInChunk;
|
||||
|
||||
masterImage = new SlcImage(param->masterImageName, param->masterImageHeight, param->masterImageWidth, param->mmapSizeInGB);
|
||||
slaveImage = new SlcImage(param->slaveImageName, param->slaveImageHeight, param->slaveImageWidth, param->mmapSizeInGB);
|
||||
|
||||
int nWindowsDownRun = param->numberChunkDown*param->numberWindowDownInChunk;
|
||||
int nWindowsAcrossRun = param->numberChunkAcross*param->numberWindowAcrossInChunk;
|
||||
|
||||
std::cout << "Debug " << nWindowsDownRun << " " << param->numberWindowDown << "\n";
|
||||
|
||||
|
||||
offsetImageRun = new cuArrays<float2>(nWindowsDownRun, nWindowsAcrossRun);
|
||||
snrImageRun = new cuArrays<float>(nWindowsDownRun, nWindowsAcrossRun);
|
||||
offsetImageRun->allocate();
|
||||
|
||||
snrImageRun = new cuArrays<float>(nWindowsDownRun, nWindowsAcrossRun);
|
||||
snrImageRun->allocate();
|
||||
|
||||
|
||||
covImageRun = new cuArrays<float3>(nWindowsDownRun, nWindowsAcrossRun);
|
||||
covImageRun->allocate();
|
||||
|
||||
// intImage 1 and floatImage 1 are added for debugging issues
|
||||
|
||||
intImage1 = new cuArrays<int>(nWindowsDownRun, nWindowsAcrossRun);
|
||||
intImage1->allocate();
|
||||
|
||||
floatImage1 = new cuArrays<float>(nWindowsDownRun, nWindowsAcrossRun);
|
||||
floatImage1->allocate();
|
||||
|
||||
// Offsetfields.
|
||||
offsetImage = new cuArrays<float2>(param->numberWindowDown, param->numberWindowAcross);
|
||||
snrImage = new cuArrays<float>(param->numberWindowDown, param->numberWindowAcross);
|
||||
offsetImage->allocate();
|
||||
|
||||
// SNR.
|
||||
snrImage = new cuArrays<float>(param->numberWindowDown, param->numberWindowAcross);
|
||||
snrImage->allocate();
|
||||
|
||||
// Minyan Zhong
|
||||
// floatImage = new cuArrays<float>(param->numberWindowDown, param->numberWindowAcross);
|
||||
// intImage = new cuArrays<int>(param->numberWindowDown, param->numberWindowAcross);
|
||||
// Variance.
|
||||
covImage = new cuArrays<float3>(param->numberWindowDown, param->numberWindowAcross);
|
||||
covImage->allocate();
|
||||
|
||||
// floatImage->allocate();
|
||||
// intImage->allocate();
|
||||
//
|
||||
cudaStream_t streams[param->nStreams];
|
||||
cuAmpcorChunk *chunk[param->nStreams];
|
||||
for(int ist=0; ist<param->nStreams; ist++)
|
||||
for(int ist=0; ist<param->nStreams; ist++)
|
||||
{
|
||||
cudaStreamCreate(&streams[ist]);
|
||||
chunk[ist]= new cuAmpcorChunk(param, masterImage, slaveImage, offsetImageRun, snrImageRun, streams[ist]);
|
||||
chunk[ist]= new cuAmpcorChunk(param, masterImage, slaveImage, offsetImageRun, snrImageRun, covImageRun, intImage1, floatImage1, streams[ist]);
|
||||
|
||||
}
|
||||
|
||||
|
||||
int nChunksDown = param->numberChunkDown;
|
||||
int nChunksAcross = param->numberChunkAcross;
|
||||
|
||||
int nChunksAcross = param->numberChunkAcross;
|
||||
|
||||
std::cout << "Total number of windows (azimuth x range): " <<param->numberWindowDown << " x " << param->numberWindowAcross << std::endl;
|
||||
std::cout << "to be processed in the number of chunks: " <<nChunksDown << " x " << nChunksAcross << std::endl;
|
||||
|
||||
for(int i = 60; i<nChunksDown; i++)
|
||||
|
||||
for(int i = 0; i<nChunksDown; i++)
|
||||
{
|
||||
std::cout << "Processing chunk (" << i <<", x" << ")" << std::endl;
|
||||
std::cout << "Processing chunk (" << i <<", x" << ")" << std::endl;
|
||||
for(int j=0; j<nChunksAcross; j+=param->nStreams)
|
||||
{
|
||||
//std::cout << "Processing chunk(" << i <<", " << j <<")" << std::endl;
|
||||
for(int ist = 0; ist<param->nStreams; ist++)
|
||||
{
|
||||
{
|
||||
if(j+ist < nChunksAcross) {
|
||||
|
||||
|
||||
chunk[ist]->run(i, j+ist);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
cudaDeviceSynchronize();
|
||||
|
||||
|
||||
// Do extraction.
|
||||
cuArraysCopyExtract(offsetImageRun, offsetImage, make_int2(0,0), streams[0]);
|
||||
cuArraysCopyExtract(snrImageRun, snrImage, make_int2(0,0), streams[0]);
|
||||
|
||||
cuArraysCopyExtract(snrImageRun, snrImage, make_int2(0,0), streams[0]);
|
||||
cuArraysCopyExtract(covImageRun, covImage, make_int2(0,0), streams[0]);
|
||||
|
||||
offsetImage->outputToFile(param->offsetImageName, streams[0]);
|
||||
snrImage->outputToFile(param->snrImageName, streams[0]);
|
||||
covImage->outputToFile(param->covImageName, streams[0]);
|
||||
|
||||
// Minyan Zhong
|
||||
// floatImage->allocate();
|
||||
// intImage->allocate();
|
||||
//
|
||||
// Output debugging arrays.
|
||||
intImage1->outputToFile("intImage1", streams[0]);
|
||||
floatImage1->outputToFile("floatImage1", streams[0]);
|
||||
|
||||
outputGrossOffsets();
|
||||
|
||||
// Delete arrays.
|
||||
delete offsetImage;
|
||||
delete snrImage;
|
||||
delete covImage;
|
||||
|
||||
delete intImage1;
|
||||
delete floatImage1;
|
||||
|
||||
delete offsetImageRun;
|
||||
delete snrImageRun;
|
||||
delete covImageRun;
|
||||
|
||||
for (int ist=0; ist<param->nStreams; ist++)
|
||||
delete chunk[ist];
|
||||
|
||||
delete masterImage;
|
||||
delete slaveImage;
|
||||
}
|
||||
delete slaveImage;
|
||||
|
||||
}
|
||||
|
||||
void cuAmpcorController::outputGrossOffsets()
|
||||
{
|
||||
cuArrays<float2> *grossOffsets = new cuArrays<float2>(param->numberWindowDown, param->numberWindowAcross);
|
||||
grossOffsets->allocateHost();
|
||||
|
||||
|
||||
for(int i=0; i< param->numberWindows; i++)
|
||||
grossOffsets->hostData[i] = make_float2(param->grossOffsetDown[i], param->grossOffsetAcross[i]);
|
||||
grossOffsets->outputHostToFile(param->grossOffsetImageName);
|
||||
|
@ -176,7 +205,7 @@ void cuAmpcorController::setGrossOffsets(int *in, int size) {
|
|||
param->grossOffsets = (int *)malloc(size*sizeof(int));
|
||||
mempcpy(param->grossOffsets, in, size*sizeof(int));
|
||||
fprintf(stderr, "copy grossOffsets %d\n", size);
|
||||
}
|
||||
}
|
||||
void cuAmpcorController::setOffsetImageName(std::string s) { param->offsetImageName = s; }
|
||||
void cuAmpcorController::setSNRImageName(std::string s) { param->snrImageName = s; }
|
||||
//void cuAmpcorController::setMargin(int n) { param->margin = n; }
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/**
|
||||
* cuAmpcorParameter.cu
|
||||
* Input parameters for ampcor
|
||||
* Input parameters for ampcor
|
||||
*/
|
||||
|
||||
#include "cuAmpcorParameter.h"
|
||||
|
@ -11,17 +11,19 @@
|
|||
#endif
|
||||
|
||||
///
|
||||
/// Constructor for cuAmpcorParameter class
|
||||
/// Constructor for cuAmpcorParameter class
|
||||
/// also sets the default/initial values of various parameters
|
||||
///
|
||||
|
||||
|
||||
cuAmpcorParameter::cuAmpcorParameter()
|
||||
{
|
||||
algorithm = 0; //0 freq; 1 time
|
||||
deviceID = 0;
|
||||
nStreams = 1;
|
||||
// default settings
|
||||
// will be changed if they are set by python scripts
|
||||
algorithm = 0; //0 freq; 1 time
|
||||
deviceID = 0;
|
||||
nStreams = 1;
|
||||
derampMethod = 1;
|
||||
|
||||
|
||||
windowSizeWidthRaw = 64;
|
||||
windowSizeHeightRaw = 64;
|
||||
halfSearchRangeDownRaw = 20;
|
||||
|
@ -31,9 +33,9 @@ cuAmpcorParameter::cuAmpcorParameter()
|
|||
skipSampleDownRaw = 64;
|
||||
rawDataOversamplingFactor = 2;
|
||||
zoomWindowSize = 8;
|
||||
oversamplingFactor = 16;
|
||||
oversamplingMethod = 0;
|
||||
|
||||
oversamplingFactor = 16;
|
||||
oversamplingMethod = 0;
|
||||
|
||||
masterImageName = "master.slc";
|
||||
masterImageWidth = 1000;
|
||||
masterImageHeight = 1000;
|
||||
|
@ -43,50 +45,58 @@ cuAmpcorParameter::cuAmpcorParameter()
|
|||
offsetImageName = "DenseOffset.off";
|
||||
grossOffsetImageName = "GrossOffset.off";
|
||||
snrImageName = "snr.snr";
|
||||
covImageName = "cov.cov";
|
||||
numberWindowDown = 1;
|
||||
numberWindowAcross = 1;
|
||||
numberWindowAcross = 1;
|
||||
numberWindowDownInChunk = 1;
|
||||
numberWindowAcrossInChunk = 1 ;
|
||||
|
||||
numberWindowAcrossInChunk = 1 ;
|
||||
|
||||
masterStartPixelDown0 = 0;
|
||||
masterStartPixelAcross0 = 0;
|
||||
|
||||
corrRawZoomInHeight = 17; // 8*2+1
|
||||
corrRawZoomInWidth = 17;
|
||||
|
||||
useMmap = 1; // use mmap
|
||||
mmapSizeInGB = 1;
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* To determine other process parameters after reading essential parameters from python
|
||||
*/
|
||||
* To determine other process parameters after reading essential parameters from python
|
||||
*/
|
||||
|
||||
void cuAmpcorParameter::setupParameters()
|
||||
{
|
||||
{
|
||||
zoomWindowSize *= rawDataOversamplingFactor; //8 * 2
|
||||
halfZoomWindowSizeRaw = zoomWindowSize/(2*rawDataOversamplingFactor); // 8*2/(2*2) = 4
|
||||
|
||||
halfZoomWindowSizeRaw = zoomWindowSize/(2*rawDataOversamplingFactor); // 8*2/(2*2) = 4
|
||||
|
||||
windowSizeWidth = windowSizeWidthRaw*rawDataOversamplingFactor; //
|
||||
windowSizeHeight = windowSizeHeightRaw*rawDataOversamplingFactor;
|
||||
|
||||
searchWindowSizeWidthRaw = windowSizeWidthRaw + 2*halfSearchRangeDownRaw;
|
||||
searchWindowSizeWidthRaw = windowSizeWidthRaw + 2*halfSearchRangeDownRaw;
|
||||
searchWindowSizeHeightRaw = windowSizeHeightRaw + 2*halfSearchRangeAcrossRaw;
|
||||
|
||||
|
||||
searchWindowSizeWidthRawZoomIn = windowSizeWidthRaw + 2*halfZoomWindowSizeRaw;
|
||||
searchWindowSizeHeightRawZoomIn = windowSizeHeightRaw + 2*halfZoomWindowSizeRaw;
|
||||
|
||||
|
||||
searchWindowSizeWidth = searchWindowSizeWidthRawZoomIn*rawDataOversamplingFactor;
|
||||
searchWindowSizeHeight = searchWindowSizeHeightRawZoomIn*rawDataOversamplingFactor;
|
||||
|
||||
|
||||
numberWindows = numberWindowDown*numberWindowAcross;
|
||||
if(numberWindows <=0) {
|
||||
fprintf(stderr, "Incorrect number of windows! (%d, %d)\n", numberWindowDown, numberWindowAcross);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// modified 02/12/2018 to include one more chunk
|
||||
// e.g. numberWindowDownInChunk=102, numberWindowDown=10, results in numberChunkDown=11
|
||||
// the last chunk will include 2 windows, numberWindowDownInChunkRun = 2.
|
||||
|
||||
// the last chunk will include 2 windows, numberWindowDownInChunkRun = 2.
|
||||
|
||||
numberChunkDown = IDIVUP(numberWindowDown, numberWindowDownInChunk);
|
||||
numberChunkAcross = IDIVUP(numberWindowAcross, numberWindowAcrossInChunk);
|
||||
numberChunks = numberChunkDown*numberChunkAcross;
|
||||
allocateArrays();
|
||||
allocateArrays();
|
||||
}
|
||||
|
||||
|
||||
|
@ -99,7 +109,7 @@ void cuAmpcorParameter::allocateArrays()
|
|||
masterStartPixelAcross = (int *)malloc(arraySize);
|
||||
slaveStartPixelDown = (int *)malloc(arraySize);
|
||||
slaveStartPixelAcross = (int *)malloc(arraySize);
|
||||
|
||||
|
||||
int arraySizeChunk = numberChunks*sizeof(int);
|
||||
masterChunkStartPixelDown = (int *)malloc(arraySizeChunk);
|
||||
masterChunkStartPixelAcross = (int *)malloc(arraySizeChunk);
|
||||
|
@ -130,18 +140,18 @@ void cuAmpcorParameter::deallocateArrays()
|
|||
}
|
||||
|
||||
|
||||
/// Set starting pixels for master and slave windows from arrays
|
||||
/// Set starting pixels for master and slave windows from arrays
|
||||
/// set also gross offsets between master and slave windows
|
||||
///
|
||||
///
|
||||
void cuAmpcorParameter::setStartPixels(int *mStartD, int *mStartA, int *gOffsetD, int *gOffsetA)
|
||||
{
|
||||
for(int i=0; i<numberWindows; i++)
|
||||
{
|
||||
masterStartPixelDown[i] = mStartD[i];
|
||||
grossOffsetDown[i] = gOffsetD[i];
|
||||
grossOffsetDown[i] = gOffsetD[i];
|
||||
slaveStartPixelDown[i] = masterStartPixelDown[i] + grossOffsetDown[i] - halfSearchRangeDownRaw;
|
||||
masterStartPixelAcross[i] = mStartA[i];
|
||||
grossOffsetAcross[i] = gOffsetA[i];
|
||||
grossOffsetAcross[i] = gOffsetA[i];
|
||||
slaveStartPixelAcross[i] = masterStartPixelAcross[i] + grossOffsetAcross[i] - halfSearchRangeAcrossRaw;
|
||||
}
|
||||
setChunkStartPixels();
|
||||
|
@ -160,7 +170,7 @@ void cuAmpcorParameter::setStartPixels(int mStartD, int mStartA, int *gOffsetD,
|
|||
masterStartPixelAcross[i] = mStartA + col*skipSampleAcrossRaw;
|
||||
grossOffsetAcross[i] = gOffsetA[i];
|
||||
slaveStartPixelAcross[i] = masterStartPixelAcross[i] + grossOffsetAcross[i] - halfSearchRangeAcrossRaw;
|
||||
}
|
||||
}
|
||||
}
|
||||
setChunkStartPixels();
|
||||
}
|
||||
|
@ -179,60 +189,60 @@ void cuAmpcorParameter::setStartPixels(int mStartD, int mStartA, int gOffsetD, i
|
|||
masterStartPixelAcross[i] = mStartA + col*skipSampleAcrossRaw;
|
||||
grossOffsetAcross[i] = gOffsetA;
|
||||
slaveStartPixelAcross[i] = masterStartPixelAcross[i] + grossOffsetAcross[i] - halfSearchRangeAcrossRaw;
|
||||
}
|
||||
}
|
||||
}
|
||||
setChunkStartPixels();
|
||||
}
|
||||
|
||||
void cuAmpcorParameter::setChunkStartPixels()
|
||||
{
|
||||
|
||||
|
||||
maxMasterChunkHeight = 0;
|
||||
maxMasterChunkWidth = 0;
|
||||
maxSlaveChunkHeight = 0;
|
||||
maxSlaveChunkWidth = 0;
|
||||
|
||||
|
||||
for(int ichunk=0; ichunk <numberChunkDown; ichunk++)
|
||||
{
|
||||
for (int jchunk =0; jchunk<numberChunkAcross; jchunk++)
|
||||
{
|
||||
|
||||
|
||||
int idxChunk = ichunk*numberChunkAcross+jchunk;
|
||||
int mChunkSD = masterImageHeight;
|
||||
int mChunkSA = masterImageWidth;
|
||||
int mChunkSD = masterImageHeight;
|
||||
int mChunkSA = masterImageWidth;
|
||||
int mChunkED = 0;
|
||||
int mChunkEA = 0;
|
||||
int sChunkSD = slaveImageHeight;
|
||||
int sChunkSA = slaveImageWidth;
|
||||
int sChunkED = 0;
|
||||
int sChunkEA = 0;
|
||||
|
||||
|
||||
// modified 02/12/2018
|
||||
int numberWindowDownInChunkRun = numberWindowDownInChunk;
|
||||
int numberWindowAcrossInChunkRun = numberWindowAcrossInChunk;
|
||||
// modify the number of windows in last chunk
|
||||
if(ichunk == numberChunkDown -1)
|
||||
int numberWindowDownInChunkRun = numberWindowDownInChunk;
|
||||
int numberWindowAcrossInChunkRun = numberWindowAcrossInChunk;
|
||||
// modify the number of windows in last chunk
|
||||
if(ichunk == numberChunkDown -1)
|
||||
numberWindowDownInChunkRun = numberWindowDown - numberWindowDownInChunk*(numberChunkDown -1);
|
||||
if(jchunk == numberChunkAcross -1)
|
||||
if(jchunk == numberChunkAcross -1)
|
||||
numberWindowAcrossInChunkRun = numberWindowAcross - numberWindowAcrossInChunk*(numberChunkAcross -1);
|
||||
|
||||
|
||||
for(int i=0; i<numberWindowDownInChunkRun; i++)
|
||||
|
||||
|
||||
for(int i=0; i<numberWindowDownInChunkRun; i++)
|
||||
{
|
||||
for(int j=0; j<numberWindowAcrossInChunkRun; j++)
|
||||
{
|
||||
{
|
||||
int idxWindow = (ichunk*numberWindowDownInChunk+i)*numberWindowAcross + (jchunk*numberWindowAcrossInChunk+j);
|
||||
int vpixel = masterStartPixelDown[idxWindow];
|
||||
if(mChunkSD > vpixel) mChunkSD = vpixel;
|
||||
if(mChunkSD > vpixel) mChunkSD = vpixel;
|
||||
if(mChunkED < vpixel) mChunkED = vpixel;
|
||||
vpixel = masterStartPixelAcross[idxWindow];
|
||||
if(mChunkSA > vpixel) mChunkSA = vpixel;
|
||||
if(mChunkSA > vpixel) mChunkSA = vpixel;
|
||||
if(mChunkEA < vpixel) mChunkEA = vpixel;
|
||||
vpixel = slaveStartPixelDown[idxWindow];
|
||||
if(sChunkSD > vpixel) sChunkSD = vpixel;
|
||||
if(sChunkSD > vpixel) sChunkSD = vpixel;
|
||||
if(sChunkED < vpixel) sChunkED = vpixel;
|
||||
vpixel = slaveStartPixelAcross[idxWindow];
|
||||
if(sChunkSA > vpixel) sChunkSA = vpixel;
|
||||
if(sChunkSA > vpixel) sChunkSA = vpixel;
|
||||
if(sChunkEA < vpixel) sChunkEA = vpixel;
|
||||
}
|
||||
}
|
||||
|
@ -261,58 +271,58 @@ void cuAmpcorParameter::checkPixelInImageRange()
|
|||
for(int col = 0; col < numberWindowAcross; col++)
|
||||
{
|
||||
int i = row*numberWindowAcross + col;
|
||||
if(masterStartPixelDown[i] <0)
|
||||
if(masterStartPixelDown[i] <0)
|
||||
{
|
||||
fprintf(stderr, "Master Window start pixel out ot range in Down, window (%d,%d), pixel %d\n", row, col, masterStartPixelDown[i]);
|
||||
exit(EXIT_FAILURE); //or raise range error
|
||||
}
|
||||
if(masterStartPixelAcross[i] <0)
|
||||
}
|
||||
if(masterStartPixelAcross[i] <0)
|
||||
{
|
||||
fprintf(stderr, "Master Window start pixel out ot range in Across, window (%d,%d), pixel %d\n", row, col, masterStartPixelAcross[i]);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
endPixel = masterStartPixelDown[i] + windowSizeHeightRaw;
|
||||
if(endPixel >= masterImageHeight)
|
||||
}
|
||||
endPixel = masterStartPixelDown[i] + windowSizeHeightRaw;
|
||||
if(endPixel >= masterImageHeight)
|
||||
{
|
||||
fprintf(stderr, "Master Window end pixel out ot range in Down, window (%d,%d), pixel %d\n", row, col, endPixel);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
endPixel = masterStartPixelAcross[i] + windowSizeWidthRaw;
|
||||
if(endPixel >= masterImageWidth)
|
||||
}
|
||||
endPixel = masterStartPixelAcross[i] + windowSizeWidthRaw;
|
||||
if(endPixel >= masterImageWidth)
|
||||
{
|
||||
fprintf(stderr, "Master Window end pixel out ot range in Across, window (%d,%d), pixel %d\n", row, col, endPixel);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
}
|
||||
//slave
|
||||
if(slaveStartPixelDown[i] <0)
|
||||
if(slaveStartPixelDown[i] <0)
|
||||
{
|
||||
fprintf(stderr, "Slave Window start pixel out ot range in Down, window (%d,%d), pixel %d\n", row, col, slaveStartPixelDown[i]);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
if(slaveStartPixelAcross[i] <0)
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
if(slaveStartPixelAcross[i] <0)
|
||||
{
|
||||
fprintf(stderr, "Slave Window start pixel out ot range in Across, window (%d,%d), pixel %d\n", row, col, slaveStartPixelAcross[i]);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
endPixel = slaveStartPixelDown[i] + searchWindowSizeHeightRaw;
|
||||
if(endPixel >= slaveImageHeight)
|
||||
}
|
||||
endPixel = slaveStartPixelDown[i] + searchWindowSizeHeightRaw;
|
||||
if(endPixel >= slaveImageHeight)
|
||||
{
|
||||
fprintf(stderr, "Slave Window end pixel out ot range in Down, window (%d,%d), pixel %d\n", row, col, endPixel);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
endPixel = slaveStartPixelAcross[i] + searchWindowSizeWidthRaw;
|
||||
if(endPixel >= slaveImageWidth)
|
||||
}
|
||||
endPixel = slaveStartPixelAcross[i] + searchWindowSizeWidthRaw;
|
||||
if(endPixel >= slaveImageWidth)
|
||||
{
|
||||
fprintf(stderr, "Slave Window end pixel out ot range in Across, window (%d,%d), pixel %d\n", row, col, endPixel);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
cuAmpcorParameter::~cuAmpcorParameter()
|
||||
cuAmpcorParameter::~cuAmpcorParameter()
|
||||
{
|
||||
deallocateArrays();
|
||||
}
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
/**
|
||||
* cuAmpcorParameter.h
|
||||
* Header file for Ampcor Parameter Class
|
||||
*
|
||||
*
|
||||
* Author: Lijun Zhu @ Seismo Lab, Caltech
|
||||
* March 2017
|
||||
*/
|
||||
|
@ -12,13 +12,13 @@
|
|||
#include <string>
|
||||
|
||||
/// Class container for all parameters
|
||||
///
|
||||
///
|
||||
/// @note
|
||||
/// The dimension/direction names used are:
|
||||
/// The dimension/direction names used are:
|
||||
/// The inner-most dimension: x, row, height, down, azimuth, along the track.
|
||||
/// The outer-most dimension: y, column, width, across, range, along the sight.
|
||||
/// C/C++/Python use row-major indexing: a[i][j] -> a[i*WIDTH+j]
|
||||
/// FORTRAN/BLAS/CUBLAS use column-major indexing: a[i][j]->a[i+j*LENGTH]
|
||||
/// C/C++/Python use row-major indexing: a[i][j] -> a[i*WIDTH+j]
|
||||
/// FORTRAN/BLAS/CUBLAS use column-major indexing: a[i][j]->a[i+j*LENGTH]
|
||||
|
||||
/// @note
|
||||
/// Common procedures to use cuAmpcorParameter
|
||||
|
@ -27,72 +27,74 @@
|
|||
/// 3. Call setupParameters() to determine related parameters and allocate starting pixels for each window: param->setupParameters()
|
||||
/// 4. Provide/set Master window starting pixel(s), and gross offset(s): param->setStartPixels(masterStartDown, masterStartAcross, grossOffsetDown, grossOffsetAcross)
|
||||
/// 4a. Optionally, check the range of windows is within the SLC image range: param->checkPixelInImageRange()
|
||||
/// Steps 1, 3, 4 are mandatory. If step 2 is missing, default values will be used
|
||||
/// Steps 1, 3, 4 are mandatory. If step 2 is missing, default values will be used
|
||||
|
||||
|
||||
class cuAmpcorParameter{
|
||||
public:
|
||||
int algorithm; /// Cross-correlation algorithm: 0=freq domain (default) 1=time domain
|
||||
int deviceID; /// Targeted GPU device ID: use -1 to auto select
|
||||
int nStreams; /// Number of streams to asynchonize data transfers and compute kernels
|
||||
int algorithm; /// Cross-correlation algorithm: 0=freq domain (default) 1=time domain
|
||||
int deviceID; /// Targeted GPU device ID: use -1 to auto select
|
||||
int nStreams; /// Number of streams to asynchonize data transfers and compute kernels
|
||||
int derampMethod; /// Method for deramping 0=None, 1=average, 2=phase gradient
|
||||
|
||||
|
||||
// chip or window size for raw data
|
||||
int windowSizeHeightRaw; /// Template window height (original size)
|
||||
int windowSizeWidthRaw; /// Template window width (original size)
|
||||
int searchWindowSizeHeightRaw; /// Search window height (original size)
|
||||
int windowSizeWidthRaw; /// Template window width (original size)
|
||||
int searchWindowSizeHeightRaw; /// Search window height (original size)
|
||||
int searchWindowSizeWidthRaw; /// Search window width (orignal size)
|
||||
|
||||
int halfSearchRangeDownRaw; ///(searchWindowSizeHeightRaw-windowSizeHeightRaw)/2
|
||||
|
||||
int halfSearchRangeDownRaw; ///(searchWindowSizeHeightRaw-windowSizeHeightRaw)/2
|
||||
int halfSearchRangeAcrossRaw; ///(searchWindowSizeWidthRaw-windowSizeWidthRaw)/2
|
||||
// search range is (-halfSearchRangeRaw, halfSearchRangeRaw)
|
||||
|
||||
|
||||
int searchWindowSizeHeightRawZoomIn;
|
||||
int searchWindowSizeWidthRawZoomIn;
|
||||
|
||||
|
||||
|
||||
int corrRawZoomInHeight; // window to estimate snr
|
||||
int corrRawZoomInWidth;
|
||||
|
||||
// chip or window size after oversampling
|
||||
int rawDataOversamplingFactor; /// Raw data overampling factor (from original size to oversampled size)
|
||||
int windowSizeHeight; /// Template window length (oversampled size)
|
||||
int windowSizeWidth; /// Template window width (original size)
|
||||
int searchWindowSizeHeight; /// Search window height (oversampled size)
|
||||
int searchWindowSizeWidth; /// Search window width (oversampled size)
|
||||
|
||||
// strides between chips/windows
|
||||
int searchWindowSizeHeight; /// Search window height (oversampled size)
|
||||
int searchWindowSizeWidth; /// Search window width (oversampled size)
|
||||
|
||||
// strides between chips/windows
|
||||
int skipSampleDownRaw; /// Skip size between neighboring windows in Down direction (original size)
|
||||
int skipSampleAcrossRaw; /// Skip size between neighboring windows in across direction (original size)
|
||||
//int skipSampleDown; /// Skip size between neighboring windows in Down direction (oversampled size)
|
||||
//int skipSampleAcross; /// Skip size between neighboring windows in Across direction (oversampled size)
|
||||
|
||||
|
||||
// Zoom in region near location of max correlation
|
||||
int zoomWindowSize; /// Zoom-in window size in correlation surface (same for down and across directions)
|
||||
int halfZoomWindowSizeRaw; /// = half of zoomWindowSize/rawDataOversamplingFactor
|
||||
|
||||
|
||||
|
||||
int zoomWindowSize; /// Zoom-in window size in correlation surface (same for down and across directions)
|
||||
int halfZoomWindowSizeRaw; /// = half of zoomWindowSize/rawDataOversamplingFactor
|
||||
|
||||
|
||||
|
||||
int oversamplingFactor; /// Oversampling factor for interpolating correlation surface
|
||||
int oversamplingMethod; /// 0 = fft (default) 1 = sinc
|
||||
|
||||
|
||||
float thresholdSNR; /// Threshold of Signal noise ratio to remove noisy data
|
||||
|
||||
int oversamplingMethod; /// 0 = fft (default) 1 = sinc
|
||||
|
||||
|
||||
float thresholdSNR; /// Threshold of Signal noise ratio to remove noisy data
|
||||
|
||||
//master image
|
||||
std::string masterImageName; /// master SLC image name
|
||||
int imageDataType1; /// master image data type, 2=cfloat=complex=float2 1=float
|
||||
int masterImageHeight; /// master image height
|
||||
int masterImageHeight; /// master image height
|
||||
int masterImageWidth; /// master image width
|
||||
|
||||
|
||||
//slave image
|
||||
std::string slaveImageName; /// slave SLC image name
|
||||
int imageDataType2; /// slave image data type, 2=cfloat=complex=float2 1=float
|
||||
int slaveImageHeight; /// slave image height
|
||||
int slaveImageHeight; /// slave image height
|
||||
int slaveImageWidth; /// slave image width
|
||||
|
||||
|
||||
// total number of chips/windows
|
||||
int numberWindowDown; /// number of total windows (down)
|
||||
int numberWindowAcross; /// number of total windows (across)
|
||||
int numberWindows; /// numberWindowDown*numberWindowAcross
|
||||
|
||||
|
||||
// number of chips/windows in a batch/chunk
|
||||
int numberWindowDownInChunk; /// number of windows processed in a chunk (down)
|
||||
int numberWindowAcrossInChunk; /// number of windows processed in a chunk (across)
|
||||
|
@ -100,20 +102,21 @@ public:
|
|||
int numberChunkDown; /// number of chunks (down)
|
||||
int numberChunkAcross; /// number of chunks (across)
|
||||
int numberChunks;
|
||||
|
||||
int mmapSizeInGB;
|
||||
|
||||
|
||||
int useMmap; /// whether to use mmap 0=not 1=yes (default = 0)
|
||||
int mmapSizeInGB; /// size for mmap buffer(useMmap=1) or a cpu memory buffer (useMmap=0)
|
||||
|
||||
int masterStartPixelDown0;
|
||||
int masterStartPixelAcross0;
|
||||
int *masterStartPixelDown; /// master starting pixels for each window (down)
|
||||
int *masterStartPixelDown; /// master starting pixels for each window (down)
|
||||
int *masterStartPixelAcross;/// master starting pixels for each window (across)
|
||||
int *slaveStartPixelDown; /// slave starting pixels for each window (down)
|
||||
int *slaveStartPixelAcross; /// slave starting pixels for each window (across)
|
||||
int *slaveStartPixelDown; /// slave starting pixels for each window (down)
|
||||
int *slaveStartPixelAcross; /// slave starting pixels for each window (across)
|
||||
int grossOffsetDown0;
|
||||
int grossOffsetAcross0;
|
||||
int *grossOffsetDown; /// Gross offsets between master and slave windows (down) : slaveStartPixel - masterStartPixel
|
||||
int *grossOffsetAcross; /// Gross offsets between master and slave windows (across)
|
||||
|
||||
int *grossOffsetAcross; /// Gross offsets between master and slave windows (across)
|
||||
|
||||
int *masterChunkStartPixelDown;
|
||||
int *masterChunkStartPixelAcross;
|
||||
int *slaveChunkStartPixelDown;
|
||||
|
@ -124,18 +127,19 @@ public:
|
|||
int *slaveChunkWidth;
|
||||
int maxMasterChunkHeight, maxMasterChunkWidth;
|
||||
int maxSlaveChunkHeight, maxSlaveChunkWidth;
|
||||
|
||||
std::string grossOffsetImageName;
|
||||
std::string offsetImageName; /// Output Offset fields filename
|
||||
|
||||
std::string grossOffsetImageName;
|
||||
std::string offsetImageName; /// Output Offset fields filename
|
||||
std::string snrImageName; /// Output SNR filename
|
||||
|
||||
std::string covImageName;
|
||||
|
||||
cuAmpcorParameter(); /// Class constructor and default parameters setter
|
||||
~cuAmpcorParameter(); /// Class descontructor
|
||||
|
||||
~cuAmpcorParameter(); /// Class descontructor
|
||||
|
||||
void allocateArrays(); /// Allocate various arrays after the number of Windows is given
|
||||
void deallocateArrays(); /// Deallocate arrays on exit
|
||||
|
||||
|
||||
|
||||
/// Three methods to set master/slave starting pixels and gross offsets from input master start pixel(s) and gross offset(s)
|
||||
/// 1 (int *, int *, int *, int *): varying master start pixels and gross offsets
|
||||
/// 2 (int, int, int *, int *): fixed master start pixel (first window) and varying gross offsets
|
||||
|
@ -144,7 +148,7 @@ public:
|
|||
void setStartPixels(int, int, int*, int*);
|
||||
void setStartPixels(int, int, int, int);
|
||||
void setChunkStartPixels();
|
||||
void checkPixelInImageRange(); /// check whether
|
||||
void checkPixelInImageRange(); /// check whether
|
||||
void setupParameters(); /// Process other parameters after Python Input
|
||||
|
||||
};
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
/*
|
||||
/*
|
||||
* cuAmpcorUtil.h
|
||||
* header file to include the various routines for ampcor
|
||||
* serves as an index
|
||||
* serves as an index
|
||||
*/
|
||||
|
||||
|
||||
|
||||
|
||||
#ifndef __CUAMPCORUTIL_H
|
||||
#define __CUMAPCORUTIL_H
|
||||
|
||||
|
@ -18,20 +18,27 @@
|
|||
|
||||
//in cuArraysCopy.cu: various utitlies for copy images file in gpu memory
|
||||
void cuArraysCopyToBatch(cuArrays<float2> *image1, cuArrays<float2> *image2, int strideH, int strideW, cudaStream_t stream);
|
||||
void cuArraysCopyToBatchWithOffset(cuArrays<float2> *image1, const int lda1, cuArrays<float2> *image2,
|
||||
void cuArraysCopyToBatchWithOffset(cuArrays<float2> *image1, const int lda1, cuArrays<float2> *image2,
|
||||
const int *offsetH, const int* offsetW, cudaStream_t stream);
|
||||
void cuArraysCopyToBatchAbsWithOffset(cuArrays<float2> *image1, const int lda1, cuArrays<float2> *image2,
|
||||
void cuArraysCopyToBatchAbsWithOffset(cuArrays<float2> *image1, const int lda1, cuArrays<float2> *image2,
|
||||
const int *offsetH, const int* offsetW, cudaStream_t stream);
|
||||
void cuArraysCopyToBatchWithOffsetR2C(cuArrays<float> *image1, const int lda1, cuArrays<float2> *image2,
|
||||
const int *offsetH, const int* offsetW, cudaStream_t stream);
|
||||
void cuArraysCopyC2R(cuArrays<float2> *image1, cuArrays<float> *image2, int strideH, int strideW, cudaStream_t stream);
|
||||
|
||||
// same routine name overloaded for different data type
|
||||
void cuArraysCopyExtract(cuArrays<float> *imagesIn, cuArrays<float> *imagesOut, cuArrays<int2> *offset, cudaStream_t stream);
|
||||
void cuArraysCopyExtract(cuArrays<float> *imagesIn, cuArrays<float> *imagesOut, int2 offset, cudaStream_t stream);
|
||||
void cuArraysCopyExtract(cuArrays<float2> *imagesIn, cuArrays<float> *imagesOut, int2 offset, cudaStream_t stream);
|
||||
void cuArraysCopyExtract(cuArrays<float2> *imagesIn, cuArrays<float2> *imagesOut, int2 offset, cudaStream_t stream);
|
||||
void cuArraysCopyExtract(cuArrays<float2> *imagesIn, cuArrays<float2> *imagesOut, cuArrays<int2> *offsets, cudaStream_t stream);
|
||||
void cuArraysCopyExtract(cuArrays<float3> *imagesIn, cuArrays<float3> *imagesOut, int2 offset, cudaStream_t stream);
|
||||
|
||||
void cuArraysCopyInsert(cuArrays<float2> *imageIn, cuArrays<float2> *imageOut, int offsetX, int offersetY, cudaStream_t stream);
|
||||
void cuArraysCopyInsert(cuArrays<float3> *imageIn, cuArrays<float3> *imageOut, int offsetX, int offersetY, cudaStream_t stream);
|
||||
void cuArraysCopyInsert(cuArrays<float> *imageIn, cuArrays<float> *imageOut, int offsetX, int offsetY, cudaStream_t stream);
|
||||
void cuArraysCopyInsert(cuArrays<int> *imageIn, cuArrays<int> *imageOut, int offsetX, int offersetY, cudaStream_t stream);
|
||||
|
||||
void cuArraysCopyInversePadded(cuArrays<float> *imageIn, cuArrays<float> *imageOut,cudaStream_t stream);
|
||||
|
||||
void cuArraysCopyPadded(cuArrays<float> *imageIn, cuArrays<float> *imageOut,cudaStream_t stream);
|
||||
|
@ -46,8 +53,8 @@ void cuDerampMethod2(cuArrays<float2> *images, cudaStream_t stream);
|
|||
void cpuDerampMethod3(cuArrays<float2> *images, cudaStream_t stream);
|
||||
|
||||
//in cuArraysPadding.cu: various utilities for oversampling padding
|
||||
void cuArraysPadding(cuArrays<float2> *image1, cuArrays<float2> *image2, cudaStream_t stream);
|
||||
void cuArraysPaddingMany(cuArrays<float2> *image1, cuArrays<float2> *image2, cudaStream_t stream);
|
||||
void cuArraysPadding(cuArrays<float2> *image1, cuArrays<float2> *image2, cudaStream_t stream);
|
||||
void cuArraysPaddingMany(cuArrays<float2> *image1, cuArrays<float2> *image2, cudaStream_t stream);
|
||||
void cuArraysR2C(cuArrays<float> *image1, cuArrays<float2> *image2, cudaStream_t stream);
|
||||
void cuArraysC2R(cuArrays<float2> *image1, cuArrays<float> *image2, cudaStream_t stream);
|
||||
void cuArraysAbs(cuArrays<float2> *image1, cuArrays<float> *image2, cudaStream_t stream);
|
||||
|
@ -57,21 +64,21 @@ void cuArraysSubtractMean(cuArrays<float> *images, cudaStream_t stream);
|
|||
void cuCorrNormalize(cuArrays<float> *templates, cuArrays<float> *images, cuArrays<float> *results, cudaStream_t stream);
|
||||
|
||||
//in cuOffset.cu: utitilies for determining the max locaiton of cross correlations or the offset
|
||||
void cuArraysMaxloc2D(cuArrays<float> *images, cuArrays<int2> *maxloc, cuArrays<float> *maxval, cudaStream_t stream);
|
||||
void cuArraysMaxloc2D(cuArrays<float> *images, cuArrays<int2> *maxloc, cudaStream_t stream);
|
||||
void cuArraysMaxloc2D(cuArrays<float> *images, cuArrays<int2> *maxloc, cuArrays<float> *maxval, cudaStream_t stream);
|
||||
void cuArraysMaxloc2D(cuArrays<float> *images, cuArrays<int2> *maxloc, cudaStream_t stream);
|
||||
void cuSubPixelOffset(cuArrays<int2> *offsetInit, cuArrays<int2> *offsetZoomIn, cuArrays<float2> *offsetFinal,
|
||||
int OverSampleRatioZoomin, int OverSampleRatioRaw,
|
||||
int xHalfRangeInit, int yHalfRangeInit, int xHalfRangeZoomIn, int yHalfRangeZoomIn,
|
||||
cudaStream_t stream);
|
||||
|
||||
|
||||
void cuDetermineInterpZone(cuArrays<int2> *maxloc, cuArrays<int2> *zoomInOffset, cuArrays<float> *corrOrig, cuArrays<float> *corrZoomIn, cudaStream_t stream);
|
||||
void cuDetermineSlaveExtractOffset(cuArrays<int2> *maxLoc, int xOldRange, int yOldRange, int xNewRange, int yNewRange, cudaStream_t stream);
|
||||
|
||||
//in cuCorrTimeDomain.cu: cross correlation in time domain
|
||||
void cuCorrTimeDomain(cuArrays<float> *templates, cuArrays<float> *images, cuArrays<float> *results, cudaStream_t stream);
|
||||
void cuCorrTimeDomain(cuArrays<float> *templates, cuArrays<float> *images, cuArrays<float> *results, cudaStream_t stream);
|
||||
|
||||
//in cuCorrFrequency.cu: cross correlation in freq domain, also include fft correlatior class
|
||||
void cuArraysElementMultiply(cuArrays<float2> *image1, cuArrays<float2> *image2, cudaStream_t stream);
|
||||
void cuArraysElementMultiply(cuArrays<float2> *image1, cuArrays<float2> *image2, cudaStream_t stream);
|
||||
void cuArraysElementMultiplyConjugate(cuArrays<float2> *image1, cuArrays<float2> *image2, float coef, cudaStream_t stream);
|
||||
|
||||
|
||||
|
@ -80,7 +87,11 @@ void cuArraysElementMultiplyConjugate(cuArrays<float2> *image1, cuArrays<float2>
|
|||
void cuArraysCopyExtractCorr(cuArrays<float> *imagesIn, cuArrays<float> *imagesOut, cuArrays<int> *imagesValid, cuArrays<int2> *maxloc, cudaStream_t stream);
|
||||
// implemented in cuCorrNormalization.cu
|
||||
void cuArraysSumCorr(cuArrays<float> *images, cuArrays<int> *imagesValid, cuArrays<float> *imagesSum, cuArrays<int> *imagesValidCount, cudaStream_t stream);
|
||||
|
||||
// implemented in cuEstimateStats.cu
|
||||
void cuEstimateSnr(cuArrays<float> *corrSum, cuArrays<int> *corrValidCount, cuArrays<float> *maxval, cuArrays<float> *snrValue, cudaStream_t stream);
|
||||
|
||||
#endif
|
||||
// implemented in cuEstimateStats.cu
|
||||
void cuEstimateVariance(cuArrays<float> *corrBatchRaw, cuArrays<int2> *maxloc, cuArrays<float> *maxval, cuArrays<float3> *covValue, cudaStream_t stream);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -154,8 +154,21 @@
|
|||
file.write((char *)data, size*count*sizeof(float2));
|
||||
file.close();
|
||||
}
|
||||
|
||||
template<>
|
||||
void cuArrays<float3>::outputToFile(std::string fn, cudaStream_t stream)
|
||||
{
|
||||
float *data;
|
||||
data = (float *)malloc(size*count*sizeof(float3));
|
||||
checkCudaErrors(cudaMemcpyAsync(data, devData, size*count*sizeof(float3), cudaMemcpyDeviceToHost, stream));
|
||||
std::ofstream file;
|
||||
file.open(fn.c_str(), std::ios_base::binary);
|
||||
file.write((char *)data, size*count*sizeof(float3));
|
||||
file.close();
|
||||
}
|
||||
|
||||
template class cuArrays<float>;
|
||||
template class cuArrays<float2>;
|
||||
template class cuArrays<float3>;
|
||||
template class cuArrays<int2>;
|
||||
template class cuArrays<int>;
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
* Lijun Zhu @ Seismo Lab, Caltech
|
||||
* v1.0 Jan 2017
|
||||
*/
|
||||
|
||||
|
||||
#include "cuArrays.h"
|
||||
#include "cudaUtil.h"
|
||||
#include "cudaError.h"
|
||||
|
@ -16,14 +16,14 @@ inline __device__ float cuAbs(float2 a)
|
|||
return sqrtf(a.x*a.x+a.y*a.y);
|
||||
}*/
|
||||
|
||||
//copy a chunk into a series of chips
|
||||
__global__ void cuArraysCopyToBatch_kernel(const float2 *imageIn, const int inNX, const int inNY,
|
||||
float2 *imageOut, const int outNX, const int outNY,
|
||||
const int nImagesX, const int nImagesY,
|
||||
// copy a chunk into a batch of chips for a given stride
|
||||
__global__ void cuArraysCopyToBatch_kernel(const float2 *imageIn, const int inNX, const int inNY,
|
||||
float2 *imageOut, const int outNX, const int outNY,
|
||||
const int nImagesX, const int nImagesY,
|
||||
const int strideX, const int strideY)
|
||||
{
|
||||
int idxImage = blockIdx.z;
|
||||
int outx = threadIdx.x + blockDim.x*blockIdx.x;
|
||||
int outx = threadIdx.x + blockDim.x*blockIdx.x;
|
||||
int outy = threadIdx.y + blockDim.y*blockIdx.y;
|
||||
if(idxImage >=nImagesX*nImagesY|| outx >= outNX || outy >= outNY) return;
|
||||
int idxOut = idxImage*outNX*outNY + outx*outNY + outy;
|
||||
|
@ -33,8 +33,7 @@ __global__ void cuArraysCopyToBatch_kernel(const float2 *imageIn, const int inNX
|
|||
imageOut[idxOut] = imageIn[idxIn];
|
||||
}
|
||||
|
||||
//tested
|
||||
void cuArraysCopyToBatch(cuArrays<float2> *image1, cuArrays<float2> *image2,
|
||||
void cuArraysCopyToBatch(cuArrays<float2> *image1, cuArrays<float2> *image2,
|
||||
int strideH, int strideW, cudaStream_t stream)
|
||||
{
|
||||
const int nthreads = NTHREADS2D;
|
||||
|
@ -48,12 +47,14 @@ void cuArraysCopyToBatch(cuArrays<float2> *image1, cuArrays<float2> *image2,
|
|||
getLastCudaError("cuArraysCopyToBatch_kernel");
|
||||
}
|
||||
|
||||
__global__ void cuArraysCopyToBatchWithOffset_kernel(const float2 *imageIn, const int inNY,
|
||||
float2 *imageOut, const int outNX, const int outNY, const int nImages,
|
||||
|
||||
// copy a chunk into a batch of chips for a set of offsets (varying strides), from complex to complex
|
||||
__global__ void cuArraysCopyToBatchWithOffset_kernel(const float2 *imageIn, const int inNY,
|
||||
float2 *imageOut, const int outNX, const int outNY, const int nImages,
|
||||
const int *offsetX, const int *offsetY)
|
||||
{
|
||||
int idxImage = blockIdx.z;
|
||||
int outx = threadIdx.x + blockDim.x*blockIdx.x;
|
||||
int outx = threadIdx.x + blockDim.x*blockIdx.x;
|
||||
int outy = threadIdx.y + blockDim.y*blockIdx.y;
|
||||
if(idxImage>=nImages || outx >= outNX || outy >= outNY) return;
|
||||
int idxOut = idxImage*outNX*outNY + outx*outNY + outy;
|
||||
|
@ -61,11 +62,8 @@ __global__ void cuArraysCopyToBatchWithOffset_kernel(const float2 *imageIn, cons
|
|||
imageOut[idxOut] = imageIn[idxIn];
|
||||
}
|
||||
|
||||
/// @param[in] image1 input image in a large chunk
|
||||
/// @param[in] lda1 width of image 1
|
||||
/// @param[out] image2 output image with a batch of small windows
|
||||
|
||||
void cuArraysCopyToBatchWithOffset(cuArrays<float2> *image1, const int lda1, cuArrays<float2> *image2,
|
||||
// lda1 (inNY) is the leading dimension of image1, usually, its width
|
||||
void cuArraysCopyToBatchWithOffset(cuArrays<float2> *image1, const int lda1, cuArrays<float2> *image2,
|
||||
const int *offsetH, const int* offsetW, cudaStream_t stream)
|
||||
{
|
||||
const int nthreads = 16;
|
||||
|
@ -79,12 +77,13 @@ void cuArraysCopyToBatchWithOffset(cuArrays<float2> *image1, const int lda1, cuA
|
|||
getLastCudaError("cuArraysCopyToBatchAbsWithOffset_kernel");
|
||||
}
|
||||
|
||||
__global__ void cuArraysCopyToBatchAbsWithOffset_kernel(const float2 *imageIn, const int inNY,
|
||||
float2 *imageOut, const int outNX, const int outNY, const int nImages,
|
||||
// copy a chunk into a batch of chips for a set of offsets (varying strides), from complex to real(take amplitudes)
|
||||
__global__ void cuArraysCopyToBatchAbsWithOffset_kernel(const float2 *imageIn, const int inNY,
|
||||
float2 *imageOut, const int outNX, const int outNY, const int nImages,
|
||||
const int *offsetX, const int *offsetY)
|
||||
{
|
||||
int idxImage = blockIdx.z;
|
||||
int outx = threadIdx.x + blockDim.x*blockIdx.x;
|
||||
int outx = threadIdx.x + blockDim.x*blockIdx.x;
|
||||
int outy = threadIdx.y + blockDim.y*blockIdx.y;
|
||||
if(idxImage>=nImages || outx >= outNX || outy >= outNY) return;
|
||||
int idxOut = idxImage*outNX*outNY + outx*outNY + outy;
|
||||
|
@ -92,7 +91,7 @@ __global__ void cuArraysCopyToBatchAbsWithOffset_kernel(const float2 *imageIn, c
|
|||
imageOut[idxOut] = make_float2(complexAbs(imageIn[idxIn]), 0.0);
|
||||
}
|
||||
|
||||
void cuArraysCopyToBatchAbsWithOffset(cuArrays<float2> *image1, const int lda1, cuArrays<float2> *image2,
|
||||
void cuArraysCopyToBatchAbsWithOffset(cuArrays<float2> *image1, const int lda1, cuArrays<float2> *image2,
|
||||
const int *offsetH, const int* offsetW, cudaStream_t stream)
|
||||
{
|
||||
const int nthreads = 16;
|
||||
|
@ -106,14 +105,42 @@ void cuArraysCopyToBatchAbsWithOffset(cuArrays<float2> *image1, const int lda1,
|
|||
getLastCudaError("cuArraysCopyToBatchAbsWithOffset_kernel");
|
||||
}
|
||||
|
||||
// copy a chunk into a batch of chips for a set of offsets (varying strides), from real to complex(to real part)
|
||||
__global__ void cuArraysCopyToBatchWithOffsetR2C_kernel(const float *imageIn, const int inNY,
|
||||
float2 *imageOut, const int outNX, const int outNY, const int nImages,
|
||||
const int *offsetX, const int *offsetY)
|
||||
{
|
||||
int idxImage = blockIdx.z;
|
||||
int outx = threadIdx.x + blockDim.x*blockIdx.x;
|
||||
int outy = threadIdx.y + blockDim.y*blockIdx.y;
|
||||
if(idxImage>=nImages || outx >= outNX || outy >= outNY) return;
|
||||
int idxOut = idxImage*outNX*outNY + outx*outNY + outy;
|
||||
int idxIn = (offsetX[idxImage]+outx)*inNY + offsetY[idxImage] + outy;
|
||||
imageOut[idxOut] = make_float2(imageIn[idxIn], 0.0f);
|
||||
}
|
||||
|
||||
void cuArraysCopyToBatchWithOffsetR2C(cuArrays<float> *image1, const int lda1, cuArrays<float2> *image2,
|
||||
const int *offsetH, const int* offsetW, cudaStream_t stream)
|
||||
{
|
||||
const int nthreads = 16;
|
||||
dim3 blockSize(nthreads, nthreads, 1);
|
||||
dim3 gridSize(IDIVUP(image2->height,nthreads), IDIVUP(image2->width,nthreads), image2->count);
|
||||
//fprintf(stderr, "copy tile to batch, %d %d\n", lda1, image2->count);
|
||||
cuArraysCopyToBatchWithOffsetR2C_kernel<<<gridSize,blockSize, 0 , stream>>> (
|
||||
image1->devData, lda1,
|
||||
image2->devData, image2->height, image2->width, image2->count,
|
||||
offsetH, offsetW);
|
||||
getLastCudaError("cuArraysCopyToBatchWithOffsetR2C_kernel");
|
||||
}
|
||||
|
||||
//copy a chunk into a series of chips
|
||||
__global__ void cuArraysCopyC2R_kernel(const float2 *imageIn, const int inNX, const int inNY,
|
||||
float *imageOut, const int outNX, const int outNY,
|
||||
const int nImagesX, const int nImagesY,
|
||||
__global__ void cuArraysCopyC2R_kernel(const float2 *imageIn, const int inNX, const int inNY,
|
||||
float *imageOut, const int outNX, const int outNY,
|
||||
const int nImagesX, const int nImagesY,
|
||||
const int strideX, const int strideY, const float factor)
|
||||
{
|
||||
int idxImage = blockIdx.z;
|
||||
int outx = threadIdx.x + blockDim.x*blockIdx.x;
|
||||
int outx = threadIdx.x + blockDim.x*blockIdx.x;
|
||||
int outy = threadIdx.y + blockDim.y*blockIdx.y;
|
||||
if(idxImage >=nImagesX*nImagesY|| outx >= outNX || outy >= outNY) return;
|
||||
int idxOut = idxImage*outNX*outNY + outx*outNY + outy;
|
||||
|
@ -121,17 +148,17 @@ __global__ void cuArraysCopyC2R_kernel(const float2 *imageIn, const int inNX, co
|
|||
int idxImageY = idxImage%nImagesY;
|
||||
int idxIn = (idxImageX*strideX+outx)*inNY + idxImageY*strideY+outy;
|
||||
imageOut[idxOut] = complexAbs(imageIn[idxIn])*factor;
|
||||
//printf( "%d\n", idxOut);
|
||||
//printf( "%d\n", idxOut);
|
||||
}
|
||||
|
||||
//tested
|
||||
void cuArraysCopyC2R(cuArrays<float2> *image1, cuArrays<float> *image2,
|
||||
void cuArraysCopyC2R(cuArrays<float2> *image1, cuArrays<float> *image2,
|
||||
int strideH, int strideW, cudaStream_t stream)
|
||||
{
|
||||
const int nthreads = 16;
|
||||
dim3 blockSize(nthreads, nthreads, 1);
|
||||
dim3 gridSize(IDIVUP(image2->height,nthreads), IDIVUP(image2->width,nthreads), image2->count);
|
||||
float factor = 1.0f/image1->size; //the FFT factor
|
||||
float factor = 1.0f/image1->size; //the FFT factor
|
||||
cuArraysCopyC2R_kernel<<<gridSize,blockSize, 0 , stream>>> (
|
||||
image1->devData, image1->height, image1->width,
|
||||
image2->devData, image2->height, image2->width,
|
||||
|
@ -141,22 +168,22 @@ void cuArraysCopyC2R(cuArrays<float2> *image1, cuArrays<float> *image2,
|
|||
}
|
||||
|
||||
__global__ void cuArraysCopyExtractVaryingOffset(const float *imageIn, const int inNX, const int inNY,
|
||||
float *imageOut, const int outNX, const int outNY, const int nImages,
|
||||
float *imageOut, const int outNX, const int outNY, const int nImages,
|
||||
const int2 *offsets)
|
||||
{
|
||||
int outx = threadIdx.x + blockDim.x*blockIdx.x;
|
||||
int outx = threadIdx.x + blockDim.x*blockIdx.x;
|
||||
int outy = threadIdx.y + blockDim.y*blockIdx.y;
|
||||
|
||||
|
||||
if(outx < outNX && outy < outNY)
|
||||
{
|
||||
int idxImage = blockIdx.z;
|
||||
int idxImage = blockIdx.z;
|
||||
int idxOut = (blockIdx.z * outNX + outx)*outNY+outy;
|
||||
int idxIn = (blockIdx.z*inNX + outx + offsets[idxImage].x)*inNY + outy + offsets[idxImage].y;
|
||||
int idxIn = (blockIdx.z*inNX + outx + offsets[idxImage].x)*inNY + outy + offsets[idxImage].y;
|
||||
imageOut[idxOut] = imageIn[idxIn];
|
||||
}
|
||||
}
|
||||
|
||||
/* copy a tile of images to another image, with starting pixels offsets
|
||||
/* copy a tile of images to another image, with starting pixels offsets
|
||||
* param[in] imageIn inut images
|
||||
* param[out] imageOut output images of dimension nImages*outNX*outNY
|
||||
*/
|
||||
|
@ -166,24 +193,24 @@ void cuArraysCopyExtract(cuArrays<float> *imagesIn, cuArrays<float> *imagesOut,
|
|||
const int nthreads = 16;
|
||||
dim3 threadsperblock(nthreads, nthreads,1);
|
||||
dim3 blockspergrid(IDIVUP(imagesOut->height,nthreads), IDIVUP(imagesOut->width,nthreads), imagesOut->count);
|
||||
cuArraysCopyExtractVaryingOffset<<<blockspergrid, threadsperblock,0, stream>>>(imagesIn->devData, imagesIn->height, imagesIn->width,
|
||||
cuArraysCopyExtractVaryingOffset<<<blockspergrid, threadsperblock,0, stream>>>(imagesIn->devData, imagesIn->height, imagesIn->width,
|
||||
imagesOut->devData, imagesOut->height, imagesOut->width, imagesOut->count, offsets->devData);
|
||||
getLastCudaError("cuArraysCopyExtract error");
|
||||
}
|
||||
|
||||
|
||||
__global__ void cuArraysCopyExtractVaryingOffset_C2C(const float2 *imageIn, const int inNX, const int inNY,
|
||||
float2 *imageOut, const int outNX, const int outNY, const int nImages,
|
||||
float2 *imageOut, const int outNX, const int outNY, const int nImages,
|
||||
const int2 *offsets)
|
||||
{
|
||||
int outx = threadIdx.x + blockDim.x*blockIdx.x;
|
||||
int outx = threadIdx.x + blockDim.x*blockIdx.x;
|
||||
int outy = threadIdx.y + blockDim.y*blockIdx.y;
|
||||
|
||||
|
||||
if(outx < outNX && outy < outNY)
|
||||
{
|
||||
int idxImage = blockIdx.z;
|
||||
int idxImage = blockIdx.z;
|
||||
int idxOut = (blockIdx.z * outNX + outx)*outNY+outy;
|
||||
int idxIn = (blockIdx.z*inNX + outx + offsets[idxImage].x)*inNY + outy + offsets[idxImage].y;
|
||||
int idxIn = (blockIdx.z*inNX + outx + offsets[idxImage].x)*inNY + outy + offsets[idxImage].y;
|
||||
imageOut[idxOut] = imageIn[idxIn];
|
||||
}
|
||||
}
|
||||
|
@ -194,7 +221,7 @@ void cuArraysCopyExtract(cuArrays<float2> *imagesIn, cuArrays<float2> *imagesOut
|
|||
const int nthreads = 16;
|
||||
dim3 threadsperblock(nthreads, nthreads,1);
|
||||
dim3 blockspergrid(IDIVUP(imagesOut->height,nthreads), IDIVUP(imagesOut->width,nthreads), imagesOut->count);
|
||||
cuArraysCopyExtractVaryingOffset_C2C<<<blockspergrid, threadsperblock,0, stream>>>(imagesIn->devData, imagesIn->height, imagesIn->width,
|
||||
cuArraysCopyExtractVaryingOffset_C2C<<<blockspergrid, threadsperblock,0, stream>>>(imagesIn->devData, imagesIn->height, imagesIn->width,
|
||||
imagesOut->devData, imagesOut->height, imagesOut->width, imagesOut->count, offsets->devData);
|
||||
getLastCudaError("cuArraysCopyExtractC2C error");
|
||||
|
||||
|
@ -202,26 +229,29 @@ void cuArraysCopyExtract(cuArrays<float2> *imagesIn, cuArrays<float2> *imagesOut
|
|||
|
||||
// correlation surface extraction (Minyan Zhong)
|
||||
__global__ void cuArraysCopyExtractVaryingOffsetCorr(const float *imageIn, const int inNX, const int inNY,
|
||||
float *imageOut, const int outNX, const int outNY, int *imageValid, const int nImages,
|
||||
float *imageOut, const int outNX, const int outNY, int *imageValid, const int nImages,
|
||||
const int2 *maxloc)
|
||||
{
|
||||
|
||||
int idxImage = blockIdx.z;
|
||||
|
||||
int outx = threadIdx.x + blockDim.x*blockIdx.x;
|
||||
// One thread per out point. Find the coordinates within the current image.
|
||||
int outx = threadIdx.x + blockDim.x*blockIdx.x;
|
||||
int outy = threadIdx.y + blockDim.y*blockIdx.y;
|
||||
|
||||
// Find the correponding input.
|
||||
int inx = outx + maxloc[idxImage].x - outNX/2;
|
||||
int iny = outy + maxloc[idxImage].y - outNY/2;
|
||||
|
||||
if (outx < outNX && outy < outNY)
|
||||
|
||||
if (outx < outNX && outy < outNY)
|
||||
{
|
||||
// Find the location in full array.
|
||||
int idxOut = ( blockIdx.z * outNX + outx ) * outNY + outy;
|
||||
|
||||
int idxIn = ( blockIdx.z * inNX + inx ) * inNY + iny;
|
||||
|
||||
if (inx>=0 && iny>=0 && inx<inNX && iny<inNY) {
|
||||
|
||||
|
||||
imageOut[idxOut] = imageIn[idxIn];
|
||||
imageValid[idxOut] = 1;
|
||||
}
|
||||
|
@ -255,21 +285,21 @@ void cuArraysCopyExtractCorr(cuArrays<float> *imagesIn, cuArrays<float> *imagesO
|
|||
|
||||
|
||||
__global__ void cuArraysCopyExtractFixedOffset(const float *imageIn, const int inNX, const int inNY,
|
||||
float *imageOut, const int outNX, const int outNY, const int nImages,
|
||||
float *imageOut, const int outNX, const int outNY, const int nImages,
|
||||
const int offsetX, const int offsetY)
|
||||
{
|
||||
int outx = threadIdx.x + blockDim.x*blockIdx.x;
|
||||
int outx = threadIdx.x + blockDim.x*blockIdx.x;
|
||||
int outy = threadIdx.y + blockDim.y*blockIdx.y;
|
||||
|
||||
|
||||
if(outx < outNX && outy < outNY)
|
||||
{
|
||||
{
|
||||
int idxOut = (blockIdx.z * outNX + outx)*outNY+outy;
|
||||
int idxIn = (blockIdx.z*inNX + outx + offsetX)*inNY + outy + offsetY;
|
||||
int idxIn = (blockIdx.z*inNX + outx + offsetX)*inNY + outy + offsetY;
|
||||
imageOut[idxOut] = imageIn[idxIn];
|
||||
}
|
||||
}
|
||||
|
||||
/* copy a tile of images to another image, with starting pixels offsets
|
||||
/* copy a tile of images to another image, with starting pixels offsets
|
||||
* param[in] imageIn inut images
|
||||
* param[out] imageOut output images of dimension nImages*outNX*outNY
|
||||
*/
|
||||
|
@ -279,23 +309,24 @@ void cuArraysCopyExtract(cuArrays<float> *imagesIn, cuArrays<float> *imagesOut,
|
|||
const int nthreads = 16;
|
||||
dim3 threadsperblock(nthreads, nthreads,1);
|
||||
dim3 blockspergrid(IDIVUP(imagesOut->height,nthreads), IDIVUP(imagesOut->width,nthreads), imagesOut->count);
|
||||
cuArraysCopyExtractFixedOffset<<<blockspergrid, threadsperblock,0, stream>>>(imagesIn->devData, imagesIn->height, imagesIn->width,
|
||||
cuArraysCopyExtractFixedOffset<<<blockspergrid, threadsperblock,0, stream>>>(imagesIn->devData, imagesIn->height, imagesIn->width,
|
||||
imagesOut->devData, imagesOut->height, imagesOut->width, imagesOut->count, offset.x, offset.y);
|
||||
getLastCudaError("cuArraysCopyExtract error");
|
||||
}
|
||||
|
||||
//
|
||||
|
||||
__global__ void cuArraysCopyExtract_C2C_FixedOffset(const float2 *imageIn, const int inNX, const int inNY,
|
||||
float2 *imageOut, const int outNX, const int outNY, const int nImages,
|
||||
float2 *imageOut, const int outNX, const int outNY, const int nImages,
|
||||
const int offsetX, const int offsetY)
|
||||
{
|
||||
int outx = threadIdx.x + blockDim.x*blockIdx.x;
|
||||
int outx = threadIdx.x + blockDim.x*blockIdx.x;
|
||||
int outy = threadIdx.y + blockDim.y*blockIdx.y;
|
||||
|
||||
|
||||
if(outx < outNX && outy < outNY)
|
||||
{
|
||||
{
|
||||
int idxOut = (blockIdx.z * outNX + outx)*outNY+outy;
|
||||
int idxIn = (blockIdx.z*inNX + outx + offsetX)*inNY + outy + offsetY;
|
||||
int idxIn = (blockIdx.z*inNX + outx + offsetX)*inNY + outy + offsetY;
|
||||
imageOut[idxOut] = imageIn[idxIn];
|
||||
}
|
||||
}
|
||||
|
@ -311,27 +342,64 @@ void cuArraysCopyExtract(cuArrays<float2> *imagesIn, cuArrays<float2> *imagesOut
|
|||
//imagesIn->debuginfo(stream);
|
||||
//imagesOut->debuginfo(stream);
|
||||
cuArraysCopyExtract_C2C_FixedOffset<<<blockspergrid, threadsperblock,0, stream>>>
|
||||
(imagesIn->devData, imagesIn->height, imagesIn->width,
|
||||
(imagesIn->devData, imagesIn->height, imagesIn->width,
|
||||
imagesOut->devData, imagesOut->height, imagesOut->width, imagesOut->count, offset.x, offset.y);
|
||||
getLastCudaError("cuArraysCopyExtractC2C error");
|
||||
}
|
||||
//
|
||||
|
||||
__global__ void cuArraysCopyExtract_C2R_FixedOffset(const float2 *imageIn, const int inNX, const int inNY,
|
||||
float *imageOut, const int outNX, const int outNY, const int nImages,
|
||||
// float3
|
||||
__global__ void cuArraysCopyExtract_C2C_FixedOffset(const float3 *imageIn, const int inNX, const int inNY,
|
||||
float3 *imageOut, const int outNX, const int outNY, const int nImages,
|
||||
const int offsetX, const int offsetY)
|
||||
{
|
||||
int outx = threadIdx.x + blockDim.x*blockIdx.x;
|
||||
int outx = threadIdx.x + blockDim.x*blockIdx.x;
|
||||
int outy = threadIdx.y + blockDim.y*blockIdx.y;
|
||||
|
||||
|
||||
if(outx < outNX && outy < outNY)
|
||||
{
|
||||
{
|
||||
int idxOut = (blockIdx.z * outNX + outx)*outNY+outy;
|
||||
int idxIn = (blockIdx.z*inNX + outx + offsetX)*inNY + outy + offsetY;
|
||||
int idxIn = (blockIdx.z*inNX + outx + offsetX)*inNY + outy + offsetY;
|
||||
imageOut[idxOut] = imageIn[idxIn];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void cuArraysCopyExtract(cuArrays<float3> *imagesIn, cuArrays<float3> *imagesOut, int2 offset, cudaStream_t stream)
|
||||
{
|
||||
//assert(imagesIn->height >= imagesOut && inNY >= outNY);
|
||||
const int nthreads = NTHREADS2D;
|
||||
dim3 threadsperblock(nthreads, nthreads,1);
|
||||
dim3 blockspergrid(IDIVUP(imagesOut->height,nthreads), IDIVUP(imagesOut->width,nthreads), imagesOut->count);
|
||||
//std::cout << "debug copyExtract" << imagesOut->width << imagesOut->height << "\n";
|
||||
//imagesIn->debuginfo(stream);
|
||||
//imagesOut->debuginfo(stream);
|
||||
cuArraysCopyExtract_C2C_FixedOffset<<<blockspergrid, threadsperblock,0, stream>>>
|
||||
(imagesIn->devData, imagesIn->height, imagesIn->width,
|
||||
imagesOut->devData, imagesOut->height, imagesOut->width, imagesOut->count, offset.x, offset.y);
|
||||
getLastCudaError("cuArraysCopyExtractFloat3 error");
|
||||
}
|
||||
|
||||
//
|
||||
|
||||
|
||||
__global__ void cuArraysCopyExtract_C2R_FixedOffset(const float2 *imageIn, const int inNX, const int inNY,
|
||||
float *imageOut, const int outNX, const int outNY, const int nImages,
|
||||
const int offsetX, const int offsetY)
|
||||
{
|
||||
int outx = threadIdx.x + blockDim.x*blockIdx.x;
|
||||
int outy = threadIdx.y + blockDim.y*blockIdx.y;
|
||||
|
||||
if(outx < outNX && outy < outNY)
|
||||
{
|
||||
int idxOut = (blockIdx.z * outNX + outx)*outNY+outy;
|
||||
int idxIn = (blockIdx.z*inNX + outx + offsetX)*inNY + outy + offsetY;
|
||||
imageOut[idxOut] = imageIn[idxIn].x;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
void cuArraysCopyExtract(cuArrays<float2> *imagesIn, cuArrays<float> *imagesOut, int2 offset, cudaStream_t stream)
|
||||
{
|
||||
//assert(imagesIn->height >= imagesOut && inNY >= outNY);
|
||||
|
@ -339,16 +407,16 @@ void cuArraysCopyExtract(cuArrays<float2> *imagesIn, cuArrays<float> *imagesOut,
|
|||
dim3 threadsperblock(nthreads, nthreads,1);
|
||||
dim3 blockspergrid(IDIVUP(imagesOut->height,nthreads), IDIVUP(imagesOut->width,nthreads), imagesOut->count);
|
||||
cuArraysCopyExtract_C2R_FixedOffset<<<blockspergrid, threadsperblock,0, stream>>>
|
||||
(imagesIn->devData, imagesIn->height, imagesIn->width,
|
||||
(imagesIn->devData, imagesIn->height, imagesIn->width,
|
||||
imagesOut->devData, imagesOut->height, imagesOut->width, imagesOut->count, offset.x, offset.y);
|
||||
getLastCudaError("cuArraysCopyExtractC2C error");
|
||||
}
|
||||
|
||||
//
|
||||
|
||||
__global__ void cuArraysCopyInsert_kernel(const float2* imageIn, const int inNX, const int inNY,
|
||||
float2* imageOut, const int outNY, const int offsetX, const int offsetY)
|
||||
{
|
||||
int inx = threadIdx.x + blockDim.x*blockIdx.x;
|
||||
int inx = threadIdx.x + blockDim.x*blockIdx.x;
|
||||
int iny = threadIdx.y + blockDim.y*blockIdx.y;
|
||||
if(inx < inNX && iny < inNY) {
|
||||
int idxOut = IDX2R(inx+offsetX, iny+offsetY, outNY);
|
||||
|
@ -363,16 +431,40 @@ void cuArraysCopyInsert(cuArrays<float2> *imageIn, cuArrays<float2> *imageOut, i
|
|||
const int nthreads = 16;
|
||||
dim3 threadsperblock(nthreads, nthreads);
|
||||
dim3 blockspergrid(IDIVUP(imageIn->height,nthreads), IDIVUP(imageIn->width,nthreads));
|
||||
cuArraysCopyInsert_kernel<<<blockspergrid, threadsperblock,0, stream>>>(imageIn->devData, imageIn->height, imageIn->width,
|
||||
cuArraysCopyInsert_kernel<<<blockspergrid, threadsperblock,0, stream>>>(imageIn->devData, imageIn->height, imageIn->width,
|
||||
imageOut->devData, imageOut->width, offsetX, offsetY);
|
||||
getLastCudaError("cuArraysCopyInsert error");
|
||||
}
|
||||
//
|
||||
// float3
|
||||
__global__ void cuArraysCopyInsert_kernel(const float3* imageIn, const int inNX, const int inNY,
|
||||
float3* imageOut, const int outNY, const int offsetX, const int offsetY)
|
||||
{
|
||||
int inx = threadIdx.x + blockDim.x*blockIdx.x;
|
||||
int iny = threadIdx.y + blockDim.y*blockIdx.y;
|
||||
if(inx < inNX && iny < inNY) {
|
||||
int idxOut = IDX2R(inx+offsetX, iny+offsetY, outNY);
|
||||
int idxIn = IDX2R(inx, iny, inNY);
|
||||
imageOut[idxOut] = make_float3(imageIn[idxIn].x, imageIn[idxIn].y, imageIn[idxIn].z);
|
||||
}
|
||||
}
|
||||
|
||||
void cuArraysCopyInsert(cuArrays<float3> *imageIn, cuArrays<float3> *imageOut, int offsetX, int offsetY, cudaStream_t stream)
|
||||
{
|
||||
const int nthreads = 16;
|
||||
dim3 threadsperblock(nthreads, nthreads);
|
||||
dim3 blockspergrid(IDIVUP(imageIn->height,nthreads), IDIVUP(imageIn->width,nthreads));
|
||||
cuArraysCopyInsert_kernel<<<blockspergrid, threadsperblock,0, stream>>>(imageIn->devData, imageIn->height, imageIn->width,
|
||||
imageOut->devData, imageOut->width, offsetX, offsetY);
|
||||
getLastCudaError("cuArraysCopyInsert error");
|
||||
}
|
||||
|
||||
//
|
||||
|
||||
__global__ void cuArraysCopyInsert_kernel(const float* imageIn, const int inNX, const int inNY,
|
||||
float* imageOut, const int outNY, const int offsetX, const int offsetY)
|
||||
{
|
||||
int inx = threadIdx.x + blockDim.x*blockIdx.x;
|
||||
int inx = threadIdx.x + blockDim.x*blockIdx.x;
|
||||
int iny = threadIdx.y + blockDim.y*blockIdx.y;
|
||||
if(inx < inNX && iny < inNY) {
|
||||
int idxOut = IDX2R(inx+offsetX, iny+offsetY, outNY);
|
||||
|
@ -387,18 +479,44 @@ void cuArraysCopyInsert(cuArrays<float> *imageIn, cuArrays<float> *imageOut, int
|
|||
const int nthreads = 16;
|
||||
dim3 threadsperblock(nthreads, nthreads);
|
||||
dim3 blockspergrid(IDIVUP(imageIn->height,nthreads), IDIVUP(imageIn->width,nthreads));
|
||||
cuArraysCopyInsert_kernel<<<blockspergrid, threadsperblock,0, stream>>>(imageIn->devData, imageIn->height, imageIn->width,
|
||||
cuArraysCopyInsert_kernel<<<blockspergrid, threadsperblock,0, stream>>>(imageIn->devData, imageIn->height, imageIn->width,
|
||||
imageOut->devData, imageOut->width, offsetX, offsetY);
|
||||
getLastCudaError("cuArraysCopyInsert Float error");
|
||||
}
|
||||
|
||||
//
|
||||
|
||||
__global__ void cuArraysCopyInsert_kernel(const int* imageIn, const int inNX, const int inNY,
|
||||
int* imageOut, const int outNY, const int offsetX, const int offsetY)
|
||||
{
|
||||
int inx = threadIdx.x + blockDim.x*blockIdx.x;
|
||||
int iny = threadIdx.y + blockDim.y*blockIdx.y;
|
||||
if(inx < inNX && iny < inNY) {
|
||||
int idxOut = IDX2R(inx+offsetX, iny+offsetY, outNY);
|
||||
int idxIn = IDX2R(inx, iny, inNY);
|
||||
imageOut[idxOut] = imageIn[idxIn];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void cuArraysCopyInsert(cuArrays<int> *imageIn, cuArrays<int> *imageOut, int offsetX, int offsetY, cudaStream_t stream)
|
||||
{
|
||||
const int nthreads = 16;
|
||||
dim3 threadsperblock(nthreads, nthreads);
|
||||
dim3 blockspergrid(IDIVUP(imageIn->height,nthreads), IDIVUP(imageIn->width,nthreads));
|
||||
cuArraysCopyInsert_kernel<<<blockspergrid, threadsperblock,0, stream>>>(imageIn->devData, imageIn->height, imageIn->width,
|
||||
imageOut->devData, imageOut->width, offsetX, offsetY);
|
||||
getLastCudaError("cuArraysCopyInsert Integer error");
|
||||
}
|
||||
//
|
||||
|
||||
|
||||
__global__ void cuArraysCopyInversePadded_kernel(float *imageIn, int inNX, int inNY, int sizeIn,
|
||||
float *imageOut, int outNX, int outNY, int sizeOut, int nImages)
|
||||
{
|
||||
int outx = threadIdx.x + blockDim.x*blockIdx.x;
|
||||
int outx = threadIdx.x + blockDim.x*blockIdx.x;
|
||||
int outy = threadIdx.y + blockDim.y*blockIdx.y;
|
||||
|
||||
|
||||
if(outx < outNX && outy < outNY)
|
||||
{
|
||||
int idxImage = blockIdx.z;
|
||||
|
@ -409,27 +527,27 @@ __global__ void cuArraysCopyInversePadded_kernel(float *imageIn, int inNX, int i
|
|||
}
|
||||
else
|
||||
{ imageOut[idxOut] = 0.0f; }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void cuArraysCopyInversePadded(cuArrays<float> *imageIn, cuArrays<float> *imageOut,cudaStream_t stream)
|
||||
{
|
||||
const int nthreads = 16;
|
||||
int nImages = imageIn->count;
|
||||
int nImages = imageIn->count;
|
||||
dim3 blockSize(nthreads, nthreads,1);
|
||||
dim3 gridSize(IDIVUP(imageOut->height,nthreads), IDIVUP(imageOut->width,nthreads), nImages);
|
||||
cuArraysCopyInversePadded_kernel<<<gridSize, blockSize, 0, stream>>>(imageIn->devData, imageIn->height, imageIn->width, imageIn->size,
|
||||
imageOut->devData, imageOut->height, imageOut->width, imageOut->size, nImages);
|
||||
getLastCudaError("cuArraysCopyInversePadded error");
|
||||
getLastCudaError("cuArraysCopyInversePadded error");
|
||||
}
|
||||
|
||||
|
||||
__global__ void cuArraysCopyPadded_R2R_kernel(float *imageIn, int inNX, int inNY, int sizeIn,
|
||||
float *imageOut, int outNX, int outNY, int sizeOut, int nImages)
|
||||
{
|
||||
int outx = threadIdx.x + blockDim.x*blockIdx.x;
|
||||
int outx = threadIdx.x + blockDim.x*blockIdx.x;
|
||||
int outy = threadIdx.y + blockDim.y*blockIdx.y;
|
||||
|
||||
|
||||
if(outx < outNX && outy < outNY)
|
||||
{
|
||||
int idxImage = blockIdx.z;
|
||||
|
@ -440,26 +558,26 @@ __global__ void cuArraysCopyPadded_R2R_kernel(float *imageIn, int inNX, int inNY
|
|||
}
|
||||
else
|
||||
{ imageOut[idxOut] = 0.0f; }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void cuArraysCopyPadded(cuArrays<float> *imageIn, cuArrays<float> *imageOut,cudaStream_t stream)
|
||||
{
|
||||
const int nthreads = 16;
|
||||
int nImages = imageIn->count;
|
||||
int nImages = imageIn->count;
|
||||
dim3 blockSize(nthreads, nthreads,1);
|
||||
dim3 gridSize(IDIVUP(imageOut->height,nthreads), IDIVUP(imageOut->width,nthreads), nImages);
|
||||
cuArraysCopyPadded_R2R_kernel<<<gridSize, blockSize, 0, stream>>>(imageIn->devData, imageIn->height, imageIn->width, imageIn->size,
|
||||
imageOut->devData, imageOut->height, imageOut->width, imageOut->size, nImages);
|
||||
getLastCudaError("cuArraysCopyPaddedR2R error");
|
||||
getLastCudaError("cuArraysCopyPaddedR2R error");
|
||||
}
|
||||
|
||||
__global__ void cuArraysCopyPadded_C2C_kernel(float2 *imageIn, int inNX, int inNY, int sizeIn,
|
||||
float2 *imageOut, int outNX, int outNY, int sizeOut, int nImages)
|
||||
{
|
||||
int outx = threadIdx.x + blockDim.x*blockIdx.x;
|
||||
int outx = threadIdx.x + blockDim.x*blockIdx.x;
|
||||
int outy = threadIdx.y + blockDim.y*blockIdx.y;
|
||||
|
||||
|
||||
if(outx < outNX && outy < outNY)
|
||||
{
|
||||
int idxImage = blockIdx.z;
|
||||
|
@ -468,31 +586,31 @@ __global__ void cuArraysCopyPadded_C2C_kernel(float2 *imageIn, int inNX, int inN
|
|||
int idxIn = IDX2R(outx, outy, inNY)+idxImage*sizeIn;
|
||||
imageOut[idxOut] = imageIn[idxIn];
|
||||
}
|
||||
else{
|
||||
imageOut[idxOut] = make_float2(0.0f, 0.0f);
|
||||
else{
|
||||
imageOut[idxOut] = make_float2(0.0f, 0.0f);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void cuArraysCopyPadded(cuArrays<float2> *imageIn, cuArrays<float2> *imageOut,cudaStream_t stream)
|
||||
{
|
||||
const int nthreads = NTHREADS2D;
|
||||
int nImages = imageIn->count;
|
||||
int nImages = imageIn->count;
|
||||
dim3 blockSize(nthreads, nthreads,1);
|
||||
dim3 gridSize(IDIVUP(imageOut->height,nthreads), IDIVUP(imageOut->width,nthreads), nImages);
|
||||
cuArraysCopyPadded_C2C_kernel<<<gridSize, blockSize, 0, stream>>>
|
||||
(imageIn->devData, imageIn->height, imageIn->width, imageIn->size,
|
||||
imageOut->devData, imageOut->height, imageOut->width, imageOut->size, nImages);
|
||||
getLastCudaError("cuArraysCopyInversePadded error");
|
||||
getLastCudaError("cuArraysCopyInversePadded error");
|
||||
}
|
||||
|
||||
__global__ void cuArraysCopyPadded_R2C_kernel(float *imageIn, int inNX, int inNY, int sizeIn,
|
||||
float2 *imageOut, int outNX, int outNY, int sizeOut, int nImages)
|
||||
{
|
||||
int outx = threadIdx.x + blockDim.x*blockIdx.x;
|
||||
int outx = threadIdx.x + blockDim.x*blockIdx.x;
|
||||
int outy = threadIdx.y + blockDim.y*blockIdx.y;
|
||||
|
||||
|
||||
if(outx < outNX && outy < outNY)
|
||||
{
|
||||
int idxImage = blockIdx.z;
|
||||
|
@ -501,42 +619,42 @@ __global__ void cuArraysCopyPadded_R2C_kernel(float *imageIn, int inNX, int inNY
|
|||
int idxIn = IDX2R(outx, outy, inNY)+idxImage*sizeIn;
|
||||
imageOut[idxOut] = make_float2(imageIn[idxIn], 0.0f);
|
||||
}
|
||||
else{
|
||||
imageOut[idxOut] = make_float2(0.0f, 0.0f);
|
||||
else{
|
||||
imageOut[idxOut] = make_float2(0.0f, 0.0f);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void cuArraysCopyPadded(cuArrays<float> *imageIn, cuArrays<float2> *imageOut,cudaStream_t stream)
|
||||
{
|
||||
const int nthreads = NTHREADS2D;
|
||||
int nImages = imageIn->count;
|
||||
int nImages = imageIn->count;
|
||||
dim3 blockSize(nthreads, nthreads,1);
|
||||
dim3 gridSize(IDIVUP(imageOut->height,nthreads), IDIVUP(imageOut->width,nthreads), nImages);
|
||||
cuArraysCopyPadded_R2C_kernel<<<gridSize, blockSize, 0, stream>>>
|
||||
(imageIn->devData, imageIn->height, imageIn->width, imageIn->size,
|
||||
imageOut->devData, imageOut->height, imageOut->width, imageOut->size, nImages);
|
||||
getLastCudaError("cuArraysCopyPadded error");
|
||||
getLastCudaError("cuArraysCopyPadded error");
|
||||
}
|
||||
|
||||
|
||||
__global__ void cuArraysSetConstant_kernel(float *image, int size, float value)
|
||||
{
|
||||
int idx = threadIdx.x + blockDim.x*blockIdx.x;
|
||||
|
||||
int idx = threadIdx.x + blockDim.x*blockIdx.x;
|
||||
|
||||
if(idx < size)
|
||||
{
|
||||
image[idx] = value;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void cuArraysSetConstant(cuArrays<float> *imageIn, float value, cudaStream_t stream)
|
||||
{
|
||||
const int nthreads = 256;
|
||||
int size = imageIn->getSize();
|
||||
int size = imageIn->getSize();
|
||||
|
||||
cuArraysSetConstant_kernel<<<IDIVUP(size, nthreads), nthreads, 0, stream>>>
|
||||
(imageIn->devData, imageIn->size, value);
|
||||
getLastCudaError("cuArraysCopyPadded error");
|
||||
getLastCudaError("cuArraysCopyPadded error");
|
||||
}
|
||||
|
|
|
@ -195,7 +195,6 @@ __device__ float2 partialSums(const float v, volatile float* shmem, const int st
|
|||
return make_float2(Sum, Sum2);
|
||||
}
|
||||
|
||||
__forceinline__ __device__ int __mul(const int a, const int b) { return a*b; }
|
||||
|
||||
template<const int Nthreads2>
|
||||
__global__ void cuCorrNormalize_kernel(
|
||||
|
@ -232,7 +231,7 @@ __global__ void cuCorrNormalize_kernel(
|
|||
templateSum += templateD[i];
|
||||
}
|
||||
templateSum = sumReduceBlock<Nthreads>(templateSum, shmem);
|
||||
|
||||
__syncthreads();
|
||||
|
||||
float templateSum2 = 0.0f;
|
||||
for (int i = tid; i < templateSize; i += Nthreads)
|
||||
|
@ -241,11 +240,12 @@ __global__ void cuCorrNormalize_kernel(
|
|||
templateSum2 += t*t;
|
||||
}
|
||||
templateSum2 = sumReduceBlock<Nthreads>(templateSum2, shmem);
|
||||
__syncthreads();
|
||||
|
||||
//if(tid ==0) printf("template sum %d %g %g \n", imageIdx, templateSum, templateSum2);
|
||||
/*********/
|
||||
|
||||
shmem[tid] = shmem[tid + Nthreads] = 0.0f;
|
||||
shmem[tid] = shmem[tid + Nthreads] = shmem[tid + 2*Nthreads] = 0.0f;
|
||||
__syncthreads();
|
||||
|
||||
float imageSum = 0.0f;
|
||||
|
@ -281,7 +281,7 @@ __global__ void cuCorrNormalize_kernel(
|
|||
if (tid < resultNY)
|
||||
{
|
||||
const int ix = iaddr/imageNY;
|
||||
const int addr = __mul(ix-templateNX, resultNY);
|
||||
const int addr = (ix-templateNX)*resultNY;
|
||||
|
||||
//printf("test norm %d %d %d %d %f\n", tid, ix, addr, addr+tid, resultD[addr + tid]);
|
||||
|
||||
|
|
|
@ -25,7 +25,7 @@ __global__ void cudaKernel_estimateSnr(const float* corrSum, const int* corrVali
|
|||
|
||||
float mean = (corrSum[idx] - maxval[idx] * maxval[idx]) / (corrValidCount[idx] - 1);
|
||||
|
||||
snrValue[idx] = maxval[idx] / mean;
|
||||
snrValue[idx] = maxval[idx] * maxval[idx] / mean;
|
||||
}
|
||||
|
||||
void cuEstimateSnr(cuArrays<float> *corrSum, cuArrays<int> *corrValidCount, cuArrays<float> *maxval, cuArrays<float> *snrValue, cudaStream_t stream)
|
||||
|
@ -55,7 +55,7 @@ void cuEstimateSnr(cuArrays<float> *corrSum, cuArrays<int> *corrValidCount, cuAr
|
|||
|
||||
|
||||
//for (int i=0; i<size; i++){
|
||||
|
||||
|
||||
// std::cout<<corrSum->hostData[i]<<std::endl;
|
||||
// std::cout<<corrValidCount->hostData[i]<<std::endl;
|
||||
|
||||
|
@ -68,3 +68,80 @@ void cuEstimateSnr(cuArrays<float> *corrSum, cuArrays<int> *corrValidCount, cuAr
|
|||
|
||||
getLastCudaError("cuda kernel estimate stats error\n");
|
||||
}
|
||||
|
||||
|
||||
template <const int BLOCKSIZE> // number of threads per block.
|
||||
__global__ void cudaKernel_estimateVar(const float* corrBatchRaw, const int NX, const int NY, const int2* maxloc, const float* maxval, float3* covValue, const int size)
|
||||
{
|
||||
|
||||
// Find image id.
|
||||
int idxImage = threadIdx.x + blockDim.x*blockIdx.x;
|
||||
|
||||
if (idxImage >= size) return;
|
||||
|
||||
// Preparation.
|
||||
int px = maxloc[idxImage].x;
|
||||
int py = maxloc[idxImage].y;
|
||||
float peak = maxval[idxImage];
|
||||
|
||||
// Check if maxval is on the margin.
|
||||
if (px-1 < 0 || py-1 <0 || px + 1 >=NX || py+1 >=NY) {
|
||||
|
||||
covValue[idxImage] = make_float3(99.0, 99.0, 99.0);
|
||||
|
||||
}
|
||||
else {
|
||||
int offset = NX * NY * idxImage;
|
||||
int idx00 = offset + (px - 1) * NY + py - 1;
|
||||
int idx01 = offset + (px - 1) * NY + py ;
|
||||
int idx02 = offset + (px - 1) * NY + py + 1;
|
||||
int idx10 = offset + (px ) * NY + py - 1;
|
||||
int idx11 = offset + (px ) * NY + py ;
|
||||
int idx12 = offset + (px ) * NY + py + 1;
|
||||
int idx20 = offset + (px + 1) * NY + py - 1;
|
||||
int idx21 = offset + (px + 1) * NY + py ;
|
||||
int idx22 = offset + (px + 1) * NY + py + 1;
|
||||
|
||||
float dxx = - ( corrBatchRaw[idx21] + corrBatchRaw[idx01] - 2*corrBatchRaw[idx11] ) * 0.5;
|
||||
float dyy = - ( corrBatchRaw[idx12] + corrBatchRaw[idx10] - 2*corrBatchRaw[idx11] ) * 0.5;
|
||||
float dxy = - ( corrBatchRaw[idx22] + corrBatchRaw[idx00] - corrBatchRaw[idx20] - corrBatchRaw[idx02] ) *0.25;
|
||||
|
||||
float n2 = fmaxf(1 - peak, 0.0);
|
||||
|
||||
int winSize = NX*NY;
|
||||
|
||||
dxx = dxx * winSize;
|
||||
dyy = dyy * winSize;
|
||||
dxy = dxy * winSize;
|
||||
|
||||
float n4 = n2*n2;
|
||||
n2 = n2 * 2;
|
||||
n4 = n4 * 0.5 * winSize;
|
||||
|
||||
float u = dxy * dxy - dxx * dyy;
|
||||
float u2 = u*u;
|
||||
|
||||
if (fabsf(u) < 1e-2) {
|
||||
|
||||
covValue[idxImage] = make_float3(99.0, 99.0, 99.0);
|
||||
|
||||
}
|
||||
else {
|
||||
float cov_xx = (- n2 * u * dyy + n4 * ( dyy*dyy + dxy*dxy) ) / u2;
|
||||
float cov_yy = (- n2 * u * dxx + n4 * ( dxx*dxx + dxy*dxy) ) / u2;
|
||||
float cov_xy = ( n2 * u * dxy - n4 * ( dxx + dyy ) * dxy ) / u2;
|
||||
covValue[idxImage] = make_float3(cov_xx, cov_yy, cov_xy);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void cuEstimateVariance(cuArrays<float> *corrBatchRaw, cuArrays<int2> *maxloc, cuArrays<float> *maxval, cuArrays<float3> *covValue, cudaStream_t stream)
|
||||
{
|
||||
|
||||
int size = corrBatchRaw->count;
|
||||
|
||||
// One dimensional launching parameters to loop over every correlation surface.
|
||||
cudaKernel_estimateVar<NTHREADS><<< IDIVUP(size, NTHREADS), NTHREADS, 0, stream>>>
|
||||
(corrBatchRaw->devData, corrBatchRaw->height, corrBatchRaw->width, maxloc->devData, maxval->devData, covValue->devData, size);
|
||||
getLastCudaError("cudaKernel_estimateVar error\n");
|
||||
}
|
||||
|
|
|
@ -7,20 +7,21 @@
|
|||
from distutils.core import setup
|
||||
from distutils.extension import Extension
|
||||
from Cython.Build import cythonize
|
||||
import os
|
||||
|
||||
os.environ["CC"] = "g++"
|
||||
import numpy
|
||||
|
||||
setup( name = 'PyCuAmpcor',
|
||||
ext_modules = cythonize(Extension(
|
||||
"PyCuAmpcor",
|
||||
sources=['PyCuAmpcor.pyx'],
|
||||
include_dirs=['/usr/local/cuda/include'], # REPLACE WITH YOUR PATH TO YOUR CUDA LIBRARY HEADERS
|
||||
include_dirs=['/usr/local/cuda/include', numpy.get_include()], # REPLACE WITH YOUR PATH TO YOUR CUDA LIBRARY HEADERS
|
||||
extra_compile_args=['-fPIC','-fpermissive'],
|
||||
extra_objects=['SlcImage.o','cuAmpcorChunk.o','cuAmpcorParameter.o','cuCorrFrequency.o',
|
||||
extra_objects=['GDALImage.o','cuAmpcorChunk.o','cuAmpcorParameter.o','cuCorrFrequency.o',
|
||||
'cuCorrNormalization.o','cuCorrTimeDomain.o','cuArraysCopy.o',
|
||||
'cuArrays.o','cuArraysPadding.o','cuOffset.o','cuOverSampler.o',
|
||||
'cuSincOverSampler.o', 'cuDeramp.o','cuAmpcorController.o'],
|
||||
extra_link_args=['-L/usr/local/cuda/lib64','-lcuda','-lcudart','-lcufft','-lcublas'], # REPLACE FIRST PATH WITH YOUR PATH TO YOUR CUDA LIBRARIES
|
||||
'cuSincOverSampler.o', 'cuDeramp.o','cuAmpcorController.o','cuEstimateStats.o'],
|
||||
extra_link_args=['-L/usr/local/cuda/lib64',
|
||||
'-L/usr/lib64/nvidia',
|
||||
'-lcuda','-lcudart','-lcufft','-lcublas','-lgdal'], # REPLACE FIRST PATH WITH YOUR PATH TO YOUR CUDA LIBRARIES
|
||||
language='c++'
|
||||
)))
|
||||
|
|
|
@ -197,6 +197,8 @@ class config(object):
|
|||
self.f.write('nomcf : ' + self.noMCF + '\n')
|
||||
self.f.write('master : ' + self.master + '\n')
|
||||
self.f.write('defomax : ' + self.defoMax + '\n')
|
||||
self.f.write('alks : ' + self.alks + '\n')
|
||||
self.f.write('rlks : ' + self.rlks + '\n')
|
||||
self.f.write('method : ' + self.unwMethod + '\n')
|
||||
self.f.write('##########################'+'\n')
|
||||
|
||||
|
|
|
@ -113,9 +113,19 @@ def extractInfoFromPickle(pckfile, inps):
|
|||
data['earthRadius'] = elp.local_radius_of_curvature(llh.lat, hdg)
|
||||
|
||||
#azspacing = burst.azimuthTimeInterval * sv.getScalarVelocity()
|
||||
azres = 20.0
|
||||
#azres = 20.0
|
||||
azspacing = sv.getScalarVelocity() / burst.PRF
|
||||
azres = burst.platform.antennaLength / 2.0
|
||||
azfact = azres / azspacing
|
||||
|
||||
burst.getInstrument()
|
||||
rgBandwidth = burst.instrument.pulseLength * burst.instrument.chirpSlope
|
||||
rgres = abs(SPEED_OF_LIGHT / (2.0 * rgBandwidth))
|
||||
rgspacing = burst.instrument.rangePixelSize
|
||||
rgfact = rgres / rgspacing
|
||||
|
||||
#data['corrlooks'] = inps.rglooks * inps.azlooks * azspacing / azres
|
||||
data['corrlooks'] = inps.rglooks * inps.azlooks / (azfact * rgfact)
|
||||
data['rglooks'] = inps.rglooks
|
||||
data['azlooks'] = inps.azlooks
|
||||
|
||||
|
@ -149,7 +159,7 @@ def runUnwrap(infile, outfile, corfile, config, costMode = None,initMethod = Non
|
|||
altitude = config['altitude']
|
||||
rangeLooks = config['rglooks']
|
||||
azimuthLooks = config['azlooks']
|
||||
#corrLooks = config['corrlooks']
|
||||
corrLooks = config['corrlooks']
|
||||
maxComponents = 20
|
||||
|
||||
snp = Snaphu()
|
||||
|
@ -163,7 +173,7 @@ def runUnwrap(infile, outfile, corfile, config, costMode = None,initMethod = Non
|
|||
snp.setAltitude(altitude)
|
||||
snp.setCorrfile(corfile)
|
||||
snp.setInitMethod(initMethod)
|
||||
# snp.setCorrLooks(corrLooks)
|
||||
snp.setCorrLooks(corrLooks)
|
||||
snp.setMaxComponents(maxComponents)
|
||||
snp.setDefoMaxCycles(defomax)
|
||||
snp.setRangeLooks(rangeLooks)
|
||||
|
@ -248,33 +258,34 @@ def runUnwrapIcu(infile, outfile):
|
|||
unwImage.finalizeImage()
|
||||
unwImage.renderHdr()
|
||||
|
||||
def runUnwrap2Stage(unwrappedIntFilename,connectedComponentsFilename,unwrapped2StageFilename, unwrapper_2stage_name=None, solver_2stage=None):
|
||||
def runUnwrap2Stage(unwrappedIntFilename,connectedComponentsFilename,unwrapped2StageFilename,
|
||||
unwrapper_2stage_name=None, solver_2stage=None):
|
||||
|
||||
if unwrapper_2stage_name is None:
|
||||
unwrapper_2stage_name = 'REDARC0'
|
||||
|
||||
if solver_2stage is None:
|
||||
# If unwrapper_2state_name is MCF then solver is ignored
|
||||
# and relaxIV MCF solver is used by default
|
||||
solver_2stage = 'pulp'
|
||||
|
||||
print('Unwrap 2 Stage Settings:')
|
||||
print('Name: %s'%unwrapper_2stage_name)
|
||||
print('Solver: %s'%solver_2stage)
|
||||
|
||||
inpFile = unwrappedIntFilename
|
||||
ccFile = connectedComponentsFilename
|
||||
outFile = unwrapped2StageFilename
|
||||
|
||||
# Hand over to 2Stage unwrap
|
||||
unw = UnwrapComponents()
|
||||
unw.setInpFile(inpFile)
|
||||
unw.setConnCompFile(ccFile)
|
||||
unw.setOutFile(outFile)
|
||||
unw.setSolver(solver_2stage)
|
||||
unw.setRedArcs(unwrapper_2stage_name)
|
||||
unw.unwrapComponents()
|
||||
return
|
||||
if unwrapper_2stage_name is None:
|
||||
unwrapper_2stage_name = 'REDARC0'
|
||||
|
||||
if solver_2stage is None:
|
||||
# If unwrapper_2state_name is MCF then solver is ignored
|
||||
# and relaxIV MCF solver is used by default
|
||||
solver_2stage = 'pulp'
|
||||
|
||||
print('Unwrap 2 Stage Settings:')
|
||||
print('Name: %s'%unwrapper_2stage_name)
|
||||
print('Solver: %s'%solver_2stage)
|
||||
|
||||
inpFile = unwrappedIntFilename
|
||||
ccFile = connectedComponentsFilename
|
||||
outFile = unwrapped2StageFilename
|
||||
|
||||
# Hand over to 2Stage unwrap
|
||||
unw = UnwrapComponents()
|
||||
unw.setInpFile(inpFile)
|
||||
unw.setConnCompFile(ccFile)
|
||||
unw.setOutFile(outFile)
|
||||
unw.setSolver(solver_2stage)
|
||||
unw.setRedArcs(unwrapper_2stage_name)
|
||||
unw.unwrapComponents()
|
||||
return
|
||||
|
||||
|
||||
def main(iargs=None):
|
||||
|
@ -293,24 +304,26 @@ def main(iargs=None):
|
|||
|
||||
if inps.method != 'icu':
|
||||
|
||||
masterShelveDir = os.path.join(interferogramDir , 'masterShelve')
|
||||
if not os.path.exists(masterShelveDir):
|
||||
os.makedirs(masterShelveDir)
|
||||
masterShelveDir = os.path.join(interferogramDir , 'masterShelve')
|
||||
if not os.path.exists(masterShelveDir):
|
||||
os.makedirs(masterShelveDir)
|
||||
|
||||
inps.master = os.path.dirname(inps.master)
|
||||
cpCmd='cp ' + os.path.join(inps.master, 'data*') +' '+masterShelveDir
|
||||
os.system(cpCmd)
|
||||
pckfile = os.path.join(masterShelveDir,'data')
|
||||
print(pckfile)
|
||||
metadata = extractInfoFromPickle(pckfile, inps)
|
||||
|
||||
inps.master = os.path.dirname(inps.master)
|
||||
cpCmd='cp ' + os.path.join(inps.master, 'data*') +' '+masterShelveDir
|
||||
os.system(cpCmd)
|
||||
pckfile = os.path.join(masterShelveDir,'data')
|
||||
print(pckfile)
|
||||
metadata = extractInfoFromPickle(pckfile, inps)
|
||||
########
|
||||
print ('unwrapping method : ' , inps.method)
|
||||
if inps.method == 'snaphu':
|
||||
if inps.nomcf:
|
||||
fncall = runUnwrap
|
||||
else:
|
||||
fncall = runUnwrapMcf
|
||||
fncall(inps.intfile, inps.unwprefix + '_snaphu.unw', inps.cohfile, metadata, defomax=inps.defomax)
|
||||
if inps.nomcf:
|
||||
fncall = runUnwrap
|
||||
else:
|
||||
fncall = runUnwrapMcf
|
||||
fncall(inps.intfile, inps.unwprefix + '_snaphu.unw', inps.cohfile, metadata, defomax=inps.defomax)
|
||||
|
||||
elif inps.method == 'snaphu2stage':
|
||||
if inps.nomcf:
|
||||
fncall = runUnwrap
|
||||
|
@ -319,11 +332,12 @@ def main(iargs=None):
|
|||
fncall(inps.intfile, inps.unwprefix + '_snaphu.unw', inps.cohfile, metadata, defomax=inps.defomax)
|
||||
|
||||
# adding in the two-stage
|
||||
runUnwrap2Stage(inps.unwprefix + '_snaphu.unw', inps.unwprefix + '_snaphu.unw.conncomp',inps.unwprefix + '_snaphu2stage.unw')
|
||||
|
||||
runUnwrap2Stage(inps.unwprefix + '_snaphu.unw',
|
||||
inps.unwprefix + '_snaphu.unw.conncomp',
|
||||
inps.unwprefix + '_snaphu2stage.unw')
|
||||
|
||||
elif inps.method == 'icu':
|
||||
runUnwrapIcu(inps.intfile, inps.unwprefix + '_icu.unw')
|
||||
runUnwrapIcu(inps.intfile, inps.unwprefix + '_icu.unw')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
|
|
@ -52,7 +52,7 @@ def generate(env):
|
|||
# default flags for the NVCC compiler
|
||||
env['STATICNVCCFLAGS'] = ''
|
||||
env['SHAREDNVCCFLAGS'] = ''
|
||||
env['ENABLESHAREDNVCCFLAG'] = '-arch=sm_35 -shared -Xcompiler -fPIC'
|
||||
env['ENABLESHAREDNVCCFLAG'] = '-shared -Xcompiler -fPIC'
|
||||
|
||||
# default NVCC commands
|
||||
env['STATICNVCCCMD'] = '$NVCC -o $TARGET -c $NVCCFLAGS $STATICNVCCFLAGS $SOURCES'
|
||||
|
@ -153,7 +153,7 @@ def generate(env):
|
|||
#env.Append(LIBPATH=[cudaSDKPath + '/lib', cudaSDKPath + '/common/lib' + cudaSDKSubLibDir, cudaToolkitPath + '/lib'])
|
||||
|
||||
env.Append(CUDACPPPATH=[cudaToolkitPath + '/include'])
|
||||
env.Append(CUDALIBPATH=[cudaToolkitPath + '/lib', cudaToolkitPath + '/lib64'])
|
||||
env.Append(CUDALIBPATH=[cudaToolkitPath + '/lib', cudaToolkitPath + '/lib64', '/lib64'])
|
||||
env.Append(CUDALIBS=['cudart'])
|
||||
|
||||
def exists(env):
|
||||
|
|
|
@ -12,7 +12,7 @@
|
|||
from __future__ import print_function
|
||||
import sys
|
||||
import os
|
||||
import urllib2
|
||||
import urllib
|
||||
import getopt
|
||||
import re
|
||||
import shutil
|
||||
|
@ -57,7 +57,7 @@ def print2log(msg, withtime=True, cmd=False):
|
|||
if withtime:
|
||||
now = datetime.datetime.today()
|
||||
msg = "%s >> %s" % (now.isoformat(), msg)
|
||||
LOGFILE.write(msg + '\n')
|
||||
LOGFILE.write((msg + '\n').encode('utf-8'))
|
||||
LOGFILE.flush()
|
||||
os.fsync(LOGFILE)
|
||||
|
||||
|
@ -157,9 +157,9 @@ def downloadfile(url, fname, repeat=1):
|
|||
counter = 0
|
||||
while counter < repeat:
|
||||
try:
|
||||
response = urllib2.urlopen(url)
|
||||
response = urllib.request.urlopen(url)
|
||||
break
|
||||
except urllib2.URLError, e:
|
||||
except urllib.request.URLError as e:
|
||||
counter += 1
|
||||
if hasattr(e, 'reason'):
|
||||
print2log("Failed to reach server. Reason: %s" % e.reason)
|
||||
|
@ -851,7 +851,7 @@ class ISCEDeps(object):
|
|||
f = open(self.config, 'rb')
|
||||
lines = f.readlines()
|
||||
for line in lines:
|
||||
m = re.match("([^#].*?)=([^#]+?)$", line.strip())
|
||||
m = re.match("([^#].*?)=([^#]+?)$", line.strip().decode('utf-8'))
|
||||
if m:
|
||||
var = m.group(1).strip()
|
||||
val = m.group(2).strip()
|
||||
|
@ -867,7 +867,7 @@ def readSetupConfig(setup_config):
|
|||
f = open(setup_config, 'rb')
|
||||
lines = f.readlines()
|
||||
for line in lines:
|
||||
m = re.match("([^#].*?)=([^#]+?)$", line.strip())
|
||||
m = re.match("([^#].*?)=([^#]+?)$", line.strip().decode('utf-8'))
|
||||
if m:
|
||||
var = m.group(1).strip()
|
||||
val = m.group(2).strip().replace('"', '')
|
||||
|
@ -885,7 +885,7 @@ def checkArgs(args):
|
|||
"""
|
||||
try:
|
||||
opts, args = getopt.getopt(args, "h", ["help", "prefix=", "ping=", "config=", "uname=", "download=", "unpack=", "install=", "gcc=", "gpp=", "verbose"])
|
||||
except getopt.GetoptError, err:
|
||||
except getopt.GetoptError as err:
|
||||
print2log("ProgError: %s" % str(err))
|
||||
usage()
|
||||
sys.exit(2)
|
||||
|
|
Loading…
Reference in New Issue