Merge remote-tracking branch 'upstream/main' into main
commit
7b0856fc27
|
@ -3,7 +3,8 @@
|
||||||
#
|
#
|
||||||
#
|
#
|
||||||
import logging
|
import logging
|
||||||
import os,gdal
|
import os
|
||||||
|
from osgeo import gdal
|
||||||
import isceobj
|
import isceobj
|
||||||
from isceobj.Constants import SPEED_OF_LIGHT
|
from isceobj.Constants import SPEED_OF_LIGHT
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
|
@ -9,7 +9,7 @@ import logging
|
||||||
from components.stdproc.stdproc import crossmul
|
from components.stdproc.stdproc import crossmul
|
||||||
from iscesys.ImageUtil.ImageUtil import ImageUtil as IU
|
from iscesys.ImageUtil.ImageUtil import ImageUtil as IU
|
||||||
import os
|
import os
|
||||||
import gdal
|
from osgeo import gdal
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
logger = logging.getLogger('isce.insar.runInterferogram')
|
logger = logging.getLogger('isce.insar.runInterferogram')
|
||||||
|
|
|
@ -6,6 +6,7 @@
|
||||||
|
|
||||||
#include <sys/types.h>
|
#include <sys/types.h>
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
#define PERMS 0666
|
#define PERMS 0666
|
||||||
/* IO library:
|
/* IO library:
|
||||||
|
|
|
@ -2,6 +2,7 @@
|
||||||
#include <sys/time.h>
|
#include <sys/time.h>
|
||||||
#include <sys/times.h>
|
#include <sys/times.h>
|
||||||
#include <sys/resource.h>
|
#include <sys/resource.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
#if defined(NEEDS_F77_TRANSLATION)
|
#if defined(NEEDS_F77_TRANSLATION)
|
||||||
|
|
||||||
|
|
|
@ -1,3 +1,5 @@
|
||||||
|
void geolocate_(double *, double *, double *, double *, int *, double *, double *, double *, double *, double *);
|
||||||
|
|
||||||
// A wrapper for the Fortran geolocation code
|
// A wrapper for the Fortran geolocation code
|
||||||
int geolocate_wrapper(double *pos, double *vel, double range, double squint, int side, double a, double e2, double *llh, double *lookAngle, double *incidenceAngle)
|
int geolocate_wrapper(double *pos, double *vel, double range, double squint, int side, double a, double e2, double *llh, double *lookAngle, double *incidenceAngle)
|
||||||
{
|
{
|
||||||
|
|
|
@ -11,6 +11,8 @@ if(CMAKE_CUDA_COMPILER)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
add_subdirectory(GPUgeo2rdr)
|
add_subdirectory(GPUgeo2rdr)
|
||||||
|
add_subdirectory(GPUresampslc)
|
||||||
|
add_subdirectory(GPUtopozero)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
InstallSameDir(__init__.py)
|
InstallSameDir(__init__.py)
|
||||||
|
|
|
@ -17,3 +17,8 @@ target_link_libraries(GPUgeo2rdr PRIVATE
|
||||||
if(TARGET OpenMP::OpenMP_CXX)
|
if(TARGET OpenMP::OpenMP_CXX)
|
||||||
target_link_libraries(GPUgeo2rdr PRIVATE OpenMP::OpenMP_CXX)
|
target_link_libraries(GPUgeo2rdr PRIVATE OpenMP::OpenMP_CXX)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
InstallSameDir(
|
||||||
|
GPUgeo2rdr
|
||||||
|
__init__.py
|
||||||
|
)
|
||||||
|
|
|
@ -0,0 +1,19 @@
|
||||||
|
cython_add_module(GPUresampslc
|
||||||
|
GPUresampslc.pyx
|
||||||
|
cuda/GPUresamp.cu
|
||||||
|
src/Interpolator.cpp
|
||||||
|
src/Poly2d.cpp
|
||||||
|
src/ResampMethods.cpp
|
||||||
|
src/ResampSlc.cpp
|
||||||
|
)
|
||||||
|
target_include_directories(GPUresampslc PRIVATE
|
||||||
|
include
|
||||||
|
)
|
||||||
|
target_link_libraries(GPUresampslc PRIVATE
|
||||||
|
isce2::DataAccessorLib
|
||||||
|
OpenMP::OpenMP_CXX
|
||||||
|
)
|
||||||
|
InstallSameDir(
|
||||||
|
GPUresampslc
|
||||||
|
__init__.py
|
||||||
|
)
|
|
@ -0,0 +1,26 @@
|
||||||
|
cython_add_module(GPUtopozero
|
||||||
|
GPUtopozero.pyx
|
||||||
|
cuda/gpuTopo.cu
|
||||||
|
src/AkimaLib.cpp
|
||||||
|
src/Ellipsoid.cpp
|
||||||
|
src/LinAlg.cpp
|
||||||
|
src/Orbit.cpp
|
||||||
|
src/Peg.cpp
|
||||||
|
src/PegTrans.cpp
|
||||||
|
src/Poly2d.cpp
|
||||||
|
src/Topo.cpp
|
||||||
|
src/TopoMethods.cpp
|
||||||
|
src/Topozero.cpp
|
||||||
|
src/UniformInterp.cpp
|
||||||
|
)
|
||||||
|
target_include_directories(GPUtopozero PRIVATE
|
||||||
|
include
|
||||||
|
)
|
||||||
|
target_link_libraries(GPUtopozero PRIVATE
|
||||||
|
isce2::DataAccessorLib
|
||||||
|
OpenMP::OpenMP_CXX
|
||||||
|
)
|
||||||
|
InstallSameDir(
|
||||||
|
GPUtopozero
|
||||||
|
__init__.py
|
||||||
|
)
|
|
@ -13,7 +13,7 @@ add_subdirectory(alos2proc)
|
||||||
add_subdirectory(alos2proc_f)
|
add_subdirectory(alos2proc_f)
|
||||||
add_subdirectory(rfi)
|
add_subdirectory(rfi)
|
||||||
add_subdirectory(mdx)
|
add_subdirectory(mdx)
|
||||||
|
add_subdirectory(UnwrapComp)
|
||||||
InstallSameDir(
|
InstallSameDir(
|
||||||
__init__.py
|
__init__.py
|
||||||
)
|
)
|
||||||
|
|
|
@ -1,7 +1,6 @@
|
||||||
# Early exit if prereqs not available
|
# Early exit if prereqs not available
|
||||||
if(NOT TARGET GDAL::GDAL
|
if(NOT TARGET GDAL::GDAL
|
||||||
OR NOT TARGET Python::NumPy
|
OR NOT TARGET Python::NumPy
|
||||||
OR NOT TARGET CUDA::cublas
|
|
||||||
OR NOT TARGET CUDA::cufft
|
OR NOT TARGET CUDA::cufft
|
||||||
)
|
)
|
||||||
return()
|
return()
|
||||||
|
@ -14,7 +13,6 @@ cython_add_module(PyCuAmpcor
|
||||||
src/PyCuAmpcor.pyx
|
src/PyCuAmpcor.pyx
|
||||||
src/GDALImage.cu
|
src/GDALImage.cu
|
||||||
src/SConscript
|
src/SConscript
|
||||||
src/SlcImage.cu
|
|
||||||
src/cuAmpcorChunk.cu
|
src/cuAmpcorChunk.cu
|
||||||
src/cuAmpcorController.cu
|
src/cuAmpcorController.cu
|
||||||
src/cuAmpcorParameter.cu
|
src/cuAmpcorParameter.cu
|
||||||
|
@ -35,7 +33,6 @@ target_include_directories(PyCuAmpcor PRIVATE
|
||||||
)
|
)
|
||||||
target_link_libraries(PyCuAmpcor PRIVATE
|
target_link_libraries(PyCuAmpcor PRIVATE
|
||||||
CUDA::cufft
|
CUDA::cufft
|
||||||
CUDA::cublas
|
|
||||||
GDAL::GDAL
|
GDAL::GDAL
|
||||||
Python::NumPy
|
Python::NumPy
|
||||||
)
|
)
|
||||||
|
|
|
@ -0,0 +1,416 @@
|
||||||
|
# PyCuAmpcor - Amplitude Cross-Correlation with GPU
|
||||||
|
|
||||||
|
## Contents
|
||||||
|
|
||||||
|
* [1. Introduction](#1-introduction)
|
||||||
|
* [2. Installation](#2-installation)
|
||||||
|
* [3. User Guide](#3-user-guide)
|
||||||
|
* [4. List of Parameters](#4-list-of-parameters)
|
||||||
|
* [5. List of Procedures](#5-list-of-procedures)
|
||||||
|
|
||||||
|
## 1. Introduction
|
||||||
|
|
||||||
|
Ampcor (Amplitude cross correlation) in InSAR processing offers an estimate of spatial displacements (offsets) with the feature tracking method (also called as speckle tracking or pixel tracking). The offsets are in dimensions of a pixel or sub-pixel (with additional oversampling).
|
||||||
|
|
||||||
|
In practice, we
|
||||||
|
|
||||||
|
* choose a rectangle window, $R(x,y)$, from the reference image, serving as the template,
|
||||||
|
|
||||||
|
* choose a series of windows of the same size, $S(x+u, y+v)$, from the search image; the search windows are shifted in location by $(u,v)$;
|
||||||
|
|
||||||
|
* perform cross-correlation between the search windows with the reference window, to obtain the normalized correlation surface $c(u,v)$;
|
||||||
|
|
||||||
|
* find the maximum of $c(u,v)$ while its location, $(u_m,v_m)$, provides an estimate of the offset.
|
||||||
|
|
||||||
|
A detailed formulation can be found, e.g., by J. P. Lewis with [the frequency domain approach](http://scribblethink.org/Work/nvisionInterface/nip.html).
|
||||||
|
|
||||||
|
PyCuAmpcor follows the same procedure as the FORTRAN code, ampcor.F, in RIOPAC. In order to optimize the performance on GPU, some implementations are slightly different. In the [list the procedures](#5-list-of-procedures), we show the detailed steps of PyCuAmpcor, as well as their differences.
|
||||||
|
|
||||||
|
## 2. Installation
|
||||||
|
|
||||||
|
### 2.1 Installation with ISCE2
|
||||||
|
|
||||||
|
PyCuAmpcor is included in [ISCE2](https://github.com/isce-framework/isce2), and can be compiled/installed by CMake or SCons, together with ISCE2. An installation guide can be found at [isce-framework](https://github.com/isce-framework/isce2#building-isce).
|
||||||
|
|
||||||
|
Some special notices for PyCuAmpcor:
|
||||||
|
|
||||||
|
* PyCuAmpcor now uses the GDAL VRT driver to read image files. The memory-map accelerated I/O is only supported by GDAL version >=3.1.0. Earlier versions of GDAL are supported, but run slower.
|
||||||
|
|
||||||
|
* PyCuAmpcor offers a debug mode which outputs intermediate results. For end users, you may disable the debug mode by
|
||||||
|
|
||||||
|
* CMake, use the Release build type *-DCMAKE_BUILD_TYPE=Release*
|
||||||
|
* SCons, it is disabled by default with the -DNDEBUG flag in SConscript
|
||||||
|
|
||||||
|
* PyCuAmpcor requires CUDA-Enabled GPUs with compute capabilities >=2.0. You may specify the targeted architecture by
|
||||||
|
|
||||||
|
* CMake, add the flag *-DCMAKE_CUDA_FLAGS="-arch=sm_60"*, sm_35 for K40/80, sm_60 for P100, sm_70 for V100.
|
||||||
|
|
||||||
|
* SCons, modify the *scons_tools/cuda.py* file by adding *-arch=sm_60* to *env['ENABLESHAREDNVCCFLAG']*.
|
||||||
|
|
||||||
|
Note that if the *-arch* option is not specified, CUDA 10 uses sm_30 as default while CUDA 11 uses sm_52 as default. GPU architectures with lower compute capabilities will not run the compiled code properly.
|
||||||
|
|
||||||
|
### 2.2 Standalone Installation
|
||||||
|
|
||||||
|
You may also install PyCuAmpcor as a standalone package.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# go to PyCuAmpcor source directory
|
||||||
|
cd contrib/PyCuAmpcor/src
|
||||||
|
# edit Makefile to provide the correct gdal include path and gpu architecture to NVCCFLAGS
|
||||||
|
# call make to compile
|
||||||
|
make
|
||||||
|
# install
|
||||||
|
python3 setup.py install
|
||||||
|
```
|
||||||
|
|
||||||
|
## 3. User Guide
|
||||||
|
|
||||||
|
The main procedures of PyCuAmpcor are implemented with CUDA/C++. A Python interface to configure and run PyCuAmpcor is offered. Sample python scripts are provided in *contrib/PyCuAmpcor/examples* directory.
|
||||||
|
|
||||||
|
### 3.1 cuDenseOffsets.py
|
||||||
|
|
||||||
|
*cuDenseOffsets.py*, as also included in InSAR processing stacks, serves as a general purpose script to run PyCuAmpcor. It uses *argparse* to pass parameters, either from a command line
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cuDenseOffsets.py -r 20151120.slc.full -s 20151214.slc.full --outprefix ./20151120_20151214/offset --ww 64 --wh 64 --oo 32 --kw 300 --kh 100 --nwac 32 --nwdc 1 --sw 20 --sh 20 --gpuid 2
|
||||||
|
```
|
||||||
|
|
||||||
|
or by a shell script
|
||||||
|
|
||||||
|
```
|
||||||
|
#!/bin/bash
|
||||||
|
reference=./merged/SLC/20151120/20151120.slc.full # reference image name
|
||||||
|
secondary=./merged/SLC/20151214/20151214.slc.full # secondary image name
|
||||||
|
ww=64 # template window width
|
||||||
|
wh=64 # template window height
|
||||||
|
sw=20 # (half) search range along width
|
||||||
|
sh=20 # (half) search range along height
|
||||||
|
kw=300 # skip between windows along width
|
||||||
|
kh=100 # skip between windows along height
|
||||||
|
mm=0 # margin to be neglected
|
||||||
|
gross=0 # whether to use a varying gross offset
|
||||||
|
azshift=0 # constant gross offset along height/azimuth
|
||||||
|
rgshift=0 # constant gross offset along width/range
|
||||||
|
deramp=0 # 0 for mag (TOPS), 1 for complex
|
||||||
|
oo=32 # correlation surface oversampling factor
|
||||||
|
outprefix=./merged/20151120_20151214/offset # output prefix
|
||||||
|
outsuffix=_ww64_wh64 # output suffix
|
||||||
|
gpuid=0 # GPU device ID
|
||||||
|
nstreams=2 # number of CUDA streams
|
||||||
|
usemmap=1 # whether to use memory-map i/o
|
||||||
|
mmapsize=8 # buffer size in GB for memory map
|
||||||
|
nwac=32 # number of windows in a batch along width
|
||||||
|
nwdc=1 # number of windows in a batch along height
|
||||||
|
|
||||||
|
rm $outprefix$outsuffix*
|
||||||
|
cuDenseOffsets.py --reference $reference --secondary $secondary --ww $ww --wh $wh --sw $sw --sh $sh --mm $mm --kw $kw --kh $kh --gross $gross --rr $rgshift --aa $azshift --oo $oo --deramp $deramp --outprefix $outprefix --outsuffix $outsuffix --gpuid $gpuid --usemmap $usemmap --mmapsize $mmapsize --nwac $nwac --nwdc $nwdc
|
||||||
|
```
|
||||||
|
|
||||||
|
Note that in PyCuAmpcor, the following names for directions are equivalent:
|
||||||
|
* row, height, down, azimuth, along the track.
|
||||||
|
* column, width, across, range, along the sight.
|
||||||
|
|
||||||
|
In the above script, the computation starts from the (mm+sh, mm+sw) pixel in the reference image, take a series of template windows of size (wh, ww) with a skip (sh, sw), cross-correlate with the corresponding windows in the secondary image, and iterate till the end of the images. The output offset fields are stored in *outprefix+outputsuffix+'.bip'*, which is in BIP format, i.e., each pixel has two bands of float32 data, (offsetDown, offsetAcross). The total number of pixels is given by the total number of windows (numberWindowDown, numberWindowAcross), which is computed by the script and also saved to the xml file.
|
||||||
|
|
||||||
|
If you are interested in a particular region instead of the whole image, you may specify the location of the starting pixel (in reference image) and the number of windows desired by adding
|
||||||
|
|
||||||
|
```
|
||||||
|
--startpixelac $startPixelAcross --startpixeldw $startPixelDown --nwa $numberOfWindowsAcross --nwd $numberOfWindowsDown
|
||||||
|
```
|
||||||
|
|
||||||
|
PyCuAmpcor supports two types of gross offset fields,
|
||||||
|
* static (--gross=0), i.e., a constant shift between reference and secondary images. The static gross offsets can be passed by *--rr $rgshift --aa $azshift*. Note that the margin as well as the starting pixel may be adjusted.
|
||||||
|
* dynamic (--gross=1), i.e., shifts between reference windows and secondary windows are varying in different locations. This is helpful to reduce the search range if you have a prior knowledge of the estimated offset fields, e.g., the velocity model of glaciers. You may prepare a BIP input file of the varying gross offsets (same format as the output offset fields), and use the option *--gross-file $grossOffsetFilename*. If you need the coordinates of reference windows, you may run *cuDenseOffsets.py* at first to find out the location of the starting pixel and the total number of windows. The coordinate for the starting pixel of the (iDown, iAcross) window will be (startPixelDown+iDown\*skipDown, startPixelAcross+iAcross\*skipAcross).
|
||||||
|
|
||||||
|
### 3.2 Customized Python Scripts
|
||||||
|
|
||||||
|
If you need more control of the computation, you may follow the examples to create your own Python script. The general steps are
|
||||||
|
* create a PyCuAmpcor instance
|
||||||
|
```python
|
||||||
|
# if installed with ISCE2
|
||||||
|
from isce.contrib.PyCuAmpcor.PyCuAmpcor import PyCuAmpcor
|
||||||
|
# if standalone
|
||||||
|
from PyCuAmpcor import PyCuAmpcr
|
||||||
|
# create an instance
|
||||||
|
objOffset = PyCuAmpcor()
|
||||||
|
```
|
||||||
|
|
||||||
|
* set various parameters, e.g., (see a [list of configurable parameters](#4-list-of-parameters) below)
|
||||||
|
```python
|
||||||
|
objOffset.referenceImageName="20151120.slc.full.vrt"
|
||||||
|
...
|
||||||
|
objOffset.windowSizeWidth = 64
|
||||||
|
...
|
||||||
|
```
|
||||||
|
|
||||||
|
* ask CUDA/C++ to check/initialize parameters
|
||||||
|
```python
|
||||||
|
objOffset.setupParams()
|
||||||
|
```
|
||||||
|
|
||||||
|
* set up the starting pixel(s) and gross offsets
|
||||||
|
```python
|
||||||
|
objOffset.referenceStartPixelDownStatic = objOffset.halfSearchRangeDown
|
||||||
|
objOffset.referenceStartPixelAcrossStatic = objOffset.halfSearchRangeDown
|
||||||
|
# if static gross offset
|
||||||
|
objOffset.setConstantGrossOffset(0, 0)
|
||||||
|
# if dynamic gross offset, computed and stored in vD, vA
|
||||||
|
objOffset.setVaryingGrossOffset(vD, vA)
|
||||||
|
# check whether all windows are within the image range
|
||||||
|
objOffset.checkPixelInImageRange()
|
||||||
|
```
|
||||||
|
|
||||||
|
* and finally, run PyCuAmpcor
|
||||||
|
```python
|
||||||
|
objOffset.runAmpcor()
|
||||||
|
```
|
||||||
|
|
||||||
|
## 4. List of Parameters
|
||||||
|
|
||||||
|
**Image Parameters**
|
||||||
|
|
||||||
|
| PyCuAmpcor | Notes |
|
||||||
|
| :--- | :---- |
|
||||||
|
| referenceImageName | The file name of the reference/template image |
|
||||||
|
| referenceImageHeight | The height of the reference image |
|
||||||
|
| referenceImageWidth | The width of the reference image |
|
||||||
|
| secondaryImageName | The file name of the secondary/search image |
|
||||||
|
| secondaryImageHeight | The height of the secondary image |
|
||||||
|
| secondaryImageWidth | The width of the secondary image |
|
||||||
|
| grossOffsetImageName | The output file name for gross offsets |
|
||||||
|
| offsetImageName | The output file name for dense offsets |
|
||||||
|
| snrImageName | The output file name for signal-noise-ratio of the correlation |
|
||||||
|
| covImageName | The output file name for variance of the correlation surface |
|
||||||
|
|
||||||
|
PyCuAmpcor now uses exclusively the GDAL driver to read images, only single-precision binary data are supported. (Image heights/widths are still required as inputs; they are mainly for dimension checking. We will update later to read them with the GDAL driver). Multi-band is not currently supported, but can be added if desired.
|
||||||
|
|
||||||
|
The offset output is arranged in BIP format, with each pixel (azimuth offset, range offset). In addition to a static gross offset (i.e., a constant for all search windows), PyCuAmpcor supports varying gross offsets as inputs (e.g., for glaciers, users can compute the gross offsets with the velocity model for different locations and use them as inputs for PyCuAmpcor.
|
||||||
|
|
||||||
|
The offsetImage only outputs the (dense) offset values computed from the cross-correlations. Users need to add offsetImage and grossOffsetImage to obtain the total offsets.
|
||||||
|
|
||||||
|
The dimension/direction names used in PyCuAmpcor are:
|
||||||
|
* the inner-most dimension x(i): row, height, down, azimuth, along the track.
|
||||||
|
* the outer-most dimension y(j): column, width, across, range, along the sight.
|
||||||
|
|
||||||
|
Note that ampcor.F and GDAL in general use y for rows and x for columns.
|
||||||
|
|
||||||
|
Note also PyCuAmpcor parameters refer to the names used by the PyCuAmpcor Python class. They may be different from those used in C/C++/CUDA, or the cuDenseOffsets.py args.
|
||||||
|
|
||||||
|
**Process Parameters**
|
||||||
|
|
||||||
|
| PyCuAmpcor | Notes |
|
||||||
|
| :--- | :---- |
|
||||||
|
| devID | The CUDA GPU to be used for computation, usually=0, or users can use the CUDA_VISIBLE_DEVICES=n enviromental variable to choose GPU |
|
||||||
|
| nStreams | The number of CUDA streams to be used, recommended=2, to overlap the CUDA kernels with data copying, more streams require more memory which isn't alway better |
|
||||||
|
| useMmap | Whether to use memory map cached file I/O, recommended=1, supported by GDAL vrt driver (needs >=3.1.0) and GeoTIFF |
|
||||||
|
| mmapSize | The cache size used for memory map, in units of GB. The larger the better, but not exceed 1/4 the total physical memory. |
|
||||||
|
| numberWindowDownInChunk | The number of windows processed in a batch/chunk, along lines |
|
||||||
|
| numberWindowAcrossInChunk | The number of windows processed in a batch/chunk, along columns |
|
||||||
|
|
||||||
|
Many windows are processed together to maximize the usage of GPU cores; which is called as a Chunk. The total number of windows in a chunk is limited by the GPU memory. We recommend
|
||||||
|
numberWindowDownInChunk=1, numberWindowAcrossInChunk=10, for a window size=64.
|
||||||
|
|
||||||
|
|
||||||
|
**Search Parameters**
|
||||||
|
|
||||||
|
| PyCuAmpcor | Notes |
|
||||||
|
| :--- | :---- |
|
||||||
|
| skipSampleDown | The skip in pixels for neighboring windows along height |
|
||||||
|
| skipSampleAcross | The skip in pixels for neighboring windows along width |
|
||||||
|
| numberWindowDown | the number of windows along height |
|
||||||
|
| numberWindowAcross | the number of windows along width |
|
||||||
|
| referenceStartPixelDownStatic | the starting pixel location of the first reference window - along height component |
|
||||||
|
|referenceStartPixelAcrossStatic | the starting pixel location of the first reference window - along width component |
|
||||||
|
|
||||||
|
The C/C++/CUDA program accepts inputs with the total number of windows (numberWindowDown, numberWindowAcross) and the starting pixels of each reference window. The purpose is to establish multiple-threads/streams processing. Therefore, users are required to provide/compute these inputs, with tools available from PyCuAmpcor python class. The cuDenseOffsets.py script also does the job.
|
||||||
|
|
||||||
|
We provide some examples below, assuming a PyCuAmpcor class object is created as
|
||||||
|
|
||||||
|
```python
|
||||||
|
objOffset = PyCuAmpcor()
|
||||||
|
```
|
||||||
|
|
||||||
|
**To compute the total number of windows**
|
||||||
|
|
||||||
|
We use the line direction as an example, assuming parameters as
|
||||||
|
|
||||||
|
```
|
||||||
|
margin # the number of pixels to neglect at edges
|
||||||
|
halfSearchRangeDown # the half of the search range
|
||||||
|
windowSizeHeight # the size of the reference window for feature tracking
|
||||||
|
skipSampleDown # the skip in pixels between two reference windows
|
||||||
|
referenceImageHeight # the reference image height, usually the same as the secondary image height
|
||||||
|
```
|
||||||
|
|
||||||
|
and the number of windows may be computed along lines as
|
||||||
|
|
||||||
|
```python
|
||||||
|
objOffset.numberWindowDown = (referenceImageHeight-2*margin-2*halfSearchRangeDown-windowSizeHeight) // skipSampleDown
|
||||||
|
```
|
||||||
|
|
||||||
|
If there is a gross offset, you may also need to subtract it when computing the number of windows.
|
||||||
|
|
||||||
|
The output offset fields will be of size (numberWindowDown, numberWindowAcross). The total number of windows numberWindows = numberWindowDown\*numberWindowAcross.
|
||||||
|
|
||||||
|
**To compute the starting pixels of reference/secondary windows**
|
||||||
|
|
||||||
|
The starting pixel for the first reference window is usually set as
|
||||||
|
|
||||||
|
```python
|
||||||
|
objOffset.referenceStartPixelDownStatic = margin + halfSearchRangeDown
|
||||||
|
objOffset.referenceStartPixelAcrossStatic = margin + halfSearchRangeAcross
|
||||||
|
```
|
||||||
|
|
||||||
|
you may also choose other values, e.g., for a particular region of the image, or a certain location for debug purposes.
|
||||||
|
|
||||||
|
|
||||||
|
With a constant gross offset, call
|
||||||
|
|
||||||
|
```python
|
||||||
|
objOffset.setConstantGrossOffset(grossOffsetDown, grossOffsetAcross)
|
||||||
|
```
|
||||||
|
|
||||||
|
to set the starting pixels of all reference and secondary windows.
|
||||||
|
|
||||||
|
The starting pixel for the secondary window will be (referenceStartPixelDownStatic-halfSearchRangeDown+grossOffsetDown, referenceStartPixelAcrossStatic-halfSearchRangeAcross+grossOffsetAcross).
|
||||||
|
|
||||||
|
For cases you choose a varying grossOffset, you may use two numpy arrays to pass the information to PyCuAmpcor, e.g.,
|
||||||
|
|
||||||
|
```python
|
||||||
|
objOffset.referenceStartPixelDownStatic = objOffset.halfSearchRangeDown + margin
|
||||||
|
objOffset.referenceStartPixelAcrossStatic = objOffset.halfSearchRangeAcross + margin
|
||||||
|
vD = np.random.randint(0, 10, size =objOffset.numberWindows, dtype=np.int32)
|
||||||
|
vA = np.random.randint(0, 1, size = objOffset.numberWindows, dtype=np.int32)
|
||||||
|
objOffset.setVaryingGrossOffset(vD, vA)
|
||||||
|
```
|
||||||
|
|
||||||
|
to set all the starting pixels for reference/secondary windows.
|
||||||
|
|
||||||
|
Sometimes, adding a large gross offset may cause the windows near the edge to be out of range of the orignal image. To avoid memory access errors, call
|
||||||
|
|
||||||
|
```python
|
||||||
|
objOffset.checkPixelInImageRange()
|
||||||
|
```
|
||||||
|
|
||||||
|
to verify. If an out-of-range error is reported, you may consider to increase the margin or reduce the number of windows.
|
||||||
|
|
||||||
|
## 5. List of Procedures
|
||||||
|
|
||||||
|
The following procedures apply to one pair of reference/secondary windows, which are iterated through the whole image.
|
||||||
|
|
||||||
|
### 5.1 Read a window from Reference/Secondary images
|
||||||
|
|
||||||
|
* Load a window of size (windowSizeHeight, windowSizeWidth) from a starting pixel from the reference image
|
||||||
|
|
||||||
|
* Load a larger chip of size (windowSizeHeight+2\*halfSearchRangeDown, windowSizeWidth+2\*halfSearchRangeAcross) from the secondary image, the starting position is shifted by (-halfSearchRangeDown, -halfSearchRangeAcross) from the starting position of the reference image (may also be shifted additionally by the gross offset). The secondary chip can be viewed as a set of windows of the same size as the reference window, but shifted in locations varied within the search range.
|
||||||
|
|
||||||
|
**Parameters**
|
||||||
|
|
||||||
|
| PyCuAmpcor | CUDA variable | ampcor.F equivalent | Notes |
|
||||||
|
| :--- | :--- | :---- | :--- |
|
||||||
|
| windowSizeHeight | windowSizeHeightRaw | i_wsyi |Reference window height |
|
||||||
|
| windowSizeWidth | windowSizeWidthRaw | i_wsxi |Reference window width |
|
||||||
|
| halfSearchRangeDown | halfSearchRangeDownRaw | i_srchy | half of the search range along lines |
|
||||||
|
| halfSearchRangeAcross | halfSearchRangeAcrossRaw | i_srchx | half of the search range along |
|
||||||
|
|
||||||
|
|
||||||
|
**Difference to ROIPAC**
|
||||||
|
No major difference
|
||||||
|
|
||||||
|
|
||||||
|
### 5.2 Perform cross-correlation and obtain an offset in units of the pixel size
|
||||||
|
|
||||||
|
* Take amplitudes (real) of the signals (complex or real) in reference/secondary windows
|
||||||
|
* Compute the normalized correlation surface between reference and secondary windows: the resulting correlation surface is of size (2\*halfSearchRangeDown+1, 2\*halfSearchRangeAcross+1); two cross-correlation methods are offered, time domain or frequency domain algorithms.
|
||||||
|
* Find the location of the maximum/peak in correlation surface.
|
||||||
|
* Around the peak position, extract a smaller window from the correlation surface for statistics, such as signal-noise-ratio (SNR), variance.
|
||||||
|
|
||||||
|
This step provides an initial estimate of the offset, usually with a large search range. In the following, we will zoom in around the peak, and oversample the windows with a smaller search range.
|
||||||
|
|
||||||
|
|
||||||
|
**Parameters**
|
||||||
|
|
||||||
|
| PyCuAmpcor | CUDA variable | ampcor.F equivalent | Notes |
|
||||||
|
| :--- | :--- | :---- | :--- |
|
||||||
|
| algorithm | algorithm | N/A | the cross-correlation computation method 0=Freq 1=time |
|
||||||
|
| corrStatWindowSize | corrStatWindowSize | 21 | the size of correlation surface around the peak position used for statistics, may be adjusted |
|
||||||
|
|
||||||
|
|
||||||
|
**Difference to ROIPAC**
|
||||||
|
|
||||||
|
* RIOPAC only offers the time-domain algorithm. The frequency-domain algorithm is faster and is set as default in PyCuAmpcor.
|
||||||
|
* RIOPAC proceeds from here only for windows with *good* match, or with high coherence. To maintain parallelism, PyCuAmpcor proceeds anyway while leaving the *filtering* to users in post processing.
|
||||||
|
|
||||||
|
|
||||||
|
### 5.3 Extract a smaller window from the secondary window for oversampling
|
||||||
|
|
||||||
|
* From the secondary window, we extract a smaller window of size (windowSizeHeightRaw+2\*halfZoomWindowSizeRaw, windowSizeWidthRaw+2\*halfZoomWindowSizeRaw) with the center determined by the peak position. If the peak position, e.g., along height, is OffsetInit (taking values in \[0, 2\*halfSearchRangeDownRaw\]), the starting position to extract will be OffsetInit+halfSearchRangeDownRaw-halfZoomWindowSizeRaw.
|
||||||
|
|
||||||
|
**Parameters**
|
||||||
|
|
||||||
|
| PyCuAmpcor | CUDA variable | ampcor.F equivalent | Notes |
|
||||||
|
| :--- | :--- | :---- | :--- |
|
||||||
|
| N/A | halfZoomWindowSizeRaw | i_srchp(p)=4 | The smaller search range to zoom-in. In PyCuAmpcor, is determined by zoomWindowSize/(2\*rawDataOversamplingFactor)
|
||||||
|
|
||||||
|
**Difference to ROIPAC**
|
||||||
|
|
||||||
|
RIOPAC extracts the secondary window centering at the correlation surface peak. If the peak locates near the edge, zeros are padded if the extraction zone exceeds the window range. In PyCuAmpcor, the extraction center may be shifted away from peak to warrant all pixels being in the range of the original window.
|
||||||
|
|
||||||
|
|
||||||
|
### 5.4 Oversampling reference and (extracted) secondary windows
|
||||||
|
|
||||||
|
* Oversample both the reference and the (extracted) secondary windows by a factor of 2, which is to avoid aliasing in the complex multiplication of the SAR images. The oversampling is performed with FFT (zero padding), same as in RIOPAC.
|
||||||
|
* A deramping procedure is in general required for complex signals before oversampling, to shift the band center to 0. The procedure is only designed to remove a linear phase ramp. It doesn't work for TOPSAR, whose ramp goes quadratic. Instead, the amplitudes are taken before oversampling.
|
||||||
|
* the amplitudes (real) are then taken for each pixel of the complex signals in reference and secondary windows.
|
||||||
|
|
||||||
|
**Parameters**
|
||||||
|
|
||||||
|
| PyCuAmpcor | CUDA variable | ampcor.F equivalent | Notes |
|
||||||
|
| :--- | :--- | :---- | :--- |
|
||||||
|
| rawDataOversamplingFactor | rawDataOversamplingFactor | i_ovs=2 | the oversampling factor for reference and secondary windows, use 2 for InSAR SLCs. |
|
||||||
|
| derampMethod | derampMethod | 1 or no effect on TOPS | 0=mag for TOPS, 1=deramping (default), else=skip deramping.
|
||||||
|
|
||||||
|
|
||||||
|
**Difference to ROIPAC**
|
||||||
|
|
||||||
|
RIOPAC enlarges both windows to a size which is a power of 2; ideal for FFT. PyCuAmpcor uses their original sizes for FFT.
|
||||||
|
|
||||||
|
RIOPAC always performs deramping with Method 1, to obtain the ramp by averaging the phase difference between neighboring pixels. For TOPS mode, users need to specify 'mag' as the image *datatype* such that the amplitudes are taken before oversampling. Therefore, deramping has no effect. In PyCuAmpcor, derampMethod=0 is equivalent to *datatype='mag'*, taking amplitudes but skipping deramping. derampMethod=1 always performs deramping, no matter the 'complex' or 'real' image datatypes.
|
||||||
|
|
||||||
|
### 5.5 Cross-Correlate the oversampled reference and secondary windows
|
||||||
|
|
||||||
|
* cross-correlate the oversampled reference and secondary windows.
|
||||||
|
* other procedures are needed to obtain the normalized cross-correlation surface, such as calculating and subtracting the mean values.
|
||||||
|
* the resulting correlation surface is of size (2\*halfZoomWindowSizeRaw\*rawDataOversamplingFactor+1, 2\*halfZoomWindowSizeRaw\*rawDataOversamplingFactor+1). We cut the last row and column to make it an even sequence, or the size 2\*halfZoomWindowSizeRaw\*rawDataOversamplingFactor=zoomWindowSize.
|
||||||
|
|
||||||
|
**Parameters**
|
||||||
|
|
||||||
|
| PyCuAmpcor | CUDA variable | ampcor.F equivalent | Notes |
|
||||||
|
| :--- | :--- | :---- | :--- |
|
||||||
|
| corrSurfaceZoomInWindow | zoomWindowSize | i_cw | The size of correlation surface of the (anti-aliasing) oversampled reference/secondary windows, also used to set halfZoomWindowSizeRaw. Set it to 16 to be consistent with RIOPAC. |
|
||||||
|
|
||||||
|
**Difference to ROIPAC**
|
||||||
|
|
||||||
|
In RIOPAC, an extra resizing step is performed on the correlation surface, from (2\*halfZoomWindowSizeRaw\*rawDataOversamplingFactor+1, 2\*halfZoomWindowSizeRaw\*rawDataOversamplingFactor+1) to (i_cw, i_cw), centered at the peak (in RIOPAC, the peak seeking is incorporated in the correlation module while is seperate in PyCuAmpcor). i_cw is a user configurable variable; it could be smaller or bigger than 2\*i_srchp\*i_ovs+1=17 (fixed), leading to extraction or enlargement by padding 0s. This procedure is not performed in PyCuAmpcor, as it makes little difference in the next oversampling procedure.
|
||||||
|
|
||||||
|
### 5.6 Oversample the correlation surface and find the peak position
|
||||||
|
|
||||||
|
* oversample the (real) correlation surface by a factor oversamplingFactor, or the resulting surface is of size (zoomWindowSize\*oversamplingFactor, zoomWindowSize\*oversamplingFactor) Two oversampling methods are offered, oversamplingMethod=0 (FFT, default), =1(sinc).
|
||||||
|
* find the peak position in the oversampled correlation surface, OffsetZoomIn, in range zoomWindowSize\*oversamplingFactor.
|
||||||
|
* calculate the final offset, from OffsetInit (which is the starting position of secondary window extraction in 2.4),
|
||||||
|
|
||||||
|
offset = (OffsetInit-halfSearchRange)+OffsetZoomIn/(oversamplingFactor\*rawDataOversamplingFactor)
|
||||||
|
|
||||||
|
Note that this offset does not include the pre-defined gross offset. Users need to add them together if necessary.
|
||||||
|
|
||||||
|
|
||||||
|
**Parameters**
|
||||||
|
|
||||||
|
| PyCuAmpcor | CUDA variable | ampcor.F equivalent | Notes |
|
||||||
|
| :--- | :--- | :---- | :--- |
|
||||||
|
| corrSurfaceOverSamplingFactor | oversamplingFactor | i_covs | The oversampling factor for the correlation surface |
|
||||||
|
| corrSurfaceOverSamplingMethod | oversamplingMethod | i_sinc_fourier=i_sinc | The oversampling method 0=FFT, 1=sinc. |
|
||||||
|
|
||||||
|
**Difference to ROIPAC**
|
||||||
|
|
||||||
|
RIOPAC by default uses the sinc interpolator (the FFT method is included but one needs to change the FORTRAN code to switch). For since interpolator, there is no difference in implementations. For FFT, RIOPAC always enlarges the window to a size in power of 2.
|
|
@ -61,3 +61,4 @@ def main():
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|
||||||
|
main()
|
|
@ -14,8 +14,8 @@ from contrib.PyCuAmpcor.PyCuAmpcor import PyCuAmpcor
|
||||||
|
|
||||||
|
|
||||||
EXAMPLE = '''example
|
EXAMPLE = '''example
|
||||||
cuDenseOffsets.py -m ./merged/SLC/20151120/20151120.slc.full -s ./merged/SLC/20151214/20151214.slc.full
|
cuDenseOffsets.py -r ./merged/SLC/20151120/20151120.slc.full -s ./merged/SLC/20151214/20151214.slc.full
|
||||||
--referencexml ./reference/IW1.xml --outprefix ./merged/offsets/20151120_20151214/offset
|
--outprefix ./merged/offsets/20151120_20151214/offset
|
||||||
--ww 256 --wh 256 --oo 32 --kw 300 --kh 100 --nwac 100 --nwdc 1 --sw 8 --sh 8 --gpuid 2
|
--ww 256 --wh 256 --oo 32 --kw 300 --kh 100 --nwac 100 --nwdc 1 --sw 8 --sh 8 --gpuid 2
|
||||||
'''
|
'''
|
||||||
|
|
||||||
|
@ -29,77 +29,96 @@ def createParser():
|
||||||
parser = argparse.ArgumentParser(description='Generate offset field between two Sentinel slc',
|
parser = argparse.ArgumentParser(description='Generate offset field between two Sentinel slc',
|
||||||
formatter_class=argparse.RawTextHelpFormatter,
|
formatter_class=argparse.RawTextHelpFormatter,
|
||||||
epilog=EXAMPLE)
|
epilog=EXAMPLE)
|
||||||
parser.add_argument('-m','--reference', type=str, dest='reference', required=True,
|
|
||||||
|
# input/output
|
||||||
|
parser.add_argument('-r','--reference', type=str, dest='reference', required=True,
|
||||||
help='Reference image')
|
help='Reference image')
|
||||||
parser.add_argument('-s', '--secondary',type=str, dest='secondary', required=True,
|
parser.add_argument('-s', '--secondary',type=str, dest='secondary', required=True,
|
||||||
help='Secondary image')
|
help='Secondary image')
|
||||||
parser.add_argument('-l', '--lat',type=str, dest='lat', required=False,
|
|
||||||
help='Latitude')
|
|
||||||
parser.add_argument('-L', '--lon',type=str, dest='lon', required=False,
|
|
||||||
help='Longitude')
|
|
||||||
parser.add_argument('--los',type=str, dest='los', required=False,
|
|
||||||
help='Line of Sight')
|
|
||||||
parser.add_argument('-x', '--referencexml',type=str, dest='referencexml', required=False,
|
|
||||||
help='Reference Image XML File')
|
|
||||||
|
|
||||||
parser.add_argument('--op','--outprefix','--output-prefix', type=str, dest='outprefix',
|
parser.add_argument('--op','--outprefix','--output-prefix', type=str, dest='outprefix',
|
||||||
default='offset', required=True,
|
default='offset', required=True,
|
||||||
help='Output prefix, default: offset.')
|
help='Output prefix, default: offset.')
|
||||||
parser.add_argument('--os','--outsuffix', type=str, dest='outsuffix', default='',
|
parser.add_argument('--os','--outsuffix', type=str, dest='outsuffix', default='',
|
||||||
help='Output suffix, default:.')
|
help='Output suffix, default:.')
|
||||||
|
|
||||||
|
# window size settings
|
||||||
parser.add_argument('--ww', type=int, dest='winwidth', default=64,
|
parser.add_argument('--ww', type=int, dest='winwidth', default=64,
|
||||||
help='Window width (default: %(default)s).')
|
help='Window width (default: %(default)s).')
|
||||||
parser.add_argument('--wh', type=int, dest='winhgt', default=64,
|
parser.add_argument('--wh', type=int, dest='winhgt', default=64,
|
||||||
help='Window height (default: %(default)s).')
|
help='Window height (default: %(default)s).')
|
||||||
|
parser.add_argument('--sw', type=int, dest='srcwidth', default=20,
|
||||||
parser.add_argument('--sw', type=int, dest='srcwidth', default=20, choices=range(8, 33),
|
help='Half search range along width, (default: %(default)s, recommend: 4-32).')
|
||||||
help='Search window width (default: %(default)s).')
|
parser.add_argument('--sh', type=int, dest='srchgt', default=20,
|
||||||
parser.add_argument('--sh', type=int, dest='srchgt', default=20, choices=range(8, 33),
|
help='Half search range along height (default: %(default)s, recommend: 4-32).')
|
||||||
help='Search window height (default: %(default)s).')
|
|
||||||
parser.add_argument('--mm', type=int, dest='margin', default=50,
|
|
||||||
help='Margin (default: %(default)s).')
|
|
||||||
|
|
||||||
parser.add_argument('--kw', type=int, dest='skipwidth', default=64,
|
parser.add_argument('--kw', type=int, dest='skipwidth', default=64,
|
||||||
help='Skip across (default: %(default)s).')
|
help='Skip across (default: %(default)s).')
|
||||||
parser.add_argument('--kh', type=int, dest='skiphgt', default=64,
|
parser.add_argument('--kh', type=int, dest='skiphgt', default=64,
|
||||||
help='Skip down (default: %(default)s).')
|
help='Skip down (default: %(default)s).')
|
||||||
|
|
||||||
|
# determine the number of windows
|
||||||
|
# either specify the starting pixel and the number of windows,
|
||||||
|
# or by setting them to -1, let the script to compute these parameters
|
||||||
|
parser.add_argument('--mm', type=int, dest='margin', default=0,
|
||||||
|
help='Margin (default: %(default)s).')
|
||||||
|
parser.add_argument('--nwa', type=int, dest='numWinAcross', default=-1,
|
||||||
|
help='Number of window across (default: %(default)s to be auto-determined).')
|
||||||
|
parser.add_argument('--nwd', type=int, dest='numWinDown', default=-1,
|
||||||
|
help='Number of window down (default: %(default)s).')
|
||||||
|
parser.add_argument('--startpixelac', dest='startpixelac', type=int, default=-1,
|
||||||
|
help='Starting Pixel across of the reference image(default: %(default)s to be determined by margin and search range).')
|
||||||
|
parser.add_argument('--startpixeldw', dest='startpixeldw', type=int, default=-1,
|
||||||
|
help='Starting Pixel down of the reference image (default: %(default)s).')
|
||||||
|
|
||||||
|
# cross-correlation algorithm
|
||||||
|
parser.add_argument('--alg', '--algorithm', dest='algorithm', type=int, default=0,
|
||||||
|
help='cross-correlation algorithm (0 = frequency domain, 1 = time domain) (default: %(default)s).')
|
||||||
parser.add_argument('--raw-osf','--raw-over-samp-factor', type=int, dest='raw_oversample',
|
parser.add_argument('--raw-osf','--raw-over-samp-factor', type=int, dest='raw_oversample',
|
||||||
default=2, choices=range(2,5),
|
default=2, choices=range(2,5),
|
||||||
help='raw data oversampling factor (default: %(default)s).')
|
help='anti-aliasing oversampling factor, equivalent to i_ovs in RIOPAC (default: %(default)s).')
|
||||||
|
parser.add_argument('--drmp', '--deramp', dest='deramp', type=int, default=0,
|
||||||
|
help='deramp method (0: mag for TOPS, 1:complex with linear ramp) (default: %(default)s).')
|
||||||
|
|
||||||
|
# gross offset
|
||||||
gross = parser.add_argument_group('Initial gross offset')
|
gross = parser.add_argument_group('Initial gross offset')
|
||||||
gross.add_argument('-g','--gross', type=int, dest='gross', default=0,
|
gross.add_argument('-g','--gross', type=int, dest='gross', default=0,
|
||||||
help='Use gross offset or not')
|
help='Use varying gross offset or not')
|
||||||
gross.add_argument('--aa', type=int, dest='azshift', default=0,
|
gross.add_argument('--aa', type=int, dest='azshift', default=0,
|
||||||
help='Gross azimuth offset (default: %(default)s).')
|
help='Gross azimuth offset (default: %(default)s).')
|
||||||
gross.add_argument('--rr', type=int, dest='rgshift', default=0,
|
gross.add_argument('--rr', type=int, dest='rgshift', default=0,
|
||||||
help='Gross range offset (default: %(default)s).')
|
help='Gross range offset (default: %(default)s).')
|
||||||
|
gross.add_argument('--gf', '--gross-file', type=str, dest='gross_offset_file',
|
||||||
|
help='Varying gross offset input file')
|
||||||
|
|
||||||
corr = parser.add_argument_group('Correlation surface')
|
corr = parser.add_argument_group('Correlation surface')
|
||||||
corr.add_argument('--corr-win-size', type=int, dest='corr_win_size', default=-1,
|
corr.add_argument('--corr-stat-size', type=int, dest='corr_stat_win_size', default=21,
|
||||||
help='Zoom-in window size of the correlation surface for oversampling (default: %(default)s).')
|
help='Zoom-in window size of the correlation surface for statistics(snr/variance) (default: %(default)s).')
|
||||||
|
corr.add_argument('--corr-srch-size', type=int, dest='corr_srch_size', default=4,
|
||||||
|
help='(half) Zoom-in window size of the correlation surface for oversampling, ' \
|
||||||
|
'equivalent to i_srcp in RIOPAC (default: %(default)s).')
|
||||||
corr.add_argument('--corr-osf', '--oo', '--corr-over-samp-factor', type=int, dest='corr_oversample', default=32,
|
corr.add_argument('--corr-osf', '--oo', '--corr-over-samp-factor', type=int, dest='corr_oversample', default=32,
|
||||||
help = 'Oversampling factor of the zoom-in correlation surface (default: %(default)s).')
|
help = 'Oversampling factor of the zoom-in correlation surface (default: %(default)s).')
|
||||||
|
corr.add_argument('--corr-osm', '--corr-over-samp-method', type=int, dest='corr_oversamplemethod', default=0,
|
||||||
|
help = 'Oversampling method for the correlation surface 0=fft, 1=sinc (default: %(default)s).')
|
||||||
|
|
||||||
parser.add_argument('--nwa', type=int, dest='numWinAcross', default=-1,
|
# gpu settings
|
||||||
help='Number of window across (default: %(default)s).')
|
proc = parser.add_argument_group('Processing parameters')
|
||||||
parser.add_argument('--nwd', type=int, dest='numWinDown', default=-1,
|
proc.add_argument('--gpuid', '--gid', '--gpu-id', dest='gpuid', type=int, default=0,
|
||||||
help='Number of window down (default: %(default)s).')
|
help='GPU ID (default: %(default)).')
|
||||||
|
proc.add_argument('--nstreams', dest='nstreams', type=int, default=2,
|
||||||
|
help='Number of cuda streams (default: %(default)s).')
|
||||||
|
proc.add_argument('--usemmap', dest='usemmap', type=int, default=1,
|
||||||
|
help='Whether to use memory map for loading image files (default: %(default)s).')
|
||||||
|
proc.add_argument('--mmapsize', dest='mmapsize', type=int, default=8,
|
||||||
|
help='The memory map buffer size in GB (default: %(default)s).')
|
||||||
|
proc.add_argument('--nwac', type=int, dest='numWinAcrossInChunk', default=10,
|
||||||
|
help='Number of window across in a chunk/batch (default: %(default)s).')
|
||||||
|
proc.add_argument('--nwdc', type=int, dest='numWinDownInChunk', default=1,
|
||||||
|
help='Number of window down in a chunk/batch (default: %(default)s).')
|
||||||
|
|
||||||
parser.add_argument('--nwac', type=int, dest='numWinAcrossInChunk', default=1,
|
proc.add_argument('--redo', dest='redo', action='store_true',
|
||||||
help='Number of window across in chunk (default: %(default)s).')
|
|
||||||
parser.add_argument('--nwdc', type=int, dest='numWinDownInChunk', default=1,
|
|
||||||
help='Number of window down in chunk (default: %(default)s).')
|
|
||||||
parser.add_argument('-r', '--redo', dest='redo', action='store_true',
|
|
||||||
help='To redo by force (ignore the existing offset fields).')
|
help='To redo by force (ignore the existing offset fields).')
|
||||||
|
|
||||||
parser.add_argument('--drmp', '--deramp', dest='deramp', type=int, default=0,
|
|
||||||
help='deramp method (0: mag, 1: complex) (default: %(default)s).')
|
|
||||||
|
|
||||||
parser.add_argument('--gpuid', '--gid', '--gpu-id', dest='gpuid', type=int, default=-1,
|
|
||||||
help='GPU ID (default: %(default)s).')
|
|
||||||
|
|
||||||
return parser
|
return parser
|
||||||
|
|
||||||
|
|
||||||
|
@ -108,9 +127,13 @@ def cmdLineParse(iargs = None):
|
||||||
inps = parser.parse_args(args=iargs)
|
inps = parser.parse_args(args=iargs)
|
||||||
|
|
||||||
# check oversampled window size
|
# check oversampled window size
|
||||||
if (inps.winwidth + 2 * inps.srcwidth) * inps.raw_oversample > 1024:
|
if (inps.winwidth + 2 * inps.srcwidth ) * inps.raw_oversample > 1024:
|
||||||
msg = 'input oversampled window size in the across/range direction '
|
msg = 'The oversampled window width, ' \
|
||||||
msg += 'exceeds the current implementaion limit of 1024!'
|
'as computed by (winwidth+2*srcwidth)*raw_oversample, ' \
|
||||||
|
'exceeds the current implementation limit of 1,024. ' \
|
||||||
|
f'Please reduce winwidth: {inps.winwidth}, ' \
|
||||||
|
f'srcwidth: {inps.srcwidth}, ' \
|
||||||
|
f'or raw_oversample: {inps.raw_oversample}.'
|
||||||
raise ValueError(msg)
|
raise ValueError(msg)
|
||||||
|
|
||||||
return inps
|
return inps
|
||||||
|
@ -136,11 +159,12 @@ def estimateOffsetField(reference, secondary, inps=None):
|
||||||
width = sar.getWidth()
|
width = sar.getWidth()
|
||||||
length = sar.getLength()
|
length = sar.getLength()
|
||||||
|
|
||||||
|
# create a PyCuAmpcor instance
|
||||||
objOffset = PyCuAmpcor()
|
objOffset = PyCuAmpcor()
|
||||||
|
|
||||||
objOffset.algorithm = 0
|
objOffset.algorithm = inps.algorithm
|
||||||
objOffset.deviceID = inps.gpuid # -1:let system find the best GPU
|
objOffset.deviceID = inps.gpuid
|
||||||
objOffset.nStreams = 2 #cudaStreams
|
objOffset.nStreams = inps.nstreams #cudaStreams
|
||||||
objOffset.derampMethod = inps.deramp
|
objOffset.derampMethod = inps.deramp
|
||||||
print('deramp method (0 for magnitude, 1 for complex): ', objOffset.derampMethod)
|
print('deramp method (0 for magnitude, 1 for complex): ', objOffset.derampMethod)
|
||||||
|
|
||||||
|
@ -155,49 +179,52 @@ def estimateOffsetField(reference, secondary, inps=None):
|
||||||
print("image length:",length)
|
print("image length:",length)
|
||||||
print("image width:",width)
|
print("image width:",width)
|
||||||
|
|
||||||
objOffset.numberWindowDown = (length-2*inps.margin-2*inps.srchgt-inps.winhgt)//inps.skiphgt
|
# if using gross offset, adjust the margin
|
||||||
objOffset.numberWindowAcross = (width-2*inps.margin-2*inps.srcwidth-inps.winwidth)//inps.skipwidth
|
margin = max(inps.margin, abs(inps.azshift), abs(inps.rgshift))
|
||||||
|
|
||||||
if (inps.numWinDown != -1):
|
# determine the number of windows down and across
|
||||||
objOffset.numberWindowDown = inps.numWinDown
|
# that's also the size of the output offset field
|
||||||
if (inps.numWinAcross != -1):
|
objOffset.numberWindowDown = inps.numWinDown if inps.numWinDown > 0 \
|
||||||
objOffset.numberWindowAcross = inps.numWinAcross
|
else (length-2*margin-2*inps.srchgt-inps.winhgt)//inps.skiphgt
|
||||||
print("offset field length: ",objOffset.numberWindowDown)
|
objOffset.numberWindowAcross = inps.numWinAcross if inps.numWinAcross > 0 \
|
||||||
print("offset field width: ",objOffset.numberWindowAcross)
|
else (width-2*margin-2*inps.srcwidth-inps.winwidth)//inps.skipwidth
|
||||||
|
print('the number of windows: {} by {}'.format(objOffset.numberWindowDown, objOffset.numberWindowAcross))
|
||||||
|
|
||||||
# window size
|
# window size
|
||||||
objOffset.windowSizeHeight = inps.winhgt
|
objOffset.windowSizeHeight = inps.winhgt
|
||||||
objOffset.windowSizeWidth = inps.winwidth
|
objOffset.windowSizeWidth = inps.winwidth
|
||||||
print('cross correlation window size: {} by {}'.format(objOffset.windowSizeHeight, objOffset.windowSizeWidth))
|
print('window size for cross-correlation: {} by {}'.format(objOffset.windowSizeHeight, objOffset.windowSizeWidth))
|
||||||
|
|
||||||
# search range
|
# search range
|
||||||
objOffset.halfSearchRangeDown = inps.srchgt
|
objOffset.halfSearchRangeDown = inps.srchgt
|
||||||
objOffset.halfSearchRangeAcross = inps.srcwidth
|
objOffset.halfSearchRangeAcross = inps.srcwidth
|
||||||
print('half search range: {} by {}'.format(inps.srchgt, inps.srcwidth))
|
print('initial search range: {} by {}'.format(inps.srchgt, inps.srcwidth))
|
||||||
|
|
||||||
# starting pixel
|
# starting pixel
|
||||||
|
objOffset.referenceStartPixelDownStatic = inps.startpixeldw if inps.startpixeldw != -1 \
|
||||||
|
else margin + objOffset.halfSearchRangeDown # use margin + halfSearchRange instead
|
||||||
|
objOffset.referenceStartPixelAcrossStatic = inps.startpixelac if inps.startpixelac != -1 \
|
||||||
|
else margin + objOffset.halfSearchRangeAcross
|
||||||
|
|
||||||
objOffset.referenceStartPixelDownStatic = inps.margin
|
print('the first pixel in reference image is: ({}, {})'.format(
|
||||||
objOffset.referenceStartPixelAcrossStatic = inps.margin
|
objOffset.referenceStartPixelDownStatic, objOffset.referenceStartPixelAcrossStatic))
|
||||||
|
|
||||||
# skip size
|
# skip size
|
||||||
|
|
||||||
objOffset.skipSampleDown = inps.skiphgt
|
objOffset.skipSampleDown = inps.skiphgt
|
||||||
objOffset.skipSampleAcross = inps.skipwidth
|
objOffset.skipSampleAcross = inps.skipwidth
|
||||||
print('search step: {} by {}'.format(inps.skiphgt, inps.skipwidth))
|
print('search step: {} by {}'.format(inps.skiphgt, inps.skipwidth))
|
||||||
|
|
||||||
# oversample raw data (SLC)
|
# oversample raw data (SLC)
|
||||||
objOffset.rawDataOversamplingFactor = inps.raw_oversample
|
objOffset.rawDataOversamplingFactor = inps.raw_oversample
|
||||||
print('raw data oversampling factor:', inps.raw_oversample)
|
|
||||||
|
|
||||||
# correlation surface
|
# correlation surface
|
||||||
if inps.corr_win_size == -1:
|
objOffset.corrStatWindowSize = inps.corr_stat_win_size
|
||||||
corr_win_size_orig = min(inps.srchgt, inps.srcwidth) * inps.raw_oversample + 1
|
|
||||||
inps.corr_win_size = np.power(2, int(np.log2(corr_win_size_orig)))
|
|
||||||
objOffset.corrSurfaceZoomInWindow = inps.corr_win_size
|
|
||||||
print('correlation surface zoom-in window size:', inps.corr_win_size)
|
|
||||||
|
|
||||||
objOffset.corrSufaceOverSamplingMethod = 0
|
corr_win_size = 2*inps.corr_srch_size*inps.raw_oversample
|
||||||
|
objOffset.corrSurfaceZoomInWindow = corr_win_size
|
||||||
|
print('correlation surface zoom-in window size:', corr_win_size)
|
||||||
|
|
||||||
|
objOffset.corrSurfaceOverSamplingMethod = inps.corr_oversamplemethod
|
||||||
objOffset.corrSurfaceOverSamplingFactor = inps.corr_oversample
|
objOffset.corrSurfaceOverSamplingFactor = inps.corr_oversample
|
||||||
print('correlation surface oversampling factor:', inps.corr_oversample)
|
print('correlation surface oversampling factor:', inps.corr_oversample)
|
||||||
|
|
||||||
|
@ -211,37 +238,44 @@ def estimateOffsetField(reference, secondary, inps=None):
|
||||||
print("snr: ",objOffset.snrImageName)
|
print("snr: ",objOffset.snrImageName)
|
||||||
print("cov: ",objOffset.covImageName)
|
print("cov: ",objOffset.covImageName)
|
||||||
|
|
||||||
offsetImageName = objOffset.offsetImageName.decode('utf8')
|
offsetImageName = objOffset.offsetImageName
|
||||||
grossOffsetImageName = objOffset.grossOffsetImageName.decode('utf8')
|
grossOffsetImageName = objOffset.grossOffsetImageName
|
||||||
snrImageName = objOffset.snrImageName.decode('utf8')
|
snrImageName = objOffset.snrImageName
|
||||||
covImageName = objOffset.covImageName.decode('utf8')
|
covImageName = objOffset.covImageName
|
||||||
|
|
||||||
print(offsetImageName)
|
|
||||||
print(inps.redo)
|
|
||||||
if os.path.exists(offsetImageName) and not inps.redo:
|
if os.path.exists(offsetImageName) and not inps.redo:
|
||||||
print('offsetfield file exists')
|
print('offsetfield file {} exists while the redo flag is {}.'.format(offsetImageName, inps.redo))
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
# generic control
|
# generic control
|
||||||
objOffset.numberWindowDownInChunk = inps.numWinDownInChunk
|
objOffset.numberWindowDownInChunk = inps.numWinDownInChunk
|
||||||
objOffset.numberWindowAcrossInChunk = inps.numWinAcrossInChunk
|
objOffset.numberWindowAcrossInChunk = inps.numWinAcrossInChunk
|
||||||
objOffset.useMmap = 0
|
objOffset.useMmap = inps.usemmap
|
||||||
objOffset.mmapSize = 8
|
objOffset.mmapSize = inps.mmapsize
|
||||||
|
|
||||||
|
# setup and check parameters
|
||||||
objOffset.setupParams()
|
objOffset.setupParams()
|
||||||
|
|
||||||
## Set Gross Offset ###
|
## Set Gross Offset ###
|
||||||
if inps.gross == 0:
|
if inps.gross == 0: # use static grossOffset
|
||||||
print("Set constant grossOffset")
|
print('Set constant grossOffset ({}, {})'.format(inps.azshift, inps.rgshift))
|
||||||
print("By default, the gross offsets are zero")
|
objOffset.setConstantGrossOffset(inps.azshift, inps.rgshift)
|
||||||
print("You can override the default values here")
|
|
||||||
objOffset.setConstantGrossOffset(0, 0)
|
|
||||||
|
|
||||||
else:
|
else: # use varying offset
|
||||||
print("Set varying grossOffset")
|
print("Set varying grossOffset from file {}".format(inps.gross_offset_file))
|
||||||
print("By default, the gross offsets are zero")
|
grossOffset = np.fromfile(inps.gross_offset_file, dtype=np.int32)
|
||||||
print("You can override the default grossDown and grossAcross arrays here")
|
numberWindows = objOffset.numberWindowDown*objOffset.numberWindowAcross
|
||||||
objOffset.setVaryingGrossOffset(np.zeros(shape=grossDown.shape,dtype=np.int32),
|
if grossOffset.size != 2*numberWindows :
|
||||||
np.zeros(shape=grossAcross.shape,dtype=np.int32))
|
print('The input gross offsets do not match the number of windows {} by {} in int32 type'.format(objOffset.numberWindowDown, objOffset.numberWindowAcross))
|
||||||
|
return 0;
|
||||||
|
grossOffset = grossOffset.reshape(numberWindows, 2)
|
||||||
|
grossAzimuthOffset = grossOffset[:, 0]
|
||||||
|
grossRangeOffset = grossOffset[:, 1]
|
||||||
|
# enforce C-contiguous flag
|
||||||
|
grossAzimuthOffset = grossAzimuthOffset.copy(order='C')
|
||||||
|
grossRangeOffset = grossRangeOffset.copy(order='C')
|
||||||
|
# set varying gross offset
|
||||||
|
objOffset.setVaryingGrossOffset(grossAzimuthOffset, grossRangeOffset)
|
||||||
|
|
||||||
# check
|
# check
|
||||||
objOffset.checkPixelInImageRange()
|
objOffset.checkPixelInImageRange()
|
||||||
|
|
|
@ -1,28 +1,29 @@
|
||||||
#include "GDALImage.h"
|
|
||||||
#include <iostream>
|
|
||||||
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include <unistd.h>
|
|
||||||
#include <fcntl.h>
|
|
||||||
#include <assert.h>
|
|
||||||
#include <cublas_v2.h>
|
|
||||||
#include "cudaError.h"
|
|
||||||
#include <errno.h>
|
|
||||||
#include <unistd.h>
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* \brief Constructor
|
* @file GDALImage.h
|
||||||
|
* @brief Implementations of GDALImage class
|
||||||
*
|
*
|
||||||
* @param filename a std::string with the raster image file name
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
// my declaration
|
||||||
|
#include "GDALImage.h"
|
||||||
|
|
||||||
|
// dependencies
|
||||||
|
#include <iostream>
|
||||||
|
#include "cudaError.h"
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Constructor
|
||||||
|
* @brief Create a GDAL image object
|
||||||
|
* @param filename a std::string with the raster image file name
|
||||||
|
* @param band the band number
|
||||||
|
* @param cacheSizeInGB read buffer size in GigaBytes
|
||||||
|
* @param useMmap whether to use memory map
|
||||||
|
*/
|
||||||
GDALImage::GDALImage(std::string filename, int band, int cacheSizeInGB, int useMmap)
|
GDALImage::GDALImage(std::string filename, int band, int cacheSizeInGB, int useMmap)
|
||||||
: _useMmap(useMmap)
|
: _useMmap(useMmap)
|
||||||
{
|
{
|
||||||
// open the file as dataset
|
// open the file as dataset
|
||||||
_poDataset = (GDALDataset *) GDALOpen(filename.c_str(), GA_ReadOnly );
|
_poDataset = (GDALDataset *) GDALOpen(filename.c_str(), GA_ReadOnly);
|
||||||
// if something is wrong, throw an exception
|
// if something is wrong, throw an exception
|
||||||
// GDAL reports the error message
|
// GDAL reports the error message
|
||||||
if(!_poDataset)
|
if(!_poDataset)
|
||||||
|
@ -32,7 +33,7 @@ GDALImage::GDALImage(std::string filename, int band, int cacheSizeInGB, int useM
|
||||||
int count = _poDataset->GetRasterCount();
|
int count = _poDataset->GetRasterCount();
|
||||||
if(band > count)
|
if(band > count)
|
||||||
{
|
{
|
||||||
std::cout << "The desired band " << band << " is greated than " << count << " bands available";
|
std::cout << "The desired band " << band << " is greater than " << count << " bands available";
|
||||||
throw;
|
throw;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -73,8 +74,6 @@ GDALImage::GDALImage(std::string filename, int band, int cacheSizeInGB, int useM
|
||||||
&_pixelSize,
|
&_pixelSize,
|
||||||
&pnLineSpace,
|
&pnLineSpace,
|
||||||
papszOptions);
|
papszOptions);
|
||||||
|
|
||||||
// check it
|
|
||||||
if(!_poBandVirtualMem)
|
if(!_poBandVirtualMem)
|
||||||
throw;
|
throw;
|
||||||
|
|
||||||
|
@ -84,43 +83,52 @@ GDALImage::GDALImage(std::string filename, int band, int cacheSizeInGB, int useM
|
||||||
else { // use a buffer
|
else { // use a buffer
|
||||||
checkCudaErrors(cudaMallocHost((void **)&_memPtr, _bufferSize));
|
checkCudaErrors(cudaMallocHost((void **)&_memPtr, _bufferSize));
|
||||||
}
|
}
|
||||||
|
|
||||||
// make sure memPtr is not Null
|
// make sure memPtr is not Null
|
||||||
if (!_memPtr)
|
if (!_memPtr)
|
||||||
|
{
|
||||||
|
std::cout << "unable to locate the memory buffer\n";
|
||||||
throw;
|
throw;
|
||||||
|
}
|
||||||
// all done
|
// all done
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/// load a tile of data h_tile x w_tile from CPU (mmap) to GPU
|
/**
|
||||||
/// @param dArray pointer for array in device memory
|
* Load a tile of data h_tile x w_tile from CPU to GPU
|
||||||
/// @param h_offset Down/Height offset
|
* @param dArray pointer for array in device memory
|
||||||
/// @param w_offset Across/Width offset
|
* @param h_offset Down/Height offset
|
||||||
/// @param h_tile Down/Height tile size
|
* @param w_offset Across/Width offset
|
||||||
/// @param w_tile Across/Width tile size
|
* @param h_tile Down/Height tile size
|
||||||
/// @param stream CUDA stream for copying
|
* @param w_tile Across/Width tile size
|
||||||
void GDALImage::loadToDevice(void *dArray, size_t h_offset, size_t w_offset, size_t h_tile, size_t w_tile, cudaStream_t stream)
|
* @param stream CUDA stream for copying
|
||||||
|
* @note Need to use size_t type to pass the parameters to cudaMemcpy2D correctly
|
||||||
|
*/
|
||||||
|
void GDALImage::loadToDevice(void *dArray, size_t h_offset, size_t w_offset,
|
||||||
|
size_t h_tile, size_t w_tile, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
|
|
||||||
size_t tileStartOffset = (h_offset*_width + w_offset)*_pixelSize;
|
size_t tileStartOffset = (h_offset*_width + w_offset)*_pixelSize;
|
||||||
|
|
||||||
char * startPtr = (char *)_memPtr ;
|
char * startPtr = (char *)_memPtr ;
|
||||||
startPtr += tileStartOffset;
|
startPtr += tileStartOffset;
|
||||||
|
|
||||||
// @note
|
if (_useMmap) {
|
||||||
// We assume down/across directions as rows/cols. Therefore, SLC mmap and device array are both row major.
|
// direct copy from memory map buffer to device memory
|
||||||
// cuBlas assumes both source and target arrays are column major.
|
checkCudaErrors(cudaMemcpy2DAsync(dArray, // dst
|
||||||
// To use cublasSetMatrix, we need to switch w_tile/h_tile for rows/cols
|
w_tile*_pixelSize, // dst pitch
|
||||||
// checkCudaErrors(cublasSetMatrixAsync(w_tile, h_tile, sizeof(float2), startPtr, width, dArray, w_tile, stream));
|
startPtr, // src
|
||||||
if (_useMmap)
|
_width*_pixelSize, // src pitch
|
||||||
checkCudaErrors(cudaMemcpy2DAsync(dArray, w_tile*_pixelSize, startPtr, _width*_pixelSize,
|
w_tile*_pixelSize, // width in Bytes
|
||||||
w_tile*_pixelSize, h_tile, cudaMemcpyHostToDevice,stream));
|
h_tile, // height
|
||||||
else {
|
cudaMemcpyHostToDevice,stream));
|
||||||
|
}
|
||||||
|
else { // use a cpu buffer to load image data to gpu
|
||||||
|
|
||||||
// get the total tile size in bytes
|
// get the total tile size in bytes
|
||||||
size_t tileSize = h_tile*w_tile*_pixelSize;
|
size_t tileSize = h_tile*w_tile*_pixelSize;
|
||||||
// if the size is bigger than existing buffer, reallocate
|
// if the size is bigger than existing buffer, reallocate
|
||||||
if (tileSize > _bufferSize) {
|
if (tileSize > _bufferSize) {
|
||||||
// maybe we need to make it to fit the pagesize
|
// TODO: fit the pagesize
|
||||||
_bufferSize = tileSize;
|
_bufferSize = tileSize;
|
||||||
checkCudaErrors(cudaFree(_memPtr));
|
checkCudaErrors(cudaFree(_memPtr));
|
||||||
checkCudaErrors(cudaMallocHost((void **)&_memPtr, _bufferSize));
|
checkCudaErrors(cudaMallocHost((void **)&_memPtr, _bufferSize));
|
||||||
|
@ -132,17 +140,18 @@ void GDALImage::loadToDevice(void *dArray, size_t h_offset, size_t w_offset, siz
|
||||||
_memPtr, // pData
|
_memPtr, // pData
|
||||||
w_tile*h_tile, 1, // nBufXSize, nBufYSize
|
w_tile*h_tile, 1, // nBufXSize, nBufYSize
|
||||||
_dataType, //eBufType
|
_dataType, //eBufType
|
||||||
0, 0, //nPixelSpace, nLineSpace in pData
|
0, 0 //nPixelSpace, nLineSpace in pData
|
||||||
NULL //psExtraArg extra resampling callback
|
|
||||||
);
|
);
|
||||||
|
|
||||||
if(err != CE_None)
|
if(err != CE_None)
|
||||||
throw;
|
throw; // throw if reading error occurs; message reported by GDAL
|
||||||
|
|
||||||
// copy from buffer to gpu
|
// copy from buffer to gpu
|
||||||
checkCudaErrors(cudaMemcpyAsync(dArray, _memPtr, tileSize, cudaMemcpyHostToDevice, stream));
|
checkCudaErrors(cudaMemcpyAsync(dArray, _memPtr, tileSize, cudaMemcpyHostToDevice, stream));
|
||||||
}
|
}
|
||||||
|
// all done
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// destructor
|
||||||
GDALImage::~GDALImage()
|
GDALImage::~GDALImage()
|
||||||
{
|
{
|
||||||
// free the virtual memory
|
// free the virtual memory
|
||||||
|
|
|
@ -1,61 +1,65 @@
|
||||||
// -*- c++ -*-
|
|
||||||
/**
|
/**
|
||||||
* \brief Class for an image described GDAL vrt
|
* @file GDALImage.h
|
||||||
|
* @brief Interface with GDAL vrt driver
|
||||||
*
|
*
|
||||||
* only complex (pixelOffset=8) or real(pixelOffset=4) images are supported, such as SLC and single-precision TIFF
|
* To read image file with the GDAL vrt driver, including SLC, GeoTIFF images
|
||||||
|
* @warning Only single precision images are supported: complex(pixelOffset=8) or real(pixelOffset=4).
|
||||||
|
* @warning Only single band file is currently supported.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
// code guard
|
||||||
#ifndef __GDALIMAGE_H
|
#ifndef __GDALIMAGE_H
|
||||||
#define __GDALIMAGE_H
|
#define __GDALIMAGE_H
|
||||||
|
|
||||||
|
// dependencies
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <gdal_priv.h>
|
#include <gdal_priv.h>
|
||||||
#include <cpl_conv.h>
|
#include <cpl_conv.h>
|
||||||
|
|
||||||
class GDALImage{
|
|
||||||
|
|
||||||
|
class GDALImage{
|
||||||
public:
|
public:
|
||||||
|
// specify the types
|
||||||
using size_t = std::size_t;
|
using size_t = std::size_t;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
size_t _fileSize;
|
int _height; ///< image height
|
||||||
int _height;
|
int _width; ///< image width
|
||||||
int _width;
|
|
||||||
|
|
||||||
// buffer pointer
|
void * _memPtr = NULL; ///< pointer to buffer
|
||||||
void * _memPtr = NULL;
|
|
||||||
|
|
||||||
int _pixelSize; //in bytes
|
int _pixelSize; ///< pixel size in bytes
|
||||||
|
|
||||||
int _isComplex;
|
int _isComplex; ///< whether the image is complex
|
||||||
|
|
||||||
size_t _bufferSize;
|
size_t _bufferSize; ///< buffer size
|
||||||
int _useMmap;
|
int _useMmap; ///< whether to use memory map
|
||||||
|
|
||||||
|
// GDAL temporary objects
|
||||||
GDALDataType _dataType;
|
GDALDataType _dataType;
|
||||||
CPLVirtualMem * _poBandVirtualMem = NULL;
|
CPLVirtualMem * _poBandVirtualMem = NULL;
|
||||||
GDALDataset * _poDataset = NULL;
|
GDALDataset * _poDataset = NULL;
|
||||||
GDALRasterBand * _poBand = NULL;
|
GDALRasterBand * _poBand = NULL;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
//disable default constructor
|
||||||
GDALImage() = delete;
|
GDALImage() = delete;
|
||||||
|
// constructor
|
||||||
GDALImage(std::string fn, int band=1, int cacheSizeInGB=0, int useMmap=1);
|
GDALImage(std::string fn, int band=1, int cacheSizeInGB=0, int useMmap=1);
|
||||||
|
// destructor
|
||||||
|
~GDALImage();
|
||||||
|
|
||||||
|
// get class properties
|
||||||
void * getmemPtr()
|
void * getmemPtr()
|
||||||
{
|
{
|
||||||
return(_memPtr);
|
return(_memPtr);
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t getFileSize()
|
int getHeight() {
|
||||||
{
|
|
||||||
return (_fileSize);
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t getHeight() {
|
|
||||||
return (_height);
|
return (_height);
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t getWidth()
|
int getWidth()
|
||||||
{
|
{
|
||||||
return (_width);
|
return (_width);
|
||||||
}
|
}
|
||||||
|
@ -70,9 +74,10 @@ public:
|
||||||
return _isComplex;
|
return _isComplex;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// load data from cpu buffer to gpu
|
||||||
void loadToDevice(void *dArray, size_t h_offset, size_t w_offset, size_t h_tile, size_t w_tile, cudaStream_t stream);
|
void loadToDevice(void *dArray, size_t h_offset, size_t w_offset, size_t h_tile, size_t w_tile, cudaStream_t stream);
|
||||||
~GDALImage();
|
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif //__GDALIMAGE_H
|
#endif //__GDALIMAGE_H
|
||||||
|
// end of file
|
||||||
|
|
|
@ -1,12 +1,13 @@
|
||||||
PROJECT = CUAMPCOR
|
PROJECT = CUAMPCOR
|
||||||
|
|
||||||
LDFLAGS = -lcuda -lcudart -lcufft -lcublas
|
LDFLAGS = -lcuda -lcudart -lcufft -lgdal
|
||||||
CXXFLAGS = -std=c++11 -fpermissive -fPIC -shared
|
CXXFLAGS = -std=c++11 -fpermissive -DNDEBUG -fPIC -shared
|
||||||
NVCCFLAGS = -std=c++11 -ccbin g++ -m64 \
|
NVCCFLAGS = -std=c++11 -m64 -DNDEBUG \
|
||||||
-gencode arch=compute_35,code=sm_35 \
|
-gencode arch=compute_35,code=sm_35 \
|
||||||
-gencode arch=compute_60,code=sm_60 \
|
-gencode arch=compute_60,code=sm_60 \
|
||||||
-Xcompiler -fPIC -shared -Wno-deprecated-gpu-targets \
|
-Xcompiler -fPIC -shared -Wno-deprecated-gpu-targets \
|
||||||
-ftz=false -prec-div=true -prec-sqrt=true
|
-ftz=false -prec-div=true -prec-sqrt=true \
|
||||||
|
-I/usr/include/gdal
|
||||||
|
|
||||||
CXX=g++
|
CXX=g++
|
||||||
NVCC=nvcc
|
NVCC=nvcc
|
||||||
|
@ -69,4 +70,4 @@ pyampcor: $(OBJS)
|
||||||
rm -f PyCuAmpcor.cpp && python3 setup.py build_ext --inplace
|
rm -f PyCuAmpcor.cpp && python3 setup.py build_ext --inplace
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
rm -rf *.o *so build *~ PyCuAmpcor.cpp ctest *.dat
|
rm -rf *.o *so build *~ PyCuAmpcor.cpp *.dat
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
#
|
#
|
||||||
# PYX file to control Python module interface to underlying CUDA-Ampcor code
|
# PYX file to control Python module interface to underlying CUDA-Ampcor code
|
||||||
#
|
#
|
||||||
|
|
||||||
from libcpp.string cimport string
|
from libcpp.string cimport string
|
||||||
import numpy as np
|
import numpy as np
|
||||||
cimport numpy as np
|
cimport numpy as np
|
||||||
|
@ -9,23 +10,21 @@ cimport numpy as np
|
||||||
cdef extern from "cudaUtil.h":
|
cdef extern from "cudaUtil.h":
|
||||||
int gpuDeviceInit(int)
|
int gpuDeviceInit(int)
|
||||||
void gpuDeviceList()
|
void gpuDeviceList()
|
||||||
int gpuGetMaxGflopsDeviceId()
|
|
||||||
|
|
||||||
def listGPU():
|
def listGPU():
|
||||||
gpuDeviceList()
|
gpuDeviceList()
|
||||||
|
|
||||||
def findGPU():
|
|
||||||
return gpuGetMaxGflopsDeviceId()
|
|
||||||
|
|
||||||
def setGPU(int id):
|
def setGPU(int id):
|
||||||
return gpuDeviceInit(id)
|
return gpuDeviceInit(id)
|
||||||
|
|
||||||
|
def version():
|
||||||
|
return "2.0.0"
|
||||||
|
|
||||||
cdef extern from "cuAmpcorParameter.h":
|
cdef extern from "cuAmpcorParameter.h":
|
||||||
cdef cppclass cuAmpcorParameter:
|
cdef cppclass cuAmpcorParameter:
|
||||||
cuAmpcorParameter() except +
|
cuAmpcorParameter() except +
|
||||||
int algorithm ## Cross-correlation algorithm: 0=freq domain 1=time domain
|
int algorithm ## Cross-correlation algorithm: 0=freq domain 1=time domain
|
||||||
int deviceID ## Targeted GPU device ID: use -1 to auto select
|
int deviceID ## Targeted GPU device ID
|
||||||
int nStreams ## Number of streams to asynchonize data transfers and compute kernels
|
int nStreams ## Number of streams to asynchonize data transfers and compute kernels
|
||||||
int derampMethod ## Method for deramping 0=None, 1=average, 2=phase gradient
|
int derampMethod ## Method for deramping 0=None, 1=average, 2=phase gradient
|
||||||
|
|
||||||
|
@ -43,10 +42,12 @@ cdef extern from "cuAmpcorParameter.h":
|
||||||
int skipSampleDownRaw ## Skip size between neighboring windows in Down direction (original size)
|
int skipSampleDownRaw ## Skip size between neighboring windows in Down direction (original size)
|
||||||
int skipSampleAcrossRaw ## Skip size between neighboring windows in across direction (original size)
|
int skipSampleAcrossRaw ## Skip size between neighboring windows in across direction (original size)
|
||||||
|
|
||||||
|
int corrStatWindowSize ## Size of the raw correlation surface extracted for statistics
|
||||||
|
|
||||||
## Zoom in region near location of max correlation
|
## Zoom in region near location of max correlation
|
||||||
int zoomWindowSize ## Zoom-in window size in correlation surface (same for down and across directions)
|
int zoomWindowSize ## Zoom-in window size in correlation surface (same for down and across directions)
|
||||||
int oversamplingFactor ## Oversampling factor for interpolating correlation surface
|
int oversamplingFactor ## Oversampling factor for interpolating correlation surface
|
||||||
int oversamplingMethod
|
int oversamplingMethod ## Correlation surface oversampling method 0=fft, 1=sinc
|
||||||
|
|
||||||
float thresholdSNR ## Threshold of Signal noise ratio to remove noisy data
|
float thresholdSNR ## Threshold of Signal noise ratio to remove noisy data
|
||||||
|
|
||||||
|
@ -96,20 +97,22 @@ cdef extern from "cuAmpcorParameter.h":
|
||||||
int *referenceChunkWidth ## array of width of all reference chunks
|
int *referenceChunkWidth ## array of width of all reference chunks
|
||||||
int *secondaryChunkHeight ## array of width of all reference chunks
|
int *secondaryChunkHeight ## array of width of all reference chunks
|
||||||
int *secondaryChunkWidth ## array of width of all secondary chunks
|
int *secondaryChunkWidth ## array of width of all secondary chunks
|
||||||
int maxReferenceChunkHeight ## max height for all reference/secondary chunks, determine the size of reading cache in GPU
|
int maxReferenceChunkHeight ## max height for all reference chunks, determine the size of reading cache in GPU
|
||||||
int maxReferenceChunkWidth ## max width for all reference chunks, determine the size of reading cache in GPU
|
int maxReferenceChunkWidth ## max width for all reference chunks, determine the size of reading cache in GPU
|
||||||
int maxSecondaryChunkHeight
|
int maxSecondaryChunkHeight ## max height for secondary chunk
|
||||||
int maxSecondaryChunkWidth
|
int maxSecondaryChunkWidth ## max width for secondary chunk
|
||||||
|
|
||||||
string grossOffsetImageName
|
string grossOffsetImageName ## Output Gross Offset fields filename
|
||||||
string offsetImageName ## Output Offset fields filename
|
string offsetImageName ## Output Offset fields filename
|
||||||
string snrImageName ## Output SNR filename
|
string snrImageName ## Output SNR filename
|
||||||
string covImageName ## Output COV filename
|
string covImageName ## Output COV filename
|
||||||
void setStartPixels(int*, int*, int*, int*)
|
|
||||||
void setStartPixels(int, int, int*, int*)
|
|
||||||
void setStartPixels(int, int, int, int)
|
|
||||||
void checkPixelInImageRange() ## check whether
|
|
||||||
|
|
||||||
|
## set start pixels for reference/secondary windows
|
||||||
|
void setStartPixels(int*, int*, int*, int*) ## varying locations for reference and secondary
|
||||||
|
void setStartPixels(int, int, int*, int*) ## first window location for reference, varying for secondary
|
||||||
|
void setStartPixels(int, int, int, int) ## first window locations for reference and secondary
|
||||||
|
|
||||||
|
void checkPixelInImageRange() ## check whether all windows are within image range
|
||||||
void setupParameters() ## Process other parameters after Python Inpu
|
void setupParameters() ## Process other parameters after Python Inpu
|
||||||
|
|
||||||
cdef extern from "cuAmpcorController.h":
|
cdef extern from "cuAmpcorController.h":
|
||||||
|
@ -217,6 +220,13 @@ cdef class PyCuAmpcor(object):
|
||||||
def rawDataOversamplingFactor(self, int a):
|
def rawDataOversamplingFactor(self, int a):
|
||||||
self.c_cuAmpcor.param.rawDataOversamplingFactor = a
|
self.c_cuAmpcor.param.rawDataOversamplingFactor = a
|
||||||
@property
|
@property
|
||||||
|
def corrStatWindowSize(self):
|
||||||
|
"""Size of correlation surface extracted for statistics"""
|
||||||
|
return self.c_cuAmpcor.param.corrStatWindowSize
|
||||||
|
@corrStatWindowSize.setter
|
||||||
|
def corrStatWindowSize(self, int a):
|
||||||
|
self.c_cuAmpcor.param.corrStatWindowSize = a
|
||||||
|
@property
|
||||||
def corrSurfaceZoomInWindow(self):
|
def corrSurfaceZoomInWindow(self):
|
||||||
"""Zoom-In Window Size for correlation surface"""
|
"""Zoom-In Window Size for correlation surface"""
|
||||||
return self.c_cuAmpcor.param.zoomWindowSize
|
return self.c_cuAmpcor.param.zoomWindowSize
|
||||||
|
@ -231,11 +241,11 @@ cdef class PyCuAmpcor(object):
|
||||||
def corrSurfaceOverSamplingFactor(self, int a):
|
def corrSurfaceOverSamplingFactor(self, int a):
|
||||||
self.c_cuAmpcor.param.oversamplingFactor = a
|
self.c_cuAmpcor.param.oversamplingFactor = a
|
||||||
@property
|
@property
|
||||||
def corrSufaceOverSamplingMethod(self):
|
def corrSurfaceOverSamplingMethod(self):
|
||||||
"""Oversampling method for correlation surface(0=fft,1=sinc)"""
|
"""Oversampling method for correlation surface(0=fft,1=sinc)"""
|
||||||
return self.c_cuAmpcor.param.oversamplingMethod
|
return self.c_cuAmpcor.param.oversamplingMethod
|
||||||
@corrSufaceOverSamplingMethod.setter
|
@corrSurfaceOverSamplingMethod.setter
|
||||||
def corrSufaceOverSamplingMethod(self, int a):
|
def corrSurfaceOverSamplingMethod(self, int a):
|
||||||
self.c_cuAmpcor.param.oversamplingMethod = a
|
self.c_cuAmpcor.param.oversamplingMethod = a
|
||||||
@property
|
@property
|
||||||
def referenceImageName(self):
|
def referenceImageName(self):
|
||||||
|
@ -318,31 +328,30 @@ cdef class PyCuAmpcor(object):
|
||||||
def numberChunks(self):
|
def numberChunks(self):
|
||||||
return self.c_cuAmpcor.param.numberChunks
|
return self.c_cuAmpcor.param.numberChunks
|
||||||
|
|
||||||
|
## gross offset
|
||||||
## gross offets
|
|
||||||
@property
|
@property
|
||||||
def grossOffsetImageName(self):
|
def grossOffsetImageName(self):
|
||||||
return self.c_cuAmpcor.param.grossOffsetImageName
|
return self.c_cuAmpcor.param.grossOffsetImageName.decode("utf-8")
|
||||||
@grossOffsetImageName.setter
|
@grossOffsetImageName.setter
|
||||||
def grossOffsetImageName(self, str a):
|
def grossOffsetImageName(self, str a):
|
||||||
self.c_cuAmpcor.param.grossOffsetImageName = <string> a.encode()
|
self.c_cuAmpcor.param.grossOffsetImageName = <string> a.encode()
|
||||||
@property
|
@property
|
||||||
def offsetImageName(self):
|
def offsetImageName(self):
|
||||||
return self.c_cuAmpcor.param.offsetImageName
|
return self.c_cuAmpcor.param.offsetImageName.decode("utf-8")
|
||||||
@offsetImageName.setter
|
@offsetImageName.setter
|
||||||
def offsetImageName(self, str a):
|
def offsetImageName(self, str a):
|
||||||
self.c_cuAmpcor.param.offsetImageName = <string> a.encode()
|
self.c_cuAmpcor.param.offsetImageName = <string> a.encode()
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def snrImageName(self):
|
def snrImageName(self):
|
||||||
return self.c_cuAmpcor.param.snrImageName
|
return self.c_cuAmpcor.param.snrImageName.decode("utf-8")
|
||||||
@snrImageName.setter
|
@snrImageName.setter
|
||||||
def snrImageName(self, str a):
|
def snrImageName(self, str a):
|
||||||
self.c_cuAmpcor.param.snrImageName = <string> a.encode()
|
self.c_cuAmpcor.param.snrImageName = <string> a.encode()
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def covImageName(self):
|
def covImageName(self):
|
||||||
return self.c_cuAmpcor.param.covImageName
|
return self.c_cuAmpcor.param.covImageName.decode("utf-8")
|
||||||
@covImageName.setter
|
@covImageName.setter
|
||||||
def covImageName(self, str a):
|
def covImageName(self, str a):
|
||||||
self.c_cuAmpcor.param.covImageName = <string> a.encode()
|
self.c_cuAmpcor.param.covImageName = <string> a.encode()
|
||||||
|
@ -440,8 +449,4 @@ cdef class PyCuAmpcor(object):
|
||||||
self.c_cuAmpcor.runAmpcor()
|
self.c_cuAmpcor.runAmpcor()
|
||||||
|
|
||||||
|
|
||||||
|
# end of file
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
import sys
|
import sys
|
||||||
|
import subprocess
|
||||||
|
|
||||||
Import('envPyCuAmpcor')
|
Import('envPyCuAmpcor')
|
||||||
package = envPyCuAmpcor['PACKAGE']
|
package = envPyCuAmpcor['PACKAGE']
|
||||||
|
@ -16,6 +17,10 @@ listFiles = ['GDALImage.cu', 'cuArrays.cu', 'cuArraysCopy.cu',
|
||||||
|
|
||||||
lib = envPyCuAmpcor.SharedLibrary(target = 'PyCuAmpcor', source= listFiles, SHLIBPREFIX='')
|
lib = envPyCuAmpcor.SharedLibrary(target = 'PyCuAmpcor', source= listFiles, SHLIBPREFIX='')
|
||||||
|
|
||||||
|
# add gdal include path
|
||||||
|
gdal_cflags = subprocess.check_output('gdal-config --cflags', shell=True)[:-1].decode('utf-8')
|
||||||
|
envPyCuAmpcor.Append(ENABLESHAREDNVCCFLAG = ' -DNDEBUG ' + gdal_cflags)
|
||||||
|
|
||||||
envPyCuAmpcor.Install(build,lib)
|
envPyCuAmpcor.Install(build,lib)
|
||||||
envPyCuAmpcor.Alias('install', build)
|
envPyCuAmpcor.Alias('install', build)
|
||||||
|
|
||||||
|
|
|
@ -1,177 +0,0 @@
|
||||||
#include "SlcImage.h"
|
|
||||||
#include <iostream>
|
|
||||||
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include <sys/types.h>
|
|
||||||
#include <sys/stat.h>
|
|
||||||
#include <unistd.h>
|
|
||||||
#include <fcntl.h>
|
|
||||||
#include <sys/mman.h>
|
|
||||||
#include <cuComplex.h>
|
|
||||||
#include <assert.h>
|
|
||||||
#include <cublas_v2.h>
|
|
||||||
#include "cudaError.h"
|
|
||||||
#include <errno.h>
|
|
||||||
#include <unistd.h>
|
|
||||||
|
|
||||||
SlcImage::SlcImage() {
|
|
||||||
fileid = -1;
|
|
||||||
is_mapped = 0;
|
|
||||||
is_opened = 0;
|
|
||||||
height = 0;
|
|
||||||
width = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
SlcImage::SlcImage(std::string fn, size_t h, size_t w) {
|
|
||||||
filename = fn;
|
|
||||||
width = w;
|
|
||||||
height = h;
|
|
||||||
is_mapped = 0;
|
|
||||||
is_opened = 0;
|
|
||||||
openFile();
|
|
||||||
buffersize = filesize;
|
|
||||||
offset = 0l;
|
|
||||||
openFile();
|
|
||||||
setupMmap();
|
|
||||||
}
|
|
||||||
|
|
||||||
SlcImage::SlcImage(std::string fn, size_t h, size_t w, size_t bsize) {
|
|
||||||
filename = fn;
|
|
||||||
width = w;
|
|
||||||
height = h;
|
|
||||||
is_mapped = 0;
|
|
||||||
is_opened = 0;
|
|
||||||
buffersize = bsize*(1l<<30); //1G as a unit
|
|
||||||
offset = 0l;
|
|
||||||
openFile();
|
|
||||||
//std::cout << "buffer and file sizes" << buffersize << " " << filesize << std::endl;
|
|
||||||
setupMmap();
|
|
||||||
}
|
|
||||||
|
|
||||||
void SlcImage::setBufferSize(size_t sizeInG)
|
|
||||||
{
|
|
||||||
buffersize = sizeInG*(1l<<30);
|
|
||||||
}
|
|
||||||
|
|
||||||
void SlcImage::openFile()
|
|
||||||
{
|
|
||||||
if(!is_opened){
|
|
||||||
fileid = open(filename.c_str(), O_RDONLY, 0);
|
|
||||||
if(fileid == -1)
|
|
||||||
{
|
|
||||||
fprintf(stderr, "Error opening file %s\n", filename.c_str());
|
|
||||||
exit(EXIT_FAILURE);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
struct stat st;
|
|
||||||
stat(filename.c_str(), &st);
|
|
||||||
filesize = st.st_size;
|
|
||||||
//lseek(fileid,filesize-1,SEEK_SET);
|
|
||||||
is_opened = 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
void SlcImage::closeFile()
|
|
||||||
{
|
|
||||||
if(is_opened)
|
|
||||||
{
|
|
||||||
close(fileid);
|
|
||||||
is_opened = 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
/*
|
|
||||||
void SlcImage::setupMmap()
|
|
||||||
{
|
|
||||||
if(!is_mapped) {
|
|
||||||
float2 *fmmap = (float2 *)mmap(NULL, filesize, PROT_READ, MAP_SHARED, fileid, 0);
|
|
||||||
assert (fmmap != MAP_FAILED);
|
|
||||||
mmapPtr = fmmap;
|
|
||||||
is_mapped = 1;
|
|
||||||
}
|
|
||||||
}*/
|
|
||||||
|
|
||||||
void SlcImage::setupMmap()
|
|
||||||
{
|
|
||||||
|
|
||||||
if(is_opened) {
|
|
||||||
if(!is_mapped) {
|
|
||||||
void * fmmap;
|
|
||||||
if((fmmap=mmap((caddr_t)0, buffersize, PROT_READ, MAP_SHARED, fileid, offset)) == MAP_FAILED)
|
|
||||||
{
|
|
||||||
fprintf(stderr, "mmap error: %d %d\n", fileid, errno);
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
mmapPtr = (float2 *)fmmap;
|
|
||||||
is_mapped = 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
fprintf(stderr, "error! file is not opened");
|
|
||||||
exit(1);}
|
|
||||||
//fprintf(stderr, "debug mmap setup %ld, %ld\n", offset, buffersize);
|
|
||||||
//fprintf(stderr, "starting mmap pixel %f %f\n", mmapPtr[0].x, mmapPtr[0].y);
|
|
||||||
}
|
|
||||||
|
|
||||||
void SlcImage::mUnMap()
|
|
||||||
{
|
|
||||||
if(is_mapped) {
|
|
||||||
if(munmap((void *)mmapPtr, buffersize) == -1)
|
|
||||||
{
|
|
||||||
fprintf(stderr, "munmap error: %d\n", fileid);
|
|
||||||
}
|
|
||||||
is_mapped = 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/// load a tile of data h_tile x w_tile from CPU (mmap) to GPU
|
|
||||||
/// @param dArray pointer for array in device memory
|
|
||||||
/// @param h_offset Down/Height offset
|
|
||||||
/// @param w_offset Across/Width offset
|
|
||||||
/// @param h_tile Down/Height tile size
|
|
||||||
/// @param w_tile Across/Width tile size
|
|
||||||
/// @param stream CUDA stream for copying
|
|
||||||
void SlcImage::loadToDevice(float2 *dArray, size_t h_offset, size_t w_offset, size_t h_tile, size_t w_tile, cudaStream_t stream)
|
|
||||||
{
|
|
||||||
size_t tileStartAddress = (h_offset*width + w_offset)*sizeof(float2);
|
|
||||||
size_t tileLastAddress = tileStartAddress + (h_tile*width + w_tile)*sizeof(float2);
|
|
||||||
size_t pagesize = getpagesize();
|
|
||||||
|
|
||||||
if(tileStartAddress < offset || tileLastAddress > offset + buffersize )
|
|
||||||
{
|
|
||||||
size_t temp = tileStartAddress/pagesize;
|
|
||||||
offset = temp*pagesize;
|
|
||||||
mUnMap();
|
|
||||||
setupMmap();
|
|
||||||
}
|
|
||||||
|
|
||||||
float2 *startPtr = mmapPtr ;
|
|
||||||
startPtr += (tileStartAddress - offset)/sizeof(float2);
|
|
||||||
|
|
||||||
// @note
|
|
||||||
// We assume down/across directions as rows/cols. Therefore, SLC mmap and device array are both row major.
|
|
||||||
// cuBlas assumes both source and target arrays are column major.
|
|
||||||
// To use cublasSetMatrix, we need to switch w_tile/h_tile for rows/cols
|
|
||||||
// checkCudaErrors(cublasSetMatrixAsync(w_tile, h_tile, sizeof(float2), startPtr, width, dArray, w_tile, stream));
|
|
||||||
|
|
||||||
checkCudaErrors(cudaMemcpy2DAsync(dArray, w_tile*sizeof(float2), startPtr, width*sizeof(float2),
|
|
||||||
w_tile*sizeof(float2), h_tile, cudaMemcpyHostToDevice,stream));
|
|
||||||
}
|
|
||||||
|
|
||||||
SlcImage::~SlcImage()
|
|
||||||
{
|
|
||||||
mUnMap();
|
|
||||||
closeFile();
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void SlcImage::testData()
|
|
||||||
{
|
|
||||||
float2 *test;
|
|
||||||
test =(float2 *)malloc(10*sizeof(float2));
|
|
||||||
mempcpy(test, mmapPtr+1000000l, 10*sizeof(float2));
|
|
||||||
for(int i=0; i<10; i++)
|
|
||||||
std::cout << test[i].x << " " << test[i].y << ",";
|
|
||||||
std::cout << std::endl;
|
|
||||||
}
|
|
|
@ -1,64 +0,0 @@
|
||||||
// -*- c++ -*-
|
|
||||||
#ifndef __SLCIMAGE_H
|
|
||||||
#define __SLCIMAGE_H
|
|
||||||
|
|
||||||
#include <cublas_v2.h>
|
|
||||||
#include <string>
|
|
||||||
|
|
||||||
class SlcImage{
|
|
||||||
private:
|
|
||||||
std::string filename;
|
|
||||||
int fileid;
|
|
||||||
size_t filesize;
|
|
||||||
size_t height;
|
|
||||||
size_t width;
|
|
||||||
|
|
||||||
bool is_mapped;
|
|
||||||
bool is_opened;
|
|
||||||
float2* mmapPtr;
|
|
||||||
size_t buffersize;
|
|
||||||
size_t offset;
|
|
||||||
|
|
||||||
public:
|
|
||||||
SlcImage();
|
|
||||||
|
|
||||||
SlcImage(std::string fn, size_t h, size_t w);
|
|
||||||
SlcImage(std::string fn, size_t h, size_t w, size_t bsize);
|
|
||||||
void openFile();
|
|
||||||
void closeFile();
|
|
||||||
void setupMmap();
|
|
||||||
void mUnMap();
|
|
||||||
void setBufferSize(size_t size);
|
|
||||||
|
|
||||||
float2* getmmapPtr()
|
|
||||||
{
|
|
||||||
return(mmapPtr);
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t getFileSize()
|
|
||||||
{
|
|
||||||
return (filesize);
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t getHeight() {
|
|
||||||
return (height);
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t getWidth()
|
|
||||||
{
|
|
||||||
return (width);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool getMmapStatus()
|
|
||||||
{
|
|
||||||
return(is_mapped);
|
|
||||||
}
|
|
||||||
|
|
||||||
//tested
|
|
||||||
void loadToDevice(float2 *dArray, size_t h_offset, size_t w_offset, size_t h_tile, size_t w_tile, cudaStream_t stream);
|
|
||||||
~SlcImage();
|
|
||||||
void testData();
|
|
||||||
|
|
||||||
};
|
|
||||||
|
|
||||||
#endif //__SLCIMAGE_H
|
|
|
@ -13,78 +13,132 @@ void cuAmpcorChunk::run(int idxDown_, int idxAcross_)
|
||||||
|
|
||||||
// load reference image chunk
|
// load reference image chunk
|
||||||
loadReferenceChunk();
|
loadReferenceChunk();
|
||||||
|
// take amplitudes
|
||||||
//std::cout << "load reference chunk ok\n";
|
|
||||||
|
|
||||||
cuArraysAbs(c_referenceBatchRaw, r_referenceBatchRaw, stream);
|
cuArraysAbs(c_referenceBatchRaw, r_referenceBatchRaw, stream);
|
||||||
|
|
||||||
|
#ifdef CUAMPCOR_DEBUG
|
||||||
|
// dump the raw reference image(s)
|
||||||
|
c_referenceBatchRaw->outputToFile("c_referenceBatchRaw", stream);
|
||||||
|
r_referenceBatchRaw->outputToFile("r_referenceBatchRaw", stream);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// compute and subtract mean values (for normalized)
|
||||||
cuArraysSubtractMean(r_referenceBatchRaw, stream);
|
cuArraysSubtractMean(r_referenceBatchRaw, stream);
|
||||||
|
|
||||||
|
#ifdef CUAMPCOR_DEBUG
|
||||||
|
// dump the raw reference image(s)
|
||||||
|
r_referenceBatchRaw->outputToFile("r_referenceBatchRawSubMean", stream);
|
||||||
|
#endif
|
||||||
|
|
||||||
// load secondary image chunk
|
// load secondary image chunk
|
||||||
loadSecondaryChunk();
|
loadSecondaryChunk();
|
||||||
|
// take amplitudes
|
||||||
cuArraysAbs(c_secondaryBatchRaw, r_secondaryBatchRaw, stream);
|
cuArraysAbs(c_secondaryBatchRaw, r_secondaryBatchRaw, stream);
|
||||||
|
|
||||||
//std::cout << "load secondary chunk ok\n";
|
#ifdef CUAMPCOR_DEBUG
|
||||||
|
// dump the raw secondary image(s)
|
||||||
|
c_secondaryBatchRaw->outputToFile("c_secondaryBatchRaw", stream);
|
||||||
|
r_secondaryBatchRaw->outputToFile("r_secondaryBatchRaw", stream);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
//cross correlation for un-oversampled data
|
||||||
//cross correlation for none-oversampled data
|
|
||||||
if(param->algorithm == 0) {
|
if(param->algorithm == 0) {
|
||||||
cuCorrFreqDomain->execute(r_referenceBatchRaw, r_secondaryBatchRaw, r_corrBatchRaw);
|
cuCorrFreqDomain->execute(r_referenceBatchRaw, r_secondaryBatchRaw, r_corrBatchRaw);
|
||||||
}
|
} else {
|
||||||
else {
|
|
||||||
cuCorrTimeDomain(r_referenceBatchRaw, r_secondaryBatchRaw, r_corrBatchRaw, stream); //time domain cross correlation
|
cuCorrTimeDomain(r_referenceBatchRaw, r_secondaryBatchRaw, r_corrBatchRaw, stream); //time domain cross correlation
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef CUAMPCOR_DEBUG
|
||||||
|
// dump the un-normalized correlation surface
|
||||||
|
r_corrBatchRaw->outputToFile("r_corrBatchRawUnNorm", stream);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// normalize the correlation surface
|
||||||
cuCorrNormalize(r_referenceBatchRaw, r_secondaryBatchRaw, r_corrBatchRaw, stream);
|
cuCorrNormalize(r_referenceBatchRaw, r_secondaryBatchRaw, r_corrBatchRaw, stream);
|
||||||
|
|
||||||
|
#ifdef CUAMPCOR_DEBUG
|
||||||
|
// dump the normalized correlation surface
|
||||||
|
r_corrBatchRaw->outputToFile("r_corrBatchRaw", stream);
|
||||||
|
#endif
|
||||||
|
|
||||||
// find the maximum location of none-oversampled correlation
|
// find the maximum location of none-oversampled correlation
|
||||||
// 41 x 41, if halfsearchrange=20
|
// 41 x 41, if halfsearchrange=20
|
||||||
//cuArraysMaxloc2D(r_corrBatchRaw, offsetInit, stream);
|
|
||||||
cuArraysMaxloc2D(r_corrBatchRaw, offsetInit, r_maxval, stream);
|
cuArraysMaxloc2D(r_corrBatchRaw, offsetInit, r_maxval, stream);
|
||||||
|
|
||||||
offsetInit->outputToFile("offsetInit1", stream);
|
|
||||||
|
|
||||||
// Estimation of statistics
|
// Estimation of statistics
|
||||||
// Author: Minyan Zhong
|
|
||||||
// Extraction of correlation surface around the peak
|
// Extraction of correlation surface around the peak
|
||||||
cuArraysCopyExtractCorr(r_corrBatchRaw, r_corrBatchRawZoomIn, i_corrBatchZoomInValid, offsetInit, stream);
|
cuArraysCopyExtractCorr(r_corrBatchRaw, r_corrBatchRawZoomIn, i_corrBatchZoomInValid, offsetInit, stream);
|
||||||
|
|
||||||
cudaDeviceSynchronize();
|
|
||||||
|
|
||||||
// debug: output the intermediate results
|
|
||||||
r_maxval->outputToFile("r_maxval",stream);
|
|
||||||
r_corrBatchRaw->outputToFile("r_corrBatchRaw",stream);
|
|
||||||
r_corrBatchRawZoomIn->outputToFile("r_corrBatchRawZoomIn",stream);
|
|
||||||
i_corrBatchZoomInValid->outputToFile("i_corrBatchZoomInValid",stream);
|
|
||||||
|
|
||||||
// Summation of correlation and data point values
|
// Summation of correlation and data point values
|
||||||
cuArraysSumCorr(r_corrBatchRawZoomIn, i_corrBatchZoomInValid, r_corrBatchSum, i_corrBatchValidCount, stream);
|
cuArraysSumCorr(r_corrBatchRawZoomIn, i_corrBatchZoomInValid, r_corrBatchSum, i_corrBatchValidCount, stream);
|
||||||
|
|
||||||
|
#ifdef CUAMPCOR_DEBUG
|
||||||
|
i_corrBatchZoomInValid->outputToFile("i_corrBatchZoomInValid", stream);
|
||||||
|
r_corrBatchSum->outputToFile("r_corrBatchSum", stream);
|
||||||
|
#endif
|
||||||
|
|
||||||
// SNR
|
// SNR
|
||||||
cuEstimateSnr(r_corrBatchSum, i_corrBatchValidCount, r_maxval, r_snrValue, stream);
|
cuEstimateSnr(r_corrBatchSum, i_corrBatchValidCount, r_maxval, r_snrValue, stream);
|
||||||
|
|
||||||
// Variance
|
// Variance
|
||||||
// cuEstimateVariance(r_corrBatchRaw, offsetInit, r_maxval, r_covValue, stream);
|
cuEstimateVariance(r_corrBatchRaw, offsetInit, r_maxval, r_covValue, stream);
|
||||||
|
|
||||||
|
#ifdef CUAMPCOR_DEBUG
|
||||||
|
offsetInit->outputToFile("i_offsetInit", stream);
|
||||||
|
r_maxval->outputToFile("r_maxval", stream);
|
||||||
|
r_corrBatchRawZoomIn->outputToFile("r_corrBatchRawStatZoomIn", stream);
|
||||||
|
i_corrBatchZoomInValid->outputToFile("i_corrBatchStatZoomInValid", stream);
|
||||||
|
#endif
|
||||||
|
|
||||||
// Using the approximate estimation to adjust secondary image (half search window size becomes only 4 pixels)
|
// Using the approximate estimation to adjust secondary image (half search window size becomes only 4 pixels)
|
||||||
//offsetInit->debuginfo(stream);
|
|
||||||
// determine the starting pixel to extract secondary images around the max location
|
// determine the starting pixel to extract secondary images around the max location
|
||||||
cuDetermineSecondaryExtractOffset(offsetInit,
|
cuDetermineSecondaryExtractOffset(offsetInit,
|
||||||
|
maxLocShift,
|
||||||
param->halfSearchRangeDownRaw, // old range
|
param->halfSearchRangeDownRaw, // old range
|
||||||
param->halfSearchRangeAcrossRaw,
|
param->halfSearchRangeAcrossRaw,
|
||||||
param->halfZoomWindowSizeRaw, // new range
|
param->halfZoomWindowSizeRaw, // new range
|
||||||
param->halfZoomWindowSizeRaw,
|
param->halfZoomWindowSizeRaw,
|
||||||
stream);
|
stream);
|
||||||
//offsetInit->debuginfo(stream);
|
|
||||||
|
#ifdef CUAMPCOR_DEBUG
|
||||||
|
offsetInit->outputToFile("i_offsetInitAdjusted", stream);
|
||||||
|
maxLocShift->outputToFile("i_maxLocShift", stream);
|
||||||
|
#endif
|
||||||
|
|
||||||
// oversample reference
|
// oversample reference
|
||||||
// (deramping now included in oversampler)
|
// (deramping included in oversampler)
|
||||||
referenceBatchOverSampler->execute(c_referenceBatchRaw, c_referenceBatchOverSampled, param->derampMethod);
|
referenceBatchOverSampler->execute(c_referenceBatchRaw, c_referenceBatchOverSampled, param->derampMethod);
|
||||||
|
// take amplitudes
|
||||||
cuArraysAbs(c_referenceBatchOverSampled, r_referenceBatchOverSampled, stream);
|
cuArraysAbs(c_referenceBatchOverSampled, r_referenceBatchOverSampled, stream);
|
||||||
|
|
||||||
|
#ifdef CUAMPCOR_DEBUG
|
||||||
|
// dump the oversampled reference image(s)
|
||||||
|
c_referenceBatchOverSampled->outputToFile("c_referenceBatchOverSampled", stream);
|
||||||
|
r_referenceBatchOverSampled->outputToFile("r_referenceBatchOverSampled", stream);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// compute and subtract the mean value
|
||||||
cuArraysSubtractMean(r_referenceBatchOverSampled, stream);
|
cuArraysSubtractMean(r_referenceBatchOverSampled, stream);
|
||||||
|
|
||||||
|
#ifdef CUAMPCOR_DEBUG
|
||||||
|
// dump the oversampled reference image(s) with mean subtracted
|
||||||
|
r_referenceBatchOverSampled->outputToFile("r_referenceBatchOverSampledSubMean",stream);
|
||||||
|
#endif
|
||||||
|
|
||||||
// extract secondary and oversample
|
// extract secondary and oversample
|
||||||
cuArraysCopyExtract(c_secondaryBatchRaw, c_secondaryBatchZoomIn, offsetInit, stream);
|
cuArraysCopyExtract(c_secondaryBatchRaw, c_secondaryBatchZoomIn, offsetInit, stream);
|
||||||
secondaryBatchOverSampler->execute(c_secondaryBatchZoomIn, c_secondaryBatchOverSampled, param->derampMethod);
|
secondaryBatchOverSampler->execute(c_secondaryBatchZoomIn, c_secondaryBatchOverSampled, param->derampMethod);
|
||||||
|
// take amplitudes
|
||||||
cuArraysAbs(c_secondaryBatchOverSampled, r_secondaryBatchOverSampled, stream);
|
cuArraysAbs(c_secondaryBatchOverSampled, r_secondaryBatchOverSampled, stream);
|
||||||
|
|
||||||
|
#ifdef CUAMPCOR_DEBUG
|
||||||
|
// dump the extracted raw secondary image
|
||||||
|
c_secondaryBatchZoomIn->outputToFile("c_secondaryBatchZoomIn", stream);
|
||||||
|
// dump the oversampled secondary image(s)
|
||||||
|
c_secondaryBatchOverSampled->outputToFile("c_secondaryBatchOverSampled", stream);
|
||||||
|
r_secondaryBatchOverSampled->outputToFile("r_secondaryBatchOverSampled", stream);
|
||||||
|
#endif
|
||||||
|
|
||||||
// correlate oversampled images
|
// correlate oversampled images
|
||||||
if(param->algorithm == 0) {
|
if(param->algorithm == 0) {
|
||||||
cuCorrFreqDomain_OverSampled->execute(r_referenceBatchOverSampled, r_secondaryBatchOverSampled, r_corrBatchZoomIn);
|
cuCorrFreqDomain_OverSampled->execute(r_referenceBatchOverSampled, r_secondaryBatchOverSampled, r_corrBatchZoomIn);
|
||||||
|
@ -92,56 +146,72 @@ void cuAmpcorChunk::run(int idxDown_, int idxAcross_)
|
||||||
else {
|
else {
|
||||||
cuCorrTimeDomain(r_referenceBatchOverSampled, r_secondaryBatchOverSampled, r_corrBatchZoomIn, stream);
|
cuCorrTimeDomain(r_referenceBatchOverSampled, r_secondaryBatchOverSampled, r_corrBatchZoomIn, stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef CUAMPCOR_DEBUG
|
||||||
|
// dump the oversampled correlation surface (un-normalized)
|
||||||
|
r_corrBatchZoomIn->outputToFile("r_corrBatchZoomInUnNorm", stream);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// normalize the correlation surface
|
||||||
cuCorrNormalize(r_referenceBatchOverSampled, r_secondaryBatchOverSampled, r_corrBatchZoomIn, stream);
|
cuCorrNormalize(r_referenceBatchOverSampled, r_secondaryBatchOverSampled, r_corrBatchZoomIn, stream);
|
||||||
|
|
||||||
//std::cout << "debug correlation oversample\n";
|
#ifdef CUAMPCOR_DEBUG
|
||||||
//std::cout << r_referenceBatchOverSampled->height << " " << r_referenceBatchOverSampled->width << "\n";
|
// dump the oversampled correlation surface (normalized)
|
||||||
//std::cout << r_secondaryBatchOverSampled->height << " " << r_secondaryBatchOverSampled->width << "\n";
|
r_corrBatchZoomIn->outputToFile("r_corrBatchZoomIn", stream);
|
||||||
//std::cout << r_corrBatchZoomIn->height << " " << r_corrBatchZoomIn->width << "\n";
|
#endif
|
||||||
|
|
||||||
// oversample the correlation surface
|
// remove the last row and col to get even sequences
|
||||||
cuArraysCopyExtract(r_corrBatchZoomIn, r_corrBatchZoomInAdjust, make_int2(0,0), stream);
|
cuArraysCopyExtract(r_corrBatchZoomIn, r_corrBatchZoomInAdjust, make_int2(0,0), stream);
|
||||||
|
|
||||||
//std::cout << "debug oversampling " << r_corrBatchZoomInAdjust << " " << r_corrBatchZoomInOverSampled << "\n";
|
#ifdef CUAMPCOR_DEBUG
|
||||||
|
// dump the adjusted correlation Surface
|
||||||
|
r_corrBatchZoomInAdjust->outputToFile("r_corrBatchZoomInAdjust", stream);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// oversample the correlation surface
|
||||||
if(param->oversamplingMethod) {
|
if(param->oversamplingMethod) {
|
||||||
corrSincOverSampler->execute(r_corrBatchZoomInAdjust, r_corrBatchZoomInOverSampled);
|
// sinc interpolator only computes (-i_sincwindow, i_sincwindow)*oversamplingfactor
|
||||||
|
// we need the max loc as the center if shifted
|
||||||
|
corrSincOverSampler->execute(r_corrBatchZoomInAdjust, r_corrBatchZoomInOverSampled,
|
||||||
|
maxLocShift, param->oversamplingFactor*param->rawDataOversamplingFactor
|
||||||
|
);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
corrOverSampler->execute(r_corrBatchZoomInAdjust, r_corrBatchZoomInOverSampled);
|
corrOverSampler->execute(r_corrBatchZoomInAdjust, r_corrBatchZoomInOverSampled);
|
||||||
}
|
}
|
||||||
|
|
||||||
//find the max again
|
#ifdef CUAMPCOR_DEBUG
|
||||||
|
// dump the oversampled correlation surface
|
||||||
|
r_corrBatchZoomInOverSampled->outputToFile("r_corrBatchZoomInOverSampled", stream);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
//find the max again
|
||||||
cuArraysMaxloc2D(r_corrBatchZoomInOverSampled, offsetZoomIn, corrMaxValue, stream);
|
cuArraysMaxloc2D(r_corrBatchZoomInOverSampled, offsetZoomIn, corrMaxValue, stream);
|
||||||
|
|
||||||
|
#ifdef CUAMPCOR_DEBUG
|
||||||
|
// dump the max location on oversampled correlation surface
|
||||||
|
offsetZoomIn->outputToFile("i_offsetZoomIn", stream);
|
||||||
|
corrMaxValue->outputToFile("r_maxvalZoomInOversampled", stream);
|
||||||
|
#endif
|
||||||
|
|
||||||
// determine the final offset from non-oversampled (pixel) and oversampled (sub-pixel)
|
// determine the final offset from non-oversampled (pixel) and oversampled (sub-pixel)
|
||||||
|
// = (Init-HalfsearchRange) + ZoomIn/(2*ovs)
|
||||||
cuSubPixelOffset(offsetInit, offsetZoomIn, offsetFinal,
|
cuSubPixelOffset(offsetInit, offsetZoomIn, offsetFinal,
|
||||||
param->oversamplingFactor, param->rawDataOversamplingFactor,
|
param->oversamplingFactor, param->rawDataOversamplingFactor,
|
||||||
param->halfSearchRangeDownRaw, param->halfSearchRangeAcrossRaw,
|
param->halfSearchRangeDownRaw, param->halfSearchRangeAcrossRaw,
|
||||||
param->halfZoomWindowSizeRaw, param->halfZoomWindowSizeRaw,
|
|
||||||
stream);
|
stream);
|
||||||
//offsetInit->debuginfo(stream);
|
|
||||||
//offsetZoomIn->debuginfo(stream);
|
|
||||||
//offsetFinal->debuginfo(stream);
|
|
||||||
|
|
||||||
// Do insertion.
|
// Insert the chunk results to final images
|
||||||
// Offsetfields.
|
|
||||||
cuArraysCopyInsert(offsetFinal, offsetImage, idxDown_*param->numberWindowDownInChunk, idxAcross_*param->numberWindowAcrossInChunk,stream);
|
cuArraysCopyInsert(offsetFinal, offsetImage, idxDown_*param->numberWindowDownInChunk, idxAcross_*param->numberWindowAcrossInChunk,stream);
|
||||||
|
// snr
|
||||||
// Debugging matrix.
|
|
||||||
cuArraysCopyInsert(r_corrBatchSum, floatImage1, idxDown_*param->numberWindowDownInChunk, idxAcross_*param->numberWindowAcrossInChunk,stream);
|
|
||||||
cuArraysCopyInsert(i_corrBatchValidCount, intImage1, idxDown_*param->numberWindowDownInChunk, idxAcross_*param->numberWindowAcrossInChunk,stream);
|
|
||||||
|
|
||||||
// Old: save max correlation coefficients.
|
|
||||||
//cuArraysCopyInsert(corrMaxValue, snrImage, idxDown_*param->numberWindowDownInChunk, idxAcross_*param->numberWindowAcrossInChunk,stream);
|
|
||||||
// New: save SNR
|
|
||||||
cuArraysCopyInsert(r_snrValue, snrImage, idxDown_*param->numberWindowDownInChunk, idxAcross_*param->numberWindowAcrossInChunk,stream);
|
cuArraysCopyInsert(r_snrValue, snrImage, idxDown_*param->numberWindowDownInChunk, idxAcross_*param->numberWindowAcrossInChunk,stream);
|
||||||
|
|
||||||
// Variance.
|
// Variance.
|
||||||
cuArraysCopyInsert(r_covValue, covImage, idxDown_*param->numberWindowDownInChunk, idxAcross_*param->numberWindowAcrossInChunk,stream);
|
cuArraysCopyInsert(r_covValue, covImage, idxDown_*param->numberWindowDownInChunk, idxAcross_*param->numberWindowAcrossInChunk,stream);
|
||||||
|
// all done
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// set chunk index
|
||||||
void cuAmpcorChunk::setIndex(int idxDown_, int idxAcross_)
|
void cuAmpcorChunk::setIndex(int idxDown_, int idxAcross_)
|
||||||
{
|
{
|
||||||
idxChunkDown = idxDown_;
|
idxChunkDown = idxDown_;
|
||||||
|
@ -161,13 +231,11 @@ void cuAmpcorChunk::setIndex(int idxDown_, int idxAcross_)
|
||||||
else {
|
else {
|
||||||
nWindowsAcross = param->numberWindowAcrossInChunk;
|
nWindowsAcross = param->numberWindowAcrossInChunk;
|
||||||
}
|
}
|
||||||
//std::cout << "DEBUG setIndex" << idxChunk << " " << nWindowsDown << " " << nWindowsAcross << "\n";
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// obtain the starting pixels for each chip
|
/// obtain the starting pixels for each chip
|
||||||
/// @param[in] oStartPixel
|
/// @param[in] oStartPixel start pixel locations for all chips
|
||||||
///
|
/// @param[out] rstartPixel start pixel locations for chips within the chunk
|
||||||
void cuAmpcorChunk::getRelativeOffset(int *rStartPixel, const int *oStartPixel, int diff)
|
void cuAmpcorChunk::getRelativeOffset(int *rStartPixel, const int *oStartPixel, int diff)
|
||||||
{
|
{
|
||||||
for(int i=0; i<param->numberWindowDownInChunk; ++i) {
|
for(int i=0; i<param->numberWindowDownInChunk; ++i) {
|
||||||
|
@ -180,7 +248,6 @@ void cuAmpcorChunk::getRelativeOffset(int *rStartPixel, const int *oStartPixel,
|
||||||
int idxInAll = (iDown+idxChunkDown*param->numberWindowDownInChunk)*param->numberWindowAcross
|
int idxInAll = (iDown+idxChunkDown*param->numberWindowDownInChunk)*param->numberWindowAcross
|
||||||
+ idxChunkAcross*param->numberWindowAcrossInChunk+iAcross;
|
+ idxChunkAcross*param->numberWindowAcrossInChunk+iAcross;
|
||||||
rStartPixel[idxInChunk] = oStartPixel[idxInAll] - diff;
|
rStartPixel[idxInChunk] = oStartPixel[idxInAll] - diff;
|
||||||
//fprintf(stderr, "relative offset %d %d %d %d\n", i, j, rStartPixel[idxInChunk], diff);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -215,7 +282,6 @@ void cuAmpcorChunk::loadReferenceChunk()
|
||||||
|
|
||||||
// load the data from cpu
|
// load the data from cpu
|
||||||
referenceImage->loadToDevice((void *)c_referenceChunkRaw->devData, startD, startA, height, width, stream);
|
referenceImage->loadToDevice((void *)c_referenceChunkRaw->devData, startD, startA, height, width, stream);
|
||||||
//std::cout << "debug load reference: " << startD << " " << startA << " " << height << " " << width << "\n";
|
|
||||||
|
|
||||||
//copy the chunk to a batch format (nImages, height, width)
|
//copy the chunk to a batch format (nImages, height, width)
|
||||||
// if derampMethod = 0 (no deramp), take amplitudes; otherwise, copy complex data
|
// if derampMethod = 0 (no deramp), take amplitudes; otherwise, copy complex data
|
||||||
|
@ -300,8 +366,10 @@ void cuAmpcorChunk::loadSecondaryChunk()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// constructor
|
||||||
cuAmpcorChunk::cuAmpcorChunk(cuAmpcorParameter *param_, GDALImage *reference_, GDALImage *secondary_,
|
cuAmpcorChunk::cuAmpcorChunk(cuAmpcorParameter *param_, GDALImage *reference_, GDALImage *secondary_,
|
||||||
cuArrays<float2> *offsetImage_, cuArrays<float> *snrImage_, cuArrays<float3> *covImage_, cuArrays<int> *intImage1_, cuArrays<float> *floatImage1_, cudaStream_t stream_)
|
cuArrays<float2> *offsetImage_, cuArrays<float> *snrImage_, cuArrays<float3> *covImage_,
|
||||||
|
cudaStream_t stream_)
|
||||||
|
|
||||||
{
|
{
|
||||||
param = param_;
|
param = param_;
|
||||||
|
@ -311,19 +379,8 @@ cuAmpcorChunk::cuAmpcorChunk(cuAmpcorParameter *param_, GDALImage *reference_, G
|
||||||
snrImage = snrImage_;
|
snrImage = snrImage_;
|
||||||
covImage = covImage_;
|
covImage = covImage_;
|
||||||
|
|
||||||
intImage1 = intImage1_;
|
|
||||||
floatImage1 = floatImage1_;
|
|
||||||
|
|
||||||
stream = stream_;
|
stream = stream_;
|
||||||
|
|
||||||
// std::cout << "debug Chunk creator " << param->maxReferenceChunkHeight << " " << param->maxReferenceChunkWidth << "\n";
|
|
||||||
// try allocate/deallocate on the fly to save gpu memory 07/09/19
|
|
||||||
// c_referenceChunkRaw = new cuArrays<float2> (param->maxReferenceChunkHeight, param->maxReferenceChunkWidth);
|
|
||||||
// c_referenceChunkRaw->allocate();
|
|
||||||
|
|
||||||
// c_secondaryChunkRaw = new cuArrays<float2> (param->maxSecondaryChunkHeight, param->maxSecondaryChunkWidth);
|
|
||||||
// c_secondaryChunkRaw->allocate();
|
|
||||||
|
|
||||||
ChunkOffsetDown = new cuArrays<int> (param->numberWindowDownInChunk, param->numberWindowAcrossInChunk);
|
ChunkOffsetDown = new cuArrays<int> (param->numberWindowDownInChunk, param->numberWindowAcrossInChunk);
|
||||||
ChunkOffsetDown->allocate();
|
ChunkOffsetDown->allocate();
|
||||||
ChunkOffsetDown->allocateHost();
|
ChunkOffsetDown->allocateHost();
|
||||||
|
@ -422,14 +479,14 @@ cuAmpcorChunk::cuAmpcorChunk(cuAmpcorParameter *param_, GDALImage *reference_, G
|
||||||
offsetFinal = new cuArrays<float2> (param->numberWindowDownInChunk, param->numberWindowAcrossInChunk);
|
offsetFinal = new cuArrays<float2> (param->numberWindowDownInChunk, param->numberWindowAcrossInChunk);
|
||||||
offsetFinal->allocate();
|
offsetFinal->allocate();
|
||||||
|
|
||||||
|
maxLocShift = new cuArrays<int2> (param->numberWindowDownInChunk, param->numberWindowAcrossInChunk);
|
||||||
|
maxLocShift->allocate();
|
||||||
|
|
||||||
corrMaxValue = new cuArrays<float> (param->numberWindowDownInChunk, param->numberWindowAcrossInChunk);
|
corrMaxValue = new cuArrays<float> (param->numberWindowDownInChunk, param->numberWindowAcrossInChunk);
|
||||||
corrMaxValue->allocate();
|
corrMaxValue->allocate();
|
||||||
|
|
||||||
|
|
||||||
// new arrays due to snr estimation
|
// new arrays due to snr estimation
|
||||||
std::cout<< "corrRawZoomInHeight: " << param->corrRawZoomInHeight << "\n";
|
|
||||||
std::cout<< "corrRawZoomInWidth: " << param->corrRawZoomInWidth << "\n";
|
|
||||||
|
|
||||||
r_corrBatchRawZoomIn = new cuArrays<float> (
|
r_corrBatchRawZoomIn = new cuArrays<float> (
|
||||||
param->corrRawZoomInHeight,
|
param->corrRawZoomInHeight,
|
||||||
param->corrRawZoomInWidth,
|
param->corrRawZoomInWidth,
|
||||||
|
@ -474,7 +531,7 @@ cuAmpcorChunk::cuAmpcorChunk(cuAmpcorParameter *param_, GDALImage *reference_, G
|
||||||
// end of new arrays
|
// end of new arrays
|
||||||
|
|
||||||
if(param->oversamplingMethod) {
|
if(param->oversamplingMethod) {
|
||||||
corrSincOverSampler = new cuSincOverSamplerR2R(param->zoomWindowSize, param->oversamplingFactor, stream);
|
corrSincOverSampler = new cuSincOverSamplerR2R(param->oversamplingFactor, stream);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
corrOverSampler= new cuOverSamplerR2R(param->zoomWindowSize, param->zoomWindowSize,
|
corrOverSampler= new cuOverSamplerR2R(param->zoomWindowSize, param->zoomWindowSize,
|
||||||
|
@ -495,35 +552,14 @@ cuAmpcorChunk::cuAmpcorChunk(cuAmpcorParameter *param_, GDALImage *reference_, G
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef CUAMPCOR_DEBUG
|
||||||
debugmsg("all objects in chunk are created ...\n");
|
std::cout << "all objects in chunk are created ...\n";
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// destructor
|
||||||
cuAmpcorChunk::~cuAmpcorChunk()
|
cuAmpcorChunk::~cuAmpcorChunk()
|
||||||
{
|
{
|
||||||
/*
|
|
||||||
delete referenceChunkRaw;
|
|
||||||
delete secondaryChunkRaw;
|
|
||||||
delete ChunkOffsetDown;
|
|
||||||
delete ChunkOffsetAcross;
|
|
||||||
delete referenceBatchRaw;
|
|
||||||
delete secondaryBatchRaw;
|
|
||||||
delete referenceChunkOverSampled;
|
|
||||||
delete secondaryChunkOverSampled;
|
|
||||||
delete referenceChunkOverSampler;
|
|
||||||
delete secondaryChunkOverSampler;
|
|
||||||
delete referenceChunk;
|
|
||||||
delete secondaryChunk;
|
|
||||||
delete corrChunk;
|
|
||||||
delete offsetInit;
|
|
||||||
delete zoomInOffset;
|
|
||||||
delete offsetFinal;
|
|
||||||
delete corrChunkZoomIn;
|
|
||||||
delete corrChunkZoomInOverSampled;
|
|
||||||
delete corrOverSampler;
|
|
||||||
delete corrSincOverSampler;
|
|
||||||
delete corrMaxValue;
|
|
||||||
if(param->algorithm == 0)
|
|
||||||
delete cuCorrFreqDomain;
|
|
||||||
*/
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// end of file
|
||||||
|
|
|
@ -1,7 +1,9 @@
|
||||||
/*
|
/*
|
||||||
* cuAmpcorChunk.h
|
* @file cuAmpcorChunk.h
|
||||||
* Purpose: a group of chips processed at the same time
|
* @brief Ampcor processor for a batch of windows
|
||||||
*/
|
*
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
#ifndef __CUAMPCORCHUNK_H
|
#ifndef __CUAMPCORCHUNK_H
|
||||||
#define __CUAMPCORCHUNK_H
|
#define __CUAMPCORCHUNK_H
|
||||||
|
@ -13,87 +15,88 @@
|
||||||
#include "cuSincOverSampler.h"
|
#include "cuSincOverSampler.h"
|
||||||
#include "cuCorrFrequency.h"
|
#include "cuCorrFrequency.h"
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* cuAmpcor processor for a chunk (a batch of windows)
|
||||||
|
*/
|
||||||
class cuAmpcorChunk{
|
class cuAmpcorChunk{
|
||||||
private:
|
private:
|
||||||
int idxChunkDown;
|
int idxChunkDown; ///< index of the chunk in total batches, down
|
||||||
int idxChunkAcross;
|
int idxChunkAcross; ///< index of the chunk in total batches, across
|
||||||
int idxChunk;
|
int idxChunk; ///<
|
||||||
int nWindowsDown;
|
int nWindowsDown; ///< number of windows in one chunk, down
|
||||||
int nWindowsAcross;
|
int nWindowsAcross; ///< number of windows in one chunk, across
|
||||||
|
|
||||||
int devId;
|
int devId; ///< GPU device ID to use
|
||||||
cudaStream_t stream;
|
cudaStream_t stream; ///< CUDA stream to use
|
||||||
|
|
||||||
GDALImage *referenceImage;
|
GDALImage *referenceImage; ///< reference image object
|
||||||
GDALImage *secondaryImage;
|
GDALImage *secondaryImage; ///< secondary image object
|
||||||
cuAmpcorParameter *param;
|
cuAmpcorParameter *param; ///< reference to the (global) parameters
|
||||||
cuArrays<float2> *offsetImage;
|
cuArrays<float2> *offsetImage; ///< output offsets image
|
||||||
cuArrays<float> *snrImage;
|
cuArrays<float> *snrImage; ///< snr image
|
||||||
cuArrays<float3> *covImage;
|
cuArrays<float3> *covImage; ///< cov image
|
||||||
|
|
||||||
// added for test
|
// local variables and workers
|
||||||
cuArrays<int> *intImage1;
|
// gpu buffer to load images from file
|
||||||
cuArrays<float> *floatImage1;
|
|
||||||
|
|
||||||
// gpu buffer
|
|
||||||
cuArrays<float2> * c_referenceChunkRaw, * c_secondaryChunkRaw;
|
cuArrays<float2> * c_referenceChunkRaw, * c_secondaryChunkRaw;
|
||||||
cuArrays<float> * r_referenceChunkRaw, * r_secondaryChunkRaw;
|
cuArrays<float> * r_referenceChunkRaw, * r_secondaryChunkRaw;
|
||||||
|
|
||||||
// gpu windows raw data
|
// windows raw (not oversampled) data, complex and real
|
||||||
cuArrays<float2> * c_referenceBatchRaw, * c_secondaryBatchRaw, * c_secondaryBatchZoomIn;
|
cuArrays<float2> * c_referenceBatchRaw, * c_secondaryBatchRaw, * c_secondaryBatchZoomIn;
|
||||||
cuArrays<float> * r_referenceBatchRaw, * r_secondaryBatchRaw;
|
cuArrays<float> * r_referenceBatchRaw, * r_secondaryBatchRaw;
|
||||||
|
|
||||||
// gpu windows oversampled data
|
// windows oversampled data
|
||||||
cuArrays<float2> * c_referenceBatchOverSampled, * c_secondaryBatchOverSampled;
|
cuArrays<float2> * c_referenceBatchOverSampled, * c_secondaryBatchOverSampled;
|
||||||
cuArrays<float> * r_referenceBatchOverSampled, * r_secondaryBatchOverSampled;
|
cuArrays<float> * r_referenceBatchOverSampled, * r_secondaryBatchOverSampled;
|
||||||
cuArrays<float> * r_corrBatchRaw, * r_corrBatchZoomIn, * r_corrBatchZoomInOverSampled, * r_corrBatchZoomInAdjust;
|
cuArrays<float> * r_corrBatchRaw, * r_corrBatchZoomIn, * r_corrBatchZoomInOverSampled, * r_corrBatchZoomInAdjust;
|
||||||
|
|
||||||
|
// offset data
|
||||||
cuArrays<int> *ChunkOffsetDown, *ChunkOffsetAcross;
|
cuArrays<int> *ChunkOffsetDown, *ChunkOffsetAcross;
|
||||||
|
|
||||||
|
// oversampling processors for complex images
|
||||||
cuOverSamplerC2C *referenceBatchOverSampler, *secondaryBatchOverSampler;
|
cuOverSamplerC2C *referenceBatchOverSampler, *secondaryBatchOverSampler;
|
||||||
|
|
||||||
|
// oversampling processor for correlation surface
|
||||||
cuOverSamplerR2R *corrOverSampler;
|
cuOverSamplerR2R *corrOverSampler;
|
||||||
cuSincOverSamplerR2R *corrSincOverSampler;
|
cuSincOverSamplerR2R *corrSincOverSampler;
|
||||||
|
|
||||||
//for frequency domain
|
// cross-correlation processor with frequency domain algorithm
|
||||||
cuFreqCorrelator *cuCorrFreqDomain, *cuCorrFreqDomain_OverSampled;
|
cuFreqCorrelator *cuCorrFreqDomain, *cuCorrFreqDomain_OverSampled;
|
||||||
|
|
||||||
|
// save offset results in different stages
|
||||||
cuArrays<int2> *offsetInit;
|
cuArrays<int2> *offsetInit;
|
||||||
cuArrays<int2> *offsetZoomIn;
|
cuArrays<int2> *offsetZoomIn;
|
||||||
cuArrays<float2> *offsetFinal;
|
cuArrays<float2> *offsetFinal;
|
||||||
|
cuArrays<int2> *maxLocShift; // record the maxloc from the extract center
|
||||||
cuArrays<float> *corrMaxValue;
|
cuArrays<float> *corrMaxValue;
|
||||||
|
|
||||||
|
|
||||||
//SNR estimation
|
|
||||||
|
|
||||||
cuArrays<float> *r_corrBatchRawZoomIn;
|
|
||||||
cuArrays<float> *r_corrBatchSum;
|
|
||||||
cuArrays<int> *i_corrBatchZoomInValid, *i_corrBatchValidCount;
|
|
||||||
|
|
||||||
cuArrays<float> *r_snrValue;
|
|
||||||
|
|
||||||
cuArrays<int2> *i_maxloc;
|
cuArrays<int2> *i_maxloc;
|
||||||
cuArrays<float> *r_maxval;
|
cuArrays<float> *r_maxval;
|
||||||
|
|
||||||
// Varince estimation.
|
// SNR estimation
|
||||||
|
cuArrays<float> *r_corrBatchRawZoomIn;
|
||||||
|
cuArrays<float> *r_corrBatchSum;
|
||||||
|
cuArrays<int> *i_corrBatchZoomInValid, *i_corrBatchValidCount;
|
||||||
|
cuArrays<float> *r_snrValue;
|
||||||
|
|
||||||
|
// Variance estimation
|
||||||
cuArrays<float3> *r_covValue;
|
cuArrays<float3> *r_covValue;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
cuAmpcorChunk() {}
|
// constructor
|
||||||
//cuAmpcorChunk(cuAmpcorParameter *param_, SlcImage *reference_, SlcImage *secondary_);
|
cuAmpcorChunk(cuAmpcorParameter *param_,
|
||||||
|
GDALImage *reference_, GDALImage *secondary_,
|
||||||
|
cuArrays<float2> *offsetImage_, cuArrays<float> *snrImage_,
|
||||||
|
cuArrays<float3> *covImage_, cudaStream_t stream_);
|
||||||
|
// destructor
|
||||||
|
~cuAmpcorChunk();
|
||||||
|
|
||||||
|
// local methods
|
||||||
void setIndex(int idxDown_, int idxAcross_);
|
void setIndex(int idxDown_, int idxAcross_);
|
||||||
|
|
||||||
cuAmpcorChunk(cuAmpcorParameter *param_, GDALImage *reference_, GDALImage *secondary_, cuArrays<float2> *offsetImage_,
|
|
||||||
cuArrays<float> *snrImage_, cuArrays<float3> *covImage_, cuArrays<int> *intImage1_, cuArrays<float> *floatImage1_, cudaStream_t stream_);
|
|
||||||
|
|
||||||
|
|
||||||
void loadReferenceChunk();
|
void loadReferenceChunk();
|
||||||
void loadSecondaryChunk();
|
void loadSecondaryChunk();
|
||||||
void getRelativeOffset(int *rStartPixel, const int *oStartPixel, int diff);
|
void getRelativeOffset(int *rStartPixel, const int *oStartPixel, int diff);
|
||||||
|
// run the given chunk
|
||||||
~cuAmpcorChunk();
|
|
||||||
|
|
||||||
void run(int, int);
|
void run(int, int);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -1,6 +1,12 @@
|
||||||
// Implementation of cuAmpcorController
|
/**
|
||||||
|
* @file cuAmpcorController.cu
|
||||||
|
* @brief Implementations of cuAmpcorController
|
||||||
|
*/
|
||||||
|
|
||||||
|
// my declaration
|
||||||
#include "cuAmpcorController.h"
|
#include "cuAmpcorController.h"
|
||||||
|
|
||||||
|
// dependencies
|
||||||
#include "GDALImage.h"
|
#include "GDALImage.h"
|
||||||
#include "cuArrays.h"
|
#include "cuArrays.h"
|
||||||
#include "cudaUtil.h"
|
#include "cudaUtil.h"
|
||||||
|
@ -8,11 +14,27 @@
|
||||||
#include "cuAmpcorUtil.h"
|
#include "cuAmpcorUtil.h"
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
|
|
||||||
cuAmpcorController::cuAmpcorController() { param = new cuAmpcorParameter();}
|
// constructor
|
||||||
cuAmpcorController::~cuAmpcorController() { delete param; }
|
cuAmpcorController::cuAmpcorController()
|
||||||
|
{
|
||||||
|
// create a new set of parameters
|
||||||
|
param = new cuAmpcorParameter();
|
||||||
|
}
|
||||||
|
|
||||||
void cuAmpcorController::runAmpcor() {
|
// destructor
|
||||||
|
cuAmpcorController::~cuAmpcorController()
|
||||||
|
{
|
||||||
|
delete param;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Run ampcor
|
||||||
|
*
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
void cuAmpcorController::runAmpcor()
|
||||||
|
{
|
||||||
// set the gpu id
|
// set the gpu id
|
||||||
param->deviceID = gpuDeviceInit(param->deviceID);
|
param->deviceID = gpuDeviceInit(param->deviceID);
|
||||||
// initialize the gdal driver
|
// initialize the gdal driver
|
||||||
|
@ -26,15 +48,11 @@ void cuAmpcorController::runAmpcor() {
|
||||||
cuArrays<float> *snrImage, *snrImageRun;
|
cuArrays<float> *snrImage, *snrImageRun;
|
||||||
cuArrays<float3> *covImage, *covImageRun;
|
cuArrays<float3> *covImage, *covImageRun;
|
||||||
|
|
||||||
// For debugging.
|
// nWindowsDownRun is defined as numberChunk * numberWindowInChunk
|
||||||
cuArrays<int> *intImage1;
|
// It may be bigger than the actual number of windows
|
||||||
cuArrays<float> *floatImage1;
|
|
||||||
|
|
||||||
int nWindowsDownRun = param->numberChunkDown * param->numberWindowDownInChunk;
|
int nWindowsDownRun = param->numberChunkDown * param->numberWindowDownInChunk;
|
||||||
int nWindowsAcrossRun = param->numberChunkAcross * param->numberWindowAcrossInChunk;
|
int nWindowsAcrossRun = param->numberChunkAcross * param->numberWindowAcrossInChunk;
|
||||||
|
|
||||||
std::cout << "Debug " << nWindowsDownRun << " " << param->numberWindowDown << "\n";
|
|
||||||
|
|
||||||
offsetImageRun = new cuArrays<float2>(nWindowsDownRun, nWindowsAcrossRun);
|
offsetImageRun = new cuArrays<float2>(nWindowsDownRun, nWindowsAcrossRun);
|
||||||
offsetImageRun->allocate();
|
offsetImageRun->allocate();
|
||||||
|
|
||||||
|
@ -44,15 +62,7 @@ void cuAmpcorController::runAmpcor() {
|
||||||
covImageRun = new cuArrays<float3>(nWindowsDownRun, nWindowsAcrossRun);
|
covImageRun = new cuArrays<float3>(nWindowsDownRun, nWindowsAcrossRun);
|
||||||
covImageRun->allocate();
|
covImageRun->allocate();
|
||||||
|
|
||||||
// intImage 1 and floatImage 1 are added for debugging issues
|
// Offset fields.
|
||||||
|
|
||||||
intImage1 = new cuArrays<int>(nWindowsDownRun, nWindowsAcrossRun);
|
|
||||||
intImage1->allocate();
|
|
||||||
|
|
||||||
floatImage1 = new cuArrays<float>(nWindowsDownRun, nWindowsAcrossRun);
|
|
||||||
floatImage1->allocate();
|
|
||||||
|
|
||||||
// Offsetfields.
|
|
||||||
offsetImage = new cuArrays<float2>(param->numberWindowDown, param->numberWindowAcross);
|
offsetImage = new cuArrays<float2>(param->numberWindowDown, param->numberWindowAcross);
|
||||||
offsetImage->allocate();
|
offsetImage->allocate();
|
||||||
|
|
||||||
|
@ -64,52 +74,61 @@ void cuAmpcorController::runAmpcor() {
|
||||||
covImage = new cuArrays<float3>(param->numberWindowDown, param->numberWindowAcross);
|
covImage = new cuArrays<float3>(param->numberWindowDown, param->numberWindowAcross);
|
||||||
covImage->allocate();
|
covImage->allocate();
|
||||||
|
|
||||||
|
// set up the cuda streams
|
||||||
cudaStream_t streams[param->nStreams];
|
cudaStream_t streams[param->nStreams];
|
||||||
cuAmpcorChunk *chunk[param->nStreams];
|
cuAmpcorChunk *chunk[param->nStreams];
|
||||||
|
// iterate over cuda streams
|
||||||
for(int ist=0; ist<param->nStreams; ist++)
|
for(int ist=0; ist<param->nStreams; ist++)
|
||||||
{
|
{
|
||||||
cudaStreamCreate(&streams[ist]);
|
// create each stream
|
||||||
chunk[ist]= new cuAmpcorChunk(param, referenceImage, secondaryImage, offsetImageRun, snrImageRun, covImageRun, intImage1, floatImage1, streams[ist]);
|
checkCudaErrors(cudaStreamCreate(&streams[ist]));
|
||||||
|
// create the chunk processor for each stream
|
||||||
|
chunk[ist]= new cuAmpcorChunk(param, referenceImage, secondaryImage,
|
||||||
|
offsetImageRun, snrImageRun, covImageRun,
|
||||||
|
streams[ist]);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int nChunksDown = param->numberChunkDown;
|
int nChunksDown = param->numberChunkDown;
|
||||||
int nChunksAcross = param->numberChunkAcross;
|
int nChunksAcross = param->numberChunkAcross;
|
||||||
|
|
||||||
std::cout << "Total number of windows (azimuth x range): " <<param->numberWindowDown << " x " << param->numberWindowAcross << std::endl;
|
// report info
|
||||||
std::cout << "to be processed in the number of chunks: " <<nChunksDown << " x " << nChunksAcross << std::endl;
|
std::cout << "Total number of windows (azimuth x range): "
|
||||||
|
<< param->numberWindowDown << " x " << param->numberWindowAcross
|
||||||
|
<< std::endl;
|
||||||
|
std::cout << "to be processed in the number of chunks: "
|
||||||
|
<< nChunksDown << " x " << nChunksAcross << std::endl;
|
||||||
|
|
||||||
|
// iterative over chunks down
|
||||||
for(int i = 0; i<nChunksDown; i++)
|
for(int i = 0; i<nChunksDown; i++)
|
||||||
{
|
{
|
||||||
std::cout << "Processing chunk (" << i <<", x" << ")" << std::endl;
|
std::cout << "Processing chunk (" << i <<", x" << ") out of " << nChunksDown << std::endl;
|
||||||
|
// iterate over chunks across
|
||||||
for(int j=0; j<nChunksAcross; j+=param->nStreams)
|
for(int j=0; j<nChunksAcross; j+=param->nStreams)
|
||||||
{
|
{
|
||||||
//std::cout << "Processing chunk(" << i <<", " << j <<")" << std::endl;
|
// iterate over cuda streams to process chunks
|
||||||
for(int ist = 0; ist<param->nStreams; ist++)
|
for(int ist = 0; ist < param->nStreams; ist++)
|
||||||
{
|
{
|
||||||
if(j+ist < nChunksAcross) {
|
int chunkIdxAcross = j+ist;
|
||||||
|
if(chunkIdxAcross < nChunksAcross) {
|
||||||
chunk[ist]->run(i, j+ist);
|
chunk[ist]->run(i, chunkIdxAcross);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// wait all streams are done
|
||||||
cudaDeviceSynchronize();
|
cudaDeviceSynchronize();
|
||||||
|
|
||||||
// Do extraction.
|
// extraction of the run images to output images
|
||||||
cuArraysCopyExtract(offsetImageRun, offsetImage, make_int2(0,0), streams[0]);
|
cuArraysCopyExtract(offsetImageRun, offsetImage, make_int2(0,0), streams[0]);
|
||||||
cuArraysCopyExtract(snrImageRun, snrImage, make_int2(0,0), streams[0]);
|
cuArraysCopyExtract(snrImageRun, snrImage, make_int2(0,0), streams[0]);
|
||||||
cuArraysCopyExtract(covImageRun, covImage, make_int2(0,0), streams[0]);
|
cuArraysCopyExtract(covImageRun, covImage, make_int2(0,0), streams[0]);
|
||||||
|
// save outputs to files
|
||||||
offsetImage->outputToFile(param->offsetImageName, streams[0]);
|
offsetImage->outputToFile(param->offsetImageName, streams[0]);
|
||||||
snrImage->outputToFile(param->snrImageName, streams[0]);
|
snrImage->outputToFile(param->snrImageName, streams[0]);
|
||||||
covImage->outputToFile(param->covImageName, streams[0]);
|
covImage->outputToFile(param->covImageName, streams[0]);
|
||||||
|
// also save the gross offsets
|
||||||
// Output debugging arrays.
|
|
||||||
intImage1->outputToFile("intImage1", streams[0]);
|
|
||||||
floatImage1->outputToFile("floatImage1", streams[0]);
|
|
||||||
|
|
||||||
outputGrossOffsets();
|
outputGrossOffsets();
|
||||||
|
|
||||||
// Delete arrays.
|
// Delete arrays.
|
||||||
|
@ -117,21 +136,24 @@ void cuAmpcorController::runAmpcor() {
|
||||||
delete snrImage;
|
delete snrImage;
|
||||||
delete covImage;
|
delete covImage;
|
||||||
|
|
||||||
delete intImage1;
|
|
||||||
delete floatImage1;
|
|
||||||
|
|
||||||
delete offsetImageRun;
|
delete offsetImageRun;
|
||||||
delete snrImageRun;
|
delete snrImageRun;
|
||||||
delete covImageRun;
|
delete covImageRun;
|
||||||
|
|
||||||
for (int ist=0; ist<param->nStreams; ist++)
|
for (int ist=0; ist<param->nStreams; ist++)
|
||||||
|
{
|
||||||
|
checkCudaErrors(cudaStreamDestroy(streams[ist]));
|
||||||
delete chunk[ist];
|
delete chunk[ist];
|
||||||
|
}
|
||||||
|
|
||||||
delete referenceImage;
|
delete referenceImage;
|
||||||
delete secondaryImage;
|
delete secondaryImage;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Output gross offset fields
|
||||||
|
*/
|
||||||
void cuAmpcorController::outputGrossOffsets()
|
void cuAmpcorController::outputGrossOffsets()
|
||||||
{
|
{
|
||||||
cuArrays<float2> *grossOffsets = new cuArrays<float2>(param->numberWindowDown, param->numberWindowAcross);
|
cuArrays<float2> *grossOffsets = new cuArrays<float2>(param->numberWindowDown, param->numberWindowAcross);
|
||||||
|
@ -143,72 +165,4 @@ void cuAmpcorController::outputGrossOffsets()
|
||||||
delete grossOffsets;
|
delete grossOffsets;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// end of file
|
||||||
/*
|
|
||||||
void cuAmpcorController::setAlgorithm(int n) { param->algorithm = n; } // 0 - freq domain; 1 - time domain
|
|
||||||
int cuAmpcorController::getAlgorithm() { return param->algorithm; }
|
|
||||||
void cuAmpcorController::setDeviceID(int n) { param->deviceID = n; }
|
|
||||||
int cuAmpcorController::getDeviceID() { return param->deviceID; }
|
|
||||||
void cuAmpcorController::setNStreams(int n) { param->nStreams = n; }
|
|
||||||
int cuAmpcorController::getNStreams() { return param->nStreams; }
|
|
||||||
void cuAmpcorController::setWindowSizeHeight(int n) { param->windowSizeHeight = n; }
|
|
||||||
int cuAmpcorController::getWindowSizeHeight() { return param->windowSizeHeight; }
|
|
||||||
void cuAmpcorController::setWindowSizeWidth(int n) { param->windowSizeWidth = n; }
|
|
||||||
int cuAmpcorController::getWindowSizeWidth() { return param->windowSizeWidth; }
|
|
||||||
void cuAmpcorController::setSearchWindowSizeHeight(int n) { param->searchWindowSizeHeight = n; }
|
|
||||||
int cuAmpcorController::getSearchWindowSizeHeight() { return param->windowSizeHeight; }
|
|
||||||
void cuAmpcorController::setSearchWindowSizeWidth(int n) { param->searchWindowSizeWidth = n; }
|
|
||||||
void cuAmpcorController::setRawOversamplingFactor(int n) { param->rawDataOversamplingFactor = n; }
|
|
||||||
void cuAmpcorController::setZoomWindowSize(int n) { param->zoomWindowSize = n; }
|
|
||||||
void cuAmpcorController::setOversamplingFactor(int n) { param->oversamplingFactor = n; }
|
|
||||||
//void cuAmpcorController::setAcrossLooks(int n) { param->acrossLooks = n; } // 1 - single look
|
|
||||||
//void cuAmpcorController::setDownLooks(int n) { param->downLooks = n; } // 1 - single look
|
|
||||||
//void cuAmpcorController::setSkipSampleAcross(int n) { param->skipSampleAcross = n; }
|
|
||||||
//void cuAmpcorController::setSkipSampleDown(int n) { param->skipSampleDown = n; }
|
|
||||||
void cuAmpcorController::setSkipSampleAcrossRaw(int n) { param->skipSampleAcrossRaw = n; }
|
|
||||||
int cuAmpcorController::getSkipSampleAcrossRaw() { return param->skipSampleAcrossRaw; }
|
|
||||||
void cuAmpcorController::setSkipSampleDownRaw(int n) { param->skipSampleDownRaw = n; }
|
|
||||||
int cuAmpcorController::getSkipSampleDownRaw() { return param->skipSampleDownRaw; }
|
|
||||||
void cuAmpcorController::setNumberWindowDown(int n) { param->numberWindowDown = n; }
|
|
||||||
void cuAmpcorController::setNumberWindowAcross(int n) { param->numberWindowAcross = n; }
|
|
||||||
void cuAmpcorController::setNumberWindowDownInChunk(int n) { param->numberWindowDownInChunk = n; }
|
|
||||||
void cuAmpcorController::setNumberWindowAcrossInChunk(int n) { param->numberWindowAcrossInChunk = n; }
|
|
||||||
//void cuAmpcorController::setRangeSpacing1(float n) { param->rangeSpacing1 = n; } // deprecated
|
|
||||||
//void cuAmpcorController::setRangeSpacing2(float n) { param->rangeSpacing2 = n; } // deprecated
|
|
||||||
//void cuAmpcorController::setImageDatatype1(int n) { param->imageDataType1 = n; }
|
|
||||||
//void cuAmpcorController::setImageDatatype2(int n) { param->imageDataType2 = n; }
|
|
||||||
void cuAmpcorController::setThresholdSNR(float n) { param->thresholdSNR = n; } // deprecated(?)
|
|
||||||
//void cuAmpcorController::setThresholdCov(float n) { param->thresholdCov = n; } // deprecated(?)
|
|
||||||
//void cuAmpcorController::setBand1(int n) { param->band1 = n; }
|
|
||||||
//void cuAmpcorController::setBand2(int n) { param->band2 = n; }
|
|
||||||
void cuAmpcorController::setReferenceImageName(std::string s) { param->referenceImageName = s; }
|
|
||||||
std::string cuAmpcorController::getReferenceImageName() {return param->referenceImageName;}
|
|
||||||
void cuAmpcorController::setSecondaryImageName(std::string s) { param->secondaryImageName = s; }
|
|
||||||
std::string cuAmpcorController::getSecondaryImageName() {return param->secondaryImageName;}
|
|
||||||
void cuAmpcorController::setReferenceImageWidth(int n) { param->referenceImageWidth = n; }
|
|
||||||
void cuAmpcorController::setReferenceImageHeight(int n) { param->referenceImageHeight = n; }
|
|
||||||
void cuAmpcorController::setSecondaryImageWidth(int n) { param->secondaryImageWidth = n; }
|
|
||||||
void cuAmpcorController::setSecondaryImageHeight(int n) { param->secondaryImageHeight = n; }
|
|
||||||
//void cuAmpcorController::setReferenceStartPixelAcross(int n) { param->referenceStartPixelAcross = n; }
|
|
||||||
//void cuAmpcorController::setReferenceStartPixelDown(int n) { param->referenceStartPixelDown = n; }
|
|
||||||
//void cuAmpcorController::setSecondaryStartPixelAcross(int n) { param->secondaryStartPixelAcross = n; }
|
|
||||||
//void cuAmpcorController::setSecondaryStartPixelDown(int n) { param->secondaryStartPixelDown = n; }
|
|
||||||
//void cuAmpcorController::setGrossOffsetMethod(int n) { param->grossOffsetMethod = n; }
|
|
||||||
//int cuAmpcorController::getGrossOffsetMethod() { return param->grossOffsetMethod; }
|
|
||||||
//void cuAmpcorController::setAcrossGrossOffset(int n) { param->acrossGrossOffset = n; }
|
|
||||||
//void cuAmpcorController::setDownGrossOffset(int n) { param->downGrossOffset = n; }
|
|
||||||
//int* cuAmpcorController::getGrossOffsets() {return param->grossOffsets;}
|
|
||||||
|
|
||||||
void cuAmpcorController::setGrossOffsets(int *in, int size) {
|
|
||||||
assert(size = 2*param->numberWindowAcross*param->numberWindowDown);
|
|
||||||
if (param->grossOffsets == NULL)
|
|
||||||
param->grossOffsets = (int *)malloc(size*sizeof(int));
|
|
||||||
mempcpy(param->grossOffsets, in, size*sizeof(int));
|
|
||||||
fprintf(stderr, "copy grossOffsets %d\n", size);
|
|
||||||
}
|
|
||||||
void cuAmpcorController::setOffsetImageName(std::string s) { param->offsetImageName = s; }
|
|
||||||
void cuAmpcorController::setSNRImageName(std::string s) { param->snrImageName = s; }
|
|
||||||
//void cuAmpcorController::setMargin(int n) { param->margin = n; }
|
|
||||||
void cuAmpcorController::setDerampMethod(int n) { param->derampMethod = n; }
|
|
||||||
int cuAmpcorController::getDerampMethod() { return param->derampMethod; }
|
|
||||||
*/
|
|
||||||
|
|
|
@ -1,115 +1,35 @@
|
||||||
/**
|
/**
|
||||||
* cuAmpcorController.h
|
* @file cuAmpcorController.h
|
||||||
* Header file for the controller class (interfaces to Python/Cython)
|
* @brief The controller for running cuAmcor
|
||||||
*
|
*
|
||||||
|
* cuAmpController is the main processor, also interface to python
|
||||||
|
* It determines the total number of windows, the starting pixels for each window.
|
||||||
|
* It then divides windows into chunks (batches), and creates cuAmpcorChunk instances
|
||||||
|
* to process each chunk.
|
||||||
|
* A chunk includes multiple windows, to maximize the use of GPU cores.
|
||||||
|
* Different cuAmpcorChunk processors use different cuda streams, to overlap
|
||||||
|
* the kernel execution with data copying.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
// code guard
|
||||||
#ifndef CU_AMPCOR_CONTROLLER_H
|
#ifndef CU_AMPCOR_CONTROLLER_H
|
||||||
#define CU_AMPCOR_CONTROLLER_H
|
#define CU_AMPCOR_CONTROLLER_H
|
||||||
|
|
||||||
|
// dependencies
|
||||||
#include "cuAmpcorParameter.h"
|
#include "cuAmpcorParameter.h"
|
||||||
#include <cstring>
|
|
||||||
|
|
||||||
class cuAmpcorController {
|
class cuAmpcorController {
|
||||||
public:
|
public:
|
||||||
cuAmpcorParameter *param;
|
cuAmpcorParameter *param; ///< the parameter set
|
||||||
|
// constructor
|
||||||
cuAmpcorController();
|
cuAmpcorController();
|
||||||
|
// destructor
|
||||||
~cuAmpcorController();
|
~cuAmpcorController();
|
||||||
|
// run interface
|
||||||
void runAmpcor();
|
void runAmpcor();
|
||||||
|
// output gross offsets
|
||||||
void outputGrossOffsets();
|
void outputGrossOffsets();
|
||||||
/*
|
|
||||||
void setAlgorithm(int);
|
|
||||||
int getAlgorithm();
|
|
||||||
void setDeviceID(int);
|
|
||||||
int getDeviceID();
|
|
||||||
void setNStreams(int);
|
|
||||||
int getNStreams();
|
|
||||||
void setWindowSizeHeight(int);
|
|
||||||
int getWindowSizeHeight();
|
|
||||||
void setWindowSizeWidth(int);
|
|
||||||
int getWindowSizeWidth();
|
|
||||||
void setSearchWindowSizeHeight(int);
|
|
||||||
int getSearchWindowSizeHeight();
|
|
||||||
void setSearchWindowSizeWidth(int);
|
|
||||||
int setSearchWindowSizeWidth();
|
|
||||||
void setRawOversamplingFactor(int);
|
|
||||||
int getRawOversamplingFactor();
|
|
||||||
void setZoomWindowSize(int);
|
|
||||||
int getZoomWindowSize();
|
|
||||||
void setOversamplingFactor(int);
|
|
||||||
int getOversamplingFactor();
|
|
||||||
void setAcrossLooks(int);
|
|
||||||
int getAcrossLoos();
|
|
||||||
void setDownLooks(int);
|
|
||||||
int getDownLooks();
|
|
||||||
void setSkipSampleAcrossRaw(int);
|
|
||||||
int getSkipSampleAcrossRaw();
|
|
||||||
void setSkipSampleDownRaw(int);
|
|
||||||
int getSkipSampleDownRaw();
|
|
||||||
void setNumberWindowMethod(int);
|
|
||||||
int getNumberWindowMethod();
|
|
||||||
void setNumberWindowDown(int);
|
|
||||||
int getNumberWindowDown();
|
|
||||||
void setNumberWindowAcross(int);
|
|
||||||
int getNumberWindowAcross();
|
|
||||||
void setNumberWindowDownInChunk(int);
|
|
||||||
int getNumberWindowDownInChunk();
|
|
||||||
void setNumberWindowAcrossInChunk(int);
|
|
||||||
int getNumberWindowAcrossInChunk();
|
|
||||||
void setRangeSpacing1(float);
|
|
||||||
float getRangeSpacing1();
|
|
||||||
void setRangeSpacing2(float);
|
|
||||||
float getRangeSpacing2();
|
|
||||||
void setImageDatatype1(int);
|
|
||||||
int getImageDatatype1();
|
|
||||||
void setImageDatatype2(int);
|
|
||||||
int getImageDatatype2();
|
|
||||||
void setThresholdSNR(float);
|
|
||||||
float getThresholdSNR();
|
|
||||||
void setThresholdCov(float);
|
|
||||||
float getThresholdCov();
|
|
||||||
void setBand1(int);
|
|
||||||
int getBand1();
|
|
||||||
void setBand2(int);
|
|
||||||
int getBand2();
|
|
||||||
void setReferenceImageName(std::string);
|
|
||||||
std::string getReferenceImageName();
|
|
||||||
void setSecondaryImageName(std::string);
|
|
||||||
std::string getSecondaryImageName();
|
|
||||||
void setReferenceImageWidth(int);
|
|
||||||
int getReferenceImageWidth();
|
|
||||||
void setReferenceImageHeight(int);
|
|
||||||
int getReferenceImageHeight();
|
|
||||||
void setSecondaryImageWidth(int);
|
|
||||||
int getSecondaryImageWidth();
|
|
||||||
void setSecondaryImageHeight(int);
|
|
||||||
int getSecondaryImageHeight();
|
|
||||||
void setStartPixelMethod(int);
|
|
||||||
int getStartPixelMethod();
|
|
||||||
void setReferenceStartPixelAcross(int);
|
|
||||||
int getReferenceStartPixelAcross();
|
|
||||||
void setReferenceStartPixelDown(int);
|
|
||||||
int setReferenceStartPixelDown();
|
|
||||||
void setSecondaryStartPixelAcross(int);
|
|
||||||
int getSecondaryStartPixelAcross();
|
|
||||||
void setSecondaryStartPixelDown(int);
|
|
||||||
int getSecondaryStartPixelDown();
|
|
||||||
void setGrossOffsetMethod(int);
|
|
||||||
int getGrossOffsetMethod();
|
|
||||||
void setAcrossGrossOffset(int);
|
|
||||||
int getAcrossGrossOffset();
|
|
||||||
void setDownGrossOffset(int);
|
|
||||||
int getDownGrossOffset();
|
|
||||||
void setGrossOffsets(int *, int);
|
|
||||||
int* getGrossOffsets();
|
|
||||||
void setOffsetImageName(std::string);
|
|
||||||
std::string getOffsetImageName();
|
|
||||||
void setSNRImageName(std::string);
|
|
||||||
std::string getSNRImageName();
|
|
||||||
//void setMargin(int);
|
|
||||||
//int getMargin();
|
|
||||||
//void setNumberThreads(int);
|
|
||||||
void setDerampMethod(int);
|
|
||||||
int getDerampMethod();*/
|
|
||||||
};
|
};
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
// end of file
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/**
|
/**
|
||||||
* cuAmpcorParameter.cu
|
* @file cuAmpcorParameter.cu
|
||||||
* Input parameters for ampcor
|
* Input parameters for ampcor
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
@ -32,7 +32,7 @@ cuAmpcorParameter::cuAmpcorParameter()
|
||||||
skipSampleAcrossRaw = 64;
|
skipSampleAcrossRaw = 64;
|
||||||
skipSampleDownRaw = 64;
|
skipSampleDownRaw = 64;
|
||||||
rawDataOversamplingFactor = 2;
|
rawDataOversamplingFactor = 2;
|
||||||
zoomWindowSize = 8;
|
zoomWindowSize = 16;
|
||||||
oversamplingFactor = 16;
|
oversamplingFactor = 16;
|
||||||
oversamplingMethod = 0;
|
oversamplingMethod = 0;
|
||||||
|
|
||||||
|
@ -54,8 +54,7 @@ cuAmpcorParameter::cuAmpcorParameter()
|
||||||
referenceStartPixelDown0 = 0;
|
referenceStartPixelDown0 = 0;
|
||||||
referenceStartPixelAcross0 = 0;
|
referenceStartPixelAcross0 = 0;
|
||||||
|
|
||||||
corrRawZoomInHeight = 17; // 8*2+1
|
corrStatWindowSize = 21; // 10*2+1 as in RIOPAC
|
||||||
corrRawZoomInWidth = 17;
|
|
||||||
|
|
||||||
useMmap = 1; // use mmap
|
useMmap = 1; // use mmap
|
||||||
mmapSizeInGB = 1;
|
mmapSizeInGB = 1;
|
||||||
|
@ -68,7 +67,19 @@ cuAmpcorParameter::cuAmpcorParameter()
|
||||||
|
|
||||||
void cuAmpcorParameter::setupParameters()
|
void cuAmpcorParameter::setupParameters()
|
||||||
{
|
{
|
||||||
zoomWindowSize *= rawDataOversamplingFactor; //8 * 2
|
// Size to extract the raw correlation surface for snr/cov
|
||||||
|
corrRawZoomInHeight = std::min(corrStatWindowSize, 2*halfSearchRangeDownRaw+1);
|
||||||
|
corrRawZoomInWidth = std::min(corrStatWindowSize, 2*halfSearchRangeAcrossRaw+1);
|
||||||
|
|
||||||
|
// Size to extract the resampled correlation surface for oversampling
|
||||||
|
// users should use 16 for zoomWindowSize, no need to multiply by 2
|
||||||
|
// zoomWindowSize *= rawDataOversamplingFactor; //8 * 2
|
||||||
|
// to check the search range
|
||||||
|
int corrSurfaceActualSize =
|
||||||
|
std::min(halfSearchRangeAcrossRaw, halfSearchRangeDownRaw)*
|
||||||
|
2*rawDataOversamplingFactor;
|
||||||
|
zoomWindowSize = std::min(zoomWindowSize, corrSurfaceActualSize);
|
||||||
|
|
||||||
halfZoomWindowSizeRaw = zoomWindowSize/(2*rawDataOversamplingFactor); // 8*2/(2*2) = 4
|
halfZoomWindowSizeRaw = zoomWindowSize/(2*rawDataOversamplingFactor); // 8*2/(2*2) = 4
|
||||||
|
|
||||||
windowSizeWidth = windowSizeWidthRaw*rawDataOversamplingFactor; //
|
windowSizeWidth = windowSizeWidthRaw*rawDataOversamplingFactor; //
|
||||||
|
@ -89,10 +100,6 @@ void cuAmpcorParameter::setupParameters()
|
||||||
exit(EXIT_FAILURE);
|
exit(EXIT_FAILURE);
|
||||||
}
|
}
|
||||||
|
|
||||||
// modified 02/12/2018 to include one more chunk
|
|
||||||
// e.g. numberWindowDownInChunk=102, numberWindowDown=10, results in numberChunkDown=11
|
|
||||||
// the last chunk will include 2 windows, numberWindowDownInChunkRun = 2.
|
|
||||||
|
|
||||||
numberChunkDown = IDIVUP(numberWindowDown, numberWindowDownInChunk);
|
numberChunkDown = IDIVUP(numberWindowDown, numberWindowDownInChunk);
|
||||||
numberChunkAcross = IDIVUP(numberWindowAcross, numberWindowAcrossInChunk);
|
numberChunkAcross = IDIVUP(numberWindowAcross, numberWindowAcrossInChunk);
|
||||||
numberChunks = numberChunkDown*numberChunkAcross;
|
numberChunks = numberChunkDown*numberChunkAcross;
|
||||||
|
@ -157,6 +164,7 @@ void cuAmpcorParameter::setStartPixels(int *mStartD, int *mStartA, int *gOffsetD
|
||||||
setChunkStartPixels();
|
setChunkStartPixels();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// set starting pixels for each window with a varying gross offset
|
||||||
void cuAmpcorParameter::setStartPixels(int mStartD, int mStartA, int *gOffsetD, int *gOffsetA)
|
void cuAmpcorParameter::setStartPixels(int mStartD, int mStartA, int *gOffsetD, int *gOffsetA)
|
||||||
{
|
{
|
||||||
for(int row=0; row<numberWindowDown; row++)
|
for(int row=0; row<numberWindowDown; row++)
|
||||||
|
@ -175,9 +183,9 @@ void cuAmpcorParameter::setStartPixels(int mStartD, int mStartA, int *gOffsetD,
|
||||||
setChunkStartPixels();
|
setChunkStartPixels();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// set starting pixels for each window with a constant gross offset
|
||||||
void cuAmpcorParameter::setStartPixels(int mStartD, int mStartA, int gOffsetD, int gOffsetA)
|
void cuAmpcorParameter::setStartPixels(int mStartD, int mStartA, int gOffsetD, int gOffsetA)
|
||||||
{
|
{
|
||||||
//fprintf(stderr, "set start pixels %d %d %d %d\n", mStartD, mStartA, gOffsetD, gOffsetA);
|
|
||||||
for(int row=0; row<numberWindowDown; row++)
|
for(int row=0; row<numberWindowDown; row++)
|
||||||
{
|
{
|
||||||
for(int col = 0; col < numberWindowAcross; col++)
|
for(int col = 0; col < numberWindowAcross; col++)
|
||||||
|
@ -194,6 +202,7 @@ void cuAmpcorParameter::setStartPixels(int mStartD, int mStartA, int gOffsetD, i
|
||||||
setChunkStartPixels();
|
setChunkStartPixels();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// set starting pixels for each chunk
|
||||||
void cuAmpcorParameter::setChunkStartPixels()
|
void cuAmpcorParameter::setChunkStartPixels()
|
||||||
{
|
{
|
||||||
|
|
||||||
|
@ -217,7 +226,6 @@ void cuAmpcorParameter::setChunkStartPixels()
|
||||||
int sChunkED = 0;
|
int sChunkED = 0;
|
||||||
int sChunkEA = 0;
|
int sChunkEA = 0;
|
||||||
|
|
||||||
// modified 02/12/2018
|
|
||||||
int numberWindowDownInChunkRun = numberWindowDownInChunk;
|
int numberWindowDownInChunkRun = numberWindowDownInChunk;
|
||||||
int numberWindowAcrossInChunkRun = numberWindowAcrossInChunk;
|
int numberWindowAcrossInChunkRun = numberWindowAcrossInChunk;
|
||||||
// modify the number of windows in last chunk
|
// modify the number of windows in last chunk
|
||||||
|
@ -226,7 +234,6 @@ void cuAmpcorParameter::setChunkStartPixels()
|
||||||
if(jchunk == numberChunkAcross -1)
|
if(jchunk == numberChunkAcross -1)
|
||||||
numberWindowAcrossInChunkRun = numberWindowAcross - numberWindowAcrossInChunk*(numberChunkAcross -1);
|
numberWindowAcrossInChunkRun = numberWindowAcross - numberWindowAcrossInChunk*(numberChunkAcross -1);
|
||||||
|
|
||||||
|
|
||||||
for(int i=0; i<numberWindowDownInChunkRun; i++)
|
for(int i=0; i<numberWindowDownInChunkRun; i++)
|
||||||
{
|
{
|
||||||
for(int j=0; j<numberWindowAcrossInChunkRun; j++)
|
for(int j=0; j<numberWindowAcrossInChunkRun; j++)
|
||||||
|
@ -326,3 +333,4 @@ cuAmpcorParameter::~cuAmpcorParameter()
|
||||||
{
|
{
|
||||||
deallocateArrays();
|
deallocateArrays();
|
||||||
}
|
}
|
||||||
|
// end of file
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
/**
|
/**
|
||||||
* cuAmpcorParameter.h
|
* @file cuAmpcorParameter.h
|
||||||
* Header file for Ampcor Parameter Class
|
* @brief A class holds cuAmpcor process parameters
|
||||||
*
|
*
|
||||||
* Author: Lijun Zhu @ Seismo Lab, Caltech
|
* Author: Lijun Zhu @ Seismo Lab, Caltech
|
||||||
* March 2017
|
* March 2017; last modified October 2020
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef __CUAMPCORPARAMETER_H
|
#ifndef __CUAMPCORPARAMETER_H
|
||||||
|
@ -29,128 +29,132 @@
|
||||||
/// 4a. Optionally, check the range of windows is within the SLC image range: param->checkPixelInImageRange()
|
/// 4a. Optionally, check the range of windows is within the SLC image range: param->checkPixelInImageRange()
|
||||||
/// Steps 1, 3, 4 are mandatory. If step 2 is missing, default values will be used
|
/// Steps 1, 3, 4 are mandatory. If step 2 is missing, default values will be used
|
||||||
|
|
||||||
|
|
||||||
class cuAmpcorParameter{
|
class cuAmpcorParameter{
|
||||||
public:
|
public:
|
||||||
int algorithm; /// Cross-correlation algorithm: 0=freq domain (default) 1=time domain
|
int algorithm; ///< Cross-correlation algorithm: 0=freq domain (default) 1=time domain
|
||||||
int deviceID; /// Targeted GPU device ID: use -1 to auto select
|
int deviceID; ///< Targeted GPU device ID: use -1 to auto select
|
||||||
int nStreams; /// Number of streams to asynchonize data transfers and compute kernels
|
int nStreams; ///< Number of streams to asynchonize data transfers and compute kernels
|
||||||
int derampMethod; /// Method for deramping 0=None, 1=average, 2=phase gradient
|
int derampMethod; ///< Method for deramping 0=None, 1=average
|
||||||
|
|
||||||
// chip or window size for raw data
|
// chip or window size for raw data
|
||||||
int windowSizeHeightRaw; /// Template window height (original size)
|
int windowSizeHeightRaw; ///< Template window height (original size)
|
||||||
int windowSizeWidthRaw; /// Template window width (original size)
|
int windowSizeWidthRaw; ///< Template window width (original size)
|
||||||
int searchWindowSizeHeightRaw; /// Search window height (original size)
|
int searchWindowSizeHeightRaw; ///< Search window height (original size)
|
||||||
int searchWindowSizeWidthRaw; /// Search window width (orignal size)
|
int searchWindowSizeWidthRaw; ///< Search window width (orignal size)
|
||||||
|
|
||||||
int halfSearchRangeDownRaw; ///(searchWindowSizeHeightRaw-windowSizeHeightRaw)/2
|
int halfSearchRangeDownRaw; ///< (searchWindowSizeHeightRaw-windowSizeHeightRaw)/2
|
||||||
int halfSearchRangeAcrossRaw; ///(searchWindowSizeWidthRaw-windowSizeWidthRaw)/2
|
int halfSearchRangeAcrossRaw; ///< (searchWindowSizeWidthRaw-windowSizeWidthRaw)/2
|
||||||
// search range is (-halfSearchRangeRaw, halfSearchRangeRaw)
|
// search range is (-halfSearchRangeRaw, halfSearchRangeRaw)
|
||||||
|
|
||||||
int searchWindowSizeHeightRawZoomIn;
|
int searchWindowSizeHeightRawZoomIn; ///< search window height used for zoom in
|
||||||
int searchWindowSizeWidthRawZoomIn;
|
int searchWindowSizeWidthRawZoomIn; ///< search window width used for zoom in
|
||||||
|
|
||||||
int corrRawZoomInHeight; // window to estimate snr
|
int corrStatWindowSize; ///< correlation surface size used to estimate snr
|
||||||
int corrRawZoomInWidth;
|
int corrRawZoomInHeight; ///< correlation surface height used for oversampling
|
||||||
|
int corrRawZoomInWidth; ///< correlation surface width used for oversampling
|
||||||
|
|
||||||
// chip or window size after oversampling
|
// chip or window size after oversampling
|
||||||
int rawDataOversamplingFactor; /// Raw data overampling factor (from original size to oversampled size)
|
int rawDataOversamplingFactor; ///< Raw data overampling factor (from original size to oversampled size)
|
||||||
int windowSizeHeight; /// Template window length (oversampled size)
|
int windowSizeHeight; ///< Template window length (oversampled size)
|
||||||
int windowSizeWidth; /// Template window width (original size)
|
int windowSizeWidth; ///< Template window width (original size)
|
||||||
int searchWindowSizeHeight; /// Search window height (oversampled size)
|
int searchWindowSizeHeight; ///< Search window height (oversampled size)
|
||||||
int searchWindowSizeWidth; /// Search window width (oversampled size)
|
int searchWindowSizeWidth; ///< Search window width (oversampled size)
|
||||||
|
|
||||||
// strides between chips/windows
|
// strides between chips/windows
|
||||||
int skipSampleDownRaw; /// Skip size between neighboring windows in Down direction (original size)
|
int skipSampleDownRaw; ///< Skip size between neighboring windows in Down direction (original size)
|
||||||
int skipSampleAcrossRaw; /// Skip size between neighboring windows in across direction (original size)
|
int skipSampleAcrossRaw; ///< Skip size between neighboring windows in across direction (original size)
|
||||||
//int skipSampleDown; /// Skip size between neighboring windows in Down direction (oversampled size)
|
|
||||||
//int skipSampleAcross; /// Skip size between neighboring windows in Across direction (oversampled size)
|
|
||||||
|
|
||||||
// Zoom in region near location of max correlation
|
// Zoom in region near location of max correlation
|
||||||
int zoomWindowSize; /// Zoom-in window size in correlation surface (same for down and across directions)
|
int zoomWindowSize; ///< Zoom-in window size in correlation surface (same for down and across directions)
|
||||||
int halfZoomWindowSizeRaw; /// = half of zoomWindowSize/rawDataOversamplingFactor
|
int halfZoomWindowSizeRaw; ///< half of zoomWindowSize/rawDataOversamplingFactor
|
||||||
|
|
||||||
|
int oversamplingFactor; ///< Oversampling factor for interpolating correlation surface
|
||||||
|
int oversamplingMethod; ///< correlation surface oversampling method 0 = fft (default) 1 = sinc
|
||||||
|
|
||||||
|
|
||||||
|
float thresholdSNR; ///< Threshold of Signal noise ratio to remove noisy data
|
||||||
int oversamplingFactor; /// Oversampling factor for interpolating correlation surface
|
|
||||||
int oversamplingMethod; /// 0 = fft (default) 1 = sinc
|
|
||||||
|
|
||||||
|
|
||||||
float thresholdSNR; /// Threshold of Signal noise ratio to remove noisy data
|
|
||||||
|
|
||||||
//reference image
|
//reference image
|
||||||
std::string referenceImageName; /// reference SLC image name
|
std::string referenceImageName; ///< reference SLC image name
|
||||||
int imageDataType1; /// reference image data type, 2=cfloat=complex=float2 1=float
|
int imageDataType1; ///< reference image data type, 2=cfloat=complex=float2 1=float
|
||||||
int referenceImageHeight; /// reference image height
|
int referenceImageHeight; ///< reference image height
|
||||||
int referenceImageWidth; /// reference image width
|
int referenceImageWidth; ///< reference image width
|
||||||
|
|
||||||
//secondary image
|
//secondary image
|
||||||
std::string secondaryImageName; /// secondary SLC image name
|
std::string secondaryImageName; ///< secondary SLC image name
|
||||||
int imageDataType2; /// secondary image data type, 2=cfloat=complex=float2 1=float
|
int imageDataType2; ///< secondary image data type, 2=cfloat=complex=float2 1=float
|
||||||
int secondaryImageHeight; /// secondary image height
|
int secondaryImageHeight; ///< secondary image height
|
||||||
int secondaryImageWidth; /// secondary image width
|
int secondaryImageWidth; ///< secondary image width
|
||||||
|
|
||||||
// total number of chips/windows
|
// total number of chips/windows
|
||||||
int numberWindowDown; /// number of total windows (down)
|
int numberWindowDown; ///< number of total windows (down)
|
||||||
int numberWindowAcross; /// number of total windows (across)
|
int numberWindowAcross; ///< number of total windows (across)
|
||||||
int numberWindows; /// numberWindowDown*numberWindowAcross
|
int numberWindows; ///< numberWindowDown*numberWindowAcross
|
||||||
|
|
||||||
// number of chips/windows in a batch/chunk
|
// number of chips/windows in a batch/chunk
|
||||||
int numberWindowDownInChunk; /// number of windows processed in a chunk (down)
|
int numberWindowDownInChunk; ///< number of windows processed in a chunk (down)
|
||||||
int numberWindowAcrossInChunk; /// number of windows processed in a chunk (across)
|
int numberWindowAcrossInChunk; ///< number of windows processed in a chunk (across)
|
||||||
int numberWindowsInChunk; /// numberWindowDownInChunk*numberWindowAcrossInChunk
|
int numberWindowsInChunk; ///< numberWindowDownInChunk*numberWindowAcrossInChunk
|
||||||
int numberChunkDown; /// number of chunks (down)
|
int numberChunkDown; ///< number of chunks (down)
|
||||||
int numberChunkAcross; /// number of chunks (across)
|
int numberChunkAcross; ///< number of chunks (across)
|
||||||
int numberChunks;
|
int numberChunks; ///< total number of chunks
|
||||||
|
|
||||||
int useMmap; /// whether to use mmap 0=not 1=yes (default = 0)
|
int useMmap; ///< whether to use mmap 0=not 1=yes (default = 0)
|
||||||
int mmapSizeInGB; /// size for mmap buffer(useMmap=1) or a cpu memory buffer (useMmap=0)
|
int mmapSizeInGB; ///< size for mmap buffer(useMmap=1) or a cpu memory buffer (useMmap=0)
|
||||||
|
|
||||||
int referenceStartPixelDown0;
|
int referenceStartPixelDown0; ///< first starting pixel in reference image (down)
|
||||||
int referenceStartPixelAcross0;
|
int referenceStartPixelAcross0; ///< first starting pixel in reference image (across)
|
||||||
int *referenceStartPixelDown; /// reference starting pixels for each window (down)
|
int *referenceStartPixelDown; ///< reference starting pixels for each window (down)
|
||||||
int *referenceStartPixelAcross;/// reference starting pixels for each window (across)
|
int *referenceStartPixelAcross; ///< reference starting pixels for each window (across)
|
||||||
int *secondaryStartPixelDown; /// secondary starting pixels for each window (down)
|
int *secondaryStartPixelDown; ///< secondary starting pixels for each window (down)
|
||||||
int *secondaryStartPixelAcross; /// secondary starting pixels for each window (across)
|
int *secondaryStartPixelAcross; ///< secondary starting pixels for each window (across)
|
||||||
int grossOffsetDown0;
|
int grossOffsetDown0; ///< gross offset static component (down)
|
||||||
int grossOffsetAcross0;
|
int grossOffsetAcross0; ///< gross offset static component (across)
|
||||||
int *grossOffsetDown; /// Gross offsets between reference and secondary windows (down) : secondaryStartPixel - referenceStartPixel
|
int *grossOffsetDown; ///< Gross offsets between reference and secondary windows (down)
|
||||||
int *grossOffsetAcross; /// Gross offsets between reference and secondary windows (across)
|
int *grossOffsetAcross; ///< Gross offsets between reference and secondary windows (across)
|
||||||
|
|
||||||
int *referenceChunkStartPixelDown;
|
int *referenceChunkStartPixelDown; ///< reference starting pixels for each chunk (down)
|
||||||
int *referenceChunkStartPixelAcross;
|
int *referenceChunkStartPixelAcross; ///< reference starting pixels for each chunk (across)
|
||||||
int *secondaryChunkStartPixelDown;
|
int *secondaryChunkStartPixelDown; ///< secondary starting pixels for each chunk (down)
|
||||||
int *secondaryChunkStartPixelAcross;
|
int *secondaryChunkStartPixelAcross; ///< secondary starting pixels for each chunk (across)
|
||||||
int *referenceChunkHeight;
|
int *referenceChunkHeight; ///< reference chunk height
|
||||||
int *referenceChunkWidth;
|
int *referenceChunkWidth; ///< reference chunk width
|
||||||
int *secondaryChunkHeight;
|
int *secondaryChunkHeight; ///< secondary chunk height
|
||||||
int *secondaryChunkWidth;
|
int *secondaryChunkWidth; ///< secondary chunk width
|
||||||
int maxReferenceChunkHeight, maxReferenceChunkWidth;
|
int maxReferenceChunkHeight, maxReferenceChunkWidth; ///< max reference chunk size
|
||||||
int maxSecondaryChunkHeight, maxSecondaryChunkWidth;
|
int maxSecondaryChunkHeight, maxSecondaryChunkWidth; ///< max secondary chunk size
|
||||||
|
|
||||||
std::string grossOffsetImageName;
|
std::string grossOffsetImageName; ///< gross offset output filename
|
||||||
std::string offsetImageName; /// Output Offset fields filename
|
std::string offsetImageName; ///< Offset fields output filename
|
||||||
std::string snrImageName; /// Output SNR filename
|
std::string snrImageName; ///< Output SNR filename
|
||||||
std::string covImageName;
|
std::string covImageName; ///< Output variance filename
|
||||||
|
|
||||||
cuAmpcorParameter(); /// Class constructor and default parameters setter
|
// Class constructor and default parameters setter
|
||||||
~cuAmpcorParameter(); /// Class descontructor
|
cuAmpcorParameter();
|
||||||
|
// Class descontructor
|
||||||
|
~cuAmpcorParameter();
|
||||||
|
|
||||||
void allocateArrays(); /// Allocate various arrays after the number of Windows is given
|
// Allocate various arrays after the number of Windows is given
|
||||||
void deallocateArrays(); /// Deallocate arrays on exit
|
void allocateArrays();
|
||||||
|
// Deallocate arrays on exit
|
||||||
|
void deallocateArrays();
|
||||||
|
|
||||||
|
|
||||||
/// Three methods to set reference/secondary starting pixels and gross offsets from input reference start pixel(s) and gross offset(s)
|
// Three methods to set reference/secondary starting pixels and gross offsets from input reference start pixel(s) and gross offset(s)
|
||||||
/// 1 (int *, int *, int *, int *): varying reference start pixels and gross offsets
|
// 1 (int *, int *, int *, int *): varying reference start pixels and gross offsets
|
||||||
/// 2 (int, int, int *, int *): fixed reference start pixel (first window) and varying gross offsets
|
// 2 (int, int, int *, int *): fixed reference start pixel (first window) and varying gross offsets
|
||||||
/// 3 (int, int, int, int): fixed reference start pixel(first window) and fixed gross offsets
|
// 3 (int, int, int, int): fixed reference start pixel(first window) and fixed gross offsets
|
||||||
void setStartPixels(int*, int*, int*, int*);
|
void setStartPixels(int*, int*, int*, int*);
|
||||||
void setStartPixels(int, int, int*, int*);
|
void setStartPixels(int, int, int*, int*);
|
||||||
void setStartPixels(int, int, int, int);
|
void setStartPixels(int, int, int, int);
|
||||||
|
// set starting pixels for each chunk
|
||||||
void setChunkStartPixels();
|
void setChunkStartPixels();
|
||||||
void checkPixelInImageRange(); /// check whether
|
// check whether all chunks/windows are within the image range
|
||||||
void setupParameters(); /// Process other parameters after Python Input
|
void checkPixelInImageRange();
|
||||||
|
// Process other parameters after Python Input
|
||||||
|
void setupParameters();
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif
|
#endif //__CUAMPCORPARAMETER_H
|
||||||
|
//end of file
|
||||||
|
|
|
@ -1,12 +1,13 @@
|
||||||
/*
|
/*
|
||||||
* cuAmpcorUtil.h
|
* @file cuAmpcorUtil.h
|
||||||
* header file to include the various routines for ampcor
|
* @brief Header file to include various routines for cuAmpcor
|
||||||
* serves as an index
|
*
|
||||||
|
*
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
// code guard
|
||||||
#ifndef __CUAMPCORUTIL_H
|
#ifndef __CUAMPCORUTIL_H
|
||||||
#define __CUMAPCORUTIL_H
|
#define __CUAMPCORUTIL_H
|
||||||
|
|
||||||
#include "cuArrays.h"
|
#include "cuArrays.h"
|
||||||
#include "cuAmpcorParameter.h"
|
#include "cuAmpcorParameter.h"
|
||||||
|
@ -16,7 +17,7 @@
|
||||||
#include "float2.h"
|
#include "float2.h"
|
||||||
|
|
||||||
|
|
||||||
//in cuArraysCopy.cu: various utitlies for copy images file in gpu memory
|
//in cuArraysCopy.cu: various utilities for copy images file in gpu memory
|
||||||
void cuArraysCopyToBatch(cuArrays<float2> *image1, cuArrays<float2> *image2, int strideH, int strideW, cudaStream_t stream);
|
void cuArraysCopyToBatch(cuArrays<float2> *image1, cuArrays<float2> *image2, int strideH, int strideW, cudaStream_t stream);
|
||||||
void cuArraysCopyToBatchWithOffset(cuArrays<float2> *image1, const int lda1, cuArrays<float2> *image2,
|
void cuArraysCopyToBatchWithOffset(cuArrays<float2> *image1, const int lda1, cuArrays<float2> *image2,
|
||||||
const int *offsetH, const int* offsetW, cudaStream_t stream);
|
const int *offsetH, const int* offsetW, cudaStream_t stream);
|
||||||
|
@ -27,6 +28,7 @@ void cuArraysCopyToBatchWithOffsetR2C(cuArrays<float> *image1, const int lda1, c
|
||||||
void cuArraysCopyC2R(cuArrays<float2> *image1, cuArrays<float> *image2, int strideH, int strideW, cudaStream_t stream);
|
void cuArraysCopyC2R(cuArrays<float2> *image1, cuArrays<float> *image2, int strideH, int strideW, cudaStream_t stream);
|
||||||
|
|
||||||
// same routine name overloaded for different data type
|
// same routine name overloaded for different data type
|
||||||
|
// extract data from a large image
|
||||||
void cuArraysCopyExtract(cuArrays<float> *imagesIn, cuArrays<float> *imagesOut, cuArrays<int2> *offset, cudaStream_t stream);
|
void cuArraysCopyExtract(cuArrays<float> *imagesIn, cuArrays<float> *imagesOut, cuArrays<int2> *offset, cudaStream_t stream);
|
||||||
void cuArraysCopyExtract(cuArrays<float> *imagesIn, cuArrays<float> *imagesOut, int2 offset, cudaStream_t stream);
|
void cuArraysCopyExtract(cuArrays<float> *imagesIn, cuArrays<float> *imagesOut, int2 offset, cudaStream_t stream);
|
||||||
void cuArraysCopyExtract(cuArrays<float2> *imagesIn, cuArrays<float> *imagesOut, int2 offset, cudaStream_t stream);
|
void cuArraysCopyExtract(cuArrays<float2> *imagesIn, cuArrays<float> *imagesOut, int2 offset, cudaStream_t stream);
|
||||||
|
@ -39,26 +41,23 @@ void cuArraysCopyInsert(cuArrays<float3> *imageIn, cuArrays<float3> *imageOut, i
|
||||||
void cuArraysCopyInsert(cuArrays<float> *imageIn, cuArrays<float> *imageOut, int offsetX, int offsetY, cudaStream_t stream);
|
void cuArraysCopyInsert(cuArrays<float> *imageIn, cuArrays<float> *imageOut, int offsetX, int offsetY, cudaStream_t stream);
|
||||||
void cuArraysCopyInsert(cuArrays<int> *imageIn, cuArrays<int> *imageOut, int offsetX, int offersetY, cudaStream_t stream);
|
void cuArraysCopyInsert(cuArrays<int> *imageIn, cuArrays<int> *imageOut, int offsetX, int offersetY, cudaStream_t stream);
|
||||||
|
|
||||||
void cuArraysCopyInversePadded(cuArrays<float> *imageIn, cuArrays<float> *imageOut,cudaStream_t stream);
|
|
||||||
|
|
||||||
void cuArraysCopyPadded(cuArrays<float> *imageIn, cuArrays<float> *imageOut,cudaStream_t stream);
|
void cuArraysCopyPadded(cuArrays<float> *imageIn, cuArrays<float> *imageOut,cudaStream_t stream);
|
||||||
void cuArraysCopyPadded(cuArrays<float> *imageIn, cuArrays<float2> *imageOut,cudaStream_t stream);
|
void cuArraysCopyPadded(cuArrays<float> *imageIn, cuArrays<float2> *imageOut,cudaStream_t stream);
|
||||||
void cuArraysCopyPadded(cuArrays<float2> *imageIn, cuArrays<float2> *imageOut,cudaStream_t stream);
|
void cuArraysCopyPadded(cuArrays<float2> *imageIn, cuArrays<float2> *imageOut,cudaStream_t stream);
|
||||||
void cuArraysSetConstant(cuArrays<float> *imageIn, float value, cudaStream_t stream);
|
void cuArraysSetConstant(cuArrays<float> *imageIn, float value, cudaStream_t stream);
|
||||||
|
|
||||||
//in cuDeramp.cu: deramping phase
|
|
||||||
void cuDeramp(int method, cuArrays<float2> *images, cudaStream_t stream);
|
|
||||||
void cuDerampMethod1(cuArrays<float2> *images, cudaStream_t stream);
|
|
||||||
void cuDerampMethod2(cuArrays<float2> *images, cudaStream_t stream);
|
|
||||||
void cpuDerampMethod3(cuArrays<float2> *images, cudaStream_t stream);
|
|
||||||
|
|
||||||
//in cuArraysPadding.cu: various utilities for oversampling padding
|
|
||||||
void cuArraysPadding(cuArrays<float2> *image1, cuArrays<float2> *image2, cudaStream_t stream);
|
|
||||||
void cuArraysPaddingMany(cuArrays<float2> *image1, cuArrays<float2> *image2, cudaStream_t stream);
|
|
||||||
void cuArraysR2C(cuArrays<float> *image1, cuArrays<float2> *image2, cudaStream_t stream);
|
void cuArraysR2C(cuArrays<float> *image1, cuArrays<float2> *image2, cudaStream_t stream);
|
||||||
void cuArraysC2R(cuArrays<float2> *image1, cuArrays<float> *image2, cudaStream_t stream);
|
void cuArraysC2R(cuArrays<float2> *image1, cuArrays<float> *image2, cudaStream_t stream);
|
||||||
void cuArraysAbs(cuArrays<float2> *image1, cuArrays<float> *image2, cudaStream_t stream);
|
void cuArraysAbs(cuArrays<float2> *image1, cuArrays<float> *image2, cudaStream_t stream);
|
||||||
|
|
||||||
|
// cuDeramp.cu: deramping phase
|
||||||
|
void cuDeramp(int method, cuArrays<float2> *images, cudaStream_t stream);
|
||||||
|
void cuDerampMethod1(cuArrays<float2> *images, cudaStream_t stream);
|
||||||
|
|
||||||
|
// cuArraysPadding.cu: various utilities for oversampling padding
|
||||||
|
void cuArraysPadding(cuArrays<float2> *image1, cuArrays<float2> *image2, cudaStream_t stream);
|
||||||
|
void cuArraysPaddingMany(cuArrays<float2> *image1, cuArrays<float2> *image2, cudaStream_t stream);
|
||||||
|
|
||||||
//in cuCorrNormalization.cu: utities to normalize the cross correlation function
|
//in cuCorrNormalization.cu: utities to normalize the cross correlation function
|
||||||
void cuArraysSubtractMean(cuArrays<float> *images, cudaStream_t stream);
|
void cuArraysSubtractMean(cuArrays<float> *images, cudaStream_t stream);
|
||||||
void cuCorrNormalize(cuArrays<float> *templates, cuArrays<float> *images, cuArrays<float> *results, cudaStream_t stream);
|
void cuCorrNormalize(cuArrays<float> *templates, cuArrays<float> *images, cuArrays<float> *results, cudaStream_t stream);
|
||||||
|
@ -68,11 +67,11 @@ void cuArraysMaxloc2D(cuArrays<float> *images, cuArrays<int2> *maxloc, cuArrays<
|
||||||
void cuArraysMaxloc2D(cuArrays<float> *images, cuArrays<int2> *maxloc, cudaStream_t stream);
|
void cuArraysMaxloc2D(cuArrays<float> *images, cuArrays<int2> *maxloc, cudaStream_t stream);
|
||||||
void cuSubPixelOffset(cuArrays<int2> *offsetInit, cuArrays<int2> *offsetZoomIn, cuArrays<float2> *offsetFinal,
|
void cuSubPixelOffset(cuArrays<int2> *offsetInit, cuArrays<int2> *offsetZoomIn, cuArrays<float2> *offsetFinal,
|
||||||
int OverSampleRatioZoomin, int OverSampleRatioRaw,
|
int OverSampleRatioZoomin, int OverSampleRatioRaw,
|
||||||
int xHalfRangeInit, int yHalfRangeInit, int xHalfRangeZoomIn, int yHalfRangeZoomIn,
|
int xHalfRangeInit, int yHalfRangeInit,
|
||||||
cudaStream_t stream);
|
cudaStream_t stream);
|
||||||
|
|
||||||
void cuDetermineInterpZone(cuArrays<int2> *maxloc, cuArrays<int2> *zoomInOffset, cuArrays<float> *corrOrig, cuArrays<float> *corrZoomIn, cudaStream_t stream);
|
void cuDetermineSecondaryExtractOffset(cuArrays<int2> *maxLoc, cuArrays<int2> *maxLocShift,
|
||||||
void cuDetermineSecondaryExtractOffset(cuArrays<int2> *maxLoc, int xOldRange, int yOldRange, int xNewRange, int yNewRange, cudaStream_t stream);
|
int xOldRange, int yOldRange, int xNewRange, int yNewRange, cudaStream_t stream);
|
||||||
|
|
||||||
//in cuCorrTimeDomain.cu: cross correlation in time domain
|
//in cuCorrTimeDomain.cu: cross correlation in time domain
|
||||||
void cuCorrTimeDomain(cuArrays<float> *templates, cuArrays<float> *images, cuArrays<float> *results, cudaStream_t stream);
|
void cuCorrTimeDomain(cuArrays<float> *templates, cuArrays<float> *images, cuArrays<float> *results, cudaStream_t stream);
|
||||||
|
@ -95,3 +94,5 @@ void cuEstimateSnr(cuArrays<float> *corrSum, cuArrays<int> *corrValidCount, cuAr
|
||||||
void cuEstimateVariance(cuArrays<float> *corrBatchRaw, cuArrays<int2> *maxloc, cuArrays<float> *maxval, cuArrays<float3> *covValue, cudaStream_t stream);
|
void cuEstimateVariance(cuArrays<float> *corrBatchRaw, cuArrays<int2> *maxloc, cuArrays<float> *maxval, cuArrays<float3> *covValue, cudaStream_t stream);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
// end of file
|
||||||
|
|
|
@ -1,174 +1,175 @@
|
||||||
|
/**
|
||||||
|
* \file cuArrays.cu
|
||||||
|
* \brief Implementations for cuArrays class
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
// dependencies
|
||||||
#include "cuArrays.h"
|
#include "cuArrays.h"
|
||||||
#include "cudaError.h"
|
#include "cudaError.h"
|
||||||
|
|
||||||
template <typename T>
|
// allocate arrays in device memory
|
||||||
void cuArrays<T>::allocate()
|
template <typename T>
|
||||||
{
|
void cuArrays<T>::allocate()
|
||||||
|
{
|
||||||
checkCudaErrors(cudaMalloc((void **)&devData, getByteSize()));
|
checkCudaErrors(cudaMalloc((void **)&devData, getByteSize()));
|
||||||
is_allocated = 1;
|
is_allocated = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
// allocate arrays in host memory
|
||||||
void cuArrays<T>::allocateHost()
|
template <typename T>
|
||||||
{
|
void cuArrays<T>::allocateHost()
|
||||||
|
{
|
||||||
hostData = (T *)malloc(getByteSize());
|
hostData = (T *)malloc(getByteSize());
|
||||||
//checkCudaErrors(cudaMallocHost((void **)&hostData, getByteSize()));
|
|
||||||
is_allocatedHost = 1;
|
is_allocatedHost = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
// deallocate arrays in device memory
|
||||||
void cuArrays<T>::deallocate()
|
template <typename T>
|
||||||
{
|
void cuArrays<T>::deallocate()
|
||||||
|
{
|
||||||
checkCudaErrors(cudaFree(devData));
|
checkCudaErrors(cudaFree(devData));
|
||||||
is_allocated = 0;
|
is_allocated = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
// deallocate arrays in host memory
|
||||||
void cuArrays<T>::deallocateHost()
|
template <typename T>
|
||||||
{
|
void cuArrays<T>::deallocateHost()
|
||||||
//checkCudaErrors(cudaFreeHost(hostData));
|
{
|
||||||
free(hostData);
|
free(hostData);
|
||||||
is_allocatedHost = 0;
|
is_allocatedHost = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
// copy arrays from device to host
|
||||||
void cuArrays<T>::copyToHost(cudaStream_t stream)
|
// use asynchronous for possible overlaps between data copying and kernel execution
|
||||||
{
|
template <typename T>
|
||||||
//std::cout << "debug copy " << is_allocatedHost << " " << is_allocated << " " << getByteSize() << "\n";
|
void cuArrays<T>::copyToHost(cudaStream_t stream)
|
||||||
|
{
|
||||||
checkCudaErrors(cudaMemcpyAsync(hostData, devData, getByteSize(), cudaMemcpyDeviceToHost, stream));
|
checkCudaErrors(cudaMemcpyAsync(hostData, devData, getByteSize(), cudaMemcpyDeviceToHost, stream));
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
// copy arrays from host to device
|
||||||
void cuArrays<T>::copyToDevice(cudaStream_t stream)
|
template <typename T>
|
||||||
{
|
void cuArrays<T>::copyToDevice(cudaStream_t stream)
|
||||||
|
{
|
||||||
checkCudaErrors(cudaMemcpyAsync(devData, hostData, getByteSize(), cudaMemcpyHostToDevice, stream));
|
checkCudaErrors(cudaMemcpyAsync(devData, hostData, getByteSize(), cudaMemcpyHostToDevice, stream));
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
// set to 0
|
||||||
void cuArrays<T>::setZero(cudaStream_t stream)
|
template <typename T>
|
||||||
{
|
void cuArrays<T>::setZero(cudaStream_t stream)
|
||||||
|
{
|
||||||
checkCudaErrors(cudaMemsetAsync(devData, 0, getByteSize(), stream));
|
checkCudaErrors(cudaMemsetAsync(devData, 0, getByteSize(), stream));
|
||||||
}
|
}
|
||||||
|
|
||||||
template<>
|
// output (partial) data when debugging
|
||||||
void cuArrays<float2>::debuginfo(cudaStream_t stream) {
|
template <typename T>
|
||||||
//std::cout << height << " " << width << " " << count << std::endl;
|
void cuArrays<T>::debuginfo(cudaStream_t stream) {
|
||||||
//std::cout << height << " " << width << " " << count << std::endl;
|
// output size info
|
||||||
|
std::cout << "Image height,width,count: " << height << "," << width << "," << count << std::endl;
|
||||||
|
// check whether host data is allocated
|
||||||
if( !is_allocatedHost)
|
if( !is_allocatedHost)
|
||||||
allocateHost();
|
allocateHost();
|
||||||
|
// copy to host
|
||||||
copyToHost(stream);
|
copyToHost(stream);
|
||||||
|
|
||||||
//cudaStreamSynchronize(stream);
|
// set a max output range
|
||||||
//std::cout << "debug debuginfo " << size << " " << count << " " << stream << "\n";
|
int range = std::min(10, size*count);
|
||||||
|
// first 10 data
|
||||||
int range = min(10, size*count);
|
|
||||||
|
|
||||||
for(int i=0; i<range; i++)
|
|
||||||
std::cout << "(" <<hostData[i].x << " ," << hostData[i].y << ")" ;
|
|
||||||
std::cout << std::endl;
|
|
||||||
if(size*count>range) {
|
|
||||||
for(int i=size*count-range; i<size*count; i++)
|
|
||||||
std::cout << "(" <<hostData[i].x << " ," << hostData[i].y << ")" ;
|
|
||||||
std::cout << std::endl;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
template<>
|
|
||||||
void cuArrays<int2>::debuginfo(cudaStream_t stream) {
|
|
||||||
//std::cout << height << " " << width << " " << count << std::endl;
|
|
||||||
if( !is_allocatedHost)
|
|
||||||
allocateHost();
|
|
||||||
copyToHost(stream);
|
|
||||||
int range = min(10, size*count);
|
|
||||||
|
|
||||||
for(int i=0; i<range; i++)
|
|
||||||
std::cout << "(" <<hostData[i].x << " ," << hostData[i].y << ")" ;
|
|
||||||
std::cout << std::endl;
|
|
||||||
if(size*count>range) {
|
|
||||||
for(int i=size*count-range; i<size*count; i++)
|
|
||||||
std::cout << "(" <<hostData[i].x << " ," << hostData[i].y << ")" ;
|
|
||||||
std::cout << std::endl;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template <>
|
|
||||||
void cuArrays<float>::debuginfo(cudaStream_t stream) {
|
|
||||||
std::cout << height << " " << width << " " << count << std::endl;
|
|
||||||
if( !is_allocatedHost)
|
|
||||||
allocateHost();
|
|
||||||
copyToHost(stream);
|
|
||||||
|
|
||||||
int range = min(10, size*count);
|
|
||||||
|
|
||||||
for(int i=0; i<range; i++)
|
for(int i=0; i<range; i++)
|
||||||
std::cout << "(" <<hostData[i] << ")" ;
|
std::cout << "(" <<hostData[i] << ")" ;
|
||||||
std::cout << std::endl;
|
std::cout << std::endl;
|
||||||
|
// last 10 data
|
||||||
if(size*count>range) {
|
if(size*count>range) {
|
||||||
for(int i=size*count-range; i<size*count; i++)
|
for(int i=size*count-range; i<size*count; i++)
|
||||||
std::cout << "(" <<hostData[i] << ")" ;
|
std::cout << "(" <<hostData[i] << ")" ;
|
||||||
std::cout << std::endl;
|
std::cout << std::endl;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename T>
|
// need specializations for x,y components
|
||||||
void cuArrays<T>::outputToFile(std::string fn, cudaStream_t stream)
|
template<>
|
||||||
{
|
void cuArrays<float2>::debuginfo(cudaStream_t stream) {
|
||||||
|
std::cout << "Image height,width,count: " << height << "," << width << "," << count << std::endl;
|
||||||
|
if( !is_allocatedHost)
|
||||||
|
allocateHost();
|
||||||
|
copyToHost(stream);
|
||||||
|
|
||||||
|
int range = std::min(10, size*count);
|
||||||
|
|
||||||
|
for(int i=0; i<range; i++)
|
||||||
|
std::cout << "(" <<hostData[i].x << ", " << hostData[i].y << ")" ;
|
||||||
|
std::cout << std::endl;
|
||||||
|
if(size*count>range) {
|
||||||
|
for(int i=size*count-range; i<size*count; i++)
|
||||||
|
std::cout << "(" <<hostData[i].x << ", " << hostData[i].y << ")" ;
|
||||||
|
std::cout << std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template<>
|
||||||
|
void cuArrays<float3>::debuginfo(cudaStream_t stream) {
|
||||||
|
std::cout << "Image height,width,count: " << height << "," << width << "," << count << std::endl;
|
||||||
|
if( !is_allocatedHost)
|
||||||
|
allocateHost();
|
||||||
|
copyToHost(stream);
|
||||||
|
|
||||||
|
int range = std::min(10, size*count);
|
||||||
|
|
||||||
|
for(int i=0; i<range; i++)
|
||||||
|
std::cout << "(" <<hostData[i].x << ", " << hostData[i].y << ")" ;
|
||||||
|
std::cout << std::endl;
|
||||||
|
if(size*count>range) {
|
||||||
|
for(int i=size*count-range; i<size*count; i++)
|
||||||
|
std::cout << "(" <<hostData[i].x << ", " << hostData[i].y << ", " << hostData[i].z <<")";
|
||||||
|
std::cout << std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template<>
|
||||||
|
void cuArrays<int2>::debuginfo(cudaStream_t stream) {
|
||||||
|
std::cout << "Image height,width,count: " << height << "," << width << "," << count << std::endl;
|
||||||
|
if( !is_allocatedHost)
|
||||||
|
allocateHost();
|
||||||
|
copyToHost(stream);
|
||||||
|
|
||||||
|
int range = std::min(10, size*count);
|
||||||
|
|
||||||
|
for(int i=0; i<range; i++)
|
||||||
|
std::cout << "(" <<hostData[i].x << ", " << hostData[i].y << ")" ;
|
||||||
|
std::cout << std::endl;
|
||||||
|
if(size*count>range) {
|
||||||
|
for(int i=size*count-range; i<size*count; i++)
|
||||||
|
std::cout << "(" <<hostData[i].x << ", " << hostData[i].y << ")" ;
|
||||||
|
std::cout << std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// output to file by copying to host at first
|
||||||
|
template<typename T>
|
||||||
|
void cuArrays<T>::outputToFile(std::string fn, cudaStream_t stream)
|
||||||
|
{
|
||||||
if( !is_allocatedHost)
|
if( !is_allocatedHost)
|
||||||
allocateHost();
|
allocateHost();
|
||||||
copyToHost(stream);
|
copyToHost(stream);
|
||||||
outputHostToFile(fn);
|
outputHostToFile(fn);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
// save the host data to (binary) file
|
||||||
void cuArrays<T>::outputHostToFile(std::string fn)
|
template <typename T>
|
||||||
{
|
void cuArrays<T>::outputHostToFile(std::string fn)
|
||||||
|
{
|
||||||
std::ofstream file;
|
std::ofstream file;
|
||||||
file.open(fn.c_str(), std::ios_base::binary);
|
file.open(fn.c_str(), std::ios_base::binary);
|
||||||
file.write((char *)hostData, getByteSize());
|
file.write((char *)hostData, getByteSize());
|
||||||
file.close();
|
file.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
// instantiations, required by python extensions
|
||||||
template<>
|
template class cuArrays<float>;
|
||||||
void cuArrays<float>::outputToFile(std::string fn, cudaStream_t stream)
|
template class cuArrays<float2>;
|
||||||
{
|
template class cuArrays<float3>;
|
||||||
float *data;
|
template class cuArrays<int2>;
|
||||||
data = (float *)malloc(size*count*sizeof(float));
|
template class cuArrays<int>;
|
||||||
cudaMemcpyAsync(data, devData, size*count*sizeof(float), cudaMemcpyDeviceToHost, stream);
|
|
||||||
std::ofstream file;
|
|
||||||
file.open(fn.c_str(), std::ios_base::binary);
|
|
||||||
file.write((char *)data, size*count*sizeof(float));
|
|
||||||
file.close();
|
|
||||||
}*/
|
|
||||||
|
|
||||||
template<>
|
// end of file
|
||||||
void cuArrays<float2>::outputToFile(std::string fn, cudaStream_t stream)
|
|
||||||
{
|
|
||||||
float *data;
|
|
||||||
data = (float *)malloc(size*count*sizeof(float2));
|
|
||||||
checkCudaErrors(cudaMemcpyAsync(data, devData, size*count*sizeof(float2), cudaMemcpyDeviceToHost, stream));
|
|
||||||
std::ofstream file;
|
|
||||||
file.open(fn.c_str(), std::ios_base::binary);
|
|
||||||
file.write((char *)data, size*count*sizeof(float2));
|
|
||||||
file.close();
|
|
||||||
}
|
|
||||||
|
|
||||||
template<>
|
|
||||||
void cuArrays<float3>::outputToFile(std::string fn, cudaStream_t stream)
|
|
||||||
{
|
|
||||||
float *data;
|
|
||||||
data = (float *)malloc(size*count*sizeof(float3));
|
|
||||||
checkCudaErrors(cudaMemcpyAsync(data, devData, size*count*sizeof(float3), cudaMemcpyDeviceToHost, stream));
|
|
||||||
std::ofstream file;
|
|
||||||
file.open(fn.c_str(), std::ios_base::binary);
|
|
||||||
file.write((char *)data, size*count*sizeof(float3));
|
|
||||||
file.close();
|
|
||||||
}
|
|
||||||
|
|
||||||
template class cuArrays<float>;
|
|
||||||
template class cuArrays<float2>;
|
|
||||||
template class cuArrays<float3>;
|
|
||||||
template class cuArrays<int2>;
|
|
||||||
template class cuArrays<int>;
|
|
||||||
|
|
|
@ -1,15 +1,17 @@
|
||||||
/*
|
/**
|
||||||
* cuArrays.h
|
* @file cuArrays.h
|
||||||
* Header file for declaring a group of images
|
* @brief Header file for cuArrays class
|
||||||
*
|
*
|
||||||
* Lijun Zhu
|
* A class describes a batch of images (in 2d arrays).
|
||||||
* Seismo Lab, Caltech
|
* Each image has size (height, width)
|
||||||
* V1.0 11/29/2016
|
* The number of images (countH, countW) or (1, count).
|
||||||
*/
|
**/
|
||||||
|
|
||||||
#ifndef __CUIMAGES_H
|
// code guard
|
||||||
#define __CUIMAGES_H
|
#ifndef __CUARRAYS_H
|
||||||
|
#define __CUARRAYS_H
|
||||||
|
|
||||||
|
// cuda dependencies
|
||||||
#include <cuda.h>
|
#include <cuda.h>
|
||||||
#include <driver_types.h>
|
#include <driver_types.h>
|
||||||
|
|
||||||
|
@ -19,27 +21,28 @@
|
||||||
#include <ctime>
|
#include <ctime>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
class cuArrays{
|
class cuArrays{
|
||||||
|
|
||||||
public:
|
public:
|
||||||
int height; // x, row, down, length, azimuth, along the track
|
int height; ///< x, row, down, length, azimuth, along the track
|
||||||
int width; // y, col, across, range, along the sight
|
int width; // y, col, across, range, along the sight
|
||||||
int size; // chip size, heigh*width
|
int size; // one image size, height*width
|
||||||
int countW; // number of images along width direction
|
int countW; // number of images along width direction
|
||||||
int countH; // number of images along height direction
|
int countH; // number of images along height direction
|
||||||
int count; // countW*countH, number of images
|
int count; // countW*countH, number of images
|
||||||
T* devData; // pointer to data in device (gpu) memory
|
T* devData; // pointer to data in device (gpu) memory
|
||||||
T* hostData;
|
T* hostData; // pointer to data in host (cpu) memory
|
||||||
|
|
||||||
bool is_allocated;
|
bool is_allocated; // whether the data is allocated in device memory
|
||||||
bool is_allocatedHost;
|
bool is_allocatedHost; // whether the data is allocated in host memory
|
||||||
|
|
||||||
|
// default constructor, empty
|
||||||
cuArrays() : width(0), height(0), size(0), countW(0), countH(0), count(0),
|
cuArrays() : width(0), height(0), size(0), countW(0), countH(0), count(0),
|
||||||
is_allocated(0), is_allocatedHost(0),
|
is_allocated(0), is_allocatedHost(0),
|
||||||
devData(0), hostData(0) {}
|
devData(0), hostData(0) {}
|
||||||
|
|
||||||
// single image
|
// constructor for single image
|
||||||
cuArrays(size_t h, size_t w) : width(w), height(h), countH(1), countW(1), count(1),
|
cuArrays(size_t h, size_t w) : width(w), height(h), countH(1), countW(1), count(1),
|
||||||
is_allocated(0), is_allocatedHost(0),
|
is_allocated(0), is_allocatedHost(0),
|
||||||
devData(0), hostData(0)
|
devData(0), hostData(0)
|
||||||
|
@ -47,7 +50,7 @@ public:
|
||||||
size = w*h;
|
size = w*h;
|
||||||
}
|
}
|
||||||
|
|
||||||
//
|
// constructor for multiple images with a total count
|
||||||
cuArrays(size_t h, size_t w, size_t n) : width(w), height(h), countH(1), countW(n), count(n),
|
cuArrays(size_t h, size_t w, size_t n) : width(w), height(h), countH(1), countW(n), count(n),
|
||||||
is_allocated(0), is_allocatedHost(0),
|
is_allocated(0), is_allocatedHost(0),
|
||||||
devData(0), hostData(0)
|
devData(0), hostData(0)
|
||||||
|
@ -55,6 +58,7 @@ public:
|
||||||
size = w*h;
|
size = w*h;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// constructor for multiple images with (countH, countW)
|
||||||
cuArrays(size_t h, size_t w, size_t ch, size_t cw) : width(w), height(h), countW(cw), countH(ch),
|
cuArrays(size_t h, size_t w, size_t ch, size_t cw) : width(w), height(h), countW(cw), countH(ch),
|
||||||
is_allocated(0), is_allocatedHost(0),
|
is_allocated(0), is_allocatedHost(0),
|
||||||
devData(0), hostData(0)
|
devData(0), hostData(0)
|
||||||
|
@ -63,24 +67,29 @@ public:
|
||||||
count = countH*countW;
|
count = countH*countW;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// memory allocation
|
||||||
void allocate();
|
void allocate();
|
||||||
void allocateHost();
|
void allocateHost();
|
||||||
void deallocate();
|
void deallocate();
|
||||||
void deallocateHost();
|
void deallocateHost();
|
||||||
|
|
||||||
|
// copy data between device and host memories
|
||||||
void copyToHost(cudaStream_t stream);
|
void copyToHost(cudaStream_t stream);
|
||||||
void copyToDevice(cudaStream_t stream);
|
void copyToDevice(cudaStream_t stream);
|
||||||
|
|
||||||
|
// get the total size
|
||||||
size_t getSize()
|
size_t getSize()
|
||||||
{
|
{
|
||||||
return size*count;
|
return size*count;
|
||||||
}
|
}
|
||||||
|
|
||||||
long getByteSize()
|
// get the total size in byte
|
||||||
|
inline long getByteSize()
|
||||||
{
|
{
|
||||||
return width*height*count*sizeof(T);
|
return width*height*count*sizeof(T);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// destructor
|
||||||
~cuArrays()
|
~cuArrays()
|
||||||
{
|
{
|
||||||
if(is_allocated)
|
if(is_allocated)
|
||||||
|
@ -89,12 +98,16 @@ public:
|
||||||
deallocateHost();
|
deallocateHost();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// set zeroes
|
||||||
void setZero(cudaStream_t stream);
|
void setZero(cudaStream_t stream);
|
||||||
|
// output when debugging
|
||||||
void debuginfo(cudaStream_t stream) ;
|
void debuginfo(cudaStream_t stream) ;
|
||||||
void debuginfo(cudaStream_t stream, float factor);
|
void debuginfo(cudaStream_t stream, float factor);
|
||||||
|
// write to files
|
||||||
void outputToFile(std::string fn, cudaStream_t stream);
|
void outputToFile(std::string fn, cudaStream_t stream);
|
||||||
void outputHostToFile(std::string fn);
|
void outputHostToFile(std::string fn);
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif //__CUIMAGES_H
|
#endif //__CUARRAYS_H
|
||||||
|
//end of file
|
||||||
|
|
|
@ -1,22 +1,32 @@
|
||||||
/* imagecopy.cu
|
/**
|
||||||
* various utitilies for copying images in device memory
|
* @file cuArraysCopy.cu
|
||||||
|
* @brief Utilities for copying/converting images to different format
|
||||||
*
|
*
|
||||||
* Lijun Zhu @ Seismo Lab, Caltech
|
* All methods are declared in cuAmpcorUtil.h
|
||||||
* v1.0 Jan 2017
|
* cudaArraysCopyToBatch to extract a batch of windows from the raw image
|
||||||
|
* various implementations include:
|
||||||
|
* 1. fixed or varying offsets, as start pixels for windows
|
||||||
|
* 2. complex to complex, usually
|
||||||
|
* 3. complex to (amplitude,0), for TOPS
|
||||||
|
* 4. real to complex, for real images
|
||||||
|
* cuArraysCopyExtract to extract(shrink in size) from a batch of windows to another batch
|
||||||
|
* overloaded for different data types
|
||||||
|
* cuArraysCopyInsert to insert a batch of windows (smaller in size) to another batch
|
||||||
|
* overloaded for different data types
|
||||||
|
* cuArraysCopyPadded to insert a batch of windows to another batch while padding 0s for rest elements
|
||||||
|
* used for fft oversampling
|
||||||
|
* see also cuArraysPadding.cu for other zero-padding utilities
|
||||||
|
* cuArraysR2C cuArraysC2R cuArraysAbs to convert between different data types
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
// dependencies
|
||||||
#include "cuArrays.h"
|
#include "cuArrays.h"
|
||||||
#include "cudaUtil.h"
|
#include "cudaUtil.h"
|
||||||
#include "cudaError.h"
|
#include "cudaError.h"
|
||||||
#include "float2.h"
|
#include "float2.h"
|
||||||
|
|
||||||
/*
|
// cuda kernel for cuArraysCopyToBatch
|
||||||
inline __device__ float cuAbs(float2 a)
|
|
||||||
{
|
|
||||||
return sqrtf(a.x*a.x+a.y*a.y);
|
|
||||||
}*/
|
|
||||||
|
|
||||||
// copy a chunk into a batch of chips for a given stride
|
|
||||||
__global__ void cuArraysCopyToBatch_kernel(const float2 *imageIn, const int inNX, const int inNY,
|
__global__ void cuArraysCopyToBatch_kernel(const float2 *imageIn, const int inNX, const int inNY,
|
||||||
float2 *imageOut, const int outNX, const int outNY,
|
float2 *imageOut, const int outNX, const int outNY,
|
||||||
const int nImagesX, const int nImagesY,
|
const int nImagesX, const int nImagesY,
|
||||||
|
@ -33,6 +43,15 @@ __global__ void cuArraysCopyToBatch_kernel(const float2 *imageIn, const int inNX
|
||||||
imageOut[idxOut] = imageIn[idxIn];
|
imageOut[idxOut] = imageIn[idxIn];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Copy a chunk into a batch of chips for a given stride
|
||||||
|
* @note used to extract chips from a raw image
|
||||||
|
* @param image1 Input image as a large chunk
|
||||||
|
* @param image2 Output images as a batch of chips
|
||||||
|
* @param strideH stride along height to extract chips
|
||||||
|
* @param strideW stride along width to extract chips
|
||||||
|
* @param stream cudaStream
|
||||||
|
*/
|
||||||
void cuArraysCopyToBatch(cuArrays<float2> *image1, cuArrays<float2> *image2,
|
void cuArraysCopyToBatch(cuArrays<float2> *image1, cuArrays<float2> *image2,
|
||||||
int strideH, int strideW, cudaStream_t stream)
|
int strideH, int strideW, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
|
@ -47,8 +66,7 @@ void cuArraysCopyToBatch(cuArrays<float2> *image1, cuArrays<float2> *image2,
|
||||||
getLastCudaError("cuArraysCopyToBatch_kernel");
|
getLastCudaError("cuArraysCopyToBatch_kernel");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// kernel for cuArraysCopyToBatchWithOffset
|
||||||
// copy a chunk into a batch of chips for a set of offsets (varying strides), from complex to complex
|
|
||||||
__global__ void cuArraysCopyToBatchWithOffset_kernel(const float2 *imageIn, const int inNY,
|
__global__ void cuArraysCopyToBatchWithOffset_kernel(const float2 *imageIn, const int inNY,
|
||||||
float2 *imageOut, const int outNX, const int outNY, const int nImages,
|
float2 *imageOut, const int outNX, const int outNY, const int nImages,
|
||||||
const int *offsetX, const int *offsetY)
|
const int *offsetX, const int *offsetY)
|
||||||
|
@ -62,14 +80,22 @@ __global__ void cuArraysCopyToBatchWithOffset_kernel(const float2 *imageIn, cons
|
||||||
imageOut[idxOut] = imageIn[idxIn];
|
imageOut[idxOut] = imageIn[idxIn];
|
||||||
}
|
}
|
||||||
|
|
||||||
// lda1 (inNY) is the leading dimension of image1, usually, its width
|
/**
|
||||||
|
* Copy a chunk into a batch of chips with varying offsets/strides
|
||||||
|
* @note used to extract chips from a raw secondary image with varying offsets
|
||||||
|
* @param image1 Input image as a large chunk
|
||||||
|
* @param lda1 the leading dimension of image1, usually, its width inNY
|
||||||
|
* @param image2 Output images as a batch of chips
|
||||||
|
* @param strideH (varying) offsets along height to extract chips
|
||||||
|
* @param strideW (varying) offsets along width to extract chips
|
||||||
|
* @param stream cudaStream
|
||||||
|
*/
|
||||||
void cuArraysCopyToBatchWithOffset(cuArrays<float2> *image1, const int lda1, cuArrays<float2> *image2,
|
void cuArraysCopyToBatchWithOffset(cuArrays<float2> *image1, const int lda1, cuArrays<float2> *image2,
|
||||||
const int *offsetH, const int* offsetW, cudaStream_t stream)
|
const int *offsetH, const int* offsetW, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
const int nthreads = 16;
|
const int nthreads = 16;
|
||||||
dim3 blockSize(nthreads, nthreads, 1);
|
dim3 blockSize(nthreads, nthreads, 1);
|
||||||
dim3 gridSize(IDIVUP(image2->height,nthreads), IDIVUP(image2->width,nthreads), image2->count);
|
dim3 gridSize(IDIVUP(image2->height,nthreads), IDIVUP(image2->width,nthreads), image2->count);
|
||||||
//fprintf(stderr, "copy tile to batch, %d %d\n", lda1, image2->count);
|
|
||||||
cuArraysCopyToBatchWithOffset_kernel<<<gridSize,blockSize, 0 , stream>>> (
|
cuArraysCopyToBatchWithOffset_kernel<<<gridSize,blockSize, 0 , stream>>> (
|
||||||
image1->devData, lda1,
|
image1->devData, lda1,
|
||||||
image2->devData, image2->height, image2->width, image2->count,
|
image2->devData, image2->height, image2->width, image2->count,
|
||||||
|
@ -77,7 +103,7 @@ void cuArraysCopyToBatchWithOffset(cuArrays<float2> *image1, const int lda1, cuA
|
||||||
getLastCudaError("cuArraysCopyToBatchAbsWithOffset_kernel");
|
getLastCudaError("cuArraysCopyToBatchAbsWithOffset_kernel");
|
||||||
}
|
}
|
||||||
|
|
||||||
// copy a chunk into a batch of chips for a set of offsets (varying strides), from complex to real(take amplitudes)
|
// same as above, but from complex to real(take amplitudes)
|
||||||
__global__ void cuArraysCopyToBatchAbsWithOffset_kernel(const float2 *imageIn, const int inNY,
|
__global__ void cuArraysCopyToBatchAbsWithOffset_kernel(const float2 *imageIn, const int inNY,
|
||||||
float2 *imageOut, const int outNX, const int outNY, const int nImages,
|
float2 *imageOut, const int outNX, const int outNY, const int nImages,
|
||||||
const int *offsetX, const int *offsetY)
|
const int *offsetX, const int *offsetY)
|
||||||
|
@ -91,13 +117,22 @@ __global__ void cuArraysCopyToBatchAbsWithOffset_kernel(const float2 *imageIn, c
|
||||||
imageOut[idxOut] = make_float2(complexAbs(imageIn[idxIn]), 0.0);
|
imageOut[idxOut] = make_float2(complexAbs(imageIn[idxIn]), 0.0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Copy a chunk into a batch of chips with varying offsets/strides
|
||||||
|
* @note similar to cuArraysCopyToBatchWithOffset, but take amplitudes instead
|
||||||
|
* @param image1 Input image as a large chunk
|
||||||
|
* @param lda1 the leading dimension of image1, usually, its width inNY
|
||||||
|
* @param image2 Output images as a batch of chips
|
||||||
|
* @param strideH (varying) offsets along height to extract chips
|
||||||
|
* @param strideW (varying) offsets along width to extract chips
|
||||||
|
* @param stream cudaStream
|
||||||
|
*/
|
||||||
void cuArraysCopyToBatchAbsWithOffset(cuArrays<float2> *image1, const int lda1, cuArrays<float2> *image2,
|
void cuArraysCopyToBatchAbsWithOffset(cuArrays<float2> *image1, const int lda1, cuArrays<float2> *image2,
|
||||||
const int *offsetH, const int* offsetW, cudaStream_t stream)
|
const int *offsetH, const int* offsetW, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
const int nthreads = 16;
|
const int nthreads = 16;
|
||||||
dim3 blockSize(nthreads, nthreads, 1);
|
dim3 blockSize(nthreads, nthreads, 1);
|
||||||
dim3 gridSize(IDIVUP(image2->height,nthreads), IDIVUP(image2->width,nthreads), image2->count);
|
dim3 gridSize(IDIVUP(image2->height,nthreads), IDIVUP(image2->width,nthreads), image2->count);
|
||||||
//fprintf(stderr, "copy tile to batch, %d %d\n", lda1, image2->count);
|
|
||||||
cuArraysCopyToBatchAbsWithOffset_kernel<<<gridSize,blockSize, 0 , stream>>> (
|
cuArraysCopyToBatchAbsWithOffset_kernel<<<gridSize,blockSize, 0 , stream>>> (
|
||||||
image1->devData, lda1,
|
image1->devData, lda1,
|
||||||
image2->devData, image2->height, image2->width, image2->count,
|
image2->devData, image2->height, image2->width, image2->count,
|
||||||
|
@ -105,7 +140,7 @@ void cuArraysCopyToBatchAbsWithOffset(cuArrays<float2> *image1, const int lda1,
|
||||||
getLastCudaError("cuArraysCopyToBatchAbsWithOffset_kernel");
|
getLastCudaError("cuArraysCopyToBatchAbsWithOffset_kernel");
|
||||||
}
|
}
|
||||||
|
|
||||||
// copy a chunk into a batch of chips for a set of offsets (varying strides), from real to complex(to real part)
|
// kernel for cuArraysCopyToBatchWithOffsetR2C
|
||||||
__global__ void cuArraysCopyToBatchWithOffsetR2C_kernel(const float *imageIn, const int inNY,
|
__global__ void cuArraysCopyToBatchWithOffsetR2C_kernel(const float *imageIn, const int inNY,
|
||||||
float2 *imageOut, const int outNX, const int outNY, const int nImages,
|
float2 *imageOut, const int outNX, const int outNY, const int nImages,
|
||||||
const int *offsetX, const int *offsetY)
|
const int *offsetX, const int *offsetY)
|
||||||
|
@ -119,13 +154,22 @@ __global__ void cuArraysCopyToBatchWithOffsetR2C_kernel(const float *imageIn, co
|
||||||
imageOut[idxOut] = make_float2(imageIn[idxIn], 0.0f);
|
imageOut[idxOut] = make_float2(imageIn[idxIn], 0.0f);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Copy a chunk into a batch of chips with varying offsets/strides
|
||||||
|
* @note used to load real images
|
||||||
|
* @param image1 Input image as a large chunk
|
||||||
|
* @param lda1 the leading dimension of image1, usually, its width inNY
|
||||||
|
* @param image2 Output images as a batch of chips
|
||||||
|
* @param strideH (varying) offsets along height to extract chips
|
||||||
|
* @param strideW (varying) offsets along width to extract chips
|
||||||
|
* @param stream cudaStream
|
||||||
|
*/
|
||||||
void cuArraysCopyToBatchWithOffsetR2C(cuArrays<float> *image1, const int lda1, cuArrays<float2> *image2,
|
void cuArraysCopyToBatchWithOffsetR2C(cuArrays<float> *image1, const int lda1, cuArrays<float2> *image2,
|
||||||
const int *offsetH, const int* offsetW, cudaStream_t stream)
|
const int *offsetH, const int* offsetW, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
const int nthreads = 16;
|
const int nthreads = 16;
|
||||||
dim3 blockSize(nthreads, nthreads, 1);
|
dim3 blockSize(nthreads, nthreads, 1);
|
||||||
dim3 gridSize(IDIVUP(image2->height,nthreads), IDIVUP(image2->width,nthreads), image2->count);
|
dim3 gridSize(IDIVUP(image2->height,nthreads), IDIVUP(image2->width,nthreads), image2->count);
|
||||||
//fprintf(stderr, "copy tile to batch, %d %d\n", lda1, image2->count);
|
|
||||||
cuArraysCopyToBatchWithOffsetR2C_kernel<<<gridSize,blockSize, 0 , stream>>> (
|
cuArraysCopyToBatchWithOffsetR2C_kernel<<<gridSize,blockSize, 0 , stream>>> (
|
||||||
image1->devData, lda1,
|
image1->devData, lda1,
|
||||||
image2->devData, image2->height, image2->width, image2->count,
|
image2->devData, image2->height, image2->width, image2->count,
|
||||||
|
@ -133,7 +177,7 @@ void cuArraysCopyToBatchWithOffsetR2C(cuArrays<float> *image1, const int lda1, c
|
||||||
getLastCudaError("cuArraysCopyToBatchWithOffsetR2C_kernel");
|
getLastCudaError("cuArraysCopyToBatchWithOffsetR2C_kernel");
|
||||||
}
|
}
|
||||||
|
|
||||||
//copy a chunk into a series of chips
|
//copy a chunk into a series of chips, from complex to real
|
||||||
__global__ void cuArraysCopyC2R_kernel(const float2 *imageIn, const int inNX, const int inNY,
|
__global__ void cuArraysCopyC2R_kernel(const float2 *imageIn, const int inNX, const int inNY,
|
||||||
float *imageOut, const int outNX, const int outNY,
|
float *imageOut, const int outNX, const int outNY,
|
||||||
const int nImagesX, const int nImagesY,
|
const int nImagesX, const int nImagesY,
|
||||||
|
@ -148,10 +192,17 @@ __global__ void cuArraysCopyC2R_kernel(const float2 *imageIn, const int inNX, co
|
||||||
int idxImageY = idxImage%nImagesY;
|
int idxImageY = idxImage%nImagesY;
|
||||||
int idxIn = (idxImageX*strideX+outx)*inNY + idxImageY*strideY+outy;
|
int idxIn = (idxImageX*strideX+outx)*inNY + idxImageY*strideY+outy;
|
||||||
imageOut[idxOut] = complexAbs(imageIn[idxIn])*factor;
|
imageOut[idxOut] = complexAbs(imageIn[idxIn])*factor;
|
||||||
//printf( "%d\n", idxOut);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
//tested
|
/**
|
||||||
|
* Copy a chunk into a batch of chips with varying offsets/strides
|
||||||
|
* @note similar to cuArraysCopyToBatchWithOffset, but take amplitudes instead
|
||||||
|
* @param image1 Input image as a large chunk
|
||||||
|
* @param image2 Output images as a batch of chips
|
||||||
|
* @param strideH offsets along height to extract chips
|
||||||
|
* @param strideW offsets along width to extract chips
|
||||||
|
* @param stream cudaStream
|
||||||
|
*/
|
||||||
void cuArraysCopyC2R(cuArrays<float2> *image1, cuArrays<float> *image2,
|
void cuArraysCopyC2R(cuArrays<float2> *image1, cuArrays<float> *image2,
|
||||||
int strideH, int strideW, cudaStream_t stream)
|
int strideH, int strideW, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
|
@ -167,6 +218,7 @@ void cuArraysCopyC2R(cuArrays<float2> *image1, cuArrays<float> *image2,
|
||||||
getLastCudaError("cuda Error: cuArraysCopyC2R_kernel");
|
getLastCudaError("cuda Error: cuArraysCopyC2R_kernel");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//copy a chunk into a series of chips, from complex to real, with varying strides
|
||||||
__global__ void cuArraysCopyExtractVaryingOffset(const float *imageIn, const int inNX, const int inNY,
|
__global__ void cuArraysCopyExtractVaryingOffset(const float *imageIn, const int inNX, const int inNY,
|
||||||
float *imageOut, const int outNX, const int outNY, const int nImages,
|
float *imageOut, const int outNX, const int outNY, const int nImages,
|
||||||
const int2 *offsets)
|
const int2 *offsets)
|
||||||
|
@ -183,9 +235,11 @@ __global__ void cuArraysCopyExtractVaryingOffset(const float *imageIn, const int
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* copy a tile of images to another image, with starting pixels offsets
|
/**
|
||||||
* param[in] imageIn inut images
|
* Copy a tile of images to another image, with starting pixels offsets, float to float
|
||||||
* param[out] imageOut output images of dimension nImages*outNX*outNY
|
* @param[in] imageIn input images of dimension nImages*inNX*inNY
|
||||||
|
* @param[out] imageOut output images of dimension nImages*outNX*outNY
|
||||||
|
* @param[in] offsets, varying offsets for extraction
|
||||||
*/
|
*/
|
||||||
void cuArraysCopyExtract(cuArrays<float> *imagesIn, cuArrays<float> *imagesOut, cuArrays<int2> *offsets, cudaStream_t stream)
|
void cuArraysCopyExtract(cuArrays<float> *imagesIn, cuArrays<float> *imagesOut, cuArrays<int2> *offsets, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
|
@ -215,6 +269,12 @@ __global__ void cuArraysCopyExtractVaryingOffset_C2C(const float2 *imageIn, cons
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Copy a tile of images to another image, with starting pixels offsets, float2 to float2
|
||||||
|
* @param[in] imageIn input images of dimension nImages*inNX*inNY
|
||||||
|
* @param[out] imageOut output images of dimension nImages*outNX*outNY
|
||||||
|
* @param[in] offsets, varying offsets for extraction
|
||||||
|
*/
|
||||||
void cuArraysCopyExtract(cuArrays<float2> *imagesIn, cuArrays<float2> *imagesOut, cuArrays<int2> *offsets, cudaStream_t stream)
|
void cuArraysCopyExtract(cuArrays<float2> *imagesIn, cuArrays<float2> *imagesOut, cuArrays<int2> *offsets, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
//assert(imagesIn->height >= imagesOut && inNY >= outNY);
|
//assert(imagesIn->height >= imagesOut && inNY >= outNY);
|
||||||
|
@ -233,38 +293,43 @@ __global__ void cuArraysCopyExtractVaryingOffsetCorr(const float *imageIn, const
|
||||||
const int2 *maxloc)
|
const int2 *maxloc)
|
||||||
{
|
{
|
||||||
|
|
||||||
|
// get the image index
|
||||||
int idxImage = blockIdx.z;
|
int idxImage = blockIdx.z;
|
||||||
|
|
||||||
// One thread per out point. Find the coordinates within the current image.
|
// One thread per out point. Find the coordinates within the current image.
|
||||||
int outx = threadIdx.x + blockDim.x*blockIdx.x;
|
int outx = threadIdx.x + blockDim.x*blockIdx.x;
|
||||||
int outy = threadIdx.y + blockDim.y*blockIdx.y;
|
int outy = threadIdx.y + blockDim.y*blockIdx.y;
|
||||||
|
|
||||||
// Find the correponding input.
|
// check whether thread is within output image range
|
||||||
|
if (outx < outNX && outy < outNY)
|
||||||
|
{
|
||||||
|
// Find the corresponding input.
|
||||||
int inx = outx + maxloc[idxImage].x - outNX/2;
|
int inx = outx + maxloc[idxImage].x - outNX/2;
|
||||||
int iny = outy + maxloc[idxImage].y - outNY/2;
|
int iny = outy + maxloc[idxImage].y - outNY/2;
|
||||||
|
|
||||||
if (outx < outNX && outy < outNY)
|
// Find the location in flattened array.
|
||||||
{
|
|
||||||
// Find the location in full array.
|
|
||||||
int idxOut = ( blockIdx.z * outNX + outx ) * outNY + outy;
|
int idxOut = ( blockIdx.z * outNX + outx ) * outNY + outy;
|
||||||
|
|
||||||
int idxIn = ( blockIdx.z * inNX + inx ) * inNY + iny;
|
int idxIn = ( blockIdx.z * inNX + inx ) * inNY + iny;
|
||||||
|
|
||||||
if (inx>=0 && iny>=0 && inx<inNX && iny<inNY) {
|
// check whether inside of the input image
|
||||||
|
if (inx>=0 && iny>=0 && inx<inNX && iny<inNY)
|
||||||
|
{
|
||||||
|
// inside the boundary, copy over and mark the pixel as valid (1)
|
||||||
imageOut[idxOut] = imageIn[idxIn];
|
imageOut[idxOut] = imageIn[idxIn];
|
||||||
imageValid[idxOut] = 1;
|
imageValid[idxOut] = 1;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
|
// outside, set it to 0 and mark the pixel as invalid (0)
|
||||||
imageOut[idxOut] = 0.0f;
|
imageOut[idxOut] = 0.0f;
|
||||||
imageValid[idxOut] = 0;
|
imageValid[idxOut] = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* copy a tile of images to another image, with starting pixels offsets accouting for boundary
|
/**
|
||||||
* param[in] imageIn inut images
|
* copy a tile of images to another image, with starting pixels offsets accouting for boundary
|
||||||
* param[out] imageOut output images of dimension nImages*outNX*outNY
|
* @param[in] imageIn inut images
|
||||||
|
* @param[out] imageOut output images of dimension nImages*outNX*outNY
|
||||||
*/
|
*/
|
||||||
void cuArraysCopyExtractCorr(cuArrays<float> *imagesIn, cuArrays<float> *imagesOut, cuArrays<int> *imagesValid, cuArrays<int2> *maxloc, cudaStream_t stream)
|
void cuArraysCopyExtractCorr(cuArrays<float> *imagesIn, cuArrays<float> *imagesOut, cuArrays<int> *imagesValid, cuArrays<int2> *maxloc, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
|
@ -314,8 +379,7 @@ void cuArraysCopyExtract(cuArrays<float> *imagesIn, cuArrays<float> *imagesOut,
|
||||||
getLastCudaError("cuArraysCopyExtract error");
|
getLastCudaError("cuArraysCopyExtract error");
|
||||||
}
|
}
|
||||||
|
|
||||||
//
|
// cuda kernel for cuArraysCopyExtract float2 to float2
|
||||||
|
|
||||||
__global__ void cuArraysCopyExtract_C2C_FixedOffset(const float2 *imageIn, const int inNX, const int inNY,
|
__global__ void cuArraysCopyExtract_C2C_FixedOffset(const float2 *imageIn, const int inNX, const int inNY,
|
||||||
float2 *imageOut, const int outNX, const int outNY, const int nImages,
|
float2 *imageOut, const int outNX, const int outNY, const int nImages,
|
||||||
const int offsetX, const int offsetY)
|
const int offsetX, const int offsetY)
|
||||||
|
@ -331,22 +395,21 @@ __global__ void cuArraysCopyExtract_C2C_FixedOffset(const float2 *imageIn, const
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* copy/extract complex images from a large size to a smaller size from the location (offsetX, offsetY)
|
||||||
|
*/
|
||||||
void cuArraysCopyExtract(cuArrays<float2> *imagesIn, cuArrays<float2> *imagesOut, int2 offset, cudaStream_t stream)
|
void cuArraysCopyExtract(cuArrays<float2> *imagesIn, cuArrays<float2> *imagesOut, int2 offset, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
//assert(imagesIn->height >= imagesOut && inNY >= outNY);
|
//assert(imagesIn->height >= imagesOut && inNY >= outNY);
|
||||||
const int nthreads = NTHREADS2D;
|
const int nthreads = NTHREADS2D;
|
||||||
dim3 threadsperblock(nthreads, nthreads,1);
|
dim3 threadsperblock(nthreads, nthreads,1);
|
||||||
dim3 blockspergrid(IDIVUP(imagesOut->height,nthreads), IDIVUP(imagesOut->width,nthreads), imagesOut->count);
|
dim3 blockspergrid(IDIVUP(imagesOut->height,nthreads), IDIVUP(imagesOut->width,nthreads), imagesOut->count);
|
||||||
//std::cout << "debug copyExtract" << imagesOut->width << imagesOut->height << "\n";
|
|
||||||
//imagesIn->debuginfo(stream);
|
|
||||||
//imagesOut->debuginfo(stream);
|
|
||||||
cuArraysCopyExtract_C2C_FixedOffset<<<blockspergrid, threadsperblock,0, stream>>>
|
cuArraysCopyExtract_C2C_FixedOffset<<<blockspergrid, threadsperblock,0, stream>>>
|
||||||
(imagesIn->devData, imagesIn->height, imagesIn->width,
|
(imagesIn->devData, imagesIn->height, imagesIn->width,
|
||||||
imagesOut->devData, imagesOut->height, imagesOut->width, imagesOut->count, offset.x, offset.y);
|
imagesOut->devData, imagesOut->height, imagesOut->width, imagesOut->count, offset.x, offset.y);
|
||||||
getLastCudaError("cuArraysCopyExtractC2C error");
|
getLastCudaError("cuArraysCopyExtractC2C error");
|
||||||
}
|
}
|
||||||
//
|
|
||||||
|
|
||||||
// float3
|
// float3
|
||||||
__global__ void cuArraysCopyExtract_C2C_FixedOffset(const float3 *imageIn, const int inNX, const int inNY,
|
__global__ void cuArraysCopyExtract_C2C_FixedOffset(const float3 *imageIn, const int inNX, const int inNY,
|
||||||
|
@ -364,24 +427,21 @@ __global__ void cuArraysCopyExtract_C2C_FixedOffset(const float3 *imageIn, const
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* copy/extract float3 images from a large size to a smaller size from the location (offsetX, offsetY)
|
||||||
|
*/
|
||||||
void cuArraysCopyExtract(cuArrays<float3> *imagesIn, cuArrays<float3> *imagesOut, int2 offset, cudaStream_t stream)
|
void cuArraysCopyExtract(cuArrays<float3> *imagesIn, cuArrays<float3> *imagesOut, int2 offset, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
//assert(imagesIn->height >= imagesOut && inNY >= outNY);
|
//assert(imagesIn->height >= imagesOut && inNY >= outNY);
|
||||||
const int nthreads = NTHREADS2D;
|
const int nthreads = NTHREADS2D;
|
||||||
dim3 threadsperblock(nthreads, nthreads,1);
|
dim3 threadsperblock(nthreads, nthreads,1);
|
||||||
dim3 blockspergrid(IDIVUP(imagesOut->height,nthreads), IDIVUP(imagesOut->width,nthreads), imagesOut->count);
|
dim3 blockspergrid(IDIVUP(imagesOut->height,nthreads), IDIVUP(imagesOut->width,nthreads), imagesOut->count);
|
||||||
//std::cout << "debug copyExtract" << imagesOut->width << imagesOut->height << "\n";
|
|
||||||
//imagesIn->debuginfo(stream);
|
|
||||||
//imagesOut->debuginfo(stream);
|
|
||||||
cuArraysCopyExtract_C2C_FixedOffset<<<blockspergrid, threadsperblock,0, stream>>>
|
cuArraysCopyExtract_C2C_FixedOffset<<<blockspergrid, threadsperblock,0, stream>>>
|
||||||
(imagesIn->devData, imagesIn->height, imagesIn->width,
|
(imagesIn->devData, imagesIn->height, imagesIn->width,
|
||||||
imagesOut->devData, imagesOut->height, imagesOut->width, imagesOut->count, offset.x, offset.y);
|
imagesOut->devData, imagesOut->height, imagesOut->width, imagesOut->count, offset.x, offset.y);
|
||||||
getLastCudaError("cuArraysCopyExtractFloat3 error");
|
getLastCudaError("cuArraysCopyExtractFloat3 error");
|
||||||
}
|
}
|
||||||
|
|
||||||
//
|
|
||||||
|
|
||||||
|
|
||||||
__global__ void cuArraysCopyExtract_C2R_FixedOffset(const float2 *imageIn, const int inNX, const int inNY,
|
__global__ void cuArraysCopyExtract_C2R_FixedOffset(const float2 *imageIn, const int inNX, const int inNY,
|
||||||
float *imageOut, const int outNX, const int outNY, const int nImages,
|
float *imageOut, const int outNX, const int outNY, const int nImages,
|
||||||
|
@ -398,8 +458,10 @@ __global__ void cuArraysCopyExtract_C2R_FixedOffset(const float2 *imageIn, const
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* copy/extract complex images from a large size to float images (by taking real parts)
|
||||||
|
* with a smaller size from the location (offsetX, offsetY)
|
||||||
|
*/
|
||||||
void cuArraysCopyExtract(cuArrays<float2> *imagesIn, cuArrays<float> *imagesOut, int2 offset, cudaStream_t stream)
|
void cuArraysCopyExtract(cuArrays<float2> *imagesIn, cuArrays<float> *imagesOut, int2 offset, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
//assert(imagesIn->height >= imagesOut && inNY >= outNY);
|
//assert(imagesIn->height >= imagesOut && inNY >= outNY);
|
||||||
|
@ -411,7 +473,6 @@ void cuArraysCopyExtract(cuArrays<float2> *imagesIn, cuArrays<float> *imagesOut,
|
||||||
imagesOut->devData, imagesOut->height, imagesOut->width, imagesOut->count, offset.x, offset.y);
|
imagesOut->devData, imagesOut->height, imagesOut->width, imagesOut->count, offset.x, offset.y);
|
||||||
getLastCudaError("cuArraysCopyExtractC2C error");
|
getLastCudaError("cuArraysCopyExtractC2C error");
|
||||||
}
|
}
|
||||||
//
|
|
||||||
|
|
||||||
__global__ void cuArraysCopyInsert_kernel(const float2* imageIn, const int inNX, const int inNY,
|
__global__ void cuArraysCopyInsert_kernel(const float2* imageIn, const int inNX, const int inNY,
|
||||||
float2* imageOut, const int outNY, const int offsetX, const int offsetY)
|
float2* imageOut, const int outNY, const int offsetX, const int offsetY)
|
||||||
|
@ -425,7 +486,9 @@ __global__ void cuArraysCopyInsert_kernel(const float2* imageIn, const int inNX,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* copy/insert complex images from a smaller size to a larger size from the location (offsetX, offsetY)
|
||||||
|
*/
|
||||||
void cuArraysCopyInsert(cuArrays<float2> *imageIn, cuArrays<float2> *imageOut, int offsetX, int offsetY, cudaStream_t stream)
|
void cuArraysCopyInsert(cuArrays<float2> *imageIn, cuArrays<float2> *imageOut, int offsetX, int offsetY, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
const int nthreads = 16;
|
const int nthreads = 16;
|
||||||
|
@ -433,7 +496,7 @@ void cuArraysCopyInsert(cuArrays<float2> *imageIn, cuArrays<float2> *imageOut, i
|
||||||
dim3 blockspergrid(IDIVUP(imageIn->height,nthreads), IDIVUP(imageIn->width,nthreads));
|
dim3 blockspergrid(IDIVUP(imageIn->height,nthreads), IDIVUP(imageIn->width,nthreads));
|
||||||
cuArraysCopyInsert_kernel<<<blockspergrid, threadsperblock,0, stream>>>(imageIn->devData, imageIn->height, imageIn->width,
|
cuArraysCopyInsert_kernel<<<blockspergrid, threadsperblock,0, stream>>>(imageIn->devData, imageIn->height, imageIn->width,
|
||||||
imageOut->devData, imageOut->width, offsetX, offsetY);
|
imageOut->devData, imageOut->width, offsetX, offsetY);
|
||||||
getLastCudaError("cuArraysCopyInsert error");
|
getLastCudaError("cuArraysCopyInsert float2 error");
|
||||||
}
|
}
|
||||||
//
|
//
|
||||||
// float3
|
// float3
|
||||||
|
@ -449,6 +512,9 @@ __global__ void cuArraysCopyInsert_kernel(const float3* imageIn, const int inNX,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* copy/insert float3 images from a smaller size to a larger size from the location (offsetX, offsetY)
|
||||||
|
*/
|
||||||
void cuArraysCopyInsert(cuArrays<float3> *imageIn, cuArrays<float3> *imageOut, int offsetX, int offsetY, cudaStream_t stream)
|
void cuArraysCopyInsert(cuArrays<float3> *imageIn, cuArrays<float3> *imageOut, int offsetX, int offsetY, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
const int nthreads = 16;
|
const int nthreads = 16;
|
||||||
|
@ -456,7 +522,7 @@ void cuArraysCopyInsert(cuArrays<float3> *imageIn, cuArrays<float3> *imageOut, i
|
||||||
dim3 blockspergrid(IDIVUP(imageIn->height,nthreads), IDIVUP(imageIn->width,nthreads));
|
dim3 blockspergrid(IDIVUP(imageIn->height,nthreads), IDIVUP(imageIn->width,nthreads));
|
||||||
cuArraysCopyInsert_kernel<<<blockspergrid, threadsperblock,0, stream>>>(imageIn->devData, imageIn->height, imageIn->width,
|
cuArraysCopyInsert_kernel<<<blockspergrid, threadsperblock,0, stream>>>(imageIn->devData, imageIn->height, imageIn->width,
|
||||||
imageOut->devData, imageOut->width, offsetX, offsetY);
|
imageOut->devData, imageOut->width, offsetX, offsetY);
|
||||||
getLastCudaError("cuArraysCopyInsert error");
|
getLastCudaError("cuArraysCopyInsert float3 error");
|
||||||
}
|
}
|
||||||
|
|
||||||
//
|
//
|
||||||
|
@ -473,7 +539,9 @@ __global__ void cuArraysCopyInsert_kernel(const float* imageIn, const int inNX,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* copy/insert real images from a smaller size to a larger size from the location (offsetX, offsetY)
|
||||||
|
*/
|
||||||
void cuArraysCopyInsert(cuArrays<float> *imageIn, cuArrays<float> *imageOut, int offsetX, int offsetY, cudaStream_t stream)
|
void cuArraysCopyInsert(cuArrays<float> *imageIn, cuArrays<float> *imageOut, int offsetX, int offsetY, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
const int nthreads = 16;
|
const int nthreads = 16;
|
||||||
|
@ -484,7 +552,7 @@ void cuArraysCopyInsert(cuArrays<float> *imageIn, cuArrays<float> *imageOut, int
|
||||||
getLastCudaError("cuArraysCopyInsert Float error");
|
getLastCudaError("cuArraysCopyInsert Float error");
|
||||||
}
|
}
|
||||||
|
|
||||||
//
|
|
||||||
|
|
||||||
__global__ void cuArraysCopyInsert_kernel(const int* imageIn, const int inNX, const int inNY,
|
__global__ void cuArraysCopyInsert_kernel(const int* imageIn, const int inNX, const int inNY,
|
||||||
int* imageOut, const int outNY, const int offsetX, const int offsetY)
|
int* imageOut, const int outNY, const int offsetX, const int offsetY)
|
||||||
|
@ -498,7 +566,9 @@ __global__ void cuArraysCopyInsert_kernel(const int* imageIn, const int inNX, co
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* copy/insert int images from a smaller size to a larger size from the location (offsetX, offsetY)
|
||||||
|
*/
|
||||||
void cuArraysCopyInsert(cuArrays<int> *imageIn, cuArrays<int> *imageOut, int offsetX, int offsetY, cudaStream_t stream)
|
void cuArraysCopyInsert(cuArrays<int> *imageIn, cuArrays<int> *imageOut, int offsetX, int offsetY, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
const int nthreads = 16;
|
const int nthreads = 16;
|
||||||
|
@ -508,38 +578,6 @@ void cuArraysCopyInsert(cuArrays<int> *imageIn, cuArrays<int> *imageOut, int off
|
||||||
imageOut->devData, imageOut->width, offsetX, offsetY);
|
imageOut->devData, imageOut->width, offsetX, offsetY);
|
||||||
getLastCudaError("cuArraysCopyInsert Integer error");
|
getLastCudaError("cuArraysCopyInsert Integer error");
|
||||||
}
|
}
|
||||||
//
|
|
||||||
|
|
||||||
|
|
||||||
__global__ void cuArraysCopyInversePadded_kernel(float *imageIn, int inNX, int inNY, int sizeIn,
|
|
||||||
float *imageOut, int outNX, int outNY, int sizeOut, int nImages)
|
|
||||||
{
|
|
||||||
int outx = threadIdx.x + blockDim.x*blockIdx.x;
|
|
||||||
int outy = threadIdx.y + blockDim.y*blockIdx.y;
|
|
||||||
|
|
||||||
if(outx < outNX && outy < outNY)
|
|
||||||
{
|
|
||||||
int idxImage = blockIdx.z;
|
|
||||||
int idxOut = IDX2R(outx, outy, outNY)+idxImage*sizeOut;
|
|
||||||
if(outx < inNX && outy <inNY) {
|
|
||||||
int idxIn = IDX2R(inNX-outx-1, inNY-outy-1, inNY)+idxImage*sizeIn;
|
|
||||||
imageOut[idxOut] = imageIn[idxIn];
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{ imageOut[idxOut] = 0.0f; }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void cuArraysCopyInversePadded(cuArrays<float> *imageIn, cuArrays<float> *imageOut,cudaStream_t stream)
|
|
||||||
{
|
|
||||||
const int nthreads = 16;
|
|
||||||
int nImages = imageIn->count;
|
|
||||||
dim3 blockSize(nthreads, nthreads,1);
|
|
||||||
dim3 gridSize(IDIVUP(imageOut->height,nthreads), IDIVUP(imageOut->width,nthreads), nImages);
|
|
||||||
cuArraysCopyInversePadded_kernel<<<gridSize, blockSize, 0, stream>>>(imageIn->devData, imageIn->height, imageIn->width, imageIn->size,
|
|
||||||
imageOut->devData, imageOut->height, imageOut->width, imageOut->size, nImages);
|
|
||||||
getLastCudaError("cuArraysCopyInversePadded error");
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
__global__ void cuArraysCopyPadded_R2R_kernel(float *imageIn, int inNX, int inNY, int sizeIn,
|
__global__ void cuArraysCopyPadded_R2R_kernel(float *imageIn, int inNX, int inNY, int sizeIn,
|
||||||
|
@ -561,6 +599,9 @@ __global__ void cuArraysCopyPadded_R2R_kernel(float *imageIn, int inNX, int inNY
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* copy real images from a smaller size to a larger size while padding 0 for extra elements
|
||||||
|
*/
|
||||||
void cuArraysCopyPadded(cuArrays<float> *imageIn, cuArrays<float> *imageOut,cudaStream_t stream)
|
void cuArraysCopyPadded(cuArrays<float> *imageIn, cuArrays<float> *imageOut,cudaStream_t stream)
|
||||||
{
|
{
|
||||||
const int nthreads = 16;
|
const int nthreads = 16;
|
||||||
|
@ -592,7 +633,10 @@ __global__ void cuArraysCopyPadded_C2C_kernel(float2 *imageIn, int inNX, int inN
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* copy complex images from a smaller size to a larger size while padding 0 for extra elements
|
||||||
|
* @note use for zero-padding in fft oversampling
|
||||||
|
*/
|
||||||
void cuArraysCopyPadded(cuArrays<float2> *imageIn, cuArrays<float2> *imageOut,cudaStream_t stream)
|
void cuArraysCopyPadded(cuArrays<float2> *imageIn, cuArrays<float2> *imageOut,cudaStream_t stream)
|
||||||
{
|
{
|
||||||
const int nthreads = NTHREADS2D;
|
const int nthreads = NTHREADS2D;
|
||||||
|
@ -602,9 +646,10 @@ void cuArraysCopyPadded(cuArrays<float2> *imageIn, cuArrays<float2> *imageOut,cu
|
||||||
cuArraysCopyPadded_C2C_kernel<<<gridSize, blockSize, 0, stream>>>
|
cuArraysCopyPadded_C2C_kernel<<<gridSize, blockSize, 0, stream>>>
|
||||||
(imageIn->devData, imageIn->height, imageIn->width, imageIn->size,
|
(imageIn->devData, imageIn->height, imageIn->width, imageIn->size,
|
||||||
imageOut->devData, imageOut->height, imageOut->width, imageOut->size, nImages);
|
imageOut->devData, imageOut->height, imageOut->width, imageOut->size, nImages);
|
||||||
getLastCudaError("cuArraysCopyInversePadded error");
|
getLastCudaError("cuArraysCopyPadded C2C error");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// kernel for cuArraysCopyPadded
|
||||||
__global__ void cuArraysCopyPadded_R2C_kernel(float *imageIn, int inNX, int inNY, int sizeIn,
|
__global__ void cuArraysCopyPadded_R2C_kernel(float *imageIn, int inNX, int inNY, int sizeIn,
|
||||||
float2 *imageOut, int outNX, int outNY, int sizeOut, int nImages)
|
float2 *imageOut, int outNX, int outNY, int sizeOut, int nImages)
|
||||||
{
|
{
|
||||||
|
@ -625,7 +670,10 @@ __global__ void cuArraysCopyPadded_R2C_kernel(float *imageIn, int inNX, int inNY
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* copy real images to complex images (imaginary part=0) with larger size (pad 0 for extra elements)
|
||||||
|
* @note use for zero-padding in fft oversampling
|
||||||
|
*/
|
||||||
void cuArraysCopyPadded(cuArrays<float> *imageIn, cuArrays<float2> *imageOut,cudaStream_t stream)
|
void cuArraysCopyPadded(cuArrays<float> *imageIn, cuArrays<float2> *imageOut,cudaStream_t stream)
|
||||||
{
|
{
|
||||||
const int nthreads = NTHREADS2D;
|
const int nthreads = NTHREADS2D;
|
||||||
|
@ -635,10 +683,10 @@ void cuArraysCopyPadded(cuArrays<float> *imageIn, cuArrays<float2> *imageOut,cud
|
||||||
cuArraysCopyPadded_R2C_kernel<<<gridSize, blockSize, 0, stream>>>
|
cuArraysCopyPadded_R2C_kernel<<<gridSize, blockSize, 0, stream>>>
|
||||||
(imageIn->devData, imageIn->height, imageIn->width, imageIn->size,
|
(imageIn->devData, imageIn->height, imageIn->width, imageIn->size,
|
||||||
imageOut->devData, imageOut->height, imageOut->width, imageOut->size, nImages);
|
imageOut->devData, imageOut->height, imageOut->width, imageOut->size, nImages);
|
||||||
getLastCudaError("cuArraysCopyPadded error");
|
getLastCudaError("cuArraysCopyPadded R2C error");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// cuda kernel for setting a constant value
|
||||||
__global__ void cuArraysSetConstant_kernel(float *image, int size, float value)
|
__global__ void cuArraysSetConstant_kernel(float *image, int size, float value)
|
||||||
{
|
{
|
||||||
int idx = threadIdx.x + blockDim.x*blockIdx.x;
|
int idx = threadIdx.x + blockDim.x*blockIdx.x;
|
||||||
|
@ -649,6 +697,10 @@ __global__ void cuArraysSetConstant_kernel(float *image, int size, float value)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Set real images to a constant value
|
||||||
|
* @note use setZero if value=0 because cudaMemset is faster
|
||||||
|
*/
|
||||||
void cuArraysSetConstant(cuArrays<float> *imageIn, float value, cudaStream_t stream)
|
void cuArraysSetConstant(cuArrays<float> *imageIn, float value, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
const int nthreads = 256;
|
const int nthreads = 256;
|
||||||
|
@ -656,5 +708,76 @@ void cuArraysSetConstant(cuArrays<float> *imageIn, float value, cudaStream_t str
|
||||||
|
|
||||||
cuArraysSetConstant_kernel<<<IDIVUP(size, nthreads), nthreads, 0, stream>>>
|
cuArraysSetConstant_kernel<<<IDIVUP(size, nthreads), nthreads, 0, stream>>>
|
||||||
(imageIn->devData, imageIn->size, value);
|
(imageIn->devData, imageIn->size, value);
|
||||||
getLastCudaError("cuArraysCopyPadded error");
|
getLastCudaError("cuArraysSetConstant error");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// convert float to float2(complex)
|
||||||
|
__global__ void cuArraysR2C_kernel(float *image1, float2 *image2, int size)
|
||||||
|
{
|
||||||
|
int idx = threadIdx.x + blockDim.x*blockIdx.x;
|
||||||
|
if(idx < size)
|
||||||
|
{
|
||||||
|
image2[idx].x = image1[idx];
|
||||||
|
image2[idx].y = 0.0f;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Convert real images to complex images (set imaginary parts to 0)
|
||||||
|
* @param[in] image1 input images
|
||||||
|
* @param[out] image2 output images
|
||||||
|
*/
|
||||||
|
void cuArraysR2C(cuArrays<float> *image1, cuArrays<float2> *image2, cudaStream_t stream)
|
||||||
|
{
|
||||||
|
int size = image1->getSize();
|
||||||
|
cuArraysR2C_kernel<<<IDIVUP(size, NTHREADS), NTHREADS, 0, stream>>>(image1->devData, image2->devData, size);
|
||||||
|
getLastCudaError("cuArraysR2C");
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// take real part of float2 to float
|
||||||
|
__global__ void cuArraysC2R_kernel(float2 *image1, float *image2, int size)
|
||||||
|
{
|
||||||
|
int idx = threadIdx.x + blockDim.x*blockIdx.x;
|
||||||
|
if(idx < size)
|
||||||
|
{
|
||||||
|
image2[idx] = image1[idx].x;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Take real part of complex images
|
||||||
|
* @param[in] image1 input images
|
||||||
|
* @param[out] image2 output images
|
||||||
|
*/
|
||||||
|
void cuArraysC2R(cuArrays<float2> *image1, cuArrays<float> *image2, cudaStream_t stream)
|
||||||
|
{
|
||||||
|
int size = image1->getSize();
|
||||||
|
cuArraysC2R_kernel<<<IDIVUP(size, NTHREADS), NTHREADS, 0, stream>>>(image1->devData, image2->devData, size);
|
||||||
|
getLastCudaError("cuArraysC2R");
|
||||||
|
}
|
||||||
|
|
||||||
|
// cuda kernel for cuArraysAbs
|
||||||
|
__global__ void cuArraysAbs_kernel(float2 *image1, float *image2, int size)
|
||||||
|
{
|
||||||
|
int idx = threadIdx.x + blockDim.x*blockIdx.x;
|
||||||
|
if(idx < size)
|
||||||
|
{
|
||||||
|
image2[idx] = complexAbs(image1[idx]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Obtain abs (amplitudes) of complex images
|
||||||
|
* @param[in] image1 input images
|
||||||
|
* @param[out] image2 output images
|
||||||
|
*/
|
||||||
|
void cuArraysAbs(cuArrays<float2> *image1, cuArrays<float> *image2, cudaStream_t stream)
|
||||||
|
{
|
||||||
|
int size = image1->getSize();
|
||||||
|
cuArraysAbs_kernel<<<IDIVUP(size, NTHREADS), NTHREADS, 0, stream>>>(image1->devData, image2->devData, size);
|
||||||
|
getLastCudaError("cuArraysAbs_kernel");
|
||||||
|
}
|
||||||
|
|
||||||
|
// end of file
|
||||||
|
|
|
@ -1,13 +1,12 @@
|
||||||
/*
|
/*
|
||||||
* cuArraysPadding.cu
|
* @file cuArraysPadding.cu
|
||||||
* Padding Utitilies for oversampling
|
* @brief Utilities for padding zeros to cuArrays
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "cuAmpcorUtil.h"
|
#include "cuAmpcorUtil.h"
|
||||||
#include "float2.h"
|
#include "float2.h"
|
||||||
|
|
||||||
//padding zeros in the middle, move quads to corners
|
// cuda kernel for cuArraysPadding
|
||||||
//for raw chunk data oversampling
|
|
||||||
__global__ void cuArraysPadding_kernel(
|
__global__ void cuArraysPadding_kernel(
|
||||||
const float2 *image1, const int height1, const int width1,
|
const float2 *image1, const int height1, const int width1,
|
||||||
float2 *image2, const int height2, const int width2)
|
float2 *image2, const int height2, const int width2)
|
||||||
|
@ -21,8 +20,6 @@ __global__ void cuArraysPadding_kernel(
|
||||||
int tx2 = height2 -1 -tx;
|
int tx2 = height2 -1 -tx;
|
||||||
int ty2 = width2 -1 -ty;
|
int ty2 = width2 -1 -ty;
|
||||||
|
|
||||||
//printf("%d %d %d\n", tx, height1, height2);
|
|
||||||
|
|
||||||
image2[IDX2R(tx, ty, width2)] = image1[IDX2R(tx, ty, width1)];
|
image2[IDX2R(tx, ty, width2)] = image1[IDX2R(tx, ty, width1)];
|
||||||
image2[IDX2R(tx2, ty, width2)] = image1[IDX2R(tx1, ty, width1)];
|
image2[IDX2R(tx2, ty, width2)] = image1[IDX2R(tx1, ty, width1)];
|
||||||
image2[IDX2R(tx, ty2, width2)] = image1[IDX2R(tx, ty1, width1)];
|
image2[IDX2R(tx, ty2, width2)] = image1[IDX2R(tx, ty1, width1)];
|
||||||
|
@ -30,7 +27,13 @@ __global__ void cuArraysPadding_kernel(
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
//tested
|
|
||||||
|
/**
|
||||||
|
* Padding zeros in the middle, move quads to corners
|
||||||
|
* @param[in] image1 input images
|
||||||
|
* @param[out] image2 output images
|
||||||
|
* @note This routine is for a single image, no longer used
|
||||||
|
*/
|
||||||
void cuArraysPadding(cuArrays<float2> *image1, cuArrays<float2> *image2, cudaStream_t stream)
|
void cuArraysPadding(cuArrays<float2> *image1, cuArrays<float2> *image2, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
int ThreadsPerBlock = NTHREADS2D;
|
int ThreadsPerBlock = NTHREADS2D;
|
||||||
|
@ -38,7 +41,9 @@ void cuArraysPadding(cuArrays<float2> *image1, cuArrays<float2> *image2, cudaStr
|
||||||
int BlockPerGridy = IDIVUP (image1->width/2, ThreadsPerBlock);
|
int BlockPerGridy = IDIVUP (image1->width/2, ThreadsPerBlock);
|
||||||
dim3 dimBlock(ThreadsPerBlock, ThreadsPerBlock);
|
dim3 dimBlock(ThreadsPerBlock, ThreadsPerBlock);
|
||||||
dim3 dimGrid(BlockPerGridx, BlockPerGridy);
|
dim3 dimGrid(BlockPerGridx, BlockPerGridy);
|
||||||
|
// set output image to 0
|
||||||
checkCudaErrors(cudaMemsetAsync(image2->devData, 0, image2->getByteSize(),stream));
|
checkCudaErrors(cudaMemsetAsync(image2->devData, 0, image2->getByteSize(),stream));
|
||||||
|
// copy the quads of input images to four corners of the output images
|
||||||
cuArraysPadding_kernel<<<dimGrid, dimBlock, 0, stream>>>(
|
cuArraysPadding_kernel<<<dimGrid, dimBlock, 0, stream>>>(
|
||||||
image1->devData, image1->height, image1->width,
|
image1->devData, image1->height, image1->width,
|
||||||
image2->devData, image2->height, image2->width);
|
image2->devData, image2->height, image2->width);
|
||||||
|
@ -50,7 +55,7 @@ inline __device__ float2 cmplxMul(float2 c, float a)
|
||||||
return make_float2(c.x*a, c.y*a);
|
return make_float2(c.x*a, c.y*a);
|
||||||
}
|
}
|
||||||
|
|
||||||
//padding for zoomIned correlation oversampling/interpolation
|
// cuda kernel for
|
||||||
__global__ void cuArraysPaddingMany_kernel(
|
__global__ void cuArraysPaddingMany_kernel(
|
||||||
const float2 *image1, const int height1, const int width1, const int size1,
|
const float2 *image1, const int height1, const int width1, const int size1,
|
||||||
float2 *image2, const int height2, const int width2, const int size2, const float factor )
|
float2 *image2, const int height2, const int width2, const int size2, const float factor )
|
||||||
|
@ -67,7 +72,6 @@ __global__ void cuArraysPaddingMany_kernel(
|
||||||
|
|
||||||
int stride1 = blockIdx.z*size1;
|
int stride1 = blockIdx.z*size1;
|
||||||
int stride2 = blockIdx.z*size2;
|
int stride2 = blockIdx.z*size2;
|
||||||
//printf("%d %d %d\n", tx, height1, height2);
|
|
||||||
|
|
||||||
image2[IDX2R(tx, ty, width2)+stride2] = image1[IDX2R(tx, ty, width1)+stride1]*factor;
|
image2[IDX2R(tx, ty, width2)+stride2] = image1[IDX2R(tx, ty, width1)+stride1]*factor;
|
||||||
image2[IDX2R(tx2, ty, width2)+stride2] = cmplxMul(image1[IDX2R(tx1, ty, width1)+stride1], factor);
|
image2[IDX2R(tx2, ty, width2)+stride2] = cmplxMul(image1[IDX2R(tx1, ty, width1)+stride1], factor);
|
||||||
|
@ -76,6 +80,12 @@ __global__ void cuArraysPaddingMany_kernel(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Padding zeros for FFT oversampling
|
||||||
|
* @param[in] image1 input images
|
||||||
|
* @param[out] image2 output images
|
||||||
|
* @note To keep the band center at (0,0), move quads to corners and pad zeros in the middle
|
||||||
|
*/
|
||||||
void cuArraysPaddingMany(cuArrays<float2> *image1, cuArrays<float2> *image2, cudaStream_t stream)
|
void cuArraysPaddingMany(cuArrays<float2> *image1, cuArrays<float2> *image2, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
int ThreadsPerBlock = NTHREADS2D;
|
int ThreadsPerBlock = NTHREADS2D;
|
||||||
|
@ -91,63 +101,7 @@ void cuArraysPaddingMany(cuArrays<float2> *image1, cuArrays<float2> *image2, cud
|
||||||
image2->devData, image2->height, image2->width, image2->size, factor);
|
image2->devData, image2->height, image2->width, image2->size, factor);
|
||||||
getLastCudaError("cuArraysPadding_kernel");
|
getLastCudaError("cuArraysPadding_kernel");
|
||||||
}
|
}
|
||||||
|
//end of file
|
||||||
|
|
||||||
// convert float to float2(complex)
|
|
||||||
__global__ void cuArraysR2C_kernel(float *image1, float2 *image2, int size)
|
|
||||||
{
|
|
||||||
int idx = threadIdx.x + blockDim.x*blockIdx.x;
|
|
||||||
if(idx < size)
|
|
||||||
{
|
|
||||||
image2[idx].x = image1[idx];
|
|
||||||
image2[idx].y = 0.0f;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
//tested
|
|
||||||
void cuArraysR2C(cuArrays<float> *image1, cuArrays<float2> *image2, cudaStream_t stream)
|
|
||||||
{
|
|
||||||
int size = image1->getSize();
|
|
||||||
cuArraysR2C_kernel<<<IDIVUP(size, NTHREADS), NTHREADS, 0, stream>>>(image1->devData, image2->devData, size);
|
|
||||||
getLastCudaError("cuArraysR2C");
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// take real part of float2 to float
|
|
||||||
__global__ void cuArraysC2R_kernel(float2 *image1, float *image2, int size)
|
|
||||||
{
|
|
||||||
int idx = threadIdx.x + blockDim.x*blockIdx.x;
|
|
||||||
if(idx < size)
|
|
||||||
{
|
|
||||||
image2[idx] = image1[idx].x;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
//tested
|
|
||||||
void cuArraysC2R(cuArrays<float2> *image1, cuArrays<float> *image2, cudaStream_t stream)
|
|
||||||
{
|
|
||||||
int size = image1->getSize();
|
|
||||||
cuArraysC2R_kernel<<<IDIVUP(size, NTHREADS), NTHREADS, 0, stream>>>(image1->devData, image2->devData, size);
|
|
||||||
getLastCudaError("cuArraysC2R");
|
|
||||||
}
|
|
||||||
|
|
||||||
// take real part of float2 to float
|
|
||||||
__global__ void cuArraysAbs_kernel(float2 *image1, float *image2, int size)
|
|
||||||
{
|
|
||||||
int idx = threadIdx.x + blockDim.x*blockIdx.x;
|
|
||||||
if(idx < size)
|
|
||||||
{
|
|
||||||
image2[idx] = complexAbs(image1[idx]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
//tested
|
|
||||||
void cuArraysAbs(cuArrays<float2> *image1, cuArrays<float> *image2, cudaStream_t stream)
|
|
||||||
{
|
|
||||||
int size = image1->getSize();
|
|
||||||
cuArraysAbs_kernel<<<IDIVUP(size, NTHREADS), NTHREADS, 0, stream>>>(image1->devData, image2->devData, size);
|
|
||||||
getLastCudaError("cuArraysAbs_kernel");
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1,17 +1,26 @@
|
||||||
/*
|
/*
|
||||||
* cuCorrFrequency.cu
|
* @file cuCorrFrequency.cu
|
||||||
* define a class to save FFT plans and intermediate data for cross correlation in frequency domain
|
* @brief A class performs cross correlation in frequency domain
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "cuCorrFrequency.h"
|
#include "cuCorrFrequency.h"
|
||||||
#include "cuAmpcorUtil.h"
|
#include "cuAmpcorUtil.h"
|
||||||
|
|
||||||
|
/*
|
||||||
|
* cuFreqCorrelator Constructor
|
||||||
|
* @param imageNX height of each image
|
||||||
|
* @param imageNY width of each image
|
||||||
|
* @param nImages number of images in the batch
|
||||||
|
* @param stream CUDA stream
|
||||||
|
*/
|
||||||
cuFreqCorrelator::cuFreqCorrelator(int imageNX, int imageNY, int nImages, cudaStream_t stream_)
|
cuFreqCorrelator::cuFreqCorrelator(int imageNX, int imageNY, int nImages, cudaStream_t stream_)
|
||||||
{
|
{
|
||||||
|
|
||||||
int imageSize = imageNX*imageNY;
|
int imageSize = imageNX*imageNY;
|
||||||
int fImageSize = imageNX*(imageNY/2+1);
|
int fImageSize = imageNX*(imageNY/2+1);
|
||||||
int n[NRANK] ={imageNX, imageNY};
|
int n[NRANK] ={imageNX, imageNY};
|
||||||
|
|
||||||
|
// set up fft plans
|
||||||
cufft_Error(cufftPlanMany(&forwardPlan, NRANK, n,
|
cufft_Error(cufftPlanMany(&forwardPlan, NRANK, n,
|
||||||
NULL, 1, imageSize,
|
NULL, 1, imageSize,
|
||||||
NULL, 1, fImageSize,
|
NULL, 1, fImageSize,
|
||||||
|
@ -24,6 +33,7 @@ cuFreqCorrelator::cuFreqCorrelator(int imageNX, int imageNY, int nImages, cudaSt
|
||||||
cufftSetStream(forwardPlan, stream);
|
cufftSetStream(forwardPlan, stream);
|
||||||
cufftSetStream(backwardPlan, stream);
|
cufftSetStream(backwardPlan, stream);
|
||||||
|
|
||||||
|
// set up work arrays
|
||||||
workFM = new cuArrays<float2>(imageNX, (imageNY/2+1), nImages);
|
workFM = new cuArrays<float2>(imageNX, (imageNY/2+1), nImages);
|
||||||
workFM->allocate();
|
workFM->allocate();
|
||||||
workFS = new cuArrays<float2>(imageNX, (imageNY/2+1), nImages);
|
workFS = new cuArrays<float2>(imageNX, (imageNY/2+1), nImages);
|
||||||
|
@ -32,6 +42,7 @@ cuFreqCorrelator::cuFreqCorrelator(int imageNX, int imageNY, int nImages, cudaSt
|
||||||
workT->allocate();
|
workT->allocate();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// destructor
|
||||||
cuFreqCorrelator::~cuFreqCorrelator()
|
cuFreqCorrelator::~cuFreqCorrelator()
|
||||||
{
|
{
|
||||||
cufft_Error(cufftDestroy(forwardPlan));
|
cufft_Error(cufftDestroy(forwardPlan));
|
||||||
|
@ -41,46 +52,40 @@ cuFreqCorrelator::~cuFreqCorrelator()
|
||||||
workT->deallocate();
|
workT->deallocate();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Execute the cross correlation
|
||||||
|
* @param[in] templates the reference windows
|
||||||
|
* @param[in] images the search windows
|
||||||
|
* @param[out] results the correlation surfaces
|
||||||
|
*/
|
||||||
|
|
||||||
void cuFreqCorrelator::execute(cuArrays<float> *templates, cuArrays<float> *images, cuArrays<float> *results)
|
void cuFreqCorrelator::execute(cuArrays<float> *templates, cuArrays<float> *images, cuArrays<float> *results)
|
||||||
{
|
{
|
||||||
|
// pad the reference windows to the the size of search windows
|
||||||
cuArraysCopyPadded(templates, workT, stream);
|
cuArraysCopyPadded(templates, workT, stream);
|
||||||
|
// forward fft to frequency domain
|
||||||
cufft_Error(cufftExecR2C(forwardPlan, workT->devData, workFM->devData));
|
cufft_Error(cufftExecR2C(forwardPlan, workT->devData, workFM->devData));
|
||||||
cufft_Error(cufftExecR2C(forwardPlan, images->devData, workFS->devData));
|
cufft_Error(cufftExecR2C(forwardPlan, images->devData, workFS->devData));
|
||||||
|
// cufft doesn't normalize, so manually get the image size for normalization
|
||||||
float coef = 1.0/(images->size);
|
float coef = 1.0/(images->size);
|
||||||
|
// multiply reference with secondary windows in frequency domain
|
||||||
cuArraysElementMultiplyConjugate(workFM, workFS, coef, stream);
|
cuArraysElementMultiplyConjugate(workFM, workFS, coef, stream);
|
||||||
|
// backward fft to get correlation surface in time domain
|
||||||
cufft_Error(cufftExecC2R(backwardPlan, workFM->devData, workT->devData));
|
cufft_Error(cufftExecC2R(backwardPlan, workFM->devData, workT->devData));
|
||||||
|
// extract to get proper size of correlation surface
|
||||||
cuArraysCopyExtract(workT, results, make_int2(0, 0), stream);
|
cuArraysCopyExtract(workT, results, make_int2(0, 0), stream);
|
||||||
//workT->outputToFile("test",stream);
|
// all done
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
__global__ void cudaKernel_elementMulC(float2 *ainout, float2 *bin, size_t size)
|
|
||||||
{
|
|
||||||
int idx = threadIdx.x + blockIdx.x*blockDim.x;
|
|
||||||
if(idx < size) {
|
|
||||||
cuComplex prod;
|
|
||||||
prod = cuCmulf(ainout[idx], bin[idx]);
|
|
||||||
ainout [idx] = prod;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void cuArraysElementMultiply(cuArrays<float2> *image1, cuArrays<float2> *image2, cudaStream_t stream)
|
|
||||||
{
|
|
||||||
int size = image1->getSize();
|
|
||||||
int threadsperblock = NTHREADS;
|
|
||||||
int blockspergrid = IDIVUP (size, threadsperblock);
|
|
||||||
cudaKernel_elementMulC<<<blockspergrid, threadsperblock, 0, stream>>>(image1->devData, image2->devData, size );
|
|
||||||
getLastCudaError("cuArraysElementMultiply error\n");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// a = a^* * b
|
||||||
inline __device__ float2 cuMulConj(float2 a, float2 b)
|
inline __device__ float2 cuMulConj(float2 a, float2 b)
|
||||||
{
|
{
|
||||||
return make_float2(a.x*b.x + a.y*b.y, -a.y*b.x + a.x*b.y);
|
return make_float2(a.x*b.x + a.y*b.y, -a.y*b.x + a.x*b.y);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// cuda kernel for cuArraysElementMultiplyConjugate
|
||||||
__global__ void cudaKernel_elementMulConjugate(float2 *ainout, float2 *bin, size_t size, float coef)
|
__global__ void cudaKernel_elementMulConjugate(float2 *ainout, float2 *bin, int size, float coef)
|
||||||
{
|
{
|
||||||
int idx = threadIdx.x + blockIdx.x*blockDim.x;
|
int idx = threadIdx.x + blockIdx.x*blockDim.x;
|
||||||
if(idx < size) {
|
if(idx < size) {
|
||||||
|
@ -90,6 +95,12 @@ __global__ void cudaKernel_elementMulConjugate(float2 *ainout, float2 *bin, size
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Perform multiplication of coef*Conjugate[image1]*image2 for each element
|
||||||
|
* @param[inout] image1, the first image
|
||||||
|
* @param[in] image2, the secondary image
|
||||||
|
* @param[in] coef, usually the normalization factor
|
||||||
|
*/
|
||||||
void cuArraysElementMultiplyConjugate(cuArrays<float2> *image1, cuArrays<float2> *image2, float coef, cudaStream_t stream)
|
void cuArraysElementMultiplyConjugate(cuArrays<float2> *image1, cuArrays<float2> *image2, float coef, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
int size = image1->getSize();
|
int size = image1->getSize();
|
||||||
|
@ -98,3 +109,4 @@ void cuArraysElementMultiplyConjugate(cuArrays<float2> *image1, cuArrays<float2>
|
||||||
cudaKernel_elementMulConjugate<<<blockspergrid, threadsperblock, 0, stream>>>(image1->devData, image2->devData, size, coef );
|
cudaKernel_elementMulConjugate<<<blockspergrid, threadsperblock, 0, stream>>>(image1->devData, image2->devData, size, coef );
|
||||||
getLastCudaError("cuArraysElementMultiply error\n");
|
getLastCudaError("cuArraysElementMultiply error\n");
|
||||||
}
|
}
|
||||||
|
//end of file
|
|
@ -1,29 +1,37 @@
|
||||||
/*
|
/*
|
||||||
* cuCorrFrequency.h
|
* @file cuCorrFrequency.h
|
||||||
* define a class to save FFT plans and intermediate data for cross correlation in frequency domain
|
* @brief A class performs cross correlation in frequency domain
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
// code guard
|
||||||
#ifndef __CUCORRFREQUENCY_H
|
#ifndef __CUCORRFREQUENCY_H
|
||||||
#define __CUCORRFREQUENCY_H
|
#define __CUCORRFREQUENCY_H
|
||||||
|
|
||||||
|
// dependencies
|
||||||
#include "cudaUtil.h"
|
#include "cudaUtil.h"
|
||||||
#include "cuArrays.h"
|
#include "cuArrays.h"
|
||||||
|
|
||||||
class cuFreqCorrelator
|
class cuFreqCorrelator
|
||||||
{
|
{
|
||||||
private:
|
private:
|
||||||
|
// handles for forward/backward fft
|
||||||
cufftHandle forwardPlan;
|
cufftHandle forwardPlan;
|
||||||
cufftHandle backwardPlan;
|
cufftHandle backwardPlan;
|
||||||
|
// work data
|
||||||
cuArrays<float2> *workFM;
|
cuArrays<float2> *workFM;
|
||||||
cuArrays<float2> *workFS;
|
cuArrays<float2> *workFS;
|
||||||
cuArrays<float> *workT;
|
cuArrays<float> *workT;
|
||||||
|
// cuda stream
|
||||||
cudaStream_t stream;
|
cudaStream_t stream;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
// constructor
|
||||||
cuFreqCorrelator(int imageNX, int imageNY, int nImages, cudaStream_t stream_);
|
cuFreqCorrelator(int imageNX, int imageNY, int nImages, cudaStream_t stream_);
|
||||||
|
// destructor
|
||||||
~cuFreqCorrelator();
|
~cuFreqCorrelator();
|
||||||
|
// executor
|
||||||
void execute(cuArrays<float> *templates, cuArrays<float> *images, cuArrays<float> *results);
|
void execute(cuArrays<float> *templates, cuArrays<float> *images, cuArrays<float> *results);
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif
|
#endif //__CUCORRFREQUENCY_H
|
||||||
|
// end of file
|
|
@ -1,13 +1,31 @@
|
||||||
/*
|
/*
|
||||||
* cuCorrNormalization.cu
|
* @file cuCorrNormalization.cu
|
||||||
* various utilities related to normalization of images
|
* @brief Utilities to normalize the correlation surface
|
||||||
* including calculating mean, subtract mean, ....
|
*
|
||||||
|
* The mean and variance of the normalization factor can be computed from the
|
||||||
|
* cumulative/prefix sum (or sum area table) s(u,v), and s2(u,v).
|
||||||
|
* We follow the algorithm by Evghenii Gaburov, Tim Idzenga, Willem Vermin, in the nxcor package.
|
||||||
|
* 1. Iterate over rows and for each row, the cumulative sum for elements in the row
|
||||||
|
* is computed as c_row(u,v) = \sum_(v'<v) f(u, v')
|
||||||
|
* and we keep track of the sum of area of width Ny, i.e.,
|
||||||
|
* c(u,v) = \sum_{u'<=u} [c_row(u', v+Ny) - c_row(u', v)],
|
||||||
|
* or c(u,v) = c(u-1, v) + [c_row(u, v+Ny) - c_row(u, v)]
|
||||||
|
* 2. When row reaches the window height u=Nx-1,
|
||||||
|
* c(u,v) provides the sum of area for the first batch of windows sa(0,v).
|
||||||
|
* 3. proceeding to row = u+1, we compute both c_row(u+1, v) and c_row(u-Nx, v)
|
||||||
|
* i.e., we add the sum from new row and remove the sum from the first row in c(u,v):
|
||||||
|
* c(u+1,v)= c(u,v) + [c_row(u+1,v+Ny)-c_row(u+1, v)] - [c_row(u-Nx, v+Ny)-c_row(u-Nx, v)].
|
||||||
|
* 4. Iterate 3. over the rest rows, and c(u,v) provides the sum of areas for new row of windows.
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "cuAmpcorUtil.h"
|
#include "cuAmpcorUtil.h"
|
||||||
#include <cfloat>
|
#include <cfloat>
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
// sum reduction within a block
|
||||||
|
// the following implementation is compatible for sm_20 and above
|
||||||
|
// newer architectures may support faster implementations, such as warp shuffle, cooperative groups
|
||||||
template <const int Nthreads>
|
template <const int Nthreads>
|
||||||
__device__ float sumReduceBlock(float sum, volatile float *shmem)
|
__device__ float sumReduceBlock(float sum, volatile float *shmem)
|
||||||
{
|
{
|
||||||
|
@ -33,7 +51,7 @@ __device__ float sumReduceBlock(float sum, volatile float *shmem)
|
||||||
return shmem[0];
|
return shmem[0];
|
||||||
}
|
}
|
||||||
|
|
||||||
/* subtracts mean value from the images */
|
// cuda kernel to subtract mean value from the images
|
||||||
template<const int Nthreads>
|
template<const int Nthreads>
|
||||||
__global__ void cuArraysMean_kernel(float *images, float *image_sum, int imageSize, float invSize, int nImages)
|
__global__ void cuArraysMean_kernel(float *images, float *image_sum, int imageSize, float invSize, int nImages)
|
||||||
{
|
{
|
||||||
|
@ -49,28 +67,34 @@ __global__ void cuArraysMean_kernel(float *images, float *image_sum, int imageSi
|
||||||
float *imageD = images + imageOffset;
|
float *imageD = images + imageOffset;
|
||||||
|
|
||||||
float sum = 0.0f;
|
float sum = 0.0f;
|
||||||
|
// perform the reduction beyond one block
|
||||||
|
// save the results for each thread in block
|
||||||
for (int i = tid; i < imageSize; i += Nthreads)
|
for (int i = tid; i < imageSize; i += Nthreads)
|
||||||
sum += imageD[i];
|
sum += imageD[i];
|
||||||
|
// reduction within the block
|
||||||
sum = sumReduceBlock<Nthreads>(sum, shmem);
|
sum = sumReduceBlock<Nthreads>(sum, shmem);
|
||||||
|
|
||||||
const float mean = sum * invSize;
|
const float mean = sum * invSize;
|
||||||
if(tid ==0) image_sum[bid] = mean;
|
if(tid ==0) image_sum[bid] = mean;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Compute mean values for images
|
||||||
|
* @param[in] images Input images
|
||||||
|
* @param[out] mean Output mean values
|
||||||
|
* @param[in] stream cudaStream
|
||||||
|
*/
|
||||||
void cuArraysMeanValue(cuArrays<float> *images, cuArrays<float> *mean, cudaStream_t stream)
|
void cuArraysMeanValue(cuArrays<float> *images, cuArrays<float> *mean, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
const dim3 grid(images->count, 1, 1);
|
const dim3 grid(images->count, 1, 1);
|
||||||
//const int Nthreads=512;
|
|
||||||
const int imageSize = images->width*images->height;
|
const int imageSize = images->width*images->height;
|
||||||
const float invSize = 1.0f/imageSize;
|
const float invSize = 1.0f/imageSize;
|
||||||
|
|
||||||
cuArraysMean_kernel<512> <<<grid,512,0,stream>>>(images->devData, mean->devData, imageSize, invSize, images->count);
|
cuArraysMean_kernel<NTHREADS> <<<grid,NTHREADS,0,stream>>>(images->devData, mean->devData, imageSize, invSize, images->count);
|
||||||
getLastCudaError("cuArraysMeanValue kernel error\n");
|
getLastCudaError("cuArraysMeanValue kernel error\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// cuda kernel to compute and subtracts mean value from the images
|
||||||
|
|
||||||
/* subtracts mean value from the images */
|
|
||||||
template<const int Nthreads>
|
template<const int Nthreads>
|
||||||
__global__ void cuArraysSubtractMean_kernel(float *images, int imageSize, float invSize, int nImages)
|
__global__ void cuArraysSubtractMean_kernel(float *images, int imageSize, float invSize, int nImages)
|
||||||
{
|
{
|
||||||
|
@ -85,30 +109,37 @@ __global__ void cuArraysSubtractMean_kernel(float *images, int imageSize, float
|
||||||
const int imageOffset = imageIdx * imageSize;
|
const int imageOffset = imageIdx * imageSize;
|
||||||
float *imageD = images + imageOffset;
|
float *imageD = images + imageOffset;
|
||||||
|
|
||||||
|
// compute the sum
|
||||||
float sum = 0.0f;
|
float sum = 0.0f;
|
||||||
for (int i = tid; i < imageSize; i += Nthreads)
|
for (int i = tid; i < imageSize; i += Nthreads)
|
||||||
sum += imageD[i];
|
sum += imageD[i];
|
||||||
sum = sumReduceBlock<Nthreads>(sum, shmem);
|
sum = sumReduceBlock<Nthreads>(sum, shmem);
|
||||||
|
|
||||||
|
// compute the mean
|
||||||
const float mean = sum * invSize;
|
const float mean = sum * invSize;
|
||||||
|
// subtract the mean from each pixel
|
||||||
for (int i = tid; i < imageSize; i += Nthreads)
|
for (int i = tid; i < imageSize; i += Nthreads)
|
||||||
imageD[i] -= mean;
|
imageD[i] -= mean;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Compute and subtract mean values from images
|
||||||
|
* @param[inout] images Input/Output images
|
||||||
|
* @param[out] mean Output mean values
|
||||||
|
* @param[in] stream cudaStream
|
||||||
|
*/
|
||||||
void cuArraysSubtractMean(cuArrays<float> *images, cudaStream_t stream)
|
void cuArraysSubtractMean(cuArrays<float> *images, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
const dim3 grid(images->count, 1, 1);
|
const dim3 grid(images->count, 1, 1);
|
||||||
//const int Nthreads=512;
|
|
||||||
const int imageSize = images->width*images->height;
|
const int imageSize = images->width*images->height;
|
||||||
const float invSize = 1.0f/imageSize;
|
const float invSize = 1.0f/imageSize;
|
||||||
|
|
||||||
cuArraysSubtractMean_kernel<512> <<<grid,512,0,stream>>>(images->devData, imageSize, invSize, images->count);
|
cuArraysSubtractMean_kernel<NTHREADS> <<<grid,NTHREADS,0,stream>>>(images->devData, imageSize, invSize, images->count);
|
||||||
getLastCudaError("cuArraysSubtractMean kernel error\n");
|
getLastCudaError("cuArraysSubtractMean kernel error\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// Summation on extracted correlation surface (Minyan)
|
// cuda kernel to compute summation on extracted correlation surface (Minyan)
|
||||||
template<const int Nthreads>
|
template<const int Nthreads>
|
||||||
__global__ void cuArraysSumCorr_kernel(float *images, int *imagesValid, float *imagesSum, int *imagesValidCount, int imageSize, int nImages)
|
__global__ void cuArraysSumCorr_kernel(float *images, int *imagesValid, float *imagesSum, int *imagesValidCount, int imageSize, int nImages)
|
||||||
{
|
{
|
||||||
|
@ -141,24 +172,26 @@ __global__ void cuArraysSumCorr_kernel(float *images, int *imagesValid, float *i
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void cuArraysSumCorr(cuArrays<float> *images, cuArrays<int> *imagesValid, cuArrays<float> *imagesSum, cuArrays<int> *imagesValidCount, cudaStream_t stream)
|
/**
|
||||||
|
* Compute the variance of images (for SNR)
|
||||||
|
* @param[in] images Input images
|
||||||
|
* @param[in] imagesValid validity flags for each pixel
|
||||||
|
* @param[out] imagesSum variance
|
||||||
|
* @param[out] imagesValidCount count of total valid pixels
|
||||||
|
* @param[in] stream cudaStream
|
||||||
|
*/
|
||||||
|
void cuArraysSumCorr(cuArrays<float> *images, cuArrays<int> *imagesValid, cuArrays<float> *imagesSum,
|
||||||
|
cuArrays<int> *imagesValidCount, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
const dim3 grid(images->count, 1, 1);
|
const dim3 grid(images->count, 1, 1);
|
||||||
//const int Nthreads=512;
|
const int imageSize = images->width*images->height;
|
||||||
const int imageSize = images->width*images->height;
|
|
||||||
|
|
||||||
cuArraysSumCorr_kernel<512> <<<grid,512,0,stream>>>(images->devData, imagesValid->devData,
|
|
||||||
imagesSum->devData, imagesValidCount->devData, imageSize, images->count);
|
|
||||||
|
|
||||||
getLastCudaError("cuArraysSumValueCorr kernel error\n");
|
|
||||||
|
|
||||||
|
cuArraysSumCorr_kernel<NTHREADS> <<<grid,NTHREADS,0,stream>>>(images->devData, imagesValid->devData,
|
||||||
|
imagesSum->devData, imagesValidCount->devData, imageSize, images->count);
|
||||||
|
getLastCudaError("cuArraysSumValueCorr kernel error\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
// end of summation on extracted correlation surface (Minyan)
|
// intra-block inclusive prefix sum
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/* intra-block inclusive prefix sum */
|
|
||||||
template<int Nthreads2>
|
template<int Nthreads2>
|
||||||
__device__ void inclusive_prefix_sum(float sum, volatile float *shmem)
|
__device__ void inclusive_prefix_sum(float sum, volatile float *shmem)
|
||||||
{
|
{
|
||||||
|
@ -177,7 +210,7 @@ __device__ void inclusive_prefix_sum(float sum, volatile float *shmem)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// prefix sum of pixel value and pixel value^2
|
||||||
template<const int Nthreads2>
|
template<const int Nthreads2>
|
||||||
__device__ float2 partialSums(const float v, volatile float* shmem, const int stride)
|
__device__ float2 partialSums(const float v, volatile float* shmem, const int stride)
|
||||||
{
|
{
|
||||||
|
@ -190,12 +223,10 @@ __device__ float2 partialSums(const float v, volatile float* shmem, const int st
|
||||||
inclusive_prefix_sum<Nthreads2>(v*v, shMem2);
|
inclusive_prefix_sum<Nthreads2>(v*v, shMem2);
|
||||||
const float Sum = shMem [tid-1 + stride] - shMem [tid-1];
|
const float Sum = shMem [tid-1 + stride] - shMem [tid-1];
|
||||||
const float Sum2 = shMem2[tid-1 + stride] - shMem2[tid-1];
|
const float Sum2 = shMem2[tid-1 + stride] - shMem2[tid-1];
|
||||||
//__syncthreads();
|
|
||||||
|
|
||||||
return make_float2(Sum, Sum2);
|
return make_float2(Sum, Sum2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// cuda kernel for cuCorrNormalize
|
||||||
template<const int Nthreads2>
|
template<const int Nthreads2>
|
||||||
__global__ void cuCorrNormalize_kernel(
|
__global__ void cuCorrNormalize_kernel(
|
||||||
int nImages,
|
int nImages,
|
||||||
|
@ -211,9 +242,6 @@ __global__ void cuCorrNormalize_kernel(
|
||||||
const int imageIdx = blockIdx.z;
|
const int imageIdx = blockIdx.z;
|
||||||
if (imageIdx >= nImages) return;
|
if (imageIdx >= nImages) return;
|
||||||
|
|
||||||
//if(tid ==0 ) printf("debug corrNorm, %d %d %d %d %d %d %d %d %d\n", templateNX, templateNY, templateSize,
|
|
||||||
//imageNX, imageNY, imageSize, resultNX, resultNY, resultSize);
|
|
||||||
|
|
||||||
const int imageOffset = imageIdx * imageSize;
|
const int imageOffset = imageIdx * imageSize;
|
||||||
const int templateOffset = imageIdx * templateSize;
|
const int templateOffset = imageIdx * templateSize;
|
||||||
const int resultOffset = imageIdx * resultSize;
|
const int resultOffset = imageIdx * resultSize;
|
||||||
|
@ -222,17 +250,7 @@ __global__ void cuCorrNormalize_kernel(
|
||||||
const float *templateD = templateIn + templateOffset;
|
const float *templateD = templateIn + templateOffset;
|
||||||
float * resultD = resultOut + resultOffset;
|
float * resultD = resultOut + resultOffset;
|
||||||
|
|
||||||
/*template sum squar */
|
// template sum^2
|
||||||
|
|
||||||
float templateSum = 0.0f;
|
|
||||||
|
|
||||||
for(uint i=tid; i<templateSize; i+=Nthreads)
|
|
||||||
{
|
|
||||||
templateSum += templateD[i];
|
|
||||||
}
|
|
||||||
templateSum = sumReduceBlock<Nthreads>(templateSum, shmem);
|
|
||||||
__syncthreads();
|
|
||||||
|
|
||||||
float templateSum2 = 0.0f;
|
float templateSum2 = 0.0f;
|
||||||
for (int i = tid; i < templateSize; i += Nthreads)
|
for (int i = tid; i < templateSize; i += Nthreads)
|
||||||
{
|
{
|
||||||
|
@ -242,94 +260,123 @@ __global__ void cuCorrNormalize_kernel(
|
||||||
templateSum2 = sumReduceBlock<Nthreads>(templateSum2, shmem);
|
templateSum2 = sumReduceBlock<Nthreads>(templateSum2, shmem);
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
|
|
||||||
//if(tid ==0) printf("template sum %d %g %g \n", imageIdx, templateSum, templateSum2);
|
// reset shared memory value
|
||||||
/*********/
|
|
||||||
|
|
||||||
shmem[tid] = shmem[tid + Nthreads] = shmem[tid + 2*Nthreads] = 0.0f;
|
shmem[tid] = shmem[tid + Nthreads] = shmem[tid + 2*Nthreads] = 0.0f;
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
|
|
||||||
|
// perform the prefix sum and sum^2 for secondary window
|
||||||
|
// see notes above
|
||||||
float imageSum = 0.0f;
|
float imageSum = 0.0f;
|
||||||
float imageSum2 = 0.0f;
|
float imageSum2 = 0.0f;
|
||||||
int iaddr = 0;
|
int iaddr = 0;
|
||||||
const int windowSize = templateNX*imageNY;
|
const int windowSize = templateNX*imageNY;
|
||||||
|
// iterative till reaching the templateNX row of the secondary window
|
||||||
|
// or the first row of correlation surface may be computed
|
||||||
while (iaddr < windowSize)
|
while (iaddr < windowSize)
|
||||||
{
|
{
|
||||||
|
// cum sum for each row with a width=templateNY
|
||||||
const float2 res = partialSums<Nthreads2>(imageD[iaddr + tid], shmem, templateNY);
|
const float2 res = partialSums<Nthreads2>(imageD[iaddr + tid], shmem, templateNY);
|
||||||
|
// add to the total, which keeps track of the sum of area for each window
|
||||||
imageSum += res.x;
|
imageSum += res.x;
|
||||||
imageSum2 += res.y;
|
imageSum2 += res.y;
|
||||||
|
// move to next row
|
||||||
iaddr += imageNY;
|
iaddr += imageNY;
|
||||||
}
|
}
|
||||||
|
// row reaches the end of first batch of windows
|
||||||
|
// normalize the first row of the correlation surface
|
||||||
if (tid < resultNY)
|
if (tid < resultNY)
|
||||||
{
|
{
|
||||||
//if(blockIdx.z ==0) printf("image sum %d %g %g \n", tid, imageSum*templateCoeff, sqrtf(imageSum2*templateCoeff));
|
// normalizing factor
|
||||||
|
|
||||||
const float norm2 = (imageSum2 - imageSum*imageSum*templateCoeff)*templateSum2;
|
const float norm2 = (imageSum2 - imageSum*imageSum*templateCoeff)*templateSum2;
|
||||||
|
// normalize the correlation surface
|
||||||
resultD[tid] *= rsqrtf(norm2 + FLT_EPSILON);
|
resultD[tid] *= rsqrtf(norm2 + FLT_EPSILON);
|
||||||
}
|
}
|
||||||
|
// iterative over the rest rows
|
||||||
/*********/
|
|
||||||
|
|
||||||
while (iaddr < imageSize)
|
while (iaddr < imageSize)
|
||||||
{
|
{
|
||||||
|
// the prefix sum of the row removed is recomputed, to be subtracted
|
||||||
const float2 res1 = partialSums<Nthreads2>(imageD[iaddr-windowSize + tid], shmem, templateNY);
|
const float2 res1 = partialSums<Nthreads2>(imageD[iaddr-windowSize + tid], shmem, templateNY);
|
||||||
|
// the prefix sum of the new row, to be added
|
||||||
const float2 res2 = partialSums<Nthreads2>(imageD[iaddr + tid], shmem, templateNY);
|
const float2 res2 = partialSums<Nthreads2>(imageD[iaddr + tid], shmem, templateNY);
|
||||||
imageSum += res2.x - res1.x;
|
imageSum += res2.x - res1.x;
|
||||||
imageSum2 += res2.y - res1.y;
|
imageSum2 += res2.y - res1.y;
|
||||||
|
// move to next row
|
||||||
iaddr += imageNY;
|
iaddr += imageNY;
|
||||||
|
// normalize the correlation surface
|
||||||
if (tid < resultNY)
|
if (tid < resultNY)
|
||||||
{
|
{
|
||||||
const int ix = iaddr/imageNY;
|
const int ix = iaddr/imageNY; // get row index
|
||||||
const int addr = (ix-templateNX)*resultNY;
|
const int addr = (ix-templateNX)*resultNY; // get the correlation surface row index
|
||||||
|
|
||||||
//printf("test norm %d %d %d %d %f\n", tid, ix, addr, addr+tid, resultD[addr + tid]);
|
|
||||||
|
|
||||||
const float norm2 = (imageSum2 - imageSum*imageSum*templateCoeff)*templateSum2;
|
const float norm2 = (imageSum2 - imageSum*imageSum*templateCoeff)*templateSum2;
|
||||||
resultD[addr + tid] *= rsqrtf(norm2 + FLT_EPSILON);
|
resultD[addr + tid] *= rsqrtf(norm2 + FLT_EPSILON);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Normalize a correlation surface
|
||||||
|
* @param[in] templates Reference windows with mean subtracted
|
||||||
|
* @param[in] images Secondary windows
|
||||||
|
* @param[inout] results un-normalized correlation surface as input and normalized as output
|
||||||
|
* @param[in] stream cudaStream
|
||||||
|
* @warning The current implementation uses one thread for one column, therefore,
|
||||||
|
* the secondary window width is limited to <=1024, the max threads in a block.
|
||||||
|
* @todo an implementation for arbitrary window width, might not be as efficient
|
||||||
|
*/
|
||||||
void cuCorrNormalize(cuArrays<float> *templates, cuArrays<float> *images, cuArrays<float> *results, cudaStream_t stream)
|
void cuCorrNormalize(cuArrays<float> *templates, cuArrays<float> *images, cuArrays<float> *results, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
const int nImages = images->count;
|
const int nImages = images->count;
|
||||||
const int imageNY = images->width;
|
const int imageNY = images->width;
|
||||||
const dim3 grid(1, 1, nImages);
|
const dim3 grid(1, 1, nImages);
|
||||||
const float invTemplateSize = 1.0f/templates->size;
|
const float invTemplateSize = 1.0f/templates->size;
|
||||||
//printf("test normalize %d %g\n", templates->size, invTemplateSize);
|
|
||||||
if (imageNY <= 64) cuCorrNormalize_kernel< 6><<<grid, 64, 0, stream>>>(nImages,
|
if (imageNY <= 64) {
|
||||||
|
cuCorrNormalize_kernel< 6><<<grid, 64, 0, stream>>>(nImages,
|
||||||
templates->devData, templates->height, templates->width, templates->size,
|
templates->devData, templates->height, templates->width, templates->size,
|
||||||
images->devData, images->height, images->width, images->size,
|
images->devData, images->height, images->width, images->size,
|
||||||
results->devData, results->height, results->width, results->size,
|
results->devData, results->height, results->width, results->size,
|
||||||
invTemplateSize);
|
invTemplateSize);
|
||||||
else if (imageNY <= 128) cuCorrNormalize_kernel< 7><<<grid, 128, 0, stream>>>(nImages,
|
getLastCudaError("cuCorrNormalize kernel error");
|
||||||
|
}
|
||||||
|
else if (imageNY <= 128) {
|
||||||
|
cuCorrNormalize_kernel< 7><<<grid, 128, 0, stream>>>(nImages,
|
||||||
templates->devData, templates->height, templates->width, templates->size,
|
templates->devData, templates->height, templates->width, templates->size,
|
||||||
images->devData, images->height, images->width, images->size,
|
images->devData, images->height, images->width, images->size,
|
||||||
results->devData, results->height, results->width, results->size,
|
results->devData, results->height, results->width, results->size,
|
||||||
invTemplateSize);
|
invTemplateSize);
|
||||||
else if (imageNY <= 256) cuCorrNormalize_kernel< 8><<<grid, 256, 0, stream>>>(nImages,
|
getLastCudaError("cuCorrNormalize kernel error");
|
||||||
|
}
|
||||||
|
else if (imageNY <= 256) {
|
||||||
|
cuCorrNormalize_kernel< 8><<<grid, 256, 0, stream>>>(nImages,
|
||||||
templates->devData, templates->height, templates->width, templates->size,
|
templates->devData, templates->height, templates->width, templates->size,
|
||||||
images->devData, images->height, images->width, images->size,
|
images->devData, images->height, images->width, images->size,
|
||||||
results->devData, results->height, results->width, results->size,
|
results->devData, results->height, results->width, results->size,
|
||||||
invTemplateSize);
|
invTemplateSize);
|
||||||
else if (imageNY <= 512) cuCorrNormalize_kernel< 9><<<grid, 512, 0, stream>>>(nImages,
|
getLastCudaError("cuCorrNormalize kernel error");
|
||||||
|
}
|
||||||
|
else if (imageNY <= 512) {
|
||||||
|
cuCorrNormalize_kernel< 9><<<grid, 512, 0, stream>>>(nImages,
|
||||||
templates->devData, templates->height, templates->width, templates->size,
|
templates->devData, templates->height, templates->width, templates->size,
|
||||||
images->devData, images->height, images->width, images->size,
|
images->devData, images->height, images->width, images->size,
|
||||||
results->devData, results->height, results->width, results->size,
|
results->devData, results->height, results->width, results->size,
|
||||||
invTemplateSize);
|
invTemplateSize);
|
||||||
else if (imageNY <= 1024) cuCorrNormalize_kernel<10><<<grid,1024, 0, stream>>>(nImages,
|
getLastCudaError("cuCorrNormalize kernel error");
|
||||||
|
}
|
||||||
|
else if (imageNY <= 1024) {
|
||||||
|
cuCorrNormalize_kernel<10><<<grid,1024, 0, stream>>>(nImages,
|
||||||
templates->devData, templates->height, templates->width, templates->size,
|
templates->devData, templates->height, templates->width, templates->size,
|
||||||
images->devData, images->height, images->width, images->size,
|
images->devData, images->height, images->width, images->size,
|
||||||
results->devData, results->height, results->width, results->size,
|
results->devData, results->height, results->width, results->size,
|
||||||
invTemplateSize);
|
invTemplateSize);
|
||||||
|
getLastCudaError("cuCorrNormalize kernel error");
|
||||||
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
fprintf(stderr, "The image size along across direction %d should be smaller than 1024.\n", imageNY);
|
fprintf(stderr, "The (oversampled) window size along the across direction %d should be smaller than 1024.\n", imageNY);
|
||||||
assert(0);
|
throw;
|
||||||
}
|
}
|
||||||
getLastCudaError("cuCorrNormalize kernel error\n");
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// end of file
|
||||||
|
|
|
@ -1,16 +1,20 @@
|
||||||
/*
|
/*
|
||||||
* cuCorrTimetime.cu
|
* @file cuCorrTimetime.cu
|
||||||
* correlation between two sets of images in time domain
|
* @brief Correlation between two sets of images in time domain
|
||||||
|
*
|
||||||
|
* This code is adapted from the nxcor package.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "cuAmpcorUtil.h"
|
#include "cuAmpcorUtil.h"
|
||||||
|
|
||||||
|
|
||||||
|
// cuda kernel for cuCorrTimeDomain
|
||||||
template<const int nthreads, const int NPT>
|
template<const int nthreads, const int NPT>
|
||||||
__global__ void cuArraysCorrTime_kernel(
|
__global__ void cuArraysCorrTime_kernel(
|
||||||
const int nImages,
|
const int nImages,
|
||||||
const float *templateIn, const int templateNY, const int templateNX, const int templateSize,
|
const float *templateIn, const int templateNX, const int templateNY, const int templateSize,
|
||||||
const float *imageIn, const int imageNY, const int imageNX, const int imageSize,
|
const float *imageIn, const int imageNX, const int imageNY, const int imageSize,
|
||||||
float *resultOut, const int resultNY, const int resultNX, const int resultSize)
|
float *resultOut, const int resultNX, const int resultNY, const int resultSize)
|
||||||
{
|
{
|
||||||
__shared__ float shmem[nthreads*(1+NPT)];
|
__shared__ float shmem[nthreads*(1+NPT)];
|
||||||
const int tid = threadIdx.x;
|
const int tid = threadIdx.x;
|
||||||
|
@ -26,14 +30,14 @@ __global__ void cuArraysCorrTime_kernel(
|
||||||
const float *templateD = templateIn + templateOffset + tid;
|
const float *templateD = templateIn + templateOffset + tid;
|
||||||
float * resultD = resultOut + resultOffset;
|
float * resultD = resultOut + resultOffset;
|
||||||
|
|
||||||
const int q = min(nthreads/resultNX, 4);
|
const int q = min(nthreads/resultNY, 4);
|
||||||
const int nt = nthreads/q;
|
const int nt = nthreads/q;
|
||||||
const int ty = threadIdx.x / nt;
|
const int ty = threadIdx.x / nt;
|
||||||
const int tx = threadIdx.x - nt * ty;
|
const int tx = threadIdx.x - nt * ty;
|
||||||
|
|
||||||
const int templateNXq = templateNX/q;
|
const int templateNYq = templateNY/q;
|
||||||
const int jbeg = templateNXq * ty;
|
const int jbeg = templateNYq * ty;
|
||||||
const int jend = ty+1 >= q ? templateNX : templateNXq + jbeg;
|
const int jend = ty+1 >= q ? templateNY : templateNYq + jbeg;
|
||||||
|
|
||||||
float *shTemplate = shmem;
|
float *shTemplate = shmem;
|
||||||
float *shImage = shmem + nthreads;
|
float *shImage = shmem + nthreads;
|
||||||
|
@ -43,13 +47,13 @@ __global__ void cuArraysCorrTime_kernel(
|
||||||
for (int k = 0; k < NPT; k++)
|
for (int k = 0; k < NPT; k++)
|
||||||
corrCoeff[k] = 0.0f;
|
corrCoeff[k] = 0.0f;
|
||||||
|
|
||||||
int iaddr = yc*imageNX;
|
int iaddr = yc*imageNY;
|
||||||
|
|
||||||
|
|
||||||
float img[NPT];
|
float img[NPT];
|
||||||
for (int k = 0; k < NPT-1; k++, iaddr += imageNX)
|
for (int k = 0; k < NPT-1; k++, iaddr += imageNY)
|
||||||
img[k] = imageD[iaddr];
|
img[k] = imageD[iaddr];
|
||||||
for (int taddr = 0; taddr < templateSize; taddr += templateNX, iaddr += imageNX)
|
for (int taddr = 0; taddr < templateSize; taddr += templateNY, iaddr += imageNY)
|
||||||
{
|
{
|
||||||
shTemplate[tid] = templateD[taddr];
|
shTemplate[tid] = templateD[taddr];
|
||||||
img [NPT-1] = imageD[iaddr];
|
img [NPT-1] = imageD[iaddr];
|
||||||
|
@ -59,7 +63,7 @@ __global__ void cuArraysCorrTime_kernel(
|
||||||
img[k] = img[k+1];
|
img[k] = img[k+1];
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
|
|
||||||
if (tx < resultNX && ty < q)
|
if (tx < resultNY && ty < q)
|
||||||
{
|
{
|
||||||
#pragma unroll 8
|
#pragma unroll 8
|
||||||
for (int j = jbeg; j < jend; j++)
|
for (int j = jbeg; j < jend; j++)
|
||||||
|
@ -78,16 +82,22 @@ __global__ void cuArraysCorrTime_kernel(
|
||||||
corrCoeff[k] += shmem[j + nthreads*k];
|
corrCoeff[k] += shmem[j + nthreads*k];
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
|
|
||||||
if (tid < resultNX)
|
if (tid < resultNY)
|
||||||
{
|
{
|
||||||
int raddr = yc*resultNX + tid;
|
int raddr = yc*resultNY + tid;
|
||||||
for (int k = 0; k < NPT; k++, raddr += resultNX)
|
for (int k = 0; k < NPT; k++, raddr += resultNY)
|
||||||
if (raddr < resultSize)
|
if (raddr < resultSize)
|
||||||
resultD[raddr] = corrCoeff[k];
|
resultD[raddr] = corrCoeff[k];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Perform cross correlation in time domain
|
||||||
|
* @param[in] templates Reference images
|
||||||
|
* @param[in] images Secondary images
|
||||||
|
* @param[out] results Output correlation surface
|
||||||
|
* @param[in] stream cudaStream
|
||||||
|
*/
|
||||||
void cuCorrTimeDomain(cuArrays<float> *templates,
|
void cuCorrTimeDomain(cuArrays<float> *templates,
|
||||||
cuArrays<float> *images,
|
cuArrays<float> *images,
|
||||||
cuArrays<float> *results,
|
cuArrays<float> *results,
|
||||||
|
@ -95,52 +105,84 @@ void cuCorrTimeDomain(cuArrays<float> *templates,
|
||||||
{
|
{
|
||||||
/* compute correlation matrix */
|
/* compute correlation matrix */
|
||||||
const int nImages = images->count;
|
const int nImages = images->count;
|
||||||
const int imageNX = images->width;
|
const int imageNY = images->width;
|
||||||
const int NPT = 8;
|
const int NPT = 8;
|
||||||
|
|
||||||
|
|
||||||
const dim3 grid(nImages, (results->width-1)/NPT+1, 1);
|
const dim3 grid(nImages, (results->width-1)/NPT+1, 1);
|
||||||
//fprintf(stderr, "corrTimeDomain %d %d %d\n", imageNX, templates->height, results->height);
|
if (imageNY <= 64) {
|
||||||
if (imageNX <= 64) cuArraysCorrTime_kernel< 64,NPT><<<grid, 64, 0, stream>>>(nImages,
|
cuArraysCorrTime_kernel< 64,NPT><<<grid, 64, 0, stream>>>(nImages,
|
||||||
templates->devData, templates->height, templates->width, templates->size,
|
templates->devData, templates->height, templates->width, templates->size,
|
||||||
images->devData, images->height, images->width, images->size,
|
images->devData, images->height, images->width, images->size,
|
||||||
results->devData, results->height, results->width, results->size);
|
results->devData, results->height, results->width, results->size);
|
||||||
else if (imageNX <= 128) cuArraysCorrTime_kernel< 128,NPT><<<grid, 128, 0, stream>>>(nImages,
|
|
||||||
templates->devData, templates->height, templates->width, templates->size,
|
|
||||||
images->devData, images->height, images->width, images->size,
|
|
||||||
results->devData, results->height, results->width, results->size);
|
|
||||||
else if (imageNX <= 192) cuArraysCorrTime_kernel< 192,NPT><<<grid, 192, 0, stream>>>(nImages,
|
|
||||||
templates->devData, templates->height, templates->width, templates->size,
|
|
||||||
images->devData, images->height, images->width, images->size,
|
|
||||||
results->devData, results->height, results->width, results->size);
|
|
||||||
else if (imageNX <= 256) cuArraysCorrTime_kernel< 256,NPT><<<grid, 256, 0, stream>>>(nImages,
|
|
||||||
templates->devData, templates->height, templates->width, templates->size,
|
|
||||||
images->devData, images->height, images->width, images->size,
|
|
||||||
results->devData, results->height, results->width, results->size);
|
|
||||||
else if (imageNX <= 384) cuArraysCorrTime_kernel< 384,NPT><<<grid, 384, 0, stream>>>(nImages,
|
|
||||||
templates->devData, templates->height, templates->width, templates->size,
|
|
||||||
images->devData, images->height, images->width, images->size,
|
|
||||||
results->devData, results->height, results->width, results->size);
|
|
||||||
else if (imageNX <= 512) cuArraysCorrTime_kernel< 512,NPT><<<grid, 512, 0, stream>>>(nImages,
|
|
||||||
templates->devData, templates->height, templates->width, templates->size,
|
|
||||||
images->devData, images->height, images->width, images->size,
|
|
||||||
results->devData, results->height, results->width, results->size);
|
|
||||||
else if (imageNX <= 640) cuArraysCorrTime_kernel< 640,NPT><<<grid, 640, 0, stream>>>(nImages,
|
|
||||||
templates->devData, templates->height, templates->width, templates->size,
|
|
||||||
images->devData, images->height, images->width, images->size,
|
|
||||||
results->devData, results->height, results->width, results->size);
|
|
||||||
else if (imageNX <= 768) cuArraysCorrTime_kernel< 768,NPT><<<grid, 768, 0, stream>>>(nImages,
|
|
||||||
templates->devData, templates->height, templates->width, templates->size,
|
|
||||||
images->devData, images->height, images->width, images->size,
|
|
||||||
results->devData, results->height, results->width, results->size);
|
|
||||||
else if (imageNX <= 896) cuArraysCorrTime_kernel< 896,NPT><<<grid, 896, 0, stream>>>(nImages,
|
|
||||||
templates->devData, templates->height, templates->width, templates->size,
|
|
||||||
images->devData, images->height, images->width, images->size,
|
|
||||||
results->devData, results->height, results->width, results->size);
|
|
||||||
else if (imageNX <= 1024) cuArraysCorrTime_kernel<1024,NPT><<<grid,1024, 0, stream>>>(nImages,
|
|
||||||
templates->devData, templates->height, templates->width, templates->size,
|
|
||||||
images->devData, images->height, images->width, images->size,
|
|
||||||
results->devData, results->height, results->width, results->size);
|
|
||||||
else assert(0);
|
|
||||||
getLastCudaError("cuArraysCorrTime error");
|
getLastCudaError("cuArraysCorrTime error");
|
||||||
|
}
|
||||||
|
else if (imageNY <= 128) {
|
||||||
|
cuArraysCorrTime_kernel< 128,NPT><<<grid, 128, 0, stream>>>(nImages,
|
||||||
|
templates->devData, templates->height, templates->width, templates->size,
|
||||||
|
images->devData, images->height, images->width, images->size,
|
||||||
|
results->devData, results->height, results->width, results->size);
|
||||||
|
getLastCudaError("cuArraysCorrTime error");
|
||||||
|
}
|
||||||
|
else if (imageNY <= 192) {
|
||||||
|
cuArraysCorrTime_kernel< 192,NPT><<<grid, 192, 0, stream>>>(nImages,
|
||||||
|
templates->devData, templates->height, templates->width, templates->size,
|
||||||
|
images->devData, images->height, images->width, images->size,
|
||||||
|
results->devData, results->height, results->width, results->size);
|
||||||
|
getLastCudaError("cuArraysCorrTime error");
|
||||||
|
}
|
||||||
|
else if (imageNY <= 256) {
|
||||||
|
cuArraysCorrTime_kernel< 256,NPT><<<grid, 256, 0, stream>>>(nImages,
|
||||||
|
templates->devData, templates->height, templates->width, templates->size,
|
||||||
|
images->devData, images->height, images->width, images->size,
|
||||||
|
results->devData, results->height, results->width, results->size);
|
||||||
|
getLastCudaError("cuArraysCorrTime error");
|
||||||
|
}
|
||||||
|
else if (imageNY <= 384) {
|
||||||
|
cuArraysCorrTime_kernel< 384,NPT><<<grid, 384, 0, stream>>>(nImages,
|
||||||
|
templates->devData, templates->height, templates->width, templates->size,
|
||||||
|
images->devData, images->height, images->width, images->size,
|
||||||
|
results->devData, results->height, results->width, results->size);
|
||||||
|
getLastCudaError("cuArraysCorrTime error");
|
||||||
|
}
|
||||||
|
else if (imageNY <= 512) {
|
||||||
|
cuArraysCorrTime_kernel< 512,NPT><<<grid, 512, 0, stream>>>(nImages,
|
||||||
|
templates->devData, templates->height, templates->width, templates->size,
|
||||||
|
images->devData, images->height, images->width, images->size,
|
||||||
|
results->devData, results->height, results->width, results->size);
|
||||||
|
getLastCudaError("cuArraysCorrTime error");
|
||||||
|
}
|
||||||
|
else if (imageNY <= 640) {
|
||||||
|
cuArraysCorrTime_kernel< 640,NPT><<<grid, 640, 0, stream>>>(nImages,
|
||||||
|
templates->devData, templates->height, templates->width, templates->size,
|
||||||
|
images->devData, images->height, images->width, images->size,
|
||||||
|
results->devData, results->height, results->width, results->size);
|
||||||
|
getLastCudaError("cuArraysCorrTime error");
|
||||||
|
}
|
||||||
|
else if (imageNY <= 768) {
|
||||||
|
cuArraysCorrTime_kernel< 768,NPT><<<grid, 768, 0, stream>>>(nImages,
|
||||||
|
templates->devData, templates->height, templates->width, templates->size,
|
||||||
|
images->devData, images->height, images->width, images->size,
|
||||||
|
results->devData, results->height, results->width, results->size);
|
||||||
|
getLastCudaError("cuArraysCorrTime error");
|
||||||
|
}
|
||||||
|
else if (imageNY <= 896) {
|
||||||
|
cuArraysCorrTime_kernel< 896,NPT><<<grid, 896, 0, stream>>>(nImages,
|
||||||
|
templates->devData, templates->height, templates->width, templates->size,
|
||||||
|
images->devData, images->height, images->width, images->size,
|
||||||
|
results->devData, results->height, results->width, results->size);
|
||||||
|
getLastCudaError("cuArraysCorrTime error");
|
||||||
|
}
|
||||||
|
else if (imageNY <= 1024) {
|
||||||
|
cuArraysCorrTime_kernel<1024,NPT><<<grid,1024, 0, stream>>>(nImages,
|
||||||
|
templates->devData, templates->height, templates->width, templates->size,
|
||||||
|
images->devData, images->height, images->width, images->size,
|
||||||
|
results->devData, results->height, results->width, results->size);
|
||||||
|
getLastCudaError("cuArraysCorrTime error");
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
fprintf(stderr, "The (oversampled) window size along the across direction %d should be smaller than 1024.\n", imageNY);
|
||||||
|
throw;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
// end of file
|
||||||
|
|
|
@ -1,12 +1,15 @@
|
||||||
/*
|
/*
|
||||||
* cuDeramp.cu
|
* @file cuDeramp.cu
|
||||||
* Derampling a batch of 2D complex images with GPU
|
* @brief Derampling a batch of 2D complex images with GPU
|
||||||
*
|
*
|
||||||
* Method 1: use Fortran code algorithm
|
* A phase ramp is equivalent to a frequency shift in frequency domain,
|
||||||
* Method 2: use phase gradient
|
* which needs to be removed (deramping) in order to move the band center
|
||||||
* Method 0 or else: no deramping
|
* to zero. This is necessary before oversampling a complex signal.
|
||||||
|
* Method 1: each signal is decomposed into real and imaginary parts,
|
||||||
|
* and the average phase shift is obtained as atan(\sum imag / \sum real).
|
||||||
|
* The average is weighted by the amplitudes (coherence).
|
||||||
|
* Method 0 or else: skip deramping
|
||||||
*
|
*
|
||||||
* v1.0 2/1/2017, Lijun Zhu
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "cuArrays.h"
|
#include "cuArrays.h"
|
||||||
|
@ -19,10 +22,11 @@
|
||||||
#include <iomanip>
|
#include <iomanip>
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
#include <limits>
|
#include <limits>
|
||||||
// note by Lijun
|
|
||||||
// cuda does not have a good support on volatile vector struct, e.g. float2
|
|
||||||
// I have to use regular float type for shared memory (volatile)
|
|
||||||
|
|
||||||
|
|
||||||
|
// cuda does not have a good support on volatile vector struct, e.g. float2
|
||||||
|
// have to use regular float type for shared memory (volatile) data
|
||||||
|
// the following methods are defined to operate float2/complex objects through float
|
||||||
inline static __device__ void copyToShared(volatile float *s, const int i, const float2 x, const int block)
|
inline static __device__ void copyToShared(volatile float *s, const int i, const float2 x, const int block)
|
||||||
{ s[i] = x.x; s[i+block] = x.y; }
|
{ s[i] = x.x; s[i+block] = x.y; }
|
||||||
|
|
||||||
|
@ -34,102 +38,7 @@ inline static __device__ void addInShared(volatile float *s, const int i, const
|
||||||
{ s[i] += s[i+j]; s[i+block] += s[i+j+block];}
|
{ s[i] += s[i+j]; s[i+block] += s[i+j+block];}
|
||||||
|
|
||||||
|
|
||||||
__device__ void debugPhase(float2 c1, float2 c2)
|
// kernel to do sum reduction for float2 within a block
|
||||||
{
|
|
||||||
float2 cp = complexMulConj(c1, c2);
|
|
||||||
float phase = atan2f(cp.y, cp.x);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
template <const int nthreads>
|
|
||||||
__device__ float sumReduceBlock(float sum, volatile float *shmem)
|
|
||||||
{
|
|
||||||
const int tid = threadIdx.x;
|
|
||||||
shmem[tid] = sum;
|
|
||||||
__syncthreads();
|
|
||||||
|
|
||||||
if (nthreads >=1024) { if (tid < 512) { shmem[tid] = sum = sum + shmem[tid + 512]; } __syncthreads(); }
|
|
||||||
if (nthreads >= 512) { if (tid < 256) { shmem[tid] = sum = sum + shmem[tid + 256]; } __syncthreads(); }
|
|
||||||
if (nthreads >= 256) { if (tid < 128) { shmem[tid] = sum = sum + shmem[tid + 128]; } __syncthreads(); }
|
|
||||||
if (nthreads >= 128) { if (tid < 64) { shmem[tid] = sum = sum + shmem[tid + 64]; } __syncthreads(); }
|
|
||||||
if (tid < 32)
|
|
||||||
{
|
|
||||||
shmem[tid] = sum = sum + shmem[tid + 32];
|
|
||||||
shmem[tid] = sum = sum + shmem[tid + 16];
|
|
||||||
shmem[tid] = sum = sum + shmem[tid + 8];
|
|
||||||
shmem[tid] = sum = sum + shmem[tid + 4];
|
|
||||||
shmem[tid] = sum = sum + shmem[tid + 2];
|
|
||||||
shmem[tid] = sum = sum + shmem[tid + 1];
|
|
||||||
}
|
|
||||||
|
|
||||||
__syncthreads();
|
|
||||||
return shmem[0];
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
template<const int nthreads>
|
|
||||||
__global__ void cuDerampMethod2_kernel(float2 *images, const int imageNX, const int imageNY,
|
|
||||||
const int imageSize, const int nImages, const float normCoefX, const float normCoefY)
|
|
||||||
{
|
|
||||||
|
|
||||||
__shared__ float shmem[nthreads];
|
|
||||||
const int tid = threadIdx.x;
|
|
||||||
const int bid = blockIdx.x;
|
|
||||||
//printf("bid %d\n", bid);
|
|
||||||
float2 *imageD = images + bid*imageSize;
|
|
||||||
|
|
||||||
int pixelIdx, pixelIdxX, pixelIdxY;
|
|
||||||
|
|
||||||
float phaseDiffY = 0.0f;
|
|
||||||
for (int i = tid; i < imageSize; i += nthreads) {
|
|
||||||
pixelIdx = i;
|
|
||||||
pixelIdxY = pixelIdx % imageNY;
|
|
||||||
if(pixelIdxY < imageNY -1)
|
|
||||||
{
|
|
||||||
phaseDiffY += complexArg(complexMulConj(imageD[pixelIdx], imageD[pixelIdx+1]));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
phaseDiffY=sumReduceBlock<nthreads>(phaseDiffY, shmem);
|
|
||||||
phaseDiffY*=normCoefY;
|
|
||||||
|
|
||||||
float phaseDiffX = 0.0f;
|
|
||||||
for (int i = tid; i < imageSize; i += nthreads) {
|
|
||||||
pixelIdx = i;
|
|
||||||
pixelIdxX = pixelIdx / imageNY;
|
|
||||||
if(pixelIdxX < imageNX -1)
|
|
||||||
{
|
|
||||||
phaseDiffX += complexArg(complexMulConj(imageD[pixelIdx], imageD[pixelIdx+imageNY]));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
phaseDiffX=sumReduceBlock<nthreads>(phaseDiffX, shmem);
|
|
||||||
phaseDiffX*=normCoefX;
|
|
||||||
|
|
||||||
for (int i = tid; i < imageSize; i += nthreads)
|
|
||||||
{
|
|
||||||
int pixelIdx = i;
|
|
||||||
pixelIdxX = pixelIdx/imageNY;
|
|
||||||
pixelIdxY = pixelIdx%imageNY;
|
|
||||||
float phase = pixelIdxX*phaseDiffX + pixelIdxY*phaseDiffY;
|
|
||||||
imageD[pixelIdx] *= make_float2(cosf(phase), sinf(phase));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void cuDerampMethod2(cuArrays<float2> *images, cudaStream_t stream)
|
|
||||||
{
|
|
||||||
const dim3 grid(images->count);
|
|
||||||
const int nthreads=512;
|
|
||||||
const int imageSize = images->width*images->height;
|
|
||||||
const float normCoefY = 1.0f/((images->width-1)*images->height);
|
|
||||||
const float normCoefX = 1.0f/((images->height-1)*images->width);
|
|
||||||
cuDerampMethod2_kernel<nthreads> <<<grid, 512,0,stream>>>
|
|
||||||
(images->devData, images->height, images->width, imageSize, images->count, normCoefX, normCoefY);
|
|
||||||
getLastCudaError("cuDerampMethod2 kernel error\n");
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
template <const int nthreads>
|
template <const int nthreads>
|
||||||
__device__ void complexSumReduceBlock(float2& sum, volatile float *shmem)
|
__device__ void complexSumReduceBlock(float2& sum, volatile float *shmem)
|
||||||
{
|
{
|
||||||
|
@ -154,9 +63,7 @@ __device__ void complexSumReduceBlock(float2& sum, volatile float *shmem)
|
||||||
copyFromShared(sum, shmem, 0, nthreads);
|
copyFromShared(sum, shmem, 0, nthreads);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// cuda kernel for cuDerampMethod1
|
||||||
// block id is the image index
|
|
||||||
// thread id ranges all pixels in one image
|
|
||||||
template<const int nthreads>
|
template<const int nthreads>
|
||||||
__global__ void cuDerampMethod1_kernel(float2 *images, const int imageNX, int const imageNY,
|
__global__ void cuDerampMethod1_kernel(float2 *images, const int imageNX, int const imageNY,
|
||||||
const int imageSize, const int nImages, const float normCoef)
|
const int imageSize, const int nImages, const float normCoef)
|
||||||
|
@ -180,7 +87,6 @@ __global__ void cuDerampMethod1_kernel(float2 *images, const int imageNX, int co
|
||||||
complexSumReduceBlock<nthreads>(phaseDiffY, shmem);
|
complexSumReduceBlock<nthreads>(phaseDiffY, shmem);
|
||||||
//phaseDiffY *= normCoef;
|
//phaseDiffY *= normCoef;
|
||||||
float phaseY=atan2f(phaseDiffY.y, phaseDiffY.x);
|
float phaseY=atan2f(phaseDiffY.y, phaseDiffY.x);
|
||||||
//__syncthreads();
|
|
||||||
|
|
||||||
float2 phaseDiffX = make_float2(0.0f, 0.0f);
|
float2 phaseDiffX = make_float2(0.0f, 0.0f);
|
||||||
for (int i = tid; i < imageSize; i += nthreads) {
|
for (int i = tid; i < imageSize; i += nthreads) {
|
||||||
|
@ -207,12 +113,17 @@ __global__ void cuDerampMethod1_kernel(float2 *images, const int imageNX, int co
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Deramp a complex signal with Method 1
|
||||||
|
* @brief Each signal is decomposed into real and imaginary parts,
|
||||||
|
* and the average phase shift is obtained as atan(\sum imag / \sum real).
|
||||||
|
* @param[inout] images input/output complex signals
|
||||||
|
* @param[in] stream cuda stream
|
||||||
|
*/
|
||||||
void cuDerampMethod1(cuArrays<float2> *images, cudaStream_t stream)
|
void cuDerampMethod1(cuArrays<float2> *images, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
|
|
||||||
const dim3 grid(images->count);
|
const dim3 grid(images->count);
|
||||||
//int nthreads;
|
|
||||||
const int imageSize = images->width*images->height;
|
const int imageSize = images->width*images->height;
|
||||||
const float invSize = 1.0f/imageSize;
|
const float invSize = 1.0f/imageSize;
|
||||||
|
|
||||||
|
@ -232,115 +143,19 @@ void cuDerampMethod1(cuArrays<float2> *images, cudaStream_t stream)
|
||||||
cuDerampMethod1_kernel<512> <<<grid, 512, 0, stream>>>
|
cuDerampMethod1_kernel<512> <<<grid, 512, 0, stream>>>
|
||||||
(images->devData, images->height, images->width,
|
(images->devData, images->height, images->width,
|
||||||
imageSize, images->count, invSize); }
|
imageSize, images->count, invSize); }
|
||||||
|
|
||||||
getLastCudaError("cuDerampMethod1 kernel error\n");
|
getLastCudaError("cuDerampMethod1 kernel error\n");
|
||||||
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
|
||||||
static inline double complexAbs (double2 a)
|
|
||||||
{
|
|
||||||
double r = sqrt(a.x*a.x + a.y*a.y);
|
|
||||||
return r;
|
|
||||||
}*/
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
void cpuDerampMethod3(cuArrays<float2> *imagesD, cudaStream_t stream)
|
|
||||||
{
|
|
||||||
float2 *images = (float2 *) malloc(imagesD->getByteSize());
|
|
||||||
float2 phaseDiffX, phaseDiffY;
|
|
||||||
int idxPixel;
|
|
||||||
|
|
||||||
|
|
||||||
cudaMemcpyAsync(images, imagesD->devData, imagesD->getByteSize(), cudaMemcpyDeviceToHost, stream);
|
|
||||||
|
|
||||||
int count = imagesD->count;
|
|
||||||
int height = imagesD->height;
|
|
||||||
int width = imagesD->width;
|
|
||||||
float2 cprod;
|
|
||||||
float phaseX, phaseY;
|
|
||||||
for (int icount = 0; icount < count; icount ++)
|
|
||||||
{
|
|
||||||
phaseDiffY = make_float2(0.0f, 0.0f);
|
|
||||||
for (int i=0; i<height; i++)
|
|
||||||
{
|
|
||||||
for(int j=0; j<width-1; j++)
|
|
||||||
{
|
|
||||||
idxPixel = icount*width*height + i*width + j;
|
|
||||||
cprod = complexMulConj(images[idxPixel], images[idxPixel+1]);
|
|
||||||
phaseDiffY.x += (cprod.x);
|
|
||||||
phaseDiffY.y += (cprod.y);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
//phaseDiffY /= height*(width-1);
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
if (complexAbs(phaseDiffY) < 1.e-5) {
|
|
||||||
phaseY = 0.0;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
phaseY = atan2(phaseDiffY.y, phaseDiffY.x);
|
|
||||||
}
|
|
||||||
|
|
||||||
phaseDiffX = make_float2(0.0f, 0.0f);
|
|
||||||
for (int j=0; j<width; j++)
|
|
||||||
{
|
|
||||||
for(int i=0; i<height-1; i++) {
|
|
||||||
idxPixel = icount*width*height + i*width + j;
|
|
||||||
cprod = complexMulConj(images[idxPixel], images[idxPixel+width]);
|
|
||||||
phaseDiffX.x += (cprod.x);
|
|
||||||
phaseDiffX.y += (cprod.y);;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
//phaseDiffX /= (height-1)*width;
|
|
||||||
if (complexAbs(phaseDiffX) < 1.e-5) {
|
|
||||||
phaseX = 0.0;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
phaseX = atan2(phaseDiffX.y, phaseDiffX.x);
|
|
||||||
}
|
|
||||||
|
|
||||||
//printf("cpu deramp %d (%g,%g) (%g,%g)\n", icount, phaseDiffX.x, phaseDiffX.y, phaseDiffY.x, phaseDiffY.y);
|
|
||||||
|
|
||||||
/*
|
|
||||||
std::setprecision(12);
|
|
||||||
std::cout << "cpu " << icount << " " <<
|
|
||||||
std::setprecision(std::numeric_limits<long double>::digits10 + 1) << phaseX <<
|
|
||||||
" " << std::setprecision(std::numeric_limits<long double>::digits10 + 1) << phaseY << std::endl;
|
|
||||||
std::cout << "cpu " << phaseDiffX.x << " " << phaseDiffX.y << std::endl;
|
|
||||||
std::cout << "cpu " << phaseDiffY.x << " " << phaseDiffY.y << std::endl;
|
|
||||||
*/
|
|
||||||
for(int i=0; i<height; i++)
|
|
||||||
{
|
|
||||||
for(int j=0; j<width; j++)
|
|
||||||
{
|
|
||||||
idxPixel = icount*width*height + i*width + j;
|
|
||||||
float phase = phaseX*i + phaseY*j;
|
|
||||||
images[idxPixel]*=make_float2(cos(phase), sin(phase));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
cudaMemcpyAsync(imagesD->devData, images, imagesD->getByteSize(), cudaMemcpyHostToDevice, stream);
|
|
||||||
free(images);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void cuDeramp(int method, cuArrays<float2> *images, cudaStream_t stream)
|
void cuDeramp(int method, cuArrays<float2> *images, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
switch(method) {
|
switch(method) {
|
||||||
case 3:
|
|
||||||
cpuDerampMethod3(images, stream);
|
|
||||||
case 1:
|
case 1:
|
||||||
cuDerampMethod1(images, stream);
|
cuDerampMethod1(images, stream);
|
||||||
break;
|
break;
|
||||||
case 2:
|
|
||||||
cuDerampMethod2(images, stream);
|
|
||||||
break;
|
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// end of file
|
|
@ -1,8 +1,9 @@
|
||||||
/*
|
/**
|
||||||
cuEstimateStats.cu
|
* @file cuEstimateStats.cu
|
||||||
|
* @brief Estimate the statistics of the correlation surface
|
||||||
9/23/2017, Minyan Zhong
|
*
|
||||||
*/
|
* 9/23/2017, Minyan Zhong
|
||||||
|
*/
|
||||||
|
|
||||||
#include "cuArrays.h"
|
#include "cuArrays.h"
|
||||||
#include "float2.h"
|
#include "float2.h"
|
||||||
|
@ -15,7 +16,7 @@
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
#include <limits>
|
#include <limits>
|
||||||
|
|
||||||
template <const int BLOCKSIZE>
|
// cuda kernel for cuEstimateSnr
|
||||||
__global__ void cudaKernel_estimateSnr(const float* corrSum, const int* corrValidCount, const float* maxval, float* snrValue, const int size)
|
__global__ void cudaKernel_estimateSnr(const float* corrSum, const int* corrValidCount, const float* maxval, float* snrValue, const int size)
|
||||||
|
|
||||||
{
|
{
|
||||||
|
@ -28,50 +29,25 @@ __global__ void cudaKernel_estimateSnr(const float* corrSum, const int* corrVali
|
||||||
snrValue[idx] = maxval[idx] * maxval[idx] / mean;
|
snrValue[idx] = maxval[idx] * maxval[idx] / mean;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Estimate the signal to noise ratio (SNR) of the correlation surface
|
||||||
|
* @param[in] corrSum the sum of the correlation surface
|
||||||
|
* @param[in] corrValidCount the number of valid pixels contributing to sum
|
||||||
|
* @param[out] snrValue return snr value
|
||||||
|
* @param[in] stream cuda stream
|
||||||
|
*/
|
||||||
void cuEstimateSnr(cuArrays<float> *corrSum, cuArrays<int> *corrValidCount, cuArrays<float> *maxval, cuArrays<float> *snrValue, cudaStream_t stream)
|
void cuEstimateSnr(cuArrays<float> *corrSum, cuArrays<int> *corrValidCount, cuArrays<float> *maxval, cuArrays<float> *snrValue, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
|
|
||||||
int size = corrSum->getSize();
|
int size = corrSum->getSize();
|
||||||
|
cudaKernel_estimateSnr<<< IDIVUP(size, NTHREADS), NTHREADS, 0, stream>>>
|
||||||
//std::cout<<size<<std::endl;
|
|
||||||
|
|
||||||
//corrSum->allocateHost();
|
|
||||||
|
|
||||||
//corrSum->copyToHost(stream);
|
|
||||||
|
|
||||||
//std::cout<<"corr sum"<<std::endl;
|
|
||||||
|
|
||||||
//corrValidCount->allocateHost();
|
|
||||||
|
|
||||||
//corrValidCount->copyToHost(stream);
|
|
||||||
|
|
||||||
//std::cout<<"valid count"<<std::endl;
|
|
||||||
|
|
||||||
//maxval->allocateHost();
|
|
||||||
|
|
||||||
//maxval->copyToHost(stream);
|
|
||||||
|
|
||||||
//std::cout<<"maxval"<<std::endl;
|
|
||||||
|
|
||||||
|
|
||||||
//for (int i=0; i<size; i++){
|
|
||||||
|
|
||||||
// std::cout<<corrSum->hostData[i]<<std::endl;
|
|
||||||
// std::cout<<corrValidCount->hostData[i]<<std::endl;
|
|
||||||
|
|
||||||
// std::cout<<maxval->hostData[i]<<std::endl;
|
|
||||||
|
|
||||||
//}
|
|
||||||
|
|
||||||
cudaKernel_estimateSnr<NTHREADS><<< IDIVUP(size, NTHREADS), NTHREADS, 0, stream>>>
|
|
||||||
(corrSum->devData, corrValidCount->devData, maxval->devData, snrValue->devData, size);
|
(corrSum->devData, corrValidCount->devData, maxval->devData, snrValue->devData, size);
|
||||||
|
|
||||||
getLastCudaError("cuda kernel estimate stats error\n");
|
getLastCudaError("cuda kernel estimate stats error\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// cuda kernel for cuEstimateVariance
|
||||||
template <const int BLOCKSIZE> // number of threads per block.
|
__global__ void cudaKernel_estimateVar(const float* corrBatchRaw, const int NX, const int NY,
|
||||||
__global__ void cudaKernel_estimateVar(const float* corrBatchRaw, const int NX, const int NY, const int2* maxloc, const float* maxval, float3* covValue, const int size)
|
const int2* maxloc, const float* maxval, float3* covValue, const int size)
|
||||||
{
|
{
|
||||||
|
|
||||||
// Find image id.
|
// Find image id.
|
||||||
|
@ -135,13 +111,20 @@ __global__ void cudaKernel_estimateVar(const float* corrBatchRaw, const int NX,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Estimate the variance of the correlation surface
|
||||||
|
* @param[in] corrBatchRaw correlation surface
|
||||||
|
* @param[in] maxloc maximum location
|
||||||
|
* @param[in] maxval maximum value
|
||||||
|
* @param[out] covValue variance value
|
||||||
|
* @param[in] stream cuda stream
|
||||||
|
*/
|
||||||
void cuEstimateVariance(cuArrays<float> *corrBatchRaw, cuArrays<int2> *maxloc, cuArrays<float> *maxval, cuArrays<float3> *covValue, cudaStream_t stream)
|
void cuEstimateVariance(cuArrays<float> *corrBatchRaw, cuArrays<int2> *maxloc, cuArrays<float> *maxval, cuArrays<float3> *covValue, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
|
|
||||||
int size = corrBatchRaw->count;
|
int size = corrBatchRaw->count;
|
||||||
|
|
||||||
// One dimensional launching parameters to loop over every correlation surface.
|
// One dimensional launching parameters to loop over every correlation surface.
|
||||||
cudaKernel_estimateVar<NTHREADS><<< IDIVUP(size, NTHREADS), NTHREADS, 0, stream>>>
|
cudaKernel_estimateVar<<< IDIVUP(size, NTHREADS), NTHREADS, 0, stream>>>
|
||||||
(corrBatchRaw->devData, corrBatchRaw->height, corrBatchRaw->width, maxloc->devData, maxval->devData, covValue->devData, size);
|
(corrBatchRaw->devData, corrBatchRaw->height, corrBatchRaw->width, maxloc->devData, maxval->devData, covValue->devData, size);
|
||||||
getLastCudaError("cudaKernel_estimateVar error\n");
|
getLastCudaError("cudaKernel_estimateVar error\n");
|
||||||
}
|
}
|
||||||
|
//end of file
|
|
@ -1,31 +1,13 @@
|
||||||
/*
|
/*
|
||||||
* maxlocation.cu
|
* @file cuOffset.cu
|
||||||
* Purpose: find the location of maximum for a batch of images/vectors
|
* @brief Utilities used to determine the offset field
|
||||||
* this uses the reduction algorithm similar to summations
|
|
||||||
*
|
*
|
||||||
* Author : Lijun Zhu
|
*/
|
||||||
* Seismo Lab, Caltech
|
|
||||||
* Version 1.0 10/01/16
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "cuAmpcorUtil.h"
|
#include "cuAmpcorUtil.h"
|
||||||
#include <cfloat>
|
#include <cfloat>
|
||||||
|
|
||||||
/*
|
// find the max between two elements
|
||||||
__device__ float atomicMaxf(float* address, float val)
|
|
||||||
{
|
|
||||||
int *address_as_int =(int*)address;
|
|
||||||
int old = *address_as_int, assumed;
|
|
||||||
while (val > __int_as_float(old)) {
|
|
||||||
assumed = old;
|
|
||||||
old = atomicCAS(address_as_int, assumed,
|
|
||||||
__float_as_int(val));
|
|
||||||
}
|
|
||||||
return __int_as_float(old);
|
|
||||||
}*/
|
|
||||||
|
|
||||||
|
|
||||||
// comapre two elements
|
|
||||||
inline static __device__ void maxPairReduce(volatile float* maxval, volatile int* maxloc,
|
inline static __device__ void maxPairReduce(volatile float* maxval, volatile int* maxloc,
|
||||||
size_t gid, size_t strideid)
|
size_t gid, size_t strideid)
|
||||||
{
|
{
|
||||||
|
@ -35,7 +17,7 @@ inline static __device__ void maxPairReduce(volatile float* maxval, volatile int
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// max reduction kernel, save the results to shared memory
|
// max reduction kernel
|
||||||
template<const int BLOCKSIZE>
|
template<const int BLOCKSIZE>
|
||||||
__device__ void max_reduction(const float* const images,
|
__device__ void max_reduction(const float* const images,
|
||||||
const size_t imageSize,
|
const size_t imageSize,
|
||||||
|
@ -48,8 +30,8 @@ __device__ void max_reduction(const float* const images,
|
||||||
int imageStart = blockIdx.x*imageSize;
|
int imageStart = blockIdx.x*imageSize;
|
||||||
int imagePixel;
|
int imagePixel;
|
||||||
|
|
||||||
// reduction for elements with i, i+BLOCKSIZE, i+2*BLOCKSIZE ...
|
// reduction for intra-block elements
|
||||||
//
|
// i.e., for elements with i, i+BLOCKSIZE, i+2*BLOCKSIZE ...
|
||||||
for(int gid = tid; gid < imageSize; gid+=blockDim.x)
|
for(int gid = tid; gid < imageSize; gid+=blockDim.x)
|
||||||
{
|
{
|
||||||
imagePixel = imageStart+gid;
|
imagePixel = imageStart+gid;
|
||||||
|
@ -60,12 +42,12 @@ __device__ void max_reduction(const float* const images,
|
||||||
}
|
}
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
|
|
||||||
//reduction within a block
|
// reduction within a block
|
||||||
if (BLOCKSIZE >=1024){ if (tid < 512) { maxPairReduce(shval, shloc, tid, tid + 512); } __syncthreads(); }
|
if (BLOCKSIZE >=1024){ if (tid < 512) { maxPairReduce(shval, shloc, tid, tid + 512); } __syncthreads(); }
|
||||||
if (BLOCKSIZE >=512) { if (tid < 256) { maxPairReduce(shval, shloc, tid, tid + 256); } __syncthreads(); }
|
if (BLOCKSIZE >=512) { if (tid < 256) { maxPairReduce(shval, shloc, tid, tid + 256); } __syncthreads(); }
|
||||||
if (BLOCKSIZE >=256) { if (tid < 128) { maxPairReduce(shval, shloc, tid, tid + 128); } __syncthreads(); }
|
if (BLOCKSIZE >=256) { if (tid < 128) { maxPairReduce(shval, shloc, tid, tid + 128); } __syncthreads(); }
|
||||||
if (BLOCKSIZE >=128) { if (tid < 64 ) { maxPairReduce(shval, shloc, tid, tid + 64 ); } __syncthreads(); }
|
if (BLOCKSIZE >=128) { if (tid < 64 ) { maxPairReduce(shval, shloc, tid, tid + 64 ); } __syncthreads(); }
|
||||||
//reduction within a warp
|
// reduction within a warp
|
||||||
if (tid < 32)
|
if (tid < 32)
|
||||||
{
|
{
|
||||||
maxPairReduce(shval, shloc, tid, tid + 32);
|
maxPairReduce(shval, shloc, tid, tid + 32);
|
||||||
|
@ -78,65 +60,11 @@ __device__ void max_reduction(const float* const images,
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
}
|
}
|
||||||
|
|
||||||
//kernel and function for 1D array, find both max value and location
|
|
||||||
|
// kernel for 2D array(image), find max location only
|
||||||
template <const int BLOCKSIZE>
|
template <const int BLOCKSIZE>
|
||||||
__global__ void cuMaxValLoc_kernel( const float* const images, float *maxval, int* maxloc, const size_t imageSize, const size_t nImages)
|
__global__ void cudaKernel_maxloc2D(const float* const images, int2* maxloc, float* maxval,
|
||||||
{
|
const size_t imageNX, const size_t imageNY, const size_t nImages)
|
||||||
__shared__ float shval[BLOCKSIZE];
|
|
||||||
__shared__ int shloc[BLOCKSIZE];
|
|
||||||
int bid = blockIdx.x;
|
|
||||||
if(bid >= nImages) return;
|
|
||||||
|
|
||||||
max_reduction<BLOCKSIZE>(images, imageSize, nImages, shval, shloc);
|
|
||||||
|
|
||||||
if (threadIdx.x == 0) {
|
|
||||||
maxloc[bid] = shloc[0];
|
|
||||||
maxval[bid] = shval[0];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void cuArraysMaxValandLoc(cuArrays<float> *images, cuArrays<float> *maxval, cuArrays<int> *maxloc, cudaStream_t stream)
|
|
||||||
{
|
|
||||||
const size_t imageSize = images->size;
|
|
||||||
const size_t nImages = images->count;
|
|
||||||
dim3 threadsperblock(NTHREADS);
|
|
||||||
dim3 blockspergrid(nImages);
|
|
||||||
cuMaxValLoc_kernel<NTHREADS><<<blockspergrid, threadsperblock, 0, stream>>>
|
|
||||||
(images->devData, maxval->devData, maxloc->devData, imageSize, nImages);
|
|
||||||
getLastCudaError("cudaKernel fine max location error\n");
|
|
||||||
}
|
|
||||||
|
|
||||||
//kernel and function for 1D array, find max location only
|
|
||||||
template <const int BLOCKSIZE>
|
|
||||||
__global__ void cudaKernel_maxloc(const float* const images, int* maxloc,
|
|
||||||
const size_t imageSize, const size_t nImages)
|
|
||||||
{
|
|
||||||
__shared__ float shval[BLOCKSIZE];
|
|
||||||
__shared__ int shloc[BLOCKSIZE];
|
|
||||||
|
|
||||||
int bid = blockIdx.x;
|
|
||||||
if(bid >=nImages) return;
|
|
||||||
|
|
||||||
max_reduction<BLOCKSIZE>(images, imageSize, nImages, shval, shloc);
|
|
||||||
|
|
||||||
if (threadIdx.x == 0) {
|
|
||||||
maxloc[bid] = shloc[0];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void cuArraysMaxLoc(cuArrays<float> *images, cuArrays<int> *maxloc, cudaStream_t stream)
|
|
||||||
{
|
|
||||||
int imageSize = images->size;
|
|
||||||
int nImages = maxloc->size;
|
|
||||||
|
|
||||||
cudaKernel_maxloc<NTHREADS><<<nImages, NTHREADS,0, stream>>>
|
|
||||||
(images->devData, maxloc->devData, imageSize, nImages);
|
|
||||||
getLastCudaError("cudaKernel find max location 1D error\n");
|
|
||||||
}
|
|
||||||
|
|
||||||
//kernel and function for 2D array(image), find max location only
|
|
||||||
template <const int BLOCKSIZE>
|
|
||||||
__global__ void cudaKernel_maxloc2D(const float* const images, int2* maxloc, float* maxval, const size_t imageNX, const size_t imageNY, const size_t nImages)
|
|
||||||
{
|
{
|
||||||
__shared__ float shval[BLOCKSIZE];
|
__shared__ float shval[BLOCKSIZE];
|
||||||
__shared__ int shloc[BLOCKSIZE];
|
__shared__ int shloc[BLOCKSIZE];
|
||||||
|
@ -153,6 +81,14 @@ __global__ void cudaKernel_maxloc2D(const float* const images, int2* maxloc, fl
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Find both the maximum value and the location for a batch of 2D images
|
||||||
|
* @param[in] images input batch of images
|
||||||
|
* @param[out] maxval arrays to hold the max values
|
||||||
|
* @param[out] maxloc arrays to hold the max locations
|
||||||
|
* @param[in] stream cudaStream
|
||||||
|
* @note This routine is overloaded with the routine without maxval
|
||||||
|
*/
|
||||||
void cuArraysMaxloc2D(cuArrays<float> *images, cuArrays<int2> *maxloc,
|
void cuArraysMaxloc2D(cuArrays<float> *images, cuArrays<int2> *maxloc,
|
||||||
cuArrays<float> *maxval, cudaStream_t stream)
|
cuArrays<float> *maxval, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
|
@ -181,6 +117,13 @@ __global__ void cudaKernel_maxloc2D(const float* const images, int2* maxloc, co
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Find (only) the maximum location for a batch of 2D images
|
||||||
|
* @param[in] images input batch of images
|
||||||
|
* @param[out] maxloc arrays to hold the max locations
|
||||||
|
* @param[in] stream cudaStream
|
||||||
|
* @note This routine is overloaded with the routine with maxval
|
||||||
|
*/
|
||||||
void cuArraysMaxloc2D(cuArrays<float> *images, cuArrays<int2> *maxloc, cudaStream_t stream)
|
void cuArraysMaxloc2D(cuArrays<float> *images, cuArrays<int2> *maxloc, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
cudaKernel_maxloc2D<NTHREADS><<<images->count, NTHREADS, 0, stream>>>
|
cudaKernel_maxloc2D<NTHREADS><<<images->count, NTHREADS, 0, stream>>>
|
||||||
|
@ -188,10 +131,7 @@ void cuArraysMaxloc2D(cuArrays<float> *images, cuArrays<int2> *maxloc, cudaStrea
|
||||||
getLastCudaError("cudaKernel find max location 2D error\n");
|
getLastCudaError("cudaKernel find max location 2D error\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// cuda kernel for cuSubPixelOffset
|
||||||
|
|
||||||
|
|
||||||
//determine final offset values
|
|
||||||
__global__ void cuSubPixelOffset_kernel(const int2 *offsetInit, const int2 *offsetZoomIn,
|
__global__ void cuSubPixelOffset_kernel(const int2 *offsetInit, const int2 *offsetZoomIn,
|
||||||
float2 *offsetFinal,
|
float2 *offsetFinal,
|
||||||
const float OSratio,
|
const float OSratio,
|
||||||
|
@ -204,123 +144,118 @@ __global__ void cuSubPixelOffset_kernel(const int2 *offsetInit, const int2 *offs
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/// determine the final offset value
|
/**
|
||||||
/// @param[in]
|
* Determine the final offset value
|
||||||
|
* @param[in] offsetInit max location (adjusted to the starting location for extraction) determined from
|
||||||
void cuSubPixelOffset(cuArrays<int2> *offsetInit, cuArrays<int2> *offsetZoomIn, cuArrays<float2> *offsetFinal,
|
* the cross-correlation before oversampling, in dimensions of pixel
|
||||||
|
* @param[in] offsetZoomIn max location from the oversampled cross-correlation surface
|
||||||
|
* @param[out] offsetFinal the combined offset value
|
||||||
|
* @param[in] OversampleRatioZoomIn the correlation surface oversampling factor
|
||||||
|
* @param[in] OversampleRatioRaw the oversampling factor of reference/secondary windows before cross-correlation
|
||||||
|
* @param[in] xHalfRangInit the original half search range along x, to be subtracted
|
||||||
|
* @param[in] yHalfRangInit the original half search range along y, to be subtracted
|
||||||
|
*
|
||||||
|
* 1. Cross-correlation is performed at first for the un-oversampled data with a larger search range.
|
||||||
|
* The secondary window is then extracted to a smaller size (a smaller search range) around the max location.
|
||||||
|
* The extraction starting location (offsetInit) - original half search range (xHalfRangeInit, yHalfRangeInit)
|
||||||
|
* = pixel size offset
|
||||||
|
* 2. Reference/secondary windows are then oversampled by OversampleRatioRaw, and cross-correlated.
|
||||||
|
* 3. The correlation surface is further oversampled by OversampleRatioZoomIn.
|
||||||
|
* The overall oversampling factor is OversampleRatioZoomIn*OversampleRatioRaw.
|
||||||
|
* The max location in oversampled correlation surface (offsetZoomIn) / overall oversampling factor
|
||||||
|
* = subpixel offset
|
||||||
|
* Final offset = pixel size offset + subpixel offset
|
||||||
|
*/
|
||||||
|
void cuSubPixelOffset(cuArrays<int2> *offsetInit, cuArrays<int2> *offsetZoomIn,
|
||||||
|
cuArrays<float2> *offsetFinal,
|
||||||
int OverSampleRatioZoomin, int OverSampleRatioRaw,
|
int OverSampleRatioZoomin, int OverSampleRatioRaw,
|
||||||
int xHalfRangeInit, int yHalfRangeInit,
|
int xHalfRangeInit, int yHalfRangeInit,
|
||||||
int xHalfRangeZoomIn, int yHalfRangeZoomIn,
|
|
||||||
cudaStream_t stream)
|
cudaStream_t stream)
|
||||||
{
|
{
|
||||||
int size = offsetInit->getSize();
|
int size = offsetInit->getSize();
|
||||||
float OSratio = 1.0f/(float)(OverSampleRatioZoomin*OverSampleRatioRaw);
|
float OSratio = 1.0f/(float)(OverSampleRatioZoomin*OverSampleRatioRaw);
|
||||||
float xoffset = xHalfRangeInit ;
|
float xoffset = xHalfRangeInit ;
|
||||||
float yoffset = yHalfRangeInit ;
|
float yoffset = yHalfRangeInit ;
|
||||||
//std::cout << "subpixel" << xoffset << " " << yoffset << " ratio " << OSratio << std::endl;
|
|
||||||
|
|
||||||
cuSubPixelOffset_kernel<<<IDIVUP(size, NTHREADS), NTHREADS, 0, stream>>>
|
cuSubPixelOffset_kernel<<<IDIVUP(size, NTHREADS), NTHREADS, 0, stream>>>
|
||||||
(offsetInit->devData, offsetZoomIn->devData,
|
(offsetInit->devData, offsetZoomIn->devData,
|
||||||
offsetFinal->devData, OSratio, xoffset, yoffset, size);
|
offsetFinal->devData, OSratio, xoffset, yoffset, size);
|
||||||
getLastCudaError("cuSubPixelOffset_kernel");
|
getLastCudaError("cuSubPixelOffset_kernel");
|
||||||
//offsetInit->debuginfo(stream);
|
|
||||||
//offsetZoomIn->debuginfo(stream);
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline __device__ int dev_padStart(const size_t padDim, const size_t imageDim, const size_t maxloc)
|
// cuda device function to compute the shift of center
|
||||||
|
static inline __device__ int2 dev_adjustOffset(
|
||||||
|
const int oldRange, const int newRange, const int maxloc)
|
||||||
{
|
{
|
||||||
int halfPadSize = padDim/2;
|
// determine the starting point around the maxloc
|
||||||
int start = maxloc - halfPadSize;
|
// oldRange is the half search window size, e.g., = 32
|
||||||
if(start <0) start =0;
|
// newRange is the half extract size, e.g., = 4
|
||||||
else if(maxloc > imageDim-halfPadSize-1) start = imageDim-padDim-1;
|
// maxloc is in range [0, 64]
|
||||||
return start;
|
// we want to extract \pm 4 centered at maxloc
|
||||||
}
|
// Examples:
|
||||||
|
// 1. maxloc = 40: we set start=maxloc-newRange=36, and extract [36,44), shift=0
|
||||||
|
// 2. maxloc = 2, start=-2: we set start=0, shift=-2,
|
||||||
|
// (shift means the max is -2 from the extracted center 4)
|
||||||
|
// 3. maxloc =64, start=60: set start=56, shift = 4
|
||||||
|
// (shift means the max is 4 from the extracted center 60).
|
||||||
|
|
||||||
//cuda kernel for cuda_determineInterpZone
|
// shift the max location by -newRange to find the start
|
||||||
__global__ void cudaKernel_determineInterpZone(const int2* maxloc, const size_t nImages,
|
int start = maxloc - newRange;
|
||||||
const size_t imageNX, const size_t imageNY,
|
// if start is within the range, the max location will be in the center
|
||||||
const size_t padNX, const size_t padNY, int2* padOffset)
|
int shift = 0;
|
||||||
{
|
// right boundary
|
||||||
int imageIndex = threadIdx.x + blockDim.x *blockIdx.x; //image index
|
int rbound = 2*(oldRange-newRange);
|
||||||
if (imageIndex < nImages) {
|
if(start<0) // if exceeding the limit on the left
|
||||||
padOffset[imageIndex].x = dev_padStart(padNX, imageNX, maxloc[imageIndex].x);
|
{
|
||||||
padOffset[imageIndex].y = dev_padStart(padNY, imageNY, maxloc[imageIndex].y);
|
// set start at 0 and record the shift of center
|
||||||
|
shift = -start;
|
||||||
|
start = 0;
|
||||||
}
|
}
|
||||||
|
else if(start > rbound ) // if exceeding the limit on the right
|
||||||
|
{
|
||||||
|
//
|
||||||
|
shift = start-rbound;
|
||||||
|
start = rbound;
|
||||||
|
}
|
||||||
|
return make_int2(start, shift);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
// cuda kernel for cuDetermineSecondaryExtractOffset
|
||||||
* determine the interpolation area (pad) from the max location and the padSize
|
__global__ void cudaKernel_determineSecondaryExtractOffset(int2 * maxLoc, int2 *shift,
|
||||||
* the pad will be (maxloc-padSize/2, maxloc+padSize/2-1)
|
|
||||||
* @param[in] maxloc[nImages]
|
|
||||||
* @param[in] padSize
|
|
||||||
* @param[in] imageSize
|
|
||||||
* @param[in] nImages
|
|
||||||
* @param[out] padStart[nImages] return values of maxloc-padSize/2
|
|
||||||
*/
|
|
||||||
void cuDetermineInterpZone(cuArrays<int2> *maxloc, cuArrays<int2> *zoomInOffset, cuArrays<float> *corrOrig, cuArrays<float> *corrZoomIn, cudaStream_t stream)
|
|
||||||
{
|
|
||||||
int threadsperblock=NTHREADS;
|
|
||||||
int blockspergrid=IDIVUP(corrOrig->count, threadsperblock);
|
|
||||||
cudaKernel_determineInterpZone<<<blockspergrid, threadsperblock, 0, stream>>>
|
|
||||||
(maxloc->devData, maxloc->size, corrOrig->height, corrOrig->width, corrZoomIn->height, corrZoomIn->width, zoomInOffset->devData);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
static inline __device__ int dev_adjustOffset(const size_t newRange, const size_t oldRange, const size_t maxloc)
|
|
||||||
{
|
|
||||||
int maxloc_cor = maxloc;
|
|
||||||
if(maxloc_cor < newRange) {maxloc_cor = oldRange;}
|
|
||||||
else if(maxloc_cor > 2*oldRange-newRange) {maxloc_cor = oldRange;}
|
|
||||||
int start = maxloc_cor - newRange;
|
|
||||||
return start;
|
|
||||||
}
|
|
||||||
|
|
||||||
__global__ void cudaKernel_determineSecondaryExtractOffset(int2 * maxloc,
|
|
||||||
const size_t nImages, int xOldRange, int yOldRange, int xNewRange, int yNewRange)
|
const size_t nImages, int xOldRange, int yOldRange, int xNewRange, int yNewRange)
|
||||||
{
|
{
|
||||||
int imageIndex = threadIdx.x + blockDim.x *blockIdx.x; //image index
|
int imageIndex = threadIdx.x + blockDim.x *blockIdx.x; //image index
|
||||||
if (imageIndex < nImages)
|
if (imageIndex < nImages)
|
||||||
{
|
{
|
||||||
maxloc[imageIndex].x = dev_adjustOffset(xNewRange, xOldRange, maxloc[imageIndex].x);
|
// get the starting pixel (stored back to maxloc) and shift
|
||||||
maxloc[imageIndex].y = dev_adjustOffset(yNewRange, yOldRange, maxloc[imageIndex].y);
|
int2 result = dev_adjustOffset(xOldRange, xNewRange, maxLoc[imageIndex].x);
|
||||||
|
maxLoc[imageIndex].x = result.x;
|
||||||
|
shift[imageIndex].x = result.y;
|
||||||
|
result = dev_adjustOffset(yOldRange, yNewRange, maxLoc[imageIndex].y);
|
||||||
|
maxLoc[imageIndex].y = result.x;
|
||||||
|
shift[imageIndex].y = result.y;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
///@param[in] xOldRange, yOldRange are (half) search ranges in first step
|
/**
|
||||||
///@param[in] x
|
* Determine the secondary window extract offset from the max location
|
||||||
void cuDetermineSecondaryExtractOffset(cuArrays<int2> *maxLoc,
|
* @param[in] xOldRange, yOldRange are (half) search ranges in first step
|
||||||
|
* @param[in] xNewRange, yNewRange are (half) search range
|
||||||
|
*
|
||||||
|
* After the first run of cross-correlation, with a larger search range,
|
||||||
|
* We now choose a smaller search range around the max location for oversampling.
|
||||||
|
* This procedure is used to determine the starting pixel locations for extraction.
|
||||||
|
*/
|
||||||
|
void cuDetermineSecondaryExtractOffset(cuArrays<int2> *maxLoc, cuArrays<int2> *maxLocShift,
|
||||||
int xOldRange, int yOldRange, int xNewRange, int yNewRange, cudaStream_t stream)
|
int xOldRange, int yOldRange, int xNewRange, int yNewRange, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
int threadsperblock=NTHREADS;
|
int threadsperblock=NTHREADS;
|
||||||
int blockspergrid=IDIVUP(maxLoc->size, threadsperblock);
|
int blockspergrid=IDIVUP(maxLoc->size, threadsperblock);
|
||||||
cudaKernel_determineSecondaryExtractOffset<<<blockspergrid, threadsperblock, 0, stream>>>
|
cudaKernel_determineSecondaryExtractOffset<<<blockspergrid, threadsperblock, 0, stream>>>
|
||||||
(maxLoc->devData, maxLoc->size, xOldRange, yOldRange, xNewRange, yNewRange);
|
(maxLoc->devData, maxLocShift->devData, maxLoc->size, xOldRange, yOldRange, xNewRange, yNewRange);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// end of file
|
||||||
|
|
||||||
|
|
||||||
__global__ void cudaKernel_maxlocPlusZoominOffset(float *offset, const int * padStart, const int * maxlocUpSample,
|
|
||||||
const size_t nImages, float zoomInRatioX, float zoomInRatioY)
|
|
||||||
{
|
|
||||||
int imageIndex = threadIdx.x + blockDim.x *blockIdx.x; //image index
|
|
||||||
if (imageIndex < nImages)
|
|
||||||
{
|
|
||||||
int index=2*imageIndex;
|
|
||||||
offset[index] = padStart[index] + maxlocUpSample[index] * zoomInRatioX;
|
|
||||||
index++;
|
|
||||||
offset[index] = padStart[index] + maxlocUpSample[index] * zoomInRatioY;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void cuda_maxlocPlusZoominOffset(float *offset, const int * padStart, const int * maxlocUpSample,
|
|
||||||
const size_t nImages, float zoomInRatioX, float zoomInRatioY)
|
|
||||||
{
|
|
||||||
int threadsperblock=NTHREADS;
|
|
||||||
int blockspergrid = IDIVUP(nImages, threadsperblock);
|
|
||||||
cudaKernel_maxlocPlusZoominOffset<<<blockspergrid,threadsperblock>>>(offset, padStart, maxlocUpSample,
|
|
||||||
nImages, zoomInRatioX, zoomInRatioY);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1,15 +1,25 @@
|
||||||
/*
|
/*
|
||||||
* cuOverSampler.cu
|
* @file cuOverSampler.cu
|
||||||
* define cuOverSampler class, to save cufft plans and perform oversampling calculations
|
* @brief Implementations of cuOverSamplerR2R (C2C) class
|
||||||
*/
|
*/
|
||||||
#include "cuArrays.h"
|
|
||||||
|
// my declarations
|
||||||
#include "cuOverSampler.h"
|
#include "cuOverSampler.h"
|
||||||
|
|
||||||
|
// dependencies
|
||||||
|
#include "cuArrays.h"
|
||||||
#include "cuArrays.h"
|
#include "cuArrays.h"
|
||||||
#include "cudaUtil.h"
|
#include "cudaUtil.h"
|
||||||
#include "cudaError.h"
|
#include "cudaError.h"
|
||||||
#include "cuAmpcorUtil.h"
|
#include "cuAmpcorUtil.h"
|
||||||
|
|
||||||
// Oversampler for complex data
|
/**
|
||||||
|
* Constructor for cuOversamplerC2C
|
||||||
|
* @param input image size inNX x inNY
|
||||||
|
* @param output image size outNX x outNY
|
||||||
|
* @param nImages batches
|
||||||
|
* @param stream_ cuda stream
|
||||||
|
*/
|
||||||
cuOverSamplerC2C::cuOverSamplerC2C(int inNX, int inNY, int outNX, int outNY, int nImages, cudaStream_t stream_)
|
cuOverSamplerC2C::cuOverSamplerC2C(int inNX, int inNY, int outNX, int outNY, int nImages, cudaStream_t stream_)
|
||||||
{
|
{
|
||||||
|
|
||||||
|
@ -25,10 +35,13 @@ cuOverSamplerC2C::cuOverSamplerC2C(int inNX, int inNY, int outNX, int outNY, int
|
||||||
int outNYp2 = inNYp2*outNY/inNY;
|
int outNYp2 = inNYp2*outNY/inNY;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
// set up work arrays
|
||||||
workIn = new cuArrays<float2>(inNXp2, inNYp2, nImages);
|
workIn = new cuArrays<float2>(inNXp2, inNYp2, nImages);
|
||||||
workIn->allocate();
|
workIn->allocate();
|
||||||
workOut = new cuArrays<float2>(outNXp2, outNYp2, nImages);
|
workOut = new cuArrays<float2>(outNXp2, outNYp2, nImages);
|
||||||
workOut->allocate();
|
workOut->allocate();
|
||||||
|
|
||||||
|
// set up fft plans
|
||||||
int imageSize = inNXp2*inNYp2;
|
int imageSize = inNXp2*inNYp2;
|
||||||
int n[NRANK] ={inNXp2, inNYp2};
|
int n[NRANK] ={inNXp2, inNYp2};
|
||||||
int fImageSize = inNXp2*inNYp2;
|
int fImageSize = inNXp2*inNYp2;
|
||||||
|
@ -36,9 +49,13 @@ cuOverSamplerC2C::cuOverSamplerC2C(int inNX, int inNY, int outNX, int outNY, int
|
||||||
int fImageOverSampleSize = outNXp2*outNYp2;
|
int fImageOverSampleSize = outNXp2*outNYp2;
|
||||||
cufft_Error(cufftPlanMany(&forwardPlan, NRANK, n, NULL, 1, imageSize, NULL, 1, fImageSize, CUFFT_C2C, nImages));
|
cufft_Error(cufftPlanMany(&forwardPlan, NRANK, n, NULL, 1, imageSize, NULL, 1, fImageSize, CUFFT_C2C, nImages));
|
||||||
cufft_Error(cufftPlanMany(&backwardPlan, NRANK, nOverSample, NULL, 1, fImageOverSampleSize, NULL, 1, fImageOverSampleSize, CUFFT_C2C, nImages));
|
cufft_Error(cufftPlanMany(&backwardPlan, NRANK, nOverSample, NULL, 1, fImageOverSampleSize, NULL, 1, fImageOverSampleSize, CUFFT_C2C, nImages));
|
||||||
|
// set cuda stream
|
||||||
setStream(stream_);
|
setStream(stream_);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Set up cuda stream
|
||||||
|
*/
|
||||||
void cuOverSamplerC2C::setStream(cudaStream_t stream_)
|
void cuOverSamplerC2C::setStream(cudaStream_t stream_)
|
||||||
{
|
{
|
||||||
this->stream = stream_;
|
this->stream = stream_;
|
||||||
|
@ -46,16 +63,12 @@ void cuOverSamplerC2C::setStream(cudaStream_t stream_)
|
||||||
cufftSetStream(backwardPlan, stream);
|
cufftSetStream(backwardPlan, stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
//tested
|
/**
|
||||||
void cuOverSamplerC2C::execute(cuArrays<float2> *imagesIn, cuArrays<float2> *imagesOut)
|
* Execute fft oversampling
|
||||||
{
|
* @param[in] imagesIn input batch of images
|
||||||
//cuArraysCopyPadded(imagesIn, workIn, stream);
|
* @param[out] imagesOut output batch of images
|
||||||
cufft_Error(cufftExecC2C(forwardPlan, imagesIn->devData, workIn->devData, CUFFT_INVERSE));
|
* @param[in] method phase deramping method
|
||||||
cuArraysPaddingMany(workIn, workOut, stream);
|
*/
|
||||||
cufft_Error(cufftExecC2C(backwardPlan, workOut->devData, imagesOut->devData, CUFFT_FORWARD));
|
|
||||||
//cuArraysCopyExtract(workOut, imagesOut, make_int2(0,0), stream);
|
|
||||||
}
|
|
||||||
|
|
||||||
void cuOverSamplerC2C::execute(cuArrays<float2> *imagesIn, cuArrays<float2> *imagesOut, int method)
|
void cuOverSamplerC2C::execute(cuArrays<float2> *imagesIn, cuArrays<float2> *imagesOut, int method)
|
||||||
{
|
{
|
||||||
cuDeramp(method, imagesIn, stream);
|
cuDeramp(method, imagesIn, stream);
|
||||||
|
@ -64,31 +77,41 @@ void cuOverSamplerC2C::execute(cuArrays<float2> *imagesIn, cuArrays<float2> *ima
|
||||||
cufft_Error(cufftExecC2C(backwardPlan, workOut->devData, imagesOut->devData, CUFFT_FORWARD));
|
cufft_Error(cufftExecC2C(backwardPlan, workOut->devData, imagesOut->devData, CUFFT_FORWARD));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// destructor
|
||||||
cuOverSamplerC2C::~cuOverSamplerC2C()
|
cuOverSamplerC2C::~cuOverSamplerC2C()
|
||||||
{
|
{
|
||||||
|
// destroy fft handles
|
||||||
cufft_Error(cufftDestroy(forwardPlan));
|
cufft_Error(cufftDestroy(forwardPlan));
|
||||||
cufft_Error(cufftDestroy(backwardPlan));
|
cufft_Error(cufftDestroy(backwardPlan));
|
||||||
|
// deallocate work arrays
|
||||||
delete(workIn);
|
delete(workIn);
|
||||||
delete(workOut);
|
delete(workOut);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// end of cuOverSamplerC2C
|
||||||
|
|
||||||
// oversampler for real data
|
/**
|
||||||
|
* Constructor for cuOversamplerR2R
|
||||||
|
* @param input image size inNX x inNY
|
||||||
|
* @param output image size outNX x outNY
|
||||||
|
* @param nImages the number of images
|
||||||
|
* @param stream_ cuda stream
|
||||||
|
*/
|
||||||
cuOverSamplerR2R::cuOverSamplerR2R(int inNX, int inNY, int outNX, int outNY, int nImages, cudaStream_t stream)
|
cuOverSamplerR2R::cuOverSamplerR2R(int inNX, int inNY, int outNX, int outNY, int nImages, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
|
|
||||||
/*
|
|
||||||
int inNXp2 = nextpower2(inNX);
|
|
||||||
int inNYp2 = nextpower2(inNY);
|
|
||||||
int outNXp2 = inNXp2*outNX/inNX;
|
|
||||||
int outNYp2 = inNYp2*outNY/inNY;
|
|
||||||
*/
|
|
||||||
|
|
||||||
int inNXp2 = inNX;
|
int inNXp2 = inNX;
|
||||||
int inNYp2 = inNY;
|
int inNYp2 = inNY;
|
||||||
int outNXp2 = outNX;
|
int outNXp2 = outNX;
|
||||||
int outNYp2 = outNY;
|
int outNYp2 = outNY;
|
||||||
|
|
||||||
|
/* if expanded to 2^n
|
||||||
|
int inNXp2 = nextpower2(inNX);
|
||||||
|
int inNYp2 = nextpower2(inNY);
|
||||||
|
int outNXp2 = inNXp2*outNX/inNX;
|
||||||
|
int outNYp2 = inNYp2*outNY/inNY;
|
||||||
|
*/
|
||||||
|
|
||||||
int imageSize = inNXp2 *inNYp2;
|
int imageSize = inNXp2 *inNYp2;
|
||||||
int n[NRANK] ={inNXp2, inNYp2};
|
int n[NRANK] ={inNXp2, inNYp2};
|
||||||
int fImageSize = inNXp2*inNYp2;
|
int fImageSize = inNXp2*inNYp2;
|
||||||
|
@ -110,7 +133,11 @@ void cuOverSamplerR2R::setStream(cudaStream_t stream_)
|
||||||
cufftSetStream(backwardPlan, stream);
|
cufftSetStream(backwardPlan, stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
//tested
|
/**
|
||||||
|
* Execute fft oversampling
|
||||||
|
* @param[in] imagesIn input batch of images
|
||||||
|
* @param[out] imagesOut output batch of images
|
||||||
|
*/
|
||||||
void cuOverSamplerR2R::execute(cuArrays<float> *imagesIn, cuArrays<float> *imagesOut)
|
void cuOverSamplerR2R::execute(cuArrays<float> *imagesIn, cuArrays<float> *imagesOut)
|
||||||
{
|
{
|
||||||
cuArraysCopyPadded(imagesIn, workSizeIn, stream);
|
cuArraysCopyPadded(imagesIn, workSizeIn, stream);
|
||||||
|
@ -120,6 +147,7 @@ void cuOverSamplerR2R::execute(cuArrays<float> *imagesIn, cuArrays<float> *image
|
||||||
cuArraysCopyExtract(workSizeOut, imagesOut, make_int2(0,0), stream);
|
cuArraysCopyExtract(workSizeOut, imagesOut, make_int2(0,0), stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// destructor
|
||||||
cuOverSamplerR2R::~cuOverSamplerR2R()
|
cuOverSamplerR2R::~cuOverSamplerR2R()
|
||||||
{
|
{
|
||||||
cufft_Error(cufftDestroy(forwardPlan));
|
cufft_Error(cufftDestroy(forwardPlan));
|
||||||
|
@ -128,6 +156,7 @@ cuOverSamplerR2R::~cuOverSamplerR2R()
|
||||||
workSizeOut->deallocate();
|
workSizeOut->deallocate();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// end of file
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1,10 +1,11 @@
|
||||||
/*
|
/*
|
||||||
* cuOverSampler.h
|
* @file cuOverSampler.h
|
||||||
* oversampling with FFT padding method
|
* @brief Oversampling with FFT padding method
|
||||||
* define cuOverSampler class, to save cufft plans and perform oversampling calculations
|
*
|
||||||
* one float image use cuOverSamplerR2R
|
* Define cuOverSampler class, to save cufft plans and perform oversampling calculations
|
||||||
* one complex image use cuOverSamplerC2C
|
* For float images use cuOverSamplerR2R
|
||||||
* many complex images use cuOverSamplerManyC2C
|
* For complex images use cuOverSamplerC2C
|
||||||
|
* @todo use template class to unify these two classes
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef __CUOVERSAMPLER_H
|
#ifndef __CUOVERSAMPLER_H
|
||||||
|
@ -13,34 +14,40 @@
|
||||||
#include "cuArrays.h"
|
#include "cuArrays.h"
|
||||||
#include "cudaUtil.h"
|
#include "cudaUtil.h"
|
||||||
|
|
||||||
|
// FFT Oversampler for complex images
|
||||||
class cuOverSamplerC2C
|
class cuOverSamplerC2C
|
||||||
{
|
{
|
||||||
private:
|
private:
|
||||||
cufftHandle forwardPlan;
|
cufftHandle forwardPlan; // forward fft handle
|
||||||
cufftHandle backwardPlan;
|
cufftHandle backwardPlan; // backward fft handle
|
||||||
cudaStream_t stream;
|
cudaStream_t stream; // cuda stream
|
||||||
cuArrays<float2> *workIn;
|
cuArrays<float2> *workIn; // work array to hold forward fft data
|
||||||
cuArrays<float2> *workOut;
|
cuArrays<float2> *workOut; // work array to hold padded data
|
||||||
public:
|
public:
|
||||||
|
// disable the default constructor
|
||||||
|
cuOverSamplerC2C() = delete;
|
||||||
|
// constructor
|
||||||
cuOverSamplerC2C(int inNX, int inNY, int outNX, int outNY, int nImages, cudaStream_t stream_);
|
cuOverSamplerC2C(int inNX, int inNY, int outNX, int outNY, int nImages, cudaStream_t stream_);
|
||||||
|
// set cuda stream
|
||||||
void setStream(cudaStream_t stream_);
|
void setStream(cudaStream_t stream_);
|
||||||
void execute(cuArrays<float2> *imagesIn, cuArrays<float2> *imagesOut);
|
// execute oversampling
|
||||||
void execute(cuArrays<float2> *imagesIn, cuArrays<float2> *imagesOut, int deramp_method);
|
void execute(cuArrays<float2> *imagesIn, cuArrays<float2> *imagesOut, int deramp_method=0);
|
||||||
|
// destructor
|
||||||
~cuOverSamplerC2C();
|
~cuOverSamplerC2C();
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// FFT Oversampler for complex images
|
||||||
class cuOverSamplerR2R
|
class cuOverSamplerR2R
|
||||||
{
|
{
|
||||||
private:
|
private:
|
||||||
cufftHandle forwardPlan;
|
cufftHandle forwardPlan;
|
||||||
cufftHandle backwardPlan;
|
cufftHandle backwardPlan;
|
||||||
|
cudaStream_t stream;
|
||||||
cuArrays<float2> *workSizeIn;
|
cuArrays<float2> *workSizeIn;
|
||||||
cuArrays<float2> *workSizeOut;
|
cuArrays<float2> *workSizeOut;
|
||||||
cudaStream_t stream;
|
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
cuOverSamplerR2R() = delete;
|
||||||
cuOverSamplerR2R(int inNX, int inNY, int outNX, int outNY, int nImages, cudaStream_t stream_);
|
cuOverSamplerR2R(int inNX, int inNY, int outNX, int outNY, int nImages, cudaStream_t stream_);
|
||||||
void setStream(cudaStream_t stream_);
|
void setStream(cudaStream_t stream_);
|
||||||
void execute(cuArrays<float> *imagesIn, cuArrays<float> *imagesOut);
|
void execute(cuArrays<float> *imagesIn, cuArrays<float> *imagesOut);
|
||||||
|
@ -48,7 +55,8 @@ public:
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
#endif
|
#endif //__CUOVERSAMPLER_H
|
||||||
|
// end of file
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1,43 +1,43 @@
|
||||||
/*
|
/**
|
||||||
* cuSincOverSampler.cu
|
* @file cuSincOverSampler.cu
|
||||||
|
* @brief Implementation for cuSinOversampler class
|
||||||
|
*
|
||||||
*/
|
*/
|
||||||
#include "cuArrays.h"
|
|
||||||
|
// my declaration
|
||||||
#include "cuSincOverSampler.h"
|
#include "cuSincOverSampler.h"
|
||||||
|
|
||||||
|
// dependencies
|
||||||
#include "cuArrays.h"
|
#include "cuArrays.h"
|
||||||
#include "cudaUtil.h"
|
#include "cudaUtil.h"
|
||||||
#include "cudaError.h"
|
#include "cudaError.h"
|
||||||
#include "cuAmpcorUtil.h"
|
#include "cuAmpcorUtil.h"
|
||||||
|
|
||||||
cuSincOverSamplerR2R::cuSincOverSamplerR2R(const int i_intplength_, const int i_covs_, cudaStream_t stream_)
|
/**
|
||||||
: i_intplength(i_intplength_), i_covs(i_covs_)
|
* cuSincOverSamplerR2R constructor
|
||||||
|
* @param i_covs oversampling factor
|
||||||
|
* @param stream cuda stream
|
||||||
|
*/
|
||||||
|
cuSincOverSamplerR2R::cuSincOverSamplerR2R(const int i_covs_, cudaStream_t stream_)
|
||||||
|
: i_covs(i_covs_)
|
||||||
{
|
{
|
||||||
setStream(stream_);
|
stream = stream_;
|
||||||
//i_intplength = int(r_relfiltlen/r_beta);
|
i_intplength = int(r_relfiltlen/r_beta+0.5f);
|
||||||
r_relfiltlen = r_beta * i_intplength;
|
|
||||||
i_filtercoef = i_intplength*i_decfactor;
|
i_filtercoef = i_intplength*i_decfactor;
|
||||||
r_wgthgt = (1.0f - r_pedestal)/2.0f;
|
|
||||||
r_soff = (i_filtercoef)/2.0f;
|
|
||||||
r_soff_inverse = 1.0f/r_soff;
|
|
||||||
r_decfactor_inverse = 1.0f/i_decfactor;
|
|
||||||
checkCudaErrors(cudaMalloc((void **)&r_filter, (i_filtercoef+1)*sizeof(float)));
|
checkCudaErrors(cudaMalloc((void **)&r_filter, (i_filtercoef+1)*sizeof(float)));
|
||||||
cuSetupSincKernel();
|
cuSetupSincKernel();
|
||||||
}
|
}
|
||||||
|
|
||||||
void cuSincOverSamplerR2R::setStream(cudaStream_t stream_)
|
/// destructor
|
||||||
{
|
|
||||||
stream = stream_;
|
|
||||||
}
|
|
||||||
|
|
||||||
cuSincOverSamplerR2R::~cuSincOverSamplerR2R()
|
cuSincOverSamplerR2R::~cuSincOverSamplerR2R()
|
||||||
{
|
{
|
||||||
checkCudaErrors(cudaFree(r_filter));
|
checkCudaErrors(cudaFree(r_filter));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// cuda kernel for cuSetupSincKernel
|
||||||
__global__ void cuSetupSincKernel_kernel(float *r_filter_, const int i_filtercoef_,
|
__global__ void cuSetupSincKernel_kernel(float *r_filter_, const int i_filtercoef_,
|
||||||
const float r_soff_, const float r_wgthgt_, const int i_weight_,
|
const float r_soff_, const float r_wgthgt_, const int i_weight_,
|
||||||
const float r_soff_inverse_, const float r_beta_, const float r_decfactor_inverse_,
|
const float r_soff_inverse_, const float r_beta_, const float r_decfactor_inverse_)
|
||||||
const float r_relfiltlen_inverse_)
|
|
||||||
{
|
{
|
||||||
int i = threadIdx.x + blockDim.x*blockIdx.x;
|
int i = threadIdx.x + blockDim.x*blockIdx.x;
|
||||||
if(i > i_filtercoef_) return;
|
if(i > i_filtercoef_) return;
|
||||||
|
@ -57,76 +57,116 @@ __global__ void cuSetupSincKernel_kernel(float *r_filter_, const int i_filtercoe
|
||||||
else {
|
else {
|
||||||
r_filter_[i] = r_fct;
|
r_filter_[i] = r_fct;
|
||||||
}
|
}
|
||||||
//printf("kernel %d %f\n", i, r_filter_[i]);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Set up the sinc interpolation kernel (coefficient)
|
||||||
|
*/
|
||||||
void cuSincOverSamplerR2R::cuSetupSincKernel()
|
void cuSincOverSamplerR2R::cuSetupSincKernel()
|
||||||
{
|
{
|
||||||
const int nthreads = 128;
|
const int nthreads = 128;
|
||||||
const int nblocks = IDIVUP(i_filtercoef, nthreads);
|
const int nblocks = IDIVUP(i_filtercoef+1, nthreads);
|
||||||
float r_relfiltlen_inverse = 1.0f/r_relfiltlen;
|
|
||||||
|
// compute some commonly used constants at first
|
||||||
|
float r_wgthgt = (1.0f - r_pedestal)/2.0f;
|
||||||
|
float r_soff = (i_filtercoef-1.0f)/2.0f;
|
||||||
|
float r_soff_inverse = 1.0f/r_soff;
|
||||||
|
float r_decfactor_inverse = 1.0f/i_decfactor;
|
||||||
|
|
||||||
cuSetupSincKernel_kernel<<<nblocks, nthreads, 0, stream>>> (
|
cuSetupSincKernel_kernel<<<nblocks, nthreads, 0, stream>>> (
|
||||||
r_filter, i_filtercoef, r_soff, r_wgthgt, i_weight,
|
r_filter, i_filtercoef, r_soff, r_wgthgt, i_weight,
|
||||||
r_soff_inverse, r_beta, r_decfactor_inverse, r_relfiltlen_inverse);
|
r_soff_inverse, r_beta, r_decfactor_inverse);
|
||||||
getLastCudaError("cuSetupSincKernel_kernel");
|
getLastCudaError("cuSetupSincKernel_kernel");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// cuda kernel for cuSincOverSamplerR2R::execute
|
||||||
__global__ void cuSincInterpolation_kernel(const int nImages,
|
__global__ void cuSincInterpolation_kernel(const int nImages,
|
||||||
const float * imagesIn, const int inNX, const int inNY,
|
const float * imagesIn, const int inNX, const int inNY,
|
||||||
float * imagesOut, const int outNX, const int outNY,
|
float * imagesOut, const int outNX, const int outNY,
|
||||||
|
int2 *centerShift, int factor,
|
||||||
const float * r_filter_, const int i_covs_, const int i_decfactor_, const int i_intplength_,
|
const float * r_filter_, const int i_covs_, const int i_decfactor_, const int i_intplength_,
|
||||||
const int i_startX, const int i_startY, const int i_int_size)
|
const int i_startX, const int i_startY, const int i_int_size)
|
||||||
{
|
{
|
||||||
|
// get image index
|
||||||
int idxImage = blockIdx.z;
|
int idxImage = blockIdx.z;
|
||||||
|
// get the xy threads for output image pixel indices
|
||||||
int idxX = threadIdx.x + blockDim.x*blockIdx.x;
|
int idxX = threadIdx.x + blockDim.x*blockIdx.x;
|
||||||
int idxY = threadIdx.y + blockDim.y*blockIdx.y;
|
int idxY = threadIdx.y + blockDim.y*blockIdx.y;
|
||||||
|
// cuda: to make sure extra allocated threads doing nothing
|
||||||
if(idxImage >=nImages || idxX >= i_int_size || idxY >= i_int_size) return;
|
if(idxImage >=nImages || idxX >= i_int_size || idxY >= i_int_size) return;
|
||||||
int outx = idxX + i_startX;
|
// decide the center shift
|
||||||
int outy = idxY + i_startY;
|
int2 shift = centerShift[idxImage];
|
||||||
|
// determine the output pixel indices
|
||||||
|
int outx = idxX + i_startX + shift.x*factor;
|
||||||
|
if (outx >= outNX) outx-=outNX;
|
||||||
|
int outy = idxY + i_startY + shift.y*factor;
|
||||||
|
if (outy >= outNY) outy-=outNY;
|
||||||
|
// flattened to 1d
|
||||||
int idxOut = idxImage*outNX*outNY + outx*outNY + outy;
|
int idxOut = idxImage*outNX*outNY + outx*outNY + outy;
|
||||||
|
|
||||||
|
// index in input grids
|
||||||
float r_xout = (float)outx/i_covs_;
|
float r_xout = (float)outx/i_covs_;
|
||||||
|
// integer part
|
||||||
int i_xout = int(r_xout);
|
int i_xout = int(r_xout);
|
||||||
|
// factional part
|
||||||
float r_xfrac = r_xout - i_xout;
|
float r_xfrac = r_xout - i_xout;
|
||||||
|
// fractional part in terms of the interpolation kernel grids
|
||||||
int i_xfrac = int(r_xfrac*i_decfactor_);
|
int i_xfrac = int(r_xfrac*i_decfactor_);
|
||||||
|
|
||||||
|
// same procedure for y
|
||||||
float r_yout = (float)outy/i_covs_;
|
float r_yout = (float)outy/i_covs_;
|
||||||
int i_yout = int(r_yout);
|
int i_yout = int(r_yout);
|
||||||
float r_yfrac = r_yout - i_yout;
|
float r_yfrac = r_yout - i_yout;
|
||||||
int i_yfrac = int(r_yfrac*i_decfactor_);
|
int i_yfrac = int(r_yfrac*i_decfactor_);
|
||||||
|
|
||||||
float intpData = 0.0f;
|
// temp variables
|
||||||
float r_sincwgt = 0.0f;
|
float intpData = 0.0f; // interpolated value
|
||||||
float r_sinc_coef;
|
float r_sincwgt = 0.0f; // total filter weight
|
||||||
|
float r_sinc_coef; // filter weight
|
||||||
|
|
||||||
for(int i=0; i < inNX; i++) {
|
// iterate over lines of input image
|
||||||
int i_xindex = i_xout - i + i_intplength_/2;
|
// i=0 -> -i_intplength/2
|
||||||
if(i_xindex < 0) i_xindex+= i_intplength_;
|
for(int i=0; i < i_intplength_; i++) {
|
||||||
if(i_xindex >= i_intplength_) i_xindex-=i_intplength_;
|
// find the corresponding pixel in input(unsampled) image
|
||||||
float r_xsinc_coef = r_filter_[i_xindex*i_decfactor_+i_xfrac];
|
|
||||||
|
|
||||||
for(int j=0; j< inNY; j++) {
|
int inx = i_xout - i + i_intplength_/2;
|
||||||
int i_yindex = i_yout - j + i_intplength_/2;
|
|
||||||
if(i_yindex < 0) i_yindex+= i_intplength_;
|
if(inx < 0) inx+= inNX;
|
||||||
if(i_yindex >= i_intplength_) i_yindex-=i_intplength_;
|
if(inx >= inNX) inx-= inNY;
|
||||||
float r_ysinc_coef = r_filter_[i_yindex*i_decfactor_+i_yfrac];
|
|
||||||
|
float r_xsinc_coef = r_filter_[i*i_decfactor_+i_xfrac];
|
||||||
|
|
||||||
|
for(int j=0; j< i_intplength_; j++) {
|
||||||
|
// find the corresponding pixel in input(unsampled) image
|
||||||
|
int iny = i_yout - j + i_intplength_/2;
|
||||||
|
if(iny < 0) iny += inNY;
|
||||||
|
if(iny >= inNY) iny -= inNY;
|
||||||
|
|
||||||
|
float r_ysinc_coef = r_filter_[j*i_decfactor_+i_yfrac];
|
||||||
|
// multiply the factors from xy
|
||||||
r_sinc_coef = r_xsinc_coef*r_ysinc_coef;
|
r_sinc_coef = r_xsinc_coef*r_ysinc_coef;
|
||||||
|
// add to total sinc weight
|
||||||
r_sincwgt += r_sinc_coef;
|
r_sincwgt += r_sinc_coef;
|
||||||
intpData += imagesIn[idxImage*inNX*inNY+i*inNY+j]*r_sinc_coef;
|
// multiply by the original signal and add to results
|
||||||
/*
|
intpData += imagesIn[idxImage*inNX*inNY+inx*inNY+iny]*r_sinc_coef;
|
||||||
if(outx == 0 && outy == 1) {
|
|
||||||
printf("intp kernel %d %d %d %d %d %d %d %f\n", i, j, i_xindex, i_yindex, i_xindex*i_decfactor_+i_xfrac,
|
|
||||||
i_yindex*i_decfactor_+i_yfrac, idxImage*inNX*inNY+i*inNY+j, r_sinc_coef);
|
|
||||||
}*/
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
imagesOut[idxOut] = intpData/r_sincwgt;
|
imagesOut[idxOut] = intpData/r_sincwgt;
|
||||||
//printf("test int kernel %d %d %f %f %f\n", outx, outy, intpData, r_sincwgt, imagesOut[idxOut]);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Execute sinc interpolation
|
||||||
|
* @param[in] imagesIn input images
|
||||||
|
* @param[out] imagesOut output images
|
||||||
|
* @param[in] centerShift the shift of interpolation center
|
||||||
|
* @param[in] rawOversamplingFactor the multiplier of the centerShift
|
||||||
|
* @note rawOversamplingFactor is for the centerShift, not the signal oversampling factor
|
||||||
|
*/
|
||||||
|
|
||||||
void cuSincOverSamplerR2R::execute(cuArrays<float> *imagesIn, cuArrays<float> *imagesOut)
|
void cuSincOverSamplerR2R::execute(cuArrays<float> *imagesIn, cuArrays<float> *imagesOut,
|
||||||
|
cuArrays<int2> *centerShift, int rawOversamplingFactor)
|
||||||
{
|
{
|
||||||
const int nImages = imagesIn->count;
|
const int nImages = imagesIn->count;
|
||||||
const int inNX = imagesIn->height;
|
const int inNX = imagesIn->height;
|
||||||
|
@ -134,11 +174,13 @@ void cuSincOverSamplerR2R::execute(cuArrays<float> *imagesIn, cuArrays<float> *i
|
||||||
const int outNX = imagesOut->height;
|
const int outNX = imagesOut->height;
|
||||||
const int outNY = imagesOut->width;
|
const int outNY = imagesOut->width;
|
||||||
|
|
||||||
|
// only compute the overampled signals within a window
|
||||||
const int i_int_range = i_sincwindow * i_covs;
|
const int i_int_range = i_sincwindow * i_covs;
|
||||||
|
// set the start pixel, will be shifted by centerShift*oversamplingFactor (from raw image)
|
||||||
const int i_int_startX = outNX/2 - i_int_range;
|
const int i_int_startX = outNX/2 - i_int_range;
|
||||||
const int i_int_startY = outNY/2 - i_int_range;
|
const int i_int_startY = outNY/2 - i_int_range;
|
||||||
const int i_int_size = 2*i_int_range + 1;
|
const int i_int_size = 2*i_int_range + 1;
|
||||||
|
// preset all pixels in out image to 0
|
||||||
imagesOut->setZero(stream);
|
imagesOut->setZero(stream);
|
||||||
|
|
||||||
static const int nthreads = 16;
|
static const int nthreads = 16;
|
||||||
|
@ -147,11 +189,9 @@ void cuSincOverSamplerR2R::execute(cuArrays<float> *imagesIn, cuArrays<float> *i
|
||||||
cuSincInterpolation_kernel<<<blockspergrid, threadsperblock, 0, stream>>>(nImages,
|
cuSincInterpolation_kernel<<<blockspergrid, threadsperblock, 0, stream>>>(nImages,
|
||||||
imagesIn->devData, inNX, inNY,
|
imagesIn->devData, inNX, inNY,
|
||||||
imagesOut->devData, outNX, outNY,
|
imagesOut->devData, outNX, outNY,
|
||||||
|
centerShift->devData, rawOversamplingFactor,
|
||||||
r_filter, i_covs, i_decfactor, i_intplength, i_int_startX, i_int_startY, i_int_size);
|
r_filter, i_covs, i_decfactor, i_intplength, i_int_startX, i_int_startY, i_int_size);
|
||||||
getLastCudaError("cuSincInterpolation_kernel");
|
getLastCudaError("cuSincInterpolation_kernel");
|
||||||
}
|
}
|
||||||
|
|
||||||
// end of file
|
// end of file
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1,48 +1,63 @@
|
||||||
/*
|
/*
|
||||||
* cuSincOverSampler.h
|
* @file cuSincOverSampler.h
|
||||||
* oversampling with sinc interpolation method
|
* @brief A class performs sinc interpolation/oversampling
|
||||||
|
*
|
||||||
|
* Oversample a given 2d signal by i_covs factor.
|
||||||
|
* Only signals within(-i_sincwindow, i_sincwindow) are oversampled
|
||||||
|
* The interpolation zone may also be shifted, if the max location is not at the center.
|
||||||
|
*
|
||||||
|
* The sinc interpolation is based on the formula
|
||||||
|
* $$x(t) = \sum_{n=-\infty}^{\infty} x_n f( \Omega_c t-n )$$
|
||||||
|
* with $f(x) = \text{sinc}(x)$, or a complex filter
|
||||||
|
* such as the sinc(x) convoluted with Hamming Window used here.
|
||||||
|
* In practice, a finite length of n (i_intplength) is used for interpolation.
|
||||||
|
*
|
||||||
|
* @note most parameters are currently hardwired; you need to change
|
||||||
|
* the source code below if you need to adjust the parameters.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
// code guard
|
||||||
#ifndef __CUSINCOVERSAMPLER_H
|
#ifndef __CUSINCOVERSAMPLER_H
|
||||||
#define __CUSINCOVERSAMPLER_H
|
#define __CUSINCOVERSAMPLER_H
|
||||||
|
|
||||||
|
// dependencites
|
||||||
#include "cuArrays.h"
|
#include "cuArrays.h"
|
||||||
#include "cudaUtil.h"
|
#include "cudaUtil.h"
|
||||||
|
|
||||||
#define PI 3.141592654f
|
#ifndef PI
|
||||||
|
#define PI 3.14159265359f
|
||||||
|
#endif
|
||||||
|
|
||||||
class cuSincOverSamplerR2R
|
class cuSincOverSamplerR2R
|
||||||
{
|
{
|
||||||
private:
|
private:
|
||||||
static const int i_sincwindow = 2;
|
static const int i_sincwindow = 2;
|
||||||
static const int i_decfactor = 4096; // division between orignal pixels
|
///< the oversampling is only performed within \pm i_sincwindow*i_covs around the peak
|
||||||
static const int i_weight = 1; // weight for cos() pedestal
|
static const int i_weight = 1; ///< weight for cos() pedestal
|
||||||
const float r_pedestal = 0.0f; // height of pedestal
|
const float r_pedestal = 0.0f; ///< height of pedestal
|
||||||
const float r_beta = 0.75f; // factor r_relfiltlen/i_intplength
|
const float r_beta = 0.75f; ///< a low-band pass
|
||||||
|
const float r_relfiltlen = 6.0f; ///< relative filter length
|
||||||
|
|
||||||
int i_covs;
|
static const int i_decfactor = 4096; ///< max decimals between original grid to set up the sinc kernel
|
||||||
int i_intplength;
|
|
||||||
float r_relfiltlen;
|
int i_covs; ///< oversampling factor
|
||||||
int i_filtercoef;
|
int i_intplength; ///< actual filter length = r_relfiltlen/r_beta
|
||||||
float r_wgthgt;
|
int i_filtercoef; //< length of the sinc kernel i_intplength*i_decfactor+1
|
||||||
float r_soff;
|
|
||||||
float r_soff_inverse;
|
float * r_filter; // sinc kernel with size i_filtercoef
|
||||||
float r_decfactor_inverse;
|
|
||||||
|
|
||||||
cudaStream_t stream;
|
cudaStream_t stream;
|
||||||
float * r_filter;
|
|
||||||
|
|
||||||
public:
|
public:
|
||||||
cuSincOverSamplerR2R(const int i_intplength_, const int i_covs_, cudaStream_t stream_);
|
// constructor
|
||||||
void setStream(cudaStream_t stream_);
|
cuSincOverSamplerR2R(const int i_covs_, cudaStream_t stream_);
|
||||||
|
// set up sinc interpolation coefficients
|
||||||
void cuSetupSincKernel();
|
void cuSetupSincKernel();
|
||||||
void execute(cuArrays<float> *imagesIn, cuArrays<float> *imagesOut);
|
// execute interface
|
||||||
|
void execute(cuArrays<float> *imagesIn, cuArrays<float> *imagesOut, cuArrays<int2> *center, int oversamplingFactor);
|
||||||
|
// destructor
|
||||||
~cuSincOverSamplerR2R();
|
~cuSincOverSamplerR2R();
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif // _CUSINCOVERSAMPLER_H
|
#endif // _CUSINCOVERSAMPLER_H
|
||||||
|
|
||||||
// end of file
|
// end of file
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1,13 +1,10 @@
|
||||||
/**
|
/**
|
||||||
* cudaError.h
|
* @file cudaError.h
|
||||||
* Purpose: check various errors in cuda/cufft/cublas calls
|
* @brief Define error checking in cuda calls
|
||||||
* Lijun Zhu
|
*
|
||||||
* Last modified 09/07/2017
|
|
||||||
**/
|
**/
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
// code guard
|
||||||
// These are CUDA Helper functions for initialization and error checking
|
|
||||||
|
|
||||||
#ifndef _CUDAERROR_CUH
|
#ifndef _CUDAERROR_CUH
|
||||||
#define _CUDAERROR_CUH
|
#define _CUDAERROR_CUH
|
||||||
|
|
||||||
|
@ -34,22 +31,20 @@
|
||||||
template<typename T >
|
template<typename T >
|
||||||
void check(T result, char const *const func, const char *const file, int const line)
|
void check(T result, char const *const func, const char *const file, int const line)
|
||||||
{
|
{
|
||||||
#ifdef CUDA_ERROR_CHECK
|
|
||||||
if (result)
|
if (result)
|
||||||
{
|
{
|
||||||
|
|
||||||
fprintf(stderr, "CUDA error at %s:%d code=%d(%s) \n",
|
fprintf(stderr, "CUDA error at %s:%d code=%d(%s) \n",
|
||||||
file, line, static_cast<unsigned int>(result), func);
|
file, line, static_cast<unsigned int>(result), func);
|
||||||
DEVICE_RESET
|
DEVICE_RESET
|
||||||
// Make sure we call CUDA Device Reset before exiting
|
// Make sure we call CUDA Device Reset before exiting
|
||||||
exit(EXIT_FAILURE);
|
exit(EXIT_FAILURE);
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// This will output the proper error string when calling cudaGetLastError
|
// This will output the proper error string when calling cudaGetLastError
|
||||||
inline void __getLastCudaError(const char *errorMessage, const char *file, const int line)
|
inline void __getLastCudaError(const char *errorMessage, const char *file, const int line)
|
||||||
{
|
{
|
||||||
#ifdef CUDA_ERROR_CHECK
|
|
||||||
cudaError_t err = cudaGetLastError();
|
cudaError_t err = cudaGetLastError();
|
||||||
|
|
||||||
if (cudaSuccess != err)
|
if (cudaSuccess != err)
|
||||||
|
@ -59,13 +54,17 @@ inline void __getLastCudaError(const char *errorMessage, const char *file, const
|
||||||
DEVICE_RESET
|
DEVICE_RESET
|
||||||
exit(EXIT_FAILURE);
|
exit(EXIT_FAILURE);
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// This will output the proper CUDA error strings in the event that a CUDA host call returns an error
|
// This will output the proper CUDA error strings in the event that a CUDA host call returns an error
|
||||||
|
#ifdef CUDA_ERROR_CHECK
|
||||||
#define checkCudaErrors(val) check ( (val), #val, __FILE__, __LINE__ )
|
#define checkCudaErrors(val) check ( (val), #val, __FILE__, __LINE__ )
|
||||||
#define cufft_Error(val) check ( (val), #val, __FILE__, __LINE__ )
|
#define cufft_Error(val) check ( (val), #val, __FILE__, __LINE__ )
|
||||||
#define cublas_Error(val) check ( (val), #val, __FILE__, __LINE__ )
|
|
||||||
#define getLastCudaError(var) __getLastCudaError (var, __FILE__, __LINE__)
|
#define getLastCudaError(var) __getLastCudaError (var, __FILE__, __LINE__)
|
||||||
|
#else
|
||||||
|
#define checkCudaErrors(val) val
|
||||||
|
#define cufft_Error(val) val
|
||||||
|
#define getLastCudaError(val)
|
||||||
|
#endif //CUDA_ERROR_CHECK
|
||||||
|
|
||||||
#endif //__CUDAERROR_CUH
|
#endif //__CUDAERROR_CUH
|
||||||
|
|
|
@ -1,11 +1,10 @@
|
||||||
/**
|
/**
|
||||||
* cudaUtil.h
|
* @file cudaUtil.h
|
||||||
* Purpose: various cuda related parameters and utilities
|
* @brief Various cuda related parameters and utilities
|
||||||
*
|
*
|
||||||
* Some routines are adapted from Nvidia CUDA samples/common/inc/help_cuda.h
|
* Some routines are adapted from Nvidia CUDA samples/common/inc/help_cuda.h
|
||||||
* Copyright 1993-2013 NVIDIA Corporation. All rights reserved.
|
* Copyright 1993-2013 NVIDIA Corporation. All rights reserved.
|
||||||
*
|
*
|
||||||
*
|
|
||||||
**/
|
**/
|
||||||
|
|
||||||
#ifndef __CUDAUTIL_H
|
#ifndef __CUDAUTIL_H
|
||||||
|
@ -54,7 +53,7 @@ inline int ftoi(float value)
|
||||||
return (value >= 0 ? (int)(value + 0.5) : (int)(value - 0.5));
|
return (value >= 0 ? (int)(value + 0.5) : (int)(value - 0.5));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// compute the next integer in power of 2
|
||||||
inline int nextpower2(int value)
|
inline int nextpower2(int value)
|
||||||
{
|
{
|
||||||
int r=1;
|
int r=1;
|
||||||
|
@ -62,153 +61,6 @@ inline int nextpower2(int value)
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Beginning of GPU Architecture definitions
|
|
||||||
inline int _ConvertSMVer2Cores(int major, int minor)
|
|
||||||
{
|
|
||||||
// Defines for GPU Architecture types (using the SM version to determine the # of cores per SM
|
|
||||||
typedef struct
|
|
||||||
{
|
|
||||||
int SM; // 0xMm (hexidecimal notation), M = SM Major version, and m = SM minor version
|
|
||||||
int Cores;
|
|
||||||
} sSMtoCores;
|
|
||||||
|
|
||||||
sSMtoCores nGpuArchCoresPerSM[] =
|
|
||||||
{
|
|
||||||
{ 0x20, 32 }, // Fermi Generation (SM 2.0) GF100 class
|
|
||||||
{ 0x21, 48 }, // Fermi Generation (SM 2.1) GF10x class
|
|
||||||
{ 0x30, 192}, // Kepler Generation (SM 3.0) GK10x class
|
|
||||||
{ 0x32, 192}, // Kepler Generation (SM 3.2) GK10x class
|
|
||||||
{ 0x35, 192}, // Kepler Generation (SM 3.5) GK11x class
|
|
||||||
{ 0x37, 192}, // Kepler Generation (SM 3.7) GK21x class
|
|
||||||
{ 0x50, 128}, // Maxwell Generation (SM 5.0) GM10x class
|
|
||||||
{ 0x52, 128}, // Maxwell Generation (SM 5.2) GM20x class
|
|
||||||
{ 0x53, 128}, // Maxwell Generation (SM 5.3) GM20x class
|
|
||||||
{ 0x60, 64 }, // Pascal Generation (SM 6.0) GP100 class
|
|
||||||
{ 0x61, 128}, // Pascal Generation (SM 6.1) GP10x class
|
|
||||||
{ 0x62, 128}, // Pascal Generation (SM 6.2) GP10x class
|
|
||||||
{ -1, -1 }
|
|
||||||
};
|
|
||||||
|
|
||||||
int index = 0;
|
|
||||||
|
|
||||||
while (nGpuArchCoresPerSM[index].SM != -1)
|
|
||||||
{
|
|
||||||
if (nGpuArchCoresPerSM[index].SM == ((major << 4) + minor))
|
|
||||||
{
|
|
||||||
return nGpuArchCoresPerSM[index].Cores;
|
|
||||||
}
|
|
||||||
|
|
||||||
index++;
|
|
||||||
}
|
|
||||||
|
|
||||||
// If we don't find the values, we default use the previous one to run properly
|
|
||||||
printf("MapSMtoCores for SM %d.%d is undefined. Default to use %d Cores/SM\n", major, minor, nGpuArchCoresPerSM[index-1].Cores);
|
|
||||||
return nGpuArchCoresPerSM[index-1].Cores;
|
|
||||||
}
|
|
||||||
// end of GPU Architecture definitions
|
|
||||||
|
|
||||||
#ifdef __CUDA_RUNTIME_H__
|
|
||||||
|
|
||||||
// This function returns the best GPU (with maximum GFLOPS)
|
|
||||||
inline int gpuGetMaxGflopsDeviceId()
|
|
||||||
{
|
|
||||||
int current_device = 0, sm_per_multiproc = 0;
|
|
||||||
int max_perf_device = 0;
|
|
||||||
int device_count = 0, best_SM_arch = 0;
|
|
||||||
int devices_prohibited = 0;
|
|
||||||
|
|
||||||
unsigned long long max_compute_perf = 0;
|
|
||||||
cudaDeviceProp deviceProp;
|
|
||||||
cudaGetDeviceCount(&device_count);
|
|
||||||
|
|
||||||
checkCudaErrors(cudaGetDeviceCount(&device_count));
|
|
||||||
|
|
||||||
if (device_count == 0)
|
|
||||||
{
|
|
||||||
fprintf(stderr, "gpuGetMaxGflopsDeviceId() CUDA error: no devices supporting CUDA.\n");
|
|
||||||
exit(EXIT_FAILURE);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Find the best major SM Architecture GPU device
|
|
||||||
while (current_device < device_count)
|
|
||||||
{
|
|
||||||
cudaGetDeviceProperties(&deviceProp, current_device);
|
|
||||||
|
|
||||||
// If this GPU is not running on Compute Mode prohibited, then we can add it to the list
|
|
||||||
if (deviceProp.computeMode != cudaComputeModeProhibited)
|
|
||||||
{
|
|
||||||
if (deviceProp.major > 0 && deviceProp.major < 9999)
|
|
||||||
{
|
|
||||||
best_SM_arch = MAX(best_SM_arch, deviceProp.major);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
devices_prohibited++;
|
|
||||||
}
|
|
||||||
|
|
||||||
current_device++;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (devices_prohibited == device_count)
|
|
||||||
{
|
|
||||||
fprintf(stderr, "gpuGetMaxGflopsDeviceId() CUDA error: all devices have compute mode prohibited.\n");
|
|
||||||
exit(EXIT_FAILURE);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Find the best CUDA capable GPU device
|
|
||||||
current_device = 0;
|
|
||||||
|
|
||||||
while (current_device < device_count)
|
|
||||||
{
|
|
||||||
cudaGetDeviceProperties(&deviceProp, current_device);
|
|
||||||
|
|
||||||
// If this GPU is not running on Compute Mode prohibited, then we can add it to the list
|
|
||||||
if (deviceProp.computeMode != cudaComputeModeProhibited)
|
|
||||||
{
|
|
||||||
if (deviceProp.major == 9999 && deviceProp.minor == 9999)
|
|
||||||
{
|
|
||||||
sm_per_multiproc = 1;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
sm_per_multiproc = _ConvertSMVer2Cores(deviceProp.major, deviceProp.minor);
|
|
||||||
}
|
|
||||||
|
|
||||||
unsigned long long compute_perf = (unsigned long long) deviceProp.multiProcessorCount * sm_per_multiproc * deviceProp.clockRate;
|
|
||||||
|
|
||||||
//fprintf(stderr, "Device %d has performamce %llu.\n", current_device, compute_perf);
|
|
||||||
|
|
||||||
if (compute_perf > max_compute_perf)
|
|
||||||
{
|
|
||||||
/* Let the GPU with max flops win! --LJ
|
|
||||||
// If we find GPU with SM major > 2, search only these
|
|
||||||
if (best_SM_arch > 2)
|
|
||||||
{
|
|
||||||
// If our device==best_SM_arch, choose this, or else pass
|
|
||||||
if (deviceProp.major == best_SM_arch)
|
|
||||||
{
|
|
||||||
max_compute_perf = compute_perf;
|
|
||||||
max_perf_device = current_device;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
max_compute_perf = compute_perf;
|
|
||||||
max_perf_device = current_device;
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
max_compute_perf = compute_perf;
|
|
||||||
max_perf_device = current_device;
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
++current_device;
|
|
||||||
}
|
|
||||||
|
|
||||||
return max_perf_device;
|
|
||||||
}
|
|
||||||
|
|
||||||
// General GPU Device CUDA Initialization
|
// General GPU Device CUDA Initialization
|
||||||
inline int gpuDeviceInit(int devID)
|
inline int gpuDeviceInit(int devID)
|
||||||
|
@ -225,8 +77,7 @@ inline int gpuDeviceInit(int devID)
|
||||||
if (devID < 0 || devID > device_count-1)
|
if (devID < 0 || devID > device_count-1)
|
||||||
{
|
{
|
||||||
fprintf(stderr, "gpuDeviceInit() Device %d is not a valid GPU device. \n", devID);
|
fprintf(stderr, "gpuDeviceInit() Device %d is not a valid GPU device. \n", devID);
|
||||||
fprintf(stderr, "gpuDeviceInit() Finding the GPU with max GFlops instead ...\n");
|
exit(EXIT_FAILURE);
|
||||||
devID = gpuGetMaxGflopsDeviceId();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
checkCudaErrors(cudaSetDevice(devID));
|
checkCudaErrors(cudaSetDevice(devID));
|
||||||
|
@ -235,7 +86,6 @@ inline int gpuDeviceInit(int devID)
|
||||||
return devID;
|
return devID;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// This function lists all available GPUs
|
// This function lists all available GPUs
|
||||||
inline void gpuDeviceList()
|
inline void gpuDeviceList()
|
||||||
{
|
{
|
||||||
|
@ -267,9 +117,7 @@ inline void gpuDeviceList()
|
||||||
}
|
}
|
||||||
current_device++;
|
current_device++;
|
||||||
}
|
}
|
||||||
fprintf(stderr, "Device %d has the max Gflops\n", gpuGetMaxGflopsDeviceId());
|
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
#endif //__CUDAUTIL_H
|
#endif //__CUDAUTIL_H
|
||||||
|
//end of file
|
|
@ -1,17 +1,22 @@
|
||||||
#ifndef __DEBUG_H
|
/**
|
||||||
#define __DEBUG_H
|
* @file debug.h
|
||||||
|
* @brief Define flags to control the debugging
|
||||||
|
*
|
||||||
|
* CUAMPCOR_DEBUG is used to output debugging information and intermediate results,
|
||||||
|
* disabled when NDEBUG macro is defined.
|
||||||
|
* CUDA_ERROR_CHECK is always enabled, to check CUDA routine errors
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
#pragma once
|
// code guard
|
||||||
|
#ifndef __CUAMPCOR_DEBUG_H
|
||||||
|
#define __CUAMPCOR_DEBUG_H
|
||||||
|
|
||||||
#include <iostream>
|
#ifndef NDEBUG
|
||||||
#include <assert.h>
|
#define CUAMPCOR_DEBUG
|
||||||
|
#endif //NDEBUG
|
||||||
#define _DEBUG_ 1
|
|
||||||
|
|
||||||
#define CUDA_ERROR_CHECK
|
#define CUDA_ERROR_CHECK
|
||||||
|
|
||||||
#define debugmsg(msg) if(_DEBUG_) fprintf(stderr, msg)
|
#endif //__CUAMPCOR_DEBUG_H
|
||||||
|
//end of file
|
||||||
//__CUDA_ARCH__
|
|
||||||
|
|
||||||
#endif
|
|
|
@ -1,6 +1,6 @@
|
||||||
/*
|
/*
|
||||||
* float2.h
|
* @file float2.h
|
||||||
* define operators and functions on float2 (cuComplex) datatype
|
* @brief Define operators and functions on float2 (cuComplex) datatype
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
@ -11,20 +11,19 @@
|
||||||
|
|
||||||
inline __host__ __device__ void zero(float2 &a) { a.x = 0.0f; a.y = 0.0f; }
|
inline __host__ __device__ void zero(float2 &a) { a.x = 0.0f; a.y = 0.0f; }
|
||||||
|
|
||||||
|
// negative
|
||||||
//negate
|
|
||||||
inline __host__ __device__ float2 operator-(float2 &a)
|
inline __host__ __device__ float2 operator-(float2 &a)
|
||||||
{
|
{
|
||||||
return make_float2(-a.x, -a.y);
|
return make_float2(-a.x, -a.y);
|
||||||
}
|
}
|
||||||
|
|
||||||
//conjugate
|
// complex conjugate
|
||||||
inline __host__ __device__ float2 conjugate(float2 a)
|
inline __host__ __device__ float2 conjugate(float2 a)
|
||||||
{
|
{
|
||||||
return make_float2(a.x, -a.y);
|
return make_float2(a.x, -a.y);
|
||||||
}
|
}
|
||||||
|
|
||||||
//addition
|
// addition
|
||||||
inline __host__ __device__ float2 operator+(float2 a, float2 b)
|
inline __host__ __device__ float2 operator+(float2 a, float2 b)
|
||||||
{
|
{
|
||||||
return make_float2(a.x + b.x, a.y + b.y);
|
return make_float2(a.x + b.x, a.y + b.y);
|
||||||
|
@ -44,7 +43,7 @@ inline __host__ __device__ void operator+=(float2 &a, float b)
|
||||||
a.x += b;
|
a.x += b;
|
||||||
}
|
}
|
||||||
|
|
||||||
//subtraction
|
// subtraction
|
||||||
inline __host__ __device__ float2 operator-(float2 a, float2 b)
|
inline __host__ __device__ float2 operator-(float2 a, float2 b)
|
||||||
{
|
{
|
||||||
return make_float2(a.x - b.x, a.y - b.y);
|
return make_float2(a.x - b.x, a.y - b.y);
|
||||||
|
@ -98,22 +97,8 @@ inline __host__ __device__ float2 complexMul(float2 a, float2 b)
|
||||||
inline __host__ __device__ float2 complexMulConj(float2 a, float2 b)
|
inline __host__ __device__ float2 complexMulConj(float2 a, float2 b)
|
||||||
{
|
{
|
||||||
return make_float2(a.x*b.x + a.y*b.y, a.y*b.x - a.x*b.y);
|
return make_float2(a.x*b.x + a.y*b.y, a.y*b.x - a.x*b.y);
|
||||||
//return a*conjugate(b)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// division
|
|
||||||
/*
|
|
||||||
* inline __host__ __device__ float2 operator/(float2 a, float2 b)
|
|
||||||
{
|
|
||||||
return make_float2(a.x / b.x, a.y / b.y);
|
|
||||||
}
|
|
||||||
inline __host__ __device__ void operator/=(float2 &a, float2 b)
|
|
||||||
{
|
|
||||||
a.x /= b.x;
|
|
||||||
a.y /= b.y;
|
|
||||||
}
|
|
||||||
*
|
|
||||||
* */
|
|
||||||
inline __host__ __device__ float2 operator/(float2 a, float b)
|
inline __host__ __device__ float2 operator/(float2 a, float b)
|
||||||
{
|
{
|
||||||
return make_float2(a.x / b, a.y / b);
|
return make_float2(a.x / b, a.y / b);
|
||||||
|
@ -134,14 +119,11 @@ inline __host__ __device__ float complexArg(float2 a)
|
||||||
return atan2f(a.y, a.x);
|
return atan2f(a.y, a.x);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// make a complex number from phase
|
||||||
inline __host__ __device__ float2 complexExp(float arg)
|
inline __host__ __device__ float2 complexExp(float arg)
|
||||||
{
|
{
|
||||||
return make_float2(cosf(arg), sinf(arg));
|
return make_float2(cosf(arg), sinf(arg));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#endif //__FLOAT2_H
|
||||||
|
// end of file
|
||||||
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
#
|
#
|
||||||
# Implementation: python setup_cuAmpcor.py build_ext --inplace
|
# Implementation: python setup.py build_ext --inplace
|
||||||
# Generates PyCuAmpcor.xxx.so (where xxx is just some local sys-arch information).
|
# Generates PyCuAmpcor.xxx.so (where xxx is just some local sys-arch information).
|
||||||
# Note you need to run your makefile *FIRST* to generate the cuAmpcor.o object.
|
# Note you need to run your makefile *FIRST* to generate the cuAmpcor.o object.
|
||||||
#
|
#
|
||||||
|
@ -11,6 +11,7 @@ from Cython.Build import cythonize
|
||||||
import numpy
|
import numpy
|
||||||
|
|
||||||
setup( name = 'PyCuAmpcor',
|
setup( name = 'PyCuAmpcor',
|
||||||
|
version = '2.0.0',
|
||||||
ext_modules = cythonize(Extension(
|
ext_modules = cythonize(Extension(
|
||||||
"PyCuAmpcor",
|
"PyCuAmpcor",
|
||||||
sources=['PyCuAmpcor.pyx'],
|
sources=['PyCuAmpcor.pyx'],
|
||||||
|
@ -22,6 +23,6 @@ setup( name = 'PyCuAmpcor',
|
||||||
'cuSincOverSampler.o', 'cuDeramp.o','cuAmpcorController.o','cuEstimateStats.o'],
|
'cuSincOverSampler.o', 'cuDeramp.o','cuAmpcorController.o','cuEstimateStats.o'],
|
||||||
extra_link_args=['-L/usr/local/cuda/lib64',
|
extra_link_args=['-L/usr/local/cuda/lib64',
|
||||||
'-L/usr/lib64/nvidia',
|
'-L/usr/lib64/nvidia',
|
||||||
'-lcuda','-lcudart','-lcufft','-lcublas','-lgdal'], # REPLACE FIRST PATH WITH YOUR PATH TO YOUR CUDA LIBRARIES
|
'-lcuda','-lcudart','-lcufft','-lgdal'], # REPLACE FIRST PATH WITH YOUR PATH TO YOUR CUDA LIBRARIES
|
||||||
language='c++'
|
language='c++'
|
||||||
)))
|
)))
|
||||||
|
|
|
@ -7,7 +7,7 @@ import isce
|
||||||
import isceobj
|
import isceobj
|
||||||
from imageMath import IML
|
from imageMath import IML
|
||||||
from osgeo import gdal
|
from osgeo import gdal
|
||||||
from gdalconst import *
|
from osgeo.gdalconst import *
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
import scipy.spatial as SS
|
import scipy.spatial as SS
|
||||||
|
|
|
@ -3,9 +3,9 @@
|
||||||
|
|
||||||
|
|
||||||
[](https://www.python.org/)
|
[](https://www.python.org/)
|
||||||
[](https://github.com/leiyangleon/autoRIFT/releases)
|
[](https://github.com/leiyangleon/autoRIFT/releases)
|
||||||
[](https://github.com/leiyangleon/autoRIFT/blob/master/LICENSE)
|
[](https://github.com/leiyangleon/autoRIFT/blob/master/LICENSE)
|
||||||
[](https://doi.org/10.5281/zenodo.4025445)
|
[](https://doi.org/10.5281/zenodo.4211013)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -22,7 +22,7 @@ Copyright (C) 2019 California Institute of Technology. Government Sponsorship A
|
||||||
|
|
||||||
Link: https://github.com/leiyangleon/autoRIFT
|
Link: https://github.com/leiyangleon/autoRIFT
|
||||||
|
|
||||||
Citation: https://doi.org/10.5281/zenodo.4025445
|
Citation: https://doi.org/10.5281/zenodo.4211013
|
||||||
|
|
||||||
|
|
||||||
## 1. Authors
|
## 1. Authors
|
||||||
|
@ -58,6 +58,7 @@ This effort was funded by the NASA MEaSUREs program in contribution to the Inter
|
||||||
* the current version can be installed with the ISCE software (that supports both Cartesian and radar coordinates) or as a standalone Python module (Cartesian coordinates only)
|
* the current version can be installed with the ISCE software (that supports both Cartesian and radar coordinates) or as a standalone Python module (Cartesian coordinates only)
|
||||||
* when used in combination with the Geogrid Python module (https://github.com/leiyangleon/Geogrid), autoRIFT can be used for feature tracking between image pair over a grid defined in an arbitrary geographic Cartesian (northing/easting) coordinate projection
|
* when used in combination with the Geogrid Python module (https://github.com/leiyangleon/Geogrid), autoRIFT can be used for feature tracking between image pair over a grid defined in an arbitrary geographic Cartesian (northing/easting) coordinate projection
|
||||||
* when the grid is provided in geographic Cartesian (northing/easting) coordinates, outputs are returned in geocoded GeoTIFF image file format with the same EPSG projection code as input search grid
|
* when the grid is provided in geographic Cartesian (northing/easting) coordinates, outputs are returned in geocoded GeoTIFF image file format with the same EPSG projection code as input search grid
|
||||||
|
* **[NEW]** the program now supports fetching optical images (Landsat-8 GeoTIFF and Sentinel-2 COG formats are included) as well as other inputs (e.g. DEM, slope, etc; all in GeoTIFF format) from either local machine or URL links. See the changes on the Geogrid [commands](https://github.com/leiyangleon/Geogrid). When using the autoRIFT commands below, users need to append a url flag: "-urlflag 1" for using URL links and performing coregistration, and "-urlflag 0" (default; can be omitted) for using coregistered files on local machine.
|
||||||
|
|
||||||
## 4. Possible Future Development
|
## 4. Possible Future Development
|
||||||
|
|
||||||
|
|
|
@ -56,7 +56,6 @@ class autoRIFT:
|
||||||
# import scipy.io as sio
|
# import scipy.io as sio
|
||||||
|
|
||||||
|
|
||||||
if self.zeroMask is not None:
|
|
||||||
self.zeroMask = (self.I1 == 0)
|
self.zeroMask = (self.I1 == 0)
|
||||||
|
|
||||||
kernel = np.ones((self.WallisFilterWidth,self.WallisFilterWidth), dtype=np.float32)
|
kernel = np.ones((self.WallisFilterWidth,self.WallisFilterWidth), dtype=np.float32)
|
||||||
|
@ -115,8 +114,6 @@ class autoRIFT:
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
|
|
||||||
if self.zeroMask is not None:
|
|
||||||
self.zeroMask = (self.I1 == 0)
|
|
||||||
|
|
||||||
kernel = -np.ones((self.WallisFilterWidth,self.WallisFilterWidth), dtype=np.float32)
|
kernel = -np.ones((self.WallisFilterWidth,self.WallisFilterWidth), dtype=np.float32)
|
||||||
|
|
||||||
|
@ -140,7 +137,7 @@ class autoRIFT:
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
|
|
||||||
if self.zeroMask is not None:
|
|
||||||
self.zeroMask = (self.I1 == 0)
|
self.zeroMask = (self.I1 == 0)
|
||||||
|
|
||||||
# pdb.set_trace()
|
# pdb.set_trace()
|
||||||
|
@ -161,8 +158,6 @@ class autoRIFT:
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
if self.zeroMask is not None:
|
|
||||||
self.zeroMask = (self.I1 == 0)
|
|
||||||
|
|
||||||
sobelx = cv2.getDerivKernels(1,0,self.WallisFilterWidth)
|
sobelx = cv2.getDerivKernels(1,0,self.WallisFilterWidth)
|
||||||
|
|
||||||
|
@ -194,7 +189,7 @@ class autoRIFT:
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
|
|
||||||
if self.zeroMask is not None:
|
|
||||||
self.zeroMask = (self.I1 == 0)
|
self.zeroMask = (self.I1 == 0)
|
||||||
|
|
||||||
self.I1 = 20.0 * np.log10(self.I1)
|
self.I1 = 20.0 * np.log10(self.I1)
|
||||||
|
@ -216,19 +211,27 @@ class autoRIFT:
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
if self.DataType == 0:
|
if self.DataType == 0:
|
||||||
# validData = np.logical_not(np.isnan(self.I1))
|
if self.zeroMask is not None:
|
||||||
|
# validData = np.logical_not(np.isnan(self.I1))
|
||||||
validData = np.isfinite(self.I1)
|
validData = np.isfinite(self.I1)
|
||||||
S1 = np.std(self.I1[validData])*np.sqrt(self.I1[validData].size/(self.I1[validData].size-1.0))
|
temp = self.I1[validData]
|
||||||
M1 = np.mean(self.I1[validData])
|
else:
|
||||||
|
temp = self.I1
|
||||||
|
S1 = np.std(temp)*np.sqrt(temp.size/(temp.size-1.0))
|
||||||
|
M1 = np.mean(temp)
|
||||||
self.I1 = (self.I1 - (M1 - 3*S1)) / (6*S1) * (2**8 - 0)
|
self.I1 = (self.I1 - (M1 - 3*S1)) / (6*S1) * (2**8 - 0)
|
||||||
|
|
||||||
# self.I1[np.logical_not(np.isfinite(self.I1))] = 0
|
# self.I1[np.logical_not(np.isfinite(self.I1))] = 0
|
||||||
self.I1 = np.round(np.clip(self.I1, 0, 255)).astype(np.uint8)
|
self.I1 = np.round(np.clip(self.I1, 0, 255)).astype(np.uint8)
|
||||||
|
|
||||||
# validData = np.logical_not(np.isnan(self.I2))
|
if self.zeroMask is not None:
|
||||||
|
# validData = np.logical_not(np.isnan(self.I2))
|
||||||
validData = np.isfinite(self.I2)
|
validData = np.isfinite(self.I2)
|
||||||
S2 = np.std(self.I2[validData])*np.sqrt(self.I2[validData].size/(self.I2[validData].size-1.0))
|
temp = self.I2[validData]
|
||||||
M2 = np.mean(self.I2[validData])
|
else:
|
||||||
|
temp = self.I2
|
||||||
|
S2 = np.std(temp)*np.sqrt(temp.size/(temp.size-1.0))
|
||||||
|
M2 = np.mean(temp)
|
||||||
self.I2 = (self.I2 - (M2 - 3*S2)) / (6*S2) * (2**8 - 0)
|
self.I2 = (self.I2 - (M2 - 3*S2)) / (6*S2) * (2**8 - 0)
|
||||||
|
|
||||||
# self.I2[np.logical_not(np.isfinite(self.I2))] = 0
|
# self.I2[np.logical_not(np.isfinite(self.I2))] = 0
|
||||||
|
@ -240,8 +243,11 @@ class autoRIFT:
|
||||||
self.zeroMask = None
|
self.zeroMask = None
|
||||||
|
|
||||||
elif self.DataType == 1:
|
elif self.DataType == 1:
|
||||||
|
|
||||||
|
if self.zeroMask is not None:
|
||||||
self.I1[np.logical_not(np.isfinite(self.I1))] = 0
|
self.I1[np.logical_not(np.isfinite(self.I1))] = 0
|
||||||
self.I2[np.logical_not(np.isfinite(self.I2))] = 0
|
self.I2[np.logical_not(np.isfinite(self.I2))] = 0
|
||||||
|
|
||||||
self.I1 = self.I1.astype(np.float32)
|
self.I1 = self.I1.astype(np.float32)
|
||||||
self.I2 = self.I2.astype(np.float32)
|
self.I2 = self.I2.astype(np.float32)
|
||||||
|
|
||||||
|
@ -409,9 +415,9 @@ class autoRIFT:
|
||||||
# pdb.set_trace()
|
# pdb.set_trace()
|
||||||
|
|
||||||
if self.I1.dtype == np.uint8:
|
if self.I1.dtype == np.uint8:
|
||||||
DxC, DyC = arImgDisp_u(self.I2.copy(), self.I1.copy(), xGrid0C.copy(), yGrid0C.copy(), ChipSizeXC, ChipSizeYC, SearchLimitX0C.copy(), SearchLimitY0C.copy(), Dx0C.copy(), Dy0C.copy(), SubPixFlag, overSampleRatio)
|
DxC, DyC = arImgDisp_u(self.I2.copy(), self.I1.copy(), xGrid0C.copy(), yGrid0C.copy(), ChipSizeXC, ChipSizeYC, SearchLimitX0C.copy(), SearchLimitY0C.copy(), Dx0C.copy(), Dy0C.copy(), SubPixFlag, overSampleRatio, self.MultiThread)
|
||||||
elif self.I1.dtype == np.float32:
|
elif self.I1.dtype == np.float32:
|
||||||
DxC, DyC = arImgDisp_s(self.I2.copy(), self.I1.copy(), xGrid0C.copy(), yGrid0C.copy(), ChipSizeXC, ChipSizeYC, SearchLimitX0C.copy(), SearchLimitY0C.copy(), Dx0C.copy(), Dy0C.copy(), SubPixFlag, overSampleRatio)
|
DxC, DyC = arImgDisp_s(self.I2.copy(), self.I1.copy(), xGrid0C.copy(), yGrid0C.copy(), ChipSizeXC, ChipSizeYC, SearchLimitX0C.copy(), SearchLimitY0C.copy(), Dx0C.copy(), Dy0C.copy(), SubPixFlag, overSampleRatio, self.MultiThread)
|
||||||
else:
|
else:
|
||||||
sys.exit('invalid data type for the image pair which must be unsigned integer 8 or 32-bit float')
|
sys.exit('invalid data type for the image pair which must be unsigned integer 8 or 32-bit float')
|
||||||
|
|
||||||
|
@ -459,9 +465,9 @@ class autoRIFT:
|
||||||
ChipSizeYF = np.float32(np.round(ChipSizeXF*self.ScaleChipSizeY/2)*2)
|
ChipSizeYF = np.float32(np.round(ChipSizeXF*self.ScaleChipSizeY/2)*2)
|
||||||
# pdb.set_trace()
|
# pdb.set_trace()
|
||||||
if self.I1.dtype == np.uint8:
|
if self.I1.dtype == np.uint8:
|
||||||
DxF, DyF = arImgDisp_u(self.I2.copy(), self.I1.copy(), xGrid0.copy(), yGrid0.copy(), ChipSizeXF, ChipSizeYF, SearchLimitX0.copy(), SearchLimitY0.copy(), Dx00.copy(), Dy00.copy(), SubPixFlag, overSampleRatio)
|
DxF, DyF = arImgDisp_u(self.I2.copy(), self.I1.copy(), xGrid0.copy(), yGrid0.copy(), ChipSizeXF, ChipSizeYF, SearchLimitX0.copy(), SearchLimitY0.copy(), Dx00.copy(), Dy00.copy(), SubPixFlag, overSampleRatio, self.MultiThread)
|
||||||
elif self.I1.dtype == np.float32:
|
elif self.I1.dtype == np.float32:
|
||||||
DxF, DyF = arImgDisp_s(self.I2.copy(), self.I1.copy(), xGrid0.copy(), yGrid0.copy(), ChipSizeXF, ChipSizeYF, SearchLimitX0.copy(), SearchLimitY0.copy(), Dx00.copy(), Dy00.copy(), SubPixFlag, overSampleRatio)
|
DxF, DyF = arImgDisp_s(self.I2.copy(), self.I1.copy(), xGrid0.copy(), yGrid0.copy(), ChipSizeXF, ChipSizeYF, SearchLimitX0.copy(), SearchLimitY0.copy(), Dx00.copy(), Dy00.copy(), SubPixFlag, overSampleRatio, self.MultiThread)
|
||||||
else:
|
else:
|
||||||
sys.exit('invalid data type for the image pair which must be unsigned integer 8 or 32-bit float')
|
sys.exit('invalid data type for the image pair which must be unsigned integer 8 or 32-bit float')
|
||||||
|
|
||||||
|
@ -647,6 +653,7 @@ class autoRIFT:
|
||||||
self.CoarseCorCutoff = 0.01
|
self.CoarseCorCutoff = 0.01
|
||||||
self.OverSampleRatio = 16
|
self.OverSampleRatio = 16
|
||||||
self.DataType = 0
|
self.DataType = 0
|
||||||
|
self.MultiThread = 0
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -659,8 +666,23 @@ class AUTO_RIFT_CORE:
|
||||||
self._autoriftcore = None
|
self._autoriftcore = None
|
||||||
|
|
||||||
|
|
||||||
|
var_dict = {}
|
||||||
|
|
||||||
def arImgDisp_u(I1, I2, xGrid, yGrid, ChipSizeX, ChipSizeY, SearchLimitX, SearchLimitY, Dx0, Dy0, SubPixFlag, oversample):
|
def initializer(I1, I2, xGrid, yGrid, SearchLimitX, SearchLimitY, ChipSizeX, ChipSizeY, Dx0, Dy0):
|
||||||
|
var_dict['I1'] = I1
|
||||||
|
var_dict['I2'] = I2
|
||||||
|
var_dict['xGrid'] = xGrid
|
||||||
|
var_dict['yGrid'] = yGrid
|
||||||
|
var_dict['SearchLimitX'] = SearchLimitX
|
||||||
|
var_dict['SearchLimitY'] = SearchLimitY
|
||||||
|
var_dict['ChipSizeX'] = ChipSizeX
|
||||||
|
var_dict['ChipSizeY'] = ChipSizeY
|
||||||
|
var_dict['Dx0'] = Dx0
|
||||||
|
var_dict['Dy0'] = Dy0
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def unpacking_loop_u(tup):
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from . import autoriftcore
|
from . import autoriftcore
|
||||||
|
@ -671,6 +693,168 @@ def arImgDisp_u(I1, I2, xGrid, yGrid, ChipSizeX, ChipSizeY, SearchLimitX, Search
|
||||||
|
|
||||||
core._autoriftcore = autoriftcore.createAutoRiftCore_Py()
|
core._autoriftcore = autoriftcore.createAutoRiftCore_Py()
|
||||||
|
|
||||||
|
k, chunkInds, SubPixFlag, oversample, in_shape, I_shape = tup
|
||||||
|
|
||||||
|
I1 = np.frombuffer(var_dict['I1'],dtype=np.uint8).reshape(I_shape)
|
||||||
|
I2 = np.frombuffer(var_dict['I2'],dtype=np.uint8).reshape(I_shape)
|
||||||
|
xGrid = np.frombuffer(var_dict['xGrid'],dtype=np.float32).reshape(in_shape)
|
||||||
|
yGrid = np.frombuffer(var_dict['yGrid'],dtype=np.float32).reshape(in_shape)
|
||||||
|
SearchLimitX = np.frombuffer(var_dict['SearchLimitX'],dtype=np.float32).reshape(in_shape)
|
||||||
|
SearchLimitY = np.frombuffer(var_dict['SearchLimitY'],dtype=np.float32).reshape(in_shape)
|
||||||
|
ChipSizeX = np.frombuffer(var_dict['ChipSizeX'],dtype=np.float32).reshape(in_shape)
|
||||||
|
ChipSizeY = np.frombuffer(var_dict['ChipSizeY'],dtype=np.float32).reshape(in_shape)
|
||||||
|
Dx0 = np.frombuffer(var_dict['Dx0'],dtype=np.float32).reshape(in_shape)
|
||||||
|
Dy0 = np.frombuffer(var_dict['Dy0'],dtype=np.float32).reshape(in_shape)
|
||||||
|
|
||||||
|
Dx = np.empty(chunkInds.shape,dtype=np.float32)
|
||||||
|
Dx.fill(np.nan)
|
||||||
|
Dy = Dx.copy()
|
||||||
|
|
||||||
|
|
||||||
|
# print(k)
|
||||||
|
# print(np.min(chunkInds),np.max(chunkInds))
|
||||||
|
# print(chunkInds.shape)
|
||||||
|
|
||||||
|
for ind in chunkInds:
|
||||||
|
|
||||||
|
ind1 = np.where(chunkInds == ind)[0][0]
|
||||||
|
|
||||||
|
ii, jj = [v[0] for v in np.unravel_index([ind], in_shape)]
|
||||||
|
|
||||||
|
if (SearchLimitX[ii,jj] == 0) & (SearchLimitY[ii,jj] == 0):
|
||||||
|
continue
|
||||||
|
|
||||||
|
# remember motion terms Dx and Dy correspond to I1 relative to I2 (reference)
|
||||||
|
clx = np.floor(ChipSizeX[ii,jj]/2)
|
||||||
|
ChipRangeX = slice(int(-clx - Dx0[ii,jj] + xGrid[ii,jj]) , int(clx - Dx0[ii,jj] + xGrid[ii,jj]))
|
||||||
|
cly = np.floor(ChipSizeY[ii,jj]/2)
|
||||||
|
ChipRangeY = slice(int(-cly - Dy0[ii,jj] + yGrid[ii,jj]) , int(cly - Dy0[ii,jj] + yGrid[ii,jj]))
|
||||||
|
ChipI = I2[ChipRangeY,ChipRangeX]
|
||||||
|
|
||||||
|
SearchRangeX = slice(int(-clx - SearchLimitX[ii,jj] + xGrid[ii,jj]) , int(clx + SearchLimitX[ii,jj] - 1 + xGrid[ii,jj]))
|
||||||
|
SearchRangeY = slice(int(-cly - SearchLimitY[ii,jj] + yGrid[ii,jj]) , int(cly + SearchLimitY[ii,jj] - 1 + yGrid[ii,jj]))
|
||||||
|
RefI = I1[SearchRangeY,SearchRangeX]
|
||||||
|
|
||||||
|
minChipI = np.min(ChipI)
|
||||||
|
if minChipI < 0:
|
||||||
|
ChipI = ChipI - minChipI
|
||||||
|
if np.all(ChipI == ChipI[0,0]):
|
||||||
|
continue
|
||||||
|
|
||||||
|
minRefI = np.min(RefI)
|
||||||
|
if minRefI < 0:
|
||||||
|
RefI = RefI - minRefI
|
||||||
|
if np.all(RefI == RefI[0,0]):
|
||||||
|
continue
|
||||||
|
|
||||||
|
|
||||||
|
if SubPixFlag:
|
||||||
|
# call C++
|
||||||
|
Dx[ind1], Dy[ind1] = np.float32(autoriftcore.arSubPixDisp_u_Py(core._autoriftcore,ChipI.shape[1],ChipI.shape[0],ChipI.ravel(),RefI.shape[1],RefI.shape[0],RefI.ravel(),oversample))
|
||||||
|
# # call Python
|
||||||
|
# Dx1[ii], Dy1[ii] = arSubPixDisp(ChipI,RefI)
|
||||||
|
else:
|
||||||
|
# call C++
|
||||||
|
Dx[ind1], Dy[ind1] = np.float32(autoriftcore.arPixDisp_u_Py(core._autoriftcore,ChipI.shape[1],ChipI.shape[0],ChipI.ravel(),RefI.shape[1],RefI.shape[0],RefI.ravel()))
|
||||||
|
# # call Python
|
||||||
|
# Dx1[ii], Dy1[ii] = arPixDisp(ChipI,RefI)
|
||||||
|
return Dx, Dy
|
||||||
|
|
||||||
|
|
||||||
|
def unpacking_loop_s(tup):
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
from . import autoriftcore
|
||||||
|
|
||||||
|
core = AUTO_RIFT_CORE()
|
||||||
|
if core._autoriftcore is not None:
|
||||||
|
autoriftcore.destroyAutoRiftCore_Py(core._autoriftcore)
|
||||||
|
|
||||||
|
core._autoriftcore = autoriftcore.createAutoRiftCore_Py()
|
||||||
|
|
||||||
|
|
||||||
|
k, chunkInds, SubPixFlag, oversample, in_shape, I_shape = tup
|
||||||
|
|
||||||
|
I1 = np.frombuffer(var_dict['I1'],dtype=np.float32).reshape(I_shape)
|
||||||
|
I2 = np.frombuffer(var_dict['I2'],dtype=np.float32).reshape(I_shape)
|
||||||
|
xGrid = np.frombuffer(var_dict['xGrid'],dtype=np.float32).reshape(in_shape)
|
||||||
|
yGrid = np.frombuffer(var_dict['yGrid'],dtype=np.float32).reshape(in_shape)
|
||||||
|
SearchLimitX = np.frombuffer(var_dict['SearchLimitX'],dtype=np.float32).reshape(in_shape)
|
||||||
|
SearchLimitY = np.frombuffer(var_dict['SearchLimitY'],dtype=np.float32).reshape(in_shape)
|
||||||
|
ChipSizeX = np.frombuffer(var_dict['ChipSizeX'],dtype=np.float32).reshape(in_shape)
|
||||||
|
ChipSizeY = np.frombuffer(var_dict['ChipSizeY'],dtype=np.float32).reshape(in_shape)
|
||||||
|
Dx0 = np.frombuffer(var_dict['Dx0'],dtype=np.float32).reshape(in_shape)
|
||||||
|
Dy0 = np.frombuffer(var_dict['Dy0'],dtype=np.float32).reshape(in_shape)
|
||||||
|
|
||||||
|
|
||||||
|
Dx = np.empty(chunkInds.shape,dtype=np.float32)
|
||||||
|
Dx.fill(np.nan)
|
||||||
|
Dy = Dx.copy()
|
||||||
|
|
||||||
|
# print(k)
|
||||||
|
# print(np.min(chunkInds),np.max(chunkInds))
|
||||||
|
# print(chunkInds.shape)
|
||||||
|
|
||||||
|
for ind in chunkInds:
|
||||||
|
|
||||||
|
ind1 = np.where(chunkInds == ind)[0][0]
|
||||||
|
|
||||||
|
ii, jj = [v[0] for v in np.unravel_index([ind], in_shape)]
|
||||||
|
|
||||||
|
if (SearchLimitX[ii,jj] == 0) & (SearchLimitY[ii,jj] == 0):
|
||||||
|
continue
|
||||||
|
|
||||||
|
# remember motion terms Dx and Dy correspond to I1 relative to I2 (reference)
|
||||||
|
clx = np.floor(ChipSizeX[ii,jj]/2)
|
||||||
|
ChipRangeX = slice(int(-clx - Dx0[ii,jj] + xGrid[ii,jj]) , int(clx - Dx0[ii,jj] + xGrid[ii,jj]))
|
||||||
|
cly = np.floor(ChipSizeY[ii,jj]/2)
|
||||||
|
ChipRangeY = slice(int(-cly - Dy0[ii,jj] + yGrid[ii,jj]) , int(cly - Dy0[ii,jj] + yGrid[ii,jj]))
|
||||||
|
ChipI = I2[ChipRangeY,ChipRangeX]
|
||||||
|
|
||||||
|
SearchRangeX = slice(int(-clx - SearchLimitX[ii,jj] + xGrid[ii,jj]) , int(clx + SearchLimitX[ii,jj] - 1 + xGrid[ii,jj]))
|
||||||
|
SearchRangeY = slice(int(-cly - SearchLimitY[ii,jj] + yGrid[ii,jj]) , int(cly + SearchLimitY[ii,jj] - 1 + yGrid[ii,jj]))
|
||||||
|
RefI = I1[SearchRangeY,SearchRangeX]
|
||||||
|
|
||||||
|
minChipI = np.min(ChipI)
|
||||||
|
if minChipI < 0:
|
||||||
|
ChipI = ChipI - minChipI
|
||||||
|
if np.all(ChipI == ChipI[0,0]):
|
||||||
|
continue
|
||||||
|
|
||||||
|
minRefI = np.min(RefI)
|
||||||
|
if minRefI < 0:
|
||||||
|
RefI = RefI - minRefI
|
||||||
|
if np.all(RefI == RefI[0,0]):
|
||||||
|
continue
|
||||||
|
|
||||||
|
|
||||||
|
if SubPixFlag:
|
||||||
|
# call C++
|
||||||
|
Dx[ind1], Dy[ind1] = np.float32(autoriftcore.arSubPixDisp_s_Py(core._autoriftcore,ChipI.shape[1],ChipI.shape[0],ChipI.ravel(),RefI.shape[1],RefI.shape[0],RefI.ravel(),oversample))
|
||||||
|
# # call Python
|
||||||
|
# Dx1[ii], Dy1[ii] = arSubPixDisp(ChipI,RefI)
|
||||||
|
else:
|
||||||
|
# call C++
|
||||||
|
Dx[ind1], Dy[ind1] = np.float32(autoriftcore.arPixDisp_s_Py(core._autoriftcore,ChipI.shape[1],ChipI.shape[0],ChipI.ravel(),RefI.shape[1],RefI.shape[0],RefI.ravel()))
|
||||||
|
# # call Python
|
||||||
|
# Dx1[ii], Dy1[ii] = arPixDisp(ChipI,RefI)
|
||||||
|
return Dx, Dy
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def arImgDisp_u(I1, I2, xGrid, yGrid, ChipSizeX, ChipSizeY, SearchLimitX, SearchLimitY, Dx0, Dy0, SubPixFlag, oversample, MultiThread):
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
from . import autoriftcore
|
||||||
|
import multiprocessing as mp
|
||||||
|
|
||||||
|
core = AUTO_RIFT_CORE()
|
||||||
|
if core._autoriftcore is not None:
|
||||||
|
autoriftcore.destroyAutoRiftCore_Py(core._autoriftcore)
|
||||||
|
|
||||||
|
core._autoriftcore = autoriftcore.createAutoRiftCore_Py()
|
||||||
|
|
||||||
|
|
||||||
# if np.size(I1) == 1:
|
# if np.size(I1) == 1:
|
||||||
# if np.logical_not(isinstance(I1,np.uint8) & isinstance(I2,np.uint8)):
|
# if np.logical_not(isinstance(I1,np.uint8) & isinstance(I2,np.uint8)):
|
||||||
|
@ -756,7 +940,7 @@ def arImgDisp_u(I1, I2, xGrid, yGrid, ChipSizeX, ChipSizeY, SearchLimitX, Search
|
||||||
Dx.fill(np.nan)
|
Dx.fill(np.nan)
|
||||||
Dy = Dx.copy()
|
Dy = Dx.copy()
|
||||||
|
|
||||||
|
if MultiThread == 0:
|
||||||
for jj in range(xGrid.shape[1]):
|
for jj in range(xGrid.shape[1]):
|
||||||
if np.all(SearchLimitX[:,jj] == 0) & np.all(SearchLimitY[:,jj] == 0):
|
if np.all(SearchLimitX[:,jj] == 0) & np.all(SearchLimitY[:,jj] == 0):
|
||||||
continue
|
continue
|
||||||
|
@ -800,6 +984,92 @@ def arImgDisp_u(I1, I2, xGrid, yGrid, ChipSizeX, ChipSizeY, SearchLimitX, Search
|
||||||
Dx1[ii], Dy1[ii] = np.float32(autoriftcore.arPixDisp_u_Py(core._autoriftcore,ChipI.shape[1],ChipI.shape[0],ChipI.ravel(),RefI.shape[1],RefI.shape[0],RefI.ravel()))
|
Dx1[ii], Dy1[ii] = np.float32(autoriftcore.arPixDisp_u_Py(core._autoriftcore,ChipI.shape[1],ChipI.shape[0],ChipI.ravel(),RefI.shape[1],RefI.shape[0],RefI.ravel()))
|
||||||
# # call Python
|
# # call Python
|
||||||
# Dx1[ii], Dy1[ii] = arPixDisp(ChipI,RefI)
|
# Dx1[ii], Dy1[ii] = arPixDisp(ChipI,RefI)
|
||||||
|
else:
|
||||||
|
# Preparation for parallel
|
||||||
|
in_shape = xGrid.shape
|
||||||
|
I_shape = I1.shape
|
||||||
|
shape_prod = np.asscalar(np.prod(in_shape))
|
||||||
|
|
||||||
|
# import pdb
|
||||||
|
# pdb.set_trace()
|
||||||
|
XI1 = mp.RawArray('b', np.asscalar(np.prod(I_shape)))
|
||||||
|
XI1_np = np.frombuffer(XI1,dtype=np.uint8).reshape(I_shape)
|
||||||
|
np.copyto(XI1_np,I1)
|
||||||
|
del I1
|
||||||
|
|
||||||
|
XI2 = mp.RawArray('b', np.asscalar(np.prod(I_shape)))
|
||||||
|
XI2_np = np.frombuffer(XI2,dtype=np.uint8).reshape(I_shape)
|
||||||
|
np.copyto(XI2_np,I2)
|
||||||
|
del I2
|
||||||
|
|
||||||
|
XxGrid = mp.RawArray('f', shape_prod)
|
||||||
|
XxGrid_np = np.frombuffer(XxGrid,dtype=np.float32).reshape(in_shape)
|
||||||
|
np.copyto(XxGrid_np,xGrid)
|
||||||
|
del xGrid
|
||||||
|
|
||||||
|
XyGrid = mp.RawArray('f', shape_prod)
|
||||||
|
XyGrid_np = np.frombuffer(XyGrid,dtype=np.float32).reshape(in_shape)
|
||||||
|
np.copyto(XyGrid_np,yGrid)
|
||||||
|
del yGrid
|
||||||
|
|
||||||
|
XSearchLimitX = mp.RawArray('f', shape_prod)
|
||||||
|
XSearchLimitX_np = np.frombuffer(XSearchLimitX,dtype=np.float32).reshape(in_shape)
|
||||||
|
np.copyto(XSearchLimitX_np,SearchLimitX)
|
||||||
|
|
||||||
|
XSearchLimitY = mp.RawArray('f', shape_prod)
|
||||||
|
XSearchLimitY_np = np.frombuffer(XSearchLimitY,dtype=np.float32).reshape(in_shape)
|
||||||
|
np.copyto(XSearchLimitY_np,SearchLimitY)
|
||||||
|
|
||||||
|
XChipSizeX = mp.RawArray('f', shape_prod)
|
||||||
|
XChipSizeX_np = np.frombuffer(XChipSizeX,dtype=np.float32).reshape(in_shape)
|
||||||
|
np.copyto(XChipSizeX_np,ChipSizeX)
|
||||||
|
del ChipSizeX
|
||||||
|
|
||||||
|
XChipSizeY = mp.RawArray('f', shape_prod)
|
||||||
|
XChipSizeY_np = np.frombuffer(XChipSizeY,dtype=np.float32).reshape(in_shape)
|
||||||
|
np.copyto(XChipSizeY_np,ChipSizeY)
|
||||||
|
del ChipSizeY
|
||||||
|
|
||||||
|
XDx0 = mp.RawArray('f', shape_prod)
|
||||||
|
XDx0_np = np.frombuffer(XDx0,dtype=np.float32).reshape(in_shape)
|
||||||
|
np.copyto(XDx0_np,Dx0)
|
||||||
|
|
||||||
|
XDy0 = mp.RawArray('f', shape_prod)
|
||||||
|
XDy0_np = np.frombuffer(XDy0,dtype=np.float32).reshape(in_shape)
|
||||||
|
np.copyto(XDy0_np,Dy0)
|
||||||
|
# import pdb
|
||||||
|
# pdb.set_trace()
|
||||||
|
|
||||||
|
|
||||||
|
# Nchunks = mp.cpu_count() // 8 * MultiThread
|
||||||
|
Nchunks = MultiThread
|
||||||
|
chunkSize = int(np.floor(shape_prod / Nchunks))
|
||||||
|
chunkRem = shape_prod - chunkSize * Nchunks
|
||||||
|
|
||||||
|
CHUNKS = []
|
||||||
|
|
||||||
|
for k in range(Nchunks):
|
||||||
|
# print(k)
|
||||||
|
if k == (Nchunks-1):
|
||||||
|
chunkInds = np.arange(k*chunkSize, (k+1)*chunkSize+chunkRem)
|
||||||
|
else:
|
||||||
|
chunkInds = np.arange(k*chunkSize, (k+1)*chunkSize)
|
||||||
|
CHUNKS.append(chunkInds)
|
||||||
|
# print(CHUNKS)
|
||||||
|
|
||||||
|
chunk_inputs = [(kk, CHUNKS[kk], SubPixFlag, oversample, in_shape, I_shape)
|
||||||
|
for kk in range(Nchunks)]
|
||||||
|
|
||||||
|
with mp.Pool(initializer=initializer, initargs=(XI1, XI2, XxGrid, XyGrid, XSearchLimitX, XSearchLimitY, XChipSizeX, XChipSizeY, XDx0, XDy0)) as pool:
|
||||||
|
Dx, Dy = zip(*pool.map(unpacking_loop_u, chunk_inputs))
|
||||||
|
|
||||||
|
Dx = np.concatenate(Dx)
|
||||||
|
Dy = np.concatenate(Dy)
|
||||||
|
|
||||||
|
Dx = np.reshape(Dx, in_shape)
|
||||||
|
Dy = np.reshape(Dy, in_shape)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# add back 1) I1 (RefI) relative to I2 (ChipI) initial offset Dx0 and Dy0, and
|
# add back 1) I1 (RefI) relative to I2 (ChipI) initial offset Dx0 and Dy0, and
|
||||||
|
@ -821,7 +1091,7 @@ def arImgDisp_u(I1, I2, xGrid, yGrid, ChipSizeX, ChipSizeY, SearchLimitX, Search
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def arImgDisp_s(I1, I2, xGrid, yGrid, ChipSizeX, ChipSizeY, SearchLimitX, SearchLimitY, Dx0, Dy0, SubPixFlag, oversample):
|
def arImgDisp_s(I1, I2, xGrid, yGrid, ChipSizeX, ChipSizeY, SearchLimitX, SearchLimitY, Dx0, Dy0, SubPixFlag, oversample, MultiThread):
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from . import autoriftcore
|
from . import autoriftcore
|
||||||
|
@ -917,7 +1187,7 @@ def arImgDisp_s(I1, I2, xGrid, yGrid, ChipSizeX, ChipSizeY, SearchLimitX, Search
|
||||||
Dx.fill(np.nan)
|
Dx.fill(np.nan)
|
||||||
Dy = Dx.copy()
|
Dy = Dx.copy()
|
||||||
|
|
||||||
|
if MultiThread == 0:
|
||||||
for jj in range(xGrid.shape[1]):
|
for jj in range(xGrid.shape[1]):
|
||||||
if np.all(SearchLimitX[:,jj] == 0) & np.all(SearchLimitY[:,jj] == 0):
|
if np.all(SearchLimitX[:,jj] == 0) & np.all(SearchLimitY[:,jj] == 0):
|
||||||
continue
|
continue
|
||||||
|
@ -953,7 +1223,7 @@ def arImgDisp_s(I1, I2, xGrid, yGrid, ChipSizeX, ChipSizeY, SearchLimitX, Search
|
||||||
|
|
||||||
if SubPixFlag:
|
if SubPixFlag:
|
||||||
# call C++
|
# call C++
|
||||||
Dx1[ii], Dy1[ii] = np.float32(autoriftcore.arSubPixDisp_s_Py(core._autoriftcore,ChipI.shape[1],ChipI.shape[0],ChipI.ravel(),RefI.shape[1],RefI.shape[0],RefI.ravel(), oversample))
|
Dx1[ii], Dy1[ii] = np.float32(autoriftcore.arSubPixDisp_s_Py(core._autoriftcore,ChipI.shape[1],ChipI.shape[0],ChipI.ravel(),RefI.shape[1],RefI.shape[0],RefI.ravel(),oversample))
|
||||||
# # call Python
|
# # call Python
|
||||||
# Dx1[ii], Dy1[ii] = arSubPixDisp(ChipI,RefI)
|
# Dx1[ii], Dy1[ii] = arSubPixDisp(ChipI,RefI)
|
||||||
else:
|
else:
|
||||||
|
@ -961,7 +1231,90 @@ def arImgDisp_s(I1, I2, xGrid, yGrid, ChipSizeX, ChipSizeY, SearchLimitX, Search
|
||||||
Dx1[ii], Dy1[ii] = np.float32(autoriftcore.arPixDisp_s_Py(core._autoriftcore,ChipI.shape[1],ChipI.shape[0],ChipI.ravel(),RefI.shape[1],RefI.shape[0],RefI.ravel()))
|
Dx1[ii], Dy1[ii] = np.float32(autoriftcore.arPixDisp_s_Py(core._autoriftcore,ChipI.shape[1],ChipI.shape[0],ChipI.ravel(),RefI.shape[1],RefI.shape[0],RefI.ravel()))
|
||||||
# # call Python
|
# # call Python
|
||||||
# Dx1[ii], Dy1[ii] = arPixDisp(ChipI,RefI)
|
# Dx1[ii], Dy1[ii] = arPixDisp(ChipI,RefI)
|
||||||
|
else:
|
||||||
|
# Preparation for parallel
|
||||||
|
in_shape = xGrid.shape
|
||||||
|
I_shape = I1.shape
|
||||||
|
shape_prod = np.asscalar(np.prod(in_shape))
|
||||||
|
|
||||||
|
# import pdb
|
||||||
|
# pdb.set_trace()
|
||||||
|
XI1 = mp.RawArray('f', np.asscalar(np.prod(I_shape)))
|
||||||
|
XI1_np = np.frombuffer(XI1,dtype=np.float32).reshape(I_shape)
|
||||||
|
np.copyto(XI1_np,I1)
|
||||||
|
del I1
|
||||||
|
|
||||||
|
XI2 = mp.RawArray('f', np.asscalar(np.prod(I_shape)))
|
||||||
|
XI2_np = np.frombuffer(XI2,dtype=np.float32).reshape(I_shape)
|
||||||
|
np.copyto(XI2_np,I2)
|
||||||
|
del I2
|
||||||
|
|
||||||
|
XxGrid = mp.RawArray('f', shape_prod)
|
||||||
|
XxGrid_np = np.frombuffer(XxGrid,dtype=np.float32).reshape(in_shape)
|
||||||
|
np.copyto(XxGrid_np,xGrid)
|
||||||
|
del xGrid
|
||||||
|
|
||||||
|
XyGrid = mp.RawArray('f', shape_prod)
|
||||||
|
XyGrid_np = np.frombuffer(XyGrid,dtype=np.float32).reshape(in_shape)
|
||||||
|
np.copyto(XyGrid_np,yGrid)
|
||||||
|
del yGrid
|
||||||
|
|
||||||
|
XSearchLimitX = mp.RawArray('f', shape_prod)
|
||||||
|
XSearchLimitX_np = np.frombuffer(XSearchLimitX,dtype=np.float32).reshape(in_shape)
|
||||||
|
np.copyto(XSearchLimitX_np,SearchLimitX)
|
||||||
|
|
||||||
|
XSearchLimitY = mp.RawArray('f', shape_prod)
|
||||||
|
XSearchLimitY_np = np.frombuffer(XSearchLimitY,dtype=np.float32).reshape(in_shape)
|
||||||
|
np.copyto(XSearchLimitY_np,SearchLimitY)
|
||||||
|
|
||||||
|
XChipSizeX = mp.RawArray('f', shape_prod)
|
||||||
|
XChipSizeX_np = np.frombuffer(XChipSizeX,dtype=np.float32).reshape(in_shape)
|
||||||
|
np.copyto(XChipSizeX_np,ChipSizeX)
|
||||||
|
del ChipSizeX
|
||||||
|
|
||||||
|
XChipSizeY = mp.RawArray('f', shape_prod)
|
||||||
|
XChipSizeY_np = np.frombuffer(XChipSizeY,dtype=np.float32).reshape(in_shape)
|
||||||
|
np.copyto(XChipSizeY_np,ChipSizeY)
|
||||||
|
del ChipSizeY
|
||||||
|
|
||||||
|
XDx0 = mp.RawArray('f', shape_prod)
|
||||||
|
XDx0_np = np.frombuffer(XDx0,dtype=np.float32).reshape(in_shape)
|
||||||
|
np.copyto(XDx0_np,Dx0)
|
||||||
|
|
||||||
|
XDy0 = mp.RawArray('f', shape_prod)
|
||||||
|
XDy0_np = np.frombuffer(XDy0,dtype=np.float32).reshape(in_shape)
|
||||||
|
np.copyto(XDy0_np,Dy0)
|
||||||
|
# import pdb
|
||||||
|
# pdb.set_trace()
|
||||||
|
|
||||||
|
|
||||||
|
# Nchunks = mp.cpu_count() // 8 * MultiThread
|
||||||
|
Nchunks = MultiThread
|
||||||
|
chunkSize = int(np.floor(shape_prod / Nchunks))
|
||||||
|
chunkRem = shape_prod - chunkSize * Nchunks
|
||||||
|
|
||||||
|
CHUNKS = []
|
||||||
|
|
||||||
|
for k in range(Nchunks):
|
||||||
|
# print(k)
|
||||||
|
if k == (Nchunks-1):
|
||||||
|
chunkInds = np.arange(k*chunkSize, (k+1)*chunkSize+chunkRem)
|
||||||
|
else:
|
||||||
|
chunkInds = np.arange(k*chunkSize, (k+1)*chunkSize)
|
||||||
|
CHUNKS.append(chunkInds)
|
||||||
|
# print(CHUNKS)
|
||||||
|
|
||||||
|
chunk_inputs = [(kk, CHUNKS[kk], SubPixFlag, oversample, in_shape, I_shape)
|
||||||
|
for kk in range(Nchunks)]
|
||||||
|
|
||||||
|
with mp.Pool(initializer=initializer, initargs=(XI1, XI2, XxGrid, XyGrid, XSearchLimitX, XSearchLimitY, XChipSizeX, XChipSizeY, XDx0, XDy0)) as pool:
|
||||||
|
Dx, Dy = zip(*pool.map(unpacking_loop_s, chunk_inputs))
|
||||||
|
|
||||||
|
Dx = np.concatenate(Dx)
|
||||||
|
Dy = np.concatenate(Dy)
|
||||||
|
|
||||||
|
Dx = np.reshape(Dx, in_shape)
|
||||||
|
Dy = np.reshape(Dy, in_shape)
|
||||||
|
|
||||||
# add back 1) I1 (RefI) relative to I2 (ChipI) initial offset Dx0 and Dy0, and
|
# add back 1) I1 (RefI) relative to I2 (ChipI) initial offset Dx0 and Dy0, and
|
||||||
# 2) RefI relative to ChipI has a left/top boundary offset of -SearchLimitX and -SearchLimitY
|
# 2) RefI relative to ChipI has a left/top boundary offset of -SearchLimitX and -SearchLimitY
|
||||||
|
|
|
@ -186,6 +186,13 @@ DATA_TYPE = Component.Parameter('DataType',
|
||||||
mandatory=False,
|
mandatory=False,
|
||||||
doc = 'Input data type: 0 -> uint8, 1 -> float32')
|
doc = 'Input data type: 0 -> uint8, 1 -> float32')
|
||||||
|
|
||||||
|
MULTI_THREAD = Component.Parameter('MultiThread',
|
||||||
|
public_name = 'MULTI_THREAD',
|
||||||
|
default = 0,
|
||||||
|
type = int,
|
||||||
|
mandatory=False,
|
||||||
|
doc = 'Number of Threads; default specified by 0 uses single core and surpasses the multithreading routine')
|
||||||
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Try Autorift within ISCE first
|
# Try Autorift within ISCE first
|
||||||
|
@ -223,7 +230,8 @@ class autoRIFT_ISCE(autoRIFT, Component):
|
||||||
BUFF_DISTANCE_C,
|
BUFF_DISTANCE_C,
|
||||||
COARSE_COR_CUTOFF,
|
COARSE_COR_CUTOFF,
|
||||||
OVER_SAMPLE_RATIO,
|
OVER_SAMPLE_RATIO,
|
||||||
DATA_TYPE)
|
DATA_TYPE,
|
||||||
|
MULTI_THREAD)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -77,6 +77,9 @@ class Geogrid(Component):
|
||||||
raise Exception('At least the DEM parameter must be set for geogrid')
|
raise Exception('At least the DEM parameter must be set for geogrid')
|
||||||
|
|
||||||
from osgeo import gdal, osr
|
from osgeo import gdal, osr
|
||||||
|
if self.urlflag == 1:
|
||||||
|
ds = gdal.Open('/vsicurl/%s' %(self.demname))
|
||||||
|
else:
|
||||||
ds = gdal.Open(self.demname, gdal.GA_ReadOnly)
|
ds = gdal.Open(self.demname, gdal.GA_ReadOnly)
|
||||||
srs = osr.SpatialReference()
|
srs = osr.SpatialReference()
|
||||||
srs.ImportFromWkt(ds.GetProjection())
|
srs.ImportFromWkt(ds.GetProjection())
|
||||||
|
@ -93,6 +96,9 @@ class Geogrid(Component):
|
||||||
else:
|
else:
|
||||||
raise Exception('Non-standard coordinate system encountered')
|
raise Exception('Non-standard coordinate system encountered')
|
||||||
if not epsgstr: #Empty string->use shell command gdalsrsinfo for last trial
|
if not epsgstr: #Empty string->use shell command gdalsrsinfo for last trial
|
||||||
|
if self.urlflag == 1:
|
||||||
|
cmd = 'gdalsrsinfo -o epsg /vsicurl/{0}'.format(self.demname)
|
||||||
|
else:
|
||||||
cmd = 'gdalsrsinfo -o epsg {0}'.format(self.demname)
|
cmd = 'gdalsrsinfo -o epsg {0}'.format(self.demname)
|
||||||
epsgstr = subprocess.check_output(cmd, shell=True)
|
epsgstr = subprocess.check_output(cmd, shell=True)
|
||||||
# pdb.set_trace()
|
# pdb.set_trace()
|
||||||
|
@ -274,6 +280,9 @@ class Geogrid(Component):
|
||||||
geogrid.setRO2VYFilename_Py( self._geogrid, self.winro2vyname)
|
geogrid.setRO2VYFilename_Py( self._geogrid, self.winro2vyname)
|
||||||
geogrid.setLookSide_Py(self._geogrid, self.lookSide)
|
geogrid.setLookSide_Py(self._geogrid, self.lookSide)
|
||||||
geogrid.setNodataOut_Py(self._geogrid, self.nodata_out)
|
geogrid.setNodataOut_Py(self._geogrid, self.nodata_out)
|
||||||
|
if self.urlflag is None:
|
||||||
|
self.urlflag = 0
|
||||||
|
geogrid.setUrlFlag_Py(self._geogrid, self.urlflag)
|
||||||
|
|
||||||
self._orbit = self.orbit.exportToC()
|
self._orbit = self.orbit.exportToC()
|
||||||
geogrid.setOrbit_Py(self._geogrid, self._orbit)
|
geogrid.setOrbit_Py(self._geogrid, self._orbit)
|
||||||
|
@ -321,6 +330,7 @@ class Geogrid(Component):
|
||||||
self.csmaxxname = None
|
self.csmaxxname = None
|
||||||
self.csmaxyname = None
|
self.csmaxyname = None
|
||||||
self.ssmname = None
|
self.ssmname = None
|
||||||
|
self.urlflag = None
|
||||||
|
|
||||||
##Output related parameters
|
##Output related parameters
|
||||||
self.winlocname = None
|
self.winlocname = None
|
||||||
|
|
|
@ -48,8 +48,8 @@ class GeogridOptical():
|
||||||
'''
|
'''
|
||||||
|
|
||||||
##Determine appropriate EPSG system
|
##Determine appropriate EPSG system
|
||||||
self.epsgDem = self.getProjectionSystem(self.demname)
|
self.epsgDem = self.getProjectionSystem(self.demname, self.urlflag)
|
||||||
self.epsgDat = self.getProjectionSystem(self.dat1name)
|
self.epsgDat = self.getProjectionSystem(self.dat1name, self.urlflag)
|
||||||
|
|
||||||
###Determine extent of data needed
|
###Determine extent of data needed
|
||||||
bbox = self.determineBbox()
|
bbox = self.determineBbox()
|
||||||
|
@ -59,7 +59,7 @@ class GeogridOptical():
|
||||||
self.geogrid()
|
self.geogrid()
|
||||||
|
|
||||||
|
|
||||||
def getProjectionSystem(self, filename):
|
def getProjectionSystem(self, filename, urlflag):
|
||||||
'''
|
'''
|
||||||
Testing with Greenland.
|
Testing with Greenland.
|
||||||
'''
|
'''
|
||||||
|
@ -67,6 +67,9 @@ class GeogridOptical():
|
||||||
raise Exception('File {0} does not exist'.format(filename))
|
raise Exception('File {0} does not exist'.format(filename))
|
||||||
|
|
||||||
from osgeo import gdal, osr
|
from osgeo import gdal, osr
|
||||||
|
if urlflag == 1:
|
||||||
|
ds = gdal.Open('/vsicurl/%s' %(filename))
|
||||||
|
else:
|
||||||
ds = gdal.Open(filename, gdal.GA_ReadOnly)
|
ds = gdal.Open(filename, gdal.GA_ReadOnly)
|
||||||
srs = osr.SpatialReference()
|
srs = osr.SpatialReference()
|
||||||
srs.ImportFromWkt(ds.GetProjection())
|
srs.ImportFromWkt(ds.GetProjection())
|
||||||
|
@ -83,6 +86,9 @@ class GeogridOptical():
|
||||||
else:
|
else:
|
||||||
raise Exception('Non-standard coordinate system encountered')
|
raise Exception('Non-standard coordinate system encountered')
|
||||||
if not epsgstr: #Empty string->use shell command gdalsrsinfo for last trial
|
if not epsgstr: #Empty string->use shell command gdalsrsinfo for last trial
|
||||||
|
if urlflag == 1:
|
||||||
|
cmd = 'gdalsrsinfo -o epsg /vsicurl/{0}'.format(filename)
|
||||||
|
else:
|
||||||
cmd = 'gdalsrsinfo -o epsg {0}'.format(filename)
|
cmd = 'gdalsrsinfo -o epsg {0}'.format(filename)
|
||||||
epsgstr = subprocess.check_output(cmd, shell=True)
|
epsgstr = subprocess.check_output(cmd, shell=True)
|
||||||
# pdb.set_trace()
|
# pdb.set_trace()
|
||||||
|
@ -155,6 +161,13 @@ class GeogridOptical():
|
||||||
|
|
||||||
# For now print inputs that were obtained
|
# For now print inputs that were obtained
|
||||||
|
|
||||||
|
urlflag = self.urlflag
|
||||||
|
|
||||||
|
if urlflag == 1:
|
||||||
|
print("\nReading input images into memory directly from URL's")
|
||||||
|
else:
|
||||||
|
print("\nReading input images locally from files")
|
||||||
|
|
||||||
print("\nOptical Image parameters: ")
|
print("\nOptical Image parameters: ")
|
||||||
print("X-direction coordinate: " + str(self.startingX) + " " + str(self.XSize))
|
print("X-direction coordinate: " + str(self.startingX) + " " + str(self.XSize))
|
||||||
print("Y-direction coordinate: " + str(self.startingY) + " " + str(self.YSize))
|
print("Y-direction coordinate: " + str(self.startingY) + " " + str(self.YSize))
|
||||||
|
@ -218,6 +231,21 @@ class GeogridOptical():
|
||||||
import struct
|
import struct
|
||||||
|
|
||||||
# pdb.set_trace()
|
# pdb.set_trace()
|
||||||
|
if urlflag == 1:
|
||||||
|
self.demname = '/vsicurl/%s' %(self.demname)
|
||||||
|
self.dhdxname = '/vsicurl/%s' %(self.dhdxname)
|
||||||
|
self.dhdyname = '/vsicurl/%s' %(self.dhdyname)
|
||||||
|
self.vxname = '/vsicurl/%s' %(self.vxname)
|
||||||
|
self.vyname = '/vsicurl/%s' %(self.vyname)
|
||||||
|
self.srxname = '/vsicurl/%s' %(self.srxname)
|
||||||
|
self.sryname = '/vsicurl/%s' %(self.sryname)
|
||||||
|
self.csminxname = '/vsicurl/%s' %(self.csminxname)
|
||||||
|
self.csminyname = '/vsicurl/%s' %(self.csminyname)
|
||||||
|
self.csmaxxname = '/vsicurl/%s' %(self.csmaxxname)
|
||||||
|
self.csmaxyname = '/vsicurl/%s' %(self.csmaxyname)
|
||||||
|
self.ssmname = '/vsicurl/%s' %(self.ssmname)
|
||||||
|
|
||||||
|
|
||||||
demDS = gdal.Open(self.demname, gdal.GA_ReadOnly)
|
demDS = gdal.Open(self.demname, gdal.GA_ReadOnly)
|
||||||
|
|
||||||
if (self.dhdxname != ""):
|
if (self.dhdxname != ""):
|
||||||
|
@ -731,8 +759,94 @@ class GeogridOptical():
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def coregister(self,in1,in2,urlflag):
|
||||||
|
import os
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
from osgeo import gdal, osr
|
||||||
|
import struct
|
||||||
|
|
||||||
|
if urlflag == 1:
|
||||||
|
DS1 = gdal.Open('/vsicurl/%s' %(in1))
|
||||||
|
else:
|
||||||
|
DS1 = gdal.Open(in1, gdal.GA_ReadOnly)
|
||||||
|
trans1 = DS1.GetGeoTransform()
|
||||||
|
xsize1 = DS1.RasterXSize
|
||||||
|
ysize1 = DS1.RasterYSize
|
||||||
|
|
||||||
|
if urlflag == 1:
|
||||||
|
DS2 = gdal.Open('/vsicurl/%s' %(in2))
|
||||||
|
else:
|
||||||
|
DS2 = gdal.Open(in2, gdal.GA_ReadOnly)
|
||||||
|
trans2 = DS2.GetGeoTransform()
|
||||||
|
xsize2 = DS2.RasterXSize
|
||||||
|
ysize2 = DS2.RasterYSize
|
||||||
|
|
||||||
|
W = np.max([trans1[0],trans2[0]])
|
||||||
|
N = np.min([trans1[3],trans2[3]])
|
||||||
|
E = np.min([trans1[0]+xsize1*trans1[1],trans2[0]+xsize2*trans2[1]])
|
||||||
|
S = np.max([trans1[3]+ysize1*trans1[5],trans2[3]+ysize2*trans2[5]])
|
||||||
|
|
||||||
|
x1a = int(np.round((W-trans1[0])/trans1[1]))
|
||||||
|
x1b = int(np.round((E-trans1[0])/trans1[1]))
|
||||||
|
y1a = int(np.round((N-trans1[3])/trans1[5]))
|
||||||
|
y1b = int(np.round((S-trans1[3])/trans1[5]))
|
||||||
|
|
||||||
|
x2a = int(np.round((W-trans2[0])/trans2[1]))
|
||||||
|
x2b = int(np.round((E-trans2[0])/trans2[1]))
|
||||||
|
y2a = int(np.round((N-trans2[3])/trans2[5]))
|
||||||
|
y2b = int(np.round((S-trans2[3])/trans2[5]))
|
||||||
|
|
||||||
|
x1a = np.min([x1a, xsize1-1])
|
||||||
|
x1b = np.min([x1b, xsize1-1])
|
||||||
|
y1a = np.min([y1a, ysize1-1])
|
||||||
|
y1b = np.min([y1b, ysize1-1])
|
||||||
|
x2a = np.min([x2a, xsize2-1])
|
||||||
|
x2b = np.min([x2b, xsize2-1])
|
||||||
|
y2a = np.min([y2a, ysize2-1])
|
||||||
|
y2b = np.min([y2b, ysize2-1])
|
||||||
|
|
||||||
|
x1a = np.max([x1a, 0])
|
||||||
|
x1b = np.max([x1b, 0])
|
||||||
|
y1a = np.max([y1a, 0])
|
||||||
|
y1b = np.max([y1b, 0])
|
||||||
|
x2a = np.max([x2a, 0])
|
||||||
|
x2b = np.max([x2b, 0])
|
||||||
|
y2a = np.max([y2a, 0])
|
||||||
|
y2b = np.max([y2b, 0])
|
||||||
|
|
||||||
|
x1a = int(x1a)
|
||||||
|
x1b = int(x1b)
|
||||||
|
y1a = int(y1a)
|
||||||
|
y1b = int(y1b)
|
||||||
|
x2a = int(x2a)
|
||||||
|
x2b = int(x2b)
|
||||||
|
y2a = int(y2a)
|
||||||
|
y2b = int(y2b)
|
||||||
|
|
||||||
|
trans = (W, trans1[1], 0.0, N, 0.0, trans1[5])
|
||||||
|
|
||||||
|
if urlflag == 0:
|
||||||
|
|
||||||
|
I1 = DS1.ReadAsArray(xoff=x1a, yoff=y1a, xsize=x1b-x1a+1, ysize=y1b-y1a+1)
|
||||||
|
I2 = DS2.ReadAsArray(xoff=x2a, yoff=y2a, xsize=x2b-x2a+1, ysize=y2b-y2a+1)
|
||||||
|
|
||||||
|
fileformat = "GTiff"
|
||||||
|
driver = gdal.GetDriverByName(fileformat)
|
||||||
|
|
||||||
|
DST1 = driver.Create(os.path.basename(in1), xsize=(x1b-x1a+1), ysize=(y1b-y1a+1), bands=1, eType=gdal.GDT_UInt16)
|
||||||
|
DST1.SetGeoTransform(trans)
|
||||||
|
DST1.SetProjection(DS1.GetProjectionRef())
|
||||||
|
DST1.GetRasterBand(1).WriteArray(I1)
|
||||||
|
DST1 = None
|
||||||
|
|
||||||
|
DST2 = driver.Create(os.path.basename(in2), xsize=(x2b-x2a+1), ysize=(y2b-y2a+1), bands=1, eType=gdal.GDT_UInt16)
|
||||||
|
DST2.SetGeoTransform(trans)
|
||||||
|
DST2.SetProjection(DS2.GetProjectionRef())
|
||||||
|
DST2.GetRasterBand(1).WriteArray(I2)
|
||||||
|
DST2 = None
|
||||||
|
|
||||||
|
return x1a, y1a, x1b-x1a+1, y1b-y1a+1, x2a, y2a, x2b-x2a+1, y2b-y2a+1, trans
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -753,6 +867,7 @@ class GeogridOptical():
|
||||||
self.numberOfLines = None
|
self.numberOfLines = None
|
||||||
self.repeatTime = None
|
self.repeatTime = None
|
||||||
self.chipSizeX0 = None
|
self.chipSizeX0 = None
|
||||||
|
self.urlflag = None
|
||||||
|
|
||||||
##Input related parameters
|
##Input related parameters
|
||||||
self.dat1name = None
|
self.dat1name = None
|
||||||
|
|
|
@ -430,6 +430,19 @@ PyObject* setNodataOut(PyObject *self, PyObject *args)
|
||||||
return Py_BuildValue("i", 0);
|
return Py_BuildValue("i", 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
PyObject* setUrlFlag(PyObject *self, PyObject *args)
|
||||||
|
{
|
||||||
|
uint64_t ptr;
|
||||||
|
int urlflag;
|
||||||
|
if (!PyArg_ParseTuple(args, "Ki", &ptr, &urlflag))
|
||||||
|
{
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
((geoGrid*)(ptr))->urlflag = urlflag;
|
||||||
|
return Py_BuildValue("i", 0);
|
||||||
|
}
|
||||||
|
|
||||||
PyObject* setOrbit(PyObject *self, PyObject *args)
|
PyObject* setOrbit(PyObject *self, PyObject *args)
|
||||||
{
|
{
|
||||||
uint64_t ptr, orb;
|
uint64_t ptr, orb;
|
||||||
|
|
|
@ -69,6 +69,7 @@ struct geoGrid
|
||||||
int nPixels;
|
int nPixels;
|
||||||
int lookSide;
|
int lookSide;
|
||||||
int nodata_out;
|
int nodata_out;
|
||||||
|
int urlflag;
|
||||||
double incidenceAngle;
|
double incidenceAngle;
|
||||||
|
|
||||||
//Output file names
|
//Output file names
|
||||||
|
|
|
@ -53,6 +53,7 @@ extern "C"
|
||||||
PyObject * setOrbit(PyObject *, PyObject *);
|
PyObject * setOrbit(PyObject *, PyObject *);
|
||||||
PyObject * setLookSide(PyObject *, PyObject *);
|
PyObject * setLookSide(PyObject *, PyObject *);
|
||||||
PyObject * setNodataOut(PyObject *, PyObject *);
|
PyObject * setNodataOut(PyObject *, PyObject *);
|
||||||
|
PyObject * setUrlFlag(PyObject *, PyObject *);
|
||||||
|
|
||||||
PyObject * setWindowLocationsFilename(PyObject *, PyObject *);
|
PyObject * setWindowLocationsFilename(PyObject *, PyObject *);
|
||||||
PyObject * setWindowOffsetsFilename(PyObject *, PyObject *);
|
PyObject * setWindowOffsetsFilename(PyObject *, PyObject *);
|
||||||
|
@ -91,6 +92,7 @@ static PyMethodDef geogrid_methods[] =
|
||||||
{"setOrbit_Py", setOrbit, METH_VARARGS, " "},
|
{"setOrbit_Py", setOrbit, METH_VARARGS, " "},
|
||||||
{"setLookSide_Py", setLookSide, METH_VARARGS, " "},
|
{"setLookSide_Py", setLookSide, METH_VARARGS, " "},
|
||||||
{"setNodataOut_Py", setNodataOut, METH_VARARGS, " "},
|
{"setNodataOut_Py", setNodataOut, METH_VARARGS, " "},
|
||||||
|
{"setUrlFlag_Py", setUrlFlag, METH_VARARGS, " "},
|
||||||
{"setXLimits_Py", setXLimits, METH_VARARGS, " "},
|
{"setXLimits_Py", setXLimits, METH_VARARGS, " "},
|
||||||
{"setYLimits_Py", setYLimits, METH_VARARGS, " "},
|
{"setYLimits_Py", setYLimits, METH_VARARGS, " "},
|
||||||
{"setWindowLocationsFilename_Py", setWindowLocationsFilename, METH_VARARGS, " "},
|
{"setWindowLocationsFilename_Py", setWindowLocationsFilename, METH_VARARGS, " "},
|
||||||
|
|
|
@ -47,6 +47,14 @@ void geoGrid::geogrid()
|
||||||
|
|
||||||
//For now print inputs that were obtained
|
//For now print inputs that were obtained
|
||||||
|
|
||||||
|
if (urlflag == 1){
|
||||||
|
std::cout << "\nReading input images into memory directly from URL's\n";
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
std::cout << "\nReading input images locally from files\n";
|
||||||
|
}
|
||||||
|
|
||||||
std::cout << "\nRadar parameters: \n";
|
std::cout << "\nRadar parameters: \n";
|
||||||
std::cout << "Range: " << startingRange << " " << dr << "\n";
|
std::cout << "Range: " << startingRange << " " << dr << "\n";
|
||||||
std::cout << "Azimuth: " << sensingStart << " " << prf << "\n";
|
std::cout << "Azimuth: " << sensingStart << " " << prf << "\n";
|
||||||
|
@ -145,6 +153,22 @@ void geoGrid::geogrid()
|
||||||
|
|
||||||
double geoTrans[6];
|
double geoTrans[6];
|
||||||
|
|
||||||
|
std::string url ("/vsicurl/");
|
||||||
|
if (urlflag == 1)
|
||||||
|
{
|
||||||
|
demname = url + demname;
|
||||||
|
dhdxname = url + dhdxname;
|
||||||
|
dhdyname = url + dhdyname;
|
||||||
|
vxname = url + vxname;
|
||||||
|
vyname = url + vyname;
|
||||||
|
srxname = url + srxname;
|
||||||
|
sryname = url + sryname;
|
||||||
|
csminxname = url + csminxname;
|
||||||
|
csminyname = url + csminyname;
|
||||||
|
csmaxxname = url + csmaxxname;
|
||||||
|
csmaxyname = url + csmaxyname;
|
||||||
|
ssmname = url + ssmname;
|
||||||
|
}
|
||||||
demDS = reinterpret_cast<GDALDataset *>(GDALOpenShared(demname.c_str(), GA_ReadOnly));
|
demDS = reinterpret_cast<GDALDataset *>(GDALOpenShared(demname.c_str(), GA_ReadOnly));
|
||||||
if (dhdxname != "")
|
if (dhdxname != "")
|
||||||
{
|
{
|
||||||
|
|
|
@ -41,6 +41,7 @@ here insofar as such code may exist in this Software.
|
||||||
#include <Xm/DrawnB.h>
|
#include <Xm/DrawnB.h>
|
||||||
#include <Xm/ScrolledW.h>
|
#include <Xm/ScrolledW.h>
|
||||||
#include <Xm/Label.h>
|
#include <Xm/Label.h>
|
||||||
|
#include <Xm/ScrollBar.h>
|
||||||
#include <X11/cursorfont.h>
|
#include <X11/cursorfont.h>
|
||||||
#include <X11/keysym.h> /* for popup window only */
|
#include <X11/keysym.h> /* for popup window only */
|
||||||
|
|
||||||
|
|
|
@ -9,8 +9,8 @@ import argparse
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from scipy import ndimage
|
from scipy import ndimage
|
||||||
import matplotlib.pyplot as plt
|
import matplotlib.pyplot as plt
|
||||||
import gdal
|
from osgeo import gdal
|
||||||
from gdalconst import GA_ReadOnly
|
from osgeo.gdalconst import GA_ReadOnly
|
||||||
|
|
||||||
# suppress the DEBUG message
|
# suppress the DEBUG message
|
||||||
import logging
|
import logging
|
||||||
|
|
|
@ -9,7 +9,7 @@ import isce
|
||||||
import isceobj
|
import isceobj
|
||||||
from isceobj.Constants import SPEED_OF_LIGHT
|
from isceobj.Constants import SPEED_OF_LIGHT
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import gdal
|
from osgeo import gdal
|
||||||
import shelve
|
import shelve
|
||||||
|
|
||||||
from scipy import ndimage
|
from scipy import ndimage
|
||||||
|
|
|
@ -7,7 +7,7 @@ import argparse
|
||||||
import isce
|
import isce
|
||||||
import isceobj
|
import isceobj
|
||||||
import os
|
import os
|
||||||
import gdal
|
from osgeo import gdal
|
||||||
import xml.etree.ElementTree as ET
|
import xml.etree.ElementTree as ET
|
||||||
|
|
||||||
def createParser():
|
def createParser():
|
||||||
|
|
|
@ -15,7 +15,7 @@ from isceobj.Util.Poly2D import Poly2D
|
||||||
import h5py
|
import h5py
|
||||||
from insarPair import insarPair
|
from insarPair import insarPair
|
||||||
from insarStack import insarStack
|
from insarStack import insarStack
|
||||||
import gdal
|
from osgeo import gdal
|
||||||
|
|
||||||
|
|
||||||
#################################################################
|
#################################################################
|
||||||
|
|
|
@ -8,8 +8,8 @@ import os
|
||||||
import glob
|
import glob
|
||||||
import isce
|
import isce
|
||||||
import isceobj
|
import isceobj
|
||||||
import gdal
|
from osgeo import gdal
|
||||||
from gdalconst import GA_ReadOnly
|
from osgeo.gdalconst import GA_ReadOnly
|
||||||
#import s1a_isce_utils as ut
|
#import s1a_isce_utils as ut
|
||||||
from isceobj.Planet.Planet import Planet
|
from isceobj.Planet.Planet import Planet
|
||||||
import shelve
|
import shelve
|
||||||
|
|
|
@ -2,8 +2,8 @@
|
||||||
# Heresh Fattahi
|
# Heresh Fattahi
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import gdal
|
from osgeo import gdal
|
||||||
from gdalconst import GA_ReadOnly
|
from osgeo.gdalconst import GA_ReadOnly
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from lxml import objectify
|
from lxml import objectify
|
||||||
|
|
||||||
|
|
|
@ -7,9 +7,8 @@ import os
|
||||||
import isce
|
import isce
|
||||||
import isceobj
|
import isceobj
|
||||||
import shelve
|
import shelve
|
||||||
import gdal
|
from osgeo import gdal, osr
|
||||||
import osr
|
from osgeo.gdalconst import GA_ReadOnly
|
||||||
from gdalconst import GA_ReadOnly
|
|
||||||
from scipy import ndimage
|
from scipy import ndimage
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -8,9 +8,8 @@ import os
|
||||||
import isce
|
import isce
|
||||||
import isceobj
|
import isceobj
|
||||||
import shelve
|
import shelve
|
||||||
import gdal
|
from osgeo import gdal, osr
|
||||||
import osr
|
from osgeo.gdalconst import GA_ReadOnly
|
||||||
from gdalconst import GA_ReadOnly
|
|
||||||
from scipy import ndimage
|
from scipy import ndimage
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -7,7 +7,7 @@ import argparse
|
||||||
import isce
|
import isce
|
||||||
import isceobj
|
import isceobj
|
||||||
import os
|
import os
|
||||||
import gdal
|
from osgeo import gdal
|
||||||
import matplotlib as mpl; #mpl.use('Agg')
|
import matplotlib as mpl; #mpl.use('Agg')
|
||||||
import matplotlib.pyplot as plt
|
import matplotlib.pyplot as plt
|
||||||
from pykml.factory import KML_ElementMaker as KML
|
from pykml.factory import KML_ElementMaker as KML
|
||||||
|
|
|
@ -16,7 +16,7 @@ import time
|
||||||
#import matplotlib.pyplot as plt
|
#import matplotlib.pyplot as plt
|
||||||
from contrib.splitSpectrum import SplitRangeSpectrum as splitSpectrum
|
from contrib.splitSpectrum import SplitRangeSpectrum as splitSpectrum
|
||||||
from isceobj.Constants import SPEED_OF_LIGHT
|
from isceobj.Constants import SPEED_OF_LIGHT
|
||||||
import gdal
|
from osgeo import gdal
|
||||||
|
|
||||||
|
|
||||||
def createParser():
|
def createParser():
|
||||||
|
|
|
@ -16,7 +16,7 @@ import time
|
||||||
#import matplotlib.pyplot as plt
|
#import matplotlib.pyplot as plt
|
||||||
from contrib.splitSpectrum import SplitRangeSpectrum as splitSpectrum
|
from contrib.splitSpectrum import SplitRangeSpectrum as splitSpectrum
|
||||||
from isceobj.Constants import SPEED_OF_LIGHT
|
from isceobj.Constants import SPEED_OF_LIGHT
|
||||||
import gdal
|
from osgeo import gdal
|
||||||
|
|
||||||
|
|
||||||
def createParser():
|
def createParser():
|
||||||
|
|
|
@ -6,7 +6,7 @@ import shelve
|
||||||
import datetime
|
import datetime
|
||||||
import shutil
|
import shutil
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import gdal
|
from osgeo import gdal
|
||||||
import isce
|
import isce
|
||||||
import isceobj
|
import isceobj
|
||||||
from isceobj.Constants import SPEED_OF_LIGHT
|
from isceobj.Constants import SPEED_OF_LIGHT
|
||||||
|
|
|
@ -9,8 +9,8 @@ import argparse
|
||||||
import os
|
import os
|
||||||
import isce
|
import isce
|
||||||
import isceobj
|
import isceobj
|
||||||
import gdal
|
from osgeo import gdal
|
||||||
from gdalconst import GA_ReadOnly
|
from osgeo.gdalconst import GA_ReadOnly
|
||||||
from scipy import ndimage
|
from scipy import ndimage
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -6,7 +6,7 @@ import os
|
||||||
import argparse
|
import argparse
|
||||||
import glob
|
import glob
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import gdal
|
from osgeo import gdal
|
||||||
import isce
|
import isce
|
||||||
import isceobj
|
import isceobj
|
||||||
from isceobj.Sensor.TOPS import createTOPSSwathSLCProduct
|
from isceobj.Sensor.TOPS import createTOPSSwathSLCProduct
|
||||||
|
|
|
@ -13,7 +13,7 @@ import copy
|
||||||
from isceobj.Sensor.TOPS import createTOPSSwathSLCProduct
|
from isceobj.Sensor.TOPS import createTOPSSwathSLCProduct
|
||||||
from mroipac.correlation.correlation import Correlation
|
from mroipac.correlation.correlation import Correlation
|
||||||
import s1a_isce_utils as ut
|
import s1a_isce_utils as ut
|
||||||
import gdal
|
from osgeo import gdal
|
||||||
|
|
||||||
|
|
||||||
def createParser():
|
def createParser():
|
||||||
|
|
|
@ -7,7 +7,7 @@ import argparse
|
||||||
import isce
|
import isce
|
||||||
import isceobj
|
import isceobj
|
||||||
import os
|
import os
|
||||||
import gdal
|
from osgeo import gdal
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import xml.etree.ElementTree as ET
|
import xml.etree.ElementTree as ET
|
||||||
|
|
||||||
|
|
|
@ -12,7 +12,7 @@ from iscesys.Component.ProductManager import ProductManager as PM
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from netCDF4 import Dataset
|
from netCDF4 import Dataset
|
||||||
#from mpl_toolkits.basemap import Basemap
|
#from mpl_toolkits.basemap import Basemap
|
||||||
import gdal
|
from osgeo import gdal
|
||||||
|
|
||||||
from scipy.interpolate import interp2d, griddata
|
from scipy.interpolate import interp2d, griddata
|
||||||
|
|
||||||
|
|
|
@ -8,8 +8,8 @@ import os
|
||||||
import glob
|
import glob
|
||||||
import isce
|
import isce
|
||||||
import isceobj
|
import isceobj
|
||||||
import gdal
|
from osgeo import gdal
|
||||||
from gdalconst import GA_ReadOnly
|
from osgeo.gdalconst import GA_ReadOnly
|
||||||
import s1a_isce_utils as ut
|
import s1a_isce_utils as ut
|
||||||
from isceobj.Planet.Planet import Planet
|
from isceobj.Planet.Planet import Planet
|
||||||
|
|
||||||
|
|
|
@ -11,9 +11,8 @@ import os
|
||||||
import isce
|
import isce
|
||||||
import isceobj
|
import isceobj
|
||||||
import shelve
|
import shelve
|
||||||
import gdal
|
from osgeo import gdal, osr
|
||||||
import osr
|
from osgeo.gdalconst import GA_ReadOnly
|
||||||
from gdalconst import GA_ReadOnly
|
|
||||||
from scipy import ndimage
|
from scipy import ndimage
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -7,7 +7,7 @@ import argparse
|
||||||
import isce
|
import isce
|
||||||
import isceobj
|
import isceobj
|
||||||
import os
|
import os
|
||||||
import gdal
|
from osgeo import gdal
|
||||||
import matplotlib as mpl; #mpl.use('Agg')
|
import matplotlib as mpl; #mpl.use('Agg')
|
||||||
import matplotlib.pyplot as plt
|
import matplotlib.pyplot as plt
|
||||||
from pykml.factory import KML_ElementMaker as KML
|
from pykml.factory import KML_ElementMaker as KML
|
||||||
|
|
|
@ -11,12 +11,6 @@ import datetime
|
||||||
import time
|
import time
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
# suppress matplotlib DEBUG message
|
|
||||||
from matplotlib.path import Path as Path
|
|
||||||
import logging
|
|
||||||
mpl_logger = logging.getLogger('matplotlib')
|
|
||||||
mpl_logger.setLevel(logging.WARNING)
|
|
||||||
|
|
||||||
import isce
|
import isce
|
||||||
import isceobj
|
import isceobj
|
||||||
from isceobj.Sensor.TOPS.Sentinel1 import Sentinel1
|
from isceobj.Sensor.TOPS.Sentinel1 import Sentinel1
|
||||||
|
@ -196,6 +190,17 @@ def cmdLineParse(iargs = None):
|
||||||
return inps
|
return inps
|
||||||
|
|
||||||
|
|
||||||
|
def generate_geopolygon(bbox):
|
||||||
|
"""generate shapely Polygon"""
|
||||||
|
from shapely.geometry import Point, Polygon
|
||||||
|
|
||||||
|
# convert pnts to shapely polygon format
|
||||||
|
# the order of pnts is conter-clockwise, starting from the lower ldft corner
|
||||||
|
# the order for Point is lon,lat
|
||||||
|
points = [Point(bbox[i][0], bbox[i][1]) for i in range(4)]
|
||||||
|
|
||||||
|
return Polygon([(p.coords.xy[0][0], p.coords.xy[1][0]) for p in points])
|
||||||
|
|
||||||
####################################
|
####################################
|
||||||
def get_dates(inps):
|
def get_dates(inps):
|
||||||
# Given the SLC directory This function extracts the acquisition dates
|
# Given the SLC directory This function extracts the acquisition dates
|
||||||
|
@ -251,7 +256,8 @@ def get_dates(inps):
|
||||||
f = open('SAFE_files.txt','w')
|
f = open('SAFE_files.txt','w')
|
||||||
safe_count=0
|
safe_count=0
|
||||||
safe_dict={}
|
safe_dict={}
|
||||||
bbox_poly = [[bbox[0],bbox[2]],[bbox[0],bbox[3]],[bbox[1],bbox[3]],[bbox[1],bbox[2]]]
|
bbox_poly = np.array([[bbox[2],bbox[0]],[bbox[3],bbox[0]],[bbox[3],bbox[1]],[bbox[2],bbox[1]]])
|
||||||
|
|
||||||
for safe in SAFE_files:
|
for safe in SAFE_files:
|
||||||
safeObj=sentinelSLC(safe)
|
safeObj=sentinelSLC(safe)
|
||||||
safeObj.get_dates()
|
safeObj.get_dates()
|
||||||
|
@ -268,45 +274,22 @@ def get_dates(inps):
|
||||||
reject_SAFE=True
|
reject_SAFE=True
|
||||||
pnts = safeObj.getkmlQUAD(safe)
|
pnts = safeObj.getkmlQUAD(safe)
|
||||||
|
|
||||||
# looping over the corners, keep the SAF is one of the corners is within the BBOX
|
# process pnts to use generate_geopolygon function
|
||||||
lats = []
|
pnts_bbox = np.empty((4,2))
|
||||||
lons = []
|
count = 0
|
||||||
for pnt in pnts:
|
for pnt in pnts:
|
||||||
lon = float(pnt.split(',')[0])
|
pnts_bbox[count, 0] = float(pnt.split(',')[0]) # longitude
|
||||||
lat = float(pnt.split(',')[1])
|
pnts_bbox[count, 1] = float(pnt.split(',')[1]) # latitude
|
||||||
|
count += 1
|
||||||
# keep track of all the corners to see of the product is larger than the bbox
|
pnts_polygon = generate_geopolygon(pnts_bbox)
|
||||||
lats.append(lat)
|
bbox_polygon = generate_geopolygon(bbox_poly)
|
||||||
lons.append(lon)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# bbox = SNWE
|
|
||||||
# polygon = bbox[0] bbox[2] SW
|
|
||||||
# bbox[0] bbox[3] SE
|
|
||||||
# bbox[1] bbox[3] NE
|
|
||||||
# bbox[1] bbox[2] NW
|
|
||||||
|
|
||||||
poly = Path(bbox_poly)
|
|
||||||
point = (lat,lon)
|
|
||||||
in_bbox = poly.contains_point(point)
|
|
||||||
|
|
||||||
|
|
||||||
# product corner falls within BBOX (SNWE)
|
|
||||||
if in_bbox:
|
|
||||||
reject_SAFE=False
|
|
||||||
|
|
||||||
|
|
||||||
# If the product is till being rejected, check if the BBOX corners fall within the frame
|
|
||||||
if reject_SAFE:
|
|
||||||
for point in bbox_poly:
|
|
||||||
frame = [[a,b] for a,b in zip(lats,lons)]
|
|
||||||
poly = Path(frame)
|
|
||||||
in_frame = poly.contains_point(point)
|
|
||||||
if in_frame:
|
|
||||||
reject_SAFE=False
|
|
||||||
|
|
||||||
|
# judge whether these two polygon intersect with each other
|
||||||
|
overlap_flag = pnts_polygon.intersects(bbox_polygon)
|
||||||
|
if overlap_flag:
|
||||||
|
reject_SAFE = False
|
||||||
|
else:
|
||||||
|
reject_SAFE = True
|
||||||
|
|
||||||
if not reject_SAFE:
|
if not reject_SAFE:
|
||||||
if safeObj.date not in safe_dict.keys() and safeObj.date not in excludeList:
|
if safeObj.date not in safe_dict.keys() and safeObj.date not in excludeList:
|
||||||
|
|
|
@ -38,7 +38,7 @@ from isceobj.Constants import SPEED_OF_LIGHT
|
||||||
import argparse
|
import argparse
|
||||||
import os
|
import os
|
||||||
import pickle
|
import pickle
|
||||||
import gdal
|
from osgeo import gdal
|
||||||
import numpy as np
|
import numpy as np
|
||||||
#import shelve
|
#import shelve
|
||||||
import s1a_isce_utils as ut
|
import s1a_isce_utils as ut
|
||||||
|
|
|
@ -56,7 +56,7 @@ def baselinegrid(inps):
|
||||||
"""
|
"""
|
||||||
Basline files are given as grids
|
Basline files are given as grids
|
||||||
"""
|
"""
|
||||||
import gdal
|
from osgeo import gdal
|
||||||
|
|
||||||
# parsing the command line inputs
|
# parsing the command line inputs
|
||||||
baseline_dir = inps.baseline_dir
|
baseline_dir = inps.baseline_dir
|
||||||
|
|
|
@ -16,7 +16,7 @@ RUN set -ex \
|
||||||
&& yum install -y \
|
&& yum install -y \
|
||||||
make ruby-devel rpm-build rubygems \
|
make ruby-devel rpm-build rubygems \
|
||||||
&& gem install ffi -v 1.12.2 \
|
&& gem install ffi -v 1.12.2 \
|
||||||
&& gem install --no-ri --no-rdoc fpm
|
&& gem install --no-ri --no-rdoc fpm -v 1.11.0
|
||||||
|
|
||||||
# install isce requirements
|
# install isce requirements
|
||||||
RUN set -ex \
|
RUN set -ex \
|
||||||
|
|
Loading…
Reference in New Issue