Merge pull request #622 from rtburns-jpl/pycuampcor-cpp-files
PyCuAmpcor: compile files as pure C++ when possible, fix MakefileLT1AB
commit
e7b506de0c
|
|
@ -11,24 +11,26 @@ set(CMAKE_CUDA_STANDARD_REQUIRED TRUE)
|
||||||
|
|
||||||
pybind11_add_module(PyCuAmpcor
|
pybind11_add_module(PyCuAmpcor
|
||||||
src/PyCuAmpcor.cpp
|
src/PyCuAmpcor.cpp
|
||||||
src/GDALImage.cu
|
src/GDALImage.cpp
|
||||||
src/SConscript
|
src/SConscript
|
||||||
src/cuAmpcorChunk.cu
|
src/cuAmpcorChunk.cpp
|
||||||
src/cuAmpcorController.cu
|
src/cuAmpcorController.cpp
|
||||||
src/cuAmpcorParameter.cu
|
src/cuAmpcorParameter.cpp
|
||||||
src/cuArrays.cu
|
src/cuArrays.cpp
|
||||||
src/cuArraysCopy.cu
|
src/cuArraysCopy.cu
|
||||||
src/cuArraysPadding.cu
|
src/cuArraysPadding.cu
|
||||||
src/cuCorrFrequency.cu
|
src/cuCorrFrequency.cu
|
||||||
src/cuCorrNormalization.cu
|
src/cuCorrNormalization.cu
|
||||||
src/cuCorrNormalizationSAT.cu
|
src/cuCorrNormalizationSAT.cu
|
||||||
src/cuCorrNormalizer.cu
|
src/cuCorrNormalizer.cpp
|
||||||
src/cuCorrTimeDomain.cu
|
src/cuCorrTimeDomain.cu
|
||||||
src/cuDeramp.cu
|
src/cuDeramp.cu
|
||||||
src/cuEstimateStats.cu
|
src/cuEstimateStats.cu
|
||||||
src/cuOffset.cu
|
src/cuOffset.cu
|
||||||
src/cuOverSampler.cu
|
src/cuOverSampler.cpp
|
||||||
src/cuSincOverSampler.cu
|
src/cuSincOverSampler.cu
|
||||||
|
src/cudaError.cpp
|
||||||
|
src/cudaUtil.cpp
|
||||||
)
|
)
|
||||||
target_include_directories(PyCuAmpcor PRIVATE
|
target_include_directories(PyCuAmpcor PRIVATE
|
||||||
src
|
src
|
||||||
|
|
|
||||||
|
|
@ -59,8 +59,6 @@ You may also install PyCuAmpcor as a standalone package.
|
||||||
# edit Makefile to provide the correct gdal include path and gpu architecture to NVCCFLAGS
|
# edit Makefile to provide the correct gdal include path and gpu architecture to NVCCFLAGS
|
||||||
# call make to compile
|
# call make to compile
|
||||||
make
|
make
|
||||||
# install
|
|
||||||
python3 setup.py install
|
|
||||||
```
|
```
|
||||||
|
|
||||||
## 3. User Guide
|
## 3. User Guide
|
||||||
|
|
|
||||||
|
|
@ -8,6 +8,7 @@
|
||||||
#include "GDALImage.h"
|
#include "GDALImage.h"
|
||||||
|
|
||||||
// dependencies
|
// dependencies
|
||||||
|
#include <cuda_runtime.h>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include "cudaError.h"
|
#include "cudaError.h"
|
||||||
|
|
||||||
|
|
@ -12,6 +12,7 @@
|
||||||
#define __GDALIMAGE_H
|
#define __GDALIMAGE_H
|
||||||
|
|
||||||
// dependencies
|
// dependencies
|
||||||
|
#include <driver_types.h>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <gdal_priv.h>
|
#include <gdal_priv.h>
|
||||||
#include <cpl_conv.h>
|
#include <cpl_conv.h>
|
||||||
|
|
|
||||||
|
|
@ -1,19 +1,30 @@
|
||||||
PROJECT = CUAMPCOR
|
CXX ?= g++
|
||||||
|
NVCC ?= nvcc
|
||||||
|
|
||||||
LDFLAGS = -lcuda -lcudart -lcufft -lgdal
|
CUDA_ROOT ?= $(dir $(shell which $(NVCC)))..
|
||||||
CXXFLAGS = -std=c++11 -fpermissive -DNDEBUG -fPIC -shared
|
|
||||||
NVCCFLAGS = -std=c++11 -m64 -DNDEBUG \
|
LDFLAGS = -L$(CUDA_ROOT)/lib64 -L$(CUDA_ROOT)/lib64/stubs -lcuda -lcudart -lcufft -lgdal
|
||||||
|
CXXFLAGS = -std=c++11 -fPIC -shared -I$(CUDA_ROOT)/include
|
||||||
|
NVCCFLAGS = -std=c++11 -m64 \
|
||||||
-gencode arch=compute_35,code=sm_35 \
|
-gencode arch=compute_35,code=sm_35 \
|
||||||
-gencode arch=compute_60,code=sm_60 \
|
-gencode arch=compute_60,code=sm_60 \
|
||||||
-Xcompiler -fPIC -shared -Wno-deprecated-gpu-targets \
|
-Xcompiler -fPIC -shared -Wno-deprecated-gpu-targets \
|
||||||
-ftz=false -prec-div=true -prec-sqrt=true \
|
-ftz=false -prec-div=true -prec-sqrt=true \
|
||||||
-I/usr/include/gdal
|
-I/usr/include/gdal
|
||||||
|
|
||||||
CXX=g++
|
CXXFLAGS += -O2 -DNDEBUG
|
||||||
NVCC=nvcc
|
NVCCFLAGS += -O2 -DNDEBUG
|
||||||
|
|
||||||
|
# pybind11 configuration
|
||||||
|
PYTHON ?= python3
|
||||||
|
PYTHON_CONFIG ?= python3-config
|
||||||
|
PYTHON_EXT_SUFFIX := $(shell "$(PYTHON_CONFIG)" --extension-suffix)
|
||||||
|
PYTHON_INCLUDES := $(shell "$(PYTHON)" -m pybind11 --includes) \
|
||||||
|
$(shell "$(PYTHON_CONFIG)" --includes)
|
||||||
|
|
||||||
DEPS = cudaUtil.h cudaError.h cuArrays.h GDALImage.h cuAmpcorParameter.h
|
DEPS = cudaUtil.h cudaError.h cuArrays.h GDALImage.h cuAmpcorParameter.h
|
||||||
OBJS = GDALImage.o cuArrays.o cuArraysCopy.o cuArraysPadding.o cuOverSampler.o \
|
OBJS = GDALImage.o cuArrays.o cuArraysCopy.o cuArraysPadding.o cuOverSampler.o \
|
||||||
|
cudaError.o cudaUtil.o \
|
||||||
cuSincOverSampler.o cuDeramp.o cuOffset.o \
|
cuSincOverSampler.o cuDeramp.o cuOffset.o \
|
||||||
cuCorrNormalization.o cuCorrNormalizationSAT.o cuCorrNormalizer.o \
|
cuCorrNormalization.o cuCorrNormalizationSAT.o cuCorrNormalizer.o \
|
||||||
cuAmpcorParameter.o cuCorrTimeDomain.o cuCorrFrequency.o \
|
cuAmpcorParameter.o cuCorrTimeDomain.o cuCorrFrequency.o \
|
||||||
|
|
@ -21,60 +32,16 @@ OBJS = GDALImage.o cuArrays.o cuArraysCopy.o cuArraysPadding.o cuOverSampler.o
|
||||||
|
|
||||||
all: pyampcor
|
all: pyampcor
|
||||||
|
|
||||||
GDALImage.o: GDALImage.cu $(DEPS)
|
pyampcor: PyCuAmpcor$(PYTHON_EXT_SUFFIX)
|
||||||
$(NVCC) $(NVCCFLAGS) -c -o $@ GDALImage.cu
|
|
||||||
|
|
||||||
cuArrays.o: cuArrays.cu $(DEPS)
|
PyCuAmpcor$(PYTHON_EXT_SUFFIX): PyCuAmpcor.cpp $(OBJS)
|
||||||
$(NVCC) $(NVCCFLAGS) -c -o $@ cuArrays.cu
|
$(CXX) $(CXXFLAGS) $(LDFLAGS) $(PYTHON_INCLUDES) $^ -o $@
|
||||||
|
|
||||||
cuArraysCopy.o: cuArraysCopy.cu $(DEPS)
|
%.o: %.cu $(DEPS)
|
||||||
$(NVCC) $(NVCCFLAGS) -c -o $@ cuArraysCopy.cu
|
$(NVCC) $(NVCCFLAGS) -c -o $@ $<
|
||||||
|
|
||||||
cuArraysPadding.o: cuArraysPadding.cu $(DEPS)
|
%.o: %.cpp $(DEPS)
|
||||||
$(NVCC) $(NVCCFLAGS) -c -o $@ cuArraysPadding.cu
|
$(CXX) $(CXXFLAGS) -c -o $@ $<
|
||||||
|
|
||||||
cuSincOverSampler.o: cuSincOverSampler.cu $(DEPS)
|
|
||||||
$(NVCC) $(NVCCFLAGS) -c -o $@ cuSincOverSampler.cu
|
|
||||||
|
|
||||||
cuOverSampler.o: cuOverSampler.cu $(DEPS)
|
|
||||||
$(NVCC) $(NVCCFLAGS) -c -o $@ cuOverSampler.cu
|
|
||||||
|
|
||||||
cuDeramp.o: cuDeramp.cu $(DEPS)
|
|
||||||
$(NVCC) $(NVCCFLAGS) -c -o $@ cuDeramp.cu
|
|
||||||
|
|
||||||
cuOffset.o: cuOffset.cu $(DEPS)
|
|
||||||
$(NVCC) $(NVCCFLAGS) -c -o $@ cuOffset.cu
|
|
||||||
|
|
||||||
cuCorrNormalization.o: cuCorrNormalization.cu $(DEPS)
|
|
||||||
$(NVCC) $(NVCCFLAGS) -c -o $@ cuCorrNormalization.cu
|
|
||||||
|
|
||||||
cuCorrNormalizationSAT.o: cuCorrNormalizationSAT.cu $(DEPS)
|
|
||||||
$(NVCC) $(NVCCFLAGS) -c -o $@ cuCorrNormalizationSAT.cu
|
|
||||||
|
|
||||||
cuCorrNormalizer.o: cuCorrNormalizer.cu $(DEPS)
|
|
||||||
$(NVCC) $(NVCCFLAGS) -c -o $@ cuCorrNormalizer.cu
|
|
||||||
|
|
||||||
cuAmpcorParameter.o: cuAmpcorParameter.cu
|
|
||||||
$(NVCC) $(NVCCFLAGS) -c -o $@ cuAmpcorParameter.cu
|
|
||||||
|
|
||||||
cuCorrTimeDomain.o: cuCorrTimeDomain.cu $(DEPS)
|
|
||||||
$(NVCC) $(NVCCFLAGS) -c -o $@ cuCorrTimeDomain.cu
|
|
||||||
|
|
||||||
cuCorrFrequency.o: cuCorrFrequency.cu $(DEPS) cuCorrFrequency.h
|
|
||||||
$(NVCC) $(NVCCFLAGS) -c -o $@ cuCorrFrequency.cu
|
|
||||||
|
|
||||||
cuAmpcorChunk.o: cuAmpcorChunk.cu cuAmpcorUtil.h $(DEPS)
|
|
||||||
$(NVCC) $(NVCCFLAGS) -c -o $@ cuAmpcorChunk.cu
|
|
||||||
|
|
||||||
cuAmpcorController.o: cuAmpcorController.cu
|
|
||||||
$(NVCC) $(NVCCFLAGS) -c -o $@ cuAmpcorController.cu
|
|
||||||
|
|
||||||
cuEstimateStats.o: cuEstimateStats.cu
|
|
||||||
$(NVCC) $(NVCCFLAGS) -c -o $@ cuEstimateStats.cu
|
|
||||||
|
|
||||||
|
|
||||||
pyampcor: $(OBJS)
|
|
||||||
rm -f PyCuAmpcor.cpp && python3 setup.py build_ext --inplace
|
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
rm -rf *.o *so build *~ PyCuAmpcor.cpp *.dat
|
rm -rf *.o *.so build *~
|
||||||
|
|
|
||||||
|
|
@ -7,14 +7,15 @@ package = envPyCuAmpcor['PACKAGE']
|
||||||
project = envPyCuAmpcor['PROJECT']
|
project = envPyCuAmpcor['PROJECT']
|
||||||
build = envPyCuAmpcor['PRJ_LIB_DIR']
|
build = envPyCuAmpcor['PRJ_LIB_DIR']
|
||||||
install = envPyCuAmpcor['PRJ_SCONS_INSTALL'] + '/' + package + '/' + project
|
install = envPyCuAmpcor['PRJ_SCONS_INSTALL'] + '/' + package + '/' + project
|
||||||
listFiles = ['GDALImage.cu', 'cuArrays.cu', 'cuArraysCopy.cu',
|
listFiles = ['GDALImage.cpp', 'cuArrays.cpp', 'cuArraysCopy.cu',
|
||||||
|
'cudaError.cpp', 'cudaUtil.cpp',
|
||||||
'cuArraysPadding.cu', 'cuOverSampler.cu',
|
'cuArraysPadding.cu', 'cuOverSampler.cu',
|
||||||
'cuSincOverSampler.cu', 'cuDeramp.cu',
|
'cuSincOverSampler.cpp', 'cuDeramp.cu',
|
||||||
'cuOffset.cu', 'cuCorrNormalization.cu',
|
'cuOffset.cu', 'cuCorrNormalization.cu',
|
||||||
'cuCorrNormalizationSAT.cu', 'cuCorrNormalizer.cu',
|
'cuCorrNormalizationSAT.cu', 'cuCorrNormalizer.cpp',
|
||||||
'cuAmpcorParameter.cu', 'cuCorrTimeDomain.cu',
|
'cuAmpcorParameter.cpp', 'cuCorrTimeDomain.cu',
|
||||||
'cuAmpcorController.cu', 'cuCorrFrequency.cu',
|
'cuAmpcorController.cpp', 'cuCorrFrequency.cu',
|
||||||
'cuAmpcorChunk.cu', 'cuEstimateStats.cu']
|
'cuAmpcorChunk.cpp', 'cuEstimateStats.cu']
|
||||||
|
|
||||||
lib = envPyCuAmpcor.SharedLibrary(target = 'PyCuAmpcor', source= listFiles, SHLIBPREFIX='')
|
lib = envPyCuAmpcor.SharedLibrary(target = 'PyCuAmpcor', source= listFiles, SHLIBPREFIX='')
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,8 @@
|
||||||
#include "cuAmpcorChunk.h"
|
#include "cuAmpcorChunk.h"
|
||||||
|
|
||||||
#include "cuAmpcorUtil.h"
|
#include "cuAmpcorUtil.h"
|
||||||
|
#include <cufft.h>
|
||||||
|
#include <iostream>
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Run ampcor process for a batch of images (a chunk)
|
* Run ampcor process for a batch of images (a chunk)
|
||||||
|
|
@ -12,6 +12,7 @@
|
||||||
#include "cudaUtil.h"
|
#include "cudaUtil.h"
|
||||||
#include "cuAmpcorChunk.h"
|
#include "cuAmpcorChunk.h"
|
||||||
#include "cuAmpcorUtil.h"
|
#include "cuAmpcorUtil.h"
|
||||||
|
#include <cuda_runtime.h>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
|
|
||||||
// constructor
|
// constructor
|
||||||
|
|
@ -7,6 +7,9 @@
|
||||||
// dependencies
|
// dependencies
|
||||||
#include "cuArrays.h"
|
#include "cuArrays.h"
|
||||||
#include "cudaError.h"
|
#include "cudaError.h"
|
||||||
|
#include <cuda_runtime.h>
|
||||||
|
#include <fstream>
|
||||||
|
#include <iostream>
|
||||||
|
|
||||||
// allocate arrays in device memory
|
// allocate arrays in device memory
|
||||||
template <typename T>
|
template <typename T>
|
||||||
|
|
@ -12,14 +12,9 @@
|
||||||
#define __CUARRAYS_H
|
#define __CUARRAYS_H
|
||||||
|
|
||||||
// cuda dependencies
|
// cuda dependencies
|
||||||
#include <cuda.h>
|
|
||||||
#include <driver_types.h>
|
#include <driver_types.h>
|
||||||
|
|
||||||
#include <iostream>
|
#include <string>
|
||||||
#include <fstream>
|
|
||||||
#include <cstdlib>
|
|
||||||
#include <ctime>
|
|
||||||
|
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
class cuArrays{
|
class cuArrays{
|
||||||
|
|
|
||||||
|
|
@ -109,4 +109,4 @@ void cuArraysElementMultiplyConjugate(cuArrays<float2> *image1, cuArrays<float2>
|
||||||
cudaKernel_elementMulConjugate<<<blockspergrid, threadsperblock, 0, stream>>>(image1->devData, image2->devData, size, coef );
|
cudaKernel_elementMulConjugate<<<blockspergrid, threadsperblock, 0, stream>>>(image1->devData, image2->devData, size, coef );
|
||||||
getLastCudaError("cuArraysElementMultiply error\n");
|
getLastCudaError("cuArraysElementMultiply error\n");
|
||||||
}
|
}
|
||||||
//end of file
|
//end of file
|
||||||
|
|
|
||||||
|
|
@ -8,8 +8,8 @@
|
||||||
#define __CUCORRFREQUENCY_H
|
#define __CUCORRFREQUENCY_H
|
||||||
|
|
||||||
// dependencies
|
// dependencies
|
||||||
#include "cudaUtil.h"
|
|
||||||
#include "cuArrays.h"
|
#include "cuArrays.h"
|
||||||
|
#include <cufft.h>
|
||||||
|
|
||||||
class cuFreqCorrelator
|
class cuFreqCorrelator
|
||||||
{
|
{
|
||||||
|
|
@ -34,4 +34,4 @@ public:
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif //__CUCORRFREQUENCY_H
|
#endif //__CUCORRFREQUENCY_H
|
||||||
// end of file
|
// end of file
|
||||||
|
|
|
||||||
|
|
@ -13,7 +13,6 @@
|
||||||
#define __CUNORMALIZER_H
|
#define __CUNORMALIZER_H
|
||||||
|
|
||||||
#include "cuArrays.h"
|
#include "cuArrays.h"
|
||||||
#include "cudaUtil.h"
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Abstract class interface for correlation surface normalization processor
|
* Abstract class interface for correlation surface normalization processor
|
||||||
|
|
|
||||||
|
|
@ -12,7 +12,7 @@
|
||||||
#define __CUOVERSAMPLER_H
|
#define __CUOVERSAMPLER_H
|
||||||
|
|
||||||
#include "cuArrays.h"
|
#include "cuArrays.h"
|
||||||
#include "cudaUtil.h"
|
#include <cufft.h>
|
||||||
|
|
||||||
// FFT Oversampler for complex images
|
// FFT Oversampler for complex images
|
||||||
class cuOverSamplerC2C
|
class cuOverSamplerC2C
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,44 @@
|
||||||
|
#include "cudaError.h"
|
||||||
|
|
||||||
|
#include <cuda_runtime.h>
|
||||||
|
#include <cufft.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
#ifdef __DRIVER_TYPES_H__
|
||||||
|
#ifndef DEVICE_RESET
|
||||||
|
#define DEVICE_RESET cudaDeviceReset();
|
||||||
|
#endif
|
||||||
|
#else
|
||||||
|
#ifndef DEVICE_RESET
|
||||||
|
#define DEVICE_RESET
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
template<typename T >
|
||||||
|
void check(T result, char const *const func, const char *const file, int const line)
|
||||||
|
{
|
||||||
|
if (result) {
|
||||||
|
fprintf(stderr, "CUDA error at %s:%d code=%d(%s) \n",
|
||||||
|
file, line, static_cast<unsigned int>(result), func);
|
||||||
|
DEVICE_RESET
|
||||||
|
// Make sure we call CUDA Device Reset before exiting
|
||||||
|
exit(EXIT_FAILURE);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template void check(cudaError_t, char const *const, const char *const, int const);
|
||||||
|
template void check(cufftResult_t, char const *const, const char *const, int const);
|
||||||
|
|
||||||
|
void __getLastCudaError(const char *errorMessage, const char *file, const int line)
|
||||||
|
{
|
||||||
|
cudaError_t err = cudaGetLastError();
|
||||||
|
|
||||||
|
if (cudaSuccess != err)
|
||||||
|
{
|
||||||
|
fprintf(stderr, "%s(%i) : CUDA error : %s : (%d) %s.\n",
|
||||||
|
file, line, errorMessage, (int)err, cudaGetErrorString(err));
|
||||||
|
DEVICE_RESET
|
||||||
|
exit(EXIT_FAILURE);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -10,51 +10,13 @@
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <cstdlib>
|
|
||||||
#include <cstdio>
|
|
||||||
#include <cstring>
|
|
||||||
#include <cufft.h>
|
|
||||||
#include "debug.h"
|
#include "debug.h"
|
||||||
#include <cuda.h>
|
|
||||||
|
|
||||||
|
|
||||||
#ifdef __DRIVER_TYPES_H__
|
|
||||||
#ifndef DEVICE_RESET
|
|
||||||
#define DEVICE_RESET cudaDeviceReset();
|
|
||||||
#endif
|
|
||||||
#else
|
|
||||||
#ifndef DEVICE_RESET
|
|
||||||
#define DEVICE_RESET
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
|
|
||||||
template<typename T >
|
template<typename T >
|
||||||
void check(T result, char const *const func, const char *const file, int const line)
|
void check(T result, char const *const func, const char *const file, int const line);
|
||||||
{
|
|
||||||
if (result)
|
|
||||||
{
|
|
||||||
|
|
||||||
fprintf(stderr, "CUDA error at %s:%d code=%d(%s) \n",
|
|
||||||
file, line, static_cast<unsigned int>(result), func);
|
|
||||||
DEVICE_RESET
|
|
||||||
// Make sure we call CUDA Device Reset before exiting
|
|
||||||
exit(EXIT_FAILURE);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// This will output the proper error string when calling cudaGetLastError
|
// This will output the proper error string when calling cudaGetLastError
|
||||||
inline void __getLastCudaError(const char *errorMessage, const char *file, const int line)
|
void __getLastCudaError(const char *errorMessage, const char *file, const int line);
|
||||||
{
|
|
||||||
cudaError_t err = cudaGetLastError();
|
|
||||||
|
|
||||||
if (cudaSuccess != err)
|
|
||||||
{
|
|
||||||
fprintf(stderr, "%s(%i) : CUDA error : %s : (%d) %s.\n",
|
|
||||||
file, line, errorMessage, (int)err, cudaGetErrorString(err));
|
|
||||||
DEVICE_RESET
|
|
||||||
exit(EXIT_FAILURE);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// This will output the proper CUDA error strings in the event that a CUDA host call returns an error
|
// This will output the proper CUDA error strings in the event that a CUDA host call returns an error
|
||||||
#ifdef CUDA_ERROR_CHECK
|
#ifdef CUDA_ERROR_CHECK
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,58 @@
|
||||||
|
#include "cudaUtil.h"
|
||||||
|
|
||||||
|
#include <cuda_runtime.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include "cudaError.h"
|
||||||
|
|
||||||
|
int gpuDeviceInit(int devID)
|
||||||
|
{
|
||||||
|
int device_count;
|
||||||
|
checkCudaErrors(cudaGetDeviceCount(&device_count));
|
||||||
|
|
||||||
|
if (device_count == 0) {
|
||||||
|
fprintf(stderr, "gpuDeviceInit() CUDA error: no devices supporting CUDA.\n");
|
||||||
|
exit(EXIT_FAILURE);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (devID < 0 || devID > device_count - 1) {
|
||||||
|
fprintf(stderr, "gpuDeviceInit() Device %d is not a valid GPU device. \n", devID);
|
||||||
|
exit(EXIT_FAILURE);
|
||||||
|
}
|
||||||
|
|
||||||
|
checkCudaErrors(cudaSetDevice(devID));
|
||||||
|
printf("Using CUDA Device %d ...\n", devID);
|
||||||
|
|
||||||
|
return devID;
|
||||||
|
}
|
||||||
|
|
||||||
|
void gpuDeviceList()
|
||||||
|
{
|
||||||
|
int device_count = 0;
|
||||||
|
int current_device = 0;
|
||||||
|
cudaDeviceProp deviceProp;
|
||||||
|
checkCudaErrors(cudaGetDeviceCount(&device_count));
|
||||||
|
|
||||||
|
fprintf(stderr, "Detecting all CUDA devices ...\n");
|
||||||
|
if (device_count == 0) {
|
||||||
|
fprintf(stderr, "CUDA error: no devices supporting CUDA.\n");
|
||||||
|
exit(EXIT_FAILURE);
|
||||||
|
}
|
||||||
|
|
||||||
|
while (current_device < device_count) {
|
||||||
|
checkCudaErrors(cudaGetDeviceProperties(&deviceProp, current_device));
|
||||||
|
if (deviceProp.computeMode == cudaComputeModeProhibited) {
|
||||||
|
fprintf(stderr, "CUDA Device [%d]: \"%s\" is not available: "
|
||||||
|
"device is running in <Compute Mode Prohibited> \n",
|
||||||
|
current_device, deviceProp.name);
|
||||||
|
} else if (deviceProp.major < 1) {
|
||||||
|
fprintf(stderr, "CUDA Device [%d]: \"%s\" is not available: "
|
||||||
|
"device does not support CUDA \n",
|
||||||
|
current_device, deviceProp.name);
|
||||||
|
} else {
|
||||||
|
fprintf(stderr, "CUDA Device [%d]: \"%s\" is available.\n",
|
||||||
|
current_device, deviceProp.name);
|
||||||
|
}
|
||||||
|
current_device++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -10,9 +10,6 @@
|
||||||
#ifndef __CUDAUTIL_H
|
#ifndef __CUDAUTIL_H
|
||||||
#define __CUDAUTIL_H
|
#define __CUDAUTIL_H
|
||||||
|
|
||||||
#include <cuda_runtime.h>
|
|
||||||
#include "cudaError.h"
|
|
||||||
|
|
||||||
// for 2D FFT
|
// for 2D FFT
|
||||||
#define NRANK 2
|
#define NRANK 2
|
||||||
|
|
||||||
|
|
@ -47,12 +44,6 @@
|
||||||
#define MIN(a,b) (a > b ? b: a)
|
#define MIN(a,b) (a > b ? b: a)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Float To Int conversion
|
|
||||||
inline int ftoi(float value)
|
|
||||||
{
|
|
||||||
return (value >= 0 ? (int)(value + 0.5) : (int)(value - 0.5));
|
|
||||||
}
|
|
||||||
|
|
||||||
// compute the next integer in power of 2
|
// compute the next integer in power of 2
|
||||||
inline int nextpower2(int value)
|
inline int nextpower2(int value)
|
||||||
{
|
{
|
||||||
|
|
@ -61,63 +52,11 @@ inline int nextpower2(int value)
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// General GPU Device CUDA Initialization
|
// General GPU Device CUDA Initialization
|
||||||
inline int gpuDeviceInit(int devID)
|
int gpuDeviceInit(int devID);
|
||||||
{
|
|
||||||
int device_count;
|
|
||||||
checkCudaErrors(cudaGetDeviceCount(&device_count));
|
|
||||||
|
|
||||||
if (device_count == 0)
|
|
||||||
{
|
|
||||||
fprintf(stderr, "gpuDeviceInit() CUDA error: no devices supporting CUDA.\n");
|
|
||||||
exit(EXIT_FAILURE);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (devID < 0 || devID > device_count-1)
|
|
||||||
{
|
|
||||||
fprintf(stderr, "gpuDeviceInit() Device %d is not a valid GPU device. \n", devID);
|
|
||||||
exit(EXIT_FAILURE);
|
|
||||||
}
|
|
||||||
|
|
||||||
checkCudaErrors(cudaSetDevice(devID));
|
|
||||||
printf("Using CUDA Device %d ...\n", devID);
|
|
||||||
|
|
||||||
return devID;
|
|
||||||
}
|
|
||||||
|
|
||||||
// This function lists all available GPUs
|
// This function lists all available GPUs
|
||||||
inline void gpuDeviceList()
|
void gpuDeviceList();
|
||||||
{
|
|
||||||
int device_count = 0;
|
|
||||||
int current_device = 0;
|
|
||||||
cudaDeviceProp deviceProp;
|
|
||||||
checkCudaErrors(cudaGetDeviceCount(&device_count));
|
|
||||||
|
|
||||||
fprintf(stderr, "Detecting all CUDA devices ...\n");
|
|
||||||
if (device_count == 0)
|
|
||||||
{
|
|
||||||
fprintf(stderr, "CUDA error: no devices supporting CUDA.\n");
|
|
||||||
exit(EXIT_FAILURE);
|
|
||||||
}
|
|
||||||
|
|
||||||
while (current_device < device_count)
|
|
||||||
{
|
|
||||||
checkCudaErrors(cudaGetDeviceProperties(&deviceProp, current_device));
|
|
||||||
if (deviceProp.computeMode == cudaComputeModeProhibited)
|
|
||||||
{
|
|
||||||
fprintf(stderr, "CUDA Device [%d]: \"%s\" is not available: device is running in <Compute Mode Prohibited> \n", current_device, deviceProp.name);
|
|
||||||
}
|
|
||||||
else if (deviceProp.major < 1)
|
|
||||||
{
|
|
||||||
fprintf(stderr, "CUDA Device [%d]: \"%s\" is not available: device does not support CUDA \n", current_device, deviceProp.name);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
fprintf(stderr, "CUDA Device [%d]: \"%s\" is available.\n", current_device, deviceProp.name);
|
|
||||||
}
|
|
||||||
current_device++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif //__CUDAUTIL_H
|
#endif //__CUDAUTIL_H
|
||||||
//end of file
|
//end of file
|
||||||
|
|
|
||||||
|
|
@ -7,7 +7,8 @@
|
||||||
#ifndef __FLOAT2_H
|
#ifndef __FLOAT2_H
|
||||||
#define __FLOAT2_H
|
#define __FLOAT2_H
|
||||||
|
|
||||||
#include <vector_types.h>
|
#include <cuda_runtime.h>
|
||||||
|
#include <math.h>
|
||||||
|
|
||||||
inline __host__ __device__ void zero(float2 &a) { a.x = 0.0f; a.y = 0.0f; }
|
inline __host__ __device__ void zero(float2 &a) { a.x = 0.0f; a.y = 0.0f; }
|
||||||
|
|
||||||
|
|
@ -126,4 +127,4 @@ inline __host__ __device__ float2 complexExp(float arg)
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif //__FLOAT2_H
|
#endif //__FLOAT2_H
|
||||||
// end of file
|
// end of file
|
||||||
|
|
|
||||||
|
|
@ -1,28 +0,0 @@
|
||||||
#
|
|
||||||
# Implementation: python setup.py build_ext --inplace
|
|
||||||
# Generates PyCuAmpcor.xxx.so (where xxx is just some local sys-arch information).
|
|
||||||
# Note you need to run your makefile *FIRST* to generate the cuAmpcor.o object.
|
|
||||||
#
|
|
||||||
|
|
||||||
from distutils.core import setup
|
|
||||||
from distutils.extension import Extension
|
|
||||||
from Cython.Build import cythonize
|
|
||||||
|
|
||||||
import numpy
|
|
||||||
|
|
||||||
setup( name = 'PyCuAmpcor',
|
|
||||||
version = '2.0.0',
|
|
||||||
ext_modules = cythonize(Extension(
|
|
||||||
"PyCuAmpcor",
|
|
||||||
sources=['PyCuAmpcor.pyx'],
|
|
||||||
include_dirs=['/usr/local/cuda/include', numpy.get_include()], # REPLACE WITH YOUR PATH TO YOUR CUDA LIBRARY HEADERS
|
|
||||||
extra_compile_args=['-fPIC','-fpermissive'],
|
|
||||||
extra_objects=['GDALImage.o','cuAmpcorChunk.o','cuAmpcorParameter.o','cuCorrFrequency.o',
|
|
||||||
'cuCorrNormalization.o','cuCorrTimeDomain.o','cuArraysCopy.o',
|
|
||||||
'cuArrays.o','cuArraysPadding.o','cuOffset.o','cuOverSampler.o',
|
|
||||||
'cuSincOverSampler.o', 'cuDeramp.o','cuAmpcorController.o','cuEstimateStats.o'],
|
|
||||||
extra_link_args=['-L/usr/local/cuda/lib64',
|
|
||||||
'-L/usr/lib64/nvidia',
|
|
||||||
'-lcuda','-lcudart','-lcufft','-lgdal'], # REPLACE FIRST PATH WITH YOUR PATH TO YOUR CUDA LIBRARIES
|
|
||||||
language='c++'
|
|
||||||
)))
|
|
||||||
Loading…
Reference in New Issue