GPU geo2rdr and topo memory allocation fix
parent
31803ef7fa
commit
74c92a1dc2
|
@ -4,6 +4,7 @@
|
||||||
//
|
//
|
||||||
|
|
||||||
#include <cuda_runtime.h>
|
#include <cuda_runtime.h>
|
||||||
|
#include <cassert>
|
||||||
#include <math.h>
|
#include <math.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <sys/time.h>
|
#include <sys/time.h>
|
||||||
|
@ -65,7 +66,7 @@ struct Poly1d {
|
||||||
__constant__ double d_inpts_double[9];
|
__constant__ double d_inpts_double[9];
|
||||||
__constant__ int d_inpts_int[3];
|
__constant__ int d_inpts_int[3];
|
||||||
|
|
||||||
// Mem usage: 27 doubles (216 bytes) per call
|
// Mem usage: 27 doubles (216 bytes) per call
|
||||||
__device__ int interpolateOrbit(struct Orbit *orb, double t, double *xyz, double *vel) {
|
__device__ int interpolateOrbit(struct Orbit *orb, double t, double *xyz, double *vel) {
|
||||||
double h[4], hdot[4], f0[4], f1[4], g0[4], g1[4];
|
double h[4], hdot[4], f0[4], f1[4], g0[4], g1[4];
|
||||||
double sum = 0.0;
|
double sum = 0.0;
|
||||||
|
@ -197,7 +198,7 @@ __global__ void runGeo(struct Orbit orb, struct Poly1d fdvsrng, struct Poly1d fd
|
||||||
if (pixel < NPIXELS) { // The number of pixels in a run changes based on if it's a full run or a partial run
|
if (pixel < NPIXELS) { // The number of pixels in a run changes based on if it's a full run or a partial run
|
||||||
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * *
|
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * *
|
||||||
* Input mapping
|
* Input mapping
|
||||||
*
|
*
|
||||||
* int[0] = demLength
|
* int[0] = demLength
|
||||||
* int[1] = demWidth
|
* int[1] = demWidth
|
||||||
* int[2] = bistatic
|
* int[2] = bistatic
|
||||||
|
@ -212,7 +213,7 @@ __global__ void runGeo(struct Orbit orb, struct Poly1d fdvsrng, struct Poly1d fd
|
||||||
* double[7] = dmrg
|
* double[7] = dmrg
|
||||||
* double[8] = dtaz
|
* double[8] = dtaz
|
||||||
* * * * * * * * * * * * * * * * * * * * * * * * * * * * */
|
* * * * * * * * * * * * * * * * * * * * * * * * * * * * */
|
||||||
|
|
||||||
double xyz[3], llh[3], satx[3], satv[3], dr[3];
|
double xyz[3], llh[3], satx[3], satv[3], dr[3];
|
||||||
double rngpix, tline, tprev, fnprime, fdop, fdopder;
|
double rngpix, tline, tprev, fnprime, fdop, fdopder;
|
||||||
int stat, i, j;
|
int stat, i, j;
|
||||||
|
@ -231,7 +232,7 @@ __global__ void runGeo(struct Orbit orb, struct Poly1d fdvsrng, struct Poly1d fd
|
||||||
llh2xyz(&elp,xyz,llh);
|
llh2xyz(&elp,xyz,llh);
|
||||||
|
|
||||||
tline = .5 * (d_inpts_double[2] + d_inpts_double[3]);
|
tline = .5 * (d_inpts_double[2] + d_inpts_double[3]);
|
||||||
stat = interpolateOrbit(&orb, tline, satx, satv); // Originally we got xyz_mid and vel_mid, then copied into satx/satv,
|
stat = interpolateOrbit(&orb, tline, satx, satv); // Originally we got xyz_mid and vel_mid, then copied into satx/satv,
|
||||||
// but since these are all independent here it's fine
|
// but since these are all independent here it's fine
|
||||||
if (stat != 0) isOutside = true; // Should exit, but this is next-best thing...
|
if (stat != 0) isOutside = true; // Should exit, but this is next-best thing...
|
||||||
|
|
||||||
|
@ -294,11 +295,17 @@ int nLinesPossible(int length, int width) {
|
||||||
size_t freeByte, totalByte;
|
size_t freeByte, totalByte;
|
||||||
int linesPerRun;
|
int linesPerRun;
|
||||||
cudaMemGetInfo(&freeByte, &totalByte);
|
cudaMemGetInfo(&freeByte, &totalByte);
|
||||||
printf("tb %ld\n", totalByte);
|
printf("Available free gpu memory in bytes %ld\n", freeByte);
|
||||||
totalByte = size_t((double(totalByte) / 5.e8) * 5.e8); // Round down to nearest .5 GB
|
// use 100 MiB (100 * 1024 * 1024 bytes) as the rounding unit; may be adjusted
|
||||||
printf("tba %ld\n", totalByte);
|
size_t memoryRoundingUnit = 1024ULL * 1024ULL * 100;
|
||||||
printf("Device has roughly %.4f GB of memory, ", double(totalByte)/1.e9);
|
// use 2*memoryRoundingUnit as an overhead for safety
|
||||||
linesPerRun = totalByte / (556 * width);
|
freeByte = (freeByte / memoryRoundingUnit -2) * memoryRoundingUnit;
|
||||||
|
assert(freeByte >0);
|
||||||
|
// printf("GPU Memory to be used %ld\n", freeByte);
|
||||||
|
// printf("Device has roughly %.4f GB of memory, ", double(totalByte)/1.e9);
|
||||||
|
// determine the allowed max lines per run; estimated per-pixel memory usage is 7 doubles (7*sizeof(double) bytes)
|
||||||
|
linesPerRun = freeByte / (7*sizeof(double) * width);
|
||||||
|
assert(linesPerRun>0);
|
||||||
printf("and can process roughly %d lines (each with %d pixels) per run.\n", linesPerRun, width);
|
printf("and can process roughly %d lines (each with %d pixels) per run.\n", linesPerRun, width);
|
||||||
return linesPerRun;
|
return linesPerRun;
|
||||||
}
|
}
|
||||||
|
@ -319,9 +326,9 @@ void freePoly1d(struct Poly1d *poly) {
|
||||||
free(poly->coeffs);
|
free(poly->coeffs);
|
||||||
}
|
}
|
||||||
|
|
||||||
void runGPUGeo(int iter, int numPix, double *h_inpts_dbl, int *h_inpts_int, double *h_lat, double *h_lon, double *h_dem, int h_orbNvec, double *h_orbSvs,
|
void runGPUGeo(int iter, int numPix, double *h_inpts_dbl, int *h_inpts_int, double *h_lat, double *h_lon, double *h_dem, int h_orbNvec, double *h_orbSvs,
|
||||||
int h_polyOrd, double h_polyMean, double h_polyNorm, double *h_polyCoeffs, double h_polyPRF, double **accArr) {
|
int h_polyOrd, double h_polyMean, double h_polyNorm, double *h_polyCoeffs, double h_polyPRF, double **accArr) {
|
||||||
|
|
||||||
double iStartCpy, iStartRun, iEndRun, iEndCpy;
|
double iStartCpy, iStartRun, iEndRun, iEndCpy;
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
|
|
|
@ -88,7 +88,7 @@ Geo2rdr::Geo2rdr() {
|
||||||
}
|
}
|
||||||
|
|
||||||
void Geo2rdr::geo2rdr() {
|
void Geo2rdr::geo2rdr() {
|
||||||
|
|
||||||
double *lat, *lon, *dem, *rgm, *azt, *rgoff, *azoff;
|
double *lat, *lon, *dem, *rgm, *azt, *rgoff, *azoff;
|
||||||
double xyz_mid[3], vel_mid[3], llh[3], xyz[3], satx[3], satv[3], dr[3];
|
double xyz_mid[3], vel_mid[3], llh[3], xyz[3], satx[3], satv[3], dr[3];
|
||||||
double tend, tline, tprev, rngend, rngpix, tmid, temp, dtaz, dmrg, fdop, fdopder, fnprime;
|
double tend, tline, tprev, rngend, rngpix, tmid, temp, dtaz, dmrg, fdop, fdopder, fnprime;
|
||||||
|
@ -137,7 +137,7 @@ void Geo2rdr::geo2rdr() {
|
||||||
}
|
}
|
||||||
|
|
||||||
// OpenMP replacement for clock() (clock reports cumulative thread time, not single thread
|
// OpenMP replacement for clock() (clock reports cumulative thread time, not single thread
|
||||||
// time, so clock() on 4 threads would report 4 x the true runtime)
|
// time, so clock() on 4 threads would report 4 x the true runtime)
|
||||||
timer_start = omp_get_wtime();
|
timer_start = omp_get_wtime();
|
||||||
cnt = 0;
|
cnt = 0;
|
||||||
printf("Geo2rdr executing on %d threads...\n", omp_get_max_threads());
|
printf("Geo2rdr executing on %d threads...\n", omp_get_max_threads());
|
||||||
|
@ -259,12 +259,20 @@ void Geo2rdr::geo2rdr() {
|
||||||
wd.width = demWidth;
|
wd.width = demWidth;
|
||||||
wd.firstWrite = true; // Flag to ignore write instructions
|
wd.firstWrite = true; // Flag to ignore write instructions
|
||||||
pthread_create(&writeThread, &attr, writeToFile, (void*)&wd); // Fires empty thread
|
pthread_create(&writeThread, &attr, writeToFile, (void*)&wd); // Fires empty thread
|
||||||
|
|
||||||
int totalPixels = demLength * demWidth;
|
size_t totalPixels = demLength * demWidth;
|
||||||
//int linesPerRun = min(demLength, nLinesPossible(demLength, demWidth));
|
// adjust the lines per run by the available gpu memory
|
||||||
int linesPerRun = demLength;
|
int linesPerRun = std::min(demLength, nLinesPossible(demLength, demWidth));
|
||||||
while ((linesPerRun*demWidth) > 2e8) linesPerRun--;
|
// ! To best parallelize the computation, using the maximum available GPU memory is the best option
|
||||||
int pixPerRun = linesPerRun * demWidth;
|
// ! the following adjustment is not needed
|
||||||
|
// adjust further by the max pixels per run, preferably as a user-configurable parameter
|
||||||
|
// temp set as 2^20
|
||||||
|
// size_t maxPixPerRun = 1 << 20;
|
||||||
|
// size_t pixPerRun = std::min((size_t)linesPerRun*demWidth, maxPixPerRun);
|
||||||
|
// linesPerRun = pixPerRun/demWidth *demWidth;
|
||||||
|
|
||||||
|
// recalculate run info
|
||||||
|
size_t pixPerRun = linesPerRun * demWidth;
|
||||||
int nRuns = demLength / linesPerRun;
|
int nRuns = demLength / linesPerRun;
|
||||||
int remPix = totalPixels - (nRuns * pixPerRun);
|
int remPix = totalPixels - (nRuns * pixPerRun);
|
||||||
int remLines = remPix / demWidth;
|
int remLines = remPix / demWidth;
|
||||||
|
@ -273,7 +281,7 @@ void Geo2rdr::geo2rdr() {
|
||||||
if (remPix > 0) printf(" (with %d lines in a final partial block)", remLines);
|
if (remPix > 0) printf(" (with %d lines in a final partial block)", remLines);
|
||||||
printf("\n");
|
printf("\n");
|
||||||
|
|
||||||
lat = new double[pixPerRun];
|
lat = new double[pixPerRun];
|
||||||
lon = new double[pixPerRun];
|
lon = new double[pixPerRun];
|
||||||
dem = new double[pixPerRun];
|
dem = new double[pixPerRun];
|
||||||
size_t nb_pixels = pixPerRun * sizeof(double);
|
size_t nb_pixels = pixPerRun * sizeof(double);
|
||||||
|
@ -291,14 +299,14 @@ void Geo2rdr::geo2rdr() {
|
||||||
outputArrays[2] = (double *)malloc(nb_pixels); // h_rgoff
|
outputArrays[2] = (double *)malloc(nb_pixels); // h_rgoff
|
||||||
outputArrays[3] = (double *)malloc(nb_pixels); // h_azoff
|
outputArrays[3] = (double *)malloc(nb_pixels); // h_azoff
|
||||||
|
|
||||||
runGPUGeo(i, pixPerRun, gpu_inputs_d, gpu_inputs_i, lat, lon, dem,
|
runGPUGeo(i, pixPerRun, gpu_inputs_d, gpu_inputs_i, lat, lon, dem,
|
||||||
gpu_orbNvec, gpu_orbSvs, gpu_polyOrd, gpu_polyMean, gpu_polyNorm,
|
gpu_orbNvec, gpu_orbSvs, gpu_polyOrd, gpu_polyMean, gpu_polyNorm,
|
||||||
gpu_polyCoef, prf, outputArrays);
|
gpu_polyCoef, prf, outputArrays);
|
||||||
for (int j=0; j<4; j++) writeArrays[j] = outputArrays[j]; // Copying pointers
|
for (int j=0; j<4; j++) writeArrays[j] = outputArrays[j]; // Copying pointers
|
||||||
if (i != 0) printf(" Waiting for previous asynchronous write-out to finish...\n");
|
if (i != 0) printf(" Waiting for previous asynchronous write-out to finish...\n");
|
||||||
pthread_attr_destroy(&attr);
|
pthread_attr_destroy(&attr);
|
||||||
pthread_join(writeThread, &thread_stat); // Waits for async thread to finish
|
pthread_join(writeThread, &thread_stat); // Waits for async thread to finish
|
||||||
|
|
||||||
printf(" Writing run %d out asynchronously to image files...\n", i);
|
printf(" Writing run %d out asynchronously to image files...\n", i);
|
||||||
wd.accessors = (void**)accObjs;
|
wd.accessors = (void**)accObjs;
|
||||||
wd.rg = writeArrays[0];
|
wd.rg = writeArrays[0];
|
||||||
|
@ -381,14 +389,14 @@ void Geo2rdr::geo2rdr() {
|
||||||
pixel = latAccObj->getLineSequential((char *)lat);
|
pixel = latAccObj->getLineSequential((char *)lat);
|
||||||
pixel = lonAccObj->getLineSequential((char *)lon);
|
pixel = lonAccObj->getLineSequential((char *)lon);
|
||||||
pixel = hgtAccObj->getLineSequential((char *)dem);
|
pixel = hgtAccObj->getLineSequential((char *)dem);
|
||||||
|
|
||||||
if ((line%1000) == 0) printf("Processing line: %d %d\n", line, numOutsideImage);
|
if ((line%1000) == 0) printf("Processing line: %d %d\n", line, numOutsideImage);
|
||||||
|
|
||||||
#pragma omp parallel for private(pixel, rngpix, tline, tprev, stat, fnprime, fdop, \
|
#pragma omp parallel for private(pixel, rngpix, tline, tprev, stat, fnprime, fdop, \
|
||||||
fdopder, isOutside, xyz, llh, satx, satv, dr) \
|
fdopder, isOutside, xyz, llh, satx, satv, dr) \
|
||||||
reduction(+:numOutsideImage,conv,cnt)
|
reduction(+:numOutsideImage,conv,cnt)
|
||||||
for (pixel=0; pixel<demWidth; pixel++) {
|
for (pixel=0; pixel<demWidth; pixel++) {
|
||||||
|
|
||||||
isOutside = false; // Flag to determine if point is outside image
|
isOutside = false; // Flag to determine if point is outside image
|
||||||
|
|
||||||
llh[0] = lat[pixel] * (M_PI / 180.);
|
llh[0] = lat[pixel] * (M_PI / 180.);
|
||||||
|
@ -426,7 +434,7 @@ void Geo2rdr::geo2rdr() {
|
||||||
break; // Point converged
|
break; // Point converged
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((tline < tstart) || (tline > tend)) isOutside = true;
|
if ((tline < tstart) || (tline > tend)) isOutside = true;
|
||||||
|
|
||||||
for (int i=0; i<3; i++) dr[i] = xyz[i] - satx[i];
|
for (int i=0; i<3; i++) dr[i] = xyz[i] - satx[i];
|
||||||
|
|
|
@ -73,7 +73,7 @@ __device__ int interpolateOrbit(struct Orbit *orb, double t, double *xyz, double
|
||||||
double h[4], hdot[4], f0[4], f1[4], g0[4], g1[4];
|
double h[4], hdot[4], f0[4], f1[4], g0[4], g1[4];
|
||||||
double sum = 0.0;
|
double sum = 0.0;
|
||||||
int v0 = -1;
|
int v0 = -1;
|
||||||
|
|
||||||
if ((t < orb->svs[0].t) || (t > orb->svs[orb->nVec-1].t)) return 1;
|
if ((t < orb->svs[0].t) || (t > orb->svs[orb->nVec-1].t)) return 1;
|
||||||
for (int i=0; i<orb->nVec; i++) {
|
for (int i=0; i<orb->nVec; i++) {
|
||||||
if ((orb->svs[i].t >= t) && (v0 == -1)) {
|
if ((orb->svs[i].t >= t) && (v0 == -1)) {
|
||||||
|
@ -95,44 +95,44 @@ __device__ int interpolateOrbit(struct Orbit *orb, double t, double *xyz, double
|
||||||
sum = (1.0 / (orb->svs[v0+3].t - orb->svs[v0].t)) + (1.0 / (orb->svs[v0+3].t - orb->svs[v0+1].t)) + (1.0 / (orb->svs[v0+3].t - orb->svs[v0+2].t));
|
sum = (1.0 / (orb->svs[v0+3].t - orb->svs[v0].t)) + (1.0 / (orb->svs[v0+3].t - orb->svs[v0+1].t)) + (1.0 / (orb->svs[v0+3].t - orb->svs[v0+2].t));
|
||||||
f0[3] = 1.0 - (2.0 * (t - orb->svs[v0+3].t) * sum);
|
f0[3] = 1.0 - (2.0 * (t - orb->svs[v0+3].t) * sum);
|
||||||
|
|
||||||
h[0] = ((t - orb->svs[v0+1].t) / (orb->svs[v0].t - orb->svs[v0+1].t)) * ((t - orb->svs[v0+2].t) / (orb->svs[v0].t - orb->svs[v0+2].t)) *
|
h[0] = ((t - orb->svs[v0+1].t) / (orb->svs[v0].t - orb->svs[v0+1].t)) * ((t - orb->svs[v0+2].t) / (orb->svs[v0].t - orb->svs[v0+2].t)) *
|
||||||
((t - orb->svs[v0+3].t) / (orb->svs[v0].t - orb->svs[v0+3].t));
|
((t - orb->svs[v0+3].t) / (orb->svs[v0].t - orb->svs[v0+3].t));
|
||||||
h[1] = ((t - orb->svs[v0].t) / (orb->svs[v0+1].t - orb->svs[v0].t)) * ((t - orb->svs[v0+2].t) / (orb->svs[v0+1].t - orb->svs[v0+2].t)) *
|
h[1] = ((t - orb->svs[v0].t) / (orb->svs[v0+1].t - orb->svs[v0].t)) * ((t - orb->svs[v0+2].t) / (orb->svs[v0+1].t - orb->svs[v0+2].t)) *
|
||||||
((t - orb->svs[v0+3].t) / (orb->svs[v0+1].t - orb->svs[v0+3].t));
|
((t - orb->svs[v0+3].t) / (orb->svs[v0+1].t - orb->svs[v0+3].t));
|
||||||
h[2] = ((t - orb->svs[v0].t) / (orb->svs[v0+2].t - orb->svs[v0].t)) * ((t - orb->svs[v0+1].t) / (orb->svs[v0+2].t - orb->svs[v0+1].t)) *
|
h[2] = ((t - orb->svs[v0].t) / (orb->svs[v0+2].t - orb->svs[v0].t)) * ((t - orb->svs[v0+1].t) / (orb->svs[v0+2].t - orb->svs[v0+1].t)) *
|
||||||
((t - orb->svs[v0+3].t) / (orb->svs[v0+2].t - orb->svs[v0+3].t));
|
((t - orb->svs[v0+3].t) / (orb->svs[v0+2].t - orb->svs[v0+3].t));
|
||||||
h[3] = ((t - orb->svs[v0].t) / (orb->svs[v0+3].t - orb->svs[v0].t)) * ((t - orb->svs[v0+1].t) / (orb->svs[v0+3].t - orb->svs[v0+1].t)) *
|
h[3] = ((t - orb->svs[v0].t) / (orb->svs[v0+3].t - orb->svs[v0].t)) * ((t - orb->svs[v0+1].t) / (orb->svs[v0+3].t - orb->svs[v0+1].t)) *
|
||||||
((t - orb->svs[v0+2].t) / (orb->svs[v0+3].t - orb->svs[v0+2].t));
|
((t - orb->svs[v0+2].t) / (orb->svs[v0+3].t - orb->svs[v0+2].t));
|
||||||
|
|
||||||
sum = ((t - orb->svs[v0+2].t) / (orb->svs[v0].t - orb->svs[v0+2].t)) * ((t - orb->svs[v0+3].t) / (orb->svs[v0].t - orb->svs[v0+3].t)) *
|
sum = ((t - orb->svs[v0+2].t) / (orb->svs[v0].t - orb->svs[v0+2].t)) * ((t - orb->svs[v0+3].t) / (orb->svs[v0].t - orb->svs[v0+3].t)) *
|
||||||
(1.0 / (orb->svs[v0].t - orb->svs[v0+1].t));
|
(1.0 / (orb->svs[v0].t - orb->svs[v0+1].t));
|
||||||
sum += ((t - orb->svs[v0+1].t) / (orb->svs[v0].t - orb->svs[v0+1].t)) * ((t - orb->svs[v0+3].t) / (orb->svs[v0].t - orb->svs[v0+3].t)) *
|
sum += ((t - orb->svs[v0+1].t) / (orb->svs[v0].t - orb->svs[v0+1].t)) * ((t - orb->svs[v0+3].t) / (orb->svs[v0].t - orb->svs[v0+3].t)) *
|
||||||
(1.0 / (orb->svs[v0].t - orb->svs[v0+2].t));
|
(1.0 / (orb->svs[v0].t - orb->svs[v0+2].t));
|
||||||
sum += ((t - orb->svs[v0+1].t) / (orb->svs[v0].t - orb->svs[v0+1].t)) * ((t - orb->svs[v0+2].t) / (orb->svs[v0].t - orb->svs[v0+2].t)) *
|
sum += ((t - orb->svs[v0+1].t) / (orb->svs[v0].t - orb->svs[v0+1].t)) * ((t - orb->svs[v0+2].t) / (orb->svs[v0].t - orb->svs[v0+2].t)) *
|
||||||
(1.0 / (orb->svs[v0].t - orb->svs[v0+3].t));
|
(1.0 / (orb->svs[v0].t - orb->svs[v0+3].t));
|
||||||
hdot[0] = sum;
|
hdot[0] = sum;
|
||||||
|
|
||||||
sum = ((t - orb->svs[v0+2].t) / (orb->svs[v0+1].t - orb->svs[v0+2].t)) * ((t - orb->svs[v0+3].t) / (orb->svs[v0+1].t - orb->svs[v0+3].t)) *
|
sum = ((t - orb->svs[v0+2].t) / (orb->svs[v0+1].t - orb->svs[v0+2].t)) * ((t - orb->svs[v0+3].t) / (orb->svs[v0+1].t - orb->svs[v0+3].t)) *
|
||||||
(1.0 / (orb->svs[v0+1].t - orb->svs[v0].t));
|
(1.0 / (orb->svs[v0+1].t - orb->svs[v0].t));
|
||||||
sum += ((t - orb->svs[v0].t) / (orb->svs[v0+1].t - orb->svs[v0].t)) * ((t - orb->svs[v0+3].t) / (orb->svs[v0+1].t - orb->svs[v0+3].t)) *
|
sum += ((t - orb->svs[v0].t) / (orb->svs[v0+1].t - orb->svs[v0].t)) * ((t - orb->svs[v0+3].t) / (orb->svs[v0+1].t - orb->svs[v0+3].t)) *
|
||||||
(1.0 / (orb->svs[v0+1].t - orb->svs[v0+2].t));
|
(1.0 / (orb->svs[v0+1].t - orb->svs[v0+2].t));
|
||||||
sum += ((t - orb->svs[v0].t) / (orb->svs[v0+1].t - orb->svs[v0].t)) * ((t - orb->svs[v0+2].t) / (orb->svs[v0+1].t - orb->svs[v0+2].t)) *
|
sum += ((t - orb->svs[v0].t) / (orb->svs[v0+1].t - orb->svs[v0].t)) * ((t - orb->svs[v0+2].t) / (orb->svs[v0+1].t - orb->svs[v0+2].t)) *
|
||||||
(1.0 / (orb->svs[v0+1].t - orb->svs[v0+3].t));
|
(1.0 / (orb->svs[v0+1].t - orb->svs[v0+3].t));
|
||||||
hdot[1] = sum;
|
hdot[1] = sum;
|
||||||
|
|
||||||
sum = ((t - orb->svs[v0+1].t) / (orb->svs[v0+2].t - orb->svs[v0+1].t)) * ((t - orb->svs[v0+3].t) / (orb->svs[v0+2].t - orb->svs[v0+3].t)) *
|
sum = ((t - orb->svs[v0+1].t) / (orb->svs[v0+2].t - orb->svs[v0+1].t)) * ((t - orb->svs[v0+3].t) / (orb->svs[v0+2].t - orb->svs[v0+3].t)) *
|
||||||
(1.0 / (orb->svs[v0+2].t - orb->svs[v0].t));
|
(1.0 / (orb->svs[v0+2].t - orb->svs[v0].t));
|
||||||
sum += ((t - orb->svs[v0].t) / (orb->svs[v0+2].t - orb->svs[v0].t)) * ((t - orb->svs[v0+3].t) / (orb->svs[v0+2].t - orb->svs[v0+3].t)) *
|
sum += ((t - orb->svs[v0].t) / (orb->svs[v0+2].t - orb->svs[v0].t)) * ((t - orb->svs[v0+3].t) / (orb->svs[v0+2].t - orb->svs[v0+3].t)) *
|
||||||
(1.0 / (orb->svs[v0+2].t - orb->svs[v0+1].t));
|
(1.0 / (orb->svs[v0+2].t - orb->svs[v0+1].t));
|
||||||
sum += ((t - orb->svs[v0].t) / (orb->svs[v0+2].t - orb->svs[v0].t)) * ((t - orb->svs[v0+1].t) / (orb->svs[v0+2].t - orb->svs[v0+1].t)) *
|
sum += ((t - orb->svs[v0].t) / (orb->svs[v0+2].t - orb->svs[v0].t)) * ((t - orb->svs[v0+1].t) / (orb->svs[v0+2].t - orb->svs[v0+1].t)) *
|
||||||
(1.0 / (orb->svs[v0+2].t - orb->svs[v0+3].t));
|
(1.0 / (orb->svs[v0+2].t - orb->svs[v0+3].t));
|
||||||
hdot[2] = sum;
|
hdot[2] = sum;
|
||||||
|
|
||||||
sum = ((t - orb->svs[v0+1].t) / (orb->svs[v0+3].t - orb->svs[v0+1].t)) * ((t - orb->svs[v0+2].t) / (orb->svs[v0+3].t - orb->svs[v0+2].t)) *
|
sum = ((t - orb->svs[v0+1].t) / (orb->svs[v0+3].t - orb->svs[v0+1].t)) * ((t - orb->svs[v0+2].t) / (orb->svs[v0+3].t - orb->svs[v0+2].t)) *
|
||||||
(1.0 / (orb->svs[v0+3].t - orb->svs[v0].t));
|
(1.0 / (orb->svs[v0+3].t - orb->svs[v0].t));
|
||||||
sum += ((t - orb->svs[v0].t) / (orb->svs[v0+3].t - orb->svs[v0].t)) * ((t - orb->svs[v0+2].t) / (orb->svs[v0+3].t - orb->svs[v0+2].t)) *
|
sum += ((t - orb->svs[v0].t) / (orb->svs[v0+3].t - orb->svs[v0].t)) * ((t - orb->svs[v0+2].t) / (orb->svs[v0+3].t - orb->svs[v0+2].t)) *
|
||||||
(1.0 / (orb->svs[v0+3].t - orb->svs[v0+1].t));
|
(1.0 / (orb->svs[v0+3].t - orb->svs[v0+1].t));
|
||||||
sum += ((t - orb->svs[v0].t) / (orb->svs[v0+3].t - orb->svs[v0].t)) * ((t - orb->svs[v0+1].t) / (orb->svs[v0+3].t - orb->svs[v0+1].t)) *
|
sum += ((t - orb->svs[v0].t) / (orb->svs[v0+3].t - orb->svs[v0].t)) * ((t - orb->svs[v0+1].t) / (orb->svs[v0+3].t - orb->svs[v0+1].t)) *
|
||||||
(1.0 / (orb->svs[v0+3].t - orb->svs[v0+2].t));
|
(1.0 / (orb->svs[v0+3].t - orb->svs[v0+2].t));
|
||||||
hdot[3] = sum;
|
hdot[3] = sum;
|
||||||
|
|
||||||
|
@ -152,12 +152,12 @@ __device__ int interpolateOrbit(struct Orbit *orb, double t, double *xyz, double
|
||||||
|
|
||||||
xyz[0] = (((orb->svs[v0].px * f0[0]) + (orb->svs[v0].vx * f1[0])) * h[0] * h[0]) + (((orb->svs[v0+1].px * f0[1]) + (orb->svs[v0+1].vx * f1[1])) * h[1] * h[1]) +
|
xyz[0] = (((orb->svs[v0].px * f0[0]) + (orb->svs[v0].vx * f1[0])) * h[0] * h[0]) + (((orb->svs[v0+1].px * f0[1]) + (orb->svs[v0+1].vx * f1[1])) * h[1] * h[1]) +
|
||||||
(((orb->svs[v0+2].px * f0[2]) + (orb->svs[v0+2].vx * f1[2])) * h[2] * h[2]) + (((orb->svs[v0+3].px * f0[3]) + (orb->svs[v0+3].vx * f1[3])) * h[3] * h[3]);
|
(((orb->svs[v0+2].px * f0[2]) + (orb->svs[v0+2].vx * f1[2])) * h[2] * h[2]) + (((orb->svs[v0+3].px * f0[3]) + (orb->svs[v0+3].vx * f1[3])) * h[3] * h[3]);
|
||||||
xyz[1] = (((orb->svs[v0].py * f0[0]) + (orb->svs[v0].vy * f1[0])) * h[0] * h[0]) + (((orb->svs[v0+1].py * f0[1]) + (orb->svs[v0+1].vy * f1[1])) * h[1] * h[1]) +
|
xyz[1] = (((orb->svs[v0].py * f0[0]) + (orb->svs[v0].vy * f1[0])) * h[0] * h[0]) + (((orb->svs[v0+1].py * f0[1]) + (orb->svs[v0+1].vy * f1[1])) * h[1] * h[1]) +
|
||||||
(((orb->svs[v0+2].py * f0[2]) + (orb->svs[v0+2].vy * f1[2])) * h[2] * h[2]) + (((orb->svs[v0+3].py * f0[3]) + (orb->svs[v0+3].vy * f1[3])) * h[3] * h[3]);
|
(((orb->svs[v0+2].py * f0[2]) + (orb->svs[v0+2].vy * f1[2])) * h[2] * h[2]) + (((orb->svs[v0+3].py * f0[3]) + (orb->svs[v0+3].vy * f1[3])) * h[3] * h[3]);
|
||||||
xyz[2] = (((orb->svs[v0].pz * f0[0]) + (orb->svs[v0].vz * f1[0])) * h[0] * h[0]) + (((orb->svs[v0+1].pz * f0[1]) + (orb->svs[v0+1].vz * f1[1])) * h[1] * h[1]) +
|
xyz[2] = (((orb->svs[v0].pz * f0[0]) + (orb->svs[v0].vz * f1[0])) * h[0] * h[0]) + (((orb->svs[v0+1].pz * f0[1]) + (orb->svs[v0+1].vz * f1[1])) * h[1] * h[1]) +
|
||||||
(((orb->svs[v0+2].pz * f0[2]) + (orb->svs[v0+2].vz * f1[2])) * h[2] * h[2]) + (((orb->svs[v0+3].pz * f0[3]) + (orb->svs[v0+3].vz * f1[3])) * h[3] * h[3]);
|
(((orb->svs[v0+2].pz * f0[2]) + (orb->svs[v0+2].vz * f1[2])) * h[2] * h[2]) + (((orb->svs[v0+3].pz * f0[3]) + (orb->svs[v0+3].vz * f1[3])) * h[3] * h[3]);
|
||||||
|
|
||||||
vel[0] = (((orb->svs[v0].px * g0[0]) + (orb->svs[v0].vx * g1[0])) * h[0]) + (((orb->svs[v0+1].px * g0[1]) + (orb->svs[v0+1].vx * g1[1])) * h[1]) +
|
vel[0] = (((orb->svs[v0].px * g0[0]) + (orb->svs[v0].vx * g1[0])) * h[0]) + (((orb->svs[v0+1].px * g0[1]) + (orb->svs[v0+1].vx * g1[1])) * h[1]) +
|
||||||
(((orb->svs[v0+2].px * g0[2]) + (orb->svs[v0+2].vx * g1[2])) * h[2]) + (((orb->svs[v0+3].px * g0[3]) + (orb->svs[v0+3].vx * g1[3])) * h[3]);
|
(((orb->svs[v0+2].px * g0[2]) + (orb->svs[v0+2].vx * g1[2])) * h[2]) + (((orb->svs[v0+3].px * g0[3]) + (orb->svs[v0+3].vx * g1[3])) * h[3]);
|
||||||
vel[1] = (((orb->svs[v0].py * g0[0]) + (orb->svs[v0].vy * g1[0])) * h[0]) + (((orb->svs[v0+1].py * g0[1]) + (orb->svs[v0+1].vy * g1[1])) * h[1]) +
|
vel[1] = (((orb->svs[v0].py * g0[0]) + (orb->svs[v0].vy * g1[0])) * h[0]) + (((orb->svs[v0+1].py * g0[1]) + (orb->svs[v0+1].vy * g1[1])) * h[1]) +
|
||||||
(((orb->svs[v0+2].py * g0[2]) + (orb->svs[v0+2].vy * g1[2])) * h[2]) + (((orb->svs[v0+3].py * g0[3]) + (orb->svs[v0+3].vy * g1[3])) * h[3]);
|
(((orb->svs[v0+2].py * g0[2]) + (orb->svs[v0+2].vy * g1[2])) * h[2]) + (((orb->svs[v0+3].py * g0[3]) + (orb->svs[v0+3].vy * g1[3])) * h[3]);
|
||||||
|
@ -212,7 +212,7 @@ __device__ double interpolateDEM(float *DEM, double lon, double lat, int width,
|
||||||
|
|
||||||
i0 = int(lon) - 2;
|
i0 = int(lon) - 2;
|
||||||
j0 = int(lat) - 2;
|
j0 = int(lat) - 2;
|
||||||
|
|
||||||
indi = min((i0+1), width); // bound by out_of_bounds, so this isn't a concern
|
indi = min((i0+1), width); // bound by out_of_bounds, so this isn't a concern
|
||||||
spline(indi, j0, length, A, DEM);
|
spline(indi, j0, length, A, DEM);
|
||||||
initSpline(A,R,Q);
|
initSpline(A,R,Q);
|
||||||
|
@ -328,7 +328,7 @@ __device__ void radar2xyz(struct Peg *peg, struct Ellipsoid *elp, struct PegTran
|
||||||
ptm->mat[2][0] = sin(peg->lat);
|
ptm->mat[2][0] = sin(peg->lat);
|
||||||
ptm->mat[2][1] = cos(peg->lat) * cos(peg->hdg);
|
ptm->mat[2][1] = cos(peg->lat) * cos(peg->hdg);
|
||||||
ptm->mat[2][2] = cos(peg->lat) * sin(peg->hdg);
|
ptm->mat[2][2] = cos(peg->lat) * sin(peg->hdg);
|
||||||
|
|
||||||
re = elp->a / sqrt(1.0 - (elp->e2 * pow(sin(peg->lat),2)));
|
re = elp->a / sqrt(1.0 - (elp->e2 * pow(sin(peg->lat),2)));
|
||||||
rn = (elp->a * (1.0 - elp->e2)) / pow((1.0 - (elp->e2 * pow(sin(peg->lat),2))),1.5);
|
rn = (elp->a * (1.0 - elp->e2)) / pow((1.0 - (elp->e2 * pow(sin(peg->lat),2))),1.5);
|
||||||
ptm->radcur = (re * rn) / ((re * pow(cos(peg->hdg),2)) + (rn * pow(sin(peg->hdg),2)));
|
ptm->radcur = (re * rn) / ((re * pow(cos(peg->hdg),2)) + (rn * pow(sin(peg->hdg),2)));
|
||||||
|
@ -337,7 +337,7 @@ __device__ void radar2xyz(struct Peg *peg, struct Ellipsoid *elp, struct PegTran
|
||||||
llh[1] = peg->lon;
|
llh[1] = peg->lon;
|
||||||
llh[2] = 0.0;
|
llh[2] = 0.0;
|
||||||
llh2xyz(temp,llh,elp);
|
llh2xyz(temp,llh,elp);
|
||||||
|
|
||||||
ptm->ov[0] = temp[0] - (ptm->radcur * cos(peg->lat) * cos(peg->lon));
|
ptm->ov[0] = temp[0] - (ptm->radcur * cos(peg->lat) * cos(peg->lon));
|
||||||
ptm->ov[1] = temp[1] - (ptm->radcur * cos(peg->lat) * sin(peg->lon));
|
ptm->ov[1] = temp[1] - (ptm->radcur * cos(peg->lat) * sin(peg->lon));
|
||||||
ptm->ov[2] = temp[2] - (ptm->radcur * sin(peg->lat));
|
ptm->ov[2] = temp[2] - (ptm->radcur * sin(peg->lat));
|
||||||
|
@ -370,7 +370,7 @@ __global__ void runTopo(struct Orbit orbit, struct OutputImgArrs outImgArrs, str
|
||||||
long pixel = (blockDim.x * blockIdx.x) + threadIdx.x;
|
long pixel = (blockDim.x * blockIdx.x) + threadIdx.x;
|
||||||
|
|
||||||
if (pixel < NPIXELS) { // Make sure we're not operating on a non-existent pixel
|
if (pixel < NPIXELS) { // Make sure we're not operating on a non-existent pixel
|
||||||
|
|
||||||
double enumat[3][3];
|
double enumat[3][3];
|
||||||
double xyzsat[3], velsat[3], llhsat[3], vhat[3], that[3], chat[3], nhat[3];
|
double xyzsat[3], velsat[3], llhsat[3], vhat[3], that[3], chat[3], nhat[3];
|
||||||
double llh[3], llh_prev[3], xyz[3], xyz_prev[3], sch[3], enu[3], delta[3];
|
double llh[3], llh_prev[3], xyz[3], xyz_prev[3], sch[3], enu[3], delta[3];
|
||||||
|
@ -381,11 +381,11 @@ __global__ void runTopo(struct Orbit orbit, struct OutputImgArrs outImgArrs, str
|
||||||
double thrd_z, thrd_zsch, thrd_lat, thrd_lon, thrd_distance, thrd_losang0, thrd_losang1;
|
double thrd_z, thrd_zsch, thrd_lat, thrd_lon, thrd_distance, thrd_losang0, thrd_losang1;
|
||||||
double thrd_incang0, thrd_incang1;
|
double thrd_incang0, thrd_incang1;
|
||||||
int thrd_converge;
|
int thrd_converge;
|
||||||
|
|
||||||
struct Ellipsoid elp;
|
struct Ellipsoid elp;
|
||||||
struct Peg peg;
|
struct Peg peg;
|
||||||
struct PegTrans ptm;
|
struct PegTrans ptm;
|
||||||
|
|
||||||
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * *
|
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * *
|
||||||
* double t0 = inpts_dbl[0];
|
* double t0 = inpts_dbl[0];
|
||||||
* double prf = inpts_dbl[1];
|
* double prf = inpts_dbl[1];
|
||||||
|
@ -412,7 +412,7 @@ __global__ void runTopo(struct Orbit orbit, struct OutputImgArrs outImgArrs, str
|
||||||
* int extraiter = inpts_int[5];
|
* int extraiter = inpts_int[5];
|
||||||
* int length = inpts_int[6]; NOT USED IN THIS KERNEL
|
* int length = inpts_int[6]; NOT USED IN THIS KERNEL
|
||||||
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
|
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
|
||||||
|
|
||||||
line = (pixel + OFFSET) / d_inpts_int[1];
|
line = (pixel + OFFSET) / d_inpts_int[1];
|
||||||
tline = d_inpts_dbl[0] + (d_inpts_int[0] * (line / d_inpts_dbl[1]));
|
tline = d_inpts_dbl[0] + (d_inpts_int[0] * (line / d_inpts_dbl[1]));
|
||||||
if (interpolateOrbit(&orbit,tline,xyzsat,velsat) != 0) {
|
if (interpolateOrbit(&orbit,tline,xyzsat,velsat) != 0) {
|
||||||
|
@ -427,28 +427,28 @@ __global__ void runTopo(struct Orbit orbit, struct OutputImgArrs outImgArrs, str
|
||||||
peg.lat = llhsat[0];
|
peg.lat = llhsat[0];
|
||||||
peg.lon = llhsat[1];
|
peg.lon = llhsat[1];
|
||||||
radar2xyz(&peg,&elp,&ptm);
|
radar2xyz(&peg,&elp,&ptm);
|
||||||
|
|
||||||
thrd_converge = 0;
|
thrd_converge = 0;
|
||||||
thrd_z = 0.0;
|
thrd_z = 0.0;
|
||||||
thrd_zsch = 0.0;
|
thrd_zsch = 0.0;
|
||||||
thrd_lat = d_inpts_dbl[7] + (0.5 * d_inpts_dbl[9] * d_inpts_int[2]);
|
thrd_lat = d_inpts_dbl[7] + (0.5 * d_inpts_dbl[9] * d_inpts_int[2]);
|
||||||
thrd_lon = d_inpts_dbl[8] + (0.5 * d_inpts_dbl[10] * d_inpts_int[3]);
|
thrd_lon = d_inpts_dbl[8] + (0.5 * d_inpts_dbl[10] * d_inpts_int[3]);
|
||||||
|
|
||||||
dopfact = (0.5 * d_inpts_dbl[11] * (inImgArrs.dopline[pixel] / vmag)) * inImgArrs.rho[pixel];
|
dopfact = (0.5 * d_inpts_dbl[11] * (inImgArrs.dopline[pixel] / vmag)) * inImgArrs.rho[pixel];
|
||||||
|
|
||||||
// START THE ITERATIONS
|
// START THE ITERATIONS
|
||||||
for (iter=0; iter<=(d_inpts_int[4]+d_inpts_int[5]); iter++) {
|
for (iter=0; iter<=(d_inpts_int[4]+d_inpts_int[5]); iter++) {
|
||||||
if (thrd_converge == 0) { // Designing this way helps prevent thread divergence as much as possible
|
if (thrd_converge == 0) { // Designing this way helps prevent thread divergence as much as possible
|
||||||
llh_prev[0] = thrd_lat / (180. / M_PI);
|
llh_prev[0] = thrd_lat / (180. / M_PI);
|
||||||
llh_prev[1] = thrd_lon / (180. / M_PI);
|
llh_prev[1] = thrd_lon / (180. / M_PI);
|
||||||
llh_prev[2] = thrd_z;
|
llh_prev[2] = thrd_z;
|
||||||
|
|
||||||
costheta = 0.5 * (((height + ptm.radcur) / inImgArrs.rho[pixel]) + (inImgArrs.rho[pixel] / (height + ptm.radcur)) -
|
costheta = 0.5 * (((height + ptm.radcur) / inImgArrs.rho[pixel]) + (inImgArrs.rho[pixel] / (height + ptm.radcur)) -
|
||||||
(((ptm.radcur + thrd_zsch) / (height + ptm.radcur)) * ((ptm.radcur + thrd_zsch) / inImgArrs.rho[pixel])));
|
(((ptm.radcur + thrd_zsch) / (height + ptm.radcur)) * ((ptm.radcur + thrd_zsch) / inImgArrs.rho[pixel])));
|
||||||
sintheta = sqrt(1.0 - pow(costheta,2));
|
sintheta = sqrt(1.0 - pow(costheta,2));
|
||||||
alpha = (dopfact - (costheta * inImgArrs.rho[pixel] * dot(nhat,vhat))) / dot(vhat,that);
|
alpha = (dopfact - (costheta * inImgArrs.rho[pixel] * dot(nhat,vhat))) / dot(vhat,that);
|
||||||
beta = -d_inpts_dbl[12] * sqrt((pow(inImgArrs.rho[pixel],2) * pow(sintheta,2)) - pow(alpha,2));
|
beta = -d_inpts_dbl[12] * sqrt((pow(inImgArrs.rho[pixel],2) * pow(sintheta,2)) - pow(alpha,2));
|
||||||
|
|
||||||
delta[0] = (costheta * inImgArrs.rho[pixel] * nhat[0]) + (alpha * that[0]) + (beta * chat[0]);
|
delta[0] = (costheta * inImgArrs.rho[pixel] * nhat[0]) + (alpha * that[0]) + (beta * chat[0]);
|
||||||
delta[1] = (costheta * inImgArrs.rho[pixel] * nhat[1]) + (alpha * that[1]) + (beta * chat[1]);
|
delta[1] = (costheta * inImgArrs.rho[pixel] * nhat[1]) + (alpha * that[1]) + (beta * chat[1]);
|
||||||
delta[2] = (costheta * inImgArrs.rho[pixel] * nhat[2]) + (alpha * that[2]) + (beta * chat[2]);
|
delta[2] = (costheta * inImgArrs.rho[pixel] * nhat[2]) + (alpha * that[2]) + (beta * chat[2]);
|
||||||
|
@ -457,7 +457,7 @@ __global__ void runTopo(struct Orbit orbit, struct OutputImgArrs outImgArrs, str
|
||||||
xyz[1] = xyzsat[1] + delta[1];
|
xyz[1] = xyzsat[1] + delta[1];
|
||||||
xyz[2] = xyzsat[2] + delta[2];
|
xyz[2] = xyzsat[2] + delta[2];
|
||||||
xyz2llh(xyz,llh,&elp);
|
xyz2llh(xyz,llh,&elp);
|
||||||
|
|
||||||
thrd_lat = llh[0] * (180. / M_PI);
|
thrd_lat = llh[0] * (180. / M_PI);
|
||||||
thrd_lon = llh[1] * (180. / M_PI);
|
thrd_lon = llh[1] * (180. / M_PI);
|
||||||
demlat = ((thrd_lat - d_inpts_dbl[7]) / d_inpts_dbl[9]) + 1;
|
demlat = ((thrd_lat - d_inpts_dbl[7]) / d_inpts_dbl[9]) + 1;
|
||||||
|
@ -468,7 +468,7 @@ __global__ void runTopo(struct Orbit orbit, struct OutputImgArrs outImgArrs, str
|
||||||
demlon = fmin(demlon,(d_inpts_int[3]-1.));
|
demlon = fmin(demlon,(d_inpts_int[3]-1.));
|
||||||
thrd_z = interpolateDEM(inImgArrs.DEM,demlon,demlat,d_inpts_int[3],d_inpts_int[2]);
|
thrd_z = interpolateDEM(inImgArrs.DEM,demlon,demlat,d_inpts_int[3],d_inpts_int[2]);
|
||||||
thrd_z = fmax(thrd_z,-500.);
|
thrd_z = fmax(thrd_z,-500.);
|
||||||
|
|
||||||
llh[0] = thrd_lat / (180. / M_PI);
|
llh[0] = thrd_lat / (180. / M_PI);
|
||||||
llh[1] = thrd_lon / (180. / M_PI);
|
llh[1] = thrd_lon / (180. / M_PI);
|
||||||
llh[2] = thrd_z;
|
llh[2] = thrd_z;
|
||||||
|
@ -494,23 +494,23 @@ __global__ void runTopo(struct Orbit orbit, struct OutputImgArrs outImgArrs, str
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Final computation
|
// Final computation
|
||||||
costheta = 0.5 * (((height + ptm.radcur) / inImgArrs.rho[pixel]) + (inImgArrs.rho[pixel] / (height + ptm.radcur)) -
|
costheta = 0.5 * (((height + ptm.radcur) / inImgArrs.rho[pixel]) + (inImgArrs.rho[pixel] / (height + ptm.radcur)) -
|
||||||
(((ptm.radcur + thrd_zsch) / (height + ptm.radcur)) * ((ptm.radcur + thrd_zsch) / inImgArrs.rho[pixel])));
|
(((ptm.radcur + thrd_zsch) / (height + ptm.radcur)) * ((ptm.radcur + thrd_zsch) / inImgArrs.rho[pixel])));
|
||||||
sintheta = sqrt(1.0 - pow(costheta,2));
|
sintheta = sqrt(1.0 - pow(costheta,2));
|
||||||
alpha = (dopfact - (costheta * inImgArrs.rho[pixel] * dot(nhat,vhat))) / dot(vhat,that);
|
alpha = (dopfact - (costheta * inImgArrs.rho[pixel] * dot(nhat,vhat))) / dot(vhat,that);
|
||||||
beta = -d_inpts_dbl[12] * sqrt((pow(inImgArrs.rho[pixel],2) * pow(sintheta,2)) - pow(alpha,2));
|
beta = -d_inpts_dbl[12] * sqrt((pow(inImgArrs.rho[pixel],2) * pow(sintheta,2)) - pow(alpha,2));
|
||||||
|
|
||||||
delta[0] = (costheta * inImgArrs.rho[pixel] * nhat[0]) + (alpha * that[0]) + (beta * chat[0]);
|
delta[0] = (costheta * inImgArrs.rho[pixel] * nhat[0]) + (alpha * that[0]) + (beta * chat[0]);
|
||||||
delta[1] = (costheta * inImgArrs.rho[pixel] * nhat[1]) + (alpha * that[1]) + (beta * chat[1]);
|
delta[1] = (costheta * inImgArrs.rho[pixel] * nhat[1]) + (alpha * that[1]) + (beta * chat[1]);
|
||||||
delta[2] = (costheta * inImgArrs.rho[pixel] * nhat[2]) + (alpha * that[2]) + (beta * chat[2]);
|
delta[2] = (costheta * inImgArrs.rho[pixel] * nhat[2]) + (alpha * that[2]) + (beta * chat[2]);
|
||||||
|
|
||||||
xyz[0] = xyzsat[0] + delta[0];
|
xyz[0] = xyzsat[0] + delta[0];
|
||||||
xyz[1] = xyzsat[1] + delta[1];
|
xyz[1] = xyzsat[1] + delta[1];
|
||||||
xyz[2] = xyzsat[2] + delta[2];
|
xyz[2] = xyzsat[2] + delta[2];
|
||||||
xyz2llh(xyz,llh,&elp);
|
xyz2llh(xyz,llh,&elp);
|
||||||
|
|
||||||
thrd_lat = llh[0] * (180. / M_PI);
|
thrd_lat = llh[0] * (180. / M_PI);
|
||||||
thrd_lon = llh[1] * (180. / M_PI);
|
thrd_lon = llh[1] * (180. / M_PI);
|
||||||
thrd_z = llh[2];
|
thrd_z = llh[2];
|
||||||
|
@ -526,42 +526,42 @@ __global__ void runTopo(struct Orbit orbit, struct OutputImgArrs outImgArrs, str
|
||||||
enumat[0][2] = 0.0;
|
enumat[0][2] = 0.0;
|
||||||
enumat[1][2] = cos(llh[0]);
|
enumat[1][2] = cos(llh[0]);
|
||||||
enumat[2][2] = sin(llh[0]);
|
enumat[2][2] = sin(llh[0]);
|
||||||
|
|
||||||
// Expanded from Linalg::matvec
|
// Expanded from Linalg::matvec
|
||||||
enu[0] = (enumat[0][0] * delta[0]) + (enumat[0][1] * delta[1]) + (enumat[0][2] * delta[2]);
|
enu[0] = (enumat[0][0] * delta[0]) + (enumat[0][1] * delta[1]) + (enumat[0][2] * delta[2]);
|
||||||
enu[1] = (enumat[1][0] * delta[0]) + (enumat[1][1] * delta[1]) + (enumat[1][2] * delta[2]);
|
enu[1] = (enumat[1][0] * delta[0]) + (enumat[1][1] * delta[1]) + (enumat[1][2] * delta[2]);
|
||||||
enu[2] = (enumat[2][0] * delta[0]) + (enumat[2][1] * delta[1]) + (enumat[2][2] * delta[2]);
|
enu[2] = (enumat[2][0] * delta[0]) + (enumat[2][1] * delta[1]) + (enumat[2][2] * delta[2]);
|
||||||
|
|
||||||
cosalpha = fabs(enu[2]) / norm(3,enu);
|
cosalpha = fabs(enu[2]) / norm(3,enu);
|
||||||
thrd_losang0 = acos(cosalpha) * (180. / M_PI);
|
thrd_losang0 = acos(cosalpha) * (180. / M_PI);
|
||||||
thrd_losang1 = (atan2(-enu[1],-enu[0]) - (0.5*M_PI)) * (180. / M_PI);
|
thrd_losang1 = (atan2(-enu[1],-enu[0]) - (0.5*M_PI)) * (180. / M_PI);
|
||||||
thrd_incang0 = acos(costheta) * (180. / M_PI);
|
thrd_incang0 = acos(costheta) * (180. / M_PI);
|
||||||
thrd_zsch = inImgArrs.rho[pixel] * sintheta;
|
thrd_zsch = inImgArrs.rho[pixel] * sintheta;
|
||||||
|
|
||||||
demlat = ((thrd_lat - d_inpts_dbl[7]) / d_inpts_dbl[9]) + 1;
|
demlat = ((thrd_lat - d_inpts_dbl[7]) / d_inpts_dbl[9]) + 1;
|
||||||
demlat = fmax(demlat,2.);
|
demlat = fmax(demlat,2.);
|
||||||
demlat = fmin(demlat,(d_inpts_int[2]-1.));
|
demlat = fmin(demlat,(d_inpts_int[2]-1.));
|
||||||
demlon = ((thrd_lon - d_inpts_dbl[8]) / d_inpts_dbl[10]) + 1;
|
demlon = ((thrd_lon - d_inpts_dbl[8]) / d_inpts_dbl[10]) + 1;
|
||||||
demlon = fmax(demlon,2.);
|
demlon = fmax(demlon,2.);
|
||||||
demlon = fmin(demlon,(d_inpts_int[3]-1.));
|
demlon = fmin(demlon,(d_inpts_int[3]-1.));
|
||||||
|
|
||||||
aa = interpolateDEM(inImgArrs.DEM,(demlon-1.),demlat,d_inpts_int[3],d_inpts_int[2]);
|
aa = interpolateDEM(inImgArrs.DEM,(demlon-1.),demlat,d_inpts_int[3],d_inpts_int[2]);
|
||||||
bb = interpolateDEM(inImgArrs.DEM,(demlon+1.),demlat,d_inpts_int[3],d_inpts_int[2]);
|
bb = interpolateDEM(inImgArrs.DEM,(demlon+1.),demlat,d_inpts_int[3],d_inpts_int[2]);
|
||||||
alpha = ((bb - aa) * (180. / M_PI)) / (2.0 * (elp.a / sqrt(1.0 - (elp.e2 * pow(sin(thrd_lat / (180. / M_PI)),2)))) * d_inpts_dbl[10]);
|
alpha = ((bb - aa) * (180. / M_PI)) / (2.0 * (elp.a / sqrt(1.0 - (elp.e2 * pow(sin(thrd_lat / (180. / M_PI)),2)))) * d_inpts_dbl[10]);
|
||||||
|
|
||||||
aa = interpolateDEM(inImgArrs.DEM,demlon,(demlat-1.),d_inpts_int[3],d_inpts_int[2]);
|
aa = interpolateDEM(inImgArrs.DEM,demlon,(demlat-1.),d_inpts_int[3],d_inpts_int[2]);
|
||||||
bb = interpolateDEM(inImgArrs.DEM,demlon,(demlat+1.),d_inpts_int[3],d_inpts_int[2]);
|
bb = interpolateDEM(inImgArrs.DEM,demlon,(demlat+1.),d_inpts_int[3],d_inpts_int[2]);
|
||||||
beta = ((bb - aa) * (180. / M_PI)) / (2.0 * ((elp.a * (1.0 - elp.e2)) / pow((1.0 - (elp.e2 * pow(sin(thrd_lat / (180. / M_PI)),2))),1.5)) * d_inpts_dbl[9]);
|
beta = ((bb - aa) * (180. / M_PI)) / (2.0 * ((elp.a * (1.0 - elp.e2)) / pow((1.0 - (elp.e2 * pow(sin(thrd_lat / (180. / M_PI)),2))),1.5)) * d_inpts_dbl[9]);
|
||||||
|
|
||||||
enunorm = norm(3,enu);
|
enunorm = norm(3,enu);
|
||||||
enu[0] = enu[0] / enunorm;
|
enu[0] = enu[0] / enunorm;
|
||||||
enu[1] = enu[1] / enunorm;
|
enu[1] = enu[1] / enunorm;
|
||||||
enu[2] = enu[2] / enunorm;
|
enu[2] = enu[2] / enunorm;
|
||||||
costheta = ((enu[0] * alpha) + (enu[1] * beta) - enu[2]) / sqrt(1.0 + pow(alpha,2) + pow(beta,2));
|
costheta = ((enu[0] * alpha) + (enu[1] * beta) - enu[2]) / sqrt(1.0 + pow(alpha,2) + pow(beta,2));
|
||||||
thrd_incang1 = acos(costheta) * (180. / M_PI);
|
thrd_incang1 = acos(costheta) * (180. / M_PI);
|
||||||
|
|
||||||
// Leave out masking stuff for now (though it's doable)
|
// Leave out masking stuff for now (though it's doable)
|
||||||
|
|
||||||
// Finally write to reference arrays
|
// Finally write to reference arrays
|
||||||
outImgArrs.lat[pixel] = thrd_lat;
|
outImgArrs.lat[pixel] = thrd_lat;
|
||||||
outImgArrs.lon[pixel] = thrd_lon;
|
outImgArrs.lon[pixel] = thrd_lon;
|
||||||
|
@ -590,11 +590,10 @@ void freeOrbit(struct Orbit *orb) {
|
||||||
free(orb->svs);
|
free(orb->svs);
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t getDeviceMem() {
|
size_t getDeviceFreeMem() {
|
||||||
size_t freeByte, totalByte;
|
size_t freeByte, totalByte;
|
||||||
cudaMemGetInfo(&freeByte, &totalByte);
|
cudaMemGetInfo(&freeByte, &totalByte);
|
||||||
totalByte = (totalByte / 1e9) * 1e9; // Round down to nearest GB
|
return freeByte;
|
||||||
return totalByte;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// --------------- C FUNCTIONS ----------------
|
// --------------- C FUNCTIONS ----------------
|
||||||
|
@ -616,10 +615,10 @@ void runGPUTopo(long nBlock, long numPix, double *h_inpts_dbl, int *h_inpts_int,
|
||||||
cudaSetDevice(0);
|
cudaSetDevice(0);
|
||||||
|
|
||||||
printf(" Allocating host and general GPU memory...\n");
|
printf(" Allocating host and general GPU memory...\n");
|
||||||
|
|
||||||
size_t nb_pixels = numPix * sizeof(double); // size of rho/dopline/lat/lon/z/zsch/incang/losang
|
size_t nb_pixels = numPix * sizeof(double); // size of rho/dopline/lat/lon/z/zsch/incang/losang
|
||||||
size_t nb_DEM = h_inpts_int[3] * h_inpts_int[2] * sizeof(float); // size of DEM
|
size_t nb_DEM = h_inpts_int[3] * h_inpts_int[2] * sizeof(float); // size of DEM
|
||||||
|
|
||||||
/*
|
/*
|
||||||
h_lat = (double *)malloc(nb_pixels);
|
h_lat = (double *)malloc(nb_pixels);
|
||||||
h_lon = (double *)malloc(nb_pixels);
|
h_lon = (double *)malloc(nb_pixels);
|
||||||
|
@ -655,21 +654,21 @@ void runGPUTopo(long nBlock, long numPix, double *h_inpts_dbl, int *h_inpts_int,
|
||||||
cudaMemcpyToSymbol(d_inpts_dbl, h_inpts_dbl, (14*sizeof(double)));
|
cudaMemcpyToSymbol(d_inpts_dbl, h_inpts_dbl, (14*sizeof(double)));
|
||||||
cudaMemcpyToSymbol(d_inpts_int, h_inpts_int, (7*sizeof(int)));
|
cudaMemcpyToSymbol(d_inpts_int, h_inpts_int, (7*sizeof(int)));
|
||||||
freeOrbit(&orbit);
|
freeOrbit(&orbit);
|
||||||
|
|
||||||
orbit.svs = d_svs;
|
orbit.svs = d_svs;
|
||||||
inImgArrs.DEM = d_DEM;
|
inImgArrs.DEM = d_DEM;
|
||||||
inImgArrs.rho = d_rho;
|
inImgArrs.rho = d_rho;
|
||||||
inImgArrs.dopline = d_dopline;
|
inImgArrs.dopline = d_dopline;
|
||||||
|
|
||||||
printf(" Allocating block memory (%d pixels per image)...\n", numPix);
|
printf(" Allocating block memory (%d pixels per image)...\n", numPix);
|
||||||
|
|
||||||
cudaMalloc((double**)&d_lat, nb_pixels);
|
cudaMalloc((double**)&d_lat, nb_pixels);
|
||||||
cudaMalloc((double**)&d_lon, nb_pixels);
|
cudaMalloc((double**)&d_lon, nb_pixels);
|
||||||
cudaMalloc((double**)&d_z, nb_pixels);
|
cudaMalloc((double**)&d_z, nb_pixels);
|
||||||
//cudaMalloc((double**)&d_zsch, nb_pixels);
|
//cudaMalloc((double**)&d_zsch, nb_pixels);
|
||||||
cudaMalloc((double**)&d_incang, (2*nb_pixels));
|
cudaMalloc((double**)&d_incang, (2*nb_pixels));
|
||||||
cudaMalloc((double**)&d_losang, (2*nb_pixels));
|
cudaMalloc((double**)&d_losang, (2*nb_pixels));
|
||||||
|
|
||||||
outImgArrs.lat = d_lat;
|
outImgArrs.lat = d_lat;
|
||||||
outImgArrs.lon = d_lon;
|
outImgArrs.lon = d_lon;
|
||||||
outImgArrs.z = d_z;
|
outImgArrs.z = d_z;
|
||||||
|
@ -702,7 +701,7 @@ void runGPUTopo(long nBlock, long numPix, double *h_inpts_dbl, int *h_inpts_int,
|
||||||
iEndRun = cpuSecond();
|
iEndRun = cpuSecond();
|
||||||
if (nBlock > -1) printf(" GPU finished block %d in %f s.\n", nBlock, (iEndRun-iStartRun));
|
if (nBlock > -1) printf(" GPU finished block %d in %f s.\n", nBlock, (iEndRun-iStartRun));
|
||||||
else printf(" GPU finished remaining lines in %f s.\n", (iEndRun-iStartRun));
|
else printf(" GPU finished remaining lines in %f s.\n", (iEndRun-iStartRun));
|
||||||
|
|
||||||
printf(" Copying memory back to host...\n");
|
printf(" Copying memory back to host...\n");
|
||||||
|
|
||||||
cudaMemcpy(accArr[0], outImgArrs.lat, nb_pixels, cudaMemcpyDeviceToHost); // Copy memory from device to host with offset
|
cudaMemcpy(accArr[0], outImgArrs.lat, nb_pixels, cudaMemcpyDeviceToHost); // Copy memory from device to host with offset
|
||||||
|
|
|
@ -6,7 +6,7 @@
|
||||||
#ifndef GPU_TOPO_H
|
#ifndef GPU_TOPO_H
|
||||||
#define GPU_TOPO_H
|
#define GPU_TOPO_H
|
||||||
|
|
||||||
size_t getDeviceMem();
|
size_t getDeviceFreeMem();
|
||||||
void runGPUTopo(long,long,double*,int*,float*,double*,double*,int,double*,double**);
|
void runGPUTopo(long,long,double*,int*,float*,double*,double*,int,double*,double**);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -23,6 +23,7 @@
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
#include <cstdio>
|
#include <cstdio>
|
||||||
#include <cstdlib>
|
#include <cstdlib>
|
||||||
|
#include <cassert>
|
||||||
#include <fstream>
|
#include <fstream>
|
||||||
#include <future>
|
#include <future>
|
||||||
#include <omp.h>
|
#include <omp.h>
|
||||||
|
@ -78,7 +79,7 @@ void *writeToFile(void *inputData) {
|
||||||
data.nLines = ((struct writeData *)inputData)->nLines;
|
data.nLines = ((struct writeData *)inputData)->nLines;
|
||||||
data.width = ((struct writeData *)inputData)->width;
|
data.width = ((struct writeData *)inputData)->width;
|
||||||
data.firstWrite = ((struct writeData *)inputData)->firstWrite;
|
data.firstWrite = ((struct writeData *)inputData)->firstWrite;
|
||||||
|
|
||||||
if (!data.firstWrite) {
|
if (!data.firstWrite) {
|
||||||
for (int i=0; i<data.nLines; i++) {
|
for (int i=0; i<data.nLines; i++) {
|
||||||
size_t offset = i * size_t(data.width);
|
size_t offset = i * size_t(data.width);
|
||||||
|
@ -170,7 +171,7 @@ void Topo::topo() {
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
tzMethods.prepareMethods(dem_method);
|
tzMethods.prepareMethods(dem_method);
|
||||||
|
|
||||||
// Set up Ellipsoid object
|
// Set up Ellipsoid object
|
||||||
elp.a = major;
|
elp.a = major;
|
||||||
elp.e2 = eccentricitySquared;
|
elp.e2 = eccentricitySquared;
|
||||||
|
@ -252,12 +253,12 @@ void Topo::topo() {
|
||||||
peg.hdg = peghdg;
|
peg.hdg = peghdg;
|
||||||
ptm.radar_to_xyz(elp,peg);
|
ptm.radar_to_xyz(elp,peg);
|
||||||
rcurv = ptm.radcur;
|
rcurv = ptm.radcur;
|
||||||
|
|
||||||
for (int ind=0; ind<2; ind++) {
|
for (int ind=0; ind<2; ind++) {
|
||||||
pixel = ind * (width - 1);
|
pixel = ind * (width - 1);
|
||||||
rng = rho[pixel];
|
rng = rho[pixel];
|
||||||
dopfact = (0.5 * wvl * (dopline[pixel] / vmag)) * rng;
|
dopfact = (0.5 * wvl * (dopline[pixel] / vmag)) * rng;
|
||||||
|
|
||||||
for (int iter=0; iter<2; iter++) {
|
for (int iter=0; iter<2; iter++) {
|
||||||
// SWOT-specific near range check
|
// SWOT-specific near range check
|
||||||
// If slant range vector doesn't hit ground, pick nadir point
|
// If slant range vector doesn't hit ground, pick nadir point
|
||||||
|
@ -377,7 +378,7 @@ void Topo::topo() {
|
||||||
if (RUN_GPU_TOPO) {
|
if (RUN_GPU_TOPO) {
|
||||||
double gpu_inputs_d[14];
|
double gpu_inputs_d[14];
|
||||||
int gpu_inputs_i[7];
|
int gpu_inputs_i[7];
|
||||||
|
|
||||||
gpu_inputs_d[0] = t0;
|
gpu_inputs_d[0] = t0;
|
||||||
gpu_inputs_d[1] = prf;
|
gpu_inputs_d[1] = prf;
|
||||||
gpu_inputs_d[2] = elp.a;
|
gpu_inputs_d[2] = elp.a;
|
||||||
|
@ -409,7 +410,7 @@ void Topo::topo() {
|
||||||
gpu_dem[(i*udemlength)+j] = dem[i][j];
|
gpu_dem[(i*udemlength)+j] = dem[i][j];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int gpu_orbNvec = orb.nVectors;
|
int gpu_orbNvec = orb.nVectors;
|
||||||
double *gpu_orbSvs = new double[7*gpu_orbNvec];
|
double *gpu_orbSvs = new double[7*gpu_orbNvec];
|
||||||
for (int i=0; i<gpu_orbNvec; i++) {
|
for (int i=0; i<gpu_orbNvec; i++) {
|
||||||
|
@ -432,7 +433,7 @@ void Topo::topo() {
|
||||||
bool incFlag = bool(incAccessor > 0);
|
bool incFlag = bool(incAccessor > 0);
|
||||||
bool losFlag = bool(losAccessor > 0);
|
bool losFlag = bool(losAccessor > 0);
|
||||||
//std::future<void> result = std::async(std::launch::async, &Topo::writeToFile, this, (void **)accObjs, outputArrays, incFlag, losFlag, 0, width, true);
|
//std::future<void> result = std::async(std::launch::async, &Topo::writeToFile, this, (void **)accObjs, outputArrays, incFlag, losFlag, 0, width, true);
|
||||||
|
|
||||||
// Create pthread data and initialize dummy thread
|
// Create pthread data and initialize dummy thread
|
||||||
pthread_t writeThread;
|
pthread_t writeThread;
|
||||||
pthread_attr_t attr;
|
pthread_attr_t attr;
|
||||||
|
@ -455,26 +456,51 @@ void Topo::topo() {
|
||||||
pthread_create(&writeThread, &attr, writeToFile, (void*)&wd);
|
pthread_create(&writeThread, &attr, writeToFile, (void*)&wd);
|
||||||
|
|
||||||
// Calculate number of and size of blocks
|
// Calculate number of and size of blocks
|
||||||
size_t num_GPU_bytes = getDeviceMem();
|
|
||||||
long totalPixels = (long)length * width;
|
|
||||||
long pixPerImg = (((num_GPU_bytes / 8) / 9) / 1e7) * 1e7; // Round down to the nearest 10M pixels
|
|
||||||
long linesPerImg = pixPerImg / width;
|
|
||||||
pixPerImg = linesPerImg * width;
|
|
||||||
int nBlocks = totalPixels / pixPerImg;
|
|
||||||
|
|
||||||
//original values: 1.5e8 is too large for each of GPU on kamb.
|
// free GPU memory available
|
||||||
//here I change it to 1.0e8. 16-MAY-2018, Cunren Liang
|
size_t num_GPU_bytes = getDeviceFreeMem();
|
||||||
while (pixPerImg > 1.0e8) {
|
// use 100 MB (100 * 1024 * 1024 bytes) as the rounding unit; may be adjusted
|
||||||
linesPerImg -= 1;
|
size_t memoryRoundingUnit = 1024ULL * 1024ULL * 100;
|
||||||
pixPerImg -= width;
|
// memory to be used for each pixel in bytes, with 9 double elements per pixel
|
||||||
nBlocks = totalPixels / pixPerImg;
|
size_t pixelBytes = sizeof(double) * 9;
|
||||||
}
|
// memory overhead reserved for other shared parameters, in units of memoryRoundingUnit (2 units = 200 MB)
|
||||||
long remPix = totalPixels - (pixPerImg * nBlocks);
|
size_t memoryOverhead = 2;
|
||||||
long remLines = remPix / width;
|
|
||||||
|
// adjust the available free memory by rounding down
|
||||||
|
num_GPU_bytes = (num_GPU_bytes/memoryRoundingUnit - memoryOverhead) * memoryRoundingUnit;
|
||||||
|
|
||||||
|
// calculate the max pixels allowed in a batch (block)
|
||||||
|
size_t pixPerImg = num_GPU_bytes / pixelBytes;
|
||||||
|
assert(pixPerImg > 0);
|
||||||
|
|
||||||
|
// ! To best parallelize the computation, using the maximum available GPU memory is the best option
|
||||||
|
// ! the following adjustment is not needed
|
||||||
|
// set an upper limit on the size of the block
|
||||||
|
// preferably offered as an input parameter
|
||||||
|
// 2^24 pixels is about 1.2 GB of memory (at 9 doubles = 72 bytes per pixel)
|
||||||
|
// size_t maxPixPerImg = 1 << 24;
|
||||||
|
// pixPerImg = std::min(pixPerImg, maxPixPerImg);
|
||||||
|
|
||||||
|
// the max lines in a batch, and will be used for each run
|
||||||
|
int linesPerImg = pixPerImg / width;
|
||||||
|
assert(linesPerImg >0);
|
||||||
|
// now reassign the value for pixels in a batch
|
||||||
|
pixPerImg = linesPerImg * width;
|
||||||
|
|
||||||
|
// total number of pixels in SLC
|
||||||
|
size_t totalPixels = (size_t)length * width;
|
||||||
|
|
||||||
|
// total number of full blocks needed to process the image (any leftover lines are handled below)
|
||||||
|
int nBlocks = length / linesPerImg;
|
||||||
|
|
||||||
|
// compute how many lines remain for a final partial block
|
||||||
|
int remLines = length - nBlocks*linesPerImg;
|
||||||
|
size_t remPix = remLines * width;
|
||||||
|
|
||||||
printf("NOTE: GPU will process image in %d blocks of %d lines", nBlocks, linesPerImg);
|
printf("NOTE: GPU will process image in %d blocks of %d lines", nBlocks, linesPerImg);
|
||||||
if (remPix > 0) printf(" (with %d lines in a final partial block)", remLines);
|
if (remPix > 0) printf(" (with %d lines in a final partial block)", remLines);
|
||||||
printf("\n");
|
printf("\n");
|
||||||
|
|
||||||
double *gpu_rho = new double[linesPerImg * width];
|
double *gpu_rho = new double[linesPerImg * width];
|
||||||
double *gpu_dopline = new double[linesPerImg * width];
|
double *gpu_dopline = new double[linesPerImg * width];
|
||||||
size_t nb_pixels = pixPerImg * sizeof(double);
|
size_t nb_pixels = pixPerImg * sizeof(double);
|
||||||
|
@ -490,7 +516,7 @@ void Topo::topo() {
|
||||||
dopAccObj->getLineSequential((char *)raw_line);
|
dopAccObj->getLineSequential((char *)raw_line);
|
||||||
for (int k=0; k<width; k++) gpu_dopline[(j*width)+k] = raw_line[k];
|
for (int k=0; k<width; k++) gpu_dopline[(j*width)+k] = raw_line[k];
|
||||||
}
|
}
|
||||||
|
|
||||||
outputArrays[0] = (double *)malloc(nb_pixels); // h_lat
|
outputArrays[0] = (double *)malloc(nb_pixels); // h_lat
|
||||||
outputArrays[1] = (double *)malloc(nb_pixels); // h_lon
|
outputArrays[1] = (double *)malloc(nb_pixels); // h_lon
|
||||||
outputArrays[2] = (double *)malloc(nb_pixels); // h_z
|
outputArrays[2] = (double *)malloc(nb_pixels); // h_z
|
||||||
|
@ -565,7 +591,7 @@ void Topo::topo() {
|
||||||
|
|
||||||
printf("\n ------------------ EXITING GPU TOPO ------------------\n\n");
|
printf("\n ------------------ EXITING GPU TOPO ------------------\n\n");
|
||||||
printf("Finished!\n");
|
printf("Finished!\n");
|
||||||
|
|
||||||
delete[] raw_line;
|
delete[] raw_line;
|
||||||
delete[] gpu_dem;
|
delete[] gpu_dem;
|
||||||
delete[] gpu_rho;
|
delete[] gpu_rho;
|
||||||
|
@ -579,7 +605,7 @@ void Topo::topo() {
|
||||||
// Step 1: Get satellite position
|
// Step 1: Get satellite position
|
||||||
// Get time
|
// Get time
|
||||||
tline = t0 + (Nazlooks * (line / prf));
|
tline = t0 + (Nazlooks * (line / prf));
|
||||||
|
|
||||||
// Get state vector
|
// Get state vector
|
||||||
stat = orb.interpolateOrbit(tline,xyzsat,velsat,orbit_method);
|
stat = orb.interpolateOrbit(tline,xyzsat,velsat,orbit_method);
|
||||||
if (stat != 0) {
|
if (stat != 0) {
|
||||||
|
@ -657,15 +683,15 @@ void Topo::topo() {
|
||||||
for (pixel=0; pixel<width; pixel++) {
|
for (pixel=0; pixel<width; pixel++) {
|
||||||
rng = rho[pixel];
|
rng = rho[pixel];
|
||||||
dopfact = (0.5 * wvl * (dopline[pixel] / vmag)) * rng;
|
dopfact = (0.5 * wvl * (dopline[pixel] / vmag)) * rng;
|
||||||
|
|
||||||
// If pixel hasn't converged
|
// If pixel hasn't converged
|
||||||
if (converge[pixel] == 0) {
|
if (converge[pixel] == 0) {
|
||||||
|
|
||||||
// Use previous llh in degrees and meters
|
// Use previous llh in degrees and meters
|
||||||
llh_prev[0] = lat[pixel] / (180. / M_PI);
|
llh_prev[0] = lat[pixel] / (180. / M_PI);
|
||||||
llh_prev[1] = lon[pixel] / (180. / M_PI);
|
llh_prev[1] = lon[pixel] / (180. / M_PI);
|
||||||
llh_prev[2] = z[pixel];
|
llh_prev[2] = z[pixel];
|
||||||
|
|
||||||
// Solve for new position at height zsch
|
// Solve for new position at height zsch
|
||||||
aa = height + rcurv;
|
aa = height + rcurv;
|
||||||
bb = rcurv + zsch[pixel];
|
bb = rcurv + zsch[pixel];
|
||||||
|
@ -680,12 +706,12 @@ void Topo::topo() {
|
||||||
gamm = costheta * rng;
|
gamm = costheta * rng;
|
||||||
alpha = (dopfact - (gamm * linalg.dot(nhat,vhat))) / linalg.dot(vhat,that);
|
alpha = (dopfact - (gamm * linalg.dot(nhat,vhat))) / linalg.dot(vhat,that);
|
||||||
beta = -ilrl * sqrt((rng * rng * sintheta * sintheta) - (alpha * alpha));
|
beta = -ilrl * sqrt((rng * rng * sintheta * sintheta) - (alpha * alpha));
|
||||||
|
|
||||||
// xyz position of target
|
// xyz position of target
|
||||||
for (int idx=0; idx<3; idx++) delta[idx] = (gamm * nhat[idx]) + (alpha * that[idx]) + (beta * chat[idx]);
|
for (int idx=0; idx<3; idx++) delta[idx] = (gamm * nhat[idx]) + (alpha * that[idx]) + (beta * chat[idx]);
|
||||||
for (int idx=0; idx<3; idx++) xyz[idx] = xyzsat[idx] + delta[idx];
|
for (int idx=0; idx<3; idx++) xyz[idx] = xyzsat[idx] + delta[idx];
|
||||||
elp.latlon(xyz,llh,XYZ_2_LLH);
|
elp.latlon(xyz,llh,XYZ_2_LLH);
|
||||||
|
|
||||||
// Convert lat, lon, hgt to xyz coordinates
|
// Convert lat, lon, hgt to xyz coordinates
|
||||||
lat[pixel] = llh[0] * (180. / M_PI);
|
lat[pixel] = llh[0] * (180. / M_PI);
|
||||||
lon[pixel] = llh[1] * (180. / M_PI);
|
lon[pixel] = llh[1] * (180. / M_PI);
|
||||||
|
@ -701,7 +727,7 @@ void Topo::topo() {
|
||||||
fraclon = demlon - idemlon;
|
fraclon = demlon - idemlon;
|
||||||
z[pixel] = tzMethods.interpolate(dem,idemlon,idemlat,fraclon,fraclat,udemwidth,udemlength,dem_method);
|
z[pixel] = tzMethods.interpolate(dem,idemlon,idemlat,fraclon,fraclat,udemwidth,udemlength,dem_method);
|
||||||
if (z[pixel] < -500.0) z[pixel] = -500.0;
|
if (z[pixel] < -500.0) z[pixel] = -500.0;
|
||||||
|
|
||||||
// Given llh, where h = z(pixel, line) in WGS84, get the SCH height
|
// Given llh, where h = z(pixel, line) in WGS84, get the SCH height
|
||||||
llh[0] = lat[pixel] / (180. / M_PI);
|
llh[0] = lat[pixel] / (180. / M_PI);
|
||||||
llh[1] = lon[pixel] / (180. / M_PI);
|
llh[1] = lon[pixel] / (180. / M_PI);
|
||||||
|
@ -719,7 +745,7 @@ void Topo::topo() {
|
||||||
} else if (iter > numiter) {
|
} else if (iter > numiter) {
|
||||||
elp.latlon(xyz_prev,llh_prev,LLH_2_XYZ);
|
elp.latlon(xyz_prev,llh_prev,LLH_2_XYZ);
|
||||||
for (int idx=0; idx<3; idx++) xyz[idx] = 0.5 * (xyz_prev[idx] + xyz[idx]);
|
for (int idx=0; idx<3; idx++) xyz[idx] = 0.5 * (xyz_prev[idx] + xyz[idx]);
|
||||||
|
|
||||||
// Repopulate lat, lon, z
|
// Repopulate lat, lon, z
|
||||||
elp.latlon(xyz,llh,XYZ_2_LLH);
|
elp.latlon(xyz,llh,XYZ_2_LLH);
|
||||||
lat[pixel] = llh[0] * (180. / M_PI);
|
lat[pixel] = llh[0] * (180. / M_PI);
|
||||||
|
@ -727,7 +753,7 @@ void Topo::topo() {
|
||||||
z[pixel] = llh[2];
|
z[pixel] = llh[2];
|
||||||
ptm.convert_sch_to_xyz(sch,xyz,XYZ_2_SCH);
|
ptm.convert_sch_to_xyz(sch,xyz,XYZ_2_SCH);
|
||||||
zsch[pixel] = sch[2];
|
zsch[pixel] = sch[2];
|
||||||
|
|
||||||
// Absolute distance
|
// Absolute distance
|
||||||
distance[pixel] = sqrt(pow((xyz[0]-xyzsat[0]),2)+pow((xyz[1]-xyzsat[1]),2) + pow((xyz[2]-xyzsat[2]),2)) - rng;
|
distance[pixel] = sqrt(pow((xyz[0]-xyzsat[0]),2)+pow((xyz[1]-xyzsat[1]),2) + pow((xyz[2]-xyzsat[2]),2)) - rng;
|
||||||
}
|
}
|
||||||
|
@ -754,32 +780,32 @@ void Topo::topo() {
|
||||||
gamm = costheta * rng;
|
gamm = costheta * rng;
|
||||||
alpha = (dopfact - (gamm * linalg.dot(nhat,vhat))) / linalg.dot(vhat,that);
|
alpha = (dopfact - (gamm * linalg.dot(nhat,vhat))) / linalg.dot(vhat,that);
|
||||||
beta = -ilrl * sqrt((rng * rng * sintheta * sintheta) - (alpha * alpha));
|
beta = -ilrl * sqrt((rng * rng * sintheta * sintheta) - (alpha * alpha));
|
||||||
|
|
||||||
// xyz position of target
|
// xyz position of target
|
||||||
for (int idx=0; idx<3; idx++) delta[idx] = (gamm * nhat[idx]) + (alpha * that[idx]) + (beta * chat[idx]);
|
for (int idx=0; idx<3; idx++) delta[idx] = (gamm * nhat[idx]) + (alpha * that[idx]) + (beta * chat[idx]);
|
||||||
for (int idx=0; idx<3; idx++) xyz[idx] = xyzsat[idx] + delta[idx];
|
for (int idx=0; idx<3; idx++) xyz[idx] = xyzsat[idx] + delta[idx];
|
||||||
elp.latlon(xyz,llh,XYZ_2_LLH);
|
elp.latlon(xyz,llh,XYZ_2_LLH);
|
||||||
|
|
||||||
// Copy into output arrays
|
// Copy into output arrays
|
||||||
lat[pixel] = llh[0] * (180. / M_PI);
|
lat[pixel] = llh[0] * (180. / M_PI);
|
||||||
lon[pixel] = llh[1] * (180. / M_PI);
|
lon[pixel] = llh[1] * (180. / M_PI);
|
||||||
z[pixel] = llh[2];
|
z[pixel] = llh[2];
|
||||||
distance[pixel] = sqrt(pow((xyz[0]-xyzsat[0]),2)+pow((xyz[1]-xyzsat[1]),2) + pow((xyz[2]-xyzsat[2]),2)) - rng;
|
distance[pixel] = sqrt(pow((xyz[0]-xyzsat[0]),2)+pow((xyz[1]-xyzsat[1]),2) + pow((xyz[2]-xyzsat[2]),2)) - rng;
|
||||||
|
|
||||||
// Computation in ENU coordinates around target
|
// Computation in ENU coordinates around target
|
||||||
linalg.enubasis(llh[0],llh[1],enumat);
|
linalg.enubasis(llh[0],llh[1],enumat);
|
||||||
linalg.tranmat(enumat,xyz2enu);
|
linalg.tranmat(enumat,xyz2enu);
|
||||||
linalg.matvec(xyz2enu,delta,enu);
|
linalg.matvec(xyz2enu,delta,enu);
|
||||||
cosalpha = abs(enu[2]) / linalg.norm(enu);
|
cosalpha = abs(enu[2]) / linalg.norm(enu);
|
||||||
|
|
||||||
// LOS vectors
|
// LOS vectors
|
||||||
losang[(2*pixel)] = acos(cosalpha) * (180. / M_PI);
|
losang[(2*pixel)] = acos(cosalpha) * (180. / M_PI);
|
||||||
losang[((2*pixel)+1)] = (atan2(-enu[1],-enu[0]) - (0.5*M_PI)) * (180. / M_PI);
|
losang[((2*pixel)+1)] = (atan2(-enu[1],-enu[0]) - (0.5*M_PI)) * (180. / M_PI);
|
||||||
incang[(2*pixel)] = acos(costheta) * (180. / M_PI);
|
incang[(2*pixel)] = acos(costheta) * (180. / M_PI);
|
||||||
|
|
||||||
// ctrack gets stored in zsch
|
// ctrack gets stored in zsch
|
||||||
zsch[pixel] = rng * sintheta;
|
zsch[pixel] = rng * sintheta;
|
||||||
|
|
||||||
// Get local incidence angle
|
// Get local incidence angle
|
||||||
demlat = ((lat[pixel] - ufirstlat) / deltalat) + 1;
|
demlat = ((lat[pixel] - ufirstlat) / deltalat) + 1;
|
||||||
demlon = ((lon[pixel] - ufirstlon) / deltalon) + 1;
|
demlon = ((lon[pixel] - ufirstlon) / deltalon) + 1;
|
||||||
|
@ -792,12 +818,12 @@ void Topo::topo() {
|
||||||
fraclat = demlat - idemlat;
|
fraclat = demlat - idemlat;
|
||||||
fraclon = demlon - idemlon;
|
fraclon = demlon - idemlon;
|
||||||
gamm = lat[pixel] / (180. / M_PI);
|
gamm = lat[pixel] / (180. / M_PI);
|
||||||
|
|
||||||
// Slopex
|
// Slopex
|
||||||
aa = tzMethods.interpolate(dem,(idemlon-1),idemlat,fraclon,fraclat,udemwidth,udemlength,dem_method);
|
aa = tzMethods.interpolate(dem,(idemlon-1),idemlat,fraclon,fraclat,udemwidth,udemlength,dem_method);
|
||||||
bb = tzMethods.interpolate(dem,(idemlon+1),idemlat,fraclon,fraclat,udemwidth,udemlength,dem_method);
|
bb = tzMethods.interpolate(dem,(idemlon+1),idemlat,fraclon,fraclat,udemwidth,udemlength,dem_method);
|
||||||
alpha = ((bb - aa) * (180. / M_PI)) / (2.0 * elp.reast(gamm) * deltalon);
|
alpha = ((bb - aa) * (180. / M_PI)) / (2.0 * elp.reast(gamm) * deltalon);
|
||||||
|
|
||||||
// Slopey
|
// Slopey
|
||||||
aa = tzMethods.interpolate(dem,idemlon,(idemlat-1),fraclon,fraclat,udemwidth,udemlength,dem_method);
|
aa = tzMethods.interpolate(dem,idemlon,(idemlat-1),fraclon,fraclat,udemwidth,udemlength,dem_method);
|
||||||
bb = tzMethods.interpolate(dem,idemlon,(idemlat+1),fraclon,fraclat,udemwidth,udemlength,dem_method);
|
bb = tzMethods.interpolate(dem,idemlon,(idemlat+1),fraclon,fraclat,udemwidth,udemlength,dem_method);
|
||||||
|
@ -822,7 +848,7 @@ void Topo::topo() {
|
||||||
max_lat = max(mxlat, max_lat);
|
max_lat = max(mxlat, max_lat);
|
||||||
min_lon = min(mnlon, min_lon);
|
min_lon = min(mnlon, min_lon);
|
||||||
max_lon = max(mxlon, max_lon);
|
max_lon = max(mxlon, max_lon);
|
||||||
|
|
||||||
latAccObj->setLineSequential((char *)&lat[0]);
|
latAccObj->setLineSequential((char *)&lat[0]);
|
||||||
lonAccObj->setLineSequential((char *)&lon[0]);
|
lonAccObj->setLineSequential((char *)&lon[0]);
|
||||||
heightAccObj->setLineSequential((char *)&z[0]);
|
heightAccObj->setLineSequential((char *)&z[0]);
|
||||||
|
@ -840,7 +866,7 @@ void Topo::topo() {
|
||||||
ctrackmin = mnzsch - demmax;
|
ctrackmin = mnzsch - demmax;
|
||||||
ctrackmax = mxzsch + demmax;
|
ctrackmax = mxzsch + demmax;
|
||||||
dctrack = (ctrackmax - ctrackmin) / (owidth - 1.0);
|
dctrack = (ctrackmax - ctrackmin) / (owidth - 1.0);
|
||||||
|
|
||||||
// Sort lat/lon by ctrack
|
// Sort lat/lon by ctrack
|
||||||
linalg.insertionSort(zsch,width);
|
linalg.insertionSort(zsch,width);
|
||||||
linalg.insertionSort(lat,width);
|
linalg.insertionSort(lat,width);
|
||||||
|
@ -853,7 +879,7 @@ void Topo::topo() {
|
||||||
aa = ctrackmin + (pixel * dctrack);
|
aa = ctrackmin + (pixel * dctrack);
|
||||||
ctrack[pixel] = aa;
|
ctrack[pixel] = aa;
|
||||||
i_type = linalg.binarySearch(zsch,0,(width-1),aa);
|
i_type = linalg.binarySearch(zsch,0,(width-1),aa);
|
||||||
|
|
||||||
// Simple bi-linear interpolation
|
// Simple bi-linear interpolation
|
||||||
fraclat = (aa - zsch[i_type]) / (zsch[(i_type+1)] - zsch[i_type]);
|
fraclat = (aa - zsch[i_type]) / (zsch[(i_type+1)] - zsch[i_type]);
|
||||||
demlat = lat[i_type] + (fraclat * (lat[(i_type+1)] - lat[i_type]));
|
demlat = lat[i_type] + (fraclat * (lat[(i_type+1)] - lat[i_type]));
|
||||||
|
|
Loading…
Reference in New Issue