GPU geo2rdr and topo memory allocation fix

LT1AB
Lijun Zhu 2021-12-08 13:02:56 -08:00
parent 31803ef7fa
commit 74c92a1dc2
5 changed files with 165 additions and 125 deletions

View File

@ -4,6 +4,7 @@
// //
#include <cuda_runtime.h> #include <cuda_runtime.h>
#include <cassert>
#include <math.h> #include <math.h>
#include <stdio.h> #include <stdio.h>
#include <sys/time.h> #include <sys/time.h>
@ -65,7 +66,7 @@ struct Poly1d {
__constant__ double d_inpts_double[9]; __constant__ double d_inpts_double[9];
__constant__ int d_inpts_int[3]; __constant__ int d_inpts_int[3];
// Mem usage: 27 doubles (216 bytes) per call // Mem usage: 27 doubles (216 bytes) per call
__device__ int interpolateOrbit(struct Orbit *orb, double t, double *xyz, double *vel) { __device__ int interpolateOrbit(struct Orbit *orb, double t, double *xyz, double *vel) {
double h[4], hdot[4], f0[4], f1[4], g0[4], g1[4]; double h[4], hdot[4], f0[4], f1[4], g0[4], g1[4];
double sum = 0.0; double sum = 0.0;
@ -197,7 +198,7 @@ __global__ void runGeo(struct Orbit orb, struct Poly1d fdvsrng, struct Poly1d fd
if (pixel < NPIXELS) { // The number of pixels in a run changes based on if it's a full run or a partial run if (pixel < NPIXELS) { // The number of pixels in a run changes based on if it's a full run or a partial run
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * /* * * * * * * * * * * * * * * * * * * * * * * * * * * * *
* Input mapping * Input mapping
* *
* int[0] = demLength * int[0] = demLength
* int[1] = demWidth * int[1] = demWidth
* int[2] = bistatic * int[2] = bistatic
@ -212,7 +213,7 @@ __global__ void runGeo(struct Orbit orb, struct Poly1d fdvsrng, struct Poly1d fd
* double[7] = dmrg * double[7] = dmrg
* double[8] = dtaz * double[8] = dtaz
* * * * * * * * * * * * * * * * * * * * * * * * * * * * */ * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
double xyz[3], llh[3], satx[3], satv[3], dr[3]; double xyz[3], llh[3], satx[3], satv[3], dr[3];
double rngpix, tline, tprev, fnprime, fdop, fdopder; double rngpix, tline, tprev, fnprime, fdop, fdopder;
int stat, i, j; int stat, i, j;
@ -231,7 +232,7 @@ __global__ void runGeo(struct Orbit orb, struct Poly1d fdvsrng, struct Poly1d fd
llh2xyz(&elp,xyz,llh); llh2xyz(&elp,xyz,llh);
tline = .5 * (d_inpts_double[2] + d_inpts_double[3]); tline = .5 * (d_inpts_double[2] + d_inpts_double[3]);
stat = interpolateOrbit(&orb, tline, satx, satv); // Originally we got xyz_mid and vel_mid, then copied into satx/satv, stat = interpolateOrbit(&orb, tline, satx, satv); // Originally we got xyz_mid and vel_mid, then copied into satx/satv,
// but since these are all independent here it's fine // but since these are all independent here it's fine
if (stat != 0) isOutside = true; // Should exit, but this is next-best thing... if (stat != 0) isOutside = true; // Should exit, but this is next-best thing...
@ -294,11 +295,17 @@ int nLinesPossible(int length, int width) {
size_t freeByte, totalByte; size_t freeByte, totalByte;
int linesPerRun; int linesPerRun;
cudaMemGetInfo(&freeByte, &totalByte); cudaMemGetInfo(&freeByte, &totalByte);
printf("tb %ld\n", totalByte); printf("Available free gpu memory in bytes %ld\n", freeByte);
totalByte = size_t((double(totalByte) / 5.e8) * 5.e8); // Round down to nearest .5 GB // use 100Mb as a rounding unit , may be adjusted
printf("tba %ld\n", totalByte); size_t memoryRoundingUnit = 1024ULL * 1024ULL * 100;
printf("Device has roughly %.4f GB of memory, ", double(totalByte)/1.e9); // use 2*memoryRoundingUnit as an overhead for safety
linesPerRun = totalByte / (556 * width); freeByte = (freeByte / memoryRoundingUnit -2) * memoryRoundingUnit;
assert(freeByte >0);
// printf("GPU Memory to be used %ld\n", freeByte);
// printf("Device has roughly %.4f GB of memory, ", double(totalByte)/1.e9);
// determine the allowed max lines per run; per-pixel memory usage is estimated as 7 doubles (previously estimated as 556 bytes)
linesPerRun = freeByte / (7*sizeof(double) * width);
assert(linesPerRun>0);
printf("and can process roughly %d lines (each with %d pixels) per run.\n", linesPerRun, width); printf("and can process roughly %d lines (each with %d pixels) per run.\n", linesPerRun, width);
return linesPerRun; return linesPerRun;
} }
@ -319,9 +326,9 @@ void freePoly1d(struct Poly1d *poly) {
free(poly->coeffs); free(poly->coeffs);
} }
void runGPUGeo(int iter, int numPix, double *h_inpts_dbl, int *h_inpts_int, double *h_lat, double *h_lon, double *h_dem, int h_orbNvec, double *h_orbSvs, void runGPUGeo(int iter, int numPix, double *h_inpts_dbl, int *h_inpts_int, double *h_lat, double *h_lon, double *h_dem, int h_orbNvec, double *h_orbSvs,
int h_polyOrd, double h_polyMean, double h_polyNorm, double *h_polyCoeffs, double h_polyPRF, double **accArr) { int h_polyOrd, double h_polyMean, double h_polyNorm, double *h_polyCoeffs, double h_polyPRF, double **accArr) {
double iStartCpy, iStartRun, iEndRun, iEndCpy; double iStartCpy, iStartRun, iEndRun, iEndCpy;
int i; int i;

View File

@ -88,7 +88,7 @@ Geo2rdr::Geo2rdr() {
} }
void Geo2rdr::geo2rdr() { void Geo2rdr::geo2rdr() {
double *lat, *lon, *dem, *rgm, *azt, *rgoff, *azoff; double *lat, *lon, *dem, *rgm, *azt, *rgoff, *azoff;
double xyz_mid[3], vel_mid[3], llh[3], xyz[3], satx[3], satv[3], dr[3]; double xyz_mid[3], vel_mid[3], llh[3], xyz[3], satx[3], satv[3], dr[3];
double tend, tline, tprev, rngend, rngpix, tmid, temp, dtaz, dmrg, fdop, fdopder, fnprime; double tend, tline, tprev, rngend, rngpix, tmid, temp, dtaz, dmrg, fdop, fdopder, fnprime;
@ -137,7 +137,7 @@ void Geo2rdr::geo2rdr() {
} }
// OpenMP replacement for clock() (clock reports cumulative thread time, not single thread // OpenMP replacement for clock() (clock reports cumulative thread time, not single thread
// time, so clock() on 4 threads would report 4 x the true runtime) // time, so clock() on 4 threads would report 4 x the true runtime)
timer_start = omp_get_wtime(); timer_start = omp_get_wtime();
cnt = 0; cnt = 0;
printf("Geo2rdr executing on %d threads...\n", omp_get_max_threads()); printf("Geo2rdr executing on %d threads...\n", omp_get_max_threads());
@ -259,12 +259,20 @@ void Geo2rdr::geo2rdr() {
wd.width = demWidth; wd.width = demWidth;
wd.firstWrite = true; // Flag to ignore write instructions wd.firstWrite = true; // Flag to ignore write instructions
pthread_create(&writeThread, &attr, writeToFile, (void*)&wd); // Fires empty thread pthread_create(&writeThread, &attr, writeToFile, (void*)&wd); // Fires empty thread
int totalPixels = demLength * demWidth; size_t totalPixels = demLength * demWidth;
//int linesPerRun = min(demLength, nLinesPossible(demLength, demWidth)); // adjust the lines per run by the available gpu memory
int linesPerRun = demLength; int linesPerRun = std::min(demLength, nLinesPossible(demLength, demWidth));
while ((linesPerRun*demWidth) > 2e8) linesPerRun--; // ! To best parallelize the computation, use the max available gpu memory is the best option
int pixPerRun = linesPerRun * demWidth; // ! the following adjustment is not needed
// adjust further by the max pixels per run, preferably as a user-configurable parameter
// temp set as 2^20
// size_t maxPixPerRun = 1 << 20;
// size_t pixPerRun = std::min((size_t)linesPerRun*demWidth, maxPixPerRun);
// linesPerRun = pixPerRun/demWidth *demWidth;
// recalculate run info
size_t pixPerRun = linesPerRun * demWidth;
int nRuns = demLength / linesPerRun; int nRuns = demLength / linesPerRun;
int remPix = totalPixels - (nRuns * pixPerRun); int remPix = totalPixels - (nRuns * pixPerRun);
int remLines = remPix / demWidth; int remLines = remPix / demWidth;
@ -273,7 +281,7 @@ void Geo2rdr::geo2rdr() {
if (remPix > 0) printf(" (with %d lines in a final partial block)", remLines); if (remPix > 0) printf(" (with %d lines in a final partial block)", remLines);
printf("\n"); printf("\n");
lat = new double[pixPerRun]; lat = new double[pixPerRun];
lon = new double[pixPerRun]; lon = new double[pixPerRun];
dem = new double[pixPerRun]; dem = new double[pixPerRun];
size_t nb_pixels = pixPerRun * sizeof(double); size_t nb_pixels = pixPerRun * sizeof(double);
@ -291,14 +299,14 @@ void Geo2rdr::geo2rdr() {
outputArrays[2] = (double *)malloc(nb_pixels); // h_rgoff outputArrays[2] = (double *)malloc(nb_pixels); // h_rgoff
outputArrays[3] = (double *)malloc(nb_pixels); // h_azoff outputArrays[3] = (double *)malloc(nb_pixels); // h_azoff
runGPUGeo(i, pixPerRun, gpu_inputs_d, gpu_inputs_i, lat, lon, dem, runGPUGeo(i, pixPerRun, gpu_inputs_d, gpu_inputs_i, lat, lon, dem,
gpu_orbNvec, gpu_orbSvs, gpu_polyOrd, gpu_polyMean, gpu_polyNorm, gpu_orbNvec, gpu_orbSvs, gpu_polyOrd, gpu_polyMean, gpu_polyNorm,
gpu_polyCoef, prf, outputArrays); gpu_polyCoef, prf, outputArrays);
for (int j=0; j<4; j++) writeArrays[j] = outputArrays[j]; // Copying pointers for (int j=0; j<4; j++) writeArrays[j] = outputArrays[j]; // Copying pointers
if (i != 0) printf(" Waiting for previous asynchronous write-out to finish...\n"); if (i != 0) printf(" Waiting for previous asynchronous write-out to finish...\n");
pthread_attr_destroy(&attr); pthread_attr_destroy(&attr);
pthread_join(writeThread, &thread_stat); // Waits for async thread to finish pthread_join(writeThread, &thread_stat); // Waits for async thread to finish
printf(" Writing run %d out asynchronously to image files...\n", i); printf(" Writing run %d out asynchronously to image files...\n", i);
wd.accessors = (void**)accObjs; wd.accessors = (void**)accObjs;
wd.rg = writeArrays[0]; wd.rg = writeArrays[0];
@ -381,14 +389,14 @@ void Geo2rdr::geo2rdr() {
pixel = latAccObj->getLineSequential((char *)lat); pixel = latAccObj->getLineSequential((char *)lat);
pixel = lonAccObj->getLineSequential((char *)lon); pixel = lonAccObj->getLineSequential((char *)lon);
pixel = hgtAccObj->getLineSequential((char *)dem); pixel = hgtAccObj->getLineSequential((char *)dem);
if ((line%1000) == 0) printf("Processing line: %d %d\n", line, numOutsideImage); if ((line%1000) == 0) printf("Processing line: %d %d\n", line, numOutsideImage);
#pragma omp parallel for private(pixel, rngpix, tline, tprev, stat, fnprime, fdop, \ #pragma omp parallel for private(pixel, rngpix, tline, tprev, stat, fnprime, fdop, \
fdopder, isOutside, xyz, llh, satx, satv, dr) \ fdopder, isOutside, xyz, llh, satx, satv, dr) \
reduction(+:numOutsideImage,conv,cnt) reduction(+:numOutsideImage,conv,cnt)
for (pixel=0; pixel<demWidth; pixel++) { for (pixel=0; pixel<demWidth; pixel++) {
isOutside = false; // Flag to determine if point is outside image isOutside = false; // Flag to determine if point is outside image
llh[0] = lat[pixel] * (M_PI / 180.); llh[0] = lat[pixel] * (M_PI / 180.);
@ -426,7 +434,7 @@ void Geo2rdr::geo2rdr() {
break; // Point converged break; // Point converged
} }
} }
if ((tline < tstart) || (tline > tend)) isOutside = true; if ((tline < tstart) || (tline > tend)) isOutside = true;
for (int i=0; i<3; i++) dr[i] = xyz[i] - satx[i]; for (int i=0; i<3; i++) dr[i] = xyz[i] - satx[i];

View File

@ -73,7 +73,7 @@ __device__ int interpolateOrbit(struct Orbit *orb, double t, double *xyz, double
double h[4], hdot[4], f0[4], f1[4], g0[4], g1[4]; double h[4], hdot[4], f0[4], f1[4], g0[4], g1[4];
double sum = 0.0; double sum = 0.0;
int v0 = -1; int v0 = -1;
if ((t < orb->svs[0].t) || (t > orb->svs[orb->nVec-1].t)) return 1; if ((t < orb->svs[0].t) || (t > orb->svs[orb->nVec-1].t)) return 1;
for (int i=0; i<orb->nVec; i++) { for (int i=0; i<orb->nVec; i++) {
if ((orb->svs[i].t >= t) && (v0 == -1)) { if ((orb->svs[i].t >= t) && (v0 == -1)) {
@ -95,44 +95,44 @@ __device__ int interpolateOrbit(struct Orbit *orb, double t, double *xyz, double
sum = (1.0 / (orb->svs[v0+3].t - orb->svs[v0].t)) + (1.0 / (orb->svs[v0+3].t - orb->svs[v0+1].t)) + (1.0 / (orb->svs[v0+3].t - orb->svs[v0+2].t)); sum = (1.0 / (orb->svs[v0+3].t - orb->svs[v0].t)) + (1.0 / (orb->svs[v0+3].t - orb->svs[v0+1].t)) + (1.0 / (orb->svs[v0+3].t - orb->svs[v0+2].t));
f0[3] = 1.0 - (2.0 * (t - orb->svs[v0+3].t) * sum); f0[3] = 1.0 - (2.0 * (t - orb->svs[v0+3].t) * sum);
h[0] = ((t - orb->svs[v0+1].t) / (orb->svs[v0].t - orb->svs[v0+1].t)) * ((t - orb->svs[v0+2].t) / (orb->svs[v0].t - orb->svs[v0+2].t)) * h[0] = ((t - orb->svs[v0+1].t) / (orb->svs[v0].t - orb->svs[v0+1].t)) * ((t - orb->svs[v0+2].t) / (orb->svs[v0].t - orb->svs[v0+2].t)) *
((t - orb->svs[v0+3].t) / (orb->svs[v0].t - orb->svs[v0+3].t)); ((t - orb->svs[v0+3].t) / (orb->svs[v0].t - orb->svs[v0+3].t));
h[1] = ((t - orb->svs[v0].t) / (orb->svs[v0+1].t - orb->svs[v0].t)) * ((t - orb->svs[v0+2].t) / (orb->svs[v0+1].t - orb->svs[v0+2].t)) * h[1] = ((t - orb->svs[v0].t) / (orb->svs[v0+1].t - orb->svs[v0].t)) * ((t - orb->svs[v0+2].t) / (orb->svs[v0+1].t - orb->svs[v0+2].t)) *
((t - orb->svs[v0+3].t) / (orb->svs[v0+1].t - orb->svs[v0+3].t)); ((t - orb->svs[v0+3].t) / (orb->svs[v0+1].t - orb->svs[v0+3].t));
h[2] = ((t - orb->svs[v0].t) / (orb->svs[v0+2].t - orb->svs[v0].t)) * ((t - orb->svs[v0+1].t) / (orb->svs[v0+2].t - orb->svs[v0+1].t)) * h[2] = ((t - orb->svs[v0].t) / (orb->svs[v0+2].t - orb->svs[v0].t)) * ((t - orb->svs[v0+1].t) / (orb->svs[v0+2].t - orb->svs[v0+1].t)) *
((t - orb->svs[v0+3].t) / (orb->svs[v0+2].t - orb->svs[v0+3].t)); ((t - orb->svs[v0+3].t) / (orb->svs[v0+2].t - orb->svs[v0+3].t));
h[3] = ((t - orb->svs[v0].t) / (orb->svs[v0+3].t - orb->svs[v0].t)) * ((t - orb->svs[v0+1].t) / (orb->svs[v0+3].t - orb->svs[v0+1].t)) * h[3] = ((t - orb->svs[v0].t) / (orb->svs[v0+3].t - orb->svs[v0].t)) * ((t - orb->svs[v0+1].t) / (orb->svs[v0+3].t - orb->svs[v0+1].t)) *
((t - orb->svs[v0+2].t) / (orb->svs[v0+3].t - orb->svs[v0+2].t)); ((t - orb->svs[v0+2].t) / (orb->svs[v0+3].t - orb->svs[v0+2].t));
sum = ((t - orb->svs[v0+2].t) / (orb->svs[v0].t - orb->svs[v0+2].t)) * ((t - orb->svs[v0+3].t) / (orb->svs[v0].t - orb->svs[v0+3].t)) * sum = ((t - orb->svs[v0+2].t) / (orb->svs[v0].t - orb->svs[v0+2].t)) * ((t - orb->svs[v0+3].t) / (orb->svs[v0].t - orb->svs[v0+3].t)) *
(1.0 / (orb->svs[v0].t - orb->svs[v0+1].t)); (1.0 / (orb->svs[v0].t - orb->svs[v0+1].t));
sum += ((t - orb->svs[v0+1].t) / (orb->svs[v0].t - orb->svs[v0+1].t)) * ((t - orb->svs[v0+3].t) / (orb->svs[v0].t - orb->svs[v0+3].t)) * sum += ((t - orb->svs[v0+1].t) / (orb->svs[v0].t - orb->svs[v0+1].t)) * ((t - orb->svs[v0+3].t) / (orb->svs[v0].t - orb->svs[v0+3].t)) *
(1.0 / (orb->svs[v0].t - orb->svs[v0+2].t)); (1.0 / (orb->svs[v0].t - orb->svs[v0+2].t));
sum += ((t - orb->svs[v0+1].t) / (orb->svs[v0].t - orb->svs[v0+1].t)) * ((t - orb->svs[v0+2].t) / (orb->svs[v0].t - orb->svs[v0+2].t)) * sum += ((t - orb->svs[v0+1].t) / (orb->svs[v0].t - orb->svs[v0+1].t)) * ((t - orb->svs[v0+2].t) / (orb->svs[v0].t - orb->svs[v0+2].t)) *
(1.0 / (orb->svs[v0].t - orb->svs[v0+3].t)); (1.0 / (orb->svs[v0].t - orb->svs[v0+3].t));
hdot[0] = sum; hdot[0] = sum;
sum = ((t - orb->svs[v0+2].t) / (orb->svs[v0+1].t - orb->svs[v0+2].t)) * ((t - orb->svs[v0+3].t) / (orb->svs[v0+1].t - orb->svs[v0+3].t)) * sum = ((t - orb->svs[v0+2].t) / (orb->svs[v0+1].t - orb->svs[v0+2].t)) * ((t - orb->svs[v0+3].t) / (orb->svs[v0+1].t - orb->svs[v0+3].t)) *
(1.0 / (orb->svs[v0+1].t - orb->svs[v0].t)); (1.0 / (orb->svs[v0+1].t - orb->svs[v0].t));
sum += ((t - orb->svs[v0].t) / (orb->svs[v0+1].t - orb->svs[v0].t)) * ((t - orb->svs[v0+3].t) / (orb->svs[v0+1].t - orb->svs[v0+3].t)) * sum += ((t - orb->svs[v0].t) / (orb->svs[v0+1].t - orb->svs[v0].t)) * ((t - orb->svs[v0+3].t) / (orb->svs[v0+1].t - orb->svs[v0+3].t)) *
(1.0 / (orb->svs[v0+1].t - orb->svs[v0+2].t)); (1.0 / (orb->svs[v0+1].t - orb->svs[v0+2].t));
sum += ((t - orb->svs[v0].t) / (orb->svs[v0+1].t - orb->svs[v0].t)) * ((t - orb->svs[v0+2].t) / (orb->svs[v0+1].t - orb->svs[v0+2].t)) * sum += ((t - orb->svs[v0].t) / (orb->svs[v0+1].t - orb->svs[v0].t)) * ((t - orb->svs[v0+2].t) / (orb->svs[v0+1].t - orb->svs[v0+2].t)) *
(1.0 / (orb->svs[v0+1].t - orb->svs[v0+3].t)); (1.0 / (orb->svs[v0+1].t - orb->svs[v0+3].t));
hdot[1] = sum; hdot[1] = sum;
sum = ((t - orb->svs[v0+1].t) / (orb->svs[v0+2].t - orb->svs[v0+1].t)) * ((t - orb->svs[v0+3].t) / (orb->svs[v0+2].t - orb->svs[v0+3].t)) * sum = ((t - orb->svs[v0+1].t) / (orb->svs[v0+2].t - orb->svs[v0+1].t)) * ((t - orb->svs[v0+3].t) / (orb->svs[v0+2].t - orb->svs[v0+3].t)) *
(1.0 / (orb->svs[v0+2].t - orb->svs[v0].t)); (1.0 / (orb->svs[v0+2].t - orb->svs[v0].t));
sum += ((t - orb->svs[v0].t) / (orb->svs[v0+2].t - orb->svs[v0].t)) * ((t - orb->svs[v0+3].t) / (orb->svs[v0+2].t - orb->svs[v0+3].t)) * sum += ((t - orb->svs[v0].t) / (orb->svs[v0+2].t - orb->svs[v0].t)) * ((t - orb->svs[v0+3].t) / (orb->svs[v0+2].t - orb->svs[v0+3].t)) *
(1.0 / (orb->svs[v0+2].t - orb->svs[v0+1].t)); (1.0 / (orb->svs[v0+2].t - orb->svs[v0+1].t));
sum += ((t - orb->svs[v0].t) / (orb->svs[v0+2].t - orb->svs[v0].t)) * ((t - orb->svs[v0+1].t) / (orb->svs[v0+2].t - orb->svs[v0+1].t)) * sum += ((t - orb->svs[v0].t) / (orb->svs[v0+2].t - orb->svs[v0].t)) * ((t - orb->svs[v0+1].t) / (orb->svs[v0+2].t - orb->svs[v0+1].t)) *
(1.0 / (orb->svs[v0+2].t - orb->svs[v0+3].t)); (1.0 / (orb->svs[v0+2].t - orb->svs[v0+3].t));
hdot[2] = sum; hdot[2] = sum;
sum = ((t - orb->svs[v0+1].t) / (orb->svs[v0+3].t - orb->svs[v0+1].t)) * ((t - orb->svs[v0+2].t) / (orb->svs[v0+3].t - orb->svs[v0+2].t)) * sum = ((t - orb->svs[v0+1].t) / (orb->svs[v0+3].t - orb->svs[v0+1].t)) * ((t - orb->svs[v0+2].t) / (orb->svs[v0+3].t - orb->svs[v0+2].t)) *
(1.0 / (orb->svs[v0+3].t - orb->svs[v0].t)); (1.0 / (orb->svs[v0+3].t - orb->svs[v0].t));
sum += ((t - orb->svs[v0].t) / (orb->svs[v0+3].t - orb->svs[v0].t)) * ((t - orb->svs[v0+2].t) / (orb->svs[v0+3].t - orb->svs[v0+2].t)) * sum += ((t - orb->svs[v0].t) / (orb->svs[v0+3].t - orb->svs[v0].t)) * ((t - orb->svs[v0+2].t) / (orb->svs[v0+3].t - orb->svs[v0+2].t)) *
(1.0 / (orb->svs[v0+3].t - orb->svs[v0+1].t)); (1.0 / (orb->svs[v0+3].t - orb->svs[v0+1].t));
sum += ((t - orb->svs[v0].t) / (orb->svs[v0+3].t - orb->svs[v0].t)) * ((t - orb->svs[v0+1].t) / (orb->svs[v0+3].t - orb->svs[v0+1].t)) * sum += ((t - orb->svs[v0].t) / (orb->svs[v0+3].t - orb->svs[v0].t)) * ((t - orb->svs[v0+1].t) / (orb->svs[v0+3].t - orb->svs[v0+1].t)) *
(1.0 / (orb->svs[v0+3].t - orb->svs[v0+2].t)); (1.0 / (orb->svs[v0+3].t - orb->svs[v0+2].t));
hdot[3] = sum; hdot[3] = sum;
@ -152,12 +152,12 @@ __device__ int interpolateOrbit(struct Orbit *orb, double t, double *xyz, double
xyz[0] = (((orb->svs[v0].px * f0[0]) + (orb->svs[v0].vx * f1[0])) * h[0] * h[0]) + (((orb->svs[v0+1].px * f0[1]) + (orb->svs[v0+1].vx * f1[1])) * h[1] * h[1]) + xyz[0] = (((orb->svs[v0].px * f0[0]) + (orb->svs[v0].vx * f1[0])) * h[0] * h[0]) + (((orb->svs[v0+1].px * f0[1]) + (orb->svs[v0+1].vx * f1[1])) * h[1] * h[1]) +
(((orb->svs[v0+2].px * f0[2]) + (orb->svs[v0+2].vx * f1[2])) * h[2] * h[2]) + (((orb->svs[v0+3].px * f0[3]) + (orb->svs[v0+3].vx * f1[3])) * h[3] * h[3]); (((orb->svs[v0+2].px * f0[2]) + (orb->svs[v0+2].vx * f1[2])) * h[2] * h[2]) + (((orb->svs[v0+3].px * f0[3]) + (orb->svs[v0+3].vx * f1[3])) * h[3] * h[3]);
xyz[1] = (((orb->svs[v0].py * f0[0]) + (orb->svs[v0].vy * f1[0])) * h[0] * h[0]) + (((orb->svs[v0+1].py * f0[1]) + (orb->svs[v0+1].vy * f1[1])) * h[1] * h[1]) + xyz[1] = (((orb->svs[v0].py * f0[0]) + (orb->svs[v0].vy * f1[0])) * h[0] * h[0]) + (((orb->svs[v0+1].py * f0[1]) + (orb->svs[v0+1].vy * f1[1])) * h[1] * h[1]) +
(((orb->svs[v0+2].py * f0[2]) + (orb->svs[v0+2].vy * f1[2])) * h[2] * h[2]) + (((orb->svs[v0+3].py * f0[3]) + (orb->svs[v0+3].vy * f1[3])) * h[3] * h[3]); (((orb->svs[v0+2].py * f0[2]) + (orb->svs[v0+2].vy * f1[2])) * h[2] * h[2]) + (((orb->svs[v0+3].py * f0[3]) + (orb->svs[v0+3].vy * f1[3])) * h[3] * h[3]);
xyz[2] = (((orb->svs[v0].pz * f0[0]) + (orb->svs[v0].vz * f1[0])) * h[0] * h[0]) + (((orb->svs[v0+1].pz * f0[1]) + (orb->svs[v0+1].vz * f1[1])) * h[1] * h[1]) + xyz[2] = (((orb->svs[v0].pz * f0[0]) + (orb->svs[v0].vz * f1[0])) * h[0] * h[0]) + (((orb->svs[v0+1].pz * f0[1]) + (orb->svs[v0+1].vz * f1[1])) * h[1] * h[1]) +
(((orb->svs[v0+2].pz * f0[2]) + (orb->svs[v0+2].vz * f1[2])) * h[2] * h[2]) + (((orb->svs[v0+3].pz * f0[3]) + (orb->svs[v0+3].vz * f1[3])) * h[3] * h[3]); (((orb->svs[v0+2].pz * f0[2]) + (orb->svs[v0+2].vz * f1[2])) * h[2] * h[2]) + (((orb->svs[v0+3].pz * f0[3]) + (orb->svs[v0+3].vz * f1[3])) * h[3] * h[3]);
vel[0] = (((orb->svs[v0].px * g0[0]) + (orb->svs[v0].vx * g1[0])) * h[0]) + (((orb->svs[v0+1].px * g0[1]) + (orb->svs[v0+1].vx * g1[1])) * h[1]) + vel[0] = (((orb->svs[v0].px * g0[0]) + (orb->svs[v0].vx * g1[0])) * h[0]) + (((orb->svs[v0+1].px * g0[1]) + (orb->svs[v0+1].vx * g1[1])) * h[1]) +
(((orb->svs[v0+2].px * g0[2]) + (orb->svs[v0+2].vx * g1[2])) * h[2]) + (((orb->svs[v0+3].px * g0[3]) + (orb->svs[v0+3].vx * g1[3])) * h[3]); (((orb->svs[v0+2].px * g0[2]) + (orb->svs[v0+2].vx * g1[2])) * h[2]) + (((orb->svs[v0+3].px * g0[3]) + (orb->svs[v0+3].vx * g1[3])) * h[3]);
vel[1] = (((orb->svs[v0].py * g0[0]) + (orb->svs[v0].vy * g1[0])) * h[0]) + (((orb->svs[v0+1].py * g0[1]) + (orb->svs[v0+1].vy * g1[1])) * h[1]) + vel[1] = (((orb->svs[v0].py * g0[0]) + (orb->svs[v0].vy * g1[0])) * h[0]) + (((orb->svs[v0+1].py * g0[1]) + (orb->svs[v0+1].vy * g1[1])) * h[1]) +
(((orb->svs[v0+2].py * g0[2]) + (orb->svs[v0+2].vy * g1[2])) * h[2]) + (((orb->svs[v0+3].py * g0[3]) + (orb->svs[v0+3].vy * g1[3])) * h[3]); (((orb->svs[v0+2].py * g0[2]) + (orb->svs[v0+2].vy * g1[2])) * h[2]) + (((orb->svs[v0+3].py * g0[3]) + (orb->svs[v0+3].vy * g1[3])) * h[3]);
@ -212,7 +212,7 @@ __device__ double interpolateDEM(float *DEM, double lon, double lat, int width,
i0 = int(lon) - 2; i0 = int(lon) - 2;
j0 = int(lat) - 2; j0 = int(lat) - 2;
indi = min((i0+1), width); // bound by out_of_bounds, so this isn't a concern indi = min((i0+1), width); // bound by out_of_bounds, so this isn't a concern
spline(indi, j0, length, A, DEM); spline(indi, j0, length, A, DEM);
initSpline(A,R,Q); initSpline(A,R,Q);
@ -328,7 +328,7 @@ __device__ void radar2xyz(struct Peg *peg, struct Ellipsoid *elp, struct PegTran
ptm->mat[2][0] = sin(peg->lat); ptm->mat[2][0] = sin(peg->lat);
ptm->mat[2][1] = cos(peg->lat) * cos(peg->hdg); ptm->mat[2][1] = cos(peg->lat) * cos(peg->hdg);
ptm->mat[2][2] = cos(peg->lat) * sin(peg->hdg); ptm->mat[2][2] = cos(peg->lat) * sin(peg->hdg);
re = elp->a / sqrt(1.0 - (elp->e2 * pow(sin(peg->lat),2))); re = elp->a / sqrt(1.0 - (elp->e2 * pow(sin(peg->lat),2)));
rn = (elp->a * (1.0 - elp->e2)) / pow((1.0 - (elp->e2 * pow(sin(peg->lat),2))),1.5); rn = (elp->a * (1.0 - elp->e2)) / pow((1.0 - (elp->e2 * pow(sin(peg->lat),2))),1.5);
ptm->radcur = (re * rn) / ((re * pow(cos(peg->hdg),2)) + (rn * pow(sin(peg->hdg),2))); ptm->radcur = (re * rn) / ((re * pow(cos(peg->hdg),2)) + (rn * pow(sin(peg->hdg),2)));
@ -337,7 +337,7 @@ __device__ void radar2xyz(struct Peg *peg, struct Ellipsoid *elp, struct PegTran
llh[1] = peg->lon; llh[1] = peg->lon;
llh[2] = 0.0; llh[2] = 0.0;
llh2xyz(temp,llh,elp); llh2xyz(temp,llh,elp);
ptm->ov[0] = temp[0] - (ptm->radcur * cos(peg->lat) * cos(peg->lon)); ptm->ov[0] = temp[0] - (ptm->radcur * cos(peg->lat) * cos(peg->lon));
ptm->ov[1] = temp[1] - (ptm->radcur * cos(peg->lat) * sin(peg->lon)); ptm->ov[1] = temp[1] - (ptm->radcur * cos(peg->lat) * sin(peg->lon));
ptm->ov[2] = temp[2] - (ptm->radcur * sin(peg->lat)); ptm->ov[2] = temp[2] - (ptm->radcur * sin(peg->lat));
@ -370,7 +370,7 @@ __global__ void runTopo(struct Orbit orbit, struct OutputImgArrs outImgArrs, str
long pixel = (blockDim.x * blockIdx.x) + threadIdx.x; long pixel = (blockDim.x * blockIdx.x) + threadIdx.x;
if (pixel < NPIXELS) { // Make sure we're not operating on a non-existent pixel if (pixel < NPIXELS) { // Make sure we're not operating on a non-existent pixel
double enumat[3][3]; double enumat[3][3];
double xyzsat[3], velsat[3], llhsat[3], vhat[3], that[3], chat[3], nhat[3]; double xyzsat[3], velsat[3], llhsat[3], vhat[3], that[3], chat[3], nhat[3];
double llh[3], llh_prev[3], xyz[3], xyz_prev[3], sch[3], enu[3], delta[3]; double llh[3], llh_prev[3], xyz[3], xyz_prev[3], sch[3], enu[3], delta[3];
@ -381,11 +381,11 @@ __global__ void runTopo(struct Orbit orbit, struct OutputImgArrs outImgArrs, str
double thrd_z, thrd_zsch, thrd_lat, thrd_lon, thrd_distance, thrd_losang0, thrd_losang1; double thrd_z, thrd_zsch, thrd_lat, thrd_lon, thrd_distance, thrd_losang0, thrd_losang1;
double thrd_incang0, thrd_incang1; double thrd_incang0, thrd_incang1;
int thrd_converge; int thrd_converge;
struct Ellipsoid elp; struct Ellipsoid elp;
struct Peg peg; struct Peg peg;
struct PegTrans ptm; struct PegTrans ptm;
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * /* * * * * * * * * * * * * * * * * * * * * * * * * * * * *
* double t0 = inpts_dbl[0]; * double t0 = inpts_dbl[0];
* double prf = inpts_dbl[1]; * double prf = inpts_dbl[1];
@ -412,7 +412,7 @@ __global__ void runTopo(struct Orbit orbit, struct OutputImgArrs outImgArrs, str
* int extraiter = inpts_int[5]; * int extraiter = inpts_int[5];
* int length = inpts_int[6]; NOT USED IN THIS KERNEL * int length = inpts_int[6]; NOT USED IN THIS KERNEL
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
line = (pixel + OFFSET) / d_inpts_int[1]; line = (pixel + OFFSET) / d_inpts_int[1];
tline = d_inpts_dbl[0] + (d_inpts_int[0] * (line / d_inpts_dbl[1])); tline = d_inpts_dbl[0] + (d_inpts_int[0] * (line / d_inpts_dbl[1]));
if (interpolateOrbit(&orbit,tline,xyzsat,velsat) != 0) { if (interpolateOrbit(&orbit,tline,xyzsat,velsat) != 0) {
@ -427,28 +427,28 @@ __global__ void runTopo(struct Orbit orbit, struct OutputImgArrs outImgArrs, str
peg.lat = llhsat[0]; peg.lat = llhsat[0];
peg.lon = llhsat[1]; peg.lon = llhsat[1];
radar2xyz(&peg,&elp,&ptm); radar2xyz(&peg,&elp,&ptm);
thrd_converge = 0; thrd_converge = 0;
thrd_z = 0.0; thrd_z = 0.0;
thrd_zsch = 0.0; thrd_zsch = 0.0;
thrd_lat = d_inpts_dbl[7] + (0.5 * d_inpts_dbl[9] * d_inpts_int[2]); thrd_lat = d_inpts_dbl[7] + (0.5 * d_inpts_dbl[9] * d_inpts_int[2]);
thrd_lon = d_inpts_dbl[8] + (0.5 * d_inpts_dbl[10] * d_inpts_int[3]); thrd_lon = d_inpts_dbl[8] + (0.5 * d_inpts_dbl[10] * d_inpts_int[3]);
dopfact = (0.5 * d_inpts_dbl[11] * (inImgArrs.dopline[pixel] / vmag)) * inImgArrs.rho[pixel]; dopfact = (0.5 * d_inpts_dbl[11] * (inImgArrs.dopline[pixel] / vmag)) * inImgArrs.rho[pixel];
// START THE ITERATIONS // START THE ITERATIONS
for (iter=0; iter<=(d_inpts_int[4]+d_inpts_int[5]); iter++) { for (iter=0; iter<=(d_inpts_int[4]+d_inpts_int[5]); iter++) {
if (thrd_converge == 0) { // Designing this way helps prevent thread divergence as much as possible if (thrd_converge == 0) { // Designing this way helps prevent thread divergence as much as possible
llh_prev[0] = thrd_lat / (180. / M_PI); llh_prev[0] = thrd_lat / (180. / M_PI);
llh_prev[1] = thrd_lon / (180. / M_PI); llh_prev[1] = thrd_lon / (180. / M_PI);
llh_prev[2] = thrd_z; llh_prev[2] = thrd_z;
costheta = 0.5 * (((height + ptm.radcur) / inImgArrs.rho[pixel]) + (inImgArrs.rho[pixel] / (height + ptm.radcur)) - costheta = 0.5 * (((height + ptm.radcur) / inImgArrs.rho[pixel]) + (inImgArrs.rho[pixel] / (height + ptm.radcur)) -
(((ptm.radcur + thrd_zsch) / (height + ptm.radcur)) * ((ptm.radcur + thrd_zsch) / inImgArrs.rho[pixel]))); (((ptm.radcur + thrd_zsch) / (height + ptm.radcur)) * ((ptm.radcur + thrd_zsch) / inImgArrs.rho[pixel])));
sintheta = sqrt(1.0 - pow(costheta,2)); sintheta = sqrt(1.0 - pow(costheta,2));
alpha = (dopfact - (costheta * inImgArrs.rho[pixel] * dot(nhat,vhat))) / dot(vhat,that); alpha = (dopfact - (costheta * inImgArrs.rho[pixel] * dot(nhat,vhat))) / dot(vhat,that);
beta = -d_inpts_dbl[12] * sqrt((pow(inImgArrs.rho[pixel],2) * pow(sintheta,2)) - pow(alpha,2)); beta = -d_inpts_dbl[12] * sqrt((pow(inImgArrs.rho[pixel],2) * pow(sintheta,2)) - pow(alpha,2));
delta[0] = (costheta * inImgArrs.rho[pixel] * nhat[0]) + (alpha * that[0]) + (beta * chat[0]); delta[0] = (costheta * inImgArrs.rho[pixel] * nhat[0]) + (alpha * that[0]) + (beta * chat[0]);
delta[1] = (costheta * inImgArrs.rho[pixel] * nhat[1]) + (alpha * that[1]) + (beta * chat[1]); delta[1] = (costheta * inImgArrs.rho[pixel] * nhat[1]) + (alpha * that[1]) + (beta * chat[1]);
delta[2] = (costheta * inImgArrs.rho[pixel] * nhat[2]) + (alpha * that[2]) + (beta * chat[2]); delta[2] = (costheta * inImgArrs.rho[pixel] * nhat[2]) + (alpha * that[2]) + (beta * chat[2]);
@ -457,7 +457,7 @@ __global__ void runTopo(struct Orbit orbit, struct OutputImgArrs outImgArrs, str
xyz[1] = xyzsat[1] + delta[1]; xyz[1] = xyzsat[1] + delta[1];
xyz[2] = xyzsat[2] + delta[2]; xyz[2] = xyzsat[2] + delta[2];
xyz2llh(xyz,llh,&elp); xyz2llh(xyz,llh,&elp);
thrd_lat = llh[0] * (180. / M_PI); thrd_lat = llh[0] * (180. / M_PI);
thrd_lon = llh[1] * (180. / M_PI); thrd_lon = llh[1] * (180. / M_PI);
demlat = ((thrd_lat - d_inpts_dbl[7]) / d_inpts_dbl[9]) + 1; demlat = ((thrd_lat - d_inpts_dbl[7]) / d_inpts_dbl[9]) + 1;
@ -468,7 +468,7 @@ __global__ void runTopo(struct Orbit orbit, struct OutputImgArrs outImgArrs, str
demlon = fmin(demlon,(d_inpts_int[3]-1.)); demlon = fmin(demlon,(d_inpts_int[3]-1.));
thrd_z = interpolateDEM(inImgArrs.DEM,demlon,demlat,d_inpts_int[3],d_inpts_int[2]); thrd_z = interpolateDEM(inImgArrs.DEM,demlon,demlat,d_inpts_int[3],d_inpts_int[2]);
thrd_z = fmax(thrd_z,-500.); thrd_z = fmax(thrd_z,-500.);
llh[0] = thrd_lat / (180. / M_PI); llh[0] = thrd_lat / (180. / M_PI);
llh[1] = thrd_lon / (180. / M_PI); llh[1] = thrd_lon / (180. / M_PI);
llh[2] = thrd_z; llh[2] = thrd_z;
@ -494,23 +494,23 @@ __global__ void runTopo(struct Orbit orbit, struct OutputImgArrs outImgArrs, str
} }
} }
} }
// Final computation // Final computation
costheta = 0.5 * (((height + ptm.radcur) / inImgArrs.rho[pixel]) + (inImgArrs.rho[pixel] / (height + ptm.radcur)) - costheta = 0.5 * (((height + ptm.radcur) / inImgArrs.rho[pixel]) + (inImgArrs.rho[pixel] / (height + ptm.radcur)) -
(((ptm.radcur + thrd_zsch) / (height + ptm.radcur)) * ((ptm.radcur + thrd_zsch) / inImgArrs.rho[pixel]))); (((ptm.radcur + thrd_zsch) / (height + ptm.radcur)) * ((ptm.radcur + thrd_zsch) / inImgArrs.rho[pixel])));
sintheta = sqrt(1.0 - pow(costheta,2)); sintheta = sqrt(1.0 - pow(costheta,2));
alpha = (dopfact - (costheta * inImgArrs.rho[pixel] * dot(nhat,vhat))) / dot(vhat,that); alpha = (dopfact - (costheta * inImgArrs.rho[pixel] * dot(nhat,vhat))) / dot(vhat,that);
beta = -d_inpts_dbl[12] * sqrt((pow(inImgArrs.rho[pixel],2) * pow(sintheta,2)) - pow(alpha,2)); beta = -d_inpts_dbl[12] * sqrt((pow(inImgArrs.rho[pixel],2) * pow(sintheta,2)) - pow(alpha,2));
delta[0] = (costheta * inImgArrs.rho[pixel] * nhat[0]) + (alpha * that[0]) + (beta * chat[0]); delta[0] = (costheta * inImgArrs.rho[pixel] * nhat[0]) + (alpha * that[0]) + (beta * chat[0]);
delta[1] = (costheta * inImgArrs.rho[pixel] * nhat[1]) + (alpha * that[1]) + (beta * chat[1]); delta[1] = (costheta * inImgArrs.rho[pixel] * nhat[1]) + (alpha * that[1]) + (beta * chat[1]);
delta[2] = (costheta * inImgArrs.rho[pixel] * nhat[2]) + (alpha * that[2]) + (beta * chat[2]); delta[2] = (costheta * inImgArrs.rho[pixel] * nhat[2]) + (alpha * that[2]) + (beta * chat[2]);
xyz[0] = xyzsat[0] + delta[0]; xyz[0] = xyzsat[0] + delta[0];
xyz[1] = xyzsat[1] + delta[1]; xyz[1] = xyzsat[1] + delta[1];
xyz[2] = xyzsat[2] + delta[2]; xyz[2] = xyzsat[2] + delta[2];
xyz2llh(xyz,llh,&elp); xyz2llh(xyz,llh,&elp);
thrd_lat = llh[0] * (180. / M_PI); thrd_lat = llh[0] * (180. / M_PI);
thrd_lon = llh[1] * (180. / M_PI); thrd_lon = llh[1] * (180. / M_PI);
thrd_z = llh[2]; thrd_z = llh[2];
@ -526,42 +526,42 @@ __global__ void runTopo(struct Orbit orbit, struct OutputImgArrs outImgArrs, str
enumat[0][2] = 0.0; enumat[0][2] = 0.0;
enumat[1][2] = cos(llh[0]); enumat[1][2] = cos(llh[0]);
enumat[2][2] = sin(llh[0]); enumat[2][2] = sin(llh[0]);
// Expanded from Linalg::matvec // Expanded from Linalg::matvec
enu[0] = (enumat[0][0] * delta[0]) + (enumat[0][1] * delta[1]) + (enumat[0][2] * delta[2]); enu[0] = (enumat[0][0] * delta[0]) + (enumat[0][1] * delta[1]) + (enumat[0][2] * delta[2]);
enu[1] = (enumat[1][0] * delta[0]) + (enumat[1][1] * delta[1]) + (enumat[1][2] * delta[2]); enu[1] = (enumat[1][0] * delta[0]) + (enumat[1][1] * delta[1]) + (enumat[1][2] * delta[2]);
enu[2] = (enumat[2][0] * delta[0]) + (enumat[2][1] * delta[1]) + (enumat[2][2] * delta[2]); enu[2] = (enumat[2][0] * delta[0]) + (enumat[2][1] * delta[1]) + (enumat[2][2] * delta[2]);
cosalpha = fabs(enu[2]) / norm(3,enu); cosalpha = fabs(enu[2]) / norm(3,enu);
thrd_losang0 = acos(cosalpha) * (180. / M_PI); thrd_losang0 = acos(cosalpha) * (180. / M_PI);
thrd_losang1 = (atan2(-enu[1],-enu[0]) - (0.5*M_PI)) * (180. / M_PI); thrd_losang1 = (atan2(-enu[1],-enu[0]) - (0.5*M_PI)) * (180. / M_PI);
thrd_incang0 = acos(costheta) * (180. / M_PI); thrd_incang0 = acos(costheta) * (180. / M_PI);
thrd_zsch = inImgArrs.rho[pixel] * sintheta; thrd_zsch = inImgArrs.rho[pixel] * sintheta;
demlat = ((thrd_lat - d_inpts_dbl[7]) / d_inpts_dbl[9]) + 1; demlat = ((thrd_lat - d_inpts_dbl[7]) / d_inpts_dbl[9]) + 1;
demlat = fmax(demlat,2.); demlat = fmax(demlat,2.);
demlat = fmin(demlat,(d_inpts_int[2]-1.)); demlat = fmin(demlat,(d_inpts_int[2]-1.));
demlon = ((thrd_lon - d_inpts_dbl[8]) / d_inpts_dbl[10]) + 1; demlon = ((thrd_lon - d_inpts_dbl[8]) / d_inpts_dbl[10]) + 1;
demlon = fmax(demlon,2.); demlon = fmax(demlon,2.);
demlon = fmin(demlon,(d_inpts_int[3]-1.)); demlon = fmin(demlon,(d_inpts_int[3]-1.));
aa = interpolateDEM(inImgArrs.DEM,(demlon-1.),demlat,d_inpts_int[3],d_inpts_int[2]); aa = interpolateDEM(inImgArrs.DEM,(demlon-1.),demlat,d_inpts_int[3],d_inpts_int[2]);
bb = interpolateDEM(inImgArrs.DEM,(demlon+1.),demlat,d_inpts_int[3],d_inpts_int[2]); bb = interpolateDEM(inImgArrs.DEM,(demlon+1.),demlat,d_inpts_int[3],d_inpts_int[2]);
alpha = ((bb - aa) * (180. / M_PI)) / (2.0 * (elp.a / sqrt(1.0 - (elp.e2 * pow(sin(thrd_lat / (180. / M_PI)),2)))) * d_inpts_dbl[10]); alpha = ((bb - aa) * (180. / M_PI)) / (2.0 * (elp.a / sqrt(1.0 - (elp.e2 * pow(sin(thrd_lat / (180. / M_PI)),2)))) * d_inpts_dbl[10]);
aa = interpolateDEM(inImgArrs.DEM,demlon,(demlat-1.),d_inpts_int[3],d_inpts_int[2]); aa = interpolateDEM(inImgArrs.DEM,demlon,(demlat-1.),d_inpts_int[3],d_inpts_int[2]);
bb = interpolateDEM(inImgArrs.DEM,demlon,(demlat+1.),d_inpts_int[3],d_inpts_int[2]); bb = interpolateDEM(inImgArrs.DEM,demlon,(demlat+1.),d_inpts_int[3],d_inpts_int[2]);
beta = ((bb - aa) * (180. / M_PI)) / (2.0 * ((elp.a * (1.0 - elp.e2)) / pow((1.0 - (elp.e2 * pow(sin(thrd_lat / (180. / M_PI)),2))),1.5)) * d_inpts_dbl[9]); beta = ((bb - aa) * (180. / M_PI)) / (2.0 * ((elp.a * (1.0 - elp.e2)) / pow((1.0 - (elp.e2 * pow(sin(thrd_lat / (180. / M_PI)),2))),1.5)) * d_inpts_dbl[9]);
enunorm = norm(3,enu); enunorm = norm(3,enu);
enu[0] = enu[0] / enunorm; enu[0] = enu[0] / enunorm;
enu[1] = enu[1] / enunorm; enu[1] = enu[1] / enunorm;
enu[2] = enu[2] / enunorm; enu[2] = enu[2] / enunorm;
costheta = ((enu[0] * alpha) + (enu[1] * beta) - enu[2]) / sqrt(1.0 + pow(alpha,2) + pow(beta,2)); costheta = ((enu[0] * alpha) + (enu[1] * beta) - enu[2]) / sqrt(1.0 + pow(alpha,2) + pow(beta,2));
thrd_incang1 = acos(costheta) * (180. / M_PI); thrd_incang1 = acos(costheta) * (180. / M_PI);
// Leave out masking stuff for now (though it's doable) // Leave out masking stuff for now (though it's doable)
// Finally write to reference arrays // Finally write to reference arrays
outImgArrs.lat[pixel] = thrd_lat; outImgArrs.lat[pixel] = thrd_lat;
outImgArrs.lon[pixel] = thrd_lon; outImgArrs.lon[pixel] = thrd_lon;
@ -590,11 +590,10 @@ void freeOrbit(struct Orbit *orb) {
free(orb->svs); free(orb->svs);
} }
size_t getDeviceMem() { size_t getDeviceFreeMem() {
size_t freeByte, totalByte; size_t freeByte, totalByte;
cudaMemGetInfo(&freeByte, &totalByte); cudaMemGetInfo(&freeByte, &totalByte);
totalByte = (totalByte / 1e9) * 1e9; // Round down to nearest GB return freeByte;
return totalByte;
} }
// --------------- C FUNCTIONS ---------------- // --------------- C FUNCTIONS ----------------
@ -616,10 +615,10 @@ void runGPUTopo(long nBlock, long numPix, double *h_inpts_dbl, int *h_inpts_int,
cudaSetDevice(0); cudaSetDevice(0);
printf(" Allocating host and general GPU memory...\n"); printf(" Allocating host and general GPU memory...\n");
size_t nb_pixels = numPix * sizeof(double); // size of rho/dopline/lat/lon/z/zsch/incang/losang size_t nb_pixels = numPix * sizeof(double); // size of rho/dopline/lat/lon/z/zsch/incang/losang
size_t nb_DEM = h_inpts_int[3] * h_inpts_int[2] * sizeof(float); // size of DEM size_t nb_DEM = h_inpts_int[3] * h_inpts_int[2] * sizeof(float); // size of DEM
/* /*
h_lat = (double *)malloc(nb_pixels); h_lat = (double *)malloc(nb_pixels);
h_lon = (double *)malloc(nb_pixels); h_lon = (double *)malloc(nb_pixels);
@ -655,21 +654,21 @@ void runGPUTopo(long nBlock, long numPix, double *h_inpts_dbl, int *h_inpts_int,
cudaMemcpyToSymbol(d_inpts_dbl, h_inpts_dbl, (14*sizeof(double))); cudaMemcpyToSymbol(d_inpts_dbl, h_inpts_dbl, (14*sizeof(double)));
cudaMemcpyToSymbol(d_inpts_int, h_inpts_int, (7*sizeof(int))); cudaMemcpyToSymbol(d_inpts_int, h_inpts_int, (7*sizeof(int)));
freeOrbit(&orbit); freeOrbit(&orbit);
orbit.svs = d_svs; orbit.svs = d_svs;
inImgArrs.DEM = d_DEM; inImgArrs.DEM = d_DEM;
inImgArrs.rho = d_rho; inImgArrs.rho = d_rho;
inImgArrs.dopline = d_dopline; inImgArrs.dopline = d_dopline;
printf(" Allocating block memory (%d pixels per image)...\n", numPix); printf(" Allocating block memory (%d pixels per image)...\n", numPix);
cudaMalloc((double**)&d_lat, nb_pixels); cudaMalloc((double**)&d_lat, nb_pixels);
cudaMalloc((double**)&d_lon, nb_pixels); cudaMalloc((double**)&d_lon, nb_pixels);
cudaMalloc((double**)&d_z, nb_pixels); cudaMalloc((double**)&d_z, nb_pixels);
//cudaMalloc((double**)&d_zsch, nb_pixels); //cudaMalloc((double**)&d_zsch, nb_pixels);
cudaMalloc((double**)&d_incang, (2*nb_pixels)); cudaMalloc((double**)&d_incang, (2*nb_pixels));
cudaMalloc((double**)&d_losang, (2*nb_pixels)); cudaMalloc((double**)&d_losang, (2*nb_pixels));
outImgArrs.lat = d_lat; outImgArrs.lat = d_lat;
outImgArrs.lon = d_lon; outImgArrs.lon = d_lon;
outImgArrs.z = d_z; outImgArrs.z = d_z;
@ -702,7 +701,7 @@ void runGPUTopo(long nBlock, long numPix, double *h_inpts_dbl, int *h_inpts_int,
iEndRun = cpuSecond(); iEndRun = cpuSecond();
if (nBlock > -1) printf(" GPU finished block %d in %f s.\n", nBlock, (iEndRun-iStartRun)); if (nBlock > -1) printf(" GPU finished block %d in %f s.\n", nBlock, (iEndRun-iStartRun));
else printf(" GPU finished remaining lines in %f s.\n", (iEndRun-iStartRun)); else printf(" GPU finished remaining lines in %f s.\n", (iEndRun-iStartRun));
printf(" Copying memory back to host...\n"); printf(" Copying memory back to host...\n");
cudaMemcpy(accArr[0], outImgArrs.lat, nb_pixels, cudaMemcpyDeviceToHost); // Copy memory from device to host with offset cudaMemcpy(accArr[0], outImgArrs.lat, nb_pixels, cudaMemcpyDeviceToHost); // Copy memory from device to host with offset

View File

@ -6,7 +6,7 @@
#ifndef GPU_TOPO_H #ifndef GPU_TOPO_H
#define GPU_TOPO_H #define GPU_TOPO_H
size_t getDeviceMem(); size_t getDeviceFreeMem();
void runGPUTopo(long,long,double*,int*,float*,double*,double*,int,double*,double**); void runGPUTopo(long,long,double*,int*,float*,double*,double*,int,double*,double**);
#endif #endif

View File

@ -23,6 +23,7 @@
#include <cmath> #include <cmath>
#include <cstdio> #include <cstdio>
#include <cstdlib> #include <cstdlib>
#include <cassert>
#include <fstream> #include <fstream>
#include <future> #include <future>
#include <omp.h> #include <omp.h>
@ -78,7 +79,7 @@ void *writeToFile(void *inputData) {
data.nLines = ((struct writeData *)inputData)->nLines; data.nLines = ((struct writeData *)inputData)->nLines;
data.width = ((struct writeData *)inputData)->width; data.width = ((struct writeData *)inputData)->width;
data.firstWrite = ((struct writeData *)inputData)->firstWrite; data.firstWrite = ((struct writeData *)inputData)->firstWrite;
if (!data.firstWrite) { if (!data.firstWrite) {
for (int i=0; i<data.nLines; i++) { for (int i=0; i<data.nLines; i++) {
size_t offset = i * size_t(data.width); size_t offset = i * size_t(data.width);
@ -170,7 +171,7 @@ void Topo::topo() {
exit(1); exit(1);
} }
tzMethods.prepareMethods(dem_method); tzMethods.prepareMethods(dem_method);
// Set up Ellipsoid object // Set up Ellipsoid object
elp.a = major; elp.a = major;
elp.e2 = eccentricitySquared; elp.e2 = eccentricitySquared;
@ -252,12 +253,12 @@ void Topo::topo() {
peg.hdg = peghdg; peg.hdg = peghdg;
ptm.radar_to_xyz(elp,peg); ptm.radar_to_xyz(elp,peg);
rcurv = ptm.radcur; rcurv = ptm.radcur;
for (int ind=0; ind<2; ind++) { for (int ind=0; ind<2; ind++) {
pixel = ind * (width - 1); pixel = ind * (width - 1);
rng = rho[pixel]; rng = rho[pixel];
dopfact = (0.5 * wvl * (dopline[pixel] / vmag)) * rng; dopfact = (0.5 * wvl * (dopline[pixel] / vmag)) * rng;
for (int iter=0; iter<2; iter++) { for (int iter=0; iter<2; iter++) {
// SWOT-specific near range check // SWOT-specific near range check
// If slant range vector doesn't hit ground, pick nadir point // If slant range vector doesn't hit ground, pick nadir point
@ -377,7 +378,7 @@ void Topo::topo() {
if (RUN_GPU_TOPO) { if (RUN_GPU_TOPO) {
double gpu_inputs_d[14]; double gpu_inputs_d[14];
int gpu_inputs_i[7]; int gpu_inputs_i[7];
gpu_inputs_d[0] = t0; gpu_inputs_d[0] = t0;
gpu_inputs_d[1] = prf; gpu_inputs_d[1] = prf;
gpu_inputs_d[2] = elp.a; gpu_inputs_d[2] = elp.a;
@ -409,7 +410,7 @@ void Topo::topo() {
gpu_dem[(i*udemlength)+j] = dem[i][j]; gpu_dem[(i*udemlength)+j] = dem[i][j];
} }
} }
int gpu_orbNvec = orb.nVectors; int gpu_orbNvec = orb.nVectors;
double *gpu_orbSvs = new double[7*gpu_orbNvec]; double *gpu_orbSvs = new double[7*gpu_orbNvec];
for (int i=0; i<gpu_orbNvec; i++) { for (int i=0; i<gpu_orbNvec; i++) {
@ -432,7 +433,7 @@ void Topo::topo() {
bool incFlag = bool(incAccessor > 0); bool incFlag = bool(incAccessor > 0);
bool losFlag = bool(losAccessor > 0); bool losFlag = bool(losAccessor > 0);
//std::future<void> result = std::async(std::launch::async, &Topo::writeToFile, this, (void **)accObjs, outputArrays, incFlag, losFlag, 0, width, true); //std::future<void> result = std::async(std::launch::async, &Topo::writeToFile, this, (void **)accObjs, outputArrays, incFlag, losFlag, 0, width, true);
// Create pthread data and initialize dummy thread // Create pthread data and initialize dummy thread
pthread_t writeThread; pthread_t writeThread;
pthread_attr_t attr; pthread_attr_t attr;
@ -455,26 +456,51 @@ void Topo::topo() {
pthread_create(&writeThread, &attr, writeToFile, (void*)&wd); pthread_create(&writeThread, &attr, writeToFile, (void*)&wd);
// Calculate number of and size of blocks // Calculate number of and size of blocks
size_t num_GPU_bytes = getDeviceMem();
long totalPixels = (long)length * width;
long pixPerImg = (((num_GPU_bytes / 8) / 9) / 1e7) * 1e7; // Round down to the nearest 10M pixels
long linesPerImg = pixPerImg / width;
pixPerImg = linesPerImg * width;
int nBlocks = totalPixels / pixPerImg;
//original values: 1.5e8 is too large for each of GPU on kamb. // free GPU memory available
//here I change it to 1.0e8. 16-MAY-2018, Cunren Liang size_t num_GPU_bytes = getDeviceFreeMem();
while (pixPerImg > 1.0e8) { // use 100 MiB as a rounding unit, may be adjusted
linesPerImg -= 1; size_t memoryRoundingUnit = 1024ULL * 1024ULL * 100;
pixPerImg -= width; // memory to be used for each pixel in bytes, with 9 double elements per pixel
nBlocks = totalPixels / pixPerImg; size_t pixelBytes = sizeof(double) * 9;
} // memory overhead for other shared parameters, in units of memoryRoundingUnit (2 units = 200 MiB)
long remPix = totalPixels - (pixPerImg * nBlocks); size_t memoryOverhead = 2;
long remLines = remPix / width;
// adjust the available free memory by rounding down
num_GPU_bytes = (num_GPU_bytes/memoryRoundingUnit - memoryOverhead) * memoryRoundingUnit;
// calculate the max pixels allowed in a batch (block)
size_t pixPerImg = num_GPU_bytes / pixelBytes;
assert(pixPerImg > 0);
// ! To best parallelize the computation, using the maximum available GPU memory is the best option,
// ! so the following adjustment is not needed
// set an upper limit on the size of the block
// preferably offered as an input parameter
// 2^24 pixels need about 1.2 GB of memory (at 72 bytes per pixel)
// size_t maxPixPerImg = 1 << 24;
// pixPerImg = std::min(pixPerImg, maxPixPerImg);
// the max lines in a batch, and will be used for each run
int linesPerImg = pixPerImg / width;
assert(linesPerImg >0);
// now reassign the value for pixels in a batch
pixPerImg = linesPerImg * width;
// total number of pixels in SLC
size_t totalPixels = (size_t)length * width;
// total number of full blocks needed to process the whole image
int nBlocks = length / linesPerImg;
// check whether there are remnant lines
int remLines = length - nBlocks*linesPerImg;
size_t remPix = remLines * width;
printf("NOTE: GPU will process image in %d blocks of %d lines", nBlocks, linesPerImg); printf("NOTE: GPU will process image in %d blocks of %d lines", nBlocks, linesPerImg);
if (remPix > 0) printf(" (with %d lines in a final partial block)", remLines); if (remPix > 0) printf(" (with %d lines in a final partial block)", remLines);
printf("\n"); printf("\n");
double *gpu_rho = new double[linesPerImg * width]; double *gpu_rho = new double[linesPerImg * width];
double *gpu_dopline = new double[linesPerImg * width]; double *gpu_dopline = new double[linesPerImg * width];
size_t nb_pixels = pixPerImg * sizeof(double); size_t nb_pixels = pixPerImg * sizeof(double);
@ -490,7 +516,7 @@ void Topo::topo() {
dopAccObj->getLineSequential((char *)raw_line); dopAccObj->getLineSequential((char *)raw_line);
for (int k=0; k<width; k++) gpu_dopline[(j*width)+k] = raw_line[k]; for (int k=0; k<width; k++) gpu_dopline[(j*width)+k] = raw_line[k];
} }
outputArrays[0] = (double *)malloc(nb_pixels); // h_lat outputArrays[0] = (double *)malloc(nb_pixels); // h_lat
outputArrays[1] = (double *)malloc(nb_pixels); // h_lon outputArrays[1] = (double *)malloc(nb_pixels); // h_lon
outputArrays[2] = (double *)malloc(nb_pixels); // h_z outputArrays[2] = (double *)malloc(nb_pixels); // h_z
@ -565,7 +591,7 @@ void Topo::topo() {
printf("\n ------------------ EXITING GPU TOPO ------------------\n\n"); printf("\n ------------------ EXITING GPU TOPO ------------------\n\n");
printf("Finished!\n"); printf("Finished!\n");
delete[] raw_line; delete[] raw_line;
delete[] gpu_dem; delete[] gpu_dem;
delete[] gpu_rho; delete[] gpu_rho;
@ -579,7 +605,7 @@ void Topo::topo() {
// Step 1: Get satellite position // Step 1: Get satellite position
// Get time // Get time
tline = t0 + (Nazlooks * (line / prf)); tline = t0 + (Nazlooks * (line / prf));
// Get state vector // Get state vector
stat = orb.interpolateOrbit(tline,xyzsat,velsat,orbit_method); stat = orb.interpolateOrbit(tline,xyzsat,velsat,orbit_method);
if (stat != 0) { if (stat != 0) {
@ -657,15 +683,15 @@ void Topo::topo() {
for (pixel=0; pixel<width; pixel++) { for (pixel=0; pixel<width; pixel++) {
rng = rho[pixel]; rng = rho[pixel];
dopfact = (0.5 * wvl * (dopline[pixel] / vmag)) * rng; dopfact = (0.5 * wvl * (dopline[pixel] / vmag)) * rng;
// If pixel hasn't converged // If pixel hasn't converged
if (converge[pixel] == 0) { if (converge[pixel] == 0) {
// Use previous llh in degrees and meters // Use previous llh in degrees and meters
llh_prev[0] = lat[pixel] / (180. / M_PI); llh_prev[0] = lat[pixel] / (180. / M_PI);
llh_prev[1] = lon[pixel] / (180. / M_PI); llh_prev[1] = lon[pixel] / (180. / M_PI);
llh_prev[2] = z[pixel]; llh_prev[2] = z[pixel];
// Solve for new position at height zsch // Solve for new position at height zsch
aa = height + rcurv; aa = height + rcurv;
bb = rcurv + zsch[pixel]; bb = rcurv + zsch[pixel];
@ -680,12 +706,12 @@ void Topo::topo() {
gamm = costheta * rng; gamm = costheta * rng;
alpha = (dopfact - (gamm * linalg.dot(nhat,vhat))) / linalg.dot(vhat,that); alpha = (dopfact - (gamm * linalg.dot(nhat,vhat))) / linalg.dot(vhat,that);
beta = -ilrl * sqrt((rng * rng * sintheta * sintheta) - (alpha * alpha)); beta = -ilrl * sqrt((rng * rng * sintheta * sintheta) - (alpha * alpha));
// xyz position of target // xyz position of target
for (int idx=0; idx<3; idx++) delta[idx] = (gamm * nhat[idx]) + (alpha * that[idx]) + (beta * chat[idx]); for (int idx=0; idx<3; idx++) delta[idx] = (gamm * nhat[idx]) + (alpha * that[idx]) + (beta * chat[idx]);
for (int idx=0; idx<3; idx++) xyz[idx] = xyzsat[idx] + delta[idx]; for (int idx=0; idx<3; idx++) xyz[idx] = xyzsat[idx] + delta[idx];
elp.latlon(xyz,llh,XYZ_2_LLH); elp.latlon(xyz,llh,XYZ_2_LLH);
// Convert lat, lon, hgt to xyz coordinates // Convert lat, lon, hgt to xyz coordinates
lat[pixel] = llh[0] * (180. / M_PI); lat[pixel] = llh[0] * (180. / M_PI);
lon[pixel] = llh[1] * (180. / M_PI); lon[pixel] = llh[1] * (180. / M_PI);
@ -701,7 +727,7 @@ void Topo::topo() {
fraclon = demlon - idemlon; fraclon = demlon - idemlon;
z[pixel] = tzMethods.interpolate(dem,idemlon,idemlat,fraclon,fraclat,udemwidth,udemlength,dem_method); z[pixel] = tzMethods.interpolate(dem,idemlon,idemlat,fraclon,fraclat,udemwidth,udemlength,dem_method);
if (z[pixel] < -500.0) z[pixel] = -500.0; if (z[pixel] < -500.0) z[pixel] = -500.0;
// Given llh, where h = z(pixel, line) in WGS84, get the SCH height // Given llh, where h = z(pixel, line) in WGS84, get the SCH height
llh[0] = lat[pixel] / (180. / M_PI); llh[0] = lat[pixel] / (180. / M_PI);
llh[1] = lon[pixel] / (180. / M_PI); llh[1] = lon[pixel] / (180. / M_PI);
@ -719,7 +745,7 @@ void Topo::topo() {
} else if (iter > numiter) { } else if (iter > numiter) {
elp.latlon(xyz_prev,llh_prev,LLH_2_XYZ); elp.latlon(xyz_prev,llh_prev,LLH_2_XYZ);
for (int idx=0; idx<3; idx++) xyz[idx] = 0.5 * (xyz_prev[idx] + xyz[idx]); for (int idx=0; idx<3; idx++) xyz[idx] = 0.5 * (xyz_prev[idx] + xyz[idx]);
// Repopulate lat, lon, z // Repopulate lat, lon, z
elp.latlon(xyz,llh,XYZ_2_LLH); elp.latlon(xyz,llh,XYZ_2_LLH);
lat[pixel] = llh[0] * (180. / M_PI); lat[pixel] = llh[0] * (180. / M_PI);
@ -727,7 +753,7 @@ void Topo::topo() {
z[pixel] = llh[2]; z[pixel] = llh[2];
ptm.convert_sch_to_xyz(sch,xyz,XYZ_2_SCH); ptm.convert_sch_to_xyz(sch,xyz,XYZ_2_SCH);
zsch[pixel] = sch[2]; zsch[pixel] = sch[2];
// Absolute distance // Absolute distance
distance[pixel] = sqrt(pow((xyz[0]-xyzsat[0]),2)+pow((xyz[1]-xyzsat[1]),2) + pow((xyz[2]-xyzsat[2]),2)) - rng; distance[pixel] = sqrt(pow((xyz[0]-xyzsat[0]),2)+pow((xyz[1]-xyzsat[1]),2) + pow((xyz[2]-xyzsat[2]),2)) - rng;
} }
@ -754,32 +780,32 @@ void Topo::topo() {
gamm = costheta * rng; gamm = costheta * rng;
alpha = (dopfact - (gamm * linalg.dot(nhat,vhat))) / linalg.dot(vhat,that); alpha = (dopfact - (gamm * linalg.dot(nhat,vhat))) / linalg.dot(vhat,that);
beta = -ilrl * sqrt((rng * rng * sintheta * sintheta) - (alpha * alpha)); beta = -ilrl * sqrt((rng * rng * sintheta * sintheta) - (alpha * alpha));
// xyz position of target // xyz position of target
for (int idx=0; idx<3; idx++) delta[idx] = (gamm * nhat[idx]) + (alpha * that[idx]) + (beta * chat[idx]); for (int idx=0; idx<3; idx++) delta[idx] = (gamm * nhat[idx]) + (alpha * that[idx]) + (beta * chat[idx]);
for (int idx=0; idx<3; idx++) xyz[idx] = xyzsat[idx] + delta[idx]; for (int idx=0; idx<3; idx++) xyz[idx] = xyzsat[idx] + delta[idx];
elp.latlon(xyz,llh,XYZ_2_LLH); elp.latlon(xyz,llh,XYZ_2_LLH);
// Copy into output arrays // Copy into output arrays
lat[pixel] = llh[0] * (180. / M_PI); lat[pixel] = llh[0] * (180. / M_PI);
lon[pixel] = llh[1] * (180. / M_PI); lon[pixel] = llh[1] * (180. / M_PI);
z[pixel] = llh[2]; z[pixel] = llh[2];
distance[pixel] = sqrt(pow((xyz[0]-xyzsat[0]),2)+pow((xyz[1]-xyzsat[1]),2) + pow((xyz[2]-xyzsat[2]),2)) - rng; distance[pixel] = sqrt(pow((xyz[0]-xyzsat[0]),2)+pow((xyz[1]-xyzsat[1]),2) + pow((xyz[2]-xyzsat[2]),2)) - rng;
// Computation in ENU coordinates around target // Computation in ENU coordinates around target
linalg.enubasis(llh[0],llh[1],enumat); linalg.enubasis(llh[0],llh[1],enumat);
linalg.tranmat(enumat,xyz2enu); linalg.tranmat(enumat,xyz2enu);
linalg.matvec(xyz2enu,delta,enu); linalg.matvec(xyz2enu,delta,enu);
cosalpha = abs(enu[2]) / linalg.norm(enu); cosalpha = abs(enu[2]) / linalg.norm(enu);
// LOS vectors // LOS vectors
losang[(2*pixel)] = acos(cosalpha) * (180. / M_PI); losang[(2*pixel)] = acos(cosalpha) * (180. / M_PI);
losang[((2*pixel)+1)] = (atan2(-enu[1],-enu[0]) - (0.5*M_PI)) * (180. / M_PI); losang[((2*pixel)+1)] = (atan2(-enu[1],-enu[0]) - (0.5*M_PI)) * (180. / M_PI);
incang[(2*pixel)] = acos(costheta) * (180. / M_PI); incang[(2*pixel)] = acos(costheta) * (180. / M_PI);
// ctrack gets stored in zsch // ctrack gets stored in zsch
zsch[pixel] = rng * sintheta; zsch[pixel] = rng * sintheta;
// Get local incidence angle // Get local incidence angle
demlat = ((lat[pixel] - ufirstlat) / deltalat) + 1; demlat = ((lat[pixel] - ufirstlat) / deltalat) + 1;
demlon = ((lon[pixel] - ufirstlon) / deltalon) + 1; demlon = ((lon[pixel] - ufirstlon) / deltalon) + 1;
@ -792,12 +818,12 @@ void Topo::topo() {
fraclat = demlat - idemlat; fraclat = demlat - idemlat;
fraclon = demlon - idemlon; fraclon = demlon - idemlon;
gamm = lat[pixel] / (180. / M_PI); gamm = lat[pixel] / (180. / M_PI);
// Slopex // Slopex
aa = tzMethods.interpolate(dem,(idemlon-1),idemlat,fraclon,fraclat,udemwidth,udemlength,dem_method); aa = tzMethods.interpolate(dem,(idemlon-1),idemlat,fraclon,fraclat,udemwidth,udemlength,dem_method);
bb = tzMethods.interpolate(dem,(idemlon+1),idemlat,fraclon,fraclat,udemwidth,udemlength,dem_method); bb = tzMethods.interpolate(dem,(idemlon+1),idemlat,fraclon,fraclat,udemwidth,udemlength,dem_method);
alpha = ((bb - aa) * (180. / M_PI)) / (2.0 * elp.reast(gamm) * deltalon); alpha = ((bb - aa) * (180. / M_PI)) / (2.0 * elp.reast(gamm) * deltalon);
// Slopey // Slopey
aa = tzMethods.interpolate(dem,idemlon,(idemlat-1),fraclon,fraclat,udemwidth,udemlength,dem_method); aa = tzMethods.interpolate(dem,idemlon,(idemlat-1),fraclon,fraclat,udemwidth,udemlength,dem_method);
bb = tzMethods.interpolate(dem,idemlon,(idemlat+1),fraclon,fraclat,udemwidth,udemlength,dem_method); bb = tzMethods.interpolate(dem,idemlon,(idemlat+1),fraclon,fraclat,udemwidth,udemlength,dem_method);
@ -822,7 +848,7 @@ void Topo::topo() {
max_lat = max(mxlat, max_lat); max_lat = max(mxlat, max_lat);
min_lon = min(mnlon, min_lon); min_lon = min(mnlon, min_lon);
max_lon = max(mxlon, max_lon); max_lon = max(mxlon, max_lon);
latAccObj->setLineSequential((char *)&lat[0]); latAccObj->setLineSequential((char *)&lat[0]);
lonAccObj->setLineSequential((char *)&lon[0]); lonAccObj->setLineSequential((char *)&lon[0]);
heightAccObj->setLineSequential((char *)&z[0]); heightAccObj->setLineSequential((char *)&z[0]);
@ -840,7 +866,7 @@ void Topo::topo() {
ctrackmin = mnzsch - demmax; ctrackmin = mnzsch - demmax;
ctrackmax = mxzsch + demmax; ctrackmax = mxzsch + demmax;
dctrack = (ctrackmax - ctrackmin) / (owidth - 1.0); dctrack = (ctrackmax - ctrackmin) / (owidth - 1.0);
// Sort lat/lon by ctrack // Sort lat/lon by ctrack
linalg.insertionSort(zsch,width); linalg.insertionSort(zsch,width);
linalg.insertionSort(lat,width); linalg.insertionSort(lat,width);
@ -853,7 +879,7 @@ void Topo::topo() {
aa = ctrackmin + (pixel * dctrack); aa = ctrackmin + (pixel * dctrack);
ctrack[pixel] = aa; ctrack[pixel] = aa;
i_type = linalg.binarySearch(zsch,0,(width-1),aa); i_type = linalg.binarySearch(zsch,0,(width-1),aa);
// Simple bi-linear interpolation // Simple bi-linear interpolation
fraclat = (aa - zsch[i_type]) / (zsch[(i_type+1)] - zsch[i_type]); fraclat = (aa - zsch[i_type]) / (zsch[(i_type+1)] - zsch[i_type]);
demlat = lat[i_type] + (fraclat * (lat[(i_type+1)] - lat[i_type])); demlat = lat[i_type] + (fraclat * (lat[(i_type+1)] - lat[i_type]));