GPU geo2rdr and topo memory allocation fix
parent
31803ef7fa
commit
74c92a1dc2
|
@ -4,6 +4,7 @@
|
|||
//
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include <cassert>
|
||||
#include <math.h>
|
||||
#include <stdio.h>
|
||||
#include <sys/time.h>
|
||||
|
@ -65,7 +66,7 @@ struct Poly1d {
|
|||
__constant__ double d_inpts_double[9];
|
||||
__constant__ int d_inpts_int[3];
|
||||
|
||||
// Mem usage: 27 doubles (216 bytes) per call
|
||||
// Mem usage: 27 doubles (216 bytes) per call
|
||||
__device__ int interpolateOrbit(struct Orbit *orb, double t, double *xyz, double *vel) {
|
||||
double h[4], hdot[4], f0[4], f1[4], g0[4], g1[4];
|
||||
double sum = 0.0;
|
||||
|
@ -197,7 +198,7 @@ __global__ void runGeo(struct Orbit orb, struct Poly1d fdvsrng, struct Poly1d fd
|
|||
if (pixel < NPIXELS) { // The number of pixels in a run changes based on if it's a full run or a partial run
|
||||
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * *
|
||||
* Input mapping
|
||||
*
|
||||
*
|
||||
* int[0] = demLength
|
||||
* int[1] = demWidth
|
||||
* int[2] = bistatic
|
||||
|
@ -212,7 +213,7 @@ __global__ void runGeo(struct Orbit orb, struct Poly1d fdvsrng, struct Poly1d fd
|
|||
* double[7] = dmrg
|
||||
* double[8] = dtaz
|
||||
* * * * * * * * * * * * * * * * * * * * * * * * * * * * */
|
||||
|
||||
|
||||
double xyz[3], llh[3], satx[3], satv[3], dr[3];
|
||||
double rngpix, tline, tprev, fnprime, fdop, fdopder;
|
||||
int stat, i, j;
|
||||
|
@ -231,7 +232,7 @@ __global__ void runGeo(struct Orbit orb, struct Poly1d fdvsrng, struct Poly1d fd
|
|||
llh2xyz(&elp,xyz,llh);
|
||||
|
||||
tline = .5 * (d_inpts_double[2] + d_inpts_double[3]);
|
||||
stat = interpolateOrbit(&orb, tline, satx, satv); // Originally we got xyz_mid and vel_mid, then copied into satx/satv,
|
||||
stat = interpolateOrbit(&orb, tline, satx, satv); // Originally we got xyz_mid and vel_mid, then copied into satx/satv,
|
||||
// but since these are all independent here it's fine
|
||||
if (stat != 0) isOutside = true; // Should exit, but this is next-best thing...
|
||||
|
||||
|
@ -294,11 +295,17 @@ int nLinesPossible(int length, int width) {
|
|||
size_t freeByte, totalByte;
|
||||
int linesPerRun;
|
||||
cudaMemGetInfo(&freeByte, &totalByte);
|
||||
printf("tb %ld\n", totalByte);
|
||||
totalByte = size_t((double(totalByte) / 5.e8) * 5.e8); // Round down to nearest .5 GB
|
||||
printf("tba %ld\n", totalByte);
|
||||
printf("Device has roughly %.4f GB of memory, ", double(totalByte)/1.e9);
|
||||
linesPerRun = totalByte / (556 * width);
|
||||
printf("Available free gpu memory in bytes %ld\n", freeByte);
|
||||
// use 100 MiB as a rounding unit; may be adjusted
|
||||
size_t memoryRoundingUnit = 1024ULL * 1024ULL * 100;
|
||||
// use 2*memoryRoundingUnit as an overhead for safety
|
||||
freeByte = (freeByte / memoryRoundingUnit -2) * memoryRoundingUnit;
|
||||
assert(freeByte >0);
|
||||
// printf("GPU Memory to be used %ld\n", freeByte);
|
||||
// printf("Device has roughly %.4f GB of memory, ", double(totalByte)/1.e9);
|
||||
// determine the allowed max lines per run; 7 doubles (7*sizeof(double) bytes) is the per-pixel memory usage (estimated)
|
||||
linesPerRun = freeByte / (7*sizeof(double) * width);
|
||||
assert(linesPerRun>0);
|
||||
printf("and can process roughly %d lines (each with %d pixels) per run.\n", linesPerRun, width);
|
||||
return linesPerRun;
|
||||
}
|
||||
|
@ -319,9 +326,9 @@ void freePoly1d(struct Poly1d *poly) {
|
|||
free(poly->coeffs);
|
||||
}
|
||||
|
||||
void runGPUGeo(int iter, int numPix, double *h_inpts_dbl, int *h_inpts_int, double *h_lat, double *h_lon, double *h_dem, int h_orbNvec, double *h_orbSvs,
|
||||
void runGPUGeo(int iter, int numPix, double *h_inpts_dbl, int *h_inpts_int, double *h_lat, double *h_lon, double *h_dem, int h_orbNvec, double *h_orbSvs,
|
||||
int h_polyOrd, double h_polyMean, double h_polyNorm, double *h_polyCoeffs, double h_polyPRF, double **accArr) {
|
||||
|
||||
|
||||
double iStartCpy, iStartRun, iEndRun, iEndCpy;
|
||||
int i;
|
||||
|
||||
|
|
|
@ -88,7 +88,7 @@ Geo2rdr::Geo2rdr() {
|
|||
}
|
||||
|
||||
void Geo2rdr::geo2rdr() {
|
||||
|
||||
|
||||
double *lat, *lon, *dem, *rgm, *azt, *rgoff, *azoff;
|
||||
double xyz_mid[3], vel_mid[3], llh[3], xyz[3], satx[3], satv[3], dr[3];
|
||||
double tend, tline, tprev, rngend, rngpix, tmid, temp, dtaz, dmrg, fdop, fdopder, fnprime;
|
||||
|
@ -137,7 +137,7 @@ void Geo2rdr::geo2rdr() {
|
|||
}
|
||||
|
||||
// OpenMP replacement for clock() (clock reports cumulative thread time, not single thread
|
||||
// time, so clock() on 4 threads would report 4 x the true runtime)
|
||||
// time, so clock() on 4 threads would report 4 x the true runtime)
|
||||
timer_start = omp_get_wtime();
|
||||
cnt = 0;
|
||||
printf("Geo2rdr executing on %d threads...\n", omp_get_max_threads());
|
||||
|
@ -259,12 +259,20 @@ void Geo2rdr::geo2rdr() {
|
|||
wd.width = demWidth;
|
||||
wd.firstWrite = true; // Flag to ignore write instructions
|
||||
pthread_create(&writeThread, &attr, writeToFile, (void*)&wd); // Fires empty thread
|
||||
|
||||
int totalPixels = demLength * demWidth;
|
||||
//int linesPerRun = min(demLength, nLinesPossible(demLength, demWidth));
|
||||
int linesPerRun = demLength;
|
||||
while ((linesPerRun*demWidth) > 2e8) linesPerRun--;
|
||||
int pixPerRun = linesPerRun * demWidth;
|
||||
|
||||
size_t totalPixels = demLength * demWidth;
|
||||
// adjust the lines per run by the available gpu memory
|
||||
int linesPerRun = std::min(demLength, nLinesPossible(demLength, demWidth));
|
||||
// ! To best parallelize the computation, using the max available GPU memory is the best option
|
||||
// ! the following adjustment is not needed
|
||||
// adjust further by the max pixels per run, preferably as a user-configurable parameter
|
||||
// temp set as 2^20
|
||||
// size_t maxPixPerRun = 1 << 20;
|
||||
// size_t pixPerRun = std::min((size_t)linesPerRun*demWidth, maxPixPerRun);
|
||||
// linesPerRun = pixPerRun/demWidth *demWidth;
|
||||
|
||||
// recalculate run info
|
||||
size_t pixPerRun = linesPerRun * demWidth;
|
||||
int nRuns = demLength / linesPerRun;
|
||||
int remPix = totalPixels - (nRuns * pixPerRun);
|
||||
int remLines = remPix / demWidth;
|
||||
|
@ -273,7 +281,7 @@ void Geo2rdr::geo2rdr() {
|
|||
if (remPix > 0) printf(" (with %d lines in a final partial block)", remLines);
|
||||
printf("\n");
|
||||
|
||||
lat = new double[pixPerRun];
|
||||
lat = new double[pixPerRun];
|
||||
lon = new double[pixPerRun];
|
||||
dem = new double[pixPerRun];
|
||||
size_t nb_pixels = pixPerRun * sizeof(double);
|
||||
|
@ -291,14 +299,14 @@ void Geo2rdr::geo2rdr() {
|
|||
outputArrays[2] = (double *)malloc(nb_pixels); // h_rgoff
|
||||
outputArrays[3] = (double *)malloc(nb_pixels); // h_azoff
|
||||
|
||||
runGPUGeo(i, pixPerRun, gpu_inputs_d, gpu_inputs_i, lat, lon, dem,
|
||||
runGPUGeo(i, pixPerRun, gpu_inputs_d, gpu_inputs_i, lat, lon, dem,
|
||||
gpu_orbNvec, gpu_orbSvs, gpu_polyOrd, gpu_polyMean, gpu_polyNorm,
|
||||
gpu_polyCoef, prf, outputArrays);
|
||||
for (int j=0; j<4; j++) writeArrays[j] = outputArrays[j]; // Copying pointers
|
||||
if (i != 0) printf(" Waiting for previous asynchronous write-out to finish...\n");
|
||||
pthread_attr_destroy(&attr);
|
||||
pthread_join(writeThread, &thread_stat); // Waits for async thread to finish
|
||||
|
||||
pthread_join(writeThread, &thread_stat); // Waits for async thread to finish
|
||||
|
||||
printf(" Writing run %d out asynchronously to image files...\n", i);
|
||||
wd.accessors = (void**)accObjs;
|
||||
wd.rg = writeArrays[0];
|
||||
|
@ -381,14 +389,14 @@ void Geo2rdr::geo2rdr() {
|
|||
pixel = latAccObj->getLineSequential((char *)lat);
|
||||
pixel = lonAccObj->getLineSequential((char *)lon);
|
||||
pixel = hgtAccObj->getLineSequential((char *)dem);
|
||||
|
||||
|
||||
if ((line%1000) == 0) printf("Processing line: %d %d\n", line, numOutsideImage);
|
||||
|
||||
#pragma omp parallel for private(pixel, rngpix, tline, tprev, stat, fnprime, fdop, \
|
||||
fdopder, isOutside, xyz, llh, satx, satv, dr) \
|
||||
reduction(+:numOutsideImage,conv,cnt)
|
||||
for (pixel=0; pixel<demWidth; pixel++) {
|
||||
|
||||
|
||||
isOutside = false; // Flag to determine if point is outside image
|
||||
|
||||
llh[0] = lat[pixel] * (M_PI / 180.);
|
||||
|
@ -426,7 +434,7 @@ void Geo2rdr::geo2rdr() {
|
|||
break; // Point converged
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if ((tline < tstart) || (tline > tend)) isOutside = true;
|
||||
|
||||
for (int i=0; i<3; i++) dr[i] = xyz[i] - satx[i];
|
||||
|
|
|
@ -73,7 +73,7 @@ __device__ int interpolateOrbit(struct Orbit *orb, double t, double *xyz, double
|
|||
double h[4], hdot[4], f0[4], f1[4], g0[4], g1[4];
|
||||
double sum = 0.0;
|
||||
int v0 = -1;
|
||||
|
||||
|
||||
if ((t < orb->svs[0].t) || (t > orb->svs[orb->nVec-1].t)) return 1;
|
||||
for (int i=0; i<orb->nVec; i++) {
|
||||
if ((orb->svs[i].t >= t) && (v0 == -1)) {
|
||||
|
@ -95,44 +95,44 @@ __device__ int interpolateOrbit(struct Orbit *orb, double t, double *xyz, double
|
|||
sum = (1.0 / (orb->svs[v0+3].t - orb->svs[v0].t)) + (1.0 / (orb->svs[v0+3].t - orb->svs[v0+1].t)) + (1.0 / (orb->svs[v0+3].t - orb->svs[v0+2].t));
|
||||
f0[3] = 1.0 - (2.0 * (t - orb->svs[v0+3].t) * sum);
|
||||
|
||||
h[0] = ((t - orb->svs[v0+1].t) / (orb->svs[v0].t - orb->svs[v0+1].t)) * ((t - orb->svs[v0+2].t) / (orb->svs[v0].t - orb->svs[v0+2].t)) *
|
||||
h[0] = ((t - orb->svs[v0+1].t) / (orb->svs[v0].t - orb->svs[v0+1].t)) * ((t - orb->svs[v0+2].t) / (orb->svs[v0].t - orb->svs[v0+2].t)) *
|
||||
((t - orb->svs[v0+3].t) / (orb->svs[v0].t - orb->svs[v0+3].t));
|
||||
h[1] = ((t - orb->svs[v0].t) / (orb->svs[v0+1].t - orb->svs[v0].t)) * ((t - orb->svs[v0+2].t) / (orb->svs[v0+1].t - orb->svs[v0+2].t)) *
|
||||
h[1] = ((t - orb->svs[v0].t) / (orb->svs[v0+1].t - orb->svs[v0].t)) * ((t - orb->svs[v0+2].t) / (orb->svs[v0+1].t - orb->svs[v0+2].t)) *
|
||||
((t - orb->svs[v0+3].t) / (orb->svs[v0+1].t - orb->svs[v0+3].t));
|
||||
h[2] = ((t - orb->svs[v0].t) / (orb->svs[v0+2].t - orb->svs[v0].t)) * ((t - orb->svs[v0+1].t) / (orb->svs[v0+2].t - orb->svs[v0+1].t)) *
|
||||
((t - orb->svs[v0+3].t) / (orb->svs[v0+2].t - orb->svs[v0+3].t));
|
||||
h[3] = ((t - orb->svs[v0].t) / (orb->svs[v0+3].t - orb->svs[v0].t)) * ((t - orb->svs[v0+1].t) / (orb->svs[v0+3].t - orb->svs[v0+1].t)) *
|
||||
h[3] = ((t - orb->svs[v0].t) / (orb->svs[v0+3].t - orb->svs[v0].t)) * ((t - orb->svs[v0+1].t) / (orb->svs[v0+3].t - orb->svs[v0+1].t)) *
|
||||
((t - orb->svs[v0+2].t) / (orb->svs[v0+3].t - orb->svs[v0+2].t));
|
||||
|
||||
sum = ((t - orb->svs[v0+2].t) / (orb->svs[v0].t - orb->svs[v0+2].t)) * ((t - orb->svs[v0+3].t) / (orb->svs[v0].t - orb->svs[v0+3].t)) *
|
||||
sum = ((t - orb->svs[v0+2].t) / (orb->svs[v0].t - orb->svs[v0+2].t)) * ((t - orb->svs[v0+3].t) / (orb->svs[v0].t - orb->svs[v0+3].t)) *
|
||||
(1.0 / (orb->svs[v0].t - orb->svs[v0+1].t));
|
||||
sum += ((t - orb->svs[v0+1].t) / (orb->svs[v0].t - orb->svs[v0+1].t)) * ((t - orb->svs[v0+3].t) / (orb->svs[v0].t - orb->svs[v0+3].t)) *
|
||||
sum += ((t - orb->svs[v0+1].t) / (orb->svs[v0].t - orb->svs[v0+1].t)) * ((t - orb->svs[v0+3].t) / (orb->svs[v0].t - orb->svs[v0+3].t)) *
|
||||
(1.0 / (orb->svs[v0].t - orb->svs[v0+2].t));
|
||||
sum += ((t - orb->svs[v0+1].t) / (orb->svs[v0].t - orb->svs[v0+1].t)) * ((t - orb->svs[v0+2].t) / (orb->svs[v0].t - orb->svs[v0+2].t)) *
|
||||
sum += ((t - orb->svs[v0+1].t) / (orb->svs[v0].t - orb->svs[v0+1].t)) * ((t - orb->svs[v0+2].t) / (orb->svs[v0].t - orb->svs[v0+2].t)) *
|
||||
(1.0 / (orb->svs[v0].t - orb->svs[v0+3].t));
|
||||
hdot[0] = sum;
|
||||
|
||||
sum = ((t - orb->svs[v0+2].t) / (orb->svs[v0+1].t - orb->svs[v0+2].t)) * ((t - orb->svs[v0+3].t) / (orb->svs[v0+1].t - orb->svs[v0+3].t)) *
|
||||
(1.0 / (orb->svs[v0+1].t - orb->svs[v0].t));
|
||||
sum += ((t - orb->svs[v0].t) / (orb->svs[v0+1].t - orb->svs[v0].t)) * ((t - orb->svs[v0+3].t) / (orb->svs[v0+1].t - orb->svs[v0+3].t)) *
|
||||
sum += ((t - orb->svs[v0].t) / (orb->svs[v0+1].t - orb->svs[v0].t)) * ((t - orb->svs[v0+3].t) / (orb->svs[v0+1].t - orb->svs[v0+3].t)) *
|
||||
(1.0 / (orb->svs[v0+1].t - orb->svs[v0+2].t));
|
||||
sum += ((t - orb->svs[v0].t) / (orb->svs[v0+1].t - orb->svs[v0].t)) * ((t - orb->svs[v0+2].t) / (orb->svs[v0+1].t - orb->svs[v0+2].t)) *
|
||||
sum += ((t - orb->svs[v0].t) / (orb->svs[v0+1].t - orb->svs[v0].t)) * ((t - orb->svs[v0+2].t) / (orb->svs[v0+1].t - orb->svs[v0+2].t)) *
|
||||
(1.0 / (orb->svs[v0+1].t - orb->svs[v0+3].t));
|
||||
hdot[1] = sum;
|
||||
|
||||
sum = ((t - orb->svs[v0+1].t) / (orb->svs[v0+2].t - orb->svs[v0+1].t)) * ((t - orb->svs[v0+3].t) / (orb->svs[v0+2].t - orb->svs[v0+3].t)) *
|
||||
sum = ((t - orb->svs[v0+1].t) / (orb->svs[v0+2].t - orb->svs[v0+1].t)) * ((t - orb->svs[v0+3].t) / (orb->svs[v0+2].t - orb->svs[v0+3].t)) *
|
||||
(1.0 / (orb->svs[v0+2].t - orb->svs[v0].t));
|
||||
sum += ((t - orb->svs[v0].t) / (orb->svs[v0+2].t - orb->svs[v0].t)) * ((t - orb->svs[v0+3].t) / (orb->svs[v0+2].t - orb->svs[v0+3].t)) *
|
||||
sum += ((t - orb->svs[v0].t) / (orb->svs[v0+2].t - orb->svs[v0].t)) * ((t - orb->svs[v0+3].t) / (orb->svs[v0+2].t - orb->svs[v0+3].t)) *
|
||||
(1.0 / (orb->svs[v0+2].t - orb->svs[v0+1].t));
|
||||
sum += ((t - orb->svs[v0].t) / (orb->svs[v0+2].t - orb->svs[v0].t)) * ((t - orb->svs[v0+1].t) / (orb->svs[v0+2].t - orb->svs[v0+1].t)) *
|
||||
sum += ((t - orb->svs[v0].t) / (orb->svs[v0+2].t - orb->svs[v0].t)) * ((t - orb->svs[v0+1].t) / (orb->svs[v0+2].t - orb->svs[v0+1].t)) *
|
||||
(1.0 / (orb->svs[v0+2].t - orb->svs[v0+3].t));
|
||||
hdot[2] = sum;
|
||||
|
||||
sum = ((t - orb->svs[v0+1].t) / (orb->svs[v0+3].t - orb->svs[v0+1].t)) * ((t - orb->svs[v0+2].t) / (orb->svs[v0+3].t - orb->svs[v0+2].t)) *
|
||||
sum = ((t - orb->svs[v0+1].t) / (orb->svs[v0+3].t - orb->svs[v0+1].t)) * ((t - orb->svs[v0+2].t) / (orb->svs[v0+3].t - orb->svs[v0+2].t)) *
|
||||
(1.0 / (orb->svs[v0+3].t - orb->svs[v0].t));
|
||||
sum += ((t - orb->svs[v0].t) / (orb->svs[v0+3].t - orb->svs[v0].t)) * ((t - orb->svs[v0+2].t) / (orb->svs[v0+3].t - orb->svs[v0+2].t)) *
|
||||
sum += ((t - orb->svs[v0].t) / (orb->svs[v0+3].t - orb->svs[v0].t)) * ((t - orb->svs[v0+2].t) / (orb->svs[v0+3].t - orb->svs[v0+2].t)) *
|
||||
(1.0 / (orb->svs[v0+3].t - orb->svs[v0+1].t));
|
||||
sum += ((t - orb->svs[v0].t) / (orb->svs[v0+3].t - orb->svs[v0].t)) * ((t - orb->svs[v0+1].t) / (orb->svs[v0+3].t - orb->svs[v0+1].t)) *
|
||||
sum += ((t - orb->svs[v0].t) / (orb->svs[v0+3].t - orb->svs[v0].t)) * ((t - orb->svs[v0+1].t) / (orb->svs[v0+3].t - orb->svs[v0+1].t)) *
|
||||
(1.0 / (orb->svs[v0+3].t - orb->svs[v0+2].t));
|
||||
hdot[3] = sum;
|
||||
|
||||
|
@ -152,12 +152,12 @@ __device__ int interpolateOrbit(struct Orbit *orb, double t, double *xyz, double
|
|||
|
||||
xyz[0] = (((orb->svs[v0].px * f0[0]) + (orb->svs[v0].vx * f1[0])) * h[0] * h[0]) + (((orb->svs[v0+1].px * f0[1]) + (orb->svs[v0+1].vx * f1[1])) * h[1] * h[1]) +
|
||||
(((orb->svs[v0+2].px * f0[2]) + (orb->svs[v0+2].vx * f1[2])) * h[2] * h[2]) + (((orb->svs[v0+3].px * f0[3]) + (orb->svs[v0+3].vx * f1[3])) * h[3] * h[3]);
|
||||
xyz[1] = (((orb->svs[v0].py * f0[0]) + (orb->svs[v0].vy * f1[0])) * h[0] * h[0]) + (((orb->svs[v0+1].py * f0[1]) + (orb->svs[v0+1].vy * f1[1])) * h[1] * h[1]) +
|
||||
xyz[1] = (((orb->svs[v0].py * f0[0]) + (orb->svs[v0].vy * f1[0])) * h[0] * h[0]) + (((orb->svs[v0+1].py * f0[1]) + (orb->svs[v0+1].vy * f1[1])) * h[1] * h[1]) +
|
||||
(((orb->svs[v0+2].py * f0[2]) + (orb->svs[v0+2].vy * f1[2])) * h[2] * h[2]) + (((orb->svs[v0+3].py * f0[3]) + (orb->svs[v0+3].vy * f1[3])) * h[3] * h[3]);
|
||||
xyz[2] = (((orb->svs[v0].pz * f0[0]) + (orb->svs[v0].vz * f1[0])) * h[0] * h[0]) + (((orb->svs[v0+1].pz * f0[1]) + (orb->svs[v0+1].vz * f1[1])) * h[1] * h[1]) +
|
||||
(((orb->svs[v0+2].pz * f0[2]) + (orb->svs[v0+2].vz * f1[2])) * h[2] * h[2]) + (((orb->svs[v0+3].pz * f0[3]) + (orb->svs[v0+3].vz * f1[3])) * h[3] * h[3]);
|
||||
|
||||
vel[0] = (((orb->svs[v0].px * g0[0]) + (orb->svs[v0].vx * g1[0])) * h[0]) + (((orb->svs[v0+1].px * g0[1]) + (orb->svs[v0+1].vx * g1[1])) * h[1]) +
|
||||
vel[0] = (((orb->svs[v0].px * g0[0]) + (orb->svs[v0].vx * g1[0])) * h[0]) + (((orb->svs[v0+1].px * g0[1]) + (orb->svs[v0+1].vx * g1[1])) * h[1]) +
|
||||
(((orb->svs[v0+2].px * g0[2]) + (orb->svs[v0+2].vx * g1[2])) * h[2]) + (((orb->svs[v0+3].px * g0[3]) + (orb->svs[v0+3].vx * g1[3])) * h[3]);
|
||||
vel[1] = (((orb->svs[v0].py * g0[0]) + (orb->svs[v0].vy * g1[0])) * h[0]) + (((orb->svs[v0+1].py * g0[1]) + (orb->svs[v0+1].vy * g1[1])) * h[1]) +
|
||||
(((orb->svs[v0+2].py * g0[2]) + (orb->svs[v0+2].vy * g1[2])) * h[2]) + (((orb->svs[v0+3].py * g0[3]) + (orb->svs[v0+3].vy * g1[3])) * h[3]);
|
||||
|
@ -212,7 +212,7 @@ __device__ double interpolateDEM(float *DEM, double lon, double lat, int width,
|
|||
|
||||
i0 = int(lon) - 2;
|
||||
j0 = int(lat) - 2;
|
||||
|
||||
|
||||
indi = min((i0+1), width); // bound by out_of_bounds, so this isn't a concern
|
||||
spline(indi, j0, length, A, DEM);
|
||||
initSpline(A,R,Q);
|
||||
|
@ -328,7 +328,7 @@ __device__ void radar2xyz(struct Peg *peg, struct Ellipsoid *elp, struct PegTran
|
|||
ptm->mat[2][0] = sin(peg->lat);
|
||||
ptm->mat[2][1] = cos(peg->lat) * cos(peg->hdg);
|
||||
ptm->mat[2][2] = cos(peg->lat) * sin(peg->hdg);
|
||||
|
||||
|
||||
re = elp->a / sqrt(1.0 - (elp->e2 * pow(sin(peg->lat),2)));
|
||||
rn = (elp->a * (1.0 - elp->e2)) / pow((1.0 - (elp->e2 * pow(sin(peg->lat),2))),1.5);
|
||||
ptm->radcur = (re * rn) / ((re * pow(cos(peg->hdg),2)) + (rn * pow(sin(peg->hdg),2)));
|
||||
|
@ -337,7 +337,7 @@ __device__ void radar2xyz(struct Peg *peg, struct Ellipsoid *elp, struct PegTran
|
|||
llh[1] = peg->lon;
|
||||
llh[2] = 0.0;
|
||||
llh2xyz(temp,llh,elp);
|
||||
|
||||
|
||||
ptm->ov[0] = temp[0] - (ptm->radcur * cos(peg->lat) * cos(peg->lon));
|
||||
ptm->ov[1] = temp[1] - (ptm->radcur * cos(peg->lat) * sin(peg->lon));
|
||||
ptm->ov[2] = temp[2] - (ptm->radcur * sin(peg->lat));
|
||||
|
@ -370,7 +370,7 @@ __global__ void runTopo(struct Orbit orbit, struct OutputImgArrs outImgArrs, str
|
|||
long pixel = (blockDim.x * blockIdx.x) + threadIdx.x;
|
||||
|
||||
if (pixel < NPIXELS) { // Make sure we're not operating on a non-existent pixel
|
||||
|
||||
|
||||
double enumat[3][3];
|
||||
double xyzsat[3], velsat[3], llhsat[3], vhat[3], that[3], chat[3], nhat[3];
|
||||
double llh[3], llh_prev[3], xyz[3], xyz_prev[3], sch[3], enu[3], delta[3];
|
||||
|
@ -381,11 +381,11 @@ __global__ void runTopo(struct Orbit orbit, struct OutputImgArrs outImgArrs, str
|
|||
double thrd_z, thrd_zsch, thrd_lat, thrd_lon, thrd_distance, thrd_losang0, thrd_losang1;
|
||||
double thrd_incang0, thrd_incang1;
|
||||
int thrd_converge;
|
||||
|
||||
|
||||
struct Ellipsoid elp;
|
||||
struct Peg peg;
|
||||
struct PegTrans ptm;
|
||||
|
||||
|
||||
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * *
|
||||
* double t0 = inpts_dbl[0];
|
||||
* double prf = inpts_dbl[1];
|
||||
|
@ -412,7 +412,7 @@ __global__ void runTopo(struct Orbit orbit, struct OutputImgArrs outImgArrs, str
|
|||
* int extraiter = inpts_int[5];
|
||||
* int length = inpts_int[6]; NOT USED IN THIS KERNEL
|
||||
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
|
||||
|
||||
|
||||
line = (pixel + OFFSET) / d_inpts_int[1];
|
||||
tline = d_inpts_dbl[0] + (d_inpts_int[0] * (line / d_inpts_dbl[1]));
|
||||
if (interpolateOrbit(&orbit,tline,xyzsat,velsat) != 0) {
|
||||
|
@ -427,28 +427,28 @@ __global__ void runTopo(struct Orbit orbit, struct OutputImgArrs outImgArrs, str
|
|||
peg.lat = llhsat[0];
|
||||
peg.lon = llhsat[1];
|
||||
radar2xyz(&peg,&elp,&ptm);
|
||||
|
||||
|
||||
thrd_converge = 0;
|
||||
thrd_z = 0.0;
|
||||
thrd_zsch = 0.0;
|
||||
thrd_lat = d_inpts_dbl[7] + (0.5 * d_inpts_dbl[9] * d_inpts_int[2]);
|
||||
thrd_lon = d_inpts_dbl[8] + (0.5 * d_inpts_dbl[10] * d_inpts_int[3]);
|
||||
|
||||
|
||||
dopfact = (0.5 * d_inpts_dbl[11] * (inImgArrs.dopline[pixel] / vmag)) * inImgArrs.rho[pixel];
|
||||
|
||||
|
||||
// START THE ITERATIONS
|
||||
for (iter=0; iter<=(d_inpts_int[4]+d_inpts_int[5]); iter++) {
|
||||
if (thrd_converge == 0) { // Designing this way helps prevent thread divergence as much as possible
|
||||
llh_prev[0] = thrd_lat / (180. / M_PI);
|
||||
llh_prev[1] = thrd_lon / (180. / M_PI);
|
||||
llh_prev[2] = thrd_z;
|
||||
|
||||
costheta = 0.5 * (((height + ptm.radcur) / inImgArrs.rho[pixel]) + (inImgArrs.rho[pixel] / (height + ptm.radcur)) -
|
||||
|
||||
costheta = 0.5 * (((height + ptm.radcur) / inImgArrs.rho[pixel]) + (inImgArrs.rho[pixel] / (height + ptm.radcur)) -
|
||||
(((ptm.radcur + thrd_zsch) / (height + ptm.radcur)) * ((ptm.radcur + thrd_zsch) / inImgArrs.rho[pixel])));
|
||||
sintheta = sqrt(1.0 - pow(costheta,2));
|
||||
alpha = (dopfact - (costheta * inImgArrs.rho[pixel] * dot(nhat,vhat))) / dot(vhat,that);
|
||||
beta = -d_inpts_dbl[12] * sqrt((pow(inImgArrs.rho[pixel],2) * pow(sintheta,2)) - pow(alpha,2));
|
||||
|
||||
|
||||
delta[0] = (costheta * inImgArrs.rho[pixel] * nhat[0]) + (alpha * that[0]) + (beta * chat[0]);
|
||||
delta[1] = (costheta * inImgArrs.rho[pixel] * nhat[1]) + (alpha * that[1]) + (beta * chat[1]);
|
||||
delta[2] = (costheta * inImgArrs.rho[pixel] * nhat[2]) + (alpha * that[2]) + (beta * chat[2]);
|
||||
|
@ -457,7 +457,7 @@ __global__ void runTopo(struct Orbit orbit, struct OutputImgArrs outImgArrs, str
|
|||
xyz[1] = xyzsat[1] + delta[1];
|
||||
xyz[2] = xyzsat[2] + delta[2];
|
||||
xyz2llh(xyz,llh,&elp);
|
||||
|
||||
|
||||
thrd_lat = llh[0] * (180. / M_PI);
|
||||
thrd_lon = llh[1] * (180. / M_PI);
|
||||
demlat = ((thrd_lat - d_inpts_dbl[7]) / d_inpts_dbl[9]) + 1;
|
||||
|
@ -468,7 +468,7 @@ __global__ void runTopo(struct Orbit orbit, struct OutputImgArrs outImgArrs, str
|
|||
demlon = fmin(demlon,(d_inpts_int[3]-1.));
|
||||
thrd_z = interpolateDEM(inImgArrs.DEM,demlon,demlat,d_inpts_int[3],d_inpts_int[2]);
|
||||
thrd_z = fmax(thrd_z,-500.);
|
||||
|
||||
|
||||
llh[0] = thrd_lat / (180. / M_PI);
|
||||
llh[1] = thrd_lon / (180. / M_PI);
|
||||
llh[2] = thrd_z;
|
||||
|
@ -494,23 +494,23 @@ __global__ void runTopo(struct Orbit orbit, struct OutputImgArrs outImgArrs, str
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Final computation
|
||||
costheta = 0.5 * (((height + ptm.radcur) / inImgArrs.rho[pixel]) + (inImgArrs.rho[pixel] / (height + ptm.radcur)) -
|
||||
(((ptm.radcur + thrd_zsch) / (height + ptm.radcur)) * ((ptm.radcur + thrd_zsch) / inImgArrs.rho[pixel])));
|
||||
sintheta = sqrt(1.0 - pow(costheta,2));
|
||||
alpha = (dopfact - (costheta * inImgArrs.rho[pixel] * dot(nhat,vhat))) / dot(vhat,that);
|
||||
beta = -d_inpts_dbl[12] * sqrt((pow(inImgArrs.rho[pixel],2) * pow(sintheta,2)) - pow(alpha,2));
|
||||
|
||||
|
||||
delta[0] = (costheta * inImgArrs.rho[pixel] * nhat[0]) + (alpha * that[0]) + (beta * chat[0]);
|
||||
delta[1] = (costheta * inImgArrs.rho[pixel] * nhat[1]) + (alpha * that[1]) + (beta * chat[1]);
|
||||
delta[2] = (costheta * inImgArrs.rho[pixel] * nhat[2]) + (alpha * that[2]) + (beta * chat[2]);
|
||||
|
||||
|
||||
xyz[0] = xyzsat[0] + delta[0];
|
||||
xyz[1] = xyzsat[1] + delta[1];
|
||||
xyz[2] = xyzsat[2] + delta[2];
|
||||
xyz2llh(xyz,llh,&elp);
|
||||
|
||||
|
||||
thrd_lat = llh[0] * (180. / M_PI);
|
||||
thrd_lon = llh[1] * (180. / M_PI);
|
||||
thrd_z = llh[2];
|
||||
|
@ -526,42 +526,42 @@ __global__ void runTopo(struct Orbit orbit, struct OutputImgArrs outImgArrs, str
|
|||
enumat[0][2] = 0.0;
|
||||
enumat[1][2] = cos(llh[0]);
|
||||
enumat[2][2] = sin(llh[0]);
|
||||
|
||||
|
||||
// Expanded from Linalg::matvec
|
||||
enu[0] = (enumat[0][0] * delta[0]) + (enumat[0][1] * delta[1]) + (enumat[0][2] * delta[2]);
|
||||
enu[1] = (enumat[1][0] * delta[0]) + (enumat[1][1] * delta[1]) + (enumat[1][2] * delta[2]);
|
||||
enu[2] = (enumat[2][0] * delta[0]) + (enumat[2][1] * delta[1]) + (enumat[2][2] * delta[2]);
|
||||
|
||||
|
||||
cosalpha = fabs(enu[2]) / norm(3,enu);
|
||||
thrd_losang0 = acos(cosalpha) * (180. / M_PI);
|
||||
thrd_losang1 = (atan2(-enu[1],-enu[0]) - (0.5*M_PI)) * (180. / M_PI);
|
||||
thrd_incang0 = acos(costheta) * (180. / M_PI);
|
||||
thrd_zsch = inImgArrs.rho[pixel] * sintheta;
|
||||
|
||||
|
||||
demlat = ((thrd_lat - d_inpts_dbl[7]) / d_inpts_dbl[9]) + 1;
|
||||
demlat = fmax(demlat,2.);
|
||||
demlat = fmin(demlat,(d_inpts_int[2]-1.));
|
||||
demlon = ((thrd_lon - d_inpts_dbl[8]) / d_inpts_dbl[10]) + 1;
|
||||
demlon = fmax(demlon,2.);
|
||||
demlon = fmin(demlon,(d_inpts_int[3]-1.));
|
||||
|
||||
|
||||
aa = interpolateDEM(inImgArrs.DEM,(demlon-1.),demlat,d_inpts_int[3],d_inpts_int[2]);
|
||||
bb = interpolateDEM(inImgArrs.DEM,(demlon+1.),demlat,d_inpts_int[3],d_inpts_int[2]);
|
||||
alpha = ((bb - aa) * (180. / M_PI)) / (2.0 * (elp.a / sqrt(1.0 - (elp.e2 * pow(sin(thrd_lat / (180. / M_PI)),2)))) * d_inpts_dbl[10]);
|
||||
|
||||
|
||||
aa = interpolateDEM(inImgArrs.DEM,demlon,(demlat-1.),d_inpts_int[3],d_inpts_int[2]);
|
||||
bb = interpolateDEM(inImgArrs.DEM,demlon,(demlat+1.),d_inpts_int[3],d_inpts_int[2]);
|
||||
beta = ((bb - aa) * (180. / M_PI)) / (2.0 * ((elp.a * (1.0 - elp.e2)) / pow((1.0 - (elp.e2 * pow(sin(thrd_lat / (180. / M_PI)),2))),1.5)) * d_inpts_dbl[9]);
|
||||
|
||||
|
||||
enunorm = norm(3,enu);
|
||||
enu[0] = enu[0] / enunorm;
|
||||
enu[1] = enu[1] / enunorm;
|
||||
enu[2] = enu[2] / enunorm;
|
||||
costheta = ((enu[0] * alpha) + (enu[1] * beta) - enu[2]) / sqrt(1.0 + pow(alpha,2) + pow(beta,2));
|
||||
thrd_incang1 = acos(costheta) * (180. / M_PI);
|
||||
|
||||
|
||||
// Leave out masking stuff for now (though it's doable)
|
||||
|
||||
|
||||
// Finally write to reference arrays
|
||||
outImgArrs.lat[pixel] = thrd_lat;
|
||||
outImgArrs.lon[pixel] = thrd_lon;
|
||||
|
@ -590,11 +590,10 @@ void freeOrbit(struct Orbit *orb) {
|
|||
free(orb->svs);
|
||||
}
|
||||
|
||||
size_t getDeviceMem() {
|
||||
size_t getDeviceFreeMem() {
|
||||
size_t freeByte, totalByte;
|
||||
cudaMemGetInfo(&freeByte, &totalByte);
|
||||
totalByte = (totalByte / 1e9) * 1e9; // Round down to nearest GB
|
||||
return totalByte;
|
||||
return freeByte;
|
||||
}
|
||||
|
||||
// --------------- C FUNCTIONS ----------------
|
||||
|
@ -616,10 +615,10 @@ void runGPUTopo(long nBlock, long numPix, double *h_inpts_dbl, int *h_inpts_int,
|
|||
cudaSetDevice(0);
|
||||
|
||||
printf(" Allocating host and general GPU memory...\n");
|
||||
|
||||
|
||||
size_t nb_pixels = numPix * sizeof(double); // size of rho/dopline/lat/lon/z/zsch/incang/losang
|
||||
size_t nb_DEM = h_inpts_int[3] * h_inpts_int[2] * sizeof(float); // size of DEM
|
||||
|
||||
|
||||
/*
|
||||
h_lat = (double *)malloc(nb_pixels);
|
||||
h_lon = (double *)malloc(nb_pixels);
|
||||
|
@ -655,21 +654,21 @@ void runGPUTopo(long nBlock, long numPix, double *h_inpts_dbl, int *h_inpts_int,
|
|||
cudaMemcpyToSymbol(d_inpts_dbl, h_inpts_dbl, (14*sizeof(double)));
|
||||
cudaMemcpyToSymbol(d_inpts_int, h_inpts_int, (7*sizeof(int)));
|
||||
freeOrbit(&orbit);
|
||||
|
||||
|
||||
orbit.svs = d_svs;
|
||||
inImgArrs.DEM = d_DEM;
|
||||
inImgArrs.rho = d_rho;
|
||||
inImgArrs.dopline = d_dopline;
|
||||
|
||||
printf(" Allocating block memory (%d pixels per image)...\n", numPix);
|
||||
|
||||
|
||||
cudaMalloc((double**)&d_lat, nb_pixels);
|
||||
cudaMalloc((double**)&d_lon, nb_pixels);
|
||||
cudaMalloc((double**)&d_z, nb_pixels);
|
||||
//cudaMalloc((double**)&d_zsch, nb_pixels);
|
||||
cudaMalloc((double**)&d_incang, (2*nb_pixels));
|
||||
cudaMalloc((double**)&d_losang, (2*nb_pixels));
|
||||
|
||||
|
||||
outImgArrs.lat = d_lat;
|
||||
outImgArrs.lon = d_lon;
|
||||
outImgArrs.z = d_z;
|
||||
|
@ -702,7 +701,7 @@ void runGPUTopo(long nBlock, long numPix, double *h_inpts_dbl, int *h_inpts_int,
|
|||
iEndRun = cpuSecond();
|
||||
if (nBlock > -1) printf(" GPU finished block %d in %f s.\n", nBlock, (iEndRun-iStartRun));
|
||||
else printf(" GPU finished remaining lines in %f s.\n", (iEndRun-iStartRun));
|
||||
|
||||
|
||||
printf(" Copying memory back to host...\n");
|
||||
|
||||
cudaMemcpy(accArr[0], outImgArrs.lat, nb_pixels, cudaMemcpyDeviceToHost); // Copy memory from device to host with offset
|
||||
|
|
|
@ -6,7 +6,7 @@
|
|||
#ifndef GPU_TOPO_H
|
||||
#define GPU_TOPO_H
|
||||
|
||||
size_t getDeviceMem();
|
||||
size_t getDeviceFreeMem();
|
||||
void runGPUTopo(long,long,double*,int*,float*,double*,double*,int,double*,double**);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -23,6 +23,7 @@
|
|||
#include <cmath>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cassert>
|
||||
#include <fstream>
|
||||
#include <future>
|
||||
#include <omp.h>
|
||||
|
@ -78,7 +79,7 @@ void *writeToFile(void *inputData) {
|
|||
data.nLines = ((struct writeData *)inputData)->nLines;
|
||||
data.width = ((struct writeData *)inputData)->width;
|
||||
data.firstWrite = ((struct writeData *)inputData)->firstWrite;
|
||||
|
||||
|
||||
if (!data.firstWrite) {
|
||||
for (int i=0; i<data.nLines; i++) {
|
||||
size_t offset = i * size_t(data.width);
|
||||
|
@ -170,7 +171,7 @@ void Topo::topo() {
|
|||
exit(1);
|
||||
}
|
||||
tzMethods.prepareMethods(dem_method);
|
||||
|
||||
|
||||
// Set up Ellipsoid object
|
||||
elp.a = major;
|
||||
elp.e2 = eccentricitySquared;
|
||||
|
@ -252,12 +253,12 @@ void Topo::topo() {
|
|||
peg.hdg = peghdg;
|
||||
ptm.radar_to_xyz(elp,peg);
|
||||
rcurv = ptm.radcur;
|
||||
|
||||
|
||||
for (int ind=0; ind<2; ind++) {
|
||||
pixel = ind * (width - 1);
|
||||
rng = rho[pixel];
|
||||
dopfact = (0.5 * wvl * (dopline[pixel] / vmag)) * rng;
|
||||
|
||||
|
||||
for (int iter=0; iter<2; iter++) {
|
||||
// SWOT-specific near range check
|
||||
// If slant range vector doesn't hit ground, pick nadir point
|
||||
|
@ -377,7 +378,7 @@ void Topo::topo() {
|
|||
if (RUN_GPU_TOPO) {
|
||||
double gpu_inputs_d[14];
|
||||
int gpu_inputs_i[7];
|
||||
|
||||
|
||||
gpu_inputs_d[0] = t0;
|
||||
gpu_inputs_d[1] = prf;
|
||||
gpu_inputs_d[2] = elp.a;
|
||||
|
@ -409,7 +410,7 @@ void Topo::topo() {
|
|||
gpu_dem[(i*udemlength)+j] = dem[i][j];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
int gpu_orbNvec = orb.nVectors;
|
||||
double *gpu_orbSvs = new double[7*gpu_orbNvec];
|
||||
for (int i=0; i<gpu_orbNvec; i++) {
|
||||
|
@ -432,7 +433,7 @@ void Topo::topo() {
|
|||
bool incFlag = bool(incAccessor > 0);
|
||||
bool losFlag = bool(losAccessor > 0);
|
||||
//std::future<void> result = std::async(std::launch::async, &Topo::writeToFile, this, (void **)accObjs, outputArrays, incFlag, losFlag, 0, width, true);
|
||||
|
||||
|
||||
// Create pthread data and initialize dummy thread
|
||||
pthread_t writeThread;
|
||||
pthread_attr_t attr;
|
||||
|
@ -455,26 +456,51 @@ void Topo::topo() {
|
|||
pthread_create(&writeThread, &attr, writeToFile, (void*)&wd);
|
||||
|
||||
// Calculate number of and size of blocks
|
||||
size_t num_GPU_bytes = getDeviceMem();
|
||||
long totalPixels = (long)length * width;
|
||||
long pixPerImg = (((num_GPU_bytes / 8) / 9) / 1e7) * 1e7; // Round down to the nearest 10M pixels
|
||||
long linesPerImg = pixPerImg / width;
|
||||
pixPerImg = linesPerImg * width;
|
||||
int nBlocks = totalPixels / pixPerImg;
|
||||
|
||||
//original values: 1.5e8 is too large for each of GPU on kamb.
|
||||
//here I change it to 1.0e8. 16-MAY-2018, Cunren Liang
|
||||
while (pixPerImg > 1.0e8) {
|
||||
linesPerImg -= 1;
|
||||
pixPerImg -= width;
|
||||
nBlocks = totalPixels / pixPerImg;
|
||||
}
|
||||
long remPix = totalPixels - (pixPerImg * nBlocks);
|
||||
long remLines = remPix / width;
|
||||
// free GPU memory available
|
||||
size_t num_GPU_bytes = getDeviceFreeMem();
|
||||
// use 100 MiB as a rounding unit; may be adjusted
|
||||
size_t memoryRoundingUnit = 1024ULL * 1024ULL * 100;
|
||||
// memory to be used for each pixel in bytes, with 9 double elements per pixel
|
||||
size_t pixelBytes = sizeof(double) * 9;
|
||||
// memory overhead for other shared parameters, in units of memoryRoundingUnit (2 units = 200 MiB)
|
||||
size_t memoryOverhead = 2;
|
||||
|
||||
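// adjust the available free memory: round down to whole rounding units, then subtract the overhead
// NOTE(review): all operands are size_t, so the subtraction wraps around if fewer than memoryOverhead units are free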
|
||||
num_GPU_bytes = (num_GPU_bytes/memoryRoundingUnit - memoryOverhead) * memoryRoundingUnit;
|
||||
|
||||
// calculate the max pixels allowed in a batch (block)
|
||||
size_t pixPerImg = num_GPU_bytes / pixelBytes;
|
||||
assert(pixPerImg > 0);
|
||||
|
||||
// ! To best parallelize the computation, using the maximum available GPU memory is the best option
|
||||
// ! the following adjustment is not needed
|
||||
// set an upper limit on the size of the block
|
||||
// preferably offered as an input parameter
|
||||
// 2^24 pixels is about 1.2 GB of memory (9 doubles per pixel)
|
||||
// size_t maxPixPerImg = 1 << 24;
|
||||
// pixPerImg = std::min(pixPerImg, maxPixPerImg);
|
||||
|
||||
// the max lines in a batch, and will be used for each run
|
||||
int linesPerImg = pixPerImg / width;
|
||||
assert(linesPerImg >0);
|
||||
// now reassign the value for pixels in a batch
|
||||
pixPerImg = linesPerImg * width;
|
||||
|
||||
// total number of pixels in SLC
|
||||
size_t totalPixels = (size_t)length * width;
|
||||
|
||||
// number of full blocks needed to process the whole image
|
||||
int nBlocks = length / linesPerImg;
|
||||
|
||||
// check whether there are remnant lines
|
||||
int remLines = length - nBlocks*linesPerImg;
|
||||
size_t remPix = remLines * width;
|
||||
|
||||
printf("NOTE: GPU will process image in %d blocks of %d lines", nBlocks, linesPerImg);
|
||||
if (remPix > 0) printf(" (with %d lines in a final partial block)", remLines);
|
||||
printf("\n");
|
||||
|
||||
|
||||
double *gpu_rho = new double[linesPerImg * width];
|
||||
double *gpu_dopline = new double[linesPerImg * width];
|
||||
size_t nb_pixels = pixPerImg * sizeof(double);
|
||||
|
@ -490,7 +516,7 @@ void Topo::topo() {
|
|||
dopAccObj->getLineSequential((char *)raw_line);
|
||||
for (int k=0; k<width; k++) gpu_dopline[(j*width)+k] = raw_line[k];
|
||||
}
|
||||
|
||||
|
||||
outputArrays[0] = (double *)malloc(nb_pixels); // h_lat
|
||||
outputArrays[1] = (double *)malloc(nb_pixels); // h_lon
|
||||
outputArrays[2] = (double *)malloc(nb_pixels); // h_z
|
||||
|
@ -565,7 +591,7 @@ void Topo::topo() {
|
|||
|
||||
printf("\n ------------------ EXITING GPU TOPO ------------------\n\n");
|
||||
printf("Finished!\n");
|
||||
|
||||
|
||||
delete[] raw_line;
|
||||
delete[] gpu_dem;
|
||||
delete[] gpu_rho;
|
||||
|
@ -579,7 +605,7 @@ void Topo::topo() {
|
|||
// Step 1: Get satellite position
|
||||
// Get time
|
||||
tline = t0 + (Nazlooks * (line / prf));
|
||||
|
||||
|
||||
// Get state vector
|
||||
stat = orb.interpolateOrbit(tline,xyzsat,velsat,orbit_method);
|
||||
if (stat != 0) {
|
||||
|
@ -657,15 +683,15 @@ void Topo::topo() {
|
|||
for (pixel=0; pixel<width; pixel++) {
|
||||
rng = rho[pixel];
|
||||
dopfact = (0.5 * wvl * (dopline[pixel] / vmag)) * rng;
|
||||
|
||||
|
||||
// If pixel hasn't converged
|
||||
if (converge[pixel] == 0) {
|
||||
|
||||
|
||||
// Use previous llh in degrees and meters
|
||||
llh_prev[0] = lat[pixel] / (180. / M_PI);
|
||||
llh_prev[1] = lon[pixel] / (180. / M_PI);
|
||||
llh_prev[2] = z[pixel];
|
||||
|
||||
|
||||
// Solve for new position at height zsch
|
||||
aa = height + rcurv;
|
||||
bb = rcurv + zsch[pixel];
|
||||
|
@ -680,12 +706,12 @@ void Topo::topo() {
|
|||
gamm = costheta * rng;
|
||||
alpha = (dopfact - (gamm * linalg.dot(nhat,vhat))) / linalg.dot(vhat,that);
|
||||
beta = -ilrl * sqrt((rng * rng * sintheta * sintheta) - (alpha * alpha));
|
||||
|
||||
|
||||
// xyz position of target
|
||||
for (int idx=0; idx<3; idx++) delta[idx] = (gamm * nhat[idx]) + (alpha * that[idx]) + (beta * chat[idx]);
|
||||
for (int idx=0; idx<3; idx++) xyz[idx] = xyzsat[idx] + delta[idx];
|
||||
elp.latlon(xyz,llh,XYZ_2_LLH);
|
||||
|
||||
|
||||
// Convert lat, lon, hgt to xyz coordinates
|
||||
lat[pixel] = llh[0] * (180. / M_PI);
|
||||
lon[pixel] = llh[1] * (180. / M_PI);
|
||||
|
@ -701,7 +727,7 @@ void Topo::topo() {
|
|||
fraclon = demlon - idemlon;
|
||||
z[pixel] = tzMethods.interpolate(dem,idemlon,idemlat,fraclon,fraclat,udemwidth,udemlength,dem_method);
|
||||
if (z[pixel] < -500.0) z[pixel] = -500.0;
|
||||
|
||||
|
||||
// Given llh, where h = z(pixel, line) in WGS84, get the SCH height
|
||||
llh[0] = lat[pixel] / (180. / M_PI);
|
||||
llh[1] = lon[pixel] / (180. / M_PI);
|
||||
|
@ -719,7 +745,7 @@ void Topo::topo() {
|
|||
} else if (iter > numiter) {
|
||||
elp.latlon(xyz_prev,llh_prev,LLH_2_XYZ);
|
||||
for (int idx=0; idx<3; idx++) xyz[idx] = 0.5 * (xyz_prev[idx] + xyz[idx]);
|
||||
|
||||
|
||||
// Repopulate lat, lon, z
|
||||
elp.latlon(xyz,llh,XYZ_2_LLH);
|
||||
lat[pixel] = llh[0] * (180. / M_PI);
|
||||
|
@ -727,7 +753,7 @@ void Topo::topo() {
|
|||
z[pixel] = llh[2];
|
||||
ptm.convert_sch_to_xyz(sch,xyz,XYZ_2_SCH);
|
||||
zsch[pixel] = sch[2];
|
||||
|
||||
|
||||
// Absolute distance
|
||||
distance[pixel] = sqrt(pow((xyz[0]-xyzsat[0]),2)+pow((xyz[1]-xyzsat[1]),2) + pow((xyz[2]-xyzsat[2]),2)) - rng;
|
||||
}
|
||||
|
@ -754,32 +780,32 @@ void Topo::topo() {
|
|||
gamm = costheta * rng;
|
||||
alpha = (dopfact - (gamm * linalg.dot(nhat,vhat))) / linalg.dot(vhat,that);
|
||||
beta = -ilrl * sqrt((rng * rng * sintheta * sintheta) - (alpha * alpha));
|
||||
|
||||
|
||||
// xyz position of target
|
||||
for (int idx=0; idx<3; idx++) delta[idx] = (gamm * nhat[idx]) + (alpha * that[idx]) + (beta * chat[idx]);
|
||||
for (int idx=0; idx<3; idx++) xyz[idx] = xyzsat[idx] + delta[idx];
|
||||
elp.latlon(xyz,llh,XYZ_2_LLH);
|
||||
|
||||
|
||||
// Copy into output arrays
|
||||
lat[pixel] = llh[0] * (180. / M_PI);
|
||||
lon[pixel] = llh[1] * (180. / M_PI);
|
||||
z[pixel] = llh[2];
|
||||
distance[pixel] = sqrt(pow((xyz[0]-xyzsat[0]),2)+pow((xyz[1]-xyzsat[1]),2) + pow((xyz[2]-xyzsat[2]),2)) - rng;
|
||||
|
||||
|
||||
// Computation in ENU coordinates around target
|
||||
linalg.enubasis(llh[0],llh[1],enumat);
|
||||
linalg.tranmat(enumat,xyz2enu);
|
||||
linalg.matvec(xyz2enu,delta,enu);
|
||||
cosalpha = abs(enu[2]) / linalg.norm(enu);
|
||||
|
||||
|
||||
// LOS vectors
|
||||
losang[(2*pixel)] = acos(cosalpha) * (180. / M_PI);
|
||||
losang[((2*pixel)+1)] = (atan2(-enu[1],-enu[0]) - (0.5*M_PI)) * (180. / M_PI);
|
||||
incang[(2*pixel)] = acos(costheta) * (180. / M_PI);
|
||||
|
||||
|
||||
// ctrack gets stored in zsch
|
||||
zsch[pixel] = rng * sintheta;
|
||||
|
||||
|
||||
// Get local incidence angle
|
||||
demlat = ((lat[pixel] - ufirstlat) / deltalat) + 1;
|
||||
demlon = ((lon[pixel] - ufirstlon) / deltalon) + 1;
|
||||
|
@ -792,12 +818,12 @@ void Topo::topo() {
|
|||
fraclat = demlat - idemlat;
|
||||
fraclon = demlon - idemlon;
|
||||
gamm = lat[pixel] / (180. / M_PI);
|
||||
|
||||
|
||||
// Slopex
|
||||
aa = tzMethods.interpolate(dem,(idemlon-1),idemlat,fraclon,fraclat,udemwidth,udemlength,dem_method);
|
||||
bb = tzMethods.interpolate(dem,(idemlon+1),idemlat,fraclon,fraclat,udemwidth,udemlength,dem_method);
|
||||
alpha = ((bb - aa) * (180. / M_PI)) / (2.0 * elp.reast(gamm) * deltalon);
|
||||
|
||||
|
||||
// Slopey
|
||||
aa = tzMethods.interpolate(dem,idemlon,(idemlat-1),fraclon,fraclat,udemwidth,udemlength,dem_method);
|
||||
bb = tzMethods.interpolate(dem,idemlon,(idemlat+1),fraclon,fraclat,udemwidth,udemlength,dem_method);
|
||||
|
@ -822,7 +848,7 @@ void Topo::topo() {
|
|||
max_lat = max(mxlat, max_lat);
|
||||
min_lon = min(mnlon, min_lon);
|
||||
max_lon = max(mxlon, max_lon);
|
||||
|
||||
|
||||
latAccObj->setLineSequential((char *)&lat[0]);
|
||||
lonAccObj->setLineSequential((char *)&lon[0]);
|
||||
heightAccObj->setLineSequential((char *)&z[0]);
|
||||
|
@ -840,7 +866,7 @@ void Topo::topo() {
|
|||
ctrackmin = mnzsch - demmax;
|
||||
ctrackmax = mxzsch + demmax;
|
||||
dctrack = (ctrackmax - ctrackmin) / (owidth - 1.0);
|
||||
|
||||
|
||||
// Sort lat/lon by ctrack
|
||||
linalg.insertionSort(zsch,width);
|
||||
linalg.insertionSort(lat,width);
|
||||
|
@ -853,7 +879,7 @@ void Topo::topo() {
|
|||
aa = ctrackmin + (pixel * dctrack);
|
||||
ctrack[pixel] = aa;
|
||||
i_type = linalg.binarySearch(zsch,0,(width-1),aa);
|
||||
|
||||
|
||||
// Simple bi-linear interpolation
|
||||
fraclat = (aa - zsch[i_type]) / (zsch[(i_type+1)] - zsch[i_type]);
|
||||
demlat = lat[i_type] + (fraclat * (lat[(i_type+1)] - lat[i_type]));
|
||||
|
|
Loading…
Reference in New Issue