// // Author: Joshua Cohen // Copyright 2017 // #include #include #include #include #include #include #include #include "DataAccessor.h" #include "Constants.h" #include "Ellipsoid.h" #include "LinAlg.h" #include "Orbit.h" #include "Poly1d.h" #include "Geo2rdr.h" #ifdef GPU_ACC_ENABLED // Check to see if scons discovered gpu-capable system #include "GPUgeo.h" #endif using std::abs; pthread_mutex_t m; // Global mutex lock struct writeData { void **accessors; double *rg; double *az; double *rgoff; double *azoff; bool rgFlag; bool azFlag; bool rgOffFlag; bool azOffFlag; int nLines; int width; bool firstWrite; }; void *writeToFile(void *inputData) { pthread_mutex_lock(&m); struct writeData data; data.accessors = ((struct writeData *)inputData)->accessors; data.rg = ((struct writeData *)inputData)->rg; data.az = ((struct writeData *)inputData)->az; data.rgoff = ((struct writeData *)inputData)->rgoff; data.azoff = ((struct writeData *)inputData)->azoff; data.rgFlag = ((struct writeData *)inputData)->rgFlag; data.azFlag = ((struct writeData *)inputData)->azFlag; data.rgOffFlag = ((struct writeData *)inputData)->rgOffFlag; data.azOffFlag = ((struct writeData *)inputData)->azOffFlag; data.nLines = ((struct writeData *)inputData)->nLines; data.width = ((struct writeData *)inputData)->width; data.firstWrite = ((struct writeData *)inputData)->firstWrite; if (!data.firstWrite) { for (int i=0; isetLineSequential((char *)&data.rg[offset]); if (data.azFlag) ((DataAccessor *)data.accessors[1])->setLineSequential((char *)&data.az[offset]); if (data.rgOffFlag) ((DataAccessor *)data.accessors[2])->setLineSequential((char *)&data.rgoff[offset]); if (data.azOffFlag) ((DataAccessor *)data.accessors[3])->setLineSequential((char *)&data.azoff[offset]); } free(data.rg); // These free the data from the run that was just completed free(data.az); // Note that after each run, this function is the ONLY one that retains free(data.rgoff); // these pointers free(data.azoff); } pthread_mutex_unlock(&m); pthread_exit(NULL); } // Initializes the internal orbit stateVector memory (called from Python-level) void Geo2rdr::createOrbit() { orb.setOrbit(orbit_nvecs,orbit_basis); } // Initializes the internal poly1d coefficients memory (called from Python-level) void Geo2rdr::createPoly() { dop.setPoly(poly_order,poly_mean,poly_norm); } Geo2rdr::Geo2rdr() { usr_enable_gpu = true; // Default to enabling the GPU acceleration (GPU_ACC_ENABLED is an env var set by scons based on capability) } void Geo2rdr::geo2rdr() { double *lat, *lon, *dem, *rgm, *azt, *rgoff, *azoff; double xyz_mid[3], vel_mid[3], llh[3], xyz[3], satx[3], satv[3], dr[3]; double tend, tline, tprev, rngend, rngpix, tmid, temp, dtaz, dmrg, fdop, fdopder, fnprime; double timer_start; int *distance; int stat, cnt, pixel, line, conv, numOutsideImage; bool isOutside; DataAccessor *latAccObj = (DataAccessor*)latAccessor; DataAccessor *lonAccObj = (DataAccessor*)lonAccessor; DataAccessor *hgtAccObj = (DataAccessor*)hgtAccessor; DataAccessor *azAccObj = (DataAccessor*)azAccessor; DataAccessor *rgAccObj = (DataAccessor*)rgAccessor; DataAccessor *azOffAccObj = (DataAccessor*)azOffAccessor; DataAccessor *rgOffAccObj = (DataAccessor*)rgOffAccessor; Ellipsoid elp(major, eccentricitySquared); LinAlg linalg; Poly1d fdvsrng, fddotvsrng; // Empty constructor, will be modified later #ifndef GPU_ACC_ENABLED // If scons didnt find a CUDA-compatible system, force-disable the GPU code usr_enable_gpu = false; #endif if (orbitMethod == HERMITE_METHOD) { if (orb.nVectors < 4) { printf("Error in Geo2rdr::geo2rdr - Need at least 4 state vectors for using hermite polynomial interpolation.\n"); exit(1); } } else if (orbitMethod == SCH_METHOD) { if (orb.nVectors < 4) { printf("Error in Geo2rdr::geo2rdr - Need at least 4 state vectors for using SCH interpolation.\n"); exit(1); } } else if (orbitMethod == LEGENDRE_METHOD) { if (orb.nVectors < 9) { printf("Error in Geo2rdr::geo2rdr - Need at least 9 state vectors for using legendre polynomial interpolation.\n"); exit(1); } } else { printf("Error in Geo2rdr::geo2rdr - Undefined orbit interpolation method.\n"); exit(1); } // OpenMP replacement for clock() (clock reports cumulative thread time, not single thread // time, so clock() on 4 threads would report 4 x the true runtime) timer_start = omp_get_wtime(); cnt = 0; printf("Geo2rdr executing on %d threads...\n", omp_get_max_threads()); dtaz = nAzLooks / prf; tend = tstart + ((imgLength - 1) * dtaz); tmid = 0.5 * (tstart + tend); printf("Starting Acquisition time: %f\n", tstart); printf("Stop Acquisition time: %f\n", tend); printf("Azimuth line spacing in secs: %f\n", dtaz); dmrg = nRngLooks * drho; rngend = rngstart + ((imgWidth - 1) * dmrg); printf("Near Range in m: %f\n", rngstart); printf("Far Range in m: %f\n", rngend); printf("Range sample spacing in m: %f\n", dmrg); printf("Radar Image Length: %d\n", imgLength); printf("Radar Image Width: %d\n", imgWidth); printf("Reading DEM...\n"); printf("Geocoded Lines: %d\n", demLength); printf("Geocoded Samples: %d\n", demWidth); // setPoly() resets the internal values of a Poly1d without destruct/construct fdvsrng.setPoly(dop.order, rngstart+(dop.mean*drho), dop.norm*drho); for (int i=0; i<=dop.order; i++) fdvsrng.setCoeff(i, (prf * dop.getCoeff(i))); if (fdvsrng.order == 0) { fddotvsrng.setPoly(0,0.,1.); fddotvsrng.setCoeff(0, 0.); } else { fddotvsrng.setPoly(fdvsrng.order-1, fdvsrng.mean, fdvsrng.norm); for (int i=1; i<=dop.order; i++) { temp = (i * fdvsrng.getCoeff(i)) / fdvsrng.norm; fddotvsrng.setCoeff(i-1, temp); } } printf("Dopplers: %f %f\n", fdvsrng.eval(rngstart), fdvsrng.eval(rngend)); tline = tmid; stat = orb.interpolateOrbit(tline, xyz_mid, vel_mid, orbitMethod); if (stat != 0) { printf("Cannot interpolate orbits at the center of scene.\n"); exit(1); } numOutsideImage = 0; conv = 0; if (usr_enable_gpu) { // GPU-enabled ; will only be true if GPU_ACC_ENABLED is defined and if the user doesn't disable this flag #ifdef GPU_ACC_ENABLED // Doesn't compile the GPU code if scons didnt find CUDA-compatible libraries, etc double gpu_inputs_d[9]; int gpu_inputs_i[3]; gpu_inputs_i[0] = demLength; gpu_inputs_i[1] = demWidth; gpu_inputs_i[2] = int(bistatic); gpu_inputs_d[0] = major; gpu_inputs_d[1] = eccentricitySquared; gpu_inputs_d[2] = tstart; gpu_inputs_d[3] = tend; gpu_inputs_d[4] = wvl; gpu_inputs_d[5] = rngstart; gpu_inputs_d[6] = rngend; gpu_inputs_d[7] = dmrg; gpu_inputs_d[8] = dtaz; printf("\nCopying Orbit and Poly1d data to compatible arrays...\n"); int gpu_orbNvec = orb.nVectors; double *gpu_orbSvs = new double[7*gpu_orbNvec]; for (int i=0; i 0); bool azFlag = bool(azAccessor > 0); bool rgOffFlag = bool(rgOffAccessor > 0); bool azOffFlag = bool(azOffAccessor > 0); // Create pthread data and initialize dummy thread pthread_t writeThread; pthread_attr_t attr; pthread_attr_init(&attr); pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); void *thread_stat; struct writeData wd; wd.accessors = (void**)accObjs; wd.rg = outputArrays[0]; // Don't contain data/valid pointers yet wd.az = outputArrays[1]; wd.rgoff = outputArrays[2]; wd.azoff = outputArrays[3]; wd.rgFlag = rgFlag; wd.azFlag = azFlag; wd.rgOffFlag = rgOffFlag; wd.azOffFlag = azOffFlag; wd.nLines = 0; wd.width = demWidth; wd.firstWrite = true; // Flag to ignore write instructions pthread_create(&writeThread, &attr, writeToFile, (void*)&wd); // Fires empty thread int totalPixels = demLength * demWidth; //int linesPerRun = min(demLength, nLinesPossible(demLength, demWidth)); int linesPerRun = demLength; while ((linesPerRun*demWidth) > 2e8) linesPerRun--; int pixPerRun = linesPerRun * demWidth; int nRuns = demLength / linesPerRun; int remPix = totalPixels - (nRuns * pixPerRun); int remLines = remPix / demWidth; printf("NOTE: GPU will process image in %d runs of %d lines", nRuns, linesPerRun); if (remPix > 0) printf(" (with %d lines in a final partial block)", remLines); printf("\n"); lat = new double[pixPerRun]; lon = new double[pixPerRun]; dem = new double[pixPerRun]; size_t nb_pixels = pixPerRun * sizeof(double); printf("\n\n ------------------ INITIALIZING GPU GEO2RDR ------------------\n\n"); for (int i=0; igetLineSequential((char *)(lat+(j*demWidth))); // Yay pointer magic for (int j=0; jgetLineSequential((char *)(lon+(j*demWidth))); for (int j=0; jgetLineSequential((char *)(dem+(j*demWidth))); outputArrays[0] = (double *)malloc(nb_pixels); // h_rg outputArrays[1] = (double *)malloc(nb_pixels); // h_az outputArrays[2] = (double *)malloc(nb_pixels); // h_rgoff outputArrays[3] = (double *)malloc(nb_pixels); // h_azoff runGPUGeo(i, pixPerRun, gpu_inputs_d, gpu_inputs_i, lat, lon, dem, gpu_orbNvec, gpu_orbSvs, gpu_polyOrd, gpu_polyMean, gpu_polyNorm, gpu_polyCoef, prf, outputArrays); for (int j=0; j<4; j++) writeArrays[j] = outputArrays[j]; // Copying pointers if (i != 0) printf(" Waiting for previous asynchronous write-out to finish...\n"); pthread_attr_destroy(&attr); pthread_join(writeThread, &thread_stat); // Waits for async thread to finish printf(" Writing run %d out asynchronously to image files...\n", i); wd.accessors = (void**)accObjs; wd.rg = writeArrays[0]; wd.az = writeArrays[1]; wd.rgoff = writeArrays[2]; wd.azoff = writeArrays[3]; wd.rgFlag = rgFlag; wd.azFlag = azFlag; wd.rgOffFlag = rgOffFlag; wd.azOffFlag = azOffFlag; wd.nLines = linesPerRun; wd.width = demWidth; wd.firstWrite = false; pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); pthread_create(&writeThread, &attr, writeToFile, (void*)&wd); // Set up and fire async write thread } if (remPix > 0) { // If we have a final partial run nb_pixels = remPix * sizeof(double); outputArrays[0] = (double *)malloc(nb_pixels); outputArrays[1] = (double *)malloc(nb_pixels); outputArrays[2] = (double *)malloc(nb_pixels); outputArrays[3] = (double *)malloc(nb_pixels); printf(" Loading relevant geometry product data...\n"); for (int i=0; igetLineSequential((char *)(lat+(i*demWidth))); lonAccObj->getLineSequential((char *)(lon+(i*demWidth))); hgtAccObj->getLineSequential((char *)(dem+(i*demWidth))); } for (int i=0; i<4; i++) writeArrays[i] = outputArrays[i]; runGPUGeo((-1*linesPerRun*nRuns), remPix, gpu_inputs_d, gpu_inputs_i, lat, lon, dem, gpu_orbNvec, gpu_orbSvs, gpu_polyOrd, gpu_polyMean, gpu_polyNorm, gpu_polyCoef, prf, outputArrays); // Iter now stores number of lines processed printf(" Waiting for previous asynchronous write-out to finish...\n"); pthread_attr_destroy(&attr); pthread_join(writeThread, &thread_stat); printf(" Writing remaining %d lines out asynchronously to image files...\n", remLines); wd.accessors = (void**)accObjs; wd.rg = writeArrays[0]; wd.az = writeArrays[1]; wd.rgoff = writeArrays[2]; wd.azoff = writeArrays[3]; wd.rgFlag = rgFlag; wd.azFlag = azFlag; wd.rgOffFlag = rgOffFlag; wd.azOffFlag = azOffFlag; wd.nLines = remLines; wd.width = demWidth; wd.firstWrite = false; pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); pthread_create(&writeThread, &attr, writeToFile, (void*)&wd); } pthread_attr_destroy(&attr); pthread_join(writeThread, &thread_stat); printf(" Finished writing to files!\n"); printf("\n ------------------ EXITING GPU GEO2RDR ------------------\n\n"); printf("Finished!\n"); printf("Elapsed time = %f seconds\n", (omp_get_wtime()-timer_start)); delete[] lat; delete[] lon; delete[] dem; delete[] gpu_orbSvs; delete[] gpu_polyCoef; #endif } else { // Standard code lat = new double[demWidth]; lon = new double[demWidth]; dem = new double[demWidth]; rgm = new double[demWidth]; azt = new double[demWidth]; rgoff = new double[demWidth]; azoff = new double[demWidth]; distance = new int[demWidth]; for (line=0; linegetLineSequential((char *)lat); pixel = lonAccObj->getLineSequential((char *)lon); pixel = hgtAccObj->getLineSequential((char *)dem); if ((line%1000) == 0) printf("Processing line: %d %d\n", line, numOutsideImage); #pragma omp parallel for private(pixel, rngpix, tline, tprev, stat, fnprime, fdop, \ fdopder, isOutside, xyz, llh, satx, satv, dr) \ reduction(+:numOutsideImage,conv,cnt) for (pixel=0; pixel tend)) isOutside = true; for (int i=0; i<3; i++) dr[i] = xyz[i] - satx[i]; rngpix = linalg.norm(dr); if ((rngpix < rngstart) || (rngpix > rngend)) isOutside = true; if (bistatic) { // Not an available feature yet... tline = tline + ((2. * rngpix) / SPEED_OF_LIGHT); if ((tline < tstart) || (tline > tend)) isOutside = true; stat = orb.interpolateOrbit(tline, satx, satv, orbitMethod); if (stat != 0) isOutside = true; for (int i=0; i<3; i++) dr[i] = xyz[i] - satx[i]; rngpix = linalg.norm(dr); if ((rngpix < rngstart) || (rngpix > rngend)) isOutside = true; } if (!isOutside) { // Found a valid point inside the image cnt = cnt + 1; rgm[pixel] = rngpix; azt[pixel] = tline; rgoff[pixel] = ((rngpix - rngstart) / dmrg) - double(pixel); azoff[pixel] = ((tline - tstart) / dtaz) - double(line); distance[pixel] = tline - tprev; } else { // Point is outside the image numOutsideImage = numOutsideImage + 1; rgm[pixel] = BAD_VALUE; // This either-or is better here than filling the azt[pixel] = BAD_VALUE; // whole array first rgoff[pixel] = BAD_VALUE; azoff[pixel] = BAD_VALUE; distance[pixel] = BAD_VALUE; } } // end omp parallel for if (azAccessor > 0) azAccObj->setLineSequential((char*)azt); if (rgAccessor > 0) rgAccObj->setLineSequential((char*)rgm); if (azOffAccessor > 0) azOffAccObj->setLineSequential((char*)azoff); if (rgOffAccessor > 0) rgOffAccObj->setLineSequential((char*)rgoff); } printf("Number of pixels outside the image: %d\n", numOutsideImage); printf("Number of pixels with valid data: %d\n", cnt); printf("Number of pixels that converged: %d\n", conv); // Yay memory management! delete[] dem; delete[] lat; delete[] lon; delete[] rgm; delete[] azt; delete[] rgoff; delete[] azoff; delete[] distance; printf("Elapsed time = %f seconds\n", (omp_get_wtime()-timer_start)); } }