#!/usr/bin/env python3
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Copyright 2012 California Institute of Technology. ALL RIGHTS RESERVED.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# United States Government Sponsorship acknowledged. This software is subject to
# U.S. export control laws and regulations and has been classified as 'EAR99 NLR'
# (No [Export] License Required except when exporting to an embargoed country,
# end user, or in support of a prohibited end use). By downloading this software,
# the user agrees to comply with all applicable U.S. export laws and regulations.
# The user has the responsibility to obtain export licenses, or other export
# authority as may be required before exporting this software to any 'EAR99'
# embargoed foreign country or citizen of those countries.
#
# Author: Giangi Sacco
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
import isce
import zipfile
import os
import sys
from isce import logging
from iscesys.Component.Component import Component
import shutil
from urllib import request
from urllib.parse import urlparse
import time
#Parameter definitions
URL = Component.Parameter('_url',
    public_name='URL',
    default='',
    type=str,
    mandatory=False,
    doc="URL where to get the data from")
USERNAME = Component.Parameter('_un',
    public_name='username',
    default=None,
    type=str,
    mandatory=False,
    doc="Username in case the url is password protected")
PASSWORD = Component.Parameter('_pw',
    public_name='password',
    default=None,
    type=str,
    mandatory=False,
    doc="Password in case the url is password protected")
DIRECTORY = Component.Parameter('_downloadDir',
    public_name='directory',
    default='./',
    type=str,
    mandatory=False,
    doc="Location where the files are downloaded")
WAIT = Component.Parameter('_wait',
    public_name='wait',
    default=5,
    type=float,
    mandatory=False,
    doc="Wait time between trials when the server is down")
NUM_TRIALS = Component.Parameter('_numTrials',
    public_name='number of trials',
    default=3,
    type=int,
    mandatory=False,
    doc="Number of times it tries to download the file when the server is down")
PROCEED_IF_NO_SERVER = Component.Parameter('_proceedIfNoServer',
    public_name='proceed if no server',
    default=False,
    type=bool,
    mandatory=False,
    doc='Flag to continue even if the server is down.')
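#Note: the configurable attributes above are exposed through the Component machinery
#under their public names ('URL', 'username', 'password', 'directory', 'wait',
#'number of trials', 'proceed if no server'); see parameter_list below.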
## This class provides a set of convenience methods to retrieve files from a (possibly
# password protected) URL, decompress them if necessary, and report the download status.
class DataRetriever(Component):
    def serverUp(self,url,needCredentials=False):
        urlp = urlparse(url)
        server = urlp.scheme + "://" + urlp.netloc
        ret = False
        if needCredentials:
            try:
                request.urlopen(server)
                ret = True
            except Exception as e:
                try:
                    #when the server needs credentials, opening the url fails
                    #with one of the messages checked below
                    if e.reason.reason.count('CERTIFICATE_VERIFY_FAILED'):
                        ret = True
                except:
                    try:
                        if ''.join(e.reason.split()).lower() == 'authorizationrequired':
                            ret = True
                    except:
                        #otherwise assume that the exception was due to the server being down
                        ret = False
        else:
            try:
                request.urlopen(server)
                ret = True
            except Exception:
                #in this case assume directly that the server is down
                ret = False
        return ret
    ##
    # Fetches the files in listFile from the URL.
    # @param listFile \c list of the filenames to be retrieved.
    def getFiles(self,listFile):
        try:
            os.makedirs(self._downloadDir)
        except:
            #dir already exists
            pass
        #curl with -O downloads in the working dir, so save the cwd
        cwd = os.getcwd()
        #move to _downloadDir
        os.chdir(self._downloadDir)
        for fileNow in listFile:
            reason = 'file'
            for i in range(self._numTrials):
                try:
                    if not os.path.exists(fileNow):
                        if(self._un is None or self._pw is None):
                            if not self.serverUp(self._url):
                                reason = 'server'
                                raise Exception
                            if os.path.exists(os.path.join(os.environ['HOME'],'.netrc')):
                                command = 'curl -n -L -c $HOME/.earthdatacookie -b $HOME/.earthdatacookie -k -f -O ' + os.path.join(self._url,fileNow)
                                print("command = {}".format(command))
                            else:
                                self.logger.error('Please create a .netrc file in your home directory containing\nmachine urs.earthdata.nasa.gov\n\tlogin yourusername\n\tpassword yourpassword')
                                sys.exit(1)
                        else:
                            if not self.serverUp(self._url,True):
                                reason = 'server'
                                raise Exception
                            command = 'curl -k -f -u ' + self._un + ':' + self._pw + ' -O ' + os.path.join(self._url,fileNow)
                        if os.system(command):
                            raise Exception
                    self._downloadReport[fileNow] = self._succeded
                    break
                except Exception as e:
                    if reason == 'file':
                        self.logger.warning('There was a problem in retrieving the file %s. The requested file does not seem to be present on the server.'%(os.path.join(self._url,fileNow)))
                        #if the problem is a missing file, break the loop that retries when the server is down
                        self._downloadReport[fileNow] = self._failed
                        break
                    elif reason == 'server':
                        if i == self._numTrials - 1 and not self._proceedIfNoServer:
                            self.logger.error('There was a problem in retrieving the file %s. Check the name of the server or try again later in case the server is momentarily down.'%(os.path.join(self._url,fileNow)))
                            sys.exit(1)
                        if i == self._numTrials - 1 and self._proceedIfNoServer:
                            self._downloadReport[fileNow] = self._failed
                        else:
                            time.sleep(self._wait)
        self.decompressFiles(listFile,self._downloadReport,os.getcwd())
        self.clean(listFile,self._downloadReport)
        #move back to the original directory
        os.chdir(cwd)
    def decompressFiles(self,listFile,report,cwd='./'):
        import tempfile as tf
        for file in listFile:
            if report[file] == self._succeded:
                td = tf.TemporaryDirectory()
                self.decompress(file,td.name)
                self._namesMapping[file] = os.listdir(td.name)
                for name in self._namesMapping[file]:
                    try:
                        shutil.move(os.path.join(td.name,name),cwd)
                    except Exception:
                        #probably the file already exists: remove it and try again
                        try:
                            os.remove(os.path.join(cwd,name))
                            shutil.move(os.path.join(td.name,name),cwd)
                        except Exception:
                            print('Cannot decompress file',name)
                            raise Exception
    def clean(self,listFile,report):
        for file in listFile:
            if report[file] == self._succeded:
                os.remove(file)
    ##
    # After retrieving the files this function prints the status of the download for each file,
    # which can be 'succeeded' or 'failed'.
    def printDownloadReport(self):
        for k,v in self._downloadReport.items():
            print('Download of file',k,v,'.')
    ##
    # This function returns a dictionary whose keys are the files whose download was attempted
    # and whose values are the status of the download, 'succeeded' or 'failed'.
    # @return \c dictionary whose keys are the attempted files and whose values are
    # the status of the download, 'succeeded' or 'failed'.
    def getDownloadReport(self):
        return self._downloadReport
    ##
    # Function that decompresses the file into the given directory.
    # @param filename \c string the name of the file to decompress.
    # @param ddir \c string the directory where the file is extracted.
    def decompress(self,filename,ddir):
        ex = self.getExtractor(filename)
        ex.extractall(ddir)
    ##
    # Inspect the file to determine the right extractor. If the archive type cannot be
    # determined an exception is raised.
    def getExtractor(self,filename):
        import tarfile
        import zipfile
        from . import gzipfile
        ret = None
        if(tarfile.is_tarfile(filename)):
            ret = tarfile.TarFile(filename)
        elif(zipfile.is_zipfile(filename)):
            ret = zipfile.ZipFile(filename)
        elif(gzipfile.is_gzipfile(filename)):
            ret = gzipfile.GZipFile(filename)
        else:
            print('Unrecognized archive type')
            raise Exception
        return ret
    @property
    def proceedIfNoServer(self):
        return self._proceedIfNoServer
    @proceedIfNoServer.setter
    def proceedIfNoServer(self,proceedIfNoServer):
        self._proceedIfNoServer = proceedIfNoServer
    @property
    def url(self):
        return self._url
    @url.setter
    def url(self,url):
        self._url = url
    @property
    def un(self):
        return self._un
    @un.setter
    def un(self,un):
        self._un = un
    @property
    def pw(self):
        return self._pw
    @pw.setter
    def pw(self,pw):
        self._pw = pw
    ##
    # Setter function for the download directory.
    # @param ddir \c string directory where the data are downloaded.
    @property
    def downloadDir(self):
        return self._downloadDir
    @downloadDir.setter
    def downloadDir(self,ddir):
        self._downloadDir = ddir
    def __getstate__(self):
        d = dict(self.__dict__)
        del d['logger']
        return d
    def __setstate__(self,d):
        self.__dict__.update(d)
        self.logger = logging.getLogger('isce.iscesys.DataRetriever')
        return
    family = 'dataretriever'
    parameter_list = (
        URL,
        USERNAME,
        PASSWORD,
        DIRECTORY,
        WAIT,
        NUM_TRIALS,
        PROCEED_IF_NO_SERVER
    )
    def __init__(self,family = '', name = ''):
        #map of the names before and after decompression
        self._namesMapping = {}
        self._downloadReport = {}
        # Note: if _useLocalDirectory is True then the downloadDir is the local directory
        ##self._downloadDir = os.getcwd()#default to the cwd
        self._failed = 'failed'
        self._succeded = 'succeeded'
        super(DataRetriever, self).__init__(family if family else self.__class__.family, name=name)
        # logger not defined until the baseclass is called
        if not self.logger:
            self.logger = logging.getLogger('isce.iscesys.DataRetriever')
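
#Minimal usage sketch (illustrative only): the URL, directory, and archive names below
#are placeholders, not real resources.
if __name__ == '__main__':
    retriever = DataRetriever(name='data_retriever')
    retriever.configure()                                  #populate parameters with their defaults
    retriever.url = 'https://example.com/data'             #placeholder server URL
    retriever.downloadDir = './downloads'                  #archives are fetched into this directory
    retriever.proceedIfNoServer = True                     #do not abort if the server is unreachable
    retriever.getFiles(['file1.tar.gz', 'file2.zip'])      #hypothetical archive names
    retriever.printDownloadReport()                        #prints 'succeeded' or 'failed' per file
    report = retriever.getDownloadReport()                 #dict mapping filename -> status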