Merge pull request #254 from mirzaees/aux_url

Fix url change for downloading orbits
LT1AB
Ryan Burns 2021-03-09 12:16:43 -08:00 committed by GitHub
commit 9eed870041
4 changed files with 99 additions and 180 deletions

View File

@@ -37,9 +37,7 @@ The following calibration auxliary (AUX_CAL) file is used for **antenna pattern
Run the command below to download the AUX_CAL file once and store it somewhere (_i.e._ ~/aux/aux_cal) so that you can use it all the time, for `stackSentinel.py -a` or `auxiliary data directory` in `topsApp.py`.
```
- wget https://qc.sentinel1.eo.esa.int/product/S1A/AUX_CAL/20140908T000000/S1A_AUX_CAL_V20140908T000000_G20190626T100201.SAFE.TGZ
- tar zxvf S1A_AUX_CAL_V20140908T000000_G20190626T100201.SAFE.TGZ
- rm S1A_AUX_CAL_V20140908T000000_G20190626T100201.SAFE.TGZ
+ wget https://aux.sentinel1.eo.esa.int/AUX_CAL/2014/09/08/S1A_AUX_CAL_V20140908T000000_G20190626T100201.SAFE/ --no-check-certificate --recursive --level=1 --cut-dirs=4 -nH
```
#### 1. Create your project folder somewhere ####
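The hunk above swaps the old per-product `.TGZ` link on `qc.sentinel1.eo.esa.int` for a recursive `wget` against the date-organized tree on `aux.sentinel1.eo.esa.int`. As a rough illustration (not part of this PR), the new path can be derived from the product name's validity date:

```python
# Sketch only: map an AUX_CAL product name to the new date-organized URL.
# The 2014/09/08 path segment comes from the V<date> validity field,
# matching the wget example in the hunk above.
import re

AUX_SERVER = 'https://aux.sentinel1.eo.esa.int/AUX_CAL/'

def aux_cal_url(product):
    """e.g. S1A_AUX_CAL_V20140908T000000_G20190626T100201.SAFE ->
    https://aux.sentinel1.eo.esa.int/AUX_CAL/2014/09/08/<product>/"""
    m = re.search(r'_V(\d{4})(\d{2})(\d{2})T', product)
    if m is None:
        raise ValueError('unexpected AUX_CAL product name: ' + product)
    year, month, day = m.groups()
    return '{0}{1}/{2}/{3}/{4}/'.format(AUX_SERVER, year, month, day, product)

print(aux_cal_url('S1A_AUX_CAL_V20140908T000000_G20190626T100201.SAFE'))
```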

View File

@@ -8,35 +8,33 @@ import requests
from html.parser import HTMLParser
from requests.packages.urllib3.exceptions import InsecureRequestWarning
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
fmt = '%Y%m%d'
today = datetime.datetime.now().strftime(fmt)
- server = 'https://qc.sentinel1.eo.esa.int/'
+ server = 'https://aux.sentinel1.eo.esa.int/'
queryfmt = '%Y-%m-%d'
datefmt = '%Y%m%dT%H%M%S'
S1Astart = '20140901'
S1Astart_dt = datetime.datetime.strptime(S1Astart, '%Y%m%d')
S1Bstart = '20160501'
S1Bstart_dt = datetime.datetime.strptime(S1Bstart, '%Y%m%d')
def cmdLineParse():
'''
Automated download of orbits.
'''
parser = argparse.ArgumentParser('S1A orbit downloader')
- parser.add_argument('--start','-b', dest='start', type=str, default=S1Astart, help='Start date')
- parser.add_argument('--end','-e', dest='end',
- type=str, default=today, help='Stop date')
- parser.add_argument('--dir', '-d', dest='dirname',
- type=str, default='.', help='Directory with precise orbits')
- return parser.parse_args()
+ parser.add_argument('--start', '-b', dest='start', type=str, default=S1Astart, help='Start date')
+ parser.add_argument('--end', '-e', dest='end', type=str, default=today, help='Stop date')
+ parser.add_argument('--dir', '-d', dest='dirname', type=str, default='.', help='Directory with precise orbits')
+ return parser.parse_args()
def fileToRange(fname):
'''
@@ -50,13 +48,14 @@ def fileToRange(fname):
return (start, stop, mission)
def gatherExistingOrbits(dirname):
'''
Gather existing orbits.
'''
fnames = glob.glob(os.path.join(dirname, 'S1?_OPER_AUX_POEORB*'))
- rangeList=[]
+ rangeList = []
for name in fnames:
rangeList.append(fileToRange(name))
@@ -84,10 +83,11 @@ def ifAlreadyExists(indate, mission, rangeList):
return found
def validS1BDate(indate):
if indate < S1Bstart_dt:
return False
else:
return True
@@ -110,7 +110,7 @@ def download_file(url, outdir='.', session=None):
success = False
if success:
- with open(path,'wb') as f:
+ with open(path, 'wb') as f:
for chunk in request.iter_content(chunk_size=1024):
if chunk:
f.write(chunk)
@@ -121,13 +121,13 @@ def download_file(url, outdir='.', session=None):
class MyHTMLParser(HTMLParser):
- def __init__(self):
+ def __init__(self, url):
HTMLParser.__init__(self)
self.fileList = []
self.in_td = False
self.in_a = False
self.in_table = False
- self._url = None
+ self._url = url
def handle_starttag(self, tag, attrs):
if tag == 'td':
@@ -135,14 +135,16 @@ class MyHTMLParser(HTMLParser):
elif tag == 'a':
self.in_a = True
for name, val in attrs:
- if name== "href":
+ if name == "href":
if val.startswith("http"):
self._url = val.strip()
- def handle_data(self,data):
+ def handle_data(self, data):
if self.in_td and self.in_a:
if ('S1A_OPER' in data) or ('S1B_OPER' in data):
+ # print(data.strip())
self.fileList.append((self._url, data.strip()))
+ print(self._url, data.strip())
def handle_tag(self, tag):
if tag == 'td':
@@ -150,51 +152,7 @@ class MyHTMLParser(HTMLParser):
self.in_a = False
elif tag == 'a':
self.in_a = False
- self._url=None
+ self._url = None
- def query(indate, mission, session):
- '''
- Query the system for a given date.
- '''
- if mission == 'S1B':
- if not validS1BDate(indate):
- return
- delta = datetime.timedelta(days=2)
- timebef = (indate - delta).strftime(queryfmt)
- timeaft = (indate + delta).strftime(queryfmt)
- url = server + 'aux_poeorb'
- query = url + '/?validity_start={0}..{1}&sentinel1__mission={2}'.format(timebef, timeaft,mission)
- success = False
- match = None
- try:
- r = session.get(query, verify=False)
- r.raise_for_status()
- parser = MyHTMLParser()
- parser.feed(r.text)
- for resulturl, result in parser.fileList:
- tbef, taft, mission = fileToRange(os.path.basename(result))
- if (tbef <= indate) and (taft >= indate):
- #match = os.path.join(url, result)
- match = resulturl
- break
- if match is not None:
- success = True
- except:
- pass
- if match is None:
- print('Failed to find {1} orbits for tref {0}'.format(indate, mission))
- return
- return match
if __name__ == '__main__':
@@ -202,7 +160,7 @@ if __name__ == '__main__':
Main driver.
'''
- #Parse command line
+ # Parse command line
inps = cmdLineParse()
###Compute interval
@@ -212,20 +170,40 @@ if __name__ == '__main__':
days = (tend - tstart).days
print('Number of days to check: ', days)
- ####Gather existing orbits
ranges = gatherExistingOrbits(inps.dirname)
- session = requests.session()
for dd in range(days):
indate = tstart + datetime.timedelta(days=dd, hours=12)
+ print('Searching for {0}'.format(indate))
+ url = server + 'POEORB/' + str(indate.year).zfill(2) + '/' + str(indate.month).zfill(2) + '/' + str(
+ indate.day).zfill(2) + '/'
+ session = requests.session()
+ match = None
for mission in ['S1A', 'S1B']:
if not ifAlreadyExists(indate, mission, ranges):
- match = query(indate, mission, session)
+ try:
+ r = session.get(url, verify=False)
+ r.raise_for_status()
+ parser = MyHTMLParser(url)
+ parser.feed(r.text)
+ for resulturl, result in parser.fileList:
+ match = os.path.join(resulturl, result)
+ if match is not None:
+ success = True
+ except:
+ pass
if match is not None:
download_file(match, inps.dirname, session)
- pass
+ else:
+ print('Failed to find {1} orbits for tref {0}'.format(indate, mission))
else:
print('Already exists: ', mission, indate)
+ print('Exit dloadOrbits Successfully')
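With this change the driver no longer issues a validity-range query against `qc.sentinel1.eo.esa.int`; it lists the day's `POEORB/YYYY/MM/DD/` directory on the new server and takes whatever orbit links appear there. A condensed, self-contained sketch of that lookup (a regex scan standing in for the script's `MyHTMLParser`; the index-page layout is assumed):

```python
# Sketch only: list one day's POEORB directory on the new server and collect
# the precise-orbit links it advertises. Assumes the index page exposes plain
# href links to the .EOF files.
import datetime
import re
import requests

SERVER = 'https://aux.sentinel1.eo.esa.int/'

def poeorb_links(day, mission='S1A'):
    """Return candidate precise-orbit URLs for one acquisition date."""
    url = '{0}POEORB/{1:04d}/{2:02d}/{3:02d}/'.format(SERVER, day.year, day.month, day.day)
    r = requests.get(url, verify=False)
    r.raise_for_status()
    names = re.findall(r'href="([^"]*{0}_OPER_AUX_POEORB[^"]*\.EOF)"'.format(mission), r.text)
    # links may be absolute or relative to the directory being listed
    return [n if n.startswith('http') else url + n for n in names]

if __name__ == '__main__':
    print(poeorb_links(datetime.date(2021, 1, 15)))
```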

View File

@@ -8,15 +8,15 @@ import argparse
import datetime
from html.parser import HTMLParser
- server = 'https://qc.sentinel1.eo.esa.int/'
- server2 = 'http://aux.sentinel1.eo.esa.int/'
- orbitMap = [('precise','aux_poeorb'),
- ('restituted','aux_resorb')]
+ server = 'http://aux.sentinel1.eo.esa.int/'
+ orbitMap = [('precise', 'POEORB/'),
+ ('restituted', 'RESORB/')]
datefmt = "%Y%m%dT%H%M%S"
queryfmt = "%Y-%m-%d"
- queryfmt2= "%Y/%m/%d/"
+ queryfmt2 = "%Y/%m/%d/"
def cmdLineParse():
'''
@@ -25,9 +25,9 @@ def cmdLineParse():
parser = argparse.ArgumentParser(description='Fetch orbits corresponding to given SAFE package')
parser.add_argument('-i', '--input', dest='input', type=str, required=True,
help='Path to SAFE package of interest')
parser.add_argument('-o', '--output', dest='outdir', type=str, default='.',
help='Path to output directory')
return parser.parse_args()
@@ -43,9 +43,9 @@ def FileToTimeStamp(safename):
try:
tstamp = datetime.datetime.strptime(fields[-4], datefmt)
sstamp = datetime.datetime.strptime(fields[-5], datefmt)
except:
p = re.compile(r'(?<=_)\d{8}')
dt2 = p.search(safename).group()
tstamp = datetime.datetime.strptime(dt2, '%Y%m%d')
satName = fields[0]
@@ -55,47 +55,37 @@ def FileToTimeStamp(safename):
class MyHTMLParser(HTMLParser):
- def __init__(self, satName):
+ def __init__(self, satName, url):
HTMLParser.__init__(self)
self.fileList = []
- self.pages = 0
self.in_td = False
self.in_a = False
- self.in_ul = False
+ self.in_table = False
+ self._url = url
self.satName = satName
def handle_starttag(self, tag, attrs):
if tag == 'td':
self.in_td = True
- elif tag == 'a' and self.in_td:
+ elif tag == 'a':
self.in_a = True
- elif tag == 'ul':
- for k,v in attrs:
- if k == 'class' and v.startswith('pagination'):
- self.in_ul = True
- elif tag == 'li' and self.in_ul:
- self.pages += 1
+ for name, val in attrs:
+ if name == "href":
+ if val.startswith("http"):
+ self._url = val.strip()
- def handle_data(self,data):
+ def handle_data(self, data):
if self.in_td and self.in_a:
- #if 'S1A_OPER' in data:
- #if 'S1B_OPER' in data:
- if satName in data:
- self.fileList.append(data.strip())
+ if self.satName in data:
+ self.fileList.append((self._url, data.strip()))
- def handle_endtag(self, tag):
+ def handle_tag(self, tag):
if tag == 'td':
self.in_td = False
self.in_a = False
- elif tag == 'a' and self.in_td:
+ elif tag == 'a':
self.in_a = False
- elif tag == 'ul' and self.in_ul:
- self.in_ul = False
- elif tag == 'html':
- if self.pages == 0:
- self.pages = 1
- else:
- # decrement page back and page forward list items
- self.pages -= 2
+ self._url = None
def download_file(url, outdir='.', session=None):
@@ -117,7 +107,7 @@ def download_file(url, outdir='.', session=None):
success = False
if success:
- with open(path,'wb') as f:
+ with open(path, 'wb') as f:
for chunk in request.iter_content(chunk_size=1024):
if chunk:
f.write(chunk)
@@ -149,82 +139,35 @@ if __name__ == '__main__':
fileTS, satName, fileTSStart = FileToTimeStamp(inps.input)
print('Reference time: ', fileTS)
print('Satellite name: ', satName)
match = None
session = requests.Session()
for spec in orbitMap:
oType = spec[0]
- if oType == 'precise':
- delta =datetime.timedelta(days=2)
- elif oType == 'restituted':
- delta = datetime.timedelta(days=1)
- timebef = (fileTS - delta).strftime(queryfmt)
- timeaft = (fileTS + delta).strftime(queryfmt)
- url = server + spec[1]
- query = (url +
- '/?validity_start={0}..{1}&sentinel1__mission={2}'
- .format(timebef, timeaft,satName))
- print(query)
+ url = server + spec[1] + str(fileTS.year).zfill(2) + '/' + str(fileTS.month).zfill(2) + \
+ '/' + str(fileTS.day).zfill(2) + '/'
success = False
match = None
try:
- print('Querying for {0} orbits'.format(oType))
- r = session.get(query, verify=False)
+ r = session.get(url, verify=False)
r.raise_for_status()
- parser = MyHTMLParser(satName)
+ parser = MyHTMLParser(satName, url)
parser.feed(r.text)
- print("Found {} pages".format(parser.pages))
- # get results from first page
- results = parser.fileList
- # page through and get more results
- for page in range(2, parser.pages + 1):
- page_parser = MyHTMLParser(satName)
- page_query = "{}&page={}".format(query, page)
- print(page_query)
- r = session.get(page_query, verify=False)
- r.raise_for_status()
- page_parser.feed(r.text)
- results.extend(page_parser.fileList)
- # run through all results and pull the orbit files
- if results:
- for result in results:
- tbef, taft, mission = fileToRange(os.path.basename(result))
- if (tbef <= fileTSStart) and (taft >= fileTS):
- datestr2 = FileToTimeStamp(result)[0].strftime(queryfmt2)
- match = (server2 + spec[1].replace('aux_', '').upper() +
- '/' +datestr2+ result + '.EOF')
- break
- if match is not None:
- success = True
- except Exception as e:
- print('Exception - something went wrong with the web scraper:')
- print('Exception: {}'.format(e))
- print('Continuing process')
+ for resulturl, result in parser.fileList:
+ match = os.path.join(resulturl, result)
+ if match is not None:
+ success = True
+ except:
pass
- if match is None:
- print('Failed to find {0} orbits for Time {1}'.format(oType, fileTS))
- if success:
- break
- if match:
- res = download_file(match, inps.outdir, session=session)
- if res is False:
- print('Failed to download URL: ', match)
- session.close()
+ if match is not None:
+ res = download_file(match, inps.outdir, session)
+ if res is False:
+ print('Failed to download URL: ', match)
+ else:
+ print('Failed to find {1} orbits for tref {0}'.format(fileTS, satName))
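The reworked parser now takes the directory URL in its constructor and collects `(url, filename)` pairs instead of bare names. Below is a toy, runnable illustration of that behaviour on a made-up index-page snippet; note that the sketch names the end-tag hook `handle_endtag`, the callback `HTMLParser` actually invokes:

```python
# Toy demonstration only: a minimal stand-in for the script's MyHTMLParser,
# fed a fabricated index-page snippet to show how (url, filename) pairs
# are collected.
from html.parser import HTMLParser

class IndexParser(HTMLParser):
    def __init__(self, satName, url):
        HTMLParser.__init__(self)
        self.fileList = []
        self.in_td = False
        self.in_a = False
        self._url = url          # falls back to the directory URL
        self.satName = satName

    def handle_starttag(self, tag, attrs):
        if tag == 'td':
            self.in_td = True
        elif tag == 'a':
            self.in_a = True
            for name, val in attrs:
                if name == 'href' and val.startswith('http'):
                    self._url = val.strip()

    def handle_data(self, data):
        if self.in_td and self.in_a and self.satName in data:
            self.fileList.append((self._url, data.strip()))

    def handle_endtag(self, tag):
        if tag == 'td':
            self.in_td = False
            self.in_a = False
        elif tag == 'a':
            self.in_a = False

# Fabricated example listing (the file name is illustrative only).
page = ('<table><tr><td><a href="https://aux.sentinel1.eo.esa.int/POEORB/2021/01/15/'
        'S1A_OPER_AUX_POEORB_EXAMPLE.EOF">S1A_OPER_AUX_POEORB_EXAMPLE.EOF</a></td></tr></table>')

p = IndexParser('S1A', 'https://aux.sentinel1.eo.esa.int/POEORB/2021/01/15/')
p.feed(page)
print(p.fileList)
```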

View File

@@ -24,7 +24,7 @@ All file paths in the input files should either be absolute paths or relative to
For more details on what these corrections are and where to get the aux files, see: https://sentinel.esa.int/documents/247904/1653440/Sentinel-1-IPF_EAP_Phase_correction
Aux data can be accessed here:
- https://qc.sentinel1.eo.esa.int/aux_cal/?instrument_configuration_id=3
+ https://aux.sentinel1.eo.esa.int/AUX_CAL/
Note 3: Precise orbits
@@ -33,10 +33,10 @@ All file paths in the input files should either be absolute paths or relative to
Use of precise / restituted orbits are highly recommend for precise processing of Sentinel-1A interferograms. You can dump all the orbits in a common folder and ISCE will automatically identify the right orbit file to use with your data.
Precise orbit data can be accessed here and are typically available 3 weeks after the SLCs are available:
- https://qc.sentinel1.eo.esa.int/aux_poeorb/
+ https://aux.sentinel1.eo.esa.int/POEORB/
Restituted orbits can be accessed here and are available at the same time as the SLCs:
- https://qc.sentinel1.eo.esa.int/aux_resorb/
+ https://aux.sentinel1.eo.esa.int/RESORB/
Note 3: Multiple slices
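The `POEORB/` and `RESORB/` trees referenced above can be fetched with the same chunked-write pattern the scripts in this PR use in `download_file`; a stand-alone sketch (the commented example URL is hypothetical):

```python
# Sketch only: stream one orbit file to disk, mirroring the chunked-write
# pattern of download_file in the scripts above.
import os
import requests

def fetch(url, outdir='.'):
    path = os.path.join(outdir, os.path.basename(url.rstrip('/')))
    r = requests.get(url, stream=True, verify=False)
    r.raise_for_status()
    with open(path, 'wb') as f:
        for chunk in r.iter_content(chunk_size=1024):
            if chunk:
                f.write(chunk)
    return path

# fetch('https://aux.sentinel1.eo.esa.int/POEORB/<YYYY>/<MM>/<DD>/<orbit file>.EOF')
```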