Changes to fetchOrbit.py to download orbit files from the new ESA website (#273)

* Changes to fetchOrbit.py to download orbit files from the new ESA website

I've made changes to fetchOrbit.py to download the files from https://scihub.copernicus.eu/gnss/#/home. The generic gnssguest credentials are used to query and download the orbit files.
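
For reference, a minimal sketch of querying the new endpoint with the generic account (the endpoint and credentials are the ones added in the diff below; the search term is only illustrative):

    import requests

    # Endpoint and generic account as added in this patch; the query itself
    # is an illustration, not the exact query the script builds.
    server = 'https://scihub.copernicus.eu/gnss/'
    credentials = ('gnssguest', 'gnssguest')

    session = requests.Session()
    r = session.get(server + 'search?q=producttype:AUX_POEORB&rows=1',
                    verify=True, auth=credentials)
    r.raise_for_status()
    print(r.status_code)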

* Update fetchOrbit.py

* Make output path generic

I've used os.path.join on line 165 to make the output path more generic.
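
A small sketch of that change (directory and file name are made up):

    import os

    # os.path.join avoids hard-coding a path separator when building the
    # output file path; the names here are illustrative.
    outdir = './orbits'
    fname = 'S1A_OPER_AUX_POEORB_OPOD_20210101T121000_V20201211T225942_20201213T005942.EOF'
    print(os.path.join(outdir, fname))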

* Set the verify option to True

I've set the verify option to True to remove the "Unverified HTTPS request" warning.
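
A sketch of the difference (the query is illustrative; the credentials are the generic gnssguest account):

    import requests

    url = 'https://scihub.copernicus.eu/gnss/search?q=*&rows=1'
    auth = ('gnssguest', 'gnssguest')

    # With verify=False the request still succeeds, but urllib3 emits an
    # InsecureRequestWarning ("Unverified HTTPS request is being made ...").
    # With verify=True the server certificate is checked and no warning appears.
    r = requests.get(url, verify=True, auth=auth)
    print(r.status_code)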

* Changes to the download link in fetchOrbit.py

The download link previously used the href link from the website's XML. It was working properly until yesterday, when it stopped working.

I've edited the script so that the link is instead consistent with the download link on the Copernicus scihub GNSS website. I've tested it and it works properly.
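
A minimal sketch of the link handling added in handle_starttag in the diff below: any Quicklook segment is stripped from the href so that the remaining OData link points at the product itself (the href value and UUID are made up for illustration):

    # Made-up example of an OData link as it might appear in the search feed.
    val = ("https://scihub.copernicus.eu/gnss/odata/v1/"
           "Products('00000000-0000-0000-0000-000000000000')/Products('Quicklook')/$value")

    downloadLink = val.strip().split("/Products('Quicklook')")
    downloadLink = downloadLink[0] + downloadLink[-1]
    print(downloadLink)
    # .../Products('00000000-0000-0000-0000-000000000000')/$value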

* Update fetchOrbit.py
Bryan Marfito 2021-05-04 13:06:54 +08:00 committed by GitHub
parent def109815d
commit 2f8de43d28
1 changed file with 41 additions and 58 deletions

fetchOrbit.py

@@ -1,22 +1,25 @@
 #!/usr/bin/env python3
 
 import numpy as np
+import re
 import requests
-import re
 import os
 import argparse
 import datetime
 from html.parser import HTMLParser
 
-server = 'http://aux.sentinel1.eo.esa.int/'
+server = 'https://scihub.copernicus.eu/gnss/'
 
-orbitMap = [('precise', 'POEORB/'),
-            ('restituted', 'RESORB/')]
+orbitMap = [('precise', 'AUX_POEORB'),
+            ('restituted', 'AUX_RESORB')]
 
 datefmt = "%Y%m%dT%H%M%S"
 queryfmt = "%Y-%m-%d"
 queryfmt2 = "%Y/%m/%d/"
 
+#Generic credentials to query and download orbit files
+credentials = ('gnssguest', 'gnssguest')
 
 
 def cmdLineParse():
     '''
@@ -55,38 +58,26 @@ def FileToTimeStamp(safename):
 class MyHTMLParser(HTMLParser):
 
-    def __init__(self, satName, url):
+    def __init__(self,url):
         HTMLParser.__init__(self)
         self.fileList = []
-        self.in_td = False
-        self.in_a = False
-        self.in_table = False
         self._url = url
-        self.satName = satName
 
     def handle_starttag(self, tag, attrs):
-        if tag == 'td':
-            self.in_td = True
-        elif tag == 'a':
-            self.in_a = True
-            for name, val in attrs:
-                if name == "href":
-                    if val.startswith("http"):
-                        self._url = val.strip()
+        for name, val in attrs:
+            if name == 'href':
+                if val.startswith("https://scihub.copernicus.eu/gnss/odata") and val.endswith(")/"):
+                    pass
+                else:
+                    downloadLink = val.strip()
+                    downloadLink = downloadLink.split("/Products('Quicklook')")
+                    downloadLink = downloadLink[0] + downloadLink[-1]
+                    self._url = downloadLink
 
     def handle_data(self, data):
-        if self.in_td and self.in_a:
-            if self.satName in data:
-                self.fileList.append((self._url, data.strip()))
-
-    def handle_tag(self, tag):
-        if tag == 'td':
-            self.in_td = False
-            self.in_a = False
-        elif tag == 'a':
-            self.in_a = False
-            self._url = None
+        if data.startswith("S1") and data.endswith(".EOF"):
+            self.fileList.append((self._url, data.strip()))
 
 
 def download_file(url, outdir='.', session=None):
     '''
@@ -96,9 +87,9 @@ def download_file(url, outdir='.', session=None):
     if session is None:
         session = requests.session()
 
-    path = os.path.join(outdir, os.path.basename(url))
+    path = outdir
     print('Downloading URL: ', url)
-    request = session.get(url, stream=True, verify=False)
+    request = session.get(url, stream=True, verify=True, auth=credentials)
 
     try:
         val = request.raise_for_status()
@@ -139,37 +130,29 @@ if __name__ == '__main__':
     fileTS, satName, fileTSStart = FileToTimeStamp(inps.input)
     print('Reference time: ', fileTS)
     print('Satellite name: ', satName)
 
     match = None
     session = requests.Session()
 
     for spec in orbitMap:
         oType = spec[0]
-
-        if oType == 'precise':
-            end_date = fileTS + datetime.timedelta(days=20)
-        elif oType == 'restituted':
-            end_date = fileTS
-        else:
-            raise ValueError("Unexpected orbit type: '" + oType + "'")
-        end_date2 = end_date + datetime.timedelta(days=1)
-        urls = (server + spec[1] + end_date.strftime("%Y/%m/%d/") for end_date in (end_date, end_date2))
+        delta = datetime.timedelta(days=1)
+        timebef = (fileTS - delta).strftime(queryfmt)
+        timeaft = (fileTS + delta).strftime(queryfmt)
+        url = server + 'search?q=( beginPosition:[{0}T00:00:00.000Z TO {1}T23:59:59.999Z] AND endPosition:[{0}T00:00:00.000Z TO {1}T23:59:59.999Z] ) AND ( (platformname:Sentinel-1 AND filename:{2}_* AND producttype:{3}))&start=0&rows=100'.format(timebef,timeaft, satName,spec[1])
 
         success = False
         match = None
 
         try:
-            for url in urls:
-                r = session.get(url, verify=False)
-                r.raise_for_status()
-                parser = MyHTMLParser(satName, url)
-                parser.feed(r.text)
-
-                for resulturl, result in parser.fileList:
-                    tbef, taft, mission = fileToRange(os.path.basename(result))
-
-                    if (tbef <= fileTSStart) and (taft >= fileTS):
-                        match = os.path.join(resulturl, result)
-
-                if match is not None:
-                    success = True
+            r = session.get(url, verify=True, auth=credentials)
+            r.raise_for_status()
+            parser = MyHTMLParser(url)
+            parser.feed(r.text)
+
+            for resulturl, result in parser.fileList:
+                tbef, taft, mission = fileToRange(os.path.basename(result))
+
+                if (tbef <= fileTSStart) and (taft >= fileTS):
+                    matchFileName = result
+                    match = os.path.join(server[0:-5],resulturl[36:])
+
+            if match is not None:
+                success = True
@@ -180,8 +163,8 @@ if __name__ == '__main__':
             break
 
     if match is not None:
-        res = download_file(match, inps.outdir, session)
+        output = os.path.join(inps.outdir, matchFileName)
+        res = download_file(match, output, session)
         if res is False:
             print('Failed to download URL: ', match)
     else:
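
For reference, a standalone sketch of the search URL the patched loop builds, using the same format string as in the diff above (date, satellite name, and product type are illustrative):

    import datetime

    server = 'https://scihub.copernicus.eu/gnss/'
    queryfmt = "%Y-%m-%d"

    fileTS = datetime.datetime(2021, 5, 2, 12, 0, 0)   # illustrative acquisition time
    satName = 'S1A'
    producttype = 'AUX_POEORB'

    # Query a +/- one-day window around the acquisition, as the patch does.
    delta = datetime.timedelta(days=1)
    timebef = (fileTS - delta).strftime(queryfmt)
    timeaft = (fileTS + delta).strftime(queryfmt)
    url = server + 'search?q=( beginPosition:[{0}T00:00:00.000Z TO {1}T23:59:59.999Z] AND endPosition:[{0}T00:00:00.000Z TO {1}T23:59:59.999Z] ) AND ( (platformname:Sentinel-1 AND filename:{2}_* AND producttype:{3}))&start=0&rows=100'.format(timebef, timeaft, satName, producttype)
    print(url)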