Downloading Example Datasets¶
The example dataset is hosted on Zenodo at the following URL:
Small Dataset https://zenodo.org/records/13384454/files/aopp_deconv_tool_example_datasets_small.tar?download=1
First we define some helper functions, then download the archive from the server, and finally extract it.
Define helper functions¶
In [44]:
import datetime as dt
# Helper-function to detect if we are in a jupyter notebook
def in_notebook():
    """Return True when the active IPython shell's config has an 'IPKernelApp' entry.

    Returns False when IPython cannot be imported, or when reading the
    shell's config raises AttributeError (e.g. ``get_ipython()`` gave back
    an object without a ``config`` attribute).
    """
    try:
        from IPython import get_ipython
        # The membership test stays inside the try so that any
        # AttributeError raised while evaluating it is handled too.
        kernel_present = 'IPKernelApp' in get_ipython().config  # pragma: no cover
    except (ImportError, AttributeError):
        return False
    return kernel_present
# If we are in a notebook, import the function to clear the output area.
# Outside a notebook the name `clear_output` is left undefined at module
# level; ProgressBar.clear_output only calls it when in_notebook() is true.
if in_notebook():
    from IPython.display import clear_output
# Define a class for a progress bar
class ProgressBar:
    """Rate-limited, single-line textual progress display.

    Parameters
    ----------
    max_value
        Value that counts as 100%. May be ``None`` when the total is
        unknown; the percentage is then reported as ``nan`` until the bar
        is completed.
    interval
        Minimum number of seconds between two redraws triggered by
        `update`.
    fmt
        ``str.format`` template; it may use the fields ``{value}``,
        ``{max_value}`` and ``{percentage}``. When ``None`` a default
        template is chosen depending on whether `max_value` is known.
    """

    def __init__(self, max_value=None, interval=0.2, fmt=None):
        self.format = fmt
        self.value = 0
        self.max_value = max_value
        # Earliest time at which `update` is allowed to redraw again.
        self.next_print = dt.datetime.now()
        self.interval = dt.timedelta(seconds=interval)
        self.is_complete = False
        self.is_fail = False

        if self.format is None:
            if self.max_value is None:
                self.format = 'Progress: {value}'
            else:
                self.format = 'Progress: {value}/{max_value} [{percentage:6.2f}%]'

    def update(self, value):
        """Store `value` and redraw if at least `interval` has passed since the last redraw."""
        self.value = value
        now = dt.datetime.now()
        if now > self.next_print:
            self.next_print = now + self.interval
            self.print()

    def complete(self):
        """Mark the bar as finished and redraw it."""
        self.is_complete = True
        self.print()

    def fail(self):
        """Mark the bar as failed and redraw it with a ' FAILED' suffix."""
        self.is_fail = True
        self.print()

    def clear_output(self):
        """Prepare the output area so the next redraw replaces the previous one."""
        if in_notebook():
            clear_output(True)
        else:
            # end='' (not None, which means "default newline") keeps the
            # cursor on the same line so the next text overwrites it.
            print('\r', end='')

    def _render(self):
        """Build the text for the current state without printing anything."""
        shown_value = self.value
        if self.is_complete and self.max_value is not None:
            shown_value = self.max_value

        if self.is_complete:
            percentage = 100.0
        elif self.max_value:
            percentage = 100 * shown_value / self.max_value
        else:
            # Unknown (None) or zero total: a percentage is undefined, but
            # a float is still needed for templates such as '{percentage:6.2f}'.
            percentage = float('nan')

        text = self.format.format(
            value=shown_value,
            max_value=self.max_value,
            percentage=percentage,
        )
        if self.is_fail and not self.is_complete:
            text += ' FAILED'
        return text

    def print(self):
        """Redraw the bar; a newline is emitted only once the bar is complete or failed."""
        self.clear_output()
        finished = self.is_complete or self.is_fail
        print(self._render(), end='\n' if finished else '', flush=True)
Download the file¶
In [45]:
import urllib.request
from pathlib import Path
import io
EXAMPLE_DATA_DIR = Path("../example_data")
URL = "https://zenodo.org/records/13384454/files/aopp_deconv_tool_example_datasets_small.tar?download=1"
EXAMPLE_DATA_ARCHIVE = EXAMPLE_DATA_DIR / "small_dataset.tar"

# create the directory
EXAMPLE_DATA_DIR.mkdir(parents=True, exist_ok=True)

# Download file from server
with urllib.request.urlopen(URL) as response, EXAMPLE_DATA_ARCHIVE.open('wb') as fdest:
    # Get the length of the file from the server (the header may be absent)
    length = response.getheader('content-length')
    length = int(length) if length is not None else None

    # Create a progress bar so we can see what is happening
    progress_bar = ProgressBar(length, fmt='Downloaded {value}/{max_value} [{percentage:6.2f}%] bytes of file')

    # Define variables
    n_total_read = 0  # Total bytes read from server
    buffer = bytearray(4*2**10)  # A buffer to store bytes from server in
    view = memoryview(buffer)  # Zero-copy window used to slice the buffer

    # Read the data from the server in a loop, update the progress bar
    while response.readable():
        n_read = response.readinto(buffer)  # Number of bytes read in one go
        if not n_read:
            # Nothing more to read: stop without writing anything
            break
        # Only the first n_read bytes are fresh; the rest of the buffer
        # still holds data from an earlier read and must not be written.
        fdest.write(view[:n_read])
        n_total_read += n_read
        progress_bar.update(n_total_read)

# Complete the progress bar when file is downloaded
progress_bar.complete()
Downloaded 81121280/81121280 [100.00%] bytes of file
Unpack the downloaded archive¶
In [46]:
import shutil
# Extract the tar archive into the example-data directory
shutil.unpack_archive(EXAMPLE_DATA_ARCHIVE, EXAMPLE_DATA_DIR, 'tar')

# The archive is no longer needed once its contents are on disk
EXAMPLE_DATA_ARCHIVE.unlink()

# List what the example-data directory now contains, one path per line
for unpacked_path in EXAMPLE_DATA_DIR.iterdir():
    print(str(unpacked_path))
example_data/single_wavelength_example_std.fits example_data/reduced_data_volume_1_sci.fits example_data/datasets.json example_data/2024-01-11-1951_9-Jupiter_890nm.tif example_data/SMALL_DATACUBE_0001_1_sci.fits example_data/reduced_data_volume_1_std.fits example_data/SMALL_DATACUBE_0001_1_std.fits example_data/single_wavelength_example_sci.fits example_data/2024-01-11-1932_7-Jupiter_890nm.tif example_data/2024-01-11-1957_2-Jupiter_750nm.tif example_data/2024-01-11-1917_1-Jupiter_750nm.tif example_data/2024-01-11-1925_9-Jupiter_727nm.tif
In [ ]: