Downloading Example Datasets

Example datasets are hosted on Zenodo at the following URLs:

Small Dataset https://zenodo.org/records/13384454/files/aopp_deconv_tool_example_datasets_small.tar?download=1

First we define some helper functions, then we download the archive from the server, and finally we extract it.

Define helper functions

In [44]:
import datetime as dt


# Helper-function to detect if we are in a jupyter notebook
def in_notebook():
    """Return True when an IPython kernel application is configured.

    The check looks for the ``'IPKernelApp'`` key in the configuration of
    the active IPython shell. If IPython cannot be imported, or the shell
    object has no ``config`` attribute (for example because
    ``get_ipython()`` returned ``None``), the answer is False.
    """
    try:
        from IPython import get_ipython
        has_kernel_app = 'IPKernelApp' in get_ipython().config  # pragma: no cover
    except (ImportError, AttributeError):
        return False
    return has_kernel_app

# If we are in a notebook, import the function to clear the output area.
# Outside a notebook the name `clear_output` is never bound, so any caller
# must check in_notebook() again before using it.
if in_notebook():
    from IPython.display import clear_output

# Define a class for a progress bar
class ProgressBar:
    """Text progress indicator that redraws itself in place.

    Parameters
    ----------
    max_value : int or None
        Value that represents 100 %. ``None`` means the total is unknown;
        the percentage is then reported as ``nan``.
    interval : float
        Minimum number of seconds between two redraws triggered by
        :meth:`update`.
    fmt : str or None
        ``str.format`` template. It may use the fields ``{value}``,
        ``{max_value}`` and ``{percentage}``. When ``None`` a default
        template is chosen depending on whether ``max_value`` is known.
    """

    def __init__(self, max_value=None, interval=0.2, fmt=None):
        self.format = fmt
        self.value = 0
        self.max_value = max_value
        self.next_print = dt.datetime.now()
        self.interval = dt.timedelta(seconds=interval)
        self.is_complete = False
        self.is_fail = False

        if self.format is None:
            if self.max_value is None:
                self.format = 'Progress: {value}'
            else:
                self.format = 'Progress: {value}/{max_value} [{percentage:6.2f}%]'

    def update(self, value):
        """Record a new value and redraw if ``interval`` has elapsed."""
        self.value = value

        now = dt.datetime.now()
        if now > self.next_print:
            self.next_print = now + self.interval
            self.print()

    def complete(self):
        """Mark the task as finished and draw the final state."""
        self.is_complete = True
        self.print()

    def fail(self):
        """Mark the task as failed and draw the final state."""
        self.is_fail = True
        self.print()

    def clear_output(self):
        """Erase the previously drawn line so the next one replaces it."""
        if in_notebook():
            # wait=True: only clear once new output is available
            clear_output(True)
        else:
            # Return the cursor to the start of the line WITHOUT emitting a
            # newline, so the next print overwrites the previous text.
            print('\r', end='', flush=True)

    def _percentage(self, value):
        """Return ``value`` as a percentage of ``max_value``.

        ``nan`` is returned when no ratio can be formed (``max_value`` is
        ``None`` or zero); ``nan`` is still accepted by float format specs
        such as ``{percentage:6.2f}``.
        """
        if not self.max_value:
            return float('nan')
        return 100 * value / self.max_value

    def print(self):
        """Draw the current state according to ``self.format``."""
        self.clear_output()

        shown_value = self.value
        suffix = ''
        if self.is_complete:
            if self.max_value is not None:
                # A completed bar always shows the full amount at 100 %,
                # whatever the last value passed to update() was.
                shown_value = self.max_value
                percentage = 100.0
            else:
                percentage = self._percentage(shown_value)
        else:
            percentage = self._percentage(shown_value)
            if self.is_fail:
                suffix = ' FAILED'

        text = self.format.format(
            value=shown_value,
            max_value=self.max_value,
            percentage=percentage,
        ) + suffix

        # Intermediate states stay on the same line so they can be
        # overwritten; a final state ends the line.
        is_final = self.is_complete or self.is_fail
        print(text, end='\n' if is_final else '', flush=True)

Download the file

In [45]:
import urllib.request
from pathlib import Path
import io


EXAMPLE_DATA_DIR = Path("../example_data")
URL = "https://zenodo.org/records/13384454/files/aopp_deconv_tool_example_datasets_small.tar?download=1"
EXAMPLE_DATA_ARCHIVE = EXAMPLE_DATA_DIR / "small_dataset.tar"


# Create the destination directory (and any missing parents); does nothing if it already exists
EXAMPLE_DATA_DIR.mkdir(parents=True, exist_ok=True)

# Download file from server
with urllib.request.urlopen(URL) as response, EXAMPLE_DATA_ARCHIVE.open('wb') as fdest:
    # Get the length of the file from the server (the header may be absent)
    length = response.getheader('content-length')
    length = int(length) if length is not None else None

    # Create a progress bar so we can see what is happening
    progress_bar = ProgressBar(length, fmt='Downloaded {value}/{max_value} [{percentage:6.2f}%] bytes of file')

    # Define variables
    n_total_read = 0 # Total bytes read from server
    buffer = bytearray(4*2**10) # A reusable buffer to store bytes from server in
    view = memoryview(buffer) # Zero-copy window onto the buffer, used for slicing
    n_read = -1 # Number of bytes read in one go from server

    # Read the data from the server in a loop, update the progress bar
    try:
        while response.readable():
            n_read = response.readinto(buffer)
            if not n_read:
                # Zero bytes read means end of stream
                break
            # Only the first n_read bytes of the buffer are fresh data; writing
            # the whole buffer would append stale bytes after a short read.
            fdest.write(view[:n_read])
            n_total_read += n_read
            progress_bar.update(n_total_read)
    except Exception:
        # Show the failure in the progress display, then let the error propagate
        progress_bar.fail()
        raise

    # A connection that closes early ends the loop without an exception, so
    # compare against the advertised size when the server provided one.
    if length is not None and n_total_read != length:
        progress_bar.fail()
        raise OSError(f'Incomplete download: received {n_total_read} of {length} bytes from {URL}')

    # Complete the progress bar when file is downloaded
    progress_bar.complete()
Downloaded 81121280/81121280 [100.00%] bytes of file

Unpack the downloaded archive

In [46]:
import shutil

# Extract the tar archive into the example-data directory
shutil.unpack_archive(
    EXAMPLE_DATA_ARCHIVE,
    extract_dir=EXAMPLE_DATA_DIR,
    format='tar',
)

# The archive is no longer needed once its contents are on disk
EXAMPLE_DATA_ARCHIVE.unlink()

# List what the example-data directory now contains, one path per line
for item in EXAMPLE_DATA_DIR.iterdir():
    print(item)
example_data/single_wavelength_example_std.fits
example_data/reduced_data_volume_1_sci.fits
example_data/datasets.json
example_data/2024-01-11-1951_9-Jupiter_890nm.tif
example_data/SMALL_DATACUBE_0001_1_sci.fits
example_data/reduced_data_volume_1_std.fits
example_data/SMALL_DATACUBE_0001_1_std.fits
example_data/single_wavelength_example_sci.fits
example_data/2024-01-11-1932_7-Jupiter_890nm.tif
example_data/2024-01-11-1957_2-Jupiter_750nm.tif
example_data/2024-01-11-1917_1-Jupiter_750nm.tif
example_data/2024-01-11-1925_9-Jupiter_727nm.tif
In [ ]: