DataCatalog: model_tables

import requests

import src.utils as ut
import src.ftp as ftp

# Setup the root path of the application
# (reused further down to build the data download folders)
project_path = ut.project_path()

# Load the metadata

# Paths handed to ut.load_metadata: a JSON metadata file and an HTML page.
# NOTE(review): the integer passed to ut.project_path() presumably selects a
# different base directory level -- confirm against src/utils.
meta_filename = [
    f"{ut.project_path(1)}/meta/mosquito_alert/model_tables.json",
    f"{ut.project_path(2)}/meta_ipynb/model_tables.html",
]
metadata = ut.load_metadata(meta_filename)

# Show a summary of the loaded metadata
# NOTE(review): presumably lists the distributions and their contentUrl
# values -- confirm against ut.info_meta.
ut.info_meta(metadata)

Part 1: encounter_prob_monthly_grid

1.1. Distribution by HTTP download from MosquitoAlert webserver

This distribution lets you download the estimated probability of encountering a given type of mosquito on a monthly basis. This information is displayed on the Mosquito Alert public map.

# Get metadata for distribution index 0 of hasPart index 0: the content URL
# (a template with MOSQUITO_TYPE / YEAR / MONTH placeholders, filled in below)
# plus the dataset and distribution names
contentUrl, dataset_name, distr_name = ut.get_meta(
    metadata, idx_distribution=0, idx_hasPart=0
)

# Make folders for data download
# (files are written under <project_path>/data/<dataset_name>/<distr_name>)
path = f"{project_path}/data/{dataset_name}/{distr_name}"
ut.makedirs(path)

In order to get a model output we need to provide the mosquito type, year and month.

# Set mosquito type ('tig' for Aedes Albopictus and 'jap' for Aedes Japonicus)
MOSQUITO_TYPE = "tig"
# For example get all the months for a given year
YEAR = "2021"
# Zero-padded month numbers "01" .. "12"
MONTH = [str(i).zfill(2) for i in range(1, 13)]

for m in MONTH:
    # Download each CSV-file
    url = contentUrl.format(MOSQUITO_TYPE=MOSQUITO_TYPE, YEAR=YEAR, MONTH=m)
    # Without a timeout a stalled connection would block the loop forever
    r = requests.get(url, timeout=60)
    if not r.ok:
        # Never store an HTTP error page under a .csv name; report the
        # failing month and carry on with the remaining ones
        print(f"Skipping {url}: HTTP {r.status_code}")
        continue
    with open(f"{path}/{MOSQUITO_TYPE}_{m}-{YEAR}.csv", "wb") as f:
        f.write(r.content)

1.2. Distribution by SFTP download from MosquitoAlert webserver

This distribution provides exactly the same dataset as above, but via SFTP. Before we can download the dataset, we need a user name and password with SSH access to the remote machine.

# Get metadata for distribution index 1 of hasPart index 0.
# NOTE(review): presumably parse=True makes get_meta return contentUrl as a
# parsed URL object -- its .path, .hostname, .port and .username attributes
# are read below. Confirm against ut.get_meta.
contentUrl, dataset_name, distr_name = ut.get_meta(
    metadata, idx_distribution=1, idx_hasPart=0, parse=True
)

# Make folders for data download
path = f"{project_path}/data/{dataset_name}/{distr_name}"
ut.makedirs(path)
# Set mosquito type ('tig' for Aedes Albopictus and 'jap' for Aedes Japonicus)
MOSQUITO_TYPE = "tig"
# For example get just one month for a given year
YEAR = "2021"
MONTH = "01"
# Fill the MOSQUITO_TYPE / YEAR / MONTH placeholders of the remote file path
contentUrl_path = contentUrl.path.format(
    MOSQUITO_TYPE=MOSQUITO_TYPE, YEAR=YEAR, MONTH=MONTH
)
# Ask for the user password to connect by SFTP.
# getpass is used instead of input() so the typed password is not echoed
# back on screen.
from getpass import getpass

password = getpass(f"Enter {contentUrl.username} user password:")

# Get the dataframe: read the remote CSV over SFTP using the connection
# details taken from the parsed content URL
df = ftp.read_csv_sftp(
    hostname=contentUrl.hostname,
    port=contentUrl.port,
    username=contentUrl.username,
    password=password,
    remotepath=contentUrl_path,
)

# Print a summary of the downloaded dataframe
df.info()
# Save reports on CSV or parquet
# The file name must use MONTH (the month requested over SFTP), not the
# leftover loop variable `m` from the HTTP example, which would label the
# data with the wrong month or fail if that loop was never run.
filename = f"{path}/{MOSQUITO_TYPE}_{MONTH}-{YEAR}"
df.to_parquet(f"{filename}.parquet")  # very low file-size (need to install pyArrow)
df.to_csv(f"{filename}.csv")  # x10 size if compared with the dataframe