Dataset: sampling_effort

Contents

Dataset: sampling_effort

import pandas as pd
import requests, zipfile, io, os
from bs4 import BeautifulSoup
import urllib.parse

import src.utils as ut

# Setup the root path of the application
project_path = ut.project_path()

# Load the metadata

# Two candidate metadata locations are handed to ut.load_metadata: a JSON file
# and an HTML file, each rooted at a different ut.project_path(<n>) result.
# NOTE(review): presumably <n> is a number of parent levels and the loader
# uses whichever file exists -- confirm in src/utils.
meta_filename = [
    f"{ut.project_path(1)}/meta/mosquito_alert/sampling_effort.json",
    f"{ut.project_path(2)}/meta_ipynb/sampling_effort.html",
]
metadata = ut.load_metadata(meta_filename)

# Report on the loaded metadata.
# NOTE(review): presumably this prints a summary that includes each
# distribution's contentUrl -- confirm against ut.info_meta.
ut.info_meta(metadata)

1. Distribution from Zenodo cloud

This dataset is updated nightly and the most recent version can be downloaded from Zenodo at https://doi.org/10.5281/zenodo.5802476. This URL will always resolve to the most recent version of the data.

# Get metadata
# Ask for distribution index 0 with no hasPart selection; the three returned
# values are unpacked as the download URL, the dataset name and the
# distribution name.
contentUrl, dataset_name, distr_name = ut.get_meta(
    metadata, idx_distribution=0, idx_hasPart=None
)

# Make folders for data download
# Files of this distribution are kept under data/<dataset_name>/<distr_name>
# inside the project root.
path = f"{project_path}/data/{dataset_name}/{distr_name}"
ut.makedirs(path)

# Download and open the zip container

# Get the latest zenodo file version of the dataset.
# The page behind contentUrl is scraped for the archive link
# (<a class="filename">); presumably contentUrl is the DOI landing page that
# redirects to the newest record -- confirm against the metadata file.
r = requests.get(contentUrl, timeout=60)
r.raise_for_status()  # stop here instead of scraping an HTTP error page
file_link = BeautifulSoup(r.content, "html.parser").find("a", {"class": "filename"})
if file_link is None:
    # find() returns None when the page layout changed or the record has no
    # files; fail with an explicit message instead of a TypeError on ["href"].
    raise RuntimeError(f"No <a class='filename'> download link found on {r.url}")
file_url = file_link["href"]
# The href may be relative, so resolve it against the final (post-redirect) URL
file_contentUrl = urllib.parse.urljoin(r.url, file_url)

# Download the dataset
r_file = requests.get(file_contentUrl, timeout=60)
r_file.raise_for_status()  # an error body would otherwise surface as BadZipFile
z = zipfile.ZipFile(io.BytesIO(r_file.content))

We have the option to extract all the files into a distribution folder.

# Unpack every member of the downloaded archive into the distribution folder
z.extractall(path)

# Load the 0.05 degree cell resolution dataset into a dataframe
# Look the member up by name fragment because the archive may nest it in a
# sub-folder whose name is not known in advance.
matches = [name for name in z.namelist() if "cellres_05.csv.gz" in name]
if not matches:
    # Without this guard a missing member would surface as a NameError on
    # `filename` further down, hiding the real cause.
    raise FileNotFoundError(
        f"No 'cellres_05.csv.gz' member found in the archive: {z.namelist()}"
    )
filename = matches[-1]  # if several members match, the last one is used
df_05 = pd.read_csv(f"{path}/{filename}")
df_05.info()

2. Distribution from MosquitoAlert GitHub repository

# Get metadata
# Ask for distribution index 1 with no hasPart selection; the three returned
# values are unpacked as contentUrl, the dataset name and the distribution
# name.
# NOTE(review): contentUrl is iterated URL-by-URL when downloading, so it is
# presumably a list of URLs for this distribution -- confirm in ut.get_meta.
contentUrl, dataset_name, distr_name = ut.get_meta(
    metadata, idx_distribution=1, idx_hasPart=None
)

# Make folders for data download
# Files of this distribution are kept under data/<dataset_name>/<distr_name>
# inside the project root.
path = f"{project_path}/data/{dataset_name}/{distr_name}"
ut.makedirs(path)

# Download the dataset

# Get the latest GitHub file version of the dataset
for url in contentUrl:
    r = requests.get(url, timeout=60)
    # Without this check a 404/5xx HTML body would be saved as if it were data
    r.raise_for_status()
    # Name the local file after the URL path only, so a query string or
    # fragment (e.g. "?raw=true") does not end up in the file name.
    filename = os.path.basename(urllib.parse.urlparse(url).path)
    with open(f"{path}/{filename}", "wb") as f:
        f.write(r.content)

previous

Metadata table

next

Metadata table