Dataset: sampling_effort
Dataset: sampling_effort
import pandas as pd
import requests, zipfile, io, os
from bs4 import BeautifulSoup
import urllib.parse
import src.utils as ut
# Setup the root path of the application
project_path = ut.project_path()
# Load the metadata
meta_filename = [
metadata = ut.load_metadata(meta_filename)
# Get contentUrl from metadata file
1. Distribution from Zenodo cloud
This dataset is updated nightly, and the most recent version can be downloaded from Zenodo at the contentUrl given in the metadata. This URL will always resolve to the most recent version of the data.
# Get metadata
contentUrl, dataset_name, distr_name = ut.get_meta(
metadata, idx_distribution=0, idx_hasPart=None
# Make folders for data download
path = f"{project_path}/data/{dataset_name}/{distr_name}"
# Download and open the zip container
# Get the latest zenodo file version of the dataset
r = requests.get(contentUrl)
file_url = BeautifulSoup(r.content, "html.parser").find("a", {"class": "filename"})[
# Resolve the (possibly relative) file link against the final URL of the
# landing-page request, so redirects to the latest version are honoured.
file_contentUrl = urllib.parse.urljoin(r.url, file_url)
# Download the dataset
r_file = requests.get(file_contentUrl)
# Fail with the HTTP error itself rather than letting an error page reach
# zipfile and surface as a misleading BadZipFile.
r_file.raise_for_status()
# Open the zip container from memory; nothing is written to disk here.
z = zipfile.ZipFile(io.BytesIO(r_file.content))
We have the option to extract all the files into a distribution folder.
# Load the 0.05 degree cell resolution dataset into a dataframe
# Pick the archive member whose name contains the 0.05-degree marker; when
# several members match, the last one listed is used.
filename = None
for s in z.namelist():
    if "cellres_05.csv.gz" in s:
        filename = s
if filename is None:
    # Report the real problem instead of failing later on an unbound name.
    raise FileNotFoundError(
        "No member matching 'cellres_05.csv.gz' found in the downloaded archive"
    )
# NOTE(review): this reads from disk, so it presumably relies on the archive
# having been extracted under `path` beforehand -- confirm.
df_05 = pd.read_csv(f"{path}/{filename}")
2. Distribution from MosquitoAlert Github repository
# Get metadata
contentUrl, dataset_name, distr_name = ut.get_meta(
metadata, idx_distribution=1, idx_hasPart=None
# Make folders for data download
path = f"{project_path}/data/{dataset_name}/{distr_name}"
# Download the dataset
# Get the latest GitHub file version of the dataset
for url in contentUrl:
r = requests.get(url)
filename = os.path.basename(url)
with open(f"{path}/{filename}", "wb") as f: