Dataset: sampling_effort
Contents
Dataset: sampling_effort
import pandas as pd
import requests, zipfile, io, os
from bs4 import BeautifulSoup
import urllib.parse
import src.utils as ut
# Root path of the application; later cells build the data folders from it.
project_path = ut.project_path()

# Candidate metadata files describing this dataset.
# NOTE(review): the integer passed to ut.project_path() presumably selects a
# directory level relative to the project root -- confirm against src.utils.
json_meta_path = f"{ut.project_path(1)}/meta/mosquito_alert/sampling_effort.json"
html_meta_path = f"{ut.project_path(2)}/meta_ipynb/sampling_effort.html"
meta_filename = [json_meta_path, html_meta_path]

# Load the metadata, then hand it to ut.info_meta (presumably to display a
# summary that includes the contentUrl entries -- confirm against src.utils).
metadata = ut.load_metadata(meta_filename)
ut.info_meta(metadata)
1. Distribution from Zenodo cloud
This dataset is updated nightly and the most recent version can be downloaded from Zenodo at https://doi.org/10.5281/zenodo.5802476. This URL will always resolve to the most recent version of the data.
# Look up the distribution at index 0 in the metadata and unpack its
# content URL, dataset name and distribution name.
distribution = ut.get_meta(metadata, idx_distribution=0, idx_hasPart=None)
contentUrl, dataset_name, distr_name = distribution

# Target folder for this dataset/distribution pair; created via ut.makedirs.
path = f"{project_path}/data/{dataset_name}/{distr_name}"
ut.makedirs(path)
# Download and open the zip container.
#
# contentUrl points at the dataset's landing page, so the page is fetched
# first and the link to the actual archive is scraped from it.
# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() turns HTTP errors into an immediate, readable exception
# instead of letting an error page flow into the HTML/zip parsing below.
r = requests.get(contentUrl, timeout=120)
r.raise_for_status()

# The archive link is expected to be an <a class="filename"> element.
file_link = BeautifulSoup(r.content, "html.parser").find("a", {"class": "filename"})
if file_link is None or not file_link.get("href"):
    raise RuntimeError(
        f"No <a class='filename'> download link found on {r.url}; "
        "the page layout may have changed."
    )
file_url = file_link["href"]

# The href may be relative; resolve it against the final (post-redirect) URL.
file_contentUrl = urllib.parse.urljoin(r.url, file_url)

# Download the dataset and open it as an in-memory zip archive.
r_file = requests.get(file_contentUrl, timeout=120)
r_file.raise_for_status()
z = zipfile.ZipFile(io.BytesIO(r_file.content))
We have the option to extract all the files into a distribution folder.
# Extract every member of the archive into the distribution folder.
z.extractall(path)

# Load the 0.05 degree cell resolution dataset into a dataframe.
# Collect all archive members whose name contains the expected file name and
# fail loudly when there is none; otherwise `filename` would be undefined (or,
# in a notebook, silently reuse a stale value from an earlier run).
matches = [s for s in z.namelist() if "cellres_05.csv.gz" in s]
if not matches:
    raise FileNotFoundError(
        "No member matching 'cellres_05.csv.gz' found in the downloaded archive."
    )
# Keep the last match, as the original loop did.
filename = matches[-1]

df_05 = pd.read_csv(f"{path}/{filename}")
df_05.info()
2. Distribution from MosquitoAlert GitHub repository
# Look up the distribution at index 1 in the metadata and unpack its
# content URL(s), dataset name and distribution name.
distribution = ut.get_meta(metadata, idx_distribution=1, idx_hasPart=None)
contentUrl, dataset_name, distr_name = distribution

# Target folder for this dataset/distribution pair; created via ut.makedirs.
path = f"{project_path}/data/{dataset_name}/{distr_name}"
ut.makedirs(path)
# Download the dataset: fetch each URL in contentUrl and store it in the
# distribution folder under the URL's base name.
for url in contentUrl:
    r = requests.get(url, timeout=120)
    # Abort on HTTP errors so an error page is never written to disk as data.
    r.raise_for_status()
    filename = os.path.basename(url)
    with open(f"{path}/{filename}", "wb") as f:
        f.write(r.content)