Dataset: muni_preds
Contents
Dataset: muni_preds
from datetime import datetime
from datetime import timedelta
import pandas as pd
import requests
import src.utils as ut
# Resolve the root path of the application once; later cells build data paths from it.
project_path = ut.project_path()

# Candidate metadata locations handed to the loader: a JSON file and an HTML file.
# NOTE(review): the meaning of the integer passed to ut.project_path() is not
# visible here -- presumably a number of directory levels; confirm in src.utils.
meta_filename = [
    f"{ut.project_path(1)}/meta/mosquito_alert/muni_preds.json",
    f"{ut.project_path(2)}/meta_ipynb/muni_preds.html",
]
metadata = ut.load_metadata(meta_filename)

# Show the information held in the loaded metadata (presumably including the
# contentUrl that is used further down).
ut.info_meta(metadata)
1. Distribution from the Mosquito-Alert/MosquitoAlertES GitHub repository
# Read the URL template plus the dataset and distribution names for the first
# distribution (index 0) described in the metadata.
contentUrl, dataset_name, distr_name = ut.get_meta(
    metadata, idx_distribution=0, idx_hasPart=None
)

# Downloads are stored under <project>/data/<dataset>/<distribution>.
path = f"{project_path}/data/{dataset_name}/{distr_name}"
ut.makedirs(path)
This dataset includes a 5-day forecast. Here we take the past two weeks plus the five forecast days.
# Request one chunk of data per day in JSON format and concatenate all of them
# into a single dataframe. The window spans the past 14 days plus 5 forecast days.

# Read the clock once so both window bounds derive from the same instant
# (two separate now() calls could straddle midnight).
today = datetime.now()
start = (today - timedelta(days=14)).strftime("%Y-%m-%d")
end = (today + timedelta(days=5)).strftime("%Y-%m-%d")  # forecast

# DatetimeIndex.format() is deprecated in recent pandas releases; strftime()
# produces the same "YYYY-MM-DD" strings, and tolist() keeps date_range a list.
date_range = pd.date_range(start, end).strftime("%Y-%m-%d").tolist()

df_muni_preds = []
for date in date_range:
    # contentUrl is a template with a DATE placeholder filled in per day.
    url = contentUrl.format(DATE=date)
    # A timeout prevents a stalled connection from hanging the run forever.
    r = requests.get(url, timeout=30)
    # Fail loudly on HTTP errors instead of trying to parse an error page as JSON.
    r.raise_for_status()
    d = r.json()
    df_tmp = pd.DataFrame.from_records(d, coerce_float=True)
    # Tag every record with the day it was requested for.
    df_tmp.insert(0, "DATE", date)
    df_muni_preds.append(df_tmp)

df = pd.concat(df_muni_preds)
df.info()
# Write the dataframe to disk as both parquet and CSV, sharing one base filename.
filename = f"{path}/muni_preds"

# Parquet: very low file size (pyArrow needs to be installed).
df.to_parquet(f"{filename}.parquet")

# CSV: about 10x the size when compared with the dataframe.
df.to_csv(f"{filename}.csv")
# Plot the two probability columns over DATE for the rows whose NAMEUNIT is
# the Solórzano municipality.
plot_columns = ["DATE", "ma_prob_mean", "ma_prob_mean_codnut3"]
df.query("NAMEUNIT == 'Solórzano'")[plot_columns].plot(
    x="DATE", ylabel="Probability", figsize=(8, 3)
)