Dataset: muni_preds


from datetime import datetime
from datetime import timedelta
import pandas as pd
import requests

import src.utils as ut

# Resolve the application's root path; it is reused later to build the data folder.
project_path = ut.project_path()

# Locations of the two metadata sources for this dataset: the JSON description
# and its HTML rendering.
# NOTE(review): the integer passed to ut.project_path() presumably selects a
# different base directory for each file — confirm against src.utils.
json_meta_file = f"{ut.project_path(1)}/meta/mosquito_alert/muni_preds.json"
html_meta_file = f"{ut.project_path(2)}/meta_ipynb/muni_preds.html"
meta_filename = [json_meta_file, html_meta_file]

# Load the metadata from both files.
metadata = ut.load_metadata(meta_filename)

# Report on the loaded metadata (presumably shows, among other fields, the
# contentUrl used below — verify against ut.info_meta).
ut.info_meta(metadata)

1. Distribution from the Mosquito-Alert/MosquitoAlertES GitHub repository

# Read the download URL template and the dataset/distribution names for the
# first distribution (index 0, with no hasPart entry selected).
distribution_meta = ut.get_meta(metadata, idx_distribution=0, idx_hasPart=None)
contentUrl, dataset_name, distr_name = distribution_meta

# Create the folder the downloaded data will be stored in.
path = f"{project_path}/data/{dataset_name}/{distr_name}"
ut.makedirs(path)

This dataset includes a 5-day forecast. Here we retrieve the past two weeks of data plus the 5 forecast days.

# Request one chunk of data per day in JSON format and concatenate all of them
# into a single dataframe.

# Take a single "now" so both window bounds are measured from the same instant
# (two separate datetime.now() calls could straddle midnight).
today = datetime.now()
start = (today - timedelta(days=14)).strftime("%Y-%m-%d")
end = (today + timedelta(days=5)).strftime("%Y-%m-%d")  # forecast
# One "YYYY-MM-DD" string per day in the window, both bounds included.
# DatetimeIndex.strftime replaces Index.format, which is deprecated in pandas 2.2.
date_range = pd.date_range(start, end).strftime("%Y-%m-%d").tolist()

df_muni_preds = []
for date in date_range:
    # contentUrl is a template with a {DATE} placeholder.
    url = contentUrl.format(DATE=date)
    # A timeout keeps a stalled connection from hanging the run indefinitely.
    r = requests.get(url, timeout=30)
    # Fail with the HTTP status (e.g. a day that is not published) instead of
    # an opaque JSON decoding error on the response body.
    r.raise_for_status()
    d = r.json()
    df_tmp = pd.DataFrame.from_records(d, coerce_float=True)
    # Tag every row with the day it was requested for.
    df_tmp.insert(0, "DATE", date)
    df_muni_preds.append(df_tmp)

# Each daily frame keeps its own row index, so index values repeat across days.
df = pd.concat(df_muni_preds)
df.info()
# Persist the dataframe to disk in both Parquet and CSV formats, sharing one
# base file name.
filename = f"{path}/muni_preds"
parquet_target = f"{filename}.parquet"
csv_target = f"{filename}.csv"

# Parquet gives a very small file (pyArrow needs to be installed).
df.to_parquet(parquet_target)
# Per the original author's note, the CSV is about 10x larger.
df.to_csv(csv_target)
# Plot the probability columns over time for the Solórzano municipality.
plot_columns = ["DATE", "ma_prob_mean", "ma_prob_mean_codnut3"]
solorzano_rows = df.query("NAMEUNIT == 'Solórzano'")
# DATE goes on the x axis; the remaining selected columns are drawn as lines.
solorzano_rows[plot_columns].plot(x="DATE", ylabel="Probability", figsize=(8, 3))