Skip to content

Commit 8208e62

Browse files
HOTFIX for #1
This hotfix moves the downloading of shapefiles from GADM.org to OCHA for dependability
1 parent d2b22da commit 8208e62

File tree

6 files changed

+159
-82
lines changed

6 files changed

+159
-82
lines changed

conf/config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ development_mode: false
77
CDS_API_KEY:
88
path: "$HOME/.cdsapirc"
99

10-
gadm_file: "https://geodata.ucdavis.edu/gadm/gadm4.1/gpkg/gadm41_MDG.gpkg"
10+
mdg_shapefile: "https://data.humdata.org/dataset/26fa506b-0727-4d9d-a590-d2abee21ee22/resource/ed94d52e-349e-41be-80cb-62dc0435bd34/download/mdg_adm_bngrc_ocha_20181031_shp.zip"
1111

1212
dataset: "reanalysis-era5-single-levels"
1313

notes/01_download_raw_data.ipynb

Lines changed: 103 additions & 44 deletions
Large diffs are not rendered by default.

snakemake.sbatch

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,6 @@
33
#SBATCH -p test # partition (queue)
44
#SBATCH -c 12 # number of cores
55
#SBATCH --mem 90GB # memory
6-
#SBATCH -t 0-10:00 # time (D-HH:MM)
6+
#SBATCH -t 0-12:00 # time (D-HH:MM)
77

88
snakemake --cores 6
0 Bytes
Binary file not shown.
0 Bytes
Binary file not shown.

src/era5_sandbox/download.py

Lines changed: 54 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,11 @@
99
import os
1010
import hydra
1111
import cdsapi
12+
import tempfile
13+
import zipfile
14+
import requests
1215
import geopandas as gpd
16+
from pathlib import Path
1317
from pyprojroot import here
1418
from shapely.geometry import box
1519
from omegaconf import DictConfig, ListConfig, OmegaConf
@@ -70,49 +74,63 @@ def fetch_GADM(
7074

7175
# %% ../../notes/01_download_raw_data.ipynb 7
7276
def create_bounding_box(
73-
gadm_file: str,
74-
buffer_km: float = 50,
75-
round_to: int = 1
76-
) -> list:
77+
zip_url_or_path: str,
78+
buffer_km: float = 50,
79+
round_to: int = 1
80+
) -> list:
7781
'''
78-
Create a bounding box from the GADM data with a buffer.
79-
80-
This function reads the GADM data from a file or URL, applies a buffer around the region,
81-
and extracts the bounding box in the CDS API area format.
82+
Create a bounding box from OCHA/HDX shapefile data with a buffer.
8283
8384
Parameters:
84-
gadm_file (str): Path or URL to the GADM file.
85+
zip_url_or_path (str): URL or local path to the zipped shapefile.
8586
buffer_km (float): Buffer distance in kilometers to expand the bounding box.
8687
round_to (int): Number of decimal places to round the bounding box coordinates.
8788
8889
Returns:
8990
list: Bounding box in the CDS API area format [North, West, South, East].
9091
'''
91-
92-
# Read the GADM data
93-
ground_shape = gpd.read_file(gadm_file, layer="ADM_ADM_0")
94-
95-
# Reproject to a projected CRS (e.g., UTM Zone 38S for Madagascar)
96-
projected_shape = ground_shape.to_crs(epsg=32738) # UTM Zone 38S
97-
98-
# Apply the buffer in meters (1 km = 1000 meters)
99-
buffered_shape = projected_shape.geometry.buffer(buffer_km * 1000)
100-
101-
# Convert back to the original geographic CRS (WGS84)
102-
buffered_shape = gpd.GeoSeries(buffered_shape, crs=projected_shape.crs).to_crs(epsg=4326)
103-
104-
# Calculate the bounding box
105-
bbox = buffered_shape.total_bounds # [min_x, min_y, max_x, max_y]
106-
107-
# Rearrange to CDS API format: [North, West, South, East]
108-
bbox = [
109-
round(bbox[3], round_to), # max_y (North)
110-
round(bbox[0], round_to), # min_x (West)
111-
round(bbox[1], round_to), # min_y (South)
112-
round(bbox[2], round_to) # max_x (East)
113-
]
114-
115-
return bbox
92+
with tempfile.TemporaryDirectory() as tmpdir:
93+
# Download if it's a URL
94+
if zip_url_or_path.startswith("http"):
95+
response = requests.get(zip_url_or_path)
96+
zip_path = os.path.join(tmpdir, "ocha_data.zip")
97+
with open(zip_path, "wb") as f:
98+
f.write(response.content)
99+
else:
100+
zip_path = zip_url_or_path
101+
102+
# Unzip
103+
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
104+
zip_ref.extractall(tmpdir)
105+
106+
# Find the .shp file
107+
shp_files = list(Path(tmpdir).rglob("*.shp"))
108+
if not shp_files:
109+
raise FileNotFoundError("No shapefile (.shp) found in the extracted archive.")
110+
shp_path = str(shp_files[0]) # Use first found .shp
111+
112+
# Read shapefile
113+
shape = gpd.read_file(shp_path)
114+
115+
# Reproject to projected CRS (you may want to detect the correct UTM zone)
116+
shape_proj = shape.to_crs(epsg=32738)
117+
118+
# Apply buffer
119+
buffered = shape_proj.geometry.buffer(buffer_km * 1000)
120+
121+
# Convert back to geographic coordinates
122+
buffered_geo = gpd.GeoSeries(buffered, crs=shape_proj.crs).to_crs(epsg=4326)
123+
124+
# Get bounding box
125+
bounds = buffered_geo.total_bounds # [min_x, min_y, max_x, max_y]
126+
bbox = [
127+
round(bounds[3], round_to), # North
128+
round(bounds[0], round_to), # West
129+
round(bounds[1], round_to), # South
130+
round(bounds[2], round_to) # East
131+
]
132+
133+
return bbox
116134

117135

118136
# %% ../../notes/01_download_raw_data.ipynb 8
@@ -137,7 +155,7 @@ def download_raw_era5(
137155

138156
# Send the query to the client
139157
if not testing:
140-
bounds = create_bounding_box(cfg['gadm_file'])
158+
bounds = create_bounding_box(cfg['mdg_shapefile'])
141159
query['area'] = bounds
142160
client.retrieve(dataset, query).download(target)
143161
else:
@@ -148,7 +166,7 @@ def download_raw_era5(
148166
# %% ../../notes/01_download_raw_data.ipynb 11
149167
@hydra.main(config_path="../../conf", config_name="config", version_base=None)
150168
def main(cfg: DictConfig) -> None:
151-
download_raw_era5(cfg=cfg, testing=False)
169+
download_raw_era5(cfg=cfg)
152170

153171
# %% ../../notes/01_download_raw_data.ipynb 12
154172
#| eval: false

0 commit comments

Comments
 (0)