99import os
1010import hydra
1111import cdsapi
12+ import tempfile
13+ import zipfile
14+ import requests
1215import geopandas as gpd
16+ from pathlib import Path
1317from pyprojroot import here
1418from shapely .geometry import box
1519from omegaconf import DictConfig , ListConfig , OmegaConf
@@ -70,49 +74,63 @@ def fetch_GADM(
7074
7175# %% ../../notes/01_download_raw_data.ipynb 7
7276def create_bounding_box (
73- gadm_file : str ,
74- buffer_km : float = 50 ,
75- round_to : int = 1
76- ) -> list :
77+ zip_url_or_path : str ,
78+ buffer_km : float = 50 ,
79+ round_to : int = 1
80+ ) -> list :
7781 '''
78- Create a bounding box from the GADM data with a buffer.
79-
80- This function reads the GADM data from a file or URL, applies a buffer around the region,
81- and extracts the bounding box in the CDS API area format.
82+ Create a bounding box from OCHA/HDX shapefile data with a buffer.
8283
8384 Parameters:
84- gadm_file (str): Path or URL to the GADM file .
85+ zip_url_or_path (str): URL or local path to the zipped shapefile .
8586 buffer_km (float): Buffer distance in kilometers to expand the bounding box.
8687 round_to (int): Number of decimal places to round the bounding box coordinates.
8788
8889 Returns:
8990 list: Bounding box in the CDS API area format [North, West, South, East].
9091 '''
91-
92- # Read the GADM data
93- ground_shape = gpd .read_file (gadm_file , layer = "ADM_ADM_0" )
94-
95- # Reproject to a projected CRS (e.g., UTM Zone 38S for Madagascar)
96- projected_shape = ground_shape .to_crs (epsg = 32738 ) # UTM Zone 38S
97-
98- # Apply the buffer in meters (1 km = 1000 meters)
99- buffered_shape = projected_shape .geometry .buffer (buffer_km * 1000 )
100-
101- # Convert back to the original geographic CRS (WGS84)
102- buffered_shape = gpd .GeoSeries (buffered_shape , crs = projected_shape .crs ).to_crs (epsg = 4326 )
103-
104- # Calculate the bounding box
105- bbox = buffered_shape .total_bounds # [min_x, min_y, max_x, max_y]
106-
107- # Rearrange to CDS API format: [North, West, South, East]
108- bbox = [
109- round (bbox [3 ], round_to ), # max_y (North)
110- round (bbox [0 ], round_to ), # min_x (West)
111- round (bbox [1 ], round_to ), # min_y (South)
112- round (bbox [2 ], round_to ) # max_x (East)
113- ]
114-
115- return bbox
92+ with tempfile .TemporaryDirectory () as tmpdir :
93+ # Download if it's a URL
94+ if zip_url_or_path .startswith ("http" ):
95+ response = requests .get (zip_url_or_path )
96+ zip_path = os .path .join (tmpdir , "ocha_data.zip" )
97+ with open (zip_path , "wb" ) as f :
98+ f .write (response .content )
99+ else :
100+ zip_path = zip_url_or_path
101+
102+ # Unzip
103+ with zipfile .ZipFile (zip_path , 'r' ) as zip_ref :
104+ zip_ref .extractall (tmpdir )
105+
106+ # Find the .shp file
107+ shp_files = list (Path (tmpdir ).rglob ("*.shp" ))
108+ if not shp_files :
109+ raise FileNotFoundError ("No shapefile (.shp) found in the extracted archive." )
110+ shp_path = str (shp_files [0 ]) # Use first found .shp
111+
112+ # Read shapefile
113+ shape = gpd .read_file (shp_path )
114+
115+ # Reproject to projected CRS (you may want to detect the correct UTM zone)
116+ shape_proj = shape .to_crs (epsg = 32738 )
117+
118+ # Apply buffer
119+ buffered = shape_proj .geometry .buffer (buffer_km * 1000 )
120+
121+ # Convert back to geographic coordinates
122+ buffered_geo = gpd .GeoSeries (buffered , crs = shape_proj .crs ).to_crs (epsg = 4326 )
123+
124+ # Get bounding box
125+ bounds = buffered_geo .total_bounds # [min_x, min_y, max_x, max_y]
126+ bbox = [
127+ round (bounds [3 ], round_to ), # North
128+ round (bounds [0 ], round_to ), # West
129+ round (bounds [1 ], round_to ), # South
130+ round (bounds [2 ], round_to ) # East
131+ ]
132+
133+ return bbox
116134
117135
118136# %% ../../notes/01_download_raw_data.ipynb 8
@@ -137,7 +155,7 @@ def download_raw_era5(
137155
138156 # Send the query to the client
139157 if not testing :
140- bounds = create_bounding_box (cfg ['gadm_file ' ])
158+ bounds = create_bounding_box (cfg ['mdg_shapefile ' ])
141159 query ['area' ] = bounds
142160 client .retrieve (dataset , query ).download (target )
143161 else :
@@ -148,7 +166,7 @@ def download_raw_era5(
148166# %% ../../notes/01_download_raw_data.ipynb 11
149167@hydra .main (config_path = "../../conf" , config_name = "config" , version_base = None )
150168def main (cfg : DictConfig ) -> None :
151- download_raw_era5 (cfg = cfg , testing = False )
169+ download_raw_era5 (cfg = cfg )
152170
153171# %% ../../notes/01_download_raw_data.ipynb 12
154172#| eval: false
0 commit comments