Skip to content

Commit 8d57410

Browse files
Add CMIP6 demo
1 parent 01a2bb2 commit 8d57410

File tree

2 files changed

+61
-81
lines changed

2 files changed

+61
-81
lines changed

demo/cmip6/generate_cmip6_items.ipynb

Lines changed: 60 additions & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -41,11 +41,19 @@
4141
},
4242
"outputs": [],
4343
"source": [
44-
"# Specify the CMIP collection to use (daily or monthly)\n",
44+
"# Specify the CMIP model and variable to use\n",
4545
"model = \"GISS-E2-1-G\"\n",
4646
"variable = \"tas\""
4747
]
4848
},
49+
{
50+
"cell_type": "markdown",
51+
"id": "762e1e50-46e6-4dab-8462-38d31060e202",
52+
"metadata": {},
53+
"source": [
54+
"## Discover the COG files on S3 using fsspec and `.glob`"
55+
]
56+
},
4957
{
5058
"cell_type": "code",
5159
"execution_count": 3,
@@ -86,6 +94,16 @@
8694
"print(f\"{len(file_paths)} discovered from {s3_path}\")"
8795
]
8896
},
97+
{
98+
"cell_type": "markdown",
99+
"id": "78f991f3-8cde-44bd-a955-b3e5b9694320",
100+
"metadata": {},
101+
"source": [
102+
"## Subset the data so we don't process all historical data\n",
103+
"\n",
104+
"But you can if you want!"
105+
]
106+
},
89107
{
90108
"cell_type": "code",
91109
"execution_count": 6,
@@ -97,6 +115,14 @@
97115
"subset_files = sorted([\"s3://\" + f for f in file_paths if \"_1950_\" in f or \"_1951_\" in f])"
98116
]
99117
},
118+
{
119+
"cell_type": "markdown",
120+
"id": "3bae56f1-1ea6-4755-84b3-149666e84d3d",
121+
"metadata": {},
122+
"source": [
123+
"## Double check we discovered some files"
124+
]
125+
},
100126
{
101127
"cell_type": "code",
102128
"execution_count": 7,
@@ -118,6 +144,16 @@
118144
" print(f\"Subseted data to files for 1950 and 1951. {len(subset_files)} files to process.\")"
119145
]
120146
},
147+
{
148+
"cell_type": "markdown",
149+
"id": "ea59aceb-b80a-4166-a684-74de4230ac4a",
150+
"metadata": {},
151+
"source": [
152+
"## Set up the collection and items\n",
153+
"\n",
154+
"The collection is statically defined in a JSON file, but can be modified as desired. Then, iterate through all the files in S3 and create STAC Item JSON using `rio_stac`. Write all the JSON to an `ndjson` file for inserting."
155+
]
156+
},
121157
{
122158
"cell_type": "code",
123159
"execution_count": 8,
@@ -140,7 +176,7 @@
140176
},
141177
"outputs": [],
142178
"source": [
143-
"# clear the file\n",
179+
"# clear the ndjson items file\n",
144180
"with open(stac_items_file, 'w') as file:\n",
145181
" pass"
146182
]
@@ -185,9 +221,17 @@
185221
" file.write(json.dumps(item.to_dict()) + '\\n')"
186222
]
187223
},
224+
{
225+
"cell_type": "markdown",
226+
"id": "f38a5953-a195-4106-b172-26ba2bce9533",
227+
"metadata": {},
228+
"source": [
229+
"NOTE: This can take a while if processing all 730 files, which is why it is subset to only 2 files below for demonstration purposes."
230+
]
231+
},
188232
{
189233
"cell_type": "code",
190-
"execution_count": null,
234+
"execution_count": 11,
191235
"id": "645d3ccb",
192236
"metadata": {},
193237
"outputs": [
@@ -196,103 +240,38 @@
196240
"output_type": "stream",
197241
"text": [
198242
"Processing s3://nex-gddp-cmip6-cog/daily/GISS-E2-1-G/historical/r1i1p1f2/tas/tas_day_GISS-E2-1-G_historical_r1i1p1f2_gn_1950_01_01.tif\n",
199-
"Processing s3://nex-gddp-cmip6-cog/daily/GISS-E2-1-G/historical/r1i1p1f2/tas/tas_day_GISS-E2-1-G_historical_r1i1p1f2_gn_1950_01_02.tif\n",
200-
"Processing s3://nex-gddp-cmip6-cog/daily/GISS-E2-1-G/historical/r1i1p1f2/tas/tas_day_GISS-E2-1-G_historical_r1i1p1f2_gn_1950_01_03.tif\n",
201-
"Processing s3://nex-gddp-cmip6-cog/daily/GISS-E2-1-G/historical/r1i1p1f2/tas/tas_day_GISS-E2-1-G_historical_r1i1p1f2_gn_1950_01_04.tif\n",
202-
"Processing s3://nex-gddp-cmip6-cog/daily/GISS-E2-1-G/historical/r1i1p1f2/tas/tas_day_GISS-E2-1-G_historical_r1i1p1f2_gn_1950_01_05.tif\n",
203-
"Processing s3://nex-gddp-cmip6-cog/daily/GISS-E2-1-G/historical/r1i1p1f2/tas/tas_day_GISS-E2-1-G_historical_r1i1p1f2_gn_1950_01_06.tif\n",
204-
"Processing s3://nex-gddp-cmip6-cog/daily/GISS-E2-1-G/historical/r1i1p1f2/tas/tas_day_GISS-E2-1-G_historical_r1i1p1f2_gn_1950_01_07.tif\n",
205-
"Processing s3://nex-gddp-cmip6-cog/daily/GISS-E2-1-G/historical/r1i1p1f2/tas/tas_day_GISS-E2-1-G_historical_r1i1p1f2_gn_1950_01_08.tif\n",
206-
"Processing s3://nex-gddp-cmip6-cog/daily/GISS-E2-1-G/historical/r1i1p1f2/tas/tas_day_GISS-E2-1-G_historical_r1i1p1f2_gn_1950_01_09.tif\n",
207-
"Processing s3://nex-gddp-cmip6-cog/daily/GISS-E2-1-G/historical/r1i1p1f2/tas/tas_day_GISS-E2-1-G_historical_r1i1p1f2_gn_1950_01_10.tif\n",
208-
"Processing s3://nex-gddp-cmip6-cog/daily/GISS-E2-1-G/historical/r1i1p1f2/tas/tas_day_GISS-E2-1-G_historical_r1i1p1f2_gn_1950_01_11.tif\n",
209-
"Processing s3://nex-gddp-cmip6-cog/daily/GISS-E2-1-G/historical/r1i1p1f2/tas/tas_day_GISS-E2-1-G_historical_r1i1p1f2_gn_1950_01_12.tif\n",
210-
"Processing s3://nex-gddp-cmip6-cog/daily/GISS-E2-1-G/historical/r1i1p1f2/tas/tas_day_GISS-E2-1-G_historical_r1i1p1f2_gn_1950_01_13.tifProcessing s3://nex-gddp-cmip6-cog/daily/GISS-E2-1-G/historical/r1i1p1f2/tas/tas_day_GISS-E2-1-G_historical_r1i1p1f2_gn_1950_01_14.tif\n",
211-
"Processing s3://nex-gddp-cmip6-cog/daily/GISS-E2-1-G/historical/r1i1p1f2/tas/tas_day_GISS-E2-1-G_historical_r1i1p1f2_gn_1950_01_15.tif\n",
212-
"\n",
213-
"Processing s3://nex-gddp-cmip6-cog/daily/GISS-E2-1-G/historical/r1i1p1f2/tas/tas_day_GISS-E2-1-G_historical_r1i1p1f2_gn_1950_01_16.tif\n",
214-
"Processing s3://nex-gddp-cmip6-cog/daily/GISS-E2-1-G/historical/r1i1p1f2/tas/tas_day_GISS-E2-1-G_historical_r1i1p1f2_gn_1950_01_17.tif\n",
215-
"Processing s3://nex-gddp-cmip6-cog/daily/GISS-E2-1-G/historical/r1i1p1f2/tas/tas_day_GISS-E2-1-G_historical_r1i1p1f2_gn_1950_01_18.tif\n",
216-
"Processing s3://nex-gddp-cmip6-cog/daily/GISS-E2-1-G/historical/r1i1p1f2/tas/tas_day_GISS-E2-1-G_historical_r1i1p1f2_gn_1950_01_19.tif\n",
217-
"Processing s3://nex-gddp-cmip6-cog/daily/GISS-E2-1-G/historical/r1i1p1f2/tas/tas_day_GISS-E2-1-G_historical_r1i1p1f2_gn_1950_01_20.tif\n",
218-
"Processing s3://nex-gddp-cmip6-cog/daily/GISS-E2-1-G/historical/r1i1p1f2/tas/tas_day_GISS-E2-1-G_historical_r1i1p1f2_gn_1950_01_21.tif\n",
219-
"Processing s3://nex-gddp-cmip6-cog/daily/GISS-E2-1-G/historical/r1i1p1f2/tas/tas_day_GISS-E2-1-G_historical_r1i1p1f2_gn_1950_01_22.tif\n",
220-
"Processing s3://nex-gddp-cmip6-cog/daily/GISS-E2-1-G/historical/r1i1p1f2/tas/tas_day_GISS-E2-1-G_historical_r1i1p1f2_gn_1950_01_23.tif\n",
221-
"Processing s3://nex-gddp-cmip6-cog/daily/GISS-E2-1-G/historical/r1i1p1f2/tas/tas_day_GISS-E2-1-G_historical_r1i1p1f2_gn_1950_01_24.tif\n",
222-
"Processing s3://nex-gddp-cmip6-cog/daily/GISS-E2-1-G/historical/r1i1p1f2/tas/tas_day_GISS-E2-1-G_historical_r1i1p1f2_gn_1950_01_25.tif\n",
223-
"Processing s3://nex-gddp-cmip6-cog/daily/GISS-E2-1-G/historical/r1i1p1f2/tas/tas_day_GISS-E2-1-G_historical_r1i1p1f2_gn_1950_01_26.tif\n",
224-
"Processing s3://nex-gddp-cmip6-cog/daily/GISS-E2-1-G/historical/r1i1p1f2/tas/tas_day_GISS-E2-1-G_historical_r1i1p1f2_gn_1950_01_27.tif\n",
225-
"Processing s3://nex-gddp-cmip6-cog/daily/GISS-E2-1-G/historical/r1i1p1f2/tas/tas_day_GISS-E2-1-G_historical_r1i1p1f2_gn_1950_01_28.tif\n",
226-
"Processing s3://nex-gddp-cmip6-cog/daily/GISS-E2-1-G/historical/r1i1p1f2/tas/tas_day_GISS-E2-1-G_historical_r1i1p1f2_gn_1950_01_29.tif\n",
227-
"Processing s3://nex-gddp-cmip6-cog/daily/GISS-E2-1-G/historical/r1i1p1f2/tas/tas_day_GISS-E2-1-G_historical_r1i1p1f2_gn_1950_01_30.tif\n",
228-
"Processing s3://nex-gddp-cmip6-cog/daily/GISS-E2-1-G/historical/r1i1p1f2/tas/tas_day_GISS-E2-1-G_historical_r1i1p1f2_gn_1950_01_31.tif\n",
229-
"Processing s3://nex-gddp-cmip6-cog/daily/GISS-E2-1-G/historical/r1i1p1f2/tas/tas_day_GISS-E2-1-G_historical_r1i1p1f2_gn_1950_02_01.tif\n",
230-
"Processing s3://nex-gddp-cmip6-cog/daily/GISS-E2-1-G/historical/r1i1p1f2/tas/tas_day_GISS-E2-1-G_historical_r1i1p1f2_gn_1950_02_02.tif\n",
231-
"Processing s3://nex-gddp-cmip6-cog/daily/GISS-E2-1-G/historical/r1i1p1f2/tas/tas_day_GISS-E2-1-G_historical_r1i1p1f2_gn_1950_02_03.tif\n",
232-
"Processing s3://nex-gddp-cmip6-cog/daily/GISS-E2-1-G/historical/r1i1p1f2/tas/tas_day_GISS-E2-1-G_historical_r1i1p1f2_gn_1950_02_04.tif\n",
233-
"Processing s3://nex-gddp-cmip6-cog/daily/GISS-E2-1-G/historical/r1i1p1f2/tas/tas_day_GISS-E2-1-G_historical_r1i1p1f2_gn_1950_02_05.tif\n",
234-
"Processing s3://nex-gddp-cmip6-cog/daily/GISS-E2-1-G/historical/r1i1p1f2/tas/tas_day_GISS-E2-1-G_historical_r1i1p1f2_gn_1950_02_06.tif\n",
235-
"Processing s3://nex-gddp-cmip6-cog/daily/GISS-E2-1-G/historical/r1i1p1f2/tas/tas_day_GISS-E2-1-G_historical_r1i1p1f2_gn_1950_02_07.tif\n",
236-
"Processing s3://nex-gddp-cmip6-cog/daily/GISS-E2-1-G/historical/r1i1p1f2/tas/tas_day_GISS-E2-1-G_historical_r1i1p1f2_gn_1950_02_08.tif\n",
237-
"Processing s3://nex-gddp-cmip6-cog/daily/GISS-E2-1-G/historical/r1i1p1f2/tas/tas_day_GISS-E2-1-G_historical_r1i1p1f2_gn_1950_02_09.tif\n",
238-
"Processing s3://nex-gddp-cmip6-cog/daily/GISS-E2-1-G/historical/r1i1p1f2/tas/tas_day_GISS-E2-1-G_historical_r1i1p1f2_gn_1950_02_10.tif\n",
239-
"Processing s3://nex-gddp-cmip6-cog/daily/GISS-E2-1-G/historical/r1i1p1f2/tas/tas_day_GISS-E2-1-G_historical_r1i1p1f2_gn_1950_02_11.tif\n",
240-
"Processing s3://nex-gddp-cmip6-cog/daily/GISS-E2-1-G/historical/r1i1p1f2/tas/tas_day_GISS-E2-1-G_historical_r1i1p1f2_gn_1950_02_12.tif\n",
241-
"Processing s3://nex-gddp-cmip6-cog/daily/GISS-E2-1-G/historical/r1i1p1f2/tas/tas_day_GISS-E2-1-G_historical_r1i1p1f2_gn_1950_02_13.tif\n",
242-
"Processing s3://nex-gddp-cmip6-cog/daily/GISS-E2-1-G/historical/r1i1p1f2/tas/tas_day_GISS-E2-1-G_historical_r1i1p1f2_gn_1950_02_14.tif\n",
243-
"Processing s3://nex-gddp-cmip6-cog/daily/GISS-E2-1-G/historical/r1i1p1f2/tas/tas_day_GISS-E2-1-G_historical_r1i1p1f2_gn_1950_02_15.tif\n",
244-
"Processing s3://nex-gddp-cmip6-cog/daily/GISS-E2-1-G/historical/r1i1p1f2/tas/tas_day_GISS-E2-1-G_historical_r1i1p1f2_gn_1950_02_16.tif\n",
245-
"Processing s3://nex-gddp-cmip6-cog/daily/GISS-E2-1-G/historical/r1i1p1f2/tas/tas_day_GISS-E2-1-G_historical_r1i1p1f2_gn_1950_02_17.tif\n",
246-
"Processing s3://nex-gddp-cmip6-cog/daily/GISS-E2-1-G/historical/r1i1p1f2/tas/tas_day_GISS-E2-1-G_historical_r1i1p1f2_gn_1950_02_18.tif\n",
247-
"Processing s3://nex-gddp-cmip6-cog/daily/GISS-E2-1-G/historical/r1i1p1f2/tas/tas_day_GISS-E2-1-G_historical_r1i1p1f2_gn_1950_02_19.tif\n",
248-
"Processing s3://nex-gddp-cmip6-cog/daily/GISS-E2-1-G/historical/r1i1p1f2/tas/tas_day_GISS-E2-1-G_historical_r1i1p1f2_gn_1950_02_20.tif\n",
249-
"Processing s3://nex-gddp-cmip6-cog/daily/GISS-E2-1-G/historical/r1i1p1f2/tas/tas_day_GISS-E2-1-G_historical_r1i1p1f2_gn_1950_02_21.tif\n",
250-
"Processing s3://nex-gddp-cmip6-cog/daily/GISS-E2-1-G/historical/r1i1p1f2/tas/tas_day_GISS-E2-1-G_historical_r1i1p1f2_gn_1950_02_22.tif\n",
251-
"Processing s3://nex-gddp-cmip6-cog/daily/GISS-E2-1-G/historical/r1i1p1f2/tas/tas_day_GISS-E2-1-G_historical_r1i1p1f2_gn_1950_02_23.tif\n",
252-
"Processing s3://nex-gddp-cmip6-cog/daily/GISS-E2-1-G/historical/r1i1p1f2/tas/tas_day_GISS-E2-1-G_historical_r1i1p1f2_gn_1950_02_24.tif\n",
253-
"Processing s3://nex-gddp-cmip6-cog/daily/GISS-E2-1-G/historical/r1i1p1f2/tas/tas_day_GISS-E2-1-G_historical_r1i1p1f2_gn_1950_02_25.tif\n",
254-
"Processing s3://nex-gddp-cmip6-cog/daily/GISS-E2-1-G/historical/r1i1p1f2/tas/tas_day_GISS-E2-1-G_historical_r1i1p1f2_gn_1950_02_26.tif\n",
255-
"Processing s3://nex-gddp-cmip6-cog/daily/GISS-E2-1-G/historical/r1i1p1f2/tas/tas_day_GISS-E2-1-G_historical_r1i1p1f2_gn_1950_02_27.tif\n",
256-
"Processing s3://nex-gddp-cmip6-cog/daily/GISS-E2-1-G/historical/r1i1p1f2/tas/tas_day_GISS-E2-1-G_historical_r1i1p1f2_gn_1950_02_28.tif\n",
257-
"Processing s3://nex-gddp-cmip6-cog/daily/GISS-E2-1-G/historical/r1i1p1f2/tas/tas_day_GISS-E2-1-G_historical_r1i1p1f2_gn_1950_03_01.tif\n",
258-
"Processing s3://nex-gddp-cmip6-cog/daily/GISS-E2-1-G/historical/r1i1p1f2/tas/tas_day_GISS-E2-1-G_historical_r1i1p1f2_gn_1950_03_02.tif\n",
259-
"Processing s3://nex-gddp-cmip6-cog/daily/GISS-E2-1-G/historical/r1i1p1f2/tas/tas_day_GISS-E2-1-G_historical_r1i1p1f2_gn_1950_03_03.tif\n",
260-
"Processing s3://nex-gddp-cmip6-cog/daily/GISS-E2-1-G/historical/r1i1p1f2/tas/tas_day_GISS-E2-1-G_historical_r1i1p1f2_gn_1950_03_04.tif\n",
261-
"Processing s3://nex-gddp-cmip6-cog/daily/GISS-E2-1-G/historical/r1i1p1f2/tas/tas_day_GISS-E2-1-G_historical_r1i1p1f2_gn_1950_03_05.tif\n",
262-
"Processing s3://nex-gddp-cmip6-cog/daily/GISS-E2-1-G/historical/r1i1p1f2/tas/tas_day_GISS-E2-1-G_historical_r1i1p1f2_gn_1950_03_06.tif\n",
263-
"Processing s3://nex-gddp-cmip6-cog/daily/GISS-E2-1-G/historical/r1i1p1f2/tas/tas_day_GISS-E2-1-G_historical_r1i1p1f2_gn_1950_03_07.tif\n",
264-
"Processing s3://nex-gddp-cmip6-cog/daily/GISS-E2-1-G/historical/r1i1p1f2/tas/tas_day_GISS-E2-1-G_historical_r1i1p1f2_gn_1950_03_08.tif\n",
265-
"Processing s3://nex-gddp-cmip6-cog/daily/GISS-E2-1-G/historical/r1i1p1f2/tas/tas_day_GISS-E2-1-G_historical_r1i1p1f2_gn_1950_03_09.tif\n",
266-
"Processing s3://nex-gddp-cmip6-cog/daily/GISS-E2-1-G/historical/r1i1p1f2/tas/tas_day_GISS-E2-1-G_historical_r1i1p1f2_gn_1950_03_10.tif\n",
267-
"Processing s3://nex-gddp-cmip6-cog/daily/GISS-E2-1-G/historical/r1i1p1f2/tas/tas_day_GISS-E2-1-G_historical_r1i1p1f2_gn_1950_03_11.tif\n",
268-
"Processing s3://nex-gddp-cmip6-cog/daily/GISS-E2-1-G/historical/r1i1p1f2/tas/tas_day_GISS-E2-1-G_historical_r1i1p1f2_gn_1950_03_12.tif\n",
269-
"Processing s3://nex-gddp-cmip6-cog/daily/GISS-E2-1-G/historical/r1i1p1f2/tas/tas_day_GISS-E2-1-G_historical_r1i1p1f2_gn_1950_03_13.tif\n",
270-
"Processing s3://nex-gddp-cmip6-cog/daily/GISS-E2-1-G/historical/r1i1p1f2/tas/tas_day_GISS-E2-1-G_historical_r1i1p1f2_gn_1950_03_14.tif\n",
271-
"Processing s3://nex-gddp-cmip6-cog/daily/GISS-E2-1-G/historical/r1i1p1f2/tas/tas_day_GISS-E2-1-G_historical_r1i1p1f2_gn_1950_03_15.tif\n",
272-
"Processing s3://nex-gddp-cmip6-cog/daily/GISS-E2-1-G/historical/r1i1p1f2/tas/tas_day_GISS-E2-1-G_historical_r1i1p1f2_gn_1950_03_16.tif\n",
273-
"Processing s3://nex-gddp-cmip6-cog/daily/GISS-E2-1-G/historical/r1i1p1f2/tas/tas_day_GISS-E2-1-G_historical_r1i1p1f2_gn_1950_03_17.tif\n"
243+
"Processing s3://nex-gddp-cmip6-cog/daily/GISS-E2-1-G/historical/r1i1p1f2/tas/tas_day_GISS-E2-1-G_historical_r1i1p1f2_gn_1950_01_02.tif\n"
274244
]
275245
}
276246
],
277247
"source": [
278248
"lock = threading.Lock()\n",
279249
"file = open(stac_items_file, 'a')\n",
280250
"with concurrent.futures.ThreadPoolExecutor() as executor:\n",
281-
" futures = [executor.submit(process_item, obj, file, lock) for obj in subset_files]\n",
251+
" futures = [executor.submit(process_item, obj, file, lock) for obj in subset_files[0:2]]\n",
282252
" [future.result() for future in concurrent.futures.as_completed(futures)]\n",
283253
"file.close()"
284254
]
285255
},
256+
{
257+
"cell_type": "markdown",
258+
"id": "8e062949-16c9-4a79-b2ee-1579f244d74f",
259+
"metadata": {},
260+
"source": [
261+
"# Final step - seed the database"
262+
]
263+
},
286264
{
287265
"cell_type": "code",
288-
"execution_count": 21,
289-
"id": "d1728b26-9f8f-4bef-8704-a37fe9af2f9f",
266+
"execution_count": 16,
267+
"id": "6965e650-f89a-4c7d-9f41-11774a905b81",
290268
"metadata": {},
291269
"outputs": [
292270
{
293271
"name": "stdout",
294272
"output_type": "stream",
295273
"text": [
274+
"postgresql://postgres:password@localhost:5432/postgres\n",
296275
"Inserting collection from CMIP6_daily_GISS-E2-1-G_tas_collection.json\n",
297276
"Inserting items from CMIP6_daily_GISS-E2-1-G_tas_stac_items.ndjson\n"
298277
]

demo/cmip6/seed-db.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ if [ -z "$DATABASE_URL" ]; then
1414
DATABASE_URL="postgresql://$username:$password@$host:$port/$dbname"
1515
fi
1616

17+
echo $DATABASE_URL
1718
echo "Inserting collection from $collection_json_file"
1819
pypgstac load collections $collection_json_file --dsn $DATABASE_URL --method insert_ignore
1920
echo "Inserting items from $items_json_file"

0 commit comments

Comments
 (0)