Skip to content

Commit 83a834c

Browse files
committed
dags for uploading to bigquery with bigframes
1 parent bf46bc3 commit 83a834c

File tree

2 files changed

+101
-0
lines changed

2 files changed

+101
-0
lines changed
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# https://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
# Tested on Cloud Composer 3
16+
#
17+
# For local development:
18+
# pip install 'apache-airflow[google]==2.10.5'
19+
20+
21+
import datetime
22+
23+
from airflow import models
24+
from airflow.operators import bash
25+
26+
27+
# Default arguments handed to the DAG below via ``default_args``.
default_dag_args = {
    # The start_date describes when a DAG is valid / can be run. Set this to a
    # fixed point in time rather than dynamically, since it is evaluated every
    # time a DAG is parsed. See:
    # https://airflow.apache.org/faq.html#what-s-the-deal-with-start-date
    "start_date": datetime.datetime(2025, 6, 30),
}
34+
35+
# Source file on the Census Bureau site, the name wget saves it under, and the
# Cloud Storage folder it is copied into.
CENSUS_CSV_URL = "https://www2.census.gov/programs-surveys/popest/datasets/2020-2023/counties/asrh/cc-est2023-agesex-all.csv"
CENSUS_CSV_NAME = "cc-est2023-agesex-all.csv"
CENSUS_GCS_DIR = "gs://us-central1-bigframes-orche-b70f2a52-bucket/data/us-census/"

# Define a DAG (directed acyclic graph) of tasks.
# Any task you create within the context manager is automatically added to the
# DAG object.
with models.DAG(
    "census_from_http_to_gcs_once",
    # `schedule` replaces the deprecated `schedule_interval` (Airflow 2.4+).
    schedule="@once",
    default_args=default_dag_args,
) as dag:
    # Download and upload run in ONE task on purpose: BashOperator executes
    # each command in its own temporary working directory (removed when the
    # task finishes), and separate tasks may land on different workers. A file
    # fetched by one task is therefore not visible to a later task that refers
    # to it by relative path. `&&` makes the upload run only if wget succeeded.
    download_and_upload = bash.BashOperator(
        task_id="download_and_upload",
        bash_command=(
            f"wget {CENSUS_CSV_URL} && "
            f"gcloud storage cp {CENSUS_CSV_NAME} {CENSUS_GCS_DIR}"
        ),
    )
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# https://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
# Tested on Cloud Composer 3
16+
#
17+
# For local development:
18+
# pip install 'apache-airflow[google]==2.10.5'
19+
20+
21+
import datetime
22+
23+
from airflow import models
24+
25+
from airflow.providers.google.cloud.transfers import http_to_gcs
26+
27+
28+
# Default arguments handed to the DAG below via ``default_args``.
default_dag_args = {
    # The start_date describes when a DAG is valid / can be run. Set this to a
    # fixed point in time rather than dynamically, since it is evaluated every
    # time a DAG is parsed. See:
    # https://airflow.apache.org/faq.html#what-s-the-deal-with-start-date
    #
    # NOTE(review): with a recurring schedule, every interval between this
    # date and today is backfilled unless the DAG disables catchup.
    "start_date": datetime.datetime(2018, 1, 1),
}
35+
36+
# Define a DAG (directed acyclic graph) of tasks.
# Any task you create within the context manager is automatically added to the
# DAG object.
#
# NOTE(review): the DAG id below looks like a leftover from another sample; it
# is kept unchanged here because renaming a DAG creates a new DAG in Airflow.
with models.DAG(
    "composer_sample_simple_greeting",
    # `schedule` replaces the deprecated `schedule_interval` (Airflow 2.4+).
    schedule=datetime.timedelta(days=1),
    # Without this, a daily schedule would backfill one run for every day
    # since the DAG's start_date.
    catchup=False,
    default_args=default_dag_args,
) as dag:
    # Copy the census CSV from HTTP straight into Cloud Storage.
    http_to_gcs_task = http_to_gcs.HttpToGCSOperator(
        task_id="http_to_gcs_task",
        # NOTE(review): the operator joins `endpoint` onto the host configured
        # on its HTTP connection (`http_conn_id`, "http_default" unless set).
        # Confirm that connection has no conflicting host, or move the host
        # into a connection and keep only the path here.
        endpoint="https://www2.census.gov/programs-surveys/popest/datasets/2020-2023/counties/asrh/cc-est2023-agesex-all.csv",
        method="GET",
        # HttpToGCSOperator takes the destination as a bucket name plus an
        # object path; it has no `dest_gcs` argument.
        bucket_name="us-central1-bigframes-orche-b70f2a52-bucket",
        object_name="data/us-census/cc-est2023-agesex-all.csv",
    )

0 commit comments

Comments
 (0)