From 0f2eb85aef5c6e4970fdd145700847fe4d2a028c Mon Sep 17 00:00:00 2001 From: Ayoub Ziate Date: Mon, 19 Jun 2023 09:11:11 +0000 Subject: [PATCH 1/2] change source dataset config --- models/staging/google_analytics/src_google_analytics.yml | 6 +++--- .../google_analytics/stg_google_analytics__events.sql | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/models/staging/google_analytics/src_google_analytics.yml b/models/staging/google_analytics/src_google_analytics.yml index 4013fba..71b152f 100644 --- a/models/staging/google_analytics/src_google_analytics.yml +++ b/models/staging/google_analytics/src_google_analytics.yml @@ -2,8 +2,8 @@ version: 2 sources: - name: google_analytics - database: stacktonic-cloud # gcp-project - dataset: analytics_263482362 # gcp-dataset + database: bigquery-public-data # gcp-project + dataset: ga4_obfuscated_sample_ecommerce # gcp-dataset freshness: warn_after: { count: 24, period: hour } @@ -12,6 +12,6 @@ sources: loaded_at_field: "parse_timestamp(_table_suffix, '%Y%m%d')" tables: - - name: events + - name: events_20210131 identifier: events_* description: "Default Google Analytics 4 (raw) data dumps. Tables are date sharded (events_YYYYMMDD)." 
\ No newline at end of file diff --git a/models/staging/google_analytics/stg_google_analytics__events.sql b/models/staging/google_analytics/stg_google_analytics__events.sql index f108d29..eda8f22 100644 --- a/models/staging/google_analytics/stg_google_analytics__events.sql +++ b/models/staging/google_analytics/stg_google_analytics__events.sql @@ -9,7 +9,7 @@ with source as ( select *, _table_suffix as table_suffix - from {{ source('google_analytics', 'events') }} + from {{ source('google_analytics', 'events_20210131') }} where -- start date (using the _table_suffix_ pseudo column for performance) (_table_suffix between format_date('%Y%m%d', date('{{ var("start_date") }}')) From f24016524eec91a4286a6dae3e40f47e28ae7eb4 Mon Sep 17 00:00:00 2001 From: Ayoub Ziate Date: Wed, 21 Jun 2023 09:32:36 +0000 Subject: [PATCH 2/2] update tables config --- .../google_analytics/src_google_analytics.yml | 2 +- .../stg_google_analytics__events.sql | 88 +++++++++++-------- 2 files changed, 50 insertions(+), 40 deletions(-) diff --git a/models/staging/google_analytics/src_google_analytics.yml b/models/staging/google_analytics/src_google_analytics.yml index 71b152f..5d20c9d 100644 --- a/models/staging/google_analytics/src_google_analytics.yml +++ b/models/staging/google_analytics/src_google_analytics.yml @@ -12,6 +12,6 @@ sources: loaded_at_field: "parse_timestamp(_table_suffix, '%Y%m%d')" tables: - - name: events_20210131 + - name: events identifier: events_* description: "Default Google Analytics 4 (raw) data dumps. Tables are date sharded (events_YYYYMMDD)." 
\ No newline at end of file diff --git a/models/staging/google_analytics/stg_google_analytics__events.sql b/models/staging/google_analytics/stg_google_analytics__events.sql index eda8f22..9dfd7ad 100644 --- a/models/staging/google_analytics/stg_google_analytics__events.sql +++ b/models/staging/google_analytics/stg_google_analytics__events.sql @@ -1,39 +1,49 @@ -{{ - config( - materialized="view" - ) -}} - -with source as ( - - select - *, - _table_suffix as table_suffix - from {{ source('google_analytics', 'events_20210131') }} - where - -- start date (using the _table_suffix_ pseudo column for performance) - (_table_suffix between format_date('%Y%m%d', date('{{ var("start_date") }}')) - and format_date('%Y%m%d', current_date())) - -), - -renamed as ( - - select - user_pseudo_id as fpc_id, -- first-party cookie-id - concat(user_pseudo_id, '.', (select cast(value.int_value as string) from unnest(event_params) where key = 'ga_session_id')) as session_id, - ifnull((select value.string_value from unnest(event_params) where key = 'traffic_type'), 'production') as traffic_type, - * - from source - -), - -filtered as ( - select - * - from renamed - where - traffic_type not in ('development', 'internal') -) - -select * from filtered \ No newline at end of file +{{ config(materialized="view") }} + +with + source as ( + + select *, _table_suffix as table_suffix + from {{ source("google_analytics", "events") }} + where + -- start date (using the _table_suffix pseudo column for performance) + ( + _table_suffix between format_date( + '%Y%m%d', date('{{ var("start_date") }}') + ) and format_date('%Y%m%d', current_date()) + ) + + ), + + renamed as ( + + select + user_pseudo_id as fpc_id, -- first-party cookie-id + concat( + user_pseudo_id, + '.', + ( + select cast(value.int_value as string) + from unnest(event_params) + where key = 'ga_session_id' + ) + ) as session_id, + ifnull( + ( + select value.string_value + from unnest(event_params) + where key = 'traffic_type' + ), 
'production' + ) as traffic_type, + * + from source + + ), + + filtered as ( + select * from renamed where traffic_type not in ('development', 'internal') + ) + +select * +from filtered