diff --git a/dags.yaml b/dags.yaml
index f5f7a200ff9..6754a4ebbe8 100644
--- a/dags.yaml
+++ b/dags.yaml
@@ -248,6 +248,26 @@ bqetl_messaging_system:
   tags:
   - impact/tier_3
 
+bqetl_messaging_system_hourly:
+  default_args:
+    depends_on_past: false
+    email:
+    - telemetry-alerts@mozilla.com
+    - phlee@mozilla.com
+    email_on_failure: true
+    email_on_retry: false
+    end_date: null
+    owner: phlee@mozilla.com
+    retries: 1
+    retry_delay: 10m
+    start_date: '2025-12-15'
+  description: Hourly tables for onboarding reporting
+  repo: bigquery-etl
+  schedule_interval: hourly
+  catchup: true
+  tags:
+  - impact/tier_1
+
 bqetl_activity_stream:
   schedule_interval: 0 2 * * *
   description: |
diff --git a/sql/moz-fx-data-shared-prod/firefox_desktop/onboarding_hourly/metadata.yaml b/sql/moz-fx-data-shared-prod/firefox_desktop/onboarding_hourly/metadata.yaml
new file mode 100644
index 00000000000..6fdf53014f1
--- /dev/null
+++ b/sql/moz-fx-data-shared-prod/firefox_desktop/onboarding_hourly/metadata.yaml
@@ -0,0 +1,5 @@
+friendly_name: Onboarding Hourly
+description: |-
+  Contains onboarding-specific data extracted from messaging_system_v1 at an hourly cadence
+owners:
+- phlee@mozilla.com
diff --git a/sql/moz-fx-data-shared-prod/firefox_desktop/onboarding_hourly/view.sql b/sql/moz-fx-data-shared-prod/firefox_desktop/onboarding_hourly/view.sql
new file mode 100644
index 00000000000..404648ea675
--- /dev/null
+++ b/sql/moz-fx-data-shared-prod/firefox_desktop/onboarding_hourly/view.sql
@@ -0,0 +1,8 @@
+-- Pass-through view exposing every column of the derived hourly onboarding table.
+CREATE OR REPLACE VIEW
+  `moz-fx-data-shared-prod.firefox_desktop.onboarding_hourly`
+AS
+SELECT
+  *
+FROM
+  `moz-fx-data-shared-prod.firefox_desktop_derived.onboarding_hourly_v2`
diff --git a/sql/moz-fx-data-shared-prod/firefox_desktop_derived/onboarding_hourly_v2/metadata.yaml b/sql/moz-fx-data-shared-prod/firefox_desktop_derived/onboarding_hourly_v2/metadata.yaml
new file mode 100644
index 00000000000..c15dff926d6
--- /dev/null
+++ 
b/sql/moz-fx-data-shared-prod/firefox_desktop_derived/onboarding_hourly_v2/metadata.yaml
@@ -0,0 +1,37 @@
+friendly_name: Onboarding Hourly
+description: |-
+  Contains onboarding-specific data extracted from messaging_system_v1 at an hourly cadence
+owners:
+- phlee@mozilla.com
+labels:
+  incremental: true
+  schedule: hourly
+  owner1: phlee
+  table_type: client_level
+  dag: bqetl_messaging_system_hourly
+scheduling:
+  dag_name: bqetl_messaging_system_hourly
+  date_partition_parameter: null
+  # We reprocess the same day every hour up until 1:00 the following day, to give
+  # the live data time to come in
+  destination_table: >-
+    onboarding_hourly_v2${{
+    (execution_date - macros.timedelta(hours=1)).strftime("%Y%m%d")
+    }}
+  parameters:
+  - >-
+    submission_date:DATE:{{
+    (execution_date - macros.timedelta(hours=1)).strftime('%Y-%m-%d')
+    }}
+bigquery:
+  time_partitioning:
+    type: day
+    field: submission_timestamp
+    require_partition_filter: true
+    expiration_days: 7.0
+  range_partitioning: null
+  clustering:
+    fields:
+    - normalized_channel
+    - sample_id
+require_column_descriptions: false
diff --git a/sql/moz-fx-data-shared-prod/firefox_desktop_derived/onboarding_hourly_v2/query.sql b/sql/moz-fx-data-shared-prod/firefox_desktop_derived/onboarding_hourly_v2/query.sql
new file mode 100644
index 00000000000..1bad4032ca4
--- /dev/null
+++ b/sql/moz-fx-data-shared-prod/firefox_desktop_derived/onboarding_hourly_v2/query.sql
@@ -0,0 +1,61 @@
+-- Extracts one day of messaging-system pings from the live table and flattens
+-- the metrics into top-level columns. Per this table's metadata.yaml, the
+-- query is re-run hourly for the same @submission_date.
+WITH messaging_system_live_deduped AS (
+  SELECT
+    *
+  FROM
+    `moz-fx-data-shared-prod.firefox_desktop_live.messaging_system_v1`
+  WHERE
+    DATE(submission_timestamp) = @submission_date
+    -- NOTE(review): a NULL ping type presumably identifies onboarding pings;
+    -- confirm against the messaging_system ping definition.
+    AND metrics.string.messaging_system_ping_type IS NULL
+  QUALIFY
+    -- Keep only the earliest-received row per document_id within the day.
+    ROW_NUMBER() OVER (
+      PARTITION BY
+        DATE(submission_timestamp),
+        document_id
+      ORDER BY
+        submission_timestamp
+    ) = 1
+)
+SELECT
+  submission_timestamp,
+  additional_properties,
+  metrics.string.messaging_system_addon_version AS addon_version,
+  metrics.uuid.messaging_system_client_id AS client_id,
+  document_id,
+  metrics.string.messaging_system_event AS event,
+  metrics.text2.messaging_system_event_context AS event_context,
+  metrics.string.messaging_system_event_page AS event_page,
+  metrics.string.messaging_system_event_reason AS event_reason,
+  metrics.string.messaging_system_event_source AS event_source,
+  metrics.string.messaging_system_locale AS locale,
+  metrics.text2.messaging_system_message_id AS message_id,
+  metadata,
+  normalized_app_name,
+  normalized_channel,
+  normalized_country_code,
+  normalized_os,
+  normalized_os_version,
+  client_info.app_channel AS release_channel,
+  sample_id,
+  client_info.app_display_version AS version,
+  metrics.uuid.messaging_system_browser_session_id AS browser_session_id,
+  ping_info.experiments AS experiments,
+  STRUCT(
+    metrics.string.messaging_system_attribution_campaign AS campaign,
+    metrics.string.messaging_system_attribution_content AS content,
+    metrics.string.messaging_system_attribution_experiment AS experiment,
+    metrics.string.messaging_system_attribution_medium AS medium,
+    metrics.string.messaging_system_attribution_source AS source,
+    metrics.string.messaging_system_attribution_ua AS ua,
+    metrics.string.messaging_system_attribution_variation AS variation,
+    metrics.string.messaging_system_attribution_dltoken AS dltoken,
+    metrics.string.messaging_system_attribution_dlsource AS dlsource,
+    metrics.string.messaging_system_attribution_msstoresignedin AS msstoresignedin
+  ) AS attribution
+FROM
+  messaging_system_live_deduped
diff --git a/sql/moz-fx-data-shared-prod/firefox_desktop_derived/onboarding_hourly_v2/schema.yaml b/sql/moz-fx-data-shared-prod/firefox_desktop_derived/onboarding_hourly_v2/schema.yaml
new file mode 100644
index 00000000000..97e01ffa644
--- /dev/null
+++ b/sql/moz-fx-data-shared-prod/firefox_desktop_derived/onboarding_hourly_v2/schema.yaml
@@ -0,0 +1,257 @@
+fields:
+- description: Timestamp when the ping is received on the server side.
+  name: submission_timestamp
+  type: TIMESTAMP
+  mode: NULLABLE
+- description: A JSON string containing any payload properties not present in the
+    schema
+  name: additional_properties
+  type: STRING
+  mode: NULLABLE
+- description: Addon Version
+  name: addon_version
+  type: STRING
+  mode: NULLABLE
+- description: A unique identifier (UUID) for the client.
+  name: client_id
+  type: STRING
+  mode: NULLABLE
+- description: The document ID specified in the URI when the client sent this message.
+  name: document_id
+  type: STRING
+  mode: NULLABLE
+- name: event
+  type: STRING
+  mode: NULLABLE
+- description: A string that describes the context about this event
+  name: event_context
+  type: STRING
+  mode: NULLABLE
+- description: The event_context's page. Almost always "about:welcome".
+  name: event_page
+  type: STRING
+  mode: NULLABLE
+- description: The event_context's reason. Likely something like "welcome-window-closed"
+    or "app-shut-down".
+  name: event_reason
+  type: STRING
+  mode: NULLABLE
+- description: The event_context's source. Likely something like "primary_button".
+  name: event_source
+  type: STRING
+  mode: NULLABLE
+- name: locale
+  type: STRING
+  mode: NULLABLE
+  description: Set of language- and/or country-based preferences for a user interface.
+- name: message_id
+  type: STRING
+  mode: NULLABLE
+  description: Message ID
+- name: metadata
+  type: RECORD
+  mode: NULLABLE
+  fields:
+  - description: Results of a geographic lookup based on the client's IP address
+    name: geo
+    type: RECORD
+    mode: NULLABLE
+    fields:
+    - name: city
+      type: STRING
+      mode: NULLABLE
+    - description: An ISO 3166-1 alpha-2 country code
+      name: country
+      type: STRING
+      mode: NULLABLE
+    - description: The specific geo database version used for this lookup
+      name: db_version
+      type: STRING
+      mode: NULLABLE
+    - description: First major country subdivision, typically a state, province, or
+        county
+      name: subdivision1
+      type: STRING
+      mode: NULLABLE
+    - description: Second major country subdivision; not applicable for most countries
+      name: subdivision2
+      type: STRING
+      mode: NULLABLE
+  - name: header
+    type: RECORD
+    mode: NULLABLE
+    fields:
+    - description: Date HTTP header
+      mode: NULLABLE
+      name: date
+      type: STRING
+    - description: DNT (Do Not Track) HTTP header
+      mode: NULLABLE
+      name: dnt
+      type: STRING
+    - description: X-Debug-Id HTTP header
+      mode: NULLABLE
+      name: x_debug_id
+      type: STRING
+    - description: X-PingSender-Version HTTP header
+      mode: NULLABLE
+      name: x_pingsender_version
+      type: STRING
+    - description: X-Source-Tags HTTP header
+      mode: NULLABLE
+      name: x_source_tags
+      type: STRING
+    - description: X-Telemetry-Agent HTTP header
+      mode: NULLABLE
+      name: x_telemetry_agent
+      type: STRING
+    - description: X-Foxsec-IP-Reputation header
+      mode: NULLABLE
+      name: x_foxsec_ip_reputation
+      type: STRING
+    - description: X-LB-Tags HTTP header
+      mode: NULLABLE
+      name: x_lb_tags
+      type: STRING
+  - description: Results of ISP lookup based on the client's IP address
+    name: isp
+    type: RECORD
+    mode: NULLABLE
+    fields:
+    - description: The specific geo ISP database version used for this lookup
+      mode: NULLABLE
+      name: db_version
+      type: STRING
+    - description: The name of the ISP associated with the client's IP address
+      mode: NULLABLE
+      name: name
+      type: STRING
+    - description: The name of a specific business entity associated with the client's
+        IP address when available; otherwise the ISP name
+      mode: NULLABLE
+      name: organization
+      type: STRING
+  - description: Parsed components of the client's user agent string
+    name: user_agent
+    type: RECORD
+    mode: NULLABLE
+    fields:
+    - name: browser
+      type: STRING
+      mode: NULLABLE
+    - name: os
+      type: STRING
+      mode: NULLABLE
+    - name: version
+      type: STRING
+      mode: NULLABLE
+- description: Set to "Other" if this message contained an unrecognized app name
+  mode: NULLABLE
+  name: normalized_app_name
+  type: STRING
+- description: The normalized channel the application is being distributed on.
+  mode: NULLABLE
+  name: normalized_channel
+  type: STRING
+- description: Code of the country in which the activity took place, as determined
+    by the IP geolocation. Unknown or NULL values are normally stored as '??'.
+  mode: NULLABLE
+  name: normalized_country_code
+  type: STRING
+- description: The normalized name of the operating system running at the client.
+  mode: NULLABLE
+  name: normalized_os
+  type: STRING
+- name: normalized_os_version
+  mode: NULLABLE
+  type: STRING
+- name: release_channel
+  mode: NULLABLE
+  type: STRING
+- name: sample_id
+  mode: NULLABLE
+  type: INTEGER
+  description: A number, 0-99, that samples by client_id and allows filtering data
+    for analysis. It is a pipeline-generated artifact that should match between pings.
+- name: version
+  mode: NULLABLE
+  type: STRING
+  description: User visible version string (e.g. "1.0.3") for the browser.
+- description: A mirror of the browser sessionId, as defined in
+    https://github.com/mozilla-services/mozilla-pipeline-schemas/blob/main/schemas/telemetry/main/main.4.schema.json
+  name: browser_session_id
+  type: STRING
+  mode: NULLABLE
+- name: experiments
+  type: RECORD
+  mode: REPEATED
+  description: Experiment Information
+  fields:
+  - name: key
+    type: STRING
+    mode: NULLABLE
+  - name: value
+    type: RECORD
+    mode: NULLABLE
+    fields:
+    - name: branch
+      type: STRING
+      mode: NULLABLE
+    - name: extra
+      type: RECORD
+      mode: NULLABLE
+      fields:
+      - name: enrollment_id
+        type: STRING
+        mode: NULLABLE
+      - name: type
+        type: STRING
+        mode: NULLABLE
+- name: attribution
+  type: RECORD
+  mode: NULLABLE
+  fields:
+  - description: Identifier of the particular campaign that led to the download of
+      the product.
+    name: campaign
+    type: STRING
+    mode: NULLABLE
+  - description: Identifier to indicate the particular link within a campaign.
+    name: content
+    type: STRING
+    mode: NULLABLE
+  - description: Funnel experiment parameters, see bug 1567339
+    name: experiment
+    type: STRING
+    mode: NULLABLE
+  - description: Category of the source, such as 'organic' for a search engine.
+    name: medium
+    type: STRING
+    mode: NULLABLE
+  - description: Referring partner domain, when install happens via a known partner.
+    name: source
+    type: STRING
+    mode: NULLABLE
+  - description: Derived user agent, see bug 1595063
+    name: ua
+    type: STRING
+    mode: NULLABLE
+  - description: Funnel experiment parameters, see bug 1567339
+    name: variation
+    type: STRING
+    mode: NULLABLE
+  - description: Unique token created at Firefox download time, see bug 1757451
+    name: dltoken
+    type: STRING
+    mode: NULLABLE
+  - description: Identifier that indicates where installations of Firefox originate,
+      see bug 1827238
+    name: dlsource
+    type: STRING
+    mode: NULLABLE
+  - description: Either the string "true" or the string "false" to indicate whether
+      the attributed install came from the Microsoft store and, if so, whether the
+      user was signed in at the time.
+    name: msstoresignedin
+    type: STRING
+    mode: NULLABLE