Skip to content

Commit 95121a0

Browse files
authored
Merge branch 'main' into use_coutners_glam
2 parents c067b8b + ec29797 commit 95121a0

File tree

4 files changed

+66
-31
lines changed

4 files changed

+66
-31
lines changed

bigquery_etl/glam/generate.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -278,6 +278,7 @@ def main():
278278
source_table=f"glam_etl.{args.prefix}__scalar_bucket_counts_v1",
279279
is_scalar=True,
280280
),
281+
channel=channel_prefixes[args.prefix],
281282
),
282283
table(
283284
"probe_counts_v1",
@@ -286,6 +287,7 @@ def main():
286287
source_table=f"glam_etl.{args.prefix}__histogram_bucket_counts_v1",
287288
is_scalar=False,
288289
),
290+
channel=channel_prefixes[args.prefix],
289291
),
290292
table(
291293
"scalar_percentiles_v1",
@@ -296,7 +298,11 @@ def main():
296298
table("histogram_percentiles_v1"),
297299
view("view_probe_counts_v1"),
298300
view("view_user_counts_v1", **models.user_counts()),
299-
view("view_sample_counts_v1", **models.sample_counts()),
301+
view(
302+
"view_sample_counts_v1",
303+
**models.sample_counts(),
304+
channel=channel_prefixes[args.prefix],
305+
),
300306
table("extract_user_counts_v1", **config[args.prefix]),
301307
table("extract_probe_counts_v1", **config[args.prefix]),
302308
]

bigquery_etl/glam/templates/probe_counts_v1.sql

Lines changed: 22 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -41,13 +41,17 @@ SELECT
4141
{% if is_scalar %}
4242
client_agg_type,
4343
agg_type,
44-
-- Logic to count clients based on sampled windows release data.
45-
-- If you're changing this, then you'll also need to change
46-
-- clients_daily_[scalar | histogram]_aggregates
47-
IF(os = 'Windows' AND channel = 'release',
48-
SUM(count) * 10,
49-
SUM(count)
50-
) AS total_users,
44+
{%if channel == "release" %}
45+
-- Logic to count clients based on sampled Windows release data; sampling started in v119.
46+
-- If you're changing this, then you'll also need to change
47+
-- clients_daily_[scalar | histogram]_aggregates
48+
IF(os = 'Windows' AND app_version >= 119,
49+
SUM(count) * 10,
50+
SUM(count)
51+
) AS total_users,
52+
{% else %}
53+
SUM(count) AS total_users,
54+
{% endif %}
5155
mozfun.glam.histogram_fill_buckets_dirichlet(
5256
mozfun.map.sum(ARRAY_AGG(STRUCT<key STRING, value FLOAT64>(bucket, count))),
5357
CASE
@@ -64,13 +68,17 @@ SELECT
6468
{% else %}
6569
agg_type AS client_agg_type,
6670
'histogram' as agg_type,
67-
-- Logic to count clients based on sampled windows release data.
68-
-- If you're changing this, then you'll also need to change
69-
-- clients_daily_[scalar | histogram]_aggregates
70-
IF(os = 'Windows' AND channel = 'release',
71-
CAST(ROUND(SUM(record.value)) AS INT64) * 10,
72-
CAST(ROUND(SUM(record.value)) AS INT64)
73-
) AS total_users,
71+
{% if channel == "release" %}
72+
-- Logic to count clients based on sampled windows release data, which started in v119.
73+
-- If you're changing this, then you'll also need to change
74+
-- clients_daily_[scalar | histogram]_aggregates
75+
IF(os = 'Windows' AND app_version >= 119,
76+
CAST(ROUND(SUM(record.value)) AS INT64) * 10,
77+
CAST(ROUND(SUM(record.value)) AS INT64)
78+
) AS total_users,
79+
{% else %}
80+
CAST(ROUND(SUM(record.value)) AS INT64) AS total_users,
81+
{% endif %}
7482
mozfun.glam.histogram_fill_buckets_dirichlet(
7583
mozfun.map.sum(ARRAY_AGG(record)),
7684
mozfun.glam.histogram_buckets_cast_string_array(

bigquery_etl/glam/templates/view_sample_counts_v1.sql

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,9 @@ WITH histogram_data AS (
1414
app_version,
1515
app_build_id,
1616
channel,
17+
{% if channel == 'release' %}
18+
IF(os = 'Windows', 10, 1) AS sample_mult,
19+
{% endif %}
1720
h1.metric,
1821
h1.key,
1922
h1.agg_type,
@@ -29,14 +32,17 @@ scalars_histogram_data AS (
2932
app_version,
3033
app_build_id,
3134
channel,
35+
{% if channel == 'release' %}
36+
IF(os = 'Windows', 10, 1) AS sample_mult,
37+
{% endif %}
3238
s1.metric,
3339
s1.key,
3440
agg_type,
3541
s1.value
3642
FROM
3743
`{{ project }}.{{ dataset }}.{{ prefix }}__clients_scalar_aggregates_v1`, UNNEST(scalar_aggregates) s1
3844

39-
UNION ALL
45+
UNION ALL
4046

4147
SELECT
4248
client_id,
@@ -45,6 +51,9 @@ scalars_histogram_data AS (
4551
app_version,
4652
app_build_id,
4753
channel,
54+
{% if channel == 'release' %}
55+
sample_mult,
56+
{% endif %}
4857
metric,
4958
v1.key,
5059
agg_type,
@@ -64,16 +73,20 @@ scalars_histogram_data AS (
6473
}}
6574
SELECT
6675
{{ attributes }},
67-
metric,
76+
metric,
6877
'' AS key,
6978
agg_type,
70-
SUM(value) as total_sample
79+
{% if channel == 'release' %}
80+
SUM(value) * MAX(sample_mult) as total_sample
81+
{% else %}
82+
SUM(value) as total_sample
83+
{% endif %}
7184
FROM
7285
all_combos
7386
WHERE agg_type = 'summed_histogram'
7487
GROUP BY
75-
{{ attributes }},
76-
metric,
88+
{{ attributes }},
89+
metric,
7790
key,
7891
agg_type
7992

@@ -84,7 +97,11 @@ SELECT
8497
metric,
8598
key,
8699
agg_type,
87-
SUM(value) as total_sample
100+
{% if channel == 'release' %}
101+
SUM(value) * MAX(sample_mult) as total_sample
102+
{% else %}
103+
SUM(value) as total_sample
104+
{% endif %}
88105
FROM
89106
all_combos
90107
WHERE agg_type <> 'summed_histogram'

sql/moz-fx-data-shared-prod/telemetry_derived/glam_sample_counts_v1/query.sql

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,9 @@ WITH histogram_data AS (
88
process,
99
key,
1010
h1.agg_type,
11-
h1.aggregates
11+
h1.aggregates,
12+
IF(os = 'Windows'
13+
AND channel = 'release', 10, 1) AS sample_mult
1214
FROM
1315
clients_histogram_aggregates_v2,
1416
UNNEST(histogram_aggregates) h1
@@ -21,7 +23,9 @@ scalars_data AS (
2123
app_version,
2224
app_build_id,
2325
channel,
24-
scalar_aggregates
26+
scalar_aggregates,
27+
IF(os = 'Windows'
28+
AND channel = 'release', 10, 1) AS sample_mult
2529
FROM
2630
clients_scalar_aggregates_v1
2731
WHERE
@@ -36,7 +40,7 @@ SELECT
3640
process,
3741
histogram_data.key,
3842
agg_type,
39-
SUM(v1.value) AS total_sample
43+
SUM(v1.value) * MAX(sample_mult) AS total_sample
4044
FROM
4145
histogram_data,
4246
UNNEST(aggregates) v1
@@ -59,7 +63,7 @@ SELECT
5963
process,
6064
histogram_data.key,
6165
agg_type,
62-
SUM(v1.value) AS total_sample
66+
SUM(v1.value) * MAX(sample_mult) AS total_sample
6367
FROM
6468
histogram_data,
6569
UNNEST(aggregates) v1
@@ -81,7 +85,7 @@ SELECT
8185
process,
8286
histogram_data.key,
8387
agg_type,
84-
SUM(v1.value) AS total_sample
88+
SUM(v1.value) * MAX(sample_mult) AS total_sample
8589
FROM
8690
histogram_data,
8791
UNNEST(aggregates) v1
@@ -103,7 +107,7 @@ SELECT
103107
process,
104108
histogram_data.key,
105109
agg_type,
106-
SUM(v1.value) AS total_sample
110+
SUM(v1.value) * MAX(sample_mult) AS total_sample
107111
FROM
108112
histogram_data,
109113
UNNEST(aggregates) v1
@@ -126,7 +130,7 @@ SELECT
126130
agg_type,
127131
CASE
128132
WHEN agg_type IN ('count', 'true', 'false')
129-
THEN SUM(value)
133+
THEN SUM(value) * MAX(sample_mult)
130134
ELSE NULL
131135
END AS total_sample
132136
FROM
@@ -153,7 +157,7 @@ SELECT
153157
agg_type,
154158
CASE
155159
WHEN agg_type IN ('count', 'true', 'false')
156-
THEN SUM(value)
160+
THEN SUM(value) * MAX(sample_mult)
157161
ELSE NULL
158162
END AS total_sample
159163
FROM
@@ -179,7 +183,7 @@ SELECT
179183
agg_type,
180184
CASE
181185
WHEN agg_type IN ('count', 'true', 'false')
182-
THEN SUM(value)
186+
THEN SUM(value) * MAX(sample_mult)
183187
ELSE NULL
184188
END AS total_sample
185189
FROM
@@ -205,7 +209,7 @@ SELECT
205209
agg_type,
206210
CASE
207211
WHEN agg_type IN ('count', 'true', 'false')
208-
THEN SUM(value)
212+
THEN SUM(value) * MAX(sample_mult)
209213
ELSE NULL
210214
END AS total_sample
211215
FROM

0 commit comments

Comments
 (0)