Skip to content

Commit 559d647

Browse files
authored
Add tile size to the propensity calculation (#8478)
* Add tile size
* lint
1 parent 1f8696c commit 559d647

File tree

1 file changed

+79
-42
lines changed
  • sql/moz-fx-data-shared-prod/telemetry_derived/newtab_merino_propensity_v1

1 file changed

+79
-42
lines changed

sql/moz-fx-data-shared-prod/telemetry_derived/newtab_merino_propensity_v1/query.sql

Lines changed: 79 additions & 42 deletions
Original file line number | Diff line number | Diff line change
@@ -65,7 +65,7 @@ events AS (
6565
)
6666
AND metrics.string.newtab_content_country = params.country
6767
),
68-
base_events AS (
68+
base_events_fresh_items AS (
6969
SELECT
7070
submission_date,
7171
branch,
@@ -100,86 +100,123 @@ aggregates AS (
100100
COUNTIF(event_name = 'impression') AS impressions,
101101
COUNTIF(event_name = 'click') AS clicks
102102
FROM
103-
base_events
103+
base_events_fresh_items
104104
GROUP BY
105105
tile_format,
106106
section_position,
107107
position
108108
),
109-
top_stories_aggregates AS (
109+
stories_aggregates AS (
110110
SELECT
111111
position,
112+
tile_format,
112113
SUM(impressions) AS impressions,
113114
SUM(clicks) AS clicks,
114115
SUM(clicks) / SUM(impressions) AS ctr
115116
FROM
116117
aggregates
117118
WHERE
118-
section_position = 0
119-
AND position >= 0
120-
AND position <= 7
119+
position >= 0
120+
AND position <= 50
121121
GROUP BY
122-
position
122+
position,
123+
tile_format
123124
),
124-
top_stories_totals AS (
125+
stories_totals AS (
125126
SELECT
126127
SUM(impressions) AS impressions,
127128
SUM(clicks) AS clicks,
128129
SUM(clicks) / SUM(impressions) AS ctr
129130
FROM
130-
top_stories_aggregates
131+
stories_aggregates
131132
),
132-
per_section_aggregates AS (
133+
stories_weights AS (
133134
SELECT
134-
section_position,
135-
SUM(impressions) AS impressions,
136-
SUM(clicks) AS clicks,
137-
SAFE_DIVIDE(SUM(clicks), SUM(impressions)) AS ctr
135+
SAFE_DIVIDE(stories_totals.ctr, ag.ctr) AS unormalized_weight,
136+
ag.impressions,
137+
position,
138+
tile_format
138139
FROM
139-
aggregates
140+
stories_totals,
141+
stories_aggregates AS ag
142+
),
143+
base_events_all_items AS (
144+
SELECT
145+
tile_format,
146+
position,
147+
event_name
148+
FROM
149+
events ev,
150+
params
140151
WHERE
141-
section_position <= 10
152+
ev.section_position IS NOT NULL
153+
),
154+
aggregates_all_items AS (
155+
SELECT
156+
tile_format,
157+
position,
158+
COUNTIF(event_name = 'impression') AS impressions,
159+
COUNTIF(event_name = 'click') AS clicks
160+
FROM
161+
base_events_all_items
142162
GROUP BY
143-
section_position
163+
tile_format,
164+
position
165+
),
166+
adjusted_all_data_clicks AS (
167+
SELECT
168+
aggregates_all_items.clicks / stories_weights.unormalized_weight AS clicks_adjusted,
169+
aggregates_all_items.position,
170+
aggregates_all_items.tile_format
171+
FROM
172+
aggregates_all_items
173+
JOIN
174+
stories_weights
175+
ON (
176+
stories_weights.position = aggregates_all_items.position
177+
AND stories_weights.tile_format = aggregates_all_items.tile_format
178+
)
144179
),
145-
per_section_totals AS (
180+
all_items_stats AS (
146181
SELECT
147182
SUM(impressions) AS impressions,
148183
SUM(clicks) AS clicks,
149-
SAFE_DIVIDE(SUM(clicks), SUM(impressions)) AS ctr
184+
SAFE_DIVIDE(SUM(clicks), SUM(impressions)) AS target_ctr
150185
FROM
151-
per_section_aggregates
186+
aggregates_all_items
152187
),
153-
top_stories_weights AS (
188+
totals_all_items AS (
154189
SELECT
155-
position,
156-
SAFE_DIVIDE(top_stories_totals.ctr, ag.ctr) AS weight,
157-
ag.impressions
190+
SUM(impressions) AS impressions_all
158191
FROM
159-
top_stories_totals,
160-
top_stories_aggregates AS ag
192+
aggregates_all_items
161193
),
162-
section_weights AS (
194+
adjusted_clicks_total AS (
163195
SELECT
164-
section_position,
165-
SAFE_DIVIDE(per_section_totals.ctr, ag.ctr) AS weight,
166-
ag.impressions
196+
SUM(clicks_adjusted) AS clicks_adj_total
167197
FROM
168-
per_section_aggregates AS ag,
169-
per_section_totals
198+
adjusted_all_data_clicks
199+
),
200+
normalization_factor AS (
201+
SELECT
202+
SAFE_DIVIDE(
203+
all_items_stats.target_ctr * totals_all_items.impressions_all,
204+
adjusted_clicks_total.clicks_adj_total
205+
) AS factor
206+
FROM
207+
all_items_stats,
208+
totals_all_items,
209+
adjusted_clicks_total
170210
)
171211
SELECT
172-
weight,
212+
unormalized_weight * normalization_factor.factor AS weight,
173213
position,
214+
tile_format,
174215
NULL AS section_position,
175216
impressions
176217
FROM
177-
top_stories_weights
178-
UNION ALL
179-
SELECT
180-
weight,
181-
NULL AS position,
182-
section_position,
183-
impressions
184-
FROM
185-
section_weights
218+
stories_weights
219+
CROSS JOIN
220+
normalization_factor
221+
WHERE
222+
impressions > 2000

0 commit comments

Comments
 (0)