Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions pypistats/templates/package.html
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,8 @@ <h1>{{ package }}</h1>
<br>
Downloads last month:
{{ "{:,.0f}".format(recent['month']) }}
<br>
<input type="checkbox" id="smoothing" {{ 'checked' if metadata['use_smoothing'] else ''}} onclick="return setSmoothing();"> 7-day smoothing
</p>
<script>

Expand Down Expand Up @@ -101,5 +103,14 @@ <h1>{{ package }}</h1>
}
};
})();

function setSmoothing() {
if (document.getElementById('smoothing').checked) {
window.location='{{ url_for('general.package_page', package=package) }}?smooth=true';
} else {
window.location='{{ url_for('general.package_page', package=package) }}';
}
return false;
}
</script>
{% endblock %}
91 changes: 44 additions & 47 deletions pypistats/views/general.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ def package_page(package):
recent[r.category] = r.downloads

# PyPI metadata
metadata = None
metadata = dict()
if package != "__all__":
try:
metadata = requests.get(f"https://pypi.python.org/pypi/{package}/json", timeout=5).json()
Expand Down Expand Up @@ -139,8 +139,13 @@ def package_page(package):
else:
metrics = ["downloads", "percentages"]

use_smoothing = metadata['use_smoothing'] = request.args.get('smooth', None) is not None
for metric in metrics:
model_data.append({"metric": metric, "name": model.__tablename__, "data": data_function[metric](records)})
model_data.append({
"metric": metric,
"name": model.__tablename__,
"data": data_function[metric](records, use_smoothing=use_smoothing),
})

# Build the plots
plots = []
Expand Down Expand Up @@ -191,7 +196,20 @@ def package_page(package):
return render_template("package.html", package=package, plots=plots, metadata=metadata, recent=recent, user=g.user)


def get_download_data(records):
def smooth_data(data, window=7):
    """Return a copy of ``data`` whose y-values are a trailing moving average.

    The (x, y) pairs are first sorted by x. Each output point is then the
    mean of the current value and up to ``window - 1`` values before it, so
    the most recent point always contributes to its own average. Points near
    the start use however many values are available, which avoids a jump
    between raw and smoothed values.

    Args:
        data: Mapping with parallel "x" and "y" sequences. Any other keys are
            carried over unchanged (shallow copy). The input is not modified.
        window: Number of points per average; must be at least 1.

    Returns:
        A new dict with "x" as a sorted list and "y" as a list of floats.
        Empty input yields empty lists.

    Raises:
        ValueError: If ``window`` is smaller than 1.
    """
    if window < 1:
        raise ValueError("window must be at least 1")

    # Sort by x only (stable), so equal x-values keep their original order.
    pairs = sorted(zip(data["x"], data["y"]), key=lambda pair: pair[0])
    xs = [x for x, _ in pairs]
    ys = [y for _, y in pairs]

    smoothed_ys = []
    for end, _ in enumerate(ys, start=1):
        # Trailing window ending at (and including) the current point.
        window_data = ys[max(0, end - window):end]
        smoothed_ys.append(sum(window_data) / len(window_data))

    smoothed_data = dict(data)
    smoothed_data["x"] = xs
    smoothed_data["y"] = smoothed_ys
    return smoothed_data


def get_download_data(records, use_smoothing=False):
"""Organize the data for the absolute plots."""
data = defaultdict(lambda: {"x": [], "y": []})

Expand Down Expand Up @@ -241,54 +259,33 @@ def get_download_data(records):
if category not in date_categories:
data[category]["x"].append(str(records[-1].date))
data[category]["y"].append(0)
return data


def get_proportion_data(records):
"""Organize the data for the fill plots."""
data = defaultdict(lambda: {"x": [], "y": [], "text": []})

date_categories = defaultdict(lambda: 0)
all_categories = []

prev_date = records[0].date

for record in records:
if record.category not in all_categories:
all_categories.append(record.category)

all_categories = sorted(all_categories)
for category in all_categories:
data[category] # set the dict value (keeps it ordered)

for record in records:
if record.date != prev_date:
if use_smoothing:
# Smooth data using a 7-day window
for category in all_categories:
data[category] = smooth_data(data[category])

total = sum(date_categories.values()) / 100
for category in all_categories:
data[category]["x"].append(str(prev_date))
value = date_categories[category] / total
data[category]["y"].append(value)
data[category]["text"].append("{0:.2f}%".format(value) + " = {:,}".format(date_categories[category]))
return data

date_categories = defaultdict(lambda: 0)
prev_date = record.date

# Track categories for this date
date_categories[record.category] = record.downloads
else:
# Fill in missing final date with zeros
total = sum(date_categories.values()) / 100
for category in all_categories:
if category not in date_categories:
data[category]["x"].append(str(records[-1].date))
data[category]["y"].append(0)
data[category]["text"].append("{0:.2f}%".format(0) + " = {:,}".format(0))
else:
data[category]["x"].append(str(records[-1].date))
value = date_categories[category] / total
data[category]["y"].append(value)
data[category]["text"].append("{0:.2f}%".format(value) + " = {:,}".format(date_categories[category]))
def get_proportion_data(records, use_smoothing=False):
    """Organize the data for the fill plots.

    Starts from the absolute per-category series produced by
    ``get_download_data`` and converts each point into its share of that
    position's total across all categories.

    Args:
        records: Passed through unchanged to ``get_download_data``.
        use_smoothing: Passed through to ``get_download_data``. Smoothing is
            therefore applied to the absolute numbers *before* proportions
            are computed, which avoids inflating random noise.

    Returns:
        The mapping returned by ``get_download_data``, where each category's
        "y" now holds percentages (0-100) and a parallel "text" list holds
        hover labels of the form "12.34% = 1,234".
    """
    data = get_download_data(records, use_smoothing=use_smoothing)

    # Per-position sum over all categories. A zero total is replaced by 1 so
    # that an all-zero day yields 0% instead of a ZeroDivisionError.
    # NOTE(review): zip() stops at the shortest series; this assumes every
    # category has the same number of points -- confirm against
    # get_download_data.
    all_ys = [category_values["y"] for category_values in data.values()]
    totals = [sum(chunk) or 1 for chunk in zip(*all_ys)]

    for category_values in data.values():
        absolutes = category_values["y"]
        # Scale to 0-100 so the plotted value matches the "%" in the label.
        percentages = [100 * a / t for a, t in zip(absolutes, totals)]
        category_values["y"] = percentages
        # Percentage first, absolute count second. ",.0f" prints integers
        # exactly as "{:,}" would and keeps smoothed (float) counts readable.
        category_values["text"] = [
            "{:.2f}% = {:,.0f}".format(p, a) for p, a in zip(percentages, absolutes)
        ]

    return data

Expand Down