Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 6 additions & 17 deletions scripts/2-process/gcs_process.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
"""
# Standard library
import argparse
import csv
import os
import sys
import textwrap
Expand Down Expand Up @@ -62,16 +61,6 @@ def parse_arguments():
return args


def data_to_csv(args, data, file_path):
    """
    Save data to file_path as CSV, but only when args.enable_save is truthy.

    The PATHS["data_phase"] directory is created first if it is missing.
    Always returns None.
    """
    if args.enable_save:
        os.makedirs(PATHS["data_phase"], exist_ok=True)
        # emulate csv.unix_dialect
        unix_dialect_options = {
            "index": False,
            "quoting": csv.QUOTE_ALL,
            "lineterminator": "\n",
        }
        data.to_csv(file_path, **unix_dialect_options)


def process_product_totals(args, count_data):
"""
Processing count data: totals by product
Expand Down Expand Up @@ -111,7 +100,7 @@ def process_product_totals(args, count_data):
data.items(), columns=["CC legal tool product", "Count"]
)
file_path = shared.path_join(PATHS["data_phase"], "gcs_product_totals.csv")
data_to_csv(args, data, file_path)
shared.data_to_csv(args, data, file_path, PATHS)


def process_latest_prior_retired_totals(args, count_data):
Expand Down Expand Up @@ -192,7 +181,7 @@ def process_latest_prior_retired_totals(args, count_data):
file_path = shared.path_join(
PATHS["data_phase"], f"gcs_status_{key}_totals.csv"
)
data_to_csv(args, dataframe, file_path)
shared.data_to_csv(args, dataframe, file_path)


def process_totals_by_free_cultural(args, count_data):
Expand Down Expand Up @@ -225,7 +214,7 @@ def process_totals_by_free_cultural(args, count_data):
file_path = shared.path_join(
PATHS["data_phase"], "gcs_totals_by_free_cultural.csv"
)
data_to_csv(args, data, file_path)
shared.data_to_csv(args, data, file_path)


def process_totals_by_restrictions(args, count_data):
Expand Down Expand Up @@ -259,7 +248,7 @@ def process_totals_by_restrictions(args, count_data):
file_path = shared.path_join(
PATHS["data_phase"], "gcs_totals_by_restrictions.csv"
)
data_to_csv(args, data, file_path)
shared.data_to_csv(args, data, file_path)


def process_totals_by_language(args, data):
Expand All @@ -280,7 +269,7 @@ def process_totals_by_language(args, data):
file_path = shared.path_join(
PATHS["data_phase"], "gcs_totals_by_language.csv"
)
data_to_csv(args, data, file_path)
shared.data_to_csv(args, data, file_path)


def process_totals_by_country(args, data):
Expand All @@ -301,7 +290,7 @@ def process_totals_by_country(args, data):
file_path = shared.path_join(
PATHS["data_phase"], "gcs_totals_by_country.csv"
)
data_to_csv(args, data, file_path)
shared.data_to_csv(args, data, file_path)


def main():
Expand Down
15 changes: 2 additions & 13 deletions scripts/2-process/github_process.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
"""
# Standard library
import argparse
import csv
import os
import sys
import traceback
Expand Down Expand Up @@ -66,16 +65,6 @@ def check_for_data_file(file_path):
)


def data_to_csv(args, data, file_path):
    """
    Write data to file_path through data.to_csv() when saving is enabled.

    Returns immediately (without touching the filesystem) if
    args.enable_save is falsy.
    """
    saving_disabled = not args.enable_save
    if saving_disabled:
        return
    output_dir = PATHS["data_phase"]
    os.makedirs(output_dir, exist_ok=True)
    # emulate csv.unix_dialect
    data.to_csv(
        file_path,
        index=False,
        quoting=csv.QUOTE_ALL,
        lineterminator="\n",
    )


def process_totals_by_license(args, count_data):
"""
Processing count data: totals by License
Expand All @@ -99,7 +88,7 @@ def process_totals_by_license(args, count_data):
PATHS["data_phase"], "github_totals_by_license.csv"
)
check_for_data_file(file_path)
data_to_csv(args, data, file_path)
shared.data_to_csv(args, data, file_path, PATHS)


def process_totals_by_restriction(args, count_data):
Expand Down Expand Up @@ -134,7 +123,7 @@ def process_totals_by_restriction(args, count_data):
PATHS["data_phase"], "github_totals_by_restriction.csv"
)
check_for_data_file(file_path)
data_to_csv(args, data, file_path)
shared.data_to_csv(args, data, file_path, PATHS)


def main():
Expand Down
17 changes: 3 additions & 14 deletions scripts/2-process/wikipedia_process.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
"""
# Standard library
import argparse
import csv
import os
import sys
import textwrap
Expand Down Expand Up @@ -70,16 +69,6 @@ def check_for_data_file(file_path):
)


def data_to_csv(args, data, file_path):
    """
    Persist data as a CSV file at file_path.

    Nothing is written unless args.enable_save is truthy; when it is, the
    PATHS["data_phase"] directory is ensured to exist before the write.
    """
    if not args.enable_save:
        return None
    os.makedirs(name=PATHS["data_phase"], exist_ok=True)
    # emulate csv.unix_dialect
    quote_mode = csv.QUOTE_ALL
    line_ending = "\n"
    data.to_csv(
        file_path, index=False, quoting=quote_mode, lineterminator=line_ending
    )
    return None


def process_highest_language_usage(args, count_data):
"""
Processing count data: Most represented languages
Expand All @@ -99,7 +88,7 @@ def process_highest_language_usage(args, count_data):
PATHS["data_phase"], "wikipedia_highest_language_usage.csv"
)
check_for_data_file(file_path)
data_to_csv(args, top_10, file_path)
shared.data_to_csv(args, top_10, file_path, PATHS)


def process_least_language_usage(args, count_data):
Expand All @@ -123,7 +112,7 @@ def process_least_language_usage(args, count_data):
PATHS["data_phase"], "wikipedia_least_language_usage.csv"
)
check_for_data_file(file_path)
data_to_csv(args, bottom_10, file_path)
shared.data_to_csv(args, bottom_10, file_path, PATHS)


def process_language_representation(args, count_data):
Expand All @@ -150,7 +139,7 @@ def process_language_representation(args, count_data):
PATHS["data_phase"], "wikipedia_language_representation.csv"
)
check_for_data_file(file_path)
data_to_csv(args, language_counts, file_path)
shared.data_to_csv(args, language_counts, file_path, PATHS)


def main():
Expand Down
11 changes: 11 additions & 0 deletions scripts/shared.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# Standard library
import csv
import logging
import os
import sys
Expand Down Expand Up @@ -36,6 +37,16 @@ def __init__(self, message, exit_code=None):
super().__init__(self.message)


def data_to_csv(args, data, file_path, PATHS=None):
    """
    Write data to file_path as a fully-quoted CSV when saving is enabled.

    Args:
        args: object whose enable_save attribute gates the write; when it
            is falsy the function returns without touching the filesystem.
        data: object providing a pandas-style to_csv() method.
        file_path: destination path of the CSV file.
        PATHS: optional mapping with a "data_phase" entry naming the
            directory to create before writing. When omitted, the parent
            directory of file_path is created instead, so callers that
            pass only (args, data, file_path) keep working.

    Returns:
        None
    """
    if not args.enable_save:
        return
    if PATHS is not None:
        output_dir = PATHS["data_phase"]
    else:
        output_dir = os.path.dirname(file_path)
    # os.makedirs("") raises FileNotFoundError; an empty directory component
    # means there is nothing to create
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    # emulate csv.unix_dialect
    data.to_csv(
        file_path, index=False, quoting=csv.QUOTE_ALL, lineterminator="\n"
    )


def get_session(accept_header=None, session=None):
"""
Create or configure a reusable HTTPS session with retry logic and
Expand Down