diff --git a/scripts/2-process/gcs_process.py b/scripts/2-process/gcs_process.py index fefbba0f..a79b7d02 100755 --- a/scripts/2-process/gcs_process.py +++ b/scripts/2-process/gcs_process.py @@ -4,7 +4,6 @@ """ # Standard library import argparse -import csv import os import sys import textwrap @@ -62,16 +61,6 @@ def parse_arguments(): return args -def data_to_csv(args, data, file_path): - if not args.enable_save: - return - os.makedirs(PATHS["data_phase"], exist_ok=True) - # emulate csv.unix_dialect - data.to_csv( - file_path, index=False, quoting=csv.QUOTE_ALL, lineterminator="\n" - ) - - def process_product_totals(args, count_data): """ Processing count data: totals by product @@ -111,7 +100,7 @@ def process_product_totals(args, count_data): data.items(), columns=["CC legal tool product", "Count"] ) file_path = shared.path_join(PATHS["data_phase"], "gcs_product_totals.csv") - data_to_csv(args, data, file_path) + shared.data_to_csv(args, data, file_path, PATHS) def process_latest_prior_retired_totals(args, count_data): @@ -192,7 +181,7 @@ def process_latest_prior_retired_totals(args, count_data): file_path = shared.path_join( PATHS["data_phase"], f"gcs_status_{key}_totals.csv" ) - data_to_csv(args, dataframe, file_path) + shared.data_to_csv(args, dataframe, file_path, PATHS) def process_totals_by_free_cultural(args, count_data): @@ -225,7 +214,7 @@ def process_totals_by_free_cultural(args, count_data): file_path = shared.path_join( PATHS["data_phase"], "gcs_totals_by_free_cultural.csv" ) - data_to_csv(args, data, file_path) + shared.data_to_csv(args, data, file_path, PATHS) def process_totals_by_restrictions(args, count_data): @@ -259,7 +248,7 @@ def process_totals_by_restrictions(args, count_data): file_path = shared.path_join( PATHS["data_phase"], "gcs_totals_by_restrictions.csv" ) - data_to_csv(args, data, file_path) + shared.data_to_csv(args, data, file_path, PATHS) def process_totals_by_language(args, data): @@ -280,7 +269,7 @@ def process_totals_by_language(args, data): 
file_path = shared.path_join( PATHS["data_phase"], "gcs_totals_by_language.csv" ) - data_to_csv(args, data, file_path) + shared.data_to_csv(args, data, file_path, PATHS) def process_totals_by_country(args, data): @@ -301,7 +290,7 @@ def process_totals_by_country(args, data): file_path = shared.path_join( PATHS["data_phase"], "gcs_totals_by_country.csv" ) - data_to_csv(args, data, file_path) + shared.data_to_csv(args, data, file_path, PATHS) def main(): diff --git a/scripts/2-process/github_process.py b/scripts/2-process/github_process.py index 27945613..ec4441cc 100755 --- a/scripts/2-process/github_process.py +++ b/scripts/2-process/github_process.py @@ -5,7 +5,6 @@ """ # Standard library import argparse -import csv import os import sys import traceback @@ -66,16 +65,6 @@ def check_for_data_file(file_path): ) -def data_to_csv(args, data, file_path): - if not args.enable_save: - return - os.makedirs(PATHS["data_phase"], exist_ok=True) - # emulate csv.unix_dialect - data.to_csv( - file_path, index=False, quoting=csv.QUOTE_ALL, lineterminator="\n" - ) - - def process_totals_by_license(args, count_data): """ Processing count data: totals by License @@ -99,7 +88,7 @@ def process_totals_by_license(args, count_data): PATHS["data_phase"], "github_totals_by_license.csv" ) check_for_data_file(file_path) - data_to_csv(args, data, file_path) + shared.data_to_csv(args, data, file_path, PATHS) def process_totals_by_restriction(args, count_data): @@ -134,7 +123,7 @@ def process_totals_by_restriction(args, count_data): PATHS["data_phase"], "github_totals_by_restriction.csv" ) check_for_data_file(file_path) - data_to_csv(args, data, file_path) + shared.data_to_csv(args, data, file_path, PATHS) def main(): diff --git a/scripts/2-process/wikipedia_process.py b/scripts/2-process/wikipedia_process.py index 7712b26a..41435ec0 100755 --- a/scripts/2-process/wikipedia_process.py +++ b/scripts/2-process/wikipedia_process.py @@ -5,7 +5,6 @@ """ # Standard library import argparse -import csv import os 
import sys import textwrap @@ -70,16 +69,6 @@ def check_for_data_file(file_path): ) -def data_to_csv(args, data, file_path): - if not args.enable_save: - return - os.makedirs(PATHS["data_phase"], exist_ok=True) - # emulate csv.unix_dialect - data.to_csv( - file_path, index=False, quoting=csv.QUOTE_ALL, lineterminator="\n" - ) - - def process_highest_language_usage(args, count_data): """ Processing count data: Most represented languages @@ -99,7 +88,7 @@ def process_highest_language_usage(args, count_data): PATHS["data_phase"], "wikipedia_highest_language_usage.csv" ) check_for_data_file(file_path) - data_to_csv(args, top_10, file_path) + shared.data_to_csv(args, top_10, file_path, PATHS) def process_least_language_usage(args, count_data): @@ -123,7 +112,7 @@ def process_least_language_usage(args, count_data): PATHS["data_phase"], "wikipedia_least_language_usage.csv" ) check_for_data_file(file_path) - data_to_csv(args, bottom_10, file_path) + shared.data_to_csv(args, bottom_10, file_path, PATHS) def process_language_representation(args, count_data): @@ -150,7 +139,7 @@ def process_language_representation(args, count_data): PATHS["data_phase"], "wikipedia_language_representation.csv" ) check_for_data_file(file_path) - data_to_csv(args, language_counts, file_path) + shared.data_to_csv(args, language_counts, file_path, PATHS) def main(): diff --git a/scripts/shared.py b/scripts/shared.py index 51cfd8d2..66650c43 100644 --- a/scripts/shared.py +++ b/scripts/shared.py @@ -1,4 +1,5 @@ # Standard library +import csv import logging import os import sys @@ -36,6 +37,16 @@ def __init__(self, message, exit_code=None): super().__init__(self.message) +def data_to_csv(args, data, file_path, PATHS): + if not args.enable_save: + return + os.makedirs(PATHS["data_phase"], exist_ok=True) + # emulate csv.unix_dialect + data.to_csv( + file_path, index=False, quoting=csv.QUOTE_ALL, lineterminator="\n" + ) + + def get_session(accept_header=None, session=None): """ Create or configure a 
reusable HTTPS session with retry logic and