From 7139f3624144887229205cd69f85b2e79657fdd3 Mon Sep 17 00:00:00 2001
From: Zach H
Date: Sun, 27 Oct 2019 19:06:09 -0400
Subject: [PATCH] Add CSV support (#33)

* Add CSV support

Signed-off-by: ZeldaZach

* foreignData => foreign_data. `--all` will now build all components

Signed-off-by: ZeldaZach
---
 .gitignore            |  6 +++--
 mtgsqlive/__main__.py | 59 +++++++++++++++++++++++++++++++++++++++++--
 mtgsqlive/json2sql.py | 45 ++++++++++-----------------------
 mtgsqlive/sql2csv.py  | 36 ++++++++++++++++++++++++++
 requirements.txt      |  1 +
 5 files changed, 111 insertions(+), 36 deletions(-)
 create mode 100644 mtgsqlive/sql2csv.py

diff --git a/.gitignore b/.gitignore
index 4da0df4..6bbc3d9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -100,5 +100,7 @@ ENV/
 *.json
 
 # data base
-*.db
-*.sqlite*
\ No newline at end of file
+*.db*
+*.sqlite*
+*.csv*
+*.sql*
\ No newline at end of file
diff --git a/mtgsqlive/__main__.py b/mtgsqlive/__main__.py
index 276c4a8..45c0076 100644
--- a/mtgsqlive/__main__.py
+++ b/mtgsqlive/__main__.py
@@ -1,9 +1,64 @@
 """
 Main Executor
 """
+import argparse
+import logging
+import pathlib
+
 import mtgsqlive
-from mtgsqlive.json2sql import main
+from mtgsqlive import sql2csv, json2sql
 
 if __name__ == "__main__":
     mtgsqlive.init_logger()
-    main()
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "-i",
+        help="input source (AllPrintings.json, AllSetFiles.zip)",
+        required=True,
+        metavar="fileIn",
+    )
+    parser.add_argument(
+        "-o",
+        help="output folder (outputs/)",
+        default="outputs",
+        required=False,
+        metavar="fileOut",
+    )
+    parser.add_argument(
+        "--all",
+        help="Build all types (SQLite, SQL, CSV)",
+        action="store_true",
+        required=False,
+    )
+    args = parser.parse_args()
+
+    # Define our I/O paths
+    input_file = pathlib.Path(args.i).expanduser()
+    output_file = {"path": pathlib.Path(args.o).expanduser().absolute(), "handle": None}
+
+    if args.all:
+        logging.info("> Creating AllPrintings.sqlite")
+        json2sql.execute(
+            input_file,
+            {
+                "path": output_file["path"].joinpath("AllPrintings.sqlite"),
+                "handle": None,
+            },
+        )
+
+        logging.info("> Creating AllPrintings.sql")
+        json2sql.execute(
+            input_file,
+            {"path": output_file["path"].joinpath("AllPrintings.sql"), "handle": None},
+        )
+
+        logging.info("> Creating AllPrintings CSV components")
+        sql2csv.execute(
+            output_file["path"].joinpath("AllPrintings.sqlite"),
+            {"path": output_file["path"].joinpath("csv"), "handle": None},
+        )
+    elif str(input_file).endswith(".sqlite"):
+        sql2csv.execute(input_file, output_file)
+    else:
+        json2sql.execute(input_file, output_file)
diff --git a/mtgsqlive/json2sql.py b/mtgsqlive/json2sql.py
index 92063d9..a40f17c 100644
--- a/mtgsqlive/json2sql.py
+++ b/mtgsqlive/json2sql.py
@@ -13,29 +13,10 @@ LOGGER = logging.getLogger(__name__)
 version = "v4.5.x"  # need to automate this
 
 
-def main() -> None:
+def execute(input_file, output_file) -> None:
     """
     Main function
     """
-    parser = argparse.ArgumentParser()
-    parser.add_argument(
-        "-i",
-        help='input source ("AllSets.json" file or "AllSetFiles" directory)',
-        required=True,
-        metavar="fileIn",
-    )
-    parser.add_argument(
-        "-o",
-        help="output file (*.sqlite, *.db, *.sqlite3, *.db3, *.sql)",
-        required=True,
-        metavar="fileOut",
-    )
-    args = parser.parse_args()
-
-    # Define our I/O paths
-    input_file = pathlib.Path(args.i).expanduser()
-    output_file = {"path": pathlib.Path(args.o).expanduser(), "handle": None}
-
     if not validate_io_streams(input_file, output_file):
         exit(1)
 
@@ -64,11 +45,11 @@ def main() -> None:
     output_file["handle"].close()
 
 
-def validate_io_streams(input_file: pathlib.Path, output_file: Dict) -> bool:
+def validate_io_streams(input_file: pathlib.Path, output_dir: Dict) -> bool:
     """
     Ensure I/O paths are valid and clean for program
     :param input_file: Input file (JSON)
-    :param output_file: Output file (SQLite)
+    :param output_dir: Output dir
     :return: Good to continue status
     """
     if input_file.is_file():
@@ -80,11 +61,11 @@ def validate_io_streams(input_file: pathlib.Path, output_file: Dict) -> bool:
         LOGGER.fatal(f"Invalid input file/directory. ({input_file})")
         return False
 
-    output_file["path"].parent.mkdir(exist_ok=True)
-    if output_file["path"].is_file():
-        LOGGER.warning(f"Output file {output_file['path']} exists already, moving it.")
-        output_file["path"].replace(
-            output_file["path"].parent.joinpath(output_file["path"].name + ".old")
+    output_dir["path"].parent.mkdir(parents=True, exist_ok=True)
+    if output_dir["path"].is_file():
+        LOGGER.warning(f"Output path {output_dir['path']} exists already, moving it.")
+        output_dir["path"].replace(
+            output_dir["path"].parent.joinpath(output_dir["path"].name + ".old")
         )
 
     return True
@@ -246,8 +227,8 @@ def build_sql_schema(output_file: Dict) -> None:
             "",
             "",
         ],
-        "foreignData": [
-            "CREATE TABLE `foreignData` (",
+        "foreign_data": [
+            "CREATE TABLE `foreign_data` (",
             "id INTEGER PRIMARY KEY AUTOINCREMENT,",
             "flavorText TEXT,",
             "language TEXT,",
@@ -381,8 +362,8 @@ def sql_insert_all_card_fields(
     """
     sql_dict_insert(card_attributes["cards"], "cards", output_file)
 
-    for foreign_val in card_attributes["foreignData"]:
-        sql_dict_insert(foreign_val, "foreignData", output_file)
+    for foreign_val in card_attributes["foreign_data"]:
+        sql_dict_insert(foreign_val, "foreign_data", output_file)
 
     for legal_val in card_attributes["legalities"]:
         sql_dict_insert(legal_val, "legalities", output_file)
@@ -586,7 +567,7 @@ def handle_card_row_insertion(
 
     return {
         "cards": card_insert_values,
-        "foreignData": foreign_insert_values,
+        "foreign_data": foreign_insert_values,
         "legalities": legal_insert_values,
         "rulings": ruling_insert_values,
         "prices": price_insert_values,
diff --git a/mtgsqlive/sql2csv.py b/mtgsqlive/sql2csv.py
new file mode 100644
index 0000000..14cdce8
--- /dev/null
+++ b/mtgsqlive/sql2csv.py
@@ -0,0 +1,36 @@
+import logging
+import pathlib
+import sqlite3
+from typing import Any, Dict
+
+import pandas
+
+
+def execute(input_file: pathlib.Path, output_dir: Dict[str, Any]) -> None:
+    """
+    Create the CSV dumps from SQLite
+    :param input_file: SQLite file
+    :param output_dir: Output directory
+    """
+    # Make output path
+    output_dir["path"].mkdir(parents=True, exist_ok=True)
+
+    db = sqlite3.connect(str(input_file))
+    cursor = db.cursor()
+    cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
+
+    tables = cursor.fetchall()
+    for table_name in tables:
+        table_name = table_name[0]
+        if table_name == "sqlite_sequence":
+            # SQLite's AUTOINCREMENT bookkeeping table, not exportable data
+            continue
+        logging.info(f"Handling {table_name}")
+
+        table = pandas.read_sql_query(f"SELECT * from {table_name}", db)
+        table.to_csv(
+            str(output_dir["path"].joinpath(table_name + ".csv")), index_label="index"
+        )
+
+    cursor.close()
+    db.close()
diff --git a/requirements.txt b/requirements.txt
index 05f27b4..5889605 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,2 +1,3 @@
+pandas
 argparse
 requests
\ No newline at end of file