Add CSV support (#33)

* Add CSV support

Signed-off-by: ZeldaZach <zahalpern+github@gmail.com>

* foreignData => foreign_data. `--all` will now build all components

Signed-off-by: ZeldaZach <zahalpern+github@gmail.com>
Zach H 2019-10-27 19:06:09 -04:00 committed by GitHub
parent cd7e36886a
commit 7139f36241
5 changed files with 111 additions and 36 deletions

.gitignore

@@ -100,5 +100,7 @@ ENV/
 *.json
 
 # data base
-*.db
+*.db*
 *.sqlite*
+*.csv*
+*.sql*

mtgsqlive/__main__.py

@@ -1,9 +1,64 @@
 """
 Main Executor
 """
+import argparse
+import logging
+import pathlib
+
 import mtgsqlive
-from mtgsqlive.json2sql import main
+from mtgsqlive import sql2csv, json2sql
 
 if __name__ == "__main__":
     mtgsqlive.init_logger()
-    main()
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "-i",
+        help="input source (AllPrintings.json, AllSetFiles.zip)",
+        required=True,
+        metavar="fileIn",
+    )
+    parser.add_argument(
+        "-o",
+        help="output folder (outputs/)",
+        default="outputs",
+        required=True,
+        metavar="fileOut",
+    )
+    parser.add_argument(
+        "--all",
+        help="Build all types (SQLite, SQL, CSV)",
+        action="store_true",
+        required=False,
+    )
+    args = parser.parse_args()
+
+    # Define our I/O paths
+    input_file = pathlib.Path(args.i).expanduser()
+    output_file = {"path": pathlib.Path(args.o).expanduser().absolute(), "handle": None}
+
+    if args.all:
+        logging.info("> Creating AllPrintings.sqlite")
+        json2sql.execute(
+            input_file,
+            {
+                "path": output_file["path"].joinpath("AllPrintings.sqlite"),
+                "handle": None,
+            },
+        )
+
+        logging.info("> Creating AllPrintings.sql")
+        json2sql.execute(
+            input_file,
+            {"path": output_file["path"].joinpath("AllPrintings.sql"), "handle": None},
+        )
+
+        logging.info("> Creating AllPrintings CSV components")
+        sql2csv.execute(
+            output_file["path"].joinpath("AllPrintings.sqlite"),
+            {"path": output_file["path"].joinpath("csv"), "handle": None},
+        )
+    elif str(input_file).endswith(".sqlite"):
+        sql2csv.execute(input_file, output_file)
+    else:
+        json2sql.execute(input_file, output_file)
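
With argument parsing now living in the package entry point, the tool is run as `python3 -m mtgsqlive`. For orientation, a minimal sketch of what the `--all` branch above effectively executes — the input name `AllPrintings.json` and the `outputs/` folder follow the defaults shown in the diff; this is illustrative, not part of the commit:

    # Sketch of the --all flow; paths follow the defaults in the diff above.
    import pathlib

    from mtgsqlive import json2sql, sql2csv

    out = pathlib.Path("outputs").expanduser().absolute()
    src = pathlib.Path("AllPrintings.json")

    # JSON -> SQLite database, then JSON -> raw .sql dump
    json2sql.execute(src, {"path": out.joinpath("AllPrintings.sqlite"), "handle": None})
    json2sql.execute(src, {"path": out.joinpath("AllPrintings.sql"), "handle": None})

    # SQLite database -> one CSV per table, under outputs/csv/
    sql2csv.execute(out.joinpath("AllPrintings.sqlite"), {"path": out.joinpath("csv"), "handle": None})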

mtgsqlive/json2sql.py

@@ -13,29 +13,10 @@ LOGGER = logging.getLogger(__name__)
 version = "v4.5.x"  # need to automate this
 
 
-def main() -> None:
+def execute(input_file, output_file) -> None:
     """
     Main function
     """
-    parser = argparse.ArgumentParser()
-    parser.add_argument(
-        "-i",
-        help='input source ("AllSets.json" file or "AllSetFiles" directory)',
-        required=True,
-        metavar="fileIn",
-    )
-    parser.add_argument(
-        "-o",
-        help="output file (*.sqlite, *.db, *.sqlite3, *.db3, *.sql)",
-        required=True,
-        metavar="fileOut",
-    )
-    args = parser.parse_args()
-
-    # Define our I/O paths
-    input_file = pathlib.Path(args.i).expanduser()
-    output_file = {"path": pathlib.Path(args.o).expanduser(), "handle": None}
-
     if not validate_io_streams(input_file, output_file):
         exit(1)
 

@@ -64,11 +45,11 @@ def main() -> None:
     output_file["handle"].close()
 
 
-def validate_io_streams(input_file: pathlib.Path, output_file: Dict) -> bool:
+def validate_io_streams(input_file: pathlib.Path, output_dir: Dict) -> bool:
     """
     Ensure I/O paths are valid and clean for program
     :param input_file: Input file (JSON)
-    :param output_file: Output file (SQLite)
+    :param output_dir: Output dir
     :return: Good to continue status
     """
     if input_file.is_file():

@@ -80,11 +61,11 @@ def validate_io_streams(input_file: pathlib.Path, output_file: Dict) -> bool:
         LOGGER.fatal(f"Invalid input file/directory. ({input_file})")
         return False
 
-    output_file["path"].parent.mkdir(exist_ok=True)
-    if output_file["path"].is_file():
-        LOGGER.warning(f"Output file {output_file['path']} exists already, moving it.")
-        output_file["path"].replace(
-            output_file["path"].parent.joinpath(output_file["path"].name + ".old")
+    output_dir["path"].parent.mkdir(exist_ok=True)
+    if output_dir["path"].is_file():
+        LOGGER.warning(f"Output path {output_dir['path']} exists already, moving it.")
+        output_dir["path"].replace(
+            output_dir["path"].parent.joinpath(output_dir["path"].name + ".old")
         )
 
     return True

@@ -246,8 +227,8 @@ def build_sql_schema(output_file: Dict) -> None:
             "",
             "",
         ],
-        "foreignData": [
-            "CREATE TABLE `foreignData` (",
+        "foreign_data": [
+            "CREATE TABLE `foreign_data` (",
             "id INTEGER PRIMARY KEY AUTOINCREMENT,",
             "flavorText TEXT,",
             "language TEXT,",

@@ -381,8 +362,8 @@ def sql_insert_all_card_fields(
     """
     sql_dict_insert(card_attributes["cards"], "cards", output_file)
 
-    for foreign_val in card_attributes["foreignData"]:
-        sql_dict_insert(foreign_val, "foreignData", output_file)
+    for foreign_val in card_attributes["foreign_data"]:
+        sql_dict_insert(foreign_val, "foreign_data", output_file)
 
     for legal_val in card_attributes["legalities"]:
         sql_dict_insert(legal_val, "legalities", output_file)

@@ -586,7 +567,7 @@ def handle_card_row_insertion(
     return {
         "cards": card_insert_values,
-        "foreignData": foreign_insert_values,
+        "foreign_data": foreign_insert_values,
         "legalities": legal_insert_values,
         "rulings": ruling_insert_values,
         "prices": price_insert_values,

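Databases built before this commit still contain the old `foreignData` table. A minimal migration sketch for an existing file, assuming the default output path — this is not part of the commit:

    # Rename the legacy table in place so it matches the new schema.
    import sqlite3

    db = sqlite3.connect("outputs/AllPrintings.sqlite")  # assumed path
    db.execute("ALTER TABLE `foreignData` RENAME TO `foreign_data`")
    db.commit()
    db.close()
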
mtgsqlive/sql2csv.py (new file)

@@ -0,0 +1,36 @@
+import logging
+import pathlib
+import sqlite3
+from typing import Any, Dict
+
+import pandas
+
+
+def execute(input_file: pathlib.Path, output_dir: Dict[str, Any]) -> None:
+    """
+    Create the CSV dumps from SQLite
+    :param input_file: SQLite file
+    :param output_dir: Output directory
+    """
+    # Make output path
+    output_dir["path"].mkdir(exist_ok=True)
+
+    db = sqlite3.connect(str(input_file))
+    cursor = db.cursor()
+    cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
+    tables = cursor.fetchall()
+
+    for table_name in tables:
+        table_name = table_name[0]
+        logging.info(f"Handling {table_name}")
+
+        table = pandas.read_sql_query(f"SELECT * from {table_name}", db)
+        table.to_csv(
+            str(output_dir["path"].joinpath(table_name + ".csv")), index_label="index"
+        )
+
+    cursor.close()
+    db.close()
+
+    # Remove Schema
+    output_dir["path"].joinpath("sqlite_sequence.csv").unlink()
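
Each table lands in its own CSV written with `index_label="index"`; `sqlite_sequence` is SQLite's internal AUTOINCREMENT bookkeeping table, which is why its CSV is deleted at the end. A minimal sketch of loading one of the dumps back, assuming `--all` was run into `outputs/` (the `cards` table comes from the schema above):

    # Read a generated CSV back into a DataFrame.
    import pandas

    cards = pandas.read_csv("outputs/csv/cards.csv", index_col="index")
    print(len(cards), "rows; first columns:", list(cards.columns)[:5])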

requirements.txt

@@ -1,2 +1,3 @@
+pandas
 argparse
 requests