mage/Utils/de-dup-cards-data.py

43 lines
No EOL
1.5 KiB
Python

"""
Purpose: Removes duplicate CardName|CardSet|CardNumber| entries from mtg-cards-data.txt that crop up
@author: escplan9 (Derek Monturo - dmontur1 at gmail dot com)
@version: 1.0
Written in Python 3.x, should work in Python 2.x as well.
"""
import re
"""
example line from file:
Aven Mimeomancer|Alara Reborn|2|R|{1}{W}{U}|Creature - Bird Wizard|3|1|Flying$At the beginning of your upkeep, you may put a feather counter on target creature. If you do, that creature is 3/1 and has flying for as long as it has a feather counter on it.|
With the reg-ex pattern below, separates into 2 match groups
match-group-1: Aven Mimeomancer|Alara Reborn|2|
match-group-2: (remainder of the line up to the \r\n)
"""
# Group 1 is the de-dup key "CardName|CardSet|CardNumber|"; group 2 is the rest of the line.
# Each field is matched with [^|]* instead of a greedy .* so the key stops at the third pipe
# rather than swallowing later fields (rarity, cost, type, ...) that happen to precede
# another "|<digits>|" further along the line.
# No trailing newline is required, so a final line without an EOL is still recognised.
reg_ptn = re.compile(r'([a-zA-Z][^|]*[|][a-zA-Z][^|]*[|]\d+[|])(.*)')
orig_txt_filename = 'mtg-cards-data.txt'
new_txt_filename = 'unduped-cards-data.txt'


def dedupe_lines(lines):
    """Return the lines with repeated CardName|CardSet|CardNumber| entries removed.

    The first line seen for each key is kept and later lines with the same key are
    dropped. Lines that do not match ``reg_ptn`` are always kept, in their original
    position, even if they repeat.

    lines: any iterable of strings (e.g. an open text file).
    Returns a list of the surviving lines, unmodified and in input order.
    """
    seen_keys = set()
    kept_lines = []
    for line in lines:
        match_obj = reg_ptn.match(line)
        if match_obj is not None:
            card_set_num = match_obj.group(1)
            if card_set_num in seen_keys:
                # same card-set-number already emitted: skip this duplicate
                continue
            seen_keys.add(card_set_num)
        kept_lines.append(line)
    return kept_lines


def main():
    """Read orig_txt_filename, de-dupe it, and write the result to new_txt_filename."""
    with open(orig_txt_filename) as orig_txt_file:
        kept_lines = dedupe_lines(orig_txt_file)
    # generate new file with the de-duped contents
    with open(new_txt_filename, "w") as new_txt_file:
        new_txt_file.writelines(kept_lines)


if __name__ == "__main__":
    main()