mirror of
https://github.com/correl/mage.git
synced 2024-12-30 19:10:36 +00:00
43 lines
No EOL
1.5 KiB
Python
43 lines
No EOL
1.5 KiB
Python
"""
|
|
Purpose: Removes duplicate CardName|CardSet|CardNumber| entries from mtg-cards-data.txt that crop up
|
|
|
|
@author: escplan9 (Derek Monturo - dmontur1 at gmail dot com)
|
|
@version: 1.0
|
|
|
|
Written in Python 3.x, should work in Python 2.x as well.
|
|
"""
|
|
import re
|
|
|
|
"""
|
|
example line from file:
|
|
Aven Mimeomancer|Alara Reborn|2|R|{1}{W}{U}|Creature - Bird Wizard|3|1|Flying$At the beginning of your upkeep, you may put a feather counter on target creature. If you do, that creature is 3/1 and has flying for as long as it has a feather counter on it.|
|
|
|
|
With the reg-ex pattern below, separates into 2 match groups
|
|
match-group-1: Aven Mimeomancer|Alara Reborn|2|
|
|
match-group-2: (remainder of the line up to the \r\n)
|
|
"""
|
|
reg_ptn = re.compile(r'([a-zA-Z].*[|][a-zA-Z].*[|]\d+[|])(.*[\r\n])')
|
|
orig_txt_filename = 'mtg-cards-data.txt'
|
|
|
|
card_dict = {}
|
|
new_contents = ''
|
|
|
|
with open(orig_txt_filename) as orig_txt_file:
|
|
for line in orig_txt_file:
|
|
matchObj = re.match(reg_ptn, line)
|
|
if matchObj is not None:
|
|
matchGroups = matchObj.groups()
|
|
if len(matchGroups) > 0:
|
|
card_set_num = matchGroups[0]
|
|
if card_set_num not in card_dict.keys(): # only add unique card-set-number entries to new-contents
|
|
card_dict[card_set_num] = True
|
|
new_contents += line
|
|
else:
|
|
new_contents += line
|
|
else:
|
|
new_contents += line
|
|
|
|
# generate new file with the de-duped contents
|
|
new_txt_filename = 'unduped-cards-data.txt'
|
|
with open(new_txt_filename, "w") as new_txt_file:
|
|
new_txt_file.write(new_contents) |