From 8cf865931569560f09937c2581cd06dbedbdd12a Mon Sep 17 00:00:00 2001 From: North Date: Sun, 28 Aug 2011 17:04:21 +0300 Subject: [PATCH] Fixed Gatherer Crawler: flavorText is correctly saved now; there were a couple of cards where it failed to take the card number from magiccards.info (now it works) --- .../main/java/north/gatherercrawler/Card.java | 19 +-- .../north/gatherercrawler/CardParser.java | 29 ++++- .../main/java/north/gatherercrawler/Main.java | 118 +++--------------- 3 files changed, 50 insertions(+), 116 deletions(-) diff --git a/Utils/GathererCrawler/src/main/java/north/gatherercrawler/Card.java b/Utils/GathererCrawler/src/main/java/north/gatherercrawler/Card.java index b486a24a63..3317049a16 100644 --- a/Utils/GathererCrawler/src/main/java/north/gatherercrawler/Card.java +++ b/Utils/GathererCrawler/src/main/java/north/gatherercrawler/Card.java @@ -16,7 +16,7 @@ public class Card implements Comparable { private Integer convertedManaCost; private String types; private List cardText; - private String flavorText; + private List flavorText; private String powerToughness; private String expansion; private String rarity; @@ -45,9 +45,8 @@ public class Card implements Comparable { } cardText = new ArrayList(); cardText.addAll(Arrays.asList(split[5].split("\\$"))); - if (split[6].length() > 0) { - flavorText = split[6]; - } + flavorText = new ArrayList(); + flavorText.addAll(Arrays.asList(split[6].split("\\$"))); if (split[7].length() > 0) { powerToughness = split[7]; } @@ -105,11 +104,11 @@ public class Card implements Comparable { this.expansion = expansion; } - public String getFlavorText() { + public List getFlavorText() { return flavorText; } - public void setFlavorText(String flavorText) { + public void setFlavorText(List flavorText) { this.flavorText = flavorText; } @@ -182,7 +181,13 @@ public class Card implements Comparable { } } sb.append("|"); - sb.append(flavorText != null ? flavorText : "").append("|"); + for (int i = 0; i < flavorText.size(); i++) { + sb.append(flavorText.get(i)); + if (i < flavorText.size() - 1) { + sb.append("$"); + } + } + sb.append("|"); sb.append(powerToughness != null ? powerToughness : "").append("|"); sb.append(expansion).append("|"); sb.append(rarity != null ? rarity : "").append("|"); diff --git a/Utils/GathererCrawler/src/main/java/north/gatherercrawler/CardParser.java b/Utils/GathererCrawler/src/main/java/north/gatherercrawler/CardParser.java index 1a4bb3aa2b..5dc5875d8c 100644 --- a/Utils/GathererCrawler/src/main/java/north/gatherercrawler/CardParser.java +++ b/Utils/GathererCrawler/src/main/java/north/gatherercrawler/CardParser.java @@ -13,7 +13,7 @@ import org.jsoup.select.Elements; /** * - * @author robert.biter + * @author North */ public class CardParser extends Thread { @@ -69,15 +69,19 @@ public class CardParser extends Thread { List cardText = new ArrayList(); if (!select.isEmpty()) { for (Element element : select) { - cardText.add(element.html().trim().replace("\"[\\d\\w\\s]+?\"", "").replace("\n", "")); + cardText.add(element.html().trim().replace("\"[\\d\\w\\s]+?\"", "").replace("\n", "").replace(""", "\"")); } } card.setCardText(cardText); - select = doc.select("#ctl00_ctl00_ctl00_MainContent_SubContent_SubContent_FlavorText .cardtextbox i"); + select = doc.select("#ctl00_ctl00_ctl00_MainContent_SubContent_SubContent_FlavorText .cardtextbox"); + List flavorText = new ArrayList(); if (!select.isEmpty()) { - card.setFlavorText(select.get(0).text().trim()); + for (Element element : select) { + flavorText.add(element.html().trim().replace(""", "\"")); + } } + card.setFlavorText(flavorText); select = doc.select("#ctl00_ctl00_ctl00_MainContent_SubContent_SubContent_ptRow .value"); if (!select.isEmpty()) { @@ -147,7 +151,22 @@ public class CardParser extends Thread { } if (card.getCardNumber() == null) { - System.out.println("Card number missing: " + card.getName()); + Elements select = doc.select("p a:contains(" + card.getExpansion() + ")"); + if (!select.isEmpty()) { + Matcher matcher = patternUrl.matcher(select.get(0).attr("href")); + matcher.find(); + card.setCardNumber(matcher.group()); + } else { + select = doc.select("p b:contains(#)"); + if (!select.isEmpty()) { + Matcher matcher = patternPrint.matcher(select.get(0).html()); + matcher.find(); + card.setCardNumber(matcher.group()); + } + } + if (card.getCardNumber() == null) { + System.out.println("Card number missing: " + card.getName()); + } } CardsList.add(card); return true; diff --git a/Utils/GathererCrawler/src/main/java/north/gatherercrawler/Main.java b/Utils/GathererCrawler/src/main/java/north/gatherercrawler/Main.java index 32c7fc3e14..0cae3e9dd2 100644 --- a/Utils/GathererCrawler/src/main/java/north/gatherercrawler/Main.java +++ b/Utils/GathererCrawler/src/main/java/north/gatherercrawler/Main.java @@ -15,7 +15,7 @@ import org.jsoup.select.Elements; /** * - * @author robert.biter + * @author North */ public class Main { @@ -44,110 +44,20 @@ public class Main { public static void main(String[] args) throws IOException, InterruptedException { List sets = new ArrayList(); -// sets.add("Alara Reborn"); -// sets.add("Alliances"); -// sets.add("Antiquities"); -// sets.add("Apocalypse"); -// sets.add("Arabian Nights"); -// sets.add("Archenemy"); -// sets.add("Battle Royale Box Set"); -// sets.add("Beatdown Box Set"); -// sets.add("Betrayers of Kamigawa"); -// sets.add("Champions of Kamigawa"); -// sets.add("Chronicles"); -// sets.add("Classic Sixth Edition"); -// sets.add("Coldsnap"); -// sets.add("Conflux"); -// sets.add("Darksteel"); -// sets.add("Dissension"); -// sets.add("Duel Decks: Divine vs. Demonic"); -// sets.add("Duel Decks: Elspeth vs. Tezzeret"); -// sets.add("Duel Decks: Elves vs. Goblins"); -// sets.add("Duel Decks: Garruk vs. Liliana"); -// sets.add("Duel Decks: Jace vs. Chandra"); -// sets.add("Duel Decks: Knights vs. Dragons"); -// sets.add("Duel Decks: Phyrexia vs. the Coalition"); -// sets.add("Eighth Edition"); -// sets.add("Eventide"); -// sets.add("Exodus"); -// sets.add("Fallen Empires"); -// sets.add("Fifth Dawn"); -// sets.add("Fifth Edition"); -// sets.add("Fourth Edition"); -// sets.add("From the Vault: Dragons"); -// sets.add("From the Vault: Exiled"); -// sets.add("From the Vault: Relics"); -// sets.add("Future Sight"); -// sets.add("Guildpact"); -// sets.add("Homelands"); -// sets.add("Ice Age"); -// sets.add("Invasion"); -// sets.add("Judgment"); -// sets.add("Legends"); -// sets.add("Legions"); -// sets.add("Limited Edition Alpha"); -// sets.add("Limited Edition Beta"); -// sets.add("Lorwyn"); -// sets.add("Magic 2010"); -// sets.add("Magic 2011"); -// sets.add("Magic 2012"); -// sets.add("Masters Edition"); -// sets.add("Masters Edition II"); -// sets.add("Masters Edition III"); -// sets.add("Masters Edition IV"); -// sets.add("Mercadian Masques"); -// sets.add("Mirage"); -// sets.add("Mirrodin"); -// sets.add("Mirrodin Besieged"); -// sets.add("Morningtide"); -// sets.add("Nemesis"); -// sets.add("New Phyrexia"); -// sets.add("Ninth Edition"); -// sets.add("Odyssey"); -// sets.add("Onslaught"); -// sets.add("Planar Chaos"); -// sets.add("Planechase"); -// sets.add("Planeshift"); -// sets.add("Portal"); -// sets.add("Portal Second Age"); -// sets.add("Portal Three Kingdoms"); -// sets.add("Premium Deck Series: Fire and Lightning"); -// sets.add("Premium Deck Series: Slivers"); -// sets.add("Promo set for Gatherer"); -// sets.add("Prophecy"); -// sets.add("Ravnica: City of Guilds"); -// sets.add("Revised Edition"); -// sets.add("Rise of the Eldrazi"); -// sets.add("Saviors of Kamigawa"); -// sets.add("Scars of Mirrodin"); -// sets.add("Scourge"); -// sets.add("Seventh Edition"); -// sets.add("Shadowmoor"); -// sets.add("Shards of Alara"); -// sets.add("Starter 1999"); -// sets.add("Starter 2000"); -// sets.add("Stronghold"); -// sets.add("Tempest"); -// sets.add("Tenth Edition"); -// sets.add("The Dark"); -// sets.add("Time Spiral"); -// sets.add("Time Spiral \"Timeshifted\""); -// sets.add("Torment"); -// sets.add("Unlimited Edition"); -// sets.add("Urza's Destiny"); -// sets.add("Urza's Legacy"); -// sets.add("Urza's Saga"); -// sets.add("Vanguard"); -// sets.add("Visions"); -// sets.add("Weatherlight"); -// sets.add("Worldwake"); -// sets.add("Zendikar"); -// sets.add("Magic: The Gathering-Commander"); - - -// sets.add("Unglued"); -// sets.add("Unhinged"); + // Change to false if you only want to add a specific set + if (true) { + Document doc = Jsoup.connect("http://gatherer.wizards.com/Pages/Advanced.aspx").get(); + Elements select = doc.select("#autoCompleteSourceBoxsetAddText0_InnerTextBoxcontainer a"); + if (!select.isEmpty()) { + for (Element element : select) { + if (!element.text().equals("Unglued") && !element.text().equals("Unhinged")) { + sets.add(element.text()); + } + } + } + } + readCardsFromFile(); StringBuilder sb = new StringBuilder();