mirror of
https://github.com/correl/mage.git
synced 2024-12-24 11:50:45 +00:00
Fixed Gatherer Crawler: flavorText is now saved correctly;
for a couple of cards the card number could not be read from magiccards.info (this now works)
This commit is contained in:
parent
d1fc1567fe
commit
8cf8659315
3 changed files with 50 additions and 116 deletions
|
@ -16,7 +16,7 @@ public class Card implements Comparable<Card> {
|
|||
private Integer convertedManaCost;
|
||||
private String types;
|
||||
private List<String> cardText;
|
||||
private String flavorText;
|
||||
private List<String> flavorText;
|
||||
private String powerToughness;
|
||||
private String expansion;
|
||||
private String rarity;
|
||||
|
@ -45,9 +45,8 @@ public class Card implements Comparable<Card> {
|
|||
}
|
||||
cardText = new ArrayList<String>();
|
||||
cardText.addAll(Arrays.asList(split[5].split("\\$")));
|
||||
if (split[6].length() > 0) {
|
||||
flavorText = split[6];
|
||||
}
|
||||
flavorText = new ArrayList<String>();
|
||||
flavorText.addAll(Arrays.asList(split[6].split("\\$")));
|
||||
if (split[7].length() > 0) {
|
||||
powerToughness = split[7];
|
||||
}
|
||||
|
@ -105,11 +104,11 @@ public class Card implements Comparable<Card> {
|
|||
this.expansion = expansion;
|
||||
}
|
||||
|
||||
public String getFlavorText() {
|
||||
public List<String> getFlavorText() {
|
||||
return flavorText;
|
||||
}
|
||||
|
||||
public void setFlavorText(String flavorText) {
|
||||
public void setFlavorText(List<String> flavorText) {
|
||||
this.flavorText = flavorText;
|
||||
}
|
||||
|
||||
|
@ -182,7 +181,13 @@ public class Card implements Comparable<Card> {
|
|||
}
|
||||
}
|
||||
sb.append("|");
|
||||
sb.append(flavorText != null ? flavorText : "").append("|");
|
||||
for (int i = 0; i < flavorText.size(); i++) {
|
||||
sb.append(flavorText.get(i));
|
||||
if (i < flavorText.size() - 1) {
|
||||
sb.append("$");
|
||||
}
|
||||
}
|
||||
sb.append("|");
|
||||
sb.append(powerToughness != null ? powerToughness : "").append("|");
|
||||
sb.append(expansion).append("|");
|
||||
sb.append(rarity != null ? rarity : "").append("|");
|
||||
|
|
|
@ -13,7 +13,7 @@ import org.jsoup.select.Elements;
|
|||
|
||||
/**
|
||||
*
|
||||
* @author robert.biter
|
||||
* @author North
|
||||
*/
|
||||
public class CardParser extends Thread {
|
||||
|
||||
|
@ -69,15 +69,19 @@ public class CardParser extends Thread {
|
|||
List<String> cardText = new ArrayList<String>();
|
||||
if (!select.isEmpty()) {
|
||||
for (Element element : select) {
|
||||
cardText.add(element.html().trim().replace("<img src=\"/Handlers/Image.ashx?size=small&name=", "{").replace("&type=symbol", "}").replaceAll("\" alt=\"[\\d\\w\\s]+?\" align=\"absbottom\" />", "").replace("\n", ""));
|
||||
cardText.add(element.html().trim().replace("<img src=\"/Handlers/Image.ashx?size=small&name=", "{").replace("&type=symbol", "}").replaceAll("\" alt=\"[\\d\\w\\s]+?\" align=\"absbottom\" />", "").replace("\n", "").replace("&quot;", "\""));
|
||||
}
|
||||
}
|
||||
card.setCardText(cardText);
|
||||
|
||||
select = doc.select("#ctl00_ctl00_ctl00_MainContent_SubContent_SubContent_FlavorText .cardtextbox i");
|
||||
select = doc.select("#ctl00_ctl00_ctl00_MainContent_SubContent_SubContent_FlavorText .cardtextbox");
|
||||
List<String> flavorText = new ArrayList<String>();
|
||||
if (!select.isEmpty()) {
|
||||
card.setFlavorText(select.get(0).text().trim());
|
||||
for (Element element : select) {
|
||||
flavorText.add(element.html().trim().replace("&quot;", "\""));
|
||||
}
|
||||
}
|
||||
card.setFlavorText(flavorText);
|
||||
|
||||
select = doc.select("#ctl00_ctl00_ctl00_MainContent_SubContent_SubContent_ptRow .value");
|
||||
if (!select.isEmpty()) {
|
||||
|
@ -146,9 +150,24 @@ public class CardParser extends Thread {
|
|||
}
|
||||
}
|
||||
|
||||
if (card.getCardNumber() == null) {
|
||||
Elements select = doc.select("p a:contains(" + card.getExpansion() + ")");
|
||||
if (!select.isEmpty()) {
|
||||
Matcher matcher = patternUrl.matcher(select.get(0).attr("href"));
|
||||
matcher.find();
|
||||
card.setCardNumber(matcher.group());
|
||||
} else {
|
||||
select = doc.select("p b:contains(#)");
|
||||
if (!select.isEmpty()) {
|
||||
Matcher matcher = patternPrint.matcher(select.get(0).html());
|
||||
matcher.find();
|
||||
card.setCardNumber(matcher.group());
|
||||
}
|
||||
}
|
||||
if (card.getCardNumber() == null) {
|
||||
System.out.println("Card number missing: " + card.getName());
|
||||
}
|
||||
}
|
||||
CardsList.add(card);
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -15,7 +15,7 @@ import org.jsoup.select.Elements;
|
|||
|
||||
/**
|
||||
*
|
||||
* @author robert.biter
|
||||
* @author North
|
||||
*/
|
||||
public class Main {
|
||||
|
||||
|
@ -44,109 +44,19 @@ public class Main {
|
|||
|
||||
public static void main(String[] args) throws IOException, InterruptedException {
|
||||
List<String> sets = new ArrayList<String>();
|
||||
// sets.add("Alara Reborn");
|
||||
// sets.add("Alliances");
|
||||
// sets.add("Antiquities");
|
||||
// sets.add("Apocalypse");
|
||||
// sets.add("Arabian Nights");
|
||||
// sets.add("Archenemy");
|
||||
// sets.add("Battle Royale Box Set");
|
||||
// sets.add("Beatdown Box Set");
|
||||
// sets.add("Betrayers of Kamigawa");
|
||||
// sets.add("Champions of Kamigawa");
|
||||
// sets.add("Chronicles");
|
||||
// sets.add("Classic Sixth Edition");
|
||||
// sets.add("Coldsnap");
|
||||
// sets.add("Conflux");
|
||||
// sets.add("Darksteel");
|
||||
// sets.add("Dissension");
|
||||
// sets.add("Duel Decks: Divine vs. Demonic");
|
||||
// sets.add("Duel Decks: Elspeth vs. Tezzeret");
|
||||
// sets.add("Duel Decks: Elves vs. Goblins");
|
||||
// sets.add("Duel Decks: Garruk vs. Liliana");
|
||||
// sets.add("Duel Decks: Jace vs. Chandra");
|
||||
// sets.add("Duel Decks: Knights vs. Dragons");
|
||||
// sets.add("Duel Decks: Phyrexia vs. the Coalition");
|
||||
// sets.add("Eighth Edition");
|
||||
// sets.add("Eventide");
|
||||
// sets.add("Exodus");
|
||||
// sets.add("Fallen Empires");
|
||||
// sets.add("Fifth Dawn");
|
||||
// sets.add("Fifth Edition");
|
||||
// sets.add("Fourth Edition");
|
||||
// sets.add("From the Vault: Dragons");
|
||||
// sets.add("From the Vault: Exiled");
|
||||
// sets.add("From the Vault: Relics");
|
||||
// sets.add("Future Sight");
|
||||
// sets.add("Guildpact");
|
||||
// sets.add("Homelands");
|
||||
// sets.add("Ice Age");
|
||||
// sets.add("Invasion");
|
||||
// sets.add("Judgment");
|
||||
// sets.add("Legends");
|
||||
// sets.add("Legions");
|
||||
// sets.add("Limited Edition Alpha");
|
||||
// sets.add("Limited Edition Beta");
|
||||
// sets.add("Lorwyn");
|
||||
// sets.add("Magic 2010");
|
||||
// sets.add("Magic 2011");
|
||||
// sets.add("Magic 2012");
|
||||
// sets.add("Masters Edition");
|
||||
// sets.add("Masters Edition II");
|
||||
// sets.add("Masters Edition III");
|
||||
// sets.add("Masters Edition IV");
|
||||
// sets.add("Mercadian Masques");
|
||||
// sets.add("Mirage");
|
||||
// sets.add("Mirrodin");
|
||||
// sets.add("Mirrodin Besieged");
|
||||
// sets.add("Morningtide");
|
||||
// sets.add("Nemesis");
|
||||
// sets.add("New Phyrexia");
|
||||
// sets.add("Ninth Edition");
|
||||
// sets.add("Odyssey");
|
||||
// sets.add("Onslaught");
|
||||
// sets.add("Planar Chaos");
|
||||
// sets.add("Planechase");
|
||||
// sets.add("Planeshift");
|
||||
// sets.add("Portal");
|
||||
// sets.add("Portal Second Age");
|
||||
// sets.add("Portal Three Kingdoms");
|
||||
// sets.add("Premium Deck Series: Fire and Lightning");
|
||||
// sets.add("Premium Deck Series: Slivers");
|
||||
// sets.add("Promo set for Gatherer");
|
||||
// sets.add("Prophecy");
|
||||
// sets.add("Ravnica: City of Guilds");
|
||||
// sets.add("Revised Edition");
|
||||
// sets.add("Rise of the Eldrazi");
|
||||
// sets.add("Saviors of Kamigawa");
|
||||
// sets.add("Scars of Mirrodin");
|
||||
// sets.add("Scourge");
|
||||
// sets.add("Seventh Edition");
|
||||
// sets.add("Shadowmoor");
|
||||
// sets.add("Shards of Alara");
|
||||
// sets.add("Starter 1999");
|
||||
// sets.add("Starter 2000");
|
||||
// sets.add("Stronghold");
|
||||
// sets.add("Tempest");
|
||||
// sets.add("Tenth Edition");
|
||||
// sets.add("The Dark");
|
||||
// sets.add("Time Spiral");
|
||||
// sets.add("Time Spiral \"Timeshifted\"");
|
||||
// sets.add("Torment");
|
||||
// sets.add("Unlimited Edition");
|
||||
// sets.add("Urza's Destiny");
|
||||
// sets.add("Urza's Legacy");
|
||||
// sets.add("Urza's Saga");
|
||||
// sets.add("Vanguard");
|
||||
// sets.add("Visions");
|
||||
// sets.add("Weatherlight");
|
||||
// sets.add("Worldwake");
|
||||
// sets.add("Zendikar");
|
||||
// sets.add("Magic: The Gathering-Commander");
|
||||
|
||||
|
||||
// sets.add("Unglued");
|
||||
// sets.add("Unhinged");
|
||||
// Change to false if you only want to add a specific set
|
||||
if (true) {
|
||||
Document doc = Jsoup.connect("http://gatherer.wizards.com/Pages/Advanced.aspx").get();
|
||||
Elements select = doc.select("#autoCompleteSourceBoxsetAddText0_InnerTextBoxcontainer a");
|
||||
if (!select.isEmpty()) {
|
||||
for (Element element : select) {
|
||||
if (!element.text().equals("Unglued") && !element.text().equals("Unhinged")) {
|
||||
sets.add(element.text());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
readCardsFromFile();
|
||||
|
||||
|
|
Loading…
Reference in a new issue