Updated Gatherer Crawler to parse double-faced cards

Replaced the Æ (U+00C6) ligature character with "AE" in the output file.
Updated mtg-cards-data.txt
This commit is contained in:
North 2011-09-25 11:54:24 +03:00
parent 5efe352305
commit 5f5d35a42f
3 changed files with 461 additions and 366 deletions

View file

@ -41,12 +41,27 @@ public class CardParser extends Thread {
} }
try { try {
Elements select = doc.select("#ctl00_ctl00_ctl00_MainContent_SubContent_SubContent_nameRow .value"); Elements select = doc.select("#ctl00_ctl00_ctl00_MainContent_SubContent_SubContentHeader_subtitleDisplay");
String cardName = "";
String selectorModifier = "";
if (!select.isEmpty()) { if (!select.isEmpty()) {
card.setName(select.get(0).text().trim()); cardName = select.get(0).text().trim();
} }
select = doc.select("#ctl00_ctl00_ctl00_MainContent_SubContent_SubContent_manaRow .value img"); select = doc.select("#ctl00_ctl00_ctl00_MainContent_SubContent_SubContent_nameRow .value");
if (!select.isEmpty()) {
card.setName(select.get(0).text().trim());
} else {
card.setName(cardName);
select = doc.select("#ctl00_ctl00_ctl00_MainContent_SubContent_SubContent_ctl05_nameRow .value");
if (!select.isEmpty() && select.get(0).text().trim().equals(cardName)) {
selectorModifier = "_ctl05";
} else {
selectorModifier = "_ctl06";
}
}
select = doc.select("#ctl00_ctl00_ctl00_MainContent_SubContent_SubContent" + selectorModifier + "_manaRow .value img");
List<String> manaCost = new ArrayList<String>(); List<String> manaCost = new ArrayList<String>();
if (!select.isEmpty()) { if (!select.isEmpty()) {
for (Element element : select) { for (Element element : select) {
@ -55,17 +70,17 @@ public class CardParser extends Thread {
} }
card.setManaCost(manaCost); card.setManaCost(manaCost);
select = doc.select("#ctl00_ctl00_ctl00_MainContent_SubContent_SubContent_cmcRow .value"); select = doc.select("#ctl00_ctl00_ctl00_MainContent_SubContent_SubContent" + selectorModifier + "_cmcRow .value");
if (!select.isEmpty()) { if (!select.isEmpty()) {
card.setConvertedManaCost(Integer.parseInt(select.get(0).text().trim())); card.setConvertedManaCost(Integer.parseInt(select.get(0).text().trim()));
} }
select = doc.select("#ctl00_ctl00_ctl00_MainContent_SubContent_SubContent_typeRow .value"); select = doc.select("#ctl00_ctl00_ctl00_MainContent_SubContent_SubContent" + selectorModifier + "_typeRow .value");
if (!select.isEmpty()) { if (!select.isEmpty()) {
card.setTypes(select.get(0).text().trim()); card.setTypes(select.get(0).text().trim());
} }
select = doc.select("#ctl00_ctl00_ctl00_MainContent_SubContent_SubContent_textRow .value .cardtextbox"); select = doc.select("#ctl00_ctl00_ctl00_MainContent_SubContent_SubContent" + selectorModifier + "_textRow .value .cardtextbox");
List<String> cardText = new ArrayList<String>(); List<String> cardText = new ArrayList<String>();
if (!select.isEmpty()) { if (!select.isEmpty()) {
for (Element element : select) { for (Element element : select) {
@ -74,7 +89,7 @@ public class CardParser extends Thread {
} }
card.setCardText(cardText); card.setCardText(cardText);
select = doc.select("#ctl00_ctl00_ctl00_MainContent_SubContent_SubContent_FlavorText .cardtextbox"); select = doc.select("#ctl00_ctl00_ctl00_MainContent_SubContent_SubContent" + selectorModifier + "_FlavorText .cardtextbox");
List<String> flavorText = new ArrayList<String>(); List<String> flavorText = new ArrayList<String>();
if (!select.isEmpty()) { if (!select.isEmpty()) {
for (Element element : select) { for (Element element : select) {
@ -83,22 +98,22 @@ public class CardParser extends Thread {
} }
card.setFlavorText(flavorText); card.setFlavorText(flavorText);
select = doc.select("#ctl00_ctl00_ctl00_MainContent_SubContent_SubContent_ptRow .value"); select = doc.select("#ctl00_ctl00_ctl00_MainContent_SubContent_SubContent" + selectorModifier + "_ptRow .value");
if (!select.isEmpty()) { if (!select.isEmpty()) {
card.setPowerToughness(select.get(0).text().trim()); card.setPowerToughness(select.get(0).text().trim());
} }
select = doc.select("#ctl00_ctl00_ctl00_MainContent_SubContent_SubContent_currentSetSymbol a"); select = doc.select("#ctl00_ctl00_ctl00_MainContent_SubContent_SubContent" + selectorModifier + "_currentSetSymbol a");
if (!select.isEmpty()) { if (!select.isEmpty()) {
card.setExpansion(select.get(1).text().trim()); card.setExpansion(select.get(1).text().trim());
} }
select = doc.select("#ctl00_ctl00_ctl00_MainContent_SubContent_SubContent_rarityRow .value span"); select = doc.select("#ctl00_ctl00_ctl00_MainContent_SubContent_SubContent" + selectorModifier + "_rarityRow .value span");
if (!select.isEmpty()) { if (!select.isEmpty()) {
card.setRarity(select.get(0).text().trim()); card.setRarity(select.get(0).text().trim());
} }
select = doc.select("#ctl00_ctl00_ctl00_MainContent_SubContent_SubContent_otherSetsValue a"); select = doc.select("#ctl00_ctl00_ctl00_MainContent_SubContent_SubContent" + selectorModifier + "_otherSetsValue a");
List<Integer> otherSets = new ArrayList<Integer>(); List<Integer> otherSets = new ArrayList<Integer>();
if (!select.isEmpty()) { if (!select.isEmpty()) {
for (Element element : select) { for (Element element : select) {
@ -112,12 +127,12 @@ public class CardParser extends Thread {
} }
} }
select = doc.select("#ctl00_ctl00_ctl00_MainContent_SubContent_SubContent_numberRow .value"); select = doc.select("#ctl00_ctl00_ctl00_MainContent_SubContent_SubContent" + selectorModifier + "_numberRow .value");
if (!select.isEmpty()) { if (!select.isEmpty()) {
card.setCardNumber(select.get(0).text().trim()); card.setCardNumber(select.get(0).text().trim());
} }
select = doc.select("#ctl00_ctl00_ctl00_MainContent_SubContent_SubContent_ArtistCredit a"); select = doc.select("#ctl00_ctl00_ctl00_MainContent_SubContent_SubContent" + selectorModifier + "_ArtistCredit a");
if (!select.isEmpty()) { if (!select.isEmpty()) {
card.setArtist(select.get(0).text().trim()); card.setArtist(select.get(0).text().trim());
} }

View file

@ -89,7 +89,7 @@ public class ThreadStarter extends Thread {
} }
sb.append("|"); sb.append("|");
out.write(sb.toString()); out.write(sb.toString().replace("\u00C6", "AE"));
out.newLine(); out.newLine();
} }
out.close(); out.close();

File diff suppressed because it is too large Load diff