Updates to MCTS AI

This commit is contained in:
betasteward 2015-04-30 11:33:22 -04:00
parent 442b12503a
commit 9209e43310
3 changed files with 80 additions and 43 deletions

View file

@ -46,6 +46,8 @@ import java.util.List;
import java.util.UUID; import java.util.UUID;
import java.util.concurrent.ExecutorService; import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors; import java.util.concurrent.Executors;
import java.util.concurrent.RejectedExecutionException;
import java.util.concurrent.TimeUnit;
/** /**
* *
@ -60,16 +62,14 @@ public class ComputerPlayerMCTS extends ComputerPlayer implements Player {
protected transient MCTSNode root; protected transient MCTSNode root;
protected int maxThinkTime; protected int maxThinkTime;
private static final transient Logger logger = Logger.getLogger(ComputerPlayerMCTS.class); private static final transient Logger logger = Logger.getLogger(ComputerPlayerMCTS.class);
private transient ExecutorService pool; private int poolSize;
private int cores;
/**
 * Creates an MCTS-driven computer player.
 * <p>
 * The maximum think time is {@code skill * THINK_TIME_MULTIPLIER} truncated
 * to an int, and the worker pool size is the number of processors reported
 * by the runtime at construction time.
 *
 * @param name  passed through to the superclass constructor
 * @param range passed through to the superclass constructor
 * @param skill multiplied by {@code THINK_TIME_MULTIPLIER} to obtain the think time
 */
public ComputerPlayerMCTS(String name, RangeOfInfluence range, int skill) {
    super(name, range);
    this.human = false;
    this.poolSize = Runtime.getRuntime().availableProcessors();
    this.maxThinkTime = (int) (skill * THINK_TIME_MULTIPLIER);
}
protected ComputerPlayerMCTS(UUID id) { protected ComputerPlayerMCTS(UUID id) {
@ -85,19 +85,26 @@ public class ComputerPlayerMCTS extends ComputerPlayer implements Player {
return new ComputerPlayerMCTS(this); return new ComputerPlayerMCTS(this);
} }
protected String lastPhase = "";
@Override @Override
public boolean priority(Game game) { public boolean priority(Game game) {
if (game.getStep().getType() == PhaseStep.PRECOMBAT_MAIN) if (game.getStep().getType() == PhaseStep.UPKEEP) {
logList("computer player " + name + " hand: ", new ArrayList(hand.getCards(game))); if (!lastPhase.equals(game.getTurn().getValue(game.getTurnNum()))) {
logList(game.getTurn().getValue(game.getTurnNum()) + name + " hand: ", new ArrayList(hand.getCards(game)));
lastPhase = game.getTurn().getValue(game.getTurnNum());
}
}
game.getState().setPriorityPlayerId(playerId); game.getState().setPriorityPlayerId(playerId);
game.firePriorityEvent(playerId); game.firePriorityEvent(playerId);
getNextAction(game, NextAction.PRIORITY); getNextAction(game, NextAction.PRIORITY);
Ability ability = root.getAction(); Ability ability = root.getAction();
if (ability == null) if (ability == null)
logger.fatal("null ability"); logger.fatal("null ability");
activateAbility((ActivatedAbility)ability, game); activateAbility((ActivatedAbility)ability, game);
if (ability instanceof PassAbility) if (ability instanceof PassAbility)
return false; return false;
logLife(game);
logger.info("choose action:" + root.getAction() + " success ratio: " + root.getWinRatio());
return true; return true;
} }
@ -119,7 +126,6 @@ public class ComputerPlayerMCTS extends ComputerPlayer implements Player {
newRoot = root.getMatchingState(game.getState().getValue(false, game)); newRoot = root.getMatchingState(game.getState().getValue(false, game));
if (newRoot != null) { if (newRoot != null) {
newRoot.emancipate(); newRoot.emancipate();
logger.info("choose action:" + newRoot.getAction() + " success ratio: " + newRoot.getWinRatio());
} }
else else
logger.info("unable to find matching state"); logger.info("unable to find matching state");
@ -197,26 +203,36 @@ public class ComputerPlayerMCTS extends ComputerPlayer implements Player {
@Override @Override
public void selectAttackers(Game game, UUID attackingPlayerId) { public void selectAttackers(Game game, UUID attackingPlayerId) {
StringBuilder sb = new StringBuilder();
sb.append(game.getTurn().getValue(game.getTurnNum())).append(" player ").append(name).append(" attacking with: ");
getNextAction(game, NextAction.SELECT_ATTACKERS); getNextAction(game, NextAction.SELECT_ATTACKERS);
Combat combat = root.getCombat(); Combat combat = root.getCombat();
UUID opponentId = game.getCombat().getDefenders().iterator().next(); UUID opponentId = game.getCombat().getDefenders().iterator().next();
for (UUID attackerId: combat.getAttackers()) { for (UUID attackerId: combat.getAttackers()) {
this.declareAttacker(attackerId, opponentId, game, false); this.declareAttacker(attackerId, opponentId, game, false);
sb.append(game.getPermanent(attackerId).getName()).append(",");
} }
logger.info(sb.toString());
} }
@Override @Override
public void selectBlockers(Game game, UUID defendingPlayerId) { public void selectBlockers(Game game, UUID defendingPlayerId) {
StringBuilder sb = new StringBuilder();
sb.append(game.getTurn().getValue(game.getTurnNum())).append(" player ").append(name).append(" blocking: ");
getNextAction(game, NextAction.SELECT_BLOCKERS); getNextAction(game, NextAction.SELECT_BLOCKERS);
Combat combat = root.getCombat(); Combat combat = root.getCombat();
List<CombatGroup> groups = game.getCombat().getGroups(); List<CombatGroup> groups = game.getCombat().getGroups();
for (int i = 0; i < groups.size(); i++) { for (int i = 0; i < groups.size(); i++) {
if (i < combat.getGroups().size()) { if (i < combat.getGroups().size()) {
sb.append(game.getPermanent(groups.get(i).getAttackers().get(0)).getName()).append(" with: ");
for (UUID blockerId: combat.getGroups().get(i).getBlockers()) { for (UUID blockerId: combat.getGroups().get(i).getBlockers()) {
this.declareBlocker(this.getId(), blockerId, groups.get(i).getAttackers().get(0), game); this.declareBlocker(this.getId(), blockerId, groups.get(i).getAttackers().get(0), game);
sb.append(game.getPermanent(blockerId).getName()).append(",");
} }
sb.append("|");
} }
} }
logger.info(sb.toString());
} }
// @Override // @Override
@ -254,17 +270,16 @@ public class ComputerPlayerMCTS extends ComputerPlayer implements Player {
// throw new UnsupportedOperationException("Not supported yet."); // throw new UnsupportedOperationException("Not supported yet.");
// } // }
protected long totalThinkTime = 0;
protected long totalSimulations = 0;
protected void applyMCTS(final Game game, final NextAction action) { protected void applyMCTS(final Game game, final NextAction action) {
int thinkTime = calculateThinkTime(game, action); int thinkTime = calculateThinkTime(game, action);
long startTime = System.nanoTime();
long endTime = startTime + (thinkTime * 1000000000l);
logger.info("applyMCTS - Thinking for " + (endTime - startTime)/1000000000.0 + "s");
if (thinkTime > 0) { if (thinkTime > 0) {
if (USE_MULTIPLE_THREADS) { if (USE_MULTIPLE_THREADS) {
List<MCTSExecutor> tasks = new ArrayList<MCTSExecutor>(); ExecutorService pool = Executors.newFixedThreadPool(poolSize);
for (int i = 0; i < cores; i++) { List<MCTSExecutor> tasks = new ArrayList<>();
for (int i = 0; i < poolSize; i++) {
Game sim = createMCTSGame(game); Game sim = createMCTSGame(game);
MCTSPlayer player = (MCTSPlayer) sim.getPlayer(playerId); MCTSPlayer player = (MCTSPlayer) sim.getPlayer(playerId);
player.setNextAction(action); player.setNextAction(action);
@ -273,18 +288,28 @@ public class ComputerPlayerMCTS extends ComputerPlayer implements Player {
} }
try { try {
pool.invokeAll(tasks); pool.invokeAll(tasks, thinkTime, TimeUnit.SECONDS);
} catch (InterruptedException ex) { pool.awaitTermination(1, TimeUnit.SECONDS);
pool.shutdownNow();
} catch (InterruptedException | RejectedExecutionException ex) {
logger.warn("applyMCTS interrupted"); logger.warn("applyMCTS interrupted");
} }
int simCount = 0;
for (MCTSExecutor task: tasks) { for (MCTSExecutor task: tasks) {
simCount += task.getSimCount();
root.merge(task.getRoot()); root.merge(task.getRoot());
task.clear(); task.clear();
} }
tasks.clear(); tasks.clear();
totalThinkTime += thinkTime;
totalSimulations += simCount;
logger.info("Player: " + name + " Simulated " + simCount + " games in " + thinkTime + " seconds - nodes in tree: " + root.size());
logger.info("Total: Simulated " + totalSimulations + " games in " + totalThinkTime + " seconds - Average: " + totalSimulations/totalThinkTime);
} }
else { else {
long startTime = System.nanoTime();
long endTime = startTime + (thinkTime * 1000000000l);
MCTSNode current; MCTSNode current;
int simCount = 0; int simCount = 0;
while (true) { while (true) {
@ -316,10 +341,9 @@ public class ComputerPlayerMCTS extends ComputerPlayer implements Player {
} }
logger.info("Simulated " + simCount + " games - nodes in tree: " + root.size()); logger.info("Simulated " + simCount + " games - nodes in tree: " + root.size());
} }
displayMemory(); // displayMemory();
} }
// root.print(1);
} }
//try to ensure that there are at least THINK_MIN_RATIO simulations per node at all times //try to ensure that there are at least THINK_MIN_RATIO simulations per node at all times
@ -328,7 +352,7 @@ public class ComputerPlayerMCTS extends ComputerPlayer implements Player {
int nodeSizeRatio = 0; int nodeSizeRatio = 0;
if (root.getNumChildren() > 0) if (root.getNumChildren() > 0)
nodeSizeRatio = root.getVisits() / root.getNumChildren(); nodeSizeRatio = root.getVisits() / root.getNumChildren();
logger.info("Ratio: " + nodeSizeRatio); // logger.info("Ratio: " + nodeSizeRatio);
PhaseStep curStep = game.getStep().getType(); PhaseStep curStep = game.getStep().getType();
if (action == NextAction.SELECT_ATTACKERS || action == NextAction.SELECT_BLOCKERS) { if (action == NextAction.SELECT_ATTACKERS || action == NextAction.SELECT_BLOCKERS) {
if (nodeSizeRatio < THINK_MIN_RATIO) { if (nodeSizeRatio < THINK_MIN_RATIO) {
@ -410,5 +434,14 @@ public class ComputerPlayerMCTS extends ComputerPlayer implements Player {
logger.info("Max heap size: " + heapMaxSize/mb + " Heap size: " + heapSize/mb + " Used: " + heapUsedSize/mb); logger.info("Max heap size: " + heapMaxSize/mb + " Heap size: " + heapSize/mb + " Used: " + heapUsedSize/mb);
} }
/**
 * Logs one info line holding the current turn value followed by a
 * {@code [player NAME:LIFE]} entry for every player in the game.
 *
 * @param game the game whose turn value and player life totals are logged
 */
protected void logLife(Game game) {
    StringBuilder lifeLog = new StringBuilder();
    lifeLog.append(game.getTurn().getValue(game.getTurnNum()));
    for (Player participant : game.getPlayers().values()) {
        lifeLog.append("[player " + participant.getName() + ":" + participant.getLife() + "]");
    }
    logger.info(lifeLog.toString());
}
} }

View file

@ -41,8 +41,9 @@ public class MCTSExecutor implements Callable<Boolean> {
protected transient MCTSNode root; protected transient MCTSNode root;
protected int thinkTime; protected int thinkTime;
protected UUID playerId; protected UUID playerId;
protected int simCount;
private static final transient Logger logger = Logger.getLogger(ComputerPlayerMCTS.class); private static final transient Logger logger = Logger.getLogger(ComputerPlayerMCTS.class);
public MCTSExecutor(Game sim, UUID playerId, int thinkTime) { public MCTSExecutor(Game sim, UUID playerId, int thinkTime) {
this.playerId = playerId; this.playerId = playerId;
@ -52,16 +53,11 @@ public class MCTSExecutor implements Callable<Boolean> {
@Override @Override
public Boolean call() { public Boolean call() {
int simCount = 0; simCount = 0;
long startTime = System.nanoTime();
long endTime = startTime + (thinkTime * 1000000000l);
MCTSNode current; MCTSNode current;
while (true) { while (true) {
long currentTime = System.nanoTime();
if (currentTime > endTime)
break;
current = root; current = root;
// Selection // Selection
@ -92,8 +88,6 @@ public class MCTSExecutor implements Callable<Boolean> {
// Backpropagation // Backpropagation
current.backpropagate(result); current.backpropagate(result);
} }
logger.info("Simulated " + simCount + " games - nodes in tree: " + root.size());
return true;
} }
public MCTSNode getRoot() { public MCTSNode getRoot() {
@ -104,4 +98,7 @@ public class MCTSExecutor implements Callable<Boolean> {
root = null; root = null;
} }
/**
 * Returns the current value of the {@code simCount} field, which
 * {@code call()} resets to 0 when it starts.
 * NOTE(review): presumably the number of simulations completed by this
 * executor - the increment is not visible in this view; confirm.
 *
 * @return the value of {@code simCount}
 */
public int getSimCount() {
return simCount;
}
} }

View file

@ -51,14 +51,14 @@ import org.apache.log4j.Logger;
*/ */
public class MCTSNode { public class MCTSNode {
private static final double selectionCoefficient = 1.0; private static final double selectionCoefficient = Math.sqrt(2.0);
private static final double passRatioTolerance = 0.0; private static final double passRatioTolerance = 0.0;
private static final transient Logger logger = Logger.getLogger(MCTSNode.class); private static final transient Logger logger = Logger.getLogger(MCTSNode.class);
private int visits = 0; private int visits = 0;
private int wins = 0; private int wins = 0;
private MCTSNode parent; private MCTSNode parent;
private final List<MCTSNode> children = new ArrayList<MCTSNode>(); private final List<MCTSNode> children = new ArrayList<>();
private Ability action; private Ability action;
private Game game; private Game game;
private Combat combat; private Combat combat;
@ -74,6 +74,7 @@ public class MCTSNode {
this.terminal = game.gameOver(null); this.terminal = game.gameOver(null);
setPlayer(); setPlayer();
nodeCount = 1; nodeCount = 1;
// logger.info(this.stateValue);
} }
protected MCTSNode(MCTSNode parent, Game game, Ability action) { protected MCTSNode(MCTSNode parent, Game game, Ability action) {
@ -84,6 +85,7 @@ public class MCTSNode {
this.action = action; this.action = action;
setPlayer(); setPlayer();
nodeCount++; nodeCount++;
// logger.info(this.stateValue);
} }
protected MCTSNode(MCTSNode parent, Game game, Combat combat) { protected MCTSNode(MCTSNode parent, Game game, Combat combat) {
@ -94,6 +96,7 @@ public class MCTSNode {
this.parent = parent; this.parent = parent;
setPlayer(); setPlayer();
nodeCount++; nodeCount++;
// logger.info(this.stateValue);
} }
private void setPlayer() { private void setPlayer() {
@ -356,11 +359,10 @@ public class MCTSNode {
* performs a breadth first search for a matching game state * performs a breadth first search for a matching game state
* *
* @param state - the game state that we are looking for * @param state - the game state that we are looking for
* @param nextAction - the next action that will be performed
* @return the matching state or null if no match is found * @return the matching state or null if no match is found
*/ */
public MCTSNode getMatchingState(String state) { public MCTSNode getMatchingState(String state) {
ArrayDeque<MCTSNode> queue = new ArrayDeque<MCTSNode>(); ArrayDeque<MCTSNode> queue = new ArrayDeque<>();
queue.add(this); queue.add(this);
while (!queue.isEmpty()) { while (!queue.isEmpty()) {
@ -376,14 +378,15 @@ public class MCTSNode {
public void merge(MCTSNode merge) { public void merge(MCTSNode merge) {
if (!stateValue.equals(merge.stateValue)) { if (!stateValue.equals(merge.stateValue)) {
logger.info("mismatched merge states"); logger.info("mismatched merge states at root");
return; return;
} }
this.visits += merge.visits; this.visits += merge.visits;
this.wins += merge.wins; this.wins += merge.wins;
int mismatchCount = 0;
List<MCTSNode> mergeChildren = new ArrayList<MCTSNode>();
List<MCTSNode> mergeChildren = new ArrayList<>();
for (MCTSNode child: merge.children) { for (MCTSNode child: merge.children) {
mergeChildren.add(child); mergeChildren.add(child);
} }
@ -393,8 +396,9 @@ public class MCTSNode {
if (mergeChild.action != null && child.action != null) { if (mergeChild.action != null && child.action != null) {
if (mergeChild.action.toString().equals(child.action.toString())) { if (mergeChild.action.toString().equals(child.action.toString())) {
if (!mergeChild.stateValue.equals(child.stateValue)) { if (!mergeChild.stateValue.equals(child.stateValue)) {
logger.info("mismatched merge states"); mismatchCount++;
mergeChildren.remove(mergeChild); // logger.info("mismatched merge states");
// mergeChildren.remove(mergeChild);
} }
else { else {
child.merge(mergeChild); child.merge(mergeChild);
@ -406,8 +410,9 @@ public class MCTSNode {
else { else {
if (mergeChild.combat.getValue().equals(child.combat.getValue())) { if (mergeChild.combat.getValue().equals(child.combat.getValue())) {
if (!mergeChild.stateValue.equals(child.stateValue)) { if (!mergeChild.stateValue.equals(child.stateValue)) {
logger.info("mismatched merge states"); mismatchCount++;
mergeChildren.remove(mergeChild); // logger.info("mismatched merge states");
// mergeChildren.remove(mergeChild);
} }
else { else {
child.merge(mergeChild); child.merge(mergeChild);
@ -424,6 +429,8 @@ public class MCTSNode {
children.add(child); children.add(child);
} }
} }
// if (mismatchCount > 0)
// logger.info("mismatched merge states: " + mismatchCount);
} }
// public void print(int depth) { // public void print(int depth) {