mirror of
https://github.com/correl/mage.git
synced 2024-11-28 19:19:55 +00:00
Updates to MCTS AI
This commit is contained in:
parent
442b12503a
commit
9209e43310
3 changed files with 80 additions and 43 deletions
|
@ -46,6 +46,8 @@ import java.util.List;
|
||||||
import java.util.UUID;
|
import java.util.UUID;
|
||||||
import java.util.concurrent.ExecutorService;
|
import java.util.concurrent.ExecutorService;
|
||||||
import java.util.concurrent.Executors;
|
import java.util.concurrent.Executors;
|
||||||
|
import java.util.concurrent.RejectedExecutionException;
|
||||||
|
import java.util.concurrent.TimeUnit;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
|
@ -61,15 +63,13 @@ public class ComputerPlayerMCTS extends ComputerPlayer implements Player {
|
||||||
protected transient MCTSNode root;
|
protected transient MCTSNode root;
|
||||||
protected int maxThinkTime;
|
protected int maxThinkTime;
|
||||||
private static final transient Logger logger = Logger.getLogger(ComputerPlayerMCTS.class);
|
private static final transient Logger logger = Logger.getLogger(ComputerPlayerMCTS.class);
|
||||||
private transient ExecutorService pool;
|
private int poolSize;
|
||||||
private int cores;
|
|
||||||
|
|
||||||
public ComputerPlayerMCTS(String name, RangeOfInfluence range, int skill) {
|
public ComputerPlayerMCTS(String name, RangeOfInfluence range, int skill) {
|
||||||
super(name, range);
|
super(name, range);
|
||||||
human = false;
|
human = false;
|
||||||
maxThinkTime = (int) (skill * THINK_TIME_MULTIPLIER);
|
maxThinkTime = (int) (skill * THINK_TIME_MULTIPLIER);
|
||||||
cores = Runtime.getRuntime().availableProcessors();
|
poolSize = Runtime.getRuntime().availableProcessors();
|
||||||
pool = Executors.newFixedThreadPool(cores);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
protected ComputerPlayerMCTS(UUID id) {
|
protected ComputerPlayerMCTS(UUID id) {
|
||||||
|
@ -85,10 +85,15 @@ public class ComputerPlayerMCTS extends ComputerPlayer implements Player {
|
||||||
return new ComputerPlayerMCTS(this);
|
return new ComputerPlayerMCTS(this);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
protected String lastPhase = "";
|
||||||
@Override
|
@Override
|
||||||
public boolean priority(Game game) {
|
public boolean priority(Game game) {
|
||||||
if (game.getStep().getType() == PhaseStep.PRECOMBAT_MAIN)
|
if (game.getStep().getType() == PhaseStep.UPKEEP) {
|
||||||
logList("computer player " + name + " hand: ", new ArrayList(hand.getCards(game)));
|
if (!lastPhase.equals(game.getTurn().getValue(game.getTurnNum()))) {
|
||||||
|
logList(game.getTurn().getValue(game.getTurnNum()) + name + " hand: ", new ArrayList(hand.getCards(game)));
|
||||||
|
lastPhase = game.getTurn().getValue(game.getTurnNum());
|
||||||
|
}
|
||||||
|
}
|
||||||
game.getState().setPriorityPlayerId(playerId);
|
game.getState().setPriorityPlayerId(playerId);
|
||||||
game.firePriorityEvent(playerId);
|
game.firePriorityEvent(playerId);
|
||||||
getNextAction(game, NextAction.PRIORITY);
|
getNextAction(game, NextAction.PRIORITY);
|
||||||
|
@ -98,6 +103,8 @@ public class ComputerPlayerMCTS extends ComputerPlayer implements Player {
|
||||||
activateAbility((ActivatedAbility)ability, game);
|
activateAbility((ActivatedAbility)ability, game);
|
||||||
if (ability instanceof PassAbility)
|
if (ability instanceof PassAbility)
|
||||||
return false;
|
return false;
|
||||||
|
logLife(game);
|
||||||
|
logger.info("choose action:" + root.getAction() + " success ratio: " + root.getWinRatio());
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -119,7 +126,6 @@ public class ComputerPlayerMCTS extends ComputerPlayer implements Player {
|
||||||
newRoot = root.getMatchingState(game.getState().getValue(false, game));
|
newRoot = root.getMatchingState(game.getState().getValue(false, game));
|
||||||
if (newRoot != null) {
|
if (newRoot != null) {
|
||||||
newRoot.emancipate();
|
newRoot.emancipate();
|
||||||
logger.info("choose action:" + newRoot.getAction() + " success ratio: " + newRoot.getWinRatio());
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
logger.info("unable to find matching state");
|
logger.info("unable to find matching state");
|
||||||
|
@ -197,26 +203,36 @@ public class ComputerPlayerMCTS extends ComputerPlayer implements Player {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void selectAttackers(Game game, UUID attackingPlayerId) {
|
public void selectAttackers(Game game, UUID attackingPlayerId) {
|
||||||
|
StringBuilder sb = new StringBuilder();
|
||||||
|
sb.append(game.getTurn().getValue(game.getTurnNum())).append(" player ").append(name).append(" attacking with: ");
|
||||||
getNextAction(game, NextAction.SELECT_ATTACKERS);
|
getNextAction(game, NextAction.SELECT_ATTACKERS);
|
||||||
Combat combat = root.getCombat();
|
Combat combat = root.getCombat();
|
||||||
UUID opponentId = game.getCombat().getDefenders().iterator().next();
|
UUID opponentId = game.getCombat().getDefenders().iterator().next();
|
||||||
for (UUID attackerId: combat.getAttackers()) {
|
for (UUID attackerId: combat.getAttackers()) {
|
||||||
this.declareAttacker(attackerId, opponentId, game, false);
|
this.declareAttacker(attackerId, opponentId, game, false);
|
||||||
|
sb.append(game.getPermanent(attackerId).getName()).append(",");
|
||||||
}
|
}
|
||||||
|
logger.info(sb.toString());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void selectBlockers(Game game, UUID defendingPlayerId) {
|
public void selectBlockers(Game game, UUID defendingPlayerId) {
|
||||||
|
StringBuilder sb = new StringBuilder();
|
||||||
|
sb.append(game.getTurn().getValue(game.getTurnNum())).append(" player ").append(name).append(" blocking: ");
|
||||||
getNextAction(game, NextAction.SELECT_BLOCKERS);
|
getNextAction(game, NextAction.SELECT_BLOCKERS);
|
||||||
Combat combat = root.getCombat();
|
Combat combat = root.getCombat();
|
||||||
List<CombatGroup> groups = game.getCombat().getGroups();
|
List<CombatGroup> groups = game.getCombat().getGroups();
|
||||||
for (int i = 0; i < groups.size(); i++) {
|
for (int i = 0; i < groups.size(); i++) {
|
||||||
if (i < combat.getGroups().size()) {
|
if (i < combat.getGroups().size()) {
|
||||||
|
sb.append(game.getPermanent(groups.get(i).getAttackers().get(0)).getName()).append(" with: ");
|
||||||
for (UUID blockerId: combat.getGroups().get(i).getBlockers()) {
|
for (UUID blockerId: combat.getGroups().get(i).getBlockers()) {
|
||||||
this.declareBlocker(this.getId(), blockerId, groups.get(i).getAttackers().get(0), game);
|
this.declareBlocker(this.getId(), blockerId, groups.get(i).getAttackers().get(0), game);
|
||||||
|
sb.append(game.getPermanent(blockerId).getName()).append(",");
|
||||||
|
}
|
||||||
|
sb.append("|");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
logger.info(sb.toString());
|
||||||
}
|
}
|
||||||
|
|
||||||
// @Override
|
// @Override
|
||||||
|
@ -254,17 +270,16 @@ public class ComputerPlayerMCTS extends ComputerPlayer implements Player {
|
||||||
// throw new UnsupportedOperationException("Not supported yet.");
|
// throw new UnsupportedOperationException("Not supported yet.");
|
||||||
// }
|
// }
|
||||||
|
|
||||||
|
protected long totalThinkTime = 0;
|
||||||
|
protected long totalSimulations = 0;
|
||||||
protected void applyMCTS(final Game game, final NextAction action) {
|
protected void applyMCTS(final Game game, final NextAction action) {
|
||||||
int thinkTime = calculateThinkTime(game, action);
|
int thinkTime = calculateThinkTime(game, action);
|
||||||
|
|
||||||
long startTime = System.nanoTime();
|
|
||||||
long endTime = startTime + (thinkTime * 1000000000l);
|
|
||||||
logger.info("applyMCTS - Thinking for " + (endTime - startTime)/1000000000.0 + "s");
|
|
||||||
|
|
||||||
if (thinkTime > 0) {
|
if (thinkTime > 0) {
|
||||||
if (USE_MULTIPLE_THREADS) {
|
if (USE_MULTIPLE_THREADS) {
|
||||||
List<MCTSExecutor> tasks = new ArrayList<MCTSExecutor>();
|
ExecutorService pool = Executors.newFixedThreadPool(poolSize);
|
||||||
for (int i = 0; i < cores; i++) {
|
List<MCTSExecutor> tasks = new ArrayList<>();
|
||||||
|
for (int i = 0; i < poolSize; i++) {
|
||||||
Game sim = createMCTSGame(game);
|
Game sim = createMCTSGame(game);
|
||||||
MCTSPlayer player = (MCTSPlayer) sim.getPlayer(playerId);
|
MCTSPlayer player = (MCTSPlayer) sim.getPlayer(playerId);
|
||||||
player.setNextAction(action);
|
player.setNextAction(action);
|
||||||
|
@ -273,18 +288,28 @@ public class ComputerPlayerMCTS extends ComputerPlayer implements Player {
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
pool.invokeAll(tasks);
|
pool.invokeAll(tasks, thinkTime, TimeUnit.SECONDS);
|
||||||
} catch (InterruptedException ex) {
|
pool.awaitTermination(1, TimeUnit.SECONDS);
|
||||||
|
pool.shutdownNow();
|
||||||
|
} catch (InterruptedException | RejectedExecutionException ex) {
|
||||||
logger.warn("applyMCTS interrupted");
|
logger.warn("applyMCTS interrupted");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int simCount = 0;
|
||||||
for (MCTSExecutor task: tasks) {
|
for (MCTSExecutor task: tasks) {
|
||||||
|
simCount += task.getSimCount();
|
||||||
root.merge(task.getRoot());
|
root.merge(task.getRoot());
|
||||||
task.clear();
|
task.clear();
|
||||||
}
|
}
|
||||||
tasks.clear();
|
tasks.clear();
|
||||||
|
totalThinkTime += thinkTime;
|
||||||
|
totalSimulations += simCount;
|
||||||
|
logger.info("Player: " + name + " Simulated " + simCount + " games in " + thinkTime + " seconds - nodes in tree: " + root.size());
|
||||||
|
logger.info("Total: Simulated " + totalSimulations + " games in " + totalThinkTime + " seconds - Average: " + totalSimulations/totalThinkTime);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
|
long startTime = System.nanoTime();
|
||||||
|
long endTime = startTime + (thinkTime * 1000000000l);
|
||||||
MCTSNode current;
|
MCTSNode current;
|
||||||
int simCount = 0;
|
int simCount = 0;
|
||||||
while (true) {
|
while (true) {
|
||||||
|
@ -316,10 +341,9 @@ public class ComputerPlayerMCTS extends ComputerPlayer implements Player {
|
||||||
}
|
}
|
||||||
logger.info("Simulated " + simCount + " games - nodes in tree: " + root.size());
|
logger.info("Simulated " + simCount + " games - nodes in tree: " + root.size());
|
||||||
}
|
}
|
||||||
displayMemory();
|
// displayMemory();
|
||||||
}
|
}
|
||||||
|
|
||||||
// root.print(1);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
//try to ensure that there are at least THINK_MIN_RATIO simulations per node at all times
|
//try to ensure that there are at least THINK_MIN_RATIO simulations per node at all times
|
||||||
|
@ -328,7 +352,7 @@ public class ComputerPlayerMCTS extends ComputerPlayer implements Player {
|
||||||
int nodeSizeRatio = 0;
|
int nodeSizeRatio = 0;
|
||||||
if (root.getNumChildren() > 0)
|
if (root.getNumChildren() > 0)
|
||||||
nodeSizeRatio = root.getVisits() / root.getNumChildren();
|
nodeSizeRatio = root.getVisits() / root.getNumChildren();
|
||||||
logger.info("Ratio: " + nodeSizeRatio);
|
// logger.info("Ratio: " + nodeSizeRatio);
|
||||||
PhaseStep curStep = game.getStep().getType();
|
PhaseStep curStep = game.getStep().getType();
|
||||||
if (action == NextAction.SELECT_ATTACKERS || action == NextAction.SELECT_BLOCKERS) {
|
if (action == NextAction.SELECT_ATTACKERS || action == NextAction.SELECT_BLOCKERS) {
|
||||||
if (nodeSizeRatio < THINK_MIN_RATIO) {
|
if (nodeSizeRatio < THINK_MIN_RATIO) {
|
||||||
|
@ -411,4 +435,13 @@ public class ComputerPlayerMCTS extends ComputerPlayer implements Player {
|
||||||
logger.info("Max heap size: " + heapMaxSize/mb + " Heap size: " + heapSize/mb + " Used: " + heapUsedSize/mb);
|
logger.info("Max heap size: " + heapMaxSize/mb + " Heap size: " + heapSize/mb + " Used: " + heapUsedSize/mb);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
protected void logLife(Game game) {
|
||||||
|
StringBuilder sb = new StringBuilder();
|
||||||
|
sb.append(game.getTurn().getValue(game.getTurnNum()));
|
||||||
|
for (Player player: game.getPlayers().values()) {
|
||||||
|
sb.append("[player ").append(player.getName()).append(":").append(player.getLife()).append("]");
|
||||||
|
}
|
||||||
|
logger.info(sb.toString());
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -41,6 +41,7 @@ public class MCTSExecutor implements Callable<Boolean> {
|
||||||
protected transient MCTSNode root;
|
protected transient MCTSNode root;
|
||||||
protected int thinkTime;
|
protected int thinkTime;
|
||||||
protected UUID playerId;
|
protected UUID playerId;
|
||||||
|
protected int simCount;
|
||||||
|
|
||||||
private static final transient Logger logger = Logger.getLogger(ComputerPlayerMCTS.class);
|
private static final transient Logger logger = Logger.getLogger(ComputerPlayerMCTS.class);
|
||||||
|
|
||||||
|
@ -52,16 +53,11 @@ public class MCTSExecutor implements Callable<Boolean> {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Boolean call() {
|
public Boolean call() {
|
||||||
int simCount = 0;
|
simCount = 0;
|
||||||
long startTime = System.nanoTime();
|
|
||||||
long endTime = startTime + (thinkTime * 1000000000l);
|
|
||||||
MCTSNode current;
|
MCTSNode current;
|
||||||
|
|
||||||
|
|
||||||
while (true) {
|
while (true) {
|
||||||
long currentTime = System.nanoTime();
|
|
||||||
if (currentTime > endTime)
|
|
||||||
break;
|
|
||||||
current = root;
|
current = root;
|
||||||
|
|
||||||
// Selection
|
// Selection
|
||||||
|
@ -92,8 +88,6 @@ public class MCTSExecutor implements Callable<Boolean> {
|
||||||
// Backpropagation
|
// Backpropagation
|
||||||
current.backpropagate(result);
|
current.backpropagate(result);
|
||||||
}
|
}
|
||||||
logger.info("Simulated " + simCount + " games - nodes in tree: " + root.size());
|
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public MCTSNode getRoot() {
|
public MCTSNode getRoot() {
|
||||||
|
@ -104,4 +98,7 @@ public class MCTSExecutor implements Callable<Boolean> {
|
||||||
root = null;
|
root = null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public int getSimCount() {
|
||||||
|
return simCount;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -51,14 +51,14 @@ import org.apache.log4j.Logger;
|
||||||
*/
|
*/
|
||||||
public class MCTSNode {
|
public class MCTSNode {
|
||||||
|
|
||||||
private static final double selectionCoefficient = 1.0;
|
private static final double selectionCoefficient = Math.sqrt(2.0);
|
||||||
private static final double passRatioTolerance = 0.0;
|
private static final double passRatioTolerance = 0.0;
|
||||||
private static final transient Logger logger = Logger.getLogger(MCTSNode.class);
|
private static final transient Logger logger = Logger.getLogger(MCTSNode.class);
|
||||||
|
|
||||||
private int visits = 0;
|
private int visits = 0;
|
||||||
private int wins = 0;
|
private int wins = 0;
|
||||||
private MCTSNode parent;
|
private MCTSNode parent;
|
||||||
private final List<MCTSNode> children = new ArrayList<MCTSNode>();
|
private final List<MCTSNode> children = new ArrayList<>();
|
||||||
private Ability action;
|
private Ability action;
|
||||||
private Game game;
|
private Game game;
|
||||||
private Combat combat;
|
private Combat combat;
|
||||||
|
@ -74,6 +74,7 @@ public class MCTSNode {
|
||||||
this.terminal = game.gameOver(null);
|
this.terminal = game.gameOver(null);
|
||||||
setPlayer();
|
setPlayer();
|
||||||
nodeCount = 1;
|
nodeCount = 1;
|
||||||
|
// logger.info(this.stateValue);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected MCTSNode(MCTSNode parent, Game game, Ability action) {
|
protected MCTSNode(MCTSNode parent, Game game, Ability action) {
|
||||||
|
@ -84,6 +85,7 @@ public class MCTSNode {
|
||||||
this.action = action;
|
this.action = action;
|
||||||
setPlayer();
|
setPlayer();
|
||||||
nodeCount++;
|
nodeCount++;
|
||||||
|
// logger.info(this.stateValue);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected MCTSNode(MCTSNode parent, Game game, Combat combat) {
|
protected MCTSNode(MCTSNode parent, Game game, Combat combat) {
|
||||||
|
@ -94,6 +96,7 @@ public class MCTSNode {
|
||||||
this.parent = parent;
|
this.parent = parent;
|
||||||
setPlayer();
|
setPlayer();
|
||||||
nodeCount++;
|
nodeCount++;
|
||||||
|
// logger.info(this.stateValue);
|
||||||
}
|
}
|
||||||
|
|
||||||
private void setPlayer() {
|
private void setPlayer() {
|
||||||
|
@ -356,11 +359,10 @@ public class MCTSNode {
|
||||||
* performs a breadth first search for a matching game state
|
* performs a breadth first search for a matching game state
|
||||||
*
|
*
|
||||||
* @param state - the game state that we are looking for
|
* @param state - the game state that we are looking for
|
||||||
* @param nextAction - the next action that will be performed
|
|
||||||
* @return the matching state or null if no match is found
|
* @return the matching state or null if no match is found
|
||||||
*/
|
*/
|
||||||
public MCTSNode getMatchingState(String state) {
|
public MCTSNode getMatchingState(String state) {
|
||||||
ArrayDeque<MCTSNode> queue = new ArrayDeque<MCTSNode>();
|
ArrayDeque<MCTSNode> queue = new ArrayDeque<>();
|
||||||
queue.add(this);
|
queue.add(this);
|
||||||
|
|
||||||
while (!queue.isEmpty()) {
|
while (!queue.isEmpty()) {
|
||||||
|
@ -376,14 +378,15 @@ public class MCTSNode {
|
||||||
|
|
||||||
public void merge(MCTSNode merge) {
|
public void merge(MCTSNode merge) {
|
||||||
if (!stateValue.equals(merge.stateValue)) {
|
if (!stateValue.equals(merge.stateValue)) {
|
||||||
logger.info("mismatched merge states");
|
logger.info("mismatched merge states at root");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
this.visits += merge.visits;
|
this.visits += merge.visits;
|
||||||
this.wins += merge.wins;
|
this.wins += merge.wins;
|
||||||
|
int mismatchCount = 0;
|
||||||
|
|
||||||
List<MCTSNode> mergeChildren = new ArrayList<MCTSNode>();
|
List<MCTSNode> mergeChildren = new ArrayList<>();
|
||||||
for (MCTSNode child: merge.children) {
|
for (MCTSNode child: merge.children) {
|
||||||
mergeChildren.add(child);
|
mergeChildren.add(child);
|
||||||
}
|
}
|
||||||
|
@ -393,8 +396,9 @@ public class MCTSNode {
|
||||||
if (mergeChild.action != null && child.action != null) {
|
if (mergeChild.action != null && child.action != null) {
|
||||||
if (mergeChild.action.toString().equals(child.action.toString())) {
|
if (mergeChild.action.toString().equals(child.action.toString())) {
|
||||||
if (!mergeChild.stateValue.equals(child.stateValue)) {
|
if (!mergeChild.stateValue.equals(child.stateValue)) {
|
||||||
logger.info("mismatched merge states");
|
mismatchCount++;
|
||||||
mergeChildren.remove(mergeChild);
|
// logger.info("mismatched merge states");
|
||||||
|
// mergeChildren.remove(mergeChild);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
child.merge(mergeChild);
|
child.merge(mergeChild);
|
||||||
|
@ -406,8 +410,9 @@ public class MCTSNode {
|
||||||
else {
|
else {
|
||||||
if (mergeChild.combat.getValue().equals(child.combat.getValue())) {
|
if (mergeChild.combat.getValue().equals(child.combat.getValue())) {
|
||||||
if (!mergeChild.stateValue.equals(child.stateValue)) {
|
if (!mergeChild.stateValue.equals(child.stateValue)) {
|
||||||
logger.info("mismatched merge states");
|
mismatchCount++;
|
||||||
mergeChildren.remove(mergeChild);
|
// logger.info("mismatched merge states");
|
||||||
|
// mergeChildren.remove(mergeChild);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
child.merge(mergeChild);
|
child.merge(mergeChild);
|
||||||
|
@ -424,6 +429,8 @@ public class MCTSNode {
|
||||||
children.add(child);
|
children.add(child);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// if (mismatchCount > 0)
|
||||||
|
// logger.info("mismatched merge states: " + mismatchCount);
|
||||||
}
|
}
|
||||||
|
|
||||||
// public void print(int depth) {
|
// public void print(int depth) {
|
||||||
|
|
Loading…
Reference in a new issue