diff --git a/open_spiel/games/twenty_forty_eight/2048.cc b/open_spiel/games/twenty_forty_eight/2048.cc
index e9b6833ca9..f42b2c7292 100644
--- a/open_spiel/games/twenty_forty_eight/2048.cc
+++ b/open_spiel/games/twenty_forty_eight/2048.cc
@@ -30,8 +30,6 @@ namespace open_spiel {
 namespace twenty_forty_eight {
 namespace {
 
-enum Move { kMoveUp = 0, kMoveRight = 1, kMoveDown = 2, kMoveLeft = 3 };
-
 constexpr std::array<Action, 4> kPlayerActions = {kMoveUp, kMoveRight,
                                                   kMoveDown, kMoveLeft};
@@ -228,6 +226,29 @@ void TwentyFortyEightState::DoApplyAction(Action action) {
   total_actions_++;
 }
 
+bool TwentyFortyEightState::DoesActionChangeBoard(Action action) const {
+  const std::array<std::array<int, kRows>, 2>& traversals = kTraversals[action];
+  for (int r : traversals[0]) {
+    for (int c : traversals[1]) {
+      int tile = GetCellContent(r, c);
+      if (tile > 0) {
+        std::array<Coordinate, 2> positions =
+            FindFarthestPosition(r, c, action);
+        Coordinate farthest_pos = positions[0];
+        Coordinate next_pos = positions[1];
+        int next_cell = GetCellContent(next_pos.row, next_pos.column);
+        if (next_cell > 0 && next_cell == tile &&
+            !BoardAt(next_pos).is_merged) {
+          return true;
+        } else if (farthest_pos.row != r || farthest_pos.column != c) {
+          return true;
+        }
+      }
+    }
+  }
+  return false;
+}
+
 std::string TwentyFortyEightState::ActionToString(Player player,
                                                   Action action_id) const {
   if (player == kChancePlayerId) {
@@ -295,7 +316,14 @@ std::vector<Action> TwentyFortyEightState::LegalActions() const {
   }
 
   // Construct a vector from the array.
-  return std::vector<Action>(kPlayerActions.begin(), kPlayerActions.end());
+  std::vector<Action> actions(kPlayerActions.begin(), kPlayerActions.end());
+
+  std::vector<Action> actions_allowed = {};
+  for (Action action : actions) {
+    if (DoesActionChangeBoard(action))
+      actions_allowed.push_back(action);
+  }
+  return actions_allowed;
 }
 
 std::string TwentyFortyEightState::ToString() const {
diff --git a/open_spiel/games/twenty_forty_eight/2048.h b/open_spiel/games/twenty_forty_eight/2048.h
index a9a799b594..b65f85970d 100644
--- a/open_spiel/games/twenty_forty_eight/2048.h
+++ b/open_spiel/games/twenty_forty_eight/2048.h
@@ -41,6 +41,8 @@ namespace open_spiel {
 namespace twenty_forty_eight {
 
+enum Move { kMoveUp = 0, kMoveRight = 1, kMoveDown = 2, kMoveLeft = 3 };
+
 constexpr int kNumPlayers = 1;
 constexpr int kRows = 4;
 constexpr int kColumns = 4;
@@ -124,6 +126,7 @@ class TwentyFortyEightState : public State {
   bool TileMatchesAvailable() const;
   void PrepareTiles();
   int GetCellContent(int r, int c) const;
+  bool DoesActionChangeBoard(Action action) const;
 
   const TwentyFortyEightGame& parent_game_;
   Player current_player_ = kChancePlayerId;
diff --git a/open_spiel/games/twenty_forty_eight/2048_test.cc b/open_spiel/games/twenty_forty_eight/2048_test.cc
index 1cb0603be5..11a9f4564d 100644
--- a/open_spiel/games/twenty_forty_eight/2048_test.cc
+++ b/open_spiel/games/twenty_forty_eight/2048_test.cc
@@ -62,7 +62,7 @@ void MultipleMergePossibleTest() {
   TwentyFortyEightState* cstate =
       static_cast<TwentyFortyEightState*>(state.get());
   cstate->SetCustomBoard({0, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0});
-  cstate->ApplyAction(cstate->LegalActions()[2]);
+  cstate->ApplyAction(kMoveDown);
   SPIEL_CHECK_EQ(cstate->BoardAt(3, 0).value, 4);
 }
 
@@ -78,7 +78,7 @@ void OneMergePerTurnTest() {
   TwentyFortyEightState* cstate =
       static_cast<TwentyFortyEightState*>(state.get());
   cstate->SetCustomBoard({2, 4, 0, 4, 0, 2, 0, 2, 0, 0, 0, 0, 0, 2, 0, 0});
-  cstate->ApplyAction(cstate->LegalActions()[2]);
+  cstate->ApplyAction(kMoveDown);
   SPIEL_CHECK_EQ(cstate->BoardAt(2, 1).value, 4);
   SPIEL_CHECK_EQ(cstate->BoardAt(3, 1).value, 4);
 }
@@ -112,7 +112,7 @@ void GameWonTest() {
       static_cast<TwentyFortyEightState*>(state.get());
   cstate->SetCustomBoard(
      {4, 8, 2, 4, 2, 4, 8, 16, 1024, 128, 64, 128, 1024, 8, 2, 8});
-  cstate->ApplyAction(cstate->LegalActions()[2]);
+  cstate->ApplyAction(kMoveDown);
   SPIEL_CHECK_EQ(cstate->IsTerminal(), true);
   SPIEL_CHECK_EQ(cstate->Returns()[0], 2048);
 }
@@ -122,26 +122,16 @@ void GameWonTest() {
 // 0 0 0 0
 // 0 0 0 0
 // 2 0 0 2
-// No random tiles should appear if the board didn't change after player move
+// Down should not be a legal action here as it does not change the board
 void BoardNotChangedTest() {
   std::shared_ptr<const Game> game = LoadGame("2048");
   std::unique_ptr<State> state = game->NewInitialState();
   TwentyFortyEightState* cstate =
       static_cast<TwentyFortyEightState*>(state.get());
   cstate->SetCustomBoard({0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 2});
-  cstate->ApplyAction(cstate->LegalActions()[2]);
-  // Check the board remained the same after player move
-  for (int r = 0; r < kRows; r++) {
-    for (int c = 0; c < kColumns; c++) {
-      if (!(r == 3 && c == 0) && !(r == 3 || c == 3)) {
-        SPIEL_CHECK_EQ(cstate->BoardAt(r, c).value, 0);
-      }
-    }
+  for (Action action : cstate->LegalActions()) {
+    SPIEL_CHECK_NE(action, kMoveDown);
   }
-  SPIEL_CHECK_EQ(cstate->BoardAt(3, 0).value, 2);
-  SPIEL_CHECK_EQ(cstate->BoardAt(3, 3).value, 2);
-  // Check move didn't go to random player since board didn't change
-  SPIEL_CHECK_EQ(cstate->CurrentPlayer(), 0);
 }
 
 }  // namespace
diff --git a/open_spiel/integration_tests/playthroughs/2048.txt b/open_spiel/integration_tests/playthroughs/2048.txt
index 676ffee684..07081083eb 100644
--- a/open_spiel/integration_tests/playthroughs/2048.txt
+++ b/open_spiel/integration_tests/playthroughs/2048.txt
@@ -50,1070 +50,641 @@ ChanceOutcomes() = [(0,0.05625), (1,0.00625), (2,0.05625), (3,0.00625), (4,0.056
 LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]
 StringLegalActions() = ["2 added to row 1, column 1", "4 added to row 1, column 1", "2 added to row 1, column 2", "4 added to row 1, column 2", "2 added to row 1, column 3", "4 added to row 1, column 3", "2 added to row 1, column 4", "4 added to row 1, column 4", "2 added to row 2, column 1", "4 added to row 2, column 1", "2 added to row 2, column 2", "4 added to row 2, column 2", "2 added to row 2, column 3", "4 added to row 2, column 3", "2 added to row 2, column 4", "4 added to row 2, column 4", "2 added to row 3, column 1", "4 added to row 3, column 1", "2 added to row 3, column 2", "4 added to row 3, column 2", "2 added to row 3, column 3", "4 added to row 3, column 3", "2 added to row 3, column 4", "4 added to row 3, column 4", "2 added to row 4, column 1", "4 added to row 4, column 1", "2 added to row 4, column 2", "4 added to row 4, column 2", "2 added to row 4, column 3", "4 added to row 4, column 3", "2 added to row 4, column 4", "4 added to row 4, column 4"]
 
-# Apply action "4 added to row 1, column 2"
-action: 3
+# Apply action "2 added to row 3, column 3"
+action: 20
 
 # State 1
-# 0 4 0 0
 # 0 0 0 0
 # 0 0 0 0
+# 0 0 2 0
 # 0 0 0 0
 IsTerminal() = False
-History() = [3]
-HistoryString() = "3"
+History() = [20]
+HistoryString() = "20"
 IsChanceNode() = True
 IsSimultaneousNode() = False
 CurrentPlayer() = -1
-ObservationString(0) = " 0 4 0 0\n 0 0 0 0\n 0 0 0 0\n 0 0 0 0\n"
-ObservationTensor(0) = [0.0, 4.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
-ChanceOutcomes() = [(0,0.06), (1,0.00666667), (4,0.06), 
(5,0.00666667), (6,0.06), (7,0.00666667), (8,0.06), (9,0.00666667), (10,0.06), (11,0.00666667), (12,0.06), (13,0.00666667), (14,0.06), (15,0.00666667), (16,0.06), (17,0.00666667), (18,0.06), (19,0.00666667), (20,0.06), (21,0.00666667), (22,0.06), (23,0.00666667), (24,0.06), (25,0.00666667), (26,0.06), (27,0.00666667), (28,0.06), (29,0.00666667), (30,0.06), (31,0.00666667)] -LegalActions() = [0, 1, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31] -StringLegalActions() = ["2 added to row 1, column 1", "4 added to row 1, column 1", "2 added to row 1, column 3", "4 added to row 1, column 3", "2 added to row 1, column 4", "4 added to row 1, column 4", "2 added to row 2, column 1", "4 added to row 2, column 1", "2 added to row 2, column 2", "4 added to row 2, column 2", "2 added to row 2, column 3", "4 added to row 2, column 3", "2 added to row 2, column 4", "4 added to row 2, column 4", "2 added to row 3, column 1", "4 added to row 3, column 1", "2 added to row 3, column 2", "4 added to row 3, column 2", "2 added to row 3, column 3", "4 added to row 3, column 3", "2 added to row 3, column 4", "4 added to row 3, column 4", "2 added to row 4, column 1", "4 added to row 4, column 1", "2 added to row 4, column 2", "4 added to row 4, column 2", "2 added to row 4, column 3", "4 added to row 4, column 3", "2 added to row 4, column 4", "4 added to row 4, column 4"] +ObservationString(0) = " 0 0 0 0\n 0 0 0 0\n 0 0 2 0\n 0 0 0 0\n" +ObservationTensor(0) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ChanceOutcomes() = [(0,0.06), (1,0.00666667), (2,0.06), (3,0.00666667), (4,0.06), (5,0.00666667), (6,0.06), (7,0.00666667), (8,0.06), (9,0.00666667), (10,0.06), (11,0.00666667), (12,0.06), (13,0.00666667), (14,0.06), (15,0.00666667), (16,0.06), (17,0.00666667), (18,0.06), (19,0.00666667), (22,0.06), (23,0.00666667), (24,0.06), (25,0.00666667), (26,0.06), (27,0.00666667), (28,0.06), (29,0.00666667), (30,0.06), (31,0.00666667)] +LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31] +StringLegalActions() = ["2 added to row 1, column 1", "4 added to row 1, column 1", "2 added to row 1, column 2", "4 added to row 1, column 2", "2 added to row 1, column 3", "4 added to row 1, column 3", "2 added to row 1, column 4", "4 added to row 1, column 4", "2 added to row 2, column 1", "4 added to row 2, column 1", "2 added to row 2, column 2", "4 added to row 2, column 2", "2 added to row 2, column 3", "4 added to row 2, column 3", "2 added to row 2, column 4", "4 added to row 2, column 4", "2 added to row 3, column 1", "4 added to row 3, column 1", "2 added to row 3, column 2", "4 added to row 3, column 2", "2 added to row 3, column 4", "4 added to row 3, column 4", "2 added to row 4, column 1", "4 added to row 4, column 1", "2 added to row 4, column 2", "4 added to row 4, column 2", "2 added to row 4, column 3", "4 added to row 4, column 3", "2 added to row 4, column 4", "4 added to row 4, column 4"] -# Apply action "4 added to row 2, column 3" -action: 13 +# Apply action "2 added to row 2, column 2" +action: 10 # State 2 -# 0 4 0 0 -# 0 0 4 0 # 0 0 0 0 +# 0 2 0 0 +# 0 0 2 0 # 0 0 0 0 IsTerminal() = False -History() = [3, 13] -HistoryString() = "3, 13" +History() = [20, 10] +HistoryString() = "20, 10" IsChanceNode() = False IsSimultaneousNode() = False CurrentPlayer() = 0 -ObservationString(0) = " 0 4 0 0\n 0 0 4 0\n 0 0 0 0\n 0 0 0 0\n" 
-ObservationTensor(0) = [0.0, 4.0, 0.0, 0.0, 0.0, 0.0, 4.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ObservationString(0) = " 0 0 0 0\n 0 2 0 0\n 0 0 2 0\n 0 0 0 0\n" +ObservationTensor(0) = [0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0] Rewards() = [0] Returns() = [0] LegalActions() = [0, 1, 2, 3] StringLegalActions() = ["Up", "Right", "Down", "Left"] -# Apply action "Down" -action: 2 +# Apply action "Left" +action: 3 # State 3 -# Apply action "4 added to row 4, column 4" -action: 31 +# Apply action "2 added to row 3, column 3" +action: 20 # State 4 # 0 0 0 0 +# 2 0 0 0 +# 2 0 2 0 # 0 0 0 0 -# 0 0 0 0 -# 0 4 4 4 IsTerminal() = False -History() = [3, 13, 2, 31] -HistoryString() = "3, 13, 2, 31" +History() = [20, 10, 3, 20] +HistoryString() = "20, 10, 3, 20" IsChanceNode() = False IsSimultaneousNode() = False CurrentPlayer() = 0 -ObservationString(0) = " 0 0 0 0\n 0 0 0 0\n 0 0 0 0\n 0 4 4 4\n" -ObservationTensor(0) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 4.0, 4.0] +ObservationString(0) = " 0 0 0 0\n 2 0 0 0\n 2 0 2 0\n 0 0 0 0\n" +ObservationTensor(0) = [0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 2.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0] Rewards() = [0] Returns() = [0] LegalActions() = [0, 1, 2, 3] StringLegalActions() = ["Up", "Right", "Down", "Left"] -# Apply action "Left" -action: 3 +# Apply action "Up" +action: 0 # State 5 -# Apply action "2 added to row 4, column 4" -action: 30 +# Apply action "4 added to row 2, column 3" +action: 13 # State 6 +# 4 0 2 0 +# 0 0 4 0 # 0 0 0 0 # 0 0 0 0 -# 0 0 0 0 -# 8 4 0 2 IsTerminal() = False -History() = [3, 13, 2, 31, 3, 30] -HistoryString() = "3, 13, 2, 31, 3, 30" +History() = [20, 10, 3, 20, 0, 13] +HistoryString() = "20, 10, 3, 20, 0, 13" IsChanceNode() = False IsSimultaneousNode() = False CurrentPlayer() = 0 -ObservationString(0) = " 0 0 0 0\n 0 0 0 0\n 0 0 0 0\n 8 4 0 2\n" -ObservationTensor(0) = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 8.0, 4.0, 0.0, 2.0] -Rewards() = [8] -Returns() = [8] -LegalActions() = [0, 1, 2, 3] -StringLegalActions() = ["Up", "Right", "Down", "Left"] +ObservationString(0) = " 4 0 2 0\n 0 0 4 0\n 0 0 0 0\n 0 0 0 0\n" +ObservationTensor(0) = [4.0, 0.0, 2.0, 0.0, 0.0, 0.0, 4.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +Rewards() = [4] +Returns() = [4] +LegalActions() = [1, 2, 3] +StringLegalActions() = ["Right", "Down", "Left"] -# Apply action "Up" -action: 0 +# Apply action "Left" +action: 3 # State 7 -# Apply action "4 added to row 3, column 4" -action: 23 +# Apply action "2 added to row 3, column 3" +action: 20 # State 8 # Apply action "Left" action: 3 # State 9 -# Apply action "2 added to row 4, column 4" -action: 30 +# Apply action "4 added to row 3, column 2" +action: 19 # State 10 -# Apply action "Left" -action: 3 +# Apply action "Down" +action: 2 # State 11 -# Apply action "4 added to row 1, column 4" -action: 7 +# Apply action "2 added to row 2, column 1" +action: 8 # State 12 -# Apply action "Down" -action: 2 +# Apply action "Up" +action: 0 # State 13 -# Apply action "4 added to row 3, column 2" -action: 19 +# Apply action "4 added to row 4, column 2" +action: 27 # State 14 -# Apply action "Up" -action: 0 +# Apply action "Right" +action: 1 # State 15 -# Apply action "2 added to row 3, column 2" -action: 18 +# Apply action "4 added to row 3, column 2" +action: 19 # State 16 # Apply action "Down" action: 2 # State 17 -# Apply action "2 added to row 1, column 2" -action: 2 +# Apply action "4 added to row 1, column 2" +action: 
3 # State 18 # Apply action "Right" action: 1 # State 19 -# Apply action "2 added to row 2, column 1" -action: 8 +# Apply action "4 added to row 2, column 3" +action: 13 # State 20 -# Apply action "Up" -action: 0 +# Apply action "Left" +action: 3 # State 21 -# Apply action "2 added to row 4, column 4" -action: 30 +# Apply action "4 added to row 1, column 4" +action: 7 # State 22 -# 2 2 8 2 -# 0 0 0 16 -# 0 0 0 4 -# 0 0 0 2 +# 4 0 0 4 +# 4 8 0 0 +# 2 0 0 0 +# 4 8 4 0 IsTerminal() = False -History() = [3, 13, 2, 31, 3, 30, 0, 23, 3, 30, 3, 7, 2, 19, 0, 18, 2, 2, 1, 8, 0, 30] -HistoryString() = "3, 13, 2, 31, 3, 30, 0, 23, 3, 30, 3, 7, 2, 19, 0, 18, 2, 2, 1, 8, 0, 30" +History() = [20, 10, 3, 20, 0, 13, 3, 20, 3, 19, 2, 8, 0, 27, 1, 19, 2, 3, 1, 13, 3, 7] +HistoryString() = "20, 10, 3, 20, 0, 13, 3, 20, 3, 19, 2, 8, 0, 27, 1, 19, 2, 3, 1, 13, 3, 7" IsChanceNode() = False IsSimultaneousNode() = False CurrentPlayer() = 0 -ObservationString(0) = " 2 2 8 2\n 0 0 0 16\n 0 0 0 4\n 0 0 0 2\n" -ObservationTensor(0) = [2.0, 2.0, 8.0, 2.0, 0.0, 0.0, 0.0, 16.0, 0.0, 0.0, 0.0, 4.0, 0.0, 0.0, 0.0, 2.0] -Rewards() = [24] -Returns() = [44] +ObservationString(0) = " 4 0 0 4\n 4 8 0 0\n 2 0 0 0\n 4 8 4 0\n" +ObservationTensor(0) = [4.0, 0.0, 0.0, 4.0, 4.0, 8.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 4.0, 8.0, 4.0, 0.0] +Rewards() = [0] +Returns() = [24] LegalActions() = [0, 1, 2, 3] StringLegalActions() = ["Up", "Right", "Down", "Left"] -# Apply action "Down" -action: 2 +# Apply action "Up" +action: 0 # State 23 -# Apply action "4 added to row 2, column 1" -action: 9 +# Apply action "4 added to row 4, column 2" +action: 27 # State 24 # Apply action "Up" action: 0 # State 25 -# Apply action "4 added to row 4, column 2" -action: 27 +# Apply action "2 added to row 3, column 4" +action: 22 # State 26 -# Apply action "Down" -action: 2 +# Apply action "Left" +action: 3 # State 27 -# Apply action "2 added to row 2, column 1" -action: 8 +# Apply action "4 added to row 4, column 1" +action: 25 # State 28 -# Apply action "Up" -action: 0 +# Apply action "Right" +action: 1 # State 29 -# Apply action "2 added to row 3, column 2" -action: 18 +# Apply action "4 added to row 3, column 2" +action: 19 # State 30 -# Apply action "Left" -action: 3 +# Apply action "Down" +action: 2 # State 31 -# Apply action "4 added to row 4, column 2" -action: 27 +# Apply action "4 added to row 1, column 2" +action: 3 # State 32 -# Apply action "Left" -action: 3 +# Apply action "Down" +action: 2 # State 33 -# Apply action "4 added to row 4, column 3" -action: 29 +# Apply action "2 added to row 1, column 2" +action: 2 # State 34 # Apply action "Left" action: 3 # State 35 -# Apply action "2 added to row 2, column 3" -action: 12 +# Apply action "2 added to row 2, column 4" +action: 14 # State 36 # Apply action "Up" action: 0 # State 37 -# Apply action "2 added to row 3, column 4" -action: 22 +# Apply action "4 added to row 4, column 3" +action: 29 # State 38 # Apply action "Up" action: 0 # State 39 -# Apply action "4 added to row 2, column 3" -action: 13 +# Apply action "4 added to row 2, column 4" +action: 15 # State 40 # Apply action "Right" action: 1 # State 41 -# Apply action "2 added to row 3, column 1" -action: 16 +# Apply action "4 added to row 2, column 1" +action: 9 # State 42 -# 4 8 4 2 -# 0 0 32 4 -# 2 0 2 8 +# 0 2 16 2 +# 4 4 16 4 +# 0 0 16 8 # 0 0 0 0 IsTerminal() = False -History() = [3, 13, 2, 31, 3, 30, 0, 23, 3, 30, 3, 7, 2, 19, 0, 18, 2, 2, 1, 8, 0, 30, 2, 9, 0, 27, 2, 8, 0, 18, 3, 27, 3, 29, 3, 12, 0, 22, 0, 13, 1, 16] -HistoryString() = 
"3, 13, 2, 31, 3, 30, 0, 23, 3, 30, 3, 7, 2, 19, 0, 18, 2, 2, 1, 8, 0, 30, 2, 9, 0, 27, 2, 8, 0, 18, 3, 27, 3, 29, 3, 12, 0, 22, 0, 13, 1, 16" +History() = [20, 10, 3, 20, 0, 13, 3, 20, 3, 19, 2, 8, 0, 27, 1, 19, 2, 3, 1, 13, 3, 7, 0, 27, 0, 22, 3, 25, 1, 19, 2, 3, 2, 2, 3, 14, 0, 29, 0, 15, 1, 9] +HistoryString() = "20, 10, 3, 20, 0, 13, 3, 20, 3, 19, 2, 8, 0, 27, 1, 19, 2, 3, 1, 13, 3, 7, 0, 27, 0, 22, 3, 25, 1, 19, 2, 3, 2, 2, 3, 14, 0, 29, 0, 15, 1, 9" IsChanceNode() = False IsSimultaneousNode() = False CurrentPlayer() = 0 -ObservationString(0) = " 4 8 4 2\n 0 0 32 4\n 2 0 2 8\n 0 0 0 0\n" -ObservationTensor(0) = [4.0, 8.0, 4.0, 2.0, 0.0, 0.0, 32.0, 4.0, 2.0, 0.0, 2.0, 8.0, 0.0, 0.0, 0.0, 0.0] -Rewards() = [32] -Returns() = [128] +ObservationString(0) = " 0 2 16 2\n 4 4 16 4\n 0 0 16 8\n 0 0 0 0\n" +ObservationTensor(0) = [0.0, 2.0, 16.0, 2.0, 4.0, 4.0, 16.0, 4.0, 0.0, 0.0, 16.0, 8.0, 0.0, 0.0, 0.0, 0.0] +Rewards() = [16] +Returns() = [116] LegalActions() = [0, 1, 2, 3] StringLegalActions() = ["Up", "Right", "Down", "Left"] -# Apply action "Up" -action: 0 +# Apply action "Right" +action: 1 # State 43 -# Apply action "2 added to row 4, column 1" -action: 24 +# Apply action "2 added to row 4, column 3" +action: 28 # State 44 -# Apply action "Down" -action: 2 +# Apply action "Right" +action: 1 # State 45 -# Apply action "4 added to row 1, column 4" -action: 7 +# Apply action "2 added to row 4, column 3" +action: 28 # State 46 -# Apply action "Up" -action: 0 +# Apply action "Down" +action: 2 # State 47 -# Apply action "2 added to row 2, column 2" -action: 10 +# Apply action "2 added to row 4, column 1" +action: 24 # State 48 -# Apply action "Left" -action: 3 +# Apply action "Up" +action: 0 # State 49 -# Apply action "2 added to row 4, column 4" -action: 30 +# Apply action "2 added to row 4, column 1" +action: 24 # State 50 -# Apply action "Down" -action: 2 +# Apply action "Up" +action: 0 # State 51 -# Apply action "2 added to row 2, column 3" -action: 12 +# Apply action "4 added to row 3, column 1" +action: 17 # State 52 -# Apply action "Down" -action: 2 +# Apply action "Right" +action: 1 # State 53 -# Apply action "4 added to row 2, column 3" -action: 13 +# Apply action "2 added to row 4, column 3" +action: 28 # State 54 -# Apply action "Right" -action: 1 +# Apply action "Left" +action: 3 # State 55 -# Apply action "4 added to row 1, column 4" -action: 7 +# Apply action "4 added to row 4, column 2" +action: 27 # State 56 -# Apply action "Up" -action: 0 +# Apply action "Right" +action: 1 # State 57 -# Apply action "2 added to row 1, column 1" -action: 0 +# Apply action "4 added to row 4, column 3" +action: 29 # State 58 -# Apply action "Down" -action: 2 +# Apply action "Up" +action: 0 # State 59 -# Apply action "4 added to row 3, column 1" -action: 17 +# Apply action "2 added to row 2, column 1" +action: 8 # State 60 -# Apply action "Up" -action: 0 +# Apply action "Right" +action: 1 # State 61 -# Apply action "4 added to row 4, column 3" -action: 29 +# Apply action "2 added to row 4, column 3" +action: 28 # State 62 -# 4 16 8 8 -# 2 8 4 32 -# 0 0 8 2 -# 0 0 4 0 +# 4 2 16 2 +# 2 8 32 4 +# 0 4 2 16 +# 0 0 2 4 IsTerminal() = False -History() = [3, 13, 2, 31, 3, 30, 0, 23, 3, 30, 3, 7, 2, 19, 0, 18, 2, 2, 1, 8, 0, 30, 2, 9, 0, 27, 2, 8, 0, 18, 3, 27, 3, 29, 3, 12, 0, 22, 0, 13, 1, 16, 0, 24, 2, 7, 0, 10, 3, 30, 2, 12, 2, 13, 1, 7, 0, 0, 2, 17, 0, 29] -HistoryString() = "3, 13, 2, 31, 3, 30, 0, 23, 3, 30, 3, 7, 2, 19, 0, 18, 2, 2, 1, 8, 0, 30, 2, 9, 0, 27, 2, 8, 0, 18, 3, 27, 3, 29, 3, 12, 0, 
22, 0, 13, 1, 16, 0, 24, 2, 7, 0, 10, 3, 30, 2, 12, 2, 13, 1, 7, 0, 0, 2, 17, 0, 29" +History() = [20, 10, 3, 20, 0, 13, 3, 20, 3, 19, 2, 8, 0, 27, 1, 19, 2, 3, 1, 13, 3, 7, 0, 27, 0, 22, 3, 25, 1, 19, 2, 3, 2, 2, 3, 14, 0, 29, 0, 15, 1, 9, 1, 28, 1, 28, 2, 24, 0, 24, 0, 17, 1, 28, 3, 27, 1, 29, 0, 8, 1, 28] +HistoryString() = "20, 10, 3, 20, 0, 13, 3, 20, 3, 19, 2, 8, 0, 27, 1, 19, 2, 3, 1, 13, 3, 7, 0, 27, 0, 22, 3, 25, 1, 19, 2, 3, 2, 2, 3, 14, 0, 29, 0, 15, 1, 9, 1, 28, 1, 28, 2, 24, 0, 24, 0, 17, 1, 28, 3, 27, 1, 29, 0, 8, 1, 28" IsChanceNode() = False IsSimultaneousNode() = False CurrentPlayer() = 0 -ObservationString(0) = " 4 16 8 8\n 2 8 4 32\n 0 0 8 2\n 0 0 4 0\n" -ObservationTensor(0) = [4.0, 16.0, 8.0, 8.0, 2.0, 8.0, 4.0, 32.0, 0.0, 0.0, 8.0, 2.0, 0.0, 0.0, 4.0, 0.0] +ObservationString(0) = " 4 2 16 2\n 2 8 32 4\n 0 4 2 16\n 0 0 2 4\n" +ObservationTensor(0) = [4.0, 2.0, 16.0, 2.0, 2.0, 8.0, 32.0, 4.0, 0.0, 4.0, 2.0, 16.0, 0.0, 0.0, 2.0, 4.0] Rewards() = [0] Returns() = [188] -LegalActions() = [0, 1, 2, 3] -StringLegalActions() = ["Up", "Right", "Down", "Left"] +LegalActions() = [0, 2, 3] +StringLegalActions() = ["Up", "Down", "Left"] -# Apply action "Right" -action: 1 +# Apply action "Down" +action: 2 # State 63 -# Apply action "2 added to row 4, column 2" -action: 26 +# Apply action "2 added to row 1, column 2" +action: 2 # State 64 -# Apply action "Up" -action: 0 +# Apply action "Left" +action: 3 # State 65 -# Apply action "4 added to row 2, column 1" -action: 9 +# Apply action "4 added to row 1, column 3" +action: 5 # State 66 -# Apply action "Left" -action: 3 +# Apply action "Up" +action: 0 # State 67 -# Apply action "2 added to row 1, column 4" -action: 6 +# Apply action "2 added to row 4, column 2" +action: 26 # State 68 -# Apply action "Left" -action: 3 +# Apply action "Down" +action: 2 # State 69 -# Apply action "Left" -action: 3 +# Apply action "2 added to row 1, column 4" +action: 6 # State 70 -# Apply action "Up" -action: 0 +# Apply action "Right" +action: 1 # State 71 -# Apply action "4 added to row 3, column 2" -action: 19 +# Apply action "2 added to row 1, column 2" +action: 2 # State 72 # Apply action "Up" action: 0 # State 73 -# Apply action "Down" -action: 2 +# Apply action "2 added to row 3, column 1" +action: 16 # State 74 -# Apply action "2 added to row 1, column 4" -action: 6 +# Apply action "Right" +action: 1 # State 75 -# Apply action "Down" -action: 2 - -# State 76 -# Apply action "4 added to row 1, column 3" -action: 5 +# Apply action "4 added to row 3, column 1" +action: 17 -# State 77 +# State 76 # Apply action "Left" action: 3 +# State 77 +# Apply action "4 added to row 4, column 2" +action: 27 + # State 78 -# Apply action "4 added to row 1, column 3" -action: 5 +# Apply action "Down" +action: 2 # State 79 -# 2 4 4 0 -# 8 32 0 0 -# 2 16 8 0 -# 8 2 32 0 +# Apply action "2 added to row 1, column 2" +action: 2 + +# State 80 +# Apply action "Right" +action: 1 + +# State 81 +# Apply action "2 added to row 1, column 3" +action: 4 + +# State 82 +# 0 0 2 2 +# 0 0 2 4 +# 0 8 4 2 +# 16 4 16 64 IsTerminal() = False -History() = [3, 13, 2, 31, 3, 30, 0, 23, 3, 30, 3, 7, 2, 19, 0, 18, 2, 2, 1, 8, 0, 30, 2, 9, 0, 27, 2, 8, 0, 18, 3, 27, 3, 29, 3, 12, 0, 22, 0, 13, 1, 16, 0, 24, 2, 7, 0, 10, 3, 30, 2, 12, 2, 13, 1, 7, 0, 0, 2, 17, 0, 29, 1, 26, 0, 9, 3, 6, 3, 3, 0, 19, 0, 2, 6, 2, 5, 3, 5] -HistoryString() = "3, 13, 2, 31, 3, 30, 0, 23, 3, 30, 3, 7, 2, 19, 0, 18, 2, 2, 1, 8, 0, 30, 2, 9, 0, 27, 2, 8, 0, 18, 3, 27, 3, 29, 3, 12, 0, 22, 0, 13, 1, 16, 0, 24, 2, 7, 0, 
10, 3, 30, 2, 12, 2, 13, 1, 7, 0, 0, 2, 17, 0, 29, 1, 26, 0, 9, 3, 6, 3, 3, 0, 19, 0, 2, 6, 2, 5, 3, 5" +History() = [20, 10, 3, 20, 0, 13, 3, 20, 3, 19, 2, 8, 0, 27, 1, 19, 2, 3, 1, 13, 3, 7, 0, 27, 0, 22, 3, 25, 1, 19, 2, 3, 2, 2, 3, 14, 0, 29, 0, 15, 1, 9, 1, 28, 1, 28, 2, 24, 0, 24, 0, 17, 1, 28, 3, 27, 1, 29, 0, 8, 1, 28, 2, 2, 3, 5, 0, 26, 2, 6, 1, 2, 0, 16, 1, 17, 3, 27, 2, 2, 1, 4] +HistoryString() = "20, 10, 3, 20, 0, 13, 3, 20, 3, 19, 2, 8, 0, 27, 1, 19, 2, 3, 1, 13, 3, 7, 0, 27, 0, 22, 3, 25, 1, 19, 2, 3, 2, 2, 3, 14, 0, 29, 0, 15, 1, 9, 1, 28, 1, 28, 2, 24, 0, 24, 0, 17, 1, 28, 3, 27, 1, 29, 0, 8, 1, 28, 2, 2, 3, 5, 0, 26, 2, 6, 1, 2, 0, 16, 1, 17, 3, 27, 2, 2, 1, 4" IsChanceNode() = False IsSimultaneousNode() = False CurrentPlayer() = 0 -ObservationString(0) = " 2 4 4 0\n 8 32 0 0\n 2 16 8 0\n 8 2 32 0\n" -ObservationTensor(0) = [2.0, 4.0, 4.0, 0.0, 8.0, 32.0, 0.0, 0.0, 2.0, 16.0, 8.0, 0.0, 8.0, 2.0, 32.0, 0.0] -Rewards() = [24] -Returns() = [280] +ObservationString(0) = " 0 0 2 2\n 0 0 2 4\n 0 8 4 2\n 16 4 16 64\n" +ObservationTensor(0) = [0.0, 0.0, 2.0, 2.0, 0.0, 0.0, 2.0, 4.0, 0.0, 8.0, 4.0, 2.0, 16.0, 4.0, 16.0, 64.0] +Rewards() = [0] +Returns() = [364] LegalActions() = [0, 1, 2, 3] StringLegalActions() = ["Up", "Right", "Down", "Left"] -# Apply action "Left" -action: 3 - -# State 80 -# Apply action "2 added to row 1, column 4" -action: 6 - -# State 81 -# Apply action "Up" -action: 0 - -# State 82 -# Apply action "2 added to row 4, column 4" -action: 30 +# Apply action "Down" +action: 2 # State 83 -# Apply action "Left" -action: 3 +# Apply action "4 added to row 2, column 1" +action: 9 # State 84 -# Apply action "4 added to row 3, column 3" -action: 21 +# Apply action "Up" +action: 0 # State 85 -# Apply action "Left" -action: 3 +# Apply action "2 added to row 4, column 3" +action: 28 # State 86 -# Apply action "Up" -action: 0 +# Apply action "Left" +action: 3 # State 87 -# Apply action "4 added to row 3, column 3" -action: 21 +# Apply action "2 added to row 4, column 3" +action: 28 # State 88 -# Apply action "Left" -action: 3 +# Apply action "Right" +action: 1 # State 89 -# Apply action "Down" -action: 2 +# Apply action "4 added to row 3, column 1" +action: 17 # State 90 -# Apply action "2 added to row 3, column 4" -action: 22 +# Apply action "Down" +action: 2 # State 91 -# Apply action "Right" -action: 1 +# Apply action "4 added to row 2, column 2" +action: 11 # State 92 -# Apply action "2 added to row 1, column 2" -action: 2 - -# State 93 # Apply action "Up" action: 0 +# State 93 +# Apply action "2 added to row 3, column 4" +action: 22 + # State 94 -# Apply action "2 added to row 3, column 2" -action: 18 +# Apply action "Right" +action: 1 # State 95 -# Apply action "Left" -action: 3 +# Apply action "4 added to row 4, column 3" +action: 29 # State 96 -# Apply action "2 added to row 2, column 3" -action: 12 +# Apply action "Down" +action: 2 # State 97 -# 4 2 16 0 -# 16 64 2 0 -# 2 16 4 0 -# 4 8 0 0 -IsTerminal() = False -History() = [3, 13, 2, 31, 3, 30, 0, 23, 3, 30, 3, 7, 2, 19, 0, 18, 2, 2, 1, 8, 0, 30, 2, 9, 0, 27, 2, 8, 0, 18, 3, 27, 3, 29, 3, 12, 0, 22, 0, 13, 1, 16, 0, 24, 2, 7, 0, 10, 3, 30, 2, 12, 2, 13, 1, 7, 0, 0, 2, 17, 0, 29, 1, 26, 0, 9, 3, 6, 3, 3, 0, 19, 0, 2, 6, 2, 5, 3, 5, 3, 6, 0, 30, 3, 21, 3, 0, 21, 3, 2, 22, 1, 2, 0, 18, 3, 12] -HistoryString() = "3, 13, 2, 31, 3, 30, 0, 23, 3, 30, 3, 7, 2, 19, 0, 18, 2, 2, 1, 8, 0, 30, 2, 9, 0, 27, 2, 8, 0, 18, 3, 27, 3, 29, 3, 12, 0, 22, 0, 13, 1, 16, 0, 24, 2, 7, 0, 10, 3, 30, 2, 12, 2, 13, 1, 7, 0, 0, 2, 17, 0, 
29, 1, 26, 0, 9, 3, 6, 3, 3, 0, 19, 0, 2, 6, 2, 5, 3, 5, 3, 6, 0, 30, 3, 21, 3, 0, 21, 3, 2, 22, 1, 2, 0, 18, 3, 12" -IsChanceNode() = False -IsSimultaneousNode() = False -CurrentPlayer() = 0 -ObservationString(0) = " 4 2 16 0\n 16 64 2 0\n 2 16 4 0\n 4 8 0 0\n" -ObservationTensor(0) = [4.0, 2.0, 16.0, 0.0, 16.0, 64.0, 2.0, 0.0, 2.0, 16.0, 4.0, 0.0, 4.0, 8.0, 0.0, 0.0] -Rewards() = [16] -Returns() = [404] -LegalActions() = [0, 1, 2, 3] -StringLegalActions() = ["Up", "Right", "Down", "Left"] - -# Apply action "Left" -action: 3 +# Apply action "2 added to row 2, column 1" +action: 8 # State 98 -# Apply action "Left" -action: 3 - -# State 99 # Apply action "Up" action: 0 -# State 100 -# Apply action "Right" -action: 1 - -# State 101 -# Apply action "4 added to row 3, column 1" -action: 17 - -# State 102 -# Apply action "Up" -action: 0 - -# State 103 -# Apply action "4 added to row 3, column 1" -action: 17 - -# State 104 -# Apply action "Left" -action: 3 - -# State 105 -# Apply action "2 added to row 2, column 4" -action: 14 - -# State 106 -# Apply action "Up" -action: 0 - -# State 107 -# Apply action "2 added to row 4, column 4" -action: 30 - -# State 108 -# Apply action "Up" -action: 0 - -# State 109 -# Apply action "2 added to row 4, column 4" -action: 30 - -# State 110 -# Apply action "Left" -action: 3 - -# State 111 -# Apply action "2 added to row 4, column 4" -action: 30 - -# State 112 -# Apply action "Left" -action: 3 - -# State 113 +# State 99 # Apply action "2 added to row 4, column 3" action: 28 -# State 114 -# 8 2 16 2 -# 16 64 2 4 -# 8 2 16 2 -# 8 4 2 0 -IsTerminal() = False -History() = [3, 13, 2, 31, 3, 30, 0, 23, 3, 30, 3, 7, 2, 19, 0, 18, 2, 2, 1, 8, 0, 30, 2, 9, 0, 27, 2, 8, 0, 18, 3, 27, 3, 29, 3, 12, 0, 22, 0, 13, 1, 16, 0, 24, 2, 7, 0, 10, 3, 30, 2, 12, 2, 13, 1, 7, 0, 0, 2, 17, 0, 29, 1, 26, 0, 9, 3, 6, 3, 3, 0, 19, 0, 2, 6, 2, 5, 3, 5, 3, 6, 0, 30, 3, 21, 3, 0, 21, 3, 2, 22, 1, 2, 0, 18, 3, 12, 3, 3, 0, 1, 17, 0, 17, 3, 14, 0, 30, 0, 30, 3, 30, 3, 28] -HistoryString() = "3, 13, 2, 31, 3, 30, 0, 23, 3, 30, 3, 7, 2, 19, 0, 18, 2, 2, 1, 8, 0, 30, 2, 9, 0, 27, 2, 8, 0, 18, 3, 27, 3, 29, 3, 12, 0, 22, 0, 13, 1, 16, 0, 24, 2, 7, 0, 10, 3, 30, 2, 12, 2, 13, 1, 7, 0, 0, 2, 17, 0, 29, 1, 26, 0, 9, 3, 6, 3, 3, 0, 19, 0, 2, 6, 2, 5, 3, 5, 3, 6, 0, 30, 3, 21, 3, 0, 21, 3, 2, 22, 1, 2, 0, 18, 3, 12, 3, 3, 0, 1, 17, 0, 17, 3, 14, 0, 30, 0, 30, 3, 30, 3, 28" -IsChanceNode() = False -IsSimultaneousNode() = False -CurrentPlayer() = 0 -ObservationString(0) = " 8 2 16 2\n 16 64 2 4\n 8 2 16 2\n 8 4 2 0\n" -ObservationTensor(0) = [8.0, 2.0, 16.0, 2.0, 16.0, 64.0, 2.0, 4.0, 8.0, 2.0, 16.0, 2.0, 8.0, 4.0, 2.0, 0.0] -Rewards() = [4] -Returns() = [424] -LegalActions() = [0, 1, 2, 3] -StringLegalActions() = ["Up", "Right", "Down", "Left"] - -# Apply action "Down" -action: 2 - -# State 115 -# Apply action "4 added to row 1, column 4" -action: 7 - -# State 116 -# Apply action "Left" -action: 3 - -# State 117 -# Apply action "4 added to row 4, column 4" -action: 31 - -# State 118 -# Apply action "Right" -action: 1 - -# State 119 -# Apply action "2 added to row 1, column 1" -action: 0 - -# State 120 -# Apply action "Down" -action: 2 - -# State 121 -# Apply action "4 added to row 1, column 1" -action: 1 - -# State 122 -# Apply action "Up" -action: 0 - -# State 123 -# Apply action "2 added to row 4, column 4" -action: 30 - -# State 124 -# Apply action "Down" -action: 2 - -# State 125 -# Apply action "2 added to row 1, column 4" -action: 6 - -# State 126 -# Apply action "Left" -action: 3 - -# State 127 
-# Apply action "2 added to row 4, column 4" -action: 30 - -# State 128 -# Apply action "Left" -action: 3 - -# State 129 -# Apply action "4 added to row 1, column 4" -action: 7 - -# State 130 -# Apply action "Right" -action: 1 - -# State 131 -# Apply action "2 added to row 4, column 2" -action: 26 - -# State 132 +# State 100 # Apply action "Left" action: 3 -# State 133 -# Apply action "2 added to row 3, column 4" -action: 22 +# State 101 +# Apply action "4 added to row 3, column 4" +action: 23 -# State 134 -# 2 16 2 4 -# 4 8 64 4 -# 4 32 0 2 -# 2 32 8 0 +# State 102 +# 2 4 32 2 +# 16 8 64 8 +# 8 4 0 4 +# 2 0 0 0 IsTerminal() = False -History() = [3, 13, 2, 31, 3, 30, 0, 23, 3, 30, 3, 7, 2, 19, 0, 18, 2, 2, 1, 8, 0, 30, 2, 9, 0, 27, 2, 8, 0, 18, 3, 27, 3, 29, 3, 12, 0, 22, 0, 13, 1, 16, 0, 24, 2, 7, 0, 10, 3, 30, 2, 12, 2, 13, 1, 7, 0, 0, 2, 17, 0, 29, 1, 26, 0, 9, 3, 6, 3, 3, 0, 19, 0, 2, 6, 2, 5, 3, 5, 3, 6, 0, 30, 3, 21, 3, 0, 21, 3, 2, 22, 1, 2, 0, 18, 3, 12, 3, 3, 0, 1, 17, 0, 17, 3, 14, 0, 30, 0, 30, 3, 30, 3, 28, 2, 7, 3, 31, 1, 0, 2, 1, 0, 30, 2, 6, 3, 30, 3, 7, 1, 26, 3, 22] -HistoryString() = "3, 13, 2, 31, 3, 30, 0, 23, 3, 30, 3, 7, 2, 19, 0, 18, 2, 2, 1, 8, 0, 30, 2, 9, 0, 27, 2, 8, 0, 18, 3, 27, 3, 29, 3, 12, 0, 22, 0, 13, 1, 16, 0, 24, 2, 7, 0, 10, 3, 30, 2, 12, 2, 13, 1, 7, 0, 0, 2, 17, 0, 29, 1, 26, 0, 9, 3, 6, 3, 3, 0, 19, 0, 2, 6, 2, 5, 3, 5, 3, 6, 0, 30, 3, 21, 3, 0, 21, 3, 2, 22, 1, 2, 0, 18, 3, 12, 3, 3, 0, 1, 17, 0, 17, 3, 14, 0, 30, 0, 30, 3, 30, 3, 28, 2, 7, 3, 31, 1, 0, 2, 1, 0, 30, 2, 6, 3, 30, 3, 7, 1, 26, 3, 22" +History() = [20, 10, 3, 20, 0, 13, 3, 20, 3, 19, 2, 8, 0, 27, 1, 19, 2, 3, 1, 13, 3, 7, 0, 27, 0, 22, 3, 25, 1, 19, 2, 3, 2, 2, 3, 14, 0, 29, 0, 15, 1, 9, 1, 28, 1, 28, 2, 24, 0, 24, 0, 17, 1, 28, 3, 27, 1, 29, 0, 8, 1, 28, 2, 2, 3, 5, 0, 26, 2, 6, 1, 2, 0, 16, 1, 17, 3, 27, 2, 2, 1, 4, 2, 9, 0, 28, 3, 28, 1, 17, 2, 11, 0, 22, 1, 29, 2, 8, 0, 28, 3, 23] +HistoryString() = "20, 10, 3, 20, 0, 13, 3, 20, 3, 19, 2, 8, 0, 27, 1, 19, 2, 3, 1, 13, 3, 7, 0, 27, 0, 22, 3, 25, 1, 19, 2, 3, 2, 2, 3, 14, 0, 29, 0, 15, 1, 9, 1, 28, 1, 28, 2, 24, 0, 24, 0, 17, 1, 28, 3, 27, 1, 29, 0, 8, 1, 28, 2, 2, 3, 5, 0, 26, 2, 6, 1, 2, 0, 16, 1, 17, 3, 27, 2, 2, 1, 4, 2, 9, 0, 28, 3, 28, 1, 17, 2, 11, 0, 22, 1, 29, 2, 8, 0, 28, 3, 23" IsChanceNode() = False IsSimultaneousNode() = False CurrentPlayer() = 0 -ObservationString(0) = " 2 16 2 4\n 4 8 64 4\n 4 32 0 2\n 2 32 8 0\n" -ObservationTensor(0) = [2.0, 16.0, 2.0, 4.0, 4.0, 8.0, 64.0, 4.0, 4.0, 32.0, 0.0, 2.0, 2.0, 32.0, 8.0, 0.0] -Rewards() = [0] -Returns() = [560] -LegalActions() = [0, 1, 2, 3] -StringLegalActions() = ["Up", "Right", "Down", "Left"] +ObservationString(0) = " 2 4 32 2\n 16 8 64 8\n 8 4 0 4\n 2 0 0 0\n" +ObservationTensor(0) = [2.0, 4.0, 32.0, 2.0, 16.0, 8.0, 64.0, 8.0, 8.0, 4.0, 0.0, 4.0, 2.0, 0.0, 0.0, 0.0] +Rewards() = [8] +Returns() = [456] +LegalActions() = [1, 2, 3] +StringLegalActions() = ["Right", "Down", "Left"] # Apply action "Right" action: 1 -# State 135 -# Apply action "4 added to row 4, column 1" -action: 25 +# State 103 +# Apply action "2 added to row 4, column 2" +action: 26 -# State 136 +# State 104 # Apply action "Down" action: 2 -# State 137 -# Apply action "2 added to row 1, column 3" -action: 4 +# State 105 +# Apply action "4 added to row 1, column 2" +action: 3 -# State 138 +# State 106 # Apply action "Down" action: 2 -# State 139 +# State 107 # Apply action "4 added to row 1, column 1" action: 1 -# State 140 +# State 108 # Apply action "Left" action: 3 -# State 141 +# State 109 # Apply 
action "4 added to row 2, column 4" action: 15 -# State 142 -# Apply action "Left" -action: 3 - -# State 143 -# Apply action "2 added to row 1, column 4" -action: 6 - -# State 144 -# Apply action "Down" -action: 2 - -# State 145 -# Apply action "4 added to row 1, column 4" -action: 7 - -# State 146 -# Apply action "Left" -action: 3 - -# State 147 -# Apply action "4 added to row 3, column 4" -action: 23 - -# State 148 -# Apply action "Up" -action: 0 - -# State 149 -# Apply action "4 added to row 3, column 3" -action: 21 - -# State 150 -# Apply action "Left" -action: 3 - -# State 151 -# Apply action "Left" -action: 3 - -# State 152 -# 4 16 8 4 -# 16 4 128 8 -# 2 8 4 0 -# 8 2 0 0 -IsTerminal() = False -History() = [3, 13, 2, 31, 3, 30, 0, 23, 3, 30, 3, 7, 2, 19, 0, 18, 2, 2, 1, 8, 0, 30, 2, 9, 0, 27, 2, 8, 0, 18, 3, 27, 3, 29, 3, 12, 0, 22, 0, 13, 1, 16, 0, 24, 2, 7, 0, 10, 3, 30, 2, 12, 2, 13, 1, 7, 0, 0, 2, 17, 0, 29, 1, 26, 0, 9, 3, 6, 3, 3, 0, 19, 0, 2, 6, 2, 5, 3, 5, 3, 6, 0, 30, 3, 21, 3, 0, 21, 3, 2, 22, 1, 2, 0, 18, 3, 12, 3, 3, 0, 1, 17, 0, 17, 3, 14, 0, 30, 0, 30, 3, 30, 3, 28, 2, 7, 3, 31, 1, 0, 2, 1, 0, 30, 2, 6, 3, 30, 3, 7, 1, 26, 3, 22, 1, 25, 2, 4, 2, 1, 3, 15, 3, 6, 2, 7, 3, 23, 0, 21, 3, 3] -HistoryString() = "3, 13, 2, 31, 3, 30, 0, 23, 3, 30, 3, 7, 2, 19, 0, 18, 2, 2, 1, 8, 0, 30, 2, 9, 0, 27, 2, 8, 0, 18, 3, 27, 3, 29, 3, 12, 0, 22, 0, 13, 1, 16, 0, 24, 2, 7, 0, 10, 3, 30, 2, 12, 2, 13, 1, 7, 0, 0, 2, 17, 0, 29, 1, 26, 0, 9, 3, 6, 3, 3, 0, 19, 0, 2, 6, 2, 5, 3, 5, 3, 6, 0, 30, 3, 21, 3, 0, 21, 3, 2, 22, 1, 2, 0, 18, 3, 12, 3, 3, 0, 1, 17, 0, 17, 3, 14, 0, 30, 0, 30, 3, 30, 3, 28, 2, 7, 3, 31, 1, 0, 2, 1, 0, 30, 2, 6, 3, 30, 3, 7, 1, 26, 3, 22, 1, 25, 2, 4, 2, 1, 3, 15, 3, 6, 2, 7, 3, 23, 0, 21, 3, 3" -IsChanceNode() = False -IsSimultaneousNode() = False -CurrentPlayer() = 0 -ObservationString(0) = " 4 16 8 4\n 16 4 128 8\n 2 8 4 0\n 8 2 0 0\n" -ObservationTensor(0) = [4.0, 16.0, 8.0, 4.0, 16.0, 4.0, 128.0, 8.0, 2.0, 8.0, 4.0, 0.0, 8.0, 2.0, 0.0, 0.0] -Rewards() = [0] -Returns() = [808] -LegalActions() = [0, 1, 2, 3] -StringLegalActions() = ["Up", "Right", "Down", "Left"] - +# State 110 # Apply action "Right" action: 1 -# State 153 -# Apply action "4 added to row 4, column 1" -action: 25 - -# State 154 -# Apply action "Up" -action: 0 - -# State 155 -# Apply action "4 added to row 4, column 2" -action: 27 - -# State 156 -# Apply action "Left" -action: 3 - -# State 157 -# Apply action "4 added to row 4, column 3" -action: 29 - -# State 158 -# Apply action "Left" +# State 111 +# Apply action "4 added to row 1, column 2" action: 3 -# State 159 +# State 112 # Apply action "Right" action: 1 -# State 160 -# Apply action "4 added to row 4, column 1" -action: 25 - -# State 161 -# Apply action "Left" -action: 3 - -# State 162 -# Apply action "4 added to row 4, column 4" -action: 31 - -# State 163 -# Apply action "Down" -action: 2 - -# State 164 -# Apply action "2 added to row 1, column 2" -action: 2 - -# State 165 -# Apply action "Down" -action: 2 - -# State 166 +# State 113 # Apply action "2 added to row 1, column 2" action: 2 -# State 167 -# Apply action "Right" -action: 1 - -# State 168 -# Apply action "2 added to row 2, column 1" -action: 8 - -# State 169 -# Apply action "Up" -action: 0 - -# State 170 -# Apply action "2 added to row 4, column 1" -action: 24 - -# State 171 -# 2 4 4 8 -# 0 16 32 128 -# 0 4 4 4 -# 2 16 0 16 -IsTerminal() = False -History() = [3, 13, 2, 31, 3, 30, 0, 23, 3, 30, 3, 7, 2, 19, 0, 18, 2, 2, 1, 8, 0, 30, 2, 9, 0, 27, 2, 8, 0, 18, 3, 27, 3, 29, 3, 12, 0, 
22, 0, 13, 1, 16, 0, 24, 2, 7, 0, 10, 3, 30, 2, 12, 2, 13, 1, 7, 0, 0, 2, 17, 0, 29, 1, 26, 0, 9, 3, 6, 3, 3, 0, 19, 0, 2, 6, 2, 5, 3, 5, 3, 6, 0, 30, 3, 21, 3, 0, 21, 3, 2, 22, 1, 2, 0, 18, 3, 12, 3, 3, 0, 1, 17, 0, 17, 3, 14, 0, 30, 0, 30, 3, 30, 3, 28, 2, 7, 3, 31, 1, 0, 2, 1, 0, 30, 2, 6, 3, 30, 3, 7, 1, 26, 3, 22, 1, 25, 2, 4, 2, 1, 3, 15, 3, 6, 2, 7, 3, 23, 0, 21, 3, 3, 1, 25, 0, 27, 3, 29, 3, 1, 25, 3, 31, 2, 2, 2, 2, 1, 8, 0, 24] -HistoryString() = "3, 13, 2, 31, 3, 30, 0, 23, 3, 30, 3, 7, 2, 19, 0, 18, 2, 2, 1, 8, 0, 30, 2, 9, 0, 27, 2, 8, 0, 18, 3, 27, 3, 29, 3, 12, 0, 22, 0, 13, 1, 16, 0, 24, 2, 7, 0, 10, 3, 30, 2, 12, 2, 13, 1, 7, 0, 0, 2, 17, 0, 29, 1, 26, 0, 9, 3, 6, 3, 3, 0, 19, 0, 2, 6, 2, 5, 3, 5, 3, 6, 0, 30, 3, 21, 3, 0, 21, 3, 2, 22, 1, 2, 0, 18, 3, 12, 3, 3, 0, 1, 17, 0, 17, 3, 14, 0, 30, 0, 30, 3, 30, 3, 28, 2, 7, 3, 31, 1, 0, 2, 1, 0, 30, 2, 6, 3, 30, 3, 7, 1, 26, 3, 22, 1, 25, 2, 4, 2, 1, 3, 15, 3, 6, 2, 7, 3, 23, 0, 21, 3, 3, 1, 25, 0, 27, 3, 29, 3, 1, 25, 3, 31, 2, 2, 2, 2, 1, 8, 0, 24" -IsChanceNode() = False -IsSimultaneousNode() = False -CurrentPlayer() = 0 -ObservationString(0) = " 2 4 4 8\n 0 16 32 128\n 0 4 4 4\n 2 16 0 16\n" -ObservationTensor(0) = [2.0, 4.0, 4.0, 8.0, 0.0, 16.0, 32.0, 128.0, 0.0, 4.0, 4.0, 4.0, 2.0, 16.0, 0.0, 16.0] -Rewards() = [4] -Returns() = [920] -LegalActions() = [0, 1, 2, 3] -StringLegalActions() = ["Up", "Right", "Down", "Left"] - -# Apply action "Left" -action: 3 - -# State 172 -# Apply action "2 added to row 3, column 4" -action: 22 - -# State 173 -# Apply action "Down" -action: 2 - -# State 174 -# Apply action "2 added to row 2, column 4" -action: 14 - -# State 175 -# Apply action "Down" -action: 2 - -# State 176 -# Apply action "2 added to row 2, column 3" -action: 12 - -# State 177 -# Apply action "Up" -action: 0 - -# State 178 -# Apply action "4 added to row 4, column 4" -action: 31 - -# State 179 -# Apply action "Up" -action: 0 - -# State 180 -# Apply action "4 added to row 3, column 4" -action: 23 - -# State 181 +# State 114 # Apply action "Left" action: 3 -# State 182 -# Apply action "Up" -action: 0 - -# State 183 -# Apply action "4 added to row 3, column 4" -action: 23 - -# State 184 -# Apply action "Up" -action: 0 - -# State 185 -# Apply action "4 added to row 3, column 4" -action: 23 - -# State 186 -# Apply action "Down" -action: 2 - -# State 187 +# State 115 # Apply action "2 added to row 1, column 3" action: 4 -# State 188 -# Apply action "Down" -action: 2 - -# State 189 -# Apply action "2 added to row 1, column 4" -action: 6 - -# State 190 -# 2 8 0 2 -# 16 32 4 0 -# 8 4 8 16 -# 2 32 128 4 -IsTerminal() = False -History() = [3, 13, 2, 31, 3, 30, 0, 23, 3, 30, 3, 7, 2, 19, 0, 18, 2, 2, 1, 8, 0, 30, 2, 9, 0, 27, 2, 8, 0, 18, 3, 27, 3, 29, 3, 12, 0, 22, 0, 13, 1, 16, 0, 24, 2, 7, 0, 10, 3, 30, 2, 12, 2, 13, 1, 7, 0, 0, 2, 17, 0, 29, 1, 26, 0, 9, 3, 6, 3, 3, 0, 19, 0, 2, 6, 2, 5, 3, 5, 3, 6, 0, 30, 3, 21, 3, 0, 21, 3, 2, 22, 1, 2, 0, 18, 3, 12, 3, 3, 0, 1, 17, 0, 17, 3, 14, 0, 30, 0, 30, 3, 30, 3, 28, 2, 7, 3, 31, 1, 0, 2, 1, 0, 30, 2, 6, 3, 30, 3, 7, 1, 26, 3, 22, 1, 25, 2, 4, 2, 1, 3, 15, 3, 6, 2, 7, 3, 23, 0, 21, 3, 3, 1, 25, 0, 27, 3, 29, 3, 1, 25, 3, 31, 2, 2, 2, 2, 1, 8, 0, 24, 3, 22, 2, 14, 2, 12, 0, 31, 0, 23, 3, 0, 23, 0, 23, 2, 4, 2, 6] -HistoryString() = "3, 13, 2, 31, 3, 30, 0, 23, 3, 30, 3, 7, 2, 19, 0, 18, 2, 2, 1, 8, 0, 30, 2, 9, 0, 27, 2, 8, 0, 18, 3, 27, 3, 29, 3, 12, 0, 22, 0, 13, 1, 16, 0, 24, 2, 7, 0, 10, 3, 30, 2, 12, 2, 13, 1, 7, 0, 0, 2, 17, 0, 29, 1, 26, 0, 9, 3, 6, 3, 3, 0, 19, 0, 2, 6, 2, 5, 3, 
5, 3, 6, 0, 30, 3, 21, 3, 0, 21, 3, 2, 22, 1, 2, 0, 18, 3, 12, 3, 3, 0, 1, 17, 0, 17, 3, 14, 0, 30, 0, 30, 3, 30, 3, 28, 2, 7, 3, 31, 1, 0, 2, 1, 0, 30, 2, 6, 3, 30, 3, 7, 1, 26, 3, 22, 1, 25, 2, 4, 2, 1, 3, 15, 3, 6, 2, 7, 3, 23, 0, 21, 3, 3, 1, 25, 0, 27, 3, 29, 3, 1, 25, 3, 31, 2, 2, 2, 2, 1, 8, 0, 24, 3, 22, 2, 14, 2, 12, 0, 31, 0, 23, 3, 0, 23, 0, 23, 2, 4, 2, 6" -IsChanceNode() = False -IsSimultaneousNode() = False -CurrentPlayer() = 0 -ObservationString(0) = " 2 8 0 2\n 16 32 4 0\n 8 4 8 16\n 2 32 128 4\n" -ObservationTensor(0) = [2.0, 8.0, 0.0, 2.0, 16.0, 32.0, 4.0, 0.0, 8.0, 4.0, 8.0, 16.0, 2.0, 32.0, 128.0, 4.0] -Rewards() = [4] -Returns() = [1008] -LegalActions() = [0, 1, 2, 3] -StringLegalActions() = ["Up", "Right", "Down", "Left"] - -# Apply action "Left" -action: 3 - -# State 191 -# Apply action "4 added to row 2, column 4" -action: 15 - -# State 192 -# Apply action "Left" -action: 3 - -# State 193 -# Apply action "4 added to row 2, column 4" -action: 15 - -# State 194 -# Apply action "Down" -action: 2 - -# State 195 -# Apply action "2 added to row 1, column 4" -action: 6 - -# State 196 -# Apply action "Down" -action: 2 - -# State 197 -# Apply action "Down" -action: 2 - -# State 198 -# Apply action "Down" -action: 2 - -# State 199 -# Apply action "Up" -action: 0 - -# State 200 -# Apply action "2 added to row 4, column 3" -action: 28 - -# State 201 -# Apply action "Up" -action: 0 - -# State 202 -# Apply action "Left" -action: 3 +# State 116 +# Apply action "Right" +action: 1 -# State 203 -# Apply action "2 added to row 1, column 4" -action: 6 +# State 117 +# Apply action "4 added to row 1, column 1" +action: 1 -# State 204 -# 2 8 4 2 -# 16 32 16 4 -# 8 4 128 16 -# 2 32 2 4 +# State 118 +# 4 2 8 2 +# 8 32 2 4 +# 2 8 64 16 +# 16 2 8 2 IsTerminal() = True -History() = [3, 13, 2, 31, 3, 30, 0, 23, 3, 30, 3, 7, 2, 19, 0, 18, 2, 2, 1, 8, 0, 30, 2, 9, 0, 27, 2, 8, 0, 18, 3, 27, 3, 29, 3, 12, 0, 22, 0, 13, 1, 16, 0, 24, 2, 7, 0, 10, 3, 30, 2, 12, 2, 13, 1, 7, 0, 0, 2, 17, 0, 29, 1, 26, 0, 9, 3, 6, 3, 3, 0, 19, 0, 2, 6, 2, 5, 3, 5, 3, 6, 0, 30, 3, 21, 3, 0, 21, 3, 2, 22, 1, 2, 0, 18, 3, 12, 3, 3, 0, 1, 17, 0, 17, 3, 14, 0, 30, 0, 30, 3, 30, 3, 28, 2, 7, 3, 31, 1, 0, 2, 1, 0, 30, 2, 6, 3, 30, 3, 7, 1, 26, 3, 22, 1, 25, 2, 4, 2, 1, 3, 15, 3, 6, 2, 7, 3, 23, 0, 21, 3, 3, 1, 25, 0, 27, 3, 29, 3, 1, 25, 3, 31, 2, 2, 2, 2, 1, 8, 0, 24, 3, 22, 2, 14, 2, 12, 0, 31, 0, 23, 3, 0, 23, 0, 23, 2, 4, 2, 6, 3, 15, 3, 15, 2, 6, 2, 2, 2, 0, 28, 0, 3, 6] -HistoryString() = "3, 13, 2, 31, 3, 30, 0, 23, 3, 30, 3, 7, 2, 19, 0, 18, 2, 2, 1, 8, 0, 30, 2, 9, 0, 27, 2, 8, 0, 18, 3, 27, 3, 29, 3, 12, 0, 22, 0, 13, 1, 16, 0, 24, 2, 7, 0, 10, 3, 30, 2, 12, 2, 13, 1, 7, 0, 0, 2, 17, 0, 29, 1, 26, 0, 9, 3, 6, 3, 3, 0, 19, 0, 2, 6, 2, 5, 3, 5, 3, 6, 0, 30, 3, 21, 3, 0, 21, 3, 2, 22, 1, 2, 0, 18, 3, 12, 3, 3, 0, 1, 17, 0, 17, 3, 14, 0, 30, 0, 30, 3, 30, 3, 28, 2, 7, 3, 31, 1, 0, 2, 1, 0, 30, 2, 6, 3, 30, 3, 7, 1, 26, 3, 22, 1, 25, 2, 4, 2, 1, 3, 15, 3, 6, 2, 7, 3, 23, 0, 21, 3, 3, 1, 25, 0, 27, 3, 29, 3, 1, 25, 3, 31, 2, 2, 2, 2, 1, 8, 0, 24, 3, 22, 2, 14, 2, 12, 0, 31, 0, 23, 3, 0, 23, 0, 23, 2, 4, 2, 6, 3, 15, 3, 15, 2, 6, 2, 2, 2, 0, 28, 0, 3, 6" +History() = [20, 10, 3, 20, 0, 13, 3, 20, 3, 19, 2, 8, 0, 27, 1, 19, 2, 3, 1, 13, 3, 7, 0, 27, 0, 22, 3, 25, 1, 19, 2, 3, 2, 2, 3, 14, 0, 29, 0, 15, 1, 9, 1, 28, 1, 28, 2, 24, 0, 24, 0, 17, 1, 28, 3, 27, 1, 29, 0, 8, 1, 28, 2, 2, 3, 5, 0, 26, 2, 6, 1, 2, 0, 16, 1, 17, 3, 27, 2, 2, 1, 4, 2, 9, 0, 28, 3, 28, 1, 17, 2, 11, 0, 22, 1, 29, 2, 8, 0, 28, 3, 23, 1, 26, 2, 3, 2, 
1, 3, 15, 1, 3, 1, 2, 3, 4, 1, 1]
+HistoryString() = "20, 10, 3, 20, 0, 13, 3, 20, 3, 19, 2, 8, 0, 27, 1, 19, 2, 3, 1, 13, 3, 7, 0, 27, 0, 22, 3, 25, 1, 19, 2, 3, 2, 2, 3, 14, 0, 29, 0, 15, 1, 9, 1, 28, 1, 28, 2, 24, 0, 24, 0, 17, 1, 28, 3, 27, 1, 29, 0, 8, 1, 28, 2, 2, 3, 5, 0, 26, 2, 6, 1, 2, 0, 16, 1, 17, 3, 27, 2, 2, 1, 4, 2, 9, 0, 28, 3, 28, 1, 17, 2, 11, 0, 22, 1, 29, 2, 8, 0, 28, 3, 23, 1, 26, 2, 3, 2, 1, 3, 15, 1, 3, 1, 2, 3, 4, 1, 1"
 IsChanceNode() = False
 IsSimultaneousNode() = False
 CurrentPlayer() = -4
-ObservationString(0) = " 2 8 4 2\n 16 32 16 4\n 8 4 128 16\n 2 32 2 4\n"
-ObservationTensor(0) = [2.0, 8.0, 4.0, 2.0, 16.0, 32.0, 16.0, 4.0, 8.0, 4.0, 128.0, 16.0, 2.0, 32.0, 2.0, 4.0]
-Rewards() = [4]
-Returns() = [1036]
+ObservationString(0) = " 4 2 8 2\n 8 32 2 4\n 2 8 64 16\n 16 2 8 2\n"
+ObservationTensor(0) = [4.0, 2.0, 8.0, 2.0, 8.0, 32.0, 2.0, 4.0, 2.0, 8.0, 64.0, 16.0, 16.0, 2.0, 8.0, 2.0]
+Rewards() = [0]
+Returns() = [496]
diff --git a/open_spiel/python/examples/twenty_forty_eight_td_n_tuple_network.py b/open_spiel/python/examples/twenty_forty_eight_td_n_tuple_network.py
new file mode 100644
index 0000000000..27d7da4e27
--- /dev/null
+++ b/open_spiel/python/examples/twenty_forty_eight_td_n_tuple_network.py
@@ -0,0 +1,143 @@
+# Copyright 2023 DeepMind Technologies Limited
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""TD Learning with N-Tuple Networks for 2048."""
+
+from absl import app
+from absl import flags
+from absl import logging
+
+import numpy as np
+import pyspiel
+
+flags.DEFINE_string("game", "2048", "Name of the game.")
+flags.DEFINE_integer("num_train_episodes", 15000,
+                     "Number of training episodes.")
+flags.DEFINE_integer("eval_every", 1000,
+                     "Episode frequency at which the agent is evaluated.")
+flags.DEFINE_float("alpha", 0.02, "Learning rate.")
+FLAGS = flags.FLAGS
+
+
+class NTupleNetwork:
+  """
+  N-tuple networks are an effective way of reducing the storage requirement
+  for evaluating and learning state values. This is accomplished by defining
+  a collection of n-tuples that represent various segments in a game's
+  ObservationTensor.
+
+  The value of a given state is defined as the sum of the values of each
+  n-tuple, which are stored in a lookup table. The policy of the agent is to
+  choose an action that maximises the value of the after-state. After each
+  episode, all the states that were reached in that episode are used for
+  updating the state values using temporal difference learning.
+
+  References:
+  [1] Szubert, Marcin and Wojciech Jaśkowski. "Temporal difference learning of
+  n-tuple networks for the game 2048." Computational Intelligence and Games
+  (CIG), 2014 IEEE Conference on. IEEE, 2014.
+ """ + + def __init__(self, n_tuple_size, max_tuple_index, n_tuples): + for tuples in n_tuples: + if len(tuples) != n_tuple_size: + raise ValueError("n_tuple_size does not match size of tuples") + n_tuple_network_size = len(n_tuples) + look_up_table_shape = ( + n_tuple_network_size,) + (max_tuple_index,) * n_tuple_size + + self.n_tuples = n_tuples + self.look_up_table = np.zeros(look_up_table_shape) + + def learn(self, states): + target = 0 + while states: + state = states.pop() + error = target - self.value(state) + target = state.rewards()[0] + self.update(state, FLAGS.alpha * error) + + def update(self, state, adjust): + v = 0 + for idx, n_tuple in enumerate(self.n_tuples): + v += self.update_tuple(idx, n_tuple, state, adjust) + return v + + def update_tuple(self, idx, n_tuple, state, adjust): + observation_tensor = state.observation_tensor(0) + index = (idx,) + tuple([0 if observation_tensor[tile] == 0 + else int(np.log2(observation_tensor[tile])) for tile in n_tuple]) + self.look_up_table[index] += adjust + return self.look_up_table[index] + + def evaluator(self, state, action): + working_state = state.clone() + working_state.apply_action(action) + return working_state.rewards()[0] + self.value(working_state) + + def value(self, state): + observation_tensor = state.observation_tensor(0) + v = 0 + for idx, n_tuple in enumerate(self.n_tuples): + lookup_tuple_index = [0 if observation_tensor[tile] == 0 + else int(np.log2(observation_tensor[tile])) for tile in n_tuple] + lookup_index = (idx,) + tuple(lookup_tuple_index) + v += self.look_up_table[lookup_index] + return v + + +def main(_): + n_tuple_network = NTupleNetwork(6, 15, [[0, 1, 2, 3, 4, 5], + [4, 5, 6, 7, 8, 9], [0, 1, 2, 4, 5, 6], [4, 5, 6, 8, 9, 10],]) + game = pyspiel.load_game(FLAGS.game) + sum_rewards = 0 + largest_tile = 0 + max_score = 0 + for ep in range(FLAGS.num_train_episodes): + state = game.new_initial_state() + states_in_episode = [] + while not state.is_terminal(): + if state.is_chance_node(): + outcomes = state.chance_outcomes() + action_list, prob_list = zip(*outcomes) + action = np.random.choice(action_list, p=prob_list) + state.apply_action(action) + else: + legal_actions = state.legal_actions(state.current_player()) + best_action = max(legal_actions, + key=lambda action: n_tuple_network.evaluator(state, action)) + state.apply_action(best_action) + states_in_episode.append(state.clone()) + + sum_rewards += state.returns()[0] + largest_tile_from_episode = max(state.observation_tensor(0)) + if largest_tile_from_episode > largest_tile: + largest_tile = largest_tile_from_episode + if state.returns()[0] > max_score: + max_score = state.returns()[0] + + n_tuple_network.learn(states_in_episode) + + if (ep + 1) % FLAGS.eval_every == 0: + logging.info( + "[%s] Average Score: %s, Max Score: %s, Largest Tile Reached: %s", + ep + 1, int(sum_rewards / FLAGS.eval_every), int(max_score), + int(largest_tile) + ) + sum_rewards = 0 + largest_tile = 0 + max_score = 0 + + +if __name__ == "__main__": + app.run(main)