Mine Sweeper Game
Minesweeper using Reinforcement Learning¶
Import Libraries¶
In [1]:
import pandas as pd
import numpy as np
from itertools import product
import random
from random import choice
from collections import namedtuple
from scipy.signal import convolve2d
from tqdm import trange
from time import sleep
import matplotlib.pyplot as plt
from IPython.display import clear_output
import ipywidgets as widgets
%matplotlib qt
%matplotlib inline
Minesweeper Class¶
In [2]:
class Minesweeper(object):
    def __init__(self, difficulty):  # difficulty consists of [[width, height], number of bombs]
        self.size = (difficulty[0][1], difficulty[0][0])
        self.discovered = np.zeros(self.size, dtype=bool)
        is_bomb = np.zeros(difficulty[0][1]*difficulty[0][0], dtype=bool)
        is_bomb[:difficulty[1]] = True
        np.random.shuffle(is_bomb)
        self.is_bomb = is_bomb.reshape(self.size)
        # Count neighbouring bombs for every cell with a 3x3 convolution (centre excluded).
        neighbour_kernel = np.ones((3, 3))
        neighbour_kernel[1, 1] = 0
        self.n_neighbours = convolve2d(self.is_bomb, neighbour_kernel, mode="same").astype(int)
        self.dead = False

    def reset(self):
        self.dead = False
        self.discovered = np.zeros_like(self.discovered)

    @property
    def board_numpy(self):
        array = np.zeros_like(self.is_bomb, dtype='U1')  # 'U1' keeps the one-character 'B' marker
        array[self.is_bomb] = 'B'
        array = np.where(~self.is_bomb, self.n_neighbours, array)
        array = np.where(self.discovered, array, '?')
        return array

    @property
    def board_show(self):  # Returns a grid view of the game as a styled DataFrame
        array = self.board_numpy
        df = pd.DataFrame(array)

        def style(cell):
            if cell == "B":
                return "background-color : indianred"
            elif cell == "?":
                return ""
            else:
                return "background-color : lightgreen"

        df = df.style.applymap(style)
        return df

    @property
    def map_str(self):
        array = self.board_numpy
        return "\n".join(map(lambda row: "".join(map(str, row)), array))

    @property
    def is_terminal(self):
        n_undiscovered_non_bombs = (~self.is_bomb & ~self.discovered).sum()
        return self.dead or n_undiscovered_non_bombs == 0

    def click(self, row, column):
        self.discovered[row, column] = True
        if self.is_bomb[row, column]:
            self.dead = True
            reward = -100000
        elif self.is_terminal:
            if self.dead:
                reward = -100000
            else:
                reward = 100000
        else:
            reward = -1
        return reward

    def gameplay_human(self):
        self.reset()
        display(self.board_show)
        print("Enter q to exit.")
        while not self.is_terminal:
            try:
                row, column = map(int, input("Enter row and column numbers:").split(" "))
            except Exception:  # any malformed input (e.g. "q") aborts the game
                print("aborted..")
                break
            self.click(row, column)
            clear_output(wait=True)
            display(self.board_show)
        if self.dead:
            print("you lose!")
        elif self.is_terminal:
            print("you won!")
        print("re-run me for a new game!")
Game Difficulty¶
In [3]:
difficulty = {"beginning" : [[9, 9], 10], # difficulty is consist of [[width, height], number of bomb]
"intermediate" : [[16, 16], 40],
"advanced" : [[30, 16], 99]}
Play Game¶
In [4]:
game = Minesweeper(difficulty["advanced"])
game.gameplay_human()
[Board display: a 16x30 styled-DataFrame grid. The handful of clicked cells near the upper-left diagonal are revealed (values 0 to 2); every other cell still shows "?".]
Enter row and column numbers:3
aborted..
re-run me for a new game!
Reinforcement Learning (Q-Learning)¶
In [5]:
def q_learning(game, episodes, alpha, gamma, epsilon):
    click_count_list = []
    is_win_list = []
    size = game.size
    actions = list(product(range(size[0]), range(size[1])))
    # Q-table keyed by the last-clicked cell (the "state"); one Q-value per clickable cell.
    q_table = {(i, j): np.zeros(len(actions)) for i in range(size[0]) for j in range(size[1])}
    for episode in trange(episodes, desc="Training", unit="episodes"):
        game.reset()
        state = (0, 0)  # (random.choice(range(size[0])), random.choice(range(size[1])))
        click_count = 0
        while not game.is_terminal:
            click_count += 1
            # Only cells that have not been revealed yet are valid actions.
            available_actions = [i for i, action in enumerate(actions) if not game.discovered[action[0]][action[1]]]
            if np.random.rand() < epsilon:
                action = random.choice(available_actions)  # explore
            else:
                action = np.argmax(q_table[state][available_actions])  # exploit
                action = available_actions[action]
            reward = game.click(*actions[action])
            next_state = actions[action]
            q_table[state][action] = q_table[state][action] + alpha*(reward + gamma*np.max(q_table[next_state]) - q_table[state][action])
            state = next_state
        if game.dead:
            is_win_list.append(False)
        else:
            is_win_list.append(True)
        click_count_list.append(click_count)
    return q_table, click_count_list, is_win_list
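The assignment inside the while loop is the standard tabular Q-learning update. Here the "state" is simply the coordinates of the most recently clicked cell, and an action is the next cell to click:

$$Q(s, a) \leftarrow Q(s, a) + \alpha\left[r + \gamma \max_{a'} Q(s', a') - Q(s, a)\right]$$

where $\alpha$ is the learning rate, $\gamma$ the discount factor, $r$ the reward returned by `click`, and the $\epsilon$-greedy rule picks a random unrevealed cell with probability $\epsilon$ and the highest-valued one otherwise.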
Learning¶
In [15]:
def train(game_difficulty, episodes, alpha=0.01, gamma=0.99, epsilon=0.1):
    game = Minesweeper(game_difficulty)
    q_table, click_count_list, is_win_list = q_learning(game, episodes, alpha, gamma, epsilon)
    # A perfect game reveals every non-bomb cell exactly once.
    goal_click_count = game_difficulty[0][1]*game_difficulty[0][0] - game_difficulty[1]
    max_click_count = max(click_count_list)
    win_count = sum(is_win_list)
    max_click_loss_count = click_count_list.count(max_click_count) - win_count
    print(f"goal click = {goal_click_count}")
    print(f"max click = {max_click_count}")
    print(f"max click(win) = {win_count}")
    print(f"max click(loss) = {max_click_loss_count}")
    print(f"first max click episode = {click_count_list.index(max_click_count)}")
    if win_count > 0:
        print(f"first win episode = {is_win_list.index(True)}")
    else:
        print(f"first win episode = None")
    plt.figure(figsize=(12, 4))
    plt.plot(click_count_list)
    plt.ylim(0, goal_click_count)
    plt.show()
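`train` only reports statistics gathered while learning; it builds its own game and does not return the Q-table. If you want to watch what the learned table actually does, a rough sketch of a greedy (epsilon = 0) rollout could look like the following; `evaluate_greedy` is a hypothetical helper, not part of the original notebook.

def evaluate_greedy(game, q_table, max_clicks=1000):
    """Hypothetical helper: play one game greedily from a learned q_table."""
    game.reset()
    size = game.size
    actions = list(product(range(size[0]), range(size[1])))
    state = (0, 0)
    clicks = 0
    while not game.is_terminal and clicks < max_clicks:
        clicks += 1
        available = [i for i, a in enumerate(actions) if not game.discovered[a[0]][a[1]]]
        best = available[np.argmax(q_table[state][available])]  # purely greedy choice
        game.click(*actions[best])
        state = actions[best]
    return (not game.dead) and game.is_terminal, clicks

# Usage sketch: learn a table with q_learning directly, then roll out once.
# game = Minesweeper(difficulty["beginning"])
# q_table, _, _ = q_learning(game, episodes=5000, alpha=0.01, gamma=0.99, epsilon=0.1)
# print(evaluate_greedy(game, q_table))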
Difficulty : Beginning¶
In [12]:
train(difficulty["beginning"], episodes=5000)
Training: 100%|██████████████████████| 5000/5000 [00:09<00:00, 553.53episodes/s]
goal click = 71
max click = 71
max click(win) = 525
max click(loss) = 51
first max click episode = 790
first win episode = 790
Difficulty : Intermediate¶
In [11]:
train(difficulty["intermediate"], episodes=100000)
Training: 100%|██████████████████| 100000/100000 [08:26<00:00, 197.53episodes/s]
goal click = 216
max click = 216
max click(win) = 39
max click(loss) = 5
first max click episode = 10378
first win episode = 11382
Difficulty : Advanced¶
In [9]:
train(difficulty["advanced"], episodes=10000000)
Training: 100%|███████████| 10000000/10000000 [22:23:51<00:00, 124.02episodes/s]
goal click = 381
max click = 381
max click(win) = 2
max click(loss) = 0
first max click episode = 5353892
first win episode = 5353892
Ablation Study (Intermediate Difficulty)¶
$\epsilon$ value variation¶
In [13]:
train(difficulty["intermediate"], episodes=100000, epsilon=0.1)
Training: 100%|██████████████████| 100000/100000 [08:26<00:00, 197.26episodes/s]
goal click = 216
max click = 216
max click(win) = 40
max click(loss) = 2
first max click episode = 10533
first win episode = 10533
In [21]:
train(difficulty["intermediate"], episodes=100000, epsilon=0.25)
Training: 100%|██████████████████| 100000/100000 [03:50<00:00, 433.65episodes/s]
goal click = 216
max click = 178
max click(win) = 0
max click(loss) = 1
first max click episode = 12333
first win episode = None
In [16]:
train(difficulty["intermediate"], episodes=100000, epsilon=0.5)
Training: 100%|██████████████████| 100000/100000 [01:56<00:00, 857.65episodes/s]
goal click = 216
max click = 112
max click(win) = 0
max click(loss) = 1
first max click episode = 80817
first win episode = None
In [22]:
train(difficulty["intermediate"], episodes=100000, epsilon=0.75)
Training: 100%|█████████████████| 100000/100000 [01:17<00:00, 1290.60episodes/s]
goal click = 216
max click = 85
max click(win) = 0
max click(loss) = 1
first max click episode = 62118
first win episode = None
In [17]:
train(difficulty["intermediate"], episodes=100000, epsilon=0.99)
Training: 100%|█████████████████| 100000/100000 [00:57<00:00, 1751.72episodes/s]
goal click = 216
max click = 63
max click(win) = 0
max click(loss) = 1
first max click episode = 18102
first win episode = None
$\gamma$ value variation¶
In [18]:
train(difficulty["intermediate"], episodes=100000, gamma=0.99)
Training: 100%|██████████████████| 100000/100000 [08:34<00:00, 194.55episodes/s]
goal click = 216
max click = 216
max click(win) = 42
max click(loss) = 10
first max click episode = 13244
first win episode = 13244
In [23]:
train(difficulty["intermediate"], episodes=100000, gamma=0.75)
Training: 100%|██████████████████| 100000/100000 [08:29<00:00, 196.25episodes/s]
goal click = 216
max click = 216
max click(win) = 40
max click(loss) = 6
first max click episode = 17118
first win episode = 17118
In [19]:
train(difficulty["intermediate"], episodes=100000, gamma=0.5)
Training: 100%|██████████████████| 100000/100000 [08:31<00:00, 195.69episodes/s]
goal click = 216
max click = 216
max click(win) = 49
max click(loss) = 8
first max click episode = 10149
first win episode = 10149
In [24]:
train(difficulty["intermediate"], episodes=100000, gamma=0.25)
Training: 100%|██████████████████| 100000/100000 [08:31<00:00, 195.49episodes/s]
goal click = 216
max click = 216
max click(win) = 45
max click(loss) = 5
first max click episode = 11296
first win episode = 11296
In [20]:
train(difficulty["intermediate"], episodes=100000, gamma=0.1)
Training: 100%|██████████████████| 100000/100000 [08:26<00:00, 197.38episodes/s]
goal click = 216
max click = 216
max click(win) = 57
max click(loss) = 7
first max click episode = 12414
first win episode = 12414