728x90
import numpy as np
# Define the maze (0: empty, 1: obstacle, 2: treasure)
maze = np.array([
    [0, 0, 0, 0],
    [0, 1, 1, 0],
    [0, 0, 0, 1],
    [0, 1, 0, 2],
])
# Reward collected on ENTERING a cell: -1 for an ordinary step, +10 for the
# treasure. The entries for obstacle cells are never read, because obstacles
# are not legal moves.
rewards = np.array([
    [-1, -1, -1, -1],
    [-1, 0, 0, -1],
    [-1, -1, -1, 0],
    [-1, 0, -1, 10],
])
# State values, all starting at zero. The treasure is terminal and obstacles
# are not real states, so both keep the value 0 throughout.
values = np.zeros_like(maze, dtype=float)
# Discount factor applied to the value of the successor state
gamma = 0.9


def _moves(state):
    """Return the cells reachable from *state* in one step.

    Candidates are generated in the fixed order up, down, left, right; cells
    outside the grid and obstacle cells (code 1) are dropped.
    """
    row, col = state
    candidates = [(row - 1, col), (row + 1, col), (row, col - 1), (row, col + 1)]
    return [
        (r, c)
        for r, c in candidates
        if 0 <= r < maze.shape[0] and 0 <= c < maze.shape[1] and maze[r, c] != 1
    ]


def _action_value(cell):
    """Return the Bellman backup for moving into *cell*: reward + gamma * value."""
    return rewards[cell] + gamma * values[cell]


# Perform value iteration (in-place Bellman updates) until the largest change
# in a full sweep drops to epsilon or below.
epsilon = 1e-6
delta = epsilon + 1  # Start above epsilon so the loop body runs at least once
while delta > epsilon:
    delta = 0  # Largest value change seen in this sweep
    for i in range(maze.shape[0]):
        for j in range(maze.shape[1]):
            # Only empty cells are updated: the treasure is terminal and an
            # obstacle can never be occupied.
            if maze[i, j] in (1, 2):
                continue
            moves = _moves((i, j))
            if not moves:  # Walled-in cell: no action to back up from
                continue
            old_value = values[i, j]
            values[i, j] = max(_action_value(cell) for cell in moves)
            delta = max(delta, abs(old_value - values[i, j]))

# Find the optimal path from start to treasure by acting greedily.
# Moves are ranked by reward + gamma * value, not by value alone: the terminal
# treasure has value 0, so ranking by value would make it the least attractive
# neighbour and the walk would never step onto it.
path = []
visited = set()
current_state = (0, 0)
while maze[current_state] != 2:  # Until reaching the treasure
    # When the treasure is reachable, values strictly increase along the
    # greedy route, so coming back to a cell means there is no route at all.
    if current_state in visited:
        raise RuntimeError("No route from the start cell to the treasure")
    visited.add(current_state)
    path.append(current_state)
    moves = _moves(current_state)
    if not moves:
        raise RuntimeError("No route from the start cell to the treasure")
    # max() keeps the first best move, i.e. ties break in the order
    # up, down, left, right.
    current_state = max(moves, key=_action_value)
def _render_rows(grid):
    """Return one display string per row of *grid*.

    Every known cell code maps to a two-character symbol so that columns line
    up; a code that is not in the table contributes nothing to the row.
    """
    symbols = {
        0: "  ",  # Two spaces for empty cells
        1: "# ",  # A hash symbol for obstacles
        2: "X ",  # An X for the treasure
        3: ". ",  # A dot for the path
    }
    return ["".join(symbols.get(int(cell), "") for cell in row) for row in grid]


# Print the maze
print("Maze:")
for line in _render_rows(maze):
    print(line)

# Print the values, two decimals each, separated by single spaces
print("\nValues:")
for row in values:
    print("".join(f"{value:.2f} " for value in row))

# Print the optimal path
print("\nOptimal Path:")
maze_with_path = np.copy(maze)  # Work on a copy so the maze itself is not modified
for p in path:
    maze_with_path[p] = 3  # Mark the path with a special symbol
for line in _render_rows(maze_with_path):
    print(line)
728x90
'호그와트' 카테고리의 다른 글
Kaggle ConnectX야 놀쟈 (0) | 2023.05.23 |
---|---|
IoT n Traffic (0) | 2023.05.21 |
The Bellman equation python code (0) | 2023.05.21 |
test (0) | 2023.05.18 |
Tryhackme에서 GURU 단계에 도달했다 (0) | 2023.05.17 |