728x90
import numpy as np
# Cell codes used in the maze grid.
OBSTACLE = 1
TREASURE = 2

# Define the maze (0: empty, 1: obstacle, 2: treasure)
maze = np.array([
    [0, 0, 0, 0],
    [0, 1, 1, 0],
    [0, 0, 0, 1],
    [0, 1, 0, 2],
])

# Define the rewards: rewards[i, j] is collected when the agent steps ONTO
# cell (i, j). Entries at obstacle positions are never read, because
# obstacles cannot be entered.
rewards = np.array([
    [-1, -1, -1, -1],
    [-1, 0, 0, -1],
    [-1, -1, -1, 0],
    [-1, 0, -1, 10],
])


def _open_neighbors(grid, state):
    """Return the cells the agent can move to from ``state``.

    Neighbours are listed in (up, down, left, right) order; cells outside
    the grid and obstacle cells are left out.
    """
    i, j = state
    n_rows, n_cols = grid.shape
    return [
        (ni, nj)
        for ni, nj in ((i - 1, j), (i + 1, j), (i, j - 1), (i, j + 1))
        if 0 <= ni < n_rows and 0 <= nj < n_cols and grid[ni, nj] != OBSTACLE
    ]


def value_iteration(grid, reward, discount=0.9, tolerance=1e-6):
    """Compute state values with in-place Bellman optimality backups.

    Args:
        grid: 2-D integer array of cell codes (0 empty, 1 obstacle,
            2 treasure).
        reward: 2-D array, same shape as ``grid``; reward for entering a cell.
        discount: Discount factor applied to the value of the next state.
        tolerance: Sweeps stop once the largest change in a sweep is not
            greater than this.

    Returns:
        Float array shaped like ``grid``. Obstacle and treasure cells keep
        the value 0: the treasure is terminal and obstacles are not states.
    """
    state_values = np.zeros_like(grid, dtype=float)
    delta = tolerance + 1  # Force at least one sweep.
    while delta > tolerance:
        delta = 0.0
        for state in np.ndindex(*grid.shape):  # row-major sweep
            if grid[state] in (OBSTACLE, TREASURE):
                continue
            candidates = [
                reward[nxt] + discount * state_values[nxt]
                for nxt in _open_neighbors(grid, state)
            ]
            if not candidates:
                # Completely walled-in cell: nothing to back up from.
                continue
            best = max(candidates)
            delta = max(delta, abs(best - state_values[state]))
            state_values[state] = best
    return state_values


def extract_path(grid, reward, state_values, start=(0, 0), discount=0.9):
    """Follow the greedy policy from ``start`` until the treasure is reached.

    Each step picks the neighbour with the largest one-step look-ahead
    ``reward[n] + discount * state_values[n]`` -- the same quantity that is
    maximised in ``value_iteration``. Ranking neighbours by
    ``state_values[n]`` alone would never select the treasure, whose value
    is 0. Ties go to the first neighbour in (up, down, left, right) order.

    Returns:
        List of ``(row, col)`` tuples visited before the treasure; the
        treasure cell itself is not included.

    Raises:
        RuntimeError: If the walk reaches a cell with no legal move or
            revisits a cell, i.e. the treasure cannot be reached greedily.
    """
    def look_ahead(nxt):
        return reward[nxt] + discount * state_values[nxt]

    route = []
    seen = set()
    state = tuple(start)
    while grid[state] != TREASURE:
        if state in seen:
            # A deterministic greedy walk that returns to a cell would
            # loop forever, so stop instead of hanging.
            raise RuntimeError(f"greedy walk revisited {state}; treasure unreachable")
        seen.add(state)
        route.append(state)
        moves = _open_neighbors(grid, state)
        if not moves:
            raise RuntimeError(f"no legal move from {state}")
        state = max(moves, key=look_ahead)
    return route


# Define the discount factor
gamma = 0.9
# Convergence threshold for value iteration
epsilon = 1e-6

# Perform value iteration (Bellman updates)
values = value_iteration(maze, rewards, gamma, epsilon)

# Find the optimal path from start to treasure
path = extract_path(maze, rewards, values, start=(0, 0), discount=gamma)
# Glyphs for the plain maze; a cell code without a glyph prints nothing.
maze_glyphs = {0: " ", 1: "#", 2: "X"}
# The path overlay adds code 3 for cells that lie on the path.
path_glyphs = {0: " ", 1: "#", 2: "X", 3: "."}

# Print the maze, one text line per grid row; every glyph is followed by
# a single space.
print("Maze:")
for grid_row in maze:
    print("".join(
        f"{maze_glyphs[code]} " for code in grid_row.tolist() if code in maze_glyphs
    ))

# Print the state values with two decimals, each followed by a space.
print("\nValues:")
for value_row in values:
    print("".join(f"{cell_value:.2f} " for cell_value in value_row))

# Print the maze again with the path cells marked.
print("\nOptimal Path:")
maze_with_path = np.copy(maze)
for step in path:
    maze_with_path[step] = 3  # Mark the path with a special symbol
for grid_row in maze_with_path:
    print("".join(
        f"{path_glyphs[code]} " for code in grid_row.tolist() if code in path_glyphs
    ))
728x90
'hacking sorcerer' 카테고리의 다른 글
IoT n Traffic (0) | 2023.05.21 |
---|---|
The right Bellman equation python code (0) | 2023.05.21 |
test (0) | 2023.05.18 |
Tryhackme에서 GURU 단계에 도달했다 (0) | 2023.05.17 |
화가를 1초만에 끔살하는 방법 뀨~ (0) | 2023.05.14 |