The right Bellman equation Python code
import numpy as np

# Grid-world setup for the Bellman-equation example.
# The original snippet was collapsed onto one physical line, so everything
# after the first "#" was parsed as a comment and none of the names below
# were actually defined. One statement per line restores the intended script.

# Define the maze (0: empty, 1: obstacle, 2: treasure)
maze = np.array([
    [0, 0, 0, 0],
    [0, 1, 1, 0],
    [0, 0, 0, 1],
    [0, 1, 0, 2],
])

# Define the rewards: -1 on empty cells, 0 on obstacle cells,
# 10 on the treasure cell (same 4x4 layout as ``maze``).
rewards = np.array([
    [-1, -1, -1, -1],
    [-1, 0, 0, -1],
    [-1, -1, -1, 0],
    [-1, 0, -1, 10],
])

# Initialize the values of each state; dtype=float overrides the integer
# dtype that zeros_like would otherwise inherit from ``maze``.
values = np.zeros_like(maze, dtype=float)

# Define the discount factor
gamma = 0.9

# Perform value..
# NOTE(review): the snippet is truncated at this point in the source; the
# remaining steps are not visible here and are intentionally not guessed.
The Bellman equation Python code
import numpy as np

# Grid-world setup for the Bellman-equation example.
# The original snippet was collapsed onto one physical line, so everything
# after the first "#" was parsed as a comment and none of the names below
# were actually defined. One statement per line restores the intended script.

# Define the maze (0: empty, 1: obstacle, 2: treasure)
maze = np.array([
    [0, 0, 0, 0],
    [0, 1, 1, 0],
    [0, 0, 0, 1],
    [0, 1, 0, 2],
])

# Define the rewards: -1 on empty cells, 0 on obstacle cells,
# 10 on the treasure cell (same 4x4 layout as ``maze``).
rewards = np.array([
    [-1, -1, -1, -1],
    [-1, 0, 0, -1],
    [-1, -1, -1, 0],
    [-1, 0, -1, 10],
])

# Initialize the values of each state; dtype=float overrides the integer
# dtype that zeros_like would otherwise inherit from ``maze``.
values = np.zeros_like(maze, dtype=float)

# Define the discount factor
gamma = 0.9

# Perform value..
# NOTE(review): the snippet is truncated at this point in the source; the
# remaining steps are not visible here and are intentionally not guessed.