import random

import numpy as np

# Action identifiers; their meaning is defined by code outside this block.
actions = [1, 2, 0]


def state(data, t: int, n: int) -> np.ndarray:
    """Return the consecutive differences of the ``n``-element window ending at ``t``.

    The window is ``data[t - n + 1 : t + 1]``. When it would start before
    index 0, it is left-padded with copies of ``data[0]`` so that it still
    holds ``n`` elements.

    Args:
        data: Indexable, sliceable sequence of numbers (``list``, ``tuple``
            or 1-D NumPy array).
        t: Index of the last element of the window.
        n: Window length; the result has ``n - 1`` differences.

    Returns:
        Array of shape ``(1, n - 1)`` whose ``i``-th entry is
        ``window[i + 1] - window[i]``.

    Raises:
        IndexError: If ``data`` is empty and padding is needed, or if the
            window runs past the end of ``data``.
    """
    start = t - n + 1
    if start >= 0:
        window = list(data[start:t + 1])
    else:
        # Build the padding and the slice as plain lists so that ``+`` always
        # concatenates; with a NumPy array it would broadcast-add instead.
        window = [data[0]] * -start + list(data[0:t + 1])

    # Explicit indexing (rather than zip) keeps the IndexError for a window
    # that is shorter than ``n`` because ``t`` lies beyond the data.
    diffs = [window[i + 1] - window[i] for i in range(n - 1)]
    return np.array([diffs])