Reinforcement Learning with Gymnasium in Python
Fouad Trad
Machine Learning Engineer




# Environment and hyperparameter setup for the SARSA-style training loop.
# Assumes `gym` (Gymnasium) and `np` (NumPy) are imported earlier in the file.
# The environment version is pinned explicitly so the snippet does not depend
# on the registry resolving an unversioned ID to "latest".
env = gym.make("FrozenLake-v1", is_slippery=False)
num_states = env.observation_space.n
num_actions = env.action_space.n

# One row per state, one column per action; every action value starts at zero.
Q = np.zeros((num_states, num_actions))

alpha = 0.1  # weight given to the new target in update_q_table
gamma = 1  # multiplier applied to the next state-action value in the target
num_episodes = 1000
# Training loop: every episode starts from a reset environment, actions are
# sampled uniformly from the action space, and each transition
# (state, action, reward, next_state, next_action) is passed to update_q_table.
# NOTE: update_q_table must already be defined when this loop executes.
for episode in range(num_episodes):
    state, info = env.reset()
    action = env.action_space.sample()
    terminated = truncated = False
    # An episode ends when the environment reports termination OR truncation
    # (e.g. a step limit); stepping further without reset() is not supported.
    while not (terminated or truncated):
        next_state, reward, terminated, truncated, info = env.step(action)
        next_action = env.action_space.sample()
        update_q_table(state, action, reward, next_state, next_action)
        state, action = next_state, next_action
def update_q_table(state, action, reward, next_state, next_action):
    """Apply one SARSA update to the module-level table ``Q`` in place.

    The entry ``Q[state, action]`` becomes a blend of its previous value and
    the target ``reward + gamma * Q[next_state, next_action]``, with the
    target weighted by ``alpha``.  ``Q``, ``alpha`` and ``gamma`` are read
    from module scope.

    Args:
        state: Row index of the entry being updated.
        action: Column index of the entry being updated.
        reward: Reward received for the transition.
        next_state: Row index of the entry used as the bootstrap value.
        next_action: Column index of the entry used as the bootstrap value.
    """
    old_value = Q[state, action]
    next_value = Q[next_state, next_action]
    target = reward + gamma * next_value
    Q[state, action] = (1 - alpha) * old_value + alpha * target

# Obtain the policy from get_policy() (defined outside this excerpt) and
# display it. The lines that follow appear to be the printed result: a dict
# keyed 0-15 — presumably state index -> action index; confirm against
# get_policy().
policy = get_policy()
print(policy)
{ 0: 1, 1: 2, 2: 1, 3: 0,
4: 1, 5: 0, 6: 1, 7: 0,
8: 2, 9: 1, 10: 1, 11: 0,
12: 0, 13: 2, 14: 2, 15: 0}

Reinforcement Learning with Gymnasium in Python