Reinforcement Learning met Gymnasium in Python
Fouad Trad
Machine Learning Engineer



# Train a Q-table on slippery FrozenLake using purely random exploration.
env = gym.make("FrozenLake", is_slippery=True)

num_episodes = 1000
alpha = 0.1  # weight given to the new estimate inside update_q_table
gamma = 1  # factor applied to the next state's best Q-value inside update_q_table

num_states, num_actions = env.observation_space.n, env.action_space.n
Q = np.zeros((num_states, num_actions))

# Total reward collected in each training episode.
reward_per_random_episode = []

for episode in range(num_episodes):
    state, info = env.reset()
    terminated = False
    truncated = False
    episode_reward = 0

    # An episode is over when the environment reports either termination or
    # truncation; stepping past a truncation signal without a reset is not
    # supported by the Gymnasium API.
    while not (terminated or truncated):
        # Random action selection
        action = env.action_space.sample()

        # Execute the action and observe the new state and reward
        new_state, reward, terminated, truncated, info = env.step(action)

        # Update the Q-table; the reward must be passed along, since
        # update_q_table takes (state, action, reward, new_state).
        update_q_table(state, action, reward, new_state)

        episode_reward += reward
        state = new_state

    reward_per_random_episode.append(episode_reward)

def update_q_table(state, action, reward, new_state):
    """Update the module-level Q-table entry for ``(state, action)`` in place.

    The new value blends the current estimate with
    ``reward + gamma * max(Q[new_state])``, weighting the current estimate by
    ``1 - alpha`` and the new target by ``alpha``.

    Args:
        state: Row index into ``Q`` for the state the action was taken in.
        action: Column index into ``Q`` for the action that was taken.
        reward: Reward observed after taking ``action`` in ``state``.
        new_state: Row index into ``Q`` for the state that was reached.

    Returns:
        None. ``Q`` is modified as a side effect.
    """
    current_estimate = Q[state, action]
    best_next_value = max(Q[new_state])
    target = reward + gamma * best_next_value
    Q[state, action] = (1 - alpha) * current_estimate + alpha * target
# Evaluate the learned policy: run it for num_episodes episodes and record the
# total reward of each one.
reward_per_learned_episode = []

# NOTE(review): get_policy is defined outside this excerpt; it is indexed by
# state below, so it presumably maps each state to an action -- confirm.
policy = get_policy()

for episode in range(num_episodes):
    state, info = env.reset()
    terminated = False
    truncated = False
    episode_reward = 0

    # Stop on truncation as well as termination: a fixed policy may never
    # reach a terminal state, and the environment must be reset once it
    # signals truncation.
    while not (terminated or truncated):
        # Choose the best action based on the learned Q-table
        action = policy[state]

        # Execute the action and observe the new state
        new_state, reward, terminated, truncated, info = env.step(action)

        state = new_state
        episode_reward += reward

    reward_per_learned_episode.append(episode_reward)
import numpy as np
import matplotlib.pyplot as plt

# Compare the mean per-episode reward of the random run and the learned-policy
# run in a two-bar chart.
avg_random_reward = np.mean(reward_per_random_episode)
avg_learned_reward = np.mean(reward_per_learned_episode)

bar_labels = ['Random Policy', 'Learned Policy']
bar_heights = [avg_random_reward, avg_learned_reward]
bar_colors = ['blue', 'green']

plt.bar(bar_labels, bar_heights, color=bar_colors)
plt.title('Gemiddelde beloning per episode')
plt.ylabel('Gemiddelde beloning')
plt.show()

Reinforcement Learning met Gymnasium in Python