Reinforcement Learning with Gymnasium in Python
Fouad Trad
Machine Learning Engineer
# Environment and Q-table setup.
# The id is pinned to an explicit version: the unversioned 'FrozenLake' id is
# resolved by Gymnasium to the latest registered version with a warning.
# is_slippery=True selects the stochastic-movement variant (see Gymnasium docs).
env = gym.make('FrozenLake-v1', is_slippery=True)
action_size = env.action_space.n
state_size = env.observation_space.n

# One row per state, one column per action, all estimates starting at zero.
Q = np.zeros((state_size, action_size))

# Hyperparameters. alpha and gamma are not used in this section; presumably
# they are the learning rate and discount factor read by update_q_table
# (defined elsewhere) -- confirm against that function.
alpha = 0.1
gamma = 0.99
total_episodes = 10000  # Number of episodes run by each training loop
def epsilon_greedy(state):
    """Choose an action for ``state`` using an epsilon-greedy rule.

    Reads the module-level ``epsilon``, ``env`` and ``Q`` at call time, so a
    change to ``epsilon`` between calls (for example a decay step) takes
    effect on the next call.

    Args:
        state: Row index into ``Q`` identifying the current state.

    Returns:
        With probability ``epsilon``, an action sampled from
        ``env.action_space``; otherwise the index of the largest value in
        ``Q[state, :]`` (``np.argmax`` returns the first index on ties).
    """
    if np.random.rand() < epsilon:
        action = env.action_space.sample()  # Explore
    else:
        action = np.argmax(Q[state, :])  # Exploit
    return action
# Experiment 1: train with a fixed exploration rate and record the total
# reward collected in every episode.
epsilon = 0.9  # Exploration rate, constant for the whole run
rewards_eps_greedy = []

for episode in range(total_episodes):
    state, info = env.reset()
    terminated = False
    truncated = False
    episode_reward = 0
    # End the episode on truncation as well as termination: once either flag
    # is set the environment must be reset before stepping again.
    while not (terminated or truncated):
        action = epsilon_greedy(state)
        new_state, reward, terminated, truncated, info = env.step(action)
        # NOTE(review): update_q_table is defined outside this section and is
        # not passed `reward`; presumably it reads the module-level `reward`
        # assigned by the step above -- confirm.
        Q[state, action] = update_q_table(state, action, new_state)
        state = new_state
        episode_reward += reward
    rewards_eps_greedy.append(episode_reward)
# Experiment 2: train with an exploration rate that starts at 1.0 and shrinks
# after every episode, recording the total reward of each episode.
epsilon = 1.0  # Initial exploration rate
epsilon_decay = 0.999  # Multiplicative decay applied once per episode
min_epsilon = 0.01  # Floor below which epsilon is never reduced

# Start from an all-zero table so this run does not inherit values learned by
# an earlier experiment on the same Q; cleared in place so every existing
# reference to Q keeps pointing at the same array.
Q.fill(0.0)

rewards_decay_eps_greedy = []
for episode in range(total_episodes):
    state, info = env.reset()
    terminated = False
    truncated = False
    episode_reward = 0
    # End the episode on truncation as well as termination: once either flag
    # is set the environment must be reset before stepping again.
    while not (terminated or truncated):
        action = epsilon_greedy(state)
        new_state, reward, terminated, truncated, info = env.step(action)
        episode_reward += reward
        # NOTE(review): update_q_table is defined outside this section and is
        # not passed `reward`; presumably it reads the module-level `reward`
        # assigned by the step above -- confirm.
        Q[state, action] = update_q_table(state, action, new_state)
        state = new_state
    rewards_decay_eps_greedy.append(episode_reward)
    # Decay once per episode, never dropping below min_epsilon.
    epsilon = max(min_epsilon, epsilon * epsilon_decay)
# Summarise each strategy by the mean of its per-episode rewards.
avg_eps_greedy = np.mean(rewards_eps_greedy)
avg_decay = np.mean(rewards_decay_eps_greedy)

# Draw one bar per strategy so the two averages can be compared side by side.
plt.bar(
    x=['Epsilon Greedy', 'Decayed Epsilon Greedy'],
    height=[avg_eps_greedy, avg_decay],
    color=['blue', 'green'],
)
plt.ylabel('Average Reward')
plt.title('Average Reward per Episode')
plt.show()
Reinforcement Learning with Gymnasium in Python