# 🚦 Reinforcement Learning for Traffic Signal Control
This notebook demonstrates a simple RL-based simulation to minimize vehicle delays at an intersection.

In [None]:
!pip install gym numpy matplotlib



In [None]:
import gym
from gym import spaces
import numpy as np
import matplotlib.pyplot as plt

# Define synthetic traffic environment
class TrafficSignalEnv(gym.Env):
    """Synthetic intersection with two queues controlled by a two-phase signal.

    Observation: ``[queue_1, queue_2]`` as an ``np.int32`` array, each entry
    kept within ``[0, max_cars]`` so it always lies inside ``observation_space``.
    Action: ``0`` keeps the current phase, ``1`` switches it.
    Phase: ``0`` means green for queue_1, ``1`` means green for queue_2. The
    phase is not part of the observation; read it from ``self.phase``.
    """

    def __init__(self, max_cars=20):
        """Create the environment and draw an initial state.

        Args:
            max_cars: Upper bound on each queue length (default 20).
        """
        super().__init__()
        self.max_cars = max_cars
        self.action_space = spaces.Discrete(2)  # 0: keep phase, 1: change phase
        self.observation_space = spaces.Box(
            low=0, high=self.max_cars, shape=(2,), dtype=np.int32
        )  # [queue_1, queue_2]
        self.reset()

    def _observation(self):
        """Return the current queue lengths in the dtype declared by observation_space."""
        return np.array([self.queue_1, self.queue_2], dtype=np.int32)

    def reset(self):
        """Randomize both queues, set the phase to 0, and return the observation.

        Queue lengths are drawn from ``[0, max_cars)`` (the upper bound of
        ``np.random.randint`` is exclusive).
        """
        self.queue_1 = np.random.randint(0, self.max_cars)
        self.queue_2 = np.random.randint(0, self.max_cars)
        self.phase = 0  # 0: green for queue_1, 1: green for queue_2
        return self._observation()

    def step(self, action):
        """Advance the simulation by one time step.

        Args:
            action: ``1`` switches the signal phase; any other value keeps it.

        Returns:
            A ``(state, reward, done, info)`` tuple. ``reward`` is the negative
            total queue length, ``done`` is always ``False`` and ``info`` is an
            empty dict.
        """
        if action == 1:
            self.phase = 1 - self.phase  # switch phase

        # The served queue discharges 1-4 cars; the blocked queue receives 1-2
        # arrivals. Arrivals are capped at max_cars so the state never leaves
        # the bounds declared in observation_space (and any table sized from it).
        if self.phase == 0:
            self.queue_1 = max(0, self.queue_1 - np.random.randint(1, 5))
            self.queue_2 = min(self.max_cars, self.queue_2 + np.random.randint(1, 3))
        else:
            self.queue_2 = max(0, self.queue_2 - np.random.randint(1, 5))
            self.queue_1 = min(self.max_cars, self.queue_1 + np.random.randint(1, 3))

        reward = -(self.queue_1 + self.queue_2)  # minimize queue
        state = self._observation()
        done = False  # the task is continuing; callers impose their own step limit

        return state, reward, done, {}

    def render(self, mode='human'):
        """Print the current queue lengths and signal phase."""
        print(f"Queue1: {self.queue_1}, Queue2: {self.queue_2}, Phase: {self.phase}")


In [None]:
env = TrafficSignalEnv()

# Q-table axes: (queue_1, queue_2, phase, action).
# The state is the triple (queue_1, queue_2, phase), so the table needs a
# trailing action axis. With only three axes, q_table[s] is a scalar and
# q_table[s][action] raises "IndexError: invalid index to scalar variable."
n_queue_levels = env.max_cars + 1  # queue lengths 0..max_cars inclusive
n_phases = 2  # phase is 0 or 1
n_actions = env.action_space.n
q_table = np.zeros((n_queue_levels, n_queue_levels, n_phases, n_actions))
alpha = 0.1  # learning rate
gamma = 0.95  # discount factor
epsilon = 1.0  # exploration probability, decayed after every episode
episodes = 1000
max_steps = 50  # steps per episode
reward_log = []


def to_table_index(observation, phase):
    """Map an observation and signal phase to a (queue_1, queue_2, phase) index.

    Queue lengths are clamped to the table bounds so that an observation
    outside 0..max_cars can never index out of range.
    """
    top = n_queue_levels - 1
    q1 = min(max(int(observation[0]), 0), top)
    q2 = min(max(int(observation[1]), 0), top)
    return (q1, q2, int(phase))


for ep in range(episodes):
    state = env.reset()
    phase = env.phase
    total_reward = 0
    for _ in range(max_steps):
        s = to_table_index(state, phase)

        # Epsilon-greedy action selection.
        if np.random.rand() < epsilon:
            action = env.action_space.sample()
        else:
            action = int(np.argmax(q_table[s]))

        next_state, reward, done, _info = env.step(action)
        next_phase = env.phase
        ns = to_table_index(next_state, next_phase)

        # Q-learning update: move Q(s, a) toward the one-step TD target.
        td_target = reward + gamma * np.max(q_table[ns])
        q_table[s][action] += alpha * (td_target - q_table[s][action])

        state = next_state
        phase = next_phase
        total_reward += reward

        if done:
            break

    epsilon = max(0.01, epsilon * 0.995)
    reward_log.append(total_reward)

plt.plot(reward_log)
plt.title("Training Reward over Episodes")
plt.xlabel("Episode")
plt.ylabel("Total Reward")
plt.show()

IndexError: invalid index to scalar variable.