In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
import joblib

In [3]:
# Build a reproducible synthetic traffic dataset using NumPy's legacy global RNG.
np.random.seed(42)
n_samples = 1000

# NOTE: every draw below consumes the same global random stream, so the order
# of these calls determines the generated values -- do not rearrange them.
data = dict(
    hour=np.random.randint(0, 24, size=n_samples),            # integers 0..23
    minute=np.random.randint(0, 60, size=n_samples),          # integers 0..59
    day_of_week=np.random.randint(0, 7, size=n_samples),      # integers 0..6
    temperature=np.random.normal(25, 5, size=n_samples),      # mean 25, std 5
    precipitation=np.random.uniform(0, 10, size=n_samples),
    visibility=np.random.uniform(0.5, 10.0, size=n_samples),
    event=np.random.randint(0, 2, size=n_samples),            # 0/1 flag
    roadwork=np.random.randint(0, 2, size=n_samples),         # 0/1 flag
    latitude=np.random.uniform(12.9, 13.1, size=n_samples),
    longitude=np.random.uniform(77.5, 77.7, size=n_samples),
)

df = pd.DataFrame(data)

# Target: a baseline of 1000, lowered by hour, precipitation and roadwork,
# raised by events, plus Gaussian noise (std 50). Only these four features
# enter the formula; the remaining columns carry no signal by construction.
noise = np.random.normal(0, 50, size=n_samples)
flow = 1000 - 10 * df['hour']
flow = flow - 5 * df['precipitation']
flow = flow - 100 * df['roadwork']
flow = flow + 150 * df['event']
flow = flow + noise
# astype(int) truncates toward zero (it does not round).
df['traffic_flow'] = flow.astype(int)

df.head()

Unnamed: 0,hour,minute,day_of_week,temperature,precipitation,visibility,event,roadwork,latitude,longitude,traffic_flow
0,6,46,4,17.45289,2.146679,0.764517,0,0,13.016685,77.564022,920
1,19,0,6,23.621364,6.649945,6.415957,0,1,12.992522,77.667671,716
2,14,25,4,24.381052,4.980404,8.164197,1,1,13.06366,77.630846,860
3,10,13,2,29.880742,7.42825,8.499149,1,0,13.036665,77.555086,1028
4,7,37,3,24.849423,3.270446,5.085778,0,0,12.936713,77.693548,950


In [4]:
# Separate the target column from the predictors, then hold out 20% of the
# rows for evaluation; random_state pins the shuffle so the split is repeatable.
y = df['traffic_flow']
X = df.drop(columns=['traffic_flow'])
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [6]:
# Random forest with 100 trees; the fixed random_state makes refits repeatable.
forest_params = {"n_estimators": 100, "random_state": 42}
model = RandomForestRegressor(**forest_params)
# Kept as the cell's last expression so the fitted estimator is displayed.
model.fit(X_train, y_train)

In [7]:
# Score the fitted model on the held-out rows using mean squared error.
y_pred = model.predict(X_test)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
print(f"Mean Squared Error: {mse}")

Mean Squared Error: 2599.5328635


In [8]:
# Persist the fitted estimator to the current working directory.
joblib.dump(value=model, filename="traffic_flow_model.pkl")
print("Model saved as 'traffic_flow_model.pkl'")

Model saved as 'traffic_flow_model.pkl'


In [9]:
# Smoke-test the model on the first held-out row. The slice keeps the row as a
# one-row DataFrame (2-D), which is the input shape predict() expects.
sample_input = X_test.iloc[:1]
sample_prediction = model.predict(sample_input)
print("Sample Input:", sample_input, sep="\n")
print("Predicted Traffic Flow:", sample_prediction[0])

Sample Input:
     hour  minute  day_of_week  temperature  precipitation  visibility  event  \
521    12      21            3    34.839495       7.294638    5.113645      0   

     roadwork   latitude  longitude  
521         1  13.018034  77.510119  
Predicted Traffic Flow: 761.96
