In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error
import joblib

## Generate a synthetic dataset

In [2]:
# Seed NumPy's global generator so every run produces the same synthetic rows.
np.random.seed(42)
n_samples = 1000

# One array per feature column. The draws happen in the order written here,
# so reordering these entries would change the generated values.
data = dict(
    hour=np.random.randint(0, 24, size=n_samples),
    day_of_week=np.random.randint(0, 7, size=n_samples),
    temperature=np.random.normal(25, 5, size=n_samples),
    precipitation=np.random.uniform(0, 10, size=n_samples),
    latitude=np.random.uniform(12.9, 13.1, size=n_samples),
    longitude=np.random.uniform(77.5, 77.7, size=n_samples),
    event=np.random.randint(0, 2, size=n_samples),
)

# Target = baseline of 50, rising with the hour and with events, falling with
# precipitation, plus Gaussian noise (std 5); the result is clipped to [0, 100].
# The noise is drawn last, after all feature columns have been generated.
df = pd.DataFrame(data).assign(
    fill_level=lambda frame: (
        50
        + frame['hour'] * 1.2
        + frame['event'] * 15
        - frame['precipitation'] * 0.8
        + np.random.normal(0, 5, size=n_samples)
    ).clip(0, 100)
)

# Preview the first rows as the cell output.
df.head()

Unnamed: 0,hour,day_of_week,temperature,precipitation,latitude,longitude,event,fill_level
0,6,6,17.732868,6.02117,13.076917,77.638803,0,58.042115
1,19,0,34.826638,5.914068,12.925044,77.53655,1,83.643008
2,14,1,34.814113,8.065893,12.940111,77.563809,0,62.806651
3,10,5,23.074699,4.764834,12.943245,77.583103,0,51.639966
4,7,5,26.642477,9.961229,13.038242,77.681603,0,51.592089


In [3]:
# Target is the fill level; every other column is used as a predictor.
y = df['fill_level']
X = df.drop(columns=['fill_level'])

# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [10]:
# Gradient-boosted regressor with 100 estimators; the fixed random_state makes
# training repeatable.
model = GradientBoostingRegressor(
    random_state=42,
    n_estimators=100,
)

# fit() stays the final expression so the notebook still displays the
# fitted estimator as the cell output.
model.fit(X=X_train, y=y_train)

In [5]:
# Score the held-out rows and report the mean squared error
# (expressed in squared fill-level units).
y_pred = model.predict(X_test)
mse = mean_squared_error(
    y_true=y_test,
    y_pred=y_pred,
)
print(
    f"Mean Squared Error: {mse:.2f}"
)

Mean Squared Error: 28.52


In [6]:
# Keep the artifact name in a single place so the dump call and the
# confirmation message can never drift apart.
MODEL_PATH = "bin_fill_level_model.pkl"

# Persist the fitted estimator to disk.
# NOTE: joblib artifacts are pickle-based; only load this file back from a
# trusted location, since unpickling can execute arbitrary code.
joblib.dump(model, MODEL_PATH)
print(f"Model saved as '{MODEL_PATH}'")

Model saved as 'bin_fill_level_model.pkl'


In [7]:
# Take the first held-out row as a one-row DataFrame (the double brackets keep
# it two-dimensional, which is what predict() is given for X_test as well).
sample_input = X_test.iloc[[0]]
sample_prediction = model.predict(sample_input)

# Label and frame go on separate lines, followed by the single predicted value.
print("Sample Input:", sample_input, sep="\n")
print("Predicted Fill Level:", sample_prediction[0])

Sample Input:
     hour  day_of_week  temperature  precipitation   latitude  longitude  \
521    12            2    25.415403       3.334626  13.075024  77.561254   

     event  
521      1  
Predicted Fill Level: 80.25464898151773
