-
Notifications
You must be signed in to change notification settings - Fork 31.7k
Assistance with gym environment code #647
-
Hi everyone, I am trying to create an inventory management program using proximal policy optimization. This is the environment I have created below. The problem is that the reward graphs show no improvement in the reward: the agents just fill the inventory instantly without considering transportation, delays, or even the carried-over unsatisfied demand. I would appreciate any assistance with this.
The environment:
import numpy as np
import gym
from gym import spaces
import matplotlib.pyplot as plt
import pandas as pd
class InventoryMgmt(gym.Env):
    """Single-item inventory management environment with delivery lead times,
    carried-over backorders, transport capacity limits, and a two-part
    (cost, emission) reward.

    NOTE(review): ``step`` returns a *tuple* reward ``(reward_cost,
    reward_emission)``. Standard single-objective PPO implementations expect a
    scalar reward -- combine the two terms (e.g. with ``weights_cost`` /
    ``weights_emission``) before training, or the agent's learning signal is
    undefined. This is a likely cause of the "no improvement" symptom.
    """

    def __init__(self, data, weights_cost, weights_emission, initial_inventory,
                 max_transport_capacity, max_inventory_capacity):
        """
        Args:
            data: DataFrame with one row per day; must contain the columns
                'Demand', 'Carrying Cost', 'Item Cost', 'Order Cost',
                'Transport Emission Rate', 'Holding Emission Rate',
                'Lead Time', and 'Backorder Cost'.
            weights_cost: objective weight for cost (stored, not used here).
            weights_emission: objective weight for emissions (stored, not used here).
            initial_inventory: on-hand stock at reset.
            max_transport_capacity: upper bound of the order-quantity action and
                the size of one transport load for emission accounting.
            max_inventory_capacity: hard cap on on-hand inventory.
        """
        # Fixed: the pasted code had `init` / `super(...).init()` -- the dunder
        # underscores were eaten by markdown; without __init__ the class never
        # initializes.
        super().__init__()
        self.data = data
        self.weights_cost = weights_cost
        self.weights_emission = weights_emission
        self.initial_inventory = initial_inventory
        self.max_transport_capacity = max_transport_capacity
        self.max_inventory_capacity = max_inventory_capacity
        # Continuous action to control order quantity.
        self.action_space = spaces.Box(low=0, high=self.max_transport_capacity,
                                       shape=(1,), dtype=np.float32)
        # Observation: normalized inventory level, demand, and reorder point.
        self.observation_space = spaces.Box(low=0, high=1, shape=(3,),
                                            dtype=np.float32)
        self.results_df = pd.DataFrame()   # per-day log, exported by save_to_excel
        self.pending_orders = []           # in-flight orders: {'quantity', 'delivery_day'}
        self.carry_over_backorder = 0      # unmet demand rolled into the next day
        # Demand statistics are invariant over an episode -- compute them once
        # here instead of on every step (the original recomputed per step).
        self._max_demand = self.data['Demand'].max()
        self._avg_demand = self.data['Demand'].mean()
        self._avg_lead_time = self.data['Lead Time'].mean()
        # safety_stock = (max_demand - avg_demand) * avg_lead_time
        self._safety_stock = (self._max_demand - self._avg_demand) * self._avg_lead_time
        self.reset()

    def _reorder_point(self, lead_time):
        """Safety stock plus expected demand over the given lead time."""
        return self._safety_stock + self._avg_demand * lead_time

    def _observation(self, demand, reorder_point):
        """Build the normalized observation, clipped into the declared [0, 1]
        Box (total demand including backorders can exceed the historical max,
        and the reorder point can exceed warehouse capacity)."""
        return np.array([
            min(self.inventory_level / self.max_inventory_capacity, 1.0),
            min(demand / self._max_demand, 1.0),
            min(reorder_point / self.max_inventory_capacity, 1.0),
        ], dtype=np.float32)

    def step(self, action):
        """
        Step through one day in the environment. Process the current day's
        demand, satisfy it using available inventory, and carry over unmet
        demand to the next day if necessary.

        Returns:
            (observation, (reward_cost, reward_emission), done, info)
        """
        row = self.data.iloc[self.current_day]
        actual_demand = row['Demand']
        # Add any carried-over backorder to today's demand.
        total_demand = actual_demand + self.carry_over_backorder
        item_cost = row['Item Cost']
        order_cost = row['Order Cost']
        lead_time = row['Lead Time']
        backorder_cost = row['Backorder Cost']
        holding_cost = item_cost * row['Carrying Cost']
        reorder_point = self._reorder_point(lead_time)
        # Initialize order_quantity to 0 in case no order is placed.
        order_quantity = 0
        # Receive any pending orders whose delivery day has arrived.
        for order in self.pending_orders:
            if self.current_day >= order['delivery_day']:
                self.inventory_level += order['quantity']
                print(f"Order delivered: {order['quantity']} units on day {self.current_day}")
        self.pending_orders = [o for o in self.pending_orders
                               if self.current_day < o['delivery_day']]
        # Ensure inventory level does not exceed capacity.
        self.inventory_level = min(self.inventory_level, self.max_inventory_capacity)
        # Satisfy today's demand; any shortfall becomes tomorrow's backorder.
        if total_demand > self.inventory_level:
            backorder_quantity = total_demand - self.inventory_level
            backorder_penalty = backorder_quantity * backorder_cost
            self.inventory_level = 0
            self.carry_over_backorder = backorder_quantity
        else:
            self.inventory_level -= total_demand
            backorder_quantity = 0
            backorder_penalty = 0
            self.carry_over_backorder = 0
        # Place a new order only when at/below the reorder point; the order
        # arrives after `lead_time` days and cannot overfill the warehouse.
        if self.inventory_level <= reorder_point:
            order_quantity = max(0, min(action[0],
                                        self.max_inventory_capacity - self.inventory_level))
            if order_quantity > 0:
                self.pending_orders.append({'quantity': order_quantity,
                                            'delivery_day': self.current_day + lead_time})
                print(f"Order placed: {order_quantity} units on day {self.current_day}")
        # Daily costs and emissions. Fixed: the fixed ordering cost is now
        # charged only on days an order is actually placed -- the original
        # added `order_cost` every day even with order_quantity == 0.
        holding_cost_total = max(0, self.inventory_level * holding_cost)
        order_cost_total = order_quantity * item_cost + (order_cost if order_quantity > 0 else 0)
        daily_cost = holding_cost_total + order_cost_total + backorder_penalty
        holding_emissions = max(0, self.inventory_level * row['Holding Emission Rate'])
        # One transport load per max_transport_capacity units ordered.
        transport_emissions = max(0, np.ceil(order_quantity / self.max_transport_capacity)
                                  * row['Transport Emission Rate'])
        daily_emissions = holding_emissions + transport_emissions
        # Update running totals and per-day histories used by render().
        self.total_cost += daily_cost
        self.total_emissions += daily_emissions
        self.daily_costs.append(daily_cost)
        self.daily_emissions.append(daily_emissions)
        self.inventory_levels.append(self.inventory_level)
        # Reward shaping: heavily penalize backorders, discourage hoarding,
        # pull inventory toward the reorder point, and nudge toward ordering
        # when at/below it.
        backorder_penalty_reward = -5.0 * backorder_quantity * backorder_cost
        inventory_penalty = -0.5 * self.inventory_level
        efficiency_reward = -abs(self.inventory_level - reorder_point) * 0.2
        order_incentive = 2.0 if self.inventory_level <= reorder_point else -0.5
        reward_cost = (-daily_cost + backorder_penalty_reward + inventory_penalty
                       + efficiency_reward + order_incentive)
        reward_emission = -daily_emissions
        normalized_observation = self._observation(total_demand, reorder_point)
        daily_data = {
            'Day': self.current_day + 1,
            'Actual Demand': actual_demand,
            # NOTE: this is the backorder carried *into tomorrow* (post-update).
            'Carried Over Demand': self.carry_over_backorder,
            'Inventory Level': self.inventory_level,
            'Order Quantity': order_quantity,
            'Holding Cost': holding_cost_total,
            'Order Cost': order_cost_total,
            'Backorder Penalty': backorder_penalty,
            'Total Daily Cost': daily_cost,
            'Daily Holding Emissions': holding_emissions,
            'Daily Transport Emissions': transport_emissions,
            'Total Daily Emissions': daily_emissions
        }
        self.results_df = pd.concat([self.results_df, pd.DataFrame([daily_data])],
                                    ignore_index=True)
        self.current_day += 1
        done = self.current_day >= len(self.data)
        self.rewards.append((reward_cost, reward_emission))
        return normalized_observation, (reward_cost, reward_emission), done, {}

    def reset(self):
        """Reset all episode state and return the initial observation."""
        self.current_day = 0
        self.inventory_level = self.initial_inventory
        self.total_cost = 0
        self.total_emissions = 0
        self.daily_costs = []
        self.daily_emissions = []
        self.inventory_levels = []
        self.rewards = []
        self.pending_orders = []
        self.carry_over_backorder = 0
        first_row = self.data.iloc[self.current_day]
        reorder_point = self._reorder_point(first_row['Lead Time'])
        return self._observation(first_row['Demand'], reorder_point)

    def render(self):
        """Print totals and plot inventory, costs, emissions, and both reward
        streams over the episode."""
        print(f"Final Total Cost: {self.total_cost}")
        print(f"Final Total Emissions: {self.total_emissions}")
        days = range(1, len(self.inventory_levels) + 1)
        plt.figure(figsize=(20, 10))
        plt.subplot(5, 1, 1)
        plt.plot(days, self.inventory_levels, marker='o')
        plt.title('Inventory Level Over Time')
        plt.xlabel('Day')
        plt.ylabel('Inventory Level')
        plt.subplot(5, 1, 2)
        plt.plot(days, self.daily_costs, marker='o')
        plt.title('Daily Costs Over Time')
        plt.xlabel('Day')
        plt.ylabel('Costs')
        plt.subplot(5, 1, 3)
        plt.plot(days, self.daily_emissions, marker='o')
        plt.title('Daily Emissions Over Time')
        plt.xlabel('Day')
        plt.ylabel('Emissions')
        plt.subplot(5, 1, 4)
        cost_rewards = [r[0] for r in self.rewards]
        plt.plot(days, cost_rewards, marker='o', label='Cost Rewards')
        plt.title('Cost Rewards Over Time')
        plt.xlabel('Day')
        plt.ylabel('Rewards')
        plt.subplot(5, 1, 5)
        emission_rewards = [r[1] for r in self.rewards]
        plt.plot(days, emission_rewards, marker='o', label='Emission Rewards')
        plt.title('Emission Rewards Over Time')
        plt.xlabel('Day')
        plt.ylabel('Rewards')
        plt.tight_layout()
        plt.show()

    def save_to_excel(self, file_name='inventory_output.xlsx'):
        """Save the results DataFrame to an Excel file."""
        if not self.results_df.empty:
            self.results_df.to_excel(file_name, index=False)
            print(f"Results saved to {file_name}")
        else:
            print("No results to save.")
# Register the environment so it can be created with gym.make('InventoryMgmt-v1').
# The entry_point module path ('InvEnv_multi_v1') must match the file name this
# class actually lives in -- verify against your module layout.
gym.envs.registration.register(
    id='InventoryMgmt-v1',
    entry_point='InvEnv_multi_v1:InventoryMgmt',
)
Beta Was this translation helpful? Give feedback.