-
Notifications
You must be signed in to change notification settings - Fork 31.7k
Assistance with gym environment code #647
-
Hi everyone, I am trying to create an inventory management program using proximal policy optimization. This is the environment I have created below. The problem is that the reward graphs show no improvement in the reward: the agents just fill the inventory instantly without considering transportation, delays, or even the carried-over unsatisfied demand. I would appreciate any assistance with this.
The environment:
import numpy as np
import gym
from gym import spaces
import matplotlib.pyplot as plt
import pandas as pd
class InventoryMgmt(gym.Env):
    """Single-item inventory management environment with delivery lead times,
    carried-over backorders, transport capacity limits, and a two-part
    (cost, emission) reward.

    NOTE(review): ``step`` returns a *tuple* reward ``(reward_cost,
    reward_emission)``. Standard single-objective PPO implementations expect a
    scalar reward -- combine the two terms (e.g. with ``weights_cost`` /
    ``weights_emission``) before training, or the agent's learning signal is
    undefined. This is a likely cause of the "no improvement" symptom.
    """

    def __init__(self, data, weights_cost, weights_emission, initial_inventory,
                 max_transport_capacity, max_inventory_capacity):
        """
        Args:
            data: DataFrame with one row per day; must contain the columns
                'Demand', 'Carrying Cost', 'Item Cost', 'Order Cost',
                'Transport Emission Rate', 'Holding Emission Rate',
                'Lead Time', and 'Backorder Cost'.
            weights_cost: objective weight for cost (stored, not used here).
            weights_emission: objective weight for emissions (stored, not used here).
            initial_inventory: on-hand stock at reset.
            max_transport_capacity: upper bound of the order-quantity action and
                the size of one transport load for emission accounting.
            max_inventory_capacity: hard cap on on-hand inventory.
        """
        # Fixed: the pasted code had `init` / `super(...).init()` -- the dunder
        # underscores were eaten by markdown; without __init__ the class never
        # initializes.
        super().__init__()
        self.data = data
        self.weights_cost = weights_cost
        self.weights_emission = weights_emission
        self.initial_inventory = initial_inventory
        self.max_transport_capacity = max_transport_capacity
        self.max_inventory_capacity = max_inventory_capacity
        # Continuous action to control order quantity.
        self.action_space = spaces.Box(low=0, high=self.max_transport_capacity,
                                       shape=(1,), dtype=np.float32)
        # Observation: normalized inventory level, demand, and reorder point.
        self.observation_space = spaces.Box(low=0, high=1, shape=(3,),
                                            dtype=np.float32)
        self.results_df = pd.DataFrame()   # per-day log, exported by save_to_excel
        self.pending_orders = []           # in-flight orders: {'quantity', 'delivery_day'}
        self.carry_over_backorder = 0      # unmet demand rolled into the next day
        # Demand statistics are invariant over an episode -- compute them once
        # here instead of on every step (the original recomputed per step).
        self._max_demand = self.data['Demand'].max()
        self._avg_demand = self.data['Demand'].mean()
        self._avg_lead_time = self.data['Lead Time'].mean()
        # safety_stock = (max_demand - avg_demand) * avg_lead_time
        self._safety_stock = (self._max_demand - self._avg_demand) * self._avg_lead_time
        self.reset()

    def _reorder_point(self, lead_time):
        """Safety stock plus expected demand over the given lead time."""
        return self._safety_stock + self._avg_demand * lead_time

    def _observation(self, demand, reorder_point):
        """Build the normalized observation, clipped into the declared [0, 1]
        Box (total demand including backorders can exceed the historical max,
        and the reorder point can exceed warehouse capacity)."""
        return np.array([
            min(self.inventory_level / self.max_inventory_capacity, 1.0),
            min(demand / self._max_demand, 1.0),
            min(reorder_point / self.max_inventory_capacity, 1.0),
        ], dtype=np.float32)

    def step(self, action):
        """
        Step through one day in the environment. Process the current day's
        demand, satisfy it using available inventory, and carry over unmet
        demand to the next day if necessary.

        Returns:
            (observation, (reward_cost, reward_emission), done, info)
        """
        row = self.data.iloc[self.current_day]
        actual_demand = row['Demand']
        # Add any carried-over backorder to today's demand.
        total_demand = actual_demand + self.carry_over_backorder
        item_cost = row['Item Cost']
        order_cost = row['Order Cost']
        lead_time = row['Lead Time']
        backorder_cost = row['Backorder Cost']
        holding_cost = item_cost * row['Carrying Cost']
        reorder_point = self._reorder_point(lead_time)
        # Initialize order_quantity to 0 in case no order is placed.
        order_quantity = 0
        # Receive any pending orders whose delivery day has arrived.
        for order in self.pending_orders:
            if self.current_day >= order['delivery_day']:
                self.inventory_level += order['quantity']
                print(f"Order delivered: {order['quantity']} units on day {self.current_day}")
        self.pending_orders = [o for o in self.pending_orders
                               if self.current_day < o['delivery_day']]
        # Ensure inventory level does not exceed capacity.
        self.inventory_level = min(self.inventory_level, self.max_inventory_capacity)
        # Satisfy today's demand; any shortfall becomes tomorrow's backorder.
        if total_demand > self.inventory_level:
            backorder_quantity = total_demand - self.inventory_level
            backorder_penalty = backorder_quantity * backorder_cost
            self.inventory_level = 0
            self.carry_over_backorder = backorder_quantity
        else:
            self.inventory_level -= total_demand
            backorder_quantity = 0
            backorder_penalty = 0
            self.carry_over_backorder = 0
        # Place a new order only when at/below the reorder point; the order
        # arrives after `lead_time` days and cannot overfill the warehouse.
        if self.inventory_level <= reorder_point:
            order_quantity = max(0, min(action[0],
                                        self.max_inventory_capacity - self.inventory_level))
            if order_quantity > 0:
                self.pending_orders.append({'quantity': order_quantity,
                                            'delivery_day': self.current_day + lead_time})
                print(f"Order placed: {order_quantity} units on day {self.current_day}")
        # Daily costs and emissions. Fixed: the fixed ordering cost is now
        # charged only on days an order is actually placed -- the original
        # added `order_cost` every day even with order_quantity == 0.
        holding_cost_total = max(0, self.inventory_level * holding_cost)
        order_cost_total = order_quantity * item_cost + (order_cost if order_quantity > 0 else 0)
        daily_cost = holding_cost_total + order_cost_total + backorder_penalty
        holding_emissions = max(0, self.inventory_level * row['Holding Emission Rate'])
        # One transport load per max_transport_capacity units ordered.
        transport_emissions = max(0, np.ceil(order_quantity / self.max_transport_capacity)
                                  * row['Transport Emission Rate'])
        daily_emissions = holding_emissions + transport_emissions
        # Update running totals and per-day histories used by render().
        self.total_cost += daily_cost
        self.total_emissions += daily_emissions
        self.daily_costs.append(daily_cost)
        self.daily_emissions.append(daily_emissions)
        self.inventory_levels.append(self.inventory_level)
        # Reward shaping: heavily penalize backorders, discourage hoarding,
        # pull inventory toward the reorder point, and nudge toward ordering
        # when at/below it.
        backorder_penalty_reward = -5.0 * backorder_quantity * backorder_cost
        inventory_penalty = -0.5 * self.inventory_level
        efficiency_reward = -abs(self.inventory_level - reorder_point) * 0.2
        order_incentive = 2.0 if self.inventory_level <= reorder_point else -0.5
        reward_cost = (-daily_cost + backorder_penalty_reward + inventory_penalty
                       + efficiency_reward + order_incentive)
        reward_emission = -daily_emissions
        normalized_observation = self._observation(total_demand, reorder_point)
        daily_data = {
            'Day': self.current_day + 1,
            'Actual Demand': actual_demand,
            # NOTE: this is the backorder carried *into tomorrow* (post-update).
            'Carried Over Demand': self.carry_over_backorder,
            'Inventory Level': self.inventory_level,
            'Order Quantity': order_quantity,
            'Holding Cost': holding_cost_total,
            'Order Cost': order_cost_total,
            'Backorder Penalty': backorder_penalty,
            'Total Daily Cost': daily_cost,
            'Daily Holding Emissions': holding_emissions,
            'Daily Transport Emissions': transport_emissions,
            'Total Daily Emissions': daily_emissions
        }
        self.results_df = pd.concat([self.results_df, pd.DataFrame([daily_data])],
                                    ignore_index=True)
        self.current_day += 1
        done = self.current_day >= len(self.data)
        self.rewards.append((reward_cost, reward_emission))
        return normalized_observation, (reward_cost, reward_emission), done, {}

    def reset(self):
        """Reset all episode state and return the initial observation."""
        self.current_day = 0
        self.inventory_level = self.initial_inventory
        self.total_cost = 0
        self.total_emissions = 0
        self.daily_costs = []
        self.daily_emissions = []
        self.inventory_levels = []
        self.rewards = []
        self.pending_orders = []
        self.carry_over_backorder = 0
        first_row = self.data.iloc[self.current_day]
        reorder_point = self._reorder_point(first_row['Lead Time'])
        return self._observation(first_row['Demand'], reorder_point)

    def render(self):
        """Print totals and plot inventory, costs, emissions, and both reward
        streams over the episode."""
        print(f"Final Total Cost: {self.total_cost}")
        print(f"Final Total Emissions: {self.total_emissions}")
        days = range(1, len(self.inventory_levels) + 1)
        plt.figure(figsize=(20, 10))
        plt.subplot(5, 1, 1)
        plt.plot(days, self.inventory_levels, marker='o')
        plt.title('Inventory Level Over Time')
        plt.xlabel('Day')
        plt.ylabel('Inventory Level')
        plt.subplot(5, 1, 2)
        plt.plot(days, self.daily_costs, marker='o')
        plt.title('Daily Costs Over Time')
        plt.xlabel('Day')
        plt.ylabel('Costs')
        plt.subplot(5, 1, 3)
        plt.plot(days, self.daily_emissions, marker='o')
        plt.title('Daily Emissions Over Time')
        plt.xlabel('Day')
        plt.ylabel('Emissions')
        plt.subplot(5, 1, 4)
        cost_rewards = [r[0] for r in self.rewards]
        plt.plot(days, cost_rewards, marker='o', label='Cost Rewards')
        plt.title('Cost Rewards Over Time')
        plt.xlabel('Day')
        plt.ylabel('Rewards')
        plt.subplot(5, 1, 5)
        emission_rewards = [r[1] for r in self.rewards]
        plt.plot(days, emission_rewards, marker='o', label='Emission Rewards')
        plt.title('Emission Rewards Over Time')
        plt.xlabel('Day')
        plt.ylabel('Rewards')
        plt.tight_layout()
        plt.show()

    def save_to_excel(self, file_name='inventory_output.xlsx'):
        """Save the results DataFrame to an Excel file."""
        if not self.results_df.empty:
            self.results_df.to_excel(file_name, index=False)
            print(f"Results saved to {file_name}")
        else:
            print("No results to save.")
# Register the environment so it can be created with gym.make('InventoryMgmt-v1').
# The entry_point module path ('InvEnv_multi_v1') must match the file name this
# class actually lives in -- verify against your module layout.
gym.envs.registration.register(
    id='InventoryMgmt-v1',
    entry_point='InvEnv_multi_v1:InventoryMgmt',
)
Beta Was this translation helpful? Give feedback.