Source code for smpl.envs.pensimenv

import codecs
import csv
import math
import random
import sys

import numpy as np
from gym import spaces, Env
from mzutils import get_things_in_loc
from pensimpy.peni_env_setup import PenSimEnv

from .utils import *


def get_observation_data_reformed(observation, t):
    """
    Get observation data at step index t.

    The channels read are Temperature, Acid flow rate, Base flow rate,
    Cooling water, Heating water, Vessel Weight and Dissolved oxygen
    concentration respectively in csv terms, but abbreviations are used here
    to stay consistent with peni_env_setup.

    Args:
        observation: object exposing ``pH``, ``T``, ``Fa``, ``Fb``, ``Fc``,
            ``Fh``, ``Wt`` and ``DO2`` attributes, each of which has an
            indexable ``y`` sequence.
        t (int): index into every channel's ``y`` sequence.

    Returns:
        list: ``[t * STEP_IN_MINUTES / MINUTES_PER_HOUR, pH, T, Fa, Fb, Fc,
        Fh, Wt, DO2]`` taken at index ``t``.
    """
    channel_names = ('T', 'Fa', 'Fb', 'Fc', 'Fh', 'Wt', 'DO2')
    pH = observation.pH.y[t]
    # The stored value is mapped through -log10 (presumably it is a hydrogen
    # ion concentration -- confirm against pensimpy). A stored 0 is passed
    # through unchanged because its logarithm is undefined.
    pH = -math.log(pH) / math.log(10) if pH != 0 else pH
    # Plain attribute access replaces the former eval() of an f-string: same
    # values, no dynamic code execution.
    return [t * STEP_IN_MINUTES / MINUTES_PER_HOUR, pH] + [
        getattr(observation, name).y[t] for name in channel_names]
class PenSimEnvGym(PenSimEnv, smplEnvBase):
    """Gym-style environment built on top of PenSimPy's ``PenSimEnv``.

    Observations have ``observation_dim`` entries and actions have
    ``action_dim`` entries; when ``normalize`` is on, both spaces are exposed
    as boxes in ``[-1, 1]`` and converted with ``normalize_spaces`` /
    ``denormalize_spaces``.
    """

    def __init__(
            self, recipe_combo, dense_reward=True, normalize=True, debug_mode=False, action_dim=6, observation_dim=9,
            reward_function=None, done_calculator=None,
            max_observations=[552.0, 16.10523, 725.6828, 13.717274, 540.0, 3600.0002, 1892.07874, 253840.11,
                              47.898834],
            min_observations=[0.0, 0.0, 118.98977, 0.0, 0.0, 0.0, 0.0, 25003.258, 0.0],
            max_actions=[4100.0, 151.0, 36.0, 76.0, 1.2, 510.0],
            min_actions=[0.0, 7.0, 21.0, 29.0, 0.5, 0.0],
            observation_name=None, action_name=None, initial_state_deviation_ratio=0.1, np_dtype=np.float32,
            max_steps=NUM_STEPS, error_reward=-100.0, fast=True, random_seed=0, random_seed_max=20000):
        """
        Time is not in our observation_space. We make the env time unaware and MDP.
        _max_episode_steps and random_seed_ref are for PenSimEnv usage.
        random_seed_max is the max random seed to be used in the sample_initial_state.

        NOTE(review): the first entry returned by get_observation_data_reformed
        is an elapsed-time value, which seems at odds with the first sentence
        above -- confirm which one is intended.

        Args:
            recipe_combo (RecipeCombo): The recipe combo defined for the PenSimEnv.
            dense_reward (bool): stored on the instance; not read in this class.
            normalize (bool): default for whether observations/actions are
                exchanged in normalized form.
            debug_mode (bool): when True, ``step`` prints the received action
                and any exception it recovers from.
            action_dim (int): number of action entries.
            observation_dim (int): number of observation entries.
            reward_function (callable, optional): defaults to
                ``self.reward_function_standard``.
            done_calculator (callable, optional): defaults to
                ``self.done_calculator_standard``.
            max_observations, min_observations, max_actions, min_actions:
                per-entry bounds. The list defaults are never mutated; they
                are copied into numpy arrays of ``np_dtype``.
            observation_name (list, optional): labels; defaults to ``o_<i>``.
            action_name (list, optional): labels; defaults to the six
                PenSimPy control names.
            initial_state_deviation_ratio (float): stored on the instance;
                not read in this class.
            np_dtype: numpy dtype used for bounds, spaces and observations.
            max_steps (int): stored as ``max_steps`` and ``_max_episode_steps``.
            error_reward (float): stored on the instance (presumably consumed
                by the base class when a step fails -- confirm).
            fast (bool): forwarded to ``PenSimEnv.__init__``.
            random_seed (int): seeds Python's ``random`` module and is the
                initial ``random_seed_ref``.
            random_seed_max (int): upper bound for randomly drawn seeds.

        Attributes:
            random_seed_ref (int): The random seed used in the sample_initial_state.
        """
        super().__init__(recipe_combo, fast=fast)
        self.step_count = 0
        self.total_reward = 0
        self.done = False
        self.dense_reward = dense_reward
        self.normalize = normalize
        self.debug_mode = debug_mode
        self.action_dim = action_dim
        self.observation_dim = observation_dim
        self.reward_function = reward_function
        self.done_calculator = done_calculator
        self.observation_name = observation_name
        self.action_name = action_name
        if self.observation_name is None:
            self.observation_name = [f'o_{i}' for i in range(self.observation_dim)]
        if self.action_name is None:
            # Order matches the action vector: discharge, Fs, Foil, Fg, pressure, Fw.
            # (The second label previously carried a stray leading comma.)
            self.action_name = ["Discharge rate", "Sugar feed rate", "Soil bean feed rate", "Aeration rate",
                                "Back pressure", "Water injection/dilution"]
        self.initial_state_deviation_ratio = initial_state_deviation_ratio
        self.np_dtype = np_dtype
        self.max_steps = max_steps
        self._max_episode_steps = max_steps
        self.error_reward = error_reward
        if self.reward_function is None:
            self.reward_function = self.reward_function_standard
        if self.done_calculator is None:
            self.done_calculator = self.done_calculator_standard

        # define the state and action spaces
        self.max_observations = np.array(max_observations, dtype=self.np_dtype)
        self.min_observations = np.array(min_observations, dtype=self.np_dtype)
        self.max_actions = np.array(max_actions, dtype=self.np_dtype)
        self.min_actions = np.array(min_actions, dtype=self.np_dtype)
        # The spaces use the configured dtype so they agree with the arrays
        # this env hands out (identical to before for the float32 default).
        if self.normalize:
            self.observation_space = spaces.Box(low=-1, high=1, shape=(self.observation_dim,),
                                                dtype=self.np_dtype)
            self.action_space = spaces.Box(low=-1, high=1, shape=(self.action_dim,), dtype=self.np_dtype)
        else:
            self.observation_space = spaces.Box(low=self.min_observations, high=self.max_observations,
                                                shape=(self.observation_dim,), dtype=self.np_dtype)
            self.action_space = spaces.Box(low=self.min_actions, high=self.max_actions,
                                           shape=(self.action_dim,), dtype=self.np_dtype)

        random.seed(random_seed)
        self.random_seed_ref = random_seed
        self.random_seed_max = random_seed_max
        # init observation generated from dataset.
        self.init_observation_from_dataset = np.array(
            [0.2, 6.5, 300.0, 0.0, 10.828493, 0.0001, 150.0, 62500.0, 14.75], dtype=self.np_dtype)

    def sample_initial_state(self, random_seed_ref=None):
        """Reset the underlying PenSimEnv and return its observation at index 0.

        Args:
            random_seed_ref (int, optional): seed to store in
                ``self.random_seed_ref`` before resetting. When None, a seed
                is drawn with ``random.randint(3, self.random_seed_max)``.
                An explicit 0 is honoured (it used to be treated as "not
                given" because of a truthiness test).

        Returns:
            numpy.ndarray: the reformed observation at index 0.
        """
        # notice that this function here has to reset the PenSimEnv.
        if random_seed_ref is not None:
            self.random_seed_ref = random_seed_ref
        else:
            self.random_seed_ref = random.randint(3, self.random_seed_max)
        _, x = super().reset()
        self.x = x
        observation = get_observation_data_reformed(x, 0)
        return np.array(observation, dtype=self.np_dtype)

    def reset(self, normalize=None, random_seed_ref=None):
        """Start a new episode and return the initial observation.

        Args:
            normalize (bool, optional): overrides ``self.normalize`` for the
                returned observation when not None.
            random_seed_ref (int, optional): forwarded to
                ``sample_initial_state``.

        Returns:
            The dataset-derived initial observation, normalized with
            ``normalize_spaces`` when normalization is active.
        """
        self.step_count = 0
        self.total_reward = 0
        self.done = False
        # Called for its side effect of resetting the PenSimEnv; the
        # observation it returns is all zeros, so it is discarded.
        self.sample_initial_state(random_seed_ref=random_seed_ref)
        # Work on a copy so that nothing downstream can alter the template
        # used by later resets.
        observation = self.init_observation_from_dataset.copy()
        self.init_observation = observation
        self.previous_observation = observation
        normalize = self.normalize if normalize is None else normalize
        if normalize:
            observation, _, _ = normalize_spaces(observation, self.max_observations, self.min_observations)
        return observation

    def step(self, action, normalize=None):
        """Advance the simulation by one step.

        Args:
            action: six entries ordered as discharge, Fs, Foil, Fg, pressure,
                Fw; expected in normalized form when normalization is active.
            normalize (bool, optional): overrides ``self.normalize`` when not
                None.

        Returns:
            tuple: ``(observation, reward, done, info)`` where ``info``
            carries the ``timeout``, ``error_occurred`` and ``terminal``
            flags returned by ``observation_done_and_reward_calculator``.
        """
        if self.debug_mode:
            print("action:", action)
        reward = None
        done_info = None
        action = np.array(action, dtype=self.np_dtype)
        normalize = self.normalize if normalize is None else normalize
        if normalize:
            action, _, _ = denormalize_spaces(action, self.max_actions, self.min_actions)
        try:
            self.step_count += 1  # here we increment at front
            values_dict = self.recipe_combo.get_values_dict_at(self.step_count * STEP_IN_MINUTES)
            # served as a batch buffer below
            # PenSimEnv.step takes the controls in a different order than
            # the action vector, hence the index shuffle.
            _, x, yield_per_run, done = super().step(
                self.step_count, self.x,
                action[1], action[2], action[3], action[4], action[0], action[5],
                values_dict['Fpaa'])
            # in pensimpy, done = True if k == NUM_STEPS else False
            finished = bool(done)
            done_info = {"timeout": finished, "error_occurred": False, "terminal": finished}
            reward = yield_per_run
            self.x = x
            observation = get_observation_data_reformed(x, self.step_count - 1)
        except Exception as e:
            # Best effort: a failed simulation step ends the episode instead
            # of crashing the caller. The cause is surfaced in debug mode
            # rather than being dropped silently.
            if self.debug_mode:
                print("error occurred in step:", repr(e))
            observation = self.min_observations
            done_info = {"timeout": False, "error_occurred": True, "terminal": True}
        observation, reward, done, done_info = self.observation_done_and_reward_calculator(
            observation, action, normalize=normalize, step_reward=reward, done_info=done_info)
        self.step_count -= 1  # we already increment at front.
        info = dict(done_info)
        return observation, reward, done, info
class PeniControlData:
    """
    dataset class helper, mainly aims to mimic d4rl's qlearning_dataset format (which returns a dictionary).
    produced from PenSimPy generated csvs.
    """

    def __init__(self, load_just_a_file='', dataset_folder='examples/example_batches', delimiter=',',
                 observation_dim=9, action_dim=6, normalize=True, np_dtype=np.float32) -> None:
        """
        :param load_just_a_file: path of a single csv file to use; when empty,
            every file found in dataset_folder is used instead
        :param dataset_folder: where all dataset csv files are living in
        :param delimiter: field delimiter of the csv files
        :param observation_dim: expected number of observation entries per row
        :param action_dim: expected number of action entries per row
        :param normalize: whether loaded observations and actions are passed
            through normalize_spaces
        :param np_dtype: numpy dtype of the bounds and of the loaded arrays
        """
        self.dataset_folder = dataset_folder
        self.delimiter = delimiter
        self.observation_dim = observation_dim
        self.action_dim = action_dim
        self.normalize = normalize
        self.np_dtype = np_dtype
        # Fixed bounds used for normalization (independent of the loaded data).
        self.max_observations = np.array(
            [276.0, 8.052615, 362.8414, 6.858637, 270.0, 1800.0001, 946.03937, 126920.055, 23.949417],
            dtype=self.np_dtype)
        self.min_observations = np.array(
            [0.16000001, 4.5955915, 237.97954, 0.0, 0.0, 0.0, 0.0, 50006.516, 2.3598127],
            dtype=self.np_dtype)
        self.max_actions = np.array([4100.0, 151.0, 36.0, 76.0, 1.2, 510.0], dtype=self.np_dtype)
        self.min_actions = np.array([0.0, 7.0, 21.0, 29.0, 0.5, 0.0], dtype=self.np_dtype)
        if load_just_a_file != '':
            file_list = [load_just_a_file]
        else:
            file_list = get_things_in_loc(dataset_folder, just_files=True)
        self.file_list = file_list

    def load_file_list_to_dict(self, file_list, shuffle=True):
        """
        Read every csv in file_list and stack the rows into one dataset dict.

        Each file is treated as one episode: its last row is marked terminal
        and the next observation of that last row repeats the row itself.

        :param file_list: paths of the csv files to read (not modified)
        :param shuffle: when True (default) the file order is shuffled with
            random.shuffle before loading; when False the given order is kept
        :return: dict with 'observations', 'actions', 'next_observations',
            'rewards' and 'terminals' numpy arrays
        :raises ValueError: when no data row could be read at all
        """
        file_list = list(file_list)  # never reorder the caller's list
        if shuffle:
            random.shuffle(file_list)
        observations = []
        actions = []
        next_observations = []
        rewards = []
        terminals = []
        for file_path in file_list:
            episode_observations = []
            episode_actions = []
            episode_rewards = []
            # newline='' is what the csv module asks for when reading files.
            with open(file_path, 'r', encoding='utf-8', newline='') as fp:
                csv_reader = csv.reader(fp, delimiter=self.delimiter)
                next(csv_reader, None)  # get rid of the first line containing only titles
                for row in csv_reader:
                    if not row:
                        continue  # csv.reader yields [] for blank lines
                    # there are 9 items: Time Step, pH,Temperature,Acid flow rate,Base flow rate,
                    # Cooling water,Heating water,Vessel Weight,Dissolved oxygen concentration
                    observation = [row[0]] + row[7:-1]
                    assert len(observation) == self.observation_dim, \
                        f"expected {self.observation_dim} observation columns in {file_path}"
                    # there are 6 items: Discharge rate,Sugar feed rate,Soil bean feed rate,
                    # Aeration rate,Back pressure,Water injection/dilution
                    action = row[1:7]
                    assert len(action) == self.action_dim, \
                        f"expected {self.action_dim} action columns in {file_path}"
                    episode_observations.append(observation)
                    episode_actions.append(action)
                    episode_rewards.append(row[-1])
            if not episode_observations:
                continue  # header-only or empty file: nothing to mark terminal
            episode_terminals = [False] * len(episode_observations)
            episode_terminals[-1] = True
            observations += episode_observations
            actions += episode_actions
            # Shift by one; the final row has no successor so it repeats itself.
            next_observations += episode_observations[1:] + [episode_observations[-1]]
            rewards += episode_rewards
            terminals += episode_terminals
        if not observations:
            raise ValueError("no data rows found in the given file list")
        # The csv fields are still strings here; numpy parses them to floats.
        dataset = {
            'observations': np.array(observations, dtype=self.np_dtype),
            'actions': np.array(actions, dtype=self.np_dtype),
            'next_observations': np.array(next_observations, dtype=self.np_dtype),
            'rewards': np.array(rewards, dtype=self.np_dtype),
            'terminals': np.array(terminals, dtype=bool),
        }
        self.dataset_max_observations = dataset['observations'].max(axis=0)
        self.dataset_min_observations = dataset['observations'].min(axis=0)
        self.dataset_max_actions = dataset['actions'].max(axis=0)
        self.dataset_min_actions = dataset['actions'].min(axis=0)
        print("max observations:", self.max_observations)
        print("min observations:", self.min_observations)
        print("dataset max observations:", self.dataset_max_observations)
        print("dataset min observations:", self.dataset_min_observations)
        print("max actions:", self.max_actions)
        print("min actions:", self.min_actions)
        print("dataset max actions:", self.dataset_max_actions)
        print("dataset min actions:", self.dataset_min_actions)
        print("normalize:", self.normalize)
        print("using max/min observations and actions.")
        if self.normalize:
            # Normalization uses the fixed bounds from __init__, not the
            # per-dataset extrema printed above.
            dataset['observations'], _, _ = normalize_spaces(
                dataset['observations'], self.max_observations, self.min_observations)
            dataset['next_observations'], _, _ = normalize_spaces(
                dataset['next_observations'], self.max_observations, self.min_observations)
            dataset['actions'], _, _ = normalize_spaces(
                dataset['actions'], self.max_actions, self.min_actions)
        return dataset

    def get_dataset(self):
        """Load and return the dataset for the files selected at construction time."""
        return self.load_file_list_to_dict(self.file_list)