import gym
import numpy as np
from gym import spaces

class HERGoalEnvWrapper(gym.GoalEnv):

    def __init__(self, env, activate_sparse_reward=False):
        """
        A wrapper that exposes a CausalWorld environment through the
        gym.GoalEnv interface, with dict observations holding the
        'observation', 'achieved_goal' and 'desired_goal' keys that
        hindsight experience replay (HER) implementations expect.

        :param env: (causal_world.CausalWorld) the environment to convert.
        :param activate_sparse_reward: (bool) True to activate sparse rewards.
        """
        super(HERGoalEnvWrapper, self).__init__()
        self.env = env
        self.metadata = self.env.metadata
        self.action_space = self.env.action_space
        # Both goal spaces share the shape of the flattened achieved goal.
        current_goal = self.env.get_task().get_achieved_goal().flatten()
        goal_space_shape = current_goal.shape
        if activate_sparse_reward:
            self.env.get_task().activate_sparse_reward()
        self.observation_space = spaces.Dict(
            dict(desired_goal=spaces.Box(-np.inf,
                                         np.inf,
                                         shape=goal_space_shape,
                                         dtype=np.float64),
                 achieved_goal=spaces.Box(-np.inf,
                                          np.inf,
                                          shape=goal_space_shape,
                                          dtype=np.float64),
                 observation=self.env.observation_space))
        self.reward_range = self.env.reward_range
        self.env.add_wrapper_info({
            'her_environment': {
                'activate_sparse_reward': activate_sparse_reward
            }
        })

    def __getattr__(self, name):
        # Delegate attribute lookups to the wrapped environment, without
        # exposing its private attributes.
        if name.startswith('_'):
            raise AttributeError(
                "attempted to get missing private attribute '{}'".format(name))
        return getattr(self.env, name)

    @property
    def spec(self):
        """
        :return: the spec of the wrapped environment.
        """
        return self.env.spec

    @classmethod
    def class_name(cls):
        """
        :return: (str) the name of this wrapper class.
        """
        return cls.__name__

    def step(self, action):
        """
        Steps through the environment.

        :param action: (nd.array) the action to be taken by the robot,
                                  following the action mode specified.
        :return: (tuple) the observation dict (with 'observation',
                         'achieved_goal' and 'desired_goal' keys, following
                         the observation mode specified), the reward, the
                         done flag and the info dict returned after stepping
                         through the environment.
        """
        obs_dict = dict()
        normal_obs, reward, done, info = self.env.step(action)
        obs_dict['observation'] = normal_obs
        obs_dict['achieved_goal'] = info['achieved_goal'].flatten()
        obs_dict['desired_goal'] = info['desired_goal'].flatten()
        return obs_dict, reward, done, info

    def reset(self):
        """
        Resets the environment to its starting state.

        :return: (dict) the observation dict (with 'observation',
                        'achieved_goal' and 'desired_goal' keys) returned
                        after resetting the environment, following the
                        observation mode specified.
        """
        obs_dict = dict()
        normal_obs = self.env.reset()
        obs_dict['observation'] = normal_obs
        obs_dict['achieved_goal'] = \
            self.env.get_task().get_achieved_goal().flatten()
        obs_dict['desired_goal'] = \
            self.env.get_task().get_desired_goal().flatten()
        return obs_dict

    def render(self, mode='human', **kwargs):
        """
        Returns an RGB image taken from above the platform.

        :param mode: (str) currently ignored.
        :return: (nd.array) an RGB image taken from above the platform.
        """
        return self.env.render(mode, **kwargs)

    def close(self):
        """
        Closes the environment in a safe manner; should be called at the
        end of the program.

        :return: None
        """
        return self.env.close()

    def seed(self, seed=None):
        """
        Sets the seed of the environment, to reproduce the same randomness.

        :param seed: (int) specifies the seed number.
        :return: (list of int) the numpy seed that you can use further.
        """
        return self.env.seed(seed)

    def compute_reward(self, achieved_goal, desired_goal, info):
        """
        Calculates the reward for a hypothetical situation, as needed by
        hindsight experience replay algorithm variants (see the usage sketch
        at the end of this module). Should only be used in the sparse reward
        setting; for the other settings it can be tricky.

        :param achieved_goal: (nd.array) the achieved goal, by default the
                                         bounding boxes of the objects.
        :param desired_goal: (nd.array) the desired goal, by default the
                                        bounding boxes of the goal shapes.
        :param info: (dict) not used for now.
        :return: (float) the final reward achieved in the hypothetical
                         situation.
        """
        return self.env.get_task().compute_reward(achieved_goal, desired_goal,
                                                  info)

    def __str__(self):
        """
        :return: (str) a string representation of the wrapper and the
                       environment it wraps.
        """
        return '<{}{}>'.format(type(self).__name__, self.env)

    def __repr__(self):
        """
        :return: (str) same as __str__.
        """
        return str(self)

    @property
    def unwrapped(self):
        """
        :return: (causal_world.CausalWorld) the innermost wrapped environment.
        """
        return self.env.unwrapped
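

# ---------------------------------------------------------------------------
# Usage sketch (illustrative, not part of the wrapper itself). It assumes the
# standard CausalWorld entry points `causal_world.envs.CausalWorld` and
# `causal_world.task_generators.generate_task`; adjust the imports if your
# installation exposes them under different paths.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    from causal_world.envs import CausalWorld
    from causal_world.task_generators import generate_task

    task = generate_task(task_generator_id='reaching')
    env = HERGoalEnvWrapper(CausalWorld(task=task),
                            activate_sparse_reward=True)

    # The wrapper returns gym.GoalEnv-style dict observations.
    obs_dict = env.reset()
    assert set(obs_dict) == {'observation', 'achieved_goal', 'desired_goal'}

    obs_dict, reward, done, info = env.step(env.action_space.sample())
    # Hindsight relabelling: recompute the reward as if the goal that was
    # actually achieved had been the desired goal all along, which is the
    # core trick HER replay buffers rely on.
    hindsight_reward = env.compute_reward(obs_dict['achieved_goal'],
                                          obs_dict['achieved_goal'], info)
    env.close()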