Getting Started

CausalWorld is a framework designed to foster research on causal structure and transfer learning of RL agents within a robotics manipulation environment. Tasks range from simple to extremely challenging.

In each task, the robot's goal is to fill a 3D goal shape in the arena using the tool objects available for it to manipulate. Each environment comprises a simulated version of the “TriFinger” robot, which has three fingers with 3 joints each, learning dexterous manipulation skills with the rigid bodies available in the stage.

We provide task generators that sample a 3D goal shape from a task-specific distribution. These can be rather simple, like pushing, picking, or pick-and-place of single objects, but can also be much more involved, like stacking towers, restoring a certain scene of objects, or building structures that require imagination.

All environments are compatible with the OpenAI Gym interface and come with many features you can use in your research, so you can start right out of the box.

Setting up an environment can be as simple as two lines of code:

from causal_world.envs import CausalWorld
from causal_world.task_generators import generate_task

task = generate_task(task_generator_id='stacked_blocks')
env = CausalWorld(task=task)
env.reset()
for _ in range(2000):
    obs, reward, done, info = env.step(env.action_space.sample())
env.close()

By default you get a task with a structured observation space and a low-level joint-positions controller. The goal is to solve the task within 10 seconds per object in the arena. All of this is easily customizable, as in the sketch below and in the many tutorials provided.
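
For example, here is a minimal sketch of overriding some of these defaults; every keyword argument shown also appears in the tutorials below.

from causal_world.envs import CausalWorld
from causal_world.task_generators import generate_task

task = generate_task(task_generator_id='pushing')
env = CausalWorld(task=task,
                  observation_mode="pixel",       # camera images instead of structured observations
                  action_mode="joint_positions",  # low-level joint position controller
                  skip_frame=10,                  # simulator steps per environment step
                  max_episode_length=2500,        # override the default episode length
                  seed=0)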

Basics: Controlling your Environment and Task

"""
This tutorial shows you how to generate a task using one of the task generators
and then using it with the CausalWorld environment.
"""
from causal_world.envs import CausalWorld
from causal_world.task_generators import generate_task


def example():
    task = generate_task(task_generator_id='creative_stacked_blocks')
    env = CausalWorld(task=task, enable_visualization=True)
    for _ in range(20):
        env.reset()
        for _ in range(200):
            obs, reward, done, info = env.step(env.action_space.sample())
    env.close()


if __name__ == '__main__':
    example()
"""
This tutorial shows you how to generate a task using one of the task generators
and then using it with the CausalWorld environment but having camera observations
instead of the default structured observations.
"""

from causal_world.envs.causalworld import CausalWorld
from causal_world.task_generators.task import generate_task
import matplotlib.pyplot as plt


def example():
    task = generate_task(task_generator_id='stacked_blocks')
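    # observation_mode="pixel" returns camera images instead of structured
    # observations; camera_indicies (as spelled in the constructor) selects
    # which of the stage cameras to render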
    env = CausalWorld(task=task,
                      skip_frame=10,
                      enable_visualization=True,
                      seed=0,
                      action_mode="joint_positions",
                      observation_mode="pixel",
                      camera_indicies=[0, 1, 2])
    env.reset()
    for _ in range(5):
        obs, reward, done, info = env.step(env.action_space.sample())
    # show the six images of the last observation (assumed: the three camera
    # views of the current scene plus the three corresponding goal images)
    for i in range(6):
        plt.imshow(obs[i])
        plt.show()
    env.close()


if __name__ == '__main__':
    example()
"""
This tutorial shows you how to solve the reaching task using the inverse
kinematics provided internally in the package.
"""

from causal_world.envs.causalworld import CausalWorld
from causal_world.task_generators.task import generate_task


def control_policy(env, obs):
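    # assumption: with unnormalized structured observations on the reaching
    # task, obs[-9:] holds the goal (target fingertip) positions and
    # obs[1:10] the current joint positions, used here as the IK seed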
    return \
        env.get_robot().get_joint_positions_from_tip_positions(
            obs[-9:], obs[1:10])


def end_effector_pos():
    task = generate_task(task_generator_id='reaching')
    env = CausalWorld(task=task,
                      enable_visualization=True,
                      action_mode="joint_positions",
                      normalize_actions=False,
                      normalize_observations=False)
    obs = env.reset()
    for _ in range(100):
        goal_dict = env.sample_new_goal()
        success_signal, obs = env.do_intervention(goal_dict)
        obs, reward, done, info = env.step(control_policy(env, obs))
        for _ in range(250):
            obs, reward, done, info = env.step(control_policy(env, obs))
    env.close()


if __name__ == '__main__':
    end_effector_pos()

Performing Interventions

"""
This tutorial shows you how to perform a random intervention on one of the
variables in the environment.
"""

from causal_world.envs.causalworld import CausalWorld
from causal_world.task_generators.task import generate_task


def example():
    task = generate_task(task_generator_id='picking')
    env = CausalWorld(task=task, enable_visualization=True)
    env.reset()
    for _ in range(50):
        random_intervention_dict, success_signal, obs = \
            env.do_single_random_intervention()
        print("The random intervention performed is ",
              random_intervention_dict)
        for i in range(100):
            obs, reward, done, info = env.step(env.action_space.sample())
    env.close()


if __name__ == '__main__':
    example()
"""
This tutorial shows you how to control the allowed space for interventions
"""

from causal_world.envs.causalworld import CausalWorld
from causal_world.task_generators.task import generate_task
import numpy as np


def without_intervention_split():
    task = generate_task(task_generator_id='pushing')
    env = CausalWorld(task=task, enable_visualization=True)
    env.reset()
    for _ in range(2):
        for i in range(200):
            obs, reward, done, info = env.step(env.action_space.sample())
        success_signal, obs = env.do_intervention(
            {'stage_color': np.random.uniform(0, 1, [3])})
        print("Intervention success signal", success_signal)
    env.close()


def with_intervention_split_1():
    task = generate_task(task_generator_id='pushing',
                          variables_space='space_a')
    env = CausalWorld(task=task, enable_visualization=False)
    env.reset()
    for _ in range(2):
        for i in range(200):
            obs, reward, done, info = env.step(env.action_space.sample())
        success_signal, obs = env.do_intervention(
            {'stage_color': np.random.uniform(0, 1, [3])})
        print("Intervention success signal", success_signal)
    env.close()


def with_intervention_split_2():
    task = generate_task(task_generator_id='pushing',
                          variables_space='space_b')
    env = CausalWorld(task=task, enable_visualization=False)
    interventions_space = task.get_intervention_space_a()
    env.reset()
    for _ in range(2):
        for i in range(200):
            obs, reward, done, info = env.step(env.action_space.sample())
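        # the task only allows interventions inside space_b, so a stage_color
        # sampled from space_a's bounds is expected to be rejected
        # (success_signal False)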
        success_signal, obs = env.do_intervention({
            'stage_color':
                np.random.uniform(interventions_space['stage_color'][0],
                                  interventions_space['stage_color'][1])
        })
        print("Intervention success signal", success_signal)
    env.close()


if __name__ == '__main__':
    without_intervention_split()
    with_intervention_split_1()
    with_intervention_split_2()
"""
This tutorial shows you how to sample new goals by intervening
on the environment.
"""
from causal_world.envs.causalworld import CausalWorld
from causal_world.task_generators.task import generate_task


def goal_interventions():
    task = generate_task(task_generator_id='stacked_blocks')
    env = CausalWorld(task=task, enable_visualization=True)
    env.reset()
    for _ in range(10):
        for i in range(200):
            obs, reward, done, info = env.step(env.action_space.sample())
        goal_intervention_dict = env.sample_new_goal()
        print("new goal chosen: ", goal_intervention_dict)
        success_signal, obs = env.do_intervention(goal_intervention_dict)
        print("Goal Intervention success signal", success_signal)
    env.close()


if __name__ == '__main__':
    goal_interventions()
"""
This tutorial shows you how to use privileged information to solve the
task at hand.
"""
from causal_world.envs.causalworld import CausalWorld
from causal_world.task_generators.task import generate_task


def privileged_information():
    task = generate_task(task_generator_id='general')
    env = CausalWorld(task=task, enable_visualization=True)
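    # expose_potential_partial_solution() makes the info dict returned by
    # step() include a 'possible_solution_intervention' entry, used below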
    env.expose_potential_partial_solution()
    env.reset()
    for _ in range(10):
        goal_intervention_dict = env.sample_new_goal()
        success_signal, obs = env.do_intervention(goal_intervention_dict)
        print("Goal Intervention success signal", success_signal)
        for i in range(100):
            obs, reward, done, info = env.step(env.action_space.low)
        print("now we solve it with privileged info")
        success_signal, obs = env.do_intervention(
            info['possible_solution_intervention'], check_bounds=False)
        print("Partial Solution Setting Intervention Succes Signal",
              success_signal)
        for i in range(100):
            obs, reward, done, info = env.step(env.action_space.low)
        print("fractional_success is:", info['fractional_success'])
    env.close()


if __name__ == '__main__':
    privileged_information()

Set Up Your Learning Curriculum

"""
This tutorial shows you how to create a curriculum using an intervention actor
which intervenes on the goal at a specific time (the start of each episode).
"""

from causal_world.task_generators.task import generate_task
from causal_world.envs.causalworld import CausalWorld
from causal_world.intervention_actors import GoalInterventionActorPolicy
from causal_world.wrappers.curriculum_wrappers import CurriculumWrapper


def example():
    #initialize env
    task = generate_task(task_generator_id='reaching')
    env = CausalWorld(task, skip_frame=10, enable_visualization=True)

    # define a custom curriculum of interventions:
    # Goal intervention actor each episode after reset
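    # the actives argument is assumed to take (episode_start, episode_end,
    # episode_periodicity, time_step_of_intervention) tuples, so the tuple
    # below keeps the goal actor active at time step 0 of every episode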

    env = CurriculumWrapper(env,
                            intervention_actors=[GoalInterventionActorPolicy()],
                            actives=[(0, 1000000000, 1, 0)])

    for reset_idx in range(30):
        obs = env.reset()
        for time in range(100):
            desired_action = env.action_space.sample()
            obs, reward, done, info = env.step(action=desired_action)
    env.close()


if __name__ == '__main__':
    example()

Training Agents

"""
This tutorial shows you how to train a policy using stable baselines with PPO
"""
from causal_world.task_generators.task import generate_task
from causal_world.envs.causalworld import CausalWorld
from stable_baselines import PPO2
from stable_baselines.common.policies import MlpPolicy
import tensorflow as tf
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
import os
import json
from stable_baselines.common import set_global_seeds
from stable_baselines.common.vec_env import SubprocVecEnv
import argparse


def train_policy(num_of_envs, log_relative_path, maximum_episode_length,
                 skip_frame, seed_num, ppo_config, total_time_steps,
                 validate_every_timesteps, task_name):

    def _make_env(rank):

        def _init():
            task = generate_task(task_generator_id=task_name)
            env = CausalWorld(task=task,
                              skip_frame=skip_frame,
                              enable_visualization=False,
                              seed=seed_num + rank,
                              max_episode_length=maximum_episode_length)
            return env

        set_global_seeds(seed_num)
        return _init

    os.makedirs(log_relative_path)
    policy_kwargs = dict(act_fun=tf.nn.tanh, net_arch=[256, 128])
    env = SubprocVecEnv([_make_env(rank=i) for i in range(num_of_envs)])
    model = PPO2(MlpPolicy,
                 env,
                 _init_setup_model=True,
                 policy_kwargs=policy_kwargs,
                 verbose=1,
                 **ppo_config)
    save_config_file(ppo_config,
                     _make_env(0)(),
                     os.path.join(log_relative_path, 'config.json'))
    for i in range(int(total_time_steps / validate_every_timesteps)):
        model.learn(total_timesteps=validate_every_timesteps,
                    tb_log_name="ppo2",
                    reset_num_timesteps=False)
        model.save(os.path.join(log_relative_path, 'saved_model'))
    return


def save_config_file(ppo_config, env, file_path):
    task_config = env._task.get_task_params()
    for task_param in task_config:
        if not isinstance(task_config[task_param], str):
            task_config[task_param] = str(task_config[task_param])
    env_config = env.get_world_params()
    env.close()
    configs_to_save = [task_config, env_config, ppo_config]
    with open(file_path, 'w') as fout:
        json.dump(configs_to_save, fout)


if __name__ == '__main__':
    ap = argparse.ArgumentParser()
    #TODO: pass reward weights here!!
    ap.add_argument("--seed_num", required=False, default=0, help="seed number")
    ap.add_argument("--skip_frame",
                    required=False,
                    default=10,
                    help="skip frame")
    ap.add_argument("--max_episode_length",
                    required=False,
                    default=2500,
                    help="maximum episode length")
    ap.add_argument("--total_time_steps_per_update",
                    required=False,
                    default=100000,
                    help="total time steps per update")
    ap.add_argument("--num_of_envs",
                    required=False,
                    default=20,
                    help="number of parallel environments")
    ap.add_argument("--task_name",
                    required=False,
                    default="reaching",
                    help="the task nam for training")
    ap.add_argument("--fixed_position",
                    required=False,
                    default=True,
                    help="define the reset intervention wrapper")
    ap.add_argument("--log_relative_path", required=True, help="log folder")
    args = vars(ap.parse_args())
    total_time_steps_per_update = int(args['total_time_steps_per_update'])
    num_of_envs = int(args['num_of_envs'])
    log_relative_path = str(args['log_relative_path'])
    maximum_episode_length = int(args['max_episode_length'])
    skip_frame = int(args['skip_frame'])
    seed_num = int(args['seed_num'])
    task_name = str(args['task_name'])
    fixed_position = bool(args['fixed_position'])
    assert (((float(total_time_steps_per_update) / num_of_envs) /
             5).is_integer())
    ppo_config = {
        "gamma": 0.9995,
        "n_steps": 5000,
        "ent_coef": 0,
        "learning_rate": 0.00025,
        "vf_coef": 0.5,
        "max_grad_norm": 10,
        "nminibatches": 1000,
        "noptepochs": 4,
        "tensorboard_log": log_relative_path
    }
    train_policy(num_of_envs=num_of_envs,
                 log_relative_path=log_relative_path,
                 maximum_episode_length=maximum_episode_length,
                 skip_frame=skip_frame,
                 seed_num=seed_num,
                 ppo_config=ppo_config,
                 total_time_steps=60000000,
                 validate_every_timesteps=1000000,
                 task_name=task_name)
"""
This tutorial shows you how to train a policy using stable baselines with
SAC + HER
"""
from causal_world.task_generators.task import generate_task
from causal_world.envs.causalworld import CausalWorld
from stable_baselines import HER, SAC
import tensorflow as tf
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
import os
import json
from stable_baselines.common import set_global_seeds
from stable_baselines.common.vec_env import SubprocVecEnv
from causal_world.wrappers.env_wrappers import HERGoalEnvWrapper
import argparse


def train_policy(num_of_envs, log_relative_path, maximum_episode_length,
                 skip_frame, seed_num, sac_config, total_time_steps,
                 validate_every_timesteps, task_name):

    def _make_env(rank):

        def _init():
            task = generate_task(task_generator_id=task_name)
            env = CausalWorld(task=task,
                              skip_frame=skip_frame,
                              enable_visualization=False,
                              seed=seed_num + rank,
                              max_episode_length=maximum_episode_length)
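            # HERGoalEnvWrapper exposes observations in the goal-based dict
            # format (observation / achieved_goal / desired_goal) expected by
            # the stable-baselines HER implementation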
            env = HERGoalEnvWrapper(env)
            return env

        set_global_seeds(seed_num)
        return _init

    os.makedirs(log_relative_path)
    env = SubprocVecEnv([_make_env(rank=i) for i in range(num_of_envs)])
    model = HER('MlpPolicy',
                env,
                SAC,
                verbose=1,
                policy_kwargs=dict(layers=[256, 256, 256]),
                **sac_config)
    save_config_file(sac_config,
                     _make_env(0)(),
                     os.path.join(log_relative_path, 'config.json'))
    for i in range(int(total_time_steps / validate_every_timesteps)):
        model.learn(total_timesteps=validate_every_timesteps,
                    tb_log_name="sac",
                    reset_num_timesteps=False)
    model.save(os.path.join(log_relative_path, 'saved_model'))
    return


def save_config_file(sac_config, env, file_path):
    task_config = env.get_task().get_task_params()
    for task_param in task_config:
        if not isinstance(task_config[task_param], str):
            task_config[task_param] = str(task_config[task_param])
    env_config = env.get_world_params()
    env.close()
    configs_to_save = [task_config, env_config, sac_config]
    with open(file_path, 'w') as fout:
        json.dump(configs_to_save, fout)


if __name__ == '__main__':
    ap = argparse.ArgumentParser()
    #TODO: pass reward weights here!!
    ap.add_argument("--seed_num", required=False, default=0, help="seed number")
    ap.add_argument("--skip_frame",
                    required=False,
                    default=10,
                    help="skip frame")
    ap.add_argument("--max_episode_length",
                    required=False,
                    default=2500,
                    help="maximum episode length")
    ap.add_argument("--total_time_steps_per_update",
                    required=False,
                    default=150000,
                    help="total time steps per update")
    ap.add_argument("--task_name",
                    required=False,
                    default="reaching",
                    help="the task nam for training")
    ap.add_argument("--log_relative_path", required=True, help="log folder")
    args = vars(ap.parse_args())
    total_time_steps_per_update = int(args['total_time_steps_per_update'])
    num_of_envs = 1
    log_relative_path = str(args['log_relative_path'])
    maximum_episode_length = int(args['max_episode_length'])
    skip_frame = int(args['skip_frame'])
    seed_num = int(args['seed_num'])
    task_name = str(args['task_name'])
    assert (((float(total_time_steps_per_update) / num_of_envs) /
             5).is_integer())
    #params are taken from
    #https://github.com/araffin/rl-baselines-zoo/blob/master/hyperparams/her.yml
    sac_config = {
        "n_sampled_goal": 4,
        "goal_selection_strategy": 'future',
        "learning_rate": 0.001,
        "train_freq": 1,
        "gradient_steps": 1,
        "learning_starts": 1000,
        "tensorboard_log": log_relative_path,
        "buffer_size": int(1e6),
        "gamma": 0.95,
        "batch_size": 256,
        "ent_coef": 'auto',
        "random_exploration": 0.1
    }
    train_policy(num_of_envs=num_of_envs,
                 log_relative_path=log_relative_path,
                 maximum_episode_length=maximum_episode_length,
                 skip_frame=skip_frame,
                 seed_num=seed_num,
                 sac_config=sac_config,
                 total_time_steps=60000000,
                 validate_every_timesteps=1000000,
                 task_name=task_name)

Disentangle the Generalization Capabilities

"""
This tutorial shows you how to train a policy and evaluate it afterwards using
an evaluation pipeline composed of different evaluation protocols.
"""

from causal_world.task_generators.task import generate_task
from causal_world.envs.causalworld import CausalWorld
from stable_baselines import PPO2
from stable_baselines.common.policies import MlpPolicy
import tensorflow as tf
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
import os
from stable_baselines.common import set_global_seeds
from stable_baselines.common.vec_env import SubprocVecEnv
from causal_world.evaluation.evaluation import EvaluationPipeline
import causal_world.evaluation.protocols as protocols

log_relative_path = './pushing_policy_tutorial_1'


def _make_env(rank):

    def _init():
        task = generate_task(task_generator_id="pushing")
        env = CausalWorld(task=task, enable_visualization=False, seed=rank, skip_frame=3)
        return env

    set_global_seeds(0)
    return _init


def train_policy():
    ppo_config = {
        "gamma": 0.9988,
        "n_steps": 200,
        "ent_coef": 0,
        "learning_rate": 0.001,
        "vf_coef": 0.99,
        "max_grad_norm": 0.1,
        "lam": 0.95,
        "nminibatches": 5,
        "noptepochs": 100,
        "cliprange": 0.2,
        "tensorboard_log": log_relative_path
    }
    os.makedirs(log_relative_path)
    policy_kwargs = dict(act_fun=tf.nn.tanh, net_arch=[256, 128])
    env = SubprocVecEnv([_make_env(rank=i) for i in range(5)])
    model = PPO2(MlpPolicy,
                 env,
                 _init_setup_model=True,
                 policy_kwargs=policy_kwargs,
                 verbose=1,
                 **ppo_config)
    model.learn(total_timesteps=1000,
                tb_log_name="ppo2",
                reset_num_timesteps=False)
    model.save(os.path.join(log_relative_path, 'model'))
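    # save_world dumps the environment/tracker information into the log
    # folder so the EvaluationPipeline below can load it via tracker_path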
    env.env_method("save_world", log_relative_path)
    env.close()
    return


def evaluate_trained_policy():
    # Load the PPO2 policy trained on the pushing task
    model = PPO2.load(os.path.join(log_relative_path, 'model.zip'))

    # define a method for the policy fn of your trained model

    def policy_fn(obs):
        return model.predict(obs)[0]

    # pass the different protocols you'd like to evaluate in the following
    evaluator = EvaluationPipeline(evaluation_protocols=[
        protocols.FullyRandomProtocol(name='P11', variable_space='space_b')],
                                   visualize_evaluation=True,
                                   tracker_path=log_relative_path,
                                   initial_seed=0)

    # For demonstration purposes we evaluate the policy on 5 per cent of the default number of episodes per protocol
    scores = evaluator.evaluate_policy(policy_fn, fraction=0.05)
    evaluator.save_scores(log_relative_path)
    print(scores)


if __name__ == '__main__':
    train_policy()
    evaluate_trained_policy()
"""
This tutorial shows you how to train a policy and evaluate it afterwards using
one of the default evaluation benchmarks.
"""
from causal_world.evaluation.evaluation import EvaluationPipeline
from causal_world.benchmark.benchmarks import REACHING_BENCHMARK
from causal_world.task_generators.task import generate_task
from causal_world.envs.causalworld import CausalWorld
import causal_world.evaluation.visualization.visualiser as vis

from stable_baselines import PPO2
from stable_baselines.common.policies import MlpPolicy
import tensorflow as tf
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
import os
from stable_baselines.common import set_global_seeds
from stable_baselines.common.vec_env import SubprocVecEnv

log_relative_path = './pushing_policy_tutorial_2'


def _make_env(rank):

    def _init():
        task = generate_task(task_generator_id="reaching")
        env = CausalWorld(task=task, enable_visualization=False, seed=rank)
        return env

    set_global_seeds(0)
    return _init


def train_policy():
    ppo_config = {
        "gamma": 0.9988,
        "n_steps": 200,
        "ent_coef": 0,
        "learning_rate": 0.001,
        "vf_coef": 0.99,
        "max_grad_norm": 0.1,
        "lam": 0.95,
        "nminibatches": 5,
        "noptepochs": 100,
        "cliprange": 0.2,
        "tensorboard_log": log_relative_path
    }
    os.makedirs(log_relative_path)
    policy_kwargs = dict(act_fun=tf.nn.tanh, net_arch=[256, 128])
    env = SubprocVecEnv([_make_env(rank=i) for i in range(5)])
    model = PPO2(MlpPolicy,
                 env,
                 _init_setup_model=True,
                 policy_kwargs=policy_kwargs,
                 verbose=1,
                 **ppo_config)
    model.learn(total_timesteps=1000,
                tb_log_name="ppo2",
                reset_num_timesteps=False)
    model.save(os.path.join(log_relative_path, 'model'))
    env.env_method("save_world", log_relative_path)
    env.close()
    return


def evaluate_model():
    # Load the PPO2 policy trained on the reaching task
    model = PPO2.load(os.path.join(log_relative_path, 'model.zip'))

    # define a method for the policy fn of your trained model

    def policy_fn(obs):
        return model.predict(obs)[0]

    # Let's evaluate the policy on some default evaluation protocols for the reaching task
    evaluation_protocols = REACHING_BENCHMARK['evaluation_protocols']

    evaluator = EvaluationPipeline(evaluation_protocols=evaluation_protocols,
                                   tracker_path=log_relative_path,
                                   initial_seed=0)

    # For demonstration purposes we evaluate the policy on 10 per cent of the default number of episodes per protocol
    scores = evaluator.evaluate_policy(policy_fn, fraction=0.1)
    evaluator.save_scores(log_relative_path)
    experiments = {'reaching_model': scores}
    vis.generate_visual_analysis(log_relative_path, experiments=experiments)


if __name__ == '__main__':
    # first train the policy; skip this step if you already trained it
    train_policy()
    evaluate_model()

Viewing and Recording Policies or Logged Data

"""
This tutorial shows you how to view a policy acting in the world
"""
from causal_world.task_generators.task import generate_task
from causal_world.actors.reacher_policy import ReacherActorPolicy
import tensorflow as tf
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
import causal_world.viewers.task_viewer as viewer


def example():
    # This tutorial shows how to view a pretrained reacher policy
    task = generate_task(task_generator_id='reaching')
    world_params = dict()
    world_params["skip_frame"] = 1
    world_params["seed"] = 0
    agent = ReacherActorPolicy()

    # define a method for the policy fn of your trained model
    def policy_fn(obs):
        return agent.act(obs)

    # Similarly for interactive visualization in the GUI
    viewer.view_policy(task=task,
                       world_params=world_params,
                       policy_fn=policy_fn,
                       max_time_steps=40 * 960,
                       number_of_resets=40)


if __name__ == '__main__':
    example()
"""
This tutorial shows you how to record a video of a saved episode after loading it.
"""

import causal_world.viewers.task_viewer as viewer
from causal_world.loggers.data_loader import DataLoader


def example():
    # This tutorial shows how to view logged episodes

    data = DataLoader(episode_directory='pushing_episodes')
    episode = data.get_episode(6)

    # Recording a video of the logged episode is done in one line
    viewer.record_video_of_episode(episode=episode, file_name="pushing_video")

    # Similarly for interactive visualization in the GUI
    viewer.view_episode(episode)


if __name__ == '__main__':
    example()
"""
This tutorial shows you how to record a video of a random policy in the world.
"""

from causal_world.task_generators.task import generate_task
import causal_world.viewers.task_viewer as viewer
from causal_world.loggers.data_loader import DataLoader


def example():
    # This tutorial shows how to record a video of a random policy on the picking task

    task = generate_task(task_generator_id='picking')
    world_params = dict()
    world_params["skip_frame"] = 3
    world_params["seed"] = 200

    viewer.record_video_of_random_policy(task=task,
                                         world_params=world_params,
                                         file_name="picking_video",
                                         number_of_resets=1,
                                         max_time_steps=300)


if __name__ == '__main__':
    example()
"""
This tutorial shows you how to view the policies of trained actors.
"""

from causal_world.task_generators.task import generate_task
from stable_baselines import SAC
import tensorflow as tf
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
import causal_world.viewers.task_viewer as viewer


def example():
    # This tutorial shows how to view policies of trained actors

    task = generate_task(task_generator_id='picking')
    world_params = dict()
    world_params["skip_frame"] = 3
    world_params["seed"] = 0
    stable_baselines_policy_path = "./model_2000000_steps.zip"
    model = SAC.load(stable_baselines_policy_path)

    # define a method for the policy fn of your trained model
    def policy_fn(obs):
        return model.predict(obs, deterministic=True)[0]

    # Recording a video of the policy is done in one line
    viewer.record_video_of_policy(task=task,
                                  world_params=world_params,
                                  policy_fn=policy_fn,
                                  file_name="pushing_video",
                                  number_of_resets=10,
                                  max_time_steps=10 * 100)

    # Similarly for interactive visualization in the GUI
    viewer.view_policy(task=task,
                       world_params=world_params,
                       policy_fn=policy_fn,
                       max_time_steps=40 * 600,
                       number_of_resets=40)


if __name__ == '__main__':
    example()

Changing the Action Space of the Robot

"""
This tutorial shows you how to change the action space of the robot by using
a Delta Action wrapper, which can be used with joint positions or
end effector positions as well.
"""

from causal_world.envs.causalworld import CausalWorld
from causal_world.task_generators.task import generate_task
from causal_world.wrappers.action_wrappers import DeltaActionEnvWrapper
import numpy as np


def apply_delta_action():
    task = generate_task(task_generator_id='reaching')
    env = CausalWorld(task=task,
                      enable_visualization=True,
                      action_mode="joint_positions",
                      normalize_actions=True,
                      normalize_observations=True,
                      skip_frame=1)
    env = DeltaActionEnvWrapper(env)
    for _ in range(50):
        obs = env.reset()
        for _ in range(1000):
            desired_action = np.zeros([9])
            obs, reward, done, info = env.step(desired_action)
    env.close()


if __name__ == '__main__':
    apply_delta_action()
"""
This tutorial shows you how to discretize the action space of the robot itself,
by using the object selector wrapper: the first position of the action
chooses the object, the second position chooses a discrete motion
(e.g. up, down, right, left, stay) and the third position controls rotation.
"""

from causal_world.envs.causalworld import CausalWorld
from causal_world.task_generators.task import generate_task
from causal_world.wrappers.planning_wrappers import ObjectSelectorWrapper


def example():
    task = generate_task(task_generator_id='picking')
    env = CausalWorld(task=task, enable_visualization=True)
    env = ObjectSelectorWrapper(env)
    for _ in range(50):
        obs = env.reset()
        #go up
        for i in range(70):
            obs, reward, done, info = env.step([0, 1, 0])
        # rotate yaw
        for i in range(20):
            obs, reward, done, info = env.step([0, 0, 1])
        for i in range(50):
            obs, reward, done, info = env.step([0, 5, 0])
        for i in range(20):
            obs, reward, done, info = env.step([0, 0, 1])
            # print(obs)
        for i in range(50):
            obs, reward, done, info = env.step([0, 2, 0])
            # print(obs)
    env.close()


if __name__ == '__main__':
    example()
"""
This tutorial shows you how to change the action space of the robot by using
a Moving Average wrapper to smooth the actions further, which can be used
with joint positions or end effector positions as well.
"""
from causal_world.envs.causalworld import CausalWorld
from causal_world.task_generators.task import generate_task
from causal_world.wrappers.action_wrappers import MovingAverageActionEnvWrapper
import numpy as np


def smooth_action():
    task = generate_task(task_generator_id='reaching')
    env = CausalWorld(task=task,
                      enable_visualization=True,
                      action_mode="joint_positions",
                      normalize_actions=True,
                      normalize_observations=True,
                      skip_frame=1)
    env = MovingAverageActionEnvWrapper(env)
    for _ in range(50):
        obs = env.reset()
        for _ in range(1000):
            desired_action = np.zeros([9])
            obs, reward, done, info = env.step(desired_action)
    env.close()


if __name__ == '__main__':
    smooth_action()

Other Utilities: Model Predictive Control, Logging Data

"""
This tutorial shows you how to use Model Predictive Control
with the true model.
"""
from stable_baselines.common import set_global_seeds
from causal_world.envs.causalworld import CausalWorld
from causal_world.dynamics_model import SimulatorModel
from causal_world.utils.mpc_optimizers import \
    CrossEntropyMethod
from gym.wrappers.monitoring.video_recorder import VideoRecorder
import numpy as np
from causal_world.task_generators.task import generate_task

seed = 0
skip_frame = 35
num_of_particles = 500
num_elite = 50
max_iterations = 20
horizon_length = 6
parallel_agents = 25
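# Rough sketch of the cross-entropy method hyperparameters above: each of
# max_iterations rounds samples num_of_particles candidate action sequences
# of length horizon_length (each action is held for skip_frame simulator
# steps), evaluates them with parallel_agents copies of the simulator and
# refits the sampling distribution to the num_elite best sequences.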


def _make_env():

    def _init():
        task = generate_task(
            task_generator_id='picking',
            joint_positions=[-0.21737874, 0.55613149,
                             -1.09308519, -0.12868997,
                             0.52551013, -1.08006493,
                             -0.00221536, 0.46163487,
                             -1.00948735],
            tool_block_position=[0.0, 0, 0.035],
            fractional_reward_weight=1,
            dense_reward_weights=np.array([0, 10, 0,
                                           1, 1, 0, 0,
                                           0]))
        env = CausalWorld(task=task,
                          skip_frame=skip_frame,
                          enable_visualization=False,
                          seed=seed)
        return env

    set_global_seeds(seed)
    return _init


def run_mpc():
    task = generate_task(
        task_generator_id='picking',
        joint_positions=[-0.21737874, 0.55613149,
                         -1.09308519, -0.12868997,
                         0.52551013, -1.08006493,
                         -0.00221536, 0.46163487,
                         -1.00948735],
        tool_block_position=[0.0, 0, 0.035],
        fractional_reward_weight=1,
        dense_reward_weights=np.array([0, 10, 0,
                                       1, 1, 0, 0,
                                       0]))
    env = CausalWorld(task=task,
                      skip_frame=1,
                      enable_visualization=False,
                      seed=seed)
    true_model = SimulatorModel(_make_env, parallel_agents=parallel_agents)
    optimizer = CrossEntropyMethod(
        planning_horizon=horizon_length,
        max_iterations=max_iterations,
        population_size=num_of_particles,
        num_elite=num_elite,
        action_upper_bound=np.array(env.action_space.high),
        action_lower_bound=np.array(env.action_space.low),
        model=true_model)
    env.reset()
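    # get_actions runs the CEM optimization against the simulator model and
    # returns the planned sequence of horizon_length actions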
    actions = optimizer.get_actions()
    true_model.end_sim()
    recorder = VideoRecorder(env, 'picking.mp4')
    for i in range(horizon_length):
        for _ in range(skip_frame):
            recorder.capture_frame()
            obs, reward, done, info = env.step(actions[i])
    recorder.capture_frame()
    recorder.close()
    env.close()


if __name__ == '__main__':
    run_mpc()
"""
This tutorial shows you how to use a data recorder to record data (for
imitation learning, for instance), how to load the data again, and how to
replay episodes.
"""
from causal_world.envs.causalworld import CausalWorld
from causal_world.task_generators.task import generate_task

from causal_world.loggers.data_recorder import DataRecorder
from causal_world.loggers.data_loader import DataLoader

import causal_world.viewers.task_viewer as viewer


def example():
    # Here you learn how to record/log entire episodes into a directory
    # to reuse later, e.g. for reviewing logged episodes or using this
    # data for pre-training policies.

    # Construct a data_recorder that keeps track of every change in the environment
    # We set the episode dump frequency into log files to 11 (the default is 100)
    data_recorder = DataRecorder(output_directory='pushing_episodes',
                                 rec_dumb_frequency=11)

    # Pass the data recorder to the World
    task = generate_task(task_generator_id='pushing')
    env = CausalWorld(task=task,
                      enable_visualization=True,
                      data_recorder=data_recorder)

    # Record some episodes
    for _ in range(23):
        env.reset()
        for _ in range(50):
            env.step(env.action_space.sample())
    env.close()

    # Load the logged episodes
    data = DataLoader(episode_directory='pushing_episodes')
    episode = data.get_episode(14)

    # Initialize a new environment according to a specific episode and replay it
    task = generate_task(episode.task_name, **episode.task_params)
    env = CausalWorld(task, **episode.world_params, enable_visualization=True)
    env.set_starting_state(episode.initial_full_state,
                           check_bounds=False)
    for action in episode.robot_actions:
        env.step(action)
    env.close()

    # You can achieve the same by using the viewer module in one line
    viewer.view_episode(episode)


if __name__ == '__main__':
    example()

Much More Available on the Repo

Many more tutorials and examples are available in the CausalWorld repository.