# PufferLib Integration Guide

## Overview

PufferLib provides an emulation layer that enables seamless integration with popular RL frameworks, including Gymnasium, OpenAI Gym, PettingZoo, and many specialized environment libraries. The emulation layer flattens observation and action spaces for efficient vectorization while maintaining compatibility.

## Gymnasium Integration

### Basic Gymnasium Environments

```python
import gymnasium as gym
import pufferlib

# Method 1: Direct wrapping
gym_env = gym.make('CartPole-v1')
puffer_env = pufferlib.emulate(gym_env, num_envs=256)

# Method 2: Using make
env = pufferlib.make('gym-CartPole-v1', num_envs=256)

# Method 3: Custom Gymnasium environment
class MyGymEnv(gym.Env):
    def __init__(self):
        self.observation_space = gym.spaces.Box(low=-1, high=1, shape=(4,))
        self.action_space = gym.spaces.Discrete(2)

    def reset(self, seed=None, options=None):
        super().reset(seed=seed)
        return self.observation_space.sample(), {}

    def step(self, action):
        obs = self.observation_space.sample()
        reward = 1.0
        terminated = False
        truncated = False
        info = {}
        return obs, reward, terminated, truncated, info

# Wrap custom environment
puffer_env = pufferlib.emulate(MyGymEnv, num_envs=128)
```

### Atari Environments

```python
import gymnasium as gym
from gymnasium.wrappers import AtariPreprocessing, FrameStack
import pufferlib

# Standard Atari setup
def make_atari_env(env_name='ALE/Pong-v5'):
    # Disable the env's built-in frameskip so AtariPreprocessing
    # can apply its own (it raises an error otherwise)
    env = gym.make(env_name, frameskip=1)
    env = AtariPreprocessing(env, frame_skip=4)
    env = FrameStack(env, num_stack=4)
    return env

# Vectorize with PufferLib
env = pufferlib.emulate(make_atari_env, num_envs=256)

# Or use built-in
env = pufferlib.make('atari-pong', num_envs=256, frameskip=4, framestack=4)
```

### Complex Observation Spaces

```python
import numpy as np
import gymnasium as gym
from gymnasium.spaces import Dict, Box, Discrete
import pufferlib

class ComplexObsEnv(gym.Env):
    def __init__(self):
        # Dict observation space
        self.observation_space = Dict({
            'image': Box(low=0, high=255, shape=(84, 84, 3), dtype=np.uint8),
            'vector': Box(low=-np.inf, high=np.inf, shape=(10,), dtype=np.float32),
            'discrete': Discrete(5)
        })
        self.action_space = Discrete(4)

    def reset(self, seed=None, options=None):
        return {
            'image': np.zeros((84, 84, 3), dtype=np.uint8),
            'vector': np.zeros(10, dtype=np.float32),
            'discrete': 0
        }, {}

    def step(self, action):
        obs = {
            'image': np.random.randint(0, 256, (84, 84, 3), dtype=np.uint8),
            'vector': np.random.randn(10).astype(np.float32),
            'discrete': np.random.randint(0, 5)
        }
        return obs, 1.0, False, False, {}

# PufferLib automatically flattens and unflattens complex spaces
env = pufferlib.emulate(ComplexObsEnv, num_envs=128)
```

## PettingZoo Integration

### Parallel Environments

```python
from pettingzoo.butterfly import pistonball_v6
import pufferlib

# Wrap PettingZoo parallel environment
pz_env = pistonball_v6.parallel_env()
puffer_env = pufferlib.emulate(pz_env, num_envs=128)

# Or use make directly
env = pufferlib.make('pettingzoo-pistonball', num_envs=128)
```

### AEC (Agent Environment Cycle) Environments

```python
from pettingzoo.classic import chess_v5
import pufferlib

# Wrap AEC environment (PufferLib handles conversion to parallel)
aec_env = chess_v5.env()
puffer_env = pufferlib.emulate(aec_env, num_envs=64)

# Works with any PettingZoo AEC environment
env = pufferlib.make('pettingzoo-chess', num_envs=64)
```
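PettingZoo itself ships an AEC-to-parallel conversion, which is a handy way to see what this kind of wrapping does before handing the environment to PufferLib. A minimal sketch, assuming a recent PettingZoo that exports `aec_to_parallel` from `pettingzoo.utils.conversions` and returns per-agent infos from `reset` (no PufferLib involved):

```python
from pettingzoo.butterfly import pistonball_v6
from pettingzoo.utils.conversions import aec_to_parallel

# Convert the turn-based AEC interface into the parallel, dict-keyed one
parallel_env = aec_to_parallel(pistonball_v6.env())
observations, infos = parallel_env.reset(seed=42)

# One action per live agent; results come back as per-agent dicts
actions = {a: parallel_env.action_space(a).sample() for a in parallel_env.agents}
observations, rewards, terminations, truncations, infos = parallel_env.step(actions)
```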
### Multi-Agent Training

```python
import pufferlib
from pufferlib import PuffeRL

# Create multi-agent environment
env = pufferlib.make('pettingzoo-knights-archers-zombies', num_envs=128)

# Shared policy for all agents (create_policy is user-defined)
policy = create_policy(env.observation_space, env.action_space)

# Train
trainer = PuffeRL(env=env, policy=policy)

num_iterations = 1000
for iteration in range(num_iterations):
    # Observations are dicts: {agent_id: batch_obs}
    rollout = trainer.evaluate()

    # Train on multi-agent data
    trainer.train()
    trainer.mean_and_log()
```

## Third-Party Environments

### Procgen

```python
import pufferlib

# Procgen environments
env = pufferlib.make('procgen-coinrun', num_envs=256, distribution_mode='easy')

# Custom configuration
env = pufferlib.make(
    'procgen-coinrun',
    num_envs=256,
    num_levels=200,    # Number of unique levels
    start_level=0,     # Starting level seed
    distribution_mode='hard'
)
```

### NetHack

```python
import pufferlib

# NetHack Learning Environment
env = pufferlib.make('nethack', num_envs=128)

# MiniHack variants
env = pufferlib.make('minihack-corridor', num_envs=128)
env = pufferlib.make('minihack-room', num_envs=128)
```

### Minigrid

```python
import pufferlib

# Minigrid environments
env = pufferlib.make('minigrid-empty-8x8', num_envs=256)
env = pufferlib.make('minigrid-doorkey-8x8', num_envs=256)
env = pufferlib.make('minigrid-multiroom', num_envs=256)
```

### Neural MMO

```python
import pufferlib

# Large-scale multi-agent environment
env = pufferlib.make(
    'neuralmmo',
    num_envs=64,
    num_agents=128,   # Agents per environment
    map_size=128
)
```

### Crafter

```python
import pufferlib

# Open-ended crafting environment
env = pufferlib.make('crafter', num_envs=128)
```

### GPUDrive

```python
import pufferlib

# GPU-accelerated driving simulator
env = pufferlib.make(
    'gpudrive',
    num_envs=1024,    # Can handle many environments on GPU
    num_vehicles=8
)
```

### MicroRTS

```python
import pufferlib

# Real-time strategy game
env = pufferlib.make(
    'microrts',
    num_envs=128,
    map_size=16,
    max_steps=2000
)
```

### Griddly

```python
import pufferlib

# Grid-based games
env = pufferlib.make('griddly-clusters', num_envs=256)
env = pufferlib.make('griddly-sokoban', num_envs=256)
```

## Custom Wrappers

### Observation Wrappers

```python
import numpy as np
import pufferlib

class NormalizeObservations(pufferlib.Wrapper):
    """Normalize observations to zero mean and unit variance."""

    def __init__(self, env):
        super().__init__(env)
        self.obs_mean = np.zeros(env.observation_space.shape)
        self.obs_std = np.ones(env.observation_space.shape)
        self.count = 0

    def reset(self):
        obs = self.env.reset()
        return self._normalize(obs)

    def step(self, action):
        obs, reward, done, info = self.env.step(action)
        return self._normalize(obs), reward, done, info

    def _normalize(self, obs):
        # Update running statistics (Welford's online algorithm)
        self.count += 1
        delta = obs - self.obs_mean
        self.obs_mean += delta / self.count
        self.obs_std = np.sqrt(((self.count - 1) * self.obs_std ** 2
                                + delta * (obs - self.obs_mean)) / self.count)

        # Normalize
        return (obs - self.obs_mean) / (self.obs_std + 1e-8)
```

### Reward Wrappers

```python
import numpy as np
import pufferlib

class RewardShaping(pufferlib.Wrapper):
    """Add shaped rewards to environment."""

    def __init__(self, env, shaping_fn):
        super().__init__(env)
        self.shaping_fn = shaping_fn

    def step(self, action):
        obs, reward, done, info = self.env.step(action)

        # Add shaped reward
        shaped_reward = reward + self.shaping_fn(obs, action)

        return obs, shaped_reward, done, info

# Usage
def proximity_shaping(obs, action):
    """Reward the agent for getting closer to the goal."""
    goal_pos = np.array([10, 10])
    agent_pos = obs[:2]
    distance = np.linalg.norm(goal_pos - agent_pos)
    return -0.1 * distance

env = pufferlib.make('myenv', num_envs=128)
env = RewardShaping(env, proximity_shaping)
```
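The running update in `NormalizeObservations._normalize` above is the population-variance form of Welford's online algorithm. A standalone sanity check against NumPy's batch statistics (plain NumPy, no PufferLib required) confirms it reproduces the exact mean and standard deviation:

```python
import numpy as np

rng = np.random.default_rng(0)
data = rng.normal(loc=3.0, scale=2.0, size=(1000, 4))

mean = np.zeros(4)
std = np.ones(4)
count = 0
for x in data:
    # Same update as in NormalizeObservations._normalize
    count += 1
    delta = x - mean
    mean += delta / count
    std = np.sqrt(((count - 1) * std ** 2 + delta * (x - mean)) / count)

assert np.allclose(mean, data.mean(axis=0))
assert np.allclose(std, data.std(axis=0))   # population std (ddof=0)
```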
### Frame Stacking

```python
import numpy as np
import pufferlib

class FrameStack(pufferlib.Wrapper):
    """Stack frames for temporal context."""

    def __init__(self, env, num_stack=4):
        super().__init__(env)
        self.num_stack = num_stack
        self.frames = None

    def reset(self):
        obs = self.env.reset()

        # Initialize frame stack by repeating the first observation
        self.frames = np.repeat(obs[np.newaxis], self.num_stack, axis=0)

        return self._get_obs()

    def step(self, action):
        obs, reward, done, info = self.env.step(action)

        # Shift the stack and append the newest frame; reset() rebuilds
        # the stack at the start of the next episode
        self.frames = np.roll(self.frames, shift=-1, axis=0)
        self.frames[-1] = obs

        return self._get_obs(), reward, done, info

    def _get_obs(self):
        return self.frames
```

### Action Repeat

```python
import pufferlib

class ActionRepeat(pufferlib.Wrapper):
    """Repeat actions for multiple steps."""

    def __init__(self, env, repeat=4):
        super().__init__(env)
        self.repeat = repeat

    def step(self, action):
        total_reward = 0.0
        done = False

        for _ in range(self.repeat):
            obs, reward, done, info = self.env.step(action)
            total_reward += reward
            if done:
                break

        return obs, total_reward, done, info
```

## Space Conversion

### Flattening Spaces

PufferLib automatically flattens complex observation/action spaces:

```python
import numpy as np
from gymnasium.spaces import Dict, Box, Discrete

# Complex space
original_space = Dict({
    'image': Box(0, 255, (84, 84, 3), dtype=np.uint8),
    'vector': Box(-np.inf, np.inf, (10,), dtype=np.float32),
    'discrete': Discrete(5)
})

# Automatically flattened by PufferLib: observations are presented as
# flat arrays for efficient processing, but can be unflattened when
# needed for policy processing
```

### Unflattening for Policies

```python
import torch.nn as nn
from pufferlib.pytorch import unflatten_observations

class PolicyWithUnflatten(nn.Module):
    def __init__(self, observation_space, action_space):
        super().__init__()
        self.observation_space = observation_space
        # ... policy architecture ...

    def forward(self, flat_observations):
        # Unflatten to original structure
        observations = unflatten_observations(
            flat_observations,
            self.observation_space
        )

        # Now observations is a dict with 'image', 'vector', 'discrete'
        image_features = self.image_encoder(observations['image'])
        vector_features = self.vector_encoder(observations['vector'])
        # ...
```
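To see concretely what flattening produces, Gymnasium's own space utilities implement the same idea. PufferLib's internal layout may differ in details; this sketch uses only `gymnasium.spaces` to illustrate:

```python
import numpy as np
from gymnasium.spaces import Dict, Box, Discrete, flatten_space, flatten, unflatten

space = Dict({
    'image': Box(0, 255, (84, 84, 3), dtype=np.uint8),
    'vector': Box(-np.inf, np.inf, (10,), dtype=np.float32),
    'discrete': Discrete(5)
})

flat_space = flatten_space(space)
print(flat_space.shape)   # (21183,) = 84*84*3 + 10 + 5 (Discrete becomes one-hot)

sample = space.sample()
flat = flatten(space, sample)       # single 1-D array
restored = unflatten(space, flat)   # back to the original dict structure
```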
## Environment Registration

### Registering Custom Environments

```python
import pufferlib

# Register environment for easy access
pufferlib.register(
    id='my-custom-env',
    entry_point='my_package.envs:MyEnvironment',
    kwargs={'param1': 'value1'}
)

# Now can use with make
env = pufferlib.make('my-custom-env', num_envs=256)
```

### Registering in Ocean Suite

To add your environment to Ocean:

```python
# In ocean/environment.py
OCEAN_REGISTRY = {
    'my-env': {
        'entry_point': 'my_package.envs:MyEnvironment',
        'kwargs': {
            'default_param': 'default_value'
        }
    }
}
```

## Compatibility Patterns

### Gymnasium to PufferLib

```python
import gymnasium as gym
import pufferlib

# Standard Gymnasium environment
class GymEnv(gym.Env):
    def reset(self, seed=None, options=None):
        return observation, info

    def step(self, action):
        return observation, reward, terminated, truncated, info

# Convert to PufferEnv
puffer_env = pufferlib.emulate(GymEnv, num_envs=128)
```

### PettingZoo to PufferLib

```python
from pettingzoo import ParallelEnv
import pufferlib

# PettingZoo parallel environment
class PZEnv(ParallelEnv):
    def reset(self, seed=None, options=None):
        # Returns per-agent dicts: {agent_id: obs}, {agent_id: info}
        return observations, infos

    def step(self, actions):
        return observations, rewards, terminations, truncations, infos

# Convert to PufferEnv
puffer_env = pufferlib.emulate(PZEnv, num_envs=128)
```

### Legacy Gym (v0.21) to PufferLib

```python
import gym  # Old gym
import pufferlib

# Legacy gym environment (returns done instead of terminated/truncated)
class LegacyEnv(gym.Env):
    def reset(self):
        return observation

    def step(self, action):
        return observation, reward, done, info

# PufferLib handles the legacy format automatically
puffer_env = pufferlib.emulate(LegacyEnv, num_envs=128)
```

## Performance Considerations

### Efficient Integration

```python
import gymnasium as gym
import pufferlib

# Fast: Use built-in integrations when available
env = pufferlib.make('procgen-coinrun', num_envs=256)

# Slower: Generic wrapper (still fast, but some overhead)
gym_env = gym.make('CartPole-v1')
env = pufferlib.emulate(gym_env, num_envs=256)

# Slowest: Nested wrappers add overhead
gym_env = gym.make('CartPole-v1')
gym_env = SomeWrapper(gym_env)
gym_env = AnotherWrapper(gym_env)
env = pufferlib.emulate(gym_env, num_envs=256)
```

### Minimize Wrapper Overhead

```python
# BAD: Too many wrappers
env = gym.make('CartPole-v1')
env = Wrapper1(env)
env = Wrapper2(env)
env = Wrapper3(env)
puffer_env = pufferlib.emulate(env, num_envs=256)

# GOOD: Combine wrapper logic
class CombinedWrapper(gym.Wrapper):
    def step(self, action):
        obs, reward, terminated, truncated, info = self.env.step(action)

        # Apply all transformations at once
        obs = self._transform_obs(obs)
        reward = self._transform_reward(reward)

        return obs, reward, terminated, truncated, info

env = gym.make('CartPole-v1')
env = CombinedWrapper(env)
puffer_env = pufferlib.emulate(env, num_envs=256)
```
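These differences are easy to measure on your own machine. A quick steps-per-second comparison using plain Gymnasium and the standard library (the pass-through `NoopWrapper` is hypothetical, purely for illustration):

```python
import time
import gymnasium as gym

class NoopWrapper(gym.Wrapper):
    """Pass-through wrapper used only to measure per-call overhead."""
    def step(self, action):
        return self.env.step(action)

def steps_per_second(env, n=50_000):
    env.reset(seed=0)
    start = time.perf_counter()
    for _ in range(n):
        _, _, terminated, truncated, _ = env.step(env.action_space.sample())
        if terminated or truncated:
            env.reset()
    return n / (time.perf_counter() - start)

bare = gym.make('CartPole-v1')
wrapped = NoopWrapper(NoopWrapper(NoopWrapper(gym.make('CartPole-v1'))))

print(f'bare:    {steps_per_second(bare):,.0f} steps/s')
print(f'wrapped: {steps_per_second(wrapped):,.0f} steps/s')
```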
## Debugging Integration

### Verify Environment Compatibility

```python
def test_environment(env, num_steps=100):
    """Test environment for common issues."""
    # Test reset
    obs = env.reset()
    assert env.observation_space.contains(obs), "Invalid initial observation"

    # Test steps
    for _ in range(num_steps):
        action = env.action_space.sample()
        obs, reward, done, info = env.step(action)

        assert env.observation_space.contains(obs), "Invalid observation"
        assert isinstance(reward, (int, float)), "Invalid reward type"
        assert isinstance(done, bool), "Invalid done type"
        assert isinstance(info, dict), "Invalid info type"

        if done:
            obs = env.reset()

    print("✓ Environment passed compatibility test")

# Test before vectorizing
test_environment(MyEnvironment())
```

### Compare Outputs

```python
# Verify PufferLib emulation matches the original environment
import gymnasium as gym
import numpy as np
import pufferlib

gym_env = gym.make('CartPole-v1')
puffer_env = pufferlib.emulate(lambda: gym.make('CartPole-v1'), num_envs=1)

# Reset both environments with the same seed so trajectories match
gym_env.reset(seed=42)
puffer_obs = puffer_env.reset(seed=42)

for _ in range(100):
    action = gym_env.action_space.sample()

    gym_obs, gym_reward, gym_terminated, gym_truncated, gym_info = gym_env.step(action)
    puffer_obs, puffer_reward, puffer_done, puffer_info = puffer_env.step(np.array([action]))

    # Compare outputs (accounting for the batch dimension)
    assert np.allclose(gym_obs, puffer_obs[0])
    assert gym_reward == puffer_reward[0]
    assert (gym_terminated or gym_truncated) == puffer_done[0]

    # Re-seed and reset both environments together at episode boundaries
    if gym_terminated or gym_truncated:
        gym_env.reset(seed=42)
        puffer_obs = puffer_env.reset(seed=42)
```
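A similar spot check helps after vectorization: whatever the single-env API returns, the batched env should return arrays with a leading `num_envs` dimension. A minimal sketch, assuming the `pufferlib.make` interface and the 4-tuple batched step convention used in the Compare Outputs example above:

```python
import numpy as np
import pufferlib

def test_vectorized(env, num_envs, num_steps=10):
    """Check that batched outputs carry a leading num_envs dimension."""
    obs = env.reset()
    assert obs.shape[0] == num_envs, "reset() should return a batch of observations"

    for _ in range(num_steps):
        actions = np.array([env.action_space.sample() for _ in range(num_envs)])
        obs, rewards, dones, infos = env.step(actions)

        assert obs.shape[0] == num_envs
        assert rewards.shape == (num_envs,)
        assert dones.shape == (num_envs,)

    print("✓ Vectorized environment passed shape check")

env = pufferlib.make('gym-CartPole-v1', num_envs=8)
test_vectorized(env, num_envs=8)
```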