# Source code for ray.rllib.core.rl_module.default_model_config

from dataclasses import dataclass, field
from typing import Callable, List, Optional, Union

from ray.rllib.utils.typing import ConvFilterSpec
from ray.util.annotations import DeveloperAPI


@DeveloperAPI
@dataclass
class DefaultModelConfig:
    """Dataclass to configure all default RLlib RLModules.

    Users should NOT use this class for configuring their own custom RLModules.
    Instead, use a custom `model_config` dict with arbitrary (str) keys passed into
    the `RLModuleSpec` used to define the custom RLModule. For example:

    .. testcode::

        import gymnasium as gym
        import numpy as np

        from ray.rllib.core.rl_module.rl_module import RLModuleSpec
        from ray.rllib.examples.rl_modules.classes.tiny_atari_cnn_rlm import (
            TinyAtariCNN
        )

        my_rl_module = RLModuleSpec(
            module_class=TinyAtariCNN,
            observation_space=gym.spaces.Box(-1.0, 1.0, (64, 64, 4), np.float32),
            action_space=gym.spaces.Discrete(7),
            # DreamerV3-style stack working on a 64x64, color or
            # 4x-grayscale-stacked, normalized image.
            model_config={
                "conv_filters": [[16, 4, 2], [32, 4, 2], [64, 4, 2], [128, 4, 2]],
            },
        ).build()

    Only RLlib's default RLModules (defined by the various algorithms) should use
    this dataclass. Pass an instance of it into your algorithm config like so:

    .. testcode::

        from ray.rllib.algorithms.ppo import PPOConfig
        from ray.rllib.core.rl_module.default_model_config import (
            DefaultModelConfig
        )

        config = (
            PPOConfig()
            .rl_module(
                model_config=DefaultModelConfig(fcnet_hiddens=[32, 32]),
            )
        )
    """

    # ====================================================
    # MLP stacks (usage sketch: `_example_mlp_config` below)
    # ====================================================
    # __sphinx_doc_default_model_config_fcnet_begin__
    #: List containing the sizes (number of nodes) of a fully connected (MLP)
    #: stack. Note that in an encoder-based default architecture with a policy
    #: head (and possibly a value head), this setting only affects the encoder
    #: component. To set the policy (and value) head sizes, use
    #: `head_fcnet_hiddens` instead. For example, if you set
    #: `fcnet_hiddens=[32, 32]` and `head_fcnet_hiddens=[64]`, you get an
    #: RLModule with a [32, 32] encoder, a [64, act-dim] policy head, and a
    #: [64, 1] value head (if applicable).
    fcnet_hiddens: List[int] = field(default_factory=lambda: [256, 256])
    #: Activation function descriptor for the stack configured by
    #: `fcnet_hiddens`. Supported values are: 'tanh', 'relu', 'swish' (or
    #: 'silu', which is the same), and 'linear' (or None).
    fcnet_activation: str = "tanh"
    #: Initializer function or class descriptor for the weight/kernel matrices
    #: in the stack configured by `fcnet_hiddens`. Supported values are the
    #: initializer names (str), classes, or functions listed by the frameworks
    #: (`torch`). See https://pytorch.org/docs/stable/nn.init.html for `torch`.
    #: If `None` (default), the default initializer defined by `torch` is used.
    fcnet_kernel_initializer: Optional[Union[str, Callable]] = None
    #: Kwargs passed into the initializer function defined through
    #: `fcnet_kernel_initializer`.
    fcnet_kernel_initializer_kwargs: Optional[dict] = None
    #: Initializer function or class descriptor for the bias vectors in the
    #: stack configured by `fcnet_hiddens`. Supported values are the
    #: initializer names (str), classes, or functions listed by the frameworks
    #: (`torch`). See https://pytorch.org/docs/stable/nn.init.html for `torch`.
    #: If `None` (default), the default initializer defined by `torch` is used.
    fcnet_bias_initializer: Optional[Union[str, Callable]] = None
    #: Kwargs passed into the initializer function defined through
    #: `fcnet_bias_initializer`.
    fcnet_bias_initializer_kwargs: Optional[dict] = None
    # __sphinx_doc_default_model_config_fcnet_end__

    # ====================================================
    # Conv2D stacks (usage sketch: `_example_conv_config` below)
    # ====================================================
    # __sphinx_doc_default_model_config_conv_begin__
    #: List of lists of format [num_out_channels, kernel, stride] defining a
    #: Conv2D stack if the input space is 2D. Each item in the outer list
    #: represents one Conv2D layer. `kernel` and `stride` may be single ints
    #: (width and height have the same value) or 2-tuples (int, int) specifying
    #: width and height dimensions separately. If None (default) and the input
    #: space is 2D, RLlib tries to find a default filter setup given the exact
    #: input dimensions.
    conv_filters: Optional[ConvFilterSpec] = None
    #: Activation function descriptor for the stack configured by
    #: `conv_filters`. Supported values are: 'tanh', 'relu', 'swish' (or
    #: 'silu', which is the same), and 'linear' (or None).
    conv_activation: str = "relu"
    #: Initializer function or class descriptor for the weight/kernel matrices
    #: in the stack configured by `conv_filters`. Supported values are the
    #: initializer names (str), classes, or functions listed by the frameworks
    #: (`torch`). See https://pytorch.org/docs/stable/nn.init.html for `torch`.
    #: If `None` (default), the default initializer defined by `torch` is used.
    conv_kernel_initializer: Optional[Union[str, Callable]] = None
    #: Kwargs passed into the initializer function defined through
    #: `conv_kernel_initializer`.
    conv_kernel_initializer_kwargs: Optional[dict] = None
    #: Initializer function or class descriptor for the bias vectors in the
    #: stack configured by `conv_filters`. Supported values are the initializer
    #: names (str), classes, or functions listed by the frameworks (`torch`).
    #: See https://pytorch.org/docs/stable/nn.init.html for `torch`. If `None`
    #: (default), the default initializer defined by `torch` is used.
    conv_bias_initializer: Optional[Union[str, Callable]] = None
    #: Kwargs passed into the initializer function defined through
    #: `conv_bias_initializer`.
    conv_bias_initializer_kwargs: Optional[dict] = None
    # __sphinx_doc_default_model_config_conv_end__

    # ====================================================
    # Head configs (e.g. policy- or value function heads)
    # ====================================================
    #: List containing the sizes (number of nodes) of a fully connected (MLP)
    #: head (e.g. policy-, value-, or Q-head). Note that to configure the
    #: encoder architecture, use `fcnet_hiddens` instead.
    head_fcnet_hiddens: List[int] = field(default_factory=lambda: [])
    #: Activation function descriptor for the stack configured by
    #: `head_fcnet_hiddens`. Supported values are: 'tanh', 'relu', 'swish' (or
    #: 'silu', which is the same), and 'linear' (or None).
    head_fcnet_activation: str = "relu"
    #: Initializer function or class descriptor for the weight/kernel matrices
    #: in the stack configured by `head_fcnet_hiddens`. Supported values are
    #: the initializer names (str), classes, or functions listed by the
    #: frameworks (`torch`). See https://pytorch.org/docs/stable/nn.init.html
    #: for `torch`. If `None` (default), the default initializer defined by
    #: `torch` is used.
    head_fcnet_kernel_initializer: Optional[Union[str, Callable]] = None
    #: Kwargs passed into the initializer function defined through
    #: `head_fcnet_kernel_initializer`.
    head_fcnet_kernel_initializer_kwargs: Optional[dict] = None
    #: Initializer function or class descriptor for the bias vectors in the
    #: stack configured by `head_fcnet_hiddens`. Supported values are the
    #: initializer names (str), classes, or functions listed by the frameworks
    #: (`torch`). See https://pytorch.org/docs/stable/nn.init.html for `torch`.
    #: If `None` (default), the default initializer defined by `torch` is used.
    head_fcnet_bias_initializer: Optional[Union[str, Callable]] = None
    #: Kwargs passed into the initializer function defined through
    #: `head_fcnet_bias_initializer`.
    head_fcnet_bias_initializer_kwargs: Optional[dict] = None

    # ====================================================
    # Continuous action settings
    # (usage sketch: `_example_lstm_continuous_config` below)
    # ====================================================
    #: If True, for DiagGaussian action distributions (or any other continuous
    #: control distribution), make the second half of the policy's outputs a
    #: "free" bias parameter, rather than state-/NN-dependent nodes. In this
    #: case, the number of output nodes of the policy head has the same
    #: dimension as the action space, because no slots for log(stddev) are
    #: required (only for the mean values).
    free_log_std: bool = False
    #: The clip value for log(stddev) when using a DiagGaussian action
    #: distribution (or any other continuous control distribution). Clipping
    #: can stabilize training and avoid very small or large log(stddev) values,
    #: which lead to numerical instabilities and turn outputs to `nan`. The
    #: default is to clamp log(stddev) between -20 and 20. Set this to
    #: float("inf") for no clamping.
    log_std_clip_param: float = 20.0

    # ====================================================
    # Encoder sharing settings
    # ====================================================
    #: Whether encoder layers (defined by `fcnet_hiddens` or `conv_filters`)
    #: should be shared between the policy and the value function.
    vf_share_layers: bool = True

    # ====================================================
    # LSTM settings (usage sketch: `_example_lstm_continuous_config` below)
    # ====================================================
    #: Whether to wrap the encoder component (defined by `fcnet_hiddens` or
    #: `conv_filters`) with an LSTM.
    use_lstm: bool = False
    #: The maximum seq len for building the train batch for an LSTM model.
    #: Defaults to 20.
    max_seq_len: int = 20
    #: The size of the LSTM cell.
    lstm_cell_size: int = 256
    #: Whether to feed the most recent action as an additional input to the
    #: LSTM.
    lstm_use_prev_action: bool = False
    #: Whether to feed the most recent reward as an additional input to the
    #: LSTM.
    lstm_use_prev_reward: bool = False
    #: Initializer function or class descriptor for the weight/kernel matrices
    #: in the LSTM layer. Supported values are the initializer names (str),
    #: classes, or functions listed by the frameworks (`torch`). See
    #: https://pytorch.org/docs/stable/nn.init.html for `torch`. If `None`
    #: (default), the default initializer defined by `torch` is used.
    lstm_kernel_initializer: Optional[Union[str, Callable]] = None
    #: Kwargs passed into the initializer function defined through
    #: `lstm_kernel_initializer`.
    lstm_kernel_initializer_kwargs: Optional[dict] = None
    #: Initializer function or class descriptor for the bias vectors in the
    #: LSTM layer. Supported values are the initializer names (str), classes,
    #: or functions listed by the frameworks (`torch`). See
    #: https://pytorch.org/docs/stable/nn.init.html for `torch`. If `None`
    #: (default), the default initializer defined by `torch` is used.
    lstm_bias_initializer: Optional[Union[str, Callable]] = None
    #: Kwargs passed into the initializer function defined through
    #: `lstm_bias_initializer`.
    lstm_bias_initializer_kwargs: Optional[dict] = None
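

# ----------------------------------------------------------------------
# Editorial usage sketches -- NOT part of RLlib's shipped module. The
# helpers below only illustrate how the fields above combine into a
# `DefaultModelConfig`; all `_example_*` names are hypothetical.
# ----------------------------------------------------------------------


def _example_mlp_config() -> DefaultModelConfig:
    """Sketch: a [64, 64] MLP encoder plus one [32] hidden layer per head.

    Assumes the initializer string resolves against `torch.nn.init`, per the
    field docstrings above; `"xavier_uniform_"` and its `gain` kwarg are taken
    from https://pytorch.org/docs/stable/nn.init.html.
    """
    return DefaultModelConfig(
        # Encoder: two hidden layers with 64 nodes each.
        fcnet_hiddens=[64, 64],
        fcnet_activation="relu",
        # Policy/value heads: one extra hidden layer with 32 nodes each.
        head_fcnet_hiddens=[32],
        head_fcnet_activation="relu",
        # Initializer descriptor (and kwargs) for the encoder's weight matrices.
        fcnet_kernel_initializer="xavier_uniform_",
        fcnet_kernel_initializer_kwargs={"gain": 1.0},
    )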
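

def _example_conv_config() -> DefaultModelConfig:
    """Sketch: a Conv2D encoder stack for 2D (image) observations.

    Each inner list is [num_out_channels, kernel, stride]; kernel and stride
    may be single ints or (int, int) tuples, per the `conv_filters` docstring
    above. The concrete filter sizes are illustrative only.
    """
    return DefaultModelConfig(
        conv_filters=[
            [16, 8, 4],       # 16 channels, 8x8 kernel, stride 4
            [32, (4, 4), 2],  # 32 channels, 4x4 kernel (tuple form), stride 2
            [64, 3, 1],       # 64 channels, 3x3 kernel, stride 1
        ],
        conv_activation="relu",
    )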
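

def _example_lstm_continuous_config() -> DefaultModelConfig:
    """Sketch: an LSTM-wrapped encoder for a continuous-action task.

    `free_log_std=True` turns the log(stddev) outputs into state-independent
    bias parameters, and `log_std_clip_param=5.0` narrows the default
    [-20, 20] clamp range. All concrete values are illustrative only.
    """
    return DefaultModelConfig(
        fcnet_hiddens=[256],
        # Wrap the MLP encoder with a 128-unit LSTM and train on sequences
        # of up to 20 timesteps.
        use_lstm=True,
        lstm_cell_size=128,
        max_seq_len=20,
        # Also feed the previous action and reward into the LSTM.
        lstm_use_prev_action=True,
        lstm_use_prev_reward=True,
        # Continuous-action (DiagGaussian) settings.
        free_log_std=True,
        log_std_clip_param=5.0,
    )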