from abc import ABC, abstractmethod
from numbers import Real
from typing import Tuple, Dict
import gym
import numpy as np
import torch
# Type aliases shared by the interfaces below.
State = np.ndarray  # environment observation
Action = np.ndarray  # agent action
Reward = Real  # scalar reward (any numbers.Real)
Space = gym.Space  # gym action/observation space
class StorageABC(ABC):
    """Interface for transition storage used by agents.

    Declares accessors for stored states/actions/rewards/done flags
    plus batch sampling for replay-style training.
    """

    @abstractmethod
    def update(self, action, reward, done, state):
        """Append a single (action, reward, done, state) transition."""

    @abstractmethod
    def new_state_update(self, state):
        """Record a fresh state (e.g. after an environment reset)."""

    @abstractmethod
    def get_states(self) -> np.ndarray:
        """Return all stored states."""

    @abstractmethod
    def get_last_state(self) -> np.ndarray:
        """Return the most recently stored state."""

    @abstractmethod
    def get_rewards(self) -> np.ndarray:
        """Return all stored rewards."""

    @abstractmethod
    def get_actions(self) -> np.ndarray:
        """Return all stored actions."""

    @abstractmethod
    def get_dones(self) -> np.ndarray:
        """Return all stored episode-termination flags."""

    @abstractmethod
    def sample_batch(
        self, replay_buffor_size: int, batch_size: int, returns: bool, next_states: bool
    ) -> tuple:
        """Sample a batch of stored transitions.

        NOTE: the parameter name ``replay_buffor_size`` (sic) is kept
        for backward compatibility with keyword callers.
        """

    @abstractmethod
    def __getitem__(self, indicies) -> tuple:
        """Return the transition(s) at the given indices."""

    @abstractmethod
    def __len__(self):
        """Return the number of stored transitions."""

    @abstractmethod
    def __repr__(self):
        """Return a debug representation."""


# Backward-compatible alias for the old name of this interface.
MemoryABC = StorageABC
class HistoryABC(ABC):
    """Interface for an episode/transition history buffer.

    Extends the basic storage contract with return computation,
    per-episode statistics, in-place merging and dynamic resizing.
    """

    # Write cursor into the arrays below — assumed from the name; confirm
    # in implementations.
    _index: int
    states: np.ndarray
    actions: np.ndarray
    rewards: np.ndarray
    dones: np.ndarray

    @abstractmethod
    def update(self, action, reward, done, state):
        """Append a single (action, reward, done, state) transition."""

    @abstractmethod
    def new_state_update(self, state):
        """Record a fresh state (e.g. after an environment reset)."""

    @abstractmethod
    def get_states(self) -> np.ndarray:
        """Return all stored states."""

    @abstractmethod
    def get_last_state(self) -> np.ndarray:
        """Return the most recently stored state."""

    @abstractmethod
    def get_rewards(self) -> np.ndarray:
        """Return all stored rewards."""

    @abstractmethod
    def get_actions(self) -> np.ndarray:
        """Return all stored actions."""

    @abstractmethod
    def get_dones(self) -> np.ndarray:
        """Return all stored episode-termination flags."""

    @abstractmethod
    def get_returns(self, discount_factor: float, horizon: float) -> np.ndarray:
        """Return discounted returns computed from the stored rewards."""

    @abstractmethod
    def get_total_rewards(self) -> np.ndarray:
        """Return the total (undiscounted) reward per episode."""

    @abstractmethod
    def get_number_of_episodes(self) -> int:
        """Return how many episodes the history contains."""

    @abstractmethod
    def sample_batch(
        self, replay_buffor_size: int, batch_size: int, returns: bool, next_states: bool
    ) -> tuple:
        """Sample a batch of stored transitions.

        NOTE: the parameter name ``replay_buffor_size`` (sic) is kept
        for backward compatibility with keyword callers.
        """

    @abstractmethod
    def get_summary(self):
        """Return summary statistics of the stored history."""

    @abstractmethod
    def _enlarge(self):
        """Grow the underlying storage arrays (implementation detail)."""

    @abstractmethod
    def __getitem__(self, indicies) -> tuple:
        """Return the transition(s) at the given indices."""

    @abstractmethod
    def __iadd__(self, other: "HistoryABC") -> "HistoryABC":
        """Merge ``other`` into this history in place and return it."""

    @abstractmethod
    def __len__(self) -> int:
        """Return the number of stored transitions."""

    @abstractmethod
    def __repr__(self) -> str:
        """Return a debug representation."""
class EnvironmentABC(ABC):
    """Interface for a wrapped ``gym.Env``.

    Combines the raw environment with state/reward/action transformers
    and a state history.
    """

    _id: str
    _state_transformer: StateTransformerABC
    _reward_transformer: RewardTransformerABC
    _action_transformer: ActionTransformerABC
    _state_history: HistoryABC
    _env: gym.Env
    # dtype to which actions are cast before stepping — assumed; confirm
    # in implementations.
    _action_dtype: type
    # Presumably toggles reporting untransformed environment rewards —
    # confirm in implementations.
    true_reward: bool
    initial_history_length: int

    @property
    @abstractmethod
    def id(self):
        """Identifier of the environment."""

    @property
    @abstractmethod
    def state_transformer(self) -> StateTransformerABC:
        """Transformer applied to environment states."""

    @property
    @abstractmethod
    def reward_transformer(self) -> RewardTransformerABC:
        """Transformer applied to environment rewards."""

    @property
    @abstractmethod
    def action_transformer(self) -> ActionTransformerABC:
        """Transformer applied to agent actions."""

    @property
    @abstractmethod
    def action_space(self) -> Space:
        """The environment's action space."""

    @property
    @abstractmethod
    def observation_space(self) -> Space:
        """The environment's observation space."""

    @property
    @abstractmethod
    def state_history(self) -> HistoryABC:
        """History of states observed so far."""

    @abstractmethod
    def reset(self) -> State:
        """Reset the environment and return the initial state."""

    @abstractmethod
    def step(self, action: Action) -> Tuple[State, Reward, bool, Dict]:
        """Apply ``action``; return (state, reward, done, info)."""

    @abstractmethod
    def close(self):
        """Release resources held by the underlying environment."""
class AgentABC(ABC):
    """Interface for reinforcement-learning agents."""

    # Progress counters — exact semantics assumed from the names;
    # confirm in implementations.
    step_count: int
    iteration_count: int
    episode_count: int

    @property
    @abstractmethod
    def id(self) -> str:
        """Identifier of the agent."""

    @abstractmethod
    def train(
        self, env: EnvironmentABC, n_iterations: int, callback_list: list, **kwargs
    ):
        """Run the full training loop for ``n_iterations`` iterations."""

    @abstractmethod
    def train_iteration(self, env: EnvironmentABC, **kwargs) -> Tuple[float, HistoryABC]:
        """Run one training iteration.

        Return annotation fixed: the original ``(float, HistoryABC)``
        tuple literal is not a valid type annotation; ``Tuple[...]`` is.
        """

    @abstractmethod
    def pre_train_setup(self, env: EnvironmentABC, **kwargs):
        """Prepare internal state before training starts."""

    @abstractmethod
    def post_train_cleanup(self, env: EnvironmentABC, **kwargs):
        """Clean up internal state after training finishes."""

    @abstractmethod
    def act(self, state: State):
        """Return the agent's action for ``state``."""

    @abstractmethod
    def play_episodes(self, env, episodes: int) -> HistoryABC:
        """Play ``episodes`` full episodes and return their history."""

    @abstractmethod
    def play_steps(
        self, env: EnvironmentABC, n_steps: int, history: HistoryABC
    ) -> HistoryABC:
        """Play ``n_steps`` environment steps, extending ``history``."""

    @abstractmethod
    def test(self, env) -> HistoryABC:
        """Evaluate the agent on ``env`` and return the run's history."""
class AdvantageABC(ABC):
    """Interface for advantage estimators.

    Implementations are callables mapping rewards, baseline values and
    done flags (plus a discount factor) to per-step advantage values.
    """

    @abstractmethod
    def __call__(
        self,
        rewards: np.ndarray,
        baselines: np.ndarray,
        dones: np.ndarray,
        discount_factor: float,
    ) -> np.ndarray:
        """Compute advantages for the given trajectory arrays."""
class FunctionApproximatorABC(ABC):
    """Interface for trainable function approximators."""

    @property
    @abstractmethod
    def id(self) -> str:
        """Identifier of the approximator."""

    @abstractmethod
    def train(self, x: np.ndarray, *loss_args) -> float:
        """Run one training step on ``x``.

        Returns a float — presumably the loss value; confirm in
        implementations.
        """

    @abstractmethod
    def predict(self, x):
        """Return the approximator's output for input ``x``."""
class AgentCallbackABC(ABC):
    """Interface for callbacks hooked into an agent's training loop.

    The hook methods are deliberately NOT abstract (they default to
    no-ops), so subclasses may override only the events they need.
    """

    # Logger channel ids — semantics assumed from the names; confirm in
    # implementations.
    time_logger_cid: int
    agent_logger_cid: int
    memory_logger_cid: int
    nn_logger_cid: int
    misc_logger_cid: int
    # How often (in iterations) the callback fires — assumed; confirm.
    iteration_interval: int
    number_of_test_runs: int
    # Whether the callback requires test runs to be executed.
    needs_tests: bool

    def on_iteration_end(self, agent: AgentABC) -> bool:
        """Called after each training iteration.

        Annotated to return bool — presumably a stop/continue signal;
        confirm at the call site.
        """

    def on_training_end(self, agent: AgentABC):
        """Called once when training finishes."""

    def on_training_begin(self, agent: AgentABC):
        """Called once before training starts."""
class PytorchNetABC(torch.nn.Module, ABC):
    """Base class for PyTorch networks used by the agents.

    ``ABC`` is mixed in because ``torch.nn.Module`` alone does not use
    ``ABCMeta``: without it ``@abstractmethod`` is not enforced and the
    original class could be instantiated despite its abstract methods.
    """

    @abstractmethod
    def forward(self, x: torch.Tensor):
        """Compute the network's output for input ``x``."""

    @abstractmethod
    def predict(self, x: torch.Tensor):
        """Return predictions for ``x``."""