aiaccel.torch.lightning package#

Subpackages#

Submodules#

aiaccel.torch.lightning.abci_environment module#

class aiaccel.torch.lightning.abci_environment.ABCIEnvironment[source]#

Bases: ClusterEnvironment

Environment class for ABCI.

This class provides methods to interact with the ABCI environment, such as retrieving the world size, global rank, node rank, and local rank.

property creates_processes_externally: bool#

Whether the environment creates the subprocesses or not.

static detect() → bool[source]#

Detects the environment settings corresponding to this cluster and returns True if they match.

global_rank() → int[source]#

The rank (index) of the currently running process across all nodes and devices.

local_rank() → int[source]#

The rank (index) of the currently running process inside of the current node.

property main_address: str#

The main address through which all processes connect and communicate.

property main_port: int#

An open and configured port in the main node through which all processes communicate.

node_rank() → int[source]#

The rank (index) of the node on which the current process runs.

set_global_rank(rank: int) → None[source]#

set_world_size(size: int) → None[source]#

validate_settings(num_devices: int, num_nodes: int) → None[source]#

Validates settings configured in the script against the environment, and raises an exception if there is an inconsistency.

world_size() → int[source]#

The number of processes across all devices and nodes.

aiaccel.torch.lightning.opt_lightning_module module#

class aiaccel.torch.lightning.opt_lightning_module.OptimizerConfig(optimizer_generator: Callable[..., optim.optimizer.Optimizer], scheduler_generator: Callable[..., optim.lr_scheduler.LRScheduler] | None = None, scheduler_interval: str | None = 'step', scheduler_monitor: str | None = 'validation/loss')[source]#

Bases: object

Configuration class for the optimizer and scheduler in the LightningModule.

optimizer_generator: Callable[..., optim.optimizer.Optimizer]#

scheduler_generator: Callable[..., optim.lr_scheduler.LRScheduler] | None = None#

scheduler_interval: str | None = 'step'#

scheduler_monitor: str | None = 'validation/loss'#

class aiaccel.torch.lightning.opt_lightning_module.OptimizerLightningModule(optimizer_config: OptimizerConfig)[source]#

Bases: LightningModule

LightningModule subclass for models that use custom optimizers and schedulers.

Parameters:

optimizer_config (OptimizerConfig) – Configuration object for the optimizer.

optcfg#

Configuration object for the optimizer.

Type:

OptimizerConfig

configure_optimizers()[source]#

Configures the optimizer and scheduler for training.

configure_optimizers() → optim.optimizer.Optimizer | OptimizerLRSchedulerConfig[source]#

Configures the optimizer and scheduler for training.

Returns:

The optimizer and scheduler configuration.

Return type:

Union[optim.optimizer.Optimizer, OptimizerLRSchedulerConfig]

Module contents#

class aiaccel.torch.lightning.ABCIEnvironment[source]#

Bases: ClusterEnvironment

Environment class for ABCI.

This class provides methods to interact with the ABCI environment, such as retrieving the world size, global rank, node rank, and local rank.

property creates_processes_externally: bool#

Whether the environment creates the subprocesses or not.

static detect() → bool[source]#

Detects the environment settings corresponding to this cluster and returns True if they match.

global_rank() → int[source]#

The rank (index) of the currently running process across all nodes and devices.

local_rank() → int[source]#

The rank (index) of the currently running process inside of the current node.

property main_address: str#

The main address through which all processes connect and communicate.

property main_port: int#

An open and configured port in the main node through which all processes communicate.

node_rank() → int[source]#

The rank (index) of the node on which the current process runs.

set_global_rank(rank: int) → None[source]#

set_world_size(size: int) → None[source]#

validate_settings(num_devices: int, num_nodes: int) → None[source]#

Validates settings configured in the script against the environment, and raises an exception if there is an inconsistency.

world_size() → int[source]#

The number of processes across all devices and nodes.

class aiaccel.torch.lightning.OptimizerConfig(optimizer_generator: Callable[..., optim.optimizer.Optimizer], scheduler_generator: Callable[..., optim.lr_scheduler.LRScheduler] | None = None, scheduler_interval: str | None = 'step', scheduler_monitor: str | None = 'validation/loss')[source]#

Bases: object

Configuration class for the optimizer and scheduler in the LightningModule.

optimizer_generator: Callable[..., optim.optimizer.Optimizer]#

scheduler_generator: Callable[..., optim.lr_scheduler.LRScheduler] | None = None#

scheduler_interval: str | None = 'step'#

scheduler_monitor: str | None = 'validation/loss'#

class aiaccel.torch.lightning.OptimizerLightningModule(optimizer_config: OptimizerConfig)[source]#

Bases: LightningModule

LightningModule subclass for models that use custom optimizers and schedulers.

Parameters:

optimizer_config (OptimizerConfig) – Configuration object for the optimizer.

optcfg#

Configuration object for the optimizer.

Type:

OptimizerConfig

configure_optimizers()[source]#

Configures the optimizer and scheduler for training.

configure_optimizers() → optim.optimizer.Optimizer | OptimizerLRSchedulerConfig[source]#

Configures the optimizer and scheduler for training.

Returns:

The optimizer and scheduler configuration.

Return type:

Union[optim.optimizer.Optimizer, OptimizerLRSchedulerConfig]