import logging

from extensions.rl_poisoneddoors.poisoneddoors_experiments.base import (
    PoisonedDoorsBaseExperimentConfig,
)
from extensions.rl_poisoneddoors.poisoneddoors_offpolicy import (
    PoisonedDoorsOffPolicyExpertCELoss,
)
from utils.experiment_utils import PipelineStage, OffPolicyPipelineComponent

# Module-level logger obtained under the fixed name "embodiedrl".
# NOTE(review): not referenced anywhere in the visible part of this file.
LOGGER = logging.getLogger("embodiedrl")


class PPOPoisonedDoorsBossLevelExperimentConfig(PoisonedDoorsBaseExperimentConfig):
    """Off-policy-only imitation configuration for PoisonedDoors.

    Despite the ``PPO`` prefix in the class name, the single pipeline stage
    built here lists no on-policy losses: the only loss that is registered
    and used is the off-policy expert cross-entropy loss, and the demo
    defaults are requested with ``also_using_ppo=False``.
    """

    @classmethod
    def extra_tag(cls):
        """Return the experiment tag, which embeds the value of ``cls.lr()``."""
        learning_rate = cls.lr()
        return "PureOffPolicyBC__lr_{}".format(learning_rate)

    @classmethod
    def training_pipeline(cls, **kwargs):
        """Build the training pipeline via ``cls._training_pipeline``.

        The pipeline consists of one stage lasting ``cls.TOTAL_TRAIN_STEPS``
        steps whose off-policy component applies ``offpolicy_expert_ce_loss``.

        Args:
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            Whatever ``cls._training_pipeline`` returns for this setup.
        """
        total_steps = cls.TOTAL_TRAIN_STEPS
        demo_defaults = cls.offpolicy_demo_defaults(also_using_ppo=False)

        expert_loss_name = "offpolicy_expert_ce_loss"
        losses_by_name = {expert_loss_name: PoisonedDoorsOffPolicyExpertCELoss()}

        # The stopping criterion is optional: ``.get`` yields ``None`` when the
        # task info has no "early_stopping_criterion" entry.
        stopping_criterion = cls.task_info().get("early_stopping_criterion")

        imitation_component = OffPolicyPipelineComponent(
            data_iterator_builder=demo_defaults["data_iterator_builder"],
            loss_names=[expert_loss_name],
            updates=demo_defaults["offpolicy_updates"],
        )

        # No on-policy losses: the stage's own ``loss_names`` list is empty.
        imitation_stage = PipelineStage(
            loss_names=[],
            max_stage_steps=total_steps,
            early_stopping_criterion=stopping_criterion,
            offpolicy_component=imitation_component,
        )

        # NOTE(review): zero mini-batches / update repeats presumably disable
        # on-policy updates -- confirm against ``_training_pipeline``.
        return cls._training_pipeline(
            named_losses=losses_by_name,
            pipeline_stages=[imitation_stage],
            num_mini_batch=0,
            update_repeats=0,
        )
