# Source code for amlgym.algorithms.OLAM

from dataclasses import dataclass
from typing import Tuple, ClassVar, OrderedDict

from unified_planning.model import UPState
from unified_planning.shortcuts import SequentialSimulator

from amlgym.algorithms.ActiveAlgorithmAdapter import ActiveAlgorithmAdapter
from amlgym.modeling.trajectory import Trajectory
from olam.OLAM import OLAM as OLAMLearner


@dataclass
class OLAM(ActiveAlgorithmAdapter):
    """
    Adapter class for running the OLAM algorithm: "Online Learning of Action Models
    for PDDL Planning", L. Lamanna, A. Saetti, L. Serafini, A. Gerevini, and P. Traverso,
    Proceedings of the Thirtieth International Joint Conference on Artificial Intelligence, 2021.
    https://doi.org/10.24963/ijcai.2021/566

    Example:
        .. code-block:: python

            from unified_planning.io import PDDLReader
            from unified_planning.shortcuts import SequentialSimulator
            from amlgym.algorithms import get_algorithm
            from amlgym.benchmarks import get_domain_path, get_problems_path
            from amlgym.util.util import empty_domain

            domain = 'blocksworld'
            domain_ref_path = get_domain_path(domain)
            input_domain_path = empty_domain(domain_ref_path)
            problem_path = get_problems_path(domain, kind='learning')[0]
            problem = PDDLReader().parse_problem(domain_ref_path, problem_path)

            env = SequentialSimulator(problem=problem)
            olam = get_algorithm('OLAM', input_domain_path=input_domain_path)
            model, trajectory = olam.learn(env)

            print("##################### Learned model #####################")
            print(model)

            print("################# Generated trajectory ##################")
            print(trajectory)

    Args:
        planning_timeout (int): Time limit in seconds for each planning call (default: 30)
        max_length (int): Maximum number of uncertain preconditions/effects considered in
            goal conjunctions (default: 8)
        max_subproblems (int): Maximum number of subproblems when handling object type
            ambiguity (default: 5)
        max_goals (int): Maximum number of disjunctions in a goal formula used
            during planning for learning preconditions and effects.
            When the number of generated goals exceeds this limit,
            some goals are discarded. (default: 10000)
    """
    # Bibliographic metadata of the wrapped algorithm; declared as a ClassVar so
    # that the dataclass machinery does not turn it into an instance field.
    _reference: ClassVar[OrderedDict[str, str]] = {
        'Authors': "L. Lamanna, A. Saetti, L. Serafini, A. Gerevini, and P. Traverso",
        'Title': "Online Learning of Action Models for PDDL Planning",
        'Venue': "International Joint Conference on Artificial Intelligence",
        'Year': 2021,
        'URL': "https://doi.org/10.24963/ijcai.2021/566",
    }

    # Hyper-parameters forwarded verbatim to the underlying OLAM learner.
    planning_timeout: int = 30
    max_length: int = 8
    max_subproblems: int = 5
    max_goals: int = 10000

    def __post_init__(self):
        """
        Instantiate the wrapped OLAM learner from this adapter's configuration.

        ``input_domain_path`` is not declared in this class; it is presumably a
        field inherited from ``ActiveAlgorithmAdapter`` -- confirm against the
        base class.
        """
        self._learner = OLAMLearner(
            domain_path=self.input_domain_path,
            planning_timeout=self.planning_timeout,
            max_length=self.max_length,
            max_subproblems=self.max_subproblems,
            max_goals=self.max_goals,
        )

    def learn(self,
              simulator: SequentialSimulator,
              max_steps: int = 10000,
              seed: int = 123) -> Tuple[str, Trajectory]:
        """
        Learns a PDDL action model from:
         (i)  a simulator of the environment to learn from;
         (ii) a (possibly empty) input model which is required to specify the
              predicates and operators signature (set via the input_domain_path
              attribute at instantiation time).

        :parameter simulator: environment simulator
        :parameter max_steps: maximum number of interaction steps with the simulator
        :parameter seed: random seed for reproducibility. NOTE: this method does not
            currently read the value or forward it to the underlying learner; it is
            kept in the signature for interface compatibility.

        :return: a tuple made of the learned PDDL model returned by the learner and
            the :class:`Trajectory` built from the learner's observations and actions
        """

        domain_str, olam_traj = self._learner.run(simulator, max_steps=max_steps)

        # Convert OLAM trajectory (SymbolicObservation states) to AMLGym Trajectory (UPState states)
        # NOTE(review): this assigns a class-level attribute of UPState, so the new
        # value stays in effect for every UPState created afterwards, not only for
        # the states built below. Presumably it lifts the limit on chained ancestor
        # states -- confirm against the unified_planning documentation.
        UPState.MAX_ANCESTORS = None
        states = [UPState(obs.fluents, simulator._problem) for obs in olam_traj.observations]
        trajectory = Trajectory(states, olam_traj.actions)

        return domain_str, trajectory