Source code for nubo.optimisation.adam

import torch
from torch import Tensor
from torch.optim import Adam
from typing import Optional, Tuple, Callable, Any
from nubo.optimisation import gen_candidates
from nubo.utils import unnormalise, normalise


def _adam(func: Callable,
          x: Tensor,
          lr: Optional[float]=0.1,
          steps: Optional[int]=200,
          **kwargs: Any) -> Tuple[Tensor, Tensor]:
    """
    Adam optimiser. Minimises `func`.

    Parameters
    ----------
    func : ``Callable``
        Function to optimise.
    x : ``torch.Tensor``
        (size 1 x d) Initial starting point of ``torch.optim.Adam`` algorithm.
    lr : ``float``, optional
        Learning rate of ``torch.optim.Adam`` algorithm, default is 0.1.
    steps : ``int``, optional
        Optimisation steps of ``torch.optim.Adam`` algorithm, default is 200.
    **kwargs : ``Any``
        Keyword arguments passed to ``torch.optim.Adam``.

    Returns
    -------
    x : ``torch.Tensor``
        (size 1 x d) Minimiser input.
    loss : ``torch.Tensor``
        (size 1) Minimiser output.
    """

    x.requires_grad_(True)

    # specify Adam
    adam = Adam([x], lr=lr, **kwargs)

    # optimise the function iteratively
    for i in range(steps):

        # set gradients from previous iteration equal to 0
        adam.zero_grad()
        
        # calculate loss
        loss = func(x)

        # backpropagate gradients
        loss.backward()

        # take next optimisation step
        adam.step()
  
    return x.detach(), loss
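
For illustration, a minimal sketch of what this helper does in isolation: minimising a simple quadratic from a single starting point. The quadratic and starting point below are made up for the example and are not part of the module.

    quad = lambda x: torch.sum((x - 2.0) ** 2)   # toy objective with minimum at x = 2
    x0 = torch.zeros((1, 3))                     # single starting point of size 1 x d
    x_min, final_loss = _adam(quad, x=x0, lr=0.1, steps=200)
    # x_min should be approximately tensor([[2., 2., 2.]]) and final_loss close to 0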


def adam(func: Callable,
         bounds: Tensor,
         lr: Optional[float]=0.1,
         steps: Optional[int]=200,
         num_starts: Optional[int]=10,
         num_samples: Optional[int]=100,
         **kwargs: Any) -> Tuple[Tensor, Tensor]:
    """
    Multi-start Adam optimiser using the ``torch.optim.Adam`` implementation
    from ``PyTorch``.

    Used for optimising Monte Carlo acquisition functions when base samples
    are not fixed. Bounds are enforced by transforming `func` with the
    sigmoid function and scaling results. Picks the best `num_starts` points
    from a total of `num_samples` Latin hypercube samples to initialise the
    optimiser. Returns the best result. Minimises `func`.

    Parameters
    ----------
    func : ``Callable``
        Function to optimise.
    bounds : ``torch.Tensor``
        (size 2 x d) Optimisation bounds of input space.
    lr : ``float``, optional
        Learning rate of ``torch.optim.Adam`` algorithm, default is 0.1.
    steps : ``int``, optional
        Optimisation steps of ``torch.optim.Adam`` algorithm, default is 200.
    num_starts : ``int``, optional
        Number of starts for multi-start optimisation, default is 10.
    num_samples : ``int``, optional
        Number of samples from which to draw the starts, default is 100.
    **kwargs : ``Any``
        Keyword arguments passed to ``torch.optim.Adam``.

    Returns
    -------
    best_result : ``torch.Tensor``
        (size 1 x d) Minimiser input.
    best_func_result : ``torch.Tensor``
        (size 1) Minimiser output.
    """

    dims = bounds.size(1)

    # transform function so that it takes real numbers
    trans_func = lambda x: func(unnormalise(torch.sigmoid(x), bounds).reshape(1, -1))

    # generate candidates and transform them to real numbers
    candidates = gen_candidates(func, bounds, num_starts, num_samples)
    inv_sigmoid = lambda x: torch.log(x/(1-x))
    trans_candidates = inv_sigmoid(normalise(candidates, bounds))

    # initialise objects for results
    results = torch.zeros((num_starts, dims))
    func_results = torch.zeros(num_starts)

    # iteratively optimise over candidates
    for i in range(num_starts):
        x, fun = _adam(trans_func, lr=lr, x=trans_candidates[i], steps=steps, **kwargs)
        results[i, :] = unnormalise(torch.sigmoid(x), bounds)  # transform results back to bounds
        func_results[i] = fun

    # select best candidate
    best_i = torch.argmin(func_results)
    best_result = torch.reshape(results[best_i, :], (1, -1))
    best_func_result = func_results[best_i]

    return best_result, torch.reshape(best_func_result, (1,))
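
A hedged usage sketch of `adam`: in NUBO, `func` would typically be a Monte Carlo acquisition function built from a fitted Gaussian process, but any function that maps a point inside the (2 x d) `bounds` to a single value can stand in. The toy objective, bounds, and argument values below are illustrative assumptions, not part of the module.

    toy_func = lambda x: torch.sum((x - 2.5) ** 2, dim=-1)   # toy objective, minimum at (2.5, 2.5)
    bounds = torch.tensor([[0.0, 0.0], [5.0, 5.0]])          # size 2 x d box constraints
    x_best, f_best = adam(toy_func, bounds, lr=0.1, steps=200, num_starts=5, num_samples=50)
    # x_best has size 1 x d and lies strictly inside the bounds (sigmoid transform),
    # f_best has size 1
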

def _adam_mixed(func: Callable,
                x: Tensor,
                bounds: Tensor,
                lr: Optional[float]=0.1,
                steps: Optional[int]=200,
                **kwargs: Any) -> Tuple[Tensor, Tensor]:
    """
    Adam optimiser for mixed parameters. Minimises `func`.

    Parameters
    ----------
    func : ``Callable``
        Function to optimise.
    x : ``torch.Tensor``
        (size 1 x d) Initial starting point of ``torch.optim.Adam`` algorithm.
    bounds : ``torch.Tensor``
        (size 2 x d) Optimisation bounds of input space.
    lr : ``float``, optional
        Learning rate of ``torch.optim.Adam`` algorithm, default is 0.1.
    steps : ``int``, optional
        Optimisation steps of ``torch.optim.Adam`` algorithm, default is 200.
    **kwargs : ``Any``
        Keyword arguments passed to ``torch.optim.Adam``.

    Returns
    -------
    x : ``torch.Tensor``
        (size 1 x d) Minimiser input.
    loss : ``torch.Tensor``
        (size 1) Minimiser output.
    """

    x.requires_grad_(True)

    # specify Adam
    adam = Adam([x], lr=lr, **kwargs)

    # optimise the function iteratively
    for i in range(steps):

        # set gradients from previous iteration equal to 0
        adam.zero_grad()

        # calculate loss
        loss = func(x)

        # backpropagate gradients
        loss.backward()

        # take next optimisation step
        adam.step()

        # enforce bounds
        with torch.no_grad():
            x[:] = x.clamp(min=bounds[0, :], max=bounds[1, :])

    return x.detach(), loss

def adam_mixed(func: Callable,
               bounds: Tensor,
               lr: Optional[float]=0.1,
               steps: Optional[int]=200,
               num_starts: Optional[int]=10,
               num_samples: Optional[int]=100,
               **kwargs: Any) -> Tuple[Tensor, Tensor]:
    """
    Multi-start Adam optimiser using the ``torch.optim.Adam`` implementation
    from ``PyTorch``.

    Used for optimising Monte Carlo acquisition functions when base samples
    are not fixed. Bounds are enforced by clamping where values exceed them.
    Picks the best `num_starts` points from a total of `num_samples` Latin
    hypercube samples to initialise the optimiser. Returns the best result.
    Minimises `func`.

    Parameters
    ----------
    func : ``Callable``
        Function to optimise.
    bounds : ``torch.Tensor``
        (size 2 x d) Optimisation bounds of input space.
    lr : ``float``, optional
        Learning rate of ``torch.optim.Adam`` algorithm, default is 0.1.
    steps : ``int``, optional
        Optimisation steps of ``torch.optim.Adam`` algorithm, default is 200.
    num_starts : ``int``, optional
        Number of starts for multi-start optimisation, default is 10.
    num_samples : ``int``, optional
        Number of samples from which to draw the starts, default is 100.
    **kwargs : ``Any``
        Keyword arguments passed to ``torch.optim.Adam``.

    Returns
    -------
    best_result : ``torch.Tensor``
        (size 1 x d) Minimiser input.
    best_func_result : ``torch.Tensor``
        (size 1) Minimiser output.
    """

    dims = bounds.size(1)

    # generate candidates
    candidates = gen_candidates(func, bounds, num_starts, num_samples)

    # initialise objects for results
    results = torch.zeros((num_starts, dims))
    func_results = torch.zeros(num_starts)

    # iteratively optimise over candidates
    for i in range(num_starts):
        results[i, :], func_results[i] = _adam_mixed(func,
                                                     x=candidates[i],
                                                     bounds=bounds,
                                                     lr=lr,
                                                     steps=steps,
                                                     **kwargs)

    # select best candidate
    best_i = torch.argmin(func_results)
    best_result = torch.reshape(results[best_i, :], (1, -1))
    best_func_result = func_results[best_i]

    return best_result, torch.reshape(best_func_result, (1,))
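
An analogous hedged sketch for `adam_mixed`, which enforces the bounds by clamping after every Adam step instead of transforming the inputs through a sigmoid. Again, the toy objective and bounds are assumptions made only to show the call signature.

    toy_func = lambda x: torch.sum((x - 2.5) ** 2, dim=-1)   # toy objective, minimum at (2.5, 2.5)
    bounds = torch.tensor([[0.0, 0.0], [5.0, 5.0]])          # size 2 x d box constraints
    x_best, f_best = adam_mixed(toy_func, bounds, lr=0.1, steps=200, num_starts=5, num_samples=50)
    # x_best has size 1 x d and is clamped to the bounds, f_best has size 1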