Mixed-algorithms: a ListofStates tutorial
We illustrate here the use of ListofStates
in dealing with a warm-start procedure. The full code of this tutorial can be found here.
ListofStates
is designed to store the history of the iteration process. In this tutorial, we compare the resolution of an unconstrained problem with 3 variants:
- a steepest descent method
- an inverse-BFGS quasi-Newton method
- a mix of the two: 5 steps of steepest descent, then a switch to BFGS initialized with the 5 stored steps.
using Stopping, ADNLPModels, NLPModels, LinearAlgebra, Printf
First, we define a steepest descent method and a BFGS quasi-Newton method, both using an elementary backtracking Armijo linesearch.
import Stopping.armijo
function armijo(xk, dk, fk, slope, f)
  t = 1.0
  fk_new = f(xk + t * dk)
  # Backtrack until the Armijo sufficient decrease condition holds;
  # we reuse fk_new in the test instead of re-evaluating f.
  while fk_new > fk + 1.0e-4 * t * slope
    t /= 1.5
    fk_new = f(xk + t * dk)
  end
  return t, fk_new
end
function steepest_descent(stp::NLPStopping)
  xk = stp.current_state.x
  fk, gk = objgrad(stp.pb, xk)
  OK = update_and_start!(stp, fx = fk, gx = gk)
  @printf "%2s %9s %7s %7s %7s\n" "k" "fk" "||∇f(x)||" "t" "slope"
  @printf "%2d %9.2e %7.1e\n" stp.meta.nb_of_stop fk norm(stp.current_state.current_score)
  while !OK
    dk = -gk # steepest descent direction
    slope = dot(dk, gk) # = -‖gk‖²
    t, fk = armijo(xk, dk, fk, slope, x -> obj(stp.pb, x))
    xk += t * dk
    fk, gk = objgrad(stp.pb, xk)
    OK = update_and_stop!(stp, x = xk, fx = fk, gx = gk)
    @printf "%2d %9.2e %7.1e %7.1e %7.1e\n" stp.meta.nb_of_stop fk norm(stp.current_state.current_score) t slope
  end
  return stp
end
function bfgs_quasi_newton_armijo(stp::NLPStopping; Hk = I)
  xk = stp.current_state.x
  fk, gk = objgrad(stp.pb, xk)
  gm = gk
  dk, t = similar(gk), 1.0
  OK = update_and_start!(stp, fx = fk, gx = gk)
  @printf "%2s %7s %7s %7s %7s\n" "k" "fk" "||∇f(x)||" "t" "slope"
  @printf "%2d %7.1e %7.1e\n" stp.meta.nb_of_stop fk norm(stp.current_state.current_score)
  while !OK
    if stp.meta.nb_of_stop != 0 # skip the update at the first iteration
      sk = t * dk
      yk = gk - gm
      ρk = 1 / dot(yk, sk)
      # The update requires yk' * sk > 0 (curvature condition), for instance
      # yk' * sk ≥ 1.0e-2 * sk' * Hk * sk; otherwise we keep Hk unchanged.
      Hk = ρk ≤ 0.0 ? Hk : (I - ρk * sk * yk') * Hk * (I - ρk * yk * sk') + ρk * sk * sk'
      if norm(sk) ≤ 1e-14
        break
      end
      # Equivalent expanded form of the inverse-BFGS update:
      # H2 = Hk + sk * sk' * (dot(sk, yk) + yk' * Hk * yk) * ρk^2 - ρk * (Hk * yk * sk' + sk * yk' * Hk)
    end
    dk = -Hk * gk
    slope = dot(dk, gk) # ≤ -1.0e-4 * norm(dk) * gnorm
    t, fk = armijo(xk, dk, fk, slope, x -> obj(stp.pb, x))
    xk = xk + t * dk
    gm = copy(gk)
    gk = grad(stp.pb, xk)
    OK = update_and_stop!(stp, x = xk, fx = fk, gx = gk)
    @printf "%2d %7.1e %7.1e %7.1e %7.1e\n" stp.meta.nb_of_stop fk norm(stp.current_state.current_score) t slope
  end
  stp.stopping_user_struct = Dict(:Hk => Hk)
  return stp
end
bfgs_quasi_newton_armijo (generic function with 1 method)
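Before running the solvers, two quick sanity checks may help (illustrative snippets with made-up data, not part of the original tutorial): the backtracking linesearch returns a step satisfying the Armijo sufficient decrease condition, and an accepted inverse-BFGS update satisfies the secant equation.
# Hypothetical sanity checks with made-up data.
# 1. Armijo backtracking on f(x) = ‖x‖² from xk = [1.0] along dk = -∇f(xk) = [-2.0],
#    where fk = 1.0 and slope = ∇f(xk)'dk = -4.0:
f(x) = dot(x, x)
t, ft = armijo([1.0], [-2.0], 1.0, -4.0, f)
ft ≤ 1.0 + 1.0e-4 * t * (-4.0) # true: sufficient decrease holds by construction

# 2. One inverse-BFGS update from Hk = I satisfies the secant equation H⁺yk = sk:
sk, yk = [1.0, 0.0], [0.5, 0.2]
ρk = 1 / dot(yk, sk)
Hplus = (I - ρk * sk * yk') * I * (I - ρk * yk * sk') + ρk * sk * sk'
Hplus * yk ≈ sk # true whenever the curvature condition yk'sk > 0 holds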
We now consider the following unconstrained problem (Himmelblau's function) modeled with ADNLPModels.jl
and define a related NLPStopping.
fH(x) = (x[2] + x[1]^2 - 11)^2 + (x[1] + x[2]^2 - 7)^2
nlp = ADNLPModel(fH, [10., 20.])
stp = NLPStopping(
  nlp,
  optimality_check = unconstrained_check,
  atol = 1e-6,
  rtol = 0.0,
  max_iter = 100,
)
NLPStopping{ADNLPModels.ADNLPModel{Float64, Vector{Float64}, Vector{Int64}}, StoppingMeta{Float64, Float64, Nothing, Stopping.var"#46#54", Stopping.var"#47#55"{Stopping.var"#46#54"}, typeof(unconstrained_check)}, StopRemoteControl, NLPAtX{Float64, Float64, Vector{Float64}}, VoidStopping{Any, StoppingMeta, StopRemoteControl, GenericState, Nothing, VoidListofStates}, VoidListofStates}
It has no main_stp.
It doesn't keep track of the state history.
Problem is ADNLPModel - Model with automatic differentiation backend ADNLPModels.ADModelBackend{ADNLPModels.ForwardDiffADGradient, ADNLPModels.ForwardDiffADHvprod, ADNLPModels.ForwardDiffADJprod, ADNLPModels.ForwardDiffADJtprod, ADNLPModels.ForwardDiffADJacobian, ADNLPModels.ForwardDiffADHessian, ADNLPModels.ForwardDiffADGHjvprod}(ADNLPModels.ForwardDiffADGradient(ForwardDiff.GradientConfig{ForwardDiff.Tag{typeof(Main.fH), Float64}, Float64, 2, Vector{ForwardDiff.Dual{ForwardDiff.Tag{typeof(Main.fH), Float64}, Float64, 2}}}((Partials(1.0, 0.0), Partials(0.0, 1.0)), ForwardDiff.Dual{ForwardDiff.Tag{typeof(Main.fH), Float64}, Float64, 2}[Dual{ForwardDiff.Tag{typeof(Main.fH), Float64}}(6.9118642514387e-310,6.91186019307676e-310,6.9118642514395e-310), Dual{ForwardDiff.Tag{typeof(Main.fH), Float64}}(6.91186019307676e-310,6.9118644694402e-310,6.91186019307676e-310)])), ADNLPModels.ForwardDiffADHvprod(), ADNLPModels.ForwardDiffADJprod(), ADNLPModels.ForwardDiffADJtprod(), ADNLPModels.ForwardDiffADJacobian(0), ADNLPModels.ForwardDiffADHessian(3), ADNLPModels.ForwardDiffADGHjvprod())
Problem name: Generic
All variables: ████████████████████ 2 All constraints: ⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅ 0
free: ████████████████████ 2 free: ⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅ 0
lower: ⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅ 0 lower: ⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅ 0
upper: ⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅ 0 upper: ⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅ 0
low/upp: ⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅ 0 low/upp: ⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅ 0
fixed: ⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅ 0 fixed: ⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅ 0
infeas: ⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅ 0 infeas: ⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅ 0
nnzh: ( 0.00% sparsity) 3 linear: ⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅ 0
nonlinear: ⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅ 0
nnzj: (------% sparsity)
Counters:
obj: ⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅ 0 grad: ⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅ 0 cons: ⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅ 0
cons_lin: ⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅ 0 cons_nln: ⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅ 0 jcon: ⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅ 0
jgrad: ⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅ 0 jac: ⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅ 0 jac_lin: ⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅ 0
jac_nln: ⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅ 0 jprod: ⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅ 0 jprod_lin: ⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅ 0
jprod_nln: ⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅ 0 jtprod: ⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅ 0 jtprod_lin: ⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅ 0
jtprod_nln: ⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅ 0 hess: ⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅ 0 hprod: ⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅ 0
jhess: ⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅ 0 jhprod: ⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅ 0
No user-defined structure is furnished.
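As indicated in the first lines above, this Stopping has no main_stp and, by default, does not keep track of the state history; we activate the history below with the keyword n_listofstates.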
We first run the steepest descent method and the quasi-Newton method separately on the problem.
Steepest descent
reinit!(stp, rstate = true, x = nlp.meta.x0)
steepest_descent(stp)
(status(stp), elapsed_time(stp), get_list_of_states(stp), neval_obj(nlp), neval_grad(nlp))
(:Optimal, 1.091270923614502, VoidListofStates(), 889, 37)
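The tuple collects the final status, the elapsed time, the state history (a VoidListofStates, since we did not activate it), and the number of objective and gradient evaluations.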
BFGS quasi-Newton
reinit!(stp, rstate = true, x = nlp.meta.x0, rcounters = true)
bfgs_quasi_newton_armijo(stp)
(status(stp), elapsed_time(stp), get_list_of_states(stp), neval_obj(nlp), neval_grad(nlp))
(:Optimal, 0.009361982345581055, VoidListofStates(), 91, 18)
Mix of Algorithms
NLPModels.reset!(nlp)
stp_warm = NLPStopping(
  nlp,
  optimality_check = unconstrained_check,
  atol = 1e-6,
  rtol = 0.0,
  max_iter = 5,
  n_listofstates = 5, # shortcut for list = ListofStates(5, Val{NLPAtX{Float64, Array{Float64,1}, Array{Float64,2}}}())
)
get_list_of_states(stp_warm)
ListofStates{NLPAtX{Float64, Float64, Vector{Float64}}, VoidListofStates}(5, 0, Tuple{NLPAtX{Float64, Float64, Vector{Float64}}, VoidListofStates}[])
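The first field is the maximum number of states kept (5 here) and the second the number of states currently stored (0, since no iteration has been run yet).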
steepest_descent(stp_warm)
status(stp_warm) # :IterationLimit
:IterationLimit
length(get_list_of_states(stp_warm)) # 5
5
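Each entry of the list is a tuple holding an NLPAtX state and a (possibly void) sub-list of states. As an illustration (not in the original tutorial), we can inspect the values stored at the 5 iterates:
# Each stored entry is a (state, sub-list) tuple; the state keeps the values
# passed to update_and_stop!, e.g. x, fx and gx.
for i = 1:length(get_list_of_states(stp_warm))
  st = stp_warm.listofstates.list[i][1]
  println("state $i: fx = ", st.fx, ", ‖gx‖ = ", norm(st.gx, Inf))
end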
Hwarm = I
# Rebuild an inverse-BFGS approximation from the 5 stored states.
for i = 2:5
  sk = stp_warm.listofstates.list[i][1].x - stp_warm.listofstates.list[i-1][1].x
  yk = stp_warm.listofstates.list[i][1].gx - stp_warm.listofstates.list[i-1][1].gx
  ρk = 1 / dot(yk, sk)
  if ρk > 0.0 # accept the pair only if the curvature condition holds
    global Hwarm = (I - ρk * sk * yk') * Hwarm * (I - ρk * yk * sk') + ρk * sk * sk'
  end
end
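As a sanity check (not part of the original tutorial), the warm-started matrix satisfies the secant equation for the most recent accepted pair:
# Illustrative check: the last accepted update enforces Hwarm * yk ≈ sk.
s5 = stp_warm.listofstates.list[5][1].x - stp_warm.listofstates.list[4][1].x
y5 = stp_warm.listofstates.list[5][1].gx - stp_warm.listofstates.list[4][1].gx
Hwarm * y5 ≈ s5 # true whenever (s5, y5) passed the curvature test ρk > 0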
reinit!(stp_warm)
stp_warm.meta.max_iter = 100
bfgs_quasi_newton_armijo(stp_warm, Hk = Hwarm)
(status(stp_warm), elapsed_time(stp_warm), get_list_of_states(stp_warm), neval_obj(nlp), neval_grad(nlp))
(:Optimal, 0.008723020553588867, ListofStates{NLPAtX{Float64, Float64, Vector{Float64}}, VoidListofStates}(-1, 10, Tuple{NLPAtX{Float64, Float64, Vector{Float64}}, VoidListofStates}[(NLPAtX{Float64, Float64, Vector{Float64}} with an iterate of type Vector{Float64} and a score of type Float64.
, VoidListofStates()), (NLPAtX{Float64, Float64, Vector{Float64}} with an iterate of type Vector{Float64} and a score of type Float64.
, VoidListofStates()), (NLPAtX{Float64, Float64, Vector{Float64}} with an iterate of type Vector{Float64} and a score of type Float64.
, VoidListofStates()), (NLPAtX{Float64, Float64, Vector{Float64}} with an iterate of type Vector{Float64} and a score of type Float64.
, VoidListofStates()), (NLPAtX{Float64, Float64, Vector{Float64}} with an iterate of type Vector{Float64} and a score of type Float64.
, VoidListofStates()), (NLPAtX{Float64, Float64, Vector{Float64}} with an iterate of type Vector{Float64} and a score of type Float64.
, VoidListofStates()), (NLPAtX{Float64, Float64, Vector{Float64}} with an iterate of type Vector{Float64} and a score of type Float64.
, VoidListofStates()), (NLPAtX{Float64, Float64, Vector{Float64}} with an iterate of type Vector{Float64} and a score of type Float64.
, VoidListofStates()), (NLPAtX{Float64, Float64, Vector{Float64}} with an iterate of type Vector{Float64} and a score of type Float64.
, VoidListofStates()), (NLPAtX{Float64, Float64, Vector{Float64}} with an iterate of type Vector{Float64} and a score of type Float64.
, VoidListofStates())]), 192, 16)
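Compared with the cold-started runs, the warm-started BFGS finishes with the fewest gradient evaluations (16, against 18 for BFGS alone and 37 for steepest descent); the larger objective count (192) is in part due to the Armijo backtrackings of the initial steepest-descent phase.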