Augmented formulation
We consider the augmented formulation of the same problem as before
\[ \left\{ \begin{array}{ll} \displaystyle \min_{x,u} x^0(t_f) \\[1em] \text{s.c.}~\dot x^0(t) = x_1(t), & t\in [t_0, t_f]~\mathrm{a.e.}, \\[0.5em] \phantom{\mathrm{s.c.}~} \dot x_1(t) = u(t), & t\in [t_0, t_f]~\mathrm{a.e.}, \\[0.5em] \phantom{\mathrm{s.c.}~} u(t) \in [-1,1], & t\in [t_0, t_f], \\[0.5em] \phantom{\mathrm{s.c.}~} x(t_0) = x_0, \quad x_1(t_f) = x_f, \end{array} \right.\]
with fixed $x_0$, $t_0$, $x_f$ and $t_f$, and where $x = (x^0, x_1)$. The augmented Hamiltonian of this problem is given by
\[ H(x,p) = p^0 x_1 + \lvert p_1 \rvert,\]
where $p = (p^0, p_1)$ is the augmented costate, composed by the costate $p_1$ associated to the state $x_1$ and multiplier $p^0$ associated to the cost $x^0$ which is no longer fixed to $-1$. The flow $\varphi$ associated to the true Hamiltonian $H$ is computed thanks to the $\texttt{flow}$ and the $\texttt{Hamiltonian}$ functions.
The general shooting function $S \colon \mathbb R^2 \to \mathbb R$ associated to this Hamiltonian is defined by
\[ S(p_0) = \pi_x \big( \varphi(t_0, x_0, p_0, t_f) \big),\]
where $\pi_x(x^0, x_1, p^0, x_1) = x_1$ is still the state projection.
We are interested in two normalization of this shooting function, thanks to the homogeneity of the BC-extremals on the augmented costate, which are
\[ S_1(p_0) = S(-1, p_0) \qquad \text{and} \qquad S_2(p_0) = S \big( \eta(p), p_0 \big)\]
where the function $\eta \colon [-1, 1] \to \mathbb R$ is defined by
\[ \eta(p) = -\sqrt{ 1 - p^2}.\]
The following code creates and plots these shooting functions.
using OptimalControl
using Plots
using ForwardDiff
using DifferentialEquations
using MINPACK
using Statistics
global α = 1
function condition(z, t, integrator) # event when condition(z,t,integrator) == 0
x⁰,x,p⁰,p = z
return p
end
function affect!(integrator) # action when condition == 0
global α = -α
nothing
end
cb = ContinuousCallback(condition, affect!) # callback
H(x, p) = p[1] * x[2] + α * p[2]
ϕ_ = Flow(Hamiltonian(H), callback = cb) # flow with maximizing control
function ϕ(t0, x0, p0, tf; kwargs...)
if p0[2] == 0
global α = -sign(p0[1])
else
global α = sign(p0[2])
end
return ϕ_(t0, x0, p0, tf; kwargs...)
end
function ϕ((t0, tf), x0, p0; kwargs...) # flow for plot
if p0[2] == 0
global α = -sign(p0[1])
else
global α = sign(p0[2])
end
return ϕ_((t0, tf), x0, p0; kwargs...)
end
t0 = 0 # initial time
x0 = [0,0] # initial augmented state
tf = 5 # final time
xT = 0 # final state
π((x,p)) = x[2] # projection on state space
η(p0) = -sqrt.(1 - p0.^2) # function η(⋅)
S(p0) = π( ϕ(t0, x0, p0, tf) ) - xT # general hooting function
S₁(p0) = S([-1, p0]) # normalization 1
S₂(p0) = abs(p0) < 1 ? S([η(p0), p0]) : sign(p0)*tf - xT # normalization 2
# Plot
plt_S1 = plot(range(-7, 2, 500), S₁ , color = 2, label = "")
plot!(xlabel = "p₀", ylabel = "S₁", xlim = [-7,2])
plt_S2 = plot(range(-1, 1, 500), S₂, color = 3)
plot!(xlabel = "p₀", ylabel = "S₂", xlim = [-1,1], legend=false)
S_(p⁰, p) = S([p⁰, p])
plt_S = surface(range(0, -1.5, 100), range(-8, 2, 100), S_, camera = (30,30))
surface!(xlabel = "p⁰", ylabel = "p₀", zlabel = "φₓ", xflip = true)
plot3d!(-1*ones(100), range(-8, 2, 100), S₁.(range(-8, 2, 100)), label = "S₁")
plot3d!(η.(range(-1, 1, 100)), range(-1, 1, 100), S₂.(range(-1, 1, 100)), label = "S₂")
plt_S12 = plot(plt_S1, plt_S2, layout = (1,2))
plt_total = plot(plt_S, plt_S12, layout = grid(2,1, heights = [2/3, 1/3]), size=(700, 600))
@gif for i ∈ [range(30, 90, 50); 90*ones(25); range(90, 30, 50); 30*ones(25)]
plot!(plt_total[1], camera=(i,i),
zticks = i==90 ? false : true,
zlabel = i==90 ? "" : "S" )
end

As done before, we use the solver $\texttt{hybrd1}$ from the $\texttt{MINPACK.jl}$ package to find a zero of $S_2$.
global iterate_S2 = Vector{Float64}() # global vector to store iterates of the solver
function S₂!(s₂, ξ) # intermediate function
push!(iterate_S2, ξ[1]) # stock the iterate
return (s₂[:] .= S₂(ξ[1]); nothing)
end
JS₂(ξ) = ForwardDiff.jacobian(p0 -> [S₂(p0[1])], ξ) # compute jacobian by forward differentiation
JS₂!(js₂, ξ) = (js₂[:] .= JS₂(ξ); nothing) # intermediate function
ξ = [-0.5] # initial guess
p0_sol = fsolve(S₂!, JS₂!, ξ, show_trace = true) # solve
println(p0_sol) # print solution
Iter f(x) inf-norm Step 2-norm Step time
------ -------------- -------------- --------------
1 3.845299e+00 0.000000e+00 0.006703
2 5.000000e+00 1.559496e+00 4.822490
3 5.000000e+00 4.983094e-01 0.000077
4 5.000000e+00 4.983094e-01 0.000350
5 5.000000e+00 4.983094e-01 0.000022
6 2.825589e+00 9.417471e-02 0.000232
7 3.275582e+00 5.569993e-02 0.000165
8 1.835694e+00 1.605481e-02 0.000177
9 5.000000e+00 4.108358e-02 0.000023
10 8.767661e-01 2.198078e-02 0.000145
11 1.052824e+00 2.476848e-03 0.000145
12 2.252319e-01 7.373618e-04 0.000141
13 6.733089e-02 6.111140e-05 0.000142
14 4.061534e-03 3.236777e-06 0.000142
15 6.974839e-05 1.047584e-08 0.000159
16 7.333033e-08 3.198326e-12 0.000147
17 1.321943e-12 3.527841e-18 0.000142
Results of Nonlinear Solver Algorithm
* Algorithm: Modified Powell (User Jac, Expert)
* Starting Point: [-0.5]
* Zero: [-0.9284766908852254]
* Inf-norm of residuals: 0.000000
* Convergence: true
* Message: algorithm estimates that the relative error between x and the solution is at most tol
* Total time: 4.831416 seconds
* Function Calls: 17
* Jacobian Calls (df/dx): 2
sol = ϕ((t0, tf), x0, [η(p0_sol.x[1]), p0_sol.x[1]],
saveat=range(t0, tf, 500)) # get optimal trajectory
# plot
t = sol.t
x⁰ = [sol.u[i][1] for i in 1:length(sol.u)]
x = [sol.u[i][2] for i in 1:length(sol.u)]
p⁰ = [sol.u[i][3] for i in 1:length(sol.u)]
p = [sol.u[i][4] for i in 1:length(sol.u)]
u = sign.(p)
plt_x⁰ = plot(t, x⁰, label = "x⁰")
plt_x = plot(t, x , label = "x" )
plt_p⁰ = plot(t, p⁰, label = "p⁰", ylim=[-1,1])
plt_p = plot(t, p , label = "p" )
plt_u = plot(t, u, label = "u" )
plt_xp = plot(plt_x⁰, plt_p⁰, plt_x, plt_p, layout=(2, 2))
plot(plt_xp, plt_u, layout = grid(2,1, heights = [2/3, 1/3]), size=(700, 600))
Construction of the geometric preconditioner
The goal is now to use the geometric preconditioning method proposed in [mettre article]. For this purpose, the first step is to create some points in the boundary of the accessible augmented set, and to fit an ellipse on these points.
The second step is to create the linear diffeomorphism $\phi \colon \mathbb R^2 \to \mathbb R^2, (x^0,x_1) \to Ax + B$ that transforms the fitted ellipse into the unit circle, and that satisfy the condition
\[ \frac{\partial \phi}{\partial x^0} = k e_1, \]
with $k>0$ and $e_1 = (1,0)$. In this context, we denote
\[ A = \left( \begin{array}{cc} k & A_{x^0} \\ 0 & A_{x_1} \end{array} \right) \qquad \text{and} \qquad B = \left( \begin{array}{c} B_{x^0} \\ B_{x_1} \end{array} \right).\]
This diffeomorphism is given from the semi-axis $a,b >0$, the angle $\theta \in [0, \frac{\pi}{2}[$ between the semi-axis $b$ and the $x$-axis, and the center $c \in \mathbb R^2$ by
\[ \phi(x) = r(-\beta_0) s(a^{-1}, b^{-1}) r(\theta) (x - c),\]
where $r$ and $s$ correspond respectively to the rotation and the scale matrix, defined by
\[ r(\theta) = \left( \begin{array}{cc} \phantom - \cos(\theta) & \sin(\theta) \\ -\sin(\theta) & \cos(\theta) \end{array} \right) \qquad \text{and} \qquad s(a,b) = \left( \begin{array}{cc} a & 0 \\ 0 & b \end{array} \right),\]
and where $\beta_0 = \arctan \left(\frac{a \sin(\theta)}{b \cos(\theta)} \right)$.
fit_ellipse
— Function
n = 15 # number of points for fit : 2n
n_ = 100 # number of points for plot: 2n_
p0 = [[[-1, i] for i ∈ range(-tf, 0, n)]; # initial costate for fit
[[1, i] for i ∈ range(tf, 0, n)]]
p0_ = [[[-1, i] for i ∈ range(-tf, 0, n_)]; # initial costate for plot
[[1, i] for i ∈ range(tf, 0, n_)]]
x = zeros(2, 2*n); p = zeros(2,2*n) # init final state and costate
x_ = zeros(2, 2*n_); p_ = zeros(2, 2*n_)
for i = 1:length(p0)
x[:,i], p[:,i] = ϕ(t0, x0, p0[i], tf) # compute flow for fit
end
for i = 1:length(p0_)
x_[:,i], p_[:,i] = ϕ(t0, x0, p0_[i], tf) # compute flow for plot
end
a, b, θ, c = fit_ellipse(x[1,:], x[2,:]) # fit ellipse
r(β) = [[cos(β), sin(β)] [-sin(β), cos(β)]] # 2x2 rotation matrix
s(a,b) = [[a,0] [0,b]] # 2x2 scale matrix
β = range(-Base.π, Base.π; length = 100) # angle for plot ellipse
xₑ = r(-θ)*s(a,b)*
transpose(reduce(hcat,[sin.(β), cos.(β)])).+c # points of the ellipse
# construction of the linear diffeomorphism φ(x) = Ax + B
d = (a*sin(θ))/(b*cos(θ)); β₀ = atan(d) # intermediate values
A = r(-β₀)*s(1/a,1/b)*r(θ); B = -A*c # calculate A and B
φ(x) = A*x .+ B # function φ
y = φ(x); y_ = φ(x_); yₑ = φ(xₑ) # compute φ on x, x_ and xₑ
# plot
plt_x = plot(x_[2,:], x_[1,:], label = "")
scatter!(x[2,:], x[1,:], label="Observations", legend = :topleft)
plot!(xₑ[2,:], xₑ[1,:], label = "Fitted ellipse")
plot!(xlim = [-15,15], ylim = [-15,15], xlabel = "x", ylabel = "x⁰")
plt_y = plot(y_[2,:], y_[1,:], label = "")
scatter!(y[2,:], y[1,:], label="")
plot!(yₑ[2,:], yₑ[1,:], label = "")
plot!( xlabel = "y", ylabel = "y⁰")
plot(plt_x, plt_y, layout = (1,2), size=(800, 400))
The general shooting function $T \colon \mathbb R^2 \to \mathbb R$ in the new coordinate is defined by
\[ T(q) = A_{x_1} \varphi \big(t_0, x_0, p_0(A^\top q), t_f) + B_{x_1} - y_T,\]
where the function $p_0 \colon \mathbb R^2 \to \mathbb R^2$ correspond to the mapping between the final and the initial augmented costate, and is given by
\[ p_0(p^0, p_1) = (p^0, p_1 + p^0 t_f),\]
and $y_T = A_{x_1} x_T + B_x$ to the target in the new system of coordinates. By using the definition of $S$ and $y_T$, we obtain
\[\begin{align*} T(q) &= A_{x_1} \varphi \big(t_0, x_0, p_0(A^\top q), t_f) + B_{x_1} - (A_{x_1} x_T + B_{x_1}) \\ &= A_{x_1} (S \circ p_0)(A^\top q). \end{align*}\]
which highlight that the proposed geometric preconditioning method is a left and right side preconditioner of the shooting function. Finally, we define the two shooting functions $T_1$ and $T_2$ by using the two method of normalization used before for the function $S$
\[ T_1(p) = T(-1, p) \qquad \text{and} \qquad T_2(p) = T \big(\eta(p), p \big).\]
p₀(p) = [p[1], p[2] + p[1]*tf] # function p₀(⋅)
Aₓ = A[2,2]; Bₓ = B[2] # Aₓ and Bₓ
yT = Aₓ*xT + Bₓ # target in the new coordinate
T(q) = Aₓ*(S ∘ p₀)(transpose(A)*q) # state flow in the new coordinates
T₁(q) = T([-1, q]) # normalization 1
T₂(q) = abs(q) < 1 ? T([η(q), q]) : sign(q)*(Aₓ*tf + Bₓ) # normalization 2
# Plot
plt_S1 = plot(range(-3, 3, 500), T₁ , color = 2)
plot!([-3,3], [yT,yT], color = :black)
plot!(xlabel = "q₀", ylabel = "T₁", xlim = [-3,3], legend=false)
plt_S2 = plot(range(-1, 1, 500), T₂, color = 3)
plot!([-1,1], [yT,yT], color = :black)
plot!(xlabel = "q₀", ylabel = "T₂", xlim = [-1,1], legend=false)
T_(q⁰, q) = T([q⁰, q])
plt_S = surface(range(0, -1.5, 100), range(-3, 3, 100), T_, camera = (30,30))
surface!(xlabel = "q⁰", ylabel = "q₀", zlabel = "T", xflip = true)
plot3d!(-1*ones(100), range(-3, 3, 100), T₁.(range(-3, 3, 100)), label = "T₁")
plot3d!(η.(range(-1, 1, 100)), range(-1, 1, 100), T₂.(range(-1, 1, 100)), label = "T₂")
plt_S12 = plot(plt_S1, plt_S2, layout = (1,2))
plt_total = plot(plt_S, plt_S12, layout = grid(2,1, heights = [2/3, 1/3]), size=(700, 600))
@gif for i ∈ [range(30, 90, 50); 90*ones(25); range(90, 30, 50); 30*ones(25)]
plot!(plt_total[1], camera=(i,i),
zticks = i==90 ? false : true,
zlabel = i==90 ? "" : "T" )
end

Preconditioned shoot
global iterate_T2 = Vector{Float64}() # global vector to store iterates of the solver
function T₂!(t₂, ξ) # intermediate function
push!(iterate_T2, ξ[1])
return (t₂[:] .= T₂(ξ[1]); nothing)
end
JT₂(ξ) = ForwardDiff.jacobian(q0 -> [T₂(q0[1])], ξ) # compute jacobian by forward differentiation
JT₂!(jt₂, ξ) = (jt₂[:] .= JT₂(ξ); nothing) # intermediate function
ξ = [0.5] # initial guess
q_sol = fsolve(T₂!, JT₂!, ξ, show_trace = true) # solve
println(q_sol) # print solution
Iter f(x) inf-norm Step 2-norm Step time
------ -------------- -------------- --------------
1 3.223430e-01 0.000000e+00 0.000452
2 7.034101e-02 1.406250e-01 5.031574
3 1.135104e-02 1.095650e-02 0.000266
4 1.033639e-04 4.056828e-04 0.000171
5 2.155036e-08 3.426091e-08 0.000156
6 6.478526e-16 1.489880e-15 0.000151
7 6.478526e-16 1.346464e-30 0.000148
8 2.155036e-08 1.489881e-15 0.000163
9 6.478526e-16 1.489881e-15 0.000421
10 5.387591e-09 9.311755e-17 0.000157
11 6.478526e-16 9.311755e-17 0.000146
12 1.346899e-09 5.819851e-18 0.000148
13 6.478526e-16 5.819851e-18 0.000164
14 3.367251e-10 3.637417e-19 0.000153
15 6.478526e-16 3.637417e-19 0.000147
16 8.418213e-11 2.273412e-20 0.000149
Results of Nonlinear Solver Algorithm
* Algorithm: Modified Powell (User Jac, Expert)
* Starting Point: [0.5]
* Zero: [1.2022523417221025e-14]
* Inf-norm of residuals: 0.000000
* Convergence: true
* Message: iteration is not making good progress, measured by improvement from last 10 iterations
* Total time: 5.034581 seconds
* Function Calls: 16
* Jacobian Calls (df/dx): 2
p0_sol = p₀(transpose(A)*[η(q_sol.x[1]), q_sol.x[1]]) # get the optimal initial costate in old coordinates
sol = ϕ((t0, tf), x0, p0_sol, saveat=range(t0, tf, 500)) # get optimal trajectory
# plot
t = sol.t
x⁰ = [sol.u[i][1] for i in 1:length(sol.u)]
x = [sol.u[i][2] for i in 1:length(sol.u)]
p⁰ = [sol.u[i][3] for i in 1:length(sol.u)]
p = [sol.u[i][4] for i in 1:length(sol.u)]
u = sign.(p)
plt_x⁰ = plot(t, x⁰, label = "x⁰")
plt_x = plot(t, x , label = "x" )
plt_p⁰ = plot(t, p⁰, label = "p⁰", ylim=[-1,1])
plt_p = plot(t, p , label = "p" )
plt_u = plot(t, u, label = "u" )
plt_xp = plot(plt_x⁰, plt_p⁰, plt_x, plt_p, layout=(2, 2))
plot(plt_xp, plt_u, layout = grid(2,1, heights = [2/3, 1/3]), size=(700, 600))
Comparison
It is shown in [mettre article] that if the boundary of augmented accessible set is the fitted ellipse then the shooting function $T_2$ is the identity function. Due to the error of the approximation, the function $T_2$ is not the identity, but we hope that this function is close to this ideal function, and so that the convergence of $T_2$ is faster than the one of $S_2$.
The following code compares the convergence of these two shooting function. We also study for these functions the domain of initial guesses for the solver that hit the bounds $[-1, 1]$ during the solving process.
## initial guesses
N = 1000; ξ = range(-1.,1.,N)
# initialization of the matrix
fnorms_S2 = zeros(N, 100); fnorms_T2 = zeros(N, 100)
fnorms_T2_in_x = zeros(N, 100)
iterates_S2 = zeros(N, 100); iterates_T2 = zeros(N, 100)
conv_S2 = zeros(N,1); conv_T2 = zeros(N,1)
#-1 : not converged; 1 converged; 0: converged but hit bounds
# intermediate function to get value of S from T iterates
T₂_(q0) = abs(q0) < 1 ? S(p₀(transpose(A)*[η.(q0),q0])) : sign(q0) * tf - xT
for i = 1:N
# remove old iterates
global iterate_S2 = Vector{Float64}()
global iterate_T2 = Vector{Float64}()
# solve
q_sol_S2 = fsolve(S₂!, JS₂!, [ξ[i]], show_trace = false, tracing = true)
q_sol_T2 = fsolve(T₂!, JT₂!, [ξ[i]], show_trace = false, tracing = true)
# store results is converged
if q_sol_S2.converged
fnorm_S2 = [q_sol_S2.trace.trace[j].fnorm for j ∈ 1:length(q_sol_S2.trace.trace)]
iterates_S2[i,1:length(iterate_S2)] = iterate_S2
conv_S2[i] = length(findall(x-> abs(x) > 1, iterate_S2)) == 0
fnorms_S2[i,1:length(fnorm_S2)] = fnorm_S2
else
conv_S2[i] = -1
end
if q_sol_T2.converged
fnorm_T2 = [q_sol_T2.trace.trace[j].fnorm for j ∈ 1:length(q_sol_T2.trace.trace)]
iterates_T2[i,1:length(iterate_T2)] = iterate_T2
conv_T2[i] = length(findall(x-> abs(x) > 1, iterate_T2)) == 0
fnorms_T2[i,1:length(fnorm_T2)] = fnorm_T2
fnorms_T2_in_x[i, 1:length(iterate_T2)] = abs.(T₂_.(iterate_T2))
else
conv_T2[i] = -1
end
end
# mean
mean_fnorms_S2 = mean(fnorms_S2, dims = 1)
mean_fnorms_T2 = mean(fnorms_T2, dims = 1)
mean_fnorms_T2_in_x = mean(fnorms_T2_in_x, dims = 1)
# remove zeros with tolerance ε
ε = 1e-9;
mean_fnorms_S2 = mean_fnorms_S2[1:findall(x -> x < ε, mean_fnorms_S2)[1][2]]
mean_fnorms_T2 = mean_fnorms_T2[1:findall(x -> x < ε, mean_fnorms_T2)[1][2]]
mean_fnorms_T2_in_x = mean_fnorms_T2_in_x[1:findall(x -> x < ε, mean_fnorms_T2_in_x)[1][2]]
# plots
plt_1 = plot(0:length(mean_fnorms_S2)-1, mean_fnorms_S2, label = "S₂")
plot!(0:length(mean_fnorms_T2)-1, mean_fnorms_T2, label = "T₂")
plot!(0:length(mean_fnorms_T2_in_x)-1, mean_fnorms_T2_in_x, label = "T₂ in x")
plot!(yaxis = :log10, xlim = [0, 30], ylim = [ε, 10], xlabel = "Error", ylabel = "Iterations")
plt_21 = plot(ξ, S₂, color = :black, label = "")
color = [conv_S2[i]==1 ? :green : conv_S2[i] == 0 ? :blue : :red for i ∈ 1:N]
scatter!(ξ, S₂, color = color, markerstrokecolor = color, marker = 2, label ="")
plot!([-1,1], [xT,xT], color = :black, label = "")
plot!(xlabel = "p₀", ylabel = "S₂", label = "")
plt_22 = plot(ξ, T₂, color = :black, label = "")
color = [conv_T2[i]==1 ? :green : conv_T2[i] == 0 ? :blue : :red for i ∈ 1:N]
scatter!(ξ, T₂, markercolor = color, markerstrokecolor = color, marker = 2, label = "")
plot!([-1,1], [yT,yT], color = :black, label = "")
plot!(xlabel = "q₀", ylabel = "T₂", label = "")
scatter!(1,1, markercolor = :green, markerstrokecolor = :green, marker = 2, label = "converged")
scatter!(1,1, markercolor = :blue, markerstrokecolor = :blue, marker = 2, label = "converged but hit bounds")
scatter!(1,1, markercolor = :red, markerstrokecolor = :red, marker = 2, label = "not converged")
plt_2 = plot(plt_21, plt_22, layout = (1,2))
plot(plt_1, plt_2, layout = grid(2,1, heights = [0.5, 0.5]), size=(700, 600))