# ## MIT License

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#

# In[3]:
import numpy as np
import numpy.linalg as la
import scipy.optimize as sopt
import matplotlib.pyplot as pt
from mpl_toolkits.mplot3d import axes3d
# ## Setup
# Here's a function. It's an oblong bowl made of two quadratic functions.
#
# This is pretty much the easiest 2D optimization job out there.
# In[4]:
def f(x):
return 0.5*x[0]**2 + 2.5*x[1]**2
def df(x):
return np.array([x[0], 5*x[1]])
# Let's take a look at the function. First in 3D:
# In[5]:
ax = pt.figure().add_subplot(projection='3d')
xmesh, ymesh = np.mgrid[-2:2:50j,-2:2:50j]
fmesh = f(np.array([xmesh, ymesh]))
ax.plot_surface(xmesh, ymesh, fmesh)
# And then as a "contour plot":
# In[6]:
pt.axis("equal")
pt.contour(xmesh, ymesh, fmesh)
# ## Part 1: Steepest Descent
#
# Next, initialize steepest descent with a starting guess:
# In[319]:
guesses = [np.array([2, 2./5])]
# Next, run Steepest Descent:
# In[356]:
x = guesses[-1]
s = -df(x)
def f1d(alpha):
return f(x + alpha*s)
alpha_opt = sopt.golden(f1d)
next_guess = x + alpha_opt * s
guesses.append(next_guess)
print(next_guess)
# Here's some plotting code to illustrate what just happened:
# In[357]:
pt.figure(figsize=(9, 9))
pt.axis("equal")
pt.contour(xmesh, ymesh, fmesh, 50)
it_array = np.array(guesses)
pt.plot(it_array.T[0], it_array.T[1], "x-")
# In[358]:
for i, guess in enumerate(guesses):
print(i, la.norm(guess, 2))
# ## Part 2: Adding in "momentum" / the "heavy ball" method
#
# Steepest descent with added "momentum" term:
#
# $$x_{k+1} = x_k - \alpha \nabla f(x_k) \color{red}{+ \beta (x_{k}-x_{k-1})}$$
# In[285]:
guesses = [np.array([2, 2./5])]
# beta = 0.01
beta = 0.1
# beta = 0.5
# beta = 1
# Explore different choices of the "momentum parameter" $\beta$ above.
# In[312]:
x = guesses[-1]
s = -df(x)
def f1d(alpha):
return f(x + alpha*s)
alpha_opt = sopt.golden(f1d)
next_guess = x + alpha_opt * s
if len(guesses) >= 2:
next_guess = next_guess + beta * (guesses[-1] - guesses[-2])
guesses.append(next_guess)
print(next_guess)
# In[317]:
pt.figure(figsize=(9, 9))
pt.axis("equal")
pt.contour(xmesh, ymesh, fmesh, 50)
it_array = np.array(guesses)
pt.plot(it_array.T[0], it_array.T[1], "x-")
# In[315]:
for i, guess in enumerate(guesses):
print(i, la.norm(guess, 2))
# In[ ]: