#!/usr/bin/env python
# coding: utf-8

# # Steepest Descent
# 
# Copyright (C) 2020 Andreas Kloeckner
# 
# <details>
# <summary>MIT License</summary>
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
# 
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
# 
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
# </details>

# In[3]:


import numpy as np
import numpy.linalg as la

import scipy.optimize as sopt

import matplotlib.pyplot as pt
from mpl_toolkits.mplot3d import axes3d


# ## Setup
# Here's a function. It's an oblong bowl made of two quadratic functions.
# 
# This is pretty much the easiest 2D optimization job out there.

# In[4]:


def f(x):
    return 0.5*x[0]**2 + 2.5*x[1]**2

def df(x):
    return np.array([x[0], 5*x[1]])


# Let's take a look at the function. First in 3D:

# In[5]:


fig = pt.figure()
ax = fig.gca(projection="3d")

xmesh, ymesh = np.mgrid[-2:2:50j,-2:2:50j]
fmesh = f(np.array([xmesh, ymesh]))
ax.plot_surface(xmesh, ymesh, fmesh)


# And then as a "contour plot":

# In[6]:


pt.axis("equal")
pt.contour(xmesh, ymesh, fmesh)


# ## Part 1: Steepest Descent
# 
# Next, initialize steepest descent with a starting guess:

# In[319]:


guesses = [np.array([2, 2./5])]


# Next, run Steepest Descent:

# In[356]:


x = guesses[-1]
s = -df(x)

def f1d(alpha):
    return f(x + alpha*s)

alpha_opt = sopt.golden(f1d)
next_guess = x + alpha_opt * s
guesses.append(next_guess)

print(next_guess)


# Here's some plotting code to illustrate what just happened:

# In[357]:


pt.figure(figsize=(9, 9))
pt.axis("equal")
pt.contour(xmesh, ymesh, fmesh, 50)
it_array = np.array(guesses)
pt.plot(it_array.T[0], it_array.T[1], "x-")


# In[358]:


for i, guess in enumerate(guesses):
    print(i, la.norm(guess, 2))


# ## Part 2: Adding in "momentum" / the "heavy ball" method
# 
# Steepest descent with added "momentum" term:
# 
# $$x_{k+1} = x_k - \alpha \nabla f(x_k) \color{red}{+ \beta (x_{k}-x_{k-1})}$$

# In[285]:


guesses = [np.array([2, 2./5])]

# beta = 0.01
beta = 0.1
# beta = 0.5
# beta = 1


# Explore different choices of the "momentum parameter" $\beta$ above.

# In[312]:


x = guesses[-1]
s = -df(x)

def f1d(alpha):
    return f(x + alpha*s)

alpha_opt = sopt.golden(f1d)
next_guess = x + alpha_opt * s

if len(guesses) >= 2:
    next_guess = next_guess + beta * (guesses[-1] - guesses[-2])

guesses.append(next_guess)

print(next_guess)


# In[317]:


pt.figure(figsize=(9, 9))
pt.axis("equal")
pt.contour(xmesh, ymesh, fmesh, 50)
it_array = np.array(guesses)
pt.plot(it_array.T[0], it_array.T[1], "x-")


# In[315]:


for i, guess in enumerate(guesses):
    print(i, la.norm(guess, 2))


# In[ ]: