一、概述
继上一章代码后,这篇主要是针对于第5章代码的实现。部分代码有更改,会在下面说明,程序运行结果跟书中不完全一样,因为部分参数,书中并没有给出其在运行时设置的值,所以我根据我自己的调试进行了设置,运行出来的结果跟书中的结果大致一致。
二、具体实现
(一)最速下降法
1.示例函数文件
(1)示例函数1
f_test1
function y = f_test1(x)
%F_TEST1 此处显示有关此函数的摘要
% 此处显示详细说明
x1 = x(1);
x2 = x(2);
y = -1 / (x1 ^ 2 + x2 ^ 2 + 2);
end
g_test1
function g = g_test1(x)
%G_TEST1 此处显示有关此函数的摘要
% 此处显示详细说明
x1 = x(1);
x2 = x(2);
g1 = (2 * x1) / ((x1 ^ 2 + x2 ^ 2 + 2) ^ 2);
g2 = (2 * x2) / ((x1 ^ 2 + x2 ^ 2 + 2) ^ 2);
g = [g1; g2];
end
(2)示例函数2
f_test2
function y = f_test2(x)
%F_TEST2 此处显示有关此函数的摘要
% 此处显示详细说明
x1 = x(1);
x2 = x(2);
y = x1 ^ 2 + x2 ^ 2 + x1 * x2 + 2;
end
g_test2
function g = g_test2(x)
%G_TEST2 此处显示有关此函数的摘要
% 此处显示详细说明
x1 = x(1);
x2 = x(2);
g1 = 2 * x1 + x2;
g2 = x1 + 2 * x2;
g = [g1; g2];
end
(3)示例函数3
f_test3
function y = f_test3(x)
%F_TEST3 此处显示有关此函数的摘要
% 此处显示详细说明
x1 = x(1);
x2 = x(2);
y = (x1 ^ 2 + x2 ^ 2 - 1) ^ 2 + (x1 + x2 - 2) ^ 2;
end
g_test3
function g = g_test3(x)
%G_TEST3 此处显示有关此函数的摘要
% 此处显示详细说明
x1 = x(1);
x2 = x(2);
g1 = 2 * x1 + 2 * x2 + 4 * x1 * (x1 ^ 2 + x2 ^ 2 - 1) - 4;
g2 = 2 * x1 + 2 * x2 + 4 * x2 * (x1 ^ 2 + x2 ^ 2 - 1) - 4;
g = [g1; g2];
end
2.main.m文件
% 最速下降法的主运行文件,使用Wolfe_search条件进行步长探索
% 清空
close;
clear;
clc;
% 示例一
% x_initial = [2; 2];
% tolerance = 1e-6;
% [x_optimal, f_optimal, k] = Steepest_Descent(@f_test1, @g_test1, x_initial, tolerance);
% 示例二
% x_initial = [1; -4];
% tolerance = 1e-6;
% [x_optimal, f_optimal, k] = Steepest_Descent(@f_test2, @g_test2, x_initial, tolerance);
% 示例三
x_initial = [2; 2];
tolerance = 1e-6;
[x_optimal, f_optimal, k] = Steepest_Descent(@f_test3, @g_test3, x_initial, tolerance);
3.Steepest_Descent.m文件
部分内容存在更改。
function [x_optimal, f_optimal, k] = Steepest_Descent(f_test, g_test, x_initial, tolerance)
k = 1;
rho = 0.1;
sigma = 0.11;
x_current = x_initial;
g_current = g_test(x_current);
d_current = -g_current;
[x_next, f_next] = Wolfe_search(f_test, g_test, x_current, d_current, rho, sigma);
while (norm(x_next - x_current) > tolerance)
k = k + 1;
x_current = x_next;
g_current = g_test(x_current);
d_current = -g_current;
[x_next, f_next] = Wolfe_search(f_test, g_test, x_current, d_current, rho, sigma);
end
x_optimal = x_next;
f_optimal = f_next;
end
4.Wolfe_search.m文件
最速下降法所使用的Wolfe搜索。
function [x_next, f_next] = Wolfe_search(f_test, g_test, x_current, d_current, rho, sigma)
% f_test, 目标函数
% g_test, 目标函数对决策变量x的导数
% x_current, x在向量空间中的当前点
% d_current, f_test在x_current的下降搜索方向
% rho, 可接受系数
% sigma, 可接受点处的切线斜率大于初始点处切线斜率的倍数,0<rho<sigma<1
k_max = 1000;
k = 0;
f_current = f_test(x_current);
g_current = g_test(x_current);
f_alpha_lower_k = f_current;
g_alpha_lower_k = g_current;
df_alpha_lower_k = (d_current') * g_alpha_lower_k;
f_alpha_lower_0 = f_alpha_lower_k;
df_alpha_lower_0 = df_alpha_lower_k;
tolerance = 1e-4;
% tolerance = 0.0075;
if (abs(df_alpha_lower_k) > tolerance)
alpha_initial = - 2 * f_alpha_lower_k ./ df_alpha_lower_k;
else
alpha_initial = 1;
end
alpha_lower_k = 0;
alpha_upper_k = 1e8;
alpha_k = alpha_initial; % 这个值是从初始值开始
for k = 1:k_max
x_alpha_k = x_current + alpha_k .* d_current;
f_alpha_k = f_test(x_alpha_k);
g_alpha_k = g_test(x_alpha_k);
df_alpha_k = (d_current') * g_alpha_k;
Wolfe_condition1 = f_alpha_k - f_alpha_lower_0 - rho * alpha_k * (df_alpha_lower_0');
Wolfe_condition2 = sum(sigma * df_alpha_lower_0 - df_alpha_k);
if(Wolfe_condition1 <= 0)
if(Wolfe_condition2 <= 0)
% alpha_acceptable = alpha_k;
x_next = x_alpha_k;
f_next = f_alpha_k;
break;
else
delta_alpha_k = (alpha_k - alpha_lower_k) .* df_alpha_k ./ (df_alpha_lower_k - df_alpha_k);
if(delta_alpha_k <= 0)
alpha_k_temp = 2 * alpha_k;
else
alpha_k_temp = alpha_k + delta_alpha_k;
end
alpha_lower_k = alpha_k;
f_alpha_lower_k = f_alpha_k;
df_alpha_lower_k = df_alpha_k;
alpha_k = alpha_k_temp;
end
else
if (alpha_k < alpha_upper_k)
alpha_upper_k = alpha_k;
end
alpha_k_temp = alpha_lower_k + (1/2) * (((alpha_k - alpha_lower_k) ^ 2) * df_alpha_lower_k) / (f_alpha_lower_k - f_alpha_k + (alpha_k - alpha_lower_k) * df_alpha_lower_k);
alpha_k = alpha_k_temp;
end
if(alpha_upper_k - alpha_lower_k < tolerance)
% alpha_acceptable = alpha_k;
x_next = x_alpha_k;
f_next = f_alpha_k;
break;
end
end
% if((Wolfe_condition1 > 0)||(Wolfe_condition2 > 0))
% disp('Wolfe inexact line search algorithm failed');
% x_next = NaN;
% f_next = NaN;
% end
end
5.注意
因为书中并没有明确写出来这个地方的代码,所以我将第四章的代码进行后放进去运行。
(1)改动部分1
如图所示,这个地方的函数调用形式,我给修改了。
(2)改动部分2
针对Wolfe_search搜索中的tolerance设置的问题。发现,如果设置太小的话,可能无法运行出来正常的结果。这是因为搜索无法收敛导致的。
将tolerance适当增大可以有效避免这个问题。因为这个只是循环的一部分,之后还有循环,这个循环tolerance的大小对最后结果的影响不太大。
(二)牛顿法
1.示例函数
因为所使用的示例函数相同,根据书中内容,只需要格外增加海森矩阵即可。
(1)示例函数1
H_test1
function H = H_test1(x)
%H_TEST1 此处显示有关此函数的摘要
% 此处显示详细说明
x1 = x(1);
x2 = x(2);
h11 = 2 / (x1 ^ 2 + x2 ^ 2 + 2) - (8 * x1 ^ 2) / ((x1 ^ 2 + x2 ^ 2 + 2) ^ 3);
h12 = -(8 * x1 * x2) / ((x1 ^ 2 + x2 ^ 2 + 2) ^ 3);
h21 = -(8 * x1 * x2) / ((x1 ^ 2 + x2 ^ 2 + 2) ^ 3);
h22 = 2 / (x1 ^ 2 + x2 ^ 2 + 2) - (8 * x2 ^ 2) / ((x1 ^ 2 + x2 ^ 2 + 2) ^ 3);
H = [h11, h12; h21, h22];
end
(2)示例函数2
H_test2
function H = H_test2(x)
%H_TEST2 此处显示有关此函数的摘要
% 此处显示详细说明
h11 = 2;
h12 = 1;
h21 = 1;
h22 = 2;
H = [h11, h12; h21, h22];
end
(3)示例函数3
H_test3
function H = H_test3(x)
%H_TEST3 此处显示有关此函数的摘要
% 此处显示详细说明
x1 = x(1);
x2 = x(2);
h11 = 12 * x1 ^ 2 + 4 * x2 ^ 2 - 2;
h12 = 8 * x1 * x2 + 2;
h21 = 8 * x1 * x2 + 2;
h22 = 4 * x1 ^ 2 + 12 * x2 ^ 2 - 2;
H = [h11, h12; h21, h22];
end
2.main.m文件
% 牛顿法的主运行文件,使用Wolfe_search条件进行步长探索
% 清空
close;
clear;
clc;
% 示例一
% x_initial = [2; 2];
% tolerance = 1e-6;
% [x_optimal, f_optimal, k] = Newton(@f_test1, @g_test1, @H_test1, x_initial, tolerance);
% 示例二
% x_initial = [1; -4];
% tolerance = 1e-6;
% [x_optimal, f_optimal, k] = Newton(@f_test2, @g_test2, @H_test2, x_initial, tolerance);
% 示例三
x_initial = [2; 2];
tolerance = 1e-6;
[x_optimal, f_optimal, k] = Newton(@f_test3, @g_test3, @H_test3, x_initial, tolerance);
3.Newton.m文件
function [x_optimal, f_optimal, k] = Newton(f_test, g_test, H_test, x_initial, tolerance)
k = 1;
n = length(x_initial);
x_current = x_initial;
g_current = g_test(x_current);
H_current = H_test(x_current);
eigenvalue = eig(H_current);
min_eigenvalue = eigenvalue(1);
for i = 1:n
if (eigenvalue(i) < min_eigenvalue)
min_eigenvalue = eigenvalue(i);
end
end
if(min_eigenvalue <= 1e-8)
H_current = H_current + (-min_eigenvalue + 1e-4) * eye(n);
end
d_current = -inv(H_current) * g_current;
rho = 0.1;
sigma = 0.11;
[x_next, f_next] = Wolfe_search(f_test, g_test, x_current, d_current, rho, sigma);
while (norm(x_next - x_current) > tolerance)
k = k + 1;
x_current = x_next;
g_current = g_test(x_current);
H_current = H_test(x_current);
eigenvalue = eig(H_current);
min_eigenvalue = eigenvalue(1);
for i = 1:n
if (eigenvalue(i) < min_eigenvalue)
min_eigenvalue = eigenvalue(i);
end
end
if(min_eigenvalue <= 1e-8)
H_current = H_current + (-min_eigenvalue + 1e-4) * eye(n);
end
d_current = -inv(H_current) * g_current;
[x_next, f_next] = Wolfe_search(f_test, g_test, x_current, d_current, rho, sigma);
end
x_optimal = x_next;
f_optimal = f_next;
end
4.Wolfe_search.m文件
function [x_next, f_next] = Wolfe_search(f_test, g_test, x_current, d_current, rho, sigma)
% f_test, 目标函数
% g_test, 目标函数对决策变量x的导数
% x_current, x在向量空间中的当前点
% d_current, f_test在x_current的下降搜索方向
% rho, 可接受系数
% sigma, 可接受点处的切线斜率大于初始点处切线斜率的倍数,0<rho<sigma<1
k_max = 1000;
k = 0;
f_current = f_test(x_current);
g_current = g_test(x_current);
f_alpha_lower_k = f_current;
g_alpha_lower_k = g_current;
df_alpha_lower_k = (d_current') * g_alpha_lower_k;
f_alpha_lower_0 = f_alpha_lower_k;
df_alpha_lower_0 = df_alpha_lower_k;
tolerance = 1e-4;
% tolerance = 0.0075;
if (abs(df_alpha_lower_k) > tolerance)
alpha_initial = - 2 * f_alpha_lower_k ./ df_alpha_lower_k;
else
alpha_initial = 1;
end
alpha_lower_k = 0;
alpha_upper_k = 1e8;
alpha_k = alpha_initial; % 这个值是从初始值开始
for k = 1:k_max
x_alpha_k = x_current + alpha_k .* d_current;
f_alpha_k = f_test(x_alpha_k);
g_alpha_k = g_test(x_alpha_k);
df_alpha_k = (d_current') * g_alpha_k;
Wolfe_condition1 = f_alpha_k - f_alpha_lower_0 - rho * alpha_k * (df_alpha_lower_0');
Wolfe_condition2 = sum(sigma * df_alpha_lower_0 - df_alpha_k);
if(Wolfe_condition1 <= 0)
if(Wolfe_condition2 <= 0)
% alpha_acceptable = alpha_k;
x_next = x_alpha_k;
f_next = f_alpha_k;
break;
else
delta_alpha_k = (alpha_k - alpha_lower_k) .* df_alpha_k ./ (df_alpha_lower_k - df_alpha_k);
if(delta_alpha_k <= 0)
alpha_k_temp = 2 * alpha_k;
else
alpha_k_temp = alpha_k + delta_alpha_k;
end
alpha_lower_k = alpha_k;
f_alpha_lower_k = f_alpha_k;
df_alpha_lower_k = df_alpha_k;
alpha_k = alpha_k_temp;
end
else
if (alpha_k < alpha_upper_k)
alpha_upper_k = alpha_k;
end
alpha_k_temp = alpha_lower_k + (1/2) * (((alpha_k - alpha_lower_k) ^ 2) * df_alpha_lower_k) / (f_alpha_lower_k - f_alpha_k + (alpha_k - alpha_lower_k) * df_alpha_lower_k);
alpha_k = alpha_k_temp;
end
if(alpha_upper_k - alpha_lower_k < tolerance)
% alpha_acceptable = alpha_k;
x_next = x_alpha_k;
f_next = f_alpha_k;
break;
end
end
% if((Wolfe_condition1 > 0)||(Wolfe_condition2 > 0))
% disp('Wolfe inexact line search algorithm failed');
% x_next = NaN;
% f_next = NaN;
% end
end
(三)高斯牛顿法
1.示例函数
(1)示例函数1
F_test1
function y = F_test1(x)
x1 = x(1);
x2 = x(2);
y = (x1 ^ 2 + x2 ^ 2 - 1) ^ 2 + (x1 + x2 - 2) ^ 2;
end
G_test1
function G = G_test1(x)
x1 = x(1);
x2 = x(2);
G1 = 2 * (x1 ^ 2 + x2 ^ 2 - 1) * 2 * x1 + 2 * (x1 + x2 - 2);
G2 = 2 * (x1 ^ 2 + x2 ^ 2 - 1) * 2 * x2 + 2 * (x1 + x2 - 2);
G = [G1; G2];
end
J_test1
function J = J_test1(x)
x1 = x(1);
x2 = x(2);
J11 = 2 * x1;
J12 = 2 * x2;
J21 = 1;
J22 = 1;
J = [J11, J12; J21, J22];
end
(2)示例函数2
F_test2
function y = F_test2(x)
x1 = x(1);
x2 = x(2);
x3 = x(3);
y = (x1 + 5) ^ 2 + (x2 + 8) ^ 2 + (x3 + 7) ^ 2 + 2 * (x1 * x2) ^ 2 + 4 * (x1 * x2) ^ 2;
end
G_test2
function G = G_test2(x)
x1 = x(1);
x2 = x(2);
x3 = x(3);
G1 = 4 * x1 * x2 ^ 2 + 8 * x1 * x3 ^ 2 + 2 * x1 + 10;
G2 = 4 * x2 * x1 ^ 2 + 2 * x2 + 16;
G3 = 8 * x3 * x1 ^ 2 + 2 * x3 + 14;
G = [G1; G2; G3];
end
J_test2
function J = J_test2(x)
x1 = x(1);
x2 = x(2);
x3 = x(3);
J11 = 1;
J12 = 0;
J13 = 0;
J21 = 0;
J22 = 1;
J23 = 0;
J31 = 0;
J32 = 0;
J33 = 1;
J41 = 2 ^ (1/2) * x2;
J42 = 2 ^ (1/2) * x1;
J43 = 0;
J51 = 2 * x3;
J52 = 0;
J53 = 2 * x1;
J = [J11, J12, J13; J21, J22, J23; J31, J32, J33; J41, J42, J43; J51, J52, J53];
end
2.main.m文件
% 高斯牛顿法的主运行文件,使用Wolfe_search条件进行步长探索
% 清空
close;
clear;
clc;
% 示例一
% x_initial = [2; 2];
% tolerance = 1e-6;
% [x_optimal, f_optimal, k] = Guass_Newton(@F_test1, @G_test1, @J_test1, x_initial, tolerance);
% 示例二
x_initial = [4; 13; 11];
tolerance = 1e-6;
[x_optimal, f_optimal, k] = Guass_Newton(@F_test2, @G_test2, @J_test2, x_initial, tolerance);
3.Guass_Newton.m文件
function [x_optimal, f_optimal, k] = Guass_Newton(F_test, G_test, J_test, x_initial, tolerance)
k = 1;
n = length(x_initial);
H_modification = 1e-10 * eye(n);
x_current = x_initial;
G_current = G_test(x_current);
J_current = J_test(x_current);
H_current = 2 * (J_current') * J_current + H_modification;
d_current = -inv(H_current) * G_current;
rho = 0.1;
sigma = 0.11;
[x_next, f_next] = Wolfe_search(F_test, G_test, x_current, d_current, rho, sigma);
while (norm(x_next - x_current) > tolerance)
k = k + 1;
x_current = x_next;
G_current = G_test(x_current);
J_current = J_test(x_current);
H_current = 2 * (J_current') * J_current + H_modification;
d_current = -inv(H_current) * G_current;
[x_next, f_next] = Wolfe_search(F_test, G_test, x_current, d_current, rho, sigma);
end
x_optimal = x_next;
f_optimal = f_next;
end
4.Wolfe_search.m文件
function [x_next, f_next] = Wolfe_search(f_test, g_test, x_current, d_current, rho, sigma)
% f_test, 目标函数
% g_test, 目标函数对决策变量x的导数
% x_current, x在向量空间中的当前点
% d_current, f_test在x_current的下降搜索方向
% rho, 可接受系数
% sigma, 可接受点处的切线斜率大于初始点处切线斜率的倍数,0<rho<sigma<1
k_max = 1000;
k = 0;
f_current = f_test(x_current);
g_current = g_test(x_current);
f_alpha_lower_k = f_current;
g_alpha_lower_k = g_current;
df_alpha_lower_k = (d_current') * g_alpha_lower_k;
f_alpha_lower_0 = f_alpha_lower_k;
df_alpha_lower_0 = df_alpha_lower_k;
tolerance = 1e-8;
% tolerance = 0.0075;
if (abs(df_alpha_lower_k) > tolerance)
alpha_initial = - 2 * f_alpha_lower_k ./ df_alpha_lower_k;
else
alpha_initial = 1;
end
alpha_lower_k = 0;
alpha_upper_k = 1e8;
alpha_k = alpha_initial; % 这个值是从初始值开始
for k = 1:k_max
x_alpha_k = x_current + alpha_k .* d_current;
f_alpha_k = f_test(x_alpha_k);
g_alpha_k = g_test(x_alpha_k);
df_alpha_k = (d_current') * g_alpha_k;
Wolfe_condition1 = f_alpha_k - f_alpha_lower_0 - rho * alpha_k * (df_alpha_lower_0');
Wolfe_condition2 = sum(sigma * df_alpha_lower_0 - df_alpha_k);
if(Wolfe_condition1 <= 0)
if(Wolfe_condition2 <= 0)
% alpha_acceptable = alpha_k;
x_next = x_alpha_k;
f_next = f_alpha_k;
break;
else
delta_alpha_k = (alpha_k - alpha_lower_k) .* df_alpha_k ./ (df_alpha_lower_k - df_alpha_k);
if(delta_alpha_k <= 0)
alpha_k_temp = 2 * alpha_k;
else
alpha_k_temp = alpha_k + delta_alpha_k;
end
alpha_lower_k = alpha_k;
f_alpha_lower_k = f_alpha_k;
df_alpha_lower_k = df_alpha_k;
alpha_k = alpha_k_temp;
end
else
if (alpha_k < alpha_upper_k)
alpha_upper_k = alpha_k;
end
alpha_k_temp = alpha_lower_k + (1/2) * (((alpha_k - alpha_lower_k) ^ 2) * df_alpha_lower_k) / (f_alpha_lower_k - f_alpha_k + (alpha_k - alpha_lower_k) * df_alpha_lower_k);
alpha_k = alpha_k_temp;
end
if(alpha_upper_k - alpha_lower_k < tolerance)
% alpha_acceptable = alpha_k;
x_next = x_alpha_k;
f_next = f_alpha_k;
break;
end
end
% if((Wolfe_condition1 > 0)||(Wolfe_condition2 > 0))
% disp('Wolfe inexact line search algorithm failed');
% x_next = NaN;
% f_next = NaN;
% end
end