﻿{"id":3299,"date":"2025-08-27T16:54:21","date_gmt":"2025-08-27T08:54:21","guid":{"rendered":"https:\/\/www.leexinghai.com\/aic\/?p=3299"},"modified":"2025-08-27T16:57:53","modified_gmt":"2025-08-27T08:57:53","slug":"3-10%e8%ae%ad%e7%bb%83%e5%8f%82%e6%95%b0%e8%ae%be%e7%bd%ae","status":"publish","type":"post","link":"https:\/\/www.leexinghai.com\/aic\/3-10%e8%ae%ad%e7%bb%83%e5%8f%82%e6%95%b0%e8%ae%be%e7%bd%ae\/","title":{"rendered":"3.10\u8bad\u7ec3\u53c2\u6570\u8bbe\u7f6e"},"content":{"rendered":"\n<p>\u4e09\u4e2a\u8bad\u7ec3\u795e\u7ecf\u7f51\u7edc\u7684\u5efa\u8bae<\/p>\n\n\n\n<p>(1)\u4e00\u822c\u60c5\u51b5\u4e0b\uff0c\u5728\u8bad\u7ec3\u96c6\u4e0a\u7684\u76ee\u6807\u51fd\u6570\u7684\u5e73\u5747\u503c(cost)\u4f1a\u968f\u7740\u8bad\u7ec3\u7684\u6df1\u5165\u800c\u4e0d\u65ad\u51cf\u5c0f\uff0c\u5982\u679c\u8fd9\u4e2a\u6307\u6807\u6709\u589e\u5927\u60c5\u51b5\uff0c\u505c\u4e0b\u6765\u3002<br>\u6709\u4e24\u79cd\u60c5\u51b5:<\/p>\n\n\n\n<ul class=\"wp-block-list\">\n<li>\u91c7\u7528\u7684\u6a21\u578b\u4e0d\u591f\u590d\u6742\uff0c\u4ee5\u81f4\u4e8e\u4e0d\u80fd\u5728\u8bad\u7ec3\u96c6\u4e0a\u5b8c\u5168\u62df\u5408;<\/li>\n\n\n\n<li>\u5df2\u7ecf\u8bad\u7ec3\u5f88\u597d\u4e86\u3002<\/li>\n<\/ul>\n\n\n\n<p>(2)<strong>\u5206\u51fa\u4e00\u4e9b\u9a8c\u8bc1\u96c6Validation Set\uff0c\u8bad\u7ec3\u672c\u8d28\u76ee\u6807\u662f\u5728\u9a8c\u8bc1\u96c6\u4e0a\u83b7\u53d6\u6700\u5927\u8bc6\u522b\u7387\u3002<\/strong>\u56e0\u6b64\u8bad\u7ec3\u4e00\u6bb5\u65f6\u95f4\u540e\uff0c\u5fc5\u987b\u5728\u9a8c\u8bc1\u96c6\u4e0a\u6d4b\u8bd5\u8bc6\u522b\u7387\uff0c\u4fdd\u5b58\u4f7f\u9a8c\u8bc1\u96c6\u4e0a\u8bc6\u522b\u7387\u6700\u5927\u7684\u6a21\u578b\u53c2\u6570\u4f5c\u4e3a\u6700\u540e\u7684\u7ed3\u679c\u3002<\/p>\n\n\n\n<p>(3)<strong>\u6ce8\u610f\u8c03\u6574\u5b66\u4e60\u7387Learning 
Rate<\/strong>\uff0c\u5982\u679c\u521a\u8bad\u7ec3\u51e0\u6b65\u635f\u5931\u51fd\u6570\u5c31\u589e\u52a0\uff0c\u4e00\u822c\u6765\u8bf4\u662f\u5b66\u4e60\u7387\u592a\u9ad8\uff1b\u53cd\u4e4b\u5982\u679c\u6bcf\u6b21cost\u53d8\u5316\u5f88\u5c0f\uff0c\u8bf4\u660e\u5b66\u4e60\u7387\u592a\u4f4e\u3002<\/p>\n\n\n\n<hr class=\"wp-block-separator has-alpha-channel-opacity\"\/>\n\n\n\n<h2 class=\"wp-block-heading\">\u4e00\u70b9\u4eba\u751f\u7684\u7ecf\u9a8c\uff1a<\/h2>\n\n\n\n<p>\uff081\uff09\u76ee\u6807\u51fd\u6570\u53ef\u4ee5\u52a0\u5165\u6b63\u5219\u9879<\/p>\n\n\n\n<p class=\"has-text-align-center\">Minimize E(\u03c9\uff0cb)=L(\u03c9\uff0cb)+\u03bb\/2 ||\u03c9||<sup>2<\/sup>  <\/p>\n\n\n\n<p>L(\u03c9\uff0cb)\u4e3a\u539f\u6765\u7684\u76ee\u6807\u51fd\u6570\uff0c\u03bb\/2 ||\u03c9||<sup>2<\/sup>\u4e3a\u6b63\u5219\u9879\u3002\u03bb\u4e3a\u6743\u503c\u8870\u51cf\u7cfb\u6570<\/p>\n\n\n\n<p>\u53c2\u8003\u524d\u5411\u4f20\u64ad<code>nn_forward.m<\/code><\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>if strcmp(nn.objective_function,'MSE')\n            nn.cost(s) = 0.5 \/ m * sum(sum((nn.a{k} - batch_y).^2)) + 0.5 * nn.weight_decay * cost2;\n        elseif strcmp(nn.objective_function,'Cross Entropy')\n            nn.cost(s) = -0.5*sum(sum(batch_y.*log(nn.a{k})))\/m + 0.5 * nn.weight_decay * cost2;<\/code><\/pre>\n\n\n\n<p>\u540e\u5411\u4f20\u64ad<code>nn_backpropagation.m<\/code><\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>nn.W_grad{nn.depth-1} = nn.theta{nn.depth}*nn.a{nn.depth-1}'\/m + nn.weight_decay*nn.W{nn.depth-1};\nnn.b_grad{nn.depth-1} = sum(nn.theta{nn.depth},2)\/m;<\/code><\/pre>\n\n\n\n<p>\uff082\uff09\u8bad\u7ec3\u6570\u636e\u5f52\u4e00\u5316<\/p>\n\n\n\n<p>newX=[X-mean(X)]\/ 
std(X)<\/p>\n\n\n\n<p>\uff083\uff09\u53c2\u6570\u03c9\u548cb\u7684\u521d\u59cb\u5316<\/p>\n\n\n\n<p>\u4e00\u79cd\u6bd4\u8f83\u7b80\u5355\u6709\u6548\u7684\u65b9\u6cd5\uff1a<\/p>\n\n\n\n<p>\uff08\u03c9\uff0cb\uff09\u521d\u59cb\u5316\u4ece\u533a\u95f4\uff08-1\/sqrt(d),1\/sqrt(d)\uff09\u5747\u5300\u968f\u673a\u53d6\u503c\uff0c\u5176\u4e2dd\u4e3a\uff08\u03c9\uff0cb\uff09\u6240\u5728\u5c42\u7684\u795e\u7ecf\u5143\u4e2a\u6570\u3002<\/p>\n\n\n\n<p>\u53ef\u4ee5\u8bc1\u660e\u5982\u679cX\u670d\u4ece\u5747\u503c0\u65b9\u5dee1\u7684\u6b63\u6001\u5206\u5e03\uff0c\u4e14\u5404\u4e2a\u7ef4\u5ea6\u65e0\u5173\uff0c\u800c\uff08\u03c9\uff0cb\uff09\u662f\u533a\u95f4\uff08-1\/sqrt(d),1\/sqrt(d)\uff09\u7684\u5747\u5300\u5206\u5e03\uff0c\u5219\u03c9<sup>T<\/sup>X+b\u662f\u5747\u503c0\uff0c\u65b9\u5dee\u4e3a1\/3\u7684\u6b63\u6001\u5206\u5e03<\/p>\n\n\n\n<p><code>nn_create.m<\/code><\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>nn.W{k} = 2*rand(height, width)\/sqrt(width)-1\/sqrt(width);%rand\u4ea7\u751f\u4f2a\u968f\u673a\u6570\u77e9\u9635\uff0c\u5373W\u6743\u91cd\u77e9\u9635\u521d\u59cb\u5316\nnn.b{k} = 2*rand(height, 1)\/sqrt(width)-1\/sqrt(width);%b\u9608\u503c\u7684\u521d\u59cb\u5316<\/code><\/pre>\n\n\n\n<p>\u907f\u514d\u4e00\u5f00\u59cb\u68af\u5ea6\u8d8b\u8fd1\u4e8e0\u7684\u73b0\u8c61\u3002<\/p>\n\n\n\n<p>\uff084\uff09BATCH NORMALIZATION<\/p>\n\n\n\n<p>\u8bba\u6587:Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift (2015)<\/p>\n\n\n\n<p>\u5728\u8fd9\u53ef\u4ee5\u770b\uff1a<\/p>\n\n\n\n<div data-wp-interactive=\"core\/file\" class=\"wp-block-file\"><object data-wp-bind--hidden=\"!state.hasPdfPreview\" hidden class=\"wp-block-file__embed\" data=\"https:\/\/www.leexinghai.com\/aic\/wp-content\/uploads\/2025\/08\/43442.pdf\" type=\"application\/pdf\" style=\"width:100%;height:600px\" aria-label=\"\u5d4c\u5165 \u4e5f\u53ef\u4ee5\u70b9\u51fb\u76f4\u63a5\u4e0b\u8f7d\u8bba\u6587\uff1a\"><\/object><a 
id=\"wp-block-file--media-0073f6c1-f969-40f8-885f-d0335b359207\" href=\"https:\/\/www.leexinghai.com\/aic\/wp-content\/uploads\/2025\/08\/43442.pdf\">\u4e5f\u53ef\u4ee5\u70b9\u51fb\u76f4\u63a5\u4e0b\u8f7d\u8bba\u6587\uff1a<\/a><a href=\"https:\/\/www.leexinghai.com\/aic\/wp-content\/uploads\/2025\/08\/43442.pdf\" class=\"wp-block-file__button wp-element-button\" download aria-describedby=\"wp-block-file--media-0073f6c1-f969-40f8-885f-d0335b359207\">\u4e0b\u8f7d<\/a><\/div>\n\n\n\n<p><br>\u57fa\u672c\u601d\u60f3:\u65e2\u7136\u6211\u4eec\u5e0c\u671b\u6bcf\u4e00\u5c42\u83b7\u5f97\u7684\u503c\u90fd\u57280\u9644\u8fd1\uff0c\u4ece\u800c\u907f\u514d\u68af\u5ea6\u6d88\u5931\u73b0\u8c61\uff0c\u90a3\u4e48\u6211\u4eec\u4e3a\u4ec0\u4e48\u4e0d\u76f4\u63a5\u628a\u6bcf\u4e00\u5c42\u7684\u503c\u505a\u57fa\u4e8e\u5747\u503c\u548c\u65b9\u5dee\u7684\u5f52\u4e00\u5316\u5462?<\/p>\n\n\n\n<p>\uff085\uff09\u53c2\u6570\u7684\u66f4\u65b0\u7b56\u7565<\/p>\n\n\n\n<p>ADAGRAD\u7684\u65b9\u6cd5<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>if strcmp(nn.optimization_method,'AdaGrad')\nnn.rW{k}= nn.rW{k}+nn.W_grad{k}.^2;nn.rb{k}= nn.rb{k}+nn.b_grad{k}.^2;\nnn.W{k}=nn.W{k}-nn.learning_rate*nn.W_grad{k}.\/(sqrt(nn.rW{k})+0.001);\nnn.b{k}=nn.b{k}-nn.learning_rate*nn.b_grad{k}.\/(sqrt(nn.rb{k})+0.001);<\/code><\/pre>\n\n\n\n<p>\u89e3\u51b3\u68af\u5ea6\u968f\u673a\u6027\u7684\u95ee\u9898\uff1a\u5f15\u5165Momentum<\/p>\n\n\n\n<p>\u540c\u65f6\u7ed3\u5408\uff1aAdam-\u89e3\u51b3\u68af\u5ea6\u7edd\u5bf9\u503c\u5206\u91cf\u4e0d\u5e73\u8861\u548c\u68af\u5ea6\u65b9\u5411\u968f\u673a\u6027\u7684\u95ee\u9898\uff0c\u4e5f\u5f15\u5165\u4e86\u9010\u6e10\u964d\u4f4e\u68af\u5ea6\u641c\u7d22\u6b65\u957f\u7684\u673a\u5236\u3002<\/p>\n\n\n\n<figure class=\"wp-block-image size-large\"><img loading=\"lazy\" decoding=\"async\" width=\"1024\" height=\"770\" src=\"https:\/\/www.leexinghai.com\/aic\/wp-content\/uploads\/2025\/08\/image-70-1024x770.png\" alt=\"\" class=\"wp-image-3302\" 
srcset=\"https:\/\/www.leexinghai.com\/aic\/wp-content\/uploads\/2025\/08\/image-70-1024x770.png 1024w, https:\/\/www.leexinghai.com\/aic\/wp-content\/uploads\/2025\/08\/image-70-300x226.png 300w, https:\/\/www.leexinghai.com\/aic\/wp-content\/uploads\/2025\/08\/image-70-768x578.png 768w, https:\/\/www.leexinghai.com\/aic\/wp-content\/uploads\/2025\/08\/image-70.png 1058w\" sizes=\"auto, (max-width: 1024px) 100vw, 1024px\" \/><\/figure>\n\n\n\n<h2 class=\"wp-block-heading\">\ud83d\udcdd \u7b97\u6cd5\u6b65\u9aa4\u89e3\u91ca<\/h2>\n\n\n\n<ol class=\"wp-block-list\">\n<li><strong>Require<\/strong>\n<ul class=\"wp-block-list\">\n<li><strong>Step size<\/strong> (\u03f5\uff0c\u5b66\u4e60\u7387)\uff0c\u63a8\u8350\u9ed8\u8ba4\u503c 0.001\u3002<\/li>\n\n\n\n<li><strong>Exponential decay rates<\/strong> (\u03c1<sub>1<\/sub>,\u03c1<sub>2<\/sub>)\uff1a\u5206\u522b\u63a7\u5236\u4e00\u9636\u3001\u4e8c\u9636\u52a8\u91cf\u7684\u8870\u51cf\u901f\u7387\u3002\u63a8\u8350\u9ed8\u8ba4\u503c \u03c1<sub>1<\/sub>=0.9\uff0c\u03c1<sub>2<\/sub>=0.999\u3002<\/li>\n\n\n\n<li><strong>Small constant \u03b4<\/strong>\uff1a\u6570\u503c\u7a33\u5b9a\u5e38\u6570\uff0c\u9632\u6b62\u5206\u6bcd\u4e3a\u96f6\uff0c\u9ed8\u8ba4 10<sup>\u22128<\/sup>\u3002<\/li>\n\n\n\n<li><strong>Initial parameters<\/strong> \u03b8\uff1a\u6a21\u578b\u521d\u59cb\u53c2\u6570\u3002<\/li>\n<\/ul>\n<\/li>\n<\/ol>\n\n\n\n<hr class=\"wp-block-separator has-alpha-channel-opacity\"\/>\n\n\n\n<ol start=\"2\" class=\"wp-block-list\">\n<li><strong>Initialize<\/strong>\n<ul class=\"wp-block-list\">\n<li>\u4e00\u9636\u52a8\u91cf\u53d8\u91cf s=0\uff08\u5b58\u653e\u68af\u5ea6\u7684\u6307\u6570\u52a0\u6743\u5e73\u5747\uff0c\u7c7b\u4f3c Momentum\uff09\u3002<\/li>\n\n\n\n<li>\u4e8c\u9636\u52a8\u91cf\u53d8\u91cf r=0\uff08\u5b58\u653e\u68af\u5ea6\u5e73\u65b9\u7684\u6307\u6570\u52a0\u6743\u5e73\u5747\uff0c\u7c7b\u4f3c RMSProp\uff09\u3002<\/li>\n\n\n\n<li>\u65f6\u95f4\u6b65 t=0\u3002<\/li>\n<\/ul>\n<\/li>\n\n\n\n<li><strong>\u5faa\u73af\u8fc7\u7a0b<\/strong> 
(\u76f4\u5230\u6ee1\u8db3\u505c\u6b62\u6761\u4ef6\uff0c\u4f8b\u5982\u8fed\u4ee3\u6b21\u6570\u7528\u5b8c\u6216\u6536\u655b) \n<ul class=\"wp-block-list\">\n<li><strong>Step A. \u91c7\u6837\u4e00\u4e2a minibatch<\/strong>\n<ul class=\"wp-block-list\">\n<li>\u4ece\u8bad\u7ec3\u96c6\u53d6\u51fa\u4e00\u4e2a\u5c0f\u6279\u91cf\u6837\u672c {x<sup>(1)<\/sup>,...,x<sup>(m)<\/sup>} \u548c\u5bf9\u5e94\u6807\u7b7e\u3002<\/li>\n<\/ul>\n<\/li>\n\n\n\n<li><strong>Step B. \u8ba1\u7b97\u68af\u5ea6<\/strong><\/li>\n<\/ul>\n<\/li>\n<\/ol>\n\n\n\n<math xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\" display=\"block\"><mi>g<\/mi><mo>=<\/mo><mfrac><mn>1<\/mn><mi>m<\/mi><\/mfrac><msub><mi mathvariant=\"normal\">\u2207<\/mi><mi>\u03b8<\/mi><\/msub><munderover><mo data-mjx-texclass=\"OP\">\u2211<\/mo><mrow><mi>i<\/mi><mo>=<\/mo><mn>1<\/mn><\/mrow><mi>m<\/mi><\/munderover><mi>L<\/mi><mo stretchy=\"false\">(<\/mo><mi>f<\/mi><mo stretchy=\"false\">(<\/mo><msup><mi>x<\/mi><mrow><mo stretchy=\"false\">(<\/mo><mi>i<\/mi><mo stretchy=\"false\">)<\/mo><\/mrow><\/msup><mo>;<\/mo><mi>\u03b8<\/mi><mo stretchy=\"false\">)<\/mo><mo>,<\/mo><msup><mi>y<\/mi><mrow><mo stretchy=\"false\">(<\/mo><mi>i<\/mi><mo stretchy=\"false\">)<\/mo><\/mrow><\/msup><mo stretchy=\"false\">)<\/mo><\/math>\n\n\n\n<p>\u5373\u5c0f\u6279\u91cf\u5e73\u5747\u68af\u5ea6\u3002 <\/p>\n\n\n\n<p><strong>Step C. \u65f6\u95f4\u6b65\u9012\u589e<\/strong> <\/p>\n\n\n\n<p class=\"has-text-align-center\">t\u2190t+1<\/p>\n\n\n\n<p> <strong>Step D. \u66f4\u65b0\u4e00\u9636\u52a8\u91cf\uff08\u504f\u7f6e\u7684\uff09<\/strong> <\/p>\n\n\n\n<p class=\"has-text-align-center\">s\u2190\u03c1<sub>1<\/sub>s+(1\u2212\u03c1<sub>1<\/sub>)g<\/p>\n\n\n\n<p> \u2014\u2014\u8fd9\u662f\u68af\u5ea6\u7684\u6307\u6570\u6ed1\u52a8\u5e73\u5747\uff08\u7c7b\u4f3c Momentum\uff09\u3002<\/p>\n\n\n\n<p><strong>Step E. 
\u66f4\u65b0\u4e8c\u9636\u52a8\u91cf\uff08\u504f\u7f6e\u7684\uff09<\/strong> <\/p>\n\n\n\n<p class=\"has-text-align-center\">r\u2190\u03c1<sub>2<\/sub>r+(1\u2212\u03c1<sub>2<\/sub>)(g\u2299g)<\/p>\n\n\n\n<p>\u2014\u2014\u8fd9\u91cc \u2299\u8868\u793a\u9010\u5143\u7d20\u4e58\u6cd5\u3002\u5373\u5bf9\u68af\u5ea6\u5e73\u65b9\u53d6\u6307\u6570\u6ed1\u52a8\u5e73\u5747\uff08\u7c7b\u4f3c RMSProp\uff09\u3002<\/p>\n\n\n\n<p> <strong>Step F. \u504f\u5dee\u4fee\u6b63<\/strong><br>\u7531\u4e8e\u521d\u59cb\u5316 s=0,r=0\uff0c\u524d\u671f\u4f1a\u6709\u5411\u96f6\u504f\u79fb\uff0c\u9700\u8981\u4fee\u6b63\uff1a <\/p>\n\n\n\n<math xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\" display=\"block\"><mrow><mover><mi>s<\/mi><mo stretchy=\"false\">^<\/mo><\/mover><\/mrow><mo>=<\/mo><mfrac><mi>s<\/mi><mrow><mn>1<\/mn><mo>\u2212<\/mo><msubsup><mi>\u03c1<\/mi><mn>1<\/mn><mi>t<\/mi><\/msubsup><\/mrow><\/mfrac><mo>,<\/mo><mstyle scriptlevel=\"0\"><mspace width=\"1em\"><\/mspace><\/mstyle><mrow><mover><mi>r<\/mi><mo stretchy=\"false\">^<\/mo><\/mover><\/mrow><mo>=<\/mo><mfrac><mi>r<\/mi><mrow><mn>1<\/mn><mo>\u2212<\/mo><msubsup><mi>\u03c1<\/mi><mn>2<\/mn><mi>t<\/mi><\/msubsup><\/mrow><\/mfrac><\/math>\n\n\n\n<p><strong>Step G. \u8ba1\u7b97\u66f4\u65b0\u91cf<\/strong> <\/p>\n\n\n\n<math xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\" display=\"block\"><mi mathvariant=\"normal\">\u0394<\/mi><mi>\u03b8<\/mi><mo>=<\/mo><mo>\u2212<\/mo><mi>\u03f5<\/mi><mo>\u22c5<\/mo><mfrac><mrow><mover><mi>s<\/mi><mo stretchy=\"false\">^<\/mo><\/mover><\/mrow><mrow><msqrt><mrow><mover><mi>r<\/mi><mo stretchy=\"false\">^<\/mo><\/mover><\/mrow><\/msqrt><mo>+<\/mo><mi>\u03b4<\/mi><\/mrow><\/mfrac><\/math>\n\n\n\n<p><strong>Step H. 
\u66f4\u65b0\u53c2\u6570<\/strong> <\/p>\n\n\n\n<p>\u03b8\u2190\u03b8+\u0394\u03b8<\/p>\n\n\n\n<hr class=\"wp-block-separator has-alpha-channel-opacity\"\/>\n\n\n\n<h2 class=\"wp-block-heading\">\ud83d\udd11 \u603b\u7ed3<\/h2>\n\n\n\n<ul class=\"wp-block-list\">\n<li><strong>s<\/strong>\uff1a\u68af\u5ea6\u7684\u4e00\u9636\u52a8\u91cf\uff08\u65b9\u5411 + \u5e73\u6ed1\uff09\u3002<\/li>\n\n\n\n<li><strong>r<\/strong>\uff1a\u68af\u5ea6\u7684\u4e8c\u9636\u52a8\u91cf\uff08\u5e45\u5ea6 + \u81ea\u9002\u5e94\u7f29\u653e\uff09\u3002<\/li>\n\n\n\n<li><strong>\u504f\u5dee\u4fee\u6b63<\/strong>\uff1a\u89e3\u51b3\u521d\u671f (s,r\u22480)\u7684\u4f30\u8ba1\u504f\u5dee\u95ee\u9898\u3002<\/li>\n\n\n\n<li><strong>\u66f4\u65b0\u516c\u5f0f<\/strong>\uff1a\u5b66\u4e60\u7387\u4f1a\u6839\u636e\u68af\u5ea6\u5386\u53f2\u52a8\u6001\u8c03\u6574\uff0c\u6bcf\u4e2a\u53c2\u6570\u6709\u81ea\u5df1\u72ec\u7acb\u7684\u5b66\u4e60\u7387\u3002<\/li>\n<\/ul>\n\n\n\n<p>Adam \u7684\u66f4\u65b0\u53ef\u4ee5\u7406\u89e3\u4e3a\uff1a<br>\ud83d\udc49 <strong>\u7528 Momentum \u51b3\u5b9a\u65b9\u5411<\/strong>\uff0c\u518d <strong>\u7528 RMSProp \u51b3\u5b9a\u6b65\u957f\u5927\u5c0f<\/strong>\u3002<\/p>\n\n\n\n<p>Python\u4ee3\u7801\u793a\u4f8b\uff08\u4e00\u9636\u52a8\u91cf\u7528 <code>s<\/code>\uff0c\u4e8c\u9636\u52a8\u91cf\u7528 <code>r<\/code>\uff0c\u542b\u504f\u5dee\u4fee\u6b63\uff1b\u5e76\u7ed9\u4e86\u4e00\u4e2a\u6700\u5c0f\u5316\u4e8c\u6b21\u51fd\u6570\u7684\u5c0f\u793a\u4f8b\uff09\uff1a<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>import numpy as np\n\nclass Adam:\n    \"\"\"\n    Adam \u4f18\u5316\u5668\uff08Algorithm 8.7\uff09\n    s: \u4e00\u9636\u52a8\u91cf\uff08biased\uff09\n    r: \u4e8c\u9636\u52a8\u91cf\uff08biased\uff09\n    \"\"\"\n    def __init__(self, shape, lr=1e-3, rho1=0.9, rho2=0.999, eps=1e-8):\n        self.lr   = lr        # \u03b5 (step size)\n        self.rho1 = rho1      # \u03c11\n        self.rho2 = rho2      # \u03c12\n        self.eps  = eps       # \u03b4\n        self.s    = 
np.zeros(shape)  # \u521d\u59cb\u5316\u4e00\u9636\u52a8\u91cf s=0\n        self.r    = np.zeros(shape)  # \u521d\u59cb\u5316\u4e8c\u9636\u52a8\u91cf r=0\n        self.t    = 0                  # \u521d\u59cb\u5316\u65f6\u95f4\u6b65 t=0\n\n    def step(self, theta, g):\n        \"\"\"\n        \u5355\u6b21\u66f4\u65b0\uff1a\n        theta: \u53c2\u6570\n        g:     \u5f53\u524d\u68af\u5ea6\uff08\u5bf9 minibatch \u7684\u5e73\u5747\u68af\u5ea6\uff09\n        return: \u66f4\u65b0\u540e\u7684\u53c2\u6570\n        \"\"\"\n        # t \u2190 t + 1\n        self.t += 1\n\n        # Update biased first moment estimate: s \u2190 \u03c11 s + (1-\u03c11) g\n        self.s = self.rho1 * self.s + (1.0 - self.rho1) * g\n\n        # Update biased second moment estimate: r \u2190 \u03c12 r + (1-\u03c12) (g \u2299 g)\n        self.r = self.rho2 * self.r + (1.0 - self.rho2) * (g * g)\n\n        # Correct bias:\n        # \u015d = s \/ (1 - \u03c11^t),   r\u0302 = r \/ (1 - \u03c12^t)\n        s_hat = self.s \/ (1.0 - self.rho1 ** self.t)\n        r_hat = self.r \/ (1.0 - self.rho2 ** self.t)\n\n        # Compute update: \u0394\u03b8 = -\u03b5 * \u015d \/ (sqrt(r\u0302) + \u03b4)\n        delta_theta = - self.lr * s_hat \/ (np.sqrt(r_hat) + self.eps)\n\n        # Apply update: \u03b8 \u2190 \u03b8 + \u0394\u03b8\n        theta = theta + delta_theta\n        return theta\n\n# ================= \u793a\u4f8b\uff1a\u6700\u5c0f\u5316 f(\u03b8)=\u2211 \u03b8_i^2 =================\n# \u771f\u68af\u5ea6\uff1a\u2207f(\u03b8)=2\u03b8\nnp.random.seed(0)\ntheta = np.random.randn(3) * 5.0        # \u521d\u59cb\u53c2\u6570\nopt   = Adam(shape=theta.shape, lr=1e-2) # \u7528\u9ed8\u8ba4 \u03c11=0.9, \u03c12=0.999, \u03b4=1e-8\n\nfor k in range(1, 501):\n    g = 2.0 * theta                      # \u8ba1\u7b97\u68af\u5ea6 (\u5c0f\u6279\u91cf\u5e73\u5747\u68af\u5ea6\u5728\u771f\u5b9e\u4efb\u52a1\u91cc\u66ff\u6362\u8fd9\u91cc)\n    theta = opt.step(theta, g)           # 
\u6309\u56fe\u4e2d\u6d41\u7a0b\u66f4\u65b0\n    if k % 100 == 0:\n        fval = (theta**2).sum()\n        print(f\"iter {k:3d}  f(theta)={fval:.6f}  theta={theta}\")\n\n# \u8f93\u51fa\u4f1a\u770b\u5230 f(\u03b8) \u5355\u8c03\u4e0b\u964d\uff0c\u03b8 \u6536\u655b\u5230 0 \u9644\u8fd1\n<\/code><\/pre>\n\n\n\n<p>\u8f93\u51fa\uff1a<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>iter 100  f(theta)=78.124320  theta=&#91;7.84011204 1.09919008 3.93048901]\niter 200  f(theta)=57.552498  theta=&#91;6.91704488 0.49699096 3.07570938]\niter 300  f(theta)=42.152403  theta=&#91;6.0541767  0.17936161 2.33819949]\niter 400  f(theta)=30.556370  theta=&#91;5.25288565 0.05091135 1.72074696]\niter 500  f(theta)=21.872385  theta=&#91;4.51437396 0.01130478 1.22175496]<\/code><\/pre>\n\n\n\n<h2 class=\"wp-block-heading\">\ud83d\udcca \u7ed3\u679c\u9010\u884c\u89e3\u91ca<\/h2>\n\n\n\n<p>\u8f93\u51fa\u662f\u6bcf 100 \u6b21\u8fed\u4ee3\u6253\u5370\u4e00\u6b21\uff1a<\/p>\n\n\n\n<h3 class=\"wp-block-heading\">Iter 100<\/h3>\n\n\n\n<pre class=\"wp-block-code\"><code>f(theta)=78.124320  \ntheta=&#91;7.84011204 1.09919008 3.93048901]<\/code><\/pre>\n\n\n\n<ul class=\"wp-block-list\">\n<li>\u521d\u59cb \u03b8 \u5f88\u5927\uff08\u4e00\u5f00\u59cb\u662f <code>np.random.randn(3)*5<\/code> \u968f\u673a\u51fa\u6765\u7684\uff09\u3002<\/li>\n\n\n\n<li>\u7ecf\u8fc7 100 \u6b65\u66f4\u65b0\u540e\uff0c\u53c2\u6570\u503c\u6bd4\u521d\u59cb\u5c0f\u4e86\u4e00\u4e9b\uff0c\u4f46\u8fd8\u6bd4\u8f83\u5927\u3002\u76ee\u6807\u51fd\u6570 f(\u03b8) \u8fd8\u5728 78 \u5de6\u53f3\u3002<\/li>\n<\/ul>\n\n\n\n<hr class=\"wp-block-separator has-alpha-channel-opacity\"\/>\n\n\n\n<h3 class=\"wp-block-heading\">Iter 200<\/h3>\n\n\n\n<pre class=\"wp-block-code\"><code>f(theta)=57.552498  \ntheta=&#91;6.91704488 0.49699096 3.07570938]<\/code><\/pre>\n\n\n\n<ul class=\"wp-block-list\">\n<li>\u03b8 \u7684\u6570\u503c\u8fdb\u4e00\u6b65\u4e0b\u964d\u4e86\uff08\u5c24\u5176\u662f\u7b2c\u4e8c\u4e2a\u5206\u91cf\u4ece ~1.1 \u2192 
0.49\uff09\u3002<\/li>\n\n\n\n<li>\u51fd\u6570\u503c f(\u03b8) \u4ece 78 \u964d\u5230\u4e86 57\uff0c\u8bf4\u660e Adam \u5728\u5f80 0 \u7684\u65b9\u5411\u8d70\u3002<\/li>\n<\/ul>\n\n\n\n<hr class=\"wp-block-separator has-alpha-channel-opacity\"\/>\n\n\n\n<h3 class=\"wp-block-heading\">Iter 300<\/h3>\n\n\n\n<pre class=\"wp-block-code\"><code>f(theta)=42.152403  \ntheta=&#91;6.0541767  0.17936161 2.33819949]<\/code><\/pre>\n\n\n\n<ul class=\"wp-block-list\">\n<li>\u7ee7\u7eed\u4e0b\u964d\uff0cf \u503c\u53d8\u6210 ~42\u3002<\/li>\n\n\n\n<li>\u7b2c\u4e8c\u4e2a\u5206\u91cf\uff080.179\uff09\u51e0\u4e4e\u5feb\u6536\u655b\u5230 0 \u4e86\u3002<\/li>\n<\/ul>\n\n\n\n<hr class=\"wp-block-separator has-alpha-channel-opacity\"\/>\n\n\n\n<h3 class=\"wp-block-heading\">Iter 400<\/h3>\n\n\n\n<pre class=\"wp-block-code\"><code>f(theta)=30.556370  \ntheta=&#91;5.25288565 0.05091135 1.72074696]<\/code><\/pre>\n\n\n\n<ul class=\"wp-block-list\">\n<li>\u4e09\u4e2a\u5206\u91cf\u7ee7\u7eed\u51cf\u5c0f\uff0c\u51fd\u6570\u503c\u4e5f\u7ee7\u7eed\u4e0b\u964d\u3002<\/li>\n\n\n\n<li>\u53ef\u4ee5\u770b\u51fa\u6765\u53c2\u6570\u5728\u9010\u6b65\u5f80 0 \u6536\u7f29\u3002<\/li>\n<\/ul>\n\n\n\n<hr class=\"wp-block-separator has-alpha-channel-opacity\"\/>\n\n\n\n<h3 class=\"wp-block-heading\">Iter 500<\/h3>\n\n\n\n<pre class=\"wp-block-code\"><code>f(theta)=21.872385  \ntheta=&#91;4.51437396 0.01130478 1.22175496]<\/code><\/pre>\n\n\n\n<ul class=\"wp-block-list\">\n<li>\u6b64\u65f6 f \u503c\u8fd8\u5728\u4e0b\u964d\uff0821\uff09\uff0c\u4f46\u4e0b\u964d\u901f\u5ea6\u53d8\u6162\u4e86\u3002<\/li>\n\n\n\n<li>\u7b2c\u4e8c\u4e2a\u53c2\u6570\u5df2\u7ecf\u57fa\u672c\u5230 0\uff080.01\uff09\uff0c\u5176\u4ed6\u4e24\u4e2a\u53c2\u6570\u4e5f\u660e\u663e\u6bd4\u6700\u5f00\u59cb\u5c0f\u4e86\u5f88\u591a\u3002<\/li>\n<\/ul>\n\n\n\n<hr class=\"wp-block-separator has-alpha-channel-opacity\"\/>\n\n\n\n<h2 class=\"wp-block-heading\">\ud83d\udd11 \u603b\u7ed3<\/h2>\n\n\n\n<ol 
class=\"wp-block-list\">\n<li><strong>\u8d8b\u52bf<\/strong>\uff1a\u51fd\u6570\u503c\u4ece 78 \u2192 57 \u2192 42 \u2192 30 \u2192 21\uff0c\u8bf4\u660e\u4f18\u5316\u5668 Adam \u786e\u5b9e\u5728\u4e0d\u65ad\u8ba9\u76ee\u6807\u51fd\u6570\u4e0b\u964d\u3002<\/li>\n\n\n\n<li><strong>\u53c2\u6570\u6536\u655b<\/strong>\uff1a\u03b8 \u5728\u9010\u6b65\u5f80 0 \u6536\u655b\uff08\u6700\u7ec8\u6700\u4f18\u89e3\uff09\u3002<\/li>\n\n\n\n<li><strong>\u4e0b\u964d\u901f\u5ea6<\/strong>\uff1a\u4e00\u5f00\u59cb\u4e0b\u964d\u5feb\uff0c\u540e\u9762\u8d8a\u6765\u8d8a\u6162\uff0c\u8fd9\u662f\u6b63\u5e38\u7684\uff0c\u56e0\u4e3a\u8d8a\u9760\u8fd1\u6700\u4f18\u70b9\uff0c\u68af\u5ea6\u8d8a\u5c0f\u3002<\/li>\n\n\n\n<li><strong>\u672a\u5230 0<\/strong>\uff1a500 \u6b65\u8fd8\u6ca1\u5b8c\u5168\u5230 0\uff0c\u662f\u56e0\u4e3a\u5b66\u4e60\u7387\u6bd4\u8f83\u5c0f\uff08lr=0.01\uff09\uff0c\u5982\u679c\u7ee7\u7eed\u8fed\u4ee3\u6216\u9002\u5f53\u8c03\u5927\u5b66\u4e60\u7387\uff0c\u03b8 \u4f1a\u66f4\u5feb\u903c\u8fd1 0\u3002<\/li>\n<\/ol>\n\n\n\n<hr class=\"wp-block-separator has-alpha-channel-opacity\"\/>\n\n\n\n<p><strong>\u753b\u56fe\u9a8c\u8bc1Adam \u80fd\u4e0d\u65ad\u51cf\u5c0f\u635f\u5931\u51fd\u6570\uff0c\u5e76\u9010\u6e10\u6536\u655b\u5230\u6700\u4f18\u89e3\u7684python\u4ee3\u7801\u8bf7\u5230\u7b2c2\u9875\u67e5\u770b\ud83d\udc47<\/strong><\/p>\n\n\n\n<!--nextpage-->\n\n\n\n<p>Adam \u80fd\u4e0d\u65ad\u51cf\u5c0f\u635f\u5931\u51fd\u6570\uff0c\u5e76\u9010\u6e10\u6536\u655b\u5230\u6700\u4f18\u89e3\u3002<\/p>\n\n\n\n<p>\u753b\u56fe\u9a8c\u8bc1python\u4ee3\u7801\uff1a<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>import matplotlib\nimport numpy as np\nimport matplotlib.pyplot as plt\nimport platform\nsystem = platform.system()\nif system == \"Windows\":\n    matplotlib.rcParams&#91;'font.family'] = 'Microsoft YaHei'\nelif system == \"Darwin\":\n    matplotlib.rcParams&#91;'font.family'] = 'Arial Unicode MS'\nelse:\n    matplotlib.rcParams&#91;'font.family'] = 
'SimHei'\nmatplotlib.rcParams&#91;'axes.unicode_minus'] = False\nclass Adam:\n    def __init__(self, shape, lr=1e-2, rho1=0.9, rho2=0.999, eps=1e-8):\n        self.lr   = lr\n        self.rho1 = rho1\n        self.rho2 = rho2\n        self.eps  = eps\n        self.s    = np.zeros(shape)\n        self.r    = np.zeros(shape)\n        self.t    = 0\n\n    def step(self, theta, g):\n        self.t += 1\n        self.s = self.rho1 * self.s + (1.0 - self.rho1) * g\n        self.r = self.rho2 * self.r + (1.0 - self.rho2) * (g * g)\n        s_hat = self.s \/ (1.0 - self.rho1 ** self.t)\n        r_hat = self.r \/ (1.0 - self.rho2 ** self.t)\n        delta_theta = - self.lr * s_hat \/ (np.sqrt(r_hat) + self.eps)\n        theta = theta + delta_theta\n        return theta\n\n# \u6a21\u62df\u4f18\u5316 f(\u03b8) = \u2211 \u03b8_i^2\nnp.random.seed(0)\ntheta = np.random.randn(3) * 5.0\nopt = Adam(shape=theta.shape, lr=1e-2)\n\nlosses = &#91;]\nfor k in range(1, 1001):\n    g = 2.0 * theta\n    theta = opt.step(theta, g)\n    losses.append((theta**2).sum())\n\n# \u753b\u6536\u655b\u66f2\u7ebf\nplt.figure(figsize=(8,5))\nplt.plot(losses, label=\"f(\u03b8)\")\nplt.xlabel(\"\u8fed\u4ee3\u6b21\u6570\")\nplt.ylabel(\"\u76ee\u6807\u51fd\u6570\u503c f(\u03b8)\")\nplt.title(\"Adam \u4f18\u5316\u6536\u655b\u66f2\u7ebf\")\nplt.legend()\nplt.grid(True)\nplt.show()\n<\/code><\/pre>\n\n\n\n<figure class=\"wp-block-image size-large\"><img loading=\"lazy\" decoding=\"async\" width=\"1024\" height=\"720\" src=\"https:\/\/www.leexinghai.com\/aic\/wp-content\/uploads\/2025\/08\/image-71-1024x720.png\" alt=\"\" class=\"wp-image-3303\" srcset=\"https:\/\/www.leexinghai.com\/aic\/wp-content\/uploads\/2025\/08\/image-71-1024x720.png 1024w, https:\/\/www.leexinghai.com\/aic\/wp-content\/uploads\/2025\/08\/image-71-300x211.png 300w, https:\/\/www.leexinghai.com\/aic\/wp-content\/uploads\/2025\/08\/image-71-768x540.png 768w, https:\/\/www.leexinghai.com\/aic\/wp-content\/uploads\/2025\/08\/image-71.png 
1204w\" sizes=\"auto, (max-width: 1024px) 100vw, 1024px\" \/><\/figure>\n","protected":false},"excerpt":{"rendered":"<p>\u4e09\u4e2a\u8bad\u7ec3\u795e\u7ecf\u7f51\u7edc\u7684\u5efa\u8bae (1)\u4e00\u822c\u60c5\u51b5\u4e0b\uff0c\u5728\u8bad\u7ec3\u96c6\u4e0a\u7684\u76ee\u6807\u51fd\u6570\u7684\u5e73\u5747\u503c(cost)\u4f1a\u968f\u7740\u8bad\u7ec3\u7684\u6df1\u5165\u800c\u4e0d\u65ad\u51cf\u5c0f\uff0c [&hellip;]<\/p>\n","protected":false},"author":1,"featured_media":3045,"comment_status":"closed","ping_status":"closed","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[63],"tags":[],"class_list":["post-3299","post","type-post","status-publish","format-standard","has-post-thumbnail","hentry","category-mlsh"],"_links":{"self":[{"href":"https:\/\/www.leexinghai.com\/aic\/wp-json\/wp\/v2\/posts\/3299","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/www.leexinghai.com\/aic\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/www.leexinghai.com\/aic\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/www.leexinghai.com\/aic\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/www.leexinghai.com\/aic\/wp-json\/wp\/v2\/comments?post=3299"}],"version-history":[{"count":9,"href":"https:\/\/www.leexinghai.com\/aic\/wp-json\/wp\/v2\/posts\/3299\/revisions"}],"predecessor-version":[{"id":3312,"href":"https:\/\/www.leexinghai.com\/aic\/wp-json\/wp\/v2\/posts\/3299\/revisions\/3312"}],"wp:featuredmedia":[{"embeddable":true,"href":"https:\/\/www.leexinghai.com\/aic\/wp-json\/wp\/v2\/media\/3045"}],"wp:attachment":[{"href":"https:\/\/www.leexinghai.com\/aic\/wp-json\/wp\/v2\/media?parent=3299"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/www.leexinghai.com\/aic\/wp-json\/wp\/v2\/categories?post=3299"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/www.leexinghai.com\/aic\/wp-json\/wp\/v2\/tags?post=3299"}],"curies":[{"name":"wp","href":"htt
ps:\/\/api.w.org\/{rel}","templated":true}]}}