{"id":644,"date":"2026-05-10T14:22:19","date_gmt":"2026-05-10T06:22:19","guid":{"rendered":"https:\/\/blog.nanoturtle.cn\/?post_type=thread&#038;p=644"},"modified":"2026-05-25T02:04:02","modified_gmt":"2026-05-24T18:04:02","slug":"cplusplus-data-grouping","status":"publish","type":"thread","link":"https:\/\/blog.nanoturtle.cn\/index.php\/thread\/cplusplus-data-grouping\/","title":{"rendered":"\u5982\u4f55\u7528C++\u5b9e\u73b0\u6570\u636e\u5206\u7ec4\u4f18\u5316\uff1f"},"content":{"rendered":"\n<p class=\"wp-block-paragraph\">\u6570\u636e\u5206\u7ec4\u4f18\u5316\u5728\u7edf\u8ba1\u5206\u6790\u4e2d\u5e76\u4e0d\u7f55\u89c1\uff0c\u6bd4\u5982\u4e00\u7ef4\u6570\u636e\u7684K-means\u805a\u7c7b\u3001\u76f4\u65b9\u56fe\u5747\u8861\u5316\uff0c\u751a\u81f3\u67d0\u4e9b\u6392\u5e8f\u95ee\u9898\u90fd\u80fd\u5f52\u7ed3\u4e3a\u201c\u5c06\u6709\u5e8f\u5e8f\u5217\u5206\u5272\u6210\u82e5\u5e72\u7ec4\uff0c\u4f7f\u5f97\u7ec4\u5185\u79bb\u5dee\u5e73\u65b9\u548c\u6700\u5c0f\u201d\u3002C++\u5b9e\u73b0\u8fd9\u7c7b\u7b97\u6cd5\u65f6\uff0c\u6838\u5fc3\u6311\u6218\u4e0d\u5728\u4e8e\u6392\u5e8f\u6216\u679a\u4e3e\uff0c\u800c\u5728\u4e8e\u5982\u4f55\u7528O(1)\u65f6\u95f4\u8ba1\u7b97\u4efb\u610f\u5b50\u533a\u95f4\u7684\u79bb\u5dee\u5e73\u65b9\u548c\uff0c\u4ece\u800c\u5c06\u66b4\u529b\u679a\u4e3e\u7684\u590d\u6742\u5ea6\u4eceO(n\u00b3)\u964d\u5230O(n\u00b2)\u751a\u81f3\u66f4\u4f4e\u3002<\/p>\n\n\n\n<h3 class=\"wp-block-heading\">\u6570\u5b66\u539f\u7406\u4e0e\u516c\u5f0f\u63a8\u5bfc<\/h3>\n\n\n\n<p class=\"wp-block-paragraph\">\u79bb\u5dee\u5e73\u65b9\u548c\uff08Sum of Squared Deviations, SSD\uff09\u5b9a\u4e49\u4e3a\u7ec4\u5185\u6bcf\u4e2a\u6570\u636e\u4e0e\u7ec4\u5747\u503c\u7684\u5dee\u7684\u5e73\u65b9\u548c\u3002\u5047\u8bbe\u4e00\u7ec4\u6570\u636e \\(x_1, x_2, \\dots, x_k\\)\uff0c\u5747\u503c \\(\\bar{x} = \\frac{1}{k}\\sum x_i\\)\uff0c\u5219 SSD = \\(\\sum (x_i - 
\\bar{x})^2\\)\u3002\u76f4\u63a5\u8ba1\u7b97\u9700\u8981\u904d\u5386\u7ec4\u5185\u6240\u6709\u5143\u7d20\uff0c\u4f46\u5229\u7528\u6052\u7b49\u5f0f\u53ef\u4ee5\u52a0\u901f\uff1a<\/p>\n\n\n\n<p class=\"wp-block-paragraph\">\\[ \\sum (x_i - \\bar{x})^2 = \\sum x_i^2 - \\frac{(\\sum x_i)^2}{k} \\]<\/p>\n\n\n\n<p class=\"wp-block-paragraph\">\u56e0\u6b64\uff0c\u53ea\u8981\u9884\u5148\u8ba1\u7b97\u51fa\u524d\u7f00\u548c \\(S_i = \\sum_{j=1}^i x_j\\) \u548c\u524d\u7f00\u5e73\u65b9\u548c \\(Q_i = \\sum_{j=1}^i x_j^2\\)\uff0c\u90a3\u4e48\u533a\u95f4 \\([l, r]\\) \u7684 SSD \u5c31\u80fd\u5728 O(1) \u5185\u5f97\u5230\uff1a\\(SSD(l,r) = (Q_r - Q_{l-1}) - \\frac{(S_r - S_{l-1})^2}{r-l+1}\\)\u3002\u8fd9\u4e2a\u516c\u5f0f\u662f\u4f18\u5316\u4e00\u5207\u5206\u7ec4\u7b97\u6cd5\u7684\u57fa\u77f3\u3002<\/p>\n\n\n\n<h3 class=\"wp-block-heading\">C++\u5b9e\u73b0\u7684\u5173\u952e\u6b65\u9aa4<\/h3>\n\n\n\n<p class=\"wp-block-paragraph\">\u5b9e\u73b0\u65f6\uff0c\u6570\u636e\u5148\u6392\u5e8f\uff08\u56e0\u4e3a\u5206\u7ec4\u901a\u5e38\u57fa\u4e8e\u987a\u5e8f\uff0c\u6bd4\u5982\u6309\u5927\u5c0f\u5206\u4e24\u7ec4\uff09\u3002\u7136\u540e\u8ba1\u7b97\u524d\u7f00\u548c\u4e0e\u524d\u7f00\u5e73\u65b9\u548c\uff0c\u7528 <code>std::vector&lt;double&gt;<\/code> \u5b58\u50a8\u3002\u5bf9\u4e8e\u4e24\u5206\u7ec4\u95ee\u9898\uff0c\u679a\u4e3e\u5206\u5272\u70b9 \\(i\\)\uff08\u524di\u4e2a\u4e3a\u4e00\u7ec4\uff0c\u540en-i\u4e2a\u4e3a\u53e6\u4e00\u7ec4\uff09\uff0c\u5229\u7528\u516c\u5f0f\u8ba1\u7b97\u4e24\u7ec4SSD\u4e4b\u548c\uff0c\u53d6\u6700\u5c0f\u503c\u5373\u53ef\u3002\u4ee3\u7801\u7ed3\u6784\u5982\u4e0b\uff1a<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code class=\"language-cpp\">#include &lt;vector&gt;\n#include &lt;algorithm&gt;\n#include &lt;numeric&gt;\n#include &lt;limits&gt;\n\nstd::pair&lt;double, int&gt; optimalSplit(std::vector&lt;double&gt;&amp; data) {\n    std::sort(data.begin(), data.end());\n    int n = data.size();\n    std::vector&lt;double&gt; prefSum(n+1, 0), prefSqSum(n+1, 0);\n    for (int i = 0; i 
&lt; n; ++i) {\n        prefSum[i+1] = prefSum[i] + data[i];\n        prefSqSum[i+1] = prefSqSum[i] + data[i] * data[i];\n    }\n    double bestSSD = std::numeric_limits&lt;double&gt;::max();\n    int bestSplit = 0;\n    for (int i = 1; i &lt; n; ++i) {\n        double ssd1 = (prefSqSum[i] - prefSqSum[0]) - \n                      (prefSum[i] - prefSum[0]) * (prefSum[i] - prefSum[0]) \/ i;\n        double ssd2 = (prefSqSum[n] - prefSqSum[i]) - \n                      (prefSum[n] - prefSum[i]) * (prefSum[n] - prefSum[i]) \/ (n - i);\n        double total = ssd1 + ssd2;\n        if (total &lt; bestSSD) {\n            bestSSD = total;\n            bestSplit = i;\n        }\n    }\n    return {bestSSD, bestSplit};\n}<\/code><\/pre>\n\n\n\n<p class=\"wp-block-paragraph\">\u6ce8\u610f\u6d6e\u70b9\u7cbe\u5ea6\uff1a\u5f53\u6570\u636e\u91cf\u5f88\u5927\u6216\u6570\u503c\u5dee\u5f02\u6781\u5c0f\u65f6\uff0c<code>double<\/code> \u53ef\u80fd\u5f15\u5165\u8bef\u5dee\uff0c\u53ef\u8003\u8651\u4f7f\u7528 <code>long double<\/code> \u6216\u5bf9\u516c\u5f0f\u505a\u6570\u503c\u7a33\u5b9a\u7684\u53d8\u5f62\uff08\u6bd4\u5982\u7528Kahan\u6c42\u548c\uff09\u3002<\/p>\n\n\n\n<h3 class=\"wp-block-heading\">\u6027\u80fd\u4f18\u5316\u4e0e\u6269\u5c55\u601d\u8003<\/h3>\n\n\n\n<p class=\"wp-block-paragraph\">\u4e0a\u8ff0\u679a\u4e3e\u6cd5\u7684\u65f6\u95f4\u590d\u6742\u5ea6\u4e3a O(n log n)\uff08\u6392\u5e8f\u4e3b\u5bfc\uff09\uff0c\u5bf9\u4e8en=10\u2076\u4ecd\u53ef\u63a5\u53d7\u3002\u4f46\u5982\u679c\u8981\u5206\u6210k\u7ec4\uff08k&gt;2\uff09\uff0c\u95ee\u9898\u5c31\u53d8\u6210\u4e86\u52a8\u6001\u89c4\u5212\uff1a\u5b9a\u4e49 dp[i][j] \u4e3a\u524di\u4e2a\u6570\u636e\u5206\u6210j\u7ec4\u7684\u6700\u5c0fSSD\uff0c\u8f6c\u79fb\u65f6\u679a\u4e3e\u6700\u540e\u4e00\u6bb5\u7684\u8d77\u70b9\uff0c\u590d\u6742\u5ea6 O(k 
n\u00b2)\u3002\u6b64\u65f6\u524d\u7f00\u548c\u516c\u5f0f\u4f9d\u7136\u80fd\u4fdd\u8bc1O(1)\u8f6c\u79fb\uff0c\u4f46\u9700\u8981\u914d\u5408\u56db\u8fb9\u5f62\u4e0d\u7b49\u5f0f\u4f18\u5316\uff08\u6ee1\u8db3\u51f8\u6027\u6761\u4ef6\uff09\u5c06\u590d\u6742\u5ea6\u964d\u5230 O(k n log n) \u6216 O(k n)\u3002C++\u5b9e\u73b0\u65f6\uff0c\u5efa\u8bae\u4f7f\u7528 <code>std::vector&lt;std::vector&lt;double&gt;&gt;<\/code> \u5e76\u914d\u5408 <code>std::min<\/code> \u548c\u5faa\u73af\u5c55\u5f00\u3002<\/p>\n\n\n\n<p class=\"wp-block-paragraph\">\u53e6\u5916\uff0c\u5b9e\u9645\u5e94\u7528\u4e2d\u6570\u636e\u53ef\u80fd\u5e26\u6709\u6743\u91cd\uff0c\u6216\u8005\u8981\u6c42\u7ec4\u5185\u5143\u7d20\u4e2a\u6570\u4e0d\u80fd\u592a\u5c11\uff08\u5982\u6bcf\u7ec4\u81f3\u5c112\u4e2a\uff09\u3002\u8fd9\u4e9b\u7ea6\u675f\u53ea\u9700\u5728\u679a\u4e3e\u65f6\u8c03\u6574\u5faa\u73af\u8303\u56f4\u5373\u53ef\uff0c\u516c\u5f0f\u672c\u8eab\u4e0d\u53d8\u3002<\/p>\n\n\n\n<h3 class=\"wp-block-heading\">\u4e00\u70b9\u9898\u5916\u8bdd<\/h3>\n\n\n\n<p class=\"wp-block-paragraph\">\u8fd9\u4e2a\u95ee\u9898\u7684C++\u5b9e\u73b0\u770b\u4f3c\u7b80\u5355\uff0c\u4f46\u5f88\u591a\u4eba\u7b2c\u4e00\u6b21\u5199\u65f6\u4f1a\u5fd8\u8bb0\u6392\u5e8f\uff0c\u6216\u8005\u76f4\u63a5\u7528\u5e73\u65b9\u548c\u516c\u5f0f\u5374\u5fd8\u4e86\u9664\u4ee5\u7ec4\u5927\u5c0f\u3002\u66f4\u9690\u853d\u7684\u9519\u8bef\u662f\uff1a\u5f53\u7ec4\u5185\u53ea\u6709\u4e00\u4e2a\u5143\u7d20\u65f6\uff0cSSD\u4e3a0\uff0c\u516c\u5f0f\u4ecd\u7136\u6210\u7acb\uff08\u5206\u6bcd\u4e3a1\uff09\u3002\u4f46\u82e5\u7ec4\u5185\u5143\u7d20\u4e2a\u6570\u4e3a0\uff0c\u5219\u9700\u8df3\u8fc7\u3002\u53e6\u5916\uff0c<code>std::numeric_limits&lt;double&gt;::max()<\/code> \u521d\u59cb\u503c\u8db3\u591f\u5927\uff0c\u4f46\u82e5\u6570\u636e\u5168\u662f\u76f8\u540c\u503c\uff0cSSD\u4e3a0\uff0c\u6b64\u65f6\u5206\u5272\u70b9\u4efb\u610f\u9009\u7b2c\u4e00\u4e2a\u5373\u53ef\u3002<\/p>\n\n\n\n<p 
class=\"wp-block-paragraph\">\u56de\u5230\u5f00\u5934\uff1a\u79bb\u5dee\u5e73\u65b9\u548c\u6700\u5c0f\u5316\u672c\u8d28\u4e0a\u662f\u65b9\u5dee\u6700\u5c0f\u5316\uff0c\u800cC++\u7684\u5f3a\u7c7b\u578b\u548cSTL\u5bb9\u5668\u6070\u597d\u4e3a\u8fd9\u7c7b\u6570\u503c\u8ba1\u7b97\u63d0\u4f9b\u4e86\u9ad8\u6548\u3001\u53ef\u8bfb\u7684\u6846\u67b6\u3002\u4e0b\u6b21\u9047\u5230\u7c7b\u4f3c\u7684\u5206\u7ec4\u95ee\u9898\uff0c\u4e0d\u59a8\u4ece\u524d\u7f00\u548c\u4e0e\u516c\u5f0f\u63a8\u5bfc\u5165\u624b\uff0c\u4f60\u4f1a\u53d1\u73b0\u539f\u6765\u90a3\u4e9b\u770b\u4f3c\u590d\u6742\u7684\u6570\u5b66\u516c\u5f0f\uff0c\u5199\u51fa\u6765\u4e0d\u8fc7\u51e0\u884c\u5faa\u73af\u3002<\/p>\n","protected":false},"featured_media":0,"comment_status":"open","ping_status":"closed","template":"","class_list":["post-644","thread","type-thread","status-publish","hentry"],"_links":{"self":[{"href":"https:\/\/blog.nanoturtle.cn\/index.php\/wp-json\/wp\/v2\/thread\/644","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/blog.nanoturtle.cn\/index.php\/wp-json\/wp\/v2\/thread"}],"about":[{"href":"https:\/\/blog.nanoturtle.cn\/index.php\/wp-json\/wp\/v2\/types\/thread"}],"replies":[{"embeddable":true,"href":"https:\/\/blog.nanoturtle.cn\/index.php\/wp-json\/wp\/v2\/comments?post=644"}],"wp:attachment":[{"href":"https:\/\/blog.nanoturtle.cn\/index.php\/wp-json\/wp\/v2\/media?parent=644"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}