C++14
关于 C++ 14
这里会做一些我较为疑惑的测试.
register
在 cppreference.com 中明确指出: C++11 及以后, register 仍可以定义, 但是定义的 register 变量和其它变量没有区别. 而在 C++17 及以后, register 变量不可以被定义, 但 register 仍是保留字.
Since C++11, auto is no longer a storage class specifier; it is used to indicate type deduction.
In C, the address of a register variable cannot be taken, but in C++, a variable declared register is semantically indistinguishable from a variable declared without any storage class specifiers. (until C++17)
In C++, unlike C, variables cannot be declared register. (since C++17)
测试代码: 用固定种子生成一个
Register
#include <algorithm>
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <ctime>
#include <iostream>
#include <map>
#include <queue>
#include <string>
#include <vector>
#define Wild_Donkey 0
using namespace std;
unsigned ST[22][2000005], Bin[22], Log[2000005], ScdRk[2000005];
unsigned Bucket[2000005], SA[2000005], RK[2000005], RkTmp[4000005], m;
char S[2000005];
inline unsigned Min(unsigned L, unsigned R) {
register unsigned LenLog(Log[R - L + 1]);
return min(ST[LenLog][L], ST[LenLog][R - Bin[LenLog] + 1]);
}
signed main() {
srand(573346); m = 2000000;
for (register unsigned i(1); i <= m; ++i) S[i] = (rand() % 26) + 'a';
for (register unsigned i(1); i <= m; ++i) ++Bucket[RK[i] = (S[i] -= '_')];
for (register unsigned i(1); i <= 27; ++i) Bucket[i] += Bucket[i - 1];
for (register unsigned i(1); i <= m; ++i) SA[Bucket[RK[i]]--] = i;
for (register unsigned i(1), BucketSize(27); i <= m; ++i) {
memset(Bucket, 0, (BucketSize + 1) << 2);
for (register unsigned j(1); j <= m; ++j) ++Bucket[RK[j]];
for (register unsigned j(1); j <= BucketSize; ++j) Bucket[j] += Bucket[j - 1];
for (register unsigned j(m - i + 1); j <= m; ++j) ScdRk[j] = j;
for (register unsigned j(1), TopSR(m - i + 1); j <= m; ++j) if (SA[j] > i) ScdRk[--TopSR] = SA[j] - i;
for (register unsigned j(1); j <= m; ++j) SA[Bucket[RK[ScdRk[j]]]--] = ScdRk[j];
memcpy(RkTmp, RK, (m + 1) << 2), BucketSize = 0;
for (register unsigned j(1); j <= m; ++j) RK[SA[j]] = ((RkTmp[SA[j]] ^ RkTmp[SA[j - 1]]) || (RkTmp[SA[j] + i] ^ RkTmp[SA[j - 1] + i])) ? (++BucketSize) : (BucketSize);
if (BucketSize == m) break;
}
for (register unsigned i(1), j(0); i <= m; ++i) {
if (j) --j;
if (RK[i] == 1) { ST[0][RK[i]] = 0; continue; }
while ((SA[RK[i] - 1] + j <= m) && (i + j <= m) && (S[i + j] == S[SA[RK[i] - 1] + j])) ++j;
ST[0][RK[i]] = j;
}
for (register unsigned i(1), j(0); i <= m; i <<= 1, ++j)
for (register unsigned k(1); k + (i << 1) - 1 <= m; ++k)
ST[j + 1][k] = min(ST[j][k], ST[j][k + i]);
for (register unsigned i(1), j(0); i <= m; i <<= 1, ++j) Bin[j] = i, Log[i] = j;
for (register unsigned i(1); i <= m; ++i) Log[i] = max(Log[i], Log[i - 1]);
return Wild_Donkey;
}
No register
#include <algorithm>
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <ctime>
#include <iostream>
#include <map>
#include <queue>
#include <string>
#include <vector>
#define Wild_Donkey 0
using namespace std;
unsigned ST[22][2000005], Bin[22], Log[2000005], ScdRk[2000005];
unsigned Bucket[2000005], SA[2000005], RK[2000005], RkTmp[4000005], m;
char S[2000005];
inline unsigned Min(unsigned L, unsigned R) {
unsigned LenLog(Log[R - L + 1]);
return min(ST[LenLog][L], ST[LenLog][R - Bin[LenLog] + 1]);
}
signed main() {
srand(573346); m = 2000000;
for (unsigned i(1); i <= m; ++i) S[i] = (rand() % 26) + 'a';
for (unsigned i(1); i <= m; ++i) ++Bucket[RK[i] = (S[i] -= '_')];
for (unsigned i(1); i <= 27; ++i) Bucket[i] += Bucket[i - 1];
for (unsigned i(1); i <= m; ++i) SA[Bucket[RK[i]]--] = i;
for (unsigned i(1), BucketSize(27); i <= m; ++i) {
memset(Bucket, 0, (BucketSize + 1) << 2);
for (unsigned j(1); j <= m; ++j) ++Bucket[RK[j]];
for (unsigned j(1); j <= BucketSize; ++j) Bucket[j] += Bucket[j - 1];
for (unsigned j(m - i + 1); j <= m; ++j) ScdRk[j] = j;
for (unsigned j(1), TopSR(m - i + 1); j <= m; ++j) if (SA[j] > i) ScdRk[--TopSR] = SA[j] - i;
for (unsigned j(1); j <= m; ++j) SA[Bucket[RK[ScdRk[j]]]--] = ScdRk[j];
memcpy(RkTmp, RK, (m + 1) << 2), BucketSize = 0;
for (unsigned j(1); j <= m; ++j) RK[SA[j]] = ((RkTmp[SA[j]] ^ RkTmp[SA[j - 1]]) || (RkTmp[SA[j] + i] ^ RkTmp[SA[j - 1] + i])) ? (++BucketSize) : (BucketSize);
if (BucketSize == m) break;
}
for (unsigned i(1), j(0); i <= m; ++i) {
if (j) --j;
if (RK[i] == 1) { ST[0][RK[i]] = 0; continue; }
while ((SA[RK[i] - 1] + j <= m) && (i + j <= m) && (S[i + j] == S[SA[RK[i] - 1] + j])) ++j;
ST[0][RK[i]] = j;
}
for (unsigned i(1), j(0); i <= m; i <<= 1, ++j)
for (unsigned k(1); k + (i << 1) - 1 <= m; ++k)
ST[j + 1][k] = min(ST[j][k], ST[j][k + i]);
for (unsigned i(1), j(0); i <= m; i <<= 1, ++j) Bin[j] = i, Log[i] = j;
for (unsigned i(1); i <= m; ++i) Log[i] = max(Log[i], Log[i - 1]);
return Wild_Donkey;
}
两份代码除了 register 以外完全相同.
测试平台: 洛谷在线 IDE + 本地 (5800H)
测试
| 环境 | 平台 | -O2 |
register | 第 |
第 |
第 |
第 |
第 |
平均 |
|---|---|---|---|---|---|---|---|---|---|
| C++11 | 本地 | YES | YES | 329ms | 332ms | 329ms | 361ms | 337ms | 337ms |
| C++11 | 本地 | YES | NO | 328ms | 333ms | 366ms | 352ms | 346ms | 345ms |
| C++14 | 洛谷 | YES | YES | 383ms | 342ms | 364ms | 343ms | 323ms | 351ms |
| C++14 | 本地 | YES | YES | 356ms | 347ms | 371ms | 352ms | 337ms | 352ms |
| C++11 | 洛谷 | YES | YES | 362ms | 343ms | 363ms | 342ms | 364ms | 354ms |
| C++14 | 洛谷 | YES | NO | 363ms | 383ms | 363ms | 343ms | 363ms | 363ms |
| C++11 | 洛谷 | YES | NO | 343ms | 464ms | 342ms | 342ms | 343ms | 366ms |
| C++14 | 本地 | YES | NO | 358ms | 342ms | 477ms | 473ms | 354ms | 400ms |
| C++14 | 洛谷 | NO | YES | 483ms | 484ms | 483ms | 483ms | 504ms | 487ms |
| C++11 | 洛谷 | NO | YES | 503ms | 503ms | 505ms | 484ms | 503ms | 499ms |
| C++14 | 本地 | NO | YES | 504ms | 509ms | 517ms | 498ms | 550ms | 515ms |
| C++14 | 洛谷 | NO | NO | 564ms | 605ms | 604ms | 564ms | 604ms | 588ms |
| C++11 | 洛谷 | NO | NO | 727ms | 584ms | 686ms | 564ms | 544ms | 621ms |
| C++11 | 本地 | NO | YES | 524ms | 749ms | 657ms | 518ms | 695ms | 628ms |
| C++11 | 本地 | NO | NO | 630ms | 654ms | 670ms | 640ms | 666ms | 652ms |
| C++14 | 本地 | NO | NO | 641ms | 760ms | 663ms | 647ms | 669ms | 676ms |
register 优化率:
| 环境 | 平台 | -O2 |
优化率 |
|---|---|---|---|
| C++14 | 本地 | NO | 23.81% |
| C++11 | 洛谷 | NO | 19.64% |
| C++14 | 洛谷 | NO | 17.17% |
| C++14 | 本地 | YES | 12.00% |
| C++11 | 本地 | NO | 3.68% |
| C++14 | 洛谷 | YES | 3.30% |
| C++11 | 洛谷 | YES | 3.27% |
| C++11 | 本地 | YES | 2.31% |
结论:
-
本地 (Windows)
C++11开-O2比C++14快, 无论开不开register. -
本地
C++14对register敏感, 无论开不开-O2, 开了register较快 -
不存在去掉
register反而加速的情况 -
洛谷 (Linux)
C++14比C++11快 (其它变量相同的情况下), 无论开不开register或-O2. -
register的优化在不开-O2时较为敏感, 对C++11和C++14的效果区别不大.
不关于 C++14
一点基准都没有的基准测试...(以后要改成正常的代码而不是
#include <algorithm>
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <ctime>
#include <iostream>
#include <map>
#include <queue>
#include <vector>
#define Wild_Donkey 0
using namespace std;
unsigned a[10005], m, n, Cnt(0), A, B, C, D, t, Ans(0), Tmp(0);
bool b[10005];
int main() {
for (register unsigned i(1); i <= 1000; ++i) {
for (register unsigned j(1); j <= 1000000000; ++j) {}
printf("%llu\n", i * (unsigned long long)1000000000);
}
return Wild_Donkey;
}
| 机器 | 标准 | -O2 |
最好成绩 |
|---|---|---|---|
| 洛谷 | C++98 | YES | 0.062s |
| r7-5800H | C++11 | YES | 1.243s |
| r7-5800H | C++98 | YES | 1.262s |
| i3-4170 | C++98 | YES | 1.361s |
| Athlon-???? | C++98 | YES | 2.326s |
| r7-3700X | C++98 | NO | 233.7s |
| r7-5800H | C++98 | NO | 241.9s |
| i3-4170 | C++98 | NO | 294.4s |
| i7-4790H | C++98 | NO | 303s |
| MAC Book ???? | Unkonwn | NO | 1116.4s |