优化手册
jijidawang · · 个人记录
我们做题的时候经常会遇到这样的情况:
这时,我们就要想办法优化了。
如何才能施展一个有效的优化,使(有可能是WA) 呢?
玄学编译开关1) #pragma GCC diagnostic error "-std=c++11" #pragma GCC optimize("-fdelete-null-pointer-checks,inline-functions-called-once,-funsafe-loop-optimizations,-fexpensive-optimizations,-foptimize-sibling-calls,-ftree-switch-conversion,-finline-small-functions,inline-small-functions,-frerun-cse-after-loop,-fhoist-adjacent-loads,-findirect-inlining,-freorder-functions,no-stack-protector,-fpartial-inlining,-fsched-interblock,-fcse-follow-jumps,-fcse-skip-blocks,-falign-functions,-fstrict-overflow,-fstrict-aliasing,-fschedule-insns2,-ftree-tail-merge,inline-functions,-fschedule-insns,-freorder-blocks,-fwhole-program,-funroll-loops,-fthread-jumps,-fcrossjumping,-fcaller-saves,-fdevirtualize,-falign-labels,-falign-loops,-falign-jumps,unroll-loops,-fsched-spec,-ffast-math,Ofast,inline,-fgcse,-fgcse-lm,-fipa-sra,-ftree-pre,-ftree-vrp,-fpeephole2",3) #pragma GCC target("avx","sse2")2) #pragma GCC diagnostic error "-std=c++14" #pragma GCC target("avx") #pragma GCC optimize(3) #pragma GCC optimize("Ofast") #pragma GCC optimize("inline") #pragma GCC optimize("-fgcse") #pragma GCC optimize("-fgcse-lm") #pragma GCC optimize("-fipa-sra") #pragma GCC optimize("-ftree-pre") #pragma GCC optimize("-ftree-vrp") #pragma GCC optimize("-fpeephole2") #pragma GCC optimize("-ffast-math") #pragma GCC optimize("-fsched-spec") #pragma GCC optimize("unroll-loops") #pragma GCC optimize("-falign-jumps") #pragma GCC optimize("-falign-loops") #pragma GCC optimize("-falign-labels") #pragma GCC optimize("-fdevirtualize") #pragma GCC optimize("-fcaller-saves") #pragma GCC optimize("-fcrossjumping") #pragma GCC optimize("-fthread-jumps") #pragma GCC optimize("-funroll-loops") #pragma GCC optimize("-fwhole-program") #pragma GCC optimize("-freorder-blocks") #pragma GCC optimize("-fschedule-insns") #pragma GCC optimize("inline-functions") #pragma GCC optimize("-ftree-tail-merge") #pragma GCC optimize("-fschedule-insns2") #pragma GCC optimize("-fstrict-aliasing") #pragma GCC optimize("-fstrict-overflow") #pragma GCC optimize("-falign-functions") #pragma GCC optimize("-fcse-skip-blocks") #pragma GCC optimize("-fcse-follow-jumps") #pragma GCC optimize("-fsched-interblock") #pragma GCC optimize("-fpartial-inlining") #pragma GCC optimize("no-stack-protector") #pragma GCC optimize("-freorder-functions") #pragma GCC optimize("-findirect-inlining") #pragma GCC optimize("-fhoist-adjacent-loads") #pragma GCC optimize("-frerun-cse-after-loop") #pragma GCC optimize("inline-small-functions") #pragma GCC optimize("-finline-small-functions") #pragma GCC optimize("-ftree-switch-conversion") #pragma GCC optimize("-foptimize-sibling-calls") #pragma GCC optimize("-fexpensive-optimizations") #pragma GCC optimize("-funsafe-loop-optimizations") #pragma GCC optimize("inline-functions-called-once") #pragma GCC optimize("-fdelete-null-pointer-checks")都加上的效果不是特别好,最好的是下面几句:
3) #pragma GCC diagnostic error "-std=c++14" #pragma GCC target("avx") #pragma GCC optimize(3) #pragma GCC optimize("Ofast")
- 利用计算机
其中,下列两项常用:
(1) CPU并发
CPU察觉到程序的规律,创建多线程直接运作。
比如
int p=0;
for (int i=0;i<n;i++)
p+=i;
就不能引起CPU迟钝的反应的注意。
要这样:
int p=0;
for (int i=0;i<n;i++)
{
p+=i++; //7次
if (i>=n) break;
p+=i++;
if (i>=n) break;
p+=i++;
if (i>=n) break;
p+=i++;
if (i>=n) break;
p+=i++;
if (i>=n) break;
p+=i++;
if (i>=n) break;
p+=i++;
}
或者这样
int p=0;
for (int i=0;i<n;i++)
{
p+=i++; //7次
p+=i++;
p+=i++;
p+=i++;
p+=i++;
p+=i++;
p+=i++;
}
switch(n%7)
{
case 1:p=p-6*n-21;break; //21=1+2+3+...+6,6=7%1
case 2:p=p-6*n-15;break; //同理。
case 3:p=p-6*n-10;break;
case 4:p=p-6*n-6;break;
case 5:p=p-6*n-3;break;
case 6:p=p-6*n-1;break;
}
(2) 利用Cache(擦车)
如:
//E2
for (int i=0;i<n;i++)
fun1();
for (int i=0;i<n;i++)
fun2();
=====================分割线QwQ====================
//E1
for (int i=0;i<n;i++)
{
fun1();
fun2();
}
E1和E2哪个快(fun1和fun2无关系,也就是E1等价于E2)?
我知道你会回答E1!,错!
E1虽然运行次数少,但是一次执行两个函数。
当函数很大型时,E2因将函数存入Cache,加速。
这两点很适用于这道题
- 多用位运算
计算机内部用的是二进制,所以位运算比普通运算快。
证据
常用优化:
-x=~x+1 //无用...
swap(x,y)=x^=y^=x^=y //x必须不等于y!!
(x==y)=!(x^y)
abs(x)=(x>>31)^x-(x>>31)
------------------------------
if(x==a) x=b;
else x=a;
=x^=a^b;
------------------------------
min(x,y)=y^((x^y)&-(x<y))
max(x,y)=x^((x^y)&-(x<y))
x%2=x&1
2x=x<<1
x/2=x>>1
- IO优化
namespace IO { template<typename T> inline void read(T &x) { x=0; int f=0; char ch=getchar(); while(!(ch>='0'&&ch<='9')){f|=(ch=='-');ch=getchar();} while(ch>='0'&&ch<='9'){x=(x<<1)+(x<<3)+(ch^48);ch=getchar();} x=f?-x:x; } template<typename T> inline void write(T x) { if(x<0) {putchar('-');x=-x;} if(x>=10) write(x/10); putchar(x%10+'0'); } } - 宏定义
- 用
++i代替i++
- 使用高效的东西
(1)能小就小
能用bool类型的不用char类型,能用char类型的不用short, 能用short的不用int,能用int的不用long
在程序运算中,显而易见的变量范围,应当最小化。
虽然在某些情况下不见得会增加很大的效率。 例如:
人的年龄: unsigned char
年份:short
图像像素值:unsigned char
性别:bool
(2)用些inline,register
(3)调用函数传参的时候,使用引用传参。
这可以减少不必要的临时对象生成,减少内存消耗,一般用于类类型传参使用。 --
\text{CSDN}
void inc(int& pp){++pp;}
(4)变量生存期最小
用的时候再声明,减少不必要的开销。
//迷之代码 //洛谷吞Tab不要管 #include<iostream> using namespace std; int main() { int bbt=1024; for (int i=0;i<bbt;i++) { int n=i; cout<<n<<"\tN++\n"; } system("cls") int n=-1; cout<<"N=-1"<<n+-1; while (1) cout<<"\a"; reutrn 0; }(5)初始化
尽可能使用构造初始化,避免使用赋值初始化
std::string str("123"); // good code
std::string str = "123";
好处
(6)能用switch不用if
例如:
if (a<=10) cout<<"m10";
if (a>10&&a<=20) cout<<"m20";
//=============================
switch(a/10) //good code
{
case 1:cout<<"m10";break;
case 2:cout<<"m20";break;
}
(7)循环前声明参数
//E1
ClassPerson str;
for (int i=12;i<32; ++i)
{
str.idcard=i;
// do something //1次构造函数,20次赋值
}
//E2
for (int i=12;i<32; ++i)
{
ClassPerson str;
str.idcard = i; //20次构造函数,20次赋值
// do something
}
(8)10、变量的声明顺序
struct people
{
bool flag;
bool state;
char name;
int age;
double sss;
}
玄学
应从最小的类型开始声明,在某一些编译器里,可以有效的减少内存的占用。
因为有些编译器使用的是
- 使用现有算法
在学术界有很多公认非常优良的算法,可以根据自己情况选择使用。
例如:
查找:二分查找、分块查找、哈希查找
'''''''''''''''''''''''''''''''''''''''
排序:快速排序、堆排序、归并排序
- 其他
用do...while,不用while
用for(;;),不用while(1)
其他优化算法此处有很多
最后有个代码,优化用的:
/*================JWD made====================
*欲使用请在代码首部加上本声明!(~表示段落换行)
-----------------------------------------
*To use, please add this statement ~
*at the beginning of the code!
-----------------------------------------
*Bit operation may make the weak ~
*perplexed. Don't be afraid. Just know how to use it!
-----------------------------------------
*Be sure to read the code and reference ~
*"namespace" when using.
-----------------------------------------
*Therefore, JWD such as WA, TLE, MLE ~
*caused by header code is not responsible.
-----------------------------------------
*If there is any error in this code, ~
*please send it to me personally and I will correct it.
-----------------------------------------
*This header code is produced by JWD.
+++++++++++++++++++++++++++++++++++++++++
* --Please observe the rules of use.
*****************************************
Tips:
1) "I" above refers to JWD.
2) Visit JWD to find jijidawang.[His luogu ID: 227514]
3) Again,Please observe the rules of use!
4) All rights reserved (except for common
functions [read, write...])
=====================The Head==================*/
//C++
#include<cstring>
#include<cstdio> //这些头文件必须有
using namespace std; //必须using namespace std;
namespace JQ
{
#define mian main
#define ture true
typedef long long ll;
typedef unsigned long long ull;
static const char* JCQ="register"; //emm...
const ll MaxInt=0x3f3f3f3f;
const ll INF=MaxInt;
namespace IO
{
template<typename T>
inline void read(T &x)
{
x=0;
int f=0;
char ch=getchar();
while(!(ch>='0'&&ch<='9')){f|=(ch=='-');ch=getchar();}
while(ch>='0'&&ch<='9'){x=(x<<1)+(x<<3)+(ch^48);ch=getchar();}
x=f?-x:x;
}
template<typename T>
inline void write(T x)
{
if(x<0) {putchar('-');x=-x;}
if(x>=10) write(x/10);
putchar(x%10+'0');
}
}
namespace strs
{
const int Atoa=32;
enum letter{A='A',B,C,D,E,F,G,H,I,J,K,L,M,N,O,P,Q,R,S,T,U,V,W,X,Y,Z,a='a',b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z};
enum chnum{c0='0',c1,c2,c3,c4,c5,c6,c7,c8,c9};
const char* lMap="ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ";
const char* sMap="abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz";
inline bool isnum(char ch){return ch>=c0&&ch<=c9;}
inline bool isler(char ch){return ch>=A&&ch<=Z;}
inline bool isser(char ch){return ch>=a&&ch<=z;}
template<typename T>
inline void ladd(char& d,T t){d=lMap[lMap[d-A]+t%26];}
template<typename T>
inline void lsub(char& d,T t){d=lMap[(sMap[d-a]-t%26)>0?(sMap[d-a]-t%26):-(sMap[d-a]-t%26)];}
template<typename T>
inline void sadd(char& d,T t){d=sMap[sMap[d-a]+t%26];}
template<typename T>
inline void ssub(char& d,T t){d=sMap[(sMap[d-a]-t%26)>0?(sMap[d-a]-t%26):-(sMap[d-a]-t%26)];}
template<typename T>
inline char laddr(char d,T t){return lMap[lMap[d-A]+t%26];}
template<typename T>
inline char lsubr(char d,T t){return lMap[lMap[d-a]+t%26];}
template<typename T>
inline char saddr(char d,T t){return sMap[sMap[d-A]+t%26];}
template<typename T>
inline char ssubr(char d,T t){return sMap[sMap[d-a]+t%26];}
inline char tolerr(char p){return isser(p)?(p-'a'+'A'):p;}
inline char toserr(char p){return isler(p)?(p+'a'-'A'):p;}
inline char reperr(char p){return isler(p)?(p+'a'-'A'):isser(p)?(p-'a'+'A'):p;}
inline void toler(char& p){p=tolerr(p);}
inline void toser(char& p){p=toserr(p);}
inline void reper(char& p){p=reperr(p);}
inline string tolstr(string p){int len=p.length();for (int i=0;i<len;i++) toler(p[i]);return p;}
inline string tosstr(string p){int len=p.length();for (int i=0;i<len;i++) toser(p[i]);return p;}
inline string repstr(string p){int len=p.length();for (int i=0;i<len;i++) reper(p[i]);return p;}
inline void tolst(string& p){p=tolstr(p);}
inline void tosst(string& p){p=tosstr(p);}
inline void repst(string& p){p=repstr(p);}
inline void pt(char to[],string from){int len=from.length();for (int i=0;i<len;i++) to[i]=from[i];}
inline const char* tolstr(char p[]){string tmp(p);return tolstr(tmp).c_str();}
inline const char* tosstr(char p[]){string tmp(p);return tosstr(tmp).c_str();}
inline const char* repstr(char p[]){string tmp(p);return repstr(tmp).c_str();}
inline void tolst(char p[]){string tmp(p);tolstr(tmp);pt(p,tmp);}
inline void tosst(char p[]){string tmp(p);tosstr(tmp);pt(p,tmp);}
inline void repst(char p[]){string tmp(p);repstr(tmp);pt(p,tmp);}
inline bool isr0(char p){return !p;} //p=='\0'
}
namespace algo
{
template<typename X>
inline void Swap(X& a,X& b){(a^b)?a^=b^=a^=b:"";;}
template<typename X>
inline bool equ(X a,X b){return !(a^b);}
template<typename X>
inline X Abs(X x){return (x>>31)^x-(x>>31);}
template<typename X>
inline void Ee1(X& a){a&=a-1;}
template<typename X>
inline X Ee1r(X a){return a&(a-1);}
template<typename X>
inline ll countbit(X a){int ans=0;while (a){Ee1(a);++ans;}return ans;}
template<typename X>
inline bool odd(X a){return a&1;}
template<typename X>
inline bool even(X a){return !odd(a);}
template<typename X>
inline void nati(X& a){a=~a+1;}
template<typename X>
inline X natir(X a){return ~a+1;}
template<typename X>
inline void another(X& x,X a,X b){x^=a^b;}
template<typename X>
inline X anotherr(X x,X a,X b){return x^a^b;}
template<typename X>
inline bool isPowerOf2(X n) {return (n>0)&&(!(n&(n-1)));}
template<typename X>
inline bool rnati(X x,X y){((x^y)<0);}
template<typename X>
inline X Min(X x,X y){return y^((x^y)&-(x<y));}
template<typename X>
inline X Max(X x,X y){return x^((x^y)&-(x<y));}
template<typename X>
inline ll countbit10(X x){return (x/10)?countbit10(x/10)+1:1;}
template<typename X>
X qpow(X a,int b)
{
X res=1;
while(b)
{
if(b&1) res=res*a;
a=a*a;
b>>=1;
}return res;
}
}
}