优化手册

· · 个人记录

我们做题的时候经常会遇到这样的情况:

这时,我们就要想办法优化了。

如何才能施展一个有效的优化,使\texttt{TLE}变成\texttt{unTLE}(有可能是WA) 呢?

  1. 玄学编译开关
    1)
    #pragma GCC diagnostic error "-std=c++11"
    #pragma GCC optimize("-fdelete-null-pointer-checks,inline-functions-called-once,-funsafe-loop-optimizations,-fexpensive-optimizations,-foptimize-sibling-calls,-ftree-switch-conversion,-finline-small-functions,inline-small-functions,-frerun-cse-after-loop,-fhoist-adjacent-loads,-findirect-inlining,-freorder-functions,no-stack-protector,-fpartial-inlining,-fsched-interblock,-fcse-follow-jumps,-fcse-skip-blocks,-falign-functions,-fstrict-overflow,-fstrict-aliasing,-fschedule-insns2,-ftree-tail-merge,inline-functions,-fschedule-insns,-freorder-blocks,-fwhole-program,-funroll-loops,-fthread-jumps,-fcrossjumping,-fcaller-saves,-fdevirtualize,-falign-labels,-falign-loops,-falign-jumps,unroll-loops,-fsched-spec,-ffast-math,Ofast,inline,-fgcse,-fgcse-lm,-fipa-sra,-ftree-pre,-ftree-vrp,-fpeephole2",3)
    #pragma GCC target("avx","sse2")
    2)
    #pragma GCC diagnostic error "-std=c++14"
    #pragma GCC target("avx")
    #pragma GCC optimize(3)
    #pragma GCC optimize("Ofast")
    #pragma GCC optimize("inline")
    #pragma GCC optimize("-fgcse")
    #pragma GCC optimize("-fgcse-lm")
    #pragma GCC optimize("-fipa-sra")
    #pragma GCC optimize("-ftree-pre")
    #pragma GCC optimize("-ftree-vrp")
    #pragma GCC optimize("-fpeephole2")
    #pragma GCC optimize("-ffast-math")
    #pragma GCC optimize("-fsched-spec")
    #pragma GCC optimize("unroll-loops")
    #pragma GCC optimize("-falign-jumps")
    #pragma GCC optimize("-falign-loops")
    #pragma GCC optimize("-falign-labels")
    #pragma GCC optimize("-fdevirtualize")
    #pragma GCC optimize("-fcaller-saves")
    #pragma GCC optimize("-fcrossjumping")
    #pragma GCC optimize("-fthread-jumps")
    #pragma GCC optimize("-funroll-loops")
    #pragma GCC optimize("-fwhole-program")
    #pragma GCC optimize("-freorder-blocks")
    #pragma GCC optimize("-fschedule-insns")
    #pragma GCC optimize("inline-functions")
    #pragma GCC optimize("-ftree-tail-merge")
    #pragma GCC optimize("-fschedule-insns2")
    #pragma GCC optimize("-fstrict-aliasing")
    #pragma GCC optimize("-fstrict-overflow")
    #pragma GCC optimize("-falign-functions")
    #pragma GCC optimize("-fcse-skip-blocks")
    #pragma GCC optimize("-fcse-follow-jumps")
    #pragma GCC optimize("-fsched-interblock")
    #pragma GCC optimize("-fpartial-inlining")
    #pragma GCC optimize("no-stack-protector")
    #pragma GCC optimize("-freorder-functions")
    #pragma GCC optimize("-findirect-inlining")
    #pragma GCC optimize("-fhoist-adjacent-loads")
    #pragma GCC optimize("-frerun-cse-after-loop")
    #pragma GCC optimize("inline-small-functions")
    #pragma GCC optimize("-finline-small-functions")
    #pragma GCC optimize("-ftree-switch-conversion")
    #pragma GCC optimize("-foptimize-sibling-calls")
    #pragma GCC optimize("-fexpensive-optimizations")
    #pragma GCC optimize("-funsafe-loop-optimizations")
    #pragma GCC optimize("inline-functions-called-once")
    #pragma GCC optimize("-fdelete-null-pointer-checks")

    都加上的效果不是特别好,最好的是下面几句:

    3)
    #pragma GCC diagnostic error "-std=c++14"
    #pragma GCC target("avx")
    #pragma GCC optimize(3)
    #pragma GCC optimize("Ofast")
  1. 利用计算机

其中,下列两项常用:

(1) CPU并发

CPU察觉到程序的规律,创建多线程直接运作。

比如

int p=0;
for (int i=0;i<n;i++)
    p+=i;

就不能引起CPU迟钝的反应的注意。

要这样:

int p=0;
for (int i=0;i<n;i++)
{
    p+=i++;    //7次
    if (i>=n) break;
    p+=i++;
    if (i>=n) break;
    p+=i++;
    if (i>=n) break;
    p+=i++;
    if (i>=n) break;
    p+=i++;
    if (i>=n) break;
    p+=i++;
    if (i>=n) break;
    p+=i++;
}

或者这样

int p=0;
for (int i=0;i<n;i++)
{
    p+=i++;    //7次
    p+=i++;
    p+=i++;
    p+=i++;
    p+=i++;
    p+=i++;
    p+=i++;
}
switch(n%7)
{
    case 1:p=p-6*n-21;break;   //21=1+2+3+...+6,6=7%1
    case 2:p=p-6*n-15;break;   //同理。
    case 3:p=p-6*n-10;break;
    case 4:p=p-6*n-6;break;
    case 5:p=p-6*n-3;break;
    case 6:p=p-6*n-1;break;
}

(2) 利用Cache(擦车)

如:

//E2
for (int i=0;i<n;i++)
    fun1();
for (int i=0;i<n;i++)
    fun2();

=====================分割线QwQ====================

//E1
for (int i=0;i<n;i++)
{
    fun1();
    fun2();
}

E1和E2哪个快(fun1和fun2无关系,也就是E1等价于E2)?

我知道你会回答E1!,错!

E1虽然运行次数少,但是一次执行两个函数。

当函数很大型时,E2因将函数存入Cache,加速。

这两点很适用于这道题

  1. 多用位运算

计算机内部用的是二进制,所以位运算比普通运算快。

证据

常用优化:

-x=~x+1                 //无用...
swap(x,y)=x^=y^=x^=y    //x必须不等于y!!
(x==y)=!(x^y)
abs(x)=(x>>31)^x-(x>>31)
------------------------------
if(x==a) x=b;     
else x=a;
=x^=a^b;
------------------------------
min(x,y)=y^((x^y)&-(x<y))
max(x,y)=x^((x^y)&-(x<y))
x%2=x&1
2x=x<<1
x/2=x>>1
  1. IO优化
    namespace IO
    {
    template<typename T>
    inline void read(T &x)
    {
        x=0;
        int f=0;
        char ch=getchar();
        while(!(ch>='0'&&ch<='9')){f|=(ch=='-');ch=getchar();}
        while(ch>='0'&&ch<='9'){x=(x<<1)+(x<<3)+(ch^48);ch=getchar();}
        x=f?-x:x;
    }
    template<typename T> 
    inline void write(T x)
    {
        if(x<0) {putchar('-');x=-x;}
            if(x>=10) write(x/10);
        putchar(x%10+'0');
    }
    }
  2. 宏定义
  1. ++i代替i++
  1. 使用高效的东西

(1)能小就小

能用bool类型的不用char类型,能用char类型的不用short, 能用short的不用int,能用int的不用long

在程序运算中,显而易见的变量范围,应当最小化。

虽然在某些情况下不见得会增加很大的效率。 例如:

人的年龄: unsigned char
年份:short
图像像素值:unsigned char
性别:bool

(2)用些inline,register

(3)调用函数传参的时候,使用引用传参。

这可以减少不必要的临时对象生成,减少内存消耗,一般用于类类型传参使用。 --\text{CSDN}

void inc(int& pp){++pp;}

(4)变量生存期最小

用的时候再声明,减少不必要的开销。

//迷之代码
//洛谷吞Tab不要管
#include<iostream>
using namespace std;
int main()
{
int bbt=1024;
for (int i=0;i<bbt;i++)
{
int n=i;
cout<<n<<"\tN++\n";
}
system("cls")
int n=-1;
cout<<"N=-1"<<n+-1;
while (1) cout<<"\a";
reutrn 0;
}

(5)初始化

尽可能使用构造初始化,避免使用赋值初始化

std::string str("123"); // good code 
std::string str = "123";

好处

(6)能用switch不用if

例如:

if (a<=10) cout<<"m10";
if (a>10&&a<=20) cout<<"m20";
//=============================
switch(a/10)         //good code
{
    case 1:cout<<"m10";break;
    case 2:cout<<"m20";break;
}

(7)循环前声明参数

//E1
ClassPerson str;
for (int i=12;i<32; ++i)
{
        str.idcard=i;
        // do something   //1次构造函数,20次赋值
}
//E2
for (int i=12;i<32; ++i)
{
    ClassPerson str;
    str.idcard = i;       //20次构造函数,20次赋值
    // do something
}

(8)10、变量的声明顺序

struct people
{
    bool flag;
    bool state;
    char name;
    int age;
    double sss;
}

玄学

应从最小的类型开始声明,在某一些编译器里,可以有效的减少内存的占用。

因为有些编译器使用的是4字节对齐,以便分界。细节可以自行查询。

  1. 使用现有算法

在学术界有很多公认非常优良的算法,可以根据自己情况选择使用。

例如:

查找:二分查找、分块查找、哈希查找
'''''''''''''''''''''''''''''''''''''''
排序:快速排序、堆排序、归并排序
  1. 其他

do...while,不用while

for(;;),不用while(1)

其他优化算法此处有很多

最后有个代码,优化用的:

/*================JWD made====================
*欲使用请在代码首部加上本声明!(~表示段落换行)
-----------------------------------------
*To use, please add this statement ~
*at the beginning of the code! 
-----------------------------------------
*Bit operation may make the weak  ~
*perplexed. Don't be afraid. Just know how to use it!
-----------------------------------------
*Be sure to read the code and reference ~
*"namespace" when using.
-----------------------------------------
*Therefore, JWD such as WA, TLE, MLE ~
*caused by header code is not responsible.
-----------------------------------------
*If there is any error in this code, ~
*please send it to me personally and I will correct it.
-----------------------------------------
*This header code is produced by JWD.
+++++++++++++++++++++++++++++++++++++++++
*      --Please observe the rules of use.
*****************************************
Tips:
    1) "I" above refers to JWD.
    2) Visit JWD to find jijidawang.[His luogu ID: 227514] 
    3) Again,Please observe the rules of use!
    4) All rights reserved (except for common
functions [read, write...])
=====================The Head==================*/

//C++
#include<cstring>
#include<cstdio>                       //这些头文件必须有
using namespace std;                   //必须using namespace std;
namespace JQ
{
    #define mian main
    #define ture true
    typedef long long ll;
    typedef unsigned long long ull;
    static const char* JCQ="register"; //emm...
    const ll MaxInt=0x3f3f3f3f;
    const ll INF=MaxInt;
    namespace IO
    {
        template<typename T>
        inline void read(T &x)
        {
            x=0;
            int f=0;
            char ch=getchar();
            while(!(ch>='0'&&ch<='9')){f|=(ch=='-');ch=getchar();}
            while(ch>='0'&&ch<='9'){x=(x<<1)+(x<<3)+(ch^48);ch=getchar();}
            x=f?-x:x;
        }
        template<typename T> 
        inline void write(T x)
        {
            if(x<0) {putchar('-');x=-x;}
            if(x>=10) write(x/10);
            putchar(x%10+'0');
        }
    }
    namespace strs
    {
        const int Atoa=32;
        enum letter{A='A',B,C,D,E,F,G,H,I,J,K,L,M,N,O,P,Q,R,S,T,U,V,W,X,Y,Z,a='a',b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z};
        enum chnum{c0='0',c1,c2,c3,c4,c5,c6,c7,c8,c9};
        const char* lMap="ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ";
        const char* sMap="abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz";
        inline bool isnum(char ch){return ch>=c0&&ch<=c9;}
        inline bool isler(char ch){return ch>=A&&ch<=Z;}
        inline bool isser(char ch){return ch>=a&&ch<=z;}
        template<typename T>
        inline void ladd(char& d,T t){d=lMap[lMap[d-A]+t%26];}
        template<typename T>
        inline void lsub(char& d,T t){d=lMap[(sMap[d-a]-t%26)>0?(sMap[d-a]-t%26):-(sMap[d-a]-t%26)];}
        template<typename T>
        inline void sadd(char& d,T t){d=sMap[sMap[d-a]+t%26];}
        template<typename T>
        inline void ssub(char& d,T t){d=sMap[(sMap[d-a]-t%26)>0?(sMap[d-a]-t%26):-(sMap[d-a]-t%26)];}
        template<typename T>
        inline char laddr(char d,T t){return lMap[lMap[d-A]+t%26];}
        template<typename T>
        inline char lsubr(char d,T t){return lMap[lMap[d-a]+t%26];}
        template<typename T>
        inline char saddr(char d,T t){return sMap[sMap[d-A]+t%26];}
        template<typename T>
        inline char ssubr(char d,T t){return sMap[sMap[d-a]+t%26];}
        inline char tolerr(char p){return isser(p)?(p-'a'+'A'):p;}
        inline char toserr(char p){return isler(p)?(p+'a'-'A'):p;}
        inline char reperr(char p){return isler(p)?(p+'a'-'A'):isser(p)?(p-'a'+'A'):p;}
        inline void toler(char& p){p=tolerr(p);}
        inline void toser(char& p){p=toserr(p);}
        inline void reper(char& p){p=reperr(p);}
        inline string tolstr(string p){int len=p.length();for (int i=0;i<len;i++) toler(p[i]);return p;}
        inline string tosstr(string p){int len=p.length();for (int i=0;i<len;i++) toser(p[i]);return p;}
        inline string repstr(string p){int len=p.length();for (int i=0;i<len;i++) reper(p[i]);return p;}
        inline void tolst(string& p){p=tolstr(p);}
        inline void tosst(string& p){p=tosstr(p);}
        inline void repst(string& p){p=repstr(p);}
        inline void pt(char to[],string from){int len=from.length();for (int i=0;i<len;i++) to[i]=from[i];}
        inline const char* tolstr(char p[]){string tmp(p);return tolstr(tmp).c_str();}
        inline const char* tosstr(char p[]){string tmp(p);return tosstr(tmp).c_str();}
        inline const char* repstr(char p[]){string tmp(p);return repstr(tmp).c_str();}
        inline void tolst(char p[]){string tmp(p);tolstr(tmp);pt(p,tmp);}
        inline void tosst(char p[]){string tmp(p);tosstr(tmp);pt(p,tmp);}
        inline void repst(char p[]){string tmp(p);repstr(tmp);pt(p,tmp);}
        inline bool isr0(char p){return !p;}          //p=='\0'
    }
    namespace algo
    {
        template<typename X>
        inline void Swap(X& a,X& b){(a^b)?a^=b^=a^=b:"";;}
        template<typename X>
        inline bool equ(X a,X b){return !(a^b);}
        template<typename X>
        inline X Abs(X x){return (x>>31)^x-(x>>31);}
        template<typename X>
        inline void Ee1(X& a){a&=a-1;}
        template<typename X>
        inline X Ee1r(X a){return a&(a-1);}
        template<typename X>
        inline ll countbit(X a){int ans=0;while (a){Ee1(a);++ans;}return ans;}
        template<typename X>
        inline bool odd(X a){return a&1;}
        template<typename X>
        inline bool even(X a){return !odd(a);}
        template<typename X>
        inline void nati(X& a){a=~a+1;}
        template<typename X>
        inline X natir(X a){return ~a+1;}
        template<typename X>
        inline void another(X& x,X a,X b){x^=a^b;}
        template<typename X>
        inline X anotherr(X x,X a,X b){return x^a^b;}
        template<typename X>
        inline bool isPowerOf2(X n) {return (n>0)&&(!(n&(n-1)));} 
        template<typename X>
        inline bool rnati(X x,X y){((x^y)<0);}
        template<typename X>
        inline X Min(X x,X y){return y^((x^y)&-(x<y));}
        template<typename X>
        inline X Max(X x,X y){return x^((x^y)&-(x<y));}
        template<typename X>
        inline ll countbit10(X x){return (x/10)?countbit10(x/10)+1:1;}
        template<typename X>
        X qpow(X a,int b)
        {
            X res=1;
            while(b)
            {
                if(b&1) res=res*a;
                a=a*a;
                b>>=1;
            }return res;
        }
    }
}