题解 P4718 【【模板】Pollard-Rho算法】

· · 题解

upd :多谢大佬zhoutb2333(UID31564)的指正,现在的题解已经是正确的了。

其实我的源程序有个很严重的bug,就是两个Int64类型我给直接乘起来了……于是就会导致一些奇怪的溢出。对于某些素数会被当成合数,于是无论怎样都判不出来质因子导致TLE……

然后大家如果想知道我是怎么知道这个问题的,可以左转讨论区看看我是怎么智障的……

反正最后呢,我发现在用龟速乘之后程序的判素性会变弱,于是又加了几个质数,开了O2就过了233

现在最新的代码放在了最下面。以下是原题解:

我想说一下这个题目是怎么卡常的。

虽然我的代码也不是很快……但是不至于94pts……

以下是战果:

不着急,我们一点一点来:

一、6000+\rm{ms} \to 4000+\rm{ms}

首先奉上在下最慢的代码:

LL Prime[11] = {2, 3, 5, 7, 13, 17, 19, 23} ;

inline LL expow(LL a, LL b, LL p){
    LL res = 1 ;
    while (b){
        if (b & 1) 
            (res *= a) %= p ;
        (a *= a) %= p, b >>= 1 ; 
    }
    return res % p ;
}
inline bool Test(LL p, LL x){
    LL r = 0, d = x - 1 ;
    while (!(d & 1)) d >>= 1, ++ r ;
    for (LL i = expow(p, d, x), j ; r ; -- r){
        j = i * i % x ;
        if (j == 1){
            if (i == 1 || i == x - 1) 
                return 1 ; 
            return 0 ;
        }
        i = j ;
    }
    return 0 ;
}
inline bool Miller_Rabin(LL x){
    if (x == 1) return 0 ;
    for (int i = 0 ; i < 8 ; ++ i){
        if (x == Prime[i]) return 1 ;
        if (!(x % Prime[i])) return 0 ;
        if (!Test(Prime[i], x)) return 0 ;
    }
    return 1 ;
}
LL res[MAXN], tot ;
inline int pksrand(){
    return rand() << 15 ^ rand() ;
} // 2
inline LL Irand(LL x){
    return (((LL)pksrand()) << 30 ^ pksrand()) % x ; //2
}
inline LL guisuMul(LL a, LL b, LL x){
    return (a * b - (LL) ((long double) a * b / x + 1e-9) * x) % x;
} 
inline LL qwq(LL x){
    LL A = Irand(x), C = Irand(x) ;
    LL t1 = A, t2 = (guisuMul(A, A, x) + C) % x ; // 1
    LL dif = max(t1, t2) - min(t1, t2), G = __gcd(x, dif) ;
    while (G == 1){
        t1 = (guisuMul(t1, t1, x) + C) % x ;
        t2 = (guisuMul(t2, t2, x) + C) % x ;
        t2 = (guisuMul(t2, t2, x) + C) % x ;
        dif = max(t1, t2) - min(t1, t2), G = __gcd(x, dif) ;
    }
    return G ;
}
inline void Pollard_Rho(LL x){
    if (x == 1) return ;
    if (Miller_Rabin(x)) {
        res[++ tot] = x ; return ;
    }
    LL y = x ; while (y == x) y = qwq(x) ;
    Pollard_Rho(y), Pollard_Rho(x / y) ;
}
int main(){
    cin >> T ;
    while (T --){
        scanf("%lld", &N), tot = 0, Pollard_Rho(N) ;
        if (tot == 1) { puts("Prime") ; continue ; } 
        sort(res + 1, res + tot + 1), printf("%lld\n", res[tot]) ; 
    }
} 

那么开始优化

*

inline void Pollard_Rho(LL x){
    if (x == 1) return ;
    if (Miller_Rabin(x)){
        Ans = max(Ans, x) ; return ;
    } 
    register LL y = x ; 
    while (y == x) y = qwq(x) ;
    Pollard_Rho(y), Pollard_Rho(x / y) ;
}
int main(){
    srand(19260817) ;
    T =qr() ;
    while (T --){
        N = qr(), Ans = -1, Pollard_Rho(N) ;
        if (Ans == N) puts("Prime") ;else printf("%lld", Ans), putchar('\n') ;
}

虽然这似乎没个卵用

#define mytz __builtin_ctzll

inline LL gcd(LL a, LL b){
    if(!a) return b;
    if(!b) return a;
    register int t = mytz(a|b) ;
    a >>= mytz(a) ;
    do{
        b >>= mytz(b) ;
        if(a>b){LL t=b;b=a,a=t;}
        b-=a;
    }while(b);
    return a<<t;
}

其中\_\_builtin\_ctzll是用来判断二进制下末尾零的数量(针对int64)的。原理的话大概就是二进制乱搞233……

inline bool Miller_Rabin(LL x){
    if (x == Prime[1] || x == Prime[0]) return 1 ;
    if (!(x % Prime[1]) || !(x % Prime[0])) return 0 ;
    if (Test(Prime[1], x) ^ 1) return 0 ;
    if (Test(Prime[0], x) ^ 1) return 0 ;
    return 1 ;
}

然后就喜闻乐见地拿到了4500ms左右的好成绩——可这似乎不能解决最后一个点仍然卡不过去的问题。

二、4000+ms \to 3000+ms

下面才是真正的卡常数……

首先注意到,我们需要频繁调用的Rho函数和Miller中的check,都或多或少定义了一些临时变量,而对于临时变量,我们是可以register优化的……当你全部加上register修饰符之后,你会发现你的程序快了接近300+ms……

但这不重要,重要的是接下来的卡常:

4000+ms时,我的rand是这么写的:

inline LL Irand(LL x){
    return ((LL)(rand() << 15 ^ rand()) << 30 ^ (rand() << 15 ^ rand())) % x ; //2
}//2

看上去并不可以优化,但事实上,只要我们改变一下那个强制类型转换的位置:

inline LL Irand(LL x){
    return 1ll * ((rand() << 15 ^ rand()) << 30 ^ (rand() << 15 ^ rand())) % x ; //2
}//2

他就会快整整500+ms!!

这也从侧面印证了,强制类型转换才是最慢的运算。。。

所以,最后我们就成功地卡了过去qaq

稍微提一句,环境优化参数:

#pragma GCC diagnostic error "-std=c++14"
#pragma GCC target("avx")
#pragma GCC optimize(3)
#pragma GCC optimize("Ofast")
#pragma GCC optimize("inline")
#pragma GCC optimize("-fgcse")
#pragma GCC optimize("-fgcse-lm")
#pragma GCC optimize("-fipa-sra")
#pragma GCC optimize("-ftree-pre")
#pragma GCC optimize("-ftree-vrp")
#pragma GCC optimize("-fpeephole2")
#pragma GCC optimize("-ffast-math")
#pragma GCC optimize("-fsched-spec")
#pragma GCC optimize("unroll-loops")
#pragma GCC optimize("-falign-jumps")
#pragma GCC optimize("-falign-loops")
#pragma GCC optimize("-falign-labels")
#pragma GCC optimize("-fdevirtualize")
#pragma GCC optimize("-fcaller-saves")
#pragma GCC optimize("-fcrossjumping")
#pragma GCC optimize("-fthread-jumps")
#pragma GCC optimize("-funroll-loops")
#pragma GCC optimize("-fwhole-program")
#pragma GCC optimize("-freorder-blocks")
#pragma GCC optimize("-fschedule-insns")
#pragma GCC optimize("inline-functions")
#pragma GCC optimize("-ftree-tail-merge")
#pragma GCC optimize("-fschedule-insns2")
#pragma GCC optimize("-fstrict-aliasing")
#pragma GCC optimize("-fstrict-overflow")
#pragma GCC optimize("-falign-functions")
#pragma GCC optimize("-fcse-skip-blocks")
#pragma GCC optimize("-fcse-follow-jumps")
#pragma GCC optimize("-fsched-interblock")
#pragma GCC optimize("-fpartial-inlining")
#pragma GCC optimize("no-stack-protector")
#pragma GCC optimize("-freorder-functions")
#pragma GCC optimize("-findirect-inlining")
#pragma GCC optimize("-fhoist-adjacent-loads")
#pragma GCC optimize("-frerun-cse-after-loop")
#pragma GCC optimize("inline-small-functions")
#pragma GCC optimize("-finline-small-functions")
#pragma GCC optimize("-ftree-switch-conversion")
#pragma GCC optimize("-foptimize-sibling-calls")
#pragma GCC optimize("-fexpensive-optimizations")
#pragma GCC optimize("-funsafe-loop-optimizations")
#pragma GCC optimize("inline-functions-called-once")
#pragma GCC optimize("-fdelete-null-pointer-checks")

都加上的效果不是特别好,最好的是下面几句:

#pragma GCC diagnostic error "-std=c++14"
#pragma GCC target("avx")
#pragma GCC optimize(3)
#pragma GCC optimize("Ofast")

下面是完整版\rm{qaq}

#define min my_min
#define LL long long
#define max(a, b) (((a) > (b)) ? (a) : (b))

using namespace std ; LL T, N ;
LL Prime[11] = {2, 61, 97, 7, 13, 17, 23, 29}, Ans = -1 ;

inline LL guisuMul(LL a, LL b, LL m){
    LL d =((long double)a / m * b + 1e-8) ;
    LL r = a * b - d * m ;
    return r < 0 ? r + m : r ;
} 
inline LL gsm(LL a, LL b, LL p){
    register LL res = 0 ;
    while (b){
        if (b & 1) 
            res = (res + a) % p ;
        a = (a + a) % p, b >>= 1 ; 
    }
    return res ;
} 
inline LL expow(LL a, LL b, LL p){
    if (a == 1) return 1 ;
    register LL res = 1 ;
    while (b){
        if (b & 1) 
            res = gsm(res, a, p) ;
        a = gsm(a, a, p), b >>= 1 ; 
    }
    return res ;
}
inline LL my_max(LL a, LL b){ return a > b ? a : b ; }
inline LL my_min(LL a, LL b){ return a < b ? a : b ; }
inline LL qr(){
    register LL k = 0 ; char c = getchar() ;
    while (c < '0' || c > '9') c = getchar() ;
    while (c <= '9' && c >= '0') k = (k << 1) + (k << 3) + c - 48, c = getchar() ;
    return k ;
}
inline bool Test(LL p, LL x){
    register LL r = 0, d = x - 1 ;
    while (!(d & 1)) d >>= 1, ++ r ;
    for (register LL i = expow(p, d, x), j ; r ; -- r){
        j = gsm(i, i, x) ;
        if (j == 1){
            if (i == 1 || i == x - 1) 
                return 1 ; 
            return 0 ;
        }
        i = j ;
    }
    return 0 ;
}
#define ctz __builtin_ctzll

inline LL gcd(LL a, LL b){
    if(!a) return b;
    if(!b) return a;
    register int t = ctz(a|b) ;
    a >>= ctz(a) ;
    do{
        b >>= ctz(b) ;
        if(a>b){LL t=b;b=a,a=t;}
        b-=a;
    }while(b);
    return a<<t;
}
inline bool Miller_Rabin(LL x){
    if (x == Prime[1] || x == Prime[0]) return 1 ;
    if (!(x % Prime[1]) || !(x % Prime[0])) return 0 ;
    if (Test(Prime[1], x) ^ 1) return 0 ;
    if (Test(Prime[0], x) ^ 1) return 0 ;
    if (x == Prime[2] || x == Prime[3]) return 1 ;
    if (!(x % Prime[2]) || !(x % Prime[3])) return 0 ;
    if (Test(Prime[2], x) ^ 1) return 0 ;
    if (Test(Prime[3], x) ^ 1) return 0 ;
    if (x == Prime[4] || x == Prime[5]) return 1 ;
    if (!(x % Prime[4]) || !(x % Prime[5])) return 0 ;
    if (Test(Prime[4], x) ^ 1) return 0 ;
    if (Test(Prime[5], x) ^ 1) return 0 ;
    if (x == Prime[6] || x == Prime[7]) return 1 ;
    if (!(x % Prime[6]) || !(x % Prime[7])) return 0 ;
    if (Test(Prime[6], x) ^ 1) return 0 ;
    if (Test(Prime[7], x) ^ 1) return 0 ;
    return 1 ;
}
inline LL Irand(LL x){
    return 1ll * ((rand() << 15 ^ rand()) << 30 ^ (rand() << 15 ^ rand())) % x ; //2
}//2

inline LL qwq(LL x){
    register LL C = Irand(x) ;
    register LL t1 = Irand(x), t2 = guisuMul(t1, t1, x) + C ; // 1
    register LL dif = t1 > t2 ? (t1 - t2) : (t2 - t1), G = gcd(x, dif) ;
    while (G == 1){
        t1 = guisuMul(t1, t1, x) + C ;if (t1 >= x) t1 %= x ;
        t2 = guisuMul(t2, t2, x) + C, t2 = guisuMul(t2, t2, x) + C ;
        if (t2 >= x) t2 %= x ; dif = t1 > t2 ? (t1 - t2) : (t2 - t1), G = gcd(x, dif) ;
    }
    return G ;
}
inline void Pollard_Rho(LL x){
    if (x == 1) return ;
    if (Miller_Rabin(x)){
        Ans = max(Ans, x) ; return ;
    } 
    register LL y = x ; 
    while (y == x) y = qwq(x) ;
    Pollard_Rho(y), Pollard_Rho(x / y) ;
}
int main(){
    srand(19260817) ;
    freopen("sttd.out", "w", stdout) ;
    T =qr() ;
    while (T --){
        N = qr(), Ans = -1, Pollard_Rho(N) ;
        if (Ans == N) puts("Prime") ;else printf("%lld", Ans), putchar('\n') ;
    }
}