大整数乘法

· · 个人记录

#include <bits/stdc++.h>

using f64 = double;
using cpx = std::complex<f64>;
using u64 = unsigned long long;

constexpr int N = 1 << 20;
constexpr int bceil(int x){
    return (x == 1 || x == 0) ? x : (1 << (std::__lg(x - 1) + 1)); 
}

namespace f_f_t
{
    const f64 Pi_2 = acos(-1.0) / 2;
    cpx w[N >> 3];
    int init_l;
    void init(int l)
    {
        if (l <= init_l){
            return;
        }
        int t = std::__lg(l - 1);
        l = 1 << t, *w = cpx(1.0, 0.0), init_l = l << 1;
        for (int i = 1; i < l; i <<= 1){
            w[i] = std::polar(1.0, Pi_2 / i);
        }
        for (int i = 1; i < l; ++i){
            w[i] = w[i & (i - 1)] * w[i & -i];
        }
    }
    void dif(cpx *f, int L)
    {
        for (int l = L >> 1, r = L; l; l >>= 1, r >>= 1){
            for (cpx *j = f, *o = w; j != f + L; j += r, ++o){
                for (cpx *k = j; k != j + l; ++k){
                    cpx x = *k, y = k[l] * *o;
                    *k = x + y, k[l] = x - y;
                }
            }
        }
    }
    void dit(cpx *f, int L)
    {
        for (int l = 1, r = 2; l < L; l <<= 1, r <<= 1){
            for (cpx *j = f, *o = w; j != f + L; j += r, ++o){
                for (cpx *k = j; k != j + l; ++k){
                    cpx x = *k, y = k[l];
                    *k = x + y, k[l] = (x - y) * std::conj(*o);
                }
            }
        }
    }
    void Conv(f64 *f, int lim, f64 *g){
        cpx *F = (cpx*)f, *G = (cpx*)g;
        int l = lim >> 1;
        init(l), dif(F, l), dif(G, l);
        f64 fx = 2.0 / lim, fx2 = 0.5 / lim;
        F[0] = (F[0] * G[0] + 2 * F[0].imag() * G[0].imag()) * fx, F[1] = F[1] * G[1] * fx;
        for (int k = 2, m = 3; k < l; k <<= 1, m <<= 1){
            for (int i = k, j = i + k - 1; i < m; ++i, --j){
                cpx oi = (F[i] + std::conj(F[j])), hi = (F[i] - std::conj(F[j]));
                cpx Oi = (G[i] + std::conj(G[j])), Hi = (G[i] - std::conj(G[j]));
                cpx r0 = oi * Oi - hi * Hi * ((i & 1) ? -w[i >> 1] : w[i >> 1]), r1 = Oi * hi + oi * Hi;
                F[i] = (r0 + r1) * fx2, F[j] = std::conj(r0 - r1) * fx2;
            }
        }
        dit(F, l);
    }
}

f64 F[N >> 1], G[N >> 1];

char buf[N << 1];

void solve(){
    int tot = fread(buf, 1, sizeof(buf), stdin);
    char *bga = buf, *eda = buf, *bgb, *edb = buf + tot;
    while(!isdigit(*(edb - 1))){--edb;}
    for(; (((*((u64*)eda)) + 0x5f5f5f5f5f5f5f5f) & 0x8080808080808080) == 0x8080808080808080; eda+=8){}
    for(; *eda > 32; ){++eda;}
    for(bgb = eda; !isdigit(*bgb); ++bgb){}
    auto radX = [](char* bg, char* ed, f64* out){
        char *edr = bg + 4, *pos = ed;
        for(; pos > edr; pos -= 4){
            *out++ = *(pos - 4) * 1000 + *(pos - 3) * 100 + *(pos - 2) * 10 + *(pos - 1) - 53328;
        }
        return *out++ = std::stod(std::string(bg, pos)), out;
    };
    int n = radX(bga, eda, F) - F, m = radX(bgb, edb, G) - G, lim = std::max(8, bceil(n + m - 1));
    std::fill(F + n, F + lim, 0.0), std::fill(G + m, G + lim, 0.0), f_f_t::Conv(F, lim, G);
    {
        struct ict{
            int num[10000];//小端
            ict(){
                int j = 0;
                for(int e0 = (48 << 0); e0 < (58 << 0); e0 += (1 << 0)){
                    for(int e1 = (48 << 8); e1 < (58 << 8); e1 += (1 << 8)){
                        for(int e2 = (48 << 16); e2 < (58 << 16); e2 += (1 << 16)){
                            for(int e3 = (48 << 24); e3 < (58 << 24); e3 += (1 << 24)){
                                num[j] = e0 ^ e1 ^ e2 ^ e3, ++j;
                            }
                        }
                    }
                }
            }
        }ot;
        int o = (n + m - 2), *ed = (int*)buf + o, *c = ed;
        u64 u = 0;
        for(int p = 0; p < o; ++p){
            u += u64(F[p] + 0.5), *--c = ot.num[u % 10000u], u /= 10000u;
        }
        fprintf(stdout, "%llu", u + u64(F[o] + 0.5)), fwrite(c, sizeof(int), ed - c, stdout);
    }
}

int main(){
    std::cin.tie(nullptr) -> sync_with_stdio(false);
    solve();
    return 0;
}

袖珍版. 速度应该还是不错的,

3次变1.5次的细节。

实际上这个是三个1次变0.5次结合的产物。

我们都知道如果只对湿布做fft只需要半长的fft。

所以实际上这个就是三次只对实部做fft之后等价运算削弱了一下运算强度。

这个东西精度还是不错的