浅谈非常规质数统计算法与亚线性筛法
本文将会介绍一些非常规的质数前缀统计算法以及亚线性积性函数筛法。
可能需要的前置芝士:杜教筛、Min_25 筛、Powerful Number 筛。
符号约定:
-
-
-
-
### Meissel-Lehmer 算法 设 $\phi(n, m) = \displaystyle\sum_{i = 1}^n [\operatorname{lpf}(i) > p_m]$,$P_k(n, m) = \displaystyle\sum_{i = 1}^n [\Omega(i) = k \operatorname{and} \operatorname{lpf}(i) > p_m]$。特别地,令 $\operatorname{lpf}(1) = \inf$,$p_0 = 1$。
显然,
设定权值
令
考虑
考虑
不妨对这个过程进行分治,对于
否则,考虑枚举
注:
令
-
S_1 = \displaystyle\sum_{p = \lfloor \lfloor \sqrt[3]{n} \rfloor \rfloor + 1, p \in \operatorname{Prime}}^m \sum_{i = \lceil \frac{m}{p} \rceil, \operatorname{lpf}(i) > p}^m \mu(i) \phi(\lfloor \frac{n}{ip} \rfloor, \pi(p) - 1) -
S_2 = \displaystyle\sum_{p = \lfloor \sqrt[4]{n} \rfloor + 1, p \in \operatorname{Prime}}^{\lfloor \sqrt[3]{n} \rfloor} \sum_{i = \lceil \frac{m}{p} \rceil, \operatorname{lpf}(i) > p}^m \mu(i) \phi(\lfloor \frac{n}{ip} \rfloor, \pi(p) - 1) -
S_3 = \displaystyle\sum_{p \in \operatorname{Prime}}^{\lfloor \sqrt[4]{n} \rfloor} \sum_{i = \lceil \frac{m}{p} \rceil, \operatorname{lpf}(i) > p}^m \mu(i) \phi(\lfloor \frac{n}{ip} \rfloor, \pi(p) - 1)
注意到对于
-
S_1 = -\displaystyle\sum_{p = \lfloor \sqrt[3]{n} \rfloor + 1, p \in \operatorname{Prime}}^m \sum_{i = p + 1, i \in \operatorname{Prime}}^m \phi(\lfloor \frac{n}{ip} \rfloor, \pi(p) - 1) -
S_2 = -\displaystyle\sum_{p = \lfloor \sqrt[4]{n} \rfloor + 1, p \in \operatorname{Prime}}^{\lfloor \sqrt[3]{n} \rfloor} \sum_{i = p + 1, i \in \operatorname{Prime}}^m \phi(\lfloor \frac{n}{ip} \rfloor, \pi(p) - 1)
先考虑计算
接下来考虑计算
-
U = \displaystyle\sum_{p = \lfloor \sqrt[4]{n} \rfloor + 1, p \in \operatorname{Prime}}^{\lfloor \sqrt[3]{n} \rfloor} \sum_{i = \max(p, \lfloor \frac{n}{p^2} \rfloor) + 1, i \in \operatorname{Prime}}^m \phi(\lfloor \frac{n}{ip} \rfloor, \pi(p) - 1) -
V = \displaystyle\sum_{p = \lfloor \sqrt[4]{n} \rfloor + 1, p \in \operatorname{Prime}}^{\lfloor \sqrt[3]{n} \rfloor} \sum_{i = p + 1, i \in \operatorname{Prime}}^{\max(\lfloor \frac{n}{p^2} \rfloor, m)} \phi(\lfloor \frac{n}{ip} \rfloor, \pi(p) - 1)
先考虑计算
所以
直接枚举
再考虑计算
令
-
V_1 = \displaystyle\sum_{p = \lfloor \sqrt[4]{n} \rfloor + 1, p \in \operatorname{Prime}}^{\lfloor \sqrt[3]{n} \rfloor} \sum_{i = p + 1, i \in \operatorname{Prime}}^{\min(\lfloor \frac{n}{p^2} \rfloor, m)} (2 - \pi(p)) -
V_2 = \displaystyle\sum_{p = \lfloor \sqrt[4]{n} \rfloor + 1, p \in \operatorname{Prime}}^{\lfloor \sqrt[3]{n} \rfloor} \sum_{i = p + 1, i \in \operatorname{Prime}}^{\min(\lfloor \frac{n}{p^2} \rfloor, m)} \pi(\lfloor \frac{n}{ip} \rfloor)
对于
对于
- 类似数论分块地处理枚举
i 的和式即可。注意我们需要对\pi(i) 相同的部分也合在一起计算。可以证明,这样做所需要枚举的状态数为O(\frac{n^{\frac{2}{3}}}{\ln n}) 。需要注意的是这样做的总时间复杂度为O(\frac{n^{\frac{2}{3}}}{\ln n}) 而非后面将会提到的O(\frac{n^{\frac{2}{3}}}{\log^2 n}) 。 - 将
V_2 分为下面五个部分计算。
令
-
W_1 = \displaystyle\sum_{p = \lfloor \sqrt[4]{n} \rfloor + 1, p \in \operatorname{Prime}}^{\lfloor \frac{n}{m^2} \rfloor} \sum_{i = p + 1, i \in \operatorname{Prime}}^m \pi(\lfloor \frac{n}{ip} \rfloor) -
W_2 = \displaystyle\sum_{p = \lfloor \frac{n}{m^2} \rfloor + 1, p \in \operatorname{Prime}}^{\lfloor \sqrt{\frac{n}{m}} \rfloor} \sum_{i = p + 1, i \in \operatorname{Prime}}^{\lfloor \sqrt{\frac{n}{p}} \rfloor} \pi(\lfloor \frac{n}{ip} \rfloor) -
W_3 = \displaystyle\sum_{p = \lfloor \frac{n}{m^2} \rfloor + 1, p \in \operatorname{Prime}}^{\lfloor \sqrt{\frac{n}{m}} \rfloor} \sum_{i = \lfloor \sqrt{\frac{n}{p}} \rfloor + 1, i \in \operatorname{Prime}}^m \pi(\lfloor \frac{n}{ip} \rfloor) -
W_4 = \displaystyle\sum_{p = \lfloor \sqrt{\frac{n}{m}} \rfloor + 1, p \in \operatorname{Prime}}^{\lfloor \sqrt[3]{n} \rfloor} \sum_{i = p + 1, i \in \operatorname{Prime}}^{\lfloor \sqrt{\frac{n}{p}} \rfloor} \pi(\lfloor \frac{n}{ip} \rfloor) -
W_5 = \displaystyle\sum_{p = \lfloor \sqrt{\frac{n}{m}} \rfloor + 1, p \in \operatorname{Prime}}^{\lfloor \sqrt[3]{n} \rfloor} \sum_{i = \lfloor \sqrt{\frac{n}{p}} \rfloor + 1, i \in \operatorname{Prime}}^{\lfloor \frac{n}{p^2} \rfloor} \pi(\lfloor \frac{n}{ip} \rfloor)
对于
对于
最后考虑计算
综上,令
- 实现(by whzzt):https://www.luogu.com.cn/paste/1np91hys
一种求
\pi(n) 的算法 - 其思想最早疑似出现在 LOJ 上 Min_25 的一份提交记录。
注意:这种算法只能求出单个
考虑在前文 Min_25 筛 Part 1 优化三的基础上继续进行优化。
注意到
于是时间复杂度优化至
又注意到若我们只转移
时间复杂度与 Meissel-Lehmer 算法相同,但好写多了。
杜教筛的优化
此时我们依然需要解决这样的问题:已知
杜教筛的时间复杂度瓶颈就在单次
考虑取阈值
如果我们也能快速求出
注意到如果
如果
综上,时间复杂度为
- [例 1] P4213 【模板】杜教筛
模板题,构造
但事实上注意到这道题只需要求单点前缀和,于是在求出
代码:
#include <stdio.h>
#include <math.h>
typedef long long ll;
typedef unsigned long long ull;
typedef __uint128_t ulll;
typedef struct Division_tag {
ulll a;
Division_tag(){}
Division_tag(ull mod){
a = (((ulll)1 << 64) + mod - 1) / mod;
}
} Division;
const int N = 46340 + 7, M = 92680 + 7, K = 7095 + 7;
int cur_n, sqrt_n, limit1, cbrt_n, limit2, available;
int prime[N], mu[N], lpf[N], pi[N], number[M], id1[N], id2[N], g_eq[M], pos[K], g_mu[M];
bool p[N];
Division inv_prime[N], inv_pos[K];
ull operator /(const ull &a, const Division &b){
return a * b.a >> 64;
}
ull operator /=(ull &a, const Division &b){
return a = a / b;
}
inline void init1(){
int cnt = 0;
p[0] = p[1] = true;
lpf[1] = 0x7fffffff;
mu[1] = 1;
pi[1] = 0;
for (register int i = 2; i < N; i++){
if (!p[i]){
cnt++;
prime[cnt] = i;
lpf[i] = i;
mu[i] = -1;
inv_prime[cnt] = Division(i);
}
pi[i] = cnt;
for (register int j = 1; j <= cnt && i * prime[j] < N; j++){
int t = i * prime[j];
p[t] = true;
lpf[t] = prime[j];
if (i % prime[j] == 0){
mu[t] = 0;
break;
}
mu[t] = -mu[i];
}
}
}
inline ll sum1(int n){
return n * ((ll)n + 1) / 2;
}
inline int get_id(int n){
return n <= sqrt_n ? id1[n] : id2[cur_n / n];
}
inline int min(int a, int b){
return a < b ? a : b;
}
inline int init2(int n){
int process, id = 0;
cur_n = n;
sqrt_n = sqrt(n);
process = n / sqrt_n;
limit1 = pow(n, 1.0 / 6.0);
limit2 = pow(n, 2.0 / 3.0);
for (register int i = 1, j; ; i = j + 1){
int tn = n / i;
j = n / tn;
id++;
number[id] = tn;
if (tn <= sqrt_n){
id1[tn] = id;
} else {
id2[j] = id;
}
if (j == n) break;
}
for (register int i = 1; i <= id; i++){
g_eq[i] = number[i] - 1;
}
for (register int i = 1; i <= pi[limit1]; i++){
for (register int j = get_id(prime[i] * prime[i]); j >= 1; j--){
ull cur_val = number[j];
for (register int k = prime[i]; 1ll * k * prime[i] <= number[j]; k *= prime[i]){
cur_val /= inv_prime[i];
g_eq[j] -= (g_eq[get_id(cur_val)] - i) + 1;
}
}
}
for (register int i = 1; i <= id; i++){
g_eq[i] -= pi[min(limit1, number[i])] - 1;
}
available = 0;
for (register int i = 1; i <= process; i++){
if (lpf[i] > limit1){
available++;
pos[available] = i;
inv_pos[available] = Division(i);
}
}
return id;
}
void dfs(int cur, int pos, int val){
int up = pi[min(limit2 / pos, sqrt_n)];
if (pos > sqrt_n || p[pos]) g_mu[get_id(pos)] += val + 1;
for (register int i = cur + 1; i <= up; i++){
bool flag = true;
for (register int j = pos * prime[i]; ; j *= prime[i]){
dfs(i, j, flag ? -val : 0);
if (1ll * j * prime[i] > limit2) break;
flag = false;
}
}
}
inline void solve(int n, int id){
int i = 1;
for (; i <= id; i++){
g_mu[i] = 0;
}
dfs(pi[limit1], 1, 1);
i = id - 1;
for (; i >= 1 && number[i] <= limit2; i--){
g_mu[i] += g_mu[i + 1];
}
i = id;
for (; i >= 1 && number[i] <= limit2; i--){
g_mu[i] -= g_eq[i];
}
for (; i >= 1; i--){
int x = sqrt(number[i]), y = number[i] / x;
g_mu[i] = 1 + g_mu[get_id(x)] * g_eq[get_id(y)];
for (register int j = 1; j <= available && pos[j] <= y; j++){
int cur_id = get_id(number[i] / inv_pos[j]);
if (pos[j] <= x) g_mu[i] -= mu[pos[j]] * g_eq[cur_id];
if (j > 1) g_mu[i] -= g_mu[cur_id];
}
}
i = 1;
for (; i <= id; i++){
g_mu[i] += -pi[min(limit1, number[i])] - 1;
}
i = pi[limit1];
for (; i >= 1; i--){
int t = prime[i] * prime[i];
for (register int j = 1; j <= id && number[j] >= t; j++){
g_mu[j] += -(g_mu[get_id(number[j] / inv_prime[i])] + i);
}
}
i = 1;
for (; i <= id; i++){
g_mu[i]++;
}
}
int main(){
int t;
scanf("%d", &t);
init1();
for (register int i = 1; i <= t; i++){
int n, id;
ll ans = 0;
scanf("%d", &n);
id = init2(n);
solve(n, id);
for (register int j = 1, k; ; j = k + 1){
int tn = n / j;
k = n / tn;
ans += g_mu[get_id(tn)] * (sum1(k) - sum1(j - 1));
if (k == n) break;
}
printf("%lld %d\n", ans, g_mu[1]);
}
return 0;
}
cmd 筛
orz command_block orz!!!
- Part 1(Min_25 筛 Part 1 优化)
1.1. 优化 1
考虑在筛出
则每次新加质数的时候,可以维护每个
递推次数为
取
1.2. 优化 2
容易发现在递推式中
则静态数组的调用次数为
取
1.3. 优化 3
考虑对
取
1.4. 实现
- 优化 1
代码:
#include <iostream>
#include <vector>
#include <cmath>
using namespace std;
typedef long long ll;
const int N = 316227 + 7, M = 2498011 + 7, K = 632454 + 7, P = 27293 + 7;
ll limit, sqrt_n;
int id1[N], id2[N], tree[M];
ll prime[M], number[K], g[K];
bool p[M];
vector<ll> v[P];
inline ll sqrt(ll n){
ll ans = sqrt((double)n);
while (ans * ans <= n) ans++;
return ans - 1;
}
inline ll lowbit(ll x){
return x & (-x);
}
inline void add(ll x, int k){
while (x <= limit){
tree[x] += k;
x += lowbit(x);
}
}
inline int get_sum(ll x){
int ans = 0;
while (x > 0){
ans += tree[x];
x -= lowbit(x);
}
return ans;
}
inline int get_id(ll n, ll m){
return n <= sqrt_n ? id1[n] : id2[m / n];
}
inline void init(ll n){
int sqrt_cnt, prime_cnt = 0, id = 0;
ll limit_i;
sqrt_n = sqrt(n);
if (n <= 2){
limit = n;
sqrt_cnt = 0;
} else {
limit = max(pow(n / log(n), 2.0 / 3.0), (double)sqrt_n);
}
limit_i = limit + 1;
p[0] = p[1] = true;
for (register ll i = 2; i <= limit; i++){
if (!p[i]) prime[++prime_cnt] = i;
if (i == sqrt_n) sqrt_cnt = prime_cnt;
for (register int j = 1; j <= prime_cnt && i * prime[j] <= limit; j++){
ll t = i * prime[j];
p[t] = true;
v[j].push_back(t);
if (i % prime[j] == 0) break;
}
}
for (register ll i = 1, j; i <= n; i = j + 1){
ll tn = n / i;
j = n / tn;
id++;
number[id] = tn;
g[id] = tn - 1;
if (tn <= sqrt_n){
id1[tn] = id;
} else {
id2[j] = id;
}
}
add(1, 1);
for (register int i = 1; i <= sqrt_cnt; i++){
int size = v[i].size();
ll t1 = max(prime[i] * prime[i], limit_i);
for (register int j = 1; j <= id && number[j] >= t1; j++){
ll t2 = number[j] / prime[i];
if (t2 <= limit){
g[j] -= t2 - get_sum(t2) - (i - 1);
} else {
g[j] -= g[get_id(t2, n)] - (i - 1);
}
}
for (register int j = 0; j < size; j++){
add(v[i][j], 1);
}
}
}
int main(){
ll n;
cin >> n;
init(n);
cout << g[1];
return 0;
}
- 优化 2
代码:
#include <iostream>
#include <vector>
#include <cmath>
using namespace std;
typedef long long ll;
const int N = 316227 + 7, M = 632454 + 7, K = 27293 + 7;
ll sqrt_n, limit;
int pi[N], id1[N], id2[N], tree[N];
ll prime[N], number[M], g[M];
bool p[N];
vector<ll> v[K];
inline ll sqrt(ll n){
ll ans = sqrt((double)n);
while (ans * ans <= n) ans++;
return ans - 1;
}
inline ll lowbit(ll x){
return x & (-x);
}
inline void add(ll x, int k){
while (x <= limit){
tree[x] += k;
x += lowbit(x);
}
}
inline int get_sum(ll x){
int ans = 0;
while (x > 0){
ans += tree[x];
x -= lowbit(x);
}
return ans;
}
inline int get_id(ll n, ll m){
return n <= sqrt_n ? id1[n] : id2[m / n];
}
inline void init(ll n){
int sqrt_cnt, prime_cnt = 0, id = 0;
ll limit_i;
sqrt_n = sqrt(n);
if (n <= 2){
limit = n;
sqrt_cnt = 0;
} else {
limit = max(pow(n, 2.0 / 3.0) / pow(log(n), 4.0 / 3.0), (double)sqrt_n);
}
limit_i = limit + 1;
p[0] = p[1] = true;
pi[1] = 0;
for (register ll i = 2; i <= limit; i++){
if (!p[i]) prime[++prime_cnt] = i;
pi[i] = prime_cnt;
if (i == sqrt_n) sqrt_cnt = prime_cnt;
for (register int j = 1; j <= prime_cnt && i * prime[j] <= limit; j++){
ll t = i * prime[j];
p[t] = true;
v[j].push_back(t);
if (i % prime[j] == 0) break;
}
}
for (register ll i = 1, j; i <= n; i = j + 1){
ll tn = n / i;
j = n / tn;
id++;
number[id] = tn;
g[id] = tn - 1;
if (tn <= sqrt_n){
id1[tn] = id;
} else {
id2[j] = id;
}
}
add(1, 1);
for (register int i = 1; i <= sqrt_cnt; i++){
int size = v[i].size();
ll t1 = prime[i] * prime[i], t2 = max(t1, limit_i);
for (register int j = 1; j <= id && number[j] >= t2; j++){
ll t3 = number[j] / prime[i];
if (t3 <= limit){
if (t3 < t1){
g[j] -= pi[t3] - (i - 1);
} else {
g[j] -= t3 - get_sum(t3) - (i - 1);
}
} else {
g[j] -= g[get_id(t3, n)] - (i - 1);
}
}
for (register int j = 0; j < size; j++){
add(v[i][j], 1);
}
}
}
int main(){
ll n;
cin >> n;
init(n);
cout << g[1];
return 0;
}
- 优化 3
代码:
#include <iostream>
#include <vector>
#include <cmath>
using namespace std;
typedef long long ll;
const int N = 2498011 + 7, M = 316227 + 7, K = 632454 + 7, P = 27293 + 7;
ll sqrt_n, limit1;
int pi[N], id1[M], id2[M], sum[N], tree[N];
ll prime[N], number[K], g[K];
bool p[N];
vector<ll> v[P];
inline ll sqrt(ll n){
ll ans = sqrt((double)n);
while (ans * ans <= n) ans++;
return ans - 1;
}
inline ll lowbit(ll x){
return x & (-x);
}
inline void add(ll x, int k){
while (x <= limit1){
tree[x] += k;
x += lowbit(x);
}
}
inline int get_sum(ll x){
int ans = 0;
while (x > 0){
ans += tree[x];
x -= lowbit(x);
}
return ans;
}
inline int get_id(ll n, ll m){
return n <= sqrt_n ? id1[n] : id2[m / n];
}
inline void init(ll n){
int cnt = 0, limit2, id = 0;
ll limit1_i;
sqrt_n = sqrt(n);
if (n <= 2){
limit1 = n;
} else {
limit1 = max(pow(n / log(n), 2.0 / 3.0), (double)sqrt_n);
}
limit1_i = limit1 + 1;
p[0] = p[1] = true;
pi[1] = 0;
for (register ll i = 2; i <= limit1; i++){
if (!p[i]) prime[++cnt] = i;
pi[i] = cnt;
for (register int j = 1; j <= cnt && i * prime[j] <= limit1; j++){
ll t = i * prime[j];
p[t] = true;
v[j].push_back(t);
if (i % prime[j] == 0) break;
}
}
limit2 = pi[(ll)pow(n, 1.0 / 6.0)];
for (register ll i = 1, j; i <= n; i = j + 1){
ll tn = n / i;
j = n / tn;
id++;
number[id] = tn;
g[id] = tn - 1;
if (tn <= sqrt_n){
id1[tn] = id;
} else {
id2[j] = id;
}
}
for (register int i = 1; i <= limit2; i++){
int size = v[i].size();
ll t = prime[i] * prime[i];
for (register int j = 0; j < size; j++){
sum[v[i][j]] = 1;
}
for (register int j = 1; j <= id && number[j] >= t; j++){
g[j] -= g[get_id(number[j] / prime[i], n)] - (i - 1);
}
}
for (register int i = 1; i <= limit1; i++){
sum[i] += sum[i - 1];
}
add(1, 1);
for (register int i = limit2 + 1; i <= pi[sqrt_n]; i++){
int size = v[i].size();
ll t1 = prime[i] * prime[i], t2 = max(t1, limit1_i);
for (register int j = 1; j <= id && number[j] >= t2; j++){
ll t3 = number[j] / prime[i];
if (t3 <= limit1){
if (t3 < t1){
g[j] -= pi[t3] - (i - 1);
} else {
g[j] -= t3 - sum[t3] - get_sum(t3) - (i - 1);
}
} else {
g[j] -= g[get_id(t3, n)] - (i - 1);
}
}
for (register int j = 0; j < size; j++){
add(v[i][j], 1);
}
}
}
int main(){
ll n;
cin >> n;
init(n);
cout << g[1];
return 0;
}
- Part 2
考虑构造 Powerful Number 筛,令
考虑沿用 Min_25 筛第二部分法二的递推式。
设
初值:
转移:
容易发现这里的式子和 Part 1 的式子极为相似,于是沿用上面的三个优化即可。需要注意的是在转移过程中,当
- 时间复杂度
两个 Part 的递推部分均为
- [例 2] P5325 【模板】Min_25 筛
代码:
#include <iostream>
#include <vector>
#include <cmath>
using namespace std;
typedef long long ll;
const int N = 449165 + 7, mod = 1e9 + 7;
ll limit1;
typedef struct {
ll tree[N];
inline ll lowbit(ll x){
return x & (-x);
}
inline void add(ll x, ll k){
while (x <= limit1){
tree[x] = (tree[x] + k) % mod;
x += lowbit(x);
}
}
inline ll get_sum(ll x){
ll ans = 0;
while (x > 0){
ans = (ans + tree[x]) % mod;
x -= lowbit(x);
}
return ans;
}
} BIT;
const int M = 1e5 + 7, K = 2e5 + 7, inv2 = 500000004, inv6 = 166666668;
ll sqrt_n;
BIT tree1, tree2, tree3;
int pi[N], mu_sqr[N], id1[M], id2[M];
ll prime[N], f[N], prime_sum[K], prime_sqr_sum[K], number[K], g1[K], g2[K], s1[N], s2[N], h[K];
bool p[N];
vector<ll> v[M];
inline ll sqrt(ll n){
ll ans = sqrt((double)n);
while (ans * ans <= n) ans++;
return ans - 1;
}
inline ll sum1(ll n){
n %= mod;
return n * (n + 1) % mod * inv2 % mod;
}
inline ll sum2(ll n){
n %= mod;
return n * (n + 1) % mod * (n * 2 % mod + 1) % mod * inv6 % mod;
}
inline int get_id(ll n, ll m){
return n <= sqrt_n ? id1[n] : id2[m / n];
}
inline int init(ll n){
int sqrt_cnt, prime_cnt = 0, limit2, id = 0, pos = 1;
ll limit1_i;
sqrt_n = sqrt(n);
if (n <= 2){
limit1 = n;
sqrt_cnt = 0;
} else {
limit1 = max(pow(n / log2(n), 2.0 / 3.0), (double)sqrt_n);
}
limit1_i = limit1 + 1;
p[0] = p[1] = true;
pi[1] = 0;
mu_sqr[1] = 1;
f[1] = 1;
for (register ll i = 2; i <= limit1; i++){
if (!p[i]){
prime[++prime_cnt] = i;
mu_sqr[i] = 1;
f[i] = i * (i - 1) % mod;
}
pi[i] = prime_cnt;
if (i == sqrt_n) sqrt_cnt = prime_cnt;
for (register int j = 1; j <= prime_cnt && i * prime[j] <= limit1; j++){
ll t1 = i * prime[j];
p[t1] = true;
v[j].push_back(t1);
if (i % prime[j] == 0){
ll t2 = t1;
mu_sqr[t1] = 0;
while (t2 % prime[j] == 0){
t2 /= prime[j];
}
f[t1] = f[t2] * f[t1 / t2] % mod;
break;
}
mu_sqr[t1] = mu_sqr[i];
f[t1] = f[i] * f[prime[j]] % mod;
}
}
limit2 = pi[(ll)pow(n, 1.0 / 6.0)];
for (register int i = 1; i <= prime_cnt; i++){
prime_sum[i] = (prime_sum[i - 1] + prime[i]) % mod;
prime_sqr_sum[i] = (prime_sqr_sum[i - 1] + prime[i] * prime[i] % mod) % mod;
}
for (register ll i = 1, j; i <= n; i = j + 1){
ll tn = n / i;
j = n / tn;
id++;
number[id] = tn;
g1[id] = ((sum1(tn) - 1) % mod + mod) % mod;
g2[id] = ((sum2(tn) - 1) % mod + mod) % mod;
if (tn <= sqrt_n){
id1[tn] = id;
} else {
id2[j] = id;
}
}
for (register int i = 1; i <= limit2; i++){
int size = v[i].size();
ll t = prime[i] * prime[i];
for (register int j = 0; j < size; j++){
s1[v[i][j]] = v[i][j];
s2[v[i][j]] = v[i][j] * v[i][j] % mod;
}
for (register int j = 1; j <= id && number[j] >= t; j++){
int cur_id = get_id(number[j] / prime[i], n);
g1[j] = ((g1[j] - prime[i] * (g1[cur_id] - prime_sum[i - 1]) % mod) % mod + mod) % mod;
g2[j] = ((g2[j] - t % mod * (g2[cur_id] - prime_sqr_sum[i - 1]) % mod) % mod + mod) % mod;
}
}
for (register ll i = 1; i <= limit1; i++){
s1[i] = (s1[i - 1] + s1[i]) % mod;
s2[i] = (s2[i - 1] + s2[i]) % mod;
}
tree1.add(1, 1);
tree2.add(1, 1);
for (register int i = limit2 + 1; i <= sqrt_cnt; i++){
int size = v[i].size();
ll t1 = prime[i] * prime[i], t2 = max(t1, limit1_i);
for (register int j = 1; j <= id && number[j] >= t2; j++){
ll t3 = number[j] / prime[i];
if (t3 <= limit1){
if (t3 < t1){
g1[j] = ((g1[j] - prime[i] * (prime_sum[pi[t3]] - prime_sum[i - 1]) % mod) % mod + mod) % mod;
g2[j] = ((g2[j] - t1 % mod * (prime_sqr_sum[pi[t3]] - prime_sqr_sum[i - 1]) % mod) % mod + mod) % mod;
} else {
g1[j] = ((g1[j] - prime[i] * (sum1(t3) - s1[t3] - tree1.get_sum(t3) - prime_sum[i - 1]) % mod) % mod + mod) % mod;
g2[j] = ((g2[j] - t1 % mod * (sum2(t3) - s2[t3] - tree2.get_sum(t3) - prime_sqr_sum[i - 1]) % mod) % mod + mod) % mod;
}
} else {
int cur_id = get_id(t3, n);
g1[j] = ((g1[j] - prime[i] * (g1[cur_id] - prime_sum[i - 1]) % mod) % mod + mod) % mod;
g2[j] = ((g2[j] - t1 % mod * (g2[cur_id] - prime_sqr_sum[i - 1]) % mod) % mod + mod) % mod;
}
}
for (register int j = 0; j < size; j++){
tree1.add(v[i][j], v[i][j]);
tree2.add(v[i][j], (ll)v[i][j] * v[i][j] % mod);
}
}
for (register int i = 1; i <= id && number[i] > limit1; i++){
h[i] = ((g2[i] - g1[i]) % mod + mod) % mod;
}
for (register int i = sqrt_cnt; i > limit2; i--){
int size = v[i].size();
ll t1 = prime[i] * prime[i], t2 = max(t1, limit1_i);
while (pos <= id && number[pos] >= t1){
if (number[pos] <= limit1) h[pos] = ((tree3.get_sum(number[pos]) + (prime_sqr_sum[pi[number[pos]]] - prime_sum[pi[number[pos]]])) % mod + mod) % mod;
pos++;
}
for (register int j = 1; j <= id && number[j] >= t2; j++){
ll t3 = number[j] / prime[i];
if (t3 <= limit1){
if (t3 < t1){
h[j] = ((h[j] + prime[i] * (prime[i] - 1) % mod * ((prime_sqr_sum[pi[t3]] - prime_sum[pi[t3]]) - (prime_sqr_sum[i] - prime_sum[i])) % mod) % mod + mod) % mod;
} else {
h[j] = ((h[j] + prime[i] * (prime[i] - 1) % mod * (tree3.get_sum(t3) + (prime_sqr_sum[pi[t3]] - prime_sum[pi[t3]]) - (prime_sqr_sum[i] - prime_sum[i])) % mod) % mod + mod) % mod;
}
} else {
h[j] = ((h[j] + prime[i] * (prime[i] - 1) % mod * (h[get_id(t3, n)] - (prime_sqr_sum[i] - prime_sum[i])) % mod) % mod + mod) % mod;
}
}
for (register int j = 0; j < size; j++){
if (mu_sqr[v[i][j]] == 1) tree3.add(v[i][j], f[v[i][j]]);
}
}
for (register int i = id; i >= 1 && number[i] <= limit1; i--){
h[i] = ((tree3.get_sum(number[i]) + (prime_sqr_sum[pi[number[i]]] - prime_sum[pi[number[i]]])) % mod + mod) % mod;
}
for (register int i = limit2; i >= 1; i--){
ll t = prime[i] * prime[i];
for (register int j = 1; j <= id && number[j] >= t; j++){
h[j] = ((h[j] + prime[i] * (prime[i] - 1) % mod * (h[get_id(number[j] / prime[i], n)] - (prime_sqr_sum[i] - prime_sum[i])) % mod) % mod + mod) % mod;
}
}
return sqrt_cnt;
}
ll dfs(ll n, ll m, int k, ll val, int cnt){
ll ans = val * (h[get_id(n, m)] + 1) % mod;
for (register int i = k + 1; i <= cnt && prime[i] * prime[i] <= n; i++){
ll t = prime[i] * (prime[i] - 1) % mod;
for (register ll j = prime[i] * prime[i], x = j % mod * (j % mod - 1) % mod; j <= n; j *= prime[i], x = ((j % mod * (j % mod - 1) % mod - t * x % mod) % mod + mod) % mod){
ans = (ans + dfs(n / j, m, i, val * x % mod, cnt)) % mod;
}
}
return ans;
}
int main(){
ll n;
cin >> n;
cout << dfs(n, n, 0, 1, init(n));
return 0;
}
zzt 筛
orz whzzt orz!!!
对于一个积性函数
设
- 设
S_1 = [1, n^{\frac{1}{\alpha}}] ,则第一部分为f_{S_1} 。 - 设
S_2 = (n^{\frac{1}{\alpha}}, n^{\frac{1}{2}}] ,则第二部分为f_{S_2} 。 - 设
S_3 = (n^{\frac{1}{2}}, n] ,则第三部分为f_{S_3} 。
其中
我们的任务即为:
- 分别算出
f_{S_1}, f_{S_2}, f_{S_3} 的块筛。 - 把它们卷起来。
- 计算
f_{S_1} 的块筛
注意到直接暴力卷上一个
- 计算
f_{S_2} 的块筛
在传统的筛法中,我们一般都比较关心质因数的顺序以避免算重。
考虑打破常规。设
但是这样算出的结果显然是有问题的。对于一个
遂考虑构造
那我们如何较快地进行
首先,不难发现
- 引理:
h_k 中非空状态数个数是O(\frac{n}{\ln n}) 的。证明见 zzt 博客。
考虑根号分治,设有阈值
于是这部分的总时间复杂度为
- 计算
f_{S_3} 的块筛
一般地,这里我们需要计算
考虑把
注意到
综上,令
- [例 3] Chef and Working Plan
题意:给定
数据范围:
时限:极限数据
设
首先对原式进行一个莫比乌斯反演:
前面的
参考资料
- Meissel-Lehmer 算法 - OI Wiki
- 题解 P7884 【模板】Meissel–Lehmer 算法 - zydy 的博客
- 积性函数求和问题的一种筛法 - command_block 的博客
- 积性函数求和问题的一种筛法 - whzzt 的博客