答案
include <iostream>
include <cstring>
include <cstdio>
define d isdigit(c)
define g c=t.get();
define L return
define K break
define A(c,a,b)if(c)a else b;
define I(c,a)if(c)a;
define Y goto E
struct MI{private:char bb[4096];FILEf;charbs,*be;char e;bool
o,l;public:MI():f(stdin){}MI(FILE*f):f(f),bs(0),be(0){}
ifdef linux
MI(const chars):f(fmemopen(const_cast<char>(s),strlen(s),"r")){}
endif
inline operator bool(){L!l;}inline char get(){if(o){o=0;L e;}
ifdef MIVIK
char c=fgetc(f);I(c==-1,l=1)L c;
else
I(bs==be,be=(bs=bb)+fread(bb,1,sizeof(bb),f))I(bs==be,{l=1;L-
1;})L*bs++;
endif
}inline void unget(char c){o=1;e=c;}template<typename T>inline T
read(){T r;*this>r;L r;}template<typename T>inline MI&operator>
(T&);};template<typename T>struct Q{const static bool
U=T(-1)>=T(0);inline void operator()(MI&t,T&r)const{r=0;char c;bool
y=0;A(U,for(;;){g I(c==-1,Y)I(d,K)},for(;;){g I(c==-1,Y)A(c=='-',{g
I(d,{y=1;K;})},I(d,K))})for(;;){I(c==-1,Y)A(d,r=r*10+
(c^48);,K)g}t.unget(c);E:;I(y,r=-r)}};template<>struct Q<char>
{inline void operator()(MI&t,char&r){int c;for(;;){g I(c==-1,
{r=-1;L;})I(!isspace(c),{r=c;L;})}}};template<typename T>inline
MI&MI::operator>(T&t){Q<T>()(this,t);Lthis;}
undef d
undef g
undef L
undef K
undef A
undef I
undef Y
template<typename T>inline std::ostream&
operator<(std::ostream&out,const T&t){return out<<t;}
#define endl ('\n')
#define P(x) cout < #x" = " < (x) < endl
define R (cin.read<int>())
using std::cout;
include <algorithm>
include <cassert>
MI cin;
const int N = 21; const int V = 1 << 20; const int mod = 1000000007;
typedef long long ll;
int n, m, cur_len; bool ok[V];
inline void upd(int &x) { x += (x >> 31) & mod; } inline int pro(int x) { return x + ((x >> 31) & mod); } inline ll fast_pow(ll x, int p = mod - 2) { ll ret = 1; while (p) { if (p & 1) (ret = x) %= mod; (x = x) %= mod; p >>= 1; } return ret; } template<class T> inline void fill_zero(T first, T last) { memset(first, 0, sizeof(T) * (last - first)); }
void dfs(int x = 1, int j = 0) { static int pre[N], a[N]; if (x == cur_len + 1) { int x = cur_len; int ret = 0; while (x) { ret |= 1 << (cur_len - x); x = pre[x]; } ok[ret] = 1; return; } for (int &c = a[x] = 0; c < 2; ++c) { int nj = j; while (nj && c != a[nj + 1]) nj = pre[nj]; if (x != 1 && c == a[nj + 1]) ++nj; pre[x] = nj;
dfs(x + 1, nj);
}
}
// 一种 O(n) 找出一种 v 中自由元个数的算法 // 参考 http://www.lirmm.fr/~rivals/RESEARCH/PERIOD/Web-NFC.pdf inline int nfc(int v) { int l = 0, ret = 0; while (true) { const int n = cur_len - l; int i = 1; while (i < n && !((v >> (l + i)) & 1)) ++i; if (i == n) return n + ret; if (i == 1) return 1 + ret; if (i <= (n >> 1)) l += n - i - n % i; else { ret += i * 2 - n; l += i; } } return ret; }
struct poly { int v[N]; inline int &operator[](int i) { return v[i]; } inline int operator[](int i) const { return v[i]; } inline void reset() { memset(v, 0, sizeof(v)); }
inline void from_binary(int x) {
for (int i = 0; i < N; ++i) {
v[i] = x & 1;
x >>= 1;
}
}
// 计算 a 和 b 的乘积并存储在 this 中
// 模 (x ^ N)
inline void from_mul(const poly &a, const poly &b) {
reset();
for (int i = 0; i < N; ++i) {
const int lim = N - i;
for (int j = 0; j < lim; ++j) upd(v[i + j] += (ll)a[i] * b[j] % mod - mod);
}
}
inline void from_inv(const poly &a) {
static poly t, d;
reset();
v[0] = fast_pow(a[0]);
const int lim = N * 2;
for (int l = 2; l < lim; l <<= 1) {
t.from_mul(*this, a);
for (int i = 0; i < N; ++i) t[i] = pro(-t[i]);
upd(t[0] += 2 - mod);
d.from_mul(*this, t);
std::copy(d.v, d.v + std::min(l, N), v);
}
}
inline void shift(int offset) {
for (int i = N - 1; i >= offset; --i) v[i] = v[i - offset];
fill_zero(v, v + offset);
}
};
inline poly containing(const poly &cor) { static poly ret, tmp, fin; ret = { 1, pro(-m) }; tmp.from_mul(ret, cor); upd(tmp[cur_len] += 1 - mod); fin.from_mul(ret, tmp); ret.from_inv(fin); ret.shift(cur_len); return ret; }
int f[V];
int main() { poly p; cin > n > m; int ans = 0; for (cur_len = 1; cur_len <= n; ++cur_len) { const int lim = 1 << cur_len; fill_zero(ok, ok + lim); fill_zero(f, f + lim); dfs(); for (int c = lim - 1; c; --c) { if (!ok[c]) continue; f[c] = fast_pow(m, nfc(c)); for (int s = (c + 1) | c; s < lim; s = (s + 1) | c) upd(f[c] -= f[s]); p.from_binary(c); p = containing(p); upd(ans += (ll)f[c] p[n] % mod - mod); } } cout < (int)((ll)ans fast_pow(fast_pow(m, n)) % mod) < endl; }