#include<iostream>
#include<algorithm>
#include<iomanip>
using namespace std;
#define NWORDS_FIELD 12
#define RADIX 32
#define p377_ZERO_WORDS 5
typedef long long ll;
typedef unsigned long long ull;
typedef unsigned long ul;
typedef uint32_t digit_t;
typedef digit_t felm_t[NWORDS_FIELD];
typedef digit_t dfelm_t[2 * NWORDS_FIELD];
#define NBITS_FIELD 377
#define NWORDS64_FIELD ((NBITS_FIELD+63)/64) // Number of 64-bit words of a 377-bit field element
#define NBITS_TO_NWORDS(nbits) (((nbits)+(sizeof(digit_t)*8)-1)/(sizeof(digit_t)*8)) // Conversion macro from number of bits to number of computer words
const uint64_t p377[NWORDS64_FIELD] = { 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x7FFFFFFFFFFFFFFF, 0x0B46D546BC2A5699,
0xA879CC6988CE7CF5, 0x015B702E0C542196 };
const uint64_t Montgomery_R2[NWORDS64_FIELD] = { 0x826E131D3839C923, 0x54892C7B7D73E7F7, 0x3F8957D221B867A3, 0xD1217CD71D03BB94,
0xDCCBFB71E3AE5457, 0x00FCC56B6CD4B219 };
const uint64_t Montgomery_one[NWORDS64_FIELD] = { 0x00000000000000BC, 0x0000000000000000, 0x0000000000000000, 0xB7FB600DD0E86746,
0x468DE27F885C3C0B, 0x00D99E2EF237555C };
const uint64_t p377p1[NWORDS64_FIELD] = { 0x0000000000000000, 0x0000000000000000, 0x8000000000000000, 0x0B46D546BC2A5699,
0xA879CC6988CE7CF5, 0x015B702E0C542196 };
const uint64_t p377x2[NWORDS64_FIELD] = { 0xFFFFFFFFFFFFFFFE, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x168DAA8D7854AD32,
0x50F398D3119CF9EA, 0x02B6E05C18A8432D };
const felm_t felp377 = { 0xFFFFFFFF,0xFFFFFFFF, 0xFFFFFFFF,0xFFFFFFFF, 0xFFFFFFFF,0x7FFFFFFF,0xBC2A5699, 0x0B46D546,
0x88CE7CF5, 0xA879CC69,0x0C542196, 0x015B702E };
const felm_t precomp = { 0x9c405c58,0xdc95540f,0x986e2d11,0xe16777a2 ,0x59914245 ,0xe93e8c9e ,
0xe7be98,0x5eab71b0, 0xbfc75866 ,0x903d0443,0xa2afa10d,0x1297c0c };
const uint64_t pre_mont[NWORDS64_FIELD] = { 0x8da2ed98a1d92182,0x524605fcb44079c7,0x1f275d7c73d4b139,
0xc1a4a45bce317bd6,0xf599e53f96c83960,0x00f3de27b7515320 };
//2^-1 mod p377;
const felm_t twone = { 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0xC0000000,
0x5E152B4C,0x85A36AA3,0xC4673E7A,0x543CE634,0x062A10CB,0x00ADB817 };
// Digit multiplication
void digit_x_digit(const digit_t a, const digit_t b, digit_t* c)
{ // Digit multiplication, digit * digit -> 2-digit result
register digit_t al, ah, bl, bh, temp;
digit_t albl, albh, ahbl, ahbh, res1, res2, res3, carry;
digit_t mask_low = (digit_t)(-1) >> (sizeof(digit_t) * 4), mask_high = (digit_t)(-1) << (sizeof(digit_t) * 4);
al = a & mask_low; // Low part
ah = a >> (sizeof(digit_t) * 4); // High part
bl = b & mask_low;
bh = b >> (sizeof(digit_t) * 4);
albl = al * bl;
albh = al * bh;
ahbl = ah * bl;
ahbh = ah * bh;
c[0] = albl & mask_low; // C00
res1 = albl >> (sizeof(digit_t) * 4);
res2 = ahbl & mask_low;
res3 = albh & mask_low;
temp = res1 + res2 + res3;
carry = temp >> (sizeof(digit_t) * 4);
c[0] ^= temp << (sizeof(digit_t) * 4); // C01
res1 = ahbl >> (sizeof(digit_t) * 4);
res2 = albh >> (sizeof(digit_t) * 4);
res3 = ahbh & mask_low;
temp = res1 + res2 + res3 + carry;
c[1] = temp & mask_low; // C10
carry = temp & mask_high;
c[1] ^= (ahbh & mask_high) + carry; // C11
}
#define MUL(multiplier, multiplicand, hi, lo) \
digit_x_digit((multiplier), (multiplicand), &(lo));
static __inline unsigned int is_digit_nonzero_ct(digit_t x)
{ // Is x != 0?
return (unsigned int)((x | (0 - x)) >> (RADIX - 1));
}
static __inline unsigned int is_digit_zero_ct(digit_t x)
{ // Is x = 0?
return (unsigned int)(1 ^ is_digit_nonzero_ct(x));
}
static __inline unsigned int is_digit_lessthan_ct(digit_t x, digit_t y)
{ // Is x < y?
return (unsigned int)((x ^ ((x ^ y) | ((x - y) ^ y))) >> (RADIX - 1));
}
// Digit addition with carry
#define ADDC(carryIn, addend1, addend2, carryOut, sumOut) \
{ digit_t tempReg = (addend1) + (digit_t)(carryIn); \
(sumOut) = (addend2) + tempReg; \
(carryOut) = (is_digit_lessthan_ct(tempReg, (digit_t)(carryIn)) | is_digit_lessthan_ct((sumOut), tempReg)); }
// Digit subtraction with borrow
#define SUBC(borrowIn, minuend, subtrahend, borrowOut, differenceOut) \
{ digit_t tempReg = (minuend) - (subtrahend); \
unsigned int borrowReg = (is_digit_lessthan_ct((minuend), (subtrahend)) | ((borrowIn) & is_digit_zero_ct(tempReg))); \
(differenceOut) = tempReg - (digit_t)(borrowIn); \
(borrowOut) = borrowReg; }
// Shift right with flexible datatype
#define SHIFTR(highIn, lowIn, shift, shiftOut, DigitSize) \
(shiftOut) = ((lowIn) >> (shift)) ^ ((highIn) << (DigitSize - (shift)));
// Shift left with flexible datatype
#define SHIFTL(highIn, lowIn, shift, shiftOut, DigitSize) \
(shiftOut) = ((highIn) << (shift)) ^ ((lowIn) >> (DigitSize - (shift)));
void fpcorrection(digit_t* a)
{ // Modular correction to reduce field element a in [0, 2*p377-1] to [0, p377-1].
unsigned int i, borrow = 0;
digit_t mask;
for (i = 0; i < NWORDS_FIELD; i++) {
SUBC(borrow, a[i], ((digit_t*)p377)[i], borrow, a[i]);
}
mask = 0 - (digit_t)borrow;
borrow = 0;
for (i = 0; i < NWORDS_FIELD; i++) {
ADDC(borrow, a[i], ((digit_t*)p377)[i] & mask, borrow, a[i]);
}
}
void fpadd377x1(const digit_t* a, const digit_t* b, digit_t* c)
{
// Modular addition, c = a+b mod p377.
// Inputs: a, b in [0, p377-1]
// Output: c in [0, p377-1]
unsigned int i, carry = 0;
digit_t mask;
for (i = 0; i < NWORDS_FIELD; i++) {
ADDC(carry, a[i], b[i], carry, c[i]);
}
carry = 0;
for (i = 0; i < NWORDS_FIELD; i++) {
SUBC(carry, c[i], ((digit_t*)p377)[i], carry, c[i]);
}
mask = 0 - (digit_t)carry;
carry = 0;
for (i = 0; i < NWORDS_FIELD; i++) {
ADDC(carry, c[i], ((digit_t*)p377)[i] & mask, carry, c[i]);
}
}
void fpsub377x1(const digit_t* a, const digit_t* b, digit_t* c)
{ // Modular subtraction, c = a-b mod p377.
unsigned int i, borrow = 0;
digit_t mask;
for (i = 0; i < NWORDS_FIELD; i++) {
SUBC(borrow, a[i], b[i], borrow, c[i]);
}
mask = 0 - (digit_t)borrow;
borrow = 0;
for (i = 0; i < NWORDS_FIELD; i++) {
ADDC(borrow, c[i], ((digit_t*)p377)[i] & mask, borrow, c[i]);
}
}
void mp_mul(const digit_t* a, const digit_t* b, digit_t* c, const unsigned int nwords)
{ // Multiprecision comba multiply, c = a*b, where lng(a) = lng(b) = nwords.
unsigned int i, j;
digit_t t = 0, u = 0, v = 0, UV[2];
unsigned int carry = 0;
for (i = 0; i < nwords; i++) {
for (j = 0; j <= i; j++) {
MUL(a[j], b[i - j], UV + 1, UV[0]);
ADDC(0, UV[0], v, carry, v);
ADDC(carry, UV[1], u, carry, u);
t += carry;
}
c[i] = v;
v = u;
u = t;
t = 0;
}
for (i = nwords; i < 2 * nwords - 1; i++) {
for (j = i - nwords + 1; j < nwords; j++) {
MUL(a[j], b[i - j], UV + 1, UV[0]);
ADDC(0, UV[0], v, carry, v);
ADDC(carry, UV[1], u, carry, u);
t += carry;
}
c[i] = v;
v = u;
u = t;
t = 0;
}
c[2 * nwords - 1] = v;
}
void rdc_mont(digit_t* ma, digit_t* mc)
{ // Efficient Montgomery reduction using comba and exploiting the special form of the prime p434.
// mc = ma*R^-1 mod p377x2, where R = 2^384.
// If ma < 2^384*p377, the output mc is in the range [0, 2*p377-1].
// ma is assumed to be in Montgomery representation.
unsigned int i, j, carry, count = p377_ZERO_WORDS;
digit_t UV[2], t = 0, u = 0, v = 0;
for (i = 0; i < NWORDS_FIELD; i++) {
mc[i] = 0;
}
for (i = 0; i < NWORDS_FIELD; i++) {
for (j = 0; j < i; j++) {
if (j < (i - p377_ZERO_WORDS + 1)) {
MUL(mc[j], ((digit_t*)p377p1)[i - j], UV + 1, UV[0]);
ADDC(0, UV[0], v, carry, v);
ADDC(carry, UV[1], u, carry, u);
t += carry;
}
}
ADDC(0, v, ma[i], carry, v);
ADDC(carry, u, 0, carry, u);
t += carry;
mc[i] = v;
v = u;
u = t;
t = 0;
}
for (i = NWORDS_FIELD; i < 2 * NWORDS_FIELD - 1; i++) {
if (count > 0) {
count -= 1;
}
for (j = i - NWORDS_FIELD + 1; j < NWORDS_FIELD; j++) {
if (j < (NWORDS_FIELD - count)) {
MUL(mc[j], ((digit_t*)p377p1)[i - j], UV + 1, UV[0]);
ADDC(0, UV[0], v, carry, v);
ADDC(carry, UV[1], u, carry, u);
t += carry;
}
}
ADDC(0, v, ma[i], carry, v);
ADDC(carry, u, 0, carry, u);
t += carry;
mc[i - NWORDS_FIELD] = v;
v = u;
u = t;
t = 0;
}
ADDC(0, v, ma[2 * NWORDS_FIELD - 1], carry, v);
mc[NWORDS_FIELD - 1] = v;
}
void fpmul_mont(const felm_t ma, const felm_t mb, felm_t mc)
{ // Multiprecision multiplication, c = a*b mod p.
dfelm_t temp = { 0 };
mp_mul(ma, mb, temp, NWORDS_FIELD);
rdc_mont(temp, mc);
}
void to_mont(const felm_t a, felm_t mc)
{ // Conversion to Montgomery representation,
// mc = a*R^2*R^(-1) mod p = a*R mod p, where a in [0, p-1].
// The Montgomery constant R^2 mod p is the global value "Montgomery_R2".
fpmul_mont(a, (digit_t*)&Montgomery_R2, mc);
}
void from_mont(const felm_t ma, felm_t c)
{ // Conversion from Montgomery representation to standard representation,
// c = ma*R^(-1) mod p = a mod p, where ma in [0, p-1].
digit_t one[NWORDS_FIELD] = { 0 };
one[0] = 1;
fpmul_mont(ma, one, c);
fpcorrection(c);
}
//锟斤拷锟斤拷锟斤拷锟斤拷锟?
int compare_words(digit_t* a, digit_t* b, unsigned int nwords)
{ // Comparing "nword" elements, a=b? : (1) a>b, (0) a=b, (-1) a<b
// SECURITY NOTE: this function does not have constant-time execution. TO BE USED FOR TESTING ONLY.
int i;
for (i = nwords - 1; i >= 0; i--)
{
if (a[i] > b[i]) return 1;
else if (a[i] < b[i]) return -1;
}
return 0;
}
static void sub_test(digit_t* a, digit_t* b, digit_t* c, unsigned int nwords)
{ // Subtraction without borrow, c = a-b where a>b
// SECURITY NOTE: this function does not have constant-time execution. It is for TESTING ONLY.
unsigned int i;
digit_t res, carry, borrow = 0;
for (i = 0; i < nwords; i++)
{
res = a[i] - b[i];
carry = (a[i] < b[i]);
c[i] = res - borrow;
borrow = carry || (res < borrow);
}
}
void fprandom377_test(digit_t* a)
{ // Generating a pseudo-random field element in [0, p377-1]
// SECURITY NOTE: distribution is not fully uniform. TO BE USED FOR TESTING ONLY.
unsigned int i, diff = 384 - 377, nwords = NBITS_TO_NWORDS(377);
unsigned char* string = NULL;
string = (unsigned char*)a;
for (i = 0; i < sizeof(digit_t) * nwords; i++) {
*(string + i) = (unsigned char)rand(); // Obtain 384-bit number
}
a[nwords - 1] &= (((digit_t)(-1) << diff) >> diff);
while (compare_words((digit_t*)p377, a, nwords) < 1) { // Force it to [0, modulus-1]
sub_test(a, (digit_t*)p377, a, nwords);
}
}
void mp_shiftr1(digit_t* x, const unsigned int nwords)
{ // Multiprecision right shift by one.
unsigned int i;
for (i = 0; i < nwords - 1; i++) {
SHIFTR(x[i + 1], x[i], 1, x[i], RADIX);
}
x[nwords - 1] >>= 1;
}
void fpdiv2_377(const digit_t* a, digit_t* c)
{ // Modular division by two, c = a/2 mod p377.
// Input : a in [0, 2*p377-1]
// Output: c in [0, 2*p377-1]
unsigned int i, carry = 0;
digit_t mask;
mask = 0 - (digit_t)(a[0] & 1); // If a is odd compute a+p377
for (i = 0; i < NWORDS_FIELD; i++) {
ADDC(carry, a[i], ((digit_t*)p377)[i] & mask, carry, c[i]);
}
mp_shiftr1(c, NWORDS_FIELD);
}
typedef struct
{
felm_t val[2][2];
}Matrix2;
void shfRx(digit_t* a, const unsigned int nwords, digit_t x)//锟斤拷锟斤拷x位,x<32;
{
int i;
for (i = 0; i < nwords - 1; i++)
SHIFTR(a[i + 1], a[i], x, a[i], RADIX);
a[nwords - 1] >>= x;
}
void shfLx(digit_t* a, const unsigned int nwords, digit_t x)//锟斤拷锟斤拷x位,x<32;
{
int i;
for (i = nwords - 1; i > 0; i--)
SHIFTL(a[i], a[i - 1], x, a[i], RADIX);
a[0] <<= x;
}
void felcopy(const digit_t* a, digit_t* b)//b=a;
{
for (int i = 0; i < NWORDS_FIELD; i++)
b[i] = a[i];
}
Matrix2 getval(felm_t a, felm_t b, felm_t c, felm_t d)
{
Matrix2 M;
felcopy(a, M.val[0][0]);
felcopy(b, M.val[0][1]);
felcopy(c, M.val[1][0]);
felcopy(d, M.val[1][1]);
return M;
}
digit_t getbit(felm_t n)
{
digit_t len = 0;
digit_t k;
for (k = 0; n[k] && k < NWORDS_FIELD; k++);
digit_t tem = RADIX * (k - 1);
digit_t nt = n[k - 1];
while (nt)
{
len++;
nt >>= 1;
}
return len + tem;
}
void ab(digit_t* a, digit_t b, digit_t* res)//res=a^b mop p;
{
felm_t ma, one = { 1 }, oneR, mat, rest;
to_mont(a, ma);
to_mont(one, res);
felcopy(res, oneR);
fpmul_mont(ma, oneR, mat);
felcopy(mat, ma);
while (b)
{
if (b & 1)//b锟斤拷锟斤拷锟斤拷;
{
fpmul_mont(res, ma, rest);
felcopy(rest, res);
b--;
}
b >>= 1;
fpmul_mont(ma, ma, mat);
felcopy(mat, ma);
}
fpmul_mont(res, oneR, rest);
from_mont(rest, res);
}
bool isfel_nozero(felm_t x)
{
for (int i = 0; i < NWORDS_FIELD; i++)
if (x[i] != 0)return true;
return false;
}
bool isfel_one(felm_t x)
{
for (int i = NWORDS_FIELD - 1; i > 0; i--)
if (x[i] != 0)return false;
if (x[0] != 1)return false;
return true;
}
//21锟斤拷锟斤拷锟斤拷锟姐法实锟斤拷:
void swapll(ll* a, ll* b)
{
a[0] ^= b[0];
b[0] ^= a[0];
a[0] ^= b[0];
}
void swapfel(digit_t* f, digit_t* g)
{
for (unsigned i = 0; i < NWORDS_FIELD; i++)
{
f[i] ^= g[i];
g[i] ^= f[i];
f[i] ^= g[i];
}
}
void getne(digit_t* f)
{
felm_t zero = { 0 };
unsigned int i,borrow = 0;
for (i = 0; i < NWORDS_FIELD; i++)
SUBC(borrow, zero[i], f[i], borrow, f[i]);
}
void divsteps_21(digit_t n, digit_t* delta, felm_t f, felm_t g, Matrix2* M, digit_t* sf)//n>=0,t>=0
{
felm_t ut, vt;
felm_t u = { 1 }, v = { 0 }, q = { 0 }, r = { 1 };
for (int jt = 0; jt < n; jt++)
{
if (!(*delta >> (RADIX - 1)) && (g[0] & 1))
{
*delta = 0 - *delta;
//f,g=g,-f;
swapfel(f, g);
getne(g);
//u,v,q,r=q,r,-u,-v;
swapfel(u, q);
unsigned borrow = 0;
for (int i = 0; i < NWORDS_FIELD; i++)
SUBC(borrow, ((digit_t*)p377)[i], q[i], borrow, q[i]);
swapfel(v, r);
borrow = 0;
for (int i = 0; i < NWORDS_FIELD; i++)
SUBC(borrow, ((digit_t*)p377)[i], r[i], borrow, r[i]);
}
digit_t g0 = 0 - (g[0] & 1);
*delta += 1;
//g = (g + g0 * f) >> 1;
//q = (q + g0 * u);
//r = (r + g0 * v);
//u=2*u
//v=2*v
//ft,ut,vt=f*g0,u*g0,v*g0;
for (unsigned i = 0; i < NWORDS_FIELD; i++)
{
ut[i] = g0 & u[i];
vt[i] = g0 & v[i];
}
unsigned carry = 0;
for (int i = 0; i < NWORDS_FIELD; i++)
ADDC(carry, g0 & f[i], g[i], carry, g[i]);
shfRx(g, NWORDS_FIELD, 1);
//if(g&(2^382))g xor =(2^383)
g[NWORDS_FIELD - 1] |= (g[NWORDS_FIELD - 1] >> (RADIX - 2) << (RADIX - 1));
fpadd377x1(q, ut, q);
fpadd377x1(r, vt, r);
shfLx(u, NWORDS_FIELD, 1);
shfLx(v, NWORDS_FIELD, 1);
fpcorrection(u);
fpcorrection(v);
/*
printf("%d:\n",jt);
for(int j=0;j<NWORDS_FIELD;j++)cout<<hex<<f[NWORDS_FIELD-j-1];
cout<<endl;
for(int j=0;j<NWORDS_FIELD;j++)cout<<hex<<g[NWORDS_FIELD-j-1];
cout<<endl;/*
for(int j=0;j<NWORDS_FIELD;j++)cout<<hex<<u[NWORDS_FIELD-j-1];
cout<<endl;
for(int j=0;j<NWORDS_FIELD;j++)cout<<hex<<v[NWORDS_FIELD-j-1];
cout<<endl;
for(int j=0;j<NWORDS_FIELD;j++)cout<<hex<<q[NWORDS_FIELD-j-1];
cout<<endl;
for(int j=0;j<NWORDS_FIELD;j++)cout<<hex<<r[NWORDS_FIELD-j-1];
cout<<endl;
*/
}
sf[0] = f[0] + 1 >> 1;
M[0] = getval(u, v, q, r);
}
void divsteps_LL(digit_t n,digit_t* delta,digit_t* f,digit_t* g,Matrix2*P,digit_t *sf){
felm_t u={1},v={0},q={0},r={1};
for(int jt=0;jt<n;jt++){
if (!((*delta) >> (RADIX - 1)) && ((*g) & 1))
{
*delta = 0 - *delta;
//f,g=g,-f;
swap(f, g);
*g=-*g;
//u,v,q,r=q,r,-u,-v;
swap(*u, *q);
*q=-*q;
swap(*v, *r);
*r=-*r;
}
digit_t g0 = 0 - (g[0] & 1);
*delta += 2;
//g = (g + g0 * f) >> 1;
//q = (q + g0 * u);
//r = (r + g0 * v);
//u=2*u
//v=2*v
*g+=(g0&(*f));
*g>>=1;
*g |= ((*g) >> (RADIX - 2) << (RADIX - 1));
*q+=g0&*u;
*r+=g0&*v;
*u<<=1;
*v<<=1;
}
for(int i=1;i<NWORDS_FIELD;i++){
u[i]=0-(u[0]>>(RADIX-1));
v[i]=0-(v[0]>>(RADIX-1));
q[i]=0-(q[0]>>(RADIX-1));
r[i]=0-(r[0]>>(RADIX-1));
}
*P=getval(u,v,q,r);
*sf=(*f)+1>>1;
}
void jumpdivstep(digit_t n,digit_t k,digit_t delta,felm_t f,felm_t g,Matrix2*P,digit_t* sf){
felm_t one={1},zero={0};
for(int i=0;i<n/k;i++)to_mont(one,one);
felm_t f_,g_,u_[2],v_[2],q_[2],r_[2];
digit_t ft[NWORDS_FIELD<<1],gt[NWORDS_FIELD<<1];
Matrix2 T=getval(one,zero,zero,one),T_;
for(int i=0;i<n/k;i++){
f_[0]=f[0];
g_[0]=g[0];
divsteps_LL(k,&delta,f_,g_,&T_,sf);
mp_mul(T_.val[0][0],f,ft,NWORDS_FIELD);
ft[NWORDS_FIELD]-=f[0]&(0-(T_.val[0][0][NWORDS_FIELD-1]>>(RADIX-1)));
ft[NWORDS_FIELD]-=T_.val[0][0][0]&(0-(f[NWORDS_FIELD-1]>>(RADIX-1)));
mp_mul(T_.val[0][1],g,gt,NWORDS_FIELD);
gt[NWORDS_FIELD]-=g[0]&(0-(T_.val[0][1][NWORDS_FIELD-1]>>(RADIX-1)));
gt[NWORDS_FIELD]-=T_.val[0][1][0]&(0-(g[NWORDS_FIELD-1]>>(RADIX-1)));
unsigned carry = 0;
for (int j = 0; j <1+NWORDS_FIELD; j++)
ADDC(carry, gt[j], ft[j], carry, f_[j]);
shfRx(f_, 1+NWORDS_FIELD, k);
mp_mul(T_.val[1][0],f,ft,NWORDS_FIELD);
ft[NWORDS_FIELD]-=f[0]&(0-(T_.val[1][0][NWORDS_FIELD-1]>>(RADIX-1)));
ft[NWORDS_FIELD]-=T_.val[1][0][0]&(0-(f[NWORDS_FIELD-1]>>(RADIX-1)));
mp_mul(T_.val[1][1],g,gt,NWORDS_FIELD);
gt[NWORDS_FIELD]-=g[0]&(0-(T_.val[1][1][NWORDS_FIELD-1]>>(RADIX-1)));
gt[NWORDS_FIELD]-=T_.val[1][1][0]&(0-(g[NWORDS_FIELD-1]>>(RADIX-1)));
carry = 0;
for (int j = 0; j < 1+NWORDS_FIELD; j++)
ADDC(carry, ft[j], gt[j], carry, g_[j]);
shfRx(g_, 1+NWORDS_FIELD, k);
felcopy(f_,f);
felcopy(g_,g);
T_.val[0][0][0]+=((digit_t*)p377)[0]&(0-(T_.val[0][0][0]>>(RADIX-1)));
for(int j=1;j<NWORDS_FIELD;j++)T_.val[0][0][j]&=((digit_t*)p377)[j];
T_.val[0][1][0]+=((digit_t*)p377)[0]&(0-(T_.val[0][1][0]>>(RADIX-1)));
for(int j=1;j<NWORDS_FIELD;j++)T_.val[0][1][j]&=((digit_t*)p377)[j];
T_.val[1][0][0]+=((digit_t*)p377)[0]&(0-(T_.val[1][0][0]>>(RADIX-1)));
for(int j=1;j<NWORDS_FIELD;j++)T_.val[1][0][j]&=((digit_t*)p377)[j];
T_.val[1][1][0]+=((digit_t*)p377)[0]&(0-(T_.val[1][1][0]>>(RADIX-1)));
for(int j=1;j<NWORDS_FIELD;j++)T_.val[1][1][j]&=((digit_t*)p377)[j];
// fpmul_mont(T_.val[0][0],T.val[0][0],u_[0]);
// fpmul_mont(T_.val[0][1],T.val[1][0],u_[1]);
fpmul_mont(T_.val[0][0],T.val[0][1],v_[0]);
fpmul_mont(T_.val[0][1],T.val[1][1],v_[1]);
// fpmul_mont(T_.val[1][0],T.val[0][0],q_[0]);
// fpmul_mont(T_.val[1][1],T.val[1][0],q_[1]);
fpmul_mont(T_.val[1][0],T.val[0][1],r_[0]);
fpmul_mont(T_.val[1][1],T.val[1][1],r_[1]);
// fpadd377x1(u_[0],u_[1],T.val[0][0]);
fpadd377x1(v_[0],v_[1],T.val[0][1]);
// fpadd377x1(q_[0],q_[1],T.val[1][0]);
fpadd377x1(r_[0],r_[1],T.val[1][1]);
}
sf[0] = f[0] + 1 >> 1;
*P=T;
}
void BY_inversion(digit_t* f, digit_t* g, digit_t* res)
{
digit_t delta = 1, sf = 1, m = 870;
felm_t e={1},v, ft, gt;
fpdiv2_377(e,e);
ab(e,m,e);
felcopy(f, ft);
felcopy(g, gt);
Matrix2 P[1] = { 0 };
jumpdivstep(m,30,delta,ft,gt,P,&sf);
// felcopy(f, ft);
// felcopy(g, gt);
// divsteps_21(m, &delta, ft, gt, P, &sf);
//g^-1=pre_mont*v*sign(f)
to_mont(e,e);
to_mont(P[0].val[0][1], v);
fpmul_mont(e, v, res);
from_mont(res, res);
unsigned borrow = 0;
for (int i = 0; i < NWORDS_FIELD; i++)
SUBC(borrow, ((sf - 1) & ((digit_t*)p377)[i]) | ((0 - sf) & res[i]), (sf - 1) & res[i], borrow, res[i]);
}
void fpinv_21(digit_t* a)
{
BY_inversion((digit_t*)p377, a, a);
}
int main()
{
int che = 1;
for (digit_t k = 1; k < 100; k++)
{
felm_t a, recip_a, res;
fprandom377_test(a);
to_mont(a, res);
fpinv_21(a);
to_mont(a, a);
fpmul_mont(a, res, res);
from_mont(res, a);
if (!isfel_one(a))
{
che = 0;
cout << "failed";
break;
}
}
if(che)cout << "OK";
/*
felm_t a = { 2 }, res;
BY_inversion((digit_t*)p377, a, res);
for (int i = 0; i < NWORDS_FIELD; i++)
cout << hex << res[NWORDS_FIELD - i - 1] << " ";
cout << endl;
*/
/*
ll res, res2;
BY_inversionte(7, 2, res, res2);
cout<<res<<" "<<res2;
*/
/*
felm_t a = { 2 };
fpinv_21(a);
for (int i = 0; i < NWORDS_FIELD; i++)
cout << hex << a[NWORDS_FIELD - i - 1] << " ";
cout << endl;
*/
return 0;
}
Compilation | N/A | N/A | Compile OK | Score: N/A | 显示更多 |
Testcase #1 | 35.305 ms | 36 KB | Accepted | Score: 100 | 显示更多 |