提交记录 17711 - Judge Duck Online

用户	题目	状态	得分	用时	内存	语言	代码长度
locytus	test. 自定义测试	Accepted	100	35.312 ms	36 KB	C++11	22.06 KB

提交时间	评测时间
2022-05-19 15:03:16	2023-09-03 19:42:11

代码

#include<iostream>
#include<algorithm>
#include<iomanip>
using namespace std;
#define NWORDS_FIELD    12
#define RADIX           32
#define p377_ZERO_WORDS 5
typedef long long ll;
typedef unsigned long long ull;
typedef unsigned long ul;
typedef uint32_t digit_t;
typedef digit_t felm_t[NWORDS_FIELD];
typedef digit_t dfelm_t[2 * NWORDS_FIELD];
#define NBITS_FIELD             377 
#define NWORDS64_FIELD          ((NBITS_FIELD+63)/64)               // Number of 64-bit words of a 377-bit field element 
#define NBITS_TO_NWORDS(nbits)      (((nbits)+(sizeof(digit_t)*8)-1)/(sizeof(digit_t)*8))    // Conversion macro from number of bits to number of computer words

const uint64_t p377[NWORDS64_FIELD] = { 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x7FFFFFFFFFFFFFFF, 0x0B46D546BC2A5699,
                                        0xA879CC6988CE7CF5, 0x015B702E0C542196 };
const uint64_t Montgomery_R2[NWORDS64_FIELD] = { 0x826E131D3839C923, 0x54892C7B7D73E7F7, 0x3F8957D221B867A3, 0xD1217CD71D03BB94,
                                                 0xDCCBFB71E3AE5457, 0x00FCC56B6CD4B219 };
const uint64_t Montgomery_one[NWORDS64_FIELD] = { 0x00000000000000BC, 0x0000000000000000, 0x0000000000000000, 0xB7FB600DD0E86746,
                                                  0x468DE27F885C3C0B, 0x00D99E2EF237555C };
const uint64_t p377p1[NWORDS64_FIELD] = { 0x0000000000000000, 0x0000000000000000, 0x8000000000000000, 0x0B46D546BC2A5699,
                                          0xA879CC6988CE7CF5, 0x015B702E0C542196 };
const uint64_t p377x2[NWORDS64_FIELD] = { 0xFFFFFFFFFFFFFFFE, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x168DAA8D7854AD32,
                                          0x50F398D3119CF9EA, 0x02B6E05C18A8432D };
const felm_t felp377 = { 0xFFFFFFFF,0xFFFFFFFF, 0xFFFFFFFF,0xFFFFFFFF, 0xFFFFFFFF,0x7FFFFFFF,0xBC2A5699, 0x0B46D546,
                   0x88CE7CF5, 0xA879CC69,0x0C542196, 0x015B702E };
const felm_t precomp = { 0x9c405c58,0xdc95540f,0x986e2d11,0xe16777a2 ,0x59914245 ,0xe93e8c9e ,
                    0xe7be98,0x5eab71b0, 0xbfc75866 ,0x903d0443,0xa2afa10d,0x1297c0c };
const uint64_t pre_mont[NWORDS64_FIELD] = { 0x8da2ed98a1d92182,0x524605fcb44079c7,0x1f275d7c73d4b139,
                                            0xc1a4a45bce317bd6,0xf599e53f96c83960,0x00f3de27b7515320 };
//2^-1 mod p377;
const felm_t twone = { 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0xC0000000,
                 0x5E152B4C,0x85A36AA3,0xC4673E7A,0x543CE634,0x062A10CB,0x00ADB817 };
// Digit multiplication
void digit_x_digit(const digit_t a, const digit_t b, digit_t* c)
{ // Digit multiplication, digit * digit -> 2-digit result    
    register digit_t al, ah, bl, bh, temp;
    digit_t albl, albh, ahbl, ahbh, res1, res2, res3, carry;
    digit_t mask_low = (digit_t)(-1) >> (sizeof(digit_t) * 4), mask_high = (digit_t)(-1) << (sizeof(digit_t) * 4);

    al = a & mask_low;                        // Low part
    ah = a >> (sizeof(digit_t) * 4);          // High part
    bl = b & mask_low;
    bh = b >> (sizeof(digit_t) * 4);

    albl = al * bl;
    albh = al * bh;
    ahbl = ah * bl;
    ahbh = ah * bh;
    c[0] = albl & mask_low;                   // C00

    res1 = albl >> (sizeof(digit_t) * 4);
    res2 = ahbl & mask_low;
    res3 = albh & mask_low;
    temp = res1 + res2 + res3;
    carry = temp >> (sizeof(digit_t) * 4);
    c[0] ^= temp << (sizeof(digit_t) * 4);    // C01   

    res1 = ahbl >> (sizeof(digit_t) * 4);
    res2 = albh >> (sizeof(digit_t) * 4);
    res3 = ahbh & mask_low;
    temp = res1 + res2 + res3 + carry;
    c[1] = temp & mask_low;                   // C10 
    carry = temp & mask_high;
    c[1] ^= (ahbh & mask_high) + carry;       // C11
}
#define MUL(multiplier, multiplicand, hi, lo)                                                     \
    digit_x_digit((multiplier), (multiplicand), &(lo));
static __inline unsigned int is_digit_nonzero_ct(digit_t x)
{ // Is x != 0?
    return (unsigned int)((x | (0 - x)) >> (RADIX - 1));
}
static __inline unsigned int is_digit_zero_ct(digit_t x)
{ // Is x = 0?
    return (unsigned int)(1 ^ is_digit_nonzero_ct(x));
}
static __inline unsigned int is_digit_lessthan_ct(digit_t x, digit_t y)
{ // Is x < y?
    return (unsigned int)((x ^ ((x ^ y) | ((x - y) ^ y))) >> (RADIX - 1));
}

// Digit addition with carry
#define ADDC(carryIn, addend1, addend2, carryOut, sumOut)                                         \
    { digit_t tempReg = (addend1) + (digit_t)(carryIn);                                           \
    (sumOut) = (addend2) + tempReg;                                                               \
    (carryOut) = (is_digit_lessthan_ct(tempReg, (digit_t)(carryIn)) | is_digit_lessthan_ct((sumOut), tempReg)); }

// Digit subtraction with borrow
#define SUBC(borrowIn, minuend, subtrahend, borrowOut, differenceOut)                             \
    { digit_t tempReg = (minuend) - (subtrahend);                                                 \
    unsigned int borrowReg = (is_digit_lessthan_ct((minuend), (subtrahend)) | ((borrowIn) & is_digit_zero_ct(tempReg)));  \
    (differenceOut) = tempReg - (digit_t)(borrowIn);                                              \
    (borrowOut) = borrowReg; }

// Shift right with flexible datatype
#define SHIFTR(highIn, lowIn, shift, shiftOut, DigitSize)                                         \
    (shiftOut) = ((lowIn) >> (shift)) ^ ((highIn) << (DigitSize - (shift)));

// Shift left with flexible datatype
#define SHIFTL(highIn, lowIn, shift, shiftOut, DigitSize)                                         \
    (shiftOut) = ((highIn) << (shift)) ^ ((lowIn) >> (DigitSize - (shift)));
void fpcorrection(digit_t* a)
{ // Modular correction to reduce field element a in [0, 2*p377-1] to [0, p377-1].
    unsigned int i, borrow = 0;
    digit_t mask;

    for (i = 0; i < NWORDS_FIELD; i++) {
        SUBC(borrow, a[i], ((digit_t*)p377)[i], borrow, a[i]);
    }
    mask = 0 - (digit_t)borrow;

    borrow = 0;
    for (i = 0; i < NWORDS_FIELD; i++) {
        ADDC(borrow, a[i], ((digit_t*)p377)[i] & mask, borrow, a[i]);
    }
}
void fpadd377x1(const digit_t* a, const digit_t* b, digit_t* c)
{
    // Modular addition, c = a+b mod p377.
    // Inputs: a, b in [0, p377-1] 
    // Output: c in [0, p377-1] 
    unsigned int i, carry = 0;
    digit_t mask;

    for (i = 0; i < NWORDS_FIELD; i++) {
        ADDC(carry, a[i], b[i], carry, c[i]);
    }

    carry = 0;
    for (i = 0; i < NWORDS_FIELD; i++) {
        SUBC(carry, c[i], ((digit_t*)p377)[i], carry, c[i]);
    }
    mask = 0 - (digit_t)carry;

    carry = 0;
    for (i = 0; i < NWORDS_FIELD; i++) {
        ADDC(carry, c[i], ((digit_t*)p377)[i] & mask, carry, c[i]);
    }
}
void fpsub377x1(const digit_t* a, const digit_t* b, digit_t* c)
{ // Modular subtraction, c = a-b mod p377.
    unsigned int i, borrow = 0;
    digit_t mask;

    for (i = 0; i < NWORDS_FIELD; i++) {
        SUBC(borrow, a[i], b[i], borrow, c[i]);
    }
    mask = 0 - (digit_t)borrow;

    borrow = 0;
    for (i = 0; i < NWORDS_FIELD; i++) {
        ADDC(borrow, c[i], ((digit_t*)p377)[i] & mask, borrow, c[i]);
    }
}
void mp_mul(const digit_t* a, const digit_t* b, digit_t* c, const unsigned int nwords)
{ // Multiprecision comba multiply, c = a*b, where lng(a) = lng(b) = nwords.   
    unsigned int i, j;
    digit_t t = 0, u = 0, v = 0, UV[2];
    unsigned int carry = 0;

    for (i = 0; i < nwords; i++) {
        for (j = 0; j <= i; j++) {
            MUL(a[j], b[i - j], UV + 1, UV[0]);
            ADDC(0, UV[0], v, carry, v);
            ADDC(carry, UV[1], u, carry, u);
            t += carry;
        }
        c[i] = v;
        v = u;
        u = t;
        t = 0;
    }

    for (i = nwords; i < 2 * nwords - 1; i++) {
        for (j = i - nwords + 1; j < nwords; j++) {
            MUL(a[j], b[i - j], UV + 1, UV[0]);
            ADDC(0, UV[0], v, carry, v);
            ADDC(carry, UV[1], u, carry, u);
            t += carry;
        }
        c[i] = v;
        v = u;
        u = t;
        t = 0;
    }
    c[2 * nwords - 1] = v;
}
void rdc_mont(digit_t* ma, digit_t* mc)
{ // Efficient Montgomery reduction using comba and exploiting the special form of the prime p434.
  // mc = ma*R^-1 mod p377x2, where R = 2^384.
  // If ma < 2^384*p377, the output mc is in the range [0, 2*p377-1].
  // ma is assumed to be in Montgomery representation.
    unsigned int i, j, carry, count = p377_ZERO_WORDS;
    digit_t UV[2], t = 0, u = 0, v = 0;

    for (i = 0; i < NWORDS_FIELD; i++) {
        mc[i] = 0;
    }

    for (i = 0; i < NWORDS_FIELD; i++) {
        for (j = 0; j < i; j++) {
            if (j < (i - p377_ZERO_WORDS + 1)) {
                MUL(mc[j], ((digit_t*)p377p1)[i - j], UV + 1, UV[0]);
                ADDC(0, UV[0], v, carry, v);
                ADDC(carry, UV[1], u, carry, u);
                t += carry;
            }
        }
        ADDC(0, v, ma[i], carry, v);
        ADDC(carry, u, 0, carry, u);
        t += carry;
        mc[i] = v;
        v = u;
        u = t;
        t = 0;
    }

    for (i = NWORDS_FIELD; i < 2 * NWORDS_FIELD - 1; i++) {
        if (count > 0) {
            count -= 1;
        }
        for (j = i - NWORDS_FIELD + 1; j < NWORDS_FIELD; j++) {
            if (j < (NWORDS_FIELD - count)) {
                MUL(mc[j], ((digit_t*)p377p1)[i - j], UV + 1, UV[0]);
                ADDC(0, UV[0], v, carry, v);
                ADDC(carry, UV[1], u, carry, u);
                t += carry;
            }
        }
        ADDC(0, v, ma[i], carry, v);
        ADDC(carry, u, 0, carry, u);
        t += carry;
        mc[i - NWORDS_FIELD] = v;
        v = u;
        u = t;
        t = 0;
    }
    ADDC(0, v, ma[2 * NWORDS_FIELD - 1], carry, v);
    mc[NWORDS_FIELD - 1] = v;
}
void fpmul_mont(const felm_t ma, const felm_t mb, felm_t mc)
{ // Multiprecision multiplication, c = a*b mod p.
    dfelm_t temp = { 0 };
    mp_mul(ma, mb, temp, NWORDS_FIELD);
    rdc_mont(temp, mc);
}
void to_mont(const felm_t a, felm_t mc)
{ // Conversion to Montgomery representation,
  // mc = a*R^2*R^(-1) mod p = a*R mod p, where a in [0, p-1].
  // The Montgomery constant R^2 mod p is the global value "Montgomery_R2". 

    fpmul_mont(a, (digit_t*)&Montgomery_R2, mc);
}
void from_mont(const felm_t ma, felm_t c)
{ // Conversion from Montgomery representation to standard representation,
  // c = ma*R^(-1) mod p = a mod p, where ma in [0, p-1].
    digit_t one[NWORDS_FIELD] = { 0 };

    one[0] = 1;
    fpmul_mont(ma, one, c);
    fpcorrection(c);
}

//锟斤拷锟斤拷锟斤拷锟斤拷锟?
int compare_words(digit_t* a, digit_t* b, unsigned int nwords)
{ // Comparing "nword" elements, a=b? : (1) a>b, (0) a=b, (-1) a<b
  // SECURITY NOTE: this function does not have constant-time execution. TO BE USED FOR TESTING ONLY.
    int i;

    for (i = nwords - 1; i >= 0; i--)
    {
        if (a[i] > b[i]) return 1;
        else if (a[i] < b[i]) return -1;
    }

    return 0;
}
static void sub_test(digit_t* a, digit_t* b, digit_t* c, unsigned int nwords)
{ // Subtraction without borrow, c = a-b where a>b
  // SECURITY NOTE: this function does not have constant-time execution. It is for TESTING ONLY.     
    unsigned int i;
    digit_t res, carry, borrow = 0;

    for (i = 0; i < nwords; i++)
    {
        res = a[i] - b[i];
        carry = (a[i] < b[i]);
        c[i] = res - borrow;
        borrow = carry || (res < borrow);
    }
}
void fprandom377_test(digit_t* a)
{ // Generating a pseudo-random field element in [0, p377-1] 
  // SECURITY NOTE: distribution is not fully uniform. TO BE USED FOR TESTING ONLY.
    unsigned int i, diff = 384 - 377, nwords = NBITS_TO_NWORDS(377);
    unsigned char* string = NULL;

    string = (unsigned char*)a;
    for (i = 0; i < sizeof(digit_t) * nwords; i++) {
        *(string + i) = (unsigned char)rand();              // Obtain 384-bit number
    }
    a[nwords - 1] &= (((digit_t)(-1) << diff) >> diff);

    while (compare_words((digit_t*)p377, a, nwords) < 1) {  // Force it to [0, modulus-1]
        sub_test(a, (digit_t*)p377, a, nwords);
    }
}


void mp_shiftr1(digit_t* x, const unsigned int nwords)
{ // Multiprecision right shift by one.
    unsigned int i;

    for (i = 0; i < nwords - 1; i++) {
        SHIFTR(x[i + 1], x[i], 1, x[i], RADIX);
    }
    x[nwords - 1] >>= 1;
}
void fpdiv2_377(const digit_t* a, digit_t* c)
{ // Modular division by two, c = a/2 mod p377.
  // Input : a in [0, 2*p377-1] 
  // Output: c in [0, 2*p377-1] 
    unsigned int i, carry = 0;
    digit_t mask;

    mask = 0 - (digit_t)(a[0] & 1);    // If a is odd compute a+p377
    for (i = 0; i < NWORDS_FIELD; i++) {
        ADDC(carry, a[i], ((digit_t*)p377)[i] & mask, carry, c[i]);
    }

    mp_shiftr1(c, NWORDS_FIELD);
}

typedef struct
{
    felm_t val[2][2];
}Matrix2;
void shfRx(digit_t* a, const unsigned int nwords, digit_t x)//锟斤拷锟斤拷x位,x<32;
{
    int i;
    for (i = 0; i < nwords - 1; i++)
        SHIFTR(a[i + 1], a[i], x, a[i], RADIX);
    a[nwords - 1] >>= x;
}
void shfLx(digit_t* a, const unsigned int nwords, digit_t x)//锟斤拷锟斤拷x位,x<32;
{
    int i;
    for (i = nwords - 1; i > 0; i--)
        SHIFTL(a[i], a[i - 1], x, a[i], RADIX);
    a[0] <<= x;
}
void felcopy(const digit_t* a, digit_t* b)//b=a;
{
    for (int i = 0; i < NWORDS_FIELD; i++)
        b[i] = a[i];
}
Matrix2 getval(felm_t a, felm_t b, felm_t c, felm_t d)
{
    Matrix2 M;
    felcopy(a, M.val[0][0]);
    felcopy(b, M.val[0][1]);
    felcopy(c, M.val[1][0]);
    felcopy(d, M.val[1][1]);
    return M;
}
digit_t getbit(felm_t n)
{
    digit_t len = 0;
    digit_t k;
    for (k = 0; n[k] && k < NWORDS_FIELD; k++);
    digit_t tem = RADIX * (k - 1);
    digit_t nt = n[k - 1];
    while (nt)
    {
        len++;
        nt >>= 1;
    }
    return len + tem;
}
void ab(digit_t* a, digit_t b, digit_t* res)//res=a^b mop p;
{
    felm_t ma, one = { 1 }, oneR, mat, rest;
    to_mont(a, ma);
    to_mont(one, res);
    felcopy(res, oneR);
    fpmul_mont(ma, oneR, mat);
    felcopy(mat, ma);
    while (b)
    {
        if (b & 1)//b锟斤拷锟斤拷锟斤拷;
        {
            fpmul_mont(res, ma, rest);
            felcopy(rest, res);
            b--;
        }
        b >>= 1;
        fpmul_mont(ma, ma, mat);
        felcopy(mat, ma);
    }
    fpmul_mont(res, oneR, rest);
    from_mont(rest, res);
}

bool isfel_nozero(felm_t x)
{
    for (int i = 0; i < NWORDS_FIELD; i++)
        if (x[i] != 0)return true;
    return false;
}
bool isfel_one(felm_t x)
{
    for (int i = NWORDS_FIELD - 1; i > 0; i--)
        if (x[i] != 0)return false;
    if (x[0] != 1)return false;
    return true;
}

//21锟斤拷锟斤拷锟斤拷锟姐法实锟斤拷:
void swapll(ll* a, ll* b)
{
    a[0] ^= b[0];
    b[0] ^= a[0];
    a[0] ^= b[0];
}
void swapfel(digit_t* f, digit_t* g)
{
    for (unsigned i = 0; i < NWORDS_FIELD; i++)
    {
        f[i] ^= g[i];
        g[i] ^= f[i];
        f[i] ^= g[i];
    }
}
void getne(digit_t* f)
{
    felm_t zero = { 0 };
    unsigned int i,borrow = 0;
    for (i = 0; i < NWORDS_FIELD; i++)
        SUBC(borrow, zero[i], f[i], borrow, f[i]);
}   
void divsteps_21(digit_t n, digit_t* delta, felm_t f, felm_t g, Matrix2* M, digit_t* sf)//n>=0,t>=0
{
    felm_t ut, vt;
    felm_t u = { 1 }, v = { 0 }, q = { 0 }, r = { 1 };
    for (int jt = 0; jt < n; jt++)
    {
        if (!(*delta >> (RADIX - 1)) && (g[0] & 1))
        {
            *delta = 0 - *delta;
            //f,g=g,-f;
            swapfel(f, g);
            getne(g);
            //u,v,q,r=q,r,-u,-v;
            swapfel(u, q);
            unsigned borrow = 0;
            for (int i = 0; i < NWORDS_FIELD; i++)
                SUBC(borrow, ((digit_t*)p377)[i], q[i], borrow, q[i]);
            swapfel(v, r);
            borrow = 0;
            for (int i = 0; i < NWORDS_FIELD; i++)
                SUBC(borrow, ((digit_t*)p377)[i], r[i], borrow, r[i]);
        }
        digit_t g0 = 0 - (g[0] & 1);
        *delta += 1;
        //g = (g + g0 * f) >> 1;
        //q = (q + g0 * u);
        //r = (r + g0 * v);
        //u=2*u
        //v=2*v
        //ft,ut,vt=f*g0,u*g0,v*g0;
        for (unsigned i = 0; i < NWORDS_FIELD; i++)
        {
            ut[i] = g0 & u[i];
            vt[i] = g0 & v[i];
        }
        unsigned carry = 0;
        for (int i = 0; i < NWORDS_FIELD; i++)
            ADDC(carry, g0 & f[i], g[i], carry, g[i]);
        shfRx(g, NWORDS_FIELD, 1);
        //if(g&(2^382))g xor =(2^383)
        g[NWORDS_FIELD - 1] |= (g[NWORDS_FIELD - 1] >> (RADIX - 2) << (RADIX - 1));

        fpadd377x1(q, ut, q);
        fpadd377x1(r, vt, r);
        shfLx(u, NWORDS_FIELD, 1);
        shfLx(v, NWORDS_FIELD, 1);
        fpcorrection(u);
        fpcorrection(v);
        /*
       	printf("%d:\n",jt);
       	for(int j=0;j<NWORDS_FIELD;j++)cout<<hex<<f[NWORDS_FIELD-j-1];
        cout<<endl;
        for(int j=0;j<NWORDS_FIELD;j++)cout<<hex<<g[NWORDS_FIELD-j-1];
        cout<<endl;/*
  		for(int j=0;j<NWORDS_FIELD;j++)cout<<hex<<u[NWORDS_FIELD-j-1];
        cout<<endl;
        for(int j=0;j<NWORDS_FIELD;j++)cout<<hex<<v[NWORDS_FIELD-j-1];
        cout<<endl;
        for(int j=0;j<NWORDS_FIELD;j++)cout<<hex<<q[NWORDS_FIELD-j-1];
        cout<<endl;
        for(int j=0;j<NWORDS_FIELD;j++)cout<<hex<<r[NWORDS_FIELD-j-1];
        cout<<endl;
        */ 
    }
    sf[0] = f[0] + 1 >> 1;
    M[0] = getval(u, v, q, r);
}  
void divsteps_LL(digit_t n,digit_t* delta,digit_t* f,digit_t* g,Matrix2*P,digit_t *sf){
    felm_t u={1},v={0},q={0},r={1};
	for(int jt=0;jt<n;jt++){
		if (!((*delta) >> (RADIX - 1)) && ((*g) & 1))
        {
            *delta = 0 - *delta;
            //f,g=g,-f;
            swap(f, g);
            *g=-*g;
            //u,v,q,r=q,r,-u,-v;
            swap(*u, *q);
            *q=-*q;
            swap(*v, *r);
            *r=-*r;
        }
        digit_t g0 = 0 - (g[0] & 1);
        *delta += 2;
        //g = (g + g0 * f) >> 1;
        //q = (q + g0 * u);
        //r = (r + g0 * v);
        //u=2*u
        //v=2*v
        *g+=(g0&(*f));
        *g>>=1;
        *g |= ((*g) >> (RADIX - 2) << (RADIX - 1));
		*q+=g0&*u;
		*r+=g0&*v;
		*u<<=1;
		*v<<=1;
	}
	for(int i=1;i<NWORDS_FIELD;i++){
		u[i]=0-(u[0]>>(RADIX-1));
		v[i]=0-(v[0]>>(RADIX-1));
		q[i]=0-(q[0]>>(RADIX-1));
		r[i]=0-(r[0]>>(RADIX-1));
	}
	*P=getval(u,v,q,r);
	*sf=(*f)+1>>1;
}
void jumpdivstep(digit_t n,digit_t k,digit_t delta,felm_t f,felm_t g,Matrix2*P,digit_t* sf){
	felm_t one={1},zero={0};
	for(int i=0;i<n/k;i++)to_mont(one,one);
	felm_t f_,g_,u_[2],v_[2],q_[2],r_[2];
	digit_t ft[NWORDS_FIELD<<1],gt[NWORDS_FIELD<<1];
	Matrix2 T=getval(one,zero,zero,one),T_;
	for(int i=0;i<n/k;i++){
		f_[0]=f[0];
		g_[0]=g[0];
		divsteps_LL(k,&delta,f_,g_,&T_,sf);
		mp_mul(T_.val[0][0],f,ft,NWORDS_FIELD);
		ft[NWORDS_FIELD]-=f[0]&(0-(T_.val[0][0][NWORDS_FIELD-1]>>(RADIX-1)));
		ft[NWORDS_FIELD]-=T_.val[0][0][0]&(0-(f[NWORDS_FIELD-1]>>(RADIX-1)));
		mp_mul(T_.val[0][1],g,gt,NWORDS_FIELD);
		gt[NWORDS_FIELD]-=g[0]&(0-(T_.val[0][1][NWORDS_FIELD-1]>>(RADIX-1)));
		gt[NWORDS_FIELD]-=T_.val[0][1][0]&(0-(g[NWORDS_FIELD-1]>>(RADIX-1)));
		unsigned carry = 0;
        for (int j = 0; j <1+NWORDS_FIELD; j++)
            ADDC(carry, gt[j], ft[j], carry, f_[j]);
        shfRx(f_, 1+NWORDS_FIELD, k);        
        mp_mul(T_.val[1][0],f,ft,NWORDS_FIELD);
		ft[NWORDS_FIELD]-=f[0]&(0-(T_.val[1][0][NWORDS_FIELD-1]>>(RADIX-1)));
		ft[NWORDS_FIELD]-=T_.val[1][0][0]&(0-(f[NWORDS_FIELD-1]>>(RADIX-1)));
		mp_mul(T_.val[1][1],g,gt,NWORDS_FIELD);
		gt[NWORDS_FIELD]-=g[0]&(0-(T_.val[1][1][NWORDS_FIELD-1]>>(RADIX-1)));
		gt[NWORDS_FIELD]-=T_.val[1][1][0]&(0-(g[NWORDS_FIELD-1]>>(RADIX-1)));
		carry = 0;
        for (int j = 0; j < 1+NWORDS_FIELD; j++)
            ADDC(carry, ft[j], gt[j], carry, g_[j]);
        shfRx(g_, 1+NWORDS_FIELD, k);
        felcopy(f_,f);
        felcopy(g_,g);
        T_.val[0][0][0]+=((digit_t*)p377)[0]&(0-(T_.val[0][0][0]>>(RADIX-1)));
        for(int j=1;j<NWORDS_FIELD;j++)T_.val[0][0][j]&=((digit_t*)p377)[j];
		T_.val[0][1][0]+=((digit_t*)p377)[0]&(0-(T_.val[0][1][0]>>(RADIX-1)));
        for(int j=1;j<NWORDS_FIELD;j++)T_.val[0][1][j]&=((digit_t*)p377)[j];
        T_.val[1][0][0]+=((digit_t*)p377)[0]&(0-(T_.val[1][0][0]>>(RADIX-1)));
        for(int j=1;j<NWORDS_FIELD;j++)T_.val[1][0][j]&=((digit_t*)p377)[j];
		T_.val[1][1][0]+=((digit_t*)p377)[0]&(0-(T_.val[1][1][0]>>(RADIX-1)));
        for(int j=1;j<NWORDS_FIELD;j++)T_.val[1][1][j]&=((digit_t*)p377)[j];
//        fpmul_mont(T_.val[0][0],T.val[0][0],u_[0]);
//        fpmul_mont(T_.val[0][1],T.val[1][0],u_[1]);
        fpmul_mont(T_.val[0][0],T.val[0][1],v_[0]);
        fpmul_mont(T_.val[0][1],T.val[1][1],v_[1]);
//        fpmul_mont(T_.val[1][0],T.val[0][0],q_[0]);
//        fpmul_mont(T_.val[1][1],T.val[1][0],q_[1]);
        fpmul_mont(T_.val[1][0],T.val[0][1],r_[0]);
        fpmul_mont(T_.val[1][1],T.val[1][1],r_[1]);
//        fpadd377x1(u_[0],u_[1],T.val[0][0]);
        fpadd377x1(v_[0],v_[1],T.val[0][1]);
//        fpadd377x1(q_[0],q_[1],T.val[1][0]);
        fpadd377x1(r_[0],r_[1],T.val[1][1]);
	}
	sf[0] = f[0] + 1 >> 1;
	*P=T;
}
void BY_inversion(digit_t* f, digit_t* g, digit_t* res)
{
    digit_t delta = 1, sf = 1, m = 870;
    felm_t e={1},v, ft, gt;
    fpdiv2_377(e,e);
    ab(e,m,e);
    felcopy(f, ft);
    felcopy(g, gt);
    Matrix2 P[1] = { 0 };
	jumpdivstep(m,30,delta,ft,gt,P,&sf);
//    felcopy(f, ft);
//    felcopy(g, gt);
//    divsteps_21(m, &delta, ft, gt, P, &sf);
    //g^-1=pre_mont*v*sign(f)
    to_mont(e,e);
    to_mont(P[0].val[0][1], v);
    fpmul_mont(e, v, res);
    from_mont(res, res); 
    unsigned borrow = 0;
    for (int i = 0; i < NWORDS_FIELD; i++)
        SUBC(borrow, ((sf - 1) & ((digit_t*)p377)[i]) | ((0 - sf) & res[i]), (sf - 1) & res[i], borrow, res[i]);
}
void fpinv_21(digit_t* a)
{
    BY_inversion((digit_t*)p377, a, a);
}

int main()
{
    int che = 1;
    for (digit_t k = 1; k < 100; k++)
    {
        felm_t a, recip_a, res;
        fprandom377_test(a);
        to_mont(a, res);
        fpinv_21(a);
        to_mont(a, a);
        fpmul_mont(a, res, res);
        from_mont(res, a);
        if (!isfel_one(a))
        {
            che = 0;
            cout << "failed";
            break;
        }
    }
    if(che)cout << "OK";
    /*
    felm_t a = { 2 }, res;
    BY_inversion((digit_t*)p377, a, res);
    for (int i = 0; i < NWORDS_FIELD; i++)
        cout << hex << res[NWORDS_FIELD - i - 1] << " ";
    cout << endl;
    */
    /*
    ll res, res2;
    BY_inversionte(7, 2, res, res2);
    cout<<res<<" "<<res2;
    */
    /*
    felm_t a = { 2 };
    fpinv_21(a);
    for (int i = 0; i < NWORDS_FIELD; i++)
        cout << hex << a[NWORDS_FIELD - i - 1] << " ";
    cout << endl;
    */
    return 0;
}

评测结果

Compilation

N/A

Compile OK

Score: N/A

显示更多

Compile OK

Testcase #1

35.312 ms

36 KB

Accepted

Score: 100

显示更多

Time (ms): 35.311742
Memory (KiB): 36
Status: Run Finished

ok Accepted
>>>>>>> stdout (first 512 bytes) <<<<<<<
OK
========================================