提交记录 11110 - Judge Duck Online

用户	题目	状态	得分	用时	内存	语言	代码长度
rd0x01	test. 自定义测试	Accepted	100	24.74 ms	40 KB	C++11	3.34 KB

提交时间	评测时间
2019-10-28 15:22:45	2023-09-03 19:38:50

代码

#include <cmath>
#include <cstdint>
#include <cstdlib>
#include <functional>
#include <iostream>
#include <vector>
#include <x86intrin.h>

// Request -O0 from GCC for the functions defined after this point, so the
// reduction below serves as the unoptimized baseline.
#pragma GCC optimize("O0")
// Baseline reduction: folds v[0..n) from left to right with OP, starting from
// IDENT, and stores the result in *dest. With n <= 0 the result is IDENT.
template <class T, class OP, int IDENT>
struct compile_o0
{
    void operator()(int n, const T *v, T *dest)
    {
        OP combine;
        T acc = IDENT;
        int idx = 0;
        while (idx < n)
        {
            acc = combine(acc, v[idx]);
            ++idx;
        }
        *dest = acc;
    }
};

// Request -O2 from GCC for the functions defined after this point.
#pragma GCC optimize("O2")
// Same straightforward left-to-right reduction as the baseline, written once
// more so it can be built under the optimization setting requested above.
// Folds v[0..n) with OP starting from IDENT and stores the result in *dest.
template <class T, class OP, int IDENT>
struct compile_o2
{
    void operator()(int n, const T *v, T *dest)
    {
        OP combine;
        T acc = IDENT;
        int idx = 0;
        while (idx < n)
        {
            acc = combine(acc, v[idx]);
            ++idx;
        }
        *dest = acc;
    }
};

// Reduction unrolled by two: each loop iteration folds a pair of adjacent
// elements into the single accumulator, and a leftover element (odd n) is
// folded afterwards. The result is written to *dest; n <= 0 yields IDENT.
template <class T, class OP, int IDENT>
struct unroll_2
{
    void operator()(int n, const T *v, T *dest)
    {
        OP combine;
        T acc = IDENT;

        // `hi` indexes the second element of the current pair.
        int hi = 1;
        while (hi < n)
        {
            acc = combine(combine(acc, v[hi - 1]), v[hi]);
            hi += 2;
        }

        // An odd count leaves exactly one trailing element unprocessed.
        const bool has_tail = (n % 2 == 1);
        if (has_tail)
        {
            acc = combine(acc, v[n - 1]);
        }

        *dest = acc;
    }
};

// Reduction unrolled by four: the main loop folds four consecutive elements
// per iteration into the single accumulator, then a scalar loop handles the
// remaining 0-3 elements. The result is written to *dest; n <= 0 yields IDENT.
template <class T, class OP, int IDENT>
struct unroll_4
{
    void operator()(int n, const T *v, T *dest)
    {
        OP combine;
        T acc = IDENT;
        int pos = 0;

        // Full groups of four.
        while (pos <= n - 4)
        {
            const T *quad = v + pos;
            acc = combine(acc, quad[0]);
            acc = combine(acc, quad[1]);
            acc = combine(acc, quad[2]);
            acc = combine(acc, quad[3]);
            pos += 4;
        }

        // Leftover elements that did not fill a group.
        while (pos < n)
        {
            acc = combine(acc, v[pos]);
            ++pos;
        }

        *dest = acc;
    }
};

// Benchmarks the reduction functor `f` on `n` pseudo-random values and checks
// its result against the compile_o0 reference reduction.
//
// Template parameters:
//   T, OP, IDENT - element type, binary operation and identity value used to
//                  instantiate the reference reduction.
//   U            - type of the functor under test; called as f(n, data, &out).
// Parameters:
//   n    - number of elements in the input buffer.
//   f    - functor under test.
//   name - label supplied by the caller; not used by this function.
// Returns the average number of time-stamp-counter ticks per call over
// TEST_RUN timed calls, or -1 if any timed call produced a result that
// differs from the reference by more than the tolerance.
template <class T, class OP, int IDENT, class U>
int64_t eval(int n, U f, const char *name)
{
    (void)name; // kept for interface compatibility

    // The vector owns the buffer, so it is released on every return path
    // (the previous raw new[] leaked on the early "wrong answer" return).
    std::vector<T> buff(n);
    for (T &value : buff)
        value = std::rand() % 1000;

    // The input never changes, so the reference result is computed once
    // instead of once per timed iteration.
    T correct{};
    compile_o0<T, OP, IDENT>()(n, buff.data(), &correct);
    const double expected = static_cast<double>(correct);

    T dest{};
    f(n, buff.data(), &dest); // drop the first run

    const int TEST_RUN = 1000;
    int64_t clocks = 0;
    for (int t = 0; t < TEST_RUN; t++)
    {
        const int64_t start = __rdtsc();
        f(n, buff.data(), &dest);
        const int64_t end = __rdtsc();
        clocks += end - start;

        // Compare in double so the subtraction cannot overflow for integer T,
        // and scale the relative tolerance by |expected| so it also applies
        // when the reference value is negative.
        const double diff = std::abs(static_cast<double>(dest) - expected);
        if (diff > 1e-7 && diff > 1e-7 * std::abs(expected))
            return -1;
    }
    return clocks / TEST_RUN;
}

// Writes one tab-prefixed table cell to std::cout: the text "WA" when x is the
// sentinel -1, otherwise the number itself.
void output(int64_t x)
{
    const bool failed = (x == -1);
    if (failed)
    {
        std::cout << "\tWA";
        return;
    }
    std::cout << "\t" << x;
}

// Prints one result row for the reduction functor template `f`: the template's
// name followed by the eval() result for each of the eight combinations of
// element type (int32_t, int64_t, float, double) and operation (+, *), then a
// newline. `n` is the number of elements passed to every eval() call.
//
// The body is wrapped in do { ... } while (0) so that an invocation followed
// by a semicolon is a single statement and stays correct inside an unbraced
// if/else. `n` is parenthesized so that an expression argument is passed
// through intact.
#define eval_all(n, f) \
    do { \
        std::cout << #f; \
        output(eval<int32_t, std::plus<int32_t>, 0>((n), f<int32_t, std::plus<int32_t>, 0>(), #f " i32 +")); \
        output(eval<int32_t, std::multiplies<int32_t>, 1>((n), f<int32_t, std::multiplies<int32_t>, 1>(), #f " i32 *")); \
        output(eval<int64_t, std::plus<int64_t>, 0>((n), f<int64_t, std::plus<int64_t>, 0>(), #f " i64 +")); \
        output(eval<int64_t, std::multiplies<int64_t>, 1>((n), f<int64_t, std::multiplies<int64_t>, 1>(), #f " i64 *")); \
        output(eval<float, std::plus<float>, 0>((n), f<float, std::plus<float>, 0>(), #f " f32 +")); \
        output(eval<float, std::multiplies<float>, 1>((n), f<float, std::multiplies<float>, 1>(), #f " f32 *")); \
        output(eval<double, std::plus<double>, 0>((n), f<double, std::plus<double>, 0>(), #f " f64 +")); \
        output(eval<double, std::multiplies<double>, 1>((n), f<double, std::multiplies<double>, 1>(), #f " f64 *")); \
        std::cout << std::endl; \
    } while (0)

// Prints a tab-separated table: a header line, then one row per reduction
// implementation, each benchmarked on 200 elements.
int main()
{
    // Column labels follow the order in which eval_all emits its results.
    // The seventh label previously read "64+"; it is the double-precision
    // addition column and is now labelled "f64+" like its neighbours.
    std::cout << "name\t\ti32+\ti32*\ti64+\ti64*\tf32+\tf32*\tf64+\tf64*" << std::endl;
    eval_all(200, compile_o0);
    eval_all(200, compile_o2);
    eval_all(200, unroll_2);
    eval_all(200, unroll_4);
    return 0;
}

评测结果

Compilation

N/A

Compile OK

Score: N/A

显示更多

Compile OK

Testcase #1

24.74 ms

40 KB

Accepted

Score: 100

显示更多

Time (ms): 24.740127
Memory (KiB): 40
Status: Run Finished

ok Accepted
>>>>>>> stdout (first 512 bytes) <<<<<<<
name		i32+	i32*	i64+	i64*	f32+	f32*	64+	f64*
compile_o0	1188	1452	1193	1431	2393	2394	2390	2396
compile_o2	300	622	302	623	822	823	822	821
unroll_2	252	621	226	623	821	822	820	821
unroll_4	167	442	165	44