提交记录 11098


用户 题目 状态 得分 用时 内存 语言 代码长度
rd0x01 test. 自定义测试 Accepted 100 24.877 ms 48 KB C++11 3.12 KB
提交时间 评测时间
2019-10-28 14:46:27 2023-09-03 19:38:36
#include <cmath>
#include <cstdint>
#include <cstdlib>
#include <functional>
#include <iostream>
#include <vector>
#include <x86intrin.h>

// GCC-specific: request optimisation level O0 for the functions defined after
// this line (the next `#pragma GCC optimize` below replaces it).
// NOTE(review): confirm the pragma is honoured for template instantiations on
// the compiler actually used.
#pragma GCC optimize("O0")
// Baseline reduction kernel: folds v[0..n) into one value with OP, starting
// from IDENT and consuming one element per loop iteration, left to right.
// eval() in this file also uses this functor to compute the reference result.
//   T     - element and accumulator type
//   OP    - default-constructible binary functor, invoked as op(acc, element)
//   IDENT - integer constant used to initialise the accumulator
template <class T, class OP, int IDENT>
struct combine_o0
{
    // n    - number of elements read from v (no element is read when n <= 0)
    // v    - input array
    // dest - receives the folded result; IDENT converted to T when n <= 0
    void operator()(int n, const T *v, T *dest)
    {
        OP op;
        T t = IDENT;
        for (int i = 0; i < n; i++)
            t = op(t, v[i]);
        *dest = t;
    }
};

// GCC-specific: request optimisation level O2 for the functions defined after
// this line. No later `#pragma GCC optimize` appears in the file as shown, so
// it also covers the definitions that follow this struct.
#pragma GCC optimize("O2")
// Same reduction as combine_o0: the struct body is source-identical, so the
// only difference between the two kernels is the pragma that precedes them.
//   T     - element and accumulator type
//   OP    - default-constructible binary functor, invoked as op(acc, element)
//   IDENT - integer constant used to initialise the accumulator
template <class T, class OP, int IDENT>
struct combine_o2
{
    // n    - number of elements read from v (no element is read when n <= 0)
    // v    - input array
    // dest - receives the folded result; IDENT converted to T when n <= 0
    void operator()(int n, const T *v, T *dest)
    {
        OP op;
        T t = IDENT;
        for (int i = 0; i < n; i++)
            t = op(t, v[i]);
        *dest = t;
    }
};

// Reduction kernel with the loop body unrolled by a factor of two.
// Elements are still folded strictly left to right into a single accumulator,
// so the result matches the one-element-per-iteration kernels.
//   T     - element and accumulator type
//   OP    - default-constructible binary functor, invoked as op(acc, element)
//   IDENT - integer constant used to initialise the accumulator
template <class T, class OP, int IDENT>
struct combine_unroll2
{
    // Folds v[0..n) and stores the result in *dest (IDENT when n <= 0).
    void operator()(int n, const T *v, T *dest)
    {
        OP combine;
        T acc = IDENT;

        // Take two elements per iteration for as long as a full pair remains.
        int pos = 0;
        while (pos + 1 < n)
        {
            acc = combine(acc, v[pos]);
            acc = combine(acc, v[pos + 1]);
            pos += 2;
        }

        // An odd length leaves exactly one trailing element to fold in.
        if (pos < n)
            acc = combine(acc, v[pos]);

        *dest = acc;
    }
};

// Reduction kernel with the loop body unrolled by a factor of four.
// Elements are folded strictly left to right into a single accumulator; a
// one-at-a-time loop handles the up-to-three elements left after the last
// full group.
//   T     - element and accumulator type
//   OP    - default-constructible binary functor, invoked as op(acc, element)
//   IDENT - integer constant used to initialise the accumulator
template <class T, class OP, int IDENT>
struct combine_unroll4
{
    // Folds v[0..n) and stores the result in *dest (IDENT when n <= 0).
    void operator()(int n, const T *v, T *dest)
    {
        OP combine;
        T acc = IDENT;
        int done = 0;

        // Main loop: four elements per iteration while a full group remains.
        while (n - done >= 4)
        {
            acc = combine(acc, v[done]);
            acc = combine(acc, v[done + 1]);
            acc = combine(acc, v[done + 2]);
            acc = combine(acc, v[done + 3]);
            done += 4;
        }

        // Tail: whatever is left over, one element at a time.
        while (done < n)
        {
            acc = combine(acc, v[done]);
            ++done;
        }

        *dest = acc;
    }
};

// Benchmarks one reduction functor and prints "<name>: <average>", where the
// average is the mean __rdtsc() tick difference per call over TEST_RUN timed
// calls. Prints "<name>: WRONG" and stops as soon as a timed call disagrees
// with the reference result produced by combine_o0.
//   T, OP, IDENT - element type, binary functor and initial accumulator value
//                  used to instantiate the combine_o0 reference
//   n            - number of random input elements to generate
//   f            - functor under test, invoked as f(n, input, &result)
//   name         - label printed in front of the measurement
template <class T, class OP, int IDENT, class U>
void eval(int n, U f, const char *name)
{
    std::cout << name << ": ";

    // The vector owns the input, so it is released on every exit path,
    // including the early return taken when a result is wrong.
    std::vector<T> buff(n);
    for (T &value : buff)
        value = rand();
    // NOTE(review): with unbounded rand() inputs the integer sums/products are
    // likely to overflow, which is undefined behaviour for signed types.

    // The input never changes, so the reference result is computed once
    // instead of once per timed iteration.
    T correct{};
    combine_o0<T, OP, IDENT>()(n, buff.data(), &correct);

    T dest{};
    f(n, buff.data(), &dest); // drop the first run

    const int TEST_RUN = 1000;
    uint64_t clocks = 0;
    for (int t = 0; t < TEST_RUN; t++)
    {
        const auto start = __rdtsc();
        f(n, buff.data(), &dest);
        const auto end = __rdtsc();
        clocks += end - start;

        // A NaN difference (e.g. inf - inf for overflowing floating-point
        // products) compares false here and is therefore accepted.
        if (std::abs(dest - correct) > 1e-9)
        {
            std::cout << "WRONG" << std::endl;
            return;
        }
    }
    std::cout << static_cast<double>(clocks) / TEST_RUN << std::endl;
}

// Runs eval() for the functor template `f` over every benchmarked combination:
// {int32_t, int64_t, float, double} x {std::plus with identity 0,
// std::multiplies with identity 1}. Each run is labelled with the stringised
// template name followed by the type and operator.
//   n - input length passed to every eval() call (expanded eight times, so it
//       should be free of side effects)
//   f - name of a class template taking <T, OP, IDENT>
// The body is wrapped in do { } while (0) so the macro behaves as a single
// statement (safe under an unbraced if/else); callers supply the trailing ';'.
#define eval_all(n, f) \
    do \
    { \
        eval<int32_t, std::plus<int32_t>, 0>((n), f<int32_t, std::plus<int32_t>, 0>(), #f " i32 +"); \
        eval<int32_t, std::multiplies<int32_t>, 1>((n), f<int32_t, std::multiplies<int32_t>, 1>(), #f " i32 *"); \
        eval<int64_t, std::plus<int64_t>, 0>((n), f<int64_t, std::plus<int64_t>, 0>(), #f " i64 +"); \
        eval<int64_t, std::multiplies<int64_t>, 1>((n), f<int64_t, std::multiplies<int64_t>, 1>(), #f " i64 *"); \
        eval<float, std::plus<float>, 0>((n), f<float, std::plus<float>, 0>(), #f " f32 +"); \
        eval<float, std::multiplies<float>, 1>((n), f<float, std::multiplies<float>, 1>(), #f " f32 *"); \
        eval<double, std::plus<double>, 0>((n), f<double, std::plus<double>, 0>(), #f " f64 +"); \
        eval<double, std::multiplies<double>, 1>((n), f<double, std::multiplies<double>, 1>(), #f " f64 *"); \
    } while (0)

// Entry point: benchmarks every reduction kernel on inputs of the same length.
// rand() is never seeded here, so the generated input sequence is the same on
// every execution.
int main()
{
    // Number of elements each kernel reduces per call.
    const int kLength = 200;

    eval_all(kLength, combine_o0);      // baseline, defined under the O0 pragma
    eval_all(kLength, combine_o2);      // same source, defined under the O2 pragma
    eval_all(kLength, combine_unroll2); // loop unrolled by two
    eval_all(kLength, combine_unroll4); // loop unrolled by four

    return 0;
}

Compilation N/A N/A Compile OK Score: N/A

Testcase #1 24.877 ms 48 KB Accepted Score: 100


Judge Duck Online | 评测鸭在线
Server Time: 2026-03-28 10:10:12 | Loaded in 1 ms | Server Status
个人娱乐项目,仅供学习交流使用 | 捐赠