提交记录 20166


用户 题目 状态 得分 用时 内存 语言 代码长度
TSKY test. 自定义测试 Runtime Error 0 111.258 ms 44 KB C++14 3.30 KB
提交时间 评测时间
2023-09-16 12:18:55 2023-09-16 12:18:57
#include <iostream>
#include <chrono>
#include <immintrin.h>

#pragma GCC target("avx2,fma")

//#pragma GCC target("avx512f")

/**
 * Minimal interval timer.
 *
 * Usage: start() -> (work) -> stop() -> duration().
 * The elapsed time is captured in nanoseconds and reported by duration() as
 * nanoseconds / (rate * 1000), i.e. microseconds when rate == 1.
 */
class StopWatch
{
private:
    // A monotonic clock is required for interval measurement: system_clock may
    // be adjusted while the timer runs, which can yield a negative delta that
    // would wrap around when stored in the unsigned tick counter.
    using clock_type = std::chrono::steady_clock;

    bool is_stop = false, is_start = false;
    uint64_t tick = 0;  // last measured interval, in nanoseconds
    double rate = 1.0;  // divisor applied (together with 1000) in duration()
    clock_type::time_point begin;
    clock_type::time_point end;

public:
    /// Creates an idle stopwatch with rate 1 (duration() reports microseconds).
    StopWatch() = default;

    /// Creates an idle stopwatch whose duration() result is divided by rate_in.
    StopWatch(double rate_in) : rate(rate_in)
    {
    }

    /// Clears any previous measurement and begins timing from now.
    void start()
    {
        reset();
        is_start = true;
        begin = clock_type::now();
    }

    /// Records the interval since start(). Does nothing if start() was not called.
    void stop()
    {
        if (is_start)
        {
            is_stop = true;
            end = clock_type::now();
            auto delta_time = std::chrono::duration_cast<std::chrono::nanoseconds>(end - begin);
            tick = delta_time.count();
        }
    }

    /// Returns the stopwatch to the idle state with a zero measurement.
    void reset()
    {
        is_start = false;
        is_stop = false;
        tick = 0;
    }

    /**
     * Returns the measured interval as nanoseconds / (rate * 1000).
     *
     * If the stopwatch has been stopped, the stored interval is returned.
     * If it is still running, the time elapsed so far is sampled and the
     * stopwatch keeps running, so a later call samples again.
     * If it was never started, 0 is returned.
     */
    double duration()
    {
        if (!is_stop)
        {
            stop();
            // Undo the "stopped" flag set by stop() so the timer stays live.
            is_stop = false;
        }
        return static_cast<double>(tick / (rate * 1000));
    }
};
// Shared stopwatch instance used by the benchmark routine below; with rate 1
// its duration() reports the measured interval in microseconds.
StopWatch watch_default(1);

/**
 * Times a loop of independent 256-bit double-precision FMA chains and reports
 * the achieved rate.
 *
 * Each loop iteration issues one _mm256_fmadd_pd per active accumulator. The
 * accumulators are stored and their lane sum is printed afterwards, which
 * makes the loop's results observable. Because every step computes
 * b * a + a with b == 2 (i.e. triples the value), the accumulators overflow
 * to +inf after a few hundred iterations; the printed sum is only a checksum.
 *
 * The measurement uses the global watch_default stopwatch.
 *
 * @param test_cycle number of loop iterations to execute
 * @return floating-point operations per microsecond (Mflops), or 0 when the
 *         measured time is zero and no rate can be computed
 */
size_t MFLOPS_AVX2(size_t test_cycle)
{
    // Number of FMA statements actually executed per iteration. This must match
    // the un-commented _mm256_fmadd_pd lines in the loop below (a0..a7); the
    // a8..a11 chains are currently disabled and must not be counted.
    const size_t active_chains = 8;
    const size_t lanes_per_vector = 4; // doubles in one __m256d
    const size_t flops_per_fma = 2;    // one multiply and one add per lane

    __m256d a0 = _mm256_set1_pd(1);
    __m256d a1 = _mm256_set1_pd(2);
    __m256d a2 = _mm256_set1_pd(3);
    __m256d a3 = _mm256_set1_pd(4);
    __m256d a4 = _mm256_set1_pd(5);
    __m256d a5 = _mm256_set1_pd(6);
    __m256d a6 = _mm256_set1_pd(7);
    __m256d a7 = _mm256_set1_pd(8);
    // a8..a11 are not updated in the loop; they are kept so the printed
    // checksum stays the same and the chains can be re-enabled easily.
    __m256d a8 = _mm256_set1_pd(9);
    __m256d a9 = _mm256_set1_pd(10);
    __m256d a10 = _mm256_set1_pd(11);
    __m256d a11 = _mm256_set1_pd(12);
    __m256d b = _mm256_set1_pd(2);

    watch_default.start();
    for (size_t i = 0; i < test_cycle; i++)
    {
        a0 = _mm256_fmadd_pd(b, a0, a0);
        a1 = _mm256_fmadd_pd(b, a1, a1);
        a2 = _mm256_fmadd_pd(b, a2, a2);
        a3 = _mm256_fmadd_pd(b, a3, a3);
        a4 = _mm256_fmadd_pd(b, a4, a4);
        a5 = _mm256_fmadd_pd(b, a5, a5);
        a6 = _mm256_fmadd_pd(b, a6, a6);
        a7 = _mm256_fmadd_pd(b, a7, a7);
        /*a8 = _mm256_fmadd_pd(b, a8, a8);
        a9 = _mm256_fmadd_pd(b, a9, a9);
        a10 = _mm256_fmadd_pd(b, a10, a10);
        a11 = _mm256_fmadd_pd(b, a11, a11);*/
    }
    watch_default.stop();

    // Spill every accumulator and fold the lanes into one printed value.
    double ary[48];
    _mm256_storeu_pd(ary, a0);
    _mm256_storeu_pd(ary + 4, a1);
    _mm256_storeu_pd(ary + 8, a2);
    _mm256_storeu_pd(ary + 12, a3);
    _mm256_storeu_pd(ary + 16, a4);
    _mm256_storeu_pd(ary + 20, a5);
    _mm256_storeu_pd(ary + 24, a6);
    _mm256_storeu_pd(ary + 28, a7);
    _mm256_storeu_pd(ary + 32, a8);
    _mm256_storeu_pd(ary + 36, a9);
    _mm256_storeu_pd(ary + 40, a10);
    _mm256_storeu_pd(ary + 44, a11);
    double sum = 0;
    for (int i = 0; i < 48; i++)
    {
        sum += ary[i];
    }
    std::cout << sum << std::endl;

    const size_t flops = test_cycle * active_chains * lanes_per_vector * flops_per_fma;

    // Keep the elapsed time as a double: truncating it to an integer turns any
    // sub-microsecond measurement into 0 and the division below into a crash.
    const double us = watch_default.duration();
    const size_t elapsed_ms = static_cast<size_t>(us / 1000);
    const size_t mflops = us > 0 ? static_cast<size_t>(flops / us) : 0;

    std::cout << "time: " << elapsed_ms << "ms\t" << mflops << "Mflops" << std::endl;
    return mflops;
}
// Entry point: runs the AVX2 FMA throughput benchmark once and prints its report.
int main()
{
    // Loop iterations to time: 1e8.
    const size_t iterations = 1e8;

    std::cout << "AVX2:\n";
    MFLOPS_AVX2(iterations);

    // The AVX-512 variant is disabled:
    //std::cout << "AVX512:\n";
    //MFLOPS_AVX512(1e9); // loop 1e9 times
    return 0;
}

Compilation N/A N/A Compile OK Score: N/A

Testcase #1 111.258 ms 44 KB Runtime Error Score: 0


Judge Duck Online | 评测鸭在线
Server Time: 2025-05-09 17:46:27 | Loaded in 1 ms | Server Status
个人娱乐项目,仅供学习交流使用 | 捐赠