Perf of https://duck.ac/submission/14004

user1 于 2023-05-09 10:56:58 发表,最后修改于 2023-05-09 11:05:38


Better Display

Percent│                                                                                                                                                        ◆
       │      /tmp/a.out:     文件格式 elf64-x86-64                                                                                                            ▒
       │                                                                                                                                                        ▒
       │                                                                                                                                                        ▒
       │      Disassembly of section .text:                                                                                                                     ▒
       │                                                                                                                                                        ▒
       │      0000000000001060 <main>:                                                                                                                          ▒
       │      main():                                                                                                                                           ▒
       │1060:   endbr64                                                                                                                                         ▒
       │1064:   push         %r15                                                                                                                               ▒
       │1066:   push         %r14                                                                                                                               ▒
       │1068:   push         %r13                                                                                                                               ▒
       │106a:   push         %r12                                                                                                                               ▒
       │106c:   push         %rbp                                                                                                                               ▒
       │106d:   push         %rbx                                                                                                                               ▒
       │106e:   sub          $0x28,%rsp                                                                                                                         ▒
       │1072:   movl         $0x64,0x1c(%rsp)                                                                                                                   ▒
       │107a:   nopw         0x0(%rax,%rax,1)                                                                                                                   ▒
       │1080:   lea          0x2082fb9(%rip),%rbp        # 2084040 <matrix_multiply(int, double const*, double const*, double*)::A>                             ▒
       │1087:   lea          0x1002fb2(%rip),%rdx        # 1004040 <A>                                                                                          ▒
       │108e:   mov          %rbp,%rax                                                                                                                          ▒
       │1091:   nopl         0x0(%rax)                                                                                                                          ▒
       │1098:   mov          %rdx,%r15                                                                                                                          ▒
       │109b:   mov          %rax,%r14                                                                                                                          ▒
       │109e:   mov          $0xffffffffffffe000,%r13                                                                                                           ▒
  0.00 │10a5:   nopl         (%rax)                                                                                                                             ▒
  0.00 │10a8:   vmovapd      0x2000(%r15,%r13,1),%ymm0                                                                                                          ▒
  0.03 │10b2:   vmovapd      0x2020(%r15,%r13,1),%ymm1                                                                                                          ▒
  0.00 │10bc:   vmovapd      0x2040(%r15,%r13,1),%ymm2                                                                                                          ▒
  0.01 │10c6:   vmovapd      0x2060(%r15,%r13,1),%ymm3                                                                                                          ▒
  0.01 │10d0:   vmovapd      0x2080(%r15,%r13,1),%ymm4                                                                                                          ▒
  0.02 │10da:   vmovapd      0x20a0(%r15,%r13,1),%ymm5                                                                                                          ▒
  0.01 │10e4:   vmovapd      0x20c0(%r15,%r13,1),%ymm6                                                                                                          ▒
  0.11 │10ee:   vmovapd      0x20e0(%r15,%r13,1),%ymm7                                                                                                          ▒
  0.00 │10f8:   vmovapd      %ymm0,0x2000(%r14,%r13,1)                                                                                                          ▒
  0.48 │1102:   vmovapd      %ymm1,0x2020(%r14,%r13,1)                                                                                                          ▒
  0.01 │110c:   vmovapd      %ymm2,0x2040(%r14,%r13,1)                                                                                                          ▒
  0.24 │1116:   vmovapd      %ymm3,0x2060(%r14,%r13,1)                                                                                                          ▒
  0.01 │1120:   vmovapd      %ymm4,0x2080(%r14,%r13,1)                                                                                                          ▒
  0.45 │112a:   vmovapd      %ymm5,0x20a0(%r14,%r13,1)                                                                                                          ▒
  0.01 │1134:   vmovapd      %ymm6,0x20c0(%r14,%r13,1)                                                                                                          ▒
  0.43 │113e:   vmovapd      %ymm7,0x20e0(%r14,%r13,1)                                                                                                          ▒
  0.00 │1148:   add          $0x100,%r13                                                                                                                        ▒
  0.00 │114f: ↑ jne          10a8 <main+0x48>                                                                                                                   ▒
       │1155:   lea          0x28c2ee4(%rip),%rbx        # 28c4040 <_end>                                                                                       ▒
       │115c:   add          $0x2100,%rax                                                                                                                       ▒
       │1162:   add          $0x2000,%rdx                                                                                                                       ▒
       │1169:   cmp          %rbx,%rax                                                                                                                          ▒
  0.00 │116c: ↑ jne          1098 <main+0x38>                                                                                                                   ▒
       │1172:   lea          0x802ec7(%rip),%rdx        # 804040 <B>                                                                                            ▒
       │1179:   lea          0x1802ec0(%rip),%rax        # 1804040 <matrix_multiply(int, double const*, double const*, double*)::B>                             ▒
       │1180:   mov          %rdx,%r15                                                                                                                          ▒
       │1183:   mov          %rax,%r14                                                                                                                          ▒
  0.00 │1186:   mov          $0xffffffffffffe000,%r13                                                                                                           ▒
       │118d:   nopl         (%rax)                                                                                                                             ▒
  0.00 │1190:   vmovapd      0x2000(%r15,%r13,1),%ymm0                                                                                                          ▒
  0.04 │119a:   vmovapd      0x2020(%r15,%r13,1),%ymm1                                                                                                          ▒
  0.00 │11a4:   vmovapd      0x2040(%r15,%r13,1),%ymm2                                                                                                          ▒
  0.01 │11ae:   vmovapd      0x2060(%r15,%r13,1),%ymm3                                                                                                          ▒
  0.01 │11b8:   vmovapd      0x2080(%r15,%r13,1),%ymm4                                                                                                          ▒
  0.03 │11c2:   vmovapd      0x20a0(%r15,%r13,1),%ymm5                                                                                                          ▒
  0.01 │11cc:   vmovapd      0x20c0(%r15,%r13,1),%ymm6                                                                                                          ▒
  0.12 │11d6:   vmovapd      0x20e0(%r15,%r13,1),%ymm7                                                                                                          ▒
  0.00 │11e0:   vmovapd      %ymm0,0x2000(%r14,%r13,1)                                                                                                          ▒
  0.54 │11ea:   vmovapd      %ymm1,0x2020(%r14,%r13,1)                                                                                                          ▒
  0.01 │11f4:   vmovapd      %ymm2,0x2040(%r14,%r13,1)                                                                                                          ▒
  0.26 │11fe:   vmovapd      %ymm3,0x2060(%r14,%r13,1)                                                                                                          ▒
  0.01 │1208:   vmovapd      %ymm4,0x2080(%r14,%r13,1)                                                                                                          ▒
  0.56 │1212:   vmovapd      %ymm5,0x20a0(%r14,%r13,1)                                                                                                          ▒
  0.01 │121c:   vmovapd      %ymm6,0x20c0(%r14,%r13,1)                                                                                                          ▒
  0.51 │1226:   vmovapd      %ymm7,0x20e0(%r14,%r13,1)                                                                                                          ▒
  0.00 │1230:   add          $0x100,%r13                                                                                                                        ▒
       │1237: ↑ jne          1190 <main+0x130>                                                                                                                  ▒
       │123d:   lea          0x2082dfc(%rip),%rbx        # 2084040 <matrix_multiply(int, double const*, double const*, double*)::A>                             ▒
       │1244:   add          $0x2200,%rax                                                                                                                       ◆
       │124a:   add          $0x2000,%rdx                                                                                                                       ▒
       │1251:   cmp          %rax,%rbx                                                                                                                          ▒
  0.00 │1254: ↑ jne          1180 <main+0x120>                                                                                                                  ▒
       │125a:   mov          $0x800000,%edx                                                                                                                     ▒
       │125f:   xor          %esi,%esi                                                                                                                          ▒
       │1261:   lea          0x2dd8(%rip),%rdi        # 4040 <C>                                                                                                ▒
       │1268:   vzeroupper                                                                                                                                      ▒
       │126b: → callq        1050 <memset@plt>                                                                                                                  ▒
  0.00 │1270:   lea          0x2dc9(%rip),%rax        # 4040 <C>                                                                                                ▒
  0.00 │1277:   xor          %ecx,%ecx                                                                                                                          ▒
       │1279:   mov          %rax,0x8(%rsp)                                                                                                                     ▒
       │127e:   movl         $0x0,0x18(%rsp)                                                                                                                    ▒
       │1286:   nopw         %cs:0x0(%rax,%rax,1)                                                                                                               ▒
       │1290:   lea          0x40000(%rcx),%rax                                                                                                                 ▒
  0.00 │1297:   addl         $0x100,0x18(%rsp)                                                                                                                  ▒
       │129f:   mov          %rbp,%rsi                                                                                                                          ▒
       │12a2:   movq         $0x0,(%rsp)                                                                                                                        ▒
       │12aa:   lea          0x1802d8f(%rip),%rdi        # 1804040 <matrix_multiply(int, double const*, double const*, double*)::B>                             ▒
       │12b1:   mov          %rax,0x10(%rsp)                                                                                                                    ▒
       │12b6:   nopw         %cs:0x0(%rax,%rax,1)                                                                                                               ▒
       │12c0:   mov          0x8(%rsp),%r10                                                                                                                     ▒
  0.00 │12c5:   mov          %rdi,%r9                                                                                                                           ▒
       │12c8:   xor          %r8d,%r8d                                                                                                                          ▒
  0.00 │12cb:   mov          %rsi,%rbx                                                                                                                          ▒
       │12ce:   mov          %r10,%r11                                                                                                                          ▒
       │12d1:   mov          %rcx,%rdx                                                                                                                          ▒
       │12d4:   nopl         0x0(%rax)                                                                                                                          ▒
       │12d8:   xor          %eax,%eax                                                                                                                          ▒
  0.00 │12da:   lea          (%r11,%rax,1),%r12                                                                                                                 ▒
  0.00 │12de:   lea          (%r9,%rax,1),%r13                                                                                                                  ▒
  0.01 │12e2:   mov          %r12,%r15                                                                                                                          ▒
       │12e5:   mov          %r13,%r14                                                                                                                          ▒
  0.08 │12e8:   mov          %rbx,%r13                                                                                                                          ▒
  0.00 │12eb:   mov          $0xf,%r12                                                                                                                          ▒
  0.40 │12f2:   vmovapd      (%r15),%ymm0                                                                                                                       ▒
  2.30 │12f7:   vmovapd      0x20(%r15),%ymm1                                                                                                                   ▒
  0.08 │12fd:   vmovapd      0x2000(%r15),%ymm2                                                                                                                 ▒
  0.74 │1306:   vmovapd      0x2020(%r15),%ymm3                                                                                                                 ▒
  0.09 │130f:   vmovapd      0x4000(%r15),%ymm4                                                                                                                 ▒
  0.83 │1318:   vmovapd      0x4020(%r15),%ymm5                                                                                                                 ▒
  0.08 │1321:   vmovapd      0x6000(%r15),%ymm6                                                                                                                 ▒
  0.93 │132a:   vmovapd      0x6020(%r15),%ymm7                                                                                                                 ▒
  0.44 │1333:   vmovapd      (%r14),%ymm14                                                                                                                      ▒
  0.08 │1338:   vmovapd      0x20(%r14),%ymm15                                                                                                                  ▒
  0.01 │133e:   vbroadcastsd 0x0(%r13),%ymm10                                                                                                                   ▒
  0.16 │1344:   vbroadcastsd 0x2100(%r13),%ymm11                                                                                                                ▒
  0.17 │134d:   vbroadcastsd 0x4200(%r13),%ymm12                                                                                                                ▒
  0.16 │1356:   vbroadcastsd 0x6300(%r13),%ymm13                                                                                                                ▒
  0.20 │135f:   nop                                                                                                                                             ▒
  0.01 │1360:   vmovapd      0x2200(%r14),%ymm8                                                                                                                 ▒
  3.94 │1369:   vmovapd      0x2220(%r14),%ymm9                                                                                                                 ▒
  0.70 │1372:   vfmadd231pd  %ymm10,%ymm14,%ymm0                                                                                                                ▒
  3.78 │1377:   vfmadd231pd  %ymm10,%ymm15,%ymm1                                                                                                                ▒
  3.63 │137c:   vbroadcastsd 0x8(%r13),%ymm10                                                                                                                   ▒
  1.02 │1382:   vfmadd231pd  %ymm11,%ymm14,%ymm2                                                                                                                ▒
  3.91 │1387:   vfmadd231pd  %ymm11,%ymm15,%ymm3                                                                                                                ▒
  3.81 │138c:   vbroadcastsd 0x2108(%r13),%ymm11                                                                                                                ▒
  0.01 │1395:   vfmadd231pd  %ymm12,%ymm14,%ymm4                                                                                                                ▒
  5.50 │139a:   vfmadd231pd  %ymm12,%ymm15,%ymm5                                                                                                                ▒
  3.87 │139f:   vbroadcastsd 0x4208(%r13),%ymm12                                                                                                                ▒
  0.05 │13a8:   vfmadd231pd  %ymm13,%ymm14,%ymm6                                                                                                                ▒
  5.64 │13ad:   vfmadd231pd  %ymm13,%ymm15,%ymm7                                                                                                                ▒
  3.98 │13b2:   vbroadcastsd 0x6308(%r13),%ymm13                                                                                                                ▒
  0.02 │13bb:   add          $0x4400,%r14                                                                                                                       ◆
  0.31 │13c2:   add          $0x10,%r13                                                                                                                         ▒
  0.00 │13c6:   vmovapd      (%r14),%ymm14                                                                                                                      ▒
  3.79 │13cb:   vmovapd      0x20(%r14),%ymm15                                                                                                                  ▒
  0.74 │13d1:   vfmadd231pd  %ymm10,%ymm8,%ymm0                                                                                                                 ▒
  3.83 │13d6:   vfmadd231pd  %ymm10,%ymm9,%ymm1                                                                                                                 ▒
  3.79 │13db:   vbroadcastsd 0x0(%r13),%ymm10                                                                                                                   ▒
  1.12 │13e1:   vfmadd231pd  %ymm11,%ymm8,%ymm2                                                                                                                 ▒
  3.93 │13e6:   vfmadd231pd  %ymm11,%ymm9,%ymm3                                                                                                                 ▒
  3.91 │13eb:   vbroadcastsd 0x2100(%r13),%ymm11                                                                                                                ▒
  0.10 │13f4:   vfmadd231pd  %ymm12,%ymm8,%ymm4                                                                                                                 ▒
  5.40 │13f9:   vfmadd231pd  %ymm12,%ymm9,%ymm5                                                                                                                 ▒
  4.03 │13fe:   vbroadcastsd 0x4200(%r13),%ymm12                                                                                                                ▒
  0.12 │1407:   vfmadd231pd  %ymm13,%ymm8,%ymm6                                                                                                                 ▒
  5.76 │140c:   vfmadd231pd  %ymm13,%ymm9,%ymm7                                                                                                                 ▒
  4.25 │1411:   vbroadcastsd 0x6300(%r13),%ymm13                                                                                                                ▒
  0.07 │141a:   dec          %r12                                                                                                                               ▒
  0.30 │141d: ↑ jne          1360 <main+0x300>                                                                                                                  ▒
  0.00 │1423:   vmovapd      0x2200(%r14),%ymm8                                                                                                                 ▒
  0.02 │142c:   vmovapd      0x2220(%r14),%ymm9                                                                                                                 ▒
  0.04 │1435:   vfmadd231pd  %ymm10,%ymm14,%ymm0                                                                                                                ▒
  0.29 │143a:   vfmadd231pd  %ymm10,%ymm15,%ymm1                                                                                                                ▒
  0.30 │143f:   vbroadcastsd 0x8(%r13),%ymm10                                                                                                                   ▒
  0.06 │1445:   vfmadd231pd  %ymm11,%ymm14,%ymm2                                                                                                                ▒
  0.28 │144a:   vfmadd231pd  %ymm11,%ymm15,%ymm3                                                                                                                ▒
  0.29 │144f:   vbroadcastsd 0x2108(%r13),%ymm11                                                                                                                ▒
  0.00 │1458:   vfmadd231pd  %ymm12,%ymm14,%ymm4                                                                                                                ▒
  0.34 │145d:   vfmadd231pd  %ymm12,%ymm15,%ymm5                                                                                                                ▒
  0.28 │1462:   vbroadcastsd 0x4208(%r13),%ymm12                                                                                                                ▒
  0.00 │146b:   vfmadd231pd  %ymm13,%ymm14,%ymm6                                                                                                                ▒
  0.38 │1470:   vfmadd231pd  %ymm13,%ymm15,%ymm7                                                                                                                ▒
  0.31 │1475:   vbroadcastsd 0x6308(%r13),%ymm13                                                                                                                ▒
  0.00 │147e:   vfmadd231pd  %ymm10,%ymm8,%ymm0                                                                                                                 ▒
  0.33 │1483:   vfmadd231pd  %ymm10,%ymm9,%ymm1                                                                                                                 ▒
  0.31 │1488:   vfmadd231pd  %ymm11,%ymm8,%ymm2                                                                                                                 ▒
  0.30 │148d:   vfmadd231pd  %ymm11,%ymm9,%ymm3                                                                                                                 ▒
  0.33 │1492:   vfmadd231pd  %ymm12,%ymm8,%ymm4                                                                                                                 ▒
  0.36 │1497:   vfmadd231pd  %ymm12,%ymm9,%ymm5                                                                                                                 ▒
  0.29 │149c:   vfmadd231pd  %ymm13,%ymm8,%ymm6                                                                                                                 ▒
  0.40 │14a1:   vfmadd231pd  %ymm13,%ymm9,%ymm7                                                                                                                 ▒
  0.29 │14a6:   vmovapd      %ymm0,(%r15)                                                                                                                       ▒
  0.02 │14ab:   vmovapd      %ymm1,0x20(%r15)                                                                                                                   ▒
  0.02 │14b1:   vmovapd      %ymm2,0x2000(%r15)                                                                                                                 ▒
  0.21 │14ba:   vmovapd      %ymm3,0x2020(%r15)                                                                                                                 ▒
  0.40 │14c3:   vmovapd      %ymm4,0x4000(%r15)                                                                                                                 ▒
  0.45 │14cc:   vmovapd      %ymm5,0x4020(%r15)                                                                                                                 ▒
  0.45 │14d5:   vmovapd      %ymm6,0x6000(%r15)                                                                                                                 ▒
  0.53 │14de:   vmovapd      %ymm7,0x6020(%r15)                                                                                                                 ▒
  0.50 │14e7:   add          $0x40,%rax                                                                                                                         ▒
  0.00 │14eb:   cmp          $0x200,%rax                                                                                                                        ▒
  0.01 │14f1: ↑ jne          12da <main+0x27a>                                                                                                                  ▒
       │14f7:   add          $0x1000,%rdx                                                                                                                       ▒
  0.07 │14fe:   add          $0x8000,%r11                                                                                                                       ▒
  0.01 │1505:   add          $0x8400,%rbx                                                                                                                       ▒
       │150c:   cmp          0x10(%rsp),%rdx                                                                                                                    ▒
  0.09 │1511: ↑ jne          12d8 <main+0x278>                                                                                                                  ▒
       │1517:   add          $0x40,%r8                                                                                                                          ▒
  0.00 │151b:   add          $0x200,%r9                                                                                                                         ▒
       │1522:   add          $0x200,%r10                                                                                                                        ▒
  0.00 │1529:   cmp          $0x400,%r8                                                                                                                         ▒
  0.00 │1530: ↑ jne          12cb <main+0x26b>                                                                                                                  ▒
       │1536:   addq         $0x20,(%rsp)                                                                                                                       ▒
  0.00 │153b:   mov          (%rsp),%rax                                                                                                                        ▒
  0.00 │153f:   add          $0x100,%rsi                                                                                                                        ▒
  0.00 │1546:   add          $0x44000,%rdi                                                                                                                      ▒
       │154d:   cmp          $0x400,%rax                                                                                                                        ◆
       │1553: ↑ jne          12c0 <main+0x260>                                                                                                                  ▒
       │1559:   addq         $0x200000,0x8(%rsp)                                                                                                                ▒
  0.00 │1562:   add          $0x210000,%rbp                                                                                                                     ▒
       │1569:   cmpl         $0x400,0x18(%rsp)                                                                                                                  ▒
       │1571: ↓ je           1580 <main+0x520>                                                                                                                  ▒
       │1573:   mov          %rdx,%rcx                                                                                                                          ▒
       │1576: ↑ jmpq         1290 <main+0x230>                                                                                                                  ▒
       │157b:   nopl         0x0(%rax,%rax,1)                                                                                                                   ▒
       │1580:   subl         $0x1,0x1c(%rsp)                                                                                                                    ▒
       │1585: ↑ jne          1080 <main+0x20>                                                                                                                   ▒
       │158b:   xor          %eax,%eax                                                                                                                          ▒
       │158d:   vzeroupper                                                                                                                                      ▒
       │1590:   add          $0x28,%rsp                                                                                                                         ▒
       │1594:   pop          %rbx                                                                                                                               ▒
       │1595:   pop          %rbp                                                                                                                               ▒
       │1596:   pop          %r12                                                                                                                               ▒
       │1598:   pop          %r13                                                                                                                               ▒
       │159a:   pop          %r14                                                                                                                               ▒
       │159c:   pop          %r15                                                                                                                               ▒
       │159e: ← retq                                                                                                                                            ▒
Press 'h' for help on key bindings                                                                                                                              ◆

Judge Duck Online | 评测鸭在线
Server Time: 2025-01-18 16:02:10 | Loaded in 24 ms | Server Status
个人娱乐项目,仅供学习交流使用 | 捐赠