Perf of https://duck.ac/submission/14004
由 user1 于 2023-05-09 10:56:58 发表,最后修改于 2023-05-09 11:05:38
Percent│ ◆
│ /tmp/a.out: 文件格式 elf64-x86-64 ▒
│ ▒
│ ▒
│ Disassembly of section .text: ▒
│ ▒
│ 0000000000001060 <main>: ▒
│ main(): ▒
│1060: endbr64 ▒
│1064: push %r15 ▒
│1066: push %r14 ▒
│1068: push %r13 ▒
│106a: push %r12 ▒
│106c: push %rbp ▒
│106d: push %rbx ▒
│106e: sub $0x28,%rsp ▒
│1072: movl $0x64,0x1c(%rsp) ▒
│107a: nopw 0x0(%rax,%rax,1) ▒
│1080: lea 0x2082fb9(%rip),%rbp # 2084040 <matrix_multiply(int, double const*, double const*, double*)::A> ▒
│1087: lea 0x1002fb2(%rip),%rdx # 1004040 <A> ▒
│108e: mov %rbp,%rax ▒
│1091: nopl 0x0(%rax) ▒
│1098: mov %rdx,%r15 ▒
│109b: mov %rax,%r14 ▒
│109e: mov $0xffffffffffffe000,%r13 ▒
0.00 │10a5: nopl (%rax) ▒
0.00 │10a8: vmovapd 0x2000(%r15,%r13,1),%ymm0 ▒
0.03 │10b2: vmovapd 0x2020(%r15,%r13,1),%ymm1 ▒
0.00 │10bc: vmovapd 0x2040(%r15,%r13,1),%ymm2 ▒
0.01 │10c6: vmovapd 0x2060(%r15,%r13,1),%ymm3 ▒
0.01 │10d0: vmovapd 0x2080(%r15,%r13,1),%ymm4 ▒
0.02 │10da: vmovapd 0x20a0(%r15,%r13,1),%ymm5 ▒
0.01 │10e4: vmovapd 0x20c0(%r15,%r13,1),%ymm6 ▒
0.11 │10ee: vmovapd 0x20e0(%r15,%r13,1),%ymm7 ▒
0.00 │10f8: vmovapd %ymm0,0x2000(%r14,%r13,1) ▒
0.48 │1102: vmovapd %ymm1,0x2020(%r14,%r13,1) ▒
0.01 │110c: vmovapd %ymm2,0x2040(%r14,%r13,1) ▒
0.24 │1116: vmovapd %ymm3,0x2060(%r14,%r13,1) ▒
0.01 │1120: vmovapd %ymm4,0x2080(%r14,%r13,1) ▒
0.45 │112a: vmovapd %ymm5,0x20a0(%r14,%r13,1) ▒
0.01 │1134: vmovapd %ymm6,0x20c0(%r14,%r13,1) ▒
0.43 │113e: vmovapd %ymm7,0x20e0(%r14,%r13,1) ▒
0.00 │1148: add $0x100,%r13 ▒
0.00 │114f: ↑ jne 10a8 <main+0x48> ▒
│1155: lea 0x28c2ee4(%rip),%rbx # 28c4040 <_end> ▒
│115c: add $0x2100,%rax ▒
│1162: add $0x2000,%rdx ▒
│1169: cmp %rbx,%rax ▒
0.00 │116c: ↑ jne 1098 <main+0x38> ▒
│1172: lea 0x802ec7(%rip),%rdx # 804040 <B> ▒
│1179: lea 0x1802ec0(%rip),%rax # 1804040 <matrix_multiply(int, double const*, double const*, double*)::B> ▒
│1180: mov %rdx,%r15 ▒
│1183: mov %rax,%r14 ▒
0.00 │1186: mov $0xffffffffffffe000,%r13 ▒
│118d: nopl (%rax) ▒
0.00 │1190: vmovapd 0x2000(%r15,%r13,1),%ymm0 ▒
0.04 │119a: vmovapd 0x2020(%r15,%r13,1),%ymm1 ▒
0.00 │11a4: vmovapd 0x2040(%r15,%r13,1),%ymm2 ▒
0.01 │11ae: vmovapd 0x2060(%r15,%r13,1),%ymm3 ▒
0.01 │11b8: vmovapd 0x2080(%r15,%r13,1),%ymm4 ▒
0.03 │11c2: vmovapd 0x20a0(%r15,%r13,1),%ymm5 ▒
0.01 │11cc: vmovapd 0x20c0(%r15,%r13,1),%ymm6 ▒
0.12 │11d6: vmovapd 0x20e0(%r15,%r13,1),%ymm7 ▒
0.00 │11e0: vmovapd %ymm0,0x2000(%r14,%r13,1) ▒
0.54 │11ea: vmovapd %ymm1,0x2020(%r14,%r13,1) ▒
0.01 │11f4: vmovapd %ymm2,0x2040(%r14,%r13,1) ▒
0.26 │11fe: vmovapd %ymm3,0x2060(%r14,%r13,1) ▒
0.01 │1208: vmovapd %ymm4,0x2080(%r14,%r13,1) ▒
0.56 │1212: vmovapd %ymm5,0x20a0(%r14,%r13,1) ▒
0.01 │121c: vmovapd %ymm6,0x20c0(%r14,%r13,1) ▒
0.51 │1226: vmovapd %ymm7,0x20e0(%r14,%r13,1) ▒
0.00 │1230: add $0x100,%r13 ▒
│1237: ↑ jne 1190 <main+0x130> ▒
│123d: lea 0x2082dfc(%rip),%rbx # 2084040 <matrix_multiply(int, double const*, double const*, double*)::A> ▒
│1244: add $0x2200,%rax ◆
│124a: add $0x2000,%rdx ▒
│1251: cmp %rax,%rbx ▒
0.00 │1254: ↑ jne 1180 <main+0x120> ▒
│125a: mov $0x800000,%edx ▒
│125f: xor %esi,%esi ▒
│1261: lea 0x2dd8(%rip),%rdi # 4040 <C> ▒
│1268: vzeroupper ▒
│126b: → callq 1050 <memset@plt> ▒
0.00 │1270: lea 0x2dc9(%rip),%rax # 4040 <C> ▒
0.00 │1277: xor %ecx,%ecx ▒
│1279: mov %rax,0x8(%rsp) ▒
│127e: movl $0x0,0x18(%rsp) ▒
│1286: nopw %cs:0x0(%rax,%rax,1) ▒
│1290: lea 0x40000(%rcx),%rax ▒
0.00 │1297: addl $0x100,0x18(%rsp) ▒
│129f: mov %rbp,%rsi ▒
│12a2: movq $0x0,(%rsp) ▒
│12aa: lea 0x1802d8f(%rip),%rdi # 1804040 <matrix_multiply(int, double const*, double const*, double*)::B> ▒
│12b1: mov %rax,0x10(%rsp) ▒
│12b6: nopw %cs:0x0(%rax,%rax,1) ▒
│12c0: mov 0x8(%rsp),%r10 ▒
0.00 │12c5: mov %rdi,%r9 ▒
│12c8: xor %r8d,%r8d ▒
0.00 │12cb: mov %rsi,%rbx ▒
│12ce: mov %r10,%r11 ▒
│12d1: mov %rcx,%rdx ▒
│12d4: nopl 0x0(%rax) ▒
│12d8: xor %eax,%eax ▒
0.00 │12da: lea (%r11,%rax,1),%r12 ▒
0.00 │12de: lea (%r9,%rax,1),%r13 ▒
0.01 │12e2: mov %r12,%r15 ▒
│12e5: mov %r13,%r14 ▒
0.08 │12e8: mov %rbx,%r13 ▒
0.00 │12eb: mov $0xf,%r12 ▒
0.40 │12f2: vmovapd (%r15),%ymm0 ▒
2.30 │12f7: vmovapd 0x20(%r15),%ymm1 ▒
0.08 │12fd: vmovapd 0x2000(%r15),%ymm2 ▒
0.74 │1306: vmovapd 0x2020(%r15),%ymm3 ▒
0.09 │130f: vmovapd 0x4000(%r15),%ymm4 ▒
0.83 │1318: vmovapd 0x4020(%r15),%ymm5 ▒
0.08 │1321: vmovapd 0x6000(%r15),%ymm6 ▒
0.93 │132a: vmovapd 0x6020(%r15),%ymm7 ▒
0.44 │1333: vmovapd (%r14),%ymm14 ▒
0.08 │1338: vmovapd 0x20(%r14),%ymm15 ▒
0.01 │133e: vbroadcastsd 0x0(%r13),%ymm10 ▒
0.16 │1344: vbroadcastsd 0x2100(%r13),%ymm11 ▒
0.17 │134d: vbroadcastsd 0x4200(%r13),%ymm12 ▒
0.16 │1356: vbroadcastsd 0x6300(%r13),%ymm13 ▒
0.20 │135f: nop ▒
0.01 │1360: vmovapd 0x2200(%r14),%ymm8 ▒
3.94 │1369: vmovapd 0x2220(%r14),%ymm9 ▒
0.70 │1372: vfmadd231pd %ymm10,%ymm14,%ymm0 ▒
3.78 │1377: vfmadd231pd %ymm10,%ymm15,%ymm1 ▒
3.63 │137c: vbroadcastsd 0x8(%r13),%ymm10 ▒
1.02 │1382: vfmadd231pd %ymm11,%ymm14,%ymm2 ▒
3.91 │1387: vfmadd231pd %ymm11,%ymm15,%ymm3 ▒
3.81 │138c: vbroadcastsd 0x2108(%r13),%ymm11 ▒
0.01 │1395: vfmadd231pd %ymm12,%ymm14,%ymm4 ▒
5.50 │139a: vfmadd231pd %ymm12,%ymm15,%ymm5 ▒
3.87 │139f: vbroadcastsd 0x4208(%r13),%ymm12 ▒
0.05 │13a8: vfmadd231pd %ymm13,%ymm14,%ymm6 ▒
5.64 │13ad: vfmadd231pd %ymm13,%ymm15,%ymm7 ▒
3.98 │13b2: vbroadcastsd 0x6308(%r13),%ymm13 ▒
0.02 │13bb: add $0x4400,%r14 ◆
0.31 │13c2: add $0x10,%r13 ▒
0.00 │13c6: vmovapd (%r14),%ymm14 ▒
3.79 │13cb: vmovapd 0x20(%r14),%ymm15 ▒
0.74 │13d1: vfmadd231pd %ymm10,%ymm8,%ymm0 ▒
3.83 │13d6: vfmadd231pd %ymm10,%ymm9,%ymm1 ▒
3.79 │13db: vbroadcastsd 0x0(%r13),%ymm10 ▒
1.12 │13e1: vfmadd231pd %ymm11,%ymm8,%ymm2 ▒
3.93 │13e6: vfmadd231pd %ymm11,%ymm9,%ymm3 ▒
3.91 │13eb: vbroadcastsd 0x2100(%r13),%ymm11 ▒
0.10 │13f4: vfmadd231pd %ymm12,%ymm8,%ymm4 ▒
5.40 │13f9: vfmadd231pd %ymm12,%ymm9,%ymm5 ▒
4.03 │13fe: vbroadcastsd 0x4200(%r13),%ymm12 ▒
0.12 │1407: vfmadd231pd %ymm13,%ymm8,%ymm6 ▒
5.76 │140c: vfmadd231pd %ymm13,%ymm9,%ymm7 ▒
4.25 │1411: vbroadcastsd 0x6300(%r13),%ymm13 ▒
0.07 │141a: dec %r12 ▒
0.30 │141d: ↑ jne 1360 <main+0x300> ▒
0.00 │1423: vmovapd 0x2200(%r14),%ymm8 ▒
0.02 │142c: vmovapd 0x2220(%r14),%ymm9 ▒
0.04 │1435: vfmadd231pd %ymm10,%ymm14,%ymm0 ▒
0.29 │143a: vfmadd231pd %ymm10,%ymm15,%ymm1 ▒
0.30 │143f: vbroadcastsd 0x8(%r13),%ymm10 ▒
0.06 │1445: vfmadd231pd %ymm11,%ymm14,%ymm2 ▒
0.28 │144a: vfmadd231pd %ymm11,%ymm15,%ymm3 ▒
0.29 │144f: vbroadcastsd 0x2108(%r13),%ymm11 ▒
0.00 │1458: vfmadd231pd %ymm12,%ymm14,%ymm4 ▒
0.34 │145d: vfmadd231pd %ymm12,%ymm15,%ymm5 ▒
0.28 │1462: vbroadcastsd 0x4208(%r13),%ymm12 ▒
0.00 │146b: vfmadd231pd %ymm13,%ymm14,%ymm6 ▒
0.38 │1470: vfmadd231pd %ymm13,%ymm15,%ymm7 ▒
0.31 │1475: vbroadcastsd 0x6308(%r13),%ymm13 ▒
0.00 │147e: vfmadd231pd %ymm10,%ymm8,%ymm0 ▒
0.33 │1483: vfmadd231pd %ymm10,%ymm9,%ymm1 ▒
0.31 │1488: vfmadd231pd %ymm11,%ymm8,%ymm2 ▒
0.30 │148d: vfmadd231pd %ymm11,%ymm9,%ymm3 ▒
0.33 │1492: vfmadd231pd %ymm12,%ymm8,%ymm4 ▒
0.36 │1497: vfmadd231pd %ymm12,%ymm9,%ymm5 ▒
0.29 │149c: vfmadd231pd %ymm13,%ymm8,%ymm6 ▒
0.40 │14a1: vfmadd231pd %ymm13,%ymm9,%ymm7 ▒
0.29 │14a6: vmovapd %ymm0,(%r15) ▒
0.02 │14ab: vmovapd %ymm1,0x20(%r15) ▒
0.02 │14b1: vmovapd %ymm2,0x2000(%r15) ▒
0.21 │14ba: vmovapd %ymm3,0x2020(%r15) ▒
0.40 │14c3: vmovapd %ymm4,0x4000(%r15) ▒
0.45 │14cc: vmovapd %ymm5,0x4020(%r15) ▒
0.45 │14d5: vmovapd %ymm6,0x6000(%r15) ▒
0.53 │14de: vmovapd %ymm7,0x6020(%r15) ▒
0.50 │14e7: add $0x40,%rax ▒
0.00 │14eb: cmp $0x200,%rax ▒
0.01 │14f1: ↑ jne 12da <main+0x27a> ▒
│14f7: add $0x1000,%rdx ▒
0.07 │14fe: add $0x8000,%r11 ▒
0.01 │1505: add $0x8400,%rbx ▒
│150c: cmp 0x10(%rsp),%rdx ▒
0.09 │1511: ↑ jne 12d8 <main+0x278> ▒
│1517: add $0x40,%r8 ▒
0.00 │151b: add $0x200,%r9 ▒
│1522: add $0x200,%r10 ▒
0.00 │1529: cmp $0x400,%r8 ▒
0.00 │1530: ↑ jne 12cb <main+0x26b> ▒
│1536: addq $0x20,(%rsp) ▒
0.00 │153b: mov (%rsp),%rax ▒
0.00 │153f: add $0x100,%rsi ▒
0.00 │1546: add $0x44000,%rdi ▒
│154d: cmp $0x400,%rax ◆
│1553: ↑ jne 12c0 <main+0x260> ▒
│1559: addq $0x200000,0x8(%rsp) ▒
0.00 │1562: add $0x210000,%rbp ▒
│1569: cmpl $0x400,0x18(%rsp) ▒
│1571: ↓ je 1580 <main+0x520> ▒
│1573: mov %rdx,%rcx ▒
│1576: ↑ jmpq 1290 <main+0x230> ▒
│157b: nopl 0x0(%rax,%rax,1) ▒
│1580: subl $0x1,0x1c(%rsp) ▒
│1585: ↑ jne 1080 <main+0x20> ▒
│158b: xor %eax,%eax ▒
│158d: vzeroupper ▒
│1590: add $0x28,%rsp ▒
│1594: pop %rbx ▒
│1595: pop %rbp ▒
│1596: pop %r12 ▒
│1598: pop %r13 ▒
│159a: pop %r14 ▒
│159c: pop %r15 ▒
│159e: ← retq ▒
Press 'h' for help on key bindings ◆
Judge Duck Online | 评测鸭在线
Server Time: 2025-01-18 16:02:10 | Loaded in 24 ms | Server Status
个人娱乐项目,仅供学习交流使用 | 捐赠