diff --git a/doc/osaca-workflow.svg b/doc/osaca-workflow.svg deleted file mode 100644 index 22fcf41..0000000 --- a/doc/osaca-workflow.svg +++ /dev/null @@ -1,1767 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - image/svg+xml - - - - - - - - - // Multiply i with immediate // and add to arrayint t = 0.19;int main(void){ int i = 0; //STARTLOOP while(i < 1000){ arr[i] = arr[i-1] + i * t; i += 1; }} - - - User input - - - - - - - Port Binding in Cycles Per Iteration:----------------------------------------------| Port | 0 | 1 | 2 | 3 | 4 | 5 |----------------------------------------------| Cycles | 4.0 | 5.0 | 3.0 | 3.0 | 2.0 | 2.0 |---------------------------------------------- Ports Pressure in cycles | 0 | 1 | 2 | 3 | 4 | 5 |-------------------------------------------| | | 1.00 | 1.00 | | | lea 0x1(%rax,%rax,1),%edx| 0.50 | 1.00 | | | | 0.50 | vcvtsi2ss %edx,%xmm2,%xmm2| 1.00 | | | | | | vmulss %xmm2,%xmm0,%xmm3| | | 1.00 | 1.00 | | | lea 0x2(%rax,%rax,1),%ecx| | 1.00 | | | | | vaddss %xmm3,%xmm1,%xmm4| 0.33 | 0.33 | | | | 0.33 | vxorps %xmm1,%xmm1,%xmm1| 0.50 | 1.00 | | | | 0.50 | vcvtsi2ss %ecx,%xmm1,%xmm1| 1.00 | | | | | | vmulss %xmm1,%xmm0,%xmm5| | | 0.50 | 0.50 | 1.00 | | vmovss %xmm4,0x4(%rsp,%rax,8)| | 1.00 | | | | | vaddss %xmm5,%xmm4,%xmm1| | | 0.50 | 0.50 | 1.00 | | vmovss %xmm1,0x8(%rsp,%rax,8)| 0.33 | 0.33 | | | | 0.33 | inc %rax| 0.33 | 0.33 | | | | 0.33 | cmp $0x1f3,%rax| | | | | | | jb 400bc2 <main+0x62>Total number of estimated throughput: 5.0 - Throughput analysis - - - - - - - Memory - - - - GPR - - - - XMM - - - Extract instructions - - - - - - - - - - - - - - - - - - CSV - - OSACA - - - - - - - - - - - - - - - - - - - - - - - - - - - xor %eax,%eaxlea 0x1(%rax,%rax,1),%edxvcvtsi2ss %edx,%xmm2,%xmm2 vmulss %xmm2,%xmm0,%xmm3lea 0x2(%rax,%rax,1),%ecxvaddss %xmm3,%xmm1,%xmm4 vxorps %xmm1,%xmm1,%xmm1vcvtsi2ss %ecx,%xmm1,%xmm1vmulss %xmm1,%xmm0,%xmm5vmovss %xmm4,0x4(%rsp,%rax,8) - - ibench - - - - - #define INSTR vcvtsi2ss#define NINST 32#define N edi#define i r8d.intel_syntaxnoprefix.globl ninst.dataninst:.long NINST.align32PI:.long0xf01b866e, 0x400921f9.text.globllatency.typelatency, @function.align32loop: inc i INSTR xmm3, xmm0, eax INSTR xmm4, xmm1, ebx INSTR xmm5, xmm2, ecx INSTR xmm6, xmm0, eax INSTR xmm7, xmm1, ebx INSTR xmm8, xmm2, ecx - - Benchmark file - - - - - - #define INSTR vcvtsi2ss#define NINST 32#define N edi#define i r8d.intel_syntaxnoprefix.globl ninst.dataninst:.long NINST.align32PI:.long0xf01b866e, 0x400921f9.text.globllatency.typelatency, @function.align32loop: inc i INSTR xmm3, xmm0, eax INSTR xmm4, xmm1, ebx INSTR xmm5, xmm2, ecx INSTR xmm6, xmm0, eax INSTR xmm7, xmm1, ebx INSTR xmm8, xmm2, ecx - - Benchmark file - - - - - - #define INSTR vcvtsi2ss#define NINST 32#define N edi#define i r8d.intel_syntaxnoprefix.globl ninst.dataninst:.long NINST.align32PI:.long0xf01b866e, 0x400921f9.text.globllatency.typelatency, @function.align32loop: inc i INSTR xmm3, xmm0, eax INSTR xmm4, xmm1, ebx INSTR xmm5, xmm2, ecx INSTR xmm6, xmm0, eax INSTR xmm7, xmm1, ebx INSTR xmm8, xmm2, ecx - - Benchmark file - - - - - - vcvtsi2ss-xmm_xmm_r32 1.0vcvtsi2ss-xmm_xmm_r32-TP 3.0vmulss-xmm_xmm_xmm 1.0 - - ibench output - - -