diff --git a/osaca/parser/base_parser.py b/osaca/parser/base_parser.py index ebe695f..0525d37 100755 --- a/osaca/parser/base_parser.py +++ b/osaca/parser/base_parser.py @@ -29,7 +29,7 @@ class BaseParser(object): for i, line in enumerate(lines): if line == '': continue - asm_instructions.append(self.parseLine(line, i + 1)) + asm_instructions.append(self.parse_line(line, i + 1)) return asm_instructions def parse_line(self, line, line_number): @@ -40,17 +40,5 @@ class BaseParser(object): # Done in derived classes raise NotImplementedError() - def parse_register(self, register): - # Done in derived classed - raise NotImplementedError() - - def parse_memory(self, memory_address): - # Done in derived classed - raise NotImplementedError() - - def parse_immediate(self, immediate): - # Done in derived classed - raise NotImplementedError() - def construct_parser(self): raise NotImplementedError() diff --git a/osaca/parser/parser_x86att.py b/osaca/parser/parser_x86att.py index 55124d9..42df884 100755 --- a/osaca/parser/parser_x86att.py +++ b/osaca/parser/parser_x86att.py @@ -28,9 +28,11 @@ class ParserX86ATT(BaseParser): pp.Optional(pp.Literal('-')) + pp.Word(pp.nums) ).setResultsName('value') hex_number = pp.Combine(pp.Literal('0x') + pp.Word(pp.hexnums)).setResultsName('value') - commaSeparatedList = pp.delimitedList( - pp.Optional(pp.quotedString | identifier | hex_number | decimal_number), delim=',' + directive_option = pp.Combine(pp.Word('#@.', exact=1) + pp.Word(pp.printables)) + directive_parameter = ( + pp.quotedString | directive_option | identifier | hex_number | decimal_number ) + commaSeparatedList = pp.delimitedList(pp.Optional(directive_parameter), delim=',') self.directive = pp.Group( pp.Literal('.') + pp.Word(pp.alphanums + '_').setResultsName('name') @@ -58,8 +60,7 @@ class ParserX86ATT(BaseParser): # Immediate: pp.Regex('^\$(-?[0-9]+)|(0x[0-9a-fA-F]+),?') symbol_immediate = '$' immediate = pp.Group( - pp.Literal(symbol_immediate) - + (hex_number | decimal_number) + pp.Literal(symbol_immediate) + (hex_number | decimal_number | identifier) ).setResultsName(self.IMMEDIATE_ID) # Memory: offset(base, index, scale) offset = identifier | hex_number | decimal_number @@ -140,7 +141,14 @@ class ParserX86ATT(BaseParser): # 4. Parse instruction if result is None: - result = self.parse_instruction(line) + try: + result = self.parse_instruction(line) + except pp.ParseException: + print( + '\n\n*-*-*-*-*-*-*-*-*-*-\n{}: {}\n*-*-*-*-*-*-*-*-*-*-\n\n'.format( + line_number, line + ) + ) instruction_form['instruction'] = result['instruction'] instruction_form['operands'] = result['operands'] instruction_form['comment'] = result['comment'] @@ -153,19 +161,19 @@ class ParserX86ATT(BaseParser): # Check from right to left # Check third operand if 'operand3' in result: - operands['destination'] = self.process_operand(result['operand3']) + operands['destination'] = self._process_operand(result['operand3']) # Check second operand if 'operand2' in result: if 'destination' in operands: - operands['sources'].insert(0, self.process_operand(result['operand2'])) + operands['sources'].insert(0, self._process_operand(result['operand2'])) else: - operands['destination'] = self.process_operand(result['operand2']) + operands['destination'] = self._process_operand(result['operand2']) # Check first operand if 'operand1' in result: if 'destination' in operands: - operands['sources'].insert(0, self.process_operand(result['operand1'])) + operands['sources'].insert(0, self._process_operand(result['operand1'])) else: - operands['destination'] = self.process_operand(result['operand1']) + operands['destination'] = self._process_operand(result['operand1']) return_dict = { 'instruction': result['mnemonic'], 'operands': operands, @@ -173,7 +181,7 @@ class ParserX86ATT(BaseParser): } return return_dict - def process_operand(self, operand): + def _process_operand(self, operand): # For the moment, only used to structure memory addresses if 'memory' in operand: return self.substitute_memory_address(operand['memory']) @@ -185,10 +193,5 @@ class ParserX86ATT(BaseParser): base = None if 'base' not in memory_address else memory_address['base'] index = None if 'index' not in memory_address else memory_address['index'] scale = '1' if 'scale' not in memory_address else memory_address['scale'] - new_dict = { - 'offset': offset, - 'base': base, - 'index': index, - 'scale': scale, - } + new_dict = {'offset': offset, 'base': base, 'index': index, 'scale': scale} return {'memory': new_dict} diff --git a/tests/test_files/triad-iaca.s b/tests/test_files/triad-iaca.s new file mode 100644 index 0000000..07afa50 --- /dev/null +++ b/tests/test_files/triad-iaca.s @@ -0,0 +1,353 @@ + .file "triad.c" + .section .rodata.str1.8,"aMS",@progbits,1 + .align 8 +.LC9: + .string "%12.1f | %9.8f | %9.3f | %7.1f | %7.1f | %7d | %4d \n" + .text + .p2align 4,,15 + .globl triad + .type triad, @function +triad: +.LFB24: + .cfi_startproc + pushq %r13 + .cfi_def_cfa_offset 16 + .cfi_offset 13, -16 + movslq %edi, %rax + movl $64, %edi + leaq 16(%rsp), %r13 + .cfi_def_cfa 13, 0 + andq $-32, %rsp + pushq -8(%r13) + pushq %rbp + .cfi_escape 0x10,0x6,0x2,0x76,0 + movq %rsp, %rbp + pushq %r15 + .cfi_escape 0x10,0xf,0x2,0x76,0x78 + leaq 0(,%rax,8), %r15 + pushq %r14 + movq %r15, %rsi + pushq %r13 + .cfi_escape 0xf,0x3,0x76,0x68,0x6 + .cfi_escape 0x10,0xe,0x2,0x76,0x70 + pushq %r12 + pushq %rbx + .cfi_escape 0x10,0xc,0x2,0x76,0x60 + .cfi_escape 0x10,0x3,0x2,0x76,0x58 + movq %rax, %rbx + subq $72, %rsp + call aligned_alloc + movq %r15, %rsi + movl $64, %edi + movq %rax, %r14 + call aligned_alloc + movq %r15, %rsi + movl $64, %edi + movq %rax, %r12 + call aligned_alloc + movq %r15, %rsi + movl $64, %edi + movq %rax, %r13 + call aligned_alloc + movq %rax, %r15 + leal -1(%rbx), %eax + movl %eax, -96(%rbp) + testl %ebx, %ebx + jle .L2 + cmpl $2, %eax + jbe .L14 + movl %ebx, %esi + vmovapd .LC0(%rip), %ymm0 + xorl %eax, %eax + xorl %ecx, %ecx + shrl $2, %esi + .p2align 4,,10 + .p2align 3 +.L4: + addl $1, %ecx + vmovapd %ymm0, (%r15,%rax) + vmovapd %ymm0, 0(%r13,%rax) + vmovapd %ymm0, (%r12,%rax) + vmovapd %ymm0, (%r14,%rax) + addq $32, %rax + cmpl %ecx, %esi + ja .L4 + movl %ebx, %eax + andl $-4, %eax + cmpl %eax, %ebx + je .L26 + vzeroupper +.L3: + vmovsd .LC1(%rip), %xmm0 + movslq %eax, %rcx + vmovsd %xmm0, (%r15,%rcx,8) + vmovsd %xmm0, 0(%r13,%rcx,8) + vmovsd %xmm0, (%r12,%rcx,8) + vmovsd %xmm0, (%r14,%rcx,8) + leal 1(%rax), %ecx + cmpl %ecx, %ebx + jle .L2 + movslq %ecx, %rcx + addl $2, %eax + vmovsd %xmm0, (%r15,%rcx,8) + vmovsd %xmm0, 0(%r13,%rcx,8) + vmovsd %xmm0, (%r12,%rcx,8) + vmovsd %xmm0, (%r14,%rcx,8) + cmpl %eax, %ebx + jle .L2 + cltq + vmovsd %xmm0, (%r15,%rax,8) + vmovsd %xmm0, 0(%r13,%rax,8) + vmovsd %xmm0, (%r12,%rax,8) + vmovsd %xmm0, (%r14,%rax,8) +.L2: + movl %ebx, %eax + movl $1, -84(%rbp) + movl %ebx, %r10d + andl $-4, %eax + shrl $2, %r10d + movl %eax, -100(%rbp) + .p2align 4,,10 + .p2align 3 +.L13: + leaq -56(%rbp), %rsi + leaq -72(%rbp), %rdi + movl %r10d, -88(%rbp) + call timing + movl -88(%rbp), %r10d + xorl %r11d, %r11d + .p2align 4,,10 + .p2align 3 +.L12: + vmovsd (%r14), %xmm0 + vxorpd %xmm7, %xmm7, %xmm7 + vucomisd %xmm7, %xmm0 + jbe .L6 + movq %r14, %rdi + movl %r11d, -92(%rbp) + movl %r10d, -88(%rbp) + vzeroupper + call dummy + movl -92(%rbp), %r11d + movl -88(%rbp), %r10d +.L6: + testl %ebx, %ebx + jle .L8 + cmpl $2, -96(%rbp) + jbe .L15 + xorl %eax, %eax + xorl %ecx, %ecx + .p2align 4,,10 + .p2align 3 + movl $111, %ebx # INSERTED BY KERNCRAFT IACA MARKER UTILITY + .byte 100 # INSERTED BY KERNCRAFT IACA MARKER UTILITY + .byte 103 # INSERTED BY KERNCRAFT IACA MARKER UTILITY + .byte 144 # INSERTED BY KERNCRAFT IACA MARKER UTILITY +.L10: + vmovapd (%r15,%rax), %ymm0 + vmovapd (%r12,%rax), %ymm3 + addl $1, %ecx + vfmadd132pd 0(%r13,%rax), %ymm3, %ymm0 + vmovapd %ymm0, (%r14,%rax) + addq $32, %rax + cmpl %ecx, %r10d + ja .L10 + movl $222, %ebx # INSERTED BY KERNCRAFT IACA MARKER UTILITY + .byte 100 # INSERTED BY KERNCRAFT IACA MARKER UTILITY + .byte 103 # INSERTED BY KERNCRAFT IACA MARKER UTILITY + .byte 144 # INSERTED BY KERNCRAFT IACA MARKER UTILITY + movl -100(%rbp), %eax + cmpl %ebx, %eax + je .L8 +.L9: + movslq %eax, %rcx + vmovsd 0(%r13,%rcx,8), %xmm0 + vmovsd (%r12,%rcx,8), %xmm5 + vfmadd132sd (%r15,%rcx,8), %xmm5, %xmm0 + vmovsd %xmm0, (%r14,%rcx,8) + leal 1(%rax), %ecx + cmpl %ebx, %ecx + jge .L8 + movslq %ecx, %rcx + addl $2, %eax + vmovsd 0(%r13,%rcx,8), %xmm0 + vmovsd (%r12,%rcx,8), %xmm6 + vfmadd132sd (%r15,%rcx,8), %xmm6, %xmm0 + vmovsd %xmm0, (%r14,%rcx,8) + cmpl %eax, %ebx + jle .L8 + cltq + vmovsd (%r15,%rax,8), %xmm0 + vmovsd (%r12,%rax,8), %xmm4 + vfmadd132sd 0(%r13,%rax,8), %xmm4, %xmm0 + vmovsd %xmm0, (%r14,%rax,8) +.L8: + addl $1, %r11d + cmpl -84(%rbp), %r11d + jne .L12 + leaq -56(%rbp), %rsi + leaq -64(%rbp), %rdi + movl %r11d, -84(%rbp) + movl %r10d, -88(%rbp) + vzeroupper + call timing + vmovsd -64(%rbp), %xmm1 + vsubsd -72(%rbp), %xmm1, %xmm1 + vmovsd .LC3(%rip), %xmm2 + movl -84(%rbp), %r11d + movl -88(%rbp), %r10d + vucomisd %xmm1, %xmm2 + leal (%r11,%r11), %eax + movl %eax, -84(%rbp) + ja .L13 + movl %eax, %esi + vxorpd %xmm6, %xmm6, %xmm6 + vxorpd %xmm0, %xmm0, %xmm0 + movl %ebx, %edx + sarl %esi + vcvtsi2sd %ebx, %xmm0, %xmm0 + movl $.LC9, %edi + movl $5, %eax + vcvtsi2sd %esi, %xmm6, %xmm6 + vmulsd .LC5(%rip), %xmm6, %xmm2 + vmovsd .LC4(%rip), %xmm5 + vmovsd .LC6(%rip), %xmm7 + vmulsd %xmm0, %xmm6, %xmm4 + vmulsd %xmm0, %xmm2, %xmm2 + vdivsd %xmm1, %xmm4, %xmm4 + vdivsd %xmm1, %xmm2, %xmm2 + vdivsd %xmm5, %xmm4, %xmm4 + vmulsd %xmm7, %xmm2, %xmm3 + vaddsd %xmm0, %xmm0, %xmm2 + vmulsd .LC8(%rip), %xmm0, %xmm0 + vmulsd %xmm6, %xmm2, %xmm2 + vmulsd .LC7(%rip), %xmm2, %xmm2 + vmulsd %xmm7, %xmm3, %xmm3 + vdivsd %xmm5, %xmm0, %xmm0 + vdivsd %xmm5, %xmm4, %xmm4 + vdivsd %xmm1, %xmm2, %xmm2 + call printf + movq %r14, %rdi + call free + movq %r12, %rdi + call free + movq %r13, %rdi + call free + addq $72, %rsp + movq %r15, %rdi + popq %rbx + popq %r12 + popq %r13 + .cfi_remember_state + .cfi_def_cfa 13, 0 + popq %r14 + popq %r15 + popq %rbp + leaq -16(%r13), %rsp + .cfi_def_cfa 7, 16 + popq %r13 + .cfi_def_cfa_offset 8 + jmp free + .p2align 4,,10 + .p2align 3 +.L15: + .cfi_restore_state + xorl %eax, %eax + jmp .L9 +.L26: + vzeroupper + jmp .L2 +.L14: + xorl %eax, %eax + jmp .L3 + .cfi_endproc +.LFE24: + .size triad, .-triad + .section .rodata.str1.8 + .align 8 +.LC10: + .string "TRIAD a[i] = b[i]+c[i]*d[i], 32 byte/it, 2 Flop/it" + .align 8 +.LC11: + .string "Size (KByte) | runtime | MFlop/s | MB/s | MLUP/s | repeat | size" + .section .text.startup,"ax",@progbits + .p2align 4,,15 + .globl main + .type main, @function +main: +.LFB25: + .cfi_startproc + pushq %rbx + .cfi_def_cfa_offset 16 + .cfi_offset 3, -16 + movl $.LC10, %edi + movl $20, %ebx + call puts + movl $.LC11, %edi + call puts + .p2align 4,,10 + .p2align 3 +.L28: + vxorpd %xmm1, %xmm1, %xmm1 + movq .LC12(%rip), %rax + vcvtsi2sd %ebx, %xmm1, %xmm1 + addl $1, %ebx + vmovq %rax, %xmm0 + call pow + vcvttsd2si %xmm0, %edi + call triad + cmpl $36, %ebx + jne .L28 + xorl %eax, %eax + popq %rbx + .cfi_def_cfa_offset 8 + ret + .cfi_endproc +.LFE25: + .size main, .-main + .section .rodata.cst32,"aM",@progbits,32 + .align 32 +.LC0: + .long 1907715710 + .long 1048610426 + .long 1907715710 + .long 1048610426 + .long 1907715710 + .long 1048610426 + .long 1907715710 + .long 1048610426 + .section .rodata.cst8,"aM",@progbits,8 + .align 8 +.LC1: + .long 1907715710 + .long 1048610426 + .align 8 +.LC3: + .long 2576980378 + .long 1070176665 + .align 8 +.LC4: + .long 0 + .long 1083129856 + .align 8 +.LC5: + .long 0 + .long 1077936128 + .align 8 +.LC6: + .long 0 + .long 1062207488 + .align 8 +.LC7: + .long 2696277389 + .long 1051772663 + .align 8 +.LC8: + .long 0 + .long 1075838976 + .align 8 +.LC12: + .long 3435973837 + .long 1073007820 + .ident "GCC: (GNU) 7.2.0" + .section .note.GNU-stack,"",@progbits diff --git a/tests/test_parser_x86att.py b/tests/test_parser_x86att.py index 141b618..2290e0f 100755 --- a/tests/test_parser_x86att.py +++ b/tests/test_parser_x86att.py @@ -3,6 +3,7 @@ Unit tests for x86 AT&T assembly parser """ +import os import unittest from pyparsing import ParseException @@ -13,48 +14,56 @@ from osaca.parser import ParserX86ATT class TestParserX86ATT(unittest.TestCase): def setUp(self): self.parser = ParserX86ATT() + with open(self._find_file('triad-iaca.s')) as f: + self.triad_code = f.read() ################## # Test ################## def test_comment_parser(self): - self.assertEqual(get_comment(self.parser, '# some comments'), 'some comments') - self.assertEqual(get_comment(self.parser, '\t\t#AA BB CC \t end \t'), 'AA BB CC end') + self.assertEqual(self._get_comment(self.parser, '# some comments'), 'some comments') + self.assertEqual(self._get_comment(self.parser, '\t\t#AA BB CC \t end \t'), 'AA BB CC end') self.assertEqual( - get_comment(self.parser, '\t## comment ## comment'), '# comment ## comment' + self._get_comment(self.parser, '\t## comment ## comment'), '# comment ## comment' ) def test_label_parser(self): - self.assertEqual(get_label(self.parser, 'main:')['name'], 'main') - self.assertEqual(get_label(self.parser, '..B1.10:')['name'], '..B1.10') - self.assertEqual(get_label(self.parser, '.2.3_2_pack.3:')['name'], '.2.3_2_pack.3') - self.assertEqual(get_label(self.parser, '.L1:\t\t\t#label1')['name'], '.L1') + self.assertEqual(self._get_label(self.parser, 'main:')['name'], 'main') + self.assertEqual(self._get_label(self.parser, '..B1.10:')['name'], '..B1.10') + self.assertEqual(self._get_label(self.parser, '.2.3_2_pack.3:')['name'], '.2.3_2_pack.3') + self.assertEqual(self._get_label(self.parser, '.L1:\t\t\t#label1')['name'], '.L1') self.assertEqual( - ' '.join(get_label(self.parser, '.L1:\t\t\t#label1')['comment']), 'label1' + ' '.join(self._get_label(self.parser, '.L1:\t\t\t#label1')['comment']), 'label1' ) with self.assertRaises(ParseException): - get_label(self.parser, '\t.cfi_startproc') + self._get_label(self.parser, '\t.cfi_startproc') def test_directive_parser(self): - self.assertEqual(get_directive(self.parser, '\t.text')['name'], 'text') - self.assertEqual(len(get_directive(self.parser, '\t.text')['parameters']), 0) - self.assertEqual(get_directive(self.parser, '\t.align\t16,0x90')['name'], 'align') - self.assertEqual(len(get_directive(self.parser, '\t.align\t16,0x90')['parameters']), 2) - self.assertEqual(get_directive(self.parser, '\t.align\t16,0x90')['parameters'][1], '0x90') + self.assertEqual(self._get_directive(self.parser, '\t.text')['name'], 'text') + self.assertEqual(len(self._get_directive(self.parser, '\t.text')['parameters']), 0) + self.assertEqual(self._get_directive(self.parser, '\t.align\t16,0x90')['name'], 'align') self.assertEqual( - get_directive(self.parser, ' .byte 100,103,144 #IACA START')['name'], + len(self._get_directive(self.parser, '\t.align\t16,0x90')['parameters']), 2 + ) + self.assertEqual( + self._get_directive(self.parser, '\t.align\t16,0x90')['parameters'][1], '0x90' + ) + self.assertEqual( + self._get_directive(self.parser, ' .byte 100,103,144 #IACA START')[ + 'name' + ], 'byte', ) self.assertEqual( - get_directive(self.parser, ' .byte 100,103,144 #IACA START')[ + self._get_directive(self.parser, ' .byte 100,103,144 #IACA START')[ 'parameters' ][2], '144', ) self.assertEqual( ' '.join( - get_directive(self.parser, ' .byte 100,103,144 #IACA START')[ + self._get_directive(self.parser, ' .byte 100,103,144 #IACA START')[ 'comment' ] ), @@ -67,12 +76,14 @@ class TestParserX86ATT(unittest.TestCase): instr3 = ' movl $222,%ebx #IACA END' instr4 = 'vmovss %xmm4, -4(%rsp,%rax,8) #12.9' instr5 = 'mov %ebx, var(,1)' + instr6 = 'lea (,%rax,8), %rbx' parsed_1 = self.parser.parse_instruction(instr1) parsed_2 = self.parser.parse_instruction(instr2) parsed_3 = self.parser.parse_instruction(instr3) parsed_4 = self.parser.parse_instruction(instr4) parsed_5 = self.parser.parse_instruction(instr5) + parsed_6 = self.parser.parse_instruction(instr6) self.assertEqual(parsed_1['instruction'], 'vcvtsi2ss') self.assertEqual(parsed_1['operands']['destination']['register']['name'], 'xmm2') @@ -104,6 +115,13 @@ class TestParserX86ATT(unittest.TestCase): self.assertEqual(parsed_5['operands']['destination']['memory']['scale'], '1') self.assertEqual(parsed_5['operands']['sources'][0]['register']['name'], 'ebx') + self.assertEqual(parsed_6['instruction'], 'lea') + self.assertIsNone(parsed_6['operands']['sources'][0]['memory']['offset']) + self.assertIsNone(parsed_6['operands']['sources'][0]['memory']['base']) + self.assertEqual(parsed_6['operands']['sources'][0]['memory']['index']['name'], 'rax') + self.assertEqual(parsed_6['operands']['sources'][0]['memory']['scale'], '8') + self.assertEqual(parsed_6['operands']['destination']['register']['name'], 'rbx') + def test_parse_line(self): line_comment = '# -- Begin main' line_label = '..B1.7: # Preds ..B1.6' @@ -165,17 +183,26 @@ class TestParserX86ATT(unittest.TestCase): self.assertEqual(parsed_3, instruction_form_3) self.assertEqual(parsed_4['operands'], instruction_form_4['operands']) + def test_parse_file(self): + parsed = self.parser.parse_file(self.triad_code) + self.assertEqual(parsed[0]['line_number'], 1) + self.assertEqual(len(parsed), 353) -################## -# Helper functions -################## -def get_comment(parser, comment): - return ' '.join(parser.comment.parseString(comment, parseAll=True).asDict()['comment']) + ################## + # Helper functions + ################## + def _get_comment(self, parser, comment): + return ' '.join(parser.comment.parseString(comment, parseAll=True).asDict()['comment']) + def _get_label(self, parser, label): + return parser.label.parseString(label, parseAll=True).asDict()['label'] -def get_label(parser, label): - return parser.label.parseString(label, parseAll=True).asDict()['label'] + def _get_directive(self, parser, directive): + return parser.directive.parseString(directive, parseAll=True).asDict()['directive'] - -def get_directive(parser, directive): - return parser.directive.parseString(directive, parseAll=True).asDict()['directive'] + @staticmethod + def _find_file(name): + testdir = os.path.dirname(__file__) + name = os.path.join(testdir, 'test_files', name) + assert os.path.exists(name) + return name