mirror of
https://github.com/RRZE-HPC/OSACA.git
synced 2025-12-16 09:00:05 +01:00
Compare commits
8 Commits
2cf2bf5cec
...
v0.7.0
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
2a231bf20b | ||
|
|
fb8c8ec7db | ||
|
|
2f069000e9 | ||
|
|
bdbcb18817 | ||
|
|
7930e4d704 | ||
|
|
d61330404b | ||
|
|
91da9a311a | ||
|
|
0c201be10e |
@@ -34,7 +34,7 @@ Getting started
|
||||
===============
|
||||
OSACA is a Python module with a command line interface.
|
||||
|
||||
OSACA is also integrated into the `Compiler Explorer at godbolt.org <https://godbolt.org>`_, which allows using OSACA from a browser without any installation. To analyze an assembly snippet, go to https://godbolt.org change language to "Analysis", insert an AArch64 or AT&T(!) x86 assembly code and make sure OSACA is selected in the corresponding analysis panel, e.g., https://godbolt.org/z/shK4f8. When analyzing a high-level language code, use the "Add tool..." menu in the compiler output panel to add OSACA analysis, e.g. https://godbolt.org/z/hbMoPn. To change the micro architecture model, add ``--arch`` and µarch shortname (e.g., ``SKX`` for Skylake, ``ZEN2``, ``N1`` for ARM Neoverse) to the "Compiler options..." (when using "Analysis" mode) or "Arguments" (when analyzing compiler output of a high-level code).
|
||||
OSACA is also integrated into the `Compiler Explorer at godbolt.org <https://godbolt.org>`_, which allows using OSACA from a browser without any installation. To analyze an assembly snippet, go to https://godbolt.org, change the language to "Analysis", insert AArch64 or x86 assembly code, and make sure OSACA is selected in the corresponding analysis panel, e.g., https://godbolt.org/z/shK4f8. When analyzing high-level language code, use the "Add tool..." menu in the compiler output panel to add the OSACA analysis, e.g., https://godbolt.org/z/hbMoPn. To change the micro-architecture model, add ``--arch`` and the µarch short name (e.g., ``SKX`` for Skylake, ``ZEN2``, ``N1`` for ARM Neoverse) to the "Compiler options..." (when using "Analysis" mode) or "Arguments" (when analyzing the compiler output of high-level code).
|
||||
|
||||
Installation
|
||||
------------
|
||||
@@ -100,9 +100,9 @@ The usage of OSACA can be listed as:
|
||||
shows the program’s version number.
|
||||
--arch ARCH
|
||||
needs to be replaced with the target architecture abbreviation.
|
||||
Possible options are ``SNB``, ``IVB``, ``HSW``, ``BDW``, ``SKX``, ``CSX``, ``ICL`` (Client), ``ICX`` (Server), ``SPR`` for the latest Intel micro architectures starting from Intel Sandy Bridge and ``ZEN[1-4]`` for AMD Zen architectures.
|
||||
Furthermore, ``TX2`` for Marvell`s ARM-based ThunderX2 , ``N1`` for ARM's Neoverse, ``A72`` for ARM Cortex-A72, ``TSV110`` for the HiSilicon TaiShan v110, ``A64FX`` for Fujitsu's HPC ARM architecture, ``M1`` for the Apple M1-Firestorm performance core, and ``V2`` for the Neoverse V2 (used in NVIDIA's Grace CPU) are available.
|
||||
If no micro-architecture is given, OSACA assumes a default architecture for x86/AArch64.
|
||||
See `the table of supported microarchitectures below <https://github.com/RRZE-HPC/OSACA?tab=readme-ov-file#supported-microarchitectures>`__ for all possible options. If no micro-architecture is given, OSACA assumes a default architecture for x86/AArch64.
|
||||
--syntax SYNTAX
|
||||
Define the assembly syntax (ATT, Intel) for x86. If no syntax is given, OSACA tries to determine the syntax automatically.
|
||||
--fixed
|
||||
Run the throughput analysis with fixed port utilization for all suitable ports per instruction.
|
||||
Otherwise, OSACA will print out the optimal port utilization for the kernel.
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
"""Open Source Architecture Code Analyzer"""
|
||||
|
||||
name = "osaca"
|
||||
__version__ = "0.6.1"
|
||||
__version__ = "0.7.0"
|
||||
|
||||
# To trigger travis deployment to pypi, do the following:
|
||||
# 1. Increment __version__
|
||||
|
||||
@@ -244,10 +244,12 @@ def check_arguments(args, parser):
|
||||
)
|
||||
if args.syntax and args.arch and MachineModel.get_isa_for_arch(args.arch) != "x86":
|
||||
parser.error("Syntax can only be explicitly specified for an x86 microarchitecture")
|
||||
if args.syntax and args.syntax.upper() not in SUPPORTED_SYNTAXES:
|
||||
parser.error(
|
||||
"Assembly syntax not supported. Please see --help for all valid assembly syntaxes."
|
||||
)
|
||||
if args.syntax:
|
||||
args.syntax = args.syntax.upper()
|
||||
if args.syntax not in SUPPORTED_SYNTAXES:
|
||||
parser.error(
|
||||
"Assembly syntax not supported. Please see --help for all valid assembly syntaxes."
|
||||
)
|
||||
if "import_data" in args and args.import_data not in supported_import_files:
|
||||
parser.error(
|
||||
"Microbenchmark not supported for data import. Please see --help for all valid "
|
||||
@@ -339,7 +341,9 @@ def inspect(args, output_file=sys.stdout):
|
||||
if args.arch:
|
||||
archs_to_try = [args.arch]
|
||||
else:
|
||||
archs_to_try = [detected_arch]
|
||||
archs_to_try = list(DEFAULT_ARCHS.values())
|
||||
archs_to_try.remove(detected_arch)
|
||||
archs_to_try.append(detected_arch)
|
||||
if args.syntax:
|
||||
syntaxes_to_try = [args.syntax]
|
||||
else:
|
||||
|
||||
@@ -318,28 +318,50 @@ class ParserX86Intel(ParserX86):
|
||||
base_register = self.register
|
||||
index_register = self.register
|
||||
scale = pp.Word("1248", exact=1)
|
||||
post_displacement = pp.Group(
|
||||
(pp.Literal("+") ^ pp.Literal("-")).setResultsName("sign") + integer_number
|
||||
| identifier
|
||||
).setResultsName(self.immediate_id)
|
||||
pre_displacement = pp.Group(integer_number + pp.Literal("+")).setResultsName(
|
||||
self.immediate_id
|
||||
)
|
||||
indexed = pp.Group(
|
||||
|
||||
base = base_register.setResultsName("base")
|
||||
displacement = pp.Group(
|
||||
pp.Group(integer_number ^ identifier).setResultsName(self.immediate_id)
|
||||
).setResultsName("displacement")
|
||||
short_indexed = index_register.setResultsName("index")
|
||||
long_indexed = (
|
||||
index_register.setResultsName("index")
|
||||
+ pp.Optional(pp.Literal("*") + scale.setResultsName("scale"))
|
||||
).setResultsName("indexed")
|
||||
+ pp.Literal("*")
|
||||
+ scale.setResultsName("scale")
|
||||
)
|
||||
indexed = pp.Group(short_indexed ^ long_indexed).setResultsName("indexed")
|
||||
operator = pp.Word("+-", exact=1)
|
||||
operator_index = pp.Word("+-", exact=1).setResultsName("operator_idx")
|
||||
operator_displacement = pp.Word("+-", exact=1).setResultsName("operator_disp")
|
||||
|
||||
# Syntax:
|
||||
# `base` always precedes `indexed`.
|
||||
# `short_indexed` is only allowed if it follows `base`, not alone.
|
||||
# `displacement` can go anywhere.
|
||||
# It's easier to list all the alternatives than to represent these rules using complicated
|
||||
# `Optional` and what not.
|
||||
register_expression = pp.Group(
|
||||
pp.Literal("[")
|
||||
+ pp.Optional(pp.Group(pre_displacement).setResultsName("pre_displacement"))
|
||||
+ pp.Group(
|
||||
base_register.setResultsName("base")
|
||||
^ pp.Group(
|
||||
base_register.setResultsName("base") + pp.Literal("+") + indexed
|
||||
).setResultsName("base_and_indexed")
|
||||
^ indexed
|
||||
).setResultsName("non_displacement")
|
||||
+ pp.Optional(pp.Group(post_displacement).setResultsName("post_displacement"))
|
||||
+ (
|
||||
base
|
||||
^ (base + operator_displacement + displacement)
|
||||
^ (base + operator_displacement + displacement + operator_index + indexed)
|
||||
^ (base + operator_index + indexed)
|
||||
^ (base + operator_index + indexed + operator_displacement + displacement)
|
||||
^ (displacement + operator + base)
|
||||
^ (displacement + operator + base + operator_index + indexed)
|
||||
^ (
|
||||
displacement
|
||||
+ operator_index
|
||||
+ pp.Group(long_indexed).setResultsName("indexed")
|
||||
)
|
||||
^ pp.Group(long_indexed).setResultsName("indexed")
|
||||
^ (
|
||||
pp.Group(long_indexed).setResultsName("indexed")
|
||||
+ operator_displacement
|
||||
+ displacement
|
||||
)
|
||||
)
|
||||
+ pp.Literal("]")
|
||||
).setResultsName("register_expression")
|
||||
|
||||
@@ -356,7 +378,7 @@ class ParserX86Intel(ParserX86):
|
||||
self.register.setResultsName("segment") + pp.Literal(":") + immediate
|
||||
^ immediate + register_expression
|
||||
^ register_expression
|
||||
^ identifier + pp.Optional(pp.Literal("+") + immediate)
|
||||
^ identifier + pp.Optional(operator + immediate)
|
||||
).setResultsName("address_expression")
|
||||
|
||||
offset_expression = pp.Group(
|
||||
@@ -640,34 +662,19 @@ class ParserX86Intel(ParserX86):
|
||||
return RegisterOperand(name=operand.name)
|
||||
|
||||
def process_register_expression(self, register_expression):
|
||||
pre_displacement = register_expression.get("pre_displacement")
|
||||
post_displacement = register_expression.get("post_displacement")
|
||||
non_displacement = register_expression.get("non_displacement")
|
||||
base = None
|
||||
indexed = None
|
||||
if non_displacement:
|
||||
base_and_indexed = non_displacement.get("base_and_indexed")
|
||||
if base_and_indexed:
|
||||
base = base_and_indexed.get("base")
|
||||
indexed = base_and_indexed.get("indexed")
|
||||
else:
|
||||
base = non_displacement.get("base")
|
||||
if not base:
|
||||
indexed = non_displacement.get("indexed")
|
||||
base = register_expression.get("base")
|
||||
displacement = register_expression.get("displacement")
|
||||
indexed = register_expression.get("indexed")
|
||||
index = None
|
||||
scale = 1
|
||||
if indexed:
|
||||
index = indexed.get("index")
|
||||
scale = int(indexed.get("scale", "1"), 0)
|
||||
else:
|
||||
index = None
|
||||
scale = 1
|
||||
displacement_op = (
|
||||
self.process_immediate(pre_displacement.immediate) if pre_displacement else None
|
||||
)
|
||||
displacement_op = (
|
||||
self.process_immediate(post_displacement.immediate)
|
||||
if post_displacement
|
||||
else displacement_op
|
||||
)
|
||||
if register_expression.get("operator_index") == "-":
|
||||
scale *= -1
|
||||
displacement_op = self.process_immediate(displacement.immediate) if displacement else None
|
||||
if displacement_op and register_expression.get("operator_disp") == "-":
|
||||
displacement_op.value *= -1
|
||||
base_op = RegisterOperand(name=base.name) if base else None
|
||||
index_op = RegisterOperand(name=index.name) if index else None
|
||||
new_memory = MemoryOperand(
|
||||
@@ -724,6 +731,8 @@ class ParserX86Intel(ParserX86):
|
||||
if "displacement" in offset_expression
|
||||
else None
|
||||
)
|
||||
if displacement and "operator_disp" == "-":
|
||||
displacement.value *= -1
|
||||
identifier = self.process_identifier(offset_expression.identifier)
|
||||
identifier.offset = displacement
|
||||
return MemoryOperand(offset=identifier)
|
||||
|
||||
102
tests/test_files/gs_x86_gcc.s
Normal file
102
tests/test_files/gs_x86_gcc.s
Normal file
@@ -0,0 +1,102 @@
|
||||
# Produced with gcc 14.2 with -O3 -march=sapphirerapids -fopenmp-simd -mprefer-vector-width=512, https://godbolt.org/z/drE47x1b4.
|
||||
.LC3:
|
||||
.string "%f\n"
|
||||
main:
|
||||
push r14
|
||||
xor edi, edi
|
||||
push r13
|
||||
push r12
|
||||
push rbp
|
||||
push rbx
|
||||
call time
|
||||
mov edi, eax
|
||||
call srand
|
||||
mov edi, 1600
|
||||
call malloc
|
||||
mov r12, rax
|
||||
mov rbp, rax
|
||||
lea r13, [rax+1600]
|
||||
mov rbx, rax
|
||||
.L2:
|
||||
mov edi, 1600
|
||||
add rbx, 8
|
||||
call malloc
|
||||
mov QWORD PTR [rbx-8], rax
|
||||
cmp r13, rbx
|
||||
jne .L2
|
||||
lea rbx, [r12+8]
|
||||
lea r13, [r12+1592]
|
||||
.L5:
|
||||
mov r14d, 8
|
||||
.L4:
|
||||
call rand
|
||||
vxorpd xmm2, xmm2, xmm2
|
||||
mov rcx, QWORD PTR [rbx]
|
||||
movsx rdx, eax
|
||||
mov esi, eax
|
||||
imul rdx, rdx, 351843721
|
||||
sar esi, 31
|
||||
sar rdx, 45
|
||||
sub edx, esi
|
||||
imul edx, edx, 100000
|
||||
sub eax, edx
|
||||
vcvtsi2sd xmm0, xmm2, eax
|
||||
vdivsd xmm0, xmm0, QWORD PTR .LC0[rip]
|
||||
vmovsd QWORD PTR [rcx+r14], xmm0
|
||||
add r14, 8
|
||||
cmp r14, 1592
|
||||
jne .L4
|
||||
add rbx, 8
|
||||
cmp r13, rbx
|
||||
jne .L5
|
||||
vmovsd xmm1, QWORD PTR .LC1[rip]
|
||||
lea rdi, [r12+1584]
|
||||
.L6:
|
||||
mov rdx, QWORD PTR [rbp+8]
|
||||
mov rcx, QWORD PTR [rbp+16]
|
||||
mov eax, 1
|
||||
mov rsi, QWORD PTR [rbp+0]
|
||||
vmovsd xmm0, QWORD PTR [rdx]
|
||||
.L7:
|
||||
vaddsd xmm0, xmm0, QWORD PTR [rcx+rax*8]
|
||||
vaddsd xmm0, xmm0, QWORD PTR [rdx+8+rax*8]
|
||||
vaddsd xmm0, xmm0, QWORD PTR [rsi+rax*8]
|
||||
vmulsd xmm0, xmm0, xmm1
|
||||
vmovsd QWORD PTR [rdx+rax*8], xmm0
|
||||
inc rax
|
||||
cmp rax, 199
|
||||
jne .L7
|
||||
vmovsd xmm0, QWORD PTR [rdx+1592]
|
||||
add rbp, 8
|
||||
vmovsd QWORD PTR [rcx+8], xmm0
|
||||
cmp rdi, rbp
|
||||
jne .L6
|
||||
mov rax, QWORD PTR [r12+1584]
|
||||
vmovsd xmm0, QWORD PTR .LC2[rip]
|
||||
vucomisd xmm0, QWORD PTR [rax+1584]
|
||||
jp .L9
|
||||
je .L19
|
||||
.L9:
|
||||
pop rbx
|
||||
xor eax, eax
|
||||
pop rbp
|
||||
pop r12
|
||||
pop r13
|
||||
pop r14
|
||||
ret
|
||||
.L19:
|
||||
mov rax, QWORD PTR [r12]
|
||||
mov edi, OFFSET FLAT:.LC3
|
||||
vmovsd xmm0, QWORD PTR [rax]
|
||||
mov eax, 1
|
||||
call printf
|
||||
jmp .L9
|
||||
.LC0:
|
||||
.long 0
|
||||
.long 1083129856
|
||||
.LC1:
|
||||
.long 2061584302
|
||||
.long 1072934420
|
||||
.LC2:
|
||||
.long -57724360
|
||||
.long 1072939201
|
||||
@@ -1,15 +1,15 @@
|
||||
; Translated from kernel_x86_memdep.s
|
||||
L4:
|
||||
vmovsd [rax+8], xmm0
|
||||
add rax, 8
|
||||
vmovsd [rax+rcx*8+8], xmm0
|
||||
vaddsd xmm0, xmm0, [rax]
|
||||
sub rax, -8
|
||||
vaddsd xmm0, xmm0, [rax-8]
|
||||
dec rcx
|
||||
vaddsd xmm0, xmm0, [rax+rcx*8+8]
|
||||
mov rdx, rcx
|
||||
vaddsd xmm0, xmm0, [rax+rdx*8+8]
|
||||
vmovsd [rax+8], xmm0 # line 3 <---------------------------------+
|
||||
add rax, 8 # rax=rax_orig+8 |
|
||||
vmovsd [rax+rcx*8+8], xmm0 # line 5 <------------------------------------------+
|
||||
vaddsd xmm0, xmm0, [rax] # depends on line 3, rax+8;[rax] == [rax+8] --------+ |
|
||||
sub rax, -8 # rax=rax_orig+16 | |
|
||||
vaddsd xmm0, xmm0, [rax-8] # depends on line 3, rax+16;[rax-8] == [rax+8] -----+ |
|
||||
dec rcx # rcx=rcx_orig-1 |
|
||||
vaddsd xmm0, xmm0, [rax+rcx*8+8] # depends on line 5, [(rax+8)+(rcx-1)*8+8] == [rax+rcx*8+8] --+
|
||||
mov rdx, rcx # |
|
||||
vaddsd xmm0, xmm0, [rax+rdx*8+8] # depends on line 5, rcx == rdx -----------------------------+
|
||||
vmulsd xmm0, xmm0, xmm1
|
||||
add rax, 8
|
||||
cmp rsi, rax
|
||||
|
||||
@@ -25,6 +25,8 @@ class TestParserX86Intel(unittest.TestCase):
|
||||
self.triad_iaca_code = f.read()
|
||||
with open(self._find_file("gs_x86_icc.s")) as f:
|
||||
self.gs_icc_code = f.read()
|
||||
with open(self._find_file("gs_x86_gcc.s")) as f:
|
||||
self.gs_gcc_code = f.read()
|
||||
|
||||
##################
|
||||
# Test
|
||||
@@ -100,6 +102,7 @@ class TestParserX86Intel(unittest.TestCase):
|
||||
instr11 = "\tlea\trcx, OFFSET FLAT:??_R0N@8+8"
|
||||
instr12 = "\tvfmadd213sd xmm0, xmm1, QWORD PTR __real@bfc5555555555555"
|
||||
instr13 = "\tjmp\t$LN18@operator"
|
||||
instr14 = "vaddsd xmm0, xmm0, QWORD PTR [rdx+8+rax*8]"
|
||||
|
||||
parsed_1 = self.parser.parse_instruction(instr1)
|
||||
parsed_2 = self.parser.parse_instruction(instr2)
|
||||
@@ -114,6 +117,7 @@ class TestParserX86Intel(unittest.TestCase):
|
||||
parsed_11 = self.parser.parse_instruction(instr11)
|
||||
parsed_12 = self.parser.parse_instruction(instr12)
|
||||
parsed_13 = self.parser.parse_instruction(instr13)
|
||||
parsed_14 = self.parser.parse_instruction(instr14)
|
||||
|
||||
self.assertEqual(parsed_1.mnemonic, "sub")
|
||||
self.assertEqual(parsed_1.operands[0], RegisterOperand(name="RSP"))
|
||||
@@ -204,6 +208,19 @@ class TestParserX86Intel(unittest.TestCase):
|
||||
self.assertEqual(parsed_13.mnemonic, "jmp")
|
||||
self.assertEqual(parsed_13.operands[0], IdentifierOperand(name="$LN18@operator"))
|
||||
|
||||
self.assertEqual(parsed_14.mnemonic, "vaddsd")
|
||||
self.assertEqual(parsed_14.operands[0], RegisterOperand(name="XMM0"))
|
||||
self.assertEqual(parsed_14.operands[1], RegisterOperand(name="XMM0"))
|
||||
self.assertEqual(
|
||||
parsed_14.operands[2],
|
||||
MemoryOperand(
|
||||
base=RegisterOperand(name="RDX"),
|
||||
offset=ImmediateOperand(value=8),
|
||||
index=RegisterOperand(name="RAX"),
|
||||
scale=8,
|
||||
),
|
||||
)
|
||||
|
||||
def test_parse_line(self):
|
||||
line_comment = "; -- Begin main"
|
||||
line_instruction = "\tret\t0"
|
||||
@@ -344,6 +361,38 @@ class TestParserX86Intel(unittest.TestCase):
|
||||
)
|
||||
self.assertEqual(len(parsed), 227)
|
||||
|
||||
def test_parse_file4(self):
|
||||
parsed = self.parser.parse_file(self.gs_gcc_code)
|
||||
self.assertEqual(parsed[0].line_number, 1)
|
||||
# Check a few lines to make sure that we produced something reasonable.
|
||||
self.assertEqual(
|
||||
parsed[61],
|
||||
InstructionForm(
|
||||
mnemonic="vaddsd",
|
||||
operands=[
|
||||
RegisterOperand("XMM0"),
|
||||
RegisterOperand("XMM0"),
|
||||
MemoryOperand(
|
||||
base=RegisterOperand("RDX"),
|
||||
index=RegisterOperand("RAX"),
|
||||
scale=8,
|
||||
offset=ImmediateOperand(value=8),
|
||||
),
|
||||
],
|
||||
line=" vaddsd xmm0, xmm0, QWORD PTR [rdx+8+rax*8]",
|
||||
line_number=62,
|
||||
),
|
||||
)
|
||||
self.assertEqual(
|
||||
parsed[101],
|
||||
InstructionForm(
|
||||
directive_id=DirectiveOperand(name=".long", parameters=["1072939201"]),
|
||||
line=" .long 1072939201",
|
||||
line_number=102,
|
||||
),
|
||||
)
|
||||
self.assertEqual(len(parsed), 102)
|
||||
|
||||
def test_normalize_imd(self):
|
||||
imd_binary = ImmediateOperand(value="1001111B")
|
||||
imd_octal = ImmediateOperand(value="117O")
|
||||
|
||||
Reference in New Issue
Block a user