added mov generation script

This commit is contained in:
Julian Hammer
2020-01-16 13:28:16 +01:00
parent 6e25da6c08
commit 571d090344

View File

@@ -0,0 +1,668 @@
#!/usr/bin/env python3
from collections import OrderedDict, defaultdict
from fractions import Fraction
class MOVEntryBuilder:
    """Render textual instruction-database entries for move instructions.

    An entry lists the instruction name, one item per operand and the
    resource figures (latency, port pressure, throughput and uop count).
    Subclasses override :meth:`build_description` to add the load/store
    resources of a particular micro-architecture.
    """

    @staticmethod
    def compute_throughput(port_pressure):
        """Return the occupancy of the busiest port as a float.

        :param port_pressure: iterable of ``(uops, ports)`` pairs. ``ports``
            is iterated to obtain the port names, so it may be a string of
            single-character names (``'45'``) or a list of names
            (``['2D', '3D']``). The uops of a pair are spread evenly over
            all of its ports.
        :return: highest accumulated per-port occupancy, ``0.0`` if
            ``port_pressure`` is empty.
        """
        port_occupancy = defaultdict(Fraction)
        for uops, ports in port_pressure:
            # Exact fractions avoid rounding noise while accumulating.
            share = Fraction(uops, len(ports))
            for p in ports:
                port_occupancy[p] += share
        return float(max(port_occupancy.values(), default=0))

    @staticmethod
    def classify(operands_types):
        """Return ``(load, store)`` flags for a list of operand types.

        The last operand is the destination: ``'mem'`` in any earlier position
        marks a load, ``'mem'`` in the last position marks a store.

        :raises AssertionError: if the instruction both loads and stores.
        """
        load = 'mem' in operands_types[:-1]
        store = 'mem' in operands_types[-1:]
        assert not (load and store), "Can not process a combined load-store instruction."
        return load, store

    def build_description(
            self, instruction_name, operand_types,
            port_pressure=None, latency=0, comment=None):
        """Return the textual database entry for one instruction form.

        :param instruction_name: mnemonic, e.g. ``'mov'``.
        :param operand_types: operand type names in source order. ``'imd'``
            yields an immediate, names starting with ``'mem'`` yield a memory
            operand (``'mem_simple'``: no index, ``'mem_complex'``: gpr index,
            anything else: wildcard index), every other name is emitted as a
            register class.
        :param port_pressure: list of ``[uops, ports]`` pairs; ``None`` (the
            default) means no port pressure.
        :param latency: latency written to the entry.
        :param comment: optional text appended as a comment to the name line.
        :return: the entry as a multi-line string.
        """
        # ``None`` instead of a shared mutable ``[]`` default.
        if port_pressure is None:
            port_pressure = []
        comment = " # " + comment if comment else ""

        # Collect the pieces and join once instead of growing a string.
        parts = ['- name: {}{}\n - operands:\n'.format(instruction_name, comment)]
        for ot in operand_types:
            if ot == 'imd':
                parts.append(' - class: immediate\n imd: int\n')
            elif ot.startswith('mem'):
                parts.append(
                    ' - class: memory\n'
                    ' base: "*"\n'
                    ' offset: "*"\n')
                if ot == 'mem_simple':
                    parts.append(' index: ~\n')
                elif ot == 'mem_complex':
                    parts.append(' index: gpr\n')
                else:
                    parts.append(' index: "*"\n')
                parts.append(' scale: "*"\n')
            else:
                parts.append(' - class: register\n name: {}\n'.format(ot))

        parts.append((
            ' latency: {latency}\n'
            ' port_pressure: {port_pressure!r}\n'
            ' throughput: {throughput}\n'
            ' uops: {uops}\n').format(
                latency=latency,
                port_pressure=port_pressure,
                throughput=self.compute_throughput(port_pressure),
                uops=sum(uops for uops, _ in port_pressure)))
        return ''.join(parts)

    def parse_port_pressure(self, port_pressure_str):
        """Parse a port pressure string into a list of ``[uops, ports]`` pairs.

        Example:
        1*p45+2*p0 -> [[1, '45'], [2, '0']]

        An empty string yields an empty list.
        """
        port_pressure = []
        if port_pressure_str:
            for p in port_pressure_str.split('+'):
                cycles, ports = p.split('*p')
                port_pressure.append([int(cycles), ports])
        return port_pressure

    def process_item(self, instruction_form, resources):
        """Build the entry for one ``(instruction_form, resources)`` table row.

        ``instruction_form`` is the mnemonic followed by space-separated
        operand types; ``resources`` is ``(port_pressure_string, latency)``.

        Example:
        ('mov xmm mem', ('1*p45+2*p0', 7)) is split into
        'mov', ['xmm', 'mem'], [[1, '45'], [2, '0']] and 7, which are passed
        on to build_description().
        """
        instr_elements = instruction_form.split(' ')
        latency = resources[1]
        port_pressure = self.parse_port_pressure(resources[0])
        instruction_name = instr_elements[0]
        operand_types = instr_elements[1:]
        return self.build_description(instruction_name, operand_types, port_pressure, latency)
class MOVEntryBuilderIntelNoPort7AGU(MOVEntryBuilder):
    """Entry builder that adds load/store resources; used for SNB and IVB."""

    def build_description(
            self, instruction_name, operand_types,
            port_pressure=None, latency=0):
        """Return the entry with load or store resources added.

        :param instruction_name: mnemonic, e.g. ``'mov'``.
        :param operand_types: operand type names, destination last.
        :param port_pressure: ``[uops, ports]`` pairs of the instruction
            itself; the list is not modified. ``None`` means no pressure.
        :param latency: latency of the instruction without memory access.
        :return: the entry as a multi-line string.
        :raises AssertionError: for combined load-store instructions.
        """
        load, store = self.classify(operand_types)
        # Work on a copy: extending the argument in place would leak the added
        # ports into the caller's list (and into a shared default).
        port_pressure = list(port_pressure) if port_pressure else []
        comment = None
        if load:
            # One uop on ports 2/3 plus one on '2D'/'3D' (presumably the
            # load data ports -- TODO confirm); a load adds 4 to the latency.
            port_pressure += [[1, '23'], [1, ['2D', '3D']]]
            latency += 4
            comment = "with load"
        if store:
            # One uop on ports 2/3 plus one on port 4; latency is unchanged.
            port_pressure += [[1, '23'], [1, '4']]
            comment = "with store"
        return MOVEntryBuilder.build_description(
            self, instruction_name, operand_types, port_pressure, latency, comment)
class MOVEntryBuilderIntelWithPort7AGU(MOVEntryBuilder):
    """Entry builder that adds load/store resources; used for HSW, BDW, SKX and CSX.

    Stores are emitted as two entries: one for memory operands without an
    index register, whose address uop may also use port 7, and one for
    memory operands with a gpr index, which is limited to ports 2/3.
    """

    def build_description(
            self, instruction_name, operand_types,
            port_pressure=None, latency=0):
        """Return the entry (two entries for stores) with memory resources added.

        :param instruction_name: mnemonic, e.g. ``'mov'``.
        :param operand_types: operand type names, destination last.
        :param port_pressure: ``[uops, ports]`` pairs of the instruction
            itself; the list is not modified. ``None`` means no pressure.
        :param latency: latency of the instruction without memory access.
        :return: the entry or entries as a multi-line string.
        :raises AssertionError: for combined load-store instructions.
        """
        load, store = self.classify(operand_types)
        # Copy so that neither the caller's list nor a shared default is extended.
        port_pressure = list(port_pressure) if port_pressure else []

        if load:
            # One uop on ports 2/3 plus one on '2D'/'3D' (presumably the
            # load data ports -- TODO confirm); a load adds 4 to the latency.
            return MOVEntryBuilder.build_description(
                self, instruction_name, operand_types,
                port_pressure + [[1, '23'], [1, ['2D', '3D']]],
                latency + 4, "with load")

        if store:
            # Stores leave the latency unchanged.
            port_pressure_simple = port_pressure + [[1, '237'], [1, '4']]
            operands_simple = ['mem_simple' if o == 'mem' else o for o in operand_types]
            port_pressure_complex = port_pressure + [[1, '23'], [1, '4']]
            operands_complex = ['mem_complex' if o == 'mem' else o for o in operand_types]
            return (
                MOVEntryBuilder.build_description(
                    self, instruction_name, operands_simple, port_pressure_simple, latency,
                    "with store, simple AGU") +
                '\n' +
                MOVEntryBuilder.build_description(
                    self, instruction_name, operands_complex, port_pressure_complex, latency,
                    "with store, complex AGU"))

        # Register only:
        return MOVEntryBuilder.build_description(
            self, instruction_name, operand_types, port_pressure, latency)
# Shared builder instances used to render the per-architecture tables below.
# np7: builder without a port-7 AGU (its class is marked "for SNB and IVB").
np7 = MOVEntryBuilderIntelNoPort7AGU()
# p7: builder with a port-7 AGU (its class is marked "for HSW, BDW, SKX and CSX").
p7 = MOVEntryBuilderIntelWithPort7AGU()
# SNB
# Each row is (instruction form, (port pressure string, latency)). The form is
# the mnemonic followed by the operand types with the destination last. An
# empty port pressure string means that the row itself adds no port pressure;
# load/store resources are added by the entry builder.
snb_mov_instructions = [
    # https://www.felixcloutier.com/x86/mov
    ('mov gpr gpr', ('', 0)),
    ('mov gpr mem', ('', 0)),
    ('mov mem gpr', ('', 0)),
    ('mov imd gpr', ('', 0)),
    ('mov imd mem', ('', 0)),
    # https://www.felixcloutier.com/x86/movapd
    ('movapd xmm xmm', ('', 0)),
    ('movapd xmm mem', ('', 0)),
    ('movapd mem xmm', ('', 0)),
    ('vmovapd xmm xmm', ('', 0)),
    ('vmovapd xmm mem', ('', 0)),
    ('vmovapd mem xmm', ('', 0)),
    ('vmovapd ymm ymm', ('', 0)),
    ('vmovapd ymm mem', ('', 0)),
    ('vmovapd mem ymm', ('', 0)),
    # https://www.felixcloutier.com/x86/movaps
    ('movaps xmm xmm', ('', 0)),
    ('movaps xmm mem', ('', 0)),
    ('movaps mem xmm', ('', 0)),
    ('vmovaps xmm xmm', ('', 0)),
    # The memory forms of the VEX variant were listed as "movaps", which
    # duplicated the SSE rows and produced a non-existent "movaps ymm".
    ('vmovaps xmm mem', ('', 0)),
    ('vmovaps mem xmm', ('', 0)),
    ('vmovaps ymm ymm', ('', 0)),
    ('vmovaps ymm mem', ('', 0)),
    ('vmovaps mem ymm', ('', 0)),
    # https://www.felixcloutier.com/x86/movd:movq
    ('movd gpr mm', ('1*p5', 1)),
    ('movd mem mm', ('', 0)),
    ('movq gpr mm', ('1*p5', 1)),
    ('movq mem mm', ('', 0)),
    ('movd mm gpr', ('1*p0', 1)),
    ('movd mm mem', ('', 0)),
    ('movq mm gpr', ('1*p0', 1)),
    ('movq mm mem', ('', 0)),
    ('movd gpr xmm', ('1*p5', 1)),
    ('movd mem xmm', ('', 0)),
    ('movq gpr xmm', ('1*p5', 1)),
    ('movq mem xmm', ('', 0)),
    ('movd xmm gpr', ('1*p5', 1)),
    ('movd xmm mem', ('', 0)),
    ('movq xmm gpr', ('1*p5', 1)),
    ('movq xmm mem', ('', 0)),
    ('vmovd gpr xmm', ('1*p5', 1)),
    ('vmovd mem xmm', ('', 0)),
    ('vmovq gpr xmm', ('1*p5', 1)),
    ('vmovq mem xmm', ('', 0)),
    ('vmovd xmm gpr', ('1*p5', 1)),
    ('vmovd xmm mem', ('', 0)),
    ('vmovq xmm gpr', ('1*p5', 1)),
    ('vmovq xmm mem', ('', 0)),
    # https://www.felixcloutier.com/x86/movddup
    ('movddup xmm xmm', ('1*p5', 1)),
    ('movddup mem xmm', ('', 0)),
    ('vmovddup xmm xmm', ('1*p5', 1)),
    ('vmovddup mem xmm', ('', 0)),
    ('vmovddup ymm ymm', ('1*p5', 1)),
    ('vmovddup mem ymm', ('', 0)),
    # https://www.felixcloutier.com/x86/movdq2q
    ('movdq2q xmm mm', ('1*p015+1*p5', 1)),
    # https://www.felixcloutier.com/x86/movdqa:vmovdqa32:vmovdqa64
    ('movdqa xmm xmm', ('', 0)),
    ('movdqa mem xmm', ('', 0)),
    ('movdqa xmm mem', ('', 0)),
    ('vmovdqa xmm xmm', ('', 0)),
    ('vmovdqa mem xmm', ('', 0)),
    ('vmovdqa xmm mem', ('', 0)),
    ('vmovdqa ymm ymm', ('', 0)),
    ('vmovdqa mem ymm', ('', 0)),
    ('vmovdqa ymm mem', ('', 0)),
    # https://www.felixcloutier.com/x86/movdqu:vmovdqu8:vmovdqu16:vmovdqu32:vmovdqu64
    ('movdqu xmm xmm', ('', 0)),
    ('movdqu mem xmm', ('', 0)),
    ('movdqu xmm mem', ('', 0)),
    ('vmovdqu xmm xmm', ('', 0)),
    ('vmovdqu mem xmm', ('', 0)),
    ('vmovdqu xmm mem', ('', 0)),
    ('vmovdqu ymm ymm', ('', 0)),
    ('vmovdqu mem ymm', ('', 0)),
    ('vmovdqu ymm mem', ('', 0)),
    # https://www.felixcloutier.com/x86/movhlps
    ('movhlps xmm xmm', ('1*p5', 1)),
    ('vmovhlps xmm xmm xmm', ('1*p5', 1)),
    # https://www.felixcloutier.com/x86/movhpd
    ('movhpd mem xmm', ('1*p5', 1)),
    ('vmovhpd mem xmm xmm', ('1*p5', 1)),
    ('movhpd xmm mem', ('', 0)),
    # VEX store form: destination (memory) last, like every other row.
    ('vmovhpd xmm mem', ('', 0)),
    # https://www.felixcloutier.com/x86/movhps
    ('movhps mem xmm', ('1*p5', 1)),
    ('vmovhps mem xmm xmm', ('1*p5', 1)),
    ('movhps xmm mem', ('', 0)),
    ('vmovhps xmm mem', ('', 0)),
    # https://www.felixcloutier.com/x86/movlhps
    ('movlhps xmm xmm', ('1*p5', 1)),
    ('vmovlhps xmm xmm xmm', ('1*p5', 1)),
    # https://www.felixcloutier.com/x86/movlpd
    ('movlpd mem xmm', ('1*p5', 1)),
    ('vmovlpd mem xmm xmm', ('1*p5', 1)),
    ('movlpd xmm mem', ('', 0)),
    ('vmovlpd xmm mem', ('', 0)),
    # https://www.felixcloutier.com/x86/movlps
    ('movlps mem xmm', ('1*p5', 1)),
    ('vmovlps mem xmm xmm', ('1*p5', 1)),
    ('movlps xmm mem', ('', 0)),
    ('vmovlps xmm mem', ('', 0)),
    # https://www.felixcloutier.com/x86/movmskpd
    ('movmskpd xmm gpr', ('1*p0', 1)),
    ('vmovmskpd xmm gpr', ('1*p0', 1)),
    ('vmovmskpd ymm gpr', ('1*p0', 1)),
    # https://www.felixcloutier.com/x86/movmskps
    ('movmskps xmm gpr', ('1*p0', 1)),
    ('vmovmskps xmm gpr', ('1*p0', 1)),
    ('vmovmskps ymm gpr', ('1*p0', 1)),
    # https://www.felixcloutier.com/x86/movntdq
    ('movntdq xmm mem', ('', 0)),  # TODO NT-store: what latency to use?
    ('vmovntdq xmm mem', ('', 0)),  # TODO NT-store: what latency to use?
    ('vmovntdq ymm mem', ('', 0)),  # TODO NT-store: what latency to use?
    # https://www.felixcloutier.com/x86/movntdqa
    ('movntdqa mem xmm', ('', 0)),
    ('vmovntdqa mem xmm', ('', 0)),
    ('vmovntdqa mem ymm', ('', 0)),
    # https://www.felixcloutier.com/x86/movnti
    ('movnti gpr mem', ('', 0)),  # TODO NT-store: what latency to use?
    # https://www.felixcloutier.com/x86/movntpd
    ('movntpd xmm mem', ('', 0)),  # TODO NT-store: what latency to use?
    ('vmovntpd xmm mem', ('', 0)),  # TODO NT-store: what latency to use?
    ('vmovntpd ymm mem', ('', 0)),  # TODO NT-store: what latency to use?
    # https://www.felixcloutier.com/x86/movntps
    ('movntps xmm mem', ('', 0)),  # TODO NT-store: what latency to use?
    ('vmovntps xmm mem', ('', 0)),  # TODO NT-store: what latency to use?
    ('vmovntps ymm mem', ('', 0)),  # TODO NT-store: what latency to use?
    # https://www.felixcloutier.com/x86/movntq
    ('movntq mm mem', ('', 0)),  # TODO NT-store: what latency to use?
    # https://www.felixcloutier.com/x86/movq
    # Only the register forms are listed here; the memory forms of movq and
    # vmovq already appear in the movd:movq section of this table.
    ('movq mm mm', ('', 0)),
    ('movq xmm xmm', ('', 0)),
    ('vmovq xmm xmm', ('', 0)),
    # https://www.felixcloutier.com/x86/movq2dq
    ('movq2dq mm xmm', ('1*p015', 1)),
    # https://www.felixcloutier.com/x86/movs:movsb:movsw:movsd:movsq
    # TODO combined load-store is currently not supported
    # ('movs mem mem', ()),
    # https://www.felixcloutier.com/x86/movsd
    ('movsd xmm xmm', ('', 0)),
    ('movsd mem xmm', ('', 0)),
    ('movsd xmm mem', ('', 0)),
    ('vmovsd xmm xmm xmm', ('', 0)),
    ('vmovsd mem xmm', ('', 0)),
    ('vmovsd xmm mem', ('', 0)),
    # https://www.felixcloutier.com/x86/movshdup
    ('movshdup xmm xmm', ('', 0)),
    ('movshdup mem xmm', ('', 0)),
    ('vmovshdup xmm xmm', ('', 0)),
    ('vmovshdup mem xmm', ('', 0)),
    ('vmovshdup ymm ymm', ('', 0)),
    ('vmovshdup mem ymm', ('', 0)),
    # https://www.felixcloutier.com/x86/movsldup
    ('movsldup xmm xmm', ('', 0)),
    ('movsldup mem xmm', ('', 0)),
    ('vmovsldup xmm xmm', ('', 0)),
    ('vmovsldup mem xmm', ('', 0)),
    ('vmovsldup ymm ymm', ('', 0)),
    ('vmovsldup mem ymm', ('', 0)),
    # https://www.felixcloutier.com/x86/movss
    ('movss xmm xmm', ('', 0)),
    ('movss mem xmm', ('', 0)),
    ('vmovss xmm xmm xmm', ('', 0)),
    ('vmovss mem xmm', ('', 0)),
    ('vmovss xmm xmm', ('', 0)),
    ('vmovss xmm mem', ('', 0)),
    # Store form; it was listed as a second "movss mem xmm" row.
    ('movss xmm mem', ('', 0)),
    # https://www.felixcloutier.com/x86/movsx:movsxd
    ('movsx gpr gpr', ('', 0)),
    ('movsx mem gpr', ('', 0)),
    ('movsxd gpr gpr', ('', 0)),
    ('movsxd mem gpr', ('', 0)),
    ('movsb gpr gpr', ('', 0)),  # AT&T version
    ('movsb mem gpr', ('', 0)),  # AT&T version
    ('movsw gpr gpr', ('', 0)),  # AT&T version
    ('movsw mem gpr', ('', 0)),  # AT&T version
    # https://www.felixcloutier.com/x86/movupd
    ('movupd xmm xmm', ('', 0)),
    ('movupd mem xmm', ('', 0)),
    ('movupd xmm mem', ('', 0)),
    ('vmovupd xmm xmm', ('', 0)),
    ('vmovupd mem xmm', ('', 0)),
    ('vmovupd xmm mem', ('', 0)),
    ('vmovupd ymm ymm', ('', 0)),
    ('vmovupd mem ymm', ('', 0)),
    ('vmovupd ymm mem', ('', 0)),
    # https://www.felixcloutier.com/x86/movups
    ('movups xmm xmm', ('', 0)),
    ('movups mem xmm', ('', 0)),
    ('movups xmm mem', ('', 0)),
    ('vmovups xmm xmm', ('', 0)),
    ('vmovups mem xmm', ('', 0)),
    ('vmovups xmm mem', ('', 0)),
    ('vmovups ymm ymm', ('', 0)),
    ('vmovups mem ymm', ('', 0)),
    ('vmovups ymm mem', ('', 0)),
    # https://www.felixcloutier.com/x86/movzx
    ('movzx gpr gpr', ('', 0)),
    ('movzx mem gpr', ('', 0)),
    ('movzb gpr gpr', ('', 0)),  # AT&T version
    ('movzb mem gpr', ('', 0)),  # AT&T version
    ('movzw gpr gpr', ('', 0)),  # AT&T version
    ('movzw mem gpr', ('', 0)),  # AT&T version
    # https://www.felixcloutier.com/x86/cmovcc
    # cmovae/cmovnb/cmovnc and cmovb/cmovc/cmovnae are aliases of the same
    # instruction, so their rows must carry identical values.
    ('cmova gpr gpr', ('1*p015+2*p05', 2)),
    ('cmova mem gpr', ('1*p015+2*p05', 2)),
    ('cmovae gpr gpr', ('1*p015+1*p05', 2)),
    ('cmovae mem gpr', ('1*p015+1*p05', 2)),
    ('cmovb gpr gpr', ('1*p015+1*p05', 2)),
    ('cmovb mem gpr', ('1*p015+1*p05', 2)),
    ('cmovbe gpr gpr', ('1*p015+2*p05', 2)),
    ('cmovbe mem gpr', ('1*p015+2*p05', 2)),
    ('cmovc gpr gpr', ('1*p015+1*p05', 2)),
    ('cmovc mem gpr', ('1*p015+1*p05', 2)),
    ('cmove gpr gpr', ('1*p015+1*p05', 2)),
    ('cmove mem gpr', ('1*p015+1*p05', 2)),
    ('cmovg gpr gpr', ('1*p015+1*p05', 2)),
    ('cmovg mem gpr', ('1*p015+1*p05', 2)),
    ('cmovge gpr gpr', ('1*p015+1*p05', 2)),
    ('cmovge mem gpr', ('1*p015+1*p05', 2)),
    ('cmovl gpr gpr', ('1*p015+1*p05', 2)),
    ('cmovl mem gpr', ('1*p015+1*p05', 2)),
    ('cmovle gpr gpr', ('1*p015+1*p05', 2)),
    ('cmovle mem gpr', ('1*p015+1*p05', 2)),
    ('cmovna gpr gpr', ('1*p015+2*p05', 2)),
    ('cmovna mem gpr', ('1*p015+2*p05', 2)),
    ('cmovnae gpr gpr', ('1*p015+1*p05', 2)),
    ('cmovnae mem gpr', ('1*p015+1*p05', 2)),
    ('cmovnb gpr gpr', ('1*p015+1*p05', 2)),
    ('cmovnb mem gpr', ('1*p015+1*p05', 2)),
    ('cmovnbe gpr gpr', ('1*p015+2*p05', 2)),
    ('cmovnbe mem gpr', ('1*p015+2*p05', 2)),
    ('cmovnc gpr gpr', ('1*p015+1*p05', 2)),
    ('cmovnc mem gpr', ('1*p015+1*p05', 2)),
    ('cmovne gpr gpr', ('1*p015+1*p05', 2)),
    ('cmovne mem gpr', ('1*p015+1*p05', 2)),
    ('cmovng gpr gpr', ('1*p015+1*p05', 2)),
    ('cmovng mem gpr', ('1*p015+1*p05', 2)),
    ('cmovnge gpr gpr', ('1*p015+1*p05', 2)),
    ('cmovnge mem gpr', ('1*p015+1*p05', 2)),
    ('cmovnl gpr gpr', ('1*p015+1*p05', 2)),
    ('cmovnl mem gpr', ('1*p015+1*p05', 2)),
    ('cmovno gpr gpr', ('1*p015+1*p05', 2)),
    ('cmovno mem gpr', ('1*p015+1*p05', 2)),
    ('cmovnp gpr gpr', ('1*p015+1*p05', 2)),
    ('cmovnp mem gpr', ('1*p015+1*p05', 2)),
    ('cmovns gpr gpr', ('1*p015+1*p05', 2)),
    ('cmovns mem gpr', ('1*p015+1*p05', 2)),
    ('cmovnz gpr gpr', ('1*p015+1*p05', 2)),
    ('cmovnz mem gpr', ('1*p015+1*p05', 2)),
    ('cmovo gpr gpr', ('1*p015+1*p05', 2)),
    ('cmovo mem gpr', ('1*p015+1*p05', 2)),
    ('cmovp gpr gpr', ('1*p015+1*p05', 2)),
    ('cmovp mem gpr', ('1*p015+1*p05', 2)),
    ('cmovpe gpr gpr', ('1*p015+1*p05', 2)),
    ('cmovpe mem gpr', ('1*p015+1*p05', 2)),
    ('cmovpo gpr gpr', ('1*p015+1*p05', 2)),
    ('cmovpo mem gpr', ('1*p015+1*p05', 2)),
    ('cmovs gpr gpr', ('1*p015+1*p05', 2)),
    ('cmovs mem gpr', ('1*p015+1*p05', 2)),
    ('cmovz gpr gpr', ('1*p015+1*p05', 2)),
    ('cmovz mem gpr', ('1*p015+1*p05', 2)),
]
# SNB entries are rendered with the builder that has no port-7 AGU.
snb_description = '\n'.join(
    np7.process_item(form, resources) for form, resources in snb_mov_instructions)
# IVB reuses the SNB table (same list object) and the same builder.
ivb_mov_instructions = snb_mov_instructions
ivb_description = '\n'.join(
    np7.process_item(form, resources) for form, resources in ivb_mov_instructions)
# HSW: the IVB table plus the rows below. Going through OrderedDict makes a
# row given here replace the value of an IVB row with the same instruction
# form (keeping that row's position), while new forms are appended.
hsw_mov_instructions = list(OrderedDict(ivb_mov_instructions + [
    # https://www.felixcloutier.com/x86/movbe
    ('movbe gpr mem', ('1*p15', 6)),
    ('movbe mem gpr', ('1*p15', 6)),
    # https://www.felixcloutier.com/x86/cmovcc
    # cmovae/cmovnb/cmovnc and cmovb/cmovc/cmovnae are aliases of the same
    # instruction, so their rows must carry identical values.
    ('cmova gpr gpr', ('1*p0156+2*p06', 2)),
    ('cmova mem gpr', ('1*p0156+2*p06', 2)),
    ('cmovae gpr gpr', ('1*p0156+1*p06', 2)),
    ('cmovae mem gpr', ('1*p0156+1*p06', 2)),
    ('cmovb gpr gpr', ('1*p0156+1*p06', 2)),
    ('cmovb mem gpr', ('1*p0156+1*p06', 2)),
    ('cmovbe gpr gpr', ('1*p0156+2*p06', 2)),
    ('cmovbe mem gpr', ('1*p0156+2*p06', 2)),
    ('cmovc gpr gpr', ('1*p0156+1*p06', 2)),
    ('cmovc mem gpr', ('1*p0156+1*p06', 2)),
    ('cmove gpr gpr', ('1*p0156+1*p06', 2)),
    ('cmove mem gpr', ('1*p0156+1*p06', 2)),
    ('cmovg gpr gpr', ('1*p0156+1*p06', 2)),
    ('cmovg mem gpr', ('1*p0156+1*p06', 2)),
    ('cmovge gpr gpr', ('1*p0156+1*p06', 2)),
    ('cmovge mem gpr', ('1*p0156+1*p06', 2)),
    ('cmovl gpr gpr', ('1*p0156+1*p06', 2)),
    ('cmovl mem gpr', ('1*p0156+1*p06', 2)),
    ('cmovle gpr gpr', ('1*p0156+1*p06', 2)),
    ('cmovle mem gpr', ('1*p0156+1*p06', 2)),
    ('cmovna gpr gpr', ('1*p0156+2*p06', 2)),
    ('cmovna mem gpr', ('1*p0156+2*p06', 2)),
    ('cmovnae gpr gpr', ('1*p0156+1*p06', 2)),
    ('cmovnae mem gpr', ('1*p0156+1*p06', 2)),
    ('cmovnb gpr gpr', ('1*p0156+1*p06', 2)),
    ('cmovnb mem gpr', ('1*p0156+1*p06', 2)),
    ('cmovnbe gpr gpr', ('1*p0156+2*p06', 2)),
    ('cmovnbe mem gpr', ('1*p0156+2*p06', 2)),
    ('cmovnc gpr gpr', ('1*p0156+1*p06', 2)),
    ('cmovnc mem gpr', ('1*p0156+1*p06', 2)),
    ('cmovne gpr gpr', ('1*p0156+1*p06', 2)),
    ('cmovne mem gpr', ('1*p0156+1*p06', 2)),
    ('cmovng gpr gpr', ('1*p0156+1*p06', 2)),
    ('cmovng mem gpr', ('1*p0156+1*p06', 2)),
    ('cmovnge gpr gpr', ('1*p0156+1*p06', 2)),
    ('cmovnge mem gpr', ('1*p0156+1*p06', 2)),
    ('cmovnl gpr gpr', ('1*p0156+1*p06', 2)),
    ('cmovnl mem gpr', ('1*p0156+1*p06', 2)),
    ('cmovno gpr gpr', ('1*p0156+1*p06', 2)),
    ('cmovno mem gpr', ('1*p0156+1*p06', 2)),
    ('cmovnp gpr gpr', ('1*p0156+1*p06', 2)),
    ('cmovnp mem gpr', ('1*p0156+1*p06', 2)),
    ('cmovns gpr gpr', ('1*p0156+1*p06', 2)),
    ('cmovns mem gpr', ('1*p0156+1*p06', 2)),
    ('cmovnz gpr gpr', ('1*p0156+1*p06', 2)),
    ('cmovnz mem gpr', ('1*p0156+1*p06', 2)),
    ('cmovo gpr gpr', ('1*p0156+1*p06', 2)),
    ('cmovo mem gpr', ('1*p0156+1*p06', 2)),
    ('cmovp gpr gpr', ('1*p0156+1*p06', 2)),
    ('cmovp mem gpr', ('1*p0156+1*p06', 2)),
    ('cmovpe gpr gpr', ('1*p0156+1*p06', 2)),
    ('cmovpe mem gpr', ('1*p0156+1*p06', 2)),
    ('cmovpo gpr gpr', ('1*p0156+1*p06', 2)),
    ('cmovpo mem gpr', ('1*p0156+1*p06', 2)),
    ('cmovs gpr gpr', ('1*p0156+1*p06', 2)),
    ('cmovs mem gpr', ('1*p0156+1*p06', 2)),
    ('cmovz gpr gpr', ('1*p0156+1*p06', 2)),
    ('cmovz mem gpr', ('1*p0156+1*p06', 2)),
]).items())
hsw_description = '\n'.join([p7.process_item(*item) for item in hsw_mov_instructions])
# BDW: the HSW table with the cmovcc rows replaced by the values below.
# OrderedDict keeps each row's original position and takes the value of the
# last row given for an instruction form.
bdw_mov_instructions = list(OrderedDict(hsw_mov_instructions + [
    # https://www.felixcloutier.com/x86/cmovcc
    # cmovae/cmovnb/cmovnc and cmovb/cmovc/cmovnae are aliases of the same
    # instruction, so their rows must carry identical values.
    ('cmova gpr gpr', ('2*p06', 1)),
    ('cmova mem gpr', ('2*p06', 1)),
    ('cmovae gpr gpr', ('1*p06', 1)),
    ('cmovae mem gpr', ('1*p06', 1)),
    ('cmovb gpr gpr', ('1*p06', 1)),
    ('cmovb mem gpr', ('1*p06', 1)),
    ('cmovbe gpr gpr', ('2*p06', 1)),
    ('cmovbe mem gpr', ('2*p06', 1)),
    ('cmovc gpr gpr', ('1*p06', 1)),
    ('cmovc mem gpr', ('1*p06', 1)),
    ('cmove gpr gpr', ('1*p06', 1)),
    ('cmove mem gpr', ('1*p06', 1)),
    ('cmovg gpr gpr', ('1*p06', 1)),
    ('cmovg mem gpr', ('1*p06', 1)),
    ('cmovge gpr gpr', ('1*p06', 1)),
    ('cmovge mem gpr', ('1*p06', 1)),
    ('cmovl gpr gpr', ('1*p06', 1)),
    ('cmovl mem gpr', ('1*p06', 1)),
    ('cmovle gpr gpr', ('1*p06', 1)),
    ('cmovle mem gpr', ('1*p06', 1)),
    ('cmovna gpr gpr', ('2*p06', 1)),
    ('cmovna mem gpr', ('2*p06', 1)),
    ('cmovnae gpr gpr', ('1*p06', 1)),
    ('cmovnae mem gpr', ('1*p06', 1)),
    ('cmovnb gpr gpr', ('1*p06', 1)),
    ('cmovnb mem gpr', ('1*p06', 1)),
    ('cmovnbe gpr gpr', ('2*p06', 1)),
    ('cmovnbe mem gpr', ('2*p06', 1)),
    ('cmovnc gpr gpr', ('1*p06', 1)),
    ('cmovnc mem gpr', ('1*p06', 1)),
    ('cmovne gpr gpr', ('1*p06', 1)),
    ('cmovne mem gpr', ('1*p06', 1)),
    ('cmovng gpr gpr', ('1*p06', 1)),
    ('cmovng mem gpr', ('1*p06', 1)),
    ('cmovnge gpr gpr', ('1*p06', 1)),
    ('cmovnge mem gpr', ('1*p06', 1)),
    ('cmovnl gpr gpr', ('1*p06', 1)),
    ('cmovnl mem gpr', ('1*p06', 1)),
    ('cmovno gpr gpr', ('1*p06', 1)),
    ('cmovno mem gpr', ('1*p06', 1)),
    ('cmovnp gpr gpr', ('1*p06', 1)),
    ('cmovnp mem gpr', ('1*p06', 1)),
    ('cmovns gpr gpr', ('1*p06', 1)),
    ('cmovns mem gpr', ('1*p06', 1)),
    ('cmovnz gpr gpr', ('1*p06', 1)),
    ('cmovnz mem gpr', ('1*p06', 1)),
    ('cmovo gpr gpr', ('1*p06', 1)),
    ('cmovo mem gpr', ('1*p06', 1)),
    ('cmovp gpr gpr', ('1*p06', 1)),
    ('cmovp mem gpr', ('1*p06', 1)),
    ('cmovpe gpr gpr', ('1*p06', 1)),
    ('cmovpe mem gpr', ('1*p06', 1)),
    ('cmovpo gpr gpr', ('1*p06', 1)),
    ('cmovpo mem gpr', ('1*p06', 1)),
    ('cmovs gpr gpr', ('1*p06', 1)),
    ('cmovs mem gpr', ('1*p06', 1)),
    ('cmovz gpr gpr', ('1*p06', 1)),
    ('cmovz mem gpr', ('1*p06', 1)),
]).items())
bdw_description = '\n'.join([p7.process_item(*item) for item in bdw_mov_instructions])
# SKX: every BDW row followed by the rows below; passing the combined
# sequence through OrderedDict lets a later row replace the value of an
# earlier row with the same instruction form.
skx_mov_instructions = [*OrderedDict([
    *bdw_mov_instructions,
    # TODO with masking! https://www.felixcloutier.com/x86/movapd
    # TODO with masking! https://www.felixcloutier.com/x86/movaps
    # https://www.felixcloutier.com/x86/movbe
    ('movbe gpr mem', ('1*p15', 4)),
    ('movbe mem gpr', ('1*p15', 4)),
    # TODO with masking! https://www.felixcloutier.com/x86/movddup
    # TODO with masking! https://www.felixcloutier.com/x86/movdqa:vmovdqa32:vmovdqa64
    # TODO with masking! https://www.felixcloutier.com/x86/movdqu:vmovdqu8:vmovdqu16:vmovdqu32:vmovdqu64
    # https://www.felixcloutier.com/x86/movntdq
    ('vmovntdq zmm mem', ('', 0)),  # TODO NT-store: what latency to use?
    # https://www.felixcloutier.com/x86/movntdqa
    ('vmovntdqa mem zmm', ('', 0)),
    # https://www.felixcloutier.com/x86/movntpd
    ('vmovntpd zmm mem', ('', 0)),  # TODO NT-store: what latency to use?
    # https://www.felixcloutier.com/x86/movntps
    ('vmovntps zmm mem', ('', 0)),  # TODO NT-store: what latency to use?
    # https://www.felixcloutier.com/x86/movq2dq
    ('movq2dq mm xmm', ('1*p0+1*p015', 1)),
    # TODO with masking! https://www.felixcloutier.com/x86/movsd
    # TODO with masking! https://www.felixcloutier.com/x86/movshdup
    # TODO with masking! https://www.felixcloutier.com/x86/movsldup
    # TODO with masking! https://www.felixcloutier.com/x86/movss
    # TODO with masking! https://www.felixcloutier.com/x86/movupd
    # TODO with masking! https://www.felixcloutier.com/x86/movups
]).items()]
skx_description = '\n'.join(
    p7.process_item(form, resources) for form, resources in skx_mov_instructions)
# CSX: currently identical to SKX (no additional rows yet). Materialized as a
# list like every other table, so it can be indexed and extended with
# ``+ [...]`` in the same way.
csx_mov_instructions = list(OrderedDict(skx_mov_instructions + [
]).items())
def get_description(arch, rhs_comment=None):
    """Return the rendered move-instruction entries for one architecture.

    :param arch: one of 'snb', 'ivb', 'hsw', 'bdw', 'skx' or 'csx'.
    :param rhs_comment: if not None, every output line is left-aligned to the
        length of the longest line and suffixed with `` # <rhs_comment>``.
    :return: the entries as one multi-line string.
    :raises KeyError: if ``arch`` is not a known architecture.
    """
    # Builder and table per architecture. SNB and IVB use the builder without
    # a port-7 AGU, matching how the module-level descriptions are rendered;
    # they were previously rendered here with the port-7 builder.
    sources = {
        'snb': (np7, snb_mov_instructions),
        'ivb': (np7, ivb_mov_instructions),
        'hsw': (p7, hsw_mov_instructions),
        'bdw': (p7, bdw_mov_instructions),
        'skx': (p7, skx_mov_instructions),
        'csx': (p7, csx_mov_instructions),
    }
    # Unknown architectures raise KeyError here; only the requested
    # description is rendered.
    builder, instructions = sources[arch]
    description = '\n'.join(builder.process_item(*item) for item in instructions)

    if rhs_comment is not None:
        lines = description.split('\n')
        max_length = max(len(line) for line in lines)
        line_format = "{:<" + str(max_length) + "} # {}\n"
        description = ''.join(line_format.format(line, rhs_comment) for line in lines)
    return description
if __name__ == '__main__':
    # Command line entry point: print the entries of the architecture named by
    # the single argument, each line tagged with the invoking command line.
    import sys

    if len(sys.argv) != 2:
        # Wrong usage is an error: report on stderr with a non-zero status.
        print("Usage: {} (snb|ivb|hsw|bdw|skx|csx)".format(sys.argv[0]), file=sys.stderr)
        sys.exit(1)

    try:
        output = get_description(sys.argv[1], rhs_comment="by "+' '.join(sys.argv))
    except KeyError:
        print("Unknown architecture.", file=sys.stderr)
        sys.exit(1)
    else:
        print(output)