diff --git a/.travis.yml b/.travis.yml
index 129e342..538fbda 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -9,6 +9,7 @@ before_install:
 #  - pip install tox-travis
   - pip install codecov
   - pip install pygraphviz
+  - pip install 'kerncraft>=0.8.4.dev2'  # quoted so the shell does not treat '>' as a redirect
 install:
   - pip install -e .
 cache: pip
diff --git a/osaca/osaca.py b/osaca/osaca.py
index 3eb4511..5cb6ff8 100755
--- a/osaca/osaca.py
+++ b/osaca/osaca.py
@@ -169,9 +169,6 @@ def insert_byte_marker(args):
 
     :param args: arguments given from :class:`~argparse.ArgumentParser` after parsing
     """
-    if MachineModel.get_isa_for_arch(args.arch) != 'x86':
-        print('Marker insertion for non-x86 is not yet supported by Kerncraft.', file=sys.stderr)
-        sys.exit(1)
     try:
         from kerncraft.incore_model import asm_instrumentation
     except ImportError:
@@ -191,6 +188,7 @@ def insert_byte_marker(args):
         output_file=marked_assembly,
         block_selection='manual',
         pointer_increment='auto_with_manual_fallback',
+        isa=MachineModel.get_isa_for_arch(args.arch),
     )
 
     marked_assembly.seek(0)
diff --git a/osaca/semantics/marker_utils.py b/osaca/semantics/marker_utils.py
index 11127ce..e778468 100755
--- a/osaca/semantics/marker_utils.py
+++ b/osaca/semantics/marker_utils.py
@@ -50,6 +50,7 @@ def find_marked_kernel_x86ATT(lines):
 
 def get_marker(isa, comment=""):
     """Return tuple of start and end marker lines."""
+    isa = isa.lower()
     if isa == 'x86':
         start_marker_raw = (
             'movl      $111, %ebx # OSACA START MARKER\n'
@@ -65,7 +66,7 @@ def get_marker(isa, comment=""):
             '.byte     103        # OSACA END MARKER\n'
             '.byte     144        # OSACA END MARKER\n'
         )
-    elif isa == 'AArch64':
+    elif isa == 'aarch64':
         start_marker_raw = (
             'mov       x1, #111    // OSACA START MARKER\n'
             '.byte     213,3,32,31 // OSACA START MARKER\n'
diff --git a/tests/test_cli.py b/tests/test_cli.py
index 51ad441..7b3b8d1 100755
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -6,9 +6,12 @@ Unit tests for the CLI of OSACA and running the sample kernels in examples/
 import argparse
 import os
 import unittest
+from unittest.mock import patch
 from io import StringIO
+from shutil import copyfile
 
 import osaca.osaca as osaca
+from osaca.parser import ParserAArch64v81, ParserX86ATT
 
 
 class ErrorRaisingArgumentParser(argparse.ArgumentParser):
@@ -27,7 +30,9 @@ class TestCLI(unittest.TestCase):
         args = parser.parse_args(['--arch', 'WRONG_ARCH', self._find_file('gs', 'csx', 'gcc')])
         with self.assertRaises(ValueError):
             osaca.check_arguments(args, parser)
-        args = parser.parse_args(['--import', 'WRONG_BENCH', self._find_file('gs', 'csx', 'gcc')])
+        args = parser.parse_args(
+            ['--arch', 'csx', '--import', 'WRONG_BENCH', self._find_file('gs', 'csx', 'gcc')]
+        )
         with self.assertRaises(ValueError):
             osaca.check_arguments(args, parser)
 
@@ -47,6 +52,7 @@ class TestCLI(unittest.TestCase):
                 self._find_test_file('asmbench_import_aarch64.dat'),
             ]
         )
+        osaca.run(args, output_file=output)
 
     def test_check_db(self):
         parser = osaca.create_parser(parser=ErrorRaisingArgumentParser())
@@ -56,6 +62,49 @@ class TestCLI(unittest.TestCase):
         output = StringIO()
         osaca.run(args, output_file=output)
 
+    def test_get_parser(self):  # arch name -> matching ISA parser; unknown arch -> ValueError
+        self.assertIsInstance(osaca.get_asm_parser('csx'), ParserX86ATT)
+        self.assertIsInstance(osaca.get_asm_parser('tx2'), ParserAArch64v81)
+        with self.assertRaises(ValueError):
+            osaca.get_asm_parser('UNKNOWN')
+
+    def test_marker_insert_x86(self):
+        """Insert markers into a copy of the x86 kernel and check the added line count."""
+        name = self._find_test_file('kernel_x86.s')
+        name_copy = name + '.copy.s'
+        copyfile(name, name_copy)
+        # delete the scratch copy even if the run or an assertion below fails
+        self.addCleanup(os.remove, name_copy)
+
+        output = StringIO()
+        parser = osaca.create_parser()
+        args = parser.parse_args(['--arch', 'csx', '--insert-marker', name_copy])
+        # input() is patched so the interactive prompt is answered with '.L10'
+        with patch('builtins.input', side_effect=['.L10']):
+            osaca.run(args, output_file=output)
+        with open(name) as f_orig, open(name_copy) as f_copy:
+            lines_orig = len(f_orig.readlines())
+            lines_copy = len(f_copy.readlines())
+        self.assertEqual(lines_copy, lines_orig + 5 + 4)
+
+    def test_marker_insert_aarch64(self):
+        """Insert markers into a copy of the AArch64 kernel and check the added line count."""
+        name = self._find_test_file('kernel_aarch64.s')
+        name_copy = name + '.copy.s'
+        copyfile(name, name_copy)
+        # delete the scratch copy even if the run or an assertion below fails
+        self.addCleanup(os.remove, name_copy)
+
+        parser = osaca.create_parser()
+        args = parser.parse_args(['--arch', 'tx2', '--insert-marker', name_copy])
+        # presumably the block label, then a manual pointer increment -- TODO confirm prompts
+        with patch('builtins.input', side_effect=['.LBB0_32', '64']):
+            osaca.run(args, output_file=StringIO())
+        with open(name) as f_orig, open(name_copy) as f_copy:
+            lines_orig = len(f_orig.readlines())
+            lines_copy = len(f_copy.readlines())
+        self.assertEqual(lines_copy, lines_orig + 3 + 2)
+
     def test_examples(self):
         kernels = [
             'add',
@@ -76,7 +125,9 @@ class TestCLI(unittest.TestCase):
             for a in archs:
                 for c in comps[a]:
                     with self.subTest(kernel=k, arch=a, comp=c):
-                        args = parser.parse_args(['--arch', a, self._find_file(k, a, c)])
+                        args = parser.parse_args(
+                            ['--arch', a, self._find_file(k, a, c), '--export-graph', '/dev/null']
+                        )
                         output = StringIO()
                         osaca.run(args, output_file=output)
                         self.assertTrue('WARNING' not in output.getvalue())
diff --git a/tests/test_files/kernel_aarch64.s b/tests/test_files/kernel_aarch64.s
index e934761..7e4b87a 100644
--- a/tests/test_files/kernel_aarch64.s
+++ b/tests/test_files/kernel_aarch64.s
@@ -19,7 +19,7 @@
     add x10, x10, #64           // =64
     adds    x12, x12, #1            // =1
     fmov    s0, -1.0e+0
-    fmov    s1, #2.0e+2f
-    prfm    pldl1keep, [x26, #2112]
     b.ne    .LBB0_32
 // OSACA-END
+    fmov    s1, #2.0e+2f
+    prfm    pldl1keep, [x26, #2112]
diff --git a/tests/test_semantics.py b/tests/test_semantics.py
index 82bb44c..752760b 100755
--- a/tests/test_semantics.py
+++ b/tests/test_semantics.py
@@ -10,8 +10,10 @@ from subprocess import call
 
 import networkx as nx
 
+from osaca.osaca import get_unmatched_instruction_ratio
 from osaca.parser import AttrDict, ParserAArch64v81, ParserX86ATT
-from osaca.semantics import INSTR_FLAGS, ArchSemantics, KernelDG, MachineModel
+from osaca.semantics import (INSTR_FLAGS, ArchSemantics, KernelDG,
+                             MachineModel, reduce_to_section)
 
 
 class TestSemanticTools(unittest.TestCase):
@@ -33,8 +35,10 @@ class TestSemanticTools(unittest.TestCase):
             self.code_x86 = f.read()
         with open(self._find_file('kernel_aarch64.s')) as f:
             self.code_AArch64 = f.read()
-        self.kernel_x86 = self.parser_x86.parse_file(self.code_x86)
-        self.kernel_AArch64 = self.parser_AArch64.parse_file(self.code_AArch64)
+        self.kernel_x86 = reduce_to_section(self.parser_x86.parse_file(self.code_x86), 'x86')
+        self.kernel_AArch64 = reduce_to_section(
+            self.parser_AArch64.parse_file(self.code_AArch64), 'aarch64'
+        )
 
         # set up machine models
         self.machine_model_csx = MachineModel(
@@ -180,7 +184,7 @@ class TestSemanticTools(unittest.TestCase):
             test_mm_x86.get_load_throughput(
                 {'base': {'name': 'x'}, 'offset': None, 'index': None, 'scale': 1}
             ),
-            [[1, '23'], [1, ['2D', '3D']]]
+            [[1, '23'], [1, ['2D', '3D']]],
         )
 
         # test adding port
@@ -232,15 +236,18 @@ class TestSemanticTools(unittest.TestCase):
         # x86
         kernel_fixed = deepcopy(self.kernel_x86)
         self.semantics_csx.add_semantics(kernel_fixed)
+        self.assertEqual(get_unmatched_instruction_ratio(kernel_fixed), 0)
         kernel_optimal = deepcopy(kernel_fixed)
         self.semantics_csx.assign_optimal_throughput(kernel_optimal)
         tp_fixed = self.semantics_csx.get_throughput_sum(kernel_fixed)
         tp_optimal = self.semantics_csx.get_throughput_sum(kernel_optimal)
         self.assertNotEqual(tp_fixed, tp_optimal)
         self.assertTrue(max(tp_optimal) <= max(tp_fixed))
+
         # arm
         kernel_fixed = deepcopy(self.kernel_AArch64)
         self.semantics_tx2.add_semantics(kernel_fixed)
+        self.assertEqual(get_unmatched_instruction_ratio(kernel_fixed), 0)
         kernel_optimal = deepcopy(kernel_fixed)
         self.semantics_tx2.assign_optimal_throughput(kernel_optimal)
         tp_fixed = self.semantics_tx2.get_throughput_sum(kernel_fixed)