VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllInstPython.py@ 103917

Last change on this file since 103917 was 103917, checked in by vboxsync, 8 months ago

VMM/IEM: Implement native emitter for IEM_MC_STORE_XREG_U128(), bugref:10614

  • Property svn:eol-style set to LF
  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 323.2 KB
Line 
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3# $Id: IEMAllInstPython.py 103917 2024-03-19 13:27:07Z vboxsync $
4
5"""
6IEM instruction extractor.
7
8This script/module parses the IEMAllInstruction*.cpp.h files next to it and
9collects information about the instructions. It can then be used to generate
10disassembler tables and tests.
11"""
12
13from __future__ import print_function;
14
15__copyright__ = \
16"""
17Copyright (C) 2017-2023 Oracle and/or its affiliates.
18
19This file is part of VirtualBox base platform packages, as
20available from https://www.virtualbox.org.
21
22This program is free software; you can redistribute it and/or
23modify it under the terms of the GNU General Public License
24as published by the Free Software Foundation, in version 3 of the
25License.
26
27This program is distributed in the hope that it will be useful, but
28WITHOUT ANY WARRANTY; without even the implied warranty of
29MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
30General Public License for more details.
31
32You should have received a copy of the GNU General Public License
33along with this program; if not, see <https://www.gnu.org/licenses>.
34
35The contents of this file may alternatively be used under the terms
36of the Common Development and Distribution License Version 1.0
37(CDDL), a copy of it is provided in the "COPYING.CDDL" file included
38in the VirtualBox distribution, in which case the provisions of the
39CDDL are applicable instead of those of the GPL.
40
41You may elect to license modified versions of this file under the
42terms and conditions of either the GPL or the CDDL or both.
43
44SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
45"""
46__version__ = "$Revision: 103917 $"
47
48# pylint: disable=anomalous-backslash-in-string,too-many-lines
49
50# Standard python imports.
51import os;
52import re;
53import sys;
54import traceback;
55
56## Only the main script needs to modify the path.
57#g_ksValidationKitDir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),
58# 'ValidationKit');
59#sys.path.append(g_ksValidationKitDir);
60#
61#from common import utils; - Windows build boxes doesn't have pywin32.
62
# Python 3 hacks:
# Python 3 has no separate 'long' type, so alias it to 'int' to keep code
# written for Python 2 working.
if sys.version_info >= (3,):
    long = int;     # pylint: disable=redefined-builtin,invalid-name
66
67
## EFLAGS bit and mask values, keyed by X86_EFL_XXX constant name.
## Single bit values are written as shifts by their bit number.
g_kdX86EFlagsConstants = {
    'X86_EFL_CF':        1 << 0,
    'X86_EFL_1':         1 << 1,
    'X86_EFL_PF':        1 << 2,
    'X86_EFL_AF':        1 << 4,
    'X86_EFL_ZF':        1 << 6,
    'X86_EFL_SF':        1 << 7,
    'X86_EFL_TF':        1 << 8,
    'X86_EFL_IF':        1 << 9,
    'X86_EFL_DF':        1 << 10,
    'X86_EFL_OF':        1 << 11,
    'X86_EFL_IOPL':      (1 << 12) | (1 << 13),
    'X86_EFL_NT':        1 << 14,
    'X86_EFL_RF':        1 << 16,
    'X86_EFL_VM':        1 << 17,
    'X86_EFL_AC':        1 << 18,
    'X86_EFL_VIF':       1 << 19,
    'X86_EFL_VIP':       1 << 20,
    'X86_EFL_ID':        1 << 21,
    'X86_EFL_LIVE_MASK': 0x003f7fd5,    # UINT32_C(0x003f7fd5)
    'X86_EFL_RA1_MASK':  1 << 1,
};
90
## EFlags values allowed in \@opfltest, \@opflmodify, \@opflundef, \@opflset, and \@opflclear.
##
## Maps a flag mnemonic to an X86_EFL_XXX constant name.  A leading '!' in the
## value marks the mnemonics that name the cleared state of the flag.
##
## NOTE(review): 'po'/'pe' look inverted compared to the classic DEBUG notation
##               (x86 sets PF on even parity) - confirm against the consumers
##               of this table before changing anything.
g_kdEFlagsMnemonics = {
    # Debugger flag notation (sorted by value):
    'cf':   'X86_EFL_CF',   ##< Carry Flag.
    'nc':   '!X86_EFL_CF',  ##< No Carry.

    'po':   'X86_EFL_PF',   ##< Parity Odd.
    'pe':   '!X86_EFL_PF',  ##< Parity Even.

    'af':   'X86_EFL_AF',   ##< Aux Flag.
    'na':   '!X86_EFL_AF',  ##< No Aux.

    'zr':   'X86_EFL_ZF',   ##< ZeRo.
    'nz':   '!X86_EFL_ZF',  ##< No Zero.

    'ng':   'X86_EFL_SF',   ##< NeGative (sign).
    'pl':   '!X86_EFL_SF',  ##< PLus (sign).

    'tf':   'X86_EFL_TF',   ##< Trap flag.

    'ei':   'X86_EFL_IF',   ##< Enabled Interrupts.
    'di':   '!X86_EFL_IF',  ##< Disabled Interrupts.

    'dn':   'X86_EFL_DF',   ##< DowN (string op direction).
    'up':   '!X86_EFL_DF',  ##< UP (string op direction).

    'ov':   'X86_EFL_OF',   ##< OVerflow.
    'nv':   '!X86_EFL_OF',  ##< No Overflow.

    'nt':   'X86_EFL_NT',   ##< Nested Task.
    'rf':   'X86_EFL_RF',   ##< Resume Flag.
    'vm':   'X86_EFL_VM',   ##< Virtual-8086 Mode.
    'ac':   'X86_EFL_AC',   ##< Alignment Check.
    'vif':  'X86_EFL_VIF',  ##< Virtual Interrupt Flag.
    'vip':  'X86_EFL_VIP',  ##< Virtual Interrupt Pending.

    # Reference manual notation not covered above (sorted by value):
    'pf':   'X86_EFL_PF',
    'zf':   'X86_EFL_ZF',
    'sf':   'X86_EFL_SF',
    'if':   'X86_EFL_IF',
    'df':   'X86_EFL_DF',
    'of':   'X86_EFL_OF',
    'iopl': 'X86_EFL_IOPL',
    'id':   'X86_EFL_ID',
};
137
## Constants and values for CR0.
## Each value is a single bit, written as a shift by its bit number.
g_kdX86Cr0Constants = {
    'X86_CR0_PE': 1 << 0,
    'X86_CR0_MP': 1 << 1,
    'X86_CR0_EM': 1 << 2,
    'X86_CR0_TS': 1 << 3,
    'X86_CR0_ET': 1 << 4,
    'X86_CR0_NE': 1 << 5,
    'X86_CR0_WP': 1 << 16,
    'X86_CR0_AM': 1 << 18,
    'X86_CR0_NW': 1 << 29,
    'X86_CR0_CD': 1 << 30,
    'X86_CR0_PG': 1 << 31,
};
152
## Constants and values for CR4.
## Each value is a single bit, written as a shift by its bit number.
g_kdX86Cr4Constants = {
    'X86_CR4_VME':         1 << 0,
    'X86_CR4_PVI':         1 << 1,
    'X86_CR4_TSD':         1 << 2,
    'X86_CR4_DE':          1 << 3,
    'X86_CR4_PSE':         1 << 4,
    'X86_CR4_PAE':         1 << 5,
    'X86_CR4_MCE':         1 << 6,
    'X86_CR4_PGE':         1 << 7,
    'X86_CR4_PCE':         1 << 8,
    'X86_CR4_OSFXSR':      1 << 9,
    'X86_CR4_OSXMMEEXCPT': 1 << 10,
    'X86_CR4_VMXE':        1 << 13,
    'X86_CR4_SMXE':        1 << 14,
    'X86_CR4_PCIDE':       1 << 17,
    'X86_CR4_OSXSAVE':     1 << 18,
    'X86_CR4_SMEP':        1 << 20,
    'X86_CR4_SMAP':        1 << 21,
    'X86_CR4_PKE':         1 << 22,
};
174
## XSAVE components (XCR0).
## Single component bits are written as shifts; the combined masks stay in hex.
g_kdX86XSaveCConstants = {
    'XSAVE_C_X87':         1 << 0,
    'XSAVE_C_SSE':         1 << 1,
    'XSAVE_C_YMM':         1 << 2,
    'XSAVE_C_BNDREGS':     1 << 3,
    'XSAVE_C_BNDCSR':      1 << 4,
    'XSAVE_C_OPMASK':      1 << 5,
    'XSAVE_C_ZMM_HI256':   1 << 6,
    'XSAVE_C_ZMM_16HI':    1 << 7,
    'XSAVE_C_PKRU':        1 << 9,
    'XSAVE_C_LWP':         1 << 62,
    'XSAVE_C_X':           1 << 63,
    'XSAVE_C_ALL_AVX':     0x000000c4, # For clearing all AVX bits.
    'XSAVE_C_ALL_AVX_SSE': 0x000000c6, # For clearing all AVX and SSE bits.
};
191
192
## \@op[1-4] locations
## The keys are the valid location names; every value is an empty list.
g_kdOpLocations = {
    'reg':      [], ## modrm.reg
    'rm':       [], ## modrm.rm
    'imm':      [], ## immediate instruction data
    'vvvv':     [], ## VEX.vvvv

    # fixed registers.
    'AL':       [],
    'rAX':      [],
    'rDX':      [],
    'CL':       [],
    'rSI':      [],
    'rDI':      [],
    'rFLAGS':   [],
    'CS':       [],
    'DS':       [],
    'ES':       [],
    'FS':       [],
    'GS':       [],
    'SS':       [],

    # fixed values.
    '1':        [],
};
218
## \@op[1-4] types
##
## Value fields:
##    - 0: the normal IDX_ParseXXX handler (IDX_UseModRM == IDX_ParseModRM).
##    - 1: the location (g_kdOpLocations).
##    - 2: disassembler format string version of the type.
##    - 3: disassembler OP_PARAM_XXX (XXX only).
##    - 4: IEM form matching instruction.
##
## Several keys carry a _RO, _WO or _RW suffix and share all five fields with
## another key (presumably read-only / write-only / read-write access
## variants - TODO confirm).
##
## Note! See the A.2.1 in SDM vol 2 for the type names.
g_kdOpTypes = {
    # Fixed addresses
    'Ap':           ( 'IDX_ParseImmAddrF',  'imm',    '%Ap',  'Ap',      'FIXED', ),

    # ModR/M.rm
    'Eb':           ( 'IDX_UseModRM',       'rm',     '%Eb',  'Eb',      'RM',    ),
    'Ed':           ( 'IDX_UseModRM',       'rm',     '%Ed',  'Ed',      'RM',    ),
    'Ed_WO':        ( 'IDX_UseModRM',       'rm',     '%Ed',  'Ed',      'RM',    ),
    'Eq':           ( 'IDX_UseModRM',       'rm',     '%Eq',  'Eq',      'RM',    ),
    'Eq_WO':        ( 'IDX_UseModRM',       'rm',     '%Eq',  'Eq',      'RM',    ),
    'Ew':           ( 'IDX_UseModRM',       'rm',     '%Ew',  'Ew',      'RM',    ),
    'Ew_WO':        ( 'IDX_UseModRM',       'rm',     '%Ew',  'Ew',      'RM',    ),
    'Ev':           ( 'IDX_UseModRM',       'rm',     '%Ev',  'Ev',      'RM',    ),
    'Ey':           ( 'IDX_UseModRM',       'rm',     '%Ey',  'Ey',      'RM',    ),
    'Qd':           ( 'IDX_UseModRM',       'rm',     '%Qd',  'Qd',      'RM',    ),
    'Qq':           ( 'IDX_UseModRM',       'rm',     '%Qq',  'Qq',      'RM',    ),
    'Qq_WO':        ( 'IDX_UseModRM',       'rm',     '%Qq',  'Qq',      'RM',    ),
    'Wss':          ( 'IDX_UseModRM',       'rm',     '%Wss', 'Wss',     'RM',    ),
    'Wss_WO':       ( 'IDX_UseModRM',       'rm',     '%Wss', 'Wss',     'RM',    ),
    'Wsd':          ( 'IDX_UseModRM',       'rm',     '%Wsd', 'Wsd',     'RM',    ),
    'Wsd_WO':       ( 'IDX_UseModRM',       'rm',     '%Wsd', 'Wsd',     'RM',    ),
    'Wps':          ( 'IDX_UseModRM',       'rm',     '%Wps', 'Wps',     'RM',    ),
    'Wps_WO':       ( 'IDX_UseModRM',       'rm',     '%Wps', 'Wps',     'RM',    ),
    'Wpd':          ( 'IDX_UseModRM',       'rm',     '%Wpd', 'Wpd',     'RM',    ),
    'Wpd_WO':       ( 'IDX_UseModRM',       'rm',     '%Wpd', 'Wpd',     'RM',    ),
    'Wdq':          ( 'IDX_UseModRM',       'rm',     '%Wdq', 'Wdq',     'RM',    ),
    'Wdq_WO':       ( 'IDX_UseModRM',       'rm',     '%Wdq', 'Wdq',     'RM',    ),
    'Wq':           ( 'IDX_UseModRM',       'rm',     '%Wq',  'Wq',      'RM',    ),
    'Wq_WO':        ( 'IDX_UseModRM',       'rm',     '%Wq',  'Wq',      'RM',    ),
    'WqZxReg_WO':   ( 'IDX_UseModRM',       'rm',     '%Wq',  'Wq',      'RM',    ),
    'Wqq':          ( 'IDX_UseModRM',       'rm',     '%Wqq', 'Wqq',     'RM',    ),
    'Wqq_WO':       ( 'IDX_UseModRM',       'rm',     '%Wqq', 'Wqq',     'RM',    ),
    'Wx':           ( 'IDX_UseModRM',       'rm',     '%Wx',  'Wx',      'RM',    ),
    'Wx_WO':        ( 'IDX_UseModRM',       'rm',     '%Wx',  'Wx',      'RM',    ),

    # ModR/M.rm - register only.
    'Uq':           ( 'IDX_UseModRM',       'rm',     '%Uq',  'Uq',      'REG'    ),
    'UqHi':         ( 'IDX_UseModRM',       'rm',     '%Uq',  'UqHi',    'REG'    ),
    'Uqq':          ( 'IDX_UseModRM',       'rm',     '%Uqq', 'Uqq',     'REG'    ),
    'Uss':          ( 'IDX_UseModRM',       'rm',     '%Uss', 'Uss',     'REG'    ),
    'Uss_WO':       ( 'IDX_UseModRM',       'rm',     '%Uss', 'Uss',     'REG'    ),
    'Usd':          ( 'IDX_UseModRM',       'rm',     '%Usd', 'Usd',     'REG'    ),
    'Usd_WO':       ( 'IDX_UseModRM',       'rm',     '%Usd', 'Usd',     'REG'    ),
    'Ux':           ( 'IDX_UseModRM',       'rm',     '%Ux',  'Ux',      'REG'    ),
    'Nq':           ( 'IDX_UseModRM',       'rm',     '%Qq',  'Nq',      'REG'    ),

    # ModR/M.rm - memory only.
    'Ma':           ( 'IDX_UseModRM',       'rm',     '%Ma',  'Ma',      'MEM',   ), ##< Only used by BOUND.
    'Mb_RO':        ( 'IDX_UseModRM',       'rm',     '%Mb',  'Mb',      'MEM',   ),
    'Md':           ( 'IDX_UseModRM',       'rm',     '%Md',  'Md',      'MEM',   ),
    'Md_RO':        ( 'IDX_UseModRM',       'rm',     '%Md',  'Md',      'MEM',   ),
    'Md_WO':        ( 'IDX_UseModRM',       'rm',     '%Md',  'Md',      'MEM',   ),
    'Mdq':          ( 'IDX_UseModRM',       'rm',     '%Mdq', 'Mdq',     'MEM',   ),
    'Mdq_WO':       ( 'IDX_UseModRM',       'rm',     '%Mdq', 'Mdq',     'MEM',   ),
    'Mq':           ( 'IDX_UseModRM',       'rm',     '%Mq',  'Mq',      'MEM',   ),
    'Mq_WO':        ( 'IDX_UseModRM',       'rm',     '%Mq',  'Mq',      'MEM',   ),
    'Mps_WO':       ( 'IDX_UseModRM',       'rm',     '%Mps', 'Mps',     'MEM',   ),
    'Mpd_WO':       ( 'IDX_UseModRM',       'rm',     '%Mpd', 'Mpd',     'MEM',   ),
    'Mx':           ( 'IDX_UseModRM',       'rm',     '%Mx',  'Mx',      'MEM',   ),
    'Mx_WO':        ( 'IDX_UseModRM',       'rm',     '%Mx',  'Mx',      'MEM',   ),
    'M_RO':         ( 'IDX_UseModRM',       'rm',     '%M',   'M',       'MEM',   ),
    'M_RW':         ( 'IDX_UseModRM',       'rm',     '%M',   'M',       'MEM',   ),

    # ModR/M.reg
    'Gb':           ( 'IDX_UseModRM',       'reg',    '%Gb',  'Gb',      '',      ),
    'Gw':           ( 'IDX_UseModRM',       'reg',    '%Gw',  'Gw',      '',      ),
    'Gd':           ( 'IDX_UseModRM',       'reg',    '%Gd',  'Gd',      '',      ),
    'Gv':           ( 'IDX_UseModRM',       'reg',    '%Gv',  'Gv',      '',      ),
    'Gv_RO':        ( 'IDX_UseModRM',       'reg',    '%Gv',  'Gv',      '',      ),
    'Gy':           ( 'IDX_UseModRM',       'reg',    '%Gy',  'Gy',      '',      ),
    'Pd':           ( 'IDX_UseModRM',       'reg',    '%Pd',  'Pd',      '',      ),
    'PdZx_WO':      ( 'IDX_UseModRM',       'reg',    '%Pd',  'PdZx',    '',      ),
    'Pq':           ( 'IDX_UseModRM',       'reg',    '%Pq',  'Pq',      '',      ),
    'Pq_WO':        ( 'IDX_UseModRM',       'reg',    '%Pq',  'Pq',      '',      ),
    'Vd':           ( 'IDX_UseModRM',       'reg',    '%Vd',  'Vd',      '',      ),
    'Vd_WO':        ( 'IDX_UseModRM',       'reg',    '%Vd',  'Vd',      '',      ),
    'VdZx_WO':      ( 'IDX_UseModRM',       'reg',    '%Vd',  'Vd',      '',      ),
    'Vdq':          ( 'IDX_UseModRM',       'reg',    '%Vdq', 'Vdq',     '',      ),
    'Vss':          ( 'IDX_UseModRM',       'reg',    '%Vss', 'Vss',     '',      ),
    'Vss_WO':       ( 'IDX_UseModRM',       'reg',    '%Vss', 'Vss',     '',      ),
    'VssZx_WO':     ( 'IDX_UseModRM',       'reg',    '%Vss', 'Vss',     '',      ),
    'Vsd':          ( 'IDX_UseModRM',       'reg',    '%Vsd', 'Vsd',     '',      ),
    'Vsd_WO':       ( 'IDX_UseModRM',       'reg',    '%Vsd', 'Vsd',     '',      ),
    'VsdZx_WO':     ( 'IDX_UseModRM',       'reg',    '%Vsd', 'Vsd',     '',      ),
    'Vps':          ( 'IDX_UseModRM',       'reg',    '%Vps', 'Vps',     '',      ),
    'Vps_WO':       ( 'IDX_UseModRM',       'reg',    '%Vps', 'Vps',     '',      ),
    'Vpd':          ( 'IDX_UseModRM',       'reg',    '%Vpd', 'Vpd',     '',      ),
    'Vpd_WO':       ( 'IDX_UseModRM',       'reg',    '%Vpd', 'Vpd',     '',      ),
    'Vq':           ( 'IDX_UseModRM',       'reg',    '%Vq',  'Vq',      '',      ),
    'Vq_WO':        ( 'IDX_UseModRM',       'reg',    '%Vq',  'Vq',      '',      ),
    'Vdq_WO':       ( 'IDX_UseModRM',       'reg',    '%Vdq', 'Vdq',     '',      ),
    'VqHi':         ( 'IDX_UseModRM',       'reg',    '%Vdq', 'VdqHi',   '',      ),
    'VqHi_WO':      ( 'IDX_UseModRM',       'reg',    '%Vdq', 'VdqHi',   '',      ),
    'VqZx_WO':      ( 'IDX_UseModRM',       'reg',    '%Vq',  'VqZx',    '',      ),
    'Vqq':          ( 'IDX_UseModRM',       'reg',    '%Vqq', 'Vqq',     '',      ),
    'Vqq_WO':       ( 'IDX_UseModRM',       'reg',    '%Vqq', 'Vqq',     '',      ),
    'Vx':           ( 'IDX_UseModRM',       'reg',    '%Vx',  'Vx',      '',      ),
    'Vx_WO':        ( 'IDX_UseModRM',       'reg',    '%Vx',  'Vx',      '',      ),

    # VEX.vvvv
    'By':           ( 'IDX_UseModRM',       'vvvv',   '%By',  'By',      'V',     ),
    'Hdq':          ( 'IDX_UseModRM',       'vvvv',   '%Hdq', 'Hdq',     'V',     ),
    'Hps':          ( 'IDX_UseModRM',       'vvvv',   '%Hps', 'Hps',     'V',     ),
    'Hpd':          ( 'IDX_UseModRM',       'vvvv',   '%Hpd', 'Hpd',     'V',     ),
    'HssHi':        ( 'IDX_UseModRM',       'vvvv',   '%Hx',  'HssHi',   'V',     ),
    'HsdHi':        ( 'IDX_UseModRM',       'vvvv',   '%Hx',  'HsdHi',   'V',     ),
    'Hq':           ( 'IDX_UseModRM',       'vvvv',   '%Hq',  'Hq',      'V',     ),
    'HqHi':         ( 'IDX_UseModRM',       'vvvv',   '%Hq',  'HqHi',    'V',     ),
    'Hqq':          ( 'IDX_UseModRM',       'vvvv',   '%Hqq', 'Hqq',     'V',     ),
    'Hx':           ( 'IDX_UseModRM',       'vvvv',   '%Hx',  'Hx',      'V',     ),

    # Immediate values.
    'Ib':           ( 'IDX_ParseImmByte',   'imm',    '%Ib',  'Ib',      '', ), ##< NB! Could be IDX_ParseImmByteSX for some instrs.
    'Iw':           ( 'IDX_ParseImmUshort', 'imm',    '%Iw',  'Iw',      '',      ),
    'Id':           ( 'IDX_ParseImmUlong',  'imm',    '%Id',  'Id',      '',      ),
    'Iq':           ( 'IDX_ParseImmQword',  'imm',    '%Iq',  'Iq',      '',      ),
    'Iv':           ( 'IDX_ParseImmV',      'imm',    '%Iv',  'Iv',      '',      ), ##< o16: word, o32: dword, o64: qword
    'Iz':           ( 'IDX_ParseImmZ',      'imm',    '%Iz',  'Iz',      '',      ), ##< o16: word, o32|o64:dword

    # Address operands (no ModR/M).
    'Ob':           ( 'IDX_ParseImmAddr',   'imm',    '%Ob',  'Ob',      '',      ),
    'Ov':           ( 'IDX_ParseImmAddr',   'imm',    '%Ov',  'Ov',      '',      ),

    # Relative jump targets
    'Jb':           ( 'IDX_ParseImmBRel',   'imm',    '%Jb',  'Jb',      '',      ),
    'Jv':           ( 'IDX_ParseImmVRel',   'imm',    '%Jv',  'Jv',      '',      ),

    # DS:rSI
    'Xb':           ( 'IDX_ParseXb',        'rSI',    '%eSI', 'Xb',      '',      ),
    'Xv':           ( 'IDX_ParseXv',        'rSI',    '%eSI', 'Xv',      '',      ),
    # ES:rDI
    'Yb':           ( 'IDX_ParseYb',        'rDI',    '%eDI', 'Yb',      '',      ),
    'Yv':           ( 'IDX_ParseYv',        'rDI',    '%eDI', 'Yv',      '',      ),

    'Fv':           ( 'IDX_ParseFixedReg',  'rFLAGS', '%Fv',  'Fv',      '',      ),

    # Fixed registers.
    'AL':           ( 'IDX_ParseFixedReg',  'AL',     'al',   'REG_AL',  '',      ),
    'REG_CL':       ( 'IDX_ParseFixedReg',  'CL',     'cl',   'REG_CL',  '',      ),
    'rAX':          ( 'IDX_ParseFixedReg',  'rAX',    '%eAX', 'REG_EAX', '',      ),
    'rDX':          ( 'IDX_ParseFixedReg',  'rDX',    '%eDX', 'REG_EDX', '',      ),
    'CS':           ( 'IDX_ParseFixedReg',  'CS',     'cs',   'REG_CS',  '',      ), # 8086: push CS
    'DS':           ( 'IDX_ParseFixedReg',  'DS',     'ds',   'REG_DS',  '',      ),
    'ES':           ( 'IDX_ParseFixedReg',  'ES',     'es',   'REG_ES',  '',      ),
    'FS':           ( 'IDX_ParseFixedReg',  'FS',     'fs',   'REG_FS',  '',      ),
    'GS':           ( 'IDX_ParseFixedReg',  'GS',     'gs',   'REG_GS',  '',      ),
    'SS':           ( 'IDX_ParseFixedReg',  'SS',     'ss',   'REG_SS',  '',      ),

    # Fixed values.
    '1':            ( '',                   '1',      '1',    '1',       '',      ),
};
380
381# IDX_ParseFixedReg
382# IDX_ParseVexDest
383
384
## IEMFORM_XXX mappings.
##
## Value fields:
##    - 0: the encoding (one of the g_kdEncodings keys).
##    - 1: list of operand locations (g_kdOpLocations keys) in operand order,
##         None for the FIXED form.
##    - 2: the opcodesub value implied by the form, empty string if none.
##
## NOTE(review): M_REG/M_MEM and VEX_M_REG/VEX_M_MEM have an empty opcodesub
##               while all the other _REG/_MEM forms carry '11 mr/reg' or
##               '!11 mr/reg' - confirm whether that is intentional.
g_kdIemForms = {     # sEncoding,   [ sWhere1, ... ]                    opcodesub       ),
    'RM':           ( 'ModR/M',     [ 'reg', 'rm' ],                    '',             ),
    'RM_REG':       ( 'ModR/M',     [ 'reg', 'rm' ],                    '11 mr/reg',    ),
    'RM_MEM':       ( 'ModR/M',     [ 'reg', 'rm' ],                    '!11 mr/reg',   ),
    'RMI':          ( 'ModR/M',     [ 'reg', 'rm', 'imm' ],             '',             ),
    'RMI_REG':      ( 'ModR/M',     [ 'reg', 'rm', 'imm' ],             '11 mr/reg',    ),
    'RMI_MEM':      ( 'ModR/M',     [ 'reg', 'rm', 'imm' ],             '!11 mr/reg',   ),
    'MR':           ( 'ModR/M',     [ 'rm', 'reg' ],                    '',             ),
    'MR_REG':       ( 'ModR/M',     [ 'rm', 'reg' ],                    '11 mr/reg',    ),
    'MR_MEM':       ( 'ModR/M',     [ 'rm', 'reg' ],                    '!11 mr/reg',   ),
    'MRI':          ( 'ModR/M',     [ 'rm', 'reg', 'imm' ],             '',             ),
    'MRI_REG':      ( 'ModR/M',     [ 'rm', 'reg', 'imm' ],             '11 mr/reg',    ),
    'MRI_MEM':      ( 'ModR/M',     [ 'rm', 'reg', 'imm' ],             '!11 mr/reg',   ),
    'M':            ( 'ModR/M',     [ 'rm', ],                          '',             ),
    'M_REG':        ( 'ModR/M',     [ 'rm', ],                          '',             ),
    'M_MEM':        ( 'ModR/M',     [ 'rm', ],                          '',             ),
    'M1':           ( 'ModR/M',     [ 'rm', '1' ],                      '',             ),
    'M_CL':         ( 'ModR/M',     [ 'rm', 'CL' ],                     '',             ), # shl/rcl/ror/++
    'MI':           ( 'ModR/M',     [ 'rm', 'imm' ],                    '',             ),
    'MI_REG':       ( 'ModR/M',     [ 'rm', 'imm' ],                    '11 mr/reg',    ),
    'MI_MEM':       ( 'ModR/M',     [ 'rm', 'imm' ],                    '!11 mr/reg',   ),
    'R':            ( 'ModR/M',     [ 'reg', ],                         '',             ),

    'VEX_RM':       ( 'VEX.ModR/M', [ 'reg', 'rm' ],                    '',             ),
    'VEX_RM_REG':   ( 'VEX.ModR/M', [ 'reg', 'rm' ],                    '11 mr/reg',    ),
    'VEX_RM_MEM':   ( 'VEX.ModR/M', [ 'reg', 'rm' ],                    '!11 mr/reg',   ),
    'VEX_MR':       ( 'VEX.ModR/M', [ 'rm', 'reg' ],                    '',             ),
    'VEX_MR_REG':   ( 'VEX.ModR/M', [ 'rm', 'reg' ],                    '11 mr/reg',    ),
    'VEX_MR_MEM':   ( 'VEX.ModR/M', [ 'rm', 'reg' ],                    '!11 mr/reg',   ),
    'VEX_MRI':      ( 'VEX.ModR/M', [ 'rm', 'reg', 'imm' ],             '',             ),
    'VEX_MRI_REG':  ( 'VEX.ModR/M', [ 'rm', 'reg', 'imm' ],             '11 mr/reg',    ),
    'VEX_MRI_MEM':  ( 'VEX.ModR/M', [ 'rm', 'reg', 'imm' ],             '!11 mr/reg',   ),
    'VEX_M':        ( 'VEX.ModR/M', [ 'rm', ],                          ''              ),
    'VEX_M_REG':    ( 'VEX.ModR/M', [ 'rm', ],                          ''              ),
    'VEX_M_MEM':    ( 'VEX.ModR/M', [ 'rm', ],                          ''              ),
    'VEX_R':        ( 'VEX.ModR/M', [ 'reg', ],                         ''              ),
    'VEX_RVM':      ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm' ],            '',             ),
    'VEX_RVM_REG':  ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm' ],            '11 mr/reg',    ),
    'VEX_RVM_MEM':  ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm' ],            '!11 mr/reg',   ),
    'VEX_RVMI':     ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm', 'imm' ],     '',             ),
    'VEX_RVMI_REG': ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm', 'imm' ],     '11 mr/reg',    ),
    'VEX_RVMI_MEM': ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm', 'imm' ],     '!11 mr/reg',   ),
    'VEX_RMV':      ( 'VEX.ModR/M', [ 'reg', 'rm', 'vvvv' ],            '',             ),
    'VEX_RMV_REG':  ( 'VEX.ModR/M', [ 'reg', 'rm', 'vvvv' ],            '11 mr/reg',    ),
    'VEX_RMV_MEM':  ( 'VEX.ModR/M', [ 'reg', 'rm', 'vvvv' ],            '!11 mr/reg',   ),
    'VEX_RMI':      ( 'VEX.ModR/M', [ 'reg', 'rm', 'imm' ],             '',             ),
    'VEX_RMI_REG':  ( 'VEX.ModR/M', [ 'reg', 'rm', 'imm' ],             '11 mr/reg',    ),
    'VEX_RMI_MEM':  ( 'VEX.ModR/M', [ 'reg', 'rm', 'imm' ],             '!11 mr/reg',   ),
    'VEX_MVR':      ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg' ],            '',             ),
    'VEX_MVR_REG':  ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg' ],            '11 mr/reg',    ),
    'VEX_MVR_MEM':  ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg' ],            '!11 mr/reg',   ),

    'VEX_VM':       ( 'VEX.ModR/M', [ 'vvvv', 'rm' ],                   '',             ),
    'VEX_VM_REG':   ( 'VEX.ModR/M', [ 'vvvv', 'rm' ],                   '11 mr/reg',    ),
    'VEX_VM_MEM':   ( 'VEX.ModR/M', [ 'vvvv', 'rm' ],                   '!11 mr/reg',   ),
    'VEX_VMI':      ( 'VEX.ModR/M', [ 'vvvv', 'rm', 'imm' ],            '',             ),
    'VEX_VMI_REG':  ( 'VEX.ModR/M', [ 'vvvv', 'rm', 'imm' ],            '11 mr/reg',    ),
    'VEX_VMI_MEM':  ( 'VEX.ModR/M', [ 'vvvv', 'rm', 'imm' ],            '!11 mr/reg',   ),

    'FIXED':        ( 'fixed',      None,                               '',             ),
};
447
## \@oppfx values.
## Every valid prefix value maps to its own empty list.
g_kdPrefixes = {
    sPrefix: []
    for sPrefix in (
        'none',
        '0x66',
        '0xf3',
        '0xf2',
        '!0xf3',    # special case for bsf/tzcnt
    )
};
456
## Special \@opcode tag values.
## Every special value maps to its own empty list.
g_kdSpecialOpcodes = {
    sSpecial: []
    for sSpecial in (
        '/reg',
        'mr/reg',
        '11 /reg',
        '!11 /reg',
        '11 mr/reg',
        '!11 mr/reg',
    )
};
466
## Special \@opcodesub tag values.
## The first value is the real value for aliases.
## The second value is for bs3cg1.
##
## For entries that are not aliases the first value equals the key itself.
g_kdSubOpcodes = {
    'none':                 [ None,                 '',     ],
    '11 mr/reg':            [ '11 mr/reg',          '',     ],
    '11':                   [ '11 mr/reg',          '',     ], ##< alias
    '!11 mr/reg':           [ '!11 mr/reg',         '',     ],
    '!11':                  [ '!11 mr/reg',         '',     ], ##< alias
    'rex.w=0':              [ 'rex.w=0',            'WZ',   ],
    'w=0':                  [ 'rex.w=0',            '',     ], ##< alias
    'rex.w=1':              [ 'rex.w=1',            'WNZ',  ],
    'w=1':                  [ 'rex.w=1',            '',     ], ##< alias
    'vex.l=0':              [ 'vex.l=0',            'L0',   ],
    # Was 'vex.l=0' (copy & paste slip), which silently turned every
    # vex.l=1 tag into vex.l=0.
    'vex.l=1':              [ 'vex.l=1',            'L1',   ],
    '11 mr/reg vex.l=0':    [ '11 mr/reg vex.l=0',  'L0',   ],
    '11 mr/reg vex.l=1':    [ '11 mr/reg vex.l=1',  'L1',   ],
    '!11 mr/reg vex.l=0':   [ '!11 mr/reg vex.l=0', 'L0',   ],
    '!11 mr/reg vex.l=1':   [ '!11 mr/reg vex.l=1', 'L1',   ],
    '!11 mr/reg rex.w=0':   [ '!11 mr/reg rex.w=0', '',     ],
    '!11 mr/reg rex.w=1':   [ '!11 mr/reg rex.w=1', '',     ],
};
489
## Valid values for \@openc
## Each value is a one element list holding a BS3CG1ENC_XXX constant name,
## or None for the 'prefix' encoding.
g_kdEncodings = {
    'ModR/M':       [ 'BS3CG1ENC_MODRM', ],     ##< ModR/M
    'VEX.ModR/M':   [ 'BS3CG1ENC_VEX_MODRM', ], ##< VEX...ModR/M
    'fixed':        [ 'BS3CG1ENC_FIXED', ],     ##< Fixed encoding (address, registers, unused, etc).
    'VEX.fixed':    [ 'BS3CG1ENC_VEX_FIXED', ], ##< VEX + fixed encoding (address, registers, unused, etc).
    'prefix':       [ None, ],                  ##< Prefix
};
498
## \@opunused, \@opinvalid, \@opinvlstyle
## The keys are the valid style names; every value is an empty list.
g_kdInvalidStyles = {
    'immediate':                [], ##< CPU stops decoding immediately after the opcode.
    'vex.modrm':                [], ##< VEX+ModR/M, everyone.
    'intel-modrm':              [], ##< Intel decodes ModR/M.
    'intel-modrm-imm8':         [], ##< Intel decodes ModR/M and an 8-bit immediate.
    'intel-opcode-modrm':       [], ##< Intel decodes another opcode byte followed by ModR/M. (Unused extension tables.)
    'intel-opcode-modrm-imm8':  [], ##< Intel decodes another opcode byte followed by ModR/M and an 8-bit immediate.
};
508
## Known CPU names.  Every name maps to an empty tuple.
g_kdCpuNames = {
    sCpuName: ()
    for sCpuName in ( '8086', '80186', '80286', '80386', '80486', )
};
516
## \@opcpuid
## Maps the \@opcpuid feature names to X86_CPUID_XXX feature constant names.
##
## The 'pclmul', 'monitor' and 'smx' entries used to name the neighbouring
## CPUID.1:ECX bits (DTES64, CPLDS and TM2 respectively); they now name the
## feature the key actually stands for.
g_kdCpuIdFlags = {
    'vme':        'X86_CPUID_FEATURE_EDX_VME',
    'tsc':        'X86_CPUID_FEATURE_EDX_TSC',
    'msr':        'X86_CPUID_FEATURE_EDX_MSR',
    'cx8':        'X86_CPUID_FEATURE_EDX_CX8',
    'sep':        'X86_CPUID_FEATURE_EDX_SEP',
    'cmov':       'X86_CPUID_FEATURE_EDX_CMOV',
    'clfsh':      'X86_CPUID_FEATURE_EDX_CLFSH',
    'clflushopt': 'X86_CPUID_STEXT_FEATURE_EBX_CLFLUSHOPT',
    'mmx':        'X86_CPUID_FEATURE_EDX_MMX',
    'fxsr':       'X86_CPUID_FEATURE_EDX_FXSR',
    'sse':        'X86_CPUID_FEATURE_EDX_SSE',
    'sse2':       'X86_CPUID_FEATURE_EDX_SSE2',
    'sse3':       'X86_CPUID_FEATURE_ECX_SSE3',
    'pclmul':     'X86_CPUID_FEATURE_ECX_PCLMUL',
    'monitor':    'X86_CPUID_FEATURE_ECX_MONITOR',
    'vmx':        'X86_CPUID_FEATURE_ECX_VMX',
    'smx':        'X86_CPUID_FEATURE_ECX_SMX',
    'ssse3':      'X86_CPUID_FEATURE_ECX_SSSE3',
    'fma':        'X86_CPUID_FEATURE_ECX_FMA',
    'cx16':       'X86_CPUID_FEATURE_ECX_CX16',
    'pcid':       'X86_CPUID_FEATURE_ECX_PCID',
    'sse4.1':     'X86_CPUID_FEATURE_ECX_SSE4_1',
    'sse4.2':     'X86_CPUID_FEATURE_ECX_SSE4_2',
    'movbe':      'X86_CPUID_FEATURE_ECX_MOVBE',
    'popcnt':     'X86_CPUID_FEATURE_ECX_POPCNT',
    'aes':        'X86_CPUID_FEATURE_ECX_AES',
    'xsave':      'X86_CPUID_FEATURE_ECX_XSAVE',
    'avx':        'X86_CPUID_FEATURE_ECX_AVX',
    'avx2':       'X86_CPUID_STEXT_FEATURE_EBX_AVX2',
    'f16c':       'X86_CPUID_FEATURE_ECX_F16C',
    'rdrand':     'X86_CPUID_FEATURE_ECX_RDRAND',

    'axmmx':      'X86_CPUID_AMD_FEATURE_EDX_AXMMX',
    '3dnowext':   'X86_CPUID_AMD_FEATURE_EDX_3DNOW_EX',
    '3dnow':      'X86_CPUID_AMD_FEATURE_EDX_3DNOW',
    'svm':        'X86_CPUID_AMD_FEATURE_ECX_SVM',
    'cr8l':       'X86_CPUID_AMD_FEATURE_ECX_CR8L',
    'abm':        'X86_CPUID_AMD_FEATURE_ECX_ABM',
    'sse4a':      'X86_CPUID_AMD_FEATURE_ECX_SSE4A',
    '3dnowprf':   'X86_CPUID_AMD_FEATURE_ECX_3DNOWPRF',
    'xop':        'X86_CPUID_AMD_FEATURE_ECX_XOP',
    'fma4':       'X86_CPUID_AMD_FEATURE_ECX_FMA4',
};
562
## \@ophints values.
## Maps the hint name to a DISOPTYPE_XXX constant name; the value is an empty
## string for the hints at the end of the table.
# pylint: disable=line-too-long
g_kdHints = {
    'invalid':                   'DISOPTYPE_INVALID',                   ##<
    'harmless':                  'DISOPTYPE_HARMLESS',                  ##<
    'controlflow':               'DISOPTYPE_CONTROLFLOW',               ##<
    'potentially_dangerous':     'DISOPTYPE_POTENTIALLY_DANGEROUS',     ##<
    'dangerous':                 'DISOPTYPE_DANGEROUS',                 ##<
    'portio':                    'DISOPTYPE_PORTIO',                    ##<
    'privileged':                'DISOPTYPE_PRIVILEGED',                ##<
    'privileged_notrap':         'DISOPTYPE_PRIVILEGED_NOTRAP',         ##<
    'uncond_controlflow':        'DISOPTYPE_UNCOND_CONTROLFLOW',        ##<
    'relative_controlflow':      'DISOPTYPE_RELATIVE_CONTROLFLOW',      ##<
    'cond_controlflow':          'DISOPTYPE_COND_CONTROLFLOW',          ##<
    'interrupt':                 'DISOPTYPE_INTERRUPT',                 ##<
    'illegal':                   'DISOPTYPE_ILLEGAL',                   ##<
    'rrm_dangerous':             'DISOPTYPE_RRM_DANGEROUS',             ##< Some additional dangerous ones when recompiling raw r0.
    'rrm_dangerous_16':          'DISOPTYPE_RRM_DANGEROUS_16',          ##< Some additional dangerous ones when recompiling 16-bit raw r0.
    'inhibit_irqs':              'DISOPTYPE_INHIBIT_IRQS',              ##< Will or can inhibit irqs (sti, pop ss, mov ss).
    'x86_portio_read':           'DISOPTYPE_X86_PORTIO_READ',           ##<
    'x86_portio_write':          'DISOPTYPE_X86_PORTIO_WRITE',          ##<
    'x86_invalid_64':            'DISOPTYPE_X86_INVALID_64',            ##< Invalid in 64 bits mode
    'x86_only_64':               'DISOPTYPE_X86_ONLY_64',               ##< Only valid in 64 bits mode
    'x86_default_64_op_size':    'DISOPTYPE_X86_DEFAULT_64_OP_SIZE',    ##< Default 64 bits operand size
    'x86_forced_64_op_size':     'DISOPTYPE_X86_FORCED_64_OP_SIZE',     ##< Forced 64 bits operand size; regardless of prefix bytes
    'x86_rexb_extends_opreg':    'DISOPTYPE_X86_REXB_EXTENDS_OPREG',    ##< REX.B extends the register field in the opcode byte
    'x86_mod_fixed_11':          'DISOPTYPE_X86_MOD_FIXED_11',          ##< modrm.mod is always 11b
    'x86_forced_32_op_size_x86': 'DISOPTYPE_X86_FORCED_32_OP_SIZE_X86', ##< Forced 32 bits operand size; regardless of prefix bytes
                                                                        ##  (only in 16 & 32 bits mode!)
    'x86_avx':                   'DISOPTYPE_X86_AVX',                   ##< AVX,AVX2,++ instruction. Not implemented yet!
    'x86_sse':                   'DISOPTYPE_X86_SSE',                   ##< SSE,SSE2,SSE3,++ instruction. Not implemented yet!
    'x86_mmx':                   'DISOPTYPE_X86_MMX',                   ##< MMX,MMXExt,3DNow,++ instruction. Not implemented yet!
    'x86_fpu':                   'DISOPTYPE_X86_FPU',                   ##< FPU instruction. Not implemented yet!
    'ignores_oz_pfx':            '',                                    ##< Ignores operand size prefix 66h.
    'ignores_rexw':              '',                                    ##< Ignores REX.W.
    'ignores_op_sizes':          '',                                    ##< Presumably shorthand for "ignores_oz_pfx | ignores_rexw" (the old text named itself) - TODO confirm.
    'vex_l_zero':                '',                                    ##< VEX.L must be 0.
    'vex_l_one':                 '',                                    ##< VEX.L must be 1.
    'vex_l_ignored':             '',                                    ##< VEX.L is ignored.
    'vex_v_zero':                '',                                    ##< VEX.V must be 0. (generate sub-table?)
    'lock_allowed':              '',                                    ##< Lock prefix allowed.
};
# pylint: enable=line-too-long
606
## \@opxcpttype values (see SDMv2 2.4, 2.7).
## Every valid exception type name maps to its own empty list.
g_kdXcptTypes = {
    sXcptType: []
    for sXcptType in (
        'none',
        '1', '2', '3', '4', '4UA', '5',
        '5LZ',      # LZ = VEX.L must be zero.
        '6', '7', '7LZ', '8', '11', '12',
        'E1', 'E1NF', 'E2', 'E3', 'E3NF', 'E4', 'E4NF', 'E5', 'E5NF', 'E6', 'E6NF', 'E7NF',
        'E9', 'E9NF', 'E10', 'E11', 'E12', 'E12NF',
    )
};
642
643
644def _isValidOpcodeByte(sOpcode):
645 """
646 Checks if sOpcode is a valid lower case opcode byte.
647 Returns true/false.
648 """
649 if len(sOpcode) == 4:
650 if sOpcode[:2] == '0x':
651 if sOpcode[2] in '0123456789abcdef':
652 if sOpcode[3] in '0123456789abcdef':
653 return True;
654 return False;
655
656
657class InstructionMap(object):
658 """
659 Instruction map.
660
661 The opcode map provides the lead opcode bytes (empty for the one byte
662 opcode map). An instruction can be member of multiple opcode maps as long
663 as it uses the same opcode value within the map (because of VEX).
664 """
665
    ## Valid sEncoding values for the constructor; every value is an empty list.
    ## NOTE(review): the comments below say 'vvvvv', but the number presumably
    ##               refers to the opcode map select field of the VEX/XOP prefix
    ##               (mmmmm in the SDM) - confirm.
    kdEncodings = {
        'legacy':   [],
        'vex1':     [], ##< VEX or EVEX prefix with vvvvv = 1
        'vex2':     [], ##< VEX or EVEX prefix with vvvvv = 2
        'vex3':     [], ##< VEX or EVEX prefix with vvvvv = 3
        'xop8':     [], ##< XOP prefix with vvvvv = 8
        'xop9':     [], ##< XOP prefix with vvvvv = 9
        'xop10':    [], ##< XOP prefix with vvvvv = 10
    };
    ## Selectors.
    ##   1. The first value is the number of table entries required by a
    ##      decoder or disassembler for this type of selector.
    ##   2. The second value is how many entries per opcode byte if applicable.
    ## These two values are what getTableSize() and getEntriesPerByte() return.
    kdSelectors = {
        'byte':     [  256, 1, ], ##< next opcode byte selects the instruction (default).
        'byte+pfx': [ 1024, 4, ], ##< next opcode byte selects the instruction together with the 0x66, 0xf2 and 0xf3 prefixes.
        '/r':       [    8, 1, ], ##< modrm.reg selects the instruction.
        'memreg /r':[   16, 1, ], ##< modrm.reg and (modrm.mod == 3) selects the instruction.
        'mod /r':   [   32, 1, ], ##< modrm.reg and modrm.mod selects the instruction.
        '!11 /r':   [    8, 1, ], ##< modrm.reg selects the instruction with modrm.mod != 0y11.
        '11 /r':    [    8, 1, ], ##< modrm.reg selects the instruction with modrm.mod == 0y11.
        '11':       [   64, 1, ], ##< modrm.reg and modrm.rm select the instruction with modrm.mod == 0y11.
    };
689
    ## Define the subentry number according to the Instruction::sPrefix
    ## value for 'byte+pfx' selected tables.
    ## getInstructionIndex() adds this number to four times the opcode byte.
    kiPrefixOrder = {
        'none': 0,
        '0x66': 1,
        '0xf3': 2,
        '0xf2': 3,
    };
698
699 def __init__(self, sName, sIemName = None, asLeadOpcodes = None, sSelector = 'byte+pfx',
700 sEncoding = 'legacy', sDisParse = None):
701 assert sSelector in self.kdSelectors;
702 assert sEncoding in self.kdEncodings;
703 if asLeadOpcodes is None:
704 asLeadOpcodes = [];
705 else:
706 for sOpcode in asLeadOpcodes:
707 assert _isValidOpcodeByte(sOpcode);
708 assert sDisParse is None or sDisParse.startswith('IDX_Parse');
709
710 self.sName = sName;
711 self.sIemName = sIemName;
712 self.asLeadOpcodes = asLeadOpcodes; ##< Lead opcode bytes formatted as hex strings like '0x0f'.
713 self.sSelector = sSelector; ##< The member selector, see kdSelectors.
714 self.sEncoding = sEncoding; ##< The encoding, see kdSelectors.
715 self.aoInstructions = [] # type: Instruction
716 self.sDisParse = sDisParse; ##< IDX_ParseXXX.
717
718 def copy(self, sNewName, sPrefixFilter = None):
719 """
720 Copies the table with filtering instruction by sPrefix if not None.
721 """
722 oCopy = InstructionMap(sNewName, sIemName = self.sIemName, asLeadOpcodes = self.asLeadOpcodes,
723 sSelector = 'byte' if sPrefixFilter is not None and self.sSelector == 'byte+pfx'
724 else self.sSelector,
725 sEncoding = self.sEncoding, sDisParse = self.sDisParse);
726 if sPrefixFilter is None:
727 oCopy.aoInstructions = list(self.aoInstructions);
728 else:
729 oCopy.aoInstructions = [oInstr for oInstr in self.aoInstructions if oInstr.sPrefix == sPrefixFilter];
730 return oCopy;
731
732 def getTableSize(self):
733 """
734 Number of table entries. This corresponds directly to the selector.
735 """
736 return self.kdSelectors[self.sSelector][0];
737
738 def getEntriesPerByte(self):
739 """
740 Number of table entries per opcode bytes.
741
742 This only really makes sense for the 'byte' and 'byte+pfx' selectors, for
743 the others it will just return 1.
744 """
745 return self.kdSelectors[self.sSelector][1];
746
def getInstructionIndex(self, oInstr):
    """
    Returns the table index for the instruction.

    The index is derived from oInstr.getOpcodeByte() according to the
    selector of this map:
        - 'byte':       the opcode byte itself.
        - 'byte+pfx':   opcode byte * 4 + the kiPrefixOrder value of the prefix.
        - '/r':         bits 5:3 of the byte.
        - 'mod /r':     bits 7:3 of the byte.
        - 'memreg /r':  bits 5:3, plus 8 when bits 7:6 are both set.
        - '!11 /r':     bits 5:3; bits 7:6 must not both be set.
        - '11 /r':      bits 5:3; bits 7:6 must both be set.
        - '11':         bits 5:0; bits 7:6 must both be set.

    Asserts on malformed opcodes, unknown prefixes and unknown selectors.
    """
    bOpcode   = oInstr.getOpcodeByte();
    sSelector = self.sSelector;

    # The byte selectors are simple, they need a full '0xNN' opcode byte.
    if sSelector in ('byte', 'byte+pfx'):
        assert oInstr.sOpcode[:2] == '0x' and len(oInstr.sOpcode) == 4, str(oInstr);
        if sSelector == 'byte':
            return bOpcode;

        # The byte + prefix selector additionally requires a known prefix.
        iPrefix = self.kiPrefixOrder.get(oInstr.sPrefix, -16384);
        assert iPrefix >= 0;
        return bOpcode * 4 + iPrefix;

    # The other selectors need masking and shifting.
    iReg   = (bOpcode >> 3) & 0x7;
    fMod11 = (bOpcode & 0xc0) == 0xc0;  # Note! Was compared against 0xc, which the 0xc0 mask never yields.

    if sSelector == '/r':
        return iReg;

    if sSelector == 'mod /r':
        return (bOpcode >> 3) & 0x1f;

    if sSelector == 'memreg /r':
        return iReg | (int(fMod11) << 3);

    if sSelector == '!11 /r':
        assert not fMod11, str(oInstr);
        return iReg;

    if sSelector == '11 /r':
        assert fMod11, str(oInstr);
        return iReg;

    if sSelector == '11':
        assert fMod11, str(oInstr);
        return bOpcode & 0x3f;

    assert False, self.sSelector;
    return -1;
788
def getInstructionsInTableOrder(self):
    """
    Returns the instructions of this map arranged by table index.

    The returned list has getTableSize() entries.  An entry is:
        - None when no instruction was specified for that index,
        - the instruction object when there is exactly one, or
        - a list of instruction objects when several share the index
          (special encodings where for instance a prefix like REX.W or the
          CPU selects between different instructions in the same place).

    Instructions without an opcode are left out.
    """
    cEntries  = self.getTableSize();
    aoEntries = [None for _ in range(cEntries)];

    for oInstr in self.aoInstructions:
        if not oInstr.sOpcode:
            continue;

        idxEntry = self.getInstructionIndex(oInstr);
        assert idxEntry < cEntries, str(idxEntry);

        oCurrent = aoEntries[idxEntry];
        if oCurrent is None:
            aoEntries[idxEntry] = oInstr;               # First one at this index.
        elif isinstance(oCurrent, list):
            oCurrent.append(oInstr);                    # Third or later.
        else:
            aoEntries[idxEntry] = [oCurrent, oInstr];   # Second one, switch to a list.

    return aoEntries;
820
821
def getDisasTableName(self):
    """
    Returns the disassembler table name for this map.

    The name is 'g_aDisas' followed by one piece per underscore separated
    word of sName: 'm', 'r' and two digit lower case hex words are appended
    as '_<word>', any other word is appended with 'grp' and 'map' turned
    into 'Grp' and 'Map' and its first character upper cased.
    """
    asPieces = ['g_aDisas',];
    for sWord in self.sName.split('_'):
        if sWord in ('m', 'r'):
            # Suffixes indicating modrm.mod==mem and modrm.mod==reg respectively.
            asPieces.append('_' + sWord);
        elif len(sWord) == 2 and re.match('^[a-f0-9][a-f0-9]$', sWord):
            asPieces.append('_' + sWord);
        else:
            sWord = sWord.replace('grp', 'Grp').replace('map', 'Map');
            asPieces.append(sWord[0].upper() + sWord[1:]);
    return ''.join(asPieces);
839
def getDisasRangeName(self):
    """
    Returns the disassembler table range name for this map, i.e. the table
    name with 'g_aDisas' replaced by 'g_Disas' and 'Range' appended.
    """
    sTableName = self.getDisasTableName();
    return '%sRange' % (sTableName.replace('g_aDisas', 'g_Disas'),);
845
def isVexMap(self):
    """ Returns True if the encoding of this map starts with 'vex', otherwise False. """
    fVex = self.sEncoding.startswith('vex');
    return fVex;
849
850
class TestType(object):
    """
    Test value type.

    This base class deals with integer like values.  The fUnsigned constructor
    parameter indicates the default stance on zero vs sign extending.  It is
    possible to override fUnsigned=True by prefixing the value with '+' or '-'.
    """
    def __init__(self, sName, acbSizes = None, fUnsigned = True):
        self.sName     = sName;
        self.acbSizes  = [1, 2, 4, 8, 16, 32] if acbSizes is None else acbSizes;  # Normal sizes.
        self.fUnsigned = fUnsigned;

    class BadValue(Exception):
        """ Bad value exception. """
        def __init__(self, sMessage):
            Exception.__init__(self, sMessage);
            self.sMessage = sMessage;

    ## For ascii ~ operator.
    kdHexInv = {
        '0': 'f',
        '1': 'e',
        '2': 'd',
        '3': 'c',
        '4': 'b',
        '5': 'a',
        '6': '9',
        '7': '8',
        '8': '7',
        '9': '6',
        'a': '5',
        'b': '4',
        'c': '3',
        'd': '2',
        'e': '1',
        'f': '0',
    };

    def get(self, sValue):
        """
        Get the shortest normal sized byte representation of sValue.

        sValue is a decimal or '0x' prefixed hexadecimal integer string with
        an optional leading '+' or '-'.  A leading sign forces sign extension.

        Returns ((fSignExtend, bytearray), ) or ((fSignExtend, bytearray), (fSignExtend, bytearray), ).
        The latter form is for AND+OR pairs where the first entry is what to
        AND with the field and the second the one to OR with.  This base class
        only produces the former.  The bytearray is in little endian order.

        Raises BadValue if invalid value.
        """
        if not sValue:
            raise TestType.BadValue('empty value');

        # Deal with sign and detect hexadecimal or decimal.
        fSignExtend = not self.fUnsigned;
        offDigits   = 0;
        if sValue[0] in ('-', '+'):
            fSignExtend = True;
            offDigits   = 1;
        fHex = len(sValue) > offDigits + 2 and sValue[offDigits : offDigits + 2].lower() == '0x';

        # Try convert it to an integer.  int() yields arbitrary precision values
        # on both python 2 and 3, so no dependency on the python 2 only long().
        try:
            iValue = int(sValue, 16 if fHex else 10);
        except ValueError as oXcpt:
            raise TestType.BadValue('failed to convert "%s" to integer (%s)' % (sValue, oXcpt));

        # Convert to hex digits.  '%x' produces neither a '0x' prefix nor a
        # python 2 'L' suffix.  Negative values need to be manually converted
        # to something non-negative (~(-n - 1)) by inverting each digit.
        if iValue >= 0:
            sHex = '%x' % (iValue,);
        else:
            sHex = ''.join([self.kdHexInv[sDigit] for sDigit in '%x' % (-iValue - 1,)]);
            if fSignExtend and sHex[0] not in '89abcdef':
                sHex = 'f' + sHex;  # Make sure the top bit is set.

        # Pick the first normal size that fits.  Values larger than the biggest
        # normal size are rounded up to a multiple of that size.
        cDigits    = len(sHex);
        cMaxDigits = self.acbSizes[-1] * 2;
        if cDigits <= cMaxDigits:
            cNaturalDigits = next(cb * 2 for cb in self.acbSizes if cDigits <= cb * 2);
        else:
            cNaturalDigits = ((cDigits + cMaxDigits - 1) // cMaxDigits) * cMaxDigits;

        # Pad with zeros or, for negative values, with 'f's.
        if cNaturalDigits != cDigits:
            sHex = ('0' if iValue >= 0 else 'f') * (cNaturalDigits - cDigits) + sHex;

        # Convert to bytearray, least significant byte first, and return it.
        abValue = bytearray([int(sHex[offHex - 2 : offHex], 16) for offHex in range(len(sHex), 0, -2)]);

        return ((fSignExtend, abValue),);

    def validate(self, sValue):
        """
        Returns True if value is okay, error message on failure.
        """
        try:
            self.get(sValue);
        except TestType.BadValue as oXcpt:
            return oXcpt.sMessage;
        return True;

    def isAndOrPair(self, sValue):
        """
        Checks if sValue is a pair.  Never the case for this base class.
        """
        _ = sValue;
        return False;
975
976
class TestTypeEflags(TestType):
    """
    Special value parsing for EFLAGS/RFLAGS/FLAGS.

    The value is a comma separated list of flag mnemonics which are resolved
    via g_kdEFlagsMnemonics.  Mnemonics resolving to a '!' prefixed constant
    clear the flag, the others set it.
    """

    ## The mnemonics that clear a flag.  A value containing any of these is an AND+OR pair.
    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 };

    def __init__(self, sName):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);

    def get(self, sValue):
        """
        Converts the flag mnemonic list into byte representation(s).

        Returns ((fSignExtend, abSet),) when no flag is cleared, otherwise the
        pair ((fSignExtend, abClear), (fSignExtend, abSet)).

        Raises BadValue on unknown mnemonics.
        """
        fClear = 0;
        fSet   = 0;
        for sFlag in sValue.split(','):
            sConstant = g_kdEFlagsMnemonics.get(sFlag, None);
            if sConstant is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sFlag, sValue))
            if sConstant[0] == '!':
                fClear |= g_kdX86EFlagsConstants[sConstant[1:]];
            else:
                fSet   |= g_kdX86EFlagsConstants[sConstant];

        aoSet = TestType.get(self, '0x%x' % (fSet,));
        if fClear != 0:
            aoClear = TestType.get(self, '%#x' % (fClear,))
            assert self.isAndOrPair(sValue) is True;
            return (aoClear[0], aoSet[0]);
        assert self.isAndOrPair(sValue) is False;
        return aoSet;

    def isAndOrPair(self, sValue):
        """
        Checks if sValue contains any of the flag clearing mnemonics.

        Whole mnemonics are compared, the same way get() tokenizes the value.
        A substring search would also trigger on a mnemonic that merely
        contains one of the clearing mnemonics (say a hypothetical 'iopl',
        which contains 'pl') and thereby trip the assertions in get().
        """
        return any(sFlag in self.kdZeroValueFlags for sFlag in sValue.split(','));
1012
class TestTypeFromDict(TestType):
    """
    Special value parsing for flag registers described by a dictionary.

    The value is a comma separated list of flag names.  Each name is upper
    cased, prefixed with sConstantPrefix and looked up in kdConstantsAndValues,
    and the resulting values are ORed together.
    """

    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 };

    def __init__(self, sName, kdConstantsAndValues, sConstantPrefix):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);
        self.sConstantPrefix      = sConstantPrefix;
        self.kdConstantsAndValues = kdConstantsAndValues;

    def get(self, sValue):
        """
        Converts the flag name list into the byte representation of the ORed
        together flag values.

        Raises BadValue on unknown flag names.
        """
        fCombined = 0;
        for sFlag in sValue.split(','):
            sConstant = self.sConstantPrefix + sFlag.upper();
            fCurrent  = self.kdConstantsAndValues.get(sConstant);
            if fCurrent is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sFlag, sValue))
            fCombined = fCombined | fCurrent;
        return TestType.get(self, '0x%x' % (fCombined,));
1033
1034
def _buildTestInOutFields():
    """
    Builds the TestInOut.kdFields table.

    Returns a dictionary mapping the field name to a tuple of
    ( default type, [both|input|output], ).

    The regular register families are generated from name lists, so a single
    entry cannot be mistyped the way 'xmm15_dw0' was.
    """
    tUintBoth = ( 'uint', 'both', );
    dFields   = {};

    def addUintBoth(asNames):
        """ Adds each name as a 'uint' field usable for both input and output. """
        for sName in asNames:
            dFields[sName] = tUintBoth;

    asExtRegs = ['r%u' % (iReg,) for iReg in range(8, 16)];
    asRegs64  = ['rax', 'rdx', 'rcx', 'rbx', 'rsp', 'rbp', 'rsi', 'rdi'] + asExtRegs;
    asXmmRegs = ['xmm%u' % (iReg,) for iReg in range(16)];

    # Operands (\@op1 thru \@op4).
    addUintBoth(['op%u' % (iOperand,) for iOperand in range(1, 5)]);
    # Flags.
    dFields['efl']       = ( 'efl',  'both',   );
    dFields['efl_undef'] = ( 'uint', 'output', );
    # 8-bit GPRs.
    addUintBoth(['al', 'cl', 'dl', 'bl', 'ah', 'ch', 'dh', 'bh'] + [sReg + 'l' for sReg in asExtRegs]);
    # 16-bit GPRs.
    addUintBoth(['ax', 'dx', 'cx', 'bx', 'sp', 'bp', 'si', 'di'] + [sReg + 'w' for sReg in asExtRegs]);
    # 32-bit GPRs.
    addUintBoth(['eax', 'edx', 'ecx', 'ebx', 'esp', 'ebp', 'esi', 'edi'] + [sReg + 'd' for sReg in asExtRegs]);
    # 64-bit GPRs.
    addUintBoth(asRegs64);
    # 16-bit, 32-bit or 64-bit registers according to operand size.
    addUintBoth(['oz.' + sReg for sReg in asRegs64]);
    # Control registers, each with a type of its own.
    for sReg in ('cr0', 'cr4', 'xcr0'):
        dFields[sReg] = ( sReg, 'both', );
    # FPU Registers
    addUintBoth(['fcw', 'fsw', 'ftw', 'fop', 'fpuip', 'fpucs', 'fpudp', 'fpuds', 'mxcsr']);
    addUintBoth(['st%u' % (iReg,) for iReg in range(8)]);
    # MMX registers.
    addUintBoth(['mm%u' % (iReg,) for iReg in range(8)]);
    # SSE registers and their sub-fields.
    addUintBoth(asXmmRegs);
    for sSuffix in ('.lo', '.hi', '.lo.zx', '.dw0'):
        addUintBoth([sReg + sSuffix for sReg in asXmmRegs]);
    # Legacy misspelling of 'xmm15.dw0', kept so anything using it keeps working.
    dFields['xmm15_dw0'] = tUintBoth;
    # AVX registers.
    addUintBoth(['ymm%u' % (iReg,) for iReg in range(16)]);
    # Special ones.
    dFields['value.xcpt'] = ( 'uint', 'output', );

    return dFields;


class TestInOut(object):
    """
    One input or output state modifier.

    This should be thought as values to modify BS3REGCTX and extended (needs
    to be structured) state.
    """
    ## Assigned operators.
    kasOperators = [
        '&|=',  # Special AND(INV)+OR operator for use with EFLAGS.
        '&~=',
        '&=',
        '|=',
        '='
    ];
    ## Types
    # NOTE(review): 'int' gets the fUnsigned=True default of TestType, i.e. it
    #               is set up exactly like 'uint' - confirm that this is intended.
    kdTypes = {
        'uint':  TestType('uint', fUnsigned = True),
        'int':   TestType('int'),
        'efl':   TestTypeEflags('efl'),
        'cr0':   TestTypeFromDict('cr0', g_kdX86Cr0Constants, 'X86_CR0_'),
        'cr4':   TestTypeFromDict('cr4', g_kdX86Cr4Constants, 'X86_CR4_'),
        'xcr0':  TestTypeFromDict('xcr0', g_kdX86XSaveCConstants, 'XSAVE_C_'),
    };
    ## CPU context fields.
    ## Maps the field name to ( default type, [both|input|output], ).
    kdFields = _buildTestInOutFields();

    def __init__(self, sField, sOp, sValue, sType):
        """
        sField must be a key of kdFields and sOp one of kasOperators, all
        four arguments must be strings.  This is only checked by assertions.
        """
        assert sField in self.kdFields;
        assert sOp in self.kasOperators;
        self.sField = sField;
        self.sOp    = sOp;
        self.sValue = sValue;
        self.sType  = sType;
        assert isinstance(sField, str);
        assert isinstance(sOp, str);
        assert isinstance(sType, str);
        assert isinstance(sValue, str);
1300
1301
class TestSelector(object):
    """
    One selector for an instruction test: a variable, a compare operator and
    one of the values that are valid for the variable.
    """
    ## Selector compare operators.
    kasCompareOps = [ '==', '!=' ];

    ## Selector variables and their valid values.
    kdVariables = {
        # Operand size.
        'size':     { 'o16': 'size_o16', 'o32': 'size_o32', 'o64': 'size_o64', },
        # VEX.L value.
        'vex.l':    { '0': 'vexl_0', '1': 'vexl_1', },
        # Execution ring.
        'ring':     {
            '0':    'ring_0',
            '1':    'ring_1',
            '2':    'ring_2',
            '3':    'ring_3',
            '0..2': 'ring_0_thru_2',
            '1..3': 'ring_1_thru_3',
        },
        # Basic code mode.
        'codebits': { '64': 'code_64bit', '32': 'code_32bit', '16': 'code_16bit', },
        # cpu modes.
        'mode':     {
            'real': 'mode_real',
            'prot': 'mode_prot',
            'long': 'mode_long',
            'v86':  'mode_v86',
            'smm':  'mode_smm',
            'vmx':  'mode_vmx',
            'svm':  'mode_svm',
        },
        # paging on/off
        'paging':   { 'on': 'paging_on', 'off': 'paging_off', },
        # CPU vendor
        'vendor':   { 'amd': 'vendor_amd', 'intel': 'vendor_intel', 'via': 'vendor_via', },
    };

    ## Selector shorthand predicates.
    ## These translates into variable expressions.
    kdPredicates = {
        # Operand size.
        'o16':          'size==o16',
        'o32':          'size==o32',
        'o64':          'size==o64',
        # Execution ring.
        'ring0':        'ring==0',
        '!ring0':       'ring==1..3',
        'ring1':        'ring==1',
        'ring2':        'ring==2',
        'ring3':        'ring==3',
        'user':         'ring==3',
        'supervisor':   'ring==0..2',
        # Basic code mode.
        '16-bit':       'codebits==16',
        '32-bit':       'codebits==32',
        '64-bit':       'codebits==64',
        # cpu modes.
        'real':         'mode==real',
        'prot':         'mode==prot',
        'long':         'mode==long',
        'v86':          'mode==v86',
        'smm':          'mode==smm',
        'vmx':          'mode==vmx',
        'svm':          'mode==svm',
        # paging on/off
        'paging':       'paging==on',
        '!paging':      'paging==off',
        # CPU vendor
        'amd':          'vendor==amd',
        '!amd':         'vendor!=amd',
        'intel':        'vendor==intel',
        '!intel':       'vendor!=intel',
        'via':          'vendor==via',
        '!via':         'vendor!=via',
    };

    def __init__(self, sVariable, sOp, sValue):
        """
        sVariable must be a key of kdVariables, sOp one of kasCompareOps and
        sValue a valid value for sVariable.  This is only checked by assertions.
        """
        assert sVariable in self.kdVariables;
        assert sOp in self.kasCompareOps;
        dValidValues = self.kdVariables[sVariable];
        assert sValue in dValidValues;
        self.sVariable = sVariable;
        self.sOp       = sOp;
        self.sValue    = sValue;
1398
1399
1400class InstructionTest(object):
1401 """
1402 Instruction test.
1403 """
1404
1405 def __init__(self, oInstr): # type: (InstructionTest, Instruction)
1406 self.oInstr = oInstr # type: InstructionTest
1407 self.aoInputs = [] # type: List[TestInOut]
1408 self.aoOutputs = [] # type: List[TestInOut]
1409 self.aoSelectors = [] # type: List[TestSelector]
1410
1411 def toString(self, fRepr = False):
1412 """
1413 Converts it to string representation.
1414 """
1415 asWords = [];
1416 if self.aoSelectors:
1417 for oSelector in self.aoSelectors:
1418 asWords.append('%s%s%s' % (oSelector.sVariable, oSelector.sOp, oSelector.sValue,));
1419 asWords.append('/');
1420
1421 for oModifier in self.aoInputs:
1422 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
1423
1424 asWords.append('->');
1425
1426 for oModifier in self.aoOutputs:
1427 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
1428
1429 if fRepr:
1430 return '<' + ' '.join(asWords) + '>';
1431 return ' '.join(asWords);
1432
1433 def __str__(self):
1434 """ Provide string represenation. """
1435 return self.toString(False);
1436
1437 def __repr__(self):
1438 """ Provide unambigious string representation. """
1439 return self.toString(True);
1440
class Operand(object):
    """
    Instruction operand, consisting of a location and a type.
    """

    def __init__(self, sWhere, sType):
        """
        sWhere must be a key of g_kdOpLocations and sType a key of g_kdOpTypes.
        This is only checked by assertions.
        """
        assert sWhere in g_kdOpLocations, sWhere;
        assert sType  in g_kdOpTypes, sType;
        self.sWhere = sWhere;   ##< g_kdOpLocations
        self.sType  = sType;    ##< g_kdOpTypes

    def usesModRM(self):
        """ Returns True if the type starts with 'E', 'G' or 'M', i.e. uses some form of ModR/M encoding. """
        chFirst = self.sType[0];
        return chFirst == 'E' or chFirst == 'G' or chFirst == 'M';
1455
1456
1457
class Instruction(object): # pylint: disable=too-many-instance-attributes
    """
    Instruction.

    Holds the core description of an instruction, implementation details,
    decoding bookkeeping and the raw intermediate input it was built from.
    """

    def __init__(self, sSrcFile, iLine):
        ## @name Core attributes.
        ## @{
        self.oParent        = None      # type: Instruction
        self.sMnemonic      = None;
        self.sBrief         = None;
        self.asDescSections = []        # type: List[str]
        self.aoMaps         = []        # type: List[InstructionMap]
        self.aoOperands     = []        # type: List[Operand]
        self.sPrefix        = None;     ##< Single prefix: None, 'none', 0x66, 0xf3, 0xf2
        self.sOpcode        = None      # type: str
        self.sSubOpcode     = None      # type: str
        self.sEncoding      = None;
        self.asFlTest       = None;
        self.asFlModify     = None;
        self.asFlUndefined  = None;
        self.asFlSet        = None;
        self.asFlClear      = None;
        self.dHints         = {};       ##< Dictionary of instruction hints, flags, whatnot. (Dictionary for speed; dummy value).
        self.sDisEnum       = None;     ##< OP_XXXX value.  Default is based on the uppercased mnemonic.
        self.asCpuIds       = [];       ##< The CPUID feature bit names for this instruction. If multiple, assume AND.
        self.asReqFeatures  = [];       ##< Which features are required to be enabled to run this instruction.
        self.aoTests        = []        # type: List[InstructionTest]
        self.sMinCpu        = None;     ##< Indicates the minimum CPU required for the instruction. Not set when oCpuExpr is.
        self.oCpuExpr       = None;     ##< Some CPU restriction expression...
        self.sGroup         = None;
        self.fUnused        = False;    ##< Unused instruction.
        self.fInvalid       = False;    ##< Invalid instruction (like UD2).
        self.sInvalidStyle  = None;     ##< Invalid behaviour style (g_kdInvalidStyles),
        self.sXcptType      = None;     ##< Exception type (g_kdXcptTypes).
        ## @}

        ## @name Implementation attributes.
        ## @{
        self.sStats         = None;
        self.sFunction      = None;
        self.fStub          = False;
        self.fUdStub        = False;
        ## @}

        ## @name Decoding info
        ## @{
        self.sSrcFile       = sSrcFile;
        self.iLineCreated   = iLine;
        self.iLineCompleted = None;
        self.cOpTags        = 0;
        self.iLineFnIemOpMacro  = -1;
        self.iLineMnemonicMacro = -1;
        ## @}

        ## @name Intermediate input fields.
        ## @{
        self.sRawDisOpNo    = None;
        self.asRawDisParams = [];
        self.sRawIemOpFlags = None;
        self.sRawOldOpcodes = None;
        self.asCopyTests    = [];
        ## @}

        ## All the MC blocks associated with this instruction.
        self.aoMcBlocks     = []        # type: List[McBlock]

    def toString(self, fRepr = False):
        """
        Turn object into a string.

        The result is a '; ' separated list of 'name=value' pairs, leaving out
        the values that are None, enclosed in '<' and '>' when fRepr is True.
        """
        atFields = [ ('opcode', self.sOpcode), ];
        if self.sPrefix:
            atFields.append(('prefix', self.sPrefix));
        atFields.append(('mnemonic', self.sMnemonic));
        for iOperand, oOperand in enumerate(self.aoOperands, 1):
            atFields.append(('op%u' % (iOperand,), '%s:%s' % (oOperand.sWhere, oOperand.sType,)));
        if self.aoMaps:
            atFields.append(('maps', ','.join([oMap.sName for oMap in self.aoMaps])));
        atFields.append(('encoding', self.sEncoding));
        if self.dHints:
            atFields.append(('hints', ','.join(self.dHints.keys())));
        atFields.append(('disenum', self.sDisEnum));
        if self.asCpuIds:
            atFields.append(('cpuid', ','.join(self.asCpuIds)));
        atFields.append(('group', self.sGroup));
        if self.fUnused:
            atFields.append(('unused', 'True'));
        if self.fInvalid:
            atFields.append(('invalid', 'True'));
        atFields.extend([
            ('invlstyle', self.sInvalidStyle),
            ('fltest',    self.asFlTest),
            ('flmodify',  self.asFlModify),
            ('flundef',   self.asFlUndefined),
            ('flset',     self.asFlSet),
            ('flclear',   self.asFlClear),
            ('mincpu',    self.sMinCpu),
            ('stats',     self.sStats),
            ('sFunction', self.sFunction),
        ]);
        if self.fStub:
            atFields.append(('fStub', 'True'));
        if self.fUdStub:
            atFields.append(('fUdStub', 'True'));
        if self.cOpTags:
            atFields.append(('optags', str(self.cOpTags)));
        if self.iLineFnIemOpMacro != -1:
            atFields.append(('FNIEMOP_XXX', str(self.iLineFnIemOpMacro)));
        if self.iLineMnemonicMacro != -1:
            atFields.append(('IEMOP_MNEMMONICn', str(self.iLineMnemonicMacro)));

        sRet = '; '.join(['%s=%s' % (sField, sValue,) for sField, sValue in atFields if sValue is not None]);
        if fRepr:
            sRet = '<' + sRet + '>';
        return sRet;

    def __str__(self):
        """ Provide string representation. """
        return self.toString(False);

    def __repr__(self):
        """ Provide unambiguous string representation. """
        return self.toString(True);

    def copy(self, oMap = None, sOpcode = None, sSubOpcode = None, sPrefix = None):
        """
        Makes a copy of the object for the purpose of putting in a different map
        or a different place in the current map.

        The copy gets this object as parent.  oMap, sOpcode, sSubOpcode and
        sPrefix replace the corresponding values when given.  Lists and the
        hint dictionary are copied shallowly, the flag lists are shared, and
        aoMcBlocks is not carried over.
        """
        oCopy = Instruction(self.sSrcFile, self.iLineCreated);
        oCopy.oParent    = self;

        # The values the caller may override.
        oCopy.aoMaps     = [oMap,] if oMap else list(self.aoMaps);
        oCopy.sPrefix    = sPrefix    if sPrefix    else self.sPrefix;
        oCopy.sOpcode    = sOpcode    if sOpcode    else self.sOpcode;
        oCopy.sSubOpcode = sSubOpcode if sSubOpcode else self.sSubOpcode;

        # Attributes that are taken over as they are.
        for sAttr in ('sMnemonic', 'sBrief', 'sEncoding',
                      'asFlTest', 'asFlModify', 'asFlUndefined', 'asFlSet', 'asFlClear',
                      'sDisEnum', 'sMinCpu', 'oCpuExpr', 'sGroup', 'fUnused', 'fInvalid', 'sInvalidStyle', 'sXcptType',
                      'sStats', 'sFunction', 'fStub', 'fUdStub',
                      'iLineCompleted', 'cOpTags', 'iLineFnIemOpMacro', 'iLineMnemonicMacro',
                      'sRawDisOpNo', 'sRawIemOpFlags', 'sRawOldOpcodes',):
            setattr(oCopy, sAttr, getattr(self, sAttr));

        # Lists that get a (shallow) copy of their own.  Deeper copy for aoOperands and aoTests?
        for sAttr in ('asDescSections', 'aoOperands', 'asCpuIds', 'asReqFeatures', 'aoTests',
                      'asRawDisParams', 'asCopyTests',):
            setattr(oCopy, sAttr, list(getattr(self, sAttr)));
        oCopy.dHints = dict(self.dHints);

        return oCopy;

    def getOpcodeByte(self):
        """
        Decodes sOpcode into a byte range integer value.

        Supported forms are '0xNN' (the value itself), '/r' (r << 3),
        '11/r' ((r << 3) | 0xc0) and '!11/r' ((r << 3) | 0x80), where r is a
        single digit.

        Raises exception if sOpcode is None or invalid.
        """
        if self.sOpcode is None:
            raise Exception('No opcode byte for %s!' % (self,));
        sOpcode = str(self.sOpcode); # pylint type confusion workaround.

        # Full hex byte form.
        if sOpcode[:2] == '0x':
            return int(sOpcode, 16);

        # The /r, 11/r and !11/r forms: A lead-in followed by a single digit.
        # Note! The !11/r form returns mod=1.
        ## @todo the !11/r form doesn't really work...
        for sLeadIn, fModBits in (('/', 0x00), ('11/', 0xc0), ('!11/', 0x80)):
            if len(sOpcode) == len(sLeadIn) + 1 and sOpcode.startswith(sLeadIn) and sOpcode[-1].isdigit():
                return (int(sOpcode[-1]) << 3) | fModBits;

        raise Exception('unsupported opcode byte spec "%s" for %s' % (sOpcode, self,));

    @staticmethod
    def _flagsToIntegerMask(asFlags):
        """
        Returns the integer mask value for asFlags, 0 if None or empty.
        """
        if not asFlags:
            return 0;
        uMask = 0;
        for sFlag in asFlags:
            sConstant = g_kdEFlagsMnemonics[sFlag];
            assert sConstant[0] != '!', sConstant
            uMask = uMask | g_kdX86EFlagsConstants[sConstant];
        return uMask;

    def getTestedFlagsMask(self):
        """ Returns asFlTest into a integer mask value """
        return self._flagsToIntegerMask(self.asFlTest);

    def getModifiedFlagsMask(self):
        """ Returns asFlModify into a integer mask value """
        return self._flagsToIntegerMask(self.asFlModify);

    def getUndefinedFlagsMask(self):
        """ Returns asFlUndefined into a integer mask value """
        return self._flagsToIntegerMask(self.asFlUndefined);

    def getSetFlagsMask(self):
        """ Returns asFlSet into a integer mask value """
        return self._flagsToIntegerMask(self.asFlSet);

    def getClearedFlagsMask(self):
        """ Returns asFlClear into a integer mask value """
        return self._flagsToIntegerMask(self.asFlClear);

    @staticmethod
    def _flagsToC(asFlags):
        """
        Returns asFlags converted to X86_EFL_XXX ored together C-style, '0' if
        None or empty.
        """
        if not asFlags:
            return '0';
        asConstants = [];
        for sFlag in asFlags:
            sConstant = g_kdEFlagsMnemonics[sFlag];
            assert sConstant[0] != '!', sConstant
            asConstants.append(sConstant);
        return ' | '.join(asConstants);

    def getTestedFlagsCStyle(self):
        """ Returns asFlTest as C constants ored together. """
        return self._flagsToC(self.asFlTest);

    def getModifiedFlagsCStyle(self):
        """ Returns asFlModify as C constants ored together. """
        return self._flagsToC(self.asFlModify);

    def getUndefinedFlagsCStyle(self):
        """ Returns asFlUndefined as C constants ored together. """
        return self._flagsToC(self.asFlUndefined);

    def getSetFlagsCStyle(self):
        """ Returns asFlSet as C constants ored together. """
        return self._flagsToC(self.asFlSet);

    def getClearedFlagsCStyle(self):
        """ Returns asFlClear as C constants ored together. """
        return self._flagsToC(self.asFlClear);

    def onlyInVexMaps(self):
        """ Returns True if only in VEX maps, otherwise False.  (No maps -> False) """
        return bool(self.aoMaps) and all(oMap.isVexMap() for oMap in self.aoMaps);
1733
1734
1735
## All the instructions.
g_aoAllInstructions = [] # type: List[Instruction]

## All the instructions indexed by statistics name (opstat).
g_dAllInstructionsByStat = {} # type: Dict[str, Instruction]

## All the instructions indexed by function name (opfunction).
## Each function name maps to a list of instructions.
g_dAllInstructionsByFunction = {} # type: Dict[str, List[Instruction]]

## Instructions tagged by oponlytest
g_aoOnlyTestInstructions = [] # type: List[Instruction]
1747
## Instruction maps.
g_aoInstructionMaps = [
    # One byte map and its groups.
    InstructionMap('one', 'g_apfnOneByteMap', sSelector = 'byte'),
    InstructionMap('grp1_80', asLeadOpcodes = ['0x80'], sSelector = '/r'),
    InstructionMap('grp1_81', asLeadOpcodes = ['0x81'], sSelector = '/r'),
    InstructionMap('grp1_82', asLeadOpcodes = ['0x82'], sSelector = '/r'),
    InstructionMap('grp1_83', asLeadOpcodes = ['0x83'], sSelector = '/r'),
    InstructionMap('grp1a', asLeadOpcodes = ['0x8f'], sSelector = '/r'),
    InstructionMap('grp2_c0', asLeadOpcodes = ['0xc0'], sSelector = '/r'),
    InstructionMap('grp2_c1', asLeadOpcodes = ['0xc1'], sSelector = '/r'),
    InstructionMap('grp2_d0', asLeadOpcodes = ['0xd0'], sSelector = '/r'),
    InstructionMap('grp2_d1', asLeadOpcodes = ['0xd1'], sSelector = '/r'),
    InstructionMap('grp2_d2', asLeadOpcodes = ['0xd2'], sSelector = '/r'),
    InstructionMap('grp2_d3', asLeadOpcodes = ['0xd3'], sSelector = '/r'),
    ## @todo g_apfnEscF1_E0toFF
    InstructionMap('grp3_f6', asLeadOpcodes = ['0xf6'], sSelector = '/r'),
    InstructionMap('grp3_f7', asLeadOpcodes = ['0xf7'], sSelector = '/r'),
    InstructionMap('grp4', asLeadOpcodes = ['0xfe'], sSelector = '/r'),
    InstructionMap('grp5', asLeadOpcodes = ['0xff'], sSelector = '/r'),
    InstructionMap('grp11_c6_m', asLeadOpcodes = ['0xc6'], sSelector = '!11 /r'),
    InstructionMap('grp11_c6_r', asLeadOpcodes = ['0xc6'], sSelector = '11'),    # xabort
    InstructionMap('grp11_c7_m', asLeadOpcodes = ['0xc7'], sSelector = '!11 /r'),
    InstructionMap('grp11_c7_r', asLeadOpcodes = ['0xc7'], sSelector = '11'),    # xbegin

    # Two byte (0x0f) map and its groups.
    InstructionMap('two0f', 'g_apfnTwoByteMap', asLeadOpcodes = ['0x0f'], sDisParse = 'IDX_ParseTwoByteEsc'),
    InstructionMap('grp6', 'g_apfnGroup6', asLeadOpcodes = ['0x0f', '0x00'], sSelector = '/r'),
    InstructionMap('grp7_m', 'g_apfnGroup7Mem', asLeadOpcodes = ['0x0f', '0x01'], sSelector = '!11 /r'),
    InstructionMap('grp7_r', asLeadOpcodes = ['0x0f', '0x01'], sSelector = '11'),
    InstructionMap('grp8', asLeadOpcodes = ['0x0f', '0xba'], sSelector = '/r'),
    InstructionMap('grp9', 'g_apfnGroup9RegReg', asLeadOpcodes = ['0x0f', '0xc7'], sSelector = 'mod /r'),
    ## @todo What about g_apfnGroup9MemReg?
    InstructionMap('grp10', None, asLeadOpcodes = ['0x0f', '0xb9'], sSelector = '/r'),  # UD1 /w modr/m
    InstructionMap('grp12', 'g_apfnGroup12RegReg', asLeadOpcodes = ['0x0f', '0x71'], sSelector = 'mod /r'),
    InstructionMap('grp13', 'g_apfnGroup13RegReg', asLeadOpcodes = ['0x0f', '0x72'], sSelector = 'mod /r'),
    InstructionMap('grp14', 'g_apfnGroup14RegReg', asLeadOpcodes = ['0x0f', '0x73'], sSelector = 'mod /r'),
    InstructionMap('grp15', 'g_apfnGroup15MemReg', asLeadOpcodes = ['0x0f', '0xae'], sSelector = 'memreg /r'),
    ## @todo What about g_apfnGroup15RegReg?
    InstructionMap('grp16', asLeadOpcodes = ['0x0f', '0x18'], sSelector = 'mod /r'),
    InstructionMap('grpA17', asLeadOpcodes = ['0x0f', '0x78'], sSelector = '/r'),  # AMD: EXTRQ weirdness
    InstructionMap('grpP', asLeadOpcodes = ['0x0f', '0x0d'], sSelector = '/r'),    # AMD: prefetch

    # Three byte maps.
    InstructionMap('three0f38', 'g_apfnThreeByte0f38', asLeadOpcodes = ['0x0f', '0x38']),
    InstructionMap('three0f3a', 'g_apfnThreeByte0f3a', asLeadOpcodes = ['0x0f', '0x3a']),

    # VEX maps and groups.
    InstructionMap('vexmap1', 'g_apfnVexMap1', sEncoding = 'vex1'),
    InstructionMap('vexgrp12', 'g_apfnVexGroup12RegReg', sEncoding = 'vex1', asLeadOpcodes = ['0x71'], sSelector = 'mod /r'),
    InstructionMap('vexgrp13', 'g_apfnVexGroup13RegReg', sEncoding = 'vex1', asLeadOpcodes = ['0x72'], sSelector = 'mod /r'),
    InstructionMap('vexgrp14', 'g_apfnVexGroup14RegReg', sEncoding = 'vex1', asLeadOpcodes = ['0x73'], sSelector = 'mod /r'),
    InstructionMap('vexgrp15', 'g_apfnVexGroup15MemReg', sEncoding = 'vex1', asLeadOpcodes = ['0xae'], sSelector = 'memreg /r'),
    InstructionMap('vexgrp17', 'g_apfnVexGroup17_f3', sEncoding = 'vex1', asLeadOpcodes = ['0xf3'], sSelector = '/r'),

    InstructionMap('vexmap2', 'g_apfnVexMap2', sEncoding = 'vex2'),
    InstructionMap('vexmap3', 'g_apfnVexMap3', sEncoding = 'vex3'),

    # 3DNow! and XOP maps and groups.
    InstructionMap('3dnow', asLeadOpcodes = ['0x0f', '0x0f']),
    InstructionMap('xopmap8', sEncoding = 'xop8'),
    InstructionMap('xopmap9', sEncoding = 'xop9'),
    InstructionMap('xopgrp1', sEncoding = 'xop9', asLeadOpcodes = ['0x01'], sSelector = '/r'),
    InstructionMap('xopgrp2', sEncoding = 'xop9', asLeadOpcodes = ['0x02'], sSelector = '/r'),
    InstructionMap('xopgrp3', sEncoding = 'xop9', asLeadOpcodes = ['0x12'], sSelector = '/r'),
    InstructionMap('xopmap10', sEncoding = 'xop10'),
    InstructionMap('xopgrp4', sEncoding = 'xop10', asLeadOpcodes = ['0x12'], sSelector = '/r'),
];
## The instruction maps indexed by map name (sName).
g_dInstructionMaps = dict((oMap.sName, oMap) for oMap in g_aoInstructionMaps);
## The instruction maps indexed by their sIemName attribute.
g_dInstructionMapsByIemName = dict((oMap.sIemName, oMap) for oMap in g_aoInstructionMaps);
1813
1814
1815#
1816# Decoder functions.
1817#
1818
class DecoderFunction(object):
    """
    Decoder function.

    Mainly used for scoped searches for variables used in microcode blocks.
    The end line and raw lines are filled in later by complete().
    """

    def __init__(self, sSrcFile, iBeginLine, sName, asDefArgs):
        # Location of the function.
        self.sSrcFile   = sSrcFile;     ##< The source file the function is defined in.
        self.iBeginLine = iBeginLine;   ##< The start line.
        self.iEndLine   = -1;           ##< The line the function (probably) ends on, -1 until complete() is called.
        # Identity and content.
        self.sName      = sName;        ##< The function name.
        self.asDefArgs  = asDefArgs;    ##< The FNIEMOP*DEF/STUB* macro argument list, 0th element is the macro name.
        self.asLines    = []            # type: List[str] ##< The raw lines the function is made up of.

    def complete(self, iEndLine, asLines):
        """
        Completes the function by recording its end line and raw lines.

        May only be called once (asserted via the -1 end line marker).
        """
        assert self.iEndLine == -1;
        self.asLines  = asLines;
        self.iEndLine = iEndLine;
1841
1842
1843#
1844# "Microcode" statements and blocks
1845#
1846
class McStmt(object):
    """
    Statement in a microcode block.
    """

    def __init__(self, sName, asParams):
        self.sName    = sName;      ##< 'IEM_MC_XXX' or 'C++'.
        self.asParams = asParams;   ##< The statement parameters (list of strings).
        self.oUser    = None;       ##< Initially None; not touched by this class.

    def renderCode(self, cchIndent = 0):
        """
        Renders the code for the statement.

        Returns 'NAME(param, param, ...);' indented by cchIndent spaces and
        terminated by a newline.
        """
        return ''.join([' ' * cchIndent, self.sName, '(', ', '.join(self.asParams), ');\n']);

    @staticmethod
    def renderCodeForList(aoStmts, cchIndent = 0):
        """
        Renders a list of statements, returning the concatenated code.
        """
        asRendered = [];
        for oStmt in aoStmts:
            asRendered.append(oStmt.renderCode(cchIndent));
        return ''.join(asRendered);

    @staticmethod
    def findStmtByNames(aoStmts, dNames):
        """
        Returns the first statement in the list having any of the given names,
        descending into the if and else branches of conditional statements.
        Returns None if there is no match.

        Note! The names are passed as a dictionary for quick lookup, the value
              does not matter.
        """
        for oStmt in aoStmts:
            if oStmt.sName in dNames:
                return oStmt;
            if not isinstance(oStmt, McStmtCond):
                continue;
            # The if-branch is searched before the else-branch.
            oHit = McStmt.findStmtByNames(oStmt.aoIfBranch, dNames) \
                or McStmt.findStmtByNames(oStmt.aoElseBranch, dNames);
            if oHit:
                return oHit;
        return None;

    @staticmethod
    def countStmtsByName(aoStmts, dNames, dRet):
        """
        Searches the given list of statements (including conditional branches)
        for the names in the dNames dictionary, adding each one found to dRet
        with an occurrence count.

        Returns the total number of hits.
        """
        cTotal = 0;
        for oStmt in aoStmts:
            sStmtName = oStmt.sName;
            if sStmtName in dNames:
                cTotal += 1;
                dRet[sStmtName] = dRet.get(sStmtName, 0) + 1;
            if isinstance(oStmt, McStmtCond):
                for aoBranch in (oStmt.aoIfBranch, oStmt.aoElseBranch):
                    cTotal += McStmt.countStmtsByName(aoBranch, dNames, dRet);
        return cTotal;

    def isCppStmt(self):
        """ Checks if this is a C++ statement, i.e. the name starts with 'C++'. """
        return self.sName.startswith('C++');
1909
class McStmtCond(McStmt):
    """
    Base class for conditional statements (IEM_MC_IF_XXX, IEM_MC_NATIVE_IF).
    """

    def __init__(self, sName, asParams, aoIfBranch = None, aoElseBranch = None):
        McStmt.__init__(self, sName, asParams);
        # The branch lists are always copied, so the caller's lists are never shared.
        self.aoIfBranch            = list(aoIfBranch)   if aoIfBranch   is not None else [];
        self.aoElseBranch          = list(aoElseBranch) if aoElseBranch is not None else [];
        self.oIfBranchAnnotation   = None;  ##< User specific IF-branch annotation.
        self.oElseBranchAnnotation = None;  ##< User specific ELSE-branch annotation.
        ## Infix used when rendering ELSE/ENDIF: '_NATIVE' for IEM_MC_NATIVE_IF, otherwise empty.
        self.sNativeInfix          = '_NATIVE' if sName == 'IEM_MC_NATIVE_IF' else '';

    def renderCode(self, cchIndent = 0):
        """
        Renders the conditional with its branches indented four more spaces.
        The ELSE part is only emitted when the else-branch is non-empty.
        """
        sPad    = ' ' * cchIndent;
        asParts = [
            sPad + self.sName + '(' + ', '.join(self.asParams) + ') {\n',
            self.renderCodeForList(self.aoIfBranch, cchIndent + 4),
        ];
        if self.aoElseBranch:
            asParts.append(sPad + '} IEM_MC%s_ELSE() {\n' % (self.sNativeInfix,));
            asParts.append(self.renderCodeForList(self.aoElseBranch, cchIndent + 4));
        asParts.append(sPad + '} IEM_MC%s_ENDIF();\n' % (self.sNativeInfix,));
        return ''.join(asParts);
1930
class McStmtNativeIf(McStmtCond):
    """
    IEM_MC_NATIVE_IF

    The single statement parameter is the architecture list joined by '|',
    or '0' when the list is empty.
    """

    def __init__(self, sName, asArchitectures):
        sCondition = '|'.join(asArchitectures) if asArchitectures else '0';
        McStmtCond.__init__(self, sName, [sCondition,]);
        self.asArchitectures = asArchitectures;     ##< The architecture list as given.
1936
class McStmtVar(McStmt):
    """
    Variable declaration: IEM_MC_LOCAL, IEM_MC_LOCAL_ASSIGN, IEM_MC_LOCAL_CONST
    """

    def __init__(self, sName, asParams, sType, sVarName, sValue = None):
        McStmt.__init__(self, sName, asParams);
        self.sVarName = sVarName;   ##< The variable name.
        self.sType    = sType;      ##< The variable type.
        self.sValue   = sValue;     ##< None if no assigned / const value.
1944
class McStmtArg(McStmtVar):
    """
    Argument declaration: IEM_MC_ARG, IEM_MC_ARG_CONST, IEM_MC_ARG_LOCAL_REF

    The constant value (if any) is stored as the McStmtVar value.
    """

    def __init__(self, sName, asParams, sType, sVarName, iArg, sConstValue = None, sRef = None, sRefType = 'none'):
        McStmtVar.__init__(self, sName, asParams, sType, sVarName, sValue = sConstValue);
        self.iArg     = iArg;       ##< The argument number.
        self.sRef     = sRef;       ##< The reference string (local variable, register).
        self.sRefType = sRefType;   ##< The kind of reference: 'local', 'none'.
        assert sRefType in ('none', 'local');
1953
class McStmtCall(McStmt):
    """
    IEM_MC_CALL_*

    iFnParam is the index of the function name in asParams; the call arguments
    follow it.  iRcNameParam is the index of the parameter stored as iRcName,
    negative (the default) if there is none.
    """

    def __init__(self, sName, asParams, iFnParam, iRcNameParam = -1):
        McStmt.__init__(self, sName, asParams);
        self.idxFn     = iFnParam;              ##< Index of the function name parameter.
        self.idxParams = iFnParam + 1;          ##< Index of the first parameter after the function name.
        self.sFn       = asParams[iFnParam];    ##< The function name.
        if iRcNameParam < 0:
            self.iRcName = None;
        else:
            self.iRcName = asParams[iRcNameParam];
1962
class McStmtAssertEFlags(McStmt):
    """
    IEM_MC_ASSERT_EFLAGS

    The two parameters are the tested and the modified flags of the given
    instruction, rendered as C-style constants.
    """

    def __init__(self, oInstruction):
        asFlagParams = [oInstruction.getTestedFlagsCStyle(), oInstruction.getModifiedFlagsCStyle(),];
        McStmt.__init__(self, 'IEM_MC_ASSERT_EFLAGS', asFlagParams);
1970
1971
class McCppGeneric(McStmt):
    """
    Generic C++/C statement.

    The code is kept as the one and only statement parameter.
    """

    def __init__(self, sCode, fDecode = True, sName = 'C++', cchIndent = 0):
        McStmt.__init__(self, sName, [sCode,]);
        self.fDecode   = fDecode;       ##< Selects the '// C++ decode' (True) or '// C++ normal' (False) annotation.
        self.cchIndent = cchIndent;     ##< Extra indentation added when rendering.

    def renderCode(self, cchIndent = 0):
        """
        Renders the code with an annotation comment appended to each line
        (every newline in the code gets the annotation inserted before it).
        """
        sAnnotation = ' // C++ decode\n' if self.fDecode else ' // C++ normal\n';
        sCode = ' ' * (cchIndent + self.cchIndent) + self.asParams[0] + '\n';
        return sCode.replace('\n', sAnnotation);
1989
class McCppCall(McCppGeneric):
    """
    A generic C++/C call statement.

    The sName is still 'C++', so the function name is in the first parameter
    and the arguments in the subsequent ones.
    """

    def __init__(self, sFnName, asArgs, fDecode = True, cchIndent = 0):
        McCppGeneric.__init__(self, sFnName, fDecode = fDecode, cchIndent = cchIndent);
        self.asParams.extend(asArgs);

    def renderCode(self, cchIndent = 0):
        """ Renders 'fn(arg, ...);' followed by the decode/normal annotation comment. """
        sAnnotation = ' // C++ decode\n' if self.fDecode else ' // C++ normal\n';
        sPad        = ' ' * (cchIndent + self.cchIndent);
        return sPad + self.asParams[0] + '(' + ', '.join(self.asParams[1:]) + ');' + sAnnotation;
2009
class McCppCond(McStmtCond):
    """
    C++/C 'if' statement.

    The statement name is 'C++/if' and the condition is the only parameter.
    """

    def __init__(self, sCode, fDecode = True, aoIfBranch = None, aoElseBranch = None, cchIndent = 0):
        McStmtCond.__init__(self, 'C++/if', [sCode,], aoIfBranch, aoElseBranch);
        self.fDecode   = fDecode;       ##< Selects the '// C++ decode' (True) or '// C++ normal' (False) annotation.
        self.cchIndent = cchIndent;     ##< Extra indentation added when rendering.

    def renderCode(self, cchIndent = 0):
        """
        Renders the if statement with braces on separate lines and the branches
        indented four more spaces; the else part is only emitted if non-empty.
        """
        cchIndent  += self.cchIndent;
        sPad        = ' ' * cchIndent;
        sAnnotation = '// C++ decode' if self.fDecode else '// C++ normal';

        asParts = [
            sPad + 'if (' + self.asParams[0] + ') ' + sAnnotation + '\n',
            sPad + '{\n',
            self.renderCodeForList(self.aoIfBranch, cchIndent + 4),
            sPad + '}\n',
        ];
        if self.aoElseBranch:
            asParts.extend([
                sPad + 'else ' + sAnnotation + '\n',
                sPad + '{\n',
                self.renderCodeForList(self.aoElseBranch, cchIndent + 4),
                sPad + '}\n',
            ]);
        return ''.join(asParts);
2032
class McCppPreProc(McCppGeneric):
    """
    C++/C Preprocessor directive.

    Named 'C++/preproc' and never flagged as decode code.
    """

    def __init__(self, sCode):
        McCppGeneric.__init__(self, sCode, fDecode = False, sName = 'C++/preproc');

    def renderCode(self, cchIndent = 0):
        """ Renders the directive as-is; the indentation is ignored and no annotation is added. """
        return ''.join((self.asParams[0], '\n'));
2042
2043
## IEM_MC_F_XXX values.
## Each flag maps to a tuple of further flags that McBlock.parseMcBegin sets along with it.
g_kdMcFlags = {
    'IEM_MC_F_ONLY_8086':           (),
    'IEM_MC_F_MIN_186':             (),
    'IEM_MC_F_MIN_286':             (),
    'IEM_MC_F_NOT_286_OR_OLDER':    (),
    'IEM_MC_F_MIN_386':             ('IEM_MC_F_NOT_286_OR_OLDER',),
    'IEM_MC_F_MIN_486':             ('IEM_MC_F_NOT_286_OR_OLDER',),
    'IEM_MC_F_MIN_PENTIUM':         ('IEM_MC_F_NOT_286_OR_OLDER',),
    'IEM_MC_F_MIN_PENTIUM_II':      ('IEM_MC_F_NOT_286_OR_OLDER',),
    'IEM_MC_F_MIN_CORE':            ('IEM_MC_F_NOT_286_OR_OLDER',),
    'IEM_MC_F_64BIT':               ('IEM_MC_F_NOT_286_OR_OLDER',),
    'IEM_MC_F_NOT_64BIT':           (),
};
## IEM_CIMPL_F_XXX values.
## Each flag maps to a tuple of further flags that McBlock.parseCImplFlags sets along with it.
g_kdCImplFlags = {
    'IEM_CIMPL_F_BRANCH_DIRECT':                (),
    'IEM_CIMPL_F_BRANCH_INDIRECT':              (),
    'IEM_CIMPL_F_BRANCH_RELATIVE':              (),
    'IEM_CIMPL_F_BRANCH_CONDITIONAL':           (),
    'IEM_CIMPL_F_BRANCH_FAR':                   (),
    'IEM_CIMPL_F_BRANCH_ANY':                   ('IEM_CIMPL_F_BRANCH_DIRECT', 'IEM_CIMPL_F_BRANCH_INDIRECT',
                                                 'IEM_CIMPL_F_BRANCH_RELATIVE',),
    'IEM_CIMPL_F_BRANCH_STACK':                 (),
    'IEM_CIMPL_F_BRANCH_STACK_FAR':             (),
    'IEM_CIMPL_F_MODE':                         (),
    'IEM_CIMPL_F_RFLAGS':                       (),
    'IEM_CIMPL_F_INHIBIT_SHADOW':               (),
    'IEM_CIMPL_F_STATUS_FLAGS':                 (),
    'IEM_CIMPL_F_CHECK_IRQ_AFTER':              (),
    'IEM_CIMPL_F_CHECK_IRQ_BEFORE':             (),
    'IEM_CIMPL_F_CHECK_IRQ_BEFORE_AND_AFTER':   ('IEM_CIMPL_F_CHECK_IRQ_BEFORE', 'IEM_CIMPL_F_CHECK_IRQ_AFTER',),
    'IEM_CIMPL_F_VMEXIT':                       (),
    'IEM_CIMPL_F_FPU':                          (),
    'IEM_CIMPL_F_REP':                          (),
    'IEM_CIMPL_F_IO':                           (),
    'IEM_CIMPL_F_END_TB':                       (),
    'IEM_CIMPL_F_XCPT':                         ('IEM_CIMPL_F_BRANCH_INDIRECT', 'IEM_CIMPL_F_BRANCH_FAR',
                                                 'IEM_CIMPL_F_MODE', 'IEM_CIMPL_F_RFLAGS', 'IEM_CIMPL_F_VMEXIT',),
    'IEM_CIMPL_F_CALLS_CIMPL':                  (),
    'IEM_CIMPL_F_CALLS_AIMPL':                  (),
    'IEM_CIMPL_F_CALLS_AIMPL_WITH_FXSTATE':     (),
};
2087class McBlock(object):
2088 """
2089 Microcode block (IEM_MC_BEGIN ... IEM_MC_END, IEM_MC_DEFER_TO_CIMPL_x_RET).
2090 """
2091
## @name Macro expansion types.
## @{
## kiMacroExp_None:     No macro expansion.
## kiMacroExp_Entire:   Entire block (iBeginLine == iEndLine), original line may contain multiple blocks.
## kiMacroExp_Partial:  Partial/mixed (cmpxchg16b), safe to assume single block.
(kiMacroExp_None, kiMacroExp_Entire, kiMacroExp_Partial) = (0, 1, 2);
## @}
2098
2099 def __init__(self, sSrcFile, iBeginLine, offBeginLine, oFunction, iInFunction,
2100 oInstruction = None, cchIndent = None, fDeferToCImpl = False):
2101 ## Set if IEM_MC_DEFER_TO_CIMPL_0_RET and friends, clear if IEM_MC_BEGIN/END block.
2102 self.fDeferToCImpl = fDeferToCImpl;
2103 ## The source file containing the block.
2104 self.sSrcFile = sSrcFile;
2105 ## The line with the IEM_MC_BEGIN/IEM_MC_DEFER_TO_CIMPL_X_RET statement.
2106 self.iBeginLine = iBeginLine;
2107 ## The offset of the IEM_MC_BEGIN/IEM_MC_DEFER_TO_CIMPL_X_RET statement within the line.
2108 self.offBeginLine = offBeginLine;
2109 ## The line with the IEM_MC_END statement / last line of IEM_MC_DEFER_TO_CIMPL_X_RET.
2110 self.iEndLine = -1;
2111 ## The offset of the IEM_MC_END statement within the line / semicolon offset for defer-to.
2112 self.offEndLine = 0;
2113 ## The offset following the IEM_MC_END/IEM_MC_DEFER_TO_CIMPL_X_RET semicolon.
2114 self.offAfterEnd = 0;
2115 ## The function the block resides in.
2116 self.oFunction = oFunction;
2117 ## The name of the function the block resides in. DEPRECATED.
2118 self.sFunction = oFunction.sName;
2119 ## The block number within the function.
2120 self.iInFunction = iInFunction;
2121 ## The instruction this block is associated with - can be None.
2122 self.oInstruction = oInstruction # type: Instruction
2123 ## Indentation level of the block.
2124 self.cchIndent = cchIndent if cchIndent else offBeginLine;
2125 ## The raw lines the block is made up of.
2126 self.asLines = [] # type: List[str]
2127 ## Indicates whether the block includes macro expansion parts (kiMacroExp_None,
2128 ## kiMacroExp_Entrie, kiMacroExp_Partial).
2129 self.iMacroExp = self.kiMacroExp_None;
2130 ## IEM_MC_BEGIN: Argument count.
2131 self.cArgs = -1;
2132 ## IEM_MC_ARG, IEM_MC_ARG_CONST, IEM_MC_ARG_LOCAL_REF, IEM_MC_ARG_LOCAL_EFLAGS.
2133 self.aoArgs = [] # type: List[McStmtArg]
2134 ## IEM_MC_BEGIN: Locals count.
2135 self.cLocals = -1;
2136 ## IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, IEM_MC_ARG_LOCAL_EFLAGS.
2137 self.aoLocals = [] # type: List[McStmtVar]
2138 ## IEM_MC_BEGIN: IEM_MC_F_XXX dictionary
2139 self.dsMcFlags = {} # type: Dict[str, bool]
2140 ## IEM_MC_[DEFER_TO|CALL]_CIMPL_XXX: IEM_CIMPL_F_XXX dictionary
2141 self.dsCImplFlags = {} # type: Dict[str, bool]
2142 ## Decoded statements in the block.
2143 self.aoStmts = [] # type: List[McStmt]
2144
def complete(self, iEndLine, offEndLine, offAfterEnd, asLines):
    """
    Completes the microcode block by recording where it ends and the raw
    lines it is made up of.

    May only be called once (asserted via the -1 end line marker).
    """
    assert self.iEndLine == -1;
    self.asLines     = asLines;
    self.offAfterEnd = offAfterEnd;
    self.offEndLine  = offEndLine;
    self.iEndLine    = iEndLine;
2154
def raiseDecodeError(self, sRawCode, off, sMessage):
    """
    Raises a decoding error (ParserException) for offset off in sRawCode.

    The reported line is the block's begin line plus the number of newlines
    preceding off, and the column is one-based.
    """
    cPrecedingNewlines = sRawCode.count('\n', 0, off);
    # rfind returns -1 when off is on the first line, which makes the column off + 1.
    iColumn = off - sRawCode.rfind('\n', 0, off);
    raise ParserException('%s:%d:%d: parsing error: %s'
                          % (self.sSrcFile, self.iBeginLine + cPrecedingNewlines, iColumn, sMessage,));
2161
def raiseStmtError(self, sName, sMessage):
    """
    Raises a statement parser error (ParserException) for the statement
    sName, reported at the first line of the block.
    """
    sWhere = '%s:%d' % (self.sSrcFile, self.iBeginLine,);
    raise ParserException('%s: %s: parsing error: %s' % (sWhere, sName, sMessage,));
2165
def checkStmtParamCount(self, sName, asParams, cParamsExpected):
    """
    Checks the parameter count of the statement sName.

    Returns True if asParams holds exactly cParamsExpected entries, raises
    ParserException if it doesn't.
    """
    cParamsFound = len(asParams);
    if cParamsFound == cParamsExpected:
        return True;
    raise ParserException('%s:%d: %s: Expected %s parameters, found %s!'
                          % (self.sSrcFile, self.iBeginLine, sName, cParamsExpected, cParamsFound,));
2172
@staticmethod
def parseMcGeneric(oSelf, sName, asParams):
    """
    Generic parser that returns a plain McStmt object.

    The block (oSelf) is not used or modified.
    """
    _ = oSelf;
    oStmt = McStmt(sName, asParams);
    return oStmt;
2178
@staticmethod
def parseMcGenericCond(oSelf, sName, asParams):
    """
    Generic parser that returns a plain McStmtCond object (with empty branches).

    The block (oSelf) is not used or modified.
    """
    _ = oSelf;
    oStmt = McStmtCond(sName, asParams);
    return oStmt;
2184
## The RT_ARCH_VAL_XXX values accepted by parseMcNativeIf (the dictionary value does not matter).
kdArchVals = dict.fromkeys((
    'RT_ARCH_VAL_X86',
    'RT_ARCH_VAL_AMD64',
    'RT_ARCH_VAL_ARM32',
    'RT_ARCH_VAL_ARM64',
    'RT_ARCH_VAL_SPARC32',
    'RT_ARCH_VAL_SPARC64',
), True);
2193
@staticmethod
def parseMcNativeIf(oSelf, sName, asParams):
    """
    IEM_MC_NATIVE_IF

    The single parameter is either '0' (no architectures) or a '|' separated
    list of RT_ARCH_VAL_XXX values, each of which must be in kdArchVals.
    Returns a McStmtNativeIf statement.
    """
    oSelf.checkStmtParamCount(sName, asParams, 1);
    asArchitectures = [];
    if asParams[0].strip() != '0':
        for sArch in asParams[0].split('|'):
            sArch = sArch.strip();
            if sArch not in oSelf.kdArchVals:
                oSelf.raiseStmtError(sName, 'Unknown architecture: %s' % (sArch,));
            asArchitectures.append(sArch);
    return McStmtNativeIf(sName, asArchitectures);
2206
@staticmethod
def parseMcBegin(oSelf, sName, asParams):
    """
    IEM_MC_BEGIN

    Takes four parameters: argument count, locals count, IEM_MC_F_XXX flags
    and IEM_CIMPL_F_XXX flags ('0' meaning no flags).  Updates cArgs,
    cLocals, dsMcFlags and dsCImplFlags of the block and returns a plain
    McStmt.  Raises a statement error if used more than once in a block or
    if a flag is unknown.
    """
    oSelf.checkStmtParamCount(sName, asParams, 4);
    fAlreadySeen = oSelf.cArgs != -1 or oSelf.cLocals != -1 or oSelf.dsMcFlags;
    if fAlreadySeen:
        oSelf.raiseStmtError(sName, 'Used more than once!');
    oSelf.cArgs   = int(asParams[0]);
    oSelf.cLocals = int(asParams[1]);

    # The IEM_MC_F_XXX flags, including the ones each flag implies.
    sMcFlags = asParams[2];
    if sMcFlags != '0':
        for sFlag in [sPart.strip() for sPart in sMcFlags.split('|')]:
            if sFlag not in g_kdMcFlags:
                oSelf.raiseStmtError(sName, 'Unknown flag: %s' % (sFlag, ));
            oSelf.dsMcFlags[sFlag] = True;
            for sImpliedFlag in g_kdMcFlags[sFlag]:
                oSelf.dsMcFlags[sImpliedFlag] = True;

    # The IEM_CIMPL_F_XXX flags.
    sCImplFlags = asParams[3];
    if sCImplFlags != '0':
        oSelf.parseCImplFlags(sName, sCImplFlags);

    return McBlock.parseMcGeneric(oSelf, sName, asParams);
2229
@staticmethod
def parseMcArg(oSelf, sName, asParams):
    """
    IEM_MC_ARG

    Parameters: type, variable name, argument number.  The new statement is
    appended to the block's aoArgs and returned.
    """
    oSelf.checkStmtParamCount(sName, asParams, 3);
    (sType, sVarName, sArgNo) = asParams;
    oStmt = McStmtArg(sName, asParams, sType, sVarName, int(sArgNo));
    oSelf.aoArgs.append(oStmt);
    return oStmt;
2237
@staticmethod
def parseMcArgConst(oSelf, sName, asParams):
    """
    IEM_MC_ARG_CONST

    Parameters: type, variable name, constant value, argument number.  The
    new statement is appended to the block's aoArgs and returned.
    """
    oSelf.checkStmtParamCount(sName, asParams, 4);
    (sType, sVarName, sConstValue, sArgNo) = asParams;
    oStmt = McStmtArg(sName, asParams, sType, sVarName, int(sArgNo), sConstValue = sConstValue);
    oSelf.aoArgs.append(oStmt);
    return oStmt;
2245
@staticmethod
def parseMcArgLocalRef(oSelf, sName, asParams):
    """
    IEM_MC_ARG_LOCAL_REF

    Parameters: type, variable name, referenced local, argument number.  The
    new statement is appended to the block's aoArgs and returned.
    """
    oSelf.checkStmtParamCount(sName, asParams, 4);
    (sType, sVarName, sLocalRef, sArgNo) = asParams;
    oStmt = McStmtArg(sName, asParams, sType, sVarName, int(sArgNo), sRef = sLocalRef, sRefType = 'local');
    oSelf.aoArgs.append(oStmt);
    return oStmt;
2253
@staticmethod
def parseMcArgLocalEFlags(oSelf, sName, asParams):
    """
    IEM_MC_ARG_LOCAL_EFLAGS

    Parameters: argument name, local variable name, argument number.

    Note! This one is split up into an IEM_MC_LOCAL (uint32_t) statement and
          an IEM_MC_ARG_LOCAL_REF (uint32_t *) statement referencing it.  The
          former is added to aoLocals and the latter to aoArgs.

    Returns the two statements as a tuple: (local, argument).
    """
    oSelf.checkStmtParamCount(sName, asParams, 3);
    (sArgName, sLocalName, sArgNo) = asParams;

    oStmtLocal = McStmtVar('IEM_MC_LOCAL', ['uint32_t', sLocalName,], 'uint32_t', sLocalName);
    oSelf.aoLocals.append(oStmtLocal);

    oStmtArg = McStmtArg('IEM_MC_ARG_LOCAL_REF', ['uint32_t *', sArgName, sLocalName, sArgNo],
                         'uint32_t *', sArgName, int(sArgNo), sRef = sLocalName, sRefType = 'local');
    oSelf.aoArgs.append(oStmtArg);

    return (oStmtLocal, oStmtArg,);
2265
@staticmethod
def parseMcImplicitAvxAArgs(oSelf, sName, asParams):
    """
    IEM_MC_IMPLICIT_AVX_AIMPL_ARGS

    Takes no parameters.

    Note! Translated to an IEM_MC_ARG_CONST statement declaring pXState as
          argument 0; it is appended to the block's aoArgs and returned.
    """
    oSelf.checkStmtParamCount(sName, asParams, 0);
    sType  = 'PX86XSAVEAREA';
    sValue = '&pVCpu->cpum.GstCtx.XState';
    oStmt  = McStmtArg('IEM_MC_ARG_CONST', [sType, 'pXState', sValue, '0'], sType, 'pXState', 0, sValue);
    oSelf.aoArgs.append(oStmt);
    return oStmt;
2275
@staticmethod
def parseMcLocal(oSelf, sName, asParams):
    """
    IEM_MC_LOCAL

    Parameters: type, variable name.  The new statement is appended to the
    block's aoLocals and returned.
    """
    oSelf.checkStmtParamCount(sName, asParams, 2);
    (sType, sVarName) = asParams;
    oStmt = McStmtVar(sName, asParams, sType, sVarName);
    oSelf.aoLocals.append(oStmt);
    return oStmt;
2283
@staticmethod
def parseMcLocalAssign(oSelf, sName, asParams):
    """
    IEM_MC_LOCAL_ASSIGN

    Parameters: type, variable name, assigned value.  The new statement is
    appended to the block's aoLocals and returned.
    """
    oSelf.checkStmtParamCount(sName, asParams, 3);
    (sType, sVarName, sAssignedValue) = asParams;
    oStmt = McStmtVar(sName, asParams, sType, sVarName, sValue = sAssignedValue);
    oSelf.aoLocals.append(oStmt);
    return oStmt;
2291
@staticmethod
def parseMcLocalConst(oSelf, sName, asParams):
    """
    IEM_MC_LOCAL_CONST

    Parameters: type, variable name, constant value.  The new statement is
    appended to the block's aoLocals and returned.
    """
    oSelf.checkStmtParamCount(sName, asParams, 3);
    (sType, sVarName, sConstValue) = asParams;
    oStmt = McStmtVar(sName, asParams, sType, sVarName, sValue = sConstValue);
    oSelf.aoLocals.append(oStmt);
    return oStmt;
2299
@staticmethod
def parseMcLocalEFlags(oSelf, sName, asParams):
    """
    IEM_MC_LOCAL_EFLAGS

    Parameters: variable name.  The variable is recorded with the type
    uint32_t; the new statement is appended to aoLocals and returned.
    """
    oSelf.checkStmtParamCount(sName, asParams, 1);
    sVarName = asParams[0];
    oStmt = McStmtVar(sName, asParams, 'uint32_t', sVarName);
    oSelf.aoLocals.append(oStmt);
    return oStmt;
2307
@staticmethod
def parseMcCallAImpl(oSelf, sName, asParams):
    """
    IEM_MC_CALL_AIMPL_3|4

    The last character of the statement name is the call argument count; two
    more parameters are expected: parameter 0 is recorded as the call's
    iRcName and parameter 1 is the function.
    """
    cCallArgs = int(sName[-1]);
    oSelf.checkStmtParamCount(sName, asParams, cCallArgs + 2);
    return McStmtCall(sName, asParams, iFnParam = 1, iRcNameParam = 0);
2314
@staticmethod
def parseMcCallVoidAImpl(oSelf, sName, asParams):
    """
    IEM_MC_CALL_VOID_AIMPL_2|3

    The last character of the statement name is the call argument count; the
    function is parameter 0 and the call arguments follow.
    """
    cCallArgs = int(sName[-1]);
    oSelf.checkStmtParamCount(sName, asParams, cCallArgs + 1);
    return McStmtCall(sName, asParams, iFnParam = 0);
2321
@staticmethod
def parseMcCallAvxAImpl(oSelf, sName, asParams):
    """
    IEM_MC_CALL_AVX_AIMPL_2|3

    The last character of the statement name is the call argument count; the
    function is parameter 0 and the call arguments follow.
    """
    cCallArgs = int(sName[-1]);
    oSelf.checkStmtParamCount(sName, asParams, cCallArgs + 1);
    return McStmtCall(sName, asParams, iFnParam = 0);
2328
@staticmethod
def parseMcCallFpuAImpl(oSelf, sName, asParams):
    """
    IEM_MC_CALL_FPU_AIMPL_1|2|3

    The last character of the statement name is the call argument count; the
    function is parameter 0 and the call arguments follow.
    """
    cCallArgs = int(sName[-1]);
    oSelf.checkStmtParamCount(sName, asParams, cCallArgs + 1);
    return McStmtCall(sName, asParams, iFnParam = 0);
2335
@staticmethod
def parseMcCallMmxAImpl(oSelf, sName, asParams):
    """
    IEM_MC_CALL_MMX_AIMPL_2|3

    The last character of the statement name is the call argument count; the
    function is parameter 0 and the call arguments follow.
    """
    cCallArgs = int(sName[-1]);
    oSelf.checkStmtParamCount(sName, asParams, cCallArgs + 1);
    return McStmtCall(sName, asParams, iFnParam = 0);
2342
@staticmethod
def parseMcCallSseAImpl(oSelf, sName, asParams):
    """
    IEM_MC_CALL_SSE_AIMPL_2|3

    The last character of the statement name is the call argument count; the
    function is parameter 0 and the call arguments follow.
    """
    cCallArgs = int(sName[-1]);
    oSelf.checkStmtParamCount(sName, asParams, cCallArgs + 1);
    return McStmtCall(sName, asParams, iFnParam = 0);
2349
def parseCImplFlags(self, sName, sFlags):
    """
    Helper for parseMcBegin, parseMcCallCImpl and parseMcDeferToCImpl that
    validates and merges a bunch of IEM_CIMPL_F_XXX values into dsCImplFlags.

    sName is the statement name (for error messages) and sFlags the raw flag
    expression: '0' or flags separated by '|', optionally with comments and
    parentheses.  Flags implied by a flag (see g_kdCImplFlags) are set too.

    Returns None.  Raises a statement error (raiseStmtError) on unknown flags,
    which includes empty flag terms such as the one after a trailing '|'.
    """
    if sFlags != '0':
        sFlags = self.stripComments(sFlags);
        #print('debug: %s: %s' % (self.oFunction.sName,' | '.join(''.join(sFlags.split()).split('|')),));
        for sFlag in sFlags.split('|'):
            sFlag = sFlag.strip();
            # Drop one leading '(' and one trailing ')'.  startswith/endswith are used
            # rather than indexing so an empty term is reported as an unknown flag below
            # instead of raising IndexError.
            if sFlag.startswith('('):
                sFlag = sFlag[1:].strip();
            if sFlag.endswith(')'):
                sFlag = sFlag[:-1].strip();
            #print('debug: %s' % sFlag)
            if sFlag not in g_kdCImplFlags:
                if sFlag == '0':
                    continue;
                self.raiseStmtError(sName, 'Unknown flag: %s' % (sFlag, ));
            self.dsCImplFlags[sFlag] = True;
            for sFlag2 in g_kdCImplFlags[sFlag]:
                self.dsCImplFlags[sFlag2] = True;
    return None;
2371
@staticmethod
def parseMcCallCImpl(oSelf, sName, asParams):
    """
    IEM_MC_CALL_CIMPL_0|1|2|3|4|5

    The last character of the statement name is the call argument count.
    Parameter 0 holds the IEM_CIMPL_F_XXX flags (merged into the block's
    dsCImplFlags) and parameter 2 is the function.
    """
    cCallArgs = int(sName[-1]);
    oSelf.checkStmtParamCount(sName, asParams, cCallArgs + 3);
    sCImplFlags = asParams[0];
    oSelf.parseCImplFlags(sName, sCImplFlags);
    return McStmtCall(sName, asParams, iFnParam = 2);
2379
@staticmethod
def parseMcDeferToCImpl(oSelf, sName, asParams):
    """
    IEM_MC_DEFER_TO_CIMPL_[0|1|2|3]_RET

    The call argument count is the digit preceding the '_RET' suffix of the
    statement name.  Parameter 0 holds the IEM_CIMPL_F_XXX flags (merged into
    the block's dsCImplFlags) and parameter 2 is the function.

    Note! This code is called by workerIemMcDeferToCImplXRet.
    """
    #print('debug: %s, %s,...' % (sName, asParams[0],));
    cCallArgs = int(sName[-5]);
    oSelf.checkStmtParamCount(sName, asParams, cCallArgs + 3);
    sCImplFlags = asParams[0];
    oSelf.parseCImplFlags(sName, sCImplFlags);
    return McStmtCall(sName, asParams, iFnParam = 2);
2389
2390 @staticmethod
2391 def stripComments(sCode):
2392 """ Returns sCode with comments removed. """
2393 off = 0;
2394 while off < len(sCode):
2395 off = sCode.find('/', off);
2396 if off < 0 or off + 1 >= len(sCode):
2397 break;
2398
2399 if sCode[off + 1] == '/':
2400 # C++ comment.
2401 offEnd = sCode.find('\n', off + 2);
2402 if offEnd < 0:
2403 return sCode[:off].rstrip();
2404 sCode = sCode[ : off] + sCode[offEnd : ];
2405 off += 1;
2406
2407 elif sCode[off + 1] == '*':
2408 # C comment
2409 offEnd = sCode.find('*/', off + 2);
2410 if offEnd < 0:
2411 return sCode[:off].rstrip();
2412 sSep = ' ';
2413 if (off > 0 and sCode[off - 1].isspace()) or (offEnd + 2 < len(sCode) and sCode[offEnd + 2].isspace()):
2414 sSep = '';
2415 sCode = sCode[ : off] + sSep + sCode[offEnd + 2 : ];
2416 off += len(sSep);
2417
2418 else:
2419 # Not a comment.
2420 off += 1;
2421 return sCode;
2422
2423 @staticmethod
2424 def extractParam(sCode, offParam):
2425 """
2426 Extracts the parameter value at offParam in sCode.
2427 Returns stripped value and the end offset of the terminating ',' or ')'.
2428 """
2429 # Extract it.
2430 cNesting = 0;
2431 offStart = offParam;
2432 while offParam < len(sCode):
2433 ch = sCode[offParam];
2434 if ch == '(':
2435 cNesting += 1;
2436 elif ch == ')':
2437 if cNesting == 0:
2438 break;
2439 cNesting -= 1;
2440 elif ch == ',' and cNesting == 0:
2441 break;
2442 offParam += 1;
2443 return (sCode[offStart : offParam].strip(), offParam);
2444
2445 @staticmethod
2446 def extractParams(sCode, offOpenParen):
2447 """
2448 Parses a parameter list.
2449 Returns the list of parameter values and the offset of the closing parentheses.
2450 Returns (None, len(sCode)) on if no closing parentheses was found.
2451 """
2452 assert sCode[offOpenParen] == '(';
2453 asParams = [];
2454 off = offOpenParen + 1;
2455 while off < len(sCode):
2456 ch = sCode[off];
2457 if ch.isspace():
2458 off += 1;
2459 elif ch != ')':
2460 (sParam, off) = McBlock.extractParam(sCode, off);
2461 asParams.append(sParam);
2462 assert off < len(sCode), 'off=%s sCode=%s:"%s"' % (off, len(sCode), sCode,);
2463 if sCode[off] == ',':
2464 off += 1;
2465 else:
2466 return (asParams, off);
2467 return (None, off);
2468
2469 @staticmethod
2470 def findClosingBraces(sCode, off, offStop):
2471 """
2472 Finds the matching '}' for the '{' at off in sCode.
2473 Returns offset of the matching '}' on success, otherwise -1.
2474
2475 Note! Does not take comments into account.
2476 """
2477 cDepth = 1;
2478 off += 1;
2479 while off < offStop:
2480 offClose = sCode.find('}', off, offStop);
2481 if offClose < 0:
2482 break;
2483 cDepth += sCode.count('{', off, offClose);
2484 cDepth -= 1;
2485 if cDepth == 0:
2486 return offClose;
2487 off = offClose + 1;
2488 return -1;
2489
2490 @staticmethod
2491 def countSpacesAt(sCode, off, offStop):
2492 """ Returns the number of space characters at off in sCode. """
2493 offStart = off;
2494 while off < offStop and sCode[off].isspace():
2495 off += 1;
2496 return off - offStart;
2497
2498 @staticmethod
2499 def skipSpacesAt(sCode, off, offStop):
2500 """ Returns first offset at or after off for a non-space character. """
2501 return off + McBlock.countSpacesAt(sCode, off, offStop);
2502
2503 @staticmethod
2504 def isSubstrAt(sStr, off, sSubStr):
2505 """ Returns true of sSubStr is found at off in sStr. """
2506 return sStr[off : off + len(sSubStr)] == sSubStr;
2507
## Matches the start of C/C++ control statements: if, else, while, for and do.
koReCppCtrlStmts   = re.compile(r'\b(if\s*[(]|else\b|while\s*[(]|for\s*[(]|do\b)');
## Matches references to the listed iem.s.XXX members.
koReIemDecoderVars = re.compile(
    r'iem\.s\.(fPrefixes|uRexReg|uRexB|uRexIndex|iEffSeg|offModRm|cbOpcode|offOpcode'
    r'|enmEffOpSize|enmDefOpSize|enmDefAddrMode|enmEffAddrMode|idxPrefix'
    r'|uVex3rdReg|uVexLength|fEvxStuff|uFpuOpcode|abOpcode'
    r')'
);

## The conditional statement families: (IF name prefix, ELSE name, ENDIF name).
kaasConditions = (
    ( 'IEM_MC_IF_',       'IEM_MC_ELSE',        'IEM_MC_ENDIF' ),
    ( 'IEM_MC_NATIVE_IF', 'IEM_MC_NATIVE_ELSE', 'IEM_MC_NATIVE_ENDIF' ),
);
# decodeCode() is a hand-written scanner over sRawCode[off : offStop].  Each
# iteration of the main loop looks at the character at 'off' and dispatches:
#   - whitespace            -> skipped,
#   - '/'                   -> C or C++ comment, skipped,
#   - text 'IEM_MC_'        -> MC statement, handed to the parser registered in
#                              g_dMcStmtParsers; conditionals recurse for their
#                              IF and ELSE bodies,
#   - anything else         -> C/C++ statement, if/else construct or
#                              preprocessor line.
# iLevel is the current nesting depth; every recursive call passes iLevel + 1.
2518 def decodeCode(self, sRawCode, off = 0, offStop = -1, iLevel = 0): # pylint: disable=too-many-statements,too-many-branches
2519 """
2520 Decodes sRawCode[off : offStop].
2521
2522 Returns list of McStmt instances.
2523 Raises ParserException on failure.
2524 """
# A negative offStop means "until the end of sRawCode".
2525 if offStop < 0:
2526 offStop = len(sRawCode);
2527 aoStmts = [];
2528 while off < offStop:
2529 ch = sRawCode[off];
2530
2531 #
2532 # Skip spaces and comments.
2533 #
2534 if ch.isspace():
2535 off += 1;
2536
2537 elif ch == '/':
# NOTE(review): off + 1 is not checked against len(sRawCode), so a '/' as the
#               very last character would index past the end.
2538 ch = sRawCode[off + 1];
2539 if ch == '/': # C++ comment.
# The find() calls below are not limited to offStop.  A comment without a
# terminator ends the whole scan via 'break' instead of reporting an error.
2540 off = sRawCode.find('\n', off + 2);
2541 if off < 0:
2542 break;
2543 off += 1;
2544 elif ch == '*': # C comment.
2545 off = sRawCode.find('*/', off + 2);
2546 if off < 0:
2547 break;
2548 off += 2;
2549 else:
2550 self.raiseDecodeError(sRawCode, off, 'Unexpected "/"');
2551
2552 #
2553 # Is it a MC statement.
2554 #
2555 elif ch == 'I' and sRawCode[off : off + len('IEM_MC_')] == 'IEM_MC_':
2556 # All MC statements ends with a semicolon, except for conditionals which ends with a '{'.
2557 # Extract it and strip comments from it.
# iCond is the row index into kaasConditions for a conditional statement, or
# -1 for an ordinary statement.
2558 if self.isSubstrAt(sRawCode, off, self.kaasConditions[0][0]): iCond = 0;
2559 elif self.isSubstrAt(sRawCode, off, self.kaasConditions[1][0]): iCond = 1;
2560 else: iCond = -1;
2561 if iCond < 0:
# find() returns -1 when nothing is found, which the '<= off' test also catches.
2562 offEnd = sRawCode.find(';', off + len('IEM_MC_'));
2563 if offEnd <= off:
2564 self.raiseDecodeError(sRawCode, off, 'MC statement without a ";"');
2565 else:
2566 offEnd = sRawCode.find('{', off + len(self.kaasConditions[iCond][0]));
2567 if offEnd <= off:
2568 self.raiseDecodeError(sRawCode, off, 'MC conditional statement without a "{"');
2569 if sRawCode.find(';', off + len(self.kaasConditions[iCond][0]), offEnd) > off:
2570 self.raiseDecodeError(sRawCode, off, 'MC conditional statement without an immediate "{"');
# Move offEnd back from the '{' over the whitespace preceding it, so that the
# statement text extracted below ends right after the closing parenthesis.
# NOTE(review): the first decrement is unconditional, so this appears to assume
#               at least one whitespace character between ')' and '{'.
2571 offEnd -= 1;
2572 while offEnd > off and sRawCode[offEnd - 1].isspace():
2573 offEnd -= 1;
2574
2575 sRawStmt = self.stripComments(sRawCode[off : offEnd]);
2576
2577 # Isolate the statement name.
2578 offOpenParen = sRawStmt.find('(');
2579 if offOpenParen < 0:
2580 self.raiseDecodeError(sRawCode, off, 'MC statement without a "("');
2581 sName = sRawStmt[: offOpenParen].strip();
2582
2583 # Extract the parameters.
# extractParams() yields (parameter list, offset of the closing parenthesis);
# a parameter list of None is treated as "no closing parenthesis found".
2584 (asParams, offCloseParen) = self.extractParams(sRawStmt, offOpenParen);
2585 if asParams is None:
2586 self.raiseDecodeError(sRawCode, off, 'MC statement without a closing parenthesis');
2587 if offCloseParen + 1 != len(sRawStmt):
2588 self.raiseDecodeError(sRawCode, off,
2589 'Unexpected code following MC statement: %s' % (sRawStmt[offCloseParen + 1:]));
2590
2591 # Hand it to the handler.
# Table entries are tuples; element 0 is the parser callable.
2592 fnParser = g_dMcStmtParsers.get(sName);
2593 if not fnParser:
2594 self.raiseDecodeError(sRawCode, off, 'Unknown MC statement: %s' % (sName,));
2595 fnParser = fnParser[0];
# A parser may return either a single statement object or a list/tuple of them.
2596 oStmt = fnParser(self, sName, asParams);
2597 if not isinstance(oStmt, (list, tuple)):
2598 aoStmts.append(oStmt);
2599 else:
2600 aoStmts.extend(oStmt);
2601
2602 #
2603 # If conditional, we need to parse the whole statement.
2604 #
2605 # For reasons of simplicity, we assume the following structure
2606 # and parse each branch in a recursive call:
2607 # IEM_MC_IF_XXX() {
2608 # IEM_MC_WHATEVER();
2609 # } IEM_MC_ELSE() {
2610 # IEM_MC_WHATEVER();
2611 # } IEM_MC_ENDIF();
2612 #
2613 if iCond >= 0:
# Conditionals are only accepted at nesting levels 0 and 1.
2614 if iLevel > 1: ## @todo discount IEM_MC_NATIVE_IF.
2615 self.raiseDecodeError(sRawCode, off, 'Too deep nesting of conditionals.');
2616
2617 # Find start of the IF block:
2618 offBlock1 = self.skipSpacesAt(sRawCode, offEnd, offStop);
2619 if sRawCode[offBlock1] != '{':
2620 self.raiseDecodeError(sRawCode, offBlock1, 'Expected "{" following %s' % (sName,));
2621
2622 # Find the end of it.
2623 offBlock1End = self.findClosingBraces(sRawCode, offBlock1, offStop);
2624 if offBlock1End < 0:
2625 self.raiseDecodeError(sRawCode, offBlock1, 'No matching "}" closing IF block of %s' % (sName,));
2626
# Decode the IF body, i.e. the text between the braces (braces excluded).
2627 oStmt.aoIfBranch = self.decodeCode(sRawCode, offBlock1 + 1, offBlock1End, iLevel + 1);
2628
2629 # Is there an else section?
2630 off = self.skipSpacesAt(sRawCode, offBlock1End + 1, offStop);
2631 sElseNm = self.kaasConditions[iCond][1];
2632 if self.isSubstrAt(sRawCode, off, sElseNm):
2633 off = self.skipSpacesAt(sRawCode, off + len(sElseNm), offStop);
2634 if sRawCode[off] != '(':
2635 self.raiseDecodeError(sRawCode, off, 'Expected "(" following %s"' % (sElseNm,));
2636 off = self.skipSpacesAt(sRawCode, off + 1, offStop);
2637 if sRawCode[off] != ')':
2638 self.raiseDecodeError(sRawCode, off, 'Expected ")" following %s("' % (sElseNm,));
2639
2640 # Find start of the ELSE block.
2641 offBlock2 = self.skipSpacesAt(sRawCode, off + 1, offStop);
2642 if sRawCode[offBlock2] != '{':
2643 self.raiseDecodeError(sRawCode, offBlock2, 'Expected "{" following %s()"' % (sElseNm,));
2644
2645 # Find the end of it.
2646 offBlock2End = self.findClosingBraces(sRawCode, offBlock2, offStop);
2647 if offBlock2End < 0:
2648 self.raiseDecodeError(sRawCode, offBlock2, 'No matching "}" closing ELSE block of %s' % (sName,));
2649
2650 oStmt.aoElseBranch = self.decodeCode(sRawCode, offBlock2 + 1, offBlock2End, iLevel + 1);
2651 off = self.skipSpacesAt(sRawCode, offBlock2End + 1, offStop);
2652
2653 # Parse past the endif statement.
# The ENDIF must be written as NAME ( ) ; with optional whitespace in between.
2654 sEndIfNm = self.kaasConditions[iCond][2];
2655 if not self.isSubstrAt(sRawCode, off, sEndIfNm):
2656 self.raiseDecodeError(sRawCode, off, 'Expected %s for closing %s' % (sEndIfNm, sName,));
2657 off = self.skipSpacesAt(sRawCode, off + len(sEndIfNm), offStop);
2658 if sRawCode[off] != '(':
2659 self.raiseDecodeError(sRawCode, off, 'Expected "(" following %s"' % (sEndIfNm,));
2660 off = self.skipSpacesAt(sRawCode, off + 1, offStop);
2661 if sRawCode[off] != ')':
2662 self.raiseDecodeError(sRawCode, off, 'Expected ")" following %s("' % (sEndIfNm,));
2663 off = self.skipSpacesAt(sRawCode, off + 1, offStop);
2664 if sRawCode[off] != ';':
2665 self.raiseDecodeError(sRawCode, off, 'Expected ";" following %s()"' % (sEndIfNm,));
2666 off += 1;
2667
2668 else:
2669 # Advance.
# Ordinary MC statement: continue right after its terminating ';'.
2670 off = offEnd + 1;
2671
2672 #
2673 # Otherwise it must be a C/C++ statement of sorts.
2674 #
2675 else:
2676 # Find the end of the statement. if and else requires special handling.
2677 sCondExpr = None;
2678 oMatch = self.koReCppCtrlStmts.match(sRawCode, off);
2679 if oMatch:
# group(1) ends with '(' for keywords that take a condition (if/while/for);
# the condition text is then pulled out with extractParam().
# NOTE(review): offEnd is presumably the offset of the matching ')', given that
#               scanning resumes at offEnd + 1 below - confirm in extractParam().
2680 if oMatch.group(1)[-1] == '(':
2681 (sCondExpr, offEnd) = self.extractParam(sRawCode, oMatch.end());
2682 else:
2683 offEnd = oMatch.end();
# while/for/do are recognised by the regular expression only to be rejected here.
2684 if not oMatch.group(1).startswith('if') and oMatch.group(1) != 'else':
2685 self.raiseDecodeError(sRawCode, off, 'Only if/else control statements allowed: %s' % (oMatch.group(1),));
2686 elif ch == '#':
# Preprocessor line: it extends to the next newline, or to offStop if there is
# none; trailing whitespace is trimmed by walking offEnd backwards.
2687 offEnd = sRawCode.find('\n', off, offStop);
2688 if offEnd < 0:
2689 offEnd = offStop;
2690 offEnd -= 1;
2691 while offEnd > off and sRawCode[offEnd - 1].isspace():
2692 offEnd -= 1;
2693 else:
# Plain C/C++ statement: everything up to and including the next ';'.
2694 offEnd = sRawCode.find(';', off);
2695 if offEnd < 0:
2696 self.raiseDecodeError(sRawCode, off, 'C++ statement without a ";"');
2697
2698 # Check this and the following statement whether it might have
2699 # something to do with decoding. This is a statement filter
2700 # criteria when generating the threaded functions blocks.
# offNextEnd is -1 when there is no further ';'; max() then falls back to
# offEnd, limiting the first two searches to the current statement.
2701 offNextEnd = sRawCode.find(';', offEnd + 1);
2702 fDecode = ( sRawCode.find('IEM_OPCODE_', off, max(offEnd, offNextEnd)) >= 0
2703 or sRawCode.find('IEMOP_HLP_DONE_', off, max(offEnd, offNextEnd)) >= 0
2704 or sRawCode.find('IEMOP_HLP_DECODED_', off, offEnd) >= 0
2705 or sRawCode.find('IEMOP_HLP_RAISE_UD_IF_MISSING_GUEST_FEATURE', off, offEnd) >= 0
2706 or sRawCode.find('IEMOP_HLP_VMX_INSTR', off, offEnd) >= 0
2707 or sRawCode.find('IEMOP_HLP_IN_VMX_OPERATION', off, offEnd) >= 0 ## @todo wrong
2708 );
2709
2710 if not oMatch:
2711 if ch != '#':
2712 aoStmts.append(McCppGeneric(sRawCode[off : offEnd + 1], fDecode));
2713 else:
2714 aoStmts.append(McCppPreProc(sRawCode[off : offEnd + 1]));
2715 off = offEnd + 1;
2716 elif oMatch.group(1).startswith('if'):
2717 #
2718 # if () xxx [else yyy] statement.
2719 #
2720 oStmt = McCppCond(sCondExpr, fDecode);
2721 aoStmts.append(oStmt);
2722 off = offEnd + 1;
2723
2724 # Following the if () we can either have a {} containing zero or more statements
2725 # or we have a single statement.
2726 offBlock1 = self.skipSpacesAt(sRawCode, offEnd + 1, offStop);
2727 if sRawCode[offBlock1] == '{':
2728 offBlock1End = self.findClosingBraces(sRawCode, offBlock1, offStop);
2729 if offBlock1End < 0:
2730 self.raiseDecodeError(sRawCode, offBlock1, 'No matching "}" closing if block');
2731 offBlock1 += 1;
2732 else:
# Single-statement body: the range handed to the recursive call ends just before
# the ';'.  The statement searches in the recursive call are not limited to
# offStop, so they still find that terminator.
2733 offBlock1End = sRawCode.find(';', offBlock1, offStop);
2734 if offBlock1End < 0:
2735 self.raiseDecodeError(sRawCode, off, 'Expected ";" terminating one-line if block"');
2736
2737 oStmt.aoIfBranch = self.decodeCode(sRawCode, offBlock1, offBlock1End, iLevel + 1);
2738
2739 # The else is optional and can likewise be followed by {} or a single statement.
2740 off = self.skipSpacesAt(sRawCode, offBlock1End + 1, offStop);
# An 'else' is only recognised when followed by a whitespace character.
# NOTE(review): the character after 'else' is indexed without a bounds check.
2741 if self.isSubstrAt(sRawCode, off, 'else') and sRawCode[off + len('else')].isspace():
2742 offBlock2 = self.skipSpacesAt(sRawCode, off + len('else'), offStop);
2743 if sRawCode[offBlock2] == '{':
2744 offBlock2End = self.findClosingBraces(sRawCode, offBlock2, offStop);
2745 if offBlock2End < 0:
2746 self.raiseDecodeError(sRawCode, offBlock2, 'No matching "}" closing else block');
2747 offBlock2 += 1;
2748 else:
2749 offBlock2End = sRawCode.find(';', offBlock2, offStop);
2750 if offBlock2End < 0:
2751 self.raiseDecodeError(sRawCode, off, 'Expected ";" terminating one-line else block"');
2752
2753 oStmt.aoElseBranch = self.decodeCode(sRawCode, offBlock2, offBlock2End, iLevel + 1);
2754 off = offBlock2End + 1;
2755
# An 'else' reaching this point was not consumed by the 'if' handling above.
2756 elif oMatch.group(1) == 'else':
2757 # Problematic 'else' branch, typically involving #ifdefs.
2758 self.raiseDecodeError(sRawCode, off, 'Mixed up else/#ifdef or something confusing us.');
2759
2760 return aoStmts;
2761
def decode(self):
    """
    Returns the decoded statement list of the block, decoding on demand.

    When self.aoStmts is still empty, the lines in self.asLines are joined
    and run through decodeCode(), and the result is stored in self.aoStmts.
    Raises ParserException on failure.
    """
    if self.aoStmts:
        return self.aoStmts;
    sCode = ''.join(self.asLines);
    self.aoStmts = self.decodeCode(sCode);
    return self.aoStmts;
2771
2772
def checkForTooEarlyEffSegUse(self, aoStmts):
    """
    Checks that pVCpu->iem.s.iEffSeg is not referenced by any statement
    preceding the first IEM_MC_CALC_RM_EFF_ADDR statement in aoStmts.

    Returns None on success, error string on failure.  When several
    statements offend, the one closest to IEM_MC_CALC_RM_EFF_ADDR is
    reported.

    See r158454 for an example of this issue.
    """

    # Only the top-level statement list is examined; conditional branches are
    # ASSUMED not to occur before the address calculation.
    idxCalc = next((idx for idx, oCur in enumerate(aoStmts) if oCur.sName == 'IEM_MC_CALC_RM_EFF_ADDR'), None);
    if idxCalc is None:
        return None;

    # Walk backwards from the statement just before the address calculation.
    for idxPrev in range(idxCalc - 1, -1, -1):
        if any(sParam.find('pVCpu->iem.s.iEffSeg') >= 0 for sParam in aoStmts[idxPrev].asParams):
            return "statement #%u: pVCpu->iem.s.iEffSeg is used prior to IEM_MC_CALC_RM_EFF_ADDR!" % (idxPrev + 1,);
    return None;
2794
# Extracts the first word of a C/C++ statement (group 1).  The word must be
# followed by a space, an opening parenthesis or a semicolon.
koReCppFirstWord = re.compile(r'^\s*(\w+)[ (;]');

# First words of decoding related C/C++ statements that checkForDoneDecoding()
# accepts after the IEMOP_HLP_DONE_*DECODING* invocation.
kdDecodeCppStmtOkayAfterDone = dict.fromkeys(
    (
        'IEMOP_HLP_IN_VMX_OPERATION',
        'IEMOP_HLP_VMX_INSTR',
    ),
    True
);
2800
def checkForDoneDecoding(self, aoStmts):
    """
    Verifies that the block has exactly one IEMOP_HLP_DONE_*DECODING* (or
    IEMOP_HLP_DECODED_*) macro invocation and that it is correctly placed.

    Returns None on success, error string on failure.

    The point of this is safe instruction restarting in case the recompiler
    runs out of TB resources during recompilation (e.g. aRanges or
    aGCPhysPages entries).
    """
    sErrDecodeAfterDone = "statement #%u: Decoding statement following IEMOP_HLP_DONE_*DECODING*!";

    # Only the top-level statements are inspected, the IEMOP_HLP_DONE_ stuff
    # is not allowed inside conditionals.
    cDoneMacros = 0;
    for idxStmt, oCur in enumerate(aoStmts):
        #
        # MC statements.
        #
        if not oCur.isCppStmt():
            sMcName = oCur.sName;
            if idxStmt == 0 and sMcName.startswith('IEM_MC_DEFER_TO_CIMPL_'):
                cDoneMacros += 1;   # Counts as an implicit done-decoding marker.
            elif cDoneMacros == 0:
                # Column 1+0 of the parser table: the statement modifies state.
                if g_dMcStmtParsers.get(sMcName, (None, False))[1]:
                    return "statement #%u: State modifying MC statement before IEMOP_HLP_DONE_*DECODING*!" % (idxStmt + 1,);
            elif sMcName == 'IEM_MC_CALC_RM_EFF_ADDR':
                return sErrDecodeAfterDone % (idxStmt + 1,);
            continue;

        #
        # C/C++ statements; those without a recognizable first word are ignored.
        #
        oMatch = self.koReCppFirstWord.match(oCur.asParams[0]);
        if not oMatch:
            continue;
        sWord = oMatch.group(1);
        if sWord.startswith(('IEMOP_HLP_DONE_', 'IEMOP_HLP_DECODED_')):
            cDoneMacros += 1;
        elif cDoneMacros > 0 and oCur.fDecode and sWord not in self.kdDecodeCppStmtOkayAfterDone:
            return sErrDecodeAfterDone % (idxStmt + 1,);

    if cDoneMacros == 0:
        return "Block is missing IEMOP_HLP_DONE_*DECODING* invocation!";
    if cDoneMacros > 1:
        return "Block has more than one IEMOP_HLP_DONE_*DECODING* invocation!";
    return None;
2842
def checkForFetchAfterRef(self, aoStmts, asRegRefClasses):
    """
    Checks that the register references are placed after register fetches
    from the same register class.

    Parameters:
        aoStmts             The statements to check.  Branches of McStmtCond
                            statements are checked recursively.
        asRegRefClasses     Dictionary (class name -> True) of the register
                            classes referenced so far.  Updated in place, so
                            references seen in one branch also affect the
                            statements checked after it.

    Returns None on success, error string on failure.

    The register class is the name component following '_REF_' or '_FETCH_'
    in the statement name, e.g. 'GREG' for IEM_MC_REF_GREG_U16 and 'EFLAGS'
    for IEM_MC_REF_EFLAGS.  Fetches whose class starts with 'MEM' are memory
    fetches and are not checked.

    Example:
        SHL CH, CL

    If the CH reference is created first, the fetching of CL will cause the
    RCX guest register to have an active shadow register when it's being
    updated.  The shadow register will then be stale after the SHL operation
    completes, without us noticing.

    It's easier to ensure we've got correct code than complicating the
    recompiler code with safeguards here.
    """
    for iStmt, oStmt in enumerate(aoStmts):
        if not oStmt.isCppStmt():
            offRef = oStmt.sName.find("_REF_");
            if offRef > 0:
                if oStmt.sName in ('IEM_MC_IF_FPUREG_NOT_EMPTY_REF_R80',
                                   'IEM_MC_IF_TWO_FPUREGS_NOT_EMPTY_REF_R80',
                                   'IEM_MC_IF_TWO_FPUREGS_NOT_EMPTY_REF_R80_FIRST',):
                    # Here the class is not what follows '_REF_'.
                    sClass = 'FPUREG';
                else:
                    offClass      = offRef + len("_REF_");
                    offUnderscore = oStmt.sName.find('_', offClass);
                    if offUnderscore > 0:
                        assert offUnderscore > offRef;
                        sClass = oStmt.sName[offClass : offUnderscore];
                    else:
                        # No further underscore: the class is the whole remainder of
                        # the name (e.g. 'EFLAGS'), not just its first character.
                        sClass = oStmt.sName[offClass : ];
                asRegRefClasses[sClass] = True;
            else:
                offFetch = oStmt.sName.find("_FETCH_");
                if offFetch > 0:
                    sClass = oStmt.sName[offFetch + len("_FETCH_") : ];
                    if not sClass.startswith("MEM"):
                        offUnderscore = sClass.find('_');
                        if offUnderscore >= 0:
                            assert offUnderscore > 0;
                            sClass = sClass[:offUnderscore];
                        if sClass in asRegRefClasses:
                            return "statement #%u: %s following REF! That'll mess up guest register shadowing" \
                                 % (iStmt + 1, oStmt.sName,);

        # Go into branches.
        if isinstance(oStmt, McStmtCond):
            sRet = self.checkForFetchAfterRef(oStmt.aoIfBranch, asRegRefClasses);
            if sRet:
                return sRet;
            sRet = self.checkForFetchAfterRef(oStmt.aoElseBranch, asRegRefClasses);
            if sRet:
                return sRet;
    return None;
2898
def check(self):
    """
    Runs the sanity checks on the (decoded) block.

    All checks are always run, in this order: checkForTooEarlyEffSegUse,
    checkForDoneDecoding and checkForFetchAfterRef.

    Returns a list with the error strings of the failing checks, empty if
    all is fine.
    """
    aoStmts = self.decode();
    asResults = (
        self.checkForTooEarlyEffSegUse(aoStmts),
        self.checkForDoneDecoding(aoStmts),
        self.checkForFetchAfterRef(aoStmts, {}),
    );
    return [sResult for sResult in asResults if sResult];
2920
2921
## Temporary switch for enabling / disabling the experimental MCs that depend
## on the SIMD register allocator.  It is used as the native recompiler
## support column of the affected g_dMcStmtParsers entries.
g_fNativeSimd = True;
2925
2926## IEM_MC_XXX -> parser + info dictionary.
2927#
2928# The info columns:
2929# - col 1+0: boolean entry indicating whether the statement modifies state and
2930# must not be used before IEMOP_HLP_DONE_*.
2931# - col 1+1: boolean entry similar to the previous column, but used to
2932# decide when to emit calls for conditional jumps (Jmp/NoJmp).
2933# The difference is that most IEM_MC_IF_XXX entries are False here.
2934# - col 1+2: boolean entry indicating native recompiler support.
2935#
2936# The raw table was generated via the following command
2937# sed -n -e "s/^# *define *\(IEM_MC_[A-Z_0-9]*\)[ (].*$/ '\1': McBlock.parseMcGeneric,/p" include/IEMMc.h \
2938# | sort | uniq | gawk "{printf """ %%-60s (%%s, True)\n""", $1, $2}"
2939# pylint: disable=line-too-long
2940g_dMcStmtParsers = {
2941 'IEM_MC_ACTUALIZE_AVX_STATE_FOR_CHANGE': (McBlock.parseMcGeneric, False, False, True, ),
2942 'IEM_MC_ACTUALIZE_AVX_STATE_FOR_READ': (McBlock.parseMcGeneric, False, False, True, ),
2943 'IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE': (McBlock.parseMcGeneric, False, False, True, ),
2944 'IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ': (McBlock.parseMcGeneric, False, False, True, ),
2945 'IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE': (McBlock.parseMcGeneric, False, False, True, ),
2946 'IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ': (McBlock.parseMcGeneric, False, False, True, ),
2947 'IEM_MC_ADD_GREG_U16': (McBlock.parseMcGeneric, True, True, True, ),
2948 'IEM_MC_ADD_GREG_U16_TO_LOCAL': (McBlock.parseMcGeneric, False, False, False, ),
2949 'IEM_MC_ADD_GREG_U32': (McBlock.parseMcGeneric, True, True, True, ),
2950 'IEM_MC_ADD_GREG_U32_TO_LOCAL': (McBlock.parseMcGeneric, False, False, False, ),
2951 'IEM_MC_ADD_GREG_U64': (McBlock.parseMcGeneric, True, True, True, ),
2952 'IEM_MC_ADD_GREG_U64_TO_LOCAL': (McBlock.parseMcGeneric, False, False, False, ),
2953 'IEM_MC_ADD_GREG_U8_TO_LOCAL': (McBlock.parseMcGeneric, False, False, False, ),
2954 'IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR': (McBlock.parseMcGeneric, True, True, False, ),
2955 'IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR': (McBlock.parseMcGeneric, True, True, False, ),
2956 'IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR': (McBlock.parseMcGeneric, True, True, False, ),
2957 'IEM_MC_ADVANCE_RIP_AND_FINISH': (McBlock.parseMcGeneric, True, True, True, ),
2958 'IEM_MC_AND_2LOCS_U32': (McBlock.parseMcGeneric, False, False, False, ),
2959 'IEM_MC_AND_ARG_U16': (McBlock.parseMcGeneric, False, False, False, ),
2960 'IEM_MC_AND_ARG_U32': (McBlock.parseMcGeneric, False, False, False, ),
2961 'IEM_MC_AND_ARG_U64': (McBlock.parseMcGeneric, False, False, False, ),
2962 'IEM_MC_AND_GREG_U16': (McBlock.parseMcGeneric, True, True, False, ),
2963 'IEM_MC_AND_GREG_U32': (McBlock.parseMcGeneric, True, True, False, ),
2964 'IEM_MC_AND_GREG_U64': (McBlock.parseMcGeneric, True, True, False, ),
2965 'IEM_MC_AND_GREG_U8': (McBlock.parseMcGeneric, True, True, False, ),
2966 'IEM_MC_AND_LOCAL_U16': (McBlock.parseMcGeneric, False, False, True, ),
2967 'IEM_MC_AND_LOCAL_U32': (McBlock.parseMcGeneric, False, False, True, ),
2968 'IEM_MC_AND_LOCAL_U64': (McBlock.parseMcGeneric, False, False, True, ),
2969 'IEM_MC_AND_LOCAL_U8': (McBlock.parseMcGeneric, False, False, True, ),
2970 'IEM_MC_ARG': (McBlock.parseMcArg, False, False, True, ),
2971 'IEM_MC_ARG_CONST': (McBlock.parseMcArgConst, False, False, True, ),
2972 'IEM_MC_ARG_LOCAL_EFLAGS': (McBlock.parseMcArgLocalEFlags, False, False, True, ),
2973 'IEM_MC_ARG_LOCAL_REF': (McBlock.parseMcArgLocalRef, False, False, True, ),
2974 'IEM_MC_ASSIGN_TO_SMALLER': (McBlock.parseMcGeneric, False, False, True, ),
2975 'IEM_MC_BEGIN': (McBlock.parseMcBegin, False, False, True, ),
2976 'IEM_MC_BROADCAST_XREG_U16_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
2977 'IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
2978 'IEM_MC_BROADCAST_XREG_U64_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
2979 'IEM_MC_BROADCAST_XREG_U8_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
2980 'IEM_MC_BROADCAST_YREG_U128_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
2981 'IEM_MC_BROADCAST_YREG_U16_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
2982 'IEM_MC_BROADCAST_YREG_U32_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
2983 'IEM_MC_BROADCAST_YREG_U64_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
2984 'IEM_MC_BROADCAST_YREG_U8_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
2985 'IEM_MC_BSWAP_LOCAL_U16': (McBlock.parseMcGeneric, False, False, True, ),
2986 'IEM_MC_BSWAP_LOCAL_U32': (McBlock.parseMcGeneric, False, False, True, ),
2987 'IEM_MC_BSWAP_LOCAL_U64': (McBlock.parseMcGeneric, False, False, True, ),
2988 'IEM_MC_CALC_RM_EFF_ADDR': (McBlock.parseMcGeneric, False, False, False, ),
2989 'IEM_MC_CALL_AIMPL_3': (McBlock.parseMcCallAImpl, True, True, True, ),
2990 'IEM_MC_CALL_AIMPL_4': (McBlock.parseMcCallAImpl, True, True, True, ),
2991 'IEM_MC_CALL_AVX_AIMPL_2': (McBlock.parseMcCallAvxAImpl, True, True, False, ),
2992 'IEM_MC_CALL_AVX_AIMPL_3': (McBlock.parseMcCallAvxAImpl, True, True, False, ),
2993 'IEM_MC_CALL_CIMPL_0': (McBlock.parseMcCallCImpl, True, True, False, ),
2994 'IEM_MC_CALL_CIMPL_1': (McBlock.parseMcCallCImpl, True, True, False, ),
2995 'IEM_MC_CALL_CIMPL_2': (McBlock.parseMcCallCImpl, True, True, False, ),
2996 'IEM_MC_CALL_CIMPL_3': (McBlock.parseMcCallCImpl, True, True, False, ),
2997 'IEM_MC_CALL_CIMPL_4': (McBlock.parseMcCallCImpl, True, True, False, ),
2998 'IEM_MC_CALL_CIMPL_5': (McBlock.parseMcCallCImpl, True, True, False, ),
2999 'IEM_MC_CALL_FPU_AIMPL_1': (McBlock.parseMcCallFpuAImpl, True, True, False, ),
3000 'IEM_MC_CALL_FPU_AIMPL_2': (McBlock.parseMcCallFpuAImpl, True, True, False, ),
3001 'IEM_MC_CALL_FPU_AIMPL_3': (McBlock.parseMcCallFpuAImpl, True, True, False, ),
3002 'IEM_MC_CALL_MMX_AIMPL_2': (McBlock.parseMcCallMmxAImpl, True, True, False, ),
3003 'IEM_MC_CALL_MMX_AIMPL_3': (McBlock.parseMcCallMmxAImpl, True, True, False, ),
3004 'IEM_MC_CALL_SSE_AIMPL_2': (McBlock.parseMcCallSseAImpl, True, True, g_fNativeSimd),
3005 'IEM_MC_CALL_SSE_AIMPL_3': (McBlock.parseMcCallSseAImpl, True, True, g_fNativeSimd),
3006 'IEM_MC_CALL_VOID_AIMPL_0': (McBlock.parseMcCallVoidAImpl, True, True, True, ),
3007 'IEM_MC_CALL_VOID_AIMPL_1': (McBlock.parseMcCallVoidAImpl, True, True, True, ),
3008 'IEM_MC_CALL_VOID_AIMPL_2': (McBlock.parseMcCallVoidAImpl, True, True, True, ),
3009 'IEM_MC_CALL_VOID_AIMPL_3': (McBlock.parseMcCallVoidAImpl, True, True, True, ),
3010 'IEM_MC_CALL_VOID_AIMPL_4': (McBlock.parseMcCallVoidAImpl, True, True, True, ),
3011 'IEM_MC_CLEAR_EFL_BIT': (McBlock.parseMcGeneric, True, True, False, ),
3012 'IEM_MC_CLEAR_FSW_EX': (McBlock.parseMcGeneric, True, True, False, ),
3013 'IEM_MC_CLEAR_HIGH_GREG_U64': (McBlock.parseMcGeneric, True, True, True, ),
3014 'IEM_MC_CLEAR_XREG_U32_MASK': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
3015 'IEM_MC_CLEAR_YREG_128_UP': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
3016 'IEM_MC_COMMIT_EFLAGS': (McBlock.parseMcGeneric, True, True, True, ),
3017 'IEM_MC_COMMIT_EFLAGS_OPT': (McBlock.parseMcGeneric, True, True, True, ),
3018 'IEM_MC_COPY_XREG_U128': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
3019 'IEM_MC_COPY_YREG_U128_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
3020 'IEM_MC_COPY_YREG_U256_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
3021 'IEM_MC_COPY_YREG_U64_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
3022 'IEM_MC_DEFER_TO_CIMPL_0_RET': (McBlock.parseMcDeferToCImpl, False, False, False, ),
3023 'IEM_MC_DEFER_TO_CIMPL_1_RET': (McBlock.parseMcDeferToCImpl, False, False, False, ),
3024 'IEM_MC_DEFER_TO_CIMPL_2_RET': (McBlock.parseMcDeferToCImpl, False, False, False, ),
3025 'IEM_MC_DEFER_TO_CIMPL_3_RET': (McBlock.parseMcDeferToCImpl, False, False, False, ),
3026 'IEM_MC_END': (McBlock.parseMcGeneric, True, True, True, ),
3027 'IEM_MC_FETCH_EFLAGS': (McBlock.parseMcGeneric, False, False, True, ),
3028 'IEM_MC_FETCH_EFLAGS_U8': (McBlock.parseMcGeneric, False, False, False, ),
3029 'IEM_MC_FETCH_FCW': (McBlock.parseMcGeneric, False, False, True, ),
3030 'IEM_MC_FETCH_FSW': (McBlock.parseMcGeneric, False, False, True, ),
3031 'IEM_MC_FETCH_GREG_U16': (McBlock.parseMcGeneric, False, False, True, ),
3032 'IEM_MC_FETCH_GREG_U16_SX_U32': (McBlock.parseMcGeneric, False, False, True, ),
3033 'IEM_MC_FETCH_GREG_U16_SX_U64': (McBlock.parseMcGeneric, False, False, True, ),
3034 'IEM_MC_FETCH_GREG_U16_ZX_U32': (McBlock.parseMcGeneric, False, False, True, ),
3035 'IEM_MC_FETCH_GREG_U16_ZX_U64': (McBlock.parseMcGeneric, False, False, True, ),
3036 'IEM_MC_FETCH_GREG_U32': (McBlock.parseMcGeneric, False, False, True, ),
3037 'IEM_MC_FETCH_GREG_U32_SX_U64': (McBlock.parseMcGeneric, False, False, True, ),
3038 'IEM_MC_FETCH_GREG_U32_ZX_U64': (McBlock.parseMcGeneric, False, False, True, ),
3039 'IEM_MC_FETCH_GREG_U64': (McBlock.parseMcGeneric, False, False, True, ),
3040 'IEM_MC_FETCH_GREG_U64_ZX_U64': (McBlock.parseMcGeneric, False, False, True, ),
3041 'IEM_MC_FETCH_GREG_U8': (McBlock.parseMcGeneric, False, False, True, ), # thrd var
3042 'IEM_MC_FETCH_GREG_U8_SX_U16': (McBlock.parseMcGeneric, False, False, True, ), # thrd var
3043 'IEM_MC_FETCH_GREG_U8_SX_U32': (McBlock.parseMcGeneric, False, False, True, ), # thrd var
3044 'IEM_MC_FETCH_GREG_U8_SX_U64': (McBlock.parseMcGeneric, False, False, True, ), # thrd var
3045 'IEM_MC_FETCH_GREG_U8_ZX_U16': (McBlock.parseMcGeneric, False, False, True, ), # thrd var
3046 'IEM_MC_FETCH_GREG_U8_ZX_U32': (McBlock.parseMcGeneric, False, False, True, ), # thrd var
3047 'IEM_MC_FETCH_GREG_U8_ZX_U64': (McBlock.parseMcGeneric, False, False, True, ), # thrd var
3048 'IEM_MC_FETCH_GREG_PAIR_U32': (McBlock.parseMcGeneric, False, False, False, ),
3049 'IEM_MC_FETCH_GREG_PAIR_U64': (McBlock.parseMcGeneric, False, False, False, ),
3050 'IEM_MC_FETCH_MEM_D80': (McBlock.parseMcGeneric, True, True, False, ),
3051 'IEM_MC_FETCH_MEM_I16': (McBlock.parseMcGeneric, True, True, False, ),
3052 'IEM_MC_FETCH_MEM_I32': (McBlock.parseMcGeneric, True, True, False, ),
3053 'IEM_MC_FETCH_MEM_I64': (McBlock.parseMcGeneric, True, True, False, ),
3054 'IEM_MC_FETCH_MEM_R32': (McBlock.parseMcGeneric, True, True, False, ),
3055 'IEM_MC_FETCH_MEM_R64': (McBlock.parseMcGeneric, True, True, False, ),
3056 'IEM_MC_FETCH_MEM_R80': (McBlock.parseMcGeneric, True, True, False, ),
3057 'IEM_MC_FETCH_MEM_U128': (McBlock.parseMcGeneric, True, True, False, ),
3058 'IEM_MC_FETCH_MEM_U128_ALIGN_SSE': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
3059 'IEM_MC_FETCH_MEM_U128_NO_AC': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
3060 'IEM_MC_FETCH_MEM_U128_AND_XREG_U128': (McBlock.parseMcGeneric, True, True, False, ),
3061 'IEM_MC_FETCH_MEM_U128_AND_XREG_U128_AND_RAX_RDX_U64': (McBlock.parseMcGeneric, True, True, False, ),
3062 'IEM_MC_FETCH_MEM_U128_AND_XREG_U128_AND_EAX_EDX_U32_SX_U64':(McBlock.parseMcGeneric, True, True, False, ),
3063 'IEM_MC_FETCH_MEM_U16': (McBlock.parseMcGeneric, True, True, True, ),
3064 'IEM_MC_FETCH_MEM_U16_DISP': (McBlock.parseMcGeneric, True, True, True, ),
3065 'IEM_MC_FETCH_MEM_U16_SX_U32': (McBlock.parseMcGeneric, True, True, True, ), # movsx
3066 'IEM_MC_FETCH_MEM_U16_SX_U64': (McBlock.parseMcGeneric, True, True, True, ), # movsx
3067 'IEM_MC_FETCH_MEM_U16_ZX_U32': (McBlock.parseMcGeneric, True, True, True, ), # movzx
3068 'IEM_MC_FETCH_MEM_U16_ZX_U64': (McBlock.parseMcGeneric, True, True, True, ), # movzx
3069 'IEM_MC_FETCH_MEM_U256': (McBlock.parseMcGeneric, True, True, False, ),
3070 'IEM_MC_FETCH_MEM_U256_ALIGN_AVX': (McBlock.parseMcGeneric, True, True, False, ),
3071 'IEM_MC_FETCH_MEM_U256_NO_AC': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
3072 'IEM_MC_FETCH_MEM_U32': (McBlock.parseMcGeneric, True, True, True, ),
3073 'IEM_MC_FETCH_MEM_U32_DISP': (McBlock.parseMcGeneric, True, True, True, ), #bounds only
3074 'IEM_MC_FETCH_MEM_U32_SX_U64': (McBlock.parseMcGeneric, True, True, True, ), # movsx
3075 'IEM_MC_FETCH_MEM_U32_ZX_U64': (McBlock.parseMcGeneric, True, True, True, ), # movzx
3076 'IEM_MC_FETCH_MEM_U64': (McBlock.parseMcGeneric, True, True, True, ),
3077 'IEM_MC_FETCH_MEM_U64_ALIGN_U128': (McBlock.parseMcGeneric, True, True, False, ),
3078 'IEM_MC_FETCH_MEM_U8': (McBlock.parseMcGeneric, True, True, True, ),
3079 'IEM_MC_FETCH_MEM_U8_SX_U16': (McBlock.parseMcGeneric, True, True, True, ), # movsx
3080 'IEM_MC_FETCH_MEM_U8_SX_U32': (McBlock.parseMcGeneric, True, True, True, ), # movsx
3081 'IEM_MC_FETCH_MEM_U8_SX_U64': (McBlock.parseMcGeneric, True, True, True, ), # movsx
3082 'IEM_MC_FETCH_MEM_U8_ZX_U16': (McBlock.parseMcGeneric, True, True, True, ), # movzx
3083 'IEM_MC_FETCH_MEM_U8_ZX_U32': (McBlock.parseMcGeneric, True, True, True, ), # movzx
3084 'IEM_MC_FETCH_MEM_U8_ZX_U64': (McBlock.parseMcGeneric, True, True, True, ), # movzx
3085 'IEM_MC_FETCH_MEM_XMM': (McBlock.parseMcGeneric, True, True, False, ),
3086 'IEM_MC_FETCH_MEM_XMM_ALIGN_SSE': (McBlock.parseMcGeneric, True, True, False, ),
3087 'IEM_MC_FETCH_MEM_XMM_NO_AC': (McBlock.parseMcGeneric, True, True, False, ),
3088 'IEM_MC_FETCH_MEM_XMM_U32': (McBlock.parseMcGeneric, True, True, False, ),
3089 'IEM_MC_FETCH_MEM_XMM_U64': (McBlock.parseMcGeneric, True, True, False, ),
3090 'IEM_MC_FETCH_MEM_XMM_ALIGN_SSE_AND_XREG_XMM': (McBlock.parseMcGeneric, True, True, False, ),
3091 'IEM_MC_FETCH_MEM_XMM_U32_AND_XREG_XMM': (McBlock.parseMcGeneric, True, True, False, ),
3092 'IEM_MC_FETCH_MEM_XMM_U64_AND_XREG_XMM': (McBlock.parseMcGeneric, True, True, False, ),
3093 'IEM_MC_FETCH_MEM_YMM': (McBlock.parseMcGeneric, True, True, False, ),
3094 'IEM_MC_FETCH_MEM_YMM_ALIGN_AVX': (McBlock.parseMcGeneric, True, True, False, ),
3095 'IEM_MC_FETCH_MEM_YMM_NO_AC': (McBlock.parseMcGeneric, True, True, False, ),
3096 'IEM_MC_FETCH_MEM16_U8': (McBlock.parseMcGeneric, True, True, False, ),
3097 'IEM_MC_FETCH_MEM32_U8': (McBlock.parseMcGeneric, True, True, False, ),
3098 'IEM_MC_FETCH_MREG_U16': (McBlock.parseMcGeneric, False, False, False, ),
3099 'IEM_MC_FETCH_MREG_U32': (McBlock.parseMcGeneric, False, False, False, ),
3100 'IEM_MC_FETCH_MREG_U64': (McBlock.parseMcGeneric, False, False, False, ),
3101 'IEM_MC_FETCH_SREG_BASE_U32': (McBlock.parseMcGeneric, False, False, False, ),
3102 'IEM_MC_FETCH_SREG_BASE_U64': (McBlock.parseMcGeneric, False, False, False, ),
3103 'IEM_MC_FETCH_SREG_U16': (McBlock.parseMcGeneric, False, False, True, ),
3104 'IEM_MC_FETCH_SREG_ZX_U32': (McBlock.parseMcGeneric, False, False, True, ),
3105 'IEM_MC_FETCH_SREG_ZX_U64': (McBlock.parseMcGeneric, False, False, True, ),
3106 'IEM_MC_FETCH_XREG_U128': (McBlock.parseMcGeneric, False, False, False, ),
3107 'IEM_MC_FETCH_XREG_U16': (McBlock.parseMcGeneric, False, False, g_fNativeSimd),
3108 'IEM_MC_FETCH_XREG_U32': (McBlock.parseMcGeneric, False, False, g_fNativeSimd),
3109 'IEM_MC_FETCH_XREG_U64': (McBlock.parseMcGeneric, False, False, g_fNativeSimd),
3110 'IEM_MC_FETCH_XREG_U8': (McBlock.parseMcGeneric, False, False, g_fNativeSimd),
3111 'IEM_MC_FETCH_XREG_XMM': (McBlock.parseMcGeneric, False, False, False, ),
3112 'IEM_MC_FETCH_XREG_PAIR_U128': (McBlock.parseMcGeneric, False, False, False, ),
3113 'IEM_MC_FETCH_XREG_PAIR_U128_AND_RAX_RDX_U64': (McBlock.parseMcGeneric, False, False, False, ),
3114 'IEM_MC_FETCH_XREG_PAIR_U128_AND_EAX_EDX_U32_SX_U64': (McBlock.parseMcGeneric, False, False, False, ),
3115 'IEM_MC_FETCH_XREG_PAIR_XMM': (McBlock.parseMcGeneric, False, False, False, ),
3116 'IEM_MC_FETCH_YREG_U128': (McBlock.parseMcGeneric, False, False, False, ),
3117 'IEM_MC_FETCH_YREG_U256': (McBlock.parseMcGeneric, False, False, g_fNativeSimd),
3118 'IEM_MC_FETCH_YREG_U32': (McBlock.parseMcGeneric, False, False, g_fNativeSimd),
3119 'IEM_MC_FETCH_YREG_U64': (McBlock.parseMcGeneric, False, False, g_fNativeSimd),
3120 'IEM_MC_FLIP_EFL_BIT': (McBlock.parseMcGeneric, True, True, False, ),
3121 'IEM_MC_FPU_FROM_MMX_MODE': (McBlock.parseMcGeneric, True, True, False, ),
3122 'IEM_MC_FPU_STACK_DEC_TOP': (McBlock.parseMcGeneric, True, True, False, ),
3123 'IEM_MC_FPU_STACK_FREE': (McBlock.parseMcGeneric, True, True, False, ),
3124 'IEM_MC_FPU_STACK_INC_TOP': (McBlock.parseMcGeneric, True, True, False, ),
3125 'IEM_MC_FPU_STACK_PUSH_OVERFLOW': (McBlock.parseMcGeneric, True, True, False, ),
3126 'IEM_MC_FPU_STACK_PUSH_OVERFLOW_MEM_OP': (McBlock.parseMcGeneric, True, True, False, ),
3127 'IEM_MC_FPU_STACK_PUSH_UNDERFLOW': (McBlock.parseMcGeneric, True, True, False, ),
3128 'IEM_MC_FPU_STACK_PUSH_UNDERFLOW_TWO': (McBlock.parseMcGeneric, True, True, False, ),
3129 'IEM_MC_FPU_STACK_UNDERFLOW': (McBlock.parseMcGeneric, True, True, False, ),
3130 'IEM_MC_FPU_STACK_UNDERFLOW_MEM_OP': (McBlock.parseMcGeneric, True, True, False, ),
3131 'IEM_MC_FPU_STACK_UNDERFLOW_MEM_OP_THEN_POP': (McBlock.parseMcGeneric, True, True, False, ),
3132 'IEM_MC_FPU_STACK_UNDERFLOW_THEN_POP': (McBlock.parseMcGeneric, True, True, False, ),
3133 'IEM_MC_FPU_STACK_UNDERFLOW_THEN_POP_POP': (McBlock.parseMcGeneric, True, True, False, ),
3134 'IEM_MC_FPU_TO_MMX_MODE': (McBlock.parseMcGeneric, True, True, False, ),
3135 'IEM_MC_HINT_FLUSH_GUEST_SHADOW': (McBlock.parseMcGeneric, True, True, True, ),
3136 'IEM_MC_IF_CX_IS_NZ': (McBlock.parseMcGenericCond, True, False, True, ),
3137 'IEM_MC_IF_CX_IS_NOT_ONE': (McBlock.parseMcGenericCond, True, False, True, ),
3138 'IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3139 'IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3140 'IEM_MC_IF_ECX_IS_NZ': (McBlock.parseMcGenericCond, True, False, True, ),
3141 'IEM_MC_IF_ECX_IS_NOT_ONE': (McBlock.parseMcGenericCond, True, False, True, ),
3142 'IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3143 'IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3144 'IEM_MC_IF_EFL_ANY_BITS_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3145 'IEM_MC_IF_EFL_BIT_NOT_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3146 'IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ': (McBlock.parseMcGenericCond, True, False, True, ),
3147 'IEM_MC_IF_EFL_BIT_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3148 'IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE': (McBlock.parseMcGenericCond, True, False, True, ),
3149 'IEM_MC_IF_EFL_BITS_EQ': (McBlock.parseMcGenericCond, True, False, True, ),
3150 'IEM_MC_IF_EFL_BITS_NE': (McBlock.parseMcGenericCond, True, False, True, ),
3151 'IEM_MC_IF_EFL_NO_BITS_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3152 'IEM_MC_IF_FCW_IM': (McBlock.parseMcGenericCond, True, True, False, ),
3153 'IEM_MC_IF_FPUREG_IS_EMPTY': (McBlock.parseMcGenericCond, True, True, False, ),
3154 'IEM_MC_IF_FPUREG_NOT_EMPTY': (McBlock.parseMcGenericCond, True, True, False, ),
3155 'IEM_MC_IF_FPUREG_NOT_EMPTY_REF_R80': (McBlock.parseMcGenericCond, True, True, False, ),
3156 'IEM_MC_IF_GREG_BIT_SET': (McBlock.parseMcGenericCond, True, False, False, ),
3157 'IEM_MC_IF_LOCAL_IS_Z': (McBlock.parseMcGenericCond, True, False, True, ),
3158 'IEM_MC_IF_MXCSR_XCPT_PENDING': (McBlock.parseMcGenericCond, True, True, g_fNativeSimd),
3159 'IEM_MC_IF_RCX_IS_NZ': (McBlock.parseMcGenericCond, True, False, True, ),
3160 'IEM_MC_IF_RCX_IS_NOT_ONE': (McBlock.parseMcGenericCond, True, False, True, ),
3161 'IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3162 'IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3163 'IEM_MC_IF_TWO_FPUREGS_NOT_EMPTY_REF_R80': (McBlock.parseMcGenericCond, True, True, False, ),
3164 'IEM_MC_IF_TWO_FPUREGS_NOT_EMPTY_REF_R80_FIRST': (McBlock.parseMcGenericCond, True, True, False, ),
3165 'IEM_MC_IMPLICIT_AVX_AIMPL_ARGS': (McBlock.parseMcImplicitAvxAArgs, False, False, False, ),
3166 'IEM_MC_INT_CLEAR_ZMM_256_UP': (McBlock.parseMcGeneric, True, True, False, ),
3167 'IEM_MC_LOCAL': (McBlock.parseMcLocal, False, False, True, ),
3168 'IEM_MC_LOCAL_ASSIGN': (McBlock.parseMcLocalAssign, False, False, True, ),
3169 'IEM_MC_LOCAL_CONST': (McBlock.parseMcLocalConst, False, False, True, ),
3170 'IEM_MC_LOCAL_EFLAGS': (McBlock.parseMcLocalEFlags, True, True, True, ),
3171 'IEM_MC_NOREF': (McBlock.parseMcGeneric, False, False, True, ),
3172 'IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT': (McBlock.parseMcGeneric, True, True, True, ),
3173 'IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE': (McBlock.parseMcGeneric, True, True, True, ),
3174 'IEM_MC_MAYBE_RAISE_FPU_XCPT': (McBlock.parseMcGeneric, True, True, True, ),
3175 'IEM_MC_MAYBE_RAISE_FSGSBASE_XCPT': (McBlock.parseMcGeneric, True, True, False, ),
3176 'IEM_MC_MAYBE_RAISE_MMX_RELATED_XCPT': (McBlock.parseMcGeneric, True, True, False, ),
3177 'IEM_MC_MAYBE_RAISE_NON_CANONICAL_ADDR_GP0': (McBlock.parseMcGeneric, True, True, False, ),
3178 'IEM_MC_MAYBE_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT': (McBlock.parseMcGeneric, True, True, False, ),
3179 'IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT': (McBlock.parseMcGeneric, True, True, True, ),
3180 'IEM_MC_MAYBE_RAISE_WAIT_DEVICE_NOT_AVAILABLE': (McBlock.parseMcGeneric, True, True, False, ),
3181 'IEM_MC_MEM_COMMIT_AND_UNMAP_ATOMIC': (McBlock.parseMcGeneric, True, True, True, ),
3182 'IEM_MC_MEM_COMMIT_AND_UNMAP_RW': (McBlock.parseMcGeneric, True, True, True, ),
3183 'IEM_MC_MEM_COMMIT_AND_UNMAP_RO': (McBlock.parseMcGeneric, True, True, True, ),
3184 'IEM_MC_MEM_COMMIT_AND_UNMAP_WO': (McBlock.parseMcGeneric, True, True, True, ),
3185 'IEM_MC_MEM_COMMIT_AND_UNMAP_FOR_FPU_STORE_WO': (McBlock.parseMcGeneric, True, True, False, ),
3186 'IEM_MC_MEM_MAP_D80_WO': (McBlock.parseMcGeneric, True, True, True, ),
3187 'IEM_MC_MEM_MAP_I16_WO': (McBlock.parseMcGeneric, True, True, True, ),
3188 'IEM_MC_MEM_MAP_I32_WO': (McBlock.parseMcGeneric, True, True, True, ),
3189 'IEM_MC_MEM_MAP_I64_WO': (McBlock.parseMcGeneric, True, True, True, ),
3190 'IEM_MC_MEM_MAP_R32_WO': (McBlock.parseMcGeneric, True, True, True, ),
3191 'IEM_MC_MEM_MAP_R64_WO': (McBlock.parseMcGeneric, True, True, True, ),
3192 'IEM_MC_MEM_MAP_R80_WO': (McBlock.parseMcGeneric, True, True, True, ),
3193 'IEM_MC_MEM_MAP_U8_ATOMIC': (McBlock.parseMcGeneric, True, True, True, ),
3194 'IEM_MC_MEM_MAP_U8_RW': (McBlock.parseMcGeneric, True, True, True, ),
3195 'IEM_MC_MEM_MAP_U8_RO': (McBlock.parseMcGeneric, True, True, True, ),
3196 'IEM_MC_MEM_MAP_U8_WO': (McBlock.parseMcGeneric, True, True, True, ),
3197 'IEM_MC_MEM_MAP_U16_ATOMIC': (McBlock.parseMcGeneric, True, True, True, ),
3198 'IEM_MC_MEM_MAP_U16_RW': (McBlock.parseMcGeneric, True, True, True, ),
3199 'IEM_MC_MEM_MAP_U16_RO': (McBlock.parseMcGeneric, True, True, True, ),
3200 'IEM_MC_MEM_MAP_U16_WO': (McBlock.parseMcGeneric, True, True, True, ),
3201 'IEM_MC_MEM_MAP_U32_ATOMIC': (McBlock.parseMcGeneric, True, True, True, ),
3202 'IEM_MC_MEM_MAP_U32_RW': (McBlock.parseMcGeneric, True, True, True, ),
3203 'IEM_MC_MEM_MAP_U32_RO': (McBlock.parseMcGeneric, True, True, True, ),
3204 'IEM_MC_MEM_MAP_U32_WO': (McBlock.parseMcGeneric, True, True, True, ),
3205 'IEM_MC_MEM_MAP_U64_ATOMIC': (McBlock.parseMcGeneric, True, True, True, ),
3206 'IEM_MC_MEM_MAP_U64_RW': (McBlock.parseMcGeneric, True, True, True, ),
3207 'IEM_MC_MEM_MAP_U64_RO': (McBlock.parseMcGeneric, True, True, True, ),
3208 'IEM_MC_MEM_MAP_U64_WO': (McBlock.parseMcGeneric, True, True, True, ),
3209 'IEM_MC_MEM_MAP_U128_ATOMIC': (McBlock.parseMcGeneric, True, True, True, ),
3210 'IEM_MC_MEM_MAP_U128_RW': (McBlock.parseMcGeneric, True, True, True, ),
3211 'IEM_MC_MEM_MAP_U128_RO': (McBlock.parseMcGeneric, True, True, True, ),
3212 'IEM_MC_MEM_MAP_U128_WO': (McBlock.parseMcGeneric, True, True, True, ),
3213 'IEM_MC_MEM_ROLLBACK_AND_UNMAP_WO': (McBlock.parseMcGeneric, True, True, True, ),
3214 'IEM_MC_MERGE_YREG_U32_U96_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
3215 'IEM_MC_MERGE_YREG_U64_U64_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
3216 'IEM_MC_MERGE_YREG_U64HI_U64HI_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
3217 'IEM_MC_MERGE_YREG_U64LO_U64LO_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
3218 'IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
3219 'IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
3220 'IEM_MC_MODIFIED_MREG': (McBlock.parseMcGeneric, True, True, False, ),
3221 'IEM_MC_MODIFIED_MREG_BY_REF': (McBlock.parseMcGeneric, True, True, False, ),
3222 'IEM_MC_NATIVE_EMIT_0': (McBlock.parseMcGeneric, True, True, True, ),
3223 'IEM_MC_NATIVE_EMIT_1': (McBlock.parseMcGeneric, True, True, True, ),
3224 'IEM_MC_NATIVE_EMIT_2': (McBlock.parseMcGeneric, True, True, True, ),
3225 'IEM_MC_NATIVE_EMIT_3': (McBlock.parseMcGeneric, True, True, True, ),
3226 'IEM_MC_NATIVE_EMIT_4': (McBlock.parseMcGeneric, True, True, True, ),
3227 'IEM_MC_NATIVE_EMIT_5': (McBlock.parseMcGeneric, True, True, True, ),
3228 'IEM_MC_NATIVE_EMIT_6': (McBlock.parseMcGeneric, True, True, True, ),
3229 'IEM_MC_NATIVE_EMIT_7': (McBlock.parseMcGeneric, True, True, True, ),
3230 'IEM_MC_NATIVE_IF': (McBlock.parseMcNativeIf, False, False, True, ),
3231 'IEM_MC_NATIVE_ELSE': (McBlock.parseMcGenericCond, False, False, True, ),
3232 'IEM_MC_NATIVE_ENDIF': (McBlock.parseMcGenericCond, False, False, True, ),
3233 'IEM_MC_OR_2LOCS_U32': (McBlock.parseMcGeneric, False, False, False, ),
3234 'IEM_MC_OR_GREG_U16': (McBlock.parseMcGeneric, True, True, False, ),
3235 'IEM_MC_OR_GREG_U32': (McBlock.parseMcGeneric, True, True, False, ),
3236 'IEM_MC_OR_GREG_U64': (McBlock.parseMcGeneric, True, True, False, ),
3237 'IEM_MC_OR_GREG_U8': (McBlock.parseMcGeneric, True, True, False, ),
3238 'IEM_MC_OR_LOCAL_U16': (McBlock.parseMcGeneric, False, False, True, ),
3239 'IEM_MC_OR_LOCAL_U32': (McBlock.parseMcGeneric, False, False, True, ),
3240 'IEM_MC_OR_LOCAL_U8': (McBlock.parseMcGeneric, False, False, True, ),
3241 'IEM_MC_POP_GREG_U16': (McBlock.parseMcGeneric, True, True, True, ),
3242 'IEM_MC_POP_GREG_U32': (McBlock.parseMcGeneric, True, True, True, ),
3243 'IEM_MC_POP_GREG_U64': (McBlock.parseMcGeneric, True, True, True, ),
3244 'IEM_MC_PREPARE_AVX_USAGE': (McBlock.parseMcGeneric, False, False, True),
3245 'IEM_MC_PREPARE_FPU_USAGE': (McBlock.parseMcGeneric, False, False, True),
3246 'IEM_MC_PREPARE_SSE_USAGE': (McBlock.parseMcGeneric, False, False, True),
3247 'IEM_MC_PUSH_FPU_RESULT': (McBlock.parseMcGeneric, True, True, False, ),
3248 'IEM_MC_PUSH_FPU_RESULT_MEM_OP': (McBlock.parseMcGeneric, True, True, False, ),
3249 'IEM_MC_PUSH_FPU_RESULT_TWO': (McBlock.parseMcGeneric, True, True, False, ),
3250 'IEM_MC_PUSH_U16': (McBlock.parseMcGeneric, True, True, True, ),
3251 'IEM_MC_PUSH_U32': (McBlock.parseMcGeneric, True, True, True, ),
3252 'IEM_MC_PUSH_U32_SREG': (McBlock.parseMcGeneric, True, True, True, ),
3253 'IEM_MC_PUSH_U64': (McBlock.parseMcGeneric, True, True, True, ),
3254 'IEM_MC_RAISE_DIVIDE_ERROR': (McBlock.parseMcGeneric, True, True, True, ),
3255 'IEM_MC_RAISE_GP0_IF_CPL_NOT_ZERO': (McBlock.parseMcGeneric, True, True, False, ),
3256 'IEM_MC_RAISE_GP0_IF_EFF_ADDR_UNALIGNED': (McBlock.parseMcGeneric, True, True, False, ),
3257 'IEM_MC_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT': (McBlock.parseMcGeneric, True, True, True, ),
3258 'IEM_MC_REF_EFLAGS': (McBlock.parseMcGeneric, False, False, True, ),
3259 'IEM_MC_REF_FPUREG': (McBlock.parseMcGeneric, False, False, False, ),
3260 'IEM_MC_REF_GREG_I32': (McBlock.parseMcGeneric, False, False, True, ),
3261 'IEM_MC_REF_GREG_I32_CONST': (McBlock.parseMcGeneric, False, False, True, ),
3262 'IEM_MC_REF_GREG_I64': (McBlock.parseMcGeneric, False, False, True, ),
3263 'IEM_MC_REF_GREG_I64_CONST': (McBlock.parseMcGeneric, False, False, True, ),
3264 'IEM_MC_REF_GREG_U16': (McBlock.parseMcGeneric, False, False, True, ),
3265 'IEM_MC_REF_GREG_U16_CONST': (McBlock.parseMcGeneric, False, False, True, ),
3266 'IEM_MC_REF_GREG_U32': (McBlock.parseMcGeneric, False, False, True, ),
3267 'IEM_MC_REF_GREG_U32_CONST': (McBlock.parseMcGeneric, False, False, True, ),
3268 'IEM_MC_REF_GREG_U64': (McBlock.parseMcGeneric, False, False, True, ),
3269 'IEM_MC_REF_GREG_U64_CONST': (McBlock.parseMcGeneric, False, False, True, ),
3270 'IEM_MC_REF_GREG_U8': (McBlock.parseMcGeneric, False, False, False, ), # threaded
3271 'IEM_MC_REF_GREG_U8_CONST': (McBlock.parseMcGeneric, False, False, False, ), # threaded
3272 'IEM_MC_REF_MREG_U32_CONST': (McBlock.parseMcGeneric, False, False, False, ),
3273 'IEM_MC_REF_MREG_U64': (McBlock.parseMcGeneric, False, False, False, ),
3274 'IEM_MC_REF_MREG_U64_CONST': (McBlock.parseMcGeneric, False, False, False, ),
3275 'IEM_MC_REF_MXCSR': (McBlock.parseMcGeneric, False, False, True, ),
3276 'IEM_MC_REF_XREG_R32_CONST': (McBlock.parseMcGeneric, False, False, False, ),
3277 'IEM_MC_REF_XREG_R64_CONST': (McBlock.parseMcGeneric, False, False, False, ),
3278 'IEM_MC_REF_XREG_U128': (McBlock.parseMcGeneric, False, False, True, ),
3279 'IEM_MC_REF_XREG_U128_CONST': (McBlock.parseMcGeneric, False, False, True, ),
3280 'IEM_MC_REF_XREG_U32_CONST': (McBlock.parseMcGeneric, False, False, False, ),
3281 'IEM_MC_REF_XREG_U64_CONST': (McBlock.parseMcGeneric, False, False, False, ),
3282 'IEM_MC_REF_XREG_XMM_CONST': (McBlock.parseMcGeneric, False, False, True, ),
3283 'IEM_MC_REF_YREG_U128': (McBlock.parseMcGeneric, False, False, False, ),
3284 'IEM_MC_REF_YREG_U128_CONST': (McBlock.parseMcGeneric, False, False, False, ),
3285 'IEM_MC_REF_YREG_U64_CONST': (McBlock.parseMcGeneric, False, False, False, ),
3286 'IEM_MC_REL_JMP_S16_AND_FINISH': (McBlock.parseMcGeneric, True, True, False, ),
3287 'IEM_MC_REL_JMP_S32_AND_FINISH': (McBlock.parseMcGeneric, True, True, False, ),
3288 'IEM_MC_REL_JMP_S8_AND_FINISH': (McBlock.parseMcGeneric, True, True, False, ),
3289 'IEM_MC_RETURN_ON_FAILURE': (McBlock.parseMcGeneric, False, False, False, ),
3290 'IEM_MC_SAR_LOCAL_S16': (McBlock.parseMcGeneric, False, False, False, ),
3291 'IEM_MC_SAR_LOCAL_S32': (McBlock.parseMcGeneric, False, False, False, ),
3292 'IEM_MC_SAR_LOCAL_S64': (McBlock.parseMcGeneric, False, False, False, ),
3293 'IEM_MC_SET_EFL_BIT': (McBlock.parseMcGeneric, True, True, False, ),
3294 'IEM_MC_SET_FPU_RESULT': (McBlock.parseMcGeneric, True, True, False, ),
3295 'IEM_MC_SET_RIP_U16_AND_FINISH': (McBlock.parseMcGeneric, True, True, False, ),
3296 'IEM_MC_SET_RIP_U32_AND_FINISH': (McBlock.parseMcGeneric, True, True, False, ),
3297 'IEM_MC_SET_RIP_U64_AND_FINISH': (McBlock.parseMcGeneric, True, True, False, ),
3298 'IEM_MC_SHL_LOCAL_S16': (McBlock.parseMcGeneric, False, False, False, ),
3299 'IEM_MC_SHL_LOCAL_S32': (McBlock.parseMcGeneric, False, False, False, ),
3300 'IEM_MC_SHL_LOCAL_S64': (McBlock.parseMcGeneric, False, False, False, ),
3301 'IEM_MC_SHR_LOCAL_U8': (McBlock.parseMcGeneric, False, False, False, ),
3302 'IEM_MC_SSE_UPDATE_MXCSR': (McBlock.parseMcGeneric, True, True, False, ),
3303 'IEM_MC_STORE_FPU_RESULT': (McBlock.parseMcGeneric, True, True, False, ),
3304 'IEM_MC_STORE_FPU_RESULT_MEM_OP': (McBlock.parseMcGeneric, True, True, False, ),
3305 'IEM_MC_STORE_FPU_RESULT_THEN_POP': (McBlock.parseMcGeneric, True, True, False, ),
3306 'IEM_MC_STORE_FPU_RESULT_WITH_MEM_OP_THEN_POP': (McBlock.parseMcGeneric, True, True, False, ),
3307 'IEM_MC_STORE_FPUREG_R80_SRC_REF': (McBlock.parseMcGeneric, True, True, False, ),
3308 'IEM_MC_STORE_GREG_I64': (McBlock.parseMcGeneric, True, True, False, ),
3309 'IEM_MC_STORE_GREG_U16': (McBlock.parseMcGeneric, True, True, True, ),
3310 'IEM_MC_STORE_GREG_U16_CONST': (McBlock.parseMcGeneric, True, True, True, ),
3311 'IEM_MC_STORE_GREG_U32': (McBlock.parseMcGeneric, True, True, True, ),
3312 'IEM_MC_STORE_GREG_U32_CONST': (McBlock.parseMcGeneric, True, True, True, ),
3313 'IEM_MC_STORE_GREG_U64': (McBlock.parseMcGeneric, True, True, True, ),
3314 'IEM_MC_STORE_GREG_U64_CONST': (McBlock.parseMcGeneric, True, True, True, ),
3315 'IEM_MC_STORE_GREG_U8': (McBlock.parseMcGeneric, True, True, True, ), # thrd var
3316 'IEM_MC_STORE_GREG_U8_CONST': (McBlock.parseMcGeneric, True, True, True, ), # thrd var
3317 'IEM_MC_STORE_GREG_PAIR_U32': (McBlock.parseMcGeneric, True, True, False, ),
3318 'IEM_MC_STORE_GREG_PAIR_U64': (McBlock.parseMcGeneric, True, True, False, ),
3319 'IEM_MC_STORE_MEM_I16_CONST_BY_REF': (McBlock.parseMcGeneric, True, True, False, ),
3320 'IEM_MC_STORE_MEM_I32_CONST_BY_REF': (McBlock.parseMcGeneric, True, True, False, ),
3321 'IEM_MC_STORE_MEM_I64_CONST_BY_REF': (McBlock.parseMcGeneric, True, True, False, ),
3322 'IEM_MC_STORE_MEM_I8_CONST_BY_REF': (McBlock.parseMcGeneric, True, True, False, ),
3323 'IEM_MC_STORE_MEM_INDEF_D80_BY_REF': (McBlock.parseMcGeneric, True, True, False, ),
3324 'IEM_MC_STORE_MEM_NEG_QNAN_R32_BY_REF': (McBlock.parseMcGeneric, True, True, False, ),
3325 'IEM_MC_STORE_MEM_NEG_QNAN_R64_BY_REF': (McBlock.parseMcGeneric, True, True, False, ),
3326 'IEM_MC_STORE_MEM_NEG_QNAN_R80_BY_REF': (McBlock.parseMcGeneric, True, True, False, ),
3327 'IEM_MC_STORE_MEM_U128': (McBlock.parseMcGeneric, True, True, False, ),
3328 'IEM_MC_STORE_MEM_U128_NO_AC': (McBlock.parseMcGeneric, True, True, False, ),
3329 'IEM_MC_STORE_MEM_U128_ALIGN_SSE': (McBlock.parseMcGeneric, True, True, False, ),
3330 'IEM_MC_STORE_MEM_U16': (McBlock.parseMcGeneric, True, True, True, ),
3331 'IEM_MC_STORE_MEM_U16_CONST': (McBlock.parseMcGeneric, True, True, True, ),
3332 'IEM_MC_STORE_MEM_U256': (McBlock.parseMcGeneric, True, True, False, ),
3333 'IEM_MC_STORE_MEM_U256_NO_AC': (McBlock.parseMcGeneric, True, True, False, ),
3334 'IEM_MC_STORE_MEM_U256_ALIGN_AVX': (McBlock.parseMcGeneric, True, True, False, ),
3335 'IEM_MC_STORE_MEM_U32': (McBlock.parseMcGeneric, True, True, True, ),
3336 'IEM_MC_STORE_MEM_U32_CONST': (McBlock.parseMcGeneric, True, True, True, ),
3337 'IEM_MC_STORE_MEM_U64': (McBlock.parseMcGeneric, True, True, True, ),
3338 'IEM_MC_STORE_MEM_U64_CONST': (McBlock.parseMcGeneric, True, True, True, ),
3339 'IEM_MC_STORE_MEM_U8': (McBlock.parseMcGeneric, True, True, True, ),
3340 'IEM_MC_STORE_MEM_U8_CONST': (McBlock.parseMcGeneric, True, True, True, ),
3341 'IEM_MC_STORE_MREG_U32_ZX_U64': (McBlock.parseMcGeneric, True, True, False, ),
3342 'IEM_MC_STORE_MREG_U64': (McBlock.parseMcGeneric, True, True, False, ),
3343 'IEM_MC_STORE_SREG_BASE_U32': (McBlock.parseMcGeneric, True, True, False, ),
3344 'IEM_MC_STORE_SREG_BASE_U64': (McBlock.parseMcGeneric, True, True, False, ),
3345 'IEM_MC_STORE_SSE_RESULT': (McBlock.parseMcGeneric, True, True, False, ),
3346 'IEM_MC_STORE_XREG_R32': (McBlock.parseMcGeneric, True, True, False, ),
3347 'IEM_MC_STORE_XREG_R64': (McBlock.parseMcGeneric, True, True, False, ),
3348 'IEM_MC_STORE_XREG_U128': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
3349 'IEM_MC_STORE_XREG_U16': (McBlock.parseMcGeneric, True, True, False, ),
3350 'IEM_MC_STORE_XREG_U32': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
3351 'IEM_MC_STORE_XREG_U32_U128': (McBlock.parseMcGeneric, True, True, False, ),
3352 'IEM_MC_STORE_XREG_U32_ZX_U128': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
3353 'IEM_MC_STORE_XREG_U64': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
3354 'IEM_MC_STORE_XREG_U64_ZX_U128': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
3355 'IEM_MC_STORE_XREG_U8': (McBlock.parseMcGeneric, True, True, False, ),
3356 'IEM_MC_STORE_XREG_XMM': (McBlock.parseMcGeneric, True, True, False, ),
3357 'IEM_MC_STORE_XREG_XMM_U32': (McBlock.parseMcGeneric, True, True, False, ),
3358 'IEM_MC_STORE_XREG_XMM_U64': (McBlock.parseMcGeneric, True, True, False, ),
3359 'IEM_MC_STORE_YREG_U128': (McBlock.parseMcGeneric, True, True, False, ),
3360 'IEM_MC_STORE_YREG_U128_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
3361 'IEM_MC_STORE_YREG_U256_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
3362 'IEM_MC_STORE_YREG_U32_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
3363 'IEM_MC_STORE_YREG_U64_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
3364 'IEM_MC_SUB_GREG_U16': (McBlock.parseMcGeneric, True, True, True, ),
3365 'IEM_MC_SUB_GREG_U32': (McBlock.parseMcGeneric, True, True, True, ),
3366 'IEM_MC_SUB_GREG_U64': (McBlock.parseMcGeneric, True, True, True, ),
3367 'IEM_MC_SUB_LOCAL_U16': (McBlock.parseMcGeneric, False, False, False, ),
3368 'IEM_MC_UPDATE_FPU_OPCODE_IP': (McBlock.parseMcGeneric, True, True, False, ),
3369 'IEM_MC_UPDATE_FSW': (McBlock.parseMcGeneric, True, True, False, ),
3370 'IEM_MC_UPDATE_FSW_CONST': (McBlock.parseMcGeneric, True, True, False, ),
3371 'IEM_MC_UPDATE_FSW_THEN_POP': (McBlock.parseMcGeneric, True, True, False, ),
3372 'IEM_MC_UPDATE_FSW_THEN_POP_POP': (McBlock.parseMcGeneric, True, True, False, ),
3373 'IEM_MC_UPDATE_FSW_WITH_MEM_OP': (McBlock.parseMcGeneric, True, True, False, ),
3374 'IEM_MC_UPDATE_FSW_WITH_MEM_OP_THEN_POP': (McBlock.parseMcGeneric, True, True, False, ),
3375 'IEM_MC_NO_NATIVE_RECOMPILE': (McBlock.parseMcGeneric, False, False, False, ),
3376};
3377# pylint: enable=line-too-long
3378
## List of microcode blocks.
#  Module-level list that starts out empty; the type comment declares its
#  elements to be McBlock instances.
g_aoMcBlocks = [] # type: List[McBlock]
3381
3382
3383
class ParserException(Exception):
    """ Exception type for parser errors; carries a single message string. """

    def __init__(self, sMessage):
        # Hand the message to the base class so str() and args behave as for any Exception.
        super(ParserException, self).__init__(sMessage);
3388
3389
3390class SimpleParser(object): # pylint: disable=too-many-instance-attributes
3391 """
3392 Parser of IEMAllInstruction*.cpp.h instruction specifications.
3393 """
3394
3395 ## @name Parser state.
3396 ## @{
3397 kiCode = 0;
3398 kiCommentMulti = 1;
3399 ## @}
3400
class Macro(object):
    """
    A preprocessor macro and the logic for expanding it.

    Macros without parameters (asArgs is None or empty) expand to their body
    unchanged.  For macros with parameters, every parameter reference in the
    body is replaced by the corresponding argument, and '##' operators next
    to a parameter reference are removed together with their surrounding
    whitespace (token pasting).
    """

    def __init__(self, sName, asArgs, sBody, iLine):
        """
        Initializes the macro.

        sName is the macro name, asArgs the list of parameter names (None for
        a simple macro), sBody the replacement text and iLine a line number
        stored as-is (presumably where the macro was defined).
        """
        self.sName      = sName;            ##< The macro name.
        self.asArgs     = asArgs;           ##< None if simple macro, list of parameters otherwise.
        self.sBody      = sBody;            ##< The unexpanded macro body.
        self.iLine      = iLine;            ##< Line number supplied by the creator.
        ## Matches a single parameter reference: group 1 and 3 are either an
        #  adjacent '##' operator (with whitespace) or an empty word boundary,
        #  group 2 is the parameter name.  None if there are no parameters.
        self.oReArgMatch = re.compile(r'(\s*##\s*|\b)(' + '|'.join(asArgs) + r')(\s*##\s*|\b)') if asArgs else None;

    @staticmethod
    def _needSpace(ch):
        """
        This is just to make the expanded output a bit prettier.

        Returns True when ch is a whitespace character.  The additional '('
        test can never fail for a whitespace character; it is kept so the
        expansion output stays exactly as before.
        """
        return ch.isspace() and ch != '(';

    def expandMacro(self, oParent, asArgs = None):
        """
        Expands the macro body with the given arguments.

        oParent is unused.  asArgs must hold one value per macro parameter
        (asserted); for a macro without parameters it must be None or empty.
        Returns the expanded body as a string.
        """
        _ = oParent;

        if not self.oReArgMatch:
            assert not asArgs;
            return self.sBody;

        assert len(asArgs) == len(self.asArgs);
        dArgs = { self.asArgs[iArg]: sValue for iArg, sValue in enumerate(asArgs) };

        def replaceParam(oMatch):
            """ Returns the replacement text for one parameter reference. """
            sSrc = oMatch.string;
            sPre = '';
            if not oMatch.group(1) and oMatch.start() > 0 and self._needSpace(sSrc[oMatch.start()]):
                sPre = ' ';
            sPost = '';
            if not oMatch.group(3) and oMatch.end() < len(sSrc) and self._needSpace(sSrc[oMatch.end()]):
                sPost = ' ';
            return sPre + dArgs[oMatch.group(2)] + sPost;

        # Substitute in a single pass over the *original* body.  Word
        # boundaries are then always judged against the original text, so a
        # parameter directly following a '##' that was consumed together with
        # the previous parameter ('a ## b') is still recognized.  Searching the
        # partially expanded string instead would see the pasted value in front
        # of it and miss the boundary.  Substituted values are never rescanned.
        return self.oReArgMatch.sub(replaceParam, self.sBody);
3442
class PreprocessorConditional(object):
    """
    Preprocessor conditional (#if/#ifdef/#ifndef/#elif/#else/#endif).

    An instance holds the type and expression of one conditional directive
    together with the #elif conditionals (aoElif) and an in-#else flag
    (fInElse); both of the latter are maintained by the user of this class.
    """

    ## Known defines.
    #   - A value of 1 indicates that it's always defined.
    #   - A value of 0 if it's always undefined
    #   - A value of -1 if it's an arch and it depends of script parameters.
    #   - A value of -2 if it's not recognized when filtering MC blocks.
    kdKnownDefines = {
        'IEM_WITH_ONE_BYTE_TABLE':          1,
        'IEM_WITH_TWO_BYTE_TABLE':          1,
        'IEM_WITH_THREE_0F_38':             1,
        'IEM_WITH_THREE_0F_3A':             1,
        'IEM_WITH_THREE_BYTE_TABLES':       1,
        'IEM_WITH_3DNOW':                   1,
        'IEM_WITH_3DNOW_TABLE':             1,
        'IEM_WITH_VEX':                     1,
        'IEM_WITH_VEX_TABLES':              1,
        'VBOX_WITH_NESTED_HWVIRT_VMX':      1,
        'VBOX_WITH_NESTED_HWVIRT_VMX_EPT':  1,
        'VBOX_WITH_NESTED_HWVIRT_SVM':      1,
        'LOG_ENABLED':                      1,
        'RT_WITHOUT_PRAGMA_ONCE':           0,
        'TST_IEM_CHECK_MC':                 0,
        'IEM_WITHOUT_ASSEMBLY':            -2, ##< @todo ??
        'RT_ARCH_AMD64':                   -1,
        'RT_ARCH_ARM64':                   -1,
        'RT_ARCH_ARM32':                   -1,
        'RT_ARCH_X86':                     -1,
        'RT_ARCH_SPARC':                   -1,
        'RT_ARCH_SPARC64':                 -1,
    };
    ## Maps the architecture names accepted as sArch to their RT_ARCH_XXXX define.
    # NOTE(review): 'sparc32' maps to RT_ARCH_SPARC64 and there are no entries
    #               for x86 / arm32 / sparc64 - confirm that this is intended.
    kdBuildArchToIprt = {
        'amd64':   'RT_ARCH_AMD64',
        'arm64':   'RT_ARCH_ARM64',
        'sparc32': 'RT_ARCH_SPARC64',
    };
    ## For parsing the next defined(xxxx).
    #  Leading and trailing whitespace is consumed by the match.
    koMatchDefined = re.compile(r'\s*defined\s*\(\s*([^ \t)]+)\s*\)\s*');

    def __init__(self, sType, sExpr):
        """
        Initializes and validates the conditional.

        For sType 'if' and 'elif' sExpr is validated as an expression, for
        any other type it is validated as a single define name.  Raises
        Exception if the expression or define is not supported.
        """
        self.sType   = sType;
        self.sExpr   = sExpr;   ##< Expression without command and no leading or trailing spaces.
        self.aoElif  = []       # type: List[PreprocessorConditional]
        self.fInElse = False;   ##< Boolean flag; only ever tested for truthiness in this class.
        if sType in ('if', 'elif'):
            self.checkExpression(sExpr);
        else:
            self.checkSupportedDefine(sExpr)

    @classmethod
    def checkSupportedDefine(cls, sDefine):
        """
        Checks that sDefine is one that we support.  Raises exception if unsupported.

        Supported are the names in kdKnownDefines and include guards of the
        form VMM_INCLUDED_xxx_h.  Returns True on success.
        """
        #print('debug: checkSupportedDefine: %s' % (sDefine,), file = sys.stderr);
        if sDefine in cls.kdKnownDefines:
            return True;
        if sDefine.startswith('VMM_INCLUDED_') and sDefine.endswith('_h'):
            return True;
        raise Exception('Unsupported define: %s' % (sDefine,));

    @classmethod
    def checkExpression(cls, sExpr):
        """
        Check that the expression is supported.  Raises exception if not.

        Accepted are '0', '1' and defined(xxx) terms combined with '!', '||',
        '&&' and parentheses, optionally ending with a literal '1'.  Returns
        True on success.
        """
        #print('debug: checkExpression: %s' % (sExpr,), file = sys.stderr);
        if sExpr in ('0', '1'):
            return True;

        off    = 0;
        cParan = 0;
        while off < len(sExpr):
            ch = sExpr[off];

            # Unary operator or parentheses:
            if ch in ('(', '!'):
                if ch == '(':
                    cParan += 1;
                off += 1;
            else:
                # defined(xxxx)
                oMatch = cls.koMatchDefined.match(sExpr, off);
                if oMatch:
                    cls.checkSupportedDefine(oMatch.group(1));
                    off = oMatch.end();
                elif sExpr[off:] == '1':
                    # A trailing literal '1' ends the expression; there is no match object to advance by.
                    off = len(sExpr);
                else:
                    raise Exception('Cannot grok: \'%s\' (at %u in: \'%s\')' % (sExpr[off:off + 10], off + 1, sExpr,));

                # Look for closing parentheses.
                while off < len(sExpr) and sExpr[off].isspace():
                    off += 1;
                if cParan > 0:
                    while off < len(sExpr) and sExpr[off] == ')':
                        if cParan <= 0:
                            raise Exception('Unbalanced parentheses at %u in \'%s\'' % (off + 1, sExpr,));
                        cParan -= 1;
                        off    += 1;
                        while off < len(sExpr) and sExpr[off].isspace():
                            off += 1;

                # Look for binary operator.
                if off >= len(sExpr):
                    break;
                if sExpr[off:off + 2] in ('||', '&&'):
                    off += 2;
                else:
                    raise Exception('Cannot grok operator: \'%s\' (at %u in: \'%s\')' % (sExpr[off:off + 2], off + 1, sExpr,));

                # Skip spaces.
                while off < len(sExpr) and sExpr[off].isspace():
                    off += 1;
        if cParan != 0:
            raise Exception('Unbalanced parentheses at %u in \'%s\'' % (off + 1, sExpr,));
        return True;

    @classmethod
    def isArchIncludedInExpr(cls, sExpr, sArch):
        """
        Checks if sArch is included in the given expression.

        Returns True as soon as one defined(xxx) term evaluates to true for
        sArch (or a literal '1' is reached), False if none does.  Raises
        Exception for anything but '||' separated defined() terms.
        """
        # We only grok defined() [|| defined()...] and [1|0] at the moment.
        if sExpr == '0':
            return False;
        if sExpr == '1':
            return True;
        off = 0;
        while off < len(sExpr):
            # defined(xxxx)
            oMatch = cls.koMatchDefined.match(sExpr, off);
            if not oMatch:
                if sExpr[off:] == '1':
                    return True;
                raise Exception('Cannot grok: %s (at %u in: %s)' % (sExpr[off:off + 10], off + 1, sExpr,));
            if cls.matchDefined(oMatch.group(1), sArch):
                return True;
            off = oMatch.end();

            # Look for OR operator.
            while off < len(sExpr) and sExpr[off].isspace():
                off += 1;
            if off >= len(sExpr):
                break;
            if sExpr.startswith('||', off):     # Must test at the current offset, not the start of the string.
                off += 2;
            else:
                raise Exception('Cannot grok: %s (at %u in: %s)' % (sExpr[off:off + 10], off + 1, sExpr,));

            # Skip spaces so a literal '1' following the operator is recognized.
            while off < len(sExpr) and sExpr[off].isspace():
                off += 1;

        return False;

    @classmethod
    def matchArch(cls, sDefine, sArch):
        """
        Compares sDefine (RT_ARCH_XXXX) and sArch (x86, amd64, arm64, ++).

        Raises KeyError if sArch has no entry in kdBuildArchToIprt.
        """
        return cls.kdBuildArchToIprt[sArch] == sDefine;

    @classmethod
    def matchDefined(cls, sExpr, sArch):
        """
        Check the result of an ifdef/ifndef expression, given sArch.

        Defines missing from kdKnownDefines are treated as undefined.  Raises
        Exception for defines marked -2 (unsupported for MC block filtering).
        """
        iDefine = cls.kdKnownDefines.get(sExpr, 0);
        if iDefine == -2:
            raise Exception('Unsupported define for MC block filtering: %s' % (sExpr,));
        return iDefine == 1 or (iDefine == -1 and cls.matchArch(sExpr, sArch));

    def isArchIncludedInPrimaryBlock(self, sArch):
        """ Checks if sArch is included in the (primary) 'if' block. """
        if self.sType == 'ifdef':
            return self.matchDefined(self.sExpr, sArch);
        if self.sType == 'ifndef':
            return not self.matchDefined(self.sExpr, sArch);
        return self.isArchIncludedInExpr(self.sExpr, sArch);

    @staticmethod
    def isInBlockForArch(aoCppCondStack, sArch, iLine):
        """
        Checks if sArch is included in the current conditional block.

        Every conditional on the stack must select the block we are currently
        in for sArch; iLine is unused.
        """
        _ = iLine;
        #print('debug: isInBlockForArch(%s,%s); line %s' % (len(aoCppCondStack), sArch, iLine), file = sys.stderr);
        for oCond in aoCppCondStack:
            if oCond.isArchIncludedInPrimaryBlock(sArch):
                # The primary block applies to sArch, so we must not have moved on to an #elif or #else.
                if oCond.aoElif or oCond.fInElse:
                    #print('debug: isInBlockForArch -> False #1', file = sys.stderr);
                    return False;
                #print('debug: isInBlockForArch(%s,%s): in IF-block' % (len(aoCppCondStack), sArch), file = sys.stderr);
            else:
                # An #elif applying to sArch must be the most recent one and we must not be in the #else.
                fFine = False;
                for oElifCond in oCond.aoElif:
                    if oElifCond.isArchIncludedInPrimaryBlock(sArch):
                        if oElifCond is not oCond.aoElif[-1] or oCond.fInElse:
                            #print('debug: isInBlockForArch -> False #3', file = sys.stderr);
                            return False;
                        fFine = True;
                # Neither the primary block nor any #elif applies: only the #else block is left.
                if not fFine and not oCond.fInElse:
                    #print('debug: isInBlockForArch -> False #4', file = sys.stderr);
                    return False;
        #print('debug: isInBlockForArch -> True', file = sys.stderr);
        return True;
3633
3634 def __init__(self, sSrcFile, asLines, sDefaultMap, sHostArch, oInheritMacrosFrom = None):
3635 self.sSrcFile = sSrcFile;
3636 self.asLines = asLines;
3637 self.iLine = 0;
3638 self.iState = self.kiCode;
3639 self.sComment = '';
3640 self.iCommentLine = 0;
3641 self.aoCurInstrs = [] # type: List[Instruction]
3642 self.oCurFunction = None # type: DecoderFunction
3643 self.iMcBlockInFunc = 0;
3644 self.oCurMcBlock = None # type: McBlock
3645 self.dMacros = {} # type: Dict[str, SimpleParser.Macro]
3646 self.oReMacros = None # type: re ##< Regular expression matching invocations of anything in self.dMacros.
3647 if oInheritMacrosFrom:
3648 self.dMacros = dict(oInheritMacrosFrom.dMacros);
3649 self.oReMacros = oInheritMacrosFrom.oReMacros;
3650 self.aoCppCondStack = [] # type: List[PreprocessorConditional] ##< Preprocessor conditional stack.
3651 self.sHostArch = sHostArch;
3652
3653 assert sDefaultMap in g_dInstructionMaps;
3654 self.oDefaultMap = g_dInstructionMaps[sDefaultMap];
3655
3656 self.cTotalInstr = 0;
3657 self.cTotalStubs = 0;
3658 self.cTotalTagged = 0;
3659 self.cTotalMcBlocks = 0;
3660
3661 self.oReMacroName = re.compile(r'^[A-Za-z_][A-Za-z0-9_]*$');
3662 self.oReMnemonic = re.compile(r'^[A-Za-z_][A-Za-z0-9_]*$');
3663 self.oReStatsName = re.compile(r'^[A-Za-z_][A-Za-z0-9_]*$');
3664 self.oReFunctionName= re.compile(r'^iemOp_[A-Za-z_][A-Za-z0-9_]*$');
3665 self.oReGroupName = re.compile(r'^og_[a-z0-9]+(|_[a-z0-9]+|_[a-z0-9]+_[a-z0-9]+)$');
3666 self.oReDisEnum = re.compile(r'^OP_[A-Z0-9_]+$');
3667 self.oReFunTable = re.compile(r'^(IEM_STATIC|static) +const +PFNIEMOP +g_apfn[A-Za-z0-9_]+ *\[ *\d* *\] *= *$');
3668 self.oReComment = re.compile(r'//.*?$|/\*.*?\*/'); ## Full comments.
3669 self.oReHashDefine2 = re.compile(r'(?s)\A\s*([A-Za-z_][A-Za-z0-9_]*)\(([^)]*)\)\s*(.*)\Z'); ##< With arguments.
3670 self.oReHashDefine3 = re.compile(r'(?s)\A\s*([A-Za-z_][A-Za-z0-9_]*)[^(]\s*(.*)\Z'); ##< Simple, no arguments.
3671 self.oReMcBeginEnd = re.compile(r'\bIEM_MC_(BEGIN|END|DEFER_TO_CIMPL_[1-5]_RET)\s*\('); ##> Not DEFER_TO_CIMPL_0_RET!
3672 self.fDebug = True;
3673 self.fDebugMc = False;
3674 self.fDebugPreproc = False;
3675
3676 self.dTagHandlers = {
3677 '@opbrief': self.parseTagOpBrief,
3678 '@opdesc': self.parseTagOpDesc,
3679 '@opmnemonic': self.parseTagOpMnemonic,
3680 '@op1': self.parseTagOpOperandN,
3681 '@op2': self.parseTagOpOperandN,
3682 '@op3': self.parseTagOpOperandN,
3683 '@op4': self.parseTagOpOperandN,
3684 '@oppfx': self.parseTagOpPfx,
3685 '@opmaps': self.parseTagOpMaps,
3686 '@opcode': self.parseTagOpcode,
3687 '@opcodesub': self.parseTagOpcodeSub,
3688 '@openc': self.parseTagOpEnc,
3689 #@opfltest: Lists all flags that will be used as input in some way.
3690 '@opfltest': self.parseTagOpEFlags,
3691 #@opflmodify: Lists all EFLAGS modified. Includes @opflset, @opflcleared and @opflundef (if applicable).
3692 '@opflmodify': self.parseTagOpEFlags,
            #@opflset: Lists all flags that will be set (set to 1).
3694 '@opflset': self.parseTagOpEFlags,
3695 #@opflclear: Lists all flags that will be cleared (set to 0).
3696 '@opflclear': self.parseTagOpEFlags,
3697 #@opflundef: List of flag documented as undefined.
3698 '@opflundef': self.parseTagOpEFlags,
            #@opflclass: Shorthand for defining flag behaviour (@opfltest, @opflmodify, @opflset, @opflclear, @opflundef).
3700 '@opflclass': self.parseTagOpEFlagsClass,
3701 '@ophints': self.parseTagOpHints,
3702 '@opdisenum': self.parseTagOpDisEnum,
3703 '@opmincpu': self.parseTagOpMinCpu,
3704 '@opcpuid': self.parseTagOpCpuId,
3705 '@opgroup': self.parseTagOpGroup,
3706 '@opunused': self.parseTagOpUnusedInvalid,
3707 '@opinvalid': self.parseTagOpUnusedInvalid,
3708 '@opinvlstyle': self.parseTagOpUnusedInvalid,
3709 '@optest': self.parseTagOpTest,
3710 '@optestign': self.parseTagOpTestIgnore,
3711 '@optestignore': self.parseTagOpTestIgnore,
3712 '@opcopytests': self.parseTagOpCopyTests,
3713 '@oponly': self.parseTagOpOnlyTest,
3714 '@oponlytest': self.parseTagOpOnlyTest,
3715 '@opxcpttype': self.parseTagOpXcptType,
3716 '@opstats': self.parseTagOpStats,
3717 '@opfunction': self.parseTagOpFunction,
3718 '@opdone': self.parseTagOpDone,
3719 };
3720 for i in range(48):
3721 self.dTagHandlers['@optest%u' % (i,)] = self.parseTagOpTestNum;
3722 self.dTagHandlers['@optest[%u]' % (i,)] = self.parseTagOpTestNum;
3723
3724 self.asErrors = [];
3725
def raiseError(self, sMessage):
    """
    Raises a ParserException for the line currently being parsed.

    The exception text has the form '<source file>:<line>: error: <message>',
    using self.sSrcFile and self.iLine.
    """
    sFullMessage = "%s:%d: error: %s" % (self.sSrcFile, self.iLine, sMessage,);
    raise ParserException(sFullMessage);
3731
def raiseCommentError(self, iLineInComment, sMessage):
    """
    Raises a ParserException for a line inside the current comment.

    Works like raiseError, except that the reported line number is
    self.iCommentLine + iLineInComment.
    """
    iErrorLine = self.iCommentLine + iLineInComment;
    raise ParserException("%s:%d: error: %s" % (self.sSrcFile, iErrorLine, sMessage,));
3737
def error(self, sMessage):
    """
    Queues an error for the line currently being parsed (self.iLine).

    The formatted message is appended to self.asErrors; nothing is raised.

    Returns False.
    """
    sEntry = u'%s:%d: error: %s\n' % (self.sSrcFile, self.iLine, sMessage,);
    self.asErrors.append(sEntry);
    return False;
3745
def errorOnLine(self, iLine, sMessage):
    """
    Queues an error for an explicitly given line number.

    The formatted message is appended to self.asErrors; nothing is raised.

    Returns False.
    """
    sEntry = u'%s:%d: error: %s\n' % (self.sSrcFile, iLine, sMessage,);
    self.asErrors.append(sEntry);
    return False;
3753
def errorComment(self, iLineInComment, sMessage):
    """
    Queues an error for a line inside the current comment.

    The reported line number is self.iCommentLine + iLineInComment.  The
    formatted message is appended to self.asErrors; nothing is raised.

    Returns False.
    """
    iErrorLine = self.iCommentLine + iLineInComment;
    self.asErrors.append(u'%s:%d: error: %s\n' % (self.sSrcFile, iErrorLine, sMessage,));
    return False;
3761
def printErrors(self):
    """
    Writes all queued error messages (self.asErrors) to stderr.

    Returns the number of queued errors; nothing is written when there are
    none.
    """
    cErrors = len(self.asErrors);
    if cErrors > 0:
        sys.stderr.write(u''.join(self.asErrors));
    return cErrors;
3770
def debug(self, sMessage):
    """
    Prints sMessage to stderr with a 'debug: ' prefix, but only when
    self.fDebug is set.
    """
    if not self.fDebug:
        return;
    print('debug: %s' % (sMessage,), file = sys.stderr);
3777
def stripComments(self, sLine):
    """
    Returns sLine with each complete comment replaced by a single space.

    Comments are matched with self.oReComment.  If a '/*' or '*/' is still
    present afterwards (a comment spanning lines), an error is queued via
    self.error(), but the stripped line is returned all the same.
    """
    sStripped = self.oReComment.sub(" ", sLine);
    if '/*' in sStripped or '*/' in sStripped:
        self.error('Unexpected multi-line comment will not be handled correctly. Please simplify.');
    return sStripped;
3788
def parseFunctionTable(self, sLine):
    """
    Parses a PFNIEMOP table, updating/checking the @oppfx value.

    sLine is the text holding the table name followed by '[' (optionally with
    an explicit entry count of 256 or 32).  The table body is read from
    self.asLines, starting at self.iLine.

    Returns True if the closing brace is reached with the expected number of
    entries.  Otherwise an error is queued and False is returned.

    Note! Updates iLine as it consumes the whole table.
    """

    #
    # Extract the table name.
    #
    sName = re.search(r' *([a-zA-Z_0-9]+) *\[', sLine).group(1);
    oMap = g_dInstructionMapsByIemName.get(sName);
    if not oMap:
        self.debug('No map for PFNIEMOP table: %s' % (sName,));
        oMap = self.oDefaultMap; # This is wrong wrong wrong.

    #
    # All but the g_apfnOneByteMap & g_apfnEscF1_E0toFF tables use four
    # entries per byte:
    #   no prefix, 066h prefix, f3h prefix, f2h prefix
    # Those two tables have 256 & 32 entries respectively.
    #
    cEntriesPerByte = 4;
    cValidTableLength = 1024;
    asPrefixes = ('none', '0x66', '0xf3', '0xf2');

    # An explicit [256] or [32] size selects the one-entry-per-byte layout.
    oEntriesMatch = re.search(r'\[ *(256|32) *\]', sLine);
    if oEntriesMatch:
        cEntriesPerByte = 1;
        cValidTableLength = int(oEntriesMatch.group(1));
        asPrefixes = (None,);

    #
    # The next line should be '{' and nothing else.
    #
    if self.iLine >= len(self.asLines) or not re.match('^ *{ *$', self.asLines[self.iLine]):
        return self.errorOnLine(self.iLine + 1, 'Expected lone "{" on line following PFNIEMOP table %s start' % (sName, ));
    self.iLine += 1;

    #
    # Parse till we find the end of the table.
    #
    iEntry = 0;
    while self.iLine < len(self.asLines):
        # Get the next line and strip comments and spaces (assumes no
        # multi-line comments).
        sLine = self.asLines[self.iLine];
        self.iLine += 1;
        sLine = self.stripComments(sLine).strip();

        # Split the line up into entries, expanding IEMOP_X4 usage.
        # Walking backwards keeps index i valid while entries are inserted
        # after it or deleted at it.
        asEntries = sLine.split(',');
        for i in range(len(asEntries) - 1, -1, -1):
            sEntry = asEntries[i].strip();
            if sEntry.startswith('IEMOP_X4(') and sEntry[-1] == ')':
                # Three extra copies plus the entry itself gives four entries.
                sEntry = (sEntry[len('IEMOP_X4('):-1]).strip();
                asEntries.insert(i + 1, sEntry);
                asEntries.insert(i + 1, sEntry);
                asEntries.insert(i + 1, sEntry);
            if sEntry:
                asEntries[i] = sEntry;
            else:
                del asEntries[i];

        # Process the entries.
        for sEntry in asEntries:
            # The closing brace ends the table; check the entry count.
            if sEntry in ('};', '}'):
                if iEntry != cValidTableLength:
                    return self.error('Wrong table length for %s: %#x, expected %#x' % (sName, iEntry, cValidTableLength, ));
                return True;
            if sEntry.startswith('iemOp_Invalid'):
                pass; # skip
            else:
                # Look up matching instruction by function.
                sPrefix = asPrefixes[iEntry % cEntriesPerByte];
                sOpcode = '%#04x' % (iEntry // cEntriesPerByte);
                aoInstr = g_dAllInstructionsByFunction.get(sEntry);
                if aoInstr:
                    if not isinstance(aoInstr, list):
                        aoInstr = [aoInstr,];
                    # Use the first instruction that already matches this
                    # opcode/prefix, or that still lacks them (filling them in).
                    oInstr = None;
                    for oCurInstr in aoInstr:
                        if oCurInstr.sOpcode == sOpcode and oCurInstr.sPrefix == sPrefix:
                            pass;
                        elif oCurInstr.sOpcode == sOpcode and oCurInstr.sPrefix is None:
                            oCurInstr.sPrefix = sPrefix;
                        elif oCurInstr.sOpcode is None and oCurInstr.sPrefix is None:
                            oCurInstr.sOpcode = sOpcode;
                            oCurInstr.sPrefix = sPrefix;
                        else:
                            continue;
                        oInstr = oCurInstr;
                        break;
                    # Nothing usable: copy the first instruction for this
                    # table slot and register the copy everywhere.
                    if not oInstr:
                        oInstr = aoInstr[0].copy(oMap = oMap, sOpcode = sOpcode, sPrefix = sPrefix);
                        aoInstr.append(oInstr);
                        g_dAllInstructionsByFunction[sEntry] = aoInstr;
                        g_aoAllInstructions.append(oInstr);
                        oMap.aoInstructions.append(oInstr);
                else:
                    self.debug('Function "%s", entry %#04x / byte %#04x in %s, is not associated with an instruction.'
                               % (sEntry, iEntry, iEntry // cEntriesPerByte, sName,));
            # Invalid entries count as table slots too.
            iEntry += 1;

    return self.error('Unexpected end of file in PFNIEMOP table');
3894
def addInstruction(self, iLine = None):
    """
    Creates a new Instruction and registers it.

    The instruction is created for self.sSrcFile at iLine, or at self.iLine
    when iLine is None.  It is appended to the global instruction list and to
    the parser's list of current instructions.

    Returns the new instruction.
    """
    iLineCreated = iLine if iLine is not None else self.iLine;
    oNewInstr = Instruction(self.sSrcFile, iLineCreated);
    g_aoAllInstructions.append(oNewInstr);
    self.aoCurInstrs.append(oNewInstr);
    return oNewInstr;
3903
def deriveMnemonicAndOperandsFromStats(self, oInstr, sStats):
    """
    Fills in the mnemonic, and possibly the operands, of oInstr from an IEM
    stats base name like string (underscore separated words).

    Nothing is done if the instruction already has a mnemonic.  Otherwise the
    lower-cased first word becomes the mnemonic.  If the instruction has no
    operands yet, each remaining word is looked up in g_kdOpTypes and
    appended as an operand.

    Returns False as soon as a word is not a known operand type (operands
    appended before that point are kept), otherwise True.
    """
    if oInstr.sMnemonic is not None:
        return True;

    asParts = sStats.split('_');
    oInstr.sMnemonic = asParts[0].lower();
    if oInstr.aoOperands:
        return True;

    for sOpType in asParts[1:]:
        if sOpType not in g_kdOpTypes:
            return False;
        oInstr.aoOperands.append(Operand(g_kdOpTypes[sOpType][1], sOpType));
    return True;
3919
def doneInstructionOne(self, oInstr, iLine):
    """
    Complete the parsing by processing, validating and expanding raw inputs.

    Marks oInstr as completed on iLine, derives missing attributes (mnemonic,
    disassembler enum, stats name, function name, maps, encoding) from those
    present, and registers the instruction in the stats and function indexed
    dictionaries.  A duplicate stats name is queued as an error.

    Returns True.
    """
    assert oInstr.iLineCompleted is None;
    oInstr.iLineCompleted = iLine;

    #
    # Specified instructions.
    #
    if oInstr.cOpTags > 0:
        if oInstr.sStats is None:
            pass;

    #
    # Unspecified legacy stuff.  We generally only got a few things to go on here.
    #   /** Opcode 0x0f 0x00 /0. */
    #   FNIEMOPRM_DEF(iemOp_Grp6_sldt)
    #
    else:
        #if oInstr.sRawOldOpcodes:
        #
        #if oInstr.sMnemonic:
        pass;

    #
    # Common defaults.
    #

    # Guess mnemonic and operands from stats if the former is missing.
    # The function name, minus its 'iemOp_' part, is the fallback source.
    if oInstr.sMnemonic is None:
        if oInstr.sStats is not None:
            self.deriveMnemonicAndOperandsFromStats(oInstr, oInstr.sStats);
        elif oInstr.sFunction is not None:
            self.deriveMnemonicAndOperandsFromStats(oInstr, oInstr.sFunction.replace('iemOp_', ''));

    # Derive the disassembler op enum constant from the mnemonic.
    if oInstr.sDisEnum is None and oInstr.sMnemonic is not None:
        oInstr.sDisEnum = 'OP_' + oInstr.sMnemonic.upper();

    # Derive the IEM statistics base name, preferring the function name over
    # mnemonic and operand types.
    if oInstr.sStats is None:
        if oInstr.sFunction is not None:
            oInstr.sStats = oInstr.sFunction.replace('iemOp_', '');
        elif oInstr.sMnemonic is not None:
            oInstr.sStats = oInstr.sMnemonic;
            for oOperand in oInstr.aoOperands:
                if oOperand.sType:
                    oInstr.sStats += '_' + oOperand.sType;

    # Derive the IEM function name from mnemonic and operand types, falling
    # back on the stats name.
    if oInstr.sFunction is None:
        if oInstr.sMnemonic is not None:
            oInstr.sFunction = 'iemOp_' + oInstr.sMnemonic;
            for oOperand in oInstr.aoOperands:
                if oOperand.sType:
                    oInstr.sFunction += '_' + oOperand.sType;
        elif oInstr.sStats:
            oInstr.sFunction = 'iemOp_' + oInstr.sStats;

    #
    # Apply default map and then add the instruction to all its maps.
    #
    if not oInstr.aoMaps:
        oInstr.aoMaps = [ self.oDefaultMap, ];
    for oMap in oInstr.aoMaps:
        oMap.aoInstructions.append(oInstr);

    #
    # Derive encoding from operands and maps.
    #
    if oInstr.sEncoding is None:
        if not oInstr.aoOperands:
            # No operands: fixed encoding, unless it is an unused entry with
            # a sub-opcode.
            if oInstr.fUnused and oInstr.sSubOpcode:
                oInstr.sEncoding = 'VEX.ModR/M' if oInstr.onlyInVexMaps() else 'ModR/M';
            else:
                oInstr.sEncoding = 'VEX.fixed' if oInstr.onlyInVexMaps() else 'fixed';
        elif oInstr.aoOperands[0].usesModRM():
            # A second operand located in 'vvvv' selects the VEX variant too.
            if     (len(oInstr.aoOperands) >= 2 and oInstr.aoOperands[1].sWhere == 'vvvv') \
                or oInstr.onlyInVexMaps():
                oInstr.sEncoding = 'VEX.ModR/M';
            else:
                oInstr.sEncoding = 'ModR/M';

    #
    # Check the opstat value and add it to the opstat indexed dictionary.
    #
    if oInstr.sStats:
        if oInstr.sStats not in g_dAllInstructionsByStat:
            g_dAllInstructionsByStat[oInstr.sStats] = oInstr;
        else:
            self.error('Duplicate opstat value "%s"\nnew: %s\nold: %s'
                       % (oInstr.sStats, oInstr, g_dAllInstructionsByStat[oInstr.sStats],));

    #
    # Add to function indexed dictionary.  We allow multiple instructions per function.
    #
    if oInstr.sFunction:
        if oInstr.sFunction not in g_dAllInstructionsByFunction:
            g_dAllInstructionsByFunction[oInstr.sFunction] = [oInstr,];
        else:
            g_dAllInstructionsByFunction[oInstr.sFunction].append(oInstr);

    #self.debug('%d..%d: %s; %d @op tags' % (oInstr.iLineCreated, oInstr.iLineCompleted, oInstr.sFunction, oInstr.cOpTags));
    return True;
4025
def doneInstructions(self, iLineInComment = None, fEndOfFunction = False):
    """
    Completes all current instructions and resets the per-instruction state.

    Each instruction in self.aoCurInstrs is passed to doneInstructionOne with
    the completion line: self.iLine, or self.iCommentLine + iLineInComment
    when iLineInComment is given.  The stub and instruction counters are
    updated, then the comment text and current instruction list are cleared.

    When fEndOfFunction is set, the current function (if any) is also
    completed and the function state is reset.

    Returns True.
    """
    if iLineInComment is None:
        iLineDone = self.iLine;
    else:
        iLineDone = self.iCommentLine + iLineInComment;

    for oInstr in self.aoCurInstrs:
        self.doneInstructionOne(oInstr, iLineDone);
        if oInstr.fStub:
            self.cTotalStubs += 1;
    self.cTotalInstr += len(self.aoCurInstrs);

    self.sComment = '';
    self.aoCurInstrs = [];

    if not fEndOfFunction:
        return True;

    #self.debug('%s: oCurFunction=None' % (self.iLine, ));
    oFunction = self.oCurFunction;
    if oFunction:
        oFunction.complete(self.iLine, self.asLines[oFunction.iBeginLine - 1 : self.iLine]);
    self.oCurFunction = None;
    self.iMcBlockInFunc = 0;
    return True;
4046
def setInstrunctionAttrib(self, sAttrib, oValue, fOverwrite = False):
    """
    Sets attribute sAttrib to oValue on all current instructions.

    Unless fOverwrite is True, only attributes that are currently None are
    set; any other existing value is left untouched.
    """
    for oInstr in self.aoCurInstrs:
        if fOverwrite is True or getattr(oInstr, sAttrib) is None:
            setattr(oInstr, sAttrib, oValue);
4058
def setInstrunctionArrayAttrib(self, sAttrib, iEntry, oValue, fOverwrite = False):
    """
    Sets entry iEntry of the list attribute sAttrib to oValue on all current
    instructions.

    Lists that are too short are padded with None first.  Unless fOverwrite
    is True, only entries that are currently None are set.
    """
    for oInstr in self.aoCurInstrs:
        aoArray = getattr(oInstr, sAttrib);
        cMissing = iEntry + 1 - len(aoArray);
        if cMissing > 0:
            aoArray.extend([None] * cMissing);
        if fOverwrite is not True and aoArray[iEntry] is not None:
            continue;
        aoArray[iEntry] = oValue;
4070
def parseCommentOldOpcode(self, asLines):
    """
    Deals with 'Opcode 0xff /4' like comments.

    Only the first line of asLines is examined.  If it starts with the word
    'Opcode' followed by a word with a '0x' or '0X' prefix, everything after
    'Opcode' is stored as the 'sRawOldOpcodes' attribute of the current
    instructions (via setInstrunctionAttrib, so existing values are kept).
    Any '0X' prefix is normalized to '0x'.

    Returns False.
    """
    asWords = asLines[0].split() if asLines else [];
    if     len(asWords) >= 2 \
       and asWords[0] == 'Opcode' \
       and asWords[1].startswith(('0x', '0X')):
        # Drop the leading 'Opcode' word and keep the opcode bytes and
        # whatever follows them.
        asOpcodes = ['0x' + sWord[2:] if sWord.startswith('0X') else sWord for sWord in asWords[1:]];
        self.setInstrunctionAttrib('sRawOldOpcodes', ' '.join(asOpcodes));

    return False;
4086
def ensureInstructionForOpTag(self, iTagLine):
    """
    Ensures there is an instruction for the op-tag being parsed.

    If there are no current instructions, one is added for the tag's line
    (self.iCommentLine + iTagLine).  The tag counter of every current
    instruction is incremented, and self.cTotalTagged is incremented for each
    instruction receiving its first tag.

    Returns the last of the current instructions.
    """
    if len(self.aoCurInstrs) == 0:
        self.addInstruction(self.iCommentLine + iTagLine);

    for oInstr in self.aoCurInstrs:
        fFirstTag = oInstr.cOpTags == 0;
        oInstr.cOpTags += 1;
        if fFirstTag:
            self.cTotalTagged += 1;

    return self.aoCurInstrs[-1];
4096
@staticmethod
def flattenSections(aasSections):
    """
    Flattens multiline sections into stripped single strings.

    The lines of each section are stripped and joined with single spaces;
    empty sections are dropped.

    Returns list of strings, one section per string.
    """
    return [' '.join(sLine.strip() for sLine in asLines) for asLines in aasSections if asLines];
4108
@staticmethod
def flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = '\n'):
    """
    Flattens sections into a simple stripped string with newlines as
    section breaks.  The final section does not sport a trailing newline.

    The lines of each section are stripped and joined with sLineSep, and the
    sections are joined with sSectionSep.  Empty sections are skipped
    entirely, so they never produce a leading or doubled separator.

    Returns the flattened string (empty if there is nothing to flatten).
    """
    # Typical: One section with a single line.
    if len(aasSections) == 1 and len(aasSections[0]) == 1:
        return aasSections[0][0].strip();

    return sSectionSep.join([sLineSep.join([sLine.strip() for sLine in asLines])
                             for asLines in aasSections if asLines]);
4126
4127
4128
4129 ## @name Tag parsers
4130 ## @{
4131
def parseTagOpBrief(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opbrief
    Value:  Text description, multiple sections, appended.

    Brief description.  If not given, it's the first sentence from @opdesc.

    The value must be a single sentence of at most 180 characters; a
    trailing full stop is added if missing.  An existing brief is never
    overwritten.

    Returns True on success; on failure an error is queued and False is
    returned.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sBrief = self.flattenAllSections(aasSections);
    if not sBrief:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if sBrief[-1] != '.':
        sBrief = sBrief + '.';
    if len(sBrief) > 180:
        return self.errorComment(iTagLine, '%s: value too long (max 180 chars): %s' % (sTag, sBrief));

    # Find the first full stop that ends a sentence, i.e. one followed by a
    # space or ending the text.  It must be the final character.
    offDot = sBrief.find('.');
    while 0 <= offDot < len(sBrief) - 1 and sBrief[offDot + 1] != ' ':
        offDot = sBrief.find('.', offDot + 1);
    if offDot >= 0 and offDot != len(sBrief) - 1:
        return self.errorComment(iTagLine, '%s: only one sentence: %s' % (sTag, sBrief));

    # Update the instruction.
    if oInstr.sBrief is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite brief "%s" with "%s"'
                                           % (sTag, oInstr.sBrief, sBrief,));
    oInstr.sBrief = sBrief;

    _ = iEndLine;
    return True;
4161
def parseTagOpDesc(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opdesc
    Value:  Text description, multiple sections, appended.

    It is used to describe instructions.  Each flattened section is appended
    to the instruction's list of description sections.

    Returns True.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    _ = (sTag, iEndLine);

    if aasSections:
        asFlattened = self.flattenSections(aasSections);
        oInstr.asDescSections.extend(asFlattened);
    return True;
4176
def parseTagOpMnemonic(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opmnemonic
    Value:  mnemonic

    The 'mnemonic' value must be a valid C identifier string.  Because of
    prefixes, groups and whatnot, there are times when the mnemonic isn't
    that of an actual assembler mnemonic.

    Returns True on success.  An invalid name, or an attempt to replace an
    already set mnemonic, queues an error and returns False.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    _ = iEndLine;

    # Flatten and validate the value.
    sMnemonic = self.flattenAllSections(aasSections);
    if self.oReMnemonic.match(sMnemonic) is None:
        return self.errorComment(iTagLine, '%s: invalid menmonic name: "%s"' % (sTag, sMnemonic,));

    # Refuse to overwrite an earlier value.
    sOldMnemonic = oInstr.sMnemonic;
    if sOldMnemonic is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite menmonic "%s" with "%s"'
                                           % (sTag, sOldMnemonic, sMnemonic,));

    oInstr.sMnemonic = sMnemonic;
    return True;
4199
def parseTagOpOperandN(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tags:   @op1, @op2, @op3, @op4
    Value:  [where:]type

    The 'where' value indicates where the operand is found, like the 'reg'
    part of the ModR/M encoding.  It must be a key of g_kdOpLocations.  When
    omitted, the default location of the type is used.

    The 'type' value indicates the operand type.  These follow the types
    given in the opcode tables in the CPU reference manuals.  It must be a
    key of g_kdOpTypes.

    The operand index is taken from the last character of the tag.

    Returns True on success.  A malformed value, an unknown type or location,
    or an attempt to replace an existing operand queues an error and returns
    False.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    idxOp = int(sTag[-1]) - 1;
    assert 0 <= idxOp < 4;

    # flatten, split up, and validate the "where:type" value.
    sFlattened = self.flattenAllSections(aasSections);
    asSplit = sFlattened.split(':');
    if len(asSplit) == 1:
        sType = asSplit[0];
        sWhere = None;
    elif len(asSplit) == 2:
        (sWhere, sType) = asSplit;
    else:
        return self.errorComment(iTagLine, 'expected %s value on format "[<where>:]<type>" not "%s"' % (sTag, sFlattened,));

    if sType not in g_kdOpTypes:
        return self.errorComment(iTagLine, '%s: invalid type value "%s", valid: %s'
                                           % (sTag, sType, ', '.join(g_kdOpTypes.keys()),));
    if sWhere is None:
        sWhere = g_kdOpTypes[sType][1];
    elif sWhere not in g_kdOpLocations:
        return self.errorComment(iTagLine, '%s: invalid where value "%s", valid: %s'
                                           % (sTag, sWhere, ', '.join(g_kdOpLocations.keys()),));

    # Insert the operand, refusing to overwrite an existing one.
    while idxOp >= len(oInstr.aoOperands):
        oInstr.aoOperands.append(None);
    if oInstr.aoOperands[idxOp] is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s:%s" with "%s:%s"'
                                           % ( sTag, oInstr.aoOperands[idxOp].sWhere, oInstr.aoOperands[idxOp].sType,
                                               sWhere, sType,));
    oInstr.aoOperands[idxOp] = Operand(sWhere, sType);

    _ = iEndLine;
    return True;
4249
def parseTagOpMaps(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opmaps
    Value:  map[,map2]

    Indicates which maps the instruction is in.  There is a default map
    associated with each input file.

    Every name must be a key of g_dInstructionMaps and must not already be
    assigned to the instruction; otherwise an error is queued and False is
    returned.  A name repeated within the value itself is queued as an error
    but does not fail the tag.

    Returns True when the maps have been added.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    _ = iEndLine;

    # Flatten, split up and validate the value.
    asMapNames = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',').split(',');
    if len(asMapNames) == 0:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    for sMapName in asMapNames:
        if sMapName in g_dInstructionMaps:
            continue;
        return self.errorComment(iTagLine, '%s: invalid map value: %s (valid values: %s)'
                                           % (sTag, sMapName, ', '.join(g_dInstructionMaps.keys()),));

    # Maps already assigned by an earlier tag are an error.
    for oOldMap in oInstr.aoMaps:
        if oOldMap.sName in asMapNames:
            return self.errorComment(iTagLine, '%s: duplicate map assignment: %s' % (sTag, oOldMap.sName));

    # Add the new maps, complaining about repeats within the value itself.
    for sMapName in asMapNames:
        oNewMap = g_dInstructionMaps[sMapName];
        if oNewMap in oInstr.aoMaps:
            self.errorComment(iTagLine, '%s: duplicate map assignment (input): %s' % (sTag, sMapName));
        else:
            oInstr.aoMaps.append(oNewMap);

    return True;
4284
def parseTagOpPfx(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @oppfx
    Value:  n/a|none|0x66|0xf3|0xf2|!0xf3

    Required prefix for the instruction.  (In a (E)VEX context this is the
    value of the 'pp' field rather than an actual prefix.)

    The value is case insensitive, and a two character value gets '0x'
    prepended.  'n/a' is stored as None.

    Returns True on success.  A missing value, more than one value, an
    invalid prefix, or an attempt to replace an existing prefix queues an
    error and returns False.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sFlattened = self.flattenAllSections(aasSections);
    asPrefixes = sFlattened.split();
    if not asPrefixes:
        # Without this, indexing the first prefix below raises IndexError.
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if len(asPrefixes) > 1:
        return self.errorComment(iTagLine, '%s: max one prefix: %s' % (sTag, asPrefixes,));

    sPrefix = asPrefixes[0].lower();
    if sPrefix == 'n/a':
        sPrefix = None;
    elif sPrefix != 'none':
        if len(sPrefix) == 2:
            sPrefix = '0x' + sPrefix;
        if not _isValidOpcodeByte(sPrefix) and sPrefix != '!0xf3':
            return self.errorComment(iTagLine, '%s: invalid prefix: %s' % (sTag, sPrefix,));

    if sPrefix is not None and sPrefix not in g_kdPrefixes:
        return self.errorComment(iTagLine, '%s: invalid prefix: %s (valid %s)' % (sTag, sPrefix, g_kdPrefixes,));

    # Set it.
    if oInstr.sPrefix is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sPrefix, sPrefix,));
    oInstr.sPrefix = sPrefix;

    _ = iEndLine;
    return True;
4323
def parseTagOpcode(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opcode
    Value:  0x?? | /reg (TODO: | mr/reg | 11 /reg | !11 /reg | 11 mr/reg | !11 mr/reg)

    The opcode byte or sub-byte for the instruction in the context of a map.

    Besides a valid opcode byte, the forms '/r', '11/r' and '!11/r' are
    accepted, where r is a single digit in the range 0 thru 7.

    Returns True on success.  An invalid value, or an attempt to replace an
    existing opcode, queues an error and returns False.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sOpcode = self.flattenAllSections(aasSections);
    if not _isValidOpcodeByte(sOpcode):
        # The value must be one of the lead-ins followed by exactly one
        # /reg digit (0..7).
        fRegForm = any(    len(sOpcode) == len(sLeadIn) + 1
                       and sOpcode.startswith(sLeadIn)
                       and sOpcode[-1] in '01234567'
                       for sLeadIn in ('/', '11/', '!11/'));
        if not fRegForm:
            return self.errorComment(iTagLine, '%s: invalid opcode: %s' % (sTag, sOpcode,));

    # Set it.
    if oInstr.sOpcode is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sOpcode, sOpcode,));
    oInstr.sOpcode = sOpcode;

    _ = iEndLine;
    return True;
4353
def parseTagOpcodeSub(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opcodesub
    Value:  none | 11 mr/reg | !11 mr/reg | rex.w=0 | rex.w=1 | vex.l=0 | vex.l=1
            | 11 mr/reg vex.l=0 | 11 mr/reg vex.l=1 | !11 mr/reg vex.l=0 | !11 mr/reg vex.l=1
            | !11 rex.w=0 | !11 mr/reg rex.w=0
            | !11 rex.w=1 | !11 mr/reg rex.w=1

    This is a simple way of dealing with encodings where the mod=3 and mod!=3
    represents exactly two different instructions.  The more proper way would
    be to go via maps with two members, but this is faster.

    The value must be a key of g_kdSubOpcodes; the first element of the
    matching entry is what gets stored on the instruction.

    Returns True on success.  An unknown value, or an attempt to replace an
    existing sub opcode, queues an error and returns False.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    _ = iEndLine;

    # Flatten and validate the value.
    sRawValue = self.flattenAllSections(aasSections);
    if sRawValue not in g_kdSubOpcodes:
        sValid = ', '.join(sorted(g_kdSubOpcodes.keys()));
        return self.errorComment(iTagLine, '%s: invalid sub opcode: %s (valid: %s)'
                                           % (sTag, sRawValue, sValid,));
    sSubOpcode = g_kdSubOpcodes[sRawValue][0];

    # Set it, unless that would overwrite an earlier value.
    sOldSubOpcode = oInstr.sSubOpcode;
    if sOldSubOpcode is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                           % ( sTag, sOldSubOpcode, sSubOpcode,));
    oInstr.sSubOpcode = sSubOpcode;
    return True;
4383
def parseTagOpEnc(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @openc
    Value:  ModR/M|fixed|prefix|<map name>

    The instruction operand encoding style.

    The value is accepted if it is a key of g_kdEncodings, a key of
    g_dInstructionMaps, or passes _isValidOpcodeByte.

    Returns True on success.  An invalid value, or an attempt to replace an
    existing encoding, queues an error and returns False.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    _ = iEndLine;

    # Flatten and validate the value.
    sEncoding = self.flattenAllSections(aasSections);
    if     sEncoding not in g_kdEncodings \
       and sEncoding not in g_dInstructionMaps \
       and not _isValidOpcodeByte(sEncoding):
        return self.errorComment(iTagLine, '%s: invalid encoding: %s' % (sTag, sEncoding,));

    # Set it, unless that would overwrite an earlier value.
    sOldEncoding = oInstr.sEncoding;
    if sOldEncoding is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                           % ( sTag, sOldEncoding, sEncoding,));
    oInstr.sEncoding = sEncoding;
    return True;
4410
## EFlags tag to Instruction attribute name.
## parseTagOpEFlags uses this to find the attribute to read and set for the
## tag it is handling.
kdOpFlagToAttr = {
    '@opfltest':    'asFlTest',
    '@opflmodify':  'asFlModify',
    '@opflundef':   'asFlUndefined',
    '@opflset':     'asFlSet',
    '@opflclear':   'asFlClear',
};
4419
def parseTagOpEFlags(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tags:   @opfltest, @opflmodify, @opflundef, @opflset, @opflclear
    Value:  <eflags specifier>

    The value is a comma separated list of flags from g_kdEFlagsMnemonics, or
    the single word 'none' (any case) for an empty list.  The list is stored
    in the instruction attribute that kdOpFlagToAttr gives for the tag.

    Returns True on success.  Invalid flags (all of them are reported), or an
    attempt to replace a non-empty list, queue errors and return False.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    _ = iEndLine;

    # Flatten, split up and validate the values.
    asFlags = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',').split(',');
    if len(asFlags) == 1 and asFlags[0].lower() == 'none':
        asFlags = [];
    else:
        cInvalid = 0;
        for iFlag, sFlag in enumerate(asFlags):
            if sFlag in g_kdEFlagsMnemonics:
                continue;
            # Tolerate surrounding whitespace, storing the stripped name.
            sStripped = sFlag.strip();
            if sStripped in g_kdEFlagsMnemonics:
                asFlags[iFlag] = sStripped;
            else:
                self.errorComment(iTagLine, '%s: invalid EFLAGS value: %s' % (sTag, sFlag,));
                cInvalid += 1;
        if cInvalid > 0:
            return False;

    # Set them, unless that would overwrite a non-empty list.
    sAttrib = self.kdOpFlagToAttr[sTag];
    asOld = getattr(oInstr, sAttrib);
    if asOld is not None and len(asOld) > 0:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, asOld, asFlags,));
    setattr(oInstr, sAttrib, asFlags);
    return True;
4451
## EFLAGS class definitions with their attribute lists.
## Keyed by the class name given to @opflclass.  Each entry maps an
## Instruction attribute name to the list of flags parseTagOpEFlagsClass
## assigns to that attribute.
kdEFlagsClasses = {
    'arithmetic': { # add, sub, ...
        'asFlTest':         [],
        'asFlModify':       [ 'cf', 'pf', 'af', 'zf', 'sf', 'of', ],
        'asFlClear':        [],
        'asFlSet':          [],
        'asFlUndefined':    [],
    },
    'arithmetic_carry': { # adc, sbb, ...
        'asFlTest':         [ 'cf', ],
        'asFlModify':       [ 'cf', 'pf', 'af', 'zf', 'sf', 'of', ],
        'asFlClear':        [],
        'asFlSet':          [],
        'asFlUndefined':    [],
    },
    'incdec': {
        'asFlTest':         [],
        'asFlModify':       [ 'pf', 'af', 'zf', 'sf', 'of', ], # leaves CF alone
        'asFlClear':        [],
        'asFlSet':          [],
        'asFlUndefined':    [],
    },
    'division': { ## @todo specify intel/amd differences...
        'asFlTest':         [ 'pf', 'af', 'zf', 'sf', ], # Intel leaves all flags unchanged.
        'asFlModify':       [ 'pf', 'af', 'zf', 'sf', ], # While AMD sets AF and clears PF, ZF & SF, leaving CF and OF alone.
        'asFlClear':        [],
        'asFlSet':          [],
        'asFlUndefined':    [ 'cf', 'pf', 'af', 'zf', 'sf', 'of', ],
    },
    'multiply': { ## @todo specify intel/amd differences...
        'asFlTest':         [ 'pf', 'af', 'zf', 'sf', ], # AMD leaves these unchanged, so we have to declare them as inputs.
        'asFlModify':       [ 'cf', 'pf', 'af', 'zf', 'sf', 'of' ], # Intel always modifies all flags, but how differs
        'asFlClear':        [],                                     # between IMUL and MUL.
        'asFlSet':          [],
        'asFlUndefined':    [ 'pf', 'af', 'zf', 'sf', ],
    },
    'logical': { # and, or, xor, ...
        'asFlTest':         [],
        'asFlModify':       [ 'cf', 'pf', 'af', 'zf', 'sf', 'of', ],
        'asFlClear':        [ 'cf', 'af', 'of', ], # 'af' is undefined, but tstIEMAImpl indicates that it is cleared.
        'asFlSet':          [],
        'asFlUndefined':    [ 'af', ],
    },
    'rotate_1': { # rol and ror with fixed 1 shift count
        'asFlTest':         [],
        'asFlModify':       [ 'cf', 'of', ],
        'asFlClear':        [],
        'asFlSet':          [],
        'asFlUndefined':    [],
    },
    'rotate_count': { # rol and ror w/o fixed 1 shift count
        'asFlTest':         [],
        'asFlModify':       [ 'cf', 'of', ],
        'asFlClear':        [],
        'asFlSet':          [],
        'asFlUndefined':    [ 'of', ],
    },
    'rotate_carry_1': { # rcl and rcr with fixed 1 shift count
        'asFlTest':         [ 'cf', ],
        'asFlModify':       [ 'cf', 'of', ],
        'asFlClear':        [],
        'asFlSet':          [],
        'asFlUndefined':    [],
    },
    'rotate_carry_count': { # rcl and rcr w/o fixed 1 shift count
        'asFlTest':         [ 'cf', ],
        'asFlModify':       [ 'cf', 'of', ],
        'asFlClear':        [],
        'asFlSet':          [],
        'asFlUndefined':    [ 'of', ],
    },
    'shift_1': { # shl, shr or sar with fixed 1 count.
        'asFlTest':         [],
        'asFlModify':       [ 'cf', 'pf', 'af', 'zf', 'sf', 'of', ],
        'asFlClear':        [],
        'asFlSet':          [],
        'asFlUndefined':    [ 'af', ],
    },
    'shift_count': { # shl, shr or sar w/o fixed 1 shift count
        'asFlTest':         [],
        'asFlModify':       [ 'cf', 'pf', 'af', 'zf', 'sf', 'of', ],
        'asFlClear':        [],
        'asFlSet':          [],
        'asFlUndefined':    [ 'af', 'of', ],
    },
    'bitmap': { # bt, bts, btr, btc
        'asFlTest':         [],
        'asFlModify':       [ 'cf', ],
        'asFlClear':        [],
        'asFlSet':          [],
        'asFlUndefined':    [ 'pf', 'af', 'zf', 'sf', 'of', ], # tstIEMAImpl indicates that they aren't modified.
    },
    'unchanged': {
        'asFlTest':         [],
        'asFlModify':       [],
        'asFlClear':        [],
        'asFlSet':          [],
        'asFlUndefined':    [],
    },
};
def parseTagOpEFlagsClass(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tags:   @opflclass
    Value:  arithmetic, logical, ...

    Shorthand that sets all the EFLAGS attributes of the instruction from
    the named entry in kdEFlagsClasses.

    Each instruction gets its own copy of the flag lists, so modifying the
    lists of one instruction cannot alter the class table or the lists of
    other instructions.

    Returns True on success.  An unknown class, or an attribute that is
    already set, queues an error and returns False.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sClass = self.flattenAllSections(aasSections);
    kdAttribs = self.kdEFlagsClasses.get(sClass);
    if not kdAttribs:
        return self.errorComment(iTagLine, '%s: Unknown EFLAGS class: %s (valid: %s)'
                                           % (sTag, sClass, ', '.join(sorted(self.kdEFlagsClasses.keys())),));

    # Set the attributes.
    for sAttrib, asFlags in kdAttribs.items():
        asOld = getattr(oInstr, sAttrib);
        if asOld is not None:
            return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s" for %s'
                                               % (sTag, asOld, asFlags, sAttrib));
        setattr(oInstr, sAttrib, list(asFlags)); # Copy, the table entry is shared.

    _ = iEndLine;
    return True;
4578
def parseTagOpHints(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @ophints
    Value:      Comma or space separated list of flags and hints.

    This covers the disassembler flags table and more.

    The single value 'none' (any case) means an empty hint list.  Every
    other value must be a key of g_kdHints; valid hints are recorded as keys
    in the instruction's dHints dictionary.  Duplicates are reported but do
    not fail the tag.

    Returns True on success, False if any hint value was invalid.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    asHints = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asHints) == 1 and asHints[0].lower() == 'none':
        asHints = [];
    else:
        fRc = True;
        for iHint, sHint in enumerate(asHints):
            if sHint not in g_kdHints:
                if sHint.strip() in g_kdHints:
                    # Store the normalized value back into the list (strings are
                    # immutable, so indexing into sHint itself would raise TypeError).
                    asHints[iHint] = sHint.strip();
                else:
                    fRc = self.errorComment(iTagLine, '%s: invalid hint value: %s' % (sTag, sHint,));
        if not fRc:
            return False;

    # Append them.
    for sHint in asHints:
        if sHint not in oInstr.dHints:
            oInstr.dHints[sHint] = True; # (dummy value, using dictionary for speed)
        else:
            self.errorComment(iTagLine, '%s: duplicate hint: %s' % ( sTag, sHint,));

    _ = iEndLine;
    return True;
4612
def parseTagOpDisEnum(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @opdisenum
    Value:      OP_XXXX

    Selects a specific (legacy) disassembler enum value for the
    instruction.  The value must match self.oReDisEnum and may only be set
    once per instruction.

    Returns True on success; False or the errorComment() result otherwise.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Tokenize the value.  Surplus words are complained about, but the first
    # one is still used; no words at all is a hard failure.
    asTokens = self.flattenAllSections(aasSections).split();
    cTokens  = len(asTokens);
    if cTokens != 1:
        self.errorComment(iTagLine, '%s: expected exactly one value: %s' % (sTag, asTokens,));
        if cTokens == 0:
            return False;

    sNewEnum = asTokens[0];
    if not self.oReDisEnum.match(sNewEnum):
        return self.errorComment(iTagLine, '%s: invalid disassembler OP_XXXX enum: %s (pattern: %s)'
                                           % (sTag, sNewEnum, self.oReDisEnum.pattern));

    # Refuse to replace an earlier value.
    sOldEnum = oInstr.sDisEnum;
    if sOldEnum is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % (sTag, sOldEnum, sNewEnum,));

    oInstr.sDisEnum = sNewEnum;
    return True;
4641
def parseTagOpMinCpu(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @opmincpu
    Value:      <simple CPU name>

    Indicates when this instruction was introduced.

    The value must be a key of g_kdCpuNames.  If more than one word is
    given, that is reported and the first word is used.  A value that
    differs from one already recorded on the instruction is reported and
    the earlier value is kept.

    Returns True when a valid CPU name was given, otherwise the
    errorComment() result.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value, split into words, make sure there's just one, valid it.
    asCpus = self.flattenAllSections(aasSections).split();
    if not asCpus:
        # Nothing to index below; report it rather than raising IndexError.
        return self.errorComment(iTagLine, '%s: exactly one CPU name, please (found none)' % (sTag,));
    if len(asCpus) > 1:
        self.errorComment(iTagLine, '%s: exactly one CPU name, please: %s' % (sTag, ' '.join(asCpus),));

    sMinCpu = asCpus[0];
    if sMinCpu not in g_kdCpuNames:
        return self.errorComment(iTagLine, '%s: invalid CPU name: %s (names: %s)'
                                           % (sTag, sMinCpu, ','.join(sorted(g_kdCpuNames)),));

    # Set it.  (Must not be assigned before this check, or a conflicting
    # earlier value could never be detected.)
    if oInstr.sMinCpu is None:
        oInstr.sMinCpu = sMinCpu;
    elif oInstr.sMinCpu != sMinCpu:
        self.errorComment(iTagLine, '%s: attemting to overwrite "%s" with "%s"' % (sTag, oInstr.sMinCpu, sMinCpu,));

    _ = iEndLine;
    return True;
4671
def parseTagOpCpuId(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @opcpuid
    Value:      none | <CPUID flag specifier>

    CPUID feature bit which is required for the instruction to be present.

    The single value 'none' (any case) means an empty list.  Every other
    value must be a key of g_kdCpuIdFlags; valid values are appended to the
    instruction's asCpuIds list.  Duplicates are reported but do not fail
    the tag.

    Returns True on success, False if any CPUID value was invalid.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    asCpuIds = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asCpuIds) == 1 and asCpuIds[0].lower() == 'none':
        asCpuIds = [];
    else:
        fRc = True;
        for iCpuId, sCpuId in enumerate(asCpuIds):
            if sCpuId not in g_kdCpuIdFlags:
                if sCpuId.strip() in g_kdCpuIdFlags:
                    # Store the normalized value back into the list (strings are
                    # immutable, so indexing into sCpuId itself would raise TypeError).
                    asCpuIds[iCpuId] = sCpuId.strip();
                else:
                    fRc = self.errorComment(iTagLine, '%s: invalid CPUID value: %s' % (sTag, sCpuId,));
        if not fRc:
            return False;

    # Append them.
    for sCpuId in asCpuIds:
        if sCpuId not in oInstr.asCpuIds:
            oInstr.asCpuIds.append(sCpuId);
        else:
            self.errorComment(iTagLine, '%s: duplicate CPUID: %s' % ( sTag, sCpuId,));

    _ = iEndLine;
    return True;
4705
def parseTagOpGroup(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @opgroup
    Value:      op_grp1[_subgrp2[_subsubgrp3]]

    Instruction grouping.  Exactly one group name matching
    self.oReGroupName is expected, and it may only be set once per
    instruction.

    Returns True on success, otherwise the errorComment() result.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # There must be one and only one word in the value.
    asNames = self.flattenAllSections(aasSections).split();
    if len(asNames) != 1:
        return self.errorComment(iTagLine, '%s: exactly one group, please: %s' % (sTag, asNames,));

    (sNewGroup,) = asNames;
    if not self.oReGroupName.match(sNewGroup):
        return self.errorComment(iTagLine, '%s: invalid group name: %s (valid: %s)'
                                           % (sTag, sNewGroup, self.oReGroupName.pattern));

    # Refuse to replace an earlier value.
    sOldGroup = oInstr.sGroup;
    if sOldGroup is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, sOldGroup, sNewGroup,));

    oInstr.sGroup = sNewGroup;
    return True;
4731
def parseTagOpUnusedInvalid(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opunused, @opinvalid, @opinvlstyle
    Value:  <invalid opcode behaviour style>

    The @opunused indicates the specification is for a currently unused
    instruction encoding.

    The @opinvalid indicates the specification is for an invalid currently
    instruction encoding (like UD2).

    The @opinvlstyle just indicates how CPUs decode the instruction when
    not supported (@opcpuid, @opmincpu) or disabled.

    All three tags store the style (a key of g_kdInvalidStyles) in the
    instruction's sInvalidStyle, which may only be set once.  @opunused and
    @opinvalid additionally set the fUnused / fInvalid flag.

    Returns True on success, otherwise the errorComment() result.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The value must be a single, known style name.
    asWords = self.flattenAllSections(aasSections).split();
    if len(asWords) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid behviour style, please: %s' % (sTag, asWords,));

    (sNewStyle,) = asWords;
    if sNewStyle not in g_kdInvalidStyles:
        return self.errorComment(iTagLine, '%s: invalid invalid behaviour style: %s (valid: %s)'
                                           % (sTag, sNewStyle, g_kdInvalidStyles.keys(),));

    # Only one of the three tags may be given per instruction.
    sOldStyle = oInstr.sInvalidStyle;
    if sOldStyle is not None:
        return self.errorComment(iTagLine,
                                 '%s: attempting to overwrite "%s" with "%s" (only one @opunused, @opinvalid, @opinvlstyle)'
                                 % ( sTag, sOldStyle, sNewStyle,));
    oInstr.sInvalidStyle = sNewStyle;

    # Two of the tags also raise a flag on the instruction.
    sFlagAttrib = { '@opunused': 'fUnused', '@opinvalid': 'fInvalid', }.get(sTag);
    if sFlagAttrib is not None:
        setattr(oInstr, sFlagAttrib, True);

    return True;
4769
def parseTagOpTest(self, sTag, aasSections, iTagLine, iEndLine): # pylint: disable=too-many-locals
    """
    Tag:        @optest
    Value:      [<selectors>[ ]?] <inputs> -> <outputs>
    Example:    mode==64bit / in1=0xfffffffe:dw in2=1:dw -> out1=0xffffffff:dw outfl=a?,p?

    The main idea here is to generate basic instruction tests.

    The probably simplest way of handling the diverse input, would be to use
    it to produce size optimized byte code for a simple interpreter that
    modifies the register input and output states.

    An alternative to the interpreter would be creating multiple tables,
    but that becomes rather complicated wrt what goes where and then to use
    them in an efficient manner.

    Each section in aasSections is parsed as a separate test.  A test is
    only appended to the instruction's aoTests list if all its selectors
    and assignments parsed without error.

    Always returns True; problems are reported via errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    #
    # Do it section by section.
    #
    for asSectionLines in aasSections:
        #
        # Sort the input into outputs, inputs and selector conditions.
        #
        sFlatSection = self.flattenAllSections([asSectionLines,]);
        if not sFlatSection:
            self.errorComment(iTagLine, '%s: missing value (dbg: aasSections=%s)' % ( sTag, aasSections));
            continue;
        oTest = InstructionTest(oInstr);

        asSelectors = [];
        asInputs    = [];
        asOutputs   = [];
        asCur       = asOutputs;
        fRc         = True;
        asWords     = sFlatSection.split();
        # The words are walked back to front: outputs first, then inputs
        # (after '->'), then selectors (after '/').
        for sWord in reversed(asWords):
            # Check for array switchers.  Note! The lists must be compared by
            # identity here; comparing by value would miss a misplaced
            # separator whenever the two lists happen to have equal content
            # (e.g. both still empty).
            if sWord == '->':
                if asCur is not asOutputs:
                    fRc = self.errorComment(iTagLine, '%s: "->" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asInputs;
            elif sWord == '/':
                if asCur is not asInputs:
                    fRc = self.errorComment(iTagLine, '%s: "/" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asSelectors;
            else:
                asCur.insert(0, sWord);

        #
        # Validate and add selectors.
        #
        for sCond in asSelectors:
            # Expand predicate shorthands before looking for the comparison operator.
            sCondExp  = TestSelector.kdPredicates.get(sCond, sCond);
            oSelector = None;
            for sOp in TestSelector.kasCompareOps:
                off = sCondExp.find(sOp);
                if off >= 0:
                    sVariable = sCondExp[:off];
                    sValue    = sCondExp[off + len(sOp):];
                    if sVariable in TestSelector.kdVariables:
                        if sValue in TestSelector.kdVariables[sVariable]:
                            oSelector = TestSelector(sVariable, sOp, sValue);
                        else:
                            self.errorComment(iTagLine, '%s: invalid condition value "%s" in "%s" (valid: %s)'
                                                        % ( sTag, sValue, sCond,
                                                            TestSelector.kdVariables[sVariable].keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid condition variable "%s" in "%s" (valid: %s)'
                                                    % ( sTag, sVariable, sCond, TestSelector.kdVariables.keys(),));
                    break;
            if oSelector is not None:
                for oExisting in oTest.aoSelectors:
                    if oExisting.sVariable == oSelector.sVariable:
                        self.errorComment(iTagLine, '%s: already have a selector for variable "%s" (existing: %s, new: %s)'
                                                    % ( sTag, oSelector.sVariable, oExisting, oSelector,));
                oTest.aoSelectors.append(oSelector);
            else:
                fRc = self.errorComment(iTagLine, '%s: failed to parse selector: %s' % ( sTag, sCond,));

        #
        # Validate outputs and inputs, adding them to the test as we go along.
        #
        for asItems, sDesc, aoDst in [ (asInputs, 'input', oTest.aoInputs), (asOutputs, 'output', oTest.aoOutputs)]:
            asValidFieldKinds = [ 'both', sDesc, ];
            for sItem in asItems:
                oItem = None;
                # The first operator found in the item decides how it is split.
                for sOp in TestInOut.kasOperators:
                    off = sItem.find(sOp);
                    if off < 0:
                        continue;
                    sField     = sItem[:off];
                    sValueType = sItem[off + len(sOp):];
                    if     sField in TestInOut.kdFields \
                       and TestInOut.kdFields[sField][1] in asValidFieldKinds:
                        # An explicit ':type' suffix overrides the field's default type.
                        asSplit = sValueType.split(':', 1);
                        sValue  = asSplit[0];
                        sType   = asSplit[1] if len(asSplit) > 1 else TestInOut.kdFields[sField][0];
                        if sType in TestInOut.kdTypes:
                            oValid = TestInOut.kdTypes[sType].validate(sValue);
                            if oValid is True:
                                if not TestInOut.kdTypes[sType].isAndOrPair(sValue) or sOp == '&|=':
                                    oItem = TestInOut(sField, sOp, sValue, sType);
                                else:
                                    self.errorComment(iTagLine, '%s: and-or %s value "%s" can only be used with "&|="'
                                                                % ( sTag, sDesc, sItem, ));
                            else:
                                self.errorComment(iTagLine, '%s: invalid %s value "%s" in "%s" (type: %s): %s'
                                                            % ( sTag, sDesc, sValue, sItem, sType, oValid, ));
                        else:
                            self.errorComment(iTagLine, '%s: invalid %s type "%s" in "%s" (valid types: %s)'
                                                        % ( sTag, sDesc, sType, sItem, TestInOut.kdTypes.keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid %s field "%s" in "%s"\nvalid fields: %s'
                                                    % ( sTag, sDesc, sField, sItem,
                                                        ', '.join([sKey for sKey, asVal in TestInOut.kdFields.items()
                                                                   if asVal[1] in asValidFieldKinds]),));
                    break;
                if oItem is not None:
                    for oExisting in aoDst:
                        if oExisting.sField == oItem.sField and oExisting.sOp == oItem.sOp:
                            self.errorComment(iTagLine,
                                              '%s: already have a "%s" assignment for field "%s" (existing: %s, new: %s)'
                                              % ( sTag, oItem.sOp, oItem.sField, oExisting, oItem,));
                    aoDst.append(oItem);
                else:
                    fRc = self.errorComment(iTagLine, '%s: failed to parse assignment: %s' % ( sTag, sItem,));

        #
        # Add the test if everything parsed fine, otherwise dump what we got.
        #
        if fRc:
            oInstr.aoTests.append(oTest);
        else:
            self.errorComment(iTagLine, '%s: failed to parse test: %s' % (sTag, ' '.join(asWords),));
            self.errorComment(iTagLine, '%s: asSelectors=%s / asInputs=%s -> asOutputs=%s'
                                        % (sTag, asSelectors, asInputs, asOutputs,));

    _ = iEndLine;
    return True;
4914
def parseTagOpTestNum(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Numbered @optest tag.  Either @optest42 or @optest[42].

    The number in the tag is compared against the count of tests already
    on the instruction; a mismatch is reported, but the test is parsed by
    parseTagOpTest() either way and its return value is passed on.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Pick out the digits: '@optest[' is 8 chars and has a closing bracket,
    # the plain '@optest' prefix is 7 chars.
    sNumber = sTag[8:-1] if sTag[-1] == ']' else sTag[7:];
    iTest   = int(sNumber);

    cTests = len(oInstr.aoTests);
    if iTest != cTests:
        self.errorComment(iTagLine, '%s: incorrect test number: %u, actual %u' % (sTag, iTest, len(oInstr.aoTests),));

    return self.parseTagOpTest(sTag, aasSections, iTagLine, iEndLine);
4930
def parseTagOpTestIgnore(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @optestign | @optestignore
    Value:      <value is ignored>

    This is a simple trick to ignore a test while debugging another: the
    tag and its value are accepted and dropped without looking at them.

    See also @oponlytest.
    """
    # Nothing to do; just mark all the arguments as intentionally unused.
    _ = (sTag, aasSections, iTagLine, iEndLine,);
    return True;
4942
def parseTagOpCopyTests(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @opcopytests
    Value:      <opstat | function> [..]
    Example:    @opcopytests add_Eb_Gb

    Trick to avoid duplicating tests for different encodings of the same
    operation.

    Each reference must match self.oReStatsName or self.oReFunctionName and
    is queued on the instruction's asCopyTests list.  Invalid and duplicate
    references are reported and skipped.

    Returns True unless no reference was given at all, in which case the
    errorComment() result is returned.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The copy jobs are only recorded here.  They are executed after all
    # the input has been parsed, so forward references work.
    asReferences = self.flattenAllSections(aasSections).split();
    if not asReferences:
        return self.errorComment(iTagLine, '%s: requires at least on reference value' % (sTag,));

    for sReference in asReferences:
        if sReference in oInstr.asCopyTests:
            self.errorComment(iTagLine, '%s: ignoring duplicate "%s"' % (sTag, sReference,));
            continue;

        fValid = self.oReStatsName.match(sReference) or self.oReFunctionName.match(sReference);
        if not fValid:
            self.errorComment(iTagLine, '%s: invalid instruction reference (opstat or function) "%s" (valid: %s or %s)'
                                        % (sTag, sReference, self.oReStatsName.pattern, self.oReFunctionName.pattern));
            continue;

        oInstr.asCopyTests.append(sReference);

    return True;
4971
def parseTagOpOnlyTest(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @oponlytest | @oponly
    Value:      none

    Only test instructions with this tag.  This is a trick that is handy
    for singling out one or two new instructions or tests.

    The current instruction is added to g_aoOnlyTestInstructions unless it
    is already in there.  The tag must not carry any value.

    Returns True on success, otherwise the errorComment() result.

    See also @optestignore.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # This tag takes no value, so anything found is an error.
    sUnexpected = self.flattenAllSections(aasSections).strip();
    if sUnexpected:
        return self.errorComment(iTagLine, '%s: does not take any value: %s' % (sTag, sUnexpected));

    # Register the instruction, once.
    fKnown = oInstr in g_aoOnlyTestInstructions;
    if not fKnown:
        g_aoOnlyTestInstructions.append(oInstr);

    return True;
4994
def parseTagOpXcptType(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @opxcpttype
    Value:      [none|1|2|3|4|4UA|5|6|7|8|11|12|E1|E1NF|E2|E3|E3NF|E4|E4NF|E5|E5NF|E6|E6NF|E7NF|E9|E9NF|E10|E11|E12|E12NF]

    Sets the SSE or AVX exception type (see SDMv2 2.4, 2.7).

    Exactly one value, a key of g_kdXcptTypes, is expected and it may only
    be set once per instruction.

    Returns True on success, otherwise the errorComment() result.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The value must be a single, known exception type.
    asWords = self.flattenAllSections(aasSections).split();
    if len(asWords) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid exception type, please: %s' % (sTag, asWords,));

    (sNewType,) = asWords;
    if sNewType not in g_kdXcptTypes:
        return self.errorComment(iTagLine, '%s: invalid invalid exception type: %s (valid: %s)'
                                           % (sTag, sNewType, sorted(g_kdXcptTypes.keys()),));

    # Refuse to replace an earlier value.
    sOldType = oInstr.sXcptType;
    if sOldType is not None:
        return self.errorComment(iTagLine,
                                 '%s: attempting to overwrite "%s" with "%s" (only one @opxcpttype)'
                                 % ( sTag, sOldType, sNewType,));

    oInstr.sXcptType = sNewType;
    return True;
5021
def parseTagOpFunction(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @opfunction
    Value:      <VMM function name>

    This is for explicitly setting the IEM function name.  Normally we pick
    this up from the FNIEMOP_XXX macro invocation after the description, or
    generate it from the mnemonic and operands.

    It it thought it maybe necessary to set it when specifying instructions
    which implementation isn't following immediately or aren't implemented yet.

    The name must match self.oReFunctionName and may only be set once per
    instruction.

    Returns True on success, otherwise the errorComment() result.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The whole flattened value is taken as the name.
    sNewName = self.flattenAllSections(aasSections);
    if not self.oReFunctionName.match(sNewName):
        return self.errorComment(iTagLine, '%s: invalid VMM function name: "%s" (valid: %s)'
                                           % (sTag, sNewName, self.oReFunctionName.pattern));

    # Refuse to replace an earlier value.
    sOldName = oInstr.sFunction;
    if sOldName is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite VMM function name "%s" with "%s"'
                                           % (sTag, sOldName, sNewName,));

    oInstr.sFunction = sNewName;
    return True;
5049
def parseTagOpStats(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @opstats
    Value:      <VMM statistics base name>

    This is for explicitly setting the statistics name.  Normally we pick
    this up from the IEMOP_MNEMONIC macro invocation, or generate it from
    the mnemonic and operands.

    It it thought it maybe necessary to set it when specifying instructions
    which implementation isn't following immediately or aren't implemented yet.

    The name must match self.oReStatsName and may only be set once per
    instruction.

    Returns True on success, otherwise the errorComment() result.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The whole flattened value is taken as the name.
    sNewStats = self.flattenAllSections(aasSections);
    if not self.oReStatsName.match(sNewStats):
        return self.errorComment(iTagLine, '%s: invalid VMM statistics name: "%s" (valid: %s)'
                                           % (sTag, sNewStats, self.oReStatsName.pattern));

    # Refuse to replace an earlier value.
    sOldStats = oInstr.sStats;
    if sOldStats is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite VMM statistics base name "%s" with "%s"'
                                           % (sTag, sOldStats, sNewStats,));

    oInstr.sStats = sNewStats;
    return True;
5077
def parseTagOpDone(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @opdone
    Value:      none

    Used to explictily flush the instructions that have been specified.

    Returns the doneInstructions() result when the tag carries no value,
    otherwise the errorComment() result.
    """
    _ = iEndLine;

    # The tag must be empty; flush only in that case.
    sValue = self.flattenAllSections(aasSections);
    if sValue == '':
        return self.doneInstructions();
    return self.errorComment(iTagLine, '%s: takes no value, found: "%s"' % (sTag, sValue,));
5090
5091 ## @}
5092
5093
def parseComment(self):
    """
    Parse the current comment (self.sComment).

    If it's a opcode specifiying comment, we reset the macro stuff.

    The comment is split into lines which are stripped of leading
    whitespace and asterisks.  Lines starting with '@' begin a new tag; the
    text following a tag, up to the next tag, is collected as a list of
    sections (runs of non-empty lines separated by empty lines) and handed
    to the matching handler in self.dTagHandlers.  Text preceding the first
    tag is collected under the pseudo tag '@default'.

    Returns False if the comment contains neither 'Opcode' nor '@' (nothing
    is parsed in that case), otherwise True.
    """
    #
    # Reject if comment doesn't seem to contain anything interesting.
    #
    if self.sComment.find('Opcode') < 0 \
       and self.sComment.find('@') < 0:
        return False;

    #
    # Split the comment into lines, removing leading asterisks and spaces.
    # Also remove leading and trailing empty lines.
    #
    asLines = self.sComment.split('\n');
    for iLine, sLine in enumerate(asLines):
        asLines[iLine] = sLine.lstrip().lstrip('*').lstrip();

    # Each leading empty line dropped also advances self.iCommentLine.
    while asLines and not asLines[0]:
        self.iCommentLine += 1;
        asLines.pop(0);

    while asLines and not asLines[-1]:
        asLines.pop(len(asLines) - 1);

    #
    # Check for old style: Opcode 0x0f 0x12
    #
    if asLines[0].startswith('Opcode '):
        self.parseCommentOldOpcode(asLines);

    #
    # Look for @op* tagged data.
    #
    cOpTags      = 0;               # Number of tags that were dispatched to a handler.
    sFlatDefault = None;            # Flattened untagged text, if any was seen.
    sCurTag      = '@default';      # The tag the lines are currently being collected for.
    iCurTagLine  = 0;               # Index into asLines of the line with the current tag.
    asCurSection = [];              # The section currently being filled.
    aasSections  = [ asCurSection, ]; # All sections of the current tag.
    for iLine, sLine in enumerate(asLines):
        if not sLine.startswith('@'):
            # Continuation of the current tag: non-empty lines extend the
            # current section, an empty line after content starts a new one.
            if sLine:
                asCurSection.append(sLine);
            elif asCurSection:
                asCurSection = [];
                aasSections.append(asCurSection);
        else:
            #
            # Process the previous tag.
            #
            # (Drop a trailing empty section left behind by a blank line.)
            if not asCurSection and len(aasSections) > 1:
                aasSections.pop(-1);
            if sCurTag in self.dTagHandlers:
                self.dTagHandlers[sCurTag](sCurTag, aasSections, iCurTagLine, iLine);
                cOpTags += 1;
            elif sCurTag.startswith('@op'):
                self.errorComment(iCurTagLine, 'Unknown tag: %s' % (sCurTag));
            elif sCurTag == '@default':
                sFlatDefault = self.flattenAllSections(aasSections);
            elif '@op' + sCurTag[1:] in self.dTagHandlers:
                self.errorComment(iCurTagLine, 'Did you mean "@op%s" rather than "%s"?' % (sCurTag[1:], sCurTag));
            elif sCurTag in ['@encoding', '@opencoding']:
                self.errorComment(iCurTagLine, 'Did you mean "@openc" rather than "%s"?' % (sCurTag,));

            #
            # New tag.
            #
            # The tag is the first word (lower cased); the rest of the
            # line, if any, becomes the first line of its first section.
            asSplit = sLine.split(None, 1);
            sCurTag = asSplit[0].lower();
            if len(asSplit) > 1:
                asCurSection = [asSplit[1],];
            else:
                asCurSection = [];
            aasSections = [asCurSection, ];
            iCurTagLine = iLine;

    #
    # Process the final tag.
    #
    # NOTE(review): unlike the in-loop handling above, this does not issue
    # the 'Did you mean ...' hints for a misspelled final tag - confirm
    # whether that is intentional.
    if not asCurSection and len(aasSections) > 1:
        aasSections.pop(-1);
    if sCurTag in self.dTagHandlers:
        self.dTagHandlers[sCurTag](sCurTag, aasSections, iCurTagLine, iLine);
        cOpTags += 1;
    elif sCurTag.startswith('@op'):
        self.errorComment(iCurTagLine, 'Unknown tag: %s' % (sCurTag));
    elif sCurTag == '@default':
        sFlatDefault = self.flattenAllSections(aasSections);

    #
    # Don't allow default text in blocks containing @op*.
    #
    if cOpTags > 0 and sFlatDefault:
        self.errorComment(0, 'Untagged comment text is not allowed with @op*: %s' % (sFlatDefault,));

    return True;
5194
def parseMacroInvocation(self, sInvocation, offStartInvocation = 0):
    """
    Parses a macro invocation, pulling in further lines from self.asLines
    (starting at index self.iLine) when the closing parenthesis isn't
    found within sInvocation.

    Returns three values:
        1. A list of macro arguments (whitespace stripped), where the
           zero'th is the macro name.
        2. The offset following the macro invocation, into sInvocation of
           this is on the same line or into the last line if it is on a
           different line.
        3. Number of additional lines the invocation spans (i.e. zero if
           it is all contained within sInvocation).
    """
    # The name is whatever sits between the start offset and the opening parenthesis.
    offParen = sInvocation.find('(', offStartInvocation);
    if offParen <= offStartInvocation:
        self.raiseError("macro invocation open parenthesis not found");
    sMacroName = sInvocation[offStartInvocation:offParen].strip();
    if not self.oReMacroName.match(sMacroName):
        self.raiseError("invalid macro name '%s'" % (sMacroName,));
    asResult = [sMacroName, ];

    # Walk the argument list character by character, splitting at top-level
    # commas while keeping track of nested parentheses and quoted text.
    iNextLine   = self.iLine;
    cParenDepth = 1;
    offCur      = offParen + 1;
    offArgStart = offCur;
    offLineBase = 0;        # Offset in sInvocation where the last appended line starts.
    chOpenQuote = None;     # The quote character when inside a string/char literal.
    while cParenDepth > 0:
        # Out of text?  Append the next source line, or give up at the end of the file.
        if offCur >= len(sInvocation):
            if iNextLine >= len(self.asLines):
                self.error('macro invocation beyond end of file');
                return (asResult, offCur - offLineBase, iNextLine - self.iLine);
            offLineBase  = offCur;
            sInvocation += self.asLines[iNextLine];
            iNextLine   += 1;
        chCur = sInvocation[offCur];

        if chOpenQuote:
            # Inside quotes: skip the character after a backslash, and
            # look for the matching closing quote.
            if chCur == '\\' and offCur + 1 < len(sInvocation):
                offCur += 1;
            elif chCur == chOpenQuote:
                chOpenQuote = None;
        elif chCur == '"' or chCur == '\'':
            chOpenQuote = chCur;
        elif chCur == '(':
            cParenDepth += 1;
        elif chCur == ',' or chCur == ')':
            # Only separators at the outermost level terminate an argument.
            if cParenDepth == 1:
                asResult.append(sInvocation[offArgStart:offCur].strip());
                offArgStart = offCur + 1;
            if chCur == ')':
                cParenDepth -= 1;
        offCur += 1;

    return (asResult, offCur - offLineBase, iNextLine - self.iLine);
5251
def findAndParseMacroInvocationEx(self, sCode, sMacro, offStart = 0):
    """
    Looks for the first occurrence of sMacro in sCode at or after offStart
    and, if the next non-whitespace character after it is an opening
    parenthesis, parses it with parseMacroInvocation().

    Returns (None, len(sCode), 0) if not found, otherwise the
    parseMacroInvocation() return value.
    """
    offHit = sCode.find(sMacro, offStart);
    # startswith() is used rather than indexing the first character, as the
    # latter raises IndexError when nothing but whitespace follows the name.
    if offHit >= 0 and sCode[offHit + len(sMacro):].lstrip().startswith('('):
        return self.parseMacroInvocation(sCode, offHit);
    return (None, len(sCode), 0);
5261
def findAndParseMacroInvocation(self, sCode, sMacro):
    """
    Simplified findAndParseMacroInvocationEx() that searches from the start
    of sCode and only returns the first element of the result.

    Returns None if not found, arguments as per parseMacroInvocation if found.
    """
    # Only the argument list is of interest here; drop offset and line count.
    tResult = self.findAndParseMacroInvocationEx(sCode, sMacro);
    asArgs  = tResult[0];
    return asArgs;
5267
def findAndParseFirstMacroInvocation(self, sCode, asMacro):
    """
    Tries the macro names in asMacro in the given order, stopping at the
    first one findAndParseMacroInvocation() finds in sCode.

    Returns same as findAndParseMacroInvocation.
    """
    # The generator is lazy, so no further lookups are made after the first hit.
    oHits = (self.findAndParseMacroInvocation(sCode, sName) for sName in asMacro);
    return next((asArgs for asArgs in oHits if asArgs is not None), None);
5277
def workerIemOpMnemonicEx(self, sMacro, sStats, sAsm, sForm, sUpper, sLower,  # pylint: disable=too-many-arguments
                          sDisHints, sIemHints, asOperands):
    """
    Processes one of the a IEMOP_MNEMONIC0EX, IEMOP_MNEMONIC1EX, IEMOP_MNEMONIC2EX,
    IEMOP_MNEMONIC3EX, and IEMOP_MNEMONIC4EX macros.

    The macro arguments are cross checked against what the @op* tags have
    set on the last current instruction, and fill in whatever is not set
    yet (mnemonic, operands, encoding, statistics name, hints).  Mismatches
    are reported via self.error().

    Parameters:
        sMacro      - The macro name, only used in error messages.
        sStats      - The a_Stats value (statistics name).
        sAsm        - Not used.
        sForm       - The a_Form value, a key into g_kdIemForms.
        sUpper      - The a_Upper value, the mnemonic in upper case.
        sLower      - The a_Lower value, the mnemonic in lower case.
        sDisHints   - The a_fDisHints value: '0' or DISOPTYPE_XXX values
                      separated by '|'.
        sIemHints   - The a_fIemHints value: '0' or IEMOPHINT_XXX values
                      separated by '|'.
        asOperands  - List of operand types, keys into g_kdOpTypes.

    Always returns True.
    """
    #
    # Some invocation checks.
    #
    if sUpper != sUpper.upper():
        self.error('%s: bad a_Upper parameter: %s' % (sMacro, sUpper,));
    if sLower != sLower.lower():
        self.error('%s: bad a_Lower parameter: %s' % (sMacro, sLower,));
    if sUpper.lower() != sLower:
        self.error('%s: a_Upper and a_Lower parameters does not match: %s vs %s' % (sMacro, sUpper, sLower,));
    if not self.oReMnemonic.match(sLower):
        self.error('%s: invalid a_Lower: %s  (valid: %s)' % (sMacro, sLower, self.oReMnemonic.pattern,));

    #
    # Check if sIemHints tells us to not consider this macro invocation.
    #
    if sIemHints.find('IEMOPHINT_SKIP_PYTHON') >= 0:
        return True;

    # Apply to the last instruction only for now.
    if not self.aoCurInstrs:
        self.addInstruction();
    oInstr = self.aoCurInstrs[-1];
    # An iLineMnemonicMacro of -1 means no mnemonic macro has been recorded for the instruction yet.
    if oInstr.iLineMnemonicMacro == -1:
        oInstr.iLineMnemonicMacro = self.iLine;
    else:
        self.error('%s: already saw a IEMOP_MNEMONIC* macro on line %u for this instruction'
                   % (sMacro, oInstr.iLineMnemonicMacro,));

    # Mnemonic
    if oInstr.sMnemonic is None:
        oInstr.sMnemonic = sLower;
    elif oInstr.sMnemonic != sLower:
        self.error('%s: current instruction and a_Lower does not match: %s vs %s' % (sMacro, oInstr.sMnemonic, sLower,));

    # Process operands.
    # (Either no operands have been given by @opN tags, or the count must match the macro.)
    if len(oInstr.aoOperands) not in [0, len(asOperands)]:
        self.error('%s: number of operands given by @opN does not match macro: %s vs %s'
                   % (sMacro, len(oInstr.aoOperands), len(asOperands),));
    for iOperand, sType in enumerate(asOperands):
        sWhere = g_kdOpTypes.get(sType, [None, None])[1];
        if sWhere is None:
            self.error('%s: unknown a_Op%u value: %s' % (sMacro, iOperand + 1, sType));
            if iOperand < len(oInstr.aoOperands): # error recovery.
                sWhere = oInstr.aoOperands[iOperand].sWhere;
                sType  = oInstr.aoOperands[iOperand].sType;
            else:
                sWhere = 'reg';
                sType  = 'Gb';
        # Append operands not given by tags, verify the ones that were.
        if iOperand == len(oInstr.aoOperands):
            oInstr.aoOperands.append(Operand(sWhere, sType))
        elif oInstr.aoOperands[iOperand].sWhere != sWhere or oInstr.aoOperands[iOperand].sType != sType:
            self.error('%s: @op%u and a_Op%u mismatch: %s:%s vs %s:%s'
                       % (sMacro, iOperand + 1, iOperand + 1, oInstr.aoOperands[iOperand].sWhere,
                          oInstr.aoOperands[iOperand].sType, sWhere, sType,));

    # Encoding.
    # (g_kdIemForms entries are indexed below as: [0] encoding, [1] operand locations or None, [2] @opcodesub match.)
    if sForm not in g_kdIemForms:
        self.error('%s: unknown a_Form value: %s' % (sMacro, sForm,));
    else:
        if oInstr.sEncoding is None:
            oInstr.sEncoding = g_kdIemForms[sForm][0];
        elif g_kdIemForms[sForm][0] != oInstr.sEncoding:
            self.error('%s: current instruction @openc and a_Form does not match: %s vs %s (%s)'
                       % (sMacro, oInstr.sEncoding, g_kdIemForms[sForm], sForm));

        # Check the parameter locations for the encoding.
        if g_kdIemForms[sForm][1] is not None:
            if len(g_kdIemForms[sForm][1]) > len(oInstr.aoOperands):
                self.error('%s: The a_Form=%s has a different operand count: %s (form) vs %s'
                           % (sMacro, sForm, len(g_kdIemForms[sForm][1]), len(oInstr.aoOperands) ));
            else:
                for iOperand, sWhere in enumerate(g_kdIemForms[sForm][1]):
                    if oInstr.aoOperands[iOperand].sWhere != sWhere:
                        self.error('%s: current instruction @op%u and a_Form location does not match: %s vs %s (%s)'
                                   % (sMacro, iOperand + 1, oInstr.aoOperands[iOperand].sWhere, sWhere, sForm,));
                    sOpFormMatch = g_kdOpTypes[oInstr.aoOperands[iOperand].sType][4];
                    # NOTE(review): In the 'V' case below, sForm.find('XOP') is used as a boolean
                    # without comparing it to zero, so it is true whenever 'XOP' is absent (-1), and
                    # sForm.find('VEX') > 0 is false for a form starting with 'VEX'.  As written, the
                    # 'not (...)' part can only trigger for a form starting with 'XOP'.  Confirm the
                    # intent before changing it, as a stricter check may flag existing instructions.
                    if    (sOpFormMatch in [ 'REG', 'MEM', ] and sForm.find('_' + sOpFormMatch) < 0) \
                       or (sOpFormMatch in [ 'FIXED', ]      and sForm.find(sOpFormMatch) < 0) \
                       or (sOpFormMatch == 'RM' and (sForm.find('_MEM') > 0 or sForm.find('_REG') > 0) ) \
                       or (sOpFormMatch == 'V'  and (   not (sForm.find('VEX') > 0 or sForm.find('XOP')) \
                                                     or sForm.replace('VEX','').find('V') < 0) ):
                        self.error('%s: current instruction @op%u and a_Form type does not match: %s/%s vs %s'
                                   % (sMacro, iOperand + 1, oInstr.aoOperands[iOperand].sType, sOpFormMatch, sForm, ));
                # Operands beyond those covered by the form must be fixed ones.
                if len(g_kdIemForms[sForm][1]) < len(oInstr.aoOperands):
                    for iOperand in range(len(g_kdIemForms[sForm][1]), len(oInstr.aoOperands)):
                        if    oInstr.aoOperands[iOperand].sType != 'FIXED' \
                          and g_kdOpTypes[oInstr.aoOperands[iOperand].sType][0] != 'IDX_ParseFixedReg':
                            self.error('%s: Expected FIXED type operand #%u following operands given by a_Form=%s: %s (%s)'
                                       % (sMacro, iOperand, sForm, oInstr.aoOperands[iOperand].sType,
                                          oInstr.aoOperands[iOperand].sWhere));


        # Check @opcodesub
        if    oInstr.sSubOpcode \
          and g_kdIemForms[sForm][2] \
          and oInstr.sSubOpcode.find(g_kdIemForms[sForm][2]) < 0:
            self.error('%s: current instruction @opcodesub and a_Form does not match: %s vs %s (%s)'
                       % (sMacro, oInstr.sSubOpcode, g_kdIemForms[sForm][2], sForm,));

    # Stats.
    if not self.oReStatsName.match(sStats):
        self.error('%s: invalid a_Stats value: %s' % (sMacro, sStats,));
    elif oInstr.sStats is None:
        oInstr.sStats = sStats;
    elif oInstr.sStats != sStats:
        self.error('%s: mismatching @opstats and a_Stats value: %s vs %s'
                   % (sMacro, oInstr.sStats, sStats,));

    # Process the hints (simply merge with @ophints w/o checking anything).
    # (The DISOPTYPE_ prefix is stripped and the rest lower cased before looking it up in g_kdHints.)
    for sHint in sDisHints.split('|'):
        sHint = sHint.strip();
        if sHint.startswith('DISOPTYPE_'):
            sShortHint = sHint[len('DISOPTYPE_'):].lower();
            if sShortHint in g_kdHints:
                oInstr.dHints[sShortHint] = True; # (dummy value, using dictionary for speed)
            else:
                self.error('%s: unknown a_fDisHints value: %s' % (sMacro, sHint,));
        elif sHint != '0':
            self.error('%s: expected a_fDisHints value: %s' % (sMacro, sHint,));

    # (Same treatment for the IEMOPHINT_ prefixed values.)
    for sHint in sIemHints.split('|'):
        sHint = sHint.strip();
        if sHint.startswith('IEMOPHINT_'):
            sShortHint = sHint[len('IEMOPHINT_'):].lower();
            if sShortHint in g_kdHints:
                oInstr.dHints[sShortHint] = True; # (dummy value, using dictionary for speed)
            else:
                self.error('%s: unknown a_fIemHints value: %s' % (sMacro, sHint,));
        elif sHint != '0':
            self.error('%s: expected a_fIemHints value: %s' % (sMacro, sHint,));

    _ = sAsm;
    return True;
5417
def workerIemOpMnemonic(self, sMacro, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands):
    """
    Processes one of the a IEMOP_MNEMONIC0, IEMOP_MNEMONIC1, IEMOP_MNEMONIC2,
    IEMOP_MNEMONIC3, and IEMOP_MNEMONIC4 macros.

    These macros carry no statistics name or assembly string, so both are
    derived from the lower case mnemonic and the operand list before the
    work is handed to workerIemOpMnemonicEx(), whose result is returned.
    """
    if asOperands:
        # E.g. 'add' with ['Gb', 'Eb'] gives 'add_Gb_Eb' and 'add Gb,Eb'.
        sStats = sLower + '_' + '_'.join(asOperands);
        sAsm   = sLower + ' ' + ','.join(asOperands);
    else:
        sStats = sLower;
        sAsm   = sLower;
    return self.workerIemOpMnemonicEx(sMacro, sStats, sAsm, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands);
5427
def workerIemMcBegin(self, sCode, offBeginStatementInCodeStr, offBeginStatementInLine):
    """
    Handles an IEM_MC_BEGIN statement by opening a new MC block.

    sCode is the code string currently being scanned,
    offBeginStatementInCodeStr the statement offset within that string and
    offBeginStatementInLine the statement offset within the source line.

    Returns True.  Trouble is reported via raiseError.
    """
    if self.fDebugMc:
        self.debug('IEM_MC_BEGIN on %s off %s' % (self.iLine, offBeginStatementInLine,));
        #self.debug('%s<eos>' % (sCode,));

    # Preconditions: We need a function context and must not be inside another block.
    if not self.oCurFunction:
        self.raiseError('IEM_MC_BEGIN w/o current function (%s)' % (sCode,));
    if self.oCurMcBlock:
        self.raiseError('IEM_MC_BEGIN before IEM_MC_END. Previous IEM_MC_BEGIN at line %u' % (self.oCurMcBlock.iBeginLine,));

    # The indent is the distance from the start of the (sub-)line holding the
    # statement.  rfind returns -1 when there is no preceding newline (i.e. no
    # multiline macro expansion), which makes the subtrahend zero.
    cchIndent = offBeginStatementInCodeStr - (sCode.rfind('\n', 0, offBeginStatementInCodeStr) + 1);
    #self.debug('cchIndent=%s sFunc=%s' % (cchIndent, self.oCurFunction.sName));

    # Create the block.
    oCurInstr = self.aoCurInstrs[-1] if self.aoCurInstrs else None;
    self.oCurMcBlock = McBlock(self.sSrcFile, self.iLine, offBeginStatementInLine, self.oCurFunction, self.iMcBlockInFunc,
                               oInstruction = oCurInstr, cchIndent = cchIndent);

    # Only add it to the global list (and count it) if the preprocessor
    # context matches the host architecture.
    try:
        if (   not self.aoCppCondStack
            or not self.sHostArch
            or self.PreprocessorConditional.isInBlockForArch(self.aoCppCondStack, self.sHostArch, self.iLine)):
            g_aoMcBlocks.append(self.oCurMcBlock);
            self.cTotalMcBlocks += 1;
    except Exception as oXcpt:
        self.raiseError(oXcpt.args[0]);

    # The instruction and the per-function block counter are always updated.
    oBlockInstr = self.oCurMcBlock.oInstruction;
    if oBlockInstr:
        oBlockInstr.aoMcBlocks.append(self.oCurMcBlock);
    self.iMcBlockInFunc += 1;
    return True;
5467
@staticmethod
def extractLinesFromMacroExpansionLine(sRawLine, offBegin, offEnd, sBeginStmt = 'IEM_MC_BEGIN'):
    """
    Splits a statement block out of a macro expanded line.

    A macro expanded line contains several newline separated "sub-lines".
    This helper, used by workerIemMcEnd and workerIemMcDeferToCImplXRet,
    isolates the sub-lines spanning offBegin thru offEnd of sRawLine.

    Returns list of lines covering offBegin thru offEnd in sRawLine, each
    terminated by a newline.  Note that when the extracted text ends with a
    newline, the list ends with an entry holding just a newline.
    """
    # Drop everything following the sub-line containing offEnd.
    offNewline = sRawLine.find('\n', offEnd);
    sText = sRawLine[:offNewline + 1] if offNewline > 0 else sRawLine;

    # Drop the sub-lines preceding the one containing offBegin.
    offFirst = sText.rfind('\n', 0, offBegin) + 1;
    sText = sText[offFirst:];

    # If something precedes the begin statement on its sub-line, drop that too.
    if not sText.strip().startswith(sBeginStmt):
        sText = sText[offBegin - offFirst:];

    return [sSubLine + '\n' for sSubLine in sText.split('\n')];
5488
def workerIemMcEnd(self, offEndStatementInLine):
    """
    Process a IEM_MC_END macro invocation.

    Gathers the source lines making up the current MC block, strips whatever
    follows the 'IEM_MC_END();' statement on the last of them and completes
    the block.

    offEndStatementInLine is the offset of the IEM_MC_END statement in the
    current line.

    Returns True.  Malformed input is reported via raiseError.
    """
    if self.fDebugMc:
        self.debug('IEM_MC_END on %s off %s' % (self.iLine, offEndStatementInLine,));

    # Check preconditions.
    if not self.oCurMcBlock:
        self.raiseError('IEM_MC_END w/o IEM_MC_BEGIN.');

    #
    # HACK ALERT! For blocks originating from macro expansion the start and
    #             end line will be the same, but the line has multiple
    #             newlines inside it.  So, we have to do some extra tricks
    #             to get the lines out of there. We ASSUME macros aren't
    #             messy, but keep IEM_MC_BEGIN/END on separate lines.
    #
    if self.iLine > self.oCurMcBlock.iBeginLine:
        asLines = self.asLines[self.oCurMcBlock.iBeginLine - 1 : self.iLine];
        if not asLines[0].strip().startswith('IEM_MC_BEGIN'):
            self.raiseError('IEM_MC_BEGIN is not the first word on the line');

        # Hack alert! Detect mixed tail/head macros a la cmpxchg16b and split up the lines
        #             so we can deal correctly with IEM_MC_END below and everything else.
        for sLine in asLines:
            cNewLines = sLine.count('\n');
            assert cNewLines > 0;
            if cNewLines > 1:
                # The end offset is relative to the last line, so add the
                # length of all the lines preceding it in the joined string.
                offEndInJoined = offEndStatementInLine + sum(len(s) for s in asLines) - len(asLines[-1]);
                asLines = self.extractLinesFromMacroExpansionLine(''.join(asLines),
                                                                  self.oCurMcBlock.offBeginLine,
                                                                  offEndInJoined);
                self.oCurMcBlock.iMacroExp = McBlock.kiMacroExp_Partial;
                break;
    else:
        self.oCurMcBlock.iMacroExp = McBlock.kiMacroExp_Entire;
        asLines = self.extractLinesFromMacroExpansionLine(self.asLines[self.iLine - 1],
                                                          self.oCurMcBlock.offBeginLine, offEndStatementInLine);

    #
    # Strip anything following the IEM_MC_END(); statement in the final line,
    # so that we don't carry on any trailing 'break' after macro expansions
    # like for iemOp_movsb_Xb_Yb.
    #
    while asLines and asLines[-1].strip() == '':
        asLines.pop();
    if not asLines:
        self.raiseError('bogus IEM_MC_END: No block lines found.');
    sFinal        = asLines[-1];
    offEndInFinal = sFinal.find('IEM_MC_END');
    if offEndInFinal < 0:
        self.raiseError('bogus IEM_MC_END: Not in final line: %s' % (sFinal,));

    def skipBlanksAndExpect(off, chExpected):
        """ Skips blanks in sFinal from off, requires chExpected to follow, returns the offset after it. """
        # The bounds checks make sure a statement that is cut short ends up
        # as a parser error rather than as an IndexError.
        while off < len(sFinal) and sFinal[off].isspace():
            off += 1;
        if off >= len(sFinal) or sFinal[off] != chExpected:
            self.raiseError('bogus IEM_MC_END: Expected "%s" at %s: %s' % (chExpected, off, sFinal,));
        return off + 1;

    offFinalEnd = offEndInFinal + len('IEM_MC_END');
    for chExpected in ('(', ')', ';'):
        offFinalEnd = skipBlanksAndExpect(offFinalEnd, chExpected);

    asLines[-1] = sFinal[: offFinalEnd];

    #
    # Complete and discard the current block.
    #
    self.oCurMcBlock.complete(self.iLine, offEndStatementInLine,
                              offEndStatementInLine + offFinalEnd - offEndInFinal, asLines);
    self.oCurMcBlock = None;
    return True;
5567
def workerIemMcDeferToCImplXRet(self, sCode, offBeginStatementInCodeStr, offBeginStatementInLine, cParams):
    """
    Process a IEM_MC_DEFER_TO_CIMPL_[1-5]_RET macro invocation.

    The statement forms an MC block all by itself, so the block is created,
    parsed, completed and registered here in one go.

    sCode is the code string being scanned, offBeginStatementInCodeStr the
    statement offset within it, offBeginStatementInLine the statement offset
    within the source line and cParams the digit from the macro name.

    Returns True.  Trouble is reported via raiseError.
    """
    sStmt = 'IEM_MC_DEFER_TO_CIMPL_%d_RET' % (cParams,);
    if self.fDebugMc:
        self.debug('%s on %s off %s' % (sStmt, self.iLine, offBeginStatementInLine,));
        #self.debug('%s<eos>' % (sCode,));

    # Check preconditions.
    if not self.oCurFunction:
        self.raiseError('%s w/o current function (%s)' % (sStmt, sCode,));
    if self.oCurMcBlock:
        self.raiseError('%s inside IEM_MC_BEGIN block starting at line %u' % (sStmt, self.oCurMcBlock.iBeginLine,));

    # Figure out the indent level the block starts at, adjusting for expanded multiline macros.
    cchIndent = offBeginStatementInCodeStr;
    offPrevNewline = sCode.rfind('\n', 0, offBeginStatementInCodeStr);
    if offPrevNewline >= 0:
        cchIndent -= offPrevNewline + 1;
    #self.debug('cchIndent=%s offPrevNewline=%s sFunc=%s' % (cchIndent, offPrevNewline, self.oCurFunction.sName));

    # Start a new block.
    oMcBlock = McBlock(self.sSrcFile, self.iLine, offBeginStatementInLine, self.oCurFunction, self.iMcBlockInFunc,
                       oInstruction = self.aoCurInstrs[-1] if self.aoCurInstrs else None,
                       cchIndent = cchIndent, fDeferToCImpl = True);

    # Parse the statement.
    asArgs, offAfter, cLines = self.findAndParseMacroInvocationEx(sCode, sStmt, offBeginStatementInCodeStr);
    if asArgs is None:
        self.raiseError('%s: Closing parenthesis not found!' % (sStmt,));
    if len(asArgs) != cParams + 4:
        self.raiseError('%s: findAndParseMacroInvocationEx returns %s args, expected %s! (%s)'
                        % (sStmt, len(asArgs), cParams + 4, asArgs));

    oMcBlock.aoStmts = [ McBlock.parseMcDeferToCImpl(oMcBlock, asArgs[0], asArgs[1:]), ];

    # These MCs are not typically part of macro expansions, but let's get
    # it out of the way immediately if it's the case.
    if cLines > 0 or self.asLines[oMcBlock.iBeginLine - 1].count('\n') <= 1:
        asLines = self.asLines[self.iLine - 1 : self.iLine - 1 + cLines + 1];
        assert offAfter < len(asLines[-1]) and asLines[-1][offAfter] == ';', \
               'iBeginLine=%d iLine=%d offAfter=%s line: "%s"' % (oMcBlock.iBeginLine, self.iLine, offAfter, asLines[-1],);
        asLines[-1] = asLines[-1][:offAfter + 1];
    else:
        asLines = self.extractLinesFromMacroExpansionLine(self.asLines[self.iLine - 1], offBeginStatementInCodeStr,
                                                          offAfter, sStmt);
        # The helper splits on newlines, so text ending with a newline gives
        # us a trailing blank entry.  Drop such entries (like workerIemMcEnd
        # does) so the last entry is the one with the terminating semicolon.
        while len(asLines) > 1 and asLines[-1].strip() == '':
            asLines.pop();
        offSemicolon = asLines[-1].find(';');
        assert offSemicolon >= 0;
        asLines[-1] = asLines[-1][:offSemicolon + 1];

    assert asLines[0].find(sStmt) >= 0;
    #if not asLines[0].strip().startswith(sStmt):
    #    self.raiseError('%s is not the first word on the line: %s' % (sStmt, asLines[0].strip()));

    # Advance to the line with the closing ')'.
    self.iLine += cLines;

    # Complete the block.
    oMcBlock.complete(self.iLine, 0 if cLines > 0 else offBeginStatementInCodeStr, offAfter + 1, asLines);

    g_aoMcBlocks.append(oMcBlock);
    if oMcBlock.oInstruction:
        oMcBlock.oInstruction.aoMcBlocks.append(oMcBlock);
    self.cTotalMcBlocks += 1;
    self.iMcBlockInFunc += 1;

    return True;
5635
def workerStartFunction(self, asArgs):
    """
    Handles the start of a decoder function definition.

    Decoder functions are defined using the FNIEMOP*_DEF* and FNIEMOP_*STUB*
    macros.  asArgs is the parsed invocation, where entry 0 is the macro name
    and entry 1 is passed on as the function name.

    Returns True.
    """
    # The previous function, if any, ends on the line before this one.
    oPrevFunction = self.oCurFunction;
    if oPrevFunction:
        iEndLine = self.iLine - 1;
        oPrevFunction.complete(iEndLine, self.asLines[oPrevFunction.iBeginLine - 1 : iEndLine]);

    # Make the new function the current one.
    self.oCurFunction = DecoderFunction(self.sSrcFile, self.iLine, asArgs[1], asArgs);
    return True;
5651
def checkCodeForMacro(self, sCode, offLine):
    """
    Scans a piece of code for the macro invocations the parser cares about.

    sCode is the code snippet to scan and offLine is its offset within the
    current source line (added to the offsets handed to the IEM_MC workers).

    Returns True if a function definition macro or at least an 'IEM_MC_'
    prefix was found, otherwise False.
    """
    # Everything of interest here takes arguments, so bail out unless there
    # is an opening parenthesis somewhere past the first character.
    if sCode.find('(') <= 0:
        return False;

    #
    # Instruction decoder function definitions.  ASSUME single line.
    #
    asArgs = self.findAndParseFirstMacroInvocation(sCode,
                                                   [ 'FNIEMOP_DEF',
                                                     'FNIEMOPRM_DEF',
                                                     'FNIEMOP_STUB',
                                                     'FNIEMOP_STUB_1',
                                                     'FNIEMOP_UD_STUB',
                                                     'FNIEMOP_UD_STUB_1' ]);
    if asArgs is not None:
        self.workerStartFunction(asArgs);
        #self.debug('%s: oCurFunction=%s' % (self.iLine, self.oCurFunction.sName,));

        if not self.aoCurInstrs:
            self.addInstruction();
        for oInstr in self.aoCurInstrs:
            if oInstr.iLineFnIemOpMacro != -1:
                self.error('%s: already seen a FNIEMOP_XXX macro for %s' % (asArgs[0], oInstr,) );
            else:
                oInstr.iLineFnIemOpMacro = self.iLine;
        self.setInstrunctionAttrib('sFunction', asArgs[1]);
        fStub = asArgs[0].find('STUB') > 0;
        self.setInstrunctionAttrib('fStub', fStub, fOverwrite = True);
        self.setInstrunctionAttrib('fUdStub', asArgs[0].find('UD_STUB') > 0, fOverwrite = True);
        if fStub:
            self.doneInstructions(fEndOfFunction = True);
        return True;

    #
    # Worker function definitions, so we can get a context for MC blocks.
    #
    asArgs = self.findAndParseFirstMacroInvocation(sCode,
                                                   [ 'FNIEMOP_DEF_1',
                                                     'FNIEMOP_DEF_2', ]);
    if asArgs is not None:
        self.workerStartFunction(asArgs);
        #self.debug('%s: oCurFunction=%s (%s)' % (self.iLine, self.oCurFunction.sName, asArgs[0]));
        return True;

    #
    # IEMOP_HLP_DONE_VEX_DECODING_*
    # The L0 variants imply the vex_l_zero hint; complain if the mnemonic
    # macro was seen without it and add it.
    #
    asArgs = self.findAndParseFirstMacroInvocation(sCode,
                                                   [ 'IEMOP_HLP_DONE_VEX_DECODING',
                                                     'IEMOP_HLP_DONE_VEX_DECODING_L0',
                                                     'IEMOP_HLP_DONE_VEX_DECODING_NO_VVVV',
                                                     'IEMOP_HLP_DONE_VEX_DECODING_L0_AND_NO_VVVV',
                                                     ]);
    if asArgs is not None:
        sMacro = asArgs[0];
        if sMacro in ('IEMOP_HLP_DONE_VEX_DECODING_L0', 'IEMOP_HLP_DONE_VEX_DECODING_L0_AND_NO_VVVV', ):
            for oInstr in self.aoCurInstrs:
                if 'vex_l_zero' in oInstr.dHints:
                    continue;
                if oInstr.iLineMnemonicMacro >= 0:
                    self.errorOnLine(oInstr.iLineMnemonicMacro,
                                     'Missing IEMOPHINT_VEX_L_ZERO! (%s on line %d)' % (sMacro, self.iLine,));
                oInstr.dHints['vex_l_zero'] = True;

    #
    # IEMOP_MNEMONIC*
    #
    if sCode.find('IEMOP_MNEMONIC') >= 0:
        # IEMOP_MNEMONIC(a_Stats, a_szMnemonic) IEMOP_INC_STATS(a_Stats)
        asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC');
        if asArgs is not None and len(self.aoCurInstrs) == 1:
            oInstr = self.aoCurInstrs[0];
            if oInstr.sStats is None:
                oInstr.sStats = asArgs[1];
            self.deriveMnemonicAndOperandsFromStats(oInstr, asArgs[1]);

        # IEMOP_MNEMONIC<N>EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, [a_Op1, .., a_OpN,]
        #                     a_fDisHints, a_fIemHints)
        # I.e. the N operands sit between the five fixed arguments and the two hint arguments.
        for cOperands in range(5):
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC%uEX' % (cOperands,));
            if asArgs is not None:
                self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5],
                                           asArgs[6 + cOperands], asArgs[7 + cOperands],
                                           list(asArgs[6 : 6 + cOperands]));

        # IEMOP_MNEMONIC<N>(a_Form, a_Upper, a_Lower, [a_Op1, .., a_OpN,] a_fDisHints, a_fIemHints)
        for cOperands in range(5):
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC%u' % (cOperands,));
            if asArgs is not None:
                self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3],
                                         asArgs[4 + cOperands], asArgs[5 + cOperands],
                                         list(asArgs[4 : 4 + cOperands]));

    #
    # IEM_MC_BEGIN + IEM_MC_END.
    # We must support multiple instances per code snippet.
    #
    offCode = sCode.find('IEM_MC_');
    if offCode < 0:
        return False;

    for oMatch in self.oReMcBeginEnd.finditer(sCode, offCode):
        sWhat   = oMatch.group(1);
        offStmt = oMatch.start();
        if sWhat == 'END':
            self.workerIemMcEnd(offLine + offStmt);
        elif sWhat == 'BEGIN':
            self.workerIemMcBegin(sCode, offStmt, offLine + offStmt);
        else:
            # The single digit following 'DEFER_TO_CIMPL_' is the parameter count.
            self.workerIemMcDeferToCImplXRet(sCode, offStmt, offLine + offStmt,
                                             int(sWhat[len('DEFER_TO_CIMPL_')]));
    return True;
5795
def workerPreprocessorRecreateMacroRegex(self):
    """
    Rebuilds self.oReMacros from self.dMacros; call whenever the latter changes.

    The regex is a single group of alternatives, one per macro: the name
    followed by an opening parenthesis (blanks allowed in between) when
    asArgs is set, otherwise the name followed by a word boundary.

    Returns True.
    """
    if not self.dMacros:
        self.oReMacros = None;
        return True;

    asAlternatives = [sName + (r'\s*\(' if oMacro.asArgs is not None else r'\b')
                      for sName, oMacro in self.dMacros.items()];
    self.oReMacros = re.compile(r'\b(' + r'|'.join(asAlternatives) + ')');
    return True;
5816
def workerPreprocessorDefine(self, sRest):
    """
    Handles a macro #define, where sRest is what follows the directive word
    (must end with a newline).

    Only macros with IEM_MC_BEGIN or IEM_MC_END in the body are recorded in
    self.dMacros, everything else is ignored.  Continuation lines are
    consumed, advancing self.iLine.

    Returns True on success (including for ignored macros) and the result of
    self.error if the definition cannot be parsed.
    """
    assert sRest[-1] == '\n';

    #
    # Join up continuation lines, keeping the newlines but dropping the
    # backslashes and any blanks preceding them.
    #
    iLineStart = self.iLine;
    while sRest.endswith('\\\n') and self.iLine < len(self.asLines):
        sRest = sRest[:-2].rstrip() + '\n' + self.asLines[self.iLine];
        self.iLine += 1;
    #self.debug('workerPreprocessorDefine: sRest=%s<EOS>' % (sRest,));

    #
    # Split it up into name, argument list and body.  First try the
    # expression yielding an argument list, then fall back on the simple one.
    #
    asArgs = None;
    oMatch = self.oReHashDefine2.match(sRest);
    if oMatch:
        sAllArgs = oMatch.group(2).strip();
        if sAllArgs:
            asArgs = [sParam.strip() for sParam in sAllArgs.split(',')];
        sBody = oMatch.group(3);
    else:
        oMatch = self.oReHashDefine3.match(sRest);
        if not oMatch:
            self.debug('workerPreprocessorDefine: wtf? sRest=%s' % (sRest,));
            return self.error('bogus macro definition: %s' % (sRest,));
        sBody = oMatch.group(2);
    sName = oMatch.group(1);
    assert sName == sName.strip();
    #self.debug('workerPreprocessorDefine: sName=%s asArgs=%s sBody=%s<EOS>' % (sName, asArgs, sBody));

    #
    # Is this of any interest to us?  We do NOT support MC blocks within
    # nested macro expansion, just to avoid lots of extra work.
    #
    # There is only limited support for macros expanding to partial MC blocks.
    #
    # Note! IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX and other macros making
    #       use of IEMOP_RAISE_INVALID_LOCK_PREFIX_RET() will be ignored here and
    #       dealt with by overriding IEMOP_RAISE_INVALID_LOCK_PREFIX_RET and its
    #       siblings in the recompiler.  This is a lot simpler than nested macro
    #       expansion and lots of heuristics for locating all the relevant macros.
    #       Also, this way we don't produce lots of unnecessary threaded functions.
    #
    if 'IEM_MC_BEGIN' not in sBody and 'IEM_MC_END' not in sBody:
        #self.debug('workerPreprocessorDefine: irrelevant (%s: %s)' % (sName, sBody));
        return True;

    #
    # Record the macro and refresh the regular expression used for finding invocations.
    #
    if self.fDebugPreproc:
        self.debug('#define %s on line %u' % (sName, self.iLine,));
    self.dMacros[sName] = SimpleParser.Macro(sName, asArgs, sBody.strip(), iLineStart);
    return self.workerPreprocessorRecreateMacroRegex();
5877
def workerPreprocessorUndef(self, sRest):
    """
    Handles a macro #undef, where sRest is what follows the directive word.

    Returns True if the macro is not one we are tracking, otherwise the
    result of workerPreprocessorRecreateMacroRegex after dropping it.
    """
    # Quick and dirty comment stripping: chop at the first slash, unless it
    # is the very first character.
    offSlash = sRest.find('/');
    sName = (sRest[:offSlash] if offSlash > 0 else sRest).strip();

    # Nothing to do unless it is a macro we are tracking.
    if sName not in self.dMacros:
        return True;

    if self.fDebugPreproc:
        self.debug('#undef %s on line %u' % (sName, self.iLine,));
    del self.dMacros[sName];
    return self.workerPreprocessorRecreateMacroRegex();
5896
def workerPreprocessorIfOrElif(self, sDirective, sRest):
    """
    Handles an #if, #ifdef, #ifndef or #elif directive.

    sDirective is the directive word (w/o hash) and sRest what follows it.
    An #elif is added to the entry on top of self.aoCppCondStack, the others
    push a new entry.  Continuation lines are consumed, advancing self.iLine.

    Returns True.  Trouble is reported via raiseError.
    """
    fIsElif = sDirective == 'elif';

    #
    # An #elif needs an open #if that has not seen its #else yet.
    #
    if fIsElif:
        if not self.aoCppCondStack:
            self.raiseError('#elif without #if');
        if self.aoCppCondStack[-1].fInElse:
            self.raiseError('#elif after #else');

    #
    # Join up continuation lines into a single one, replacing the escaped
    # newline (and any blanks preceding it) by a space.
    #
    while sRest.endswith('\\\n') and self.iLine < len(self.asLines):
        sRest = sRest[:-2].rstrip() + ' ' + self.asLines[self.iLine];
        self.iLine += 1;

    # Get rid of comments as well as leading and trailing blanks.
    sExpr = self.stripComments(sRest).strip();

    #
    # Create the conditional and stash it.
    #
    try:
        oPreprocCond = self.PreprocessorConditional(sDirective, sExpr);
    except Exception as oXcpt:
        self.raiseError(oXcpt.args[0]);

    if fIsElif:
        self.aoCppCondStack[-1].aoElif.append(oPreprocCond);
    else:
        self.aoCppCondStack.append(oPreprocCond);
    return True;
5935
def workerPreprocessorElse(self):
    """
    Handles an #else directive by flagging the conditional on top of
    self.aoCppCondStack as being in its else part.

    Returns True.  Calls raiseError if there is no open conditional or if it
    already is in its else part.
    """
    if not self.aoCppCondStack:
        self.raiseError('#else without #if');
    oTopCond = self.aoCppCondStack[-1];
    if oTopCond.fInElse:
        self.raiseError('Another #else after #else');

    oTopCond.fInElse = True;
    return True;
5947
def workerPreprocessorEndif(self):
    """
    Handles an #endif directive by dropping the conditional on top of
    self.aoCppCondStack.

    Returns True.  Calls raiseError if there is no open conditional.
    """
    if not self.aoCppCondStack:
        self.raiseError('#endif without #if');

    del self.aoCppCondStack[-1];
    return True;
5957
def checkPreprocessorDirective(self, sLine):
    """
    Handles a preprocessor directive line (sLine must contain a '#').

    The directive word is isolated and the line is dispatched to the worker
    for define, undef, if, ifdef, ifndef, elif, else and endif.

    Returns the worker result for those, False for any other directive.
    """
    cchLine = len(sLine);

    # Locate the directive word: skip the hash and any blanks following it.
    offHash = sLine.find('#');
    assert offHash >= 0;
    offDirective = offHash + 1;
    while offDirective < cchLine and sLine[offDirective].isspace():
        offDirective += 1;

    # The word runs up to the next blank (or the end of the line).
    offTail = offDirective;
    while offTail < cchLine and not sLine[offTail].isspace():
        offTail += 1;
    sDirective = sLine[offDirective:offTail];
    if self.fDebugPreproc:
        self.debug('line %d: #%s...' % (self.iLine, sDirective));

    # Skip blanks following the word, except for the very last one.  So, the
    # tail handed to the workers starts with the blank preceding the
    # arguments, or is just the final blank of the line if there are none.
    while offTail + 1 < cchLine and sLine[offTail + 1].isspace():
        offTail += 1;
    sTail = sLine[offTail:];

    # Dispatch.
    if sDirective == 'define':
        fRc = self.workerPreprocessorDefine(sTail);
    elif sDirective == 'undef':
        fRc = self.workerPreprocessorUndef(sTail);
    elif sDirective in ('if', 'ifdef', 'ifndef', 'elif',):
        fRc = self.workerPreprocessorIfOrElif(sDirective, sTail);
    elif sDirective == 'else':
        fRc = self.workerPreprocessorElse();
    elif sDirective == 'endif':
        fRc = self.workerPreprocessorEndif();
    else:
        if self.fDebugPreproc:
            self.debug('line %d: Unknown preprocessor directive: %s' % (self.iLine, sDirective));
        fRc = False;
    return fRc;
5997
def expandMacros(self, sLine, oMatch):
    """
    Expands a macro we know about in the given line.

    Currently we ASSUME there is only one invocation in the line and that
    it is the one oMatch (a self.oReMacros match on sLine) matched.

    Returns the line with the invocation replaced by the expansion.
    Trouble is reported via raiseError.
    """
    #
    # Get our bearings.  The matched text is the macro name, with a
    # trailing '(' if the macro takes parameters.
    #
    offMatch    = oMatch.start();
    offMatchEnd = oMatch.end();
    sMatched    = oMatch.group(1);
    assert sMatched == sLine[offMatch : offMatchEnd];

    fWithArgs = sMatched.endswith('(');
    sName     = sMatched[:-1].strip() if fWithArgs else sMatched;
    oMacro    = self.dMacros[sName] # type: SimpleParser.Macro

    #
    # Simple macro invocations w/o parameters are straight forward.
    #
    if not fWithArgs:
        if self.fDebugPreproc:
            self.debug('expanding simple macro %s on line %u' % (sName, self.iLine,));
        return sLine[:offMatch] + oMacro.expandMacro(self) + sLine[offMatchEnd:];

    #
    # Macro with parameters.  Extract the arguments by scanning up to the
    # parenthesis closing the invocation, splitting on top level commas.
    # ASSUMES it is all on the same line!
    #
    asArgs    = [];
    cLevel    = 1;
    offCurArg = offMatchEnd;
    offCur    = offMatchEnd;
    while True:
        if offCur >= len(sLine):
            self.raiseError('expandMacros: Invocation of macro %s spans multiple lines!' % (sName,));
        ch = sLine[offCur];
        if ch == ',' and cLevel == 1:
            asArgs.append(sLine[offCurArg:offCur].strip());
            offCurArg = offCur + 1;
        elif ch == '(':
            cLevel += 1;
        elif ch == ')':
            cLevel -= 1;
            if cLevel == 0:
                asArgs.append(sLine[offCurArg:offCur].strip());
                break;
        offCur += 1;

    # An empty pair of parentheses yields a single empty argument; treat it
    # as no arguments for macros w/o parameters.
    if len(oMacro.asArgs) == 0 and asArgs == ['']:
        asArgs = [];
    if len(oMacro.asArgs) != len(asArgs):
        self.raiseError('expandMacros: Argument mismatch in %s invocation' % (oMacro.sName,));

    #
    # Do the expanding.  offCur is at the closing parenthesis.
    #
    if self.fDebugPreproc:
        self.debug('expanding macro %s on line %u with arguments %s' % (sName, self.iLine, asArgs));
    return sLine[:offMatch] + oMacro.expandMacro(self, asArgs) + sLine[offCur + 1 :];
6056
def parse(self):
    """
    Parses the file content in self.asLines.

    Each line is macro expanded (when in code), then either handed to the
    preprocessor directive handling, or split into code and C-style
    comments.  Code is scanned by checkCodeForMacro, completed multi-line
    comments are handed to parseComment.

    Returns number of errors (the printErrors result).
    Raises exception on fatal trouble.
    """
    #self.debug('Parsing %s' % (self.sSrcFile,));

    #
    # Loop thru the lines.
    #
    # Please mind that self.iLine may be updated by checkCodeForMacro and
    # other worker methods.
    #
    while self.iLine < len(self.asLines):
        sLine = self.asLines[self.iLine];
        self.iLine += 1;
        #self.debug('line %u: %s' % (self.iLine, sLine[:-1]));

        # Expand macros we know about if we're currently in code, replacing
        # the line in self.asLines with the expanded version.
        if self.iState == self.kiCode and self.oReMacros:
            oMatch = self.oReMacros.search(sLine);
            if oMatch:
                sLine = self.expandMacros(sLine, oMatch);
                if self.fDebugPreproc:
                    self.debug('line %d: expanded\n%s ==>\n%s' % (self.iLine, self.asLines[self.iLine - 1], sLine[:-1],));
                self.asLines[self.iLine - 1] = sLine;

        # Check for preprocessor directives before comments and other stuff.
        # ASSUMES preprocessor directives doesn't end with multiline comments.
        if self.iState == self.kiCode and sLine.lstrip().startswith('#'):
            if self.fDebugPreproc:
                self.debug('line %d: preproc' % (self.iLine,));
            self.checkPreprocessorDirective(sLine);
            continue;

        #
        # Lines without any slash cannot start or end a comment.
        #
        offSlash = sLine.find('/');
        if offSlash < 0:
            if self.iState == self.kiCommentMulti:
                # Append the line if in multi-line comment.
                #self.debug('line %d: multi' % (self.iLine,));
                self.sComment += sLine;
            elif self.iState == self.kiCode:
                if 'IEMOP_' in sLine or 'FNIEMOPRM_DEF' in sLine or 'IEM_MC' in sLine:
                    # Check code line for relevant macro.
                    #self.debug('line %d: macro' % (self.iLine,));
                    self.checkCodeForMacro(sLine, 0);
                elif sLine[0] == '}':
                    # If the line is a '}' in the first position, complete the instructions.
                    #self.debug('line %d: }' % (self.iLine,));
                    self.doneInstructions(fEndOfFunction = True);
                elif sLine.find('PFNIEMOP') > 0 and self.oReFunTable.match(sLine):
                    # Look for instruction table on the form 'IEM_STATIC const PFNIEMOP g_apfnVexMap3'
                    # so we can check/add @oppfx info from it.
                    self.parseFunctionTable(sLine);
            continue;

        #
        # C++ line comment when in code: Only what precedes it is of interest.
        #
        if offSlash + 1 < len(sLine) and sLine[offSlash + 1] == '/' and self.iState == self.kiCode:
            if offSlash > 0:
                self.checkCodeForMacro(sLine[:offSlash], 0);
            continue;

        #
        # Otherwise, walk the line alternating between code and C-style comments.
        #
        offLine = 0;
        while offLine < len(sLine):
            if self.iState == self.kiCode:
                # Look for substantial multiline comment so we pass the following MC as a whole line:
                #   IEM_MC_ARG_CONST(uint8_t, bImmArg, /*=*/ bImm, 2);
                # I.e. comments closed within 16 chars (a bit random) are treated as code.
                # Note! We ignore C++ comments here, assuming these aren't used in lines with C-style comments.
                offHit = sLine.find('/*', offLine);
                while offHit >= 0:
                    offEnd = sLine.find('*/', offHit + 2);
                    if offEnd < 0 or offEnd - offHit >= 16:
                        break;
                    offHit = sLine.find('/*', offEnd);

                if offHit < 0:
                    # The rest of the line is code.
                    self.checkCodeForMacro(sLine[offLine:], offLine);
                    offLine = len(sLine);
                else:
                    # Code up to the comment, then switch to comment mode.
                    self.checkCodeForMacro(sLine[offLine:offHit], offLine);
                    self.sComment     = '';
                    self.iCommentLine = self.iLine;
                    self.iState       = self.kiCommentMulti;
                    offLine = offHit + 2;

            elif self.iState == self.kiCommentMulti:
                offHit = sLine.find('*/', offLine);
                if offHit < 0:
                    # The comment continues on the next line.
                    self.sComment += sLine[offLine:];
                    offLine = len(sLine);
                else:
                    # End of comment: switch back to code and process it.
                    self.sComment += sLine[offLine:offHit];
                    self.iState = self.kiCode;
                    offLine = offHit + 2;
                    self.parseComment();
            else:
                assert False;

    #
    # Wrap up.
    #
    self.doneInstructions(fEndOfFunction = True);
    self.debug('%3s%% / %3s stubs out of %4s instructions and %4s MC blocks in %s'
               % (self.cTotalStubs * 100 // max(self.cTotalInstr, 1), self.cTotalStubs, self.cTotalInstr,
                  self.cTotalMcBlocks, os.path.basename(self.sSrcFile),));
    return self.printErrors();
6162
# Some sanity checking.
def __sanityCheckEFlagsClasses():
    """
    Asserts that each flag listed in SimpleParser.kdEFlagsClasses is a key
    in g_kdEFlagsMnemonics.
    """
    for sClass, dAttribLists in SimpleParser.kdEFlagsClasses.items():
        for sAttrib, asFlags in dAttribLists.items():
            for sFlag in asFlags:
                assert sFlag in g_kdEFlagsMnemonics, 'sClass=%s sAttrib=%s sFlag=%s' % (sClass, sAttrib, sFlag,);

# Run the check when the module is loaded.
__sanityCheckEFlagsClasses();
6170
## The parsed content of IEMAllInstCommonBodyMacros.h.
## Set by __parseFileByName on the first call and handed to the parsers of all other files.
g_oParsedCommonBodyMacros = None # type: SimpleParser

def __parseFileByName(sSrcFile, sDefaultMap, sHostArch):
    """
    Parses one source file for instruction specifications.

    On the first call IEMAllInstCommonBodyMacros.h (looked for next to
    sSrcFile, then next to this script) is parsed too, and the result is
    kept in g_oParsedCommonBodyMacros for this and all subsequent calls.

    Returns a tuple with the SimpleParser.parse() result for sSrcFile and
    the parser instance.
    Raises Exception if a file cannot be opened or read; ParserException
    is printed to stderr and re-raised.
    """
    #
    # Read sSrcFile into a line array.
    #
    try:
        oFile = open(sSrcFile, "r"); # pylint: disable=consider-using-with,unspecified-encoding
    except Exception as oXcpt:
        raise Exception("failed to open %s for reading: %s" % (sSrcFile, oXcpt,));
    try:
        asLines = oFile.readlines();
    except Exception as oXcpt:
        raise Exception("failed to read %s: %s" % (sSrcFile, oXcpt,));
    finally:
        oFile.close();

    #
    # On the first call, we parse IEMAllInstCommonBodyMacros.h so we
    # can use the macros from it when processing the other files.
    #
    global g_oParsedCommonBodyMacros;
    if g_oParsedCommonBodyMacros is None:
        # Locate the file.
        sCommonBodyMacros = os.path.join(os.path.split(sSrcFile)[0], 'IEMAllInstCommonBodyMacros.h');
        if not os.path.isfile(sCommonBodyMacros):
            sCommonBodyMacros = os.path.join(os.path.split(__file__)[0], 'IEMAllInstCommonBodyMacros.h');

        # Read it.
        try:
            with open(sCommonBodyMacros, "r") as oIncFile: # pylint: disable=unspecified-encoding
                asIncFiles = oIncFile.readlines();
        except Exception as oXcpt:
            raise Exception("failed to open/read %s: %s" % (sCommonBodyMacros, oXcpt,));

        # Parse it.  The file must only define macros, so finding any
        # instructions, tags, stubs or MC blocks in it is an error.
        try:
            oParser = SimpleParser(sCommonBodyMacros, asIncFiles, 'one', sHostArch);
            if oParser.parse() != 0:
                raise ParserException('%s: errors: See above' % (sCommonBodyMacros, ));
            if oParser.cTotalInstr != 0 or oParser.cTotalStubs != 0 or oParser.cTotalTagged != 0 or oParser.cTotalMcBlocks != 0:
                # Note! The value order must match the labels in the message: instr, tags, stubs, MCs.
                raise ParserException('%s: error: Unexpectedly found %u instr, %u tags, %u stubs and %u MCs, expecting zero. %s'
                                      % (sCommonBodyMacros, oParser.cTotalInstr, oParser.cTotalTagged, oParser.cTotalStubs,
                                         oParser.cTotalMcBlocks,
                                         ', '.join(sorted(  [str(oMcBlock.iBeginLine) for oMcBlock in g_aoMcBlocks]
                                                          + [str(oInstr.iLineCreated) for oInstr in g_aoAllInstructions])),));
        except ParserException as oXcpt:
            print(str(oXcpt), file = sys.stderr);
            raise;
        g_oParsedCommonBodyMacros = oParser;

    #
    # Do the parsing.
    #
    try:
        oParser = SimpleParser(sSrcFile, asLines, sDefaultMap, sHostArch, g_oParsedCommonBodyMacros);
        return (oParser.parse(), oParser) ;
    except ParserException as oXcpt:
        print(str(oXcpt), file = sys.stderr);
        raise;
6235
6236
def __doTestCopying():
    """
    Carries out the test copying requested via the asCopyTests lists.

    Each name in an instruction's asCopyTests list is looked up in
    g_dAllInstructionsByStat first and, failing that, in
    g_dAllInstructionsByFunction.  The aoTests of every instruction found are
    appended to the aoTests of the requesting instruction.  A reference that
    matches nothing, or that matches the requesting instruction itself, is
    reported as an error on stderr.

    Returns the number of errors reported.
    """
    asErrors = [];
    for oDstInstr in g_aoAllInstructions:
        if not oDstInstr.asCopyTests:
            continue;
        for sSrcInstr in oDstInstr.asCopyTests:
            # A g_dAllInstructionsByStat hit takes precedence over the by-function lookup.
            oStatMatch  = g_dAllInstructionsByStat.get(sSrcInstr, None);
            aoSrcInstrs = [oStatMatch,] if oStatMatch else g_dAllInstructionsByFunction.get(sSrcInstr, []);
            if not aoSrcInstrs:
                asErrors.append('%s:%s: error: @opcopytests reference "%s" not found\n'
                                % ( oDstInstr.sSrcFile, oDstInstr.iLineCreated, sSrcInstr));
                continue;

            for oSrcInstr in aoSrcInstrs:
                if oSrcInstr != oDstInstr:
                    oDstInstr.aoTests.extend(oSrcInstr.aoTests);
                else:
                    asErrors.append('%s:%s: error: @opcopytests reference "%s" matches the destination\n'
                                    % ( oDstInstr.sSrcFile, oDstInstr.iLineCreated, sSrcInstr));

    if asErrors:
        sys.stderr.write(u''.join(asErrors));
    return len(asErrors);
6264
6265
def __applyOnlyTest():
    """
    Restricts testing to the instructions listed in g_aoOnlyTestInstructions.

    When that list is non-empty, aoTests is replaced by an empty list on every
    instruction in g_aoAllInstructions that has tests but is not in the list.
    When it is empty, nothing is touched.

    Always returns 0.
    """
    if not g_aoOnlyTestInstructions:
        return 0;

    for oInstr in g_aoAllInstructions:
        if oInstr.aoTests and oInstr not in g_aoOnlyTestInstructions:
            oInstr.aoTests = [];
    return 0;
6277
## List of all main instruction files, their default maps and file sets (-1 means included in all sets).
# Each entry is a (filename, default map name, file set number) tuple.
# parseFiles() matches the filename case-insensitively to find the default map,
# and parseAll() passes the first two members of every entry on to
# __parseFilesWorker().
g_aaoAllInstrFilesAndDefaultMapAndSet = (
    ( 'IEMAllInstCommon.cpp.h',     'one',       -1, ),
    ( 'IEMAllInstOneByte.cpp.h',    'one',        1, ),
    ( 'IEMAllInst3DNow.cpp.h',      '3dnow',      2, ),
    ( 'IEMAllInstTwoByte0f.cpp.h',  'two0f',      2, ),
    ( 'IEMAllInstThree0f38.cpp.h',  'three0f38',  3, ),
    ( 'IEMAllInstThree0f3a.cpp.h',  'three0f3a',  3, ),
    ( 'IEMAllInstVexMap1.cpp.h',    'vexmap1',    4, ),
    ( 'IEMAllInstVexMap2.cpp.h',    'vexmap2',    4, ),
    ( 'IEMAllInstVexMap3.cpp.h',    'vexmap3',    4, ),
);
6290
def __parseFilesWorker(asFilesAndDefaultMap, sHostArch):
    """
    Parses the given IEMAllInstruction*.cpp.h files.

    asFilesAndDefaultMap is a sequence of (filename, default map name) pairs.
    A bare filename (no directory part) that does not exist as given is taken
    from the directory of this script instead.

    Once all files are parsed, the test copying (__doTestCopying) and the
    only-test filtering (__applyOnlyTest) are applied, and a line with stub
    statistics is printed to stderr.

    Returns a list of the parsers on success.
    Raises exception on failure.
    """
    sSrcDir   = os.path.dirname(os.path.abspath(__file__));
    cErrors   = 0;
    aoParsers = [];
    for sFilename, sDefaultMap in asFilesAndDefaultMap:
        if not os.path.split(sFilename)[0] and not os.path.exists(sFilename):
            sFilename = os.path.join(sSrcDir, sFilename);
        cThisErrors, oParser = __parseFileByName(sFilename, sDefaultMap, sHostArch);
        cErrors += cThisErrors;
        aoParsers.append(oParser);
    cErrors += __doTestCopying();
    cErrors += __applyOnlyTest();

    # Total stub stats:
    cTotalInstrs = len(g_aoAllInstructions);
    cTotalStubs  = sum(oInstr.fStub for oInstr in g_aoAllInstructions);
    # Note! max() keeps the percentage calculation from dividing by zero when
    #       no instructions were found (e.g. an empty file list).
    print('debug: %3s%% / %3s stubs out of %4s instructions and %4s MC blocks in total'
          % (cTotalStubs * 100 // max(cTotalInstrs, 1), cTotalStubs, cTotalInstrs, len(g_aoMcBlocks),),
          file = sys.stderr);

    if cErrors != 0:
        raise Exception('%d parse errors' % (cErrors,));
    return aoParsers;
6321
6322
def parseFiles(asFiles, sHostArch = None):
    """
    Parses the given selection of IEMAllInstruction*.cpp.h files.

    The default map of each file is taken from the first entry in
    g_aaoAllInstrFilesAndDefaultMapAndSet whose filename equals the last path
    component of the given name, ignoring case.

    Returns a list of the parsers on success.
    Raises exception on failure, including for files not found in that table.
    """
    asFilesAndDefaultMap = [];
    for sFilename in asFiles:
        sBasename = os.path.basename(sFilename).lower();
        sMap = next((aoInfo[1] for aoInfo in g_aaoAllInstrFilesAndDefaultMapAndSet if aoInfo[0].lower() == sBasename),
                    None);
        if not sMap:
            raise Exception('Unable to classify file: %s' % (sFilename,));
        asFilesAndDefaultMap.append((sFilename, sMap));

    # The worker does the actual parsing.
    return __parseFilesWorker(asFilesAndDefaultMap, sHostArch);
6344
6345
def parseAll(sHostArch = None):
    """
    Parses every file listed in g_aaoAllInstrFilesAndDefaultMapAndSet.

    Returns a list of the parsers on success.
    Raises exception on failure.
    """
    # The worker only wants the filename and the default map, not the file set.
    asFilesAndDefaultMap = [];
    for aoInfo in g_aaoAllInstrFilesAndDefaultMapAndSet:
        asFilesAndDefaultMap.append(aoInfo[:2]);
    return __parseFilesWorker(asFilesAndDefaultMap, sHostArch);
6354
6355
6356#
6357# Generators (may perhaps move later).
6358#
def __formatDisassemblerTableEntry(oInstr):
    """
    Formats the disassembler table entry for one instruction.

    The entry is an OP(...) macro invocation with three operand slots, or an
    OPVEX(...) one with four slots when the instruction has more than three
    operands.

    Returns the entry as a single line, with the columns padded out to fixed
    offsets where they fit.
    """
    aoOperands = oInstr.aoOperands;
    if len(aoOperands) > 3:
        sMacro       = 'OPVEX';
        cMaxOperands = 4;
    else:
        sMacro       = 'OP';
        cMaxOperands = 3;
    assert len(aoOperands) <= cMaxOperands;

    #
    # Format string: mnemonic, a space, then the comma separated operand formats.
    #
    sFormatCol    = '%s("%s' % (sMacro, oInstr.sMnemonic,);
    asOperandFmts = [];
    for oOperand in aoOperands:
        sOperandFmt = g_kdOpTypes[oOperand.sType][2];
        if sOperandFmt[0] != '%':
            sOperandFmt = sOperandFmt.upper(); ## @todo remove upper() later.
        asOperandFmts.append(sOperandFmt);
    if asOperandFmts:
        sFormatCol += ' ' + ','.join(asOperandFmts);
    sFormatCol += '",';
    asColumns = [ sFormatCol, ];

    #
    # Decoders.
    #
    iStart         = len(asColumns);
    sEncoding      = oInstr.sEncoding;
    kdPlainParsers = {
        'VEX.ModR/M': 'IDX_ParseModRM,',
        'vex2':       'IDX_ParseVex2b,',
        'vex3':       'IDX_ParseVex3b,',
    };
    if sEncoding is None or sEncoding in ('fixed', 'VEX.fixed'):
        pass; # No decoder column of its own.
    elif sEncoding == 'ModR/M':
        # ASSUME the first operand is using the ModR/M encoding
        assert len(aoOperands) >= 1 and aoOperands[0].usesModRM(), "oInstr=%s" % (oInstr,);
        asColumns.append('IDX_ParseModRM,');
    elif sEncoding == 'prefix':
        # One zero column per operand.
        asColumns.extend(['0,'] * len(aoOperands));
    elif sEncoding in kdPlainParsers:
        asColumns.append(kdPlainParsers[sEncoding]);
    elif sEncoding in g_dInstructionMaps:
        asColumns.append(g_dInstructionMaps[sEncoding].sDisParse + ',');
    else:
        ## @todo Parsers not handled here yet:
        #IDX_ParseTwoByteEsc, IDX_ParseGrp1, IDX_ParseShiftGrp2, IDX_ParseGrp3, IDX_ParseGrp4,
        #IDX_ParseGrp5, IDX_Parse3DNow, IDX_ParseGrp6, IDX_ParseGrp7, IDX_ParseGrp8, IDX_ParseGrp9,
        #IDX_ParseGrp10, IDX_ParseGrp12, IDX_ParseGrp13, IDX_ParseGrp14, IDX_ParseGrp15,
        #IDX_ParseGrp16, IDX_ParseThreeByteEsc4, IDX_ParseThreeByteEsc5, IDX_ParseModFence,
        #IDX_ParseEscFP, IDX_ParseNopPause, IDX_ParseInvOpModRM,
        assert False, str(oInstr);

    # Check for immediates and stuff in the remaining operands, i.e. those
    # beyond the number of decoder columns added above, then pad with zeros.
    cDecoderCols = len(asColumns) - iStart;
    asColumns.extend([g_kdOpTypes[oOperand.sType][0] + ',' for oOperand in aoOperands[cDecoderCols:]]);
    asColumns.extend(['0,'] * (cMaxOperands - (len(asColumns) - iStart)));

    #
    # Opcode and operands.
    #
    assert oInstr.sDisEnum, str(oInstr);
    asColumns.append(oInstr.sDisEnum + ',');
    asParamCols = ['OP_PARM_' + g_kdOpTypes[oOperand.sType][3] + ',' for oOperand in aoOperands];
    asParamCols.extend(['OP_PARM_NONE,'] * (cMaxOperands - len(asParamCols)));
    asColumns.extend(asParamCols);

    #
    # Flags: the DISOPTYPE_ defines of the hints (sorted by hint name), or zero if none.
    #
    asHintDefines = [g_kdHints[sHint] for sHint in sorted(oInstr.dHints.keys())];
    sFlags = ' | '.join([sDefine for sDefine in asHintDefines if sDefine.startswith('DISOPTYPE_')]);
    asColumns.append((sFlags or '0') + '),');

    #
    # Format the columns into a line.  A column starts at its fixed offset
    # unless the line is already that long, in which case a single space
    # separates it from the previous column.
    #
    aoffColumns = [4, 29, 49, 65, 77, 89, 109, 125, 141, 157, 183, 199];
    sLine = '';
    for iColumn, sColumn in enumerate(asColumns):
        offColumn = aoffColumns[iColumn];
        if len(sLine) < offColumn:
            sLine = sLine.ljust(offColumn);
        else:
            sLine += ' ';
        sLine += sColumn;

    # OP("psrlw %Vdq,%Wdq", IDX_ParseModRM, IDX_UseModRM, 0, OP_PSRLW, OP_PARM_Vdq, OP_PARM_Wdq, OP_PARM_NONE,
    # DISOPTYPE_HARMLESS),
    # define OP(pszOpcode, idxParse1, idxParse2, idxParse3, opcode, param1, param2, param3, optype) \
    # { pszOpcode, idxParse1, idxParse2, idxParse3, 0, opcode, param1, param2, param3, 0, 0, optype }
    return sLine;
6483
def __checkIfShortTable(aoTableOrdered, oMap):
    """
    Decides whether the leading and trailing None entries should be trimmed off.

    Returns (iInstr, cInstructions, fShortTable).  For a short table iInstr is
    the index of the first entry that is not None and cInstructions is the
    index following the last such entry (both are zero if every entry is
    None).  Otherwise (0, len(aoTableOrdered), False) is returned.
    """
    _ = oMap; # Use this for overriding.
    cTotal = len(aoTableOrdered);

    # Find the end of the used range, i.e. the index after the last entry that isn't None.
    iEnd = next((idx + 1 for idx in range(cTotal - 1, -1, -1) if aoTableOrdered[idx] is not None), 0);

    # Find the start of the used range, i.e. the first entry that isn't None.
    iFirst = next((idx for idx in range(iEnd) if aoTableOrdered[idx] is not None), iEnd);

    # Go for the short table when the number of entries trimmed off is at
    # least 1/30th (integer division) of the full table size.
    # NOTE(review): that is a ~3.3% threshold rather than 30% - confirm which was intended.
    cTrimmed = iFirst + cTotal - iEnd;
    if cTrimmed < cTotal // 30:
        # Output the full table.
        return (0, cTotal, False);
    return (iFirst, iEnd, True);
6505
def generateDisassemblerTables(oDstFile = sys.stdout):
    """
    Generates disassembler tables.

    Parses all the instruction files and then writes a DISOPCODE table
    followed by a DISOPMAPDESC range record to oDstFile for each disassembler
    map.  Maps using the 'byte+pfx' selector are split up into one map per
    prefix (none, 0x66, 0xf3 and 0xf2).  Only maps with names starting with
    "vex" are output at the moment.

    Returns exit code: 0 on success, 1 if the parsing failed.
    """

    #
    # Parse all.
    #
    try:
        parseAll();
    except Exception as oXcpt:
        print('error: parseAll failed: %s' % (oXcpt,), file = sys.stderr);
        traceback.print_exc(file = sys.stderr);
        return 1;

    #
    # The disassembler uses a slightly different table layout to save space:
    # maps selected by byte + prefix are split up into one map per prefix.
    #
    aoDisasmMaps = [];
    for _, oMap in sorted(iter(g_dInstructionMaps.items()),
                          key = lambda aKV: aKV[1].sEncoding + ''.join(aKV[1].asLeadOpcodes)):
        if oMap.sSelector != 'byte+pfx':
            aoDisasmMaps.append(oMap);
        else:
            # Split the map by prefix.
            aoDisasmMaps.append(oMap.copy(oMap.sName, 'none'));
            aoDisasmMaps.append(oMap.copy(oMap.sName + '_66', '0x66'));
            aoDisasmMaps.append(oMap.copy(oMap.sName + '_F3', '0xf3'));
            aoDisasmMaps.append(oMap.copy(oMap.sName + '_F2', '0xf2'));

    #
    # Dump each map.
    #
    asHeaderLines = []; # NOTE(review): collected, but not written anywhere by this function.
    print("debug: maps=%s\n" % (', '.join([oMap.sName for oMap in aoDisasmMaps]),), file = sys.stderr);
    for oMap in aoDisasmMaps:
        sName = oMap.sName;

        if not sName.startswith("vex"):
            continue; # only looking at the vex maps at the moment.

        #
        # Get the instructions for the map and see if we can do a short version or not.
        #
        aoTableOrder    = oMap.getInstructionsInTableOrder();
        cEntriesPerByte = oMap.getEntriesPerByte();
        (iInstrStart, iInstrEnd, fShortTable) = __checkIfShortTable(aoTableOrder, oMap);
        sTableName      = oMap.getDisasTableName();
        sRangeName      = oMap.getDisasRangeName();

        # Number of table entries covered by one value of the high opcode nibble.
        # Note! The masking below assumes cEntriesPerByte is a power of two.
        cEntriesPerRow  = 0x10 * cEntriesPerByte;

        # An empty range gets a single dummy entry below, so the table never has less than one element.
        cTableEntries   = max(iInstrEnd - iInstrStart, 1);

        #
        # Output the table start.
        # Note! Short tables are static and only accessible via the map range record.
        #
        asLines = [];
        asLines.append('/* Generated from: %-11s Selector: %-7s Encoding: %-7s Lead bytes opcodes: %s */'
                       % ( oMap.sName, oMap.sSelector, oMap.sEncoding, ' '.join(oMap.asLeadOpcodes), ));
        if fShortTable:
            asLines.append('static const DISOPCODE %s[] =' % (sTableName,));
        else:
            asHeaderLines.append('extern const DISOPCODE %s[%d];' % (sTableName, iInstrEnd - iInstrStart,));
            asLines.append(      'const DISOPCODE %s[%d] ='       % (sTableName, iInstrEnd - iInstrStart,));
        asLines.append('{');

        if fShortTable and (iInstrStart & (cEntriesPerRow - 1)) != 0:
            asLines.append(' /* %#04x: */' % (iInstrStart,));

        #
        # Output the instructions.
        #
        iInstr = iInstrStart;
        while iInstr < iInstrEnd:
            oInstr = aoTableOrder[iInstr];
            if (iInstr & (cEntriesPerRow - 1)) == 0:
                if iInstr != iInstrStart:
                    asLines.append('');
                asLines.append(' /* %x */' % ((iInstr // cEntriesPerByte) >> 4,));

            if oInstr is None:
                # Invalid. Optimize blocks of invalid instructions.
                cInvalidInstrs = 1;
                while iInstr + cInvalidInstrs < len(aoTableOrder) and aoTableOrder[iInstr + cInvalidInstrs] is None:
                    cInvalidInstrs += 1;
                if (iInstr & (cEntriesPerRow - 1)) == 0 and cInvalidInstrs >= cEntriesPerRow:
                    asLines.append(' INVALID_OPCODE_BLOCK_%u,' % (cEntriesPerRow,));
                    iInstr += cEntriesPerRow - 1;
                elif cEntriesPerByte > 1:
                    if (iInstr & (cEntriesPerByte - 1)) == 0 and cInvalidInstrs >= cEntriesPerByte:
                        asLines.append(' INVALID_OPCODE_BLOCK_%u,' % (cEntriesPerByte,));
                        # Skip the rest of the block just emitted (the final += 1 below does the last step).
                        iInstr += cEntriesPerByte - 1;
                    else:
                        asLines.append(' /* %#04x/%d */ INVALID_OPCODE,'
                                       % (iInstr // cEntriesPerByte, iInstr % cEntriesPerByte));
                else:
                    asLines.append(' /* %#04x */ INVALID_OPCODE,' % (iInstr,));
            elif isinstance(oInstr, list):
                if len(oInstr) != 0:
                    asLines.append(' /* %#04x */ ComplicatedListStuffNeedingWrapper, /* \n -- %s */'
                                   % (iInstr, '\n -- '.join([str(oItem) for oItem in oInstr]),));
                else:
                    # NOTE(review): this hands an empty list to the entry formatter - confirm that is intended.
                    asLines.append(__formatDisassemblerTableEntry(oInstr));
            else:
                asLines.append(__formatDisassemblerTableEntry(oInstr));

            iInstr += 1;

        if iInstrStart >= iInstrEnd:
            asLines.append(' /* dummy */ INVALID_OPCODE');

        asLines.append('};');
        asLines.append('AssertCompile(RT_ELEMENTS(%s) == %s);' % (sTableName, cTableEntries,));

        #
        # We always emit a map range record, assuming the linker will eliminate the unnecessary ones.
        # Note! The extern declaration and the definition must name the same symbol.
        #
        asHeaderLines.append('extern const DISOPMAPDESC %s;' % (sRangeName,));
        asLines.append('const DISOPMAPDESC %s = { &%s[0], %#04x, RT_ELEMENTS(%s) };'
                       % (sRangeName, sTableName, iInstrStart, sTableName,));

        #
        # Write out the lines.
        #
        oDstFile.write('\n'.join(asLines));
        oDstFile.write('\n');
        oDstFile.write('\n');
    return 0;
6634
# When run as a script: generate the disassembler tables (written to stdout by
# default) and use the returned value as the process exit code.
if __name__ == '__main__':
    sys.exit(generateDisassemblerTables());
6637
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette