VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllInstPython.py@ 103779

Last change on this file since 103779 was 103779, checked in by vboxsync, 13 months ago

VMM/IEM: Implement emitter for IEM_MC_STORE_XREG_U32(), bugref:10614

  • Property svn:eol-style set to LF
  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 321.1 KB
Line 
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3# $Id: IEMAllInstPython.py 103779 2024-03-11 17:04:02Z vboxsync $
4
5"""
6IEM instruction extractor.
7
8This script/module parses the IEMAllInstruction*.cpp.h files next to it and
9collects information about the instructions. It can then be used to generate
10disassembler tables and tests.
11"""
12
13from __future__ import print_function;
14
15__copyright__ = \
16"""
17Copyright (C) 2017-2023 Oracle and/or its affiliates.
18
19This file is part of VirtualBox base platform packages, as
20available from https://www.virtualbox.org.
21
22This program is free software; you can redistribute it and/or
23modify it under the terms of the GNU General Public License
24as published by the Free Software Foundation, in version 3 of the
25License.
26
27This program is distributed in the hope that it will be useful, but
28WITHOUT ANY WARRANTY; without even the implied warranty of
29MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
30General Public License for more details.
31
32You should have received a copy of the GNU General Public License
33along with this program; if not, see <https://www.gnu.org/licenses>.
34
35The contents of this file may alternatively be used under the terms
36of the Common Development and Distribution License Version 1.0
37(CDDL), a copy of it is provided in the "COPYING.CDDL" file included
38in the VirtualBox distribution, in which case the provisions of the
39CDDL are applicable instead of those of the GPL.
40
41You may elect to license modified versions of this file under the
42terms and conditions of either the GPL or the CDDL or both.
43
44SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
45"""
46__version__ = "$Revision: 103779 $"
47
48# pylint: disable=anomalous-backslash-in-string,too-many-lines
49
50# Standard python imports.
51import os;
52import re;
53import sys;
54import traceback;
55
56## Only the main script needs to modify the path.
57#g_ksValidationKitDir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),
58# 'ValidationKit');
59#sys.path.append(g_ksValidationKitDir);
60#
61#from common import utils; - Windows build boxes don't have pywin32.
62
63# Python 3 hacks:
# When running under Python 3 or later, bind the name 'long' to int at module
# level (presumably so that later references to 'long' keep working - confirm).
64if sys.version_info[0] >= 3:
65 long = int; # pylint: disable=redefined-builtin,invalid-name
66
67
## EFLAGS constant names (X86_EFL_XXX) mapped to their numeric mask values.
## The trailing remark on each entry gives the equivalent C expression.
68g_kdX86EFlagsConstants = {
69 'X86_EFL_CF': 0x00000001, # RT_BIT_32(0)
70 'X86_EFL_1': 0x00000002, # RT_BIT_32(1)
71 'X86_EFL_PF': 0x00000004, # RT_BIT_32(2)
72 'X86_EFL_AF': 0x00000010, # RT_BIT_32(4)
73 'X86_EFL_ZF': 0x00000040, # RT_BIT_32(6)
74 'X86_EFL_SF': 0x00000080, # RT_BIT_32(7)
75 'X86_EFL_TF': 0x00000100, # RT_BIT_32(8)
76 'X86_EFL_IF': 0x00000200, # RT_BIT_32(9)
77 'X86_EFL_DF': 0x00000400, # RT_BIT_32(10)
78 'X86_EFL_OF': 0x00000800, # RT_BIT_32(11)
79 'X86_EFL_IOPL': 0x00003000, # (RT_BIT_32(12) | RT_BIT_32(13))
80 'X86_EFL_NT': 0x00004000, # RT_BIT_32(14)
81 'X86_EFL_RF': 0x00010000, # RT_BIT_32(16)
82 'X86_EFL_VM': 0x00020000, # RT_BIT_32(17)
83 'X86_EFL_AC': 0x00040000, # RT_BIT_32(18)
84 'X86_EFL_VIF': 0x00080000, # RT_BIT_32(19)
85 'X86_EFL_VIP': 0x00100000, # RT_BIT_32(20)
86 'X86_EFL_ID': 0x00200000, # RT_BIT_32(21)
87 'X86_EFL_LIVE_MASK': 0x003f7fd5, # UINT32_C(0x003f7fd5)
88 'X86_EFL_RA1_MASK': 0x00000002, # RT_BIT_32(1) - same value as X86_EFL_1.
89};
90
91## EFlags values allowed in \@opfltest, \@opflmodify, \@opflundef, \@opflset, and \@opflclear.
## Each mnemonic maps to an X86_EFL_XXX constant name; a leading '!' marks the
## mnemonics naming the cleared state of that flag.
92g_kdEFlagsMnemonics = {
93 # Debugger flag notation (sorted by value):
94 'cf': 'X86_EFL_CF', ##< Carry Flag.
95 'nc': '!X86_EFL_CF', ##< No Carry.
96
# NOTE(review): 'po' maps to PF set and 'pe' to PF clear here, while on x86 a
#               set PF conventionally indicates even parity - confirm intent.
97 'po': 'X86_EFL_PF', ##< Parity Odd.
98 'pe': '!X86_EFL_PF', ##< Parity Even.
99
100 'af': 'X86_EFL_AF', ##< Aux Flag.
101 'na': '!X86_EFL_AF', ##< No Aux.
102
103 'zr': 'X86_EFL_ZF', ##< ZeRo.
104 'nz': '!X86_EFL_ZF', ##< No Zero.
105
106 'ng': 'X86_EFL_SF', ##< NeGative (sign).
107 'pl': '!X86_EFL_SF', ##< PLus (sign).
108
109 'tf': 'X86_EFL_TF', ##< Trap flag.
110
111 'ei': 'X86_EFL_IF', ##< Enabled Interrupts.
112 'di': '!X86_EFL_IF', ##< Disabled Interrupts.
113
114 'dn': 'X86_EFL_DF', ##< DowN (string op direction).
115 'up': '!X86_EFL_DF', ##< UP (string op direction).
116
117 'ov': 'X86_EFL_OF', ##< OVerflow.
118 'nv': '!X86_EFL_OF', ##< No Overflow.
119
120 'nt': 'X86_EFL_NT', ##< Nested Task.
121 'rf': 'X86_EFL_RF', ##< Resume Flag.
122 'vm': 'X86_EFL_VM', ##< Virtual-8086 Mode.
123 'ac': 'X86_EFL_AC', ##< Alignment Check.
124 'vif': 'X86_EFL_VIF', ##< Virtual Interrupt Flag.
125 'vip': 'X86_EFL_VIP', ##< Virtual Interrupt Pending.
126
127 # Reference manual notation not covered above (sorted by value):
128 'pf': 'X86_EFL_PF',
129 'zf': 'X86_EFL_ZF',
130 'sf': 'X86_EFL_SF',
131 'if': 'X86_EFL_IF',
132 'df': 'X86_EFL_DF',
133 'of': 'X86_EFL_OF',
134 'iopl': 'X86_EFL_IOPL',
135 'id': 'X86_EFL_ID',
136};
137
138## Constants and values for CR0 (X86_CR0_XXX name -> bit mask value).
139g_kdX86Cr0Constants = {
140 'X86_CR0_PE': 0x00000001, # RT_BIT_32(0)
141 'X86_CR0_MP': 0x00000002, # RT_BIT_32(1)
142 'X86_CR0_EM': 0x00000004, # RT_BIT_32(2)
143 'X86_CR0_TS': 0x00000008, # RT_BIT_32(3)
144 'X86_CR0_ET': 0x00000010, # RT_BIT_32(4)
145 'X86_CR0_NE': 0x00000020, # RT_BIT_32(5)
146 'X86_CR0_WP': 0x00010000, # RT_BIT_32(16)
147 'X86_CR0_AM': 0x00040000, # RT_BIT_32(18)
148 'X86_CR0_NW': 0x20000000, # RT_BIT_32(29)
149 'X86_CR0_CD': 0x40000000, # RT_BIT_32(30)
150 'X86_CR0_PG': 0x80000000, # RT_BIT_32(31)
151};
152
153## Constants and values for CR4 (X86_CR4_XXX name -> bit mask value).
154g_kdX86Cr4Constants = {
155 'X86_CR4_VME': 0x00000001, # RT_BIT_32(0)
156 'X86_CR4_PVI': 0x00000002, # RT_BIT_32(1)
157 'X86_CR4_TSD': 0x00000004, # RT_BIT_32(2)
158 'X86_CR4_DE': 0x00000008, # RT_BIT_32(3)
159 'X86_CR4_PSE': 0x00000010, # RT_BIT_32(4)
160 'X86_CR4_PAE': 0x00000020, # RT_BIT_32(5)
161 'X86_CR4_MCE': 0x00000040, # RT_BIT_32(6)
162 'X86_CR4_PGE': 0x00000080, # RT_BIT_32(7)
163 'X86_CR4_PCE': 0x00000100, # RT_BIT_32(8)
164 'X86_CR4_OSFXSR': 0x00000200, # RT_BIT_32(9)
165 'X86_CR4_OSXMMEEXCPT': 0x00000400, # RT_BIT_32(10)
166 'X86_CR4_VMXE': 0x00002000, # RT_BIT_32(13)
167 'X86_CR4_SMXE': 0x00004000, # RT_BIT_32(14)
168 'X86_CR4_PCIDE': 0x00020000, # RT_BIT_32(17)
169 'X86_CR4_OSXSAVE': 0x00040000, # RT_BIT_32(18)
170 'X86_CR4_SMEP': 0x00100000, # RT_BIT_32(20)
171 'X86_CR4_SMAP': 0x00200000, # RT_BIT_32(21)
172 'X86_CR4_PKE': 0x00400000, # RT_BIT_32(22)
173};
174
175## XSAVE components (XCR0): XSAVE_C_XXX name -> component bit mask value.
176g_kdX86XSaveCConstants = {
177 'XSAVE_C_X87': 0x00000001,
178 'XSAVE_C_SSE': 0x00000002,
179 'XSAVE_C_YMM': 0x00000004,
180 'XSAVE_C_BNDREGS': 0x00000008,
181 'XSAVE_C_BNDCSR': 0x00000010,
182 'XSAVE_C_OPMASK': 0x00000020,
183 'XSAVE_C_ZMM_HI256': 0x00000040,
184 'XSAVE_C_ZMM_16HI': 0x00000080,
185 'XSAVE_C_PKRU': 0x00000200,
186 'XSAVE_C_LWP': 0x4000000000000000,
187 'XSAVE_C_X': 0x8000000000000000,
188 'XSAVE_C_ALL_AVX': 0x000000c4, # For clearing all AVX bits (YMM | ZMM_HI256 | ZMM_16HI).
189 'XSAVE_C_ALL_AVX_SSE': 0x000000c6, # For clearing all AVX and SSE bits (ALL_AVX | SSE).
190};
191
192
193## \@op[1-4] locations
## The keys are the valid location names; every value is an empty list.
194g_kdOpLocations = {
195 'reg': [], ## modrm.reg
196 'rm': [], ## modrm.rm
197 'imm': [], ## immediate instruction data
198 'vvvv': [], ## VEX.vvvv
199
200 # fixed registers.
201 'AL': [],
202 'rAX': [],
203 'rDX': [],
204 'CL': [],
205 'rSI': [],
206 'rDI': [],
207 'rFLAGS': [],
208 'CS': [],
209 'DS': [],
210 'ES': [],
211 'FS': [],
212 'GS': [],
213 'SS': [],
214
215 # fixed values.
216 '1': [],
217};
218
219## \@op[1-4] types
220##
221## Value fields:
222## - 0: the normal IDX_ParseXXX handler (IDX_UseModRM == IDX_ParseModRM).
223## - 1: the location (a g_kdOpLocations key).
224## - 2: disassembler format string version of the type.
225## - 3: disassembler OP_PARAM_XXX (XXX only).
226## - 4: IEM form matching instruction (empty string when not given).
227##
228## Note! See the A.2.1 in SDM vol 2 for the type names.
## Note! Variants with an _RO, _WO or _RW suffix repeat the fields of their
##       unsuffixed counterpart in this table.
229g_kdOpTypes = {
230 # Fixed addresses
231 'Ap': ( 'IDX_ParseImmAddrF', 'imm', '%Ap', 'Ap', 'FIXED', ),
232
233 # ModR/M.rm
234 'Eb': ( 'IDX_UseModRM', 'rm', '%Eb', 'Eb', 'RM', ),
235 'Ed': ( 'IDX_UseModRM', 'rm', '%Ed', 'Ed', 'RM', ),
236 'Ed_WO': ( 'IDX_UseModRM', 'rm', '%Ed', 'Ed', 'RM', ),
237 'Eq': ( 'IDX_UseModRM', 'rm', '%Eq', 'Eq', 'RM', ),
238 'Eq_WO': ( 'IDX_UseModRM', 'rm', '%Eq', 'Eq', 'RM', ),
239 'Ew': ( 'IDX_UseModRM', 'rm', '%Ew', 'Ew', 'RM', ),
240 'Ev': ( 'IDX_UseModRM', 'rm', '%Ev', 'Ev', 'RM', ),
241 'Ey': ( 'IDX_UseModRM', 'rm', '%Ey', 'Ey', 'RM', ),
242 'Qd': ( 'IDX_UseModRM', 'rm', '%Qd', 'Qd', 'RM', ),
243 'Qq': ( 'IDX_UseModRM', 'rm', '%Qq', 'Qq', 'RM', ),
244 'Qq_WO': ( 'IDX_UseModRM', 'rm', '%Qq', 'Qq', 'RM', ),
245 'Wss': ( 'IDX_UseModRM', 'rm', '%Wss', 'Wss', 'RM', ),
246 'Wss_WO': ( 'IDX_UseModRM', 'rm', '%Wss', 'Wss', 'RM', ),
247 'Wsd': ( 'IDX_UseModRM', 'rm', '%Wsd', 'Wsd', 'RM', ),
248 'Wsd_WO': ( 'IDX_UseModRM', 'rm', '%Wsd', 'Wsd', 'RM', ),
249 'Wps': ( 'IDX_UseModRM', 'rm', '%Wps', 'Wps', 'RM', ),
250 'Wps_WO': ( 'IDX_UseModRM', 'rm', '%Wps', 'Wps', 'RM', ),
251 'Wpd': ( 'IDX_UseModRM', 'rm', '%Wpd', 'Wpd', 'RM', ),
252 'Wpd_WO': ( 'IDX_UseModRM', 'rm', '%Wpd', 'Wpd', 'RM', ),
253 'Wdq': ( 'IDX_UseModRM', 'rm', '%Wdq', 'Wdq', 'RM', ),
254 'Wdq_WO': ( 'IDX_UseModRM', 'rm', '%Wdq', 'Wdq', 'RM', ),
255 'Wq': ( 'IDX_UseModRM', 'rm', '%Wq', 'Wq', 'RM', ),
256 'Wq_WO': ( 'IDX_UseModRM', 'rm', '%Wq', 'Wq', 'RM', ),
257 'WqZxReg_WO': ( 'IDX_UseModRM', 'rm', '%Wq', 'Wq', 'RM', ),
258 'Wx': ( 'IDX_UseModRM', 'rm', '%Wx', 'Wx', 'RM', ),
259 'Wx_WO': ( 'IDX_UseModRM', 'rm', '%Wx', 'Wx', 'RM', ),
260
261 # ModR/M.rm - register only.
262 'Uq': ( 'IDX_UseModRM', 'rm', '%Uq', 'Uq', 'REG' ),
263 'UqHi': ( 'IDX_UseModRM', 'rm', '%Uq', 'UqHi', 'REG' ),
264 'Uss': ( 'IDX_UseModRM', 'rm', '%Uss', 'Uss', 'REG' ),
265 'Uss_WO': ( 'IDX_UseModRM', 'rm', '%Uss', 'Uss', 'REG' ),
266 'Usd': ( 'IDX_UseModRM', 'rm', '%Usd', 'Usd', 'REG' ),
267 'Usd_WO': ( 'IDX_UseModRM', 'rm', '%Usd', 'Usd', 'REG' ),
268 'Ux': ( 'IDX_UseModRM', 'rm', '%Ux', 'Ux', 'REG' ),
269 'Nq': ( 'IDX_UseModRM', 'rm', '%Qq', 'Nq', 'REG' ),
270
271 # ModR/M.rm - memory only.
272 'Ma': ( 'IDX_UseModRM', 'rm', '%Ma', 'Ma', 'MEM', ), ##< Only used by BOUND.
273 'Mb_RO': ( 'IDX_UseModRM', 'rm', '%Mb', 'Mb', 'MEM', ),
274 'Md': ( 'IDX_UseModRM', 'rm', '%Md', 'Md', 'MEM', ),
275 'Md_RO': ( 'IDX_UseModRM', 'rm', '%Md', 'Md', 'MEM', ),
276 'Md_WO': ( 'IDX_UseModRM', 'rm', '%Md', 'Md', 'MEM', ),
277 'Mdq': ( 'IDX_UseModRM', 'rm', '%Mdq', 'Mdq', 'MEM', ),
278 'Mdq_WO': ( 'IDX_UseModRM', 'rm', '%Mdq', 'Mdq', 'MEM', ),
279 'Mq': ( 'IDX_UseModRM', 'rm', '%Mq', 'Mq', 'MEM', ),
280 'Mq_WO': ( 'IDX_UseModRM', 'rm', '%Mq', 'Mq', 'MEM', ),
281 'Mps_WO': ( 'IDX_UseModRM', 'rm', '%Mps', 'Mps', 'MEM', ),
282 'Mpd_WO': ( 'IDX_UseModRM', 'rm', '%Mpd', 'Mpd', 'MEM', ),
283 'Mx': ( 'IDX_UseModRM', 'rm', '%Mx', 'Mx', 'MEM', ),
284 'Mx_WO': ( 'IDX_UseModRM', 'rm', '%Mx', 'Mx', 'MEM', ),
285 'M_RO': ( 'IDX_UseModRM', 'rm', '%M', 'M', 'MEM', ),
286 'M_RW': ( 'IDX_UseModRM', 'rm', '%M', 'M', 'MEM', ),
287
288 # ModR/M.reg
289 'Gb': ( 'IDX_UseModRM', 'reg', '%Gb', 'Gb', '', ),
290 'Gw': ( 'IDX_UseModRM', 'reg', '%Gw', 'Gw', '', ),
291 'Gd': ( 'IDX_UseModRM', 'reg', '%Gd', 'Gd', '', ),
292 'Gv': ( 'IDX_UseModRM', 'reg', '%Gv', 'Gv', '', ),
293 'Gv_RO': ( 'IDX_UseModRM', 'reg', '%Gv', 'Gv', '', ),
294 'Gy': ( 'IDX_UseModRM', 'reg', '%Gy', 'Gy', '', ),
295 'Pd': ( 'IDX_UseModRM', 'reg', '%Pd', 'Pd', '', ),
296 'PdZx_WO': ( 'IDX_UseModRM', 'reg', '%Pd', 'PdZx', '', ),
297 'Pq': ( 'IDX_UseModRM', 'reg', '%Pq', 'Pq', '', ),
298 'Pq_WO': ( 'IDX_UseModRM', 'reg', '%Pq', 'Pq', '', ),
299 'Vd': ( 'IDX_UseModRM', 'reg', '%Vd', 'Vd', '', ),
300 'Vd_WO': ( 'IDX_UseModRM', 'reg', '%Vd', 'Vd', '', ),
301 'VdZx_WO': ( 'IDX_UseModRM', 'reg', '%Vd', 'Vd', '', ),
302 'Vdq': ( 'IDX_UseModRM', 'reg', '%Vdq', 'Vdq', '', ),
303 'Vss': ( 'IDX_UseModRM', 'reg', '%Vss', 'Vss', '', ),
304 'Vss_WO': ( 'IDX_UseModRM', 'reg', '%Vss', 'Vss', '', ),
305 'VssZx_WO': ( 'IDX_UseModRM', 'reg', '%Vss', 'Vss', '', ),
306 'Vsd': ( 'IDX_UseModRM', 'reg', '%Vsd', 'Vsd', '', ),
307 'Vsd_WO': ( 'IDX_UseModRM', 'reg', '%Vsd', 'Vsd', '', ),
308 'VsdZx_WO': ( 'IDX_UseModRM', 'reg', '%Vsd', 'Vsd', '', ),
309 'Vps': ( 'IDX_UseModRM', 'reg', '%Vps', 'Vps', '', ),
310 'Vps_WO': ( 'IDX_UseModRM', 'reg', '%Vps', 'Vps', '', ),
311 'Vpd': ( 'IDX_UseModRM', 'reg', '%Vpd', 'Vpd', '', ),
312 'Vpd_WO': ( 'IDX_UseModRM', 'reg', '%Vpd', 'Vpd', '', ),
313 'Vq': ( 'IDX_UseModRM', 'reg', '%Vq', 'Vq', '', ),
314 'Vq_WO': ( 'IDX_UseModRM', 'reg', '%Vq', 'Vq', '', ),
315 'Vdq_WO': ( 'IDX_UseModRM', 'reg', '%Vdq', 'Vdq', '', ),
316 'VqHi': ( 'IDX_UseModRM', 'reg', '%Vdq', 'VdqHi', '', ),
317 'VqHi_WO': ( 'IDX_UseModRM', 'reg', '%Vdq', 'VdqHi', '', ),
318 'VqZx_WO': ( 'IDX_UseModRM', 'reg', '%Vq', 'VqZx', '', ),
319 'Vx': ( 'IDX_UseModRM', 'reg', '%Vx', 'Vx', '', ),
320 'Vx_WO': ( 'IDX_UseModRM', 'reg', '%Vx', 'Vx', '', ),
321
322 # VEX.vvvv
323 'By': ( 'IDX_UseModRM', 'vvvv', '%By', 'By', 'V', ),
324 'Hps': ( 'IDX_UseModRM', 'vvvv', '%Hps', 'Hps', 'V', ),
325 'Hpd': ( 'IDX_UseModRM', 'vvvv', '%Hpd', 'Hpd', 'V', ),
326 'HssHi': ( 'IDX_UseModRM', 'vvvv', '%Hx', 'HssHi', 'V', ),
327 'HsdHi': ( 'IDX_UseModRM', 'vvvv', '%Hx', 'HsdHi', 'V', ),
328 'Hq': ( 'IDX_UseModRM', 'vvvv', '%Hq', 'Hq', 'V', ),
329 'HqHi': ( 'IDX_UseModRM', 'vvvv', '%Hq', 'HqHi', 'V', ),
330 'Hx': ( 'IDX_UseModRM', 'vvvv', '%Hx', 'Hx', 'V', ),
331
332 # Immediate values.
333 'Ib': ( 'IDX_ParseImmByte', 'imm', '%Ib', 'Ib', '', ), ##< NB! Could be IDX_ParseImmByteSX for some instrs.
334 'Iw': ( 'IDX_ParseImmUshort', 'imm', '%Iw', 'Iw', '', ),
335 'Id': ( 'IDX_ParseImmUlong', 'imm', '%Id', 'Id', '', ),
336 'Iq': ( 'IDX_ParseImmQword', 'imm', '%Iq', 'Iq', '', ),
337 'Iv': ( 'IDX_ParseImmV', 'imm', '%Iv', 'Iv', '', ), ##< o16: word, o32: dword, o64: qword
338 'Iz': ( 'IDX_ParseImmZ', 'imm', '%Iz', 'Iz', '', ), ##< o16: word, o32|o64:dword
339
340 # Address operands (no ModR/M).
341 'Ob': ( 'IDX_ParseImmAddr', 'imm', '%Ob', 'Ob', '', ),
342 'Ov': ( 'IDX_ParseImmAddr', 'imm', '%Ov', 'Ov', '', ),
343
344 # Relative jump targets
345 'Jb': ( 'IDX_ParseImmBRel', 'imm', '%Jb', 'Jb', '', ),
346 'Jv': ( 'IDX_ParseImmVRel', 'imm', '%Jv', 'Jv', '', ),
347
348 # DS:rSI
349 'Xb': ( 'IDX_ParseXb', 'rSI', '%eSI', 'Xb', '', ),
350 'Xv': ( 'IDX_ParseXv', 'rSI', '%eSI', 'Xv', '', ),
351 # ES:rDI
352 'Yb': ( 'IDX_ParseYb', 'rDI', '%eDI', 'Yb', '', ),
353 'Yv': ( 'IDX_ParseYv', 'rDI', '%eDI', 'Yv', '', ),
354
# rFLAGS
355 'Fv': ( 'IDX_ParseFixedReg', 'rFLAGS', '%Fv', 'Fv', '', ),
356
357 # Fixed registers.
# NOTE(review): the CL entry is keyed 'REG_CL' whereas the other fixed
#               registers are keyed by their plain location name - confirm.
358 'AL': ( 'IDX_ParseFixedReg', 'AL', 'al', 'REG_AL', '', ),
359 'REG_CL': ( 'IDX_ParseFixedReg', 'CL', 'cl', 'REG_CL', '', ),
360 'rAX': ( 'IDX_ParseFixedReg', 'rAX', '%eAX', 'REG_EAX', '', ),
361 'rDX': ( 'IDX_ParseFixedReg', 'rDX', '%eDX', 'REG_EDX', '', ),
362 'CS': ( 'IDX_ParseFixedReg', 'CS', 'cs', 'REG_CS', '', ), # 8086: push CS
363 'DS': ( 'IDX_ParseFixedReg', 'DS', 'ds', 'REG_DS', '', ),
364 'ES': ( 'IDX_ParseFixedReg', 'ES', 'es', 'REG_ES', '', ),
365 'FS': ( 'IDX_ParseFixedReg', 'FS', 'fs', 'REG_FS', '', ),
366 'GS': ( 'IDX_ParseFixedReg', 'GS', 'gs', 'REG_GS', '', ),
367 'SS': ( 'IDX_ParseFixedReg', 'SS', 'ss', 'REG_SS', '', ),
368
369 # Fixed values.
370 '1': ( '', '1', '1', '1', '', ),
371};
372
373# IDX_ParseFixedReg
374# IDX_ParseVexDest
375
376
377## IEMFORM_XXX mappings.
## Each value is a tuple of:
##  - 0: the encoding (a g_kdEncodings key).
##  - 1: the operand locations in operand order (g_kdOpLocations keys), or None.
##  - 2: the sub-opcode string, empty when the form does not imply one.
## NOTE(review): M_REG, M_MEM, VEX_M_REG and VEX_M_MEM carry an empty sub-opcode
##               unlike the other _REG/_MEM forms ('11 mr/reg' / '!11 mr/reg') - confirm.
378g_kdIemForms = { # sEncoding, [ sWhere1, ... ] opcodesub ),
379 'RM': ( 'ModR/M', [ 'reg', 'rm' ], '', ),
380 'RM_REG': ( 'ModR/M', [ 'reg', 'rm' ], '11 mr/reg', ),
381 'RM_MEM': ( 'ModR/M', [ 'reg', 'rm' ], '!11 mr/reg', ),
382 'RMI': ( 'ModR/M', [ 'reg', 'rm', 'imm' ], '', ),
383 'RMI_REG': ( 'ModR/M', [ 'reg', 'rm', 'imm' ], '11 mr/reg', ),
384 'RMI_MEM': ( 'ModR/M', [ 'reg', 'rm', 'imm' ], '!11 mr/reg', ),
385 'MR': ( 'ModR/M', [ 'rm', 'reg' ], '', ),
386 'MR_REG': ( 'ModR/M', [ 'rm', 'reg' ], '11 mr/reg', ),
387 'MR_MEM': ( 'ModR/M', [ 'rm', 'reg' ], '!11 mr/reg', ),
388 'MRI': ( 'ModR/M', [ 'rm', 'reg', 'imm' ], '', ),
389 'MRI_REG': ( 'ModR/M', [ 'rm', 'reg', 'imm' ], '11 mr/reg', ),
390 'MRI_MEM': ( 'ModR/M', [ 'rm', 'reg', 'imm' ], '!11 mr/reg', ),
391 'M': ( 'ModR/M', [ 'rm', ], '', ),
392 'M_REG': ( 'ModR/M', [ 'rm', ], '', ),
393 'M_MEM': ( 'ModR/M', [ 'rm', ], '', ),
394 'M1': ( 'ModR/M', [ 'rm', '1' ], '', ),
395 'M_CL': ( 'ModR/M', [ 'rm', 'CL' ], '', ), # shl/rcl/ror/++
396 'MI': ( 'ModR/M', [ 'rm', 'imm' ], '', ),
397 'MI_REG': ( 'ModR/M', [ 'rm', 'imm' ], '11 mr/reg', ),
398 'MI_MEM': ( 'ModR/M', [ 'rm', 'imm' ], '!11 mr/reg', ),
399 'R': ( 'ModR/M', [ 'reg', ], '', ),
400
401 'VEX_RM': ( 'VEX.ModR/M', [ 'reg', 'rm' ], '', ),
402 'VEX_RM_REG': ( 'VEX.ModR/M', [ 'reg', 'rm' ], '11 mr/reg', ),
403 'VEX_RM_MEM': ( 'VEX.ModR/M', [ 'reg', 'rm' ], '!11 mr/reg', ),
404 'VEX_MR': ( 'VEX.ModR/M', [ 'rm', 'reg' ], '', ),
405 'VEX_MR_REG': ( 'VEX.ModR/M', [ 'rm', 'reg' ], '11 mr/reg', ),
406 'VEX_MR_MEM': ( 'VEX.ModR/M', [ 'rm', 'reg' ], '!11 mr/reg', ),
407 'VEX_M': ( 'VEX.ModR/M', [ 'rm', ], '' ),
408 'VEX_M_REG': ( 'VEX.ModR/M', [ 'rm', ], '' ),
409 'VEX_M_MEM': ( 'VEX.ModR/M', [ 'rm', ], '' ),
410 'VEX_R': ( 'VEX.ModR/M', [ 'reg', ], '' ),
411 'VEX_RVM': ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm' ], '', ),
412 'VEX_RVM_REG': ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm' ], '11 mr/reg', ),
413 'VEX_RVM_MEM': ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm' ], '!11 mr/reg', ),
414 'VEX_RMV': ( 'VEX.ModR/M', [ 'reg', 'rm', 'vvvv' ], '', ),
415 'VEX_RMV_REG': ( 'VEX.ModR/M', [ 'reg', 'rm', 'vvvv' ], '11 mr/reg', ),
416 'VEX_RMV_MEM': ( 'VEX.ModR/M', [ 'reg', 'rm', 'vvvv' ], '!11 mr/reg', ),
417 'VEX_RMI': ( 'VEX.ModR/M', [ 'reg', 'rm', 'imm' ], '', ),
418 'VEX_RMI_REG': ( 'VEX.ModR/M', [ 'reg', 'rm', 'imm' ], '11 mr/reg', ),
419 'VEX_RMI_MEM': ( 'VEX.ModR/M', [ 'reg', 'rm', 'imm' ], '!11 mr/reg', ),
420 'VEX_MVR': ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg' ], '', ),
421 'VEX_MVR_REG': ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg' ], '11 mr/reg', ),
422 'VEX_MVR_MEM': ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg' ], '!11 mr/reg', ),
423
424 'VEX_VM': ( 'VEX.ModR/M', [ 'vvvv', 'rm' ], '', ),
425 'VEX_VM_REG': ( 'VEX.ModR/M', [ 'vvvv', 'rm' ], '11 mr/reg', ),
426 'VEX_VM_MEM': ( 'VEX.ModR/M', [ 'vvvv', 'rm' ], '!11 mr/reg', ),
427 'VEX_VMI': ( 'VEX.ModR/M', [ 'vvvv', 'rm', 'imm' ], '', ),
428 'VEX_VMI_REG': ( 'VEX.ModR/M', [ 'vvvv', 'rm', 'imm' ], '11 mr/reg', ),
429 'VEX_VMI_MEM': ( 'VEX.ModR/M', [ 'vvvv', 'rm', 'imm' ], '!11 mr/reg', ),
430
431 'FIXED': ( 'fixed', None, '', ),
432};
433
434## \@oppfx values.
## The keys are the accepted values; every value is an empty list.
435g_kdPrefixes = {
436 'none': [],
437 '0x66': [],
438 '0xf3': [],
439 '0xf2': [],
440 '!0xf3': [], # special case for bsf/tzcnt
441};
442
443## Special \@opcode tag values.
## The keys are the accepted special values; every value is an empty list.
444g_kdSpecialOpcodes = {
445 '/reg': [],
446 'mr/reg': [],
447 '11 /reg': [],
448 '!11 /reg': [],
449 '11 mr/reg': [],
450 '!11 mr/reg': [],
451};
452
## Special \@opcodesub tag values.
## The first value is the real value for aliases (non-alias entries map to
## themselves, 'none' maps to None).
## The second value is for bs3cg1.
g_kdSubOpcodes = {
    'none':                 [ None,                 '',     ],
    '11 mr/reg':            [ '11 mr/reg',          '',     ],
    '11':                   [ '11 mr/reg',          '',     ], ##< alias
    '!11 mr/reg':           [ '!11 mr/reg',         '',     ],
    '!11':                  [ '!11 mr/reg',         '',     ], ##< alias
    'rex.w=0':              [ 'rex.w=0',            'WZ',   ],
    'w=0':                  [ 'rex.w=0',            '',     ], ##< alias
    'rex.w=1':              [ 'rex.w=1',            'WNZ',  ],
    'w=1':                  [ 'rex.w=1',            '',     ], ##< alias
    'vex.l=0':              [ 'vex.l=0',            'L0',   ],
    'vex.l=1':              [ 'vex.l=1',            'L1',   ], # Was 'vex.l=0' (copy & paste slip).
    '11 mr/reg vex.l=0':    [ '11 mr/reg vex.l=0',  'L0',   ],
    '11 mr/reg vex.l=1':    [ '11 mr/reg vex.l=1',  'L1',   ],
    '!11 mr/reg vex.l=0':   [ '!11 mr/reg vex.l=0', 'L0',   ],
    '!11 mr/reg vex.l=1':   [ '!11 mr/reg vex.l=1', 'L1',   ],
    '!11 mr/reg rex.w=0':   [ '!11 mr/reg rex.w=0', '',     ],
    '!11 mr/reg rex.w=1':   [ '!11 mr/reg rex.w=1', '',     ],
};
475
476## Valid values for \@openc
## Each value is a one-element list holding a BS3CG1ENC_XXX constant name,
## or None where no such constant is given.
477g_kdEncodings = {
478 'ModR/M': [ 'BS3CG1ENC_MODRM', ], ##< ModR/M
479 'VEX.ModR/M': [ 'BS3CG1ENC_VEX_MODRM', ], ##< VEX...ModR/M
480 'fixed': [ 'BS3CG1ENC_FIXED', ], ##< Fixed encoding (address, registers, unused, etc).
481 'VEX.fixed': [ 'BS3CG1ENC_VEX_FIXED', ], ##< VEX + fixed encoding (address, registers, unused, etc).
482 'prefix': [ None, ], ##< Prefix
483};
484
485## \@opunused, \@opinvalid, \@opinvlstyle
## The keys are the accepted style names; every value is an empty list.
486g_kdInvalidStyles = {
487 'immediate': [], ##< CPU stops decoding immediately after the opcode.
488 'vex.modrm': [], ##< VEX+ModR/M, everyone.
489 'intel-modrm': [], ##< Intel decodes ModR/M.
490 'intel-modrm-imm8': [], ##< Intel decodes ModR/M and an 8-bit immediate.
491 'intel-opcode-modrm': [], ##< Intel decodes another opcode byte followed by ModR/M. (Unused extension tables.)
492 'intel-opcode-modrm-imm8': [], ##< Intel decodes another opcode byte followed by ModR/M and an 8-bit immediate.
493};
494
## Known CPU names; every value is an empty tuple.
## NOTE(review): presumably the set of values accepted by a CPU related tag - confirm against the users.
495g_kdCpuNames = {
496 '8086': (),
497 '80186': (),
498 '80286': (),
499 '80386': (),
500 '80486': (),
501};
502
503## \@opcpuid
## Maps the feature names to X86_CPUID_XXX constant names.
## NOTE(review): the constants for 'pclmul', 'monitor' and 'smx' do not match
##               their feature names (DTES64, CPLDS and TM2 respectively); this
##               looks like a shifted copy & paste - verify against the header
##               defining the X86_CPUID_FEATURE_ECX_XXX constants before changing.
504g_kdCpuIdFlags = {
505 'vme': 'X86_CPUID_FEATURE_EDX_VME',
506 'tsc': 'X86_CPUID_FEATURE_EDX_TSC',
507 'msr': 'X86_CPUID_FEATURE_EDX_MSR',
508 'cx8': 'X86_CPUID_FEATURE_EDX_CX8',
509 'sep': 'X86_CPUID_FEATURE_EDX_SEP',
510 'cmov': 'X86_CPUID_FEATURE_EDX_CMOV',
511 'clfsh': 'X86_CPUID_FEATURE_EDX_CLFSH',
512 'clflushopt': 'X86_CPUID_STEXT_FEATURE_EBX_CLFLUSHOPT',
513 'mmx': 'X86_CPUID_FEATURE_EDX_MMX',
514 'fxsr': 'X86_CPUID_FEATURE_EDX_FXSR',
515 'sse': 'X86_CPUID_FEATURE_EDX_SSE',
516 'sse2': 'X86_CPUID_FEATURE_EDX_SSE2',
517 'sse3': 'X86_CPUID_FEATURE_ECX_SSE3',
518 'pclmul': 'X86_CPUID_FEATURE_ECX_DTES64', # NOTE(review): DTES64 for pclmul - see above.
519 'monitor': 'X86_CPUID_FEATURE_ECX_CPLDS', # NOTE(review): CPLDS for monitor - see above.
520 'vmx': 'X86_CPUID_FEATURE_ECX_VMX',
521 'smx': 'X86_CPUID_FEATURE_ECX_TM2', # NOTE(review): TM2 for smx - see above.
522 'ssse3': 'X86_CPUID_FEATURE_ECX_SSSE3',
523 'fma': 'X86_CPUID_FEATURE_ECX_FMA',
524 'cx16': 'X86_CPUID_FEATURE_ECX_CX16',
525 'pcid': 'X86_CPUID_FEATURE_ECX_PCID',
526 'sse4.1': 'X86_CPUID_FEATURE_ECX_SSE4_1',
527 'sse4.2': 'X86_CPUID_FEATURE_ECX_SSE4_2',
528 'movbe': 'X86_CPUID_FEATURE_ECX_MOVBE',
529 'popcnt': 'X86_CPUID_FEATURE_ECX_POPCNT',
530 'aes': 'X86_CPUID_FEATURE_ECX_AES',
531 'xsave': 'X86_CPUID_FEATURE_ECX_XSAVE',
532 'avx': 'X86_CPUID_FEATURE_ECX_AVX',
533 'avx2': 'X86_CPUID_STEXT_FEATURE_EBX_AVX2',
534 'f16c': 'X86_CPUID_FEATURE_ECX_F16C',
535 'rdrand': 'X86_CPUID_FEATURE_ECX_RDRAND',
536
# AMD specific features (X86_CPUID_AMD_FEATURE_XXX).
537 'axmmx': 'X86_CPUID_AMD_FEATURE_EDX_AXMMX',
538 '3dnowext': 'X86_CPUID_AMD_FEATURE_EDX_3DNOW_EX',
539 '3dnow': 'X86_CPUID_AMD_FEATURE_EDX_3DNOW',
540 'svm': 'X86_CPUID_AMD_FEATURE_ECX_SVM',
541 'cr8l': 'X86_CPUID_AMD_FEATURE_ECX_CR8L',
542 'abm': 'X86_CPUID_AMD_FEATURE_ECX_ABM',
543 'sse4a': 'X86_CPUID_AMD_FEATURE_ECX_SSE4A',
544 '3dnowprf': 'X86_CPUID_AMD_FEATURE_ECX_3DNOWPRF',
545 'xop': 'X86_CPUID_AMD_FEATURE_ECX_XOP',
546 'fma4': 'X86_CPUID_AMD_FEATURE_ECX_FMA4',
547};
548
549## \@ophints values.
## Maps each hint name to a DISOPTYPE_XXX constant name, or to an empty string
## for the hints that have no such constant listed here.
550# pylint: disable=line-too-long
551g_kdHints = {
552 'invalid': 'DISOPTYPE_INVALID', ##<
553 'harmless': 'DISOPTYPE_HARMLESS', ##<
554 'controlflow': 'DISOPTYPE_CONTROLFLOW', ##<
555 'potentially_dangerous': 'DISOPTYPE_POTENTIALLY_DANGEROUS', ##<
556 'dangerous': 'DISOPTYPE_DANGEROUS', ##<
557 'portio': 'DISOPTYPE_PORTIO', ##<
558 'privileged': 'DISOPTYPE_PRIVILEGED', ##<
559 'privileged_notrap': 'DISOPTYPE_PRIVILEGED_NOTRAP', ##<
560 'uncond_controlflow': 'DISOPTYPE_UNCOND_CONTROLFLOW', ##<
561 'relative_controlflow': 'DISOPTYPE_RELATIVE_CONTROLFLOW', ##<
562 'cond_controlflow': 'DISOPTYPE_COND_CONTROLFLOW', ##<
563 'interrupt': 'DISOPTYPE_INTERRUPT', ##<
564 'illegal': 'DISOPTYPE_ILLEGAL', ##<
565 'rrm_dangerous': 'DISOPTYPE_RRM_DANGEROUS', ##< Some additional dangerous ones when recompiling raw r0.
566 'rrm_dangerous_16': 'DISOPTYPE_RRM_DANGEROUS_16', ##< Some additional dangerous ones when recompiling 16-bit raw r0.
567 'inhibit_irqs': 'DISOPTYPE_INHIBIT_IRQS', ##< Will or can inhibit irqs (sti, pop ss, mov ss)
568 'x86_portio_read': 'DISOPTYPE_X86_PORTIO_READ', ##<
569 'x86_portio_write': 'DISOPTYPE_X86_PORTIO_WRITE', ##<
570 'x86_invalid_64': 'DISOPTYPE_X86_INVALID_64', ##< Invalid in 64 bits mode
571 'x86_only_64': 'DISOPTYPE_X86_ONLY_64', ##< Only valid in 64 bits mode
572 'x86_default_64_op_size': 'DISOPTYPE_X86_DEFAULT_64_OP_SIZE', ##< Default 64 bits operand size
573 'x86_forced_64_op_size': 'DISOPTYPE_X86_FORCED_64_OP_SIZE', ##< Forced 64 bits operand size; regardless of prefix bytes
574 'x86_rexb_extends_opreg': 'DISOPTYPE_X86_REXB_EXTENDS_OPREG', ##< REX.B extends the register field in the opcode byte
575 'x86_mod_fixed_11': 'DISOPTYPE_X86_MOD_FIXED_11', ##< modrm.mod is always 11b
576 'x86_forced_32_op_size_x86': 'DISOPTYPE_X86_FORCED_32_OP_SIZE_X86', ##< Forced 32 bits operand size; regardless of prefix bytes
577 ## (only in 16 & 32 bits mode!)
578 'x86_avx': 'DISOPTYPE_X86_AVX', ##< AVX,AVX2,++ instruction. Not implemented yet!
579 'x86_sse': 'DISOPTYPE_X86_SSE', ##< SSE,SSE2,SSE3,++ instruction. Not implemented yet!
580 'x86_mmx': 'DISOPTYPE_X86_MMX', ##< MMX,MMXExt,3DNow,++ instruction. Not implemented yet!
581 'x86_fpu': 'DISOPTYPE_X86_FPU', ##< FPU instruction. Not implemented yet!
582 'ignores_oz_pfx': '', ##< Ignores operand size prefix 66h.
583 'ignores_rexw': '', ##< Ignores REX.W.
584 'ignores_op_sizes': '', ##< Shorthand for "ignores_oz_pfx | ignores_rexw".
585 'vex_l_zero': '', ##< VEX.L must be 0.
586 'vex_l_ignored': '', ##< VEX.L is ignored.
587 'vex_v_zero': '', ##< VEX.V must be 0. (generate sub-table?)
588 'lock_allowed': '', ##< Lock prefix allowed.
589};
590# pylint: enable=line-too-long
591
592## \@opxcpttype values (see SDMv2 2.4, 2.7).
## The keys are the accepted type names; every value is an empty list.
593g_kdXcptTypes = {
594 'none': [],
595 '1': [],
596 '2': [],
597 '3': [],
598 '4': [],
599 '4UA': [],
600 '5': [],
601 '5LZ': [], # LZ = VEX.L must be zero.
602 '6': [],
603 '7': [],
604 '7LZ': [], # LZ as for 5LZ.
605 '8': [],
606 '11': [],
607 '12': [],
608 'E1': [],
609 'E1NF': [],
610 'E2': [],
611 'E3': [],
612 'E3NF': [],
613 'E4': [],
614 'E4NF': [],
615 'E5': [],
616 'E5NF': [],
617 'E6': [],
618 'E6NF': [],
619 'E7NF': [],
620 'E9': [],
621 'E9NF': [],
622 'E10': [],
623 'E11': [],
624 'E12': [],
625 'E12NF': [],
626};
627
628
629def _isValidOpcodeByte(sOpcode):
630 """
631 Checks if sOpcode is a valid lower case opcode byte.
632 Returns true/false.
633 """
634 if len(sOpcode) == 4:
635 if sOpcode[:2] == '0x':
636 if sOpcode[2] in '0123456789abcdef':
637 if sOpcode[3] in '0123456789abcdef':
638 return True;
639 return False;
640
641
642class InstructionMap(object):
643 """
644 Instruction map.
645
646 The opcode map provides the lead opcode bytes (empty for the one byte
647 opcode map). An instruction can be member of multiple opcode maps as long
648 as it uses the same opcode value within the map (because of VEX).
649 """
650
651 kdEncodings = {
652 'legacy': [],
653 'vex1': [], ##< VEX or EVEX prefix with vvvvv = 1
654 'vex2': [], ##< VEX or EVEX prefix with vvvvv = 2
655 'vex3': [], ##< VEX or EVEX prefix with vvvvv = 3
656 'xop8': [], ##< XOP prefix with vvvvv = 8
657 'xop9': [], ##< XOP prefix with vvvvv = 9
658 'xop10': [], ##< XOP prefix with vvvvv = 10
659 };
660 ## Selectors.
661 ## 1. The first value is the number of table entries required by a
662 ## decoder or disassembler for this type of selector.
663 ## 2. The second value is how many entries per opcode byte if applicable.
664 kdSelectors = {
665 'byte': [ 256, 1, ], ##< next opcode byte selects the instruction (default).
666 'byte+pfx': [ 1024, 4, ], ##< next opcode byte selects the instruction together with the 0x66, 0xf2 and 0xf3 prefixes.
667 '/r': [ 8, 1, ], ##< modrm.reg selects the instruction.
668 'memreg /r':[ 16, 1, ], ##< modrm.reg and (modrm.mod == 3) selects the instruction.
669 'mod /r': [ 32, 1, ], ##< modrm.reg and modrm.mod selects the instruction.
670 '!11 /r': [ 8, 1, ], ##< modrm.reg selects the instruction with modrm.mod != 0y11.
671 '11 /r': [ 8, 1, ], ##< modrm.reg select the instruction with modrm.mod == 0y11.
672 '11': [ 64, 1, ], ##< modrm.reg and modrm.rm select the instruction with modrm.mod == 0y11.
673 };
674
675 ## Define the subentry number according to the Instruction::sPrefix
676 ## value for 'byte+pfx' selected tables.
677 kiPrefixOrder = {
678 'none': 0,
679 '0x66': 1,
680 '0xf3': 2,
681 '0xf2': 3,
682 };
683
684 def __init__(self, sName, sIemName = None, asLeadOpcodes = None, sSelector = 'byte+pfx',
685 sEncoding = 'legacy', sDisParse = None):
686 assert sSelector in self.kdSelectors;
687 assert sEncoding in self.kdEncodings;
688 if asLeadOpcodes is None:
689 asLeadOpcodes = [];
690 else:
691 for sOpcode in asLeadOpcodes:
692 assert _isValidOpcodeByte(sOpcode);
693 assert sDisParse is None or sDisParse.startswith('IDX_Parse');
694
695 self.sName = sName;
696 self.sIemName = sIemName;
697 self.asLeadOpcodes = asLeadOpcodes; ##< Lead opcode bytes formatted as hex strings like '0x0f'.
698 self.sSelector = sSelector; ##< The member selector, see kdSelectors.
699 self.sEncoding = sEncoding; ##< The encoding, see kdSelectors.
700 self.aoInstructions = [] # type: Instruction
701 self.sDisParse = sDisParse; ##< IDX_ParseXXX.
702
703 def copy(self, sNewName, sPrefixFilter = None):
704 """
705 Copies the table with filtering instruction by sPrefix if not None.
706 """
707 oCopy = InstructionMap(sNewName, sIemName = self.sIemName, asLeadOpcodes = self.asLeadOpcodes,
708 sSelector = 'byte' if sPrefixFilter is not None and self.sSelector == 'byte+pfx'
709 else self.sSelector,
710 sEncoding = self.sEncoding, sDisParse = self.sDisParse);
711 if sPrefixFilter is None:
712 oCopy.aoInstructions = list(self.aoInstructions);
713 else:
714 oCopy.aoInstructions = [oInstr for oInstr in self.aoInstructions if oInstr.sPrefix == sPrefixFilter];
715 return oCopy;
716
717 def getTableSize(self):
718 """
719 Number of table entries. This corresponds directly to the selector.
720 """
721 return self.kdSelectors[self.sSelector][0];
722
723 def getEntriesPerByte(self):
724 """
725 Number of table entries per opcode bytes.
726
727 This only really makes sense for the 'byte' and 'byte+pfx' selectors, for
728 the others it will just return 1.
729 """
730 return self.kdSelectors[self.sSelector][1];
731
732 def getInstructionIndex(self, oInstr):
733 """
734 Returns the table index for the instruction.
735 """
736 bOpcode = oInstr.getOpcodeByte();
737
738 # The byte selectors are simple. We need a full opcode byte and need just return it.
739 if self.sSelector == 'byte':
740 assert oInstr.sOpcode[:2] == '0x' and len(oInstr.sOpcode) == 4, str(oInstr);
741 return bOpcode;
742
743 # The byte + prefix selector is similarly simple, though requires a prefix as well as the full opcode.
744 if self.sSelector == 'byte+pfx':
745 assert oInstr.sOpcode[:2] == '0x' and len(oInstr.sOpcode) == 4, str(oInstr);
746 assert self.kiPrefixOrder.get(oInstr.sPrefix, -16384) >= 0;
747 return bOpcode * 4 + self.kiPrefixOrder.get(oInstr.sPrefix, -16384);
748
749 # The other selectors needs masking and shifting.
750 if self.sSelector == '/r':
751 return (bOpcode >> 3) & 0x7;
752
753 if self.sSelector == 'mod /r':
754 return (bOpcode >> 3) & 0x1f;
755
756 if self.sSelector == 'memreg /r':
757 return ((bOpcode >> 3) & 0x7) | (int((bOpcode >> 6) == 3) << 3);
758
759 if self.sSelector == '!11 /r':
760 assert (bOpcode & 0xc0) != 0xc, str(oInstr);
761 return (bOpcode >> 3) & 0x7;
762
763 if self.sSelector == '11 /r':
764 assert (bOpcode & 0xc0) == 0xc, str(oInstr);
765 return (bOpcode >> 3) & 0x7;
766
767 if self.sSelector == '11':
768 assert (bOpcode & 0xc0) == 0xc, str(oInstr);
769 return bOpcode & 0x3f;
770
771 assert False, self.sSelector;
772 return -1;
773
def getInstructionsInTableOrder(self):
    """
    Returns the instructions of this map arranged in table order.

    The result is a list with self.getTableSize() entries.  Each entry is
    one of:
        - None if no instruction was specified for that index,
        - a single instruction (the normal case), or
        - a list of instructions when several share the index, e.g. when
          a prefix (like REX.W) encodes a different instruction or when
          different CPUs have different instructions or prefixes in the
          same place.

    Instructions without an opcode (sOpcode) are left out.
    """
    cEntries  = self.getTableSize();
    aoEntries = [None] * cEntries;

    for oInstr in self.aoInstructions:
        # Nothing to index on without an opcode.
        if not oInstr.sOpcode:
            continue;

        idxEntry = self.getInstructionIndex(oInstr);
        assert idxEntry < cEntries, str(idxEntry);

        oCurrent = aoEntries[idxEntry];
        if isinstance(oCurrent, list):
            # Third or later instruction for this index.
            oCurrent.append(oInstr);
        elif oCurrent is not None:
            # Second instruction for this index, switch to list form.
            aoEntries[idxEntry] = [oCurrent, oInstr];
        else:
            aoEntries[idxEntry] = oInstr;

    return aoEntries;
805
806
def getDisasTableName(self):
    """
    Returns the disassembler table name for this map.

    The name is 'g_aDisas' followed by one piece per '_' separated word of
    self.sName: 'm' / 'r' suffixes and two digit lowercase hex words are
    kept as-is with a leading underscore, everything else is capitalized
    (with 'grp' and 'map' turned into 'Grp' and 'Map').
    """
    asParts = ['g_aDisas',];
    for sWord in self.sName.split('_'):
        if sWord in ('m', 'r'):
            # Suffix indicating modrm.mod==mem ('m') or modrm.mod==reg ('r').
            asParts.append('_' + sWord);
        elif len(sWord) == 2 and re.match('^[a-f0-9][a-f0-9]$', sWord):
            asParts.append('_' + sWord);
        else:
            sWord = sWord.replace('grp', 'Grp').replace('map', 'Map');
            asParts.append(sWord[0].upper() + sWord[1:]);
    return ''.join(asParts);
824
def getDisasRangeName(self):
    """
    Returns the disassembler table range name for this map.

    This is the table name from getDisasTableName() with the 'g_aDisas'
    prefix replaced by 'g_Disas' and 'Range' appended.
    """
    sTableName = self.getDisasTableName();
    return sTableName.replace('g_aDisas', 'g_Disas') + 'Range';
830
def isVexMap(self):
    """ Returns True if the map encoding (sEncoding) starts with 'vex', i.e. it is a VEX map. """
    sEncoding = self.sEncoding;
    return sEncoding.startswith('vex');
834
835
class TestType(object):
    """
    Test value type.

    This base class deals with integer like values.  The fUnsigned constructor
    parameter indicates the default stance on zero vs sign extending.  It is
    possible to override fUnsigned=True by prefixing the value with '+' or '-'.
    """
    def __init__(self, sName, acbSizes = None, fUnsigned = True):
        self.sName     = sName;
        self.acbSizes  = [1, 2, 4, 8, 16, 32] if acbSizes is None else acbSizes;  # Normal sizes.
        self.fUnsigned = fUnsigned;

    class BadValue(Exception):
        """ Bad value exception. """
        def __init__(self, sMessage):
            Exception.__init__(self, sMessage);
            self.sMessage = sMessage;

    ## For ascii ~ operator.
    kdHexInv = {
        '0': 'f',
        '1': 'e',
        '2': 'd',
        '3': 'c',
        '4': 'b',
        '5': 'a',
        '6': '9',
        '7': '8',
        '8': '7',
        '9': '6',
        'a': '5',
        'b': '4',
        'c': '3',
        'd': '2',
        'e': '1',
        'f': '0',
    };

    def get(self, sValue):
        """
        Get the shortest normal sized byte representation of sValue.

        The value is a decimal or '0x' prefixed hexadecimal integer string,
        optionally prefixed by '+' or '-' (which forces sign extension).  The
        returned bytearray is in little endian order and padded up to the
        first size in self.acbSizes that can hold it (or to a multiple of the
        largest size if it is bigger than that).

        Returns ((fSignExtend, bytearray), ) or ((fSignExtend, bytearray), (fSignExtend, bytearray), ).
        The latter form is for AND+OR pairs where the first entry is what to
        AND with the field and the second the one or OR with.  (This base
        class only ever returns the single entry form.)

        Raises BadValue if invalid value.
        """
        if not sValue:
            raise TestType.BadValue('empty value');

        # Deal with sign and detect hexadecimal or decimal.
        fSignExtend = not self.fUnsigned;
        if sValue[0] == '-' or sValue[0] == '+':
            fSignExtend = True;
            fHex = len(sValue) > 3 and sValue[1:3].lower() == '0x';
        else:
            fHex = len(sValue) > 2 and sValue[0:2].lower() == '0x';

        # Try convert it to an integer.  int() is used rather than long() so
        # this works without any module level alias on Python 3; on Python 2
        # int() returns a long by itself when the value requires it.
        try:
            iValue = int(sValue, 16 if fHex else 10);
        except (TypeError, ValueError) as oXcpt:
            raise TestType.BadValue('failed to convert "%s" to integer (%s)' % (sValue, oXcpt));

        # Convert the hex string and pad it to a decent value.  Negative values
        # needs to be manually converted to something non-negative (~-n + 1).
        sHex = hex(iValue if iValue >= 0 else -iValue - 1);
        if sHex.endswith('L'): # Python 2 appends 'L' when formatting long values.
            sHex = sHex[:-1];
        assert sHex[:2] == '0x';
        sHex = sHex[2:];
        if iValue < 0:
            sHex = ''.join([self.kdHexInv[sDigit] for sDigit in sHex]);
            # Make sure the top bit is set so the value reads as negative when sign extended.
            if fSignExtend and sHex[0] not in [ '8', '9', 'a', 'b', 'c', 'd', 'e', 'f']:
                sHex = 'f' + sHex;

        # Figure out how many hex digits the natural size has.
        cDigits    = len(sHex);
        cMaxDigits = self.acbSizes[-1] * 2;
        if cDigits <= cMaxDigits:
            for cb in self.acbSizes:
                cNaturalDigits = cb * 2;
                if cDigits <= cNaturalDigits:
                    break;
        else:
            # Too big for the largest size, round up to a multiple of it.
            # (Integer division avoids float rounding of large digit counts.)
            cNaturalDigits = ((cDigits + cMaxDigits - 1) // cMaxDigits) * cMaxDigits;
            assert isinstance(cNaturalDigits, int)

        # Pad: zeros for non-negative values, 'f' (all ones) for negative ones.
        if cNaturalDigits != cDigits:
            cNeeded = cNaturalDigits - cDigits;
            if iValue >= 0:
                sHex = ('0' * cNeeded) + sHex;
            else:
                sHex = ('f' * cNeeded) + sHex;

        # Invert and convert to bytearray and return it.
        abValue = bytearray([int(sHex[offHex - 2 : offHex], 16) for offHex in range(len(sHex), 0, -2)]);

        return ((fSignExtend, abValue),);

    def validate(self, sValue):
        """
        Returns True if value is okay, error message on failure.
        """
        try:
            self.get(sValue);
        except TestType.BadValue as oXcpt:
            return oXcpt.sMessage;
        return True;

    def isAndOrPair(self, sValue):
        """
        Checks if sValue is a pair.  Always False for this base class.
        """
        _ = sValue;
        return False;
960
961
class TestTypeEflags(TestType):
    """
    EFLAGS/RFLAGS/FLAGS value type.

    Values are comma separated flag mnemonics which are translated via
    g_kdEFlagsMnemonics and g_kdX86EFlagsConstants.
    """

    ## Mnemonics isAndOrPair() looks for in a value.  Only the keys are used.
    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 };

    def __init__(self, sName):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);

    def get(self, sValue):
        """
        Converts the comma separated flag mnemonics in sValue.

        Mnemonics mapping to a '!' prefixed constant are collected into a
        clear mask, the others into a set mask.  Returns the set mask in the
        single entry form of TestType.get() when nothing is to be cleared,
        otherwise a (clear, set) pair.

        Raises BadValue on unknown mnemonics.
        """
        fToSet   = 0;
        fToClear = 0;
        for sMnemonic in sValue.split(','):
            sConstant = g_kdEFlagsMnemonics.get(sMnemonic, None);
            if sConstant is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sMnemonic, sValue))
            if sConstant[0] != '!':
                fToSet |= g_kdX86EFlagsConstants[sConstant];
            else:
                fToClear |= g_kdX86EFlagsConstants[sConstant[1:]];

        aoSet = TestType.get(self, '0x%x' % (fToSet,));
        if fToClear == 0:
            assert self.isAndOrPair(sValue) is False;
            return aoSet;

        aoClear = TestType.get(self, '%#x' % (fToClear,))
        assert self.isAndOrPair(sValue) is True;
        return (aoClear[0], aoSet[0]);

    def isAndOrPair(self, sValue):
        """
        Returns True if any kdZeroValueFlags mnemonic occurs in sValue.

        Note! This is a plain substring search of the whole value string.
        """
        return any(sZeroFlag in sValue for sZeroFlag in self.kdZeroValueFlags);
997
class TestTypeFromDict(TestType):
    """
    Value type for flag names looked up in a dictionary of constants.

    Each comma separated flag name is uppercased, prefixed by sConstantPrefix
    and looked up in kdConstantsAndValues; the values found are ORed together.
    """

    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 };

    def __init__(self, sName, kdConstantsAndValues, sConstantPrefix):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);
        self.kdConstantsAndValues = kdConstantsAndValues;
        self.sConstantPrefix      = sConstantPrefix;

    def get(self, sValue):
        """
        Returns the TestType.get() representation of the ORed flag values.

        Raises BadValue if a flag name has no entry in the dictionary.
        """
        fCombined = 0;
        for sFlag in sValue.split(','):
            sConstant = self.sConstantPrefix + sFlag.upper();
            fFlag     = self.kdConstantsAndValues.get(sConstant, None);
            if fFlag is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sFlag, sValue))
            fCombined |= fFlag;
        return TestType.get(self, '0x%x' % (fCombined,));
1018
1019
class TestInOut(object):
    """
    One input or output state modifier.

    This should be thought as values to modify BS3REGCTX and extended (needs
    to be structured) state.

    Each instance holds a field name (a kdFields key), an assignment operator
    (one of kasOperators), the value string and the name of the value type.
    """
    ## Assigned operators.
    kasOperators = [
        '&|=',  # Special AND(INV)+OR operator for use with EFLAGS.
        '&~=',
        '&=',
        '|=',
        '='
    ];
    ## Types
    # NOTE(review): 'int' is created with the default fUnsigned=True and thus
    #               behaves exactly like 'uint' - confirm whether
    #               fUnsigned=False was intended.
    kdTypes = {
        'uint':  TestType('uint', fUnsigned = True),
        'int':   TestType('int'),
        'efl':   TestTypeEflags('efl'),
        'cr0':   TestTypeFromDict('cr0', g_kdX86Cr0Constants, 'X86_CR0_'),
        'cr4':   TestTypeFromDict('cr4', g_kdX86Cr4Constants, 'X86_CR4_'),
        'xcr0':  TestTypeFromDict('xcr0', g_kdX86XSaveCConstants, 'XSAVE_C_'),
    };
    ## CPU context fields.
    kdFields = {
        # name:         ( default type, [both|input|output], )
        # Operands.
        'op1':          ( 'uint', 'both',   ), ## \@op1
        'op2':          ( 'uint', 'both',   ), ## \@op2
        'op3':          ( 'uint', 'both',   ), ## \@op3
        'op4':          ( 'uint', 'both',   ), ## \@op4
        # Flags.
        'efl':          ( 'efl',  'both',   ),
        'efl_undef':    ( 'uint', 'output', ),
        # 8-bit GPRs.
        'al':           ( 'uint', 'both',   ),
        'cl':           ( 'uint', 'both',   ),
        'dl':           ( 'uint', 'both',   ),
        'bl':           ( 'uint', 'both',   ),
        'ah':           ( 'uint', 'both',   ),
        'ch':           ( 'uint', 'both',   ),
        'dh':           ( 'uint', 'both',   ),
        'bh':           ( 'uint', 'both',   ),
        'r8l':          ( 'uint', 'both',   ),
        'r9l':          ( 'uint', 'both',   ),
        'r10l':         ( 'uint', 'both',   ),
        'r11l':         ( 'uint', 'both',   ),
        'r12l':         ( 'uint', 'both',   ),
        'r13l':         ( 'uint', 'both',   ),
        'r14l':         ( 'uint', 'both',   ),
        'r15l':         ( 'uint', 'both',   ),
        # 16-bit GPRs.
        'ax':           ( 'uint', 'both',   ),
        'dx':           ( 'uint', 'both',   ),
        'cx':           ( 'uint', 'both',   ),
        'bx':           ( 'uint', 'both',   ),
        'sp':           ( 'uint', 'both',   ),
        'bp':           ( 'uint', 'both',   ),
        'si':           ( 'uint', 'both',   ),
        'di':           ( 'uint', 'both',   ),
        'r8w':          ( 'uint', 'both',   ),
        'r9w':          ( 'uint', 'both',   ),
        'r10w':         ( 'uint', 'both',   ),
        'r11w':         ( 'uint', 'both',   ),
        'r12w':         ( 'uint', 'both',   ),
        'r13w':         ( 'uint', 'both',   ),
        'r14w':         ( 'uint', 'both',   ),
        'r15w':         ( 'uint', 'both',   ),
        # 32-bit GPRs.
        'eax':          ( 'uint', 'both',   ),
        'edx':          ( 'uint', 'both',   ),
        'ecx':          ( 'uint', 'both',   ),
        'ebx':          ( 'uint', 'both',   ),
        'esp':          ( 'uint', 'both',   ),
        'ebp':          ( 'uint', 'both',   ),
        'esi':          ( 'uint', 'both',   ),
        'edi':          ( 'uint', 'both',   ),
        'r8d':          ( 'uint', 'both',   ),
        'r9d':          ( 'uint', 'both',   ),
        'r10d':         ( 'uint', 'both',   ),
        'r11d':         ( 'uint', 'both',   ),
        'r12d':         ( 'uint', 'both',   ),
        'r13d':         ( 'uint', 'both',   ),
        'r14d':         ( 'uint', 'both',   ),
        'r15d':         ( 'uint', 'both',   ),
        # 64-bit GPRs.
        'rax':          ( 'uint', 'both',   ),
        'rdx':          ( 'uint', 'both',   ),
        'rcx':          ( 'uint', 'both',   ),
        'rbx':          ( 'uint', 'both',   ),
        'rsp':          ( 'uint', 'both',   ),
        'rbp':          ( 'uint', 'both',   ),
        'rsi':          ( 'uint', 'both',   ),
        'rdi':          ( 'uint', 'both',   ),
        'r8':           ( 'uint', 'both',   ),
        'r9':           ( 'uint', 'both',   ),
        'r10':          ( 'uint', 'both',   ),
        'r11':          ( 'uint', 'both',   ),
        'r12':          ( 'uint', 'both',   ),
        'r13':          ( 'uint', 'both',   ),
        'r14':          ( 'uint', 'both',   ),
        'r15':          ( 'uint', 'both',   ),
        # 16-bit, 32-bit or 64-bit registers according to operand size.
        'oz.rax':       ( 'uint', 'both',   ),
        'oz.rdx':       ( 'uint', 'both',   ),
        'oz.rcx':       ( 'uint', 'both',   ),
        'oz.rbx':       ( 'uint', 'both',   ),
        'oz.rsp':       ( 'uint', 'both',   ),
        'oz.rbp':       ( 'uint', 'both',   ),
        'oz.rsi':       ( 'uint', 'both',   ),
        'oz.rdi':       ( 'uint', 'both',   ),
        'oz.r8':        ( 'uint', 'both',   ),
        'oz.r9':        ( 'uint', 'both',   ),
        'oz.r10':       ( 'uint', 'both',   ),
        'oz.r11':       ( 'uint', 'both',   ),
        'oz.r12':       ( 'uint', 'both',   ),
        'oz.r13':       ( 'uint', 'both',   ),
        'oz.r14':       ( 'uint', 'both',   ),
        'oz.r15':       ( 'uint', 'both',   ),
        # Control registers.
        'cr0':          ( 'cr0',  'both',   ),
        'cr4':          ( 'cr4',  'both',   ),
        'xcr0':         ( 'xcr0', 'both',   ),
        # FPU Registers
        'fcw':          ( 'uint', 'both',   ),
        'fsw':          ( 'uint', 'both',   ),
        'ftw':          ( 'uint', 'both',   ),
        'fop':          ( 'uint', 'both',   ),
        'fpuip':        ( 'uint', 'both',   ),
        'fpucs':        ( 'uint', 'both',   ),
        'fpudp':        ( 'uint', 'both',   ),
        'fpuds':        ( 'uint', 'both',   ),
        'mxcsr':        ( 'uint', 'both',   ),
        'st0':          ( 'uint', 'both',   ),
        'st1':          ( 'uint', 'both',   ),
        'st2':          ( 'uint', 'both',   ),
        'st3':          ( 'uint', 'both',   ),
        'st4':          ( 'uint', 'both',   ),
        'st5':          ( 'uint', 'both',   ),
        'st6':          ( 'uint', 'both',   ),
        'st7':          ( 'uint', 'both',   ),
        # MMX registers.
        'mm0':          ( 'uint', 'both',   ),
        'mm1':          ( 'uint', 'both',   ),
        'mm2':          ( 'uint', 'both',   ),
        'mm3':          ( 'uint', 'both',   ),
        'mm4':          ( 'uint', 'both',   ),
        'mm5':          ( 'uint', 'both',   ),
        'mm6':          ( 'uint', 'both',   ),
        'mm7':          ( 'uint', 'both',   ),
        # SSE registers.
        'xmm0':         ( 'uint', 'both',   ),
        'xmm1':         ( 'uint', 'both',   ),
        'xmm2':         ( 'uint', 'both',   ),
        'xmm3':         ( 'uint', 'both',   ),
        'xmm4':         ( 'uint', 'both',   ),
        'xmm5':         ( 'uint', 'both',   ),
        'xmm6':         ( 'uint', 'both',   ),
        'xmm7':         ( 'uint', 'both',   ),
        'xmm8':         ( 'uint', 'both',   ),
        'xmm9':         ( 'uint', 'both',   ),
        'xmm10':        ( 'uint', 'both',   ),
        'xmm11':        ( 'uint', 'both',   ),
        'xmm12':        ( 'uint', 'both',   ),
        'xmm13':        ( 'uint', 'both',   ),
        'xmm14':        ( 'uint', 'both',   ),
        'xmm15':        ( 'uint', 'both',   ),
        'xmm0.lo':      ( 'uint', 'both',   ),
        'xmm1.lo':      ( 'uint', 'both',   ),
        'xmm2.lo':      ( 'uint', 'both',   ),
        'xmm3.lo':      ( 'uint', 'both',   ),
        'xmm4.lo':      ( 'uint', 'both',   ),
        'xmm5.lo':      ( 'uint', 'both',   ),
        'xmm6.lo':      ( 'uint', 'both',   ),
        'xmm7.lo':      ( 'uint', 'both',   ),
        'xmm8.lo':      ( 'uint', 'both',   ),
        'xmm9.lo':      ( 'uint', 'both',   ),
        'xmm10.lo':     ( 'uint', 'both',   ),
        'xmm11.lo':     ( 'uint', 'both',   ),
        'xmm12.lo':     ( 'uint', 'both',   ),
        'xmm13.lo':     ( 'uint', 'both',   ),
        'xmm14.lo':     ( 'uint', 'both',   ),
        'xmm15.lo':     ( 'uint', 'both',   ),
        'xmm0.hi':      ( 'uint', 'both',   ),
        'xmm1.hi':      ( 'uint', 'both',   ),
        'xmm2.hi':      ( 'uint', 'both',   ),
        'xmm3.hi':      ( 'uint', 'both',   ),
        'xmm4.hi':      ( 'uint', 'both',   ),
        'xmm5.hi':      ( 'uint', 'both',   ),
        'xmm6.hi':      ( 'uint', 'both',   ),
        'xmm7.hi':      ( 'uint', 'both',   ),
        'xmm8.hi':      ( 'uint', 'both',   ),
        'xmm9.hi':      ( 'uint', 'both',   ),
        'xmm10.hi':     ( 'uint', 'both',   ),
        'xmm11.hi':     ( 'uint', 'both',   ),
        'xmm12.hi':     ( 'uint', 'both',   ),
        'xmm13.hi':     ( 'uint', 'both',   ),
        'xmm14.hi':     ( 'uint', 'both',   ),
        'xmm15.hi':     ( 'uint', 'both',   ),
        'xmm0.lo.zx':   ( 'uint', 'both',   ),
        'xmm1.lo.zx':   ( 'uint', 'both',   ),
        'xmm2.lo.zx':   ( 'uint', 'both',   ),
        'xmm3.lo.zx':   ( 'uint', 'both',   ),
        'xmm4.lo.zx':   ( 'uint', 'both',   ),
        'xmm5.lo.zx':   ( 'uint', 'both',   ),
        'xmm6.lo.zx':   ( 'uint', 'both',   ),
        'xmm7.lo.zx':   ( 'uint', 'both',   ),
        'xmm8.lo.zx':   ( 'uint', 'both',   ),
        'xmm9.lo.zx':   ( 'uint', 'both',   ),
        'xmm10.lo.zx':  ( 'uint', 'both',   ),
        'xmm11.lo.zx':  ( 'uint', 'both',   ),
        'xmm12.lo.zx':  ( 'uint', 'both',   ),
        'xmm13.lo.zx':  ( 'uint', 'both',   ),
        'xmm14.lo.zx':  ( 'uint', 'both',   ),
        'xmm15.lo.zx':  ( 'uint', 'both',   ),
        'xmm0.dw0':     ( 'uint', 'both',   ),
        'xmm1.dw0':     ( 'uint', 'both',   ),
        'xmm2.dw0':     ( 'uint', 'both',   ),
        'xmm3.dw0':     ( 'uint', 'both',   ),
        'xmm4.dw0':     ( 'uint', 'both',   ),
        'xmm5.dw0':     ( 'uint', 'both',   ),
        'xmm6.dw0':     ( 'uint', 'both',   ),
        'xmm7.dw0':     ( 'uint', 'both',   ),
        'xmm8.dw0':     ( 'uint', 'both',   ),
        'xmm9.dw0':     ( 'uint', 'both',   ),
        'xmm10.dw0':    ( 'uint', 'both',   ),
        'xmm11.dw0':    ( 'uint', 'both',   ),
        'xmm12.dw0':    ( 'uint', 'both',   ),
        'xmm13.dw0':    ( 'uint', 'both',   ),
        'xmm14.dw0':    ( 'uint', 'both',   ),
        'xmm15.dw0':    ( 'uint', 'both',   ),
        'xmm15_dw0':    ( 'uint', 'both',   ), # Legacy misspelling of 'xmm15.dw0', kept for compatibility.
        # AVX registers.
        'ymm0':         ( 'uint', 'both',   ),
        'ymm1':         ( 'uint', 'both',   ),
        'ymm2':         ( 'uint', 'both',   ),
        'ymm3':         ( 'uint', 'both',   ),
        'ymm4':         ( 'uint', 'both',   ),
        'ymm5':         ( 'uint', 'both',   ),
        'ymm6':         ( 'uint', 'both',   ),
        'ymm7':         ( 'uint', 'both',   ),
        'ymm8':         ( 'uint', 'both',   ),
        'ymm9':         ( 'uint', 'both',   ),
        'ymm10':        ( 'uint', 'both',   ),
        'ymm11':        ( 'uint', 'both',   ),
        'ymm12':        ( 'uint', 'both',   ),
        'ymm13':        ( 'uint', 'both',   ),
        'ymm14':        ( 'uint', 'both',   ),
        'ymm15':        ( 'uint', 'both',   ),

        # Special ones.
        'value.xcpt':   ( 'uint', 'output', ),
    };

    def __init__(self, sField, sOp, sValue, sType):
        """
        sField must be a kdFields key and sOp one of kasOperators; all four
        arguments must be strings (checked by assertions only).
        """
        assert sField in self.kdFields;
        assert sOp in self.kasOperators;
        self.sField = sField;
        self.sOp    = sOp;
        self.sValue = sValue;
        self.sType  = sType;
        assert isinstance(sField, str);
        assert isinstance(sOp, str);
        assert isinstance(sType, str);
        assert isinstance(sValue, str);
1285
1286
class TestSelector(object):
    """
    One selector for an instruction test.

    A selector is a variable (kdVariables key), a compare operator
    (kasCompareOps) and one of the values valid for that variable.
    """
    ## Selector compare operators.
    kasCompareOps = [ '==', '!=' ];
    ## Selector variables and their valid values.
    kdVariables = {
        # Operand size.
        'size':     { 'o16': 'size_o16', 'o32': 'size_o32', 'o64': 'size_o64', },
        # VEX.L value.
        'vex.l':    { '0': 'vexl_0', '1': 'vexl_1', },
        # Execution ring.
        'ring':     {
            '0':    'ring_0',
            '1':    'ring_1',
            '2':    'ring_2',
            '3':    'ring_3',
            '0..2': 'ring_0_thru_2',
            '1..3': 'ring_1_thru_3',
        },
        # Basic code mode.
        'codebits': { '64': 'code_64bit', '32': 'code_32bit', '16': 'code_16bit', },
        # cpu modes.
        'mode':     {
            'real': 'mode_real',
            'prot': 'mode_prot',
            'long': 'mode_long',
            'v86':  'mode_v86',
            'smm':  'mode_smm',
            'vmx':  'mode_vmx',
            'svm':  'mode_svm',
        },
        # paging on/off
        'paging':   { 'on': 'paging_on', 'off': 'paging_off', },
        # CPU vendor
        'vendor':   { 'amd': 'vendor_amd', 'intel': 'vendor_intel', 'via': 'vendor_via', },
    };
    ## Selector shorthand predicates.
    ## These translates into variable expressions.
    kdPredicates = {
        # Operand size.
        'o16':          'size==o16',
        'o32':          'size==o32',
        'o64':          'size==o64',
        # Execution ring.
        'ring0':        'ring==0',
        '!ring0':       'ring==1..3',
        'ring1':        'ring==1',
        'ring2':        'ring==2',
        'ring3':        'ring==3',
        'user':         'ring==3',
        'supervisor':   'ring==0..2',
        # Code bits.
        '16-bit':       'codebits==16',
        '32-bit':       'codebits==32',
        '64-bit':       'codebits==64',
        # CPU modes.
        'real':         'mode==real',
        'prot':         'mode==prot',
        'long':         'mode==long',
        'v86':          'mode==v86',
        'smm':          'mode==smm',
        'vmx':          'mode==vmx',
        'svm':          'mode==svm',
        # Paging.
        'paging':       'paging==on',
        '!paging':      'paging==off',
        # CPU vendor.
        'amd':          'vendor==amd',
        '!amd':         'vendor!=amd',
        'intel':        'vendor==intel',
        '!intel':       'vendor!=intel',
        'via':          'vendor==via',
        '!via':         'vendor!=via',
    };

    def __init__(self, sVariable, sOp, sValue):
        """
        The arguments are only validated by assertions: sVariable against
        kdVariables, sOp against kasCompareOps and sValue against the values
        valid for sVariable.
        """
        assert sVariable in self.kdVariables;
        assert sOp in self.kasCompareOps;
        assert sValue in self.kdVariables[sVariable];
        self.sVariable, self.sOp, self.sValue = sVariable, sOp, sValue;
1383
1384
1385class InstructionTest(object):
1386 """
1387 Instruction test.
1388 """
1389
1390 def __init__(self, oInstr): # type: (InstructionTest, Instruction)
1391 self.oInstr = oInstr # type: InstructionTest
1392 self.aoInputs = [] # type: List[TestInOut]
1393 self.aoOutputs = [] # type: List[TestInOut]
1394 self.aoSelectors = [] # type: List[TestSelector]
1395
1396 def toString(self, fRepr = False):
1397 """
1398 Converts it to string representation.
1399 """
1400 asWords = [];
1401 if self.aoSelectors:
1402 for oSelector in self.aoSelectors:
1403 asWords.append('%s%s%s' % (oSelector.sVariable, oSelector.sOp, oSelector.sValue,));
1404 asWords.append('/');
1405
1406 for oModifier in self.aoInputs:
1407 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
1408
1409 asWords.append('->');
1410
1411 for oModifier in self.aoOutputs:
1412 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
1413
1414 if fRepr:
1415 return '<' + ' '.join(asWords) + '>';
1416 return ' '.join(asWords);
1417
1418 def __str__(self):
1419 """ Provide string represenation. """
1420 return self.toString(False);
1421
1422 def __repr__(self):
1423 """ Provide unambigious string representation. """
1424 return self.toString(True);
1425
class Operand(object):
    """
    Instruction operand.

    Described by where it is located (sWhere, a g_kdOpLocations key) and its
    type (sType, a g_kdOpTypes key).
    """

    def __init__(self, sWhere, sType):
        assert sWhere in g_kdOpLocations, sWhere;
        assert sType  in g_kdOpTypes, sType;
        self.sWhere, self.sType = sWhere, sType; ##< g_kdOpLocations and g_kdOpTypes keys respectively.

    def usesModRM(self):
        """ Returns True if using some form of ModR/M encoding, i.e. the type starts with 'E', 'G' or 'M'. """
        chFirst = self.sType[0];
        return chFirst in ('E', 'G', 'M');
1440
1441
1442
class Instruction(object): # pylint: disable=too-many-instance-attributes
    """
    Instruction.

    Collects everything known about one instruction: the core description
    (mnemonic, opcode, operands, flags, tests, ...), implementation info,
    decoding bookkeeping and raw intermediate input fields.
    """

    def __init__(self, sSrcFile, iLine):
        ## @name Core attributes.
        ## @{
        self.oParent        = None      # type: Instruction
        self.sMnemonic      = None;
        self.sBrief         = None;
        self.asDescSections = []        # type: List[str]
        self.aoMaps         = []        # type: List[InstructionMap]
        self.aoOperands     = []        # type: List[Operand]
        self.sPrefix        = None;     ##< Single prefix: None, 'none', 0x66, 0xf3, 0xf2
        self.sOpcode        = None      # type: str
        self.sSubOpcode     = None      # type: str
        self.sEncoding      = None;
        self.asFlTest       = None;
        self.asFlModify     = None;
        self.asFlUndefined  = None;
        self.asFlSet        = None;
        self.asFlClear      = None;
        self.dHints         = {};       ##< Dictionary of instruction hints, flags, whatnot. (Dictionary for speed; dummy value).
        self.sDisEnum       = None;     ##< OP_XXXX value.  Default is based on the uppercased mnemonic.
        self.asCpuIds       = [];       ##< The CPUID feature bit names for this instruction. If multiple, assume AND.
        self.asReqFeatures  = [];       ##< Which features are required to be enabled to run this instruction.
        self.aoTests        = []        # type: List[InstructionTest]
        self.sMinCpu        = None;     ##< Indicates the minimum CPU required for the instruction. Not set when oCpuExpr is.
        self.oCpuExpr       = None;     ##< Some CPU restriction expression...
        self.sGroup         = None;
        self.fUnused        = False;    ##< Unused instruction.
        self.fInvalid       = False;    ##< Invalid instruction (like UD2).
        self.sInvalidStyle  = None;     ##< Invalid behaviour style (g_kdInvalidStyles),
        self.sXcptType      = None;     ##< Exception type (g_kdXcptTypes).
        ## @}

        ## @name Implementation attributes.
        ## @{
        self.sStats         = None;
        self.sFunction      = None;
        self.fStub          = False;
        self.fUdStub        = False;
        ## @}

        ## @name Decoding info
        ## @{
        self.sSrcFile       = sSrcFile;
        self.iLineCreated   = iLine;
        self.iLineCompleted = None;
        self.cOpTags        = 0;
        self.iLineFnIemOpMacro  = -1;
        self.iLineMnemonicMacro = -1;
        ## @}

        ## @name Intermediate input fields.
        ## @{
        self.sRawDisOpNo    = None;
        self.asRawDisParams = [];
        self.sRawIemOpFlags = None;
        self.sRawOldOpcodes = None;
        self.asCopyTests    = [];
        ## @}

        ## All the MC blocks associated with this instruction.
        self.aoMcBlocks     = []        # type: List[McBlock]

    def toString(self, fRepr = False):
        """
        Turn object into a string.

        The result is a '; ' separated list of 'name=value' pairs for the
        fields that are set, enclosed in '<>' when fRepr is True.
        """
        # Collect [name, value] pairs first; None values are dropped below.
        aasFields = [ ['opcode', self.sOpcode], ];
        if self.sPrefix:
            aasFields.append(['prefix', self.sPrefix]);
        aasFields.append(['mnemonic', self.sMnemonic]);
        aasFields.extend([['op%u' % (iOperand,), '%s:%s' % (oOperand.sWhere, oOperand.sType,)]
                          for iOperand, oOperand in enumerate(self.aoOperands, 1)]);
        if self.aoMaps:
            aasFields.append(['maps', ','.join([oMap.sName for oMap in self.aoMaps])]);
        aasFields.append(['encoding', self.sEncoding]);
        if self.dHints:
            aasFields.append(['hints', ','.join(self.dHints.keys())]);
        aasFields.append(['disenum', self.sDisEnum]);
        if self.asCpuIds:
            aasFields.append(['cpuid', ','.join(self.asCpuIds)]);
        aasFields.append(['group', self.sGroup]);
        if self.fUnused:
            aasFields.append(['unused', 'True']);
        if self.fInvalid:
            aasFields.append(['invalid', 'True']);
        aasFields.extend([
            ['invlstyle', self.sInvalidStyle],
            ['fltest',    self.asFlTest],
            ['flmodify',  self.asFlModify],
            ['flundef',   self.asFlUndefined],
            ['flset',     self.asFlSet],
            ['flclear',   self.asFlClear],
            ['mincpu',    self.sMinCpu],
            ['stats',     self.sStats],
            ['sFunction', self.sFunction],
        ]);
        if self.fStub:
            aasFields.append(['fStub', 'True']);
        if self.fUdStub:
            aasFields.append(['fUdStub', 'True']);
        if self.cOpTags:
            aasFields.append(['optags', str(self.cOpTags)]);
        if self.iLineFnIemOpMacro != -1:
            aasFields.append(['FNIEMOP_XXX', str(self.iLineFnIemOpMacro)]);
        if self.iLineMnemonicMacro != -1:
            aasFields.append(['IEMOP_MNEMMONICn', str(self.iLineMnemonicMacro)]);

        sRet = '; '.join(['%s=%s' % (sField, sValue,) for sField, sValue in aasFields if sValue is not None]);
        return '<' + sRet + '>' if fRepr else sRet;

    def __str__(self):
        """ Provide string representation. """
        return self.toString(False);

    def __repr__(self):
        """ Provide unambiguous string representation. """
        return self.toString(True);

    def copy(self, oMap = None, sOpcode = None, sSubOpcode = None, sPrefix = None):
        """
        Makes a copy of the object for the purpose of putting in a different map
        or a different place in the current map.

        The copy gets this instruction as parent.  Arguments that evaluate to
        true replace the corresponding map list, opcode, sub-opcode and prefix.
        The MC block list is not carried over.
        """
        oCopy = Instruction(self.sSrcFile, self.iLineCreated);

        # Core attributes.
        oCopy.oParent           = self;
        oCopy.sMnemonic         = self.sMnemonic;
        oCopy.sBrief            = self.sBrief;
        oCopy.asDescSections    = list(self.asDescSections);
        if oMap:
            oCopy.aoMaps        = [oMap,];
        else:
            oCopy.aoMaps        = list(self.aoMaps);
        oCopy.aoOperands        = list(self.aoOperands); ## Deeper copy?
        oCopy.sPrefix           = sPrefix or self.sPrefix;
        oCopy.sOpcode           = sOpcode or self.sOpcode;
        oCopy.sSubOpcode        = sSubOpcode or self.sSubOpcode;
        oCopy.sEncoding         = self.sEncoding;
        # Note! The flag lists are shared with the original, not duplicated.
        oCopy.asFlTest          = self.asFlTest;
        oCopy.asFlModify        = self.asFlModify;
        oCopy.asFlUndefined     = self.asFlUndefined;
        oCopy.asFlSet           = self.asFlSet;
        oCopy.asFlClear         = self.asFlClear;
        oCopy.dHints            = dict(self.dHints);
        oCopy.sDisEnum          = self.sDisEnum;
        oCopy.asCpuIds          = list(self.asCpuIds);
        oCopy.asReqFeatures     = list(self.asReqFeatures);
        oCopy.aoTests           = list(self.aoTests); ## Deeper copy?
        oCopy.sMinCpu           = self.sMinCpu;
        oCopy.oCpuExpr          = self.oCpuExpr;
        oCopy.sGroup            = self.sGroup;
        oCopy.fUnused           = self.fUnused;
        oCopy.fInvalid          = self.fInvalid;
        oCopy.sInvalidStyle     = self.sInvalidStyle;
        oCopy.sXcptType         = self.sXcptType;

        # Implementation attributes.
        oCopy.sStats            = self.sStats;
        oCopy.sFunction         = self.sFunction;
        oCopy.fStub             = self.fStub;
        oCopy.fUdStub           = self.fUdStub;

        # Decoding info (source file and creation line were passed to the constructor).
        oCopy.iLineCompleted    = self.iLineCompleted;
        oCopy.cOpTags           = self.cOpTags;
        oCopy.iLineFnIemOpMacro = self.iLineFnIemOpMacro;
        oCopy.iLineMnemonicMacro = self.iLineMnemonicMacro;

        # Intermediate input fields.
        oCopy.sRawDisOpNo       = self.sRawDisOpNo;
        oCopy.asRawDisParams    = list(self.asRawDisParams);
        oCopy.sRawIemOpFlags    = self.sRawIemOpFlags;
        oCopy.sRawOldOpcodes    = self.sRawOldOpcodes;
        oCopy.asCopyTests       = list(self.asCopyTests);

        return oCopy;

    def getOpcodeByte(self):
        """
        Decodes sOpcode into a byte range integer value.
        Raises exception if sOpcode is None or invalid.

        Supported forms: '0xNN' (the value itself), '/r' (digit << 3),
        '11/r' (digit << 3 with 0xc0 ORed in) and '!11/r' (digit << 3 with
        0x80 ORed in).
        """
        if self.sOpcode is None:
            raise Exception('No opcode byte for %s!' % (self,));
        sSpec   = str(self.sOpcode); # pylint type confusion workaround.
        cchSpec = len(sSpec);

        # Full hex byte form.
        if sSpec.startswith('0x'):
            return int(sSpec, 16);

        # The /r form:
        if cchSpec == 2 and sSpec[0] == '/' and sSpec[1].isdigit():
            return int(sSpec[1:]) << 3;

        # The 11/r form:
        if cchSpec == 4 and sSpec.startswith('11/') and sSpec[-1].isdigit():
            return 0xc0 | (int(sSpec[-1:]) << 3);

        # The !11/r form (returns a value with only the topmost bit of the two high bits set):
        ## @todo this doesn't really work...
        if cchSpec == 5 and sSpec.startswith('!11/') and sSpec[-1].isdigit():
            return 0x80 | (int(sSpec[-1:]) << 3);

        raise Exception('unsupported opcode byte spec "%s" for %s' % (sSpec, self,));

    @staticmethod
    def _flagsToIntegerMask(asFlags):
        """
        Returns the integer mask value for asFlags (0 if None or empty).
        """
        fMask = 0;
        for sFlag in asFlags or []:
            sConstant = g_kdEFlagsMnemonics[sFlag];
            assert sConstant[0] != '!', sConstant
            fMask |= g_kdX86EFlagsConstants[sConstant];
        return fMask;

    def getTestedFlagsMask(self):
        """ Returns asFlTest into a integer mask value """
        return self._flagsToIntegerMask(self.asFlTest);

    def getModifiedFlagsMask(self):
        """ Returns asFlModify into a integer mask value """
        return self._flagsToIntegerMask(self.asFlModify);

    def getUndefinedFlagsMask(self):
        """ Returns asFlUndefined into a integer mask value """
        return self._flagsToIntegerMask(self.asFlUndefined);

    def getSetFlagsMask(self):
        """ Returns asFlSet into a integer mask value """
        return self._flagsToIntegerMask(self.asFlSet);

    def getClearedFlagsMask(self):
        """ Returns asFlClear into a integer mask value """
        return self._flagsToIntegerMask(self.asFlClear);

    @staticmethod
    def _flagsToC(asFlags):
        """
        Returns asFlags converted to X86_EFL_XXX ored together C-style ('0' if None or empty).
        """
        if not asFlags:
            return '0';
        asConstants = [];
        for sFlag in asFlags:
            sConstant = g_kdEFlagsMnemonics[sFlag];
            assert sConstant[0] != '!', sConstant
            asConstants.append(sConstant);
        return ' | '.join(asConstants);

    def getTestedFlagsCStyle(self):
        """ Returns asFlTest as C constants ored together. """
        return self._flagsToC(self.asFlTest);

    def getModifiedFlagsCStyle(self):
        """ Returns asFlModify as C constants ored together. """
        return self._flagsToC(self.asFlModify);

    def getUndefinedFlagsCStyle(self):
        """ Returns asFlUndefined as C constants ored together. """
        return self._flagsToC(self.asFlUndefined);

    def getSetFlagsCStyle(self):
        """ Returns asFlSet as C constants ored together. """
        return self._flagsToC(self.asFlSet);

    def getClearedFlagsCStyle(self):
        """ Returns asFlClear as C constants ored together. """
        return self._flagsToC(self.asFlClear);

    def onlyInVexMaps(self):
        """ Returns True if only in VEX maps, otherwise False.  (No maps -> False) """
        if not self.aoMaps:
            return False;
        return all(oMap.isVexMap() for oMap in self.aoMaps);
1718
1719
1720
## All the instructions.
g_aoAllInstructions = [] # type: List[Instruction]

## All the instructions indexed by statistics name (opstat).
g_dAllInstructionsByStat = {} # type: Dict[str, Instruction]

## All the instructions indexed by function name (opfunction).
## Each entry is a list of instructions.
g_dAllInstructionsByFunction = {} # type: Dict[str, List[Instruction]]

## Instructions tagged by oponlytest
g_aoOnlyTestInstructions = [] # type: List[Instruction]
1732
## Instruction maps.
##
## The first positional argument is the map name (the key of g_dInstructionMaps
## below).  The optional second positional argument appears to be the name of
## the IEM decoder table (see g_dInstructionMapsByIemName) - confirm against
## the InstructionMap constructor, which is not part of this section.
g_aoInstructionMaps = [
    # One byte opcode map and the groups selected by a single lead opcode byte.
    InstructionMap('one', 'g_apfnOneByteMap', sSelector = 'byte'),
    InstructionMap('grp1_80', asLeadOpcodes = ['0x80',], sSelector = '/r'),
    InstructionMap('grp1_81', asLeadOpcodes = ['0x81',], sSelector = '/r'),
    InstructionMap('grp1_82', asLeadOpcodes = ['0x82',], sSelector = '/r'),
    InstructionMap('grp1_83', asLeadOpcodes = ['0x83',], sSelector = '/r'),
    InstructionMap('grp1a', asLeadOpcodes = ['0x8f',], sSelector = '/r'),
    InstructionMap('grp2_c0', asLeadOpcodes = ['0xc0',], sSelector = '/r'),
    InstructionMap('grp2_c1', asLeadOpcodes = ['0xc1',], sSelector = '/r'),
    InstructionMap('grp2_d0', asLeadOpcodes = ['0xd0',], sSelector = '/r'),
    InstructionMap('grp2_d1', asLeadOpcodes = ['0xd1',], sSelector = '/r'),
    InstructionMap('grp2_d2', asLeadOpcodes = ['0xd2',], sSelector = '/r'),
    InstructionMap('grp2_d3', asLeadOpcodes = ['0xd3',], sSelector = '/r'),
    ## @todo g_apfnEscF1_E0toFF
    InstructionMap('grp3_f6', asLeadOpcodes = ['0xf6',], sSelector = '/r'),
    InstructionMap('grp3_f7', asLeadOpcodes = ['0xf7',], sSelector = '/r'),
    InstructionMap('grp4', asLeadOpcodes = ['0xfe',], sSelector = '/r'),
    InstructionMap('grp5', asLeadOpcodes = ['0xff',], sSelector = '/r'),
    InstructionMap('grp11_c6_m', asLeadOpcodes = ['0xc6',], sSelector = '!11 /r'),
    InstructionMap('grp11_c6_r', asLeadOpcodes = ['0xc6',], sSelector = '11'), # xabort
    InstructionMap('grp11_c7_m', asLeadOpcodes = ['0xc7',], sSelector = '!11 /r'),
    InstructionMap('grp11_c7_r', asLeadOpcodes = ['0xc7',], sSelector = '11'), # xbegin

    # Two byte (0x0f xx) opcode map and its groups.
    InstructionMap('two0f', 'g_apfnTwoByteMap', asLeadOpcodes = ['0x0f',], sDisParse = 'IDX_ParseTwoByteEsc'),
    InstructionMap('grp6', 'g_apfnGroup6', asLeadOpcodes = ['0x0f', '0x00',], sSelector = '/r'),
    InstructionMap('grp7_m', 'g_apfnGroup7Mem', asLeadOpcodes = ['0x0f', '0x01',], sSelector = '!11 /r'),
    InstructionMap('grp7_r', asLeadOpcodes = ['0x0f', '0x01',], sSelector = '11'),
    InstructionMap('grp8', asLeadOpcodes = ['0x0f', '0xba',], sSelector = '/r'),
    InstructionMap('grp9', 'g_apfnGroup9RegReg', asLeadOpcodes = ['0x0f', '0xc7',], sSelector = 'mod /r'),
    ## @todo What about g_apfnGroup9MemReg?
    InstructionMap('grp10', None, asLeadOpcodes = ['0x0f', '0xb9',], sSelector = '/r'), # UD1 /w modr/m
    InstructionMap('grp12', 'g_apfnGroup12RegReg', asLeadOpcodes = ['0x0f', '0x71',], sSelector = 'mod /r'),
    InstructionMap('grp13', 'g_apfnGroup13RegReg', asLeadOpcodes = ['0x0f', '0x72',], sSelector = 'mod /r'),
    InstructionMap('grp14', 'g_apfnGroup14RegReg', asLeadOpcodes = ['0x0f', '0x73',], sSelector = 'mod /r'),
    InstructionMap('grp15', 'g_apfnGroup15MemReg', asLeadOpcodes = ['0x0f', '0xae',], sSelector = 'memreg /r'),
    ## @todo What about g_apfnGroup15RegReg?
    InstructionMap('grp16', asLeadOpcodes = ['0x0f', '0x18',], sSelector = 'mod /r'),
    InstructionMap('grpA17', asLeadOpcodes = ['0x0f', '0x78',], sSelector = '/r'), # AMD: EXTRQ weirdness
    InstructionMap('grpP', asLeadOpcodes = ['0x0f', '0x0d',], sSelector = '/r'), # AMD: prefetch

    # Three byte (0x0f 0x38 / 0x0f 0x3a) opcode maps.
    InstructionMap('three0f38', 'g_apfnThreeByte0f38', asLeadOpcodes = ['0x0f', '0x38',]),
    InstructionMap('three0f3a', 'g_apfnThreeByte0f3a', asLeadOpcodes = ['0x0f', '0x3a',]),

    # VEX encoded maps and groups.
    InstructionMap('vexmap1', 'g_apfnVexMap1', sEncoding = 'vex1'),
    InstructionMap('vexgrp12', 'g_apfnVexGroup12RegReg', sEncoding = 'vex1', asLeadOpcodes = ['0x71',], sSelector = 'mod /r'),
    InstructionMap('vexgrp13', 'g_apfnVexGroup13RegReg', sEncoding = 'vex1', asLeadOpcodes = ['0x72',], sSelector = 'mod /r'),
    InstructionMap('vexgrp14', 'g_apfnVexGroup14RegReg', sEncoding = 'vex1', asLeadOpcodes = ['0x73',], sSelector = 'mod /r'),
    InstructionMap('vexgrp15', 'g_apfnVexGroup15MemReg', sEncoding = 'vex1', asLeadOpcodes = ['0xae',], sSelector = 'memreg /r'),
    InstructionMap('vexgrp17', 'g_apfnVexGroup17_f3', sEncoding = 'vex1', asLeadOpcodes = ['0xf3',], sSelector = '/r'),

    InstructionMap('vexmap2', 'g_apfnVexMap2', sEncoding = 'vex2'),
    InstructionMap('vexmap3', 'g_apfnVexMap3', sEncoding = 'vex3'),

    # 3DNow! and XOP encoded maps and groups.
    InstructionMap('3dnow', asLeadOpcodes = ['0x0f', '0x0f',]),
    InstructionMap('xopmap8', sEncoding = 'xop8'),
    InstructionMap('xopmap9', sEncoding = 'xop9'),
    InstructionMap('xopgrp1', sEncoding = 'xop9', asLeadOpcodes = ['0x01'], sSelector = '/r'),
    InstructionMap('xopgrp2', sEncoding = 'xop9', asLeadOpcodes = ['0x02'], sSelector = '/r'),
    InstructionMap('xopgrp3', sEncoding = 'xop9', asLeadOpcodes = ['0x12'], sSelector = '/r'),
    InstructionMap('xopmap10', sEncoding = 'xop10'),
    InstructionMap('xopgrp4', sEncoding = 'xop10', asLeadOpcodes = ['0x12'], sSelector = '/r'),
];
## The instruction maps indexed by map name (sName).
g_dInstructionMaps = { oMap.sName: oMap for oMap in g_aoInstructionMaps };
## The instruction maps indexed by sIemName.
## NOTE(review): many maps above are created without an IEM name; if they all
## get the same default sIemName, only the last of them survives in this
## dictionary - verify against the InstructionMap constructor.
g_dInstructionMapsByIemName = { oMap.sIemName: oMap for oMap in g_aoInstructionMaps };
1798
1799
1800#
1801# Decoder functions.
1802#
1803
class DecoderFunction(object):
    """
    Decoder function.

    Records where a decoder function is defined and the raw lines it is made
    up of.  Mainly used for scoped searches for variables used in microcode
    blocks.
    """
    def __init__(self, sSrcFile, iBeginLine, sName, asDefArgs):
        # Location of the definition.
        self.sSrcFile = sSrcFile;       ##< The source file the function is defined in.
        self.iBeginLine = iBeginLine;   ##< The start line.
        # Identity.
        self.sName = sName;             ##< The function name.
        self.asDefArgs = asDefArgs;     ##< The FNIEMOP*DEF/STUB* macro argument list, 0th element is the macro name.
        # Filled in by complete().
        self.iEndLine = -1;             ##< The line the function (probably) ends on, -1 until completed.
        self.asLines = [] # type: List[str] ##< The raw lines the function is made up of.

    def complete(self, iEndLine, asLines):
        """
        Completes the function by recording the end line and the raw lines.

        May only be called once per function (asserted).
        """
        assert self.iEndLine == -1;
        (self.iEndLine, self.asLines) = (iEndLine, asLines);
1826
1827
1828#
1829# "Microcode" statements and blocks
1830#
1831
class McStmt(object):
    """
    A single statement of a microcode block.

    Holds the statement name and the parameter strings, and knows how to
    render itself as source text again.
    """
    def __init__(self, sName, asParams):
        self.sName = sName;         ##< 'IEM_MC_XXX' or 'C++'.
        self.asParams = asParams;   ##< The statement parameters.
        self.oUser = None;          ##< Initialized to None; not used by this class.

    def renderCode(self, cchIndent = 0):
        """
        Returns the statement as a single source line, indented by cchIndent
        spaces and terminated by a semicolon and a newline.
        """
        return ''.join((' ' * cchIndent, self.sName, '(', ', '.join(self.asParams), ');\n'));

    @staticmethod
    def renderCodeForList(aoStmts, cchIndent = 0):
        """
        Returns the concatenated rendering of all the statements in aoStmts.
        """
        return ''.join(oCur.renderCode(cchIndent) for oCur in aoStmts);

    @staticmethod
    def findStmtByNames(aoStmts, dNames):
        """
        Searches aoStmts, descending into the branches of conditionals, and
        returns the first statement whose name is in dNames.  Returns None if
        there is no such statement.

        Note! dNames is a dictionary for the sake of quick lookups, only the
              keys are used.
        """
        for oCur in aoStmts:
            if oCur.sName in dNames:
                return oCur;
            if not isinstance(oCur, McStmtCond):
                continue;
            # The IF-branch is searched before the ELSE-branch.
            oFound = McStmt.findStmtByNames(oCur.aoIfBranch, dNames) \
                  or McStmt.findStmtByNames(oCur.aoElseBranch, dNames);
            if oFound:
                return oFound;
        return None;

    @staticmethod
    def countStmtsByName(aoStmts, dNames, dRet):
        """
        Counts the statements in aoStmts (including those in the branches of
        conditionals) with a name found in dNames.

        The per-name occurrence counts are accumulated in dRet.
        Returns the total number of hits.
        """
        cTotal = 0;
        for oCur in aoStmts:
            sStmtName = oCur.sName;
            if sStmtName in dNames:
                dRet[sStmtName] = dRet.get(sStmtName, 0) + 1;
                cTotal += 1;
            if isinstance(oCur, McStmtCond):
                for aoBranch in (oCur.aoIfBranch, oCur.aoElseBranch):
                    cTotal += McStmt.countStmtsByName(aoBranch, dNames, dRet);
        return cTotal;

    def isCppStmt(self):
        """ Returns True if the statement name starts with 'C++'. """
        return self.sName[:3] == 'C++';
1894
class McStmtCond(McStmt):
    """
    Base class for conditional statements (IEM_MC_IF_XXX, IEM_MC_NATIVE_IF).

    Adds an IF-branch and an ELSE-branch statement list to the plain statement.
    """
    def __init__(self, sName, asParams, aoIfBranch = None, aoElseBranch = None):
        McStmt.__init__(self, sName, asParams);
        # The branch lists are always private copies of what the caller passed.
        self.aoIfBranch = list(aoIfBranch) if aoIfBranch is not None else [];
        self.aoElseBranch = list(aoElseBranch) if aoElseBranch is not None else [];
        self.oIfBranchAnnotation = None;    ##< User specific IF-branch annotation.
        self.oElseBranchAnnotation = None;  ##< User specific ELSE-branch annotation.
        ## Inserted into the ELSE/ENDIF macro names when rendering IEM_MC_NATIVE_IF.
        self.sNativeInfix = '_NATIVE' if sName == 'IEM_MC_NATIVE_IF' else '';

    def renderCode(self, cchIndent = 0):
        """
        Renders the conditional including both branches; the ELSE part is
        left out when the ELSE-branch is empty.
        """
        sPad = ' ' * cchIndent;
        asParts = [
            sPad + self.sName + '(' + ', '.join(self.asParams) + ') {\n',
            self.renderCodeForList(self.aoIfBranch, cchIndent + 4),
        ];
        if self.aoElseBranch:
            asParts.append(sPad + '} IEM_MC%s_ELSE() {\n' % (self.sNativeInfix,));
            asParts.append(self.renderCodeForList(self.aoElseBranch, cchIndent + 4));
        asParts.append(sPad + '} IEM_MC%s_ENDIF();\n' % (self.sNativeInfix,));
        return ''.join(asParts);
1915
class McStmtNativeIf(McStmtCond):
    """
    IEM_MC_NATIVE_IF

    The single statement parameter is the architecture list joined by '|',
    or '0' if the list is empty.
    """
    def __init__(self, sName, asArchitectures):
        sParam = '|'.join(asArchitectures) if asArchitectures else '0';
        McStmtCond.__init__(self, sName, [sParam,]);
        self.asArchitectures = asArchitectures; ##< The architecture list as given by the caller.
1921
class McStmtVar(McStmt):
    """
    IEM_MC_LOCAL, IEM_MC_LOCAL_ASSIGN, IEM_MC_LOCAL_CONST

    A variable declaring statement with type, name and optional value.
    """
    def __init__(self, sName, asParams, sType, sVarName, sValue = None):
        McStmt.__init__(self, sName, asParams);
        (self.sType, self.sVarName) = (sType, sVarName);
        self.sValue = sValue; ##< None if no assigned / const value.
1929
class McStmtArg(McStmtVar):
    """
    IEM_MC_ARG, IEM_MC_ARG_CONST, IEM_MC_ARG_LOCAL_REF

    A variable statement that additionally carries the argument number and an
    optional reference.  The constant value ends up in McStmtVar.sValue.
    """
    def __init__(self, sName, asParams, sType, sVarName, iArg, sConstValue = None, sRef = None, sRefType = 'none'):
        McStmtVar.__init__(self, sName, asParams, sType, sVarName, sConstValue);
        self.iArg = iArg;           ##< The argument number.
        self.sRef = sRef;           ##< The reference string (local variable, register).
        self.sRefType = sRefType;   ##< The kind of reference: 'local', 'none'.
        assert sRefType in ('none', 'local');
1938
class McStmtCall(McStmt):
    """
    IEM_MC_CALL_*

    iFnParam is the index in asParams of the function being called; the
    parameter following it is recorded as the start of the call arguments.
    iRcNameParam is the index of the return code variable parameter, negative
    if there is none.
    """
    def __init__(self, sName, asParams, iFnParam, iRcNameParam = -1):
        McStmt.__init__(self, sName, asParams);
        self.idxFn = iFnParam;
        self.idxParams = iFnParam + 1;
        self.sFn = asParams[iFnParam];
        if iRcNameParam >= 0:
            self.iRcName = asParams[iRcNameParam];
        else:
            self.iRcName = None;
1947
class McStmtAssertEFlags(McStmt):
    """
    IEM_MC_ASSERT_EFLAGS

    The two parameters are the tested and the modified flags of the given
    instruction, both in C style.
    """
    def __init__(self, oInstruction):
        sTested = oInstruction.getTestedFlagsCStyle();
        sModified = oInstruction.getModifiedFlagsCStyle();
        McStmt.__init__(self, 'IEM_MC_ASSERT_EFLAGS', [sTested, sModified,]);
1955
1956
class McCppGeneric(McStmt):
    """
    Generic C++/C statement.

    The code is kept verbatim as the one and only statement parameter.
    """
    def __init__(self, sCode, fDecode = True, sName = 'C++', cchIndent = 0):
        McStmt.__init__(self, sName, [sCode,]);
        self.fDecode = fDecode;     ##< Selects the 'decode' (True) or 'normal' (False) annotation.
        self.cchIndent = cchIndent; ##< Indentation added to what renderCode is given.

    def renderCode(self, cchIndent = 0):
        """
        Renders the code with an annotation comment in front of each newline,
        so multi-line code gets one annotation per line.
        """
        sAnnotation = ' // C++ decode\n' if self.fDecode else ' // C++ normal\n';
        sCode = ' ' * (cchIndent + self.cchIndent) + self.asParams[0] + '\n';
        return sCode.replace('\n', sAnnotation);
1974
class McCppCall(McCppGeneric):
    """
    A generic C++/C call statement.

    The sName is still 'C++', so the function name is the first parameter
    and the call arguments are the subsequent ones.
    """
    def __init__(self, sFnName, asArgs, fDecode = True, cchIndent = 0):
        McCppGeneric.__init__(self, sFnName, fDecode = fDecode, cchIndent = cchIndent);
        self.asParams += asArgs;

    def renderCode(self, cchIndent = 0):
        """
        Renders the call as 'fn(arg, ...);' followed by the annotation comment.
        """
        sAnnotation = ' // C++ decode\n' if self.fDecode else ' // C++ normal\n';
        sPad = ' ' * (cchIndent + self.cchIndent);
        return sPad + self.asParams[0] + '(' + ', '.join(self.asParams[1:]) + ');' + sAnnotation;
1994
class McCppCond(McStmtCond):
    """
    C++/C 'if' statement.

    The condition expression is the one and only statement parameter.
    """
    def __init__(self, sCode, fDecode = True, aoIfBranch = None, aoElseBranch = None, cchIndent = 0):
        McStmtCond.__init__(self, 'C++/if', [sCode,], aoIfBranch, aoElseBranch);
        self.fDecode = fDecode;     ##< Selects the 'decode' (True) or 'normal' (False) annotation.
        self.cchIndent = cchIndent; ##< Indentation added to what renderCode is given.

    def renderCode(self, cchIndent = 0):
        """
        Renders the if statement with braces on lines of their own; the else
        part is left out when the ELSE-branch is empty.
        """
        cchOuter = cchIndent + self.cchIndent;
        sPad = ' ' * cchOuter;
        sAnnotation = '// C++ decode' if self.fDecode else '// C++ normal';
        asParts = [
            sPad + 'if (' + self.asParams[0] + ') ' + sAnnotation + '\n',
            sPad + '{\n',
            self.renderCodeForList(self.aoIfBranch, cchOuter + 4),
            sPad + '}\n',
        ];
        if self.aoElseBranch:
            asParts.extend([
                sPad + 'else ' + sAnnotation + '\n',
                sPad + '{\n',
                self.renderCodeForList(self.aoElseBranch, cchOuter + 4),
                sPad + '}\n',
            ]);
        return ''.join(asParts);
2017
class McCppPreProc(McCppGeneric):
    """
    C++/C Preprocessor directive.

    Always rendered without indentation and without annotation comment.
    """
    def __init__(self, sCode):
        McCppGeneric.__init__(self, sCode, False, sName = 'C++/preproc');

    def renderCode(self, cchIndent = 0):
        """ Returns the directive followed by a newline, cchIndent is ignored. """
        sDirective = self.asParams[0];
        return sDirective + '\n';
2027
2028
## IEM_MC_F_XXX values.
## Each flag name maps to a tuple of other flags that McBlock.parseMcBegin
## also sets in dsMcFlags whenever the flag itself is given.
g_kdMcFlags = {
    'IEM_MC_F_ONLY_8086': (),
    'IEM_MC_F_MIN_186': (),
    'IEM_MC_F_MIN_286': (),
    'IEM_MC_F_NOT_286_OR_OLDER': (),
    'IEM_MC_F_MIN_386': ('IEM_MC_F_NOT_286_OR_OLDER',),
    'IEM_MC_F_MIN_486': ('IEM_MC_F_NOT_286_OR_OLDER',),
    'IEM_MC_F_MIN_PENTIUM': ('IEM_MC_F_NOT_286_OR_OLDER',),
    'IEM_MC_F_MIN_PENTIUM_II': ('IEM_MC_F_NOT_286_OR_OLDER',),
    'IEM_MC_F_MIN_CORE': ('IEM_MC_F_NOT_286_OR_OLDER',),
    'IEM_MC_F_64BIT': ('IEM_MC_F_NOT_286_OR_OLDER',),
    'IEM_MC_F_NOT_64BIT': (),
};
## IEM_CIMPL_F_XXX values.
## Each flag name maps to a tuple of other flags that McBlock.parseCImplFlags
## also sets in dsCImplFlags whenever the flag itself is given.
g_kdCImplFlags = {
    'IEM_CIMPL_F_BRANCH_DIRECT': (),
    'IEM_CIMPL_F_BRANCH_INDIRECT': (),
    'IEM_CIMPL_F_BRANCH_RELATIVE': (),
    'IEM_CIMPL_F_BRANCH_CONDITIONAL': (),
    'IEM_CIMPL_F_BRANCH_FAR': (),
    'IEM_CIMPL_F_BRANCH_ANY': ('IEM_CIMPL_F_BRANCH_DIRECT', 'IEM_CIMPL_F_BRANCH_INDIRECT',
                               'IEM_CIMPL_F_BRANCH_RELATIVE',),
    'IEM_CIMPL_F_BRANCH_STACK': (),
    'IEM_CIMPL_F_BRANCH_STACK_FAR': (),
    'IEM_CIMPL_F_MODE': (),
    'IEM_CIMPL_F_RFLAGS': (),
    'IEM_CIMPL_F_INHIBIT_SHADOW': (),
    'IEM_CIMPL_F_STATUS_FLAGS': (),
    'IEM_CIMPL_F_CHECK_IRQ_AFTER': (),
    'IEM_CIMPL_F_CHECK_IRQ_BEFORE': (),
    'IEM_CIMPL_F_CHECK_IRQ_BEFORE_AND_AFTER': ('IEM_CIMPL_F_CHECK_IRQ_BEFORE', 'IEM_CIMPL_F_CHECK_IRQ_AFTER',),
    'IEM_CIMPL_F_VMEXIT': (),
    'IEM_CIMPL_F_FPU': (),
    'IEM_CIMPL_F_REP': (),
    'IEM_CIMPL_F_IO': (),
    'IEM_CIMPL_F_END_TB': (),
    'IEM_CIMPL_F_XCPT': ('IEM_CIMPL_F_BRANCH_INDIRECT', 'IEM_CIMPL_F_BRANCH_FAR',
                         'IEM_CIMPL_F_MODE', 'IEM_CIMPL_F_RFLAGS', 'IEM_CIMPL_F_VMEXIT', ),
    'IEM_CIMPL_F_CALLS_CIMPL': (),
    'IEM_CIMPL_F_CALLS_AIMPL': (),
    'IEM_CIMPL_F_CALLS_AIMPL_WITH_FXSTATE': (),
};
2072class McBlock(object):
2073 """
2074 Microcode block (IEM_MC_BEGIN ... IEM_MC_END, IEM_MC_DEFER_TO_CIMPL_x_RET).
2075 """
2076
## @name Macro expansion types.
## Values for the iMacroExp member.
## @{
kiMacroExp_None = 0;    ##< No macro expansion; what iMacroExp is initialized to.
kiMacroExp_Entire = 1;  ##< Entire block (iBeginLine == iEndLine), original line may contain multiple blocks.
kiMacroExp_Partial = 2; ##< Partial/mixed (cmpxchg16b), safe to assume single block.
## @}
2083
2084 def __init__(self, sSrcFile, iBeginLine, offBeginLine, oFunction, iInFunction,
2085 oInstruction = None, cchIndent = None, fDeferToCImpl = False):
2086 ## Set if IEM_MC_DEFER_TO_CIMPL_0_RET and friends, clear if IEM_MC_BEGIN/END block.
2087 self.fDeferToCImpl = fDeferToCImpl;
2088 ## The source file containing the block.
2089 self.sSrcFile = sSrcFile;
2090 ## The line with the IEM_MC_BEGIN/IEM_MC_DEFER_TO_CIMPL_X_RET statement.
2091 self.iBeginLine = iBeginLine;
2092 ## The offset of the IEM_MC_BEGIN/IEM_MC_DEFER_TO_CIMPL_X_RET statement within the line.
2093 self.offBeginLine = offBeginLine;
2094 ## The line with the IEM_MC_END statement / last line of IEM_MC_DEFER_TO_CIMPL_X_RET.
2095 self.iEndLine = -1;
2096 ## The offset of the IEM_MC_END statement within the line / semicolon offset for defer-to.
2097 self.offEndLine = 0;
2098 ## The offset following the IEM_MC_END/IEM_MC_DEFER_TO_CIMPL_X_RET semicolon.
2099 self.offAfterEnd = 0;
2100 ## The function the block resides in.
2101 self.oFunction = oFunction;
2102 ## The name of the function the block resides in. DEPRECATED.
2103 self.sFunction = oFunction.sName;
2104 ## The block number within the function.
2105 self.iInFunction = iInFunction;
2106 ## The instruction this block is associated with - can be None.
2107 self.oInstruction = oInstruction # type: Instruction
2108 ## Indentation level of the block.
2109 self.cchIndent = cchIndent if cchIndent else offBeginLine;
2110 ## The raw lines the block is made up of.
2111 self.asLines = [] # type: List[str]
2112 ## Indicates whether the block includes macro expansion parts (kiMacroExp_None,
2113 ## kiMacroExp_Entrie, kiMacroExp_Partial).
2114 self.iMacroExp = self.kiMacroExp_None;
2115 ## IEM_MC_BEGIN: Argument count.
2116 self.cArgs = -1;
2117 ## IEM_MC_ARG, IEM_MC_ARG_CONST, IEM_MC_ARG_LOCAL_REF, IEM_MC_ARG_LOCAL_EFLAGS.
2118 self.aoArgs = [] # type: List[McStmtArg]
2119 ## IEM_MC_BEGIN: Locals count.
2120 self.cLocals = -1;
2121 ## IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, IEM_MC_ARG_LOCAL_EFLAGS.
2122 self.aoLocals = [] # type: List[McStmtVar]
2123 ## IEM_MC_BEGIN: IEM_MC_F_XXX dictionary
2124 self.dsMcFlags = {} # type: Dict[str, bool]
2125 ## IEM_MC_[DEFER_TO|CALL]_CIMPL_XXX: IEM_CIMPL_F_XXX dictionary
2126 self.dsCImplFlags = {} # type: Dict[str, bool]
2127 ## Decoded statements in the block.
2128 self.aoStmts = [] # type: List[McStmt]
2129
2130 def complete(self, iEndLine, offEndLine, offAfterEnd, asLines):
2131 """
2132 Completes the microcode block.
2133 """
2134 assert self.iEndLine == -1;
2135 self.iEndLine = iEndLine;
2136 self.offEndLine = offEndLine;
2137 self.offAfterEnd = offAfterEnd;
2138 self.asLines = asLines;
2139
def raiseDecodeError(self, sRawCode, off, sMessage):
    """
    Raises a decoding error (ParserException) for offset off in sRawCode.

    The reported line is iBeginLine plus the number of newlines preceding
    off, the reported column is one-based.
    """
    cPrecedingNewlines = sRawCode.count('\n', 0, off);
    offLineStart = sRawCode.rfind('\n', 0, off) + 1; # rfind gives -1 if on the first line, so this becomes 0.
    iColumn = off - offLineStart + 1;
    raise ParserException('%s:%d:%d: parsing error: %s'
                          % (self.sSrcFile, self.iBeginLine + cPrecedingNewlines, iColumn, sMessage,));
2146
def raiseStmtError(self, sName, sMessage):
    """
    Raises a statement parser error (ParserException) for statement sName,
    using the first line of the block as location.
    """
    tFmtArgs = (self.sSrcFile, self.iBeginLine, sName, sMessage,);
    raise ParserException('%s:%d: %s: parsing error: %s' % tFmtArgs);
2150
def checkStmtParamCount(self, sName, asParams, cParamsExpected):
    """
    Checks that asParams holds exactly cParamsExpected parameters.

    Returns True if it does, raises ParserException if it does not.
    """
    cParamsFound = len(asParams);
    if cParamsFound == cParamsExpected:
        return True;
    raise ParserException('%s:%d: %s: Expected %s parameters, found %s!'
                          % (self.sSrcFile, self.iBeginLine, sName, cParamsExpected, cParamsFound,));
2157
@staticmethod
def parseMcGeneric(oSelf, sName, asParams):
    """ Generic parser, wraps name and parameters in a plain McStmt object. """
    _ = oSelf; # Not needed; part of the common parser signature.
    oStmt = McStmt(sName, asParams);
    return oStmt;
2163
@staticmethod
def parseMcGenericCond(oSelf, sName, asParams):
    """ Generic parser, wraps name and parameters in a plain McStmtCond object. """
    _ = oSelf; # Not needed; part of the common parser signature.
    oStmt = McStmtCond(sName, asParams);
    return oStmt;
2169
## The architecture names parseMcNativeIf accepts in IEM_MC_NATIVE_IF.
## Only the keys are checked there, the values are not used by it.
kdArchVals = {
    'RT_ARCH_VAL_X86': True,
    'RT_ARCH_VAL_AMD64': True,
    'RT_ARCH_VAL_ARM32': True,
    'RT_ARCH_VAL_ARM64': True,
    'RT_ARCH_VAL_SPARC32': True,
    'RT_ARCH_VAL_SPARC64': True,
};
2178
@staticmethod
def parseMcNativeIf(oSelf, sName, asParams):
    """
    IEM_MC_NATIVE_IF

    The single parameter is either '0' (no architectures) or a list of
    architecture names separated by '|', each of which must be in kdArchVals.
    """
    oSelf.checkStmtParamCount(sName, asParams, 1);
    sArchSpec = asParams[0];
    asArchitectures = [];
    if sArchSpec.strip() != '0':
        asArchitectures = [sPart.strip() for sPart in sArchSpec.split('|')];
        for sArch in asArchitectures:
            if sArch not in oSelf.kdArchVals:
                oSelf.raiseStmtError(sName, 'Unknown architecture: %s' % (sArch,));
    return McStmtNativeIf(sName, asArchitectures);
2191
@staticmethod
def parseMcBegin(oSelf, sName, asParams):
    """
    IEM_MC_BEGIN

    Records the argument and locals counts and the IEM_MC_F_XXX and
    IEM_CIMPL_F_XXX flags in the block.  Using the statement a second time in
    the same block is an error.
    """
    oSelf.checkStmtParamCount(sName, asParams, 4);
    fSeenBefore = oSelf.cArgs != -1 or oSelf.cLocals != -1 or oSelf.dsMcFlags;
    if fSeenBefore:
        oSelf.raiseStmtError(sName, 'Used more than once!');
    (sArgCount, sLocalCount, sMcFlags, sCImplFlags) = asParams;
    oSelf.cArgs = int(sArgCount);
    oSelf.cLocals = int(sLocalCount);

    # The IEM_MC_F_XXX flags, each of which may imply further flags.
    if sMcFlags != '0':
        for sFlag in [sPart.strip() for sPart in sMcFlags.split('|')]:
            if sFlag not in g_kdMcFlags:
                oSelf.raiseStmtError(sName, 'Unknown flag: %s' % (sFlag, ));
            oSelf.dsMcFlags[sFlag] = True;
            oSelf.dsMcFlags.update(dict.fromkeys(g_kdMcFlags[sFlag], True));

    # The IEM_CIMPL_F_XXX flags.
    if sCImplFlags != '0':
        oSelf.parseCImplFlags(sName, sCImplFlags);

    return McBlock.parseMcGeneric(oSelf, sName, asParams);
2214
@staticmethod
def parseMcArg(oSelf, sName, asParams):
    """ IEM_MC_ARG - parameters: type, name, argument number. """
    oSelf.checkStmtParamCount(sName, asParams, 3);
    (sType, sVarName, sArgNo) = asParams;
    oArg = McStmtArg(sName, asParams, sType, sVarName, int(sArgNo));
    oSelf.aoArgs.append(oArg);
    return oArg;
2222
@staticmethod
def parseMcArgConst(oSelf, sName, asParams):
    """ IEM_MC_ARG_CONST - parameters: type, name, constant value, argument number. """
    oSelf.checkStmtParamCount(sName, asParams, 4);
    (sType, sVarName, sConstValue, sArgNo) = asParams;
    oArg = McStmtArg(sName, asParams, sType, sVarName, int(sArgNo), sConstValue = sConstValue);
    oSelf.aoArgs.append(oArg);
    return oArg;
2230
@staticmethod
def parseMcArgLocalRef(oSelf, sName, asParams):
    """ IEM_MC_ARG_LOCAL_REF - parameters: type, name, referenced local, argument number. """
    oSelf.checkStmtParamCount(sName, asParams, 4);
    (sType, sVarName, sLocalName, sArgNo) = asParams;
    oArg = McStmtArg(sName, asParams, sType, sVarName, int(sArgNo), sRef = sLocalName, sRefType = 'local');
    oSelf.aoArgs.append(oArg);
    return oArg;
2238
@staticmethod
def parseMcArgLocalEFlags(oSelf, sName, asParams):
    """
    IEM_MC_ARG_LOCAL_EFLAGS - parameters: argument name, local name, argument number.

    Returns two statements (local + argument) rather than one.
    """
    oSelf.checkStmtParamCount(sName, asParams, 3);
    (sArgName, sLocalName, sArgNo) = asParams;

    # Note! We split this one up into IEM_MC_LOCAL and IEM_MC_ARG_LOCAL_REF.
    oLocal = McStmtVar('IEM_MC_LOCAL', ['uint32_t', sLocalName,], 'uint32_t', sLocalName);
    oSelf.aoLocals.append(oLocal);

    oArg = McStmtArg('IEM_MC_ARG_LOCAL_REF', ['uint32_t *', sArgName, sLocalName, sArgNo],
                     'uint32_t *', sArgName, int(sArgNo), sRef = sLocalName, sRefType = 'local');
    oSelf.aoArgs.append(oArg);

    return (oLocal, oArg,);
2250
@staticmethod
def parseMcImplicitAvxAArgs(oSelf, sName, asParams):
    """
    IEM_MC_IMPLICIT_AVX_AIMPL_ARGS - takes no parameters.

    Note! Translated to an IEM_MC_ARG_CONST statement for argument 0.
    """
    oSelf.checkStmtParamCount(sName, asParams, 0);
    asConstParams = ['PX86XSAVEAREA', 'pXState', '&pVCpu->cpum.GstCtx.XState', '0'];
    oArg = McStmtArg('IEM_MC_ARG_CONST', asConstParams,
                     'PX86XSAVEAREA', 'pXState', 0, '&pVCpu->cpum.GstCtx.XState');
    oSelf.aoArgs.append(oArg);
    return oArg;
2260
@staticmethod
def parseMcLocal(oSelf, sName, asParams):
    """ IEM_MC_LOCAL - parameters: type, name. """
    oSelf.checkStmtParamCount(sName, asParams, 2);
    (sType, sVarName) = asParams;
    oLocal = McStmtVar(sName, asParams, sType, sVarName);
    oSelf.aoLocals.append(oLocal);
    return oLocal;
2268
@staticmethod
def parseMcLocalAssign(oSelf, sName, asParams):
    """ IEM_MC_LOCAL_ASSIGN - parameters: type, name, assigned value. """
    oSelf.checkStmtParamCount(sName, asParams, 3);
    (sType, sVarName, sAssignedValue) = asParams;
    oLocal = McStmtVar(sName, asParams, sType, sVarName, sValue = sAssignedValue);
    oSelf.aoLocals.append(oLocal);
    return oLocal;
2276
@staticmethod
def parseMcLocalConst(oSelf, sName, asParams):
    """ IEM_MC_LOCAL_CONST - parameters: type, name, constant value. """
    oSelf.checkStmtParamCount(sName, asParams, 3);
    (sType, sVarName, sConstValue) = asParams;
    oLocal = McStmtVar(sName, asParams, sType, sVarName, sValue = sConstValue);
    oSelf.aoLocals.append(oLocal);
    return oLocal;
2284
@staticmethod
def parseMcLocalEFlags(oSelf, sName, asParams):
    """ IEM_MC_LOCAL_EFLAGS - one parameter: the name; the type is always uint32_t. """
    oSelf.checkStmtParamCount(sName, asParams, 1);
    sVarName = asParams[0];
    oLocal = McStmtVar(sName, asParams, 'uint32_t', sVarName);
    oSelf.aoLocals.append(oLocal);
    return oLocal;
2292
@staticmethod
def parseMcCallAImpl(oSelf, sName, asParams):
    """
    IEM_MC_CALL_AIMPL_3|4

    Parameters: return code variable, function, then the number of call
    arguments given by the last character of the statement name.
    """
    cCallArgs = int(sName[-1]);
    oSelf.checkStmtParamCount(sName, asParams, cCallArgs + 2);
    return McStmtCall(sName, asParams, iFnParam = 1, iRcNameParam = 0);
2299
@staticmethod
def parseMcCallVoidAImpl(oSelf, sName, asParams):
    """
    IEM_MC_CALL_VOID_AIMPL_2|3

    Parameters: function, then the number of call arguments given by the
    last character of the statement name.
    """
    cCallArgs = int(sName[-1]);
    oSelf.checkStmtParamCount(sName, asParams, cCallArgs + 1);
    return McStmtCall(sName, asParams, iFnParam = 0);
2306
@staticmethod
def parseMcCallAvxAImpl(oSelf, sName, asParams):
    """
    IEM_MC_CALL_AVX_AIMPL_2|3

    Parameters: function, then the number of call arguments given by the
    last character of the statement name.
    """
    cCallArgs = int(sName[-1]);
    oSelf.checkStmtParamCount(sName, asParams, cCallArgs + 1);
    return McStmtCall(sName, asParams, iFnParam = 0);
2313
@staticmethod
def parseMcCallFpuAImpl(oSelf, sName, asParams):
    """
    IEM_MC_CALL_FPU_AIMPL_1|2|3

    Parameters: function, then the number of call arguments given by the
    last character of the statement name.
    """
    cCallArgs = int(sName[-1]);
    oSelf.checkStmtParamCount(sName, asParams, cCallArgs + 1);
    return McStmtCall(sName, asParams, iFnParam = 0);
2320
@staticmethod
def parseMcCallMmxAImpl(oSelf, sName, asParams):
    """
    IEM_MC_CALL_MMX_AIMPL_2|3

    Parameters: function, then the number of call arguments given by the
    last character of the statement name.
    """
    cCallArgs = int(sName[-1]);
    oSelf.checkStmtParamCount(sName, asParams, cCallArgs + 1);
    return McStmtCall(sName, asParams, iFnParam = 0);
2327
@staticmethod
def parseMcCallSseAImpl(oSelf, sName, asParams):
    """
    IEM_MC_CALL_SSE_AIMPL_2|3

    Parameters: function, then the number of call arguments given by the
    last character of the statement name.
    """
    cCallArgs = int(sName[-1]);
    oSelf.checkStmtParamCount(sName, asParams, cCallArgs + 1);
    return McStmtCall(sName, asParams, iFnParam = 0);
2334
def parseCImplFlags(self, sName, sFlags):
    """
    Helper for parseMcCallCImpl and parseMcDeferToCImpl to validate and
    merge a bunch of IEM_CIMPL_F_XXX values into dsCImplFlags.

    sName is the statement name (for error messages) and sFlags the raw flag
    expression: '0' or flag names separated by '|', optionally with comments
    and one enclosing pair of parentheses.  Flags implied by a given flag
    (see g_kdCImplFlags) are set as well.

    Returns None.  Raises a statement error (via raiseStmtError) for unknown
    or empty flag names.
    """
    if sFlags == '0':
        return None;

    for sFlag in self.stripComments(sFlags).split('|'):
        sFlag = sFlag.strip();

        # Drop one opening and one closing parenthesis.  startswith/endswith
        # are used rather than indexing so an empty token (as in 'A||B', a
        # comment-only expression or a lone parenthesis) ends up as a proper
        # parser error below instead of an IndexError.
        if sFlag.startswith('('):
            sFlag = sFlag[1:].strip();
        if sFlag.endswith(')'):
            sFlag = sFlag[:-1].strip();

        # A literal zero inside the expression contributes nothing.
        if sFlag == '0':
            continue;
        if sFlag not in g_kdCImplFlags:
            self.raiseStmtError(sName, 'Unknown flag: %s' % (sFlag, ));

        self.dsCImplFlags[sFlag] = True;
        for sImpliedFlag in g_kdCImplFlags[sFlag]:
            self.dsCImplFlags[sImpliedFlag] = True;
    return None;
2356
@staticmethod
def parseMcCallCImpl(oSelf, sName, asParams):
    """
    IEM_MC_CALL_CIMPL_0|1|2|3|4|5

    The first parameter holds the IEM_CIMPL_F_XXX flags, the third one is the
    function; the last character of the statement name gives the number of
    call arguments.
    """
    cCallArgs = int(sName[-1]);
    oSelf.checkStmtParamCount(sName, asParams, cCallArgs + 3);
    sCImplFlags = asParams[0];
    oSelf.parseCImplFlags(sName, sCImplFlags);
    return McStmtCall(sName, asParams, iFnParam = 2);
2364
@staticmethod
def parseMcDeferToCImpl(oSelf, sName, asParams):
    """
    IEM_MC_DEFER_TO_CIMPL_[0|1|2|3]_RET

    The first parameter holds the IEM_CIMPL_F_XXX flags, the third one is the
    function; the digit preceding the '_RET' suffix of the statement name
    gives the number of call arguments.
    """
    # Note! This code is called by workerIemMcDeferToCImplXRet.
    cCallArgs = int(sName[-5]);
    oSelf.checkStmtParamCount(sName, asParams, cCallArgs + 3);
    sCImplFlags = asParams[0];
    oSelf.parseCImplFlags(sName, sCImplFlags);
    return McStmtCall(sName, asParams, iFnParam = 2);
2374
2375 @staticmethod
2376 def stripComments(sCode):
2377 """ Returns sCode with comments removed. """
2378 off = 0;
2379 while off < len(sCode):
2380 off = sCode.find('/', off);
2381 if off < 0 or off + 1 >= len(sCode):
2382 break;
2383
2384 if sCode[off + 1] == '/':
2385 # C++ comment.
2386 offEnd = sCode.find('\n', off + 2);
2387 if offEnd < 0:
2388 return sCode[:off].rstrip();
2389 sCode = sCode[ : off] + sCode[offEnd : ];
2390 off += 1;
2391
2392 elif sCode[off + 1] == '*':
2393 # C comment
2394 offEnd = sCode.find('*/', off + 2);
2395 if offEnd < 0:
2396 return sCode[:off].rstrip();
2397 sSep = ' ';
2398 if (off > 0 and sCode[off - 1].isspace()) or (offEnd + 2 < len(sCode) and sCode[offEnd + 2].isspace()):
2399 sSep = '';
2400 sCode = sCode[ : off] + sSep + sCode[offEnd + 2 : ];
2401 off += len(sSep);
2402
2403 else:
2404 # Not a comment.
2405 off += 1;
2406 return sCode;
2407
2408 @staticmethod
2409 def extractParam(sCode, offParam):
2410 """
2411 Extracts the parameter value at offParam in sCode.
2412 Returns stripped value and the end offset of the terminating ',' or ')'.
2413 """
2414 # Extract it.
2415 cNesting = 0;
2416 offStart = offParam;
2417 while offParam < len(sCode):
2418 ch = sCode[offParam];
2419 if ch == '(':
2420 cNesting += 1;
2421 elif ch == ')':
2422 if cNesting == 0:
2423 break;
2424 cNesting -= 1;
2425 elif ch == ',' and cNesting == 0:
2426 break;
2427 offParam += 1;
2428 return (sCode[offStart : offParam].strip(), offParam);
2429
2430 @staticmethod
2431 def extractParams(sCode, offOpenParen):
2432 """
2433 Parses a parameter list.
2434 Returns the list of parameter values and the offset of the closing parentheses.
2435 Returns (None, len(sCode)) on if no closing parentheses was found.
2436 """
2437 assert sCode[offOpenParen] == '(';
2438 asParams = [];
2439 off = offOpenParen + 1;
2440 while off < len(sCode):
2441 ch = sCode[off];
2442 if ch.isspace():
2443 off += 1;
2444 elif ch != ')':
2445 (sParam, off) = McBlock.extractParam(sCode, off);
2446 asParams.append(sParam);
2447 assert off < len(sCode), 'off=%s sCode=%s:"%s"' % (off, len(sCode), sCode,);
2448 if sCode[off] == ',':
2449 off += 1;
2450 else:
2451 return (asParams, off);
2452 return (None, off);
2453
2454 @staticmethod
2455 def findClosingBraces(sCode, off, offStop):
2456 """
2457 Finds the matching '}' for the '{' at off in sCode.
2458 Returns offset of the matching '}' on success, otherwise -1.
2459
2460 Note! Does not take comments into account.
2461 """
2462 cDepth = 1;
2463 off += 1;
2464 while off < offStop:
2465 offClose = sCode.find('}', off, offStop);
2466 if offClose < 0:
2467 break;
2468 cDepth += sCode.count('{', off, offClose);
2469 cDepth -= 1;
2470 if cDepth == 0:
2471 return offClose;
2472 off = offClose + 1;
2473 return -1;
2474
2475 @staticmethod
2476 def countSpacesAt(sCode, off, offStop):
2477 """ Returns the number of space characters at off in sCode. """
2478 offStart = off;
2479 while off < offStop and sCode[off].isspace():
2480 off += 1;
2481 return off - offStart;
2482
2483 @staticmethod
2484 def skipSpacesAt(sCode, off, offStop):
2485 """ Returns first offset at or after off for a non-space character. """
2486 return off + McBlock.countSpacesAt(sCode, off, offStop);
2487
2488 @staticmethod
2489 def isSubstrAt(sStr, off, sSubStr):
2490 """ Returns true of sSubStr is found at off in sStr. """
2491 return sStr[off : off + len(sSubStr)] == sSubStr;
2492
## Matches the C/C++ control statement keywords: 'if (', 'else', 'while (', 'for (' and 'do'.
koReCppCtrlStmts = re.compile(r'\b(if\s*[(]|else\b|while\s*[(]|for\s*[(]|do\b)');
## Matches 'iem.s.' followed by one of the listed member names.
## NOTE(review): going by the name these are the decoder state members - confirm against the users.
koReIemDecoderVars = re.compile( r'iem\.s\.(fPrefixes|uRexReg|uRexB|uRexIndex|iEffSeg|offModRm|cbOpcode|offOpcode'
                               + r'|enmEffOpSize|enmDefOpSize|enmDefAddrMode|enmEffAddrMode|idxPrefix'
                               + r'|uVex3rdReg|uVexLength|fEvxStuff|uFpuOpcode|abOpcode'
                               + r')');

## The conditional statement families decodeCode knows about.  Each entry is
## (IF statement name prefix, ELSE statement name, ENDIF statement name);
## decodeCode checks entry 0 before entry 1 when classifying a statement.
kaasConditions = (
    ( 'IEM_MC_IF_', 'IEM_MC_ELSE', 'IEM_MC_ENDIF' ),
    ( 'IEM_MC_NATIVE_IF', 'IEM_MC_NATIVE_ELSE', 'IEM_MC_NATIVE_ENDIF' ),
);
def decodeCode(self, sRawCode, off = 0, offStop = -1, iLevel = 0): # pylint: disable=too-many-statements,too-many-branches
    """
    Decodes sRawCode[off : offStop].

    The code is split into three kinds of items:
        - IEM_MC_XXX statements, which are handed to the parser registered
          for them in g_dMcStmtParsers.  The conditional ones (see
          kaasConditions) get their IF and optional ELSE blocks decoded by
          recursive calls.
        - Preprocessor lines (starting with '#'), which become McCppPreProc.
        - Other C/C++ statements, which become McCppGeneric, or McCppCond
          for 'if' statements (branches decoded recursively).
    Whitespace as well as C and C++ comments between items are skipped.

    Parameters:
        sRawCode    The raw code string.
        off         Offset into sRawCode to start decoding at.
        offStop     Offset to stop decoding at, negative means the end of
                    sRawCode.
        iLevel      The conditional nesting level.  This is incremented for
                    each recursive call, and a MC conditional at a level
                    above 1 is rejected.

    Returns list of McStmt instances.
    Raises ParserException on failure.
    """
    if offStop < 0:
        offStop = len(sRawCode);
    aoStmts = [];
    while off < offStop:
        ch = sRawCode[off];

        #
        # Skip spaces and comments.
        #
        if ch.isspace():
            off += 1;

        elif ch == '/':
            # Slice rather than index, so a '/' at the very end of the string
            # yields '' and is reported as a decode error instead of an IndexError.
            ch = sRawCode[off + 1 : off + 2];
            if ch == '/': # C++ comment.
                off = sRawCode.find('\n', off + 2);
                if off < 0:
                    break;
                off += 1;
            elif ch == '*': # C comment.
                off = sRawCode.find('*/', off + 2);
                if off < 0:
                    break;
                off += 2;
            else:
                self.raiseDecodeError(sRawCode, off, 'Unexpected "/"');

        #
        # Is it a MC statement.
        #
        elif ch == 'I' and sRawCode[off : off + len('IEM_MC_')] == 'IEM_MC_':
            # All MC statements ends with a semicolon, except for conditionals which ends with a '{'.
            # Extract it and strip comments from it.
            if   self.isSubstrAt(sRawCode, off, self.kaasConditions[0][0]): iCond = 0;
            elif self.isSubstrAt(sRawCode, off, self.kaasConditions[1][0]): iCond = 1;
            else:                                                           iCond = -1;
            if iCond < 0:
                # Note: str.find returns -1 when nothing is found, which the '<= off' test covers too.
                offEnd = sRawCode.find(';', off + len('IEM_MC_'));
                if offEnd <= off:
                    self.raiseDecodeError(sRawCode, off, 'MC statement without a ";"');
            else:
                offEnd = sRawCode.find('{', off + len(self.kaasConditions[iCond][0]));
                if offEnd <= off:
                    self.raiseDecodeError(sRawCode, off, 'MC conditional statement without a "{"');
                if sRawCode.find(';', off + len(self.kaasConditions[iCond][0]), offEnd) > off:
                    self.raiseDecodeError(sRawCode, off, 'MC conditional statement without an immediate "{"');
                # Here offEnd is an exclusive end offset: back up over the spacing in front of the '{'.
                offEnd -= 1;
                while offEnd > off and sRawCode[offEnd - 1].isspace():
                    offEnd -= 1;

            sRawStmt = self.stripComments(sRawCode[off : offEnd]);

            # Isolate the statement name.
            offOpenParen = sRawStmt.find('(');
            if offOpenParen < 0:
                self.raiseDecodeError(sRawCode, off, 'MC statement without a "("');
            sName = sRawStmt[: offOpenParen].strip();

            # Extract the parameters.
            (asParams, offCloseParen) = self.extractParams(sRawStmt, offOpenParen);
            if asParams is None:
                self.raiseDecodeError(sRawCode, off, 'MC statement without a closing parenthesis');
            if offCloseParen + 1 != len(sRawStmt):
                self.raiseDecodeError(sRawCode, off,
                                      'Unexpected code following MC statement: %s' % (sRawStmt[offCloseParen + 1:]));

            # Hand it to the handler.  The parser is the first entry of the table tuple,
            # and it may return either a single statement or a list/tuple of them.
            fnParser = g_dMcStmtParsers.get(sName);
            if not fnParser:
                self.raiseDecodeError(sRawCode, off, 'Unknown MC statement: %s' % (sName,));
            fnParser = fnParser[0];
            oStmt = fnParser(self, sName, asParams);
            if not isinstance(oStmt, (list, tuple)):
                aoStmts.append(oStmt);
            else:
                aoStmts.extend(oStmt);

            #
            # If conditional, we need to parse the whole statement.
            #
            # For reasons of simplicity, we assume the following structure
            # and parse each branch in a recursive call:
            #   IEM_MC_IF_XXX() {
            #        IEM_MC_WHATEVER();
            #   } IEM_MC_ELSE() {
            #        IEM_MC_WHATEVER();
            #   } IEM_MC_ENDIF();
            #
            if iCond >= 0:
                if iLevel > 1: ## @todo discount IEM_MC_NATIVE_IF.
                    self.raiseDecodeError(sRawCode, off, 'Too deep nesting of conditionals.');

                # Find start of the IF block:
                offBlock1 = self.skipSpacesAt(sRawCode, offEnd, offStop);
                if sRawCode[offBlock1] != '{':
                    self.raiseDecodeError(sRawCode, offBlock1, 'Expected "{" following %s' % (sName,));

                # Find the end of it.
                offBlock1End = self.findClosingBraces(sRawCode, offBlock1, offStop);
                if offBlock1End < 0:
                    self.raiseDecodeError(sRawCode, offBlock1, 'No matching "}" closing IF block of %s' % (sName,));

                oStmt.aoIfBranch = self.decodeCode(sRawCode, offBlock1 + 1, offBlock1End, iLevel + 1);

                # Is there an else section?
                off = self.skipSpacesAt(sRawCode, offBlock1End + 1, offStop);
                sElseNm = self.kaasConditions[iCond][1];
                if self.isSubstrAt(sRawCode, off, sElseNm):
                    off = self.skipSpacesAt(sRawCode, off + len(sElseNm), offStop);
                    if sRawCode[off] != '(':
                        self.raiseDecodeError(sRawCode, off, 'Expected "(" following %s"' % (sElseNm,));
                    off = self.skipSpacesAt(sRawCode, off + 1, offStop);
                    if sRawCode[off] != ')':
                        self.raiseDecodeError(sRawCode, off, 'Expected ")" following %s("' % (sElseNm,));

                    # Find start of the ELSE block.
                    offBlock2 = self.skipSpacesAt(sRawCode, off + 1, offStop);
                    if sRawCode[offBlock2] != '{':
                        self.raiseDecodeError(sRawCode, offBlock2, 'Expected "{" following %s()"' % (sElseNm,));

                    # Find the end of it.
                    offBlock2End = self.findClosingBraces(sRawCode, offBlock2, offStop);
                    if offBlock2End < 0:
                        self.raiseDecodeError(sRawCode, offBlock2, 'No matching "}" closing ELSE block of %s' % (sName,));

                    oStmt.aoElseBranch = self.decodeCode(sRawCode, offBlock2 + 1, offBlock2End, iLevel + 1);
                    off = self.skipSpacesAt(sRawCode, offBlock2End + 1, offStop);

                # Parse past the endif statement.
                sEndIfNm = self.kaasConditions[iCond][2];
                if not self.isSubstrAt(sRawCode, off, sEndIfNm):
                    self.raiseDecodeError(sRawCode, off, 'Expected %s for closing %s' % (sEndIfNm, sName,));
                off = self.skipSpacesAt(sRawCode, off + len(sEndIfNm), offStop);
                if sRawCode[off] != '(':
                    self.raiseDecodeError(sRawCode, off, 'Expected "(" following %s"' % (sEndIfNm,));
                off = self.skipSpacesAt(sRawCode, off + 1, offStop);
                if sRawCode[off] != ')':
                    self.raiseDecodeError(sRawCode, off, 'Expected ")" following %s("' % (sEndIfNm,));
                off = self.skipSpacesAt(sRawCode, off + 1, offStop);
                if sRawCode[off] != ';':
                    self.raiseDecodeError(sRawCode, off, 'Expected ";" following %s()"' % (sEndIfNm,));
                off += 1;

            else:
                # Advance.
                off = offEnd + 1;

        #
        # Otherwise it must be a C/C++ statement of sorts.
        #
        else:
            # Find the end of the statement.  if and else requires special handling.
            sCondExpr = None;
            oMatch = self.koReCppCtrlStmts.match(sRawCode, off);
            if oMatch:
                if oMatch.group(1)[-1] == '(':
                    (sCondExpr, offEnd) = self.extractParam(sRawCode, oMatch.end());
                else:
                    offEnd = oMatch.end();
                if not oMatch.group(1).startswith('if') and oMatch.group(1) != 'else':
                    self.raiseDecodeError(sRawCode, off, 'Only if/else control statements allowed: %s' % (oMatch.group(1),));
            elif ch == '#':
                # Preprocessor line: runs to the end of the line (or of the range).
                offEnd = sRawCode.find('\n', off, offStop);
                if offEnd < 0:
                    offEnd = offStop;
                # From here on offEnd is the offset of the last character of the line
                # (inclusive, see the 'offEnd + 1' slicing below), so trailing spaces
                # must be tested at offEnd itself.  Testing offEnd - 1 would cut off a
                # final single character token, e.g. turning '#if 0' into '#if '.
                offEnd -= 1;
                while offEnd > off and sRawCode[offEnd].isspace():
                    offEnd -= 1;
            else:
                offEnd = sRawCode.find(';', off);
                if offEnd < 0:
                    self.raiseDecodeError(sRawCode, off, 'C++ statement without a ";"');

            # Check this and the following statement whether it might have
            # something to do with decoding.  This is a statement filter
            # criteria when generating the threaded functions blocks.
            offNextEnd = sRawCode.find(';', offEnd + 1);
            fDecode = (   sRawCode.find('IEM_OPCODE_', off, max(offEnd, offNextEnd)) >= 0
                       or sRawCode.find('IEMOP_HLP_DONE_', off, max(offEnd, offNextEnd)) >= 0
                       or sRawCode.find('IEMOP_HLP_DECODED_', off, offEnd) >= 0
                       or sRawCode.find('IEMOP_HLP_RAISE_UD_IF_MISSING_GUEST_FEATURE', off, offEnd) >= 0
                       or sRawCode.find('IEMOP_HLP_VMX_INSTR', off, offEnd) >= 0
                       or sRawCode.find('IEMOP_HLP_IN_VMX_OPERATION', off, offEnd) >= 0 ## @todo wrong
                       );

            if not oMatch:
                if ch != '#':
                    aoStmts.append(McCppGeneric(sRawCode[off : offEnd + 1], fDecode));
                else:
                    aoStmts.append(McCppPreProc(sRawCode[off : offEnd + 1]));
                off = offEnd + 1;
            elif oMatch.group(1).startswith('if'):
                #
                # if () xxx [else yyy] statement.
                #
                oStmt = McCppCond(sCondExpr, fDecode);
                aoStmts.append(oStmt);
                off = offEnd + 1;

                # Following the if () we can either have a {} containing zero or more statements
                # or we have a single statement.
                offBlock1 = self.skipSpacesAt(sRawCode, offEnd + 1, offStop);
                if sRawCode[offBlock1] == '{':
                    offBlock1End = self.findClosingBraces(sRawCode, offBlock1, offStop);
                    if offBlock1End < 0:
                        self.raiseDecodeError(sRawCode, offBlock1, 'No matching "}" closing if block');
                    offBlock1 += 1;
                else:
                    offBlock1End = sRawCode.find(';', offBlock1, offStop);
                    if offBlock1End < 0:
                        self.raiseDecodeError(sRawCode, off, 'Expected ";" terminating one-line if block"');

                oStmt.aoIfBranch = self.decodeCode(sRawCode, offBlock1, offBlock1End, iLevel + 1);

                # The else is optional and can likewise be followed by {} or a single statement.
                off = self.skipSpacesAt(sRawCode, offBlock1End + 1, offStop);
                if self.isSubstrAt(sRawCode, off, 'else') and sRawCode[off + len('else')].isspace():
                    offBlock2 = self.skipSpacesAt(sRawCode, off + len('else'), offStop);
                    if sRawCode[offBlock2] == '{':
                        offBlock2End = self.findClosingBraces(sRawCode, offBlock2, offStop);
                        if offBlock2End < 0:
                            self.raiseDecodeError(sRawCode, offBlock2, 'No matching "}" closing else block');
                        offBlock2 += 1;
                    else:
                        offBlock2End = sRawCode.find(';', offBlock2, offStop);
                        if offBlock2End < 0:
                            self.raiseDecodeError(sRawCode, off, 'Expected ";" terminating one-line else block"');

                    oStmt.aoElseBranch = self.decodeCode(sRawCode, offBlock2, offBlock2End, iLevel + 1);
                    off = offBlock2End + 1;

            elif oMatch.group(1) == 'else':
                # Problematic 'else' branch, typically involving #ifdefs.
                self.raiseDecodeError(sRawCode, off, 'Mixed up else/#ifdef or something confusing us.');

    return aoStmts;
2746
def decode(self):
    """
    Returns the decoded statement list of the block.

    The statements are cached in self.aoStmts.  As long as that is empty,
    the lines in self.asLines are joined up and run through decodeCode().

    Raises ParserException on failure.
    """
    if self.aoStmts:
        return self.aoStmts;
    sWholeBlock = ''.join(self.asLines);
    self.aoStmts = self.decodeCode(sWholeBlock);
    return self.aoStmts;
2756
2757
def checkForTooEarlyEffSegUse(self, aoStmts):
    """
    Looks for pVCpu->iem.s.iEffSeg being referenced by a statement parameter
    ahead of the first IEM_MC_CALC_RM_EFF_ADDR statement in the list.

    Only the given list is examined, conditional branches are not descended
    into (ASSUMING they do not occur before the address calculation).

    Returns None if all is fine, otherwise an error string naming the
    offending statement closest to the address calculation.

    See r158454 for an example of this issue.
    """
    sNeedle = 'pVCpu->iem.s.iEffSeg';
    for idxCalc, oCandidate in enumerate(aoStmts):
        if oCandidate.sName != 'IEM_MC_CALC_RM_EFF_ADDR':
            continue;

        # Got the (first) address calculation, walk back towards the start.
        for idxPrior in reversed(range(idxCalc)):
            if any(sNeedle in sParam for sParam in aoStmts[idxPrior].asParams):
                return "statement #%u: pVCpu->iem.s.iEffSeg is used prior to IEM_MC_CALC_RM_EFF_ADDR!" % (idxPrior + 1,);
        break;
    return None;
2779
# Captures the first word of a C/C++ statement, provided it is directly
# followed by a space, an opening parenthesis or a semicolon.
koReCppFirstWord = re.compile(r'^\s*(\w+)[ (;]');

# First words of decoding related C/C++ statements that checkForDoneDecoding
# accepts after the IEMOP_HLP_DONE_*DECODING* invocation.
kdDecodeCppStmtOkayAfterDone = dict.fromkeys(
    (
        'IEMOP_HLP_IN_VMX_OPERATION',
        'IEMOP_HLP_VMX_INSTR',
    ),
    True
);
2785
def checkForDoneDecoding(self, aoStmts):
    """
    Verifies that the block has exactly one IEMOP_HLP_DONE_*DECODING* (or
    IEMOP_HLP_DECODED_*) invocation and that it is sensibly placed, i.e. no
    state modifying MC statement ahead of it and no decoding after it.

    A IEM_MC_DEFER_TO_CIMPL_XXX as the very first statement counts as an
    implicit invocation.

    Returns None on success, error string on failure.

    This ensures safe instruction restarting in case the recompiler runs
    out of TB resources during recompilation (e.g. aRanges or aGCPhysPages
    entries).
    """
    sFmtDecodeAfterDone = "statement #%u: Decoding statement following IEMOP_HLP_DONE_*DECODING*!";

    # Only the top level is scanned since the IEMOP_HLP_DONE_ stuff is not
    # allowed inside conditionals.
    cDoneSeen = 0;
    for idxStmt, oCur in enumerate(aoStmts):
        if oCur.isCppStmt():
            oFirstWord = self.koReCppFirstWord.match(oCur.asParams[0]);
            if not oFirstWord:
                continue;
            sWord = oFirstWord.group(1);
            if sWord.startswith(('IEMOP_HLP_DONE_', 'IEMOP_HLP_DECODED_')):
                cDoneSeen += 1;
            elif cDoneSeen > 0 and oCur.fDecode and sWord not in self.kdDecodeCppStmtOkayAfterDone:
                return sFmtDecodeAfterDone % (idxStmt + 1,);

        elif oCur.sName.startswith('IEM_MC_DEFER_TO_CIMPL_') and idxStmt == 0: # implicit
            cDoneSeen += 1;

        elif cDoneSeen == 0:
            # Second table column: does the MC statement modify state?
            if g_dMcStmtParsers.get(oCur.sName, (None, False))[1]:
                return "statement #%u: State modifying MC statement before IEMOP_HLP_DONE_*DECODING*!" % (idxStmt + 1,);

        elif oCur.sName in ('IEM_MC_CALC_RM_EFF_ADDR',):
            return sFmtDecodeAfterDone % (idxStmt + 1,);

    if cDoneSeen > 1:
        return "Block has more than one IEMOP_HLP_DONE_*DECODING* invocation!";
    if cDoneSeen < 1:
        return "Block is missing IEMOP_HLP_DONE_*DECODING* invocation!";
    return None;
2827
def checkForFetchAfterRef(self, aoStmts, asRegRefClasses):
    """
    Checks that the register references are placed after register fetches
    from the same register class.

    Parameters:
        aoStmts             The statements to check.  The branches of
                            McStmtCond statements are checked recursively.
        asRegRefClasses     Dictionary keyed by register class name (e.g.
                            'GREG', 'EFLAGS', 'FPUREG') that is updated as
                            xxx_REF_xxx statements are encountered.  The
                            same dictionary is passed on when recursing, so
                            top level callers should pass an empty one.

    Returns None on success, error string on failure.

    Example:
        SHL CH, CL

    If the CH reference is created first, the fetching of CL will cause the
    RCX guest register to have an active shadow register when it's being
    updated.  The shadow register will then be stale after the SHL operation
    completes, without us noticing.

    It's easier to ensure we've got correct code than complicating the
    recompiler code with safeguards here.
    """
    for iStmt, oStmt in enumerate(aoStmts):
        if not oStmt.isCppStmt():
            offRef = oStmt.sName.find("_REF_");
            if offRef > 0:
                # The FPU conditionals have the _REF_ followed by the type rather than the class.
                if oStmt.sName in ('IEM_MC_IF_FPUREG_NOT_EMPTY_REF_R80',
                                   'IEM_MC_IF_TWO_FPUREGS_NOT_EMPTY_REF_R80',
                                   'IEM_MC_IF_TWO_FPUREGS_NOT_EMPTY_REF_R80_FIRST',):
                    sClass = 'FPUREG';
                else:
                    # The class is the word following '_REF_' (5 characters long).
                    offUnderscore = oStmt.sName.find('_', offRef + 5);
                    if offUnderscore > 0:
                        assert offUnderscore > offRef;
                        sClass = oStmt.sName[offRef + 5 : offUnderscore];
                    else:
                        # No further underscore: the class is the whole remainder of the name
                        # (e.g. 'EFLAGS' for IEM_MC_REF_EFLAGS), not just its first character.
                        sClass = oStmt.sName[offRef + 5 :];
                asRegRefClasses[sClass] = True;
            else:
                offFetch = oStmt.sName.find("_FETCH_");
                if offFetch > 0:
                    # The class is the word following '_FETCH_' (7 characters long); memory
                    # fetches (MEM, MEM16, ++) are not checked against the references.
                    sClass = oStmt.sName[offFetch + 7 : ];
                    if not sClass.startswith("MEM"):
                        offUnderscore = sClass.find('_');
                        if offUnderscore >= 0:
                            assert offUnderscore > 0;
                            sClass = sClass[:offUnderscore];
                        if sClass in asRegRefClasses:
                            return "statement #%u: %s following REF! That'll mess up guest register shadowing" \
                                 % (iStmt + 1, oStmt.sName,);

        # Go into branches.
        if isinstance(oStmt, McStmtCond):
            sRet = self.checkForFetchAfterRef(oStmt.aoIfBranch, asRegRefClasses);
            if sRet:
                return sRet;
            sRet = self.checkForFetchAfterRef(oStmt.aoElseBranch, asRegRefClasses);
            if sRet:
                return sRet;
    return None;
2883
def check(self):
    """
    Runs the sanity checks on the (decoded) block.

    The checks are always all executed, in this order: too early iEffSeg
    use, done-decoding placement, and fetch after reference.

    Returns error string list, empty if all is fine.
    """
    aoStmts = self.decode();
    asOutcomes = (
        self.checkForTooEarlyEffSegUse(aoStmts),
        self.checkForDoneDecoding(aoStmts),
        self.checkForFetchAfterRef(aoStmts, {}),
    );
    # Each check yields None (or an empty string) when it has nothing to complain about.
    return [sErr for sErr in asOutcomes if sErr];
2905
2906
2907
2908## IEM_MC_XXX -> parser + info dictionary.
2909#
2910# The info columns:
2911# - col 1+0: boolean entry indicating whether the statement modifies state and
2912# must not be used before IEMOP_HLP_DONE_*.
2913# - col 1+1: boolean entry indicating similar to the previous column but is
2914# used to decide when to emit calls for conditional jumps (Jmp/NoJmp).
2915# The difference is that most IEM_MC_IF_XXX entries are False here.
2916# - col 1+2: boolean entry indicating native recompiler support.
2917#
2918# The raw table was generated via the following command
2919# sed -n -e "s/^# *define *\(IEM_MC_[A-Z_0-9]*\)[ (].*$/ '\1': McBlock.parseMcGeneric,/p" include/IEMMc.h \
2920# | sort | uniq | gawk "{printf """ %%-60s (%%s, True)\n""", $1, $2}"
2921# pylint: disable=line-too-long
2922g_dMcStmtParsers = {
2923 'IEM_MC_ACTUALIZE_AVX_STATE_FOR_CHANGE': (McBlock.parseMcGeneric, False, False, True, ),
2924 'IEM_MC_ACTUALIZE_AVX_STATE_FOR_READ': (McBlock.parseMcGeneric, False, False, True, ),
2925 'IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE': (McBlock.parseMcGeneric, False, False, True, ),
2926 'IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ': (McBlock.parseMcGeneric, False, False, True, ),
2927 'IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE': (McBlock.parseMcGeneric, False, False, True, ),
2928 'IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ': (McBlock.parseMcGeneric, False, False, True, ),
2929 'IEM_MC_ADD_GREG_U16': (McBlock.parseMcGeneric, True, True, True, ),
2930 'IEM_MC_ADD_GREG_U16_TO_LOCAL': (McBlock.parseMcGeneric, False, False, False, ),
2931 'IEM_MC_ADD_GREG_U32': (McBlock.parseMcGeneric, True, True, True, ),
2932 'IEM_MC_ADD_GREG_U32_TO_LOCAL': (McBlock.parseMcGeneric, False, False, False, ),
2933 'IEM_MC_ADD_GREG_U64': (McBlock.parseMcGeneric, True, True, True, ),
2934 'IEM_MC_ADD_GREG_U64_TO_LOCAL': (McBlock.parseMcGeneric, False, False, False, ),
2935 'IEM_MC_ADD_GREG_U8_TO_LOCAL': (McBlock.parseMcGeneric, False, False, False, ),
2936 'IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR': (McBlock.parseMcGeneric, True, True, False, ),
2937 'IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR': (McBlock.parseMcGeneric, True, True, False, ),
2938 'IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR': (McBlock.parseMcGeneric, True, True, False, ),
2939 'IEM_MC_ADVANCE_RIP_AND_FINISH': (McBlock.parseMcGeneric, True, True, True, ),
2940 'IEM_MC_AND_2LOCS_U32': (McBlock.parseMcGeneric, False, False, False, ),
2941 'IEM_MC_AND_ARG_U16': (McBlock.parseMcGeneric, False, False, False, ),
2942 'IEM_MC_AND_ARG_U32': (McBlock.parseMcGeneric, False, False, False, ),
2943 'IEM_MC_AND_ARG_U64': (McBlock.parseMcGeneric, False, False, False, ),
2944 'IEM_MC_AND_GREG_U16': (McBlock.parseMcGeneric, True, True, False, ),
2945 'IEM_MC_AND_GREG_U32': (McBlock.parseMcGeneric, True, True, False, ),
2946 'IEM_MC_AND_GREG_U64': (McBlock.parseMcGeneric, True, True, False, ),
2947 'IEM_MC_AND_GREG_U8': (McBlock.parseMcGeneric, True, True, False, ),
2948 'IEM_MC_AND_LOCAL_U16': (McBlock.parseMcGeneric, False, False, True, ),
2949 'IEM_MC_AND_LOCAL_U32': (McBlock.parseMcGeneric, False, False, True, ),
2950 'IEM_MC_AND_LOCAL_U64': (McBlock.parseMcGeneric, False, False, True, ),
2951 'IEM_MC_AND_LOCAL_U8': (McBlock.parseMcGeneric, False, False, True, ),
2952 'IEM_MC_ARG': (McBlock.parseMcArg, False, False, True, ),
2953 'IEM_MC_ARG_CONST': (McBlock.parseMcArgConst, False, False, True, ),
2954 'IEM_MC_ARG_LOCAL_EFLAGS': (McBlock.parseMcArgLocalEFlags, False, False, True, ),
2955 'IEM_MC_ARG_LOCAL_REF': (McBlock.parseMcArgLocalRef, False, False, True, ),
2956 'IEM_MC_ASSIGN_TO_SMALLER': (McBlock.parseMcGeneric, False, False, True, ),
2957 'IEM_MC_BEGIN': (McBlock.parseMcBegin, False, False, True, ),
2958 'IEM_MC_BROADCAST_XREG_U16_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
2959 'IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
2960 'IEM_MC_BROADCAST_XREG_U64_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
2961 'IEM_MC_BROADCAST_XREG_U8_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
2962 'IEM_MC_BROADCAST_YREG_U128_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
2963 'IEM_MC_BROADCAST_YREG_U16_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
2964 'IEM_MC_BROADCAST_YREG_U32_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
2965 'IEM_MC_BROADCAST_YREG_U64_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
2966 'IEM_MC_BROADCAST_YREG_U8_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
2967 'IEM_MC_BSWAP_LOCAL_U16': (McBlock.parseMcGeneric, False, False, True, ),
2968 'IEM_MC_BSWAP_LOCAL_U32': (McBlock.parseMcGeneric, False, False, True, ),
2969 'IEM_MC_BSWAP_LOCAL_U64': (McBlock.parseMcGeneric, False, False, True, ),
2970 'IEM_MC_CALC_RM_EFF_ADDR': (McBlock.parseMcGeneric, False, False, False, ),
2971 'IEM_MC_CALL_AIMPL_3': (McBlock.parseMcCallAImpl, True, True, True, ),
2972 'IEM_MC_CALL_AIMPL_4': (McBlock.parseMcCallAImpl, True, True, True, ),
2973 'IEM_MC_CALL_AVX_AIMPL_2': (McBlock.parseMcCallAvxAImpl, True, True, False, ),
2974 'IEM_MC_CALL_AVX_AIMPL_3': (McBlock.parseMcCallAvxAImpl, True, True, False, ),
2975 'IEM_MC_CALL_CIMPL_0': (McBlock.parseMcCallCImpl, True, True, False, ),
2976 'IEM_MC_CALL_CIMPL_1': (McBlock.parseMcCallCImpl, True, True, False, ),
2977 'IEM_MC_CALL_CIMPL_2': (McBlock.parseMcCallCImpl, True, True, False, ),
2978 'IEM_MC_CALL_CIMPL_3': (McBlock.parseMcCallCImpl, True, True, False, ),
2979 'IEM_MC_CALL_CIMPL_4': (McBlock.parseMcCallCImpl, True, True, False, ),
2980 'IEM_MC_CALL_CIMPL_5': (McBlock.parseMcCallCImpl, True, True, False, ),
2981 'IEM_MC_CALL_FPU_AIMPL_1': (McBlock.parseMcCallFpuAImpl, True, True, False, ),
2982 'IEM_MC_CALL_FPU_AIMPL_2': (McBlock.parseMcCallFpuAImpl, True, True, False, ),
2983 'IEM_MC_CALL_FPU_AIMPL_3': (McBlock.parseMcCallFpuAImpl, True, True, False, ),
2984 'IEM_MC_CALL_MMX_AIMPL_2': (McBlock.parseMcCallMmxAImpl, True, True, False, ),
2985 'IEM_MC_CALL_MMX_AIMPL_3': (McBlock.parseMcCallMmxAImpl, True, True, False, ),
2986 'IEM_MC_CALL_SSE_AIMPL_2': (McBlock.parseMcCallSseAImpl, True, True, False, ),
2987 'IEM_MC_CALL_SSE_AIMPL_3': (McBlock.parseMcCallSseAImpl, True, True, False, ),
2988 'IEM_MC_CALL_VOID_AIMPL_0': (McBlock.parseMcCallVoidAImpl, True, True, True, ),
2989 'IEM_MC_CALL_VOID_AIMPL_1': (McBlock.parseMcCallVoidAImpl, True, True, True, ),
2990 'IEM_MC_CALL_VOID_AIMPL_2': (McBlock.parseMcCallVoidAImpl, True, True, True, ),
2991 'IEM_MC_CALL_VOID_AIMPL_3': (McBlock.parseMcCallVoidAImpl, True, True, True, ),
2992 'IEM_MC_CALL_VOID_AIMPL_4': (McBlock.parseMcCallVoidAImpl, True, True, True, ),
2993 'IEM_MC_CLEAR_EFL_BIT': (McBlock.parseMcGeneric, True, True, False, ),
2994 'IEM_MC_CLEAR_FSW_EX': (McBlock.parseMcGeneric, True, True, False, ),
2995 'IEM_MC_CLEAR_HIGH_GREG_U64': (McBlock.parseMcGeneric, True, True, True, ),
2996 'IEM_MC_CLEAR_XREG_U32_MASK': (McBlock.parseMcGeneric, True, True, False, ),
2997 'IEM_MC_CLEAR_YREG_128_UP': (McBlock.parseMcGeneric, True, True, True, ),
2998 'IEM_MC_COMMIT_EFLAGS': (McBlock.parseMcGeneric, True, True, True, ),
2999 'IEM_MC_COPY_XREG_U128': (McBlock.parseMcGeneric, True, True, True, ),
3000 'IEM_MC_COPY_YREG_U128_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
3001 'IEM_MC_COPY_YREG_U256_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
3002 'IEM_MC_COPY_YREG_U64_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
3003 'IEM_MC_DEFER_TO_CIMPL_0_RET': (McBlock.parseMcDeferToCImpl, False, False, False, ),
3004 'IEM_MC_DEFER_TO_CIMPL_1_RET': (McBlock.parseMcDeferToCImpl, False, False, False, ),
3005 'IEM_MC_DEFER_TO_CIMPL_2_RET': (McBlock.parseMcDeferToCImpl, False, False, False, ),
3006 'IEM_MC_DEFER_TO_CIMPL_3_RET': (McBlock.parseMcDeferToCImpl, False, False, False, ),
3007 'IEM_MC_END': (McBlock.parseMcGeneric, True, True, True, ),
3008 'IEM_MC_FETCH_EFLAGS': (McBlock.parseMcGeneric, False, False, True, ),
3009 'IEM_MC_FETCH_EFLAGS_U8': (McBlock.parseMcGeneric, False, False, False, ),
3010 'IEM_MC_FETCH_FCW': (McBlock.parseMcGeneric, False, False, True, ),
3011 'IEM_MC_FETCH_FSW': (McBlock.parseMcGeneric, False, False, True, ),
3012 'IEM_MC_FETCH_GREG_U16': (McBlock.parseMcGeneric, False, False, True, ),
3013 'IEM_MC_FETCH_GREG_U16_SX_U32': (McBlock.parseMcGeneric, False, False, True, ),
3014 'IEM_MC_FETCH_GREG_U16_SX_U64': (McBlock.parseMcGeneric, False, False, True, ),
3015 'IEM_MC_FETCH_GREG_U16_ZX_U32': (McBlock.parseMcGeneric, False, False, True, ),
3016 'IEM_MC_FETCH_GREG_U16_ZX_U64': (McBlock.parseMcGeneric, False, False, True, ),
3017 'IEM_MC_FETCH_GREG_U32': (McBlock.parseMcGeneric, False, False, True, ),
3018 'IEM_MC_FETCH_GREG_U32_SX_U64': (McBlock.parseMcGeneric, False, False, True, ),
3019 'IEM_MC_FETCH_GREG_U32_ZX_U64': (McBlock.parseMcGeneric, False, False, True, ),
3020 'IEM_MC_FETCH_GREG_U64': (McBlock.parseMcGeneric, False, False, True, ),
3021 'IEM_MC_FETCH_GREG_U64_ZX_U64': (McBlock.parseMcGeneric, False, False, True, ),
3022 'IEM_MC_FETCH_GREG_U8': (McBlock.parseMcGeneric, False, False, True, ), # thrd var
3023 'IEM_MC_FETCH_GREG_U8_SX_U16': (McBlock.parseMcGeneric, False, False, True, ), # thrd var
3024 'IEM_MC_FETCH_GREG_U8_SX_U32': (McBlock.parseMcGeneric, False, False, True, ), # thrd var
3025 'IEM_MC_FETCH_GREG_U8_SX_U64': (McBlock.parseMcGeneric, False, False, True, ), # thrd var
3026 'IEM_MC_FETCH_GREG_U8_ZX_U16': (McBlock.parseMcGeneric, False, False, True, ), # thrd var
3027 'IEM_MC_FETCH_GREG_U8_ZX_U32': (McBlock.parseMcGeneric, False, False, True, ), # thrd var
3028 'IEM_MC_FETCH_GREG_U8_ZX_U64': (McBlock.parseMcGeneric, False, False, True, ), # thrd var
3029 'IEM_MC_FETCH_GREG_PAIR_U32': (McBlock.parseMcGeneric, False, False, False, ),
3030 'IEM_MC_FETCH_GREG_PAIR_U64': (McBlock.parseMcGeneric, False, False, False, ),
3031 'IEM_MC_FETCH_MEM_D80': (McBlock.parseMcGeneric, True, True, False, ),
3032 'IEM_MC_FETCH_MEM_I16': (McBlock.parseMcGeneric, True, True, False, ),
3033 'IEM_MC_FETCH_MEM_I32': (McBlock.parseMcGeneric, True, True, False, ),
3034 'IEM_MC_FETCH_MEM_I64': (McBlock.parseMcGeneric, True, True, False, ),
3035 'IEM_MC_FETCH_MEM_R32': (McBlock.parseMcGeneric, True, True, False, ),
3036 'IEM_MC_FETCH_MEM_R64': (McBlock.parseMcGeneric, True, True, False, ),
3037 'IEM_MC_FETCH_MEM_R80': (McBlock.parseMcGeneric, True, True, False, ),
3038 'IEM_MC_FETCH_MEM_U128': (McBlock.parseMcGeneric, True, True, False, ),
3039 'IEM_MC_FETCH_MEM_U128_ALIGN_SSE': (McBlock.parseMcGeneric, True, True, False, ),
3040 'IEM_MC_FETCH_MEM_U128_NO_AC': (McBlock.parseMcGeneric, True, True, False, ),
3041 'IEM_MC_FETCH_MEM_U128_AND_XREG_U128': (McBlock.parseMcGeneric, True, True, False, ),
3042 'IEM_MC_FETCH_MEM_U128_AND_XREG_U128_AND_RAX_RDX_U64': (McBlock.parseMcGeneric, True, True, False, ),
3043 'IEM_MC_FETCH_MEM_U128_AND_XREG_U128_AND_EAX_EDX_U32_SX_U64':(McBlock.parseMcGeneric, True, True, False, ),
3044 'IEM_MC_FETCH_MEM_U16': (McBlock.parseMcGeneric, True, True, True, ),
3045 'IEM_MC_FETCH_MEM_U16_DISP': (McBlock.parseMcGeneric, True, True, True, ),
3046 'IEM_MC_FETCH_MEM_U16_SX_U32': (McBlock.parseMcGeneric, True, True, True, ), # movsx
3047 'IEM_MC_FETCH_MEM_U16_SX_U64': (McBlock.parseMcGeneric, True, True, True, ), # movsx
3048 'IEM_MC_FETCH_MEM_U16_ZX_U32': (McBlock.parseMcGeneric, True, True, True, ), # movzx
3049 'IEM_MC_FETCH_MEM_U16_ZX_U64': (McBlock.parseMcGeneric, True, True, True, ), # movzx
3050 'IEM_MC_FETCH_MEM_U256': (McBlock.parseMcGeneric, True, True, False, ),
3051 'IEM_MC_FETCH_MEM_U256_ALIGN_AVX': (McBlock.parseMcGeneric, True, True, False, ),
3052 'IEM_MC_FETCH_MEM_U256_NO_AC': (McBlock.parseMcGeneric, True, True, False, ),
3053 'IEM_MC_FETCH_MEM_U32': (McBlock.parseMcGeneric, True, True, True, ),
3054 'IEM_MC_FETCH_MEM_U32_DISP': (McBlock.parseMcGeneric, True, True, True, ), #bounds only
3055 'IEM_MC_FETCH_MEM_U32_SX_U64': (McBlock.parseMcGeneric, True, True, True, ), # movsx
3056 'IEM_MC_FETCH_MEM_U32_ZX_U64': (McBlock.parseMcGeneric, True, True, True, ), # movzx
3057 'IEM_MC_FETCH_MEM_U64': (McBlock.parseMcGeneric, True, True, True, ),
3058 'IEM_MC_FETCH_MEM_U64_ALIGN_U128': (McBlock.parseMcGeneric, True, True, False, ),
3059 'IEM_MC_FETCH_MEM_U8': (McBlock.parseMcGeneric, True, True, True, ),
3060 'IEM_MC_FETCH_MEM_U8_SX_U16': (McBlock.parseMcGeneric, True, True, True, ), # movsx
3061 'IEM_MC_FETCH_MEM_U8_SX_U32': (McBlock.parseMcGeneric, True, True, True, ), # movsx
3062 'IEM_MC_FETCH_MEM_U8_SX_U64': (McBlock.parseMcGeneric, True, True, True, ), # movsx
3063 'IEM_MC_FETCH_MEM_U8_ZX_U16': (McBlock.parseMcGeneric, True, True, True, ), # movzx
3064 'IEM_MC_FETCH_MEM_U8_ZX_U32': (McBlock.parseMcGeneric, True, True, True, ), # movzx
3065 'IEM_MC_FETCH_MEM_U8_ZX_U64': (McBlock.parseMcGeneric, True, True, True, ), # movzx
3066 'IEM_MC_FETCH_MEM_XMM': (McBlock.parseMcGeneric, True, True, False, ),
3067 'IEM_MC_FETCH_MEM_XMM_ALIGN_SSE': (McBlock.parseMcGeneric, True, True, False, ),
3068 'IEM_MC_FETCH_MEM_XMM_NO_AC': (McBlock.parseMcGeneric, True, True, False, ),
3069 'IEM_MC_FETCH_MEM_XMM_U32': (McBlock.parseMcGeneric, True, True, False, ),
3070 'IEM_MC_FETCH_MEM_XMM_U64': (McBlock.parseMcGeneric, True, True, False, ),
3071 'IEM_MC_FETCH_MEM_XMM_ALIGN_SSE_AND_XREG_XMM': (McBlock.parseMcGeneric, True, True, False, ),
3072 'IEM_MC_FETCH_MEM_XMM_U32_AND_XREG_XMM': (McBlock.parseMcGeneric, True, True, False, ),
3073 'IEM_MC_FETCH_MEM_XMM_U64_AND_XREG_XMM': (McBlock.parseMcGeneric, True, True, False, ),
3074 'IEM_MC_FETCH_MEM_YMM': (McBlock.parseMcGeneric, True, True, False, ),
3075 'IEM_MC_FETCH_MEM_YMM_ALIGN_AVX': (McBlock.parseMcGeneric, True, True, False, ),
3076 'IEM_MC_FETCH_MEM_YMM_NO_AC': (McBlock.parseMcGeneric, True, True, False, ),
3077 'IEM_MC_FETCH_MEM16_U8': (McBlock.parseMcGeneric, True, True, False, ),
3078 'IEM_MC_FETCH_MEM32_U8': (McBlock.parseMcGeneric, True, True, False, ),
3079 'IEM_MC_FETCH_MREG_U32': (McBlock.parseMcGeneric, False, False, False, ),
3080 'IEM_MC_FETCH_MREG_U64': (McBlock.parseMcGeneric, False, False, False, ),
3081 'IEM_MC_FETCH_SREG_BASE_U32': (McBlock.parseMcGeneric, False, False, False, ),
3082 'IEM_MC_FETCH_SREG_BASE_U64': (McBlock.parseMcGeneric, False, False, False, ),
3083 'IEM_MC_FETCH_SREG_U16': (McBlock.parseMcGeneric, False, False, True, ),
3084 'IEM_MC_FETCH_SREG_ZX_U32': (McBlock.parseMcGeneric, False, False, True, ),
3085 'IEM_MC_FETCH_SREG_ZX_U64': (McBlock.parseMcGeneric, False, False, True, ),
3086 'IEM_MC_FETCH_XREG_U128': (McBlock.parseMcGeneric, False, False, False, ),
3087 'IEM_MC_FETCH_XREG_U16': (McBlock.parseMcGeneric, False, False, False, ),
3088 'IEM_MC_FETCH_XREG_U32': (McBlock.parseMcGeneric, False, False, True, ),
3089 'IEM_MC_FETCH_XREG_U64': (McBlock.parseMcGeneric, False, False, True, ),
3090 'IEM_MC_FETCH_XREG_U8': (McBlock.parseMcGeneric, False, False, False, ),
3091 'IEM_MC_FETCH_XREG_XMM': (McBlock.parseMcGeneric, False, False, False, ),
3092 'IEM_MC_FETCH_XREG_PAIR_U128': (McBlock.parseMcGeneric, False, False, False, ),
3093 'IEM_MC_FETCH_XREG_PAIR_U128_AND_RAX_RDX_U64': (McBlock.parseMcGeneric, False, False, False, ),
3094 'IEM_MC_FETCH_XREG_PAIR_U128_AND_EAX_EDX_U32_SX_U64': (McBlock.parseMcGeneric, False, False, False, ),
3095 'IEM_MC_FETCH_XREG_PAIR_XMM': (McBlock.parseMcGeneric, False, False, False, ),
3096 'IEM_MC_FETCH_YREG_2ND_U64': (McBlock.parseMcGeneric, False, False, False, ),
3097 'IEM_MC_FETCH_YREG_U128': (McBlock.parseMcGeneric, False, False, False, ),
3098 'IEM_MC_FETCH_YREG_U256': (McBlock.parseMcGeneric, False, False, False, ),
3099 'IEM_MC_FETCH_YREG_U32': (McBlock.parseMcGeneric, False, False, False, ),
3100 'IEM_MC_FETCH_YREG_U64': (McBlock.parseMcGeneric, False, False, False, ),
3101 'IEM_MC_FLIP_EFL_BIT': (McBlock.parseMcGeneric, True, True, False, ),
3102 'IEM_MC_FPU_FROM_MMX_MODE': (McBlock.parseMcGeneric, True, True, False, ),
3103 'IEM_MC_FPU_STACK_DEC_TOP': (McBlock.parseMcGeneric, True, True, False, ),
3104 'IEM_MC_FPU_STACK_FREE': (McBlock.parseMcGeneric, True, True, False, ),
3105 'IEM_MC_FPU_STACK_INC_TOP': (McBlock.parseMcGeneric, True, True, False, ),
3106 'IEM_MC_FPU_STACK_PUSH_OVERFLOW': (McBlock.parseMcGeneric, True, True, False, ),
3107 'IEM_MC_FPU_STACK_PUSH_OVERFLOW_MEM_OP': (McBlock.parseMcGeneric, True, True, False, ),
3108 'IEM_MC_FPU_STACK_PUSH_UNDERFLOW': (McBlock.parseMcGeneric, True, True, False, ),
3109 'IEM_MC_FPU_STACK_PUSH_UNDERFLOW_TWO': (McBlock.parseMcGeneric, True, True, False, ),
3110 'IEM_MC_FPU_STACK_UNDERFLOW': (McBlock.parseMcGeneric, True, True, False, ),
3111 'IEM_MC_FPU_STACK_UNDERFLOW_MEM_OP': (McBlock.parseMcGeneric, True, True, False, ),
3112 'IEM_MC_FPU_STACK_UNDERFLOW_MEM_OP_THEN_POP': (McBlock.parseMcGeneric, True, True, False, ),
3113 'IEM_MC_FPU_STACK_UNDERFLOW_THEN_POP': (McBlock.parseMcGeneric, True, True, False, ),
3114 'IEM_MC_FPU_STACK_UNDERFLOW_THEN_POP_POP': (McBlock.parseMcGeneric, True, True, False, ),
3115 'IEM_MC_FPU_TO_MMX_MODE': (McBlock.parseMcGeneric, True, True, False, ),
3116 'IEM_MC_HINT_FLUSH_GUEST_SHADOW': (McBlock.parseMcGeneric, True, True, True, ),
3117 'IEM_MC_IF_CX_IS_NZ': (McBlock.parseMcGenericCond, True, False, True, ),
3118 'IEM_MC_IF_CX_IS_NOT_ONE': (McBlock.parseMcGenericCond, True, False, True, ),
3119 'IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3120 'IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3121 'IEM_MC_IF_ECX_IS_NZ': (McBlock.parseMcGenericCond, True, False, True, ),
3122 'IEM_MC_IF_ECX_IS_NOT_ONE': (McBlock.parseMcGenericCond, True, False, True, ),
3123 'IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3124 'IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3125 'IEM_MC_IF_EFL_ANY_BITS_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3126 'IEM_MC_IF_EFL_BIT_NOT_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3127 'IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ': (McBlock.parseMcGenericCond, True, False, True, ),
3128 'IEM_MC_IF_EFL_BIT_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3129 'IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE': (McBlock.parseMcGenericCond, True, False, True, ),
3130 'IEM_MC_IF_EFL_BITS_EQ': (McBlock.parseMcGenericCond, True, False, True, ),
3131 'IEM_MC_IF_EFL_BITS_NE': (McBlock.parseMcGenericCond, True, False, True, ),
3132 'IEM_MC_IF_EFL_NO_BITS_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3133 'IEM_MC_IF_FCW_IM': (McBlock.parseMcGenericCond, True, True, False, ),
3134 'IEM_MC_IF_FPUREG_IS_EMPTY': (McBlock.parseMcGenericCond, True, True, False, ),
3135 'IEM_MC_IF_FPUREG_NOT_EMPTY': (McBlock.parseMcGenericCond, True, True, False, ),
3136 'IEM_MC_IF_FPUREG_NOT_EMPTY_REF_R80': (McBlock.parseMcGenericCond, True, True, False, ),
3137 'IEM_MC_IF_GREG_BIT_SET': (McBlock.parseMcGenericCond, True, False, False, ),
3138 'IEM_MC_IF_LOCAL_IS_Z': (McBlock.parseMcGenericCond, True, False, False, ),
3139 'IEM_MC_IF_MXCSR_XCPT_PENDING': (McBlock.parseMcGenericCond, True, True, False, ),
3140 'IEM_MC_IF_RCX_IS_NZ': (McBlock.parseMcGenericCond, True, False, True, ),
3141 'IEM_MC_IF_RCX_IS_NOT_ONE': (McBlock.parseMcGenericCond, True, False, True, ),
3142 'IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3143 'IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3144 'IEM_MC_IF_TWO_FPUREGS_NOT_EMPTY_REF_R80': (McBlock.parseMcGenericCond, True, True, False, ),
3145 'IEM_MC_IF_TWO_FPUREGS_NOT_EMPTY_REF_R80_FIRST': (McBlock.parseMcGenericCond, True, True, False, ),
3146 'IEM_MC_IMPLICIT_AVX_AIMPL_ARGS': (McBlock.parseMcImplicitAvxAArgs, False, False, False, ),
3147 'IEM_MC_INT_CLEAR_ZMM_256_UP': (McBlock.parseMcGeneric, True, True, False, ),
3148 'IEM_MC_LOCAL': (McBlock.parseMcLocal, False, False, True, ),
3149 'IEM_MC_LOCAL_ASSIGN': (McBlock.parseMcLocalAssign, False, False, True, ),
3150 'IEM_MC_LOCAL_CONST': (McBlock.parseMcLocalConst, False, False, True, ),
3151 'IEM_MC_LOCAL_EFLAGS': (McBlock.parseMcLocalEFlags, True, True, True, ),
3152 'IEM_MC_NOREF': (McBlock.parseMcGeneric, False, False, True, ),
3153 'IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT': (McBlock.parseMcGeneric, True, True, True, ),
3154 'IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE': (McBlock.parseMcGeneric, True, True, True, ),
3155 'IEM_MC_MAYBE_RAISE_FPU_XCPT': (McBlock.parseMcGeneric, True, True, True, ),
3156 'IEM_MC_MAYBE_RAISE_FSGSBASE_XCPT': (McBlock.parseMcGeneric, True, True, False, ),
3157 'IEM_MC_MAYBE_RAISE_MMX_RELATED_XCPT': (McBlock.parseMcGeneric, True, True, False, ),
3158 'IEM_MC_MAYBE_RAISE_NON_CANONICAL_ADDR_GP0': (McBlock.parseMcGeneric, True, True, False, ),
3159 'IEM_MC_MAYBE_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT': (McBlock.parseMcGeneric, True, True, False, ),
3160 'IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT': (McBlock.parseMcGeneric, True, True, True, ),
3161 'IEM_MC_MAYBE_RAISE_WAIT_DEVICE_NOT_AVAILABLE': (McBlock.parseMcGeneric, True, True, False, ),
3162 'IEM_MC_MEM_COMMIT_AND_UNMAP_ATOMIC': (McBlock.parseMcGeneric, True, True, True, ),
3163 'IEM_MC_MEM_COMMIT_AND_UNMAP_RW': (McBlock.parseMcGeneric, True, True, True, ),
3164 'IEM_MC_MEM_COMMIT_AND_UNMAP_RO': (McBlock.parseMcGeneric, True, True, True, ),
3165 'IEM_MC_MEM_COMMIT_AND_UNMAP_WO': (McBlock.parseMcGeneric, True, True, True, ),
3166 'IEM_MC_MEM_COMMIT_AND_UNMAP_FOR_FPU_STORE_WO': (McBlock.parseMcGeneric, True, True, False, ),
3167 'IEM_MC_MEM_MAP_D80_WO': (McBlock.parseMcGeneric, True, True, True, ),
3168 'IEM_MC_MEM_MAP_I16_WO': (McBlock.parseMcGeneric, True, True, True, ),
3169 'IEM_MC_MEM_MAP_I32_WO': (McBlock.parseMcGeneric, True, True, True, ),
3170 'IEM_MC_MEM_MAP_I64_WO': (McBlock.parseMcGeneric, True, True, True, ),
3171 'IEM_MC_MEM_MAP_R32_WO': (McBlock.parseMcGeneric, True, True, True, ),
3172 'IEM_MC_MEM_MAP_R64_WO': (McBlock.parseMcGeneric, True, True, True, ),
3173 'IEM_MC_MEM_MAP_R80_WO': (McBlock.parseMcGeneric, True, True, True, ),
3174 'IEM_MC_MEM_MAP_U8_ATOMIC': (McBlock.parseMcGeneric, True, True, True, ),
3175 'IEM_MC_MEM_MAP_U8_RW': (McBlock.parseMcGeneric, True, True, True, ),
3176 'IEM_MC_MEM_MAP_U8_RO': (McBlock.parseMcGeneric, True, True, True, ),
3177 'IEM_MC_MEM_MAP_U8_WO': (McBlock.parseMcGeneric, True, True, True, ),
3178 'IEM_MC_MEM_MAP_U16_ATOMIC': (McBlock.parseMcGeneric, True, True, True, ),
3179 'IEM_MC_MEM_MAP_U16_RW': (McBlock.parseMcGeneric, True, True, True, ),
3180 'IEM_MC_MEM_MAP_U16_RO': (McBlock.parseMcGeneric, True, True, True, ),
3181 'IEM_MC_MEM_MAP_U16_WO': (McBlock.parseMcGeneric, True, True, True, ),
3182 'IEM_MC_MEM_MAP_U32_ATOMIC': (McBlock.parseMcGeneric, True, True, True, ),
3183 'IEM_MC_MEM_MAP_U32_RW': (McBlock.parseMcGeneric, True, True, True, ),
3184 'IEM_MC_MEM_MAP_U32_RO': (McBlock.parseMcGeneric, True, True, True, ),
3185 'IEM_MC_MEM_MAP_U32_WO': (McBlock.parseMcGeneric, True, True, True, ),
3186 'IEM_MC_MEM_MAP_U64_ATOMIC': (McBlock.parseMcGeneric, True, True, True, ),
3187 'IEM_MC_MEM_MAP_U64_RW': (McBlock.parseMcGeneric, True, True, True, ),
3188 'IEM_MC_MEM_MAP_U64_RO': (McBlock.parseMcGeneric, True, True, True, ),
3189 'IEM_MC_MEM_MAP_U64_WO': (McBlock.parseMcGeneric, True, True, True, ),
3190 'IEM_MC_MEM_MAP_U128_ATOMIC': (McBlock.parseMcGeneric, True, True, True, ),
3191 'IEM_MC_MEM_MAP_U128_RW': (McBlock.parseMcGeneric, True, True, True, ),
3192 'IEM_MC_MEM_MAP_U128_RO': (McBlock.parseMcGeneric, True, True, True, ),
3193 'IEM_MC_MEM_MAP_U128_WO': (McBlock.parseMcGeneric, True, True, True, ),
3194 'IEM_MC_MEM_ROLLBACK_AND_UNMAP_WO': (McBlock.parseMcGeneric, True, True, True, ),
3195 'IEM_MC_MERGE_YREG_U32_U96_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
3196 'IEM_MC_MERGE_YREG_U64_U64_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
3197 'IEM_MC_MERGE_YREG_U64HI_U64HI_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
3198 'IEM_MC_MERGE_YREG_U64LO_U64LO_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
3199 'IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
3200 'IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
3201 'IEM_MC_MODIFIED_MREG': (McBlock.parseMcGeneric, True, True, False, ),
3202 'IEM_MC_MODIFIED_MREG_BY_REF': (McBlock.parseMcGeneric, True, True, False, ),
3203 'IEM_MC_NATIVE_EMIT_0': (McBlock.parseMcGeneric, True, True, True, ),
3204 'IEM_MC_NATIVE_EMIT_1': (McBlock.parseMcGeneric, True, True, True, ),
3205 'IEM_MC_NATIVE_EMIT_2': (McBlock.parseMcGeneric, True, True, True, ),
3206 'IEM_MC_NATIVE_EMIT_3': (McBlock.parseMcGeneric, True, True, True, ),
3207 'IEM_MC_NATIVE_EMIT_4': (McBlock.parseMcGeneric, True, True, True, ),
3208 'IEM_MC_NATIVE_EMIT_5': (McBlock.parseMcGeneric, True, True, True, ),
3209 'IEM_MC_NATIVE_EMIT_6': (McBlock.parseMcGeneric, True, True, True, ),
3210 'IEM_MC_NATIVE_EMIT_7': (McBlock.parseMcGeneric, True, True, True, ),
3211 'IEM_MC_NATIVE_IF': (McBlock.parseMcNativeIf, False, False, True, ),
3212 'IEM_MC_NATIVE_ELSE': (McBlock.parseMcGenericCond, False, False, True, ),
3213 'IEM_MC_NATIVE_ENDIF': (McBlock.parseMcGenericCond, False, False, True, ),
3214 'IEM_MC_OR_2LOCS_U32': (McBlock.parseMcGeneric, False, False, False, ),
3215 'IEM_MC_OR_GREG_U16': (McBlock.parseMcGeneric, True, True, False, ),
3216 'IEM_MC_OR_GREG_U32': (McBlock.parseMcGeneric, True, True, False, ),
3217 'IEM_MC_OR_GREG_U64': (McBlock.parseMcGeneric, True, True, False, ),
3218 'IEM_MC_OR_GREG_U8': (McBlock.parseMcGeneric, True, True, False, ),
3219 'IEM_MC_OR_LOCAL_U16': (McBlock.parseMcGeneric, False, False, True, ),
3220 'IEM_MC_OR_LOCAL_U32': (McBlock.parseMcGeneric, False, False, True, ),
3221 'IEM_MC_OR_LOCAL_U8': (McBlock.parseMcGeneric, False, False, True, ),
3222 'IEM_MC_POP_GREG_U16': (McBlock.parseMcGeneric, True, True, True, ),
3223 'IEM_MC_POP_GREG_U32': (McBlock.parseMcGeneric, True, True, True, ),
3224 'IEM_MC_POP_GREG_U64': (McBlock.parseMcGeneric, True, True, True, ),
3225 'IEM_MC_PREPARE_AVX_USAGE': (McBlock.parseMcGeneric, False, False, True),
3226 'IEM_MC_PREPARE_FPU_USAGE': (McBlock.parseMcGeneric, False, False, True),
3227 'IEM_MC_PREPARE_SSE_USAGE': (McBlock.parseMcGeneric, False, False, True),
3228 'IEM_MC_PUSH_FPU_RESULT': (McBlock.parseMcGeneric, True, True, False, ),
3229 'IEM_MC_PUSH_FPU_RESULT_MEM_OP': (McBlock.parseMcGeneric, True, True, False, ),
3230 'IEM_MC_PUSH_FPU_RESULT_TWO': (McBlock.parseMcGeneric, True, True, False, ),
3231 'IEM_MC_PUSH_U16': (McBlock.parseMcGeneric, True, True, True, ),
3232 'IEM_MC_PUSH_U32': (McBlock.parseMcGeneric, True, True, True, ),
3233 'IEM_MC_PUSH_U32_SREG': (McBlock.parseMcGeneric, True, True, True, ),
3234 'IEM_MC_PUSH_U64': (McBlock.parseMcGeneric, True, True, True, ),
3235 'IEM_MC_RAISE_DIVIDE_ERROR': (McBlock.parseMcGeneric, True, True, False, ),
3236 'IEM_MC_RAISE_GP0_IF_CPL_NOT_ZERO': (McBlock.parseMcGeneric, True, True, False, ),
3237 'IEM_MC_RAISE_GP0_IF_EFF_ADDR_UNALIGNED': (McBlock.parseMcGeneric, True, True, False, ),
3238 'IEM_MC_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT': (McBlock.parseMcGeneric, True, True, True, ),
3239 'IEM_MC_REF_EFLAGS': (McBlock.parseMcGeneric, False, False, True, ),
3240 'IEM_MC_REF_FPUREG': (McBlock.parseMcGeneric, False, False, False, ),
3241 'IEM_MC_REF_GREG_I32': (McBlock.parseMcGeneric, False, False, True, ),
3242 'IEM_MC_REF_GREG_I32_CONST': (McBlock.parseMcGeneric, False, False, True, ),
3243 'IEM_MC_REF_GREG_I64': (McBlock.parseMcGeneric, False, False, True, ),
3244 'IEM_MC_REF_GREG_I64_CONST': (McBlock.parseMcGeneric, False, False, True, ),
3245 'IEM_MC_REF_GREG_U16': (McBlock.parseMcGeneric, False, False, True, ),
3246 'IEM_MC_REF_GREG_U16_CONST': (McBlock.parseMcGeneric, False, False, True, ),
3247 'IEM_MC_REF_GREG_U32': (McBlock.parseMcGeneric, False, False, True, ),
3248 'IEM_MC_REF_GREG_U32_CONST': (McBlock.parseMcGeneric, False, False, True, ),
3249 'IEM_MC_REF_GREG_U64': (McBlock.parseMcGeneric, False, False, True, ),
3250 'IEM_MC_REF_GREG_U64_CONST': (McBlock.parseMcGeneric, False, False, True, ),
3251 'IEM_MC_REF_GREG_U8': (McBlock.parseMcGeneric, False, False, False, ), # threaded
3252 'IEM_MC_REF_GREG_U8_CONST': (McBlock.parseMcGeneric, False, False, False, ), # threaded
3253 'IEM_MC_REF_MREG_U32_CONST': (McBlock.parseMcGeneric, False, False, False, ),
3254 'IEM_MC_REF_MREG_U64': (McBlock.parseMcGeneric, False, False, False, ),
3255 'IEM_MC_REF_MREG_U64_CONST': (McBlock.parseMcGeneric, False, False, False, ),
3256 'IEM_MC_REF_MXCSR': (McBlock.parseMcGeneric, False, False, True, ),
3257 'IEM_MC_REF_XREG_R32_CONST': (McBlock.parseMcGeneric, False, False, False, ),
3258 'IEM_MC_REF_XREG_R64_CONST': (McBlock.parseMcGeneric, False, False, False, ),
3259 'IEM_MC_REF_XREG_U128': (McBlock.parseMcGeneric, False, False, True, ),
3260 'IEM_MC_REF_XREG_U128_CONST': (McBlock.parseMcGeneric, False, False, True, ),
3261 'IEM_MC_REF_XREG_U32_CONST': (McBlock.parseMcGeneric, False, False, False, ),
3262 'IEM_MC_REF_XREG_U64_CONST': (McBlock.parseMcGeneric, False, False, False, ),
3263 'IEM_MC_REF_XREG_XMM_CONST': (McBlock.parseMcGeneric, False, False, True, ),
3264 'IEM_MC_REF_YREG_U128': (McBlock.parseMcGeneric, False, False, False, ),
3265 'IEM_MC_REF_YREG_U128_CONST': (McBlock.parseMcGeneric, False, False, False, ),
3266 'IEM_MC_REF_YREG_U64_CONST': (McBlock.parseMcGeneric, False, False, False, ),
3267 'IEM_MC_REL_JMP_S16_AND_FINISH': (McBlock.parseMcGeneric, True, True, False, ),
3268 'IEM_MC_REL_JMP_S32_AND_FINISH': (McBlock.parseMcGeneric, True, True, False, ),
3269 'IEM_MC_REL_JMP_S8_AND_FINISH': (McBlock.parseMcGeneric, True, True, False, ),
3270 'IEM_MC_RETURN_ON_FAILURE': (McBlock.parseMcGeneric, False, False, False, ),
3271 'IEM_MC_SAR_LOCAL_S16': (McBlock.parseMcGeneric, False, False, False, ),
3272 'IEM_MC_SAR_LOCAL_S32': (McBlock.parseMcGeneric, False, False, False, ),
3273 'IEM_MC_SAR_LOCAL_S64': (McBlock.parseMcGeneric, False, False, False, ),
3274 'IEM_MC_SET_EFL_BIT': (McBlock.parseMcGeneric, True, True, False, ),
3275 'IEM_MC_SET_FPU_RESULT': (McBlock.parseMcGeneric, True, True, False, ),
3276 'IEM_MC_SET_RIP_U16_AND_FINISH': (McBlock.parseMcGeneric, True, True, False, ),
3277 'IEM_MC_SET_RIP_U32_AND_FINISH': (McBlock.parseMcGeneric, True, True, False, ),
3278 'IEM_MC_SET_RIP_U64_AND_FINISH': (McBlock.parseMcGeneric, True, True, False, ),
3279 'IEM_MC_SHL_LOCAL_S16': (McBlock.parseMcGeneric, False, False, False, ),
3280 'IEM_MC_SHL_LOCAL_S32': (McBlock.parseMcGeneric, False, False, False, ),
3281 'IEM_MC_SHL_LOCAL_S64': (McBlock.parseMcGeneric, False, False, False, ),
3282 'IEM_MC_SHR_LOCAL_U8': (McBlock.parseMcGeneric, False, False, False, ),
3283 'IEM_MC_SSE_UPDATE_MXCSR': (McBlock.parseMcGeneric, True, True, False, ),
3284 'IEM_MC_STORE_FPU_RESULT': (McBlock.parseMcGeneric, True, True, False, ),
3285 'IEM_MC_STORE_FPU_RESULT_MEM_OP': (McBlock.parseMcGeneric, True, True, False, ),
3286 'IEM_MC_STORE_FPU_RESULT_THEN_POP': (McBlock.parseMcGeneric, True, True, False, ),
3287 'IEM_MC_STORE_FPU_RESULT_WITH_MEM_OP_THEN_POP': (McBlock.parseMcGeneric, True, True, False, ),
3288 'IEM_MC_STORE_FPUREG_R80_SRC_REF': (McBlock.parseMcGeneric, True, True, False, ),
3289 'IEM_MC_STORE_GREG_I64': (McBlock.parseMcGeneric, True, True, False, ),
3290 'IEM_MC_STORE_GREG_U16': (McBlock.parseMcGeneric, True, True, True, ),
3291 'IEM_MC_STORE_GREG_U16_CONST': (McBlock.parseMcGeneric, True, True, True, ),
3292 'IEM_MC_STORE_GREG_U32': (McBlock.parseMcGeneric, True, True, True, ),
3293 'IEM_MC_STORE_GREG_U32_CONST': (McBlock.parseMcGeneric, True, True, True, ),
3294 'IEM_MC_STORE_GREG_U64': (McBlock.parseMcGeneric, True, True, True, ),
3295 'IEM_MC_STORE_GREG_U64_CONST': (McBlock.parseMcGeneric, True, True, True, ),
3296 'IEM_MC_STORE_GREG_U8': (McBlock.parseMcGeneric, True, True, True, ), # thrd var
3297 'IEM_MC_STORE_GREG_U8_CONST': (McBlock.parseMcGeneric, True, True, True, ), # thrd var
3298 'IEM_MC_STORE_GREG_PAIR_U32': (McBlock.parseMcGeneric, True, True, False, ),
3299 'IEM_MC_STORE_GREG_PAIR_U64': (McBlock.parseMcGeneric, True, True, False, ),
3300 'IEM_MC_STORE_MEM_I16_CONST_BY_REF': (McBlock.parseMcGeneric, True, True, False, ),
3301 'IEM_MC_STORE_MEM_I32_CONST_BY_REF': (McBlock.parseMcGeneric, True, True, False, ),
3302 'IEM_MC_STORE_MEM_I64_CONST_BY_REF': (McBlock.parseMcGeneric, True, True, False, ),
3303 'IEM_MC_STORE_MEM_I8_CONST_BY_REF': (McBlock.parseMcGeneric, True, True, False, ),
3304 'IEM_MC_STORE_MEM_INDEF_D80_BY_REF': (McBlock.parseMcGeneric, True, True, False, ),
3305 'IEM_MC_STORE_MEM_NEG_QNAN_R32_BY_REF': (McBlock.parseMcGeneric, True, True, False, ),
3306 'IEM_MC_STORE_MEM_NEG_QNAN_R64_BY_REF': (McBlock.parseMcGeneric, True, True, False, ),
3307 'IEM_MC_STORE_MEM_NEG_QNAN_R80_BY_REF': (McBlock.parseMcGeneric, True, True, False, ),
3308 'IEM_MC_STORE_MEM_U128': (McBlock.parseMcGeneric, True, True, False, ),
3309 'IEM_MC_STORE_MEM_U128_NO_AC': (McBlock.parseMcGeneric, True, True, False, ),
3310 'IEM_MC_STORE_MEM_U128_ALIGN_SSE': (McBlock.parseMcGeneric, True, True, False, ),
3311 'IEM_MC_STORE_MEM_U16': (McBlock.parseMcGeneric, True, True, True, ),
3312 'IEM_MC_STORE_MEM_U16_CONST': (McBlock.parseMcGeneric, True, True, True, ),
3313 'IEM_MC_STORE_MEM_U256': (McBlock.parseMcGeneric, True, True, False, ),
3314 'IEM_MC_STORE_MEM_U256_NO_AC': (McBlock.parseMcGeneric, True, True, False, ),
3315 'IEM_MC_STORE_MEM_U256_ALIGN_AVX': (McBlock.parseMcGeneric, True, True, False, ),
3316 'IEM_MC_STORE_MEM_U32': (McBlock.parseMcGeneric, True, True, True, ),
3317 'IEM_MC_STORE_MEM_U32_CONST': (McBlock.parseMcGeneric, True, True, True, ),
3318 'IEM_MC_STORE_MEM_U64': (McBlock.parseMcGeneric, True, True, True, ),
3319 'IEM_MC_STORE_MEM_U64_CONST': (McBlock.parseMcGeneric, True, True, True, ),
3320 'IEM_MC_STORE_MEM_U8': (McBlock.parseMcGeneric, True, True, True, ),
3321 'IEM_MC_STORE_MEM_U8_CONST': (McBlock.parseMcGeneric, True, True, True, ),
3322 'IEM_MC_STORE_MREG_U32_ZX_U64': (McBlock.parseMcGeneric, True, True, False, ),
3323 'IEM_MC_STORE_MREG_U64': (McBlock.parseMcGeneric, True, True, False, ),
3324 'IEM_MC_STORE_SREG_BASE_U32': (McBlock.parseMcGeneric, True, True, False, ),
3325 'IEM_MC_STORE_SREG_BASE_U64': (McBlock.parseMcGeneric, True, True, False, ),
3326 'IEM_MC_STORE_SSE_RESULT': (McBlock.parseMcGeneric, True, True, False, ),
3327 'IEM_MC_STORE_XREG_R32': (McBlock.parseMcGeneric, True, True, False, ),
3328 'IEM_MC_STORE_XREG_R64': (McBlock.parseMcGeneric, True, True, False, ),
3329 'IEM_MC_STORE_XREG_U128': (McBlock.parseMcGeneric, True, True, False, ),
3330 'IEM_MC_STORE_XREG_U16': (McBlock.parseMcGeneric, True, True, False, ),
3331 'IEM_MC_STORE_XREG_U32': (McBlock.parseMcGeneric, True, True, True, ),
3332 'IEM_MC_STORE_XREG_U32_U128': (McBlock.parseMcGeneric, True, True, False, ),
3333 'IEM_MC_STORE_XREG_U32_ZX_U128': (McBlock.parseMcGeneric, True, True, False, ),
3334 'IEM_MC_STORE_XREG_U64': (McBlock.parseMcGeneric, True, True, True, ),
3335 'IEM_MC_STORE_XREG_U64_ZX_U128': (McBlock.parseMcGeneric, True, True, False, ),
3336 'IEM_MC_STORE_XREG_U8': (McBlock.parseMcGeneric, True, True, False, ),
3337 'IEM_MC_STORE_XREG_XMM': (McBlock.parseMcGeneric, True, True, False, ),
3338 'IEM_MC_STORE_XREG_XMM_U32': (McBlock.parseMcGeneric, True, True, False, ),
3339 'IEM_MC_STORE_XREG_XMM_U64': (McBlock.parseMcGeneric, True, True, False, ),
3340 'IEM_MC_STORE_YREG_U128': (McBlock.parseMcGeneric, True, True, False, ),
3341 'IEM_MC_STORE_YREG_U128_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
3342 'IEM_MC_STORE_YREG_U256_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
3343 'IEM_MC_STORE_YREG_U32_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
3344 'IEM_MC_STORE_YREG_U64_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
3345 'IEM_MC_SUB_GREG_U16': (McBlock.parseMcGeneric, True, True, True, ),
3346 'IEM_MC_SUB_GREG_U32': (McBlock.parseMcGeneric, True, True, True, ),
3347 'IEM_MC_SUB_GREG_U64': (McBlock.parseMcGeneric, True, True, True, ),
3348 'IEM_MC_SUB_LOCAL_U16': (McBlock.parseMcGeneric, False, False, False, ),
3349 'IEM_MC_UPDATE_FPU_OPCODE_IP': (McBlock.parseMcGeneric, True, True, False, ),
3350 'IEM_MC_UPDATE_FSW': (McBlock.parseMcGeneric, True, True, False, ),
3351 'IEM_MC_UPDATE_FSW_CONST': (McBlock.parseMcGeneric, True, True, False, ),
3352 'IEM_MC_UPDATE_FSW_THEN_POP': (McBlock.parseMcGeneric, True, True, False, ),
3353 'IEM_MC_UPDATE_FSW_THEN_POP_POP': (McBlock.parseMcGeneric, True, True, False, ),
3354 'IEM_MC_UPDATE_FSW_WITH_MEM_OP': (McBlock.parseMcGeneric, True, True, False, ),
3355 'IEM_MC_UPDATE_FSW_WITH_MEM_OP_THEN_POP': (McBlock.parseMcGeneric, True, True, False, ),
3356 'IEM_MC_NO_NATIVE_RECOMPILE': (McBlock.parseMcGeneric, False, False, False, ),
3357};
3358# pylint: enable=line-too-long
3359
## List of microcode blocks.
# Module-level list that starts out empty; per the type comment its elements
# are McBlock instances.
g_aoMcBlocks = [] # type: List[McBlock]
3362
3363
3364
class ParserException(Exception):
    """
    Exception type used for parser errors.

    Carries nothing beyond the message text, which is handed straight to the
    Exception base class.
    """
    def __init__(self, sMessage):
        super(ParserException, self).__init__(sMessage);
3369
3370
3371class SimpleParser(object): # pylint: disable=too-many-instance-attributes
3372 """
3373 Parser of IEMAllInstruction*.cpp.h instruction specifications.
3374 """
3375
3376 ## @name Parser state.
3377 ## @{
3378 kiCode = 0;
3379 kiCommentMulti = 1;
3380 ## @}
3381
class Macro(object):
    """
    A preprocessor macro: name, optional parameter list, body text and the
    line number it was recorded with, plus the logic for expanding the body
    with a set of argument values.
    """
    def __init__(self, sName, asArgs, sBody, iLine):
        self.sName  = sName;    ##< The macro name.
        self.asArgs = asArgs;   ##< None if simple macro, list of parameters otherwise.
        self.sBody  = sBody;    ##< The macro body text that expandMacro works on.
        self.iLine  = iLine;    ##< Line number as supplied by the creator.
        ## Matches one parameter name in the body.  Group 1 and 3 capture an
        #  adjacent '##' (token pasting) including surrounding whitespace, or
        #  are empty when the name is merely delimited by a word boundary.
        #  None when the macro has no parameters.
        self.oReArgMatch = re.compile(r'(\s*##\s*|\b)(' + '|'.join(asArgs) + r')(\s*##\s*|\b)') if asArgs else None;

    @staticmethod
    def _needSpace(ch):
        """ This is just to make the expanded output a bit prettier. """
        # NOTE(review): a whitespace character is never '(' so the second test
        # cannot fail; left untouched so the expanded text stays exactly as it was.
        return ch.isspace() and ch != '(';

    def expandMacro(self, oParent, asArgs = None):
        """
        Expands the macro body with the given arguments.

        oParent is unused.  asArgs must hold one value per macro parameter
        (asserted); for a macro without parameters it must be empty or None.
        Returns the expanded body as a string.

        Each parameter occurrence is replaced by its value, swallowing any
        '##' next to it.  Substituted values are not rescanned.
        """
        _ = oParent;

        if not self.oReArgMatch:
            assert not asArgs;
            return self.sBody;

        assert len(asArgs) == len(self.asArgs);
        dArgs = { self.asArgs[iArg]: sValue for iArg, sValue in enumerate(asArgs) };

        def replaceArg(oMatch):
            """ Returns the replacement text for one parameter occurrence. """
            sText  = oMatch.string;
            sValue = dArgs[oMatch.group(2)];
            sPre   = '';
            if not oMatch.group(1) and oMatch.start() > 0 and self._needSpace(sText[oMatch.start()]):
                sPre = ' ';
            sPost  = '';
            if not oMatch.group(3) and oMatch.end() < len(sText) and self._needSpace(sText[oMatch.end()]):
                sPost = ' ';
            return sPre + sValue + sPost;

        # Substitute in a single pass over the *original* body.  Searching on in
        # the already modified text (as a manual search/splice loop would) makes
        # the word boundary in front of the second parameter of 'a##b' vanish
        # once 'a##' has been replaced, leaving 'b' unexpanded.
        return self.oReArgMatch.sub(replaceArg, self.sBody);
3423
class PreprocessorConditional(object):
    """
    Preprocessor conditional (#if/#ifdef/#ifndef/#elif/#else/#endif).

    An instance records the directive type and its expression, a list of
    #elif conditionals (aoElif) and an fInElse flag.  Neither aoElif nor
    fInElse is modified by this class after construction; isInBlockForArch()
    only reads them.
    """

    ## Known defines.
    #   - A value of 1 indicates that it's always defined.
    #   - A value of 0 if it's always undefined
    #   - A value of -1 if it's an arch and it depends of script parameters.
    #   - A value of -2 if it's not recognized when filtering MC blocks.
    kdKnownDefines = {
        'IEM_WITH_ONE_BYTE_TABLE':          1,
        'IEM_WITH_TWO_BYTE_TABLE':          1,
        'IEM_WITH_THREE_0F_38':             1,
        'IEM_WITH_THREE_0F_3A':             1,
        'IEM_WITH_THREE_BYTE_TABLES':       1,
        'IEM_WITH_3DNOW':                   1,
        'IEM_WITH_3DNOW_TABLE':             1,
        'IEM_WITH_VEX':                     1,
        'IEM_WITH_VEX_TABLES':              1,
        'VBOX_WITH_NESTED_HWVIRT_VMX':      1,
        'VBOX_WITH_NESTED_HWVIRT_VMX_EPT':  1,
        'VBOX_WITH_NESTED_HWVIRT_SVM':      1,
        'LOG_ENABLED':                      1,
        'RT_WITHOUT_PRAGMA_ONCE':           0,
        'TST_IEM_CHECK_MC':                 0,
        'IEM_WITHOUT_ASSEMBLY':            -2, ##< @todo ??
        'RT_ARCH_AMD64':                   -1,
        'RT_ARCH_ARM64':                   -1,
        'RT_ARCH_ARM32':                   -1,
        'RT_ARCH_X86':                     -1,
        'RT_ARCH_SPARC':                   -1,
        'RT_ARCH_SPARC64':                 -1,
    };
    ## Maps the architecture name given to the script to its RT_ARCH_XXX define.
    #  There is one entry per RT_ARCH_XXX define listed in kdKnownDefines.
    kdBuildArchToIprt = {
        'x86':      'RT_ARCH_X86',
        'amd64':    'RT_ARCH_AMD64',
        'arm32':    'RT_ARCH_ARM32',
        'arm64':    'RT_ARCH_ARM64',
        'sparc32':  'RT_ARCH_SPARC',
        'sparc64':  'RT_ARCH_SPARC64',
    };
    ## For parsing the next defined(xxxx).
    koMatchDefined = re.compile(r'\s*defined\s*\(\s*([^ \t)]+)\s*\)\s*');

    def __init__(self, sType, sExpr):
        """
        Records the conditional and validates its expression.

        For sType 'if' and 'elif' the expression is checked with
        checkExpression(); for any other type sExpr is taken to be a single
        define name and checked with checkSupportedDefine().  Both raise
        Exception on unsupported input.
        """
        self.sType   = sType;
        self.sExpr   = sExpr;   ##< Expression without command and no leading or trailing spaces.
        self.aoElif  = []       # type: List[PreprocessorConditional]
        self.fInElse = False;   ##< Boolean flag; only tested for truth by isInBlockForArch().
        if sType in ('if', 'elif'):
            self.checkExpression(sExpr);
        else:
            self.checkSupportedDefine(sExpr)

    @staticmethod
    def checkSupportedDefine(sDefine):
        """
        Checks that sDefine is one that we support.

        Supported are the keys of kdKnownDefines and names of the form
        VMM_INCLUDED_xxx_h.  Returns True, raises Exception if unsupported.
        """
        #print('debug: checkSupportedDefine: %s' % (sDefine,), file = sys.stderr);
        if sDefine in SimpleParser.PreprocessorConditional.kdKnownDefines:
            return True;
        if sDefine.startswith('VMM_INCLUDED_') and sDefine.endswith('_h'):
            return True;
        raise Exception('Unsupported define: %s' % (sDefine,));

    @staticmethod
    def checkExpression(sExpr):
        """
        Check that the expression is supported.

        Accepted are the literals '0' and '1' on their own, and sequences of
        defined(XXX) terms joined by '||' or '&&', optionally negated with '!'
        and grouped with parentheses, optionally ending in a literal '1'.
        Every XXX must pass checkSupportedDefine().

        Returns True, raises Exception if not supported.
        """
        #print('debug: checkExpression: %s' % (sExpr,), file = sys.stderr);
        if sExpr in ('0', '1'):
            return True;

        off    = 0;
        cParan = 0;
        while off < len(sExpr):
            ch = sExpr[off];

            # Unary operator or parentheses:
            if ch in ('(', '!'):
                if ch == '(':
                    cParan += 1;
                off += 1;
            else:
                # defined(xxxx)
                oMatch = SimpleParser.PreprocessorConditional.koMatchDefined.match(sExpr, off);
                if oMatch:
                    SimpleParser.PreprocessorConditional.checkSupportedDefine(oMatch.group(1));
                    off = oMatch.end();
                elif sExpr[off:] == '1':
                    # A literal '1' is only accepted as the very last thing in the
                    # expression; there is no match object to take the end from.
                    off = len(sExpr);
                else:
                    raise Exception('Cannot grok: \'%s\' (at %u in: \'%s\')' % (sExpr[off:off + 10], off + 1, sExpr,));

                # Look for closing parentheses.
                while off < len(sExpr) and sExpr[off].isspace():
                    off += 1;
                while off < len(sExpr) and sExpr[off] == ')':
                    if cParan <= 0:
                        raise Exception('Unbalanced parentheses at %u in \'%s\'' % (off + 1, sExpr,));
                    cParan -= 1;
                    off    += 1;
                    while off < len(sExpr) and sExpr[off].isspace():
                        off += 1;

                # Look for binary operator.
                if off >= len(sExpr):
                    break;
                if sExpr[off:off + 2] in ('||', '&&'):
                    off += 2;
                else:
                    raise Exception('Cannot grok operator: \'%s\' (at %u in: \'%s\')' % (sExpr[off:off + 2], off + 1, sExpr,));

            # Skip spaces.
            while off < len(sExpr) and sExpr[off].isspace():
                off += 1;
        if cParan != 0:
            raise Exception('Unbalanced parentheses at %u in \'%s\'' % (off + 1, sExpr,));
        return True;

    @staticmethod
    def isArchIncludedInExpr(sExpr, sArch):
        """
        Checks if sArch is included in the given expression.

        Returns True as soon as one defined(XXX) term holds for sArch (see
        matchDefined) or a trailing literal '1' is reached, False if no term
        holds.  Raises Exception for anything else than '||' between terms.
        """
        # We only grok defined() [|| defined()...] and [1|0] at the moment.
        if sExpr == '0':
            return False;
        if sExpr == '1':
            return True;
        off = 0;
        while off < len(sExpr):
            # defined(xxxx)
            oMatch = SimpleParser.PreprocessorConditional.koMatchDefined.match(sExpr, off);
            if not oMatch:
                if sExpr[off:] == '1':
                    return True;
                raise Exception('Cannot grok: %s (at %u in: %s)' % (sExpr[off:off + 10], off + 1, sExpr,));
            if SimpleParser.PreprocessorConditional.matchDefined(oMatch.group(1), sArch):
                return True;
            off = oMatch.end();

            # Look for OR operator.
            while off < len(sExpr) and sExpr[off].isspace():
                off += 1;
            if off >= len(sExpr):
                break;
            if sExpr.startswith('||', off):     # Must test at the current offset, not at the string start.
                off += 2;
            else:
                raise Exception('Cannot grok: %s (at %u in: %s)' % (sExpr[off:off + 10], off + 1, sExpr,));

            # Skip spaces following the operator so a trailing '1' is recognized.
            while off < len(sExpr) and sExpr[off].isspace():
                off += 1;

        return False;

    @staticmethod
    def matchArch(sDefine, sArch):
        """
        Compares sDefine (RT_ARCH_XXXX) and sArch (x86, amd64, arm64, ++).

        Returns True if sDefine is the define kdBuildArchToIprt lists for sArch.
        An sArch that is not in the table matches nothing.
        """
        return SimpleParser.PreprocessorConditional.kdBuildArchToIprt.get(sArch) == sDefine;

    @staticmethod
    def matchDefined(sExpr, sArch):
        """
        Check the result of an ifdef/ifndef expression, given sArch.

        sExpr is a single define name.  Names missing from kdKnownDefines count
        as undefined.  Raises Exception for defines marked -2 there.
        """
        iDefine = SimpleParser.PreprocessorConditional.kdKnownDefines.get(sExpr, 0);
        if iDefine == -2:
            raise Exception('Unsupported define for MC block filtering: %s' % (sExpr,));
        return iDefine == 1 or (iDefine == -1 and SimpleParser.PreprocessorConditional.matchArch(sExpr, sArch));

    def isArchIncludedInPrimaryBlock(self, sArch):
        """ Checks if sArch is included in the (primary) 'if' block. """
        if self.sType == 'ifdef':
            return self.matchDefined(self.sExpr, sArch);
        if self.sType == 'ifndef':
            return not self.matchDefined(self.sExpr, sArch);
        return self.isArchIncludedInExpr(self.sExpr, sArch);

    @staticmethod
    def isInBlockForArch(aoCppCondStack, sArch, iLine):
        """
        Checks if sArch is included in the current conditional block.

        aoCppCondStack is a sequence of conditionals, each of which must place
        sArch in the branch it is currently in: the primary block while there
        are no #elif entries and fInElse is clear, otherwise the last #elif
        (with fInElse clear), otherwise the #else branch.  iLine is unused.
        """
        _ = iLine;
        #print('debug: isInBlockForArch(%s,%s); line %s' % (len(aoCppCondStack), sArch, iLine), file = sys.stderr);
        for oCond in aoCppCondStack:
            if oCond.isArchIncludedInPrimaryBlock(sArch):
                # The primary block is for us, so any later branch is not.
                if oCond.aoElif or oCond.fInElse:
                    #print('debug: isInBlockForArch -> False #1', file = sys.stderr);
                    return False;
                #print('debug: isInBlockForArch(%s,%s): in IF-block' % (len(aoCppCondStack), sArch), file = sys.stderr);
            else:
                fFine = False;
                for oElifCond in oCond.aoElif:
                    if oElifCond.isArchIncludedInPrimaryBlock(sArch):
                        # Only fine if the matching #elif is the last entry and
                        # the #else flag is not set.
                        if oElifCond is not oCond.aoElif[-1] or oCond.fInElse:
                            #print('debug: isInBlockForArch -> False #3', file = sys.stderr);
                            return False;
                        fFine = True;
                if not fFine and not oCond.fInElse:
                    #print('debug: isInBlockForArch -> False #4', file = sys.stderr);
                    return False;
        #print('debug: isInBlockForArch -> True', file = sys.stderr);
        return True;
3614
def __init__(self, sSrcFile, asLines, sDefaultMap, sHostArch, oInheritMacrosFrom = None):
    """
    Initializes the parser state.

    sSrcFile, asLines and sHostArch are stored as given.  sDefaultMap must be
    a key of g_dInstructionMaps (asserted); the entry becomes oDefaultMap.
    If oInheritMacrosFrom is given, a copy of its macro dictionary and its
    macro matching expression are taken over, otherwise both start out empty.
    """
    # What we're working on and where we are.
    self.sSrcFile       = sSrcFile;
    self.asLines        = asLines;
    self.sHostArch      = sHostArch;
    self.iLine          = 0;
    self.iState         = self.kiCode;
    self.sComment       = '';
    self.iCommentLine   = 0;

    # Current instruction(s), function and MC block.
    self.aoCurInstrs    = []    # type: List[Instruction]
    self.oCurFunction   = None  # type: DecoderFunction
    self.iMcBlockInFunc = 0;
    self.oCurMcBlock    = None  # type: McBlock

    # Macros, optionally inherited (the dictionary is copied, the regex is shared).
    fInherit            = bool(oInheritMacrosFrom);
    self.dMacros        = dict(oInheritMacrosFrom.dMacros) if fInherit else {}  # type: Dict[str, SimpleParser.Macro]
    ## Regular expression matching invocations of anything in self.dMacros.
    self.oReMacros      = oInheritMacrosFrom.oReMacros if fInherit else None    # type: re

    ## Preprocessor conditional stack.
    self.aoCppCondStack = []    # type: List[PreprocessorConditional]

    # The default instruction map.
    assert sDefaultMap in g_dInstructionMaps;
    self.oDefaultMap    = g_dInstructionMaps[sDefaultMap];

    # Counters.
    self.cTotalInstr    = 0;
    self.cTotalStubs    = 0;
    self.cTotalTagged   = 0;
    self.cTotalMcBlocks = 0;

    # Precompiled regular expressions.  Macro names, mnemonics and stats
    # names all use the same plain identifier pattern.
    sReIdentifier       = r'^[A-Za-z_][A-Za-z0-9_]*$';
    self.oReMacroName   = re.compile(sReIdentifier);
    self.oReMnemonic    = re.compile(sReIdentifier);
    self.oReStatsName   = re.compile(sReIdentifier);
    self.oReFunctionName= re.compile(r'^iemOp_[A-Za-z_][A-Za-z0-9_]*$');
    self.oReGroupName   = re.compile(r'^og_[a-z0-9]+(|_[a-z0-9]+|_[a-z0-9]+_[a-z0-9]+)$');
    self.oReDisEnum     = re.compile(r'^OP_[A-Z0-9_]+$');
    self.oReFunTable    = re.compile(r'^(IEM_STATIC|static) +const +PFNIEMOP +g_apfn[A-Za-z0-9_]+ *\[ *\d* *\] *= *$');
    ## Full comments.
    self.oReComment     = re.compile(r'//.*?$|/\*.*?\*/');
    ## With arguments.
    self.oReHashDefine2 = re.compile(r'(?s)\A\s*([A-Za-z_][A-Za-z0-9_]*)\(([^)]*)\)\s*(.*)\Z');
    ## Simple, no arguments.
    self.oReHashDefine3 = re.compile(r'(?s)\A\s*([A-Za-z_][A-Za-z0-9_]*)[^(]\s*(.*)\Z');
    ## Not DEFER_TO_CIMPL_0_RET!
    self.oReMcBeginEnd  = re.compile(r'\bIEM_MC_(BEGIN|END|DEFER_TO_CIMPL_[1-5]_RET)\s*\(');

    # Debug switches.
    self.fDebug         = True;
    self.fDebugMc       = False;
    self.fDebugPreproc  = False;

    # Comment tag name -> handler method.
    self.dTagHandlers   = {
        '@opbrief':     self.parseTagOpBrief,
        '@opdesc':      self.parseTagOpDesc,
        '@opmnemonic':  self.parseTagOpMnemonic,
        '@op1':         self.parseTagOpOperandN,
        '@op2':         self.parseTagOpOperandN,
        '@op3':         self.parseTagOpOperandN,
        '@op4':         self.parseTagOpOperandN,
        '@oppfx':       self.parseTagOpPfx,
        '@opmaps':      self.parseTagOpMaps,
        '@opcode':      self.parseTagOpcode,
        '@opcodesub':   self.parseTagOpcodeSub,
        '@openc':       self.parseTagOpEnc,
        # @opfltest:    Lists all flags that will be used as input in some way.
        '@opfltest':    self.parseTagOpEFlags,
        # @opflmodify:  Lists all EFLAGS modified. Includes @opflset, @opflclear and @opflundef (if applicable).
        '@opflmodify':  self.parseTagOpEFlags,
        # @opflset:     Lists all flags that will be set (set to 1).
        '@opflset':     self.parseTagOpEFlags,
        # @opflclear:   Lists all flags that will be cleared (set to 0).
        '@opflclear':   self.parseTagOpEFlags,
        # @opflundef:   List of flag documented as undefined.
        '@opflundef':   self.parseTagOpEFlags,
        # @opflclass:   Shorthand for defining flag behaviour (@opfltest, @opflmodify, @opflset, @opflclear, @opflundef).
        '@opflclass':   self.parseTagOpEFlagsClass,
        '@ophints':     self.parseTagOpHints,
        '@opdisenum':   self.parseTagOpDisEnum,
        '@opmincpu':    self.parseTagOpMinCpu,
        '@opcpuid':     self.parseTagOpCpuId,
        '@opgroup':     self.parseTagOpGroup,
        '@opunused':    self.parseTagOpUnusedInvalid,
        '@opinvalid':   self.parseTagOpUnusedInvalid,
        '@opinvlstyle': self.parseTagOpUnusedInvalid,
        '@optest':      self.parseTagOpTest,
        '@optestign':   self.parseTagOpTestIgnore,
        '@optestignore': self.parseTagOpTestIgnore,
        '@opcopytests': self.parseTagOpCopyTests,
        '@oponly':      self.parseTagOpOnlyTest,
        '@oponlytest':  self.parseTagOpOnlyTest,
        '@opxcpttype':  self.parseTagOpXcptType,
        '@opstats':     self.parseTagOpStats,
        '@opfunction':  self.parseTagOpFunction,
        '@opdone':      self.parseTagOpDone,
    };
    # Numbered test tags, both as @optestN and @optest[N], for N in 0..47.
    for iTest in range(48):
        for sTagFmt in ('@optest%u', '@optest[%u]'):
            self.dTagHandlers[sTagFmt % (iTest,)] = self.parseTagOpTestNum;

    # Error messages collected by error(), errorOnLine() and errorComment().
    self.asErrors = [];
3706
def raiseError(self, sMessage):
    """
    Stops parsing by raising a ParserException.

    The exception text is sMessage prefixed with the source file name
    (self.sSrcFile) and the current line number (self.iLine).
    """
    sFullMessage = "%s:%d: error: %s" % (self.sSrcFile, self.iLine, sMessage,);
    raise ParserException(sFullMessage);
3712
def raiseCommentError(self, iLineInComment, sMessage):
    """
    Like raiseError, except that the reported line number is that of a line
    inside the current comment: self.iCommentLine + iLineInComment.
    """
    iLine        = self.iCommentLine + iLineInComment;
    sFullMessage = "%s:%d: error: %s" % (self.sSrcFile, iLine, sMessage,);
    raise ParserException(sFullMessage);
3718
def error(self, sMessage):
    """
    Records an error for the current line (self.iLine) in self.asErrors.

    Always returns False, so callers can write 'return self.error(...)'.
    """
    sEntry = u'%s:%d: error: %s\n' % (self.sSrcFile, self.iLine, sMessage,);
    self.asErrors.append(sEntry);
    return False;
3726
def errorOnLine(self, iLine, sMessage):
    """
    Records an error for the explicitly given line number iLine in
    self.asErrors.

    Always returns False, so callers can write 'return self.errorOnLine(...)'.
    """
    sEntry = u'%s:%d: error: %s\n' % (self.sSrcFile, iLine, sMessage,);
    self.asErrors.append(sEntry);
    return False;
3734
def errorComment(self, iLineInComment, sMessage):
    """
    Records an error for a line inside the current comment.

    The reported line number is self.iCommentLine + iLineInComment.
    Always returns False, so callers can write 'return self.errorComment(...)'.
    """
    iLine  = self.iCommentLine + iLineInComment;
    sEntry = u'%s:%d: error: %s\n' % (self.sSrcFile, iLine, sMessage,);
    self.asErrors.append(sEntry);
    return False;
3742
def printErrors(self):
    """
    Writes all recorded errors (self.asErrors) to stderr in one go.

    Returns the number of recorded errors; nothing is written when there
    are none.
    """
    cErrors = len(self.asErrors);
    if cErrors > 0:
        sys.stderr.write(u''.join(self.asErrors));
    return cErrors;
3751
def debug(self, sMessage):
    """
    Prints sMessage to stderr with a 'debug: ' prefix, but only when
    self.fDebug is set.
    """
    if not self.fDebug:
        return;
    print('debug: %s' % (sMessage,), file = sys.stderr);
3758
def stripComments(self, sLine):
    """
    Returns sLine with each comment matched by self.oReComment replaced by
    a single space.

    If a comment start or end marker remains afterwards, the line is taken
    to be part of a multi-line comment and an error is recorded via
    self.error(); the partially stripped line is returned all the same.
    """
    sStripped = self.oReComment.sub(" ", sLine);
    if '/*' in sStripped or '*/' in sStripped:
        self.error('Unexpected multi-line comment will not be handled correctly. Please simplify.');
    return sStripped;
3769
def parseFunctionTable(self, sLine):
    """
    Parses a PFNIEMOP function table, updating/checking the opcode and
    prefix values of the instructions its entries refer to.

    sLine is the line declaring the table; the table name is the identifier
    in front of the first '['.  The line following it must hold a lone '{',
    after which entries are consumed until a '}' or '};' entry is reached.

    Every entry that does not start with 'iemOp_Invalid' is looked up in
    g_dAllInstructionsByFunction.  The position in the table gives the
    opcode byte and prefix.  An instruction already agreeing with these is
    reused, one lacking prefix (or both opcode and prefix) gets them filled
    in, and if none fits a copy of the first instruction is registered for
    the new opcode/prefix combination.

    Returns True on success.  On failure an error is recorded and False is
    returned.

    Note!   Updates self.iLine as it consumes the whole table.
    """

    #
    # Table name and the instruction map that goes with it.
    #
    sTableName = re.search(r' *([a-zA-Z_0-9]+) *\[', sLine).group(1);
    oTableMap  = g_dInstructionMapsByIemName.get(sTableName);
    if not oTableMap:
        self.debug('No map for PFNIEMOP table: %s' % (sTableName,));
        oTableMap = self.oDefaultMap; # This is wrong wrong wrong.

    #
    # Tables declared with 256 or 32 entries have a single entry per opcode
    # byte.  All other tables have four entries per byte, in this order:
    #       no prefix, 066h prefix, f3h prefix, f2h prefix
    #
    oSizeMatch = re.search(r'\[ *(256|32) *\]', sLine);
    if oSizeMatch:
        cPerByte      = 1;
        cExpected     = int(oSizeMatch.group(1));
        asPrefixTable = (None,);
    else:
        cPerByte      = 4;
        cExpected     = 1024;
        asPrefixTable = ('none', '0x66', '0xf3', '0xf2');

    #
    # The next line should be '{' and nothing else.
    #
    if self.iLine >= len(self.asLines) or not re.match('^ *{ *$', self.asLines[self.iLine]):
        return self.errorOnLine(self.iLine + 1, 'Expected lone "{" on line following PFNIEMOP table %s start' % (sTableName, ));
    self.iLine += 1;

    #
    # Work our way thru the table, one source line at a time.
    #
    iEntry = 0;
    while self.iLine < len(self.asLines):
        # Fetch the line and get rid of comments and surrounding spaces
        # (assumes no multi-line comments).
        sCurLine = self.asLines[self.iLine];
        self.iLine += 1;
        sCurLine = self.stripComments(sCurLine).strip();

        # Split it into entries.  IEMOP_X4(x) stands for four x entries:
        # three copies are always added for it, plus the entry itself
        # unless it is empty (empty plain entries are dropped).
        asEntries = [];
        for sRaw in sCurLine.split(','):
            sEntry  = sRaw.strip();
            cCopies = 0;
            if sEntry.startswith('IEMOP_X4(') and sEntry.endswith(')'):
                sEntry  = (sEntry[len('IEMOP_X4('):-1]).strip();
                cCopies = 3;
            if sEntry:
                cCopies += 1;
            asEntries.extend([sEntry,] * cCopies);

        # Process the entries.
        for sEntry in asEntries:
            if sEntry in ('};', '}'):
                if iEntry == cExpected:
                    return True;
                return self.error('Wrong table length for %s: %#x, expected %#x' % (sTableName, iEntry, cExpected, ));

            if not sEntry.startswith('iemOp_Invalid'):
                # Look up matching instruction by function.
                iByte   = iEntry // cPerByte;
                sPrefix = asPrefixTable[iEntry % cPerByte];
                sOpcode = '%#04x' % (iByte,);
                aoInstr = g_dAllInstructionsByFunction.get(sEntry);
                if not aoInstr:
                    self.debug('Function "%s", entry %#04x / byte %#04x in %s, is not associated with an instruction.'
                               % (sEntry, iEntry, iByte, sTableName,));
                else:
                    if not isinstance(aoInstr, list):
                        aoInstr = [aoInstr,];

                    # Find the first instruction that is, or can be made,
                    # compatible with this table position.
                    oInstr = None;
                    for oCandidate in aoInstr:
                        fSameOpcode = oCandidate.sOpcode == sOpcode;
                        if fSameOpcode and oCandidate.sPrefix == sPrefix:
                            pass;
                        elif fSameOpcode and oCandidate.sPrefix is None:
                            oCandidate.sPrefix = sPrefix;
                        elif oCandidate.sOpcode is None and oCandidate.sPrefix is None:
                            oCandidate.sOpcode = sOpcode;
                            oCandidate.sPrefix = sPrefix;
                        else:
                            continue;
                        oInstr = oCandidate;
                        break;

                    # None fits: clone the first one for this position.
                    if not oInstr:
                        oInstr = aoInstr[0].copy(oMap = oTableMap, sOpcode = sOpcode, sPrefix = sPrefix);
                        aoInstr.append(oInstr);
                        g_dAllInstructionsByFunction[sEntry] = aoInstr;
                        g_aoAllInstructions.append(oInstr);
                        oTableMap.aoInstructions.append(oInstr);

            # Invalid entries count towards the table length too.
            iEntry += 1;

    return self.error('Unexpected end of file in PFNIEMOP table');
3875
def addInstruction(self, iLine = None):
    """
    Creates a new Instruction for the current source file and registers it
    both globally (g_aoAllInstructions) and as a current instruction
    (self.aoCurInstrs).

    iLine is the line the instruction starts on, defaulting to self.iLine.
    Returns the new instruction.
    """
    if iLine is None:
        iLine = self.iLine;
    oInstr = Instruction(self.sSrcFile, iLine);
    g_aoAllInstructions.append(oInstr);
    self.aoCurInstrs.append(oInstr);
    return oInstr;
3884
def deriveMnemonicAndOperandsFromStats(self, oInstr, sStats):
    """
    Derives the mnemonic and operands from an IEM stats base name like
    string, i.e. '<mnemonic>[_<optype>[_<optype>...]]'.

    Does nothing if the instruction already has a mnemonic.  Operands are
    only derived when the instruction has none yet.

    Returns True on success, False if an operand type isn't found in
    g_kdOpTypes (no error is recorded; operands preceding the unknown one
    have been added by then).
    """
    if oInstr.sMnemonic is not None:
        return True;

    asParts = sStats.split('_');
    oInstr.sMnemonic = asParts[0].lower();
    if len(asParts) <= 1 or oInstr.aoOperands:
        return True;

    for sType in asParts[1:]:
        if sType not in g_kdOpTypes:
            return False; # Deliberately silent.
        oInstr.aoOperands.append(Operand(g_kdOpTypes[sType][1], sType));
    return True;
3900
def doneInstructionOne(self, oInstr, iLine):
    """
    Completes the parsing of one instruction by filling in defaults for
    whatever wasn't specified, and registering it in the lookup tables.

    iLine is recorded as the line the instruction was completed on; the
    instruction must not have been completed before.

    Always returns True.  A duplicate statistics name is recorded as an
    error via self.error().
    """
    assert oInstr.iLineCompleted is None;
    oInstr.iLineCompleted = iLine;

    #
    # Note! Nothing is done differently for tagged and untagged (legacy)
    #       instructions at this point.  For the latter we generally only
    #       got a few things to go on:
    #           /** Opcode 0x0f 0x00 /0. */
    #           FNIEMOPRM_DEF(iemOp_Grp6_sldt)
    #

    #
    # Common defaults.
    #

    # No mnemonic: guess it (and the operands) from the stats name, or
    # failing that from the function name.
    if oInstr.sMnemonic is None:
        if oInstr.sStats is not None:
            self.deriveMnemonicAndOperandsFromStats(oInstr, oInstr.sStats);
        elif oInstr.sFunction is not None:
            self.deriveMnemonicAndOperandsFromStats(oInstr, oInstr.sFunction.replace('iemOp_', ''));

    # The disassembler op enum constant follows from the mnemonic.
    if oInstr.sDisEnum is None and oInstr.sMnemonic is not None:
        oInstr.sDisEnum = 'OP_' + oInstr.sMnemonic.upper();

    # The IEM statistics base name: function name sans prefix, or else the
    # mnemonic followed by the operand types.
    if oInstr.sStats is None:
        if oInstr.sFunction is not None:
            oInstr.sStats = oInstr.sFunction.replace('iemOp_', '');
        elif oInstr.sMnemonic is not None:
            oInstr.sStats = oInstr.sMnemonic \
                          + ''.join(['_' + oOperand.sType for oOperand in oInstr.aoOperands if oOperand.sType]);

    # The IEM function name: mnemonic followed by the operand types, or
    # else the statistics name.
    if oInstr.sFunction is None:
        if oInstr.sMnemonic is not None:
            oInstr.sFunction = 'iemOp_' + oInstr.sMnemonic \
                             + ''.join(['_' + oOperand.sType for oOperand in oInstr.aoOperands if oOperand.sType]);
        elif oInstr.sStats:
            oInstr.sFunction = 'iemOp_' + oInstr.sStats;

    #
    # Instructions without any map go into the default one.  Then add the
    # instruction to each of its maps.
    #
    if not oInstr.aoMaps:
        oInstr.aoMaps = [ self.oDefaultMap, ];
    for oMap in oInstr.aoMaps:
        oMap.aoInstructions.append(oInstr);

    #
    # Derive encoding from operands and maps.  Left unset when the first
    # operand doesn't use ModR/M.
    #
    if oInstr.sEncoding is None:
        if not oInstr.aoOperands:
            sBase = 'ModR/M' if oInstr.fUnused and oInstr.sSubOpcode else 'fixed';
            oInstr.sEncoding = ('VEX.' + sBase) if oInstr.onlyInVexMaps() else sBase;
        elif oInstr.aoOperands[0].usesModRM():
            fVex = (len(oInstr.aoOperands) >= 2 and oInstr.aoOperands[1].sWhere == 'vvvv') \
                or oInstr.onlyInVexMaps();
            oInstr.sEncoding = 'VEX.ModR/M' if fVex else 'ModR/M';

    #
    # Add to the statistics name indexed dictionary; names must be unique.
    #
    if oInstr.sStats:
        if oInstr.sStats in g_dAllInstructionsByStat:
            self.error('Duplicate opstat value "%s"\nnew: %s\nold: %s'
                       % (oInstr.sStats, oInstr, g_dAllInstructionsByStat[oInstr.sStats],));
        else:
            g_dAllInstructionsByStat[oInstr.sStats] = oInstr;

    #
    # Add to function indexed dictionary.  We allow multiple instructions
    # per function.
    #
    if oInstr.sFunction:
        g_dAllInstructionsByFunction.setdefault(oInstr.sFunction, []).append(oInstr);

    return True;
4006
def doneInstructions(self, iLineInComment = None, fEndOfFunction = False):
    """
    Done with the current instruction(s).

    Completes each instruction in self.aoCurInstrs via doneInstructionOne,
    updates the instruction and stub totals, and resets the current comment
    and instruction list.  The completion line is self.iLine, unless
    iLineInComment is given, in which case it is relative to
    self.iCommentLine.

    With fEndOfFunction set, the current function (if any) is completed
    with its source lines and the per-function state is reset.

    Always returns True.
    """
    for oInstr in self.aoCurInstrs:
        if iLineInComment is None:
            iLineDone = self.iLine;
        else:
            iLineDone = self.iCommentLine + iLineInComment;
        self.doneInstructionOne(oInstr, iLineDone);
        if oInstr.fStub:
            self.cTotalStubs += 1;

    self.cTotalInstr += len(self.aoCurInstrs);

    self.sComment    = '';
    self.aoCurInstrs = [];
    if fEndOfFunction:
        oFunction = self.oCurFunction;
        if oFunction:
            oFunction.complete(self.iLine, self.asLines[oFunction.iBeginLine - 1 : self.iLine]);
        self.oCurFunction   = None;
        self.iMcBlockInFunc = 0;
    return True;
4027
def setInstrunctionAttrib(self, sAttrib, oValue, fOverwrite = False):
    """
    Sets attribute sAttrib of all the current instructions to oValue.

    Unless fOverwrite is True, only attributes currently holding None are
    replaced; any other value (empty strings included) is left alone.
    """
    for oInstr in self.aoCurInstrs:
        if fOverwrite is True or getattr(oInstr, sAttrib) is None:
            setattr(oInstr, sAttrib, oValue);
4039
def setInstrunctionArrayAttrib(self, sAttrib, iEntry, oValue, fOverwrite = False):
    """
    Sets entry iEntry of the array attribute sAttrib of all the current
    instructions to oValue, padding the array with None entries as needed.

    Unless fOverwrite is True, only entries currently holding None are
    replaced.
    """
    for oInstr in self.aoCurInstrs:
        aoArray  = getattr(oInstr, sAttrib);
        cMissing = iEntry + 1 - len(aoArray);
        if cMissing > 0:
            aoArray.extend([None,] * cMissing);
        if fOverwrite is True or aoArray[iEntry] is None:
            aoArray[iEntry] = oValue;
4051
def parseCommentOldOpcode(self, asLines):
    """
    Deals with 'Opcode 0xff /4' like comments.

    When the first comment line starts with the word 'Opcode' followed by a
    word with a '0x' or '0X' prefix, everything after 'Opcode' is stored as
    the sRawOldOpcodes attribute of the current instructions (only where it
    isn't set yet), with any '0X' prefix normalized to '0x'.

    Always returns False.
    """
    asWords = asLines[0].split() if asLines else [];
    if     len(asWords) >= 2 \
       and asWords[0] == 'Opcode' \
       and asWords[1].lower().startswith('0x'):
        # Drop the leading 'Opcode' word and normalize the hex prefixes.
        asOpcodeWords = [];
        for sWord in asWords[1:]:
            if sWord.startswith('0X'):
                sWord = '0x' + sWord[2:];
            asOpcodeWords.append(sWord);
        self.setInstrunctionAttrib('sRawOldOpcodes', ' '.join(asOpcodeWords));

    return False;
4067
def ensureInstructionForOpTag(self, iTagLine):
    """
    Ensures there is an instruction for the op-tag being parsed.

    An instruction is added (at the tag's line) if there is no current one.
    The tag count of every current instruction is incremented, and
    self.cTotalTagged is bumped for each instruction getting its first tag.

    Returns the last of the current instructions.
    """
    if not self.aoCurInstrs:
        self.addInstruction(self.iCommentLine + iTagLine);
    for oInstr in self.aoCurInstrs:
        fFirstTag = oInstr.cOpTags == 0;
        oInstr.cOpTags += 1;
        if fFirstTag:
            self.cTotalTagged += 1;
    return self.aoCurInstrs[-1];
4077
@staticmethod
def flattenSections(aasSections):
    """
    Flattens each multi-line section into a single string by stripping its
    lines and joining them with a space.  Empty sections are dropped.

    Returns list of strings, one section per string.
    """
    return [' '.join([sLine.strip() for sLine in asLines]) for asLines in aasSections if asLines];
4089
@staticmethod
def flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = '\n'):
    """
    Flattens all the sections into one string.

    The stripped lines of a section are joined using sLineSep.  Each
    non-empty section other than the very first one is preceded by
    sSectionSep, so there is no trailing separator after the last one.
    """
    # Typical: One section with a single line.
    if len(aasSections) == 1 and len(aasSections[0]) == 1:
        return aasSections[0][0].strip();

    asPieces = [];
    for iSection, asLines in enumerate(aasSections):
        if not asLines:
            continue;
        if iSection > 0:
            asPieces.append(sSectionSep);
        asPieces.append(sLineSep.join([sLine.strip() for sLine in asLines]));
    return ''.join(asPieces);
4107
4108
4109
4110 ## @name Tag parsers
4111 ## @{
4112
def parseTagOpBrief(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opbrief
    Value:  Text description, multiple sections, appended.

    Brief description.  If not given, it's the first sentence from @opdesc.

    The value must be a single sentence of at most 180 characters; a
    terminating full stop is appended if missing.

    Returns True after storing the value in oInstr.sBrief.  Returns False,
    after recording a comment error, if the value is invalid or a brief has
    already been set for the instruction.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sBrief = self.flattenAllSections(aasSections);
    if not sBrief:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if sBrief[-1] != '.':
        sBrief = sBrief + '.';
    if len(sBrief) > 180:
        return self.errorComment(iTagLine, '%s: value too long (max 180 chars): %s' % (sTag, sBrief));

    # Only a dot followed by a space counts as ending a sentence, so find
    # the first such dot (or the final one) and check that it is the last
    # character.
    offDot = sBrief.find('.');
    while 0 <= offDot < len(sBrief) - 1 and sBrief[offDot + 1] != ' ':
        offDot = sBrief.find('.', offDot + 1);
    if offDot >= 0 and offDot != len(sBrief) - 1:
        return self.errorComment(iTagLine, '%s: only one sentence: %s' % (sTag, sBrief));

    # Update the instruction, refusing to overwrite an existing brief.
    if oInstr.sBrief is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite brief "%s" with "%s"'
                                           % (sTag, oInstr.sBrief, sBrief,));
    oInstr.sBrief = sBrief;

    _ = iEndLine;
    return True;
4142
def parseTagOpDesc(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opdesc
    Value:  Text description, multiple sections, appended.

    It is used to describe instructions.  Each non-empty section is
    flattened into a string and appended to oInstr.asDescSections.

    Always returns True.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    if aasSections:
        oInstr.asDescSections.extend(self.flattenSections(aasSections));

    _ = sTag; _ = iEndLine;
    return True;
4157
def parseTagOpMnemonic(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opmnemonic
    Value:  mnemonic

    The 'mnemonic' value must be a valid C identifier string.  Because of
    prefixes, groups and whatnot, there are times when the mnemonic isn't
    that of an actual assembler mnemonic.

    Returns True after setting oInstr.sMnemonic.  Returns False, after
    recording a comment error, if the value doesn't match self.oReMnemonic
    or a mnemonic has already been set.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sNewMnemonic = self.flattenAllSections(aasSections);
    if self.oReMnemonic.match(sNewMnemonic) is None:
        return self.errorComment(iTagLine, '%s: invalid menmonic name: "%s"' % (sTag, sNewMnemonic,));

    # Set it, refusing to overwrite.
    sOldMnemonic = oInstr.sMnemonic;
    if sOldMnemonic is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite menmonic "%s" with "%s"'
                                           % (sTag, sOldMnemonic, sNewMnemonic,));
    oInstr.sMnemonic = sNewMnemonic;

    _ = iEndLine;
    return True;
4180
def parseTagOpOperandN(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tags:   @op1, @op2, @op3, @op4
    Value:  [where:]type

    The 'where' value indicates where the operand is found, like the 'reg'
    part of the ModR/M encoding.  It must be a key in g_kdOpLocations.  If
    omitted, the location associated with the type in g_kdOpTypes is used.

    The 'type' value indicates the operand type.  These follow the types
    given in the opcode tables in the CPU reference manuals.  It must be a
    key in g_kdOpTypes.

    Returns True after storing the operand in oInstr.aoOperands (index
    given by the digit ending the tag).  Returns False, after recording a
    comment error, if the value is malformed or invalid, or if the operand
    has already been specified.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    idxOp = int(sTag[-1]) - 1;
    assert 0 <= idxOp < 4;

    # flatten, split up, and validate the "where:type" value.
    sFlattened = self.flattenAllSections(aasSections);
    asSplit = sFlattened.split(':');
    if len(asSplit) == 1:
        sType  = asSplit[0];
        sWhere = None;
    elif len(asSplit) == 2:
        (sWhere, sType) = asSplit;
    else:
        return self.errorComment(iTagLine, 'expected %s value on format "[<where>:]<type>" not "%s"' % (sTag, sFlattened,));

    if sType not in g_kdOpTypes:
        return self.errorComment(iTagLine, '%s: invalid type value "%s", valid: %s'
                                           % (sTag, sType, ', '.join(g_kdOpTypes.keys()),));
    if sWhere is None:
        sWhere = g_kdOpTypes[sType][1]; # The default location for the type.
    elif sWhere not in g_kdOpLocations:
        return self.errorComment(iTagLine, '%s: invalid where value "%s", valid: %s'
                                           % (sTag, sWhere, ', '.join(g_kdOpLocations.keys()),));

    # Insert the operand, refusing to overwrite an existing one.
    while idxOp >= len(oInstr.aoOperands):
        oInstr.aoOperands.append(None);
    if oInstr.aoOperands[idxOp] is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s:%s" with "%s:%s"'
                                           % ( sTag, oInstr.aoOperands[idxOp].sWhere, oInstr.aoOperands[idxOp].sType,
                                               sWhere, sType,));
    oInstr.aoOperands[idxOp] = Operand(sWhere, sType);

    _ = iEndLine;
    return True;
4230
def parseTagOpMaps(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opmaps
    Value:  map[,map2]

    Indicates which maps the instruction is in.  There is a default map
    associated with each input file.

    Each name must be a key in g_dInstructionMaps and must not name a map
    the instruction is already in; a comment error is recorded and False
    returned otherwise.  A name repeated within the value is recorded as an
    error too, but doesn't make the call fail.  Returns True after adding
    the maps to oInstr.aoMaps.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split up and validate the value.
    asMapNames = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',').split(',');
    if not asMapNames:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    asUnknown = [sName for sName in asMapNames if sName not in g_dInstructionMaps];
    if asUnknown:
        return self.errorComment(iTagLine, '%s: invalid map value: %s (valid values: %s)'
                                           % (sTag, asUnknown[0], ', '.join(g_dInstructionMaps.keys()),));

    # Complain if the instruction already is in any of the maps.
    asAlreadyIn = [oMap.sName for oMap in oInstr.aoMaps if oMap.sName in asMapNames];
    if asAlreadyIn:
        return self.errorComment(iTagLine, '%s: duplicate map assignment: %s' % (sTag, asAlreadyIn[0]));

    # Add the maps, complaining about (and skipping) repetitions in the input.
    for sName in asMapNames:
        oMap = g_dInstructionMaps[sName];
        if oMap in oInstr.aoMaps:
            self.errorComment(iTagLine, '%s: duplicate map assignment (input): %s' % (sTag, sName));
        else:
            oInstr.aoMaps.append(oMap);

    _ = iEndLine;
    return True;
4265
def parseTagOpPfx(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @oppfx
    Value:  n/a|none|0x66|0xf3|0xf2|!0xf3

    Required prefix for the instruction.  (In a (E)VEX context this is the
    value of the 'pp' field rather than an actual prefix.)

    The value is lower-cased.  'n/a' is stored as None.  A two character
    value gets '0x' prepended before being validated.

    Returns True after setting oInstr.sPrefix.  Returns False, after
    recording a comment error, if the value is missing, holds more than one
    word, isn't a valid prefix, or a prefix has already been set.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sFlattened = self.flattenAllSections(aasSections);
    asPrefixes = sFlattened.split();
    if not asPrefixes:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if len(asPrefixes) > 1:
        return self.errorComment(iTagLine, '%s: max one prefix: %s' % (sTag, asPrefixes,));

    sPrefix = asPrefixes[0].lower();
    if sPrefix == 'n/a':
        sPrefix = None;
    elif sPrefix != 'none':
        if len(sPrefix) == 2:
            sPrefix = '0x' + sPrefix;
        if not _isValidOpcodeByte(sPrefix) and sPrefix != '!0xf3':
            return self.errorComment(iTagLine, '%s: invalid prefix: %s' % (sTag, sPrefix,));

    if sPrefix is not None and sPrefix not in g_kdPrefixes:
        return self.errorComment(iTagLine, '%s: invalid prefix: %s (valid %s)' % (sTag, sPrefix, g_kdPrefixes,));

    # Set it.
    if oInstr.sPrefix is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sPrefix, sPrefix,));
    oInstr.sPrefix = sPrefix;

    _ = iEndLine;
    return True;
4304
def parseTagOpcode(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opcode
    Value:  0x?? | /reg | 11/reg | !11/reg  (TODO: | mr/reg | 11 mr/reg | !11 mr/reg)

    The opcode byte or sub-byte for the instruction in the context of a map.
    The 'reg' part is a single digit in the range 0 thru 7.

    Returns True after setting oInstr.sOpcode.  Returns False, after
    recording a comment error, if the value is invalid or an opcode has
    already been set.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sOpcode = self.flattenAllSections(aasSections);
    fValid  = _isValidOpcodeByte(sOpcode);
    if not fValid:
        # A '/reg' style value: known lead-in followed by one digit, 0..7.
        for sLeadIn in ('/', '11/', '!11/'):
            if     len(sOpcode) == len(sLeadIn) + 1 \
               and sOpcode.startswith(sLeadIn) \
               and sOpcode[-1] in '01234567':
                fValid = True;
                break;
    if not fValid:
        return self.errorComment(iTagLine, '%s: invalid opcode: %s' % (sTag, sOpcode,));

    # Set it.
    if oInstr.sOpcode is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sOpcode, sOpcode,));
    oInstr.sOpcode = sOpcode;

    _ = iEndLine;
    return True;
4334
def parseTagOpcodeSub(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opcodesub
    Value:  none | 11 mr/reg | !11 mr/reg | rex.w=0 | rex.w=1 | vex.l=0 | vex.l=1
            | 11 mr/reg vex.l=0 | 11 mr/reg vex.l=1 | !11 mr/reg vex.l=0 | !11 mr/reg vex.l=1
            | !11 rex.w=0 | !11 mr/reg rex.w=0
            | !11 rex.w=1 | !11 mr/reg rex.w=1

    This is a simple way of dealing with encodings where the mod=3 and mod!=3
    represents exactly two different instructions.  The more proper way would
    be to go via maps with two members, but this is faster.

    The value must be a key in g_kdSubOpcodes; what gets stored in
    oInstr.sSubOpcode is the first element of the associated entry.

    Returns True on success.  Returns False, after recording a comment
    error, if the value is invalid or a sub opcode has already been set.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sValue = self.flattenAllSections(aasSections);
    if sValue not in g_kdSubOpcodes:
        return self.errorComment(iTagLine, '%s: invalid sub opcode: %s (valid: %s)'
                                           % (sTag, sValue, ', '.join(sorted(g_kdSubOpcodes.keys())),));
    sNewSubOpcode = g_kdSubOpcodes[sValue][0];

    # Set it, refusing to overwrite.
    sOldSubOpcode = oInstr.sSubOpcode;
    if sOldSubOpcode is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                           % ( sTag, sOldSubOpcode, sNewSubOpcode,));
    oInstr.sSubOpcode = sNewSubOpcode;

    _ = iEndLine;
    return True;
4364
def parseTagOpEnc(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @openc
    Value:  ModR/M|fixed|prefix|<map name>

    The instruction operand encoding style.

    A value is accepted if it is a key in g_kdEncodings, a key in
    g_dInstructionMaps, or passes _isValidOpcodeByte.

    Returns True after setting oInstr.sEncoding.  Returns False, after
    recording a comment error, if the value is invalid or an encoding has
    already been set.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sNewEncoding = self.flattenAllSections(aasSections);
    fValid =    sNewEncoding in g_kdEncodings \
             or sNewEncoding in g_dInstructionMaps \
             or _isValidOpcodeByte(sNewEncoding);
    if not fValid:
        return self.errorComment(iTagLine, '%s: invalid encoding: %s' % (sTag, sNewEncoding,));

    # Set it, refusing to overwrite.
    sOldEncoding = oInstr.sEncoding;
    if sOldEncoding is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                           % ( sTag, sOldEncoding, sNewEncoding,));
    oInstr.sEncoding = sNewEncoding;

    _ = iEndLine;
    return True;
4391
## Maps each EFLAGS tag to the name of the Instruction attribute holding its
#  flag list.  Used by parseTagOpEFlags.
kdOpFlagToAttr = {
    '@opfltest':   'asFlTest',
    '@opflmodify': 'asFlModify',
    '@opflundef':  'asFlUndefined',
    '@opflset':    'asFlSet',
    '@opflclear':  'asFlClear',
};
4400
def parseTagOpEFlags(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tags:   @opfltest, @opflmodify, @opflundef, @opflset, @opflclear
    Value:  <eflags specifier>

    The value is a comma separated list of flags, each of which must be a
    key in g_kdEFlagsMnemonics (surrounding spaces are tolerated), or the
    single word 'none' for an empty list.  The list is stored in the
    instruction attribute kdOpFlagToAttr associates with the tag.

    Returns True on success.  Returns False, after recording a comment
    error for each invalid flag, or if a non-empty list has already been
    set for the tag.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split up and validate the values.
    asFlags = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',').split(',');
    if len(asFlags) == 1 and asFlags[0].lower() == 'none':
        asFlags = [];
    else:
        fRc = True;
        for iFlag, sFlag in enumerate(asFlags):
            if sFlag in g_kdEFlagsMnemonics:
                continue;
            sStripped = sFlag.strip();
            if sStripped in g_kdEFlagsMnemonics:
                asFlags[iFlag] = sStripped;
            else:
                fRc = self.errorComment(iTagLine, '%s: invalid EFLAGS value: %s' % (sTag, sFlag,));
        if not fRc:
            return False;

    # Set them, refusing to overwrite a non-empty list.
    sAttrib = self.kdOpFlagToAttr[sTag];
    asOld   = getattr(oInstr, sAttrib);
    if asOld is not None and len(asOld) > 0:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, asOld, asFlags,));
    setattr(oInstr, sAttrib, asFlags);

    _ = iEndLine;
    return True;
4432
## EFLAGS class definitions with their attribute lists.
#  Keyed by @opflclass value; each entry maps Instruction attribute names to
#  the flag list to assign (see parseTagOpEFlagsClass).
kdEFlagsClasses = {
    # add, sub, ...
    'arithmetic': {
        'asFlTest':      [],
        'asFlModify':    [ 'cf', 'pf', 'af', 'zf', 'sf', 'of', ],
        'asFlClear':     [],
        'asFlSet':       [],
        'asFlUndefined': [],
    },
    # adc, sbb, ...
    'arithmetic_carry': {
        'asFlTest':      [ 'cf', ],
        'asFlModify':    [ 'cf', 'pf', 'af', 'zf', 'sf', 'of', ],
        'asFlClear':     [],
        'asFlSet':       [],
        'asFlUndefined': [],
    },
    # Leaves CF alone.
    'incdec': {
        'asFlTest':      [],
        'asFlModify':    [ 'pf', 'af', 'zf', 'sf', 'of', ],
        'asFlClear':     [],
        'asFlSet':       [],
        'asFlUndefined': [],
    },
    # Intel leaves all flags unchanged, while AMD sets AF and clears PF, ZF
    # & SF, leaving CF and OF alone.
    ## @todo specify intel/amd differences...
    'division': {
        'asFlTest':      [ 'pf', 'af', 'zf', 'sf', ],
        'asFlModify':    [ 'pf', 'af', 'zf', 'sf', ],
        'asFlClear':     [],
        'asFlSet':       [],
        'asFlUndefined': [ 'cf', 'pf', 'af', 'zf', 'sf', 'of', ],
    },
    # AMD leaves PF, AF, ZF & SF unchanged, so we have to declare them as
    # inputs.  Intel always modifies all flags, but how differs between IMUL
    # and MUL.
    ## @todo specify intel/amd differences...
    'multiply': {
        'asFlTest':      [ 'pf', 'af', 'zf', 'sf', ],
        'asFlModify':    [ 'cf', 'pf', 'af', 'zf', 'sf', 'of' ],
        'asFlClear':     [],
        'asFlSet':       [],
        'asFlUndefined': [ 'pf', 'af', 'zf', 'sf', ],
    },
    # and, or, xor, ...
    # Note! 'af' is undefined, but tstIEMAImpl indicates that it is cleared.
    'logical': {
        'asFlTest':      [],
        'asFlModify':    [ 'cf', 'pf', 'af', 'zf', 'sf', 'of', ],
        'asFlClear':     [ 'cf', 'af', 'of', ],
        'asFlSet':       [],
        'asFlUndefined': [ 'af', ],
    },
    # rol and ror with fixed 1 shift count
    'rotate_1': {
        'asFlTest':      [],
        'asFlModify':    [ 'cf', 'of', ],
        'asFlClear':     [],
        'asFlSet':       [],
        'asFlUndefined': [],
    },
    # rol and ror w/o fixed 1 shift count
    'rotate_count': {
        'asFlTest':      [],
        'asFlModify':    [ 'cf', 'of', ],
        'asFlClear':     [],
        'asFlSet':       [],
        'asFlUndefined': [ 'of', ],
    },
    # rcl and rcr with fixed 1 shift count
    'rotate_carry_1': {
        'asFlTest':      [ 'cf', ],
        'asFlModify':    [ 'cf', 'of', ],
        'asFlClear':     [],
        'asFlSet':       [],
        'asFlUndefined': [],
    },
    # rcl and rcr w/o fixed 1 shift count
    'rotate_carry_count': {
        'asFlTest':      [ 'cf', ],
        'asFlModify':    [ 'cf', 'of', ],
        'asFlClear':     [],
        'asFlSet':       [],
        'asFlUndefined': [ 'of', ],
    },
    # shl, shr or sar with fixed 1 count.
    'shift_1': {
        'asFlTest':      [],
        'asFlModify':    [ 'cf', 'pf', 'af', 'zf', 'sf', 'of', ],
        'asFlClear':     [],
        'asFlSet':       [],
        'asFlUndefined': [ 'af', ],
    },
    # shl, shr or sar w/o fixed 1 shift count
    'shift_count': {
        'asFlTest':      [],
        'asFlModify':    [ 'cf', 'pf', 'af', 'zf', 'sf', 'of', ],
        'asFlClear':     [],
        'asFlSet':       [],
        'asFlUndefined': [ 'af', 'of', ],
    },
    # bt, bts, btr, btc
    # Note! tstIEMAImpl indicates that the undefined flags aren't modified.
    'bitmap': {
        'asFlTest':      [],
        'asFlModify':    [ 'cf', ],
        'asFlClear':     [],
        'asFlSet':       [],
        'asFlUndefined': [ 'pf', 'af', 'zf', 'sf', 'of', ],
    },
    'unchanged': {
        'asFlTest':      [],
        'asFlModify':    [],
        'asFlClear':     [],
        'asFlSet':       [],
        'asFlUndefined': [],
    },
};
def parseTagOpEFlagsClass(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tags:   @opflclass
    Value:  arithmetic, logical, ...

    Shorthand for setting all the EFLAGS attributes of the instruction in
    one go.  The value must be a key in self.kdEFlagsClasses.

    Returns True after setting each attribute listed for the class to a
    copy of the class' flag list.  Returns False, after recording a comment
    error, if the class is unknown or any of the attributes has already
    been set, in which case the instruction is left untouched.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sClass = self.flattenAllSections(aasSections);
    kdAttribs = self.kdEFlagsClasses.get(sClass);
    if not kdAttribs:
        return self.errorComment(iTagLine, '%s: Unknown EFLAGS class: %s (valid: %s)'
                                           % (sTag, sClass, ', '.join(sorted(self.kdEFlagsClasses.keys())),));

    # Check all the attributes up front so that a conflict doesn't leave the
    # instruction half updated.
    for sAttrib, asFlags in kdAttribs.items():
        asOld = getattr(oInstr, sAttrib);
        if asOld is not None:
            return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s" for %s'
                                               % (sTag, asOld, asFlags, sAttrib));

    # Set the attributes.  Each instruction gets its own copy of the lists,
    # so changing one of them cannot alter the shared class table.
    for sAttrib, asFlags in kdAttribs.items():
        setattr(oInstr, sAttrib, list(asFlags));

    _ = iEndLine;
    return True;
4559
def parseTagOpHints(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @ophints
    Value:  Comma or space separated list of flags and hints.

    This covers the disassembler flags table and more.

    Each hint must be a key in g_kdHints.  The single word 'none' stands
    for an empty list.

    Returns True after adding the hints to oInstr.dHints; a hint the
    instruction already has is recorded as an error without failing the
    call.  Returns False, after recording a comment error for each invalid
    hint, if any hint is invalid.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    # Note! split() leaves no whitespace in the words, so they can be looked
    #       up as they are.
    asHints = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asHints) == 1 and asHints[0].lower() == 'none':
        asHints = [];
    else:
        fRc = True;
        for sHint in asHints:
            if sHint not in g_kdHints:
                fRc = self.errorComment(iTagLine, '%s: invalid hint value: %s' % (sTag, sHint,));
        if not fRc:
            return False;

    # Append them.
    for sHint in asHints:
        if sHint not in oInstr.dHints:
            oInstr.dHints[sHint] = True; # (dummy value, using dictionary for speed)
        else:
            self.errorComment(iTagLine, '%s: duplicate hint: %s' % ( sTag, sHint,));

    _ = iEndLine;
    return True;
4593
def parseTagOpDisEnum(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opdisenum
    Value:  OP_XXXX

    This is for selecting a specific (legacy) disassembler enum value for
    the instruction.

    An error is recorded unless exactly one word is given; with several,
    processing carries on using the first one.  The word must match
    self.oReDisEnum.

    Returns True after setting oInstr.sDisEnum.  Returns False if no value
    is given, the value is invalid, or an enum value has already been set.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and split.
    asWords = self.flattenAllSections(aasSections).split();
    if len(asWords) != 1:
        self.errorComment(iTagLine, '%s: expected exactly one value: %s' % (sTag, asWords,));
        if not asWords:
            return False;

    sNewDisEnum = asWords[0];
    if self.oReDisEnum.match(sNewDisEnum) is None:
        return self.errorComment(iTagLine, '%s: invalid disassembler OP_XXXX enum: %s (pattern: %s)'
                                           % (sTag, sNewDisEnum, self.oReDisEnum.pattern));

    # Set it, refusing to overwrite.
    sOldDisEnum = oInstr.sDisEnum;
    if sOldDisEnum is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % (sTag, sOldDisEnum, sNewDisEnum,));
    oInstr.sDisEnum = sNewDisEnum;

    _ = iEndLine;
    return True;
4622
def parseTagOpMinCpu(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @opmincpu
    Value:      <simple CPU name>

    Indicates when this instruction was introduced.

    The value must contain a CPU name that is a key of g_kdCpuNames.  A
    missing value or an unknown name is reported and the errorComment()
    result returned.  More than one word is reported too, but the first
    word is still used.  Returns True once oInstr.sMinCpu has been set.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value, split into words, make sure there's just one, valid it.
    asCpus = self.flattenAllSections(aasSections).split();
    if not asCpus:
        # Without this check the indexing below raises IndexError on an empty value.
        return self.errorComment(iTagLine, '%s: missing CPU name' % (sTag,));
    if len(asCpus) > 1:
        self.errorComment(iTagLine, '%s: exactly one CPU name, please: %s' % (sTag, ' '.join(asCpus),));

    sMinCpu = asCpus[0];
    if sMinCpu not in g_kdCpuNames:
        return self.errorComment(iTagLine, '%s: invalid CPU name: %s (names: %s)'
                                           % (sTag, sMinCpu, ','.join(sorted(g_kdCpuNames)),));

    # Set it.
    # NOTE(review): The previous code followed this assignment with an
    #               'attemting to overwrite' check that could never trigger,
    #               as the attribute had just been set to the new value.
    #               The effective behaviour (last @opmincpu wins) is kept
    #               here; confirm whether redefinition should be an error.
    oInstr.sMinCpu = sMinCpu;

    _ = iEndLine;
    return True;
4652
def parseTagOpCpuId(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @opcpuid
    Value:      none | <CPUID flag specifier>

    CPUID feature bit which is required for the instruction to be present.

    The sole value 'none' (any case) is taken as an empty list.  Every other
    value must be a key of g_kdCpuIdFlags; each one that is not is reported
    via errorComment() and the method then returns False without modifying
    the instruction (this relies on errorComment() returning a false value).
    Accepted values are appended to oInstr.asCpuIds; a value that is already
    listed is reported as a duplicate but is not fatal.

    Returns True unless a value failed validation.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    asCpuIds = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asCpuIds) == 1 and asCpuIds[0].lower() == 'none':
        asCpuIds = [];
    else:
        fRc = True;
        for iCpuId, sCpuId in enumerate(asCpuIds):
            if sCpuId not in g_kdCpuIdFlags:
                if sCpuId.strip() in g_kdCpuIdFlags:
                    # Store the normalized value back into the list.  This used
                    # to index the string itself (sCpuId[iCpuId] = ...), which
                    # raises TypeError since strings are immutable.
                    asCpuIds[iCpuId] = sCpuId.strip();
                else:
                    fRc = self.errorComment(iTagLine, '%s: invalid CPUID value: %s' % (sTag, sCpuId,));
        if not fRc:
            return False;

    # Append them.
    for sCpuId in asCpuIds:
        if sCpuId not in oInstr.asCpuIds:
            oInstr.asCpuIds.append(sCpuId);
        else:
            self.errorComment(iTagLine, '%s: duplicate CPUID: %s' % ( sTag, sCpuId,));

    _ = iEndLine;
    return True;
4686
def parseTagOpGroup(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @opgroup
    Value:      op_grp1[_subgrp2[_subsubgrp3]]

    Instruction grouping.

    The value must be exactly one word matching self.oReGroupName, and the
    instruction must not have a group yet.  Any violation is reported and
    the errorComment() result returned.  Returns True once oInstr.sGroup
    has been set.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # There shall be one and only one group name.
    asNames = self.flattenAllSections(aasSections).split();
    if len(asNames) != 1:
        return self.errorComment(iTagLine, '%s: exactly one group, please: %s' % (sTag, asNames,));

    # Check it against the naming pattern.
    sName = asNames[0];
    if not self.oReGroupName.match(sName):
        sPattern = self.oReGroupName.pattern;
        return self.errorComment(iTagLine, '%s: invalid group name: %s (valid: %s)'
                                           % (sTag, sName, sPattern));

    # Only set it if nobody did so before us.
    sPrevious = oInstr.sGroup;
    if sPrevious is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, sPrevious, sName,));
    oInstr.sGroup = sName;

    _ = iEndLine;
    return True;
4712
def parseTagOpUnusedInvalid(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @opunused, @opinvalid, @opinvlstyle
    Value:      <invalid opcode behaviour style>

    The @opunused tag indicates that the specification is for a currently
    unused instruction encoding.

    The @opinvalid tag indicates that the specification is for a currently
    invalid instruction encoding (like UD2).

    The @opinvlstyle tag just indicates how CPUs decode the instruction
    when it is not supported (@opcpuid, @opmincpu) or disabled.

    The value must be exactly one word that is a key of g_kdInvalidStyles
    and the instruction must not have a style yet; violations are reported
    and the errorComment() result returned.  On success the style is stored
    in oInstr.sInvalidStyle, and @opunused / @opinvalid additionally set
    oInstr.fUnused / oInstr.fInvalid.  Returns True on success.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Exactly one, known, style word is required.
    asWords = self.flattenAllSections(aasSections).split();
    if len(asWords) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid behviour style, please: %s' % (sTag, asWords,));
    sNewStyle = asWords[0];
    if sNewStyle not in g_kdInvalidStyles:
        return self.errorComment(iTagLine, '%s: invalid invalid behaviour style: %s (valid: %s)'
                                           % (sTag, sNewStyle, g_kdInvalidStyles.keys(),));

    # The three tags share one attribute, so only one of them may be used.
    sOldStyle = oInstr.sInvalidStyle;
    if sOldStyle is not None:
        return self.errorComment(iTagLine,
                                 '%s: attempting to overwrite "%s" with "%s" (only one @opunused, @opinvalid, @opinvlstyle)'
                                 % ( sTag, sOldStyle, sNewStyle,));
    oInstr.sInvalidStyle = sNewStyle;

    # Two of the tags imply an additional flag on the instruction.
    sFlagAttrib = { '@opunused': 'fUnused', '@opinvalid': 'fInvalid', }.get(sTag);
    if sFlagAttrib is not None:
        setattr(oInstr, sFlagAttrib, True);

    _ = iEndLine;
    return True;
4750
def parseTagOpTest(self, sTag, aasSections, iTagLine, iEndLine): # pylint: disable=too-many-locals
    """
    Tag:        @optest
    Value:      [<selectors>[ ]?] <inputs> -> <outputs>
    Example:    mode==64bit / in1=0xfffffffe:dw in2=1:dw -> out1=0xffffffff:dw outfl=a?,p?

    The main idea here is to generate basic instruction tests.

    The probably simplest way of handling the diverse input would be to use
    it to produce size optimized byte code for a simple interpreter that
    modifies the register input and output states.

    An alternative to the interpreter would be creating multiple tables,
    but that becomes rather complicated wrt what goes where and then to use
    them in an efficient manner.

    Each section of the tag value is parsed as one test.  A test is only
    added to oInstr.aoTests if all its selectors, inputs and outputs were
    parsed successfully; failures are reported via errorComment().  The
    method itself always returns True.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    #
    # Do it section by section.
    #
    for asSectionLines in aasSections:
        #
        # Sort the input into outputs, inputs and selector conditions.
        #
        sFlatSection = self.flattenAllSections([asSectionLines,]);
        if not sFlatSection:
            self.errorComment(iTagLine, '%s: missing value (dbg: aasSections=%s)' % ( sTag, aasSections));
            continue;
        oTest = InstructionTest(oInstr);

        asSelectors = [];
        asInputs    = [];
        asOutputs   = [];
        asCur       = asOutputs;
        fRc         = True;
        asWords     = sFlatSection.split();
        # The words are walked backwards: outputs come last, then inputs
        # (before '->'), then selectors (before '/').
        for iWord in range(len(asWords) - 1, -1, -1):
            sWord = asWords[iWord];
            # Check for array switchers.
            # Note! The checks must be identity checks.  Comparing with '!='
            #       compares list contents, so two lists that happen to hold
            #       the same words (for instance both being empty) would let
            #       a repeated or misplaced separator slip through.
            if sWord == '->':
                if asCur is not asOutputs:
                    fRc = self.errorComment(iTagLine, '%s: "->" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asInputs;
            elif sWord == '/':
                if asCur is not asInputs:
                    fRc = self.errorComment(iTagLine, '%s: "/" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asSelectors;
            else:
                asCur.insert(0, sWord);

        #
        # Validate and add selectors.
        #
        for sCond in asSelectors:
            # Predicates are shorthands that expand to a full condition.
            sCondExp = TestSelector.kdPredicates.get(sCond, sCond);
            oSelector = None;
            for sOp in TestSelector.kasCompareOps:
                off = sCondExp.find(sOp);
                if off >= 0:
                    sVariable = sCondExp[:off];
                    sValue    = sCondExp[off + len(sOp):];
                    if sVariable in TestSelector.kdVariables:
                        if sValue in TestSelector.kdVariables[sVariable]:
                            oSelector = TestSelector(sVariable, sOp, sValue);
                        else:
                            self.errorComment(iTagLine, '%s: invalid condition value "%s" in "%s" (valid: %s)'
                                                         % ( sTag, sValue, sCond,
                                                             TestSelector.kdVariables[sVariable].keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid condition variable "%s" in "%s" (valid: %s)'
                                                     % ( sTag, sVariable, sCond, TestSelector.kdVariables.keys(),));
                    break;
            if oSelector is not None:
                for oExisting in oTest.aoSelectors:
                    if oExisting.sVariable == oSelector.sVariable:
                        self.errorComment(iTagLine, '%s: already have a selector for variable "%s" (existing: %s, new: %s)'
                                                     % ( sTag, oSelector.sVariable, oExisting, oSelector,));
                oTest.aoSelectors.append(oSelector);
            else:
                fRc = self.errorComment(iTagLine, '%s: failed to parse selector: %s' % ( sTag, sCond,));

        #
        # Validate outputs and inputs, adding them to the test as we go along.
        #
        for asItems, sDesc, aoDst in [ (asInputs, 'input', oTest.aoInputs), (asOutputs, 'output', oTest.aoOutputs)]:
            asValidFieldKinds = [ 'both', sDesc, ];
            for sItem in asItems:
                oItem = None;
                for sOp in TestInOut.kasOperators:
                    off = sItem.find(sOp);
                    if off < 0:
                        continue;
                    sField     = sItem[:off];
                    sValueType = sItem[off + len(sOp):];
                    if     sField in TestInOut.kdFields \
                       and TestInOut.kdFields[sField][1] in asValidFieldKinds:
                        # The value may carry an explicit ':type' suffix,
                        # otherwise the field's default type is used.
                        asSplit = sValueType.split(':', 1);
                        sValue  = asSplit[0];
                        sType   = asSplit[1] if len(asSplit) > 1 else TestInOut.kdFields[sField][0];
                        if sType in TestInOut.kdTypes:
                            oValid = TestInOut.kdTypes[sType].validate(sValue);
                            if oValid is True:
                                if not TestInOut.kdTypes[sType].isAndOrPair(sValue) or sOp == '&|=':
                                    oItem = TestInOut(sField, sOp, sValue, sType);
                                else:
                                    self.errorComment(iTagLine, '%s: and-or %s value "%s" can only be used with "&|="'
                                                                 % ( sTag, sDesc, sItem, ));
                            else:
                                self.errorComment(iTagLine, '%s: invalid %s value "%s" in "%s" (type: %s): %s'
                                                             % ( sTag, sDesc, sValue, sItem, sType, oValid, ));
                        else:
                            self.errorComment(iTagLine, '%s: invalid %s type "%s" in "%s" (valid types: %s)'
                                                         % ( sTag, sDesc, sType, sItem, TestInOut.kdTypes.keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid %s field "%s" in "%s"\nvalid fields: %s'
                                                     % ( sTag, sDesc, sField, sItem,
                                                         ', '.join([sKey for sKey, asVal in TestInOut.kdFields.items()
                                                                    if asVal[1] in asValidFieldKinds]),));
                    break;
                if oItem is not None:
                    for oExisting in aoDst:
                        if oExisting.sField == oItem.sField and oExisting.sOp == oItem.sOp:
                            self.errorComment(iTagLine,
                                              '%s: already have a "%s" assignment for field "%s" (existing: %s, new: %s)'
                                              % ( sTag, oItem.sOp, oItem.sField, oExisting, oItem,));
                    aoDst.append(oItem);
                else:
                    fRc = self.errorComment(iTagLine, '%s: failed to parse assignment: %s' % ( sTag, sItem,));

        #
        # Add the test if all went well, otherwise dump what we got.
        #
        if fRc:
            oInstr.aoTests.append(oTest);
        else:
            self.errorComment(iTagLine, '%s: failed to parse test: %s' % (sTag, ' '.join(asWords),));
            self.errorComment(iTagLine, '%s: asSelectors=%s / asInputs=%s -> asOutputs=%s'
                                         % (sTag, asSelectors, asInputs, asOutputs,));

    _ = iEndLine;
    return True;
4895
def parseTagOpTestNum(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Numbered @optest tag.  Either @optest42 or @optest[42].

    The number is checked against the count of tests the instruction
    already has, a mismatch being reported.  The actual parsing is then
    left to parseTagOpTest, whose result is returned.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Cut the digits out of the tag; the bracketed form has one more
    # character in front of them and one after.
    sDigits = sTag[8:-1] if sTag[-1] == ']' else sTag[7:];
    iTest   = int(sDigits);

    cTests = len(oInstr.aoTests);
    if iTest != cTests:
        self.errorComment(iTagLine, '%s: incorrect test number: %u, actual %u' % (sTag, iTest, cTests,));
    return self.parseTagOpTest(sTag, aasSections, iTagLine, iEndLine);
4911
def parseTagOpTestIgnore(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @optestign | @optestignore
    Value:      <value is ignored>

    A simple trick for ignoring a test while debugging another one: the
    tag and its value are accepted and dropped.

    See also @oponlytest.

    Always returns True.
    """
    # Nothing to do, just reference the arguments so they count as used.
    _ = (sTag, aasSections, iTagLine, iEndLine);
    return True;
4923
def parseTagOpCopyTests(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @opcopytests
    Value:      <opstat | function> [..]
    Example:    @opcopytests add_Eb_Gb

    Trick to avoid duplicating tests for different encodings of the same
    operation.

    Each word of the value must match self.oReStatsName or
    self.oReFunctionName and is then appended to oInstr.asCopyTests.
    Invalid and duplicate words are reported and skipped.  An empty value
    is reported and the errorComment() result returned; otherwise the
    method returns True.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The references are only recorded here.  They are resolved after all
    # the input has been parsed, so that forward references work.
    asRefs = self.flattenAllSections(aasSections).split();
    if len(asRefs) == 0:
        return self.errorComment(iTagLine, '%s: requires at least on reference value' % (sTag,));

    for sRef in asRefs:
        if sRef in oInstr.asCopyTests:
            self.errorComment(iTagLine, '%s: ignoring duplicate "%s"' % (sTag, sRef,));
            continue;
        if self.oReStatsName.match(sRef) or self.oReFunctionName.match(sRef):
            oInstr.asCopyTests.append(sRef);
            continue;
        self.errorComment(iTagLine, '%s: invalid instruction reference (opstat or function) "%s" (valid: %s or %s)'
                                     % (sTag, sRef, self.oReStatsName.pattern, self.oReFunctionName.pattern));

    _ = iEndLine;
    return True;
4952
def parseTagOpOnlyTest(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @oponlytest | @oponly
    Value:      none

    Only test instructions with this tag.  This is a trick that is handy
    for singling out one or two new instructions or tests.

    See also @optestignore.

    The tag takes no value; if one is given it is reported and the
    errorComment() result returned.  Otherwise the instruction is added to
    g_aoOnlyTestInstructions, unless already there, and True is returned.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Anything besides whitespace following the tag is an error.
    sLeftover = self.flattenAllSections(aasSections).strip();
    if sLeftover:
        return self.errorComment(iTagLine, '%s: does not take any value: %s' % (sTag, sLeftover));

    # Register the instruction, but only once.
    fRegistered = oInstr in g_aoOnlyTestInstructions;
    if not fRegistered:
        g_aoOnlyTestInstructions.append(oInstr);

    _ = iEndLine;
    return True;
4975
def parseTagOpXcptType(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @opxcpttype
    Value:      [none|1|2|3|4|4UA|5|6|7|8|11|12|E1|E1NF|E2|E3|E3NF|E4|E4NF|E5|E5NF|E6|E6NF|E7NF|E9|E9NF|E10|E11|E12|E12NF]

    Sets the SSE or AVX exception type (see SDMv2 2.4, 2.7).

    The value must be exactly one word that is a key of g_kdXcptTypes and
    the instruction must not have an exception type yet; violations are
    reported and the errorComment() result returned.  Returns True once
    oInstr.sXcptType has been set.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Exactly one, known, type word is required.
    asWords = self.flattenAllSections(aasSections).split();
    if len(asWords) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid exception type, please: %s' % (sTag, asWords,));
    sNewType = asWords[0];
    if sNewType not in g_kdXcptTypes:
        asValid = sorted(g_kdXcptTypes.keys());
        return self.errorComment(iTagLine, '%s: invalid invalid exception type: %s (valid: %s)'
                                           % (sTag, sNewType, asValid,));

    # Only one @opxcpttype per instruction.
    sOldType = oInstr.sXcptType;
    if sOldType is not None:
        return self.errorComment(iTagLine,
                                 '%s: attempting to overwrite "%s" with "%s" (only one @opxcpttype)'
                                 % ( sTag, sOldType, sNewType,));
    oInstr.sXcptType = sNewType;

    _ = iEndLine;
    return True;
5002
def parseTagOpFunction(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @opfunction
    Value:      <VMM function name>

    This is for explicitly setting the IEM function name.  Normally we pick
    this up from the FNIEMOP_XXX macro invocation after the description, or
    generate it from the mnemonic and operands.

    It is thought that it may be necessary to set it when specifying
    instructions whose implementation doesn't follow immediately or which
    aren't implemented yet.

    The flattened value must match self.oReFunctionName and the instruction
    must not have a function name yet; violations are reported and the
    errorComment() result returned.  Returns True once oInstr.sFunction has
    been set.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The whole flattened value is the name, so check it as-is.
    sName = self.flattenAllSections(aasSections);
    if not self.oReFunctionName.match(sName):
        sPattern = self.oReFunctionName.pattern;
        return self.errorComment(iTagLine, '%s: invalid VMM function name: "%s" (valid: %s)'
                                           % (sTag, sName, sPattern));

    # Don't replace a name that has been set already.
    sPrevious = oInstr.sFunction;
    if sPrevious is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite VMM function name "%s" with "%s"'
                                           % (sTag, sPrevious, sName,));
    oInstr.sFunction = sName;

    _ = iEndLine;
    return True;
5030
def parseTagOpStats(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @opstats
    Value:      <VMM statistics base name>

    This is for explicitly setting the statistics name.  Normally we pick
    this up from the IEMOP_MNEMONIC macro invocation, or generate it from
    the mnemonic and operands.

    It is thought that it may be necessary to set it when specifying
    instructions whose implementation doesn't follow immediately or which
    aren't implemented yet.

    The flattened value must match self.oReStatsName and the instruction
    must not have a statistics name yet; violations are reported and the
    errorComment() result returned.  Returns True once oInstr.sStats has
    been set.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The whole flattened value is the name, so check it as-is.
    sName = self.flattenAllSections(aasSections);
    if not self.oReStatsName.match(sName):
        sPattern = self.oReStatsName.pattern;
        return self.errorComment(iTagLine, '%s: invalid VMM statistics name: "%s" (valid: %s)'
                                           % (sTag, sName, sPattern));

    # Don't replace a name that has been set already.
    sPrevious = oInstr.sStats;
    if sPrevious is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite VMM statistics base name "%s" with "%s"'
                                           % (sTag, sPrevious, sName,));
    oInstr.sStats = sName;

    _ = iEndLine;
    return True;
5058
def parseTagOpDone(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @opdone
    Value:      none

    Used to explicitly flush the instructions that have been specified.

    The tag takes no value; if one is given it is reported and the
    errorComment() result returned.  Otherwise the doneInstructions()
    result is returned.
    """
    _ = iEndLine;
    sValue = self.flattenAllSections(aasSections);
    if sValue == '':
        return self.doneInstructions();
    return self.errorComment(iTagLine, '%s: takes no value, found: "%s"' % (sTag, sValue,));
5071
5072 ## @}
5073
5074
def parseComment(self):
    """
    Parse the current comment (self.sComment).

    The comment is split into lines and then into tagged chunks, a chunk
    starting at a line beginning with '@' and running up to the next such
    line.  Text preceding the first tag is filed under the pseudo tag
    '@default'.  Blank lines split the text of a chunk into sections.  Each
    chunk whose (lower cased) tag is in self.dTagHandlers is handed to that
    handler.

    If it's an old style opcode specifying comment (first line starting
    with 'Opcode '), it is also passed to parseCommentOldOpcode.

    Returns False if the comment contains neither 'Opcode' nor '@' and is
    thus skipped, True otherwise.
    """
    #
    # Reject if comment doesn't seem to contain anything interesting.
    #
    if    self.sComment.find('Opcode') < 0 \
      and self.sComment.find('@') < 0:
        return False;

    #
    # Split the comment into lines, removing leading asterisks and spaces.
    # Also remove leading and trailing empty lines.
    #
    # Note! The list cannot end up empty here: the line holding the 'Opcode'
    #       or '@' that got us past the check above isn't emptied by the
    #       stripping.
    #
    asLines = self.sComment.split('\n');
    for iLine, sLine in enumerate(asLines):
        asLines[iLine] = sLine.lstrip().lstrip('*').lstrip();

    while asLines and not asLines[0]:
        self.iCommentLine += 1;
        asLines.pop(0);

    while asLines and not asLines[-1]:
        asLines.pop(len(asLines) - 1);

    #
    # Check for old style: Opcode 0x0f 0x12
    #
    if asLines[0].startswith('Opcode '):
        self.parseCommentOldOpcode(asLines);

    #
    # Look for @op* tagged data.
    #
    # The results of processing the tags are collected in a dictionary so
    # the helper below can update them without needing 'nonlocal'.
    #
    dState = { 'cOpTags': 0, 'sFlatDefault': None, };

    def processTag(sTag, aasTagSections, iTagLine, iEndLine):
        """
        Processes one completed tag.  Shared by the loop below and the
        handling of the final tag, so that both give the same diagnostics
        (the final tag used to miss out on the 'Did you mean' hints).
        """
        # Drop the empty section a blank line before the next tag leaves behind.
        if not aasTagSections[-1] and len(aasTagSections) > 1:
            aasTagSections.pop(-1);
        if sTag in self.dTagHandlers:
            self.dTagHandlers[sTag](sTag, aasTagSections, iTagLine, iEndLine);
            dState['cOpTags'] += 1;
        elif sTag.startswith('@op'):
            self.errorComment(iTagLine, 'Unknown tag: %s' % (sTag));
        elif sTag == '@default':
            dState['sFlatDefault'] = self.flattenAllSections(aasTagSections);
        elif '@op' + sTag[1:] in self.dTagHandlers:
            self.errorComment(iTagLine, 'Did you mean "@op%s" rather than "%s"?' % (sTag[1:], sTag));
        elif sTag in ['@encoding', '@opencoding']:
            self.errorComment(iTagLine, 'Did you mean "@openc" rather than "%s"?' % (sTag,));

    sCurTag      = '@default';
    iCurTagLine  = 0;
    asCurSection = [];
    aasSections  = [ asCurSection, ];
    for iLine, sLine in enumerate(asLines):
        if not sLine.startswith('@'):
            if sLine:
                asCurSection.append(sLine);
            elif asCurSection:
                # A blank line following text starts a new section.
                asCurSection = [];
                aasSections.append(asCurSection);
        else:
            #
            # Process the previous tag.
            #
            processTag(sCurTag, aasSections, iCurTagLine, iLine);

            #
            # New tag.
            #
            asSplit = sLine.split(None, 1);
            sCurTag = asSplit[0].lower();
            if len(asSplit) > 1:
                asCurSection = [asSplit[1],];
            else:
                asCurSection = [];
            aasSections = [asCurSection, ];
            iCurTagLine = iLine;

    #
    # Process the final tag.  (iLine is the index of the last line here.)
    #
    processTag(sCurTag, aasSections, iCurTagLine, iLine);

    #
    # Don't allow default text in blocks containing @op*.
    #
    if dState['cOpTags'] > 0 and dState['sFlatDefault']:
        self.errorComment(0, 'Untagged comment text is not allowed with @op*: %s' % (dState['sFlatDefault'],));

    return True;
5175
def parseMacroInvocation(self, sInvocation, offStartInvocation = 0):
    """
    Parses a macro invocation.

    The macro name is the text between offStartInvocation and the first
    opening parenthesis; it must match self.oReMacroName.  The arguments
    are split at top-level commas, with quoted strings and nested
    parentheses being skipped over.  Should sInvocation end before the
    closing parenthesis, further lines are taken from self.asLines.

    Returns three values:
        1. A list of macro arguments, where the zero'th is the macro name.
        2. The offset following the macro invocation, into sInvocation if
           it is all on the same line, or into the last line if it spans
           several lines.
        3. Number of additional lines the invocation spans (i.e. zero if
           it is all contained within sInvocation).
    """
    # Locate and check the name.
    offParen = sInvocation.find('(', offStartInvocation);
    if offParen <= offStartInvocation:
        self.raiseError("macro invocation open parenthesis not found");
    sMacroName = sInvocation[offStartInvocation:offParen].strip();
    if not self.oReMacroName.match(sMacroName):
        self.raiseError("invalid macro name '%s'" % (sMacroName,));
    asArgs = [sMacroName, ];

    # Walk the argument list one character at a time.
    iNextLine    = self.iLine;      # Index into self.asLines of the next line to append.
    offCur       = offParen + 1;    # Current offset into sInvocation.
    offArg       = offCur;          # Where the current argument starts.
    offLineStart = 0;               # Where the most recently appended line starts.
    chOpenQuote  = None;            # The quote character when inside a string.
    cNesting     = 1;               # Parenthesis nesting depth.
    while cNesting > 0:
        # Out of text?  Then continue on the next line, if there is one.
        if offCur >= len(sInvocation):
            if iNextLine >= len(self.asLines):
                self.error('macro invocation beyond end of file');
                return (asArgs, offCur - offLineStart, iNextLine - self.iLine);
            offLineStart = offCur;
            sInvocation += self.asLines[iNextLine];
            iNextLine   += 1;
        ch = sInvocation[offCur];

        if chOpenQuote:
            if ch == '\\' and offCur + 1 < len(sInvocation):
                offCur += 1;        # Skip the escaped character.
            elif ch == chOpenQuote:
                chOpenQuote = None;
        elif ch == '"' or ch == '\'':
            chOpenQuote = ch;
        elif ch == '(':
            cNesting += 1;
        elif ch == ',' or ch == ')':
            # Only top-level separators delimit arguments.
            if cNesting == 1:
                asArgs.append(sInvocation[offArg:offCur].strip());
                offArg = offCur + 1;
            if ch == ')':
                cNesting -= 1;
        offCur += 1;

    cExtraLines = iNextLine - self.iLine;
    return (asArgs, offCur - offLineStart, cExtraLines);
5232
def findAndParseMacroInvocationEx(self, sCode, sMacro, offStart = 0):
    """
    Looks for the first occurrence of sMacro in sCode at or after offStart
    and, if it is followed by an opening parenthesis (whitespace in between
    is fine), parses it as a macro invocation.

    Returns (None, len(sCode), 0) if not found, otherwise the
    parseMacroInvocation() return value.
    """
    offHit = sCode.find(sMacro, offStart);
    # Note! startswith() is used rather than indexing the first character,
    #       as the latter raises IndexError when only whitespace (or
    #       nothing at all) follows the macro name.
    if offHit >= 0 and sCode[offHit + len(sMacro):].lstrip().startswith('('):
        return self.parseMacroInvocation(sCode, offHit);
    return (None, len(sCode), 0);
5242
def findAndParseMacroInvocation(self, sCode, sMacro):
    """
    Simplified findAndParseMacroInvocationEx that searches from the start
    of sCode and only returns the first element of the result.

    Returns None if not found, the argument list as per
    parseMacroInvocation if found.
    """
    tResult = self.findAndParseMacroInvocationEx(sCode, sMacro);
    return tResult[0];
5248
def findAndParseFirstMacroInvocation(self, sCode, asMacro):
    """
    Tries the macro names in asMacro in the given order, stopping at the
    first one for which findAndParseMacroInvocation finds an invocation.

    Returns same as findAndParseMacroInvocation.
    """
    # The generator is lazy, so no lookups are done beyond the first hit.
    oHits = (self.findAndParseMacroInvocation(sCode, sName) for sName in asMacro);
    return next((asArgs for asArgs in oHits if asArgs is not None), None);
5258
def workerIemOpMnemonicEx(self, sMacro, sStats, sAsm, sForm, sUpper, sLower,  # pylint: disable=too-many-arguments
                          sDisHints, sIemHints, asOperands):
    """
    Processes one of the IEMOP_MNEMONIC0EX, IEMOP_MNEMONIC1EX, IEMOP_MNEMONIC2EX,
    IEMOP_MNEMONIC3EX, and IEMOP_MNEMONIC4EX macros.

    The macro arguments are checked against, and merged into, the last
    instruction in self.aoCurInstrs (one is added if there is none).  An
    invocation with IEMOPHINT_SKIP_PYTHON in sIemHints is not applied.

    Parameters:
        sMacro      - The macro name, only used in messages.
        sStats      - The statistics name (a_Stats).
        sAsm        - Not used.
        sForm       - The instruction form (a_Form), a key of g_kdIemForms.
        sUpper      - The upper cased mnemonic (a_Upper).
        sLower      - The lower cased mnemonic (a_Lower).
        sDisHints   - '|' separated DISOPTYPE_XXX values or '0' (a_fDisHints).
        sIemHints   - '|' separated IEMOPHINT_XXX values or '0' (a_fIemHints).
        asOperands  - The operand types, keys of g_kdOpTypes.

    Problems are reported via self.error().  Always returns True.
    """
    #
    # Some invocation checks.
    #
    if sUpper != sUpper.upper():
        self.error('%s: bad a_Upper parameter: %s' % (sMacro, sUpper,));
    if sLower != sLower.lower():
        self.error('%s: bad a_Lower parameter: %s' % (sMacro, sLower,));
    if sUpper.lower() != sLower:
        self.error('%s: a_Upper and a_Lower parameters does not match: %s vs %s' % (sMacro, sUpper, sLower,));
    if not self.oReMnemonic.match(sLower):
        self.error('%s: invalid a_Lower: %s (valid: %s)' % (sMacro, sLower, self.oReMnemonic.pattern,));

    #
    # Check if sIemHints tells us to not consider this macro invocation.
    #
    if sIemHints.find('IEMOPHINT_SKIP_PYTHON') >= 0:
        return True;

    # Apply to the last instruction only for now.
    if not self.aoCurInstrs:
        self.addInstruction();
    oInstr = self.aoCurInstrs[-1];
    if oInstr.iLineMnemonicMacro == -1:
        oInstr.iLineMnemonicMacro = self.iLine;
    else:
        self.error('%s: already saw a IEMOP_MNEMONIC* macro on line %u for this instruction'
                   % (sMacro, oInstr.iLineMnemonicMacro,));

    # Mnemonic
    if oInstr.sMnemonic is None:
        oInstr.sMnemonic = sLower;
    elif oInstr.sMnemonic != sLower:
        self.error('%s: current instruction and a_Lower does not match: %s vs %s' % (sMacro, oInstr.sMnemonic, sLower,));

    # Process operands.
    if len(oInstr.aoOperands) not in [0, len(asOperands)]:
        self.error('%s: number of operands given by @opN does not match macro: %s vs %s'
                   % (sMacro, len(oInstr.aoOperands), len(asOperands),));
    for iOperand, sType in enumerate(asOperands):
        sWhere = g_kdOpTypes.get(sType, [None, None])[1];
        if sWhere is None:
            self.error('%s: unknown a_Op%u value: %s' % (sMacro, iOperand + 1, sType));
            if iOperand < len(oInstr.aoOperands): # error recovery.
                sWhere = oInstr.aoOperands[iOperand].sWhere;
                sType  = oInstr.aoOperands[iOperand].sType;
            else:
                sWhere = 'reg';
                sType  = 'Gb';
        if iOperand == len(oInstr.aoOperands):
            oInstr.aoOperands.append(Operand(sWhere, sType))
        elif oInstr.aoOperands[iOperand].sWhere != sWhere or oInstr.aoOperands[iOperand].sType != sType:
            self.error('%s: @op%u and a_Op%u mismatch: %s:%s vs %s:%s'
                       % (sMacro, iOperand + 1, iOperand + 1, oInstr.aoOperands[iOperand].sWhere,
                          oInstr.aoOperands[iOperand].sType, sWhere, sType,));

    # Encoding.
    if sForm not in g_kdIemForms:
        self.error('%s: unknown a_Form value: %s' % (sMacro, sForm,));
    else:
        if oInstr.sEncoding is None:
            oInstr.sEncoding = g_kdIemForms[sForm][0];
        elif g_kdIemForms[sForm][0] != oInstr.sEncoding:
            self.error('%s: current instruction @openc and a_Form does not match: %s vs %s (%s)'
                       % (sMacro, oInstr.sEncoding, g_kdIemForms[sForm], sForm));

        # Check the parameter locations for the encoding.
        if g_kdIemForms[sForm][1] is not None:
            if len(g_kdIemForms[sForm][1]) > len(oInstr.aoOperands):
                self.error('%s: The a_Form=%s has a different operand count: %s (form) vs %s'
                           % (sMacro, sForm, len(g_kdIemForms[sForm][1]), len(oInstr.aoOperands) ));
            else:
                # Note! The VEX/XOP test is a substring test.  It used to be
                #       "find('VEX') > 0 or find('XOP')", which misses 'VEX'
                #       at the very start of the form name and, as find()
                #       returns -1 (true) when not found and 0 (false) for a
                #       hit at the start, gets the XOP part backwards.
                fVexOrXopForm = 'VEX' in sForm or 'XOP' in sForm;
                for iOperand, sWhere in enumerate(g_kdIemForms[sForm][1]):
                    if oInstr.aoOperands[iOperand].sWhere != sWhere:
                        self.error('%s: current instruction @op%u and a_Form location does not match: %s vs %s (%s)'
                                   % (sMacro, iOperand + 1, oInstr.aoOperands[iOperand].sWhere, sWhere, sForm,));
                    sOpFormMatch = g_kdOpTypes[oInstr.aoOperands[iOperand].sType][4];
                    if    (sOpFormMatch in [ 'REG', 'MEM', ] and sForm.find('_' + sOpFormMatch) < 0) \
                       or (sOpFormMatch in [ 'FIXED', ]      and sForm.find(sOpFormMatch) < 0) \
                       or (sOpFormMatch == 'RM' and (sForm.find('_MEM') > 0 or sForm.find('_REG') > 0) ) \
                       or (sOpFormMatch == 'V'  and (   not fVexOrXopForm \
                                                     or sForm.replace('VEX','').find('V') < 0) ):
                        self.error('%s: current instruction @op%u and a_Form type does not match: %s/%s vs %s'
                                   % (sMacro, iOperand + 1, oInstr.aoOperands[iOperand].sType, sOpFormMatch, sForm, ));
                # Operands beyond those covered by the form must be fixed ones.
                if len(g_kdIemForms[sForm][1]) < len(oInstr.aoOperands):
                    for iOperand in range(len(g_kdIemForms[sForm][1]), len(oInstr.aoOperands)):
                        if    oInstr.aoOperands[iOperand].sType != 'FIXED' \
                          and g_kdOpTypes[oInstr.aoOperands[iOperand].sType][0] != 'IDX_ParseFixedReg':
                            self.error('%s: Expected FIXED type operand #%u following operands given by a_Form=%s: %s (%s)'
                                       % (sMacro, iOperand, sForm, oInstr.aoOperands[iOperand].sType,
                                          oInstr.aoOperands[iOperand].sWhere));

        # Check @opcodesub
        # Note! Done only for known forms, as looking up an unknown one in
        #       g_kdIemForms would raise KeyError.
        if    oInstr.sSubOpcode \
          and g_kdIemForms[sForm][2] \
          and oInstr.sSubOpcode.find(g_kdIemForms[sForm][2]) < 0:
            self.error('%s: current instruction @opcodesub and a_Form does not match: %s vs %s (%s)'
                       % (sMacro, oInstr.sSubOpcode, g_kdIemForms[sForm][2], sForm,));

    # Stats.
    if not self.oReStatsName.match(sStats):
        self.error('%s: invalid a_Stats value: %s' % (sMacro, sStats,));
    elif oInstr.sStats is None:
        oInstr.sStats = sStats;
    elif oInstr.sStats != sStats:
        self.error('%s: mismatching @opstats and a_Stats value: %s vs %s'
                   % (sMacro, oInstr.sStats, sStats,));

    # Process the hints (simply merge with @ophints w/o checking anything).
    # The disassembler and the IEM hints are handled the same way, only the
    # prefix and the parameter name used in the messages differ.
    for sHints, sPrefix, sParam in [ (sDisHints, 'DISOPTYPE_', 'a_fDisHints'),
                                     (sIemHints, 'IEMOPHINT_', 'a_fIemHints'), ]:
        for sHint in sHints.split('|'):
            sHint = sHint.strip();
            if sHint.startswith(sPrefix):
                sShortHint = sHint[len(sPrefix):].lower();
                if sShortHint in g_kdHints:
                    oInstr.dHints[sShortHint] = True; # (dummy value, using dictionary for speed)
                else:
                    self.error('%s: unknown %s value: %s' % (sMacro, sParam, sHint,));
            elif sHint != '0':
                self.error('%s: expected %s value: %s' % (sMacro, sParam, sHint,));

    _ = sAsm;
    return True;
5398
def workerIemOpMnemonic(self, sMacro, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands):
    """
    Processes one of the IEMOP_MNEMONIC0, IEMOP_MNEMONIC1, IEMOP_MNEMONIC2,
    IEMOP_MNEMONIC3, and IEMOP_MNEMONIC4 macros.

    These lack the statistics name and assembly string arguments of the EX
    variants, so both are derived from the lower cased mnemonic and the
    operands before handing the job to workerIemOpMnemonicEx.

    Returns what workerIemOpMnemonicEx returns.
    """
    if asOperands:
        sStats = sLower + '_' + '_'.join(asOperands);
        sAsm   = sLower + ' ' + ','.join(asOperands);
    else:
        # No operands: the mnemonic alone serves as both.
        sStats = sLower;
        sAsm   = sLower;
    return self.workerIemOpMnemonicEx(sMacro, sStats, sAsm, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands);
5408
def workerIemMcBegin(self, sCode, offBeginStatementInCodeStr, offBeginStatementInLine):
    """
    Process a IEM_MC_BEGIN macro invocation.

    Creates a new McBlock and makes it the current one (self.oCurMcBlock).
    There must be a current function and no block in progress, otherwise
    raiseError() is called.

    Parameters:
        sCode                       - The code string holding the statement.
        offBeginStatementInCodeStr  - Offset of the statement into sCode.
        offBeginStatementInLine     - Offset of the statement into the
                                      line; passed on to McBlock.

    Returns True.
    """
    if self.fDebugMc:
        self.debug('IEM_MC_BEGIN on %s off %s' % (self.iLine, offBeginStatementInLine,));

    # We need a function to put the block into, and blocks do not nest.
    if not self.oCurFunction:
        self.raiseError('IEM_MC_BEGIN w/o current function (%s)' % (sCode,));
    if self.oCurMcBlock:
        self.raiseError('IEM_MC_BEGIN before IEM_MC_END.  Previous IEM_MC_BEGIN at line %u' % (self.oCurMcBlock.iBeginLine,));

    # The indent is the distance from the start of the (sub-)line holding
    # the statement.  When sCode has no newline before the statement,
    # rfind() returns -1 and the indent equals the offset into sCode.
    offLineStart = sCode.rfind('\n', 0, offBeginStatementInCodeStr) + 1;
    cchIndent    = offBeginStatementInCodeStr - offLineStart;

    # Start a new block, associating it with the most recent instruction if any.
    oInstruction = self.aoCurInstrs[-1] if self.aoCurInstrs else None;
    self.oCurMcBlock = McBlock(self.sSrcFile, self.iLine, offBeginStatementInLine, self.oCurFunction, self.iMcBlockInFunc,
                               oInstruction = oInstruction,
                               cchIndent = cchIndent);

    # Don't add it to the global list unless the preprocessor context
    # matches the host architecture.
    try:
        fInclude = (   not self.aoCppCondStack
                    or not self.sHostArch
                    or self.PreprocessorConditional.isInBlockForArch(self.aoCppCondStack, self.sHostArch, self.iLine));
        if fInclude:
            g_aoMcBlocks.append(self.oCurMcBlock);
            self.cTotalMcBlocks += 1;
    except Exception as oXcpt:
        self.raiseError(oXcpt.args[0]);

    oBlockInstr = self.oCurMcBlock.oInstruction;
    if oBlockInstr:
        oBlockInstr.aoMcBlocks.append(self.oCurMcBlock);
    self.iMcBlockInFunc += 1;
    return True;
5448
5449 @staticmethod
5450 def extractLinesFromMacroExpansionLine(sRawLine, offBegin, offEnd, sBeginStmt = 'IEM_MC_BEGIN'):
5451 """
5452 Helper used by workerIemMcEnd and workerIemMcDeferToCImplXRet for
5453 extracting a statement block from a string that's the result of macro
5454 expansion and therefore contains multiple "sub-lines" as it were.
5455
5456 Returns list of lines covering offBegin thru offEnd in sRawLine.
5457 """
5458
5459 off = sRawLine.find('\n', offEnd);
5460 if off > 0:
5461 sRawLine = sRawLine[:off + 1];
5462
5463 off = sRawLine.rfind('\n', 0, offBegin) + 1;
5464 sRawLine = sRawLine[off:];
5465 if not sRawLine.strip().startswith(sBeginStmt):
5466 sRawLine = sRawLine[offBegin - off:]
5467
5468 return [sLine + '\n' for sLine in sRawLine.split('\n')];
5469
def workerIemMcEnd(self, offEndStatementInLine):
    """
    Process a IEM_MC_END macro invocation.

    Closes the MC block opened by the preceding IEM_MC_BEGIN: the source
    lines making up the block are collected, everything following the
    'IEM_MC_END();' statement is trimmed off the final line, and the result
    is handed to the block's complete() method.  self.oCurMcBlock is reset
    to None afterwards.

    offEndStatementInLine is the offset of the IEM_MC_END statement within
    the current line (self.iLine).

    Returns True.  Problems are reported thru self.raiseError.
    """
    if self.fDebugMc:
        self.debug('IEM_MC_END on %s off %s' % (self.iLine, offEndStatementInLine,));

    # Check preconditions.
    if not self.oCurMcBlock:
        self.raiseError('IEM_MC_END w/o IEM_MC_BEGIN.');

    #
    # HACK ALERT! For blocks originating from macro expansion the start and
    #             end line will be the same, but the line has multiple
    #             newlines inside it.  So, we have to do some extra tricks
    #             to get the lines out of there. We ASSUME macros aren't
    #             messy, but keep IEM_MC_BEGIN/END on separate lines.
    #
    if self.iLine > self.oCurMcBlock.iBeginLine:
        # The block spans several source lines (iBeginLine and iLine are 1-based).
        asLines = self.asLines[self.oCurMcBlock.iBeginLine - 1 : self.iLine];
        if not asLines[0].strip().startswith('IEM_MC_BEGIN'):
            self.raiseError('IEM_MC_BEGIN is not the first word on the line');

        # Hack alert! Detect mixed tail/head macros a la cmpxchg16b and split up the lines
        #             so we can deal correctly with IEM_MC_END below and everything else.
        # A line with more than one newline is an expanded macro.  The end
        # offset passed below is relative to the joined string: the combined
        # length of all lines preceding the last one is added to the offset
        # within the last line.
        for sLine in asLines:
            cNewLines = sLine.count('\n');
            assert cNewLines > 0;
            if cNewLines > 1:
                asLines = self.extractLinesFromMacroExpansionLine(''.join(asLines),
                                                                  self.oCurMcBlock.offBeginLine,
                                                                  offEndStatementInLine
                                                                  + sum(len(s) for s in asLines)
                                                                  - len(asLines[-1]));
                self.oCurMcBlock.iMacroExp = McBlock.kiMacroExp_Partial;
                break;
    else:
        # Begin and end on the same line: the whole block came out of a macro expansion.
        self.oCurMcBlock.iMacroExp = McBlock.kiMacroExp_Entire;
        asLines = self.extractLinesFromMacroExpansionLine(self.asLines[self.iLine - 1],
                                                          self.oCurMcBlock.offBeginLine, offEndStatementInLine);

    #
    # Strip anything following the IEM_MC_END(); statement in the final line,
    # so that we don't carry on any trailing 'break' after macro expansions
    # like for iemOp_movsb_Xb_Yb.
    #
    while asLines[-1].strip() == '':
        asLines.pop();
    sFinal        = asLines[-1];
    offFinalEnd   = sFinal.find('IEM_MC_END');
    offEndInFinal = offFinalEnd;        # Remember where the statement starts in the final line.
    if offFinalEnd < 0: self.raiseError('bogus IEM_MC_END: Not in final line: %s' % (sFinal,));
    offFinalEnd += len('IEM_MC_END');

    # Expect '(' next, allowing for whitespace.
    while sFinal[offFinalEnd].isspace():
        offFinalEnd += 1;
    if sFinal[offFinalEnd] != '(': self.raiseError('bogus IEM_MC_END: Expected "(" at %s: %s' % (offFinalEnd, sFinal,));
    offFinalEnd += 1;

    # Then ')'.
    while sFinal[offFinalEnd].isspace():
        offFinalEnd += 1;
    if sFinal[offFinalEnd] != ')': self.raiseError('bogus IEM_MC_END: Expected ")" at %s: %s' % (offFinalEnd, sFinal,));
    offFinalEnd += 1;

    # And finally the terminating ';'.
    while sFinal[offFinalEnd].isspace():
        offFinalEnd += 1;
    if sFinal[offFinalEnd] != ';': self.raiseError('bogus IEM_MC_END: Expected ";" at %s: %s' % (offFinalEnd, sFinal,));
    offFinalEnd += 1;

    # offFinalEnd is now just past the ';', cut the line off there.
    asLines[-1] = sFinal[: offFinalEnd];

    #
    # Complete and discard the current block.
    #
    # The third argument is the statement start offset plus the length of
    # the 'IEM_MC_END ( ) ;' text as measured above.
    #
    self.oCurMcBlock.complete(self.iLine, offEndStatementInLine,
                              offEndStatementInLine + offFinalEnd - offEndInFinal, asLines);
    self.oCurMcBlock = None;
    return True;
5548
def workerIemMcDeferToCImplXRet(self, sCode, offBeginStatementInCodeStr, offBeginStatementInLine, cParams):
    """
    Process a IEM_MC_DEFER_TO_CIMPL_[1-5]_RET macro invocation.

    The statement is turned into a complete, single statement McBlock (with
    fDeferToCImpl set) that is added to g_aoMcBlocks and to the current
    instruction, if there is one.  self.iLine is advanced by the number of
    additional lines the invocation spans.

    sCode is the code snippet containing the statement,
    offBeginStatementInCodeStr the statement offset within sCode,
    offBeginStatementInLine the statement offset within the current line,
    and cParams the digit from the macro name.

    Returns True.  Problems are reported thru self.raiseError.
    """
    sStmt = 'IEM_MC_DEFER_TO_CIMPL_%d_RET' % (cParams,);
    if self.fDebugMc:
        self.debug('%s on %s off %s' % (sStmt, self.iLine, offBeginStatementInLine,));
        #self.debug('%s<eos>' % (sCode,));

    # Check preconditions.
    if not self.oCurFunction:
        self.raiseError('%s w/o current function (%s)' % (sStmt, sCode,));
    if self.oCurMcBlock:
        self.raiseError('%s inside IEM_MC_BEGIN blocki starting at line %u' % (sStmt, self.oCurMcBlock.iBeginLine,));

    # Figure out the indent level the block starts at, adjusting for expanded multiline macros.
    cchIndent = offBeginStatementInCodeStr;
    offPrevNewline = sCode.rfind('\n', 0, offBeginStatementInCodeStr);
    if offPrevNewline >= 0:
        cchIndent -= offPrevNewline + 1;
    #self.debug('cchIndent=%s offPrevNewline=%s sFunc=%s' % (cchIndent, offPrevNewline, self.oCurFunction.sName));

    # Start a new block.
    oMcBlock = McBlock(self.sSrcFile, self.iLine, offBeginStatementInLine, self.oCurFunction, self.iMcBlockInFunc,
                       oInstruction = self.aoCurInstrs[-1] if self.aoCurInstrs else None,
                       cchIndent = cchIndent, fDeferToCImpl = True);

    # Parse the statement.
    # NOTE(review): the 'cParams + 4' check presumably covers the macro name
    #               (asArgs[0]) and three fixed arguments in addition to the
    #               cParams call parameters - confirm against the macro definitions.
    asArgs, offAfter, cLines = self.findAndParseMacroInvocationEx(sCode, sStmt, offBeginStatementInCodeStr);
    if asArgs is None:
        self.raiseError('%s: Closing parenthesis not found!' % (sStmt,));
    if len(asArgs) != cParams + 4:
        self.raiseError('%s: findAndParseMacroInvocationEx returns %s args, expected %s! (%s)'
                        % (sStmt, len(asArgs), cParams + 4, asArgs));

    oMcBlock.aoStmts = [ McBlock.parseMcDeferToCImpl(oMcBlock, asArgs[0], asArgs[1:]), ];

    # These MCs are not typically part of macro expansions, but let's get
    # it out of the way immediately if it's the case.
    if cLines > 0 or self.asLines[oMcBlock.iBeginLine - 1].count('\n') <= 1:
        # Plain source lines: take the cLines + 1 lines of the invocation and
        # cut the last one off right after the ';' expected at offAfter.
        asLines = self.asLines[self.iLine - 1 : self.iLine - 1 + cLines + 1];
        assert offAfter < len(asLines[-1]) and asLines[-1][offAfter] == ';', \
               'iBeginLine=%d iLine=%d offAfter=%s line: "%s"' % (oMcBlock.iBeginLine, self.iLine, offAfter, asLines[-1],);
        asLines[-1] = asLines[-1][:offAfter + 1];
    else:
        # The line holds embedded newlines, i.e. it is the result of macro
        # expansion: extract the sub-lines and cut after the first ';' in the last one.
        asLines = self.extractLinesFromMacroExpansionLine(self.asLines[self.iLine - 1], offBeginStatementInCodeStr,
                                                          offAfter, sStmt);
        assert asLines[-1].find(';') >= 0;
        asLines[-1] = asLines[-1][:asLines[-1].find(';') + 1];

    assert asLines[0].find(sStmt) >= 0;
    #if not asLines[0].strip().startswith(sStmt):
    #    self.raiseError('%s is not the first word on the line: %s' % (sStmt, asLines[0].strip()));

    # Advance to the line with the closing ')'.
    self.iLine += cLines;

    # Complete the block.
    oMcBlock.complete(self.iLine, 0 if cLines > 0 else offBeginStatementInCodeStr, offAfter + 1, asLines);

    # Register it globally and with the instruction, then update the counters.
    g_aoMcBlocks.append(oMcBlock);
    if oMcBlock.oInstruction:
        oMcBlock.oInstruction.aoMcBlocks.append(oMcBlock);
    self.cTotalMcBlocks += 1;
    self.iMcBlockInFunc += 1;

    return True;
5616
def workerStartFunction(self, asArgs):
    """
    Handles the start of a decoder function.

    Decoder functions are all defined using one of the FNIEMOP*_DEF* and
    FNIEMOP_*STUB* macros.  asArgs is the parsed macro invocation, with the
    macro name as entry 0 and the function name as entry 1.

    Returns True.
    """
    # Wrap up the previous function first, it ends on the line before this one.
    oPrevFunction = self.oCurFunction;
    if oPrevFunction:
        iLastLine = self.iLine - 1;
        oPrevFunction.complete(iLastLine, self.asLines[oPrevFunction.iBeginLine - 1 : iLastLine]);

    # The new function starts on the current line.
    self.oCurFunction = DecoderFunction(self.sSrcFile, self.iLine, asArgs[1], asArgs);
    return True;
5632
def checkCodeForMacro(self, sCode, offLine):
    """
    Checks code for relevant macro invocation.

    sCode is a piece of code from the current line and offLine is the offset
    of sCode within that line.  The following is looked for, in this order:
        - Decoder function definitions (FNIEMOP_DEF, FNIEMOP_STUB and friends).
        - Worker function definitions (FNIEMOP_DEF_1, FNIEMOP_DEF_2).
        - IEMOP_HLP_DONE_VEX_DECODING* invocations.
        - IEMOP_MNEMONIC* invocations.
        - IEM_MC_BEGIN, IEM_MC_END and IEM_MC_DEFER_TO_CIMPL_x_RET statements.

    Returns True if a function definition or an 'IEM_MC_' statement was
    found, otherwise False (also when only a mnemonic or VEX decoding macro
    was processed).
    """

    #
    # Scan macro invocations.
    #
    if sCode.find('(') > 0:
        # Look for instruction decoder function definitions. ASSUME single line.
        asArgs = self.findAndParseFirstMacroInvocation(sCode,
                                                       [ 'FNIEMOP_DEF',
                                                         'FNIEMOPRM_DEF',
                                                         'FNIEMOP_STUB',
                                                         'FNIEMOP_STUB_1',
                                                         'FNIEMOP_UD_STUB',
                                                         'FNIEMOP_UD_STUB_1' ]);
        if asArgs is not None:
            self.workerStartFunction(asArgs);
            #self.debug('%s: oCurFunction=%s' % (self.iLine, self.oCurFunction.sName,));

            # Make sure there is an instruction to attach the function to,
            # and complain if one has already seen a FNIEMOP_XXX macro.
            if not self.aoCurInstrs:
                self.addInstruction();
            for oInstr in self.aoCurInstrs:
                if oInstr.iLineFnIemOpMacro == -1:
                    oInstr.iLineFnIemOpMacro = self.iLine;
                else:
                    self.error('%s: already seen a FNIEMOP_XXX macro for %s' % (asArgs[0], oInstr,) );
            self.setInstrunctionAttrib('sFunction', asArgs[1]);
            self.setInstrunctionAttrib('fStub', asArgs[0].find('STUB') > 0, fOverwrite = True);
            self.setInstrunctionAttrib('fUdStub', asArgs[0].find('UD_STUB') > 0, fOverwrite = True);
            # A stub macro is the entire function, so we are done with the instruction(s).
            if asArgs[0].find('STUB') > 0:
                self.doneInstructions(fEndOfFunction = True);
            return True;

        # Check for worker function definitions, so we can get a context for MC blocks.
        asArgs = self.findAndParseFirstMacroInvocation(sCode,
                                                       [ 'FNIEMOP_DEF_1',
                                                         'FNIEMOP_DEF_2', ]);
        if asArgs is not None:
            self.workerStartFunction(asArgs);
            #self.debug('%s: oCurFunction=%s (%s)' % (self.iLine, self.oCurFunction.sName, asArgs[0]));
            return True;

        # IEMOP_HLP_DONE_VEX_DECODING_*
        asArgs = self.findAndParseFirstMacroInvocation(sCode,
                                                       [ 'IEMOP_HLP_DONE_VEX_DECODING',
                                                         'IEMOP_HLP_DONE_VEX_DECODING_L0',
                                                         'IEMOP_HLP_DONE_VEX_DECODING_NO_VVVV',
                                                         'IEMOP_HLP_DONE_VEX_DECODING_L0_AND_NO_VVVV',
                                                         ]);
        if asArgs is not None:
            sMacro = asArgs[0];
            # The L0 variants require the 'vex_l_zero' hint: complain where
            # the mnemonic macro didn't supply it and set it regardless.
            if sMacro in ('IEMOP_HLP_DONE_VEX_DECODING_L0', 'IEMOP_HLP_DONE_VEX_DECODING_L0_AND_NO_VVVV', ):
                for oInstr in self.aoCurInstrs:
                    if 'vex_l_zero' not in oInstr.dHints:
                        if oInstr.iLineMnemonicMacro >= 0:
                            self.errorOnLine(oInstr.iLineMnemonicMacro,
                                             'Missing IEMOPHINT_VEX_L_ZERO! (%s on line %d)' % (sMacro, self.iLine,));
                        oInstr.dHints['vex_l_zero'] = True;

        #
        # IEMOP_MNEMONIC*
        #
        # In the parsed invocations asArgs[0] is the macro name, so the macro
        # parameters listed in the comments below start at index 1.
        #
        if sCode.find('IEMOP_MNEMONIC') >= 0:
            # IEMOP_MNEMONIC(a_Stats, a_szMnemonic) IEMOP_INC_STATS(a_Stats)
            # NOTE(review): the nesting of the deriveMnemonicAndOperandsFromStats call
            #               (same level as the sStats check) is a reconstruction - confirm.
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC');
            if asArgs is not None:
                if len(self.aoCurInstrs) == 1:
                    oInstr = self.aoCurInstrs[0];
                    if oInstr.sStats is None:
                        oInstr.sStats = asArgs[1];
                    self.deriveMnemonicAndOperandsFromStats(oInstr, asArgs[1]);

            # IEMOP_MNEMONIC0EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC0EX');
            if asArgs is not None:
                self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[6],
                                           asArgs[7], []);
            # IEMOP_MNEMONIC1EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC1EX');
            if asArgs is not None:
                self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[7],
                                           asArgs[8], [asArgs[6],]);
            # IEMOP_MNEMONIC2EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC2EX');
            if asArgs is not None:
                self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[8],
                                           asArgs[9], [asArgs[6], asArgs[7]]);
            # IEMOP_MNEMONIC3EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_fDisHints,
            #                   a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC3EX');
            if asArgs is not None:
                self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[9],
                                           asArgs[10], [asArgs[6], asArgs[7], asArgs[8],]);
            # IEMOP_MNEMONIC4EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_Op4, a_fDisHints,
            #                   a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC4EX');
            if asArgs is not None:
                self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[10],
                                           asArgs[11], [asArgs[6], asArgs[7], asArgs[8], asArgs[9],]);

            # IEMOP_MNEMONIC0(a_Form, a_Upper, a_Lower, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC0');
            if asArgs is not None:
                self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], []);
            # IEMOP_MNEMONIC1(a_Form, a_Upper, a_Lower, a_Op1, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC1');
            if asArgs is not None:
                self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[5], asArgs[6], [asArgs[4],]);
            # IEMOP_MNEMONIC2(a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC2');
            if asArgs is not None:
                self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[6], asArgs[7],
                                         [asArgs[4], asArgs[5],]);
            # IEMOP_MNEMONIC3(a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC3');
            if asArgs is not None:
                self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[7], asArgs[8],
                                         [asArgs[4], asArgs[5], asArgs[6],]);
            # IEMOP_MNEMONIC4(a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_Op4, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC4');
            if asArgs is not None:
                self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[8], asArgs[9],
                                         [asArgs[4], asArgs[5], asArgs[6], asArgs[7],]);

        #
        # IEM_MC_BEGIN + IEM_MC_END.
        # We must support multiple instances per code snippet.
        #
        # Group 1 of the regex match is the statement name following the
        # 'IEM_MC_' prefix.  Anything other than BEGIN and END is taken to be
        # DEFER_TO_CIMPL_<digit>_RET, with the digit giving the parameter count.
        #
        offCode = sCode.find('IEM_MC_');
        if offCode >= 0:
            for oMatch in self.oReMcBeginEnd.finditer(sCode, offCode):
                if oMatch.group(1) == 'END':
                    self.workerIemMcEnd(offLine + oMatch.start());
                elif oMatch.group(1) == 'BEGIN':
                    self.workerIemMcBegin(sCode, oMatch.start(), offLine + oMatch.start());
                else:
                    self.workerIemMcDeferToCImplXRet(sCode, oMatch.start(), offLine + oMatch.start(),
                                                     int(oMatch.group(1)[len('DEFER_TO_CIMPL_')]));
            return True;

    return False;
5776
def workerPreprocessorRecreateMacroRegex(self):
    """
    Rebuilds self.oReMacros after self.dMacros was modified.

    The regular expression is a single capture group with one alternative
    per known macro.  Macros with an argument list must be followed by an
    opening parenthesis (included in the match), all others must end on a
    word boundary.  Without any macros self.oReMacros is set to None.

    Returns True.
    """
    if not self.dMacros:
        self.oReMacros = None;
        return True;

    asAlternatives = [];
    for sName, oMacro in self.dMacros.items():
        sSuffix = r'\b' if oMacro.asArgs is None else r'\s*\(';
        asAlternatives.append(sName + sSuffix);
    self.oReMacros = re.compile(r'\b(' + '|'.join(asAlternatives) + ')');
    return True;
5797
def workerPreprocessorDefine(self, sRest):
    """
    Handles a #define directive.

    sRest is the newline terminated text following the directive word.
    Continuation lines are consumed (advancing self.iLine).  Only macros
    whose body contains IEM_MC_BEGIN or IEM_MC_END are recorded in
    self.dMacros, all others are ignored.

    Returns True if the macro is ignored, the self.error result if the
    definition cannot be parsed, and otherwise the result of
    workerPreprocessorRecreateMacroRegex.
    """
    assert sRest[-1] == '\n';

    #
    # Join up continuation lines, keeping the newlines but dropping the
    # escaping backslashes (and any blanks preceding them).
    #
    iFirstLine = self.iLine;
    while sRest.endswith('\\\n') and self.iLine < len(self.asLines):
        sNextLine = self.asLines[self.iLine];
        self.iLine += 1;
        sRest = sRest[:-2].rstrip() + '\n' + sNextLine;
    #self.debug('workerPreprocessorDefine: sRest=%s<EOS>' % (sRest,));

    #
    # Split the definition into name, parameter list and body.  We try the
    # function-like form first and fall back on the simple form.
    #
    asParams = None;
    oMatch = self.oReHashDefine2.match(sRest);
    if oMatch:
        sParamList = oMatch.group(2).strip();
        if sParamList:
            asParams = [sParam.strip() for sParam in sParamList.split(',')];
        sBody = oMatch.group(3);
    else:
        oMatch = self.oReHashDefine3.match(sRest);
        if not oMatch:
            self.debug('workerPreprocessorDefine: wtf? sRest=%s' % (sRest,));
            return self.error('bogus macro definition: %s' % (sRest,));
        sBody = oMatch.group(2);
    sName = oMatch.group(1);
    assert sName == sName.strip();
    #self.debug('workerPreprocessorDefine: sName=%s asParams=%s sBody=%s<EOS>' % (sName, asParams, sBody));

    #
    # We are only interested in macros producing (parts of) MC blocks.  MC
    # blocks within nested macro expansion are NOT supported, and there is
    # only limited support for macros expanding to partial MC blocks.
    #
    # Note! Macros making use of IEMOP_RAISE_INVALID_LOCK_PREFIX_RET() and
    #       its siblings (like IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX) are
    #       ignored here.  They are dealt with by overriding these macros in
    #       the recompiler, which is a lot simpler than nested expansion and
    #       avoids producing lots of unnecessary threaded functions.
    #
    fRelevant = sBody.find("IEM_MC_BEGIN") >= 0 or sBody.find("IEM_MC_END") >= 0;
    if not fRelevant:
        #self.debug('workerPreprocessorDefine: irrelevant (%s: %s)' % (sName, sBody));
        return True;

    #
    # Record the macro and refresh the matching expression.
    #
    if self.fDebugPreproc:
        self.debug('#define %s on line %u' % (sName, self.iLine,));
    self.dMacros[sName] = SimpleParser.Macro(sName, asParams, sBody.strip(), iFirstLine);
    return self.workerPreprocessorRecreateMacroRegex();
5858
def workerPreprocessorUndef(self, sRest):
    """
    Handles an #undef directive.

    sRest is the text following the directive word.  Macros we are not
    keeping track of are silently ignored.

    Returns True, or the result of workerPreprocessorRecreateMacroRegex if
    a macro was removed.
    """
    # Crude comment stripping: drop everything from the first slash on.
    offSlash = sRest.find('/');
    sName = (sRest[:offSlash] if offSlash > 0 else sRest).strip();

    # Nothing to do unless it is one of ours.
    if sName not in self.dMacros:
        return True;

    if self.fDebugPreproc:
        self.debug('#undef %s on line %u' % (sName, self.iLine,));
    del self.dMacros[sName];
    return self.workerPreprocessorRecreateMacroRegex();
5877
def workerPreprocessorIfOrElif(self, sDirective, sRest):
    """
    Handles the #if, #ifdef, #ifndef and #elif directives.

    sDirective is the directive word and sRest the text following it.
    Continuation lines are consumed (advancing self.iLine).  An #elif is
    added to the conditional on the top of self.aoCppCondStack, the other
    directives push a new conditional onto the stack.

    Returns True.  Problems are reported thru self.raiseError.
    """
    fIsElif = sDirective == 'elif';

    #
    # An #elif needs an open #if that hasn't reached its #else yet.
    #
    if fIsElif:
        if not self.aoCppCondStack:
            self.raiseError('#elif without #if');
        if self.aoCppCondStack[-1].fInElse:
            self.raiseError('#elif after #else');

    #
    # Join up continuation lines, replacing the escaped newlines (and any
    # blanks preceding the backslash) with a single space.
    #
    while sRest.endswith('\\\n') and self.iLine < len(self.asLines):
        sNextLine = self.asLines[self.iLine];
        self.iLine += 1;
        sRest = sRest[:-2].rstrip() + ' ' + sNextLine;

    # The expression is what remains after removing comments and surrounding blanks.
    sExpr = self.stripComments(sRest).strip();

    #
    # Create the conditional and put it where it belongs.
    #
    try:
        oCond = self.PreprocessorConditional(sDirective, sExpr);
    except Exception as oXcpt:
        self.raiseError(oXcpt.args[0]);

    if fIsElif:
        self.aoCppCondStack[-1].aoElif.append(oCond);
    else:
        self.aoCppCondStack.append(oCond);
    return True;
5916
def workerPreprocessorElse(self):
    """
    Handles an #else directive.

    Flags the conditional on the top of self.aoCppCondStack as being in its
    else part.

    Returns True.  Problems are reported thru self.raiseError.
    """
    if not self.aoCppCondStack:
        self.raiseError('#else without #if');
    oTopCond = self.aoCppCondStack[-1];
    if oTopCond.fInElse:
        self.raiseError('Another #else after #else');

    oTopCond.fInElse = True;
    return True;
5928
def workerPreprocessorEndif(self):
    """
    Handles an #endif directive.

    Removes the conditional on the top of self.aoCppCondStack.

    Returns True.  Problems are reported thru self.raiseError.
    """
    if not self.aoCppCondStack:
        self.raiseError('#endif without #if');

    del self.aoCppCondStack[-1];
    return True;
5938
def checkPreprocessorDirective(self, sLine):
    """
    Handles a preprocessor directive line.

    sLine is the complete line, which must contain a '#'.  The define,
    undef, if, ifdef, ifndef, elif, else and endif directives are passed on
    to their respective worker methods, everything else is ignored.

    Returns the result of the worker method, or False if the directive
    isn't one we handle.
    """
    cchLine = len(sLine);

    # Locate the directive word, it follows the hash and optional blanks.
    offCur = sLine.find('#');
    assert offCur >= 0;
    offCur += 1;
    while offCur < cchLine and sLine[offCur].isspace():
        offCur += 1;
    offWord = offCur;
    while offCur < cchLine and not sLine[offCur].isspace():
        offCur += 1;
    sDirective = sLine[offWord:offCur];
    if self.fDebugPreproc:
        self.debug('line %d: #%s...' % (self.iLine, sDirective));

    # Move up to the last blank preceding the arguments/whatever.  The tail
    # thus starts with one whitespace character (the newline if there is
    # nothing else).
    while offCur + 1 < cchLine and sLine[offCur + 1].isspace():
        offCur += 1;
    sTail = sLine[offCur:];

    # Dispatch to the worker for the directive.
    if sDirective in ('if', 'ifdef', 'ifndef', 'elif',):
        return self.workerPreprocessorIfOrElif(sDirective, sTail);
    if sDirective == 'define':
        return self.workerPreprocessorDefine(sTail);
    if sDirective == 'undef':
        return self.workerPreprocessorUndef(sTail);
    if sDirective == 'else':
        return self.workerPreprocessorElse();
    if sDirective == 'endif':
        return self.workerPreprocessorEndif();

    if self.fDebugPreproc:
        self.debug('line %d: Unknown preprocessor directive: %s' % (self.iLine, sDirective));
    return False;
5978
def expandMacros(self, sLine, oMatch):
    """
    Expands the macro invocation oMatch found in sLine.

    oMatch is a self.oReMacros match on sLine.  We currently ASSUME there is
    only a single invocation of a known macro in the line, and that the
    whole invocation is on this one line.

    Returns the line with the invocation replaced by the macro expansion.
    Problems are reported thru self.raiseError.
    """
    #
    # Work out which macro we're dealing with.  When the match ends with an
    # opening parenthesis, it's an invocation with arguments.
    #
    offInvocation = oMatch.start();
    sMatched      = oMatch.group(1);
    assert sMatched == sLine[offInvocation : oMatch.end()];
    fHasArgList = sMatched.endswith('(');
    sName = sMatched[:-1].strip() if fHasArgList else sMatched;
    oMacro = self.dMacros[sName]        # type: SimpleParser.Macro
    sBefore = sLine[:offInvocation];

    #
    # The simple case: No parameters.
    #
    if not fHasArgList:
        if self.fDebugPreproc:
            self.debug('expanding simple macro %s on line %u' % (sName, self.iLine,));
        return sBefore + oMacro.expandMacro(self) + sLine[oMatch.end():];

    #
    # Collect the arguments by scanning for the matching closing parenthesis,
    # splitting on commas that aren't inside nested parentheses.
    #
    cDepth      = 1;
    offScan     = oMatch.end();
    offArgStart = offScan;
    asActual    = [];
    while True:
        if offScan >= len(sLine):
            self.raiseError('expandMacros: Invocation of macro %s spans multiple lines!' % (sName,));
        chCur = sLine[offScan];
        if chCur == ',':
            if cDepth == 1:
                asActual.append(sLine[offArgStart:offScan].strip());
                offArgStart = offScan + 1;
        elif chCur == '(':
            cDepth += 1;
        elif chCur == ')':
            cDepth -= 1;
            if cDepth == 0:
                # Final argument.  offScan is left pointing at the closing parenthesis.
                asActual.append(sLine[offArgStart:offScan].strip());
                break;
        offScan += 1;

    # An empty argument list comes out as a single empty argument.
    if len(oMacro.asArgs) == 0 and len(asActual) == 1 and asActual[0] == '':
        asActual = [];
    if len(oMacro.asArgs) != len(asActual):
        self.raiseError('expandMacros: Argument mismatch in %s invocation' % (oMacro.sName,));

    #
    # Expand it.
    #
    if self.fDebugPreproc:
        self.debug('expanding macro %s on line %u with arguments %s' % (sName, self.iLine, asActual));
    return sBefore + oMacro.expandMacro(self, asActual) + sLine[offScan + 1 :];
6037
def parse(self):
    """
    Parses the given file.

    The lines in self.asLines are processed one by one: known macros are
    expanded, preprocessor directives are handled, multi-line comments are
    collected and passed to parseComment, and code is scanned for macro
    invocations, function ends and function tables.

    Returns the result of printErrors (the number of errors).
    Raises exception on fatal trouble.
    """
    #self.debug('Parsing %s' % (self.sSrcFile,));

    #
    # Loop thru the lines.
    #
    # Please mind that self.iLine may be updated by checkCodeForMacro and
    # other worker methods.
    #
    while self.iLine < len(self.asLines):
        # Note: self.iLine is incremented right away, so from here on it is
        #       the 1-based number of the line being processed.
        sLine = self.asLines[self.iLine];
        self.iLine += 1;
        #self.debug('line %u: %s' % (self.iLine, sLine[:-1]));

        # Expand macros we know about if we're currently in code.
        # The expanded text replaces the original line in self.asLines.
        if self.iState == self.kiCode and self.oReMacros:
            oMatch = self.oReMacros.search(sLine);
            if oMatch:
                sLine = self.expandMacros(sLine, oMatch);
                if self.fDebugPreproc:
                    self.debug('line %d: expanded\n%s ==>\n%s' % (self.iLine, self.asLines[self.iLine - 1], sLine[:-1],));
                self.asLines[self.iLine - 1] = sLine;

        # Check for preprocessor directives before comments and other stuff.
        # ASSUMES preprocessor directives don't end with multiline comments.
        if self.iState == self.kiCode and sLine.lstrip().startswith('#'):
            if self.fDebugPreproc:
                self.debug('line %d: preproc' % (self.iLine,));
            self.checkPreprocessorDirective(sLine);
        else:
            # Look for comments.
            offSlash = sLine.find('/');
            if offSlash >= 0:
                # Take this path unless the first slash starts a C++ comment while we're in code.
                if offSlash + 1 >= len(sLine) or sLine[offSlash + 1] != '/' or self.iState != self.kiCode:
                    # Work thru the line, alternating between code and comment segments.
                    offLine = 0;
                    while offLine < len(sLine):
                        if self.iState == self.kiCode:
                            # Look for substantial multiline comment so we pass the following MC as a whole line:
                            #   IEM_MC_ARG_CONST(uint8_t, bImmArg, /*=*/ bImm, 2);
                            # Note! We ignore C++ comments here, assuming these aren't used in lines with C-style comments.
                            offHit = sLine.find('/*', offLine);
                            while offHit >= 0:
                                offEnd = sLine.find('*/', offHit + 2);
                                if offEnd < 0 or offEnd - offHit >= 16: # 16 chars is a bit random.
                                    break;
                                offHit = sLine.find('/*', offEnd);

                            if offHit >= 0:
                                # Check the code preceding the comment, then switch to comment state.
                                self.checkCodeForMacro(sLine[offLine:offHit], offLine);
                                self.sComment = '';
                                self.iCommentLine = self.iLine;
                                self.iState = self.kiCommentMulti;
                                offLine = offHit + 2;
                            else:
                                # No (substantial) comment, the remainder of the line is code.
                                self.checkCodeForMacro(sLine[offLine:], offLine);
                                offLine = len(sLine);

                        elif self.iState == self.kiCommentMulti:
                            offHit = sLine.find('*/', offLine);
                            if offHit >= 0:
                                # End of the comment: parse it and go back to code state.
                                self.sComment += sLine[offLine:offHit];
                                self.iState = self.kiCode;
                                offLine = offHit + 2;
                                self.parseComment();
                            else:
                                # The comment continues on the next line.
                                self.sComment += sLine[offLine:];
                                offLine = len(sLine);
                        else:
                            assert False;
                # C++ line comment.
                elif offSlash > 0:
                    self.checkCodeForMacro(sLine[:offSlash], 0);

            # No slash, but append the line if in multi-line comment.
            elif self.iState == self.kiCommentMulti:
                #self.debug('line %d: multi' % (self.iLine,));
                self.sComment += sLine;

            # No slash, but check code line for relevant macro.
            elif ( self.iState == self.kiCode
                  and (sLine.find('IEMOP_') >= 0 or sLine.find('FNIEMOPRM_DEF') >= 0 or sLine.find('IEM_MC') >= 0)):
                #self.debug('line %d: macro' % (self.iLine,));
                self.checkCodeForMacro(sLine, 0);

            # If the line is a '}' in the first position, complete the instructions.
            elif self.iState == self.kiCode and sLine[0] == '}':
                #self.debug('line %d: }' % (self.iLine,));
                self.doneInstructions(fEndOfFunction = True);

            # Look for instruction table on the form 'IEM_STATIC const PFNIEMOP g_apfnVexMap3'
            # so we can check/add @oppfx info from it.
            elif self.iState == self.kiCode and sLine.find('PFNIEMOP') > 0 and self.oReFunTable.match(sLine):
                self.parseFunctionTable(sLine);

    # End of file: complete any pending instructions and log the statistics.
    # The max() guards against division by zero when no instructions were found.
    self.doneInstructions(fEndOfFunction = True);
    self.debug('%3s%% / %3s stubs out of %4s instructions and %4s MC blocks in %s'
               % (self.cTotalStubs * 100 // max(self.cTotalInstr, 1), self.cTotalStubs, self.cTotalInstr,
                  self.cTotalMcBlocks, os.path.basename(self.sSrcFile),));
    return self.printErrors();
6143
# Import time sanity check of the EFLAGS class definitions.
def __sanityCheckEFlagsClasses():
    """
    Asserts that every flag listed in SimpleParser.kdEFlagsClasses is a
    known EFLAGS mnemonic, i.e. present in g_kdEFlagsMnemonics.
    """
    for sClass in SimpleParser.kdEFlagsClasses:
        dLists = SimpleParser.kdEFlagsClasses[sClass];
        for sAttrib in dLists:
            for sFlag in dLists[sAttrib]:
                assert sFlag in g_kdEFlagsMnemonics, 'sClass=%s sAttrib=%s sFlag=%s' % (sClass, sAttrib, sFlag,);
__sanityCheckEFlagsClasses();
6151
## The parsed content of IEMAllInstCommonBodyMacros.h.
# None until the first __parseFileByName call, which parses that file and
# stores the parser here for use when parsing the other files.
g_oParsedCommonBodyMacros = None # type: SimpleParser
6154
def __parseFileByName(sSrcFile, sDefaultMap, sHostArch):
    """
    Parses one source file for instruction specifications.

    sSrcFile is the path of the file to parse, sDefaultMap the name of the
    default opcode map for it, and sHostArch the host architecture handed
    on to the parser.

    On the first call IEMAllInstCommonBodyMacros.h is parsed too and kept in
    g_oParsedCommonBodyMacros, so that the macros in it are available when
    parsing this and all subsequent files.

    Returns a (cErrors, oParser) tuple, cErrors being the parse() result.
    Raises Exception if a file cannot be opened or read, and ParserException
    (after printing it to stderr) on parser trouble.
    """
    #
    # Read sSrcFile into a line array.
    #
    try:
        oFile = open(sSrcFile, "r");    # pylint: disable=consider-using-with,unspecified-encoding
    except Exception as oXcpt:
        raise Exception("failed to open %s for reading: %s" % (sSrcFile, oXcpt,));
    try:
        asLines = oFile.readlines();
    except Exception as oXcpt:
        raise Exception("failed to read %s: %s" % (sSrcFile, oXcpt,));
    finally:
        oFile.close();

    #
    # On the first call, we parse IEMAllInstCommonBodyMacros.h so we
    # can use the macros from it when processing the other files.
    #
    global g_oParsedCommonBodyMacros;
    if g_oParsedCommonBodyMacros is None:
        # Locate the file: next to sSrcFile, else next to this script.
        sCommonBodyMacros = os.path.join(os.path.split(sSrcFile)[0], 'IEMAllInstCommonBodyMacros.h');
        if not os.path.isfile(sCommonBodyMacros):
            sCommonBodyMacros = os.path.join(os.path.split(__file__)[0], 'IEMAllInstCommonBodyMacros.h');

        # Read it.
        try:
            with open(sCommonBodyMacros, "r") as oIncFile: # pylint: disable=unspecified-encoding
                asIncFiles = oIncFile.readlines();
        except Exception as oXcpt:
            raise Exception("failed to open/read %s: %s" % (sCommonBodyMacros, oXcpt,));

        # Parse it.  The file is only expected to define macros, so finding
        # instructions, tags, stubs or MC blocks in it is treated as an error.
        try:
            oParser = SimpleParser(sCommonBodyMacros, asIncFiles, 'one', sHostArch);
            if oParser.parse() != 0:
                raise ParserException('%s: errors: See above' % (sCommonBodyMacros, ));
            if oParser.cTotalInstr != 0 or oParser.cTotalStubs != 0 or oParser.cTotalTagged != 0 or oParser.cTotalMcBlocks != 0:
                # Note! The argument order must follow the message: instr, tags, stubs, MCs.
                raise ParserException('%s: error: Unexpectedly found %u instr, %u tags, %u stubs and %u MCs, expecting zero. %s'
                                      % (sCommonBodyMacros, oParser.cTotalInstr, oParser.cTotalTagged, oParser.cTotalStubs,
                                         oParser.cTotalMcBlocks,
                                         ', '.join(sorted(  [str(oMcBlock.iBeginLine) for oMcBlock in g_aoMcBlocks]
                                                          + [str(oInstr.iLineCreated) for oInstr in g_aoAllInstructions])),));
        except ParserException as oXcpt:
            print(str(oXcpt), file = sys.stderr);
            raise;
        g_oParsedCommonBodyMacros = oParser;

    #
    # Do the parsing.
    #
    try:
        oParser = SimpleParser(sSrcFile, asLines, sDefaultMap, sHostArch, g_oParsedCommonBodyMacros);
        return (oParser.parse(), oParser) ;
    except ParserException as oXcpt:
        print(str(oXcpt), file = sys.stderr);
        raise;
6216
6217
def __doTestCopying():
    """
    Carries out the asCopyTests instructions.

    Each asCopyTests entry names a source instruction, either by statistics
    name or by function name.  The tests of the source instruction(s) are
    appended to those of the instruction with the entry.

    Returns the number of errors, the errors being written to stderr.
    """
    asErrors = [];
    for oDstInstr in g_aoAllInstructions:
        if not oDstInstr.asCopyTests:
            continue;
        for sSrcInstr in oDstInstr.asCopyTests:
            # Try the statistics name first, then fall back on the function name.
            oByStat = g_dAllInstructionsByStat.get(sSrcInstr, None);
            if oByStat:
                aoSrcInstrs = [oByStat,];
            else:
                aoSrcInstrs = g_dAllInstructionsByFunction.get(sSrcInstr, []);

            if not aoSrcInstrs:
                asErrors.append('%s:%s: error: @opcopytests reference "%s" not found\n'
                                % ( oDstInstr.sSrcFile, oDstInstr.iLineCreated, sSrcInstr));
                continue;

            # Copy the tests, an instruction referencing itself being an error.
            for oSrcInstr in aoSrcInstrs:
                if oSrcInstr != oDstInstr:
                    oDstInstr.aoTests.extend(oSrcInstr.aoTests);
                else:
                    asErrors.append('%s:%s: error: @opcopytests reference "%s" matches the destination\n'
                                    % ( oDstInstr.sSrcFile, oDstInstr.iLineCreated, sSrcInstr));

    if asErrors:
        sys.stderr.write(u''.join(asErrors));
    return len(asErrors);
6245
6246
def __applyOnlyTest():
    """
    Restricts testing to the instructions in g_aoOnlyTestInstructions.

    When that list is not empty, aoTests is replaced by an empty list on every
    instruction in g_aoAllInstructions that has tests but is not in the list.
    Nothing is touched when the list is empty.

    Returns 0 (no errors), always.
    """
    if not g_aoOnlyTestInstructions:
        return 0;

    for oCandidate in g_aoAllInstructions:
        if oCandidate.aoTests and oCandidate not in g_aoOnlyTestInstructions:
            oCandidate.aoTests = [];
    return 0;
6258
## All the main instruction files.
# Each entry is: ( filename, default map name, file set ), where a file set of
# -1 means the file is included in all sets.
g_aaoAllInstrFilesAndDefaultMapAndSet = (
    ('IEMAllInstCommon.cpp.h', 'one', -1),
    ('IEMAllInstOneByte.cpp.h', 'one', 1),
    ('IEMAllInst3DNow.cpp.h', '3dnow', 2),
    ('IEMAllInstTwoByte0f.cpp.h', 'two0f', 2),
    ('IEMAllInstThree0f38.cpp.h', 'three0f38', 3),
    ('IEMAllInstThree0f3a.cpp.h', 'three0f3a', 3),
    ('IEMAllInstVexMap1.cpp.h', 'vexmap1', 4),
    ('IEMAllInstVexMap2.cpp.h', 'vexmap2', 4),
    ('IEMAllInstVexMap3.cpp.h', 'vexmap3', 4),
);
6271
def __parseFilesWorker(asFilesAndDefaultMap, sHostArch):
    """
    Parses the given IEMAllInstruction*.cpp.h files.

    asFilesAndDefaultMap is a sequence of (sFilename, sDefaultMap) pairs.  A
    filename without any directory component that does not exist relative to
    the current directory is looked up in the directory of this script.
    sHostArch is passed on to __parseFileByName as-is.

    After parsing, the test copying and only-test filtering passes are run and
    a statistics line is printed to stderr.

    Returns a list of the parsers on success.
    Raises exception on failure.
    """
    sSrcDir   = os.path.dirname(os.path.abspath(__file__));
    cErrors   = 0;
    aoParsers = [];
    for sFilename, sDefaultMap in asFilesAndDefaultMap:
        if not os.path.split(sFilename)[0] and not os.path.exists(sFilename):
            sFilename = os.path.join(sSrcDir, sFilename);
        cThisErrors, oParser = __parseFileByName(sFilename, sDefaultMap, sHostArch);
        cErrors += cThisErrors;
        aoParsers.append(oParser);
    cErrors += __doTestCopying();
    cErrors += __applyOnlyTest();

    # Total stub stats:
    cInstructions = len(g_aoAllInstructions);
    cTotalStubs   = 0;
    for oInstr in g_aoAllInstructions:
        cTotalStubs += oInstr.fStub;
    # Guard the percentage: with no instructions collected the division would
    # raise ZeroDivisionError and hide the parse error exception below.
    uPctStubs = cTotalStubs * 100 // cInstructions if cInstructions else 0;
    print('debug: %3s%% / %3s stubs out of %4s instructions and %4s MC blocks in total'
          % (uPctStubs, cTotalStubs, cInstructions, len(g_aoMcBlocks),),
          file = sys.stderr);

    if cErrors != 0:
        raise Exception('%d parse errors' % (cErrors,));
    return aoParsers;
6302
6303
def parseFiles(asFiles, sHostArch = None):
    """
    Parses a selection of IEMAllInstruction*.cpp.h files.

    The default map for each file is taken from the first entry in
    g_aaoAllInstrFilesAndDefaultMapAndSet whose filename equals the basename
    of the given path, ignoring case.  The actual work is done by
    __parseFilesWorker.

    Returns a list of the parsers on success.
    Raises exception on failure, which includes files not found in the table.
    """
    aoPairs = [];
    for sPath in asFiles:
        sBasename   = os.path.split(sPath)[1].lower();
        sDefaultMap = next((aoInfo[1] for aoInfo in g_aaoAllInstrFilesAndDefaultMapAndSet
                            if aoInfo[0].lower() == sBasename), None);
        if not sDefaultMap:
            raise Exception('Unable to classify file: %s' % (sPath,));
        aoPairs.append((sPath, sDefaultMap));

    return __parseFilesWorker(aoPairs, sHostArch);
6325
6326
def parseAll(sHostArch = None):
    """
    Parses every file listed in g_aaoAllInstrFilesAndDefaultMapAndSet, using
    the default map given there for each of them.

    Returns a list of the parsers on success.
    Raises exception on failure.
    """
    asFilesAndDefaultMap = [];
    for aoInfo in g_aaoAllInstrFilesAndDefaultMapAndSet:
        # Only the filename and the default map are needed, not the file set.
        asFilesAndDefaultMap.append((aoInfo[0], aoInfo[1]));
    return __parseFilesWorker(asFilesAndDefaultMap, sHostArch);
6335
6336
6337#
6338# Generators (may perhaps move later).
6339#
def __formatDisassemblerTableEntry(oInstr):
    """
    Formats the disassembler table entry for the given instruction.

    The entry is an OP() macro invocation, or an OPVEX() one when the
    instruction has more than three operands.  The macro arguments are, in
    order: the format string, one parser index per operand slot, the opcode
    enum value, one OP_PARM_xxx per operand slot and the DISOPTYPE_xxx flags.
    The arguments are padded out to fixed columns.

    Returns the table entry as a single line string.
    """
    aoOperands   = oInstr.aoOperands;
    fFourOps     = len(aoOperands) > 3;
    sMacro       = 'OPVEX' if fFourOps else 'OP';
    cMaxOperands = 4 if fFourOps else 3;
    assert len(aoOperands) <= cMaxOperands;

    #
    # Format string: mnemonic, a space, then the comma separated operand formats.
    #
    asOperandFmts = [];
    for oOperand in aoOperands:
        sOperandFmt = g_kdOpTypes[oOperand.sType][2];
        if sOperandFmt[0] != '%':
            sOperandFmt = sOperandFmt.upper();              ## @todo remove upper() later.
        asOperandFmts.append(sOperandFmt);
    sFormat = '%s("%s' % (sMacro, oInstr.sMnemonic,);
    if asOperandFmts:
        sFormat += ' ' + ','.join(asOperandFmts);
    sFormat += '",';

    #
    # Decoders.  The encoding decides what goes into the leading parser slots.
    #
    kdEncodingToParser = {
        'VEX.ModR/M': 'IDX_ParseModRM,',
        'vex2':       'IDX_ParseVex2b,',
        'vex3':       'IDX_ParseVex3b,',
    };
    sEncoding = oInstr.sEncoding;
    asParsers = [];
    if sEncoding is None or sEncoding in ('fixed', 'VEX.fixed'):
        pass;
    elif sEncoding == 'ModR/M':
        # ASSUME the first operand is using the ModR/M encoding
        assert len(aoOperands) >= 1 and aoOperands[0].usesModRM(), "oInstr=%s" % (oInstr,);
        asParsers.append('IDX_ParseModRM,');
    elif sEncoding == 'prefix':
        asParsers.extend(['0,'] * len(aoOperands));
    elif sEncoding in kdEncodingToParser:
        asParsers.append(kdEncodingToParser[sEncoding]);
    elif sEncoding in g_dInstructionMaps:
        asParsers.append(g_dInstructionMaps[sEncoding].sDisParse + ',');
    else:
        ## @todo The following parsers are not handled yet:
        #   IDX_ParseTwoByteEsc, IDX_ParseGrp1, IDX_ParseShiftGrp2, IDX_ParseGrp3,
        #   IDX_ParseGrp4, IDX_ParseGrp5, IDX_Parse3DNow, IDX_ParseGrp6,
        #   IDX_ParseGrp7, IDX_ParseGrp8, IDX_ParseGrp9, IDX_ParseGrp10,
        #   IDX_ParseGrp12, IDX_ParseGrp13, IDX_ParseGrp14, IDX_ParseGrp15,
        #   IDX_ParseGrp16, IDX_ParseThreeByteEsc4, IDX_ParseThreeByteEsc5,
        #   IDX_ParseModFence, IDX_ParseEscFP, IDX_ParseNopPause,
        #   IDX_ParseInvOpModRM,
        assert False, str(oInstr);

    # Check for immediates and stuff in the remaining operands, i.e. those not
    # covered by the parser slots filled in above, then pad with zeros.
    for oOperand in aoOperands[len(asParsers):]:
        asParsers.append(g_kdOpTypes[oOperand.sType][0] + ',');
    asParsers.extend(['0,'] * (cMaxOperands - len(asParsers)));

    #
    # Opcode and operands.
    #
    assert oInstr.sDisEnum, str(oInstr);
    sOpcode  = oInstr.sDisEnum + ',';
    asParams = ['OP_PARM_' + g_kdOpTypes[oOperand.sType][3] + ',' for oOperand in aoOperands];
    asParams.extend(['OP_PARM_NONE,'] * (cMaxOperands - len(asParams)));

    #
    # Flags.  Only hints mapping to DISOPTYPE_xxx defines are included.
    #
    asFlags = [];
    for sHint in sorted(oInstr.dHints.keys()):
        sDefine = g_kdHints[sHint];
        if sDefine.startswith('DISOPTYPE_'):
            asFlags.append(sDefine);
    sFlags = (' | '.join(asFlags) if asFlags else '0') + '),';

    #
    # Format the columns into a line.  Each column starts at its fixed offset,
    # or one space after the previous column if that one ran past the offset.
    #
    asColumns   = [ sFormat, ] + asParsers + [ sOpcode, ] + asParams + [ sFlags, ];
    aoffColumns = [4, 29, 49, 65, 77, 89, 109, 125, 141, 157, 183, 199];
    sLine = '';
    for iColumn, sColumn in enumerate(asColumns):
        sLine += ' ' * max(aoffColumns[iColumn] - len(sLine), 1);
        sLine += sColumn;

    # Example:
    #   OP("psrlw %Vdq,%Wdq", IDX_ParseModRM, IDX_UseModRM, 0, OP_PSRLW, OP_PARM_Vdq, OP_PARM_Wdq, OP_PARM_NONE,
    #      DISOPTYPE_HARMLESS),
    # With the macro being defined like this:
    #   define OP(pszOpcode, idxParse1, idxParse2, idxParse3, opcode, param1, param2, param3, optype) \
    #       { pszOpcode, idxParse1, idxParse2, idxParse3, 0, opcode, param1, param2, param3, 0, 0, optype }
    return sLine;
6464
def __checkIfShortTable(aoTableOrdered, oMap):
    """
    Checks whether a table should be output in the short (trimmed) form.

    The leading and trailing None entries of aoTableOrdered are what can be
    trimmed off.  The short form is chosen when the number of such entries is
    at least len(aoTableOrdered) // 30.

    Returns (iInstr, cInstructions, fShortTable), where iInstr is the index of
    the first entry to output and cInstructions the index following the last
    one.  For the full table this is (0, len(aoTableOrdered), False).
    """
    _ = oMap; # Not used yet, it is here so the decision can be overridden per map.

    # Determine how much we can trim off at either end.
    cEntries = len(aoTableOrdered);
    aiInUse  = [iEntry for iEntry in range(cEntries) if aoTableOrdered[iEntry] is not None];
    iFirst   = aiInUse[0]      if aiInUse else 0;
    iEnd     = aiInUse[-1] + 1 if aiInUse else 0;
    cTrimmed = iFirst + cEntries - iEnd;

    # Go for the short table version if enough can be saved.
    # NOTE(review): The original comment here said "more than 30%", but the
    #               threshold is one 30th of the table size - confirm which
    #               of the two is the intended one.
    if cTrimmed >= cEntries // 30:
        return (iFirst, iEnd, True);

    # Output the full table.
    return (0, cEntries, False);
6486
def generateDisassemblerTables(oDstFile = sys.stdout):
    """
    Generates disassembler tables.

    All the instruction files are parsed using parseAll(), after which a
    DISOPCODE table and a DISOPMAPDESC range record is written to oDstFile
    for each instruction map with a name starting with 'vex'.

    Returns exit code: 0 on success, 1 if the parsing failed (the error and a
    traceback are written to stderr).
    """

    #
    # Parse all.
    #
    try:
        parseAll();
    except Exception as oXcpt:
        print('error: parseAll failed: %s' % (oXcpt,), file = sys.stderr);
        traceback.print_exc(file = sys.stderr);
        return 1;


    #
    # The disassembler uses a slightly different table layout: Maps with the
    # 'byte+pfx' selector are split up into four maps, created by copying the
    # map with 'none', '0x66', '0xf3' and '0xf2' as the second argument
    # (presumably the prefix to filter the instructions by).
    #
    aoDisasmMaps = [];
    for oMap in sorted(g_dInstructionMaps.values(),
                       key = lambda oSortMap: oSortMap.sEncoding + ''.join(oSortMap.asLeadOpcodes)):
        if oMap.sSelector != 'byte+pfx':
            aoDisasmMaps.append(oMap);
        else:
            # Split the map by prefix.
            aoDisasmMaps.append(oMap.copy(oMap.sName, 'none'));
            aoDisasmMaps.append(oMap.copy(oMap.sName + '_66', '0x66'));
            aoDisasmMaps.append(oMap.copy(oMap.sName + '_F3', '0xf3'));
            aoDisasmMaps.append(oMap.copy(oMap.sName + '_F2', '0xf2'));

    #
    # Dump each map.
    #
    # Note! asHeaderLines collects the extern declarations, but nothing in
    #       this function writes them out yet.
    #
    asHeaderLines = [];
    print("debug: maps=%s\n" % (', '.join([oMap.sName for oMap in aoDisasmMaps]),), file = sys.stderr);
    for oMap in aoDisasmMaps:
        sName = oMap.sName;

        if not sName.startswith("vex"):
            continue; # only looking at the vex maps at the moment.

        #
        # Get the instructions for the map and see if we can do a short version or not.
        #
        aoTableOrder    = oMap.getInstructionsInTableOrder();
        cEntriesPerByte = oMap.getEntriesPerByte();
        (iInstrStart, iInstrEnd, fShortTable) = __checkIfShortTable(aoTableOrder, oMap);

        #
        # Output the table start.
        # Note! Short tables are static and only accessible via the map range record.
        #
        asLines = [];
        asLines.append('/* Generated from: %-11s Selector: %-7s Encoding: %-7s Lead bytes opcodes: %s */'
                       % ( oMap.sName, oMap.sSelector, oMap.sEncoding, ' '.join(oMap.asLeadOpcodes), ));
        if fShortTable:
            asLines.append('static const DISOPCODE %s[] =' % (oMap.getDisasTableName(),));
        else:
            asHeaderLines.append('extern const DISOPCODE %s[%d];' % (oMap.getDisasTableName(), iInstrEnd - iInstrStart,));
            asLines.append('const DISOPCODE %s[%d] =' % (oMap.getDisasTableName(), iInstrEnd - iInstrStart,));
        asLines.append('{');

        # A short table not starting at the beginning of a row of 16 opcode
        # bytes gets a comment giving the index of its first entry.
        if fShortTable and (iInstrStart & ((0x10 * cEntriesPerByte) - 1)) != 0:
            asLines.append(' /* %#04x: */' % (iInstrStart,));

        #
        # Output the instructions.
        #
        iInstr = iInstrStart;
        while iInstr < iInstrEnd:
            oInstr = aoTableOrder[iInstr];

            # Start of a new row of 16 opcode bytes: Blank line + row comment.
            if (iInstr & ((0x10 * cEntriesPerByte) - 1)) == 0:
                if iInstr != iInstrStart:
                    asLines.append('');
                asLines.append(' /* %x */' % ((iInstr // cEntriesPerByte) >> 4,));

            if oInstr is None:
                # Invalid. Optimize blocks of invalid instructions.
                cInvalidInstrs = 1;
                while iInstr + cInvalidInstrs < len(aoTableOrder) and aoTableOrder[iInstr + cInvalidInstrs] is None:
                    cInvalidInstrs += 1;
                if (iInstr & (0x10 * cEntriesPerByte - 1)) == 0 and cInvalidInstrs >= 0x10 * cEntriesPerByte:
                    # A whole row of invalid entries.
                    asLines.append(' INVALID_OPCODE_BLOCK_%u,' % (0x10 * cEntriesPerByte,));
                    iInstr += 0x10 * cEntriesPerByte - 1;
                elif cEntriesPerByte > 1:
                    if (iInstr & (cEntriesPerByte - 1)) == 0 and cInvalidInstrs >= cEntriesPerByte:
                        # All the entries of one opcode byte are invalid.  Skip
                        # exactly as many entries as the block macro covers
                        # (the last one is skipped by the increment below).
                        asLines.append(' INVALID_OPCODE_BLOCK_%u,' % (cEntriesPerByte,));
                        iInstr += cEntriesPerByte - 1;
                    else:
                        asLines.append(' /* %#04x/%d */ INVALID_OPCODE,'
                                       % (iInstr // cEntriesPerByte, iInstr % cEntriesPerByte));
                else:
                    asLines.append(' /* %#04x */ INVALID_OPCODE,' % (iInstr,));
            elif isinstance(oInstr, list):
                if len(oInstr) != 0:
                    asLines.append(' /* %#04x */ ComplicatedListStuffNeedingWrapper, /* \n -- %s */'
                                   % (iInstr, '\n -- '.join([str(oItem) for oItem in oInstr]),));
                else:
                    asLines.append(__formatDisassemblerTableEntry(oInstr));
            else:
                asLines.append(__formatDisassemblerTableEntry(oInstr));

            iInstr += 1;

        # An empty table still needs one entry to make the initializer valid.
        if iInstrStart >= iInstrEnd:
            asLines.append(' /* dummy */ INVALID_OPCODE');

        asLines.append('};');
        asLines.append('AssertCompile(RT_ELEMENTS(%s) == %s);' % (oMap.getDisasTableName(), iInstrEnd - iInstrStart,));

        #
        # We always emit a map range record, assuming the linker will eliminate the unnecessary ones.
        # The extern declaration must use the same name as the definition.
        #
        asHeaderLines.append('extern const DISOPMAPDESC %s;' % (oMap.getDisasRangeName(),));
        asLines.append('const DISOPMAPDESC %s = { &%s[0], %#04x, RT_ELEMENTS(%s) };'
                       % (oMap.getDisasRangeName(), oMap.getDisasTableName(), iInstrStart, oMap.getDisasTableName(),));

        #
        # Write out the lines.
        #
        oDstFile.write('\n'.join(asLines));
        oDstFile.write('\n');
        oDstFile.write('\n');
        #break; #for now
    return 0;
6615
# When run as a script: Generate the disassembler tables on stdout and use
# the result (0 on success, 1 on parse failure) as the process exit code.
if __name__ == '__main__':
    sys.exit(generateDisassemblerTables());
6618
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette