VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllInstPython.py@ 103863

Last change on this file since 103863 was 103863, checked in by vboxsync, 9 months ago

VMM/IEM: Revert r162233 as it is not the cause for the corruption, bugref:10371

  • Property svn:eol-style set to LF
  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 321.4 KB
Line 
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3# $Id: IEMAllInstPython.py 103863 2024-03-15 08:41:25Z vboxsync $
4
5"""
6IEM instruction extractor.
7
8This script/module parses the IEMAllInstruction*.cpp.h files next to it and
9collects information about the instructions. It can then be used to generate
10disassembler tables and tests.
11"""
12
13from __future__ import print_function;
14
15__copyright__ = \
16"""
17Copyright (C) 2017-2023 Oracle and/or its affiliates.
18
19This file is part of VirtualBox base platform packages, as
20available from https://www.virtualbox.org.
21
22This program is free software; you can redistribute it and/or
23modify it under the terms of the GNU General Public License
24as published by the Free Software Foundation, in version 3 of the
25License.
26
27This program is distributed in the hope that it will be useful, but
28WITHOUT ANY WARRANTY; without even the implied warranty of
29MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
30General Public License for more details.
31
32You should have received a copy of the GNU General Public License
33along with this program; if not, see <https://www.gnu.org/licenses>.
34
35The contents of this file may alternatively be used under the terms
36of the Common Development and Distribution License Version 1.0
37(CDDL), a copy of it is provided in the "COPYING.CDDL" file included
38in the VirtualBox distribution, in which case the provisions of the
39CDDL are applicable instead of those of the GPL.
40
41You may elect to license modified versions of this file under the
42terms and conditions of either the GPL or the CDDL or both.
43
44SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
45"""
46__version__ = "$Revision: 103863 $"
47
48# pylint: disable=anomalous-backslash-in-string,too-many-lines
49
50# Standard python imports.
51import os;
52import re;
53import sys;
54import traceback;
55
56## Only the main script needs to modify the path.
57#g_ksValidationKitDir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),
58# 'ValidationKit');
59#sys.path.append(g_ksValidationKitDir);
60#
61#from common import utils; - Windows build boxes doesn't have pywin32.
62
63# Python 3 hacks:
64if sys.version_info[0] >= 3:
65 long = int; # pylint: disable=redefined-builtin,invalid-name
66
67
## Values of the X86_EFL_XXX constants, keyed by constant name.
## Single flag bits are spelled as shifts (the RT_BIT_32(n) form), masks as hex.
g_kdX86EFlagsConstants = {
    'X86_EFL_CF':           1 << 0,
    'X86_EFL_1':            1 << 1,
    'X86_EFL_PF':           1 << 2,
    'X86_EFL_AF':           1 << 4,
    'X86_EFL_ZF':           1 << 6,
    'X86_EFL_SF':           1 << 7,
    'X86_EFL_TF':           1 << 8,
    'X86_EFL_IF':           1 << 9,
    'X86_EFL_DF':           1 << 10,
    'X86_EFL_OF':           1 << 11,
    'X86_EFL_IOPL':         (1 << 12) | (1 << 13),  # two-bit field
    'X86_EFL_NT':           1 << 14,
    'X86_EFL_RF':           1 << 16,
    'X86_EFL_VM':           1 << 17,
    'X86_EFL_AC':           1 << 18,
    'X86_EFL_VIF':          1 << 19,
    'X86_EFL_VIP':          1 << 20,
    'X86_EFL_ID':           1 << 21,
    'X86_EFL_LIVE_MASK':    0x003f7fd5,             # UINT32_C(0x003f7fd5)
    'X86_EFL_RA1_MASK':     1 << 1,                 # same bit as X86_EFL_1
};
90
## EFlags values allowed in \@opfltest, \@opflmodify, \@opflundef, \@opflset, and \@opflclear.
##
## Maps a flag mnemonic to the name of an X86_EFL_XXX constant.  A leading '!'
## on the constant name marks the mnemonic for the opposite state of the flag
## (e.g. 'nc' = No Carry).
g_kdEFlagsMnemonics = {
    # Debugger flag notation (sorted by value):
    'cf':   'X86_EFL_CF',   ##< Carry Flag.
    'nc':   '!X86_EFL_CF',  ##< No Carry.

    # NOTE(review): architecturally PF=1 is usually described as *even* parity,
    # so the polarity of 'po'/'pe' below looks swapped - confirm before relying
    # on or changing it.
    'po':   'X86_EFL_PF',   ##< Parity Odd.
    'pe':   '!X86_EFL_PF',  ##< Parity Even.

    'af':   'X86_EFL_AF',   ##< Aux Flag.
    'na':   '!X86_EFL_AF',  ##< No Aux.

    'zr':   'X86_EFL_ZF',   ##< ZeRo.
    'nz':   '!X86_EFL_ZF',  ##< No Zero.

    'ng':   'X86_EFL_SF',   ##< NeGative (sign).
    'pl':   '!X86_EFL_SF',  ##< PLus (sign).

    'tf':   'X86_EFL_TF',   ##< Trap flag.

    'ei':   'X86_EFL_IF',   ##< Enabled Interrupts.
    'di':   '!X86_EFL_IF',  ##< Disabled Interrupts.

    'dn':   'X86_EFL_DF',   ##< DowN (string op direction).
    'up':   '!X86_EFL_DF',  ##< UP (string op direction).

    'ov':   'X86_EFL_OF',   ##< OVerflow.
    'nv':   '!X86_EFL_OF',  ##< No Overflow.

    'nt':   'X86_EFL_NT',   ##< Nested Task.
    'rf':   'X86_EFL_RF',   ##< Resume Flag.
    'vm':   'X86_EFL_VM',   ##< Virtual-8086 Mode.
    'ac':   'X86_EFL_AC',   ##< Alignment Check.
    'vif':  'X86_EFL_VIF',  ##< Virtual Interrupt Flag.
    'vip':  'X86_EFL_VIP',  ##< Virtual Interrupt Pending.

    # Reference manual notation not covered above (sorted by value):
    'pf':   'X86_EFL_PF',
    'zf':   'X86_EFL_ZF',
    'sf':   'X86_EFL_SF',
    'if':   'X86_EFL_IF',
    'df':   'X86_EFL_DF',
    'of':   'X86_EFL_OF',
    'iopl': 'X86_EFL_IOPL',
    'id':   'X86_EFL_ID',
};
137
## Constants and values for CR0 (name -> bit value).
## Every entry is a single bit, written as a shift of its bit number.
g_kdX86Cr0Constants = {
    'X86_CR0_PE':           1 << 0,
    'X86_CR0_MP':           1 << 1,
    'X86_CR0_EM':           1 << 2,
    'X86_CR0_TS':           1 << 3,
    'X86_CR0_ET':           1 << 4,
    'X86_CR0_NE':           1 << 5,
    'X86_CR0_WP':           1 << 16,
    'X86_CR0_AM':           1 << 18,
    'X86_CR0_NW':           1 << 29,
    'X86_CR0_CD':           1 << 30,
    'X86_CR0_PG':           1 << 31,
};
152
## Constants and values for CR4 (name -> bit value).
## Every entry is a single bit, written as a shift of its bit number.
g_kdX86Cr4Constants = {
    'X86_CR4_VME':          1 << 0,
    'X86_CR4_PVI':          1 << 1,
    'X86_CR4_TSD':          1 << 2,
    'X86_CR4_DE':           1 << 3,
    'X86_CR4_PSE':          1 << 4,
    'X86_CR4_PAE':          1 << 5,
    'X86_CR4_MCE':          1 << 6,
    'X86_CR4_PGE':          1 << 7,
    'X86_CR4_PCE':          1 << 8,
    'X86_CR4_OSFXSR':       1 << 9,
    'X86_CR4_OSXMMEEXCPT':  1 << 10,
    'X86_CR4_VMXE':         1 << 13,
    'X86_CR4_SMXE':         1 << 14,
    'X86_CR4_PCIDE':        1 << 17,
    'X86_CR4_OSXSAVE':      1 << 18,
    'X86_CR4_SMEP':         1 << 20,
    'X86_CR4_SMAP':         1 << 21,
    'X86_CR4_PKE':          1 << 22,
};
174
## XSAVE components (XCR0), name -> bit value.
## Single components are written as shifts; the two XSAVE_C_ALL_XXX entries are
## spelled out as the OR of the component bits they cover.
g_kdX86XSaveCConstants = {
    'XSAVE_C_X87':          1 << 0,
    'XSAVE_C_SSE':          1 << 1,
    'XSAVE_C_YMM':          1 << 2,
    'XSAVE_C_BNDREGS':      1 << 3,
    'XSAVE_C_BNDCSR':       1 << 4,
    'XSAVE_C_OPMASK':       1 << 5,
    'XSAVE_C_ZMM_HI256':    1 << 6,
    'XSAVE_C_ZMM_16HI':     1 << 7,
    'XSAVE_C_PKRU':         1 << 9,
    'XSAVE_C_LWP':          1 << 62,
    'XSAVE_C_X':            1 << 63,
    # For clearing all AVX bits (YMM | ZMM_HI256 | ZMM_16HI = 0xc4).
    'XSAVE_C_ALL_AVX':      (1 << 2) | (1 << 6) | (1 << 7),
    # For clearing all AVX and SSE bits (the above plus SSE = 0xc6).
    'XSAVE_C_ALL_AVX_SSE':  (1 << 1) | (1 << 2) | (1 << 6) | (1 << 7),
};
191
192
## \@op[1-4] locations.
## Each known location name maps to its own, initially empty, list.
g_kdOpLocations = {
    sLocation: []
    for sLocation in (
        'reg',      # modrm.reg
        'rm',       # modrm.rm
        'imm',      # immediate instruction data
        'vvvv',     # VEX.vvvv

        # Fixed registers.
        'AL',
        'rAX',
        'rDX',
        'CL',
        'rSI',
        'rDI',
        'rFLAGS',
        'CS',
        'DS',
        'ES',
        'FS',
        'GS',
        'SS',

        # Fixed values.
        '1',
    )
};
218
## \@op[1-4] types
##
## Maps an operand type name to a five element tuple.
##
## Value fields:
##    - 0: the normal IDX_ParseXXX handler (IDX_UseModRM == IDX_ParseModRM).
##    - 1: the location (g_kdOpLocations).
##    - 2: disassembler format string version of the type.
##    - 3: disassembler OP_PARAM_XXX (XXX only).
##    - 4: IEM form matching instruction.
##
## Several keys carry a suffix such as _WO, _RO or _RW; those entries reuse the
## format string and OP_PARAM value of the unsuffixed type.
##
## Note! See the A.2.1 in SDM vol 2 for the type names.
g_kdOpTypes = {
    # Fixed addresses
    'Ap':           ( 'IDX_ParseImmAddrF',  'imm',    '%Ap',  'Ap',      'FIXED', ),

    # ModR/M.rm
    'Eb':           ( 'IDX_UseModRM',       'rm',     '%Eb',  'Eb',      'RM',    ),
    'Ed':           ( 'IDX_UseModRM',       'rm',     '%Ed',  'Ed',      'RM',    ),
    'Ed_WO':        ( 'IDX_UseModRM',       'rm',     '%Ed',  'Ed',      'RM',    ),
    'Eq':           ( 'IDX_UseModRM',       'rm',     '%Eq',  'Eq',      'RM',    ),
    'Eq_WO':        ( 'IDX_UseModRM',       'rm',     '%Eq',  'Eq',      'RM',    ),
    'Ew':           ( 'IDX_UseModRM',       'rm',     '%Ew',  'Ew',      'RM',    ),
    'Ev':           ( 'IDX_UseModRM',       'rm',     '%Ev',  'Ev',      'RM',    ),
    'Ey':           ( 'IDX_UseModRM',       'rm',     '%Ey',  'Ey',      'RM',    ),
    'Qd':           ( 'IDX_UseModRM',       'rm',     '%Qd',  'Qd',      'RM',    ),
    'Qq':           ( 'IDX_UseModRM',       'rm',     '%Qq',  'Qq',      'RM',    ),
    'Qq_WO':        ( 'IDX_UseModRM',       'rm',     '%Qq',  'Qq',      'RM',    ),
    'Wss':          ( 'IDX_UseModRM',       'rm',     '%Wss', 'Wss',     'RM',    ),
    'Wss_WO':       ( 'IDX_UseModRM',       'rm',     '%Wss', 'Wss',     'RM',    ),
    'Wsd':          ( 'IDX_UseModRM',       'rm',     '%Wsd', 'Wsd',     'RM',    ),
    'Wsd_WO':       ( 'IDX_UseModRM',       'rm',     '%Wsd', 'Wsd',     'RM',    ),
    'Wps':          ( 'IDX_UseModRM',       'rm',     '%Wps', 'Wps',     'RM',    ),
    'Wps_WO':       ( 'IDX_UseModRM',       'rm',     '%Wps', 'Wps',     'RM',    ),
    'Wpd':          ( 'IDX_UseModRM',       'rm',     '%Wpd', 'Wpd',     'RM',    ),
    'Wpd_WO':       ( 'IDX_UseModRM',       'rm',     '%Wpd', 'Wpd',     'RM',    ),
    'Wdq':          ( 'IDX_UseModRM',       'rm',     '%Wdq', 'Wdq',     'RM',    ),
    'Wdq_WO':       ( 'IDX_UseModRM',       'rm',     '%Wdq', 'Wdq',     'RM',    ),
    'Wq':           ( 'IDX_UseModRM',       'rm',     '%Wq',  'Wq',      'RM',    ),
    'Wq_WO':        ( 'IDX_UseModRM',       'rm',     '%Wq',  'Wq',      'RM',    ),
    'WqZxReg_WO':   ( 'IDX_UseModRM',       'rm',     '%Wq',  'Wq',      'RM',    ),
    'Wx':           ( 'IDX_UseModRM',       'rm',     '%Wx',  'Wx',      'RM',    ),
    'Wx_WO':        ( 'IDX_UseModRM',       'rm',     '%Wx',  'Wx',      'RM',    ),

    # ModR/M.rm - register only.
    'Uq':           ( 'IDX_UseModRM',       'rm',     '%Uq',  'Uq',      'REG'    ),
    'UqHi':         ( 'IDX_UseModRM',       'rm',     '%Uq',  'UqHi',    'REG'    ),
    'Uss':          ( 'IDX_UseModRM',       'rm',     '%Uss', 'Uss',     'REG'    ),
    'Uss_WO':       ( 'IDX_UseModRM',       'rm',     '%Uss', 'Uss',     'REG'    ),
    'Usd':          ( 'IDX_UseModRM',       'rm',     '%Usd', 'Usd',     'REG'    ),
    'Usd_WO':       ( 'IDX_UseModRM',       'rm',     '%Usd', 'Usd',     'REG'    ),
    'Ux':           ( 'IDX_UseModRM',       'rm',     '%Ux',  'Ux',      'REG'    ),
    'Nq':           ( 'IDX_UseModRM',       'rm',     '%Qq',  'Nq',      'REG'    ),

    # ModR/M.rm - memory only.
    'Ma':           ( 'IDX_UseModRM',       'rm',     '%Ma',  'Ma',      'MEM',   ), ##< Only used by BOUND.
    'Mb_RO':        ( 'IDX_UseModRM',       'rm',     '%Mb',  'Mb',      'MEM',   ),
    'Md':           ( 'IDX_UseModRM',       'rm',     '%Md',  'Md',      'MEM',   ),
    'Md_RO':        ( 'IDX_UseModRM',       'rm',     '%Md',  'Md',      'MEM',   ),
    'Md_WO':        ( 'IDX_UseModRM',       'rm',     '%Md',  'Md',      'MEM',   ),
    'Mdq':          ( 'IDX_UseModRM',       'rm',     '%Mdq', 'Mdq',     'MEM',   ),
    'Mdq_WO':       ( 'IDX_UseModRM',       'rm',     '%Mdq', 'Mdq',     'MEM',   ),
    'Mq':           ( 'IDX_UseModRM',       'rm',     '%Mq',  'Mq',      'MEM',   ),
    'Mq_WO':        ( 'IDX_UseModRM',       'rm',     '%Mq',  'Mq',      'MEM',   ),
    'Mps_WO':       ( 'IDX_UseModRM',       'rm',     '%Mps', 'Mps',     'MEM',   ),
    'Mpd_WO':       ( 'IDX_UseModRM',       'rm',     '%Mpd', 'Mpd',     'MEM',   ),
    'Mx':           ( 'IDX_UseModRM',       'rm',     '%Mx',  'Mx',      'MEM',   ),
    'Mx_WO':        ( 'IDX_UseModRM',       'rm',     '%Mx',  'Mx',      'MEM',   ),
    'M_RO':         ( 'IDX_UseModRM',       'rm',     '%M',   'M',       'MEM',   ),
    'M_RW':         ( 'IDX_UseModRM',       'rm',     '%M',   'M',       'MEM',   ),

    # ModR/M.reg
    'Gb':           ( 'IDX_UseModRM',       'reg',    '%Gb',  'Gb',      '',      ),
    'Gw':           ( 'IDX_UseModRM',       'reg',    '%Gw',  'Gw',      '',      ),
    'Gd':           ( 'IDX_UseModRM',       'reg',    '%Gd',  'Gd',      '',      ),
    'Gv':           ( 'IDX_UseModRM',       'reg',    '%Gv',  'Gv',      '',      ),
    'Gv_RO':        ( 'IDX_UseModRM',       'reg',    '%Gv',  'Gv',      '',      ),
    'Gy':           ( 'IDX_UseModRM',       'reg',    '%Gy',  'Gy',      '',      ),
    'Pd':           ( 'IDX_UseModRM',       'reg',    '%Pd',  'Pd',      '',      ),
    'PdZx_WO':      ( 'IDX_UseModRM',       'reg',    '%Pd',  'PdZx',    '',      ),
    'Pq':           ( 'IDX_UseModRM',       'reg',    '%Pq',  'Pq',      '',      ),
    'Pq_WO':        ( 'IDX_UseModRM',       'reg',    '%Pq',  'Pq',      '',      ),
    'Vd':           ( 'IDX_UseModRM',       'reg',    '%Vd',  'Vd',      '',      ),
    'Vd_WO':        ( 'IDX_UseModRM',       'reg',    '%Vd',  'Vd',      '',      ),
    'VdZx_WO':      ( 'IDX_UseModRM',       'reg',    '%Vd',  'Vd',      '',      ),
    'Vdq':          ( 'IDX_UseModRM',       'reg',    '%Vdq', 'Vdq',     '',      ),
    'Vss':          ( 'IDX_UseModRM',       'reg',    '%Vss', 'Vss',     '',      ),
    'Vss_WO':       ( 'IDX_UseModRM',       'reg',    '%Vss', 'Vss',     '',      ),
    'VssZx_WO':     ( 'IDX_UseModRM',       'reg',    '%Vss', 'Vss',     '',      ),
    'Vsd':          ( 'IDX_UseModRM',       'reg',    '%Vsd', 'Vsd',     '',      ),
    'Vsd_WO':       ( 'IDX_UseModRM',       'reg',    '%Vsd', 'Vsd',     '',      ),
    'VsdZx_WO':     ( 'IDX_UseModRM',       'reg',    '%Vsd', 'Vsd',     '',      ),
    'Vps':          ( 'IDX_UseModRM',       'reg',    '%Vps', 'Vps',     '',      ),
    'Vps_WO':       ( 'IDX_UseModRM',       'reg',    '%Vps', 'Vps',     '',      ),
    'Vpd':          ( 'IDX_UseModRM',       'reg',    '%Vpd', 'Vpd',     '',      ),
    'Vpd_WO':       ( 'IDX_UseModRM',       'reg',    '%Vpd', 'Vpd',     '',      ),
    'Vq':           ( 'IDX_UseModRM',       'reg',    '%Vq',  'Vq',      '',      ),
    'Vq_WO':        ( 'IDX_UseModRM',       'reg',    '%Vq',  'Vq',      '',      ),
    'Vdq_WO':       ( 'IDX_UseModRM',       'reg',    '%Vdq', 'Vdq',     '',      ),
    'VqHi':         ( 'IDX_UseModRM',       'reg',    '%Vdq', 'VdqHi',   '',      ),
    'VqHi_WO':      ( 'IDX_UseModRM',       'reg',    '%Vdq', 'VdqHi',   '',      ),
    'VqZx_WO':      ( 'IDX_UseModRM',       'reg',    '%Vq',  'VqZx',    '',      ),
    'Vx':           ( 'IDX_UseModRM',       'reg',    '%Vx',  'Vx',      '',      ),
    'Vx_WO':        ( 'IDX_UseModRM',       'reg',    '%Vx',  'Vx',      '',      ),

    # VEX.vvvv
    'By':           ( 'IDX_UseModRM',       'vvvv',   '%By',  'By',      'V',     ),
    'Hps':          ( 'IDX_UseModRM',       'vvvv',   '%Hps', 'Hps',     'V',     ),
    'Hpd':          ( 'IDX_UseModRM',       'vvvv',   '%Hpd', 'Hpd',     'V',     ),
    'HssHi':        ( 'IDX_UseModRM',       'vvvv',   '%Hx',  'HssHi',   'V',     ),
    'HsdHi':        ( 'IDX_UseModRM',       'vvvv',   '%Hx',  'HsdHi',   'V',     ),
    'Hq':           ( 'IDX_UseModRM',       'vvvv',   '%Hq',  'Hq',      'V',     ),
    'HqHi':         ( 'IDX_UseModRM',       'vvvv',   '%Hq',  'HqHi',    'V',     ),
    'Hx':           ( 'IDX_UseModRM',       'vvvv',   '%Hx',  'Hx',      'V',     ),

    # Immediate values.
    'Ib':           ( 'IDX_ParseImmByte',   'imm',    '%Ib',  'Ib',      '', ), ##< NB! Could be IDX_ParseImmByteSX for some instrs.
    'Iw':           ( 'IDX_ParseImmUshort', 'imm',    '%Iw',  'Iw',      '', ),
    'Id':           ( 'IDX_ParseImmUlong',  'imm',    '%Id',  'Id',      '', ),
    'Iq':           ( 'IDX_ParseImmQword',  'imm',    '%Iq',  'Iq',      '', ),
    'Iv':           ( 'IDX_ParseImmV',      'imm',    '%Iv',  'Iv',      '', ), ##< o16: word, o32: dword, o64: qword
    'Iz':           ( 'IDX_ParseImmZ',      'imm',    '%Iz',  'Iz',      '', ), ##< o16: word, o32|o64:dword

    # Address operands (no ModR/M).
    'Ob':           ( 'IDX_ParseImmAddr',   'imm',    '%Ob',  'Ob',      '', ),
    'Ov':           ( 'IDX_ParseImmAddr',   'imm',    '%Ov',  'Ov',      '', ),

    # Relative jump targets
    'Jb':           ( 'IDX_ParseImmBRel',   'imm',    '%Jb',  'Jb',      '', ),
    'Jv':           ( 'IDX_ParseImmVRel',   'imm',    '%Jv',  'Jv',      '', ),

    # DS:rSI
    'Xb':           ( 'IDX_ParseXb',        'rSI',    '%eSI', 'Xb',      '', ),
    'Xv':           ( 'IDX_ParseXv',        'rSI',    '%eSI', 'Xv',      '', ),
    # ES:rDI
    'Yb':           ( 'IDX_ParseYb',        'rDI',    '%eDI', 'Yb',      '', ),
    'Yv':           ( 'IDX_ParseYv',        'rDI',    '%eDI', 'Yv',      '', ),

    # The flags register.
    'Fv':           ( 'IDX_ParseFixedReg',  'rFLAGS', '%Fv',  'Fv',      '', ),

    # Fixed registers.
    'AL':           ( 'IDX_ParseFixedReg',  'AL',     'al',   'REG_AL',  '', ),
    'REG_CL':       ( 'IDX_ParseFixedReg',  'CL',     'cl',   'REG_CL',  '', ),
    'rAX':          ( 'IDX_ParseFixedReg',  'rAX',    '%eAX', 'REG_EAX', '', ),
    'rDX':          ( 'IDX_ParseFixedReg',  'rDX',    '%eDX', 'REG_EDX', '', ),
    'CS':           ( 'IDX_ParseFixedReg',  'CS',     'cs',   'REG_CS',  '', ), # 8086: push CS
    'DS':           ( 'IDX_ParseFixedReg',  'DS',     'ds',   'REG_DS',  '', ),
    'ES':           ( 'IDX_ParseFixedReg',  'ES',     'es',   'REG_ES',  '', ),
    'FS':           ( 'IDX_ParseFixedReg',  'FS',     'fs',   'REG_FS',  '', ),
    'GS':           ( 'IDX_ParseFixedReg',  'GS',     'gs',   'REG_GS',  '', ),
    'SS':           ( 'IDX_ParseFixedReg',  'SS',     'ss',   'REG_SS',  '', ),

    # Fixed values.
    '1':            ( '',                   '1',      '1',    '1',       '', ),
};
372
373# IDX_ParseFixedReg
374# IDX_ParseVexDest
375
376
## IEMFORM_XXX mappings.
##
## Maps a form name to a three element tuple:
##    - 0: the encoding (one of the g_kdEncodings keys).
##    - 1: list with the location of each operand in order (g_kdOpLocations
##         keys), or None for the 'FIXED' form.
##    - 2: the opcodesub value implied by the form, empty string if none.
##
## NOTE(review): M_REG/M_MEM and VEX_M_REG/VEX_M_MEM have an empty opcodesub
## while every other _REG/_MEM form carries '11 mr/reg' / '!11 mr/reg' -
## presumably intentional, confirm before changing.
g_kdIemForms = {     # sEncoding,   [ sWhere1, ... ]         opcodesub      ),
    'RM':           ( 'ModR/M',     [ 'reg', 'rm' ],         '',            ),
    'RM_REG':       ( 'ModR/M',     [ 'reg', 'rm' ],         '11 mr/reg',   ),
    'RM_MEM':       ( 'ModR/M',     [ 'reg', 'rm' ],         '!11 mr/reg',  ),
    'RMI':          ( 'ModR/M',     [ 'reg', 'rm', 'imm' ],  '',            ),
    'RMI_REG':      ( 'ModR/M',     [ 'reg', 'rm', 'imm' ],  '11 mr/reg',   ),
    'RMI_MEM':      ( 'ModR/M',     [ 'reg', 'rm', 'imm' ],  '!11 mr/reg',  ),
    'MR':           ( 'ModR/M',     [ 'rm', 'reg' ],         '',            ),
    'MR_REG':       ( 'ModR/M',     [ 'rm', 'reg' ],         '11 mr/reg',   ),
    'MR_MEM':       ( 'ModR/M',     [ 'rm', 'reg' ],         '!11 mr/reg',  ),
    'MRI':          ( 'ModR/M',     [ 'rm', 'reg', 'imm' ],  '',            ),
    'MRI_REG':      ( 'ModR/M',     [ 'rm', 'reg', 'imm' ],  '11 mr/reg',   ),
    'MRI_MEM':      ( 'ModR/M',     [ 'rm', 'reg', 'imm' ],  '!11 mr/reg',  ),
    'M':            ( 'ModR/M',     [ 'rm', ],               '',            ),
    'M_REG':        ( 'ModR/M',     [ 'rm', ],               '',            ),
    'M_MEM':        ( 'ModR/M',     [ 'rm', ],               '',            ),
    'M1':           ( 'ModR/M',     [ 'rm', '1' ],           '',            ),
    'M_CL':         ( 'ModR/M',     [ 'rm', 'CL' ],          '',            ), # shl/rcl/ror/++
    'MI':           ( 'ModR/M',     [ 'rm', 'imm' ],         '',            ),
    'MI_REG':       ( 'ModR/M',     [ 'rm', 'imm' ],         '11 mr/reg',   ),
    'MI_MEM':       ( 'ModR/M',     [ 'rm', 'imm' ],         '!11 mr/reg',  ),
    'R':            ( 'ModR/M',     [ 'reg', ],              '',            ),

    'VEX_RM':       ( 'VEX.ModR/M', [ 'reg', 'rm' ],         '',            ),
    'VEX_RM_REG':   ( 'VEX.ModR/M', [ 'reg', 'rm' ],         '11 mr/reg',   ),
    'VEX_RM_MEM':   ( 'VEX.ModR/M', [ 'reg', 'rm' ],         '!11 mr/reg',  ),
    'VEX_MR':       ( 'VEX.ModR/M', [ 'rm', 'reg' ],         '',            ),
    'VEX_MR_REG':   ( 'VEX.ModR/M', [ 'rm', 'reg' ],         '11 mr/reg',   ),
    'VEX_MR_MEM':   ( 'VEX.ModR/M', [ 'rm', 'reg' ],         '!11 mr/reg',  ),
    'VEX_M':        ( 'VEX.ModR/M', [ 'rm', ],               '' ),
    'VEX_M_REG':    ( 'VEX.ModR/M', [ 'rm', ],               '' ),
    'VEX_M_MEM':    ( 'VEX.ModR/M', [ 'rm', ],               '' ),
    'VEX_R':        ( 'VEX.ModR/M', [ 'reg', ],              '' ),
    'VEX_RVM':      ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm' ], '',            ),
    'VEX_RVM_REG':  ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm' ], '11 mr/reg',   ),
    'VEX_RVM_MEM':  ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm' ], '!11 mr/reg',  ),
    'VEX_RMV':      ( 'VEX.ModR/M', [ 'reg', 'rm', 'vvvv' ], '',            ),
    'VEX_RMV_REG':  ( 'VEX.ModR/M', [ 'reg', 'rm', 'vvvv' ], '11 mr/reg',   ),
    'VEX_RMV_MEM':  ( 'VEX.ModR/M', [ 'reg', 'rm', 'vvvv' ], '!11 mr/reg',  ),
    'VEX_RMI':      ( 'VEX.ModR/M', [ 'reg', 'rm', 'imm' ],  '',            ),
    'VEX_RMI_REG':  ( 'VEX.ModR/M', [ 'reg', 'rm', 'imm' ],  '11 mr/reg',   ),
    'VEX_RMI_MEM':  ( 'VEX.ModR/M', [ 'reg', 'rm', 'imm' ],  '!11 mr/reg',  ),
    'VEX_MVR':      ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg' ], '',            ),
    'VEX_MVR_REG':  ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg' ], '11 mr/reg',   ),
    'VEX_MVR_MEM':  ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg' ], '!11 mr/reg',  ),

    'VEX_VM':       ( 'VEX.ModR/M', [ 'vvvv', 'rm' ],        '',            ),
    'VEX_VM_REG':   ( 'VEX.ModR/M', [ 'vvvv', 'rm' ],        '11 mr/reg',   ),
    'VEX_VM_MEM':   ( 'VEX.ModR/M', [ 'vvvv', 'rm' ],        '!11 mr/reg',  ),
    'VEX_VMI':      ( 'VEX.ModR/M', [ 'vvvv', 'rm', 'imm' ], '',            ),
    'VEX_VMI_REG':  ( 'VEX.ModR/M', [ 'vvvv', 'rm', 'imm' ], '11 mr/reg',   ),
    'VEX_VMI_MEM':  ( 'VEX.ModR/M', [ 'vvvv', 'rm', 'imm' ], '!11 mr/reg',  ),

    'FIXED':        ( 'fixed',      None,                    '',            ),
};
433
## \@oppfx values.
## Each accepted prefix spec maps to its own, initially empty, list.
g_kdPrefixes = {
    sPrefix: []
    for sPrefix in (
        'none',
        '0x66',
        '0xf3',
        '0xf2',
        '!0xf3',    # special case for bsf/tzcnt
    )
};
442
## Special \@opcode tag values.
## Each accepted special value maps to its own, initially empty, list.
g_kdSpecialOpcodes = {
    sSpecial: []
    for sSpecial in (
        '/reg',
        'mr/reg',
        '11 /reg',
        '!11 /reg',
        '11 mr/reg',
        '!11 mr/reg',
    )
};
452
## Special \@opcodesub tag values.
##
## Each value is a two element list:
##    - 0: the real (canonical) value; differs from the key only for aliases,
##         and is None for 'none'.
##    - 1: the value for bs3cg1 (empty string if there is none).
g_kdSubOpcodes = {
    'none':                 [ None,                 '',         ],
    '11 mr/reg':            [ '11 mr/reg',          '',         ],
    '11':                   [ '11 mr/reg',          '',         ], ##< alias
    '!11 mr/reg':           [ '!11 mr/reg',         '',         ],
    '!11':                  [ '!11 mr/reg',         '',         ], ##< alias
    'rex.w=0':              [ 'rex.w=0',            'WZ',       ],
    'w=0':                  [ 'rex.w=0',            '',         ], ##< alias
    'rex.w=1':              [ 'rex.w=1',            'WNZ',      ],
    'w=1':                  [ 'rex.w=1',            '',         ], ##< alias
    'vex.l=0':              [ 'vex.l=0',            'L0',       ],
    # Not an alias, so it must resolve to itself (it used to resolve to
    # 'vex.l=0', which turned every VEX.L=1 sub-opcode into VEX.L=0).
    'vex.l=1':              [ 'vex.l=1',            'L1',       ],
    '11 mr/reg vex.l=0':    [ '11 mr/reg vex.l=0',  'L0',       ],
    '11 mr/reg vex.l=1':    [ '11 mr/reg vex.l=1',  'L1',       ],
    '!11 mr/reg vex.l=0':   [ '!11 mr/reg vex.l=0', 'L0',       ],
    '!11 mr/reg vex.l=1':   [ '!11 mr/reg vex.l=1', 'L1',       ],
    '!11 mr/reg rex.w=0':   [ '!11 mr/reg rex.w=0', '',         ],
    '!11 mr/reg rex.w=1':   [ '!11 mr/reg rex.w=1', '',         ],
};
475
## Valid values for \@openc.
## Each encoding maps to a one element list holding the BS3CG1ENC_XXX constant
## name, or None where there is no such constant.
g_kdEncodings = {
    # ModR/M
    'ModR/M':       [ 'BS3CG1ENC_MODRM', ],
    # VEX...ModR/M
    'VEX.ModR/M':   [ 'BS3CG1ENC_VEX_MODRM', ],
    # Fixed encoding (address, registers, unused, etc).
    'fixed':        [ 'BS3CG1ENC_FIXED', ],
    # VEX + fixed encoding (address, registers, unused, etc).
    'VEX.fixed':    [ 'BS3CG1ENC_VEX_FIXED', ],
    # Prefix
    'prefix':       [ None, ],
};
484
## \@opunused, \@opinvalid, \@opinvlstyle
## Each accepted style name maps to an (initially empty) list.
g_kdInvalidStyles = {
    'immediate':                [], ##< CPU stops decoding immediately after the opcode.
    'vex.modrm':                [], ##< VEX+ModR/M, everyone.
    'intel-modrm':              [], ##< Intel decodes ModR/M.
    'intel-modrm-imm8':         [], ##< Intel decodes ModR/M and an 8-bit immediate.
    'intel-opcode-modrm':       [], ##< Intel decodes another opcode byte followed by ModR/M. (Unused extension tables.)
    'intel-opcode-modrm-imm8':  [], ##< Intel decodes another opcode byte followed by ModR/M and an 8-bit immediate.
};
494
## Known CPU names.  Each name maps to an empty tuple.
g_kdCpuNames = {
    sCpuName: ()
    for sCpuName in ( '8086', '80186', '80286', '80386', '80486', )
};
502
## \@opcpuid
## Maps a CPUID feature mnemonic to the name of the X86_CPUID_XXX feature constant.
##
## NOTE(review): 'pclmul', 'monitor' and 'smx' map to constants named after
## other features (..._ECX_DTES64, ..._ECX_CPLDS and ..._ECX_TM2).  That looks
## like a slip rather than intent - verify against the X86_CPUID_FEATURE_ECX_XXX
## definitions before relying on these three entries.
g_kdCpuIdFlags = {
    'vme':          'X86_CPUID_FEATURE_EDX_VME',
    'tsc':          'X86_CPUID_FEATURE_EDX_TSC',
    'msr':          'X86_CPUID_FEATURE_EDX_MSR',
    'cx8':          'X86_CPUID_FEATURE_EDX_CX8',
    'sep':          'X86_CPUID_FEATURE_EDX_SEP',
    'cmov':         'X86_CPUID_FEATURE_EDX_CMOV',
    'clfsh':        'X86_CPUID_FEATURE_EDX_CLFSH',
    'clflushopt':   'X86_CPUID_STEXT_FEATURE_EBX_CLFLUSHOPT',
    'mmx':          'X86_CPUID_FEATURE_EDX_MMX',
    'fxsr':         'X86_CPUID_FEATURE_EDX_FXSR',
    'sse':          'X86_CPUID_FEATURE_EDX_SSE',
    'sse2':         'X86_CPUID_FEATURE_EDX_SSE2',
    'sse3':         'X86_CPUID_FEATURE_ECX_SSE3',
    'pclmul':       'X86_CPUID_FEATURE_ECX_DTES64',
    'monitor':      'X86_CPUID_FEATURE_ECX_CPLDS',
    'vmx':          'X86_CPUID_FEATURE_ECX_VMX',
    'smx':          'X86_CPUID_FEATURE_ECX_TM2',
    'ssse3':        'X86_CPUID_FEATURE_ECX_SSSE3',
    'fma':          'X86_CPUID_FEATURE_ECX_FMA',
    'cx16':         'X86_CPUID_FEATURE_ECX_CX16',
    'pcid':         'X86_CPUID_FEATURE_ECX_PCID',
    'sse4.1':       'X86_CPUID_FEATURE_ECX_SSE4_1',
    'sse4.2':       'X86_CPUID_FEATURE_ECX_SSE4_2',
    'movbe':        'X86_CPUID_FEATURE_ECX_MOVBE',
    'popcnt':       'X86_CPUID_FEATURE_ECX_POPCNT',
    'aes':          'X86_CPUID_FEATURE_ECX_AES',
    'xsave':        'X86_CPUID_FEATURE_ECX_XSAVE',
    'avx':          'X86_CPUID_FEATURE_ECX_AVX',
    'avx2':         'X86_CPUID_STEXT_FEATURE_EBX_AVX2',
    'f16c':         'X86_CPUID_FEATURE_ECX_F16C',
    'rdrand':       'X86_CPUID_FEATURE_ECX_RDRAND',

    # AMD feature flags.
    'axmmx':        'X86_CPUID_AMD_FEATURE_EDX_AXMMX',
    '3dnowext':     'X86_CPUID_AMD_FEATURE_EDX_3DNOW_EX',
    '3dnow':        'X86_CPUID_AMD_FEATURE_EDX_3DNOW',
    'svm':          'X86_CPUID_AMD_FEATURE_ECX_SVM',
    'cr8l':         'X86_CPUID_AMD_FEATURE_ECX_CR8L',
    'abm':          'X86_CPUID_AMD_FEATURE_ECX_ABM',
    'sse4a':        'X86_CPUID_AMD_FEATURE_ECX_SSE4A',
    '3dnowprf':     'X86_CPUID_AMD_FEATURE_ECX_3DNOWPRF',
    'xop':          'X86_CPUID_AMD_FEATURE_ECX_XOP',
    'fma4':         'X86_CPUID_AMD_FEATURE_ECX_FMA4',
};
548
## \@ophints values.
## Maps a hint name to the name of the DISOPTYPE_XXX constant, or to an empty
## string for hints that have no such constant.
# pylint: disable=line-too-long
g_kdHints = {
    'invalid':                   'DISOPTYPE_INVALID',                   ##<
    'harmless':                  'DISOPTYPE_HARMLESS',                  ##<
    'controlflow':               'DISOPTYPE_CONTROLFLOW',               ##<
    'potentially_dangerous':     'DISOPTYPE_POTENTIALLY_DANGEROUS',     ##<
    'dangerous':                 'DISOPTYPE_DANGEROUS',                 ##<
    'portio':                    'DISOPTYPE_PORTIO',                    ##<
    'privileged':                'DISOPTYPE_PRIVILEGED',                ##<
    'privileged_notrap':         'DISOPTYPE_PRIVILEGED_NOTRAP',         ##<
    'uncond_controlflow':        'DISOPTYPE_UNCOND_CONTROLFLOW',        ##<
    'relative_controlflow':      'DISOPTYPE_RELATIVE_CONTROLFLOW',      ##<
    'cond_controlflow':          'DISOPTYPE_COND_CONTROLFLOW',          ##<
    'interrupt':                 'DISOPTYPE_INTERRUPT',                 ##<
    'illegal':                   'DISOPTYPE_ILLEGAL',                   ##<
    'rrm_dangerous':             'DISOPTYPE_RRM_DANGEROUS',             ##< Some additional dangerous ones when recompiling raw r0.
    'rrm_dangerous_16':          'DISOPTYPE_RRM_DANGEROUS_16',          ##< Some additional dangerous ones when recompiling 16-bit raw r0.
    'inhibit_irqs':              'DISOPTYPE_INHIBIT_IRQS',              ##< Will or can inhibit irqs (sti, pop ss, mov ss)
    'x86_portio_read':           'DISOPTYPE_X86_PORTIO_READ',           ##<
    'x86_portio_write':          'DISOPTYPE_X86_PORTIO_WRITE',          ##<
    'x86_invalid_64':            'DISOPTYPE_X86_INVALID_64',            ##< Invalid in 64 bits mode
    'x86_only_64':               'DISOPTYPE_X86_ONLY_64',               ##< Only valid in 64 bits mode
    'x86_default_64_op_size':    'DISOPTYPE_X86_DEFAULT_64_OP_SIZE',    ##< Default 64 bits operand size
    'x86_forced_64_op_size':     'DISOPTYPE_X86_FORCED_64_OP_SIZE',     ##< Forced 64 bits operand size; regardless of prefix bytes
    'x86_rexb_extends_opreg':    'DISOPTYPE_X86_REXB_EXTENDS_OPREG',    ##< REX.B extends the register field in the opcode byte
    'x86_mod_fixed_11':          'DISOPTYPE_X86_MOD_FIXED_11',          ##< modrm.mod is always 11b
    'x86_forced_32_op_size_x86': 'DISOPTYPE_X86_FORCED_32_OP_SIZE_X86', ##< Forced 32 bits operand size; regardless of prefix bytes
                                                                        ##  (only in 16 & 32 bits mode!)
    'x86_avx':                   'DISOPTYPE_X86_AVX',                   ##< AVX,AVX2,++ instruction. Not implemented yet!
    'x86_sse':                   'DISOPTYPE_X86_SSE',                   ##< SSE,SSE2,SSE3,++ instruction. Not implemented yet!
    'x86_mmx':                   'DISOPTYPE_X86_MMX',                   ##< MMX,MMXExt,3DNow,++ instruction. Not implemented yet!
    'x86_fpu':                   'DISOPTYPE_X86_FPU',                   ##< FPU instruction. Not implemented yet!
    'ignores_oz_pfx':            '',                                    ##< Ignores operand size prefix 66h.
    'ignores_rexw':              '',                                    ##< Ignores REX.W.
    'ignores_op_sizes':          '',                                    ##< Shorthand for "ignores_oz_pfx | ignores_rexw".
    'vex_l_zero':                '',                                    ##< VEX.L must be 0.
    'vex_l_ignored':             '',                                    ##< VEX.L is ignored.
    'vex_v_zero':                '',                                    ##< VEX.V must be 0. (generate sub-table?)
    'lock_allowed':              '',                                    ##< Lock prefix allowed.
};
# pylint: enable=line-too-long
591
## \@opxcpttype values (see SDMv2 2.4, 2.7).
## Each accepted exception type maps to its own, initially empty, list.
g_kdXcptTypes = {
    sXcptType: []
    for sXcptType in (
        'none',
        '1',
        '2',
        '3',
        '4',
        '4UA',
        '5',
        '5LZ',      # LZ = VEX.L must be zero.
        '6',
        '7',
        '7LZ',
        '8',
        '11',
        '12',
        'E1',
        'E1NF',
        'E2',
        'E3',
        'E3NF',
        'E4',
        'E4NF',
        'E5',
        'E5NF',
        'E6',
        'E6NF',
        'E7NF',
        'E9',
        'E9NF',
        'E10',
        'E11',
        'E12',
        'E12NF',
    )
};
627
628
def _isValidOpcodeByte(sOpcode):
    """
    Checks whether sOpcode is an opcode byte written as '0x' followed by
    exactly two lower case hex digits (e.g. '0x0f').

    Returns True if it is, False if not.
    """
    sHexDigits = '0123456789abcdef';

    # Anything that isn't four characters starting with '0x' is out right away.
    if len(sOpcode) != 4 or sOpcode[:2] != '0x':
        return False;

    # Both remaining characters must be lower case hex digits.
    return sOpcode[2] in sHexDigits and sOpcode[3] in sHexDigits;
640
641
class InstructionMap(object):
    """
    Instruction map.

    The opcode map provides the lead opcode bytes (empty for the one byte
    opcode map).  An instruction can be member of multiple opcode maps as long
    as it uses the same opcode value within the map (because of VEX).
    """

    ## Valid values for the sEncoding constructor parameter.
    kdEncodings = {
        'legacy':   [],
        'vex1':     [], ##< VEX or EVEX prefix with vvvvv = 1
        'vex2':     [], ##< VEX or EVEX prefix with vvvvv = 2
        'vex3':     [], ##< VEX or EVEX prefix with vvvvv = 3
        'xop8':     [], ##< XOP prefix with vvvvv = 8
        'xop9':     [], ##< XOP prefix with vvvvv = 9
        'xop10':    [], ##< XOP prefix with vvvvv = 10
    };
    ## Selectors.
    ## 1. The first value is the number of table entries required by a
    ##    decoder or disassembler for this type of selector.
    ## 2. The second value is how many entries per opcode byte if applicable.
    kdSelectors = {
        'byte':     [  256, 1, ], ##< next opcode byte selects the instruction (default).
        'byte+pfx': [ 1024, 4, ], ##< next opcode byte selects the instruction together with the 0x66, 0xf2 and 0xf3 prefixes.
        '/r':       [    8, 1, ], ##< modrm.reg selects the instruction.
        'memreg /r':[   16, 1, ], ##< modrm.reg and (modrm.mod == 3) selects the instruction.
        'mod /r':   [   32, 1, ], ##< modrm.reg and modrm.mod selects the instruction.
        '!11 /r':   [    8, 1, ], ##< modrm.reg selects the instruction with modrm.mod != 0y11.
        '11 /r':    [    8, 1, ], ##< modrm.reg select the instruction with modrm.mod == 0y11.
        '11':       [   64, 1, ], ##< modrm.reg and modrm.rm select the instruction with modrm.mod == 0y11.
    };

    ## Define the subentry number according to the Instruction::sPrefix
    ## value for 'byte+pfx' selected tables.
    kiPrefixOrder = {
        'none': 0,
        '0x66': 1,
        '0xf3': 2,
        '0xf2': 3,
    };

    def __init__(self, sName, sIemName = None, asLeadOpcodes = None, sSelector = 'byte+pfx',
                 sEncoding = 'legacy', sDisParse = None):
        """
        Creates an empty map.

        sName is the map name, sIemName an optional second name that is just
        stored.  asLeadOpcodes is the list of lead opcode bytes as lower case
        hex strings (like '0x0f'), None meaning no lead bytes.  sSelector must
        be a kdSelectors key and sEncoding a kdEncodings key.  sDisParse is
        None or a string starting with 'IDX_Parse'.
        """
        assert sSelector in self.kdSelectors;
        assert sEncoding in self.kdEncodings;
        if asLeadOpcodes is None:
            asLeadOpcodes = [];
        else:
            for sOpcode in asLeadOpcodes:
                assert _isValidOpcodeByte(sOpcode);
        assert sDisParse is None or sDisParse.startswith('IDX_Parse');

        self.sName          = sName;
        self.sIemName       = sIemName;
        self.asLeadOpcodes  = asLeadOpcodes;    ##< Lead opcode bytes formatted as hex strings like '0x0f'.
        self.sSelector      = sSelector;        ##< The member selector, see kdSelectors.
        self.sEncoding      = sEncoding;        ##< The encoding, see kdEncodings.
        self.aoInstructions = []                # type: List[Instruction]
        self.sDisParse      = sDisParse;        ##< IDX_ParseXXX.

    def copy(self, sNewName, sPrefixFilter = None):
        """
        Copies the table with filtering instruction by sPrefix if not None.

        When filtering a 'byte+pfx' table the copy gets the plain 'byte'
        selector, as only the instructions of one prefix remain.  The
        instruction objects themselves are shared with the original.
        """
        oCopy = InstructionMap(sNewName, sIemName = self.sIemName, asLeadOpcodes = self.asLeadOpcodes,
                               sSelector = 'byte' if sPrefixFilter is not None and self.sSelector == 'byte+pfx'
                                      else self.sSelector,
                               sEncoding = self.sEncoding, sDisParse = self.sDisParse);
        if sPrefixFilter is None:
            oCopy.aoInstructions = list(self.aoInstructions);
        else:
            oCopy.aoInstructions = [oInstr for oInstr in self.aoInstructions if oInstr.sPrefix == sPrefixFilter];
        return oCopy;

    def getTableSize(self):
        """
        Number of table entries.  This corresponds directly to the selector.
        """
        return self.kdSelectors[self.sSelector][0];

    def getEntriesPerByte(self):
        """
        Number of table entries per opcode bytes.

        This only really makes sense for the 'byte' and 'byte+pfx' selectors, for
        the others it will just return 1.
        """
        return self.kdSelectors[self.sSelector][1];

    def getInstructionIndex(self, oInstr):
        """
        Returns the table index for the instruction.

        The index is derived from oInstr.getOpcodeByte() according to the
        selector of this map; for 'byte+pfx' oInstr.sPrefix picks the subentry
        (see kiPrefixOrder).
        """
        bOpcode = oInstr.getOpcodeByte();

        # The byte selectors are simple.  We need a full opcode byte and need just return it.
        if self.sSelector == 'byte':
            assert oInstr.sOpcode[:2] == '0x' and len(oInstr.sOpcode) == 4, str(oInstr);
            return bOpcode;

        # The byte + prefix selector is similarly simple, though requires a prefix as well as the full opcode.
        if self.sSelector == 'byte+pfx':
            assert oInstr.sOpcode[:2] == '0x' and len(oInstr.sOpcode) == 4, str(oInstr);
            iPrefix = self.kiPrefixOrder.get(oInstr.sPrefix, -16384);
            assert iPrefix >= 0;
            return bOpcode * 4 + iPrefix;

        # The other selectors needs masking and shifting.
        if self.sSelector == '/r':
            return (bOpcode >> 3) & 0x7;

        if self.sSelector == 'mod /r':
            return (bOpcode >> 3) & 0x1f;

        if self.sSelector == 'memreg /r':
            return ((bOpcode >> 3) & 0x7) | (int((bOpcode >> 6) == 3) << 3);

        # Note: the mod field is bits 7:6, so the mod == 0y11 test is against 0xc0.
        if self.sSelector == '!11 /r':
            assert (bOpcode & 0xc0) != 0xc0, str(oInstr);
            return (bOpcode >> 3) & 0x7;

        if self.sSelector == '11 /r':
            assert (bOpcode & 0xc0) == 0xc0, str(oInstr);
            return (bOpcode >> 3) & 0x7;

        if self.sSelector == '11':
            assert (bOpcode & 0xc0) == 0xc0, str(oInstr);
            return bOpcode & 0x3f;

        assert False, self.sSelector;
        return -1;

    def getInstructionsInTableOrder(self):
        """
        Get instructions in table order.

        Returns array of instructions.  Normally there is exactly one
        instruction per entry.  However the entry could also be None if
        not instruction was specified for that opcode value.  Or there
        could be a list of instructions to deal with special encodings
        where for instance prefix (e.g. REX.W) encodes a different
        instruction or different CPUs have different instructions or
        prefixes in the same place.
        """
        # Start with empty table.
        cTable  = self.getTableSize();
        aoTable = [None] * cTable;

        # Insert the instructions (those without an opcode are skipped).
        for oInstr in self.aoInstructions:
            if oInstr.sOpcode:
                idxOpcode = self.getInstructionIndex(oInstr);
                assert idxOpcode < cTable, str(idxOpcode);

                oExisting = aoTable[idxOpcode];
                if oExisting is None:
                    aoTable[idxOpcode] = oInstr;
                elif not isinstance(oExisting, list):
                    # Second instruction in this slot, switch to a list.
                    aoTable[idxOpcode] = [oExisting, oInstr];
                else:
                    oExisting.append(oInstr);

        return aoTable;


    def getDisasTableName(self):
        """
        Returns the disassembler table name for this map.

        The name is 'g_aDisas' followed by the '_' separated words of sName:
        'm' and 'r' as well as two digit lower case hex words are appended
        with a leading underscore, all other words are appended capitalized
        (with 'grp' and 'map' turned into 'Grp' and 'Map').
        """
        sName = 'g_aDisas';
        for sWord in self.sName.split('_'):
            if sWord == 'm':   # suffix indicating modrm.mod==mem
                sName += '_m';
            elif sWord == 'r': # suffix indicating modrm.mod==reg
                sName += '_r';
            elif len(sWord) == 2 and re.match('^[a-f0-9][a-f0-9]$', sWord):
                sName += '_' + sWord;
            else:
                sWord  = sWord.replace('grp', 'Grp');
                sWord  = sWord.replace('map', 'Map');
                sName += sWord[0].upper() + sWord[1:];
        return sName;

    def getDisasRangeName(self):
        """
        Returns the disassembler table range name for this map.
        """
        return self.getDisasTableName().replace('g_aDisas', 'g_Disas') + 'Range';

    def isVexMap(self):
        """ Returns True if a VEX map. """
        return self.sEncoding.startswith('vex');
834
835
class TestType(object):
    """
    Test value type.

    This base class deals with integer like values.  The fUnsigned constructor
    parameter indicates the default stance on zero vs sign extending.  It is
    possible to override fUnsigned=True by prefixing the value with '+' or '-'.
    """
    def __init__(self, sName, acbSizes = None, fUnsigned = True):
        self.sName     = sName;
        self.acbSizes  = [1, 2, 4, 8, 16, 32] if acbSizes is None else acbSizes;  # Normal sizes.
        self.fUnsigned = fUnsigned;

    class BadValue(Exception):
        """ Bad value exception. """
        def __init__(self, sMessage):
            Exception.__init__(self, sMessage);
            self.sMessage = sMessage;

    ## For ascii ~ operator.
    kdHexInv = {
        '0': 'f',
        '1': 'e',
        '2': 'd',
        '3': 'c',
        '4': 'b',
        '5': 'a',
        '6': '9',
        '7': '8',
        '8': '7',
        '9': '6',
        'a': '5',
        'b': '4',
        'c': '3',
        'd': '2',
        'e': '1',
        'f': '0',
    };

    @staticmethod
    def _toHexDigits(iValue):
        """
        Returns the lowercase hex digits of the non-negative integer iValue,
        without the '0x' prefix and without the 'L' suffix python 2 appends
        to long integers.
        """
        sHex = hex(iValue);
        if sHex[-1] == 'L':
            sHex = sHex[:-1];
        assert sHex[:2] == '0x';
        return sHex[2:];

    def get(self, sValue):
        """
        Get the shortest normal sized byte representation of sValue.

        The value is a decimal or '0x' prefixed hexadecimal number, optionally
        preceded by '+' or '-'.  Either sign character forces sign extension,
        otherwise the fUnsigned setting of the type decides.

        Returns ((fSignExtend, bytearray), ) or ((fSignExtend, bytearray), (fSignExtend, bytearray), ).
        The latter form is for AND+OR pairs where the first entry is what to
        AND with the field and the second the one to OR with.  This base class
        only produces the first form.  The bytearray holds the value with the
        least significant byte first and is padded to one of the sizes in
        self.acbSizes, or to a multiple of the largest size if the value does
        not fit any of them.

        Raises BadValue if invalid value.
        """
        if not sValue:
            raise TestType.BadValue('empty value');

        # Deal with sign and detect hexadecimal or decimal.
        fSignExtend = not self.fUnsigned;
        if sValue[0] == '-' or sValue[0] == '+':
            fSignExtend = True;
            fHex = len(sValue) > 3 and sValue[1:3].lower() == '0x';
        else:
            fHex = len(sValue) > 2 and sValue[0:2].lower() == '0x';

        # Try convert it to an integer.  Only conversion problems are reported as
        # bad values; anything else is a programming error and must not be hidden.
        try:
            iValue = int(sValue, 16 if fHex else 10);
        except (ValueError, TypeError) as oXcpt:
            raise TestType.BadValue('failed to convert "%s" to integer (%s)' % (sValue, oXcpt));

        # Convert the hex string and pad it to a decent value.  Negative values
        # needs to be manually converted to something non-negative (~-n + 1).
        if iValue >= 0:
            sHex = self._toHexDigits(iValue);
        else:
            sHex = ''.join([self.kdHexInv[sDigit] for sDigit in self._toHexDigits(-iValue - 1)]);
            # Make sure the top bit is set so the padding below extends the sign correctly.
            if fSignExtend and sHex[0] not in [ '8', '9', 'a', 'b', 'c', 'd', 'e', 'f']:
                sHex = 'f' + sHex;

        # Find the natural size: the first size that fits, or else the next
        # multiple of the largest size.
        cDigits    = len(sHex);
        cMaxDigits = self.acbSizes[-1] * 2;
        if cDigits <= cMaxDigits:
            for cb in self.acbSizes:
                cNaturalDigits = cb * 2;
                if cDigits <= cNaturalDigits:
                    break;
        else:
            cNaturalDigits = ((cDigits + cMaxDigits - 1) // cMaxDigits) * cMaxDigits;

        if cNaturalDigits != cDigits:
            cNeeded = cNaturalDigits - cDigits;
            sHex = ('0' if iValue >= 0 else 'f') * cNeeded + sHex;

        # Convert to bytearray, taking the digit pairs from the end of the string
        # so the least significant byte comes first, and return it.
        abValue = bytearray([int(sHex[offHex - 2 : offHex], 16) for offHex in range(len(sHex), 0, -2)]);

        return ((fSignExtend, abValue),);

    def validate(self, sValue):
        """
        Returns True if value is okay, error message on failure.
        """
        try:
            self.get(sValue);
        except TestType.BadValue as oXcpt:
            return oXcpt.sMessage;
        return True;

    def isAndOrPair(self, sValue):
        """
        Checks if sValue is a pair.

        Always False for this base class.
        """
        _ = sValue;
        return False;
960
961
class TestTypeEflags(TestType):
    """
    Special value parsing for EFLAGS/RFLAGS/FLAGS.

    The value is a comma separated list of flag mnemonics which are looked up
    in g_kdEFlagsMnemonics.  Mnemonics mapping to a constant prefixed by '!'
    request the flag to be cleared, the others request it to be set.
    """

    ## The mnemonics requesting a flag to be cleared (dummy values).
    ## Presumably mirrors the '!' entries of g_kdEFlagsMnemonics - verify when changing either.
    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 };

    def __init__(self, sName):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);

    def get(self, sValue):
        """
        Converts the flag list into set and clear masks.

        Returns ((fSignExtend, abClear), (fSignExtend, abSet)) if any flag is
        to be cleared, otherwise ((fSignExtend, abSet), ).  The masks are
        encoded by TestType.get().

        Raises BadValue on unknown flag mnemonics.
        """
        fClear = 0;
        fSet   = 0;
        for sFlag in sValue.split(','):
            sConstant = g_kdEFlagsMnemonics.get(sFlag, None);
            if sConstant is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sFlag, sValue))
            if sConstant[0] == '!':
                fClear |= g_kdX86EFlagsConstants[sConstant[1:]];
            else:
                fSet   |= g_kdX86EFlagsConstants[sConstant];

        aoSet = TestType.get(self, '0x%x' % (fSet,));
        if fClear != 0:
            aoClear = TestType.get(self, '%#x' % (fClear,))
            assert self.isAndOrPair(sValue) is True;
            return (aoClear[0], aoSet[0]);
        assert self.isAndOrPair(sValue) is False;
        return aoSet;

    def isAndOrPair(self, sValue):
        """
        Checks if sValue is a pair, i.e. whether it lists any flag to clear.

        The value is split on ',' exactly like get() does.  A plain substring
        search would also report a pair for a mnemonic that merely contains
        one of the two letter names (for instance one ending in 'pl'), which
        would trip the consistency assertions in get().
        """
        for sFlag in sValue.split(','):
            if sFlag in self.kdZeroValueFlags:
                return True;
        return False;
997
class TestTypeFromDict(TestType):
    """
    Special value parsing for registers whose flags are named constants.

    The value is a comma separated list of flag names.  Each name is
    uppercased, prefixed by sConstantPrefix and looked up in
    kdConstantsAndValues, the results are ORed together.
    """

    ## Not referenced by this class.
    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 };

    def __init__(self, sName, kdConstantsAndValues, sConstantPrefix):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);
        self.sConstantPrefix      = sConstantPrefix;
        self.kdConstantsAndValues = kdConstantsAndValues;

    def get(self, sValue):
        """
        Returns the TestType.get() encoding of the ORed flag values.

        Raises BadValue on unknown flag names.
        """
        fCombined = 0;
        for sName in sValue.split(','):
            sConstant = self.sConstantPrefix + sName.upper();
            fBits     = self.kdConstantsAndValues.get(sConstant);
            if fBits is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sName, sValue))
            fCombined |= fBits;
        return TestType.get(self, '0x%x' % (fCombined,));
1018
1019
class TestInOut(object):
    """
    One input or output state modifier.

    This should be thought as values to modify BS3REGCTX and extended (needs
    to be structured) state.

    An instance holds the field name (a kdFields key), the operator (one of
    kasOperators), the value string and the type name, all as strings.
    """
    ## Assigned operators.
    kasOperators = [
        '&|=',  # Special AND(INV)+OR operator for use with EFLAGS.
        '&~=',
        '&=',
        '|=',
        '='
    ];
    ## Types
    ## NOTE(review): 'int' relies on the TestType default of fUnsigned=True, so it
    ##               zero extends just like 'uint'.  Confirm whether that is intended.
    kdTypes = {
        'uint':  TestType('uint', fUnsigned = True),
        'int':   TestType('int'),
        'efl':   TestTypeEflags('efl'),
        'cr0':   TestTypeFromDict('cr0', g_kdX86Cr0Constants, 'X86_CR0_'),
        'cr4':   TestTypeFromDict('cr4', g_kdX86Cr4Constants, 'X86_CR4_'),
        'xcr0':  TestTypeFromDict('xcr0', g_kdX86XSaveCConstants, 'XSAVE_C_'),
    };
    ## CPU context fields.
    kdFields = {
        # name:         ( default type, [both|input|output], )
        # Operands.
        'op1':          ( 'uint', 'both',   ), ## \@op1
        'op2':          ( 'uint', 'both',   ), ## \@op2
        'op3':          ( 'uint', 'both',   ), ## \@op3
        'op4':          ( 'uint', 'both',   ), ## \@op4
        # Flags.
        'efl':          ( 'efl',  'both',   ),
        'efl_undef':    ( 'uint', 'output', ),
        # 8-bit GPRs.
        'al':           ( 'uint', 'both',   ),
        'cl':           ( 'uint', 'both',   ),
        'dl':           ( 'uint', 'both',   ),
        'bl':           ( 'uint', 'both',   ),
        'ah':           ( 'uint', 'both',   ),
        'ch':           ( 'uint', 'both',   ),
        'dh':           ( 'uint', 'both',   ),
        'bh':           ( 'uint', 'both',   ),
        'r8l':          ( 'uint', 'both',   ),
        'r9l':          ( 'uint', 'both',   ),
        'r10l':         ( 'uint', 'both',   ),
        'r11l':         ( 'uint', 'both',   ),
        'r12l':         ( 'uint', 'both',   ),
        'r13l':         ( 'uint', 'both',   ),
        'r14l':         ( 'uint', 'both',   ),
        'r15l':         ( 'uint', 'both',   ),
        # 16-bit GPRs.
        'ax':           ( 'uint', 'both',   ),
        'dx':           ( 'uint', 'both',   ),
        'cx':           ( 'uint', 'both',   ),
        'bx':           ( 'uint', 'both',   ),
        'sp':           ( 'uint', 'both',   ),
        'bp':           ( 'uint', 'both',   ),
        'si':           ( 'uint', 'both',   ),
        'di':           ( 'uint', 'both',   ),
        'r8w':          ( 'uint', 'both',   ),
        'r9w':          ( 'uint', 'both',   ),
        'r10w':         ( 'uint', 'both',   ),
        'r11w':         ( 'uint', 'both',   ),
        'r12w':         ( 'uint', 'both',   ),
        'r13w':         ( 'uint', 'both',   ),
        'r14w':         ( 'uint', 'both',   ),
        'r15w':         ( 'uint', 'both',   ),
        # 32-bit GPRs.
        'eax':          ( 'uint', 'both',   ),
        'edx':          ( 'uint', 'both',   ),
        'ecx':          ( 'uint', 'both',   ),
        'ebx':          ( 'uint', 'both',   ),
        'esp':          ( 'uint', 'both',   ),
        'ebp':          ( 'uint', 'both',   ),
        'esi':          ( 'uint', 'both',   ),
        'edi':          ( 'uint', 'both',   ),
        'r8d':          ( 'uint', 'both',   ),
        'r9d':          ( 'uint', 'both',   ),
        'r10d':         ( 'uint', 'both',   ),
        'r11d':         ( 'uint', 'both',   ),
        'r12d':         ( 'uint', 'both',   ),
        'r13d':         ( 'uint', 'both',   ),
        'r14d':         ( 'uint', 'both',   ),
        'r15d':         ( 'uint', 'both',   ),
        # 64-bit GPRs.
        'rax':          ( 'uint', 'both',   ),
        'rdx':          ( 'uint', 'both',   ),
        'rcx':          ( 'uint', 'both',   ),
        'rbx':          ( 'uint', 'both',   ),
        'rsp':          ( 'uint', 'both',   ),
        'rbp':          ( 'uint', 'both',   ),
        'rsi':          ( 'uint', 'both',   ),
        'rdi':          ( 'uint', 'both',   ),
        'r8':           ( 'uint', 'both',   ),
        'r9':           ( 'uint', 'both',   ),
        'r10':          ( 'uint', 'both',   ),
        'r11':          ( 'uint', 'both',   ),
        'r12':          ( 'uint', 'both',   ),
        'r13':          ( 'uint', 'both',   ),
        'r14':          ( 'uint', 'both',   ),
        'r15':          ( 'uint', 'both',   ),
        # 16-bit, 32-bit or 64-bit registers according to operand size.
        'oz.rax':       ( 'uint', 'both',   ),
        'oz.rdx':       ( 'uint', 'both',   ),
        'oz.rcx':       ( 'uint', 'both',   ),
        'oz.rbx':       ( 'uint', 'both',   ),
        'oz.rsp':       ( 'uint', 'both',   ),
        'oz.rbp':       ( 'uint', 'both',   ),
        'oz.rsi':       ( 'uint', 'both',   ),
        'oz.rdi':       ( 'uint', 'both',   ),
        'oz.r8':        ( 'uint', 'both',   ),
        'oz.r9':        ( 'uint', 'both',   ),
        'oz.r10':       ( 'uint', 'both',   ),
        'oz.r11':       ( 'uint', 'both',   ),
        'oz.r12':       ( 'uint', 'both',   ),
        'oz.r13':       ( 'uint', 'both',   ),
        'oz.r14':       ( 'uint', 'both',   ),
        'oz.r15':       ( 'uint', 'both',   ),
        # Control registers.
        'cr0':          ( 'cr0',  'both',   ),
        'cr4':          ( 'cr4',  'both',   ),
        'xcr0':         ( 'xcr0', 'both',   ),
        # FPU Registers
        'fcw':          ( 'uint', 'both',   ),
        'fsw':          ( 'uint', 'both',   ),
        'ftw':          ( 'uint', 'both',   ),
        'fop':          ( 'uint', 'both',   ),
        'fpuip':        ( 'uint', 'both',   ),
        'fpucs':        ( 'uint', 'both',   ),
        'fpudp':        ( 'uint', 'both',   ),
        'fpuds':        ( 'uint', 'both',   ),
        'mxcsr':        ( 'uint', 'both',   ),
        'st0':          ( 'uint', 'both',   ),
        'st1':          ( 'uint', 'both',   ),
        'st2':          ( 'uint', 'both',   ),
        'st3':          ( 'uint', 'both',   ),
        'st4':          ( 'uint', 'both',   ),
        'st5':          ( 'uint', 'both',   ),
        'st6':          ( 'uint', 'both',   ),
        'st7':          ( 'uint', 'both',   ),
        # MMX registers.
        'mm0':          ( 'uint', 'both',   ),
        'mm1':          ( 'uint', 'both',   ),
        'mm2':          ( 'uint', 'both',   ),
        'mm3':          ( 'uint', 'both',   ),
        'mm4':          ( 'uint', 'both',   ),
        'mm5':          ( 'uint', 'both',   ),
        'mm6':          ( 'uint', 'both',   ),
        'mm7':          ( 'uint', 'both',   ),
        # SSE registers.
        'xmm0':         ( 'uint', 'both',   ),
        'xmm1':         ( 'uint', 'both',   ),
        'xmm2':         ( 'uint', 'both',   ),
        'xmm3':         ( 'uint', 'both',   ),
        'xmm4':         ( 'uint', 'both',   ),
        'xmm5':         ( 'uint', 'both',   ),
        'xmm6':         ( 'uint', 'both',   ),
        'xmm7':         ( 'uint', 'both',   ),
        'xmm8':         ( 'uint', 'both',   ),
        'xmm9':         ( 'uint', 'both',   ),
        'xmm10':        ( 'uint', 'both',   ),
        'xmm11':        ( 'uint', 'both',   ),
        'xmm12':        ( 'uint', 'both',   ),
        'xmm13':        ( 'uint', 'both',   ),
        'xmm14':        ( 'uint', 'both',   ),
        'xmm15':        ( 'uint', 'both',   ),
        'xmm0.lo':      ( 'uint', 'both',   ),
        'xmm1.lo':      ( 'uint', 'both',   ),
        'xmm2.lo':      ( 'uint', 'both',   ),
        'xmm3.lo':      ( 'uint', 'both',   ),
        'xmm4.lo':      ( 'uint', 'both',   ),
        'xmm5.lo':      ( 'uint', 'both',   ),
        'xmm6.lo':      ( 'uint', 'both',   ),
        'xmm7.lo':      ( 'uint', 'both',   ),
        'xmm8.lo':      ( 'uint', 'both',   ),
        'xmm9.lo':      ( 'uint', 'both',   ),
        'xmm10.lo':     ( 'uint', 'both',   ),
        'xmm11.lo':     ( 'uint', 'both',   ),
        'xmm12.lo':     ( 'uint', 'both',   ),
        'xmm13.lo':     ( 'uint', 'both',   ),
        'xmm14.lo':     ( 'uint', 'both',   ),
        'xmm15.lo':     ( 'uint', 'both',   ),
        'xmm0.hi':      ( 'uint', 'both',   ),
        'xmm1.hi':      ( 'uint', 'both',   ),
        'xmm2.hi':      ( 'uint', 'both',   ),
        'xmm3.hi':      ( 'uint', 'both',   ),
        'xmm4.hi':      ( 'uint', 'both',   ),
        'xmm5.hi':      ( 'uint', 'both',   ),
        'xmm6.hi':      ( 'uint', 'both',   ),
        'xmm7.hi':      ( 'uint', 'both',   ),
        'xmm8.hi':      ( 'uint', 'both',   ),
        'xmm9.hi':      ( 'uint', 'both',   ),
        'xmm10.hi':     ( 'uint', 'both',   ),
        'xmm11.hi':     ( 'uint', 'both',   ),
        'xmm12.hi':     ( 'uint', 'both',   ),
        'xmm13.hi':     ( 'uint', 'both',   ),
        'xmm14.hi':     ( 'uint', 'both',   ),
        'xmm15.hi':     ( 'uint', 'both',   ),
        'xmm0.lo.zx':   ( 'uint', 'both',   ),
        'xmm1.lo.zx':   ( 'uint', 'both',   ),
        'xmm2.lo.zx':   ( 'uint', 'both',   ),
        'xmm3.lo.zx':   ( 'uint', 'both',   ),
        'xmm4.lo.zx':   ( 'uint', 'both',   ),
        'xmm5.lo.zx':   ( 'uint', 'both',   ),
        'xmm6.lo.zx':   ( 'uint', 'both',   ),
        'xmm7.lo.zx':   ( 'uint', 'both',   ),
        'xmm8.lo.zx':   ( 'uint', 'both',   ),
        'xmm9.lo.zx':   ( 'uint', 'both',   ),
        'xmm10.lo.zx':  ( 'uint', 'both',   ),
        'xmm11.lo.zx':  ( 'uint', 'both',   ),
        'xmm12.lo.zx':  ( 'uint', 'both',   ),
        'xmm13.lo.zx':  ( 'uint', 'both',   ),
        'xmm14.lo.zx':  ( 'uint', 'both',   ),
        'xmm15.lo.zx':  ( 'uint', 'both',   ),
        'xmm0.dw0':     ( 'uint', 'both',   ),
        'xmm1.dw0':     ( 'uint', 'both',   ),
        'xmm2.dw0':     ( 'uint', 'both',   ),
        'xmm3.dw0':     ( 'uint', 'both',   ),
        'xmm4.dw0':     ( 'uint', 'both',   ),
        'xmm5.dw0':     ( 'uint', 'both',   ),
        'xmm6.dw0':     ( 'uint', 'both',   ),
        'xmm7.dw0':     ( 'uint', 'both',   ),
        'xmm8.dw0':     ( 'uint', 'both',   ),
        'xmm9.dw0':     ( 'uint', 'both',   ),
        'xmm10.dw0':    ( 'uint', 'both',   ),
        'xmm11.dw0':    ( 'uint', 'both',   ),
        'xmm12.dw0':    ( 'uint', 'both',   ),
        'xmm13.dw0':    ( 'uint', 'both',   ),
        'xmm14.dw0':    ( 'uint', 'both',   ),
        'xmm15.dw0':    ( 'uint', 'both',   ),
        'xmm15_dw0':    ( 'uint', 'both',   ), # Old misspelling of 'xmm15.dw0', kept so existing users still work.
        # AVX registers.
        'ymm0':         ( 'uint', 'both',   ),
        'ymm1':         ( 'uint', 'both',   ),
        'ymm2':         ( 'uint', 'both',   ),
        'ymm3':         ( 'uint', 'both',   ),
        'ymm4':         ( 'uint', 'both',   ),
        'ymm5':         ( 'uint', 'both',   ),
        'ymm6':         ( 'uint', 'both',   ),
        'ymm7':         ( 'uint', 'both',   ),
        'ymm8':         ( 'uint', 'both',   ),
        'ymm9':         ( 'uint', 'both',   ),
        'ymm10':        ( 'uint', 'both',   ),
        'ymm11':        ( 'uint', 'both',   ),
        'ymm12':        ( 'uint', 'both',   ),
        'ymm13':        ( 'uint', 'both',   ),
        'ymm14':        ( 'uint', 'both',   ),
        'ymm15':        ( 'uint', 'both',   ),

        # Special ones.
        'value.xcpt':   ( 'uint', 'output', ),
    };

    def __init__(self, sField, sOp, sValue, sType):
        assert sField in self.kdFields;
        assert sOp in self.kasOperators;
        self.sField = sField;
        self.sOp    = sOp;
        self.sValue = sValue;
        self.sType  = sType;
        assert isinstance(sField, str);
        assert isinstance(sOp, str);
        assert isinstance(sType, str);
        assert isinstance(sValue, str);
1285
1286
class TestSelector(object):
    """
    One selector for an instruction test.

    A selector is a (variable, compare operator, value) triplet restricting
    when a test applies.  The valid combinations are given by kdVariables and
    kasCompareOps.
    """
    ## Selector compare operators.
    kasCompareOps = [ '==', '!=' ];

    ## Selector variables and their valid values.
    kdVariables = {
        # Operand size.
        'size':     { 'o16': 'size_o16', 'o32': 'size_o32', 'o64': 'size_o64', },
        # VEX.L value.
        'vex.l':    { '0': 'vexl_0', '1': 'vexl_1', },
        # Execution ring.
        'ring':     {
            '0':    'ring_0',
            '1':    'ring_1',
            '2':    'ring_2',
            '3':    'ring_3',
            '0..2': 'ring_0_thru_2',
            '1..3': 'ring_1_thru_3',
        },
        # Basic code mode.
        'codebits': { '64': 'code_64bit', '32': 'code_32bit', '16': 'code_16bit', },
        # cpu modes.
        'mode':     {
            'real': 'mode_real',
            'prot': 'mode_prot',
            'long': 'mode_long',
            'v86':  'mode_v86',
            'smm':  'mode_smm',
            'vmx':  'mode_vmx',
            'svm':  'mode_svm',
        },
        # paging on/off
        'paging':   { 'on': 'paging_on', 'off': 'paging_off', },
        # CPU vendor
        'vendor':   { 'amd': 'vendor_amd', 'intel': 'vendor_intel', 'via': 'vendor_via', },
    };

    ## Selector shorthand predicates.
    ## These translates into variable expressions.
    kdPredicates = {
        # Operand size.
        'o16':          'size==o16',
        'o32':          'size==o32',
        'o64':          'size==o64',
        # Execution ring.
        'ring0':        'ring==0',
        '!ring0':       'ring==1..3',
        'ring1':        'ring==1',
        'ring2':        'ring==2',
        'ring3':        'ring==3',
        'user':         'ring==3',
        'supervisor':   'ring==0..2',
        # Code bitness.
        '16-bit':       'codebits==16',
        '32-bit':       'codebits==32',
        '64-bit':       'codebits==64',
        # CPU modes.
        'real':         'mode==real',
        'prot':         'mode==prot',
        'long':         'mode==long',
        'v86':          'mode==v86',
        'smm':          'mode==smm',
        'vmx':          'mode==vmx',
        'svm':          'mode==svm',
        # Paging.
        'paging':       'paging==on',
        '!paging':      'paging==off',
        # CPU vendor.
        'amd':          'vendor==amd',
        '!amd':         'vendor!=amd',
        'intel':        'vendor==intel',
        '!intel':       'vendor!=intel',
        'via':          'vendor==via',
        '!via':         'vendor!=via',
    };

    def __init__(self, sVariable, sOp, sValue):
        # Validate against the tables above before storing anything.
        assert sVariable in self.kdVariables;
        assert sOp in self.kasCompareOps;
        dValidValues = self.kdVariables[sVariable];
        assert sValue in dValidValues;

        self.sVariable, self.sOp, self.sValue = sVariable, sOp, sValue;
1383
1384
1385class InstructionTest(object):
1386 """
1387 Instruction test.
1388 """
1389
1390 def __init__(self, oInstr): # type: (InstructionTest, Instruction)
1391 self.oInstr = oInstr # type: InstructionTest
1392 self.aoInputs = [] # type: List[TestInOut]
1393 self.aoOutputs = [] # type: List[TestInOut]
1394 self.aoSelectors = [] # type: List[TestSelector]
1395
1396 def toString(self, fRepr = False):
1397 """
1398 Converts it to string representation.
1399 """
1400 asWords = [];
1401 if self.aoSelectors:
1402 for oSelector in self.aoSelectors:
1403 asWords.append('%s%s%s' % (oSelector.sVariable, oSelector.sOp, oSelector.sValue,));
1404 asWords.append('/');
1405
1406 for oModifier in self.aoInputs:
1407 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
1408
1409 asWords.append('->');
1410
1411 for oModifier in self.aoOutputs:
1412 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
1413
1414 if fRepr:
1415 return '<' + ' '.join(asWords) + '>';
1416 return ' '.join(asWords);
1417
1418 def __str__(self):
1419 """ Provide string represenation. """
1420 return self.toString(False);
1421
1422 def __repr__(self):
1423 """ Provide unambigious string representation. """
1424 return self.toString(True);
1425
class Operand(object):
    """
    Instruction operand.

    Holds where the operand is located (a g_kdOpLocations key) and its type
    (a g_kdOpTypes key).
    """

    def __init__(self, sWhere, sType):
        assert sWhere in g_kdOpLocations, sWhere;
        assert sType  in g_kdOpTypes, sType;
        self.sWhere = sWhere;   ##< g_kdOpLocations
        self.sType  = sType;    ##< g_kdOpTypes

    def usesModRM(self):
        """ Returns True if using some form of ModR/M encoding, judged by the first character of the type. """
        chKind = self.sType[0];
        return chKind in ('E', 'G', 'M');
1440
1441
1442
class Instruction(object): # pylint: disable=too-many-instance-attributes
    """
    Instruction.

    Collects everything known about one instruction: the core attributes
    (mnemonic, maps, operands, opcode, flags, ...), implementation details,
    where it was decoded from and some intermediate input fields.
    """

    def __init__(self, sSrcFile, iLine):
        ## @name Core attributes.
        ## @{
        self.oParent        = None      # type: Instruction
        self.sMnemonic      = None;
        self.sBrief         = None;
        self.asDescSections = []        # type: List[str]
        self.aoMaps         = []        # type: List[InstructionMap]
        self.aoOperands     = []        # type: List[Operand]
        self.sPrefix        = None;     ##< Single prefix: None, 'none', 0x66, 0xf3, 0xf2
        self.sOpcode        = None      # type: str
        self.sSubOpcode     = None      # type: str
        self.sEncoding      = None;
        self.asFlTest       = None;
        self.asFlModify     = None;
        self.asFlUndefined  = None;
        self.asFlSet        = None;
        self.asFlClear      = None;
        self.dHints         = {};       ##< Dictionary of instruction hints, flags, whatnot. (Dictionary for speed; dummy value).
        self.sDisEnum       = None;     ##< OP_XXXX value.  Default is based on the uppercased mnemonic.
        self.asCpuIds       = [];       ##< The CPUID feature bit names for this instruction. If multiple, assume AND.
        self.asReqFeatures  = [];       ##< Which features are required to be enabled to run this instruction.
        self.aoTests        = []        # type: List[InstructionTest]
        self.sMinCpu        = None;     ##< Indicates the minimum CPU required for the instruction. Not set when oCpuExpr is.
        self.oCpuExpr       = None;     ##< Some CPU restriction expression...
        self.sGroup         = None;
        self.fUnused        = False;    ##< Unused instruction.
        self.fInvalid       = False;    ##< Invalid instruction (like UD2).
        self.sInvalidStyle  = None;     ##< Invalid behaviour style (g_kdInvalidStyles),
        self.sXcptType      = None;     ##< Exception type (g_kdXcptTypes).
        ## @}

        ## @name Implementation attributes.
        ## @{
        self.sStats         = None;
        self.sFunction      = None;
        self.fStub          = False;
        self.fUdStub        = False;
        ## @}

        ## @name Decoding info
        ## @{
        self.sSrcFile       = sSrcFile;
        self.iLineCreated   = iLine;
        self.iLineCompleted = None;
        self.cOpTags        = 0;
        self.iLineFnIemOpMacro  = -1;
        self.iLineMnemonicMacro = -1;
        ## @}

        ## @name Intermediate input fields.
        ## @{
        self.sRawDisOpNo    = None;
        self.asRawDisParams = [];
        self.sRawIemOpFlags = None;
        self.sRawOldOpcodes = None;
        self.asCopyTests    = [];
        ## @}

        ## All the MC blocks associated with this instruction.
        self.aoMcBlocks     = []        # type: List[McBlock]

    def toString(self, fRepr = False):
        """
        Turn object into a string.

        The result is a '; ' separated list of 'name=value' items covering
        the attributes that are set.  It is enclosed in angle brackets when
        fRepr is True.
        """
        # Collect (name, value) pairs in output order.  Pairs with a None value
        # are dropped when formatting, which is how optional items are left out.
        aoFields = [
            ('opcode',   self.sOpcode),
            ('prefix',   self.sPrefix if self.sPrefix else None),
            ('mnemonic', self.sMnemonic),
        ];
        aoFields.extend([('op%u' % (iOperand + 1,), '%s:%s' % (oOperand.sWhere, oOperand.sType,))
                         for iOperand, oOperand in enumerate(self.aoOperands)]);
        aoFields.extend([
            ('maps',             ','.join([oMap.sName for oMap in self.aoMaps]) if self.aoMaps else None),
            ('encoding',         self.sEncoding),
            ('hints',            ','.join(self.dHints.keys()) if self.dHints else None),
            ('disenum',          self.sDisEnum),
            ('cpuid',            ','.join(self.asCpuIds) if self.asCpuIds else None),
            ('group',            self.sGroup),
            ('unused',           'True' if self.fUnused else None),
            ('invalid',          'True' if self.fInvalid else None),
            ('invlstyle',        self.sInvalidStyle),
            ('fltest',           self.asFlTest),
            ('flmodify',         self.asFlModify),
            ('flundef',          self.asFlUndefined),
            ('flset',            self.asFlSet),
            ('flclear',          self.asFlClear),
            ('mincpu',           self.sMinCpu),
            ('stats',            self.sStats),
            ('sFunction',        self.sFunction),
            ('fStub',            'True' if self.fStub else None),
            ('fUdStub',          'True' if self.fUdStub else None),
            ('optags',           str(self.cOpTags) if self.cOpTags else None),
            ('FNIEMOP_XXX',      str(self.iLineFnIemOpMacro) if self.iLineFnIemOpMacro != -1 else None),
            ('IEMOP_MNEMMONICn', str(self.iLineMnemonicMacro) if self.iLineMnemonicMacro != -1 else None),
        ]);

        sBody = '; '.join(['%s=%s' % (sField, oValue,) for sField, oValue in aoFields if oValue is not None]);
        return ('<' + sBody + '>') if fRepr else sBody;

    def __str__(self):
        """ Provide string representation. """
        return self.toString(fRepr = False);

    def __repr__(self):
        """ Provide unambiguous string representation. """
        return self.toString(fRepr = True);

    def copy(self, oMap = None, sOpcode = None, sSubOpcode = None, sPrefix = None):
        """
        Makes a copy of the object for the purpose of putting in a different map
        or a different place in the current map.

        The copy gets this object as parent.  The oMap, sOpcode, sSubOpcode and
        sPrefix arguments replace the corresponding attributes when given.
        List and dictionary attributes are copied shallowly, the remaining
        attributes are shared.  The MC block list is not carried over.
        """
        oCopy = Instruction(self.sSrcFile, self.iLineCreated);
        oCopy.oParent = self;

        # The bits the caller may override.
        oCopy.aoMaps     = [oMap,] if oMap else list(self.aoMaps);
        oCopy.sPrefix    = sPrefix    or self.sPrefix;
        oCopy.sOpcode    = sOpcode    or self.sOpcode;
        oCopy.sSubOpcode = sSubOpcode or self.sSubOpcode;

        # Attributes simply referenced by the copy.
        for sAttr in ('sMnemonic', 'sBrief', 'sEncoding',
                      'asFlTest', 'asFlModify', 'asFlUndefined', 'asFlSet', 'asFlClear',
                      'sDisEnum', 'sMinCpu', 'oCpuExpr', 'sGroup', 'fUnused', 'fInvalid',
                      'sInvalidStyle', 'sXcptType',
                      'sStats', 'sFunction', 'fStub', 'fUdStub',
                      'iLineCompleted', 'cOpTags', 'iLineFnIemOpMacro', 'iLineMnemonicMacro',
                      'sRawDisOpNo', 'sRawIemOpFlags', 'sRawOldOpcodes',):
            setattr(oCopy, sAttr, getattr(self, sAttr));

        # Lists getting a new list object with the same entries. (Deeper copy of operands and tests?)
        for sAttr in ('asDescSections', 'aoOperands', 'asCpuIds', 'asReqFeatures', 'aoTests',
                      'asRawDisParams', 'asCopyTests',):
            setattr(oCopy, sAttr, list(getattr(self, sAttr)));

        oCopy.dHints = dict(self.dHints);

        return oCopy;

    def getOpcodeByte(self):
        """
        Decodes sOpcode into a byte range integer value.
        Raises exception if sOpcode is None or invalid.

        Supported forms, r being a single digit:
            - '0xNN':   The value of the hex number.
            - '/r':     r << 3.
            - '11/r':   (r << 3) | 0xc0.
            - '!11/r':  (r << 3) | 0x80.
        """
        if self.sOpcode is None:
            raise Exception('No opcode byte for %s!' % (self,));
        sSpec = str(self.sOpcode); # pylint type confusion workaround.

        # Full hex byte form.
        if sSpec[:2] == '0x':
            return int(sSpec, 16);

        # The /r, 11/r and !11/r forms: (total length, leading text, bits to OR in).
        # Note! The !11/r form yields 0x80 in the top bits, i.e. 10b rather than 11b.
        ## @todo the !11/r form doesn't really work...
        for cchSpec, sLead, fTopBits in ((2, '/', 0), (4, '11/', 0xc0), (5, '!11/', 0x80),):
            if len(sSpec) == cchSpec and sSpec.startswith(sLead) and sSpec[-1].isdigit():
                return (int(sSpec[-1]) << 3) | fTopBits;

        raise Exception('unsupported opcode byte spec "%s" for %s' % (sSpec, self,));

    @staticmethod
    def _flagsToIntegerMask(asFlags):
        """
        Returns the integer mask value for asFlags.

        Zero is returned when asFlags is None or empty.
        """
        fMask = 0;
        for sFlag in (asFlags or ()):
            sConstant = g_kdEFlagsMnemonics[sFlag];
            assert sConstant[0] != '!', sConstant
            fMask |= g_kdX86EFlagsConstants[sConstant];
        return fMask;

    def getTestedFlagsMask(self):
        """ Returns asFlTest into a integer mask value """
        return self._flagsToIntegerMask(self.asFlTest);

    def getModifiedFlagsMask(self):
        """ Returns asFlModify into a integer mask value """
        return self._flagsToIntegerMask(self.asFlModify);

    def getUndefinedFlagsMask(self):
        """ Returns asFlUndefined into a integer mask value """
        return self._flagsToIntegerMask(self.asFlUndefined);

    def getSetFlagsMask(self):
        """ Returns asFlSet into a integer mask value """
        return self._flagsToIntegerMask(self.asFlSet);

    def getClearedFlagsMask(self):
        """ Returns asFlClear into a integer mask value """
        return self._flagsToIntegerMask(self.asFlClear);

    @staticmethod
    def _flagsToC(asFlags):
        """
        Returns asFlags converted to X86_EFL_XXX ored together C-style.

        The string '0' is returned when asFlags is None or empty.
        """
        if not asFlags:
            return '0';

        asConstants = [];
        for sFlag in asFlags:
            sConstant = g_kdEFlagsMnemonics[sFlag];
            assert sConstant[0] != '!', sConstant
            asConstants.append(sConstant);
        return ' | '.join(asConstants);

    def getTestedFlagsCStyle(self):
        """ Returns asFlTest as C constants ored together. """
        return self._flagsToC(self.asFlTest);

    def getModifiedFlagsCStyle(self):
        """ Returns asFlModify as C constants ored together. """
        return self._flagsToC(self.asFlModify);

    def getUndefinedFlagsCStyle(self):
        """ Returns asFlUndefined as C constants ored together. """
        return self._flagsToC(self.asFlUndefined);

    def getSetFlagsCStyle(self):
        """ Returns asFlSet as C constants ored together. """
        return self._flagsToC(self.asFlSet);

    def getClearedFlagsCStyle(self):
        """ Returns asFlClear as C constants ored together. """
        return self._flagsToC(self.asFlClear);

    def onlyInVexMaps(self):
        """ Returns True if only in VEX maps, otherwise False.  (No maps -> False) """
        if not self.aoMaps:
            return False;
        return all(oMap.isVexMap() for oMap in self.aoMaps);
1718
1719
1720
## All the instructions.
g_aoAllInstructions = []  # type: List[Instruction]

## All the instructions indexed by statistics name (opstat).
g_dAllInstructionsByStat = {}  # type: Dict[str, Instruction]

## All the instructions indexed by function name (opfunction).
## Each entry is a list of instructions.
g_dAllInstructionsByFunction = {}  # type: Dict[str, List[Instruction]]

## Instructions tagged by oponlytest
g_aoOnlyTestInstructions = []  # type: List[Instruction]
1732
## Instruction maps.
#
# One InstructionMap per opcode map / group.  The first argument is the map
# name, which g_dInstructionMaps below uses as its key (oMap.sName).
# NOTE(review): the second positional argument looks like the IEM function
#               table name (cf. oMap.sIemName below); asLeadOpcodes, sSelector,
#               sEncoding and sDisParse are interpreted by InstructionMap,
#               which is defined elsewhere in this file - confirm there.
g_aoInstructionMaps = [
    InstructionMap('one', 'g_apfnOneByteMap', sSelector = 'byte'),
    InstructionMap('grp1_80', asLeadOpcodes = ['0x80',], sSelector = '/r'),
    InstructionMap('grp1_81', asLeadOpcodes = ['0x81',], sSelector = '/r'),
    InstructionMap('grp1_82', asLeadOpcodes = ['0x82',], sSelector = '/r'),
    InstructionMap('grp1_83', asLeadOpcodes = ['0x83',], sSelector = '/r'),
    InstructionMap('grp1a', asLeadOpcodes = ['0x8f',], sSelector = '/r'),
    InstructionMap('grp2_c0', asLeadOpcodes = ['0xc0',], sSelector = '/r'),
    InstructionMap('grp2_c1', asLeadOpcodes = ['0xc1',], sSelector = '/r'),
    InstructionMap('grp2_d0', asLeadOpcodes = ['0xd0',], sSelector = '/r'),
    InstructionMap('grp2_d1', asLeadOpcodes = ['0xd1',], sSelector = '/r'),
    InstructionMap('grp2_d2', asLeadOpcodes = ['0xd2',], sSelector = '/r'),
    InstructionMap('grp2_d3', asLeadOpcodes = ['0xd3',], sSelector = '/r'),
    ## @todo g_apfnEscF1_E0toFF
    InstructionMap('grp3_f6', asLeadOpcodes = ['0xf6',], sSelector = '/r'),
    InstructionMap('grp3_f7', asLeadOpcodes = ['0xf7',], sSelector = '/r'),
    InstructionMap('grp4', asLeadOpcodes = ['0xfe',], sSelector = '/r'),
    InstructionMap('grp5', asLeadOpcodes = ['0xff',], sSelector = '/r'),
    InstructionMap('grp11_c6_m', asLeadOpcodes = ['0xc6',], sSelector = '!11 /r'),
    InstructionMap('grp11_c6_r', asLeadOpcodes = ['0xc6',], sSelector = '11'), # xabort
    InstructionMap('grp11_c7_m', asLeadOpcodes = ['0xc7',], sSelector = '!11 /r'),
    InstructionMap('grp11_c7_r', asLeadOpcodes = ['0xc7',], sSelector = '11'), # xbegin

    InstructionMap('two0f', 'g_apfnTwoByteMap', asLeadOpcodes = ['0x0f',], sDisParse = 'IDX_ParseTwoByteEsc'),
    InstructionMap('grp6', 'g_apfnGroup6', asLeadOpcodes = ['0x0f', '0x00',], sSelector = '/r'),
    InstructionMap('grp7_m', 'g_apfnGroup7Mem', asLeadOpcodes = ['0x0f', '0x01',], sSelector = '!11 /r'),
    InstructionMap('grp7_r', asLeadOpcodes = ['0x0f', '0x01',], sSelector = '11'),
    InstructionMap('grp8', asLeadOpcodes = ['0x0f', '0xba',], sSelector = '/r'),
    InstructionMap('grp9', 'g_apfnGroup9RegReg', asLeadOpcodes = ['0x0f', '0xc7',], sSelector = 'mod /r'),
    ## @todo What about g_apfnGroup9MemReg?
    InstructionMap('grp10', None, asLeadOpcodes = ['0x0f', '0xb9',], sSelector = '/r'), # UD1 /w modr/m
    InstructionMap('grp12', 'g_apfnGroup12RegReg', asLeadOpcodes = ['0x0f', '0x71',], sSelector = 'mod /r'),
    InstructionMap('grp13', 'g_apfnGroup13RegReg', asLeadOpcodes = ['0x0f', '0x72',], sSelector = 'mod /r'),
    InstructionMap('grp14', 'g_apfnGroup14RegReg', asLeadOpcodes = ['0x0f', '0x73',], sSelector = 'mod /r'),
    InstructionMap('grp15', 'g_apfnGroup15MemReg', asLeadOpcodes = ['0x0f', '0xae',], sSelector = 'memreg /r'),
    ## @todo What about g_apfnGroup15RegReg?
    InstructionMap('grp16', asLeadOpcodes = ['0x0f', '0x18',], sSelector = 'mod /r'),
    InstructionMap('grpA17', asLeadOpcodes = ['0x0f', '0x78',], sSelector = '/r'), # AMD: EXTRQ weirdness
    InstructionMap('grpP', asLeadOpcodes = ['0x0f', '0x0d',], sSelector = '/r'), # AMD: prefetch

    InstructionMap('three0f38', 'g_apfnThreeByte0f38', asLeadOpcodes = ['0x0f', '0x38',]),
    InstructionMap('three0f3a', 'g_apfnThreeByte0f3a', asLeadOpcodes = ['0x0f', '0x3a',]),

    InstructionMap('vexmap1', 'g_apfnVexMap1', sEncoding = 'vex1'),
    InstructionMap('vexgrp12', 'g_apfnVexGroup12RegReg', sEncoding = 'vex1', asLeadOpcodes = ['0x71',], sSelector = 'mod /r'),
    InstructionMap('vexgrp13', 'g_apfnVexGroup13RegReg', sEncoding = 'vex1', asLeadOpcodes = ['0x72',], sSelector = 'mod /r'),
    InstructionMap('vexgrp14', 'g_apfnVexGroup14RegReg', sEncoding = 'vex1', asLeadOpcodes = ['0x73',], sSelector = 'mod /r'),
    InstructionMap('vexgrp15', 'g_apfnVexGroup15MemReg', sEncoding = 'vex1', asLeadOpcodes = ['0xae',], sSelector = 'memreg /r'),
    InstructionMap('vexgrp17', 'g_apfnVexGroup17_f3', sEncoding = 'vex1', asLeadOpcodes = ['0xf3',], sSelector = '/r'),

    InstructionMap('vexmap2', 'g_apfnVexMap2', sEncoding = 'vex2'),
    InstructionMap('vexmap3', 'g_apfnVexMap3', sEncoding = 'vex3'),

    InstructionMap('3dnow', asLeadOpcodes = ['0x0f', '0x0f',]),
    InstructionMap('xopmap8', sEncoding = 'xop8'),
    InstructionMap('xopmap9', sEncoding = 'xop9'),
    InstructionMap('xopgrp1', sEncoding = 'xop9', asLeadOpcodes = ['0x01'], sSelector = '/r'),
    InstructionMap('xopgrp2', sEncoding = 'xop9', asLeadOpcodes = ['0x02'], sSelector = '/r'),
    InstructionMap('xopgrp3', sEncoding = 'xop9', asLeadOpcodes = ['0x12'], sSelector = '/r'),
    InstructionMap('xopmap10', sEncoding = 'xop10'),
    InstructionMap('xopgrp4', sEncoding = 'xop10', asLeadOpcodes = ['0x12'], sSelector = '/r'),
];
## The instruction maps indexed by map name (oMap.sName).
g_dInstructionMaps = { oMap.sName: oMap for oMap in g_aoInstructionMaps };
## The instruction maps indexed by oMap.sIemName.
## NOTE(review): if InstructionMap gives maps created without an IEM name a
##               common sIemName value, those entries collapse onto a single
##               key here and only the last one is kept - confirm.
g_dInstructionMapsByIemName = { oMap.sIemName: oMap for oMap in g_aoInstructionMaps };
1798
1799
1800#
1801# Decoder functions.
1802#
1803
class DecoderFunction(object):
    """
    A decoder function found in the instruction sources.

    Keeps track of where the function lives and of its raw lines.  This is
    mainly there so variables used in microcode blocks can be searched for
    within the scope of the enclosing function.
    """

    def __init__(self, sSrcFile, iBeginLine, sName, asDefArgs):
        ## The source file the function is defined in.
        self.sSrcFile   = sSrcFile;
        ## The start line.
        self.iBeginLine = iBeginLine;
        ## The line the function (probably) ends on, -1 until complete() is called.
        self.iEndLine   = -1;
        ## The function name.
        self.sName      = sName;
        ## The FNIEMOP*DEF/STUB* macro argument list, 0th element is the macro name.
        self.asDefArgs  = asDefArgs;
        ## The raw lines the function is made up of.
        self.asLines    = [] # type: List[str]

    def complete(self, iEndLine, asLines):
        """
        Records the end line and the raw lines of the function.

        May only be called once per function (asserted via iEndLine == -1).
        """
        assert self.iEndLine == -1;
        (self.iEndLine, self.asLines) = (iEndLine, asLines);
1826
1827
1828#
1829# "Microcode" statements and blocks
1830#
1831
class McStmt(object):
    """
    A single statement inside a microcode block.
    """

    def __init__(self, sName, asParams):
        ## The statement name: 'IEM_MC_XXX' or 'C++'.
        self.sName    = sName;
        ## The statement parameters (joined with ', ' when rendering).
        self.asParams = asParams;
        ## Initialised to None; not used by this class itself.
        self.oUser    = None;

    def renderCode(self, cchIndent = 0):
        """
        Returns the statement as one line of code, indented by cchIndent
        spaces and terminated by ';' and a newline.
        """
        return ''.join((' ' * cchIndent, self.sName, '(', ', '.join(self.asParams), ');\n',));

    @staticmethod
    def renderCodeForList(aoStmts, cchIndent = 0):
        """
        Returns the concatenated renderCode() output of all the statements in
        aoStmts, each rendered with the given indentation.
        """
        asRendered = [];
        for oStmt in aoStmts:
            asRendered.append(oStmt.renderCode(cchIndent));
        return ''.join(asRendered);

    @staticmethod
    def findStmtByNames(aoStmts, dNames):
        """
        Returns the first statement in aoStmts whose name is in dNames,
        descending into the IF and then the ELSE branch of conditional
        statements.  Returns None if there is no such statement.

        Note! The names are passed as a dictionary for quick lookup, the value
              does not matter.
        """
        for oStmt in aoStmts:
            if oStmt.sName in dNames:
                return oStmt;
            if not isinstance(oStmt, McStmtCond):
                continue;
            oFound = McStmt.findStmtByNames(oStmt.aoIfBranch, dNames) \
                  or McStmt.findStmtByNames(oStmt.aoElseBranch, dNames);
            if oFound:
                return oFound;
        return None;

    @staticmethod
    def countStmtsByName(aoStmts, dNames, dRet):
        """
        Searches aoStmts, including both branches of conditional statements,
        for statements whose name is in dNames.  Each name found is added to
        dRet with an occurrence count.

        Returns the total number of hits.
        """
        cHits = 0;
        for oStmt in aoStmts:
            sStmtName = oStmt.sName;
            if sStmtName in dNames:
                dRet[sStmtName] = dRet.get(sStmtName, 0) + 1;
                cHits += 1;
            if isinstance(oStmt, McStmtCond):
                for aoBranch in (oStmt.aoIfBranch, oStmt.aoElseBranch):
                    cHits += McStmt.countStmtsByName(aoBranch, dNames, dRet);
        return cHits;

    def isCppStmt(self):
        """ Returns True if the statement name starts with 'C++'. """
        return self.sName[:3] == 'C++';
1894
class McStmtCond(McStmt):
    """
    Base class for conditional statements (IEM_MC_IF_XXX, IEM_MC_NATIVE_IF).
    """

    def __init__(self, sName, asParams, aoIfBranch = None, aoElseBranch = None):
        McStmt.__init__(self, sName, asParams);
        ## Statements in the IF branch (always a private list copy).
        self.aoIfBranch            = list(aoIfBranch) if aoIfBranch is not None else [];
        ## Statements in the ELSE branch (always a private list copy), empty if none.
        self.aoElseBranch          = list(aoElseBranch) if aoElseBranch is not None else [];
        ## User specific IF-branch annotation.
        self.oIfBranchAnnotation   = None;
        ## User specific ELSE-branch annotation.
        self.oElseBranchAnnotation = None;
        ## '_NATIVE' for IEM_MC_NATIVE_IF so it is closed by IEM_MC_NATIVE_ELSE/ENDIF, otherwise empty.
        self.sNativeInfix          = '_NATIVE' if sName == 'IEM_MC_NATIVE_IF' else '';

    def renderCode(self, cchIndent = 0):
        """
        Renders the conditional with its branches indented four more spaces.
        The ELSE part is only emitted when the ELSE branch is non-empty.
        """
        sIndent = ' ' * cchIndent;
        asRet   = [
            sIndent + self.sName + '(' + ', '.join(self.asParams) + ') {\n',
            self.renderCodeForList(self.aoIfBranch, cchIndent + 4),
        ];
        if self.aoElseBranch:
            asRet.append(sIndent + '} IEM_MC%s_ELSE() {\n' % (self.sNativeInfix,));
            asRet.append(self.renderCodeForList(self.aoElseBranch, cchIndent + 4));
        asRet.append(sIndent + '} IEM_MC%s_ENDIF();\n' % (self.sNativeInfix,));
        return ''.join(asRet);
1915
class McStmtNativeIf(McStmtCond):
    """
    IEM_MC_NATIVE_IF

    The single statement parameter is the architecture list joined by '|',
    or '0' if the list is empty.
    """
    def __init__(self, sName, asArchitectures):
        sParam = '|'.join(asArchitectures) if asArchitectures else '0';
        McStmtCond.__init__(self, sName, [sParam,]);
        ## The architectures as passed to the constructor.
        self.asArchitectures = asArchitectures;
1921
class McStmtVar(McStmt):
    """
    Variable statement: IEM_MC_LOCAL, IEM_MC_LOCAL_ASSIGN, IEM_MC_LOCAL_CONST.
    """
    def __init__(self, sName, asParams, sType, sVarName, sValue = None):
        McStmt.__init__(self, sName, asParams);
        ## The variable name.
        self.sVarName = sVarName;
        ## The variable type.
        self.sType    = sType;
        ## The assigned / const value, None if there is none.
        self.sValue   = sValue;
1929
class McStmtArg(McStmtVar):
    """
    Argument statement: IEM_MC_ARG, IEM_MC_ARG_CONST, IEM_MC_ARG_LOCAL_REF.

    The constant value, if any, ends up in the sValue member of McStmtVar.
    """
    def __init__(self, sName, asParams, sType, sVarName, iArg, sConstValue = None, sRef = None, sRefType = 'none'):
        McStmtVar.__init__(self, sName, asParams, sType, sVarName, sValue = sConstValue);
        ## The argument number.
        self.iArg     = iArg;
        ## The kind of reference: 'local', 'none'.
        self.sRefType = sRefType;
        ## The reference string (local variable, register).
        self.sRef     = sRef;
        assert sRefType in ('none', 'local');
1938
class McStmtCall(McStmt):
    """
    IEM_MC_CALL_*

    iFnParam is the index into asParams of the function being called,
    iRcNameParam the index of the return code name (negative if none).
    """
    def __init__(self, sName, asParams, iFnParam, iRcNameParam = -1):
        McStmt.__init__(self, sName, asParams);
        ## Index of the function in asParams.
        self.idxFn     = iFnParam;
        ## Index of the parameter following the function in asParams.
        self.idxParams = self.idxFn + 1;
        ## The function (asParams[iFnParam]).
        self.sFn       = asParams[self.idxFn];
        ## The asParams[iRcNameParam] value, None if iRcNameParam is negative.
        if iRcNameParam >= 0:
            self.iRcName = asParams[iRcNameParam];
        else:
            self.iRcName = None;
1947
class McStmtAssertEFlags(McStmt):
    """
    IEM_MC_ASSERT_EFLAGS

    The two parameters are the tested and the modified flags of the given
    instruction in C style.
    """
    def __init__(self, oInstruction):
        asParams = [oInstruction.getTestedFlagsCStyle(), oInstruction.getModifiedFlagsCStyle(),];
        McStmt.__init__(self, 'IEM_MC_ASSERT_EFLAGS', asParams);
1955
1956
class McCppGeneric(McStmt):
    """
    Generic C++/C statement.

    The code is kept as the one and only statement parameter.
    """

    def __init__(self, sCode, fDecode = True, sName = 'C++', cchIndent = 0):
        McStmt.__init__(self, sName, [sCode,]);
        ## Extra indentation added to whatever renderCode() is given.
        self.cchIndent = cchIndent;
        ## Selects the '// C++ decode' (True) or '// C++ normal' (False) annotation.
        self.fDecode   = fDecode;

    def renderCode(self, cchIndent = 0):
        """
        Returns the indented code with the decode/normal annotation inserted
        in front of every newline.
        """
        sAnnotation = ' // C++ decode\n' if self.fDecode else ' // C++ normal\n';
        sLine       = ' ' * (cchIndent + self.cchIndent) + self.asParams[0] + '\n';
        return sLine.replace('\n', sAnnotation);
1974
class McCppCall(McCppGeneric):
    """
    A generic C++/C call statement.

    The sName is still 'C++', so the function name is in the first parameter
    and the arguments in the subsequent ones.
    """

    def __init__(self, sFnName, asArgs, fDecode = True, cchIndent = 0):
        McCppGeneric.__init__(self, sFnName, fDecode = fDecode, cchIndent = cchIndent);
        # asParams[0] is the function name at this point, append the arguments.
        self.asParams.extend(asArgs);

    def renderCode(self, cchIndent = 0):
        """
        Returns the call as one indented line of code followed by the
        decode/normal annotation.
        """
        sAnnotation = ' // C++ decode\n' if self.fDecode else ' // C++ normal\n';
        sIndent     = ' ' * (cchIndent + self.cchIndent);
        return sIndent + self.asParams[0] + '(' + ', '.join(self.asParams[1:]) + ');' + sAnnotation;
1994
class McCppCond(McStmtCond):
    """
    C++/C 'if' statement.

    The condition is kept as the one and only statement parameter.
    """

    def __init__(self, sCode, fDecode = True, aoIfBranch = None, aoElseBranch = None, cchIndent = 0):
        McStmtCond.__init__(self, 'C++/if', [sCode,], aoIfBranch, aoElseBranch);
        ## Extra indentation added to whatever renderCode() is given.
        self.cchIndent = cchIndent;
        ## Selects the '// C++ decode' (True) or '// C++ normal' (False) annotation.
        self.fDecode   = fDecode;

    def renderCode(self, cchIndent = 0):
        """
        Renders the 'if' with braces on lines of their own and the branches
        indented four more spaces.  The 'else' part is only emitted when the
        ELSE branch is non-empty.
        """
        cchOuter    = cchIndent + self.cchIndent;
        sIndent     = ' ' * cchOuter;
        sAnnotation = '// C++ decode' if self.fDecode else '// C++ normal';
        asRet = [
            sIndent + 'if (' + self.asParams[0] + ') ' + sAnnotation + '\n',
            sIndent + '{\n',
            self.renderCodeForList(self.aoIfBranch, cchOuter + 4),
            sIndent + '}\n',
        ];
        if self.aoElseBranch:
            asRet += [
                sIndent + 'else ' + sAnnotation + '\n',
                sIndent + '{\n',
                self.renderCodeForList(self.aoElseBranch, cchOuter + 4),
                sIndent + '}\n',
            ];
        return ''.join(asRet);
2017
class McCppPreProc(McCppGeneric):
    """
    C++/C Preprocessor directive.

    Named 'C++/preproc' and created with fDecode set to False.
    """

    def __init__(self, sCode):
        McCppGeneric.__init__(self, sCode, fDecode = False, sName = 'C++/preproc');

    def renderCode(self, cchIndent = 0):
        """ Returns the directive followed by a newline; cchIndent is ignored. """
        _ = cchIndent;
        sDirective = self.asParams[0];
        return sDirective + '\n';
2027
2028
## IEM_MC_F_XXX values.
## The key is the flag name, the value a tuple with the flags it implies;
## McBlock.parseMcBegin sets those in dsMcFlags together with the flag itself.
g_kdMcFlags = {
    'IEM_MC_F_ONLY_8086': (),
    'IEM_MC_F_MIN_186': (),
    'IEM_MC_F_MIN_286': (),
    'IEM_MC_F_NOT_286_OR_OLDER': (),
    'IEM_MC_F_MIN_386': ('IEM_MC_F_NOT_286_OR_OLDER',),
    'IEM_MC_F_MIN_486': ('IEM_MC_F_NOT_286_OR_OLDER',),
    'IEM_MC_F_MIN_PENTIUM': ('IEM_MC_F_NOT_286_OR_OLDER',),
    'IEM_MC_F_MIN_PENTIUM_II': ('IEM_MC_F_NOT_286_OR_OLDER',),
    'IEM_MC_F_MIN_CORE': ('IEM_MC_F_NOT_286_OR_OLDER',),
    'IEM_MC_F_64BIT': ('IEM_MC_F_NOT_286_OR_OLDER',),
    'IEM_MC_F_NOT_64BIT': (),
};
## IEM_CIMPL_F_XXX values.
## The key is the flag name, the value a tuple with the flags that
## McBlock.parseCImplFlags sets in dsCImplFlags in addition to the flag itself.
g_kdCImplFlags = {
    'IEM_CIMPL_F_BRANCH_DIRECT': (),
    'IEM_CIMPL_F_BRANCH_INDIRECT': (),
    'IEM_CIMPL_F_BRANCH_RELATIVE': (),
    'IEM_CIMPL_F_BRANCH_CONDITIONAL': (),
    'IEM_CIMPL_F_BRANCH_FAR': (),
    'IEM_CIMPL_F_BRANCH_ANY': ('IEM_CIMPL_F_BRANCH_DIRECT', 'IEM_CIMPL_F_BRANCH_INDIRECT',
                               'IEM_CIMPL_F_BRANCH_RELATIVE',),
    'IEM_CIMPL_F_BRANCH_STACK': (),
    'IEM_CIMPL_F_BRANCH_STACK_FAR': (),
    'IEM_CIMPL_F_MODE': (),
    'IEM_CIMPL_F_RFLAGS': (),
    'IEM_CIMPL_F_INHIBIT_SHADOW': (),
    'IEM_CIMPL_F_STATUS_FLAGS': (),
    'IEM_CIMPL_F_CHECK_IRQ_AFTER': (),
    'IEM_CIMPL_F_CHECK_IRQ_BEFORE': (),
    'IEM_CIMPL_F_CHECK_IRQ_BEFORE_AND_AFTER': ('IEM_CIMPL_F_CHECK_IRQ_BEFORE', 'IEM_CIMPL_F_CHECK_IRQ_AFTER',),
    'IEM_CIMPL_F_VMEXIT': (),
    'IEM_CIMPL_F_FPU': (),
    'IEM_CIMPL_F_REP': (),
    'IEM_CIMPL_F_IO': (),
    'IEM_CIMPL_F_END_TB': (),
    'IEM_CIMPL_F_XCPT': ('IEM_CIMPL_F_BRANCH_INDIRECT', 'IEM_CIMPL_F_BRANCH_FAR',
                         'IEM_CIMPL_F_MODE', 'IEM_CIMPL_F_RFLAGS', 'IEM_CIMPL_F_VMEXIT', ),
    'IEM_CIMPL_F_CALLS_CIMPL': (),
    'IEM_CIMPL_F_CALLS_AIMPL': (),
    'IEM_CIMPL_F_CALLS_AIMPL_WITH_FXSTATE': (),
};
2072class McBlock(object):
2073 """
2074 Microcode block (IEM_MC_BEGIN ... IEM_MC_END, IEM_MC_DEFER_TO_CIMPL_x_RET).
2075 """
2076
## @name Macro expansion types (values for the iMacroExp member).
## @{
kiMacroExp_None = 0; ##< The block includes no macro expansion parts.
kiMacroExp_Entire = 1; ##< Entire block (iBeginLine == iEndLine), original line may contain multiple blocks.
kiMacroExp_Partial = 2; ##< Partial/mixed (cmpxchg16b), safe to assume single block.
## @}
2083
def __init__(self, sSrcFile, iBeginLine, offBeginLine, oFunction, iInFunction,
             oInstruction = None, cchIndent = None, fDeferToCImpl = False):
    """
    Initializes the block with its start position; the end position, raw
    lines and decoded statements are filled in later (see complete()).

    oFunction must have an sName attribute.  A cchIndent that is None or
    zero makes the indentation default to offBeginLine.
    """
    ## Set if IEM_MC_DEFER_TO_CIMPL_0_RET and friends, clear if IEM_MC_BEGIN/END block.
    self.fDeferToCImpl = fDeferToCImpl;
    ## The source file containing the block.
    self.sSrcFile = sSrcFile;
    ## The line with the IEM_MC_BEGIN/IEM_MC_DEFER_TO_CIMPL_X_RET statement.
    self.iBeginLine = iBeginLine;
    ## The offset of the IEM_MC_BEGIN/IEM_MC_DEFER_TO_CIMPL_X_RET statement within the line.
    self.offBeginLine = offBeginLine;
    ## The line with the IEM_MC_END statement / last line of IEM_MC_DEFER_TO_CIMPL_X_RET.
    ## This is -1 until complete() has been called.
    self.iEndLine = -1;
    ## The offset of the IEM_MC_END statement within the line / semicolon offset for defer-to.
    self.offEndLine = 0;
    ## The offset following the IEM_MC_END/IEM_MC_DEFER_TO_CIMPL_X_RET semicolon.
    self.offAfterEnd = 0;
    ## The function the block resides in.
    self.oFunction = oFunction;
    ## The name of the function the block resides in. DEPRECATED.
    self.sFunction = oFunction.sName;
    ## The block number within the function.
    self.iInFunction = iInFunction;
    ## The instruction this block is associated with - can be None.
    self.oInstruction = oInstruction # type: Instruction
    ## Indentation level of the block (offBeginLine if cchIndent is None or zero).
    self.cchIndent = cchIndent if cchIndent else offBeginLine;
    ## The raw lines the block is made up of.
    self.asLines = [] # type: List[str]
    ## Indicates whether the block includes macro expansion parts (kiMacroExp_None,
    ## kiMacroExp_Entire, kiMacroExp_Partial).
    self.iMacroExp = self.kiMacroExp_None;
    ## IEM_MC_BEGIN: Argument count (-1 until IEM_MC_BEGIN has been parsed).
    self.cArgs = -1;
    ## IEM_MC_ARG, IEM_MC_ARG_CONST, IEM_MC_ARG_LOCAL_REF, IEM_MC_ARG_LOCAL_EFLAGS.
    self.aoArgs = [] # type: List[McStmtArg]
    ## IEM_MC_BEGIN: Locals count (-1 until IEM_MC_BEGIN has been parsed).
    self.cLocals = -1;
    ## IEM_MC_LOCAL, IEM_MC_LOCAL_ASSIGN, IEM_MC_LOCAL_CONST, IEM_MC_LOCAL_EFLAGS,
    ## IEM_MC_ARG_LOCAL_EFLAGS.
    self.aoLocals = [] # type: List[McStmtVar]
    ## IEM_MC_BEGIN: IEM_MC_F_XXX dictionary
    self.dsMcFlags = {} # type: Dict[str, bool]
    ## IEM_MC_[DEFER_TO|CALL]_CIMPL_XXX: IEM_CIMPL_F_XXX dictionary
    self.dsCImplFlags = {} # type: Dict[str, bool]
    ## Decoded statements in the block.
    self.aoStmts = [] # type: List[McStmt]
2129
def complete(self, iEndLine, offEndLine, offAfterEnd, asLines):
    """
    Completes the microcode block by recording where it ends and the raw
    lines it is made up of.

    May only be called once per block (asserted via iEndLine == -1).
    """
    assert self.iEndLine == -1;
    (self.iEndLine, self.offEndLine, self.offAfterEnd) = (iEndLine, offEndLine, offAfterEnd);
    self.asLines = asLines;
2139
def raiseDecodeError(self, sRawCode, off, sMessage):
    """
    Raises a decoding error (ParserException) for offset off in sRawCode.

    The reported line is iBeginLine plus the number of newlines preceding
    off, the reported column is one-based.
    """
    cLinesBefore = sRawCode.count('\n', 0, off);
    iColumn      = off - sRawCode.rfind('\n', 0, off);  # rfind gives -1 on the first line.
    sWhere       = '%s:%d:%d' % (self.sSrcFile, self.iBeginLine + cLinesBefore, iColumn,);
    raise ParserException(sWhere + ': parsing error: %s' % (sMessage,));
2146
def raiseStmtError(self, sName, sMessage):
    """
    Raises a statement parser error (ParserException) for statement sName,
    using the first line of the block as location.
    """
    sWhere = '%s:%d' % (self.sSrcFile, self.iBeginLine,);
    raise ParserException(sWhere + ': %s: parsing error: %s' % (sName, sMessage,));
2150
def checkStmtParamCount(self, sName, asParams, cParamsExpected):
    """
    Checks that statement sName got exactly cParamsExpected parameters.

    Returns True if it did, raises ParserException if not.
    """
    cParamsFound = len(asParams);
    if cParamsFound == cParamsExpected:
        return True;
    raise ParserException('%s:%d: %s: Expected %s parameters, found %s!'
                          % (self.sSrcFile, self.iBeginLine, sName, cParamsExpected, cParamsFound,));
2157
2158 @staticmethod
def parseMcGeneric(oSelf, sName, asParams):
    """
    Generic parser: wraps the name and parameters in a plain McStmt object
    without looking at them.  oSelf is not used.
    """
    _ = oSelf;
    oStmt = McStmt(sName, asParams);
    return oStmt;
2163
2164 @staticmethod
def parseMcGenericCond(oSelf, sName, asParams):
    """
    Generic parser: wraps the name and parameters in a plain McStmtCond
    object (with empty branches) without looking at them.  oSelf is not used.
    """
    _ = oSelf;
    oStmt = McStmtCond(sName, asParams);
    return oStmt;
2169
## The architecture values parseMcNativeIf accepts in IEM_MC_NATIVE_IF.
## Only the keys are checked there, the values do not matter.
kdArchVals = {
    'RT_ARCH_VAL_X86': True,
    'RT_ARCH_VAL_AMD64': True,
    'RT_ARCH_VAL_ARM32': True,
    'RT_ARCH_VAL_ARM64': True,
    'RT_ARCH_VAL_SPARC32': True,
    'RT_ARCH_VAL_SPARC64': True,
};
2178
2179 @staticmethod
def parseMcNativeIf(oSelf, sName, asParams):
    """
    IEM_MC_NATIVE_IF

    The single parameter is either '0' (no architectures) or a list of
    architecture values separated by '|'.  Raises a statement error for
    values not found in kdArchVals.

    Returns a McStmtNativeIf object.
    """
    oSelf.checkStmtParamCount(sName, asParams, 1);
    sArchExpr       = asParams[0];
    asArchitectures = [];
    if sArchExpr.strip() != '0':
        for sRawArch in sArchExpr.split('|'):
            asArchitectures.append(sRawArch.strip());
        for sArch in asArchitectures:
            if sArch not in oSelf.kdArchVals:
                oSelf.raiseStmtError(sName, 'Unknown architecture: %s' % (sArch,));
    return McStmtNativeIf(sName, asArchitectures);
2191
2192 @staticmethod
def parseMcBegin(oSelf, sName, asParams):
    """
    IEM_MC_BEGIN

    Takes four parameters: argument count, locals count, IEM_MC_F_XXX flags
    and IEM_CIMPL_F_XXX flags ('0' or a '|' separated list for the latter
    two).  The counts and flags are recorded in oSelf, with each
    IEM_MC_F_XXX flag also setting the flags g_kdMcFlags lists for it.

    Raises a statement error if the block already saw an IEM_MC_BEGIN or if
    a flag is unknown.  Returns a plain McStmt object.
    """
    oSelf.checkStmtParamCount(sName, asParams, 4);
    (sArgCount, sLocalCount, sMcFlags, sCImplFlags) = asParams;

    fUntouched = oSelf.cArgs == -1 and oSelf.cLocals == -1 and not oSelf.dsMcFlags;
    if not fUntouched:
        oSelf.raiseStmtError(sName, 'Used more than once!');
    oSelf.cArgs   = int(sArgCount);
    oSelf.cLocals = int(sLocalCount);

    if sMcFlags != '0':
        for sRawFlag in sMcFlags.split('|'):
            sFlag = sRawFlag.strip();
            if sFlag not in g_kdMcFlags:
                oSelf.raiseStmtError(sName, 'Unknown flag: %s' % (sFlag, ));
            oSelf.dsMcFlags[sFlag] = True;
            oSelf.dsMcFlags.update(dict.fromkeys(g_kdMcFlags[sFlag], True));

    if sCImplFlags != '0':
        oSelf.parseCImplFlags(sName, sCImplFlags);

    return McBlock.parseMcGeneric(oSelf, sName, asParams);
2214
2215 @staticmethod
def parseMcArg(oSelf, sName, asParams):
    """
    IEM_MC_ARG

    Parameters: type, variable name, argument number.
    Returns the McStmtArg object, which is also appended to oSelf.aoArgs.
    """
    oSelf.checkStmtParamCount(sName, asParams, 3);
    (sType, sVarName, sArgNo) = asParams;
    oStmt = McStmtArg(sName, asParams, sType, sVarName, int(sArgNo));
    oSelf.aoArgs.append(oStmt);
    return oStmt;
2222
2223 @staticmethod
def parseMcArgConst(oSelf, sName, asParams):
    """
    IEM_MC_ARG_CONST

    Parameters: type, variable name, constant value, argument number.
    Returns the McStmtArg object, which is also appended to oSelf.aoArgs.
    """
    oSelf.checkStmtParamCount(sName, asParams, 4);
    (sType, sVarName, sValue, sArgNo) = asParams;
    oStmt = McStmtArg(sName, asParams, sType, sVarName, int(sArgNo), sConstValue = sValue);
    oSelf.aoArgs.append(oStmt);
    return oStmt;
2230
2231 @staticmethod
def parseMcArgLocalRef(oSelf, sName, asParams):
    """
    IEM_MC_ARG_LOCAL_REF

    Parameters: type, variable name, referenced local, argument number.
    Returns the McStmtArg object (reference type 'local'), which is also
    appended to oSelf.aoArgs.
    """
    oSelf.checkStmtParamCount(sName, asParams, 4);
    (sType, sVarName, sLocalRef, sArgNo) = asParams;
    oStmt = McStmtArg(sName, asParams, sType, sVarName, int(sArgNo), sRef = sLocalRef, sRefType = 'local');
    oSelf.aoArgs.append(oStmt);
    return oStmt;
2238
2239 @staticmethod
def parseMcArgLocalEFlags(oSelf, sName, asParams):
    """
    IEM_MC_ARG_LOCAL_EFLAGS

    Parameters: argument name, local variable name, argument number.

    Note! The statement is split up into an IEM_MC_LOCAL (uint32_t) and an
          IEM_MC_ARG_LOCAL_REF (uint32_t *) referencing that local.

    Returns both statements as a tuple (local first); they are also appended
    to oSelf.aoLocals and oSelf.aoArgs respectively.
    """
    oSelf.checkStmtParamCount(sName, asParams, 3);
    (sArgName, sLocalName, sArgNo) = asParams;

    oStmtLocal = McStmtVar('IEM_MC_LOCAL', ['uint32_t', sLocalName,], 'uint32_t', sLocalName);
    oSelf.aoLocals.append(oStmtLocal);

    oStmtArg = McStmtArg('IEM_MC_ARG_LOCAL_REF', ['uint32_t *', sArgName, sLocalName, sArgNo],
                         'uint32_t *', sArgName, int(sArgNo), sRef = sLocalName, sRefType = 'local');
    oSelf.aoArgs.append(oStmtArg);

    return (oStmtLocal, oStmtArg,);
2250
2251 @staticmethod
def parseMcImplicitAvxAArgs(oSelf, sName, asParams):
    """
    IEM_MC_IMPLICIT_AVX_AIMPL_ARGS

    Takes no parameters.

    Note! Translated to an IEM_MC_ARG_CONST statement declaring pXState as
          argument 0.

    Returns the McStmtArg object, which is also appended to oSelf.aoArgs.
    """
    oSelf.checkStmtParamCount(sName, asParams, 0);
    sType  = 'PX86XSAVEAREA';
    sVar   = 'pXState';
    sValue = '&pVCpu->cpum.GstCtx.XState';
    oStmt  = McStmtArg('IEM_MC_ARG_CONST', [sType, sVar, sValue, '0'], sType, sVar, 0, sConstValue = sValue);
    oSelf.aoArgs.append(oStmt);
    return oStmt;
2260
2261 @staticmethod
def parseMcLocal(oSelf, sName, asParams):
    """
    IEM_MC_LOCAL

    Parameters: type, variable name.
    Returns the McStmtVar object, which is also appended to oSelf.aoLocals.
    """
    oSelf.checkStmtParamCount(sName, asParams, 2);
    (sType, sVarName) = asParams;
    oStmt = McStmtVar(sName, asParams, sType, sVarName);
    oSelf.aoLocals.append(oStmt);
    return oStmt;
2268
2269 @staticmethod
def parseMcLocalAssign(oSelf, sName, asParams):
    """
    IEM_MC_LOCAL_ASSIGN

    Parameters: type, variable name, assigned value.
    Returns the McStmtVar object, which is also appended to oSelf.aoLocals.
    """
    oSelf.checkStmtParamCount(sName, asParams, 3);
    (sType, sVarName, sAssigned) = asParams;
    oStmt = McStmtVar(sName, asParams, sType, sVarName, sValue = sAssigned);
    oSelf.aoLocals.append(oStmt);
    return oStmt;
2276
2277 @staticmethod
def parseMcLocalConst(oSelf, sName, asParams):
    """
    IEM_MC_LOCAL_CONST

    Parameters: type, variable name, constant value.
    Returns the McStmtVar object, which is also appended to oSelf.aoLocals.
    """
    oSelf.checkStmtParamCount(sName, asParams, 3);
    (sType, sVarName, sConst) = asParams;
    oStmt = McStmtVar(sName, asParams, sType, sVarName, sValue = sConst);
    oSelf.aoLocals.append(oStmt);
    return oStmt;
2284
2285 @staticmethod
def parseMcLocalEFlags(oSelf, sName, asParams):
    """
    IEM_MC_LOCAL_EFLAGS

    Parameters: variable name.  The variable type is recorded as uint32_t.
    Returns the McStmtVar object, which is also appended to oSelf.aoLocals.
    """
    oSelf.checkStmtParamCount(sName, asParams, 1);
    sVarName = asParams[0];
    oStmt = McStmtVar(sName, asParams, 'uint32_t', sVarName);
    oSelf.aoLocals.append(oStmt);
    return oStmt;
2292
2293 @staticmethod
def parseMcCallAImpl(oSelf, sName, asParams):
    """
    IEM_MC_CALL_AIMPL_3|4

    The last character of the statement name gives the argument count; the
    statement takes two parameters more than that, with the return code name
    first (index 0) and the function second (index 1).

    Returns a McStmtCall object.
    """
    cParamsExpected = int(sName[-1]) + 2;
    oSelf.checkStmtParamCount(sName, asParams, cParamsExpected);
    return McStmtCall(sName, asParams, iFnParam = 1, iRcNameParam = 0);
2299
2300 @staticmethod
def parseMcCallVoidAImpl(oSelf, sName, asParams):
    """
    IEM_MC_CALL_VOID_AIMPL_2|3

    The last character of the statement name gives the argument count; the
    statement takes one parameter more than that, with the function first.

    Returns a McStmtCall object.
    """
    cParamsExpected = int(sName[-1]) + 1;
    oSelf.checkStmtParamCount(sName, asParams, cParamsExpected);
    return McStmtCall(sName, asParams, iFnParam = 0);
2306
2307 @staticmethod
def parseMcCallAvxAImpl(oSelf, sName, asParams):
    """
    IEM_MC_CALL_AVX_AIMPL_2|3

    The last character of the statement name gives the argument count; the
    statement takes one parameter more than that, with the function first.

    Returns a McStmtCall object.
    """
    cParamsExpected = int(sName[-1]) + 1;
    oSelf.checkStmtParamCount(sName, asParams, cParamsExpected);
    return McStmtCall(sName, asParams, iFnParam = 0);
2313
2314 @staticmethod
def parseMcCallFpuAImpl(oSelf, sName, asParams):
    """
    IEM_MC_CALL_FPU_AIMPL_1|2|3

    The last character of the statement name gives the argument count; the
    statement takes one parameter more than that, with the function first.

    Returns a McStmtCall object.
    """
    cParamsExpected = int(sName[-1]) + 1;
    oSelf.checkStmtParamCount(sName, asParams, cParamsExpected);
    return McStmtCall(sName, asParams, iFnParam = 0);
2320
2321 @staticmethod
def parseMcCallMmxAImpl(oSelf, sName, asParams):
    """
    IEM_MC_CALL_MMX_AIMPL_2|3

    The last character of the statement name gives the argument count; the
    statement takes one parameter more than that, with the function first.

    Returns a McStmtCall object.
    """
    cParamsExpected = int(sName[-1]) + 1;
    oSelf.checkStmtParamCount(sName, asParams, cParamsExpected);
    return McStmtCall(sName, asParams, iFnParam = 0);
2327
2328 @staticmethod
def parseMcCallSseAImpl(oSelf, sName, asParams):
    """
    IEM_MC_CALL_SSE_AIMPL_2|3

    The last character of the statement name gives the argument count; the
    statement takes one parameter more than that, with the function first.

    Returns a McStmtCall object.
    """
    cParamsExpected = int(sName[-1]) + 1;
    oSelf.checkStmtParamCount(sName, asParams, cParamsExpected);
    return McStmtCall(sName, asParams, iFnParam = 0);
2334
def parseCImplFlags(self, sName, sFlags):
    """
    Helper for parseMcBegin, parseMcCallCImpl and parseMcDeferToCImpl to
    validate and merge a bunch of IEM_CIMPL_F_XXX values into dsCImplFlags.

    sFlags is either '0' or a '|' separated list of flags, optionally
    containing comments and wrapped in parentheses.  Individual '0' entries
    are ignored.  Each flag also sets the flags g_kdCImplFlags lists for it.

    Raises a statement error for unknown flags.  This includes empty entries
    (like in 'A |' or '( )'), which are reported as an unknown flag with the
    usual file and line information.

    Returns None.
    """
    if sFlags == '0':
        return None;

    for sFlag in self.stripComments(sFlags).split('|'):
        # Strip blanks and one level of surrounding parentheses.  The
        # startswith/endswith tests are safe for empty strings.
        sFlag = sFlag.strip();
        if sFlag.startswith('('):
            sFlag = sFlag[1:].strip();
        if sFlag.endswith(')'):
            sFlag = sFlag[:-1].strip();

        if not sFlag or sFlag not in g_kdCImplFlags:
            if sFlag == '0':
                continue;
            self.raiseStmtError(sName, 'Unknown flag: %s' % (sFlag, ));

        self.dsCImplFlags[sFlag] = True;
        for sFlag2 in g_kdCImplFlags[sFlag]:
            self.dsCImplFlags[sFlag2] = True;
    return None;
2356
2357 @staticmethod
def parseMcCallCImpl(oSelf, sName, asParams):
    """
    IEM_MC_CALL_CIMPL_0|1|2|3|4|5

    The last character of the statement name gives the argument count; the
    statement takes three parameters more than that.  The first parameter
    holds the IEM_CIMPL_F_XXX flags, which are merged into oSelf, and the
    third one (index 2) is the function.

    Returns a McStmtCall object.
    """
    cParamsExpected = int(sName[-1]) + 3;
    oSelf.checkStmtParamCount(sName, asParams, cParamsExpected);
    sCImplFlags = asParams[0];
    oSelf.parseCImplFlags(sName, sCImplFlags);
    return McStmtCall(sName, asParams, iFnParam = 2);
2364
2365 @staticmethod
def parseMcDeferToCImpl(oSelf, sName, asParams):
    """
    IEM_MC_DEFER_TO_CIMPL_[0|1|2|3]_RET

    The argument count is the fifth character from the end of the statement
    name (the digit in front of '_RET'); the statement takes three parameters
    more than that.  The first parameter holds the IEM_CIMPL_F_XXX flags,
    which are merged into oSelf, and the third one (index 2) is the function.

    Note! This code is called by workerIemMcDeferToCImplXRet.

    Returns a McStmtCall object.
    """
    cParamsExpected = int(sName[-5]) + 3;
    oSelf.checkStmtParamCount(sName, asParams, cParamsExpected);
    sCImplFlags = asParams[0];
    oSelf.parseCImplFlags(sName, sCImplFlags);
    return McStmtCall(sName, asParams, iFnParam = 2);
2374
2375 @staticmethod
def stripComments(sCode):
    """
    Returns sCode with C and C++ comments removed.

    A C++ comment is removed up to but not including the newline ending it.
    A C comment is replaced by a single space, unless there already is
    whitespace right before or right after it.  An unterminated comment
    truncates the code in front of it, which is then right-stripped.

    Note! String literals are not taken into account.
    """
    offScan = 0;
    while offScan < len(sCode):
        offSlash = sCode.find('/', offScan);
        if offSlash < 0 or offSlash + 1 >= len(sCode):
            break;

        chNext = sCode[offSlash + 1];
        if chNext == '/':
            # C++ comment: cut everything up to the end of the line.
            offEol = sCode.find('\n', offSlash + 2);
            if offEol < 0:
                return sCode[:offSlash].rstrip();
            sCode   = sCode[:offSlash] + sCode[offEol:];
            offScan = offSlash + 1;

        elif chNext == '*':
            # C comment: cut it out, keeping the surrounding tokens apart.
            offClose = sCode.find('*/', offSlash + 2);
            if offClose < 0:
                return sCode[:offSlash].rstrip();
            offAfter     = offClose + 2;
            fSpaceBefore = offSlash > 0 and sCode[offSlash - 1].isspace();
            fSpaceAfter  = offAfter < len(sCode) and sCode[offAfter].isspace();
            sSep         = '' if fSpaceBefore or fSpaceAfter else ' ';
            sCode   = sCode[:offSlash] + sSep + sCode[offAfter:];
            offScan = offSlash + len(sSep);

        else:
            # Just a slash, not a comment.
            offScan = offSlash + 1;
    return sCode;
2407
2408 @staticmethod
def extractParam(sCode, offParam):
    """
    Extracts the parameter value starting at offParam in sCode.

    The parameter ends at the first ',' or ')' that is not inside nested
    parentheses, or at the end of sCode.

    Returns the stripped value and the offset of the terminating ',' or ')'
    (len(sCode) if there is none).
    """
    cDepth = 0;
    offEnd = offParam;
    while offEnd < len(sCode):
        ch = sCode[offEnd];
        if cDepth == 0 and ch in ',)':
            break;
        if ch == '(':
            cDepth += 1;
        elif ch == ')':
            cDepth -= 1;
        offEnd += 1;
    return (sCode[offParam : offEnd].strip(), offEnd);
2429
2430 @staticmethod
def extractParams(sCode, offOpenParen):
    """
    Parses the parameter list starting with the '(' at offOpenParen in sCode.

    Returns the list of parameter values and the offset of the closing
    parenthesis.
    Returns (None, len(sCode)) if no closing parenthesis was found.
    """
    assert sCode[offOpenParen] == '(';
    asParams = [];
    off = offOpenParen + 1;
    while off < len(sCode):
        ch = sCode[off];
        if ch.isspace():
            off += 1;
        elif ch != ')':
            (sParam, off) = McBlock.extractParam(sCode, off);
            asParams.append(sParam);
            # The parameter ran up to the end of the code, so the list is
            # unterminated.  Report it the documented way so the caller can
            # produce a proper parsing error.
            if off >= len(sCode):
                break;
            if sCode[off] == ',':
                off += 1;
        else:
            return (asParams, off);
    return (None, off);
2453
2454 @staticmethod
def findClosingBraces(sCode, off, offStop):
    """
    Finds the '}' matching the '{' at off in sCode, searching no further
    than offStop.

    Returns the offset of the matching '}' on success, otherwise -1.

    Note! Does not take comments into account.
    """
    cOpen   = 1;
    offScan = off + 1;
    while offScan < offStop:
        offClose = sCode.find('}', offScan, offStop);
        if offClose < 0:
            return -1;
        # Account for the braces opened before this closing one, then close one.
        cOpen += sCode.count('{', offScan, offClose) - 1;
        if cOpen == 0:
            return offClose;
        offScan = offClose + 1;
    return -1;
2474
2475 @staticmethod
def countSpacesAt(sCode, off, offStop):
    """
    Returns the number of consecutive whitespace characters starting at off
    in sCode, not looking at or beyond offStop.
    """
    cchSpaces = 0;
    while off + cchSpaces < offStop and sCode[off + cchSpaces].isspace():
        cchSpaces += 1;
    return cchSpaces;
2482
2483 @staticmethod
def skipSpacesAt(sCode, off, offStop):
    """
    Returns the first offset at or after off with a non-space character,
    or offStop if only spaces were found before reaching it.
    """
    cchSpaces = McBlock.countSpacesAt(sCode, off, offStop);
    return off + cchSpaces;
2487
2488 @staticmethod
def isSubstrAt(sStr, off, sSubStr):
    """
    Returns True if the slice of sStr starting at off and having the length
    of sSubStr equals sSubStr, otherwise False.
    """
    offEnd = off + len(sSubStr);
    return sStr[off : offEnd] == sSubStr;
2492
## Matches the start of C/C++ control statements: 'if (', 'else', 'while (', 'for (' and 'do'.
koReCppCtrlStmts = re.compile(r'\b(if\s*[(]|else\b|while\s*[(]|for\s*[(]|do\b)');
## Matches references to the 'iem.s.' members listed in the pattern.
koReIemDecoderVars = re.compile( r'iem\.s\.(fPrefixes|uRexReg|uRexB|uRexIndex|iEffSeg|offModRm|cbOpcode|offOpcode'
                                + r'|enmEffOpSize|enmDefOpSize|enmDefAddrMode|enmEffAddrMode|idxPrefix'
                                + r'|uVex3rdReg|uVexLength|fEvxStuff|uFpuOpcode|abOpcode'
                                + r')');

## The conditional statement families, one tuple each.  decodeCode matches the
## first element as a prefix at the current offset to recognise a conditional;
## the other two elements name the corresponding ELSE and ENDIF statements.
kaasConditions = (
    ( 'IEM_MC_IF_', 'IEM_MC_ELSE', 'IEM_MC_ENDIF' ),
    ( 'IEM_MC_NATIVE_IF', 'IEM_MC_NATIVE_ELSE', 'IEM_MC_NATIVE_ENDIF' ),
);
def decodeCode(self, sRawCode, off = 0, offStop = -1, iLevel = 0): # pylint: disable=too-many-statements,too-many-branches
    """
    Decodes sRawCode[off : offStop].

    The text is scanned left to right and split into:
        - Whitespace and C/C++ comments, which are skipped.
        - IEM_MC_XXX statements, which are handed to the parser function
          registered for the statement name in g_dMcStmtParsers.  For the
          conditional ones (see kaasConditions) the IF and optional ELSE
          blocks are decoded by recursive calls and stored in the
          aoIfBranch / aoElseBranch attributes of the statement.
        - Anything else, which is taken to be a C/C++ statement
          (McCppGeneric), a preprocessor line (McCppPreProc) or an if/else
          construct (McCppCond, with branches decoded recursively).

    Parameters:
        sRawCode - The raw code string.
        off      - Offset into sRawCode to start decoding at.
        offStop  - Offset to stop at (exclusive).  A negative value means
                   len(sRawCode).
        iLevel   - The conditional nesting level of this call; recursive
                   calls pass iLevel + 1.  An MC conditional encountered
                   when iLevel is above 1 is reported as an error.

    Returns list of McStmt instances.
    Raises ParserException on failure.
    """
    if offStop < 0:
        offStop = len(sRawCode);
    aoStmts = [];
    while off < offStop:
        ch = sRawCode[off];

        #
        # Skip spaces and comments.
        #
        if ch.isspace():
            off += 1;

        elif ch == '/':
            ch = sRawCode[off + 1];
            if ch == '/': # C++ comment.
                off = sRawCode.find('\n', off + 2);
                if off < 0:
                    break; # No newline after the comment, so there is nothing left to decode.
                off += 1;
            elif ch == '*': # C comment.
                off = sRawCode.find('*/', off + 2);
                if off < 0:
                    break; # Unterminated comment, stop decoding and return what we have.
                off += 2;
            else:
                self.raiseDecodeError(sRawCode, off, 'Unexpected "/"');

        #
        # Is it a MC statement.
        #
        elif ch == 'I' and sRawCode[off : off + len('IEM_MC_')] == 'IEM_MC_':
            # All MC statements ends with a semicolon, except for conditionals which ends with a '{'.
            # Extract it and strip comments from it.
            # iCond is the kaasConditions index of the conditional kind, or -1 for a plain statement.
            if self.isSubstrAt(sRawCode, off, self.kaasConditions[0][0]): iCond = 0;
            elif self.isSubstrAt(sRawCode, off, self.kaasConditions[1][0]): iCond = 1;
            else: iCond = -1;
            if iCond < 0:
                offEnd = sRawCode.find(';', off + len('IEM_MC_'));
                if offEnd <= off: # find() returns -1 when there is no match.
                    self.raiseDecodeError(sRawCode, off, 'MC statement without a ";"');
            else:
                offEnd = sRawCode.find('{', off + len(self.kaasConditions[iCond][0]));
                if offEnd <= off:
                    self.raiseDecodeError(sRawCode, off, 'MC conditional statement without a "{"');
                if sRawCode.find(';', off + len(self.kaasConditions[iCond][0]), offEnd) > off:
                    self.raiseDecodeError(sRawCode, off, 'MC conditional statement without an immediate "{"');
                # Back up from the '{' and over whitespace in front of it, so the statement text
                # extracted below ends with the closing parenthesis of the condition.
                # NOTE(review): this appears to rely on whitespace separating ')' and '{' - confirm.
                offEnd -= 1;
                while offEnd > off and sRawCode[offEnd - 1].isspace():
                    offEnd -= 1;

            sRawStmt = self.stripComments(sRawCode[off : offEnd]);

            # Isolate the statement name.
            offOpenParen = sRawStmt.find('(');
            if offOpenParen < 0:
                self.raiseDecodeError(sRawCode, off, 'MC statement without a "("');
            sName = sRawStmt[: offOpenParen].strip();

            # Extract the parameters.
            (asParams, offCloseParen) = self.extractParams(sRawStmt, offOpenParen);
            if asParams is None:
                self.raiseDecodeError(sRawCode, off, 'MC statement without a closing parenthesis');
            if offCloseParen + 1 != len(sRawStmt):
                self.raiseDecodeError(sRawCode, off,
                                      'Unexpected code following MC statement: %s' % (sRawStmt[offCloseParen + 1:]));

            # Hand it to the handler.
            # The first entry of the g_dMcStmtParsers tuple is the parser function; it may return
            # either a single statement object or a list/tuple of them.
            fnParser = g_dMcStmtParsers.get(sName);
            if not fnParser:
                self.raiseDecodeError(sRawCode, off, 'Unknown MC statement: %s' % (sName,));
            fnParser = fnParser[0];
            oStmt = fnParser(self, sName, asParams);
            if not isinstance(oStmt, (list, tuple)):
                aoStmts.append(oStmt);
            else:
                aoStmts.extend(oStmt);

            #
            # If conditional, we need to parse the whole statement.
            #
            # For reasons of simplicity, we assume the following structure
            # and parse each branch in a recursive call:
            # IEM_MC_IF_XXX() {
            # IEM_MC_WHATEVER();
            # } IEM_MC_ELSE() {
            # IEM_MC_WHATEVER();
            # } IEM_MC_ENDIF();
            #
            if iCond >= 0:
                if iLevel > 1: ## @todo discount IEM_MC_NATIVE_IF.
                    self.raiseDecodeError(sRawCode, off, 'Too deep nesting of conditionals.');

                # Find start of the IF block:
                offBlock1 = self.skipSpacesAt(sRawCode, offEnd, offStop);
                if sRawCode[offBlock1] != '{':
                    self.raiseDecodeError(sRawCode, offBlock1, 'Expected "{" following %s' % (sName,));

                # Find the end of it.
                offBlock1End = self.findClosingBraces(sRawCode, offBlock1, offStop);
                if offBlock1End < 0:
                    self.raiseDecodeError(sRawCode, offBlock1, 'No matching "}" closing IF block of %s' % (sName,));

                # Decode what is between the braces (both braces excluded).
                oStmt.aoIfBranch = self.decodeCode(sRawCode, offBlock1 + 1, offBlock1End, iLevel + 1);

                # Is there an else section?
                off = self.skipSpacesAt(sRawCode, offBlock1End + 1, offStop);
                sElseNm = self.kaasConditions[iCond][1];
                if self.isSubstrAt(sRawCode, off, sElseNm):
                    # The else macro is expected to be invoked with an empty parameter list.
                    off = self.skipSpacesAt(sRawCode, off + len(sElseNm), offStop);
                    if sRawCode[off] != '(':
                        self.raiseDecodeError(sRawCode, off, 'Expected "(" following %s"' % (sElseNm,));
                    off = self.skipSpacesAt(sRawCode, off + 1, offStop);
                    if sRawCode[off] != ')':
                        self.raiseDecodeError(sRawCode, off, 'Expected ")" following %s("' % (sElseNm,));

                    # Find start of the ELSE block.
                    offBlock2 = self.skipSpacesAt(sRawCode, off + 1, offStop);
                    if sRawCode[offBlock2] != '{':
                        self.raiseDecodeError(sRawCode, offBlock2, 'Expected "{" following %s()"' % (sElseNm,));

                    # Find the end of it.
                    offBlock2End = self.findClosingBraces(sRawCode, offBlock2, offStop);
                    if offBlock2End < 0:
                        self.raiseDecodeError(sRawCode, offBlock2, 'No matching "}" closing ELSE block of %s' % (sName,));

                    oStmt.aoElseBranch = self.decodeCode(sRawCode, offBlock2 + 1, offBlock2End, iLevel + 1);
                    off = self.skipSpacesAt(sRawCode, offBlock2End + 1, offStop);

                # Parse past the endif statement.
                # It must be exactly the endif name followed by '(', ')' and ';', with optional
                # whitespace in between.
                sEndIfNm = self.kaasConditions[iCond][2];
                if not self.isSubstrAt(sRawCode, off, sEndIfNm):
                    self.raiseDecodeError(sRawCode, off, 'Expected %s for closing %s' % (sEndIfNm, sName,));
                off = self.skipSpacesAt(sRawCode, off + len(sEndIfNm), offStop);
                if sRawCode[off] != '(':
                    self.raiseDecodeError(sRawCode, off, 'Expected "(" following %s"' % (sEndIfNm,));
                off = self.skipSpacesAt(sRawCode, off + 1, offStop);
                if sRawCode[off] != ')':
                    self.raiseDecodeError(sRawCode, off, 'Expected ")" following %s("' % (sEndIfNm,));
                off = self.skipSpacesAt(sRawCode, off + 1, offStop);
                if sRawCode[off] != ';':
                    self.raiseDecodeError(sRawCode, off, 'Expected ";" following %s()"' % (sEndIfNm,));
                off += 1;

            else:
                # Advance.
                off = offEnd + 1;

        #
        # Otherwise it must be a C/C++ statement of sorts.
        #
        else:
            # Find the end of the statement. if and else requires special handling.
            sCondExpr = None;
            oMatch = self.koReCppCtrlStmts.match(sRawCode, off);
            if oMatch:
                if oMatch.group(1)[-1] == '(':
                    # Keyword with a parenthesized expression: extract the expression.
                    (sCondExpr, offEnd) = self.extractParam(sRawCode, oMatch.end());
                else:
                    offEnd = oMatch.end();
                if not oMatch.group(1).startswith('if') and oMatch.group(1) != 'else':
                    self.raiseDecodeError(sRawCode, off, 'Only if/else control statements allowed: %s' % (oMatch.group(1),));
            elif ch == '#':
                # Preprocessor line: it ends at the next newline or at offStop, minus trailing whitespace.
                offEnd = sRawCode.find('\n', off, offStop);
                if offEnd < 0:
                    offEnd = offStop;
                offEnd -= 1;
                while offEnd > off and sRawCode[offEnd - 1].isspace():
                    offEnd -= 1;
            else:
                # Plain statement: it ends at the next semicolon.
                # NOTE(review): this search is not limited to offStop.
                offEnd = sRawCode.find(';', off);
                if offEnd < 0:
                    self.raiseDecodeError(sRawCode, off, 'C++ statement without a ";"');

            # Check this and the following statement whether it might have
            # something to do with decoding. This is a statement filter
            # criteria when generating the threaded functions blocks.
            # (Only the first two markers are also looked for in the following statement.)
            offNextEnd = sRawCode.find(';', offEnd + 1);
            fDecode = ( sRawCode.find('IEM_OPCODE_', off, max(offEnd, offNextEnd)) >= 0
                        or sRawCode.find('IEMOP_HLP_DONE_', off, max(offEnd, offNextEnd)) >= 0
                        or sRawCode.find('IEMOP_HLP_DECODED_', off, offEnd) >= 0
                        or sRawCode.find('IEMOP_HLP_RAISE_UD_IF_MISSING_GUEST_FEATURE', off, offEnd) >= 0
                        or sRawCode.find('IEMOP_HLP_VMX_INSTR', off, offEnd) >= 0
                        or sRawCode.find('IEMOP_HLP_IN_VMX_OPERATION', off, offEnd) >= 0 ## @todo wrong
                        );

            if not oMatch:
                if ch != '#':
                    aoStmts.append(McCppGeneric(sRawCode[off : offEnd + 1], fDecode));
                else:
                    aoStmts.append(McCppPreProc(sRawCode[off : offEnd + 1]));
                off = offEnd + 1;
            elif oMatch.group(1).startswith('if'):
                #
                # if () xxx [else yyy] statement.
                #
                oStmt = McCppCond(sCondExpr, fDecode);
                aoStmts.append(oStmt);
                off = offEnd + 1;

                # Following the if () we can either have a {} containing zero or more statements
                # or we have a single statement.
                offBlock1 = self.skipSpacesAt(sRawCode, offEnd + 1, offStop);
                if sRawCode[offBlock1] == '{':
                    offBlock1End = self.findClosingBraces(sRawCode, offBlock1, offStop);
                    if offBlock1End < 0:
                        self.raiseDecodeError(sRawCode, offBlock1, 'No matching "}" closing if block');
                    offBlock1 += 1; # Skip the opening brace.
                else:
                    offBlock1End = sRawCode.find(';', offBlock1, offStop);
                    if offBlock1End < 0:
                        self.raiseDecodeError(sRawCode, off, 'Expected ";" terminating one-line if block"');

                oStmt.aoIfBranch = self.decodeCode(sRawCode, offBlock1, offBlock1End, iLevel + 1);

                # The else is optional and can likewise be followed by {} or a single statement.
                off = self.skipSpacesAt(sRawCode, offBlock1End + 1, offStop);
                if self.isSubstrAt(sRawCode, off, 'else') and sRawCode[off + len('else')].isspace():
                    offBlock2 = self.skipSpacesAt(sRawCode, off + len('else'), offStop);
                    if sRawCode[offBlock2] == '{':
                        offBlock2End = self.findClosingBraces(sRawCode, offBlock2, offStop);
                        if offBlock2End < 0:
                            self.raiseDecodeError(sRawCode, offBlock2, 'No matching "}" closing else block');
                        offBlock2 += 1; # Skip the opening brace.
                    else:
                        offBlock2End = sRawCode.find(';', offBlock2, offStop);
                        if offBlock2End < 0:
                            self.raiseDecodeError(sRawCode, off, 'Expected ";" terminating one-line else block"');

                    oStmt.aoElseBranch = self.decodeCode(sRawCode, offBlock2, offBlock2End, iLevel + 1);
                    off = offBlock2End + 1;

            elif oMatch.group(1) == 'else':
                # Problematic 'else' branch, typically involving #ifdefs.
                self.raiseDecodeError(sRawCode, off, 'Mixed up else/#ifdef or something confusing us.');

    return aoStmts;
2746
def decode(self):
    """
    Returns the decoded statement list of the block.

    As long as self.aoStmts is non-empty it is returned as-is.  Otherwise
    the lines in self.asLines are joined, run through decodeCode() and the
    result is stored in self.aoStmts before being returned.

    Raises ParserException on failure.
    """
    if self.aoStmts:
        return self.aoStmts;

    sRawCode = ''.join(self.asLines);
    self.aoStmts = self.decodeCode(sRawCode);
    return self.aoStmts;
2756
2757
def checkForTooEarlyEffSegUse(self, aoStmts):
    """
    Checks that pVCpu->iem.s.iEffSeg isn't referenced by any statement
    preceding the first IEM_MC_CALC_RM_EFF_ADDR statement in aoStmts.

    Only the given list is examined.  Conditional branches are not entered,
    as we're ASSUMING such uses will not occur before the address
    calculation.

    Returns None on success, error string on failure.

    See r158454 for an example of this issue.
    """
    sNeedle = 'pVCpu->iem.s.iEffSeg';
    for idxCalc, oCalcStmt in enumerate(aoStmts):
        if oCalcStmt.sName != 'IEM_MC_CALC_RM_EFF_ADDR':
            continue;

        # Walk backwards from the statement just before the address calculation.
        for idxPrev in range(idxCalc - 1, -1, -1):
            if any(sNeedle in sParam for sParam in aoStmts[idxPrev].asParams):
                return "statement #%u: pVCpu->iem.s.iEffSeg is used prior to IEM_MC_CALC_RM_EFF_ADDR!" % (idxPrev + 1,);

        # Only the first IEM_MC_CALC_RM_EFF_ADDR is considered.
        break;
    return None;
2779
# Captures the first word of a C/C++ statement (group 1); the word must be
# followed by a space, an opening parenthesis or a semicolon.
koReCppFirstWord = re.compile(r'^\s*(\w+)[ (;]');

# First words that checkForDoneDecoding() accepts in a decoding-flagged
# C/C++ statement that comes after the IEMOP_HLP_DONE_*DECODING* invocation.
kdDecodeCppStmtOkayAfterDone = dict.fromkeys(
    (
        'IEMOP_HLP_IN_VMX_OPERATION',
        'IEMOP_HLP_VMX_INSTR',
    ),
    True);
2785
def checkForDoneDecoding(self, aoStmts):
    """
    Checks that the block contains exactly one IEMOP_HLP_DONE_*DECODING*
    (or IEMOP_HLP_DECODED_*) macro invocation and that the statements
    around it are on the right side of it:
        - No state modifying MC statement (2nd g_dMcStmtParsers column)
          before it.
        - No decoding C/C++ statement and no IEM_MC_CALC_RM_EFF_ADDR
          after it.
    A leading IEM_MC_DEFER_TO_CIMPL_XXX statement counts as an implicit
    invocation.

    Returns None on success, error string on failure.

    This ensures safe instruction restarting in case the recompiler runs
    out of TB resources during recompilation (e.g. aRanges or aGCPhysPages
    entries).
    """

    # The IEMOP_HLP_DONE_ stuff is not allowed inside conditionals, so only
    # the top level statement list is examined.
    cDone = 0;
    for idxStmt, oCurStmt in enumerate(aoStmts):
        #
        # MC statements.
        #
        if not oCurStmt.isCppStmt():
            sMcName = oCurStmt.sName;
            if idxStmt == 0 and sMcName.startswith('IEM_MC_DEFER_TO_CIMPL_'): # implicit
                cDone += 1;
            elif cDone == 0:
                if g_dMcStmtParsers.get(sMcName, (None, False))[1]:
                    return "statement #%u: State modifying MC statement before IEMOP_HLP_DONE_*DECODING*!" % (idxStmt + 1,);
            elif sMcName == 'IEM_MC_CALC_RM_EFF_ADDR':
                return "statement #%u: Decoding statement following IEMOP_HLP_DONE_*DECODING*!" % (idxStmt + 1,);
            continue;

        #
        # C/C++ statements: go by the first word, if there is one.
        #
        oWordMatch = self.koReCppFirstWord.match(oCurStmt.asParams[0]);
        if not oWordMatch:
            continue;
        sWord = oWordMatch.group(1);
        if sWord.startswith(('IEMOP_HLP_DONE_', 'IEMOP_HLP_DECODED_')):
            cDone += 1;
        elif cDone > 0 and oCurStmt.fDecode and sWord not in self.kdDecodeCppStmtOkayAfterDone:
            return "statement #%u: Decoding statement following IEMOP_HLP_DONE_*DECODING*!" % (idxStmt + 1,);

    if cDone == 0:
        return "Block is missing IEMOP_HLP_DONE_*DECODING* invocation!";
    if cDone > 1:
        return "Block has more than one IEMOP_HLP_DONE_*DECODING* invocation!";
    return None;
2827
def checkForFetchAfterRef(self, aoStmts, asRegRefClasses):
    """
    Checks that the register references are placed after register fetches
    from the same register class.

    Parameters:
        aoStmts         - The statement list to check.  Branches of
                          McStmtCond statements are checked recursively.
        asRegRefClasses - Dictionary keyed by the register classes that
                          have been referenced so far.  It is updated as
                          the statements are processed and shared with the
                          recursive calls; the top level caller passes an
                          empty dictionary.

    Returns None on success, error string on failure.

    Example:
        SHL CH, CL

    If the CH reference is created first, the fetching of CL will cause the
    RCX guest register to have an active shadow register when it's being
    updated. The shadow register will then be stale after the SHL operation
    completes, without us noticing.

    It's easier to ensure we've got correct code than complicating the
    recompiler code with safeguards here.
    """
    for iStmt, oStmt in enumerate(aoStmts):
        if not oStmt.isCppStmt():
            sName  = oStmt.sName;
            offRef = sName.find("_REF_");
            if offRef > 0:
                #
                # A reference: Work out the register class and record it.
                #
                if sName in ('IEM_MC_IF_FPUREG_NOT_EMPTY_REF_R80',
                             'IEM_MC_IF_TWO_FPUREGS_NOT_EMPTY_REF_R80',
                             'IEM_MC_IF_TWO_FPUREGS_NOT_EMPTY_REF_R80_FIRST',):
                    sClass = 'FPUREG';
                else:
                    # The class is the word following '_REF_'.
                    offClass      = offRef + len("_REF_");
                    offUnderscore = sName.find('_', offClass);
                    if offUnderscore > 0:
                        sClass = sName[offClass : offUnderscore];
                    else:
                        # No further underscore, so the class is the whole remainder of the
                        # name (e.g. 'EFLAGS' for IEM_MC_REF_EFLAGS) and not just its first
                        # character, which would never match the class of any fetch below.
                        sClass = sName[offClass : ];
                asRegRefClasses[sClass] = True;
            else:
                #
                # A fetch: Complain if the register class has been referenced already.
                # Memory fetches (class starting with 'MEM') are not register fetches.
                #
                offFetch = sName.find("_FETCH_");
                if offFetch > 0:
                    sClass = sName[offFetch + len("_FETCH_") : ];
                    if not sClass.startswith("MEM"):
                        offUnderscore = sClass.find('_');
                        if offUnderscore >= 0:
                            assert offUnderscore > 0;
                            sClass = sClass[:offUnderscore];
                        if sClass in asRegRefClasses:
                            return "statement #%u: %s following REF! That'll mess up guest register shadowing" \
                                 % (iStmt + 1, sName,);

        # Go into branches.
        if isinstance(oStmt, McStmtCond):
            sRet = self.checkForFetchAfterRef(oStmt.aoIfBranch, asRegRefClasses);
            if sRet:
                return sRet;
            sRet = self.checkForFetchAfterRef(oStmt.aoElseBranch, asRegRefClasses);
            if sRet:
                return sRet;
    return None;
2883
def check(self):
    """
    Performs some sanity checks on the block.

    The block is decoded and run through checkForTooEarlyEffSegUse(),
    checkForDoneDecoding() and checkForFetchAfterRef(), in that order.

    Returns error string list, empty if all is fine.
    """
    aoStmts = self.decode();
    asResults = (
        self.checkForTooEarlyEffSegUse(aoStmts),
        self.checkForDoneDecoding(aoStmts),
        self.checkForFetchAfterRef(aoStmts, {}),
    );
    # The checks return None on success, so keep the actual complaints only.
    return [sError for sError in asResults if sError];
2905
2906
## Temporary flag for enabling / disabling experimental MCs depending on the
## SIMD register allocator.
## It is used in g_dMcStmtParsers below as the value of the last info column
## (native recompiler support) for the entries concerned.
g_fNativeSimd = True;
2910
2911## IEM_MC_XXX -> parser + info dictionary.
2912#
2913# The info columns:
2914# - col 1+0: boolean entry indicating whether the statement modifies state and
2915 # must not be used before IEMOP_HLP_DONE_*.
2916# - col 1+1: boolean entry indicating similar to the previous column but is
2917# used to decide when to emit calls for conditional jumps (Jmp/NoJmp).
2918# The difference is that most IEM_MC_IF_XXX entries are False here.
2919# - col 1+2: boolean entry indicating native recompiler support.
2920#
2921# The raw table was generated via the following command
2922# sed -n -e "s/^# *define *\(IEM_MC_[A-Z_0-9]*\)[ (].*$/ '\1': McBlock.parseMcGeneric,/p" include/IEMMc.h \
2923# | sort | uniq | gawk "{printf """ %%-60s (%%s, True)\n""", $1, $2}"
2924# pylint: disable=line-too-long
2925g_dMcStmtParsers = {
2926 'IEM_MC_ACTUALIZE_AVX_STATE_FOR_CHANGE': (McBlock.parseMcGeneric, False, False, True, ),
2927 'IEM_MC_ACTUALIZE_AVX_STATE_FOR_READ': (McBlock.parseMcGeneric, False, False, True, ),
2928 'IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE': (McBlock.parseMcGeneric, False, False, True, ),
2929 'IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ': (McBlock.parseMcGeneric, False, False, True, ),
2930 'IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE': (McBlock.parseMcGeneric, False, False, True, ),
2931 'IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ': (McBlock.parseMcGeneric, False, False, True, ),
2932 'IEM_MC_ADD_GREG_U16': (McBlock.parseMcGeneric, True, True, True, ),
2933 'IEM_MC_ADD_GREG_U16_TO_LOCAL': (McBlock.parseMcGeneric, False, False, False, ),
2934 'IEM_MC_ADD_GREG_U32': (McBlock.parseMcGeneric, True, True, True, ),
2935 'IEM_MC_ADD_GREG_U32_TO_LOCAL': (McBlock.parseMcGeneric, False, False, False, ),
2936 'IEM_MC_ADD_GREG_U64': (McBlock.parseMcGeneric, True, True, True, ),
2937 'IEM_MC_ADD_GREG_U64_TO_LOCAL': (McBlock.parseMcGeneric, False, False, False, ),
2938 'IEM_MC_ADD_GREG_U8_TO_LOCAL': (McBlock.parseMcGeneric, False, False, False, ),
2939 'IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR': (McBlock.parseMcGeneric, True, True, False, ),
2940 'IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR': (McBlock.parseMcGeneric, True, True, False, ),
2941 'IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR': (McBlock.parseMcGeneric, True, True, False, ),
2942 'IEM_MC_ADVANCE_RIP_AND_FINISH': (McBlock.parseMcGeneric, True, True, True, ),
2943 'IEM_MC_AND_2LOCS_U32': (McBlock.parseMcGeneric, False, False, False, ),
2944 'IEM_MC_AND_ARG_U16': (McBlock.parseMcGeneric, False, False, False, ),
2945 'IEM_MC_AND_ARG_U32': (McBlock.parseMcGeneric, False, False, False, ),
2946 'IEM_MC_AND_ARG_U64': (McBlock.parseMcGeneric, False, False, False, ),
2947 'IEM_MC_AND_GREG_U16': (McBlock.parseMcGeneric, True, True, False, ),
2948 'IEM_MC_AND_GREG_U32': (McBlock.parseMcGeneric, True, True, False, ),
2949 'IEM_MC_AND_GREG_U64': (McBlock.parseMcGeneric, True, True, False, ),
2950 'IEM_MC_AND_GREG_U8': (McBlock.parseMcGeneric, True, True, False, ),
2951 'IEM_MC_AND_LOCAL_U16': (McBlock.parseMcGeneric, False, False, True, ),
2952 'IEM_MC_AND_LOCAL_U32': (McBlock.parseMcGeneric, False, False, True, ),
2953 'IEM_MC_AND_LOCAL_U64': (McBlock.parseMcGeneric, False, False, True, ),
2954 'IEM_MC_AND_LOCAL_U8': (McBlock.parseMcGeneric, False, False, True, ),
2955 'IEM_MC_ARG': (McBlock.parseMcArg, False, False, True, ),
2956 'IEM_MC_ARG_CONST': (McBlock.parseMcArgConst, False, False, True, ),
2957 'IEM_MC_ARG_LOCAL_EFLAGS': (McBlock.parseMcArgLocalEFlags, False, False, True, ),
2958 'IEM_MC_ARG_LOCAL_REF': (McBlock.parseMcArgLocalRef, False, False, True, ),
2959 'IEM_MC_ASSIGN_TO_SMALLER': (McBlock.parseMcGeneric, False, False, True, ),
2960 'IEM_MC_BEGIN': (McBlock.parseMcBegin, False, False, True, ),
2961 'IEM_MC_BROADCAST_XREG_U16_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
2962 'IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
2963 'IEM_MC_BROADCAST_XREG_U64_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
2964 'IEM_MC_BROADCAST_XREG_U8_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
2965 'IEM_MC_BROADCAST_YREG_U128_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
2966 'IEM_MC_BROADCAST_YREG_U16_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
2967 'IEM_MC_BROADCAST_YREG_U32_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
2968 'IEM_MC_BROADCAST_YREG_U64_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
2969 'IEM_MC_BROADCAST_YREG_U8_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
2970 'IEM_MC_BSWAP_LOCAL_U16': (McBlock.parseMcGeneric, False, False, True, ),
2971 'IEM_MC_BSWAP_LOCAL_U32': (McBlock.parseMcGeneric, False, False, True, ),
2972 'IEM_MC_BSWAP_LOCAL_U64': (McBlock.parseMcGeneric, False, False, True, ),
2973 'IEM_MC_CALC_RM_EFF_ADDR': (McBlock.parseMcGeneric, False, False, False, ),
2974 'IEM_MC_CALL_AIMPL_3': (McBlock.parseMcCallAImpl, True, True, True, ),
2975 'IEM_MC_CALL_AIMPL_4': (McBlock.parseMcCallAImpl, True, True, True, ),
2976 'IEM_MC_CALL_AVX_AIMPL_2': (McBlock.parseMcCallAvxAImpl, True, True, False, ),
2977 'IEM_MC_CALL_AVX_AIMPL_3': (McBlock.parseMcCallAvxAImpl, True, True, False, ),
2978 'IEM_MC_CALL_CIMPL_0': (McBlock.parseMcCallCImpl, True, True, False, ),
2979 'IEM_MC_CALL_CIMPL_1': (McBlock.parseMcCallCImpl, True, True, False, ),
2980 'IEM_MC_CALL_CIMPL_2': (McBlock.parseMcCallCImpl, True, True, False, ),
2981 'IEM_MC_CALL_CIMPL_3': (McBlock.parseMcCallCImpl, True, True, False, ),
2982 'IEM_MC_CALL_CIMPL_4': (McBlock.parseMcCallCImpl, True, True, False, ),
2983 'IEM_MC_CALL_CIMPL_5': (McBlock.parseMcCallCImpl, True, True, False, ),
2984 'IEM_MC_CALL_FPU_AIMPL_1': (McBlock.parseMcCallFpuAImpl, True, True, False, ),
2985 'IEM_MC_CALL_FPU_AIMPL_2': (McBlock.parseMcCallFpuAImpl, True, True, False, ),
2986 'IEM_MC_CALL_FPU_AIMPL_3': (McBlock.parseMcCallFpuAImpl, True, True, False, ),
2987 'IEM_MC_CALL_MMX_AIMPL_2': (McBlock.parseMcCallMmxAImpl, True, True, False, ),
2988 'IEM_MC_CALL_MMX_AIMPL_3': (McBlock.parseMcCallMmxAImpl, True, True, False, ),
2989 'IEM_MC_CALL_SSE_AIMPL_2': (McBlock.parseMcCallSseAImpl, True, True, False, ),
2990 'IEM_MC_CALL_SSE_AIMPL_3': (McBlock.parseMcCallSseAImpl, True, True, False, ),
2991 'IEM_MC_CALL_VOID_AIMPL_0': (McBlock.parseMcCallVoidAImpl, True, True, True, ),
2992 'IEM_MC_CALL_VOID_AIMPL_1': (McBlock.parseMcCallVoidAImpl, True, True, True, ),
2993 'IEM_MC_CALL_VOID_AIMPL_2': (McBlock.parseMcCallVoidAImpl, True, True, True, ),
2994 'IEM_MC_CALL_VOID_AIMPL_3': (McBlock.parseMcCallVoidAImpl, True, True, True, ),
2995 'IEM_MC_CALL_VOID_AIMPL_4': (McBlock.parseMcCallVoidAImpl, True, True, True, ),
2996 'IEM_MC_CLEAR_EFL_BIT': (McBlock.parseMcGeneric, True, True, False, ),
2997 'IEM_MC_CLEAR_FSW_EX': (McBlock.parseMcGeneric, True, True, False, ),
2998 'IEM_MC_CLEAR_HIGH_GREG_U64': (McBlock.parseMcGeneric, True, True, True, ),
2999 'IEM_MC_CLEAR_XREG_U32_MASK': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
3000 'IEM_MC_CLEAR_YREG_128_UP': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
3001 'IEM_MC_COMMIT_EFLAGS': (McBlock.parseMcGeneric, True, True, True, ),
3002 'IEM_MC_COMMIT_EFLAGS_OPT': (McBlock.parseMcGeneric, True, True, True, ),
3003 'IEM_MC_COPY_XREG_U128': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
3004 'IEM_MC_COPY_YREG_U128_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
3005 'IEM_MC_COPY_YREG_U256_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
3006 'IEM_MC_COPY_YREG_U64_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
3007 'IEM_MC_DEFER_TO_CIMPL_0_RET': (McBlock.parseMcDeferToCImpl, False, False, False, ),
3008 'IEM_MC_DEFER_TO_CIMPL_1_RET': (McBlock.parseMcDeferToCImpl, False, False, False, ),
3009 'IEM_MC_DEFER_TO_CIMPL_2_RET': (McBlock.parseMcDeferToCImpl, False, False, False, ),
3010 'IEM_MC_DEFER_TO_CIMPL_3_RET': (McBlock.parseMcDeferToCImpl, False, False, False, ),
3011 'IEM_MC_END': (McBlock.parseMcGeneric, True, True, True, ),
3012 'IEM_MC_FETCH_EFLAGS': (McBlock.parseMcGeneric, False, False, True, ),
3013 'IEM_MC_FETCH_EFLAGS_U8': (McBlock.parseMcGeneric, False, False, False, ),
3014 'IEM_MC_FETCH_FCW': (McBlock.parseMcGeneric, False, False, True, ),
3015 'IEM_MC_FETCH_FSW': (McBlock.parseMcGeneric, False, False, True, ),
3016 'IEM_MC_FETCH_GREG_U16': (McBlock.parseMcGeneric, False, False, True, ),
3017 'IEM_MC_FETCH_GREG_U16_SX_U32': (McBlock.parseMcGeneric, False, False, True, ),
3018 'IEM_MC_FETCH_GREG_U16_SX_U64': (McBlock.parseMcGeneric, False, False, True, ),
3019 'IEM_MC_FETCH_GREG_U16_ZX_U32': (McBlock.parseMcGeneric, False, False, True, ),
3020 'IEM_MC_FETCH_GREG_U16_ZX_U64': (McBlock.parseMcGeneric, False, False, True, ),
3021 'IEM_MC_FETCH_GREG_U32': (McBlock.parseMcGeneric, False, False, True, ),
3022 'IEM_MC_FETCH_GREG_U32_SX_U64': (McBlock.parseMcGeneric, False, False, True, ),
3023 'IEM_MC_FETCH_GREG_U32_ZX_U64': (McBlock.parseMcGeneric, False, False, True, ),
3024 'IEM_MC_FETCH_GREG_U64': (McBlock.parseMcGeneric, False, False, True, ),
3025 'IEM_MC_FETCH_GREG_U64_ZX_U64': (McBlock.parseMcGeneric, False, False, True, ),
3026 'IEM_MC_FETCH_GREG_U8': (McBlock.parseMcGeneric, False, False, True, ), # thrd var
3027 'IEM_MC_FETCH_GREG_U8_SX_U16': (McBlock.parseMcGeneric, False, False, True, ), # thrd var
3028 'IEM_MC_FETCH_GREG_U8_SX_U32': (McBlock.parseMcGeneric, False, False, True, ), # thrd var
3029 'IEM_MC_FETCH_GREG_U8_SX_U64': (McBlock.parseMcGeneric, False, False, True, ), # thrd var
3030 'IEM_MC_FETCH_GREG_U8_ZX_U16': (McBlock.parseMcGeneric, False, False, True, ), # thrd var
3031 'IEM_MC_FETCH_GREG_U8_ZX_U32': (McBlock.parseMcGeneric, False, False, True, ), # thrd var
3032 'IEM_MC_FETCH_GREG_U8_ZX_U64': (McBlock.parseMcGeneric, False, False, True, ), # thrd var
3033 'IEM_MC_FETCH_GREG_PAIR_U32': (McBlock.parseMcGeneric, False, False, False, ),
3034 'IEM_MC_FETCH_GREG_PAIR_U64': (McBlock.parseMcGeneric, False, False, False, ),
3035 'IEM_MC_FETCH_MEM_D80': (McBlock.parseMcGeneric, True, True, False, ),
3036 'IEM_MC_FETCH_MEM_I16': (McBlock.parseMcGeneric, True, True, False, ),
3037 'IEM_MC_FETCH_MEM_I32': (McBlock.parseMcGeneric, True, True, False, ),
3038 'IEM_MC_FETCH_MEM_I64': (McBlock.parseMcGeneric, True, True, False, ),
3039 'IEM_MC_FETCH_MEM_R32': (McBlock.parseMcGeneric, True, True, False, ),
3040 'IEM_MC_FETCH_MEM_R64': (McBlock.parseMcGeneric, True, True, False, ),
3041 'IEM_MC_FETCH_MEM_R80': (McBlock.parseMcGeneric, True, True, False, ),
3042 'IEM_MC_FETCH_MEM_U128': (McBlock.parseMcGeneric, True, True, False, ),
3043 'IEM_MC_FETCH_MEM_U128_ALIGN_SSE': (McBlock.parseMcGeneric, True, True, False, ),
3044 'IEM_MC_FETCH_MEM_U128_NO_AC': (McBlock.parseMcGeneric, True, True, False, ),
3045 'IEM_MC_FETCH_MEM_U128_AND_XREG_U128': (McBlock.parseMcGeneric, True, True, False, ),
3046 'IEM_MC_FETCH_MEM_U128_AND_XREG_U128_AND_RAX_RDX_U64': (McBlock.parseMcGeneric, True, True, False, ),
3047 'IEM_MC_FETCH_MEM_U128_AND_XREG_U128_AND_EAX_EDX_U32_SX_U64':(McBlock.parseMcGeneric, True, True, False, ),
3048 'IEM_MC_FETCH_MEM_U16': (McBlock.parseMcGeneric, True, True, True, ),
3049 'IEM_MC_FETCH_MEM_U16_DISP': (McBlock.parseMcGeneric, True, True, True, ),
3050 'IEM_MC_FETCH_MEM_U16_SX_U32': (McBlock.parseMcGeneric, True, True, True, ), # movsx
3051 'IEM_MC_FETCH_MEM_U16_SX_U64': (McBlock.parseMcGeneric, True, True, True, ), # movsx
3052 'IEM_MC_FETCH_MEM_U16_ZX_U32': (McBlock.parseMcGeneric, True, True, True, ), # movzx
3053 'IEM_MC_FETCH_MEM_U16_ZX_U64': (McBlock.parseMcGeneric, True, True, True, ), # movzx
3054 'IEM_MC_FETCH_MEM_U256': (McBlock.parseMcGeneric, True, True, False, ),
3055 'IEM_MC_FETCH_MEM_U256_ALIGN_AVX': (McBlock.parseMcGeneric, True, True, False, ),
3056 'IEM_MC_FETCH_MEM_U256_NO_AC': (McBlock.parseMcGeneric, True, True, False, ),
3057 'IEM_MC_FETCH_MEM_U32': (McBlock.parseMcGeneric, True, True, True, ),
3058 'IEM_MC_FETCH_MEM_U32_DISP': (McBlock.parseMcGeneric, True, True, True, ), #bounds only
3059 'IEM_MC_FETCH_MEM_U32_SX_U64': (McBlock.parseMcGeneric, True, True, True, ), # movsx
3060 'IEM_MC_FETCH_MEM_U32_ZX_U64': (McBlock.parseMcGeneric, True, True, True, ), # movzx
3061 'IEM_MC_FETCH_MEM_U64': (McBlock.parseMcGeneric, True, True, True, ),
3062 'IEM_MC_FETCH_MEM_U64_ALIGN_U128': (McBlock.parseMcGeneric, True, True, False, ),
3063 'IEM_MC_FETCH_MEM_U8': (McBlock.parseMcGeneric, True, True, True, ),
3064 'IEM_MC_FETCH_MEM_U8_SX_U16': (McBlock.parseMcGeneric, True, True, True, ), # movsx
3065 'IEM_MC_FETCH_MEM_U8_SX_U32': (McBlock.parseMcGeneric, True, True, True, ), # movsx
3066 'IEM_MC_FETCH_MEM_U8_SX_U64': (McBlock.parseMcGeneric, True, True, True, ), # movsx
3067 'IEM_MC_FETCH_MEM_U8_ZX_U16': (McBlock.parseMcGeneric, True, True, True, ), # movzx
3068 'IEM_MC_FETCH_MEM_U8_ZX_U32': (McBlock.parseMcGeneric, True, True, True, ), # movzx
3069 'IEM_MC_FETCH_MEM_U8_ZX_U64': (McBlock.parseMcGeneric, True, True, True, ), # movzx
3070 'IEM_MC_FETCH_MEM_XMM': (McBlock.parseMcGeneric, True, True, False, ),
3071 'IEM_MC_FETCH_MEM_XMM_ALIGN_SSE': (McBlock.parseMcGeneric, True, True, False, ),
3072 'IEM_MC_FETCH_MEM_XMM_NO_AC': (McBlock.parseMcGeneric, True, True, False, ),
3073 'IEM_MC_FETCH_MEM_XMM_U32': (McBlock.parseMcGeneric, True, True, False, ),
3074 'IEM_MC_FETCH_MEM_XMM_U64': (McBlock.parseMcGeneric, True, True, False, ),
3075 'IEM_MC_FETCH_MEM_XMM_ALIGN_SSE_AND_XREG_XMM': (McBlock.parseMcGeneric, True, True, False, ),
3076 'IEM_MC_FETCH_MEM_XMM_U32_AND_XREG_XMM': (McBlock.parseMcGeneric, True, True, False, ),
3077 'IEM_MC_FETCH_MEM_XMM_U64_AND_XREG_XMM': (McBlock.parseMcGeneric, True, True, False, ),
3078 'IEM_MC_FETCH_MEM_YMM': (McBlock.parseMcGeneric, True, True, False, ),
3079 'IEM_MC_FETCH_MEM_YMM_ALIGN_AVX': (McBlock.parseMcGeneric, True, True, False, ),
3080 'IEM_MC_FETCH_MEM_YMM_NO_AC': (McBlock.parseMcGeneric, True, True, False, ),
3081 'IEM_MC_FETCH_MEM16_U8': (McBlock.parseMcGeneric, True, True, False, ),
3082 'IEM_MC_FETCH_MEM32_U8': (McBlock.parseMcGeneric, True, True, False, ),
3083 'IEM_MC_FETCH_MREG_U32': (McBlock.parseMcGeneric, False, False, False, ),
3084 'IEM_MC_FETCH_MREG_U64': (McBlock.parseMcGeneric, False, False, False, ),
3085 'IEM_MC_FETCH_SREG_BASE_U32': (McBlock.parseMcGeneric, False, False, False, ),
3086 'IEM_MC_FETCH_SREG_BASE_U64': (McBlock.parseMcGeneric, False, False, False, ),
3087 'IEM_MC_FETCH_SREG_U16': (McBlock.parseMcGeneric, False, False, True, ),
3088 'IEM_MC_FETCH_SREG_ZX_U32': (McBlock.parseMcGeneric, False, False, True, ),
3089 'IEM_MC_FETCH_SREG_ZX_U64': (McBlock.parseMcGeneric, False, False, True, ),
3090 'IEM_MC_FETCH_XREG_U128': (McBlock.parseMcGeneric, False, False, False, ),
3091 'IEM_MC_FETCH_XREG_U16': (McBlock.parseMcGeneric, False, False, g_fNativeSimd),
3092 'IEM_MC_FETCH_XREG_U32': (McBlock.parseMcGeneric, False, False, g_fNativeSimd),
3093 'IEM_MC_FETCH_XREG_U64': (McBlock.parseMcGeneric, False, False, g_fNativeSimd),
3094 'IEM_MC_FETCH_XREG_U8': (McBlock.parseMcGeneric, False, False, g_fNativeSimd),
3095 'IEM_MC_FETCH_XREG_XMM': (McBlock.parseMcGeneric, False, False, False, ),
3096 'IEM_MC_FETCH_XREG_PAIR_U128': (McBlock.parseMcGeneric, False, False, False, ),
3097 'IEM_MC_FETCH_XREG_PAIR_U128_AND_RAX_RDX_U64': (McBlock.parseMcGeneric, False, False, False, ),
3098 'IEM_MC_FETCH_XREG_PAIR_U128_AND_EAX_EDX_U32_SX_U64': (McBlock.parseMcGeneric, False, False, False, ),
3099 'IEM_MC_FETCH_XREG_PAIR_XMM': (McBlock.parseMcGeneric, False, False, False, ),
3100 'IEM_MC_FETCH_YREG_U128': (McBlock.parseMcGeneric, False, False, False, ),
3101 'IEM_MC_FETCH_YREG_U256': (McBlock.parseMcGeneric, False, False, False, ),
3102 'IEM_MC_FETCH_YREG_U32': (McBlock.parseMcGeneric, False, False, g_fNativeSimd),
3103 'IEM_MC_FETCH_YREG_U64': (McBlock.parseMcGeneric, False, False, g_fNativeSimd),
3104 'IEM_MC_FLIP_EFL_BIT': (McBlock.parseMcGeneric, True, True, False, ),
3105 'IEM_MC_FPU_FROM_MMX_MODE': (McBlock.parseMcGeneric, True, True, False, ),
3106 'IEM_MC_FPU_STACK_DEC_TOP': (McBlock.parseMcGeneric, True, True, False, ),
3107 'IEM_MC_FPU_STACK_FREE': (McBlock.parseMcGeneric, True, True, False, ),
3108 'IEM_MC_FPU_STACK_INC_TOP': (McBlock.parseMcGeneric, True, True, False, ),
3109 'IEM_MC_FPU_STACK_PUSH_OVERFLOW': (McBlock.parseMcGeneric, True, True, False, ),
3110 'IEM_MC_FPU_STACK_PUSH_OVERFLOW_MEM_OP': (McBlock.parseMcGeneric, True, True, False, ),
3111 'IEM_MC_FPU_STACK_PUSH_UNDERFLOW': (McBlock.parseMcGeneric, True, True, False, ),
3112 'IEM_MC_FPU_STACK_PUSH_UNDERFLOW_TWO': (McBlock.parseMcGeneric, True, True, False, ),
3113 'IEM_MC_FPU_STACK_UNDERFLOW': (McBlock.parseMcGeneric, True, True, False, ),
3114 'IEM_MC_FPU_STACK_UNDERFLOW_MEM_OP': (McBlock.parseMcGeneric, True, True, False, ),
3115 'IEM_MC_FPU_STACK_UNDERFLOW_MEM_OP_THEN_POP': (McBlock.parseMcGeneric, True, True, False, ),
3116 'IEM_MC_FPU_STACK_UNDERFLOW_THEN_POP': (McBlock.parseMcGeneric, True, True, False, ),
3117 'IEM_MC_FPU_STACK_UNDERFLOW_THEN_POP_POP': (McBlock.parseMcGeneric, True, True, False, ),
3118 'IEM_MC_FPU_TO_MMX_MODE': (McBlock.parseMcGeneric, True, True, False, ),
3119 'IEM_MC_HINT_FLUSH_GUEST_SHADOW': (McBlock.parseMcGeneric, True, True, True, ),
3120 'IEM_MC_IF_CX_IS_NZ': (McBlock.parseMcGenericCond, True, False, True, ),
3121 'IEM_MC_IF_CX_IS_NOT_ONE': (McBlock.parseMcGenericCond, True, False, True, ),
3122 'IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3123 'IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3124 'IEM_MC_IF_ECX_IS_NZ': (McBlock.parseMcGenericCond, True, False, True, ),
3125 'IEM_MC_IF_ECX_IS_NOT_ONE': (McBlock.parseMcGenericCond, True, False, True, ),
3126 'IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3127 'IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3128 'IEM_MC_IF_EFL_ANY_BITS_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3129 'IEM_MC_IF_EFL_BIT_NOT_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3130 'IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ': (McBlock.parseMcGenericCond, True, False, True, ),
3131 'IEM_MC_IF_EFL_BIT_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3132 'IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE': (McBlock.parseMcGenericCond, True, False, True, ),
3133 'IEM_MC_IF_EFL_BITS_EQ': (McBlock.parseMcGenericCond, True, False, True, ),
3134 'IEM_MC_IF_EFL_BITS_NE': (McBlock.parseMcGenericCond, True, False, True, ),
3135 'IEM_MC_IF_EFL_NO_BITS_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3136 'IEM_MC_IF_FCW_IM': (McBlock.parseMcGenericCond, True, True, False, ),
3137 'IEM_MC_IF_FPUREG_IS_EMPTY': (McBlock.parseMcGenericCond, True, True, False, ),
3138 'IEM_MC_IF_FPUREG_NOT_EMPTY': (McBlock.parseMcGenericCond, True, True, False, ),
3139 'IEM_MC_IF_FPUREG_NOT_EMPTY_REF_R80': (McBlock.parseMcGenericCond, True, True, False, ),
3140 'IEM_MC_IF_GREG_BIT_SET': (McBlock.parseMcGenericCond, True, False, False, ),
3141 'IEM_MC_IF_LOCAL_IS_Z': (McBlock.parseMcGenericCond, True, False, True, ),
3142 'IEM_MC_IF_MXCSR_XCPT_PENDING': (McBlock.parseMcGenericCond, True, True, g_fNativeSimd),
3143 'IEM_MC_IF_RCX_IS_NZ': (McBlock.parseMcGenericCond, True, False, True, ),
3144 'IEM_MC_IF_RCX_IS_NOT_ONE': (McBlock.parseMcGenericCond, True, False, True, ),
3145 'IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3146 'IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3147 'IEM_MC_IF_TWO_FPUREGS_NOT_EMPTY_REF_R80': (McBlock.parseMcGenericCond, True, True, False, ),
3148 'IEM_MC_IF_TWO_FPUREGS_NOT_EMPTY_REF_R80_FIRST': (McBlock.parseMcGenericCond, True, True, False, ),
3149 'IEM_MC_IMPLICIT_AVX_AIMPL_ARGS': (McBlock.parseMcImplicitAvxAArgs, False, False, False, ),
3150 'IEM_MC_INT_CLEAR_ZMM_256_UP': (McBlock.parseMcGeneric, True, True, False, ),
3151 'IEM_MC_LOCAL': (McBlock.parseMcLocal, False, False, True, ),
3152 'IEM_MC_LOCAL_ASSIGN': (McBlock.parseMcLocalAssign, False, False, True, ),
3153 'IEM_MC_LOCAL_CONST': (McBlock.parseMcLocalConst, False, False, True, ),
3154 'IEM_MC_LOCAL_EFLAGS': (McBlock.parseMcLocalEFlags, True, True, True, ),
3155 'IEM_MC_NOREF': (McBlock.parseMcGeneric, False, False, True, ),
3156 'IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT': (McBlock.parseMcGeneric, True, True, True, ),
3157 'IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE': (McBlock.parseMcGeneric, True, True, True, ),
3158 'IEM_MC_MAYBE_RAISE_FPU_XCPT': (McBlock.parseMcGeneric, True, True, True, ),
3159 'IEM_MC_MAYBE_RAISE_FSGSBASE_XCPT': (McBlock.parseMcGeneric, True, True, False, ),
3160 'IEM_MC_MAYBE_RAISE_MMX_RELATED_XCPT': (McBlock.parseMcGeneric, True, True, False, ),
3161 'IEM_MC_MAYBE_RAISE_NON_CANONICAL_ADDR_GP0': (McBlock.parseMcGeneric, True, True, False, ),
3162 'IEM_MC_MAYBE_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT': (McBlock.parseMcGeneric, True, True, False, ),
3163 'IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT': (McBlock.parseMcGeneric, True, True, True, ),
3164 'IEM_MC_MAYBE_RAISE_WAIT_DEVICE_NOT_AVAILABLE': (McBlock.parseMcGeneric, True, True, False, ),
3165 'IEM_MC_MEM_COMMIT_AND_UNMAP_ATOMIC': (McBlock.parseMcGeneric, True, True, True, ),
3166 'IEM_MC_MEM_COMMIT_AND_UNMAP_RW': (McBlock.parseMcGeneric, True, True, True, ),
3167 'IEM_MC_MEM_COMMIT_AND_UNMAP_RO': (McBlock.parseMcGeneric, True, True, True, ),
3168 'IEM_MC_MEM_COMMIT_AND_UNMAP_WO': (McBlock.parseMcGeneric, True, True, True, ),
3169 'IEM_MC_MEM_COMMIT_AND_UNMAP_FOR_FPU_STORE_WO': (McBlock.parseMcGeneric, True, True, False, ),
3170 'IEM_MC_MEM_MAP_D80_WO': (McBlock.parseMcGeneric, True, True, True, ),
3171 'IEM_MC_MEM_MAP_I16_WO': (McBlock.parseMcGeneric, True, True, True, ),
3172 'IEM_MC_MEM_MAP_I32_WO': (McBlock.parseMcGeneric, True, True, True, ),
3173 'IEM_MC_MEM_MAP_I64_WO': (McBlock.parseMcGeneric, True, True, True, ),
3174 'IEM_MC_MEM_MAP_R32_WO': (McBlock.parseMcGeneric, True, True, True, ),
3175 'IEM_MC_MEM_MAP_R64_WO': (McBlock.parseMcGeneric, True, True, True, ),
3176 'IEM_MC_MEM_MAP_R80_WO': (McBlock.parseMcGeneric, True, True, True, ),
3177 'IEM_MC_MEM_MAP_U8_ATOMIC': (McBlock.parseMcGeneric, True, True, True, ),
3178 'IEM_MC_MEM_MAP_U8_RW': (McBlock.parseMcGeneric, True, True, True, ),
3179 'IEM_MC_MEM_MAP_U8_RO': (McBlock.parseMcGeneric, True, True, True, ),
3180 'IEM_MC_MEM_MAP_U8_WO': (McBlock.parseMcGeneric, True, True, True, ),
3181 'IEM_MC_MEM_MAP_U16_ATOMIC': (McBlock.parseMcGeneric, True, True, True, ),
3182 'IEM_MC_MEM_MAP_U16_RW': (McBlock.parseMcGeneric, True, True, True, ),
3183 'IEM_MC_MEM_MAP_U16_RO': (McBlock.parseMcGeneric, True, True, True, ),
3184 'IEM_MC_MEM_MAP_U16_WO': (McBlock.parseMcGeneric, True, True, True, ),
3185 'IEM_MC_MEM_MAP_U32_ATOMIC': (McBlock.parseMcGeneric, True, True, True, ),
3186 'IEM_MC_MEM_MAP_U32_RW': (McBlock.parseMcGeneric, True, True, True, ),
3187 'IEM_MC_MEM_MAP_U32_RO': (McBlock.parseMcGeneric, True, True, True, ),
3188 'IEM_MC_MEM_MAP_U32_WO': (McBlock.parseMcGeneric, True, True, True, ),
3189 'IEM_MC_MEM_MAP_U64_ATOMIC': (McBlock.parseMcGeneric, True, True, True, ),
3190 'IEM_MC_MEM_MAP_U64_RW': (McBlock.parseMcGeneric, True, True, True, ),
3191 'IEM_MC_MEM_MAP_U64_RO': (McBlock.parseMcGeneric, True, True, True, ),
3192 'IEM_MC_MEM_MAP_U64_WO': (McBlock.parseMcGeneric, True, True, True, ),
3193 'IEM_MC_MEM_MAP_U128_ATOMIC': (McBlock.parseMcGeneric, True, True, True, ),
3194 'IEM_MC_MEM_MAP_U128_RW': (McBlock.parseMcGeneric, True, True, True, ),
3195 'IEM_MC_MEM_MAP_U128_RO': (McBlock.parseMcGeneric, True, True, True, ),
3196 'IEM_MC_MEM_MAP_U128_WO': (McBlock.parseMcGeneric, True, True, True, ),
3197 'IEM_MC_MEM_ROLLBACK_AND_UNMAP_WO': (McBlock.parseMcGeneric, True, True, True, ),
3198 'IEM_MC_MERGE_YREG_U32_U96_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
3199 'IEM_MC_MERGE_YREG_U64_U64_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
3200 'IEM_MC_MERGE_YREG_U64HI_U64HI_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
3201 'IEM_MC_MERGE_YREG_U64LO_U64LO_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
3202 'IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
3203 'IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
3204 'IEM_MC_MODIFIED_MREG': (McBlock.parseMcGeneric, True, True, False, ),
3205 'IEM_MC_MODIFIED_MREG_BY_REF': (McBlock.parseMcGeneric, True, True, False, ),
3206 'IEM_MC_NATIVE_EMIT_0': (McBlock.parseMcGeneric, True, True, True, ),
3207 'IEM_MC_NATIVE_EMIT_1': (McBlock.parseMcGeneric, True, True, True, ),
3208 'IEM_MC_NATIVE_EMIT_2': (McBlock.parseMcGeneric, True, True, True, ),
3209 'IEM_MC_NATIVE_EMIT_3': (McBlock.parseMcGeneric, True, True, True, ),
3210 'IEM_MC_NATIVE_EMIT_4': (McBlock.parseMcGeneric, True, True, True, ),
3211 'IEM_MC_NATIVE_EMIT_5': (McBlock.parseMcGeneric, True, True, True, ),
3212 'IEM_MC_NATIVE_EMIT_6': (McBlock.parseMcGeneric, True, True, True, ),
3213 'IEM_MC_NATIVE_EMIT_7': (McBlock.parseMcGeneric, True, True, True, ),
3214 'IEM_MC_NATIVE_IF': (McBlock.parseMcNativeIf, False, False, True, ),
3215 'IEM_MC_NATIVE_ELSE': (McBlock.parseMcGenericCond, False, False, True, ),
3216 'IEM_MC_NATIVE_ENDIF': (McBlock.parseMcGenericCond, False, False, True, ),
3217 'IEM_MC_OR_2LOCS_U32': (McBlock.parseMcGeneric, False, False, False, ),
3218 'IEM_MC_OR_GREG_U16': (McBlock.parseMcGeneric, True, True, False, ),
3219 'IEM_MC_OR_GREG_U32': (McBlock.parseMcGeneric, True, True, False, ),
3220 'IEM_MC_OR_GREG_U64': (McBlock.parseMcGeneric, True, True, False, ),
3221 'IEM_MC_OR_GREG_U8': (McBlock.parseMcGeneric, True, True, False, ),
3222 'IEM_MC_OR_LOCAL_U16': (McBlock.parseMcGeneric, False, False, True, ),
3223 'IEM_MC_OR_LOCAL_U32': (McBlock.parseMcGeneric, False, False, True, ),
3224 'IEM_MC_OR_LOCAL_U8': (McBlock.parseMcGeneric, False, False, True, ),
3225 'IEM_MC_POP_GREG_U16': (McBlock.parseMcGeneric, True, True, True, ),
3226 'IEM_MC_POP_GREG_U32': (McBlock.parseMcGeneric, True, True, True, ),
3227 'IEM_MC_POP_GREG_U64': (McBlock.parseMcGeneric, True, True, True, ),
3228 'IEM_MC_PREPARE_AVX_USAGE': (McBlock.parseMcGeneric, False, False, True),
3229 'IEM_MC_PREPARE_FPU_USAGE': (McBlock.parseMcGeneric, False, False, True),
3230 'IEM_MC_PREPARE_SSE_USAGE': (McBlock.parseMcGeneric, False, False, True),
3231 'IEM_MC_PUSH_FPU_RESULT': (McBlock.parseMcGeneric, True, True, False, ),
3232 'IEM_MC_PUSH_FPU_RESULT_MEM_OP': (McBlock.parseMcGeneric, True, True, False, ),
3233 'IEM_MC_PUSH_FPU_RESULT_TWO': (McBlock.parseMcGeneric, True, True, False, ),
3234 'IEM_MC_PUSH_U16': (McBlock.parseMcGeneric, True, True, True, ),
3235 'IEM_MC_PUSH_U32': (McBlock.parseMcGeneric, True, True, True, ),
3236 'IEM_MC_PUSH_U32_SREG': (McBlock.parseMcGeneric, True, True, True, ),
3237 'IEM_MC_PUSH_U64': (McBlock.parseMcGeneric, True, True, True, ),
3238 'IEM_MC_RAISE_DIVIDE_ERROR': (McBlock.parseMcGeneric, True, True, True, ),
3239 'IEM_MC_RAISE_GP0_IF_CPL_NOT_ZERO': (McBlock.parseMcGeneric, True, True, False, ),
3240 'IEM_MC_RAISE_GP0_IF_EFF_ADDR_UNALIGNED': (McBlock.parseMcGeneric, True, True, False, ),
3241 'IEM_MC_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT': (McBlock.parseMcGeneric, True, True, True, ),
3242 'IEM_MC_REF_EFLAGS': (McBlock.parseMcGeneric, False, False, True, ),
3243 'IEM_MC_REF_FPUREG': (McBlock.parseMcGeneric, False, False, False, ),
3244 'IEM_MC_REF_GREG_I32': (McBlock.parseMcGeneric, False, False, True, ),
3245 'IEM_MC_REF_GREG_I32_CONST': (McBlock.parseMcGeneric, False, False, True, ),
3246 'IEM_MC_REF_GREG_I64': (McBlock.parseMcGeneric, False, False, True, ),
3247 'IEM_MC_REF_GREG_I64_CONST': (McBlock.parseMcGeneric, False, False, True, ),
3248 'IEM_MC_REF_GREG_U16': (McBlock.parseMcGeneric, False, False, True, ),
3249 'IEM_MC_REF_GREG_U16_CONST': (McBlock.parseMcGeneric, False, False, True, ),
3250 'IEM_MC_REF_GREG_U32': (McBlock.parseMcGeneric, False, False, True, ),
3251 'IEM_MC_REF_GREG_U32_CONST': (McBlock.parseMcGeneric, False, False, True, ),
3252 'IEM_MC_REF_GREG_U64': (McBlock.parseMcGeneric, False, False, True, ),
3253 'IEM_MC_REF_GREG_U64_CONST': (McBlock.parseMcGeneric, False, False, True, ),
3254 'IEM_MC_REF_GREG_U8': (McBlock.parseMcGeneric, False, False, False, ), # threaded
3255 'IEM_MC_REF_GREG_U8_CONST': (McBlock.parseMcGeneric, False, False, False, ), # threaded
3256 'IEM_MC_REF_MREG_U32_CONST': (McBlock.parseMcGeneric, False, False, False, ),
3257 'IEM_MC_REF_MREG_U64': (McBlock.parseMcGeneric, False, False, False, ),
3258 'IEM_MC_REF_MREG_U64_CONST': (McBlock.parseMcGeneric, False, False, False, ),
3259 'IEM_MC_REF_MXCSR': (McBlock.parseMcGeneric, False, False, True, ),
3260 'IEM_MC_REF_XREG_R32_CONST': (McBlock.parseMcGeneric, False, False, False, ),
3261 'IEM_MC_REF_XREG_R64_CONST': (McBlock.parseMcGeneric, False, False, False, ),
3262 'IEM_MC_REF_XREG_U128': (McBlock.parseMcGeneric, False, False, True, ),
3263 'IEM_MC_REF_XREG_U128_CONST': (McBlock.parseMcGeneric, False, False, True, ),
3264 'IEM_MC_REF_XREG_U32_CONST': (McBlock.parseMcGeneric, False, False, False, ),
3265 'IEM_MC_REF_XREG_U64_CONST': (McBlock.parseMcGeneric, False, False, False, ),
3266 'IEM_MC_REF_XREG_XMM_CONST': (McBlock.parseMcGeneric, False, False, True, ),
3267 'IEM_MC_REF_YREG_U128': (McBlock.parseMcGeneric, False, False, False, ),
3268 'IEM_MC_REF_YREG_U128_CONST': (McBlock.parseMcGeneric, False, False, False, ),
3269 'IEM_MC_REF_YREG_U64_CONST': (McBlock.parseMcGeneric, False, False, False, ),
3270 'IEM_MC_REL_JMP_S16_AND_FINISH': (McBlock.parseMcGeneric, True, True, False, ),
3271 'IEM_MC_REL_JMP_S32_AND_FINISH': (McBlock.parseMcGeneric, True, True, False, ),
3272 'IEM_MC_REL_JMP_S8_AND_FINISH': (McBlock.parseMcGeneric, True, True, False, ),
3273 'IEM_MC_RETURN_ON_FAILURE': (McBlock.parseMcGeneric, False, False, False, ),
3274 'IEM_MC_SAR_LOCAL_S16': (McBlock.parseMcGeneric, False, False, False, ),
3275 'IEM_MC_SAR_LOCAL_S32': (McBlock.parseMcGeneric, False, False, False, ),
3276 'IEM_MC_SAR_LOCAL_S64': (McBlock.parseMcGeneric, False, False, False, ),
3277 'IEM_MC_SET_EFL_BIT': (McBlock.parseMcGeneric, True, True, False, ),
3278 'IEM_MC_SET_FPU_RESULT': (McBlock.parseMcGeneric, True, True, False, ),
3279 'IEM_MC_SET_RIP_U16_AND_FINISH': (McBlock.parseMcGeneric, True, True, False, ),
3280 'IEM_MC_SET_RIP_U32_AND_FINISH': (McBlock.parseMcGeneric, True, True, False, ),
3281 'IEM_MC_SET_RIP_U64_AND_FINISH': (McBlock.parseMcGeneric, True, True, False, ),
3282 'IEM_MC_SHL_LOCAL_S16': (McBlock.parseMcGeneric, False, False, False, ),
3283 'IEM_MC_SHL_LOCAL_S32': (McBlock.parseMcGeneric, False, False, False, ),
3284 'IEM_MC_SHL_LOCAL_S64': (McBlock.parseMcGeneric, False, False, False, ),
3285 'IEM_MC_SHR_LOCAL_U8': (McBlock.parseMcGeneric, False, False, False, ),
3286 'IEM_MC_SSE_UPDATE_MXCSR': (McBlock.parseMcGeneric, True, True, False, ),
3287 'IEM_MC_STORE_FPU_RESULT': (McBlock.parseMcGeneric, True, True, False, ),
3288 'IEM_MC_STORE_FPU_RESULT_MEM_OP': (McBlock.parseMcGeneric, True, True, False, ),
3289 'IEM_MC_STORE_FPU_RESULT_THEN_POP': (McBlock.parseMcGeneric, True, True, False, ),
3290 'IEM_MC_STORE_FPU_RESULT_WITH_MEM_OP_THEN_POP': (McBlock.parseMcGeneric, True, True, False, ),
3291 'IEM_MC_STORE_FPUREG_R80_SRC_REF': (McBlock.parseMcGeneric, True, True, False, ),
3292 'IEM_MC_STORE_GREG_I64': (McBlock.parseMcGeneric, True, True, False, ),
3293 'IEM_MC_STORE_GREG_U16': (McBlock.parseMcGeneric, True, True, True, ),
3294 'IEM_MC_STORE_GREG_U16_CONST': (McBlock.parseMcGeneric, True, True, True, ),
3295 'IEM_MC_STORE_GREG_U32': (McBlock.parseMcGeneric, True, True, True, ),
3296 'IEM_MC_STORE_GREG_U32_CONST': (McBlock.parseMcGeneric, True, True, True, ),
3297 'IEM_MC_STORE_GREG_U64': (McBlock.parseMcGeneric, True, True, True, ),
3298 'IEM_MC_STORE_GREG_U64_CONST': (McBlock.parseMcGeneric, True, True, True, ),
3299 'IEM_MC_STORE_GREG_U8': (McBlock.parseMcGeneric, True, True, True, ), # thrd var
3300 'IEM_MC_STORE_GREG_U8_CONST': (McBlock.parseMcGeneric, True, True, True, ), # thrd var
3301 'IEM_MC_STORE_GREG_PAIR_U32': (McBlock.parseMcGeneric, True, True, False, ),
3302 'IEM_MC_STORE_GREG_PAIR_U64': (McBlock.parseMcGeneric, True, True, False, ),
3303 'IEM_MC_STORE_MEM_I16_CONST_BY_REF': (McBlock.parseMcGeneric, True, True, False, ),
3304 'IEM_MC_STORE_MEM_I32_CONST_BY_REF': (McBlock.parseMcGeneric, True, True, False, ),
3305 'IEM_MC_STORE_MEM_I64_CONST_BY_REF': (McBlock.parseMcGeneric, True, True, False, ),
3306 'IEM_MC_STORE_MEM_I8_CONST_BY_REF': (McBlock.parseMcGeneric, True, True, False, ),
3307 'IEM_MC_STORE_MEM_INDEF_D80_BY_REF': (McBlock.parseMcGeneric, True, True, False, ),
3308 'IEM_MC_STORE_MEM_NEG_QNAN_R32_BY_REF': (McBlock.parseMcGeneric, True, True, False, ),
3309 'IEM_MC_STORE_MEM_NEG_QNAN_R64_BY_REF': (McBlock.parseMcGeneric, True, True, False, ),
3310 'IEM_MC_STORE_MEM_NEG_QNAN_R80_BY_REF': (McBlock.parseMcGeneric, True, True, False, ),
3311 'IEM_MC_STORE_MEM_U128': (McBlock.parseMcGeneric, True, True, False, ),
3312 'IEM_MC_STORE_MEM_U128_NO_AC': (McBlock.parseMcGeneric, True, True, False, ),
3313 'IEM_MC_STORE_MEM_U128_ALIGN_SSE': (McBlock.parseMcGeneric, True, True, False, ),
3314 'IEM_MC_STORE_MEM_U16': (McBlock.parseMcGeneric, True, True, True, ),
3315 'IEM_MC_STORE_MEM_U16_CONST': (McBlock.parseMcGeneric, True, True, True, ),
3316 'IEM_MC_STORE_MEM_U256': (McBlock.parseMcGeneric, True, True, False, ),
3317 'IEM_MC_STORE_MEM_U256_NO_AC': (McBlock.parseMcGeneric, True, True, False, ),
3318 'IEM_MC_STORE_MEM_U256_ALIGN_AVX': (McBlock.parseMcGeneric, True, True, False, ),
3319 'IEM_MC_STORE_MEM_U32': (McBlock.parseMcGeneric, True, True, True, ),
3320 'IEM_MC_STORE_MEM_U32_CONST': (McBlock.parseMcGeneric, True, True, True, ),
3321 'IEM_MC_STORE_MEM_U64': (McBlock.parseMcGeneric, True, True, True, ),
3322 'IEM_MC_STORE_MEM_U64_CONST': (McBlock.parseMcGeneric, True, True, True, ),
3323 'IEM_MC_STORE_MEM_U8': (McBlock.parseMcGeneric, True, True, True, ),
3324 'IEM_MC_STORE_MEM_U8_CONST': (McBlock.parseMcGeneric, True, True, True, ),
3325 'IEM_MC_STORE_MREG_U32_ZX_U64': (McBlock.parseMcGeneric, True, True, False, ),
3326 'IEM_MC_STORE_MREG_U64': (McBlock.parseMcGeneric, True, True, False, ),
3327 'IEM_MC_STORE_SREG_BASE_U32': (McBlock.parseMcGeneric, True, True, False, ),
3328 'IEM_MC_STORE_SREG_BASE_U64': (McBlock.parseMcGeneric, True, True, False, ),
3329 'IEM_MC_STORE_SSE_RESULT': (McBlock.parseMcGeneric, True, True, False, ),
3330 'IEM_MC_STORE_XREG_R32': (McBlock.parseMcGeneric, True, True, False, ),
3331 'IEM_MC_STORE_XREG_R64': (McBlock.parseMcGeneric, True, True, False, ),
3332 'IEM_MC_STORE_XREG_U128': (McBlock.parseMcGeneric, True, True, False, ),
3333 'IEM_MC_STORE_XREG_U16': (McBlock.parseMcGeneric, True, True, False, ),
3334 'IEM_MC_STORE_XREG_U32': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
3335 'IEM_MC_STORE_XREG_U32_U128': (McBlock.parseMcGeneric, True, True, False, ),
3336 'IEM_MC_STORE_XREG_U32_ZX_U128': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
3337 'IEM_MC_STORE_XREG_U64': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
3338 'IEM_MC_STORE_XREG_U64_ZX_U128': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
3339 'IEM_MC_STORE_XREG_U8': (McBlock.parseMcGeneric, True, True, False, ),
3340 'IEM_MC_STORE_XREG_XMM': (McBlock.parseMcGeneric, True, True, False, ),
3341 'IEM_MC_STORE_XREG_XMM_U32': (McBlock.parseMcGeneric, True, True, False, ),
3342 'IEM_MC_STORE_XREG_XMM_U64': (McBlock.parseMcGeneric, True, True, False, ),
3343 'IEM_MC_STORE_YREG_U128': (McBlock.parseMcGeneric, True, True, False, ),
3344 'IEM_MC_STORE_YREG_U128_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
3345 'IEM_MC_STORE_YREG_U256_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
3346 'IEM_MC_STORE_YREG_U32_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
3347 'IEM_MC_STORE_YREG_U64_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, g_fNativeSimd),
3348 'IEM_MC_SUB_GREG_U16': (McBlock.parseMcGeneric, True, True, True, ),
3349 'IEM_MC_SUB_GREG_U32': (McBlock.parseMcGeneric, True, True, True, ),
3350 'IEM_MC_SUB_GREG_U64': (McBlock.parseMcGeneric, True, True, True, ),
3351 'IEM_MC_SUB_LOCAL_U16': (McBlock.parseMcGeneric, False, False, False, ),
3352 'IEM_MC_UPDATE_FPU_OPCODE_IP': (McBlock.parseMcGeneric, True, True, False, ),
3353 'IEM_MC_UPDATE_FSW': (McBlock.parseMcGeneric, True, True, False, ),
3354 'IEM_MC_UPDATE_FSW_CONST': (McBlock.parseMcGeneric, True, True, False, ),
3355 'IEM_MC_UPDATE_FSW_THEN_POP': (McBlock.parseMcGeneric, True, True, False, ),
3356 'IEM_MC_UPDATE_FSW_THEN_POP_POP': (McBlock.parseMcGeneric, True, True, False, ),
3357 'IEM_MC_UPDATE_FSW_WITH_MEM_OP': (McBlock.parseMcGeneric, True, True, False, ),
3358 'IEM_MC_UPDATE_FSW_WITH_MEM_OP_THEN_POP': (McBlock.parseMcGeneric, True, True, False, ),
3359 'IEM_MC_NO_NATIVE_RECOMPILE': (McBlock.parseMcGeneric, False, False, False, ),
3360};
3361# pylint: enable=line-too-long
3362
## List of microcode blocks.
#  Module-level and empty at this point; the type comment says the elements
#  are McBlock instances.  NOTE(review): presumably appended to while parsing,
#  the code doing that is not in this part of the file - confirm.
g_aoMcBlocks = [] # type: List[McBlock]
3365
3366
3367
class ParserException(Exception):
    """
    Exception type used for parse errors.

    The single constructor argument is the complete error message; it is
    passed straight on to the Exception base class.
    """
    def __init__(self, sMessage):
        super(ParserException, self).__init__(sMessage);
3372
3373
3374class SimpleParser(object): # pylint: disable=too-many-instance-attributes
3375 """
3376 Parser of IEMAllInstruction*.cpp.h instruction specifications.
3377 """
3378
## @name Parser state.
## Values for the iState member; __init__ starts the parser out in kiCode.
## @{
kiCode = 0;          ##< The initial state.  NOTE(review): by its name, ordinary code (not inside a comment) - confirm.
kiCommentMulti = 1;  ##< NOTE(review): by its name, inside a multi-line comment - confirm against the state machine.
## @}
3384
class Macro(object):
    """
    A preprocessor macro: name, optional parameter list and replacement body.

    A macro created with a non-empty parameter list gets a compiled regular
    expression (oReArgMatch) which expandMacro uses to locate the parameter
    names in the body, including any '##' token pasting operator next to them.
    """
    def __init__(self, sName, asArgs, sBody, iLine):
        self.sName       = sName;   ##< The macro name.
        self.asArgs      = asArgs;  ##< None if simple macro, list of parameters otherwise.
        self.sBody       = sBody;   ##< The replacement text (unexpanded).
        self.iLine       = iLine;   ##< Line number supplied by the creator.
        ## Matches one parameter name in the body together with an adjacent '##' (and
        #  the whitespace around it) on either side.  None when there are no parameters.
        self.oReArgMatch = re.compile(r'(\s*##\s*|\b)(' + '|'.join(asArgs) + r')(\s*##\s*|\b)') if asArgs else None;

    @staticmethod
    def _needSpace(ch):
        """ This is just to make the expanded output a bit prettier. """
        # NOTE(review): this is true for any whitespace character (the '(' test can
        # never matter once isspace() is true).  Kept as-is so the expanded text
        # stays byte-for-byte what it was; confirm whether 'not ch.isspace()' was meant.
        return ch.isspace() and ch != '(';

    def expandMacro(self, oParent, asArgs = None):
        """
        Expands the macro body with the given arguments.

        oParent is not used.  asArgs must hold exactly one value per declared
        parameter (asserted); for a macro without parameters it must be empty
        or None (asserted too).

        Returns the body with each parameter replaced by its value.  A '##'
        next to a parameter is consumed, pasting the value onto its neighbour.
        """
        _ = oParent;
        if not self.oReArgMatch:
            assert not asArgs;
            return self.sBody;

        assert len(asArgs) == len(self.asArgs);
        dArgs = dict(zip(self.asArgs, asArgs));
        sBody = self.sBody;

        def expandParam(oMatch):
            """ Returns the replacement text for one parameter reference. """
            sValue = dArgs[oMatch.group(2)];
            # NOTE(review): this looks at the first character of the match itself, so
            # it can only trigger if a parameter name begins with whitespace; the
            # preceding character was probably intended.  Behaviour kept unchanged.
            if not oMatch.group(1) and oMatch.start() > 0 and self._needSpace(sBody[oMatch.start()]):
                sValue = ' ' + sValue;
            if not oMatch.group(3) and oMatch.end() < len(sBody) and self._needSpace(sBody[oMatch.end()]):
                sValue += ' ';
            return sValue;

        # Substitute in one pass over the *original* body.  Re-searching the partly
        # expanded text would evaluate the \b in front of the next parameter against
        # the value just spliced in, so in 'a ## b' the 'b' was left unexpanded
        # whenever the value of 'a' ended in a word character.  A function is used
        # as replacement so that backslashes in the values are taken literally.
        return self.oReArgMatch.sub(expandParam, sBody);
3426
class PreprocessorConditional(object):
    """
    Preprocessor conditional (#if/#ifdef/#ifndef/#elif/#else/#endif).

    Only a small subset of the preprocessor expression syntax is supported:
    defined(XXXX) terms naming something from kdKnownDefines (or a
    VMM_INCLUDED_xxx_h include guard), and the literals 0 and 1.  The
    constructor validates the expression and raises Exception on anything else.
    """

    ## Known defines.
    #   - A value of 1 indicates that it's always defined.
    #   - A value of 0 if it's always undefined
    #   - A value of -1 if it's an arch and it depends of script parameters.
    #   - A value of -2 if it's not recognized when filtering MC blocks.
    kdKnownDefines = {
        'IEM_WITH_ONE_BYTE_TABLE':          1,
        'IEM_WITH_TWO_BYTE_TABLE':          1,
        'IEM_WITH_THREE_0F_38':             1,
        'IEM_WITH_THREE_0F_3A':             1,
        'IEM_WITH_THREE_BYTE_TABLES':       1,
        'IEM_WITH_3DNOW':                   1,
        'IEM_WITH_3DNOW_TABLE':             1,
        'IEM_WITH_VEX':                     1,
        'IEM_WITH_VEX_TABLES':              1,
        'VBOX_WITH_NESTED_HWVIRT_VMX':      1,
        'VBOX_WITH_NESTED_HWVIRT_VMX_EPT':  1,
        'VBOX_WITH_NESTED_HWVIRT_SVM':      1,
        'LOG_ENABLED':                      1,
        'RT_WITHOUT_PRAGMA_ONCE':           0,
        'TST_IEM_CHECK_MC':                 0,
        'IEM_WITHOUT_ASSEMBLY':            -2, ##< @todo ??
        'RT_ARCH_AMD64':                   -1,
        'RT_ARCH_ARM64':                   -1,
        'RT_ARCH_ARM32':                   -1,
        'RT_ARCH_X86':                     -1,
        'RT_ARCH_SPARC':                   -1,
        'RT_ARCH_SPARC64':                 -1,
    };
    ## Build architecture name -> the RT_ARCH_XXXX define it corresponds to.
    #  There is one entry for each RT_ARCH_XXXX listed in kdKnownDefines.
    kdBuildArchToIprt = {
        'x86':      'RT_ARCH_X86',
        'amd64':    'RT_ARCH_AMD64',
        'arm32':    'RT_ARCH_ARM32',
        'arm64':    'RT_ARCH_ARM64',
        'sparc32':  'RT_ARCH_SPARC',
        'sparc64':  'RT_ARCH_SPARC64',
    };
    ## For parsing the next defined(xxxx).  Also eats the whitespace following it.
    koMatchDefined = re.compile(r'\s*defined\s*\(\s*([^ \t)]+)\s*\)\s*');

    def __init__(self, sType, sExpr):
        """
        sType is the directive: 'if' and 'elif' take an expression, anything
        else is treated as taking a single define name (ifdef/ifndef).
        Raises Exception if sExpr is not supported.
        """
        self.sType   = sType;
        self.sExpr   = sExpr;   ##< Expression without command and no leading or trailing spaces.
        self.aoElif  = []       # type: List[PreprocessorConditional]
        self.fInElse = False;   ##< Tested for truth by isInBlockForArch; nothing in this class sets it.
        if sType in ('if', 'elif'):
            self.checkExpression(sExpr);
        else:
            self.checkSupportedDefine(sExpr)

    @classmethod
    def checkSupportedDefine(cls, sDefine):
        """ Checks that sDefine is one that we support.  Raises exception if unsupported. """
        if sDefine in cls.kdKnownDefines:
            return True;
        if sDefine.startswith('VMM_INCLUDED_') and sDefine.endswith('_h'):
            return True;
        raise Exception('Unsupported define: %s' % (sDefine,));

    @classmethod
    def checkExpression(cls, sExpr):
        """
        Check that the expression is supported.  Raises exception if not.

        Accepted: '0', '1', and defined(xxxx) terms combined using '!', '||',
        '&&' and balanced parentheses, optionally ending with a literal '1'.
        Returns True.
        """
        if sExpr in ('0', '1'):
            return True;

        off    = 0;
        cParan = 0;
        while off < len(sExpr):
            ch = sExpr[off];

            # Unary operator or parentheses:
            if ch in ('(', '!'):
                if ch == '(':
                    cParan += 1;
                off += 1;
            else:
                # defined(xxxx), or a final literal '1'.
                oMatch = cls.koMatchDefined.match(sExpr, off);
                if oMatch:
                    cls.checkSupportedDefine(oMatch.group(1));
                    off = oMatch.end();
                elif sExpr[off:] == '1':
                    off = len(sExpr);   # There is no match object to take the end offset from here.
                else:
                    raise Exception('Cannot grok: \'%s\' (at %u in: \'%s\')' % (sExpr[off:off + 10], off + 1, sExpr,));

                # Look for closing parentheses.
                while off < len(sExpr) and sExpr[off].isspace():
                    off += 1;
                if cParan > 0:
                    while off < len(sExpr) and sExpr[off] == ')':
                        if cParan <= 0:
                            raise Exception('Unbalanced parentheses at %u in \'%s\'' % (off + 1, sExpr,));
                        cParan -= 1;
                        off    += 1;
                        while off < len(sExpr) and sExpr[off].isspace():
                            off += 1;

                # Look for binary operator.
                if off >= len(sExpr):
                    break;
                if sExpr[off:off + 2] in ('||', '&&'):
                    off += 2;
                else:
                    raise Exception('Cannot grok operator: \'%s\' (at %u in: \'%s\')' % (sExpr[off:off + 2], off + 1, sExpr,));

            # Skip spaces.
            while off < len(sExpr) and sExpr[off].isspace():
                off += 1;
        if cParan != 0:
            raise Exception('Unbalanced parentheses at %u in \'%s\'' % (off + 1, sExpr,));
        return True;

    @classmethod
    def isArchIncludedInExpr(cls, sExpr, sArch):
        """
        Checks if sArch is included in the given expression.

        Returns True as soon as one defined() term holds for sArch (see
        matchDefined), False if none does.  Raises Exception on syntax other
        than what is described below.
        """
        # We only grok defined() [|| defined()...] and [1|0] at the moment.
        if sExpr == '0':
            return False;
        if sExpr == '1':
            return True;
        off = 0;
        while off < len(sExpr):
            # defined(xxxx)
            oMatch = cls.koMatchDefined.match(sExpr, off);
            if not oMatch:
                if sExpr[off:] == '1':
                    return True;
                raise Exception('Cannot grok: %s (at %u in: %s)' % (sExpr[off:off + 10], off + 1, sExpr,));
            if cls.matchDefined(oMatch.group(1), sArch):
                return True;
            off = oMatch.end();

            # Look for OR operator.
            while off < len(sExpr) and sExpr[off].isspace():
                off += 1;
            if off >= len(sExpr):
                break;
            if sExpr.startswith('||', off):     # Must be tested at the current offset, not at the string start.
                off += 2;
            else:
                raise Exception('Cannot grok: %s (at %u in: %s)' % (sExpr[off:off + 10], off + 1, sExpr,));

            # Skip spaces so a trailing literal '1' is recognized after the operator.
            while off < len(sExpr) and sExpr[off].isspace():
                off += 1;

        return False;

    @classmethod
    def matchArch(cls, sDefine, sArch):
        """
        Compares sDefine (RT_ARCH_XXXX) and sArch (x86, amd64, arm64, ++).
        Raises KeyError if sArch isn't in kdBuildArchToIprt.
        """
        return cls.kdBuildArchToIprt[sArch] == sDefine;

    @classmethod
    def matchDefined(cls, sExpr, sArch):
        """
        Check the result of an ifdef/ifndef expression, given sArch.

        sExpr is the define name.  Names missing from kdKnownDefines count as
        undefined; a define marked -2 there raises Exception.
        """
        iDefine = cls.kdKnownDefines.get(sExpr, 0);
        if iDefine == -2:
            raise Exception('Unsupported define for MC block filtering: %s' % (sExpr,));
        return iDefine == 1 or (iDefine == -1 and cls.matchArch(sExpr, sArch));

    def isArchIncludedInPrimaryBlock(self, sArch):
        """ Checks if sArch is included in the (primary) 'if' block. """
        if self.sType == 'ifdef':
            return self.matchDefined(self.sExpr, sArch);
        if self.sType == 'ifndef':
            return not self.matchDefined(self.sExpr, sArch);
        return self.isArchIncludedInExpr(self.sExpr, sArch);

    @staticmethod
    def isInBlockForArch(aoCppCondStack, sArch, iLine):
        """
        Checks if sArch is included in the current conditional block.

        aoCppCondStack is the stack of conditionals to check; every entry has
        to include sArch at its current position (primary block, last #elif
        or #else).  iLine is unused.
        """
        _ = iLine;
        for oCond in aoCppCondStack:
            if oCond.isArchIncludedInPrimaryBlock(sArch):
                # The primary block applies, so being in an #elif or #else part means we're excluded.
                if oCond.aoElif or oCond.fInElse:
                    return False;
            else:
                fFine = False;
                for oElifCond in oCond.aoElif:
                    if oElifCond.isArchIncludedInPrimaryBlock(sArch):
                        # Only fine if the matching #elif is the last one on the list and no #else follows.
                        if oElifCond is not oCond.aoElif[-1] or oCond.fInElse:
                            return False;
                        fFine = True;
                if not fFine and not oCond.fInElse:
                    return False;
        return True;
3617
def __init__(self, sSrcFile, asLines, sDefaultMap, sHostArch, oInheritMacrosFrom = None):
    """
    Initializes the parser state for one source file.

    sSrcFile is the source file name; it prefixes the error messages.
    asLines is stored as-is as the input to parse.
    sDefaultMap must be a key in g_dInstructionMaps (asserted); the map
    object found there becomes oDefaultMap.
    sHostArch is stored as-is.
    oInheritMacrosFrom is optional; when given, a shallow copy of its dMacros
    and its oReMacros are taken over as the starting macro set.
    """
    self.sSrcFile       = sSrcFile;
    self.asLines        = asLines;
    self.iLine          = 0;
    self.iState         = self.kiCode;
    self.sComment       = '';
    self.iCommentLine   = 0;
    self.aoCurInstrs    = []    # type: List[Instruction]
    self.oCurFunction   = None  # type: DecoderFunction
    self.iMcBlockInFunc = 0;
    self.oCurMcBlock    = None  # type: McBlock
    self.dMacros        = {}    # type: Dict[str, SimpleParser.Macro]
    self.oReMacros      = None  # type: re      ##< Regular expression matching invocations of anything in self.dMacros.
    if oInheritMacrosFrom:
        # Copy the dictionary so the two parsers don't share it.
        self.dMacros    = dict(oInheritMacrosFrom.dMacros);
        self.oReMacros  = oInheritMacrosFrom.oReMacros;
    self.aoCppCondStack = []    # type: List[PreprocessorConditional] ##< Preprocessor conditional stack.
    self.sHostArch      = sHostArch;

    assert sDefaultMap in g_dInstructionMaps;
    self.oDefaultMap    = g_dInstructionMaps[sDefaultMap];

    # Counters, all starting at zero.
    self.cTotalInstr    = 0;
    self.cTotalStubs    = 0;
    self.cTotalTagged   = 0;
    self.cTotalMcBlocks = 0;

    # Precompiled regular expressions.
    self.oReMacroName   = re.compile(r'^[A-Za-z_][A-Za-z0-9_]*$');
    self.oReMnemonic    = re.compile(r'^[A-Za-z_][A-Za-z0-9_]*$');
    self.oReStatsName   = re.compile(r'^[A-Za-z_][A-Za-z0-9_]*$');
    self.oReFunctionName= re.compile(r'^iemOp_[A-Za-z_][A-Za-z0-9_]*$');
    self.oReGroupName   = re.compile(r'^og_[a-z0-9]+(|_[a-z0-9]+|_[a-z0-9]+_[a-z0-9]+)$');
    self.oReDisEnum     = re.compile(r'^OP_[A-Z0-9_]+$');
    self.oReFunTable    = re.compile(r'^(IEM_STATIC|static) +const +PFNIEMOP +g_apfn[A-Za-z0-9_]+ *\[ *\d* *\] *= *$');
    self.oReComment     = re.compile(r'//.*?$|/\*.*?\*/'); ##< Full comments.
    self.oReHashDefine2 = re.compile(r'(?s)\A\s*([A-Za-z_][A-Za-z0-9_]*)\(([^)]*)\)\s*(.*)\Z'); ##< With arguments.
    self.oReHashDefine3 = re.compile(r'(?s)\A\s*([A-Za-z_][A-Za-z0-9_]*)[^(]\s*(.*)\Z');        ##< Simple, no arguments.
    self.oReMcBeginEnd  = re.compile(r'\bIEM_MC_(BEGIN|END|DEFER_TO_CIMPL_[1-5]_RET)\s*\(');    ##< Not DEFER_TO_CIMPL_0_RET!
    self.fDebug         = True;
    self.fDebugMc       = False;
    self.fDebugPreproc  = False;

    # Comment tag -> the method handling it.
    self.dTagHandlers   = {
        '@opbrief':     self.parseTagOpBrief,
        '@opdesc':      self.parseTagOpDesc,
        '@opmnemonic':  self.parseTagOpMnemonic,
        '@op1':         self.parseTagOpOperandN,
        '@op2':         self.parseTagOpOperandN,
        '@op3':         self.parseTagOpOperandN,
        '@op4':         self.parseTagOpOperandN,
        '@oppfx':       self.parseTagOpPfx,
        '@opmaps':      self.parseTagOpMaps,
        '@opcode':      self.parseTagOpcode,
        '@opcodesub':   self.parseTagOpcodeSub,
        '@openc':       self.parseTagOpEnc,
        #@opfltest: Lists all flags that will be used as input in some way.
        '@opfltest':    self.parseTagOpEFlags,
        #@opflmodify: Lists all EFLAGS modified. Includes @opflset, @opflclear and @opflundef (if applicable).
        '@opflmodify':  self.parseTagOpEFlags,
        #@opflset: Lists all flags that will be set (set to 1).
        '@opflset':     self.parseTagOpEFlags,
        #@opflclear: Lists all flags that will be cleared (set to 0).
        '@opflclear':   self.parseTagOpEFlags,
        #@opflundef: List of flag documented as undefined.
        '@opflundef':   self.parseTagOpEFlags,
        #@opflclass: Shorthand for defining flag behaviour (@opfltest, @opflmodify, @opflset, @opflclear, @opflundef).
        '@opflclass':   self.parseTagOpEFlagsClass,
        '@ophints':     self.parseTagOpHints,
        '@opdisenum':   self.parseTagOpDisEnum,
        '@opmincpu':    self.parseTagOpMinCpu,
        '@opcpuid':     self.parseTagOpCpuId,
        '@opgroup':     self.parseTagOpGroup,
        '@opunused':    self.parseTagOpUnusedInvalid,
        '@opinvalid':   self.parseTagOpUnusedInvalid,
        '@opinvlstyle': self.parseTagOpUnusedInvalid,
        '@optest':      self.parseTagOpTest,
        '@optestign':   self.parseTagOpTestIgnore,
        '@optestignore': self.parseTagOpTestIgnore,
        '@opcopytests': self.parseTagOpCopyTests,
        '@oponly':      self.parseTagOpOnlyTest,
        '@oponlytest':  self.parseTagOpOnlyTest,
        '@opxcpttype':  self.parseTagOpXcptType,
        '@opstats':     self.parseTagOpStats,
        '@opfunction':  self.parseTagOpFunction,
        '@opdone':      self.parseTagOpDone,
    };
    # The numbered test tags, both as @optestN and @optest[N] for N in 0..47.
    for i in range(48):
        self.dTagHandlers['@optest%u' % (i,)]   = self.parseTagOpTestNum;
        self.dTagHandlers['@optest[%u]' % (i,)] = self.parseTagOpTestNum;

    self.asErrors = [];     ##< Error messages collected by error(), errorOnLine() and errorComment().
3709
def raiseError(self, sMessage):
    """
    Raises a ParserException for the current position.

    The exception text is sMessage prefixed with the source file name
    (self.sSrcFile) and the current line number (self.iLine).
    """
    sFullMessage = "%s:%d: error: %s" % (self.sSrcFile, self.iLine, sMessage,);
    raise ParserException(sFullMessage);
3715
def raiseCommentError(self, iLineInComment, sMessage):
    """
    Raises a ParserException for a line inside the current comment.

    Works like raiseError, except that the reported line number is
    self.iCommentLine + iLineInComment.
    """
    iReportLine = self.iCommentLine + iLineInComment;
    sFullMessage = "%s:%d: error: %s" % (self.sSrcFile, iReportLine, sMessage,);
    raise ParserException(sFullMessage);
3721
def error(self, sMessage):
    """
    Records an error against the current line (self.iLine).

    The formatted message is appended to self.asErrors; nothing is raised.
    Always returns False.
    """
    sEntry = u'%s:%d: error: %s\n' % (self.sSrcFile, self.iLine, sMessage,);
    self.asErrors.append(sEntry);
    return False;
3729
def errorOnLine(self, iLine, sMessage):
    """
    Records an error against an explicitly given line number.

    The formatted message is appended to self.asErrors; nothing is raised.
    Always returns False.
    """
    sEntry = u'%s:%d: error: %s\n' % (self.sSrcFile, iLine, sMessage,);
    self.asErrors.append(sEntry);
    return False;
3737
def errorComment(self, iLineInComment, sMessage):
    """
    Records an error against a line inside the current comment.

    The reported line number is self.iCommentLine + iLineInComment.  The
    formatted message is appended to self.asErrors; nothing is raised.
    Always returns False.
    """
    iReportLine = self.iCommentLine + iLineInComment;
    sEntry = u'%s:%d: error: %s\n' % (self.sSrcFile, iReportLine, sMessage,);
    self.asErrors.append(sEntry);
    return False;
3745
def printErrors(self):
    """
    Writes all recorded errors (self.asErrors) to stderr in one go.

    Returns the number of recorded errors; nothing is written when there
    are none.
    """
    cErrors = len(self.asErrors);
    if cErrors > 0:
        sys.stderr.write(u''.join(self.asErrors));
    return cErrors;
3754
def debug(self, sMessage):
    """
    Prints a 'debug: ' prefixed message to stderr when self.fDebug is set.
    """
    if not self.fDebug:
        return;
    print('debug: %s' % (sMessage,), file = sys.stderr);
3761
def stripComments(self, sLine):
    """
    Returns sLine with every match of self.oReComment replaced by a single
    space.

    If a comment opener or closer is still present afterwards, an error is
    recorded via self.error(); the partially stripped line is returned
    regardless.
    """
    sStripped = self.oReComment.sub(" ", sLine);
    if '/*' in sStripped or '*/' in sStripped:
        self.error('Unexpected multi-line comment will not be handled correctly. Please simplify.');
    return sStripped;
3772
def parseFunctionTable(self, sLine):
    """
    Parses a PFNIEMOP table, updating/checking the @oppfx value.

    sLine is the line declaring the table (it must contain 'name[').  The
    table body is read from self.asLines starting at self.iLine.

    Returns True when the closing brace is found and the number of entries
    matches the expected table length.  Otherwise an error is recorded and
    False is returned.

    Note! Updates iLine as it consumes the whole table.
    """

    #
    # Extract the table name.
    #
    sName = re.search(r' *([a-zA-Z_0-9]+) *\[', sLine).group(1);
    oMap = g_dInstructionMapsByIemName.get(sName);
    if not oMap:
        self.debug('No map for PFNIEMOP table: %s' % (sName,));
        oMap = self.oDefaultMap; # This is wrong wrong wrong.

    #
    # All but the g_apfnOneByteMap & g_apfnEscF1_E0toFF tables uses four
    # entries per byte:
    # no prefix, 066h prefix, f3h prefix, f2h prefix
    # Those tables has 256 & 32 entries respectively.
    #
    cEntriesPerByte = 4;
    cValidTableLength = 1024;
    asPrefixes = ('none', '0x66', '0xf3', '0xf2');

    # An explicit [256] or [32] dimension selects the one-entry-per-byte layout.
    oEntriesMatch = re.search(r'\[ *(256|32) *\]', sLine);
    if oEntriesMatch:
        cEntriesPerByte = 1;
        cValidTableLength = int(oEntriesMatch.group(1));
        asPrefixes = (None,);

    #
    # The next line should be '{' and nothing else.
    #
    if self.iLine >= len(self.asLines) or not re.match('^ *{ *$', self.asLines[self.iLine]):
        return self.errorOnLine(self.iLine + 1, 'Expected lone "{" on line following PFNIEMOP table %s start' % (sName, ));
    self.iLine += 1;

    #
    # Parse till we find the end of the table.
    #
    iEntry = 0; # Running index of the table entry, counted across all lines.
    while self.iLine < len(self.asLines):
        # Get the next line and strip comments and spaces (assumes no
        # multi-line comments).
        sLine = self.asLines[self.iLine];
        self.iLine += 1;
        sLine = self.stripComments(sLine).strip();

        # Split the line up into entries, expanding IEMOP_X4 usage.
        # The list is walked backwards so that the insertions and deletions
        # done at index i do not disturb the indexes still to be visited.
        asEntries = sLine.split(',');
        for i in range(len(asEntries) - 1, -1, -1):
            sEntry = asEntries[i].strip();
            if sEntry.startswith('IEMOP_X4(') and sEntry[-1] == ')':
                # Unwrap the macro argument; three extra copies plus the
                # entry itself (stored below) make four entries in total.
                sEntry = (sEntry[len('IEMOP_X4('):-1]).strip();
                asEntries.insert(i + 1, sEntry);
                asEntries.insert(i + 1, sEntry);
                asEntries.insert(i + 1, sEntry);
            if sEntry:
                asEntries[i] = sEntry;
            else:
                # Drop empty fragments (e.g. what follows a trailing comma).
                del asEntries[i];

        # Process the entries.
        for sEntry in asEntries:
            if sEntry in ('};', '}'):
                # End of table: the entry count must match the expected length.
                if iEntry != cValidTableLength:
                    return self.error('Wrong table length for %s: %#x, expected %#x' % (sName, iEntry, cValidTableLength, ));
                return True;
            if sEntry.startswith('iemOp_Invalid'):
                pass; # skip
            else:
                # Look up matching instruction by function.
                sPrefix = asPrefixes[iEntry % cEntriesPerByte];
                sOpcode = '%#04x' % (iEntry // cEntriesPerByte);
                aoInstr = g_dAllInstructionsByFunction.get(sEntry);
                if aoInstr:
                    if not isinstance(aoInstr, list):
                        aoInstr = [aoInstr,];
                    oInstr = None;
                    for oCurInstr in aoInstr:
                        if oCurInstr.sOpcode == sOpcode and oCurInstr.sPrefix == sPrefix:
                            pass; # Exact match, nothing to update.
                        elif oCurInstr.sOpcode == sOpcode and oCurInstr.sPrefix is None:
                            oCurInstr.sPrefix = sPrefix; # Fill in the missing prefix.
                        elif oCurInstr.sOpcode is None and oCurInstr.sPrefix is None:
                            # Neither is known yet: take both from the table position.
                            oCurInstr.sOpcode = sOpcode;
                            oCurInstr.sPrefix = sPrefix;
                        else:
                            continue; # Belongs to a different table slot, try the next one.
                        oInstr = oCurInstr;
                        break;
                    if not oInstr:
                        # No existing instruction fits this slot: clone the first
                        # one for this opcode/prefix and register the clone.
                        oInstr = aoInstr[0].copy(oMap = oMap, sOpcode = sOpcode, sPrefix = sPrefix);
                        aoInstr.append(oInstr);
                        g_dAllInstructionsByFunction[sEntry] = aoInstr;
                        g_aoAllInstructions.append(oInstr);
                        oMap.aoInstructions.append(oInstr);
                else:
                    self.debug('Function "%s", entry %#04x / byte %#04x in %s, is not associated with an instruction.'
                               % (sEntry, iEntry, iEntry // cEntriesPerByte, sName,));
            # Every entry, including invalid ones, occupies a table slot.
            iEntry += 1;

    return self.error('Unexpected end of file in PFNIEMOP table');
3878
def addInstruction(self, iLine = None):
    """
    Creates a new Instruction and registers it.

    The instruction is created for self.sSrcFile at iLine, or at the
    current line (self.iLine) when iLine is None.  It is appended to both
    g_aoAllInstructions and self.aoCurInstrs.

    Returns the new instruction.
    """
    iCreatedLine = iLine if iLine is not None else self.iLine;
    oNewInstr = Instruction(self.sSrcFile, iCreatedLine);
    for aoTarget in (g_aoAllInstructions, self.aoCurInstrs):
        aoTarget.append(oNewInstr);
    return oNewInstr;
3887
def deriveMnemonicAndOperandsFromStats(self, oInstr, sStats):
    """
    Derives the mnemonic and operands from an IEM stats base name like
    string ('mnemonic_type1_type2...').

    Nothing is done when the instruction already has a mnemonic.  Operands
    are only derived when the instruction has none yet.

    Returns False if one of the operand type words is not in g_kdOpTypes
    (operands derived before that word are kept), otherwise True.
    """
    if oInstr.sMnemonic is not None:
        return True;

    asParts = sStats.split('_');
    oInstr.sMnemonic = asParts[0].lower();
    if len(asParts) <= 1 or oInstr.aoOperands:
        return True;

    for sOpType in asParts[1:]:
        if sOpType not in g_kdOpTypes:
            #return self.error('unknown operand type: %s (instruction: %s)' % (sOpType, oInstr))
            return False;
        oInstr.aoOperands.append(Operand(g_kdOpTypes[sOpType][1], sOpType));
    return True;
3903
def doneInstructionOne(self, oInstr, iLine):
    """
    Complete the parsing by processing, validating and expanding raw inputs.

    Fills in the instruction fields that were not given explicitly
    (mnemonic, disassembler enum, stats name, function name, maps and
    encoding), adds the instruction to its maps and registers it in the
    stats and function indexed dictionaries.

    iLine is recorded as the line where the instruction was completed.
    Returns True.
    """
    # Each instruction must only be completed once.
    assert oInstr.iLineCompleted is None;
    oInstr.iLineCompleted = iLine;

    #
    # Specified instructions.
    #
    if oInstr.cOpTags > 0:
        if oInstr.sStats is None:
            pass;

    #
    # Unspecified legacy stuff. We generally only got a few things to go on here.
    # /** Opcode 0x0f 0x00 /0. */
    # FNIEMOPRM_DEF(iemOp_Grp6_sldt)
    #
    else:
        #if oInstr.sRawOldOpcodes:
        #
        #if oInstr.sMnemonic:
        pass;

    #
    # Common defaults.
    #

    # Guess mnemonic and operands from stats if the former is missing.
    if oInstr.sMnemonic is None:
        if oInstr.sStats is not None:
            self.deriveMnemonicAndOperandsFromStats(oInstr, oInstr.sStats);
        elif oInstr.sFunction is not None:
            self.deriveMnemonicAndOperandsFromStats(oInstr, oInstr.sFunction.replace('iemOp_', ''));

    # Derive the disassembler op enum constant from the mnemonic.
    if oInstr.sDisEnum is None and oInstr.sMnemonic is not None:
        oInstr.sDisEnum = 'OP_' + oInstr.sMnemonic.upper();

    # Derive the IEM statistics base name from mnemonic and operand types.
    # (The function name, minus its 'iemOp_' prefix, takes precedence when known.)
    if oInstr.sStats is None:
        if oInstr.sFunction is not None:
            oInstr.sStats = oInstr.sFunction.replace('iemOp_', '');
        elif oInstr.sMnemonic is not None:
            oInstr.sStats = oInstr.sMnemonic;
            # NOTE(review): parseTagOpOperandN pads aoOperands with None when a
            # later operand is given first; such an entry would fail on .sType
            # here - confirm whether that can happen in practice.
            for oOperand in oInstr.aoOperands:
                if oOperand.sType:
                    oInstr.sStats += '_' + oOperand.sType;

    # Derive the IEM function name from mnemonic and operand types.
    if oInstr.sFunction is None:
        if oInstr.sMnemonic is not None:
            oInstr.sFunction = 'iemOp_' + oInstr.sMnemonic;
            for oOperand in oInstr.aoOperands:
                if oOperand.sType:
                    oInstr.sFunction += '_' + oOperand.sType;
        elif oInstr.sStats:
            oInstr.sFunction = 'iemOp_' + oInstr.sStats;

    #
    # Apply default map and then add the instruction to all its groups.
    #
    if not oInstr.aoMaps:
        oInstr.aoMaps = [ self.oDefaultMap, ];
    for oMap in oInstr.aoMaps:
        oMap.aoInstructions.append(oInstr);

    #
    # Derive encoding from operands and maps.
    #
    if oInstr.sEncoding is None:
        if not oInstr.aoOperands:
            # No operands: ModR/M only for unused entries with a sub-opcode,
            # otherwise a fixed encoding.
            if oInstr.fUnused and oInstr.sSubOpcode:
                oInstr.sEncoding = 'VEX.ModR/M' if oInstr.onlyInVexMaps() else 'ModR/M';
            else:
                oInstr.sEncoding = 'VEX.fixed' if oInstr.onlyInVexMaps() else 'fixed';
        elif oInstr.aoOperands[0].usesModRM():
            # A second operand located in 'vvvv' implies a VEX encoding too.
            if (len(oInstr.aoOperands) >= 2 and oInstr.aoOperands[1].sWhere == 'vvvv') \
               or oInstr.onlyInVexMaps():
                oInstr.sEncoding = 'VEX.ModR/M';
            else:
                oInstr.sEncoding = 'ModR/M';

    #
    # Check the opstat value and add it to the opstat indexed dictionary.
    #
    if oInstr.sStats:
        if oInstr.sStats not in g_dAllInstructionsByStat:
            g_dAllInstructionsByStat[oInstr.sStats] = oInstr;
        else:
            # The first instruction registered under the name is kept.
            self.error('Duplicate opstat value "%s"\nnew: %s\nold: %s'
                       % (oInstr.sStats, oInstr, g_dAllInstructionsByStat[oInstr.sStats],));

    #
    # Add to function indexed dictionary. We allow multiple instructions per function.
    #
    if oInstr.sFunction:
        if oInstr.sFunction not in g_dAllInstructionsByFunction:
            g_dAllInstructionsByFunction[oInstr.sFunction] = [oInstr,];
        else:
            g_dAllInstructionsByFunction[oInstr.sFunction].append(oInstr);

    #self.debug('%d..%d: %s; %d @op tags' % (oInstr.iLineCreated, oInstr.iLineCompleted, oInstr.sFunction, oInstr.cOpTags));
    return True;
4009
def doneInstructions(self, iLineInComment = None, fEndOfFunction = False):
    """
    Completes all current instructions and resets the per-instruction state.

    Every instruction in self.aoCurInstrs is passed to doneInstructionOne
    together with the completion line: self.iLine, or
    self.iCommentLine + iLineInComment when iLineInComment is given.  The
    stub and instruction totals are updated, then the pending comment and
    the current instruction list are cleared.

    When fEndOfFunction is set, the current function (if any) is completed
    and the function tracking state is reset as well.

    Returns True.
    """
    for oCurInstr in self.aoCurInstrs:
        if iLineInComment is None:
            iDoneLine = self.iLine;
        else:
            iDoneLine = self.iCommentLine + iLineInComment;
        self.doneInstructionOne(oCurInstr, iDoneLine);
        if oCurInstr.fStub:
            self.cTotalStubs += 1;

    self.cTotalInstr += len(self.aoCurInstrs);

    self.sComment = '';
    self.aoCurInstrs = [];

    if not fEndOfFunction:
        return True;

    #self.debug('%s: oCurFunction=None' % (self.iLine, ));
    oFunction = self.oCurFunction;
    if oFunction:
        oFunction.complete(self.iLine, self.asLines[oFunction.iBeginLine - 1 : self.iLine]);
    self.oCurFunction = None;
    self.iMcBlockInFunc = 0;
    return True;
4030
def setInstrunctionAttrib(self, sAttrib, oValue, fOverwrite = False):
    """
    Sets attribute sAttrib of all current instructions to oValue.

    Unless fOverwrite is True, only attributes that are currently None are
    replaced; any other existing value (empty strings included) is kept.
    """
    for oCurInstr in self.aoCurInstrs:
        if fOverwrite is True or getattr(oCurInstr, sAttrib) is None:
            setattr(oCurInstr, sAttrib, oValue);
4042
def setInstrunctionArrayAttrib(self, sAttrib, iEntry, oValue, fOverwrite = False):
    """
    Sets entry iEntry of the list attribute sAttrib of all current
    instructions to oValue.

    The list is padded with None as needed so that iEntry exists.  Unless
    fOverwrite is True, only entries that are currently None are replaced.
    """
    for oCurInstr in self.aoCurInstrs:
        aoValues = getattr(oCurInstr, sAttrib);
        cMissing = iEntry + 1 - len(aoValues);
        if cMissing > 0:
            aoValues.extend([None] * cMissing);
        if fOverwrite is True or aoValues[iEntry] is None:
            aoValues[iEntry] = oValue;
4054
def parseCommentOldOpcode(self, asLines):
    """
    Deals with 'Opcode 0xff /4' like comments.

    If the first comment line starts with the word 'Opcode' followed by a
    hexadecimal byte ('0x..' or '0X..'), everything after 'Opcode' is
    stored as the 'sRawOldOpcodes' attribute of the current instructions
    (via setInstrunctionAttrib, so existing values are not overwritten).
    A leading '0X' on any word is normalized to '0x'.

    Returns False unconditionally.
    """
    if not asLines:
        return False;

    asWords = asLines[0].split();
    if len(asWords) >= 2 \
       and asWords[0] == 'Opcode' \
       and asWords[1].startswith(('0x', '0X')):
        # Drop the leading 'Opcode' word and keep the opcode bytes / modifiers.
        asOpcodeWords = [];
        for sWord in asWords[1:]:
            if sWord.startswith('0X'):
                sWord = '0x' + sWord[2:]; # Keep the digits, lower-case the prefix.
            asOpcodeWords.append(sWord);
        self.setInstrunctionAttrib('sRawOldOpcodes', ' '.join(asOpcodeWords));

    return False;
4070
def ensureInstructionForOpTag(self, iTagLine):
    """
    Ensures there is an instruction for the op-tag being parsed.

    Adds a new instruction (at self.iCommentLine + iTagLine) when there is
    no current one.  The tag count of every current instruction is bumped,
    and self.cTotalTagged is incremented for each instruction receiving
    its first tag.

    Returns the last of the current instructions.
    """
    if len(self.aoCurInstrs) == 0:
        self.addInstruction(self.iCommentLine + iTagLine);

    for oCurInstr in self.aoCurInstrs:
        oCurInstr.cOpTags += 1;
        fFirstTag = oCurInstr.cOpTags == 1;
        if fFirstTag:
            self.cTotalTagged += 1;

    return self.aoCurInstrs[-1];
4080
4081 @staticmethod
4082 def flattenSections(aasSections):
4083 """
4084 Flattens multiline sections into stripped single strings.
4085 Returns list of strings, on section per string.
4086 """
4087 asRet = [];
4088 for asLines in aasSections:
4089 if asLines:
4090 asRet.append(' '.join([sLine.strip() for sLine in asLines]));
4091 return asRet;
4092
4093 @staticmethod
4094 def flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = '\n'):
4095 """
4096 Flattens sections into a simple stripped string with newlines as
4097 section breaks. The final section does not sport a trailing newline.
4098 """
4099 # Typical: One section with a single line.
4100 if len(aasSections) == 1 and len(aasSections[0]) == 1:
4101 return aasSections[0][0].strip();
4102
4103 sRet = '';
4104 for iSection, asLines in enumerate(aasSections):
4105 if asLines:
4106 if iSection > 0:
4107 sRet += sSectionSep;
4108 sRet += sLineSep.join([sLine.strip() for sLine in asLines]);
4109 return sRet;
4110
4111
4112
4113 ## @name Tag parsers
4114 ## @{
4115
def parseTagOpBrief(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag: @opbrief
    Value: Text description, multiple sections, appended.

    Brief description. If not given, it's the first sentence from @opdesc.

    The value must be non-empty, at most 180 characters long (including the
    final full stop, which is added when missing) and consist of a single
    sentence.  Returns True on success, False after recording an error.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sBrief = self.flattenAllSections(aasSections);
    if not sBrief:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if sBrief[-1] != '.':
        sBrief = sBrief + '.';
    if len(sBrief) > 180:
        return self.errorComment(iTagLine, '%s: value too long (max 180 chars): %s' % (sTag, sBrief));

    # Find the first dot that ends a sentence, i.e. one followed by a space
    # or sitting at the very end.  Dots inside words ('v1.0') are skipped.
    offDot = sBrief.find('.');
    while 0 <= offDot < len(sBrief) - 1 and sBrief[offDot + 1] != ' ':
        offDot = sBrief.find('.', offDot + 1);
    if offDot >= 0 and offDot != len(sBrief) - 1:
        return self.errorComment(iTagLine, '%s: only one sentence: %s' % (sTag, sBrief));

    # Update the instruction, refusing to overwrite an existing brief.
    if oInstr.sBrief is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite brief "%s" with "%s"'
                                           % (sTag, oInstr.sBrief, sBrief,));
    oInstr.sBrief = sBrief;

    _ = iEndLine;
    return True;
4145
def parseTagOpDesc(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag: @opdesc
    Value: Text description, multiple sections, appended.

    It is used to describe instructions.  Each section is flattened into
    one string and appended to the instruction's asDescSections list.

    Returns True.
    """
    _ = sTag; _ = iEndLine;

    oInstr = self.ensureInstructionForOpTag(iTagLine);
    if aasSections:
        asFlattened = self.flattenSections(aasSections);
        oInstr.asDescSections.extend(asFlattened);
    return True;
4160
def parseTagOpMnemonic(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag: @opmnemonic
    Value: mnemonic

    The 'mnemonic' value must be a valid C identifier string. Because of
    prefixes, groups and whatnot, there are times when the mnemonic isn't
    that of an actual assembler mnemonic.

    The value is checked against self.oReMnemonic and an already set
    mnemonic is never overwritten.  Returns True on success, False after
    recording an error.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sNewMnemonic = self.flattenAllSections(aasSections);
    if self.oReMnemonic.match(sNewMnemonic) is None:
        return self.errorComment(iTagLine, '%s: invalid menmonic name: "%s"' % (sTag, sNewMnemonic,));

    # Refuse to replace an existing mnemonic.
    sOldMnemonic = oInstr.sMnemonic;
    if sOldMnemonic is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite menmonic "%s" with "%s"'
                                           % (sTag, sOldMnemonic, sNewMnemonic,));

    oInstr.sMnemonic = sNewMnemonic;
    return True;
4183
def parseTagOpOperandN(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tags: @op1, @op2, @op3, @op4
    Value: [where:]type

    The 'where' value indicates where the operand is found, like the 'reg'
    part of the ModR/M encoding. It must be a key of g_kdOpLocations.  When
    omitted, the default location of the type (g_kdOpTypes[type][1]) is
    used.

    The 'type' value indicates the operand type. These follow the types
    given in the opcode tables in the CPU reference manuals.  It must be a
    key of g_kdOpTypes.

    The operand index is taken from the last character of the tag.  An
    already specified operand is never overwritten.  Returns True on
    success, False after recording an error.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    idxOp = int(sTag[-1]) - 1;
    assert 0 <= idxOp < 4;

    # Flatten, split up, and validate the "where:type" value.
    sFlattened = self.flattenAllSections(aasSections);
    asSplit = sFlattened.split(':');
    if len(asSplit) == 1:
        sType = asSplit[0];
        sWhere = None;
    elif len(asSplit) == 2:
        (sWhere, sType) = asSplit;
    else:
        return self.errorComment(iTagLine, 'expected %s value on format "[<where>:]<type>" not "%s"' % (sTag, sFlattened,));

    if sType not in g_kdOpTypes:
        # It is the type that is wrong here, so say so.
        return self.errorComment(iTagLine, '%s: invalid type value "%s", valid: %s'
                                           % (sTag, sType, ', '.join(g_kdOpTypes.keys()),));
    if sWhere is None:
        sWhere = g_kdOpTypes[sType][1];
    elif sWhere not in g_kdOpLocations:
        return self.errorComment(iTagLine, '%s: invalid where value "%s", valid: %s'
                                           % (sTag, sWhere, ', '.join(g_kdOpLocations.keys()),));

    # Insert the operand, refusing to overwrite an existing one.  Operands
    # preceding it that were not given yet are left as None placeholders.
    while idxOp >= len(oInstr.aoOperands):
        oInstr.aoOperands.append(None);
    if oInstr.aoOperands[idxOp] is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s:%s" with "%s:%s"'
                                           % ( sTag, oInstr.aoOperands[idxOp].sWhere, oInstr.aoOperands[idxOp].sType,
                                               sWhere, sType,));
    oInstr.aoOperands[idxOp] = Operand(sWhere, sType);

    _ = iEndLine;
    return True;
4233
def parseTagOpMaps(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag: @opmaps
    Value: map[,map2]

    Indicates which maps the instruction is in. There is a default map
    associated with each input file.

    Every name must be a key of g_dInstructionMaps and must not already be
    assigned to the instruction.  Returns True on success, False after
    recording an error for an unknown or already assigned map.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split up and validate the value.
    sJoined = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',');
    asMapNames = sJoined.split(',');
    if len(asMapNames) == 0:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    for sMapName in asMapNames:
        if sMapName not in g_dInstructionMaps:
            return self.errorComment(iTagLine, '%s: invalid map value: %s (valid values: %s)'
                                               % (sTag, sMapName, ', '.join(g_dInstructionMaps.keys()),));

    # Reject names that the instruction has been assigned to already.
    for oExistingMap in oInstr.aoMaps:
        if oExistingMap.sName in asMapNames:
            return self.errorComment(iTagLine, '%s: duplicate map assignment: %s' % (sTag, oExistingMap.sName));

    # Add the maps; a name repeated within the value is reported but not fatal.
    for sMapName in asMapNames:
        oNewMap = g_dInstructionMaps[sMapName];
        if oNewMap in oInstr.aoMaps:
            self.errorComment(iTagLine, '%s: duplicate map assignment (input): %s' % (sTag, sMapName));
        else:
            oInstr.aoMaps.append(oNewMap);

    return True;
4268
def parseTagOpPfx(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag: @oppfx
    Value: n/a|none|0x66|0xf3|0xf2|!0xf3

    Required prefix for the instruction. (In a (E)VEX context this is the
    value of the 'pp' field rather than an actual prefix.)

    'n/a' is stored as None.  A two character value is taken to be a hex
    byte without the '0x' prefix.  Any value other than 'n/a' must be a
    key of g_kdPrefixes.  An already set prefix is never overwritten.

    Returns True on success, False after recording an error.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sFlattened = self.flattenAllSections(aasSections);
    asPrefixes = sFlattened.split();
    if not asPrefixes:
        # An empty tag value would otherwise fail on asPrefixes[0] below.
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if len(asPrefixes) > 1:
        return self.errorComment(iTagLine, '%s: max one prefix: %s' % (sTag, asPrefixes,));

    sPrefix = asPrefixes[0].lower();
    if sPrefix == 'n/a':
        sPrefix = None;
    elif sPrefix != 'none':
        if len(sPrefix) == 2:
            sPrefix = '0x' + sPrefix;
        # '!0xf3' is the one accepted value that is not an opcode byte.
        if not _isValidOpcodeByte(sPrefix) and sPrefix != '!0xf3':
            return self.errorComment(iTagLine, '%s: invalid prefix: %s' % (sTag, sPrefix,));

    if sPrefix is not None and sPrefix not in g_kdPrefixes:
        return self.errorComment(iTagLine, '%s: invalid prefix: %s (valid %s)' % (sTag, sPrefix, g_kdPrefixes,));

    # Set it.
    if oInstr.sPrefix is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sPrefix, sPrefix,));
    oInstr.sPrefix = sPrefix;

    _ = iEndLine;
    return True;
4307
def parseTagOpcode(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag: @opcode
    Value: 0x?? | /reg (TODO: | mr/reg | 11 /reg | !11 /reg | 11 mr/reg | !11 mr/reg)

    The opcode byte or sub-byte for the instruction in the context of a map.

    Accepted values are whatever _isValidOpcodeByte accepts, plus the forms
    '/r', '11/r' and '!11/r' where r is a ModR/M reg field value (0..7).
    An already set opcode is never overwritten.

    Returns True on success, False after recording an error.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sOpcode = self.flattenAllSections(aasSections);
    # The reg field is three bits wide, so only the digits 0 thru 7 are valid.
    # (The prefix check guarantees at least two characters before indexing.)
    fIsRegForm = sOpcode[:-1] in ('/', '11/', '!11/') and sOpcode[-1] in '01234567';
    if not fIsRegForm and not _isValidOpcodeByte(sOpcode):
        return self.errorComment(iTagLine, '%s: invalid opcode: %s' % (sTag, sOpcode,));

    # Set it.
    if oInstr.sOpcode is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sOpcode, sOpcode,));
    oInstr.sOpcode = sOpcode;

    _ = iEndLine;
    return True;
4337
def parseTagOpcodeSub(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag: @opcodesub
    Value: none | 11 mr/reg | !11 mr/reg | rex.w=0 | rex.w=1 | vex.l=0 | vex.l=1
           | 11 mr/reg vex.l=0 | 11 mr/reg vex.l=1 | !11 mr/reg vex.l=0 | !11 mr/reg vex.l=1
           | !11 rex.w=0 | !11 mr/reg rex.w=0
           | !11 rex.w=1 | !11 mr/reg rex.w=1

    This is a simple way of dealing with encodings where the mod=3 and mod!=3
    represents exactly two different instructions. The more proper way would
    be to go via maps with two members, but this is faster.

    The value must be a key of g_kdSubOpcodes; what gets stored is the
    first element of the matching table entry.  An already set sub-opcode
    is never overwritten.  Returns True on success, False after recording
    an error.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sRawValue = self.flattenAllSections(aasSections);
    if sRawValue not in g_kdSubOpcodes:
        sValid = ', '.join(sorted(g_kdSubOpcodes.keys()));
        return self.errorComment(iTagLine, '%s: invalid sub opcode: %s (valid: %s)'
                                           % (sTag, sRawValue, sValid,));
    sNewSubOpcode = g_kdSubOpcodes[sRawValue][0];

    # Refuse to replace an existing value.
    sOldSubOpcode = oInstr.sSubOpcode;
    if sOldSubOpcode is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                           % ( sTag, sOldSubOpcode, sNewSubOpcode,));

    oInstr.sSubOpcode = sNewSubOpcode;
    return True;
4367
def parseTagOpEnc(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag: @openc
    Value: ModR/M|fixed|prefix|<map name>

    The instruction operand encoding style.

    The value is accepted when it is a key of g_kdEncodings, a key of
    g_dInstructionMaps, or accepted by _isValidOpcodeByte.  An already set
    encoding is never overwritten.  Returns True on success, False after
    recording an error.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sNewEncoding = self.flattenAllSections(aasSections);
    fKnown = sNewEncoding in g_kdEncodings \
          or sNewEncoding in g_dInstructionMaps \
          or _isValidOpcodeByte(sNewEncoding);
    if not fKnown:
        return self.errorComment(iTagLine, '%s: invalid encoding: %s' % (sTag, sNewEncoding,));

    # Refuse to replace an existing value.
    sOldEncoding = oInstr.sEncoding;
    if sOldEncoding is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                           % ( sTag, sOldEncoding, sNewEncoding,));

    oInstr.sEncoding = sNewEncoding;
    return True;
4394
## EFlags tag to Instruction attribute name.
## parseTagOpEFlags looks up the tag being parsed here to find the name of the
## instruction attribute it reads (getattr) and writes (setattr).
kdOpFlagToAttr = {
    '@opfltest': 'asFlTest',
    '@opflmodify': 'asFlModify',
    '@opflundef': 'asFlUndefined',
    '@opflset': 'asFlSet',
    '@opflclear': 'asFlClear',
};
4403
def parseTagOpEFlags(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tags: @opfltest, @opflmodify, @opflundef, @opflset, @opflclear
    Value: <eflags specifier>

    The value is a comma separated list of flag names, each of which must
    be a key of g_kdEFlagsMnemonics, or the single word 'none' (any case)
    for an empty list.  The list is stored in the instruction attribute
    that self.kdOpFlagToAttr gives for the tag; a non-empty existing list
    is never overwritten.

    Returns True on success, False after recording one error per invalid
    flag or an overwrite error.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split up and validate the values.
    sJoined = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',');
    asFlags = sJoined.split(',');
    if len(asFlags) == 1 and asFlags[0].lower() == 'none':
        asFlags = [];
    else:
        fAllValid = True;
        for iFlag, sRawFlag in enumerate(asFlags):
            if sRawFlag in g_kdEFlagsMnemonics:
                continue;
            sTrimmed = sRawFlag.strip();
            if sTrimmed in g_kdEFlagsMnemonics:
                asFlags[iFlag] = sTrimmed;
            else:
                # Keep going so that every bad flag gets reported.
                fAllValid = self.errorComment(iTagLine, '%s: invalid EFLAGS value: %s' % (sTag, sRawFlag,));
        if not fAllValid:
            return False;

    # Set them.
    sAttrib = self.kdOpFlagToAttr[sTag];
    asOld = getattr(oInstr, sAttrib);
    if asOld is not None and len(asOld) > 0:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, asOld, asFlags,));
    setattr(oInstr, sAttrib, asFlags);

    return True;
4435
## EFLAGS class definitions with their attribute lists.
## Keyed by the class name given to @opflclass.  Each value maps Instruction
## attribute names to the flag list that parseTagOpEFlagsClass assigns to
## that attribute.
kdEFlagsClasses = {
    'arithmetic': { # add, sub, ...
        'asFlTest': [],
        'asFlModify': [ 'cf', 'pf', 'af', 'zf', 'sf', 'of', ],
        'asFlClear': [],
        'asFlSet': [],
        'asFlUndefined': [],
    },
    'arithmetic_carry': { # adc, sbb, ...
        'asFlTest': [ 'cf', ],
        'asFlModify': [ 'cf', 'pf', 'af', 'zf', 'sf', 'of', ],
        'asFlClear': [],
        'asFlSet': [],
        'asFlUndefined': [],
    },
    'incdec': {
        'asFlTest': [],
        'asFlModify': [ 'pf', 'af', 'zf', 'sf', 'of', ], # leaves CF alone
        'asFlClear': [],
        'asFlSet': [],
        'asFlUndefined': [],
    },
    'division': { ## @todo specify intel/amd differences...
        'asFlTest': [ 'pf', 'af', 'zf', 'sf', ], # Intel leaves all flags unchanged.
        'asFlModify': [ 'pf', 'af', 'zf', 'sf', ], # While AMD sets AF and clears PF, ZF & SF, leaving CF and OF alone.
        'asFlClear': [],
        'asFlSet': [],
        'asFlUndefined': [ 'cf', 'pf', 'af', 'zf', 'sf', 'of', ],
    },
    'multiply': { ## @todo specify intel/amd differences...
        'asFlTest': [ 'pf', 'af', 'zf', 'sf', ], # AMD leaves these unchanged, so we have to declare them as inputs.
        'asFlModify': [ 'cf', 'pf', 'af', 'zf', 'sf', 'of' ], # Intel always modifies all flags, but how differs
        'asFlClear': [], # between IMUL and MUL.
        'asFlSet': [],
        'asFlUndefined': [ 'pf', 'af', 'zf', 'sf', ],
    },
    'logical': { # and, or, xor, ...
        'asFlTest': [],
        'asFlModify': [ 'cf', 'pf', 'af', 'zf', 'sf', 'of', ],
        'asFlClear': [ 'cf', 'af', 'of', ], # 'af' is undefined, but tstIEMAImpl indicates that it is cleared.
        'asFlSet': [],
        'asFlUndefined': [ 'af', ],
    },
    'rotate_1': { # rol and ror with fixed 1 shift count
        'asFlTest': [],
        'asFlModify': [ 'cf', 'of', ],
        'asFlClear': [],
        'asFlSet': [],
        'asFlUndefined': [],
    },
    'rotate_count': { # rol and ror w/o fixed 1 shift count
        'asFlTest': [],
        'asFlModify': [ 'cf', 'of', ],
        'asFlClear': [],
        'asFlSet': [],
        'asFlUndefined': [ 'of', ],
    },
    'rotate_carry_1': { # rcl and rcr with fixed 1 shift count
        'asFlTest': [ 'cf', ],
        'asFlModify': [ 'cf', 'of', ],
        'asFlClear': [],
        'asFlSet': [],
        'asFlUndefined': [],
    },
    'rotate_carry_count': { # rcl and rcr w/o fixed 1 shift count
        'asFlTest': [ 'cf', ],
        'asFlModify': [ 'cf', 'of', ],
        'asFlClear': [],
        'asFlSet': [],
        'asFlUndefined': [ 'of', ],
    },
    'shift_1': { # shl, shr or sar with fixed 1 count.
        'asFlTest': [],
        'asFlModify': [ 'cf', 'pf', 'af', 'zf', 'sf', 'of', ],
        'asFlClear': [],
        'asFlSet': [],
        'asFlUndefined': [ 'af', ],
    },
    'shift_count': { # shl, shr or sar w/o fixed 1 shift count
        'asFlTest': [],
        'asFlModify': [ 'cf', 'pf', 'af', 'zf', 'sf', 'of', ],
        'asFlClear': [],
        'asFlSet': [],
        'asFlUndefined': [ 'af', 'of', ],
    },
    'bitmap': { # bt, bts, btr, btc
        'asFlTest': [],
        'asFlModify': [ 'cf', ],
        'asFlClear': [],
        'asFlSet': [],
        'asFlUndefined': [ 'pf', 'af', 'zf', 'sf', 'of', ], # tstIEMAImpl indicates that they aren't modified.
    },
    'unchanged': {
        'asFlTest': [],
        'asFlModify': [],
        'asFlClear': [],
        'asFlSet': [],
        'asFlUndefined': [],
    },
};
def parseTagOpEFlagsClass(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tags: @opflclass
    Value: arithmetic, logical, ...

    Looks the value up in self.kdEFlagsClasses and assigns each flag list
    of the class to the corresponding instruction attribute.  An attribute
    that is not None already is never overwritten; hitting one records an
    error and stops, leaving attributes assigned before it in place.

    Returns True on success, False after recording an error.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sClassName = self.flattenAllSections(aasSections);
    dClassAttribs = self.kdEFlagsClasses.get(sClassName);
    if not dClassAttribs:
        sValid = ', '.join(sorted(self.kdEFlagsClasses.keys()));
        return self.errorComment(iTagLine, '%s: Unknown EFLAGS class: %s (valid: %s)'
                                           % (sTag, sClassName, sValid,));

    # Set the attributes.
    for sAttrib in dClassAttribs:
        asNew = dClassAttribs[sAttrib];
        asOld = getattr(oInstr, sAttrib);
        if asOld is not None:
            return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s" for %s'
                                               % (sTag, asOld, asNew, sAttrib));
        setattr(oInstr, sAttrib, asNew);

    return True;
4562
def parseTagOpHints(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag: @ophints
    Value: Comma or space separated list of flags and hints.

    This covers the disassembler flags table and more.

    Every hint must be a key of g_kdHints; the single word 'none' (any
    case) stands for an empty list.  Valid hints are added to the
    instruction's dHints dictionary.  A hint that is present already is
    reported as an error but does not fail the tag.

    Returns True on success, False after recording one error per invalid
    hint.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    asHints = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asHints) == 1 and asHints[0].lower() == 'none':
        asHints = [];
    else:
        # split() without arguments yields tokens free of surrounding
        # whitespace already, so no stripping is needed before the lookup.
        fRc = True;
        for sHint in asHints:
            if sHint not in g_kdHints:
                fRc = self.errorComment(iTagLine, '%s: invalid hint value: %s' % (sTag, sHint,));
        if not fRc:
            return False;

    # Append them.
    for sHint in asHints:
        if sHint not in oInstr.dHints:
            oInstr.dHints[sHint] = True; # (dummy value, using dictionary for speed)
        else:
            self.errorComment(iTagLine, '%s: duplicate hint: %s' % ( sTag, sHint,));

    _ = iEndLine;
    return True;
4596
def parseTagOpDisEnum(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opdisenum
    Value:  OP_XXXX

    Selects a specific (legacy) disassembler enum value for the
    instruction.  The value must match self.oReDisEnum and may only be
    given once per instruction.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Split the flattened value; complain unless there is exactly one word,
    # but carry on with the first one if there is anything at all.
    asTokens = self.flattenAllSections(aasSections).split();
    cTokens = len(asTokens);
    if cTokens != 1:
        self.errorComment(iTagLine, '%s: expected exactly one value: %s' % (sTag, asTokens,));
        if cTokens == 0:
            return False;

    sNewEnum = asTokens[0];
    if not self.oReDisEnum.match(sNewEnum):
        return self.errorComment(iTagLine, '%s: invalid disassembler OP_XXXX enum: %s (pattern: %s)'
                                 % (sTag, sNewEnum, self.oReDisEnum.pattern));

    # Refuse to replace a value that is already there.
    if oInstr.sDisEnum is None:
        oInstr.sDisEnum = sNewEnum;
        _ = iEndLine;
        return True;
    return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % (sTag, oInstr.sDisEnum, sNewEnum,));
4625
def parseTagOpMinCpu(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opmincpu
    Value:  <simple CPU name>

    Indicates when this instruction was introduced.

    The name must be a key of g_kdCpuNames.  A second @opmincpu with a
    different name is reported and the first value is kept.

    Returns True, or the result of self.errorComment() when the value is
    missing or not a known CPU name.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value, split into words, make sure there's just one, valid it.
    asCpus = self.flattenAllSections(aasSections).split();
    if not asCpus:
        # An empty value used to blow up with an IndexError below.
        return self.errorComment(iTagLine, '%s: missing CPU name (names: %s)'
                                 % (sTag, ','.join(sorted(g_kdCpuNames)),));
    if len(asCpus) > 1:
        self.errorComment(iTagLine, '%s: exactly one CPU name, please: %s' % (sTag, ' '.join(asCpus),));

    sMinCpu = asCpus[0];
    if sMinCpu not in g_kdCpuNames:
        return self.errorComment(iTagLine, '%s: invalid CPU name: %s (names: %s)'
                                 % (sTag, sMinCpu, ','.join(sorted(g_kdCpuNames)),));

    # Set it.  (The value must not be assigned during validation above, or
    # the overwrite check here can never trigger.)
    if oInstr.sMinCpu is None:
        oInstr.sMinCpu = sMinCpu;
    elif oInstr.sMinCpu != sMinCpu:
        self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % (sTag, oInstr.sMinCpu, sMinCpu,));

    _ = iEndLine;
    return True;
4655
def parseTagOpCpuId(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opcpuid
    Value:  none | <CPUID flag specifier>

    CPUID feature bit which is required for the instruction to be present.

    The single value 'none' (any case) means no requirement.  Each value
    must be a key of g_kdCpuIdFlags; valid ones are appended to
    oInstr.asCpuIds.

    Returns False if any value is invalid, True otherwise.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    asCpuIds = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asCpuIds) == 1 and asCpuIds[0].lower() == 'none':
        asCpuIds = [];
    else:
        fRc = True;
        for iCpuId, sCpuId in enumerate(asCpuIds):
            if sCpuId not in g_kdCpuIdFlags:
                if sCpuId.strip() in g_kdCpuIdFlags:
                    # Store the normalized value in the list (the string
                    # itself cannot be assigned into).
                    asCpuIds[iCpuId] = sCpuId.strip();
                else:
                    fRc = self.errorComment(iTagLine, '%s: invalid CPUID value: %s' % (sTag, sCpuId,));
        if not fRc:
            return False;

    # Append them.
    for sCpuId in asCpuIds:
        if sCpuId not in oInstr.asCpuIds:
            oInstr.asCpuIds.append(sCpuId);
        else:
            self.errorComment(iTagLine, '%s: duplicate CPUID: %s' % ( sTag, sCpuId,));

    _ = iEndLine;
    return True;
4689
def parseTagOpGroup(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opgroup
    Value:  op_grp1[_subgrp2[_subsubgrp3]]

    Assigns the instruction to a group.  Exactly one name matching
    self.oReGroupName is accepted, and only once per instruction.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # There must be a single word in the flattened value.
    asNames = self.flattenAllSections(aasSections).split();
    if len(asNames) != 1:
        return self.errorComment(iTagLine, '%s: exactly one group, please: %s' % (sTag, asNames,));

    # ... and it must look like a group name.
    sNewGroup = asNames[0];
    if not self.oReGroupName.match(sNewGroup):
        return self.errorComment(iTagLine, '%s: invalid group name: %s (valid: %s)'
                                 % (sTag, sNewGroup, self.oReGroupName.pattern));

    # Only set it if nothing has been set before.
    if oInstr.sGroup is None:
        oInstr.sGroup = sNewGroup;
        _ = iEndLine;
        return True;
    return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sGroup, sNewGroup,));
4715
def parseTagOpUnusedInvalid(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opunused, @opinvalid, @opinvlstyle
    Value:  <invalid opcode behaviour style>

    The @opunused indicates the specification is for a currently unused
    instruction encoding.

    The @opinvalid indicates the specification is for a currently invalid
    instruction encoding (like UD2).

    The @opinvlstyle just indicates how CPUs decode the instruction when
    not supported (@opcpuid, @opmincpu) or disabled.

    The style must be a key of g_kdInvalidStyles and only one of the three
    tags may be given per instruction.  Besides storing the style, @opunused
    sets oInstr.fUnused and @opinvalid sets oInstr.fInvalid.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    asStyles = self.flattenAllSections(aasSections).split();
    if len(asStyles) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid behaviour style, please: %s' % (sTag, asStyles,));
    sStyle = asStyles[0];
    if sStyle not in g_kdInvalidStyles:
        # Sorted list rather than the raw keys view, so the message is stable
        # and readable (same as the @opxcpttype handler does).
        return self.errorComment(iTagLine, '%s: invalid invalid behaviour style: %s (valid: %s)'
                                 % (sTag, sStyle, sorted(g_kdInvalidStyles.keys()),));
    # Set it.
    if oInstr.sInvalidStyle is not None:
        return self.errorComment(iTagLine,
                                 '%s: attempting to overwrite "%s" with "%s" (only one @opunused, @opinvalid, @opinvlstyle)'
                                 % ( sTag, oInstr.sInvalidStyle, sStyle,));
    oInstr.sInvalidStyle = sStyle;
    if sTag == '@opunused':
        oInstr.fUnused = True;
    elif sTag == '@opinvalid':
        oInstr.fInvalid = True;

    _ = iEndLine;
    return True;
4753
def parseTagOpTest(self, sTag, aasSections, iTagLine, iEndLine): # pylint: disable=too-many-locals
    """
    Tag:        @optest
    Value:      [<selectors>[ ]?] <inputs> -> <outputs>
    Example:    mode==64bit / in1=0xfffffffe:dw in2=1:dw -> out1=0xffffffff:dw outfl=a?,p?

    The main idea here is to generate basic instruction tests.

    The probably simplest way of handling the diverse input, would be to use
    it to produce size optimized byte code for a simple interpreter that
    modifies the register input and output states.

    An alternative to the interpreter would be creating multiple tables,
    but that becomes rather complicated wrt what goes where and then to use
    them in an efficient manner.

    Each section of the tag value is parsed as one test.  Tests that parse
    cleanly are appended to oInstr.aoTests, the others are reported via
    self.errorComment().  Always returns True.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    #
    # Do it section by section.
    #
    for asSectionLines in aasSections:
        #
        # Sort the input into outputs, inputs and selector conditions.
        #
        sFlatSection = self.flattenAllSections([asSectionLines,]);
        if not sFlatSection:
            self.errorComment(iTagLine, '%s: missing value (dbg: aasSections=%s)' % ( sTag, aasSections));
            continue;
        oTest = InstructionTest(oInstr);

        asSelectors = [];
        asInputs    = [];
        asOutputs   = [];
        asCur       = asOutputs;
        fRc         = True;
        asWords     = sFlatSection.split();
        # The words are walked back to front: outputs first, then inputs
        # (after '->') and finally selectors (after '/').
        for iWord in range(len(asWords) - 1, -1, -1):
            sWord = asWords[iWord];
            # Check for array switchers.  The current list must be compared
            # by identity; comparing by value cannot tell two lists with the
            # same content (e.g. both still empty) apart.
            if sWord == '->':
                if asCur is not asOutputs:
                    fRc = self.errorComment(iTagLine, '%s: "->" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asInputs;
            elif sWord == '/':
                if asCur is not asInputs:
                    fRc = self.errorComment(iTagLine, '%s: "/" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asSelectors;
            else:
                asCur.insert(0, sWord);

        #
        # Validate and add selectors.
        #
        for sCond in asSelectors:
            sCondExp = TestSelector.kdPredicates.get(sCond, sCond);
            oSelector = None;
            for sOp in TestSelector.kasCompareOps:
                off = sCondExp.find(sOp);
                if off >= 0:
                    sVariable = sCondExp[:off];
                    sValue    = sCondExp[off + len(sOp):];
                    if sVariable in TestSelector.kdVariables:
                        if sValue in TestSelector.kdVariables[sVariable]:
                            oSelector = TestSelector(sVariable, sOp, sValue);
                        else:
                            self.errorComment(iTagLine, '%s: invalid condition value "%s" in "%s" (valid: %s)'
                                                         % ( sTag, sValue, sCond,
                                                             TestSelector.kdVariables[sVariable].keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid condition variable "%s" in "%s" (valid: %s)'
                                                     % ( sTag, sVariable, sCond, TestSelector.kdVariables.keys(),));
                    # Only the first operator found in the condition is tried.
                    break;
            if oSelector is not None:
                for oExisting in oTest.aoSelectors:
                    if oExisting.sVariable == oSelector.sVariable:
                        self.errorComment(iTagLine, '%s: already have a selector for variable "%s" (existing: %s, new: %s)'
                                                     % ( sTag, oSelector.sVariable, oExisting, oSelector,));
                oTest.aoSelectors.append(oSelector);
            else:
                fRc = self.errorComment(iTagLine, '%s: failed to parse selector: %s' % ( sTag, sCond,));

        #
        # Validate outputs and inputs, adding them to the test as we go along.
        #
        for asItems, sDesc, aoDst in [ (asInputs, 'input', oTest.aoInputs), (asOutputs, 'output', oTest.aoOutputs)]:
            asValidFieldKinds = [ 'both', sDesc, ];
            for sItem in asItems:
                oItem = None;
                for sOp in TestInOut.kasOperators:
                    off = sItem.find(sOp);
                    if off < 0:
                        continue;
                    sField     = sItem[:off];
                    sValueType = sItem[off + len(sOp):];
                    if     sField in TestInOut.kdFields \
                       and TestInOut.kdFields[sField][1] in asValidFieldKinds:
                        # The value may carry an explicit ':type' suffix,
                        # otherwise the default type of the field is used.
                        asSplit = sValueType.split(':', 1);
                        sValue  = asSplit[0];
                        sType   = asSplit[1] if len(asSplit) > 1 else TestInOut.kdFields[sField][0];
                        if sType in TestInOut.kdTypes:
                            oValid = TestInOut.kdTypes[sType].validate(sValue);
                            if oValid is True:
                                if not TestInOut.kdTypes[sType].isAndOrPair(sValue) or sOp == '&|=':
                                    oItem = TestInOut(sField, sOp, sValue, sType);
                                else:
                                    self.errorComment(iTagLine, '%s: and-or %s value "%s" can only be used with "&|="'
                                                                 % ( sTag, sDesc, sItem, ));
                            else:
                                self.errorComment(iTagLine, '%s: invalid %s value "%s" in "%s" (type: %s): %s'
                                                             % ( sTag, sDesc, sValue, sItem, sType, oValid, ));
                        else:
                            self.errorComment(iTagLine, '%s: invalid %s type "%s" in "%s" (valid types: %s)'
                                                         % ( sTag, sDesc, sType, sItem, TestInOut.kdTypes.keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid %s field "%s" in "%s"\nvalid fields: %s'
                                                     % ( sTag, sDesc, sField, sItem,
                                                         ', '.join([sKey for sKey, asVal in TestInOut.kdFields.items()
                                                                    if asVal[1] in asValidFieldKinds]),));
                    # Only the first operator found in the item is tried.
                    break;
                if oItem is not None:
                    for oExisting in aoDst:
                        if oExisting.sField == oItem.sField and oExisting.sOp == oItem.sOp:
                            self.errorComment(iTagLine,
                                              '%s: already have a "%s" assignment for field "%s" (existing: %s, new: %s)'
                                              % ( sTag, oItem.sOp, oItem.sField, oExisting, oItem,));
                    aoDst.append(oItem);
                else:
                    fRc = self.errorComment(iTagLine, '%s: failed to parse assignment: %s' % ( sTag, sItem,));

        #
        # Add the test if everything parsed fine, otherwise dump what we got.
        #
        if fRc:
            oInstr.aoTests.append(oTest);
        else:
            self.errorComment(iTagLine, '%s: failed to parse test: %s' % (sTag, ' '.join(asWords),));
            self.errorComment(iTagLine, '%s: asSelectors=%s / asInputs=%s -> asOutputs=%s'
                                        % (sTag, asSelectors, asInputs, asOutputs,));

    _ = iEndLine;
    return True;
4898
def parseTagOpTestNum(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Numbered @optest tag.  Either @optest42 or @optest[42].

    Complains if the number does not equal the number of tests the
    instruction already has, then hands the value to parseTagOpTest().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The digits follow '@optest' directly or are wrapped in square brackets.
    if sTag[-1] == ']':
        sNumber = sTag[8:-1];
    else:
        sNumber = sTag[7:];
    iTest = int(sNumber);

    cTests = len(oInstr.aoTests);
    if iTest != cTests:
        self.errorComment(iTagLine, '%s: incorrect test number: %u, actual %u' % (sTag, iTest, cTests,));
    return self.parseTagOpTest(sTag, aasSections, iTagLine, iEndLine);
4914
def parseTagOpTestIgnore(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @optestign | @optestignore
    Value:  <value is ignored>

    Handy for switching a test off while working on another one; nothing
    is parsed or stored.

    See also @oponlytest.
    """
    # Reference the arguments so they do not count as unused.
    _ = (sTag, aasSections, iTagLine, iEndLine);
    return True;
4926
def parseTagOpCopyTests(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @opcopytests
    Value:      <opstat | function> [..]
    Example:    @opcopytests add_Eb_Gb

    Lets different encodings of the same operation share their tests
    instead of duplicating them.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The references are only recorded here; the copying is done once all
    # the input has been parsed, so that forward references work.
    asRefs = self.flattenAllSections(aasSections).split();
    if not asRefs:
        return self.errorComment(iTagLine, '%s: requires at least on reference value' % (sTag,));

    for sRef in asRefs:
        if sRef in oInstr.asCopyTests:
            self.errorComment(iTagLine, '%s: ignoring duplicate "%s"' % (sTag, sRef,));
        elif self.oReStatsName.match(sRef) or self.oReFunctionName.match(sRef):
            oInstr.asCopyTests.append(sRef);
        else:
            self.errorComment(iTagLine, '%s: invalid instruction reference (opstat or function) "%s" (valid: %s or %s)'
                                        % (sTag, sRef, self.oReStatsName.pattern, self.oReFunctionName.pattern));

    _ = iEndLine;
    return True;
4955
def parseTagOpOnlyTest(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @oponlytest | @oponly
    Value:  none

    Restricts testing to the instructions carrying this tag, which is
    useful for singling out one or two new instructions or tests.

    See also @optestignore.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The tag takes no value.
    sValue = self.flattenAllSections(aasSections).strip();
    if sValue:
        return self.errorComment(iTagLine, '%s: does not take any value: %s' % (sTag, sValue));

    # Register the instruction, but only once.
    fKnown = oInstr in g_aoOnlyTestInstructions;
    if not fKnown:
        g_aoOnlyTestInstructions.append(oInstr);

    _ = iEndLine;
    return True;
4978
def parseTagOpXcptType(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opxcpttype
    Value:  [none|1|2|3|4|4UA|5|6|7|8|11|12|E1|E1NF|E2|E3|E3NF|E4|E4NF|E5|E5NF|E6|E6NF|E7NF|E9|E9NF|E10|E11|E12|E12NF]

    Sets the SSE or AVX exception type (see SDMv2 2.4, 2.7).  The value
    must be a key of g_kdXcptTypes and may only be given once.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Exactly one word is expected.
    asWords = self.flattenAllSections(aasSections).split();
    if len(asWords) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid exception type, please: %s' % (sTag, asWords,));

    # It must be one of the known types.
    sNewType = asWords[0];
    if sNewType not in g_kdXcptTypes:
        return self.errorComment(iTagLine, '%s: invalid invalid exception type: %s (valid: %s)'
                                 % (sTag, sNewType, sorted(g_kdXcptTypes.keys()),));

    # Do not replace an earlier value.
    if oInstr.sXcptType is None:
        oInstr.sXcptType = sNewType;
        _ = iEndLine;
        return True;
    return self.errorComment(iTagLine,
                             '%s: attempting to overwrite "%s" with "%s" (only one @opxcpttype)'
                             % ( sTag, oInstr.sXcptType, sNewType,));
5005
def parseTagOpFunction(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opfunction
    Value:  <VMM function name>

    Explicitly sets the IEM function name.  Normally the name is picked up
    from the FNIEMOP_XXX macro invocation after the description, or
    generated from the mnemonic and operands.

    It is thought this may be necessary when specifying instructions whose
    implementation does not follow immediately or is not written yet.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The whole flattened value is the name.
    sNewName = self.flattenAllSections(aasSections);
    fValidName = self.oReFunctionName.match(sNewName);
    if not fValidName:
        return self.errorComment(iTagLine, '%s: invalid VMM function name: "%s" (valid: %s)'
                                 % (sTag, sNewName, self.oReFunctionName.pattern));

    # Only set it if no name has been set before.
    if oInstr.sFunction is None:
        oInstr.sFunction = sNewName;
        _ = iEndLine;
        return True;
    return self.errorComment(iTagLine, '%s: attempting to overwrite VMM function name "%s" with "%s"'
                             % (sTag, oInstr.sFunction, sNewName,));
5033
def parseTagOpStats(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opstats
    Value:  <VMM statistics base name>

    Explicitly sets the statistics name.  Normally the name is picked up
    from the IEMOP_MNEMONIC macro invocation, or generated from the
    mnemonic and operands.

    It is thought this may be necessary when specifying instructions whose
    implementation does not follow immediately or is not written yet.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The whole flattened value is the name.
    sNewName = self.flattenAllSections(aasSections);
    fValidName = self.oReStatsName.match(sNewName);
    if not fValidName:
        return self.errorComment(iTagLine, '%s: invalid VMM statistics name: "%s" (valid: %s)'
                                 % (sTag, sNewName, self.oReStatsName.pattern));

    # Only set it if no name has been set before.
    if oInstr.sStats is None:
        oInstr.sStats = sNewName;
        _ = iEndLine;
        return True;
    return self.errorComment(iTagLine, '%s: attempting to overwrite VMM statistics base name "%s" with "%s"'
                             % (sTag, oInstr.sStats, sNewName,));
5061
def parseTagOpDone(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opdone
    Value:  none

    Explicitly flushes the instructions specified so far by calling
    self.doneInstructions().
    """
    sValue = self.flattenAllSections(aasSections);
    if sValue == '':
        _ = (sTag, iEndLine);
        return self.doneInstructions();
    return self.errorComment(iTagLine, '%s: takes no value, found: "%s"' % (sTag, sValue,));
5074
5075 ## @}
5076
5077
def parseComment(self):
    """
    Parses the comment currently held in self.sComment.

    The comment is split into tags, each followed by one or more sections
    of text, and every tag found in self.dTagHandlers is passed to its
    handler.  Old style 'Opcode ...' comments go to parseCommentOldOpcode().

    Returns False if the comment holds nothing of interest, True otherwise.
    """
    #
    # Nothing to do unless there is an 'Opcode' or a '@' somewhere.
    #
    sComment = self.sComment;
    if 'Opcode' not in sComment and '@' not in sComment:
        return False;

    #
    # Break it up into lines without leading asterisks and spaces, then
    # drop the empty lines at either end.  The comment line number is
    # advanced for each line removed at the start.
    #
    asLines = [sRaw.lstrip().lstrip('*').lstrip() for sRaw in sComment.split('\n')];

    while asLines and not asLines[0]:
        self.iCommentLine += 1;
        del asLines[0];

    while asLines and not asLines[-1]:
        del asLines[-1];

    #
    # Old style: Opcode 0x0f 0x12
    #
    if asLines[0].startswith('Opcode '):
        self.parseCommentOldOpcode(asLines);

    #
    # Collect and process the @op* tagged data.  Text preceding the first
    # tag is filed under the pseudo tag '@default'.
    #
    cOpTags      = 0;
    sFlatDefault = None;
    sCurTag      = '@default';
    iCurTagLine  = 0;
    asCurSection = [];
    aasSections  = [ asCurSection, ];
    for iLine, sLine in enumerate(asLines):
        if sLine.startswith('@'):
            #
            # A new tag begins here, so finish the previous one first.
            #
            if not asCurSection and len(aasSections) > 1:
                aasSections.pop(-1);
            if sCurTag in self.dTagHandlers:
                self.dTagHandlers[sCurTag](sCurTag, aasSections, iCurTagLine, iLine);
                cOpTags += 1;
            elif sCurTag.startswith('@op'):
                self.errorComment(iCurTagLine, 'Unknown tag: %s' % (sCurTag));
            elif sCurTag == '@default':
                sFlatDefault = self.flattenAllSections(aasSections);
            elif '@op' + sCurTag[1:] in self.dTagHandlers:
                self.errorComment(iCurTagLine, 'Did you mean "@op%s" rather than "%s"?' % (sCurTag[1:], sCurTag));
            elif sCurTag in ['@encoding', '@opencoding']:
                self.errorComment(iCurTagLine, 'Did you mean "@openc" rather than "%s"?' % (sCurTag,));

            #
            # Start the new tag; the rest of the line, if any, opens its
            # first section.
            #
            asParts      = sLine.split(None, 1);
            sCurTag      = asParts[0].lower();
            asCurSection = asParts[1:];
            aasSections  = [asCurSection, ];
            iCurTagLine  = iLine;
        elif sLine:
            asCurSection.append(sLine);
        elif asCurSection:
            # An empty line ends a non-empty section.
            asCurSection = [];
            aasSections.append(asCurSection);

    #
    # Finish the last tag.
    #
    if not asCurSection and len(aasSections) > 1:
        aasSections.pop(-1);
    if sCurTag in self.dTagHandlers:
        self.dTagHandlers[sCurTag](sCurTag, aasSections, iCurTagLine, iLine);
        cOpTags += 1;
    elif sCurTag.startswith('@op'):
        self.errorComment(iCurTagLine, 'Unknown tag: %s' % (sCurTag));
    elif sCurTag == '@default':
        sFlatDefault = self.flattenAllSections(aasSections);

    #
    # Untagged text must not be mixed with @op* tags.
    #
    if cOpTags > 0 and sFlatDefault:
        self.errorComment(0, 'Untagged comment text is not allowed with @op*: %s' % (sFlatDefault,));

    return True;
5178
def parseMacroInvocation(self, sInvocation, offStartInvocation = 0):
    """
    Parses a macro invocation, pulling in further lines from self.asLines
    (starting at index self.iLine) when it does not end within sInvocation.

    Returns a tuple of three values:
        1. The macro name followed by its (stripped) arguments, as a list.
        2. The offset just past the invocation, relative to the start of the
           last line that was appended (or to sInvocation if none was).
        3. The number of additional lines that were appended (zero if the
           invocation is fully contained in sInvocation).
    """
    # The name is whatever precedes the opening parenthesis.
    offParen = sInvocation.find('(', offStartInvocation);
    if offParen <= offStartInvocation:
        self.raiseError("macro invocation open parenthesis not found");
    sMacroName = sInvocation[offStartInvocation:offParen].strip();
    if not self.oReMacroName.match(sMacroName):
        self.raiseError("invalid macro name '%s'" % (sMacroName,));
    asArgs = [sMacroName, ];

    # Walk the characters, splitting on the commas at nesting depth one and
    # leaving quoted text alone.
    iNextLine    = self.iLine;
    cNesting     = 1;
    offCur       = offParen + 1;
    offArgStart  = offCur;
    offLastLine  = 0;
    chOpenQuote  = None;
    while cNesting > 0:
        if offCur >= len(sInvocation):
            # Out of text, continue on the next line if there is one.
            if iNextLine >= len(self.asLines):
                self.error('macro invocation beyond end of file');
                return (asArgs, offCur - offLastLine, iNextLine - self.iLine);
            offLastLine  = offCur;
            sInvocation += self.asLines[iNextLine];
            iNextLine   += 1;
        chCur = sInvocation[offCur];

        if chOpenQuote:
            if chCur == '\\' and offCur + 1 < len(sInvocation):
                offCur += 1;    # Skips the escaped character as well.
            elif chCur == chOpenQuote:
                chOpenQuote = None;
        elif chCur == '"' or chCur == '\'':
            chOpenQuote = chCur;
        elif chCur == '(':
            cNesting += 1;
        elif chCur == ',' or chCur == ')':
            if cNesting == 1:
                asArgs.append(sInvocation[offArgStart:offCur].strip());
                offArgStart = offCur + 1;
            if chCur == ')':
                cNesting -= 1;
        offCur += 1;

    return (asArgs, offCur - offLastLine, iNextLine - self.iLine);
5235
def findAndParseMacroInvocationEx(self, sCode, sMacro, offStart = 0):
    """
    Looks for the first occurrence of sMacro in sCode at or after offStart
    and parses it as a macro invocation if the next non-blank character is
    an opening parenthesis.

    Returns (None, len(sCode), 0) if not found, otherwise the
    parseMacroInvocation() return value.
    """
    offHit = sCode.find(sMacro, offStart);
    # startswith() copes with nothing (or only whitespace) following the
    # macro name, which indexing the first character does not.
    if offHit >= 0 and sCode[offHit + len(sMacro):].lstrip().startswith('('):
        return self.parseMacroInvocation(sCode, offHit);
    return (None, len(sCode), 0);
5245
def findAndParseMacroInvocation(self, sCode, sMacro):
    """
    Convenience variant of findAndParseMacroInvocationEx() which only
    returns the first value of its result: the argument list if the macro
    was found, None if not.
    """
    tResult = self.findAndParseMacroInvocationEx(sCode, sMacro);
    return tResult[0];
5251
def findAndParseFirstMacroInvocation(self, sCode, asMacro):
    """
    Tries the macro names in asMacro in the order given and returns the
    findAndParseMacroInvocation() result for the first one found.

    Returns None if none of them are found.
    """
    for sCandidate in asMacro:
        asArgs = self.findAndParseMacroInvocation(sCode, sCandidate);
        if asArgs is None:
            continue;
        return asArgs;
    return None;
5261
def workerIemOpMnemonicEx(self, sMacro, sStats, sAsm, sForm, sUpper, sLower,  # pylint: disable=too-many-arguments
                          sDisHints, sIemHints, asOperands):
    """
    Processes one of the IEMOP_MNEMONIC0EX, IEMOP_MNEMONIC1EX, IEMOP_MNEMONIC2EX,
    IEMOP_MNEMONIC3EX, and IEMOP_MNEMONIC4EX macros.

    The macro parameters are checked against, and merged into, the last
    instruction in self.aoCurInstrs (one is added if there is none).
    Mismatches are reported via self.error().

    Returns True.
    """
    #
    # Some invocation checks.
    #
    if sUpper != sUpper.upper():
        self.error('%s: bad a_Upper parameter: %s' % (sMacro, sUpper,));
    if sLower != sLower.lower():
        self.error('%s: bad a_Lower parameter: %s' % (sMacro, sLower,));
    if sUpper.lower() != sLower:
        self.error('%s: a_Upper and a_Lower parameters does not match: %s vs %s' % (sMacro, sUpper, sLower,));
    if not self.oReMnemonic.match(sLower):
        self.error('%s: invalid a_Lower: %s  (valid: %s)' % (sMacro, sLower, self.oReMnemonic.pattern,));

    #
    # Check if sIemHints tells us to not consider this macro invocation.
    #
    if sIemHints.find('IEMOPHINT_SKIP_PYTHON') >= 0:
        return True;

    # Apply to the last instruction only for now.
    if not self.aoCurInstrs:
        self.addInstruction();
    oInstr = self.aoCurInstrs[-1];
    if oInstr.iLineMnemonicMacro == -1:
        oInstr.iLineMnemonicMacro = self.iLine;
    else:
        self.error('%s: already saw a IEMOP_MNEMONIC* macro on line %u for this instruction'
                   % (sMacro, oInstr.iLineMnemonicMacro,));

    # Mnemonic
    if oInstr.sMnemonic is None:
        oInstr.sMnemonic = sLower;
    elif oInstr.sMnemonic != sLower:
        self.error('%s: current instruction and a_Lower does not match: %s vs %s' % (sMacro, oInstr.sMnemonic, sLower,));

    # Process operands.
    if len(oInstr.aoOperands) not in [0, len(asOperands)]:
        self.error('%s: number of operands given by @opN does not match macro: %s vs %s'
                   % (sMacro, len(oInstr.aoOperands), len(asOperands),));
    for iOperand, sType in enumerate(asOperands):
        sWhere = g_kdOpTypes.get(sType, [None, None])[1];
        if sWhere is None:
            self.error('%s: unknown a_Op%u value: %s' % (sMacro, iOperand + 1, sType));
            if iOperand < len(oInstr.aoOperands): # error recovery.
                sWhere = oInstr.aoOperands[iOperand].sWhere;
                sType  = oInstr.aoOperands[iOperand].sType;
            else:
                sWhere = 'reg';
                sType  = 'Gb';
        if iOperand == len(oInstr.aoOperands):
            oInstr.aoOperands.append(Operand(sWhere, sType))
        elif oInstr.aoOperands[iOperand].sWhere != sWhere or oInstr.aoOperands[iOperand].sType != sType:
            self.error('%s: @op%u and a_Op%u mismatch: %s:%s vs %s:%s'
                       % (sMacro, iOperand + 1, iOperand + 1, oInstr.aoOperands[iOperand].sWhere,
                          oInstr.aoOperands[iOperand].sType, sWhere, sType,));

    # Encoding.
    if sForm not in g_kdIemForms:
        self.error('%s: unknown a_Form value: %s' % (sMacro, sForm,));
    else:
        if oInstr.sEncoding is None:
            oInstr.sEncoding = g_kdIemForms[sForm][0];
        elif g_kdIemForms[sForm][0] != oInstr.sEncoding:
            self.error('%s: current instruction @openc and a_Form does not match: %s vs %s (%s)'
                       % (sMacro, oInstr.sEncoding, g_kdIemForms[sForm], sForm));

        # Check the parameter locations for the encoding.
        if g_kdIemForms[sForm][1] is not None:
            if len(g_kdIemForms[sForm][1]) > len(oInstr.aoOperands):
                self.error('%s: The a_Form=%s has a different operand count: %s (form) vs %s'
                           % (sMacro, sForm, len(g_kdIemForms[sForm][1]), len(oInstr.aoOperands) ));
            else:
                for iOperand, sWhere in enumerate(g_kdIemForms[sForm][1]):
                    if oInstr.aoOperands[iOperand].sWhere != sWhere:
                        self.error('%s: current instruction @op%u and a_Form location does not match: %s vs %s (%s)'
                                   % (sMacro, iOperand + 1, oInstr.aoOperands[iOperand].sWhere, sWhere, sForm,));
                    sOpFormMatch = g_kdOpTypes[oInstr.aoOperands[iOperand].sType][4];
                    # A 'V' operand requires a VEX or XOP form that, apart
                    # from the 'VEX' prefix itself, contains a 'V'.  Plain
                    # substring tests are used for the prefix: the result of
                    # find() is -1 (true) when missing and 0 (false) for a
                    # match at the very start, so it cannot serve as a flag.
                    fVexOrXopForm = 'VEX' in sForm or 'XOP' in sForm;
                    if    (sOpFormMatch in [ 'REG', 'MEM', ] and sForm.find('_' + sOpFormMatch) < 0) \
                       or (sOpFormMatch in [ 'FIXED', ]      and sForm.find(sOpFormMatch) < 0) \
                       or (sOpFormMatch == 'RM' and (sForm.find('_MEM') > 0 or sForm.find('_REG') > 0) ) \
                       or (sOpFormMatch == 'V'  and (   not fVexOrXopForm \
                                                     or sForm.replace('VEX','').find('V') < 0) ):
                        self.error('%s: current instruction @op%u and a_Form type does not match: %s/%s vs %s'
                                   % (sMacro, iOperand + 1, oInstr.aoOperands[iOperand].sType, sOpFormMatch, sForm, ));
                # Any operands beyond those given by the form must be fixed ones.
                if len(g_kdIemForms[sForm][1]) < len(oInstr.aoOperands):
                    for iOperand in range(len(g_kdIemForms[sForm][1]), len(oInstr.aoOperands)):
                        if    oInstr.aoOperands[iOperand].sType != 'FIXED' \
                          and g_kdOpTypes[oInstr.aoOperands[iOperand].sType][0] != 'IDX_ParseFixedReg':
                            self.error('%s: Expected FIXED type operand #%u following operands given by a_Form=%s: %s (%s)'
                                       % (sMacro, iOperand, sForm, oInstr.aoOperands[iOperand].sType,
                                          oInstr.aoOperands[iOperand].sWhere));

        # Check @opcodesub (only possible when the form is a known one).
        if    oInstr.sSubOpcode \
          and g_kdIemForms[sForm][2] \
          and oInstr.sSubOpcode.find(g_kdIemForms[sForm][2]) < 0:
            self.error('%s: current instruction @opcodesub and a_Form does not match: %s vs %s (%s)'
                       % (sMacro, oInstr.sSubOpcode, g_kdIemForms[sForm][2], sForm,));

    # Stats.
    if not self.oReStatsName.match(sStats):
        self.error('%s: invalid a_Stats value: %s' % (sMacro, sStats,));
    elif oInstr.sStats is None:
        oInstr.sStats = sStats;
    elif oInstr.sStats != sStats:
        self.error('%s: mismatching @opstats and a_Stats value: %s vs %s'
                   % (sMacro, oInstr.sStats, sStats,));

    # Process the hints (simply merge with @ophints w/o checking anything).
    for sHint in sDisHints.split('|'):
        sHint = sHint.strip();
        if sHint.startswith('DISOPTYPE_'):
            sShortHint = sHint[len('DISOPTYPE_'):].lower();
            if sShortHint in g_kdHints:
                oInstr.dHints[sShortHint] = True; # (dummy value, using dictionary for speed)
            else:
                self.error('%s: unknown a_fDisHints value: %s' % (sMacro, sHint,));
        elif sHint != '0':
            self.error('%s: expected a_fDisHints value: %s' % (sMacro, sHint,));

    for sHint in sIemHints.split('|'):
        sHint = sHint.strip();
        if sHint.startswith('IEMOPHINT_'):
            sShortHint = sHint[len('IEMOPHINT_'):].lower();
            if sShortHint in g_kdHints:
                oInstr.dHints[sShortHint] = True; # (dummy value, using dictionary for speed)
            else:
                self.error('%s: unknown a_fIemHints value: %s' % (sMacro, sHint,));
        elif sHint != '0':
            self.error('%s: expected a_fIemHints value: %s' % (sMacro, sHint,));

    _ = sAsm;
    return True;
5401
def workerIemOpMnemonic(self, sMacro, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands):
    """
    Processes one of the IEMOP_MNEMONIC0, IEMOP_MNEMONIC1, IEMOP_MNEMONIC2,
    IEMOP_MNEMONIC3, and IEMOP_MNEMONIC4 macros.

    The statistics name and assembly string are derived from the lower
    case mnemonic and the operands, the rest is left to
    workerIemOpMnemonicEx().
    """
    if asOperands:
        sStats = sLower + '_' + '_'.join(asOperands);
        sAsm   = sLower + ' ' + ','.join(asOperands);
    else:
        sStats = sLower;
        sAsm   = sLower;
    return self.workerIemOpMnemonicEx(sMacro, sStats, sAsm, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands);
5411
def workerIemMcBegin(self, sCode, offBeginStatementInCodeStr, offBeginStatementInLine):
    """
    Handles an IEM_MC_BEGIN macro invocation by starting a new McBlock and
    making it the current one (self.oCurMcBlock).

    Returns True.
    """
    if self.fDebugMc:
        self.debug('IEM_MC_BEGIN on %s off %s' % (self.iLine, offBeginStatementInLine,));

    # There must be a current function and no block in progress.
    if not self.oCurFunction:
        self.raiseError('IEM_MC_BEGIN w/o current function (%s)' % (sCode,));
    if self.oCurMcBlock:
        self.raiseError('IEM_MC_BEGIN before IEM_MC_END.  Previous IEM_MC_BEGIN at line %u' % (self.oCurMcBlock.iBeginLine,));

    # The indent is the offset of the statement within its own line, which
    # matters when sCode is an expanded multiline macro.
    offLastNewline = sCode.rfind('\n', 0, offBeginStatementInCodeStr);
    if offLastNewline < 0:
        cchIndent = offBeginStatementInCodeStr;
    else:
        cchIndent = offBeginStatementInCodeStr - (offLastNewline + 1);

    # Create the block.
    oCurInstr = self.aoCurInstrs[-1] if self.aoCurInstrs else None;
    self.oCurMcBlock = McBlock(self.sSrcFile, self.iLine, offBeginStatementInLine, self.oCurFunction, self.iMcBlockInFunc,
                               oInstruction = oCurInstr, cchIndent = cchIndent);

    # It only goes on the global list if the preprocessor context matches
    # the host architecture (or there is no context or host architecture).
    try:
        fListIt = (   not self.aoCppCondStack
                   or not self.sHostArch
                   or self.PreprocessorConditional.isInBlockForArch(self.aoCppCondStack, self.sHostArch, self.iLine));
        if fListIt:
            g_aoMcBlocks.append(self.oCurMcBlock);
            self.cTotalMcBlocks += 1;
    except Exception as oXcpt:
        self.raiseError(oXcpt.args[0]);

    # Link it up with the instruction, if any, and count it.
    oBlockInstr = self.oCurMcBlock.oInstruction;
    if oBlockInstr:
        oBlockInstr.aoMcBlocks.append(self.oCurMcBlock);
    self.iMcBlockInFunc += 1;
    return True;
5451
@staticmethod
def extractLinesFromMacroExpansionLine(sRawLine, offBegin, offEnd, sBeginStmt = 'IEM_MC_BEGIN'):
    """
    Helper used by workerIemMcEnd and workerIemMcDeferToCImplXRet for
    extracting a statement block from a string that's the result of macro
    expansion and therefore contains multiple "sub-lines" as it were.

    Returns list of lines covering offBegin thru offEnd in sRawLine.  Each
    returned line is newline terminated, so when the input ends with a
    newline the last entry is a lone newline.
    """
    # Cut off whatever follows the sub-line containing offEnd.
    offEol = sRawLine.find('\n', offEnd);
    if offEol > 0:
        sRawLine = sRawLine[:offEol + 1];

    # Skip the sub-lines preceding the one containing offBegin.
    offSubLine = sRawLine.rfind('\n', 0, offBegin) + 1;
    sBlock = sRawLine[offSubLine:];

    # If the statement isn't the first word there, start right at it instead.
    if not sBlock.strip().startswith(sBeginStmt):
        sBlock = sBlock[offBegin - offSubLine:];

    asRet = [];
    for sSubLine in sBlock.split('\n'):
        asRet.append(sSubLine + '\n');
    return asRet;
5472
def workerIemMcEnd(self, offEndStatementInLine):
    """
    Process a IEM_MC_END macro invocation.

    Collects the source lines of the current MC block, strips anything that
    follows the 'IEM_MC_END();' statement on the final line, and completes
    the block.  offEndStatementInLine is the offset of the IEM_MC_END
    statement in the current line.

    Returns True.  Malformed input is reported thru self.raiseError; this
    includes an IEM_MC_END statement that is cut short on its line, which
    would otherwise surface as a bare IndexError.
    """
    if self.fDebugMc:
        self.debug('IEM_MC_END on %s off %s' % (self.iLine, offEndStatementInLine,));

    # Check preconditions.
    if not self.oCurMcBlock:
        self.raiseError('IEM_MC_END w/o IEM_MC_BEGIN.');

    #
    # HACK ALERT! For blocks originating from macro expansion the start and
    #             end line will be the same, but the line has multiple
    #             newlines inside it.  So, we have to do some extra tricks
    #             to get the lines out of there.  We ASSUME macros aren't
    #             messy, but keep IEM_MC_BEGIN/END on separate lines.
    #
    if self.iLine > self.oCurMcBlock.iBeginLine:
        asLines = self.asLines[self.oCurMcBlock.iBeginLine - 1 : self.iLine];
        if not asLines[0].strip().startswith('IEM_MC_BEGIN'):
            self.raiseError('IEM_MC_BEGIN is not the first word on the line');

        # Hack alert! Detect mixed tail/head macros a la cmpxchg16b and split up the lines
        #             so we can deal correctly with IEM_MC_END below and everything else.
        for sLine in asLines:
            cNewLines = sLine.count('\n');
            assert cNewLines > 0;
            if cNewLines > 1:
                # The end offset is relative to the last line, so rebase it
                # onto the joined string by adding the length of all lines
                # preceding the last one.
                asLines = self.extractLinesFromMacroExpansionLine(''.join(asLines),
                                                                  self.oCurMcBlock.offBeginLine,
                                                                    offEndStatementInLine
                                                                  + sum(len(s) for s in asLines)
                                                                  - len(asLines[-1]));
                self.oCurMcBlock.iMacroExp = McBlock.kiMacroExp_Partial;
                break;
    else:
        self.oCurMcBlock.iMacroExp = McBlock.kiMacroExp_Entire;
        asLines = self.extractLinesFromMacroExpansionLine(self.asLines[self.iLine - 1],
                                                          self.oCurMcBlock.offBeginLine, offEndStatementInLine);

    #
    # Strip anything following the IEM_MC_END(); statement in the final line,
    # so that we don't carry on any trailing 'break' after macro expansions
    # like for iemOp_movsb_Xb_Yb.
    #
    while asLines and asLines[-1].strip() == '':
        asLines.pop();
    sFinal = asLines[-1] if asLines else '';
    offEndInFinal = sFinal.find('IEM_MC_END');
    if offEndInFinal < 0:
        self.raiseError('bogus IEM_MC_END: Not in final line: %s' % (sFinal,));
    offFinalEnd = offEndInFinal + len('IEM_MC_END');

    # Expect '(', ')' and ';' in that order, each optionally preceded by
    # blanks.  The bounds checks make a truncated statement a parser error.
    for chExpected in '();':
        while offFinalEnd < len(sFinal) and sFinal[offFinalEnd].isspace():
            offFinalEnd += 1;
        if offFinalEnd >= len(sFinal) or sFinal[offFinalEnd] != chExpected:
            self.raiseError('bogus IEM_MC_END: Expected "%s" at %s: %s' % (chExpected, offFinalEnd, sFinal,));
        offFinalEnd += 1;

    asLines[-1] = sFinal[: offFinalEnd];

    #
    # Complete and discard the current block.
    #
    self.oCurMcBlock.complete(self.iLine, offEndStatementInLine,
                              offEndStatementInLine + offFinalEnd - offEndInFinal, asLines);
    self.oCurMcBlock = None;
    return True;
5551
def workerIemMcDeferToCImplXRet(self, sCode, offBeginStatementInCodeStr, offBeginStatementInLine, cParams):
    """
    Process a IEM_MC_DEFER_TO_CIMPL_[1-5]_RET macro invocation.

    The statement is turned into a complete, single-statement MC block of its
    own.  cParams is the digit from the macro name.  self.iLine is advanced to
    the line with the closing parenthesis.

    Returns True.  Problems are reported thru self.raiseError.
    """
    sStmt = 'IEM_MC_DEFER_TO_CIMPL_%d_RET' % (cParams,);
    if self.fDebugMc:
        self.debug('%s on %s off %s' % (sStmt, self.iLine, offBeginStatementInLine,));
        #self.debug('%s<eos>' % (sCode,));

    # Preconditions: Inside a function, but not inside a regular MC block.
    if not self.oCurFunction:
        self.raiseError('%s w/o current function (%s)' % (sStmt, sCode,));
    if self.oCurMcBlock:
        self.raiseError('%s inside IEM_MC_BEGIN blocki starting at line %u' % (sStmt, self.oCurMcBlock.iBeginLine,));

    # The indent is the distance from the start of the (sub-)line holding the
    # statement, which matters when sCode is an expanded multiline macro.
    offSubLineStart = sCode.rfind('\n', 0, offBeginStatementInCodeStr) + 1;
    cchIndent       = offBeginStatementInCodeStr - offSubLineStart;
    #self.debug('cchIndent=%s offSubLineStart=%s sFunc=%s' % (cchIndent, offSubLineStart, self.oCurFunction.sName));

    # Start a new block.
    oInstruction = self.aoCurInstrs[-1] if self.aoCurInstrs else None;
    oMcBlock = McBlock(self.sSrcFile, self.iLine, offBeginStatementInLine, self.oCurFunction, self.iMcBlockInFunc,
                       oInstruction = oInstruction, cchIndent = cchIndent, fDeferToCImpl = True);

    # Parse the statement.  The count check expects cParams + 4 entries.
    asArgs, offAfter, cLines = self.findAndParseMacroInvocationEx(sCode, sStmt, offBeginStatementInCodeStr);
    if asArgs is None:
        self.raiseError('%s: Closing parenthesis not found!' % (sStmt,));
    if len(asArgs) != cParams + 4:
        self.raiseError('%s: findAndParseMacroInvocationEx returns %s args, expected %s! (%s)'
                        % (sStmt, len(asArgs), cParams + 4, asArgs));

    oMcBlock.aoStmts = [ McBlock.parseMcDeferToCImpl(oMcBlock, asArgs[0], asArgs[1:]), ];

    # These MCs are not typically part of macro expansions, but let's get
    # it out of the way immediately if it's the case.
    if cLines > 0 or self.asLines[oMcBlock.iBeginLine - 1].count('\n') <= 1:
        # Ordinary source lines: take the ones the statement covers and cut
        # the last one right after the terminating semicolon.
        asLines = self.asLines[self.iLine - 1 : self.iLine + cLines];
        sLast   = asLines[-1];
        assert offAfter < len(sLast) and sLast[offAfter] == ';', \
               'iBeginLine=%d iLine=%d offAfter=%s line: "%s"' % (oMcBlock.iBeginLine, self.iLine, offAfter, sLast,);
        asLines[-1] = sLast[:offAfter + 1];
    else:
        # Macro expansion: dig the statement out of the multi-newline line.
        asLines = self.extractLinesFromMacroExpansionLine(self.asLines[self.iLine - 1], offBeginStatementInCodeStr,
                                                          offAfter, sStmt);
        offSemicolon = asLines[-1].find(';');
        assert offSemicolon >= 0;
        asLines[-1] = asLines[-1][:offSemicolon + 1];

    assert asLines[0].find(sStmt) >= 0;
    #if not asLines[0].strip().startswith(sStmt):
    #    self.raiseError('%s is not the first word on the line: %s' % (sStmt, asLines[0].strip()));

    # Advance to the line with the closing ')'.
    self.iLine += cLines;

    # Complete the block.
    oMcBlock.complete(self.iLine, 0 if cLines > 0 else offBeginStatementInCodeStr, offAfter + 1, asLines);

    # Register it and update the statistics.
    g_aoMcBlocks.append(oMcBlock);
    if oMcBlock.oInstruction:
        oMcBlock.oInstruction.aoMcBlocks.append(oMcBlock);
    self.cTotalMcBlocks += 1;
    self.iMcBlockInFunc += 1;

    return True;
5619
def workerStartFunction(self, asArgs):
    """
    Deals with the start of a decoder function.

    These are all defined using one of the FNIEMOP*_DEF* and FNIEMOP_*STUB*
    macros, so we get an argument list for these where the 0th argument is
    the macro name.  The 1st argument is used as the function name.

    Returns True.
    """
    # Complete any existing function, handing it the lines from its first
    # line up to, but not including, the current one.
    oPrevFunction = self.oCurFunction;
    if oPrevFunction:
        iLastLine = self.iLine - 1;
        oPrevFunction.complete(iLastLine, self.asLines[oPrevFunction.iBeginLine - 1 : iLastLine]);

    # Create the new function.
    self.oCurFunction = DecoderFunction(self.sSrcFile, self.iLine, asArgs[1], asArgs);
    return True;
5635
def checkCodeForMacro(self, sCode, offLine):
    """
    Checks code for relevant macro invocation.

    sCode is the code to scan and offLine the offset of sCode into the
    current line (used for translating statement offsets).

    Returns True if a decoder function definition or at least one IEM_MC_
    statement was handled, otherwise False (also when only mnemonic or VEX
    decoding helper macros were seen).
    """
    # Nothing to do unless there is a parenthesis after the first character.
    if sCode.find('(') <= 0:
        return False;

    #
    # Look for instruction decoder function definitions. ASSUME single line.
    #
    asArgs = self.findAndParseFirstMacroInvocation(sCode,
                                                   [ 'FNIEMOP_DEF',
                                                     'FNIEMOPRM_DEF',
                                                     'FNIEMOP_STUB',
                                                     'FNIEMOP_STUB_1',
                                                     'FNIEMOP_UD_STUB',
                                                     'FNIEMOP_UD_STUB_1' ]);
    if asArgs is not None:
        self.workerStartFunction(asArgs);
        #self.debug('%s: oCurFunction=%s' % (self.iLine, self.oCurFunction.sName,));

        if not self.aoCurInstrs:
            self.addInstruction();
        for oInstr in self.aoCurInstrs:
            if oInstr.iLineFnIemOpMacro == -1:
                oInstr.iLineFnIemOpMacro = self.iLine;
            else:
                self.error('%s: already seen a FNIEMOP_XXX macro for %s' % (asArgs[0], oInstr,) );
        fStub = asArgs[0].find('STUB') > 0;
        self.setInstrunctionAttrib('sFunction', asArgs[1]);
        self.setInstrunctionAttrib('fStub', fStub, fOverwrite = True);
        self.setInstrunctionAttrib('fUdStub', asArgs[0].find('UD_STUB') > 0, fOverwrite = True);
        if fStub:
            self.doneInstructions(fEndOfFunction = True);
        return True;

    #
    # Check for worker function definitions, so we can get a context for MC blocks.
    #
    asArgs = self.findAndParseFirstMacroInvocation(sCode,
                                                   [ 'FNIEMOP_DEF_1',
                                                     'FNIEMOP_DEF_2', ]);
    if asArgs is not None:
        self.workerStartFunction(asArgs);
        #self.debug('%s: oCurFunction=%s (%s)' % (self.iLine, self.oCurFunction.sName, asArgs[0]));
        return True;

    #
    # IEMOP_HLP_DONE_VEX_DECODING_*: The L0 variants imply the vex_l_zero
    # hint.  Complain if the mnemonic macro didn't give it, then add it.
    #
    asArgs = self.findAndParseFirstMacroInvocation(sCode,
                                                   [ 'IEMOP_HLP_DONE_VEX_DECODING',
                                                     'IEMOP_HLP_DONE_VEX_DECODING_L0',
                                                     'IEMOP_HLP_DONE_VEX_DECODING_NO_VVVV',
                                                     'IEMOP_HLP_DONE_VEX_DECODING_L0_AND_NO_VVVV',
                                                     ]);
    if asArgs is not None:
        sMacro = asArgs[0];
        if sMacro in ('IEMOP_HLP_DONE_VEX_DECODING_L0', 'IEMOP_HLP_DONE_VEX_DECODING_L0_AND_NO_VVVV', ):
            for oInstr in self.aoCurInstrs:
                if 'vex_l_zero' not in oInstr.dHints:
                    if oInstr.iLineMnemonicMacro >= 0:
                        self.errorOnLine(oInstr.iLineMnemonicMacro,
                                         'Missing IEMOPHINT_VEX_L_ZERO! (%s on line %d)' % (sMacro, self.iLine,));
                    oInstr.dHints['vex_l_zero'] = True;

    #
    # IEMOP_MNEMONIC*
    #
    if sCode.find('IEMOP_MNEMONIC') >= 0:
        # IEMOP_MNEMONIC(a_Stats, a_szMnemonic) IEMOP_INC_STATS(a_Stats)
        asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC');
        if asArgs is not None:
            if len(self.aoCurInstrs) == 1:
                oInstr = self.aoCurInstrs[0];
                if oInstr.sStats is None:
                    oInstr.sStats = asArgs[1];
                self.deriveMnemonicAndOperandsFromStats(oInstr, asArgs[1]);

        # IEMOP_MNEMONIC<n>EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, [a_Op1, .., a_Op<n>,]
        #                     a_fDisHints, a_fIemHints)
        # The <n> operands start at index 6 (index 0 being the macro name)
        # and are followed by the two hint arguments.
        for cOperands in range(5):
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC%uEX' % (cOperands,));
            if asArgs is not None:
                self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5],
                                           asArgs[6 + cOperands], asArgs[7 + cOperands],
                                           list(asArgs[6 : 6 + cOperands]));

        # IEMOP_MNEMONIC<n>(a_Form, a_Upper, a_Lower, [a_Op1, .., a_Op<n>,] a_fDisHints, a_fIemHints)
        # Same layout, except that the operands start at index 4.
        for cOperands in range(5):
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC%u' % (cOperands,));
            if asArgs is not None:
                self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3],
                                         asArgs[4 + cOperands], asArgs[5 + cOperands],
                                         list(asArgs[4 : 4 + cOperands]));

    #
    # IEM_MC_BEGIN + IEM_MC_END.
    # We must support multiple instances per code snippet.
    #
    offCode = sCode.find('IEM_MC_');
    if offCode >= 0:
        for oMatch in self.oReMcBeginEnd.finditer(sCode, offCode):
            sWhat   = oMatch.group(1);
            offStmt = oMatch.start();
            if sWhat == 'END':
                self.workerIemMcEnd(offLine + offStmt);
            elif sWhat == 'BEGIN':
                self.workerIemMcBegin(sCode, offStmt, offLine + offStmt);
            else:
                # Anything else is taken to be DEFER_TO_CIMPL_<n>..., the
                # digit following the prefix being the parameter count.
                self.workerIemMcDeferToCImplXRet(sCode, offStmt, offLine + offStmt,
                                                 int(sWhat[len('DEFER_TO_CIMPL_')]));
        return True;

    return False;
5779
def workerPreprocessorRecreateMacroRegex(self):
    """
    Recreates self.oReMacros when self.dMacros changes.

    The expression is a single group of alternatives, one per macro.  Macros
    with an argument list must be followed by an opening parenthesis, the
    others must end at a word boundary.  With no macros, self.oReMacros is
    set to None.

    Returns True.
    """
    if not self.dMacros:
        self.oReMacros = None;
        return True;

    asAlternatives = [];
    for sName, oMacro in self.dMacros.items():
        sSuffix = r'\s*\(' if oMacro.asArgs is not None else r'\b';
        asAlternatives.append(sName + sSuffix);
    self.oReMacros = re.compile(r'\b(' + r'|'.join(asAlternatives) + ')');
    return True;
5800
def workerPreprocessorDefine(self, sRest):
    """
    Handles a macro #define, the sRest is what follows after the directive word.

    Continuation lines are consumed (advancing self.iLine).  Only macros whose
    body mentions IEM_MC_BEGIN or IEM_MC_END are recorded in self.dMacros.

    Returns True when the macro was ignored, the result of
    workerPreprocessorRecreateMacroRegex when it was added, and the result
    of self.error when the definition could not be parsed.
    """
    assert sRest[-1] == '\n';

    #
    # If using line continuation, just concat all the lines together,
    # preserving the newline character but not the escaping.
    #
    iLineStart = self.iLine;
    while sRest.endswith('\\\n') and self.iLine < len(self.asLines):
        sRest = sRest[:-2].rstrip() + '\n' + self.asLines[self.iLine];
        self.iLine += 1;
    #self.debug('workerPreprocessorDefine: sRest=%s<EOS>' % (sRest,));

    #
    # Use regex to split out the name, argument list and body.
    # If this fails, we assume it's a simple macro.
    #
    oMatch = self.oReHashDefine2.match(sRest);
    if oMatch:
        # NOTE(review): An empty parameter list yields asArgs = None here, the
        #               same value as for a macro without any parameter list.
        #               Confirm that this is intended.
        sAllArgs = oMatch.group(2).strip();
        asArgs   = None;
        if sAllArgs:
            asArgs = [sParam.strip() for sParam in sAllArgs.split(',')];
        sBody = oMatch.group(3);
    else:
        oMatch = self.oReHashDefine3.match(sRest);
        if not oMatch:
            self.debug('workerPreprocessorDefine: wtf? sRest=%s' % (sRest,));
            return self.error('bogus macro definition: %s' % (sRest,));
        asArgs = None;
        sBody  = oMatch.group(2);
    sName = oMatch.group(1);
    assert sName == sName.strip();
    #self.debug('workerPreprocessorDefine: sName=%s asArgs=%s sBody=%s<EOS>' % (sName, asArgs, sBody));

    #
    # Is this of any interest to us?  We do NOT support MC blocks within
    # nested macro expansion, just to avoid lots of extra work.
    #
    # There is only limited support for macros expanding to partial MC blocks.
    #
    # Note! IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX and other macros making
    #       use of IEMOP_RAISE_INVALID_LOCK_PREFIX_RET() will be ignored here and
    #       dealt with by overriding IEMOP_RAISE_INVALID_LOCK_PREFIX_RET and its
    #       siblings in the recompiler.  This is a lot simpler than nested macro
    #       expansion and lots of heuristics for locating all the relevant macros.
    #       Also, this way we don't produce lots of unnecessary threaded functions.
    #
    if "IEM_MC_BEGIN" not in sBody and "IEM_MC_END" not in sBody:
        #self.debug('workerPreprocessorDefine: irrelevant (%s: %s)' % (sName, sBody));
        return True;

    #
    # Add the macro.
    #
    if self.fDebugPreproc:
        self.debug('#define %s on line %u' % (sName, self.iLine,));
    self.dMacros[sName] = SimpleParser.Macro(sName, asArgs, sBody.strip(), iLineStart);
    return self.workerPreprocessorRecreateMacroRegex();
5861
def workerPreprocessorUndef(self, sRest):
    """
    Handles a macro #undef, the sRest is what follows after the directive word.

    Returns True if the macro isn't one we are tracking, otherwise the result
    of workerPreprocessorRecreateMacroRegex after forgetting it.
    """
    # Quick comment strip (anything from the first slash on) and isolate the name.
    offSlash = sRest.find('/');
    sName = (sRest[:offSlash] if offSlash > 0 else sRest).strip();

    # Nothing to do unless we're tracking the macro.
    if sName not in self.dMacros:
        return True;

    if self.fDebugPreproc:
        self.debug('#undef %s on line %u' % (sName, self.iLine,));
    del self.dMacros[sName];
    return self.workerPreprocessorRecreateMacroRegex();
5880
def workerPreprocessorIfOrElif(self, sDirective, sRest):
    """
    Handles an #if, #ifdef, #ifndef or #elif directive.

    The condition is joined across continuation lines (advancing self.iLine),
    stripped of comments and wrapped in a PreprocessorConditional.  An #elif
    is attached to the conditional on top of self.aoCppCondStack, everything
    else is pushed onto the stack.

    Returns True.  Problems are reported thru self.raiseError.
    """
    fElif = sDirective == 'elif';

    #
    # Sanity check #elif.
    #
    if fElif:
        if not self.aoCppCondStack:
            self.raiseError('#elif without #if');
        if self.aoCppCondStack[-1].fInElse:
            self.raiseError('#elif after #else');

    #
    # If using line continuation, just concat all the lines together,
    # stripping both the newline and escape characters.
    #
    while sRest.endswith('\\\n') and self.iLine < len(self.asLines):
        sRest = sRest[:-2].rstrip() + ' ' + self.asLines[self.iLine];
        self.iLine += 1;

    # Strip it of all comments and leading and trailing blanks.
    sRest = self.stripComments(sRest).strip();

    #
    # Stash it.
    #
    try:
        oPreprocCond = self.PreprocessorConditional(sDirective, sRest);
    except Exception as oXcpt:
        self.raiseError(oXcpt.args[0]);

    if fElif:
        self.aoCppCondStack[-1].aoElif.append(oPreprocCond);
    else:
        self.aoCppCondStack.append(oPreprocCond);

    return True;
5919
def workerPreprocessorElse(self):
    """
    Handles an #else directive.

    Marks the conditional on top of self.aoCppCondStack as being in its else
    part.  Returns True.  Problems are reported thru self.raiseError.
    """
    if not self.aoCppCondStack:
        self.raiseError('#else without #if');

    oTopCond = self.aoCppCondStack[-1];
    if oTopCond.fInElse:
        self.raiseError('Another #else after #else');

    oTopCond.fInElse = True;
    return True;
5931
def workerPreprocessorEndif(self):
    """
    Handles an #endif directive.

    Pops the innermost conditional off self.aoCppCondStack.  Returns True.
    An #endif without a matching #if is reported thru self.raiseError.
    """
    if not self.aoCppCondStack:
        self.raiseError('#endif without #if');

    del self.aoCppCondStack[-1];
    return True;
5941
def checkPreprocessorDirective(self, sLine):
    """
    Handles a preprocessor directive.

    sLine must contain the preprocessor hash.  The directive word is isolated
    and the matching worker is called.  Workers taking the remainder of the
    line get it starting with the last blank before the first argument.

    Returns the worker's return value, False if the directive is unknown.
    """
    cchLine = len(sLine);

    # Skip past the preprocessor hash and any blanks following it.
    offDirective = sLine.find('#');
    assert offDirective >= 0;
    offDirective += 1;
    while offDirective < cchLine and sLine[offDirective].isspace():
        offDirective += 1;

    # Extract the directive, i.e. everything up to the next blank.
    offTail = offDirective;
    while offTail < cchLine and not sLine[offTail].isspace():
        offTail += 1;
    sDirective = sLine[offDirective:offTail];
    if self.fDebugPreproc:
        self.debug('line %d: #%s...' % (self.iLine, sDirective));

    # Skip spaces following it to where the arguments/whatever starts,
    # stopping on the last blank rather than on the first argument character.
    while offTail + 1 < cchLine and sLine[offTail + 1].isspace():
        offTail += 1;
    sTail = sLine[offTail:];

    # Handle the directive.
    if sDirective in ('if', 'ifdef', 'ifndef', 'elif',):
        return self.workerPreprocessorIfOrElif(sDirective, sTail);
    if sDirective == 'else':
        return self.workerPreprocessorElse();
    if sDirective == 'endif':
        return self.workerPreprocessorEndif();
    if sDirective == 'define':
        return self.workerPreprocessorDefine(sTail);
    if sDirective == 'undef':
        return self.workerPreprocessorUndef(sTail);

    if self.fDebugPreproc:
        self.debug('line %d: Unknown preprocessor directive: %s' % (self.iLine, sDirective));
    return False;
5981
def expandMacros(self, sLine, oMatch):
    """
    Expands macros we know about in the given line.
    Currently we ASSUME there is only one and that is what oMatch matched.

    oMatch is a self.oReMacros match on sLine.  Its first group is the macro
    name, including the opening parenthesis for macros taking arguments.

    Returns sLine with the invocation replaced by the macro expansion.
    Problems are reported thru self.raiseError.
    """
    #
    # Get our bearings.
    #
    offMatch    = oMatch.start();
    offMatchEnd = oMatch.end();
    sName       = oMatch.group(1);
    assert sName == sLine[offMatch : offMatchEnd];
    fWithArgs = sName.endswith('(');
    if fWithArgs:
        sName = sName[:-1].strip();
    oMacro = self.dMacros[sName] # type: SimpleParser.Macro

    #
    # Deal with simple macro invocations w/o parameters.
    #
    if not fWithArgs:
        if self.fDebugPreproc:
            self.debug('expanding simple macro %s on line %u' % (sName, self.iLine,));
        return sLine[:offMatch] + oMacro.expandMacro(self) + sLine[offMatchEnd:];

    #
    # Complicated macro with parameters.
    # Start by extracting the parameters. ASSUMES they are all on the same line!
    # Arguments are split on commas at the outermost parenthesis level and the
    # scan stops at the parenthesis closing the invocation.
    #
    cLevel    = 1;
    offCurArg = offMatchEnd;
    asArgs    = [];
    for offCur in range(offMatchEnd, len(sLine)):
        ch = sLine[offCur];
        if ch == '(':
            cLevel += 1;
        elif ch == ')':
            cLevel -= 1;
            if cLevel == 0:
                asArgs.append(sLine[offCurArg:offCur].strip());
                break;
        elif ch == ',' and cLevel == 1:
            asArgs.append(sLine[offCurArg:offCur].strip());
            offCurArg = offCur + 1;
    else:
        self.raiseError('expandMacros: Invocation of macro %s spans multiple lines!' % (sName,));

    if len(oMacro.asArgs) == 0 and len(asArgs) == 1 and asArgs[0] == '': # trick for empty parameter list.
        asArgs = [];
    if len(oMacro.asArgs) != len(asArgs):
        self.raiseError('expandMacros: Argument mismatch in %s invocation' % (oMacro.sName,));

    #
    # Do the expanding.
    #
    if self.fDebugPreproc:
        self.debug('expanding macro %s on line %u with arguments %s' % (sName, self.iLine, asArgs));
    return sLine[:offMatch] + oMacro.expandMacro(self, asArgs) + sLine[offCur + 1 :];
6040
def parse(self):
    """
    Parses the given file.

    Walks self.asLines from self.iLine on, expanding known macros, handing
    preprocessor directives to checkPreprocessorDirective, collecting
    multi-line comments for parseComment and passing code to
    checkCodeForMacro.

    Returns number of errors.
    Raises exception on fatal trouble.
    """
    #self.debug('Parsing %s' % (self.sSrcFile,));

    #
    # Loop thru the lines.
    #
    # Please mind that self.iLine may be updated by checkCodeForMacro and
    # other worker methods.
    #
    while self.iLine < len(self.asLines):
        sLine = self.asLines[self.iLine];
        self.iLine += 1;
        #self.debug('line %u: %s' % (self.iLine, sLine[:-1]));

        # Expand macros we know about if we're currently in code.  The
        # expanded text replaces the line in self.asLines too.
        if self.iState == self.kiCode and self.oReMacros:
            oMatch = self.oReMacros.search(sLine);
            if oMatch:
                sLine = self.expandMacros(sLine, oMatch);
                if self.fDebugPreproc:
                    self.debug('line %d: expanded\n%s ==>\n%s' % (self.iLine, self.asLines[self.iLine - 1], sLine[:-1],));
                self.asLines[self.iLine - 1] = sLine;

        # Check for preprocessor directives before comments and other stuff.
        # ASSUMES preprocessor directives doesn't end with multiline comments.
        if self.iState == self.kiCode and sLine.lstrip().startswith('#'):
            if self.fDebugPreproc:
                self.debug('line %d: preproc' % (self.iLine,));
            self.checkPreprocessorDirective(sLine);
            continue;

        # Look for comments.
        offSlash = sLine.find('/');
        if offSlash >= 0:
            fCppComment = (    offSlash + 1 < len(sLine)
                           and sLine[offSlash + 1] == '/'
                           and self.iState == self.kiCode);
            if not fCppComment:
                # Work thru the line, alternating between code and comment.
                cchLine = len(sLine);
                offLine = 0;
                while offLine < cchLine:
                    if self.iState == self.kiCode:
                        # Look for substantial multiline comment so we pass the following MC as a whole line:
                        #   IEM_MC_ARG_CONST(uint8_t, bImmArg, /*=*/ bImm, 2);
                        # Note! We ignore C++ comments here, assuming these aren't used in lines with C-style comments.
                        offHit = sLine.find('/*', offLine);
                        while offHit >= 0:
                            offEnd = sLine.find('*/', offHit + 2);
                            if offEnd < 0 or offEnd - offHit >= 16: # 16 chars is a bit random.
                                break;
                            offHit = sLine.find('/*', offEnd);

                        if offHit >= 0:
                            # Code up to the comment start, then switch state.
                            self.checkCodeForMacro(sLine[offLine:offHit], offLine);
                            self.sComment     = '';
                            self.iCommentLine = self.iLine;
                            self.iState       = self.kiCommentMulti;
                            offLine = offHit + 2;
                        else:
                            # The rest of the line is code.
                            self.checkCodeForMacro(sLine[offLine:], offLine);
                            offLine = cchLine;

                    elif self.iState == self.kiCommentMulti:
                        offHit = sLine.find('*/', offLine);
                        if offHit >= 0:
                            # End of the comment; process it and go back to code.
                            self.sComment += sLine[offLine:offHit];
                            self.iState    = self.kiCode;
                            offLine = offHit + 2;
                            self.parseComment();
                        else:
                            self.sComment += sLine[offLine:];
                            offLine = cchLine;
                    else:
                        assert False;
            # C++ line comment.
            elif offSlash > 0:
                self.checkCodeForMacro(sLine[:offSlash], 0);

        # No slash, but append the line if in multi-line comment.
        elif self.iState == self.kiCommentMulti:
            #self.debug('line %d: multi' % (self.iLine,));
            self.sComment += sLine;

        # No slash, but check code line for relevant macro.
        elif (    self.iState == self.kiCode
              and ('IEMOP_' in sLine or 'FNIEMOPRM_DEF' in sLine or 'IEM_MC' in sLine)):
            #self.debug('line %d: macro' % (self.iLine,));
            self.checkCodeForMacro(sLine, 0);

        # If the line is a '}' in the first position, complete the instructions.
        elif self.iState == self.kiCode and sLine[0] == '}':
            #self.debug('line %d: }' % (self.iLine,));
            self.doneInstructions(fEndOfFunction = True);

        # Look for instruction table on the form 'IEM_STATIC const PFNIEMOP g_apfnVexMap3'
        # so we can check/add @oppfx info from it.
        elif self.iState == self.kiCode and sLine.find('PFNIEMOP') > 0 and self.oReFunTable.match(sLine):
            self.parseFunctionTable(sLine);

    self.doneInstructions(fEndOfFunction = True);
    self.debug('%3s%% / %3s stubs out of %4s instructions and %4s MC blocks in %s'
               % (self.cTotalStubs * 100 // max(self.cTotalInstr, 1), self.cTotalStubs, self.cTotalInstr,
                  self.cTotalMcBlocks, os.path.basename(self.sSrcFile),));
    return self.printErrors();
6146
# Some sanity checking.
def __sanityCheckEFlagsClasses():
    """
    Asserts that every flag listed in SimpleParser.kdEFlagsClasses is a known
    mnemonic, i.e. a key in g_kdEFlagsMnemonics.
    """
    for sClass, dLists in SimpleParser.kdEFlagsClasses.items():
        for sAttrib, asFlags in dLists.items():
            for sFlag in asFlags:
                assert sFlag in g_kdEFlagsMnemonics, 'sClass=%s sAttrib=%s sFlag=%s' % (sClass, sAttrib, sFlag,);

# Run the check once when the module is loaded.
__sanityCheckEFlagsClasses();
6154
## The parsed content of IEMAllInstCommonBodyMacros.h.
g_oParsedCommonBodyMacros = None # type: SimpleParser

def __parseFileByName(sSrcFile, sDefaultMap, sHostArch):
    """
    Parses one source file for instruction specifications.

    sSrcFile is the path of the file to parse, sDefaultMap is passed to the
    parser as the default instruction map and sHostArch as the host
    architecture.

    On the first call IEMAllInstCommonBodyMacros.h is parsed too and the
    resulting parser is kept in g_oParsedCommonBodyMacros, so its macros can
    be used when processing the other files.

    Returns a tuple with the parse() result (number of errors) and the parser
    instance.
    Raises Exception if a file cannot be opened or read, and ParserException
    (after printing it to stderr) on parsing trouble.
    """
    #
    # Read sSrcFile into a line array.
    #
    try:
        oFile = open(sSrcFile, "r"); # pylint: disable=consider-using-with,unspecified-encoding
    except Exception as oXcpt:
        raise Exception("failed to open %s for reading: %s" % (sSrcFile, oXcpt,));
    try:
        asLines = oFile.readlines();
    except Exception as oXcpt:
        raise Exception("failed to read %s: %s" % (sSrcFile, oXcpt,));
    finally:
        oFile.close();

    #
    # On the first call, we parse IEMAllInstCommonBodyMacros.h so we
    # can use the macros from it when processing the other files.
    #
    global g_oParsedCommonBodyMacros;
    if g_oParsedCommonBodyMacros is None:
        # Locate the file: next to sSrcFile first, then next to this script.
        sCommonBodyMacros = os.path.join(os.path.split(sSrcFile)[0], 'IEMAllInstCommonBodyMacros.h');
        if not os.path.isfile(sCommonBodyMacros):
            sCommonBodyMacros = os.path.join(os.path.split(__file__)[0], 'IEMAllInstCommonBodyMacros.h');

        # Read it.
        try:
            with open(sCommonBodyMacros, "r") as oIncFile: # pylint: disable=unspecified-encoding
                asIncFiles = oIncFile.readlines();
        except Exception as oXcpt:
            raise Exception("failed to open/read %s: %s" % (sCommonBodyMacros, oXcpt,));

        # Parse it.  The file is only expected to define macros, so finding
        # instructions, tags, stubs or MC blocks in it is an error.
        try:
            oParser = SimpleParser(sCommonBodyMacros, asIncFiles, 'one', sHostArch);
            if oParser.parse() != 0:
                raise ParserException('%s: errors: See above' % (sCommonBodyMacros, ));
            if oParser.cTotalInstr != 0 or oParser.cTotalStubs != 0 or oParser.cTotalTagged != 0 or oParser.cTotalMcBlocks != 0:
                # Note! The argument order must follow the message: instr, tags, stubs, MCs.
                raise ParserException('%s: error: Unexpectedly found %u instr, %u tags, %u stubs and %u MCs, expecting zero. %s'
                                      % (sCommonBodyMacros, oParser.cTotalInstr, oParser.cTotalTagged, oParser.cTotalStubs,
                                         oParser.cTotalMcBlocks,
                                         ', '.join(sorted(  [str(oMcBlock.iBeginLine) for oMcBlock in g_aoMcBlocks]
                                                          + [str(oInstr.iLineCreated) for oInstr in g_aoAllInstructions])),));
        except ParserException as oXcpt:
            print(str(oXcpt), file = sys.stderr);
            raise;
        g_oParsedCommonBodyMacros = oParser;

    #
    # Do the parsing.
    #
    try:
        oParser = SimpleParser(sSrcFile, asLines, sDefaultMap, sHostArch, g_oParsedCommonBodyMacros);
        return (oParser.parse(), oParser) ;
    except ParserException as oXcpt:
        print(str(oXcpt), file = sys.stderr);
        raise;
6219
6220
def __doTestCopying():
    """
    Executes the asCopyTests instructions.

    Each asCopyTests entry is looked up as a statistics name first and as a
    function name second.  The tests of all instructions found are appended
    to the referring instruction.

    Returns the number of errors, which are written to stderr.
    """
    asErrors = [];
    for oDstInstr in g_aoAllInstructions:
        if not oDstInstr.asCopyTests:
            continue;
        for sSrcInstr in oDstInstr.asCopyTests:
            # Resolve the reference to a list of source instructions.
            oByStat = g_dAllInstructionsByStat.get(sSrcInstr, None);
            if oByStat:
                aoSrcInstrs = [oByStat,];
            else:
                aoSrcInstrs = g_dAllInstructionsByFunction.get(sSrcInstr, []);

            if not aoSrcInstrs:
                asErrors.append('%s:%s: error: @opcopytests reference "%s" not found\n'
                                % ( oDstInstr.sSrcFile, oDstInstr.iLineCreated, sSrcInstr));
                continue;

            # Copy the tests, but refuse to copy from the instruction itself.
            for oSrcInstr in aoSrcInstrs:
                if oSrcInstr != oDstInstr:
                    oDstInstr.aoTests.extend(oSrcInstr.aoTests);
                else:
                    asErrors.append('%s:%s: error: @opcopytests reference "%s" matches the destination\n'
                                    % ( oDstInstr.sSrcFile, oDstInstr.iLineCreated, sSrcInstr));

    if asErrors:
        sys.stderr.write(u''.join(asErrors));
    return len(asErrors);
6248
6249
def __applyOnlyTest():
    """
    Restricts testing to the instructions in g_aoOnlyTestInstructions.

    When that list is non-empty, aoTests is replaced by an empty list on every
    other instruction that currently has tests.  Nothing happens when the list
    is empty.

    Returns 0 (the number of errors).
    """
    if not g_aoOnlyTestInstructions:
        return 0;

    for oCandidate in g_aoAllInstructions:
        if oCandidate.aoTests and oCandidate not in g_aoOnlyTestInstructions:
            oCandidate.aoTests = [];
    return 0;
6261
## List of all main instruction files, their default maps and file sets (-1 means included in all sets).
# Each entry is a tuple: ( file name, default map name, file set number ).
g_aaoAllInstrFilesAndDefaultMapAndSet = (
    ( 'IEMAllInstCommon.cpp.h', 'one', -1, ),
    ( 'IEMAllInstOneByte.cpp.h', 'one', 1, ),
    ( 'IEMAllInst3DNow.cpp.h', '3dnow', 2, ),
    ( 'IEMAllInstTwoByte0f.cpp.h', 'two0f', 2, ),
    ( 'IEMAllInstThree0f38.cpp.h', 'three0f38', 3, ),
    ( 'IEMAllInstThree0f3a.cpp.h', 'three0f3a', 3, ),
    ( 'IEMAllInstVexMap1.cpp.h', 'vexmap1', 4, ),
    ( 'IEMAllInstVexMap2.cpp.h', 'vexmap2', 4, ),
    ( 'IEMAllInstVexMap3.cpp.h', 'vexmap3', 4, ),
);
6274
def __parseFilesWorker(asFilesAndDefaultMap, sHostArch):
    """
    Parses the given IEMAllInstruction*.cpp.h files.

    asFilesAndDefaultMap is a sequence of (filename, default map name) pairs.
    A bare filename that does not exist relative to the current directory is
    looked up in the directory containing this script.  After parsing, the
    test copying and only-test post-processing steps are applied and a stub
    statistics line is printed to stderr.

    Returns a list of the parsers on success.
    Raises exception on failure.
    """
    sSrcDir = os.path.dirname(os.path.abspath(__file__));
    cErrors = 0;
    aoParsers = [];
    for sFilename, sDefaultMap in asFilesAndDefaultMap:
        # Only redirect plain filenames (no directory part) that cannot be found as given.
        if not os.path.split(sFilename)[0] and not os.path.exists(sFilename):
            sFilename = os.path.join(sSrcDir, sFilename);
        cThisErrors, oParser = __parseFileByName(sFilename, sDefaultMap, sHostArch);
        cErrors += cThisErrors;
        aoParsers.append(oParser);
    cErrors += __doTestCopying();
    cErrors += __applyOnlyTest();

    # Total stub stats:
    cTotalInstrs = len(g_aoAllInstructions);
    cTotalStubs = sum(oInstr.fStub for oInstr in g_aoAllInstructions);
    # Avoid a ZeroDivisionError when no instructions were collected (e.g. empty file list).
    uStubPct = cTotalStubs * 100 // cTotalInstrs if cTotalInstrs else 0;
    print('debug: %3s%% / %3s stubs out of %4s instructions and %4s MC blocks in total'
          % (uStubPct, cTotalStubs, cTotalInstrs, len(g_aoMcBlocks),),
          file = sys.stderr);

    if cErrors != 0:
        raise Exception('%d parse errors' % (cErrors,));
    return aoParsers;
6305
6306
def parseFiles(asFiles, sHostArch = None):
    """
    Parses a selection of IEMAllInstruction*.cpp.h files.

    The default map of each file is taken from the entry in
    g_aaoAllInstrFilesAndDefaultMapAndSet whose name matches the file's base
    name (case-insensitively); the actual work is done by __parseFilesWorker.

    Returns a list of the parsers on success.
    Raises exception on failure, including for files that cannot be classified.
    """
    asFilesAndDefaultMap = [];
    for sFilename in asFiles:
        sBaseName = os.path.basename(sFilename).lower();
        # First table entry with a matching name wins.
        sMap = next((aoInfo[1] for aoInfo in g_aaoAllInstrFilesAndDefaultMapAndSet if aoInfo[0].lower() == sBaseName),
                    None);
        if not sMap:
            raise Exception('Unable to classify file: %s' % (sFilename,));
        asFilesAndDefaultMap.append((sFilename, sMap));

    return __parseFilesWorker(asFilesAndDefaultMap, sHostArch);
6328
6329
def parseAll(sHostArch = None):
    """
    Parses every file listed in g_aaoAllInstrFilesAndDefaultMapAndSet.

    Returns a list of the parsers on success.
    Raises exception on failure.
    """
    # The worker only wants the (filename, default map) part of each entry.
    asFilesAndDefaultMap = [aoInfo[:2] for aoInfo in g_aaoAllInstrFilesAndDefaultMapAndSet];
    return __parseFilesWorker(asFilesAndDefaultMap, sHostArch);
6338
6339
6340#
6341# Generators (may perhaps move later).
6342#
def __formatDisassemblerTableEntry(oInstr):
    """
    Formats one instruction as a disassembler opcode table line.

    The line is an OP(...) macro invocation, or OPVEX(...) when the
    instruction has more than three operands, with the arguments placed at
    fixed column offsets.  Example of the kind of line produced:
        OP("psrlw %Vdq,%Wdq", IDX_ParseModRM, IDX_UseModRM, 0, OP_PSRLW, OP_PARM_Vdq, OP_PARM_Wdq, OP_PARM_NONE,
           DISOPTYPE_HARMLESS),
    which feeds a macro of this shape:
        define OP(pszOpcode, idxParse1, idxParse2, idxParse3, opcode, param1, param2, param3, optype) \\
            { pszOpcode, idxParse1, idxParse2, idxParse3, 0, opcode, param1, param2, param3, 0, 0, optype }

    Returns the formatted line.
    """
    cOperands = len(oInstr.aoOperands);
    if cOperands > 3:
        sMacro, cMaxOperands = 'OPVEX', 4;
    else:
        sMacro, cMaxOperands = 'OP', 3;
    assert cOperands <= cMaxOperands;

    #
    # Format string.
    #
    sOpcodeStr = '%s("%s' % (sMacro, oInstr.sMnemonic,);
    asOperandFmts = [];
    for oOperand in oInstr.aoOperands:
        sFmt = g_kdOpTypes[oOperand.sType][2];
        if sFmt[0] != '%':              ## @todo remove upper() later.
            sFmt = sFmt.upper();        ## @todo remove upper() later.
        asOperandFmts.append(sFmt);
    if asOperandFmts:
        sOpcodeStr += ' ' + ','.join(asOperandFmts);
    asColumns = [ sOpcodeStr + '",', ];

    #
    # Decoders.
    #
    iStart = len(asColumns);
    sEncoding = oInstr.sEncoding;
    dSingleParsers = { 'VEX.ModR/M': 'IDX_ParseModRM,', 'vex2': 'IDX_ParseVex2b,', 'vex3': 'IDX_ParseVex3b,', };
    if sEncoding is None or sEncoding in ('fixed', 'VEX.fixed'):
        pass;
    elif sEncoding == 'ModR/M':
        # ASSUME the first operand is using the ModR/M encoding
        assert len(oInstr.aoOperands) >= 1 and oInstr.aoOperands[0].usesModRM(), "oInstr=%s" % (oInstr,);
        asColumns.append('IDX_ParseModRM,');
    elif sEncoding == 'prefix':
        # One zero parser slot per operand.
        asColumns.extend(['0,'] * len(oInstr.aoOperands));
    elif sEncoding in dSingleParsers:
        asColumns.append(dSingleParsers[sEncoding]);
    elif sEncoding in g_dInstructionMaps:
        asColumns.append(g_dInstructionMaps[sEncoding].sDisParse + ',');
    else:
        ## @todo Parsers not handled yet:
        #   IDX_ParseTwoByteEsc, IDX_ParseGrp1, IDX_ParseShiftGrp2, IDX_ParseGrp3, IDX_ParseGrp4, IDX_ParseGrp5,
        #   IDX_Parse3DNow, IDX_ParseGrp6, IDX_ParseGrp7, IDX_ParseGrp8, IDX_ParseGrp9, IDX_ParseGrp10,
        #   IDX_ParseGrp12, IDX_ParseGrp13, IDX_ParseGrp14, IDX_ParseGrp15, IDX_ParseGrp16,
        #   IDX_ParseThreeByteEsc4, IDX_ParseThreeByteEsc5, IDX_ParseModFence, IDX_ParseEscFP,
        #   IDX_ParseNopPause, IDX_ParseInvOpModRM,
        assert False, str(oInstr);

    # Check for immediates and stuff in the remaining operands, then pad with zeros.
    cParsersSoFar = len(asColumns) - iStart;
    asColumns.extend([g_kdOpTypes[oOperand.sType][0] + ',' for oOperand in oInstr.aoOperands[cParsersSoFar:]]);
    asColumns.extend(['0,'] * (cMaxOperands - (len(asColumns) - iStart)));

    #
    # Opcode and operands.
    #
    assert oInstr.sDisEnum, str(oInstr);
    asColumns.append(oInstr.sDisEnum + ',');
    asParams = ['OP_PARM_' + g_kdOpTypes[oOperand.sType][3] + ',' for oOperand in oInstr.aoOperands];
    asParams.extend(['OP_PARM_NONE,'] * (cMaxOperands - len(asParams)));
    asColumns.extend(asParams);

    #
    # Flags: OR together the DISOPTYPE_ defines of the hints, '0' if there are none.
    #
    asFlags = [];
    for sHint in sorted(oInstr.dHints.keys()):
        sDefine = g_kdHints[sHint];
        if sDefine.startswith('DISOPTYPE_'):
            asFlags.append(sDefine);
    asColumns.append((' | '.join(asFlags) if asFlags else '0') + '),');

    #
    # Format the columns into a line.
    #
    aoffColumns = [4, 29, 49, 65, 77, 89, 109, 125, 141, 157, 183, 199];
    sLine = '';
    for iColumn, sColumn in enumerate(asColumns):
        # Pad up to the column offset, but always separate by at least one space.
        cchPadding = max(aoffColumns[iColumn] - len(sLine), 1);
        sLine += ' ' * cchPadding + sColumn;

    return sLine;
6467
def __checkIfShortTable(aoTableOrdered, oMap):
    """
    Decides whether a table should be emitted in trimmed ("short") form.

    Leading and trailing None entries can be dropped from a short table.

    Returns (iInstr, cInstructions, fShortTable).  For a short table iInstr is
    the index of the first non-None entry and cInstructions the index just
    past the last one; otherwise they are 0 and the full table length.
    """
    cEntries = len(aoTableOrdered);

    # Determine how much we can trim off the end ...
    iEnd = cEntries;
    while iEnd > 0 and aoTableOrdered[iEnd - 1] is None:
        iEnd -= 1;

    # ... and off the start.
    iFirst = 0;
    while iFirst < iEnd and aoTableOrdered[iFirst] is None:
        iFirst += 1;

    # Go for the short table when the trimmed entries reach 1/30th of the table.
    # NOTE(review): the previous comment here said "more than 30%", which does
    # not match the '// 30' threshold - confirm which one is intended.
    cTrimmed = iFirst + cEntries - iEnd;
    if cTrimmed < cEntries // 30:
        _ = oMap; # Use this for overriding.
        # Output the full table.
        return (0, cEntries, False);
    return (iFirst, iEnd, True);
6489
def generateDisassemblerTables(oDstFile = sys.stdout):
    """
    Generates disassembler tables.

    Parses all instruction files via parseAll(), splits every 'byte+pfx' map
    into one map per prefix, and writes a DISOPCODE table followed by a
    DISOPMAPDESC range record to oDstFile for each map whose name starts with
    "vex" (the other maps are currently skipped).

    Returns exit code: 0 on success, 1 if parsing failed (details on stderr).
    """

    #
    # Parse all.
    #
    try:
        parseAll();
    except Exception as oXcpt:
        print('error: parseAll failed: %s' % (oXcpt,), file = sys.stderr);
        traceback.print_exc(file = sys.stderr);
        return 1;

    #
    # The disassembler uses a slightly different table layout to save space:
    # maps selected by byte + prefix are split into one map per prefix.
    #
    aoDisasmMaps = [];
    for oMap in sorted(g_dInstructionMaps.values(),
                       key = lambda oSortMap: oSortMap.sEncoding + ''.join(oSortMap.asLeadOpcodes)):
        if oMap.sSelector != 'byte+pfx':
            aoDisasmMaps.append(oMap);
        else:
            # Split the map by prefix.
            aoDisasmMaps.append(oMap.copy(oMap.sName, 'none'));
            aoDisasmMaps.append(oMap.copy(oMap.sName + '_66', '0x66'));
            aoDisasmMaps.append(oMap.copy(oMap.sName + '_F3', '0xf3'));
            aoDisasmMaps.append(oMap.copy(oMap.sName + '_F2', '0xf2'));

    #
    # Dump each map.
    #
    # Note! asHeaderLines collects the extern declarations but is not written
    #       anywhere by this function.
    asHeaderLines = [];
    print("debug: maps=%s\n" % (', '.join([oMap.sName for oMap in aoDisasmMaps]),), file = sys.stderr);
    for oMap in aoDisasmMaps:
        sName = oMap.sName;

        if not sName.startswith("vex"):
            continue; # only looking at the vex maps at the moment.

        #
        # Get the instructions for the map and see if we can do a short version or not.
        #
        aoTableOrder    = oMap.getInstructionsInTableOrder();
        cEntriesPerByte = oMap.getEntriesPerByte();
        cEntriesPerRow  = 0x10 * cEntriesPerByte;   # Entries covered by one high opcode nibble.
        (iInstrStart, iInstrEnd, fShortTable) = __checkIfShortTable(aoTableOrder, oMap);

        #
        # Output the table start.
        # Note! Short tables are static and only accessible via the map range record.
        #
        asLines = [];
        asLines.append('/* Generated from: %-11s Selector: %-7s Encoding: %-7s Lead bytes opcodes: %s */'
                       % ( oMap.sName, oMap.sSelector, oMap.sEncoding, ' '.join(oMap.asLeadOpcodes), ));
        if fShortTable:
            asLines.append('static const DISOPCODE %s[] =' % (oMap.getDisasTableName(),));
        else:
            asHeaderLines.append('extern const DISOPCODE %s[%d];' % (oMap.getDisasTableName(), iInstrEnd - iInstrStart,));
            asLines.append(      'const DISOPCODE %s[%d] =' % (oMap.getDisasTableName(), iInstrEnd - iInstrStart,));
        asLines.append('{');

        # A short table not starting on a row boundary gets its start index noted.
        if fShortTable and (iInstrStart & (cEntriesPerRow - 1)) != 0:
            asLines.append(' /* %#04x: */' % (iInstrStart,));

        #
        # Output the instructions.
        #
        iInstr = iInstrStart;
        while iInstr < iInstrEnd:
            oInstr = aoTableOrder[iInstr];
            if (iInstr & (cEntriesPerRow - 1)) == 0:
                if iInstr != iInstrStart:
                    asLines.append('');
                asLines.append(' /* %x */' % ((iInstr // cEntriesPerByte) >> 4,));

            if oInstr is None:
                # Invalid. Optimize blocks of invalid instructions.
                cInvalidInstrs = 1;
                while iInstr + cInvalidInstrs < len(aoTableOrder) and aoTableOrder[iInstr + cInvalidInstrs] is None:
                    cInvalidInstrs += 1;
                if (iInstr & (cEntriesPerRow - 1)) == 0 and cInvalidInstrs >= cEntriesPerRow:
                    asLines.append(' INVALID_OPCODE_BLOCK_%u,' % (cEntriesPerRow,));
                    iInstr += cEntriesPerRow - 1;
                elif cEntriesPerByte > 1:
                    if (iInstr & (cEntriesPerByte - 1)) == 0 and cInvalidInstrs >= cEntriesPerByte:
                        asLines.append(' INVALID_OPCODE_BLOCK_%u,' % (cEntriesPerByte,));
                        # Skip the rest of the block just emitted; the common
                        # increment below accounts for the final entry.  (This
                        # used to be a hard-coded 3, only right for 4 entries per byte.)
                        iInstr += cEntriesPerByte - 1;
                    else:
                        asLines.append(' /* %#04x/%d */ INVALID_OPCODE,'
                                       % (iInstr // cEntriesPerByte, iInstr % cEntriesPerByte));
                else:
                    asLines.append(' /* %#04x */ INVALID_OPCODE,' % (iInstr));
            elif isinstance(oInstr, list):
                if len(oInstr) != 0:
                    asLines.append(' /* %#04x */ ComplicatedListStuffNeedingWrapper, /* \n -- %s */'
                                   % (iInstr, '\n -- '.join([str(oItem) for oItem in oInstr]),));
                else:
                    # NOTE(review): an empty list is handed to the entry formatter as-is - confirm this is reachable/intended.
                    asLines.append(__formatDisassemblerTableEntry(oInstr));
            else:
                asLines.append(__formatDisassemblerTableEntry(oInstr));

            iInstr += 1;

        # An array cannot be empty, so emit a placeholder entry.
        if iInstrStart >= iInstrEnd:
            asLines.append(' /* dummy */ INVALID_OPCODE');

        asLines.append('};');
        asLines.append('AssertCompile(RT_ELEMENTS(%s) == %s);' % (oMap.getDisasTableName(), iInstrEnd - iInstrStart,));

        #
        # We always emit a map range record, assuming the linker will eliminate the unnecessary ones.
        #
        # NOTE(review): the extern declaration appends 'Range' to getDisasRangeName()
        #               while the definition below does not - confirm which is right.
        asHeaderLines.append('extern const DISOPMAPDESC %sRange;' % (oMap.getDisasRangeName()));
        asLines.append('const DISOPMAPDESC %s = { &%s[0], %#04x, RT_ELEMENTS(%s) };'
                       % (oMap.getDisasRangeName(), oMap.getDisasTableName(), iInstrStart, oMap.getDisasTableName(),));

        #
        # Write out the lines.
        #
        oDstFile.write('\n'.join(asLines));
        oDstFile.write('\n');
        oDstFile.write('\n');
    return 0;
6618
# When run as a script: generate the disassembler tables on stdout and use the
# return value of generateDisassemblerTables() as the process exit code.
if __name__ == '__main__':
    sys.exit(generateDisassemblerTables());
6621
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette