VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllInstPython.py@ 103190

Last change on this file since 103190 was 103190, checked in by vboxsync, 10 months ago

VMM/IEMAllInst*: Liveness analysis, part 3: Flag input & modification annotations. bugref:10372

  • Property svn:eol-style set to LF
  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 314.2 KB
Line 
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3# $Id: IEMAllInstPython.py 103190 2024-02-04 23:26:35Z vboxsync $
4
5"""
6IEM instruction extractor.
7
8This script/module parses the IEMAllInstruction*.cpp.h files next to it and
9collects information about the instructions. It can then be used to generate
10disassembler tables and tests.
11"""
12
13from __future__ import print_function;
14
15__copyright__ = \
16"""
17Copyright (C) 2017-2023 Oracle and/or its affiliates.
18
19This file is part of VirtualBox base platform packages, as
20available from https://www.virtualbox.org.
21
22This program is free software; you can redistribute it and/or
23modify it under the terms of the GNU General Public License
24as published by the Free Software Foundation, in version 3 of the
25License.
26
27This program is distributed in the hope that it will be useful, but
28WITHOUT ANY WARRANTY; without even the implied warranty of
29MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
30General Public License for more details.
31
32You should have received a copy of the GNU General Public License
33along with this program; if not, see <https://www.gnu.org/licenses>.
34
35The contents of this file may alternatively be used under the terms
36of the Common Development and Distribution License Version 1.0
37(CDDL), a copy of it is provided in the "COPYING.CDDL" file included
38in the VirtualBox distribution, in which case the provisions of the
39CDDL are applicable instead of those of the GPL.
40
41You may elect to license modified versions of this file under the
42terms and conditions of either the GPL or the CDDL or both.
43
44SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
45"""
46__version__ = "$Revision: 103190 $"
47
48# pylint: disable=anomalous-backslash-in-string,too-many-lines
49
50# Standard python imports.
51import os;
52import re;
53import sys;
54import traceback;
55
56## Only the main script needs to modify the path.
57#g_ksValidationKitDir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),
58# 'ValidationKit');
59#sys.path.append(g_ksValidationKitDir);
60#
61#from common import utils; - Windows build boxes don't have pywin32.
62
# Python 3 compatibility: make the name 'long' available as an alias of 'int'
# so code in this module referring to 'long' works on both major versions.
if sys.version_info >= (3,):
    long = int;     # pylint: disable=redefined-builtin,invalid-name
66
67
## Constants and values for EFLAGS.
## The single-bit flags are written as shifts mirroring the RT_BIT_32(n) form.
g_kdX86EFlagsConstants = {
    'X86_EFL_CF':           1 << 0,                 # RT_BIT_32(0)
    'X86_EFL_1':            1 << 1,                 # RT_BIT_32(1)
    'X86_EFL_PF':           1 << 2,                 # RT_BIT_32(2)
    'X86_EFL_AF':           1 << 4,                 # RT_BIT_32(4)
    'X86_EFL_ZF':           1 << 6,                 # RT_BIT_32(6)
    'X86_EFL_SF':           1 << 7,                 # RT_BIT_32(7)
    'X86_EFL_TF':           1 << 8,                 # RT_BIT_32(8)
    'X86_EFL_IF':           1 << 9,                 # RT_BIT_32(9)
    'X86_EFL_DF':           1 << 10,                # RT_BIT_32(10)
    'X86_EFL_OF':           1 << 11,                # RT_BIT_32(11)
    'X86_EFL_IOPL':         (1 << 12) | (1 << 13),  # (RT_BIT_32(12) | RT_BIT_32(13))
    'X86_EFL_NT':           1 << 14,                # RT_BIT_32(14)
    'X86_EFL_RF':           1 << 16,                # RT_BIT_32(16)
    'X86_EFL_VM':           1 << 17,                # RT_BIT_32(17)
    'X86_EFL_AC':           1 << 18,                # RT_BIT_32(18)
    'X86_EFL_VIF':          1 << 19,                # RT_BIT_32(19)
    'X86_EFL_VIP':          1 << 20,                # RT_BIT_32(20)
    'X86_EFL_ID':           1 << 21,                # RT_BIT_32(21)
    'X86_EFL_LIVE_MASK':    0x003f7fd5,             # UINT32_C(0x003f7fd5)
    'X86_EFL_RA1_MASK':     1 << 1,                 # RT_BIT_32(1)
};
90
## EFlags values allowed in \@opfltest, \@opflmodify, \@opflundef, \@opflset, and \@opflclear.
## Maps each accepted mnemonic to an X86_EFL_XXX constant name.  Going by the
## per-entry comments, a '!' prefixed value is the "flag clear" form.
g_kdEFlagsMnemonics = {
    # Debugger flag notation (sorted by value):
    'cf':   'X86_EFL_CF',   ##< Carry Flag.
    'nc':   '!X86_EFL_CF',  ##< No Carry.

    # NOTE(review): architecturally PF=1 signals even parity, so 'po' mapping
    #               to PF set (and 'pe' to PF clear) looks inverted - confirm.
    'po':   'X86_EFL_PF',   ##< Parity Odd.
    'pe':   '!X86_EFL_PF',  ##< Parity Even.

    'af':   'X86_EFL_AF',   ##< Aux Flag.
    'na':   '!X86_EFL_AF',  ##< No Aux.

    'zr':   'X86_EFL_ZF',   ##< ZeRo.
    'nz':   '!X86_EFL_ZF',  ##< No Zero.

    'ng':   'X86_EFL_SF',   ##< NeGative (sign).
    'pl':   '!X86_EFL_SF',  ##< PLus (sign).

    'tf':   'X86_EFL_TF',   ##< Trap flag.

    'ei':   'X86_EFL_IF',   ##< Enabled Interrupts.
    'di':   '!X86_EFL_IF',  ##< Disabled Interrupts.

    'dn':   'X86_EFL_DF',   ##< DowN (string op direction).
    'up':   '!X86_EFL_DF',  ##< UP (string op direction).

    'ov':   'X86_EFL_OF',   ##< OVerflow.
    'nv':   '!X86_EFL_OF',  ##< No Overflow.

    'nt':   'X86_EFL_NT',   ##< Nested Task.
    'rf':   'X86_EFL_RF',   ##< Resume Flag.
    'vm':   'X86_EFL_VM',   ##< Virtual-8086 Mode.
    'ac':   'X86_EFL_AC',   ##< Alignment Check.
    'vif':  'X86_EFL_VIF',  ##< Virtual Interrupt Flag.
    'vip':  'X86_EFL_VIP',  ##< Virtual Interrupt Pending.

    # Reference manual notation not covered above (sorted by value):
    'pf':   'X86_EFL_PF',
    'zf':   'X86_EFL_ZF',
    'sf':   'X86_EFL_SF',
    'if':   'X86_EFL_IF',
    'df':   'X86_EFL_DF',
    'of':   'X86_EFL_OF',
    'iopl': 'X86_EFL_IOPL',
    'id':   'X86_EFL_ID',
};
137
## Constants and values for CR0.
## Each flag is a single bit, written as a shift mirroring RT_BIT_32(n).
g_kdX86Cr0Constants = {
    'X86_CR0_PE':           1 << 0,     # RT_BIT_32(0)
    'X86_CR0_MP':           1 << 1,     # RT_BIT_32(1)
    'X86_CR0_EM':           1 << 2,     # RT_BIT_32(2)
    'X86_CR0_TS':           1 << 3,     # RT_BIT_32(3)
    'X86_CR0_ET':           1 << 4,     # RT_BIT_32(4)
    'X86_CR0_NE':           1 << 5,     # RT_BIT_32(5)
    'X86_CR0_WP':           1 << 16,    # RT_BIT_32(16)
    'X86_CR0_AM':           1 << 18,    # RT_BIT_32(18)
    'X86_CR0_NW':           1 << 29,    # RT_BIT_32(29)
    'X86_CR0_CD':           1 << 30,    # RT_BIT_32(30)
    'X86_CR0_PG':           1 << 31,    # RT_BIT_32(31)
};
152
## Constants and values for CR4.
## Each flag is a single bit, written as a shift mirroring RT_BIT_32(n).
g_kdX86Cr4Constants = {
    'X86_CR4_VME':          1 << 0,     # RT_BIT_32(0)
    'X86_CR4_PVI':          1 << 1,     # RT_BIT_32(1)
    'X86_CR4_TSD':          1 << 2,     # RT_BIT_32(2)
    'X86_CR4_DE':           1 << 3,     # RT_BIT_32(3)
    'X86_CR4_PSE':          1 << 4,     # RT_BIT_32(4)
    'X86_CR4_PAE':          1 << 5,     # RT_BIT_32(5)
    'X86_CR4_MCE':          1 << 6,     # RT_BIT_32(6)
    'X86_CR4_PGE':          1 << 7,     # RT_BIT_32(7)
    'X86_CR4_PCE':          1 << 8,     # RT_BIT_32(8)
    'X86_CR4_OSFXSR':       1 << 9,     # RT_BIT_32(9)
    'X86_CR4_OSXMMEEXCPT':  1 << 10,    # RT_BIT_32(10)
    'X86_CR4_VMXE':         1 << 13,    # RT_BIT_32(13)
    'X86_CR4_SMXE':         1 << 14,    # RT_BIT_32(14)
    'X86_CR4_PCIDE':        1 << 17,    # RT_BIT_32(17)
    'X86_CR4_OSXSAVE':      1 << 18,    # RT_BIT_32(18)
    'X86_CR4_SMEP':         1 << 20,    # RT_BIT_32(20)
    'X86_CR4_SMAP':         1 << 21,    # RT_BIT_32(21)
    'X86_CR4_PKE':          1 << 22,    # RT_BIT_32(22)
};
174
## XSAVE components (XCR0).
## Single component bits are written as shifts; the two combined masks are
## kept as literal values.
g_kdX86XSaveCConstants = {
    'XSAVE_C_X87':          1 << 0,
    'XSAVE_C_SSE':          1 << 1,
    'XSAVE_C_YMM':          1 << 2,
    'XSAVE_C_BNDREGS':      1 << 3,
    'XSAVE_C_BNDCSR':       1 << 4,
    'XSAVE_C_OPMASK':       1 << 5,
    'XSAVE_C_ZMM_HI256':    1 << 6,
    'XSAVE_C_ZMM_16HI':     1 << 7,
    'XSAVE_C_PKRU':         1 << 9,
    'XSAVE_C_LWP':          1 << 62,
    'XSAVE_C_X':            1 << 63,
    'XSAVE_C_ALL_AVX':      0x000000c4, # For clearing all AVX bits.
    'XSAVE_C_ALL_AVX_SSE':  0x000000c6, # For clearing all AVX and SSE bits.
};
191
192
## \@op[1-4] locations
## Every location name gets its own, initially empty, list.
g_kdOpLocations = {
    sLocation: [] for sLocation in (
        'reg',      ## modrm.reg
        'rm',       ## modrm.rm
        'imm',      ## immediate instruction data
        'vvvv',     ## VEX.vvvv

        # fixed registers.
        'AL',
        'rAX',
        'rDX',
        'CL',
        'rSI',
        'rDI',
        'rFLAGS',
        'CS',
        'DS',
        'ES',
        'FS',
        'GS',
        'SS',

        # fixed values.
        '1',
    )
};
218
## \@op[1-4] types
##
## Maps an operand type name to a 5-tuple.
##
## Value fields:
##    - 0: the normal IDX_ParseXXX handler (IDX_UseModRM == IDX_ParseModRM).
##    - 1: the location (g_kdOpLocations).
##    - 2: disassembler format string version of the type.
##    - 3: disassembler OP_PARAM_XXX (XXX only).
##    - 4: IEM form matching instruction.
##
## Note! See the A.2.1 in SDM vol 2 for the type names.
g_kdOpTypes = {
    # Fixed addresses
    'Ap':           ( 'IDX_ParseImmAddrF',  'imm',    '%Ap',  'Ap',      'FIXED', ),

    # ModR/M.rm
    'Eb':           ( 'IDX_UseModRM',       'rm',     '%Eb',  'Eb',      'RM',    ),
    'Ed':           ( 'IDX_UseModRM',       'rm',     '%Ed',  'Ed',      'RM',    ),
    'Ed_WO':        ( 'IDX_UseModRM',       'rm',     '%Ed',  'Ed',      'RM',    ),
    'Eq':           ( 'IDX_UseModRM',       'rm',     '%Eq',  'Eq',      'RM',    ),
    'Eq_WO':        ( 'IDX_UseModRM',       'rm',     '%Eq',  'Eq',      'RM',    ),
    'Ew':           ( 'IDX_UseModRM',       'rm',     '%Ew',  'Ew',      'RM',    ),
    'Ev':           ( 'IDX_UseModRM',       'rm',     '%Ev',  'Ev',      'RM',    ),
    'Ey':           ( 'IDX_UseModRM',       'rm',     '%Ey',  'Ey',      'RM',    ),
    'Qd':           ( 'IDX_UseModRM',       'rm',     '%Qd',  'Qd',      'RM',    ),
    'Qq':           ( 'IDX_UseModRM',       'rm',     '%Qq',  'Qq',      'RM',    ),
    'Qq_WO':        ( 'IDX_UseModRM',       'rm',     '%Qq',  'Qq',      'RM',    ),
    'Wss':          ( 'IDX_UseModRM',       'rm',     '%Wss', 'Wss',     'RM',    ),
    'Wss_WO':       ( 'IDX_UseModRM',       'rm',     '%Wss', 'Wss',     'RM',    ),
    'Wsd':          ( 'IDX_UseModRM',       'rm',     '%Wsd', 'Wsd',     'RM',    ),
    'Wsd_WO':       ( 'IDX_UseModRM',       'rm',     '%Wsd', 'Wsd',     'RM',    ),
    'Wps':          ( 'IDX_UseModRM',       'rm',     '%Wps', 'Wps',     'RM',    ),
    'Wps_WO':       ( 'IDX_UseModRM',       'rm',     '%Wps', 'Wps',     'RM',    ),
    'Wpd':          ( 'IDX_UseModRM',       'rm',     '%Wpd', 'Wpd',     'RM',    ),
    'Wpd_WO':       ( 'IDX_UseModRM',       'rm',     '%Wpd', 'Wpd',     'RM',    ),
    'Wdq':          ( 'IDX_UseModRM',       'rm',     '%Wdq', 'Wdq',     'RM',    ),
    'Wdq_WO':       ( 'IDX_UseModRM',       'rm',     '%Wdq', 'Wdq',     'RM',    ),
    'Wq':           ( 'IDX_UseModRM',       'rm',     '%Wq',  'Wq',      'RM',    ),
    'Wq_WO':        ( 'IDX_UseModRM',       'rm',     '%Wq',  'Wq',      'RM',    ),
    'WqZxReg_WO':   ( 'IDX_UseModRM',       'rm',     '%Wq',  'Wq',      'RM',    ),
    'Wx':           ( 'IDX_UseModRM',       'rm',     '%Wx',  'Wx',      'RM',    ),
    'Wx_WO':        ( 'IDX_UseModRM',       'rm',     '%Wx',  'Wx',      'RM',    ),

    # ModR/M.rm - register only.
    'Uq':           ( 'IDX_UseModRM',       'rm',     '%Uq',  'Uq',      'REG'    ),
    'UqHi':         ( 'IDX_UseModRM',       'rm',     '%Uq',  'UqHi',    'REG'    ),
    'Uss':          ( 'IDX_UseModRM',       'rm',     '%Uss', 'Uss',     'REG'    ),
    'Uss_WO':       ( 'IDX_UseModRM',       'rm',     '%Uss', 'Uss',     'REG'    ),
    'Usd':          ( 'IDX_UseModRM',       'rm',     '%Usd', 'Usd',     'REG'    ),
    'Usd_WO':       ( 'IDX_UseModRM',       'rm',     '%Usd', 'Usd',     'REG'    ),
    'Ux':           ( 'IDX_UseModRM',       'rm',     '%Ux',  'Ux',      'REG'    ),
    'Nq':           ( 'IDX_UseModRM',       'rm',     '%Qq',  'Nq',      'REG'    ),

    # ModR/M.rm - memory only.
    'Ma':           ( 'IDX_UseModRM',       'rm',     '%Ma',  'Ma',      'MEM',   ), ##< Only used by BOUND.
    'Mb_RO':        ( 'IDX_UseModRM',       'rm',     '%Mb',  'Mb',      'MEM',   ),
    'Md':           ( 'IDX_UseModRM',       'rm',     '%Md',  'Md',      'MEM',   ),
    'Md_RO':        ( 'IDX_UseModRM',       'rm',     '%Md',  'Md',      'MEM',   ),
    'Md_WO':        ( 'IDX_UseModRM',       'rm',     '%Md',  'Md',      'MEM',   ),
    'Mdq':          ( 'IDX_UseModRM',       'rm',     '%Mdq', 'Mdq',     'MEM',   ),
    'Mdq_WO':       ( 'IDX_UseModRM',       'rm',     '%Mdq', 'Mdq',     'MEM',   ),
    'Mq':           ( 'IDX_UseModRM',       'rm',     '%Mq',  'Mq',      'MEM',   ),
    'Mq_WO':        ( 'IDX_UseModRM',       'rm',     '%Mq',  'Mq',      'MEM',   ),
    'Mps_WO':       ( 'IDX_UseModRM',       'rm',     '%Mps', 'Mps',     'MEM',   ),
    'Mpd_WO':       ( 'IDX_UseModRM',       'rm',     '%Mpd', 'Mpd',     'MEM',   ),
    'Mx':           ( 'IDX_UseModRM',       'rm',     '%Mx',  'Mx',      'MEM',   ),
    'Mx_WO':        ( 'IDX_UseModRM',       'rm',     '%Mx',  'Mx',      'MEM',   ),
    'M_RO':         ( 'IDX_UseModRM',       'rm',     '%M',   'M',       'MEM',   ),
    'M_RW':         ( 'IDX_UseModRM',       'rm',     '%M',   'M',       'MEM',   ),

    # ModR/M.reg
    'Gb':           ( 'IDX_UseModRM',       'reg',    '%Gb',  'Gb',      '',      ),
    'Gw':           ( 'IDX_UseModRM',       'reg',    '%Gw',  'Gw',      '',      ),
    'Gd':           ( 'IDX_UseModRM',       'reg',    '%Gd',  'Gd',      '',      ),
    'Gv':           ( 'IDX_UseModRM',       'reg',    '%Gv',  'Gv',      '',      ),
    'Gv_RO':        ( 'IDX_UseModRM',       'reg',    '%Gv',  'Gv',      '',      ),
    'Gy':           ( 'IDX_UseModRM',       'reg',    '%Gy',  'Gy',      '',      ),
    'Pd':           ( 'IDX_UseModRM',       'reg',    '%Pd',  'Pd',      '',      ),
    'PdZx_WO':      ( 'IDX_UseModRM',       'reg',    '%Pd',  'PdZx',    '',      ),
    'Pq':           ( 'IDX_UseModRM',       'reg',    '%Pq',  'Pq',      '',      ),
    'Pq_WO':        ( 'IDX_UseModRM',       'reg',    '%Pq',  'Pq',      '',      ),
    'Vd':           ( 'IDX_UseModRM',       'reg',    '%Vd',  'Vd',      '',      ),
    'Vd_WO':        ( 'IDX_UseModRM',       'reg',    '%Vd',  'Vd',      '',      ),
    'VdZx_WO':      ( 'IDX_UseModRM',       'reg',    '%Vd',  'Vd',      '',      ),
    'Vdq':          ( 'IDX_UseModRM',       'reg',    '%Vdq', 'Vdq',     '',      ),
    'Vss':          ( 'IDX_UseModRM',       'reg',    '%Vss', 'Vss',     '',      ),
    'Vss_WO':       ( 'IDX_UseModRM',       'reg',    '%Vss', 'Vss',     '',      ),
    'VssZx_WO':     ( 'IDX_UseModRM',       'reg',    '%Vss', 'Vss',     '',      ),
    'Vsd':          ( 'IDX_UseModRM',       'reg',    '%Vsd', 'Vsd',     '',      ),
    'Vsd_WO':       ( 'IDX_UseModRM',       'reg',    '%Vsd', 'Vsd',     '',      ),
    'VsdZx_WO':     ( 'IDX_UseModRM',       'reg',    '%Vsd', 'Vsd',     '',      ),
    'Vps':          ( 'IDX_UseModRM',       'reg',    '%Vps', 'Vps',     '',      ),
    'Vps_WO':       ( 'IDX_UseModRM',       'reg',    '%Vps', 'Vps',     '',      ),
    'Vpd':          ( 'IDX_UseModRM',       'reg',    '%Vpd', 'Vpd',     '',      ),
    'Vpd_WO':       ( 'IDX_UseModRM',       'reg',    '%Vpd', 'Vpd',     '',      ),
    'Vq':           ( 'IDX_UseModRM',       'reg',    '%Vq',  'Vq',      '',      ),
    'Vq_WO':        ( 'IDX_UseModRM',       'reg',    '%Vq',  'Vq',      '',      ),
    'Vdq_WO':       ( 'IDX_UseModRM',       'reg',    '%Vdq', 'Vdq',     '',      ),
    'VqHi':         ( 'IDX_UseModRM',       'reg',    '%Vdq', 'VdqHi',   '',      ),
    'VqHi_WO':      ( 'IDX_UseModRM',       'reg',    '%Vdq', 'VdqHi',   '',      ),
    'VqZx_WO':      ( 'IDX_UseModRM',       'reg',    '%Vq',  'VqZx',    '',      ),
    'Vx':           ( 'IDX_UseModRM',       'reg',    '%Vx',  'Vx',      '',      ),
    'Vx_WO':        ( 'IDX_UseModRM',       'reg',    '%Vx',  'Vx',      '',      ),

    # VEX.vvvv
    'By':           ( 'IDX_UseModRM',       'vvvv',   '%By',  'By',      'V',     ),
    'Hps':          ( 'IDX_UseModRM',       'vvvv',   '%Hps', 'Hps',     'V',     ),
    'Hpd':          ( 'IDX_UseModRM',       'vvvv',   '%Hpd', 'Hpd',     'V',     ),
    'HssHi':        ( 'IDX_UseModRM',       'vvvv',   '%Hx',  'HssHi',   'V',     ),
    'HsdHi':        ( 'IDX_UseModRM',       'vvvv',   '%Hx',  'HsdHi',   'V',     ),
    'Hq':           ( 'IDX_UseModRM',       'vvvv',   '%Hq',  'Hq',      'V',     ),
    'HqHi':         ( 'IDX_UseModRM',       'vvvv',   '%Hq',  'HqHi',    'V',     ),
    'Hx':           ( 'IDX_UseModRM',       'vvvv',   '%Hx',  'Hx',      'V',     ),

    # Immediate values.
    'Ib':           ( 'IDX_ParseImmByte',   'imm',    '%Ib',  'Ib',      '', ), ##< NB! Could be IDX_ParseImmByteSX for some instrs.
    'Iw':           ( 'IDX_ParseImmUshort', 'imm',    '%Iw',  'Iw',      '', ),
    'Id':           ( 'IDX_ParseImmUlong',  'imm',    '%Id',  'Id',      '', ),
    'Iq':           ( 'IDX_ParseImmQword',  'imm',    '%Iq',  'Iq',      '', ),
    'Iv':           ( 'IDX_ParseImmV',      'imm',    '%Iv',  'Iv',      '', ), ##< o16: word, o32: dword, o64: qword
    'Iz':           ( 'IDX_ParseImmZ',      'imm',    '%Iz',  'Iz',      '', ), ##< o16: word, o32|o64:dword

    # Address operands (no ModR/M).
    'Ob':           ( 'IDX_ParseImmAddr',   'imm',    '%Ob',  'Ob',      '', ),
    'Ov':           ( 'IDX_ParseImmAddr',   'imm',    '%Ov',  'Ov',      '', ),

    # Relative jump targets
    'Jb':           ( 'IDX_ParseImmBRel',   'imm',    '%Jb',  'Jb',      '', ),
    'Jv':           ( 'IDX_ParseImmVRel',   'imm',    '%Jv',  'Jv',      '', ),

    # DS:rSI
    'Xb':           ( 'IDX_ParseXb',        'rSI',    '%eSI', 'Xb',      '', ),
    'Xv':           ( 'IDX_ParseXv',        'rSI',    '%eSI', 'Xv',      '', ),
    # ES:rDI
    'Yb':           ( 'IDX_ParseYb',        'rDI',    '%eDI', 'Yb',      '', ),
    'Yv':           ( 'IDX_ParseYv',        'rDI',    '%eDI', 'Yv',      '', ),

    # rFLAGS
    'Fv':           ( 'IDX_ParseFixedReg',  'rFLAGS', '%Fv',  'Fv',      '', ),

    # Fixed registers.
    'AL':           ( 'IDX_ParseFixedReg',  'AL',     'al',   'REG_AL',  '', ),
    'REG_CL':       ( 'IDX_ParseFixedReg',  'CL',     'cl',   'REG_CL',  '', ),
    'rAX':          ( 'IDX_ParseFixedReg',  'rAX',    '%eAX', 'REG_EAX', '', ),
    'rDX':          ( 'IDX_ParseFixedReg',  'rDX',    '%eDX', 'REG_EDX', '', ),
    'CS':           ( 'IDX_ParseFixedReg',  'CS',     'cs',   'REG_CS',  '', ), # 8086: push CS
    'DS':           ( 'IDX_ParseFixedReg',  'DS',     'ds',   'REG_DS',  '', ),
    'ES':           ( 'IDX_ParseFixedReg',  'ES',     'es',   'REG_ES',  '', ),
    'FS':           ( 'IDX_ParseFixedReg',  'FS',     'fs',   'REG_FS',  '', ),
    'GS':           ( 'IDX_ParseFixedReg',  'GS',     'gs',   'REG_GS',  '', ),
    'SS':           ( 'IDX_ParseFixedReg',  'SS',     'ss',   'REG_SS',  '', ),

    # Fixed values.
    '1':            ( '',                   '1',      '1',    '1',       '', ),
};
372
373# IDX_ParseFixedReg
374# IDX_ParseVexDest
375
376
## IEMFORM_XXX mappings.
## Maps a form name to a tuple of: the encoding (g_kdEncodings key), the list
## of operand locations in operand order (None for 'FIXED'), and the
## opcodesub value ('' when there is none).
g_kdIemForms = {     # sEncoding,   [ sWhere1, ... ]         opcodesub      ),
    'RM':           ( 'ModR/M',     [ 'reg', 'rm' ],         '',            ),
    'RM_REG':       ( 'ModR/M',     [ 'reg', 'rm' ],         '11 mr/reg',   ),
    'RM_MEM':       ( 'ModR/M',     [ 'reg', 'rm' ],         '!11 mr/reg',  ),
    'RMI':          ( 'ModR/M',     [ 'reg', 'rm', 'imm' ],  '',            ),
    'RMI_REG':      ( 'ModR/M',     [ 'reg', 'rm', 'imm' ],  '11 mr/reg',   ),
    'RMI_MEM':      ( 'ModR/M',     [ 'reg', 'rm', 'imm' ],  '!11 mr/reg',  ),
    'MR':           ( 'ModR/M',     [ 'rm', 'reg' ],         '',            ),
    'MR_REG':       ( 'ModR/M',     [ 'rm', 'reg' ],         '11 mr/reg',   ),
    'MR_MEM':       ( 'ModR/M',     [ 'rm', 'reg' ],         '!11 mr/reg',  ),
    'MRI':          ( 'ModR/M',     [ 'rm', 'reg', 'imm' ],  '',            ),
    'MRI_REG':      ( 'ModR/M',     [ 'rm', 'reg', 'imm' ],  '11 mr/reg',   ),
    'MRI_MEM':      ( 'ModR/M',     [ 'rm', 'reg', 'imm' ],  '!11 mr/reg',  ),
    # NOTE(review): unlike the other _REG/_MEM forms, M_REG/M_MEM (and the
    #               VEX_M_REG/VEX_M_MEM ones below) have an empty opcodesub -
    #               confirm this is intentional.
    'M':            ( 'ModR/M',     [ 'rm', ],               '',            ),
    'M_REG':        ( 'ModR/M',     [ 'rm', ],               '',            ),
    'M_MEM':        ( 'ModR/M',     [ 'rm', ],               '',            ),
    'M1':           ( 'ModR/M',     [ 'rm', '1' ],           '',            ),
    'M_CL':         ( 'ModR/M',     [ 'rm', 'CL' ],          '',            ), # shl/rcl/ror/++
    'MI':           ( 'ModR/M',     [ 'rm', 'imm' ],         '',            ),
    'MI_REG':       ( 'ModR/M',     [ 'rm', 'imm' ],         '11 mr/reg',   ),
    'MI_MEM':       ( 'ModR/M',     [ 'rm', 'imm' ],         '!11 mr/reg',  ),
    'R':            ( 'ModR/M',     [ 'reg', ],              '',            ),

    'VEX_RM':       ( 'VEX.ModR/M', [ 'reg', 'rm' ],         '',            ),
    'VEX_RM_REG':   ( 'VEX.ModR/M', [ 'reg', 'rm' ],         '11 mr/reg',   ),
    'VEX_RM_MEM':   ( 'VEX.ModR/M', [ 'reg', 'rm' ],         '!11 mr/reg',  ),
    'VEX_MR':       ( 'VEX.ModR/M', [ 'rm', 'reg' ],         '',            ),
    'VEX_MR_REG':   ( 'VEX.ModR/M', [ 'rm', 'reg' ],         '11 mr/reg',   ),
    'VEX_MR_MEM':   ( 'VEX.ModR/M', [ 'rm', 'reg' ],         '!11 mr/reg',  ),
    'VEX_M':        ( 'VEX.ModR/M', [ 'rm', ],               '' ),
    'VEX_M_REG':    ( 'VEX.ModR/M', [ 'rm', ],               '' ),
    'VEX_M_MEM':    ( 'VEX.ModR/M', [ 'rm', ],               '' ),
    'VEX_R':        ( 'VEX.ModR/M', [ 'reg', ],              '' ),
    'VEX_RVM':      ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm' ], '',            ),
    'VEX_RVM_REG':  ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm' ], '11 mr/reg',   ),
    'VEX_RVM_MEM':  ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm' ], '!11 mr/reg',  ),
    'VEX_RMV':      ( 'VEX.ModR/M', [ 'reg', 'rm', 'vvvv' ], '',            ),
    'VEX_RMV_REG':  ( 'VEX.ModR/M', [ 'reg', 'rm', 'vvvv' ], '11 mr/reg',   ),
    'VEX_RMV_MEM':  ( 'VEX.ModR/M', [ 'reg', 'rm', 'vvvv' ], '!11 mr/reg',  ),
    'VEX_RMI':      ( 'VEX.ModR/M', [ 'reg', 'rm', 'imm' ],  '',            ),
    'VEX_RMI_REG':  ( 'VEX.ModR/M', [ 'reg', 'rm', 'imm' ],  '11 mr/reg',   ),
    'VEX_RMI_MEM':  ( 'VEX.ModR/M', [ 'reg', 'rm', 'imm' ],  '!11 mr/reg',  ),
    'VEX_MVR':      ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg' ], '',            ),
    'VEX_MVR_REG':  ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg' ], '11 mr/reg',   ),
    'VEX_MVR_MEM':  ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg' ], '!11 mr/reg',  ),

    'VEX_VM':       ( 'VEX.ModR/M', [ 'vvvv', 'rm' ],        '',            ),
    'VEX_VM_REG':   ( 'VEX.ModR/M', [ 'vvvv', 'rm' ],        '11 mr/reg',   ),
    'VEX_VM_MEM':   ( 'VEX.ModR/M', [ 'vvvv', 'rm' ],        '!11 mr/reg',  ),
    'VEX_VMI':      ( 'VEX.ModR/M', [ 'vvvv', 'rm', 'imm' ], '',            ),
    'VEX_VMI_REG':  ( 'VEX.ModR/M', [ 'vvvv', 'rm', 'imm' ], '11 mr/reg',   ),
    'VEX_VMI_MEM':  ( 'VEX.ModR/M', [ 'vvvv', 'rm', 'imm' ], '!11 mr/reg',  ),

    'FIXED':        ( 'fixed',      None,                    '',            ),
};
433
## \@oppfx values.
## Every prefix name gets its own, initially empty, list.
g_kdPrefixes = {
    sPrefix: [] for sPrefix in (
        'none',
        '0x66',
        '0xf3',
        '0xf2',
        '!0xf3',    # special case for bsf/tzcnt
    )
};
442
## Special \@opcode tag values.
## Every tag value gets its own, initially empty, list.
g_kdSpecialOpcodes = {
    sSpecial: [] for sSpecial in (
        '/reg',
        'mr/reg',
        '11 /reg',
        '!11 /reg',
        '11 mr/reg',
        '!11 mr/reg',
    )
};
452
## Special \@opcodesub tag values.
## The first value is the real value for aliases.
## The second value is for bs3cg1.
##
## Entries that are not aliases resolve to themselves ('none' resolves to None).
g_kdSubOpcodes = {
    'none':                 [ None,                 '',     ],
    '11 mr/reg':            [ '11 mr/reg',          '',     ],
    '11':                   [ '11 mr/reg',          '',     ], ##< alias
    '!11 mr/reg':           [ '!11 mr/reg',         '',     ],
    '!11':                  [ '!11 mr/reg',         '',     ], ##< alias
    'rex.w=0':              [ 'rex.w=0',            'WZ',   ],
    'w=0':                  [ 'rex.w=0',            '',     ], ##< alias
    'rex.w=1':              [ 'rex.w=1',            'WNZ',  ],
    'w=1':                  [ 'rex.w=1',            '',     ], ##< alias
    'vex.l=0':              [ 'vex.l=0',            'L0',   ],
    'vex.l=1':              [ 'vex.l=1',            'L1',   ], # Was 'vex.l=0', a copy & paste slip.
    '11 mr/reg vex.l=0':    [ '11 mr/reg vex.l=0',  'L0',   ],
    '11 mr/reg vex.l=1':    [ '11 mr/reg vex.l=1',  'L1',   ],
    '!11 mr/reg vex.l=0':   [ '!11 mr/reg vex.l=0', 'L0',   ],
    '!11 mr/reg vex.l=1':   [ '!11 mr/reg vex.l=1', 'L1',   ],
    '!11 mr/reg rex.w=0':   [ '!11 mr/reg rex.w=0', '',     ],
    '!11 mr/reg rex.w=1':   [ '!11 mr/reg rex.w=1', '',     ],
};
475
## Valid values for \@openc
## Each value is a one-element list holding a BS3CG1ENC_XXX constant name, or
## None in the case of 'prefix'.
g_kdEncodings = {
    'ModR/M':       [ 'BS3CG1ENC_MODRM', ],      ##< ModR/M
    'VEX.ModR/M':   [ 'BS3CG1ENC_VEX_MODRM', ],  ##< VEX...ModR/M
    'fixed':        [ 'BS3CG1ENC_FIXED', ],      ##< Fixed encoding (address, registers, unused, etc).
    'VEX.fixed':    [ 'BS3CG1ENC_VEX_FIXED', ],  ##< VEX + fixed encoding (address, registers, unused, etc).
    'prefix':       [ None, ],                   ##< Prefix
};
484
## \@opunused, \@opinvalid, \@opinvlstyle
## Every style name gets its own, initially empty, list.
g_kdInvalidStyles = {
    sStyle: [] for sStyle in (
        'immediate',                ##< CPU stops decoding immediately after the opcode.
        'vex.modrm',                ##< VEX+ModR/M, everyone.
        'intel-modrm',              ##< Intel decodes ModR/M.
        'intel-modrm-imm8',         ##< Intel decodes ModR/M and an 8-bit immediate.
        'intel-opcode-modrm',       ##< Intel decodes another opcode byte followed by ModR/M. (Unused extension tables.)
        'intel-opcode-modrm-imm8',  ##< Intel decodes another opcode byte followed by ModR/M and an 8-bit immediate.
    )
};
494
## Known CPU names, each mapped to an empty tuple.
g_kdCpuNames = {
    sCpuName: () for sCpuName in (
        '8086',
        '80186',
        '80286',
        '80386',
        '80486',
    )
};
502
## \@opcpuid
## Maps each accepted feature name to an X86_CPUID_XXX constant name.
##
## NOTE(review): 'pclmul', 'monitor' and 'smx' map to constants named after
##               other features (DTES64, CPLDS and TM2 respectively).  This
##               looks like an off-by-one slip against the CPUID leaf 1 ECX
##               bit list; presumably they should be the _ECX_PCLMUL,
##               _ECX_MONITOR and _ECX_SMX constants - verify against x86.h
##               before changing.
g_kdCpuIdFlags = {
    'vme':          'X86_CPUID_FEATURE_EDX_VME',
    'tsc':          'X86_CPUID_FEATURE_EDX_TSC',
    'msr':          'X86_CPUID_FEATURE_EDX_MSR',
    'cx8':          'X86_CPUID_FEATURE_EDX_CX8',
    'sep':          'X86_CPUID_FEATURE_EDX_SEP',
    'cmov':         'X86_CPUID_FEATURE_EDX_CMOV',
    'clfsh':        'X86_CPUID_FEATURE_EDX_CLFSH',
    'clflushopt':   'X86_CPUID_STEXT_FEATURE_EBX_CLFLUSHOPT',
    'mmx':          'X86_CPUID_FEATURE_EDX_MMX',
    'fxsr':         'X86_CPUID_FEATURE_EDX_FXSR',
    'sse':          'X86_CPUID_FEATURE_EDX_SSE',
    'sse2':         'X86_CPUID_FEATURE_EDX_SSE2',
    'sse3':         'X86_CPUID_FEATURE_ECX_SSE3',
    'pclmul':       'X86_CPUID_FEATURE_ECX_DTES64',     # NOTE(review): see above.
    'monitor':      'X86_CPUID_FEATURE_ECX_CPLDS',      # NOTE(review): see above.
    'vmx':          'X86_CPUID_FEATURE_ECX_VMX',
    'smx':          'X86_CPUID_FEATURE_ECX_TM2',        # NOTE(review): see above.
    'ssse3':        'X86_CPUID_FEATURE_ECX_SSSE3',
    'fma':          'X86_CPUID_FEATURE_ECX_FMA',
    'cx16':         'X86_CPUID_FEATURE_ECX_CX16',
    'pcid':         'X86_CPUID_FEATURE_ECX_PCID',
    'sse4.1':       'X86_CPUID_FEATURE_ECX_SSE4_1',
    'sse4.2':       'X86_CPUID_FEATURE_ECX_SSE4_2',
    'movbe':        'X86_CPUID_FEATURE_ECX_MOVBE',
    'popcnt':       'X86_CPUID_FEATURE_ECX_POPCNT',
    'aes':          'X86_CPUID_FEATURE_ECX_AES',
    'xsave':        'X86_CPUID_FEATURE_ECX_XSAVE',
    'avx':          'X86_CPUID_FEATURE_ECX_AVX',
    'avx2':         'X86_CPUID_STEXT_FEATURE_EBX_AVX2',
    'f16c':         'X86_CPUID_FEATURE_ECX_F16C',
    'rdrand':       'X86_CPUID_FEATURE_ECX_RDRAND',

    'axmmx':        'X86_CPUID_AMD_FEATURE_EDX_AXMMX',
    '3dnowext':     'X86_CPUID_AMD_FEATURE_EDX_3DNOW_EX',
    '3dnow':        'X86_CPUID_AMD_FEATURE_EDX_3DNOW',
    'svm':          'X86_CPUID_AMD_FEATURE_ECX_SVM',
    'cr8l':         'X86_CPUID_AMD_FEATURE_ECX_CR8L',
    'abm':          'X86_CPUID_AMD_FEATURE_ECX_ABM',
    'sse4a':        'X86_CPUID_AMD_FEATURE_ECX_SSE4A',
    '3dnowprf':     'X86_CPUID_AMD_FEATURE_ECX_3DNOWPRF',
    'xop':          'X86_CPUID_AMD_FEATURE_ECX_XOP',
    'fma4':         'X86_CPUID_AMD_FEATURE_ECX_FMA4',
};
548
## \@ophints values.
## Maps each hint name to a DISOPTYPE_XXX constant name, or to an empty string
## for the hints at the end of the table that have no such constant.
# pylint: disable=line-too-long
g_kdHints = {
    'invalid':                   'DISOPTYPE_INVALID',                   ##<
    'harmless':                  'DISOPTYPE_HARMLESS',                  ##<
    'controlflow':               'DISOPTYPE_CONTROLFLOW',               ##<
    'potentially_dangerous':     'DISOPTYPE_POTENTIALLY_DANGEROUS',     ##<
    'dangerous':                 'DISOPTYPE_DANGEROUS',                 ##<
    'portio':                    'DISOPTYPE_PORTIO',                    ##<
    'privileged':                'DISOPTYPE_PRIVILEGED',                ##<
    'privileged_notrap':         'DISOPTYPE_PRIVILEGED_NOTRAP',         ##<
    'uncond_controlflow':        'DISOPTYPE_UNCOND_CONTROLFLOW',        ##<
    'relative_controlflow':      'DISOPTYPE_RELATIVE_CONTROLFLOW',      ##<
    'cond_controlflow':          'DISOPTYPE_COND_CONTROLFLOW',          ##<
    'interrupt':                 'DISOPTYPE_INTERRUPT',                 ##<
    'illegal':                   'DISOPTYPE_ILLEGAL',                   ##<
    'rrm_dangerous':             'DISOPTYPE_RRM_DANGEROUS',             ##< Some additional dangerous ones when recompiling raw r0.
    'rrm_dangerous_16':          'DISOPTYPE_RRM_DANGEROUS_16',          ##< Some additional dangerous ones when recompiling 16-bit raw r0.
    'inhibit_irqs':              'DISOPTYPE_INHIBIT_IRQS',              ##< Will or can inhibit irqs (sti, pop ss, mov ss)
    'x86_portio_read':           'DISOPTYPE_X86_PORTIO_READ',           ##<
    'x86_portio_write':          'DISOPTYPE_X86_PORTIO_WRITE',          ##<
    'x86_invalid_64':            'DISOPTYPE_X86_INVALID_64',            ##< Invalid in 64 bits mode
    'x86_only_64':               'DISOPTYPE_X86_ONLY_64',               ##< Only valid in 64 bits mode
    'x86_default_64_op_size':    'DISOPTYPE_X86_DEFAULT_64_OP_SIZE',    ##< Default 64 bits operand size
    'x86_forced_64_op_size':     'DISOPTYPE_X86_FORCED_64_OP_SIZE',     ##< Forced 64 bits operand size; regardless of prefix bytes
    'x86_rexb_extends_opreg':    'DISOPTYPE_X86_REXB_EXTENDS_OPREG',    ##< REX.B extends the register field in the opcode byte
    'x86_mod_fixed_11':          'DISOPTYPE_X86_MOD_FIXED_11',          ##< modrm.mod is always 11b
    'x86_forced_32_op_size_x86': 'DISOPTYPE_X86_FORCED_32_OP_SIZE_X86', ##< Forced 32 bits operand size; regardless of prefix bytes
                                                                        ##  (only in 16 & 32 bits mode!)
    'x86_avx':                   'DISOPTYPE_X86_AVX',                   ##< AVX,AVX2,++ instruction. Not implemented yet!
    'x86_sse':                   'DISOPTYPE_X86_SSE',                   ##< SSE,SSE2,SSE3,++ instruction. Not implemented yet!
    'x86_mmx':                   'DISOPTYPE_X86_MMX',                   ##< MMX,MMXExt,3DNow,++ instruction. Not implemented yet!
    'x86_fpu':                   'DISOPTYPE_X86_FPU',                   ##< FPU instruction. Not implemented yet!
    'ignores_oz_pfx':            '',                                    ##< Ignores operand size prefix 66h.
    'ignores_rexw':              '',                                    ##< Ignores REX.W.
    'ignores_op_sizes':          '',                                    ##< Shorthand for "ignores_oz_pfx | ignores_rexw".
    'vex_l_zero':                '',                                    ##< VEX.L must be 0.
    'vex_l_ignored':             '',                                    ##< VEX.L is ignored.
    'vex_v_zero':                '',                                    ##< VEX.V must be 0. (generate sub-table?)
    'lock_allowed':              '',                                    ##< Lock prefix allowed.
};
# pylint: enable=line-too-long
591
## \@opxcpttype values (see SDMv2 2.4, 2.7).
## Every exception type name gets its own, initially empty, list.
g_kdXcptTypes = {
    sXcptType: [] for sXcptType in (
        'none',
        '1',
        '2',
        '3',
        '4',
        '4UA',
        '5',
        '5LZ',      # LZ = VEX.L must be zero.
        '6',
        '7',
        '7LZ',
        '8',
        '11',
        '12',
        'E1',
        'E1NF',
        'E2',
        'E3',
        'E3NF',
        'E4',
        'E4NF',
        'E5',
        'E5NF',
        'E6',
        'E6NF',
        'E7NF',
        'E9',
        'E9NF',
        'E10',
        'E11',
        'E12',
        'E12NF',
    )
};
627
628
629def _isValidOpcodeByte(sOpcode):
630 """
631 Checks if sOpcode is a valid lower case opcode byte.
632 Returns true/false.
633 """
634 if len(sOpcode) == 4:
635 if sOpcode[:2] == '0x':
636 if sOpcode[2] in '0123456789abcdef':
637 if sOpcode[3] in '0123456789abcdef':
638 return True;
639 return False;
640
641
642class InstructionMap(object):
643 """
644 Instruction map.
645
646 The opcode map provides the lead opcode bytes (empty for the one byte
647 opcode map). An instruction can be member of multiple opcode maps as long
648 as it uses the same opcode value within the map (because of VEX).
649 """
650
651 kdEncodings = {
652 'legacy': [],
653 'vex1': [], ##< VEX or EVEX prefix with vvvvv = 1
654 'vex2': [], ##< VEX or EVEX prefix with vvvvv = 2
655 'vex3': [], ##< VEX or EVEX prefix with vvvvv = 3
656 'xop8': [], ##< XOP prefix with vvvvv = 8
657 'xop9': [], ##< XOP prefix with vvvvv = 9
658 'xop10': [], ##< XOP prefix with vvvvv = 10
659 };
660 ## Selectors.
661 ## 1. The first value is the number of table entries required by a
662 ## decoder or disassembler for this type of selector.
663 ## 2. The second value is how many entries per opcode byte if applicable.
664 kdSelectors = {
665 'byte': [ 256, 1, ], ##< next opcode byte selects the instruction (default).
666 'byte+pfx': [ 1024, 4, ], ##< next opcode byte selects the instruction together with the 0x66, 0xf2 and 0xf3 prefixes.
667 '/r': [ 8, 1, ], ##< modrm.reg selects the instruction.
668 'memreg /r':[ 16, 1, ], ##< modrm.reg and (modrm.mod == 3) selects the instruction.
669 'mod /r': [ 32, 1, ], ##< modrm.reg and modrm.mod selects the instruction.
670 '!11 /r': [ 8, 1, ], ##< modrm.reg selects the instruction with modrm.mod != 0y11.
671 '11 /r': [ 8, 1, ], ##< modrm.reg select the instruction with modrm.mod == 0y11.
672 '11': [ 64, 1, ], ##< modrm.reg and modrm.rm select the instruction with modrm.mod == 0y11.
673 };
674
675 ## Define the subentry number according to the Instruction::sPrefix
676 ## value for 'byte+pfx' selected tables.
677 kiPrefixOrder = {
678 'none': 0,
679 '0x66': 1,
680 '0xf3': 2,
681 '0xf2': 3,
682 };
683
684 def __init__(self, sName, sIemName = None, asLeadOpcodes = None, sSelector = 'byte+pfx',
685 sEncoding = 'legacy', sDisParse = None):
686 assert sSelector in self.kdSelectors;
687 assert sEncoding in self.kdEncodings;
688 if asLeadOpcodes is None:
689 asLeadOpcodes = [];
690 else:
691 for sOpcode in asLeadOpcodes:
692 assert _isValidOpcodeByte(sOpcode);
693 assert sDisParse is None or sDisParse.startswith('IDX_Parse');
694
695 self.sName = sName;
696 self.sIemName = sIemName;
697 self.asLeadOpcodes = asLeadOpcodes; ##< Lead opcode bytes formatted as hex strings like '0x0f'.
698 self.sSelector = sSelector; ##< The member selector, see kdSelectors.
699 self.sEncoding = sEncoding; ##< The encoding, see kdSelectors.
700 self.aoInstructions = [] # type: Instruction
701 self.sDisParse = sDisParse; ##< IDX_ParseXXX.
702
703 def copy(self, sNewName, sPrefixFilter = None):
704 """
705 Copies the table with filtering instruction by sPrefix if not None.
706 """
707 oCopy = InstructionMap(sNewName, sIemName = self.sIemName, asLeadOpcodes = self.asLeadOpcodes,
708 sSelector = 'byte' if sPrefixFilter is not None and self.sSelector == 'byte+pfx'
709 else self.sSelector,
710 sEncoding = self.sEncoding, sDisParse = self.sDisParse);
711 if sPrefixFilter is None:
712 oCopy.aoInstructions = list(self.aoInstructions);
713 else:
714 oCopy.aoInstructions = [oInstr for oInstr in self.aoInstructions if oInstr.sPrefix == sPrefixFilter];
715 return oCopy;
716
717 def getTableSize(self):
718 """
719 Number of table entries. This corresponds directly to the selector.
720 """
721 return self.kdSelectors[self.sSelector][0];
722
723 def getEntriesPerByte(self):
724 """
725 Number of table entries per opcode bytes.
726
727 This only really makes sense for the 'byte' and 'byte+pfx' selectors, for
728 the others it will just return 1.
729 """
730 return self.kdSelectors[self.sSelector][1];
731
732 def getInstructionIndex(self, oInstr):
733 """
734 Returns the table index for the instruction.
735 """
736 bOpcode = oInstr.getOpcodeByte();
737
738 # The byte selectors are simple. We need a full opcode byte and need just return it.
739 if self.sSelector == 'byte':
740 assert oInstr.sOpcode[:2] == '0x' and len(oInstr.sOpcode) == 4, str(oInstr);
741 return bOpcode;
742
743 # The byte + prefix selector is similarly simple, though requires a prefix as well as the full opcode.
744 if self.sSelector == 'byte+pfx':
745 assert oInstr.sOpcode[:2] == '0x' and len(oInstr.sOpcode) == 4, str(oInstr);
746 assert self.kiPrefixOrder.get(oInstr.sPrefix, -16384) >= 0;
747 return bOpcode * 4 + self.kiPrefixOrder.get(oInstr.sPrefix, -16384);
748
749 # The other selectors needs masking and shifting.
750 if self.sSelector == '/r':
751 return (bOpcode >> 3) & 0x7;
752
753 if self.sSelector == 'mod /r':
754 return (bOpcode >> 3) & 0x1f;
755
756 if self.sSelector == 'memreg /r':
757 return ((bOpcode >> 3) & 0x7) | (int((bOpcode >> 6) == 3) << 3);
758
759 if self.sSelector == '!11 /r':
760 assert (bOpcode & 0xc0) != 0xc, str(oInstr);
761 return (bOpcode >> 3) & 0x7;
762
763 if self.sSelector == '11 /r':
764 assert (bOpcode & 0xc0) == 0xc, str(oInstr);
765 return (bOpcode >> 3) & 0x7;
766
767 if self.sSelector == '11':
768 assert (bOpcode & 0xc0) == 0xc, str(oInstr);
769 return bOpcode & 0x3f;
770
771 assert False, self.sSelector;
772 return -1;
773
def getInstructionsInTableOrder(self):
    """
    Get instructions in table order.

    Returns a list with self.getTableSize() entries.  An entry is:
        - None when no instruction was specified for that index,
        - a single instruction when exactly one maps to the index, or
        - a list of instructions when several share the index (special
          encodings where for instance a prefix like REX.W selects a
          different instruction, or different CPUs have different
          instructions or prefixes in the same place).

    Instructions without an opcode (sOpcode not set) are left out.
    """
    cEntries = self.getTableSize();
    aoSlots  = [None] * cEntries;

    for oCurInstr in self.aoInstructions:
        if not oCurInstr.sOpcode:
            continue; # No opcode, no table slot.

        idxSlot = self.getInstructionIndex(oCurInstr);
        assert idxSlot < cEntries, str(idxSlot);

        oOccupant = aoSlots[idxSlot];
        if isinstance(oOccupant, list):
            # Third or later instruction for this slot.
            oOccupant.append(oCurInstr);
        elif oOccupant is not None:
            # Second instruction for this slot: switch to a list.
            aoSlots[idxSlot] = [oOccupant, oCurInstr];
        else:
            aoSlots[idxSlot] = oCurInstr;

    return aoSlots;
805
806
def getDisasTableName(self):
    """
    Returns the disassembler table name for this map.

    The name is 'g_aDisas' followed by the '_' separated words of self.sName.
    The 'm' and 'r' suffix words and two digit lower case hex words keep
    their underscore; any other word is appended with its first character
    upper-cased after turning 'grp' and 'map' into 'Grp' and 'Map'.
    """
    asParts = ['g_aDisas',];
    for sPart in self.sName.split('_'):
        # 'm' is the suffix indicating modrm.mod==mem and 'r' the one
        # indicating modrm.mod==reg; these and opcode bytes are kept verbatim.
        if sPart in ('m', 'r') or (len(sPart) == 2 and re.match('^[a-f0-9][a-f0-9]$', sPart)):
            asParts.append('_' + sPart);
        else:
            sPart = sPart.replace('grp', 'Grp').replace('map', 'Map');
            asParts.append(sPart[0].upper() + sPart[1:]);
    return ''.join(asParts);
824
def getDisasRangeName(self):
    """
    Returns the disassembler table range name for this map.

    This is the table name from getDisasTableName() with 'g_aDisas' replaced
    by 'g_Disas' and 'Range' appended.
    """
    sTableName = self.getDisasTableName();
    return '%sRange' % (sTableName.replace('g_aDisas', 'g_Disas'),);
830
def isVexMap(self):
    """ Returns True if a VEX map, i.e. when self.sEncoding starts with 'vex'. """
    sVexPrefix = 'vex';
    return self.sEncoding.startswith(sVexPrefix);
834
835
class TestType(object):
    """
    Test value type.

    This base class handles integer like values, turning their textual form
    into little endian byte arrays.  The fUnsigned constructor parameter sets
    the default stance on zero vs sign extending; a value prefixed with '+'
    or '-' is always flagged for sign extension.
    """
    def __init__(self, sName, acbSizes = None, fUnsigned = True):
        self.sName     = sName;
        self.fUnsigned = fUnsigned;
        # Normal sizes (in bytes) a value gets padded up to.
        self.acbSizes  = acbSizes if acbSizes is not None else [1, 2, 4, 8, 16, 32];

    class BadValue(Exception):
        """ Bad value exception, the text is available as sMessage. """
        def __init__(self, sMessage):
            Exception.__init__(self, sMessage);
            self.sMessage = sMessage;

    ## For ascii ~ operator: maps each lower case hex digit to its inverse.
    kdHexInv = dict(zip('0123456789abcdef', 'fedcba9876543210'));

    def get(self, sValue):
        """
        Converts sValue to the shortest normal sized byte representation.

        Returns ((fSignExtend, bytearray), ) or ((fSignExtend, bytearray), (fSignExtend, bytearray), ).
        The latter form is for AND+OR pairs where the first entry is what to
        AND with the field and the second the one to OR with.  This base
        implementation always returns the single entry form, with the bytes
        in little endian order.

        Raises BadValue if invalid value.
        """
        if not sValue:
            raise TestType.BadValue('empty value');

        # An explicit sign forces sign extension and moves the spot where a
        # hexadecimal '0x' prefix can be found by one character.
        fExplicitSign = sValue[0] in ('-', '+');
        offDigits     = 1 if fExplicitSign else 0;
        fSignExtend   = True if fExplicitSign else not self.fUnsigned;
        fHex          = len(sValue) > offDigits + 2 and sValue[offDigits : offDigits + 2].lower() == '0x';

        # Try convert it to a long integer.
        try:
            iValue = long(sValue, 16 if fHex else 10);
        except Exception as oXcpt:
            raise TestType.BadValue('failed to convert "%s" to integer (%s)' % (sValue, oXcpt));

        # Get the hex digits.  Negative values are converted manually to two's
        # complement by inverting the digits of -n - 1.
        fNegative = iValue < 0;
        sHex = hex(-iValue - 1 if fNegative else iValue);
        if sys.version_info[0] < 3:
            assert sHex[-1] == 'L';
            sHex = sHex[:-1];
        assert sHex[:2] == '0x';
        sHex = sHex[2:];
        if fNegative:
            sHex = ''.join([self.kdHexInv[sDigit] for sDigit in sHex]);
            if fSignExtend and sHex[0] not in '89abcdef':
                sHex = 'f' + sHex; # Make sure the top bit is set.

        # Work out the natural number of digits: the first normal size that
        # fits, or a multiple of the largest normal size.
        cDigits    = len(sHex);
        cMaxDigits = self.acbSizes[-1] * 2;
        if cDigits <= cMaxDigits:
            cNaturalDigits = next(cb * 2 for cb in self.acbSizes if cDigits <= cb * 2);
        else:
            cNaturalDigits = ((cDigits + cMaxDigits - 1) // cMaxDigits) * cMaxDigits;
        assert isinstance(cNaturalDigits, int)

        # Pad on the left using the sign digit.
        cNeeded = cNaturalDigits - cDigits;
        if cNeeded != 0:
            sHex = (('f' if fNegative else '0') * cNeeded) + sHex;

        # Walk the digit pairs from the end to produce a little endian bytearray.
        abValue = bytearray(int(sHex[offEnd - 2 : offEnd], 16) for offEnd in range(len(sHex), 0, -2));

        return ((fSignExtend, abValue),);

    def validate(self, sValue):
        """
        Returns True if value is okay, error message on failure.
        """
        try:
            self.get(sValue);
        except TestType.BadValue as oXcpt:
            return oXcpt.sMessage;
        else:
            return True;

    def isAndOrPair(self, sValue):
        """
        Checks if sValue is a pair (never the case for plain integers).
        """
        del sValue; # Unused by the base class.
        return False;
960
961
class TestTypeEflags(TestType):
    """
    Special value parsing for EFLAGS/RFLAGS/FLAGS.

    Values are comma separated flag mnemonics which are looked up in
    g_kdEFlagsMnemonics.  A mnemonic whose constant is prefixed by '!' clears
    the flag, any other mnemonic sets it.
    """

    ## The mnemonics that stand for a zero (cleared) flag value.
    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 };

    def __init__(self, sName):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);

    def get(self, sValue):
        """
        Converts a comma separated list of flag mnemonics.

        Returns the TestType.get() result for the mask of flags to set when
        no flag is cleared.  Otherwise returns a (clear, set) pair, each
        entry being the (fSignExtend, bytearray) form of the mask of flags to
        clear and to set respectively.

        Raises BadValue on unknown mnemonics.
        """
        fClear = 0;
        fSet   = 0;
        for sFlag in sValue.split(','):
            sConstant = g_kdEFlagsMnemonics.get(sFlag, None);
            if sConstant is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sFlag, sValue))
            if sConstant[0] == '!':
                fClear |= g_kdX86EFlagsConstants[sConstant[1:]];
            else:
                fSet   |= g_kdX86EFlagsConstants[sConstant];

        aoSet = TestType.get(self, '0x%x' % (fSet,));
        if fClear != 0:
            aoClear = TestType.get(self, '%#x' % (fClear,))
            assert self.isAndOrPair(sValue) is True;
            return (aoClear[0], aoSet[0]);
        assert self.isAndOrPair(sValue) is False;
        return aoSet;

    def isAndOrPair(self, sValue):
        """
        Checks if sValue is a pair, i.e. whether it names any of the zero
        value flags in kdZeroValueFlags.

        The value is split on ',' like get() does and whole mnemonics are
        compared.  A substring search would misfire on any mnemonic that
        merely contains a zero value flag name (the way "iopl" contains
        "pl") and thereby trip the consistency assertions in get().
        """
        return any(sFlag in self.kdZeroValueFlags for sFlag in sValue.split(','));
997
class TestTypeFromDict(TestType):
    """
    Value parsing for fields given as comma separated flag names, where the
    names are resolved using a dictionary of constants (e.g. CR0).

    Each name is upper-cased, prefixed by sConstantPrefix and looked up in
    kdConstantsAndValues; the values found are ORed together.
    """

    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 };

    def __init__(self, sName, kdConstantsAndValues, sConstantPrefix):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);
        self.sConstantPrefix      = sConstantPrefix;
        self.kdConstantsAndValues = kdConstantsAndValues;

    def get(self, sValue):
        """
        Converts the flag list to the TestType.get() form of the combined value.

        Raises BadValue if a flag name isn't found in the dictionary.
        """
        fCombined = 0;
        for sFlagName in sValue.split(','):
            sConstant = self.sConstantPrefix + sFlagName.upper();
            fFlagBits = self.kdConstantsAndValues.get(sConstant);
            if fFlagBits is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sFlagName, sValue))
            fCombined |= fFlagBits;
        return TestType.get(self, '0x%x' % (fCombined,));
1018
1019
class TestInOut(object):
    """
    One input or output state modifier.

    This should be thought as values to modify BS3REGCTX and extended (needs
    to be structured) state.

    An instance pairs a field name (kdFields key) with an assignment operator
    (kasOperators entry), the value text and the value type name.
    """
    ## Assigned operators.
    kasOperators = [
        '&|=',  # Special AND(INV)+OR operator for use with EFLAGS.
        '&~=',
        '&=',
        '|=',
        '='
    ];
    ## Types
    # NOTE(review): 'int' is constructed with the TestType default of
    # fUnsigned=True, which makes it behave exactly like 'uint' - confirm
    # whether sign extension by default was intended here.
    kdTypes = {
        'uint':  TestType('uint', fUnsigned = True),
        'int':   TestType('int'),
        'efl':   TestTypeEflags('efl'),
        'cr0':   TestTypeFromDict('cr0', g_kdX86Cr0Constants, 'X86_CR0_'),
        'cr4':   TestTypeFromDict('cr4', g_kdX86Cr4Constants, 'X86_CR4_'),
        'xcr0':  TestTypeFromDict('xcr0', g_kdX86XSaveCConstants, 'XSAVE_C_'),
    };
    ## CPU context fields.
    kdFields = {
        # name:         ( default type, [both|input|output], )
        # Operands.
        'op1':          ( 'uint', 'both',   ), ## \@op1
        'op2':          ( 'uint', 'both',   ), ## \@op2
        'op3':          ( 'uint', 'both',   ), ## \@op3
        'op4':          ( 'uint', 'both',   ), ## \@op4
        # Flags.
        'efl':          ( 'efl',  'both',   ),
        'efl_undef':    ( 'uint', 'output', ),
        # 8-bit GPRs.
        'al':           ( 'uint', 'both',   ),
        'cl':           ( 'uint', 'both',   ),
        'dl':           ( 'uint', 'both',   ),
        'bl':           ( 'uint', 'both',   ),
        'ah':           ( 'uint', 'both',   ),
        'ch':           ( 'uint', 'both',   ),
        'dh':           ( 'uint', 'both',   ),
        'bh':           ( 'uint', 'both',   ),
        'r8l':          ( 'uint', 'both',   ),
        'r9l':          ( 'uint', 'both',   ),
        'r10l':         ( 'uint', 'both',   ),
        'r11l':         ( 'uint', 'both',   ),
        'r12l':         ( 'uint', 'both',   ),
        'r13l':         ( 'uint', 'both',   ),
        'r14l':         ( 'uint', 'both',   ),
        'r15l':         ( 'uint', 'both',   ),
        # 16-bit GPRs.
        'ax':           ( 'uint', 'both',   ),
        'dx':           ( 'uint', 'both',   ),
        'cx':           ( 'uint', 'both',   ),
        'bx':           ( 'uint', 'both',   ),
        'sp':           ( 'uint', 'both',   ),
        'bp':           ( 'uint', 'both',   ),
        'si':           ( 'uint', 'both',   ),
        'di':           ( 'uint', 'both',   ),
        'r8w':          ( 'uint', 'both',   ),
        'r9w':          ( 'uint', 'both',   ),
        'r10w':         ( 'uint', 'both',   ),
        'r11w':         ( 'uint', 'both',   ),
        'r12w':         ( 'uint', 'both',   ),
        'r13w':         ( 'uint', 'both',   ),
        'r14w':         ( 'uint', 'both',   ),
        'r15w':         ( 'uint', 'both',   ),
        # 32-bit GPRs.
        'eax':          ( 'uint', 'both',   ),
        'edx':          ( 'uint', 'both',   ),
        'ecx':          ( 'uint', 'both',   ),
        'ebx':          ( 'uint', 'both',   ),
        'esp':          ( 'uint', 'both',   ),
        'ebp':          ( 'uint', 'both',   ),
        'esi':          ( 'uint', 'both',   ),
        'edi':          ( 'uint', 'both',   ),
        'r8d':          ( 'uint', 'both',   ),
        'r9d':          ( 'uint', 'both',   ),
        'r10d':         ( 'uint', 'both',   ),
        'r11d':         ( 'uint', 'both',   ),
        'r12d':         ( 'uint', 'both',   ),
        'r13d':         ( 'uint', 'both',   ),
        'r14d':         ( 'uint', 'both',   ),
        'r15d':         ( 'uint', 'both',   ),
        # 64-bit GPRs.
        'rax':          ( 'uint', 'both',   ),
        'rdx':          ( 'uint', 'both',   ),
        'rcx':          ( 'uint', 'both',   ),
        'rbx':          ( 'uint', 'both',   ),
        'rsp':          ( 'uint', 'both',   ),
        'rbp':          ( 'uint', 'both',   ),
        'rsi':          ( 'uint', 'both',   ),
        'rdi':          ( 'uint', 'both',   ),
        'r8':           ( 'uint', 'both',   ),
        'r9':           ( 'uint', 'both',   ),
        'r10':          ( 'uint', 'both',   ),
        'r11':          ( 'uint', 'both',   ),
        'r12':          ( 'uint', 'both',   ),
        'r13':          ( 'uint', 'both',   ),
        'r14':          ( 'uint', 'both',   ),
        'r15':          ( 'uint', 'both',   ),
        # 16-bit, 32-bit or 64-bit registers according to operand size.
        'oz.rax':       ( 'uint', 'both',   ),
        'oz.rdx':       ( 'uint', 'both',   ),
        'oz.rcx':       ( 'uint', 'both',   ),
        'oz.rbx':       ( 'uint', 'both',   ),
        'oz.rsp':       ( 'uint', 'both',   ),
        'oz.rbp':       ( 'uint', 'both',   ),
        'oz.rsi':       ( 'uint', 'both',   ),
        'oz.rdi':       ( 'uint', 'both',   ),
        'oz.r8':        ( 'uint', 'both',   ),
        'oz.r9':        ( 'uint', 'both',   ),
        'oz.r10':       ( 'uint', 'both',   ),
        'oz.r11':       ( 'uint', 'both',   ),
        'oz.r12':       ( 'uint', 'both',   ),
        'oz.r13':       ( 'uint', 'both',   ),
        'oz.r14':       ( 'uint', 'both',   ),
        'oz.r15':       ( 'uint', 'both',   ),
        # Control registers.
        'cr0':          ( 'cr0',  'both',   ),
        'cr4':          ( 'cr4',  'both',   ),
        'xcr0':         ( 'xcr0', 'both',   ),
        # FPU Registers
        'fcw':          ( 'uint', 'both',   ),
        'fsw':          ( 'uint', 'both',   ),
        'ftw':          ( 'uint', 'both',   ),
        'fop':          ( 'uint', 'both',   ),
        'fpuip':        ( 'uint', 'both',   ),
        'fpucs':        ( 'uint', 'both',   ),
        'fpudp':        ( 'uint', 'both',   ),
        'fpuds':        ( 'uint', 'both',   ),
        'mxcsr':        ( 'uint', 'both',   ),
        'st0':          ( 'uint', 'both',   ),
        'st1':          ( 'uint', 'both',   ),
        'st2':          ( 'uint', 'both',   ),
        'st3':          ( 'uint', 'both',   ),
        'st4':          ( 'uint', 'both',   ),
        'st5':          ( 'uint', 'both',   ),
        'st6':          ( 'uint', 'both',   ),
        'st7':          ( 'uint', 'both',   ),
        # MMX registers.
        'mm0':          ( 'uint', 'both',   ),
        'mm1':          ( 'uint', 'both',   ),
        'mm2':          ( 'uint', 'both',   ),
        'mm3':          ( 'uint', 'both',   ),
        'mm4':          ( 'uint', 'both',   ),
        'mm5':          ( 'uint', 'both',   ),
        'mm6':          ( 'uint', 'both',   ),
        'mm7':          ( 'uint', 'both',   ),
        # SSE registers.
        'xmm0':         ( 'uint', 'both',   ),
        'xmm1':         ( 'uint', 'both',   ),
        'xmm2':         ( 'uint', 'both',   ),
        'xmm3':         ( 'uint', 'both',   ),
        'xmm4':         ( 'uint', 'both',   ),
        'xmm5':         ( 'uint', 'both',   ),
        'xmm6':         ( 'uint', 'both',   ),
        'xmm7':         ( 'uint', 'both',   ),
        'xmm8':         ( 'uint', 'both',   ),
        'xmm9':         ( 'uint', 'both',   ),
        'xmm10':        ( 'uint', 'both',   ),
        'xmm11':        ( 'uint', 'both',   ),
        'xmm12':        ( 'uint', 'both',   ),
        'xmm13':        ( 'uint', 'both',   ),
        'xmm14':        ( 'uint', 'both',   ),
        'xmm15':        ( 'uint', 'both',   ),
        'xmm0.lo':      ( 'uint', 'both',   ),
        'xmm1.lo':      ( 'uint', 'both',   ),
        'xmm2.lo':      ( 'uint', 'both',   ),
        'xmm3.lo':      ( 'uint', 'both',   ),
        'xmm4.lo':      ( 'uint', 'both',   ),
        'xmm5.lo':      ( 'uint', 'both',   ),
        'xmm6.lo':      ( 'uint', 'both',   ),
        'xmm7.lo':      ( 'uint', 'both',   ),
        'xmm8.lo':      ( 'uint', 'both',   ),
        'xmm9.lo':      ( 'uint', 'both',   ),
        'xmm10.lo':     ( 'uint', 'both',   ),
        'xmm11.lo':     ( 'uint', 'both',   ),
        'xmm12.lo':     ( 'uint', 'both',   ),
        'xmm13.lo':     ( 'uint', 'both',   ),
        'xmm14.lo':     ( 'uint', 'both',   ),
        'xmm15.lo':     ( 'uint', 'both',   ),
        'xmm0.hi':      ( 'uint', 'both',   ),
        'xmm1.hi':      ( 'uint', 'both',   ),
        'xmm2.hi':      ( 'uint', 'both',   ),
        'xmm3.hi':      ( 'uint', 'both',   ),
        'xmm4.hi':      ( 'uint', 'both',   ),
        'xmm5.hi':      ( 'uint', 'both',   ),
        'xmm6.hi':      ( 'uint', 'both',   ),
        'xmm7.hi':      ( 'uint', 'both',   ),
        'xmm8.hi':      ( 'uint', 'both',   ),
        'xmm9.hi':      ( 'uint', 'both',   ),
        'xmm10.hi':     ( 'uint', 'both',   ),
        'xmm11.hi':     ( 'uint', 'both',   ),
        'xmm12.hi':     ( 'uint', 'both',   ),
        'xmm13.hi':     ( 'uint', 'both',   ),
        'xmm14.hi':     ( 'uint', 'both',   ),
        'xmm15.hi':     ( 'uint', 'both',   ),
        'xmm0.lo.zx':   ( 'uint', 'both',   ),
        'xmm1.lo.zx':   ( 'uint', 'both',   ),
        'xmm2.lo.zx':   ( 'uint', 'both',   ),
        'xmm3.lo.zx':   ( 'uint', 'both',   ),
        'xmm4.lo.zx':   ( 'uint', 'both',   ),
        'xmm5.lo.zx':   ( 'uint', 'both',   ),
        'xmm6.lo.zx':   ( 'uint', 'both',   ),
        'xmm7.lo.zx':   ( 'uint', 'both',   ),
        'xmm8.lo.zx':   ( 'uint', 'both',   ),
        'xmm9.lo.zx':   ( 'uint', 'both',   ),
        'xmm10.lo.zx':  ( 'uint', 'both',   ),
        'xmm11.lo.zx':  ( 'uint', 'both',   ),
        'xmm12.lo.zx':  ( 'uint', 'both',   ),
        'xmm13.lo.zx':  ( 'uint', 'both',   ),
        'xmm14.lo.zx':  ( 'uint', 'both',   ),
        'xmm15.lo.zx':  ( 'uint', 'both',   ),
        'xmm0.dw0':     ( 'uint', 'both',   ),
        'xmm1.dw0':     ( 'uint', 'both',   ),
        'xmm2.dw0':     ( 'uint', 'both',   ),
        'xmm3.dw0':     ( 'uint', 'both',   ),
        'xmm4.dw0':     ( 'uint', 'both',   ),
        'xmm5.dw0':     ( 'uint', 'both',   ),
        'xmm6.dw0':     ( 'uint', 'both',   ),
        'xmm7.dw0':     ( 'uint', 'both',   ),
        'xmm8.dw0':     ( 'uint', 'both',   ),
        'xmm9.dw0':     ( 'uint', 'both',   ),
        'xmm10.dw0':    ( 'uint', 'both',   ),
        'xmm11.dw0':    ( 'uint', 'both',   ),
        'xmm12.dw0':    ( 'uint', 'both',   ),
        'xmm13.dw0':    ( 'uint', 'both',   ),
        'xmm14.dw0':    ( 'uint', 'both',   ),
        'xmm15.dw0':    ( 'uint', 'both',   ),
        'xmm15_dw0':    ( 'uint', 'both',   ), # Legacy misspelling of 'xmm15.dw0', kept so old specs still parse.
        # AVX registers.
        'ymm0':         ( 'uint', 'both',   ),
        'ymm1':         ( 'uint', 'both',   ),
        'ymm2':         ( 'uint', 'both',   ),
        'ymm3':         ( 'uint', 'both',   ),
        'ymm4':         ( 'uint', 'both',   ),
        'ymm5':         ( 'uint', 'both',   ),
        'ymm6':         ( 'uint', 'both',   ),
        'ymm7':         ( 'uint', 'both',   ),
        'ymm8':         ( 'uint', 'both',   ),
        'ymm9':         ( 'uint', 'both',   ),
        'ymm10':        ( 'uint', 'both',   ),
        'ymm11':        ( 'uint', 'both',   ),
        'ymm12':        ( 'uint', 'both',   ),
        'ymm13':        ( 'uint', 'both',   ),
        'ymm14':        ( 'uint', 'both',   ),
        'ymm15':        ( 'uint', 'both',   ),

        # Special ones.
        'value.xcpt':   ( 'uint', 'output', ),
    };

    def __init__(self, sField, sOp, sValue, sType):
        """
        Creates a modifier.

        sField must be a kdFields key and sOp one of kasOperators; all four
        arguments must be strings (asserted).
        """
        assert sField in self.kdFields;
        assert sOp in self.kasOperators;
        self.sField = sField;
        self.sOp    = sOp;
        self.sValue = sValue;
        self.sType  = sType;
        assert isinstance(sField, str);
        assert isinstance(sOp, str);
        assert isinstance(sType, str);
        assert isinstance(sValue, str);
1285
1286
class TestSelector(object):
    """
    One selector for an instruction test.

    A selector is a comparison (kasCompareOps) between a variable
    (kdVariables key) and one of the values valid for that variable.
    """
    ## Selector compare operators.
    kasCompareOps = [ '==', '!=' ];
    ## Selector variables and their valid values.
    kdVariables = {
        # Operand size.
        'size':     { 'o16': 'size_o16', 'o32': 'size_o32', 'o64': 'size_o64', },
        # VEX.L value.
        'vex.l':    { '0': 'vexl_0', '1': 'vexl_1', },
        # Execution ring.
        'ring':     {
            '0':    'ring_0',
            '1':    'ring_1',
            '2':    'ring_2',
            '3':    'ring_3',
            '0..2': 'ring_0_thru_2',
            '1..3': 'ring_1_thru_3',
        },
        # Basic code mode.
        'codebits': { '64': 'code_64bit', '32': 'code_32bit', '16': 'code_16bit', },
        # cpu modes.
        'mode':     {
            'real': 'mode_real',
            'prot': 'mode_prot',
            'long': 'mode_long',
            'v86':  'mode_v86',
            'smm':  'mode_smm',
            'vmx':  'mode_vmx',
            'svm':  'mode_svm',
        },
        # paging on/off
        'paging':   { 'on': 'paging_on', 'off': 'paging_off', },
        # CPU vendor
        'vendor':   { 'amd': 'vendor_amd', 'intel': 'vendor_intel', 'via': 'vendor_via', },
    };
    ## Selector shorthand predicates.
    ## These translates into variable expressions.
    kdPredicates = {
        # Operand size.
        'o16':          'size==o16',
        'o32':          'size==o32',
        'o64':          'size==o64',
        # Execution ring.
        'ring0':        'ring==0',
        '!ring0':       'ring==1..3',
        'ring1':        'ring==1',
        'ring2':        'ring==2',
        'ring3':        'ring==3',
        'user':         'ring==3',
        'supervisor':   'ring==0..2',
        # Basic code mode.
        '16-bit':       'codebits==16',
        '32-bit':       'codebits==32',
        '64-bit':       'codebits==64',
        # CPU modes.
        'real':         'mode==real',
        'prot':         'mode==prot',
        'long':         'mode==long',
        'v86':          'mode==v86',
        'smm':          'mode==smm',
        'vmx':          'mode==vmx',
        'svm':          'mode==svm',
        # Paging.
        'paging':       'paging==on',
        '!paging':      'paging==off',
        # CPU vendor.
        'amd':          'vendor==amd',
        '!amd':         'vendor!=amd',
        'intel':        'vendor==intel',
        '!intel':       'vendor!=intel',
        'via':          'vendor==via',
        '!via':         'vendor!=via',
    };

    def __init__(self, sVariable, sOp, sValue):
        """
        Creates a selector; the variable, operator and value are asserted to
        be known ones.
        """
        assert sVariable in self.kdVariables;
        assert sOp in self.kasCompareOps;
        dValidValues = self.kdVariables[sVariable];
        assert sValue in dValidValues;
        self.sValue    = sValue;
        self.sOp       = sOp;
        self.sVariable = sVariable;
1383
1384
1385class InstructionTest(object):
1386 """
1387 Instruction test.
1388 """
1389
1390 def __init__(self, oInstr): # type: (InstructionTest, Instruction)
1391 self.oInstr = oInstr # type: InstructionTest
1392 self.aoInputs = [] # type: List[TestInOut]
1393 self.aoOutputs = [] # type: List[TestInOut]
1394 self.aoSelectors = [] # type: List[TestSelector]
1395
1396 def toString(self, fRepr = False):
1397 """
1398 Converts it to string representation.
1399 """
1400 asWords = [];
1401 if self.aoSelectors:
1402 for oSelector in self.aoSelectors:
1403 asWords.append('%s%s%s' % (oSelector.sVariable, oSelector.sOp, oSelector.sValue,));
1404 asWords.append('/');
1405
1406 for oModifier in self.aoInputs:
1407 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
1408
1409 asWords.append('->');
1410
1411 for oModifier in self.aoOutputs:
1412 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
1413
1414 if fRepr:
1415 return '<' + ' '.join(asWords) + '>';
1416 return ' '.join(asWords);
1417
1418 def __str__(self):
1419 """ Provide string represenation. """
1420 return self.toString(False);
1421
1422 def __repr__(self):
1423 """ Provide unambigious string representation. """
1424 return self.toString(True);
1425
class Operand(object):
    """
    Instruction operand.

    Pairs an operand location (g_kdOpLocations key) with an operand type
    (g_kdOpTypes key).
    """

    def __init__(self, sWhere, sType):
        assert sWhere in g_kdOpLocations, sWhere;
        assert sType  in g_kdOpTypes, sType;
        self.sType  = sType;  ##< g_kdOpTypes
        self.sWhere = sWhere; ##< g_kdOpLocations

    def usesModRM(self):
        """ Returns True if using some form of ModR/M encoding (type starts with E, G or M). """
        chFirst = self.sType[0];
        return chFirst in ('E', 'G', 'M');
1440
1441
1442
class Instruction(object): # pylint: disable=too-many-instance-attributes
    """
    Instruction.

    Collects what is known about one instruction: the core attributes
    (mnemonic, operands, opcode, flags, tests, ...), the implementation
    attributes (statistics and function names, stub indicators), where it was
    found in the sources and some intermediate input fields.
    """

    def __init__(self, sSrcFile, iLine):
        ## @name Core attributes.
        ## @{
        self.oParent        = None      # type: Instruction
        self.sMnemonic      = None;
        self.sBrief         = None;
        self.asDescSections = []        # type: List[str]
        self.aoMaps         = []        # type: List[InstructionMap]
        self.aoOperands     = []        # type: List[Operand]
        self.sPrefix        = None;     ##< Single prefix: None, 'none', 0x66, 0xf3, 0xf2
        self.sOpcode        = None      # type: str
        self.sSubOpcode     = None      # type: str
        self.sEncoding      = None;
        self.asFlTest       = None;
        self.asFlModify     = None;
        self.asFlUndefined  = None;
        self.asFlSet        = None;
        self.asFlClear      = None;
        self.dHints         = {};       ##< Dictionary of instruction hints, flags, whatnot. (Dictionary for speed; dummy value).
        self.sDisEnum       = None;     ##< OP_XXXX value.  Default is based on the uppercased mnemonic.
        self.asCpuIds       = [];       ##< The CPUID feature bit names for this instruction. If multiple, assume AND.
        self.asReqFeatures  = [];       ##< Which features are required to be enabled to run this instruction.
        self.aoTests        = []        # type: List[InstructionTest]
        self.sMinCpu        = None;     ##< Indicates the minimum CPU required for the instruction. Not set when oCpuExpr is.
        self.oCpuExpr       = None;     ##< Some CPU restriction expression...
        self.sGroup         = None;
        self.fUnused        = False;    ##< Unused instruction.
        self.fInvalid       = False;    ##< Invalid instruction (like UD2).
        self.sInvalidStyle  = None;     ##< Invalid behaviour style (g_kdInvalidStyles),
        self.sXcptType      = None;     ##< Exception type (g_kdXcptTypes).
        ## @}

        ## @name Implementation attributes.
        ## @{
        self.sStats         = None;
        self.sFunction      = None;
        self.fStub          = False;
        self.fUdStub        = False;
        ## @}

        ## @name Decoding info
        ## @{
        self.sSrcFile       = sSrcFile;
        self.iLineCreated   = iLine;
        self.iLineCompleted = None;
        self.cOpTags        = 0;
        self.iLineFnIemOpMacro  = -1;
        self.iLineMnemonicMacro = -1;
        ## @}

        ## @name Intermediate input fields.
        ## @{
        self.sRawDisOpNo    = None;
        self.asRawDisParams = [];
        self.sRawIemOpFlags = None;
        self.sRawOldOpcodes = None;
        self.asCopyTests    = [];
        ## @}

    def toString(self, fRepr = False):
        """
        Turn object into a string.

        The result is a '; ' separated list of 'field=value' items, leaving
        out fields that are None.  It is enclosed in '<>' when fRepr is True.
        """
        aasFields = [];

        aasFields.append(['opcode',    self.sOpcode]);
        if self.sPrefix:
            aasFields.append(['prefix', self.sPrefix]);
        aasFields.append(['mnemonic',  self.sMnemonic]);
        for iOperand, oOperand in enumerate(self.aoOperands):
            aasFields.append(['op%u' % (iOperand + 1,), '%s:%s' % (oOperand.sWhere, oOperand.sType,)]);
        if self.aoMaps:
            aasFields.append(['maps', ','.join([oMap.sName for oMap in self.aoMaps])]);
        aasFields.append(['encoding',  self.sEncoding]);
        if self.dHints:
            aasFields.append(['hints', ','.join(self.dHints.keys())]);
        aasFields.append(['disenum',   self.sDisEnum]);
        if self.asCpuIds:
            aasFields.append(['cpuid', ','.join(self.asCpuIds)]);
        aasFields.append(['group',     self.sGroup]);
        if self.fUnused:
            aasFields.append(['unused', 'True']);
        if self.fInvalid:
            aasFields.append(['invalid', 'True']);
        aasFields.append(['invlstyle', self.sInvalidStyle]);
        aasFields.append(['fltest',    self.asFlTest]);
        aasFields.append(['flmodify',  self.asFlModify]);
        aasFields.append(['flundef',   self.asFlUndefined]);
        aasFields.append(['flset',     self.asFlSet]);
        aasFields.append(['flclear',   self.asFlClear]);
        aasFields.append(['mincpu',    self.sMinCpu]);
        aasFields.append(['stats',     self.sStats]);
        aasFields.append(['sFunction', self.sFunction]);
        if self.fStub:
            aasFields.append(['fStub', 'True']);
        if self.fUdStub:
            aasFields.append(['fUdStub', 'True']);
        if self.cOpTags:
            aasFields.append(['optags', str(self.cOpTags)]);
        if self.iLineFnIemOpMacro != -1:
            aasFields.append(['FNIEMOP_XXX', str(self.iLineFnIemOpMacro)]);
        if self.iLineMnemonicMacro != -1:
            aasFields.append(['IEMOP_MNEMMONICn', str(self.iLineMnemonicMacro)]);

        sRet = '<' if fRepr else '';
        for sField, sValue in aasFields:
            if sValue is not None:
                # More than one character means we've already added a field
                # (the repr form starts out with just the '<').
                if len(sRet) > 1:
                    sRet += '; ';
                sRet += '%s=%s' % (sField, sValue,);
        if fRepr:
            sRet += '>';

        return sRet;

    def __str__(self):
        """ Provide string representation. """
        return self.toString(False);

    def __repr__(self):
        """ Provide unambiguous string representation. """
        return self.toString(True);

    @staticmethod
    def _copyOptionalList(asList):
        """ Returns a shallow copy of a list attribute that may be None. """
        return None if asList is None else list(asList);

    def copy(self, oMap = None, sOpcode = None, sSubOpcode = None, sPrefix = None):
        """
        Makes a copy of the object for the purpose of putting in a different map
        or a different place in the current map.

        The oMap, sOpcode, sSubOpcode and sPrefix arguments replace the
        corresponding attributes in the copy when given.  The copy gets this
        instruction as its parent (oParent).  List and dictionary attributes
        are copied one level deep so that changing them on the copy does not
        affect this instruction.
        """
        oCopy = Instruction(self.sSrcFile, self.iLineCreated);

        oCopy.oParent           = self;
        oCopy.sMnemonic         = self.sMnemonic;
        oCopy.sBrief            = self.sBrief;
        oCopy.asDescSections    = list(self.asDescSections);
        oCopy.aoMaps            = [oMap,]    if oMap       else list(self.aoMaps);
        oCopy.aoOperands        = list(self.aoOperands); ## Deeper copy?
        oCopy.sPrefix           = sPrefix    if sPrefix    else self.sPrefix;
        oCopy.sOpcode           = sOpcode    if sOpcode    else self.sOpcode;
        oCopy.sSubOpcode        = sSubOpcode if sSubOpcode else self.sSubOpcode;
        oCopy.sEncoding         = self.sEncoding;
        # The flag lists default to None, so they cannot go thru list() blindly.
        oCopy.asFlTest          = self._copyOptionalList(self.asFlTest);
        oCopy.asFlModify        = self._copyOptionalList(self.asFlModify);
        oCopy.asFlUndefined     = self._copyOptionalList(self.asFlUndefined);
        oCopy.asFlSet           = self._copyOptionalList(self.asFlSet);
        oCopy.asFlClear         = self._copyOptionalList(self.asFlClear);
        oCopy.dHints            = dict(self.dHints);
        oCopy.sDisEnum          = self.sDisEnum;
        oCopy.asCpuIds          = list(self.asCpuIds);
        oCopy.asReqFeatures     = list(self.asReqFeatures);
        oCopy.aoTests           = list(self.aoTests); ## Deeper copy?
        oCopy.sMinCpu           = self.sMinCpu;
        oCopy.oCpuExpr          = self.oCpuExpr;
        oCopy.sGroup            = self.sGroup;
        oCopy.fUnused           = self.fUnused;
        oCopy.fInvalid          = self.fInvalid;
        oCopy.sInvalidStyle     = self.sInvalidStyle;
        oCopy.sXcptType         = self.sXcptType;

        oCopy.sStats            = self.sStats;
        oCopy.sFunction         = self.sFunction;
        oCopy.fStub             = self.fStub;
        oCopy.fUdStub           = self.fUdStub;

        oCopy.iLineCompleted    = self.iLineCompleted;
        oCopy.cOpTags           = self.cOpTags;
        oCopy.iLineFnIemOpMacro = self.iLineFnIemOpMacro;
        oCopy.iLineMnemonicMacro = self.iLineMnemonicMacro;

        oCopy.sRawDisOpNo       = self.sRawDisOpNo;
        oCopy.asRawDisParams    = list(self.asRawDisParams);
        oCopy.sRawIemOpFlags    = self.sRawIemOpFlags;
        oCopy.sRawOldOpcodes    = self.sRawOldOpcodes;
        oCopy.asCopyTests       = list(self.asCopyTests);

        return oCopy;

    def getOpcodeByte(self):
        """
        Decodes sOpcode into a byte range integer value.

        Supported forms are the full hex byte ('0xNN') and the '/r', '11/r'
        and '!11/r' forms, where r is the three bit reg value (0 thru 7) that
        ends up in bits 5:3 of the result.

        Raises exception if sOpcode is None or invalid.
        """
        if self.sOpcode is None:
            raise Exception('No opcode byte for %s!' % (self,));
        sOpcode = str(self.sOpcode);    # pylint type confusion workaround.

        # Full hex byte form.
        if sOpcode[:2] == '0x':
            return int(sOpcode, 16);

        # The reg field is only three bits wide.  Accepting 8 or 9 would
        # spill into the mod field (bits 7:6) when shifting.
        sRegDigits = '01234567';

        # The /r form:
        if len(sOpcode) == 2 and sOpcode[0] == '/' and sOpcode[1] in sRegDigits:
            return int(sOpcode[1:]) << 3;

        # The 11/r form:
        if len(sOpcode) == 4 and sOpcode.startswith('11/') and sOpcode[-1] in sRegDigits:
            return (int(sOpcode[-1:]) << 3) | 0xc0;

        # The !11/r form (returns mod=2, i.e. 0x80, as a stand-in for the memory forms):
        ## @todo this doesn't really work...
        if len(sOpcode) == 5 and sOpcode.startswith('!11/') and sOpcode[-1] in sRegDigits:
            return (int(sOpcode[-1:]) << 3) | 0x80;

        raise Exception('unsupported opcode byte spec "%s" for %s' % (sOpcode, self,));

    @staticmethod
    def _flagsToIntegerMask(asFlags):
        """
        Returns the integer mask value for asFlags.

        Each flag mnemonic is translated thru g_kdEFlagsMnemonics and
        g_kdX86EFlagsConstants.  None and empty lists yield zero.
        """
        uRet = 0;
        if asFlags:
            for sFlag in asFlags:
                sConstant = g_kdEFlagsMnemonics[sFlag];
                assert sConstant[0] != '!', sConstant
                uRet |= g_kdX86EFlagsConstants[sConstant];
        return uRet;

    def getTestedFlagsMask(self):
        """ Returns asFlTest into a integer mask value """
        return self._flagsToIntegerMask(self.asFlTest);

    def getModifiedFlagsMask(self):
        """ Returns asFlModify into a integer mask value """
        return self._flagsToIntegerMask(self.asFlModify);

    def getUndefinedFlagsMask(self):
        """ Returns asFlUndefined into a integer mask value """
        return self._flagsToIntegerMask(self.asFlUndefined);

    def getSetFlagsMask(self):
        """ Returns asFlSet into a integer mask value """
        return self._flagsToIntegerMask(self.asFlSet);

    def getClearedFlagsMask(self):
        """ Returns asFlClear into a integer mask value """
        return self._flagsToIntegerMask(self.asFlClear);

    def onlyInVexMaps(self):
        """ Returns True if only in VEX maps, otherwise False.  (No maps -> False) """
        if not self.aoMaps:
            return False;
        for oMap in self.aoMaps:
            if not oMap.isVexMap():
                return False;
        return True;
1681
1682
1683
## All the instructions.
g_aoAllInstructions = [] # type: List[Instruction]

## All the instructions indexed by statistics name (opstat).
## (Keys are presumably the Instruction.sStats strings - confirm against the code filling it in.)
g_dAllInstructionsByStat = {} # type: Dict[str, Instruction]

## All the instructions indexed by function name (opfunction).
## Each entry is a list since several instructions can share a function name.
## (Keys are presumably the Instruction.sFunction strings - confirm against the code filling it in.)
g_dAllInstructionsByFunction = {} # type: Dict[str, List[Instruction]]

## Instructions tagged by oponlytest
g_aoOnlyTestInstructions = [] # type: List[Instruction]
1695
1696## Instruction maps.
1697g_aoInstructionMaps = [
1698 InstructionMap('one', 'g_apfnOneByteMap', sSelector = 'byte'),
1699 InstructionMap('grp1_80', asLeadOpcodes = ['0x80',], sSelector = '/r'),
1700 InstructionMap('grp1_81', asLeadOpcodes = ['0x81',], sSelector = '/r'),
1701 InstructionMap('grp1_82', asLeadOpcodes = ['0x82',], sSelector = '/r'),
1702 InstructionMap('grp1_83', asLeadOpcodes = ['0x83',], sSelector = '/r'),
1703 InstructionMap('grp1a', asLeadOpcodes = ['0x8f',], sSelector = '/r'),
1704 InstructionMap('grp2_c0', asLeadOpcodes = ['0xc0',], sSelector = '/r'),
1705 InstructionMap('grp2_c1', asLeadOpcodes = ['0xc1',], sSelector = '/r'),
1706 InstructionMap('grp2_d0', asLeadOpcodes = ['0xd0',], sSelector = '/r'),
1707 InstructionMap('grp2_d1', asLeadOpcodes = ['0xd1',], sSelector = '/r'),
1708 InstructionMap('grp2_d2', asLeadOpcodes = ['0xd2',], sSelector = '/r'),
1709 InstructionMap('grp2_d3', asLeadOpcodes = ['0xd3',], sSelector = '/r'),
1710 ## @todo g_apfnEscF1_E0toFF
1711 InstructionMap('grp3_f6', asLeadOpcodes = ['0xf6',], sSelector = '/r'),
1712 InstructionMap('grp3_f7', asLeadOpcodes = ['0xf7',], sSelector = '/r'),
1713 InstructionMap('grp4', asLeadOpcodes = ['0xfe',], sSelector = '/r'),
1714 InstructionMap('grp5', asLeadOpcodes = ['0xff',], sSelector = '/r'),
1715 InstructionMap('grp11_c6_m', asLeadOpcodes = ['0xc6',], sSelector = '!11 /r'),
1716 InstructionMap('grp11_c6_r', asLeadOpcodes = ['0xc6',], sSelector = '11'), # xabort
1717 InstructionMap('grp11_c7_m', asLeadOpcodes = ['0xc7',], sSelector = '!11 /r'),
1718 InstructionMap('grp11_c7_r', asLeadOpcodes = ['0xc7',], sSelector = '11'), # xbegin
1719
1720 InstructionMap('two0f', 'g_apfnTwoByteMap', asLeadOpcodes = ['0x0f',], sDisParse = 'IDX_ParseTwoByteEsc'),
1721 InstructionMap('grp6', 'g_apfnGroup6', asLeadOpcodes = ['0x0f', '0x00',], sSelector = '/r'),
1722 InstructionMap('grp7_m', 'g_apfnGroup7Mem', asLeadOpcodes = ['0x0f', '0x01',], sSelector = '!11 /r'),
1723 InstructionMap('grp7_r', asLeadOpcodes = ['0x0f', '0x01',], sSelector = '11'),
1724 InstructionMap('grp8', asLeadOpcodes = ['0x0f', '0xba',], sSelector = '/r'),
1725 InstructionMap('grp9', 'g_apfnGroup9RegReg', asLeadOpcodes = ['0x0f', '0xc7',], sSelector = 'mod /r'),
1726 ## @todo What about g_apfnGroup9MemReg?
1727 InstructionMap('grp10', None, asLeadOpcodes = ['0x0f', '0xb9',], sSelector = '/r'), # UD1 /w modr/m
1728 InstructionMap('grp12', 'g_apfnGroup12RegReg', asLeadOpcodes = ['0x0f', '0x71',], sSelector = 'mod /r'),
1729 InstructionMap('grp13', 'g_apfnGroup13RegReg', asLeadOpcodes = ['0x0f', '0x72',], sSelector = 'mod /r'),
1730 InstructionMap('grp14', 'g_apfnGroup14RegReg', asLeadOpcodes = ['0x0f', '0x73',], sSelector = 'mod /r'),
1731 InstructionMap('grp15', 'g_apfnGroup15MemReg', asLeadOpcodes = ['0x0f', '0xae',], sSelector = 'memreg /r'),
1732 ## @todo What about g_apfnGroup15RegReg?
1733 InstructionMap('grp16', asLeadOpcodes = ['0x0f', '0x18',], sSelector = 'mod /r'),
1734 InstructionMap('grpA17', asLeadOpcodes = ['0x0f', '0x78',], sSelector = '/r'), # AMD: EXTRQ weirdness
1735 InstructionMap('grpP', asLeadOpcodes = ['0x0f', '0x0d',], sSelector = '/r'), # AMD: prefetch
1736
1737 InstructionMap('three0f38', 'g_apfnThreeByte0f38', asLeadOpcodes = ['0x0f', '0x38',]),
1738 InstructionMap('three0f3a', 'g_apfnThreeByte0f3a', asLeadOpcodes = ['0x0f', '0x3a',]),
1739
1740 InstructionMap('vexmap1', 'g_apfnVexMap1', sEncoding = 'vex1'),
1741 InstructionMap('vexgrp12', 'g_apfnVexGroup12RegReg', sEncoding = 'vex1', asLeadOpcodes = ['0x71',], sSelector = 'mod /r'),
1742 InstructionMap('vexgrp13', 'g_apfnVexGroup13RegReg', sEncoding = 'vex1', asLeadOpcodes = ['0x72',], sSelector = 'mod /r'),
1743 InstructionMap('vexgrp14', 'g_apfnVexGroup14RegReg', sEncoding = 'vex1', asLeadOpcodes = ['0x73',], sSelector = 'mod /r'),
1744 InstructionMap('vexgrp15', 'g_apfnVexGroup15MemReg', sEncoding = 'vex1', asLeadOpcodes = ['0xae',], sSelector = 'memreg /r'),
1745 InstructionMap('vexgrp17', 'g_apfnVexGroup17_f3', sEncoding = 'vex1', asLeadOpcodes = ['0xf3',], sSelector = '/r'),
1746
1747 InstructionMap('vexmap2', 'g_apfnVexMap2', sEncoding = 'vex2'),
1748 InstructionMap('vexmap3', 'g_apfnVexMap3', sEncoding = 'vex3'),
1749
1750 InstructionMap('3dnow', asLeadOpcodes = ['0x0f', '0x0f',]),
1751 InstructionMap('xopmap8', sEncoding = 'xop8'),
1752 InstructionMap('xopmap9', sEncoding = 'xop9'),
1753 InstructionMap('xopgrp1', sEncoding = 'xop9', asLeadOpcodes = ['0x01'], sSelector = '/r'),
1754 InstructionMap('xopgrp2', sEncoding = 'xop9', asLeadOpcodes = ['0x02'], sSelector = '/r'),
1755 InstructionMap('xopgrp3', sEncoding = 'xop9', asLeadOpcodes = ['0x12'], sSelector = '/r'),
1756 InstructionMap('xopmap10', sEncoding = 'xop10'),
1757 InstructionMap('xopgrp4', sEncoding = 'xop10', asLeadOpcodes = ['0x12'], sSelector = '/r'),
1758];
## Instruction maps from g_aoInstructionMaps indexed by map name (oMap.sName).
g_dInstructionMaps = { oMap.sName: oMap for oMap in g_aoInstructionMaps };
## Instruction maps from g_aoInstructionMaps indexed by oMap.sIemName.
## Note! Maps sharing the same sIemName value end up in a single entry (the last one wins).
g_dInstructionMapsByIemName = { oMap.sIemName: oMap for oMap in g_aoInstructionMaps };
1761
1762
1763#
1764# Decoder functions.
1765#
1766
class DecoderFunction(object):
    """
    A decoder function found in the instruction sources.

    Keeps the name, macro arguments, source location and raw lines of the
    function.  Mainly intended for scoped searches for variables that are
    used in microcode blocks.
    """
    def __init__(self, sSrcFile, iBeginLine, sName, asDefArgs):
        ## The function name.
        self.sName      = sName;
        ## The FNIEMOP*DEF/STUB* macro argument list, 0th element is the macro name.
        self.asDefArgs  = asDefArgs;
        ## The source file the function is defined in.
        self.sSrcFile   = sSrcFile;
        ## The start line.
        self.iBeginLine = iBeginLine;
        ## The line the function (probably) ends on, -1 until complete() is called.
        self.iEndLine   = -1;
        ## The raw lines the function is made up of, empty until complete() is called.
        self.asLines    = [] # type: List[str]

    def complete(self, iEndLine, asLines):
        """
        Records the end line and the raw lines of the function.

        May only be called once per instance (asserted).
        """
        assert self.iEndLine == -1;
        (self.iEndLine, self.asLines) = (iEndLine, asLines);
1789
1790
1791#
1792# "Microcode" statements and blocks
1793#
1794
class McStmt(object):
    """
    A single statement inside a microcode block.
    """
    def __init__(self, sName, asParams):
        ## The statement name: 'IEM_MC_XXX' or 'C++'.
        self.sName    = sName;
        ## The statement parameters (list of strings).
        self.asParams = asParams;
        ## User attribute, initialized to None here.
        self.oUser    = None;

    def renderCode(self, cchIndent = 0):
        """
        Returns the statement as a line of code, indented by cchIndent
        spaces and terminated by a semicolon and a newline.
        """
        return ''.join((' ' * cchIndent, self.sName, '(', ', '.join(self.asParams), ');\n',));

    @staticmethod
    def renderCodeForList(aoStmts, cchIndent = 0):
        """
        Returns the concatenated renderCode() output of all the statements
        in aoStmts.
        """
        asRendered = [];
        for oStmt in aoStmts:
            asRendered.append(oStmt.renderCode(cchIndent));
        return ''.join(asRendered);

    @staticmethod
    def findStmtByNames(aoStmts, dNames):
        """
        Returns the first statement in aoStmts whose name is in dNames,
        descending into the IF and ELSE branches (in that order) of
        conditional statements.  Returns None if there is no match.

        Note! The names are passed as a dictionary for quick lookup, the value
              does not matter.
        """
        for oStmt in aoStmts:
            if oStmt.sName in dNames:
                return oStmt;
            if not isinstance(oStmt, McStmtCond):
                continue;
            for aoBranch in (oStmt.aoIfBranch, oStmt.aoElseBranch):
                oHit = McStmt.findStmtByNames(aoBranch, dNames);
                if oHit:
                    return oHit;
        return None;

    def isCppStmt(self):
        """ Returns True if the statement name starts with 'C++'. """
        return self.sName[:3] == 'C++';
1839
class McStmtCond(McStmt):
    """
    Base class for conditional statements (IEM_MC_IF_XXX).

    Holds the statements of the IF branch and of the (optional) ELSE branch.
    """
    def __init__(self, sName, asParams, aoIfBranch = None, aoElseBranch = None):
        McStmt.__init__(self, sName, asParams);
        ## Statements in the IF branch (private copy of the given list).
        self.aoIfBranch            = list(aoIfBranch) if aoIfBranch is not None else [];
        ## Statements in the ELSE branch (private copy of the given list), empty if none.
        self.aoElseBranch          = list(aoElseBranch) if aoElseBranch is not None else [];
        ## User specific IF-branch annotation.
        self.oIfBranchAnnotation   = None;
        ## User specific ELSE-branch annotation.
        self.oElseBranchAnnotation = None;

    def renderCode(self, cchIndent = 0):
        """
        Renders the conditional and its branches; the ELSE part is only
        emitted when the ELSE branch has statements.
        """
        sIndent = ' ' * cchIndent;
        asParts = [
            sIndent, self.sName, '(', ', '.join(self.asParams), ') {\n',
            self.renderCodeForList(self.aoIfBranch, cchIndent + 4),
        ];
        if self.aoElseBranch:
            asParts.extend([
                sIndent, '} IEM_MC_ELSE() {\n',
                self.renderCodeForList(self.aoElseBranch, cchIndent + 4),
            ]);
        asParts.extend([sIndent, '} IEM_MC_ENDIF();\n',]);
        return ''.join(asParts);
1859
class McStmtVar(McStmt):
    """ IEM_MC_LOCAL, IEM_MC_LOCAL_ASSIGN, IEM_MC_LOCAL_CONST """
    def __init__(self, sName, asParams, sType, sVarName, sValue = None):
        McStmt.__init__(self, sName, asParams);
        ## The variable type and name.
        (self.sType, self.sVarName) = (sType, sVarName);
        ## The assigned / const value, None if there is none.
        self.sValue = sValue;
1867
class McStmtArg(McStmtVar):
    """ IEM_MC_ARG, IEM_MC_ARG_CONST, IEM_MC_ARG_LOCAL_REF """
    def __init__(self, sName, asParams, sType, sVarName, iArg, sConstValue = None, sRef = None, sRefType = 'none'):
        # The constant value (if any) is stored as the variable value by the base class.
        McStmtVar.__init__(self, sName, asParams, sType, sVarName, sValue = sConstValue);
        ## The argument number.
        self.iArg     = iArg;
        ## The reference string (local variable, register).
        self.sRef     = sRef;
        ## The kind of reference: 'local', 'none'.
        self.sRefType = sRefType;
        assert sRefType == 'none' or sRefType == 'local';
1876
1877
class McStmtCall(McStmt):
    """ IEM_MC_CALL_* """
    def __init__(self, sName, asParams, iFnParam, iRcNameParam = -1):
        McStmt.__init__(self, sName, asParams);
        ## Index into asParams of the function being called.
        self.idxFn     = iFnParam;
        ## Index into asParams of the entry right after the function.
        self.idxParams = iFnParam + 1;
        ## The function being called (asParams[iFnParam]).
        self.sFn       = asParams[iFnParam];
        ## The asParams[iRcNameParam] value, None if iRcNameParam is negative.
        if iRcNameParam >= 0:
            self.iRcName = asParams[iRcNameParam];
        else:
            self.iRcName = None;
1886
class McCppGeneric(McStmt):
    """
    Generic C++/C statement.

    The code is kept verbatim as the one and only statement parameter.
    """
    def __init__(self, sCode, fDecode = True, sName = 'C++', cchIndent = 0):
        asCode = [sCode,];
        McStmt.__init__(self, sName, asCode);
        ## Selects the '// C++ decode' (True) or '// C++ normal' (False) annotation when rendering.
        self.fDecode   = fDecode;
        ## Extra indentation added to whatever renderCode() is given.
        self.cchIndent = cchIndent;

    def renderCode(self, cchIndent = 0):
        """
        Renders the code with a decode/normal annotation appended before
        each newline.
        """
        sAnnotation = ' // C++ decode\n' if self.fDecode else ' // C++ normal\n';
        sLine       = ' ' * (cchIndent + self.cchIndent) + self.asParams[0] + '\n';
        return sLine.replace('\n', sAnnotation);
1904
class McCppCall(McCppGeneric):
    """
    A generic C++/C call statement.

    The sName is still 'C++', so the function name is in the first parameter
    and the arguments in the subsequent ones.
    """
    def __init__(self, sFnName, asArgs, fDecode = True, cchIndent = 0):
        McCppGeneric.__init__(self, sFnName, fDecode = fDecode, cchIndent = cchIndent);
        # asParams[0] is the function name, the call arguments follow it.
        for sArg in asArgs:
            self.asParams.append(sArg);

    def renderCode(self, cchIndent = 0):
        """
        Renders the call on a single line followed by the decode/normal
        annotation.
        """
        if self.fDecode:
            sAnnotation = ' // C++ decode\n';
        else:
            sAnnotation = ' // C++ normal\n';
        return ''.join((' ' * (cchIndent + self.cchIndent),
                        self.asParams[0], '(', ', '.join(self.asParams[1:]), ');',
                        sAnnotation,));
1924
class McCppCond(McStmtCond):
    """
    C++/C 'if' statement.

    The condition expression is the one and only statement parameter.
    """
    def __init__(self, sCode, fDecode = True, aoIfBranch = None, aoElseBranch = None, cchIndent = 0):
        McStmtCond.__init__(self, 'C++/if', [sCode,], aoIfBranch, aoElseBranch);
        ## Selects the '// C++ decode' (True) or '// C++ normal' (False) annotation when rendering.
        self.fDecode   = fDecode;
        ## Extra indentation added to whatever renderCode() is given.
        self.cchIndent = cchIndent;

    def renderCode(self, cchIndent = 0):
        """
        Renders the if statement with braces on separate lines; the else
        part is only emitted when the ELSE branch has statements.
        """
        cchIndent += self.cchIndent;
        sIndent = ' ' * cchIndent;
        if self.fDecode:
            sAnnotation = '// C++ decode';
        else:
            sAnnotation = '// C++ normal';

        asParts = [
            sIndent, 'if (', self.asParams[0], ') ', sAnnotation, '\n',
            sIndent, '{\n',
            self.renderCodeForList(self.aoIfBranch, cchIndent + 4),
            sIndent, '}\n',
        ];
        if self.aoElseBranch:
            asParts.extend([
                sIndent, 'else ', sAnnotation, '\n',
                sIndent, '{\n',
                self.renderCodeForList(self.aoElseBranch, cchIndent + 4),
                sIndent, '}\n',
            ]);
        return ''.join(asParts);
1947
class McCppPreProc(McCppGeneric):
    """
    C++/C Preprocessor directive.
    """
    def __init__(self, sCode):
        McCppGeneric.__init__(self, sCode, fDecode = False, sName = 'C++/preproc');

    def renderCode(self, cchIndent = 0):
        """ Renders the directive as-is, the indentation is ignored. """
        return ''.join((self.asParams[0], '\n',));
1957
1958
## IEM_MC_F_XXX values.
## The key is the flag name, the value is a tuple of other flags that
## McBlock.parseMcBegin sets together with it (empty if none).
g_kdMcFlags = {
    'IEM_MC_F_ONLY_8086': (),
    'IEM_MC_F_MIN_186': (),
    'IEM_MC_F_MIN_286': (),
    'IEM_MC_F_NOT_286_OR_OLDER': (),
    'IEM_MC_F_MIN_386': ('IEM_MC_F_NOT_286_OR_OLDER',),
    'IEM_MC_F_MIN_486': ('IEM_MC_F_NOT_286_OR_OLDER',),
    'IEM_MC_F_MIN_PENTIUM': ('IEM_MC_F_NOT_286_OR_OLDER',),
    'IEM_MC_F_MIN_PENTIUM_II': ('IEM_MC_F_NOT_286_OR_OLDER',),
    'IEM_MC_F_MIN_CORE': ('IEM_MC_F_NOT_286_OR_OLDER',),
    'IEM_MC_F_64BIT': ('IEM_MC_F_NOT_286_OR_OLDER',),
    'IEM_MC_F_NOT_64BIT': (),
};
## IEM_CIMPL_F_XXX values.
## The key is the flag name, the value is a tuple of other flags that
## McBlock.parseCImplFlags sets together with it (empty if none).
g_kdCImplFlags = {
    'IEM_CIMPL_F_BRANCH_DIRECT': (),
    'IEM_CIMPL_F_BRANCH_INDIRECT': (),
    'IEM_CIMPL_F_BRANCH_RELATIVE': (),
    'IEM_CIMPL_F_BRANCH_CONDITIONAL': (),
    'IEM_CIMPL_F_BRANCH_FAR': (),
    'IEM_CIMPL_F_BRANCH_ANY': ('IEM_CIMPL_F_BRANCH_DIRECT', 'IEM_CIMPL_F_BRANCH_INDIRECT',
                               'IEM_CIMPL_F_BRANCH_RELATIVE',),
    'IEM_CIMPL_F_BRANCH_STACK': (),
    'IEM_CIMPL_F_BRANCH_STACK_FAR': (),
    'IEM_CIMPL_F_MODE': (),
    'IEM_CIMPL_F_RFLAGS': (),
    'IEM_CIMPL_F_INHIBIT_SHADOW': (),
    'IEM_CIMPL_F_STATUS_FLAGS': (),
    'IEM_CIMPL_F_CHECK_IRQ_AFTER': (),
    'IEM_CIMPL_F_CHECK_IRQ_BEFORE': (),
    'IEM_CIMPL_F_CHECK_IRQ_BEFORE_AND_AFTER': ('IEM_CIMPL_F_CHECK_IRQ_BEFORE', 'IEM_CIMPL_F_CHECK_IRQ_AFTER',),
    'IEM_CIMPL_F_VMEXIT': (),
    'IEM_CIMPL_F_FPU': (),
    'IEM_CIMPL_F_REP': (),
    'IEM_CIMPL_F_IO': (),
    'IEM_CIMPL_F_END_TB': (),
    'IEM_CIMPL_F_XCPT': ('IEM_CIMPL_F_BRANCH_INDIRECT', 'IEM_CIMPL_F_BRANCH_FAR',
                         'IEM_CIMPL_F_MODE', 'IEM_CIMPL_F_RFLAGS', 'IEM_CIMPL_F_VMEXIT', ),
    'IEM_CIMPL_F_CALLS_CIMPL': (),
    'IEM_CIMPL_F_CALLS_AIMPL': (),
    'IEM_CIMPL_F_CALLS_AIMPL_WITH_FXSTATE': (),
};
2002class McBlock(object):
2003 """
2004 Microcode block (IEM_MC_BEGIN ... IEM_MC_END, IEM_MC_DEFER_TO_CIMPL_x_RET).
2005 """
2006
2007 ## @name Macro expansion types.
2008 ## @{
2009 kiMacroExp_None = 0;
2010 kiMacroExp_Entire = 1; ##< Entire block (iBeginLine == iEndLine), original line may contain multiple blocks.
2011 kiMacroExp_Partial = 2; ##< Partial/mixed (cmpxchg16b), safe to assume single block.
2012 ## @}
2013
    def __init__(self, sSrcFile, iBeginLine, offBeginLine, oFunction, iInFunction, cchIndent = None, fDeferToCImpl = False):
        """
        Records where the block starts and initializes the remaining
        attributes to empty / not-yet-known values.  The end position and
        raw lines are supplied later via complete().
        """
        ## Set if IEM_MC_DEFER_TO_CIMPL_0_RET and friends, clear if IEM_MC_BEGIN/END block.
        self.fDeferToCImpl = fDeferToCImpl;
        ## The source file containing the block.
        self.sSrcFile = sSrcFile;
        ## The line with the IEM_MC_BEGIN/IEM_MC_DEFER_TO_CIMPL_X_RET statement.
        self.iBeginLine = iBeginLine;
        ## The offset of the IEM_MC_BEGIN/IEM_MC_DEFER_TO_CIMPL_X_RET statement within the line.
        self.offBeginLine = offBeginLine;
        ## The line with the IEM_MC_END statement / last line of IEM_MC_DEFER_TO_CIMPL_X_RET.
        ## This is -1 until complete() is called.
        self.iEndLine = -1;
        ## The offset of the IEM_MC_END statement within the line / semicolon offset for defer-to.
        self.offEndLine = 0;
        ## The offset following the IEM_MC_END/IEM_MC_DEFER_TO_CIMPL_X_RET semicolon.
        self.offAfterEnd = 0;
        ## The function the block resides in.
        self.oFunction = oFunction;
        ## The name of the function the block resides in. DEPRECATED.
        self.sFunction = oFunction.sName;
        ## The block number within the function.
        self.iInFunction = iInFunction;
        ## The block indentation; falls back on offBeginLine when cchIndent is None or zero.
        self.cchIndent = cchIndent if cchIndent else offBeginLine;
        ## The raw lines the block is made up of.
        self.asLines = [] # type: List[str]
        ## Indicates whether the block includes macro expansion parts (kiMacroExp_None,
        ## kiMacroExp_Entire, kiMacroExp_Partial).
        self.iMacroExp = self.kiMacroExp_None;
        ## IEM_MC_BEGIN: Argument count.
        self.cArgs = -1;
        ## IEM_MC_ARG, IEM_MC_ARG_CONST, IEM_MC_ARG_LOCAL_REF, IEM_MC_ARG_LOCAL_EFLAGS.
        self.aoArgs = [] # type: List[McStmtArg]
        ## IEM_MC_BEGIN: Locals count.
        self.cLocals = -1;
        ## IEM_MC_LOCAL, IEM_MC_LOCAL_ASSIGN, IEM_MC_LOCAL_CONST, IEM_MC_ARG_LOCAL_EFLAGS.
        self.aoLocals = [] # type: List[McStmtVar]
        ## IEM_MC_BEGIN: IEM_MC_F_XXX dictionary
        self.dsMcFlags = {} # type: Dict[str, bool]
        ## IEM_MC_[DEFER_TO|CALL]_CIMPL_XXX: IEM_CIMPL_F_XXX dictionary
        self.dsCImplFlags = {} # type: Dict[str, bool]
        ## Decoded statements in the block.
        self.aoStmts = [] # type: List[McStmt]
2055
2056 def complete(self, iEndLine, offEndLine, offAfterEnd, asLines):
2057 """
2058 Completes the microcode block.
2059 """
2060 assert self.iEndLine == -1;
2061 self.iEndLine = iEndLine;
2062 self.offEndLine = offEndLine;
2063 self.offAfterEnd = offAfterEnd;
2064 self.asLines = asLines;
2065
2066 def raiseDecodeError(self, sRawCode, off, sMessage):
2067 """ Raises a decoding error. """
2068 offStartOfLine = sRawCode.rfind('\n', 0, off) + 1;
2069 iLine = sRawCode.count('\n', 0, off);
2070 raise ParserException('%s:%d:%d: parsing error: %s'
2071 % (self.sSrcFile, self.iBeginLine + iLine, off - offStartOfLine + 1, sMessage,));
2072
2073 def raiseStmtError(self, sName, sMessage):
2074 """ Raises a statement parser error. """
2075 raise ParserException('%s:%d: %s: parsing error: %s' % (self.sSrcFile, self.iBeginLine, sName, sMessage,));
2076
2077 def checkStmtParamCount(self, sName, asParams, cParamsExpected):
2078 """ Check the parameter count, raising an error it doesn't match. """
2079 if len(asParams) != cParamsExpected:
2080 raise ParserException('%s:%d: %s: Expected %s parameters, found %s!'
2081 % (self.sSrcFile, self.iBeginLine, sName, cParamsExpected, len(asParams),));
2082 return True;
2083
2084 @staticmethod
2085 def parseMcGeneric(oSelf, sName, asParams):
2086 """ Generic parser that returns a plain McStmt object. """
2087 _ = oSelf;
2088 return McStmt(sName, asParams);
2089
2090 @staticmethod
2091 def parseMcGenericCond(oSelf, sName, asParams):
2092 """ Generic parser that returns a plain McStmtCond object. """
2093 _ = oSelf;
2094 return McStmtCond(sName, asParams);
2095
2096 @staticmethod
2097 def parseMcBegin(oSelf, sName, asParams):
2098 """ IEM_MC_BEGIN """
2099 oSelf.checkStmtParamCount(sName, asParams, 4);
2100 if oSelf.cArgs != -1 or oSelf.cLocals != -1 or oSelf.dsMcFlags:
2101 oSelf.raiseStmtError(sName, 'Used more than once!');
2102 oSelf.cArgs = int(asParams[0]);
2103 oSelf.cLocals = int(asParams[1]);
2104
2105 if asParams[2] != '0':
2106 for sFlag in asParams[2].split('|'):
2107 sFlag = sFlag.strip();
2108 if sFlag not in g_kdMcFlags:
2109 oSelf.raiseStmtError(sName, 'Unknown flag: %s' % (sFlag, ));
2110 oSelf.dsMcFlags[sFlag] = True;
2111 for sFlag2 in g_kdMcFlags[sFlag]:
2112 oSelf.dsMcFlags[sFlag2] = True;
2113
2114 if asParams[3] != '0':
2115 oSelf.parseCImplFlags(sName, asParams[3]);
2116
2117 return McBlock.parseMcGeneric(oSelf, sName, asParams);
2118
2119 @staticmethod
2120 def parseMcArg(oSelf, sName, asParams):
2121 """ IEM_MC_ARG """
2122 oSelf.checkStmtParamCount(sName, asParams, 3);
2123 oStmt = McStmtArg(sName, asParams, asParams[0], asParams[1], int(asParams[2]));
2124 oSelf.aoArgs.append(oStmt);
2125 return oStmt;
2126
2127 @staticmethod
2128 def parseMcArgConst(oSelf, sName, asParams):
2129 """ IEM_MC_ARG_CONST """
2130 oSelf.checkStmtParamCount(sName, asParams, 4);
2131 oStmt = McStmtArg(sName, asParams, asParams[0], asParams[1], int(asParams[3]), sConstValue = asParams[2]);
2132 oSelf.aoArgs.append(oStmt);
2133 return oStmt;
2134
2135 @staticmethod
2136 def parseMcArgLocalRef(oSelf, sName, asParams):
2137 """ IEM_MC_ARG_LOCAL_REF """
2138 oSelf.checkStmtParamCount(sName, asParams, 4);
2139 oStmt = McStmtArg(sName, asParams, asParams[0], asParams[1], int(asParams[3]), sRef = asParams[2], sRefType = 'local');
2140 oSelf.aoArgs.append(oStmt);
2141 return oStmt;
2142
2143 @staticmethod
2144 def parseMcArgLocalEFlags(oSelf, sName, asParams):
2145 """ IEM_MC_ARG_LOCAL_EFLAGS """
2146 oSelf.checkStmtParamCount(sName, asParams, 3);
2147 # Note! We split this one up into IEM_MC_LOCAL_VAR and IEM_MC_ARG_LOCAL_REF.
2148 oStmtLocal = McStmtVar('IEM_MC_LOCAL', ['uint32_t', asParams[1],], 'uint32_t', asParams[1]);
2149 oSelf.aoLocals.append(oStmtLocal);
2150 oStmtArg = McStmtArg('IEM_MC_ARG_LOCAL_REF', ['uint32_t *', asParams[0], asParams[1], asParams[2]],
2151 'uint32_t *', asParams[0], int(asParams[2]), sRef = asParams[1], sRefType = 'local');
2152 oSelf.aoArgs.append(oStmtArg);
2153 return (oStmtLocal, oStmtArg,);
2154
2155 @staticmethod
2156 def parseMcImplicitAvxAArgs(oSelf, sName, asParams):
2157 """ IEM_MC_IMPLICIT_AVX_AIMPL_ARGS """
2158 oSelf.checkStmtParamCount(sName, asParams, 0);
2159 # Note! Translate to IEM_MC_ARG_CONST
2160 oStmt = McStmtArg('IEM_MC_ARG_CONST', ['PX86XSAVEAREA', 'pXState', '&pVCpu->cpum.GstCtx.XState', '0'],
2161 'PX86XSAVEAREA', 'pXState', 0, '&pVCpu->cpum.GstCtx.XState');
2162 oSelf.aoArgs.append(oStmt);
2163 return oStmt;
2164
2165 @staticmethod
2166 def parseMcLocal(oSelf, sName, asParams):
2167 """ IEM_MC_LOCAL """
2168 oSelf.checkStmtParamCount(sName, asParams, 2);
2169 oStmt = McStmtVar(sName, asParams, asParams[0], asParams[1]);
2170 oSelf.aoLocals.append(oStmt);
2171 return oStmt;
2172
2173 @staticmethod
2174 def parseMcLocalAssign(oSelf, sName, asParams):
2175 """ IEM_MC_LOCAL_ASSIGN """
2176 oSelf.checkStmtParamCount(sName, asParams, 3);
2177 oStmt = McStmtVar(sName, asParams, asParams[0], asParams[1], sValue = asParams[2]);
2178 oSelf.aoLocals.append(oStmt);
2179 return oStmt;
2180
2181 @staticmethod
2182 def parseMcLocalConst(oSelf, sName, asParams):
2183 """ IEM_MC_LOCAL_CONST """
2184 oSelf.checkStmtParamCount(sName, asParams, 3);
2185 oStmt = McStmtVar(sName, asParams, asParams[0], asParams[1], sValue = asParams[2]);
2186 oSelf.aoLocals.append(oStmt);
2187 return oStmt;
2188
2189 @staticmethod
2190 def parseMcCallAImpl(oSelf, sName, asParams):
2191 """ IEM_MC_CALL_AIMPL_3|4 """
2192 cArgs = int(sName[-1]);
2193 oSelf.checkStmtParamCount(sName, asParams, 2 + cArgs);
2194 return McStmtCall(sName, asParams, 1, 0);
2195
2196 @staticmethod
2197 def parseMcCallVoidAImpl(oSelf, sName, asParams):
2198 """ IEM_MC_CALL_VOID_AIMPL_2|3 """
2199 cArgs = int(sName[-1]);
2200 oSelf.checkStmtParamCount(sName, asParams, 1 + cArgs);
2201 return McStmtCall(sName, asParams, 0);
2202
2203 @staticmethod
2204 def parseMcCallAvxAImpl(oSelf, sName, asParams):
2205 """ IEM_MC_CALL_AVX_AIMPL_2|3 """
2206 cArgs = int(sName[-1]);
2207 oSelf.checkStmtParamCount(sName, asParams, 1 + cArgs);
2208 return McStmtCall(sName, asParams, 0);
2209
2210 @staticmethod
2211 def parseMcCallFpuAImpl(oSelf, sName, asParams):
2212 """ IEM_MC_CALL_FPU_AIMPL_1|2|3 """
2213 cArgs = int(sName[-1]);
2214 oSelf.checkStmtParamCount(sName, asParams, 1 + cArgs);
2215 return McStmtCall(sName, asParams, 0);
2216
2217 @staticmethod
2218 def parseMcCallMmxAImpl(oSelf, sName, asParams):
2219 """ IEM_MC_CALL_MMX_AIMPL_2|3 """
2220 cArgs = int(sName[-1]);
2221 oSelf.checkStmtParamCount(sName, asParams, 1 + cArgs);
2222 return McStmtCall(sName, asParams, 0);
2223
2224 @staticmethod
2225 def parseMcCallSseAImpl(oSelf, sName, asParams):
2226 """ IEM_MC_CALL_SSE_AIMPL_2|3 """
2227 cArgs = int(sName[-1]);
2228 oSelf.checkStmtParamCount(sName, asParams, 1 + cArgs);
2229 return McStmtCall(sName, asParams, 0);
2230
2231 def parseCImplFlags(self, sName, sFlags):
2232 """
2233 Helper for parseMcCallCImpl and parseMcDeferToCImpl to validate and
2234 merge a bunch of IEM_CIMPL_F_XXX value into dsCImplFlags.
2235 """
2236 if sFlags != '0':
2237 sFlags = self.stripComments(sFlags);
2238 #print('debug: %s: %s' % (self.oFunction.sName,' | '.join(''.join(sFlags.split()).split('|')),));
2239 for sFlag in sFlags.split('|'):
2240 sFlag = sFlag.strip();
2241 if sFlag[0] == '(': sFlag = sFlag[1:].strip();
2242 if sFlag[-1] == ')': sFlag = sFlag[:-1].strip();
2243 #print('debug: %s' % sFlag)
2244 if sFlag not in g_kdCImplFlags:
2245 if sFlag == '0':
2246 continue;
2247 self.raiseStmtError(sName, 'Unknown flag: %s' % (sFlag, ));
2248 self.dsCImplFlags[sFlag] = True;
2249 for sFlag2 in g_kdCImplFlags[sFlag]:
2250 self.dsCImplFlags[sFlag2] = True;
2251 return None;
2252
2253 @staticmethod
2254 def parseMcCallCImpl(oSelf, sName, asParams):
2255 """ IEM_MC_CALL_CIMPL_0|1|2|3|4|5 """
2256 cArgs = int(sName[-1]);
2257 oSelf.checkStmtParamCount(sName, asParams, 3 + cArgs);
2258 oSelf.parseCImplFlags(sName, asParams[0]);
2259 return McStmtCall(sName, asParams, 2);
2260
2261 @staticmethod
2262 def parseMcDeferToCImpl(oSelf, sName, asParams):
2263 """ IEM_MC_DEFER_TO_CIMPL_[0|1|2|3]_RET """
2264 # Note! This code is called by workerIemMcDeferToCImplXRet.
2265 #print('debug: %s, %s,...' % (sName, asParams[0],));
2266 cArgs = int(sName[-5]);
2267 oSelf.checkStmtParamCount(sName, asParams, 3 + cArgs);
2268 oSelf.parseCImplFlags(sName, asParams[0]);
2269 return McStmtCall(sName, asParams, 2);
2270
2271 @staticmethod
2272 def stripComments(sCode):
2273 """ Returns sCode with comments removed. """
2274 off = 0;
2275 while off < len(sCode):
2276 off = sCode.find('/', off);
2277 if off < 0 or off + 1 >= len(sCode):
2278 break;
2279
2280 if sCode[off + 1] == '/':
2281 # C++ comment.
2282 offEnd = sCode.find('\n', off + 2);
2283 if offEnd < 0:
2284 return sCode[:off].rstrip();
2285 sCode = sCode[ : off] + sCode[offEnd : ];
2286 off += 1;
2287
2288 elif sCode[off + 1] == '*':
2289 # C comment
2290 offEnd = sCode.find('*/', off + 2);
2291 if offEnd < 0:
2292 return sCode[:off].rstrip();
2293 sSep = ' ';
2294 if (off > 0 and sCode[off - 1].isspace()) or (offEnd + 2 < len(sCode) and sCode[offEnd + 2].isspace()):
2295 sSep = '';
2296 sCode = sCode[ : off] + sSep + sCode[offEnd + 2 : ];
2297 off += len(sSep);
2298
2299 else:
2300 # Not a comment.
2301 off += 1;
2302 return sCode;
2303
2304 @staticmethod
2305 def extractParam(sCode, offParam):
2306 """
2307 Extracts the parameter value at offParam in sCode.
2308 Returns stripped value and the end offset of the terminating ',' or ')'.
2309 """
2310 # Extract it.
2311 cNesting = 0;
2312 offStart = offParam;
2313 while offParam < len(sCode):
2314 ch = sCode[offParam];
2315 if ch == '(':
2316 cNesting += 1;
2317 elif ch == ')':
2318 if cNesting == 0:
2319 break;
2320 cNesting -= 1;
2321 elif ch == ',' and cNesting == 0:
2322 break;
2323 offParam += 1;
2324 return (sCode[offStart : offParam].strip(), offParam);
2325
2326 @staticmethod
2327 def extractParams(sCode, offOpenParen):
2328 """
2329 Parses a parameter list.
2330 Returns the list of parameter values and the offset of the closing parentheses.
2331 Returns (None, len(sCode)) on if no closing parentheses was found.
2332 """
2333 assert sCode[offOpenParen] == '(';
2334 asParams = [];
2335 off = offOpenParen + 1;
2336 while off < len(sCode):
2337 ch = sCode[off];
2338 if ch.isspace():
2339 off += 1;
2340 elif ch != ')':
2341 (sParam, off) = McBlock.extractParam(sCode, off);
2342 asParams.append(sParam);
2343 assert off < len(sCode), 'off=%s sCode=%s:"%s"' % (off, len(sCode), sCode,);
2344 if sCode[off] == ',':
2345 off += 1;
2346 else:
2347 return (asParams, off);
2348 return (None, off);
2349
2350 @staticmethod
2351 def findClosingBraces(sCode, off, offStop):
2352 """
2353 Finds the matching '}' for the '{' at off in sCode.
2354 Returns offset of the matching '}' on success, otherwise -1.
2355
2356 Note! Does not take comments into account.
2357 """
2358 cDepth = 1;
2359 off += 1;
2360 while off < offStop:
2361 offClose = sCode.find('}', off, offStop);
2362 if offClose < 0:
2363 break;
2364 cDepth += sCode.count('{', off, offClose);
2365 cDepth -= 1;
2366 if cDepth == 0:
2367 return offClose;
2368 off = offClose + 1;
2369 return -1;
2370
2371 @staticmethod
2372 def countSpacesAt(sCode, off, offStop):
2373 """ Returns the number of space characters at off in sCode. """
2374 offStart = off;
2375 while off < offStop and sCode[off].isspace():
2376 off += 1;
2377 return off - offStart;
2378
2379 @staticmethod
2380 def skipSpacesAt(sCode, off, offStop):
2381 """ Returns first offset at or after off for a non-space character. """
2382 return off + McBlock.countSpacesAt(sCode, off, offStop);
2383
2384 @staticmethod
2385 def isSubstrAt(sStr, off, sSubStr):
2386 """ Returns true of sSubStr is found at off in sStr. """
2387 return sStr[off : off + len(sSubStr)] == sSubStr;
2388
    ## Matches the start of a C/C++ control statement: 'if (', 'else', 'while (',
    ## 'for (' or 'do'.  Group 1 is the keyword, including the opening
    ## parenthesis for if, while and for.
    koReCppCtrlStmts = re.compile(r'\b(if\s*[(]|else\b|while\s*[(]|for\s*[(]|do\b)');
    ## Matches 'iem.s.' followed by one of the listed member names.  Group 1 is
    ## the member name.  (Going by the name, these are the IEM decoder state
    ## variables.)
    koReIemDecoderVars = re.compile( r'iem\.s\.(fPrefixes|uRexReg|uRexB|uRexIndex|iEffSeg|offModRm|cbOpcode|offOpcode'
                                    + r'|enmEffOpSize|enmDefOpSize|enmDefAddrMode|enmEffAddrMode|idxPrefix'
                                    + r'|uVex3rdReg|uVexLength|fEvxStuff|uFpuOpcode|abOpcode'
                                    + r')');
2394
2395 def decodeCode(self, sRawCode, off = 0, offStop = -1, iLevel = 0): # pylint: disable=too-many-statements,too-many-branches
2396 """
2397 Decodes sRawCode[off : offStop].
2398
2399 Returns list of McStmt instances.
2400 Raises ParserException on failure.
2401 """
2402 if offStop < 0:
2403 offStop = len(sRawCode);
2404 aoStmts = [];
2405 while off < offStop:
2406 ch = sRawCode[off];
2407
2408 #
2409 # Skip spaces and comments.
2410 #
2411 if ch.isspace():
2412 off += 1;
2413
2414 elif ch == '/':
2415 ch = sRawCode[off + 1];
2416 if ch == '/': # C++ comment.
2417 off = sRawCode.find('\n', off + 2);
2418 if off < 0:
2419 break;
2420 off += 1;
2421 elif ch == '*': # C comment.
2422 off = sRawCode.find('*/', off + 2);
2423 if off < 0:
2424 break;
2425 off += 2;
2426 else:
2427 self.raiseDecodeError(sRawCode, off, 'Unexpected "/"');
2428
2429 #
2430 # Is it a MC statement.
2431 #
2432 elif ch == 'I' and sRawCode[off : off + len('IEM_MC_')] == 'IEM_MC_':
2433 # All MC statements ends with a semicolon, except for conditionals which ends with a '{'.
2434 # Extract it and strip comments from it.
2435 if not self.isSubstrAt(sRawCode, off, 'IEM_MC_IF_'):
2436 offEnd = sRawCode.find(';', off + len('IEM_MC_'));
2437 if offEnd <= off:
2438 self.raiseDecodeError(sRawCode, off, 'MC statement without a ";"');
2439 else:
2440 offEnd = sRawCode.find('{', off + len('IEM_MC_IF_'));
2441 if offEnd <= off:
2442 self.raiseDecodeError(sRawCode, off, 'MC conditional statement without a "{"');
2443 if sRawCode.find(';', off + len('IEM_MC_IF_'), offEnd) > off:
2444 self.raiseDecodeError(sRawCode, off, 'MC conditional statement without an immediate "{"');
2445 offEnd -= 1;
2446 while offEnd > off and sRawCode[offEnd - 1].isspace():
2447 offEnd -= 1;
2448
2449 sRawStmt = self.stripComments(sRawCode[off : offEnd]);
2450
2451 # Isolate the statement name.
2452 offOpenParen = sRawStmt.find('(');
2453 if offOpenParen < 0:
2454 self.raiseDecodeError(sRawCode, off, 'MC statement without a "("');
2455 sName = sRawStmt[: offOpenParen].strip();
2456
2457 # Extract the parameters.
2458 (asParams, offCloseParen) = self.extractParams(sRawStmt, offOpenParen);
2459 if asParams is None:
2460 self.raiseDecodeError(sRawCode, off, 'MC statement without a closing parenthesis');
2461 if offCloseParen + 1 != len(sRawStmt):
2462 self.raiseDecodeError(sRawCode, off,
2463 'Unexpected code following MC statement: %s' % (sRawStmt[offCloseParen + 1:]));
2464
2465 # Hand it to the handler.
2466 fnParser = g_dMcStmtParsers.get(sName);
2467 if not fnParser:
2468 self.raiseDecodeError(sRawCode, off, 'Unknown MC statement: %s' % (sName,));
2469 fnParser = fnParser[0];
2470 oStmt = fnParser(self, sName, asParams);
2471 if not isinstance(oStmt, (list, tuple)):
2472 aoStmts.append(oStmt);
2473 else:
2474 aoStmts.extend(oStmt);
2475
2476 #
2477 # If conditional, we need to parse the whole statement.
2478 #
2479 # For reasons of simplicity, we assume the following structure
2480 # and parse each branch in a recursive call:
2481 # IEM_MC_IF_XXX() {
2482 # IEM_MC_WHATEVER();
2483 # } IEM_MC_ELSE() {
2484 # IEM_MC_WHATEVER();
2485 # } IEM_MC_ENDIF();
2486 #
2487 if sName.startswith('IEM_MC_IF_'):
2488 if iLevel > 1:
2489 self.raiseDecodeError(sRawCode, off, 'Too deep nesting of conditionals.');
2490
2491 # Find start of the IF block:
2492 offBlock1 = self.skipSpacesAt(sRawCode, offEnd, offStop);
2493 if sRawCode[offBlock1] != '{':
2494 self.raiseDecodeError(sRawCode, offBlock1, 'Expected "{" following %s' % (sName,));
2495
2496 # Find the end of it.
2497 offBlock1End = self.findClosingBraces(sRawCode, offBlock1, offStop);
2498 if offBlock1End < 0:
2499 self.raiseDecodeError(sRawCode, offBlock1, 'No matching "}" closing IF block of %s' % (sName,));
2500
2501 oStmt.aoIfBranch = self.decodeCode(sRawCode, offBlock1 + 1, offBlock1End, iLevel + 1);
2502
2503 # Is there an else section?
2504 off = self.skipSpacesAt(sRawCode, offBlock1End + 1, offStop);
2505 if self.isSubstrAt(sRawCode, off, 'IEM_MC_ELSE'):
2506 off = self.skipSpacesAt(sRawCode, off + len('IEM_MC_ELSE'), offStop);
2507 if sRawCode[off] != '(':
2508 self.raiseDecodeError(sRawCode, off, 'Expected "(" following IEM_MC_ELSE"');
2509 off = self.skipSpacesAt(sRawCode, off + 1, offStop);
2510 if sRawCode[off] != ')':
2511 self.raiseDecodeError(sRawCode, off, 'Expected ")" following IEM_MC_ELSE("');
2512
2513 # Find start of the ELSE block.
2514 offBlock2 = self.skipSpacesAt(sRawCode, off + 1, offStop);
2515 if sRawCode[offBlock2] != '{':
2516 self.raiseDecodeError(sRawCode, offBlock2, 'Expected "{" following IEM_MC_ELSE()"');
2517
2518 # Find the end of it.
2519 offBlock2End = self.findClosingBraces(sRawCode, offBlock2, offStop);
2520 if offBlock2End < 0:
2521 self.raiseDecodeError(sRawCode, offBlock2, 'No matching "}" closing ELSE block of %s' % (sName,));
2522
2523 oStmt.aoElseBranch = self.decodeCode(sRawCode, offBlock2 + 1, offBlock2End, iLevel + 1);
2524 off = self.skipSpacesAt(sRawCode, offBlock2End + 1, offStop);
2525
2526 # Parse past the endif statement.
2527 if not self.isSubstrAt(sRawCode, off, 'IEM_MC_ENDIF'):
2528 self.raiseDecodeError(sRawCode, off, 'Expected IEM_MC_ENDIF for closing %s' % (sName,));
2529 off = self.skipSpacesAt(sRawCode, off + len('IEM_MC_ENDIF'), offStop);
2530 if sRawCode[off] != '(':
2531 self.raiseDecodeError(sRawCode, off, 'Expected "(" following IEM_MC_ENDIF"');
2532 off = self.skipSpacesAt(sRawCode, off + 1, offStop);
2533 if sRawCode[off] != ')':
2534 self.raiseDecodeError(sRawCode, off, 'Expected ")" following IEM_MC_ENDIF("');
2535 off = self.skipSpacesAt(sRawCode, off + 1, offStop);
2536 if sRawCode[off] != ';':
2537 self.raiseDecodeError(sRawCode, off, 'Expected ";" following IEM_MC_ENDIF()"');
2538 off += 1;
2539
2540 else:
2541 # Advance.
2542 off = offEnd + 1;
2543
2544 #
2545 # Otherwise it must be a C/C++ statement of sorts.
2546 #
2547 else:
2548 # Find the end of the statement. if and else requires special handling.
2549 sCondExpr = None;
2550 oMatch = self.koReCppCtrlStmts.match(sRawCode, off);
2551 if oMatch:
2552 if oMatch.group(1)[-1] == '(':
2553 (sCondExpr, offEnd) = self.extractParam(sRawCode, oMatch.end());
2554 else:
2555 offEnd = oMatch.end();
2556 if not oMatch.group(1).startswith('if') and oMatch.group(1) != 'else':
2557 self.raiseDecodeError(sRawCode, off, 'Only if/else control statements allowed: %s' % (oMatch.group(1),));
2558 elif ch == '#':
2559 offEnd = sRawCode.find('\n', off, offStop);
2560 if offEnd < 0:
2561 offEnd = offStop;
2562 offEnd -= 1;
2563 while offEnd > off and sRawCode[offEnd - 1].isspace():
2564 offEnd -= 1;
2565 else:
2566 offEnd = sRawCode.find(';', off);
2567 if offEnd < 0:
2568 self.raiseDecodeError(sRawCode, off, 'C++ statement without a ";"');
2569
2570 # Check this and the following statement whether it might have
2571 # something to do with decoding. This is a statement filter
2572 # criteria when generating the threaded functions blocks.
2573 offNextEnd = sRawCode.find(';', offEnd + 1);
2574 fDecode = ( sRawCode.find('IEM_OPCODE_', off, max(offEnd, offNextEnd)) >= 0
2575 or sRawCode.find('IEMOP_HLP_DONE_', off, max(offEnd, offNextEnd)) >= 0
2576 or sRawCode.find('IEMOP_HLP_DECODED_', off, offEnd) >= 0
2577 or sRawCode.find('IEMOP_HLP_RAISE_UD_IF_MISSING_GUEST_FEATURE', off, offEnd) >= 0
2578 or sRawCode.find('IEMOP_HLP_VMX_INSTR', off, offEnd) >= 0
2579 or sRawCode.find('IEMOP_HLP_IN_VMX_OPERATION', off, offEnd) >= 0 ## @todo wrong
2580 );
2581
2582 if not oMatch:
2583 if ch != '#':
2584 aoStmts.append(McCppGeneric(sRawCode[off : offEnd + 1], fDecode));
2585 else:
2586 aoStmts.append(McCppPreProc(sRawCode[off : offEnd + 1]));
2587 off = offEnd + 1;
2588 elif oMatch.group(1).startswith('if'):
2589 #
2590 # if () xxx [else yyy] statement.
2591 #
2592 oStmt = McCppCond(sCondExpr, fDecode);
2593 aoStmts.append(oStmt);
2594 off = offEnd + 1;
2595
2596 # Following the if () we can either have a {} containing zero or more statements
2597 # or we have a single statement.
2598 offBlock1 = self.skipSpacesAt(sRawCode, offEnd + 1, offStop);
2599 if sRawCode[offBlock1] == '{':
2600 offBlock1End = self.findClosingBraces(sRawCode, offBlock1, offStop);
2601 if offBlock1End < 0:
2602 self.raiseDecodeError(sRawCode, offBlock1, 'No matching "}" closing if block');
2603 offBlock1 += 1;
2604 else:
2605 offBlock1End = sRawCode.find(';', offBlock1, offStop);
2606 if offBlock1End < 0:
2607 self.raiseDecodeError(sRawCode, off, 'Expected ";" terminating one-line if block"');
2608
2609 oStmt.aoIfBranch = self.decodeCode(sRawCode, offBlock1, offBlock1End, iLevel + 1);
2610
2611 # The else is optional and can likewise be followed by {} or a single statement.
2612 off = self.skipSpacesAt(sRawCode, offBlock1End + 1, offStop);
2613 if self.isSubstrAt(sRawCode, off, 'else') and sRawCode[off + len('else')].isspace():
2614 offBlock2 = self.skipSpacesAt(sRawCode, off + len('else'), offStop);
2615 if sRawCode[offBlock2] == '{':
2616 offBlock2End = self.findClosingBraces(sRawCode, offBlock2, offStop);
2617 if offBlock2End < 0:
2618 self.raiseDecodeError(sRawCode, offBlock2, 'No matching "}" closing else block');
2619 offBlock2 += 1;
2620 else:
2621 offBlock2End = sRawCode.find(';', offBlock2, offStop);
2622 if offBlock2End < 0:
2623 self.raiseDecodeError(sRawCode, off, 'Expected ";" terminating one-line else block"');
2624
2625 oStmt.aoElseBranch = self.decodeCode(sRawCode, offBlock2, offBlock2End, iLevel + 1);
2626 off = offBlock2End + 1;
2627
2628 elif oMatch.group(1) == 'else':
2629 # Problematic 'else' branch, typically involving #ifdefs.
2630 self.raiseDecodeError(sRawCode, off, 'Mixed up else/#ifdef or something confusing us.');
2631
2632 return aoStmts;
2633
def decode(self):
    """
    Returns the decoded statement list of the block.

    When self.aoStmts is still empty, the raw lines in self.asLines are
    joined into one string and run through decodeCode(), the result being
    kept in self.aoStmts.  A non-empty self.aoStmts is returned as-is.
    Raises ParserException on failure.
    """
    # Already decoded (non-empty list), so hand back what we have.
    if self.aoStmts:
        return self.aoStmts;

    self.aoStmts = self.decodeCode(''.join(self.asLines));
    return self.aoStmts;
2643
2644
def checkForTooEarlyEffSegUse(self, aoStmts):
    """
    Checks that pVCpu->iem.s.iEffSeg is not referenced by any statement
    preceding the first IEM_MC_CALC_RM_EFF_ADDR statement in aoStmts.
    Returns None on success (including when there is no
    IEM_MC_CALC_RM_EFF_ADDR statement), error string on failure.

    See r158454 for an example of this issue.
    """

    # Find the first IEM_MC_CALC_RM_EFF_ADDR statement.  Only the given list
    # is examined; conditional branches are not entered, as we're ASSUMING
    # these will not occur before the address calculation.
    idxCalc = -1;
    for iStmt, oStmt in enumerate(aoStmts):
        if oStmt.sName == 'IEM_MC_CALC_RM_EFF_ADDR':
            idxCalc = iStmt;
            break;

    # Walk backwards from the statement just before it, reporting the first
    # one (i.e. the closest) with a parameter mentioning iEffSeg.  The range
    # is empty when nothing was found above (idxCalc is -1).
    for iPrior in range(idxCalc - 1, -1, -1):
        if any(sArg.find('pVCpu->iem.s.iEffSeg') >= 0 for sArg in aoStmts[iPrior].asParams):
            return "statement #%u: pVCpu->iem.s.iEffSeg is used prior to IEM_MC_CALC_RM_EFF_ADDR!" % (iPrior + 1,);
    return None;
2666
## Matches the first word of a C++ statement (group 1), skipping leading
# whitespace and requiring a space, '(' or ';' right after the word.
koReCppFirstWord = re.compile(r'^\s*(\w+)[ (;]');

## First words of decode-flagged C++ statements that checkForDoneDecoding
# tolerates after an IEMOP_HLP_DONE_*DECODING* invocation.
kdDecodeCppStmtOkayAfterDone = dict.fromkeys(
    (
        'IEMOP_HLP_IN_VMX_OPERATION',
        'IEMOP_HLP_VMX_INSTR',
    ),
    True
);
2672
def checkForDoneDecoding(self, aoStmts):
    """
    Checks that the block contains exactly one IEMOP_HLP_DONE_*DECODING*
    (or IEMOP_HLP_DECODED_*) macro invocation and that the statements are
    on the right side of it.
    Returns None on success, error string on failure.

    This ensures safe instruction restarting in case the recompiler runs
    out of TB resources during recompilation (e.g. aRanges or aGCPhysPages
    entries).
    """
    sFmtTooLate = "statement #%u: Decoding statement following IEMOP_HLP_DONE_*DECODING*!";

    # The IEMOP_HLP_DONE_ stuff is not allowed inside conditionals, so only
    # the top-level statements are examined.
    cDone = 0;
    for iStmt, oStmt in enumerate(aoStmts):
        #
        # IEM_MC_XXX statements.
        #
        if not oStmt.isCppStmt():
            sName = oStmt.sName;
            if sName.startswith('IEM_MC_DEFER_TO_CIMPL_') and iStmt == 0:
                cDone += 1;     # Counts as an implicit done-decoding when it's the first statement.
            elif cDone == 0:
                # Column 1 of the g_dMcStmtParsers entry tells whether the statement
                # modifies state; unknown statements are treated as not modifying.
                if g_dMcStmtParsers.get(sName, (None, False))[1]:
                    return "statement #%u: State modifying MC statement before IEMOP_HLP_DONE_*DECODING*!" % (iStmt + 1,);
            elif sName == 'IEM_MC_CALC_RM_EFF_ADDR':
                return sFmtTooLate % (iStmt + 1,);
            continue;

        #
        # C++ statements - classified by their first word, those without a
        # recognizable one are ignored.
        #
        oMatch = self.koReCppFirstWord.match(oStmt.asParams[0]);
        if not oMatch:
            continue;
        sFirstWord = oMatch.group(1);
        if sFirstWord.startswith(('IEMOP_HLP_DONE_', 'IEMOP_HLP_DECODED_',)):
            cDone += 1;
        elif cDone > 0 and oStmt.fDecode and sFirstWord not in self.kdDecodeCppStmtOkayAfterDone:
            return sFmtTooLate % (iStmt + 1,);

    if cDone == 0:
        return "Block is missing IEMOP_HLP_DONE_*DECODING* invocation!";
    if cDone > 1:
        return "Block has more than one IEMOP_HLP_DONE_*DECODING* invocation!";
    return None;
2714
def checkForFetchAfterRef(self, aoStmts, asRegRefClasses):
    """
    Checks that the register references are placed after register fetches
    from the same register class.
    Returns None on success, error string on failure.

    The asRegRefClasses parameter is a dictionary keyed by register class
    name (e.g. 'GREG', 'EFLAGS', 'FPUREG').  It is updated in place with
    each class that gets referenced and the same dictionary is handed on
    when recursing into the branches of conditional statements, so callers
    start the check off with an empty one.

    Example:
        SHL CH, CL

    If the CH reference is created first, the fetching of CL will cause the
    RCX guest register to have an active shadow register when it's being
    updated.  The shadow register will then be stale after the SHL operation
    completes, without us noticing.

    It's easier to ensure we've got correct code than complicating the
    recompiler code with safeguards here.
    """
    for iStmt, oStmt in enumerate(aoStmts):
        if not oStmt.isCppStmt():
            offRef = oStmt.sName.find("_REF_");
            if offRef > 0:
                # The register class is the word following '_REF_', except for
                # these conditionals where '_REF_' is followed by the type.
                if oStmt.sName in ('IEM_MC_IF_FPUREG_NOT_EMPTY_REF_R80',
                                   'IEM_MC_IF_TWO_FPUREGS_NOT_EMPTY_REF_R80',
                                   'IEM_MC_IF_TWO_FPUREGS_NOT_EMPTY_REF_R80_FIRST',):
                    sClass = 'FPUREG';
                else:
                    offUnderscore = oStmt.sName.find('_', offRef + 5);
                    if offUnderscore > 0:
                        assert offUnderscore > offRef;
                        sClass = oStmt.sName[offRef + 5 : offUnderscore];
                    else:
                        # Nothing follows the class (e.g. IEM_MC_REF_EFLAGS), so the
                        # remainder of the name is the class.  This must be a slice:
                        # indexing here would only yield the first character, which
                        # never matches the class name extracted from a fetch.
                        sClass = oStmt.sName[offRef + 5 : ];
                asRegRefClasses[sClass] = True;
            else:
                offFetch = oStmt.sName.find("_FETCH_");
                if offFetch > 0:
                    sClass = oStmt.sName[offFetch + 7 : ];
                    # Names where '_FETCH_' is followed by 'MEM' are not checked.
                    if not sClass.startswith("MEM"):
                        offUnderscore = sClass.find('_');
                        if offUnderscore >= 0:
                            assert offUnderscore > 0;
                            sClass = sClass[:offUnderscore];
                        if sClass in asRegRefClasses:
                            return "statement #%u: %s following REF! That'll mess up guest register shadowing" \
                                 % (iStmt + 1, oStmt.sName,);

        # Go into branches.
        if isinstance(oStmt, McStmtCond):
            sRet = self.checkForFetchAfterRef(oStmt.aoIfBranch, asRegRefClasses);
            if sRet:
                return sRet;
            sRet = self.checkForFetchAfterRef(oStmt.aoElseBranch, asRegRefClasses);
            if sRet:
                return sRet;
    return None;
2770
def check(self):
    """
    Runs the sanity checks on the decoded block: too early iEffSeg use,
    IEMOP_HLP_DONE_*DECODING* placement and register fetches following
    references.
    Returns error string list, empty if all is fine.
    """
    aoStmts = self.decode();

    # Each check returns None when happy and an error string otherwise.
    sEffSegErr = self.checkForTooEarlyEffSegUse(aoStmts);
    sDoneErr   = self.checkForDoneDecoding(aoStmts);
    sFetchErr  = self.checkForFetchAfterRef(aoStmts, {});

    return [sErr for sErr in (sEffSegErr, sDoneErr, sFetchErr,) if sErr];
2792
2793
2794
2795## IEM_MC_XXX -> parser + info dictionary.
2796#
2797# The info columns:
2798# - col 1+0: boolean entry indicating whether the statement modifies state and
2799# must not be used before IEMOP_HLP_DONE_*.
2800# - col 1+1: boolean entry indicating similar to the previous column but is
2801# used to decide when to emit calls for conditional jumps (Jmp/NoJmp).
2802# The difference is that most IEM_MC_IF_XXX entries are False here.
2803# - col 1+2: boolean entry indicating native recompiler support.
2804#
2805# The raw table was generated via the following command
2806# sed -n -e "s/^# *define *\(IEM_MC_[A-Z_0-9]*\)[ (].*$/ '\1': McBlock.parseMcGeneric,/p" include/IEMMc.h \
2807# | sort | uniq | gawk "{printf """ %%-60s (%%s, True)\n""", $1, $2}"
2808# pylint: disable=line-too-long
2809g_dMcStmtParsers = {
2810 'IEM_MC_ACTUALIZE_AVX_STATE_FOR_CHANGE': (McBlock.parseMcGeneric, False, False, True, ),
2811 'IEM_MC_ACTUALIZE_AVX_STATE_FOR_READ': (McBlock.parseMcGeneric, False, False, True, ),
2812 'IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE': (McBlock.parseMcGeneric, False, False, True, ),
2813 'IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ': (McBlock.parseMcGeneric, False, False, True, ),
2814 'IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE': (McBlock.parseMcGeneric, False, False, True, ),
2815 'IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ': (McBlock.parseMcGeneric, False, False, True, ),
2816 'IEM_MC_ADD_GREG_U16': (McBlock.parseMcGeneric, True, True, True, ),
2817 'IEM_MC_ADD_GREG_U16_TO_LOCAL': (McBlock.parseMcGeneric, False, False, False, ),
2818 'IEM_MC_ADD_GREG_U32': (McBlock.parseMcGeneric, True, True, True, ),
2819 'IEM_MC_ADD_GREG_U32_TO_LOCAL': (McBlock.parseMcGeneric, False, False, False, ),
2820 'IEM_MC_ADD_GREG_U64': (McBlock.parseMcGeneric, True, True, True, ),
2821 'IEM_MC_ADD_GREG_U64_TO_LOCAL': (McBlock.parseMcGeneric, False, False, False, ),
2822 'IEM_MC_ADD_GREG_U8_TO_LOCAL': (McBlock.parseMcGeneric, False, False, False, ),
2823 'IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR': (McBlock.parseMcGeneric, True, True, False, ),
2824 'IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR': (McBlock.parseMcGeneric, True, True, False, ),
2825 'IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR': (McBlock.parseMcGeneric, True, True, False, ),
2826 'IEM_MC_ADVANCE_RIP_AND_FINISH': (McBlock.parseMcGeneric, True, True, True, ),
2827 'IEM_MC_AND_2LOCS_U32': (McBlock.parseMcGeneric, False, False, False, ),
2828 'IEM_MC_AND_ARG_U16': (McBlock.parseMcGeneric, False, False, False, ),
2829 'IEM_MC_AND_ARG_U32': (McBlock.parseMcGeneric, False, False, False, ),
2830 'IEM_MC_AND_ARG_U64': (McBlock.parseMcGeneric, False, False, False, ),
2831 'IEM_MC_AND_GREG_U16': (McBlock.parseMcGeneric, True, True, False, ),
2832 'IEM_MC_AND_GREG_U32': (McBlock.parseMcGeneric, True, True, False, ),
2833 'IEM_MC_AND_GREG_U64': (McBlock.parseMcGeneric, True, True, False, ),
2834 'IEM_MC_AND_GREG_U8': (McBlock.parseMcGeneric, True, True, False, ),
2835 'IEM_MC_AND_LOCAL_U16': (McBlock.parseMcGeneric, False, False, False, ),
2836 'IEM_MC_AND_LOCAL_U32': (McBlock.parseMcGeneric, False, False, False, ),
2837 'IEM_MC_AND_LOCAL_U64': (McBlock.parseMcGeneric, False, False, False, ),
2838 'IEM_MC_AND_LOCAL_U8': (McBlock.parseMcGeneric, False, False, False, ),
2839 'IEM_MC_ARG': (McBlock.parseMcArg, False, False, True, ),
2840 'IEM_MC_ARG_CONST': (McBlock.parseMcArgConst, False, False, True, ),
2841 'IEM_MC_ARG_LOCAL_EFLAGS': (McBlock.parseMcArgLocalEFlags, False, False, True, ),
2842 'IEM_MC_ARG_LOCAL_REF': (McBlock.parseMcArgLocalRef, False, False, True, ),
2843 'IEM_MC_ASSIGN_TO_SMALLER': (McBlock.parseMcGeneric, False, False, True, ),
2844 'IEM_MC_BEGIN': (McBlock.parseMcBegin, False, False, True, ),
2845 'IEM_MC_BROADCAST_XREG_U16_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
2846 'IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
2847 'IEM_MC_BROADCAST_XREG_U64_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
2848 'IEM_MC_BROADCAST_XREG_U8_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
2849 'IEM_MC_BROADCAST_YREG_U128_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
2850 'IEM_MC_BROADCAST_YREG_U16_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
2851 'IEM_MC_BROADCAST_YREG_U32_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
2852 'IEM_MC_BROADCAST_YREG_U64_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
2853 'IEM_MC_BROADCAST_YREG_U8_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
2854 'IEM_MC_BSWAP_LOCAL_U16': (McBlock.parseMcGeneric, False, False, False, ),
2855 'IEM_MC_BSWAP_LOCAL_U32': (McBlock.parseMcGeneric, False, False, False, ),
2856 'IEM_MC_BSWAP_LOCAL_U64': (McBlock.parseMcGeneric, False, False, False, ),
2857 'IEM_MC_CALC_RM_EFF_ADDR': (McBlock.parseMcGeneric, False, False, False, ),
2858 'IEM_MC_CALL_AIMPL_3': (McBlock.parseMcCallAImpl, True, True, True, ),
2859 'IEM_MC_CALL_AIMPL_4': (McBlock.parseMcCallAImpl, True, True, True, ),
2860 'IEM_MC_CALL_AVX_AIMPL_2': (McBlock.parseMcCallAvxAImpl, True, True, False, ),
2861 'IEM_MC_CALL_AVX_AIMPL_3': (McBlock.parseMcCallAvxAImpl, True, True, False, ),
2862 'IEM_MC_CALL_CIMPL_0': (McBlock.parseMcCallCImpl, True, True, False, ),
2863 'IEM_MC_CALL_CIMPL_1': (McBlock.parseMcCallCImpl, True, True, False, ),
2864 'IEM_MC_CALL_CIMPL_2': (McBlock.parseMcCallCImpl, True, True, False, ),
2865 'IEM_MC_CALL_CIMPL_3': (McBlock.parseMcCallCImpl, True, True, False, ),
2866 'IEM_MC_CALL_CIMPL_4': (McBlock.parseMcCallCImpl, True, True, False, ),
2867 'IEM_MC_CALL_CIMPL_5': (McBlock.parseMcCallCImpl, True, True, False, ),
2868 'IEM_MC_CALL_FPU_AIMPL_1': (McBlock.parseMcCallFpuAImpl, True, True, False, ),
2869 'IEM_MC_CALL_FPU_AIMPL_2': (McBlock.parseMcCallFpuAImpl, True, True, False, ),
2870 'IEM_MC_CALL_FPU_AIMPL_3': (McBlock.parseMcCallFpuAImpl, True, True, False, ),
2871 'IEM_MC_CALL_MMX_AIMPL_2': (McBlock.parseMcCallMmxAImpl, True, True, False, ),
2872 'IEM_MC_CALL_MMX_AIMPL_3': (McBlock.parseMcCallMmxAImpl, True, True, False, ),
2873 'IEM_MC_CALL_SSE_AIMPL_2': (McBlock.parseMcCallSseAImpl, True, True, False, ),
2874 'IEM_MC_CALL_SSE_AIMPL_3': (McBlock.parseMcCallSseAImpl, True, True, False, ),
2875 'IEM_MC_CALL_VOID_AIMPL_0': (McBlock.parseMcCallVoidAImpl, True, True, True, ),
2876 'IEM_MC_CALL_VOID_AIMPL_1': (McBlock.parseMcCallVoidAImpl, True, True, True, ),
2877 'IEM_MC_CALL_VOID_AIMPL_2': (McBlock.parseMcCallVoidAImpl, True, True, True, ),
2878 'IEM_MC_CALL_VOID_AIMPL_3': (McBlock.parseMcCallVoidAImpl, True, True, True, ),
2879 'IEM_MC_CALL_VOID_AIMPL_4': (McBlock.parseMcCallVoidAImpl, True, True, True, ),
2880 'IEM_MC_CLEAR_EFL_BIT': (McBlock.parseMcGeneric, True, True, False, ),
2881 'IEM_MC_CLEAR_FSW_EX': (McBlock.parseMcGeneric, True, True, False, ),
2882 'IEM_MC_CLEAR_HIGH_GREG_U64': (McBlock.parseMcGeneric, True, True, True, ),
2883 'IEM_MC_CLEAR_XREG_U32_MASK': (McBlock.parseMcGeneric, True, True, False, ),
2884 'IEM_MC_CLEAR_YREG_128_UP': (McBlock.parseMcGeneric, True, True, False, ),
2885 'IEM_MC_COMMIT_EFLAGS': (McBlock.parseMcGeneric, True, True, True, ),
2886 'IEM_MC_COPY_XREG_U128': (McBlock.parseMcGeneric, True, True, False, ),
2887 'IEM_MC_COPY_YREG_U128_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
2888 'IEM_MC_COPY_YREG_U256_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
2889 'IEM_MC_COPY_YREG_U64_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
2890 'IEM_MC_DEFER_TO_CIMPL_0_RET': (McBlock.parseMcDeferToCImpl, False, False, False, ),
2891 'IEM_MC_DEFER_TO_CIMPL_1_RET': (McBlock.parseMcDeferToCImpl, False, False, False, ),
2892 'IEM_MC_DEFER_TO_CIMPL_2_RET': (McBlock.parseMcDeferToCImpl, False, False, False, ),
2893 'IEM_MC_DEFER_TO_CIMPL_3_RET': (McBlock.parseMcDeferToCImpl, False, False, False, ),
2894 'IEM_MC_END': (McBlock.parseMcGeneric, True, True, True, ),
2895 'IEM_MC_FETCH_EFLAGS': (McBlock.parseMcGeneric, False, False, True, ),
2896 'IEM_MC_FETCH_EFLAGS_U8': (McBlock.parseMcGeneric, False, False, False, ),
2897 'IEM_MC_FETCH_FCW': (McBlock.parseMcGeneric, False, False, False, ),
2898 'IEM_MC_FETCH_FSW': (McBlock.parseMcGeneric, False, False, False, ),
2899 'IEM_MC_FETCH_GREG_U16': (McBlock.parseMcGeneric, False, False, True, ),
2900 'IEM_MC_FETCH_GREG_U16_SX_U32': (McBlock.parseMcGeneric, False, False, True, ),
2901 'IEM_MC_FETCH_GREG_U16_SX_U64': (McBlock.parseMcGeneric, False, False, True, ),
2902 'IEM_MC_FETCH_GREG_U16_ZX_U32': (McBlock.parseMcGeneric, False, False, True, ),
2903 'IEM_MC_FETCH_GREG_U16_ZX_U64': (McBlock.parseMcGeneric, False, False, True, ),
2904 'IEM_MC_FETCH_GREG_U32': (McBlock.parseMcGeneric, False, False, True, ),
2905 'IEM_MC_FETCH_GREG_U32_SX_U64': (McBlock.parseMcGeneric, False, False, True, ),
2906 'IEM_MC_FETCH_GREG_U32_ZX_U64': (McBlock.parseMcGeneric, False, False, True, ),
2907 'IEM_MC_FETCH_GREG_U64': (McBlock.parseMcGeneric, False, False, True, ),
2908 'IEM_MC_FETCH_GREG_U64_ZX_U64': (McBlock.parseMcGeneric, False, False, True, ),
2909 'IEM_MC_FETCH_GREG_U8': (McBlock.parseMcGeneric, False, False, True, ), # thrd var
2910 'IEM_MC_FETCH_GREG_U8_SX_U16': (McBlock.parseMcGeneric, False, False, True, ), # thrd var
2911 'IEM_MC_FETCH_GREG_U8_SX_U32': (McBlock.parseMcGeneric, False, False, True, ), # thrd var
2912 'IEM_MC_FETCH_GREG_U8_SX_U64': (McBlock.parseMcGeneric, False, False, True, ), # thrd var
2913 'IEM_MC_FETCH_GREG_U8_ZX_U16': (McBlock.parseMcGeneric, False, False, True, ), # thrd var
2914 'IEM_MC_FETCH_GREG_U8_ZX_U32': (McBlock.parseMcGeneric, False, False, True, ), # thrd var
2915 'IEM_MC_FETCH_GREG_U8_ZX_U64': (McBlock.parseMcGeneric, False, False, True, ), # thrd var
2916 'IEM_MC_FETCH_GREG_PAIR_U32': (McBlock.parseMcGeneric, False, False, False, ),
2917 'IEM_MC_FETCH_GREG_PAIR_U64': (McBlock.parseMcGeneric, False, False, False, ),
2918 'IEM_MC_FETCH_MEM_D80': (McBlock.parseMcGeneric, True, True, False, ),
2919 'IEM_MC_FETCH_MEM_I16': (McBlock.parseMcGeneric, True, True, False, ),
2920 'IEM_MC_FETCH_MEM_I32': (McBlock.parseMcGeneric, True, True, False, ),
2921 'IEM_MC_FETCH_MEM_I64': (McBlock.parseMcGeneric, True, True, False, ),
2922 'IEM_MC_FETCH_MEM_R32': (McBlock.parseMcGeneric, True, True, False, ),
2923 'IEM_MC_FETCH_MEM_R64': (McBlock.parseMcGeneric, True, True, False, ),
2924 'IEM_MC_FETCH_MEM_R80': (McBlock.parseMcGeneric, True, True, False, ),
2925 'IEM_MC_FETCH_MEM_U128': (McBlock.parseMcGeneric, True, True, False, ),
2926 'IEM_MC_FETCH_MEM_U128_ALIGN_SSE': (McBlock.parseMcGeneric, True, True, False, ),
2927 'IEM_MC_FETCH_MEM_U128_NO_AC': (McBlock.parseMcGeneric, True, True, False, ),
2928 'IEM_MC_FETCH_MEM_U128_AND_XREG_U128': (McBlock.parseMcGeneric, True, True, False, ),
2929 'IEM_MC_FETCH_MEM_U128_AND_XREG_U128_AND_RAX_RDX_U64': (McBlock.parseMcGeneric, True, True, False, ),
2930 'IEM_MC_FETCH_MEM_U128_AND_XREG_U128_AND_EAX_EDX_U32_SX_U64':(McBlock.parseMcGeneric, True, True, False, ),
2931 'IEM_MC_FETCH_MEM_U16': (McBlock.parseMcGeneric, True, True, True, ),
2932 'IEM_MC_FETCH_MEM_U16_DISP': (McBlock.parseMcGeneric, True, True, True, ),
2933 'IEM_MC_FETCH_MEM_U16_SX_U32': (McBlock.parseMcGeneric, True, True, True, ), # movsx
2934 'IEM_MC_FETCH_MEM_U16_SX_U64': (McBlock.parseMcGeneric, True, True, True, ), # movsx
2935 'IEM_MC_FETCH_MEM_U16_ZX_U32': (McBlock.parseMcGeneric, True, True, True, ), # movzx
2936 'IEM_MC_FETCH_MEM_U16_ZX_U64': (McBlock.parseMcGeneric, True, True, True, ), # movzx
2937 'IEM_MC_FETCH_MEM_U256': (McBlock.parseMcGeneric, True, True, False, ),
2938 'IEM_MC_FETCH_MEM_U256_ALIGN_AVX': (McBlock.parseMcGeneric, True, True, False, ),
2939 'IEM_MC_FETCH_MEM_U256_NO_AC': (McBlock.parseMcGeneric, True, True, False, ),
2940 'IEM_MC_FETCH_MEM_U32': (McBlock.parseMcGeneric, True, True, True, ),
2941 'IEM_MC_FETCH_MEM_U32_DISP': (McBlock.parseMcGeneric, True, True, True, ), #bounds only
2942 'IEM_MC_FETCH_MEM_U32_SX_U64': (McBlock.parseMcGeneric, True, True, True, ), # movsx
2943 'IEM_MC_FETCH_MEM_U32_ZX_U64': (McBlock.parseMcGeneric, True, True, True, ), # movzx
2944 'IEM_MC_FETCH_MEM_U64': (McBlock.parseMcGeneric, True, True, True, ),
2945 'IEM_MC_FETCH_MEM_U64_ALIGN_U128': (McBlock.parseMcGeneric, True, True, False, ),
2946 'IEM_MC_FETCH_MEM_U8': (McBlock.parseMcGeneric, True, True, True, ),
2947 'IEM_MC_FETCH_MEM_U8_SX_U16': (McBlock.parseMcGeneric, True, True, True, ), # movsx
2948 'IEM_MC_FETCH_MEM_U8_SX_U32': (McBlock.parseMcGeneric, True, True, True, ), # movsx
2949 'IEM_MC_FETCH_MEM_U8_SX_U64': (McBlock.parseMcGeneric, True, True, True, ), # movsx
2950 'IEM_MC_FETCH_MEM_U8_ZX_U16': (McBlock.parseMcGeneric, True, True, True, ), # movzx
2951 'IEM_MC_FETCH_MEM_U8_ZX_U32': (McBlock.parseMcGeneric, True, True, True, ), # movzx
2952 'IEM_MC_FETCH_MEM_U8_ZX_U64': (McBlock.parseMcGeneric, True, True, True, ), # movzx
2953 'IEM_MC_FETCH_MEM_XMM': (McBlock.parseMcGeneric, True, True, False, ),
2954 'IEM_MC_FETCH_MEM_XMM_ALIGN_SSE': (McBlock.parseMcGeneric, True, True, False, ),
2955 'IEM_MC_FETCH_MEM_XMM_NO_AC': (McBlock.parseMcGeneric, True, True, False, ),
2956 'IEM_MC_FETCH_MEM_XMM_U32': (McBlock.parseMcGeneric, True, True, False, ),
2957 'IEM_MC_FETCH_MEM_XMM_U64': (McBlock.parseMcGeneric, True, True, False, ),
2958 'IEM_MC_FETCH_MEM_XMM_ALIGN_SSE_AND_XREG_XMM': (McBlock.parseMcGeneric, True, True, False, ),
2959 'IEM_MC_FETCH_MEM_XMM_U32_AND_XREG_XMM': (McBlock.parseMcGeneric, True, True, False, ),
2960 'IEM_MC_FETCH_MEM_XMM_U64_AND_XREG_XMM': (McBlock.parseMcGeneric, True, True, False, ),
2961 'IEM_MC_FETCH_MEM_YMM': (McBlock.parseMcGeneric, True, True, False, ),
2962 'IEM_MC_FETCH_MEM_YMM_ALIGN_AVX': (McBlock.parseMcGeneric, True, True, False, ),
2963 'IEM_MC_FETCH_MEM_YMM_NO_AC': (McBlock.parseMcGeneric, True, True, False, ),
2964 'IEM_MC_FETCH_MEM16_U8': (McBlock.parseMcGeneric, True, True, False, ),
2965 'IEM_MC_FETCH_MEM32_U8': (McBlock.parseMcGeneric, True, True, False, ),
2966 'IEM_MC_FETCH_MREG_U32': (McBlock.parseMcGeneric, False, False, False, ),
2967 'IEM_MC_FETCH_MREG_U64': (McBlock.parseMcGeneric, False, False, False, ),
2968 'IEM_MC_FETCH_SREG_BASE_U32': (McBlock.parseMcGeneric, False, False, False, ),
2969 'IEM_MC_FETCH_SREG_BASE_U64': (McBlock.parseMcGeneric, False, False, False, ),
2970 'IEM_MC_FETCH_SREG_U16': (McBlock.parseMcGeneric, False, False, True, ),
2971 'IEM_MC_FETCH_SREG_ZX_U32': (McBlock.parseMcGeneric, False, False, True, ),
2972 'IEM_MC_FETCH_SREG_ZX_U64': (McBlock.parseMcGeneric, False, False, True, ),
2973 'IEM_MC_FETCH_XREG_U128': (McBlock.parseMcGeneric, False, False, False, ),
2974 'IEM_MC_FETCH_XREG_U16': (McBlock.parseMcGeneric, False, False, False, ),
2975 'IEM_MC_FETCH_XREG_U32': (McBlock.parseMcGeneric, False, False, False, ),
2976 'IEM_MC_FETCH_XREG_U64': (McBlock.parseMcGeneric, False, False, False, ),
2977 'IEM_MC_FETCH_XREG_U8': (McBlock.parseMcGeneric, False, False, False, ),
2978 'IEM_MC_FETCH_XREG_XMM': (McBlock.parseMcGeneric, False, False, False, ),
2979 'IEM_MC_FETCH_XREG_PAIR_U128': (McBlock.parseMcGeneric, False, False, False, ),
2980 'IEM_MC_FETCH_XREG_PAIR_U128_AND_RAX_RDX_U64': (McBlock.parseMcGeneric, False, False, False, ),
2981 'IEM_MC_FETCH_XREG_PAIR_U128_AND_EAX_EDX_U32_SX_U64': (McBlock.parseMcGeneric, False, False, False, ),
2982 'IEM_MC_FETCH_XREG_PAIR_XMM': (McBlock.parseMcGeneric, False, False, False, ),
2983 'IEM_MC_FETCH_YREG_2ND_U64': (McBlock.parseMcGeneric, False, False, False, ),
2984 'IEM_MC_FETCH_YREG_U128': (McBlock.parseMcGeneric, False, False, False, ),
2985 'IEM_MC_FETCH_YREG_U256': (McBlock.parseMcGeneric, False, False, False, ),
2986 'IEM_MC_FETCH_YREG_U32': (McBlock.parseMcGeneric, False, False, False, ),
2987 'IEM_MC_FETCH_YREG_U64': (McBlock.parseMcGeneric, False, False, False, ),
2988 'IEM_MC_FLIP_EFL_BIT': (McBlock.parseMcGeneric, True, True, False, ),
2989 'IEM_MC_FPU_FROM_MMX_MODE': (McBlock.parseMcGeneric, True, True, False, ),
2990 'IEM_MC_FPU_STACK_DEC_TOP': (McBlock.parseMcGeneric, True, True, False, ),
2991 'IEM_MC_FPU_STACK_FREE': (McBlock.parseMcGeneric, True, True, False, ),
2992 'IEM_MC_FPU_STACK_INC_TOP': (McBlock.parseMcGeneric, True, True, False, ),
2993 'IEM_MC_FPU_STACK_PUSH_OVERFLOW': (McBlock.parseMcGeneric, True, True, False, ),
2994 'IEM_MC_FPU_STACK_PUSH_OVERFLOW_MEM_OP': (McBlock.parseMcGeneric, True, True, False, ),
2995 'IEM_MC_FPU_STACK_PUSH_UNDERFLOW': (McBlock.parseMcGeneric, True, True, False, ),
2996 'IEM_MC_FPU_STACK_PUSH_UNDERFLOW_TWO': (McBlock.parseMcGeneric, True, True, False, ),
2997 'IEM_MC_FPU_STACK_UNDERFLOW': (McBlock.parseMcGeneric, True, True, False, ),
2998 'IEM_MC_FPU_STACK_UNDERFLOW_MEM_OP': (McBlock.parseMcGeneric, True, True, False, ),
2999 'IEM_MC_FPU_STACK_UNDERFLOW_MEM_OP_THEN_POP': (McBlock.parseMcGeneric, True, True, False, ),
3000 'IEM_MC_FPU_STACK_UNDERFLOW_THEN_POP': (McBlock.parseMcGeneric, True, True, False, ),
3001 'IEM_MC_FPU_STACK_UNDERFLOW_THEN_POP_POP': (McBlock.parseMcGeneric, True, True, False, ),
3002 'IEM_MC_FPU_TO_MMX_MODE': (McBlock.parseMcGeneric, True, True, False, ),
3003 'IEM_MC_HINT_FLUSH_GUEST_SHADOW': (McBlock.parseMcGeneric, True, True, True, ),
3004 'IEM_MC_IF_CX_IS_NZ': (McBlock.parseMcGenericCond, True, False, True, ),
3005 'IEM_MC_IF_CX_IS_NOT_ONE': (McBlock.parseMcGenericCond, True, False, True, ),
3006 'IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3007 'IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3008 'IEM_MC_IF_ECX_IS_NZ': (McBlock.parseMcGenericCond, True, False, True, ),
3009 'IEM_MC_IF_ECX_IS_NOT_ONE': (McBlock.parseMcGenericCond, True, False, True, ),
3010 'IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3011 'IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3012 'IEM_MC_IF_EFL_ANY_BITS_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3013 'IEM_MC_IF_EFL_BIT_NOT_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3014 'IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ': (McBlock.parseMcGenericCond, True, False, True, ),
3015 'IEM_MC_IF_EFL_BIT_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3016 'IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE': (McBlock.parseMcGenericCond, True, False, True, ),
3017 'IEM_MC_IF_EFL_BITS_EQ': (McBlock.parseMcGenericCond, True, False, True, ),
3018 'IEM_MC_IF_EFL_BITS_NE': (McBlock.parseMcGenericCond, True, False, True, ),
3019 'IEM_MC_IF_EFL_NO_BITS_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3020 'IEM_MC_IF_FCW_IM': (McBlock.parseMcGenericCond, True, True, False, ),
3021 'IEM_MC_IF_FPUREG_IS_EMPTY': (McBlock.parseMcGenericCond, True, True, False, ),
3022 'IEM_MC_IF_FPUREG_NOT_EMPTY': (McBlock.parseMcGenericCond, True, True, False, ),
3023 'IEM_MC_IF_FPUREG_NOT_EMPTY_REF_R80': (McBlock.parseMcGenericCond, True, True, False, ),
3024 'IEM_MC_IF_GREG_BIT_SET': (McBlock.parseMcGenericCond, True, False, False, ),
3025 'IEM_MC_IF_LOCAL_IS_Z': (McBlock.parseMcGenericCond, True, False, False, ),
3026 'IEM_MC_IF_MXCSR_XCPT_PENDING': (McBlock.parseMcGenericCond, True, True, False, ),
3027 'IEM_MC_IF_RCX_IS_NZ': (McBlock.parseMcGenericCond, True, False, True, ),
3028 'IEM_MC_IF_RCX_IS_NOT_ONE': (McBlock.parseMcGenericCond, True, False, True, ),
3029 'IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3030 'IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3031 'IEM_MC_IF_TWO_FPUREGS_NOT_EMPTY_REF_R80': (McBlock.parseMcGenericCond, True, True, False, ),
3032 'IEM_MC_IF_TWO_FPUREGS_NOT_EMPTY_REF_R80_FIRST': (McBlock.parseMcGenericCond, True, True, False, ),
3033 'IEM_MC_IMPLICIT_AVX_AIMPL_ARGS': (McBlock.parseMcImplicitAvxAArgs, False, False, False, ),
3034 'IEM_MC_INT_CLEAR_ZMM_256_UP': (McBlock.parseMcGeneric, True, True, False, ),
3035 'IEM_MC_LOCAL': (McBlock.parseMcLocal, False, False, True, ),
3036 'IEM_MC_LOCAL_ASSIGN': (McBlock.parseMcLocalAssign, False, False, True, ),
3037 'IEM_MC_LOCAL_CONST': (McBlock.parseMcLocalConst, False, False, True, ),
3038 'IEM_MC_NOREF': (McBlock.parseMcGeneric, False, False, True, ),
3039 'IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT': (McBlock.parseMcGeneric, True, True, False, ),
3040 'IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE': (McBlock.parseMcGeneric, True, True, False, ),
3041 'IEM_MC_MAYBE_RAISE_FPU_XCPT': (McBlock.parseMcGeneric, True, True, False, ),
3042 'IEM_MC_MAYBE_RAISE_FSGSBASE_XCPT': (McBlock.parseMcGeneric, True, True, False, ),
3043 'IEM_MC_MAYBE_RAISE_MMX_RELATED_XCPT': (McBlock.parseMcGeneric, True, True, False, ),
3044 'IEM_MC_MAYBE_RAISE_NON_CANONICAL_ADDR_GP0': (McBlock.parseMcGeneric, True, True, False, ),
3045 'IEM_MC_MAYBE_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT': (McBlock.parseMcGeneric, True, True, False, ),
3046 'IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT': (McBlock.parseMcGeneric, True, True, False, ),
3047 'IEM_MC_MAYBE_RAISE_WAIT_DEVICE_NOT_AVAILABLE': (McBlock.parseMcGeneric, True, True, False, ),
3048 'IEM_MC_MEM_COMMIT_AND_UNMAP_ATOMIC': (McBlock.parseMcGeneric, True, True, True, ),
3049 'IEM_MC_MEM_COMMIT_AND_UNMAP_RW': (McBlock.parseMcGeneric, True, True, True, ),
3050 'IEM_MC_MEM_COMMIT_AND_UNMAP_RO': (McBlock.parseMcGeneric, True, True, True, ),
3051 'IEM_MC_MEM_COMMIT_AND_UNMAP_WO': (McBlock.parseMcGeneric, True, True, True, ),
3052 'IEM_MC_MEM_COMMIT_AND_UNMAP_FOR_FPU_STORE_WO': (McBlock.parseMcGeneric, True, True, False, ),
3053 'IEM_MC_MEM_MAP_D80_WO': (McBlock.parseMcGeneric, True, True, True, ),
3054 'IEM_MC_MEM_MAP_I16_WO': (McBlock.parseMcGeneric, True, True, True, ),
3055 'IEM_MC_MEM_MAP_I32_WO': (McBlock.parseMcGeneric, True, True, True, ),
3056 'IEM_MC_MEM_MAP_I64_WO': (McBlock.parseMcGeneric, True, True, True, ),
3057 'IEM_MC_MEM_MAP_R32_WO': (McBlock.parseMcGeneric, True, True, True, ),
3058 'IEM_MC_MEM_MAP_R64_WO': (McBlock.parseMcGeneric, True, True, True, ),
3059 'IEM_MC_MEM_MAP_R80_WO': (McBlock.parseMcGeneric, True, True, True, ),
3060 'IEM_MC_MEM_MAP_U8_ATOMIC': (McBlock.parseMcGeneric, True, True, True, ),
3061 'IEM_MC_MEM_MAP_U8_RW': (McBlock.parseMcGeneric, True, True, True, ),
3062 'IEM_MC_MEM_MAP_U8_RO': (McBlock.parseMcGeneric, True, True, True, ),
3063 'IEM_MC_MEM_MAP_U8_WO': (McBlock.parseMcGeneric, True, True, True, ),
3064 'IEM_MC_MEM_MAP_U16_ATOMIC': (McBlock.parseMcGeneric, True, True, True, ),
3065 'IEM_MC_MEM_MAP_U16_RW': (McBlock.parseMcGeneric, True, True, True, ),
3066 'IEM_MC_MEM_MAP_U16_RO': (McBlock.parseMcGeneric, True, True, True, ),
3067 'IEM_MC_MEM_MAP_U16_WO': (McBlock.parseMcGeneric, True, True, True, ),
3068 'IEM_MC_MEM_MAP_U32_ATOMIC': (McBlock.parseMcGeneric, True, True, True, ),
3069 'IEM_MC_MEM_MAP_U32_RW': (McBlock.parseMcGeneric, True, True, True, ),
3070 'IEM_MC_MEM_MAP_U32_RO': (McBlock.parseMcGeneric, True, True, True, ),
3071 'IEM_MC_MEM_MAP_U32_WO': (McBlock.parseMcGeneric, True, True, True, ),
3072 'IEM_MC_MEM_MAP_U64_ATOMIC': (McBlock.parseMcGeneric, True, True, True, ),
3073 'IEM_MC_MEM_MAP_U64_RW': (McBlock.parseMcGeneric, True, True, True, ),
3074 'IEM_MC_MEM_MAP_U64_RO': (McBlock.parseMcGeneric, True, True, True, ),
3075 'IEM_MC_MEM_MAP_U64_WO': (McBlock.parseMcGeneric, True, True, True, ),
3076 'IEM_MC_MEM_MAP_U128_ATOMIC': (McBlock.parseMcGeneric, True, True, True, ),
3077 'IEM_MC_MEM_MAP_U128_RW': (McBlock.parseMcGeneric, True, True, True, ),
3078 'IEM_MC_MEM_MAP_U128_RO': (McBlock.parseMcGeneric, True, True, True, ),
3079 'IEM_MC_MEM_MAP_U128_WO': (McBlock.parseMcGeneric, True, True, True, ),
3080 'IEM_MC_MEM_ROLLBACK_AND_UNMAP_WO': (McBlock.parseMcGeneric, True, True, True, ),
3081 'IEM_MC_MERGE_YREG_U32_U96_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
3082 'IEM_MC_MERGE_YREG_U64_U64_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
3083 'IEM_MC_MERGE_YREG_U64HI_U64HI_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
3084 'IEM_MC_MERGE_YREG_U64LO_U64LO_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
3085 'IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
3086 'IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
3087 'IEM_MC_MODIFIED_MREG': (McBlock.parseMcGeneric, True, True, False, ),
3088 'IEM_MC_MODIFIED_MREG_BY_REF': (McBlock.parseMcGeneric, True, True, False, ),
3089 'IEM_MC_OR_2LOCS_U32': (McBlock.parseMcGeneric, False, False, False, ),
3090 'IEM_MC_OR_GREG_U16': (McBlock.parseMcGeneric, True, True, False, ),
3091 'IEM_MC_OR_GREG_U32': (McBlock.parseMcGeneric, True, True, False, ),
3092 'IEM_MC_OR_GREG_U64': (McBlock.parseMcGeneric, True, True, False, ),
3093 'IEM_MC_OR_GREG_U8': (McBlock.parseMcGeneric, True, True, False, ),
3094 'IEM_MC_OR_LOCAL_U16': (McBlock.parseMcGeneric, False, False, False, ),
3095 'IEM_MC_OR_LOCAL_U32': (McBlock.parseMcGeneric, False, False, False, ),
3096 'IEM_MC_OR_LOCAL_U8': (McBlock.parseMcGeneric, False, False, False, ),
3097 'IEM_MC_POP_GREG_U16': (McBlock.parseMcGeneric, True, True, True, ),
3098 'IEM_MC_POP_GREG_U32': (McBlock.parseMcGeneric, True, True, True, ),
3099 'IEM_MC_POP_GREG_U64': (McBlock.parseMcGeneric, True, True, True, ),
3100 'IEM_MC_PREPARE_AVX_USAGE': (McBlock.parseMcGeneric, False, False, True),
3101 'IEM_MC_PREPARE_FPU_USAGE': (McBlock.parseMcGeneric, False, False, True),
3102 'IEM_MC_PREPARE_SSE_USAGE': (McBlock.parseMcGeneric, False, False, True),
3103 'IEM_MC_PUSH_FPU_RESULT': (McBlock.parseMcGeneric, True, True, False, ),
3104 'IEM_MC_PUSH_FPU_RESULT_MEM_OP': (McBlock.parseMcGeneric, True, True, False, ),
3105 'IEM_MC_PUSH_FPU_RESULT_TWO': (McBlock.parseMcGeneric, True, True, False, ),
3106 'IEM_MC_PUSH_U16': (McBlock.parseMcGeneric, True, True, True, ),
3107 'IEM_MC_PUSH_U32': (McBlock.parseMcGeneric, True, True, True, ),
3108 'IEM_MC_PUSH_U32_SREG': (McBlock.parseMcGeneric, True, True, True, ),
3109 'IEM_MC_PUSH_U64': (McBlock.parseMcGeneric, True, True, True, ),
3110 'IEM_MC_RAISE_DIVIDE_ERROR': (McBlock.parseMcGeneric, True, True, False, ),
3111 'IEM_MC_RAISE_GP0_IF_CPL_NOT_ZERO': (McBlock.parseMcGeneric, True, True, False, ),
3112 'IEM_MC_RAISE_GP0_IF_EFF_ADDR_UNALIGNED': (McBlock.parseMcGeneric, True, True, False, ),
3113 'IEM_MC_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT': (McBlock.parseMcGeneric, True, True, False, ),
3114 'IEM_MC_REF_EFLAGS': (McBlock.parseMcGeneric, False, False, True, ),
3115 'IEM_MC_REF_FPUREG': (McBlock.parseMcGeneric, False, False, False, ),
3116 'IEM_MC_REF_GREG_I32': (McBlock.parseMcGeneric, False, False, True, ),
3117 'IEM_MC_REF_GREG_I32_CONST': (McBlock.parseMcGeneric, False, False, True, ),
3118 'IEM_MC_REF_GREG_I64': (McBlock.parseMcGeneric, False, False, True, ),
3119 'IEM_MC_REF_GREG_I64_CONST': (McBlock.parseMcGeneric, False, False, True, ),
3120 'IEM_MC_REF_GREG_U16': (McBlock.parseMcGeneric, False, False, True, ),
3121 'IEM_MC_REF_GREG_U16_CONST': (McBlock.parseMcGeneric, False, False, True, ),
3122 'IEM_MC_REF_GREG_U32': (McBlock.parseMcGeneric, False, False, True, ),
3123 'IEM_MC_REF_GREG_U32_CONST': (McBlock.parseMcGeneric, False, False, True, ),
3124 'IEM_MC_REF_GREG_U64': (McBlock.parseMcGeneric, False, False, True, ),
3125 'IEM_MC_REF_GREG_U64_CONST': (McBlock.parseMcGeneric, False, False, True, ),
3126 'IEM_MC_REF_GREG_U8': (McBlock.parseMcGeneric, False, False, False, ), # threaded
3127 'IEM_MC_REF_GREG_U8_CONST': (McBlock.parseMcGeneric, False, False, False, ), # threaded
3128 'IEM_MC_REF_MREG_U32_CONST': (McBlock.parseMcGeneric, False, False, False, ),
3129 'IEM_MC_REF_MREG_U64': (McBlock.parseMcGeneric, False, False, False, ),
3130 'IEM_MC_REF_MREG_U64_CONST': (McBlock.parseMcGeneric, False, False, False, ),
3131 'IEM_MC_REF_MXCSR': (McBlock.parseMcGeneric, False, False, False, ),
3132 'IEM_MC_REF_XREG_R32_CONST': (McBlock.parseMcGeneric, False, False, False, ),
3133 'IEM_MC_REF_XREG_R64_CONST': (McBlock.parseMcGeneric, False, False, False, ),
3134 'IEM_MC_REF_XREG_U128': (McBlock.parseMcGeneric, False, False, False, ),
3135 'IEM_MC_REF_XREG_U128_CONST': (McBlock.parseMcGeneric, False, False, False, ),
3136 'IEM_MC_REF_XREG_U32_CONST': (McBlock.parseMcGeneric, False, False, False, ),
3137 'IEM_MC_REF_XREG_U64_CONST': (McBlock.parseMcGeneric, False, False, False, ),
3138 'IEM_MC_REF_XREG_XMM_CONST': (McBlock.parseMcGeneric, False, False, False, ),
3139 'IEM_MC_REF_YREG_U128': (McBlock.parseMcGeneric, False, False, False, ),
3140 'IEM_MC_REF_YREG_U128_CONST': (McBlock.parseMcGeneric, False, False, False, ),
3141 'IEM_MC_REF_YREG_U64_CONST': (McBlock.parseMcGeneric, False, False, False, ),
3142 'IEM_MC_REL_JMP_S16_AND_FINISH': (McBlock.parseMcGeneric, True, True, False, ),
3143 'IEM_MC_REL_JMP_S32_AND_FINISH': (McBlock.parseMcGeneric, True, True, False, ),
3144 'IEM_MC_REL_JMP_S8_AND_FINISH': (McBlock.parseMcGeneric, True, True, False, ),
3145 'IEM_MC_RETURN_ON_FAILURE': (McBlock.parseMcGeneric, False, False, False, ),
3146 'IEM_MC_SAR_LOCAL_S16': (McBlock.parseMcGeneric, False, False, False, ),
3147 'IEM_MC_SAR_LOCAL_S32': (McBlock.parseMcGeneric, False, False, False, ),
3148 'IEM_MC_SAR_LOCAL_S64': (McBlock.parseMcGeneric, False, False, False, ),
3149 'IEM_MC_SET_EFL_BIT': (McBlock.parseMcGeneric, True, True, False, ),
3150 'IEM_MC_SET_FPU_RESULT': (McBlock.parseMcGeneric, True, True, False, ),
3151 'IEM_MC_SET_RIP_U16_AND_FINISH': (McBlock.parseMcGeneric, True, True, False, ),
3152 'IEM_MC_SET_RIP_U32_AND_FINISH': (McBlock.parseMcGeneric, True, True, False, ),
3153 'IEM_MC_SET_RIP_U64_AND_FINISH': (McBlock.parseMcGeneric, True, True, False, ),
3154 'IEM_MC_SHL_LOCAL_S16': (McBlock.parseMcGeneric, False, False, False, ),
3155 'IEM_MC_SHL_LOCAL_S32': (McBlock.parseMcGeneric, False, False, False, ),
3156 'IEM_MC_SHL_LOCAL_S64': (McBlock.parseMcGeneric, False, False, False, ),
3157 'IEM_MC_SHR_LOCAL_U8': (McBlock.parseMcGeneric, False, False, False, ),
3158 'IEM_MC_SSE_UPDATE_MXCSR': (McBlock.parseMcGeneric, True, True, False, ),
3159 'IEM_MC_STORE_FPU_RESULT': (McBlock.parseMcGeneric, True, True, False, ),
3160 'IEM_MC_STORE_FPU_RESULT_MEM_OP': (McBlock.parseMcGeneric, True, True, False, ),
3161 'IEM_MC_STORE_FPU_RESULT_THEN_POP': (McBlock.parseMcGeneric, True, True, False, ),
3162 'IEM_MC_STORE_FPU_RESULT_WITH_MEM_OP_THEN_POP': (McBlock.parseMcGeneric, True, True, False, ),
3163 'IEM_MC_STORE_FPUREG_R80_SRC_REF': (McBlock.parseMcGeneric, True, True, False, ),
3164 'IEM_MC_STORE_GREG_I64': (McBlock.parseMcGeneric, True, True, False, ),
3165 'IEM_MC_STORE_GREG_U16': (McBlock.parseMcGeneric, True, True, True, ),
3166 'IEM_MC_STORE_GREG_U16_CONST': (McBlock.parseMcGeneric, True, True, True, ),
3167 'IEM_MC_STORE_GREG_U32': (McBlock.parseMcGeneric, True, True, True, ),
3168 'IEM_MC_STORE_GREG_U32_CONST': (McBlock.parseMcGeneric, True, True, True, ),
3169 'IEM_MC_STORE_GREG_U64': (McBlock.parseMcGeneric, True, True, True, ),
3170 'IEM_MC_STORE_GREG_U64_CONST': (McBlock.parseMcGeneric, True, True, True, ),
3171 'IEM_MC_STORE_GREG_U8': (McBlock.parseMcGeneric, True, True, True, ), # thrd var
3172 'IEM_MC_STORE_GREG_U8_CONST': (McBlock.parseMcGeneric, True, True, True, ), # thrd var
3173 'IEM_MC_STORE_GREG_PAIR_U32': (McBlock.parseMcGeneric, True, True, False, ),
3174 'IEM_MC_STORE_GREG_PAIR_U64': (McBlock.parseMcGeneric, True, True, False, ),
3175 'IEM_MC_STORE_MEM_I16_CONST_BY_REF': (McBlock.parseMcGeneric, True, True, False, ),
3176 'IEM_MC_STORE_MEM_I32_CONST_BY_REF': (McBlock.parseMcGeneric, True, True, False, ),
3177 'IEM_MC_STORE_MEM_I64_CONST_BY_REF': (McBlock.parseMcGeneric, True, True, False, ),
3178 'IEM_MC_STORE_MEM_I8_CONST_BY_REF': (McBlock.parseMcGeneric, True, True, False, ),
3179 'IEM_MC_STORE_MEM_INDEF_D80_BY_REF': (McBlock.parseMcGeneric, True, True, False, ),
3180 'IEM_MC_STORE_MEM_NEG_QNAN_R32_BY_REF': (McBlock.parseMcGeneric, True, True, False, ),
3181 'IEM_MC_STORE_MEM_NEG_QNAN_R64_BY_REF': (McBlock.parseMcGeneric, True, True, False, ),
3182 'IEM_MC_STORE_MEM_NEG_QNAN_R80_BY_REF': (McBlock.parseMcGeneric, True, True, False, ),
3183 'IEM_MC_STORE_MEM_U128': (McBlock.parseMcGeneric, True, True, False, ),
3184 'IEM_MC_STORE_MEM_U128_ALIGN_SSE': (McBlock.parseMcGeneric, True, True, False, ),
3185 'IEM_MC_STORE_MEM_U16': (McBlock.parseMcGeneric, True, True, True, ),
3186 'IEM_MC_STORE_MEM_U16_CONST': (McBlock.parseMcGeneric, True, True, True, ),
3187 'IEM_MC_STORE_MEM_U256': (McBlock.parseMcGeneric, True, True, False, ),
3188 'IEM_MC_STORE_MEM_U256_ALIGN_AVX': (McBlock.parseMcGeneric, True, True, False, ),
3189 'IEM_MC_STORE_MEM_U32': (McBlock.parseMcGeneric, True, True, True, ),
3190 'IEM_MC_STORE_MEM_U32_CONST': (McBlock.parseMcGeneric, True, True, True, ),
3191 'IEM_MC_STORE_MEM_U64': (McBlock.parseMcGeneric, True, True, True, ),
3192 'IEM_MC_STORE_MEM_U64_CONST': (McBlock.parseMcGeneric, True, True, True, ),
3193 'IEM_MC_STORE_MEM_U8': (McBlock.parseMcGeneric, True, True, True, ),
3194 'IEM_MC_STORE_MEM_U8_CONST': (McBlock.parseMcGeneric, True, True, True, ),
3195 'IEM_MC_STORE_MREG_U32_ZX_U64': (McBlock.parseMcGeneric, True, True, False, ),
3196 'IEM_MC_STORE_MREG_U64': (McBlock.parseMcGeneric, True, True, False, ),
3197 'IEM_MC_STORE_SREG_BASE_U32': (McBlock.parseMcGeneric, True, True, False, ),
3198 'IEM_MC_STORE_SREG_BASE_U64': (McBlock.parseMcGeneric, True, True, False, ),
3199 'IEM_MC_STORE_SSE_RESULT': (McBlock.parseMcGeneric, True, True, False, ),
3200 'IEM_MC_STORE_XREG_HI_U64': (McBlock.parseMcGeneric, True, True, False, ),
3201 'IEM_MC_STORE_XREG_R32': (McBlock.parseMcGeneric, True, True, False, ),
3202 'IEM_MC_STORE_XREG_R64': (McBlock.parseMcGeneric, True, True, False, ),
3203 'IEM_MC_STORE_XREG_U128': (McBlock.parseMcGeneric, True, True, False, ),
3204 'IEM_MC_STORE_XREG_U16': (McBlock.parseMcGeneric, True, True, False, ),
3205 'IEM_MC_STORE_XREG_U32': (McBlock.parseMcGeneric, True, True, False, ),
3206 'IEM_MC_STORE_XREG_U32_U128': (McBlock.parseMcGeneric, True, True, False, ),
3207 'IEM_MC_STORE_XREG_U32_ZX_U128': (McBlock.parseMcGeneric, True, True, False, ),
3208 'IEM_MC_STORE_XREG_U64': (McBlock.parseMcGeneric, True, True, False, ),
3209 'IEM_MC_STORE_XREG_U64_ZX_U128': (McBlock.parseMcGeneric, True, True, False, ),
3210 'IEM_MC_STORE_XREG_U8': (McBlock.parseMcGeneric, True, True, False, ),
3211 'IEM_MC_STORE_XREG_XMM': (McBlock.parseMcGeneric, True, True, False, ),
3212 'IEM_MC_STORE_XREG_XMM_U32': (McBlock.parseMcGeneric, True, True, False, ),
3213 'IEM_MC_STORE_XREG_XMM_U64': (McBlock.parseMcGeneric, True, True, False, ),
3214 'IEM_MC_STORE_YREG_U128': (McBlock.parseMcGeneric, True, True, False, ),
3215 'IEM_MC_STORE_YREG_U128_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
3216 'IEM_MC_STORE_YREG_U256_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
3217 'IEM_MC_STORE_YREG_U32_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
3218 'IEM_MC_STORE_YREG_U64_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
3219 'IEM_MC_SUB_GREG_U16': (McBlock.parseMcGeneric, True, True, True, ),
3220 'IEM_MC_SUB_GREG_U32': (McBlock.parseMcGeneric, True, True, True, ),
3221 'IEM_MC_SUB_GREG_U64': (McBlock.parseMcGeneric, True, True, True, ),
3222 'IEM_MC_SUB_LOCAL_U16': (McBlock.parseMcGeneric, False, False, False, ),
3223 'IEM_MC_UPDATE_FPU_OPCODE_IP': (McBlock.parseMcGeneric, True, True, False, ),
3224 'IEM_MC_UPDATE_FSW': (McBlock.parseMcGeneric, True, True, False, ),
3225 'IEM_MC_UPDATE_FSW_CONST': (McBlock.parseMcGeneric, True, True, False, ),
3226 'IEM_MC_UPDATE_FSW_THEN_POP': (McBlock.parseMcGeneric, True, True, False, ),
3227 'IEM_MC_UPDATE_FSW_THEN_POP_POP': (McBlock.parseMcGeneric, True, True, False, ),
3228 'IEM_MC_UPDATE_FSW_WITH_MEM_OP': (McBlock.parseMcGeneric, True, True, False, ),
3229 'IEM_MC_UPDATE_FSW_WITH_MEM_OP_THEN_POP': (McBlock.parseMcGeneric, True, True, False, ),
3230 'IEM_MC_NO_NATIVE_RECOMPILE': (McBlock.parseMcGeneric, False, False, False, ),
3231};
3232# pylint: enable=line-too-long
3233
## List of microcode blocks (module global, starts out empty).
g_aoMcBlocks = [] # type: List[McBlock]
3236
3237
3238
class ParserException(Exception):
    """
    Parser exception.

    Carries a single, already formatted message string.
    """
    def __init__(self, sMessage):
        super(ParserException, self).__init__(sMessage);
3243
3244
3245class SimpleParser(object): # pylint: disable=too-many-instance-attributes
3246 """
3247 Parser of IEMAllInstruction*.cpp.h instruction specifications.
3248 """
3249
3250 ## @name Parser state.
3251 ## @{
3252 kiCode = 0;
3253 kiCommentMulti = 1;
3254 ## @}
3255
class Macro(object):
    """
    A preprocessor macro: name, optional parameter list and replacement text.

    For macros with parameters a regular expression is prepared that finds
    each parameter name in the body, together with any '##' token-paste
    operator (and the whitespace around it) on either side of the name.
    """
    def __init__(self, sName, asArgs, sBody, iLine):
        self.sName  = sName;        ##< The macro name.
        self.asArgs = asArgs;       ##< None if simple macro, list of parameters otherwise.
        self.sBody  = sBody;        ##< The replacement text.
        self.iLine  = iLine;        ##< Line number supplied by the creator.
        ## Matches a parameter name (group 2).  Groups 1 and 3 are either a
        ## '##' operator incl. surrounding blanks or an empty word boundary.
        ## None if there are no parameters.
        self.oReArgMatch = re.compile(r'(\s*##\s*|\b)(' + '|'.join(asArgs) + r')(\s*##\s*|\b)') if asArgs else None;

    @staticmethod
    def _needSpace(ch):
        """ This is just to make the expanded output a bit prettier. """
        return ch.isspace() and ch != '(';

    def expandMacro(self, oParent, asArgs = None):
        """
        Expands the macro body with the given arguments.

        oParent is unused.  asArgs must hold one value per macro parameter
        (asserted), or be empty/None for a macro without parameters.

        Returns the expanded body string.  A '##' operator next to a
        parameter is consumed together with the parameter name, so the value
        is pasted directly onto its neighbour.
        """
        _ = oParent;
        sBody = self.sBody;

        if not self.oReArgMatch:
            assert not asArgs;
            return sBody;

        assert len(asArgs) == len(self.asArgs);
        #oParent.debug('%s: %s' % (self.sName, self.oReArgMatch.pattern,));
        dArgs = { self.asArgs[iArg]: sValue for iArg, sValue in enumerate(asArgs) };

        def replaceArg(oMatch):
            """ Returns the replacement text for one parameter match in sBody. """
            sValue = dArgs[oMatch.group(2)];
            sPre   = '';
            if not oMatch.group(1) and oMatch.start() > 0 and self._needSpace(sBody[oMatch.start()]):
                sPre = ' ';
            sPost  = '';
            if not oMatch.group(3) and oMatch.end() < len(sBody) and self._needSpace(sBody[oMatch.end()]):
                sPost = ' ';
            return sPre + sValue + sPost;

        # Substitute everything in a single pass over the original body.  The
        # word boundaries are thereby judged against the original text rather
        # than against values that were already inserted; otherwise a
        # parameter following a '##' paste (like 'a ## b') is no longer seen
        # as a separate word once the preceding value has been glued onto it.
        return self.oReArgMatch.sub(replaceArg, sBody);
3297
class PreprocessorConditional(object):
    """
    Preprocessor conditional (#if/#ifdef/#ifndef/#elif/#else/#endif).

    The constructor validates the expression against the small subset that is
    understood here: '0', '1', defined(XXX) terms combined with '||' / '&&',
    '!' and parentheses.  The arch evaluation helpers are more limited still
    and only evaluate 'defined(A) || defined(B) ...' chains and '0' / '1'.
    """

    ## Known defines.
    #   - A value of 1 indicates that it's always defined.
    #   - A value of 0 if it's always undefined
    #   - A value of -1 if it's an arch and it depends of script parameters.
    #   - A value of -2 if it's not recognized when filtering MC blocks.
    kdKnownDefines = {
        'IEM_WITH_ONE_BYTE_TABLE':          1,
        'IEM_WITH_TWO_BYTE_TABLE':          1,
        'IEM_WITH_THREE_0F_38':             1,
        'IEM_WITH_THREE_0F_3A':             1,
        'IEM_WITH_THREE_BYTE_TABLES':       1,
        'IEM_WITH_3DNOW':                   1,
        'IEM_WITH_3DNOW_TABLE':             1,
        'IEM_WITH_VEX':                     1,
        'IEM_WITH_VEX_TABLES':              1,
        'VBOX_WITH_NESTED_HWVIRT_VMX':      1,
        'VBOX_WITH_NESTED_HWVIRT_VMX_EPT':  1,
        'VBOX_WITH_NESTED_HWVIRT_SVM':      1,
        'LOG_ENABLED':                      1,
        'RT_WITHOUT_PRAGMA_ONCE':           0,
        'TST_IEM_CHECK_MC':                 0,
        'IEM_WITHOUT_ASSEMBLY':            -2, ##< @todo ??
        'RT_ARCH_AMD64':                   -1,
        'RT_ARCH_ARM64':                   -1,
        'RT_ARCH_ARM32':                   -1,
        'RT_ARCH_X86':                     -1,
        'RT_ARCH_SPARC':                   -1,
        'RT_ARCH_SPARC64':                 -1,
    };
    ## Build architecture name to RT_ARCH_XXX define.
    # NOTE(review): 'sparc32' maps to RT_ARCH_SPARC64 although RT_ARCH_SPARC
    #               is a known define too, and there are no entries for x86
    #               or arm32 - confirm whether this is intended.
    kdBuildArchToIprt = {
        'amd64':   'RT_ARCH_AMD64',
        'arm64':   'RT_ARCH_ARM64',
        'sparc32': 'RT_ARCH_SPARC64',
    };
    ## For parsing the next defined(xxxx).
    koMatchDefined = re.compile(r'\s*defined\s*\(\s*([^ \t)]+)\s*\)\s*');

    def __init__(self, sType, sExpr):
        """
        sType is the directive name; 'if' and 'elif' get sExpr checked as an
        expression, everything else gets it checked as a single define name.
        Raises Exception if sExpr is not supported.
        """
        self.sType   = sType;
        self.sExpr   = sExpr;   ##< Expression without command and no leading or trailing spaces.
        self.aoElif  = []       # type: List[PreprocessorConditional]
        # NOTE(review): starts out as an empty list, but is only ever tested
        #               for truthiness in this class.
        self.fInElse = [];
        if sType in ('if', 'elif'):
            self.checkExpression(sExpr);
        else:
            self.checkSupportedDefine(sExpr)

    @classmethod
    def checkSupportedDefine(cls, sDefine):
        """ Checks that sDefine is one that we support. Raises exception if unsupported. """
        #print('debug: checkSupportedDefine: %s' % (sDefine,), file = sys.stderr);
        if sDefine in cls.kdKnownDefines:
            return True;
        # Header guards are accepted without being listed.
        if sDefine.startswith('VMM_INCLUDED_') and sDefine.endswith('_h'):
            return True;
        raise Exception('Unsupported define: %s' % (sDefine,));

    @classmethod
    def checkExpression(cls, sExpr):
        """
        Check that the expression is supported. Raises exception if not.

        Returns True if the expression is fine.
        """
        #print('debug: checkExpression: %s' % (sExpr,), file = sys.stderr);
        if sExpr in ('0', '1'):
            return True;

        off    = 0;
        cParan = 0;
        while off < len(sExpr):
            ch = sExpr[off];

            # Unary operator or parentheses:
            if ch in ('(', '!'):
                if ch == '(':
                    cParan += 1;
                off += 1;
            else:
                # defined(xxxx)
                oMatch = cls.koMatchDefined.match(sExpr, off);
                if oMatch:
                    cls.checkSupportedDefine(oMatch.group(1));
                    off = oMatch.end();
                elif sExpr[off:] == '1':
                    # A literal '1' finishing the expression; there is no
                    # match object to take the end offset from here.
                    off = len(sExpr);
                else:
                    raise Exception('Cannot grok: \'%s\' (at %u in: \'%s\')' % (sExpr[off:off + 10], off + 1, sExpr,));

                # Look for closing parentheses.
                while off < len(sExpr) and sExpr[off].isspace():
                    off += 1;
                if cParan > 0:
                    while off < len(sExpr) and sExpr[off] == ')':
                        if cParan <= 0:
                            raise Exception('Unbalanced parentheses at %u in \'%s\'' % (off + 1, sExpr,));
                        cParan -= 1;
                        off    += 1;
                        while off < len(sExpr) and sExpr[off].isspace():
                            off += 1;

                # Look for binary operator.
                if off >= len(sExpr):
                    break;
                if sExpr[off:off + 2] in ('||', '&&'):
                    off += 2;
                else:
                    raise Exception('Cannot grok operator: \'%s\' (at %u in: \'%s\')' % (sExpr[off:off + 2], off + 1, sExpr,));

                # Skip spaces.
                while off < len(sExpr) and sExpr[off].isspace():
                    off += 1;
        if cParan != 0:
            raise Exception('Unbalanced parentheses at %u in \'%s\'' % (off + 1, sExpr,));
        return True;

    @classmethod
    def isArchIncludedInExpr(cls, sExpr, sArch):
        """
        Checks if sArch is included in the given expression.

        Returns True as soon as one defined() term holds for sArch (or a
        literal '1' ends the expression), False if none does.  Raises
        Exception for anything other than a '||' chain of defined() terms.
        """
        # We only grok defined() [|| defined()...] and [1|0] at the moment.
        if sExpr == '0':
            return False;
        if sExpr == '1':
            return True;
        off = 0;
        while off < len(sExpr):
            # defined(xxxx)
            oMatch = cls.koMatchDefined.match(sExpr, off);
            if not oMatch:
                if sExpr[off:] == '1':
                    return True;
                raise Exception('Cannot grok: %s (at %u in: %s)' % (sExpr[off:off + 10], off + 1, sExpr,));
            if cls.matchDefined(oMatch.group(1), sArch):
                return True;
            off = oMatch.end();

            # Look for OR operator.
            while off < len(sExpr) and sExpr[off].isspace():
                off += 1;
            if off >= len(sExpr):
                break;
            if sExpr.startswith('||', off):     # Must look at the current offset, not the start of the string.
                off += 2;
            else:
                raise Exception('Cannot grok: %s (at %u in: %s)' % (sExpr[off:off + 10], off + 1, sExpr,));

            # Skip spaces so a trailing literal '1' is recognized above, same as checkExpression does.
            while off < len(sExpr) and sExpr[off].isspace():
                off += 1;

        return False;

    @classmethod
    def matchArch(cls, sDefine, sArch):
        """
        Compares sDefine (RT_ARCH_XXXX) and sArch (x86, amd64, arm64, ++).

        Raises KeyError if sArch has no entry in kdBuildArchToIprt.
        """
        return cls.kdBuildArchToIprt[sArch] == sDefine;

    @classmethod
    def matchDefined(cls, sExpr, sArch):
        """
        Check the result of an ifdef/ifndef expression, given sArch.

        Defines not found in kdKnownDefines are treated as undefined.  Raises
        Exception for defines marked -2 (not usable for MC block filtering).
        """
        iDefine = cls.kdKnownDefines.get(sExpr, 0);
        if iDefine == -2:
            raise Exception('Unsupported define for MC block filtering: %s' % (sExpr,));
        return iDefine == 1 or (iDefine == -1 and cls.matchArch(sExpr, sArch));

    def isArchIncludedInPrimaryBlock(self, sArch):
        """ Checks if sArch is included in the (primary) 'if' block. """
        if self.sType == 'ifdef':
            return self.matchDefined(self.sExpr, sArch);
        if self.sType == 'ifndef':
            return not self.matchDefined(self.sExpr, sArch);
        return self.isArchIncludedInExpr(self.sExpr, sArch);

    @staticmethod
    def isInBlockForArch(aoCppCondStack, sArch, iLine):
        """
        Checks if sArch is included in the current conditional block.

        Every conditional on the stack must select the branch we are
        currently in for sArch; iLine is only there for debugging.
        """
        _ = iLine;
        #print('debug: isInBlockForArch(%s,%s); line %s' % (len(aoCppCondStack), sArch, iLine), file = sys.stderr);
        for oCond in aoCppCondStack:
            if oCond.isArchIncludedInPrimaryBlock(sArch):
                # The primary block is taken, so we must not be in an #elif or #else part.
                if oCond.aoElif or oCond.fInElse:
                    #print('debug: isInBlockForArch -> False #1', file = sys.stderr);
                    return False;
                #print('debug: isInBlockForArch(%s,%s): in IF-block' % (len(aoCppCondStack), sArch), file = sys.stderr);
            else:
                # Only the most recent #elif may match, and only if we are not in the #else part.
                fFine = False;
                for oElifCond in oCond.aoElif:
                    if oElifCond.isArchIncludedInPrimaryBlock(sArch):
                        if oElifCond is not oCond.aoElif[-1] or oCond.fInElse:
                            #print('debug: isInBlockForArch -> False #3', file = sys.stderr);
                            return False;
                        fFine = True;
                # Nothing matched: fine only when we are in the #else part.
                if not fFine and not oCond.fInElse:
                    #print('debug: isInBlockForArch -> False #4', file = sys.stderr);
                    return False;
        #print('debug: isInBlockForArch -> True', file = sys.stderr);
        return True;
3488
def __init__(self, sSrcFile, asLines, sDefaultMap, sHostArch, oInheritMacrosFrom = None):
    """
    Initializes the parser state for one source file.

    sSrcFile and asLines are the file name and its lines.  sDefaultMap must
    be a key of g_dInstructionMaps (asserted).  sHostArch is stored for the
    preprocessor conditional filtering.  If oInheritMacrosFrom is given, its
    macro dictionary is copied and its macro regular expression is reused.
    """
    # The input and where we are in it.
    self.sSrcFile       = sSrcFile;
    self.asLines        = asLines;
    self.iLine          = 0;
    self.iState         = self.kiCode;
    self.sComment       = '';
    self.iCommentLine   = 0;

    # What is currently being collected.
    self.aoCurInstrs    = []   # type: List[Instruction]
    self.oCurFunction   = None # type: DecoderFunction
    self.iMcBlockInFunc = 0;
    self.oCurMcBlock    = None # type: McBlock

    # Macros, possibly starting out with those of another parser.
    if oInheritMacrosFrom:
        self.dMacros    = dict(oInheritMacrosFrom.dMacros); # type: Dict[str, SimpleParser.Macro]
        self.oReMacros  = oInheritMacrosFrom.oReMacros;     ##< Regular expression matching invocations of anything in self.dMacros.
    else:
        self.dMacros    = {};   # type: Dict[str, SimpleParser.Macro]
        self.oReMacros  = None; ##< Regular expression matching invocations of anything in self.dMacros.

    self.aoCppCondStack = []   # type: List[PreprocessorConditional] ##< Preprocessor conditional stack.
    self.sHostArch      = sHostArch;

    assert sDefaultMap in g_dInstructionMaps;
    self.oDefaultMap    = g_dInstructionMaps[sDefaultMap];

    # Statistics.
    self.cTotalInstr    = 0;
    self.cTotalStubs    = 0;
    self.cTotalTagged   = 0;
    self.cTotalMcBlocks = 0;

    # Regular expressions.  Macro names, mnemonics and stats names are all plain C identifiers.
    sReIdentifier = r'^[A-Za-z_][A-Za-z0-9_]*$';
    self.oReMacroName   = re.compile(sReIdentifier);
    self.oReMnemonic    = re.compile(sReIdentifier);
    self.oReStatsName   = re.compile(sReIdentifier);
    self.oReFunctionName= re.compile(r'^iemOp_[A-Za-z_][A-Za-z0-9_]*$');
    self.oReGroupName   = re.compile(r'^og_[a-z0-9]+(|_[a-z0-9]+|_[a-z0-9]+_[a-z0-9]+)$');
    self.oReDisEnum     = re.compile(r'^OP_[A-Z0-9_]+$');
    self.oReFunTable    = re.compile(r'^(IEM_STATIC|static) +const +PFNIEMOP +g_apfn[A-Za-z0-9_]+ *\[ *\d* *\] *= *$');
    self.oReComment     = re.compile(r'//.*?$|/\*.*?\*/'); ## Full comments.
    self.oReHashDefine2 = re.compile(r'(?s)\A\s*([A-Za-z_][A-Za-z0-9_]*)\(([^)]*)\)\s*(.*)\Z'); ##< With arguments.
    self.oReHashDefine3 = re.compile(r'(?s)\A\s*([A-Za-z_][A-Za-z0-9_]*)[^(]\s*(.*)\Z'); ##< Simple, no arguments.
    self.oReMcBeginEnd  = re.compile(r'\bIEM_MC_(BEGIN|END|DEFER_TO_CIMPL_[1-5]_RET)\s*\('); ##> Not DEFER_TO_CIMPL_0_RET!

    # Debug switches.
    self.fDebug         = True;
    self.fDebugMc       = False;
    self.fDebugPreproc  = False;

    # Comment tag to handler method.
    self.dTagHandlers = {
        '@opbrief':     self.parseTagOpBrief,
        '@opdesc':      self.parseTagOpDesc,
        '@opmnemonic':  self.parseTagOpMnemonic,
        '@op1':         self.parseTagOpOperandN,
        '@op2':         self.parseTagOpOperandN,
        '@op3':         self.parseTagOpOperandN,
        '@op4':         self.parseTagOpOperandN,
        '@oppfx':       self.parseTagOpPfx,
        '@opmaps':      self.parseTagOpMaps,
        '@opcode':      self.parseTagOpcode,
        '@opcodesub':   self.parseTagOpcodeSub,
        '@openc':       self.parseTagOpEnc,
        #@opfltest: Lists all flags that will be used as input in some way.
        '@opfltest':    self.parseTagOpEFlags,
        #@opflmodify: Lists all EFLAGS modified. Includes @opflset, @opflcleared and @opflundef (if applicable).
        '@opflmodify':  self.parseTagOpEFlags,
        #@opflset: Lists all flags that will be set (set to 1).
        '@opflset':     self.parseTagOpEFlags,
        #@opflclear: Lists all flags that will be cleared (set to 0).
        '@opflclear':   self.parseTagOpEFlags,
        #@opflundef: List of flag documented as undefined.
        '@opflundef':   self.parseTagOpEFlags,
        #@opflclass: Shorthand for defining flag behaviour (@opfltest, @opflmodify, @opflset, @opflclear, @opflundef).
        '@opflclass':   self.parseTagOpEFlagsClass,
        '@ophints':     self.parseTagOpHints,
        '@opdisenum':   self.parseTagOpDisEnum,
        '@opmincpu':    self.parseTagOpMinCpu,
        '@opcpuid':     self.parseTagOpCpuId,
        '@opgroup':     self.parseTagOpGroup,
        '@opunused':    self.parseTagOpUnusedInvalid,
        '@opinvalid':   self.parseTagOpUnusedInvalid,
        '@opinvlstyle': self.parseTagOpUnusedInvalid,
        '@optest':      self.parseTagOpTest,
        '@optestign':   self.parseTagOpTestIgnore,
        '@optestignore': self.parseTagOpTestIgnore,
        '@opcopytests': self.parseTagOpCopyTests,
        '@oponly':      self.parseTagOpOnlyTest,
        '@oponlytest':  self.parseTagOpOnlyTest,
        '@opxcpttype':  self.parseTagOpXcptType,
        '@opstats':     self.parseTagOpStats,
        '@opfunction':  self.parseTagOpFunction,
        '@opdone':      self.parseTagOpDone,
    };
    # The numbered test tags come in two spellings: @optestN and @optest[N].
    for iTest in range(48):
        for sTagFmt in ('@optest%u', '@optest[%u]'):
            self.dTagHandlers[sTagFmt % (iTest,)] = self.parseTagOpTestNum;

    # Errors collected by error(), errorOnLine() and errorComment().
    self.asErrors = [];
3580
def raiseError(self, sMessage):
    """
    Raises a ParserException for sMessage, prefixed with the source file
    name and the current line number.
    """
    sFullMessage = "%s:%d: error: %s" % (self.sSrcFile, self.iLine, sMessage,);
    raise ParserException(sFullMessage);
3586
def raiseCommentError(self, iLineInComment, sMessage):
    """
    Like raiseError, except that the reported line number is
    self.iCommentLine + iLineInComment.
    """
    iReportLine  = self.iCommentLine + iLineInComment;
    sFullMessage = "%s:%d: error: %s" % (self.sSrcFile, iReportLine, sMessage,);
    raise ParserException(sFullMessage);
3592
def error(self, sMessage):
    """
    Records an error for the current line in self.asErrors.
    Always returns False.
    """
    sErrorLine = u'%s:%d: error: %s\n' % (self.sSrcFile, self.iLine, sMessage,);
    self.asErrors.append(sErrorLine);
    return False;
3600
def errorOnLine(self, iLine, sMessage):
    """
    Records an error for the given line number in self.asErrors.
    Always returns False.
    """
    sErrorLine = u'%s:%d: error: %s\n' % (self.sSrcFile, iLine, sMessage,);
    self.asErrors.append(sErrorLine);
    return False;
3608
def errorComment(self, iLineInComment, sMessage):
    """
    Records an error for a line inside the current comment; the reported
    line number is self.iCommentLine + iLineInComment.
    Always returns False.
    """
    iReportLine = self.iCommentLine + iLineInComment;
    sErrorLine  = u'%s:%d: error: %s\n' % (self.sSrcFile, iReportLine, sMessage,);
    self.asErrors.append(sErrorLine);
    return False;
3616
def printErrors(self):
    """
    Writes all collected errors to stderr in one go.
    Returns the number of errors.
    """
    cErrors = len(self.asErrors);
    if cErrors > 0:
        sys.stderr.write(u''.join(self.asErrors));
    return cErrors;
3625
def debug(self, sMessage):
    """
    For debugging: prints sMessage to stderr with a 'debug: ' prefix, but
    only when self.fDebug is set.
    """
    if not self.fDebug:
        return;
    print('debug: %s' % (sMessage,), file = sys.stderr);
3632
def stripComments(self, sLine):
    """
    Returns sLine with each comment matched by self.oReComment replaced by
    a single space.

    Registers an error (via self.error) if a comment start or end marker
    is still present afterwards, i.e. an incomplete multi-line comment.
    """
    sStripped = self.oReComment.sub(" ", sLine);
    if '/*' in sStripped or '*/' in sStripped:
        self.error('Unexpected multi-line comment will not be handled correctly. Please simplify.');
    return sStripped;
3643
def parseFunctionTable(self, sLine):
    """
    Parses a PFNIEMOP table, updating/checking the @oppfx value.

    sLine is the line holding the table declaration; the table name and the
    optional [256] / [32] size are extracted from it.  The entries are read
    from self.asLines starting at self.iLine.

    Returns True when the closing brace is found and the entry count matches
    the expected table length, otherwise False after registering an error.

    Note! Updates iLine as it consumes the whole table.
    """

    #
    # Extract the table name.
    #
    sName = re.search(r' *([a-zA-Z_0-9]+) *\[', sLine).group(1);
    oMap = g_dInstructionMapsByIemName.get(sName);
    if not oMap:
        self.debug('No map for PFNIEMOP table: %s' % (sName,));
        oMap = self.oDefaultMap; # This is wrong wrong wrong.

    #
    # All but the g_apfnOneByteMap & g_apfnEscF1_E0toFF tables uses four
    # entries per byte:
    # no prefix, 066h prefix, f3h prefix, f2h prefix
    # Those tables has 256 & 32 entries respectively.
    #
    cEntriesPerByte = 4;
    cValidTableLength = 1024;
    asPrefixes = ('none', '0x66', '0xf3', '0xf2');

    # An explicit [256] or [32] size selects the one-entry-per-byte layout.
    oEntriesMatch = re.search(r'\[ *(256|32) *\]', sLine);
    if oEntriesMatch:
        cEntriesPerByte = 1;
        cValidTableLength = int(oEntriesMatch.group(1));
        asPrefixes = (None,);

    #
    # The next line should be '{' and nothing else.
    #
    if self.iLine >= len(self.asLines) or not re.match('^ *{ *$', self.asLines[self.iLine]):
        return self.errorOnLine(self.iLine + 1, 'Expected lone "{" on line following PFNIEMOP table %s start' % (sName, ));
    self.iLine += 1;

    #
    # Parse till we find the end of the table.
    #
    iEntry = 0;
    while self.iLine < len(self.asLines):
        # Get the next line and strip comments and spaces (assumes no
        # multi-line comments).
        sLine = self.asLines[self.iLine];
        self.iLine += 1;
        sLine = self.stripComments(sLine).strip();

        # Split the line up into entries, expanding IEMOP_X4 usage.
        # Walking backwards keeps the indexes still to be visited valid
        # while entries are inserted after or deleted at the current one.
        asEntries = sLine.split(',');
        for i in range(len(asEntries) - 1, -1, -1):
            sEntry = asEntries[i].strip();
            if sEntry.startswith('IEMOP_X4(') and sEntry[-1] == ')':
                # Three extra copies here plus the one stored at index i below makes four.
                sEntry = (sEntry[len('IEMOP_X4('):-1]).strip();
                asEntries.insert(i + 1, sEntry);
                asEntries.insert(i + 1, sEntry);
                asEntries.insert(i + 1, sEntry);
            if sEntry:
                asEntries[i] = sEntry;
            else:
                del asEntries[i];

        # Process the entries.
        for sEntry in asEntries:
            # The closing brace ends the table; check that we got the expected number of entries.
            if sEntry in ('};', '}'):
                if iEntry != cValidTableLength:
                    return self.error('Wrong table length for %s: %#x, expected %#x' % (sName, iEntry, cValidTableLength, ));
                return True;
            if sEntry.startswith('iemOp_Invalid'):
                pass; # skip
            else:
                # Look up matching instruction by function.
                sPrefix = asPrefixes[iEntry % cEntriesPerByte];
                sOpcode = '%#04x' % (iEntry // cEntriesPerByte);
                aoInstr = g_dAllInstructionsByFunction.get(sEntry);
                if aoInstr:
                    if not isinstance(aoInstr, list):
                        aoInstr = [aoInstr,];
                    # Use the first instruction that either matches opcode and prefix
                    # already, or has them unset, in which case they are filled in from
                    # the table position.
                    oInstr = None;
                    for oCurInstr in aoInstr:
                        if oCurInstr.sOpcode == sOpcode and oCurInstr.sPrefix == sPrefix:
                            pass;
                        elif oCurInstr.sOpcode == sOpcode and oCurInstr.sPrefix is None:
                            oCurInstr.sPrefix = sPrefix;
                        elif oCurInstr.sOpcode is None and oCurInstr.sPrefix is None:
                            oCurInstr.sOpcode = sOpcode;
                            oCurInstr.sPrefix = sPrefix;
                        else:
                            continue;
                        oInstr = oCurInstr;
                        break;
                    # No usable instruction: register a copy of the first one for this
                    # map, opcode and prefix.
                    if not oInstr:
                        oInstr = aoInstr[0].copy(oMap = oMap, sOpcode = sOpcode, sPrefix = sPrefix);
                        aoInstr.append(oInstr);
                        g_dAllInstructionsByFunction[sEntry] = aoInstr;
                        g_aoAllInstructions.append(oInstr);
                        oMap.aoInstructions.append(oInstr);
                else:
                    self.debug('Function "%s", entry %#04x / byte %#04x in %s, is not associated with an instruction.'
                               % (sEntry, iEntry, iEntry // cEntriesPerByte, sName,));
            # Invalid entries count as table slots too.
            iEntry += 1;

    return self.error('Unexpected end of file in PFNIEMOP table');
3749
def addInstruction(self, iLine = None):
    """
    Creates a new instruction and makes it one of the current ones.

    The instruction is created for self.sSrcFile at line iLine, falling back
    on self.iLine when iLine is None.  It is appended to the global list of
    all instructions as well as to the list of instructions currently being
    parsed (self.aoCurInstrs).

    Returns the new Instruction object.
    """
    iSrcLine = iLine if iLine is not None else self.iLine;
    oNewInstr = Instruction(self.sSrcFile, iSrcLine);
    for aoList in (g_aoAllInstructions, self.aoCurInstrs):
        aoList.append(oNewInstr);
    return oNewInstr;
3758
def deriveMnemonicAndOperandsFromStats(self, oInstr, sStats):
    """
    Derives the mnemonic and operands from a IEM stats base name like string.

    Nothing is done when the instruction already has a mnemonic.  Otherwise
    the first '_' separated word of sStats becomes the lower-cased mnemonic
    and, provided the instruction has no operands yet, each of the following
    words is looked up in g_kdOpTypes and appended as an operand.

    Returns True on success, False if an unknown operand type is encountered
    (the operands preceding the unknown one stay added).
    """
    # An already known mnemonic is never replaced.
    if oInstr.sMnemonic is not None:
        return True;

    asParts = sStats.split('_');
    oInstr.sMnemonic = asParts[0].lower();

    asTypes = asParts[1:];
    if asTypes and not oInstr.aoOperands:
        for sOpType in asTypes:
            if sOpType not in g_kdOpTypes:
                # Unknown operand type: bail out quietly, no error is reported.
                return False;
            oInstr.aoOperands.append(Operand(g_kdOpTypes[sOpType][1], sOpType));
    return True;
3774
def doneInstructionOne(self, oInstr, iLine):
    """
    Complete the parsing by processing, validating and expanding raw inputs.

    Records iLine as the completion line, fills in whatever can be derived
    (mnemonic, disassembler enum, stats name, function name, maps and
    encoding) and registers the instruction in its maps and in the global
    stats and function indexed dictionaries.

    Returns True.
    """
    assert oInstr.iLineCompleted is None;
    oInstr.iLineCompleted = iLine;

    # Note: Tagged (cOpTags > 0) and untagged legacy instructions are not
    #       treated differently here, both get the common defaults below.

    #
    # Common defaults.
    #

    # Guess mnemonic and operands if the former is missing, preferring the
    # stats name over the function name as source.
    if oInstr.sMnemonic is None:
        sGuessSource = oInstr.sStats;
        if sGuessSource is None and oInstr.sFunction is not None:
            sGuessSource = oInstr.sFunction.replace('iemOp_', '');
        if sGuessSource is not None:
            self.deriveMnemonicAndOperandsFromStats(oInstr, sGuessSource);

    # The disassembler op enum constant follows from the mnemonic.
    if oInstr.sDisEnum is None and oInstr.sMnemonic is not None:
        oInstr.sDisEnum = 'OP_' + oInstr.sMnemonic.upper();

    # The IEM statistics base name comes from the function name, or else from
    # the mnemonic and the operand types.
    if oInstr.sStats is None:
        if oInstr.sFunction is not None:
            oInstr.sStats = oInstr.sFunction.replace('iemOp_', '');
        elif oInstr.sMnemonic is not None:
            asTypes = [oOperand.sType for oOperand in oInstr.aoOperands if oOperand.sType];
            oInstr.sStats = '_'.join([oInstr.sMnemonic,] + asTypes);

    # The IEM function name comes from the mnemonic and the operand types, or
    # else from the stats name.
    if oInstr.sFunction is None:
        if oInstr.sMnemonic is not None:
            asTypes = [oOperand.sType for oOperand in oInstr.aoOperands if oOperand.sType];
            oInstr.sFunction = 'iemOp_' + '_'.join([oInstr.sMnemonic,] + asTypes);
        elif oInstr.sStats:
            oInstr.sFunction = 'iemOp_' + oInstr.sStats;

    #
    # Apply the default map if none was given, then add the instruction to
    # each of its maps.
    #
    if not oInstr.aoMaps:
        oInstr.aoMaps = [ self.oDefaultMap, ];
    for oCurMap in oInstr.aoMaps:
        oCurMap.aoInstructions.append(oInstr);

    #
    # Derive the encoding from operands and maps.  It is left unset when
    # there are operands and the first one does not use ModR/M.
    #
    if oInstr.sEncoding is None:
        if not oInstr.aoOperands:
            if oInstr.fUnused and oInstr.sSubOpcode:
                asChoices = ('ModR/M', 'VEX.ModR/M');
            else:
                asChoices = ('fixed', 'VEX.fixed');
            oInstr.sEncoding = asChoices[1] if oInstr.onlyInVexMaps() else asChoices[0];
        elif oInstr.aoOperands[0].usesModRM():
            fVex = (len(oInstr.aoOperands) >= 2 and oInstr.aoOperands[1].sWhere == 'vvvv') \
                or oInstr.onlyInVexMaps();
            oInstr.sEncoding = 'VEX.ModR/M' if fVex else 'ModR/M';

    #
    # Add to the opstat indexed dictionary, complaining about duplicates.
    #
    if oInstr.sStats:
        if oInstr.sStats in g_dAllInstructionsByStat:
            self.error('Duplicate opstat value "%s"\nnew: %s\nold: %s'
                       % (oInstr.sStats, oInstr, g_dAllInstructionsByStat[oInstr.sStats],));
        else:
            g_dAllInstructionsByStat[oInstr.sStats] = oInstr;

    #
    # Add to the function indexed dictionary, where several instructions may
    # share one function.
    #
    if oInstr.sFunction:
        g_dAllInstructionsByFunction.setdefault(oInstr.sFunction, []).append(oInstr);

    return True;
3880
def doneInstructions(self, iLineInComment = None, fEndOfFunction = False):
    """
    Done with the current instruction(s).

    Completes every instruction in self.aoCurInstrs via doneInstructionOne(),
    using self.iLine as completion line, or self.iCommentLine + iLineInComment
    when iLineInComment is given.  The stub and instruction totals are
    updated, and the current comment and instruction list are reset.

    When fEndOfFunction is set, the current function (if any) is completed
    with the source lines from its first line up to self.iLine, and the
    per-function state is cleared.

    Returns True.
    """
    for oCurInstr in self.aoCurInstrs:
        if iLineInComment is None:
            iDoneLine = self.iLine;
        else:
            iDoneLine = self.iCommentLine + iLineInComment;
        self.doneInstructionOne(oCurInstr, iDoneLine);
        if oCurInstr.fStub:
            self.cTotalStubs += 1;

    self.cTotalInstr += len(self.aoCurInstrs);

    # Start afresh for whatever comes next.
    self.sComment    = '';
    self.aoCurInstrs = [];

    if not fEndOfFunction:
        return True;

    oFunction = self.oCurFunction;
    if oFunction:
        oFunction.complete(self.iLine, self.asLines[oFunction.iBeginLine - 1 : self.iLine]);
    self.oCurFunction   = None;
    self.iMcBlockInFunc = 0;
    return True;
3901
def setInstrunctionAttrib(self, sAttrib, oValue, fOverwrite = False):
    """
    Sets the sAttrib attribute of all current instructions to oValue.

    Unless fOverwrite is True, only attributes that are currently None are
    replaced; any other existing value is left alone.
    """
    for oCurInstr in self.aoCurInstrs:
        if fOverwrite is True or getattr(oCurInstr, sAttrib) is None:
            setattr(oCurInstr, sAttrib, oValue);
3913
def setInstrunctionArrayAttrib(self, sAttrib, iEntry, oValue, fOverwrite = False):
    """
    Sets entry iEntry of the array attribute sAttrib of all current
    instructions to oValue.

    The array is padded with None entries as needed to make iEntry a valid
    index.  Unless fOverwrite is True, only entries that are currently None
    are replaced.
    """
    for oCurInstr in self.aoCurInstrs:
        aoEntries = getattr(oCurInstr, sAttrib);

        # Grow the array so that iEntry exists.
        cMissing = iEntry + 1 - len(aoEntries);
        if cMissing > 0:
            aoEntries.extend([None,] * cMissing);

        if fOverwrite is not True and aoEntries[iEntry] is not None:
            continue;
        aoEntries[iEntry] = oValue;
3925
def parseCommentOldOpcode(self, asLines):
    """
    Deals with 'Opcode 0xff /4' like comments.

    If the first comment line starts with the word 'Opcode' followed by a
    word beginning with '0x' or '0X', everything after 'Opcode' on that line
    is stored as 'sRawOldOpcodes' on the current instructions that have no
    value for it yet (see setInstrunctionAttrib).  Upper case '0X' prefixes
    are normalized to '0x' first.

    Always returns False.
    """
    asWords = asLines[0].split() if asLines else [];
    if      len(asWords) >= 2 \
        and asWords[0] == 'Opcode' \
        and asWords[1].startswith(('0x', '0X')):
        # Drop the leading 'Opcode' word and normalize the hex prefixes of
        # the remaining ones.
        asOpcodes = [('0x' + sWord[2:]) if sWord.startswith('0X') else sWord for sWord in asWords[1:]];
        self.setInstrunctionAttrib('sRawOldOpcodes', ' '.join(asOpcodes));

    return False;
3941
def ensureInstructionForOpTag(self, iTagLine):
    """
    Ensure there is an instruction for the op-tag being parsed.

    A new instruction is added at self.iCommentLine + iTagLine when there are
    no current instructions.  The tag count of every current instruction is
    incremented, and self.cTotalTagged is bumped for each instruction that
    receives its first tag.

    Returns the last of the current instructions.
    """
    if len(self.aoCurInstrs) == 0:
        self.addInstruction(self.iCommentLine + iTagLine);

    for oCurInstr in self.aoCurInstrs:
        fFirstTag = oCurInstr.cOpTags == 0;
        oCurInstr.cOpTags += 1;
        if fFirstTag:
            self.cTotalTagged += 1;

    return self.aoCurInstrs[-1];
3951
3952 @staticmethod
3953 def flattenSections(aasSections):
3954 """
3955 Flattens multiline sections into stripped single strings.
3956 Returns list of strings, on section per string.
3957 """
3958 asRet = [];
3959 for asLines in aasSections:
3960 if asLines:
3961 asRet.append(' '.join([sLine.strip() for sLine in asLines]));
3962 return asRet;
3963
3964 @staticmethod
3965 def flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = '\n'):
3966 """
3967 Flattens sections into a simple stripped string with newlines as
3968 section breaks. The final section does not sport a trailing newline.
3969 """
3970 # Typical: One section with a single line.
3971 if len(aasSections) == 1 and len(aasSections[0]) == 1:
3972 return aasSections[0][0].strip();
3973
3974 sRet = '';
3975 for iSection, asLines in enumerate(aasSections):
3976 if asLines:
3977 if iSection > 0:
3978 sRet += sSectionSep;
3979 sRet += sLineSep.join([sLine.strip() for sLine in asLines]);
3980 return sRet;
3981
3982
3983
3984 ## @name Tag parsers
3985 ## @{
3986
def parseTagOpBrief(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opbrief
    Value:  Text description, multiple sections, appended.

    Brief description.  If not given, it's the first sentence from @opdesc.

    The value must be a single sentence of at most 180 characters; a
    terminating '.' is added if missing.  The validated text is stored in the
    sBrief attribute of the instruction, which must not have been set before.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sBrief = self.flattenAllSections(aasSections);
    if not sBrief:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if sBrief[-1] != '.':
        sBrief = sBrief + '.';
    if len(sBrief) > 180:
        return self.errorComment(iTagLine, '%s: value too long (max 180 chars): %s' % (sTag, sBrief));

    # Look for a sentence ending, i.e. a dot followed by a space, that isn't
    # at the very end of the text.  Dots inside words (like '1.5') are skipped.
    offDot = sBrief.find('.');
    while 0 <= offDot < len(sBrief) - 1 and sBrief[offDot + 1] != ' ':
        offDot = sBrief.find('.', offDot + 1);
    if offDot >= 0 and offDot != len(sBrief) - 1:
        return self.errorComment(iTagLine, '%s: only one sentence: %s' % (sTag, sBrief));

    # Update the instruction, refusing to overwrite an earlier brief.
    if oInstr.sBrief is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite brief "%s" with "%s"'
                                           % (sTag, oInstr.sBrief, sBrief,));
    oInstr.sBrief = sBrief;

    _ = iEndLine;
    return True;
4016
def parseTagOpDesc(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opdesc
    Value:  Text description, multiple sections, appended.

    It is used to describe instructions.  Each non-empty section is flattened
    into a single string and appended to the asDescSections list of the
    instruction.

    Returns True.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    if aasSections:
        for sSection in self.flattenSections(aasSections):
            oInstr.asDescSections.append(sSection);

    _ = sTag; _ = iEndLine;
    return True;
4031
def parseTagOpMnemonic(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opmnemonic
    Value:  mnemonic

    The value is checked against self.oReMnemonic.  Because of prefixes,
    groups and whatnot, there are times when the mnemonic isn't that of an
    actual assembler mnemonic.

    Returns True on success, otherwise the result of self.errorComment().
    An already set mnemonic is never overwritten.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value and check that it is well formed.
    sNewMnemonic = self.flattenAllSections(aasSections);
    if not self.oReMnemonic.match(sNewMnemonic):
        sError = '%s: invalid menmonic name: "%s"' % (sTag, sNewMnemonic,);
        return self.errorComment(iTagLine, sError);

    # Only set it once.
    sOldMnemonic = oInstr.sMnemonic;
    if sOldMnemonic is not None:
        sError = '%s: attempting to overwrite menmonic "%s" with "%s"' % (sTag, sOldMnemonic, sNewMnemonic,);
        return self.errorComment(iTagLine, sError);
    oInstr.sMnemonic = sNewMnemonic;

    _ = iEndLine;
    return True;
4054
def parseTagOpOperandN(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tags:  @op1, @op2, @op3, @op4
    Value: [where:]type

    The 'where' value indicates where the operand is found, like the 'reg'
    part of the ModR/M encoding.  It must be a key of g_kdOpLocations.  When
    omitted, the location is taken from the g_kdOpTypes entry of the type.

    The 'type' value indicates the operand type.  These follow the types
    given in the opcode tables in the CPU reference manuals.  It must be a
    key of g_kdOpTypes.

    The operand index is taken from the last character of the tag.  An
    operand that has already been specified is never overwritten.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    idxOp = int(sTag[-1]) - 1;
    assert 0 <= idxOp < 4;

    # flatten, split up, and validate the "where:type" value.
    sFlattened = self.flattenAllSections(aasSections);
    asSplit = sFlattened.split(':');
    if len(asSplit) == 1:
        sType  = asSplit[0];
        sWhere = None;
    elif len(asSplit) == 2:
        (sWhere, sType) = asSplit;
    else:
        return self.errorComment(iTagLine, 'expected %s value on format "[<where>:]<type>" not "%s"' % (sTag, sFlattened,));

    if sType not in g_kdOpTypes:
        return self.errorComment(iTagLine, '%s: invalid type value "%s", valid: %s'
                                           % (sTag, sType, ', '.join(g_kdOpTypes.keys()),));
    if sWhere is None:
        sWhere = g_kdOpTypes[sType][1];
    elif sWhere not in g_kdOpLocations:
        return self.errorComment(iTagLine, '%s: invalid where value "%s", valid: %s'
                                           % (sTag, sWhere, ', '.join(g_kdOpLocations.keys()),));

    # Insert the operand, refusing to overwrite an existing one.  Operands
    # that haven't been specified (yet) are represented by None entries.
    while idxOp >= len(oInstr.aoOperands):
        oInstr.aoOperands.append(None);
    if oInstr.aoOperands[idxOp] is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s:%s" with "%s:%s"'
                                           % ( sTag, oInstr.aoOperands[idxOp].sWhere, oInstr.aoOperands[idxOp].sType,
                                               sWhere, sType,));
    oInstr.aoOperands[idxOp] = Operand(sWhere, sType);

    _ = iEndLine;
    return True;
4104
def parseTagOpMaps(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opmaps
    Value:  map[,map2]

    Indicates which maps the instruction is in.  There is a default map
    associated with each input file.

    The map names are separated by commas, with lines and sections counting
    as separators too.  Whitespace around the names and empty items (like
    the one produced by a line ending with a comma) are ignored.  Each name
    must be a key of g_dInstructionMaps and may only be assigned once.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split up and validate the value.
    sFlattened = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',');
    asMaps = [sMap.strip() for sMap in sFlattened.split(',')];
    asMaps = [sMap for sMap in asMaps if sMap];
    if not asMaps:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    for sMap in asMaps:
        if sMap not in g_dInstructionMaps:
            return self.errorComment(iTagLine, '%s: invalid map value: %s  (valid values: %s)'
                                               % (sTag, sMap, ', '.join(g_dInstructionMaps.keys()),));

    # Add the maps to the current list.  Throw errors on duplicates.
    for oMap in oInstr.aoMaps:
        if oMap.sName in asMaps:
            return self.errorComment(iTagLine, '%s: duplicate map assignment: %s' % (sTag, oMap.sName));

    for sMap in asMaps:
        oMap = g_dInstructionMaps[sMap];
        if oMap not in oInstr.aoMaps:
            oInstr.aoMaps.append(oMap);
        else:
            # The same map was listed twice in this tag value; report it and carry on.
            self.errorComment(iTagLine, '%s: duplicate map assignment (input): %s' % (sTag, sMap));

    _ = iEndLine;
    return True;
4139
def parseTagOpPfx(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @oppfx
    Value:  n/a|none|0x66|0xf3|0xf2|!0xf3

    Required prefix for the instruction.  (In a (E)VEX context this is the
    value of the 'pp' field rather than an actual prefix.)

    The value is case insensitive and a two character value gets '0x'
    prepended.  'n/a' translates to None.  Any other value must be a key of
    g_kdPrefixes.  An already set prefix is never overwritten.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sFlattened = self.flattenAllSections(aasSections);
    asPrefixes = sFlattened.split();
    if not asPrefixes:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if len(asPrefixes) > 1:
        return self.errorComment(iTagLine, '%s: max one prefix: %s' % (sTag, asPrefixes,));

    sPrefix = asPrefixes[0].lower();
    if sPrefix == 'n/a':
        sPrefix = None;
    elif sPrefix != 'none':
        if len(sPrefix) == 2:
            sPrefix = '0x' + sPrefix;
        # '!0xf3' isn't an opcode byte, but is accepted nonetheless.
        if not _isValidOpcodeByte(sPrefix) and sPrefix != '!0xf3':
            return self.errorComment(iTagLine, '%s: invalid prefix: %s' % (sTag, sPrefix,));

    if sPrefix is not None and sPrefix not in g_kdPrefixes:
        return self.errorComment(iTagLine, '%s: invalid prefix: %s (valid %s)' % (sTag, sPrefix, g_kdPrefixes,));

    # Set it.
    if oInstr.sPrefix is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sPrefix, sPrefix,));
    oInstr.sPrefix = sPrefix;

    _ = iEndLine;
    return True;
4178
def parseTagOpcode(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opcode
    Value:  0x?? | /reg | 11/reg | !11/reg
            (TODO: | mr/reg | 11 mr/reg | !11 mr/reg)

    The opcode byte or sub-byte for the instruction in the context of a map.

    The 'reg' part is a single digit in the range 0 thru 7, as the ModR/M
    reg field is three bits wide.  An already set opcode is never
    overwritten.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sOpcode = self.flattenAllSections(aasSections);
    fValid = _isValidOpcodeByte(sOpcode);
    if not fValid:
        # Not a plain byte, so it must be one of the '/reg' forms.
        for sLead in ('/', '11/', '!11/'):
            if len(sOpcode) == len(sLead) + 1 and sOpcode.startswith(sLead) and sOpcode[-1] in '01234567':
                fValid = True;
                break;
    if not fValid:
        return self.errorComment(iTagLine, '%s: invalid opcode: %s' % (sTag, sOpcode,));

    # Set it.
    if oInstr.sOpcode is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sOpcode, sOpcode,));
    oInstr.sOpcode = sOpcode;

    _ = iEndLine;
    return True;
4208
def parseTagOpcodeSub(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opcodesub
    Value:  none | 11 mr/reg | !11 mr/reg | rex.w=0 | rex.w=1 | vex.l=0 | vex.l=1
            | 11 mr/reg vex.l=0 | 11 mr/reg vex.l=1 | !11 mr/reg vex.l=0 | !11 mr/reg vex.l=1
            | !11 rex.w=0 | !11 mr/reg rex.w=0
            | !11 rex.w=1 | !11 mr/reg rex.w=1

    This is a simple way of dealing with encodings where the mod=3 and mod!=3
    represents exactly two different instructions.  The more proper way would
    be to go via maps with two members, but this is faster.

    The value must be a key of g_kdSubOpcodes; what gets stored is the first
    element of the matching table entry.  An already set sub opcode is never
    overwritten.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value and translate it via the sub opcode table.
    sValue = self.flattenAllSections(aasSections);
    if sValue not in g_kdSubOpcodes:
        sValid = ', '.join(sorted(g_kdSubOpcodes.keys()));
        return self.errorComment(iTagLine, '%s: invalid sub opcode: %s  (valid: %s)' % (sTag, sValue, sValid,));
    sNewSubOpcode = g_kdSubOpcodes[sValue][0];

    # Only set it once.
    sOldSubOpcode = oInstr.sSubOpcode;
    if sOldSubOpcode is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                           % ( sTag, sOldSubOpcode, sNewSubOpcode,));
    oInstr.sSubOpcode = sNewSubOpcode;

    _ = iEndLine;
    return True;
4238
def parseTagOpEnc(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @openc
    Value:  ModR/M|fixed|prefix|<map name>

    The instruction operand encoding style.

    The value is accepted if it is a key of g_kdEncodings, a key of
    g_dInstructionMaps, or passes _isValidOpcodeByte().  An already set
    encoding is never overwritten.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sNewEncoding = self.flattenAllSections(aasSections);
    if      sNewEncoding not in g_kdEncodings \
        and sNewEncoding not in g_dInstructionMaps \
        and not _isValidOpcodeByte(sNewEncoding):
        return self.errorComment(iTagLine, '%s: invalid encoding: %s' % (sTag, sNewEncoding,));

    # Only set it once.
    sOldEncoding = oInstr.sEncoding;
    if sOldEncoding is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                           % ( sTag, sOldEncoding, sNewEncoding,));
    oInstr.sEncoding = sNewEncoding;

    _ = iEndLine;
    return True;
4265
## Maps each of the EFLAGS tags to the name of the Instruction attribute
#  holding the flag list given by that tag.
kdOpFlagToAttr = {
    '@opfltest':    'asFlTest',
    '@opflmodify':  'asFlModify',
    '@opflundef':   'asFlUndefined',
    '@opflset':     'asFlSet',
    '@opflclear':   'asFlClear',
};
4274
def parseTagOpEFlags(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tags:   @opfltest, @opflmodify, @opflundef, @opflset, @opflclear
    Value:  <eflags specifier>

    The value is a comma separated list of flags, each of which must be a key
    of g_kdEFlagsMnemonics (surrounding whitespace is tolerated), or the
    single word 'none' for an empty list.  The list is stored in the
    instruction attribute that self.kdOpFlagToAttr associates with the tag,
    unless that attribute already holds a non-empty list.

    Returns True on success.  Returns False after reporting every invalid
    flag, or the result of self.errorComment() on an overwrite attempt.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and split up the value.
    sFlattened = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',');
    asRawFlags = sFlattened.split(',');
    if len(asRawFlags) == 1 and asRawFlags[0].lower() == 'none':
        asRawFlags = [];

    # Validate the flags, reporting all the bad ones before giving up.
    fRc = True;
    asFlags = [];
    for sRawFlag in asRawFlags:
        if sRawFlag in g_kdEFlagsMnemonics:
            asFlags.append(sRawFlag);
        elif sRawFlag.strip() in g_kdEFlagsMnemonics:
            asFlags.append(sRawFlag.strip());
        else:
            asFlags.append(sRawFlag);
            fRc = self.errorComment(iTagLine, '%s: invalid EFLAGS value: %s' % (sTag, sRawFlag,));
    if not fRc:
        return False;

    # Set them, unless there is something there already.
    sAttrib = self.kdOpFlagToAttr[sTag];
    asOld = getattr(oInstr, sAttrib);
    if asOld is not None and len(asOld) > 0:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, asOld, asFlags,));
    setattr(oInstr, sAttrib, asFlags);

    _ = iEndLine;
    return True;
4306
## The EFLAGS classes that @opflclass accepts.  Each class name maps to the
#  flag list to assign to each of the five EFLAGS attributes of an instruction.
kdEFlagsClasses = {
    # add, sub, ...
    'arithmetic': {
        'asFlTest':       [],
        'asFlModify':     [ 'cf', 'pf', 'af', 'zf', 'sf', 'of', ],
        'asFlClear':      [],
        'asFlSet':        [],
        'asFlUndefined':  [],
    },
    # adc, sbb, ...
    'arithmetic_carry': {
        'asFlTest':       [ 'cf', ],
        'asFlModify':     [ 'cf', 'pf', 'af', 'zf', 'sf', 'of', ],
        'asFlClear':      [],
        'asFlSet':        [],
        'asFlUndefined':  [],
    },
    # Leaves CF alone.
    'incdec': {
        'asFlTest':       [],
        'asFlModify':     [ 'pf', 'af', 'zf', 'sf', 'of', ],
        'asFlClear':      [],
        'asFlSet':        [],
        'asFlUndefined':  [],
    },
    # Intel leaves all flags unchanged, while AMD sets AF and clears PF, ZF
    # and SF, leaving CF and OF alone.
    ## @todo specify intel/amd differences...
    'division': {
        'asFlTest':       [ 'pf', 'af', 'zf', 'sf', ],
        'asFlModify':     [ 'pf', 'af', 'zf', 'sf', ],
        'asFlClear':      [],
        'asFlSet':        [],
        'asFlUndefined':  [ 'cf', 'pf', 'af', 'zf', 'sf', 'of', ],
    },
    # AMD leaves PF, AF, ZF and SF unchanged, so they have to be declared as
    # inputs.  Intel always modifies all flags, but how differs between IMUL
    # and MUL.
    ## @todo specify intel/amd differences...
    'multiply': {
        'asFlTest':       [ 'pf', 'af', 'zf', 'sf', ],
        'asFlModify':     [ 'cf', 'pf', 'af', 'zf', 'sf', 'of' ],
        'asFlClear':      [],
        'asFlSet':        [],
        'asFlUndefined':  [ 'pf', 'af', 'zf', 'sf', ],
    },
    # and, or, xor, ...  ('af' is undefined, but tstIEMAImpl indicates that
    # it is cleared.)
    'logical': {
        'asFlTest':       [],
        'asFlModify':     [ 'cf', 'pf', 'af', 'zf', 'sf', 'of', ],
        'asFlClear':      [ 'cf', 'af', 'of', ],
        'asFlSet':        [],
        'asFlUndefined':  [ 'af', ],
    },
    # rol and ror with fixed 1 shift count
    'rotate_1': {
        'asFlTest':       [],
        'asFlModify':     [ 'cf', 'of', ],
        'asFlClear':      [],
        'asFlSet':        [],
        'asFlUndefined':  [],
    },
    # rol and ror w/o fixed 1 shift count
    'rotate_count': {
        'asFlTest':       [],
        'asFlModify':     [ 'cf', 'of', ],
        'asFlClear':      [],
        'asFlSet':        [],
        'asFlUndefined':  [ 'of', ],
    },
    # rcl and rcr with fixed 1 shift count
    'rotate_carry_1': {
        'asFlTest':       [ 'cf', ],
        'asFlModify':     [ 'cf', 'of', ],
        'asFlClear':      [],
        'asFlSet':        [],
        'asFlUndefined':  [],
    },
    # rcl and rcr w/o fixed 1 shift count
    'rotate_carry_count': {
        'asFlTest':       [ 'cf', ],
        'asFlModify':     [ 'cf', 'of', ],
        'asFlClear':      [],
        'asFlSet':        [],
        'asFlUndefined':  [ 'of', ],
    },
    # shl, shr or sar with fixed 1 count.
    'shift_1': {
        'asFlTest':       [],
        'asFlModify':     [ 'cf', 'pf', 'af', 'zf', 'sf', 'of', ],
        'asFlClear':      [],
        'asFlSet':        [],
        'asFlUndefined':  [ 'af', ],
    },
    # shl, shr or sar w/o fixed 1 shift count
    'shift_count': {
        'asFlTest':       [],
        'asFlModify':     [ 'cf', 'pf', 'af', 'zf', 'sf', 'of', ],
        'asFlClear':      [],
        'asFlSet':        [],
        'asFlUndefined':  [ 'af', 'of', ],
    },
    # bt, btc, btr, bts  (tstIEMAImpl indicates that the undefined flags
    # aren't modified.)
    'bitmap': {
        'asFlTest':       [],
        'asFlModify':     [ 'cf', ],
        'asFlClear':      [],
        'asFlSet':        [],
        'asFlUndefined':  [ 'pf', 'af', 'zf', 'sf', 'of', ],
    },
    'unchanged': {
        'asFlTest':       [],
        'asFlModify':     [],
        'asFlClear':      [],
        'asFlSet':        [],
        'asFlUndefined':  [],
    },
};
def parseTagOpEFlagsClass(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tags:   @opflclass
    Value:  arithmetic, logical, ...

    Looks the value up in self.kdEFlagsClasses and assigns each flag list of
    the class to the instruction attribute of the same name.  This stops with
    an error at the first attribute that isn't None, leaving any attributes
    assigned before it in place.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value and look up the class.
    sClass = self.flattenAllSections(aasSections);
    dClassAttribs = self.kdEFlagsClasses.get(sClass);
    if not dClassAttribs:
        return self.errorComment(iTagLine, '%s: Unknown EFLAGS class: %s' % ( sTag, sClass,));

    # Set the attributes.  Note that the lists of the class table are
    # assigned as they are, they're not copied.
    for sAttrib in dClassAttribs:
        asNew = dClassAttribs[sAttrib];
        asOld = getattr(oInstr, sAttrib);
        if asOld is not None:
            return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s" for %s'
                                               % ( sTag, asOld, asNew, sAttrib));
        setattr(oInstr, sAttrib, asNew);

    _ = iEndLine;
    return True;
4432
4433
def parseTagOpHints(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @ophints
    Value:  Comma or space separated list of flags and hints.

    This covers the disassembler flags table and more.

    Each hint must be a key of g_kdHints.  The single word 'none' gives an
    empty list.  The hints are added to the dHints dictionary of the
    instruction; a hint that is already there is reported as an error, but
    doesn't prevent the remaining ones from being added.

    Returns True on success (duplicates included), False after reporting
    every invalid hint.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    # Note: split() yields whitespace free words, so no stripping is needed.
    asHints = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asHints) == 1 and asHints[0].lower() == 'none':
        asHints = [];
    else:
        fRc = True;
        for sHint in asHints:
            if sHint not in g_kdHints:
                fRc = self.errorComment(iTagLine, '%s: invalid hint value: %s' % (sTag, sHint,));
        if not fRc:
            return False;

    # Append them.
    for sHint in asHints:
        if sHint not in oInstr.dHints:
            oInstr.dHints[sHint] = True; # (dummy value, using dictionary for speed)
        else:
            self.errorComment(iTagLine, '%s: duplicate hint: %s' % ( sTag, sHint,));

    _ = iEndLine;
    return True;
4467
def parseTagOpDisEnum(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opdisenum
    Value:  OP_XXXX

    This is for selecting a specific (legacy) disassembler enum value for the
    instruction.

    The first word of the value is used and must match self.oReDisEnum.  More
    than one word is reported as an error, but the first word is processed
    nonetheless.  An already set enum value is never overwritten.

    Returns True on success, False if there is no value at all, and
    otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and split.
    asWords = self.flattenAllSections(aasSections).split();
    cWords = len(asWords);
    if cWords != 1:
        self.errorComment(iTagLine, '%s: expected exactly one value: %s' % (sTag, asWords,));
        if cWords == 0:
            return False;

    sNewDisEnum = asWords[0];
    if not self.oReDisEnum.match(sNewDisEnum):
        return self.errorComment(iTagLine, '%s: invalid disassembler OP_XXXX enum: %s (pattern: %s)'
                                           % (sTag, sNewDisEnum, self.oReDisEnum.pattern));

    # Only set it once.
    sOldDisEnum = oInstr.sDisEnum;
    if sOldDisEnum is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % (sTag, sOldDisEnum, sNewDisEnum,));
    oInstr.sDisEnum = sNewDisEnum;

    _ = iEndLine;
    return True;
4496
def parseTagOpMinCpu(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opmincpu
    Value:  <simple CPU name>

    Indicates when this instruction was introduced.

    The first word of the value is used and must be found in g_kdCpuNames.
    More than one word is reported as an error, but the first word is
    processed nonetheless.  If the instruction already has a different
    minimum CPU, an error is reported and the existing value is kept.

    Returns True if the CPU name is valid (even when an overwrite attempt
    was reported), otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value, split into words, make sure there's just one, valid it.
    asCpus = self.flattenAllSections(aasSections).split();
    if not asCpus:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if len(asCpus) > 1:
        self.errorComment(iTagLine, '%s: exactly one CPU name, please: %s' % (sTag, ' '.join(asCpus),));

    sMinCpu = asCpus[0];
    if sMinCpu not in g_kdCpuNames:
        return self.errorComment(iTagLine, '%s: invalid CPU name: %s  (names: %s)'
                                           % (sTag, sMinCpu, ','.join(sorted(g_kdCpuNames)),));

    # Set it.  The instruction must not be touched before this point, or the
    # overwrite check below would never trigger.
    if oInstr.sMinCpu is None:
        oInstr.sMinCpu = sMinCpu;
    elif oInstr.sMinCpu != sMinCpu:
        self.errorComment(iTagLine, '%s: attemting to overwrite "%s" with "%s"' % (sTag, oInstr.sMinCpu, sMinCpu,));

    _ = iEndLine;
    return True;
4526
def parseTagOpCpuId(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opcpuid
    Value:  none | <CPUID flag specifier>

    CPUID feature bit which is required for the instruction to be present.

    The value is a comma or space separated list where each item must be a
    key of g_kdCpuIdFlags.  The single word 'none' gives an empty list.  The
    items are appended to the asCpuIds list of the instruction; an item that
    is already there is reported as an error, but doesn't prevent the
    remaining ones from being added.

    Returns True on success (duplicates included), False after reporting
    every invalid item.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    # Note: split() yields whitespace free words, so no stripping is needed.
    asCpuIds = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asCpuIds) == 1 and asCpuIds[0].lower() == 'none':
        asCpuIds = [];
    else:
        fRc = True;
        for sCpuId in asCpuIds:
            if sCpuId not in g_kdCpuIdFlags:
                fRc = self.errorComment(iTagLine, '%s: invalid CPUID value: %s' % (sTag, sCpuId,));
        if not fRc:
            return False;

    # Append them.
    for sCpuId in asCpuIds:
        if sCpuId not in oInstr.asCpuIds:
            oInstr.asCpuIds.append(sCpuId);
        else:
            self.errorComment(iTagLine, '%s: duplicate CPUID: %s' % ( sTag, sCpuId,));

    _ = iEndLine;
    return True;
4560
def parseTagOpGroup(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opgroup
    Value:  op_grp1[_subgrp2[_subsubgrp3]]

    Instruction grouping.

    The value must be a single word matching self.oReGroupName.  An already
    set group is never overwritten.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value and make sure it is exactly one valid group name.
    asGroups = self.flattenAllSections(aasSections).split();
    if len(asGroups) != 1:
        return self.errorComment(iTagLine, '%s: exactly one group, please: %s' % (sTag, asGroups,));

    (sNewGroup,) = asGroups;
    if not self.oReGroupName.match(sNewGroup):
        return self.errorComment(iTagLine, '%s: invalid group name: %s (valid: %s)'
                                           % (sTag, sNewGroup, self.oReGroupName.pattern));

    # Only set it once.
    sOldGroup = oInstr.sGroup;
    if sOldGroup is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, sOldGroup, sNewGroup,));
    oInstr.sGroup = sNewGroup;

    _ = iEndLine;
    return True;
4586
def parseTagOpUnusedInvalid(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opunused, @opinvalid, @opinvlstyle
    Value:  <invalid opcode behaviour style>

    The @opunused indicates the specification is for a currently unused
    instruction encoding.

    The @opinvalid indicates the specification is for an invalid currently
    instruction encoding (like UD2).

    The @opinvlstyle just indicates how CPUs decode the instruction when
    not supported (@opcpuid, @opmincpu) or disabled.

    The value must be a single word that is a key of g_kdInvalidStyles, and
    only one of these tags may be given per instruction.  Besides storing the
    style, @opunused sets fUnused and @opinvalid sets fInvalid.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value and make sure it is exactly one valid style.
    asStyles = self.flattenAllSections(aasSections).split();
    if len(asStyles) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid behviour style, please: %s' % (sTag, asStyles,));

    (sNewStyle,) = asStyles;
    if sNewStyle not in g_kdInvalidStyles:
        return self.errorComment(iTagLine, '%s: invalid invalid behaviour style: %s (valid: %s)'
                                           % (sTag, sNewStyle, g_kdInvalidStyles.keys(),));

    # Only set it once.
    sOldStyle = oInstr.sInvalidStyle;
    if sOldStyle is not None:
        return self.errorComment(iTagLine,
                                 '%s: attempting to overwrite "%s" with "%s" (only one @opunused, @opinvalid, @opinvlstyle)'
                                 % ( sTag, sOldStyle, sNewStyle,));
    oInstr.sInvalidStyle = sNewStyle;

    # Two of the tags additionally raise a flag on the instruction.
    sFlagAttrib = { '@opunused': 'fUnused', '@opinvalid': 'fInvalid', }.get(sTag);
    if sFlagAttrib is not None:
        setattr(oInstr, sFlagAttrib, True);

    _ = iEndLine;
    return True;
4624
def parseTagOpTest(self, sTag, aasSections, iTagLine, iEndLine): # pylint: disable=too-many-locals
    """
    Tag:        @optest
    Value:      [<selectors>[ ]?] <inputs> -> <outputs>
    Example:    mode==64bit / in1=0xfffffffe:dw in2=1:dw -> out1=0xffffffff:dw outfl=a?,p?

    The main idea here is to generate basic instruction tests.

    The probably simplest way of handling the diverse input, would be to use
    it to produce size optimized byte code for a simple interpreter that
    modifies the register input and output states.

    An alternative to the interpreter would be creating multiple tables,
    but that becomes rather complicated wrt what goes where and then to use
    them in an efficient manner.

    Each section in aasSections is parsed as one test.  A test is appended to
    the instruction's aoTests list only if the whole section parsed without a
    fatal problem; otherwise additional diagnostics are emitted via
    self.errorComment().

    Always returns True.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    #
    # Do it section by section.
    #
    for asSectionLines in aasSections:
        #
        # Sort the input into outputs, inputs and selector conditions.
        #
        sFlatSection = self.flattenAllSections([asSectionLines,]);
        if not sFlatSection:
            self.errorComment(iTagLine, '%s: missing value (dbg: aasSections=%s)' % ( sTag, aasSections));
            continue;
        oTest = InstructionTest(oInstr);

        asSelectors = [];
        asInputs    = [];
        asOutputs   = [];
        asCur       = asOutputs;
        fRc         = True;     # Replaced by errorComment()'s return value on failure (presumably False).
        asWords     = sFlatSection.split();

        # Walk the words back to front: outputs come last, then inputs, then selectors.
        for sWord in reversed(asWords):
            # Check for array switchers.  The current list must be compared by
            # identity: comparing by value lets a repeated '->' or '/' slip
            # through whenever the two lists happen to have equal content.
            if sWord == '->':
                if asCur is not asOutputs:
                    fRc = self.errorComment(iTagLine, '%s: "->" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asInputs;
            elif sWord == '/':
                if asCur is not asInputs:
                    fRc = self.errorComment(iTagLine, '%s: "/" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asSelectors;
            else:
                asCur.insert(0, sWord);

        #
        # Validate and add selectors.
        #
        for sCond in asSelectors:
            # Predicates are shorthand which expand to a full '<variable><op><value>' condition.
            sCondExp  = TestSelector.kdPredicates.get(sCond, sCond);
            oSelector = None;
            for sOp in TestSelector.kasCompareOps:
                off = sCondExp.find(sOp);
                if off >= 0:
                    sVariable = sCondExp[:off];
                    sValue    = sCondExp[off + len(sOp):];
                    if sVariable in TestSelector.kdVariables:
                        if sValue in TestSelector.kdVariables[sVariable]:
                            oSelector = TestSelector(sVariable, sOp, sValue);
                        else:
                            self.errorComment(iTagLine, '%s: invalid condition value "%s" in "%s" (valid: %s)'
                                              % ( sTag, sValue, sCond,
                                                  TestSelector.kdVariables[sVariable].keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid condition variable "%s" in "%s" (valid: %s)'
                                          % ( sTag, sVariable, sCond, TestSelector.kdVariables.keys(),));
                    break;  # Only the first operator found is considered.
            if oSelector is not None:
                # Duplicates are reported, but the selector is added all the same.
                for oExisting in oTest.aoSelectors:
                    if oExisting.sVariable == oSelector.sVariable:
                        self.errorComment(iTagLine, '%s: already have a selector for variable "%s" (existing: %s, new: %s)'
                                          % ( sTag, oSelector.sVariable, oExisting, oSelector,));
                oTest.aoSelectors.append(oSelector);
            else:
                fRc = self.errorComment(iTagLine, '%s: failed to parse selector: %s' % ( sTag, sCond,));

        #
        # Validate outputs and inputs, adding them to the test as we go along.
        #
        for asItems, sDesc, aoDst in [ (asInputs, 'input', oTest.aoInputs), (asOutputs, 'output', oTest.aoOutputs)]:
            asValidFieldKinds = [ 'both', sDesc, ];
            for sItem in asItems:
                oItem = None;
                for sOp in TestInOut.kasOperators:
                    off = sItem.find(sOp);
                    if off < 0:
                        continue;
                    sField     = sItem[:off];
                    sValueType = sItem[off + len(sOp):];
                    if     sField in TestInOut.kdFields \
                       and TestInOut.kdFields[sField][1] in asValidFieldKinds:
                        # The value may carry an explicit ':type' suffix, otherwise the field default is used.
                        asSplit = sValueType.split(':', 1);
                        sValue  = asSplit[0];
                        sType   = asSplit[1] if len(asSplit) > 1 else TestInOut.kdFields[sField][0];
                        if sType in TestInOut.kdTypes:
                            oValid = TestInOut.kdTypes[sType].validate(sValue);
                            if oValid is True:
                                if not TestInOut.kdTypes[sType].isAndOrPair(sValue) or sOp == '&|=':
                                    oItem = TestInOut(sField, sOp, sValue, sType);
                                else:
                                    self.errorComment(iTagLine, '%s: and-or %s value "%s" can only be used with "&|="'
                                                      % ( sTag, sDesc, sItem, ));
                            else:
                                self.errorComment(iTagLine, '%s: invalid %s value "%s" in "%s" (type: %s): %s'
                                                  % ( sTag, sDesc, sValue, sItem, sType, oValid, ));
                        else:
                            self.errorComment(iTagLine, '%s: invalid %s type "%s" in "%s" (valid types: %s)'
                                              % ( sTag, sDesc, sType, sItem, TestInOut.kdTypes.keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid %s field "%s" in "%s"\nvalid fields: %s'
                                          % ( sTag, sDesc, sField, sItem,
                                              ', '.join([sKey for sKey, asVal in TestInOut.kdFields.items()
                                                         if asVal[1] in asValidFieldKinds]),));
                    break;  # Only the first operator found is considered.
                if oItem is not None:
                    # Duplicates are reported, but the item is added all the same.
                    for oExisting in aoDst:
                        if oExisting.sField == oItem.sField and oExisting.sOp == oItem.sOp:
                            self.errorComment(iTagLine,
                                              '%s: already have a "%s" assignment for field "%s" (existing: %s, new: %s)'
                                              % ( sTag, oItem.sOp, oItem.sField, oExisting, oItem,));
                    aoDst.append(oItem);
                else:
                    fRc = self.errorComment(iTagLine, '%s: failed to parse assignment: %s' % ( sTag, sItem,));

        #
        # Keep the test if it parsed fine, otherwise dump what we got for debugging.
        #
        if fRc:
            oInstr.aoTests.append(oTest);
        else:
            self.errorComment(iTagLine, '%s: failed to parse test: %s' % (sTag, ' '.join(asWords),));
            self.errorComment(iTagLine, '%s: asSelectors=%s / asInputs=%s -> asOutputs=%s'
                              % (sTag, asSelectors, asInputs, asOutputs,));

    _ = iEndLine;
    return True;
4769
def parseTagOpTestNum(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Numbered @optest tag.  Either @optest42 or @optest[42].

    Complains if the number does not equal the count of tests the instruction
    already has, then hands the tag over to parseTagOpTest() regardless.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Extract the number: '@optest[' is 8 characters, '@optest' is 7.
    sNumber = sTag[8:-1] if sTag[-1] == ']' else sTag[7:];
    iTest   = int(sNumber);

    cTests = len(oInstr.aoTests);
    if iTest != cTests:
        self.errorComment(iTagLine, '%s: incorrect test number: %u, actual %u' % (sTag, iTest, cTests,));

    return self.parseTagOpTest(sTag, aasSections, iTagLine, iEndLine);
4785
def parseTagOpTestIgnore(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @optestign | @optestignore
    Value:      <value is ignored>

    Handy for getting a test out of the way while debugging another one;
    nothing is parsed or recorded.

    See also @oponlytest.

    Always returns True.
    """
    _ = (sTag, aasSections, iTagLine, iEndLine,);
    return True;
4797
def parseTagOpCopyTests(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @opcopytests
    Value:      <opstat | function> [..]
    Example:    @opcopytests add_Eb_Gb

    Trick for not having to duplicate tests for different encodings of the
    same operation.

    The references are only validated and queued on the instruction here
    (oInstr.asCopyTests).  The copying itself is done after all the input has
    been parsed, so forward references work.

    Returns True, or whatever self.errorComment() returns when no reference
    was given.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    asRefs = self.flattenAllSections(aasSections).split();
    if not asRefs:
        return self.errorComment(iTagLine, '%s: requires at least on reference value' % (sTag,));

    for sRef in asRefs:
        # Duplicates are reported and skipped.
        if sRef in oInstr.asCopyTests:
            self.errorComment(iTagLine, '%s: ignoring duplicate "%s"' % (sTag, sRef,));
            continue;

        # A reference must look like either a statistics name or a function name.
        if self.oReStatsName.match(sRef) or self.oReFunctionName.match(sRef):
            oInstr.asCopyTests.append(sRef);
        else:
            self.errorComment(iTagLine, '%s: invalid instruction reference (opstat or function) "%s" (valid: %s or %s)'
                                        % (sTag, sRef, self.oReStatsName.pattern, self.oReFunctionName.pattern));

    return True;
4826
def parseTagOpOnlyTest(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @oponlytest | @oponly
    Value:      none

    Restricts testing to the instructions carrying this tag, which is handy
    for singling out one or two new instructions or tests.

    See also @optestignore.

    Returns True on success, or whatever self.errorComment() returns if the
    tag was given a value.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The tag does not take a value.
    sValue = self.flattenAllSections(aasSections).strip();
    if sValue:
        return self.errorComment(iTagLine, '%s: does not take any value: %s' % (sTag, sValue));

    # Register the instruction, once only.
    if oInstr in g_aoOnlyTestInstructions:
        return True;
    g_aoOnlyTestInstructions.append(oInstr);
    return True;
4849
def parseTagOpXcptType(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @opxcpttype
    Value:      [none|1|2|3|4|4UA|5|6|7|8|11|12|E1|E1NF|E2|E3|E3NF|E4|E4NF|E5|E5NF|E6|E6NF|E7NF|E9|E9NF|E10|E11|E12|E12NF]

    Sets the SSE or AVX exception type (see SDMv2 2.4, 2.7).

    The value must be a single word found in g_kdXcptTypes and the type can
    only be set once per instruction.

    Returns True on success, otherwise whatever self.errorComment() returns.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    # (The messages used to be copies of the @opinvlstyle ones and talked
    # about an "invalid exception type" where a plain exception type is meant.)
    asTypes = self.flattenAllSections(aasSections).split();
    if len(asTypes) != 1:
        return self.errorComment(iTagLine, '%s: exactly one exception type, please: %s' % (sTag, asTypes,));
    sType = asTypes[0];
    if sType not in g_kdXcptTypes:
        return self.errorComment(iTagLine, '%s: invalid exception type: %s (valid: %s)'
                                 % (sTag, sType, sorted(g_kdXcptTypes.keys()),));
    # Set it.
    if oInstr.sXcptType is not None:
        return self.errorComment(iTagLine,
                                 '%s: attempting to overwrite "%s" with "%s" (only one @opxcpttype)'
                                 % ( sTag, oInstr.sXcptType, sType,));
    oInstr.sXcptType = sType;

    _ = iEndLine;
    return True;
4876
def parseTagOpFunction(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @opfunction
    Value:      <VMM function name>

    Explicitly sets the IEM function name.  Normally the name is picked up
    from the FNIEMOP_XXX macro invocation following the description, or it is
    generated from the mnemonic and operands.

    It is thought that setting it may be necessary when specifying
    instructions whose implementation isn't following immediately or which
    aren't implemented yet.

    Returns True on success, otherwise whatever self.errorComment() returns.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The flattened value must match the function name pattern.
    sNewName = self.flattenAllSections(aasSections);
    if not self.oReFunctionName.match(sNewName):
        return self.errorComment(iTagLine, '%s: invalid VMM function name: "%s" (valid: %s)'
                                 % (sTag, sNewName, self.oReFunctionName.pattern));

    # Do not replace a name that has already been set.
    sOldName = oInstr.sFunction;
    if sOldName is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite VMM function name "%s" with "%s"'
                                 % (sTag, sOldName, sNewName,));

    oInstr.sFunction = sNewName;
    return True;
4904
def parseTagOpStats(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @opstats
    Value:      <VMM statistics base name>

    Explicitly sets the statistics name.  Normally the name is picked up from
    the IEMOP_MNEMONIC macro invocation, or it is generated from the mnemonic
    and operands.

    It is thought that setting it may be necessary when specifying
    instructions whose implementation isn't following immediately or which
    aren't implemented yet.

    Returns True on success, otherwise whatever self.errorComment() returns.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The flattened value must match the statistics name pattern.
    sNewName = self.flattenAllSections(aasSections);
    if not self.oReStatsName.match(sNewName):
        return self.errorComment(iTagLine, '%s: invalid VMM statistics name: "%s" (valid: %s)'
                                 % (sTag, sNewName, self.oReStatsName.pattern));

    # Do not replace a name that has already been set.
    sOldName = oInstr.sStats;
    if sOldName is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite VMM statistics base name "%s" with "%s"'
                                 % (sTag, sOldName, sNewName,));

    oInstr.sStats = sNewName;
    return True;
4932
def parseTagOpDone(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @opdone
    Value:      none

    Explicitly flushes the instructions specified so far by calling
    self.doneInstructions().

    Returns the doneInstructions() result, or whatever self.errorComment()
    returns if the tag was given a value.
    """
    _ = iEndLine;

    # The tag takes no value at all.
    sValue = self.flattenAllSections(aasSections);
    if sValue != '':
        return self.errorComment(iTagLine, '%s: takes no value, found: "%s"' % (sTag, sValue,));

    return self.doneInstructions();
4945
4946 ## @}
4947
4948
def parseComment(self):
    """
    Parse the current comment (self.sComment).

    The comment is split into lines, which are divided up between '@tag'
    entries.  The text belonging to a tag is organized as a list of sections
    (blank line separated), each section being a list of lines.  Tags found
    in self.dTagHandlers are dispatched to their handler.

    Returns False if the comment contains neither 'Opcode' nor '@' and is
    therefore skipped, True otherwise.
    """
    #
    # Nothing to do unless the comment looks interesting.
    #
    sComment = self.sComment;
    if 'Opcode' not in sComment and '@' not in sComment:
        return False;

    #
    # Split into lines, dropping the leading asterisks and blanks of each,
    # then trim empty lines at both ends.  The comment line number is
    # advanced for each leading line dropped.
    #
    asLines = [sRaw.lstrip().lstrip('*').lstrip() for sRaw in sComment.split('\n')];

    while asLines and not asLines[0]:
        self.iCommentLine += 1;
        del asLines[0];

    while asLines and not asLines[-1]:
        del asLines[-1];

    #
    # Old style comment: Opcode 0x0f 0x12
    #
    if asLines[0].startswith('Opcode '):
        self.parseCommentOldOpcode(asLines);

    #
    # Collect and dispatch the @op* tagged data.  Text preceding the first
    # tag is collected under the pseudo tag '@default'.
    #
    cOpTags      = 0;
    sFlatDefault = None;
    sCurTag      = '@default';
    iCurTagLine  = 0;
    asCurSection = [];
    aasSections  = [ asCurSection, ];
    for iLine, sLine in enumerate(asLines):
        if sLine.startswith('@'):
            #
            # A new tag starts here, so finish the one we have been collecting.
            #
            if not asCurSection and len(aasSections) > 1:
                del aasSections[-1];    # Drop the trailing empty section.
            if sCurTag in self.dTagHandlers:
                self.dTagHandlers[sCurTag](sCurTag, aasSections, iCurTagLine, iLine);
                cOpTags += 1;
            elif sCurTag.startswith('@op'):
                self.errorComment(iCurTagLine, 'Unknown tag: %s' % (sCurTag));
            elif sCurTag == '@default':
                sFlatDefault = self.flattenAllSections(aasSections);
            elif '@op' + sCurTag[1:] in self.dTagHandlers:
                self.errorComment(iCurTagLine, 'Did you mean "@op%s" rather than "%s"?' % (sCurTag[1:], sCurTag));
            elif sCurTag in ['@encoding', '@opencoding']:
                self.errorComment(iCurTagLine, 'Did you mean "@openc" rather than "%s"?' % (sCurTag,));

            #
            # Start collecting for the new tag.  Whatever follows the tag on
            # the same line becomes the first line of its first section.
            #
            asTagAndText = sLine.split(None, 1);
            sCurTag      = asTagAndText[0].lower();
            asCurSection = [asTagAndText[1],] if len(asTagAndText) > 1 else [];
            aasSections  = [asCurSection, ];
            iCurTagLine  = iLine;
        elif sLine:
            # Plain text goes into the current section.
            asCurSection.append(sLine);
        elif asCurSection:
            # A blank line after some text opens a new section.
            asCurSection = [];
            aasSections.append(asCurSection);

    #
    # Finish the last tag (no "did you mean" hints here).
    #
    if not asCurSection and len(aasSections) > 1:
        del aasSections[-1];
    if sCurTag in self.dTagHandlers:
        self.dTagHandlers[sCurTag](sCurTag, aasSections, iCurTagLine, iLine);
        cOpTags += 1;
    elif sCurTag.startswith('@op'):
        self.errorComment(iCurTagLine, 'Unknown tag: %s' % (sCurTag));
    elif sCurTag == '@default':
        sFlatDefault = self.flattenAllSections(aasSections);

    #
    # Untagged text must not be mixed with @op* tags.
    #
    if cOpTags > 0 and sFlatDefault:
        self.errorComment(0, 'Untagged comment text is not allowed with @op*: %s' % (sFlatDefault,));

    return True;
5049
def parseMacroInvocation(self, sInvocation, offStartInvocation = 0):
    """
    Parses a macro invocation.

    When the closing parenthesis is not within sInvocation, the following
    lines from self.asLines (starting at index self.iLine) are appended one
    by one until it is found or the lines run out.

    Returns three values:
        1. A list of macro arguments, where the zero'th is the macro name.
        2. The offset following the macro invocation, into sInvocation if
           this is on the same line or into the last line if it is on a
           different line.
        3. Number of additional lines the invocation spans (i.e. zero if
           it is all contained within sInvocation).
    """
    # Locate and check the macro name, it is what precedes the opening parenthesis.
    offParen = sInvocation.find('(', offStartInvocation);
    if offParen <= offStartInvocation:
        self.raiseError("macro invocation open parenthesis not found");
    sMacroName = sInvocation[offStartInvocation:offParen].strip();
    if not self.oReMacroName.match(sMacroName):
        self.raiseError("invalid macro name '%s'" % (sMacroName,));
    asArgs = [sMacroName, ];

    # Scan the arguments, keeping track of parenthesis nesting and quoting.
    iNextLine = self.iLine;
    cNesting  = 1;
    offCur    = offParen + 1;
    offArg    = offCur;         # Where the current argument starts.
    offLastLn = 0;              # Where the most recently appended line starts.
    chInQuote = None;
    while cNesting > 0:
        if offCur >= len(sInvocation):
            # Out of text, pull in the next line if there is one.
            if iNextLine >= len(self.asLines):
                self.error('macro invocation beyond end of file');
                return (asArgs, offCur - offLastLn, iNextLine - self.iLine);
            offLastLn    = offCur;
            sInvocation += self.asLines[iNextLine];
            iNextLine   += 1;
        ch = sInvocation[offCur];

        if chInQuote:
            if ch == '\\' and offCur + 1 < len(sInvocation):
                offCur += 1;        # Skip the escaped character.
            elif ch == chInQuote:
                chInQuote = None;
        elif ch == '"' or ch == '\'':
            chInQuote = ch;
        elif ch == '(':
            cNesting += 1;
        elif ch == ',' or ch == ')':
            # Only separators at the outermost level terminate an argument.
            if cNesting == 1:
                asArgs.append(sInvocation[offArg:offCur].strip());
                offArg = offCur + 1;
            if ch == ')':
                cNesting -= 1;
        offCur += 1;

    return (asArgs, offCur - offLastLn, iNextLine - self.iLine);
5106
def findAndParseMacroInvocationEx(self, sCode, sMacro, offStart = 0):
    """
    Looks for the first occurrence of sMacro in sCode at or after offStart
    and parses it as a macro invocation if the next non-blank character
    following it is an opening parenthesis.

    Returns (None, len(sCode), 0) if not found, otherwise the
    parseMacroInvocation() return value.
    """
    offHit = sCode.find(sMacro, offStart);
    # Note! Using startswith() rather than indexing the first character, as
    #       there may be nothing but blanks following the macro name.
    if offHit >= 0 and sCode[offHit + len(sMacro):].lstrip().startswith('('):
        return self.parseMacroInvocation(sCode, offHit);
    return (None, len(sCode), 0);
5116
def findAndParseMacroInvocation(self, sCode, sMacro):
    """
    Convenience variant of findAndParseMacroInvocationEx() which only gives
    the argument list.

    Returns None if not found, arguments as per parseMacroInvocation if found.
    """
    tResult = self.findAndParseMacroInvocationEx(sCode, sMacro);
    return tResult[0];
5122
def findAndParseFirstMacroInvocation(self, sCode, asMacro):
    """
    Tries the macro names in asMacro in the given order, stopping at the
    first one found in sCode.

    Returns same as findAndParseMacroInvocation.
    """
    for sCandidate in asMacro:
        asArgs = self.findAndParseMacroInvocation(sCode, sCandidate);
        if asArgs is None:
            continue;
        return asArgs;
    return None;
5132
def workerIemOpMnemonicEx(self, sMacro, sStats, sAsm, sForm, sUpper, sLower,  # pylint: disable=too-many-arguments
                          sDisHints, sIemHints, asOperands):
    """
    Processes one of the a IEMOP_MNEMONIC0EX, IEMOP_MNEMONIC1EX, IEMOP_MNEMONIC2EX,
    IEMOP_MNEMONIC3EX, and IEMOP_MNEMONIC4EX macros.

    The macro parameters are checked and then merged into the last of the
    current instructions (one is added if there are none): mnemonic,
    operands, encoding, statistics name and hints.  Mismatches between what
    the macro says and what earlier @op* tags said are reported thru
    self.error().

    Nothing is merged if sIemHints contains IEMOPHINT_SKIP_PYTHON.

    sAsm is currently not used.

    Always returns True.
    """
    #
    # Some invocation checks.
    #
    if sUpper != sUpper.upper():
        self.error('%s: bad a_Upper parameter: %s' % (sMacro, sUpper,));
    if sLower != sLower.lower():
        self.error('%s: bad a_Lower parameter: %s' % (sMacro, sLower,));
    if sUpper.lower() != sLower:
        self.error('%s: a_Upper and a_Lower parameters does not match: %s vs %s' % (sMacro, sUpper, sLower,));
    if not self.oReMnemonic.match(sLower):
        self.error('%s: invalid a_Lower: %s (valid: %s)' % (sMacro, sLower, self.oReMnemonic.pattern,));

    #
    # Check if sIemHints tells us to not consider this macro invocation.
    #
    if sIemHints.find('IEMOPHINT_SKIP_PYTHON') >= 0:
        return True;

    # Apply to the last instruction only for now.
    if not self.aoCurInstrs:
        self.addInstruction();
    oInstr = self.aoCurInstrs[-1];
    if oInstr.iLineMnemonicMacro == -1:
        oInstr.iLineMnemonicMacro = self.iLine;
    else:
        self.error('%s: already saw a IEMOP_MNEMONIC* macro on line %u for this instruction'
                   % (sMacro, oInstr.iLineMnemonicMacro,));

    # Mnemonic
    if oInstr.sMnemonic is None:
        oInstr.sMnemonic = sLower;
    elif oInstr.sMnemonic != sLower:
        self.error('%s: current instruction and a_Lower does not match: %s vs %s' % (sMacro, oInstr.sMnemonic, sLower,));

    # Process operands.
    if len(oInstr.aoOperands) not in [0, len(asOperands)]:
        self.error('%s: number of operands given by @opN does not match macro: %s vs %s'
                   % (sMacro, len(oInstr.aoOperands), len(asOperands),));
    for iOperand, sType in enumerate(asOperands):
        sWhere = g_kdOpTypes.get(sType, [None, None])[1];
        if sWhere is None:
            self.error('%s: unknown a_Op%u value: %s' % (sMacro, iOperand + 1, sType));
            if iOperand < len(oInstr.aoOperands): # error recovery.
                sWhere = oInstr.aoOperands[iOperand].sWhere;
                sType  = oInstr.aoOperands[iOperand].sType;
            else:
                sWhere = 'reg';
                sType  = 'Gb';
        if iOperand == len(oInstr.aoOperands):
            oInstr.aoOperands.append(Operand(sWhere, sType))
        elif oInstr.aoOperands[iOperand].sWhere != sWhere or oInstr.aoOperands[iOperand].sType != sType:
            self.error('%s: @op%u and a_Op%u mismatch: %s:%s vs %s:%s'
                       % (sMacro, iOperand + 1, iOperand + 1, oInstr.aoOperands[iOperand].sWhere,
                          oInstr.aoOperands[iOperand].sType, sWhere, sType,));

    # Encoding.
    if sForm not in g_kdIemForms:
        self.error('%s: unknown a_Form value: %s' % (sMacro, sForm,));
    else:
        if oInstr.sEncoding is None:
            oInstr.sEncoding = g_kdIemForms[sForm][0];
        elif g_kdIemForms[sForm][0] != oInstr.sEncoding:
            self.error('%s: current instruction @openc and a_Form does not match: %s vs %s (%s)'
                       % (sMacro, oInstr.sEncoding, g_kdIemForms[sForm], sForm));

        # Check the parameter locations for the encoding.
        asFormWheres = g_kdIemForms[sForm][1];
        if asFormWheres is not None:
            if len(asFormWheres) > len(oInstr.aoOperands):
                self.error('%s: The a_Form=%s has a different operand count: %s (form) vs %s'
                           % (sMacro, sForm, len(asFormWheres), len(oInstr.aoOperands) ));
            else:
                # Note! str.find() returns -1 when nothing is found, so the result
                #       must be compared and cannot be used as a boolean.  The form
                #       has to be a VEX or XOP one when the operand is of the 'V'
                #       kind, and the prefix may well be at the very start of it.
                fVexOrXopForm = sForm.find('VEX') >= 0 or sForm.find('XOP') >= 0;
                for iOperand, sWhere in enumerate(asFormWheres):
                    if oInstr.aoOperands[iOperand].sWhere != sWhere:
                        self.error('%s: current instruction @op%u and a_Form location does not match: %s vs %s (%s)'
                                   % (sMacro, iOperand + 1, oInstr.aoOperands[iOperand].sWhere, sWhere, sForm,));
                    sOpFormMatch = g_kdOpTypes[oInstr.aoOperands[iOperand].sType][4];
                    if    (sOpFormMatch in [ 'REG', 'MEM', ] and sForm.find('_' + sOpFormMatch) < 0) \
                       or (sOpFormMatch in [ 'FIXED', ]      and sForm.find(sOpFormMatch) < 0) \
                       or (sOpFormMatch == 'RM' and (sForm.find('_MEM') > 0 or sForm.find('_REG') > 0) ) \
                       or (sOpFormMatch == 'V'  and (   not fVexOrXopForm \
                                                     or sForm.replace('VEX','').find('V') < 0) ):
                        self.error('%s: current instruction @op%u and a_Form type does not match: %s/%s vs %s'
                                   % (sMacro, iOperand + 1, oInstr.aoOperands[iOperand].sType, sOpFormMatch, sForm, ));

                # Operands beyond those covered by the form must be fixed ones.
                if len(asFormWheres) < len(oInstr.aoOperands):
                    for iOperand in range(len(asFormWheres), len(oInstr.aoOperands)):
                        if    oInstr.aoOperands[iOperand].sType != 'FIXED' \
                          and g_kdOpTypes[oInstr.aoOperands[iOperand].sType][0] != 'IDX_ParseFixedReg':
                            self.error('%s: Expected FIXED type operand #%u following operands given by a_Form=%s: %s (%s)'
                                       % (sMacro, iOperand, sForm, oInstr.aoOperands[iOperand].sType,
                                          oInstr.aoOperands[iOperand].sWhere));

        # Check @opcodesub
        if    oInstr.sSubOpcode \
          and g_kdIemForms[sForm][2] \
          and oInstr.sSubOpcode.find(g_kdIemForms[sForm][2]) < 0:
            self.error('%s: current instruction @opcodesub and a_Form does not match: %s vs %s (%s)'
                       % (sMacro, oInstr.sSubOpcode, g_kdIemForms[sForm][2], sForm,));

    # Stats.
    if not self.oReStatsName.match(sStats):
        self.error('%s: invalid a_Stats value: %s' % (sMacro, sStats,));
    elif oInstr.sStats is None:
        oInstr.sStats = sStats;
    elif oInstr.sStats != sStats:
        self.error('%s: mismatching @opstats and a_Stats value: %s vs %s'
                   % (sMacro, oInstr.sStats, sStats,));

    # Process the hints (simply merge with @ophints w/o checking anything).
    # Both hint parameters are '|' separated lists of prefixed constants, with
    # a plain '0' being accepted for saying there are none.
    for sHints, sPrefix, sParam in ( (sDisHints, 'DISOPTYPE_', 'a_fDisHints'),
                                     (sIemHints, 'IEMOPHINT_', 'a_fIemHints'), ):
        for sHint in sHints.split('|'):
            sHint = sHint.strip();
            if sHint.startswith(sPrefix):
                sShortHint = sHint[len(sPrefix):].lower();
                if sShortHint in g_kdHints:
                    oInstr.dHints[sShortHint] = True; # (dummy value, using dictionary for speed)
                else:
                    self.error('%s: unknown %s value: %s' % (sMacro, sParam, sHint,));
            elif sHint != '0':
                self.error('%s: expected %s value: %s' % (sMacro, sParam, sHint,));

    _ = sAsm;
    return True;
5272
def workerIemOpMnemonic(self, sMacro, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands):
    """
    Processes one of the a IEMOP_MNEMONIC0, IEMOP_MNEMONIC1, IEMOP_MNEMONIC2,
    IEMOP_MNEMONIC3, and IEMOP_MNEMONIC4 macros.

    These macros carry neither a statistics name nor an assembly string, so
    both are derived from the lower case mnemonic and the operands before
    handing over to workerIemOpMnemonicEx(), whose result is returned.
    """
    if asOperands:
        sStats = sLower + '_' + '_'.join(asOperands);
        sAsm   = sLower + ' ' + ','.join(asOperands);
    else:
        # No operands: the mnemonic alone serves as both.
        sStats = sLower;
        sAsm   = sLower;
    return self.workerIemOpMnemonicEx(sMacro, sStats, sAsm, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands);
5282
def workerIemMcBegin(self, sCode, offBeginStatementInCodeStr, offBeginStatementInLine):
    """
    Process a IEM_MC_BEGIN macro invocation.

    Creates a new McBlock and makes it the current one (self.oCurMcBlock).
    The block is only added to g_aoMcBlocks and counted when there are no
    preprocessor conditionals on the stack, no host architecture is set, or
    the conditionals say we are in a block for the host architecture.

    Raises an error (self.raiseError) if there is no current function or if
    the previous block has not been ended yet.

    Returns True.
    """
    if self.fDebugMc:
        self.debug('IEM_MC_BEGIN on %s off %s' % (self.iLine, offBeginStatementInLine,));

    # Preconditions: must be inside a function and not inside another block.
    if not self.oCurFunction:
        self.raiseError('IEM_MC_BEGIN w/o current function (%s)' % (sCode,));
    if self.oCurMcBlock:
        self.raiseError('IEM_MC_BEGIN before IEM_MC_END.  Previous IEM_MC_BEGIN at line %u' % (self.oCurMcBlock.iBeginLine,));

    # The indent is the distance from the start of the (sub-)line the statement
    # is on, which matters for expanded multiline macros.  (rfind gives -1 when
    # there is no preceding newline, making the line start offset zero.)
    offLineStart = sCode.rfind('\n', 0, offBeginStatementInCodeStr) + 1;
    cchIndent    = offBeginStatementInCodeStr - offLineStart;

    # Start a new block, but only list it if the context matches the host architecture.
    oNewBlock = McBlock(self.sSrcFile, self.iLine, offBeginStatementInLine,
                        self.oCurFunction, self.iMcBlockInFunc, cchIndent);
    self.oCurMcBlock = oNewBlock;
    try:
        fListIt =    not self.aoCppCondStack \
                  or not self.sHostArch \
                  or self.PreprocessorConditional.isInBlockForArch(self.aoCppCondStack, self.sHostArch, self.iLine);
        if fListIt:
            g_aoMcBlocks.append(oNewBlock);
            self.cTotalMcBlocks += 1;
    except Exception as oXcpt:
        self.raiseError(oXcpt.args[0]);

    self.iMcBlockInFunc += 1;
    return True;
5319
5320 @staticmethod
5321 def extractLinesFromMacroExpansionLine(sRawLine, offBegin, offEnd, sBeginStmt = 'IEM_MC_BEGIN'):
5322 """
5323 Helper used by workerIemMcEnd and workerIemMcDeferToCImplXRet for
5324 extracting a statement block from a string that's the result of macro
5325 expansion and therefore contains multiple "sub-lines" as it were.
5326
5327 Returns list of lines covering offBegin thru offEnd in sRawLine.
5328 """
5329
5330 off = sRawLine.find('\n', offEnd);
5331 if off > 0:
5332 sRawLine = sRawLine[:off + 1];
5333
5334 off = sRawLine.rfind('\n', 0, offBegin) + 1;
5335 sRawLine = sRawLine[off:];
5336 if not sRawLine.strip().startswith(sBeginStmt):
5337 sRawLine = sRawLine[offBegin - off:]
5338
5339 return [sLine + '\n' for sLine in sRawLine.split('\n')];
5340
def workerIemMcEnd(self, offEndStatementInLine):
    """
    Process a IEM_MC_END macro invocation.

    Collects the source lines of the current MC block, cuts off whatever
    follows the 'IEM_MC_END();' statement on the final line, completes the
    block with these lines and resets self.oCurMcBlock to None.

    Raises an error (self.raiseError) if there is no current block, if
    IEM_MC_BEGIN is not first on its line, or if the IEM_MC_END statement in
    the final line is malformed (including being cut short).

    Returns True.
    """
    if self.fDebugMc:
        self.debug('IEM_MC_END on %s off %s' % (self.iLine, offEndStatementInLine,));

    # Check preconditions.
    if not self.oCurMcBlock:
        self.raiseError('IEM_MC_END w/o IEM_MC_BEGIN.');

    #
    # HACK ALERT! For blocks originating from macro expansion the start and
    #             end line will be the same, but the line has multiple
    #             newlines inside it.  So, we have to do some extra tricks
    #             to get the lines out of there. We ASSUME macros aren't
    #             messy, but keep IEM_MC_BEGIN/END on separate lines.
    #
    if self.iLine > self.oCurMcBlock.iBeginLine:
        asLines = self.asLines[self.oCurMcBlock.iBeginLine - 1 : self.iLine];
        if not asLines[0].strip().startswith('IEM_MC_BEGIN'):
            self.raiseError('IEM_MC_BEGIN is not the first word on the line');

        # Hack alert! Detect mixed tail/head macros a la cmpxchg16b and split up the lines
        #             so we can deal correctly with IEM_MC_END below and everything else.
        for sLine in asLines:
            cNewLines = sLine.count('\n');
            assert cNewLines > 0;
            if cNewLines > 1:
                # The end offset is relative to the last line, so add the length of the preceding ones.
                asLines = self.extractLinesFromMacroExpansionLine(''.join(asLines),
                                                                  self.oCurMcBlock.offBeginLine,
                                                                    offEndStatementInLine
                                                                  + sum(len(s) for s in asLines)
                                                                  - len(asLines[-1]));
                self.oCurMcBlock.iMacroExp = McBlock.kiMacroExp_Partial;
                break;
    else:
        self.oCurMcBlock.iMacroExp = McBlock.kiMacroExp_Entire;
        asLines = self.extractLinesFromMacroExpansionLine(self.asLines[self.iLine - 1],
                                                          self.oCurMcBlock.offBeginLine, offEndStatementInLine);

    #
    # Strip anything following the IEM_MC_END(); statement in the final line,
    # so that we don't carry on any trailing 'break' after macro expansions
    # like for iemOp_movsb_Xb_Yb.
    #
    while asLines and asLines[-1].strip() == '':
        asLines.pop();
    sFinal        = asLines[-1] if asLines else '';
    offEndInFinal = sFinal.find('IEM_MC_END');
    if offEndInFinal < 0: self.raiseError('bogus IEM_MC_END: Not in final line: %s' % (sFinal,));
    offFinalEnd   = offEndInFinal + len('IEM_MC_END');

    # Expect '(', ')' and ';' in that order, each optionally preceded by blanks.
    # The offset is checked against the line length, so that a statement that
    # is cut short is reported as bogus rather than causing an IndexError.
    for chExpected in '();':
        while offFinalEnd < len(sFinal) and sFinal[offFinalEnd].isspace():
            offFinalEnd += 1;
        if offFinalEnd >= len(sFinal) or sFinal[offFinalEnd] != chExpected:
            self.raiseError('bogus IEM_MC_END: Expected "%s" at %s: %s' % (chExpected, offFinalEnd, sFinal,));
        offFinalEnd += 1;

    asLines[-1] = sFinal[: offFinalEnd];

    #
    # Complete and discard the current block.
    #
    self.oCurMcBlock.complete(self.iLine, offEndStatementInLine,
                              offEndStatementInLine + offFinalEnd - offEndInFinal, asLines);
    self.oCurMcBlock = None;
    return True;
5419
def workerIemMcDeferToCImplXRet(self, sCode, offBeginStatementInCodeStr, offBeginStatementInLine, cParams):
    """
    Handles one IEM_MC_DEFER_TO_CIMPL_[1-5]_RET invocation (parser method).

    The invocation is turned into a complete McBlock of its own (created
    with fDeferToCImpl = True), which is appended to g_aoMcBlocks.  The
    MC block counters and self.iLine are updated accordingly.

    sCode is the code string containing the statement,
    offBeginStatementInCodeStr is the statement offset within sCode,
    offBeginStatementInLine is the start offset handed on to McBlock and
    cParams is the parameter count taken from the macro name.

    Returns True.  Problems are reported thru self.raiseError.
    """
    sMacroName = 'IEM_MC_DEFER_TO_CIMPL_%d_RET' % (cParams,);
    if self.fDebugMc:
        self.debug('%s on %s off %s' % (sMacroName, self.iLine, offBeginStatementInLine,));

    # We need a current function, and must not be inside an IEM_MC_BEGIN block.
    if not self.oCurFunction:
        self.raiseError('%s w/o current function (%s)' % (sMacroName, sCode,));
    if self.oCurMcBlock:
        self.raiseError('%s inside IEM_MC_BEGIN blocki starting at line %u' % (sMacroName, self.oCurMcBlock.iBeginLine,));

    # The indent is the statement offset relative to the start of its own
    # line within sCode (sCode may hold several lines after macro expansion).
    offLineStart = sCode.rfind('\n', 0, offBeginStatementInCodeStr) + 1;
    cchIndent    = offBeginStatementInCodeStr - offLineStart;

    # Create the block object.
    oNewBlock = McBlock(self.sSrcFile, self.iLine, offBeginStatementInLine,
                        self.oCurFunction, self.iMcBlockInFunc, cchIndent, fDeferToCImpl = True);

    # Parse the statement and sanity check the argument count.
    asArgs, offAfter, cLines = self.findAndParseMacroInvocationEx(sCode, sMacroName, offBeginStatementInCodeStr);
    if asArgs is None:
        self.raiseError('%s: Closing parenthesis not found!' % (sMacroName,));
    cExpectedArgs = cParams + 4;
    if len(asArgs) != cExpectedArgs:
        self.raiseError('%s: findAndParseMacroInvocationEx returns %s args, expected %s! (%s)'
                        % (sMacroName, len(asArgs), cExpectedArgs, asArgs));

    oNewBlock.aoStmts = [ McBlock.parseMcDeferToCImpl(oNewBlock, asArgs[0], asArgs[1:]), ];

    # Collect the source lines of the statement.  The else case deals with
    # the statement being part of an expanded multi-line macro.
    if cLines > 0 or self.asLines[oNewBlock.iBeginLine - 1].count('\n') <= 1:
        iFirst       = self.iLine - 1;
        asStmtLines  = self.asLines[iFirst : iFirst + cLines + 1];
        sLastLine    = asStmtLines[-1];
        assert offAfter < len(sLastLine) and sLastLine[offAfter] == ';', \
               'iBeginLine=%d iLine=%d offAfter=%s line: "%s"' % (oNewBlock.iBeginLine, self.iLine, offAfter, sLastLine,);
        asStmtLines[-1] = sLastLine[:offAfter + 1];
    else:
        asStmtLines  = self.extractLinesFromMacroExpansionLine(self.asLines[self.iLine - 1], offBeginStatementInCodeStr,
                                                               offAfter, sMacroName);
        offSemicolon = asStmtLines[-1].find(';');
        assert offSemicolon >= 0;
        asStmtLines[-1] = asStmtLines[-1][:offSemicolon + 1];

    assert asStmtLines[0].find(sMacroName) >= 0;

    # Move on to the line holding the closing ')' and finish the block.
    self.iLine += cLines;
    oNewBlock.complete(self.iLine, 0 if cLines > 0 else offBeginStatementInCodeStr, offAfter + 1, asStmtLines);

    g_aoMcBlocks.append(oNewBlock);
    self.cTotalMcBlocks += 1;
    self.iMcBlockInFunc += 1;

    return True;
5484
def workerStartFunction(self, asArgs):
    """
    Starts a new decoder function (parser method).

    asArgs is the parsed invocation of one of the FNIEMOP*_DEF* and
    FNIEMOP_*STUB* macros, where entry 0 is the macro name and entry 1 is
    handed to DecoderFunction as its third argument.

    Any function still open is completed first, using the source lines from
    its first line up to the one before the current line.

    Returns True.
    """
    oPrevFunction = self.oCurFunction;
    if oPrevFunction:
        iLastLine = self.iLine - 1;
        oPrevFunction.complete(iLastLine, self.asLines[oPrevFunction.iBeginLine - 1 : iLastLine]);

    self.oCurFunction = DecoderFunction(self.sSrcFile, self.iLine, asArgs[1], asArgs);
    return True;
5500
5501 def checkCodeForMacro(self, sCode, offLine):
5502 """
5503 Checks code for relevant macro invocation.
5504 """
# sCode is a piece of code (comments already removed by the caller) and
# offLine is added to the regex match offsets before they are passed to the
# IEM_MC_BEGIN/END/DEFER_TO_CIMPL workers further down.
#
# True is returned after dealing with a function definition macro and after
# the IEM_MC_* scan (see the 'return True' statements), otherwise False.
5505
5506 #
5507 # Scan macro invocations.
5508 #
# Note: find() > 0, so an opening parenthesis at offset zero alone does not count.
5509 if sCode.find('(') > 0:
5510 # Look for instruction decoder function definitions. ASSUME single line.
5511 asArgs = self.findAndParseFirstMacroInvocation(sCode,
5512 [ 'FNIEMOP_DEF',
5513 'FNIEMOPRM_DEF',
5514 'FNIEMOP_STUB',
5515 'FNIEMOP_STUB_1',
5516 'FNIEMOP_UD_STUB',
5517 'FNIEMOP_UD_STUB_1' ]);
5518 if asArgs is not None:
# asArgs[0] is the macro name, asArgs[1] is recorded as the function name.
5519 self.workerStartFunction(asArgs);
5520 #self.debug('%s: oCurFunction=%s' % (self.iLine, self.oCurFunction.sName,));
5521
5522 if not self.aoCurInstrs:
5523 self.addInstruction();
# Remember where the FNIEMOP_XXX macro was seen; a second one for the same
# instruction is reported as an error.
5524 for oInstr in self.aoCurInstrs:
5525 if oInstr.iLineFnIemOpMacro == -1:
5526 oInstr.iLineFnIemOpMacro = self.iLine;
5527 else:
5528 self.error('%s: already seen a FNIEMOP_XXX macro for %s' % (asArgs[0], oInstr,) );
5529 self.setInstrunctionAttrib('sFunction', asArgs[1]);
# All the macro names in the list above start with 'FNIEMOP', so 'STUB' can
# never be found at offset zero and find() > 0 works as a substring test here.
5530 self.setInstrunctionAttrib('fStub', asArgs[0].find('STUB') > 0, fOverwrite = True);
5531 self.setInstrunctionAttrib('fUdStub', asArgs[0].find('UD_STUB') > 0, fOverwrite = True);
5532 if asArgs[0].find('STUB') > 0:
5533 self.doneInstructions(fEndOfFunction = True);
5534 return True;
5535
5536 # Check for worker function definitions, so we can get a context for MC blocks.
5537 asArgs = self.findAndParseFirstMacroInvocation(sCode,
5538 [ 'FNIEMOP_DEF_1',
5539 'FNIEMOP_DEF_2', ]);
5540 if asArgs is not None:
5541 self.workerStartFunction(asArgs);
5542 #self.debug('%s: oCurFunction=%s (%s)' % (self.iLine, self.oCurFunction.sName, asArgs[0]));
5543 return True;
5544
5545 # IEMOP_HLP_DONE_VEX_DECODING_*
5546 asArgs = self.findAndParseFirstMacroInvocation(sCode,
5547 [ 'IEMOP_HLP_DONE_VEX_DECODING',
5548 'IEMOP_HLP_DONE_VEX_DECODING_L0',
5549 'IEMOP_HLP_DONE_VEX_DECODING_NO_VVVV',
5550 'IEMOP_HLP_DONE_VEX_DECODING_L0_AND_NO_VVVV',
5551 ]);
5552 if asArgs is not None:
5553 sMacro = asArgs[0];
# Only the two _L0 variants affect the 'vex_l_zero' hint of the current instructions.
5554 if sMacro in ('IEMOP_HLP_DONE_VEX_DECODING_L0', 'IEMOP_HLP_DONE_VEX_DECODING_L0_AND_NO_VVVV', ):
5555 for oInstr in self.aoCurInstrs:
5556 if 'vex_l_zero' not in oInstr.dHints:
5557 if oInstr.iLineMnemonicMacro >= 0:
5558 self.errorOnLine(oInstr.iLineMnemonicMacro,
5559 'Missing IEMOPHINT_VEX_L_ZERO! (%s on line %d)' % (sMacro, self.iLine,));
5560 oInstr.dHints['vex_l_zero'] = True;
5561
5562 #
5563 # IEMOP_MNEMONIC*
5564 #
# Each macro variant is probed for separately.  asArgs[0] is the macro name,
# so the macro parameters listed in the comments below start at asArgs[1].
# The operand parameters are collected into a list and passed last.
5565 if sCode.find('IEMOP_MNEMONIC') >= 0:
5566 # IEMOP_MNEMONIC(a_Stats, a_szMnemonic) IEMOP_INC_STATS(a_Stats)
5567 asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC');
5568 if asArgs is not None:
5569 if len(self.aoCurInstrs) == 1:
5570 oInstr = self.aoCurInstrs[0];
5571 if oInstr.sStats is None:
5572 oInstr.sStats = asArgs[1];
5573 self.deriveMnemonicAndOperandsFromStats(oInstr, asArgs[1]);
5574
5575 # IEMOP_MNEMONIC0EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_fDisHints, a_fIemHints)
5576 asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC0EX');
5577 if asArgs is not None:
5578 self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[6],
5579 asArgs[7], []);
5580 # IEMOP_MNEMONIC1EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_fDisHints, a_fIemHints)
5581 asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC1EX');
5582 if asArgs is not None:
5583 self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[7],
5584 asArgs[8], [asArgs[6],]);
5585 # IEMOP_MNEMONIC2EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_fDisHints, a_fIemHints)
5586 asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC2EX');
5587 if asArgs is not None:
5588 self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[8],
5589 asArgs[9], [asArgs[6], asArgs[7]]);
5590 # IEMOP_MNEMONIC3EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_fDisHints,
5591 # a_fIemHints)
5592 asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC3EX');
5593 if asArgs is not None:
5594 self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[9],
5595 asArgs[10], [asArgs[6], asArgs[7], asArgs[8],]);
5596 # IEMOP_MNEMONIC4EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_Op4, a_fDisHints,
5597 # a_fIemHints)
5598 asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC4EX');
5599 if asArgs is not None:
5600 self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[10],
5601 asArgs[11], [asArgs[6], asArgs[7], asArgs[8], asArgs[9],]);
5602
5603 # IEMOP_MNEMONIC0(a_Form, a_Upper, a_Lower, a_fDisHints, a_fIemHints)
5604 asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC0');
5605 if asArgs is not None:
5606 self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], []);
5607 # IEMOP_MNEMONIC1(a_Form, a_Upper, a_Lower, a_Op1, a_fDisHints, a_fIemHints)
5608 asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC1');
5609 if asArgs is not None:
5610 self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[5], asArgs[6], [asArgs[4],]);
5611 # IEMOP_MNEMONIC2(a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_fDisHints, a_fIemHints)
5612 asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC2');
5613 if asArgs is not None:
5614 self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[6], asArgs[7],
5615 [asArgs[4], asArgs[5],]);
5616 # IEMOP_MNEMONIC3(a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_fDisHints, a_fIemHints)
5617 asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC3');
5618 if asArgs is not None:
5619 self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[7], asArgs[8],
5620 [asArgs[4], asArgs[5], asArgs[6],]);
5621 # IEMOP_MNEMONIC4(a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_Op4, a_fDisHints, a_fIemHints)
5622 asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC4');
5623 if asArgs is not None:
5624 self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[8], asArgs[9],
5625 [asArgs[4], asArgs[5], asArgs[6], asArgs[7],]);
5626
5627 #
5628 # IEM_MC_BEGIN + IEM_MC_END.
5629 # We must support multiple instances per code snippet.
5630 #
# The cheap find() decides whether the regex scan is worth running at all.
# Group 1 of each match selects the worker: 'END', 'BEGIN' or, for anything
# else, the IEM_MC_DEFER_TO_CIMPL_x_RET worker.
5631 offCode = sCode.find('IEM_MC_');
5632 if offCode >= 0:
5633 for oMatch in self.oReMcBeginEnd.finditer(sCode, offCode):
5634 if oMatch.group(1) == 'END':
5635 self.workerIemMcEnd(offLine + oMatch.start());
5636 elif oMatch.group(1) == 'BEGIN':
5637 self.workerIemMcBegin(sCode, oMatch.start(), offLine + oMatch.start());
5638 else:
# The single character following 'DEFER_TO_CIMPL_' in group 1 is passed on
# as the parameter count (cParams).
5639 self.workerIemMcDeferToCImplXRet(sCode, oMatch.start(), offLine + oMatch.start(),
5640 int(oMatch.group(1)[len('DEFER_TO_CIMPL_')]));
5641 return True;
5642
5643 return False;
5644
def workerPreprocessorRecreateMacroRegex(self):
    """
    Rebuilds self.oReMacros after self.dMacros has changed (parser method).

    The resulting pattern is a single capture group listing all the macro
    names as alternatives.  A macro with an argument list (asArgs is not
    None) must be followed by an opening parenthesis, optionally preceded
    by whitespace, while any other macro must end at a word boundary.

    When there are no macros, self.oReMacros is set to None.

    Returns True.
    """
    if not self.dMacros:
        self.oReMacros = None;
        return True;

    asAlternatives = [];
    for sMacroName, oMacro in self.dMacros.items():
        sSuffix = r'\b' if oMacro.asArgs is None else r'\s*\(';
        asAlternatives.append(sMacroName + sSuffix);

    self.oReMacros = re.compile(r'\b(' + '|'.join(asAlternatives) + ')');
    return True;
5665
def workerPreprocessorDefine(self, sRest):
    """
    Handles a macro #define (parser method).

    sRest is what follows the directive word and must end with a newline.
    Continuation lines are consumed from self.asLines, advancing self.iLine.

    Only macros whose body mentions IEM_MC_BEGIN or IEM_MC_END are added to
    self.dMacros; everything else is ignored.

    Returns True for ignored macros, the self.error() result if the
    definition cannot be parsed, and otherwise the result of
    workerPreprocessorRecreateMacroRegex().
    """
    assert sRest[-1] == '\n';

    #
    # Join up lines using line continuation.  The newline characters are
    # kept, the escaping backslashes (and blanks preceding them) are not.
    #
    iFirstLine = self.iLine;
    while sRest.endswith('\\\n') and self.iLine < len(self.asLines):
        sRest = sRest[:-2].rstrip() + '\n' + self.asLines[self.iLine];
        self.iLine += 1;

    #
    # Split the definition into name, argument list and body.  First try
    # the regex with an argument list, then fall back on the simple form.
    #
    oMatch = self.oReHashDefine2.match(sRest);
    if oMatch:
        sParams = oMatch.group(2).strip();
        asArgs  = [sParam.strip() for sParam in sParams.split(',')] if sParams else None;
        sBody   = oMatch.group(3);
    else:
        oMatch = self.oReHashDefine3.match(sRest);
        if not oMatch:
            self.debug('workerPreprocessorDefine: wtf? sRest=%s' % (sRest,));
            return self.error('bogus macro definition: %s' % (sRest,));
        asArgs = None;
        sBody  = oMatch.group(2);
    sName = oMatch.group(1);
    assert sName == sName.strip();

    #
    # We only care about macros containing (parts of) MC blocks.  MC blocks
    # within nested macro expansion are NOT supported, and there is only
    # limited support for macros expanding to partial MC blocks.
    #
    # Note! IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX and other macros making
    #       use of IEMOP_RAISE_INVALID_LOCK_PREFIX_RET() are ignored here
    #       and dealt with by overriding IEMOP_RAISE_INVALID_LOCK_PREFIX_RET
    #       and its siblings in the recompiler.  That is a lot simpler than
    #       nested macro expansion and heuristics for locating the relevant
    #       macros, and it avoids producing unnecessary threaded functions.
    #
    if 'IEM_MC_BEGIN' not in sBody and 'IEM_MC_END' not in sBody:
        return True;

    #
    # Register the macro and refresh the matching regex.
    #
    if self.fDebugPreproc:
        self.debug('#define %s on line %u' % (sName, self.iLine,));
    self.dMacros[sName] = SimpleParser.Macro(sName, asArgs, sBody.strip(), iFirstLine);
    return self.workerPreprocessorRecreateMacroRegex();
5726
def workerPreprocessorUndef(self, sRest):
    """
    Handles a macro #undef (parser method).

    sRest is what follows the directive word.  Anything from the first
    slash on is taken to be a comment and dropped, provided the slash is
    not the very first character.

    Returns True if the macro is not one we are tracking, otherwise the
    result of workerPreprocessorRecreateMacroRegex().
    """
    # Isolate the macro name.
    offComment = sRest.find('/');
    sName = (sRest[:offComment] if offComment > 0 else sRest).strip();

    # Nothing to do unless it is a macro we are tracking.
    if sName not in self.dMacros:
        return True;

    if self.fDebugPreproc:
        self.debug('#undef %s on line %u' % (sName, self.iLine,));
    del self.dMacros[sName];
    return self.workerPreprocessorRecreateMacroRegex();
5745
def workerPreprocessorIfOrElif(self, sDirective, sRest):
    """
    Handles an #if, #ifdef, #ifndef or #elif directive (parser method).

    sDirective is the directive word and sRest what follows it.  The
    expression is wrapped in a self.PreprocessorConditional object, which
    is pushed onto self.aoCppCondStack, or for #elif appended to the aoElif
    list of the conditional on top of that stack.

    Returns True.  Problems are reported thru self.raiseError.
    """
    fIsElif = sDirective == 'elif';

    # An #elif needs an open #if that has not reached its #else yet.
    if fIsElif:
        if not self.aoCppCondStack:
            self.raiseError('#elif without #if');
        if self.aoCppCondStack[-1].fInElse:
            self.raiseError('#elif after #else');

    # Join up lines using line continuation, replacing the escaped newline
    # (and any blanks in front of the backslash) with a single space.
    while sRest.endswith('\\\n') and self.iLine < len(self.asLines):
        sRest = sRest[:-2].rstrip() + ' ' + self.asLines[self.iLine];
        self.iLine += 1;

    # Get rid of comments as well as leading and trailing blanks.
    sExpr = self.stripComments(sRest).strip();

    # Create the conditional, converting any exception into a parser error.
    try:
        oCond = self.PreprocessorConditional(sDirective, sExpr);
    except Exception as oXcpt:
        self.raiseError(oXcpt.args[0]);

    # Stash it.
    aoTarget = self.aoCppCondStack[-1].aoElif if fIsElif else self.aoCppCondStack;
    aoTarget.append(oCond);
    return True;
5784
def workerPreprocessorElse(self):
    """
    Handles an #else directive (parser method).

    Flags the conditional on top of self.aoCppCondStack as being in its
    #else part.  A missing #if or a second #else is reported thru
    self.raiseError.

    Returns True.
    """
    if not self.aoCppCondStack:
        self.raiseError('#else without #if');

    oTopCond = self.aoCppCondStack[-1];
    if oTopCond.fInElse:
        self.raiseError('Another #else after #else');

    oTopCond.fInElse = True;
    return True;
5796
def workerPreprocessorEndif(self):
    """
    Handles an #endif directive (parser method).

    Pops the top entry off self.aoCppCondStack, reporting an #endif without
    any open conditional thru self.raiseError.

    Returns True.
    """
    aoStack = self.aoCppCondStack;
    if not aoStack:
        self.raiseError('#endif without #if');

    aoStack.pop();
    return True;
5806
def checkPreprocessorDirective(self, sLine):
    """
    Dispatches a preprocessor directive line to its worker (parser method).

    sLine must contain a '#'.  The word following it (blanks in between are
    skipped) selects the worker for define, undef, if, ifdef, ifndef, elif,
    else and endif.

    Returns whatever the worker returns, or False for any other directive.
    """
    cchLine = len(sLine);

    # Locate the hash and step over any blanks following it.
    offHash = sLine.find('#');
    assert offHash >= 0;
    offCur = offHash + 1;
    while offCur < cchLine and sLine[offCur].isspace():
        offCur += 1;

    # The directive word runs up to the next blank (or the end of the line).
    offWord = offCur;
    while offCur < cchLine and not sLine[offCur].isspace():
        offCur += 1;
    sDirective = sLine[offWord:offCur];
    if self.fDebugPreproc:
        self.debug('line %d: #%s...' % (self.iLine, sDirective));

    # Advance to where the arguments/whatever start.  Note that this stops
    # one character short, so the tail begins with the last blank.
    while offCur + 1 < cchLine and sLine[offCur + 1].isspace():
        offCur += 1;
    sTail = sLine[offCur:];

    # Pass it on to the right worker.
    if sDirective == 'define':
        fRet = self.workerPreprocessorDefine(sTail);
    elif sDirective == 'undef':
        fRet = self.workerPreprocessorUndef(sTail);
    elif sDirective in ('if', 'ifdef', 'ifndef', 'elif',):
        fRet = self.workerPreprocessorIfOrElif(sDirective, sTail);
    elif sDirective == 'else':
        fRet = self.workerPreprocessorElse();
    elif sDirective == 'endif':
        fRet = self.workerPreprocessorEndif();
    else:
        if self.fDebugPreproc:
            self.debug('line %d: Unknown preprocessor directive: %s' % (self.iLine, sDirective));
        fRet = False;
    return fRet;
5846
def expandMacros(self, sLine, oMatch):
    """
    Expands the macro invocation oMatch found in sLine (parser method).

    Only this one invocation is expanded; we ASSUME there are no others in
    the line.  oMatch is a match object from self.oReMacros, so group 1 is
    the macro name, including the opening parenthesis if the macro takes
    arguments.

    Returns the line with the invocation replaced by the expansion.
    Invocations spanning multiple lines and argument count mismatches are
    reported thru self.raiseError.
    """
    #
    # Figure out which macro it is and whether it takes arguments.
    #
    offMatch    = oMatch.start();
    offMatchEnd = oMatch.end();
    sName       = oMatch.group(1);
    assert sName == sLine[offMatch:offMatchEnd];
    fWithArgs   = sName.endswith('(');
    if fWithArgs:
        sName = sName[:-1].strip();
    oMacro  = self.dMacros[sName] # type: SimpleParser.Macro
    sBefore = sLine[:offMatch];

    #
    # The simple case: No parameters.
    #
    if not fWithArgs:
        if self.fDebugPreproc:
            self.debug('expanding simple macro %s on line %u' % (sName, self.iLine,));
        return sBefore + oMacro.expandMacro(self) + sLine[offMatchEnd:];

    #
    # Macro with parameters: Split up the argument list at the top-level
    # commas, keeping track of the parenthesis nesting as we go.
    # ASSUMES the whole invocation is on the same line!
    #
    asArgs      = [];
    cDepth      = 1;
    offArgStart = offMatchEnd;
    offCur      = offMatchEnd;
    while True:
        if offCur >= len(sLine):
            self.raiseError('expandMacros: Invocation of macro %s spans multiple lines!' % (sName,));
        chCur = sLine[offCur];
        if chCur == '(':
            cDepth += 1;
        elif chCur == ')':
            cDepth -= 1;
            if cDepth == 0:
                break;
        elif chCur == ',' and cDepth == 1:
            asArgs.append(sLine[offArgStart:offCur].strip());
            offArgStart = offCur + 1;
        offCur += 1;
    asArgs.append(sLine[offArgStart:offCur].strip()); # The last argument ends at the closing parenthesis.

    if len(oMacro.asArgs) == 0 and len(asArgs) == 1 and asArgs[0] == '': # trick for empty parameter list.
        asArgs = [];
    if len(oMacro.asArgs) != len(asArgs):
        self.raiseError('expandMacros: Argument mismatch in %s invocation' % (oMacro.sName,));

    #
    # Expand it, keeping whatever follows the closing parenthesis.
    #
    if self.fDebugPreproc:
        self.debug('expanding macro %s on line %u with arguments %s' % (sName, self.iLine, asArgs));
    return sBefore + oMacro.expandMacro(self, asArgs) + sLine[offCur + 1 :];
5905
def parse(self):
    """
    Parses the lines in self.asLines, starting at self.iLine (parser method).

    Each line is macro expanded (when in code), then handed to the
    preprocessor directive, comment or code macro handling as appropriate.

    Returns the result of self.printErrors(), i.e. the number of errors.
    Raises exception on fatal trouble.
    """
    #
    # Work thru the lines.
    #
    # Note! self.iLine may be advanced by checkCodeForMacro and other worker
    #       methods, which is why it is re-read on every iteration.
    #
    while self.iLine < len(self.asLines):
        sLine = self.asLines[self.iLine];
        self.iLine += 1;

        # When in code, expand any macro we know about and store the result
        # so later processing sees the expanded line.
        if self.iState == self.kiCode and self.oReMacros:
            oMatch = self.oReMacros.search(sLine);
            if oMatch:
                sLine = self.expandMacros(sLine, oMatch);
                if self.fDebugPreproc:
                    self.debug('line %d: expanded\n%s ==>\n%s' % (self.iLine, self.asLines[self.iLine - 1], sLine[:-1],));
                self.asLines[self.iLine - 1] = sLine;

        # Preprocessor directives are checked before comments and other stuff.
        # ASSUMES preprocessor directives don't end with multiline comments.
        if self.iState == self.kiCode and sLine.lstrip().startswith('#'):
            if self.fDebugPreproc:
                self.debug('line %d: preproc' % (self.iLine,));
            self.checkPreprocessorDirective(sLine);
            continue;

        offSlash = sLine.find('/');

        #
        # Lines without any slash cannot start or end a comment.
        #
        if offSlash < 0:
            if self.iState == self.kiCommentMulti:
                # Inside a multi-line comment: just collect the text.
                self.sComment += sLine;
            elif self.iState != self.kiCode:
                pass;
            elif 'IEMOP_' in sLine or 'FNIEMOPRM_DEF' in sLine or 'IEM_MC' in sLine:
                # Code line possibly containing a relevant macro.
                self.checkCodeForMacro(sLine, 0);
            elif sLine[0] == '}':
                # A '}' in the first position completes the instructions.
                self.doneInstructions(fEndOfFunction = True);
            elif sLine.find('PFNIEMOP') > 0 and self.oReFunTable.match(sLine):
                # Instruction table on the form 'IEM_STATIC const PFNIEMOP g_apfnVexMap3',
                # used for checking/adding @oppfx info.
                self.parseFunctionTable(sLine);
            continue;

        #
        # C++ line comment in code: Only what precedes it is of interest.
        #
        if offSlash + 1 < len(sLine) and sLine[offSlash + 1] == '/' and self.iState == self.kiCode:
            if offSlash > 0:
                self.checkCodeForMacro(sLine[:offSlash], 0);
            continue;

        #
        # Otherwise walk the line, alternating between code and C-style
        # comment processing as the state changes.
        #
        offLine = 0;
        while offLine < len(sLine):
            if self.iState == self.kiCode:
                # Look for a substantial multiline comment so we pass the following MC as a whole line:
                #   IEM_MC_ARG_CONST(uint8_t, bImmArg, /*=*/ bImm,   2);
                # Note! We ignore C++ comments here, assuming these aren't used in lines with C-style comments.
                offComment = sLine.find('/*', offLine);
                while offComment >= 0:
                    offCommentEnd = sLine.find('*/', offComment + 2);
                    if offCommentEnd < 0 or offCommentEnd - offComment >= 16: # 16 chars is a bit random.
                        break;
                    offComment = sLine.find('/*', offCommentEnd);

                if offComment >= 0:
                    self.checkCodeForMacro(sLine[offLine:offComment], offLine);
                    self.sComment     = '';
                    self.iCommentLine = self.iLine;
                    self.iState       = self.kiCommentMulti;
                    offLine = offComment + 2;
                else:
                    self.checkCodeForMacro(sLine[offLine:], offLine);
                    offLine = len(sLine);

            elif self.iState == self.kiCommentMulti:
                offCommentEnd = sLine.find('*/', offLine);
                if offCommentEnd >= 0:
                    self.sComment += sLine[offLine:offCommentEnd];
                    self.iState    = self.kiCode;
                    offLine = offCommentEnd + 2;
                    self.parseComment();
                else:
                    self.sComment += sLine[offLine:];
                    offLine = len(sLine);
            else:
                assert False;

    #
    # End of file: Complete pending instructions, do stats and report errors.
    #
    self.doneInstructions(fEndOfFunction = True);
    self.debug('%3s%% / %3s stubs out of %4s instructions and %4s MC blocks in %s'
               % (self.cTotalStubs * 100 // max(self.cTotalInstr, 1), self.cTotalStubs, self.cTotalInstr,
                  self.cTotalMcBlocks, os.path.basename(self.sSrcFile),));
    return self.printErrors();
6011
## The parser holding the content of IEMAllInstCommonBodyMacros.h.
# This is None until __parseFileByName has parsed the file on its first call.
g_oParsedCommonBodyMacros = None # type: SimpleParser
6014
def __parseFileByName(sSrcFile, sDefaultMap, sHostArch):
    """
    Parses one source file for instruction specifications.

    On the first call IEMAllInstCommonBodyMacros.h is located (next to
    sSrcFile, or failing that next to this script) and parsed too, so its
    macros are available when processing the source files.  The resulting
    parser is cached in g_oParsedCommonBodyMacros and passed on to the
    parser of each source file.

    Returns a (cErrors, oParser) tuple, where cErrors is the result of
    oParser.parse().
    Raises Exception if a file cannot be opened or read.  ParserException
    is printed to stderr and re-raised; this includes the case where the
    common body macros file has errors or unexpectedly contains
    instructions, tags, stubs or MC blocks.
    """
    global g_oParsedCommonBodyMacros;

    #
    # Read sSrcFile into a line array.  Opening and reading are kept apart
    # so the error message tells which of them failed.
    #
    try:
        oFile = open(sSrcFile, "r");    # pylint: disable=consider-using-with,unspecified-encoding
    except Exception as oXcpt:
        raise Exception("failed to open %s for reading: %s" % (sSrcFile, oXcpt,));
    try:
        asLines = oFile.readlines();
    except Exception as oXcpt:
        raise Exception("failed to read %s: %s" % (sSrcFile, oXcpt,));
    finally:
        oFile.close();

    #
    # On the first call, we parse IEMAllInstCommonBodyMacros.h so we
    # can use the macros from it when processing the other files.
    #
    if g_oParsedCommonBodyMacros is None:
        # Locate the file.
        sCommonBodyMacros = os.path.join(os.path.split(sSrcFile)[0], 'IEMAllInstCommonBodyMacros.h');
        if not os.path.isfile(sCommonBodyMacros):
            sCommonBodyMacros = os.path.join(os.path.split(__file__)[0], 'IEMAllInstCommonBodyMacros.h');

        # Read it.
        try:
            with open(sCommonBodyMacros, "r") as oIncFile: # pylint: disable=unspecified-encoding
                asIncFiles = oIncFile.readlines();
        except Exception as oXcpt:
            raise Exception("failed to open/read %s: %s" % (sCommonBodyMacros, oXcpt,));

        # Parse it.  The file is only supposed to define macros.
        try:
            oParser = SimpleParser(sCommonBodyMacros, asIncFiles, 'one', sHostArch);
            if oParser.parse() != 0:
                raise ParserException('%s: errors: See above' % (sCommonBodyMacros, ));
            if oParser.cTotalInstr != 0 or oParser.cTotalStubs != 0 or oParser.cTotalTagged != 0 or oParser.cTotalMcBlocks != 0:
                # Note! The argument order must follow the labels in the message: instr, tags, stubs, MCs.
                raise ParserException('%s: error: Unexpectedly found %u instr, %u tags, %u stubs and %u MCs, expecting zero. %s'
                                      % (sCommonBodyMacros, oParser.cTotalInstr, oParser.cTotalTagged, oParser.cTotalStubs,
                                         oParser.cTotalMcBlocks,
                                         ', '.join(sorted(  [str(oMcBlock.iBeginLine) for oMcBlock in g_aoMcBlocks]
                                                          + [str(oInstr.iLineCreated) for oInstr in g_aoAllInstructions])),));
        except ParserException as oXcpt:
            print(str(oXcpt), file = sys.stderr);
            raise;
        g_oParsedCommonBodyMacros = oParser;

    #
    # Do the parsing.
    #
    try:
        oParser = SimpleParser(sSrcFile, asLines, sDefaultMap, sHostArch, g_oParsedCommonBodyMacros);
        return (oParser.parse(), oParser);
    except ParserException as oXcpt:
        print(str(oXcpt), file = sys.stderr);
        raise;
6076
6077
def __doTestCopying():
    """
    Carries out the asCopyTests (@opcopytests) instructions.

    Each reference is looked up in g_dAllInstructionsByStat first and then
    in g_dAllInstructionsByFunction, and the tests of the instruction(s)
    found are appended to the aoTests of the referencing instruction.
    References that cannot be found or that resolve to the referencing
    instruction itself are errors.

    Returns the number of errors, after writing them to stderr.
    """
    asErrors = [];
    for oDstInstr in g_aoAllInstructions:
        if not oDstInstr.asCopyTests:
            continue;
        for sSrcInstr in oDstInstr.asCopyTests:
            # Resolve the reference: Stats name first, function name second.
            oByStat = g_dAllInstructionsByStat.get(sSrcInstr, None);
            if oByStat:
                aoSrcInstrs = [oByStat,];
            else:
                aoSrcInstrs = g_dAllInstructionsByFunction.get(sSrcInstr, []);

            if not aoSrcInstrs:
                asErrors.append('%s:%s: error: @opcopytests reference "%s" not found\n'
                                % ( oDstInstr.sSrcFile, oDstInstr.iLineCreated, sSrcInstr));
                continue;

            # Copy the tests, refusing to copy from ourselves.
            for oSrcInstr in aoSrcInstrs:
                if oSrcInstr != oDstInstr:
                    oDstInstr.aoTests.extend(oSrcInstr.aoTests);
                else:
                    asErrors.append('%s:%s: error: @opcopytests reference "%s" matches the destination\n'
                                    % ( oDstInstr.sSrcFile, oDstInstr.iLineCreated, sSrcInstr));

    if asErrors:
        sys.stderr.write(u''.join(asErrors));
    return len(asErrors);
6105
6106
def __applyOnlyTest():
    """
    Restricts testing to the instructions in g_aoOnlyTestInstructions.

    If that list is non-empty, the aoTests of every other instruction in
    g_aoAllInstructions are replaced by an empty list.

    Returns 0 (no errors).
    """
    if not g_aoOnlyTestInstructions:
        return 0;

    for oInstr in g_aoAllInstructions:
        if oInstr.aoTests and oInstr not in g_aoOnlyTestInstructions:
            oInstr.aoTests = [];
    return 0;
6118
## List of all main instruction files, each with its default map and file set.
# A file set of -1 means the file is included in all sets.
g_aaoAllInstrFilesAndDefaultMapAndSet = (
    ( 'IEMAllInstCommon.cpp.h',     'one',       -1, ),
    ( 'IEMAllInstOneByte.cpp.h',    'one',        1, ),
    ( 'IEMAllInst3DNow.cpp.h',      '3dnow',      2, ),
    ( 'IEMAllInstTwoByte0f.cpp.h',  'two0f',      2, ),
    ( 'IEMAllInstThree0f38.cpp.h',  'three0f38',  3, ),
    ( 'IEMAllInstThree0f3a.cpp.h',  'three0f3a',  3, ),
    ( 'IEMAllInstVexMap1.cpp.h',    'vexmap1',    4, ),
    ( 'IEMAllInstVexMap2.cpp.h',    'vexmap2',    4, ),
    ( 'IEMAllInstVexMap3.cpp.h',    'vexmap3',    4, ),
);
6131
def __parseFilesWorker(asFilesAndDefaultMap, sHostArch):
    """
    Parses all the IEMAllInstruction*.cpp.h files.

    asFilesAndDefaultMap is a sequence of (sFilename, sDefaultMap) pairs.
    A file name without any directory component that does not exist as
    given is looked up in the directory of this script.  sHostArch is
    passed on to __parseFileByName.

    After parsing, the test copying and only-test filtering are applied and
    the total stub statistics are printed to stderr.

    Returns a list of the parsers on success.
    Raises exception on failure, i.e. if any errors were counted.
    """
    sSrcDir   = os.path.dirname(os.path.abspath(__file__));
    cErrors   = 0;
    aoParsers = [];
    for sFilename, sDefaultMap in asFilesAndDefaultMap:
        if not os.path.split(sFilename)[0] and not os.path.exists(sFilename):
            sFilename = os.path.join(sSrcDir, sFilename);
        cThisErrors, oParser = __parseFileByName(sFilename, sDefaultMap, sHostArch);
        cErrors += cThisErrors;
        aoParsers.append(oParser);
    cErrors += __doTestCopying();
    cErrors += __applyOnlyTest();

    # Total stub stats.  The divisor is clamped to 1 like in SimpleParser.parse(),
    # so that an empty instruction list does not cause a ZeroDivisionError.
    cTotalInstr = len(g_aoAllInstructions);
    cTotalStubs = 0;
    for oInstr in g_aoAllInstructions:
        cTotalStubs += oInstr.fStub;
    print('debug: %3s%% / %3s stubs out of %4s instructions and %4s MC blocks in total'
          % (cTotalStubs * 100 // max(cTotalInstr, 1), cTotalStubs, cTotalInstr, len(g_aoMcBlocks),),
          file = sys.stderr);

    if cErrors != 0:
        raise Exception('%d parse errors' % (cErrors,));
    return aoParsers;
6162
6163
def parseFiles(asFiles, sHostArch = None):
    """
    Parses a selection of IEMAllInstruction*.cpp.h files.

    The default map of each file is looked up by its file name (ignoring
    directory and case) in g_aaoAllInstrFilesAndDefaultMapAndSet.

    Returns a list of the parsers on success.
    Raises exception on failure, including when a file is not in the table.
    """
    asFilesAndDefaultMap = [];
    for sFilename in asFiles:
        sBaseName   = os.path.split(sFilename)[1].lower();
        # The first table entry with a matching name decides the map.
        sDefaultMap = next((aoInfo[1] for aoInfo in g_aaoAllInstrFilesAndDefaultMapAndSet
                            if aoInfo[0].lower() == sBaseName), None);
        if not sDefaultMap:
            raise Exception('Unable to classify file: %s' % (sFilename,));
        asFilesAndDefaultMap.append((sFilename, sDefaultMap));

    # Let __parseFilesWorker do the actual job.
    return __parseFilesWorker(asFilesAndDefaultMap, sHostArch);
6185
6186
def parseAll(sHostArch = None):
    """
    Parses all the files listed in g_aaoAllInstrFilesAndDefaultMapAndSet.

    Returns a list of the parsers on success.
    Raises exception on failure.
    """
    # Only the file name and default map columns are of interest to the worker.
    asFilesAndDefaultMap = [aoInfo[:2] for aoInfo in g_aaoAllInstrFilesAndDefaultMapAndSet];
    return __parseFilesWorker(asFilesAndDefaultMap, sHostArch);
6195
6196
6197#
6198# Generators (may perhaps move later).
6199#
def __formatDisassemblerTableEntry(oInstr):
    """
    Formats oInstr as a disassembler table entry.

    The entry is an OP() macro invocation, or OPVEX() if the instruction
    has more than three operands, with these columns: the opcode format
    string, three (four for OPVEX) parser index columns, the opcode enum
    value, three (four) operand parameter columns, and finally the flags.

    For reference:
       OP("psrlw %Vdq,%Wdq", IDX_ParseModRM, IDX_UseModRM, 0, OP_PSRLW, OP_PARM_Vdq, OP_PARM_Wdq, OP_PARM_NONE,
          DISOPTYPE_HARMLESS),
       define OP(pszOpcode, idxParse1, idxParse2, idxParse3, opcode, param1, param2, param3, optype) \\
           { pszOpcode, idxParse1, idxParse2, idxParse3, 0, opcode, param1, param2, param3, 0, 0, optype }

    Returns the formatted line (without any newline).
    """
    cOperands = len(oInstr.aoOperands);
    if cOperands > 3:
        sMacro, cMaxOperands = 'OPVEX', 4;
        assert cOperands <= cMaxOperands;
    else:
        sMacro, cMaxOperands = 'OP', 3;

    #
    # Format string.
    #
    sOpcode = '%s("%s' % (sMacro, oInstr.sMnemonic,);
    asOperandFmts = [];
    for oOperand in oInstr.aoOperands:
        sFmt = g_kdOpTypes[oOperand.sType][2];
        asOperandFmts.append(sFmt if sFmt[0] == '%' else sFmt.upper()); ## @todo remove upper() later.
    if asOperandFmts:
        sOpcode += ' ' + ','.join(asOperandFmts);
    sOpcode += '",';
    asColumns = [ sOpcode, ];

    #
    # Decoders.
    #
    iStart    = len(asColumns);
    sEncoding = oInstr.sEncoding;
    dSimpleParsers = {
        'VEX.ModR/M': 'IDX_ParseModRM,',
        'vex2':       'IDX_ParseVex2b,',
        'vex3':       'IDX_ParseVex3b,',
    };
    if sEncoding is None or sEncoding in ( 'fixed', 'VEX.fixed', ):
        pass;
    elif sEncoding == 'ModR/M':
        # ASSUME the first operand is using the ModR/M encoding
        assert len(oInstr.aoOperands) >= 1 and oInstr.aoOperands[0].usesModRM(), "oInstr=%s" % (oInstr,);
        asColumns.append('IDX_ParseModRM,');
    elif sEncoding == 'prefix':
        asColumns.extend(['0,'] * len(oInstr.aoOperands));
    elif sEncoding in dSimpleParsers:
        asColumns.append(dSimpleParsers[sEncoding]);
    elif sEncoding in g_dInstructionMaps:
        asColumns.append(g_dInstructionMaps[sEncoding].sDisParse + ',');
    else:
        ## @todo Not handled yet:
        #   IDX_ParseTwoByteEsc, IDX_ParseGrp1, IDX_ParseShiftGrp2, IDX_ParseGrp3, IDX_ParseGrp4, IDX_ParseGrp5,
        #   IDX_Parse3DNow, IDX_ParseGrp6, IDX_ParseGrp7, IDX_ParseGrp8, IDX_ParseGrp9, IDX_ParseGrp10,
        #   IDX_ParseGrp12, IDX_ParseGrp13, IDX_ParseGrp14, IDX_ParseGrp15, IDX_ParseGrp16,
        #   IDX_ParseThreeByteEsc4, IDX_ParseThreeByteEsc5, IDX_ParseModFence, IDX_ParseEscFP,
        #   IDX_ParseNopPause, IDX_ParseInvOpModRM,
        assert False, str(oInstr);

    # Check for immediates and stuff in the remaining operands, then pad
    # the parser columns up to the max operand count.
    for oOperand in oInstr.aoOperands[len(asColumns) - iStart:]:
        asColumns.append(g_kdOpTypes[oOperand.sType][0] + ',');
    asColumns.extend(['0,'] * (cMaxOperands - (len(asColumns) - iStart)));

    #
    # Opcode and operands.
    #
    assert oInstr.sDisEnum, str(oInstr);
    asColumns.append(oInstr.sDisEnum + ',');
    asParams = ['OP_PARM_' + g_kdOpTypes[oOperand.sType][3] + ',' for oOperand in oInstr.aoOperands];
    asParams.extend(['OP_PARM_NONE,'] * (cMaxOperands - len(asParams)));
    asColumns.extend(asParams);

    #
    # Flags: The DISOPTYPE_XXX defines of the hints, in sorted hint order.
    #
    asFlags = [];
    for sHint in sorted(oInstr.dHints.keys()):
        sDefine = g_kdHints[sHint];
        if sDefine.startswith('DISOPTYPE_'):
            asFlags.append(sDefine);
    asColumns.append((' | '.join(asFlags) if asFlags else '0') + '),');

    #
    # Format the columns into a line.  Each column is padded up to its
    # offset, but always separated from the previous one by at least a space.
    #
    aoffColumns = [4, 29, 49, 65, 77, 89, 109, 125, 141, 157, 183, 199];
    sLine = '';
    for iColumn, sColumn in enumerate(asColumns):
        cchPadding = aoffColumns[iColumn] - len(sLine);
        sLine += ' ' * max(cchPadding, 1) + sColumn;

    return sLine;
6324
def __checkIfShortTable(aoTableOrdered, oMap):
    """
    Decides whether a table should be emitted in trimmed ("short") form.

    The leading and trailing runs of None entries in aoTableOrdered are what
    can be trimmed off.  The short form is picked when the number of such
    entries is at least len(aoTableOrdered) // 30.

    Returns (iInstr, cInstructions, fShortTable):
        - Short table: index of the first non-None entry, index just past
          the last non-None entry, and True.  Both indexes are zero when
          there are no non-None entries at all.
        - Full table:  (0, len(aoTableOrdered), False).
    """
    _ = oMap; # Not consulted at present; available for per-map overriding.

    cTotal = len(aoTableOrdered);

    # Locate the end of the used range: one past the last entry that is not None.
    iEnd = next((idx + 1 for idx in range(cTotal - 1, -1, -1) if aoTableOrdered[idx] is not None), 0);

    # Locate the start of the used range: the first entry that is not None.
    iFirst = next((idx for idx in range(iEnd) if aoTableOrdered[idx] is not None), iEnd);

    # NOTE(review): The threshold is one thirtieth of the table (about 3.3%),
    #               not 30% - confirm which of the two was intended.
    cTrimmable = iFirst + (cTotal - iEnd);
    if cTrimmable < cTotal // 30:
        # Too little to gain, output the full table.
        return (0, cTotal, False);
    return (iFirst, iEnd, True);
6346
def generateDisassemblerTables(oDstFile = sys.stdout):
    """
    Generates disassembler tables.

    Parses all the instruction files (parseAll), splits each map using the
    'byte+pfx' selector into one map per prefix, and then writes a DISOPCODE
    table followed by a DISOPMAPDESC range record to oDstFile for every map
    whose name starts with 'vex'.

    oDstFile is the file-like object receiving the generated text through
    its write() method; it defaults to sys.stdout.

    Returns exit code: 0 on success, 1 if parseAll raised an exception (the
    error and traceback are printed to sys.stderr).
    """

    #
    # Parse all.
    #
    try:
        parseAll();
    except Exception as oXcpt: # Top-level boundary: report it and convert it to an exit code.
        print('error: parseAll failed: %s' % (oXcpt,), file = sys.stderr);
        traceback.print_exc(file = sys.stderr);
        return 1;

    #
    # The disassembler uses a slightly different table layout: maps selected
    # by opcode byte + prefix are split up into one map per prefix (none,
    # 0x66, 0xf3 and 0xf2).  The maps are ordered by encoding + lead opcodes.
    #
    aoDisasmMaps = [];
    for oMap in sorted(g_dInstructionMaps.values(),
                       key = lambda oSortMap: oSortMap.sEncoding + ''.join(oSortMap.asLeadOpcodes)):
        if oMap.sSelector != 'byte+pfx':
            aoDisasmMaps.append(oMap);
        else:
            # Split the map by prefix.
            aoDisasmMaps.append(oMap.copy(oMap.sName,         'none'));
            aoDisasmMaps.append(oMap.copy(oMap.sName + '_66', '0x66'));
            aoDisasmMaps.append(oMap.copy(oMap.sName + '_F3', '0xf3'));
            aoDisasmMaps.append(oMap.copy(oMap.sName + '_F2', '0xf2'));

    #
    # Dump each map.
    #
    # Note! asHeaderLines collects the matching 'extern' declarations, but
    #       nothing in this function writes them out.
    #
    asHeaderLines = [];
    print("debug: maps=%s\n" % (', '.join([oMap.sName for oMap in aoDisasmMaps]),), file = sys.stderr);
    for oMap in aoDisasmMaps:
        if not oMap.sName.startswith("vex"):
            continue; # only looking at the vex maps at the moment.

        #
        # Get the instructions for the map and see if we can do a short version or not.
        #
        aoTableOrder    = oMap.getInstructionsInTableOrder();
        cEntriesPerByte = oMap.getEntriesPerByte();
        (iInstrStart, iInstrEnd, fShortTable) = __checkIfShortTable(aoTableOrder, oMap);
        sTableName      = oMap.getDisasTableName();
        sRangeName      = oMap.getDisasRangeName();
        cEntriesPerRow  = 0x10 * cEntriesPerByte; # Table entries covered by one high opcode nibble.

        # An empty range still gets a single dummy initializer further down,
        # so the array has one element in that case and the element count
        # used in the declarations and the AssertCompile must say so.
        fEmptyTable     = iInstrStart >= iInstrEnd;
        cTableEntries   = 1 if fEmptyTable else iInstrEnd - iInstrStart;

        #
        # Output the table start.
        # Note! Short tables are static and only accessible via the map range record.
        #
        asLines = [
            '/* Generated from: %-11s Selector: %-7s Encoding: %-7s Lead bytes opcodes: %s */'
            % ( oMap.sName, oMap.sSelector, oMap.sEncoding, ' '.join(oMap.asLeadOpcodes), ),
        ];
        if fShortTable:
            asLines.append('static const DISOPCODE %s[] =' % (sTableName,));
        else:
            asHeaderLines.append('extern const DISOPCODE %s[%d];' % (sTableName, cTableEntries,));
            asLines.append('const DISOPCODE %s[%d] =' % (sTableName, cTableEntries,));
        asLines.append('{');

        # A short table not starting on a row boundary gets a comment giving its first index.
        if fShortTable and (iInstrStart & (cEntriesPerRow - 1)) != 0:
            asLines.append(' /* %#04x: */' % (iInstrStart,));

        #
        # Output the instructions.
        #
        iInstr = iInstrStart;
        while iInstr < iInstrEnd:
            oInstr = aoTableOrder[iInstr];

            # Start of a new row: blank separator line (except first) and a row comment.
            if (iInstr & (cEntriesPerRow - 1)) == 0:
                if iInstr != iInstrStart:
                    asLines.append('');
                asLines.append(' /* %x */' % ((iInstr // cEntriesPerByte) >> 4,));

            if oInstr is None:
                # Invalid. Optimize blocks of invalid instructions.
                # The scan is limited to the range being emitted so a block
                # can never run past the end of the table.
                cInvalidInstrs = 1;
                while iInstr + cInvalidInstrs < iInstrEnd and aoTableOrder[iInstr + cInvalidInstrs] is None:
                    cInvalidInstrs += 1;

                # Note! The INVALID_OPCODE_BLOCK_<n> macros presumably expand
                #       to <n> invalid entries, which is why <n> - 1 entries
                #       are skipped here (the loop tail adds the last one).
                if (iInstr & (cEntriesPerRow - 1)) == 0 and cInvalidInstrs >= cEntriesPerRow:
                    asLines.append(' INVALID_OPCODE_BLOCK_%u,' % (cEntriesPerRow,));
                    iInstr += cEntriesPerRow - 1;
                elif cEntriesPerByte > 1:
                    if (iInstr & (cEntriesPerByte - 1)) == 0 and cInvalidInstrs >= cEntriesPerByte:
                        asLines.append(' INVALID_OPCODE_BLOCK_%u,' % (cEntriesPerByte,));
                        iInstr += cEntriesPerByte - 1;
                    else:
                        asLines.append(' /* %#04x/%d */ INVALID_OPCODE,'
                                       % (iInstr // cEntriesPerByte, iInstr % cEntriesPerByte));
                else:
                    asLines.append(' /* %#04x */ INVALID_OPCODE,' % (iInstr,));
            elif isinstance(oInstr, list) and len(oInstr) != 0:
                # Several candidates for the same slot; emit a placeholder listing them all.
                asLines.append(' /* %#04x */ ComplicatedListStuffNeedingWrapper, /* \n -- %s */'
                               % (iInstr, '\n -- '.join([str(oItem) for oItem in oInstr]),));
            else:
                # NOTE(review): An empty list also ends up here - confirm that
                #               the formatter is meant to handle that.
                asLines.append(__formatDisassemblerTableEntry(oInstr));

            iInstr += 1;

        if fEmptyTable:
            asLines.append(' /* dummy */ INVALID_OPCODE');

        asLines.append('};');
        asLines.append('AssertCompile(RT_ELEMENTS(%s) == %s);' % (sTableName, cTableEntries,));

        #
        # We always emit a map range record, assuming the linker will eliminate the unnecessary ones.
        # The declaration uses the very same name as the definition so the two match up.
        #
        asHeaderLines.append('extern const DISOPMAPDESC %s;' % (sRangeName,));
        asLines.append('const DISOPMAPDESC %s = { &%s[0], %#04x, RT_ELEMENTS(%s) };'
                       % (sRangeName, sTableName, iInstrStart, sTableName,));

        #
        # Write out the lines, followed by an empty line.
        #
        oDstFile.write('\n'.join(asLines));
        oDstFile.write('\n');
        oDstFile.write('\n');
    return 0;
6475
if __name__ == '__main__':
    # Script entry point: the exit code of the generator becomes the process exit status.
    raise SystemExit(generateDisassemblerTables());
6478
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette