VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllInstPython.py@ 103787

Last change on this file since 103787 was 103787, checked in by vboxsync, 11 months ago

VMM/IEM: Add a qword index parameter to IEM_MC_FETCH_YREG_U64() and replace IEM_MC_FETCH_YREG_2ND_U64() with it, bugref:10614

  • Property svn:eol-style set to LF
  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 321.0 KB
Line 
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3# $Id: IEMAllInstPython.py 103787 2024-03-11 17:47:32Z vboxsync $
4
5"""
6IEM instruction extractor.
7
8This script/module parses the IEMAllInstruction*.cpp.h files next to it and
9collects information about the instructions. It can then be used to generate
10disassembler tables and tests.
11"""
12
13from __future__ import print_function;
14
15__copyright__ = \
16"""
17Copyright (C) 2017-2023 Oracle and/or its affiliates.
18
19This file is part of VirtualBox base platform packages, as
20available from https://www.virtualbox.org.
21
22This program is free software; you can redistribute it and/or
23modify it under the terms of the GNU General Public License
24as published by the Free Software Foundation, in version 3 of the
25License.
26
27This program is distributed in the hope that it will be useful, but
28WITHOUT ANY WARRANTY; without even the implied warranty of
29MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
30General Public License for more details.
31
32You should have received a copy of the GNU General Public License
33along with this program; if not, see <https://www.gnu.org/licenses>.
34
35The contents of this file may alternatively be used under the terms
36of the Common Development and Distribution License Version 1.0
37(CDDL), a copy of it is provided in the "COPYING.CDDL" file included
38in the VirtualBox distribution, in which case the provisions of the
39CDDL are applicable instead of those of the GPL.
40
41You may elect to license modified versions of this file under the
42terms and conditions of either the GPL or the CDDL or both.
43
44SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
45"""
46__version__ = "$Revision: 103787 $"
47
48# pylint: disable=anomalous-backslash-in-string,too-many-lines
49
50# Standard python imports.
51import os;
52import re;
53import sys;
54import traceback;
55
56## Only the main script needs to modify the path.
57#g_ksValidationKitDir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),
58# 'ValidationKit');
59#sys.path.append(g_ksValidationKitDir);
60#
61#from common import utils; - Windows build boxes doesn't have pywin32.
62
# Python 3 compatibility: there is no separate 'long' type, so alias it to 'int'.
if sys.version_info >= (3,):
    long = int; # pylint: disable=redefined-builtin,invalid-name
66
67
## X86_EFL_XXX constant names and their values, expressed as bit positions.
g_kdX86EFlagsConstants = {
    'X86_EFL_CF':        1 << 0,
    'X86_EFL_1':         1 << 1,
    'X86_EFL_PF':        1 << 2,
    'X86_EFL_AF':        1 << 4,
    'X86_EFL_ZF':        1 << 6,
    'X86_EFL_SF':        1 << 7,
    'X86_EFL_TF':        1 << 8,
    'X86_EFL_IF':        1 << 9,
    'X86_EFL_DF':        1 << 10,
    'X86_EFL_OF':        1 << 11,
    'X86_EFL_IOPL':      (1 << 12) | (1 << 13),
    'X86_EFL_NT':        1 << 14,
    'X86_EFL_RF':        1 << 16,
    'X86_EFL_VM':        1 << 17,
    'X86_EFL_AC':        1 << 18,
    'X86_EFL_VIF':       1 << 19,
    'X86_EFL_VIP':       1 << 20,
    'X86_EFL_ID':        1 << 21,
    'X86_EFL_LIVE_MASK': 0x003f7fd5, # UINT32_C(0x003f7fd5)
    'X86_EFL_RA1_MASK':  1 << 1,
};
90
## EFlags values allowed in \@opfltest, \@opflmodify, \@opflundef, \@opflset, and \@opflclear.
##
## Maps a flag mnemonic to an X86_EFL_XXX constant name (see
## g_kdX86EFlagsConstants).  A leading '!' on the value marks the mnemonic
## as denoting the cleared state of that flag.
g_kdEFlagsMnemonics = {
    # Debugger flag notation (sorted by value):
    'cf': 'X86_EFL_CF', ##< Carry Flag.
    'nc': '!X86_EFL_CF', ##< No Carry.

    # NOTE(review): architecturally PF=1 indicates even parity, so the 'po' and
    # 'pe' values below look swapped - confirm against the users before changing.
    'po': 'X86_EFL_PF', ##< Parity Odd.
    'pe': '!X86_EFL_PF', ##< Parity Even.

    'af': 'X86_EFL_AF', ##< Aux Flag.
    'na': '!X86_EFL_AF', ##< No Aux.

    'zr': 'X86_EFL_ZF', ##< ZeRo.
    'nz': '!X86_EFL_ZF', ##< No Zero.

    'ng': 'X86_EFL_SF', ##< NeGative (sign).
    'pl': '!X86_EFL_SF', ##< PLus (sign).

    'tf': 'X86_EFL_TF', ##< Trap flag.

    'ei': 'X86_EFL_IF', ##< Enabled Interrupts.
    'di': '!X86_EFL_IF', ##< Disabled Interrupts.

    'dn': 'X86_EFL_DF', ##< DowN (string op direction).
    'up': '!X86_EFL_DF', ##< UP (string op direction).

    'ov': 'X86_EFL_OF', ##< OVerflow.
    'nv': '!X86_EFL_OF', ##< No Overflow.

    'nt': 'X86_EFL_NT', ##< Nested Task.
    'rf': 'X86_EFL_RF', ##< Resume Flag.
    'vm': 'X86_EFL_VM', ##< Virtual-8086 Mode.
    'ac': 'X86_EFL_AC', ##< Alignment Check.
    'vif': 'X86_EFL_VIF', ##< Virtual Interrupt Flag.
    'vip': 'X86_EFL_VIP', ##< Virtual Interrupt Pending.

    # Reference manual notation not covered above (sorted by value):
    'pf': 'X86_EFL_PF',
    'zf': 'X86_EFL_ZF',
    'sf': 'X86_EFL_SF',
    'if': 'X86_EFL_IF',
    'df': 'X86_EFL_DF',
    'of': 'X86_EFL_OF',
    'iopl': 'X86_EFL_IOPL',
    'id': 'X86_EFL_ID',
};
137
## X86_CR0_XXX constant names and their values, expressed as bit positions.
g_kdX86Cr0Constants = {
    'X86_CR0_PE': 1 << 0,
    'X86_CR0_MP': 1 << 1,
    'X86_CR0_EM': 1 << 2,
    'X86_CR0_TS': 1 << 3,
    'X86_CR0_ET': 1 << 4,
    'X86_CR0_NE': 1 << 5,
    'X86_CR0_WP': 1 << 16,
    'X86_CR0_AM': 1 << 18,
    'X86_CR0_NW': 1 << 29,
    'X86_CR0_CD': 1 << 30,
    'X86_CR0_PG': 1 << 31,
};
152
## X86_CR4_XXX constant names and their values, expressed as bit positions.
g_kdX86Cr4Constants = {
    'X86_CR4_VME':         1 << 0,
    'X86_CR4_PVI':         1 << 1,
    'X86_CR4_TSD':         1 << 2,
    'X86_CR4_DE':          1 << 3,
    'X86_CR4_PSE':         1 << 4,
    'X86_CR4_PAE':         1 << 5,
    'X86_CR4_MCE':         1 << 6,
    'X86_CR4_PGE':         1 << 7,
    'X86_CR4_PCE':         1 << 8,
    'X86_CR4_OSFXSR':      1 << 9,
    'X86_CR4_OSXMMEEXCPT': 1 << 10,
    'X86_CR4_VMXE':        1 << 13,
    'X86_CR4_SMXE':        1 << 14,
    'X86_CR4_PCIDE':       1 << 17,
    'X86_CR4_OSXSAVE':     1 << 18,
    'X86_CR4_SMEP':        1 << 20,
    'X86_CR4_SMAP':        1 << 21,
    'X86_CR4_PKE':         1 << 22,
};
174
## XSAVE components (XCR0): XSAVE_C_XXX constant names and their values.
g_kdX86XSaveCConstants = {
    'XSAVE_C_X87':         1 << 0,
    'XSAVE_C_SSE':         1 << 1,
    'XSAVE_C_YMM':         1 << 2,
    'XSAVE_C_BNDREGS':     1 << 3,
    'XSAVE_C_BNDCSR':      1 << 4,
    'XSAVE_C_OPMASK':      1 << 5,
    'XSAVE_C_ZMM_HI256':   1 << 6,
    'XSAVE_C_ZMM_16HI':    1 << 7,
    'XSAVE_C_PKRU':        1 << 9,
    'XSAVE_C_LWP':         1 << 62,
    'XSAVE_C_X':           1 << 63,
    # For clearing all AVX bits (YMM + ZMM_HI256 + ZMM_16HI).
    'XSAVE_C_ALL_AVX':     (1 << 2) | (1 << 6) | (1 << 7),
    # For clearing all AVX and SSE bits.
    'XSAVE_C_ALL_AVX_SSE': (1 << 1) | (1 << 2) | (1 << 6) | (1 << 7),
};
191
192
## \@op[1-4] locations
##
## Every location name maps to its own (initially empty) list.
g_kdOpLocations = {
    sLocation: []
    for sLocation in (
        'reg',      # modrm.reg
        'rm',       # modrm.rm
        'imm',      # immediate instruction data
        'vvvv',     # VEX.vvvv

        # Fixed registers.
        'AL',
        'rAX',
        'rDX',
        'CL',
        'rSI',
        'rDI',
        'rFLAGS',
        'CS',
        'DS',
        'ES',
        'FS',
        'GS',
        'SS',

        # Fixed values.
        '1',
    )
};
218
## \@op[1-4] types
##
## Maps an operand type name to a 5-tuple describing it.
##
## Value fields:
## - 0: the normal IDX_ParseXXX handler (IDX_UseModRM == IDX_ParseModRM).
## - 1: the location (g_kdOpLocations).
## - 2: disassembler format string version of the type.
## - 3: disassembler OP_PARAM_XXX (XXX only).
## - 4: IEM form matching instruction.
##
## Note! See the A.2.1 in SDM vol 2 for the type names.
## Note! Several keys carry a _WO, _RO or _RW suffix and otherwise share the
##       tuple of the unsuffixed type (presumably write-only / read-only /
##       read-write access variants - TODO confirm).
g_kdOpTypes = {
    # Fixed addresses
    'Ap': ( 'IDX_ParseImmAddrF', 'imm', '%Ap', 'Ap', 'FIXED', ),

    # ModR/M.rm
    'Eb': ( 'IDX_UseModRM', 'rm', '%Eb', 'Eb', 'RM', ),
    'Ed': ( 'IDX_UseModRM', 'rm', '%Ed', 'Ed', 'RM', ),
    'Ed_WO': ( 'IDX_UseModRM', 'rm', '%Ed', 'Ed', 'RM', ),
    'Eq': ( 'IDX_UseModRM', 'rm', '%Eq', 'Eq', 'RM', ),
    'Eq_WO': ( 'IDX_UseModRM', 'rm', '%Eq', 'Eq', 'RM', ),
    'Ew': ( 'IDX_UseModRM', 'rm', '%Ew', 'Ew', 'RM', ),
    'Ev': ( 'IDX_UseModRM', 'rm', '%Ev', 'Ev', 'RM', ),
    'Ey': ( 'IDX_UseModRM', 'rm', '%Ey', 'Ey', 'RM', ),
    'Qd': ( 'IDX_UseModRM', 'rm', '%Qd', 'Qd', 'RM', ),
    'Qq': ( 'IDX_UseModRM', 'rm', '%Qq', 'Qq', 'RM', ),
    'Qq_WO': ( 'IDX_UseModRM', 'rm', '%Qq', 'Qq', 'RM', ),
    'Wss': ( 'IDX_UseModRM', 'rm', '%Wss', 'Wss', 'RM', ),
    'Wss_WO': ( 'IDX_UseModRM', 'rm', '%Wss', 'Wss', 'RM', ),
    'Wsd': ( 'IDX_UseModRM', 'rm', '%Wsd', 'Wsd', 'RM', ),
    'Wsd_WO': ( 'IDX_UseModRM', 'rm', '%Wsd', 'Wsd', 'RM', ),
    'Wps': ( 'IDX_UseModRM', 'rm', '%Wps', 'Wps', 'RM', ),
    'Wps_WO': ( 'IDX_UseModRM', 'rm', '%Wps', 'Wps', 'RM', ),
    'Wpd': ( 'IDX_UseModRM', 'rm', '%Wpd', 'Wpd', 'RM', ),
    'Wpd_WO': ( 'IDX_UseModRM', 'rm', '%Wpd', 'Wpd', 'RM', ),
    'Wdq': ( 'IDX_UseModRM', 'rm', '%Wdq', 'Wdq', 'RM', ),
    'Wdq_WO': ( 'IDX_UseModRM', 'rm', '%Wdq', 'Wdq', 'RM', ),
    'Wq': ( 'IDX_UseModRM', 'rm', '%Wq', 'Wq', 'RM', ),
    'Wq_WO': ( 'IDX_UseModRM', 'rm', '%Wq', 'Wq', 'RM', ),
    'WqZxReg_WO': ( 'IDX_UseModRM', 'rm', '%Wq', 'Wq', 'RM', ),
    'Wx': ( 'IDX_UseModRM', 'rm', '%Wx', 'Wx', 'RM', ),
    'Wx_WO': ( 'IDX_UseModRM', 'rm', '%Wx', 'Wx', 'RM', ),

    # ModR/M.rm - register only.
    'Uq': ( 'IDX_UseModRM', 'rm', '%Uq', 'Uq', 'REG' ),
    'UqHi': ( 'IDX_UseModRM', 'rm', '%Uq', 'UqHi', 'REG' ),
    'Uss': ( 'IDX_UseModRM', 'rm', '%Uss', 'Uss', 'REG' ),
    'Uss_WO': ( 'IDX_UseModRM', 'rm', '%Uss', 'Uss', 'REG' ),
    'Usd': ( 'IDX_UseModRM', 'rm', '%Usd', 'Usd', 'REG' ),
    'Usd_WO': ( 'IDX_UseModRM', 'rm', '%Usd', 'Usd', 'REG' ),
    'Ux': ( 'IDX_UseModRM', 'rm', '%Ux', 'Ux', 'REG' ),
    'Nq': ( 'IDX_UseModRM', 'rm', '%Qq', 'Nq', 'REG' ),

    # ModR/M.rm - memory only.
    'Ma': ( 'IDX_UseModRM', 'rm', '%Ma', 'Ma', 'MEM', ), ##< Only used by BOUND.
    'Mb_RO': ( 'IDX_UseModRM', 'rm', '%Mb', 'Mb', 'MEM', ),
    'Md': ( 'IDX_UseModRM', 'rm', '%Md', 'Md', 'MEM', ),
    'Md_RO': ( 'IDX_UseModRM', 'rm', '%Md', 'Md', 'MEM', ),
    'Md_WO': ( 'IDX_UseModRM', 'rm', '%Md', 'Md', 'MEM', ),
    'Mdq': ( 'IDX_UseModRM', 'rm', '%Mdq', 'Mdq', 'MEM', ),
    'Mdq_WO': ( 'IDX_UseModRM', 'rm', '%Mdq', 'Mdq', 'MEM', ),
    'Mq': ( 'IDX_UseModRM', 'rm', '%Mq', 'Mq', 'MEM', ),
    'Mq_WO': ( 'IDX_UseModRM', 'rm', '%Mq', 'Mq', 'MEM', ),
    'Mps_WO': ( 'IDX_UseModRM', 'rm', '%Mps', 'Mps', 'MEM', ),
    'Mpd_WO': ( 'IDX_UseModRM', 'rm', '%Mpd', 'Mpd', 'MEM', ),
    'Mx': ( 'IDX_UseModRM', 'rm', '%Mx', 'Mx', 'MEM', ),
    'Mx_WO': ( 'IDX_UseModRM', 'rm', '%Mx', 'Mx', 'MEM', ),
    'M_RO': ( 'IDX_UseModRM', 'rm', '%M', 'M', 'MEM', ),
    'M_RW': ( 'IDX_UseModRM', 'rm', '%M', 'M', 'MEM', ),

    # ModR/M.reg
    'Gb': ( 'IDX_UseModRM', 'reg', '%Gb', 'Gb', '', ),
    'Gw': ( 'IDX_UseModRM', 'reg', '%Gw', 'Gw', '', ),
    'Gd': ( 'IDX_UseModRM', 'reg', '%Gd', 'Gd', '', ),
    'Gv': ( 'IDX_UseModRM', 'reg', '%Gv', 'Gv', '', ),
    'Gv_RO': ( 'IDX_UseModRM', 'reg', '%Gv', 'Gv', '', ),
    'Gy': ( 'IDX_UseModRM', 'reg', '%Gy', 'Gy', '', ),
    'Pd': ( 'IDX_UseModRM', 'reg', '%Pd', 'Pd', '', ),
    'PdZx_WO': ( 'IDX_UseModRM', 'reg', '%Pd', 'PdZx', '', ),
    'Pq': ( 'IDX_UseModRM', 'reg', '%Pq', 'Pq', '', ),
    'Pq_WO': ( 'IDX_UseModRM', 'reg', '%Pq', 'Pq', '', ),
    'Vd': ( 'IDX_UseModRM', 'reg', '%Vd', 'Vd', '', ),
    'Vd_WO': ( 'IDX_UseModRM', 'reg', '%Vd', 'Vd', '', ),
    'VdZx_WO': ( 'IDX_UseModRM', 'reg', '%Vd', 'Vd', '', ),
    'Vdq': ( 'IDX_UseModRM', 'reg', '%Vdq', 'Vdq', '', ),
    'Vss': ( 'IDX_UseModRM', 'reg', '%Vss', 'Vss', '', ),
    'Vss_WO': ( 'IDX_UseModRM', 'reg', '%Vss', 'Vss', '', ),
    'VssZx_WO': ( 'IDX_UseModRM', 'reg', '%Vss', 'Vss', '', ),
    'Vsd': ( 'IDX_UseModRM', 'reg', '%Vsd', 'Vsd', '', ),
    'Vsd_WO': ( 'IDX_UseModRM', 'reg', '%Vsd', 'Vsd', '', ),
    'VsdZx_WO': ( 'IDX_UseModRM', 'reg', '%Vsd', 'Vsd', '', ),
    'Vps': ( 'IDX_UseModRM', 'reg', '%Vps', 'Vps', '', ),
    'Vps_WO': ( 'IDX_UseModRM', 'reg', '%Vps', 'Vps', '', ),
    'Vpd': ( 'IDX_UseModRM', 'reg', '%Vpd', 'Vpd', '', ),
    'Vpd_WO': ( 'IDX_UseModRM', 'reg', '%Vpd', 'Vpd', '', ),
    'Vq': ( 'IDX_UseModRM', 'reg', '%Vq', 'Vq', '', ),
    'Vq_WO': ( 'IDX_UseModRM', 'reg', '%Vq', 'Vq', '', ),
    'Vdq_WO': ( 'IDX_UseModRM', 'reg', '%Vdq', 'Vdq', '', ),
    'VqHi': ( 'IDX_UseModRM', 'reg', '%Vdq', 'VdqHi', '', ),
    'VqHi_WO': ( 'IDX_UseModRM', 'reg', '%Vdq', 'VdqHi', '', ),
    'VqZx_WO': ( 'IDX_UseModRM', 'reg', '%Vq', 'VqZx', '', ),
    'Vx': ( 'IDX_UseModRM', 'reg', '%Vx', 'Vx', '', ),
    'Vx_WO': ( 'IDX_UseModRM', 'reg', '%Vx', 'Vx', '', ),

    # VEX.vvvv
    'By': ( 'IDX_UseModRM', 'vvvv', '%By', 'By', 'V', ),
    'Hps': ( 'IDX_UseModRM', 'vvvv', '%Hps', 'Hps', 'V', ),
    'Hpd': ( 'IDX_UseModRM', 'vvvv', '%Hpd', 'Hpd', 'V', ),
    'HssHi': ( 'IDX_UseModRM', 'vvvv', '%Hx', 'HssHi', 'V', ),
    'HsdHi': ( 'IDX_UseModRM', 'vvvv', '%Hx', 'HsdHi', 'V', ),
    'Hq': ( 'IDX_UseModRM', 'vvvv', '%Hq', 'Hq', 'V', ),
    'HqHi': ( 'IDX_UseModRM', 'vvvv', '%Hq', 'HqHi', 'V', ),
    'Hx': ( 'IDX_UseModRM', 'vvvv', '%Hx', 'Hx', 'V', ),

    # Immediate values.
    'Ib': ( 'IDX_ParseImmByte', 'imm', '%Ib', 'Ib', '', ), ##< NB! Could be IDX_ParseImmByteSX for some instrs.
    'Iw': ( 'IDX_ParseImmUshort', 'imm', '%Iw', 'Iw', '', ),
    'Id': ( 'IDX_ParseImmUlong', 'imm', '%Id', 'Id', '', ),
    'Iq': ( 'IDX_ParseImmQword', 'imm', '%Iq', 'Iq', '', ),
    'Iv': ( 'IDX_ParseImmV', 'imm', '%Iv', 'Iv', '', ), ##< o16: word, o32: dword, o64: qword
    'Iz': ( 'IDX_ParseImmZ', 'imm', '%Iz', 'Iz', '', ), ##< o16: word, o32|o64:dword

    # Address operands (no ModR/M).
    'Ob': ( 'IDX_ParseImmAddr', 'imm', '%Ob', 'Ob', '', ),
    'Ov': ( 'IDX_ParseImmAddr', 'imm', '%Ov', 'Ov', '', ),

    # Relative jump targets
    'Jb': ( 'IDX_ParseImmBRel', 'imm', '%Jb', 'Jb', '', ),
    'Jv': ( 'IDX_ParseImmVRel', 'imm', '%Jv', 'Jv', '', ),

    # DS:rSI
    'Xb': ( 'IDX_ParseXb', 'rSI', '%eSI', 'Xb', '', ),
    'Xv': ( 'IDX_ParseXv', 'rSI', '%eSI', 'Xv', '', ),
    # ES:rDI
    'Yb': ( 'IDX_ParseYb', 'rDI', '%eDI', 'Yb', '', ),
    'Yv': ( 'IDX_ParseYv', 'rDI', '%eDI', 'Yv', '', ),

    # The flags register.
    'Fv': ( 'IDX_ParseFixedReg', 'rFLAGS', '%Fv', 'Fv', '', ),

    # Fixed registers.
    # Note! The CL register is keyed as 'REG_CL' here, while its location
    #       (field 1) is 'CL'.
    'AL': ( 'IDX_ParseFixedReg', 'AL', 'al', 'REG_AL', '', ),
    'REG_CL': ( 'IDX_ParseFixedReg', 'CL', 'cl', 'REG_CL', '', ),
    'rAX': ( 'IDX_ParseFixedReg', 'rAX', '%eAX', 'REG_EAX', '', ),
    'rDX': ( 'IDX_ParseFixedReg', 'rDX', '%eDX', 'REG_EDX', '', ),
    'CS': ( 'IDX_ParseFixedReg', 'CS', 'cs', 'REG_CS', '', ), # 8086: push CS
    'DS': ( 'IDX_ParseFixedReg', 'DS', 'ds', 'REG_DS', '', ),
    'ES': ( 'IDX_ParseFixedReg', 'ES', 'es', 'REG_ES', '', ),
    'FS': ( 'IDX_ParseFixedReg', 'FS', 'fs', 'REG_FS', '', ),
    'GS': ( 'IDX_ParseFixedReg', 'GS', 'gs', 'REG_GS', '', ),
    'SS': ( 'IDX_ParseFixedReg', 'SS', 'ss', 'REG_SS', '', ),

    # Fixed values.
    '1': ( '', '1', '1', '1', '', ),
};
372
373# IDX_ParseFixedReg
374# IDX_ParseVexDest
375
376
## IEMFORM_XXX mappings.
##
## Value fields:
## - 0: the encoding (sEncoding, see g_kdEncodings).
## - 1: list with the location of each operand (see g_kdOpLocations), or
##      None for the 'FIXED' form.
## - 2: the opcodesub value (see g_kdSubOpcodes), empty string if none.
##
## NOTE(review): unlike the other XXX_REG / XXX_MEM forms, M_REG, M_MEM,
## VEX_M_REG and VEX_M_MEM have an empty opcodesub - confirm this is intended.
g_kdIemForms = { # sEncoding, [ sWhere1, ... ] opcodesub ),
    'RM': ( 'ModR/M', [ 'reg', 'rm' ], '', ),
    'RM_REG': ( 'ModR/M', [ 'reg', 'rm' ], '11 mr/reg', ),
    'RM_MEM': ( 'ModR/M', [ 'reg', 'rm' ], '!11 mr/reg', ),
    'RMI': ( 'ModR/M', [ 'reg', 'rm', 'imm' ], '', ),
    'RMI_REG': ( 'ModR/M', [ 'reg', 'rm', 'imm' ], '11 mr/reg', ),
    'RMI_MEM': ( 'ModR/M', [ 'reg', 'rm', 'imm' ], '!11 mr/reg', ),
    'MR': ( 'ModR/M', [ 'rm', 'reg' ], '', ),
    'MR_REG': ( 'ModR/M', [ 'rm', 'reg' ], '11 mr/reg', ),
    'MR_MEM': ( 'ModR/M', [ 'rm', 'reg' ], '!11 mr/reg', ),
    'MRI': ( 'ModR/M', [ 'rm', 'reg', 'imm' ], '', ),
    'MRI_REG': ( 'ModR/M', [ 'rm', 'reg', 'imm' ], '11 mr/reg', ),
    'MRI_MEM': ( 'ModR/M', [ 'rm', 'reg', 'imm' ], '!11 mr/reg', ),
    'M': ( 'ModR/M', [ 'rm', ], '', ),
    'M_REG': ( 'ModR/M', [ 'rm', ], '', ),
    'M_MEM': ( 'ModR/M', [ 'rm', ], '', ),
    'M1': ( 'ModR/M', [ 'rm', '1' ], '', ),
    'M_CL': ( 'ModR/M', [ 'rm', 'CL' ], '', ), # shl/rcl/ror/++
    'MI': ( 'ModR/M', [ 'rm', 'imm' ], '', ),
    'MI_REG': ( 'ModR/M', [ 'rm', 'imm' ], '11 mr/reg', ),
    'MI_MEM': ( 'ModR/M', [ 'rm', 'imm' ], '!11 mr/reg', ),
    'R': ( 'ModR/M', [ 'reg', ], '', ),

    # VEX encoded forms.
    'VEX_RM': ( 'VEX.ModR/M', [ 'reg', 'rm' ], '', ),
    'VEX_RM_REG': ( 'VEX.ModR/M', [ 'reg', 'rm' ], '11 mr/reg', ),
    'VEX_RM_MEM': ( 'VEX.ModR/M', [ 'reg', 'rm' ], '!11 mr/reg', ),
    'VEX_MR': ( 'VEX.ModR/M', [ 'rm', 'reg' ], '', ),
    'VEX_MR_REG': ( 'VEX.ModR/M', [ 'rm', 'reg' ], '11 mr/reg', ),
    'VEX_MR_MEM': ( 'VEX.ModR/M', [ 'rm', 'reg' ], '!11 mr/reg', ),
    'VEX_M': ( 'VEX.ModR/M', [ 'rm', ], '' ),
    'VEX_M_REG': ( 'VEX.ModR/M', [ 'rm', ], '' ),
    'VEX_M_MEM': ( 'VEX.ModR/M', [ 'rm', ], '' ),
    'VEX_R': ( 'VEX.ModR/M', [ 'reg', ], '' ),
    'VEX_RVM': ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm' ], '', ),
    'VEX_RVM_REG': ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm' ], '11 mr/reg', ),
    'VEX_RVM_MEM': ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm' ], '!11 mr/reg', ),
    'VEX_RMV': ( 'VEX.ModR/M', [ 'reg', 'rm', 'vvvv' ], '', ),
    'VEX_RMV_REG': ( 'VEX.ModR/M', [ 'reg', 'rm', 'vvvv' ], '11 mr/reg', ),
    'VEX_RMV_MEM': ( 'VEX.ModR/M', [ 'reg', 'rm', 'vvvv' ], '!11 mr/reg', ),
    'VEX_RMI': ( 'VEX.ModR/M', [ 'reg', 'rm', 'imm' ], '', ),
    'VEX_RMI_REG': ( 'VEX.ModR/M', [ 'reg', 'rm', 'imm' ], '11 mr/reg', ),
    'VEX_RMI_MEM': ( 'VEX.ModR/M', [ 'reg', 'rm', 'imm' ], '!11 mr/reg', ),
    'VEX_MVR': ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg' ], '', ),
    'VEX_MVR_REG': ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg' ], '11 mr/reg', ),
    'VEX_MVR_MEM': ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg' ], '!11 mr/reg', ),

    'VEX_VM': ( 'VEX.ModR/M', [ 'vvvv', 'rm' ], '', ),
    'VEX_VM_REG': ( 'VEX.ModR/M', [ 'vvvv', 'rm' ], '11 mr/reg', ),
    'VEX_VM_MEM': ( 'VEX.ModR/M', [ 'vvvv', 'rm' ], '!11 mr/reg', ),
    'VEX_VMI': ( 'VEX.ModR/M', [ 'vvvv', 'rm', 'imm' ], '', ),
    'VEX_VMI_REG': ( 'VEX.ModR/M', [ 'vvvv', 'rm', 'imm' ], '11 mr/reg', ),
    'VEX_VMI_MEM': ( 'VEX.ModR/M', [ 'vvvv', 'rm', 'imm' ], '!11 mr/reg', ),

    # No operand locations for the fixed encoding.
    'FIXED': ( 'fixed', None, '', ),
};
433
## \@oppfx values.
##
## Every prefix name maps to its own (initially empty) list.
g_kdPrefixes = {
    sPrefix: []
    for sPrefix in (
        'none',
        '0x66',
        '0xf3',
        '0xf2',
        '!0xf3',    # special case for bsf/tzcnt
    )
};
442
## Special \@opcode tag values.
##
## Every tag value maps to its own (initially empty) list.
g_kdSpecialOpcodes = {
    sSpecial: []
    for sSpecial in (
        '/reg',
        'mr/reg',
        '11 /reg',
        '!11 /reg',
        '11 mr/reg',
        '!11 mr/reg',
    )
};
452
## Special \@opcodesub tag values.
##
## Value fields:
## - 0: the real value; differs from the key only for the aliases
##      (None for 'none').
## - 1: the value for bs3cg1.
g_kdSubOpcodes = {
    'none': [ None, '', ],
    '11 mr/reg': [ '11 mr/reg', '', ],
    '11': [ '11 mr/reg', '', ], ##< alias
    '!11 mr/reg': [ '!11 mr/reg', '', ],
    '!11': [ '!11 mr/reg', '', ], ##< alias
    'rex.w=0': [ 'rex.w=0', 'WZ', ],
    'w=0': [ 'rex.w=0', '', ], ##< alias
    'rex.w=1': [ 'rex.w=1', 'WNZ', ],
    'w=1': [ 'rex.w=1', '', ], ##< alias
    'vex.l=0': [ 'vex.l=0', 'L0', ],
    # Not an alias, so the real value is the key itself (it used to say
    # 'vex.l=0', which contradicted the 'L1' bs3cg1 value next to it).
    'vex.l=1': [ 'vex.l=1', 'L1', ],
    '11 mr/reg vex.l=0': [ '11 mr/reg vex.l=0', 'L0', ],
    '11 mr/reg vex.l=1': [ '11 mr/reg vex.l=1', 'L1', ],
    '!11 mr/reg vex.l=0': [ '!11 mr/reg vex.l=0', 'L0', ],
    '!11 mr/reg vex.l=1': [ '!11 mr/reg vex.l=1', 'L1', ],
    '!11 mr/reg rex.w=0': [ '!11 mr/reg rex.w=0', '', ],
    '!11 mr/reg rex.w=1': [ '!11 mr/reg rex.w=1', '', ],
};
475
## Valid values for \@openc
##
## Each encoding maps to a single-entry list holding a BS3CG1ENC_XXX string,
## or None in the case of 'prefix'.
g_kdEncodings = dict(
    (sEncoding, [ sBs3Cg1Enc, ])
    for sEncoding, sBs3Cg1Enc in (
        ( 'ModR/M',     'BS3CG1ENC_MODRM',     ),  # ModR/M
        ( 'VEX.ModR/M', 'BS3CG1ENC_VEX_MODRM', ),  # VEX...ModR/M
        ( 'fixed',      'BS3CG1ENC_FIXED',     ),  # Fixed encoding (address, registers, unused, etc).
        ( 'VEX.fixed',  'BS3CG1ENC_VEX_FIXED', ),  # VEX + fixed encoding (address, registers, unused, etc).
        ( 'prefix',     None,                  ),  # Prefix
    )
);
484
## \@opunused, \@opinvalid, \@opinvlstyle
##
## Every style name maps to its own (initially empty) list.
g_kdInvalidStyles = {
    sStyle: []
    for sStyle in (
        # CPU stops decoding immediately after the opcode.
        'immediate',
        # VEX+ModR/M, everyone.
        'vex.modrm',
        # Intel decodes ModR/M.
        'intel-modrm',
        # Intel decodes ModR/M and an 8-bit immediate.
        'intel-modrm-imm8',
        # Intel decodes another opcode byte followed by ModR/M. (Unused extension tables.)
        'intel-opcode-modrm',
        # Intel decodes another opcode byte followed by ModR/M and an 8-bit immediate.
        'intel-opcode-modrm-imm8',
    )
};
494
## Recognized CPU names; each one maps to an empty tuple.
g_kdCpuNames = dict.fromkeys(( '8086', '80186', '80286', '80386', '80486', ), ());
502
## \@opcpuid
##
## Maps the feature name accepted by the tag to an X86_CPUID_XXX constant
## name.
g_kdCpuIdFlags = {
    'vme': 'X86_CPUID_FEATURE_EDX_VME',
    'tsc': 'X86_CPUID_FEATURE_EDX_TSC',
    'msr': 'X86_CPUID_FEATURE_EDX_MSR',
    'cx8': 'X86_CPUID_FEATURE_EDX_CX8',
    'sep': 'X86_CPUID_FEATURE_EDX_SEP',
    'cmov': 'X86_CPUID_FEATURE_EDX_CMOV',
    'clfsh': 'X86_CPUID_FEATURE_EDX_CLFSH',
    'clflushopt': 'X86_CPUID_STEXT_FEATURE_EBX_CLFLUSHOPT',
    'mmx': 'X86_CPUID_FEATURE_EDX_MMX',
    'fxsr': 'X86_CPUID_FEATURE_EDX_FXSR',
    'sse': 'X86_CPUID_FEATURE_EDX_SSE',
    'sse2': 'X86_CPUID_FEATURE_EDX_SSE2',
    'sse3': 'X86_CPUID_FEATURE_ECX_SSE3',
    # NOTE(review): the value names DTES64 rather than PCLMUL, which are
    # different CPUID.1:ECX bits - presumably a copy/paste slip, confirm
    # against the constant definitions before changing.
    'pclmul': 'X86_CPUID_FEATURE_ECX_DTES64',
    # NOTE(review): the value names CPLDS rather than MONITOR - same concern.
    'monitor': 'X86_CPUID_FEATURE_ECX_CPLDS',
    'vmx': 'X86_CPUID_FEATURE_ECX_VMX',
    # NOTE(review): the value names TM2 rather than SMX - same concern.
    'smx': 'X86_CPUID_FEATURE_ECX_TM2',
    'ssse3': 'X86_CPUID_FEATURE_ECX_SSSE3',
    'fma': 'X86_CPUID_FEATURE_ECX_FMA',
    'cx16': 'X86_CPUID_FEATURE_ECX_CX16',
    'pcid': 'X86_CPUID_FEATURE_ECX_PCID',
    'sse4.1': 'X86_CPUID_FEATURE_ECX_SSE4_1',
    'sse4.2': 'X86_CPUID_FEATURE_ECX_SSE4_2',
    'movbe': 'X86_CPUID_FEATURE_ECX_MOVBE',
    'popcnt': 'X86_CPUID_FEATURE_ECX_POPCNT',
    'aes': 'X86_CPUID_FEATURE_ECX_AES',
    'xsave': 'X86_CPUID_FEATURE_ECX_XSAVE',
    'avx': 'X86_CPUID_FEATURE_ECX_AVX',
    'avx2': 'X86_CPUID_STEXT_FEATURE_EBX_AVX2',
    'f16c': 'X86_CPUID_FEATURE_ECX_F16C',
    'rdrand': 'X86_CPUID_FEATURE_ECX_RDRAND',

    # AMD feature flags (X86_CPUID_AMD_FEATURE_XXX).
    'axmmx': 'X86_CPUID_AMD_FEATURE_EDX_AXMMX',
    '3dnowext': 'X86_CPUID_AMD_FEATURE_EDX_3DNOW_EX',
    '3dnow': 'X86_CPUID_AMD_FEATURE_EDX_3DNOW',
    'svm': 'X86_CPUID_AMD_FEATURE_ECX_SVM',
    'cr8l': 'X86_CPUID_AMD_FEATURE_ECX_CR8L',
    'abm': 'X86_CPUID_AMD_FEATURE_ECX_ABM',
    'sse4a': 'X86_CPUID_AMD_FEATURE_ECX_SSE4A',
    '3dnowprf': 'X86_CPUID_AMD_FEATURE_ECX_3DNOWPRF',
    'xop': 'X86_CPUID_AMD_FEATURE_ECX_XOP',
    'fma4': 'X86_CPUID_AMD_FEATURE_ECX_FMA4',
};
548
## \@ophints values.
##
## Maps the hint name to a DISOPTYPE_XXX constant name; the value is an
## empty string for the hints that have none.
# pylint: disable=line-too-long
g_kdHints = {
    'invalid': 'DISOPTYPE_INVALID', ##<
    'harmless': 'DISOPTYPE_HARMLESS', ##<
    'controlflow': 'DISOPTYPE_CONTROLFLOW', ##<
    'potentially_dangerous': 'DISOPTYPE_POTENTIALLY_DANGEROUS', ##<
    'dangerous': 'DISOPTYPE_DANGEROUS', ##<
    'portio': 'DISOPTYPE_PORTIO', ##<
    'privileged': 'DISOPTYPE_PRIVILEGED', ##<
    'privileged_notrap': 'DISOPTYPE_PRIVILEGED_NOTRAP', ##<
    'uncond_controlflow': 'DISOPTYPE_UNCOND_CONTROLFLOW', ##<
    'relative_controlflow': 'DISOPTYPE_RELATIVE_CONTROLFLOW', ##<
    'cond_controlflow': 'DISOPTYPE_COND_CONTROLFLOW', ##<
    'interrupt': 'DISOPTYPE_INTERRUPT', ##<
    'illegal': 'DISOPTYPE_ILLEGAL', ##<
    'rrm_dangerous': 'DISOPTYPE_RRM_DANGEROUS', ##< Some additional dangerous ones when recompiling raw r0.
    'rrm_dangerous_16': 'DISOPTYPE_RRM_DANGEROUS_16', ##< Some additional dangerous ones when recompiling 16-bit raw r0.
    'inhibit_irqs': 'DISOPTYPE_INHIBIT_IRQS', ##< Will or can inhibit irqs (sti, pop ss, mov ss)
    'x86_portio_read': 'DISOPTYPE_X86_PORTIO_READ', ##<
    'x86_portio_write': 'DISOPTYPE_X86_PORTIO_WRITE', ##<
    'x86_invalid_64': 'DISOPTYPE_X86_INVALID_64', ##< Invalid in 64 bits mode
    'x86_only_64': 'DISOPTYPE_X86_ONLY_64', ##< Only valid in 64 bits mode
    'x86_default_64_op_size': 'DISOPTYPE_X86_DEFAULT_64_OP_SIZE', ##< Default 64 bits operand size
    'x86_forced_64_op_size': 'DISOPTYPE_X86_FORCED_64_OP_SIZE', ##< Forced 64 bits operand size; regardless of prefix bytes
    'x86_rexb_extends_opreg': 'DISOPTYPE_X86_REXB_EXTENDS_OPREG', ##< REX.B extends the register field in the opcode byte
    'x86_mod_fixed_11': 'DISOPTYPE_X86_MOD_FIXED_11', ##< modrm.mod is always 11b
    'x86_forced_32_op_size_x86': 'DISOPTYPE_X86_FORCED_32_OP_SIZE_X86', ##< Forced 32 bits operand size; regardless of prefix bytes
    ## (only in 16 & 32 bits mode!)
    'x86_avx': 'DISOPTYPE_X86_AVX', ##< AVX,AVX2,++ instruction. Not implemented yet!
    'x86_sse': 'DISOPTYPE_X86_SSE', ##< SSE,SSE2,SSE3,++ instruction. Not implemented yet!
    'x86_mmx': 'DISOPTYPE_X86_MMX', ##< MMX,MMXExt,3DNow,++ instruction. Not implemented yet!
    'x86_fpu': 'DISOPTYPE_X86_FPU', ##< FPU instruction. Not implemented yet!
    'ignores_oz_pfx': '', ##< Ignores operand size prefix 66h.
    'ignores_rexw': '', ##< Ignores REX.W.
    'ignores_op_sizes': '', ##< Shorthand for "ignores_oz_pfx | ignores_rexw" (presumably; TODO confirm).
    'vex_l_zero': '', ##< VEX.L must be 0.
    'vex_l_ignored': '', ##< VEX.L is ignored.
    'vex_v_zero': '', ##< VEX.V must be 0. (generate sub-table?)
    'lock_allowed': '', ##< Lock prefix allowed.
};
# pylint: enable=line-too-long
591
## \@opxcpttype values (see SDMv2 2.4, 2.7).
##
## Every exception type name maps to its own (initially empty) list.
g_kdXcptTypes = {
    sXcptType: []
    for sXcptType in (
        'none',
        '1',
        '2',
        '3',
        '4',
        '4UA',
        '5',
        '5LZ',      # LZ = VEX.L must be zero.
        '6',
        '7',
        '7LZ',
        '8',
        '11',
        '12',
        'E1',
        'E1NF',
        'E2',
        'E3',
        'E3NF',
        'E4',
        'E4NF',
        'E5',
        'E5NF',
        'E6',
        'E6NF',
        'E7NF',
        'E9',
        'E9NF',
        'E10',
        'E11',
        'E12',
        'E12NF',
    )
};
627
628
def _isValidOpcodeByte(sOpcode):
    """
    Tells whether sOpcode spells out one opcode byte, i.e. whether it is '0x'
    followed by exactly two lower case hex digits (like '0x0f').
    Returns True if it does, False if not.
    """
    if len(sOpcode) != 4 or sOpcode[:2] != '0x':
        return False;
    for chDigit in sOpcode[2:]:
        if chDigit not in '0123456789abcdef':
            return False;
    return True;
640
641
642class InstructionMap(object):
643 """
644 Instruction map.
645
646 The opcode map provides the lead opcode bytes (empty for the one byte
647 opcode map). An instruction can be member of multiple opcode maps as long
648 as it uses the same opcode value within the map (because of VEX).
649 """
650
651 kdEncodings = {
652 'legacy': [],
653 'vex1': [], ##< VEX or EVEX prefix with vvvvv = 1
654 'vex2': [], ##< VEX or EVEX prefix with vvvvv = 2
655 'vex3': [], ##< VEX or EVEX prefix with vvvvv = 3
656 'xop8': [], ##< XOP prefix with vvvvv = 8
657 'xop9': [], ##< XOP prefix with vvvvv = 9
658 'xop10': [], ##< XOP prefix with vvvvv = 10
659 };
660 ## Selectors.
661 ## 1. The first value is the number of table entries required by a
662 ## decoder or disassembler for this type of selector.
663 ## 2. The second value is how many entries per opcode byte if applicable.
664 kdSelectors = {
665 'byte': [ 256, 1, ], ##< next opcode byte selects the instruction (default).
666 'byte+pfx': [ 1024, 4, ], ##< next opcode byte selects the instruction together with the 0x66, 0xf2 and 0xf3 prefixes.
667 '/r': [ 8, 1, ], ##< modrm.reg selects the instruction.
668 'memreg /r':[ 16, 1, ], ##< modrm.reg and (modrm.mod == 3) selects the instruction.
669 'mod /r': [ 32, 1, ], ##< modrm.reg and modrm.mod selects the instruction.
670 '!11 /r': [ 8, 1, ], ##< modrm.reg selects the instruction with modrm.mod != 0y11.
671 '11 /r': [ 8, 1, ], ##< modrm.reg select the instruction with modrm.mod == 0y11.
672 '11': [ 64, 1, ], ##< modrm.reg and modrm.rm select the instruction with modrm.mod == 0y11.
673 };
674
675 ## Define the subentry number according to the Instruction::sPrefix
676 ## value for 'byte+pfx' selected tables.
677 kiPrefixOrder = {
678 'none': 0,
679 '0x66': 1,
680 '0xf3': 2,
681 '0xf2': 3,
682 };
683
684 def __init__(self, sName, sIemName = None, asLeadOpcodes = None, sSelector = 'byte+pfx',
685 sEncoding = 'legacy', sDisParse = None):
686 assert sSelector in self.kdSelectors;
687 assert sEncoding in self.kdEncodings;
688 if asLeadOpcodes is None:
689 asLeadOpcodes = [];
690 else:
691 for sOpcode in asLeadOpcodes:
692 assert _isValidOpcodeByte(sOpcode);
693 assert sDisParse is None or sDisParse.startswith('IDX_Parse');
694
695 self.sName = sName;
696 self.sIemName = sIemName;
697 self.asLeadOpcodes = asLeadOpcodes; ##< Lead opcode bytes formatted as hex strings like '0x0f'.
698 self.sSelector = sSelector; ##< The member selector, see kdSelectors.
699 self.sEncoding = sEncoding; ##< The encoding, see kdSelectors.
700 self.aoInstructions = [] # type: Instruction
701 self.sDisParse = sDisParse; ##< IDX_ParseXXX.
702
703 def copy(self, sNewName, sPrefixFilter = None):
704 """
705 Copies the table with filtering instruction by sPrefix if not None.
706 """
707 oCopy = InstructionMap(sNewName, sIemName = self.sIemName, asLeadOpcodes = self.asLeadOpcodes,
708 sSelector = 'byte' if sPrefixFilter is not None and self.sSelector == 'byte+pfx'
709 else self.sSelector,
710 sEncoding = self.sEncoding, sDisParse = self.sDisParse);
711 if sPrefixFilter is None:
712 oCopy.aoInstructions = list(self.aoInstructions);
713 else:
714 oCopy.aoInstructions = [oInstr for oInstr in self.aoInstructions if oInstr.sPrefix == sPrefixFilter];
715 return oCopy;
716
717 def getTableSize(self):
718 """
719 Number of table entries. This corresponds directly to the selector.
720 """
721 return self.kdSelectors[self.sSelector][0];
722
723 def getEntriesPerByte(self):
724 """
725 Number of table entries per opcode bytes.
726
727 This only really makes sense for the 'byte' and 'byte+pfx' selectors, for
728 the others it will just return 1.
729 """
730 return self.kdSelectors[self.sSelector][1];
731
732 def getInstructionIndex(self, oInstr):
733 """
734 Returns the table index for the instruction.
735 """
736 bOpcode = oInstr.getOpcodeByte();
737
738 # The byte selectors are simple. We need a full opcode byte and need just return it.
739 if self.sSelector == 'byte':
740 assert oInstr.sOpcode[:2] == '0x' and len(oInstr.sOpcode) == 4, str(oInstr);
741 return bOpcode;
742
743 # The byte + prefix selector is similarly simple, though requires a prefix as well as the full opcode.
744 if self.sSelector == 'byte+pfx':
745 assert oInstr.sOpcode[:2] == '0x' and len(oInstr.sOpcode) == 4, str(oInstr);
746 assert self.kiPrefixOrder.get(oInstr.sPrefix, -16384) >= 0;
747 return bOpcode * 4 + self.kiPrefixOrder.get(oInstr.sPrefix, -16384);
748
749 # The other selectors needs masking and shifting.
750 if self.sSelector == '/r':
751 return (bOpcode >> 3) & 0x7;
752
753 if self.sSelector == 'mod /r':
754 return (bOpcode >> 3) & 0x1f;
755
756 if self.sSelector == 'memreg /r':
757 return ((bOpcode >> 3) & 0x7) | (int((bOpcode >> 6) == 3) << 3);
758
759 if self.sSelector == '!11 /r':
760 assert (bOpcode & 0xc0) != 0xc, str(oInstr);
761 return (bOpcode >> 3) & 0x7;
762
763 if self.sSelector == '11 /r':
764 assert (bOpcode & 0xc0) == 0xc, str(oInstr);
765 return (bOpcode >> 3) & 0x7;
766
767 if self.sSelector == '11':
768 assert (bOpcode & 0xc0) == 0xc, str(oInstr);
769 return bOpcode & 0x3f;
770
771 assert False, self.sSelector;
772 return -1;
773
def getInstructionsInTableOrder(self):
    """
    Get instructions in table order.

    Returns an array with getTableSize() entries, indexed by the value
    getInstructionIndex() yields for each instruction.  An entry is:
        - None if no instruction was specified for that opcode value,
        - a single instruction object in the normal case, or
        - a list of instructions when several share the slot, e.g. where
          a prefix (like REX.W) encodes a different instruction or where
          different CPUs have different instructions or prefixes in the
          same place.

    Instructions without an opcode (sOpcode is empty/None) are left out.
    """
    cEntries  = self.getTableSize();
    aoEntries = [None for _ in range(cEntries)];

    for oCur in self.aoInstructions:
        if not oCur.sOpcode:
            continue;

        iEntry = self.getInstructionIndex(oCur);
        assert iEntry < cEntries, str(iEntry);

        oPrev = aoEntries[iEntry];
        if isinstance(oPrev, list):
            # Third or later instruction in this slot.
            oPrev.append(oCur);
        elif oPrev is not None:
            # Second instruction in this slot, promote the entry to a list.
            aoEntries[iEntry] = [oPrev, oCur];
        else:
            aoEntries[iEntry] = oCur;

    return aoEntries;
805
806
def getDisasTableName(self):
    """
    Returns the disassembler table name for this map.

    The name is 'g_aDisas' followed by one piece per underscore separated
    word of self.sName:
        - 'm' and 'r' (modrm.mod==mem / modrm.mod==reg suffixes) and two
          digit lowercase hex words are appended as-is after an underscore.
        - Any other word gets 'grp' and 'map' turned into 'Grp' and 'Map',
          its first character uppercased, and is appended without separator.
    """
    asPieces = ['g_aDisas',];
    for sPart in self.sName.split('_'):
        if sPart in ('m', 'r'):
            # Suffix indicating modrm.mod==mem ('m') or modrm.mod==reg ('r').
            asPieces.append('_' + sPart);
        elif len(sPart) == 2 and re.match('^[a-f0-9][a-f0-9]$', sPart):
            # Opcode byte in hex.
            asPieces.append('_' + sPart);
        else:
            sPart = sPart.replace('grp', 'Grp').replace('map', 'Map');
            asPieces.append(sPart[0].upper() + sPart[1:]);
    return ''.join(asPieces);
824
def getDisasRangeName(self):
    """
    Returns the disassembler table range name for this map.

    This is the table name from getDisasTableName() with 'g_aDisas'
    replaced by 'g_Disas' and 'Range' appended.
    """
    sTableName = self.getDisasTableName();
    return '%sRange' % (sTableName.replace('g_aDisas', 'g_Disas'),);
830
def isVexMap(self):
    """
    Returns True if a VEX map, i.e. if the map encoding (self.sEncoding)
    starts with 'vex', otherwise False.
    """
    sEncoding = self.sEncoding;
    return sEncoding.startswith('vex');
834
835
class TestType(object):
    """
    Test value type.

    This base class deals with integer like values.  The fUnsigned constructor
    parameter indicates the default stance on zero vs sign extending.  It is
    possible to override fUnsigned=True by prefixing the value with '+' or '-'.
    """
    def __init__(self, sName, acbSizes = None, fUnsigned = True):
        self.sName     = sName;
        self.acbSizes  = [1, 2, 4, 8, 16, 32] if acbSizes is None else acbSizes;  # Normal sizes.
        self.fUnsigned = fUnsigned;

    class BadValue(Exception):
        """ Bad value exception. """
        def __init__(self, sMessage):
            Exception.__init__(self, sMessage);
            self.sMessage = sMessage;

    ## For ascii ~ operator.
    kdHexInv = {
        '0': 'f',
        '1': 'e',
        '2': 'd',
        '3': 'c',
        '4': 'b',
        '5': 'a',
        '6': '9',
        '7': '8',
        '8': '7',
        '9': '6',
        'a': '5',
        'b': '4',
        'c': '3',
        'd': '2',
        'e': '1',
        'f': '0',
    };

    def get(self, sValue):
        """
        Get the shortest normal sized byte representation of sValue.

        sValue is a decimal or '0x' prefixed hexadecimal integer string,
        optionally preceded by a '+' or '-' sign.  A sign forces sign
        extension regardless of the fUnsigned default.

        Returns ((fSignExtend, bytearray), ) or ((fSignExtend, bytearray), (fSignExtend, bytearray), ).
        The latter form is for AND+OR pairs where the first entry is what to
        AND with the field and the second the one to OR with.  This base
        implementation always returns the single entry form.  The bytearray
        holds the least significant byte first and is padded to the smallest
        size in acbSizes that fits, or to a multiple of the largest size if
        the value exceeds that.

        Raises BadValue if invalid value.
        """
        if not sValue:
            raise TestType.BadValue('empty value');

        # Deal with sign and detect hexadecimal or decimal.
        fSignExtend = not self.fUnsigned;
        if sValue[0] in ('-', '+'):
            fSignExtend = True;
            fHex = len(sValue) > 3 and sValue[1:3].lower() == '0x';
        else:
            fHex = len(sValue) > 2 and sValue[0:2].lower() == '0x';

        # Try convert it to an integer.  int() is used rather than long() as the
        # latter only exists in Python 2; a NameError here would otherwise be
        # reported as a conversion failure for each and every value.
        try:
            iValue = int(sValue, 16 if fHex else 10);
        except (ValueError, TypeError) as oXcpt:
            raise TestType.BadValue('failed to convert "%s" to integer (%s)' % (sValue, oXcpt));

        # Convert to a hex string.  '%x' yields neither a '0x' prefix nor a
        # Python 2 'L' suffix.  Negative values need to be manually converted
        # to two's complement by inverting the digits of (-n - 1).
        if iValue >= 0:
            sHex = '%x' % (iValue,);
        else:
            sHex = ''.join([self.kdHexInv[sDigit] for sDigit in '%x' % (-iValue - 1,)]);
            # Make sure the top bit of the leading digit is set (the sign).
            if fSignExtend and sHex[0] not in [ '8', '9', 'a', 'b', 'c', 'd', 'e', 'f']:
                sHex = 'f' + sHex;

        # Work out the natural number of digits to pad the value to.
        cDigits = len(sHex);
        if cDigits <= self.acbSizes[-1] * 2:
            for cb in self.acbSizes:
                cNaturalDigits = cb * 2;
                if cDigits <= cNaturalDigits:
                    break;
        else:
            # Too big for the largest size, round up to a multiple of it.
            cNaturalDigits = self.acbSizes[-1] * 2;
            cNaturalDigits = (cDigits + cNaturalDigits - 1) // cNaturalDigits * cNaturalDigits;
            assert isinstance(cNaturalDigits, int)

        if cNaturalDigits != cDigits:
            cNeeded = cNaturalDigits - cDigits;
            if iValue >= 0:
                sHex = ('0' * cNeeded) + sHex;
            else:
                sHex = ('f' * cNeeded) + sHex;

        # Convert to bytearray, least significant byte first, and return it.
        abValue = bytearray([int(sHex[offHex - 2 : offHex], 16) for offHex in range(len(sHex), 0, -2)]);

        return ((fSignExtend, abValue),);

    def validate(self, sValue):
        """
        Returns True if value is okay, error message on failure.
        """
        try:
            self.get(sValue);
        except TestType.BadValue as oXcpt:
            return oXcpt.sMessage;
        return True;

    def isAndOrPair(self, sValue):
        """
        Checks if sValue is a pair.

        Always False for this base class, as get() only returns single values.
        """
        _ = sValue;
        return False;
960
961
class TestTypeEflags(TestType):
    """
    Special value parsing for EFLAGS/RFLAGS/FLAGS.

    Values are comma separated flag mnemonics which are looked up in
    g_kdEFlagsMnemonics.  Mnemonics mapping to a constant prefixed by '!'
    clear the flag, all others set it.
    """

    ## The mnemonics which denote a cleared flag; consulted by isAndOrPair.
    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 };

    def __init__(self, sName):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);

    def get(self, sValue):
        """
        Converts the comma separated flag mnemonics in sValue.

        Returns the single entry form of TestType.get for the flags to set
        when no flag is to be cleared.  Otherwise returns a pair where the
        first entry holds the flags to clear and the second the flags to set.

        Raises BadValue on unknown flag mnemonics.
        """
        fClear = 0;
        fSet   = 0;
        for sFlag in sValue.split(','):
            sConstant = g_kdEFlagsMnemonics.get(sFlag, None);
            if sConstant is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sFlag, sValue))
            if sConstant[0] == '!':
                fClear |= g_kdX86EFlagsConstants[sConstant[1:]];
            else:
                fSet   |= g_kdX86EFlagsConstants[sConstant];

        aoSet = TestType.get(self, '0x%x' % (fSet,));
        if fClear != 0:
            aoClear = TestType.get(self, '%#x' % (fClear,))
            assert self.isAndOrPair(sValue) is True;
            return (aoClear[0], aoSet[0]);
        assert self.isAndOrPair(sValue) is False;
        return aoSet;

    def isAndOrPair(self, sValue):
        """
        Checks if sValue is a pair, i.e. if it contains any of the flag
        clearing mnemonics in kdZeroValueFlags.

        The mnemonics are compared as whole comma separated tokens, the same
        way get() splits the value.  A substring search would also trigger on
        a zero-value mnemonic embedded in a longer flag name and thereby
        disagree with what get() returns.
        """
        for sFlag in sValue.split(','):
            if sFlag in self.kdZeroValueFlags:
                return True;
        return False;
997
class TestTypeFromDict(TestType):
    """
    Special value parsing for registers whose bits are named by a dictionary
    of constants.

    Values are comma separated flag names.  Each name is uppercased, prefixed
    with sConstantPrefix and looked up in kdConstantsAndValues; the resulting
    values are ORed together.
    """

    ## Not referenced by this class itself; retained as-is.
    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 };

    def __init__(self, sName, kdConstantsAndValues, sConstantPrefix):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);
        self.kdConstantsAndValues = kdConstantsAndValues;
        self.sConstantPrefix      = sConstantPrefix;

    def get(self, sValue):
        """
        Converts the comma separated flag names in sValue into the single
        entry form returned by TestType.get.

        Raises BadValue on unknown flag names.
        """
        fCombined = 0;
        for sName in sValue.split(','):
            sConstant = self.sConstantPrefix + sName.upper();
            fBits     = self.kdConstantsAndValues.get(sConstant, None);
            if fBits is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sName, sValue))
            fCombined |= fBits;
        return TestType.get(self, '0x%x' % (fCombined,));
1018
1019
class TestInOut(object):
    """
    One input or output state modifier.

    This should be thought as values to modify BS3REGCTX and extended (needs
    to be structured) state.

    Each instance names a field (kdFields), an assignment operator
    (kasOperators), the value as a string and the value type name.
    """
    ## Assigned operators.
    kasOperators = [
        '&|=',  # Special AND(INV)+OR operator for use with EFLAGS.
        '&~=',
        '&=',
        '|=',
        '='
    ];
    ## Types
    # NOTE(review): 'int' is created with the default fUnsigned=True and thus
    #               behaves like 'uint' - confirm whether this is intended.
    kdTypes = {
        'uint':  TestType('uint', fUnsigned = True),
        'int':   TestType('int'),
        'efl':   TestTypeEflags('efl'),
        'cr0':   TestTypeFromDict('cr0', g_kdX86Cr0Constants, 'X86_CR0_'),
        'cr4':   TestTypeFromDict('cr4', g_kdX86Cr4Constants, 'X86_CR4_'),
        'xcr0':  TestTypeFromDict('xcr0', g_kdX86XSaveCConstants, 'XSAVE_C_'),
    };
    ## CPU context fields.
    kdFields = {
        # name:         ( default type, [both|input|output], )
        # Operands.
        'op1':          ( 'uint', 'both',   ), ## \@op1
        'op2':          ( 'uint', 'both',   ), ## \@op2
        'op3':          ( 'uint', 'both',   ), ## \@op3
        'op4':          ( 'uint', 'both',   ), ## \@op4
        # Flags.
        'efl':          ( 'efl',  'both',   ),
        'efl_undef':    ( 'uint', 'output', ),
        # 8-bit GPRs.
        'al':           ( 'uint', 'both',   ),
        'cl':           ( 'uint', 'both',   ),
        'dl':           ( 'uint', 'both',   ),
        'bl':           ( 'uint', 'both',   ),
        'ah':           ( 'uint', 'both',   ),
        'ch':           ( 'uint', 'both',   ),
        'dh':           ( 'uint', 'both',   ),
        'bh':           ( 'uint', 'both',   ),
        'r8l':          ( 'uint', 'both',   ),
        'r9l':          ( 'uint', 'both',   ),
        'r10l':         ( 'uint', 'both',   ),
        'r11l':         ( 'uint', 'both',   ),
        'r12l':         ( 'uint', 'both',   ),
        'r13l':         ( 'uint', 'both',   ),
        'r14l':         ( 'uint', 'both',   ),
        'r15l':         ( 'uint', 'both',   ),
        # 16-bit GPRs.
        'ax':           ( 'uint', 'both',   ),
        'dx':           ( 'uint', 'both',   ),
        'cx':           ( 'uint', 'both',   ),
        'bx':           ( 'uint', 'both',   ),
        'sp':           ( 'uint', 'both',   ),
        'bp':           ( 'uint', 'both',   ),
        'si':           ( 'uint', 'both',   ),
        'di':           ( 'uint', 'both',   ),
        'r8w':          ( 'uint', 'both',   ),
        'r9w':          ( 'uint', 'both',   ),
        'r10w':         ( 'uint', 'both',   ),
        'r11w':         ( 'uint', 'both',   ),
        'r12w':         ( 'uint', 'both',   ),
        'r13w':         ( 'uint', 'both',   ),
        'r14w':         ( 'uint', 'both',   ),
        'r15w':         ( 'uint', 'both',   ),
        # 32-bit GPRs.
        'eax':          ( 'uint', 'both',   ),
        'edx':          ( 'uint', 'both',   ),
        'ecx':          ( 'uint', 'both',   ),
        'ebx':          ( 'uint', 'both',   ),
        'esp':          ( 'uint', 'both',   ),
        'ebp':          ( 'uint', 'both',   ),
        'esi':          ( 'uint', 'both',   ),
        'edi':          ( 'uint', 'both',   ),
        'r8d':          ( 'uint', 'both',   ),
        'r9d':          ( 'uint', 'both',   ),
        'r10d':         ( 'uint', 'both',   ),
        'r11d':         ( 'uint', 'both',   ),
        'r12d':         ( 'uint', 'both',   ),
        'r13d':         ( 'uint', 'both',   ),
        'r14d':         ( 'uint', 'both',   ),
        'r15d':         ( 'uint', 'both',   ),
        # 64-bit GPRs.
        'rax':          ( 'uint', 'both',   ),
        'rdx':          ( 'uint', 'both',   ),
        'rcx':          ( 'uint', 'both',   ),
        'rbx':          ( 'uint', 'both',   ),
        'rsp':          ( 'uint', 'both',   ),
        'rbp':          ( 'uint', 'both',   ),
        'rsi':          ( 'uint', 'both',   ),
        'rdi':          ( 'uint', 'both',   ),
        'r8':           ( 'uint', 'both',   ),
        'r9':           ( 'uint', 'both',   ),
        'r10':          ( 'uint', 'both',   ),
        'r11':          ( 'uint', 'both',   ),
        'r12':          ( 'uint', 'both',   ),
        'r13':          ( 'uint', 'both',   ),
        'r14':          ( 'uint', 'both',   ),
        'r15':          ( 'uint', 'both',   ),
        # 16-bit, 32-bit or 64-bit registers according to operand size.
        'oz.rax':       ( 'uint', 'both',   ),
        'oz.rdx':       ( 'uint', 'both',   ),
        'oz.rcx':       ( 'uint', 'both',   ),
        'oz.rbx':       ( 'uint', 'both',   ),
        'oz.rsp':       ( 'uint', 'both',   ),
        'oz.rbp':       ( 'uint', 'both',   ),
        'oz.rsi':       ( 'uint', 'both',   ),
        'oz.rdi':       ( 'uint', 'both',   ),
        'oz.r8':        ( 'uint', 'both',   ),
        'oz.r9':        ( 'uint', 'both',   ),
        'oz.r10':       ( 'uint', 'both',   ),
        'oz.r11':       ( 'uint', 'both',   ),
        'oz.r12':       ( 'uint', 'both',   ),
        'oz.r13':       ( 'uint', 'both',   ),
        'oz.r14':       ( 'uint', 'both',   ),
        'oz.r15':       ( 'uint', 'both',   ),
        # Control registers.
        'cr0':          ( 'cr0',  'both',   ),
        'cr4':          ( 'cr4',  'both',   ),
        'xcr0':         ( 'xcr0', 'both',   ),
        # FPU Registers
        'fcw':          ( 'uint', 'both',   ),
        'fsw':          ( 'uint', 'both',   ),
        'ftw':          ( 'uint', 'both',   ),
        'fop':          ( 'uint', 'both',   ),
        'fpuip':        ( 'uint', 'both',   ),
        'fpucs':        ( 'uint', 'both',   ),
        'fpudp':        ( 'uint', 'both',   ),
        'fpuds':        ( 'uint', 'both',   ),
        'mxcsr':        ( 'uint', 'both',   ),
        'st0':          ( 'uint', 'both',   ),
        'st1':          ( 'uint', 'both',   ),
        'st2':          ( 'uint', 'both',   ),
        'st3':          ( 'uint', 'both',   ),
        'st4':          ( 'uint', 'both',   ),
        'st5':          ( 'uint', 'both',   ),
        'st6':          ( 'uint', 'both',   ),
        'st7':          ( 'uint', 'both',   ),
        # MMX registers.
        'mm0':          ( 'uint', 'both',   ),
        'mm1':          ( 'uint', 'both',   ),
        'mm2':          ( 'uint', 'both',   ),
        'mm3':          ( 'uint', 'both',   ),
        'mm4':          ( 'uint', 'both',   ),
        'mm5':          ( 'uint', 'both',   ),
        'mm6':          ( 'uint', 'both',   ),
        'mm7':          ( 'uint', 'both',   ),
        # SSE registers.
        'xmm0':         ( 'uint', 'both',   ),
        'xmm1':         ( 'uint', 'both',   ),
        'xmm2':         ( 'uint', 'both',   ),
        'xmm3':         ( 'uint', 'both',   ),
        'xmm4':         ( 'uint', 'both',   ),
        'xmm5':         ( 'uint', 'both',   ),
        'xmm6':         ( 'uint', 'both',   ),
        'xmm7':         ( 'uint', 'both',   ),
        'xmm8':         ( 'uint', 'both',   ),
        'xmm9':         ( 'uint', 'both',   ),
        'xmm10':        ( 'uint', 'both',   ),
        'xmm11':        ( 'uint', 'both',   ),
        'xmm12':        ( 'uint', 'both',   ),
        'xmm13':        ( 'uint', 'both',   ),
        'xmm14':        ( 'uint', 'both',   ),
        'xmm15':        ( 'uint', 'both',   ),
        'xmm0.lo':      ( 'uint', 'both',   ),
        'xmm1.lo':      ( 'uint', 'both',   ),
        'xmm2.lo':      ( 'uint', 'both',   ),
        'xmm3.lo':      ( 'uint', 'both',   ),
        'xmm4.lo':      ( 'uint', 'both',   ),
        'xmm5.lo':      ( 'uint', 'both',   ),
        'xmm6.lo':      ( 'uint', 'both',   ),
        'xmm7.lo':      ( 'uint', 'both',   ),
        'xmm8.lo':      ( 'uint', 'both',   ),
        'xmm9.lo':      ( 'uint', 'both',   ),
        'xmm10.lo':     ( 'uint', 'both',   ),
        'xmm11.lo':     ( 'uint', 'both',   ),
        'xmm12.lo':     ( 'uint', 'both',   ),
        'xmm13.lo':     ( 'uint', 'both',   ),
        'xmm14.lo':     ( 'uint', 'both',   ),
        'xmm15.lo':     ( 'uint', 'both',   ),
        'xmm0.hi':      ( 'uint', 'both',   ),
        'xmm1.hi':      ( 'uint', 'both',   ),
        'xmm2.hi':      ( 'uint', 'both',   ),
        'xmm3.hi':      ( 'uint', 'both',   ),
        'xmm4.hi':      ( 'uint', 'both',   ),
        'xmm5.hi':      ( 'uint', 'both',   ),
        'xmm6.hi':      ( 'uint', 'both',   ),
        'xmm7.hi':      ( 'uint', 'both',   ),
        'xmm8.hi':      ( 'uint', 'both',   ),
        'xmm9.hi':      ( 'uint', 'both',   ),
        'xmm10.hi':     ( 'uint', 'both',   ),
        'xmm11.hi':     ( 'uint', 'both',   ),
        'xmm12.hi':     ( 'uint', 'both',   ),
        'xmm13.hi':     ( 'uint', 'both',   ),
        'xmm14.hi':     ( 'uint', 'both',   ),
        'xmm15.hi':     ( 'uint', 'both',   ),
        'xmm0.lo.zx':   ( 'uint', 'both',   ),
        'xmm1.lo.zx':   ( 'uint', 'both',   ),
        'xmm2.lo.zx':   ( 'uint', 'both',   ),
        'xmm3.lo.zx':   ( 'uint', 'both',   ),
        'xmm4.lo.zx':   ( 'uint', 'both',   ),
        'xmm5.lo.zx':   ( 'uint', 'both',   ),
        'xmm6.lo.zx':   ( 'uint', 'both',   ),
        'xmm7.lo.zx':   ( 'uint', 'both',   ),
        'xmm8.lo.zx':   ( 'uint', 'both',   ),
        'xmm9.lo.zx':   ( 'uint', 'both',   ),
        'xmm10.lo.zx':  ( 'uint', 'both',   ),
        'xmm11.lo.zx':  ( 'uint', 'both',   ),
        'xmm12.lo.zx':  ( 'uint', 'both',   ),
        'xmm13.lo.zx':  ( 'uint', 'both',   ),
        'xmm14.lo.zx':  ( 'uint', 'both',   ),
        'xmm15.lo.zx':  ( 'uint', 'both',   ),
        'xmm0.dw0':     ( 'uint', 'both',   ),
        'xmm1.dw0':     ( 'uint', 'both',   ),
        'xmm2.dw0':     ( 'uint', 'both',   ),
        'xmm3.dw0':     ( 'uint', 'both',   ),
        'xmm4.dw0':     ( 'uint', 'both',   ),
        'xmm5.dw0':     ( 'uint', 'both',   ),
        'xmm6.dw0':     ( 'uint', 'both',   ),
        'xmm7.dw0':     ( 'uint', 'both',   ),
        'xmm8.dw0':     ( 'uint', 'both',   ),
        'xmm9.dw0':     ( 'uint', 'both',   ),
        'xmm10.dw0':    ( 'uint', 'both',   ),
        'xmm11.dw0':    ( 'uint', 'both',   ),
        'xmm12.dw0':    ( 'uint', 'both',   ),
        'xmm13.dw0':    ( 'uint', 'both',   ),
        'xmm14.dw0':    ( 'uint', 'both',   ),
        'xmm15.dw0':    ( 'uint', 'both',   ),
        'xmm15_dw0':    ( 'uint', 'both',   ), # Historical misspelling of 'xmm15.dw0', kept for compatibility.
        # AVX registers.
        'ymm0':         ( 'uint', 'both',   ),
        'ymm1':         ( 'uint', 'both',   ),
        'ymm2':         ( 'uint', 'both',   ),
        'ymm3':         ( 'uint', 'both',   ),
        'ymm4':         ( 'uint', 'both',   ),
        'ymm5':         ( 'uint', 'both',   ),
        'ymm6':         ( 'uint', 'both',   ),
        'ymm7':         ( 'uint', 'both',   ),
        'ymm8':         ( 'uint', 'both',   ),
        'ymm9':         ( 'uint', 'both',   ),
        'ymm10':        ( 'uint', 'both',   ),
        'ymm11':        ( 'uint', 'both',   ),
        'ymm12':        ( 'uint', 'both',   ),
        'ymm13':        ( 'uint', 'both',   ),
        'ymm14':        ( 'uint', 'both',   ),
        'ymm15':        ( 'uint', 'both',   ),

        # Special ones.
        'value.xcpt':   ( 'uint', 'output', ),
    };

    def __init__(self, sField, sOp, sValue, sType):
        """
        sField must be a key of kdFields and sOp one of kasOperators; all
        four arguments must be strings.  (Checked by assertions only.)
        """
        assert sField in self.kdFields;
        assert sOp in self.kasOperators;
        self.sField = sField;
        self.sOp    = sOp;
        self.sValue = sValue;
        self.sType  = sType;
        assert isinstance(sField, str);
        assert isinstance(sOp, str);
        assert isinstance(sType, str);
        assert isinstance(sValue, str);
1285
1286
class TestSelector(object):
    """
    One selector for an instruction test.

    A selector is a comparison of a variable (key of kdVariables) against
    one of the values valid for that variable, using one of kasCompareOps.
    """

    ## Selector compare operators.
    kasCompareOps = [ '==', '!=' ];

    ## Selector variables and their valid values.
    kdVariables = {
        # Operand size.
        'size':     { 'o16': 'size_o16', 'o32': 'size_o32', 'o64': 'size_o64', },
        # VEX.L value.
        'vex.l':    { '0': 'vexl_0', '1': 'vexl_1', },
        # Execution ring.
        'ring':     {
            '0':    'ring_0',
            '1':    'ring_1',
            '2':    'ring_2',
            '3':    'ring_3',
            '0..2': 'ring_0_thru_2',
            '1..3': 'ring_1_thru_3',
        },
        # Basic code mode.
        'codebits': { '64': 'code_64bit', '32': 'code_32bit', '16': 'code_16bit', },
        # cpu modes.
        'mode':     {
            'real': 'mode_real',
            'prot': 'mode_prot',
            'long': 'mode_long',
            'v86':  'mode_v86',
            'smm':  'mode_smm',
            'vmx':  'mode_vmx',
            'svm':  'mode_svm',
        },
        # paging on/off
        'paging':   { 'on': 'paging_on', 'off': 'paging_off', },
        # CPU vendor
        'vendor':   { 'amd': 'vendor_amd', 'intel': 'vendor_intel', 'via': 'vendor_via', },
    };

    ## Selector shorthand predicates.
    ## These translates into variable expressions.
    kdPredicates = {
        # Operand size.
        'o16':          'size==o16',
        'o32':          'size==o32',
        'o64':          'size==o64',
        # Execution ring.
        'ring0':        'ring==0',
        '!ring0':       'ring==1..3',
        'ring1':        'ring==1',
        'ring2':        'ring==2',
        'ring3':        'ring==3',
        'user':         'ring==3',
        'supervisor':   'ring==0..2',
        # Basic code mode.
        '16-bit':       'codebits==16',
        '32-bit':       'codebits==32',
        '64-bit':       'codebits==64',
        # CPU modes.
        'real':         'mode==real',
        'prot':         'mode==prot',
        'long':         'mode==long',
        'v86':          'mode==v86',
        'smm':          'mode==smm',
        'vmx':          'mode==vmx',
        'svm':          'mode==svm',
        # Paging.
        'paging':       'paging==on',
        '!paging':      'paging==off',
        # CPU vendor.
        'amd':          'vendor==amd',
        '!amd':         'vendor!=amd',
        'intel':        'vendor==intel',
        '!intel':       'vendor!=intel',
        'via':          'vendor==via',
        '!via':         'vendor!=via',
    };

    def __init__(self, sVariable, sOp, sValue):
        """
        sVariable must be a key of kdVariables, sOp one of kasCompareOps and
        sValue a valid value for sVariable.  (Checked by assertions only.)
        """
        assert sVariable in self.kdVariables;
        assert sOp in self.kasCompareOps;
        assert sValue in self.kdVariables[sVariable];
        self.sVariable, self.sOp, self.sValue = sVariable, sOp, sValue;
1383
1384
1385class InstructionTest(object):
1386 """
1387 Instruction test.
1388 """
1389
1390 def __init__(self, oInstr): # type: (InstructionTest, Instruction)
1391 self.oInstr = oInstr # type: InstructionTest
1392 self.aoInputs = [] # type: List[TestInOut]
1393 self.aoOutputs = [] # type: List[TestInOut]
1394 self.aoSelectors = [] # type: List[TestSelector]
1395
1396 def toString(self, fRepr = False):
1397 """
1398 Converts it to string representation.
1399 """
1400 asWords = [];
1401 if self.aoSelectors:
1402 for oSelector in self.aoSelectors:
1403 asWords.append('%s%s%s' % (oSelector.sVariable, oSelector.sOp, oSelector.sValue,));
1404 asWords.append('/');
1405
1406 for oModifier in self.aoInputs:
1407 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
1408
1409 asWords.append('->');
1410
1411 for oModifier in self.aoOutputs:
1412 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
1413
1414 if fRepr:
1415 return '<' + ' '.join(asWords) + '>';
1416 return ' '.join(asWords);
1417
1418 def __str__(self):
1419 """ Provide string represenation. """
1420 return self.toString(False);
1421
1422 def __repr__(self):
1423 """ Provide unambigious string representation. """
1424 return self.toString(True);
1425
class Operand(object):
    """
    Instruction operand.

    Described by where the operand is found (a g_kdOpLocations key) and its
    type (a g_kdOpTypes key).
    """

    def __init__(self, sWhere, sType):
        assert sWhere in g_kdOpLocations, sWhere;
        assert sType  in g_kdOpTypes, sType;
        self.sWhere = sWhere;   ##< g_kdOpLocations
        self.sType  = sType;    ##< g_kdOpTypes

    def usesModRM(self):
        """
        Returns True if using some form of ModR/M encoding, which is taken
        to be the case when the type starts with 'E', 'G' or 'M'.
        """
        chFirst = self.sType[0];
        return chFirst in ('E', 'G', 'M');
1440
1441
1442
class Instruction(object): # pylint: disable=too-many-instance-attributes
    """
    Instruction.

    Holds the attributes collected for one instruction, see the attribute
    groups in the constructor.
    """

    def __init__(self, sSrcFile, iLine):
        ## @name Core attributes.
        ## @{
        self.oParent            = None      # type: Instruction
        self.sMnemonic          = None;
        self.sBrief             = None;
        self.asDescSections     = []        # type: List[str]
        self.aoMaps             = []        # type: List[InstructionMap]
        self.aoOperands         = []        # type: List[Operand]
        self.sPrefix            = None;     ##< Single prefix: None, 'none', 0x66, 0xf3, 0xf2
        self.sOpcode            = None      # type: str
        self.sSubOpcode         = None      # type: str
        self.sEncoding          = None;
        self.asFlTest           = None;
        self.asFlModify         = None;
        self.asFlUndefined      = None;
        self.asFlSet            = None;
        self.asFlClear          = None;
        self.dHints             = {};       ##< Dictionary of instruction hints, flags, whatnot. (Dictionary for speed; dummy value).
        self.sDisEnum           = None;     ##< OP_XXXX value.  Default is based on the uppercased mnemonic.
        self.asCpuIds           = [];       ##< The CPUID feature bit names for this instruction. If multiple, assume AND.
        self.asReqFeatures      = [];       ##< Which features are required to be enabled to run this instruction.
        self.aoTests            = []        # type: List[InstructionTest]
        self.sMinCpu            = None;     ##< Indicates the minimum CPU required for the instruction. Not set when oCpuExpr is.
        self.oCpuExpr           = None;     ##< Some CPU restriction expression...
        self.sGroup             = None;
        self.fUnused            = False;    ##< Unused instruction.
        self.fInvalid           = False;    ##< Invalid instruction (like UD2).
        self.sInvalidStyle      = None;     ##< Invalid behaviour style (g_kdInvalidStyles),
        self.sXcptType          = None;     ##< Exception type (g_kdXcptTypes).
        ## @}

        ## @name Implementation attributes.
        ## @{
        self.sStats             = None;
        self.sFunction          = None;
        self.fStub              = False;
        self.fUdStub            = False;
        ## @}

        ## @name Decoding info
        ## @{
        self.sSrcFile           = sSrcFile;
        self.iLineCreated       = iLine;
        self.iLineCompleted     = None;
        self.cOpTags            = 0;
        self.iLineFnIemOpMacro  = -1;
        self.iLineMnemonicMacro = -1;
        ## @}

        ## @name Intermediate input fields.
        ## @{
        self.sRawDisOpNo        = None;
        self.asRawDisParams     = [];
        self.sRawIemOpFlags     = None;
        self.sRawOldOpcodes     = None;
        self.asCopyTests        = [];
        ## @}

        ## All the MC blocks associated with this instruction.
        self.aoMcBlocks         = []        # type: List[McBlock]

    def toString(self, fRepr = False):
        """
        Turn object into a string.

        The string is a '; ' separated list of 'name=value' items, leaving
        out items without a value.  It is wrapped in angle brackets when
        fRepr is True.
        """
        # Collect (name, value) pairs in output order; None values are dropped below.
        atItems = [('opcode', self.sOpcode),];
        if self.sPrefix:
            atItems.append(('prefix', self.sPrefix));
        atItems.append(('mnemonic', self.sMnemonic));
        atItems.extend([('op%u' % (iOperand + 1,), '%s:%s' % (oOperand.sWhere, oOperand.sType,))
                        for iOperand, oOperand in enumerate(self.aoOperands)]);
        if self.aoMaps:
            atItems.append(('maps', ','.join([oMap.sName for oMap in self.aoMaps])));
        atItems.append(('encoding', self.sEncoding));
        if self.dHints:
            atItems.append(('hints', ','.join(self.dHints.keys())));
        atItems.append(('disenum', self.sDisEnum));
        if self.asCpuIds:
            atItems.append(('cpuid', ','.join(self.asCpuIds)));
        atItems.append(('group', self.sGroup));
        if self.fUnused:
            atItems.append(('unused', 'True'));
        if self.fInvalid:
            atItems.append(('invalid', 'True'));
        atItems.extend([
            ('invlstyle', self.sInvalidStyle),
            ('fltest',    self.asFlTest),
            ('flmodify',  self.asFlModify),
            ('flundef',   self.asFlUndefined),
            ('flset',     self.asFlSet),
            ('flclear',   self.asFlClear),
            ('mincpu',    self.sMinCpu),
            ('stats',     self.sStats),
            ('sFunction', self.sFunction),
        ]);
        if self.fStub:
            atItems.append(('fStub', 'True'));
        if self.fUdStub:
            atItems.append(('fUdStub', 'True'));
        if self.cOpTags:
            atItems.append(('optags', str(self.cOpTags)));
        if self.iLineFnIemOpMacro != -1:
            atItems.append(('FNIEMOP_XXX', str(self.iLineFnIemOpMacro)));
        if self.iLineMnemonicMacro != -1:
            atItems.append(('IEMOP_MNEMMONICn', str(self.iLineMnemonicMacro)));

        sBody = '; '.join(['%s=%s' % (sName, oValue,) for sName, oValue in atItems if oValue is not None]);
        if fRepr:
            return '<' + sBody + '>';
        return sBody;

    def __str__(self):
        """ Provide string representation. """
        return self.toString(False);

    def __repr__(self):
        """ Provide unambiguous string representation. """
        return self.toString(True);

    def copy(self, oMap = None, sOpcode = None, sSubOpcode = None, sPrefix = None):
        """
        Makes a copy of the object for the purpose of putting in a different map
        or a different place in the current map.

        The given oMap, sOpcode, sSubOpcode and sPrefix replace the
        corresponding values of this instruction in the copy, if specified.
        The copy has this instruction as its parent.  The MC blocks
        (aoMcBlocks) are not carried over.
        """
        oCopy = Instruction(self.sSrcFile, self.iLineCreated);
        oCopy.oParent = self;

        # What the caller may override.
        oCopy.aoMaps     = [oMap,]    if oMap       else list(self.aoMaps);
        oCopy.sPrefix    = sPrefix    if sPrefix    else self.sPrefix;
        oCopy.sOpcode    = sOpcode    if sOpcode    else self.sOpcode;
        oCopy.sSubOpcode = sSubOpcode if sSubOpcode else self.sSubOpcode;

        # Containers getting a shallow copy of their own.
        for sAttr in ('asDescSections', 'aoOperands', 'asCpuIds', 'asReqFeatures', 'aoTests',
                      'asRawDisParams', 'asCopyTests',):
            setattr(oCopy, sAttr, list(getattr(self, sAttr)));
        oCopy.dHints = dict(self.dHints);

        # Everything else is taken over as-is.
        for sAttr in ('sMnemonic', 'sBrief', 'sEncoding',
                      'asFlTest', 'asFlModify', 'asFlUndefined', 'asFlSet', 'asFlClear',
                      'sDisEnum', 'sMinCpu', 'oCpuExpr', 'sGroup', 'fUnused', 'fInvalid',
                      'sInvalidStyle', 'sXcptType',
                      'sStats', 'sFunction', 'fStub', 'fUdStub',
                      'iLineCompleted', 'cOpTags', 'iLineFnIemOpMacro', 'iLineMnemonicMacro',
                      'sRawDisOpNo', 'sRawIemOpFlags', 'sRawOldOpcodes',):
            setattr(oCopy, sAttr, getattr(self, sAttr));

        return oCopy;

    def getOpcodeByte(self):
        """
        Decodes sOpcode into a byte range integer value.
        Raises exception if sOpcode is None or invalid.

        Recognized forms are '0xNN', '/r', '11/r' and '!11/r', where r is a
        single digit.
        """
        if self.sOpcode is None:
            raise Exception('No opcode byte for %s!' % (self,));
        sSpec = str(self.sOpcode); # pylint type confusion workaround.
        cchSpec = len(sSpec);

        # Full hex byte form.
        if sSpec.startswith('0x'):
            return int(sSpec, 16);

        # The /r form:
        if cchSpec == 2 and sSpec[0] == '/' and sSpec[1].isdigit():
            return int(sSpec[1:]) << 3;

        # The 11/r form:
        if cchSpec == 4 and sSpec.startswith('11/') and sSpec[-1].isdigit():
            return 0xc0 | (int(sSpec[-1:]) << 3);

        # The !11/r form (returns mod=1):
        ## @todo this doesn't really work...
        if cchSpec == 5 and sSpec.startswith('!11/') and sSpec[-1].isdigit():
            return 0x80 | (int(sSpec[-1:]) << 3);

        raise Exception('unsupported opcode byte spec "%s" for %s' % (sSpec, self,));

    @staticmethod
    def _flagsToIntegerMask(asFlags):
        """
        Returns the integer mask value for asFlags.
        An empty list or None gives zero.
        """
        fMask = 0;
        for sFlag in (asFlags or ()):
            sConstant = g_kdEFlagsMnemonics[sFlag];
            assert sConstant[0] != '!', sConstant
            fMask |= g_kdX86EFlagsConstants[sConstant];
        return fMask;

    def getTestedFlagsMask(self):
        """ Returns asFlTest as an integer mask value. """
        return self._flagsToIntegerMask(self.asFlTest);

    def getModifiedFlagsMask(self):
        """ Returns asFlModify as an integer mask value. """
        return self._flagsToIntegerMask(self.asFlModify);

    def getUndefinedFlagsMask(self):
        """ Returns asFlUndefined as an integer mask value. """
        return self._flagsToIntegerMask(self.asFlUndefined);

    def getSetFlagsMask(self):
        """ Returns asFlSet as an integer mask value. """
        return self._flagsToIntegerMask(self.asFlSet);

    def getClearedFlagsMask(self):
        """ Returns asFlClear as an integer mask value. """
        return self._flagsToIntegerMask(self.asFlClear);

    @staticmethod
    def _flagsToC(asFlags):
        """
        Returns asFlags converted to X86_EFL_XXX ored together C-style.
        An empty list or None gives '0'.
        """
        if not asFlags:
            return '0';
        asConstants = [];
        for sFlag in asFlags:
            sConstant = g_kdEFlagsMnemonics[sFlag];
            assert sConstant[0] != '!', sConstant
            asConstants.append(sConstant);
        return ' | '.join(asConstants);

    def getTestedFlagsCStyle(self):
        """ Returns asFlTest as C constants ored together. """
        return self._flagsToC(self.asFlTest);

    def getModifiedFlagsCStyle(self):
        """ Returns asFlModify as C constants ored together. """
        return self._flagsToC(self.asFlModify);

    def getUndefinedFlagsCStyle(self):
        """ Returns asFlUndefined as C constants ored together. """
        return self._flagsToC(self.asFlUndefined);

    def getSetFlagsCStyle(self):
        """ Returns asFlSet as C constants ored together. """
        return self._flagsToC(self.asFlSet);

    def getClearedFlagsCStyle(self):
        """ Returns asFlClear as C constants ored together. """
        return self._flagsToC(self.asFlClear);

    def onlyInVexMaps(self):
        """ Returns True if only in VEX maps, otherwise False.  (No maps -> False) """
        if not self.aoMaps:
            return False;
        return all(oMap.isVexMap() for oMap in self.aoMaps);
1718
1719
1720
## All the instructions.
g_aoAllInstructions = [] # type: List[Instruction]

## All the instructions indexed by statistics name (opstat).
g_dAllInstructionsByStat = {} # type: Dict[str, Instruction]

## All the instructions indexed by function name (opfunction).
## Each name maps to a list of instructions.
g_dAllInstructionsByFunction = {} # type: Dict[str, List[Instruction]]

## Instructions tagged by oponlytest
g_aoOnlyTestInstructions = [] # type: List[Instruction]
1732
## Instruction maps.
## Each entry is an InstructionMap; the first positional argument is the map
## name (used as key in g_dInstructionMaps below).
g_aoInstructionMaps = [
    # The one byte map and the groups hanging off a single lead opcode byte.
    InstructionMap('one',        'g_apfnOneByteMap',        sSelector = 'byte'),
    InstructionMap('grp1_80',    asLeadOpcodes = ['0x80',], sSelector = '/r'),
    InstructionMap('grp1_81',    asLeadOpcodes = ['0x81',], sSelector = '/r'),
    InstructionMap('grp1_82',    asLeadOpcodes = ['0x82',], sSelector = '/r'),
    InstructionMap('grp1_83',    asLeadOpcodes = ['0x83',], sSelector = '/r'),
    InstructionMap('grp1a',      asLeadOpcodes = ['0x8f',], sSelector = '/r'),
    InstructionMap('grp2_c0',    asLeadOpcodes = ['0xc0',], sSelector = '/r'),
    InstructionMap('grp2_c1',    asLeadOpcodes = ['0xc1',], sSelector = '/r'),
    InstructionMap('grp2_d0',    asLeadOpcodes = ['0xd0',], sSelector = '/r'),
    InstructionMap('grp2_d1',    asLeadOpcodes = ['0xd1',], sSelector = '/r'),
    InstructionMap('grp2_d2',    asLeadOpcodes = ['0xd2',], sSelector = '/r'),
    InstructionMap('grp2_d3',    asLeadOpcodes = ['0xd3',], sSelector = '/r'),
    ## @todo g_apfnEscF1_E0toFF
    InstructionMap('grp3_f6',    asLeadOpcodes = ['0xf6',], sSelector = '/r'),
    InstructionMap('grp3_f7',    asLeadOpcodes = ['0xf7',], sSelector = '/r'),
    InstructionMap('grp4',       asLeadOpcodes = ['0xfe',], sSelector = '/r'),
    InstructionMap('grp5',       asLeadOpcodes = ['0xff',], sSelector = '/r'),
    InstructionMap('grp11_c6_m', asLeadOpcodes = ['0xc6',], sSelector = '!11 /r'),
    InstructionMap('grp11_c6_r', asLeadOpcodes = ['0xc6',], sSelector = '11'),    # xabort
    InstructionMap('grp11_c7_m', asLeadOpcodes = ['0xc7',], sSelector = '!11 /r'),
    InstructionMap('grp11_c7_r', asLeadOpcodes = ['0xc7',], sSelector = '11'),    # xbegin

    # The two byte map (lead opcode 0x0f) and the groups with 0x0f as first lead opcode.
    InstructionMap('two0f',      'g_apfnTwoByteMap',    asLeadOpcodes = ['0x0f',], sDisParse = 'IDX_ParseTwoByteEsc'),
    InstructionMap('grp6',       'g_apfnGroup6',        asLeadOpcodes = ['0x0f', '0x00',], sSelector = '/r'),
    InstructionMap('grp7_m',     'g_apfnGroup7Mem',     asLeadOpcodes = ['0x0f', '0x01',], sSelector = '!11 /r'),
    InstructionMap('grp7_r',                            asLeadOpcodes = ['0x0f', '0x01',], sSelector = '11'),
    InstructionMap('grp8',                              asLeadOpcodes = ['0x0f', '0xba',], sSelector = '/r'),
    InstructionMap('grp9',       'g_apfnGroup9RegReg',  asLeadOpcodes = ['0x0f', '0xc7',], sSelector = 'mod /r'),
    ## @todo What about g_apfnGroup9MemReg?
    InstructionMap('grp10',      None,                  asLeadOpcodes = ['0x0f', '0xb9',], sSelector = '/r'), # UD1 /w modr/m
    InstructionMap('grp12',      'g_apfnGroup12RegReg', asLeadOpcodes = ['0x0f', '0x71',], sSelector = 'mod /r'),
    InstructionMap('grp13',      'g_apfnGroup13RegReg', asLeadOpcodes = ['0x0f', '0x72',], sSelector = 'mod /r'),
    InstructionMap('grp14',      'g_apfnGroup14RegReg', asLeadOpcodes = ['0x0f', '0x73',], sSelector = 'mod /r'),
    InstructionMap('grp15',      'g_apfnGroup15MemReg', asLeadOpcodes = ['0x0f', '0xae',], sSelector = 'memreg /r'),
    ## @todo What about g_apfnGroup15RegReg?
    InstructionMap('grp16',                             asLeadOpcodes = ['0x0f', '0x18',], sSelector = 'mod /r'),
    InstructionMap('grpA17',                            asLeadOpcodes = ['0x0f', '0x78',], sSelector = '/r'), # AMD: EXTRQ weirdness
    InstructionMap('grpP',                              asLeadOpcodes = ['0x0f', '0x0d',], sSelector = '/r'), # AMD: prefetch

    # The three byte maps (lead opcodes 0x0f 0x38 and 0x0f 0x3a).
    InstructionMap('three0f38',  'g_apfnThreeByte0f38', asLeadOpcodes = ['0x0f', '0x38',]),
    InstructionMap('three0f3a',  'g_apfnThreeByte0f3a', asLeadOpcodes = ['0x0f', '0x3a',]),

    # Maps and groups using the 'vex1' encoding.
    InstructionMap('vexmap1',    'g_apfnVexMap1',          sEncoding = 'vex1'),
    InstructionMap('vexgrp12',   'g_apfnVexGroup12RegReg', sEncoding = 'vex1', asLeadOpcodes = ['0x71',], sSelector = 'mod /r'),
    InstructionMap('vexgrp13',   'g_apfnVexGroup13RegReg', sEncoding = 'vex1', asLeadOpcodes = ['0x72',], sSelector = 'mod /r'),
    InstructionMap('vexgrp14',   'g_apfnVexGroup14RegReg', sEncoding = 'vex1', asLeadOpcodes = ['0x73',], sSelector = 'mod /r'),
    InstructionMap('vexgrp15',   'g_apfnVexGroup15MemReg', sEncoding = 'vex1', asLeadOpcodes = ['0xae',], sSelector = 'memreg /r'),
    InstructionMap('vexgrp17',   'g_apfnVexGroup17_f3',    sEncoding = 'vex1', asLeadOpcodes = ['0xf3',], sSelector = '/r'),

    # Maps using the 'vex2' and 'vex3' encodings.
    InstructionMap('vexmap2',    'g_apfnVexMap2',          sEncoding = 'vex2'),
    InstructionMap('vexmap3',    'g_apfnVexMap3',          sEncoding = 'vex3'),

    # The '3dnow' map and the maps/groups using the XOP encodings.
    InstructionMap('3dnow',      asLeadOpcodes = ['0x0f', '0x0f',]),
    InstructionMap('xopmap8',    sEncoding = 'xop8'),
    InstructionMap('xopmap9',    sEncoding = 'xop9'),
    InstructionMap('xopgrp1',    sEncoding = 'xop9',  asLeadOpcodes = ['0x01'], sSelector = '/r'),
    InstructionMap('xopgrp2',    sEncoding = 'xop9',  asLeadOpcodes = ['0x02'], sSelector = '/r'),
    InstructionMap('xopgrp3',    sEncoding = 'xop9',  asLeadOpcodes = ['0x12'], sSelector = '/r'),
    InstructionMap('xopmap10',   sEncoding = 'xop10'),
    InstructionMap('xopgrp4',    sEncoding = 'xop10', asLeadOpcodes = ['0x12'], sSelector = '/r'),
];
## The instruction maps indexed by their sName attribute.
g_dInstructionMaps          = { oMap.sName:    oMap for oMap in g_aoInstructionMaps };
## The instruction maps indexed by their sIemName attribute.
g_dInstructionMapsByIemName = { oMap.sIemName: oMap for oMap in g_aoInstructionMaps };
1798
1799
1800#
1801# Decoder functions.
1802#
1803
class DecoderFunction(object):
    """
    Decoder function.

    Mainly used for scoped searches for variables used in microcode blocks.
    """

    def __init__(self, sSrcFile, iBeginLine, sName, asDefArgs):
        ## The function name.
        self.sName      = sName;
        ## The FNIEMOP*DEF/STUB* macro argument list, 0th element is the macro name.
        self.asDefArgs  = asDefArgs;
        ## The source file the function is defined in.
        self.sSrcFile   = sSrcFile;
        ## The start line.
        self.iBeginLine = iBeginLine;
        ## The line the function (probably) ends on, -1 until complete() is called.
        self.iEndLine   = -1;
        ## The raw lines the function is made up of.
        self.asLines    = [] # type: List[str]

    def complete(self, iEndLine, asLines):
        """
        Completes the function by recording its end line and raw lines.

        May only be called once (asserts that iEndLine is still -1).
        """
        assert self.iEndLine == -1;
        (self.iEndLine, self.asLines) = (iEndLine, asLines);
1826
1827
1828#
1829# "Microcode" statements and blocks
1830#
1831
class McStmt(object):
    """
    Statement in a microcode block.
    """

    def __init__(self, sName, asParams):
        ## 'IEM_MC_XXX' or 'C++'.
        self.sName    = sName;
        ## The statement parameters (list of strings).
        self.asParams = asParams;
        ## Free for the user of the statement, initially None.
        self.oUser    = None;

    def renderCode(self, cchIndent = 0):
        """
        Renders the code for the statement.

        Returns 'sName(param, param, ...);' indented by cchIndent spaces and
        terminated by a newline.
        """
        return ''.join((' ' * cchIndent, self.sName, '(', ', '.join(self.asParams), ');\n',));

    @staticmethod
    def renderCodeForList(aoStmts, cchIndent = 0):
        """
        Renders a list of statements.

        Returns the concatenated renderCode() output of all the statements.
        """
        return ''.join(oStmt.renderCode(cchIndent) for oStmt in aoStmts);

    @staticmethod
    def findStmtByNames(aoStmts, dNames):
        """
        Returns the first statement in the list having any of the given
        names, None if there is no such statement.

        The branches of conditional statements are searched too (if-branch
        before else-branch).

        Note! The names are passed as a dictionary for quick lookup, the value
              does not matter.
        """
        for oStmt in aoStmts:
            if oStmt.sName in dNames:
                return oStmt;
            if isinstance(oStmt, McStmtCond):
                oHit = (   McStmt.findStmtByNames(oStmt.aoIfBranch, dNames)
                        or McStmt.findStmtByNames(oStmt.aoElseBranch, dNames));
                if oHit:
                    return oHit;
        return None;

    @staticmethod
    def countStmtsByName(aoStmts, dNames, dRet):
        """
        Searches the given list of statements (including the branches of
        conditionals) for the names in the dNames dictionary, adding each one
        found to dRet with an occurrence count.

        Returns the total number of hits.
        """
        cHits = 0;
        for oStmt in aoStmts:
            sStmtName = oStmt.sName;
            if sStmtName in dNames:
                dRet[sStmtName] = dRet.get(sStmtName, 0) + 1;
                cHits += 1;
            if isinstance(oStmt, McStmtCond):
                for aoBranch in (oStmt.aoIfBranch, oStmt.aoElseBranch):
                    cHits += McStmt.countStmtsByName(aoBranch, dNames, dRet);
        return cHits;

    def isCppStmt(self):
        """ Checks if this is a C++ statement, i.e. whether sName starts with 'C++'. """
        return self.sName[:3] == 'C++';
1894
class McStmtCond(McStmt):
    """
    Base class for conditional statements (IEM_MC_IF_XXX, IEM_MC_NATIVE_IF).
    """

    def __init__(self, sName, asParams, aoIfBranch = None, aoElseBranch = None):
        McStmt.__init__(self, sName, asParams);
        ## The statements of the if-branch (private copy of the list passed in).
        self.aoIfBranch            = list(aoIfBranch) if aoIfBranch is not None else [];
        ## The statements of the else-branch (private copy of the list passed in).
        self.aoElseBranch          = list(aoElseBranch) if aoElseBranch is not None else [];
        ## User specific IF-branch annotation.
        self.oIfBranchAnnotation   = None;
        ## User specific ELSE-branch annotation.
        self.oElseBranchAnnotation = None;
        ## '_NATIVE' for IEM_MC_NATIVE_IF, otherwise empty.  Used when
        ## rendering the IEM_MC[_NATIVE]_ELSE and IEM_MC[_NATIVE]_ENDIF parts.
        self.sNativeInfix          = '_NATIVE' if sName == 'IEM_MC_NATIVE_IF' else '';

    def renderCode(self, cchIndent = 0):
        """
        Renders the conditional with both branches indented by four more
        spaces.  The else part is only emitted if the else-branch isn't empty.
        """
        sIndent = ' ' * cchIndent;
        asRet = [
            sIndent + self.sName + '(' + ', '.join(self.asParams) + ') {\n',
            self.renderCodeForList(self.aoIfBranch, cchIndent + 4),
        ];
        if self.aoElseBranch:
            asRet.append(sIndent + '} IEM_MC%s_ELSE() {\n' % (self.sNativeInfix,));
            asRet.append(self.renderCodeForList(self.aoElseBranch, cchIndent + 4));
        asRet.append(sIndent + '} IEM_MC%s_ENDIF();\n' % (self.sNativeInfix,));
        return ''.join(asRet);
1915
class McStmtNativeIf(McStmtCond):
    """
    IEM_MC_NATIVE_IF

    The single statement parameter is the architecture list joined by '|',
    or '0' if the list is empty.
    """

    def __init__(self, sName, asArchitectures):
        sCondition = '|'.join(asArchitectures) if asArchitectures else '0';
        McStmtCond.__init__(self, sName, [sCondition,]);
        ## The architectures as passed to the constructor.
        self.asArchitectures = asArchitectures;
1921
class McStmtVar(McStmt):
    """ IEM_MC_LOCAL, IEM_MC_LOCAL_ASSIGN, IEM_MC_LOCAL_CONST """

    def __init__(self, sName, asParams, sType, sVarName, sValue = None):
        McStmt.__init__(self, sName, asParams);
        ## The variable type.
        self.sType    = sType;
        ## The variable name.
        self.sVarName = sVarName;
        ## None if no assigned / const value.
        self.sValue   = sValue;
1929
class McStmtArg(McStmtVar):
    """ IEM_MC_ARG, IEM_MC_ARG_CONST, IEM_MC_ARG_LOCAL_REF """

    def __init__(self, sName, asParams, sType, sVarName, iArg, sConstValue = None, sRef = None, sRefType = 'none'):
        # The constant value (if any) ends up in McStmtVar.sValue.
        McStmtVar.__init__(self, sName, asParams, sType, sVarName, sConstValue);
        ## The argument number.
        self.iArg     = iArg;
        ## The reference string (local variable, register).
        self.sRef     = sRef;
        ## The kind of reference: 'local', 'none'.
        self.sRefType = sRefType;
        assert sRefType in ('none', 'local');
1938
class McStmtCall(McStmt):
    """ IEM_MC_CALL_* """

    def __init__(self, sName, asParams, iFnParam, iRcNameParam = -1):
        McStmt.__init__(self, sName, asParams);
        ## Index into asParams of the function being called.
        self.idxFn     = iFnParam;
        ## The index following the function one in asParams.
        self.idxParams = iFnParam + 1;
        ## The function parameter value, i.e. asParams[iFnParam].
        self.sFn       = asParams[iFnParam];
        ## The value of the parameter at iRcNameParam, None if that index is negative.
        self.iRcName   = asParams[iRcNameParam] if iRcNameParam >= 0 else None;
1947
class McStmtAssertEFlags(McStmt):
    """
    IEM_MC_ASSERT_EFLAGS

    The parameters are the tested and the modified flags of the given
    instruction, both in C style.
    """

    def __init__(self, oInstruction):
        asParams = [oInstruction.getTestedFlagsCStyle(), oInstruction.getModifiedFlagsCStyle(),];
        McStmt.__init__(self, 'IEM_MC_ASSERT_EFLAGS', asParams);
1955
1956
class McCppGeneric(McStmt):
    """
    Generic C++/C statement.

    The code is kept as the one and only statement parameter.
    """

    def __init__(self, sCode, fDecode = True, sName = 'C++', cchIndent = 0):
        McStmt.__init__(self, sName, [sCode,]);
        ## Selects the '// C++ decode' (True) or '// C++ normal' (False) annotation.
        self.fDecode   = fDecode;
        ## Extra indentation added to what renderCode is given.
        self.cchIndent = cchIndent;

    def renderCode(self, cchIndent = 0):
        """
        Renders the code with the decode/normal annotation inserted before
        each newline (including the terminating one added here).
        """
        sAnnotation = ' // C++ decode\n' if self.fDecode else ' // C++ normal\n';
        sLine = ' ' * (cchIndent + self.cchIndent) + self.asParams[0] + '\n';
        return sLine.replace('\n', sAnnotation);
1974
class McCppCall(McCppGeneric):
    """
    A generic C++/C call statement.

    The sName is still 'C++', so the function name is in the first parameter
    and the arguments in the subsequent ones.
    """

    def __init__(self, sFnName, asArgs, fDecode = True, cchIndent = 0):
        McCppGeneric.__init__(self, sFnName, fDecode, 'C++', cchIndent);
        self.asParams.extend(asArgs);

    def renderCode(self, cchIndent = 0):
        """
        Renders the call as 'fn(arg, arg, ...);' followed by the decode/normal
        annotation comment.
        """
        sArgs       = ', '.join(self.asParams[1:]);
        sAnnotation = ' // C++ decode\n' if self.fDecode else ' // C++ normal\n';
        return ' ' * (cchIndent + self.cchIndent) + self.asParams[0] + '(' + sArgs + ');' + sAnnotation;
1994
class McCppCond(McStmtCond):
    """
    C++/C 'if' statement.

    The condition expression is kept as the one and only statement parameter.
    """

    def __init__(self, sCode, fDecode = True, aoIfBranch = None, aoElseBranch = None, cchIndent = 0):
        McStmtCond.__init__(self, 'C++/if', [sCode,], aoIfBranch, aoElseBranch);
        ## Selects the '// C++ decode' (True) or '// C++ normal' (False) annotation.
        self.fDecode   = fDecode;
        ## Extra indentation added to what renderCode is given.
        self.cchIndent = cchIndent;

    def renderCode(self, cchIndent = 0):
        """
        Renders the if statement with braces on separate lines and the
        branches indented by four more spaces.  The else part is only emitted
        if the else-branch isn't empty.
        """
        cchOuter    = cchIndent + self.cchIndent;
        sIndent     = ' ' * cchOuter;
        sAnnotation = '// C++ decode' if self.fDecode else '// C++ normal';
        asRet = [
            sIndent + 'if (' + self.asParams[0] + ') ' + sAnnotation + '\n',
            sIndent + '{\n',
            self.renderCodeForList(self.aoIfBranch, cchOuter + 4),
            sIndent + '}\n',
        ];
        if self.aoElseBranch:
            asRet.extend([
                sIndent + 'else ' + sAnnotation + '\n',
                sIndent + '{\n',
                self.renderCodeForList(self.aoElseBranch, cchOuter + 4),
                sIndent + '}\n',
            ]);
        return ''.join(asRet);
2017
class McCppPreProc(McCppGeneric):
    """
    C++/C Preprocessor directive.

    The statement name is 'C++/preproc' and fDecode is always False.
    """

    def __init__(self, sCode):
        McCppGeneric.__init__(self, sCode, fDecode = False, sName = 'C++/preproc');

    def renderCode(self, cchIndent = 0):
        """ Renders the directive followed by a newline; cchIndent is not used. """
        return ''.join((self.asParams[0], '\n',));
2027
2028
## IEM_MC_F_XXX values.
## The value is a tuple of further IEM_MC_F_XXX flags that McBlock.parseMcBegin
## also sets in dsMcFlags when the key flag is specified.
g_kdMcFlags = {
    'IEM_MC_F_ONLY_8086':           (),
    'IEM_MC_F_MIN_186':             (),
    'IEM_MC_F_MIN_286':             (),
    'IEM_MC_F_NOT_286_OR_OLDER':    (),
    'IEM_MC_F_MIN_386':             ('IEM_MC_F_NOT_286_OR_OLDER',),
    'IEM_MC_F_MIN_486':             ('IEM_MC_F_NOT_286_OR_OLDER',),
    'IEM_MC_F_MIN_PENTIUM':         ('IEM_MC_F_NOT_286_OR_OLDER',),
    'IEM_MC_F_MIN_PENTIUM_II':      ('IEM_MC_F_NOT_286_OR_OLDER',),
    'IEM_MC_F_MIN_CORE':            ('IEM_MC_F_NOT_286_OR_OLDER',),
    'IEM_MC_F_64BIT':               ('IEM_MC_F_NOT_286_OR_OLDER',),
    'IEM_MC_F_NOT_64BIT':           (),
};
## IEM_CIMPL_F_XXX values.
## The value is a tuple of further IEM_CIMPL_F_XXX flags that
## McBlock.parseCImplFlags also sets in dsCImplFlags when the key flag is seen.
g_kdCImplFlags = {
    'IEM_CIMPL_F_BRANCH_DIRECT':        (),
    'IEM_CIMPL_F_BRANCH_INDIRECT':      (),
    'IEM_CIMPL_F_BRANCH_RELATIVE':      (),
    'IEM_CIMPL_F_BRANCH_CONDITIONAL':   (),
    'IEM_CIMPL_F_BRANCH_FAR':           (),
    'IEM_CIMPL_F_BRANCH_ANY':           ('IEM_CIMPL_F_BRANCH_DIRECT', 'IEM_CIMPL_F_BRANCH_INDIRECT',
                                         'IEM_CIMPL_F_BRANCH_RELATIVE',),
    'IEM_CIMPL_F_BRANCH_STACK':         (),
    'IEM_CIMPL_F_BRANCH_STACK_FAR':     (),
    'IEM_CIMPL_F_MODE':                 (),
    'IEM_CIMPL_F_RFLAGS':               (),
    'IEM_CIMPL_F_INHIBIT_SHADOW':       (),
    'IEM_CIMPL_F_STATUS_FLAGS':         (),
    'IEM_CIMPL_F_CHECK_IRQ_AFTER':      (),
    'IEM_CIMPL_F_CHECK_IRQ_BEFORE':     (),
    'IEM_CIMPL_F_CHECK_IRQ_BEFORE_AND_AFTER': ('IEM_CIMPL_F_CHECK_IRQ_BEFORE', 'IEM_CIMPL_F_CHECK_IRQ_AFTER',),
    'IEM_CIMPL_F_VMEXIT':               (),
    'IEM_CIMPL_F_FPU':                  (),
    'IEM_CIMPL_F_REP':                  (),
    'IEM_CIMPL_F_IO':                   (),
    'IEM_CIMPL_F_END_TB':               (),
    'IEM_CIMPL_F_XCPT':                 ('IEM_CIMPL_F_BRANCH_INDIRECT', 'IEM_CIMPL_F_BRANCH_FAR',
                                         'IEM_CIMPL_F_MODE', 'IEM_CIMPL_F_RFLAGS', 'IEM_CIMPL_F_VMEXIT', ),
    'IEM_CIMPL_F_CALLS_CIMPL':          (),
    'IEM_CIMPL_F_CALLS_AIMPL':          (),
    'IEM_CIMPL_F_CALLS_AIMPL_WITH_FXSTATE': (),
};
2072class McBlock(object):
2073 """
2074 Microcode block (IEM_MC_BEGIN ... IEM_MC_END, IEM_MC_DEFER_TO_CIMPL_x_RET).
2075 """
2076
## @name Macro expansion types.
## Values for the iMacroExp attribute.
## @{
kiMacroExp_None    = 0;  ##< The value the constructor initializes iMacroExp with.
kiMacroExp_Entire  = 1;  ##< Entire block (iBeginLine == iEndLine), original line may contain multiple blocks.
kiMacroExp_Partial = 2;  ##< Partial/mixed (cmpxchg16b), safe to assume single block.
## @}
2083
2084 def __init__(self, sSrcFile, iBeginLine, offBeginLine, oFunction, iInFunction,
2085 oInstruction = None, cchIndent = None, fDeferToCImpl = False):
2086 ## Set if IEM_MC_DEFER_TO_CIMPL_0_RET and friends, clear if IEM_MC_BEGIN/END block.
2087 self.fDeferToCImpl = fDeferToCImpl;
2088 ## The source file containing the block.
2089 self.sSrcFile = sSrcFile;
2090 ## The line with the IEM_MC_BEGIN/IEM_MC_DEFER_TO_CIMPL_X_RET statement.
2091 self.iBeginLine = iBeginLine;
2092 ## The offset of the IEM_MC_BEGIN/IEM_MC_DEFER_TO_CIMPL_X_RET statement within the line.
2093 self.offBeginLine = offBeginLine;
2094 ## The line with the IEM_MC_END statement / last line of IEM_MC_DEFER_TO_CIMPL_X_RET.
2095 self.iEndLine = -1;
2096 ## The offset of the IEM_MC_END statement within the line / semicolon offset for defer-to.
2097 self.offEndLine = 0;
2098 ## The offset following the IEM_MC_END/IEM_MC_DEFER_TO_CIMPL_X_RET semicolon.
2099 self.offAfterEnd = 0;
2100 ## The function the block resides in.
2101 self.oFunction = oFunction;
2102 ## The name of the function the block resides in. DEPRECATED.
2103 self.sFunction = oFunction.sName;
2104 ## The block number within the function.
2105 self.iInFunction = iInFunction;
2106 ## The instruction this block is associated with - can be None.
2107 self.oInstruction = oInstruction # type: Instruction
2108 ## Indentation level of the block.
2109 self.cchIndent = cchIndent if cchIndent else offBeginLine;
2110 ## The raw lines the block is made up of.
2111 self.asLines = [] # type: List[str]
2112 ## Indicates whether the block includes macro expansion parts (kiMacroExp_None,
2113 ## kiMacroExp_Entrie, kiMacroExp_Partial).
2114 self.iMacroExp = self.kiMacroExp_None;
2115 ## IEM_MC_BEGIN: Argument count.
2116 self.cArgs = -1;
2117 ## IEM_MC_ARG, IEM_MC_ARG_CONST, IEM_MC_ARG_LOCAL_REF, IEM_MC_ARG_LOCAL_EFLAGS.
2118 self.aoArgs = [] # type: List[McStmtArg]
2119 ## IEM_MC_BEGIN: Locals count.
2120 self.cLocals = -1;
2121 ## IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, IEM_MC_ARG_LOCAL_EFLAGS.
2122 self.aoLocals = [] # type: List[McStmtVar]
2123 ## IEM_MC_BEGIN: IEM_MC_F_XXX dictionary
2124 self.dsMcFlags = {} # type: Dict[str, bool]
2125 ## IEM_MC_[DEFER_TO|CALL]_CIMPL_XXX: IEM_CIMPL_F_XXX dictionary
2126 self.dsCImplFlags = {} # type: Dict[str, bool]
2127 ## Decoded statements in the block.
2128 self.aoStmts = [] # type: List[McStmt]
2129
def complete(self, iEndLine, offEndLine, offAfterEnd, asLines):
    """
    Completes the microcode block by recording where it ends and the raw
    lines it is made up of.

    May only be called once (asserts that iEndLine is still -1).
    """
    assert self.iEndLine == -1;
    (self.iEndLine, self.offEndLine, self.offAfterEnd) = (iEndLine, offEndLine, offAfterEnd);
    self.asLines = asLines;
2139
def raiseDecodeError(self, sRawCode, off, sMessage):
    """
    Raises a decoding error (ParserException).

    The line reported is iBeginLine plus the number of newlines preceding
    off in sRawCode, the column is the one-based position of off within
    that line.
    """
    iLine   = self.iBeginLine + sRawCode.count('\n', 0, off);
    # rfind returns -1 when off is on the first line, which conveniently
    # makes the column one-based in that case too.
    iColumn = off - sRawCode.rfind('\n', 0, off);
    raise ParserException('%s:%d:%d: parsing error: %s' % (self.sSrcFile, iLine, iColumn, sMessage,));
2146
def raiseStmtError(self, sName, sMessage):
    """
    Raises a statement parser error (ParserException).

    The location reported is the first line of the block (iBeginLine).
    """
    sFullMsg = '%s:%d: %s: parsing error: %s' % (self.sSrcFile, self.iBeginLine, sName, sMessage,);
    raise ParserException(sFullMsg);
2150
def checkStmtParamCount(self, sName, asParams, cParamsExpected):
    """
    Checks the parameter count of a statement.

    Returns True if asParams has exactly cParamsExpected entries, raises
    ParserException if it does not.
    """
    cParams = len(asParams);
    if cParams == cParamsExpected:
        return True;
    raise ParserException('%s:%d: %s: Expected %s parameters, found %s!'
                          % (self.sSrcFile, self.iBeginLine, sName, cParamsExpected, cParams,));
2157
@staticmethod
def parseMcGeneric(oSelf, sName, asParams):
    """
    Generic parser that returns a plain McStmt object.

    The oSelf parameter is not used, it is only there for signature
    compatibility with the other parsers.
    """
    del oSelf;
    oStmt = McStmt(sName, asParams);
    return oStmt;
2163
@staticmethod
def parseMcGenericCond(oSelf, sName, asParams):
    """
    Generic parser that returns a plain McStmtCond object.

    The oSelf parameter is not used, it is only there for signature
    compatibility with the other parsers.
    """
    del oSelf;
    oStmt = McStmtCond(sName, asParams);
    return oStmt;
2169
## The architecture values parseMcNativeIf accepts in IEM_MC_NATIVE_IF.
## Only the keys are checked there; the values are always True.
kdArchVals = {
    'RT_ARCH_VAL_X86':     True,
    'RT_ARCH_VAL_AMD64':   True,
    'RT_ARCH_VAL_ARM32':   True,
    'RT_ARCH_VAL_ARM64':   True,
    'RT_ARCH_VAL_SPARC32': True,
    'RT_ARCH_VAL_SPARC64': True,
};
2178
@staticmethod
def parseMcNativeIf(oSelf, sName, asParams):
    """
    IEM_MC_NATIVE_IF

    The single parameter is either '0' (no architectures) or a list of
    architecture values separated by '|', each of which must be a key in
    kdArchVals.  Returns a McStmtNativeIf object.
    """
    oSelf.checkStmtParamCount(sName, asParams, 1);
    asArchitectures = [];
    if asParams[0].strip() != '0':
        asArchitectures = [sArch.strip() for sArch in asParams[0].split('|')];
    for sArch in asArchitectures:
        if sArch not in oSelf.kdArchVals:
            oSelf.raiseStmtError(sName, 'Unknown architecture: %s' % (sArch,));
    return McStmtNativeIf(sName, asArchitectures);
2191
@staticmethod
def parseMcBegin(oSelf, sName, asParams):
    """
    IEM_MC_BEGIN

    The four parameters are the argument count, the locals count, the
    IEM_MC_F_XXX flags and the IEM_CIMPL_F_XXX flags.  The counts and flags
    are recorded in the block and a plain McStmt is returned.
    """
    oSelf.checkStmtParamCount(sName, asParams, 4);
    fUsedBefore = oSelf.cArgs != -1 or oSelf.cLocals != -1 or oSelf.dsMcFlags;
    if fUsedBefore:
        oSelf.raiseStmtError(sName, 'Used more than once!');
    oSelf.cArgs   = int(asParams[0]);
    oSelf.cLocals = int(asParams[1]);

    # The IEM_MC_F_XXX flags, each one also setting the flags it implies.
    sMcFlags = asParams[2];
    if sMcFlags != '0':
        for sFlag in sMcFlags.split('|'):
            sFlag = sFlag.strip();
            if sFlag not in g_kdMcFlags:
                oSelf.raiseStmtError(sName, 'Unknown flag: %s' % (sFlag, ));
            oSelf.dsMcFlags[sFlag] = True;
            for sImplied in g_kdMcFlags[sFlag]:
                oSelf.dsMcFlags[sImplied] = True;

    # The IEM_CIMPL_F_XXX flags.
    sCImplFlags = asParams[3];
    if sCImplFlags != '0':
        oSelf.parseCImplFlags(sName, sCImplFlags);

    return McBlock.parseMcGeneric(oSelf, sName, asParams);
2214
@staticmethod
def parseMcArg(oSelf, sName, asParams):
    """
    IEM_MC_ARG

    Parameters: type, variable name, argument number.  The resulting
    McStmtArg is added to aoArgs and returned.
    """
    oSelf.checkStmtParamCount(sName, asParams, 3);
    (sType, sVarName, sArgNo) = asParams;
    oStmt = McStmtArg(sName, asParams, sType, sVarName, int(sArgNo));
    oSelf.aoArgs.append(oStmt);
    return oStmt;
2222
@staticmethod
def parseMcArgConst(oSelf, sName, asParams):
    """
    IEM_MC_ARG_CONST

    Parameters: type, variable name, constant value, argument number.  The
    resulting McStmtArg is added to aoArgs and returned.
    """
    oSelf.checkStmtParamCount(sName, asParams, 4);
    (sType, sVarName, sConstValue, sArgNo) = asParams;
    oStmt = McStmtArg(sName, asParams, sType, sVarName, int(sArgNo), sConstValue = sConstValue);
    oSelf.aoArgs.append(oStmt);
    return oStmt;
2230
@staticmethod
def parseMcArgLocalRef(oSelf, sName, asParams):
    """
    IEM_MC_ARG_LOCAL_REF

    Parameters: type, variable name, referenced local, argument number.
    The resulting McStmtArg is added to aoArgs and returned.
    """
    oSelf.checkStmtParamCount(sName, asParams, 4);
    (sType, sVarName, sLocalRef, sArgNo) = asParams;
    oStmt = McStmtArg(sName, asParams, sType, sVarName, int(sArgNo), sRef = sLocalRef, sRefType = 'local');
    oSelf.aoArgs.append(oStmt);
    return oStmt;
2238
@staticmethod
def parseMcArgLocalEFlags(oSelf, sName, asParams):
    """
    IEM_MC_ARG_LOCAL_EFLAGS

    Parameters: argument name, local variable name, argument number.

    Note! This one is split up into an IEM_MC_LOCAL and an
          IEM_MC_ARG_LOCAL_REF statement, both of which are returned (as a
          tuple) after being added to aoLocals and aoArgs respectively.
    """
    oSelf.checkStmtParamCount(sName, asParams, 3);
    (sArgName, sLocalName, sArgNo) = asParams;

    oStmtLocal = McStmtVar('IEM_MC_LOCAL', ['uint32_t', sLocalName,], 'uint32_t', sLocalName);
    oSelf.aoLocals.append(oStmtLocal);

    oStmtArg = McStmtArg('IEM_MC_ARG_LOCAL_REF', ['uint32_t *', sArgName, sLocalName, sArgNo],
                         'uint32_t *', sArgName, int(sArgNo), sRef = sLocalName, sRefType = 'local');
    oSelf.aoArgs.append(oStmtArg);

    return (oStmtLocal, oStmtArg,);
2250
@staticmethod
def parseMcImplicitAvxAArgs(oSelf, sName, asParams):
    """
    IEM_MC_IMPLICIT_AVX_AIMPL_ARGS

    Takes no parameters.

    Note! Translated to an IEM_MC_ARG_CONST statement declaring pXState as
          argument #0, which is added to aoArgs and returned.
    """
    oSelf.checkStmtParamCount(sName, asParams, 0);
    asArgConstParams = ['PX86XSAVEAREA', 'pXState', '&pVCpu->cpum.GstCtx.XState', '0'];
    oStmt = McStmtArg('IEM_MC_ARG_CONST', asArgConstParams,
                      'PX86XSAVEAREA', 'pXState', 0, '&pVCpu->cpum.GstCtx.XState');
    oSelf.aoArgs.append(oStmt);
    return oStmt;
2260
@staticmethod
def parseMcLocal(oSelf, sName, asParams):
    """
    IEM_MC_LOCAL

    Parameters: type, variable name.  The resulting McStmtVar is added to
    aoLocals and returned.
    """
    oSelf.checkStmtParamCount(sName, asParams, 2);
    (sType, sVarName) = asParams;
    oStmt = McStmtVar(sName, asParams, sType, sVarName);
    oSelf.aoLocals.append(oStmt);
    return oStmt;
2268
@staticmethod
def parseMcLocalAssign(oSelf, sName, asParams):
    """
    IEM_MC_LOCAL_ASSIGN

    Parameters: type, variable name, assigned value.  The resulting
    McStmtVar is added to aoLocals and returned.
    """
    oSelf.checkStmtParamCount(sName, asParams, 3);
    (sType, sVarName, sAssignedValue) = asParams;
    oStmt = McStmtVar(sName, asParams, sType, sVarName, sValue = sAssignedValue);
    oSelf.aoLocals.append(oStmt);
    return oStmt;
2276
@staticmethod
def parseMcLocalConst(oSelf, sName, asParams):
    """
    IEM_MC_LOCAL_CONST

    Parameters: type, variable name, constant value.  The resulting
    McStmtVar is added to aoLocals and returned.
    """
    oSelf.checkStmtParamCount(sName, asParams, 3);
    (sType, sVarName, sConstValue) = asParams;
    oStmt = McStmtVar(sName, asParams, sType, sVarName, sValue = sConstValue);
    oSelf.aoLocals.append(oStmt);
    return oStmt;
2284
@staticmethod
def parseMcLocalEFlags(oSelf, sName, asParams):
    """
    IEM_MC_LOCAL_EFLAGS

    The single parameter is the variable name, the type is always uint32_t.
    The resulting McStmtVar is added to aoLocals and returned.
    """
    oSelf.checkStmtParamCount(sName, asParams, 1);
    sVarName = asParams[0];
    oStmt = McStmtVar(sName, asParams, 'uint32_t', sVarName);
    oSelf.aoLocals.append(oStmt);
    return oStmt;
2292
@staticmethod
def parseMcCallAImpl(oSelf, sName, asParams):
    """
    IEM_MC_CALL_AIMPL_3|4

    The argument count is taken from the last character of the statement
    name.  Parameter #0 is the return code variable and #1 the function,
    followed by the arguments.
    """
    cExpected = 2 + int(sName[-1]);
    oSelf.checkStmtParamCount(sName, asParams, cExpected);
    return McStmtCall(sName, asParams, 1, 0);
2299
@staticmethod
def parseMcCallVoidAImpl(oSelf, sName, asParams):
    """
    IEM_MC_CALL_VOID_AIMPL_2|3

    The argument count is taken from the last character of the statement
    name.  Parameter #0 is the function, followed by the arguments.
    """
    cExpected = 1 + int(sName[-1]);
    oSelf.checkStmtParamCount(sName, asParams, cExpected);
    return McStmtCall(sName, asParams, 0);
2306
@staticmethod
def parseMcCallAvxAImpl(oSelf, sName, asParams):
    """
    IEM_MC_CALL_AVX_AIMPL_2|3

    The argument count is taken from the last character of the statement
    name.  Parameter #0 is the function, followed by the arguments.
    """
    cExpected = 1 + int(sName[-1]);
    oSelf.checkStmtParamCount(sName, asParams, cExpected);
    return McStmtCall(sName, asParams, 0);
2313
@staticmethod
def parseMcCallFpuAImpl(oSelf, sName, asParams):
    """
    IEM_MC_CALL_FPU_AIMPL_1|2|3

    The argument count is taken from the last character of the statement
    name.  Parameter #0 is the function, followed by the arguments.
    """
    cExpected = 1 + int(sName[-1]);
    oSelf.checkStmtParamCount(sName, asParams, cExpected);
    return McStmtCall(sName, asParams, 0);
2320
@staticmethod
def parseMcCallMmxAImpl(oSelf, sName, asParams):
    """
    IEM_MC_CALL_MMX_AIMPL_2|3

    The argument count is taken from the last character of the statement
    name.  Parameter #0 is the function, followed by the arguments.
    """
    cExpected = 1 + int(sName[-1]);
    oSelf.checkStmtParamCount(sName, asParams, cExpected);
    return McStmtCall(sName, asParams, 0);
2327
@staticmethod
def parseMcCallSseAImpl(oSelf, sName, asParams):
    """
    IEM_MC_CALL_SSE_AIMPL_2|3

    The argument count is taken from the last character of the statement
    name.  Parameter #0 is the function, followed by the arguments.
    """
    cExpected = 1 + int(sName[-1]);
    oSelf.checkStmtParamCount(sName, asParams, cExpected);
    return McStmtCall(sName, asParams, 0);
2334
def parseCImplFlags(self, sName, sFlags):
    """
    Helper for parseMcBegin, parseMcCallCImpl and parseMcDeferToCImpl that
    validates and merges a bunch of IEM_CIMPL_F_XXX values into dsCImplFlags.

    The flags are separated by '|' and may contain comments as well as one
    opening and one closing parenthesis per flag.  A '0' among the flags is
    ignored.  Each flag also sets the flags listed for it in g_kdCImplFlags.

    Always returns None.  Raises ParserException (via raiseStmtError) on
    unknown flags, including empty ones.
    """
    if sFlags != '0':
        sFlags = self.stripComments(sFlags);
        for sFlag in sFlags.split('|'):
            # Note! Using startswith/endswith rather than indexing so that
            #       an empty flag (stray '|', '()') ends up as an 'Unknown
            #       flag' parser error instead of an IndexError.
            sFlag = sFlag.strip();
            if sFlag.startswith('('):
                sFlag = sFlag[1:].strip();
            if sFlag.endswith(')'):
                sFlag = sFlag[:-1].strip();
            if sFlag not in g_kdCImplFlags:
                if sFlag == '0':
                    continue;
                self.raiseStmtError(sName, 'Unknown flag: %s' % (sFlag, ));
            self.dsCImplFlags[sFlag] = True;
            for sFlag2 in g_kdCImplFlags[sFlag]:
                self.dsCImplFlags[sFlag2] = True;
    return None;
2356
@staticmethod
def parseMcCallCImpl(oSelf, sName, asParams):
    """
    IEM_MC_CALL_CIMPL_0|1|2|3|4|5

    The argument count is taken from the last character of the statement
    name.  Parameter #0 holds the IEM_CIMPL_F_XXX flags, which are merged
    into the block, and parameter #2 is the function.
    """
    cExpected = 3 + int(sName[-1]);
    oSelf.checkStmtParamCount(sName, asParams, cExpected);
    oSelf.parseCImplFlags(sName, asParams[0]);
    return McStmtCall(sName, asParams, 2);
2364
@staticmethod
def parseMcDeferToCImpl(oSelf, sName, asParams):
    """
    IEM_MC_DEFER_TO_CIMPL_[0|1|2|3]_RET

    The argument count is the 5th last character of the statement name,
    i.e. the digit preceding the '_RET' suffix.  Parameter #0 holds the
    IEM_CIMPL_F_XXX flags, which are merged into the block, and parameter
    #2 is the function.

    Note! This code is called by workerIemMcDeferToCImplXRet.
    """
    cExpected = 3 + int(sName[-5]);
    oSelf.checkStmtParamCount(sName, asParams, cExpected);
    oSelf.parseCImplFlags(sName, asParams[0]);
    return McStmtCall(sName, asParams, 2);
2374
2375 @staticmethod
2376 def stripComments(sCode):
2377 """ Returns sCode with comments removed. """
2378 off = 0;
2379 while off < len(sCode):
2380 off = sCode.find('/', off);
2381 if off < 0 or off + 1 >= len(sCode):
2382 break;
2383
2384 if sCode[off + 1] == '/':
2385 # C++ comment.
2386 offEnd = sCode.find('\n', off + 2);
2387 if offEnd < 0:
2388 return sCode[:off].rstrip();
2389 sCode = sCode[ : off] + sCode[offEnd : ];
2390 off += 1;
2391
2392 elif sCode[off + 1] == '*':
2393 # C comment
2394 offEnd = sCode.find('*/', off + 2);
2395 if offEnd < 0:
2396 return sCode[:off].rstrip();
2397 sSep = ' ';
2398 if (off > 0 and sCode[off - 1].isspace()) or (offEnd + 2 < len(sCode) and sCode[offEnd + 2].isspace()):
2399 sSep = '';
2400 sCode = sCode[ : off] + sSep + sCode[offEnd + 2 : ];
2401 off += len(sSep);
2402
2403 else:
2404 # Not a comment.
2405 off += 1;
2406 return sCode;
2407
2408 @staticmethod
2409 def extractParam(sCode, offParam):
2410 """
2411 Extracts the parameter value at offParam in sCode.
2412 Returns stripped value and the end offset of the terminating ',' or ')'.
2413 """
2414 # Extract it.
2415 cNesting = 0;
2416 offStart = offParam;
2417 while offParam < len(sCode):
2418 ch = sCode[offParam];
2419 if ch == '(':
2420 cNesting += 1;
2421 elif ch == ')':
2422 if cNesting == 0:
2423 break;
2424 cNesting -= 1;
2425 elif ch == ',' and cNesting == 0:
2426 break;
2427 offParam += 1;
2428 return (sCode[offStart : offParam].strip(), offParam);
2429
2430 @staticmethod
2431 def extractParams(sCode, offOpenParen):
2432 """
2433 Parses a parameter list.
2434 Returns the list of parameter values and the offset of the closing parentheses.
2435 Returns (None, len(sCode)) on if no closing parentheses was found.
2436 """
2437 assert sCode[offOpenParen] == '(';
2438 asParams = [];
2439 off = offOpenParen + 1;
2440 while off < len(sCode):
2441 ch = sCode[off];
2442 if ch.isspace():
2443 off += 1;
2444 elif ch != ')':
2445 (sParam, off) = McBlock.extractParam(sCode, off);
2446 asParams.append(sParam);
2447 assert off < len(sCode), 'off=%s sCode=%s:"%s"' % (off, len(sCode), sCode,);
2448 if sCode[off] == ',':
2449 off += 1;
2450 else:
2451 return (asParams, off);
2452 return (None, off);
2453
2454 @staticmethod
2455 def findClosingBraces(sCode, off, offStop):
2456 """
2457 Finds the matching '}' for the '{' at off in sCode.
2458 Returns offset of the matching '}' on success, otherwise -1.
2459
2460 Note! Does not take comments into account.
2461 """
2462 cDepth = 1;
2463 off += 1;
2464 while off < offStop:
2465 offClose = sCode.find('}', off, offStop);
2466 if offClose < 0:
2467 break;
2468 cDepth += sCode.count('{', off, offClose);
2469 cDepth -= 1;
2470 if cDepth == 0:
2471 return offClose;
2472 off = offClose + 1;
2473 return -1;
2474
2475 @staticmethod
2476 def countSpacesAt(sCode, off, offStop):
2477 """ Returns the number of space characters at off in sCode. """
2478 offStart = off;
2479 while off < offStop and sCode[off].isspace():
2480 off += 1;
2481 return off - offStart;
2482
2483 @staticmethod
2484 def skipSpacesAt(sCode, off, offStop):
2485 """ Returns first offset at or after off for a non-space character. """
2486 return off + McBlock.countSpacesAt(sCode, off, offStop);
2487
2488 @staticmethod
2489 def isSubstrAt(sStr, off, sSubStr):
2490 """ Returns true of sSubStr is found at off in sStr. """
2491 return sStr[off : off + len(sSubStr)] == sSubStr;
2492
## Matches the start of C/C++ control statements: 'if (', 'else', 'while (',
## 'for (' and 'do' (on word boundaries).
koReCppCtrlStmts   = re.compile(r'\b(if\s*[(]|else\b|while\s*[(]|for\s*[(]|do\b)');
## Matches 'iem.s.<member>' references for the members listed in the expression.
koReIemDecoderVars = re.compile(  r'iem\.s\.(fPrefixes|uRexReg|uRexB|uRexIndex|iEffSeg|offModRm|cbOpcode|offOpcode'
                                + r'|enmEffOpSize|enmDefOpSize|enmDefAddrMode|enmEffAddrMode|idxPrefix'
                                + r'|uVex3rdReg|uVexLength|fEvxStuff|uFpuOpcode|abOpcode'
                                + r')');

## The conditional statement families, one tuple each: the prefix of the IF
## statement name, the ELSE statement name and the ENDIF statement name.
## decodeCode uses the index into this tuple (iCond) to identify the family.
kaasConditions = (
    ( 'IEM_MC_IF_',       'IEM_MC_ELSE',        'IEM_MC_ENDIF' ),
    ( 'IEM_MC_NATIVE_IF', 'IEM_MC_NATIVE_ELSE', 'IEM_MC_NATIVE_ENDIF' ),
);
def decodeCode(self, sRawCode, off = 0, offStop = -1, iLevel = 0): # pylint: disable=too-many-statements,too-many-branches
    """
    Decodes sRawCode[off : offStop] into a list of statements.

    The text is scanned left to right.  Whitespace and C/C++ comments are
    skipped, text starting with 'IEM_MC_' is handed to the parser registered
    in g_dMcStmtParsers, and everything else is treated as a C/C++ statement
    (generic statement, preprocessor line or if/else construct).  Branches
    of conditionals are decoded by recursive calls.

    Parameters:
        sRawCode - The raw code text.
        off      - Offset of the first character to decode.
        offStop  - Offset to stop decoding at; a negative value means
                   len(sRawCode).
        iLevel   - Nesting depth, incremented for each recursion into a
                   branch.  An MC conditional at a depth above 1 is rejected.

    Returns list of McStmt instances.
    Raises ParserException on failure.
    """
    if offStop < 0:
        offStop = len(sRawCode);
    aoStmts = [];
    while off < offStop:
        ch = sRawCode[off];

        #
        # Skip spaces and comments.
        #
        if ch.isspace():
            off += 1;

        elif ch == '/':
            # Slice rather than index so a '/' at the very end of the text yields
            # the decode error below instead of an IndexError.
            ch = sRawCode[off + 1 : off + 2];
            if ch == '/':   # C++ comment.
                off = sRawCode.find('\n', off + 2);
                if off < 0:
                    break;
                off += 1;
            elif ch == '*': # C comment.
                off = sRawCode.find('*/', off + 2);
                if off < 0:
                    break;
                off += 2;
            else:
                self.raiseDecodeError(sRawCode, off, 'Unexpected "/"');

        #
        # Is it a MC statement.
        #
        elif ch == 'I' and sRawCode[off : off + len('IEM_MC_')] == 'IEM_MC_':
            # All MC statements ends with a semicolon, except for conditionals which ends with a '{'.
            # Extract it and strip comments from it.
            if self.isSubstrAt(sRawCode, off, self.kaasConditions[0][0]):   iCond = 0;
            elif self.isSubstrAt(sRawCode, off, self.kaasConditions[1][0]): iCond = 1;
            else:                                                           iCond = -1;
            if iCond < 0:
                # Note: the search is not limited to offStop; for one-line if/else
                #       bodies the terminating ';' sits exactly at offStop.
                offEnd = sRawCode.find(';', off + len('IEM_MC_'));
                if offEnd <= off:
                    self.raiseDecodeError(sRawCode, off, 'MC statement without a ";"');
            else:
                offEnd = sRawCode.find('{', off + len(self.kaasConditions[iCond][0]));
                if offEnd <= off:
                    self.raiseDecodeError(sRawCode, off, 'MC conditional statement without a "{"');
                if sRawCode.find(';', off + len(self.kaasConditions[iCond][0]), offEnd) > off:
                    self.raiseDecodeError(sRawCode, off, 'MC conditional statement without an immediate "{"');
                # Back up over the whitespace preceding the '{'.  offEnd starts out at
                # the brace itself, so a conditional written as 'IEM_MC_IF_XXX(){' keeps
                # its closing parenthesis (stepping back one character first would cut
                # it off).
                while offEnd > off and sRawCode[offEnd - 1].isspace():
                    offEnd -= 1;

            sRawStmt = self.stripComments(sRawCode[off : offEnd]);

            # Isolate the statement name.
            offOpenParen = sRawStmt.find('(');
            if offOpenParen < 0:
                self.raiseDecodeError(sRawCode, off, 'MC statement without a "("');
            sName = sRawStmt[: offOpenParen].strip();

            # Extract the parameters.
            (asParams, offCloseParen) = self.extractParams(sRawStmt, offOpenParen);
            if asParams is None:
                self.raiseDecodeError(sRawCode, off, 'MC statement without a closing parenthesis');
            if offCloseParen + 1 != len(sRawStmt):
                self.raiseDecodeError(sRawCode, off,
                                      'Unexpected code following MC statement: %s' % (sRawStmt[offCloseParen + 1:]));

            # Hand it to the handler.  The table value is a tuple with the parser
            # function as the first entry.
            fnParser = g_dMcStmtParsers.get(sName);
            if not fnParser:
                self.raiseDecodeError(sRawCode, off, 'Unknown MC statement: %s' % (sName,));
            fnParser = fnParser[0];
            oStmt = fnParser(self, sName, asParams);
            if not isinstance(oStmt, (list, tuple)):
                aoStmts.append(oStmt);
            else:
                aoStmts.extend(oStmt);

            #
            # If conditional, we need to parse the whole statement.
            #
            # For reasons of simplicity, we assume the following structure
            # and parse each branch in a recursive call:
            #   IEM_MC_IF_XXX() {
            #        IEM_MC_WHATEVER();
            #   } IEM_MC_ELSE() {
            #        IEM_MC_WHATEVER();
            #   } IEM_MC_ENDIF();
            #
            if iCond >= 0:
                if iLevel > 1: ## @todo discount IEM_MC_NATIVE_IF.
                    self.raiseDecodeError(sRawCode, off, 'Too deep nesting of conditionals.');

                # Find start of the IF block:
                offBlock1 = self.skipSpacesAt(sRawCode, offEnd, offStop);
                if sRawCode[offBlock1] != '{':
                    self.raiseDecodeError(sRawCode, offBlock1, 'Expected "{" following %s' % (sName,));

                # Find the end of it.
                offBlock1End = self.findClosingBraces(sRawCode, offBlock1, offStop);
                if offBlock1End < 0:
                    self.raiseDecodeError(sRawCode, offBlock1, 'No matching "}" closing IF block of %s' % (sName,));

                oStmt.aoIfBranch = self.decodeCode(sRawCode, offBlock1 + 1, offBlock1End, iLevel + 1);

                # Is there an else section?
                off = self.skipSpacesAt(sRawCode, offBlock1End + 1, offStop);
                sElseNm = self.kaasConditions[iCond][1];
                if self.isSubstrAt(sRawCode, off, sElseNm):
                    off = self.skipSpacesAt(sRawCode, off + len(sElseNm), offStop);
                    if sRawCode[off] != '(':
                        self.raiseDecodeError(sRawCode, off, 'Expected "(" following %s"' % (sElseNm,));
                    off = self.skipSpacesAt(sRawCode, off + 1, offStop);
                    if sRawCode[off] != ')':
                        self.raiseDecodeError(sRawCode, off, 'Expected ")" following %s("' % (sElseNm,));

                    # Find start of the ELSE block.
                    offBlock2 = self.skipSpacesAt(sRawCode, off + 1, offStop);
                    if sRawCode[offBlock2] != '{':
                        self.raiseDecodeError(sRawCode, offBlock2, 'Expected "{" following %s()"' % (sElseNm,));

                    # Find the end of it.
                    offBlock2End = self.findClosingBraces(sRawCode, offBlock2, offStop);
                    if offBlock2End < 0:
                        self.raiseDecodeError(sRawCode, offBlock2, 'No matching "}" closing ELSE block of %s' % (sName,));

                    oStmt.aoElseBranch = self.decodeCode(sRawCode, offBlock2 + 1, offBlock2End, iLevel + 1);
                    off = self.skipSpacesAt(sRawCode, offBlock2End + 1, offStop);

                # Parse past the endif statement.
                sEndIfNm = self.kaasConditions[iCond][2];
                if not self.isSubstrAt(sRawCode, off, sEndIfNm):
                    self.raiseDecodeError(sRawCode, off, 'Expected %s for closing %s' % (sEndIfNm, sName,));
                off = self.skipSpacesAt(sRawCode, off + len(sEndIfNm), offStop);
                if sRawCode[off] != '(':
                    self.raiseDecodeError(sRawCode, off, 'Expected "(" following %s"' % (sEndIfNm,));
                off = self.skipSpacesAt(sRawCode, off + 1, offStop);
                if sRawCode[off] != ')':
                    self.raiseDecodeError(sRawCode, off, 'Expected ")" following %s("' % (sEndIfNm,));
                off = self.skipSpacesAt(sRawCode, off + 1, offStop);
                if sRawCode[off] != ';':
                    self.raiseDecodeError(sRawCode, off, 'Expected ";" following %s()"' % (sEndIfNm,));
                off += 1;

            else:
                # Advance.
                off = offEnd + 1;

        #
        # Otherwise it must be a C/C++ statement of sorts.
        #
        else:
            # Find the end of the statement.  if and else requires special handling.
            sCondExpr = None;
            oMatch = self.koReCppCtrlStmts.match(sRawCode, off);
            if oMatch:
                if oMatch.group(1)[-1] == '(':
                    (sCondExpr, offEnd) = self.extractParam(sRawCode, oMatch.end());
                else:
                    offEnd = oMatch.end();
                if not oMatch.group(1).startswith('if') and oMatch.group(1) != 'else':
                    self.raiseDecodeError(sRawCode, off, 'Only if/else control statements allowed: %s' % (oMatch.group(1),));
            elif ch == '#':
                # Preprocessor statement: runs to the end of the line (or of the range).
                offEnd = sRawCode.find('\n', off, offStop);
                if offEnd < 0:
                    offEnd = offStop;
                offEnd -= 1;
                while offEnd > off and sRawCode[offEnd - 1].isspace():
                    offEnd -= 1;
            else:
                offEnd = sRawCode.find(';', off);
                if offEnd < 0:
                    self.raiseDecodeError(sRawCode, off, 'C++ statement without a ";"');

            # Check this and the following statement whether it might have
            # something to do with decoding.  This is a statement filter
            # criteria when generating the threaded functions blocks.
            offNextEnd = sRawCode.find(';', offEnd + 1);
            fDecode = (   sRawCode.find('IEM_OPCODE_', off, max(offEnd, offNextEnd)) >= 0
                       or sRawCode.find('IEMOP_HLP_DONE_', off, max(offEnd, offNextEnd)) >= 0
                       or sRawCode.find('IEMOP_HLP_DECODED_', off, offEnd) >= 0
                       or sRawCode.find('IEMOP_HLP_RAISE_UD_IF_MISSING_GUEST_FEATURE', off, offEnd) >= 0
                       or sRawCode.find('IEMOP_HLP_VMX_INSTR', off, offEnd) >= 0
                       or sRawCode.find('IEMOP_HLP_IN_VMX_OPERATION', off, offEnd) >= 0 ## @todo wrong
                       );

            if not oMatch:
                if ch != '#':
                    aoStmts.append(McCppGeneric(sRawCode[off : offEnd + 1], fDecode));
                else:
                    aoStmts.append(McCppPreProc(sRawCode[off : offEnd + 1]));
                off = offEnd + 1;
            elif oMatch.group(1).startswith('if'):
                #
                # if () xxx [else yyy] statement.
                #
                oStmt = McCppCond(sCondExpr, fDecode);
                aoStmts.append(oStmt);
                off = offEnd + 1;

                # Following the if () we can either have a {} containing zero or more statements
                # or we have a single statement.
                offBlock1 = self.skipSpacesAt(sRawCode, offEnd + 1, offStop);
                if sRawCode[offBlock1] == '{':
                    offBlock1End = self.findClosingBraces(sRawCode, offBlock1, offStop);
                    if offBlock1End < 0:
                        self.raiseDecodeError(sRawCode, offBlock1, 'No matching "}" closing if block');
                    offBlock1 += 1;
                else:
                    offBlock1End = sRawCode.find(';', offBlock1, offStop);
                    if offBlock1End < 0:
                        self.raiseDecodeError(sRawCode, off, 'Expected ";" terminating one-line if block"');

                oStmt.aoIfBranch = self.decodeCode(sRawCode, offBlock1, offBlock1End, iLevel + 1);

                # The else is optional and can likewise be followed by {} or a single statement.
                # The character after 'else' is sliced so that an 'else' ending the text is
                # simply not taken as an else branch here (no IndexError).
                off = self.skipSpacesAt(sRawCode, offBlock1End + 1, offStop);
                if (    self.isSubstrAt(sRawCode, off, 'else')
                    and sRawCode[off + len('else') : off + len('else') + 1].isspace()):
                    offBlock2 = self.skipSpacesAt(sRawCode, off + len('else'), offStop);
                    if sRawCode[offBlock2] == '{':
                        offBlock2End = self.findClosingBraces(sRawCode, offBlock2, offStop);
                        if offBlock2End < 0:
                            self.raiseDecodeError(sRawCode, offBlock2, 'No matching "}" closing else block');
                        offBlock2 += 1;
                    else:
                        offBlock2End = sRawCode.find(';', offBlock2, offStop);
                        if offBlock2End < 0:
                            self.raiseDecodeError(sRawCode, off, 'Expected ";" terminating one-line else block"');

                    oStmt.aoElseBranch = self.decodeCode(sRawCode, offBlock2, offBlock2End, iLevel + 1);
                    off = offBlock2End + 1;

            elif oMatch.group(1) == 'else':
                # Problematic 'else' branch, typically involving #ifdefs.
                self.raiseDecodeError(sRawCode, off, 'Mixed up else/#ifdef or something confusing us.');

    return aoStmts;
2746
def decode(self):
    """
    Returns the statement list of the block, decoding it on demand.

    When self.aoStmts is empty, the lines in self.asLines are joined and
    run through decodeCode(), and the result is stored in self.aoStmts.

    Returns the statement list.
    Raises ParserException on failure.
    """
    aoCached = self.aoStmts;
    if aoCached:
        return aoCached;
    self.aoStmts = self.decodeCode(''.join(self.asLines));
    return self.aoStmts;
2756
2757
def checkForTooEarlyEffSegUse(self, aoStmts):
    """
    Checks that no statement preceding the first IEM_MC_CALC_RM_EFF_ADDR has
    a parameter mentioning pVCpu->iem.s.iEffSeg.

    Only the given list is examined; conditional branches are not entered,
    ASSUMING they will not occur before the address calculation.

    Returns None on success, error string on failure.

    See r158454 for an example of this issue.
    """
    # Locate the first effective address calculation; nothing to check without one.
    idxCalc = next((idx for idx, oCur in enumerate(aoStmts) if oCur.sName == 'IEM_MC_CALC_RM_EFF_ADDR'), None);
    if idxCalc is None:
        return None;

    # Walk backwards from the statement just before it, reporting the first hit.
    for idxPrev in range(idxCalc - 1, -1, -1):
        if any(sArg.find('pVCpu->iem.s.iEffSeg') >= 0 for sArg in aoStmts[idxPrev].asParams):
            return "statement #%u: pVCpu->iem.s.iEffSeg is used prior to IEM_MC_CALC_RM_EFF_ADDR!" % (idxPrev + 1,);
    return None;
2779
# Captures the first word of a C/C++ statement (optional leading whitespace,
# then the word, then a space, '(' or ';').
koReCppFirstWord = re.compile(r'^\s*(\w+)[ (;]');

# First words of decoding related C/C++ statements that checkForDoneDecoding()
# tolerates after IEMOP_HLP_DONE_*DECODING*.
kdDecodeCppStmtOkayAfterDone = dict.fromkeys(('IEMOP_HLP_IN_VMX_OPERATION', 'IEMOP_HLP_VMX_INSTR',), True);
2785
def checkForDoneDecoding(self, aoStmts):
    """
    Checks that the block contains exactly one IEMOP_HLP_DONE_*DECODING*
    macro invocation and that statements are on the right side of it.

    Returns None on success, error string on failure.

    This ensures safe instruction restarting in case the recompiler runs
    out of TB resources during recompilation (e.g. aRanges or aGCPhysPages
    entries).
    """
    sAfterDoneFmt = "statement #%u: Decoding statement following IEMOP_HLP_DONE_*DECODING*!";

    # The IEMOP_HLP_DONE_ stuff is not allowed inside conditionals, so only
    # the top level statement list is examined.
    cDone = 0;
    for idx, oCur in enumerate(aoStmts):
        if not oCur.isCppStmt():
            # MC statement.
            sMcName = oCur.sName;
            if idx == 0 and sMcName.startswith('IEM_MC_DEFER_TO_CIMPL_'): # implicit
                cDone += 1;
            elif cDone == 0:
                # Column 1 of the parser table flags state modifying statements.
                if g_dMcStmtParsers.get(sMcName, (None, False))[1]:
                    return "statement #%u: State modifying MC statement before IEMOP_HLP_DONE_*DECODING*!" % (idx + 1,);
            elif sMcName == 'IEM_MC_CALC_RM_EFF_ADDR':
                return sAfterDoneFmt % (idx + 1,);
            continue;

        # C/C++ statement: classify it by its first word, if it has one.
        oWord = self.koReCppFirstWord.match(oCur.asParams[0]);
        if oWord is None:
            continue;
        sWord = oWord.group(1);
        if sWord.startswith(('IEMOP_HLP_DONE_', 'IEMOP_HLP_DECODED_')):
            cDone += 1;
        elif cDone > 0 and oCur.fDecode and sWord not in self.kdDecodeCppStmtOkayAfterDone:
            return sAfterDoneFmt % (idx + 1,);

    if cDone == 1:
        return None;
    if cDone == 0:
        return "Block is missing IEMOP_HLP_DONE_*DECODING* invocation!";
    return "Block has more than one IEMOP_HLP_DONE_*DECODING* invocation!";
2827
def checkForFetchAfterRef(self, aoStmts, asRegRefClasses):
    """
    Checks that the register references are placed after register fetches
    from the same register class.

    Parameters:
        aoStmts         - The statements to check.  Branches of McStmtCond
                          statements are checked recursively.
        asRegRefClasses - Dictionary of the register classes referenced so
                          far (class name -> True).  It is updated in place
                          and shared with the recursive calls; pass an empty
                          dictionary to start.

    Returns None on success, error string on failure.

    The register class is the name component following '_REF_' or
    '_FETCH_' (e.g. 'GREG' for IEM_MC_REF_GREG_U16, 'EFLAGS' for
    IEM_MC_REF_EFLAGS).  Memory fetches ('_FETCH_MEM...') are not checked.

    Example:
        SHL CH, CL

    If the CH reference is created first, the fetching of CL will cause the
    RCX guest register to have an active shadow register when it's being
    updated.  The shadow register will then be stale after the SHL operation
    completes, without us noticing.

    It's easier to ensure we've got correct code than complicating the
    recompiler code with safeguards here.
    """
    for iStmt, oStmt in enumerate(aoStmts):
        if not oStmt.isCppStmt():
            offRef = oStmt.sName.find("_REF_");
            if offRef > 0:
                if oStmt.sName in ('IEM_MC_IF_FPUREG_NOT_EMPTY_REF_R80',
                                   'IEM_MC_IF_TWO_FPUREGS_NOT_EMPTY_REF_R80',
                                   'IEM_MC_IF_TWO_FPUREGS_NOT_EMPTY_REF_R80_FIRST',):
                    sClass = 'FPUREG';
                else:
                    offUnderscore = oStmt.sName.find('_', offRef + 5);
                    if offUnderscore > 0:
                        assert offUnderscore > offRef;
                        sClass = oStmt.sName[offRef + 5 : offUnderscore];
                    else:
                        # No further underscore: the class is the rest of the name
                        # (e.g. 'EFLAGS'), not just its first character.
                        sClass = oStmt.sName[offRef + 5 : ];
                asRegRefClasses[sClass] = True;
            else:
                offFetch = oStmt.sName.find("_FETCH_");
                if offFetch > 0:
                    sClass = oStmt.sName[offFetch + 7 : ];
                    if not sClass.startswith("MEM"):
                        offUnderscore = sClass.find('_');
                        if offUnderscore >= 0:
                            assert offUnderscore > 0;
                            sClass = sClass[:offUnderscore];
                        if sClass in asRegRefClasses:
                            return "statement #%u: %s following REF! That'll mess up guest register shadowing" \
                                 % (iStmt + 1, oStmt.sName,);

        # Go into branches.
        if isinstance(oStmt, McStmtCond):
            sRet = self.checkForFetchAfterRef(oStmt.aoIfBranch, asRegRefClasses);
            if sRet:
                return sRet;
            sRet = self.checkForFetchAfterRef(oStmt.aoElseBranch, asRegRefClasses);
            if sRet:
                return sRet;
    return None;
2883
def check(self):
    """
    Runs the sanity checks on the (decoded) block.

    Each checker is run once, in a fixed order, and contributes at most
    one message.

    Returns error string list, empty if all is fine.
    """
    aoStmts = self.decode();
    asFindings = (
        self.checkForTooEarlyEffSegUse(aoStmts),
        self.checkForDoneDecoding(aoStmts),
        self.checkForFetchAfterRef(aoStmts, {}),
    );
    return [sFinding for sFinding in asFindings if sFinding];
2905
2906
2907
2908## IEM_MC_XXX -> parser + info dictionary.
2909#
2910# The info columns:
#   - col 1+0: boolean entry indicating whether the statement modifies state and
#              must not be used before IEMOP_HLP_DONE_*.
#   - col 1+1: boolean entry similar to the previous column, but used to decide
#              when to emit calls for conditional jumps (Jmp/NoJmp).
#              The difference is that most IEM_MC_IF_XXX entries are False here.
2916# - col 1+2: boolean entry indicating native recompiler support.
2917#
2918# The raw table was generated via the following command
2919# sed -n -e "s/^# *define *\(IEM_MC_[A-Z_0-9]*\)[ (].*$/ '\1': McBlock.parseMcGeneric,/p" include/IEMMc.h \
2920# | sort | uniq | gawk "{printf """ %%-60s (%%s, True)\n""", $1, $2}"
2921# pylint: disable=line-too-long
2922g_dMcStmtParsers = {
2923 'IEM_MC_ACTUALIZE_AVX_STATE_FOR_CHANGE': (McBlock.parseMcGeneric, False, False, True, ),
2924 'IEM_MC_ACTUALIZE_AVX_STATE_FOR_READ': (McBlock.parseMcGeneric, False, False, True, ),
2925 'IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE': (McBlock.parseMcGeneric, False, False, True, ),
2926 'IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ': (McBlock.parseMcGeneric, False, False, True, ),
2927 'IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE': (McBlock.parseMcGeneric, False, False, True, ),
2928 'IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ': (McBlock.parseMcGeneric, False, False, True, ),
2929 'IEM_MC_ADD_GREG_U16': (McBlock.parseMcGeneric, True, True, True, ),
2930 'IEM_MC_ADD_GREG_U16_TO_LOCAL': (McBlock.parseMcGeneric, False, False, False, ),
2931 'IEM_MC_ADD_GREG_U32': (McBlock.parseMcGeneric, True, True, True, ),
2932 'IEM_MC_ADD_GREG_U32_TO_LOCAL': (McBlock.parseMcGeneric, False, False, False, ),
2933 'IEM_MC_ADD_GREG_U64': (McBlock.parseMcGeneric, True, True, True, ),
2934 'IEM_MC_ADD_GREG_U64_TO_LOCAL': (McBlock.parseMcGeneric, False, False, False, ),
2935 'IEM_MC_ADD_GREG_U8_TO_LOCAL': (McBlock.parseMcGeneric, False, False, False, ),
2936 'IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR': (McBlock.parseMcGeneric, True, True, False, ),
2937 'IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR': (McBlock.parseMcGeneric, True, True, False, ),
2938 'IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR': (McBlock.parseMcGeneric, True, True, False, ),
2939 'IEM_MC_ADVANCE_RIP_AND_FINISH': (McBlock.parseMcGeneric, True, True, True, ),
2940 'IEM_MC_AND_2LOCS_U32': (McBlock.parseMcGeneric, False, False, False, ),
2941 'IEM_MC_AND_ARG_U16': (McBlock.parseMcGeneric, False, False, False, ),
2942 'IEM_MC_AND_ARG_U32': (McBlock.parseMcGeneric, False, False, False, ),
2943 'IEM_MC_AND_ARG_U64': (McBlock.parseMcGeneric, False, False, False, ),
2944 'IEM_MC_AND_GREG_U16': (McBlock.parseMcGeneric, True, True, False, ),
2945 'IEM_MC_AND_GREG_U32': (McBlock.parseMcGeneric, True, True, False, ),
2946 'IEM_MC_AND_GREG_U64': (McBlock.parseMcGeneric, True, True, False, ),
2947 'IEM_MC_AND_GREG_U8': (McBlock.parseMcGeneric, True, True, False, ),
2948 'IEM_MC_AND_LOCAL_U16': (McBlock.parseMcGeneric, False, False, True, ),
2949 'IEM_MC_AND_LOCAL_U32': (McBlock.parseMcGeneric, False, False, True, ),
2950 'IEM_MC_AND_LOCAL_U64': (McBlock.parseMcGeneric, False, False, True, ),
2951 'IEM_MC_AND_LOCAL_U8': (McBlock.parseMcGeneric, False, False, True, ),
2952 'IEM_MC_ARG': (McBlock.parseMcArg, False, False, True, ),
2953 'IEM_MC_ARG_CONST': (McBlock.parseMcArgConst, False, False, True, ),
2954 'IEM_MC_ARG_LOCAL_EFLAGS': (McBlock.parseMcArgLocalEFlags, False, False, True, ),
2955 'IEM_MC_ARG_LOCAL_REF': (McBlock.parseMcArgLocalRef, False, False, True, ),
2956 'IEM_MC_ASSIGN_TO_SMALLER': (McBlock.parseMcGeneric, False, False, True, ),
2957 'IEM_MC_BEGIN': (McBlock.parseMcBegin, False, False, True, ),
2958 'IEM_MC_BROADCAST_XREG_U16_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
2959 'IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
2960 'IEM_MC_BROADCAST_XREG_U64_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
2961 'IEM_MC_BROADCAST_XREG_U8_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
2962 'IEM_MC_BROADCAST_YREG_U128_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
2963 'IEM_MC_BROADCAST_YREG_U16_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
2964 'IEM_MC_BROADCAST_YREG_U32_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
2965 'IEM_MC_BROADCAST_YREG_U64_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
2966 'IEM_MC_BROADCAST_YREG_U8_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
2967 'IEM_MC_BSWAP_LOCAL_U16': (McBlock.parseMcGeneric, False, False, True, ),
2968 'IEM_MC_BSWAP_LOCAL_U32': (McBlock.parseMcGeneric, False, False, True, ),
2969 'IEM_MC_BSWAP_LOCAL_U64': (McBlock.parseMcGeneric, False, False, True, ),
2970 'IEM_MC_CALC_RM_EFF_ADDR': (McBlock.parseMcGeneric, False, False, False, ),
2971 'IEM_MC_CALL_AIMPL_3': (McBlock.parseMcCallAImpl, True, True, True, ),
2972 'IEM_MC_CALL_AIMPL_4': (McBlock.parseMcCallAImpl, True, True, True, ),
2973 'IEM_MC_CALL_AVX_AIMPL_2': (McBlock.parseMcCallAvxAImpl, True, True, False, ),
2974 'IEM_MC_CALL_AVX_AIMPL_3': (McBlock.parseMcCallAvxAImpl, True, True, False, ),
2975 'IEM_MC_CALL_CIMPL_0': (McBlock.parseMcCallCImpl, True, True, False, ),
2976 'IEM_MC_CALL_CIMPL_1': (McBlock.parseMcCallCImpl, True, True, False, ),
2977 'IEM_MC_CALL_CIMPL_2': (McBlock.parseMcCallCImpl, True, True, False, ),
2978 'IEM_MC_CALL_CIMPL_3': (McBlock.parseMcCallCImpl, True, True, False, ),
2979 'IEM_MC_CALL_CIMPL_4': (McBlock.parseMcCallCImpl, True, True, False, ),
2980 'IEM_MC_CALL_CIMPL_5': (McBlock.parseMcCallCImpl, True, True, False, ),
2981 'IEM_MC_CALL_FPU_AIMPL_1': (McBlock.parseMcCallFpuAImpl, True, True, False, ),
2982 'IEM_MC_CALL_FPU_AIMPL_2': (McBlock.parseMcCallFpuAImpl, True, True, False, ),
2983 'IEM_MC_CALL_FPU_AIMPL_3': (McBlock.parseMcCallFpuAImpl, True, True, False, ),
2984 'IEM_MC_CALL_MMX_AIMPL_2': (McBlock.parseMcCallMmxAImpl, True, True, False, ),
2985 'IEM_MC_CALL_MMX_AIMPL_3': (McBlock.parseMcCallMmxAImpl, True, True, False, ),
2986 'IEM_MC_CALL_SSE_AIMPL_2': (McBlock.parseMcCallSseAImpl, True, True, False, ),
2987 'IEM_MC_CALL_SSE_AIMPL_3': (McBlock.parseMcCallSseAImpl, True, True, False, ),
2988 'IEM_MC_CALL_VOID_AIMPL_0': (McBlock.parseMcCallVoidAImpl, True, True, True, ),
2989 'IEM_MC_CALL_VOID_AIMPL_1': (McBlock.parseMcCallVoidAImpl, True, True, True, ),
2990 'IEM_MC_CALL_VOID_AIMPL_2': (McBlock.parseMcCallVoidAImpl, True, True, True, ),
2991 'IEM_MC_CALL_VOID_AIMPL_3': (McBlock.parseMcCallVoidAImpl, True, True, True, ),
2992 'IEM_MC_CALL_VOID_AIMPL_4': (McBlock.parseMcCallVoidAImpl, True, True, True, ),
2993 'IEM_MC_CLEAR_EFL_BIT': (McBlock.parseMcGeneric, True, True, False, ),
2994 'IEM_MC_CLEAR_FSW_EX': (McBlock.parseMcGeneric, True, True, False, ),
2995 'IEM_MC_CLEAR_HIGH_GREG_U64': (McBlock.parseMcGeneric, True, True, True, ),
2996 'IEM_MC_CLEAR_XREG_U32_MASK': (McBlock.parseMcGeneric, True, True, False, ),
2997 'IEM_MC_CLEAR_YREG_128_UP': (McBlock.parseMcGeneric, True, True, True, ),
2998 'IEM_MC_COMMIT_EFLAGS': (McBlock.parseMcGeneric, True, True, True, ),
2999 'IEM_MC_COPY_XREG_U128': (McBlock.parseMcGeneric, True, True, True, ),
3000 'IEM_MC_COPY_YREG_U128_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
3001 'IEM_MC_COPY_YREG_U256_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
3002 'IEM_MC_COPY_YREG_U64_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
3003 'IEM_MC_DEFER_TO_CIMPL_0_RET': (McBlock.parseMcDeferToCImpl, False, False, False, ),
3004 'IEM_MC_DEFER_TO_CIMPL_1_RET': (McBlock.parseMcDeferToCImpl, False, False, False, ),
3005 'IEM_MC_DEFER_TO_CIMPL_2_RET': (McBlock.parseMcDeferToCImpl, False, False, False, ),
3006 'IEM_MC_DEFER_TO_CIMPL_3_RET': (McBlock.parseMcDeferToCImpl, False, False, False, ),
3007 'IEM_MC_END': (McBlock.parseMcGeneric, True, True, True, ),
3008 'IEM_MC_FETCH_EFLAGS': (McBlock.parseMcGeneric, False, False, True, ),
3009 'IEM_MC_FETCH_EFLAGS_U8': (McBlock.parseMcGeneric, False, False, False, ),
3010 'IEM_MC_FETCH_FCW': (McBlock.parseMcGeneric, False, False, True, ),
3011 'IEM_MC_FETCH_FSW': (McBlock.parseMcGeneric, False, False, True, ),
3012 'IEM_MC_FETCH_GREG_U16': (McBlock.parseMcGeneric, False, False, True, ),
3013 'IEM_MC_FETCH_GREG_U16_SX_U32': (McBlock.parseMcGeneric, False, False, True, ),
3014 'IEM_MC_FETCH_GREG_U16_SX_U64': (McBlock.parseMcGeneric, False, False, True, ),
3015 'IEM_MC_FETCH_GREG_U16_ZX_U32': (McBlock.parseMcGeneric, False, False, True, ),
3016 'IEM_MC_FETCH_GREG_U16_ZX_U64': (McBlock.parseMcGeneric, False, False, True, ),
3017 'IEM_MC_FETCH_GREG_U32': (McBlock.parseMcGeneric, False, False, True, ),
3018 'IEM_MC_FETCH_GREG_U32_SX_U64': (McBlock.parseMcGeneric, False, False, True, ),
3019 'IEM_MC_FETCH_GREG_U32_ZX_U64': (McBlock.parseMcGeneric, False, False, True, ),
3020 'IEM_MC_FETCH_GREG_U64': (McBlock.parseMcGeneric, False, False, True, ),
3021 'IEM_MC_FETCH_GREG_U64_ZX_U64': (McBlock.parseMcGeneric, False, False, True, ),
3022 'IEM_MC_FETCH_GREG_U8': (McBlock.parseMcGeneric, False, False, True, ), # thrd var
3023 'IEM_MC_FETCH_GREG_U8_SX_U16': (McBlock.parseMcGeneric, False, False, True, ), # thrd var
3024 'IEM_MC_FETCH_GREG_U8_SX_U32': (McBlock.parseMcGeneric, False, False, True, ), # thrd var
3025 'IEM_MC_FETCH_GREG_U8_SX_U64': (McBlock.parseMcGeneric, False, False, True, ), # thrd var
3026 'IEM_MC_FETCH_GREG_U8_ZX_U16': (McBlock.parseMcGeneric, False, False, True, ), # thrd var
3027 'IEM_MC_FETCH_GREG_U8_ZX_U32': (McBlock.parseMcGeneric, False, False, True, ), # thrd var
3028 'IEM_MC_FETCH_GREG_U8_ZX_U64': (McBlock.parseMcGeneric, False, False, True, ), # thrd var
3029 'IEM_MC_FETCH_GREG_PAIR_U32': (McBlock.parseMcGeneric, False, False, False, ),
3030 'IEM_MC_FETCH_GREG_PAIR_U64': (McBlock.parseMcGeneric, False, False, False, ),
3031 'IEM_MC_FETCH_MEM_D80': (McBlock.parseMcGeneric, True, True, False, ),
3032 'IEM_MC_FETCH_MEM_I16': (McBlock.parseMcGeneric, True, True, False, ),
3033 'IEM_MC_FETCH_MEM_I32': (McBlock.parseMcGeneric, True, True, False, ),
3034 'IEM_MC_FETCH_MEM_I64': (McBlock.parseMcGeneric, True, True, False, ),
3035 'IEM_MC_FETCH_MEM_R32': (McBlock.parseMcGeneric, True, True, False, ),
3036 'IEM_MC_FETCH_MEM_R64': (McBlock.parseMcGeneric, True, True, False, ),
3037 'IEM_MC_FETCH_MEM_R80': (McBlock.parseMcGeneric, True, True, False, ),
3038 'IEM_MC_FETCH_MEM_U128': (McBlock.parseMcGeneric, True, True, False, ),
3039 'IEM_MC_FETCH_MEM_U128_ALIGN_SSE': (McBlock.parseMcGeneric, True, True, False, ),
3040 'IEM_MC_FETCH_MEM_U128_NO_AC': (McBlock.parseMcGeneric, True, True, False, ),
3041 'IEM_MC_FETCH_MEM_U128_AND_XREG_U128': (McBlock.parseMcGeneric, True, True, False, ),
3042 'IEM_MC_FETCH_MEM_U128_AND_XREG_U128_AND_RAX_RDX_U64': (McBlock.parseMcGeneric, True, True, False, ),
3043 'IEM_MC_FETCH_MEM_U128_AND_XREG_U128_AND_EAX_EDX_U32_SX_U64':(McBlock.parseMcGeneric, True, True, False, ),
3044 'IEM_MC_FETCH_MEM_U16': (McBlock.parseMcGeneric, True, True, True, ),
3045 'IEM_MC_FETCH_MEM_U16_DISP': (McBlock.parseMcGeneric, True, True, True, ),
3046 'IEM_MC_FETCH_MEM_U16_SX_U32': (McBlock.parseMcGeneric, True, True, True, ), # movsx
3047 'IEM_MC_FETCH_MEM_U16_SX_U64': (McBlock.parseMcGeneric, True, True, True, ), # movsx
3048 'IEM_MC_FETCH_MEM_U16_ZX_U32': (McBlock.parseMcGeneric, True, True, True, ), # movzx
3049 'IEM_MC_FETCH_MEM_U16_ZX_U64': (McBlock.parseMcGeneric, True, True, True, ), # movzx
3050 'IEM_MC_FETCH_MEM_U256': (McBlock.parseMcGeneric, True, True, False, ),
3051 'IEM_MC_FETCH_MEM_U256_ALIGN_AVX': (McBlock.parseMcGeneric, True, True, False, ),
3052 'IEM_MC_FETCH_MEM_U256_NO_AC': (McBlock.parseMcGeneric, True, True, False, ),
3053 'IEM_MC_FETCH_MEM_U32': (McBlock.parseMcGeneric, True, True, True, ),
3054 'IEM_MC_FETCH_MEM_U32_DISP': (McBlock.parseMcGeneric, True, True, True, ), #bounds only
3055 'IEM_MC_FETCH_MEM_U32_SX_U64': (McBlock.parseMcGeneric, True, True, True, ), # movsx
3056 'IEM_MC_FETCH_MEM_U32_ZX_U64': (McBlock.parseMcGeneric, True, True, True, ), # movzx
3057 'IEM_MC_FETCH_MEM_U64': (McBlock.parseMcGeneric, True, True, True, ),
3058 'IEM_MC_FETCH_MEM_U64_ALIGN_U128': (McBlock.parseMcGeneric, True, True, False, ),
3059 'IEM_MC_FETCH_MEM_U8': (McBlock.parseMcGeneric, True, True, True, ),
3060 'IEM_MC_FETCH_MEM_U8_SX_U16': (McBlock.parseMcGeneric, True, True, True, ), # movsx
3061 'IEM_MC_FETCH_MEM_U8_SX_U32': (McBlock.parseMcGeneric, True, True, True, ), # movsx
3062 'IEM_MC_FETCH_MEM_U8_SX_U64': (McBlock.parseMcGeneric, True, True, True, ), # movsx
3063 'IEM_MC_FETCH_MEM_U8_ZX_U16': (McBlock.parseMcGeneric, True, True, True, ), # movzx
3064 'IEM_MC_FETCH_MEM_U8_ZX_U32': (McBlock.parseMcGeneric, True, True, True, ), # movzx
3065 'IEM_MC_FETCH_MEM_U8_ZX_U64': (McBlock.parseMcGeneric, True, True, True, ), # movzx
3066 'IEM_MC_FETCH_MEM_XMM': (McBlock.parseMcGeneric, True, True, False, ),
3067 'IEM_MC_FETCH_MEM_XMM_ALIGN_SSE': (McBlock.parseMcGeneric, True, True, False, ),
3068 'IEM_MC_FETCH_MEM_XMM_NO_AC': (McBlock.parseMcGeneric, True, True, False, ),
3069 'IEM_MC_FETCH_MEM_XMM_U32': (McBlock.parseMcGeneric, True, True, False, ),
3070 'IEM_MC_FETCH_MEM_XMM_U64': (McBlock.parseMcGeneric, True, True, False, ),
3071 'IEM_MC_FETCH_MEM_XMM_ALIGN_SSE_AND_XREG_XMM': (McBlock.parseMcGeneric, True, True, False, ),
3072 'IEM_MC_FETCH_MEM_XMM_U32_AND_XREG_XMM': (McBlock.parseMcGeneric, True, True, False, ),
3073 'IEM_MC_FETCH_MEM_XMM_U64_AND_XREG_XMM': (McBlock.parseMcGeneric, True, True, False, ),
3074 'IEM_MC_FETCH_MEM_YMM': (McBlock.parseMcGeneric, True, True, False, ),
3075 'IEM_MC_FETCH_MEM_YMM_ALIGN_AVX': (McBlock.parseMcGeneric, True, True, False, ),
3076 'IEM_MC_FETCH_MEM_YMM_NO_AC': (McBlock.parseMcGeneric, True, True, False, ),
3077 'IEM_MC_FETCH_MEM16_U8': (McBlock.parseMcGeneric, True, True, False, ),
3078 'IEM_MC_FETCH_MEM32_U8': (McBlock.parseMcGeneric, True, True, False, ),
3079 'IEM_MC_FETCH_MREG_U32': (McBlock.parseMcGeneric, False, False, False, ),
3080 'IEM_MC_FETCH_MREG_U64': (McBlock.parseMcGeneric, False, False, False, ),
3081 'IEM_MC_FETCH_SREG_BASE_U32': (McBlock.parseMcGeneric, False, False, False, ),
3082 'IEM_MC_FETCH_SREG_BASE_U64': (McBlock.parseMcGeneric, False, False, False, ),
3083 'IEM_MC_FETCH_SREG_U16': (McBlock.parseMcGeneric, False, False, True, ),
3084 'IEM_MC_FETCH_SREG_ZX_U32': (McBlock.parseMcGeneric, False, False, True, ),
3085 'IEM_MC_FETCH_SREG_ZX_U64': (McBlock.parseMcGeneric, False, False, True, ),
3086 'IEM_MC_FETCH_XREG_U128': (McBlock.parseMcGeneric, False, False, False, ),
3087 'IEM_MC_FETCH_XREG_U16': (McBlock.parseMcGeneric, False, False, False, ),
3088 'IEM_MC_FETCH_XREG_U32': (McBlock.parseMcGeneric, False, False, True, ),
3089 'IEM_MC_FETCH_XREG_U64': (McBlock.parseMcGeneric, False, False, True, ),
3090 'IEM_MC_FETCH_XREG_U8': (McBlock.parseMcGeneric, False, False, False, ),
3091 'IEM_MC_FETCH_XREG_XMM': (McBlock.parseMcGeneric, False, False, False, ),
3092 'IEM_MC_FETCH_XREG_PAIR_U128': (McBlock.parseMcGeneric, False, False, False, ),
3093 'IEM_MC_FETCH_XREG_PAIR_U128_AND_RAX_RDX_U64': (McBlock.parseMcGeneric, False, False, False, ),
3094 'IEM_MC_FETCH_XREG_PAIR_U128_AND_EAX_EDX_U32_SX_U64': (McBlock.parseMcGeneric, False, False, False, ),
3095 'IEM_MC_FETCH_XREG_PAIR_XMM': (McBlock.parseMcGeneric, False, False, False, ),
3096 'IEM_MC_FETCH_YREG_U128': (McBlock.parseMcGeneric, False, False, False, ),
3097 'IEM_MC_FETCH_YREG_U256': (McBlock.parseMcGeneric, False, False, False, ),
3098 'IEM_MC_FETCH_YREG_U32': (McBlock.parseMcGeneric, False, False, True, ),
3099 'IEM_MC_FETCH_YREG_U64': (McBlock.parseMcGeneric, False, False, True, ),
3100 'IEM_MC_FLIP_EFL_BIT': (McBlock.parseMcGeneric, True, True, False, ),
3101 'IEM_MC_FPU_FROM_MMX_MODE': (McBlock.parseMcGeneric, True, True, False, ),
3102 'IEM_MC_FPU_STACK_DEC_TOP': (McBlock.parseMcGeneric, True, True, False, ),
3103 'IEM_MC_FPU_STACK_FREE': (McBlock.parseMcGeneric, True, True, False, ),
3104 'IEM_MC_FPU_STACK_INC_TOP': (McBlock.parseMcGeneric, True, True, False, ),
3105 'IEM_MC_FPU_STACK_PUSH_OVERFLOW': (McBlock.parseMcGeneric, True, True, False, ),
3106 'IEM_MC_FPU_STACK_PUSH_OVERFLOW_MEM_OP': (McBlock.parseMcGeneric, True, True, False, ),
3107 'IEM_MC_FPU_STACK_PUSH_UNDERFLOW': (McBlock.parseMcGeneric, True, True, False, ),
3108 'IEM_MC_FPU_STACK_PUSH_UNDERFLOW_TWO': (McBlock.parseMcGeneric, True, True, False, ),
3109 'IEM_MC_FPU_STACK_UNDERFLOW': (McBlock.parseMcGeneric, True, True, False, ),
3110 'IEM_MC_FPU_STACK_UNDERFLOW_MEM_OP': (McBlock.parseMcGeneric, True, True, False, ),
3111 'IEM_MC_FPU_STACK_UNDERFLOW_MEM_OP_THEN_POP': (McBlock.parseMcGeneric, True, True, False, ),
3112 'IEM_MC_FPU_STACK_UNDERFLOW_THEN_POP': (McBlock.parseMcGeneric, True, True, False, ),
3113 'IEM_MC_FPU_STACK_UNDERFLOW_THEN_POP_POP': (McBlock.parseMcGeneric, True, True, False, ),
3114 'IEM_MC_FPU_TO_MMX_MODE': (McBlock.parseMcGeneric, True, True, False, ),
3115 'IEM_MC_HINT_FLUSH_GUEST_SHADOW': (McBlock.parseMcGeneric, True, True, True, ),
3116 'IEM_MC_IF_CX_IS_NZ': (McBlock.parseMcGenericCond, True, False, True, ),
3117 'IEM_MC_IF_CX_IS_NOT_ONE': (McBlock.parseMcGenericCond, True, False, True, ),
3118 'IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3119 'IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3120 'IEM_MC_IF_ECX_IS_NZ': (McBlock.parseMcGenericCond, True, False, True, ),
3121 'IEM_MC_IF_ECX_IS_NOT_ONE': (McBlock.parseMcGenericCond, True, False, True, ),
3122 'IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3123 'IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3124 'IEM_MC_IF_EFL_ANY_BITS_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3125 'IEM_MC_IF_EFL_BIT_NOT_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3126 'IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ': (McBlock.parseMcGenericCond, True, False, True, ),
3127 'IEM_MC_IF_EFL_BIT_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3128 'IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE': (McBlock.parseMcGenericCond, True, False, True, ),
3129 'IEM_MC_IF_EFL_BITS_EQ': (McBlock.parseMcGenericCond, True, False, True, ),
3130 'IEM_MC_IF_EFL_BITS_NE': (McBlock.parseMcGenericCond, True, False, True, ),
3131 'IEM_MC_IF_EFL_NO_BITS_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3132 'IEM_MC_IF_FCW_IM': (McBlock.parseMcGenericCond, True, True, False, ),
3133 'IEM_MC_IF_FPUREG_IS_EMPTY': (McBlock.parseMcGenericCond, True, True, False, ),
3134 'IEM_MC_IF_FPUREG_NOT_EMPTY': (McBlock.parseMcGenericCond, True, True, False, ),
3135 'IEM_MC_IF_FPUREG_NOT_EMPTY_REF_R80': (McBlock.parseMcGenericCond, True, True, False, ),
3136 'IEM_MC_IF_GREG_BIT_SET': (McBlock.parseMcGenericCond, True, False, False, ),
3137 'IEM_MC_IF_LOCAL_IS_Z': (McBlock.parseMcGenericCond, True, False, False, ),
3138 'IEM_MC_IF_MXCSR_XCPT_PENDING': (McBlock.parseMcGenericCond, True, True, False, ),
3139 'IEM_MC_IF_RCX_IS_NZ': (McBlock.parseMcGenericCond, True, False, True, ),
3140 'IEM_MC_IF_RCX_IS_NOT_ONE': (McBlock.parseMcGenericCond, True, False, True, ),
3141 'IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3142 'IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET': (McBlock.parseMcGenericCond, True, False, True, ),
3143 'IEM_MC_IF_TWO_FPUREGS_NOT_EMPTY_REF_R80': (McBlock.parseMcGenericCond, True, True, False, ),
3144 'IEM_MC_IF_TWO_FPUREGS_NOT_EMPTY_REF_R80_FIRST': (McBlock.parseMcGenericCond, True, True, False, ),
3145 'IEM_MC_IMPLICIT_AVX_AIMPL_ARGS': (McBlock.parseMcImplicitAvxAArgs, False, False, False, ),
3146 'IEM_MC_INT_CLEAR_ZMM_256_UP': (McBlock.parseMcGeneric, True, True, False, ),
3147 'IEM_MC_LOCAL': (McBlock.parseMcLocal, False, False, True, ),
3148 'IEM_MC_LOCAL_ASSIGN': (McBlock.parseMcLocalAssign, False, False, True, ),
3149 'IEM_MC_LOCAL_CONST': (McBlock.parseMcLocalConst, False, False, True, ),
3150 'IEM_MC_LOCAL_EFLAGS': (McBlock.parseMcLocalEFlags, True, True, True, ),
3151 'IEM_MC_NOREF': (McBlock.parseMcGeneric, False, False, True, ),
3152 'IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT': (McBlock.parseMcGeneric, True, True, True, ),
3153 'IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE': (McBlock.parseMcGeneric, True, True, True, ),
3154 'IEM_MC_MAYBE_RAISE_FPU_XCPT': (McBlock.parseMcGeneric, True, True, True, ),
3155 'IEM_MC_MAYBE_RAISE_FSGSBASE_XCPT': (McBlock.parseMcGeneric, True, True, False, ),
3156 'IEM_MC_MAYBE_RAISE_MMX_RELATED_XCPT': (McBlock.parseMcGeneric, True, True, False, ),
3157 'IEM_MC_MAYBE_RAISE_NON_CANONICAL_ADDR_GP0': (McBlock.parseMcGeneric, True, True, False, ),
3158 'IEM_MC_MAYBE_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT': (McBlock.parseMcGeneric, True, True, False, ),
3159 'IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT': (McBlock.parseMcGeneric, True, True, True, ),
3160 'IEM_MC_MAYBE_RAISE_WAIT_DEVICE_NOT_AVAILABLE': (McBlock.parseMcGeneric, True, True, False, ),
3161 'IEM_MC_MEM_COMMIT_AND_UNMAP_ATOMIC': (McBlock.parseMcGeneric, True, True, True, ),
3162 'IEM_MC_MEM_COMMIT_AND_UNMAP_RW': (McBlock.parseMcGeneric, True, True, True, ),
3163 'IEM_MC_MEM_COMMIT_AND_UNMAP_RO': (McBlock.parseMcGeneric, True, True, True, ),
3164 'IEM_MC_MEM_COMMIT_AND_UNMAP_WO': (McBlock.parseMcGeneric, True, True, True, ),
3165 'IEM_MC_MEM_COMMIT_AND_UNMAP_FOR_FPU_STORE_WO': (McBlock.parseMcGeneric, True, True, False, ),
3166 'IEM_MC_MEM_MAP_D80_WO': (McBlock.parseMcGeneric, True, True, True, ),
3167 'IEM_MC_MEM_MAP_I16_WO': (McBlock.parseMcGeneric, True, True, True, ),
3168 'IEM_MC_MEM_MAP_I32_WO': (McBlock.parseMcGeneric, True, True, True, ),
3169 'IEM_MC_MEM_MAP_I64_WO': (McBlock.parseMcGeneric, True, True, True, ),
3170 'IEM_MC_MEM_MAP_R32_WO': (McBlock.parseMcGeneric, True, True, True, ),
3171 'IEM_MC_MEM_MAP_R64_WO': (McBlock.parseMcGeneric, True, True, True, ),
3172 'IEM_MC_MEM_MAP_R80_WO': (McBlock.parseMcGeneric, True, True, True, ),
3173 'IEM_MC_MEM_MAP_U8_ATOMIC': (McBlock.parseMcGeneric, True, True, True, ),
3174 'IEM_MC_MEM_MAP_U8_RW': (McBlock.parseMcGeneric, True, True, True, ),
3175 'IEM_MC_MEM_MAP_U8_RO': (McBlock.parseMcGeneric, True, True, True, ),
3176 'IEM_MC_MEM_MAP_U8_WO': (McBlock.parseMcGeneric, True, True, True, ),
3177 'IEM_MC_MEM_MAP_U16_ATOMIC': (McBlock.parseMcGeneric, True, True, True, ),
3178 'IEM_MC_MEM_MAP_U16_RW': (McBlock.parseMcGeneric, True, True, True, ),
3179 'IEM_MC_MEM_MAP_U16_RO': (McBlock.parseMcGeneric, True, True, True, ),
3180 'IEM_MC_MEM_MAP_U16_WO': (McBlock.parseMcGeneric, True, True, True, ),
3181 'IEM_MC_MEM_MAP_U32_ATOMIC': (McBlock.parseMcGeneric, True, True, True, ),
3182 'IEM_MC_MEM_MAP_U32_RW': (McBlock.parseMcGeneric, True, True, True, ),
3183 'IEM_MC_MEM_MAP_U32_RO': (McBlock.parseMcGeneric, True, True, True, ),
3184 'IEM_MC_MEM_MAP_U32_WO': (McBlock.parseMcGeneric, True, True, True, ),
3185 'IEM_MC_MEM_MAP_U64_ATOMIC': (McBlock.parseMcGeneric, True, True, True, ),
3186 'IEM_MC_MEM_MAP_U64_RW': (McBlock.parseMcGeneric, True, True, True, ),
3187 'IEM_MC_MEM_MAP_U64_RO': (McBlock.parseMcGeneric, True, True, True, ),
3188 'IEM_MC_MEM_MAP_U64_WO': (McBlock.parseMcGeneric, True, True, True, ),
3189 'IEM_MC_MEM_MAP_U128_ATOMIC': (McBlock.parseMcGeneric, True, True, True, ),
3190 'IEM_MC_MEM_MAP_U128_RW': (McBlock.parseMcGeneric, True, True, True, ),
3191 'IEM_MC_MEM_MAP_U128_RO': (McBlock.parseMcGeneric, True, True, True, ),
3192 'IEM_MC_MEM_MAP_U128_WO': (McBlock.parseMcGeneric, True, True, True, ),
3193 'IEM_MC_MEM_ROLLBACK_AND_UNMAP_WO': (McBlock.parseMcGeneric, True, True, True, ),
3194 'IEM_MC_MERGE_YREG_U32_U96_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
3195 'IEM_MC_MERGE_YREG_U64_U64_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
3196 'IEM_MC_MERGE_YREG_U64HI_U64HI_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
3197 'IEM_MC_MERGE_YREG_U64LO_U64LO_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
3198 'IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
3199 'IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
3200 'IEM_MC_MODIFIED_MREG': (McBlock.parseMcGeneric, True, True, False, ),
3201 'IEM_MC_MODIFIED_MREG_BY_REF': (McBlock.parseMcGeneric, True, True, False, ),
3202 'IEM_MC_NATIVE_EMIT_0': (McBlock.parseMcGeneric, True, True, True, ),
3203 'IEM_MC_NATIVE_EMIT_1': (McBlock.parseMcGeneric, True, True, True, ),
3204 'IEM_MC_NATIVE_EMIT_2': (McBlock.parseMcGeneric, True, True, True, ),
3205 'IEM_MC_NATIVE_EMIT_3': (McBlock.parseMcGeneric, True, True, True, ),
3206 'IEM_MC_NATIVE_EMIT_4': (McBlock.parseMcGeneric, True, True, True, ),
3207 'IEM_MC_NATIVE_EMIT_5': (McBlock.parseMcGeneric, True, True, True, ),
3208 'IEM_MC_NATIVE_EMIT_6': (McBlock.parseMcGeneric, True, True, True, ),
3209 'IEM_MC_NATIVE_EMIT_7': (McBlock.parseMcGeneric, True, True, True, ),
3210 'IEM_MC_NATIVE_IF': (McBlock.parseMcNativeIf, False, False, True, ),
3211 'IEM_MC_NATIVE_ELSE': (McBlock.parseMcGenericCond, False, False, True, ),
3212 'IEM_MC_NATIVE_ENDIF': (McBlock.parseMcGenericCond, False, False, True, ),
3213 'IEM_MC_OR_2LOCS_U32': (McBlock.parseMcGeneric, False, False, False, ),
3214 'IEM_MC_OR_GREG_U16': (McBlock.parseMcGeneric, True, True, False, ),
3215 'IEM_MC_OR_GREG_U32': (McBlock.parseMcGeneric, True, True, False, ),
3216 'IEM_MC_OR_GREG_U64': (McBlock.parseMcGeneric, True, True, False, ),
3217 'IEM_MC_OR_GREG_U8': (McBlock.parseMcGeneric, True, True, False, ),
3218 'IEM_MC_OR_LOCAL_U16': (McBlock.parseMcGeneric, False, False, True, ),
3219 'IEM_MC_OR_LOCAL_U32': (McBlock.parseMcGeneric, False, False, True, ),
3220 'IEM_MC_OR_LOCAL_U8': (McBlock.parseMcGeneric, False, False, True, ),
3221 'IEM_MC_POP_GREG_U16': (McBlock.parseMcGeneric, True, True, True, ),
3222 'IEM_MC_POP_GREG_U32': (McBlock.parseMcGeneric, True, True, True, ),
3223 'IEM_MC_POP_GREG_U64': (McBlock.parseMcGeneric, True, True, True, ),
3224 'IEM_MC_PREPARE_AVX_USAGE': (McBlock.parseMcGeneric, False, False, True),
3225 'IEM_MC_PREPARE_FPU_USAGE': (McBlock.parseMcGeneric, False, False, True),
3226 'IEM_MC_PREPARE_SSE_USAGE': (McBlock.parseMcGeneric, False, False, True),
3227 'IEM_MC_PUSH_FPU_RESULT': (McBlock.parseMcGeneric, True, True, False, ),
3228 'IEM_MC_PUSH_FPU_RESULT_MEM_OP': (McBlock.parseMcGeneric, True, True, False, ),
3229 'IEM_MC_PUSH_FPU_RESULT_TWO': (McBlock.parseMcGeneric, True, True, False, ),
3230 'IEM_MC_PUSH_U16': (McBlock.parseMcGeneric, True, True, True, ),
3231 'IEM_MC_PUSH_U32': (McBlock.parseMcGeneric, True, True, True, ),
3232 'IEM_MC_PUSH_U32_SREG': (McBlock.parseMcGeneric, True, True, True, ),
3233 'IEM_MC_PUSH_U64': (McBlock.parseMcGeneric, True, True, True, ),
3234 'IEM_MC_RAISE_DIVIDE_ERROR': (McBlock.parseMcGeneric, True, True, False, ),
3235 'IEM_MC_RAISE_GP0_IF_CPL_NOT_ZERO': (McBlock.parseMcGeneric, True, True, False, ),
3236 'IEM_MC_RAISE_GP0_IF_EFF_ADDR_UNALIGNED': (McBlock.parseMcGeneric, True, True, False, ),
3237 'IEM_MC_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT': (McBlock.parseMcGeneric, True, True, True, ),
3238 'IEM_MC_REF_EFLAGS': (McBlock.parseMcGeneric, False, False, True, ),
3239 'IEM_MC_REF_FPUREG': (McBlock.parseMcGeneric, False, False, False, ),
3240 'IEM_MC_REF_GREG_I32': (McBlock.parseMcGeneric, False, False, True, ),
3241 'IEM_MC_REF_GREG_I32_CONST': (McBlock.parseMcGeneric, False, False, True, ),
3242 'IEM_MC_REF_GREG_I64': (McBlock.parseMcGeneric, False, False, True, ),
3243 'IEM_MC_REF_GREG_I64_CONST': (McBlock.parseMcGeneric, False, False, True, ),
3244 'IEM_MC_REF_GREG_U16': (McBlock.parseMcGeneric, False, False, True, ),
3245 'IEM_MC_REF_GREG_U16_CONST': (McBlock.parseMcGeneric, False, False, True, ),
3246 'IEM_MC_REF_GREG_U32': (McBlock.parseMcGeneric, False, False, True, ),
3247 'IEM_MC_REF_GREG_U32_CONST': (McBlock.parseMcGeneric, False, False, True, ),
3248 'IEM_MC_REF_GREG_U64': (McBlock.parseMcGeneric, False, False, True, ),
3249 'IEM_MC_REF_GREG_U64_CONST': (McBlock.parseMcGeneric, False, False, True, ),
3250 'IEM_MC_REF_GREG_U8': (McBlock.parseMcGeneric, False, False, False, ), # threaded
3251 'IEM_MC_REF_GREG_U8_CONST': (McBlock.parseMcGeneric, False, False, False, ), # threaded
3252 'IEM_MC_REF_MREG_U32_CONST': (McBlock.parseMcGeneric, False, False, False, ),
3253 'IEM_MC_REF_MREG_U64': (McBlock.parseMcGeneric, False, False, False, ),
3254 'IEM_MC_REF_MREG_U64_CONST': (McBlock.parseMcGeneric, False, False, False, ),
3255 'IEM_MC_REF_MXCSR': (McBlock.parseMcGeneric, False, False, True, ),
3256 'IEM_MC_REF_XREG_R32_CONST': (McBlock.parseMcGeneric, False, False, False, ),
3257 'IEM_MC_REF_XREG_R64_CONST': (McBlock.parseMcGeneric, False, False, False, ),
3258 'IEM_MC_REF_XREG_U128': (McBlock.parseMcGeneric, False, False, True, ),
3259 'IEM_MC_REF_XREG_U128_CONST': (McBlock.parseMcGeneric, False, False, True, ),
3260 'IEM_MC_REF_XREG_U32_CONST': (McBlock.parseMcGeneric, False, False, False, ),
3261 'IEM_MC_REF_XREG_U64_CONST': (McBlock.parseMcGeneric, False, False, False, ),
3262 'IEM_MC_REF_XREG_XMM_CONST': (McBlock.parseMcGeneric, False, False, True, ),
3263 'IEM_MC_REF_YREG_U128': (McBlock.parseMcGeneric, False, False, False, ),
3264 'IEM_MC_REF_YREG_U128_CONST': (McBlock.parseMcGeneric, False, False, False, ),
3265 'IEM_MC_REF_YREG_U64_CONST': (McBlock.parseMcGeneric, False, False, False, ),
3266 'IEM_MC_REL_JMP_S16_AND_FINISH': (McBlock.parseMcGeneric, True, True, False, ),
3267 'IEM_MC_REL_JMP_S32_AND_FINISH': (McBlock.parseMcGeneric, True, True, False, ),
3268 'IEM_MC_REL_JMP_S8_AND_FINISH': (McBlock.parseMcGeneric, True, True, False, ),
3269 'IEM_MC_RETURN_ON_FAILURE': (McBlock.parseMcGeneric, False, False, False, ),
3270 'IEM_MC_SAR_LOCAL_S16': (McBlock.parseMcGeneric, False, False, False, ),
3271 'IEM_MC_SAR_LOCAL_S32': (McBlock.parseMcGeneric, False, False, False, ),
3272 'IEM_MC_SAR_LOCAL_S64': (McBlock.parseMcGeneric, False, False, False, ),
3273 'IEM_MC_SET_EFL_BIT': (McBlock.parseMcGeneric, True, True, False, ),
3274 'IEM_MC_SET_FPU_RESULT': (McBlock.parseMcGeneric, True, True, False, ),
3275 'IEM_MC_SET_RIP_U16_AND_FINISH': (McBlock.parseMcGeneric, True, True, False, ),
3276 'IEM_MC_SET_RIP_U32_AND_FINISH': (McBlock.parseMcGeneric, True, True, False, ),
3277 'IEM_MC_SET_RIP_U64_AND_FINISH': (McBlock.parseMcGeneric, True, True, False, ),
3278 'IEM_MC_SHL_LOCAL_S16': (McBlock.parseMcGeneric, False, False, False, ),
3279 'IEM_MC_SHL_LOCAL_S32': (McBlock.parseMcGeneric, False, False, False, ),
3280 'IEM_MC_SHL_LOCAL_S64': (McBlock.parseMcGeneric, False, False, False, ),
3281 'IEM_MC_SHR_LOCAL_U8': (McBlock.parseMcGeneric, False, False, False, ),
3282 'IEM_MC_SSE_UPDATE_MXCSR': (McBlock.parseMcGeneric, True, True, False, ),
3283 'IEM_MC_STORE_FPU_RESULT': (McBlock.parseMcGeneric, True, True, False, ),
3284 'IEM_MC_STORE_FPU_RESULT_MEM_OP': (McBlock.parseMcGeneric, True, True, False, ),
3285 'IEM_MC_STORE_FPU_RESULT_THEN_POP': (McBlock.parseMcGeneric, True, True, False, ),
3286 'IEM_MC_STORE_FPU_RESULT_WITH_MEM_OP_THEN_POP': (McBlock.parseMcGeneric, True, True, False, ),
3287 'IEM_MC_STORE_FPUREG_R80_SRC_REF': (McBlock.parseMcGeneric, True, True, False, ),
3288 'IEM_MC_STORE_GREG_I64': (McBlock.parseMcGeneric, True, True, False, ),
3289 'IEM_MC_STORE_GREG_U16': (McBlock.parseMcGeneric, True, True, True, ),
3290 'IEM_MC_STORE_GREG_U16_CONST': (McBlock.parseMcGeneric, True, True, True, ),
3291 'IEM_MC_STORE_GREG_U32': (McBlock.parseMcGeneric, True, True, True, ),
3292 'IEM_MC_STORE_GREG_U32_CONST': (McBlock.parseMcGeneric, True, True, True, ),
3293 'IEM_MC_STORE_GREG_U64': (McBlock.parseMcGeneric, True, True, True, ),
3294 'IEM_MC_STORE_GREG_U64_CONST': (McBlock.parseMcGeneric, True, True, True, ),
3295 'IEM_MC_STORE_GREG_U8': (McBlock.parseMcGeneric, True, True, True, ), # thrd var
3296 'IEM_MC_STORE_GREG_U8_CONST': (McBlock.parseMcGeneric, True, True, True, ), # thrd var
3297 'IEM_MC_STORE_GREG_PAIR_U32': (McBlock.parseMcGeneric, True, True, False, ),
3298 'IEM_MC_STORE_GREG_PAIR_U64': (McBlock.parseMcGeneric, True, True, False, ),
3299 'IEM_MC_STORE_MEM_I16_CONST_BY_REF': (McBlock.parseMcGeneric, True, True, False, ),
3300 'IEM_MC_STORE_MEM_I32_CONST_BY_REF': (McBlock.parseMcGeneric, True, True, False, ),
3301 'IEM_MC_STORE_MEM_I64_CONST_BY_REF': (McBlock.parseMcGeneric, True, True, False, ),
3302 'IEM_MC_STORE_MEM_I8_CONST_BY_REF': (McBlock.parseMcGeneric, True, True, False, ),
3303 'IEM_MC_STORE_MEM_INDEF_D80_BY_REF': (McBlock.parseMcGeneric, True, True, False, ),
3304 'IEM_MC_STORE_MEM_NEG_QNAN_R32_BY_REF': (McBlock.parseMcGeneric, True, True, False, ),
3305 'IEM_MC_STORE_MEM_NEG_QNAN_R64_BY_REF': (McBlock.parseMcGeneric, True, True, False, ),
3306 'IEM_MC_STORE_MEM_NEG_QNAN_R80_BY_REF': (McBlock.parseMcGeneric, True, True, False, ),
3307 'IEM_MC_STORE_MEM_U128': (McBlock.parseMcGeneric, True, True, False, ),
3308 'IEM_MC_STORE_MEM_U128_NO_AC': (McBlock.parseMcGeneric, True, True, False, ),
3309 'IEM_MC_STORE_MEM_U128_ALIGN_SSE': (McBlock.parseMcGeneric, True, True, False, ),
3310 'IEM_MC_STORE_MEM_U16': (McBlock.parseMcGeneric, True, True, True, ),
3311 'IEM_MC_STORE_MEM_U16_CONST': (McBlock.parseMcGeneric, True, True, True, ),
3312 'IEM_MC_STORE_MEM_U256': (McBlock.parseMcGeneric, True, True, False, ),
3313 'IEM_MC_STORE_MEM_U256_NO_AC': (McBlock.parseMcGeneric, True, True, False, ),
3314 'IEM_MC_STORE_MEM_U256_ALIGN_AVX': (McBlock.parseMcGeneric, True, True, False, ),
3315 'IEM_MC_STORE_MEM_U32': (McBlock.parseMcGeneric, True, True, True, ),
3316 'IEM_MC_STORE_MEM_U32_CONST': (McBlock.parseMcGeneric, True, True, True, ),
3317 'IEM_MC_STORE_MEM_U64': (McBlock.parseMcGeneric, True, True, True, ),
3318 'IEM_MC_STORE_MEM_U64_CONST': (McBlock.parseMcGeneric, True, True, True, ),
3319 'IEM_MC_STORE_MEM_U8': (McBlock.parseMcGeneric, True, True, True, ),
3320 'IEM_MC_STORE_MEM_U8_CONST': (McBlock.parseMcGeneric, True, True, True, ),
3321 'IEM_MC_STORE_MREG_U32_ZX_U64': (McBlock.parseMcGeneric, True, True, False, ),
3322 'IEM_MC_STORE_MREG_U64': (McBlock.parseMcGeneric, True, True, False, ),
3323 'IEM_MC_STORE_SREG_BASE_U32': (McBlock.parseMcGeneric, True, True, False, ),
3324 'IEM_MC_STORE_SREG_BASE_U64': (McBlock.parseMcGeneric, True, True, False, ),
3325 'IEM_MC_STORE_SSE_RESULT': (McBlock.parseMcGeneric, True, True, False, ),
3326 'IEM_MC_STORE_XREG_R32': (McBlock.parseMcGeneric, True, True, False, ),
3327 'IEM_MC_STORE_XREG_R64': (McBlock.parseMcGeneric, True, True, False, ),
3328 'IEM_MC_STORE_XREG_U128': (McBlock.parseMcGeneric, True, True, False, ),
3329 'IEM_MC_STORE_XREG_U16': (McBlock.parseMcGeneric, True, True, False, ),
3330 'IEM_MC_STORE_XREG_U32': (McBlock.parseMcGeneric, True, True, True, ),
3331 'IEM_MC_STORE_XREG_U32_U128': (McBlock.parseMcGeneric, True, True, False, ),
3332 'IEM_MC_STORE_XREG_U32_ZX_U128': (McBlock.parseMcGeneric, True, True, False, ),
3333 'IEM_MC_STORE_XREG_U64': (McBlock.parseMcGeneric, True, True, True, ),
3334 'IEM_MC_STORE_XREG_U64_ZX_U128': (McBlock.parseMcGeneric, True, True, False, ),
3335 'IEM_MC_STORE_XREG_U8': (McBlock.parseMcGeneric, True, True, False, ),
3336 'IEM_MC_STORE_XREG_XMM': (McBlock.parseMcGeneric, True, True, False, ),
3337 'IEM_MC_STORE_XREG_XMM_U32': (McBlock.parseMcGeneric, True, True, False, ),
3338 'IEM_MC_STORE_XREG_XMM_U64': (McBlock.parseMcGeneric, True, True, False, ),
3339 'IEM_MC_STORE_YREG_U128': (McBlock.parseMcGeneric, True, True, False, ),
3340 'IEM_MC_STORE_YREG_U128_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
3341 'IEM_MC_STORE_YREG_U256_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
3342 'IEM_MC_STORE_YREG_U32_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
3343 'IEM_MC_STORE_YREG_U64_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ),
3344 'IEM_MC_SUB_GREG_U16': (McBlock.parseMcGeneric, True, True, True, ),
3345 'IEM_MC_SUB_GREG_U32': (McBlock.parseMcGeneric, True, True, True, ),
3346 'IEM_MC_SUB_GREG_U64': (McBlock.parseMcGeneric, True, True, True, ),
3347 'IEM_MC_SUB_LOCAL_U16': (McBlock.parseMcGeneric, False, False, False, ),
3348 'IEM_MC_UPDATE_FPU_OPCODE_IP': (McBlock.parseMcGeneric, True, True, False, ),
3349 'IEM_MC_UPDATE_FSW': (McBlock.parseMcGeneric, True, True, False, ),
3350 'IEM_MC_UPDATE_FSW_CONST': (McBlock.parseMcGeneric, True, True, False, ),
3351 'IEM_MC_UPDATE_FSW_THEN_POP': (McBlock.parseMcGeneric, True, True, False, ),
3352 'IEM_MC_UPDATE_FSW_THEN_POP_POP': (McBlock.parseMcGeneric, True, True, False, ),
3353 'IEM_MC_UPDATE_FSW_WITH_MEM_OP': (McBlock.parseMcGeneric, True, True, False, ),
3354 'IEM_MC_UPDATE_FSW_WITH_MEM_OP_THEN_POP': (McBlock.parseMcGeneric, True, True, False, ),
3355 'IEM_MC_NO_NATIVE_RECOMPILE': (McBlock.parseMcGeneric, False, False, False, ),
3356};
3357# pylint: enable=line-too-long
3358
## List of microcode blocks.
# Module-level list that starts out empty; per the type comment the elements
# are McBlock instances.
g_aoMcBlocks = [] # type: List[McBlock]
3361
3362
3363
class ParserException(Exception):
    """
    Parser exception.

    Plain Exception subclass carrying a single message string.
    """

    def __init__(self, sMessage):
        # Hand the message to the base class so it ends up in self.args / str().
        super(ParserException, self).__init__(sMessage);
3368
3369
3370class SimpleParser(object): # pylint: disable=too-many-instance-attributes
3371 """
3372 Parser of IEMAllInstruction*.cpp.h instruction specifications.
3373 """
3374
3375 ## @name Parser state.
3376 ## @{
3377 kiCode = 0;
3378 kiCommentMulti = 1;
3379 ## @}
3380
class Macro(object):
    """
    Macro.

    Holds the name, parameter list, body and defining line of a macro and
    knows how to substitute argument values into the body.
    """

    def __init__(self, sName, asArgs, sBody, iLine):
        self.sName  = sName;    ##< The macro name.
        self.asArgs = asArgs;   ##< None if simple macro, list of parameters otherwise.
        self.sBody  = sBody;    ##< The macro body text.
        self.iLine  = iLine;    ##< Line number given by the creator.
        ## Matches any parameter name in the body, optionally with a '##' operator
        ## (and surrounding blanks) on either side.  None when there are no parameters.
        if asArgs:
            self.oReArgMatch = re.compile(r'(\s*##\s*|\b)(' + '|'.join(asArgs) + r')(\s*##\s*|\b)');
        else:
            self.oReArgMatch = None;

    @staticmethod
    def _needSpace(ch):
        """
        This is just to make the expanded output a bit prettier.

        Returns True when ch is whitespace and not an opening parenthesis.
        """
        return ch != '(' and ch.isspace();

    def expandMacro(self, oParent, asArgs = None):
        """
        Expands the macro body with the given arguments.

        oParent is unused.  asArgs must hold one value per macro parameter
        (asserted); for a macro without parameters it must be empty or None.
        Returns the expanded body string.
        """
        _ = oParent;

        # Parameterless macro: the body is returned as-is.
        if not self.oReArgMatch:
            assert not asArgs;
            return self.sBody;

        assert len(asArgs) == len(self.asArgs);
        #oParent.debug('%s: %s' % (self.sName, self.oReArgMatch.pattern,));

        # Parameter name -> argument value.
        dValues = {};
        for idxArg, sArgValue in enumerate(asArgs):
            dValues[self.asArgs[idxArg]] = sArgValue;

        sExpanded = self.sBody;
        oHit      = self.oReArgMatch.search(sExpanded);
        while oHit is not None:
            offStart     = oHit.start();
            offEnd       = oHit.end();
            sReplacement = dValues[oHit.group(2)];
            #oParent.debug('%s %s..%s (%s)' % (oHit.group(2), offStart, offEnd, oHit.group()));

            # Padding is only considered on a side where no '##' operator was matched.
            fPadBefore = not oHit.group(1) and offStart > 0 and self._needSpace(sExpanded[offStart]);
            fPadAfter  = not oHit.group(3) and offEnd < len(sExpanded) and self._needSpace(sExpanded[offEnd]);

            sExpanded = sExpanded[:offStart] \
                      + (' ' if fPadBefore else '') \
                      + sReplacement \
                      + (' ' if fPadAfter else '') \
                      + sExpanded[offEnd:];

            # Resume after the inserted value so it isn't rescanned for parameter names.
            oHit = self.oReArgMatch.search(sExpanded, offStart + len(sReplacement));

        return sExpanded;
3422
3423 class PreprocessorConditional(object):
3424 """ Preprocessor conditional (#if/#ifdef/#ifndef/#elif/#else/#endif). """
3425
3426 ## Known defines.
3427 # - A value of 1 indicates that it's always defined.
3428 # - A value of 0 if it's always undefined
3429 # - A value of -1 if it's an arch and it depends of script parameters.
3430 # - A value of -2 if it's not recognized when filtering MC blocks.
3431 kdKnownDefines = {
3432 'IEM_WITH_ONE_BYTE_TABLE': 1,
3433 'IEM_WITH_TWO_BYTE_TABLE': 1,
3434 'IEM_WITH_THREE_0F_38': 1,
3435 'IEM_WITH_THREE_0F_3A': 1,
3436 'IEM_WITH_THREE_BYTE_TABLES': 1,
3437 'IEM_WITH_3DNOW': 1,
3438 'IEM_WITH_3DNOW_TABLE': 1,
3439 'IEM_WITH_VEX': 1,
3440 'IEM_WITH_VEX_TABLES': 1,
3441 'VBOX_WITH_NESTED_HWVIRT_VMX': 1,
3442 'VBOX_WITH_NESTED_HWVIRT_VMX_EPT': 1,
3443 'VBOX_WITH_NESTED_HWVIRT_SVM': 1,
3444 'LOG_ENABLED': 1,
3445 'RT_WITHOUT_PRAGMA_ONCE': 0,
3446 'TST_IEM_CHECK_MC': 0,
3447 'IEM_WITHOUT_ASSEMBLY': -2, ##< @todo ??
3448 'RT_ARCH_AMD64': -1,
3449 'RT_ARCH_ARM64': -1,
3450 'RT_ARCH_ARM32': -1,
3451 'RT_ARCH_X86': -1,
3452 'RT_ARCH_SPARC': -1,
3453 'RT_ARCH_SPARC64': -1,
3454 };
3455 kdBuildArchToIprt = {
3456 'amd64': 'RT_ARCH_AMD64',
3457 'arm64': 'RT_ARCH_ARM64',
3458 'sparc32': 'RT_ARCH_SPARC64',
3459 };
3460 ## For parsing the next defined(xxxx).
3461 koMatchDefined = re.compile(r'\s*defined\s*\(\s*([^ \t)]+)\s*\)\s*');
3462
3463 def __init__(self, sType, sExpr):
3464 self.sType = sType;
3465 self.sExpr = sExpr; ##< Expression without command and no leading or trailing spaces.
3466 self.aoElif = [] # type: List[PreprocessorConditional]
3467 self.fInElse = [];
3468 if sType in ('if', 'elif'):
3469 self.checkExpression(sExpr);
3470 else:
3471 self.checkSupportedDefine(sExpr)
3472
3473 @staticmethod
3474 def checkSupportedDefine(sDefine):
3475 """ Checks that sDefine is one that we support. Raises exception if unuspported. """
3476 #print('debug: checkSupportedDefine: %s' % (sDefine,), file = sys.stderr);
3477 if sDefine in SimpleParser.PreprocessorConditional.kdKnownDefines:
3478 return True;
3479 if sDefine.startswith('VMM_INCLUDED_') and sDefine.endswith('_h'):
3480 return True;
3481 raise Exception('Unsupported define: %s' % (sDefine,));
3482
3483 @staticmethod
3484 def checkExpression(sExpr):
3485 """ Check that the expression is supported. Raises exception if not. """
3486 #print('debug: checkExpression: %s' % (sExpr,), file = sys.stderr);
3487 if sExpr in ('0', '1'):
3488 return True;
3489
3490 off = 0;
3491 cParan = 0;
3492 while off < len(sExpr):
3493 ch = sExpr[off];
3494
3495 # Unary operator or parentheses:
3496 if ch in ('(', '!'):
3497 if ch == '(':
3498 cParan += 1;
3499 off += 1;
3500 else:
3501 # defined(xxxx)
3502 oMatch = SimpleParser.PreprocessorConditional.koMatchDefined.match(sExpr, off);
3503 if oMatch:
3504 SimpleParser.PreprocessorConditional.checkSupportedDefine(oMatch.group(1));
3505 elif sExpr[off:] != '1':
3506 raise Exception('Cannot grok: \'%s\' (at %u in: \'%s\')' % (sExpr[off:10], off + 1, sExpr,));
3507 off = oMatch.end();
3508
3509 # Look for closing parentheses.
3510 while off < len(sExpr) and sExpr[off].isspace():
3511 off += 1;
3512 if cParan > 0:
3513 while off < len(sExpr) and sExpr[off] == ')':
3514 if cParan <= 0:
3515 raise Exception('Unbalanced parentheses at %u in \'%s\'' % (off + 1, sExpr,));
3516 cParan -= 1;
3517 off += 1;
3518 while off < len(sExpr) and sExpr[off].isspace():
3519 off += 1;
3520
3521 # Look for binary operator.
3522 if off >= len(sExpr):
3523 break;
3524 if sExpr[off:off + 2] in ('||', '&&'):
3525 off += 2;
3526 else:
3527 raise Exception('Cannot grok operator: \'%s\' (at %u in: \'%s\')' % (sExpr[off:2], off + 1, sExpr,));
3528
3529 # Skip spaces.
3530 while off < len(sExpr) and sExpr[off].isspace():
3531 off += 1;
3532 if cParan != 0:
3533 raise Exception('Unbalanced parentheses at %u in \'%s\'' % (off + 1, sExpr,));
3534 return True;
3535
3536 @staticmethod
3537 def isArchIncludedInExpr(sExpr, sArch):
3538 """ Checks if sArch is included in the given expression. """
3539 # We only grok defined() [|| defined()...] and [1|0] at the moment.
3540 if sExpr == '0':
3541 return False;
3542 if sExpr == '1':
3543 return True;
3544 off = 0;
3545 while off < len(sExpr):
3546 # defined(xxxx)
3547 oMatch = SimpleParser.PreprocessorConditional.koMatchDefined.match(sExpr, off);
3548 if not oMatch:
3549 if sExpr[off:] == '1':
3550 return True;
3551 raise Exception('Cannot grok: %s (at %u in: %s)' % (sExpr[off:10], off + 1, sExpr,));
3552 if SimpleParser.PreprocessorConditional.matchDefined(oMatch.group(1), sArch):
3553 return True;
3554 off = oMatch.end();
3555
3556 # Look for OR operator.
3557 while off + 1 < len(sExpr) and sExpr[off + 1].isspace():
3558 off += 1;
3559 if off >= len(sExpr):
3560 break;
3561 if sExpr.startswith('||'):
3562 off += 2;
3563 else:
3564 raise Exception('Cannot grok: %s (at %u in: %s)' % (sExpr[off:10], off + 1, sExpr,));
3565
3566 return False;
3567
3568 @staticmethod
3569 def matchArch(sDefine, sArch):
3570 """ Compares sDefine (RT_ARCH_XXXX) and sArch (x86, amd64, arm64, ++). """
3571 return SimpleParser.PreprocessorConditional.kdBuildArchToIprt[sArch] == sDefine;
3572
3573 @staticmethod
3574 def matchDefined(sExpr, sArch):
3575 """ Check the result of an ifdef/ifndef expression, given sArch. """
3576 iDefine = SimpleParser.PreprocessorConditional.kdKnownDefines.get(sExpr, 0);
3577 if iDefine == -2:
3578 raise Exception('Unsupported define for MC block filtering: %s' % (sExpr,));
3579 return iDefine == 1 or (iDefine == -1 and SimpleParser.PreprocessorConditional.matchArch(sExpr, sArch));
3580
3581 def isArchIncludedInPrimaryBlock(self, sArch):
3582 """ Checks if sArch is included in the (primary) 'if' block. """
3583 if self.sType == 'ifdef':
3584 return self.matchDefined(self.sExpr, sArch);
3585 if self.sType == 'ifndef':
3586 return not self.matchDefined(self.sExpr, sArch);
3587 return self.isArchIncludedInExpr(self.sExpr, sArch);
3588
3589 @staticmethod
3590 def isInBlockForArch(aoCppCondStack, sArch, iLine):
3591 """ Checks if sArch is included in the current conditional block. """
3592 _ = iLine;
3593 #print('debug: isInBlockForArch(%s,%s); line %s' % (len(aoCppCondStack), sArch, iLine), file = sys.stderr);
3594 for oCond in aoCppCondStack:
3595 if oCond.isArchIncludedInPrimaryBlock(sArch):
3596 if oCond.aoElif or oCond.fInElse:
3597 #print('debug: isInBlockForArch -> False #1', file = sys.stderr);
3598 return False;
3599 #print('debug: isInBlockForArch(%s,%s): in IF-block' % (len(aoCppCondStack), sArch), file = sys.stderr);
3600 else:
3601 fFine = False;
3602 for oElifCond in oCond.aoElif:
3603 if oElifCond.isArchIncludedInPrimaryBlock(sArch):
3604 if oElifCond is not oCond.aoElif[-1] or oCond.fInElse:
3605 #print('debug: isInBlockForArch -> False #3', file = sys.stderr);
3606 return False;
3607 fFine = True;
3608 if not fFine and not oCond.fInElse:
3609 #print('debug: isInBlockForArch -> False #4', file = sys.stderr);
3610 return False;
3611 #print('debug: isInBlockForArch -> True', file = sys.stderr);
3612 return True;
3613
def __init__(self, sSrcFile, asLines, sDefaultMap, sHostArch, oInheritMacrosFrom = None):
    """
    Sets up the parser state.

    sSrcFile            - Stored as self.sSrcFile.
    asLines             - Stored as self.asLines.
    sDefaultMap         - Key into g_dInstructionMaps (asserted to be present);
                          the entry becomes self.oDefaultMap.
    sHostArch           - Stored as self.sHostArch.
    oInheritMacrosFrom  - Optional object whose dMacros dictionary is copied and
                          whose oReMacros is reused as initial macro state.
    """
    # Input and position.
    self.sSrcFile       = sSrcFile;
    self.asLines        = asLines;
    self.sHostArch      = sHostArch;
    self.iLine          = 0;
    self.iState         = self.kiCode;
    self.sComment       = '';
    self.iCommentLine   = 0;

    # What is currently being parsed.
    self.aoCurInstrs    = []    # type: List[Instruction]
    self.oCurFunction   = None  # type: DecoderFunction
    self.iMcBlockInFunc = 0;
    self.oCurMcBlock    = None  # type: McBlock
    self.aoCppCondStack = []    # type: List[PreprocessorConditional] ##< Preprocessor conditional stack.

    # Macros, either inherited or starting out empty.
    if oInheritMacrosFrom:
        self.dMacros    = dict(oInheritMacrosFrom.dMacros);
        self.oReMacros  = oInheritMacrosFrom.oReMacros;
    else:
        self.dMacros    = {}    # type: Dict[str, SimpleParser.Macro]
        self.oReMacros  = None  # type: re ##< Regular expression matching invocations of anything in self.dMacros.

    assert sDefaultMap in g_dInstructionMaps;
    self.oDefaultMap    = g_dInstructionMaps[sDefaultMap];

    # Statistics.
    self.cTotalInstr    = 0;
    self.cTotalStubs    = 0;
    self.cTotalTagged   = 0;
    self.cTotalMcBlocks = 0;

    # Precompiled regular expressions.
    self.oReMacroName   = re.compile(r'^[A-Za-z_][A-Za-z0-9_]*$');
    self.oReMnemonic    = re.compile(r'^[A-Za-z_][A-Za-z0-9_]*$');
    self.oReStatsName   = re.compile(r'^[A-Za-z_][A-Za-z0-9_]*$');
    self.oReFunctionName= re.compile(r'^iemOp_[A-Za-z_][A-Za-z0-9_]*$');
    self.oReGroupName   = re.compile(r'^og_[a-z0-9]+(|_[a-z0-9]+|_[a-z0-9]+_[a-z0-9]+)$');
    self.oReDisEnum     = re.compile(r'^OP_[A-Z0-9_]+$');
    self.oReFunTable    = re.compile(r'^(IEM_STATIC|static) +const +PFNIEMOP +g_apfn[A-Za-z0-9_]+ *\[ *\d* *\] *= *$');
    self.oReComment     = re.compile(r'//.*?$|/\*.*?\*/'); ## Full comments.
    self.oReHashDefine2 = re.compile(r'(?s)\A\s*([A-Za-z_][A-Za-z0-9_]*)\(([^)]*)\)\s*(.*)\Z');  ##< With arguments.
    self.oReHashDefine3 = re.compile(r'(?s)\A\s*([A-Za-z_][A-Za-z0-9_]*)[^(]\s*(.*)\Z');         ##< Simple, no arguments.
    self.oReMcBeginEnd  = re.compile(r'\bIEM_MC_(BEGIN|END|DEFER_TO_CIMPL_[1-5]_RET)\s*\('); ##> Not DEFER_TO_CIMPL_0_RET!

    # Debug switches.
    self.fDebug         = True;
    self.fDebugMc       = False;
    self.fDebugPreproc  = False;

    # Comment tag -> handler method.
    self.dTagHandlers   = {
        '@opbrief':     self.parseTagOpBrief,
        '@opdesc':      self.parseTagOpDesc,
        '@opmnemonic':  self.parseTagOpMnemonic,
        '@op1':         self.parseTagOpOperandN,
        '@op2':         self.parseTagOpOperandN,
        '@op3':         self.parseTagOpOperandN,
        '@op4':         self.parseTagOpOperandN,
        '@oppfx':       self.parseTagOpPfx,
        '@opmaps':      self.parseTagOpMaps,
        '@opcode':      self.parseTagOpcode,
        '@opcodesub':   self.parseTagOpcodeSub,
        '@openc':       self.parseTagOpEnc,
        #@opfltest: Lists all flags that will be used as input in some way.
        '@opfltest':    self.parseTagOpEFlags,
        #@opflmodify: Lists all EFLAGS modified. Includes @opflset, @opflclear and @opflundef (if applicable).
        '@opflmodify':  self.parseTagOpEFlags,
        #@opflset: Lists all flags that will be set (set to 1).
        '@opflset':     self.parseTagOpEFlags,
        #@opflclear: Lists all flags that will be cleared (set to 0).
        '@opflclear':   self.parseTagOpEFlags,
        #@opflundef: List of flags documented as undefined.
        '@opflundef':   self.parseTagOpEFlags,
        #@opflclass: Shorthand for defining flag behaviour (@opfltest, @opflmodify, @opflset, @opflclear, @opflundef).
        '@opflclass':   self.parseTagOpEFlagsClass,
        '@ophints':     self.parseTagOpHints,
        '@opdisenum':   self.parseTagOpDisEnum,
        '@opmincpu':    self.parseTagOpMinCpu,
        '@opcpuid':     self.parseTagOpCpuId,
        '@opgroup':     self.parseTagOpGroup,
        '@opunused':    self.parseTagOpUnusedInvalid,
        '@opinvalid':   self.parseTagOpUnusedInvalid,
        '@opinvlstyle': self.parseTagOpUnusedInvalid,
        '@optest':      self.parseTagOpTest,
        '@optestign':   self.parseTagOpTestIgnore,
        '@optestignore': self.parseTagOpTestIgnore,
        '@opcopytests': self.parseTagOpCopyTests,
        '@oponly':      self.parseTagOpOnlyTest,
        '@oponlytest':  self.parseTagOpOnlyTest,
        '@opxcpttype':  self.parseTagOpXcptType,
        '@opstats':     self.parseTagOpStats,
        '@opfunction':  self.parseTagOpFunction,
        '@opdone':      self.parseTagOpDone,
    };
    # Numbered test tags come in two spellings: @optestN and @optest[N].
    for iTest in range(48):
        for sTagFmt in ('@optest%u', '@optest[%u]'):
            self.dTagHandlers[sTagFmt % (iTest,)] = self.parseTagOpTestNum;

    # Errors collected so far.
    self.asErrors = [];
3705
def raiseError(self, sMessage):
    """
    Raises a ParserException for the current position.

    The exception text is sMessage prefixed with self.sSrcFile and
    self.iLine in the usual "file:line: error:" form.
    """
    sLocation = "%s:%d" % (self.sSrcFile, self.iLine,);
    raise ParserException("%s: error: %s" % (sLocation, sMessage,));
3711
def raiseCommentError(self, iLineInComment, sMessage):
    """
    Raises a ParserException for a position inside the current comment.

    Works like raiseError, except that the reported line number is
    self.iCommentLine + iLineInComment.
    """
    iErrorLine = self.iCommentLine + iLineInComment;
    raise ParserException("%s:%d: error: %s" % (self.sSrcFile, iErrorLine, sMessage,));
3717
def error(self, sMessage):
    """
    Records an error for the current line (self.iLine) in self.asErrors.

    Returns False, so callers can write 'return self.error(...)'.
    """
    sEntry = u'%s:%d: error: %s\n' % (self.sSrcFile, self.iLine, sMessage,);
    self.asErrors.append(sEntry);
    return False;
3725
def errorOnLine(self, iLine, sMessage):
    """
    Records an error for the explicitly given line number iLine.

    Returns False, so callers can write 'return self.errorOnLine(...)'.
    """
    sEntry = u'%s:%d: error: %s\n' % (self.sSrcFile, iLine, sMessage,);
    self.asErrors.append(sEntry);
    return False;
3733
def errorComment(self, iLineInComment, sMessage):
    """
    Records an error for a line inside the current comment.

    The reported line number is self.iCommentLine + iLineInComment.
    Returns False, so callers can write 'return self.errorComment(...)'.
    """
    iErrorLine = self.iCommentLine + iLineInComment;
    self.asErrors.append(u'%s:%d: error: %s\n' % (self.sSrcFile, iErrorLine, sMessage,));
    return False;
3741
def printErrors(self):
    """
    Writes all recorded errors (self.asErrors) to stderr in one go.

    Returns the number of recorded errors; nothing is written when there
    are none.
    """
    cErrors = len(self.asErrors);
    if cErrors > 0:
        sys.stderr.write(u''.join(self.asErrors));
    return cErrors;
3750
def debug(self, sMessage):
    """
    Prints a 'debug: ' prefixed message to stderr when self.fDebug is set.
    """
    if not self.fDebug:
        return;
    print('debug: %s' % (sMessage,), file = sys.stderr);
3757
def stripComments(self, sLine):
    """
    Returns sLine with every match of self.oReComment replaced by a space.

    If a comment start or end marker remains afterwards, an error is
    recorded as that indicates a multi-line comment this simple stripping
    cannot deal with.  The stripped line is returned regardless.
    """
    sStripped = self.oReComment.sub(" ", sLine);
    if '/*' in sStripped or '*/' in sStripped:
        self.error('Unexpected multi-line comment will not be handled correctly. Please simplify.');
    return sStripped;
3768
def parseFunctionTable(self, sLine):
    """
    Parses a PFNIEMOP table, updating/checking the @oppfx value.

    sLine is the line declaring the table; the table name and, for the
    256 and 32 entry tables, the element count are taken from it.  The
    table body is then read from self.asLines starting at self.iLine.

    Returns True when the closing brace is found and the number of entries
    matches the expected table length.  Otherwise an error is recorded and
    False is returned.

    Note! Updates iLine as it consumes the whole table.
    """

    #
    # Extract the table name.
    #
    sName = re.search(r' *([a-zA-Z_0-9]+) *\[', sLine).group(1);
    oMap = g_dInstructionMapsByIemName.get(sName);
    if not oMap:
        self.debug('No map for PFNIEMOP table: %s' % (sName,));
        oMap = self.oDefaultMap; # This is wrong wrong wrong.

    #
    # All but the g_apfnOneByteMap & g_apfnEscF1_E0toFF tables use four
    # entries per byte:
    #   no prefix, 066h prefix, f3h prefix, f2h prefix
    # Those tables have 256 & 32 entries respectively.
    #
    cEntriesPerByte = 4;
    cValidTableLength = 1024;
    asPrefixes = ('none', '0x66', '0xf3', '0xf2');

    oEntriesMatch = re.search(r'\[ *(256|32) *\]', sLine);
    if oEntriesMatch:
        cEntriesPerByte = 1;
        cValidTableLength = int(oEntriesMatch.group(1));
        asPrefixes = (None,);

    #
    # The next line should be '{' and nothing else.
    #
    if self.iLine >= len(self.asLines) or not re.match('^ *{ *$', self.asLines[self.iLine]):
        return self.errorOnLine(self.iLine + 1, 'Expected lone "{" on line following PFNIEMOP table %s start' % (sName, ));
    self.iLine += 1;

    #
    # Parse till we find the end of the table.
    #
    iEntry = 0;
    while self.iLine < len(self.asLines):
        # Get the next line and strip comments and spaces (assumes no
        # multi-line comments).
        sLine = self.asLines[self.iLine];
        self.iLine += 1;
        sLine = self.stripComments(sLine).strip();

        # Split the line up into entries, expanding IEMOP_X4 usage.
        # Walking backwards keeps the indexes of the not yet visited
        # entries valid while inserting and deleting.
        asEntries = sLine.split(',');
        for i in range(len(asEntries) - 1, -1, -1):
            sEntry = asEntries[i].strip();
            if sEntry.startswith('IEMOP_X4(') and sEntry[-1] == ')':
                sEntry = (sEntry[len('IEMOP_X4('):-1]).strip();
                # Three extra copies plus the original slot gives four entries.
                asEntries.insert(i + 1, sEntry);
                asEntries.insert(i + 1, sEntry);
                asEntries.insert(i + 1, sEntry);
            if sEntry:
                asEntries[i] = sEntry;
            else:
                del asEntries[i];

        # Process the entries.
        for sEntry in asEntries:
            # A closing brace ends the table; check the entry count.
            if sEntry in ('};', '}'):
                if iEntry != cValidTableLength:
                    return self.error('Wrong table length for %s: %#x, expected %#x' % (sName, iEntry, cValidTableLength, ));
                return True;
            if sEntry.startswith('iemOp_Invalid'):
                pass; # skip
            else:
                # Look up matching instruction by function.
                sPrefix = asPrefixes[iEntry % cEntriesPerByte];
                sOpcode = '%#04x' % (iEntry // cEntriesPerByte);
                aoInstr = g_dAllInstructionsByFunction.get(sEntry);
                if aoInstr:
                    if not isinstance(aoInstr, list):
                        aoInstr = [aoInstr,];
                    # Pick the first instruction that already matches this
                    # opcode/prefix or that can be completed to match it.
                    oInstr = None;
                    for oCurInstr in aoInstr:
                        if oCurInstr.sOpcode == sOpcode and oCurInstr.sPrefix == sPrefix:
                            pass;
                        elif oCurInstr.sOpcode == sOpcode and oCurInstr.sPrefix is None:
                            oCurInstr.sPrefix = sPrefix;
                        elif oCurInstr.sOpcode is None and oCurInstr.sPrefix is None:
                            oCurInstr.sOpcode = sOpcode;
                            oCurInstr.sPrefix = sPrefix;
                        else:
                            continue;
                        oInstr = oCurInstr;
                        break;
                    # No usable candidate: clone the first instruction for
                    # this table slot and register the clone.
                    if not oInstr:
                        oInstr = aoInstr[0].copy(oMap = oMap, sOpcode = sOpcode, sPrefix = sPrefix);
                        aoInstr.append(oInstr);
                        g_dAllInstructionsByFunction[sEntry] = aoInstr;
                        g_aoAllInstructions.append(oInstr);
                        oMap.aoInstructions.append(oInstr);
                else:
                    self.debug('Function "%s", entry %#04x / byte %#04x in %s, is not associated with an instruction.'
                               % (sEntry, iEntry, iEntry // cEntriesPerByte, sName,));
            # Invalid entries occupy a table slot too.
            iEntry += 1;

    return self.error('Unexpected end of file in PFNIEMOP table');
3874
def addInstruction(self, iLine = None):
    """
    Creates a new Instruction and registers it.

    The instruction is created for self.sSrcFile at iLine, defaulting to
    the current line (self.iLine) when iLine is None.  It is appended to
    both g_aoAllInstructions and self.aoCurInstrs.

    Returns the new instruction.
    """
    if iLine is None:
        iLine = self.iLine;
    oNewInstr = Instruction(self.sSrcFile, iLine);
    g_aoAllInstructions.append(oNewInstr);
    self.aoCurInstrs.append(oNewInstr);
    return oNewInstr;
3883
def deriveMnemonicAndOperandsFromStats(self, oInstr, sStats):
    """
    Derives the mnemonic and operands from an IEM stats base name like
    string, i.e. underscore separated words.

    Only acts when oInstr has no mnemonic yet.  The first word, lower
    cased, becomes the mnemonic.  The remaining words are taken as operand
    types, but only if the instruction has no operands.

    Returns False if an operand type isn't found in g_kdOpTypes (operands
    added before that point are kept), otherwise True.
    """
    # Nothing to derive if the mnemonic is already known.
    if oInstr.sMnemonic is not None:
        return True;

    asParts = sStats.split('_');
    oInstr.sMnemonic = asParts[0].lower();

    asTypes = asParts[1:];
    if asTypes and not oInstr.aoOperands:
        for sType in asTypes:
            if sType not in g_kdOpTypes:
                return False;
            oInstr.aoOperands.append(Operand(g_kdOpTypes[sType][1], sType));
    return True;
3899
def doneInstructionOne(self, oInstr, iLine):
    """
    Complete the parsing by processing, validating and expanding raw inputs.

    Records iLine as the completion line of oInstr, fills in the mnemonic,
    disassembler enum, statistics name, function name, maps and encoding
    where they are missing, and registers the instruction in the maps and
    in the stats and function indexed dictionaries.

    The derivation steps build on each other (e.g. the statistics name is
    derived before the function name, which may fall back on it), so their
    order matters.

    Always returns True; a duplicate opstat value is recorded via
    self.error().
    """
    assert oInstr.iLineCompleted is None;
    oInstr.iLineCompleted = iLine;

    #
    # Specified instructions.
    #
    if oInstr.cOpTags > 0:
        if oInstr.sStats is None:
            pass;

    #
    # Unspecified legacy stuff. We generally only got a few things to go on here.
    #   /** Opcode 0x0f 0x00 /0. */
    #   FNIEMOPRM_DEF(iemOp_Grp6_sldt)
    #
    else:
        #if oInstr.sRawOldOpcodes:
        #
        #if oInstr.sMnemonic:
        pass;

    #
    # Common defaults.
    #

    # Guess mnemonic and operands from stats if the former is missing.
    if oInstr.sMnemonic is None:
        if oInstr.sStats is not None:
            self.deriveMnemonicAndOperandsFromStats(oInstr, oInstr.sStats);
        elif oInstr.sFunction is not None:
            self.deriveMnemonicAndOperandsFromStats(oInstr, oInstr.sFunction.replace('iemOp_', ''));

    # Derive the disassembler op enum constant from the mnemonic.
    if oInstr.sDisEnum is None and oInstr.sMnemonic is not None:
        oInstr.sDisEnum = 'OP_' + oInstr.sMnemonic.upper();

    # Derive the IEM statistics base name from mnemonic and operand types.
    if oInstr.sStats is None:
        if oInstr.sFunction is not None:
            oInstr.sStats = oInstr.sFunction.replace('iemOp_', '');
        elif oInstr.sMnemonic is not None:
            oInstr.sStats = oInstr.sMnemonic;
            for oOperand in oInstr.aoOperands:
                if oOperand.sType:
                    oInstr.sStats += '_' + oOperand.sType;

    # Derive the IEM function name from mnemonic and operand types.
    if oInstr.sFunction is None:
        if oInstr.sMnemonic is not None:
            oInstr.sFunction = 'iemOp_' + oInstr.sMnemonic;
            for oOperand in oInstr.aoOperands:
                if oOperand.sType:
                    oInstr.sFunction += '_' + oOperand.sType;
        elif oInstr.sStats:
            oInstr.sFunction = 'iemOp_' + oInstr.sStats;

    #
    # Apply default map and then add the instruction to all its groups.
    #
    if not oInstr.aoMaps:
        oInstr.aoMaps = [ self.oDefaultMap, ];
    for oMap in oInstr.aoMaps:
        oMap.aoInstructions.append(oInstr);

    #
    # Derive encoding from operands and maps.
    #
    if oInstr.sEncoding is None:
        if not oInstr.aoOperands:
            if oInstr.fUnused and oInstr.sSubOpcode:
                oInstr.sEncoding = 'VEX.ModR/M' if oInstr.onlyInVexMaps() else 'ModR/M';
            else:
                oInstr.sEncoding = 'VEX.fixed' if oInstr.onlyInVexMaps() else 'fixed';
        elif oInstr.aoOperands[0].usesModRM():
            if (len(oInstr.aoOperands) >= 2 and oInstr.aoOperands[1].sWhere == 'vvvv') \
               or oInstr.onlyInVexMaps():
                oInstr.sEncoding = 'VEX.ModR/M';
            else:
                oInstr.sEncoding = 'ModR/M';

    #
    # Check the opstat value and add it to the opstat indexed dictionary.
    #
    if oInstr.sStats:
        if oInstr.sStats not in g_dAllInstructionsByStat:
            g_dAllInstructionsByStat[oInstr.sStats] = oInstr;
        else:
            self.error('Duplicate opstat value "%s"\nnew: %s\nold: %s'
                       % (oInstr.sStats, oInstr, g_dAllInstructionsByStat[oInstr.sStats],));

    #
    # Add to function indexed dictionary. We allow multiple instructions per function.
    #
    if oInstr.sFunction:
        if oInstr.sFunction not in g_dAllInstructionsByFunction:
            g_dAllInstructionsByFunction[oInstr.sFunction] = [oInstr,];
        else:
            g_dAllInstructionsByFunction[oInstr.sFunction].append(oInstr);

    #self.debug('%d..%d: %s; %d @op tags' % (oInstr.iLineCreated, oInstr.iLineCompleted, oInstr.sFunction, oInstr.cOpTags));
    return True;
4005
def doneInstructions(self, iLineInComment = None, fEndOfFunction = False):
    """
    Completes all current instructions and resets the current state.

    Each instruction in self.aoCurInstrs is passed to doneInstructionOne
    together with the completion line: self.iLine, or when iLineInComment
    is given, self.iCommentLine + iLineInComment.  The stub and instruction
    totals are updated, and the current comment and instruction list are
    cleared.

    When fEndOfFunction is set, the current function (if any) is completed
    with its source lines and the per function state is reset.

    Always returns True.
    """
    if iLineInComment is None:
        iLineDone = self.iLine;
    else:
        iLineDone = self.iCommentLine + iLineInComment;

    for oInstr in self.aoCurInstrs:
        self.doneInstructionOne(oInstr, iLineDone);
        if oInstr.fStub:
            self.cTotalStubs += 1;
    self.cTotalInstr += len(self.aoCurInstrs);

    # Start afresh for whatever comes next.
    self.sComment = '';
    self.aoCurInstrs = [];

    if not fEndOfFunction:
        return True;

    #self.debug('%s: oCurFunction=None' % (self.iLine, ));
    oFunction = self.oCurFunction;
    if oFunction:
        oFunction.complete(self.iLine, self.asLines[oFunction.iBeginLine - 1 : self.iLine]);
    self.oCurFunction = None;
    self.iMcBlockInFunc = 0;
    return True;
4026
def setInstrunctionAttrib(self, sAttrib, oValue, fOverwrite = False):
    """
    Sets attribute sAttrib of all current instructions to oValue.

    Unless fOverwrite is True, only attributes currently holding None are
    replaced; any other existing value is left alone.
    """
    fForce = fOverwrite is True;
    for oInstr in self.aoCurInstrs:
        if fForce or getattr(oInstr, sAttrib) is None:
            setattr(oInstr, sAttrib, oValue);
4038
def setInstrunctionArrayAttrib(self, sAttrib, iEntry, oValue, fOverwrite = False):
    """
    Sets entry iEntry of the array attribute sAttrib of all current
    instructions to oValue.

    The array is padded with None entries as needed to make iEntry valid.
    Unless fOverwrite is True, only entries currently holding None are
    replaced.
    """
    for oInstr in self.aoCurInstrs:
        aoArray = getattr(oInstr, sAttrib);
        cMissing = iEntry + 1 - len(aoArray);
        if cMissing > 0:
            aoArray.extend([None] * cMissing);
        if fOverwrite is True or aoArray[iEntry] is None:
            aoArray[iEntry] = oValue;
4050
def parseCommentOldOpcode(self, asLines):
    """
    Deals with 'Opcode 0xff /4' like comments.

    If the first comment line starts with the word 'Opcode' followed by a
    hex value, the words after 'Opcode' are stored space separated in the
    sRawOldOpcodes attribute of the current instructions (existing values
    are not overwritten).  An upper case '0X' prefix is normalized to '0x'.

    Always returns False.
    """
    if not asLines:
        return False;

    asWords = asLines[0].split();
    if    len(asWords) >= 2 \
      and asWords[0] == 'Opcode' \
      and asWords[1].startswith(('0x', '0X')):
        # Drop the leading 'Opcode' word, keeping the opcode bytes and
        # whatever follows them.
        asWords = asWords[1:];
        for iWord, sWord in enumerate(asWords):
            if sWord.startswith('0X'):
                asWords[iWord] = '0x' + sWord[2:];
        self.setInstrunctionAttrib('sRawOldOpcodes', ' '.join(asWords));

    return False;
4066
def ensureInstructionForOpTag(self, iTagLine):
    """
    Ensures there is an instruction for the op-tag being parsed.

    If there is no current instruction, one is added at the line of the
    tag (self.iCommentLine + iTagLine).  The tag count of every current
    instruction is incremented, and self.cTotalTagged is incremented for
    each instruction receiving its first tag.

    Returns the last of the current instructions.
    """
    aoInstrs = self.aoCurInstrs;
    if len(aoInstrs) == 0:
        self.addInstruction(self.iCommentLine + iTagLine);
        aoInstrs = self.aoCurInstrs;

    for oCur in aoInstrs:
        fFirstTag = oCur.cOpTags == 0;
        oCur.cOpTags += 1;
        if fFirstTag:
            self.cTotalTagged += 1;
    return self.aoCurInstrs[-1];
4076
4077 @staticmethod
4078 def flattenSections(aasSections):
4079 """
4080 Flattens multiline sections into stripped single strings.
4081 Returns list of strings, on section per string.
4082 """
4083 asRet = [];
4084 for asLines in aasSections:
4085 if asLines:
4086 asRet.append(' '.join([sLine.strip() for sLine in asLines]));
4087 return asRet;
4088
4089 @staticmethod
4090 def flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = '\n'):
4091 """
4092 Flattens sections into a simple stripped string with newlines as
4093 section breaks. The final section does not sport a trailing newline.
4094 """
4095 # Typical: One section with a single line.
4096 if len(aasSections) == 1 and len(aasSections[0]) == 1:
4097 return aasSections[0][0].strip();
4098
4099 sRet = '';
4100 for iSection, asLines in enumerate(aasSections):
4101 if asLines:
4102 if iSection > 0:
4103 sRet += sSectionSep;
4104 sRet += sLineSep.join([sLine.strip() for sLine in asLines]);
4105 return sRet;
4106
4107
4108
4109 ## @name Tag parsers
4110 ## @{
4111
def parseTagOpBrief(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opbrief
    Value:  Text description, multiple sections, appended.

    Brief description.  If not given, it's the first sentence from @opdesc.

    The value must be a single sentence of at most 180 characters; a
    trailing period is added if missing.  Returns True on success.  On
    failure an error is recorded and False is returned.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sBrief = self.flattenAllSections(aasSections);
    if not sBrief:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if sBrief[-1] != '.':
        sBrief = sBrief + '.';
    if len(sBrief) > 180:
        return self.errorComment(iTagLine, '%s: value too long (max 180 chars): %s' % (sTag, sBrief));

    # Locate the first period followed by a space, or else the final one.
    # Periods followed by anything else (like in '1.5') do not end a sentence.
    offDot = sBrief.find('.');
    while 0 <= offDot < len(sBrief) - 1 and sBrief[offDot + 1] != ' ':
        offDot = sBrief.find('.', offDot + 1);
    if offDot >= 0 and offDot != len(sBrief) - 1:
        return self.errorComment(iTagLine, '%s: only one sentence: %s' % (sTag, sBrief));

    # Update the instruction.
    if oInstr.sBrief is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite brief "%s" with "%s"'
                                           % (sTag, oInstr.sBrief, sBrief,));
    oInstr.sBrief = sBrief;

    _ = iEndLine;
    return True;
4141
def parseTagOpDesc(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opdesc
    Value:  Text description, multiple sections, appended.

    It is used to describe instructions.  The flattened sections are
    appended to the instruction's description sections.

    Always returns True.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    asNewSections = self.flattenSections(aasSections) if aasSections else [];
    if asNewSections:
        oInstr.asDescSections.extend(asNewSections);

    _ = sTag; _ = iEndLine;
    return True;
4156
def parseTagOpMnemonic(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opmnemonic
    Value:  mnemonic

    The 'mnemonic' value must match self.oReMnemonic.  Because of prefixes,
    groups and whatnot, there are times when the mnemonic isn't that of an
    actual assembler mnemonic.

    Returns True on success.  An invalid value or an attempt to replace an
    already set mnemonic is recorded as an error and yields False.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sNewMnemonic = self.flattenAllSections(aasSections);
    if self.oReMnemonic.match(sNewMnemonic) is None:
        return self.errorComment(iTagLine, '%s: invalid menmonic name: "%s"' % (sTag, sNewMnemonic,));

    # Refuse to replace an existing mnemonic.
    sOldMnemonic = oInstr.sMnemonic;
    if sOldMnemonic is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite menmonic "%s" with "%s"'
                                           % (sTag, sOldMnemonic, sNewMnemonic,));
    oInstr.sMnemonic = sNewMnemonic;

    _ = iEndLine;
    return True;
4179
def parseTagOpOperandN(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tags:   @op1, @op2, @op3, @op4
    Value:  [where:]type

    The 'where' value indicates where the operand is found, like the 'reg'
    part of the ModR/M encoding.  It must be a key of g_kdOpLocations.  If
    omitted, the default location associated with the type is used.

    The 'type' value indicates the operand type.  These follow the types
    given in the opcode tables in the CPU reference manuals.  It must be a
    key of g_kdOpTypes.

    Returns True on success.  An invalid value or an attempt to replace an
    already specified operand is recorded as an error and yields False.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    # The last character of the tag is the one-based operand number.
    idxOp = int(sTag[-1]) - 1;
    assert 0 <= idxOp < 4;

    # flatten, split up, and validate the "where:type" value.
    sFlattened = self.flattenAllSections(aasSections);
    asSplit = sFlattened.split(':');
    if len(asSplit) == 1:
        sType = asSplit[0];
        sWhere = None;
    elif len(asSplit) == 2:
        (sWhere, sType) = asSplit;
    else:
        return self.errorComment(iTagLine, 'expected %s value on format "[<where>:]<type>" not "%s"' % (sTag, sFlattened,));

    if sType not in g_kdOpTypes:
        return self.errorComment(iTagLine, '%s: invalid type value "%s", valid: %s'
                                           % (sTag, sType, ', '.join(g_kdOpTypes.keys()),));
    if sWhere is None:
        sWhere = g_kdOpTypes[sType][1];
    elif sWhere not in g_kdOpLocations:
        return self.errorComment(iTagLine, '%s: invalid where value "%s", valid: %s'
                                           % (sTag, sWhere, ', '.join(g_kdOpLocations.keys()),));

    # Insert the operand, refusing to overwrite an existing one.
    while idxOp >= len(oInstr.aoOperands):
        oInstr.aoOperands.append(None);
    if oInstr.aoOperands[idxOp] is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s:%s" with "%s:%s"'
                                           % ( sTag, oInstr.aoOperands[idxOp].sWhere, oInstr.aoOperands[idxOp].sType,
                                               sWhere, sType,));
    oInstr.aoOperands[idxOp] = Operand(sWhere, sType);

    _ = iEndLine;
    return True;
4229
def parseTagOpMaps(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opmaps
    Value:  map[,map2]

    Indicates which maps the instruction is in.  There is a default map
    associated with each input file.

    Each name must be a key of g_dInstructionMaps.  Returns False after
    recording an error if a name is unknown or the instruction is already
    assigned to one of the maps.  A name given twice in the same value is
    recorded as an error too, but processing continues.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split up and validate the value.
    asMaps = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',').split(',');
    if not asMaps:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    sUnknown = next((sMap for sMap in asMaps if sMap not in g_dInstructionMaps), None);
    if sUnknown is not None:
        return self.errorComment(iTagLine, '%s: invalid map value: %s (valid values: %s)'
                                           % (sTag, sUnknown, ', '.join(g_dInstructionMaps.keys()),));

    # Throw errors on maps the instruction is already a member of.
    for oOldMap in oInstr.aoMaps:
        if oOldMap.sName in asMaps:
            return self.errorComment(iTagLine, '%s: duplicate map assignment: %s' % (sTag, oOldMap.sName));

    # Add the maps to the current list, complaining about repeats in the input.
    for sMap in asMaps:
        oNewMap = g_dInstructionMaps[sMap];
        if oNewMap in oInstr.aoMaps:
            self.errorComment(iTagLine, '%s: duplicate map assignment (input): %s' % (sTag, sMap));
        else:
            oInstr.aoMaps.append(oNewMap);

    _ = iEndLine;
    return True;
4264
def parseTagOpPfx(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @oppfx
    Value:  n/a|none|0x66|0xf3|0xf2|!0xf3

    Required prefix for the instruction.  (In a (E)VEX context this is the
    value of the 'pp' field rather than an actual prefix.)

    'n/a' is stored as None.  Two character values get a '0x' prefix before
    validation.  Returns True on success.  A missing or invalid value, or
    an attempt to replace an already set prefix, is recorded as an error
    and yields False.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sFlattened = self.flattenAllSections(aasSections);
    asPrefixes = sFlattened.split();
    if not asPrefixes:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if len(asPrefixes) > 1:
        return self.errorComment(iTagLine, '%s: max one prefix: %s' % (sTag, asPrefixes,));

    sPrefix = asPrefixes[0].lower();
    if sPrefix == 'n/a':
        sPrefix = None;
    elif sPrefix != 'none':
        if len(sPrefix) == 2:
            sPrefix = '0x' + sPrefix;
        if not _isValidOpcodeByte(sPrefix):
            if sPrefix != '!0xf3':
                return self.errorComment(iTagLine, '%s: invalid prefix: %s' % (sTag, sPrefix,));

    if sPrefix is not None and sPrefix not in g_kdPrefixes:
        return self.errorComment(iTagLine, '%s: invalid prefix: %s (valid %s)' % (sTag, sPrefix, g_kdPrefixes,));

    # Set it.
    if oInstr.sPrefix is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sPrefix, sPrefix,));
    oInstr.sPrefix = sPrefix;

    _ = iEndLine;
    return True;
4303
def parseTagOpcode(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opcode
    Value:  0x?? | /reg | 11/reg | !11/reg
            (TODO: | mr/reg | 11 /reg | !11 /reg | 11 mr/reg | !11 mr/reg)

    The opcode byte or sub-byte for the instruction in the context of a map.
    The 'reg' part is a single digit in the range 0 thru 7.

    Returns True on success.  An invalid value or an attempt to replace an
    already set opcode is recorded as an error and yields False.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sOpcode = self.flattenAllSections(aasSections);
    fValid = _isValidOpcodeByte(sOpcode);
    if not fValid:
        # The ModR/M reg field is three bits wide, so only 0..7 are valid.
        for sLead in ('/', '11/', '!11/'):
            if len(sOpcode) == len(sLead) + 1 and sOpcode.startswith(sLead) and sOpcode[-1] in '01234567':
                fValid = True;
                break;
    if not fValid:
        return self.errorComment(iTagLine, '%s: invalid opcode: %s' % (sTag, sOpcode,));

    # Set it.
    if oInstr.sOpcode is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sOpcode, sOpcode,));
    oInstr.sOpcode = sOpcode;

    _ = iEndLine;
    return True;
4333
def parseTagOpcodeSub(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opcodesub
    Value:  none | 11 mr/reg | !11 mr/reg | rex.w=0 | rex.w=1 | vex.l=0 | vex.l=1
            | 11 mr/reg vex.l=0 | 11 mr/reg vex.l=1 | !11 mr/reg vex.l=0 | !11 mr/reg vex.l=1
            | !11 rex.w=0 | !11 mr/reg rex.w=0
            | !11 rex.w=1 | !11 mr/reg rex.w=1

    This is a simple way of dealing with encodings where the mod=3 and mod!=3
    represents exactly two different instructions.  The more proper way would
    be to go via maps with two members, but this is faster.

    The value must be a key of g_kdSubOpcodes; what gets stored is the
    first element of the corresponding table entry.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sValue = self.flattenAllSections(aasSections);
    if sValue not in g_kdSubOpcodes:
        sValid = ', '.join(sorted(g_kdSubOpcodes.keys()));
        return self.errorComment(iTagLine, '%s: invalid sub opcode: %s (valid: %s)' % (sTag, sValue, sValid,));
    sSubOpcode = g_kdSubOpcodes[sValue][0];

    # Set it, unless already set.
    sOldSubOpcode = oInstr.sSubOpcode;
    if sOldSubOpcode is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                           % ( sTag, sOldSubOpcode, sSubOpcode,));
    oInstr.sSubOpcode = sSubOpcode;

    _ = iEndLine;
    return True;
4363
def parseTagOpEnc(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @openc
    Value:  ModR/M|fixed|prefix|<map name>

    The instruction operand encoding style.

    The value is accepted if it is a key of g_kdEncodings, a key of
    g_dInstructionMaps, or passes _isValidOpcodeByte.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sEncoding = self.flattenAllSections(aasSections);
    fAccepted = sEncoding in g_kdEncodings \
             or sEncoding in g_dInstructionMaps \
             or _isValidOpcodeByte(sEncoding);
    if not fAccepted:
        return self.errorComment(iTagLine, '%s: invalid encoding: %s' % (sTag, sEncoding,));

    # Set it, unless already set.
    sOldEncoding = oInstr.sEncoding;
    if sOldEncoding is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                           % ( sTag, sOldEncoding, sEncoding,));
    oInstr.sEncoding = sEncoding;

    _ = iEndLine;
    return True;
4390
## EFlags tag to Instruction attribute name.
# Used by parseTagOpEFlags to pick the instruction attribute that receives
# the flag list of the tag being parsed.
kdOpFlagToAttr = {
    '@opfltest': 'asFlTest',
    '@opflmodify': 'asFlModify',
    '@opflundef': 'asFlUndefined',
    '@opflset': 'asFlSet',
    '@opflclear': 'asFlClear',
};
4399
def parseTagOpEFlags(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tags:   @opfltest, @opflmodify, @opflundef, @opflset, @opflclear
    Value:  <eflags specifier>

    The value is a comma separated list of g_kdEFlagsMnemonics keys, or
    the single word 'none' for an empty list.  The list is stored in the
    instruction attribute given by kdOpFlagToAttr for the tag.

    Returns True on success.  Invalid flags (all are reported) or an
    attempt to replace a non-empty list yields False.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    sAttrib = self.kdOpFlagToAttr[sTag];

    # Flatten, split up and validate the values.
    asFlags = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',').split(',');
    if len(asFlags) == 1 and asFlags[0].lower() == 'none':
        asFlags = [];
    else:
        fAllValid = True;
        for iFlag, sFlag in enumerate(asFlags):
            if sFlag in g_kdEFlagsMnemonics:
                continue;
            # Retry with surrounding whitespace removed.
            sTrimmed = sFlag.strip();
            if sTrimmed in g_kdEFlagsMnemonics:
                asFlags[iFlag] = sTrimmed;
            else:
                fAllValid = self.errorComment(iTagLine, '%s: invalid EFLAGS value: %s' % (sTag, sFlag,));
        if not fAllValid:
            return False;

    # Set them, refusing to replace a non-empty list.
    asOld = getattr(oInstr, sAttrib);
    if asOld is not None and len(asOld) > 0:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, asOld, asFlags,));
    setattr(oInstr, sAttrib, asFlags);

    _ = iEndLine;
    return True;
4431
## EFLAGS class definitions with their attribute lists.
# Each class maps the Instruction flag attribute names to flag lists.  Used
# by parseTagOpEFlagsClass, which copies every entry of the selected class
# to the instruction.
kdEFlagsClasses = {
    'arithmetic': { # add, sub, ...
        'asFlTest': [],
        'asFlModify': [ 'cf', 'pf', 'af', 'zf', 'sf', 'of', ],
        'asFlClear': [],
        'asFlSet': [],
        'asFlUndefined': [],
    },
    'arithmetic_carry': { # adc, sbb, ...
        'asFlTest': [ 'cf', ],
        'asFlModify': [ 'cf', 'pf', 'af', 'zf', 'sf', 'of', ],
        'asFlClear': [],
        'asFlSet': [],
        'asFlUndefined': [],
    },
    'incdec': {
        'asFlTest': [],
        'asFlModify': [ 'pf', 'af', 'zf', 'sf', 'of', ], # leaves CF alone
        'asFlClear': [],
        'asFlSet': [],
        'asFlUndefined': [],
    },
    'division': { ## @todo specify intel/amd differences...
        'asFlTest': [ 'pf', 'af', 'zf', 'sf', ], # Intel leaves all flags unchanged.
        'asFlModify': [ 'pf', 'af', 'zf', 'sf', ], # While AMD sets AF and clears PF, ZF & SF, leaving CF and OF alone.
        'asFlClear': [],
        'asFlSet': [],
        'asFlUndefined': [ 'cf', 'pf', 'af', 'zf', 'sf', 'of', ],
    },
    'multiply': { ## @todo specify intel/amd differences...
        'asFlTest': [ 'pf', 'af', 'zf', 'sf', ], # AMD leaves these unchanged, so we have to declare them as inputs.
        'asFlModify': [ 'cf', 'pf', 'af', 'zf', 'sf', 'of' ], # Intel always modifies all flags, but how differs
        'asFlClear': [], # between IMUL and MUL.
        'asFlSet': [],
        'asFlUndefined': [ 'pf', 'af', 'zf', 'sf', ],
    },
    'logical': { # and, or, xor, ...
        'asFlTest': [],
        'asFlModify': [ 'cf', 'pf', 'af', 'zf', 'sf', 'of', ],
        'asFlClear': [ 'cf', 'af', 'of', ], # 'af' is undefined, but tstIEMAImpl indicates that it is cleared.
        'asFlSet': [],
        'asFlUndefined': [ 'af', ],
    },
    'rotate_1': { # rol and ror with fixed 1 shift count
        'asFlTest': [],
        'asFlModify': [ 'cf', 'of', ],
        'asFlClear': [],
        'asFlSet': [],
        'asFlUndefined': [],
    },
    'rotate_count': { # rol and ror w/o fixed 1 shift count
        'asFlTest': [],
        'asFlModify': [ 'cf', 'of', ],
        'asFlClear': [],
        'asFlSet': [],
        'asFlUndefined': [ 'of', ],
    },
    'rotate_carry_1': { # rcl and rcr with fixed 1 shift count
        'asFlTest': [ 'cf', ],
        'asFlModify': [ 'cf', 'of', ],
        'asFlClear': [],
        'asFlSet': [],
        'asFlUndefined': [],
    },
    'rotate_carry_count': { # rcl and rcr w/o fixed 1 shift count
        'asFlTest': [ 'cf', ],
        'asFlModify': [ 'cf', 'of', ],
        'asFlClear': [],
        'asFlSet': [],
        'asFlUndefined': [ 'of', ],
    },
    'shift_1': { # shl, shr or sar with fixed 1 count.
        'asFlTest': [],
        'asFlModify': [ 'cf', 'pf', 'af', 'zf', 'sf', 'of', ],
        'asFlClear': [],
        'asFlSet': [],
        'asFlUndefined': [ 'af', ],
    },
    'shift_count': { # shl, shr or sar w/o fixed 1 shift count
        'asFlTest': [],
        'asFlModify': [ 'cf', 'pf', 'af', 'zf', 'sf', 'of', ],
        'asFlClear': [],
        'asFlSet': [],
        'asFlUndefined': [ 'af', 'of', ],
    },
    'bitmap': { # bt, bts, btr, btc
        'asFlTest': [],
        'asFlModify': [ 'cf', ],
        'asFlClear': [],
        'asFlSet': [],
        'asFlUndefined': [ 'pf', 'af', 'zf', 'sf', 'of', ], # tstIEMAImpl indicates that they aren't modified.
    },
    'unchanged': {
        'asFlTest': [],
        'asFlModify': [],
        'asFlClear': [],
        'asFlSet': [],
        'asFlUndefined': [],
    },
};
def parseTagOpEFlagsClass(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tags:   @opflclass
    Value:  arithmetic, logical, ...

    Shorthand for setting all the EFLAGS attributes in one go.  The value
    must be a key of kdEFlagsClasses, and every attribute listed for that
    class is set on the instruction.

    Returns False after recording an error if the class is unknown or if
    one of the attributes already has a value (anything but None).
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sClass = self.flattenAllSections(aasSections);
    dClassAttribs = self.kdEFlagsClasses.get(sClass);
    if not dClassAttribs:
        sValid = ', '.join(sorted(self.kdEFlagsClasses.keys()));
        return self.errorComment(iTagLine, '%s: Unknown EFLAGS class: %s (valid: %s)' % (sTag, sClass, sValid,));

    # Set the attributes, stopping at the first one that is already set.
    for sAttrib in dClassAttribs:
        asNew = dClassAttribs[sAttrib];
        asOld = getattr(oInstr, sAttrib);
        if asOld is not None:
            return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s" for %s'
                                               % (sTag, asOld, asNew, sAttrib));
        setattr(oInstr, sAttrib, asNew);

    _ = iEndLine;
    return True;
4558
def parseTagOpHints(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @ophints
    Value:  Comma or space separated list of flags and hints.

    This covers the disassembler flags table and more.

    Each hint must be a key of g_kdHints; the single word 'none' gives an
    empty list.  Valid hints are added to the instruction's dHints
    dictionary.  Returns False if any hint is invalid (all are reported).
    A duplicate hint is recorded as an error, but True is still returned.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    asHints = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asHints) == 1 and asHints[0].lower() == 'none':
        asHints = [];
    else:
        fRc = True;
        for iHint, sHint in enumerate(asHints):
            if sHint not in g_kdHints:
                if sHint.strip() in g_kdHints:
                    # Store the trimmed value in the list (strings are immutable).
                    asHints[iHint] = sHint.strip();
                else:
                    fRc = self.errorComment(iTagLine, '%s: invalid hint value: %s' % (sTag, sHint,));
        if not fRc:
            return False;

    # Append them.
    for sHint in asHints:
        if sHint not in oInstr.dHints:
            oInstr.dHints[sHint] = True; # (dummy value, using dictionary for speed)
        else:
            self.errorComment(iTagLine, '%s: duplicate hint: %s' % ( sTag, sHint,));

    _ = iEndLine;
    return True;
4592
def parseTagOpDisEnum(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opdisenum
    Value:  OP_XXXX

    This is for selecting a specific (legacy) disassembler enum value for
    the instruction.

    The value must match self.oReDisEnum.  When more than one word is
    given an error is recorded and the first word is used.  Returns False
    if there is no value, the value is invalid, or the enum is already set.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and split.
    asWords = self.flattenAllSections(aasSections).split();
    cWords = len(asWords);
    if cWords != 1:
        self.errorComment(iTagLine, '%s: expected exactly one value: %s' % (sTag, asWords,));
    if cWords == 0:
        return False;

    sDisEnum = asWords[0];
    if self.oReDisEnum.match(sDisEnum) is None:
        return self.errorComment(iTagLine, '%s: invalid disassembler OP_XXXX enum: %s (pattern: %s)'
                                           % (sTag, sDisEnum, self.oReDisEnum.pattern));

    # Set it, unless already set.
    sOldDisEnum = oInstr.sDisEnum;
    if sOldDisEnum is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % (sTag, sOldDisEnum, sDisEnum,));
    oInstr.sDisEnum = sDisEnum;

    _ = iEndLine;
    return True;
4621
def parseTagOpMinCpu(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @opmincpu
    Value:      <simple CPU name>

    Indicates when this instruction was introduced.

    The value must be a key in g_kdCpuNames.  An empty value or an unknown
    name is reported via errorComment() and its status is returned.  When
    more than one word is given an error is reported and the first word is
    used.  An already set oInstr.sMinCpu is never replaced; a conflicting
    value is reported instead.

    Returns True unless the value was missing or invalid.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value, split into words, make sure there's just one, validate it.
    asCpus = self.flattenAllSections(aasSections).split();
    if not asCpus:
        # Guard against indexing an empty list below.
        return self.errorComment(iTagLine, '%s: missing value, expected a CPU name' % (sTag,));
    if len(asCpus) > 1:
        self.errorComment(iTagLine, '%s: exactly one CPU name, please: %s' % (sTag, ' '.join(asCpus),));

    sMinCpu = asCpus[0];
    if sMinCpu not in g_kdCpuNames:
        return self.errorComment(iTagLine, '%s: invalid CPU name: %s (names: %s)'
                                 % (sTag, sMinCpu, ','.join(sorted(g_kdCpuNames)),));

    # Set it.  The assignment must not happen before this check, or the
    # overwrite detection could never trigger.
    if oInstr.sMinCpu is None:
        oInstr.sMinCpu = sMinCpu;
    elif oInstr.sMinCpu != sMinCpu:
        self.errorComment(iTagLine, '%s: attemting to overwrite "%s" with "%s"' % (sTag, oInstr.sMinCpu, sMinCpu,));

    _ = iEndLine;
    return True;
4651
def parseTagOpCpuId(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @opcpuid
    Value:      none | <CPUID flag specifier>

    CPUID feature bit which is required for the instruction to be present.

    The single value 'none' (any case) selects an empty list.  Every other
    value must be a key in g_kdCpuIdFlags; unknown values are reported
    through errorComment().  Accepted values are appended to
    oInstr.asCpuIds, duplicates are reported but otherwise ignored.

    Returns True on success.  Returns False when a value was rejected and
    errorComment() returned a false status.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    asCpuIds = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asCpuIds) == 1 and asCpuIds[0].lower() == 'none':
        asCpuIds = [];
    else:
        fRc = True;
        for iCpuId, sCpuId in enumerate(asCpuIds):
            if sCpuId not in g_kdCpuIdFlags:
                if sCpuId.strip() in g_kdCpuIdFlags:
                    # Write the normalized value back into the list.  (Indexing
                    # the string itself would raise TypeError as str is immutable.)
                    asCpuIds[iCpuId] = sCpuId.strip();
                else:
                    fRc = self.errorComment(iTagLine, '%s: invalid CPUID value: %s' % (sTag, sCpuId,));
        if not fRc:
            return False;

    # Append them.
    for sCpuId in asCpuIds:
        if sCpuId not in oInstr.asCpuIds:
            oInstr.asCpuIds.append(sCpuId);
        else:
            self.errorComment(iTagLine, '%s: duplicate CPUID: %s' % ( sTag, sCpuId,));

    _ = iEndLine;
    return True;
4685
def parseTagOpGroup(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @opgroup
    Value:      op_grp1[_subgrp2[_subsubgrp3]]

    Instruction grouping.

    Exactly one word is expected; it must match self.oReGroupName and the
    group may only be set once per instruction (oInstr.sGroup).

    Returns True on success, otherwise the errorComment() status.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # There must be one and only one group name.
    asNames = self.flattenAllSections(aasSections).split();
    if len(asNames) != 1:
        return self.errorComment(iTagLine, '%s: exactly one group, please: %s' % (sTag, asNames,));
    sNewGroup = asNames[0];

    # The name has to follow the group naming pattern.
    if not self.oReGroupName.match(sNewGroup):
        sMsg = '%s: invalid group name: %s (valid: %s)' % (sTag, sNewGroup, self.oReGroupName.pattern);
        return self.errorComment(iTagLine, sMsg);

    # Never replace a group that was already given.
    sOldGroup = oInstr.sGroup;
    if sOldGroup is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, sOldGroup, sNewGroup,));

    oInstr.sGroup = sNewGroup;
    return True;
4711
def parseTagOpUnusedInvalid(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @opunused, @opinvalid, @opinvlstyle
    Value:      <invalid opcode behaviour style>

    The @opunused indicates the specification is for a currently unused
    instruction encoding.

    The @opinvalid indicates the specification is for a currently invalid
    instruction encoding (like UD2).

    The @opinvlstyle just indicates how CPUs decode the instruction when
    not supported (@opcpuid, @opmincpu) or disabled.

    The value must be a single key of g_kdInvalidStyles and only one of
    the three tags may be given per instruction.

    Returns True on success, otherwise the errorComment() status.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Exactly one style word is required.
    asValues = self.flattenAllSections(aasSections).split();
    if len(asValues) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid behviour style, please: %s' % (sTag, asValues,));

    # It must be one of the known styles.
    sNewStyle = asValues[0];
    if sNewStyle not in g_kdInvalidStyles:
        sMsg = '%s: invalid invalid behaviour style: %s (valid: %s)' % (sTag, sNewStyle, g_kdInvalidStyles.keys(),);
        return self.errorComment(iTagLine, sMsg);

    # The style can only be given once.
    if oInstr.sInvalidStyle is not None:
        sMsg = '%s: attempting to overwrite "%s" with "%s" (only one @opunused, @opinvalid, @opinvlstyle)' \
             % ( sTag, oInstr.sInvalidStyle, sNewStyle,);
        return self.errorComment(iTagLine, sMsg);
    oInstr.sInvalidStyle = sNewStyle;

    # Two of the tags additionally raise a flag on the instruction.
    sFlagAttrib = { '@opunused': 'fUnused', '@opinvalid': 'fInvalid', }.get(sTag);
    if sFlagAttrib is not None:
        setattr(oInstr, sFlagAttrib, True);

    return True;
4749
def parseTagOpTest(self, sTag, aasSections, iTagLine, iEndLine): # pylint: disable=too-many-locals
    """
    Tag:        @optest
    Value:      [<selectors>[ ]?] <inputs> -> <outputs>
    Example:    mode==64bit / in1=0xfffffffe:dw in2=1:dw -> out1=0xffffffff:dw outfl=a?,p?

    The main idea here is to generate basic instruction tests.

    The probably simplest way of handling the diverse input, would be to use
    it to produce size optimized byte code for a simple interpreter that
    modifies the register input and output states.

    An alternative to the interpreter would be creating multiple tables,
    but that becomes rather complicated wrt what goes where and then to use
    them in an efficient manner.

    Each section in aasSections is parsed as one test.  A test that parses
    cleanly is appended to oInstr.aoTests; otherwise the problems are
    reported via errorComment() and the test is dropped.

    Always returns True.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    #
    # Do it section by section.
    #
    for asSectionLines in aasSections:
        #
        # Sort the input into outputs, inputs and selector conditions.
        #
        sFlatSection = self.flattenAllSections([asSectionLines,]);
        if not sFlatSection:
            self.errorComment(iTagLine, '%s: missing value (dbg: aasSections=%s)' % ( sTag, aasSections));
            continue;
        oTest = InstructionTest(oInstr);

        asSelectors = [];
        asInputs    = [];
        asOutputs   = [];
        asCur       = asOutputs;
        fRc         = True;
        asWords     = sFlatSection.split();
        # The words are walked back to front, so we start out collecting
        # outputs, switch to inputs at '->' and to selectors at '/'.
        for sWord in reversed(asWords):
            # Check for array switchers.  The target list must be compared by
            # identity: two distinct lists with equal content (e.g. both still
            # empty) would otherwise hide a repeated or misplaced separator.
            if sWord == '->':
                if asCur is not asOutputs:
                    fRc = self.errorComment(iTagLine, '%s: "->" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asInputs;
            elif sWord == '/':
                if asCur is not asInputs:
                    fRc = self.errorComment(iTagLine, '%s: "/" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asSelectors;
            else:
                asCur.insert(0, sWord);

        #
        # Validate and add selectors.
        #
        for sCond in asSelectors:
            sCondExp  = TestSelector.kdPredicates.get(sCond, sCond);
            oSelector = None;
            for sOp in TestSelector.kasCompareOps:
                off = sCondExp.find(sOp);
                if off >= 0:
                    sVariable = sCondExp[:off];
                    sValue    = sCondExp[off + len(sOp):];
                    if sVariable in TestSelector.kdVariables:
                        if sValue in TestSelector.kdVariables[sVariable]:
                            oSelector = TestSelector(sVariable, sOp, sValue);
                        else:
                            self.errorComment(iTagLine, '%s: invalid condition value "%s" in "%s" (valid: %s)'
                                              % ( sTag, sValue, sCond,
                                                  TestSelector.kdVariables[sVariable].keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid condition variable "%s" in "%s" (valid: %s)'
                                          % ( sTag, sVariable, sCond, TestSelector.kdVariables.keys(),));
                    break;  # Only the first operator found in the condition is considered.
            if oSelector is not None:
                for oExisting in oTest.aoSelectors:
                    if oExisting.sVariable == oSelector.sVariable:
                        self.errorComment(iTagLine, '%s: already have a selector for variable "%s" (existing: %s, new: %s)'
                                          % ( sTag, oSelector.sVariable, oExisting, oSelector,));
                oTest.aoSelectors.append(oSelector);
            else:
                fRc = self.errorComment(iTagLine, '%s: failed to parse selector: %s' % ( sTag, sCond,));

        #
        # Validate outputs and inputs, adding them to the test as we go along.
        #
        for asItems, sDesc, aoDst in [ (asInputs, 'input', oTest.aoInputs), (asOutputs, 'output', oTest.aoOutputs)]:
            asValidFieldKinds = [ 'both', sDesc, ];
            for sItem in asItems:
                oItem = None;
                for sOp in TestInOut.kasOperators:
                    off = sItem.find(sOp);
                    if off < 0:
                        continue;
                    sField     = sItem[:off];
                    sValueType = sItem[off + len(sOp):];
                    if     sField in TestInOut.kdFields \
                       and TestInOut.kdFields[sField][1] in asValidFieldKinds:
                        # An explicit ':type' suffix overrides the field's default type.
                        asSplit = sValueType.split(':', 1);
                        sValue  = asSplit[0];
                        sType   = asSplit[1] if len(asSplit) > 1 else TestInOut.kdFields[sField][0];
                        if sType in TestInOut.kdTypes:
                            oValid = TestInOut.kdTypes[sType].validate(sValue);
                            if oValid is True:
                                if not TestInOut.kdTypes[sType].isAndOrPair(sValue) or sOp == '&|=':
                                    oItem = TestInOut(sField, sOp, sValue, sType);
                                else:
                                    self.errorComment(iTagLine, '%s: and-or %s value "%s" can only be used with "&|="'
                                                      % ( sTag, sDesc, sItem, ));
                            else:
                                self.errorComment(iTagLine, '%s: invalid %s value "%s" in "%s" (type: %s): %s'
                                                  % ( sTag, sDesc, sValue, sItem, sType, oValid, ));
                        else:
                            self.errorComment(iTagLine, '%s: invalid %s type "%s" in "%s" (valid types: %s)'
                                              % ( sTag, sDesc, sType, sItem, TestInOut.kdTypes.keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid %s field "%s" in "%s"\nvalid fields: %s'
                                          % ( sTag, sDesc, sField, sItem,
                                              ', '.join([sKey for sKey, asVal in TestInOut.kdFields.items()
                                                         if asVal[1] in asValidFieldKinds]),));
                    break;  # Only the first operator found in the item is considered.
                if oItem is not None:
                    for oExisting in aoDst:
                        if oExisting.sField == oItem.sField and oExisting.sOp == oItem.sOp:
                            self.errorComment(iTagLine,
                                              '%s: already have a "%s" assignment for field "%s" (existing: %s, new: %s)'
                                              % ( sTag, oItem.sOp, oItem.sField, oExisting, oItem,));
                    aoDst.append(oItem);
                else:
                    fRc = self.errorComment(iTagLine, '%s: failed to parse assignment: %s' % ( sTag, sItem,));

        #
        # Add the test if everything parsed, otherwise dump what we got.
        #
        if fRc:
            oInstr.aoTests.append(oTest);
        else:
            self.errorComment(iTagLine, '%s: failed to parse test: %s' % (sTag, ' '.join(asWords),));
            self.errorComment(iTagLine, '%s: asSelectors=%s / asInputs=%s -> asOutputs=%s'
                              % (sTag, asSelectors, asInputs, asOutputs,));

    _ = iEndLine;
    return True;
4894
def parseTagOpTestNum(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Numbered @optest tag.  Either @optest42 or @optest[42].

    The number is checked against the count of tests already collected
    for the instruction (oInstr.aoTests); a mismatch is reported but the
    test is parsed regardless.

    Returns whatever parseTagOpTest() returns.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Cut the number out of the tag: '@optest[N]' or '@optestN'.
    sNumber = sTag[8:-1] if sTag[-1] == ']' else sTag[7:];
    iTest   = int(sNumber);

    cTests = len(oInstr.aoTests);
    if iTest != cTests:
        self.errorComment(iTagLine, '%s: incorrect test number: %u, actual %u' % (sTag, iTest, cTests,));

    return self.parseTagOpTest(sTag, aasSections, iTagLine, iEndLine);
4910
def parseTagOpTestIgnore(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @optestign | @optestignore
    Value:      <value is ignored>

    This is a simple trick to ignore a test while debugging another.
    None of the arguments are looked at.

    See also @oponlytest.

    Always returns True.
    """
    _ = (sTag, aasSections, iTagLine, iEndLine);
    return True;
4922
def parseTagOpCopyTests(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @opcopytests
    Value:      <opstat | function> [..]
    Example:    @opcopytests add_Eb_Gb

    Trick to avoid duplicating tests for different encodings of the same
    operation.

    Each reference must match self.oReStatsName or self.oReFunctionName.
    Valid ones are queued in oInstr.asCopyTests; invalid and duplicate
    references are reported and skipped.

    Returns True when at least one reference word was given, otherwise
    the errorComment() status.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The copy jobs are only recorded here.  They are executed after all the
    # input has been parsed so that forward references can be handled.
    asRefs = self.flattenAllSections(aasSections).split();
    if not asRefs:
        return self.errorComment(iTagLine, '%s: requires at least on reference value' % (sTag,));

    for sRef in asRefs:
        if sRef in oInstr.asCopyTests:
            self.errorComment(iTagLine, '%s: ignoring duplicate "%s"' % (sTag, sRef,));
            continue;
        if self.oReStatsName.match(sRef) or self.oReFunctionName.match(sRef):
            oInstr.asCopyTests.append(sRef);
        else:
            self.errorComment(iTagLine, '%s: invalid instruction reference (opstat or function) "%s" (valid: %s or %s)'
                              % (sTag, sRef, self.oReStatsName.pattern, self.oReFunctionName.pattern));

    return True;
4951
def parseTagOpOnlyTest(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @oponlytest | @oponly
    Value:      none

    Only test instructions with this tag.  This is a trick that is handy
    for singling out one or two new instructions or tests.

    The instruction is added to g_aoOnlyTestInstructions unless it is
    already in there.  The tag takes no value.

    See also @optestignore.

    Returns True on success, otherwise the errorComment() status.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Any value at all is an error.
    sUnexpected = self.flattenAllSections(aasSections).strip();
    if sUnexpected:
        return self.errorComment(iTagLine, '%s: does not take any value: %s' % (sTag, sUnexpected));

    # Register the instruction, once.
    if oInstr not in g_aoOnlyTestInstructions:
        g_aoOnlyTestInstructions.append(oInstr);
    return True;
4974
def parseTagOpXcptType(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @opxcpttype
    Value:      [none|1|2|3|4|4UA|5|6|7|8|11|12|E1|E1NF|E2|E3|E3NF|E4|E4NF|E5|E5NF|E6|E6NF|E7NF|E9|E9NF|E10|E11|E12|E12NF]

    Sets the SSE or AVX exception type (see SDMv2 2.4, 2.7).

    The value must be a single key of g_kdXcptTypes and may only be set
    once per instruction (oInstr.sXcptType).

    Returns True on success, otherwise the errorComment() status.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Exactly one type word is required.
    asValues = self.flattenAllSections(aasSections).split();
    if len(asValues) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid exception type, please: %s' % (sTag, asValues,));

    # It must be one of the known exception types.
    sNewType = asValues[0];
    if sNewType not in g_kdXcptTypes:
        sMsg = '%s: invalid invalid exception type: %s (valid: %s)' % (sTag, sNewType, sorted(g_kdXcptTypes.keys()),);
        return self.errorComment(iTagLine, sMsg);

    # The type can only be given once.
    if oInstr.sXcptType is not None:
        sMsg = '%s: attempting to overwrite "%s" with "%s" (only one @opxcpttype)' % ( sTag, oInstr.sXcptType, sNewType,);
        return self.errorComment(iTagLine, sMsg);

    oInstr.sXcptType = sNewType;
    return True;
5001
def parseTagOpFunction(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @opfunction
    Value:      <VMM function name>

    This is for explicitly setting the IEM function name.  Normally we pick
    this up from the FNIEMOP_XXX macro invocation after the description, or
    generate it from the mnemonic and operands.

    It is thought it may be necessary to set it when specifying instructions
    whose implementation isn't following immediately or aren't implemented yet.

    The flattened value must match self.oReFunctionName and the name may
    only be set once per instruction (oInstr.sFunction).

    Returns True on success, otherwise the errorComment() status.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The whole flattened value is taken as the name and checked against the pattern.
    sNewName = self.flattenAllSections(aasSections);
    if not self.oReFunctionName.match(sNewName):
        sMsg = '%s: invalid VMM function name: "%s" (valid: %s)' % (sTag, sNewName, self.oReFunctionName.pattern);
        return self.errorComment(iTagLine, sMsg);

    # Never replace a name that was already given.
    sOldName = oInstr.sFunction;
    if sOldName is not None:
        sMsg = '%s: attempting to overwrite VMM function name "%s" with "%s"' % (sTag, sOldName, sNewName,);
        return self.errorComment(iTagLine, sMsg);

    oInstr.sFunction = sNewName;
    return True;
5029
def parseTagOpStats(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @opstats
    Value:      <VMM statistics base name>

    This is for explicitly setting the statistics name.  Normally we pick
    this up from the IEMOP_MNEMONIC macro invocation, or generate it from
    the mnemonic and operands.

    It is thought it may be necessary to set it when specifying instructions
    whose implementation isn't following immediately or aren't implemented yet.

    The flattened value must match self.oReStatsName and the name may only
    be set once per instruction (oInstr.sStats).

    Returns True on success, otherwise the errorComment() status.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The whole flattened value is taken as the name and checked against the pattern.
    sNewName = self.flattenAllSections(aasSections);
    if not self.oReStatsName.match(sNewName):
        sMsg = '%s: invalid VMM statistics name: "%s" (valid: %s)' % (sTag, sNewName, self.oReStatsName.pattern);
        return self.errorComment(iTagLine, sMsg);

    # Never replace a name that was already given.
    sOldName = oInstr.sStats;
    if sOldName is not None:
        sMsg = '%s: attempting to overwrite VMM statistics base name "%s" with "%s"' % (sTag, sOldName, sNewName,);
        return self.errorComment(iTagLine, sMsg);

    oInstr.sStats = sNewName;
    return True;
5057
def parseTagOpDone(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @opdone
    Value:      none

    Used to explicitly flush the instructions that have been specified.

    Returns the doneInstructions() result when no value was given,
    otherwise the errorComment() status.
    """
    _ = iEndLine;
    sValue = self.flattenAllSections(aasSections);
    if sValue == '':
        return self.doneInstructions();
    return self.errorComment(iTagLine, '%s: takes no value, found: "%s"' % (sTag, sValue,));
5070
5071 ## @}
5072
5073
def parseComment(self):
    """
    Parse the current comment (self.sComment).

    The comment is split into lines and then into tagged parts: everything
    before the first line starting with '@' belongs to the implicit
    '@default' tag, and each '@xxx' line starts a new tag whose value runs
    until the next tag.  Empty lines divide a tag value into sections.

    Tags found in self.dTagHandlers are passed to their handler as
    (sTag, aasSections, iTagLine, iEndLine).  Unknown '@op*' tags and
    likely misspellings are reported via errorComment(); this applies to
    every tag, including the last one in the comment.

    Returns False if the comment contains neither 'Opcode' nor '@' and is
    therefore skipped, True otherwise.
    """
    #
    # Reject if comment doesn't seem to contain anything interesting.
    #
    if    self.sComment.find('Opcode') < 0 \
      and self.sComment.find('@') < 0:
        return False;

    #
    # Split the comment into lines, removing leading asterisks and spaces.
    # Also remove leading and trailing empty lines.
    #
    asLines = [sLine.lstrip().lstrip('*').lstrip() for sLine in self.sComment.split('\n')];

    while asLines and not asLines[0]:
        self.iCommentLine += 1;
        asLines.pop(0);

    while asLines and not asLines[-1]:
        asLines.pop();

    #
    # Check for old style: Opcode 0x0f 0x12
    #
    if asLines[0].startswith('Opcode '):
        self.parseCommentOldOpcode(asLines);

    #
    # Split the lines up into (tag, sections, tag line, end line) records.
    # This does not depend on what the handlers do, so it is completed
    # before any handler is called.
    #
    aoTags       = [];
    sCurTag      = '@default';
    iCurTagLine  = 0;
    asCurSection = [];
    aasSections  = [ asCurSection, ];
    iLine        = 0;
    for iLine, sLine in enumerate(asLines):
        if not sLine.startswith('@'):
            if sLine:
                asCurSection.append(sLine);
            elif asCurSection:
                # An empty line after some text starts a new section.
                asCurSection = [];
                aasSections.append(asCurSection);
        else:
            # Complete the previous tag, dropping an empty trailing section.
            if not asCurSection and len(aasSections) > 1:
                aasSections.pop(-1);
            aoTags.append((sCurTag, aasSections, iCurTagLine, iLine));

            # New tag.
            asSplit = sLine.split(None, 1);
            sCurTag = asSplit[0].lower();
            if len(asSplit) > 1:
                asCurSection = [asSplit[1],];
            else:
                asCurSection = [];
            aasSections = [asCurSection, ];
            iCurTagLine = iLine;

    # Complete the final tag.
    if not asCurSection and len(aasSections) > 1:
        aasSections.pop(-1);
    aoTags.append((sCurTag, aasSections, iCurTagLine, iLine));

    #
    # Process the tags in the order they appear.
    #
    cOpTags      = 0;
    sFlatDefault = None;
    for sTag, aasTagSections, iTagLine, iTagEndLine in aoTags:
        if sTag in self.dTagHandlers:
            self.dTagHandlers[sTag](sTag, aasTagSections, iTagLine, iTagEndLine);
            cOpTags += 1;
        elif sTag.startswith('@op'):
            self.errorComment(iTagLine, 'Unknown tag: %s' % (sTag));
        elif sTag == '@default':
            sFlatDefault = self.flattenAllSections(aasTagSections);
        elif '@op' + sTag[1:] in self.dTagHandlers:
            self.errorComment(iTagLine, 'Did you mean "@op%s" rather than "%s"?' % (sTag[1:], sTag));
        elif sTag in ['@encoding', '@opencoding']:
            self.errorComment(iTagLine, 'Did you mean "@openc" rather than "%s"?' % (sTag,));

    #
    # Don't allow default text in blocks containing @op*.
    #
    if cOpTags > 0 and sFlatDefault:
        self.errorComment(0, 'Untagged comment text is not allowed with @op*: %s' % (sFlatDefault,));

    return True;
5174
def parseMacroInvocation(self, sInvocation, offStartInvocation = 0):
    """
    Parses a macro invocation.

    The macro name is the text between offStartInvocation and the first
    opening parenthesis and must match self.oReMacroName.  Arguments are
    split at top level commas; nested parentheses and quoted strings are
    kept intact.  If the invocation isn't closed within sInvocation, more
    text is appended from self.asLines starting at index self.iLine.

    Returns three values:
        1. A list of macro arguments, where the zero'th is the macro name.
        2. The offset following the macro invocation, into sInvocation if
           this is on the same line or into the last line if it is on a
           different line.
        3. Number of additional lines the invocation spans (i.e. zero if
           it is all contained within sInvocation).
    """
    # Locate and validate the macro name.
    offParen = sInvocation.find('(', offStartInvocation);
    if offParen <= offStartInvocation:
        self.raiseError("macro invocation open parenthesis not found");
    sMacroName = sInvocation[offStartInvocation:offParen].strip();
    if not self.oReMacroName.match(sMacroName):
        self.raiseError("invalid macro name '%s'" % (sMacroName,));
    asArgs = [sMacroName, ];

    # Scan the argument list one character at a time.
    iNextLine    = self.iLine;      # Index of the next entry in self.asLines to pull in.
    cNesting     = 1;               # Parenthesis depth, 1 = the macro's own argument list.
    offCur       = offParen + 1;
    offArgStart  = offCur;
    offLineStart = 0;               # Where the most recently appended line starts.
    chInQuote    = None;            # The active quote character, if inside a string.
    while cNesting > 0:
        # Out of text?  Pull in the next source line, if there is one.
        if offCur >= len(sInvocation):
            if iNextLine >= len(self.asLines):
                self.error('macro invocation beyond end of file');
                return (asArgs, offCur - offLineStart, iNextLine - self.iLine);
            offLineStart = offCur;
            sInvocation += self.asLines[iNextLine];
            iNextLine   += 1;
        ch = sInvocation[offCur];

        if chInQuote is not None:
            if ch == '\\' and offCur + 1 < len(sInvocation):
                offCur += 1;        # Skip the escaped character.
            elif ch == chInQuote:
                chInQuote = None;
        elif ch == '"' or ch == '\'':
            chInQuote = ch;
        elif ch == '(':
            cNesting += 1;
        elif ch == ',' or ch == ')':
            if cNesting == 1:
                # A top level separator completes the current argument.
                asArgs.append(sInvocation[offArgStart:offCur].strip());
                offArgStart = offCur + 1;
            if ch == ')':
                cNesting -= 1;
        offCur += 1;

    return (asArgs, offCur - offLineStart, iNextLine - self.iLine);
5231
def findAndParseMacroInvocationEx(self, sCode, sMacro, offStart = 0):
    """
    Looks for the first occurrence of sMacro in sCode at or after offStart
    and parses it as a macro invocation if it is followed (ignoring
    whitespace) by an opening parenthesis.

    Returns (None, len(sCode), 0) if not found, otherwise the
    parseMacroInvocation() return value.
    """
    offHit = sCode.find(sMacro, offStart);
    # Note: lstrip + startswith rather than indexing the stripped string, as
    #       nothing at all may follow the macro name (end of the string).
    if offHit >= 0 and sCode[offHit + len(sMacro):].lstrip().startswith('('):
        return self.parseMacroInvocation(sCode, offHit);
    return (None, len(sCode), 0);
5241
def findAndParseMacroInvocation(self, sCode, sMacro):
    """
    Convenience wrapper around findAndParseMacroInvocationEx() that only
    hands back the first element of its result.

    Returns None if not found, arguments as per parseMacroInvocation if found.
    """
    tResult = self.findAndParseMacroInvocationEx(sCode, sMacro);
    return tResult[0];
5247
def findAndParseFirstMacroInvocation(self, sCode, asMacro):
    """
    Tries the macro names in asMacro in the given order and stops at the
    first one that findAndParseMacroInvocation() finds in sCode.

    Returns same as findAndParseMacroInvocation.
    """
    # The generator is lazy, so no further macros are tried after the first hit.
    oAttempts = (self.findAndParseMacroInvocation(sCode, sName) for sName in asMacro);
    return next((asArgs for asArgs in oAttempts if asArgs is not None), None);
5257
def workerIemOpMnemonicEx(self, sMacro, sStats, sAsm, sForm, sUpper, sLower,  # pylint: disable=too-many-arguments
                          sDisHints, sIemHints, asOperands):
    """
    Processes one of the IEMOP_MNEMONIC0EX, IEMOP_MNEMONIC1EX, IEMOP_MNEMONIC2EX,
    IEMOP_MNEMONIC3EX, and IEMOP_MNEMONIC4EX macros.

    The macro arguments are cross-checked against what the @op* tags have
    already set on the last instruction in self.aoCurInstrs (one is added
    if there is none), and fill in whatever is still unset: mnemonic,
    operands, encoding, statistics name and hints.  Problems are reported
    via self.error().

    sMacro is only used in error messages and sAsm is not used at all.
    An sIemHints value containing IEMOPHINT_SKIP_PYTHON makes the method
    return right after the basic a_Upper/a_Lower checks.

    Always returns True.
    """
    #
    # Some invocation checks.
    #
    if sUpper != sUpper.upper():
        self.error('%s: bad a_Upper parameter: %s' % (sMacro, sUpper,));
    if sLower != sLower.lower():
        self.error('%s: bad a_Lower parameter: %s' % (sMacro, sLower,));
    if sUpper.lower() != sLower:
        self.error('%s: a_Upper and a_Lower parameters does not match: %s vs %s' % (sMacro, sUpper, sLower,));
    if not self.oReMnemonic.match(sLower):
        self.error('%s: invalid a_Lower: %s (valid: %s)' % (sMacro, sLower, self.oReMnemonic.pattern,));

    #
    # Check if sIemHints tells us to not consider this macro invocation.
    #
    if sIemHints.find('IEMOPHINT_SKIP_PYTHON') >= 0:
        return True;

    # Apply to the last instruction only for now.
    if not self.aoCurInstrs:
        self.addInstruction();
    oInstr = self.aoCurInstrs[-1];
    if oInstr.iLineMnemonicMacro == -1:
        oInstr.iLineMnemonicMacro = self.iLine;
    else:
        self.error('%s: already saw a IEMOP_MNEMONIC* macro on line %u for this instruction'
                   % (sMacro, oInstr.iLineMnemonicMacro,));

    # Mnemonic
    if oInstr.sMnemonic is None:
        oInstr.sMnemonic = sLower;
    elif oInstr.sMnemonic != sLower:
        self.error('%s: current instruction and a_Lower does not match: %s vs %s' % (sMacro, oInstr.sMnemonic, sLower,));

    # Process operands.
    if len(oInstr.aoOperands) not in [0, len(asOperands)]:
        self.error('%s: number of operands given by @opN does not match macro: %s vs %s'
                   % (sMacro, len(oInstr.aoOperands), len(asOperands),));
    for iOperand, sType in enumerate(asOperands):
        sWhere = g_kdOpTypes.get(sType, [None, None])[1];
        if sWhere is None:
            self.error('%s: unknown a_Op%u value: %s' % (sMacro, iOperand + 1, sType));
            if iOperand < len(oInstr.aoOperands): # error recovery.
                sWhere = oInstr.aoOperands[iOperand].sWhere;
                sType  = oInstr.aoOperands[iOperand].sType;
            else:
                sWhere = 'reg';
                sType  = 'Gb';
        if iOperand == len(oInstr.aoOperands):
            oInstr.aoOperands.append(Operand(sWhere, sType))
        elif oInstr.aoOperands[iOperand].sWhere != sWhere or oInstr.aoOperands[iOperand].sType != sType:
            self.error('%s: @op%u and a_Op%u mismatch: %s:%s vs %s:%s'
                       % (sMacro, iOperand + 1, iOperand + 1, oInstr.aoOperands[iOperand].sWhere,
                          oInstr.aoOperands[iOperand].sType, sWhere, sType,));

    # Encoding.
    if sForm not in g_kdIemForms:
        self.error('%s: unknown a_Form value: %s' % (sMacro, sForm,));
    else:
        if oInstr.sEncoding is None:
            oInstr.sEncoding = g_kdIemForms[sForm][0];
        elif g_kdIemForms[sForm][0] != oInstr.sEncoding:
            self.error('%s: current instruction @openc and a_Form does not match: %s vs %s (%s)'
                       % (sMacro, oInstr.sEncoding, g_kdIemForms[sForm], sForm));

        # Check the parameter locations for the encoding.
        asFormWheres = g_kdIemForms[sForm][1];
        if asFormWheres is not None:
            if len(asFormWheres) > len(oInstr.aoOperands):
                self.error('%s: The a_Form=%s has a different operand count: %s (form) vs %s'
                           % (sMacro, sForm, len(asFormWheres), len(oInstr.aoOperands) ));
            else:
                # Note: membership tests are used for VEX/XOP since str.find()
                #       returns -1 (truthy) on a miss and 0 (falsy) for a hit
                #       at the start of the string.
                fVexOrXop = 'VEX' in sForm or 'XOP' in sForm;
                for iOperand, sWhere in enumerate(asFormWheres):
                    if oInstr.aoOperands[iOperand].sWhere != sWhere:
                        self.error('%s: current instruction @op%u and a_Form location does not match: %s vs %s (%s)'
                                   % (sMacro, iOperand + 1, oInstr.aoOperands[iOperand].sWhere, sWhere, sForm,));
                    sOpFormMatch = g_kdOpTypes[oInstr.aoOperands[iOperand].sType][4];
                    if    (sOpFormMatch in [ 'REG', 'MEM', ] and sForm.find('_' + sOpFormMatch) < 0) \
                       or (sOpFormMatch in [ 'FIXED', ]      and sForm.find(sOpFormMatch) < 0) \
                       or (sOpFormMatch == 'RM' and (sForm.find('_MEM') > 0 or sForm.find('_REG') > 0) ) \
                       or (sOpFormMatch == 'V'  and (   not fVexOrXop
                                                     or sForm.replace('VEX','').find('V') < 0) ):
                        self.error('%s: current instruction @op%u and a_Form type does not match: %s/%s vs %s'
                                   % (sMacro, iOperand + 1, oInstr.aoOperands[iOperand].sType, sOpFormMatch, sForm, ));
                # Operands beyond those given by the form must be fixed ones.
                if len(asFormWheres) < len(oInstr.aoOperands):
                    for iOperand in range(len(asFormWheres), len(oInstr.aoOperands)):
                        if    oInstr.aoOperands[iOperand].sType != 'FIXED' \
                          and g_kdOpTypes[oInstr.aoOperands[iOperand].sType][0] != 'IDX_ParseFixedReg':
                            self.error('%s: Expected FIXED type operand #%u following operands given by a_Form=%s: %s (%s)'
                                       % (sMacro, iOperand, sForm, oInstr.aoOperands[iOperand].sType,
                                          oInstr.aoOperands[iOperand].sWhere));

        # Check @opcodesub
        if    oInstr.sSubOpcode \
          and g_kdIemForms[sForm][2] \
          and oInstr.sSubOpcode.find(g_kdIemForms[sForm][2]) < 0:
            self.error('%s: current instruction @opcodesub and a_Form does not match: %s vs %s (%s)'
                       % (sMacro, oInstr.sSubOpcode, g_kdIemForms[sForm][2], sForm,));

    # Stats.
    if not self.oReStatsName.match(sStats):
        self.error('%s: invalid a_Stats value: %s' % (sMacro, sStats,));
    elif oInstr.sStats is None:
        oInstr.sStats = sStats;
    elif oInstr.sStats != sStats:
        self.error('%s: mismatching @opstats and a_Stats value: %s vs %s'
                   % (sMacro, oInstr.sStats, sStats,));

    # Process the hints (simply merge with @ophints w/o checking anything).
    # Both hint parameters are '|' separated lists of prefixed names or '0'.
    for sHints, sPrefix, sParam in [ (sDisHints, 'DISOPTYPE_', 'a_fDisHints'),
                                     (sIemHints, 'IEMOPHINT_', 'a_fIemHints'), ]:
        for sHint in sHints.split('|'):
            sHint = sHint.strip();
            if sHint.startswith(sPrefix):
                sShortHint = sHint[len(sPrefix):].lower();
                if sShortHint in g_kdHints:
                    oInstr.dHints[sShortHint] = True; # (dummy value, using dictionary for speed)
                else:
                    self.error('%s: unknown %s value: %s' % (sMacro, sParam, sHint,));
            elif sHint != '0':
                self.error('%s: expected %s value: %s' % (sMacro, sParam, sHint,));

    _ = sAsm;
    return True;
5397
def workerIemOpMnemonic(self, sMacro, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands):
    """
    Processes one of the IEMOP_MNEMONIC0, IEMOP_MNEMONIC1, IEMOP_MNEMONIC2,
    IEMOP_MNEMONIC3, and IEMOP_MNEMONIC4 macros.

    Derives the statistics name (mnemonic and operands joined by '_') and
    the assembly string (mnemonic, space, comma separated operands) and
    hands everything on to workerIemOpMnemonicEx().

    Returns the workerIemOpMnemonicEx() result.
    """
    if asOperands:
        sStats = sLower + '_' + '_'.join(asOperands);
        sAsm   = sLower + ' ' + ','.join(asOperands);
    else:
        # Without operands both names are just the mnemonic.
        sStats = sLower;
        sAsm   = sLower;
    return self.workerIemOpMnemonicEx(sMacro, sStats, sAsm, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands);
5407
def workerIemMcBegin(self, sCode, offBeginStatementInCodeStr, offBeginStatementInLine):
    """
    Process a IEM_MC_BEGIN macro invocation.

    Creates a new McBlock for the current function and makes it the
    current block (self.oCurMcBlock).  The block is linked to the last
    instruction in self.aoCurInstrs, if there is one.

    Always returns True; precondition failures go via raiseError().
    """
    if self.fDebugMc:
        self.debug('IEM_MC_BEGIN on %s off %s' % (self.iLine, offBeginStatementInLine,));

    # We need a function to be in and no block may be open already.
    if not self.oCurFunction:
        self.raiseError('IEM_MC_BEGIN w/o current function (%s)' % (sCode,));
    if self.oCurMcBlock:
        self.raiseError('IEM_MC_BEGIN before IEM_MC_END. Previous IEM_MC_BEGIN at line %u' % (self.oCurMcBlock.iBeginLine,));

    # The indent is the statement offset relative to the start of its own
    # (sub-)line, as sCode may hold several lines after macro expansion.
    offLastNewline = sCode.rfind('\n', 0, offBeginStatementInCodeStr);
    if offLastNewline >= 0:
        cchIndent = offBeginStatementInCodeStr - (offLastNewline + 1);
    else:
        cchIndent = offBeginStatementInCodeStr;

    # Create the block and make it the current one.
    oInstruction = self.aoCurInstrs[-1] if self.aoCurInstrs else None;
    self.oCurMcBlock = McBlock(self.sSrcFile, self.iLine, offBeginStatementInLine, self.oCurFunction, self.iMcBlockInFunc,
                               oInstruction = oInstruction, cchIndent = cchIndent);

    # Only add it to the global list when the preprocessor context matches the host architecture.
    try:
        fAddToList = (   not self.aoCppCondStack
                      or not self.sHostArch
                      or self.PreprocessorConditional.isInBlockForArch(self.aoCppCondStack, self.sHostArch, self.iLine));
        if fAddToList:
            g_aoMcBlocks.append(self.oCurMcBlock);
            self.cTotalMcBlocks += 1;
    except Exception as oXcpt:
        self.raiseError(oXcpt.args[0]);

    # Link it up with the instruction and advance the per function block counter.
    if self.oCurMcBlock.oInstruction:
        self.oCurMcBlock.oInstruction.aoMcBlocks.append(self.oCurMcBlock);
    self.iMcBlockInFunc += 1;
    return True;
5447
5448 @staticmethod
5449 def extractLinesFromMacroExpansionLine(sRawLine, offBegin, offEnd, sBeginStmt = 'IEM_MC_BEGIN'):
5450 """
5451 Helper used by workerIemMcEnd and workerIemMcDeferToCImplXRet for
5452 extracting a statement block from a string that's the result of macro
5453 expansion and therefore contains multiple "sub-lines" as it were.
5454
5455 Returns list of lines covering offBegin thru offEnd in sRawLine.
5456 """
5457
5458 off = sRawLine.find('\n', offEnd);
5459 if off > 0:
5460 sRawLine = sRawLine[:off + 1];
5461
5462 off = sRawLine.rfind('\n', 0, offBegin) + 1;
5463 sRawLine = sRawLine[off:];
5464 if not sRawLine.strip().startswith(sBeginStmt):
5465 sRawLine = sRawLine[offBegin - off:]
5466
5467 return [sLine + '\n' for sLine in sRawLine.split('\n')];
5468
def workerIemMcEnd(self, offEndStatementInLine):
    """
    Process a IEM_MC_END macro invocation.

    Collects the source lines of the current MC block (self.oCurMcBlock),
    cuts off whatever follows the 'IEM_MC_END();' statement on the last
    line, completes the block and resets self.oCurMcBlock to None.

    offEndStatementInLine is the offset of the IEM_MC_END statement within
    the current line.

    Returns True.  Problems, including a truncated or malformed
    'IEM_MC_END ( ) ;' sequence, are reported thru self.raiseError.
    """
    if self.fDebugMc:
        self.debug('IEM_MC_END on %s off %s' % (self.iLine, offEndStatementInLine,));

    # Check preconditions.
    if not self.oCurMcBlock:
        self.raiseError('IEM_MC_END w/o IEM_MC_BEGIN.');

    #
    # HACK ALERT! For blocks originating from macro expansion the start and
    #             end line will be the same, but the line has multiple
    #             newlines inside it.  So, we have to do some extra tricks
    #             to get the lines out of there. We ASSUME macros aren't
    #             messy, but keep IEM_MC_BEGIN/END on separate lines.
    #
    if self.iLine > self.oCurMcBlock.iBeginLine:
        asLines = self.asLines[self.oCurMcBlock.iBeginLine - 1 : self.iLine];
        if not asLines[0].strip().startswith('IEM_MC_BEGIN'):
            self.raiseError('IEM_MC_BEGIN is not the first word on the line');

        # Hack alert! Detect mixed tail/head macros a la cmpxchg16b and split up the lines
        #             so we can deal correctly with IEM_MC_END below and everything else.
        for sLine in asLines:
            cNewLines = sLine.count('\n');
            assert cNewLines > 0;
            if cNewLines > 1:
                # The end offset is relative to the last line, so add the
                # length of all the preceding lines to get it into the joined string.
                asLines = self.extractLinesFromMacroExpansionLine(''.join(asLines),
                                                                  self.oCurMcBlock.offBeginLine,
                                                                    offEndStatementInLine
                                                                  + sum(len(s) for s in asLines)
                                                                  - len(asLines[-1]));
                self.oCurMcBlock.iMacroExp = McBlock.kiMacroExp_Partial;
                break;
    else:
        self.oCurMcBlock.iMacroExp = McBlock.kiMacroExp_Entire;
        asLines = self.extractLinesFromMacroExpansionLine(self.asLines[self.iLine - 1],
                                                          self.oCurMcBlock.offBeginLine, offEndStatementInLine);

    #
    # Strip anything following the IEM_MC_END(); statement in the final line,
    # so that we don't carry on any trailing 'break' after macro expansions
    # like for iemOp_movsb_Xb_Yb.
    #
    while asLines[-1].strip() == '':
        asLines.pop();
    sFinal        = asLines[-1];
    offFinalEnd   = sFinal.find('IEM_MC_END');
    offEndInFinal = offFinalEnd;
    if offFinalEnd < 0: self.raiseError('bogus IEM_MC_END: Not in final line: %s' % (sFinal,));
    offFinalEnd += len('IEM_MC_END');

    # Expect '(', ')' and ';' in that order, each optionally preceded by blanks.
    # The offset is bounds checked so that a truncated statement ends up as a
    # parser error rather than an IndexError.
    cchFinal = len(sFinal);
    for chExpected in '();':
        while offFinalEnd < cchFinal and sFinal[offFinalEnd].isspace():
            offFinalEnd += 1;
        if offFinalEnd >= cchFinal or sFinal[offFinalEnd] != chExpected:
            self.raiseError('bogus IEM_MC_END: Expected "%s" at %s: %s' % (chExpected, offFinalEnd, sFinal,));
        offFinalEnd += 1;

    asLines[-1] = sFinal[: offFinalEnd];

    #
    # Complete and discard the current block.
    #
    self.oCurMcBlock.complete(self.iLine, offEndStatementInLine,
                              offEndStatementInLine + offFinalEnd - offEndInFinal, asLines);
    self.oCurMcBlock = None;
    return True;
5547
def workerIemMcDeferToCImplXRet(self, sCode, offBeginStatementInCodeStr, offBeginStatementInLine, cParams):
    """
    Process a IEM_MC_DEFER_TO_CIMPL_[1-5]_RET macro invocation.

    The statement makes up a whole MC block of its own, so one is created,
    given the parsed statement, completed and registered right away.

    sCode is the code snippet being scanned, offBeginStatementInCodeStr the
    offset of the statement within it, offBeginStatementInLine the offset
    within the line, and cParams the digit from the macro name.

    Returns True.  Problems are reported thru self.raiseError.
    Note! self.iLine is advanced when the invocation spans several lines.
    """
    sStmt = 'IEM_MC_DEFER_TO_CIMPL_%d_RET' % (cParams,);
    if self.fDebugMc:
        self.debug('%s on %s off %s' % (sStmt, self.iLine, offBeginStatementInLine,));
        #self.debug('%s<eos>' % (sCode,));

    # Check preconditions.
    if not self.oCurFunction:
        self.raiseError('%s w/o current function (%s)' % (sStmt, sCode,));
    if self.oCurMcBlock:
        self.raiseError('%s inside IEM_MC_BEGIN blocki starting at line %u' % (sStmt, self.oCurMcBlock.iBeginLine,));

    # Figure out the indent level the block starts at, adjusting for expanded multiline macros.
    cchIndent = offBeginStatementInCodeStr;
    offPrevNewline = sCode.rfind('\n', 0, offBeginStatementInCodeStr);
    if offPrevNewline >= 0:
        cchIndent -= offPrevNewline + 1;
    #self.debug('cchIndent=%s offPrevNewline=%s sFunc=%s' % (cchIndent, offPrevNewline, self.oCurFunction.sName));

    # Start a new block.
    oMcBlock = McBlock(self.sSrcFile, self.iLine, offBeginStatementInLine, self.oCurFunction, self.iMcBlockInFunc,
                       oInstruction = self.aoCurInstrs[-1] if self.aoCurInstrs else None,
                       cchIndent = cchIndent, fDeferToCImpl = True);

    # Parse the statement.  Four arguments are expected in addition to the cParams ones.
    asArgs, offAfter, cLines = self.findAndParseMacroInvocationEx(sCode, sStmt, offBeginStatementInCodeStr);
    if asArgs is None:
        self.raiseError('%s: Closing parenthesis not found!' % (sStmt,));
    if len(asArgs) != cParams + 4:
        self.raiseError('%s: findAndParseMacroInvocationEx returns %s args, expected %s! (%s)'
                        % (sStmt, len(asArgs), cParams + 4, asArgs));

    oMcBlock.aoStmts = [ McBlock.parseMcDeferToCImpl(oMcBlock, asArgs[0], asArgs[1:]), ];

    # These MCs are not typically part of macro expansions, but let's get
    # it out of the way immediately if it's the case.
    if cLines > 0 or self.asLines[oMcBlock.iBeginLine - 1].count('\n') <= 1:
        asLines = self.asLines[self.iLine - 1 : self.iLine - 1 + cLines + 1];
        assert offAfter < len(asLines[-1]) and asLines[-1][offAfter] == ';', \
               'iBeginLine=%d iLine=%d offAfter=%s line: "%s"' % (oMcBlock.iBeginLine, self.iLine, offAfter, asLines[-1],);
        asLines[-1] = asLines[-1][:offAfter + 1];
    else:
        asLines = self.extractLinesFromMacroExpansionLine(self.asLines[self.iLine - 1], offBeginStatementInCodeStr,
                                                          offAfter, sStmt);
        # The helper cuts its input after a newline and appends a newline to
        # each split piece, leaving a blank entry at the end.  Drop such entries
        # (like workerIemMcEnd does) so that the last one holds the statement.
        while len(asLines) > 1 and asLines[-1].strip() == '':
            asLines.pop();
        assert asLines[-1].find(';') >= 0;
        asLines[-1] = asLines[-1][:asLines[-1].find(';') + 1];

    assert asLines[0].find(sStmt) >= 0;
    #if not asLines[0].strip().startswith(sStmt):
    #    self.raiseError('%s is not the first word on the line: %s' % (sStmt, asLines[0].strip()));

    # Advance to the line with the closing ')'.
    self.iLine += cLines;

    # Complete the block.
    oMcBlock.complete(self.iLine, 0 if cLines > 0 else offBeginStatementInCodeStr, offAfter + 1, asLines);

    g_aoMcBlocks.append(oMcBlock);
    if oMcBlock.oInstruction:
        oMcBlock.oInstruction.aoMcBlocks.append(oMcBlock);
    self.cTotalMcBlocks += 1;
    self.iMcBlockInFunc += 1;

    return True;
5615
def workerStartFunction(self, asArgs):
    """
    Handles the start of a decoder function.

    Decoder functions are defined by way of the FNIEMOP*_DEF* and
    FNIEMOP_*STUB* macros.  asArgs is the parsed invocation, with the macro
    name as entry zero and the function name as entry one.

    Returns True.
    """
    # A new function implicitly ends the previous one on the preceding line.
    oPrevFunction = self.oCurFunction;
    if oPrevFunction:
        iLastLine = self.iLine - 1;
        oPrevFunction.complete(iLastLine, self.asLines[oPrevFunction.iBeginLine - 1 : iLastLine]);

    # Start tracking the new one.
    sFunctionName = asArgs[1];
    self.oCurFunction = DecoderFunction(self.sSrcFile, self.iLine, sFunctionName, asArgs);
    return True;
5631
def checkCodeForMacro(self, sCode, offLine):
    """
    Checks code for relevant macro invocation.

    sCode is a piece of code (comments already cut away by the caller) and
    offLine is the offset of sCode within the current line; it is only used
    for turning IEM_MC_XXX match offsets into line offsets.

    The following is looked for, in this order:
        - FNIEMOP_DEF, FNIEMOPRM_DEF and FNIEMOP_[UD_]STUB[_1] function definitions,
        - FNIEMOP_DEF_1 and FNIEMOP_DEF_2 worker function definitions,
        - IEMOP_HLP_DONE_VEX_DECODING[_XXX],
        - IEMOP_MNEMONIC and its numbered and EX variants,
        - IEM_MC_BEGIN, IEM_MC_END and IEM_MC_DEFER_TO_CIMPL_X_RET.

    Returns True if a function definition or IEM_MC_XXX was processed,
    otherwise False (also when only VEX decoding helpers or mnemonic macros
    were seen).
    """

    #
    # Scan macro invocations.
    #
    # Note! The test is '> 0', so a parenthesis in the very first column
    #       does not qualify.
    #
    if sCode.find('(') > 0:
        # Look for instruction decoder function definitions. ASSUME single line.
        asArgs = self.findAndParseFirstMacroInvocation(sCode,
                                                       [ 'FNIEMOP_DEF',
                                                         'FNIEMOPRM_DEF',
                                                         'FNIEMOP_STUB',
                                                         'FNIEMOP_STUB_1',
                                                         'FNIEMOP_UD_STUB',
                                                         'FNIEMOP_UD_STUB_1' ]);
        if asArgs is not None:
            self.workerStartFunction(asArgs);
            #self.debug('%s: oCurFunction=%s' % (self.iLine, self.oCurFunction.sName,));

            # Make sure there is an instruction to attach the function to, and
            # complain about instructions that already saw a FNIEMOP_XXX macro.
            if not self.aoCurInstrs:
                self.addInstruction();
            for oInstr in self.aoCurInstrs:
                if oInstr.iLineFnIemOpMacro == -1:
                    oInstr.iLineFnIemOpMacro = self.iLine;
                else:
                    self.error('%s: already seen a FNIEMOP_XXX macro for %s' % (asArgs[0], oInstr,) );
            self.setInstrunctionAttrib('sFunction', asArgs[1]);
            # asArgs[0] is the macro name; all the candidates start with 'FNIEMOP',
            # so '> 0' works as a substring test here.
            self.setInstrunctionAttrib('fStub', asArgs[0].find('STUB') > 0, fOverwrite = True);
            self.setInstrunctionAttrib('fUdStub', asArgs[0].find('UD_STUB') > 0, fOverwrite = True);
            if asArgs[0].find('STUB') > 0:
                # The stub macros are the end of it as far as the function goes.
                self.doneInstructions(fEndOfFunction = True);
            return True;

        # Check for worker function definitions, so we can get a context for MC blocks.
        asArgs = self.findAndParseFirstMacroInvocation(sCode,
                                                       [ 'FNIEMOP_DEF_1',
                                                         'FNIEMOP_DEF_2', ]);
        if asArgs is not None:
            self.workerStartFunction(asArgs);
            #self.debug('%s: oCurFunction=%s (%s)' % (self.iLine, self.oCurFunction.sName, asArgs[0]));
            return True;

        # IEMOP_HLP_DONE_VEX_DECODING_*
        asArgs = self.findAndParseFirstMacroInvocation(sCode,
                                                       [ 'IEMOP_HLP_DONE_VEX_DECODING',
                                                         'IEMOP_HLP_DONE_VEX_DECODING_L0',
                                                         'IEMOP_HLP_DONE_VEX_DECODING_NO_VVVV',
                                                         'IEMOP_HLP_DONE_VEX_DECODING_L0_AND_NO_VVVV',
                                                         ]);
        if asArgs is not None:
            sMacro = asArgs[0];
            if sMacro in ('IEMOP_HLP_DONE_VEX_DECODING_L0', 'IEMOP_HLP_DONE_VEX_DECODING_L0_AND_NO_VVVV', ):
                # The L0 variants imply the vex_l_zero hint.  Complain if the mnemonic
                # macro didn't specify it, then set it so we only complain once.
                for oInstr in self.aoCurInstrs:
                    if 'vex_l_zero' not in oInstr.dHints:
                        if oInstr.iLineMnemonicMacro >= 0:
                            self.errorOnLine(oInstr.iLineMnemonicMacro,
                                             'Missing IEMOPHINT_VEX_L_ZERO! (%s on line %d)' % (sMacro, self.iLine,));
                        oInstr.dHints['vex_l_zero'] = True;

        #
        # IEMOP_MNEMONIC*
        #
        # In all the asArgs lists below entry zero is the macro name, so the
        # macro parameters start at index one.
        #
        if sCode.find('IEMOP_MNEMONIC') >= 0:
            # IEMOP_MNEMONIC(a_Stats, a_szMnemonic) IEMOP_INC_STATS(a_Stats)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC');
            if asArgs is not None:
                # Only done when there is exactly one current instruction that
                # doesn't have a stats name yet.
                if len(self.aoCurInstrs) == 1:
                    oInstr = self.aoCurInstrs[0];
                    if oInstr.sStats is None:
                        oInstr.sStats = asArgs[1];
                        self.deriveMnemonicAndOperandsFromStats(oInstr, asArgs[1]);

            # IEMOP_MNEMONIC0EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC0EX');
            if asArgs is not None:
                self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[6],
                                           asArgs[7], []);
            # IEMOP_MNEMONIC1EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC1EX');
            if asArgs is not None:
                self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[7],
                                           asArgs[8], [asArgs[6],]);
            # IEMOP_MNEMONIC2EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC2EX');
            if asArgs is not None:
                self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[8],
                                           asArgs[9], [asArgs[6], asArgs[7]]);
            # IEMOP_MNEMONIC3EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_fDisHints,
            #                   a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC3EX');
            if asArgs is not None:
                self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[9],
                                           asArgs[10], [asArgs[6], asArgs[7], asArgs[8],]);
            # IEMOP_MNEMONIC4EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_Op4, a_fDisHints,
            #                   a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC4EX');
            if asArgs is not None:
                self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[10],
                                           asArgs[11], [asArgs[6], asArgs[7], asArgs[8], asArgs[9],]);

            # IEMOP_MNEMONIC0(a_Form, a_Upper, a_Lower, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC0');
            if asArgs is not None:
                self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], []);
            # IEMOP_MNEMONIC1(a_Form, a_Upper, a_Lower, a_Op1, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC1');
            if asArgs is not None:
                self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[5], asArgs[6], [asArgs[4],]);
            # IEMOP_MNEMONIC2(a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC2');
            if asArgs is not None:
                self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[6], asArgs[7],
                                         [asArgs[4], asArgs[5],]);
            # IEMOP_MNEMONIC3(a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC3');
            if asArgs is not None:
                self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[7], asArgs[8],
                                         [asArgs[4], asArgs[5], asArgs[6],]);
            # IEMOP_MNEMONIC4(a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_Op4, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC4');
            if asArgs is not None:
                self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[8], asArgs[9],
                                         [asArgs[4], asArgs[5], asArgs[6], asArgs[7],]);

        #
        # IEM_MC_BEGIN + IEM_MC_END.
        # We must support multiple instances per code snippet.
        #
        offCode = sCode.find('IEM_MC_');
        if offCode >= 0:
            for oMatch in self.oReMcBeginEnd.finditer(sCode, offCode):
                # Group 1 is 'END', 'BEGIN' or something starting with 'DEFER_TO_CIMPL_'
                # followed by the parameter count digit.
                if oMatch.group(1) == 'END':
                    self.workerIemMcEnd(offLine + oMatch.start());
                elif oMatch.group(1) == 'BEGIN':
                    self.workerIemMcBegin(sCode, oMatch.start(), offLine + oMatch.start());
                else:
                    self.workerIemMcDeferToCImplXRet(sCode, oMatch.start(), offLine + oMatch.start(),
                                                     int(oMatch.group(1)[len('DEFER_TO_CIMPL_')]));
            return True;

    return False;
5775
def workerPreprocessorRecreateMacroRegex(self):
    """
    Rebuilds self.oReMacros from self.dMacros; call after changing the latter.

    The expression is one big capture group with an alternative for each
    macro.  Macros with an argument list (asArgs is not None) must be
    followed by an opening parenthesis, the others by a word boundary.
    self.oReMacros is set to None when there are no macros.

    Returns True.
    """
    if not self.dMacros:
        self.oReMacros = None;
        return True;

    asAlternatives = [];
    for sName, oMacro in self.dMacros.items():
        sSuffix = r'\b' if oMacro.asArgs is None else r'\s*\(';
        asAlternatives.append(sName + sSuffix);
    self.oReMacros = re.compile(r'\b(' + r'|'.join(asAlternatives) + ')');
    return True;
5796
def workerPreprocessorDefine(self, sRest):
    """
    Handles a #define directive, sRest being what follows the directive word.

    Only macros with IEM_MC_BEGIN and/or IEM_MC_END in the body are
    recorded (in self.dMacros), everything else is parsed and dropped.

    Returns True on success, the self.error() result if the definition
    cannot be parsed.
    Note! self.iLine is advanced past any continuation lines.
    """
    assert sRest[-1] == '\n';

    #
    # Glue continuation lines together.  The newlines are kept, while the
    # backslashes and any blanks preceding them are dropped.
    #
    iLineStart = self.iLine;
    while sRest.endswith('\\\n') and self.iLine < len(self.asLines):
        sNextLine   = self.asLines[self.iLine];
        self.iLine += 1;
        sRest       = sRest[0:-2].rstrip() + '\n' + sNextLine;
    #self.debug('workerPreprocessorDefine: sRest=%s<EOS>' % (sRest,));

    #
    # Split out the name, argument list and body using regular expressions.
    # The first one includes an argument list group, the second is the
    # fallback for simple macros.
    #
    oMatch = self.oReHashDefine2.match(sRest);
    if oMatch:
        sAllArgs = oMatch.group(2).strip();
        if sAllArgs:
            asArgs = [sParam.strip() for sParam in sAllArgs.split(',')];
        else:
            asArgs = None;
        sBody = oMatch.group(3);
    else:
        oMatch = self.oReHashDefine3.match(sRest);
        if not oMatch:
            self.debug('workerPreprocessorDefine: wtf? sRest=%s' % (sRest,));
            return self.error('bogus macro definition: %s' % (sRest,));
        asArgs = None;
        sBody  = oMatch.group(2);
    sName = oMatch.group(1);
    assert sName == sName.strip();
    #self.debug('workerPreprocessorDefine: sName=%s asArgs=%s sBody=%s<EOS>' % (sName, asArgs, sBody));

    #
    # Is this of any interest to us?  MC blocks within nested macro
    # expansion are NOT supported, just to avoid lots of extra work, and
    # there is only limited support for macros expanding to partial MC blocks.
    #
    # Note! IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX and other macros making
    #       use of IEMOP_RAISE_INVALID_LOCK_PREFIX_RET() are ignored here and
    #       dealt with by overriding IEMOP_RAISE_INVALID_LOCK_PREFIX_RET and
    #       its siblings in the recompiler.  That is a lot simpler than nested
    #       macro expansion and lots of heuristics for locating all the
    #       relevant macros, and it avoids producing lots of unnecessary
    #       threaded functions.
    #
    if 'IEM_MC_BEGIN' not in sBody and 'IEM_MC_END' not in sBody:
        #self.debug('workerPreprocessorDefine: irrelevant (%s: %s)' % (sName, sBody));
        return True;

    #
    # Record the macro and refresh the macro matching expression.
    #
    if self.fDebugPreproc:
        self.debug('#define %s on line %u' % (sName, self.iLine,));
    self.dMacros[sName] = SimpleParser.Macro(sName, asArgs, sBody.strip(), iLineStart);
    return self.workerPreprocessorRecreateMacroRegex();
5857
def workerPreprocessorUndef(self, sRest):
    """
    Handles a #undef directive, sRest being what follows the directive word.

    Returns True, or the workerPreprocessorRecreateMacroRegex result when a
    macro we keep track of got removed.
    """
    # Isolate the name, cutting off anything from the first slash (comment) onwards.
    offComment = sRest.find('/');
    sName = (sRest[:offComment] if offComment > 0 else sRest).strip();

    # Nothing to do unless it is one of the macros we are tracking.
    if sName not in self.dMacros:
        return True;

    if self.fDebugPreproc:
        self.debug('#undef %s on line %u' % (sName, self.iLine,));
    del self.dMacros[sName];
    return self.workerPreprocessorRecreateMacroRegex();
5876
def workerPreprocessorIfOrElif(self, sDirective, sRest):
    """
    Handles the #if, #ifdef, #ifndef and #elif directives.

    sDirective is the directive word and sRest what follows it.  A
    PreprocessorConditional is created for the expression and either pushed
    onto self.aoCppCondStack or, for #elif, added to the aoElif list of the
    topmost stack entry.

    Returns True.  Problems are reported thru self.raiseError.
    Note! self.iLine is advanced past any continuation lines.
    """
    fIsElif = sDirective == 'elif';

    #
    # An #elif requires an open #if that hasn't reached its #else yet.
    #
    if fIsElif:
        if not self.aoCppCondStack:
            self.raiseError('#elif without #if');
        if self.aoCppCondStack[-1].fInElse:
            self.raiseError('#elif after #else');

    #
    # Glue continuation lines together, replacing the escaped newline and
    # any blanks preceding it with a single space.
    #
    while sRest.endswith('\\\n') and self.iLine < len(self.asLines):
        sNextLine   = self.asLines[self.iLine];
        self.iLine += 1;
        sRest       = sRest[0:-2].rstrip() + ' ' + sNextLine;

    # Get rid of comments as well as leading and trailing blanks.
    sExpr = self.stripComments(sRest).strip();

    #
    # Create the conditional and stash it.
    #
    try:
        oCond = self.PreprocessorConditional(sDirective, sExpr);
    except Exception as oXcpt:
        self.raiseError(oXcpt.args[0]);

    aoTarget = self.aoCppCondStack[-1].aoElif if fIsElif else self.aoCppCondStack;
    aoTarget.append(oCond);
    return True;
5915
def workerPreprocessorElse(self):
    """
    Handles the #else directive by flagging the topmost conditional as
    being in its else part.

    Returns True.  A stray or repeated #else is reported thru self.raiseError.
    """
    if not self.aoCppCondStack:
        self.raiseError('#else without #if');

    oTopCond = self.aoCppCondStack[-1];
    if oTopCond.fInElse:
        self.raiseError('Another #else after #else');
    oTopCond.fInElse = True;
    return True;
5927
def workerPreprocessorEndif(self):
    """
    Handles the #endif directive by popping the topmost conditional off
    self.aoCppCondStack.

    Returns True.  A stray #endif is reported thru self.raiseError.
    """
    if not self.aoCppCondStack:
        self.raiseError('#endif without #if');

    del self.aoCppCondStack[-1];
    return True;
5937
def checkPreprocessorDirective(self, sLine):
    """
    Dispatches a preprocessor directive line to the worker handling it.

    sLine must contain a hash character.  The workers for define, undef,
    if, ifdef, ifndef and elif get the rest of the line, starting at the
    last whitespace character preceding the arguments (or at the character
    following the directive word if no argument follows).

    Returns the result of the worker, False if the directive is unknown.
    """
    cchLine = len(sLine);

    # Locate the directive word following the hash and any blanks after it.
    offCur = sLine.find('#');
    assert offCur >= 0;
    offCur += 1;
    while offCur < cchLine and sLine[offCur].isspace():
        offCur += 1;
    offWord = offCur;
    while offCur < cchLine and not sLine[offCur].isspace():
        offCur += 1;
    sDirective = sLine[offWord:offCur];
    if self.fDebugPreproc:
        self.debug('line %d: #%s...' % (self.iLine, sDirective));

    # Skip the blanks following the word, stopping at the last one of them.
    while offCur + 1 < cchLine and sLine[offCur + 1].isspace():
        offCur += 1;
    sTail = sLine[offCur:];

    # Hand it to the right worker.
    if sDirective in ('if', 'ifdef', 'ifndef', 'elif',):
        return self.workerPreprocessorIfOrElif(sDirective, sTail);
    if sDirective in ('define', 'undef',):
        fnWorker = self.workerPreprocessorDefine if sDirective == 'define' else self.workerPreprocessorUndef;
        return fnWorker(sTail);
    if sDirective in ('else', 'endif',):
        fnWorker = self.workerPreprocessorElse if sDirective == 'else' else self.workerPreprocessorEndif;
        return fnWorker();

    if self.fDebugPreproc:
        self.debug('line %d: Unknown preprocessor directive: %s' % (self.iLine, sDirective));
    return False;
5977
def expandMacros(self, sLine, oMatch):
    """
    Expands a macro we know about in the given line.

    oMatch is the self.oReMacros match for sLine.  We currently ASSUME that
    there is only a single macro invocation in the line, namely the one
    oMatch found.

    Returns the line with the invocation replaced by the macro expansion.
    Problems are reported thru self.raiseError.
    """
    #
    # Work out which macro it is.  For macros taking arguments the match
    # includes the opening parenthesis.
    #
    offMatch    = oMatch.start();
    offMatchEnd = oMatch.end();
    sName       = oMatch.group(1);
    assert sName == sLine[offMatch : offMatchEnd];
    fWithArgs   = sName.endswith('(');
    if fWithArgs:
        sName = sName[:-1].strip();
    oMacro = self.dMacros[sName] # type: SimpleParser.Macro

    #
    # Simple macro invocations w/o parameters are easy.
    #
    if not fWithArgs:
        if self.fDebugPreproc:
            self.debug('expanding simple macro %s on line %u' % (sName, self.iLine,));
        return sLine[:offMatch] + oMacro.expandMacro(self) + sLine[offMatchEnd:];

    #
    # Macro with parameters.  Extract the arguments, splitting on top-level
    # commas and stopping at the closing parenthesis of the invocation.
    # ASSUMES it is all on the same line!
    #
    asArgs    = [];
    cLevel    = 1;
    offCurArg = offMatchEnd;
    for offCur in range(offMatchEnd, len(sLine)):
        ch = sLine[offCur];
        if ch == ',':
            if cLevel == 1:
                asArgs.append(sLine[offCurArg:offCur].strip());
                offCurArg = offCur + 1;
        elif ch == '(':
            cLevel += 1;
        elif ch == ')':
            cLevel -= 1;
            if cLevel == 0:
                asArgs.append(sLine[offCurArg:offCur].strip());
                break;
    else:
        self.raiseError('expandMacros: Invocation of macro %s spans multiple lines!' % (sName,));

    # An empty parameter list yields a single empty argument, fix that up.
    if len(oMacro.asArgs) == 0 and len(asArgs) == 1 and asArgs[0] == '':
        asArgs = [];
    if len(oMacro.asArgs) != len(asArgs):
        self.raiseError('expandMacros: Argument mismatch in %s invocation' % (oMacro.sName,));

    #
    # Expand it, dropping the invocation up to and including the closing parenthesis.
    #
    if self.fDebugPreproc:
        self.debug('expanding macro %s on line %u with arguments %s' % (sName, self.iLine, asArgs));
    return sLine[:offMatch] + oMacro.expandMacro(self, asArgs) + sLine[offCur + 1 :];
6036
def parse(self):
    """
    Parses the given file.

    Walks self.asLines one line at a time, switching between the code state
    (self.kiCode) and the multi-line comment state (self.kiCommentMulti).
    In code state known macros are expanded, preprocessor directives are
    handled and code is checked for relevant macro invocations, while the
    text of multi-line comments is collected in self.sComment and passed on
    to self.parseComment when the comment ends.

    Returns number of errors.
    Raises exception on fatal trouble.
    """
    #self.debug('Parsing %s' % (self.sSrcFile,));

    #
    # Loop thru the lines.
    #
    # Please mind that self.iLine may be updated by checkCodeForMacro and
    # other worker methods.
    #
    while self.iLine < len(self.asLines):
        sLine = self.asLines[self.iLine];
        self.iLine += 1;        # From here on self.iLine is the one-based number of sLine.
        #self.debug('line %u: %s' % (self.iLine, sLine[:-1]));

        # Expand macros we know about if we're currently in code.
        if self.iState == self.kiCode and self.oReMacros:
            oMatch = self.oReMacros.search(sLine);
            if oMatch:
                sLine = self.expandMacros(sLine, oMatch);
                if self.fDebugPreproc:
                    self.debug('line %d: expanded\n%s ==>\n%s' % (self.iLine, self.asLines[self.iLine - 1], sLine[:-1],));
                # Store the expanded line so the workers slicing self.asLines get to see it too.
                self.asLines[self.iLine - 1] = sLine;

        # Check for preprocessor directives before comments and other stuff.
        # ASSUMES preprocessor directives doesn't end with multiline comments.
        if self.iState == self.kiCode and sLine.lstrip().startswith('#'):
            if self.fDebugPreproc:
                self.debug('line %d: preproc' % (self.iLine,));
            self.checkPreprocessorDirective(sLine);
        else:
            # Look for comments.
            offSlash = sLine.find('/');
            if offSlash >= 0:
                # Take the C-style comment path unless the first slash starts a C++
                # comment while we're in code state.
                if offSlash + 1 >= len(sLine) or sLine[offSlash + 1] != '/' or self.iState != self.kiCode:
                    offLine = 0;
                    while offLine < len(sLine):
                        if self.iState == self.kiCode:
                            # Look for substantial multiline comment so we pass the following MC as a whole line:
                            #   IEM_MC_ARG_CONST(uint8_t, bImmArg, /*=*/ bImm,   2);
                            # Note! We ignore C++ comments here, assuming these aren't used in lines with C-style comments.
                            offHit = sLine.find('/*', offLine);
                            while offHit >= 0:
                                offEnd = sLine.find('*/', offHit + 2);
                                if offEnd < 0 or offEnd - offHit >= 16: # 16 chars is a bit random.
                                    break;
                                offHit = sLine.find('/*', offEnd);

                            if offHit >= 0:
                                # Check the code preceding the comment, then switch to comment state.
                                self.checkCodeForMacro(sLine[offLine:offHit], offLine);
                                self.sComment = '';
                                self.iCommentLine = self.iLine;
                                self.iState = self.kiCommentMulti;
                                offLine = offHit + 2;
                            else:
                                # No (substantial) comment, the rest of the line is code.
                                self.checkCodeForMacro(sLine[offLine:], offLine);
                                offLine = len(sLine);

                        elif self.iState == self.kiCommentMulti:
                            offHit = sLine.find('*/', offLine);
                            if offHit >= 0:
                                # End of the comment: parse it and go back to code state.
                                self.sComment += sLine[offLine:offHit];
                                self.iState = self.kiCode;
                                offLine = offHit + 2;
                                self.parseComment();
                            else:
                                # The comment continues on the next line.
                                self.sComment += sLine[offLine:];
                                offLine = len(sLine);
                        else:
                            assert False;
                # C++ line comment.  Check the code preceding it, if any.
                elif offSlash > 0:
                    self.checkCodeForMacro(sLine[:offSlash], 0);

            # No slash, but append the line if in multi-line comment.
            elif self.iState == self.kiCommentMulti:
                #self.debug('line %d: multi' % (self.iLine,));
                self.sComment += sLine;

            # No slash, but check code line for relevant macro.
            elif (    self.iState == self.kiCode
                  and (sLine.find('IEMOP_') >= 0 or sLine.find('FNIEMOPRM_DEF') >= 0 or sLine.find('IEM_MC') >= 0)):
                #self.debug('line %d: macro' % (self.iLine,));
                self.checkCodeForMacro(sLine, 0);

            # If the line is a '}' in the first position, complete the instructions.
            elif self.iState == self.kiCode and sLine[0] == '}':
                #self.debug('line %d: }' % (self.iLine,));
                self.doneInstructions(fEndOfFunction = True);

            # Look for instruction table on the form 'IEM_STATIC const PFNIEMOP g_apfnVexMap3'
            # so we can check/add @oppfx info from it.
            elif self.iState == self.kiCode and sLine.find('PFNIEMOP') > 0 and self.oReFunTable.match(sLine):
                self.parseFunctionTable(sLine);

    # End of file: complete any pending instructions, log a summary and report the errors.
    self.doneInstructions(fEndOfFunction = True);
    self.debug('%3s%% / %3s stubs out of %4s instructions and %4s MC blocks in %s'
               % (self.cTotalStubs * 100 // max(self.cTotalInstr, 1), self.cTotalStubs, self.cTotalInstr,
                  self.cTotalMcBlocks, os.path.basename(self.sSrcFile),));
    return self.printErrors();
6142
# Some sanity checking.
def __sanityCheckEFlagsClasses():
    """
    Asserts that each flag listed in SimpleParser.kdEFlagsClasses is also a
    key in g_kdEFlagsMnemonics.
    """
    dClasses = SimpleParser.kdEFlagsClasses;
    for sClass in dClasses:
        dLists = dClasses[sClass];
        for sAttrib in dLists:
            for sFlag in dLists[sAttrib]:
                assert sFlag in g_kdEFlagsMnemonics, 'sClass=%s sAttrib=%s sFlag=%s' % (sClass, sAttrib, sFlag,);
__sanityCheckEFlagsClasses();
6150
## The parsed content of IEMAllInstCommonBodyMacros.h.
# Starts out as None and is set by __parseFileByName the first time it is called.
g_oParsedCommonBodyMacros = None # type: SimpleParser
6153
def __parseFileByName(sSrcFile, sDefaultMap, sHostArch):
    """
    Parses one source file for instruction specifications.

    On the first call IEMAllInstCommonBodyMacros.h is located (next to
    sSrcFile or, failing that, next to this script), parsed and stored in
    g_oParsedCommonBodyMacros, so its macros can be used when processing
    sSrcFile and all the files in subsequent calls.

    Returns a tuple with the SimpleParser.parse() result (error count) and
    the parser instance.
    Raises Exception if a file cannot be opened or read.  ParserException
    instances are printed to stderr and re-raised.
    """
    #
    # Read sSrcFile into a line array.
    #
    try:
        oFile = open(sSrcFile, "r");    # pylint: disable=consider-using-with,unspecified-encoding
    except Exception as oXcpt:
        raise Exception("failed to open %s for reading: %s" % (sSrcFile, oXcpt,));
    try:
        asLines = oFile.readlines();
    except Exception as oXcpt:
        raise Exception("failed to read %s: %s" % (sSrcFile, oXcpt,));
    finally:
        oFile.close();

    #
    # On the first call, we parse IEMAllInstCommonBodyMacros.h so we
    # can use the macros from it when processing the other files.
    #
    global g_oParsedCommonBodyMacros;
    if g_oParsedCommonBodyMacros is None:
        # Locate the file.
        sCommonBodyMacros = os.path.join(os.path.split(sSrcFile)[0], 'IEMAllInstCommonBodyMacros.h');
        if not os.path.isfile(sCommonBodyMacros):
            sCommonBodyMacros = os.path.join(os.path.split(__file__)[0], 'IEMAllInstCommonBodyMacros.h');

        # Read it.
        try:
            with open(sCommonBodyMacros, "r") as oIncFile: # pylint: disable=unspecified-encoding
                asIncFiles = oIncFile.readlines();
        except Exception as oXcpt:
            raise Exception("failed to open/read %s: %s" % (sCommonBodyMacros, oXcpt,));

        # Parse it.  The file is expected to contain nothing but macros, so
        # finding instructions, tags, stubs or MC blocks in it is an error.
        try:
            oParser = SimpleParser(sCommonBodyMacros, asIncFiles, 'one', sHostArch);
            if oParser.parse() != 0:
                raise ParserException('%s: errors: See above' % (sCommonBodyMacros, ));
            if oParser.cTotalInstr != 0 or oParser.cTotalStubs != 0 or oParser.cTotalTagged != 0 or oParser.cTotalMcBlocks != 0:
                # Note! The counters must be passed in the order the message names them: instr, tags, stubs, MCs.
                raise ParserException('%s: error: Unexpectedly found %u instr, %u tags, %u stubs and %u MCs, expecting zero. %s'
                                      % (sCommonBodyMacros, oParser.cTotalInstr, oParser.cTotalTagged, oParser.cTotalStubs,
                                         oParser.cTotalMcBlocks,
                                         ', '.join(sorted(  [str(oMcBlock.iBeginLine) for oMcBlock in g_aoMcBlocks]
                                                          + [str(oInstr.iLineCreated) for oInstr in g_aoAllInstructions])),));
        except ParserException as oXcpt:
            print(str(oXcpt), file = sys.stderr);
            raise;
        g_oParsedCommonBodyMacros = oParser;

    #
    # Do the parsing.
    #
    try:
        oParser = SimpleParser(sSrcFile, asLines, sDefaultMap, sHostArch, g_oParsedCommonBodyMacros);
        return (oParser.parse(), oParser) ;
    except ParserException as oXcpt:
        print(str(oXcpt), file = sys.stderr);
        raise;
6215
6216
def __doTestCopying():
    """
    Carries out the asCopyTests instructions.

    Each asCopyTests entry of an instruction is looked up in
    g_dAllInstructionsByStat first and in g_dAllInstructionsByFunction
    second, and the tests of the instruction(s) found are appended to
    those of the instruction holding the entry.

    Returns the number of errors (written to stderr).
    """
    asErrors = [];
    for oDstInstr in g_aoAllInstructions:
        for sSrcInstr in oDstInstr.asCopyTests or ():
            # The stats name takes precedence over the function name.
            oByStat = g_dAllInstructionsByStat.get(sSrcInstr, None);
            aoSrcInstrs = [oByStat,] if oByStat else g_dAllInstructionsByFunction.get(sSrcInstr, []);
            if not aoSrcInstrs:
                asErrors.append('%s:%s: error: @opcopytests reference "%s" not found\n'
                                % ( oDstInstr.sSrcFile, oDstInstr.iLineCreated, sSrcInstr));
                continue;

            for oSrcInstr in aoSrcInstrs:
                if oSrcInstr != oDstInstr:
                    oDstInstr.aoTests.extend(oSrcInstr.aoTests);
                else:
                    # Copying from oneself makes no sense.
                    asErrors.append('%s:%s: error: @opcopytests reference "%s" matches the destination\n'
                                    % ( oDstInstr.sSrcFile, oDstInstr.iLineCreated, sSrcInstr));

    cErrors = len(asErrors);
    if cErrors:
        sys.stderr.write(u''.join(asErrors));
    return cErrors;
6244
6245
def __applyOnlyTest():
    """
    Restricts testing to the instructions in g_aoOnlyTestInstructions.

    If that list is non-empty, aoTests is replaced by an empty list for every
    instruction in g_aoAllInstructions that has tests but isn't in the list.
    If the list is empty nothing is modified.

    Returns 0 (always), so the result can be added to an error count.
    """
    if not g_aoOnlyTestInstructions:
        return 0;

    for oInstr in g_aoAllInstructions:
        if oInstr.aoTests and oInstr not in g_aoOnlyTestInstructions:
            oInstr.aoTests = [];
    return 0;
6257
## List of all main instruction files, their default maps and file sets (-1 means included in all sets).
## Each entry is a ( filename, default map name, file set number ) tuple.
g_aaoAllInstrFilesAndDefaultMapAndSet = (
    ( 'IEMAllInstCommon.cpp.h', 'one', -1, ),
    ( 'IEMAllInstOneByte.cpp.h', 'one', 1, ),
    ( 'IEMAllInst3DNow.cpp.h', '3dnow', 2, ),
    ( 'IEMAllInstTwoByte0f.cpp.h', 'two0f', 2, ),
    ( 'IEMAllInstThree0f38.cpp.h', 'three0f38', 3, ),
    ( 'IEMAllInstThree0f3a.cpp.h', 'three0f3a', 3, ),
    ( 'IEMAllInstVexMap1.cpp.h', 'vexmap1', 4, ),
    ( 'IEMAllInstVexMap2.cpp.h', 'vexmap2', 4, ),
    ( 'IEMAllInstVexMap3.cpp.h', 'vexmap3', 4, ),
);
6270
def __parseFilesWorker(asFilesAndDefaultMap, sHostArch):
    """
    Parses the given IEMAllInstruction*.cpp.h files.

    asFilesAndDefaultMap is a sequence of (filename, default map name) pairs
    and sHostArch is passed on unchanged to __parseFileByName().  A filename
    without a directory component which does not exist as given is looked up
    in the directory containing this script instead.

    After parsing, __doTestCopying() and __applyOnlyTest() are run and a line
    with stub statistics is printed to stderr.

    Returns a list of the parsers on success.
    Raises exception on failure (i.e. when any errors were counted).
    """
    sSrcDir   = os.path.dirname(os.path.abspath(__file__));
    cErrors   = 0;
    aoParsers = [];
    for sFilename, sDefaultMap in asFilesAndDefaultMap:
        if not os.path.split(sFilename)[0] and not os.path.exists(sFilename):
            sFilename = os.path.join(sSrcDir, sFilename);
        cThisErrors, oParser = __parseFileByName(sFilename, sDefaultMap, sHostArch);
        cErrors += cThisErrors;
        aoParsers.append(oParser);
    cErrors += __doTestCopying();
    cErrors += __applyOnlyTest();

    # Total stub stats.  The instruction list can be empty (e.g. no input
    # files), so the percentage must not divide by zero.
    cInstructions = len(g_aoAllInstructions);
    cTotalStubs   = sum(oInstr.fStub for oInstr in g_aoAllInstructions);
    uStubPct      = cTotalStubs * 100 // cInstructions if cInstructions else 0;
    print('debug: %3s%% / %3s stubs out of %4s instructions and %4s MC blocks in total'
          % (uStubPct, cTotalStubs, cInstructions, len(g_aoMcBlocks),),
          file = sys.stderr);

    if cErrors != 0:
        raise Exception('%d parse errors' % (cErrors,));
    return aoParsers;
6301
6302
def parseFiles(asFiles, sHostArch = None):
    """
    Parses a selection of IEMAllInstruction*.cpp.h files.

    The default map of each file is taken from the first entry in
    g_aaoAllInstrFilesAndDefaultMapAndSet whose filename equals the base name
    of the file, ignoring case.  The actual work is then left to
    __parseFilesWorker().

    Returns a list of the parsers on success.
    Raises exception on failure, including when a file isn't in the table.
    """
    asFilesAndDefaultMap = [];
    for sFilename in asFiles:
        sName = os.path.basename(sFilename).lower();
        sMap  = next((sKnownMap for sKnownFile, sKnownMap, _ in g_aaoAllInstrFilesAndDefaultMapAndSet
                      if sKnownFile.lower() == sName),
                     None);
        if not sMap:
            raise Exception('Unable to classify file: %s' % (sFilename,));
        asFilesAndDefaultMap.append((sFilename, sMap));

    return __parseFilesWorker(asFilesAndDefaultMap, sHostArch);
6324
6325
def parseAll(sHostArch = None):
    """
    Parses every file listed in g_aaoAllInstrFilesAndDefaultMapAndSet, each
    with the default map given there.

    Returns a list of the parsers on success.
    Raises exception on failure.
    """
    # The worker only wants (filename, default map) pairs, so the set number is dropped.
    asFilesAndDefaultMap = [(sFilename, sDefaultMap)
                            for (sFilename, sDefaultMap, _) in g_aaoAllInstrFilesAndDefaultMapAndSet];
    return __parseFilesWorker(asFilesAndDefaultMap, sHostArch);
6334
6335
6336#
6337# Generators (may perhaps move later).
6338#
def __formatDisassemblerTableEntry(oInstr):
    """
    Formats the disassembler table entry for an instruction.

    The entry is an OP(...) macro invocation, or OPVEX(...) when the
    instruction has more than three operands.  It consists of the format
    string, one parser index column per operand slot, the opcode enum value
    (oInstr.sDisEnum), one OP_PARM_xxx column per operand slot, and finally
    the DISOPTYPE_xxx flags derived from the instruction hints.

    Returns the entry as a single line, with the columns padded out to fixed
    offsets.
    """
    aoOperands   = oInstr.aoOperands;
    sMacro       = 'OP';
    cMaxOperands = 3;
    if len(aoOperands) > 3:
        sMacro       = 'OPVEX';
        cMaxOperands = 4;
        assert len(aoOperands) <= cMaxOperands;

    #
    # Format string: mnemonic, a space and then the comma separated operand names.
    #
    asOperandNames = [];
    for oOperand in aoOperands:
        sOperandName = g_kdOpTypes[oOperand.sType][2];
        if sOperandName[0] != '%':                      ## @todo remove upper() later.
            sOperandName = sOperandName.upper();        ## @todo remove upper() later.
        asOperandNames.append(sOperandName);

    sFormat = '%s("%s' % (sMacro, oInstr.sMnemonic,);
    if asOperandNames:
        sFormat += ' ' + ','.join(asOperandNames);
    sFormat += '",';

    #
    # Decoders: The parser columns dictated by the encoding.
    #
    sEncoding = oInstr.sEncoding;
    if sEncoding is None or sEncoding in ( 'fixed', 'VEX.fixed' ):
        asDecoders = [];
    elif sEncoding in ( 'ModR/M', 'VEX.ModR/M' ):
        if sEncoding == 'ModR/M':
            # ASSUME the first operand is using the ModR/M encoding
            assert len(aoOperands) >= 1 and aoOperands[0].usesModRM(), "oInstr=%s" % (oInstr,);
        asDecoders = [ 'IDX_ParseModRM,', ];
    elif sEncoding == 'prefix':
        asDecoders = [ '0,', ] * len(aoOperands);
    elif sEncoding == 'vex2':
        asDecoders = [ 'IDX_ParseVex2b,', ];
    elif sEncoding == 'vex3':
        asDecoders = [ 'IDX_ParseVex3b,', ];
    elif sEncoding in g_dInstructionMaps:
        asDecoders = [ g_dInstructionMaps[sEncoding].sDisParse + ',', ];
    else:
        ## @todo Parsers not produced here yet:
        #       IDX_ParseTwoByteEsc, IDX_ParseGrp1, IDX_ParseShiftGrp2, IDX_ParseGrp3, IDX_ParseGrp4,
        #       IDX_ParseGrp5, IDX_Parse3DNow, IDX_ParseGrp6, IDX_ParseGrp7, IDX_ParseGrp8, IDX_ParseGrp9,
        #       IDX_ParseGrp10, IDX_ParseGrp12, IDX_ParseGrp13, IDX_ParseGrp14, IDX_ParseGrp15,
        #       IDX_ParseGrp16, IDX_ParseThreeByteEsc4, IDX_ParseThreeByteEsc5, IDX_ParseModFence,
        #       IDX_ParseEscFP, IDX_ParseNopPause, IDX_ParseInvOpModRM,
        assert False, str(oInstr);

    # Check for immediates and stuff in the operands not covered by the decoders
    # above, then pad the parser columns up to the operand maximum with zeros.
    asParsers  = list(asDecoders);
    asParsers += [ g_kdOpTypes[oOperand.sType][0] + ',' for oOperand in aoOperands[len(asDecoders):] ];
    asParsers += [ '0,', ] * (cMaxOperands - len(asParsers));

    #
    # Opcode and operands.
    #
    assert oInstr.sDisEnum, str(oInstr);
    asParams  = [ 'OP_PARM_' + g_kdOpTypes[oOperand.sType][3] + ',' for oOperand in aoOperands ];
    asParams += [ 'OP_PARM_NONE,', ] * (cMaxOperands - len(asParams));

    #
    # Flags: The hints (in sorted order) which translate to DISOPTYPE_xxx defines.
    #
    asDefines = [ g_kdHints[sHint] for sHint in sorted(oInstr.dHints.keys()) ];
    sFlags    = ' | '.join([ sDefine for sDefine in asDefines if sDefine.startswith('DISOPTYPE_') ]);
    if not sFlags:
        sFlags = '0';

    asColumns = [ sFormat, ] + asParsers + [ oInstr.sDisEnum + ',', ] + asParams + [ sFlags + '),', ];

    #
    # Format the columns into a line.  Each column starts at its offset when
    # possible, otherwise it is separated from the previous one by a single space.
    #
    aoffColumns = [4, 29, 49, 65, 77, 89, 109, 125, 141, 157, 183, 199];
    sLine = '';
    for iColumn, sColumn in enumerate(asColumns):
        cchPadding = max(aoffColumns[iColumn] - len(sLine), 1);
        sLine += ' ' * cchPadding + sColumn;

    # Example of an entry and the macro consuming it:
    #   OP("psrlw %Vdq,%Wdq", IDX_ParseModRM, IDX_UseModRM, 0, OP_PSRLW, OP_PARM_Vdq, OP_PARM_Wdq, OP_PARM_NONE,
    #      DISOPTYPE_HARMLESS),
    #   define OP(pszOpcode, idxParse1, idxParse2, idxParse3, opcode, param1, param2, param3, optype) \
    #       { pszOpcode, idxParse1, idxParse2, idxParse3, 0, opcode, param1, param2, param3, 0, 0, optype }
    return sLine;
6463
def __checkIfShortTable(aoTableOrdered, oMap):
    """
    Checks whether the leading and trailing None entries of a table should be
    trimmed off, i.e. whether a short table should be emitted.

    Returns (iInstr, cInstructions, fShortTable) where iInstr is the index of
    the first entry to output and cInstructions the index following the last
    one.  For a full table this is (0, len(aoTableOrdered), False).
    """
    cEntries = len(aoTableOrdered);

    # Determine how much we can trim off at either end.
    aiUsed = [iEntry for iEntry, oEntry in enumerate(aoTableOrdered) if oEntry is not None];
    if aiUsed:
        iFirst = aiUsed[0];
        iEnd   = aiUsed[-1] + 1;
    else:
        iFirst = 0;
        iEnd   = 0;
    cTrimmed = iFirst + (cEntries - iEnd);

    # Go for the short table version when at least a 30th of the entries (rounded
    # down) can be trimmed off.
    # NOTE(review): This comment used to say "more than 30%", which isn't what the
    #               'cEntries // 30' threshold expresses - confirm which is intended.
    if cTrimmed >= cEntries // 30:
        return (iFirst, iEnd, True);
    _ = oMap; # Use this for overriding.

    # Output the full table.
    return (0, cEntries, False);
6485
def generateDisassemblerTables(oDstFile = sys.stdout):
    """
    Generates disassembler tables.

    Parses all the instruction files via parseAll() and writes the opcode
    table and the map range record of each disassembler map to oDstFile.
    Only maps whose name starts with "vex" are output at the moment.

    Returns exit code: 0 on success, 1 if parseAll() failed (the error and a
    traceback are printed to stderr in that case).
    """

    #
    # Parse all.
    #
    try:
        parseAll();
    except Exception as oXcpt:
        print('error: parseAll failed: %s' % (oXcpt,), file = sys.stderr);
        traceback.print_exc(file = sys.stderr);
        return 1;

    #
    # The disassembler uses a slightly different table layout: maps selected by
    # 'byte+pfx' are split up into one copy per prefix (none, 0x66, 0xf3 and
    # 0xf2), while all other maps are used as they are.  The maps are processed
    # in order of encoding + lead opcodes.
    #
    aoDisasmMaps = [];
    for oMap in sorted(g_dInstructionMaps.values(),
                       key = lambda oSortMap: oSortMap.sEncoding + ''.join(oSortMap.asLeadOpcodes)):
        if oMap.sSelector != 'byte+pfx':
            aoDisasmMaps.append(oMap);
        else:
            # Split the map by prefix.
            aoDisasmMaps.append(oMap.copy(oMap.sName, 'none'));
            aoDisasmMaps.append(oMap.copy(oMap.sName + '_66', '0x66'));
            aoDisasmMaps.append(oMap.copy(oMap.sName + '_F3', '0xf3'));
            aoDisasmMaps.append(oMap.copy(oMap.sName + '_F2', '0xf2'));

    #
    # Dump each map.
    #
    # Note! asHeaderLines collects the extern declarations, but nothing in this
    #       function writes them anywhere yet.
    #
    asHeaderLines = [];
    print("debug: maps=%s\n" % (', '.join([oMap.sName for oMap in aoDisasmMaps]),), file = sys.stderr);
    for oMap in aoDisasmMaps:
        sName = oMap.sName;

        if not sName.startswith("vex"): continue; # only looking at the vex maps at the moment.

        #
        # Get the instructions for the map and see if we can do a short version or not.
        #
        aoTableOrder    = oMap.getInstructionsInTableOrder();
        cEntriesPerByte = oMap.getEntriesPerByte();
        (iInstrStart, iInstrEnd, fShortTable) = __checkIfShortTable(aoTableOrder, oMap);
        cEntriesPerRow  = 0x10 * cEntriesPerByte;   # Table entries per 16 opcode bytes.

        #
        # Output the table start.
        # Note! Short tables are static and only accessible via the map range record.
        #
        asLines = [];
        asLines.append('/* Generated from: %-11s Selector: %-7s Encoding: %-7s Lead bytes opcodes: %s */'
                       % ( oMap.sName, oMap.sSelector, oMap.sEncoding, ' '.join(oMap.asLeadOpcodes), ));
        if fShortTable:
            asLines.append('static const DISOPCODE %s[] =' % (oMap.getDisasTableName(),));
        else:
            asHeaderLines.append('extern const DISOPCODE %s[%d];' % (oMap.getDisasTableName(), iInstrEnd - iInstrStart,));
            asLines.append( 'const DISOPCODE %s[%d] =' % (oMap.getDisasTableName(), iInstrEnd - iInstrStart,));
        asLines.append('{');

        if fShortTable and (iInstrStart & (cEntriesPerRow - 1)) != 0:
            asLines.append(' /* %#04x: */' % (iInstrStart,));

        #
        # Output the instructions.
        #
        iInstr = iInstrStart;
        while iInstr < iInstrEnd:
            oInstr = aoTableOrder[iInstr];
            if (iInstr & (cEntriesPerRow - 1)) == 0:
                if iInstr != iInstrStart:
                    asLines.append('');
                asLines.append(' /* %x */' % ((iInstr // cEntriesPerByte) >> 4,));

            if oInstr is None:
                # Invalid. Optimize blocks of invalid instructions.
                cInvalidInstrs = 1;
                while iInstr + cInvalidInstrs < len(aoTableOrder) and aoTableOrder[iInstr + cInvalidInstrs] is None:
                    cInvalidInstrs += 1;
                if (iInstr & (cEntriesPerRow - 1)) == 0 and cInvalidInstrs >= cEntriesPerRow:
                    asLines.append(' INVALID_OPCODE_BLOCK_%u,' % (cEntriesPerRow,));
                    iInstr += cEntriesPerRow - 1;
                elif cEntriesPerByte > 1:
                    if (iInstr & (cEntriesPerByte - 1)) == 0 and cInvalidInstrs >= cEntriesPerByte:
                        asLines.append(' INVALID_OPCODE_BLOCK_%u,' % (cEntriesPerByte,));
                        # Skip the remaining entries of this opcode byte; the last one is
                        # covered by the increment at the bottom of the loop.
                        iInstr += cEntriesPerByte - 1;
                    else:
                        asLines.append(' /* %#04x/%d */ INVALID_OPCODE,'
                                       % (iInstr // cEntriesPerByte, iInstr % cEntriesPerByte));
                else:
                    asLines.append(' /* %#04x */ INVALID_OPCODE,' % (iInstr,));
            elif isinstance(oInstr, list) and len(oInstr) != 0:
                asLines.append(' /* %#04x */ ComplicatedListStuffNeedingWrapper, /* \n -- %s */'
                               % (iInstr, '\n -- '.join([str(oItem) for oItem in oInstr]),));
            else:
                # NOTE(review): An empty list also takes this path; presumably
                #               getInstructionsInTableOrder() never returns one - confirm.
                asLines.append(__formatDisassemblerTableEntry(oInstr));

            iInstr += 1;

        # An empty initializer list isn't wanted, so give empty tables a dummy entry.
        if iInstrStart >= iInstrEnd:
            asLines.append(' /* dummy */ INVALID_OPCODE');

        asLines.append('};');
        asLines.append('AssertCompile(RT_ELEMENTS(%s) == %s);' % (oMap.getDisasTableName(), iInstrEnd - iInstrStart,));

        #
        # We always emit a map range record, assuming the linker will eliminate the unnecessary ones.
        # The declaration must name the very same symbol as the definition does.
        #
        asHeaderLines.append('extern const DISOPMAPDESC %s;' % (oMap.getDisasRangeName(),));
        asLines.append('const DISOPMAPDESC %s = { &%s[0], %#04x, RT_ELEMENTS(%s) };'
                       % (oMap.getDisasRangeName(), oMap.getDisasTableName(), iInstrStart, oMap.getDisasTableName(),));

        #
        # Write out the lines, followed by a blank line.
        #
        oDstFile.write('\n'.join(asLines) + '\n\n');
        #break; #for now
    return 0;
6614
# When run as a script: generate the disassembler tables on stdout (the
# default destination) and use the result as the process exit code.
if __name__ == '__main__':
    sys.exit(generateDisassemblerTables());
6617
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette