VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllInstPython.py@ 101984

Last change on this file since 101984 was 101984, checked in by vboxsync, 16 months ago

VMM/IEM: Added a flush mask for guest register shadows to the IEM_MC_DEFER_TO_CIMPL_X_RET macros to better manage register optimizations when recompiling to native code. bugref:10371

  • Property svn:eol-style set to LF
  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 297.3 KB
Line 
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3# $Id: IEMAllInstPython.py 101984 2023-11-08 15:56:18Z vboxsync $
4
5"""
6IEM instruction extractor.
7
8This script/module parses the IEMAllInstruction*.cpp.h files next to it and
9collects information about the instructions. It can then be used to generate
10disassembler tables and tests.
11"""
12
13from __future__ import print_function;
14
15__copyright__ = \
16"""
17Copyright (C) 2017-2023 Oracle and/or its affiliates.
18
19This file is part of VirtualBox base platform packages, as
20available from https://www.virtualbox.org.
21
22This program is free software; you can redistribute it and/or
23modify it under the terms of the GNU General Public License
24as published by the Free Software Foundation, in version 3 of the
25License.
26
27This program is distributed in the hope that it will be useful, but
28WITHOUT ANY WARRANTY; without even the implied warranty of
29MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
30General Public License for more details.
31
32You should have received a copy of the GNU General Public License
33along with this program; if not, see <https://www.gnu.org/licenses>.
34
35The contents of this file may alternatively be used under the terms
36of the Common Development and Distribution License Version 1.0
37(CDDL), a copy of it is provided in the "COPYING.CDDL" file included
38in the VirtualBox distribution, in which case the provisions of the
39CDDL are applicable instead of those of the GPL.
40
41You may elect to license modified versions of this file under the
42terms and conditions of either the GPL or the CDDL or both.
43
44SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
45"""
46__version__ = "$Revision: 101984 $"
47
48# pylint: disable=anomalous-backslash-in-string,too-many-lines
49
50# Standard python imports.
51import os;
52import re;
53import sys;
54import traceback;
55
56## Only the main script needs to modify the path.
57#g_ksValidationKitDir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),
58# 'ValidationKit');
59#sys.path.append(g_ksValidationKitDir);
60#
61#from common import utils; - Windows build boxes doesn't have pywin32.
62
63# Python 3 hacks:
64if sys.version_info[0] >= 3:
65 long = int; # pylint: disable=redefined-builtin,invalid-name
66
67
## X86_EFL_XXX constant names mapped to their numeric values.
## Single bits are written as shifts; the resulting hex value is in the comment.
g_kdX86EFlagsConstants = {
    'X86_EFL_CF':           1 << 0,                 # 0x00000001
    'X86_EFL_1':            1 << 1,                 # 0x00000002
    'X86_EFL_PF':           1 << 2,                 # 0x00000004
    'X86_EFL_AF':           1 << 4,                 # 0x00000010
    'X86_EFL_ZF':           1 << 6,                 # 0x00000040
    'X86_EFL_SF':           1 << 7,                 # 0x00000080
    'X86_EFL_TF':           1 << 8,                 # 0x00000100
    'X86_EFL_IF':           1 << 9,                 # 0x00000200
    'X86_EFL_DF':           1 << 10,                # 0x00000400
    'X86_EFL_OF':           1 << 11,                # 0x00000800
    'X86_EFL_IOPL':         (1 << 12) | (1 << 13),  # 0x00003000
    'X86_EFL_NT':           1 << 14,                # 0x00004000
    'X86_EFL_RF':           1 << 16,                # 0x00010000
    'X86_EFL_VM':           1 << 17,                # 0x00020000
    'X86_EFL_AC':           1 << 18,                # 0x00040000
    'X86_EFL_VIF':          1 << 19,                # 0x00080000
    'X86_EFL_VIP':          1 << 20,                # 0x00100000
    'X86_EFL_ID':           1 << 21,                # 0x00200000
    'X86_EFL_LIVE_MASK':    0x003f7fd5,             # UINT32_C(0x003f7fd5)
    'X86_EFL_RA1_MASK':     1 << 1,                 # 0x00000002
};
90
## EFlags values allowed in \@opfltest, \@opflmodify, \@opflundef, \@opflset, and \@opflclear.
##
## Maps a lower case flag mnemonic to the X86_EFL_XXX constant name it stands
## for.  A leading '!' on the value means the mnemonic denotes the flag being
## clear rather than set.
g_kdEFlagsMnemonics = {
    # Debugger flag notation (sorted by value):
    'cf':   'X86_EFL_CF',   ##< Carry Flag.
    'nc':   '!X86_EFL_CF',  ##< No Carry.

    # Parity: PF is set for even parity, so 'pe' is the flag-set form and 'po'
    # the flag-clear form (same set-first ordering as all the other pairs).
    'pe':   'X86_EFL_PF',   ##< Parity Even.
    'po':   '!X86_EFL_PF',  ##< Parity Odd.

    'af':   'X86_EFL_AF',   ##< Aux Flag.
    'na':   '!X86_EFL_AF',  ##< No Aux.

    'zr':   'X86_EFL_ZF',   ##< ZeRo.
    'nz':   '!X86_EFL_ZF',  ##< No Zero.

    'ng':   'X86_EFL_SF',   ##< NeGative (sign).
    'pl':   '!X86_EFL_SF',  ##< PLus (sign).

    'tf':   'X86_EFL_TF',   ##< Trap flag.

    'ei':   'X86_EFL_IF',   ##< Enabled Interrupts.
    'di':   '!X86_EFL_IF',  ##< Disabled Interrupts.

    'dn':   'X86_EFL_DF',   ##< DowN (string op direction).
    'up':   '!X86_EFL_DF',  ##< UP (string op direction).

    'ov':   'X86_EFL_OF',   ##< OVerflow.
    'nv':   '!X86_EFL_OF',  ##< No Overflow.

    'nt':   'X86_EFL_NT',   ##< Nested Task.
    'rf':   'X86_EFL_RF',   ##< Resume Flag.
    'vm':   'X86_EFL_VM',   ##< Virtual-8086 Mode.
    'ac':   'X86_EFL_AC',   ##< Alignment Check.
    'vif':  'X86_EFL_VIF',  ##< Virtual Interrupt Flag.
    'vip':  'X86_EFL_VIP',  ##< Virtual Interrupt Pending.

    # Reference manual notation not covered above (sorted by value):
    'pf':   'X86_EFL_PF',
    'zf':   'X86_EFL_ZF',
    'sf':   'X86_EFL_SF',
    'if':   'X86_EFL_IF',
    'df':   'X86_EFL_DF',
    'of':   'X86_EFL_OF',
    'iopl': 'X86_EFL_IOPL',
    'id':   'X86_EFL_ID',
};
137
## X86_CR0_XXX constant names mapped to their numeric values.
## Each entry is a single bit; the resulting hex value is in the comment.
g_kdX86Cr0Constants = {
    'X86_CR0_PE':           1 << 0,     # 0x00000001
    'X86_CR0_MP':           1 << 1,     # 0x00000002
    'X86_CR0_EM':           1 << 2,     # 0x00000004
    'X86_CR0_TS':           1 << 3,     # 0x00000008
    'X86_CR0_ET':           1 << 4,     # 0x00000010
    'X86_CR0_NE':           1 << 5,     # 0x00000020
    'X86_CR0_WP':           1 << 16,    # 0x00010000
    'X86_CR0_AM':           1 << 18,    # 0x00040000
    'X86_CR0_NW':           1 << 29,    # 0x20000000
    'X86_CR0_CD':           1 << 30,    # 0x40000000
    'X86_CR0_PG':           1 << 31,    # 0x80000000
};
152
## X86_CR4_XXX constant names mapped to their numeric values.
## Each entry is a single bit; the resulting hex value is in the comment.
g_kdX86Cr4Constants = {
    'X86_CR4_VME':          1 << 0,     # 0x00000001
    'X86_CR4_PVI':          1 << 1,     # 0x00000002
    'X86_CR4_TSD':          1 << 2,     # 0x00000004
    'X86_CR4_DE':           1 << 3,     # 0x00000008
    'X86_CR4_PSE':          1 << 4,     # 0x00000010
    'X86_CR4_PAE':          1 << 5,     # 0x00000020
    'X86_CR4_MCE':          1 << 6,     # 0x00000040
    'X86_CR4_PGE':          1 << 7,     # 0x00000080
    'X86_CR4_PCE':          1 << 8,     # 0x00000100
    'X86_CR4_OSFXSR':       1 << 9,     # 0x00000200
    'X86_CR4_OSXMMEEXCPT':  1 << 10,    # 0x00000400
    'X86_CR4_VMXE':         1 << 13,    # 0x00002000
    'X86_CR4_SMXE':         1 << 14,    # 0x00004000
    'X86_CR4_PCIDE':        1 << 17,    # 0x00020000
    'X86_CR4_OSXSAVE':      1 << 18,    # 0x00040000
    'X86_CR4_SMEP':         1 << 20,    # 0x00100000
    'X86_CR4_SMAP':         1 << 21,    # 0x00200000
    'X86_CR4_PKE':          1 << 22,    # 0x00400000
};
174
## XSAVE components (XCR0): XSAVE_C_XXX constant names mapped to their values.
## Single bits are written as shifts; the resulting hex value is in the comment.
g_kdX86XSaveCConstants = {
    'XSAVE_C_X87':          1 << 0,     # 0x00000001
    'XSAVE_C_SSE':          1 << 1,     # 0x00000002
    'XSAVE_C_YMM':          1 << 2,     # 0x00000004
    'XSAVE_C_BNDREGS':      1 << 3,     # 0x00000008
    'XSAVE_C_BNDCSR':       1 << 4,     # 0x00000010
    'XSAVE_C_OPMASK':       1 << 5,     # 0x00000020
    'XSAVE_C_ZMM_HI256':    1 << 6,     # 0x00000040
    'XSAVE_C_ZMM_16HI':     1 << 7,     # 0x00000080
    'XSAVE_C_PKRU':         1 << 9,     # 0x00000200
    'XSAVE_C_LWP':          1 << 62,    # 0x4000000000000000
    'XSAVE_C_X':            1 << 63,    # 0x8000000000000000
    'XSAVE_C_ALL_AVX':      0x000000c4, # For clearing all AVX bits.
    'XSAVE_C_ALL_AVX_SSE':  0x000000c6, # For clearing all AVX and SSE bits.
};
191
192
## \@op[1-4] locations.
## Every known location name maps to its own (initially empty) list.
g_kdOpLocations = {
    sLocation: []
    for sLocation in (
        'reg',      # modrm.reg
        'rm',       # modrm.rm
        'imm',      # immediate instruction data
        'vvvv',     # VEX.vvvv

        # Fixed registers.
        'AL',
        'rAX',
        'rDX',
        'rSI',
        'rDI',
        'rFLAGS',
        'CS',
        'DS',
        'ES',
        'FS',
        'GS',
        'SS',
    )
};
214
## \@op[1-4] types
##
## Each value is a 5-tuple:
##      - [0]: the normal IDX_ParseXXX handler (IDX_UseModRM == IDX_ParseModRM).
##      - [1]: the operand location (a g_kdOpLocations key).
##      - [2]: the disassembler format string version of the type.
##      - [3]: the disassembler OP_PARAM_XXX value (the XXX part only).
##      - [4]: the IEM form matching the instruction.
##
## Note! See the A.2.1 in SDM vol 2 for the type names.
g_kdOpTypes = {
    # Fixed addresses
    'Ap':           ( 'IDX_ParseImmAddrF',  'imm',    '%Ap',  'Ap',      'FIXED', ),

    # ModR/M.rm
    'Eb':           ( 'IDX_UseModRM',       'rm',     '%Eb',  'Eb',      'RM',    ),
    'Ed':           ( 'IDX_UseModRM',       'rm',     '%Ed',  'Ed',      'RM',    ),
    'Ed_WO':        ( 'IDX_UseModRM',       'rm',     '%Ed',  'Ed',      'RM',    ),
    'Eq':           ( 'IDX_UseModRM',       'rm',     '%Eq',  'Eq',      'RM',    ),
    'Eq_WO':        ( 'IDX_UseModRM',       'rm',     '%Eq',  'Eq',      'RM',    ),
    'Ew':           ( 'IDX_UseModRM',       'rm',     '%Ew',  'Ew',      'RM',    ),
    'Ev':           ( 'IDX_UseModRM',       'rm',     '%Ev',  'Ev',      'RM',    ),
    'Ey':           ( 'IDX_UseModRM',       'rm',     '%Ey',  'Ey',      'RM',    ),
    'Qd':           ( 'IDX_UseModRM',       'rm',     '%Qd',  'Qd',      'RM',    ),
    'Qq':           ( 'IDX_UseModRM',       'rm',     '%Qq',  'Qq',      'RM',    ),
    'Qq_WO':        ( 'IDX_UseModRM',       'rm',     '%Qq',  'Qq',      'RM',    ),
    'Wss':          ( 'IDX_UseModRM',       'rm',     '%Wss', 'Wss',     'RM',    ),
    'Wss_WO':       ( 'IDX_UseModRM',       'rm',     '%Wss', 'Wss',     'RM',    ),
    'Wsd':          ( 'IDX_UseModRM',       'rm',     '%Wsd', 'Wsd',     'RM',    ),
    'Wsd_WO':       ( 'IDX_UseModRM',       'rm',     '%Wsd', 'Wsd',     'RM',    ),
    'Wps':          ( 'IDX_UseModRM',       'rm',     '%Wps', 'Wps',     'RM',    ),
    'Wps_WO':       ( 'IDX_UseModRM',       'rm',     '%Wps', 'Wps',     'RM',    ),
    'Wpd':          ( 'IDX_UseModRM',       'rm',     '%Wpd', 'Wpd',     'RM',    ),
    'Wpd_WO':       ( 'IDX_UseModRM',       'rm',     '%Wpd', 'Wpd',     'RM',    ),
    'Wdq':          ( 'IDX_UseModRM',       'rm',     '%Wdq', 'Wdq',     'RM',    ),
    'Wdq_WO':       ( 'IDX_UseModRM',       'rm',     '%Wdq', 'Wdq',     'RM',    ),
    'Wq':           ( 'IDX_UseModRM',       'rm',     '%Wq',  'Wq',      'RM',    ),
    'Wq_WO':        ( 'IDX_UseModRM',       'rm',     '%Wq',  'Wq',      'RM',    ),
    'WqZxReg_WO':   ( 'IDX_UseModRM',       'rm',     '%Wq',  'Wq',      'RM',    ),
    'Wx':           ( 'IDX_UseModRM',       'rm',     '%Wx',  'Wx',      'RM',    ),
    'Wx_WO':        ( 'IDX_UseModRM',       'rm',     '%Wx',  'Wx',      'RM',    ),

    # ModR/M.rm - register only.
    'Uq':           ( 'IDX_UseModRM',       'rm',     '%Uq',  'Uq',      'REG'    ),
    'UqHi':         ( 'IDX_UseModRM',       'rm',     '%Uq',  'UqHi',    'REG'    ),
    'Uss':          ( 'IDX_UseModRM',       'rm',     '%Uss', 'Uss',     'REG'    ),
    'Uss_WO':       ( 'IDX_UseModRM',       'rm',     '%Uss', 'Uss',     'REG'    ),
    'Usd':          ( 'IDX_UseModRM',       'rm',     '%Usd', 'Usd',     'REG'    ),
    'Usd_WO':       ( 'IDX_UseModRM',       'rm',     '%Usd', 'Usd',     'REG'    ),
    'Ux':           ( 'IDX_UseModRM',       'rm',     '%Ux',  'Ux',      'REG'    ),
    'Nq':           ( 'IDX_UseModRM',       'rm',     '%Qq',  'Nq',      'REG'    ),

    # ModR/M.rm - memory only.
    'Ma':           ( 'IDX_UseModRM',       'rm',     '%Ma',  'Ma',      'MEM',   ), ##< Only used by BOUND.
    'Mb_RO':        ( 'IDX_UseModRM',       'rm',     '%Mb',  'Mb',      'MEM',   ),
    'Md':           ( 'IDX_UseModRM',       'rm',     '%Md',  'Md',      'MEM',   ),
    'Md_RO':        ( 'IDX_UseModRM',       'rm',     '%Md',  'Md',      'MEM',   ),
    'Md_WO':        ( 'IDX_UseModRM',       'rm',     '%Md',  'Md',      'MEM',   ),
    'Mdq':          ( 'IDX_UseModRM',       'rm',     '%Mdq', 'Mdq',     'MEM',   ),
    'Mdq_WO':       ( 'IDX_UseModRM',       'rm',     '%Mdq', 'Mdq',     'MEM',   ),
    'Mq':           ( 'IDX_UseModRM',       'rm',     '%Mq',  'Mq',      'MEM',   ),
    'Mq_WO':        ( 'IDX_UseModRM',       'rm',     '%Mq',  'Mq',      'MEM',   ),
    'Mps_WO':       ( 'IDX_UseModRM',       'rm',     '%Mps', 'Mps',     'MEM',   ),
    'Mpd_WO':       ( 'IDX_UseModRM',       'rm',     '%Mpd', 'Mpd',     'MEM',   ),
    'Mx':           ( 'IDX_UseModRM',       'rm',     '%Mx',  'Mx',      'MEM',   ),
    'Mx_WO':        ( 'IDX_UseModRM',       'rm',     '%Mx',  'Mx',      'MEM',   ),
    'M_RO':         ( 'IDX_UseModRM',       'rm',     '%M',   'M',       'MEM',   ),
    'M_RW':         ( 'IDX_UseModRM',       'rm',     '%M',   'M',       'MEM',   ),

    # ModR/M.reg
    'Gb':           ( 'IDX_UseModRM',       'reg',    '%Gb',  'Gb',      '',      ),
    'Gw':           ( 'IDX_UseModRM',       'reg',    '%Gw',  'Gw',      '',      ),
    'Gd':           ( 'IDX_UseModRM',       'reg',    '%Gd',  'Gd',      '',      ),
    'Gv':           ( 'IDX_UseModRM',       'reg',    '%Gv',  'Gv',      '',      ),
    'Gv_RO':        ( 'IDX_UseModRM',       'reg',    '%Gv',  'Gv',      '',      ),
    'Gy':           ( 'IDX_UseModRM',       'reg',    '%Gy',  'Gy',      '',      ),
    'Pd':           ( 'IDX_UseModRM',       'reg',    '%Pd',  'Pd',      '',      ),
    'PdZx_WO':      ( 'IDX_UseModRM',       'reg',    '%Pd',  'PdZx',    '',      ),
    'Pq':           ( 'IDX_UseModRM',       'reg',    '%Pq',  'Pq',      '',      ),
    'Pq_WO':        ( 'IDX_UseModRM',       'reg',    '%Pq',  'Pq',      '',      ),
    'Vd':           ( 'IDX_UseModRM',       'reg',    '%Vd',  'Vd',      '',      ),
    'Vd_WO':        ( 'IDX_UseModRM',       'reg',    '%Vd',  'Vd',      '',      ),
    'VdZx_WO':      ( 'IDX_UseModRM',       'reg',    '%Vd',  'Vd',      '',      ),
    'Vdq':          ( 'IDX_UseModRM',       'reg',    '%Vdq', 'Vdq',     '',      ),
    'Vss':          ( 'IDX_UseModRM',       'reg',    '%Vss', 'Vss',     '',      ),
    'Vss_WO':       ( 'IDX_UseModRM',       'reg',    '%Vss', 'Vss',     '',      ),
    'VssZx_WO':     ( 'IDX_UseModRM',       'reg',    '%Vss', 'Vss',     '',      ),
    'Vsd':          ( 'IDX_UseModRM',       'reg',    '%Vsd', 'Vsd',     '',      ),
    'Vsd_WO':       ( 'IDX_UseModRM',       'reg',    '%Vsd', 'Vsd',     '',      ),
    'VsdZx_WO':     ( 'IDX_UseModRM',       'reg',    '%Vsd', 'Vsd',     '',      ),
    'Vps':          ( 'IDX_UseModRM',       'reg',    '%Vps', 'Vps',     '',      ),
    'Vps_WO':       ( 'IDX_UseModRM',       'reg',    '%Vps', 'Vps',     '',      ),
    'Vpd':          ( 'IDX_UseModRM',       'reg',    '%Vpd', 'Vpd',     '',      ),
    'Vpd_WO':       ( 'IDX_UseModRM',       'reg',    '%Vpd', 'Vpd',     '',      ),
    'Vq':           ( 'IDX_UseModRM',       'reg',    '%Vq',  'Vq',      '',      ),
    'Vq_WO':        ( 'IDX_UseModRM',       'reg',    '%Vq',  'Vq',      '',      ),
    'Vdq_WO':       ( 'IDX_UseModRM',       'reg',    '%Vdq', 'Vdq',     '',      ),
    'VqHi':         ( 'IDX_UseModRM',       'reg',    '%Vdq', 'VdqHi',   '',      ),
    'VqHi_WO':      ( 'IDX_UseModRM',       'reg',    '%Vdq', 'VdqHi',   '',      ),
    'VqZx_WO':      ( 'IDX_UseModRM',       'reg',    '%Vq',  'VqZx',    '',      ),
    'Vx':           ( 'IDX_UseModRM',       'reg',    '%Vx',  'Vx',      '',      ),
    'Vx_WO':        ( 'IDX_UseModRM',       'reg',    '%Vx',  'Vx',      '',      ),

    # VEX.vvvv
    'By':           ( 'IDX_UseModRM',       'vvvv',   '%By',  'By',      'V',     ),
    'Hps':          ( 'IDX_UseModRM',       'vvvv',   '%Hps', 'Hps',     'V',     ),
    'Hpd':          ( 'IDX_UseModRM',       'vvvv',   '%Hpd', 'Hpd',     'V',     ),
    'HssHi':        ( 'IDX_UseModRM',       'vvvv',   '%Hx',  'HssHi',   'V',     ),
    'HsdHi':        ( 'IDX_UseModRM',       'vvvv',   '%Hx',  'HsdHi',   'V',     ),
    'Hq':           ( 'IDX_UseModRM',       'vvvv',   '%Hq',  'Hq',      'V',     ),
    'HqHi':         ( 'IDX_UseModRM',       'vvvv',   '%Hq',  'HqHi',    'V',     ),
    'Hx':           ( 'IDX_UseModRM',       'vvvv',   '%Hx',  'Hx',      'V',     ),

    # Immediate values.
    'Ib':           ( 'IDX_ParseImmByte',   'imm',    '%Ib',  'Ib',      '',      ), ##< NB! Could be IDX_ParseImmByteSX for some instrs.
    'Iw':           ( 'IDX_ParseImmUshort', 'imm',    '%Iw',  'Iw',      '',      ),
    'Id':           ( 'IDX_ParseImmUlong',  'imm',    '%Id',  'Id',      '',      ),
    'Iq':           ( 'IDX_ParseImmQword',  'imm',    '%Iq',  'Iq',      '',      ),
    'Iv':           ( 'IDX_ParseImmV',      'imm',    '%Iv',  'Iv',      '',      ), ##< o16: word, o32: dword, o64: qword
    'Iz':           ( 'IDX_ParseImmZ',      'imm',    '%Iz',  'Iz',      '',      ), ##< o16: word, o32|o64:dword

    # Address operands (no ModR/M).
    'Ob':           ( 'IDX_ParseImmAddr',   'imm',    '%Ob',  'Ob',      '',      ),
    'Ov':           ( 'IDX_ParseImmAddr',   'imm',    '%Ov',  'Ov',      '',      ),

    # Relative jump targets
    'Jb':           ( 'IDX_ParseImmBRel',   'imm',    '%Jb',  'Jb',      '',      ),
    'Jv':           ( 'IDX_ParseImmVRel',   'imm',    '%Jv',  'Jv',      '',      ),

    # DS:rSI
    'Xb':           ( 'IDX_ParseXb',        'rSI',    '%eSI', 'Xb',      '',      ),
    'Xv':           ( 'IDX_ParseXv',        'rSI',    '%eSI', 'Xv',      '',      ),
    # ES:rDI
    'Yb':           ( 'IDX_ParseYb',        'rDI',    '%eDI', 'Yb',      '',      ),
    'Yv':           ( 'IDX_ParseYv',        'rDI',    '%eDI', 'Yv',      '',      ),

    'Fv':           ( 'IDX_ParseFixedReg',  'rFLAGS', '%Fv',  'Fv',      '',      ),

    # Fixed registers.
    'AL':           ( 'IDX_ParseFixedReg',  'AL',     'al',   'REG_AL',  '',      ),
    'rAX':          ( 'IDX_ParseFixedReg',  'rAX',    '%eAX', 'REG_EAX', '',      ),
    'rDX':          ( 'IDX_ParseFixedReg',  'rDX',    '%eDX', 'REG_EDX', '',      ),
    'CS':           ( 'IDX_ParseFixedReg',  'CS',     'cs',   'REG_CS',  '',      ), # 8086: push CS
    'DS':           ( 'IDX_ParseFixedReg',  'DS',     'ds',   'REG_DS',  '',      ),
    'ES':           ( 'IDX_ParseFixedReg',  'ES',     'es',   'REG_ES',  '',      ),
    'FS':           ( 'IDX_ParseFixedReg',  'FS',     'fs',   'REG_FS',  '',      ),
    'GS':           ( 'IDX_ParseFixedReg',  'GS',     'gs',   'REG_GS',  '',      ),
    'SS':           ( 'IDX_ParseFixedReg',  'SS',     'ss',   'REG_SS',  '',      ),
};
364
365# IDX_ParseFixedReg
366# IDX_ParseVexDest
367
368
## IEMFORM_XXX mappings.
##
## Each value is a 3-tuple: ( sEncoding, [ sWhere1, ... ], opcodesub ), where the
## list holds operand location names and is None for the 'FIXED' form.
g_kdIemForms = {
    'RM':           ( 'ModR/M',     [ 'reg', 'rm' ],            '',           ),
    'RM_REG':       ( 'ModR/M',     [ 'reg', 'rm' ],            '11 mr/reg',  ),
    'RM_MEM':       ( 'ModR/M',     [ 'reg', 'rm' ],            '!11 mr/reg', ),
    'RMI':          ( 'ModR/M',     [ 'reg', 'rm', 'imm' ],     '',           ),
    'RMI_REG':      ( 'ModR/M',     [ 'reg', 'rm', 'imm' ],     '11 mr/reg',  ),
    'RMI_MEM':      ( 'ModR/M',     [ 'reg', 'rm', 'imm' ],     '!11 mr/reg', ),
    'MR':           ( 'ModR/M',     [ 'rm', 'reg' ],            '',           ),
    'MR_REG':       ( 'ModR/M',     [ 'rm', 'reg' ],            '11 mr/reg',  ),
    'MR_MEM':       ( 'ModR/M',     [ 'rm', 'reg' ],            '!11 mr/reg', ),
    'MRI':          ( 'ModR/M',     [ 'rm', 'reg', 'imm' ],     '',           ),
    'MRI_REG':      ( 'ModR/M',     [ 'rm', 'reg', 'imm' ],     '11 mr/reg',  ),
    'MRI_MEM':      ( 'ModR/M',     [ 'rm', 'reg', 'imm' ],     '!11 mr/reg', ),
    'M':            ( 'ModR/M',     [ 'rm', ],                  '',           ),
    'M_REG':        ( 'ModR/M',     [ 'rm', ],                  '',           ),
    'M_MEM':        ( 'ModR/M',     [ 'rm', ],                  '',           ),
    'R':            ( 'ModR/M',     [ 'reg', ],                 '',           ),

    'VEX_RM':       ( 'VEX.ModR/M', [ 'reg', 'rm' ],            '',           ),
    'VEX_RM_REG':   ( 'VEX.ModR/M', [ 'reg', 'rm' ],            '11 mr/reg',  ),
    'VEX_RM_MEM':   ( 'VEX.ModR/M', [ 'reg', 'rm' ],            '!11 mr/reg', ),
    'VEX_MR':       ( 'VEX.ModR/M', [ 'rm', 'reg' ],            '',           ),
    'VEX_MR_REG':   ( 'VEX.ModR/M', [ 'rm', 'reg' ],            '11 mr/reg',  ),
    'VEX_MR_MEM':   ( 'VEX.ModR/M', [ 'rm', 'reg' ],            '!11 mr/reg', ),
    'VEX_M':        ( 'VEX.ModR/M', [ 'rm', ],                  ''            ),
    'VEX_M_REG':    ( 'VEX.ModR/M', [ 'rm', ],                  ''            ),
    'VEX_M_MEM':    ( 'VEX.ModR/M', [ 'rm', ],                  ''            ),
    'VEX_R':        ( 'VEX.ModR/M', [ 'reg', ],                 ''            ),
    'VEX_RVM':      ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm' ],    '',           ),
    'VEX_RVM_REG':  ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm' ],    '11 mr/reg',  ),
    'VEX_RVM_MEM':  ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm' ],    '!11 mr/reg', ),
    'VEX_RMV':      ( 'VEX.ModR/M', [ 'reg', 'rm', 'vvvv' ],    '',           ),
    'VEX_RMV_REG':  ( 'VEX.ModR/M', [ 'reg', 'rm', 'vvvv' ],    '11 mr/reg',  ),
    'VEX_RMV_MEM':  ( 'VEX.ModR/M', [ 'reg', 'rm', 'vvvv' ],    '!11 mr/reg', ),
    'VEX_RMI':      ( 'VEX.ModR/M', [ 'reg', 'rm', 'imm' ],     '',           ),
    'VEX_RMI_REG':  ( 'VEX.ModR/M', [ 'reg', 'rm', 'imm' ],     '11 mr/reg',  ),
    'VEX_RMI_MEM':  ( 'VEX.ModR/M', [ 'reg', 'rm', 'imm' ],     '!11 mr/reg', ),
    'VEX_MVR':      ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg' ],    '',           ),
    'VEX_MVR_REG':  ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg' ],    '11 mr/reg',  ),
    'VEX_MVR_MEM':  ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg' ],    '!11 mr/reg', ),

    'VEX_VM':       ( 'VEX.ModR/M', [ 'vvvv', 'rm' ],           '',           ),
    'VEX_VM_REG':   ( 'VEX.ModR/M', [ 'vvvv', 'rm' ],           '11 mr/reg',  ),
    'VEX_VM_MEM':   ( 'VEX.ModR/M', [ 'vvvv', 'rm' ],           '!11 mr/reg', ),

    'FIXED':        ( 'fixed',      None,                       '',           ),
};
417
## \@oppfx values.
## Each recognised prefix name maps to its own (initially empty) list.
g_kdPrefixes = { sPrefix: [] for sPrefix in ( 'none', '0x66', '0xf3', '0xf2', ) };
425
## Special \@opcode tag values.
## Each recognised value maps to its own (initially empty) list.
g_kdSpecialOpcodes = {
    sSpecial: []
    for sSpecial in (
        '/reg',
        'mr/reg',
        '11 /reg',
        '!11 /reg',
        '11 mr/reg',
        '!11 mr/reg',
    )
};
435
## Special \@opcodesub tag values.
##
## Each value is a two element list:
##      - [0]: the real (canonical) value; for aliases this is the value they
##             stand for, for everything else it is the key itself.
##      - [1]: the value for bs3cg1.
g_kdSubOpcodes = {
    'none':                 [ None,                 '',         ],
    '11 mr/reg':            [ '11 mr/reg',          '',         ],
    '11':                   [ '11 mr/reg',          '',         ],      ##< alias
    '!11 mr/reg':           [ '!11 mr/reg',         '',         ],
    '!11':                  [ '!11 mr/reg',         '',         ],      ##< alias
    'rex.w=0':              [ 'rex.w=0',            'WZ',       ],
    'w=0':                  [ 'rex.w=0',            '',         ],      ##< alias
    'rex.w=1':              [ 'rex.w=1',            'WNZ',      ],
    'w=1':                  [ 'rex.w=1',            '',         ],      ##< alias
    'vex.l=0':              [ 'vex.l=0',            'L0',       ],
    'vex.l=1':              [ 'vex.l=1',            'L1',       ],      # Not an alias: must map to itself, not to 'vex.l=0'.
    '11 mr/reg vex.l=0':    [ '11 mr/reg vex.l=0',  'L0',       ],
    '11 mr/reg vex.l=1':    [ '11 mr/reg vex.l=1',  'L1',       ],
    '!11 mr/reg vex.l=0':   [ '!11 mr/reg vex.l=0', 'L0',       ],
    '!11 mr/reg vex.l=1':   [ '!11 mr/reg vex.l=1', 'L1',       ],
};
456
## Valid values for \@openc.
## Each value is a one element list holding a BS3CG1ENC_XXX name, or None for 'prefix'.
g_kdEncodings = {
    'ModR/M':       [ 'BS3CG1ENC_MODRM',     ],     ##< ModR/M
    'VEX.ModR/M':   [ 'BS3CG1ENC_VEX_MODRM', ],     ##< VEX...ModR/M
    'fixed':        [ 'BS3CG1ENC_FIXED',     ],     ##< Fixed encoding (address, registers, unused, etc).
    'VEX.fixed':    [ 'BS3CG1ENC_VEX_FIXED', ],     ##< VEX + fixed encoding (address, registers, unused, etc).
    'prefix':       [ None,                  ],     ##< Prefix
};
465
## \@opunused, \@opinvalid, \@opinvlstyle
## Each recognised style name maps to its own (initially empty) list.
g_kdInvalidStyles = {
    sStyle: []
    for sStyle in (
        'immediate',                # CPU stops decoding immediately after the opcode.
        'vex.modrm',                # VEX+ModR/M, everyone.
        'intel-modrm',              # Intel decodes ModR/M.
        'intel-modrm-imm8',         # Intel decodes ModR/M and an 8-bit immediate (imm8).
        'intel-opcode-modrm',       # Intel decodes another opcode byte followed by ModR/M. (Unused extension tables.)
        'intel-opcode-modrm-imm8',  # Intel decodes another opcode byte followed by ModR/M and an 8-bit immediate (imm8).
    )
};
475
## Recognised CPU names; each maps to an empty tuple.
g_kdCpuNames = { sCpuName: () for sCpuName in ( '8086', '80186', '80286', '80386', '80486', ) };
483
## \@opcpuid
##
## Maps the lower case CPUID feature names accepted by the tag to the name of
## the corresponding X86_CPUID_XXX feature flag constant.
g_kdCpuIdFlags = {
    'vme':          'X86_CPUID_FEATURE_EDX_VME',
    'tsc':          'X86_CPUID_FEATURE_EDX_TSC',
    'msr':          'X86_CPUID_FEATURE_EDX_MSR',
    'cx8':          'X86_CPUID_FEATURE_EDX_CX8',
    'sep':          'X86_CPUID_FEATURE_EDX_SEP',
    'cmov':         'X86_CPUID_FEATURE_EDX_CMOV',
    'clfsh':        'X86_CPUID_FEATURE_EDX_CLFSH',
    'clflushopt':   'X86_CPUID_STEXT_FEATURE_EBX_CLFLUSHOPT',
    'mmx':          'X86_CPUID_FEATURE_EDX_MMX',
    'fxsr':         'X86_CPUID_FEATURE_EDX_FXSR',
    'sse':          'X86_CPUID_FEATURE_EDX_SSE',
    'sse2':         'X86_CPUID_FEATURE_EDX_SSE2',
    'sse3':         'X86_CPUID_FEATURE_ECX_SSE3',
    # The next entries must name their own feature bit, not the neighbouring
    # one (DTES64, CPLDS and TM2 are different ECX bits).
    # NOTE(review): constant names taken from iprt/x86.h - confirm they exist there.
    'pclmul':       'X86_CPUID_FEATURE_ECX_PCLMUL',
    'monitor':      'X86_CPUID_FEATURE_ECX_MONITOR',
    'vmx':          'X86_CPUID_FEATURE_ECX_VMX',
    'smx':          'X86_CPUID_FEATURE_ECX_SMX',
    'ssse3':        'X86_CPUID_FEATURE_ECX_SSSE3',
    'fma':          'X86_CPUID_FEATURE_ECX_FMA',
    'cx16':         'X86_CPUID_FEATURE_ECX_CX16',
    'pcid':         'X86_CPUID_FEATURE_ECX_PCID',
    'sse4.1':       'X86_CPUID_FEATURE_ECX_SSE4_1',
    'sse4.2':       'X86_CPUID_FEATURE_ECX_SSE4_2',
    'movbe':        'X86_CPUID_FEATURE_ECX_MOVBE',
    'popcnt':       'X86_CPUID_FEATURE_ECX_POPCNT',
    'aes':          'X86_CPUID_FEATURE_ECX_AES',
    'xsave':        'X86_CPUID_FEATURE_ECX_XSAVE',
    'avx':          'X86_CPUID_FEATURE_ECX_AVX',
    'avx2':         'X86_CPUID_STEXT_FEATURE_EBX_AVX2',
    'f16c':         'X86_CPUID_FEATURE_ECX_F16C',
    'rdrand':       'X86_CPUID_FEATURE_ECX_RDRAND',

    'axmmx':        'X86_CPUID_AMD_FEATURE_EDX_AXMMX',
    '3dnowext':     'X86_CPUID_AMD_FEATURE_EDX_3DNOW_EX',
    '3dnow':        'X86_CPUID_AMD_FEATURE_EDX_3DNOW',
    'svm':          'X86_CPUID_AMD_FEATURE_ECX_SVM',
    'cr8l':         'X86_CPUID_AMD_FEATURE_ECX_CR8L',
    'abm':          'X86_CPUID_AMD_FEATURE_ECX_ABM',
    'sse4a':        'X86_CPUID_AMD_FEATURE_ECX_SSE4A',
    '3dnowprf':     'X86_CPUID_AMD_FEATURE_ECX_3DNOWPRF',
    'xop':          'X86_CPUID_AMD_FEATURE_ECX_XOP',
    'fma4':         'X86_CPUID_AMD_FEATURE_ECX_FMA4',
};
529
## \@ophints values.
##
## Maps each hint name to the name of its DISOPTYPE_XXX flag, or to an empty
## string for the hints that have no such flag in this table.
# pylint: disable=line-too-long
g_kdHints = {
    'invalid':                   'DISOPTYPE_INVALID',
    'harmless':                  'DISOPTYPE_HARMLESS',
    'controlflow':               'DISOPTYPE_CONTROLFLOW',
    'potentially_dangerous':     'DISOPTYPE_POTENTIALLY_DANGEROUS',
    'dangerous':                 'DISOPTYPE_DANGEROUS',
    'portio':                    'DISOPTYPE_PORTIO',
    'privileged':                'DISOPTYPE_PRIVILEGED',
    'privileged_notrap':         'DISOPTYPE_PRIVILEGED_NOTRAP',
    'uncond_controlflow':        'DISOPTYPE_UNCOND_CONTROLFLOW',
    'relative_controlflow':      'DISOPTYPE_RELATIVE_CONTROLFLOW',
    'cond_controlflow':          'DISOPTYPE_COND_CONTROLFLOW',
    'interrupt':                 'DISOPTYPE_INTERRUPT',
    'illegal':                   'DISOPTYPE_ILLEGAL',
    'rrm_dangerous':             'DISOPTYPE_RRM_DANGEROUS',             ##< Some additional dangerous ones when recompiling raw r0.
    'rrm_dangerous_16':          'DISOPTYPE_RRM_DANGEROUS_16',          ##< Some additional dangerous ones when recompiling 16-bit raw r0.
    'inhibit_irqs':              'DISOPTYPE_INHIBIT_IRQS',              ##< Will or can inhibit irqs (sti, pop ss, mov ss).
    'x86_portio_read':           'DISOPTYPE_X86_PORTIO_READ',
    'x86_portio_write':          'DISOPTYPE_X86_PORTIO_WRITE',
    'x86_invalid_64':            'DISOPTYPE_X86_INVALID_64',            ##< Invalid in 64 bits mode
    'x86_only_64':               'DISOPTYPE_X86_ONLY_64',               ##< Only valid in 64 bits mode
    'x86_default_64_op_size':    'DISOPTYPE_X86_DEFAULT_64_OP_SIZE',    ##< Default 64 bits operand size
    'x86_forced_64_op_size':     'DISOPTYPE_X86_FORCED_64_OP_SIZE',     ##< Forced 64 bits operand size; regardless of prefix bytes
    'x86_rexb_extends_opreg':    'DISOPTYPE_X86_REXB_EXTENDS_OPREG',    ##< REX.B extends the register field in the opcode byte
    'x86_mod_fixed_11':          'DISOPTYPE_X86_MOD_FIXED_11',          ##< modrm.mod is always 11b
    'x86_forced_32_op_size_x86': 'DISOPTYPE_X86_FORCED_32_OP_SIZE_X86', ##< Forced 32 bits operand size; regardless of prefix bytes
                                                                        ##  (only in 16 & 32 bits mode!)
    'x86_avx':                   'DISOPTYPE_X86_AVX',                   ##< AVX,AVX2,++ instruction. Not implemented yet!
    'x86_sse':                   'DISOPTYPE_X86_SSE',                   ##< SSE,SSE2,SSE3,++ instruction. Not implemented yet!
    'x86_mmx':                   'DISOPTYPE_X86_MMX',                   ##< MMX,MMXExt,3DNow,++ instruction. Not implemented yet!
    'x86_fpu':                   'DISOPTYPE_X86_FPU',                   ##< FPU instruction. Not implemented yet!
    'ignores_oz_pfx':            '',                                    ##< Ignores operand size prefix 66h.
    'ignores_rexw':              '',                                    ##< Ignores REX.W.
    'ignores_op_sizes':          '',                                    ##< Shorthand for "ignores_oz_pfx | ignores_rexw" (NOTE(review): confirm).
    'vex_l_zero':                '',                                    ##< VEX.L must be 0.
    'vex_l_ignored':             '',                                    ##< VEX.L is ignored.
    'vex_v_zero':                '',                                    ##< VEX.V must be 0. (generate sub-table?)
    'lock_allowed':              '',                                    ##< Lock prefix allowed.
};
# pylint: enable=line-too-long
572
## \@opxcpttype values (see SDMv2 2.4, 2.7).
## Each recognised exception type name maps to its own (initially empty) list.
g_kdXcptTypes = {
    sXcptType: []
    for sXcptType in (
        'none',
        '1',
        '2',
        '3',
        '4',
        '4UA',
        '5',
        '5LZ',      # LZ = VEX.L must be zero.
        '6',
        '7',
        '7LZ',
        '8',
        '11',
        '12',
        'E1',
        'E1NF',
        'E2',
        'E3',
        'E3NF',
        'E4',
        'E4NF',
        'E5',
        'E5NF',
        'E6',
        'E6NF',
        'E7NF',
        'E9',
        'E9NF',
        'E10',
        'E11',
        'E12',
        'E12NF',
    )
};
608
609
def _isValidOpcodeByte(sOpcode):
    """
    Tells whether sOpcode is an opcode byte spelled as lower case hex.

    The accepted form is exactly four characters: a '0x' prefix followed by
    two digits from 0-9 / a-f (upper case hex digits are rejected).

    Returns True if valid, False if not.
    """
    sHexDigits = '0123456789abcdef';
    return len(sOpcode) == 4 \
       and sOpcode[:2] == '0x' \
       and all(chDigit in sHexDigits for chDigit in sOpcode[2:]);
621
622
623class InstructionMap(object):
624 """
625 Instruction map.
626
627 The opcode map provides the lead opcode bytes (empty for the one byte
628 opcode map). An instruction can be member of multiple opcode maps as long
629 as it uses the same opcode value within the map (because of VEX).
630 """
631
632 kdEncodings = {
633 'legacy': [],
634 'vex1': [], ##< VEX or EVEX prefix with vvvvv = 1
635 'vex2': [], ##< VEX or EVEX prefix with vvvvv = 2
636 'vex3': [], ##< VEX or EVEX prefix with vvvvv = 3
637 'xop8': [], ##< XOP prefix with vvvvv = 8
638 'xop9': [], ##< XOP prefix with vvvvv = 9
639 'xop10': [], ##< XOP prefix with vvvvv = 10
640 };
641 ## Selectors.
642 ## 1. The first value is the number of table entries required by a
643 ## decoder or disassembler for this type of selector.
644 ## 2. The second value is how many entries per opcode byte if applicable.
645 kdSelectors = {
646 'byte': [ 256, 1, ], ##< next opcode byte selects the instruction (default).
647 'byte+pfx': [ 1024, 4, ], ##< next opcode byte selects the instruction together with the 0x66, 0xf2 and 0xf3 prefixes.
648 '/r': [ 8, 1, ], ##< modrm.reg selects the instruction.
649 'memreg /r':[ 16, 1, ], ##< modrm.reg and (modrm.mod == 3) selects the instruction.
650 'mod /r': [ 32, 1, ], ##< modrm.reg and modrm.mod selects the instruction.
651 '!11 /r': [ 8, 1, ], ##< modrm.reg selects the instruction with modrm.mod != 0y11.
652 '11 /r': [ 8, 1, ], ##< modrm.reg select the instruction with modrm.mod == 0y11.
653 '11': [ 64, 1, ], ##< modrm.reg and modrm.rm select the instruction with modrm.mod == 0y11.
654 };
655
656 ## Define the subentry number according to the Instruction::sPrefix
657 ## value for 'byte+pfx' selected tables.
658 kiPrefixOrder = {
659 'none': 0,
660 '0x66': 1,
661 '0xf3': 2,
662 '0xf2': 3,
663 };
664
665 def __init__(self, sName, sIemName = None, asLeadOpcodes = None, sSelector = 'byte+pfx',
666 sEncoding = 'legacy', sDisParse = None):
667 assert sSelector in self.kdSelectors;
668 assert sEncoding in self.kdEncodings;
669 if asLeadOpcodes is None:
670 asLeadOpcodes = [];
671 else:
672 for sOpcode in asLeadOpcodes:
673 assert _isValidOpcodeByte(sOpcode);
674 assert sDisParse is None or sDisParse.startswith('IDX_Parse');
675
676 self.sName = sName;
677 self.sIemName = sIemName;
678 self.asLeadOpcodes = asLeadOpcodes; ##< Lead opcode bytes formatted as hex strings like '0x0f'.
679 self.sSelector = sSelector; ##< The member selector, see kdSelectors.
680 self.sEncoding = sEncoding; ##< The encoding, see kdSelectors.
681 self.aoInstructions = [] # type: Instruction
682 self.sDisParse = sDisParse; ##< IDX_ParseXXX.
683
684 def copy(self, sNewName, sPrefixFilter = None):
685 """
686 Copies the table with filtering instruction by sPrefix if not None.
687 """
688 oCopy = InstructionMap(sNewName, sIemName = self.sIemName, asLeadOpcodes = self.asLeadOpcodes,
689 sSelector = 'byte' if sPrefixFilter is not None and self.sSelector == 'byte+pfx'
690 else self.sSelector,
691 sEncoding = self.sEncoding, sDisParse = self.sDisParse);
692 if sPrefixFilter is None:
693 oCopy.aoInstructions = list(self.aoInstructions);
694 else:
695 oCopy.aoInstructions = [oInstr for oInstr in self.aoInstructions if oInstr.sPrefix == sPrefixFilter];
696 return oCopy;
697
698 def getTableSize(self):
699 """
700 Number of table entries. This corresponds directly to the selector.
701 """
702 return self.kdSelectors[self.sSelector][0];
703
704 def getEntriesPerByte(self):
705 """
706 Number of table entries per opcode bytes.
707
708 This only really makes sense for the 'byte' and 'byte+pfx' selectors, for
709 the others it will just return 1.
710 """
711 return self.kdSelectors[self.sSelector][1];
712
713 def getInstructionIndex(self, oInstr):
714 """
715 Returns the table index for the instruction.
716 """
717 bOpcode = oInstr.getOpcodeByte();
718
719 # The byte selectors are simple. We need a full opcode byte and need just return it.
720 if self.sSelector == 'byte':
721 assert oInstr.sOpcode[:2] == '0x' and len(oInstr.sOpcode) == 4, str(oInstr);
722 return bOpcode;
723
724 # The byte + prefix selector is similarly simple, though requires a prefix as well as the full opcode.
725 if self.sSelector == 'byte+pfx':
726 assert oInstr.sOpcode[:2] == '0x' and len(oInstr.sOpcode) == 4, str(oInstr);
727 assert self.kiPrefixOrder.get(oInstr.sPrefix, -16384) >= 0;
728 return bOpcode * 4 + self.kiPrefixOrder.get(oInstr.sPrefix, -16384);
729
730 # The other selectors needs masking and shifting.
731 if self.sSelector == '/r':
732 return (bOpcode >> 3) & 0x7;
733
734 if self.sSelector == 'mod /r':
735 return (bOpcode >> 3) & 0x1f;
736
737 if self.sSelector == 'memreg /r':
738 return ((bOpcode >> 3) & 0x7) | (int((bOpcode >> 6) == 3) << 3);
739
740 if self.sSelector == '!11 /r':
741 assert (bOpcode & 0xc0) != 0xc, str(oInstr);
742 return (bOpcode >> 3) & 0x7;
743
744 if self.sSelector == '11 /r':
745 assert (bOpcode & 0xc0) == 0xc, str(oInstr);
746 return (bOpcode >> 3) & 0x7;
747
748 if self.sSelector == '11':
749 assert (bOpcode & 0xc0) == 0xc, str(oInstr);
750 return bOpcode & 0x3f;
751
752 assert False, self.sSelector;
753 return -1;
754
755 def getInstructionsInTableOrder(self):
756 """
757 Get instructions in table order.
758
759 Returns array of instructions. Normally there is exactly one
760 instruction per entry. However the entry could also be None if
761 not instruction was specified for that opcode value. Or there
762 could be a list of instructions to deal with special encodings
763 where for instance prefix (e.g. REX.W) encodes a different
764 instruction or different CPUs have different instructions or
765 prefixes in the same place.
766 """
767 # Start with empty table.
768 cTable = self.getTableSize();
769 aoTable = [None] * cTable;
770
771 # Insert the instructions.
772 for oInstr in self.aoInstructions:
773 if oInstr.sOpcode:
774 idxOpcode = self.getInstructionIndex(oInstr);
775 assert idxOpcode < cTable, str(idxOpcode);
776
777 oExisting = aoTable[idxOpcode];
778 if oExisting is None:
779 aoTable[idxOpcode] = oInstr;
780 elif not isinstance(oExisting, list):
781 aoTable[idxOpcode] = list([oExisting, oInstr]);
782 else:
783 oExisting.append(oInstr);
784
785 return aoTable;
786
787
def getDisasTableName(self):
    """
    Builds the disassembler table name for this map from self.sName.

    The map name is split on underscores and each part is appended to the
    'g_aDisas' stem:
        - 'm' and 'r' (suffixes indicating modrm.mod==mem / modrm.mod==reg)
          and two-digit lowercase hex parts are appended as '_<part>',
        - any other part has 'grp' and 'map' rewritten to 'Grp' and 'Map',
          gets its first character uppercased and is appended directly.
    """
    asPieces = ['g_aDisas'];
    for sPart in self.sName.split('_'):
        fKeepAsIs = sPart in ('m', 'r') \
                 or (len(sPart) == 2 and re.match('^[a-f0-9][a-f0-9]$', sPart));
        if fKeepAsIs:
            asPieces.append('_' + sPart);
        else:
            sPart = sPart.replace('grp', 'Grp').replace('map', 'Map');
            asPieces.append(sPart[0].upper() + sPart[1:]);
    return ''.join(asPieces);
805
def getDisasRangeName(self):
    """
    Returns the disassembler table range name for this map, i.e. the table
    name with 'g_aDisas' rewritten to 'g_Disas' and 'Range' appended.
    """
    sTableName = self.getDisasTableName();
    return '%sRange' % (sTableName.replace('g_aDisas', 'g_Disas'),);
811
def isVexMap(self):
    """ Returns True if the map encoding (self.sEncoding) starts with 'vex'. """
    sEncoding = self.sEncoding;
    return sEncoding.startswith('vex');
815
816
class TestType(object):
    """
    Test value type.

    This base class deals with integer like values: get() converts a value
    string into a little-endian bytearray padded to one of the sizes listed
    in acbSizes.  The fUnsigned constructor parameter gives the default
    stance on zero vs sign extending; a value prefixed with '+' or '-'
    always requests sign extension.
    """

    def __init__(self, sName, acbSizes = None, fUnsigned = True):
        self.sName     = sName;
        self.fUnsigned = fUnsigned;
        # Normal sizes (in bytes); get() treats the last entry as the largest one.
        self.acbSizes  = acbSizes if acbSizes is not None else [1, 2, 4, 8, 16, 32];

    class BadValue(Exception):
        """ Raised by get() when a value string cannot be parsed. """
        def __init__(self, sMessage):
            Exception.__init__(self, sMessage);
            self.sMessage = sMessage;

    ## For ascii ~ operator: maps each lowercase hex digit to its 4-bit complement.
    kdHexInv = dict(zip('0123456789abcdef', 'fedcba9876543210'));

    def get(self, sValue):
        """
        Get the shortest normal sized byte representation of sValue.

        Returns ((fSignExtend, bytearray), ) or ((fSignExtend, bytearray), (fSignExtend, bytearray), ).
        The latter form is for AND+OR pairs where the first entry is what to
        AND with the field and the second the one to OR with.  This base
        implementation only produces the single entry form.

        Raises BadValue if invalid value.
        """
        if not sValue:
            raise TestType.BadValue('empty value');

        # A leading sign forces sign extension and shifts where a '0x' prefix may sit.
        fHasSign    = sValue[0] in ('-', '+');
        offDigits   = 1 if fHasSign else 0;
        fSignExtend = fHasSign or not self.fUnsigned;
        fHex        = len(sValue) > offDigits + 2 and sValue[offDigits : offDigits + 2].lower() == '0x';

        # Let the integer parser deal with the sign and any radix prefix.
        try:
            iValue = long(sValue, 16 if fHex else 10);
        except Exception as oXcpt:
            raise TestType.BadValue('failed to convert "%s" to integer (%s)' % (sValue, oXcpt));

        # Produce bare hex digits.  A negative value n is rendered as the
        # digit-wise complement of (-n - 1), i.e. its two's complement form.
        fNegative = iValue < 0;
        sDigits   = hex(-iValue - 1 if fNegative else iValue);
        if sys.version_info[0] < 3:
            assert sDigits[-1] == 'L';
            sDigits = sDigits[:-1];
        assert sDigits[:2] == '0x';
        sDigits = sDigits[2:];
        if fNegative:
            sDigits = ''.join([self.kdHexInv[chDigit] for chDigit in sDigits]);
            # Make sure the top bit is set so the value stays negative.
            if fSignExtend and sDigits[0] not in ('8', '9', 'a', 'b', 'c', 'd', 'e', 'f'):
                sDigits = 'f' + sDigits;

        # Pick the first normal size that fits, or round up to a multiple of
        # the largest size when the value is bigger than that.
        cDigits    = len(sDigits);
        cMaxDigits = self.acbSizes[-1] * 2;
        if cDigits > cMaxDigits:
            cNaturalDigits = int((cDigits + cMaxDigits - 1) / cMaxDigits) * cMaxDigits;
            assert isinstance(cNaturalDigits, int)
        else:
            for cbSize in self.acbSizes:
                cNaturalDigits = cbSize * 2;
                if cDigits <= cNaturalDigits:
                    break;

        # Pad on the left: 'f' keeps negative values negative, '0' otherwise.
        sDigits = ('f' if fNegative else '0') * (cNaturalDigits - cDigits) + sDigits;

        # Walk the digit pairs from the end to get little-endian byte order.
        abValue = bytearray([int(sDigits[offEnd - 2 : offEnd], 16) for offEnd in range(len(sDigits), 0, -2)]);

        return ((fSignExtend, abValue),);

    def validate(self, sValue):
        """
        Returns True if value is okay, error message on failure.
        """
        try:
            self.get(sValue);
        except TestType.BadValue as oXcpt:
            return oXcpt.sMessage;
        else:
            return True;

    def isAndOrPair(self, sValue):
        """
        Checks if sValue is a pair.  Always False for this base class.
        """
        _ = sValue;
        return False;
941
942
class TestTypeEflags(TestType):
    """
    Special value parsing for EFLAGS/RFLAGS/FLAGS.

    A value is a comma separated list of flag mnemonics which are looked up
    in g_kdEFlagsMnemonics.  Mnemonics mapping to a constant prefixed by '!'
    contribute to the mask that is cleared, all others to the mask that is
    set.
    """

    ## Mnemonics that stand for a flag being zero.  get() asserts that
    ## isAndOrPair() based on this table agrees with the '!' entries it found.
    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 };

    def __init__(self, sName):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);

    def get(self, sValue):
        """
        Converts a comma separated flag mnemonic list.

        Returns the single entry form of TestType.get() holding the set mask
        when no flag is cleared, otherwise a (clear-entry, set-entry) pair.

        Raises BadValue on unknown mnemonics.
        """
        fClear = 0;
        fSet   = 0;
        for sFlag in sValue.split(','):
            sConstant = g_kdEFlagsMnemonics.get(sFlag, None);
            if sConstant is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sFlag, sValue))
            if sConstant[0] == '!':
                fClear |= g_kdX86EFlagsConstants[sConstant[1:]];
            else:
                fSet   |= g_kdX86EFlagsConstants[sConstant];

        aoSet = TestType.get(self, '0x%x' % (fSet,));
        if fClear != 0:
            aoClear = TestType.get(self, '0x%x' % (fClear,))
            assert self.isAndOrPair(sValue) is True;
            return (aoClear[0], aoSet[0]);
        assert self.isAndOrPair(sValue) is False;
        return aoSet;

    def isAndOrPair(self, sValue):
        """
        Checks if sValue contains any of the zero-value mnemonics.

        The value is split on commas exactly like get() does, so a mnemonic
        is only recognized as a whole list item.  (A plain substring search
        would for instance find 'pl' inside a longer flag name and disagree
        with get().)
        """
        return any(sFlag in self.kdZeroValueFlags for sFlag in sValue.split(','));
978
class TestTypeFromDict(TestType):
    """
    Special value parsing for registers whose bits are named by a constant
    dictionary (the dictionary and the constant name prefix are supplied to
    the constructor).
    """

    ## Not referenced by any method of this class.
    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 };

    def __init__(self, sName, kdConstantsAndValues, sConstantPrefix):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);
        self.sConstantPrefix      = sConstantPrefix;
        self.kdConstantsAndValues = kdConstantsAndValues;

    def get(self, sValue):
        """
        Converts a comma separated list of flag names.

        Each name is uppercased, prefixed with sConstantPrefix and looked up
        in kdConstantsAndValues; the values are ORed together and the result
        is handed to TestType.get().

        Raises BadValue if a name is not in the dictionary.
        """
        fCombined = 0;
        for sName in sValue.split(','):
            sKey  = self.sConstantPrefix + sName.upper();
            fBits = self.kdConstantsAndValues.get(sKey, None);
            if fBits is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sName, sValue))
            fCombined |= fBits;
        return TestType.get(self, '0x%x' % (fCombined,));
999
1000
class TestInOut(object):
    """
    One input or output state modifier.

    This should be thought as values to modify BS3REGCTX and extended (needs
    to be structured) state.

    An instance records the field being modified (a kdFields key), the
    assignment operator (a kasOperators entry), the value string and the
    value type name.
    """
    ## Assigned operators.
    kasOperators = [
        '&|=',  # Special AND(INV)+OR operator for use with EFLAGS.
        '&~=',
        '&=',
        '|=',
        '='
    ];
    ## Types
    # NOTE(review): 'int' is constructed with the default fUnsigned=True and
    # therefore behaves exactly like 'uint' -- confirm whether fUnsigned=False
    # was intended before relying on a signed default.
    kdTypes = {
        'uint':  TestType('uint', fUnsigned = True),
        'int':   TestType('int'),
        'efl':   TestTypeEflags('efl'),
        'cr0':   TestTypeFromDict('cr0', g_kdX86Cr0Constants, 'X86_CR0_'),
        'cr4':   TestTypeFromDict('cr4', g_kdX86Cr4Constants, 'X86_CR4_'),
        'xcr0':  TestTypeFromDict('xcr0', g_kdX86XSaveCConstants, 'XSAVE_C_'),
    };
    ## CPU context fields.
    kdFields = {
        # name:         ( default type, [both|input|output], )
        # Operands.
        'op1':          ( 'uint', 'both',   ), ## \@op1
        'op2':          ( 'uint', 'both',   ), ## \@op2
        'op3':          ( 'uint', 'both',   ), ## \@op3
        'op4':          ( 'uint', 'both',   ), ## \@op4
        # Flags.
        'efl':          ( 'efl',  'both',   ),
        'efl_undef':    ( 'uint', 'output', ),
        # 8-bit GPRs.
        'al':           ( 'uint', 'both',   ),
        'cl':           ( 'uint', 'both',   ),
        'dl':           ( 'uint', 'both',   ),
        'bl':           ( 'uint', 'both',   ),
        'ah':           ( 'uint', 'both',   ),
        'ch':           ( 'uint', 'both',   ),
        'dh':           ( 'uint', 'both',   ),
        'bh':           ( 'uint', 'both',   ),
        'r8l':          ( 'uint', 'both',   ),
        'r9l':          ( 'uint', 'both',   ),
        'r10l':         ( 'uint', 'both',   ),
        'r11l':         ( 'uint', 'both',   ),
        'r12l':         ( 'uint', 'both',   ),
        'r13l':         ( 'uint', 'both',   ),
        'r14l':         ( 'uint', 'both',   ),
        'r15l':         ( 'uint', 'both',   ),
        # 16-bit GPRs.
        'ax':           ( 'uint', 'both',   ),
        'dx':           ( 'uint', 'both',   ),
        'cx':           ( 'uint', 'both',   ),
        'bx':           ( 'uint', 'both',   ),
        'sp':           ( 'uint', 'both',   ),
        'bp':           ( 'uint', 'both',   ),
        'si':           ( 'uint', 'both',   ),
        'di':           ( 'uint', 'both',   ),
        'r8w':          ( 'uint', 'both',   ),
        'r9w':          ( 'uint', 'both',   ),
        'r10w':         ( 'uint', 'both',   ),
        'r11w':         ( 'uint', 'both',   ),
        'r12w':         ( 'uint', 'both',   ),
        'r13w':         ( 'uint', 'both',   ),
        'r14w':         ( 'uint', 'both',   ),
        'r15w':         ( 'uint', 'both',   ),
        # 32-bit GPRs.
        'eax':          ( 'uint', 'both',   ),
        'edx':          ( 'uint', 'both',   ),
        'ecx':          ( 'uint', 'both',   ),
        'ebx':          ( 'uint', 'both',   ),
        'esp':          ( 'uint', 'both',   ),
        'ebp':          ( 'uint', 'both',   ),
        'esi':          ( 'uint', 'both',   ),
        'edi':          ( 'uint', 'both',   ),
        'r8d':          ( 'uint', 'both',   ),
        'r9d':          ( 'uint', 'both',   ),
        'r10d':         ( 'uint', 'both',   ),
        'r11d':         ( 'uint', 'both',   ),
        'r12d':         ( 'uint', 'both',   ),
        'r13d':         ( 'uint', 'both',   ),
        'r14d':         ( 'uint', 'both',   ),
        'r15d':         ( 'uint', 'both',   ),
        # 64-bit GPRs.
        'rax':          ( 'uint', 'both',   ),
        'rdx':          ( 'uint', 'both',   ),
        'rcx':          ( 'uint', 'both',   ),
        'rbx':          ( 'uint', 'both',   ),
        'rsp':          ( 'uint', 'both',   ),
        'rbp':          ( 'uint', 'both',   ),
        'rsi':          ( 'uint', 'both',   ),
        'rdi':          ( 'uint', 'both',   ),
        'r8':           ( 'uint', 'both',   ),
        'r9':           ( 'uint', 'both',   ),
        'r10':          ( 'uint', 'both',   ),
        'r11':          ( 'uint', 'both',   ),
        'r12':          ( 'uint', 'both',   ),
        'r13':          ( 'uint', 'both',   ),
        'r14':          ( 'uint', 'both',   ),
        'r15':          ( 'uint', 'both',   ),
        # 16-bit, 32-bit or 64-bit registers according to operand size.
        'oz.rax':       ( 'uint', 'both',   ),
        'oz.rdx':       ( 'uint', 'both',   ),
        'oz.rcx':       ( 'uint', 'both',   ),
        'oz.rbx':       ( 'uint', 'both',   ),
        'oz.rsp':       ( 'uint', 'both',   ),
        'oz.rbp':       ( 'uint', 'both',   ),
        'oz.rsi':       ( 'uint', 'both',   ),
        'oz.rdi':       ( 'uint', 'both',   ),
        'oz.r8':        ( 'uint', 'both',   ),
        'oz.r9':        ( 'uint', 'both',   ),
        'oz.r10':       ( 'uint', 'both',   ),
        'oz.r11':       ( 'uint', 'both',   ),
        'oz.r12':       ( 'uint', 'both',   ),
        'oz.r13':       ( 'uint', 'both',   ),
        'oz.r14':       ( 'uint', 'both',   ),
        'oz.r15':       ( 'uint', 'both',   ),
        # Control registers.
        'cr0':          ( 'cr0',  'both',   ),
        'cr4':          ( 'cr4',  'both',   ),
        'xcr0':         ( 'xcr0', 'both',   ),
        # FPU Registers
        'fcw':          ( 'uint', 'both',   ),
        'fsw':          ( 'uint', 'both',   ),
        'ftw':          ( 'uint', 'both',   ),
        'fop':          ( 'uint', 'both',   ),
        'fpuip':        ( 'uint', 'both',   ),
        'fpucs':        ( 'uint', 'both',   ),
        'fpudp':        ( 'uint', 'both',   ),
        'fpuds':        ( 'uint', 'both',   ),
        'mxcsr':        ( 'uint', 'both',   ),
        'st0':          ( 'uint', 'both',   ),
        'st1':          ( 'uint', 'both',   ),
        'st2':          ( 'uint', 'both',   ),
        'st3':          ( 'uint', 'both',   ),
        'st4':          ( 'uint', 'both',   ),
        'st5':          ( 'uint', 'both',   ),
        'st6':          ( 'uint', 'both',   ),
        'st7':          ( 'uint', 'both',   ),
        # MMX registers.
        'mm0':          ( 'uint', 'both',   ),
        'mm1':          ( 'uint', 'both',   ),
        'mm2':          ( 'uint', 'both',   ),
        'mm3':          ( 'uint', 'both',   ),
        'mm4':          ( 'uint', 'both',   ),
        'mm5':          ( 'uint', 'both',   ),
        'mm6':          ( 'uint', 'both',   ),
        'mm7':          ( 'uint', 'both',   ),
        # SSE registers.
        'xmm0':         ( 'uint', 'both',   ),
        'xmm1':         ( 'uint', 'both',   ),
        'xmm2':         ( 'uint', 'both',   ),
        'xmm3':         ( 'uint', 'both',   ),
        'xmm4':         ( 'uint', 'both',   ),
        'xmm5':         ( 'uint', 'both',   ),
        'xmm6':         ( 'uint', 'both',   ),
        'xmm7':         ( 'uint', 'both',   ),
        'xmm8':         ( 'uint', 'both',   ),
        'xmm9':         ( 'uint', 'both',   ),
        'xmm10':        ( 'uint', 'both',   ),
        'xmm11':        ( 'uint', 'both',   ),
        'xmm12':        ( 'uint', 'both',   ),
        'xmm13':        ( 'uint', 'both',   ),
        'xmm14':        ( 'uint', 'both',   ),
        'xmm15':        ( 'uint', 'both',   ),
        'xmm0.lo':      ( 'uint', 'both',   ),
        'xmm1.lo':      ( 'uint', 'both',   ),
        'xmm2.lo':      ( 'uint', 'both',   ),
        'xmm3.lo':      ( 'uint', 'both',   ),
        'xmm4.lo':      ( 'uint', 'both',   ),
        'xmm5.lo':      ( 'uint', 'both',   ),
        'xmm6.lo':      ( 'uint', 'both',   ),
        'xmm7.lo':      ( 'uint', 'both',   ),
        'xmm8.lo':      ( 'uint', 'both',   ),
        'xmm9.lo':      ( 'uint', 'both',   ),
        'xmm10.lo':     ( 'uint', 'both',   ),
        'xmm11.lo':     ( 'uint', 'both',   ),
        'xmm12.lo':     ( 'uint', 'both',   ),
        'xmm13.lo':     ( 'uint', 'both',   ),
        'xmm14.lo':     ( 'uint', 'both',   ),
        'xmm15.lo':     ( 'uint', 'both',   ),
        'xmm0.hi':      ( 'uint', 'both',   ),
        'xmm1.hi':      ( 'uint', 'both',   ),
        'xmm2.hi':      ( 'uint', 'both',   ),
        'xmm3.hi':      ( 'uint', 'both',   ),
        'xmm4.hi':      ( 'uint', 'both',   ),
        'xmm5.hi':      ( 'uint', 'both',   ),
        'xmm6.hi':      ( 'uint', 'both',   ),
        'xmm7.hi':      ( 'uint', 'both',   ),
        'xmm8.hi':      ( 'uint', 'both',   ),
        'xmm9.hi':      ( 'uint', 'both',   ),
        'xmm10.hi':     ( 'uint', 'both',   ),
        'xmm11.hi':     ( 'uint', 'both',   ),
        'xmm12.hi':     ( 'uint', 'both',   ),
        'xmm13.hi':     ( 'uint', 'both',   ),
        'xmm14.hi':     ( 'uint', 'both',   ),
        'xmm15.hi':     ( 'uint', 'both',   ),
        'xmm0.lo.zx':   ( 'uint', 'both',   ),
        'xmm1.lo.zx':   ( 'uint', 'both',   ),
        'xmm2.lo.zx':   ( 'uint', 'both',   ),
        'xmm3.lo.zx':   ( 'uint', 'both',   ),
        'xmm4.lo.zx':   ( 'uint', 'both',   ),
        'xmm5.lo.zx':   ( 'uint', 'both',   ),
        'xmm6.lo.zx':   ( 'uint', 'both',   ),
        'xmm7.lo.zx':   ( 'uint', 'both',   ),
        'xmm8.lo.zx':   ( 'uint', 'both',   ),
        'xmm9.lo.zx':   ( 'uint', 'both',   ),
        'xmm10.lo.zx':  ( 'uint', 'both',   ),
        'xmm11.lo.zx':  ( 'uint', 'both',   ),
        'xmm12.lo.zx':  ( 'uint', 'both',   ),
        'xmm13.lo.zx':  ( 'uint', 'both',   ),
        'xmm14.lo.zx':  ( 'uint', 'both',   ),
        'xmm15.lo.zx':  ( 'uint', 'both',   ),
        'xmm0.dw0':     ( 'uint', 'both',   ),
        'xmm1.dw0':     ( 'uint', 'both',   ),
        'xmm2.dw0':     ( 'uint', 'both',   ),
        'xmm3.dw0':     ( 'uint', 'both',   ),
        'xmm4.dw0':     ( 'uint', 'both',   ),
        'xmm5.dw0':     ( 'uint', 'both',   ),
        'xmm6.dw0':     ( 'uint', 'both',   ),
        'xmm7.dw0':     ( 'uint', 'both',   ),
        'xmm8.dw0':     ( 'uint', 'both',   ),
        'xmm9.dw0':     ( 'uint', 'both',   ),
        'xmm10.dw0':    ( 'uint', 'both',   ),
        'xmm11.dw0':    ( 'uint', 'both',   ),
        'xmm12.dw0':    ( 'uint', 'both',   ),
        'xmm13.dw0':    ( 'uint', 'both',   ),
        'xmm14.dw0':    ( 'uint', 'both',   ),
        'xmm15.dw0':    ( 'uint', 'both',   ),
        # Legacy misspelling of 'xmm15.dw0' (underscore instead of dot), kept
        # so that anything already using the old name keeps being accepted.
        'xmm15_dw0':    ( 'uint', 'both',   ),
        # AVX registers.
        'ymm0':         ( 'uint', 'both',   ),
        'ymm1':         ( 'uint', 'both',   ),
        'ymm2':         ( 'uint', 'both',   ),
        'ymm3':         ( 'uint', 'both',   ),
        'ymm4':         ( 'uint', 'both',   ),
        'ymm5':         ( 'uint', 'both',   ),
        'ymm6':         ( 'uint', 'both',   ),
        'ymm7':         ( 'uint', 'both',   ),
        'ymm8':         ( 'uint', 'both',   ),
        'ymm9':         ( 'uint', 'both',   ),
        'ymm10':        ( 'uint', 'both',   ),
        'ymm11':        ( 'uint', 'both',   ),
        'ymm12':        ( 'uint', 'both',   ),
        'ymm13':        ( 'uint', 'both',   ),
        'ymm14':        ( 'uint', 'both',   ),
        'ymm15':        ( 'uint', 'both',   ),

        # Special ones.
        'value.xcpt':   ( 'uint', 'output', ),
    };

    def __init__(self, sField, sOp, sValue, sType):
        """
        sField must be a kdFields key and sOp a kasOperators entry; all four
        arguments must be strings (checked by assertions only).
        """
        assert sField in self.kdFields;
        assert sOp in self.kasOperators;
        self.sField = sField;
        self.sOp    = sOp;
        self.sValue = sValue;
        self.sType  = sType;
        assert isinstance(sField, str);
        assert isinstance(sOp, str);
        assert isinstance(sType, str);
        assert isinstance(sValue, str);
1266
1267
class TestSelector(object):
    """
    One selector for an instruction test: a variable, a compare operator
    and one of the values that are valid for that variable.
    """
    ## Selector compare operators.
    kasCompareOps = [ '==', '!=' ];

    ## Selector variables and their valid values.
    kdVariables = {
        # Operand size.
        'size':     { 'o16': 'size_o16', 'o32': 'size_o32', 'o64': 'size_o64', },
        # VEX.L value.
        'vex.l':    { '0': 'vexl_0', '1': 'vexl_1', },
        # Execution ring.
        'ring':     {
            '0':    'ring_0',
            '1':    'ring_1',
            '2':    'ring_2',
            '3':    'ring_3',
            '0..2': 'ring_0_thru_2',
            '1..3': 'ring_1_thru_3',
        },
        # Basic code mode.
        'codebits': { '64': 'code_64bit', '32': 'code_32bit', '16': 'code_16bit', },
        # cpu modes.
        'mode':     {
            'real': 'mode_real',
            'prot': 'mode_prot',
            'long': 'mode_long',
            'v86':  'mode_v86',
            'smm':  'mode_smm',
            'vmx':  'mode_vmx',
            'svm':  'mode_svm',
        },
        # paging on/off
        'paging':   { 'on': 'paging_on', 'off': 'paging_off', },
        # CPU vendor
        'vendor':   { 'amd': 'vendor_amd', 'intel': 'vendor_intel', 'via': 'vendor_via', },
    };

    ## Selector shorthand predicates.
    ## These translates into variable expressions.
    kdPredicates = {
        # Operand size.
        'o16':          'size==o16',
        'o32':          'size==o32',
        'o64':          'size==o64',
        # Execution ring.
        'ring0':        'ring==0',
        '!ring0':       'ring==1..3',
        'ring1':        'ring==1',
        'ring2':        'ring==2',
        'ring3':        'ring==3',
        'user':         'ring==3',
        'supervisor':   'ring==0..2',
        # Code bitness.
        '16-bit':       'codebits==16',
        '32-bit':       'codebits==32',
        '64-bit':       'codebits==64',
        # CPU mode.
        'real':         'mode==real',
        'prot':         'mode==prot',
        'long':         'mode==long',
        'v86':          'mode==v86',
        'smm':          'mode==smm',
        'vmx':          'mode==vmx',
        'svm':          'mode==svm',
        # Paging.
        'paging':       'paging==on',
        '!paging':      'paging==off',
        # CPU vendor.
        'amd':          'vendor==amd',
        '!amd':         'vendor!=amd',
        'intel':        'vendor==intel',
        '!intel':       'vendor!=intel',
        'via':          'vendor==via',
        '!via':         'vendor!=via',
    };

    def __init__(self, sVariable, sOp, sValue):
        """
        The arguments are checked (by assertion only) against kdVariables
        and kasCompareOps before being stored.
        """
        assert sVariable in self.kdVariables;
        assert sOp in self.kasCompareOps;
        assert sValue in self.kdVariables[sVariable];
        self.sVariable, self.sOp, self.sValue = sVariable, sOp, sValue;
1364
1365
1366class InstructionTest(object):
1367 """
1368 Instruction test.
1369 """
1370
1371 def __init__(self, oInstr): # type: (InstructionTest, Instruction)
1372 self.oInstr = oInstr # type: InstructionTest
1373 self.aoInputs = [] # type: List[TestInOut]
1374 self.aoOutputs = [] # type: List[TestInOut]
1375 self.aoSelectors = [] # type: List[TestSelector]
1376
1377 def toString(self, fRepr = False):
1378 """
1379 Converts it to string representation.
1380 """
1381 asWords = [];
1382 if self.aoSelectors:
1383 for oSelector in self.aoSelectors:
1384 asWords.append('%s%s%s' % (oSelector.sVariable, oSelector.sOp, oSelector.sValue,));
1385 asWords.append('/');
1386
1387 for oModifier in self.aoInputs:
1388 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
1389
1390 asWords.append('->');
1391
1392 for oModifier in self.aoOutputs:
1393 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
1394
1395 if fRepr:
1396 return '<' + ' '.join(asWords) + '>';
1397 return ' '.join(asWords);
1398
1399 def __str__(self):
1400 """ Provide string represenation. """
1401 return self.toString(False);
1402
1403 def __repr__(self):
1404 """ Provide unambigious string representation. """
1405 return self.toString(True);
1406
class Operand(object):
    """
    Instruction operand, described by where it is encoded and by its type.
    """

    def __init__(self, sWhere, sType):
        # Both values must be known keys (checked by assertion only).
        assert sWhere in g_kdOpLocations, sWhere;
        assert sType  in g_kdOpTypes, sType;
        self.sWhere = sWhere;   ##< g_kdOpLocations
        self.sType  = sType;    ##< g_kdOpTypes

    def usesModRM(self):
        """ Returns True if using some form of ModR/M encoding (type starts with E, G or M). """
        chKind = self.sType[0];
        return chKind == 'E' or chKind == 'G' or chKind == 'M';
1421
1422
1423
class Instruction(object): # pylint: disable=too-many-instance-attributes
    """
    Instruction.

    Holds everything collected about one instruction: the core description,
    implementation details, where it was decoded from and some intermediate
    raw input fields.
    """

    def __init__(self, sSrcFile, iLine):
        ## @name Core attributes.
        ## @{
        self.oParent        = None      # type: Instruction
        self.sMnemonic      = None;
        self.sBrief         = None;
        self.asDescSections = []        # type: List[str]
        self.aoMaps         = []        # type: List[InstructionMap]
        self.aoOperands     = []        # type: List[Operand]
        self.sPrefix        = None;     ##< Single prefix: None, 'none', 0x66, 0xf3, 0xf2
        self.sOpcode        = None      # type: str
        self.sSubOpcode     = None      # type: str
        self.sEncoding      = None;
        self.asFlTest       = None;
        self.asFlModify     = None;
        self.asFlUndefined  = None;
        self.asFlSet        = None;
        self.asFlClear      = None;
        self.dHints         = {};       ##< Dictionary of instruction hints, flags, whatnot. (Dictionary for speed; dummy value).
        self.sDisEnum       = None;     ##< OP_XXXX value.  Default is based on the uppercased mnemonic.
        self.asCpuIds       = [];       ##< The CPUID feature bit names for this instruction. If multiple, assume AND.
        self.asReqFeatures  = [];       ##< Which features are required to be enabled to run this instruction.
        self.aoTests        = []        # type: List[InstructionTest]
        self.sMinCpu        = None;     ##< Indicates the minimum CPU required for the instruction. Not set when oCpuExpr is.
        self.oCpuExpr       = None;     ##< Some CPU restriction expression...
        self.sGroup         = None;
        self.fUnused        = False;    ##< Unused instruction.
        self.fInvalid       = False;    ##< Invalid instruction (like UD2).
        self.sInvalidStyle  = None;     ##< Invalid behaviour style (g_kdInvalidStyles),
        self.sXcptType      = None;     ##< Exception type (g_kdXcptTypes).
        ## @}

        ## @name Implementation attributes.
        ## @{
        self.sStats         = None;
        self.sFunction      = None;
        self.fStub          = False;
        self.fUdStub        = False;
        ## @}

        ## @name Decoding info
        ## @{
        self.sSrcFile       = sSrcFile;
        self.iLineCreated   = iLine;
        self.iLineCompleted = None;
        self.cOpTags        = 0;
        self.iLineFnIemOpMacro  = -1;
        self.iLineMnemonicMacro = -1;
        ## @}

        ## @name Intermediate input fields.
        ## @{
        self.sRawDisOpNo    = None;
        self.asRawDisParams = [];
        self.sRawIemOpFlags = None;
        self.sRawOldOpcodes = None;
        self.asCopyTests    = [];
        ## @}

    def toString(self, fRepr = False):
        """
        Turn object into a string.

        The result is a '; ' separated list of 'name=value' items, wrapped
        in angle brackets when fRepr is set.  Items whose value is None are
        left out, and several items are only included when set/non-empty.
        """
        atFields = [('opcode', self.sOpcode)];
        if self.sPrefix:
            atFields.append(('prefix', self.sPrefix));
        atFields.append(('mnemonic', self.sMnemonic));
        atFields.extend([('op%u' % (iOperand,), '%s:%s' % (oOperand.sWhere, oOperand.sType,))
                         for iOperand, oOperand in enumerate(self.aoOperands, 1)]);
        if self.aoMaps:
            atFields.append(('maps', ','.join([oMap.sName for oMap in self.aoMaps])));
        atFields.append(('encoding', self.sEncoding));
        if self.dHints:
            atFields.append(('hints', ','.join(self.dHints.keys())));
        atFields.append(('disenum', self.sDisEnum));
        if self.asCpuIds:
            atFields.append(('cpuid', ','.join(self.asCpuIds)));
        atFields.append(('group', self.sGroup));
        if self.fUnused:
            atFields.append(('unused', 'True'));
        if self.fInvalid:
            atFields.append(('invalid', 'True'));
        atFields.extend([
            ('invlstyle',   self.sInvalidStyle),
            ('fltest',      self.asFlTest),
            ('flmodify',    self.asFlModify),
            ('flundef',     self.asFlUndefined),
            ('flset',       self.asFlSet),
            ('flclear',     self.asFlClear),
            ('mincpu',      self.sMinCpu),
            ('stats',       self.sStats),
            ('sFunction',   self.sFunction),
        ]);
        if self.fStub:
            atFields.append(('fStub', 'True'));
        if self.fUdStub:
            atFields.append(('fUdStub', 'True'));
        if self.cOpTags:
            atFields.append(('optags', str(self.cOpTags)));
        if self.iLineFnIemOpMacro != -1:
            atFields.append(('FNIEMOP_XXX', str(self.iLineFnIemOpMacro)));
        if self.iLineMnemonicMacro != -1:
            atFields.append(('IEMOP_MNEMMONICn', str(self.iLineMnemonicMacro)));

        sBody = '; '.join(['%s=%s' % (sField, oValue,) for sField, oValue in atFields if oValue is not None]);
        return '<' + sBody + '>' if fRepr else sBody;

    def __str__(self):
        """ Provide string representation. """
        return self.toString(False);

    def __repr__(self):
        """ Provide unambiguous string representation. """
        return self.toString(True);

    def copy(self, oMap = None, sOpcode = None, sSubOpcode = None, sPrefix = None):
        """
        Makes a copy of the object for the purpose of putting in a different map
        or a different place in the current map.

        The copy has this object as oParent.  The given arguments override
        the map list, opcode, sub-opcode and prefix when they are set
        (truthy).  Lists and the hints dictionary are copied one level deep
        only; the flag lists are shared with the original.
        """
        oCopy = Instruction(self.sSrcFile, self.iLineCreated);
        oCopy.oParent = self;

        # Attributes taken over by reference.
        for sAttr in ('sMnemonic', 'sBrief', 'sEncoding',
                      'asFlTest', 'asFlModify', 'asFlUndefined', 'asFlSet', 'asFlClear',
                      'sDisEnum', 'sMinCpu', 'oCpuExpr', 'sGroup', 'fUnused', 'fInvalid',
                      'sInvalidStyle', 'sXcptType',
                      'sStats', 'sFunction', 'fStub', 'fUdStub',
                      'iLineCompleted', 'cOpTags', 'iLineFnIemOpMacro', 'iLineMnemonicMacro',
                      'sRawDisOpNo', 'sRawIemOpFlags', 'sRawOldOpcodes',):
            setattr(oCopy, sAttr, getattr(self, sAttr));

        # Containers getting a new list holding the same elements.  (Deeper
        # copy for aoOperands and aoTests?)
        for sAttr in ('asDescSections', 'aoOperands', 'asCpuIds', 'asReqFeatures', 'aoTests',
                      'asRawDisParams', 'asCopyTests',):
            setattr(oCopy, sAttr, list(getattr(self, sAttr)));
        oCopy.dHints = dict(self.dHints);

        # Attributes the caller may override.
        oCopy.aoMaps     = [oMap,] if oMap else list(self.aoMaps);
        oCopy.sPrefix    = sPrefix    or self.sPrefix;
        oCopy.sOpcode    = sOpcode    or self.sOpcode;
        oCopy.sSubOpcode = sSubOpcode or self.sSubOpcode;

        return oCopy;

    def getOpcodeByte(self):
        """
        Decodes sOpcode into a byte range integer value.
        Raises exception if sOpcode is None or invalid.
        """
        if self.sOpcode is None:
            raise Exception('No opcode byte for %s!' % (self,));
        sSpec = str(self.sOpcode); # pylint type confusion workaround.

        # Full hex byte form.
        if sSpec.startswith('0x'):
            return int(sSpec, 16);

        # The remaining forms are a fixed lead-in followed by one digit that
        # ends up in bits 3 thru 5, with the lead-in deciding bits 6 and 7:
        #   - '/r':    bits 7:6 are left zero.
        #   - '11/r':  bits 7:6 are set to 11b.
        #   - '!11/r': bits 7:6 are set to 10b (mod=2).
        #              @todo this doesn't really work...
        for sLeadIn, fModBits in (('/', 0x00), ('11/', 0xc0), ('!11/', 0x80)):
            if len(sSpec) == len(sLeadIn) + 1 and sSpec.startswith(sLeadIn) and sSpec[-1].isdigit():
                return (int(sSpec[-1:]) << 3) | fModBits;

        raise Exception('unsupported opcode byte spec "%s" for %s' % (sSpec, self,));

    @staticmethod
    def _flagsToIntegerMask(asFlags):
        """
        Returns the integer mask value for asFlags (0 if asFlags is None or
        empty).  Each flag is translated via g_kdEFlagsMnemonics and
        g_kdX86EFlagsConstants; negated ('!') mnemonics are not expected.
        """
        fMask = 0;
        for sFlag in asFlags or ():
            sConstant = g_kdEFlagsMnemonics[sFlag];
            assert sConstant[0] != '!', sConstant
            fMask |= g_kdX86EFlagsConstants[sConstant];
        return fMask;

    def getTestedFlagsMask(self):
        """ Returns asFlTest as an integer mask value """
        return self._flagsToIntegerMask(self.asFlTest);

    def getModifiedFlagsMask(self):
        """ Returns asFlModify as an integer mask value """
        return self._flagsToIntegerMask(self.asFlModify);

    def getUndefinedFlagsMask(self):
        """ Returns asFlUndefined as an integer mask value """
        return self._flagsToIntegerMask(self.asFlUndefined);

    def getSetFlagsMask(self):
        """ Returns asFlSet as an integer mask value """
        return self._flagsToIntegerMask(self.asFlSet);

    def getClearedFlagsMask(self):
        """ Returns asFlClear as an integer mask value """
        return self._flagsToIntegerMask(self.asFlClear);

    def onlyInVexMaps(self):
        """ Returns True if only in VEX maps, otherwise False.  (No maps -> False) """
        return bool(self.aoMaps) and all(oMap.isVexMap() for oMap in self.aoMaps);
1662
1663
1664
## All the instructions.
g_aoAllInstructions = []            # type: List[Instruction]

## All the instructions indexed by statistics name (opstat).
g_dAllInstructionsByStat = {}       # type: Dict[str, Instruction]

## All the instructions indexed by function name (opfunction).
g_dAllInstructionsByFunction = {}   # type: Dict[str, List[Instruction]]

## Instructions tagged by oponlytest
g_aoOnlyTestInstructions = []       # type: List[Instruction]
1676
## Instruction maps.
## Each entry gives the map name, optionally followed by a second positional
## argument (a g_apfnXxx table name or None) and keyword arguments for the
## lead opcode bytes, the selector, the encoding and the disassembler parser.
g_aoInstructionMaps = [
    # One byte opcode map and the groups hanging off it.
    InstructionMap('one',        'g_apfnOneByteMap',        sSelector = 'byte'),
    InstructionMap('grp1_80',                               asLeadOpcodes = ['0x80'], sSelector = '/r'),
    InstructionMap('grp1_81',                               asLeadOpcodes = ['0x81'], sSelector = '/r'),
    InstructionMap('grp1_82',                               asLeadOpcodes = ['0x82'], sSelector = '/r'),
    InstructionMap('grp1_83',                               asLeadOpcodes = ['0x83'], sSelector = '/r'),
    InstructionMap('grp1a',                                 asLeadOpcodes = ['0x8f'], sSelector = '/r'),
    InstructionMap('grp2_c0',                               asLeadOpcodes = ['0xc0'], sSelector = '/r'),
    InstructionMap('grp2_c1',                               asLeadOpcodes = ['0xc1'], sSelector = '/r'),
    InstructionMap('grp2_d0',                               asLeadOpcodes = ['0xd0'], sSelector = '/r'),
    InstructionMap('grp2_d1',                               asLeadOpcodes = ['0xd1'], sSelector = '/r'),
    InstructionMap('grp2_d2',                               asLeadOpcodes = ['0xd2'], sSelector = '/r'),
    InstructionMap('grp2_d3',                               asLeadOpcodes = ['0xd3'], sSelector = '/r'),
    ## @todo g_apfnEscF1_E0toFF
    InstructionMap('grp3_f6',                               asLeadOpcodes = ['0xf6'], sSelector = '/r'),
    InstructionMap('grp3_f7',                               asLeadOpcodes = ['0xf7'], sSelector = '/r'),
    InstructionMap('grp4',                                  asLeadOpcodes = ['0xfe'], sSelector = '/r'),
    InstructionMap('grp5',                                  asLeadOpcodes = ['0xff'], sSelector = '/r'),
    InstructionMap('grp11_c6_m',                            asLeadOpcodes = ['0xc6'], sSelector = '!11 /r'),
    InstructionMap('grp11_c6_r',                            asLeadOpcodes = ['0xc6'], sSelector = '11'),    # xabort
    InstructionMap('grp11_c7_m',                            asLeadOpcodes = ['0xc7'], sSelector = '!11 /r'),
    InstructionMap('grp11_c7_r',                            asLeadOpcodes = ['0xc7'], sSelector = '11'),    # xbegin

    # Two byte (0x0f) opcode map and its groups.
    InstructionMap('two0f',      'g_apfnTwoByteMap',        asLeadOpcodes = ['0x0f'], sDisParse = 'IDX_ParseTwoByteEsc'),
    InstructionMap('grp6',       'g_apfnGroup6',            asLeadOpcodes = ['0x0f', '0x00'], sSelector = '/r'),
    InstructionMap('grp7_m',     'g_apfnGroup7Mem',         asLeadOpcodes = ['0x0f', '0x01'], sSelector = '!11 /r'),
    InstructionMap('grp7_r',                                asLeadOpcodes = ['0x0f', '0x01'], sSelector = '11'),
    InstructionMap('grp8',                                  asLeadOpcodes = ['0x0f', '0xba'], sSelector = '/r'),
    InstructionMap('grp9',       'g_apfnGroup9RegReg',      asLeadOpcodes = ['0x0f', '0xc7'], sSelector = 'mod /r'),
    ## @todo What about g_apfnGroup9MemReg?
    InstructionMap('grp10',      None,                      asLeadOpcodes = ['0x0f', '0xb9'], sSelector = '/r'), # UD1 /w modr/m
    InstructionMap('grp12',      'g_apfnGroup12RegReg',     asLeadOpcodes = ['0x0f', '0x71'], sSelector = 'mod /r'),
    InstructionMap('grp13',      'g_apfnGroup13RegReg',     asLeadOpcodes = ['0x0f', '0x72'], sSelector = 'mod /r'),
    InstructionMap('grp14',      'g_apfnGroup14RegReg',     asLeadOpcodes = ['0x0f', '0x73'], sSelector = 'mod /r'),
    InstructionMap('grp15',      'g_apfnGroup15MemReg',     asLeadOpcodes = ['0x0f', '0xae'], sSelector = 'memreg /r'),
    ## @todo What about g_apfnGroup15RegReg?
    InstructionMap('grp16',                                 asLeadOpcodes = ['0x0f', '0x18'], sSelector = 'mod /r'),
    InstructionMap('grpA17',                                asLeadOpcodes = ['0x0f', '0x78'], sSelector = '/r'), # AMD: EXTRQ weirdness
    InstructionMap('grpP',                                  asLeadOpcodes = ['0x0f', '0x0d'], sSelector = '/r'), # AMD: prefetch

    # Three byte opcode maps.
    InstructionMap('three0f38',  'g_apfnThreeByte0f38',     asLeadOpcodes = ['0x0f', '0x38']),
    InstructionMap('three0f3a',  'g_apfnThreeByte0f3a',     asLeadOpcodes = ['0x0f', '0x3a']),

    # VEX maps and groups.
    # NOTE(review): vexgrp17 is declared with sEncoding 'vex1' like the other
    # VEX groups here; the Intel opcode maps list VEX group 17 (opcode 0xf3)
    # under the 0F38 map, so presumably 'vex2' -- confirm before changing.
    InstructionMap('vexmap1',    'g_apfnVexMap1',           sEncoding = 'vex1'),
    InstructionMap('vexgrp12',   'g_apfnVexGroup12RegReg',  sEncoding = 'vex1', asLeadOpcodes = ['0x71'], sSelector = 'mod /r'),
    InstructionMap('vexgrp13',   'g_apfnVexGroup13RegReg',  sEncoding = 'vex1', asLeadOpcodes = ['0x72'], sSelector = 'mod /r'),
    InstructionMap('vexgrp14',   'g_apfnVexGroup14RegReg',  sEncoding = 'vex1', asLeadOpcodes = ['0x73'], sSelector = 'mod /r'),
    InstructionMap('vexgrp15',   'g_apfnVexGroup15MemReg',  sEncoding = 'vex1', asLeadOpcodes = ['0xae'], sSelector = 'memreg /r'),
    InstructionMap('vexgrp17',   'g_apfnVexGroup17_f3',     sEncoding = 'vex1', asLeadOpcodes = ['0xf3'], sSelector = '/r'),

    InstructionMap('vexmap2',    'g_apfnVexMap2',           sEncoding = 'vex2'),
    InstructionMap('vexmap3',    'g_apfnVexMap3',           sEncoding = 'vex3'),

    # 3DNow! and XOP maps and groups.
    InstructionMap('3dnow',                                 asLeadOpcodes = ['0x0f', '0x0f']),
    InstructionMap('xopmap8',                               sEncoding = 'xop8'),
    InstructionMap('xopmap9',                               sEncoding = 'xop9'),
    InstructionMap('xopgrp1',                               sEncoding = 'xop9',  asLeadOpcodes = ['0x01'], sSelector = '/r'),
    InstructionMap('xopgrp2',                               sEncoding = 'xop9',  asLeadOpcodes = ['0x02'], sSelector = '/r'),
    InstructionMap('xopgrp3',                               sEncoding = 'xop9',  asLeadOpcodes = ['0x12'], sSelector = '/r'),
    InstructionMap('xopmap10',                              sEncoding = 'xop10'),
    InstructionMap('xopgrp4',                               sEncoding = 'xop10', asLeadOpcodes = ['0x12'], sSelector = '/r'),
];
## The instruction maps in g_aoInstructionMaps, indexed by their sName attribute.
g_dInstructionMaps = { oMap.sName: oMap for oMap in g_aoInstructionMaps };
## The instruction maps in g_aoInstructionMaps, indexed by their sIemName attribute.
## Note! Maps with the same sIemName value end up sharing a single entry (the last one in the list wins).
g_dInstructionMapsByIemName = { oMap.sIemName: oMap for oMap in g_aoInstructionMaps };
1742
1743
1744#
1745# Decoder functions.
1746#
1747
class DecoderFunction(object):
    """
    Decoder function.

    Records where a decoder function is defined and the raw source lines it
    is made up of.  This is mainly used for scoping the search for variables
    used in microcode blocks.
    """
    def __init__(self, sSrcFile, iBeginLine, sName, asDefArgs):
        ## The source file the function is defined in.
        self.sSrcFile   = sSrcFile;
        ## The start line.
        self.iBeginLine = iBeginLine;
        ## The line the function (probably) ends on, -1 until complete() is called.
        self.iEndLine   = -1;
        ## The function name.
        self.sName      = sName;
        ## The FNIEMOP*DEF/STUB* macro argument list, 0th element is the macro name.
        self.asDefArgs  = asDefArgs;
        ## The raw lines the function is made up of, empty until complete() is called.
        self.asLines    = [] # type: List[str]

    def complete(self, iEndLine, asLines):
        """
        Completes the function by recording its end line and raw lines.

        May only be called once per instance (asserted).
        """
        assert self.iEndLine == -1;
        (self.iEndLine, self.asLines) = (iEndLine, asLines);
1770
1771
1772#
1773# "Microcode" statements and blocks
1774#
1775
class McStmt(object):
    """
    A single statement in a microcode block.
    """
    def __init__(self, sName, asParams):
        ## The statement name: 'IEM_MC_XXX' or 'C++'.
        self.sName    = sName;
        ## The statement parameters (strings).
        self.asParams = asParams;
        ## Not used by this class, initialized to None.
        self.oUser    = None;

    def renderCode(self, cchIndent = 0):
        """
        Renders the statement as 'NAME(param1, param2, ...);' followed by a
        newline and preceded by cchIndent spaces.
        """
        return ''.join((' ' * cchIndent, self.sName, '(', ', '.join(self.asParams), ');\n',));

    @staticmethod
    def renderCodeForList(aoStmts, cchIndent = 0):
        """
        Renders each statement in the list using the given indentation and
        returns the concatenated result.
        """
        asRendered = [];
        for oStmt in aoStmts:
            asRendered.append(oStmt.renderCode(cchIndent));
        return ''.join(asRendered);

    @staticmethod
    def findStmtByNames(aoStmts, dNames):
        """
        Returns the first statement in the list having one of the given
        names, descending into the branches of conditional statements (if
        branch first, then else branch).  Returns None if not found.

        Note! The names are passed as a dictionary for quick lookup, the value
              does not matter.
        """
        for oStmt in aoStmts:
            if oStmt.sName in dNames:
                return oStmt;
            if not isinstance(oStmt, McStmtCond):
                continue;
            for aoBranch in (oStmt.aoIfBranch, oStmt.aoElseBranch):
                oHit = McStmt.findStmtByNames(aoBranch, dNames);
                if oHit:
                    return oHit;
        return None;

    def isCppStmt(self):
        """ Checks if this is a C++ statement, i.e. the name starts with 'C++'. """
        return self.sName[:3] == 'C++';
1820
class McStmtCond(McStmt):
    """
    Base class for conditional statements (IEM_MC_IF_XXX).

    Holds the statements of the if-branch and of the (optional) else-branch.
    """
    def __init__(self, sName, asParams, aoIfBranch = None, aoElseBranch = None):
        McStmt.__init__(self, sName, asParams);
        # The branches are always stored as new lists (copies of what the caller passed in).
        self.aoIfBranch   = list(aoIfBranch)   if aoIfBranch   is not None else [];
        self.aoElseBranch = list(aoElseBranch) if aoElseBranch is not None else [];

    def renderCode(self, cchIndent = 0):
        """
        Renders the conditional with its branches indented by four more
        spaces.  The IEM_MC_ELSE part is left out if the else-branch is empty.
        """
        sIndent = ' ' * cchIndent;
        asRet = [ sIndent + self.sName + '(' + ', '.join(self.asParams) + ') {\n',
                  self.renderCodeForList(self.aoIfBranch, cchIndent + 4), ];
        if self.aoElseBranch:
            asRet.append(sIndent + '} IEM_MC_ELSE() {\n');
            asRet.append(self.renderCodeForList(self.aoElseBranch, cchIndent + 4));
        asRet.append(sIndent + '} IEM_MC_ENDIF();\n');
        return ''.join(asRet);
1838
class McStmtVar(McStmt):
    """ Variable declaration: IEM_MC_LOCAL, IEM_MC_LOCAL_ASSIGN, IEM_MC_LOCAL_CONST """
    def __init__(self, sName, asParams, sType, sVarName, sValue = None):
        McStmt.__init__(self, sName, asParams);
        ## The variable type.
        self.sType    = sType;
        ## The variable name.
        self.sVarName = sVarName;
        ## The assigned / const value, None if there isn't any.
        self.sValue   = sValue;
1846
class McStmtArg(McStmtVar):
    """ Argument declaration: IEM_MC_ARG, IEM_MC_ARG_CONST, IEM_MC_ARG_LOCAL_REF """
    def __init__(self, sName, asParams, sType, sVarName, iArg, sConstValue = None, sRef = None, sRefType = 'none'):
        # The const value (if any) is stored as McStmtVar.sValue.
        McStmtVar.__init__(self, sName, asParams, sType, sVarName, sValue = sConstValue);
        ## The argument number.
        self.iArg     = iArg;
        ## The reference string (local variable, register).
        self.sRef     = sRef;
        ## The kind of reference: 'local', 'none'.
        self.sRefType = sRefType;
        assert sRefType in ('none', 'local');
1855
1856
class McStmtCall(McStmt):
    """ Call statement: IEM_MC_CALL_* """
    def __init__(self, sName, asParams, iFnParam, iRcNameParam = -1):
        McStmt.__init__(self, sName, asParams);
        ## Index into asParams of the function being called.
        self.idxFn     = iFnParam;
        ## Index into asParams of the entry following the function.
        self.idxParams = iFnParam + 1;
        ## The function being called (asParams[iFnParam]).
        self.sFn       = asParams[iFnParam];
        ## The asParams[iRcNameParam] value, None if iRcNameParam is negative.
        if iRcNameParam >= 0:
            self.iRcName = asParams[iRcNameParam];
        else:
            self.iRcName = None;
1865
class McCppGeneric(McStmt):
    """
    Generic C++/C statement.

    The raw code is stored as the one and only statement parameter.
    """
    def __init__(self, sCode, fDecode = True, sName = 'C++', cchIndent = 0):
        McStmt.__init__(self, sName, [sCode,]);
        ## Selects the '// C++ decode' annotation (True) or '// C++ normal' (False) when rendering.
        self.fDecode   = fDecode;
        ## Indentation added on top of what renderCode is given.
        self.cchIndent = cchIndent;

    def renderCode(self, cchIndent = 0):
        """
        Renders the raw code followed by a '// C++ decode' or '// C++ normal'
        annotation.
        """
        sAnnotation = ' // C++ decode\n' if self.fDecode else ' // C++ normal\n';
        sCode = ' ' * (cchIndent + self.cchIndent) + self.asParams[0] + '\n';
        # Note! Each line of a multi-line statement gets the annotation.
        return sCode.replace('\n', sAnnotation);
1883
class McCppCall(McCppGeneric):
    """
    A generic C++/C call statement.

    The sName is still 'C++', so the function name is in the first parameter
    and the arguments in the subsequent ones.
    """
    def __init__(self, sFnName, asArgs, fDecode = True, cchIndent = 0):
        McCppGeneric.__init__(self, sFnName, fDecode = fDecode, cchIndent = cchIndent);
        self.asParams.extend(asArgs);

    def renderCode(self, cchIndent = 0):
        """
        Renders the call as 'function(arg1, arg2, ...);' followed by a
        '// C++ decode' or '// C++ normal' annotation.
        """
        sArgs       = ', '.join(self.asParams[1:]);
        sAnnotation = ' // C++ decode\n' if self.fDecode else ' // C++ normal\n';
        return ' ' * (cchIndent + self.cchIndent) + self.asParams[0] + '(' + sArgs + ');' + sAnnotation;
1903
class McCppCond(McStmtCond):
    """
    C++/C 'if' statement.

    The statement name is 'C++/if' and the condition expression is the one
    and only statement parameter.
    """
    def __init__(self, sCode, fDecode = True, aoIfBranch = None, aoElseBranch = None, cchIndent = 0):
        McStmtCond.__init__(self, 'C++/if', [sCode,], aoIfBranch, aoElseBranch);
        ## Selects the '// C++ decode' annotation (True) or '// C++ normal' (False) when rendering.
        self.fDecode   = fDecode;
        ## Indentation added on top of what renderCode is given.
        self.cchIndent = cchIndent;

    def renderCode(self, cchIndent = 0):
        """
        Renders the if statement with the branches in curly brackets and
        indented by four more spaces.  The else part is left out if the
        else-branch is empty.
        """
        cchTotal    = cchIndent + self.cchIndent;
        sIndent     = ' ' * cchTotal;
        sAnnotation = '// C++ decode' if self.fDecode else '// C++ normal';
        asRet = [ sIndent + 'if (' + self.asParams[0] + ') ' + sAnnotation + '\n',
                  sIndent + '{\n',
                  self.renderCodeForList(self.aoIfBranch, cchTotal + 4),
                  sIndent + '}\n', ];
        if self.aoElseBranch:
            asRet.extend([ sIndent + 'else ' + sAnnotation + '\n',
                           sIndent + '{\n',
                           self.renderCodeForList(self.aoElseBranch, cchTotal + 4),
                           sIndent + '}\n', ]);
        return ''.join(asRet);
1926
class McCppPreProc(McCppGeneric):
    """
    C++/C Preprocessor directive.

    The statement name is 'C++/preproc' and fDecode is always False.
    """
    def __init__(self, sCode):
        McCppGeneric.__init__(self, sCode, fDecode = False, sName = 'C++/preproc');

    def renderCode(self, cchIndent = 0):
        """ Renders the directive on a line of its own, ignoring cchIndent. """
        sDirective = self.asParams[0];
        return sDirective + '\n';
1936
1937
## IEM_MC_F_XXX values.
## Each flag maps to a tuple of further flags which McBlock.parseMcBegin sets together with the flag itself.
g_kdMcFlags = {
    'IEM_MC_F_ONLY_8086':           (),
    'IEM_MC_F_MIN_186':             (),
    'IEM_MC_F_MIN_286':             (),
    'IEM_MC_F_NOT_286_OR_OLDER':    (),
    'IEM_MC_F_MIN_386':             ('IEM_MC_F_NOT_286_OR_OLDER',),
    'IEM_MC_F_MIN_486':             ('IEM_MC_F_NOT_286_OR_OLDER',),
    'IEM_MC_F_MIN_PENTIUM':         ('IEM_MC_F_NOT_286_OR_OLDER',),
    'IEM_MC_F_MIN_PENTIUM_II':      ('IEM_MC_F_NOT_286_OR_OLDER',),
    'IEM_MC_F_MIN_CORE':            ('IEM_MC_F_NOT_286_OR_OLDER',),
    'IEM_MC_F_64BIT':               ('IEM_MC_F_NOT_286_OR_OLDER',),
    'IEM_MC_F_NOT_64BIT':           (),
};
## IEM_CIMPL_F_XXX values.
## Each flag maps to a tuple of further flags which McBlock.parseCImplFlags sets together with the flag itself.
g_kdCImplFlags = {
    'IEM_CIMPL_F_BRANCH_DIRECT':            (),
    'IEM_CIMPL_F_BRANCH_INDIRECT':          (),
    'IEM_CIMPL_F_BRANCH_RELATIVE':          (),
    'IEM_CIMPL_F_BRANCH_CONDITIONAL':       (),
    'IEM_CIMPL_F_BRANCH_FAR':               (),
    'IEM_CIMPL_F_BRANCH_ANY':               ('IEM_CIMPL_F_BRANCH_DIRECT', 'IEM_CIMPL_F_BRANCH_INDIRECT',
                                             'IEM_CIMPL_F_BRANCH_RELATIVE',),
    'IEM_CIMPL_F_BRANCH_STACK':             (),
    'IEM_CIMPL_F_BRANCH_STACK_FAR':         (),
    'IEM_CIMPL_F_MODE':                     (),
    'IEM_CIMPL_F_RFLAGS':                   (),
    'IEM_CIMPL_F_INHIBIT_SHADOW':           (),
    'IEM_CIMPL_F_STATUS_FLAGS':             (),
    'IEM_CIMPL_F_CHECK_IRQ_AFTER':          (),
    'IEM_CIMPL_F_CHECK_IRQ_BEFORE':         (),
    'IEM_CIMPL_F_CHECK_IRQ_BEFORE_AND_AFTER': ('IEM_CIMPL_F_CHECK_IRQ_BEFORE', 'IEM_CIMPL_F_CHECK_IRQ_AFTER',),
    'IEM_CIMPL_F_VMEXIT':                   (),
    'IEM_CIMPL_F_FPU':                      (),
    'IEM_CIMPL_F_REP':                      (),
    'IEM_CIMPL_F_IO':                       (),
    'IEM_CIMPL_F_END_TB':                   (),
    'IEM_CIMPL_F_XCPT':                     ('IEM_CIMPL_F_BRANCH_INDIRECT', 'IEM_CIMPL_F_BRANCH_FAR',
                                             'IEM_CIMPL_F_MODE', 'IEM_CIMPL_F_RFLAGS', 'IEM_CIMPL_F_VMEXIT', ),
    'IEM_CIMPL_F_CALLS_CIMPL':              (),
    'IEM_CIMPL_F_CALLS_AIMPL':              (),
    'IEM_CIMPL_F_CALLS_AIMPL_WITH_FXSTATE': (),
};
1981class McBlock(object):
1982 """
1983 Microcode block (IEM_MC_BEGIN ... IEM_MC_END, IEM_MC_DEFER_TO_CIMPL_x_RET).
1984 """
1985
1986 ## @name Macro expansion types.
1987 ## @{
1988 kiMacroExp_None = 0;
1989 kiMacroExp_Entire = 1; ##< Entire block (iBeginLine == iEndLine), original line may contain multiple blocks.
1990 kiMacroExp_Partial = 2; ##< Partial/mixed (cmpxchg16b), safe to assume single block.
1991 ## @}
1992
1993 def __init__(self, sSrcFile, iBeginLine, offBeginLine, oFunction, iInFunction, cchIndent = None):
1994 ## The source file containing the block.
1995 self.sSrcFile = sSrcFile;
1996 ## The line with the IEM_MC_BEGIN/IEM_MC_DEFER_TO_CIMPL_X_RET statement.
1997 self.iBeginLine = iBeginLine;
1998 ## The offset of the IEM_MC_BEGIN/IEM_MC_DEFER_TO_CIMPL_X_RET statement within the line.
1999 self.offBeginLine = offBeginLine;
2000 ## The line with the IEM_MC_END statement / last line of IEM_MC_DEFER_TO_CIMPL_X_RET.
2001 self.iEndLine = -1;
2002 ## The offset of the IEM_MC_END statement within the line / semicolon offset for defer-to.
2003 self.offEndLine = 0;
2004 ## The offset following the IEM_MC_END/IEM_MC_DEFER_TO_CIMPL_X_RET semicolon.
2005 self.offAfterEnd = 0;
2006 ## The function the block resides in.
2007 self.oFunction = oFunction;
2008 ## The name of the function the block resides in. DEPRECATED.
2009 self.sFunction = oFunction.sName;
2010 ## The block number within the function.
2011 self.iInFunction = iInFunction;
2012 self.cchIndent = cchIndent if cchIndent else offBeginLine;
2013 ##< The raw lines the block is made up of.
2014 self.asLines = [] # type: List[str]
2015 ## Indicates whether the block includes macro expansion parts (kiMacroExp_None,
2016 ## kiMacroExp_Entrie, kiMacroExp_Partial).
2017 self.iMacroExp = self.kiMacroExp_None;
2018 ## IEM_MC_BEGIN: Argument count.
2019 self.cArgs = -1;
2020 ## IEM_MC_ARG, IEM_MC_ARG_CONST, IEM_MC_ARG_LOCAL_REF, IEM_MC_ARG_LOCAL_EFLAGS.
2021 self.aoArgs = [] # type: List[McStmtArg]
2022 ## IEM_MC_BEGIN: Locals count.
2023 self.cLocals = -1;
2024 ## IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, IEM_MC_ARG_LOCAL_EFLAGS.
2025 self.aoLocals = [] # type: List[McStmtVar]
2026 ## IEM_MC_BEGIN: IEM_MC_F_XXX dictionary
2027 self.dsMcFlags = {} # type: Dict[str, bool]
2028 ## IEM_MC_[DEFER_TO|CALL]_CIMPL_XXX: IEM_CIMPL_F_XXX dictionary
2029 self.dsCImplFlags = {} # type: Dict[str, bool]
2030 ## Decoded statements in the block.
2031 self.aoStmts = [] # type: List[McStmt]
2032
2033 def complete(self, iEndLine, offEndLine, offAfterEnd, asLines):
2034 """
2035 Completes the microcode block.
2036 """
2037 assert self.iEndLine == -1;
2038 self.iEndLine = iEndLine;
2039 self.offEndLine = offEndLine;
2040 self.offAfterEnd = offAfterEnd;
2041 self.asLines = asLines;
2042
2043 def raiseDecodeError(self, sRawCode, off, sMessage):
2044 """ Raises a decoding error. """
2045 offStartOfLine = sRawCode.rfind('\n', 0, off) + 1;
2046 iLine = sRawCode.count('\n', 0, off);
2047 raise ParserException('%s:%d:%d: parsing error: %s'
2048 % (self.sSrcFile, self.iBeginLine + iLine, off - offStartOfLine + 1, sMessage,));
2049
2050 def raiseStmtError(self, sName, sMessage):
2051 """ Raises a statement parser error. """
2052 raise ParserException('%s:%d: %s: parsing error: %s' % (self.sSrcFile, self.iBeginLine, sName, sMessage,));
2053
2054 def checkStmtParamCount(self, sName, asParams, cParamsExpected):
2055 """ Check the parameter count, raising an error it doesn't match. """
2056 if len(asParams) != cParamsExpected:
2057 raise ParserException('%s:%d: %s: Expected %s parameters, found %s!'
2058 % (self.sSrcFile, self.iBeginLine, sName, cParamsExpected, len(asParams),));
2059 return True;
2060
2061 @staticmethod
2062 def parseMcGeneric(oSelf, sName, asParams):
2063 """ Generic parser that returns a plain McStmt object. """
2064 _ = oSelf;
2065 return McStmt(sName, asParams);
2066
2067 @staticmethod
2068 def parseMcGenericCond(oSelf, sName, asParams):
2069 """ Generic parser that returns a plain McStmtCond object. """
2070 _ = oSelf;
2071 return McStmtCond(sName, asParams);
2072
2073 @staticmethod
2074 def parseMcBegin(oSelf, sName, asParams):
2075 """ IEM_MC_BEGIN """
2076 oSelf.checkStmtParamCount(sName, asParams, 4);
2077 if oSelf.cArgs != -1 or oSelf.cLocals != -1 or oSelf.dsMcFlags:
2078 oSelf.raiseStmtError(sName, 'Used more than once!');
2079 oSelf.cArgs = int(asParams[0]);
2080 oSelf.cLocals = int(asParams[1]);
2081
2082 if asParams[2] != '0':
2083 for sFlag in asParams[2].split('|'):
2084 sFlag = sFlag.strip();
2085 if sFlag not in g_kdMcFlags:
2086 oSelf.raiseStmtError(sName, 'Unknown flag: %s' % (sFlag, ));
2087 oSelf.dsMcFlags[sFlag] = True;
2088 for sFlag2 in g_kdMcFlags[sFlag]:
2089 oSelf.dsMcFlags[sFlag2] = True;
2090
2091 if asParams[3] != '0':
2092 oSelf.parseCImplFlags(sName, asParams[3]);
2093
2094 return McBlock.parseMcGeneric(oSelf, sName, asParams);
2095
2096 @staticmethod
2097 def parseMcArg(oSelf, sName, asParams):
2098 """ IEM_MC_ARG """
2099 oSelf.checkStmtParamCount(sName, asParams, 3);
2100 oStmt = McStmtArg(sName, asParams, asParams[0], asParams[1], int(asParams[2]));
2101 oSelf.aoArgs.append(oStmt);
2102 return oStmt;
2103
2104 @staticmethod
2105 def parseMcArgConst(oSelf, sName, asParams):
2106 """ IEM_MC_ARG_CONST """
2107 oSelf.checkStmtParamCount(sName, asParams, 4);
2108 oStmt = McStmtArg(sName, asParams, asParams[0], asParams[1], int(asParams[3]), sConstValue = asParams[2]);
2109 oSelf.aoArgs.append(oStmt);
2110 return oStmt;
2111
2112 @staticmethod
2113 def parseMcArgLocalRef(oSelf, sName, asParams):
2114 """ IEM_MC_ARG_LOCAL_REF """
2115 oSelf.checkStmtParamCount(sName, asParams, 4);
2116 oStmt = McStmtArg(sName, asParams, asParams[0], asParams[1], int(asParams[3]), sRef = asParams[2], sRefType = 'local');
2117 oSelf.aoArgs.append(oStmt);
2118 return oStmt;
2119
2120 @staticmethod
2121 def parseMcArgLocalEFlags(oSelf, sName, asParams):
2122 """ IEM_MC_ARG_LOCAL_EFLAGS """
2123 oSelf.checkStmtParamCount(sName, asParams, 3);
2124 # Note! We split this one up into IEM_MC_LOCAL_VAR and IEM_MC_ARG_LOCAL_REF.
2125 oStmtLocal = McStmtVar('IEM_MC_LOCAL', ['uint32_t', asParams[1],], 'uint32_t', asParams[1]);
2126 oSelf.aoLocals.append(oStmtLocal);
2127 oStmtArg = McStmtArg('IEM_MC_ARG_LOCAL_REF', ['uint32_t *', asParams[0], asParams[1], asParams[2]],
2128 'uint32_t *', asParams[0], int(asParams[2]), sRef = asParams[1], sRefType = 'local');
2129 oSelf.aoArgs.append(oStmtArg);
2130 return (oStmtLocal, oStmtArg,);
2131
2132 @staticmethod
2133 def parseMcImplicitAvxAArgs(oSelf, sName, asParams):
2134 """ IEM_MC_IMPLICIT_AVX_AIMPL_ARGS """
2135 oSelf.checkStmtParamCount(sName, asParams, 0);
2136 # Note! Translate to IEM_MC_ARG_CONST
2137 oStmt = McStmtArg('IEM_MC_ARG_CONST', ['PX86XSAVEAREA', 'pXState', '&pVCpu->cpum.GstCtx.XState', '0'],
2138 'PX86XSAVEAREA', 'pXState', 0, '&pVCpu->cpum.GstCtx.XState');
2139 oSelf.aoArgs.append(oStmt);
2140 return oStmt;
2141
2142 @staticmethod
2143 def parseMcLocal(oSelf, sName, asParams):
2144 """ IEM_MC_LOCAL """
2145 oSelf.checkStmtParamCount(sName, asParams, 2);
2146 oStmt = McStmtVar(sName, asParams, asParams[0], asParams[1]);
2147 oSelf.aoLocals.append(oStmt);
2148 return oStmt;
2149
2150 @staticmethod
2151 def parseMcLocalAssign(oSelf, sName, asParams):
2152 """ IEM_MC_LOCAL_ASSIGN """
2153 oSelf.checkStmtParamCount(sName, asParams, 3);
2154 oStmt = McStmtVar(sName, asParams, asParams[0], asParams[1], sValue = asParams[2]);
2155 oSelf.aoLocals.append(oStmt);
2156 return oStmt;
2157
2158 @staticmethod
2159 def parseMcLocalConst(oSelf, sName, asParams):
2160 """ IEM_MC_LOCAL_CONST """
2161 oSelf.checkStmtParamCount(sName, asParams, 3);
2162 oStmt = McStmtVar(sName, asParams, asParams[0], asParams[1], sValue = asParams[2]);
2163 oSelf.aoLocals.append(oStmt);
2164 return oStmt;
2165
2166 @staticmethod
2167 def parseMcCallAImpl(oSelf, sName, asParams):
2168 """ IEM_MC_CALL_AIMPL_3|4 """
2169 cArgs = int(sName[-1]);
2170 oSelf.checkStmtParamCount(sName, asParams, 2 + cArgs);
2171 return McStmtCall(sName, asParams, 1, 0);
2172
2173 @staticmethod
2174 def parseMcCallVoidAImpl(oSelf, sName, asParams):
2175 """ IEM_MC_CALL_VOID_AIMPL_2|3 """
2176 cArgs = int(sName[-1]);
2177 oSelf.checkStmtParamCount(sName, asParams, 1 + cArgs);
2178 return McStmtCall(sName, asParams, 0);
2179
2180 @staticmethod
2181 def parseMcCallAvxAImpl(oSelf, sName, asParams):
2182 """ IEM_MC_CALL_AVX_AIMPL_2|3 """
2183 cArgs = int(sName[-1]);
2184 oSelf.checkStmtParamCount(sName, asParams, 1 + cArgs);
2185 return McStmtCall(sName, asParams, 0);
2186
2187 @staticmethod
2188 def parseMcCallFpuAImpl(oSelf, sName, asParams):
2189 """ IEM_MC_CALL_FPU_AIMPL_1|2|3 """
2190 cArgs = int(sName[-1]);
2191 oSelf.checkStmtParamCount(sName, asParams, 1 + cArgs);
2192 return McStmtCall(sName, asParams, 0);
2193
2194 @staticmethod
2195 def parseMcCallMmxAImpl(oSelf, sName, asParams):
2196 """ IEM_MC_CALL_MMX_AIMPL_2|3 """
2197 cArgs = int(sName[-1]);
2198 oSelf.checkStmtParamCount(sName, asParams, 1 + cArgs);
2199 return McStmtCall(sName, asParams, 0);
2200
2201 @staticmethod
2202 def parseMcCallSseAImpl(oSelf, sName, asParams):
2203 """ IEM_MC_CALL_SSE_AIMPL_2|3 """
2204 cArgs = int(sName[-1]);
2205 oSelf.checkStmtParamCount(sName, asParams, 1 + cArgs);
2206 return McStmtCall(sName, asParams, 0);
2207
2208 def parseCImplFlags(self, sName, sFlags):
2209 """
2210 Helper for parseMcCallCImpl and parseMcDeferToCImpl to validate and
2211 merge a bunch of IEM_CIMPL_F_XXX value into dsCImplFlags.
2212 """
2213 if sFlags != '0':
2214 sFlags = self.stripComments(sFlags);
2215 #print('debug: %s: %s' % (self.oFunction.sName,' | '.join(''.join(sFlags.split()).split('|')),));
2216 for sFlag in sFlags.split('|'):
2217 sFlag = sFlag.strip();
2218 if sFlag[0] == '(': sFlag = sFlag[1:].strip();
2219 if sFlag[-1] == ')': sFlag = sFlag[:-1].strip();
2220 #print('debug: %s' % sFlag)
2221 if sFlag not in g_kdCImplFlags:
2222 if sFlag == '0':
2223 continue;
2224 self.raiseStmtError(sName, 'Unknown flag: %s' % (sFlag, ));
2225 self.dsCImplFlags[sFlag] = True;
2226 for sFlag2 in g_kdCImplFlags[sFlag]:
2227 self.dsCImplFlags[sFlag2] = True;
2228 return None;
2229
2230 @staticmethod
2231 def parseMcCallCImpl(oSelf, sName, asParams):
2232 """ IEM_MC_CALL_CIMPL_0|1|2|3|4|5 """
2233 cArgs = int(sName[-1]);
2234 oSelf.checkStmtParamCount(sName, asParams, 2 + cArgs);
2235 oSelf.parseCImplFlags(sName, asParams[0]);
2236 return McStmtCall(sName, asParams, 1);
2237
2238 @staticmethod
2239 def parseMcDeferToCImpl(oSelf, sName, asParams):
2240 """ IEM_MC_DEFER_TO_CIMPL_[0|1|2|3]_RET """
2241 # Note! This code is called by workerIemMcDeferToCImplXRet.
2242 #print('debug: %s, %s,...' % (sName, asParams[0],));
2243 cArgs = int(sName[-5]);
2244 oSelf.checkStmtParamCount(sName, asParams, 3 + cArgs);
2245 oSelf.parseCImplFlags(sName, asParams[0]);
2246 return McStmtCall(sName, asParams, 2);
2247
2248 @staticmethod
2249 def stripComments(sCode):
2250 """ Returns sCode with comments removed. """
2251 off = 0;
2252 while off < len(sCode):
2253 off = sCode.find('/', off);
2254 if off < 0 or off + 1 >= len(sCode):
2255 break;
2256
2257 if sCode[off + 1] == '/':
2258 # C++ comment.
2259 offEnd = sCode.find('\n', off + 2);
2260 if offEnd < 0:
2261 return sCode[:off].rstrip();
2262 sCode = sCode[ : off] + sCode[offEnd : ];
2263 off += 1;
2264
2265 elif sCode[off + 1] == '*':
2266 # C comment
2267 offEnd = sCode.find('*/', off + 2);
2268 if offEnd < 0:
2269 return sCode[:off].rstrip();
2270 sSep = ' ';
2271 if (off > 0 and sCode[off - 1].isspace()) or (offEnd + 2 < len(sCode) and sCode[offEnd + 2].isspace()):
2272 sSep = '';
2273 sCode = sCode[ : off] + sSep + sCode[offEnd + 2 : ];
2274 off += len(sSep);
2275
2276 else:
2277 # Not a comment.
2278 off += 1;
2279 return sCode;
2280
2281 @staticmethod
2282 def extractParam(sCode, offParam):
2283 """
2284 Extracts the parameter value at offParam in sCode.
2285 Returns stripped value and the end offset of the terminating ',' or ')'.
2286 """
2287 # Extract it.
2288 cNesting = 0;
2289 offStart = offParam;
2290 while offParam < len(sCode):
2291 ch = sCode[offParam];
2292 if ch == '(':
2293 cNesting += 1;
2294 elif ch == ')':
2295 if cNesting == 0:
2296 break;
2297 cNesting -= 1;
2298 elif ch == ',' and cNesting == 0:
2299 break;
2300 offParam += 1;
2301 return (sCode[offStart : offParam].strip(), offParam);
2302
2303 @staticmethod
2304 def extractParams(sCode, offOpenParen):
2305 """
2306 Parses a parameter list.
2307 Returns the list of parameter values and the offset of the closing parentheses.
2308 Returns (None, len(sCode)) on if no closing parentheses was found.
2309 """
2310 assert sCode[offOpenParen] == '(';
2311 asParams = [];
2312 off = offOpenParen + 1;
2313 while off < len(sCode):
2314 ch = sCode[off];
2315 if ch.isspace():
2316 off += 1;
2317 elif ch != ')':
2318 (sParam, off) = McBlock.extractParam(sCode, off);
2319 asParams.append(sParam);
2320 assert off < len(sCode), 'off=%s sCode=%s:"%s"' % (off, len(sCode), sCode,);
2321 if sCode[off] == ',':
2322 off += 1;
2323 else:
2324 return (asParams, off);
2325 return (None, off);
2326
2327 @staticmethod
2328 def findClosingBraces(sCode, off, offStop):
2329 """
2330 Finds the matching '}' for the '{' at off in sCode.
2331 Returns offset of the matching '}' on success, otherwise -1.
2332
2333 Note! Does not take comments into account.
2334 """
2335 cDepth = 1;
2336 off += 1;
2337 while off < offStop:
2338 offClose = sCode.find('}', off, offStop);
2339 if offClose < 0:
2340 break;
2341 cDepth += sCode.count('{', off, offClose);
2342 cDepth -= 1;
2343 if cDepth == 0:
2344 return offClose;
2345 off = offClose + 1;
2346 return -1;
2347
2348 @staticmethod
2349 def countSpacesAt(sCode, off, offStop):
2350 """ Returns the number of space characters at off in sCode. """
2351 offStart = off;
2352 while off < offStop and sCode[off].isspace():
2353 off += 1;
2354 return off - offStart;
2355
2356 @staticmethod
2357 def skipSpacesAt(sCode, off, offStop):
2358 """ Returns first offset at or after off for a non-space character. """
2359 return off + McBlock.countSpacesAt(sCode, off, offStop);
2360
2361 @staticmethod
2362 def isSubstrAt(sStr, off, sSubStr):
2363 """ Returns true of sSubStr is found at off in sStr. """
2364 return sStr[off : off + len(sSubStr)] == sSubStr;
2365
    ## Matches the start of a C/C++ control statement: if, else, while, for and do.
    ## For 'if', 'while' and 'for' the match includes the opening parenthesis.
    koReCppCtrlStmts = re.compile(r'\b(if\s*[(]|else\b|while\s*[(]|for\s*[(]|do\b)');
    ## Matches 'iem.s.<member>' for the listed members (decoder variables, going by the name).
    ## NOTE(review): 'fEvxStuff' looks like it could be a misspelling (fEvexStuff?) - verify against the IEM state structure.
    koReIemDecoderVars = re.compile(  r'iem\.s\.(fPrefixes|uRexReg|uRexB|uRexIndex|iEffSeg|offModRm|cbOpcode|offOpcode'
                                    + r'|enmEffOpSize|enmDefOpSize|enmDefAddrMode|enmEffAddrMode|idxPrefix'
                                    + r'|uVex3rdReg|uVexLength|fEvxStuff|uFpuOpcode|abOpcode'
                                    + r')');
2371
2372 def decodeCode(self, sRawCode, off = 0, offStop = -1, iLevel = 0): # pylint: disable=too-many-statements,too-many-branches
2373 """
2374 Decodes sRawCode[off : offStop].
2375
2376 Returns list of McStmt instances.
2377 Raises ParserException on failure.
2378 """
2379 if offStop < 0:
2380 offStop = len(sRawCode);
2381 aoStmts = [];
2382 while off < offStop:
2383 ch = sRawCode[off];
2384
2385 #
2386 # Skip spaces and comments.
2387 #
2388 if ch.isspace():
2389 off += 1;
2390
2391 elif ch == '/':
2392 ch = sRawCode[off + 1];
2393 if ch == '/': # C++ comment.
2394 off = sRawCode.find('\n', off + 2);
2395 if off < 0:
2396 break;
2397 off += 1;
2398 elif ch == '*': # C comment.
2399 off = sRawCode.find('*/', off + 2);
2400 if off < 0:
2401 break;
2402 off += 2;
2403 else:
2404 self.raiseDecodeError(sRawCode, off, 'Unexpected "/"');
2405
2406 #
2407 # Is it a MC statement.
2408 #
2409 elif ch == 'I' and sRawCode[off : off + len('IEM_MC_')] == 'IEM_MC_':
2410 # All MC statements ends with a semicolon, except for conditionals which ends with a '{'.
2411 # Extract it and strip comments from it.
2412 if not self.isSubstrAt(sRawCode, off, 'IEM_MC_IF_'):
2413 offEnd = sRawCode.find(';', off + len('IEM_MC_'));
2414 if offEnd <= off:
2415 self.raiseDecodeError(sRawCode, off, 'MC statement without a ";"');
2416 else:
2417 offEnd = sRawCode.find('{', off + len('IEM_MC_IF_'));
2418 if offEnd <= off:
2419 self.raiseDecodeError(sRawCode, off, 'MC conditional statement without a "{"');
2420 if sRawCode.find(';', off + len('IEM_MC_IF_'), offEnd) > off:
2421 self.raiseDecodeError(sRawCode, off, 'MC conditional statement without an immediate "{"');
2422 offEnd -= 1;
2423 while offEnd > off and sRawCode[offEnd - 1].isspace():
2424 offEnd -= 1;
2425
2426 sRawStmt = self.stripComments(sRawCode[off : offEnd]);
2427
2428 # Isolate the statement name.
2429 offOpenParen = sRawStmt.find('(');
2430 if offOpenParen < 0:
2431 self.raiseDecodeError(sRawCode, off, 'MC statement without a "("');
2432 sName = sRawStmt[: offOpenParen].strip();
2433
2434 # Extract the parameters.
2435 (asParams, offCloseParen) = self.extractParams(sRawStmt, offOpenParen);
2436 if asParams is None:
2437 self.raiseDecodeError(sRawCode, off, 'MC statement without a closing parenthesis');
2438 if offCloseParen + 1 != len(sRawStmt):
2439 self.raiseDecodeError(sRawCode, off,
2440 'Unexpected code following MC statement: %s' % (sRawStmt[offCloseParen + 1:]));
2441
2442 # Hand it to the handler.
2443 fnParser = g_dMcStmtParsers.get(sName)[0];
2444 if not fnParser:
2445 self.raiseDecodeError(sRawCode, off, 'Unknown MC statement: %s' % (sName,));
2446 oStmt = fnParser(self, sName, asParams);
2447 if not isinstance(oStmt, (list, tuple)):
2448 aoStmts.append(oStmt);
2449 else:
2450 aoStmts.extend(oStmt);
2451
2452 #
2453 # If conditional, we need to parse the whole statement.
2454 #
2455 # For reasons of simplicity, we assume the following structure
2456 # and parse each branch in a recursive call:
2457 # IEM_MC_IF_XXX() {
2458 # IEM_MC_WHATEVER();
2459 # } IEM_MC_ELSE() {
2460 # IEM_MC_WHATEVER();
2461 # } IEM_MC_ENDIF();
2462 #
2463 if sName.startswith('IEM_MC_IF_'):
2464 if iLevel > 1:
2465 self.raiseDecodeError(sRawCode, off, 'Too deep nesting of conditionals.');
2466
2467 # Find start of the IF block:
2468 offBlock1 = self.skipSpacesAt(sRawCode, offEnd, offStop);
2469 if sRawCode[offBlock1] != '{':
2470 self.raiseDecodeError(sRawCode, offBlock1, 'Expected "{" following %s' % (sName,));
2471
2472 # Find the end of it.
2473 offBlock1End = self.findClosingBraces(sRawCode, offBlock1, offStop);
2474 if offBlock1End < 0:
2475 self.raiseDecodeError(sRawCode, offBlock1, 'No matching "}" closing IF block of %s' % (sName,));
2476
2477 oStmt.aoIfBranch = self.decodeCode(sRawCode, offBlock1 + 1, offBlock1End, iLevel + 1);
2478
2479 # Is there an else section?
2480 off = self.skipSpacesAt(sRawCode, offBlock1End + 1, offStop);
2481 if self.isSubstrAt(sRawCode, off, 'IEM_MC_ELSE'):
2482 off = self.skipSpacesAt(sRawCode, off + len('IEM_MC_ELSE'), offStop);
2483 if sRawCode[off] != '(':
2484 self.raiseDecodeError(sRawCode, off, 'Expected "(" following IEM_MC_ELSE"');
2485 off = self.skipSpacesAt(sRawCode, off + 1, offStop);
2486 if sRawCode[off] != ')':
2487 self.raiseDecodeError(sRawCode, off, 'Expected ")" following IEM_MC_ELSE("');
2488
2489 # Find start of the ELSE block.
2490 offBlock2 = self.skipSpacesAt(sRawCode, off + 1, offStop);
2491 if sRawCode[offBlock2] != '{':
2492 self.raiseDecodeError(sRawCode, offBlock2, 'Expected "{" following IEM_MC_ELSE()"');
2493
2494 # Find the end of it.
2495 offBlock2End = self.findClosingBraces(sRawCode, offBlock2, offStop);
2496 if offBlock2End < 0:
2497 self.raiseDecodeError(sRawCode, offBlock2, 'No matching "}" closing ELSE block of %s' % (sName,));
2498
2499 oStmt.aoElseBranch = self.decodeCode(sRawCode, offBlock2 + 1, offBlock2End, iLevel + 1);
2500 off = self.skipSpacesAt(sRawCode, offBlock2End + 1, offStop);
2501
2502 # Parse past the endif statement.
2503 if not self.isSubstrAt(sRawCode, off, 'IEM_MC_ENDIF'):
2504 self.raiseDecodeError(sRawCode, off, 'Expected IEM_MC_ENDIF for closing %s' % (sName,));
2505 off = self.skipSpacesAt(sRawCode, off + len('IEM_MC_ENDIF'), offStop);
2506 if sRawCode[off] != '(':
2507 self.raiseDecodeError(sRawCode, off, 'Expected "(" following IEM_MC_ENDIF"');
2508 off = self.skipSpacesAt(sRawCode, off + 1, offStop);
2509 if sRawCode[off] != ')':
2510 self.raiseDecodeError(sRawCode, off, 'Expected ")" following IEM_MC_ENDIF("');
2511 off = self.skipSpacesAt(sRawCode, off + 1, offStop);
2512 if sRawCode[off] != ';':
2513 self.raiseDecodeError(sRawCode, off, 'Expected ";" following IEM_MC_ENDIF()"');
2514 off += 1;
2515
2516 else:
2517 # Advance.
2518 off = offEnd + 1;
2519
2520 #
2521 # Otherwise it must be a C/C++ statement of sorts.
2522 #
2523 else:
2524 # Find the end of the statement. if and else requires special handling.
2525 sCondExpr = None;
2526 oMatch = self.koReCppCtrlStmts.match(sRawCode, off);
2527 if oMatch:
2528 if oMatch.group(1)[-1] == '(':
2529 (sCondExpr, offEnd) = self.extractParam(sRawCode, oMatch.end());
2530 else:
2531 offEnd = oMatch.end();
2532 if not oMatch.group(1).startswith('if') and oMatch.group(1) != 'else':
2533 self.raiseDecodeError(sRawCode, off, 'Only if/else control statements allowed: %s' % (oMatch.group(1),));
2534 elif ch == '#':
2535 offEnd = sRawCode.find('\n', off, offStop);
2536 if offEnd < 0:
2537 offEnd = offStop;
2538 offEnd -= 1;
2539 while offEnd > off and sRawCode[offEnd - 1].isspace():
2540 offEnd -= 1;
2541 else:
2542 offEnd = sRawCode.find(';', off);
2543 if offEnd < 0:
2544 self.raiseDecodeError(sRawCode, off, 'C++ statement without a ";"');
2545
2546 # Check this and the following statement whether it might have
2547 # something to do with decoding. This is a statement filter
2548 # criteria when generating the threaded functions blocks.
2549 offNextEnd = sRawCode.find(';', offEnd + 1);
2550 fDecode = ( sRawCode.find('IEM_OPCODE_', off, max(offEnd, offNextEnd)) >= 0
2551 or sRawCode.find('IEMOP_HLP_DONE_', off, max(offEnd, offNextEnd)) >= 0
2552 or sRawCode.find('IEMOP_HLP_DECODED_', off, offEnd) >= 0
2553 or sRawCode.find('IEMOP_HLP_RAISE_UD_IF_MISSING_GUEST_FEATURE', off, offEnd) >= 0
2554 or sRawCode.find('IEMOP_HLP_VMX_INSTR', off, offEnd) >= 0
2555 or sRawCode.find('IEMOP_HLP_IN_VMX_OPERATION', off, offEnd) >= 0 ## @todo wrong
2556 );
2557
2558 if not oMatch:
2559 if ch != '#':
2560 aoStmts.append(McCppGeneric(sRawCode[off : offEnd + 1], fDecode));
2561 else:
2562 aoStmts.append(McCppPreProc(sRawCode[off : offEnd + 1]));
2563 off = offEnd + 1;
2564 elif oMatch.group(1).startswith('if'):
2565 #
2566 # if () xxx [else yyy] statement.
2567 #
2568 oStmt = McCppCond(sCondExpr, fDecode);
2569 aoStmts.append(oStmt);
2570 off = offEnd + 1;
2571
2572 # Following the if () we can either have a {} containing zero or more statements
2573 # or we have a single statement.
2574 offBlock1 = self.skipSpacesAt(sRawCode, offEnd + 1, offStop);
2575 if sRawCode[offBlock1] == '{':
2576 offBlock1End = self.findClosingBraces(sRawCode, offBlock1, offStop);
2577 if offBlock1End < 0:
2578 self.raiseDecodeError(sRawCode, offBlock1, 'No matching "}" closing if block');
2579 offBlock1 += 1;
2580 else:
2581 offBlock1End = sRawCode.find(';', offBlock1, offStop);
2582 if offBlock1End < 0:
2583 self.raiseDecodeError(sRawCode, off, 'Expected ";" terminating one-line if block"');
2584
2585 oStmt.aoIfBranch = self.decodeCode(sRawCode, offBlock1, offBlock1End, iLevel + 1);
2586
2587 # The else is optional and can likewise be followed by {} or a single statement.
2588 off = self.skipSpacesAt(sRawCode, offBlock1End + 1, offStop);
2589 if self.isSubstrAt(sRawCode, off, 'else') and sRawCode[off + len('else')].isspace():
2590 offBlock2 = self.skipSpacesAt(sRawCode, off + len('else'), offStop);
2591 if sRawCode[offBlock2] == '{':
2592 offBlock2End = self.findClosingBraces(sRawCode, offBlock2, offStop);
2593 if offBlock2End < 0:
2594 self.raiseDecodeError(sRawCode, offBlock2, 'No matching "}" closing else block');
2595 offBlock2 += 1;
2596 else:
2597 offBlock2End = sRawCode.find(';', offBlock2, offStop);
2598 if offBlock2End < 0:
2599 self.raiseDecodeError(sRawCode, off, 'Expected ";" terminating one-line else block"');
2600
2601 oStmt.aoElseBranch = self.decodeCode(sRawCode, offBlock2, offBlock2End, iLevel + 1);
2602 off = offBlock2End + 1;
2603
2604 elif oMatch.group(1) == 'else':
2605 # Problematic 'else' branch, typically involving #ifdefs.
2606 self.raiseDecodeError(sRawCode, off, 'Mixed up else/#ifdef or something confusing us.');
2607
2608 return aoStmts;
2609
def decode(self):
    """
    Returns the decoded statement list of the block, decoding on demand.

    The block source lines (self.asLines) are joined and handed to
    decodeCode() only while self.aoStmts is still empty; the result is
    stored in self.aoStmts and returned.

    Raises ParserException on failure.
    """
    # Already decoded (non-empty list)?  Then just hand it back.
    if self.aoStmts:
        return self.aoStmts;

    sWholeBlock = ''.join(self.asLines);
    self.aoStmts = self.decodeCode(sWholeBlock);
    return self.aoStmts;
2619
2620
def checkForTooEarlyEffSegUse(self, aoStmts):
    """
    Checks that no statement preceding the first IEM_MC_CALC_RM_EFF_ADDR
    references pVCpu->iem.s.iEffSeg in one of its parameters.

    Returns None if all is fine, otherwise an error string naming the
    (1-based) offending statement closest to the address calculation.

    See r158454 for an example of this issue.
    """
    ksEffSegRef = 'pVCpu->iem.s.iEffSeg';

    # Find the first effective address calculation at this level.  Conditional
    # branches are not descended into, as we're ASSUMING the calculation never
    # takes place inside one.
    iCalcStmt = -1;
    for iCur, oCur in enumerate(aoStmts):
        if oCur.sName == 'IEM_MC_CALC_RM_EFF_ADDR':
            iCalcStmt = iCur;
            break;

    # Scan backwards from the statement just before it.  When there is no
    # calculation (iCalcStmt = -1) the range is empty.
    for iPrev in range(iCalcStmt - 1, -1, -1):
        if any(ksEffSegRef in sParam for sParam in aoStmts[iPrev].asParams):
            return "statement #%u: pVCpu->iem.s.iEffSeg is used prior to IEM_MC_CALC_RM_EFF_ADDR!" % (iPrev + 1,);
    return None;
2642
# Regular expression capturing the first word of a C/C++ statement: optional
# leading whitespace, the word itself (group 1), and then a space, an opening
# parenthesis or a semicolon.  Used by checkForDoneDecoding.
koReCppFirstWord = re.compile(r'^\s*(\w+)[ (;]');
# First words of decoding related C/C++ statements that checkForDoneDecoding
# tolerates after the IEMOP_HLP_DONE_*DECODING* invocation.  Only key
# membership is tested there; the True values are not consulted.
kdDecodeCppStmtOkayAfterDone = {
    'IEMOP_HLP_IN_VMX_OPERATION': True,
    'IEMOP_HLP_VMX_INSTR': True,
};
2648
def checkForDoneDecoding(self, aoStmts):
    """
    Verifies that the block has exactly one IEMOP_HLP_DONE_*DECODING* (or
    IEMOP_HLP_DECODED_*) invocation, that no state modifying MC statement
    precedes it and that no decoding statement follows it.

    An IEM_MC_DEFER_TO_CIMPL_* statement in the very first position counts
    as an implicit invocation.

    Returns None on success, error string on failure.

    This ensures safe instruction restarting in case the recompiler runs
    out of TB resources during recompilation (e.g. aRanges or aGCPhysPages
    entries).
    """
    ksFmtDecodeAfterDone = "statement #%u: Decoding statement following IEMOP_HLP_DONE_*DECODING*!";

    # The IEMOP_HLP_DONE_ stuff is not allowed inside conditionals, so only
    # the top level statements are examined.
    cDoneSeen = 0;
    for idxStmt, oCur in enumerate(aoStmts):
        #
        # MC statements.
        #
        if not oCur.isCppStmt():
            sStmtName = oCur.sName;
            if idxStmt == 0 and sStmtName.startswith('IEM_MC_DEFER_TO_CIMPL_'):
                cDoneSeen += 1; # implicit
            elif cDoneSeen == 0:
                # Entry [1] of the parser table tuple flags state modifying statements.
                if g_dMcStmtParsers.get(sStmtName, (None, False))[1]:
                    return "statement #%u: State modifying MC statement before IEMOP_HLP_DONE_*DECODING*!" % (idxStmt + 1,);
            elif sStmtName == 'IEM_MC_CALC_RM_EFF_ADDR':
                return ksFmtDecodeAfterDone % (idxStmt + 1,);
            continue;

        #
        # C/C++ statements, classified by their first word (if any).
        #
        oFirstWord = self.koReCppFirstWord.match(oCur.asParams[0]);
        if oFirstWord is None:
            continue;
        sWord = oFirstWord.group(1);
        if sWord.startswith(('IEMOP_HLP_DONE_', 'IEMOP_HLP_DECODED_')):
            cDoneSeen += 1;
        elif cDoneSeen > 0 and oCur.fDecode and sWord not in self.kdDecodeCppStmtOkayAfterDone:
            return ksFmtDecodeAfterDone % (idxStmt + 1,);

    # Exactly one invocation is what we want.
    if cDoneSeen == 0:
        return "Block is missing IEMOP_HLP_DONE_*DECODING* invocation!";
    if cDoneSeen == 1:
        return None;
    return "Block has more than one IEMOP_HLP_DONE_*DECODING* invocation!";
2690
def check(self):
    """
    Runs the sanity checks on the (decoded) block.

    Returns a list with the error strings of the failing checks, in the
    order the checks are run; the list is empty if all is fine.
    """
    aoDecoded = self.decode();

    # Each checker takes the statement list and returns None or an error string.
    afnCheckers = (
        self.checkForTooEarlyEffSegUse,
        self.checkForDoneDecoding,
    );

    asErrors = [];
    for fnChecker in afnCheckers:
        sError = fnChecker(aoDecoded);
        if sError:
            asErrors.append(sError);
    return asErrors;
2708
2709
2710
2711## IEM_MC_XXX -> parser + info dictionary.
2712#
2713# The info columns:
2714# - col 0: boolean entry indicating whether the statement modifies state and
#           must not be used before IEMOP_HLP_DONE_*.
2716# - col 1: boolean entry indicating native recompiler support.
2717#
2718# The raw table was generated via the following command
2719# sed -n -e "s/^# *define *\(IEM_MC_[A-Z_0-9]*\)[ (].*$/ '\1': McBlock.parseMcGeneric,/p" include/IEMMc.h \
2720# | sort | uniq | gawk "{printf """ %%-60s (%%s, True)\n""", $1, $2}"
2721g_dMcStmtParsers = {
2722 'IEM_MC_ACTUALIZE_AVX_STATE_FOR_CHANGE': (McBlock.parseMcGeneric, False, False, ),
2723 'IEM_MC_ACTUALIZE_AVX_STATE_FOR_READ': (McBlock.parseMcGeneric, False, False, ),
2724 'IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE': (McBlock.parseMcGeneric, False, False, ),
2725 'IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ': (McBlock.parseMcGeneric, False, False, ),
2726 'IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE': (McBlock.parseMcGeneric, False, False, ),
2727 'IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ': (McBlock.parseMcGeneric, False, False, ),
2728 'IEM_MC_ADD_GREG_U16': (McBlock.parseMcGeneric, True, False, ),
2729 'IEM_MC_ADD_GREG_U16_TO_LOCAL': (McBlock.parseMcGeneric, False, False, ),
2730 'IEM_MC_ADD_GREG_U32': (McBlock.parseMcGeneric, True, False, ),
2731 'IEM_MC_ADD_GREG_U32_TO_LOCAL': (McBlock.parseMcGeneric, False, False, ),
2732 'IEM_MC_ADD_GREG_U64': (McBlock.parseMcGeneric, True, False, ),
2733 'IEM_MC_ADD_GREG_U64_TO_LOCAL': (McBlock.parseMcGeneric, False, False, ),
2734 'IEM_MC_ADD_GREG_U8_TO_LOCAL': (McBlock.parseMcGeneric, False, False, ),
2735 'IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR': (McBlock.parseMcGeneric, True, False, ),
2736 'IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR': (McBlock.parseMcGeneric, True, False, ),
2737 'IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR': (McBlock.parseMcGeneric, True, False, ),
2738 'IEM_MC_ADVANCE_RIP_AND_FINISH': (McBlock.parseMcGeneric, True, True, ),
2739 'IEM_MC_AND_2LOCS_U32': (McBlock.parseMcGeneric, False, False, ),
2740 'IEM_MC_AND_ARG_U16': (McBlock.parseMcGeneric, False, False, ),
2741 'IEM_MC_AND_ARG_U32': (McBlock.parseMcGeneric, False, False, ),
2742 'IEM_MC_AND_ARG_U64': (McBlock.parseMcGeneric, False, False, ),
2743 'IEM_MC_AND_GREG_U16': (McBlock.parseMcGeneric, True, False, ),
2744 'IEM_MC_AND_GREG_U32': (McBlock.parseMcGeneric, True, False, ),
2745 'IEM_MC_AND_GREG_U64': (McBlock.parseMcGeneric, True, False, ),
2746 'IEM_MC_AND_GREG_U8': (McBlock.parseMcGeneric, True, False, ),
2747 'IEM_MC_AND_LOCAL_U16': (McBlock.parseMcGeneric, False, False, ),
2748 'IEM_MC_AND_LOCAL_U32': (McBlock.parseMcGeneric, False, False, ),
2749 'IEM_MC_AND_LOCAL_U64': (McBlock.parseMcGeneric, False, False, ),
2750 'IEM_MC_AND_LOCAL_U8': (McBlock.parseMcGeneric, False, False, ),
2751 'IEM_MC_ARG': (McBlock.parseMcArg, False, True, ),
2752 'IEM_MC_ARG_CONST': (McBlock.parseMcArgConst, False, True, ),
2753 'IEM_MC_ARG_LOCAL_EFLAGS': (McBlock.parseMcArgLocalEFlags, False, True, ),
2754 'IEM_MC_ARG_LOCAL_REF': (McBlock.parseMcArgLocalRef, False, True, ),
2755 'IEM_MC_ASSIGN_TO_SMALLER': (McBlock.parseMcGeneric, False, False, ),
2756 'IEM_MC_BEGIN': (McBlock.parseMcBegin, False, True, ),
2757 'IEM_MC_BROADCAST_XREG_U16_ZX_VLMAX': (McBlock.parseMcGeneric, True, False, ),
2758 'IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX': (McBlock.parseMcGeneric, True, False, ),
2759 'IEM_MC_BROADCAST_XREG_U64_ZX_VLMAX': (McBlock.parseMcGeneric, True, False, ),
2760 'IEM_MC_BROADCAST_XREG_U8_ZX_VLMAX': (McBlock.parseMcGeneric, True, False, ),
2761 'IEM_MC_BROADCAST_YREG_U128_ZX_VLMAX': (McBlock.parseMcGeneric, True, False, ),
2762 'IEM_MC_BROADCAST_YREG_U16_ZX_VLMAX': (McBlock.parseMcGeneric, True, False, ),
2763 'IEM_MC_BROADCAST_YREG_U32_ZX_VLMAX': (McBlock.parseMcGeneric, True, False, ),
2764 'IEM_MC_BROADCAST_YREG_U64_ZX_VLMAX': (McBlock.parseMcGeneric, True, False, ),
2765 'IEM_MC_BROADCAST_YREG_U8_ZX_VLMAX': (McBlock.parseMcGeneric, True, False, ),
2766 'IEM_MC_BSWAP_LOCAL_U16': (McBlock.parseMcGeneric, False, False, ),
2767 'IEM_MC_BSWAP_LOCAL_U32': (McBlock.parseMcGeneric, False, False, ),
2768 'IEM_MC_BSWAP_LOCAL_U64': (McBlock.parseMcGeneric, False, False, ),
2769 'IEM_MC_CALC_RM_EFF_ADDR': (McBlock.parseMcGeneric, False, False, ),
2770 'IEM_MC_CALL_AIMPL_3': (McBlock.parseMcCallAImpl, True, False, ),
2771 'IEM_MC_CALL_AIMPL_4': (McBlock.parseMcCallAImpl, True, False, ),
2772 'IEM_MC_CALL_AVX_AIMPL_2': (McBlock.parseMcCallAvxAImpl, True, False, ),
2773 'IEM_MC_CALL_AVX_AIMPL_3': (McBlock.parseMcCallAvxAImpl, True, False, ),
2774 'IEM_MC_CALL_CIMPL_0': (McBlock.parseMcCallCImpl, True, False, ),
2775 'IEM_MC_CALL_CIMPL_1': (McBlock.parseMcCallCImpl, True, False, ),
2776 'IEM_MC_CALL_CIMPL_2': (McBlock.parseMcCallCImpl, True, False, ),
2777 'IEM_MC_CALL_CIMPL_3': (McBlock.parseMcCallCImpl, True, False, ),
2778 'IEM_MC_CALL_CIMPL_4': (McBlock.parseMcCallCImpl, True, False, ),
2779 'IEM_MC_CALL_CIMPL_5': (McBlock.parseMcCallCImpl, True, False, ),
2780 'IEM_MC_CALL_FPU_AIMPL_1': (McBlock.parseMcCallFpuAImpl, True, False, ),
2781 'IEM_MC_CALL_FPU_AIMPL_2': (McBlock.parseMcCallFpuAImpl, True, False, ),
2782 'IEM_MC_CALL_FPU_AIMPL_3': (McBlock.parseMcCallFpuAImpl, True, False, ),
2783 'IEM_MC_CALL_MMX_AIMPL_2': (McBlock.parseMcCallMmxAImpl, True, False, ),
2784 'IEM_MC_CALL_MMX_AIMPL_3': (McBlock.parseMcCallMmxAImpl, True, False, ),
2785 'IEM_MC_CALL_SSE_AIMPL_2': (McBlock.parseMcCallSseAImpl, True, False, ),
2786 'IEM_MC_CALL_SSE_AIMPL_3': (McBlock.parseMcCallSseAImpl, True, False, ),
2787 'IEM_MC_CALL_VOID_AIMPL_0': (McBlock.parseMcCallVoidAImpl, True, False, ),
2788 'IEM_MC_CALL_VOID_AIMPL_1': (McBlock.parseMcCallVoidAImpl, True, False, ),
2789 'IEM_MC_CALL_VOID_AIMPL_2': (McBlock.parseMcCallVoidAImpl, True, False, ),
2790 'IEM_MC_CALL_VOID_AIMPL_3': (McBlock.parseMcCallVoidAImpl, True, False, ),
2791 'IEM_MC_CALL_VOID_AIMPL_4': (McBlock.parseMcCallVoidAImpl, True, False, ),
2792 'IEM_MC_CLEAR_EFL_BIT': (McBlock.parseMcGeneric, True, False, ),
2793 'IEM_MC_CLEAR_FSW_EX': (McBlock.parseMcGeneric, True, False, ),
2794 'IEM_MC_CLEAR_HIGH_GREG_U64': (McBlock.parseMcGeneric, True, False, ),
2795 'IEM_MC_CLEAR_XREG_U32_MASK': (McBlock.parseMcGeneric, True, False, ),
2796 'IEM_MC_CLEAR_YREG_128_UP': (McBlock.parseMcGeneric, True, False, ),
2797 'IEM_MC_COMMIT_EFLAGS': (McBlock.parseMcGeneric, True, False, ),
2798 'IEM_MC_COPY_XREG_U128': (McBlock.parseMcGeneric, True, False, ),
2799 'IEM_MC_COPY_YREG_U128_ZX_VLMAX': (McBlock.parseMcGeneric, True, False, ),
2800 'IEM_MC_COPY_YREG_U256_ZX_VLMAX': (McBlock.parseMcGeneric, True, False, ),
2801 'IEM_MC_COPY_YREG_U64_ZX_VLMAX': (McBlock.parseMcGeneric, True, False, ),
2802 'IEM_MC_DEFER_TO_CIMPL_0_RET': (McBlock.parseMcDeferToCImpl, False, False, ),
2803 'IEM_MC_DEFER_TO_CIMPL_1_RET': (McBlock.parseMcDeferToCImpl, False, False, ),
2804 'IEM_MC_DEFER_TO_CIMPL_2_RET': (McBlock.parseMcDeferToCImpl, False, False, ),
2805 'IEM_MC_DEFER_TO_CIMPL_3_RET': (McBlock.parseMcDeferToCImpl, False, False, ),
2806 'IEM_MC_END': (McBlock.parseMcGeneric, True, True, ),
2807 'IEM_MC_FETCH_EFLAGS': (McBlock.parseMcGeneric, False, False, ),
2808 'IEM_MC_FETCH_EFLAGS_U8': (McBlock.parseMcGeneric, False, False, ),
2809 'IEM_MC_FETCH_FCW': (McBlock.parseMcGeneric, False, False, ),
2810 'IEM_MC_FETCH_FSW': (McBlock.parseMcGeneric, False, False, ),
2811 'IEM_MC_FETCH_GREG_U16': (McBlock.parseMcGeneric, False, True, ),
2812 'IEM_MC_FETCH_GREG_U16_SX_U32': (McBlock.parseMcGeneric, False, False, ),
2813 'IEM_MC_FETCH_GREG_U16_SX_U64': (McBlock.parseMcGeneric, False, False, ),
2814 'IEM_MC_FETCH_GREG_U16_ZX_U32': (McBlock.parseMcGeneric, False, False, ),
2815 'IEM_MC_FETCH_GREG_U16_ZX_U64': (McBlock.parseMcGeneric, False, False, ),
2816 'IEM_MC_FETCH_GREG_U32': (McBlock.parseMcGeneric, False, False, ),
2817 'IEM_MC_FETCH_GREG_U32_SX_U64': (McBlock.parseMcGeneric, False, False, ),
2818 'IEM_MC_FETCH_GREG_U32_ZX_U64': (McBlock.parseMcGeneric, False, False, ),
2819 'IEM_MC_FETCH_GREG_U64': (McBlock.parseMcGeneric, False, False, ),
2820 'IEM_MC_FETCH_GREG_U64_ZX_U64': (McBlock.parseMcGeneric, False, False, ),
2821 'IEM_MC_FETCH_GREG_U8': (McBlock.parseMcGeneric, False, False, ),
2822 'IEM_MC_FETCH_GREG_U8_SX_U16': (McBlock.parseMcGeneric, False, False, ),
2823 'IEM_MC_FETCH_GREG_U8_SX_U32': (McBlock.parseMcGeneric, False, False, ),
2824 'IEM_MC_FETCH_GREG_U8_SX_U64': (McBlock.parseMcGeneric, False, False, ),
2825 'IEM_MC_FETCH_GREG_U8_ZX_U16': (McBlock.parseMcGeneric, False, False, ),
2826 'IEM_MC_FETCH_GREG_U8_ZX_U32': (McBlock.parseMcGeneric, False, False, ),
2827 'IEM_MC_FETCH_GREG_U8_ZX_U64': (McBlock.parseMcGeneric, False, False, ),
2828 'IEM_MC_FETCH_MEM_D80': (McBlock.parseMcGeneric, True, False, ),
2829 'IEM_MC_FETCH_MEM_I16': (McBlock.parseMcGeneric, True, False, ),
2830 'IEM_MC_FETCH_MEM_I32': (McBlock.parseMcGeneric, True, False, ),
2831 'IEM_MC_FETCH_MEM_I64': (McBlock.parseMcGeneric, True, False, ),
2832 'IEM_MC_FETCH_MEM_R32': (McBlock.parseMcGeneric, True, False, ),
2833 'IEM_MC_FETCH_MEM_R64': (McBlock.parseMcGeneric, True, False, ),
2834 'IEM_MC_FETCH_MEM_R80': (McBlock.parseMcGeneric, True, False, ),
2835 'IEM_MC_FETCH_MEM_S32_SX_U64': (McBlock.parseMcGeneric, True, False, ),
2836 'IEM_MC_FETCH_MEM_U128': (McBlock.parseMcGeneric, True, False, ),
2837 'IEM_MC_FETCH_MEM_U128_ALIGN_SSE': (McBlock.parseMcGeneric, True, False, ),
2838 'IEM_MC_FETCH_MEM_U128_NO_AC': (McBlock.parseMcGeneric, True, False, ),
2839 'IEM_MC_FETCH_MEM_U16': (McBlock.parseMcGeneric, True, False, ),
2840 'IEM_MC_FETCH_MEM_U16_DISP': (McBlock.parseMcGeneric, True, False, ),
2841 'IEM_MC_FETCH_MEM_U16_SX_U32': (McBlock.parseMcGeneric, True, False, ),
2842 'IEM_MC_FETCH_MEM_U16_SX_U64': (McBlock.parseMcGeneric, True, False, ),
2843 'IEM_MC_FETCH_MEM_U16_ZX_U32': (McBlock.parseMcGeneric, True, False, ),
2844 'IEM_MC_FETCH_MEM_U16_ZX_U64': (McBlock.parseMcGeneric, True, False, ),
2845 'IEM_MC_FETCH_MEM_U256': (McBlock.parseMcGeneric, True, False, ),
2846 'IEM_MC_FETCH_MEM_U256_ALIGN_AVX': (McBlock.parseMcGeneric, True, False, ),
2847 'IEM_MC_FETCH_MEM_U256_NO_AC': (McBlock.parseMcGeneric, True, False, ),
2848 'IEM_MC_FETCH_MEM_U32': (McBlock.parseMcGeneric, True, False, ),
2849 'IEM_MC_FETCH_MEM_U32_DISP': (McBlock.parseMcGeneric, True, False, ),
2850 'IEM_MC_FETCH_MEM_U32_SX_U64': (McBlock.parseMcGeneric, True, False, ),
2851 'IEM_MC_FETCH_MEM_U32_ZX_U64': (McBlock.parseMcGeneric, True, False, ),
2852 'IEM_MC_FETCH_MEM_U64': (McBlock.parseMcGeneric, True, False, ),
2853 'IEM_MC_FETCH_MEM_U64_ALIGN_U128': (McBlock.parseMcGeneric, True, False, ),
2854 'IEM_MC_FETCH_MEM_U64_DISP': (McBlock.parseMcGeneric, True, False, ),
2855 'IEM_MC_FETCH_MEM_U8': (McBlock.parseMcGeneric, True, False, ),
2856 'IEM_MC_FETCH_MEM_U8_SX_U16': (McBlock.parseMcGeneric, True, False, ),
2857 'IEM_MC_FETCH_MEM_U8_SX_U32': (McBlock.parseMcGeneric, True, False, ),
2858 'IEM_MC_FETCH_MEM_U8_SX_U64': (McBlock.parseMcGeneric, True, False, ),
2859 'IEM_MC_FETCH_MEM_U8_ZX_U16': (McBlock.parseMcGeneric, True, False, ),
2860 'IEM_MC_FETCH_MEM_U8_ZX_U32': (McBlock.parseMcGeneric, True, False, ),
2861 'IEM_MC_FETCH_MEM_U8_ZX_U64': (McBlock.parseMcGeneric, True, False, ),
2862 'IEM_MC_FETCH_MEM_XMM': (McBlock.parseMcGeneric, True, False, ),
2863 'IEM_MC_FETCH_MEM_XMM_ALIGN_SSE': (McBlock.parseMcGeneric, True, False, ),
2864 'IEM_MC_FETCH_MEM_XMM_NO_AC': (McBlock.parseMcGeneric, True, False, ),
2865 'IEM_MC_FETCH_MEM_XMM_U32': (McBlock.parseMcGeneric, True, False, ),
2866 'IEM_MC_FETCH_MEM_XMM_U64': (McBlock.parseMcGeneric, True, False, ),
2867 'IEM_MC_FETCH_MEM_YMM': (McBlock.parseMcGeneric, True, False, ),
2868 'IEM_MC_FETCH_MEM_YMM_ALIGN_AVX': (McBlock.parseMcGeneric, True, False, ),
2869 'IEM_MC_FETCH_MEM_YMM_NO_AC': (McBlock.parseMcGeneric, True, False, ),
2870 'IEM_MC_FETCH_MEM16_U8': (McBlock.parseMcGeneric, True, False, ),
2871 'IEM_MC_FETCH_MEM32_U8': (McBlock.parseMcGeneric, True, False, ),
2872 'IEM_MC_FETCH_MREG_U32': (McBlock.parseMcGeneric, False, False, ),
2873 'IEM_MC_FETCH_MREG_U64': (McBlock.parseMcGeneric, False, False, ),
2874 'IEM_MC_FETCH_SREG_BASE_U32': (McBlock.parseMcGeneric, False, False, ),
2875 'IEM_MC_FETCH_SREG_BASE_U64': (McBlock.parseMcGeneric, False, False, ),
2876 'IEM_MC_FETCH_SREG_U16': (McBlock.parseMcGeneric, False, False, ),
2877 'IEM_MC_FETCH_SREG_ZX_U32': (McBlock.parseMcGeneric, False, False, ),
2878 'IEM_MC_FETCH_SREG_ZX_U64': (McBlock.parseMcGeneric, False, False, ),
2879 'IEM_MC_FETCH_XREG_U128': (McBlock.parseMcGeneric, False, False, ),
2880 'IEM_MC_FETCH_XREG_U16': (McBlock.parseMcGeneric, False, False, ),
2881 'IEM_MC_FETCH_XREG_U32': (McBlock.parseMcGeneric, False, False, ),
2882 'IEM_MC_FETCH_XREG_U64': (McBlock.parseMcGeneric, False, False, ),
2883 'IEM_MC_FETCH_XREG_U8': (McBlock.parseMcGeneric, False, False, ),
2884 'IEM_MC_FETCH_XREG_XMM': (McBlock.parseMcGeneric, False, False, ),
2885 'IEM_MC_FETCH_YREG_2ND_U64': (McBlock.parseMcGeneric, False, False, ),
2886 'IEM_MC_FETCH_YREG_U128': (McBlock.parseMcGeneric, False, False, ),
2887 'IEM_MC_FETCH_YREG_U256': (McBlock.parseMcGeneric, False, False, ),
2888 'IEM_MC_FETCH_YREG_U32': (McBlock.parseMcGeneric, False, False, ),
2889 'IEM_MC_FETCH_YREG_U64': (McBlock.parseMcGeneric, False, False, ),
2890 'IEM_MC_FLIP_EFL_BIT': (McBlock.parseMcGeneric, True, False, ),
2891 'IEM_MC_FPU_FROM_MMX_MODE': (McBlock.parseMcGeneric, True, False, ),
2892 'IEM_MC_FPU_STACK_DEC_TOP': (McBlock.parseMcGeneric, True, False, ),
2893 'IEM_MC_FPU_STACK_FREE': (McBlock.parseMcGeneric, True, False, ),
2894 'IEM_MC_FPU_STACK_INC_TOP': (McBlock.parseMcGeneric, True, False, ),
2895 'IEM_MC_FPU_STACK_PUSH_OVERFLOW': (McBlock.parseMcGeneric, True, False, ),
2896 'IEM_MC_FPU_STACK_PUSH_OVERFLOW_MEM_OP': (McBlock.parseMcGeneric, True, False, ),
2897 'IEM_MC_FPU_STACK_PUSH_UNDERFLOW': (McBlock.parseMcGeneric, True, False, ),
2898 'IEM_MC_FPU_STACK_PUSH_UNDERFLOW_TWO': (McBlock.parseMcGeneric, True, False, ),
2899 'IEM_MC_FPU_STACK_UNDERFLOW': (McBlock.parseMcGeneric, True, False, ),
2900 'IEM_MC_FPU_STACK_UNDERFLOW_MEM_OP': (McBlock.parseMcGeneric, True, False, ),
2901 'IEM_MC_FPU_STACK_UNDERFLOW_MEM_OP_THEN_POP': (McBlock.parseMcGeneric, True, False, ),
2902 'IEM_MC_FPU_STACK_UNDERFLOW_THEN_POP': (McBlock.parseMcGeneric, True, False, ),
2903 'IEM_MC_FPU_STACK_UNDERFLOW_THEN_POP_POP': (McBlock.parseMcGeneric, True, False, ),
2904 'IEM_MC_FPU_TO_MMX_MODE': (McBlock.parseMcGeneric, True, False, ),
2905 'IEM_MC_HINT_FLUSH_GUEST_SHADOW_GREG': (McBlock.parseMcGeneric, True, True, ),
2906 'IEM_MC_HINT_FLUSH_GUEST_SHADOW_SREG': (McBlock.parseMcGeneric, True, True, ),
2907 'IEM_MC_IF_CX_IS_NZ': (McBlock.parseMcGenericCond, True, True, ),
2908 'IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_NOT_SET': (McBlock.parseMcGenericCond, True, True, ),
2909 'IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_SET': (McBlock.parseMcGenericCond, True, True, ),
2910 'IEM_MC_IF_ECX_IS_NZ': (McBlock.parseMcGenericCond, True, True, ),
2911 'IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_NOT_SET': (McBlock.parseMcGenericCond, True, True, ),
2912 'IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_SET': (McBlock.parseMcGenericCond, True, True, ),
2913 'IEM_MC_IF_EFL_ANY_BITS_SET': (McBlock.parseMcGenericCond, True, True, ),
2914 'IEM_MC_IF_EFL_BIT_NOT_SET': (McBlock.parseMcGenericCond, True, True, ),
2915 'IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ': (McBlock.parseMcGenericCond, True, True, ),
2916 'IEM_MC_IF_EFL_BIT_SET': (McBlock.parseMcGenericCond, True, True, ),
2917 'IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE': (McBlock.parseMcGenericCond, True, True, ),
2918 'IEM_MC_IF_EFL_BITS_EQ': (McBlock.parseMcGenericCond, True, True, ),
2919 'IEM_MC_IF_EFL_BITS_NE': (McBlock.parseMcGenericCond, True, True, ),
2920 'IEM_MC_IF_EFL_NO_BITS_SET': (McBlock.parseMcGenericCond, True, True, ),
2921 'IEM_MC_IF_FCW_IM': (McBlock.parseMcGenericCond, True, False, ),
2922 'IEM_MC_IF_FPUREG_IS_EMPTY': (McBlock.parseMcGenericCond, True, False, ),
2923 'IEM_MC_IF_FPUREG_NOT_EMPTY': (McBlock.parseMcGenericCond, True, False, ),
2924 'IEM_MC_IF_FPUREG_NOT_EMPTY_REF_R80': (McBlock.parseMcGenericCond, True, False, ),
2925 'IEM_MC_IF_GREG_BIT_SET': (McBlock.parseMcGenericCond, True, False, ),
2926 'IEM_MC_IF_LOCAL_IS_Z': (McBlock.parseMcGenericCond, True, False, ),
2927 'IEM_MC_IF_MXCSR_XCPT_PENDING': (McBlock.parseMcGenericCond, True, False, ),
2928 'IEM_MC_IF_RCX_IS_NZ': (McBlock.parseMcGenericCond, True, True, ),
2929 'IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_NOT_SET': (McBlock.parseMcGenericCond, True, True, ),
2930 'IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_SET': (McBlock.parseMcGenericCond, True, True, ),
2931 'IEM_MC_IF_TWO_FPUREGS_NOT_EMPTY_REF_R80': (McBlock.parseMcGenericCond, True, False, ),
2932 'IEM_MC_IF_TWO_FPUREGS_NOT_EMPTY_REF_R80_FIRST': (McBlock.parseMcGenericCond, True, False, ),
2933 'IEM_MC_IMPLICIT_AVX_AIMPL_ARGS': (McBlock.parseMcImplicitAvxAArgs, False, False, ),
2934 'IEM_MC_INT_CLEAR_ZMM_256_UP': (McBlock.parseMcGeneric, True, False, ),
2935 'IEM_MC_LOCAL': (McBlock.parseMcLocal, False, True, ),
2936 'IEM_MC_LOCAL_ASSIGN': (McBlock.parseMcLocalAssign, False, True, ),
2937 'IEM_MC_LOCAL_CONST': (McBlock.parseMcLocalConst, False, True, ),
2938 'IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT': (McBlock.parseMcGeneric, True, False, ),
2939 'IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE': (McBlock.parseMcGeneric, True, False, ),
2940 'IEM_MC_MAYBE_RAISE_FPU_XCPT': (McBlock.parseMcGeneric, True, False, ),
2941 'IEM_MC_MAYBE_RAISE_FSGSBASE_XCPT': (McBlock.parseMcGeneric, True, False, ),
2942 'IEM_MC_MAYBE_RAISE_MMX_RELATED_XCPT': (McBlock.parseMcGeneric, True, False, ),
2943 'IEM_MC_MAYBE_RAISE_NON_CANONICAL_ADDR_GP0': (McBlock.parseMcGeneric, True, False, ),
2944 'IEM_MC_MAYBE_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT': (McBlock.parseMcGeneric, True, False, ),
2945 'IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT': (McBlock.parseMcGeneric, True, False, ),
2946 'IEM_MC_MAYBE_RAISE_WAIT_DEVICE_NOT_AVAILABLE': (McBlock.parseMcGeneric, True, False, ),
2947 'IEM_MC_MEM_COMMIT_AND_UNMAP': (McBlock.parseMcGeneric, True, False, ),
2948 'IEM_MC_MEM_COMMIT_AND_UNMAP_RW': (McBlock.parseMcGeneric, True, False, ),
2949 'IEM_MC_MEM_COMMIT_AND_UNMAP_RO': (McBlock.parseMcGeneric, True, False, ),
2950 'IEM_MC_MEM_COMMIT_AND_UNMAP_WO': (McBlock.parseMcGeneric, True, False, ),
2951 'IEM_MC_MEM_COMMIT_AND_UNMAP_FOR_FPU_STORE': (McBlock.parseMcGeneric, True, False, ),
2952 'IEM_MC_MEM_MAP': (McBlock.parseMcGeneric, True, False, ),
2953 'IEM_MC_MEM_MAP_EX': (McBlock.parseMcGeneric, True, False, ),
2954 'IEM_MC_MEM_MAP_U8_RW': (McBlock.parseMcGeneric, True, False, ),
2955 'IEM_MC_MEM_MAP_U8_RO': (McBlock.parseMcGeneric, True, False, ),
2956 'IEM_MC_MEM_MAP_U8_WO': (McBlock.parseMcGeneric, True, False, ),
2957 'IEM_MC_MEM_MAP_U16_RW': (McBlock.parseMcGeneric, True, False, ),
2958 'IEM_MC_MEM_MAP_U16_RO': (McBlock.parseMcGeneric, True, False, ),
2959 'IEM_MC_MEM_MAP_U16_WO': (McBlock.parseMcGeneric, True, False, ),
2960 'IEM_MC_MEM_MAP_U32_RW': (McBlock.parseMcGeneric, True, False, ),
2961 'IEM_MC_MEM_MAP_U32_RO': (McBlock.parseMcGeneric, True, False, ),
2962 'IEM_MC_MEM_MAP_U32_WO': (McBlock.parseMcGeneric, True, False, ),
2963 'IEM_MC_MEM_MAP_U64_RW': (McBlock.parseMcGeneric, True, False, ),
2964 'IEM_MC_MEM_MAP_U64_RO': (McBlock.parseMcGeneric, True, False, ),
2965 'IEM_MC_MEM_MAP_U64_WO': (McBlock.parseMcGeneric, True, False, ),
2966 'IEM_MC_MERGE_YREG_U32_U96_ZX_VLMAX': (McBlock.parseMcGeneric, True, False, ),
2967 'IEM_MC_MERGE_YREG_U64_U64_ZX_VLMAX': (McBlock.parseMcGeneric, True, False, ),
2968 'IEM_MC_MERGE_YREG_U64HI_U64HI_ZX_VLMAX': (McBlock.parseMcGeneric, True, False, ),
2969 'IEM_MC_MERGE_YREG_U64LO_U64LO_ZX_VLMAX': (McBlock.parseMcGeneric, True, False, ),
2970 'IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX': (McBlock.parseMcGeneric, True, False, ),
2971 'IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX': (McBlock.parseMcGeneric, True, False, ),
2972 'IEM_MC_MODIFIED_MREG': (McBlock.parseMcGeneric, True, False, ),
2973 'IEM_MC_MODIFIED_MREG_BY_REF': (McBlock.parseMcGeneric, True, False, ),
2974 'IEM_MC_OR_2LOCS_U32': (McBlock.parseMcGeneric, False, False, ),
2975 'IEM_MC_OR_GREG_U16': (McBlock.parseMcGeneric, True, False, ),
2976 'IEM_MC_OR_GREG_U32': (McBlock.parseMcGeneric, True, False, ),
2977 'IEM_MC_OR_GREG_U64': (McBlock.parseMcGeneric, True, False, ),
2978 'IEM_MC_OR_GREG_U8': (McBlock.parseMcGeneric, True, False, ),
2979 'IEM_MC_OR_LOCAL_U16': (McBlock.parseMcGeneric, False, False, ),
2980 'IEM_MC_OR_LOCAL_U32': (McBlock.parseMcGeneric, False, False, ),
2981 'IEM_MC_OR_LOCAL_U8': (McBlock.parseMcGeneric, False, False, ),
2982 'IEM_MC_POP_U16': (McBlock.parseMcGeneric, True, False, ),
2983 'IEM_MC_POP_U32': (McBlock.parseMcGeneric, True, False, ),
2984 'IEM_MC_POP_U64': (McBlock.parseMcGeneric, True, False, ),
2985 'IEM_MC_PREPARE_AVX_USAGE': (McBlock.parseMcGeneric, False, False, ),
2986 'IEM_MC_PREPARE_FPU_USAGE': (McBlock.parseMcGeneric, False, False, ),
2987 'IEM_MC_PREPARE_SSE_USAGE': (McBlock.parseMcGeneric, False, False, ),
2988 'IEM_MC_PUSH_FPU_RESULT': (McBlock.parseMcGeneric, True, False, ),
2989 'IEM_MC_PUSH_FPU_RESULT_MEM_OP': (McBlock.parseMcGeneric, True, False, ),
2990 'IEM_MC_PUSH_FPU_RESULT_TWO': (McBlock.parseMcGeneric, True, False, ),
2991 'IEM_MC_PUSH_U16': (McBlock.parseMcGeneric, True, False, ),
2992 'IEM_MC_PUSH_U32': (McBlock.parseMcGeneric, True, False, ),
2993 'IEM_MC_PUSH_U32_SREG': (McBlock.parseMcGeneric, True, False, ),
2994 'IEM_MC_PUSH_U64': (McBlock.parseMcGeneric, True, False, ),
2995 'IEM_MC_RAISE_DIVIDE_ERROR': (McBlock.parseMcGeneric, True, False, ),
2996 'IEM_MC_RAISE_GP0_IF_CPL_NOT_ZERO': (McBlock.parseMcGeneric, True, False, ),
2997 'IEM_MC_RAISE_GP0_IF_EFF_ADDR_UNALIGNED': (McBlock.parseMcGeneric, True, False, ),
2998 'IEM_MC_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT': (McBlock.parseMcGeneric, True, False, ),
2999 'IEM_MC_REF_EFLAGS': (McBlock.parseMcGeneric, False, False, ),
3000 'IEM_MC_REF_FPUREG': (McBlock.parseMcGeneric, False, False, ),
3001 'IEM_MC_REF_GREG_I32': (McBlock.parseMcGeneric, False, False, ),
3002 'IEM_MC_REF_GREG_I32_CONST': (McBlock.parseMcGeneric, False, False, ),
3003 'IEM_MC_REF_GREG_I64': (McBlock.parseMcGeneric, False, False, ),
3004 'IEM_MC_REF_GREG_I64_CONST': (McBlock.parseMcGeneric, False, False, ),
3005 'IEM_MC_REF_GREG_U16': (McBlock.parseMcGeneric, False, False, ),
3006 'IEM_MC_REF_GREG_U16_CONST': (McBlock.parseMcGeneric, False, False, ),
3007 'IEM_MC_REF_GREG_U32': (McBlock.parseMcGeneric, False, False, ),
3008 'IEM_MC_REF_GREG_U32_CONST': (McBlock.parseMcGeneric, False, False, ),
3009 'IEM_MC_REF_GREG_U64': (McBlock.parseMcGeneric, False, False, ),
3010 'IEM_MC_REF_GREG_U64_CONST': (McBlock.parseMcGeneric, False, False, ),
3011 'IEM_MC_REF_GREG_U8': (McBlock.parseMcGeneric, False, False, ),
3012 'IEM_MC_REF_GREG_U8_CONST': (McBlock.parseMcGeneric, False, False, ),
3013 'IEM_MC_REF_LOCAL': (McBlock.parseMcGeneric, False, False, ),
3014 'IEM_MC_REF_MREG_U32_CONST': (McBlock.parseMcGeneric, False, False, ),
3015 'IEM_MC_REF_MREG_U64': (McBlock.parseMcGeneric, False, False, ),
3016 'IEM_MC_REF_MREG_U64_CONST': (McBlock.parseMcGeneric, False, False, ),
3017 'IEM_MC_REF_MXCSR': (McBlock.parseMcGeneric, False, False, ),
3018 'IEM_MC_REF_XREG_R32_CONST': (McBlock.parseMcGeneric, False, False, ),
3019 'IEM_MC_REF_XREG_R64_CONST': (McBlock.parseMcGeneric, False, False, ),
3020 'IEM_MC_REF_XREG_U128': (McBlock.parseMcGeneric, False, False, ),
3021 'IEM_MC_REF_XREG_U128_CONST': (McBlock.parseMcGeneric, False, False, ),
3022 'IEM_MC_REF_XREG_U32_CONST': (McBlock.parseMcGeneric, False, False, ),
3023 'IEM_MC_REF_XREG_U64_CONST': (McBlock.parseMcGeneric, False, False, ),
3024 'IEM_MC_REF_XREG_XMM_CONST': (McBlock.parseMcGeneric, False, False, ),
3025 'IEM_MC_REF_YREG_U128': (McBlock.parseMcGeneric, False, False, ),
3026 'IEM_MC_REF_YREG_U128_CONST': (McBlock.parseMcGeneric, False, False, ),
3027 'IEM_MC_REF_YREG_U64_CONST': (McBlock.parseMcGeneric, False, False, ),
3028 'IEM_MC_REL_JMP_S16_AND_FINISH': (McBlock.parseMcGeneric, True, False, ),
3029 'IEM_MC_REL_JMP_S32_AND_FINISH': (McBlock.parseMcGeneric, True, False, ),
3030 'IEM_MC_REL_JMP_S8_AND_FINISH': (McBlock.parseMcGeneric, True, False, ),
3031 'IEM_MC_RETURN_ON_FAILURE': (McBlock.parseMcGeneric, False, False, ),
3032 'IEM_MC_SAR_LOCAL_S16': (McBlock.parseMcGeneric, False, False, ),
3033 'IEM_MC_SAR_LOCAL_S32': (McBlock.parseMcGeneric, False, False, ),
3034 'IEM_MC_SAR_LOCAL_S64': (McBlock.parseMcGeneric, False, False, ),
3035 'IEM_MC_SET_EFL_BIT': (McBlock.parseMcGeneric, True, False, ),
3036 'IEM_MC_SET_FPU_RESULT': (McBlock.parseMcGeneric, True, False, ),
3037 'IEM_MC_SET_RIP_U16_AND_FINISH': (McBlock.parseMcGeneric, True, False, ),
3038 'IEM_MC_SET_RIP_U32_AND_FINISH': (McBlock.parseMcGeneric, True, False, ),
3039 'IEM_MC_SET_RIP_U64_AND_FINISH': (McBlock.parseMcGeneric, True, False, ),
3040 'IEM_MC_SHL_LOCAL_S16': (McBlock.parseMcGeneric, False, False, ),
3041 'IEM_MC_SHL_LOCAL_S32': (McBlock.parseMcGeneric, False, False, ),
3042 'IEM_MC_SHL_LOCAL_S64': (McBlock.parseMcGeneric, False, False, ),
3043 'IEM_MC_SHR_LOCAL_U8': (McBlock.parseMcGeneric, False, False, ),
3044 'IEM_MC_SSE_UPDATE_MXCSR': (McBlock.parseMcGeneric, True, False, ),
3045 'IEM_MC_STORE_FPU_RESULT': (McBlock.parseMcGeneric, True, False, ),
3046 'IEM_MC_STORE_FPU_RESULT_MEM_OP': (McBlock.parseMcGeneric, True, False, ),
3047 'IEM_MC_STORE_FPU_RESULT_THEN_POP': (McBlock.parseMcGeneric, True, False, ),
3048 'IEM_MC_STORE_FPU_RESULT_WITH_MEM_OP_THEN_POP': (McBlock.parseMcGeneric, True, False, ),
3049 'IEM_MC_STORE_FPUREG_R80_SRC_REF': (McBlock.parseMcGeneric, True, False, ),
3050 'IEM_MC_STORE_GREG_I64': (McBlock.parseMcGeneric, True, False, ),
3051 'IEM_MC_STORE_GREG_U16': (McBlock.parseMcGeneric, True, False, ),
3052 'IEM_MC_STORE_GREG_U16_CONST': (McBlock.parseMcGeneric, True, False, ),
3053 'IEM_MC_STORE_GREG_U32': (McBlock.parseMcGeneric, True, False, ),
3054 'IEM_MC_STORE_GREG_U32_CONST': (McBlock.parseMcGeneric, True, False, ),
3055 'IEM_MC_STORE_GREG_U64': (McBlock.parseMcGeneric, True, False, ),
3056 'IEM_MC_STORE_GREG_U64_CONST': (McBlock.parseMcGeneric, True, False, ),
3057 'IEM_MC_STORE_GREG_U8': (McBlock.parseMcGeneric, True, False, ),
3058 'IEM_MC_STORE_GREG_U8_CONST': (McBlock.parseMcGeneric, True, False, ),
3059 'IEM_MC_STORE_MEM_I16_CONST_BY_REF': (McBlock.parseMcGeneric, True, False, ),
3060 'IEM_MC_STORE_MEM_I32_CONST_BY_REF': (McBlock.parseMcGeneric, True, False, ),
3061 'IEM_MC_STORE_MEM_I64_CONST_BY_REF': (McBlock.parseMcGeneric, True, False, ),
3062 'IEM_MC_STORE_MEM_I8_CONST_BY_REF': (McBlock.parseMcGeneric, True, False, ),
3063 'IEM_MC_STORE_MEM_INDEF_D80_BY_REF': (McBlock.parseMcGeneric, True, False, ),
3064 'IEM_MC_STORE_MEM_NEG_QNAN_R32_BY_REF': (McBlock.parseMcGeneric, True, False, ),
3065 'IEM_MC_STORE_MEM_NEG_QNAN_R64_BY_REF': (McBlock.parseMcGeneric, True, False, ),
3066 'IEM_MC_STORE_MEM_NEG_QNAN_R80_BY_REF': (McBlock.parseMcGeneric, True, False, ),
3067 'IEM_MC_STORE_MEM_U128': (McBlock.parseMcGeneric, True, False, ),
3068 'IEM_MC_STORE_MEM_U128_ALIGN_SSE': (McBlock.parseMcGeneric, True, False, ),
3069 'IEM_MC_STORE_MEM_U16': (McBlock.parseMcGeneric, True, False, ),
3070 'IEM_MC_STORE_MEM_U16_CONST': (McBlock.parseMcGeneric, True, False, ),
3071 'IEM_MC_STORE_MEM_U256': (McBlock.parseMcGeneric, True, False, ),
3072 'IEM_MC_STORE_MEM_U256_ALIGN_AVX': (McBlock.parseMcGeneric, True, False, ),
3073 'IEM_MC_STORE_MEM_U32': (McBlock.parseMcGeneric, True, False, ),
3074 'IEM_MC_STORE_MEM_U32_CONST': (McBlock.parseMcGeneric, True, False, ),
3075 'IEM_MC_STORE_MEM_U64': (McBlock.parseMcGeneric, True, False, ),
3076 'IEM_MC_STORE_MEM_U64_CONST': (McBlock.parseMcGeneric, True, False, ),
3077 'IEM_MC_STORE_MEM_U8': (McBlock.parseMcGeneric, True, False, ),
3078 'IEM_MC_STORE_MEM_U8_CONST': (McBlock.parseMcGeneric, True, False, ),
3079 'IEM_MC_STORE_MREG_U32_ZX_U64': (McBlock.parseMcGeneric, True, False, ),
3080 'IEM_MC_STORE_MREG_U64': (McBlock.parseMcGeneric, True, False, ),
3081 'IEM_MC_STORE_SREG_BASE_U32': (McBlock.parseMcGeneric, True, False, ),
3082 'IEM_MC_STORE_SREG_BASE_U64': (McBlock.parseMcGeneric, True, False, ),
3083 'IEM_MC_STORE_SSE_RESULT': (McBlock.parseMcGeneric, True, False, ),
3084 'IEM_MC_STORE_XREG_HI_U64': (McBlock.parseMcGeneric, True, False, ),
3085 'IEM_MC_STORE_XREG_R32': (McBlock.parseMcGeneric, True, False, ),
3086 'IEM_MC_STORE_XREG_R64': (McBlock.parseMcGeneric, True, False, ),
3087 'IEM_MC_STORE_XREG_U128': (McBlock.parseMcGeneric, True, False, ),
3088 'IEM_MC_STORE_XREG_U16': (McBlock.parseMcGeneric, True, False, ),
3089 'IEM_MC_STORE_XREG_U32': (McBlock.parseMcGeneric, True, False, ),
3090 'IEM_MC_STORE_XREG_U32_U128': (McBlock.parseMcGeneric, True, False, ),
3091 'IEM_MC_STORE_XREG_U32_ZX_U128': (McBlock.parseMcGeneric, True, False, ),
3092 'IEM_MC_STORE_XREG_U64': (McBlock.parseMcGeneric, True, False, ),
3093 'IEM_MC_STORE_XREG_U64_ZX_U128': (McBlock.parseMcGeneric, True, False, ),
3094 'IEM_MC_STORE_XREG_U8': (McBlock.parseMcGeneric, True, False, ),
3095 'IEM_MC_STORE_XREG_XMM': (McBlock.parseMcGeneric, True, False, ),
3096 'IEM_MC_STORE_XREG_XMM_U32': (McBlock.parseMcGeneric, True, False, ),
3097 'IEM_MC_STORE_XREG_XMM_U64': (McBlock.parseMcGeneric, True, False, ),
3098 'IEM_MC_STORE_YREG_U128': (McBlock.parseMcGeneric, True, False, ),
3099 'IEM_MC_STORE_YREG_U128_ZX_VLMAX': (McBlock.parseMcGeneric, True, False, ),
3100 'IEM_MC_STORE_YREG_U256_ZX_VLMAX': (McBlock.parseMcGeneric, True, False, ),
3101 'IEM_MC_STORE_YREG_U32_ZX_VLMAX': (McBlock.parseMcGeneric, True, False, ),
3102 'IEM_MC_STORE_YREG_U64_ZX_VLMAX': (McBlock.parseMcGeneric, True, False, ),
3103 'IEM_MC_SUB_GREG_U16': (McBlock.parseMcGeneric, True, True, ),
3104 'IEM_MC_SUB_GREG_U32': (McBlock.parseMcGeneric, True, True, ),
3105 'IEM_MC_SUB_GREG_U64': (McBlock.parseMcGeneric, True, True, ),
3106 'IEM_MC_SUB_LOCAL_U16': (McBlock.parseMcGeneric, False, False, ),
3107 'IEM_MC_UPDATE_FPU_OPCODE_IP': (McBlock.parseMcGeneric, True, False, ),
3108 'IEM_MC_UPDATE_FSW': (McBlock.parseMcGeneric, True, False, ),
3109 'IEM_MC_UPDATE_FSW_CONST': (McBlock.parseMcGeneric, True, False, ),
3110 'IEM_MC_UPDATE_FSW_THEN_POP': (McBlock.parseMcGeneric, True, False, ),
3111 'IEM_MC_UPDATE_FSW_THEN_POP_POP': (McBlock.parseMcGeneric, True, False, ),
3112 'IEM_MC_UPDATE_FSW_WITH_MEM_OP': (McBlock.parseMcGeneric, True, False, ),
3113 'IEM_MC_UPDATE_FSW_WITH_MEM_OP_THEN_POP': (McBlock.parseMcGeneric, True, False, ),
3114 'IEM_MC_NO_NATIVE_RECOMPILE': (McBlock.parseMcGeneric, False, False, ),
3115};
3116
## Module-level list of microcode blocks (McBlock instances).
g_aoMcBlocks = list() # type: List[McBlock]
3119
3120
3121
class ParserException(Exception):
    """ Parser exception carrying a single message string. """
    def __init__(self, sMessage):
        super(ParserException, self).__init__(sMessage);
3126
3127
3128class SimpleParser(object): # pylint: disable=too-many-instance-attributes
3129 """
3130 Parser of IEMAllInstruction*.cpp.h instruction specifications.
3131 """
3132
3133 ## @name Parser state.
3134 ## @{
3135 kiCode = 0;
3136 kiCommentMulti = 1;
3137 ## @}
3138
class Macro(object):
    """
    A preprocessor macro definition: name, optional parameter list, body
    text and the line number supplied by the creator.
    """
    def __init__(self, sName, asArgs, sBody, iLine):
        self.sName  = sName;    ##< The macro name.
        self.asArgs = asArgs;   ##< None if simple macro, list of parameters otherwise.
        self.sBody  = sBody;    ##< The unexpanded macro body.
        self.iLine  = iLine;    ##< Line number as given by the caller.
        ## Matches any parameter name in the body together with an optional
        ## '##' operator on either side.  None when the macro has no parameters.
        self.oReArgMatch = None;
        if asArgs:
            self.oReArgMatch = re.compile(r'(\s*##\s*|\b)(' + '|'.join(asArgs) + r')(\s*##\s*|\b)');

    @staticmethod
    def _needSpace(ch):
        """ This is just to make the expanded output a bit prettier. """
        if ch == '(':
            return False;
        return ch.isspace();

    def expandMacro(self, oParent, asArgs = None):
        """
        Returns the macro body with every parameter occurrence replaced by
        the corresponding entry of asArgs.

        oParent is currently unused.  For a macro without parameters asArgs
        must be empty/None and the body is returned unchanged.
        """
        _ = oParent;
        oReArg = self.oReArgMatch;
        if not oReArg:
            assert not asArgs;
            return self.sBody;

        assert len(asArgs) == len(self.asArgs);
        #oParent.debug('%s: %s' % (self.sName, self.oReArgMatch.pattern,));
        dValueByParam = { self.asArgs[idxArg]: sArgValue for idxArg, sArgValue in enumerate(asArgs) };

        sExpanded = self.sBody;
        oHit      = oReArg.search(sExpanded);
        while oHit:
            offStart     = oHit.start();
            offEnd       = oHit.end();
            sReplacement = dValueByParam[oHit.group(2)];

            # Padding is only considered on a side where no '##' operator was matched.
            fPadBefore = not oHit.group(1) and offStart > 0 and self._needSpace(sExpanded[offStart]);
            fPadAfter  = not oHit.group(3) and offEnd < len(sExpanded) and self._needSpace(sExpanded[offEnd]);

            sExpanded = sExpanded[:offStart] \
                      + (' ' if fPadBefore else '') \
                      + sReplacement \
                      + (' ' if fPadAfter else '') \
                      + sExpanded[offEnd:];

            # Continue the search behind the value that was just inserted.
            oHit = oReArg.search(sExpanded, offStart + len(sReplacement));

        return sExpanded;
3180
class PreprocessorConditional(object):
    """
    Preprocessor conditional (#if/#ifdef/#ifndef/#elif/#else/#endif).

    An instance holds the directive type (sType) and its expression (sExpr),
    the list of #elif conditionals attached to it (aoElif) and a flag telling
    whether the #else part has been entered (fInElse).  The constructor
    validates the expression and raises Exception if it isn't supported.
    """

    ## Known defines.
    #   - A value of 1 indicates that it's always defined.
    #   - A value of 0 if it's always undefined.
    #   - A value of -1 if it's an arch define, so the result depends on the script parameters.
    #   - A value of -2 if it's not recognized when filtering MC blocks.
    kdKnownDefines = {
        'IEM_WITH_ONE_BYTE_TABLE':          1,
        'IEM_WITH_TWO_BYTE_TABLE':          1,
        'IEM_WITH_THREE_0F_38':             1,
        'IEM_WITH_THREE_0F_3A':             1,
        'IEM_WITH_THREE_BYTE_TABLES':       1,
        'IEM_WITH_3DNOW':                   1,
        'IEM_WITH_3DNOW_TABLE':             1,
        'IEM_WITH_VEX':                     1,
        'IEM_WITH_VEX_TABLES':              1,
        'VBOX_WITH_NESTED_HWVIRT_VMX':      1,
        'VBOX_WITH_NESTED_HWVIRT_VMX_EPT':  1,
        'VBOX_WITH_NESTED_HWVIRT_SVM':      1,
        'LOG_ENABLED':                      1,
        'RT_WITHOUT_PRAGMA_ONCE':           0,
        'TST_IEM_CHECK_MC':                 0,
        'IEM_WITHOUT_ASSEMBLY':            -2, ##< @todo ??
        'RT_ARCH_AMD64':                   -1,
        'RT_ARCH_ARM64':                   -1,
        'RT_ARCH_ARM32':                   -1,
        'RT_ARCH_X86':                     -1,
        'RT_ARCH_SPARC':                   -1,
        'RT_ARCH_SPARC64':                 -1,
    };
    ## Maps the build architecture name to the RT_ARCH_XXX define it implies.
    # NOTE(review): 'sparc32' used to map to RT_ARCH_SPARC64; the entries for
    # the other architectures listed in kdKnownDefines were added alongside the
    # fix - confirm the key spelling against the callers passing sArch.
    kdBuildArchToIprt = {
        'x86':      'RT_ARCH_X86',
        'amd64':    'RT_ARCH_AMD64',
        'arm32':    'RT_ARCH_ARM32',
        'arm64':    'RT_ARCH_ARM64',
        'sparc32':  'RT_ARCH_SPARC',
        'sparc64':  'RT_ARCH_SPARC64',
    };
    ## For parsing the next defined(xxxx).
    koMatchDefined = re.compile(r'\s*defined\s*\(\s*([^ \t)]+)\s*\)\s*');

    def __init__(self, sType, sExpr):
        """
        sType is the directive ('if', 'elif', or one taking a single define
        such as 'ifdef'/'ifndef'), sExpr the expression following it.

        Raises Exception if the expression or define isn't supported.
        """
        self.sType   = sType;
        self.sExpr   = sExpr;   ##< Expression without command and no leading or trailing spaces.
        self.aoElif  = []       # type: List[PreprocessorConditional]
        self.fInElse = False;   ##< Only ever tested for truth in this class.
        if sType in ('if', 'elif'):
            self.checkExpression(sExpr);
        else:
            self.checkSupportedDefine(sExpr);

    @classmethod
    def checkSupportedDefine(cls, sDefine):
        """ Checks that sDefine is one that we support. Raises exception if unsupported. """
        #print('debug: checkSupportedDefine: %s' % (sDefine,), file = sys.stderr);
        if sDefine in cls.kdKnownDefines:
            return True;
        # Header include guards are accepted without being listed.
        if sDefine.startswith('VMM_INCLUDED_') and sDefine.endswith('_h'):
            return True;
        raise Exception('Unsupported define: %s' % (sDefine,));

    @classmethod
    def checkExpression(cls, sExpr):
        """
        Check that the expression is supported. Raises exception if not.

        Accepted are '0', '1' and sequences of [!]defined(XXX) terms combined
        with '||' / '&&' and parentheses, optionally ending with a literal 1.
        Returns True.
        """
        #print('debug: checkExpression: %s' % (sExpr,), file = sys.stderr);
        if sExpr in ('0', '1'):
            return True;

        off     = 0;
        cParan  = 0;
        cchExpr = len(sExpr);
        while off < cchExpr:
            ch = sExpr[off];

            # Unary operator or parentheses:
            if ch in ('(', '!'):
                if ch == '(':
                    cParan += 1;
                off += 1;
            else:
                # defined(xxxx)
                oMatch = cls.koMatchDefined.match(sExpr, off);
                if oMatch:
                    cls.checkSupportedDefine(oMatch.group(1));
                    off = oMatch.end();
                elif sExpr[off:] == '1':
                    # Trailing literal 1: nothing further to check.  (There is
                    # no match object to take the end offset from here.)
                    off = cchExpr;
                else:
                    raise Exception('Cannot grok: \'%s\' (at %u in: \'%s\')' % (sExpr[off:off + 10], off + 1, sExpr,));

                # Look for closing parentheses.
                while off < cchExpr and sExpr[off].isspace():
                    off += 1;
                if cParan > 0:
                    while off < cchExpr and sExpr[off] == ')':
                        if cParan <= 0:
                            raise Exception('Unbalanced parentheses at %u in \'%s\'' % (off + 1, sExpr,));
                        cParan -= 1;
                        off    += 1;
                        while off < cchExpr and sExpr[off].isspace():
                            off += 1;

                # Look for binary operator.
                if off >= cchExpr:
                    break;
                if sExpr[off:off + 2] in ('||', '&&'):
                    off += 2;
                else:
                    raise Exception('Cannot grok operator: \'%s\' (at %u in: \'%s\')'
                                    % (sExpr[off:off + 2], off + 1, sExpr,));

                # Skip spaces.
                while off < cchExpr and sExpr[off].isspace():
                    off += 1;
        if cParan != 0:
            raise Exception('Unbalanced parentheses at %u in \'%s\'' % (off + 1, sExpr,));
        return True;

    @classmethod
    def isArchIncludedInExpr(cls, sExpr, sArch):
        """
        Checks if sArch is included in the given expression.

        Returns True as soon as one defined(XXX) term evaluates to true for
        sArch (see matchDefined) or a literal 1 is reached, False if none
        does.  Raises Exception on anything else than '||' separated terms.
        """
        # We only grok defined() [|| defined()...] and [1|0] at the moment.
        if sExpr == '0':
            return False;
        if sExpr == '1':
            return True;
        off     = 0;
        cchExpr = len(sExpr);
        while off < cchExpr:
            # defined(xxxx)
            oMatch = cls.koMatchDefined.match(sExpr, off);
            if not oMatch:
                if sExpr[off:] == '1':
                    return True;
                raise Exception('Cannot grok: %s (at %u in: %s)' % (sExpr[off:off + 10], off + 1, sExpr,));
            if cls.matchDefined(oMatch.group(1), sArch):
                return True;
            off = oMatch.end();

            # Look for OR operator.
            while off < cchExpr and sExpr[off].isspace():
                off += 1;
            if off >= cchExpr:
                break;
            if sExpr.startswith('||', off):     # Must test at the current offset, not the start of the string.
                off += 2;
            else:
                raise Exception('Cannot grok: %s (at %u in: %s)' % (sExpr[off:off + 10], off + 1, sExpr,));

            # Skip spaces so a trailing literal 1 is recognized too.
            while off < cchExpr and sExpr[off].isspace():
                off += 1;

        return False;

    @classmethod
    def matchArch(cls, sDefine, sArch):
        """
        Compares sDefine (RT_ARCH_XXXX) and sArch (x86, amd64, arm64, ++).

        Raises KeyError if sArch isn't present in kdBuildArchToIprt.
        """
        return cls.kdBuildArchToIprt[sArch] == sDefine;

    @classmethod
    def matchDefined(cls, sExpr, sArch):
        """
        Check the result of an ifdef/ifndef expression, given sArch.

        Defines not listed in kdKnownDefines are treated as undefined.
        Raises Exception for defines marked -2 in kdKnownDefines.
        """
        iDefine = cls.kdKnownDefines.get(sExpr, 0);
        if iDefine == -2:
            raise Exception('Unsupported define for MC block filtering: %s' % (sExpr,));
        return iDefine == 1 or (iDefine == -1 and cls.matchArch(sExpr, sArch));

    def isArchIncludedInPrimaryBlock(self, sArch):
        """ Checks if sArch is included in the (primary) 'if' block. """
        if self.sType == 'ifdef':
            return self.matchDefined(self.sExpr, sArch);
        if self.sType == 'ifndef':
            return not self.matchDefined(self.sExpr, sArch);
        return self.isArchIncludedInExpr(self.sExpr, sArch);

    @staticmethod
    def isInBlockForArch(aoCppCondStack, sArch, iLine):
        """
        Checks if sArch is included in the current conditional block.

        Every conditional on the stack must select the part we're currently
        in: either the primary block with no #elif/#else seen yet, the last
        #elif seen (with no #else yet), or the #else part when neither the
        primary block nor any #elif matches.  iLine is unused.
        """
        _ = iLine;
        #print('debug: isInBlockForArch(%s,%s); line %s' % (len(aoCppCondStack), sArch, iLine), file = sys.stderr);
        for oCond in aoCppCondStack:
            if oCond.isArchIncludedInPrimaryBlock(sArch):
                # The primary block applies, so we must still be inside it.
                if oCond.aoElif or oCond.fInElse:
                    #print('debug: isInBlockForArch -> False #1', file = sys.stderr);
                    return False;
                #print('debug: isInBlockForArch(%s,%s): in IF-block' % (len(aoCppCondStack), sArch), file = sys.stderr);
            else:
                fFine = False;
                for oElifCond in oCond.aoElif:
                    if oElifCond.isArchIncludedInPrimaryBlock(sArch):
                        # A matching #elif only counts if it is the one we're currently in.
                        if oElifCond is not oCond.aoElif[-1] or oCond.fInElse:
                            #print('debug: isInBlockForArch -> False #3', file = sys.stderr);
                            return False;
                        fFine = True;
                if not fFine and not oCond.fInElse:
                    #print('debug: isInBlockForArch -> False #4', file = sys.stderr);
                    return False;
        #print('debug: isInBlockForArch -> True', file = sys.stderr);
        return True;
3371
def __init__(self, sSrcFile, asLines, sDefaultMap, sHostArch, oInheritMacrosFrom = None):
    """
    Initializes the parser state for one source file.

    sSrcFile is the file name used in messages, asLines the lines to parse,
    sDefaultMap a key into g_dInstructionMaps selecting the default map and
    sHostArch the host architecture name.  When oInheritMacrosFrom is
    given, a copy of its macro dictionary and its macro regular expression
    are taken over.
    """
    self.sSrcFile       = sSrcFile;
    self.asLines        = asLines;
    self.iLine          = 0;
    self.iState         = self.kiCode;
    self.sComment       = '';
    self.iCommentLine   = 0;
    self.aoCurInstrs    = []   # type: List[Instruction]
    self.oCurFunction   = None # type: DecoderFunction
    self.iMcBlockInFunc = 0;
    self.oCurMcBlock    = None # type: McBlock
    self.dMacros        = {}   # type: Dict[str, SimpleParser.Macro]
    self.oReMacros      = None # type: re ##< Regular expression matching invocations of anything in self.dMacros.
    if oInheritMacrosFrom:
        self.dMacros    = dict(oInheritMacrosFrom.dMacros);
        self.oReMacros  = oInheritMacrosFrom.oReMacros;
    self.aoCppCondStack = []   # type: List[PreprocessorConditional] ##< Preprocessor conditional stack.
    self.sHostArch      = sHostArch;

    assert sDefaultMap in g_dInstructionMaps;
    self.oDefaultMap    = g_dInstructionMaps[sDefaultMap];

    self.cTotalInstr    = 0;
    self.cTotalStubs    = 0;
    self.cTotalTagged   = 0;
    self.cTotalMcBlocks = 0;

    # Note! Raw strings are used for all patterns so that regex escapes such
    #       as \[ and \d aren't seen as (invalid) string escape sequences.
    self.oReMacroName   = re.compile(r'^[A-Za-z_][A-Za-z0-9_]*$');
    self.oReMnemonic    = re.compile(r'^[A-Za-z_][A-Za-z0-9_]*$');
    self.oReStatsName   = re.compile(r'^[A-Za-z_][A-Za-z0-9_]*$');
    self.oReFunctionName= re.compile(r'^iemOp_[A-Za-z_][A-Za-z0-9_]*$');
    self.oReGroupName   = re.compile(r'^og_[a-z0-9]+(|_[a-z0-9]+|_[a-z0-9]+_[a-z0-9]+)$');
    self.oReDisEnum     = re.compile(r'^OP_[A-Z0-9_]+$');
    self.oReFunTable    = re.compile(r'^(IEM_STATIC|static) +const +PFNIEMOP +g_apfn[A-Za-z0-9_]+ *\[ *\d* *\] *= *$');
    self.oReComment     = re.compile(r'//.*?$|/\*.*?\*/'); ## Full comments.
    self.oReHashDefine2 = re.compile(r'(?s)\A\s*([A-Za-z_][A-Za-z0-9_]*)\(([^)]*)\)\s*(.*)\Z'); ##< With arguments.
    self.oReHashDefine3 = re.compile(r'(?s)\A\s*([A-Za-z_][A-Za-z0-9_]*)[^(]\s*(.*)\Z');         ##< Simple, no arguments.
    self.oReMcBeginEnd  = re.compile(r'\bIEM_MC_(BEGIN|END|DEFER_TO_CIMPL_[1-5]_RET)\s*\('); ##> Not DEFER_TO_CIMPL_0_RET!
    self.fDebug         = True;
    self.fDebugMc       = False;
    self.fDebugPreproc  = False;

    ## Maps comment tags to the method parsing them.
    self.dTagHandlers   = {
        '@opbrief':     self.parseTagOpBrief,
        '@opdesc':      self.parseTagOpDesc,
        '@opmnemonic':  self.parseTagOpMnemonic,
        '@op1':         self.parseTagOpOperandN,
        '@op2':         self.parseTagOpOperandN,
        '@op3':         self.parseTagOpOperandN,
        '@op4':         self.parseTagOpOperandN,
        '@oppfx':       self.parseTagOpPfx,
        '@opmaps':      self.parseTagOpMaps,
        '@opcode':      self.parseTagOpcode,
        '@opcodesub':   self.parseTagOpcodeSub,
        '@openc':       self.parseTagOpEnc,
        '@opfltest':    self.parseTagOpEFlags,
        '@opflmodify':  self.parseTagOpEFlags,
        '@opflundef':   self.parseTagOpEFlags,
        '@opflset':     self.parseTagOpEFlags,
        '@opflclear':   self.parseTagOpEFlags,
        '@ophints':     self.parseTagOpHints,
        '@opdisenum':   self.parseTagOpDisEnum,
        '@opmincpu':    self.parseTagOpMinCpu,
        '@opcpuid':     self.parseTagOpCpuId,
        '@opgroup':     self.parseTagOpGroup,
        '@opunused':    self.parseTagOpUnusedInvalid,
        '@opinvalid':   self.parseTagOpUnusedInvalid,
        '@opinvlstyle': self.parseTagOpUnusedInvalid,
        '@optest':      self.parseTagOpTest,
        '@optestign':   self.parseTagOpTestIgnore,
        '@optestignore': self.parseTagOpTestIgnore,
        '@opcopytests': self.parseTagOpCopyTests,
        '@oponly':      self.parseTagOpOnlyTest,
        '@oponlytest':  self.parseTagOpOnlyTest,
        '@opxcpttype':  self.parseTagOpXcptType,
        '@opstats':     self.parseTagOpStats,
        '@opfunction':  self.parseTagOpFunction,
        '@opdone':      self.parseTagOpDone,
    };
    # Numbered test tags, both as '@optestN' and '@optest[N]'.
    for i in range(48):
        self.dTagHandlers['@optest%u' % (i,)]   = self.parseTagOpTestNum;
        self.dTagHandlers['@optest[%u]' % (i,)] = self.parseTagOpTestNum;

    self.asErrors = [];
3456
def raiseError(self, sMessage):
    """
    Raises a ParserException with sMessage prefixed by the source file
    name and the current line number.
    """
    sText = "%s:%d: error: %s" % (self.sSrcFile, self.iLine, sMessage,);
    raise ParserException(sText);
3462
def raiseCommentError(self, iLineInComment, sMessage):
    """
    Like raiseError, except that the reported line number is
    self.iCommentLine + iLineInComment.
    """
    iReportLine = self.iCommentLine + iLineInComment;
    raise ParserException("%s:%d: error: %s" % (self.sSrcFile, iReportLine, sMessage,));
3468
def error(self, sMessage):
    """
    Records an error for the current line in self.asErrors.
    Returns False.
    """
    sEntry = u'%s:%d: error: %s\n' % (self.sSrcFile, self.iLine, sMessage,);
    self.asErrors.append(sEntry);
    return False;
3476
def errorOnLine(self, iLine, sMessage):
    """
    Records an error for the given line number in self.asErrors.
    Returns False.
    """
    sEntry = u'%s:%d: error: %s\n' % (self.sSrcFile, iLine, sMessage,);
    self.asErrors.append(sEntry);
    return False;
3484
def errorComment(self, iLineInComment, sMessage):
    """
    Records an error for a line inside the current comment; the reported
    line number is self.iCommentLine + iLineInComment.
    Returns False.
    """
    iReportLine = self.iCommentLine + iLineInComment;
    self.asErrors.append(u'%s:%d: error: %s\n' % (self.sSrcFile, iReportLine, sMessage,));
    return False;
3492
def printErrors(self):
    """
    Writes the recorded errors (if any) to stderr.
    Returns the number of errors.
    """
    cErrors = len(self.asErrors);
    if cErrors > 0:
        sys.stderr.write(u''.join(self.asErrors));
    return cErrors;
3501
def debug(self, sMessage):
    """
    Prints a 'debug: ' prefixed message to stderr when self.fDebug is set.
    """
    if not self.fDebug:
        return;
    print('debug: %s' % (sMessage,), file = sys.stderr);
3508
def stripComments(self, sLine):
    """
    Returns sLine with each comment matched by self.oReComment replaced by
    a single space.

    Records an error if a comment start or end marker remains afterwards,
    i.e. when the line holds part of a multi-line comment.
    """
    sStripped = self.oReComment.sub(" ", sLine);
    if '/*' in sStripped or '*/' in sStripped:
        self.error('Unexpected multi-line comment will not be handled correctly. Please simplify.');
    return sStripped;
3519
def parseFunctionTable(self, sLine):
    """
    Parses a PFNIEMOP table, updating/checking the @oppfx value.

    sLine is the line declaring the table; the table body is read from
    self.asLines starting at self.iLine.  Each entry that isn't an
    iemOp_Invalid* function is looked up in g_dAllInstructionsByFunction
    and the opcode and prefix implied by its table position are applied
    to a matching instruction, or to a copy of the first one when none of
    the known instructions fits.

    Returns True on success, False after recording an error.

    Note! Updates iLine as it consumes the whole table.
    """

    #
    # Extract the table name.
    #
    sName = re.search(r' *([a-zA-Z_0-9]+) *\[', sLine).group(1);
    oMap  = g_dInstructionMapsByIemName.get(sName);
    if not oMap:
        self.debug('No map for PFNIEMOP table: %s' % (sName,));
        oMap = self.oDefaultMap; # This is wrong wrong wrong.

    #
    # All but the g_apfnOneByteMap & g_apfnEscF1_E0toFF tables uses four
    # entries per byte:
    #       no prefix, 066h prefix, f3h prefix, f2h prefix
    # Those tables has 256 & 32 entries respectively.
    #
    cEntriesPerByte   = 4;
    cValidTableLength = 1024;
    asPrefixes        = ('none', '0x66', '0xf3', '0xf2');

    oEntriesMatch = re.search(r'\[ *(256|32) *\]', sLine);
    if oEntriesMatch:
        cEntriesPerByte   = 1;
        cValidTableLength = int(oEntriesMatch.group(1));
        asPrefixes        = (None,);

    #
    # The next line should be '{' and nothing else.
    #
    if self.iLine >= len(self.asLines) or not re.match(r'^ *{ *$', self.asLines[self.iLine]):
        return self.errorOnLine(self.iLine + 1, 'Expected lone "{" on line following PFNIEMOP table %s start' % (sName, ));
    self.iLine += 1;

    #
    # Parse till we find the end of the table.
    #
    iEntry = 0;
    while self.iLine < len(self.asLines):
        # Get the next line and strip comments and spaces (assumes no
        # multi-line comments).
        sLine = self.asLines[self.iLine];
        self.iLine += 1;
        sLine = self.stripComments(sLine).strip();

        # Split the line up into entries, expanding IEMOP_X4 usage.  The
        # list is walked backwards so that inserting and deleting doesn't
        # disturb the indexes still to be visited.
        asEntries = sLine.split(',');
        for i in range(len(asEntries) - 1, -1, -1):
            sEntry = asEntries[i].strip();
            if sEntry.startswith('IEMOP_X4(') and sEntry[-1] == ')':
                sEntry = (sEntry[len('IEMOP_X4('):-1]).strip();
                asEntries.insert(i + 1, sEntry);
                asEntries.insert(i + 1, sEntry);
                asEntries.insert(i + 1, sEntry);
            if sEntry:
                asEntries[i] = sEntry;
            else:
                del asEntries[i];

        # Process the entries.
        for sEntry in asEntries:
            if sEntry in ('};', '}'):
                if iEntry != cValidTableLength:
                    return self.error('Wrong table length for %s: %#x, expected %#x' % (sName, iEntry, cValidTableLength, ));
                return True;
            if not sEntry.startswith('iemOp_Invalid'):
                # Look up matching instruction by function.
                sPrefix = asPrefixes[iEntry % cEntriesPerByte];
                sOpcode = '%#04x' % (iEntry // cEntriesPerByte);
                aoInstr = g_dAllInstructionsByFunction.get(sEntry);
                if aoInstr:
                    if not isinstance(aoInstr, list):
                        aoInstr = [aoInstr,];
                    oInstr = None;
                    for oCurInstr in aoInstr:
                        if oCurInstr.sOpcode == sOpcode and oCurInstr.sPrefix == sPrefix:
                            pass;                               # Exact match, nothing to fill in.
                        elif oCurInstr.sOpcode == sOpcode and oCurInstr.sPrefix is None:
                            oCurInstr.sPrefix = sPrefix;        # Opcode matches, prefix still open.
                        elif oCurInstr.sOpcode is None and oCurInstr.sPrefix is None:
                            oCurInstr.sOpcode = sOpcode;        # Nothing set yet, claim it.
                            oCurInstr.sPrefix = sPrefix;
                        else:
                            continue;
                        oInstr = oCurInstr;
                        break;
                    if not oInstr:
                        # No instruction fits this table position, so add a copy of the first one for it.
                        oInstr = aoInstr[0].copy(oMap = oMap, sOpcode = sOpcode, sPrefix = sPrefix);
                        aoInstr.append(oInstr);
                        g_dAllInstructionsByFunction[sEntry] = aoInstr;
                        g_aoAllInstructions.append(oInstr);
                        oMap.aoInstructions.append(oInstr);
                else:
                    self.debug('Function "%s", entry %#04x / byte %#04x in %s, is not associated with an instruction.'
                               % (sEntry, iEntry, iEntry // cEntriesPerByte, sName,));
            iEntry += 1;

    return self.error('Unexpected end of file in PFNIEMOP table');
3625
def addInstruction(self, iLine = None):
    """
    Creates a new Instruction for the current source file and registers it
    both in g_aoAllInstructions and in the list of current instructions.

    The current line number is used when iLine is None.
    Returns the new instruction.
    """
    if iLine is None:
        iLine = self.iLine;
    oNewInstr = Instruction(self.sSrcFile, iLine);
    g_aoAllInstructions.append(oNewInstr);
    self.aoCurInstrs.append(oNewInstr);
    return oNewInstr;
3634
def deriveMnemonicAndOperandsFromStats(self, oInstr, sStats):
    """
    Derives the mnemonic and operands from a IEM stats base name like string.

    Nothing is done if the instruction already has a mnemonic.  Otherwise
    the first underscore separated word (lower cased) becomes the mnemonic
    and, if the instruction has no operands yet, the remaining words are
    looked up in g_kdOpTypes to create operands.

    Returns False when hitting an unknown operand type, otherwise True.
    """
    if oInstr.sMnemonic is not None:
        return True;

    asParts = sStats.split('_');
    oInstr.sMnemonic = asParts[0].lower();
    if len(asParts) <= 1 or oInstr.aoOperands:
        return True;

    for sType in asParts[1:]:
        if sType not in g_kdOpTypes:
            #return self.error('unknown operand type: %s (instruction: %s)' % (sType, oInstr))
            return False;
        oInstr.aoOperands.append(Operand(g_kdOpTypes[sType][1], sType));
    return True;
3650
def doneInstructionOne(self, oInstr, iLine):
    """
    Complete the parsing by processing, validating and expanding raw inputs.

    Marks oInstr as completed on iLine, fills in missing mnemonic, disassembler
    enum, stats name, function name, maps and encoding, and finally registers
    the instruction in its maps and in the stats and function indexed
    dictionaries.  A duplicate stats name is recorded as an error.

    Returns True.
    """
    assert oInstr.iLineCompleted is None;
    oInstr.iLineCompleted = iLine;

    # Note: Neither specified instructions (cOpTags > 0) nor unspecified
    #       legacy ones (only an old style opcode comment and the function
    #       name to go on) need any separate treatment at this point.

    #
    # Common defaults.
    #

    # Guess mnemonic and operands from stats (or else the function name) if the former is missing.
    if oInstr.sMnemonic is None:
        sGuessFrom = oInstr.sStats;
        if sGuessFrom is None and oInstr.sFunction is not None:
            sGuessFrom = oInstr.sFunction.replace('iemOp_', '');
        if sGuessFrom is not None:
            self.deriveMnemonicAndOperandsFromStats(oInstr, sGuessFrom);

    # Derive the disassembler op enum constant from the mnemonic.
    if oInstr.sMnemonic is not None and oInstr.sDisEnum is None:
        oInstr.sDisEnum = 'OP_' + oInstr.sMnemonic.upper();

    # Derive the IEM statistics base name from mnemonic and operand types.
    if oInstr.sStats is None:
        if oInstr.sFunction is not None:
            oInstr.sStats = oInstr.sFunction.replace('iemOp_', '');
        elif oInstr.sMnemonic is not None:
            asParts = [oInstr.sMnemonic,] + [oOperand.sType for oOperand in oInstr.aoOperands if oOperand.sType];
            oInstr.sStats = '_'.join(asParts);

    # Derive the IEM function name from mnemonic and operand types.
    if oInstr.sFunction is None:
        if oInstr.sMnemonic is not None:
            asParts = [oInstr.sMnemonic,] + [oOperand.sType for oOperand in oInstr.aoOperands if oOperand.sType];
            oInstr.sFunction = 'iemOp_' + '_'.join(asParts);
        elif oInstr.sStats:
            oInstr.sFunction = 'iemOp_' + oInstr.sStats;

    #
    # Apply default map and then add the instruction to all it's groups.
    #
    if not oInstr.aoMaps:
        oInstr.aoMaps = [ self.oDefaultMap, ];
    for oMap in oInstr.aoMaps:
        oMap.aoInstructions.append(oInstr);

    #
    # Derive encoding from operands and maps.
    #
    if oInstr.sEncoding is None:
        if not oInstr.aoOperands:
            if oInstr.fUnused and oInstr.sSubOpcode:
                oInstr.sEncoding = 'VEX.ModR/M' if oInstr.onlyInVexMaps() else 'ModR/M';
            else:
                oInstr.sEncoding = 'VEX.fixed' if oInstr.onlyInVexMaps() else 'fixed';
        elif oInstr.aoOperands[0].usesModRM():
            fVex = (len(oInstr.aoOperands) >= 2 and oInstr.aoOperands[1].sWhere == 'vvvv') \
                or oInstr.onlyInVexMaps();
            oInstr.sEncoding = 'VEX.ModR/M' if fVex else 'ModR/M';

    #
    # Check the opstat value and add it to the opstat indexed dictionary.
    #
    if oInstr.sStats:
        if oInstr.sStats in g_dAllInstructionsByStat:
            self.error('Duplicate opstat value "%s"\nnew: %s\nold: %s'
                       % (oInstr.sStats, oInstr, g_dAllInstructionsByStat[oInstr.sStats],));
        else:
            g_dAllInstructionsByStat[oInstr.sStats] = oInstr;

    #
    # Add to function indexed dictionary.  We allow multiple instructions per function.
    #
    if oInstr.sFunction:
        if oInstr.sFunction in g_dAllInstructionsByFunction:
            g_dAllInstructionsByFunction[oInstr.sFunction].append(oInstr);
        else:
            g_dAllInstructionsByFunction[oInstr.sFunction] = [oInstr,];

    #self.debug('%d..%d: %s; %d @op tags' % (oInstr.iLineCreated, oInstr.iLineCompleted, oInstr.sFunction, oInstr.cOpTags));
    return True;
3756
def doneInstructions(self, iLineInComment = None, fEndOfFunction = False):
    """
    Completes all current instructions and resets the per-instruction state.

    The completion line is the current line, or self.iCommentLine +
    iLineInComment when iLineInComment is given.  With fEndOfFunction set
    the current function (if any) is completed and the per-function state
    is reset as well.

    Returns True.
    """
    iDoneLine = self.iLine if iLineInComment is None else self.iCommentLine + iLineInComment;
    aoDone    = self.aoCurInstrs;
    for oInstr in aoDone:
        self.doneInstructionOne(oInstr, iDoneLine);
        if oInstr.fStub:
            self.cTotalStubs += 1;

    self.cTotalInstr += len(aoDone);

    self.sComment    = '';
    self.aoCurInstrs = [];
    if not fEndOfFunction:
        return True;

    #self.debug('%s: oCurFunction=None' % (self.iLine, ));
    if self.oCurFunction:
        self.oCurFunction.complete(self.iLine, self.asLines[self.oCurFunction.iBeginLine - 1 : self.iLine]);
    self.oCurFunction   = None;
    self.iMcBlockInFunc = 0;
    return True;
3777
def setInstrunctionAttrib(self, sAttrib, oValue, fOverwrite = False):
    """
    Sets the attribute sAttrib of all current instructions to oValue.

    Unless fOverwrite is exactly True, only attributes that are currently
    None are replaced; any other existing value is left alone.
    """
    for oInstr in self.aoCurInstrs:
        if fOverwrite is True or getattr(oInstr, sAttrib) is None:
            setattr(oInstr, sAttrib, oValue);
3789
def setInstrunctionArrayAttrib(self, sAttrib, iEntry, oValue, fOverwrite = False):
    """
    Sets entry iEntry of the list attribute sAttrib of all current
    instructions to oValue, growing the list with None entries as needed.

    Unless fOverwrite is exactly True, only entries that are currently
    None are replaced.
    """
    for oInstr in self.aoCurInstrs:
        aoArray  = getattr(oInstr, sAttrib);
        cMissing = iEntry + 1 - len(aoArray);
        if cMissing > 0:
            aoArray.extend([None,] * cMissing);
        if fOverwrite is True or aoArray[iEntry] is None:
            aoArray[iEntry] = oValue;
3801
def parseCommentOldOpcode(self, asLines):
    """
    Deals with 'Opcode 0xff /4' like comments.

    If the first comment line starts with the word 'Opcode' followed by a
    hexadecimal value, everything after 'Opcode' is stored as the
    sRawOldOpcodes attribute of the current instructions (existing values
    are not overwritten), with any '0X' prefix normalized to '0x'.

    Always returns False.
    """
    asWords = asLines[0].split() if asLines else [];
    if    len(asWords) >= 2 \
      and asWords[0] == 'Opcode' \
      and asWords[1].startswith(('0x', '0X')):
        # Drop the leading 'Opcode' word and lower case the hex prefixes.
        asOpcodes = [('0x' + sWord[2:]) if sWord.startswith('0X') else sWord for sWord in asWords[1:]];
        self.setInstrunctionAttrib('sRawOldOpcodes', ' '.join(asOpcodes));

    return False;
3817
def ensureInstructionForOpTag(self, iTagLine):
    """
    Ensure there is an instruction for the op-tag being parsed.

    Adds a new instruction (at self.iCommentLine + iTagLine) when there is
    no current one, then bumps the tag count of every current instruction
    and counts those receiving their first tag in self.cTotalTagged.

    Returns the last of the current instructions.
    """
    if len(self.aoCurInstrs) == 0:
        self.addInstruction(self.iCommentLine + iTagLine);
    for oInstr in self.aoCurInstrs:
        oInstr.cOpTags += 1;
        self.cTotalTagged += int(oInstr.cOpTags == 1);
    return self.aoCurInstrs[-1];
3827
3828 @staticmethod
3829 def flattenSections(aasSections):
3830 """
3831 Flattens multiline sections into stripped single strings.
3832 Returns list of strings, on section per string.
3833 """
3834 asRet = [];
3835 for asLines in aasSections:
3836 if asLines:
3837 asRet.append(' '.join([sLine.strip() for sLine in asLines]));
3838 return asRet;
3839
@staticmethod
def flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = '\n'):
    """
    Flattens sections into a single string.

    Lines are stripped and joined by sLineSep within a section; every
    non-empty section other than the one at index zero is preceded by
    sSectionSep.  Empty sections contribute nothing.  The final section
    does not sport a trailing separator.
    """
    # Typical: One section with a single line.
    if len(aasSections) == 1 and len(aasSections[0]) == 1:
        return aasSections[0][0].strip();

    asPieces = [];
    for idxSection, asSection in enumerate(aasSections):
        if not asSection:
            continue;
        if idxSection > 0:
            asPieces.append(sSectionSep);
        asPieces.append(sLineSep.join([sLine.strip() for sLine in asSection]));
    return ''.join(asPieces);
3857
3858
3859
3860 ## @name Tag parsers
3861 ## @{
3862
def parseTagOpBrief(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opbrief
    Value:  Text description, multiple sections, appended.

    Brief description.  If not given, it's the first sentence from @opdesc.

    The flattened value gets a trailing '.' if it lacks one, must not
    exceed 180 characters and must be a single sentence (no '. ' before the
    final dot).  An instruction can only be given one brief.

    Returns True on success, otherwise the result of self.errorComment.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sBrief = self.flattenAllSections(aasSections);
    if not sBrief:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if sBrief[-1] != '.':
        sBrief = sBrief + '.';
    if len(sBrief) > 180:
        return self.errorComment(iTagLine, '%s: value too long (max 180 chars): %s' % (sTag, sBrief));

    # Locate the first dot that is followed by a space (or is the last
    # character); dots inside words/numbers are skipped.
    offDot = sBrief.find('.');
    while 0 <= offDot < len(sBrief) - 1 and sBrief[offDot + 1] != ' ':
        offDot = sBrief.find('.', offDot + 1);
    if offDot >= 0 and offDot != len(sBrief) - 1:
        return self.errorComment(iTagLine, '%s: only one sentence: %s' % (sTag, sBrief));

    # Update the instruction, refusing to overwrite an existing brief.
    if oInstr.sBrief is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite brief "%s" with "%s"'
                                           % (sTag, oInstr.sBrief, sBrief,));
    oInstr.sBrief = sBrief;

    _ = iEndLine;
    return True;
3892
def parseTagOpDesc(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opdesc
    Value:  Text description, multiple sections, appended.

    It is used to describe instructions.  Each flattened section is
    appended to the instruction's asDescSections list.

    Always returns True.
    """
    _ = sTag; _ = iEndLine;

    oInstr = self.ensureInstructionForOpTag(iTagLine);
    if aasSections:
        asFlattened = self.flattenSections(aasSections);
        oInstr.asDescSections.extend(asFlattened);
    return True;
3907
def parseTagOpMnemonic(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opmnemonic
    Value:  mnemonic

    The 'mnemonic' value must be a valid C identifier string.  Because of
    prefixes, groups and whatnot, there are times when the mnemonic isn't
    that of an actual assembler mnemonic.

    The value is checked against self.oReMnemonic and may only be set once
    per instruction.  Returns True on success, otherwise the result of
    self.errorComment.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value and work out whether anything is wrong with it.
    sMnemonic = self.flattenAllSections(aasSections);
    sError    = None;
    if not self.oReMnemonic.match(sMnemonic):
        sError = '%s: invalid menmonic name: "%s"' % (sTag, sMnemonic,);
    elif oInstr.sMnemonic is not None:
        sError = '%s: attempting to overwrite menmonic "%s" with "%s"' % (sTag, oInstr.sMnemonic, sMnemonic,);
    if sError is not None:
        return self.errorComment(iTagLine, sError);

    oInstr.sMnemonic = sMnemonic
    return True;
3930
def parseTagOpOperandN(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tags:   \@op1, \@op2, \@op3, \@op4
    Value:  [where:]type

    The 'where' value indicates where the operand is found, like the 'reg'
    part of the ModR/M encoding.  It is validated against g_kdOpLocations
    and, when omitted, taken from entry [1] of the type's g_kdOpTypes
    record.

    The 'type' value indicates the operand type.  These follow the types
    given in the opcode tables in the CPU reference manuals.  It is
    validated against g_kdOpTypes.

    The operand index comes from the last character of the tag name.  An
    operand slot can only be filled once.

    Returns True on success, otherwise the result of self.errorComment.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    idxOp = int(sTag[-1]) - 1;
    assert 0 <= idxOp < 4;

    # Flatten, split up, and validate the "where:type" value.
    sFlattened = self.flattenAllSections(aasSections);
    asSplit = sFlattened.split(':');
    if len(asSplit) == 1:
        sType  = asSplit[0];
        sWhere = None;
    elif len(asSplit) == 2:
        (sWhere, sType) = asSplit;
    else:
        return self.errorComment(iTagLine, 'expected %s value on format "[<where>:]<type>" not "%s"' % (sTag, sFlattened,));

    if sType not in g_kdOpTypes:
        return self.errorComment(iTagLine, '%s: invalid type value "%s", valid: %s'
                                           % (sTag, sType, ', '.join(g_kdOpTypes.keys()),));
    if sWhere is None:
        sWhere = g_kdOpTypes[sType][1];
    elif sWhere not in g_kdOpLocations:
        return self.errorComment(iTagLine, '%s: invalid where value "%s", valid: %s'
                                           % (sTag, sWhere, ', '.join(g_kdOpLocations.keys()),));

    # Insert the operand, refusing to overwrite an existing one.  The list
    # is padded with None so operands may be specified out of order.
    while idxOp >= len(oInstr.aoOperands):
        oInstr.aoOperands.append(None);
    if oInstr.aoOperands[idxOp] is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s:%s" with "%s:%s"'
                                           % ( sTag, oInstr.aoOperands[idxOp].sWhere, oInstr.aoOperands[idxOp].sType,
                                               sWhere, sType,));
    oInstr.aoOperands[idxOp] = Operand(sWhere, sType);

    _ = iEndLine;
    return True;
3980
def parseTagOpMaps(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opmaps
    Value:  map[,map2]

    Indicates which maps the instruction is in.  There is a default map
    associated with each input file.

    Map names are validated against g_dInstructionMaps.  Whitespace around
    the names and empty list entries (e.g. from a trailing comma before a
    line break) are ignored.  Assigning a map the instruction already has
    is an error.

    Returns True on success, otherwise the result of self.errorComment.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split up and validate the value.  Note that str.split(',')
    # never returns an empty list, so empty entries are filtered out here to
    # make the 'value required' check effective.
    sFlattened = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',');
    asMaps = [sMap.strip() for sMap in sFlattened.split(',') if sMap.strip()];
    if not asMaps:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    for sMap in asMaps:
        if sMap not in g_dInstructionMaps:
            return self.errorComment(iTagLine, '%s: invalid map value: %s  (valid values: %s)'
                                               % (sTag, sMap, ', '.join(g_dInstructionMaps.keys()),));

    # Add the maps to the current list.  Throw errors on duplicates.
    for oMap in oInstr.aoMaps:
        if oMap.sName in asMaps:
            return self.errorComment(iTagLine, '%s: duplicate map assignment: %s' % (sTag, oMap.sName));

    for sMap in asMaps:
        oMap = g_dInstructionMaps[sMap];
        if oMap not in oInstr.aoMaps:
            oInstr.aoMaps.append(oMap);
        else:
            # Same map listed twice in this tag; report it but carry on.
            self.errorComment(iTagLine, '%s: duplicate map assignment (input): %s' % (sTag, sMap));

    _ = iEndLine;
    return True;
4015
def parseTagOpPfx(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@oppfx
    Value:  n/a|none|0x66|0xf3|0xf2

    Required prefix for the instruction.  (In a (E)VEX context this is the
    value of the 'pp' field rather than an actual prefix.)

    'n/a' is stored as None, 'none' as the string 'none'.  Anything else
    must pass _isValidOpcodeByte (a two character value gets '0x'
    prepended first) and be a member of g_kdPrefixes.  The prefix can only
    be set once per instruction.

    Returns True on success, otherwise the result of self.errorComment.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sFlattened = self.flattenAllSections(aasSections);
    asPrefixes = sFlattened.split();
    if not asPrefixes:
        # An empty value would otherwise blow up on asPrefixes[0] below.
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if len(asPrefixes) > 1:
        return self.errorComment(iTagLine, '%s: max one prefix: %s' % (sTag, asPrefixes,));

    sPrefix = asPrefixes[0].lower();
    if sPrefix == 'n/a':
        sPrefix = None;
    elif sPrefix != 'none':
        if len(sPrefix) == 2:
            sPrefix = '0x' + sPrefix;
        if not _isValidOpcodeByte(sPrefix):
            return self.errorComment(iTagLine, '%s: invalid prefix: %s' % (sTag, sPrefix,));

    if sPrefix is not None and sPrefix not in g_kdPrefixes:
        return self.errorComment(iTagLine, '%s: invalid prefix: %s (valid %s)' % (sTag, sPrefix, g_kdPrefixes,));

    # Set it.
    if oInstr.sPrefix is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sPrefix, sPrefix,));
    oInstr.sPrefix = sPrefix;

    _ = iEndLine;
    return True;
4053
def parseTagOpcode(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opcode
    Value:  0x?? | /reg (TODO: | mr/reg | 11 /reg | !11 /reg | 11 mr/reg | !11 mr/reg)

    The opcode byte or sub-byte for the instruction in the context of a map.

    Accepted forms are anything passing _isValidOpcodeByte, '/<reg>',
    '11/<reg>' and '!11/<reg>', where <reg> is a single digit 0 thru 7
    (the ModR/M reg field is three bits wide).  The opcode can only be set
    once per instruction.

    Returns True on success, otherwise the result of self.errorComment.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sRegDigits = '01234567';
    sOpcode = self.flattenAllSections(aasSections);
    if _isValidOpcodeByte(sOpcode):
        pass;
    elif len(sOpcode) == 2 and sOpcode.startswith('/')    and sOpcode[-1] in sRegDigits:
        pass;
    elif len(sOpcode) == 4 and sOpcode.startswith('11/')  and sOpcode[-1] in sRegDigits:
        pass;
    elif len(sOpcode) == 5 and sOpcode.startswith('!11/') and sOpcode[-1] in sRegDigits:
        pass;
    else:
        return self.errorComment(iTagLine, '%s: invalid opcode: %s' % (sTag, sOpcode,));

    # Set it.
    if oInstr.sOpcode is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sOpcode, sOpcode,));
    oInstr.sOpcode = sOpcode;

    _ = iEndLine;
    return True;
4083
def parseTagOpcodeSub(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opcodesub
    Value:  none | 11 mr/reg | !11 mr/reg | rex.w=0 | rex.w=1 | vex.l=0 | vex.l=1
            | 11 mr/reg vex.l=0 | 11 mr/reg vex.l=1 | !11 mr/reg vex.l=0 | !11 mr/reg vex.l=1

    This is a simple way of dealing with encodings where the mod=3 and mod!=3
    represents exactly two different instructions.  The more proper way would
    be to go via maps with two members, but this is faster.

    The value must be a key of g_kdSubOpcodes; what gets stored is entry [0]
    of the matching record.  It can only be set once per instruction.

    Returns True on success, otherwise the result of self.errorComment.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value and translate it to its canonical form.
    sRawValue = self.flattenAllSections(aasSections);
    if sRawValue not in g_kdSubOpcodes:
        return self.errorComment(iTagLine, '%s: invalid sub opcode: %s  (valid: 11, !11, none)' % (sTag, sRawValue,));
    sCanonical = g_kdSubOpcodes[sRawValue][0];

    # Refuse to replace an earlier value.
    sPrevious = oInstr.sSubOpcode;
    if sPrevious is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                           % ( sTag, sPrevious, sCanonical,));

    oInstr.sSubOpcode = sCanonical;
    return True;
4110
def parseTagOpEnc(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@openc
    Value:  ModR/M|fixed|prefix|<map name>

    The instruction operand encoding style.

    A value is accepted when it is a key of g_kdEncodings, a key of
    g_dInstructionMaps, or passes _isValidOpcodeByte.  It can only be set
    once per instruction.

    Returns True on success, otherwise the result of self.errorComment.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sEncoding = self.flattenAllSections(aasSections);
    fKnownName = sEncoding in g_kdEncodings or sEncoding in g_dInstructionMaps;
    if not fKnownName and not _isValidOpcodeByte(sEncoding):
        return self.errorComment(iTagLine, '%s: invalid encoding: %s' % (sTag, sEncoding,));

    # Refuse to replace an earlier value.
    sPrevious = oInstr.sEncoding;
    if sPrevious is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                           % ( sTag, sPrevious, sEncoding,));

    oInstr.sEncoding = sEncoding;
    return True;
4137
## EFlags tag to Instruction attribute name.
## Keyed by the sTag value parseTagOpEFlags receives; the value names the
## instruction attribute that parseTagOpEFlags reads (getattr) and assigns
## (setattr) the parsed flag list to.
kdOpFlagToAttr = {
    '@opfltest':    'asFlTest',
    '@opflmodify':  'asFlModify',
    '@opflundef':   'asFlUndefined',
    '@opflset':     'asFlSet',
    '@opflclear':   'asFlClear',
};
4146
def parseTagOpEFlags(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tags:   \@opfltest, \@opflmodify, \@opflundef, \@opflset, \@opflclear
    Value:  <eflags specifier>

    The value is a comma separated list of g_kdEFlagsMnemonics keys
    (surrounding whitespace tolerated), or the single word 'none' for an
    empty list.  The list is stored in the instruction attribute that
    self.kdOpFlagToAttr associates with the tag, which must not have been
    set already.

    Returns True on success.  Returns False when a flag is invalid and the
    result of self.errorComment on an overwrite attempt.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and split up the value.
    sJoined = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',');
    asFlags = sJoined.split(',');
    if len(asFlags) == 1 and asFlags[0].lower() == 'none':
        asFlags = [];

    # Validate each flag, normalizing those that only need stripping.  All
    # bad flags are reported before giving up.
    fOkay = True;
    for idxFlag, sFlag in enumerate(asFlags):
        if sFlag in g_kdEFlagsMnemonics:
            continue;
        sStripped = sFlag.strip();
        if sStripped in g_kdEFlagsMnemonics:
            asFlags[idxFlag] = sStripped;
        else:
            fOkay = self.errorComment(iTagLine, '%s: invalid EFLAGS value: %s' % (sTag, sFlag,));
    if not fOkay:
        return False;

    # Set them, refusing to replace an earlier list.
    sAttrib = self.kdOpFlagToAttr[sTag];
    asOld   = getattr(oInstr, sAttrib);
    if asOld is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, asOld, asFlags,));
    setattr(oInstr, sAttrib, asFlags);
    return True;
4178
def parseTagOpHints(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@ophints
    Value:  Comma or space separated list of flags and hints.

    This covers the disassembler flags table and more.

    Each hint must be a key of g_kdHints; the single word 'none' gives an
    empty list.  Valid hints are added as keys to the instruction's dHints
    dictionary; hints already present are reported but do not fail the tag.

    Returns True on success and False when an invalid hint was found.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    asHints = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asHints) == 1 and asHints[0].lower() == 'none':
        asHints = [];
    else:
        fRc = True;
        for iHint, sHint in enumerate(asHints):
            if sHint not in g_kdHints:
                # The whitespace split above already strips the words, so
                # this normalization is just a safety net.  It must update
                # the list (strings are immutable).
                if sHint.strip() in g_kdHints:
                    asHints[iHint] = sHint.strip();
                else:
                    fRc = self.errorComment(iTagLine, '%s: invalid hint value: %s' % (sTag, sHint,));
        if not fRc:
            return False;

    # Append them.
    for sHint in asHints:
        if sHint not in oInstr.dHints:
            oInstr.dHints[sHint] = True; # (dummy value, using dictionary for speed)
        else:
            self.errorComment(iTagLine, '%s: duplicate hint: %s' % ( sTag, sHint,));

    _ = iEndLine;
    return True;
4212
def parseTagOpDisEnum(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opdisenum
    Value:  OP_XXXX

    This is for selecting a specific (legacy) disassembler enum value for
    the instruction.

    A word count other than one is reported; with no words at all the
    method returns False, otherwise it carries on with the first word.
    That word must match self.oReDisEnum and the enum can only be set once
    per instruction.

    Returns True on success, otherwise False or the result of
    self.errorComment.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and split.
    asWords = self.flattenAllSections(aasSections).split();
    cWords  = len(asWords);
    if cWords != 1:
        self.errorComment(iTagLine, '%s: expected exactly one value: %s' % (sTag, asWords,));
    if cWords == 0:
        return False;

    sDisEnum = asWords[0];
    if not self.oReDisEnum.match(sDisEnum):
        return self.errorComment(iTagLine, '%s: invalid disassembler OP_XXXX enum: %s (pattern: %s)'
                                           % (sTag, sDisEnum, self.oReDisEnum.pattern));

    # Refuse to replace an earlier value.
    sPrevious = oInstr.sDisEnum;
    if sPrevious is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % (sTag, sPrevious, sDisEnum,));

    oInstr.sDisEnum = sDisEnum;
    return True;
4241
def parseTagOpMinCpu(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opmincpu
    Value:  <simple CPU name>

    Indicates when this instruction was introduced.

    The value must be a key of g_kdCpuNames.  Extra words are reported, but
    parsing carries on with the first one.  A missing value is an error.

    Returns True on success, otherwise the result of self.errorComment.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value, split into words, make sure there's just one, valid it.
    asCpus = self.flattenAllSections(aasSections).split();
    if not asCpus:
        # An empty value would otherwise blow up on asCpus[0] below.
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if len(asCpus) > 1:
        self.errorComment(iTagLine, '%s: exactly one CPU name, please: %s' % (sTag, ' '.join(asCpus),));

    sMinCpu = asCpus[0];
    if sMinCpu not in g_kdCpuNames:
        return self.errorComment(iTagLine, '%s: invalid CPU name: %s  (names: %s)'
                                           % (sTag, sMinCpu, ','.join(sorted(g_kdCpuNames)),));

    # Set it.
    # NOTE(review): The code used to assign here and then check for an
    # attempt to overwrite a different value, a check which could never
    # trigger because of the preceding assignment.  The effective behaviour
    # (last valid value wins) is kept; confirm whether a conflicting second
    # @opmincpu was meant to be reported instead.
    oInstr.sMinCpu = sMinCpu;

    _ = iEndLine;
    return True;
4271
def parseTagOpCpuId(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opcpuid
    Value:  none | <CPUID flag specifier>

    CPUID feature bit which is required for the instruction to be present.

    The value is a comma or space separated list of g_kdCpuIdFlags keys;
    the single word 'none' gives an empty list.  Valid flags are appended
    to the instruction's asCpuIds list; flags already present are reported
    but do not fail the tag.

    Returns True on success and False when an invalid flag was found.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    asCpuIds = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asCpuIds) == 1 and asCpuIds[0].lower() == 'none':
        asCpuIds = [];
    else:
        fRc = True;
        for iCpuId, sCpuId in enumerate(asCpuIds):
            if sCpuId not in g_kdCpuIdFlags:
                # The whitespace split above already strips the words, so
                # this normalization is just a safety net.  It must update
                # the list (strings are immutable).
                if sCpuId.strip() in g_kdCpuIdFlags:
                    asCpuIds[iCpuId] = sCpuId.strip();
                else:
                    fRc = self.errorComment(iTagLine, '%s: invalid CPUID value: %s' % (sTag, sCpuId,));
        if not fRc:
            return False;

    # Append them.
    for sCpuId in asCpuIds:
        if sCpuId not in oInstr.asCpuIds:
            oInstr.asCpuIds.append(sCpuId);
        else:
            self.errorComment(iTagLine, '%s: duplicate CPUID: %s' % ( sTag, sCpuId,));

    _ = iEndLine;
    return True;
4305
def parseTagOpGroup(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opgroup
    Value:  op_grp1[_subgrp2[_subsubgrp3]]

    Instruction grouping.

    Exactly one word is required and it must match self.oReGroupName.  The
    group can only be set once per instruction.

    Returns True on success, otherwise the result of self.errorComment.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value, split it into words and validate.
    asGroups = self.flattenAllSections(aasSections).split();
    if len(asGroups) != 1:
        return self.errorComment(iTagLine, '%s: exactly one group, please: %s' % (sTag, asGroups,));
    (sGroup,) = asGroups;
    if not self.oReGroupName.match(sGroup):
        return self.errorComment(iTagLine, '%s: invalid group name: %s (valid: %s)'
                                           % (sTag, sGroup, self.oReGroupName.pattern));

    # Refuse to replace an earlier value.
    sPrevious = oInstr.sGroup;
    if sPrevious is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, sPrevious, sGroup,));

    oInstr.sGroup = sGroup;
    return True;
4331
def parseTagOpUnusedInvalid(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opunused, \@opinvalid, \@opinvlstyle
    Value:  <invalid opcode behaviour style>

    The \@opunused indicates the specification is for a currently unused
    instruction encoding.

    The \@opinvalid indicates the specification is for a currently invalid
    instruction encoding (like UD2).

    The \@opinvlstyle just indicates how CPUs decode the instruction when
    not supported (\@opcpuid, \@opmincpu) or disabled.

    Exactly one word is required and it must be a key of g_kdInvalidStyles.
    Only one of the three tags can be applied to an instruction.  Besides
    storing the style, '@opunused' sets fUnused and '@opinvalid' sets
    fInvalid on the instruction.

    Returns True on success, otherwise the result of self.errorComment.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value, split it into words and validate.
    asStyles = self.flattenAllSections(aasSections).split();
    if len(asStyles) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid behviour style, please: %s' % (sTag, asStyles,));
    (sStyle,) = asStyles;
    if sStyle not in g_kdInvalidStyles:
        return self.errorComment(iTagLine, '%s: invalid invalid behaviour style: %s (valid: %s)'
                                           % (sTag, sStyle, g_kdInvalidStyles.keys(),));

    # Refuse to replace an earlier value.
    sPrevious = oInstr.sInvalidStyle;
    if sPrevious is not None:
        return self.errorComment(iTagLine,
                                 '%s: attempting to overwrite "%s" with "%s" (only one @opunused, @opinvalid, @opinvlstyle)'
                                 % ( sTag, sPrevious, sStyle,));

    # Store the style and flag the instruction according to the tag used.
    oInstr.sInvalidStyle = sStyle;
    if sTag == '@opunused':
        oInstr.fUnused = True;
    elif sTag == '@opinvalid':
        oInstr.fInvalid = True;
    return True;
4369
def parseTagOpTest(self, sTag, aasSections, iTagLine, iEndLine): # pylint: disable=too-many-locals
    r"""
    Tag:        \@optest
    Value:      [<selectors>[ ]?] <inputs> -> <outputs>
    Example:    mode==64bit / in1=0xfffffffe:dw in2=1:dw -> out1=0xffffffff:dw outfl=a?,p?

    The main idea here is to generate basic instruction tests.

    The probably simplest way of handling the diverse input, would be to use
    it to produce size optimized byte code for a simple interpreter that
    modifies the register input and output states.

    An alternative to the interpreter would be creating multiple tables,
    but that becomes rather complicated wrt what goes where and then to use
    them in an efficient manner.

    Each section is parsed as a separate test.  A section's words are
    sorted (scanning from the end) into outputs, inputs ('->' separates the
    two) and selectors ('/' separates them from the inputs), then validated
    against the TestSelector and TestInOut tables.  Tests that parse
    cleanly are appended to the instruction's aoTests list; failures are
    reported via self.errorComment.

    Always returns True.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    #
    # Do it section by section.
    #
    for asSectionLines in aasSections:
        #
        # Sort the input into outputs, inputs and selector conditions.
        #
        sFlatSection = self.flattenAllSections([asSectionLines,]);
        if not sFlatSection:
            self.errorComment(iTagLine, '%s: missing value (dbg: aasSections=%s)' % ( sTag, aasSections));
            continue;
        oTest = InstructionTest(oInstr);

        asSelectors = [];
        asInputs    = [];
        asOutputs   = [];
        asCur   = asOutputs;
        fRc     = True;
        asWords = sFlatSection.split();
        for iWord in range(len(asWords) - 1, -1, -1):
            sWord = asWords[iWord];
            # Check for array switchers.  Note! The current array must be
            # compared by identity; comparing by value cannot tell the lists
            # apart when they happen to hold the same words (e.g. all empty).
            if sWord == '->':
                if asCur is not asOutputs:
                    fRc = self.errorComment(iTagLine, '%s: "->" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asInputs;
            elif sWord == '/':
                if asCur is not asInputs:
                    fRc = self.errorComment(iTagLine, '%s: "/" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asSelectors;
            else:
                asCur.insert(0, sWord);

        #
        # Validate and add selectors.
        #
        for sCond in asSelectors:
            # Predicates are shorthand for a full '<variable><op><value>' condition.
            sCondExp = TestSelector.kdPredicates.get(sCond, sCond);
            oSelector = None;
            for sOp in TestSelector.kasCompareOps:
                off = sCondExp.find(sOp);
                if off >= 0:
                    sVariable = sCondExp[:off];
                    sValue    = sCondExp[off + len(sOp):];
                    if sVariable in TestSelector.kdVariables:
                        if sValue in TestSelector.kdVariables[sVariable]:
                            oSelector = TestSelector(sVariable, sOp, sValue);
                        else:
                            self.errorComment(iTagLine, '%s: invalid condition value "%s" in "%s" (valid: %s)'
                                                         % ( sTag, sValue, sCond,
                                                             TestSelector.kdVariables[sVariable].keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid condition variable "%s" in "%s" (valid: %s)'
                                                     % ( sTag, sVariable, sCond, TestSelector.kdVariables.keys(),));
                    break;
            if oSelector is not None:
                for oExisting in oTest.aoSelectors:
                    if oExisting.sVariable == oSelector.sVariable:
                        self.errorComment(iTagLine, '%s: already have a selector for variable "%s" (existing: %s, new: %s)'
                                                     % ( sTag, oSelector.sVariable, oExisting, oSelector,));
                oTest.aoSelectors.append(oSelector);
            else:
                fRc = self.errorComment(iTagLine, '%s: failed to parse selector: %s' % ( sTag, sCond,));

        #
        # Validate outputs and inputs, adding them to the test as we go along.
        #
        for asItems, sDesc, aoDst in [ (asInputs, 'input', oTest.aoInputs), (asOutputs, 'output', oTest.aoOutputs)]:
            asValidFieldKinds = [ 'both', sDesc, ];
            for sItem in asItems:
                oItem = None;
                for sOp in TestInOut.kasOperators:
                    off = sItem.find(sOp);
                    if off < 0:
                        continue;
                    sField     = sItem[:off];
                    sValueType = sItem[off + len(sOp):];
                    if     sField in TestInOut.kdFields \
                       and TestInOut.kdFields[sField][1] in asValidFieldKinds:
                        # The value may carry an explicit ':type' suffix,
                        # otherwise the field's default type is used.
                        asSplit = sValueType.split(':', 1);
                        sValue  = asSplit[0];
                        sType   = asSplit[1] if len(asSplit) > 1 else TestInOut.kdFields[sField][0];
                        if sType in TestInOut.kdTypes:
                            oValid = TestInOut.kdTypes[sType].validate(sValue);
                            if oValid is True:
                                if not TestInOut.kdTypes[sType].isAndOrPair(sValue) or sOp == '&|=':
                                    oItem = TestInOut(sField, sOp, sValue, sType);
                                else:
                                    self.errorComment(iTagLine, '%s: and-or %s value "%s" can only be used with "&|="'
                                                                 % ( sTag, sDesc, sItem, ));
                            else:
                                self.errorComment(iTagLine, '%s: invalid %s value "%s" in "%s" (type: %s): %s'
                                                             % ( sTag, sDesc, sValue, sItem, sType, oValid, ));
                        else:
                            self.errorComment(iTagLine, '%s: invalid %s type "%s" in "%s" (valid types: %s)'
                                                         % ( sTag, sDesc, sType, sItem, TestInOut.kdTypes.keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid %s field "%s" in "%s"\nvalid fields: %s'
                                                     % ( sTag, sDesc, sField, sItem,
                                                         ', '.join([sKey for sKey, asVal in TestInOut.kdFields.items()
                                                                    if asVal[1] in asValidFieldKinds]),));
                    break;
                if oItem is not None:
                    for oExisting in aoDst:
                        if oExisting.sField == oItem.sField and oExisting.sOp == oItem.sOp:
                            self.errorComment(iTagLine,
                                              '%s: already have a "%s" assignment for field "%s" (existing: %s, new: %s)'
                                              % ( sTag, oItem.sOp, oItem.sField, oExisting, oItem,));
                    aoDst.append(oItem);
                else:
                    fRc = self.errorComment(iTagLine, '%s: failed to parse assignment: %s' % ( sTag, sItem,));

        #
        # Add the test if it parsed okay, otherwise dump what we got.
        #
        if fRc:
            oInstr.aoTests.append(oTest);
        else:
            self.errorComment(iTagLine, '%s: failed to parse test: %s' % (sTag, ' '.join(asWords),));
            self.errorComment(iTagLine, '%s: asSelectors=%s / asInputs=%s -> asOutputs=%s'
                                        % (sTag, asSelectors, asInputs, asOutputs,));

    _ = iEndLine;
    return True;
4514
def parseTagOpTestNum(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Numbered \@optest tag.  Either \@optest42 or \@optest[42].

    The number is checked against the count of tests the instruction
    already has; a mismatch is reported but does not stop the parsing.
    The actual work is handed to parseTagOpTest, whose result is returned.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Extract the digits, with or without the surrounding brackets.
    if sTag[-1] == ']':
        sNumber = sTag[8:-1];
    else:
        sNumber = sTag[7:];
    iTest = int(sNumber);

    cTests = len(oInstr.aoTests);
    if iTest != cTests:
        self.errorComment(iTagLine, '%s: incorrect test number: %u, actual %u' % (sTag, iTest, cTests,));
    return self.parseTagOpTest(sTag, aasSections, iTagLine, iEndLine);
4530
def parseTagOpTestIgnore(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@optestign | \@optestignore
    Value:  <value is ignored>

    This is a simple trick to ignore a test while debugging another.
    Nothing is parsed or stored; the method just returns True.

    See also \@oponlytest.
    """
    _ = (sTag, aasSections, iTagLine, iEndLine);
    return True;
4542
def parseTagOpCopyTests(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opcopytests
    Value:      <opstat | function> [..]
    Example:    \@opcopytests add_Eb_Gb

    Trick to avoid duplicating tests for different encodings of the same
    operation.

    Each reference must match self.oReStatsName or self.oReFunctionName
    and is appended to the instruction's asCopyTests list.  Invalid and
    duplicate references are reported and skipped.

    Returns True, except when no reference is given at all, in which case
    the result of self.errorComment is returned.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, validate and append the copy job to the instruction.  We execute
    # them after parsing all the input so we can handle forward references.
    asReferences = self.flattenAllSections(aasSections).split();
    if len(asReferences) == 0:
        return self.errorComment(iTagLine, '%s: requires at least on reference value' % (sTag,));

    for sReference in asReferences:
        if sReference in oInstr.asCopyTests:
            self.errorComment(iTagLine, '%s: ignoring duplicate "%s"' % (sTag, sReference,));
            continue;
        if self.oReStatsName.match(sReference) or self.oReFunctionName.match(sReference):
            oInstr.asCopyTests.append(sReference);
            continue;
        self.errorComment(iTagLine, '%s: invalid instruction reference (opstat or function) "%s" (valid: %s or %s)'
                                    % (sTag, sReference, self.oReStatsName.pattern, self.oReFunctionName.pattern));

    return True;
4571
def parseTagOpOnlyTest(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@oponlytest | \@oponly
    Value:  none

    Only test instructions with this tag.  This is a trick that is handy
    for singling out one or two new instructions or tests.

    The instruction is added to g_aoOnlyTestInstructions unless it is
    already there.  Returns True on success and the result of
    self.errorComment if a value was given.

    See also \@optestignore.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The tag takes no value.
    sValue = self.flattenAllSections(aasSections).strip();
    if sValue:
        return self.errorComment(iTagLine, '%s: does not take any value: %s' % (sTag, sValue));

    # Add the instruction to the only-test list, once.
    fAlreadyListed = oInstr in g_aoOnlyTestInstructions;
    if not fAlreadyListed:
        g_aoOnlyTestInstructions.append(oInstr);
    return True;
4594
def parseTagOpXcptType(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opxcpttype
    Value:  [none|1|2|3|4|4UA|5|6|7|8|11|12|E1|E1NF|E2|E3|E3NF|E4|E4NF|E5|E5NF|E6|E6NF|E7NF|E9|E9NF|E10|E11|E12|E12NF]

    Sets the SSE or AVX exception type (see SDMv2 2.4, 2.7).

    Exactly one word is required and it must be a key of g_kdXcptTypes.
    The type can only be set once per instruction.

    Returns True on success, otherwise the result of self.errorComment.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value, split it into words and validate.
    asTypes = self.flattenAllSections(aasSections).split();
    if len(asTypes) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid exception type, please: %s' % (sTag, asTypes,));
    (sType,) = asTypes;
    if sType not in g_kdXcptTypes:
        return self.errorComment(iTagLine, '%s: invalid invalid exception type: %s (valid: %s)'
                                           % (sTag, sType, sorted(g_kdXcptTypes.keys()),));

    # Refuse to replace an earlier value.
    sPrevious = oInstr.sXcptType;
    if sPrevious is not None:
        return self.errorComment(iTagLine,
                                 '%s: attempting to overwrite "%s" with "%s" (only one @opxcpttype)'
                                 % ( sTag, sPrevious, sType,));

    oInstr.sXcptType = sType;
    return True;
4621
def parseTagOpFunction(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opfunction
    Value:  <VMM function name>

    This is for explicitly setting the IEM function name.  Normally we pick
    this up from the FNIEMOP_XXX macro invocation after the description, or
    generate it from the mnemonic and operands.

    It is thought it may be necessary to set it when specifying instructions
    whose implementation isn't following immediately or aren't implemented
    yet.

    The value must match self.oReFunctionName and can only be set once per
    instruction.  Returns True on success, otherwise the result of
    self.errorComment.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value and work out whether anything is wrong with it.
    sFunction = self.flattenAllSections(aasSections);
    sError    = None;
    if not self.oReFunctionName.match(sFunction):
        sError = '%s: invalid VMM function name: "%s" (valid: %s)' % (sTag, sFunction, self.oReFunctionName.pattern);
    elif oInstr.sFunction is not None:
        sError = '%s: attempting to overwrite VMM function name "%s" with "%s"' % (sTag, oInstr.sFunction, sFunction,);
    if sError is not None:
        return self.errorComment(iTagLine, sError);

    oInstr.sFunction = sFunction;
    return True;
4649
def parseTagOpStats(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opstats
    Value:  <VMM statistics base name>

    This is for explicitly setting the statistics name.  Normally we pick
    this up from the IEMOP_MNEMONIC macro invocation, or generate it from
    the mnemonic and operands.

    It is thought it may be necessary to set it when specifying instructions
    whose implementation isn't following immediately or aren't implemented
    yet.

    The value must match self.oReStatsName and can only be set once per
    instruction.  Returns True on success, otherwise the result of
    self.errorComment.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value and work out whether anything is wrong with it.
    sStats = self.flattenAllSections(aasSections);
    sError = None;
    if not self.oReStatsName.match(sStats):
        sError = '%s: invalid VMM statistics name: "%s" (valid: %s)' % (sTag, sStats, self.oReStatsName.pattern);
    elif oInstr.sStats is not None:
        sError = '%s: attempting to overwrite VMM statistics base name "%s" with "%s"' % (sTag, oInstr.sStats, sStats,);
    if sError is not None:
        return self.errorComment(iTagLine, sError);

    oInstr.sStats = sStats;
    return True;
4677
def parseTagOpDone(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opdone
    Value:  none

    Used to explicitly flush the instructions that have been specified.

    Returns the result of self.doneInstructions, or that of
    self.errorComment if the tag was given a value.
    """
    _ = iEndLine;

    sValue = self.flattenAllSections(aasSections);
    if sValue == '':
        return self.doneInstructions();
    return self.errorComment(iTagLine, '%s: takes no value, found: "%s"' % (sTag, sValue,));
4690
4691 ## @}
4692
4693
def parseComment(self):
    """
    Parses the current comment (self.sComment).

    Comments containing neither 'Opcode' nor '@' are rejected.  Otherwise
    the text is split into lines with leading blanks and asterisks removed,
    an old style 'Opcode xx' first line is handed to parseCommentOldOpcode(),
    and each @tag is dispatched together with its value sections to the
    matching handler in self.dTagHandlers.  self.iCommentLine is advanced
    past any leading empty lines.

    Returns False if the comment was rejected, True if it was processed.
    """
    #
    # Bail out early if there is nothing of interest in the comment.
    #
    sComment = self.sComment;
    if 'Opcode' not in sComment and '@' not in sComment:
        return False;

    #
    # Line by line without the comment decoration, then trim the empty
    # lines off both ends.
    #
    asLines = [sRaw.lstrip().lstrip('*').lstrip() for sRaw in sComment.split('\n')];

    while asLines and not asLines[0]:
        self.iCommentLine += 1;
        del asLines[0];

    while asLines and not asLines[-1]:
        del asLines[-1];

    #
    # Old style header: Opcode 0x0f 0x12
    #
    if asLines[0].startswith('Opcode '):
        self.parseCommentOldOpcode(asLines);

    #
    # Collect the value sections of each tag and dispatch the tag when the
    # next one starts.  Text before the first tag goes to '@default'.
    #
    cOpTags      = 0;
    sFlatDefault = None;
    sCurTag      = '@default';
    iCurTagLine  = 0;
    asCurSection = [];
    aasSections  = [asCurSection,];
    for iLine, sLine in enumerate(asLines):
        if not sLine.startswith('@'):
            # Value text: a blank line closes a non-empty section and opens a new one.
            if sLine:
                asCurSection.append(sLine);
            elif asCurSection:
                asCurSection = [];
                aasSections.append(asCurSection);
            continue;

        #
        # A new tag begins on this line, so finish the previous one first.
        #
        if len(aasSections) > 1 and not asCurSection:
            del aasSections[-1];
        if sCurTag in self.dTagHandlers:
            self.dTagHandlers[sCurTag](sCurTag, aasSections, iCurTagLine, iLine);
            cOpTags += 1;
        elif sCurTag.startswith('@op'):
            self.errorComment(iCurTagLine, 'Unknown tag: %s' % (sCurTag));
        elif sCurTag == '@default':
            sFlatDefault = self.flattenAllSections(aasSections);
        elif '@op' + sCurTag[1:] in self.dTagHandlers:
            self.errorComment(iCurTagLine, 'Did you mean "@op%s" rather than "%s"?' % (sCurTag[1:], sCurTag));
        elif sCurTag in ['@encoding', '@opencoding']:
            self.errorComment(iCurTagLine, 'Did you mean "@openc" rather than "%s"?' % (sCurTag,));

        #
        # Start collecting for the new tag; any text after the tag word
        # becomes the first line of its first section.
        #
        asTagAndValue = sLine.split(None, 1);
        sCurTag       = asTagAndValue[0].lower();
        asCurSection  = [asTagAndValue[1],] if len(asTagAndValue) > 1 else [];
        aasSections   = [asCurSection,];
        iCurTagLine   = iLine;

    #
    # The last tag has nothing following it to trigger the processing.
    #
    if len(aasSections) > 1 and not asCurSection:
        del aasSections[-1];
    if sCurTag in self.dTagHandlers:
        self.dTagHandlers[sCurTag](sCurTag, aasSections, iCurTagLine, iLine);
        cOpTags += 1;
    elif sCurTag.startswith('@op'):
        self.errorComment(iCurTagLine, 'Unknown tag: %s' % (sCurTag));
    elif sCurTag == '@default':
        sFlatDefault = self.flattenAllSections(aasSections);

    #
    # Untagged text and @op* tags must not be mixed in one comment.
    #
    if cOpTags > 0 and sFlatDefault:
        self.errorComment(0, 'Untagged comment text is not allowed with @op*: %s' % (sFlatDefault,));

    return True;
4794
def parseMacroInvocation(self, sInvocation, offStartInvocation = 0):
    """
    Parses a macro invocation starting at offStartInvocation in sInvocation.

    When the closing parenthesis is not within sInvocation, further lines
    are pulled in from self.asLines, starting at index self.iLine.  Commas
    and parentheses inside single or double quoted text are ignored, and
    only commas at the outermost nesting level separate arguments.

    Returns three values:
        1. A list of macro arguments, where the zero'th is the macro name.
        2. The offset following the macro invocation, into sInvocation if
           it ends on the same line, or into the last line pulled in if it
           ends on a different line.
        3. Number of additional lines the invocation spans (i.e. zero if
           it is all contained within sInvocation).

    Raises an error via self.raiseError() if the opening parenthesis isn't
    found after the start offset or the name doesn't match oReMacroName.
    """
    # The name is whatever sits between the start offset and the opening parenthesis.
    offParen = sInvocation.find('(', offStartInvocation);
    if offParen <= offStartInvocation:
        self.raiseError("macro invocation open parenthesis not found");
    sMacroName = sInvocation[offStartInvocation:offParen].strip();
    if not self.oReMacroName.match(sMacroName):
        self.raiseError("invalid macro name '%s'" % (sMacroName,));
    asArgs = [sMacroName,];

    # Scan the arguments one character at a time.
    iNextLine    = self.iLine;      # Index into self.asLines of the next line to pull in.
    cNesting     = 1;               # Parenthesis nesting depth, one being the invocation itself.
    offCur       = offParen + 1;
    offArgStart  = offCur;
    offLastLine  = 0;               # Where the most recently pulled in line starts in sInvocation.
    chInQuote    = None;            # The quote character when inside quoted text.
    while cNesting > 0:
        # Out of text?  Then append the next source line, if there is one.
        if offCur >= len(sInvocation):
            if iNextLine >= len(self.asLines):
                self.error('macro invocation beyond end of file');
                return (asArgs, offCur - offLastLine, iNextLine - self.iLine);
            offLastLine  = offCur;
            sInvocation += self.asLines[iNextLine];
            iNextLine   += 1;
        chCur = sInvocation[offCur];

        if chInQuote:
            # Inside quotes only the end quote matters; a backslash hides the next character.
            if chCur == '\\' and offCur + 1 < len(sInvocation):
                offCur += 1;
            elif chCur == chInQuote:
                chInQuote = None;
        elif chCur == '(':
            cNesting += 1;
        elif chCur in ('"', '\'',):
            chInQuote = chCur;
        elif chCur in (',', ')',):
            # Both characters terminate an argument at the outermost level.
            if cNesting == 1:
                asArgs.append(sInvocation[offArgStart:offCur].strip());
                offArgStart = offCur + 1;
            if chCur == ')':
                cNesting -= 1;
        offCur += 1;

    return (asArgs, offCur - offLastLine, iNextLine - self.iLine);
4851
def findAndParseMacroInvocationEx(self, sCode, sMacro, offStart = 0):
    """
    Looks for the first occurrence of sMacro in sCode at or after offStart
    and parses it as a macro invocation if an opening parenthesis follows
    the name (blanks in between are allowed).

    Only the first occurrence is considered, so a hit on a longer macro
    name sharing the sMacro prefix counts as not found.

    Returns (None, len(sCode), 0) if not found, otherwise the
    parseMacroInvocation() return value.
    """
    offHit = sCode.find(sMacro, offStart);
    # Note! Use startswith() rather than indexing the stripped remainder, as
    #       the latter raises IndexError when nothing but blanks follow the name.
    if offHit >= 0 and sCode[offHit + len(sMacro):].lstrip().startswith('('):
        return self.parseMacroInvocation(sCode, offHit);
    return (None, len(sCode), 0);
4861
def findAndParseMacroInvocation(self, sCode, sMacro):
    """
    Convenience wrapper around findAndParseMacroInvocationEx() that only
    returns the argument list.

    Returns None if not found, the argument list (macro name first) as per
    parseMacroInvocation() if found.
    """
    tResult = self.findAndParseMacroInvocationEx(sCode, sMacro);
    return tResult[0];
4867
def findAndParseFirstMacroInvocation(self, sCode, asMacro):
    """
    Tries the macro names in asMacro in the given order and stops at the
    first one found in sCode.

    Returns same as findAndParseMacroInvocation, i.e. None if none of the
    macros were found.
    """
    # The generator is lazy, so no further macros are tried after the first hit.
    oAttempts = (self.findAndParseMacroInvocation(sCode, sMacro) for sMacro in asMacro);
    return next((asRet for asRet in oAttempts if asRet is not None), None);
4877
def workerIemOpMnemonicEx(self, sMacro, sStats, sAsm, sForm, sUpper, sLower, # pylint: disable=too-many-arguments
                          sDisHints, sIemHints, asOperands):
    """
    Processes one of the IEMOP_MNEMONIC0EX, IEMOP_MNEMONIC1EX, IEMOP_MNEMONIC2EX,
    IEMOP_MNEMONIC3EX, and IEMOP_MNEMONIC4EX macros.

    The macro parameters are cross-checked against what has already been
    recorded for the last of the current instructions (one is added if
    there are none), and attributes not yet set are filled in from the
    macro.  Problems are reported via self.error() without stopping.

    Parameters:
        sMacro      The macro name, used as message prefix.
        sStats      The a_Stats value.
        sAsm        The a_szMnemonic value (not used).
        sForm       The a_Form value, a key in g_kdIemForms.
        sUpper      The a_Upper value.
        sLower      The a_Lower value.
        sDisHints   The a_fDisHints value: '0' or DISOPTYPE_XXX values separated by '|'.
        sIemHints   The a_fIemHints value: '0' or IEMOPHINT_XXX values separated by '|'.
                    IEMOPHINT_SKIP_PYTHON makes us ignore the invocation.
        asOperands  The a_OpN values, keys in g_kdOpTypes.

    Returns True.
    """
    #
    # Some invocation checks.
    #
    if sUpper != sUpper.upper():
        self.error('%s: bad a_Upper parameter: %s' % (sMacro, sUpper,));
    if sLower != sLower.lower():
        self.error('%s: bad a_Lower parameter: %s' % (sMacro, sLower,));
    if sUpper.lower() != sLower:
        self.error('%s: a_Upper and a_Lower parameters does not match: %s vs %s' % (sMacro, sUpper, sLower,));
    if not self.oReMnemonic.match(sLower):
        self.error('%s: invalid a_Lower: %s (valid: %s)' % (sMacro, sLower, self.oReMnemonic.pattern,));

    #
    # Check if sIemHints tells us to not consider this macro invocation.
    #
    if sIemHints.find('IEMOPHINT_SKIP_PYTHON') >= 0:
        return True;

    # Apply to the last instruction only for now.
    if not self.aoCurInstrs:
        self.addInstruction();
    oInstr = self.aoCurInstrs[-1];
    if oInstr.iLineMnemonicMacro == -1:
        oInstr.iLineMnemonicMacro = self.iLine;
    else:
        self.error('%s: already saw a IEMOP_MNEMONIC* macro on line %u for this instruction'
                   % (sMacro, oInstr.iLineMnemonicMacro,));

    # Mnemonic
    if oInstr.sMnemonic is None:
        oInstr.sMnemonic = sLower;
    elif oInstr.sMnemonic != sLower:
        self.error('%s: current instruction and a_Lower does not match: %s vs %s' % (sMacro, oInstr.sMnemonic, sLower,));

    # Process operands.
    if len(oInstr.aoOperands) not in [0, len(asOperands)]:
        self.error('%s: number of operands given by @opN does not match macro: %s vs %s'
                   % (sMacro, len(oInstr.aoOperands), len(asOperands),));
    for iOperand, sType in enumerate(asOperands):
        sWhere = g_kdOpTypes.get(sType, [None, None])[1];
        if sWhere is None:
            self.error('%s: unknown a_Op%u value: %s' % (sMacro, iOperand + 1, sType));
            if iOperand < len(oInstr.aoOperands): # error recovery.
                sWhere = oInstr.aoOperands[iOperand].sWhere;
                sType  = oInstr.aoOperands[iOperand].sType;
            else:
                sWhere = 'reg';
                sType  = 'Gb';
        if iOperand == len(oInstr.aoOperands):
            oInstr.aoOperands.append(Operand(sWhere, sType))
        elif oInstr.aoOperands[iOperand].sWhere != sWhere or oInstr.aoOperands[iOperand].sType != sType:
            self.error('%s: @op%u and a_Op%u mismatch: %s:%s vs %s:%s'
                       % (sMacro, iOperand + 1, iOperand + 1, oInstr.aoOperands[iOperand].sWhere,
                          oInstr.aoOperands[iOperand].sType, sWhere, sType,));

    # Encoding.
    if sForm not in g_kdIemForms:
        self.error('%s: unknown a_Form value: %s' % (sMacro, sForm,));
    else:
        if oInstr.sEncoding is None:
            oInstr.sEncoding = g_kdIemForms[sForm][0];
        elif g_kdIemForms[sForm][0] != oInstr.sEncoding:
            self.error('%s: current instruction @openc and a_Form does not match: %s vs %s (%s)'
                       % (sMacro, oInstr.sEncoding, g_kdIemForms[sForm], sForm));

        # Check the parameter locations for the encoding.
        if g_kdIemForms[sForm][1] is not None:
            if len(g_kdIemForms[sForm][1]) > len(oInstr.aoOperands):
                self.error('%s: The a_Form=%s has a different operand count: %s (form) vs %s'
                           % (sMacro, sForm, len(g_kdIemForms[sForm][1]), len(oInstr.aoOperands) ));
            else:
                for iOperand, sWhere in enumerate(g_kdIemForms[sForm][1]):
                    if oInstr.aoOperands[iOperand].sWhere != sWhere:
                        self.error('%s: current instruction @op%u and a_Form location does not match: %s vs %s (%s)'
                                   % (sMacro, iOperand + 1, oInstr.aoOperands[iOperand].sWhere, sWhere, sForm,));
                    sOpFormMatch = g_kdOpTypes[oInstr.aoOperands[iOperand].sType][4];
                    # Note! str.find() returns -1 (true in boolean context) when there is no
                    #       match and 0 (false) for a match at the start of the string, so
                    #       the VEX and XOP prefix tests must compare against zero explicitly.
                    if    (sOpFormMatch in [ 'REG', 'MEM', ] and sForm.find('_' + sOpFormMatch) < 0) \
                       or (sOpFormMatch in [ 'FIXED', ]      and sForm.find(sOpFormMatch) < 0) \
                       or (sOpFormMatch == 'RM'              and (sForm.find('_MEM') > 0 or sForm.find('_REG') > 0) ) \
                       or (sOpFormMatch == 'V'               and (   not (sForm.find('VEX') >= 0 or sForm.find('XOP') >= 0) \
                                                                  or sForm.replace('VEX','').find('V') < 0) ):
                        self.error('%s: current instruction @op%u and a_Form type does not match: %s/%s vs %s'
                                   % (sMacro, iOperand + 1, oInstr.aoOperands[iOperand].sType, sOpFormMatch, sForm, ));
                # Operands beyond those given by the form must be of the fixed kind.
                if len(g_kdIemForms[sForm][1]) < len(oInstr.aoOperands):
                    for iOperand in range(len(g_kdIemForms[sForm][1]), len(oInstr.aoOperands)):
                        if    oInstr.aoOperands[iOperand].sType != 'FIXED' \
                          and g_kdOpTypes[oInstr.aoOperands[iOperand].sType][0] != 'IDX_ParseFixedReg':
                            self.error('%s: Expected FIXED type operand #%u following operands given by a_Form=%s: %s (%s)'
                                       % (sMacro, iOperand, sForm, oInstr.aoOperands[iOperand].sType,
                                          oInstr.aoOperands[iOperand].sWhere));

        # Check @opcodesub (needs a known form, which is why it lives in this branch).
        if    oInstr.sSubOpcode \
          and g_kdIemForms[sForm][2] \
          and oInstr.sSubOpcode.find(g_kdIemForms[sForm][2]) < 0:
            self.error('%s: current instruction @opcodesub and a_Form does not match: %s vs %s (%s)'
                       % (sMacro, oInstr.sSubOpcode, g_kdIemForms[sForm][2], sForm,));

    # Stats.
    if not self.oReStatsName.match(sStats):
        self.error('%s: invalid a_Stats value: %s' % (sMacro, sStats,));
    elif oInstr.sStats is None:
        oInstr.sStats = sStats;
    elif oInstr.sStats != sStats:
        self.error('%s: mismatching @opstats and a_Stats value: %s vs %s'
                   % (sMacro, oInstr.sStats, sStats,));

    # Process the hints (simply merge with @ophints w/o checking anything).
    for sHint in sDisHints.split('|'):
        sHint = sHint.strip();
        if sHint.startswith('DISOPTYPE_'):
            sShortHint = sHint[len('DISOPTYPE_'):].lower();
            if sShortHint in g_kdHints:
                oInstr.dHints[sShortHint] = True; # (dummy value, using dictionary for speed)
            else:
                self.error('%s: unknown a_fDisHints value: %s' % (sMacro, sHint,));
        elif sHint != '0':
            self.error('%s: expected a_fDisHints value: %s' % (sMacro, sHint,));

    for sHint in sIemHints.split('|'):
        sHint = sHint.strip();
        if sHint.startswith('IEMOPHINT_'):
            sShortHint = sHint[len('IEMOPHINT_'):].lower();
            if sShortHint in g_kdHints:
                oInstr.dHints[sShortHint] = True; # (dummy value, using dictionary for speed)
            else:
                self.error('%s: unknown a_fIemHints value: %s' % (sMacro, sHint,));
        elif sHint != '0':
            self.error('%s: expected a_fIemHints value: %s' % (sMacro, sHint,));

    _ = sAsm;
    return True;
5017
def workerIemOpMnemonic(self, sMacro, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands):
    """
    Processes one of the IEMOP_MNEMONIC0, IEMOP_MNEMONIC1, IEMOP_MNEMONIC2,
    IEMOP_MNEMONIC3, and IEMOP_MNEMONIC4 macros.

    These do not specify the stats name nor the assembly string, so both
    are derived from sLower and the operands before handing everything to
    workerIemOpMnemonicEx(), whose return value is returned.
    """
    if asOperands:
        sStats = sLower + '_' + '_'.join(asOperands);
        sAsm   = sLower + ' ' + ','.join(asOperands);
    else:
        # No operands: both are just the mnemonic.
        sStats = sLower;
        sAsm   = sLower;
    return self.workerIemOpMnemonicEx(sMacro, sStats, sAsm, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands);
5027
def workerIemMcBegin(self, sCode, offBeginStatementInCodeStr, offBeginStatementInLine):
    """
    Process a IEM_MC_BEGIN macro invocation.

    Creates a new McBlock and makes it the current one (self.oCurMcBlock).
    The block is only added to g_aoMcBlocks when there are no preprocessor
    conditionals active, no host architecture is set, or the conditionals
    are found to apply to the host architecture.

    Raises an error via self.raiseError() if there is no current function
    or if a block is already open.  Returns True.
    """
    if self.fDebugMc:
        self.debug('IEM_MC_BEGIN on %s off %s' % (self.iLine, offBeginStatementInLine,));
        #self.debug('%s<eos>' % (sCode,));

    # Need to be inside a function and not already inside a block.
    if not self.oCurFunction:
        self.raiseError('IEM_MC_BEGIN w/o current function (%s)' % (sCode,));
    if self.oCurMcBlock:
        self.raiseError('IEM_MC_BEGIN before IEM_MC_END. Previous IEM_MC_BEGIN at line %u' % (self.oCurMcBlock.iBeginLine,));

    # The indent is the distance from the preceding newline in the code string
    # (expanded multiline macros), or from the start of it when there is none
    # as rfind() returns -1 then.
    cchIndent = offBeginStatementInCodeStr - (sCode.rfind('\n', 0, offBeginStatementInCodeStr) + 1);
    #self.debug('cchIndent=%s sFunc=%s' % (cchIndent, self.oCurFunction.sName));

    # Create the block and make it current.
    oNewBlock = McBlock(self.sSrcFile, self.iLine, offBeginStatementInLine,
                        self.oCurFunction, self.iMcBlockInFunc, cchIndent);
    self.oCurMcBlock = oNewBlock;

    # Only blocks in a context matching the host architecture go on the list.
    try:
        if (   not self.aoCppCondStack
            or not self.sHostArch
            or self.PreprocessorConditional.isInBlockForArch(self.aoCppCondStack, self.sHostArch, self.iLine)):
            g_aoMcBlocks.append(oNewBlock);
            self.cTotalMcBlocks += 1;
    except Exception as oXcpt:
        self.raiseError(oXcpt.args[0]);

    self.iMcBlockInFunc += 1;
    return True;
5064
@staticmethod
def extractLinesFromMacroExpansionLine(sRawLine, offBegin, offEnd, sBeginStmt = 'IEM_MC_BEGIN'):
    """
    Helper used by workerIemMcEnd and workerIemMcDeferToCImplXRet for
    extracting a statement block from a string that's the result of macro
    expansion and therefore contains multiple "sub-lines" as it were.

    The string is cut after the first newline at or following offEnd and
    before the sub-line containing offBegin.  If what remains doesn't start
    with sBeginStmt (ignoring blanks), it is cut at offBegin instead.

    Returns list of lines covering offBegin thru offEnd in sRawLine, each
    with a newline appended (a string ending with a newline thus yields a
    final entry holding nothing but a newline).
    """
    # Chop off whatever follows the sub-line that offEnd is on.
    offNewline = sRawLine.find('\n', offEnd);
    if offNewline > 0:
        sRawLine = sRawLine[:offNewline + 1];

    # Chop off the sub-lines preceding the one offBegin is on (rfind yields -1
    # if there are none), and the text preceding the statement if it isn't
    # the first thing on its sub-line.
    offSubLine = sRawLine.rfind('\n', 0, offBegin) + 1;
    sBlock     = sRawLine[offSubLine:];
    if not sBlock.strip().startswith(sBeginStmt):
        sBlock = sBlock[offBegin - offSubLine:];

    asRet = [];
    for sSubLine in sBlock.split('\n'):
        asRet.append(sSubLine + '\n');
    return asRet;
5085
def workerIemMcEnd(self, offEndStatementInLine):
    """
    Process a IEM_MC_END macro invocation.

    Gathers the source lines of the current block (self.oCurMcBlock), cuts
    the last one off right after the 'IEM_MC_END();' statement, completes
    the block with them and resets self.oCurMcBlock to None.

    Raises an error via self.raiseError() if there is no current block, if
    IEM_MC_BEGIN isn't first on its line in a multi-line block, or if the
    final line doesn't hold a well formed 'IEM_MC_END();'.  Returns True.
    """
    if self.fDebugMc:
        self.debug('IEM_MC_END on %s off %s' % (self.iLine, offEndStatementInLine,));

    # There must be a block to end.
    oMcBlock = self.oCurMcBlock;
    if not oMcBlock:
        self.raiseError('IEM_MC_END w/o IEM_MC_BEGIN.');

    #
    # HACK ALERT! For blocks originating from macro expansion the start and
    #             end line will be the same, but the line has multiple
    #             newlines inside it.  So, we have to do some extra tricks
    #             to get the lines out of there. We ASSUME macros aren't
    #             messy, but keep IEM_MC_BEGIN/END on separate lines.
    #
    if self.iLine > oMcBlock.iBeginLine:
        asLines = self.asLines[oMcBlock.iBeginLine - 1 : self.iLine];
        if not asLines[0].strip().startswith('IEM_MC_BEGIN'):
            self.raiseError('IEM_MC_BEGIN is not the first word on the line');

        # Hack alert! Detect mixed tail/head macros a la cmpxchg16b and split up the lines
        #             so we can deal correctly with IEM_MC_END below and everything else.
        for sLine in asLines:
            cNewLines = sLine.count('\n');
            assert cNewLines > 0;
            if cNewLines > 1:
                # The end offset is relative to the last line, so add the length of those before it.
                asLines = self.extractLinesFromMacroExpansionLine(''.join(asLines),
                                                                  oMcBlock.offBeginLine,
                                                                    offEndStatementInLine
                                                                  + sum(len(s) for s in asLines[:-1]));
                oMcBlock.iMacroExp = McBlock.kiMacroExp_Partial;
                break;
    else:
        oMcBlock.iMacroExp = McBlock.kiMacroExp_Entire;
        asLines = self.extractLinesFromMacroExpansionLine(self.asLines[self.iLine - 1],
                                                          oMcBlock.offBeginLine, offEndStatementInLine);

    #
    # Strip anything following the IEM_MC_END(); statement in the final line,
    # so that we don't carry on any trailing 'break' after macro expansions
    # like for iemOp_movsb_Xb_Yb.
    #
    while not asLines[-1].strip():
        asLines.pop();
    sFinal        = asLines[-1];
    offEndInFinal = sFinal.find('IEM_MC_END');
    if offEndInFinal < 0:
        self.raiseError('bogus IEM_MC_END: Not in final line: %s' % (sFinal,));
    offFinalEnd   = offEndInFinal + len('IEM_MC_END');

    # What must follow, with optional blanks in front of each character.
    for chExpected in ('(', ')', ';',):
        while sFinal[offFinalEnd].isspace():
            offFinalEnd += 1;
        if sFinal[offFinalEnd] != chExpected:
            self.raiseError('bogus IEM_MC_END: Expected "%s" at %s: %s' % (chExpected, offFinalEnd, sFinal,));
        offFinalEnd += 1;

    asLines[-1] = sFinal[: offFinalEnd];

    #
    # Complete and discard the current block.
    #
    oMcBlock.complete(self.iLine, offEndStatementInLine,
                      offEndStatementInLine + offFinalEnd - offEndInFinal, asLines);
    self.oCurMcBlock = None;
    return True;
5164
def workerIemMcDeferToCImplXRet(self, sCode, offBeginStatementInCodeStr, offBeginStatementInLine, cParams):
    """
    Process a IEM_MC_DEFER_TO_CIMPL_[1-5]_RET macro invocation.

    The statement forms a complete block of its own: a McBlock is created
    for it, the statement is parsed into its only entry in aoStmts, the
    block is completed and appended to g_aoMcBlocks.  self.iLine is
    advanced to the line with the closing parenthesis.

    Parameters:
        sCode                       The code string holding the statement.
        offBeginStatementInCodeStr  Offset of the statement in sCode.
        offBeginStatementInLine     Offset of the statement in the source line.
        cParams                     The number in the macro name.  The invocation
                                    must have cParams + 3 arguments.

    Raises an error via self.raiseError() if there is no current function,
    if inside an IEM_MC_BEGIN block, or if the invocation cannot be parsed
    or has the wrong argument count.  Returns True.
    """
    sStmt = 'IEM_MC_DEFER_TO_CIMPL_%d_RET' % (cParams,);
    if self.fDebugMc:
        self.debug('%s on %s off %s' % (sStmt, self.iLine, offBeginStatementInLine,));
        #self.debug('%s<eos>' % (sCode,));

    # Check preconditions.
    if not self.oCurFunction:
        self.raiseError('%s w/o current function (%s)' % (sStmt, sCode,));
    if self.oCurMcBlock:
        self.raiseError('%s inside IEM_MC_BEGIN blocki starting at line %u' % (sStmt, self.oCurMcBlock.iBeginLine,));

    # Figure out the indent level the block starts at, adjusting for expanded multiline macros.
    cchIndent = offBeginStatementInCodeStr;
    offPrevNewline = sCode.rfind('\n', 0, offBeginStatementInCodeStr);
    if offPrevNewline >= 0:
        cchIndent -= offPrevNewline + 1;
    #self.debug('cchIndent=%s offPrevNewline=%s sFunc=%s' % (cchIndent, offPrevNewline, self.oCurFunction.sName));

    # Start a new block.
    oMcBlock = McBlock(self.sSrcFile, self.iLine, offBeginStatementInLine,
                       self.oCurFunction, self.iMcBlockInFunc, cchIndent);

    # Parse the statment.  (asArgs includes the macro name, thus the '+ 4'.)
    asArgs, offAfter, cLines = self.findAndParseMacroInvocationEx(sCode, sStmt, offBeginStatementInCodeStr);
    if asArgs is None:
        self.raiseError('%s: Closing parenthesis not found!' % (sStmt,));
    if len(asArgs) != cParams + 4:
        self.raiseError('%s: findAndParseMacroInvocationEx returns %s args, expected %s! (%s)'
                        % (sStmt, len(asArgs), cParams + 4, asArgs));

    oMcBlock.aoStmts = [ McBlock.parseMcDeferToCImpl(oMcBlock, asArgs[0], asArgs[1:]), ];

    # These MCs are not typically part of macro expansions, but let's get
    # it out of the way immediately if it's the case.
    if cLines > 0 or self.asLines[oMcBlock.iBeginLine - 1].count('\n') <= 1:
        asLines = self.asLines[self.iLine - 1 : self.iLine - 1 + cLines + 1];
        assert offAfter < len(asLines[-1]) and asLines[-1][offAfter] == ';', \
               'iBeginLine=%d iLine=%d offAfter=%s line: "%s"' % (oMcBlock.iBeginLine, self.iLine, offAfter, asLines[-1],);
        asLines[-1] = asLines[-1][:offAfter + 1];
    else:
        asLines = self.extractLinesFromMacroExpansionLine(self.asLines[self.iLine - 1], offBeginStatementInCodeStr,
                                                          offAfter, sStmt);
        # The helper returns a final entry consisting of just a newline when
        # the text it cut out ends with one, so drop blank entries at the end
        # (like workerIemMcEnd does) before looking for the semicolon.
        while asLines and asLines[-1].strip() == '':
            asLines.pop();
        assert asLines and asLines[-1].find(';') >= 0;
        asLines[-1] = asLines[-1][:asLines[-1].find(';') + 1];

    assert asLines[0].find(sStmt) >= 0;
    #if not asLines[0].strip().startswith(sStmt):
    #    self.raiseError('%s is not the first word on the line: %s' % (sStmt, asLines[0].strip()));

    # Advance to the line with the closing ')'.
    self.iLine += cLines;

    # Complete the block.
    oMcBlock.complete(self.iLine, 0 if cLines > 0 else offBeginStatementInCodeStr, offAfter + 1, asLines);

    g_aoMcBlocks.append(oMcBlock);
    self.cTotalMcBlocks += 1;
    self.iMcBlockInFunc += 1;

    return True;
5229
def workerStartFunction(self, asArgs):
    """
    Deals with the start of a decoder function.

    These are all defined using one of the FNIEMOP*_DEF* and FNIEMOP_*STUB*
    macros, so we get a argument list for these where the 0th argument is the
    macro name.  The first argument is passed to DecoderFunction as the
    function name.

    A function still being current is completed first, using the lines from
    where it began up to the one before the current line.  Returns True.
    """
    # Wrap up the previous function, if any.
    oPrevFunction = self.oCurFunction;
    if oPrevFunction:
        iLastLine = self.iLine - 1;
        oPrevFunction.complete(iLastLine, self.asLines[oPrevFunction.iBeginLine - 1 : iLastLine]);

    # The new one takes over as the current function.
    self.oCurFunction = DecoderFunction(self.sSrcFile, self.iLine, asArgs[1], asArgs);
    return True;
5245
def checkCodeForMacro(self, sCode, offLine):
    """
    Checks code for relevant macro invocation.

    Looks for, in this order: decoder function definitions (FNIEMOP_XXX),
    the IEMOP_HLP_DONE_VEX_DECODING_XXX helpers, the IEMOP_MNEMONIC family
    and finally the IEM_MC_BEGIN, IEM_MC_END and IEM_MC_DEFER_TO_CIMPL_X_RET
    statements, passing what is found on to the respective workers.

    offLine is added to the offsets of IEM_MC_XXX matches in sCode to get
    the line offsets handed to the workers.

    Returns True if a function definition was found or if sCode contains
    'IEM_MC_', otherwise False.
    """
    # No parenthesis (beyond the first column) means no macro invocation.
    if sCode.find('(') <= 0:
        return False;

    #
    # Instruction decoder function definitions.  ASSUME single line.
    #
    asArgs = self.findAndParseFirstMacroInvocation(sCode,
                                                   [ 'FNIEMOP_DEF',
                                                     'FNIEMOPRM_DEF',
                                                     'FNIEMOP_STUB',
                                                     'FNIEMOP_STUB_1',
                                                     'FNIEMOP_UD_STUB',
                                                     'FNIEMOP_UD_STUB_1' ]);
    if asArgs is not None:
        self.workerStartFunction(asArgs);
        #self.debug('%s: oCurFunction=%s' % (self.iLine, self.oCurFunction.sName,));

        if not self.aoCurInstrs:
            self.addInstruction();
        for oInstr in self.aoCurInstrs:
            if oInstr.iLineFnIemOpMacro == -1:
                oInstr.iLineFnIemOpMacro = self.iLine;
            else:
                self.error('%s: already seen a FNIEMOP_XXX macro for %s' % (asArgs[0], oInstr,) );

        fIsStub = asArgs[0].find('STUB') > 0;
        self.setInstrunctionAttrib('sFunction', asArgs[1]);
        self.setInstrunctionAttrib('fStub', fIsStub, fOverwrite = True);
        self.setInstrunctionAttrib('fUdStub', asArgs[0].find('UD_STUB') > 0, fOverwrite = True);
        if fIsStub:
            # A stub has no body, so the instructions are done with right away.
            self.doneInstructions(fEndOfFunction = True);
        return True;

    #
    # Worker function definitions, so we can get a context for MC blocks.
    #
    asArgs = self.findAndParseFirstMacroInvocation(sCode,
                                                   [ 'FNIEMOP_DEF_1',
                                                     'FNIEMOP_DEF_2', ]);
    if asArgs is not None:
        self.workerStartFunction(asArgs);
        #self.debug('%s: oCurFunction=%s (%s)' % (self.iLine, self.oCurFunction.sName, asArgs[0]));
        return True;

    #
    # IEMOP_HLP_DONE_VEX_DECODING_*:  The L0 variants imply the vex_l_zero
    # hint, which we complain about if a mnemonic macro was seen without it.
    #
    asArgs = self.findAndParseFirstMacroInvocation(sCode,
                                                   [ 'IEMOP_HLP_DONE_VEX_DECODING',
                                                     'IEMOP_HLP_DONE_VEX_DECODING_L0',
                                                     'IEMOP_HLP_DONE_VEX_DECODING_NO_VVVV',
                                                     'IEMOP_HLP_DONE_VEX_DECODING_L0_AND_NO_VVVV',
                                                   ]);
    if asArgs is not None:
        sMacro = asArgs[0];
        if sMacro in ('IEMOP_HLP_DONE_VEX_DECODING_L0', 'IEMOP_HLP_DONE_VEX_DECODING_L0_AND_NO_VVVV', ):
            for oInstr in self.aoCurInstrs:
                if 'vex_l_zero' not in oInstr.dHints:
                    if oInstr.iLineMnemonicMacro >= 0:
                        self.errorOnLine(oInstr.iLineMnemonicMacro,
                                         'Missing IEMOPHINT_VEX_L_ZERO! (%s on line %d)' % (sMacro, self.iLine,));
                    oInstr.dHints['vex_l_zero'] = True;

    #
    # IEMOP_MNEMONIC*
    #
    if sCode.find('IEMOP_MNEMONIC') >= 0:
        # IEMOP_MNEMONIC(a_Stats, a_szMnemonic) IEMOP_INC_STATS(a_Stats)
        asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC');
        if asArgs is not None:
            if len(self.aoCurInstrs) == 1:
                oInstr = self.aoCurInstrs[0];
                if oInstr.sStats is None:
                    oInstr.sStats = asArgs[1];
                self.deriveMnemonicAndOperandsFromStats(oInstr, asArgs[1]);

        # IEMOP_MNEMONIC<N>EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, [a_Op1, .., a_Op<N>,]
        #                     a_fDisHints, a_fIemHints)
        # The N operands sit between a_Lower and the two hint arguments.
        for cOperands in range(5):
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC%uEX' % (cOperands,));
            if asArgs is not None:
                self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5],
                                           asArgs[6 + cOperands], asArgs[7 + cOperands], asArgs[6 : 6 + cOperands]);

        # IEMOP_MNEMONIC<N>(a_Form, a_Upper, a_Lower, [a_Op1, .., a_Op<N>,] a_fDisHints, a_fIemHints)
        for cOperands in range(5):
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC%u' % (cOperands,));
            if asArgs is not None:
                self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3],
                                         asArgs[4 + cOperands], asArgs[5 + cOperands], asArgs[4 : 4 + cOperands]);

    #
    # IEM_MC_BEGIN + IEM_MC_END.
    # We must support multiple instances per code snippet.
    #
    offCode = sCode.find('IEM_MC_');
    if offCode < 0:
        return False;

    for oMatch in self.oReMcBeginEnd.finditer(sCode, offCode):
        sWhat    = oMatch.group(1);
        offMatch = oMatch.start();
        if sWhat == 'END':
            self.workerIemMcEnd(offLine + offMatch);
        elif sWhat == 'BEGIN':
            self.workerIemMcBegin(sCode, offMatch, offLine + offMatch);
        else:
            # The character following 'DEFER_TO_CIMPL_' is the parameter count.
            self.workerIemMcDeferToCImplXRet(sCode, offMatch, offLine + offMatch,
                                             int(sWhat[len('DEFER_TO_CIMPL_')]));
    return True;
5389
def workerPreprocessorRecreateMacroRegex(self):
    """
    Recreates self.oReMacros when self.dMacros changes.

    The expression matches any of the macro names at a word boundary.  The
    name of a macro taking arguments must be followed by an opening
    parenthesis (optionally with blanks in front of it), which is included
    in the match, while other macros must end at a word boundary.

    self.oReMacros is set to None when there are no macros.  Returns True.
    """
    if self.dMacros:
        # Note! Raw strings are used for the regex bits since '\s' and '\(' are
        #       invalid escape sequences in ordinary string literals.
        asAlternatives = [];
        for sName, oMacro in self.dMacros.items():
            if oMacro.asArgs is not None:
                asAlternatives.append(sName + r'\s*\(');
            else:
                asAlternatives.append(sName + r'\b');
        self.oReMacros = re.compile(r'\b(' + '|'.join(asAlternatives) + ')');
    else:
        self.oReMacros = None;
    return True;
5410
def workerPreprocessorDefine(self, sRest):
    """
    Handles a macro #define, the sRest is what follows after the directive word.

    Continuation lines are pulled in from self.asLines (advancing
    self.iLine).  Only macros with IEM_MC_BEGIN or IEM_MC_END in the body
    are recorded in self.dMacros, after which the macro regular expression
    is recreated.

    Returns True if the macro is of no interest, the self.error() result if
    the definition could not be parsed, and otherwise the result of
    workerPreprocessorRecreateMacroRegex().
    """
    assert sRest[-1] == '\n';

    #
    # Join up lines connected by line continuation, dropping the backslash
    # and any blanks in front of it, but keeping the newline.
    #
    iLineStart = self.iLine;
    while sRest.endswith('\\\n') and self.iLine < len(self.asLines):
        sRest = sRest[:-2].rstrip() + '\n' + self.asLines[self.iLine];
        self.iLine += 1;
    #self.debug('workerPreprocessorDefine: sRest=%s<EOS>' % (sRest,));

    #
    # Split it into name, argument list and body.  Try the expression for
    # macros with an argument list first and fall back on the one for
    # simple macros.
    #
    oMatch = self.oReHashDefine2.match(sRest);
    if oMatch:
        sAllArgs = oMatch.group(2).strip();
        if sAllArgs:
            asArgs = [sParam.strip() for sParam in sAllArgs.split(',')];
        else:
            asArgs = None;
        sBody = oMatch.group(3);
    else:
        oMatch = self.oReHashDefine3.match(sRest);
        if not oMatch:
            self.debug('workerPreprocessorDefine: wtf? sRest=%s' % (sRest,));
            return self.error('bogus macro definition: %s' % (sRest,));
        asArgs = None;
        sBody  = oMatch.group(2);
    sName = oMatch.group(1);
    assert sName == sName.strip();
    #self.debug('workerPreprocessorDefine: sName=%s asArgs=%s sBody=%s<EOS>' % (sName, asArgs, sBody));

    #
    # Is this of any interest to us?  We do NOT support MC blocks wihtin
    # nested macro expansion, just to avoid lots of extra work.
    #
    # There is only limited support for macros expanding to partial MC blocks.
    #
    # Note! IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX and other macros someone making
    #       use of IEMOP_RAISE_INVALID_LOCK_PREFIX_RET() will be ignored here and
    #       dealt with by overriding IEMOP_RAISE_INVALID_LOCK_PREFIX_RET and its
    #       siblings in the recompiler.  This is a lot simpler than nested macro
    #       expansion and lots of heuristics for locating all the relevant macros.
    #       Also, this way we don't produce lots of unnecessary threaded functions.
    #
    if 'IEM_MC_BEGIN' not in sBody and 'IEM_MC_END' not in sBody:
        #self.debug('workerPreprocessorDefine: irrelevant (%s: %s)' % (sName, sBody));
        return True;

    #
    # Record it and refresh the regular expression for spotting macros.
    #
    if self.fDebugPreproc:
        self.debug('#define %s on line %u' % (sName, self.iLine,));
    self.dMacros[sName] = SimpleParser.Macro(sName, asArgs, sBody.strip(), iLineStart);
    return self.workerPreprocessorRecreateMacroRegex();
5471
def workerPreprocessorUndef(self, sRest):
    """
    Handles a macro #undef, the sRest is what follows after the directive word.

    Returns True if the macro isn't one we are tracking, otherwise the
    result of workerPreprocessorRecreateMacroRegex() after removing it from
    self.dMacros.
    """
    # Isolate the name, treating a slash beyond the first column as the start of a comment.
    offComment = sRest.find('/');
    sName = (sRest[:offComment] if offComment > 0 else sRest).strip();

    # Not a macro we are tracking?  Then there is nothing to do.
    if sName not in self.dMacros:
        return True;

    if self.fDebugPreproc:
        self.debug('#undef %s on line %u' % (sName, self.iLine,));
    del self.dMacros[sName];
    return self.workerPreprocessorRecreateMacroRegex();
5490
def workerPreprocessorIfOrElif(self, sDirective, sRest):
    """
    Handles an #if, #ifdef, #ifndef or #elif directive.

    sDirective is the directive word (without the hash) and sRest whatever
    follows it on the line.  Continuation lines are pulled in from
    self.asLines (advancing self.iLine), comments are stripped and the
    result is wrapped in a PreprocessorConditional.  An #elif is appended to
    the aoElif list of the innermost conditional, anything else is pushed
    onto self.aoCppCondStack.

    Returns True.  Problems are reported via self.raiseError().
    """
    fIsElif = sDirective == 'elif';

    # An #elif needs an open #if that hasn't reached its #else yet.
    if fIsElif:
        if not self.aoCppCondStack:
            self.raiseError('#elif without #if');
        if self.aoCppCondStack[-1].fInElse:
            self.raiseError('#elif after #else');

    # Join up continuation lines: the backslash + newline pair and any blanks
    # in front of it go, a single space is used as glue.
    while self.iLine < len(self.asLines) and sRest.endswith('\\\n'):
        sRest = '%s %s' % (sRest[:-2].rstrip(), self.asLines[self.iLine],);
        self.iLine += 1;

    # Get rid of comments as well as leading and trailing blanks.
    sExpr = self.stripComments(sRest).strip();

    # Create the conditional...
    try:
        oCond = self.PreprocessorConditional(sDirective, sExpr);
    except Exception as oXcpt:
        self.raiseError(oXcpt.args[0]);

    # ... and stash it.
    if fIsElif:
        self.aoCppCondStack[-1].aoElif.append(oCond);
    else:
        self.aoCppCondStack.append(oCond);
    return True;
5529
def workerPreprocessorElse(self):
    """
    Handles an #else directive by flagging the innermost conditional on
    self.aoCppCondStack as having entered its else-part.

    Returns True.  An #else without an open #if, or a second #else for the
    same #if, is reported via self.raiseError().
    """
    if not self.aoCppCondStack:
        self.raiseError('#else without #if');

    oInnermost = self.aoCppCondStack[-1];
    if oInnermost.fInElse:
        self.raiseError('Another #else after #else');
    oInnermost.fInElse = True;
    return True;
5541
def workerPreprocessorEndif(self):
    """
    Handles an #endif directive by dropping the innermost conditional from
    self.aoCppCondStack.

    Returns True.  An #endif without an open #if is reported via
    self.raiseError().
    """
    if not self.aoCppCondStack:
        self.raiseError('#endif without #if');

    del self.aoCppCondStack[-1];
    return True;
5551
def checkPreprocessorDirective(self, sLine):
    """
    Handles a preprocessor directive line.

    The directive word following the '#' is extracted and the line is handed
    to the matching workerPreprocessorXxxx method.  Workers taking arguments
    get the text following the directive word, starting with the last blank
    character in front of it (if there is any).

    Returns what the worker returns, or False if the directive isn't one of
    define, undef, if, ifdef, ifndef, elif, else and endif.
    """
    # Locate the hash and drop it together with any blanks following it.
    offHash = sLine.find('#');
    assert offHash >= 0;
    sAfterHash = sLine[offHash + 1:].lstrip();

    # The directive is the leading run of non-blank characters.
    cchDirective = 0;
    for ch in sAfterHash:
        if ch.isspace():
            break;
        cchDirective += 1;
    sDirective = sAfterHash[:cchDirective];
    if self.fDebugPreproc:
        self.debug('line %d: #%s...' % (self.iLine, sDirective));

    # The tail passed on keeps exactly one (the last) of the blank characters
    # separating the directive from whatever comes after it.
    sRemainder = sAfterHash[cchDirective:];
    if sRemainder:
        cchBlanks = len(sRemainder) - len(sRemainder.lstrip());
        sTail = sRemainder[cchBlanks - 1:];
    else:
        sTail = '';

    # Dispatch.  The lambdas defer the method lookup till it is actually needed.
    if sDirective in ('if', 'ifdef', 'ifndef', 'elif',):
        return self.workerPreprocessorIfOrElif(sDirective, sTail);
    dWorkers = {
        'define': lambda: self.workerPreprocessorDefine(sTail),
        'undef':  lambda: self.workerPreprocessorUndef(sTail),
        'else':   lambda: self.workerPreprocessorElse(),
        'endif':  lambda: self.workerPreprocessorEndif(),
    };
    fnWorker = dWorkers.get(sDirective);
    if fnWorker is not None:
        return fnWorker();

    if self.fDebugPreproc:
        self.debug('line %d: Unknown preprocessor directive: %s' % (self.iLine, sDirective));
    return False;
5591
def expandMacros(self, sLine, oMatch):
    """
    Expands the macro invocation which oMatch located in sLine.

    Only the one invocation matched is expanded, and for macros taking
    parameters the whole argument list must be found on this line.

    Returns sLine with the invocation replaced by the macro expansion.
    Argument list problems are reported via self.raiseError().
    """
    #
    # Work out which macro it is and whether it is invoked with arguments
    # (the matched text then ends with the opening parenthesis).
    #
    offInvocation = oMatch.start();
    offAfterName  = oMatch.end();
    sMacroName    = oMatch.group(1);
    assert sMacroName == sLine[offInvocation : offAfterName];
    fHasArgList = sMacroName.endswith('(');
    if fHasArgList:
        sMacroName = sMacroName[:-1].strip();
    oMacro = self.dMacros[sMacroName] # type: SimpleParser.Macro

    #
    # Plain macros are simply substituted.
    #
    if not fHasArgList:
        if self.fDebugPreproc:
            self.debug('expanding simple macro %s on line %u' % (sMacroName, self.iLine,));
        return sLine[:offInvocation] + oMacro.expandMacro(self) + sLine[offAfterName:];

    #
    # Macro with parameters: Scan for the matching closing parenthesis,
    # splitting up the arguments at the commas found at nesting level one.
    #
    asCallArgs  = [];
    cNesting    = 1;
    offArgStart = offAfterName;
    for offCur in range(offAfterName, len(sLine)):
        ch = sLine[offCur];
        if ch == '(':
            cNesting += 1;
        elif ch == ')':
            cNesting -= 1;
            if cNesting == 0:
                asCallArgs.append(sLine[offArgStart:offCur].strip());
                break;
        elif ch == ',' and cNesting == 1:
            asCallArgs.append(sLine[offArgStart:offCur].strip());
            offArgStart = offCur + 1;
    else:
        # Ran off the end of the line without finding the closing parenthesis.
        self.raiseError('expandMacros: Invocation of macro %s spans multiple lines!' % (sMacroName,));

    # An empty pair of parentheses yields a single empty argument above.
    if len(oMacro.asArgs) == 0 and asCallArgs == ['']:
        asCallArgs = [];
    if len(oMacro.asArgs) != len(asCallArgs):
        self.raiseError('expandMacros: Argument mismatch in %s invocation' % (oMacro.sName,));

    #
    # Substitute; offCur is the offset of the closing parenthesis.
    #
    if self.fDebugPreproc:
        self.debug('expanding macro %s on line %u with arguments %s' % (sMacroName, self.iLine, asCallArgs));
    return sLine[:offInvocation] + oMacro.expandMacro(self, asCallArgs) + sLine[offCur + 1 :];
5650
def parse(self):
    """
    Parses the given file.

    Walks self.asLines from self.iLine onwards.  While in code state, known
    macros are expanded, preprocessor directives are passed on to
    checkPreprocessorDirective() and code is passed on to checkCodeForMacro().
    C-style comments are collected in self.sComment and handed to
    parseComment() once complete.

    Returns number of errors (the result of self.printErrors()).
    Raises exception on fatal trouble.
    """
    #
    # Loop thru the lines.
    #
    # Please mind that self.iLine may be updated by checkCodeForMacro and
    # other worker methods.
    #
    while self.iLine < len(self.asLines):
        sLine = self.asLines[self.iLine];
        self.iLine += 1;

        # Expand macros we know about if we're currently in code.
        if self.iState == self.kiCode and self.oReMacros:
            oMatch = self.oReMacros.search(sLine);
            if oMatch:
                sLine = self.expandMacros(sLine, oMatch);
                if self.fDebugPreproc:
                    self.debug('line %d: expanded\n%s ==>\n%s' % (self.iLine, self.asLines[self.iLine - 1], sLine[:-1],));
                self.asLines[self.iLine - 1] = sLine;

        # Check for preprocessor directives before comments and other stuff.
        # ASSUMES preprocessor directives doesn't end with multiline comments.
        if self.iState == self.kiCode and sLine.lstrip().startswith('#'):
            if self.fDebugPreproc:
                self.debug('line %d: preproc' % (self.iLine,));
            self.checkPreprocessorDirective(sLine);
        else:
            # Look for comments.
            offSlash = sLine.find('/');
            if offSlash >= 0:
                if offSlash + 1 >= len(sLine) or sLine[offSlash + 1] != '/' or self.iState != self.kiCode:
                    offLine = 0;
                    while offLine < len(sLine):
                        if self.iState == self.kiCode:
                            # Look for substantial multiline comment so we pass the following MC as a whole line:
                            #   IEM_MC_ARG_CONST(uint8_t, bImmArg, /*=*/ bImm, 2);
                            # Note! We ignore C++ comments here, assuming these aren't used in lines with C-style comments.
                            offHit = sLine.find('/*', offLine);
                            while offHit >= 0:
                                offEnd = sLine.find('*/', offHit + 2);
                                if offEnd < 0 or offEnd - offHit >= 16: # 16 chars is a bit random.
                                    break;
                                # Continue after the terminator, so its slash cannot pair up with a
                                # following asterisk (as in '/*a*/*pb') and pass for a new comment.
                                offHit = sLine.find('/*', offEnd + 2);

                            if offHit >= 0:
                                # Code up to the comment, then switch to comment mode.
                                self.checkCodeForMacro(sLine[offLine:offHit], offLine);
                                self.sComment     = '';
                                self.iCommentLine = self.iLine;
                                self.iState       = self.kiCommentMulti;
                                offLine = offHit + 2;
                            else:
                                self.checkCodeForMacro(sLine[offLine:], offLine);
                                offLine = len(sLine);

                        elif self.iState == self.kiCommentMulti:
                            offHit = sLine.find('*/', offLine);
                            if offHit >= 0:
                                # End of the comment: parse it and go back to code mode.
                                self.sComment += sLine[offLine:offHit];
                                self.iState    = self.kiCode;
                                offLine = offHit + 2;
                                self.parseComment();
                            else:
                                self.sComment += sLine[offLine:];
                                offLine = len(sLine);
                        else:
                            assert False;
                # C++ line comment.
                elif offSlash > 0:
                    self.checkCodeForMacro(sLine[:offSlash], 0);

            # No slash, but append the line if in multi-line comment.
            elif self.iState == self.kiCommentMulti:
                self.sComment += sLine;

            # No slash, but check code line for relevant macro.
            elif (     self.iState == self.kiCode
                  and (sLine.find('IEMOP_') >= 0 or sLine.find('FNIEMOPRM_DEF') >= 0 or sLine.find('IEM_MC') >= 0)):
                self.checkCodeForMacro(sLine, 0);

            # If the line is a '}' in the first position, complete the instructions.
            # (startswith rather than indexing, so an empty line cannot trip us up.)
            elif self.iState == self.kiCode and sLine.startswith('}'):
                self.doneInstructions(fEndOfFunction = True);

            # Look for instruction table on the form 'IEM_STATIC const PFNIEMOP g_apfnVexMap3'
            # so we can check/add @oppfx info from it.
            elif self.iState == self.kiCode and sLine.find('PFNIEMOP') > 0 and self.oReFunTable.match(sLine):
                self.parseFunctionTable(sLine);

    self.doneInstructions(fEndOfFunction = True);
    self.debug('%3s%% / %3s stubs out of %4s instructions and %4s MC blocks in %s'
               % (self.cTotalStubs * 100 // max(self.cTotalInstr, 1), self.cTotalStubs, self.cTotalInstr,
                  self.cTotalMcBlocks, os.path.basename(self.sSrcFile),));
    return self.printErrors();
5756
## The parsed content of IEMAllInstCommonBodyMacros.h.
## None until __parseFileByName has parsed that file during its first call.
g_oParsedCommonBodyMacros = None # type: SimpleParser
5759
def __parseFileByName(sSrcFile, sDefaultMap, sHostArch):
    """
    Parses one source file for instruction specifications.

    On the first call IEMAllInstCommonBodyMacros.h is parsed too (looked for
    next to sSrcFile first, then next to this script) and the resulting parser
    is kept in g_oParsedCommonBodyMacros and handed to the parser of sSrcFile
    on this and all later calls.

    Returns a (parse() result, parser object) tuple.
    Raises Exception if a file cannot be opened or read.  ParserException
    is printed to stderr and re-raised.
    """
    #
    # Read sSrcFile into a line array.
    #
    try:
        oFile = open(sSrcFile, "r"); # pylint: disable=consider-using-with,unspecified-encoding
    except Exception as oXcpt:
        raise Exception("failed to open %s for reading: %s" % (sSrcFile, oXcpt,));
    try:
        asLines = oFile.readlines();
    except Exception as oXcpt:
        raise Exception("failed to read %s: %s" % (sSrcFile, oXcpt,));
    finally:
        oFile.close();

    #
    # On the first call, we parse IEMAllInstCommonBodyMacros.h so we
    # can use the macros from it when processing the other files.
    #
    global g_oParsedCommonBodyMacros;
    if g_oParsedCommonBodyMacros is None:
        # Locate the file.
        sCommonBodyMacros = os.path.join(os.path.split(sSrcFile)[0], 'IEMAllInstCommonBodyMacros.h');
        if not os.path.isfile(sCommonBodyMacros):
            sCommonBodyMacros = os.path.join(os.path.split(__file__)[0], 'IEMAllInstCommonBodyMacros.h');

        # Read it.
        try:
            with open(sCommonBodyMacros, "r") as oIncFile: # pylint: disable=unspecified-encoding
                asIncFiles = oIncFile.readlines();
        except Exception as oXcpt:
            raise Exception("failed to open/read %s: %s" % (sCommonBodyMacros, oXcpt,));

        # Parse it.  The file is only expected to define macros, so finding
        # any instructions, tags, stubs or MC blocks in it is an error.
        try:
            oParser = SimpleParser(sCommonBodyMacros, asIncFiles, 'one', sHostArch);
            if oParser.parse() != 0:
                raise ParserException('%s: errors: See above' % (sCommonBodyMacros, ));
            if oParser.cTotalInstr != 0 or oParser.cTotalStubs != 0 or oParser.cTotalTagged != 0 or oParser.cTotalMcBlocks != 0:
                # Note! The argument order must follow the message: instr, tags, stubs, MCs.
                raise ParserException('%s: error: Unexpectedly found %u instr, %u tags, %u stubs and %u MCs, expecting zero. %s'
                                      % (sCommonBodyMacros, oParser.cTotalInstr, oParser.cTotalTagged, oParser.cTotalStubs,
                                         oParser.cTotalMcBlocks,
                                         ', '.join(sorted(  [str(oMcBlock.iBeginLine) for oMcBlock in g_aoMcBlocks]
                                                          + [str(oInstr.iLineCreated) for oInstr in g_aoAllInstructions])),));
        except ParserException as oXcpt:
            print(str(oXcpt), file = sys.stderr);
            raise;
        g_oParsedCommonBodyMacros = oParser;

    #
    # Do the parsing.
    #
    try:
        oParser = SimpleParser(sSrcFile, asLines, sDefaultMap, sHostArch, g_oParsedCommonBodyMacros);
        return (oParser.parse(), oParser);
    except ParserException as oXcpt:
        print(str(oXcpt), file = sys.stderr);
        raise;
5821
5822
def __doTestCopying():
    """
    Executes the asCopyTests instructions.

    For each instruction listing sources in asCopyTests, the aoTests of the
    referenced instructions are appended to its own aoTests.  A reference is
    looked up in g_dAllInstructionsByStat first and, failing that, in
    g_dAllInstructionsByFunction.

    Returns the number of errors (unknown references and references to the
    destination itself), after writing them to stderr.
    """
    asErrors = [];
    for oDstInstr in g_aoAllInstructions:
        for sSrcInstr in oDstInstr.asCopyTests or ():
            # Resolve the reference to a list of source instructions.
            oByStat = g_dAllInstructionsByStat.get(sSrcInstr, None);
            if oByStat:
                aoSrcInstrs = [oByStat,];
            else:
                aoSrcInstrs = g_dAllInstructionsByFunction.get(sSrcInstr, []);
            if not aoSrcInstrs:
                asErrors.append('%s:%s: error: @opcopytests reference "%s" not found\n'
                                % ( oDstInstr.sSrcFile, oDstInstr.iLineCreated, sSrcInstr));
                continue;

            # Copy the tests, refusing to copy from the destination itself.
            for oSrcInstr in aoSrcInstrs:
                if oSrcInstr != oDstInstr:
                    oDstInstr.aoTests.extend(oSrcInstr.aoTests);
                else:
                    asErrors.append('%s:%s: error: @opcopytests reference "%s" matches the destination\n'
                                    % ( oDstInstr.sSrcFile, oDstInstr.iLineCreated, sSrcInstr));

    if asErrors:
        sys.stderr.write(u''.join(asErrors));
    return len(asErrors);
5850
5851
def __applyOnlyTest():
    """
    Restricts testing to the instructions in g_aoOnlyTestInstructions.

    If that list isn't empty, the aoTests of every other instruction is
    replaced by an empty list.  Always returns 0.
    """
    if g_aoOnlyTestInstructions:
        for oInstr in g_aoAllInstructions:
            if oInstr.aoTests and oInstr not in g_aoOnlyTestInstructions:
                oInstr.aoTests = [];
    return 0;
5863
## List of all main instruction files, their default maps and file sets (-1 means included in all sets).
## Each entry is a (filename, default map name, file set number) tuple.
g_aaoAllInstrFilesAndDefaultMapAndSet = (
    ( 'IEMAllInstCommon.cpp.h', 'one', -1, ),
    ( 'IEMAllInstOneByte.cpp.h', 'one', 1, ),
    ( 'IEMAllInst3DNow.cpp.h', '3dnow', 2, ),
    ( 'IEMAllInstTwoByte0f.cpp.h', 'two0f', 2, ),
    ( 'IEMAllInstThree0f38.cpp.h', 'three0f38', 3, ),
    ( 'IEMAllInstThree0f3a.cpp.h', 'three0f3a', 3, ),
    ( 'IEMAllInstVexMap1.cpp.h', 'vexmap1', 4, ),
    ( 'IEMAllInstVexMap2.cpp.h', 'vexmap2', 4, ),
    ( 'IEMAllInstVexMap3.cpp.h', 'vexmap3', 4, ),
);
5876
def __parseFilesWorker(asFilesAndDefaultMap, sHostArch):
    """
    Parses the given IEMAllInstruction*.cpp.h files.

    asFilesAndDefaultMap is a sequence of (filename, default map name) pairs.
    A filename given without a directory which doesn't exist as such is
    looked for in the directory of this script.  After parsing, the test
    copying and only-test filtering is applied and overall stub statistics
    are printed to stderr.

    Returns a list of the parsers on success.
    Raises exception on failure.
    """
    sSrcDir = os.path.dirname(os.path.abspath(__file__));
    cErrors = 0;
    aoParsers = [];
    for sFilename, sDefaultMap in asFilesAndDefaultMap:
        if not os.path.split(sFilename)[0] and not os.path.exists(sFilename):
            sFilename = os.path.join(sSrcDir, sFilename);
        cThisErrors, oParser = __parseFileByName(sFilename, sDefaultMap, sHostArch);
        cErrors += cThisErrors;
        aoParsers.append(oParser);
    cErrors += __doTestCopying();
    cErrors += __applyOnlyTest();

    # Total stub stats.  The max() guards the percentage against division by
    # zero when no instructions were found at all.
    cTotalStubs = 0;
    for oInstr in g_aoAllInstructions:
        cTotalStubs += oInstr.fStub;
    print('debug: %3s%% / %3s stubs out of %4s instructions and %4s MC blocks in total'
          % (cTotalStubs * 100 // max(len(g_aoAllInstructions), 1), cTotalStubs, len(g_aoAllInstructions),
             len(g_aoMcBlocks),),
          file = sys.stderr);

    if cErrors != 0:
        raise Exception('%d parse errors' % (cErrors,));
    return aoParsers;
5907
5908
def parseFiles(asFiles, sHostArch = None):
    """
    Parses a selection of IEMAllInstruction*.cpp.h files.

    The default map of each file is taken from the first entry in
    g_aaoAllInstrFilesAndDefaultMapAndSet with the same base name, ignoring
    case.

    Returns a list of the parsers on success.
    Raises exception on failure, including when a file cannot be classified.
    """
    asFilesAndDefaultMap = [];
    for sFilename in asFiles:
        sBaseName = os.path.split(sFilename)[1].lower();
        sMap = next((aoInfo[1] for aoInfo in g_aaoAllInstrFilesAndDefaultMapAndSet if aoInfo[0].lower() == sBaseName),
                    None);
        if not sMap:
            raise Exception('Unable to classify file: %s' % (sFilename,));
        asFilesAndDefaultMap.append((sFilename, sMap));

    # Let the worker do the actual job.
    return __parseFilesWorker(asFilesAndDefaultMap, sHostArch);
5930
5931
def parseAll(sHostArch = None):
    """
    Parses all the files listed in g_aaoAllInstrFilesAndDefaultMapAndSet.

    Returns a list of the parsers on success.
    Raises exception on failure.
    """
    # The worker only wants the filename and default map columns.
    asFilesAndDefaultMap = [];
    for aoInfo in g_aaoAllInstrFilesAndDefaultMapAndSet:
        asFilesAndDefaultMap.append(aoInfo[:2]);
    return __parseFilesWorker(asFilesAndDefaultMap, sHostArch);
5940
5941
5942#
5943# Generators (may perhaps move later).
5944#
def __formatDisassemblerTableEntry(oInstr):
    """
    Formats the disassembler table entry for the given instruction, that is
    an OP() macro invocation or, with more than three operands, an OPVEX() one.

    Returns the entry as a single line, the columns padded out to fixed offsets.
    """
    cOperands = len(oInstr.aoOperands);
    if cOperands > 3:
        sMacro, cMaxOperands = 'OPVEX', 4;
    else:
        sMacro, cMaxOperands = 'OP', 3;
    assert cOperands <= cMaxOperands;

    #
    # Format string: mnemonic, a space and the comma separated operand formats.
    #
    sFormat = '%s("%s' % (sMacro, oInstr.sMnemonic,);
    asOperandFmts = [];
    for oOperand in oInstr.aoOperands:
        sOperandFmt = g_kdOpTypes[oOperand.sType][2];
        if sOperandFmt[0] != '%': ## @todo remove upper() later.
            sOperandFmt = sOperandFmt.upper();
        asOperandFmts.append(sOperandFmt);
    if asOperandFmts:
        sFormat += ' ' + ','.join(asOperandFmts);
    asColumns = [ sFormat + '",', ];

    #
    # Decoders.
    #
    iStart = len(asColumns);
    sEncoding = oInstr.sEncoding;
    dFixedParsers = { 'VEX.ModR/M': 'IDX_ParseModRM,', 'vex2': 'IDX_ParseVex2b,', 'vex3': 'IDX_ParseVex3b,', };
    if sEncoding is None or sEncoding in ( 'fixed', 'VEX.fixed', ):
        pass;
    elif sEncoding == 'ModR/M':
        # ASSUME the first operand is using the ModR/M encoding
        assert len(oInstr.aoOperands) >= 1 and oInstr.aoOperands[0].usesModRM(), "oInstr=%s" % (oInstr,);
        asColumns.append('IDX_ParseModRM,');
    elif sEncoding == 'prefix':
        # One zero entry per operand.
        asColumns.extend(['0,'] * cOperands);
    elif sEncoding in dFixedParsers:
        asColumns.append(dFixedParsers[sEncoding]);
    elif sEncoding in g_dInstructionMaps:
        asColumns.append(g_dInstructionMaps[sEncoding].sDisParse + ',');
    else:
        ## @todo The following parsers are not handled yet:
        #   IDX_ParseTwoByteEsc, IDX_ParseGrp1, IDX_ParseShiftGrp2, IDX_ParseGrp3, IDX_ParseGrp4, IDX_ParseGrp5,
        #   IDX_Parse3DNow, IDX_ParseGrp6, IDX_ParseGrp7, IDX_ParseGrp8, IDX_ParseGrp9, IDX_ParseGrp10,
        #   IDX_ParseGrp12, IDX_ParseGrp13, IDX_ParseGrp14, IDX_ParseGrp15, IDX_ParseGrp16,
        #   IDX_ParseThreeByteEsc4, IDX_ParseThreeByteEsc5, IDX_ParseModFence, IDX_ParseEscFP,
        #   IDX_ParseNopPause, IDX_ParseInvOpModRM,
        assert False, str(oInstr);

    # Check for immediates and stuff in the remaining operands, i.e. those
    # not covered by the decoder columns added above, then pad with zeros.
    for oOperand in oInstr.aoOperands[len(asColumns) - iStart:]:
        asColumns.append(g_kdOpTypes[oOperand.sType][0] + ',');
    asColumns.extend(['0,'] * (cMaxOperands - (len(asColumns) - iStart)));

    #
    # Opcode and operands.
    #
    assert oInstr.sDisEnum, str(oInstr);
    asColumns.append(oInstr.sDisEnum + ',');
    asParams = ['OP_PARM_' + g_kdOpTypes[oOperand.sType][3] + ',' for oOperand in oInstr.aoOperands];
    asParams.extend(['OP_PARM_NONE,'] * (cMaxOperands - len(asParams)));
    asColumns.extend(asParams);

    #
    # Flags: the DISOPTYPE_ defines of the hints or'ed together, zero if none.
    #
    asFlags = [];
    for sHint in sorted(oInstr.dHints.keys()):
        sDefine = g_kdHints[sHint];
        if sDefine.startswith('DISOPTYPE_'):
            asFlags.append(sDefine);
    asColumns.append((' | '.join(asFlags) or '0') + '),');

    #
    # Format the columns into a line.  Each column is padded up to its
    # offset, or separated by a single space if we're already past that.
    #
    aoffColumns = [4, 29, 49, 65, 77, 89, 109, 125, 141, 157, 183, 199];
    sLine = '';
    for iColumn, sColumn in enumerate(asColumns):
        sLine += ' ' * max(aoffColumns[iColumn] - len(sLine), 1);
        sLine += sColumn;

    # OP("psrlw %Vdq,%Wdq", IDX_ParseModRM, IDX_UseModRM, 0, OP_PSRLW, OP_PARM_Vdq, OP_PARM_Wdq, OP_PARM_NONE,
    #    DISOPTYPE_HARMLESS),
    # define OP(pszOpcode, idxParse1, idxParse2, idxParse3, opcode, param1, param2, param3, optype) \
    #     { pszOpcode, idxParse1, idxParse2, idxParse3, 0, opcode, param1, param2, param3, 0, 0, optype }
    return sLine;
6069
def __checkIfShortTable(aoTableOrdered, oMap):
    """
    Checks whether it pays off to leave out the leading and trailing empty
    (None) entries of the given table.

    Returns (iInstr, cInstructions, fShortTable), where iInstr is the index of
    the first entry to output and cInstructions the index following the last
    one.  For the full table version this is (0, len(aoTableOrdered), False).
    """
    cTotal = len(aoTableOrdered);

    # Determine how much we can trim off: Locate the end of the last and the
    # start of the first populated entry.
    iEnd   = next((idx + 1 for idx in range(cTotal - 1, -1, -1) if aoTableOrdered[idx] is not None), 0);
    iFirst = next((idx for idx in range(iEnd) if aoTableOrdered[idx] is not None), iEnd);

    # Go for the short table version when the trimmed entries amount to at
    # least one thirtieth of the table (integer division, so this is always
    # the case for tables with less than 30 entries).
    # NOTE(review): The comment previously here spoke of saving "more than
    #               30%", which isn't what the expression tests - confirm intent.
    cTrimmed    = iFirst + (cTotal - iEnd);
    fShortTable = cTrimmed >= cTotal // 30;
    _ = oMap; # Use this for overriding.
    if fShortTable:
        return (iFirst, iEnd, True);

    # Output the full table.
    return (0, cTotal, False);
6091
def generateDisassemblerTables(oDstFile = sys.stdout):
    """
    Generates disassembler tables.

    Parses all the instruction files and writes a DISOPCODE table followed by
    a DISOPMAPDESC range record to oDstFile for each instruction map with a
    name starting with "vex".

    Returns exit code: 0 on success, 1 if the parsing failed.
    """

    #
    # Parse all.
    #
    try:
        parseAll();
    except Exception as oXcpt:
        print('error: parseAll failed: %s' % (oXcpt,), file = sys.stderr);
        traceback.print_exc(file = sys.stderr);
        return 1;

    #
    # The disassembler uses a slightly different table layout to save space:
    # maps selected by opcode byte + prefix are split up into one map for each
    # of the prefix variations (none, 0x66, 0xf3 and 0xf2).
    #
    aoDisasmMaps = [];
    for oMap in sorted(g_dInstructionMaps.values(),
                       key = lambda oSortMap: oSortMap.sEncoding + ''.join(oSortMap.asLeadOpcodes)):
        if oMap.sSelector != 'byte+pfx':
            aoDisasmMaps.append(oMap);
        else:
            # Split the map by prefix.
            aoDisasmMaps.append(oMap.copy(oMap.sName, 'none'));
            aoDisasmMaps.append(oMap.copy(oMap.sName + '_66', '0x66'));
            aoDisasmMaps.append(oMap.copy(oMap.sName + '_F3', '0xf3'));
            aoDisasmMaps.append(oMap.copy(oMap.sName + '_F2', '0xf2'));

    #
    # Dump each map.
    #
    # NOTE(review): asHeaderLines is collected but not written anywhere in
    #               this function.
    #
    asHeaderLines = [];
    print("debug: maps=%s\n" % (', '.join([oMap.sName for oMap in aoDisasmMaps]),), file = sys.stderr);
    for oMap in aoDisasmMaps:
        sName = oMap.sName;

        if not sName.startswith("vex"):
            continue; # only looking at the vex maps at the moment.

        #
        # Get the instructions for the map and see if we can do a short version or not.
        #
        aoTableOrder    = oMap.getInstructionsInTableOrder();
        cEntriesPerByte = oMap.getEntriesPerByte();
        cEntriesPerRow  = 0x10 * cEntriesPerByte; # Entries covered by one group of 16 opcode bytes.
        (iInstrStart, iInstrEnd, fShortTable) = __checkIfShortTable(aoTableOrder, oMap);

        #
        # Output the table start.
        # Note! Short tables are static and only accessible via the map range record.
        #
        asLines = [];
        asLines.append('/* Generated from: %-11s Selector: %-7s Encoding: %-7s Lead bytes opcodes: %s */'
                       % ( oMap.sName, oMap.sSelector, oMap.sEncoding, ' '.join(oMap.asLeadOpcodes), ));
        if fShortTable:
            asLines.append('static const DISOPCODE %s[] =' % (oMap.getDisasTableName(),));
        else:
            asHeaderLines.append('extern const DISOPCODE %s[%d];' % (oMap.getDisasTableName(), iInstrEnd - iInstrStart,));
            asLines.append( 'const DISOPCODE %s[%d] =' % (oMap.getDisasTableName(), iInstrEnd - iInstrStart,));
        asLines.append('{');

        if fShortTable and (iInstrStart & (cEntriesPerRow - 1)) != 0:
            asLines.append(' /* %#04x: */' % (iInstrStart,));

        #
        # Output the instructions.
        #
        iInstr = iInstrStart;
        while iInstr < iInstrEnd:
            oInstr = aoTableOrder[iInstr];
            if (iInstr & (cEntriesPerRow - 1)) == 0:
                if iInstr != iInstrStart:
                    asLines.append('');
                asLines.append(' /* %x */' % ((iInstr // cEntriesPerByte) >> 4,));

            if oInstr is None:
                # Invalid. Optimize blocks of invalid instructions.
                cInvalidInstrs = 1;
                while iInstr + cInvalidInstrs < len(aoTableOrder) and aoTableOrder[iInstr + cInvalidInstrs] is None:
                    cInvalidInstrs += 1;
                if (iInstr & (cEntriesPerRow - 1)) == 0 and cInvalidInstrs >= cEntriesPerRow:
                    asLines.append(' INVALID_OPCODE_BLOCK_%u,' % (cEntriesPerRow,));
                    iInstr += cEntriesPerRow - 1;
                elif cEntriesPerByte > 1:
                    if (iInstr & (cEntriesPerByte - 1)) == 0 and cInvalidInstrs >= cEntriesPerByte:
                        asLines.append(' INVALID_OPCODE_BLOCK_%u,' % (cEntriesPerByte,));
                        # Skip the rest of the entries for this opcode byte.  (This used
                        # to be a hard-coded 3, only right for four entries per byte.)
                        iInstr += cEntriesPerByte - 1;
                    else:
                        asLines.append(' /* %#04x/%d */ INVALID_OPCODE,'
                                       % (iInstr // cEntriesPerByte, iInstr % cEntriesPerByte));
                else:
                    asLines.append(' /* %#04x */ INVALID_OPCODE,' % (iInstr));
            elif isinstance(oInstr, list):
                if len(oInstr) != 0:
                    asLines.append(' /* %#04x */ ComplicatedListStuffNeedingWrapper, /* \n -- %s */'
                                   % (iInstr, '\n -- '.join([str(oItem) for oItem in oInstr]),));
                else:
                    asLines.append(__formatDisassemblerTableEntry(oInstr));
            else:
                asLines.append(__formatDisassemblerTableEntry(oInstr));

            iInstr += 1;

        # Avoid emitting a table without any entries.
        if iInstrStart >= iInstrEnd:
            asLines.append(' /* dummy */ INVALID_OPCODE');

        asLines.append('};');
        asLines.append('AssertCompile(RT_ELEMENTS(%s) == %s);' % (oMap.getDisasTableName(), iInstrEnd - iInstrStart,));

        #
        # We always emit a map range record, assuming the linker will eliminate the unnecessary ones.
        # The declaration uses the same name as the definition following it.
        #
        asHeaderLines.append('extern const DISOPMAPDESC %s;' % (oMap.getDisasRangeName()));
        asLines.append('const DISOPMAPDESC %s = { &%s[0], %#04x, RT_ELEMENTS(%s) };'
                       % (oMap.getDisasRangeName(), oMap.getDisasTableName(), iInstrStart, oMap.getDisasTableName(),));

        #
        # Write out the lines.
        #
        oDstFile.write('\n'.join(asLines));
        oDstFile.write('\n');
        oDstFile.write('\n');
    return 0;
6220
# When run as a script: generate the disassembler tables (written to stdout by
# default) and use the generator's return value as the process exit code.
if __name__ == '__main__':
    sys.exit(generateDisassemblerTables());
6223
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette