VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllInstPython.py@ 101950

Last change on this file since 101950 was 101950, checked in by vboxsync, 18 months ago

VMM/IEM: Eliminated IEM_MC_ASSIGN use from IEMAllInstTwoByte0f.cpp.h, replacing it with IEM_MC_ARG_CONST/IEM_MC_LOCAL_ASSIGN and moved IEM_MC_ARG* and IEM_MC_LOCAL* from the top of the MC-blocks being modified to where they are actually first needed, reducing the life time and troubles for the recompiler. bugref:10371

  • Property svn:eol-style set to LF
  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 297.1 KB
Line 
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3# $Id: IEMAllInstPython.py 101950 2023-11-08 01:57:15Z vboxsync $
4
5"""
6IEM instruction extractor.
7
8This script/module parses the IEMAllInstruction*.cpp.h files next to it and
9collects information about the instructions. It can then be used to generate
10disassembler tables and tests.
11"""
12
13from __future__ import print_function;
14
15__copyright__ = \
16"""
17Copyright (C) 2017-2023 Oracle and/or its affiliates.
18
19This file is part of VirtualBox base platform packages, as
20available from https://www.virtualbox.org.
21
22This program is free software; you can redistribute it and/or
23modify it under the terms of the GNU General Public License
24as published by the Free Software Foundation, in version 3 of the
25License.
26
27This program is distributed in the hope that it will be useful, but
28WITHOUT ANY WARRANTY; without even the implied warranty of
29MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
30General Public License for more details.
31
32You should have received a copy of the GNU General Public License
33along with this program; if not, see <https://www.gnu.org/licenses>.
34
35The contents of this file may alternatively be used under the terms
36of the Common Development and Distribution License Version 1.0
37(CDDL), a copy of it is provided in the "COPYING.CDDL" file included
38in the VirtualBox distribution, in which case the provisions of the
39CDDL are applicable instead of those of the GPL.
40
41You may elect to license modified versions of this file under the
42terms and conditions of either the GPL or the CDDL or both.
43
44SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
45"""
46__version__ = "$Revision: 101950 $"
47
48# pylint: disable=anomalous-backslash-in-string,too-many-lines
49
50# Standard python imports.
51import os;
52import re;
53import sys;
54import traceback;
55
56## Only the main script needs to modify the path.
57#g_ksValidationKitDir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),
58# 'ValidationKit');
59#sys.path.append(g_ksValidationKitDir);
60#
61#from common import utils; - Windows build boxes doesn't have pywin32.
62
# Python 3 compatibility: there is no separate 'long' type there, so alias it to 'int'.
if sys.version_info >= (3,):
    long = int;     # pylint: disable=redefined-builtin,invalid-name
66
67
## X86_EFL_XXX constants and their values.
## Single flags are written as bit shifts, i.e. '1 << N' corresponds to RT_BIT_32(N).
g_kdX86EFlagsConstants = {
    'X86_EFL_CF':           1 << 0,
    'X86_EFL_1':            1 << 1,
    'X86_EFL_PF':           1 << 2,
    'X86_EFL_AF':           1 << 4,
    'X86_EFL_ZF':           1 << 6,
    'X86_EFL_SF':           1 << 7,
    'X86_EFL_TF':           1 << 8,
    'X86_EFL_IF':           1 << 9,
    'X86_EFL_DF':           1 << 10,
    'X86_EFL_OF':           1 << 11,
    'X86_EFL_IOPL':         (1 << 12) | (1 << 13),
    'X86_EFL_NT':           1 << 14,
    'X86_EFL_RF':           1 << 16,
    'X86_EFL_VM':           1 << 17,
    'X86_EFL_AC':           1 << 18,
    'X86_EFL_VIF':          1 << 19,
    'X86_EFL_VIP':          1 << 20,
    'X86_EFL_ID':           1 << 21,
    'X86_EFL_LIVE_MASK':    0x003f7fd5,     # UINT32_C(0x003f7fd5)
    'X86_EFL_RA1_MASK':     1 << 1,
};
90
## EFlags values allowed in \@opfltest, \@opflmodify, \@opflundef, \@opflset, and \@opflclear.
##
## Maps a flag mnemonic to the name of an X86_EFL_XXX constant.  A leading '!'
## in the value marks the mnemonic as the "flag clear" variant.
g_kdEFlagsMnemonics = {
    # Debugger flag notation (sorted by value):
    'cf': 'X86_EFL_CF', ##< Carry Flag.
    'nc': '!X86_EFL_CF', ##< No Carry.

    # NOTE(review): in the classic DEBUG notation 'pe' means PF set and 'po' PF clear, which is
    #               the opposite of the mapping below.  Existing users of these mnemonics may
    #               depend on the current polarity - confirm before changing the values.
    'po': 'X86_EFL_PF', ##< Parity Odd.
    'pe': '!X86_EFL_PF', ##< Parity Even.

    'af': 'X86_EFL_AF', ##< Aux Flag.
    'na': '!X86_EFL_AF', ##< No Aux.

    'zr': 'X86_EFL_ZF', ##< ZeRo.
    'nz': '!X86_EFL_ZF', ##< No Zero.

    'ng': 'X86_EFL_SF', ##< NeGative (sign).
    'pl': '!X86_EFL_SF', ##< PLus (sign).

    'tf': 'X86_EFL_TF', ##< Trap flag.

    'ei': 'X86_EFL_IF', ##< Enabled Interrupts.
    'di': '!X86_EFL_IF', ##< Disabled Interrupts.

    'dn': 'X86_EFL_DF', ##< DowN (string op direction).
    'up': '!X86_EFL_DF', ##< UP (string op direction).

    'ov': 'X86_EFL_OF', ##< OVerflow.
    'nv': '!X86_EFL_OF', ##< No Overflow.

    'nt': 'X86_EFL_NT', ##< Nested Task.
    'rf': 'X86_EFL_RF', ##< Resume Flag.
    'vm': 'X86_EFL_VM', ##< Virtual-8086 Mode.
    'ac': 'X86_EFL_AC', ##< Alignment Check.
    'vif': 'X86_EFL_VIF', ##< Virtual Interrupt Flag.
    'vip': 'X86_EFL_VIP', ##< Virtual Interrupt Pending.

    # Reference manual notation not covered above (sorted by value):
    'pf': 'X86_EFL_PF',
    'zf': 'X86_EFL_ZF',
    'sf': 'X86_EFL_SF',
    'if': 'X86_EFL_IF',
    'df': 'X86_EFL_DF',
    'of': 'X86_EFL_OF',
    'iopl': 'X86_EFL_IOPL',
    'id': 'X86_EFL_ID',
};
137
## Constants and values for CR0.
## Written as bit shifts, i.e. '1 << N' corresponds to RT_BIT_32(N).
g_kdX86Cr0Constants = {
    'X86_CR0_PE':   1 << 0,
    'X86_CR0_MP':   1 << 1,
    'X86_CR0_EM':   1 << 2,
    'X86_CR0_TS':   1 << 3,
    'X86_CR0_ET':   1 << 4,
    'X86_CR0_NE':   1 << 5,
    'X86_CR0_WP':   1 << 16,
    'X86_CR0_AM':   1 << 18,
    'X86_CR0_NW':   1 << 29,
    'X86_CR0_CD':   1 << 30,
    'X86_CR0_PG':   1 << 31,
};
152
## Constants and values for CR4.
## Written as bit shifts, i.e. '1 << N' corresponds to RT_BIT_32(N).
g_kdX86Cr4Constants = {
    'X86_CR4_VME':          1 << 0,
    'X86_CR4_PVI':          1 << 1,
    'X86_CR4_TSD':          1 << 2,
    'X86_CR4_DE':           1 << 3,
    'X86_CR4_PSE':          1 << 4,
    'X86_CR4_PAE':          1 << 5,
    'X86_CR4_MCE':          1 << 6,
    'X86_CR4_PGE':          1 << 7,
    'X86_CR4_PCE':          1 << 8,
    'X86_CR4_OSFXSR':       1 << 9,
    'X86_CR4_OSXMMEEXCPT':  1 << 10,
    'X86_CR4_VMXE':         1 << 13,
    'X86_CR4_SMXE':         1 << 14,
    'X86_CR4_PCIDE':        1 << 17,
    'X86_CR4_OSXSAVE':      1 << 18,
    'X86_CR4_SMEP':         1 << 20,
    'X86_CR4_SMAP':         1 << 21,
    'X86_CR4_PKE':          1 << 22,
};
174
## XSAVE components (XCR0).
## Single components are written as bit shifts; the two combined masks stay as literals.
g_kdX86XSaveCConstants = {
    'XSAVE_C_X87':          1 << 0,
    'XSAVE_C_SSE':          1 << 1,
    'XSAVE_C_YMM':          1 << 2,
    'XSAVE_C_BNDREGS':      1 << 3,
    'XSAVE_C_BNDCSR':       1 << 4,
    'XSAVE_C_OPMASK':       1 << 5,
    'XSAVE_C_ZMM_HI256':    1 << 6,
    'XSAVE_C_ZMM_16HI':     1 << 7,
    'XSAVE_C_PKRU':         1 << 9,
    'XSAVE_C_LWP':          1 << 62,
    'XSAVE_C_X':            1 << 63,
    'XSAVE_C_ALL_AVX':      0x000000c4,     # For clearing all AVX bits.
    'XSAVE_C_ALL_AVX_SSE':  0x000000c6,     # For clearing all AVX and SSE bits.
};
191
192
## \@op[1-4] locations
## Each location name gets its own (initially empty) list.
g_kdOpLocations = {
    sLocation: [] for sLocation in (
        'reg',      ## modrm.reg
        'rm',       ## modrm.rm
        'imm',      ## immediate instruction data
        'vvvv',     ## VEX.vvvv

        # fixed registers.
        'AL',
        'rAX',
        'rDX',
        'rSI',
        'rDI',
        'rFLAGS',
        'CS',
        'DS',
        'ES',
        'FS',
        'GS',
        'SS',
    )
};
214
## \@op[1-4] types
##
## Maps an operand type name to a five element tuple.
##
## Value fields:
## - 0: the normal IDX_ParseXXX handler (IDX_UseModRM == IDX_ParseModRM).
## - 1: the location (a key in g_kdOpLocations).
## - 2: disassembler format string version of the type.
## - 3: disassembler OP_PARAM_XXX (XXX only).
## - 4: IEM form matching instruction (empty string if none is given).
##
## Note! See the A.2.1 in SDM vol 2 for the type names.
g_kdOpTypes = {
    # Fixed addresses
    'Ap': ( 'IDX_ParseImmAddrF', 'imm', '%Ap', 'Ap', 'FIXED', ),

    # ModR/M.rm
    'Eb': ( 'IDX_UseModRM', 'rm', '%Eb', 'Eb', 'RM', ),
    'Ed': ( 'IDX_UseModRM', 'rm', '%Ed', 'Ed', 'RM', ),
    'Ed_WO': ( 'IDX_UseModRM', 'rm', '%Ed', 'Ed', 'RM', ),
    'Eq': ( 'IDX_UseModRM', 'rm', '%Eq', 'Eq', 'RM', ),
    'Eq_WO': ( 'IDX_UseModRM', 'rm', '%Eq', 'Eq', 'RM', ),
    'Ew': ( 'IDX_UseModRM', 'rm', '%Ew', 'Ew', 'RM', ),
    'Ev': ( 'IDX_UseModRM', 'rm', '%Ev', 'Ev', 'RM', ),
    'Ey': ( 'IDX_UseModRM', 'rm', '%Ey', 'Ey', 'RM', ),
    'Qd': ( 'IDX_UseModRM', 'rm', '%Qd', 'Qd', 'RM', ),
    'Qq': ( 'IDX_UseModRM', 'rm', '%Qq', 'Qq', 'RM', ),
    'Qq_WO': ( 'IDX_UseModRM', 'rm', '%Qq', 'Qq', 'RM', ),
    'Wss': ( 'IDX_UseModRM', 'rm', '%Wss', 'Wss', 'RM', ),
    'Wss_WO': ( 'IDX_UseModRM', 'rm', '%Wss', 'Wss', 'RM', ),
    'Wsd': ( 'IDX_UseModRM', 'rm', '%Wsd', 'Wsd', 'RM', ),
    'Wsd_WO': ( 'IDX_UseModRM', 'rm', '%Wsd', 'Wsd', 'RM', ),
    'Wps': ( 'IDX_UseModRM', 'rm', '%Wps', 'Wps', 'RM', ),
    'Wps_WO': ( 'IDX_UseModRM', 'rm', '%Wps', 'Wps', 'RM', ),
    'Wpd': ( 'IDX_UseModRM', 'rm', '%Wpd', 'Wpd', 'RM', ),
    'Wpd_WO': ( 'IDX_UseModRM', 'rm', '%Wpd', 'Wpd', 'RM', ),
    'Wdq': ( 'IDX_UseModRM', 'rm', '%Wdq', 'Wdq', 'RM', ),
    'Wdq_WO': ( 'IDX_UseModRM', 'rm', '%Wdq', 'Wdq', 'RM', ),
    'Wq': ( 'IDX_UseModRM', 'rm', '%Wq', 'Wq', 'RM', ),
    'Wq_WO': ( 'IDX_UseModRM', 'rm', '%Wq', 'Wq', 'RM', ),
    'WqZxReg_WO': ( 'IDX_UseModRM', 'rm', '%Wq', 'Wq', 'RM', ),
    'Wx': ( 'IDX_UseModRM', 'rm', '%Wx', 'Wx', 'RM', ),
    'Wx_WO': ( 'IDX_UseModRM', 'rm', '%Wx', 'Wx', 'RM', ),

    # ModR/M.rm - register only.
    'Uq': ( 'IDX_UseModRM', 'rm', '%Uq', 'Uq', 'REG' ),
    'UqHi': ( 'IDX_UseModRM', 'rm', '%Uq', 'UqHi', 'REG' ),
    'Uss': ( 'IDX_UseModRM', 'rm', '%Uss', 'Uss', 'REG' ),
    'Uss_WO': ( 'IDX_UseModRM', 'rm', '%Uss', 'Uss', 'REG' ),
    'Usd': ( 'IDX_UseModRM', 'rm', '%Usd', 'Usd', 'REG' ),
    'Usd_WO': ( 'IDX_UseModRM', 'rm', '%Usd', 'Usd', 'REG' ),
    'Ux': ( 'IDX_UseModRM', 'rm', '%Ux', 'Ux', 'REG' ),
    'Nq': ( 'IDX_UseModRM', 'rm', '%Qq', 'Nq', 'REG' ),

    # ModR/M.rm - memory only.
    'Ma': ( 'IDX_UseModRM', 'rm', '%Ma', 'Ma', 'MEM', ), ##< Only used by BOUND.
    'Mb_RO': ( 'IDX_UseModRM', 'rm', '%Mb', 'Mb', 'MEM', ),
    'Md': ( 'IDX_UseModRM', 'rm', '%Md', 'Md', 'MEM', ),
    'Md_RO': ( 'IDX_UseModRM', 'rm', '%Md', 'Md', 'MEM', ),
    'Md_WO': ( 'IDX_UseModRM', 'rm', '%Md', 'Md', 'MEM', ),
    'Mdq': ( 'IDX_UseModRM', 'rm', '%Mdq', 'Mdq', 'MEM', ),
    'Mdq_WO': ( 'IDX_UseModRM', 'rm', '%Mdq', 'Mdq', 'MEM', ),
    'Mq': ( 'IDX_UseModRM', 'rm', '%Mq', 'Mq', 'MEM', ),
    'Mq_WO': ( 'IDX_UseModRM', 'rm', '%Mq', 'Mq', 'MEM', ),
    'Mps_WO': ( 'IDX_UseModRM', 'rm', '%Mps', 'Mps', 'MEM', ),
    'Mpd_WO': ( 'IDX_UseModRM', 'rm', '%Mpd', 'Mpd', 'MEM', ),
    'Mx': ( 'IDX_UseModRM', 'rm', '%Mx', 'Mx', 'MEM', ),
    'Mx_WO': ( 'IDX_UseModRM', 'rm', '%Mx', 'Mx', 'MEM', ),
    'M_RO': ( 'IDX_UseModRM', 'rm', '%M', 'M', 'MEM', ),
    'M_RW': ( 'IDX_UseModRM', 'rm', '%M', 'M', 'MEM', ),

    # ModR/M.reg
    'Gb': ( 'IDX_UseModRM', 'reg', '%Gb', 'Gb', '', ),
    'Gw': ( 'IDX_UseModRM', 'reg', '%Gw', 'Gw', '', ),
    'Gd': ( 'IDX_UseModRM', 'reg', '%Gd', 'Gd', '', ),
    'Gv': ( 'IDX_UseModRM', 'reg', '%Gv', 'Gv', '', ),
    'Gv_RO': ( 'IDX_UseModRM', 'reg', '%Gv', 'Gv', '', ),
    'Gy': ( 'IDX_UseModRM', 'reg', '%Gy', 'Gy', '', ),
    'Pd': ( 'IDX_UseModRM', 'reg', '%Pd', 'Pd', '', ),
    'PdZx_WO': ( 'IDX_UseModRM', 'reg', '%Pd', 'PdZx', '', ),
    'Pq': ( 'IDX_UseModRM', 'reg', '%Pq', 'Pq', '', ),
    'Pq_WO': ( 'IDX_UseModRM', 'reg', '%Pq', 'Pq', '', ),
    'Vd': ( 'IDX_UseModRM', 'reg', '%Vd', 'Vd', '', ),
    'Vd_WO': ( 'IDX_UseModRM', 'reg', '%Vd', 'Vd', '', ),
    'VdZx_WO': ( 'IDX_UseModRM', 'reg', '%Vd', 'Vd', '', ),
    'Vdq': ( 'IDX_UseModRM', 'reg', '%Vdq', 'Vdq', '', ),
    'Vss': ( 'IDX_UseModRM', 'reg', '%Vss', 'Vss', '', ),
    'Vss_WO': ( 'IDX_UseModRM', 'reg', '%Vss', 'Vss', '', ),
    'VssZx_WO': ( 'IDX_UseModRM', 'reg', '%Vss', 'Vss', '', ),
    'Vsd': ( 'IDX_UseModRM', 'reg', '%Vsd', 'Vsd', '', ),
    'Vsd_WO': ( 'IDX_UseModRM', 'reg', '%Vsd', 'Vsd', '', ),
    'VsdZx_WO': ( 'IDX_UseModRM', 'reg', '%Vsd', 'Vsd', '', ),
    'Vps': ( 'IDX_UseModRM', 'reg', '%Vps', 'Vps', '', ),
    'Vps_WO': ( 'IDX_UseModRM', 'reg', '%Vps', 'Vps', '', ),
    'Vpd': ( 'IDX_UseModRM', 'reg', '%Vpd', 'Vpd', '', ),
    'Vpd_WO': ( 'IDX_UseModRM', 'reg', '%Vpd', 'Vpd', '', ),
    'Vq': ( 'IDX_UseModRM', 'reg', '%Vq', 'Vq', '', ),
    'Vq_WO': ( 'IDX_UseModRM', 'reg', '%Vq', 'Vq', '', ),
    'Vdq_WO': ( 'IDX_UseModRM', 'reg', '%Vdq', 'Vdq', '', ),
    'VqHi': ( 'IDX_UseModRM', 'reg', '%Vdq', 'VdqHi', '', ),
    'VqHi_WO': ( 'IDX_UseModRM', 'reg', '%Vdq', 'VdqHi', '', ),
    'VqZx_WO': ( 'IDX_UseModRM', 'reg', '%Vq', 'VqZx', '', ),
    'Vx': ( 'IDX_UseModRM', 'reg', '%Vx', 'Vx', '', ),
    'Vx_WO': ( 'IDX_UseModRM', 'reg', '%Vx', 'Vx', '', ),

    # VEX.vvvv
    'By': ( 'IDX_UseModRM', 'vvvv', '%By', 'By', 'V', ),
    'Hps': ( 'IDX_UseModRM', 'vvvv', '%Hps', 'Hps', 'V', ),
    'Hpd': ( 'IDX_UseModRM', 'vvvv', '%Hpd', 'Hpd', 'V', ),
    'HssHi': ( 'IDX_UseModRM', 'vvvv', '%Hx', 'HssHi', 'V', ),
    'HsdHi': ( 'IDX_UseModRM', 'vvvv', '%Hx', 'HsdHi', 'V', ),
    'Hq': ( 'IDX_UseModRM', 'vvvv', '%Hq', 'Hq', 'V', ),
    'HqHi': ( 'IDX_UseModRM', 'vvvv', '%Hq', 'HqHi', 'V', ),
    'Hx': ( 'IDX_UseModRM', 'vvvv', '%Hx', 'Hx', 'V', ),

    # Immediate values.
    'Ib': ( 'IDX_ParseImmByte', 'imm', '%Ib', 'Ib', '', ), ##< NB! Could be IDX_ParseImmByteSX for some instrs.
    'Iw': ( 'IDX_ParseImmUshort', 'imm', '%Iw', 'Iw', '', ),
    'Id': ( 'IDX_ParseImmUlong', 'imm', '%Id', 'Id', '', ),
    'Iq': ( 'IDX_ParseImmQword', 'imm', '%Iq', 'Iq', '', ),
    'Iv': ( 'IDX_ParseImmV', 'imm', '%Iv', 'Iv', '', ), ##< o16: word, o32: dword, o64: qword
    'Iz': ( 'IDX_ParseImmZ', 'imm', '%Iz', 'Iz', '', ), ##< o16: word, o32|o64:dword

    # Address operands (no ModR/M).
    'Ob': ( 'IDX_ParseImmAddr', 'imm', '%Ob', 'Ob', '', ),
    'Ov': ( 'IDX_ParseImmAddr', 'imm', '%Ov', 'Ov', '', ),

    # Relative jump targets
    'Jb': ( 'IDX_ParseImmBRel', 'imm', '%Jb', 'Jb', '', ),
    'Jv': ( 'IDX_ParseImmVRel', 'imm', '%Jv', 'Jv', '', ),

    # DS:rSI
    'Xb': ( 'IDX_ParseXb', 'rSI', '%eSI', 'Xb', '', ),
    'Xv': ( 'IDX_ParseXv', 'rSI', '%eSI', 'Xv', '', ),
    # ES:rDI
    'Yb': ( 'IDX_ParseYb', 'rDI', '%eDI', 'Yb', '', ),
    'Yv': ( 'IDX_ParseYv', 'rDI', '%eDI', 'Yv', '', ),

    # The flags register.
    'Fv': ( 'IDX_ParseFixedReg', 'rFLAGS', '%Fv', 'Fv', '', ),

    # Fixed registers.
    'AL': ( 'IDX_ParseFixedReg', 'AL', 'al', 'REG_AL', '', ),
    'rAX': ( 'IDX_ParseFixedReg', 'rAX', '%eAX', 'REG_EAX', '', ),
    'rDX': ( 'IDX_ParseFixedReg', 'rDX', '%eDX', 'REG_EDX', '', ),
    'CS': ( 'IDX_ParseFixedReg', 'CS', 'cs', 'REG_CS', '', ), # 8086: push CS
    'DS': ( 'IDX_ParseFixedReg', 'DS', 'ds', 'REG_DS', '', ),
    'ES': ( 'IDX_ParseFixedReg', 'ES', 'es', 'REG_ES', '', ),
    'FS': ( 'IDX_ParseFixedReg', 'FS', 'fs', 'REG_FS', '', ),
    'GS': ( 'IDX_ParseFixedReg', 'GS', 'gs', 'REG_GS', '', ),
    'SS': ( 'IDX_ParseFixedReg', 'SS', 'ss', 'REG_SS', '', ),
};
364
365# IDX_ParseFixedReg
366# IDX_ParseVexDest
367
368
## IEMFORM_XXX mappings.
##
## Maps the form name to a three element tuple:
## - 0: sEncoding - the encoding name.
## - 1: list of operand locations ([ sWhere1, ... ]), None for 'FIXED'.
## - 2: the opcodesub value, empty string if none.
##
## NOTE(review): M_REG, M_MEM, VEX_M_REG and VEX_M_MEM have an empty opcodesub, unlike the
##               other _REG/_MEM forms which use '11 mr/reg' / '!11 mr/reg' - confirm this
##               is intentional.
g_kdIemForms = { # sEncoding, [ sWhere1, ... ] opcodesub ),
    'RM': ( 'ModR/M', [ 'reg', 'rm' ], '', ),
    'RM_REG': ( 'ModR/M', [ 'reg', 'rm' ], '11 mr/reg', ),
    'RM_MEM': ( 'ModR/M', [ 'reg', 'rm' ], '!11 mr/reg', ),
    'RMI': ( 'ModR/M', [ 'reg', 'rm', 'imm' ], '', ),
    'RMI_REG': ( 'ModR/M', [ 'reg', 'rm', 'imm' ], '11 mr/reg', ),
    'RMI_MEM': ( 'ModR/M', [ 'reg', 'rm', 'imm' ], '!11 mr/reg', ),
    'MR': ( 'ModR/M', [ 'rm', 'reg' ], '', ),
    'MR_REG': ( 'ModR/M', [ 'rm', 'reg' ], '11 mr/reg', ),
    'MR_MEM': ( 'ModR/M', [ 'rm', 'reg' ], '!11 mr/reg', ),
    'MRI': ( 'ModR/M', [ 'rm', 'reg', 'imm' ], '', ),
    'MRI_REG': ( 'ModR/M', [ 'rm', 'reg', 'imm' ], '11 mr/reg', ),
    'MRI_MEM': ( 'ModR/M', [ 'rm', 'reg', 'imm' ], '!11 mr/reg', ),
    'M': ( 'ModR/M', [ 'rm', ], '', ),
    'M_REG': ( 'ModR/M', [ 'rm', ], '', ),
    'M_MEM': ( 'ModR/M', [ 'rm', ], '', ),
    'R': ( 'ModR/M', [ 'reg', ], '', ),

    'VEX_RM': ( 'VEX.ModR/M', [ 'reg', 'rm' ], '', ),
    'VEX_RM_REG': ( 'VEX.ModR/M', [ 'reg', 'rm' ], '11 mr/reg', ),
    'VEX_RM_MEM': ( 'VEX.ModR/M', [ 'reg', 'rm' ], '!11 mr/reg', ),
    'VEX_MR': ( 'VEX.ModR/M', [ 'rm', 'reg' ], '', ),
    'VEX_MR_REG': ( 'VEX.ModR/M', [ 'rm', 'reg' ], '11 mr/reg', ),
    'VEX_MR_MEM': ( 'VEX.ModR/M', [ 'rm', 'reg' ], '!11 mr/reg', ),
    'VEX_M': ( 'VEX.ModR/M', [ 'rm', ], '' ),
    'VEX_M_REG': ( 'VEX.ModR/M', [ 'rm', ], '' ),
    'VEX_M_MEM': ( 'VEX.ModR/M', [ 'rm', ], '' ),
    'VEX_R': ( 'VEX.ModR/M', [ 'reg', ], '' ),
    'VEX_RVM': ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm' ], '', ),
    'VEX_RVM_REG': ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm' ], '11 mr/reg', ),
    'VEX_RVM_MEM': ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm' ], '!11 mr/reg', ),
    'VEX_RMV': ( 'VEX.ModR/M', [ 'reg', 'rm', 'vvvv' ], '', ),
    'VEX_RMV_REG': ( 'VEX.ModR/M', [ 'reg', 'rm', 'vvvv' ], '11 mr/reg', ),
    'VEX_RMV_MEM': ( 'VEX.ModR/M', [ 'reg', 'rm', 'vvvv' ], '!11 mr/reg', ),
    'VEX_RMI': ( 'VEX.ModR/M', [ 'reg', 'rm', 'imm' ], '', ),
    'VEX_RMI_REG': ( 'VEX.ModR/M', [ 'reg', 'rm', 'imm' ], '11 mr/reg', ),
    'VEX_RMI_MEM': ( 'VEX.ModR/M', [ 'reg', 'rm', 'imm' ], '!11 mr/reg', ),
    'VEX_MVR': ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg' ], '', ),
    'VEX_MVR_REG': ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg' ], '11 mr/reg', ),
    'VEX_MVR_MEM': ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg' ], '!11 mr/reg', ),

    'VEX_VM': ( 'VEX.ModR/M', [ 'vvvv', 'rm' ], '', ),
    'VEX_VM_REG': ( 'VEX.ModR/M', [ 'vvvv', 'rm' ], '11 mr/reg', ),
    'VEX_VM_MEM': ( 'VEX.ModR/M', [ 'vvvv', 'rm' ], '!11 mr/reg', ),

    'FIXED': ( 'fixed', None, '', ),
};
417
## \@oppfx values.
## Each prefix name gets its own (initially empty) list.
g_kdPrefixes = { sPrefix: [] for sPrefix in ( 'none', '0x66', '0xf3', '0xf2', ) };
425
## Special \@opcode tag values.
## Each special value gets its own (initially empty) list.
g_kdSpecialOpcodes = {
    sSpecial: [] for sSpecial in (
        '/reg',
        'mr/reg',
        '11 /reg',
        '!11 /reg',
        '11 mr/reg',
        '!11 mr/reg',
    )
};
435
## Special \@opcodesub tag values.
## The first value is the real value for aliases (None for 'none').
## The second value is for bs3cg1.
g_kdSubOpcodes = {
    'none':                 [ None,                 '',    ],
    '11 mr/reg':            [ '11 mr/reg',          '',    ],
    '11':                   [ '11 mr/reg',          '',    ], ##< alias
    '!11 mr/reg':           [ '!11 mr/reg',         '',    ],
    '!11':                  [ '!11 mr/reg',         '',    ], ##< alias
    'rex.w=0':              [ 'rex.w=0',            'WZ',  ],
    'w=0':                  [ 'rex.w=0',            '',    ], ##< alias
    'rex.w=1':              [ 'rex.w=1',            'WNZ', ],
    'w=1':                  [ 'rex.w=1',            '',    ], ##< alias
    'vex.l=0':              [ 'vex.l=0',            'L0',  ],
    # Not an alias: the real value must be 'vex.l=1' (it used to resolve to
    # 'vex.l=0', making L1 instructions indistinguishable from L0 ones).
    'vex.l=1':              [ 'vex.l=1',            'L1',  ],
    '11 mr/reg vex.l=0':    [ '11 mr/reg vex.l=0',  'L0',  ],
    '11 mr/reg vex.l=1':    [ '11 mr/reg vex.l=1',  'L1',  ],
    '!11 mr/reg vex.l=0':   [ '!11 mr/reg vex.l=0', 'L0',  ],
    '!11 mr/reg vex.l=1':   [ '!11 mr/reg vex.l=1', 'L1',  ],
};
456
## Valid values for \@openc
## Each encoding maps to a single element list holding the BS3CG1ENC_XXX name (or None).
g_kdEncodings = {
    sEncoding: [ sBs3Cg1Enc, ] for sEncoding, sBs3Cg1Enc in (
        ( 'ModR/M',     'BS3CG1ENC_MODRM',     ),   ##< ModR/M
        ( 'VEX.ModR/M', 'BS3CG1ENC_VEX_MODRM', ),   ##< VEX...ModR/M
        ( 'fixed',      'BS3CG1ENC_FIXED',     ),   ##< Fixed encoding (address, registers, unused, etc).
        ( 'VEX.fixed',  'BS3CG1ENC_VEX_FIXED', ),   ##< VEX + fixed encoding (address, registers, unused, etc).
        ( 'prefix',     None,                  ),   ##< Prefix
    )
};
465
## \@opunused, \@opinvalid, \@opinvlstyle
## Each style name gets its own (initially empty) list.
g_kdInvalidStyles = {
    sStyle: [] for sStyle in (
        'immediate',                ##< CPU stops decoding immediately after the opcode.
        'vex.modrm',                ##< VEX+ModR/M, everyone.
        'intel-modrm',              ##< Intel decodes ModR/M.
        'intel-modrm-imm8',         ##< Intel decodes ModR/M and an 8-bit immediate.
        'intel-opcode-modrm',       ##< Intel decodes another opcode byte followed by ModR/M. (Unused extension tables.)
        'intel-opcode-modrm-imm8',  ##< Intel decodes another opcode byte followed by ModR/M and an 8-bit immediate.
    )
};
475
## CPU names, each mapped to an empty tuple.
g_kdCpuNames = { sCpuName: () for sCpuName in ( '8086', '80186', '80286', '80386', '80486', ) };
483
## \@opcpuid
##
## Maps the CPUID feature name to the name of the corresponding X86_CPUID_XXX constant.
##
## NOTE(review): The constant names for 'pclmul' (..._DTES64), 'monitor' (..._CPLDS) and
##               'smx' (..._TM2) do not match their feature names, unlike all the other
##               entries - verify these against the X86_CPUID_FEATURE_ECX_XXX defines.
g_kdCpuIdFlags = {
    'vme': 'X86_CPUID_FEATURE_EDX_VME',
    'tsc': 'X86_CPUID_FEATURE_EDX_TSC',
    'msr': 'X86_CPUID_FEATURE_EDX_MSR',
    'cx8': 'X86_CPUID_FEATURE_EDX_CX8',
    'sep': 'X86_CPUID_FEATURE_EDX_SEP',
    'cmov': 'X86_CPUID_FEATURE_EDX_CMOV',
    'clfsh': 'X86_CPUID_FEATURE_EDX_CLFSH',
    'clflushopt': 'X86_CPUID_STEXT_FEATURE_EBX_CLFLUSHOPT',
    'mmx': 'X86_CPUID_FEATURE_EDX_MMX',
    'fxsr': 'X86_CPUID_FEATURE_EDX_FXSR',
    'sse': 'X86_CPUID_FEATURE_EDX_SSE',
    'sse2': 'X86_CPUID_FEATURE_EDX_SSE2',
    'sse3': 'X86_CPUID_FEATURE_ECX_SSE3',
    'pclmul': 'X86_CPUID_FEATURE_ECX_DTES64',
    'monitor': 'X86_CPUID_FEATURE_ECX_CPLDS',
    'vmx': 'X86_CPUID_FEATURE_ECX_VMX',
    'smx': 'X86_CPUID_FEATURE_ECX_TM2',
    'ssse3': 'X86_CPUID_FEATURE_ECX_SSSE3',
    'fma': 'X86_CPUID_FEATURE_ECX_FMA',
    'cx16': 'X86_CPUID_FEATURE_ECX_CX16',
    'pcid': 'X86_CPUID_FEATURE_ECX_PCID',
    'sse4.1': 'X86_CPUID_FEATURE_ECX_SSE4_1',
    'sse4.2': 'X86_CPUID_FEATURE_ECX_SSE4_2',
    'movbe': 'X86_CPUID_FEATURE_ECX_MOVBE',
    'popcnt': 'X86_CPUID_FEATURE_ECX_POPCNT',
    'aes': 'X86_CPUID_FEATURE_ECX_AES',
    'xsave': 'X86_CPUID_FEATURE_ECX_XSAVE',
    'avx': 'X86_CPUID_FEATURE_ECX_AVX',
    'avx2': 'X86_CPUID_STEXT_FEATURE_EBX_AVX2',
    'f16c': 'X86_CPUID_FEATURE_ECX_F16C',
    'rdrand': 'X86_CPUID_FEATURE_ECX_RDRAND',

    # AMD features.
    'axmmx': 'X86_CPUID_AMD_FEATURE_EDX_AXMMX',
    '3dnowext': 'X86_CPUID_AMD_FEATURE_EDX_3DNOW_EX',
    '3dnow': 'X86_CPUID_AMD_FEATURE_EDX_3DNOW',
    'svm': 'X86_CPUID_AMD_FEATURE_ECX_SVM',
    'cr8l': 'X86_CPUID_AMD_FEATURE_ECX_CR8L',
    'abm': 'X86_CPUID_AMD_FEATURE_ECX_ABM',
    'sse4a': 'X86_CPUID_AMD_FEATURE_ECX_SSE4A',
    '3dnowprf': 'X86_CPUID_AMD_FEATURE_ECX_3DNOWPRF',
    'xop': 'X86_CPUID_AMD_FEATURE_ECX_XOP',
    'fma4': 'X86_CPUID_AMD_FEATURE_ECX_FMA4',
};
529
## \@ophints values.
##
## Maps the hint name to a DISOPTYPE_XXX constant name.  The hints at the end of
## the table map to an empty string instead.
# pylint: disable=line-too-long
g_kdHints = {
    'invalid': 'DISOPTYPE_INVALID', ##<
    'harmless': 'DISOPTYPE_HARMLESS', ##<
    'controlflow': 'DISOPTYPE_CONTROLFLOW', ##<
    'potentially_dangerous': 'DISOPTYPE_POTENTIALLY_DANGEROUS', ##<
    'dangerous': 'DISOPTYPE_DANGEROUS', ##<
    'portio': 'DISOPTYPE_PORTIO', ##<
    'privileged': 'DISOPTYPE_PRIVILEGED', ##<
    'privileged_notrap': 'DISOPTYPE_PRIVILEGED_NOTRAP', ##<
    'uncond_controlflow': 'DISOPTYPE_UNCOND_CONTROLFLOW', ##<
    'relative_controlflow': 'DISOPTYPE_RELATIVE_CONTROLFLOW', ##<
    'cond_controlflow': 'DISOPTYPE_COND_CONTROLFLOW', ##<
    'interrupt': 'DISOPTYPE_INTERRUPT', ##<
    'illegal': 'DISOPTYPE_ILLEGAL', ##<
    'rrm_dangerous': 'DISOPTYPE_RRM_DANGEROUS', ##< Some additional dangerous ones when recompiling raw r0.
    'rrm_dangerous_16': 'DISOPTYPE_RRM_DANGEROUS_16', ##< Some additional dangerous ones when recompiling 16-bit raw r0.
    'inhibit_irqs': 'DISOPTYPE_INHIBIT_IRQS', ##< Will or can inhibit irqs (sti, pop ss, mov ss)
    'x86_portio_read': 'DISOPTYPE_X86_PORTIO_READ', ##<
    'x86_portio_write': 'DISOPTYPE_X86_PORTIO_WRITE', ##<
    'x86_invalid_64': 'DISOPTYPE_X86_INVALID_64', ##< Invalid in 64 bits mode
    'x86_only_64': 'DISOPTYPE_X86_ONLY_64', ##< Only valid in 64 bits mode
    'x86_default_64_op_size': 'DISOPTYPE_X86_DEFAULT_64_OP_SIZE', ##< Default 64 bits operand size
    'x86_forced_64_op_size': 'DISOPTYPE_X86_FORCED_64_OP_SIZE', ##< Forced 64 bits operand size; regardless of prefix bytes
    'x86_rexb_extends_opreg': 'DISOPTYPE_X86_REXB_EXTENDS_OPREG', ##< REX.B extends the register field in the opcode byte
    'x86_mod_fixed_11': 'DISOPTYPE_X86_MOD_FIXED_11', ##< modrm.mod is always 11b
    'x86_forced_32_op_size_x86': 'DISOPTYPE_X86_FORCED_32_OP_SIZE_X86', ##< Forced 32 bits operand size; regardless of prefix bytes
    ## (only in 16 & 32 bits mode!)
    'x86_avx': 'DISOPTYPE_X86_AVX', ##< AVX,AVX2,++ instruction. Not implemented yet!
    'x86_sse': 'DISOPTYPE_X86_SSE', ##< SSE,SSE2,SSE3,++ instruction. Not implemented yet!
    'x86_mmx': 'DISOPTYPE_X86_MMX', ##< MMX,MMXExt,3DNow,++ instruction. Not implemented yet!
    'x86_fpu': 'DISOPTYPE_X86_FPU', ##< FPU instruction. Not implemented yet!
    'ignores_oz_pfx': '', ##< Ignores operand size prefix 66h.
    'ignores_rexw': '', ##< Ignores REX.W.
    'ignores_op_sizes': '', ##< Shorthand for "ignores_oz_pfx | ignores_rexw" (presumably; the text used to refer to itself).
    'vex_l_zero': '', ##< VEX.L must be 0.
    'vex_l_ignored': '', ##< VEX.L is ignored.
    'vex_v_zero': '', ##< VEX.V must be 0. (generate sub-table?)
    'lock_allowed': '', ##< Lock prefix allowed.
};
# pylint: enable=line-too-long
572
## \@opxcpttype values (see SDMv2 2.4, 2.7).
## Each type name gets its own (initially empty) list.
g_kdXcptTypes = {
    sXcptType: [] for sXcptType in (
        'none',
        '1', '2', '3', '4', '4UA', '5',
        '5LZ',  # LZ = VEX.L must be zero.
        '6', '7', '7LZ', '8', '11', '12',
        'E1', 'E1NF', 'E2', 'E3', 'E3NF', 'E4', 'E4NF', 'E5', 'E5NF',
        'E6', 'E6NF', 'E7NF', 'E9', 'E9NF', 'E10', 'E11', 'E12', 'E12NF',
    )
};
608
609
610def _isValidOpcodeByte(sOpcode):
611 """
612 Checks if sOpcode is a valid lower case opcode byte.
613 Returns true/false.
614 """
615 if len(sOpcode) == 4:
616 if sOpcode[:2] == '0x':
617 if sOpcode[2] in '0123456789abcdef':
618 if sOpcode[3] in '0123456789abcdef':
619 return True;
620 return False;
621
622
623class InstructionMap(object):
624 """
625 Instruction map.
626
627 The opcode map provides the lead opcode bytes (empty for the one byte
628 opcode map). An instruction can be member of multiple opcode maps as long
629 as it uses the same opcode value within the map (because of VEX).
630 """
631
632 kdEncodings = {
633 'legacy': [],
634 'vex1': [], ##< VEX or EVEX prefix with vvvvv = 1
635 'vex2': [], ##< VEX or EVEX prefix with vvvvv = 2
636 'vex3': [], ##< VEX or EVEX prefix with vvvvv = 3
637 'xop8': [], ##< XOP prefix with vvvvv = 8
638 'xop9': [], ##< XOP prefix with vvvvv = 9
639 'xop10': [], ##< XOP prefix with vvvvv = 10
640 };
641 ## Selectors.
642 ## 1. The first value is the number of table entries required by a
643 ## decoder or disassembler for this type of selector.
644 ## 2. The second value is how many entries per opcode byte if applicable.
645 kdSelectors = {
646 'byte': [ 256, 1, ], ##< next opcode byte selects the instruction (default).
647 'byte+pfx': [ 1024, 4, ], ##< next opcode byte selects the instruction together with the 0x66, 0xf2 and 0xf3 prefixes.
648 '/r': [ 8, 1, ], ##< modrm.reg selects the instruction.
649 'memreg /r':[ 16, 1, ], ##< modrm.reg and (modrm.mod == 3) selects the instruction.
650 'mod /r': [ 32, 1, ], ##< modrm.reg and modrm.mod selects the instruction.
651 '!11 /r': [ 8, 1, ], ##< modrm.reg selects the instruction with modrm.mod != 0y11.
652 '11 /r': [ 8, 1, ], ##< modrm.reg select the instruction with modrm.mod == 0y11.
653 '11': [ 64, 1, ], ##< modrm.reg and modrm.rm select the instruction with modrm.mod == 0y11.
654 };
655
656 ## Define the subentry number according to the Instruction::sPrefix
657 ## value for 'byte+pfx' selected tables.
658 kiPrefixOrder = {
659 'none': 0,
660 '0x66': 1,
661 '0xf3': 2,
662 '0xf2': 3,
663 };
664
665 def __init__(self, sName, sIemName = None, asLeadOpcodes = None, sSelector = 'byte+pfx',
666 sEncoding = 'legacy', sDisParse = None):
667 assert sSelector in self.kdSelectors;
668 assert sEncoding in self.kdEncodings;
669 if asLeadOpcodes is None:
670 asLeadOpcodes = [];
671 else:
672 for sOpcode in asLeadOpcodes:
673 assert _isValidOpcodeByte(sOpcode);
674 assert sDisParse is None or sDisParse.startswith('IDX_Parse');
675
676 self.sName = sName;
677 self.sIemName = sIemName;
678 self.asLeadOpcodes = asLeadOpcodes; ##< Lead opcode bytes formatted as hex strings like '0x0f'.
679 self.sSelector = sSelector; ##< The member selector, see kdSelectors.
680 self.sEncoding = sEncoding; ##< The encoding, see kdSelectors.
681 self.aoInstructions = [] # type: Instruction
682 self.sDisParse = sDisParse; ##< IDX_ParseXXX.
683
684 def copy(self, sNewName, sPrefixFilter = None):
685 """
686 Copies the table with filtering instruction by sPrefix if not None.
687 """
688 oCopy = InstructionMap(sNewName, sIemName = self.sIemName, asLeadOpcodes = self.asLeadOpcodes,
689 sSelector = 'byte' if sPrefixFilter is not None and self.sSelector == 'byte+pfx'
690 else self.sSelector,
691 sEncoding = self.sEncoding, sDisParse = self.sDisParse);
692 if sPrefixFilter is None:
693 oCopy.aoInstructions = list(self.aoInstructions);
694 else:
695 oCopy.aoInstructions = [oInstr for oInstr in self.aoInstructions if oInstr.sPrefix == sPrefixFilter];
696 return oCopy;
697
698 def getTableSize(self):
699 """
700 Number of table entries. This corresponds directly to the selector.
701 """
702 return self.kdSelectors[self.sSelector][0];
703
704 def getEntriesPerByte(self):
705 """
706 Number of table entries per opcode bytes.
707
708 This only really makes sense for the 'byte' and 'byte+pfx' selectors, for
709 the others it will just return 1.
710 """
711 return self.kdSelectors[self.sSelector][1];
712
713 def getInstructionIndex(self, oInstr):
714 """
715 Returns the table index for the instruction.
716 """
717 bOpcode = oInstr.getOpcodeByte();
718
719 # The byte selectors are simple. We need a full opcode byte and need just return it.
720 if self.sSelector == 'byte':
721 assert oInstr.sOpcode[:2] == '0x' and len(oInstr.sOpcode) == 4, str(oInstr);
722 return bOpcode;
723
724 # The byte + prefix selector is similarly simple, though requires a prefix as well as the full opcode.
725 if self.sSelector == 'byte+pfx':
726 assert oInstr.sOpcode[:2] == '0x' and len(oInstr.sOpcode) == 4, str(oInstr);
727 assert self.kiPrefixOrder.get(oInstr.sPrefix, -16384) >= 0;
728 return bOpcode * 4 + self.kiPrefixOrder.get(oInstr.sPrefix, -16384);
729
730 # The other selectors needs masking and shifting.
731 if self.sSelector == '/r':
732 return (bOpcode >> 3) & 0x7;
733
734 if self.sSelector == 'mod /r':
735 return (bOpcode >> 3) & 0x1f;
736
737 if self.sSelector == 'memreg /r':
738 return ((bOpcode >> 3) & 0x7) | (int((bOpcode >> 6) == 3) << 3);
739
740 if self.sSelector == '!11 /r':
741 assert (bOpcode & 0xc0) != 0xc, str(oInstr);
742 return (bOpcode >> 3) & 0x7;
743
744 if self.sSelector == '11 /r':
745 assert (bOpcode & 0xc0) == 0xc, str(oInstr);
746 return (bOpcode >> 3) & 0x7;
747
748 if self.sSelector == '11':
749 assert (bOpcode & 0xc0) == 0xc, str(oInstr);
750 return bOpcode & 0x3f;
751
752 assert False, self.sSelector;
753 return -1;
754
def getInstructionsInTableOrder(self):
    """
    Lays the instructions of this map out in opcode table order.

    Returns a list with getTableSize() entries, indexed by the value
    getInstructionIndex() yields for each instruction.  An entry is:
        - None when no instruction was specified for that index,
        - the instruction itself when exactly one maps to the index, or
        - a list of instructions (in self.aoInstructions order) when
          several share the index, for instance because a prefix
          (e.g. REX.W) encodes a different instruction or different CPUs
          have different instructions or prefixes in the same place.

    Instructions without an opcode (sOpcode is None or empty) are skipped.
    """
    cEntries = self.getTableSize();
    aoResult = [None] * cEntries;

    for oCurInstr in self.aoInstructions:
        # No opcode, no table slot.
        if not oCurInstr.sOpcode:
            continue;

        idxEntry = self.getInstructionIndex(oCurInstr);
        assert idxEntry < cEntries, str(idxEntry);

        oOccupant = aoResult[idxEntry];
        if isinstance(oOccupant, list):
            oOccupant.append(oCurInstr);                    # Third or later instruction in this slot.
        elif oOccupant is not None:
            aoResult[idxEntry] = [oOccupant, oCurInstr];    # Second instruction: promote slot to a list.
        else:
            aoResult[idxEntry] = oCurInstr;                 # First instruction: store it directly.

    return aoResult;
786
787
def getDisasTableName(self):
    """
    Returns the disassembler table name for this map.

    The name is 'g_aDisas' followed by one piece per '_'-separated word of
    self.sName:
        - 'm' and 'r' (modrm.mod==mem / modrm.mod==reg suffixes) and
          two digit lower case hex words are appended as '_' + word,
        - any other word gets 'grp' and 'map' capitalized and its first
          character upper cased before being appended without separator.
    """
    asPieces = ['g_aDisas',];
    for sWord in self.sName.split('_'):
        if sWord in ('m', 'r'):
            # Suffix indicating modrm.mod==mem ('m') or modrm.mod==reg ('r').
            asPieces.append('_' + sWord);
        elif len(sWord) == 2 and re.match('^[a-f0-9][a-f0-9]$', sWord):
            # An opcode byte in hex, kept as-is.
            asPieces.append('_' + sWord);
        else:
            sCamel = sWord.replace('grp', 'Grp').replace('map', 'Map');
            asPieces.append(sCamel[0].upper() + sCamel[1:]);
    return ''.join(asPieces);
805
def getDisasRangeName(self):
    """
    Returns the disassembler table range name for this map.

    This is the getDisasTableName() result with 'g_aDisas' replaced by
    'g_Disas' and 'Range' appended.
    """
    sTableName = self.getDisasTableName();
    sRangeBase = sTableName.replace('g_aDisas', 'g_Disas');
    return sRangeBase + 'Range';
811
def isVexMap(self):
    """ Returns True if a VEX map, i.e. if self.sEncoding starts with 'vex'. """
    fIsVex = self.sEncoding.startswith('vex');
    return fIsVex;
815
816
class TestType(object):
    """
    Test value type.

    This base class deals with integer like values.  The fUnsigned constructor
    parameter indicates the default stance on zero vs sign extending.  It is
    possible to override fUnsigned=True by prefixing the value with '+' or '-'.
    """
    def __init__(self, sName, acbSizes = None, fUnsigned = True):
        self.sName     = sName;
        self.acbSizes  = [1, 2, 4, 8, 16, 32] if acbSizes is None else acbSizes;  # Normal sizes.
        self.fUnsigned = fUnsigned;

    class BadValue(Exception):
        """ Bad value exception. """
        def __init__(self, sMessage):
            Exception.__init__(self, sMessage);
            self.sMessage = sMessage;

    ## For ascii ~ operator (maps a lower case hex digit to its bitwise inverse).
    kdHexInv = {
        '0': 'f',
        '1': 'e',
        '2': 'd',
        '3': 'c',
        '4': 'b',
        '5': 'a',
        '6': '9',
        '7': '8',
        '8': '7',
        '9': '6',
        'a': '5',
        'b': '4',
        'c': '3',
        'd': '2',
        'e': '1',
        'f': '0',
    };

    def get(self, sValue):
        """
        Get the shortest normal sized byte representation of sValue.

        The value is a decimal or '0x' prefixed hexadecimal integer string,
        optionally preceded by '+' or '-'.  Either sign forces sign extension.

        Returns ((fSignExtend, bytearray), ) or ((fSignExtend, bytearray), (fSignExtend, bytearray), ).
        The latter form is for AND+OR pairs where the first entry is what to
        AND with the field and the second the one to OR with.  This base class
        only produces the single entry form.  The bytearray holds the value
        least significant byte first, padded to the smallest size in
        self.acbSizes it fits in (or to a multiple of the largest size if it
        does not fit in any).

        Raises BadValue if invalid value.
        """
        if not sValue:
            raise TestType.BadValue('empty value');

        # Deal with sign and detect hexadecimal or decimal.
        fSignExtend = not self.fUnsigned;
        offDigits   = 0;
        if sValue[0] in ('-', '+'):
            fSignExtend = True;
            offDigits   = 1;
        fHex = len(sValue) > offDigits + 2 and sValue[offDigits : offDigits + 2].lower() == '0x';

        # Try convert it to an integer.  int() yields arbitrary precision
        # results on both python 2 and 3, so no 'long' alias is required.
        try:
            iValue = int(sValue, 16 if fHex else 10);
        except Exception as oXcpt:
            raise TestType.BadValue('failed to convert "%s" to integer (%s)' % (sValue, oXcpt));

        # Convert to a hex string.  '%x' never emits a '0x' prefix nor the
        # python 2 'L' suffix, so no version specific trimming is needed.
        # Negative values are manually converted to two's complement by
        # inverting the digits of (-n - 1).
        if iValue >= 0:
            sHex = '%x' % (iValue,);
        else:
            sHex = ''.join([self.kdHexInv[sDigit] for sDigit in '%x' % (-iValue - 1,)]);
            if fSignExtend and sHex[0] not in [ '8', '9', 'a', 'b', 'c', 'd', 'e', 'f']:
                sHex = 'f' + sHex;  # Make sure the top bit of the leading digit is set.

        # Figure out the natural number of hex digits for the value.
        cDigits    = len(sHex);
        cMaxDigits = self.acbSizes[-1] * 2;
        if cDigits <= cMaxDigits:
            for cb in self.acbSizes:
                cNaturalDigits = cb * 2;
                if cDigits <= cNaturalDigits:
                    break;
        else:
            # Too big for the largest size: round up to a multiple of it, using
            # integer division so no float rounding can sneak in.
            cNaturalDigits = ((cDigits + cMaxDigits - 1) // cMaxDigits) * cMaxDigits;
        assert isinstance(cNaturalDigits, int)

        # Pad with zeros or, for negative values, with 'f's.
        if cNaturalDigits != cDigits:
            sHex = ('0' if iValue >= 0 else 'f') * (cNaturalDigits - cDigits) + sHex;

        # Walk the digit pairs from the end, producing the least significant byte first.
        abValue = bytearray([int(sHex[offHex - 2 : offHex], 16) for offHex in range(len(sHex), 0, -2)]);

        return ((fSignExtend, abValue),);

    def validate(self, sValue):
        """
        Returns True if value is okay, error message on failure.
        """
        try:
            self.get(sValue);
        except TestType.BadValue as oXcpt:
            return oXcpt.sMessage;
        return True;

    def isAndOrPair(self, sValue):
        """
        Checks if sValue is a pair.

        Always False for this base class.
        """
        _ = sValue;
        return False;
941
942
class TestTypeEflags(TestType):
    """
    Special value parsing for EFLAGS/RFLAGS/FLAGS.

    Values are comma separated flag mnemonics which are looked up in
    g_kdEFlagsMnemonics.  A mnemonic mapping to a constant prefixed by '!'
    requests that flag to be cleared, any other mnemonic requests it set.
    """

    ## Mnemonics isAndOrPair() treats as indicating a cleared (zero) flag.
    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 };

    def __init__(self, sName):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);

    def get(self, sValue):
        """
        Converts the flag mnemonic list into set (and clear) masks.

        Returns the TestType.get() result for the set mask alone when no flag
        is to be cleared, otherwise a (clear-entry, set-entry) pair.

        Raises BadValue on unknown mnemonics.
        """
        fToSet   = 0;
        fToClear = 0;
        for sMnemonic in sValue.split(','):
            sConstant = g_kdEFlagsMnemonics.get(sMnemonic, None);
            if sConstant is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sMnemonic, sValue))
            if sConstant[0] != '!':
                fToSet |= g_kdX86EFlagsConstants[sConstant];
            else:
                fToClear |= g_kdX86EFlagsConstants[sConstant[1:]];

        aoSet = TestType.get(self, '0x%x' % (fToSet,));
        if fToClear == 0:
            assert self.isAndOrPair(sValue) is False;
            return aoSet;

        aoClear = TestType.get(self, '%#x' % (fToClear,))
        assert self.isAndOrPair(sValue) is True;
        return (aoClear[0], aoSet[0]);

    def isAndOrPair(self, sValue):
        """
        Checks if sValue is a pair, i.e. contains any kdZeroValueFlags mnemonic.
        """
        # NOTE(review): this is a substring search over the whole value rather
        # than a per-mnemonic comparison - confirm that no valid mnemonic
        # contains one of the zero value mnemonics as a substring.
        return any(sValue.find(sZeroFlag) >= 0 for sZeroFlag in self.kdZeroValueFlags);
978
class TestTypeFromDict(TestType):
    """
    Special value parsing for registers described by a dictionary of named
    flag constants (the constructor takes the dictionary and the constant
    name prefix).

    Values are comma separated flag names; each is upper cased, prefixed by
    sConstantPrefix and looked up in kdConstantsAndValues.
    """

    ## Not referenced by the methods of this class.
    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 };

    def __init__(self, sName, kdConstantsAndValues, sConstantPrefix):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);
        self.kdConstantsAndValues = kdConstantsAndValues;
        self.sConstantPrefix      = sConstantPrefix;

    def get(self, sValue):
        """
        ORs together the values of the named flags and returns the
        TestType.get() representation of the result.

        Raises BadValue on unknown flag names.
        """
        fCombined = 0;
        for sName in sValue.split(','):
            sConstant  = self.sConstantPrefix + sName.upper();
            fThisValue = self.kdConstantsAndValues.get(sConstant, None);
            if fThisValue is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sName, sValue))
            fCombined |= fThisValue;
        return TestType.get(self, '0x%x' % (fCombined,));
999
1000
class TestInOut(object):
    """
    One input or output state modifier.

    This should be thought as values to modify BS3REGCTX and extended (needs
    to be structured) state.
    """
    ## Assigned operators.
    kasOperators = [
        '&|=',  # Special AND(INV)+OR operator for use with EFLAGS.
        '&~=',
        '&=',
        '|=',
        '='
    ];
    ## Types
    kdTypes = {
        'uint':  TestType('uint', fUnsigned = True),
        # NOTE(review): 'int' relies on the TestType default of fUnsigned=True
        # and thus behaves exactly like 'uint' - confirm this is intended.
        'int':   TestType('int'),
        'efl':   TestTypeEflags('efl'),
        'cr0':   TestTypeFromDict('cr0', g_kdX86Cr0Constants, 'X86_CR0_'),
        'cr4':   TestTypeFromDict('cr4', g_kdX86Cr4Constants, 'X86_CR4_'),
        'xcr0':  TestTypeFromDict('xcr0', g_kdX86XSaveCConstants, 'XSAVE_C_'),
    };
    ## CPU context fields.
    kdFields = {
        # name:         ( default type, [both|input|output], )
        # Operands.
        'op1':          ( 'uint', 'both',   ), ## \@op1
        'op2':          ( 'uint', 'both',   ), ## \@op2
        'op3':          ( 'uint', 'both',   ), ## \@op3
        'op4':          ( 'uint', 'both',   ), ## \@op4
        # Flags.
        'efl':          ( 'efl',  'both',   ),
        'efl_undef':    ( 'uint', 'output', ),
        # 8-bit GPRs.
        'al':           ( 'uint', 'both',   ),
        'cl':           ( 'uint', 'both',   ),
        'dl':           ( 'uint', 'both',   ),
        'bl':           ( 'uint', 'both',   ),
        'ah':           ( 'uint', 'both',   ),
        'ch':           ( 'uint', 'both',   ),
        'dh':           ( 'uint', 'both',   ),
        'bh':           ( 'uint', 'both',   ),
        'r8l':          ( 'uint', 'both',   ),
        'r9l':          ( 'uint', 'both',   ),
        'r10l':         ( 'uint', 'both',   ),
        'r11l':         ( 'uint', 'both',   ),
        'r12l':         ( 'uint', 'both',   ),
        'r13l':         ( 'uint', 'both',   ),
        'r14l':         ( 'uint', 'both',   ),
        'r15l':         ( 'uint', 'both',   ),
        # 16-bit GPRs.
        'ax':           ( 'uint', 'both',   ),
        'dx':           ( 'uint', 'both',   ),
        'cx':           ( 'uint', 'both',   ),
        'bx':           ( 'uint', 'both',   ),
        'sp':           ( 'uint', 'both',   ),
        'bp':           ( 'uint', 'both',   ),
        'si':           ( 'uint', 'both',   ),
        'di':           ( 'uint', 'both',   ),
        'r8w':          ( 'uint', 'both',   ),
        'r9w':          ( 'uint', 'both',   ),
        'r10w':         ( 'uint', 'both',   ),
        'r11w':         ( 'uint', 'both',   ),
        'r12w':         ( 'uint', 'both',   ),
        'r13w':         ( 'uint', 'both',   ),
        'r14w':         ( 'uint', 'both',   ),
        'r15w':         ( 'uint', 'both',   ),
        # 32-bit GPRs.
        'eax':          ( 'uint', 'both',   ),
        'edx':          ( 'uint', 'both',   ),
        'ecx':          ( 'uint', 'both',   ),
        'ebx':          ( 'uint', 'both',   ),
        'esp':          ( 'uint', 'both',   ),
        'ebp':          ( 'uint', 'both',   ),
        'esi':          ( 'uint', 'both',   ),
        'edi':          ( 'uint', 'both',   ),
        'r8d':          ( 'uint', 'both',   ),
        'r9d':          ( 'uint', 'both',   ),
        'r10d':         ( 'uint', 'both',   ),
        'r11d':         ( 'uint', 'both',   ),
        'r12d':         ( 'uint', 'both',   ),
        'r13d':         ( 'uint', 'both',   ),
        'r14d':         ( 'uint', 'both',   ),
        'r15d':         ( 'uint', 'both',   ),
        # 64-bit GPRs.
        'rax':          ( 'uint', 'both',   ),
        'rdx':          ( 'uint', 'both',   ),
        'rcx':          ( 'uint', 'both',   ),
        'rbx':          ( 'uint', 'both',   ),
        'rsp':          ( 'uint', 'both',   ),
        'rbp':          ( 'uint', 'both',   ),
        'rsi':          ( 'uint', 'both',   ),
        'rdi':          ( 'uint', 'both',   ),
        'r8':           ( 'uint', 'both',   ),
        'r9':           ( 'uint', 'both',   ),
        'r10':          ( 'uint', 'both',   ),
        'r11':          ( 'uint', 'both',   ),
        'r12':          ( 'uint', 'both',   ),
        'r13':          ( 'uint', 'both',   ),
        'r14':          ( 'uint', 'both',   ),
        'r15':          ( 'uint', 'both',   ),
        # 16-bit, 32-bit or 64-bit registers according to operand size.
        'oz.rax':       ( 'uint', 'both',   ),
        'oz.rdx':       ( 'uint', 'both',   ),
        'oz.rcx':       ( 'uint', 'both',   ),
        'oz.rbx':       ( 'uint', 'both',   ),
        'oz.rsp':       ( 'uint', 'both',   ),
        'oz.rbp':       ( 'uint', 'both',   ),
        'oz.rsi':       ( 'uint', 'both',   ),
        'oz.rdi':       ( 'uint', 'both',   ),
        'oz.r8':        ( 'uint', 'both',   ),
        'oz.r9':        ( 'uint', 'both',   ),
        'oz.r10':       ( 'uint', 'both',   ),
        'oz.r11':       ( 'uint', 'both',   ),
        'oz.r12':       ( 'uint', 'both',   ),
        'oz.r13':       ( 'uint', 'both',   ),
        'oz.r14':       ( 'uint', 'both',   ),
        'oz.r15':       ( 'uint', 'both',   ),
        # Control registers.
        'cr0':          ( 'cr0',  'both',   ),
        'cr4':          ( 'cr4',  'both',   ),
        'xcr0':         ( 'xcr0', 'both',   ),
        # FPU Registers
        'fcw':          ( 'uint', 'both',   ),
        'fsw':          ( 'uint', 'both',   ),
        'ftw':          ( 'uint', 'both',   ),
        'fop':          ( 'uint', 'both',   ),
        'fpuip':        ( 'uint', 'both',   ),
        'fpucs':        ( 'uint', 'both',   ),
        'fpudp':        ( 'uint', 'both',   ),
        'fpuds':        ( 'uint', 'both',   ),
        'mxcsr':        ( 'uint', 'both',   ),
        'st0':          ( 'uint', 'both',   ),
        'st1':          ( 'uint', 'both',   ),
        'st2':          ( 'uint', 'both',   ),
        'st3':          ( 'uint', 'both',   ),
        'st4':          ( 'uint', 'both',   ),
        'st5':          ( 'uint', 'both',   ),
        'st6':          ( 'uint', 'both',   ),
        'st7':          ( 'uint', 'both',   ),
        # MMX registers.
        'mm0':          ( 'uint', 'both',   ),
        'mm1':          ( 'uint', 'both',   ),
        'mm2':          ( 'uint', 'both',   ),
        'mm3':          ( 'uint', 'both',   ),
        'mm4':          ( 'uint', 'both',   ),
        'mm5':          ( 'uint', 'both',   ),
        'mm6':          ( 'uint', 'both',   ),
        'mm7':          ( 'uint', 'both',   ),
        # SSE registers.
        'xmm0':         ( 'uint', 'both',   ),
        'xmm1':         ( 'uint', 'both',   ),
        'xmm2':         ( 'uint', 'both',   ),
        'xmm3':         ( 'uint', 'both',   ),
        'xmm4':         ( 'uint', 'both',   ),
        'xmm5':         ( 'uint', 'both',   ),
        'xmm6':         ( 'uint', 'both',   ),
        'xmm7':         ( 'uint', 'both',   ),
        'xmm8':         ( 'uint', 'both',   ),
        'xmm9':         ( 'uint', 'both',   ),
        'xmm10':        ( 'uint', 'both',   ),
        'xmm11':        ( 'uint', 'both',   ),
        'xmm12':        ( 'uint', 'both',   ),
        'xmm13':        ( 'uint', 'both',   ),
        'xmm14':        ( 'uint', 'both',   ),
        'xmm15':        ( 'uint', 'both',   ),
        'xmm0.lo':      ( 'uint', 'both',   ),
        'xmm1.lo':      ( 'uint', 'both',   ),
        'xmm2.lo':      ( 'uint', 'both',   ),
        'xmm3.lo':      ( 'uint', 'both',   ),
        'xmm4.lo':      ( 'uint', 'both',   ),
        'xmm5.lo':      ( 'uint', 'both',   ),
        'xmm6.lo':      ( 'uint', 'both',   ),
        'xmm7.lo':      ( 'uint', 'both',   ),
        'xmm8.lo':      ( 'uint', 'both',   ),
        'xmm9.lo':      ( 'uint', 'both',   ),
        'xmm10.lo':     ( 'uint', 'both',   ),
        'xmm11.lo':     ( 'uint', 'both',   ),
        'xmm12.lo':     ( 'uint', 'both',   ),
        'xmm13.lo':     ( 'uint', 'both',   ),
        'xmm14.lo':     ( 'uint', 'both',   ),
        'xmm15.lo':     ( 'uint', 'both',   ),
        'xmm0.hi':      ( 'uint', 'both',   ),
        'xmm1.hi':      ( 'uint', 'both',   ),
        'xmm2.hi':      ( 'uint', 'both',   ),
        'xmm3.hi':      ( 'uint', 'both',   ),
        'xmm4.hi':      ( 'uint', 'both',   ),
        'xmm5.hi':      ( 'uint', 'both',   ),
        'xmm6.hi':      ( 'uint', 'both',   ),
        'xmm7.hi':      ( 'uint', 'both',   ),
        'xmm8.hi':      ( 'uint', 'both',   ),
        'xmm9.hi':      ( 'uint', 'both',   ),
        'xmm10.hi':     ( 'uint', 'both',   ),
        'xmm11.hi':     ( 'uint', 'both',   ),
        'xmm12.hi':     ( 'uint', 'both',   ),
        'xmm13.hi':     ( 'uint', 'both',   ),
        'xmm14.hi':     ( 'uint', 'both',   ),
        'xmm15.hi':     ( 'uint', 'both',   ),
        'xmm0.lo.zx':   ( 'uint', 'both',   ),
        'xmm1.lo.zx':   ( 'uint', 'both',   ),
        'xmm2.lo.zx':   ( 'uint', 'both',   ),
        'xmm3.lo.zx':   ( 'uint', 'both',   ),
        'xmm4.lo.zx':   ( 'uint', 'both',   ),
        'xmm5.lo.zx':   ( 'uint', 'both',   ),
        'xmm6.lo.zx':   ( 'uint', 'both',   ),
        'xmm7.lo.zx':   ( 'uint', 'both',   ),
        'xmm8.lo.zx':   ( 'uint', 'both',   ),
        'xmm9.lo.zx':   ( 'uint', 'both',   ),
        'xmm10.lo.zx':  ( 'uint', 'both',   ),
        'xmm11.lo.zx':  ( 'uint', 'both',   ),
        'xmm12.lo.zx':  ( 'uint', 'both',   ),
        'xmm13.lo.zx':  ( 'uint', 'both',   ),
        'xmm14.lo.zx':  ( 'uint', 'both',   ),
        'xmm15.lo.zx':  ( 'uint', 'both',   ),
        'xmm0.dw0':     ( 'uint', 'both',   ),
        'xmm1.dw0':     ( 'uint', 'both',   ),
        'xmm2.dw0':     ( 'uint', 'both',   ),
        'xmm3.dw0':     ( 'uint', 'both',   ),
        'xmm4.dw0':     ( 'uint', 'both',   ),
        'xmm5.dw0':     ( 'uint', 'both',   ),
        'xmm6.dw0':     ( 'uint', 'both',   ),
        'xmm7.dw0':     ( 'uint', 'both',   ),
        'xmm8.dw0':     ( 'uint', 'both',   ),
        'xmm9.dw0':     ( 'uint', 'both',   ),
        'xmm10.dw0':    ( 'uint', 'both',   ),
        'xmm11.dw0':    ( 'uint', 'both',   ),
        'xmm12.dw0':    ( 'uint', 'both',   ),
        'xmm13.dw0':    ( 'uint', 'both',   ),
        'xmm14.dw0':    ( 'uint', 'both',   ),
        'xmm15.dw0':    ( 'uint', 'both',   ),
        'xmm15_dw0':    ( 'uint', 'both',   ), # Legacy misspelling of 'xmm15.dw0', kept for compatibility.
        # AVX registers.
        'ymm0':         ( 'uint', 'both',   ),
        'ymm1':         ( 'uint', 'both',   ),
        'ymm2':         ( 'uint', 'both',   ),
        'ymm3':         ( 'uint', 'both',   ),
        'ymm4':         ( 'uint', 'both',   ),
        'ymm5':         ( 'uint', 'both',   ),
        'ymm6':         ( 'uint', 'both',   ),
        'ymm7':         ( 'uint', 'both',   ),
        'ymm8':         ( 'uint', 'both',   ),
        'ymm9':         ( 'uint', 'both',   ),
        'ymm10':        ( 'uint', 'both',   ),
        'ymm11':        ( 'uint', 'both',   ),
        'ymm12':        ( 'uint', 'both',   ),
        'ymm13':        ( 'uint', 'both',   ),
        'ymm14':        ( 'uint', 'both',   ),
        'ymm15':        ( 'uint', 'both',   ),

        # Special ones.
        'value.xcpt':   ( 'uint', 'output', ),
    };

    def __init__(self, sField, sOp, sValue, sType):
        """
        Creates a modifier applying sValue (of type sType) to sField using
        the sOp operator.

        sField must be a kdFields key and sOp one of kasOperators; all four
        arguments must be strings (checked by assertions only).
        """
        assert sField in self.kdFields;
        assert sOp in self.kasOperators;
        self.sField = sField;
        self.sOp    = sOp;
        self.sValue = sValue;
        self.sType  = sType;
        assert isinstance(sField, str);
        assert isinstance(sOp, str);
        assert isinstance(sType, str);
        assert isinstance(sValue, str);
1266
1267
class TestSelector(object):
    """
    One selector for an instruction test.

    A selector is a 'variable op value' expression: the variable is a
    kdVariables key, the operator one of kasCompareOps and the value a key of
    that variable's value dictionary.
    """
    ## Selector compare operators.
    kasCompareOps = [ '==', '!=' ];

    ## Selector variables and their valid values.
    kdVariables = {
        # Operand size.
        'size':     { 'o16': 'size_o16', 'o32': 'size_o32', 'o64': 'size_o64', },
        # VEX.L value.
        'vex.l':    { '0': 'vexl_0', '1': 'vexl_1', },
        # Execution ring.
        'ring':     {
            '0':    'ring_0',
            '1':    'ring_1',
            '2':    'ring_2',
            '3':    'ring_3',
            '0..2': 'ring_0_thru_2',
            '1..3': 'ring_1_thru_3',
        },
        # Basic code mode.
        'codebits': { '64': 'code_64bit', '32': 'code_32bit', '16': 'code_16bit', },
        # cpu modes.
        'mode':     {
            'real': 'mode_real',
            'prot': 'mode_prot',
            'long': 'mode_long',
            'v86':  'mode_v86',
            'smm':  'mode_smm',
            'vmx':  'mode_vmx',
            'svm':  'mode_svm',
        },
        # paging on/off
        'paging':   { 'on': 'paging_on', 'off': 'paging_off', },
        # CPU vendor
        'vendor':   { 'amd': 'vendor_amd', 'intel': 'vendor_intel', 'via': 'vendor_via', },
    };

    ## Selector shorthand predicates.
    ## These translates into variable expressions.
    kdPredicates = {
        # Operand size.
        'o16':          'size==o16',
        'o32':          'size==o32',
        'o64':          'size==o64',
        # Execution ring.
        'ring0':        'ring==0',
        '!ring0':       'ring==1..3',
        'ring1':        'ring==1',
        'ring2':        'ring==2',
        'ring3':        'ring==3',
        'user':         'ring==3',
        'supervisor':   'ring==0..2',
        # Basic code mode.
        '16-bit':       'codebits==16',
        '32-bit':       'codebits==32',
        '64-bit':       'codebits==64',
        # cpu modes.
        'real':         'mode==real',
        'prot':         'mode==prot',
        'long':         'mode==long',
        'v86':          'mode==v86',
        'smm':          'mode==smm',
        'vmx':          'mode==vmx',
        'svm':          'mode==svm',
        # paging on/off
        'paging':       'paging==on',
        '!paging':      'paging==off',
        # CPU vendor
        'amd':          'vendor==amd',
        '!amd':         'vendor!=amd',
        'intel':        'vendor==intel',
        '!intel':       'vendor!=intel',
        'via':          'vendor==via',
        '!via':         'vendor!=via',
    };

    def __init__(self, sVariable, sOp, sValue):
        """
        Creates the selector; the arguments are validated by assertions only.
        """
        assert sVariable in self.kdVariables;
        assert sOp in self.kasCompareOps;
        assert sValue in self.kdVariables[sVariable];
        self.sVariable, self.sOp, self.sValue = sVariable, sOp, sValue;
1364
1365
1366class InstructionTest(object):
1367 """
1368 Instruction test.
1369 """
1370
1371 def __init__(self, oInstr): # type: (InstructionTest, Instruction)
1372 self.oInstr = oInstr # type: InstructionTest
1373 self.aoInputs = [] # type: List[TestInOut]
1374 self.aoOutputs = [] # type: List[TestInOut]
1375 self.aoSelectors = [] # type: List[TestSelector]
1376
1377 def toString(self, fRepr = False):
1378 """
1379 Converts it to string representation.
1380 """
1381 asWords = [];
1382 if self.aoSelectors:
1383 for oSelector in self.aoSelectors:
1384 asWords.append('%s%s%s' % (oSelector.sVariable, oSelector.sOp, oSelector.sValue,));
1385 asWords.append('/');
1386
1387 for oModifier in self.aoInputs:
1388 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
1389
1390 asWords.append('->');
1391
1392 for oModifier in self.aoOutputs:
1393 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
1394
1395 if fRepr:
1396 return '<' + ' '.join(asWords) + '>';
1397 return ' '.join(asWords);
1398
1399 def __str__(self):
1400 """ Provide string represenation. """
1401 return self.toString(False);
1402
1403 def __repr__(self):
1404 """ Provide unambigious string representation. """
1405 return self.toString(True);
1406
class Operand(object):
    """
    Instruction operand.

    Pairs an operand location (a g_kdOpLocations key) with an operand type
    (a g_kdOpTypes key).
    """

    def __init__(self, sWhere, sType):
        assert sWhere in g_kdOpLocations, sWhere;
        assert sType  in g_kdOpTypes, sType;
        self.sWhere = sWhere;   ##< g_kdOpLocations
        self.sType  = sType;    ##< g_kdOpTypes

    def usesModRM(self):
        """ Returns True if using some form of ModR/M encoding, judged by the first letter of the type. """
        chFirst = self.sType[0];
        return chFirst in ('E', 'G', 'M');
1421
1422
1423
class Instruction(object): # pylint: disable=too-many-instance-attributes
    """
    Instruction.

    Collects everything known about one instruction: its encoding, flags
    usage, CPU requirements, tests and where in the sources it was found.
    """

    def __init__(self, sSrcFile, iLine):
        ## @name Core attributes.
        ## @{
        self.oParent        = None      # type: Instruction
        self.sMnemonic      = None;
        self.sBrief         = None;
        self.asDescSections = []        # type: List[str]
        self.aoMaps         = []        # type: List[InstructionMap]
        self.aoOperands     = []        # type: List[Operand]
        self.sPrefix        = None;     ##< Single prefix: None, 'none', 0x66, 0xf3, 0xf2
        self.sOpcode        = None      # type: str
        self.sSubOpcode     = None      # type: str
        self.sEncoding      = None;
        self.asFlTest       = None;
        self.asFlModify     = None;
        self.asFlUndefined  = None;
        self.asFlSet        = None;
        self.asFlClear      = None;
        self.dHints         = {};       ##< Dictionary of instruction hints, flags, whatnot. (Dictionary for speed; dummy value).
        self.sDisEnum       = None;     ##< OP_XXXX value.  Default is based on the uppercased mnemonic.
        self.asCpuIds       = [];       ##< The CPUID feature bit names for this instruction. If multiple, assume AND.
        self.asReqFeatures  = [];       ##< Which features are required to be enabled to run this instruction.
        self.aoTests        = []        # type: List[InstructionTest]
        self.sMinCpu        = None;     ##< Indicates the minimum CPU required for the instruction. Not set when oCpuExpr is.
        self.oCpuExpr       = None;     ##< Some CPU restriction expression...
        self.sGroup         = None;
        self.fUnused        = False;    ##< Unused instruction.
        self.fInvalid       = False;    ##< Invalid instruction (like UD2).
        self.sInvalidStyle  = None;     ##< Invalid behaviour style (g_kdInvalidStyles),
        self.sXcptType      = None;     ##< Exception type (g_kdXcptTypes).
        ## @}

        ## @name Implementation attributes.
        ## @{
        self.sStats         = None;
        self.sFunction      = None;
        self.fStub          = False;
        self.fUdStub        = False;
        ## @}

        ## @name Decoding info
        ## @{
        self.sSrcFile       = sSrcFile;
        self.iLineCreated   = iLine;
        self.iLineCompleted = None;
        self.cOpTags        = 0;
        self.iLineFnIemOpMacro  = -1;
        self.iLineMnemonicMacro = -1;
        ## @}

        ## @name Intermediate input fields.
        ## @{
        self.sRawDisOpNo    = None;
        self.asRawDisParams = [];
        self.sRawIemOpFlags = None;
        self.sRawOldOpcodes = None;
        self.asCopyTests    = [];
        ## @}

    def toString(self, fRepr = False):
        """
        Turn object into a string.

        The result is a '; ' separated list of 'field=value' items.  Fields
        whose value is None are left out, as are the optional ones (prefix,
        maps, hints, cpuid, the boolean markers, optags and the macro line
        numbers) when they are unset/empty.  With fRepr set the result is
        enclosed in angle brackets.
        """
        # A value of None means 'omit this field', which is how the optional
        # fields are expressed below.
        aasLeading = [
            ('opcode',      self.sOpcode),
            ('prefix',      self.sPrefix if self.sPrefix else None),
            ('mnemonic',    self.sMnemonic),
        ];
        aasOperands = [('op%u' % (iOperand + 1,), '%s:%s' % (oOperand.sWhere, oOperand.sType,))
                       for iOperand, oOperand in enumerate(self.aoOperands)];
        aasTrailing = [
            ('maps',        ','.join([oMap.sName for oMap in self.aoMaps]) if self.aoMaps else None),
            ('encoding',    self.sEncoding),
            ('hints',       ','.join(self.dHints.keys()) if self.dHints else None),
            ('disenum',     self.sDisEnum),
            ('cpuid',       ','.join(self.asCpuIds) if self.asCpuIds else None),
            ('group',       self.sGroup),
            ('unused',      'True' if self.fUnused else None),
            ('invalid',     'True' if self.fInvalid else None),
            ('invlstyle',   self.sInvalidStyle),
            ('fltest',      self.asFlTest),
            ('flmodify',    self.asFlModify),
            ('flundef',     self.asFlUndefined),
            ('flset',       self.asFlSet),
            ('flclear',     self.asFlClear),
            ('mincpu',      self.sMinCpu),
            ('stats',       self.sStats),
            ('sFunction',   self.sFunction),
            ('fStub',       'True' if self.fStub else None),
            ('fUdStub',     'True' if self.fUdStub else None),
            ('optags',      str(self.cOpTags) if self.cOpTags else None),
            ('FNIEMOP_XXX', str(self.iLineFnIemOpMacro) if self.iLineFnIemOpMacro != -1 else None),
            ('IEMOP_MNEMMONICn', str(self.iLineMnemonicMacro) if self.iLineMnemonicMacro != -1 else None),
        ];

        asItems = ['%s=%s' % (sField, sValue,)
                   for sField, sValue in aasLeading + aasOperands + aasTrailing
                   if sValue is not None];
        sJoined = '; '.join(asItems);
        return '<' + sJoined + '>' if fRepr else sJoined;

    def __str__(self):
        """ Provide string representation. """
        return self.toString(False);

    def __repr__(self):
        """ Provide unambiguous string representation. """
        return self.toString(True);

    def copy(self, oMap = None, sOpcode = None, sSubOpcode = None, sPrefix = None):
        """
        Makes a copy of the object for the purpose of putting in a different map
        or a different place in the current map.

        The copy gets this object as its oParent.  The map list, opcode, sub
        opcode and prefix are taken from the arguments when given (truthy),
        otherwise from this object.  List attributes and the hints dictionary
        are shallow copied, with the exception of the asFlXxxx attributes
        which are shared with this object like all the remaining attributes.
        """
        oCopy = Instruction(self.sSrcFile, self.iLineCreated);
        oCopy.oParent    = self;

        # The placement, optionally overridden by the caller.
        oCopy.aoMaps     = [oMap,] if oMap else list(self.aoMaps);
        oCopy.sPrefix    = sPrefix    or self.sPrefix;
        oCopy.sOpcode    = sOpcode    or self.sOpcode;
        oCopy.sSubOpcode = sSubOpcode or self.sSubOpcode;

        # Containers getting their own (shallow) copy.  ## Deeper copy of aoOperands and aoTests?
        for sAttr in ('asDescSections', 'aoOperands', 'asCpuIds', 'asReqFeatures', 'aoTests',
                      'asRawDisParams', 'asCopyTests',):
            setattr(oCopy, sAttr, list(getattr(self, sAttr)));
        oCopy.dHints     = dict(self.dHints);

        # Everything else is assigned as-is.
        for sAttr in ('sMnemonic', 'sBrief', 'sEncoding',
                      'asFlTest', 'asFlModify', 'asFlUndefined', 'asFlSet', 'asFlClear',
                      'sDisEnum', 'sMinCpu', 'oCpuExpr', 'sGroup',
                      'fUnused', 'fInvalid', 'sInvalidStyle', 'sXcptType',
                      'sStats', 'sFunction', 'fStub', 'fUdStub',
                      'iLineCompleted', 'cOpTags', 'iLineFnIemOpMacro', 'iLineMnemonicMacro',
                      'sRawDisOpNo', 'sRawIemOpFlags', 'sRawOldOpcodes',):
            setattr(oCopy, sAttr, getattr(self, sAttr));

        return oCopy;

    def getOpcodeByte(self):
        """
        Decodes sOpcode into a byte range integer value.
        Raises exception if sOpcode is None or invalid.

        Supported forms: '0xNN' (returned as-is), '/r' (r << 3),
        '11/r' ((r << 3) | 0xc0) and '!11/r' ((r << 3) | 0x80).
        """
        if self.sOpcode is None:
            raise Exception('No opcode byte for %s!' % (self,));
        sSpec = str(self.sOpcode); # pylint type confusion workaround.

        # Full hex byte form.
        if sSpec.startswith('0x'):
            return int(sSpec, 16);

        # The ModR/M forms: the given lead-in followed by a single digit.
        ## @todo the !11/r form (returns mod=1) doesn't really work...
        for sLeadIn, fModBits in (('/', 0), ('11/', 0xc0), ('!11/', 0x80),):
            if len(sSpec) == len(sLeadIn) + 1 and sSpec.startswith(sLeadIn) and sSpec[-1].isdigit():
                return (int(sSpec[-1:]) << 3) | fModBits;

        raise Exception('unsupported opcode byte spec "%s" for %s' % (sSpec, self,));

    @staticmethod
    def _flagsToIntegerMask(asFlags):
        """
        Returns the integer mask value for asFlags (0 if None or empty).
        """
        uMask = 0;
        for sFlag in (asFlags or ()):
            sConstant = g_kdEFlagsMnemonics[sFlag];
            assert sConstant[0] != '!', sConstant
            uMask |= g_kdX86EFlagsConstants[sConstant];
        return uMask;

    def getTestedFlagsMask(self):
        """ Returns asFlTest into a integer mask value """
        return self._flagsToIntegerMask(self.asFlTest);

    def getModifiedFlagsMask(self):
        """ Returns asFlModify into a integer mask value """
        return self._flagsToIntegerMask(self.asFlModify);

    def getUndefinedFlagsMask(self):
        """ Returns asFlUndefined into a integer mask value """
        return self._flagsToIntegerMask(self.asFlUndefined);

    def getSetFlagsMask(self):
        """ Returns asFlSet into a integer mask value """
        return self._flagsToIntegerMask(self.asFlSet);

    def getClearedFlagsMask(self):
        """ Returns asFlClear into a integer mask value """
        return self._flagsToIntegerMask(self.asFlClear);

    def onlyInVexMaps(self):
        """ Returns True if only in VEX maps, otherwise False.  (No maps -> False) """
        if not self.aoMaps:
            return False;
        return all(oMap.isVexMap() for oMap in self.aoMaps);
1662
1663
1664
## All the instructions.
g_aoAllInstructions = [] # type: List[Instruction]

## All the instructions indexed by statistics name (opstat).
g_dAllInstructionsByStat = {} # type: Dict[str, Instruction]

## All the instructions indexed by function name (opfunction).
## Each entry is a list of instructions.
g_dAllInstructionsByFunction = {} # type: Dict[str, List[Instruction]]

## Instructions tagged by oponlytest.
g_aoOnlyTestInstructions = [] # type: List[Instruction]
1676
1677## Instruction maps.
1678g_aoInstructionMaps = [
1679 InstructionMap('one', 'g_apfnOneByteMap', sSelector = 'byte'),
1680 InstructionMap('grp1_80', asLeadOpcodes = ['0x80',], sSelector = '/r'),
1681 InstructionMap('grp1_81', asLeadOpcodes = ['0x81',], sSelector = '/r'),
1682 InstructionMap('grp1_82', asLeadOpcodes = ['0x82',], sSelector = '/r'),
1683 InstructionMap('grp1_83', asLeadOpcodes = ['0x83',], sSelector = '/r'),
1684 InstructionMap('grp1a', asLeadOpcodes = ['0x8f',], sSelector = '/r'),
1685 InstructionMap('grp2_c0', asLeadOpcodes = ['0xc0',], sSelector = '/r'),
1686 InstructionMap('grp2_c1', asLeadOpcodes = ['0xc1',], sSelector = '/r'),
1687 InstructionMap('grp2_d0', asLeadOpcodes = ['0xd0',], sSelector = '/r'),
1688 InstructionMap('grp2_d1', asLeadOpcodes = ['0xd1',], sSelector = '/r'),
1689 InstructionMap('grp2_d2', asLeadOpcodes = ['0xd2',], sSelector = '/r'),
1690 InstructionMap('grp2_d3', asLeadOpcodes = ['0xd3',], sSelector = '/r'),
1691 ## @todo g_apfnEscF1_E0toFF
1692 InstructionMap('grp3_f6', asLeadOpcodes = ['0xf6',], sSelector = '/r'),
1693 InstructionMap('grp3_f7', asLeadOpcodes = ['0xf7',], sSelector = '/r'),
1694 InstructionMap('grp4', asLeadOpcodes = ['0xfe',], sSelector = '/r'),
1695 InstructionMap('grp5', asLeadOpcodes = ['0xff',], sSelector = '/r'),
1696 InstructionMap('grp11_c6_m', asLeadOpcodes = ['0xc6',], sSelector = '!11 /r'),
1697 InstructionMap('grp11_c6_r', asLeadOpcodes = ['0xc6',], sSelector = '11'), # xabort
1698 InstructionMap('grp11_c7_m', asLeadOpcodes = ['0xc7',], sSelector = '!11 /r'),
1699 InstructionMap('grp11_c7_r', asLeadOpcodes = ['0xc7',], sSelector = '11'), # xbegin
1700
1701 InstructionMap('two0f', 'g_apfnTwoByteMap', asLeadOpcodes = ['0x0f',], sDisParse = 'IDX_ParseTwoByteEsc'),
1702 InstructionMap('grp6', 'g_apfnGroup6', asLeadOpcodes = ['0x0f', '0x00',], sSelector = '/r'),
1703 InstructionMap('grp7_m', 'g_apfnGroup7Mem', asLeadOpcodes = ['0x0f', '0x01',], sSelector = '!11 /r'),
1704 InstructionMap('grp7_r', asLeadOpcodes = ['0x0f', '0x01',], sSelector = '11'),
1705 InstructionMap('grp8', asLeadOpcodes = ['0x0f', '0xba',], sSelector = '/r'),
1706 InstructionMap('grp9', 'g_apfnGroup9RegReg', asLeadOpcodes = ['0x0f', '0xc7',], sSelector = 'mod /r'),
1707 ## @todo What about g_apfnGroup9MemReg?
1708 InstructionMap('grp10', None, asLeadOpcodes = ['0x0f', '0xb9',], sSelector = '/r'), # UD1 /w modr/m
1709 InstructionMap('grp12', 'g_apfnGroup12RegReg', asLeadOpcodes = ['0x0f', '0x71',], sSelector = 'mod /r'),
1710 InstructionMap('grp13', 'g_apfnGroup13RegReg', asLeadOpcodes = ['0x0f', '0x72',], sSelector = 'mod /r'),
1711 InstructionMap('grp14', 'g_apfnGroup14RegReg', asLeadOpcodes = ['0x0f', '0x73',], sSelector = 'mod /r'),
1712 InstructionMap('grp15', 'g_apfnGroup15MemReg', asLeadOpcodes = ['0x0f', '0xae',], sSelector = 'memreg /r'),
1713 ## @todo What about g_apfnGroup15RegReg?
1714 InstructionMap('grp16', asLeadOpcodes = ['0x0f', '0x18',], sSelector = 'mod /r'),
1715 InstructionMap('grpA17', asLeadOpcodes = ['0x0f', '0x78',], sSelector = '/r'), # AMD: EXTRQ weirdness
1716 InstructionMap('grpP', asLeadOpcodes = ['0x0f', '0x0d',], sSelector = '/r'), # AMD: prefetch
1717
1718 InstructionMap('three0f38', 'g_apfnThreeByte0f38', asLeadOpcodes = ['0x0f', '0x38',]),
1719 InstructionMap('three0f3a', 'g_apfnThreeByte0f3a', asLeadOpcodes = ['0x0f', '0x3a',]),
1720
1721 InstructionMap('vexmap1', 'g_apfnVexMap1', sEncoding = 'vex1'),
1722 InstructionMap('vexgrp12', 'g_apfnVexGroup12RegReg', sEncoding = 'vex1', asLeadOpcodes = ['0x71',], sSelector = 'mod /r'),
1723 InstructionMap('vexgrp13', 'g_apfnVexGroup13RegReg', sEncoding = 'vex1', asLeadOpcodes = ['0x72',], sSelector = 'mod /r'),
1724 InstructionMap('vexgrp14', 'g_apfnVexGroup14RegReg', sEncoding = 'vex1', asLeadOpcodes = ['0x73',], sSelector = 'mod /r'),
1725 InstructionMap('vexgrp15', 'g_apfnVexGroup15MemReg', sEncoding = 'vex1', asLeadOpcodes = ['0xae',], sSelector = 'memreg /r'),
1726 InstructionMap('vexgrp17', 'g_apfnVexGroup17_f3', sEncoding = 'vex1', asLeadOpcodes = ['0xf3',], sSelector = '/r'),
1727
1728 InstructionMap('vexmap2', 'g_apfnVexMap2', sEncoding = 'vex2'),
1729 InstructionMap('vexmap3', 'g_apfnVexMap3', sEncoding = 'vex3'),
1730
1731 InstructionMap('3dnow', asLeadOpcodes = ['0x0f', '0x0f',]),
1732 InstructionMap('xopmap8', sEncoding = 'xop8'),
1733 InstructionMap('xopmap9', sEncoding = 'xop9'),
1734 InstructionMap('xopgrp1', sEncoding = 'xop9', asLeadOpcodes = ['0x01'], sSelector = '/r'),
1735 InstructionMap('xopgrp2', sEncoding = 'xop9', asLeadOpcodes = ['0x02'], sSelector = '/r'),
1736 InstructionMap('xopgrp3', sEncoding = 'xop9', asLeadOpcodes = ['0x12'], sSelector = '/r'),
1737 InstructionMap('xopmap10', sEncoding = 'xop10'),
1738 InstructionMap('xopgrp4', sEncoding = 'xop10', asLeadOpcodes = ['0x12'], sSelector = '/r'),
1739];
## Lookup table: InstructionMap.sName -> InstructionMap, built from g_aoInstructionMaps.
g_dInstructionMaps = { oMap.sName: oMap for oMap in g_aoInstructionMaps };
## Lookup table: InstructionMap.sIemName -> InstructionMap, built from g_aoInstructionMaps.
g_dInstructionMapsByIemName = { oMap.sIemName: oMap for oMap in g_aoInstructionMaps };
1742
1743
1744#
1745# Decoder functions.
1746#
1747
class DecoderFunction(object):
    """
    A decoder function located in one of the parsed source files.

    The object records where the function is defined and, once complete() has
    been called, where it ends and which raw source lines it consists of.
    It is mainly used for scoping the search for variables that are used in
    microcode blocks.
    """

    def __init__(self, sSrcFile, iBeginLine, sName, asDefArgs):
        ## The function name.
        self.sName      = sName;
        ## The FNIEMOP*DEF/STUB* macro argument list, 0th element is the macro name.
        self.asDefArgs  = asDefArgs;
        ## The source file the function is defined in.
        self.sSrcFile   = sSrcFile;
        ## The start line.
        self.iBeginLine = iBeginLine;
        ## The line the function (probably) ends on, -1 until complete() is called.
        self.iEndLine   = -1;
        ## The raw lines the function is made up of.
        self.asLines    = []            # type: List[str]

    def complete(self, iEndLine, asLines):
        """
        Completes the function by recording its end line and its raw lines.

        Must only be called once per function (asserted).
        """
        assert self.iEndLine == -1;
        (self.iEndLine, self.asLines) = (iEndLine, asLines);
1770
1771
1772#
1773# "Microcode" statements and blocks
1774#
1775
class McStmt(object):
    """
    A single statement in a microcode block.

    The statement is identified by sName, which is either an 'IEM_MC_XXX'
    macro name or a tag starting with 'C++' for C/C++ code, and carries its
    parameters as a list of strings in asParams.
    """

    def __init__(self, sName, asParams):
        self.sName    = sName;      ##< 'IEM_MC_XXX' or 'C++'.
        self.asParams = asParams;   ##< The statement parameters.
        self.oUser    = None;       ##< Starts out as None; not used by this class itself.

    def renderCode(self, cchIndent = 0):
        """
        Returns the statement as one line of code ('NAME(param, ...);')
        indented by cchIndent spaces and terminated by a newline.
        """
        sArgs = ', '.join(self.asParams);
        return ''.join((' ' * cchIndent, self.sName, '(', sArgs, ');\n'));

    @staticmethod
    def renderCodeForList(aoStmts, cchIndent = 0):
        """
        Renders every statement in aoStmts with the given indent and returns
        the concatenated result.
        """
        asRendered = [];
        for oStmt in aoStmts:
            asRendered.append(oStmt.renderCode(cchIndent));
        return ''.join(asRendered);

    @staticmethod
    def findStmtByNames(aoStmts, dNames):
        """
        Returns the first statement in the list with any of the given names,
        None if there is no such statement.

        Conditional statements are searched recursively, if-branch before
        else-branch.

        Note! The names are passed as a dictionary for quick lookup, the value
              does not matter.
        """
        for oStmt in aoStmts:
            if oStmt.sName in dNames:
                return oStmt;
            if not isinstance(oStmt, McStmtCond):
                continue;
            oHit = McStmt.findStmtByNames(oStmt.aoIfBranch, dNames) \
                or McStmt.findStmtByNames(oStmt.aoElseBranch, dNames);
            if oHit:
                return oHit;
        return None;

    def isCppStmt(self):
        """ Checks if this is a C++ statement, i.e. whether the name starts with 'C++'. """
        return self.sName[:3] == 'C++';
1820
class McStmtCond(McStmt):
    """
    Base class for conditional statements (IEM_MC_IF_XXX).

    In addition to the McStmt members it carries the statements of the
    if-branch (aoIfBranch) and of the else-branch (aoElseBranch), either of
    which may be empty.
    """

    def __init__(self, sName, asParams, aoIfBranch = None, aoElseBranch = None):
        super(McStmtCond, self).__init__(sName, asParams);
        # The branches are always stored as new lists (copies of what was passed in).
        self.aoIfBranch   = list(aoIfBranch)   if aoIfBranch   is not None else [];
        self.aoElseBranch = list(aoElseBranch) if aoElseBranch is not None else [];

    def renderCode(self, cchIndent = 0):
        """
        Renders the conditional including both branches; the else part is
        only emitted when the else-branch is non-empty.
        """
        sPad    = ' ' * cchIndent;
        asParts = [
            sPad + self.sName + '(' + ', '.join(self.asParams) + ') {\n',
            self.renderCodeForList(self.aoIfBranch, cchIndent + 4),
        ];
        if self.aoElseBranch:
            asParts.append(sPad + '} IEM_MC_ELSE() {\n');
            asParts.append(self.renderCodeForList(self.aoElseBranch, cchIndent + 4));
        asParts.append(sPad + '} IEM_MC_ENDIF();\n');
        return ''.join(asParts);
1838
class McStmtVar(McStmt):
    """ Variable declaration statement: IEM_MC_LOCAL, IEM_MC_LOCAL_ASSIGN, IEM_MC_LOCAL_CONST """

    def __init__(self, sName, asParams, sType, sVarName, sValue = None):
        super(McStmtVar, self).__init__(sName, asParams);
        ## The type of the variable.
        self.sType    = sType;
        ## The name of the variable.
        self.sVarName = sVarName;
        ## The assigned / const value, None if there isn't any.
        self.sValue   = sValue;
1846
class McStmtArg(McStmtVar):
    """ Argument declaration statement: IEM_MC_ARG, IEM_MC_ARG_CONST, IEM_MC_ARG_LOCAL_REF """

    def __init__(self, sName, asParams, sType, sVarName, iArg, sConstValue = None, sRef = None, sRefType = 'none'):
        # The constant value (if any) is stored as McStmtVar.sValue.
        super(McStmtArg, self).__init__(sName, asParams, sType, sVarName, sConstValue);
        ## The argument number.
        self.iArg     = iArg;
        ## The reference string (local variable, register).
        self.sRef     = sRef;
        ## The kind of reference: 'local', 'none'.
        self.sRefType = sRefType;
        assert sRefType in ('none', 'local');
1855
1856
class McStmtCall(McStmt):
    """
    IEM_MC_CALL_* statement.

    iFnParam is the index into asParams of the function being called, while
    iRcNameParam is the index of the parameter stored as iRcName (negative if
    there is none).
    """

    def __init__(self, sName, asParams, iFnParam, iRcNameParam = -1):
        super(McStmtCall, self).__init__(sName, asParams);
        ## Index of the function name in asParams.
        self.idxFn     = iFnParam;
        ## Index of the parameter following the function name in asParams.
        self.idxParams = iFnParam + 1;
        ## The function name.
        self.sFn       = asParams[iFnParam];
        ## The asParams entry at iRcNameParam, None if that index is negative.
        if iRcNameParam >= 0:
            self.iRcName = asParams[iRcNameParam];
        else:
            self.iRcName = None;
1865
class McCppGeneric(McStmt):
    """
    Generic C++/C statement.

    The code is kept verbatim as the one and only statement parameter.
    """

    def __init__(self, sCode, fDecode = True, sName = 'C++', cchIndent = 0):
        super(McCppGeneric, self).__init__(sName, [sCode,]);
        ## Selects the annotation appended when rendering ('C++ decode' vs 'C++ normal').
        self.fDecode   = fDecode;
        ## Extra indentation added to whatever renderCode() is given.
        self.cchIndent = cchIndent;

    def renderCode(self, cchIndent = 0):
        """
        Renders the code with the combined indent, tagging the end of each
        line with a '// C++ decode' or '// C++ normal' comment.
        """
        sAnnotatedEol = ' // C++ decode\n' if self.fDecode else ' // C++ normal\n';
        sCode = ' ' * (cchIndent + self.cchIndent) + self.asParams[0] + '\n';
        return sCode.replace('\n', sAnnotatedEol);
1883
class McCppCall(McCppGeneric):
    """
    A generic C++/C call statement.

    The sName is still 'C++', so the function name is in the first parameter
    and the arguments in the subsequent ones.
    """

    def __init__(self, sFnName, asArgs, fDecode = True, cchIndent = 0):
        super(McCppCall, self).__init__(sFnName, fDecode = fDecode, cchIndent = cchIndent);
        # asParams[0] is the function name at this point, add the call arguments after it.
        for sArg in asArgs:
            self.asParams.append(sArg);

    def renderCode(self, cchIndent = 0):
        """
        Renders the call as 'fn(arg, ...);' followed by a '// C++ decode' or
        '// C++ normal' comment.
        """
        sAnnotation = ' // C++ decode\n' if self.fDecode else ' // C++ normal\n';
        sArgs       = ', '.join(self.asParams[1:]);
        return ' ' * (cchIndent + self.cchIndent) + self.asParams[0] + '(' + sArgs + ');' + sAnnotation;
1903
class McCppCond(McStmtCond):
    """
    C++/C 'if' statement.

    The statement name is 'C++/if' and the condition expression is the one
    and only statement parameter.
    """

    def __init__(self, sCode, fDecode = True, aoIfBranch = None, aoElseBranch = None, cchIndent = 0):
        super(McCppCond, self).__init__('C++/if', [sCode,], aoIfBranch, aoElseBranch);
        ## Selects the annotation appended when rendering ('C++ decode' vs 'C++ normal').
        self.fDecode   = fDecode;
        ## Extra indentation added to whatever renderCode() is given.
        self.cchIndent = cchIndent;

    def renderCode(self, cchIndent = 0):
        """
        Renders the if statement with braces around the branches; the else
        part is only emitted when the else-branch is non-empty.
        """
        cchTotal    = cchIndent + self.cchIndent;
        sPad        = ' ' * cchTotal;
        sAnnotation = '// C++ decode' if self.fDecode else '// C++ normal';

        asParts = [
            sPad + 'if (' + self.asParams[0] + ') ' + sAnnotation + '\n',
            sPad + '{\n',
            self.renderCodeForList(self.aoIfBranch, cchTotal + 4),
            sPad + '}\n',
        ];
        if self.aoElseBranch:
            asParts.extend([
                sPad + 'else ' + sAnnotation + '\n',
                sPad + '{\n',
                self.renderCodeForList(self.aoElseBranch, cchTotal + 4),
                sPad + '}\n',
            ]);
        return ''.join(asParts);
1926
class McCppPreProc(McCppGeneric):
    """
    C++/C Preprocessor directive.

    The statement name is 'C++/preproc' and fDecode is always False.
    """

    def __init__(self, sCode):
        super(McCppPreProc, self).__init__(sCode, False, sName = 'C++/preproc');

    def renderCode(self, cchIndent = 0):
        """ Renders the directive without any indentation or annotation. """
        _ = cchIndent;  # Ignored, the directive is always emitted at the start of the line.
        return ''.join((self.asParams[0], '\n'));
1936
1937
## IEM_MC_F_XXX values.
## The key is a flag name accepted in the 3rd IEM_MC_BEGIN parameter, the value
## is a tuple of further flags that McBlock.parseMcBegin sets in
## McBlock.dsMcFlags together with the key.
g_kdMcFlags = {
    'IEM_MC_F_ONLY_8086':           (),
    'IEM_MC_F_MIN_186':             (),
    'IEM_MC_F_MIN_286':             (),
    'IEM_MC_F_NOT_286_OR_OLDER':    (),
    'IEM_MC_F_MIN_386':             ('IEM_MC_F_NOT_286_OR_OLDER',),
    'IEM_MC_F_MIN_486':             ('IEM_MC_F_NOT_286_OR_OLDER',),
    'IEM_MC_F_MIN_PENTIUM':         ('IEM_MC_F_NOT_286_OR_OLDER',),
    'IEM_MC_F_MIN_PENTIUM_II':      ('IEM_MC_F_NOT_286_OR_OLDER',),
    'IEM_MC_F_MIN_CORE':            ('IEM_MC_F_NOT_286_OR_OLDER',),
    'IEM_MC_F_64BIT':               ('IEM_MC_F_NOT_286_OR_OLDER',),
    'IEM_MC_F_NOT_64BIT':           (),
};
## IEM_CIMPL_F_XXX values.
## The key is a flag name accepted by McBlock.parseCImplFlags, the value is a
## tuple of further flags that get set in McBlock.dsCImplFlags together with
## the key.
g_kdCImplFlags = {
    'IEM_CIMPL_F_BRANCH_DIRECT':                (),
    'IEM_CIMPL_F_BRANCH_INDIRECT':              (),
    'IEM_CIMPL_F_BRANCH_RELATIVE':              (),
    'IEM_CIMPL_F_BRANCH_CONDITIONAL':           (),
    'IEM_CIMPL_F_BRANCH_FAR':                   (),
    'IEM_CIMPL_F_BRANCH_ANY':                   ('IEM_CIMPL_F_BRANCH_DIRECT', 'IEM_CIMPL_F_BRANCH_INDIRECT',
                                                 'IEM_CIMPL_F_BRANCH_RELATIVE',),
    'IEM_CIMPL_F_MODE':                         (),
    'IEM_CIMPL_F_RFLAGS':                       (),
    'IEM_CIMPL_F_INHIBIT_SHADOW':               (),
    'IEM_CIMPL_F_STATUS_FLAGS':                 (),
    'IEM_CIMPL_F_CHECK_IRQ_AFTER':              (),
    'IEM_CIMPL_F_CHECK_IRQ_BEFORE':             (),
    'IEM_CIMPL_F_CHECK_IRQ_BEFORE_AND_AFTER':   ('IEM_CIMPL_F_CHECK_IRQ_BEFORE', 'IEM_CIMPL_F_CHECK_IRQ_AFTER',),
    'IEM_CIMPL_F_VMEXIT':                       (),
    'IEM_CIMPL_F_FPU':                          (),
    'IEM_CIMPL_F_REP':                          (),
    'IEM_CIMPL_F_IO':                           (),
    'IEM_CIMPL_F_END_TB':                       (),
    'IEM_CIMPL_F_XCPT':                         ('IEM_CIMPL_F_BRANCH_INDIRECT', 'IEM_CIMPL_F_BRANCH_FAR',
                                                 'IEM_CIMPL_F_MODE', 'IEM_CIMPL_F_RFLAGS', 'IEM_CIMPL_F_VMEXIT', ),
    'IEM_CIMPL_F_CALLS_CIMPL':                  (),
    'IEM_CIMPL_F_CALLS_AIMPL':                  (),
    'IEM_CIMPL_F_CALLS_AIMPL_WITH_FXSTATE':     (),
};
1979class McBlock(object):
1980 """
1981 Microcode block (IEM_MC_BEGIN ... IEM_MC_END, IEM_MC_DEFER_TO_CIMPL_x_RET).
1982 """
1983
    ## @name Macro expansion types.
    ## These are the values McBlock.iMacroExp can take.
    ## @{
    kiMacroExp_None    = 0; ##< The initial McBlock.iMacroExp value set by the constructor.
    kiMacroExp_Entire  = 1; ##< Entire block (iBeginLine == iEndLine), original line may contain multiple blocks.
    kiMacroExp_Partial = 2; ##< Partial/mixed (cmpxchg16b), safe to assume single block.
    ## @}
1990
1991 def __init__(self, sSrcFile, iBeginLine, offBeginLine, oFunction, iInFunction, cchIndent = None):
1992 ## The source file containing the block.
1993 self.sSrcFile = sSrcFile;
1994 ## The line with the IEM_MC_BEGIN/IEM_MC_DEFER_TO_CIMPL_X_RET statement.
1995 self.iBeginLine = iBeginLine;
1996 ## The offset of the IEM_MC_BEGIN/IEM_MC_DEFER_TO_CIMPL_X_RET statement within the line.
1997 self.offBeginLine = offBeginLine;
1998 ## The line with the IEM_MC_END statement / last line of IEM_MC_DEFER_TO_CIMPL_X_RET.
1999 self.iEndLine = -1;
2000 ## The offset of the IEM_MC_END statement within the line / semicolon offset for defer-to.
2001 self.offEndLine = 0;
2002 ## The offset following the IEM_MC_END/IEM_MC_DEFER_TO_CIMPL_X_RET semicolon.
2003 self.offAfterEnd = 0;
2004 ## The function the block resides in.
2005 self.oFunction = oFunction;
2006 ## The name of the function the block resides in. DEPRECATED.
2007 self.sFunction = oFunction.sName;
2008 ## The block number within the function.
2009 self.iInFunction = iInFunction;
2010 self.cchIndent = cchIndent if cchIndent else offBeginLine;
2011 ##< The raw lines the block is made up of.
2012 self.asLines = [] # type: List[str]
2013 ## Indicates whether the block includes macro expansion parts (kiMacroExp_None,
2014 ## kiMacroExp_Entrie, kiMacroExp_Partial).
2015 self.iMacroExp = self.kiMacroExp_None;
2016 ## IEM_MC_BEGIN: Argument count.
2017 self.cArgs = -1;
2018 ## IEM_MC_ARG, IEM_MC_ARG_CONST, IEM_MC_ARG_LOCAL_REF, IEM_MC_ARG_LOCAL_EFLAGS.
2019 self.aoArgs = [] # type: List[McStmtArg]
2020 ## IEM_MC_BEGIN: Locals count.
2021 self.cLocals = -1;
2022 ## IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, IEM_MC_ARG_LOCAL_EFLAGS.
2023 self.aoLocals = [] # type: List[McStmtVar]
2024 ## IEM_MC_BEGIN: IEM_MC_F_XXX dictionary
2025 self.dsMcFlags = {} # type: Dict[str, bool]
2026 ## IEM_MC_[DEFER_TO|CALL]_CIMPL_XXX: IEM_CIMPL_F_XXX dictionary
2027 self.dsCImplFlags = {} # type: Dict[str, bool]
2028 ## Decoded statements in the block.
2029 self.aoStmts = [] # type: List[McStmt]
2030
2031 def complete(self, iEndLine, offEndLine, offAfterEnd, asLines):
2032 """
2033 Completes the microcode block.
2034 """
2035 assert self.iEndLine == -1;
2036 self.iEndLine = iEndLine;
2037 self.offEndLine = offEndLine;
2038 self.offAfterEnd = offAfterEnd;
2039 self.asLines = asLines;
2040
2041 def raiseDecodeError(self, sRawCode, off, sMessage):
2042 """ Raises a decoding error. """
2043 offStartOfLine = sRawCode.rfind('\n', 0, off) + 1;
2044 iLine = sRawCode.count('\n', 0, off);
2045 raise ParserException('%s:%d:%d: parsing error: %s'
2046 % (self.sSrcFile, self.iBeginLine + iLine, off - offStartOfLine + 1, sMessage,));
2047
2048 def raiseStmtError(self, sName, sMessage):
2049 """ Raises a statement parser error. """
2050 raise ParserException('%s:%d: %s: parsing error: %s' % (self.sSrcFile, self.iBeginLine, sName, sMessage,));
2051
2052 def checkStmtParamCount(self, sName, asParams, cParamsExpected):
2053 """ Check the parameter count, raising an error it doesn't match. """
2054 if len(asParams) != cParamsExpected:
2055 raise ParserException('%s:%d: %s: Expected %s parameters, found %s!'
2056 % (self.sSrcFile, self.iBeginLine, sName, cParamsExpected, len(asParams),));
2057 return True;
2058
2059 @staticmethod
2060 def parseMcGeneric(oSelf, sName, asParams):
2061 """ Generic parser that returns a plain McStmt object. """
2062 _ = oSelf;
2063 return McStmt(sName, asParams);
2064
2065 @staticmethod
2066 def parseMcGenericCond(oSelf, sName, asParams):
2067 """ Generic parser that returns a plain McStmtCond object. """
2068 _ = oSelf;
2069 return McStmtCond(sName, asParams);
2070
2071 @staticmethod
2072 def parseMcBegin(oSelf, sName, asParams):
2073 """ IEM_MC_BEGIN """
2074 oSelf.checkStmtParamCount(sName, asParams, 4);
2075 if oSelf.cArgs != -1 or oSelf.cLocals != -1 or oSelf.dsMcFlags:
2076 oSelf.raiseStmtError(sName, 'Used more than once!');
2077 oSelf.cArgs = int(asParams[0]);
2078 oSelf.cLocals = int(asParams[1]);
2079
2080 if asParams[2] != '0':
2081 for sFlag in asParams[2].split('|'):
2082 sFlag = sFlag.strip();
2083 if sFlag not in g_kdMcFlags:
2084 oSelf.raiseStmtError(sName, 'Unknown flag: %s' % (sFlag, ));
2085 oSelf.dsMcFlags[sFlag] = True;
2086 for sFlag2 in g_kdMcFlags[sFlag]:
2087 oSelf.dsMcFlags[sFlag2] = True;
2088
2089 if asParams[3] != '0':
2090 oSelf.parseCImplFlags(sName, asParams[3]);
2091
2092 return McBlock.parseMcGeneric(oSelf, sName, asParams);
2093
2094 @staticmethod
2095 def parseMcArg(oSelf, sName, asParams):
2096 """ IEM_MC_ARG """
2097 oSelf.checkStmtParamCount(sName, asParams, 3);
2098 oStmt = McStmtArg(sName, asParams, asParams[0], asParams[1], int(asParams[2]));
2099 oSelf.aoArgs.append(oStmt);
2100 return oStmt;
2101
2102 @staticmethod
2103 def parseMcArgConst(oSelf, sName, asParams):
2104 """ IEM_MC_ARG_CONST """
2105 oSelf.checkStmtParamCount(sName, asParams, 4);
2106 oStmt = McStmtArg(sName, asParams, asParams[0], asParams[1], int(asParams[3]), sConstValue = asParams[2]);
2107 oSelf.aoArgs.append(oStmt);
2108 return oStmt;
2109
2110 @staticmethod
2111 def parseMcArgLocalRef(oSelf, sName, asParams):
2112 """ IEM_MC_ARG_LOCAL_REF """
2113 oSelf.checkStmtParamCount(sName, asParams, 4);
2114 oStmt = McStmtArg(sName, asParams, asParams[0], asParams[1], int(asParams[3]), sRef = asParams[2], sRefType = 'local');
2115 oSelf.aoArgs.append(oStmt);
2116 return oStmt;
2117
2118 @staticmethod
2119 def parseMcArgLocalEFlags(oSelf, sName, asParams):
2120 """ IEM_MC_ARG_LOCAL_EFLAGS """
2121 oSelf.checkStmtParamCount(sName, asParams, 3);
2122 # Note! We split this one up into IEM_MC_LOCAL_VAR and IEM_MC_ARG_LOCAL_REF.
2123 oStmtLocal = McStmtVar('IEM_MC_LOCAL', ['uint32_t', asParams[1],], 'uint32_t', asParams[1]);
2124 oSelf.aoLocals.append(oStmtLocal);
2125 oStmtArg = McStmtArg('IEM_MC_ARG_LOCAL_REF', ['uint32_t *', asParams[0], asParams[1], asParams[2]],
2126 'uint32_t *', asParams[0], int(asParams[2]), sRef = asParams[1], sRefType = 'local');
2127 oSelf.aoArgs.append(oStmtArg);
2128 return (oStmtLocal, oStmtArg,);
2129
2130 @staticmethod
2131 def parseMcImplicitAvxAArgs(oSelf, sName, asParams):
2132 """ IEM_MC_IMPLICIT_AVX_AIMPL_ARGS """
2133 oSelf.checkStmtParamCount(sName, asParams, 0);
2134 # Note! Translate to IEM_MC_ARG_CONST
2135 oStmt = McStmtArg('IEM_MC_ARG_CONST', ['PX86XSAVEAREA', 'pXState', '&pVCpu->cpum.GstCtx.XState', '0'],
2136 'PX86XSAVEAREA', 'pXState', 0, '&pVCpu->cpum.GstCtx.XState');
2137 oSelf.aoArgs.append(oStmt);
2138 return oStmt;
2139
2140 @staticmethod
2141 def parseMcLocal(oSelf, sName, asParams):
2142 """ IEM_MC_LOCAL """
2143 oSelf.checkStmtParamCount(sName, asParams, 2);
2144 oStmt = McStmtVar(sName, asParams, asParams[0], asParams[1]);
2145 oSelf.aoLocals.append(oStmt);
2146 return oStmt;
2147
2148 @staticmethod
2149 def parseMcLocalAssign(oSelf, sName, asParams):
2150 """ IEM_MC_LOCAL_ASSIGN """
2151 oSelf.checkStmtParamCount(sName, asParams, 3);
2152 oStmt = McStmtVar(sName, asParams, asParams[0], asParams[1], sValue = asParams[2]);
2153 oSelf.aoLocals.append(oStmt);
2154 return oStmt;
2155
2156 @staticmethod
2157 def parseMcLocalConst(oSelf, sName, asParams):
2158 """ IEM_MC_LOCAL_CONST """
2159 oSelf.checkStmtParamCount(sName, asParams, 3);
2160 oStmt = McStmtVar(sName, asParams, asParams[0], asParams[1], sValue = asParams[2]);
2161 oSelf.aoLocals.append(oStmt);
2162 return oStmt;
2163
2164 @staticmethod
2165 def parseMcCallAImpl(oSelf, sName, asParams):
2166 """ IEM_MC_CALL_AIMPL_3|4 """
2167 cArgs = int(sName[-1]);
2168 oSelf.checkStmtParamCount(sName, asParams, 2 + cArgs);
2169 return McStmtCall(sName, asParams, 1, 0);
2170
2171 @staticmethod
2172 def parseMcCallVoidAImpl(oSelf, sName, asParams):
2173 """ IEM_MC_CALL_VOID_AIMPL_2|3 """
2174 cArgs = int(sName[-1]);
2175 oSelf.checkStmtParamCount(sName, asParams, 1 + cArgs);
2176 return McStmtCall(sName, asParams, 0);
2177
2178 @staticmethod
2179 def parseMcCallAvxAImpl(oSelf, sName, asParams):
2180 """ IEM_MC_CALL_AVX_AIMPL_2|3 """
2181 cArgs = int(sName[-1]);
2182 oSelf.checkStmtParamCount(sName, asParams, 1 + cArgs);
2183 return McStmtCall(sName, asParams, 0);
2184
2185 @staticmethod
2186 def parseMcCallFpuAImpl(oSelf, sName, asParams):
2187 """ IEM_MC_CALL_FPU_AIMPL_1|2|3 """
2188 cArgs = int(sName[-1]);
2189 oSelf.checkStmtParamCount(sName, asParams, 1 + cArgs);
2190 return McStmtCall(sName, asParams, 0);
2191
2192 @staticmethod
2193 def parseMcCallMmxAImpl(oSelf, sName, asParams):
2194 """ IEM_MC_CALL_MMX_AIMPL_2|3 """
2195 cArgs = int(sName[-1]);
2196 oSelf.checkStmtParamCount(sName, asParams, 1 + cArgs);
2197 return McStmtCall(sName, asParams, 0);
2198
2199 @staticmethod
2200 def parseMcCallSseAImpl(oSelf, sName, asParams):
2201 """ IEM_MC_CALL_SSE_AIMPL_2|3 """
2202 cArgs = int(sName[-1]);
2203 oSelf.checkStmtParamCount(sName, asParams, 1 + cArgs);
2204 return McStmtCall(sName, asParams, 0);
2205
2206 def parseCImplFlags(self, sName, sFlags):
2207 """
2208 Helper for parseMcCallCImpl and parseMcDeferToCImpl to validate and
2209 merge a bunch of IEM_CIMPL_F_XXX value into dsCImplFlags.
2210 """
2211 if sFlags != '0':
2212 sFlags = self.stripComments(sFlags);
2213 #print('debug: %s: %s' % (self.oFunction.sName,' | '.join(''.join(sFlags.split()).split('|')),));
2214 for sFlag in sFlags.split('|'):
2215 sFlag = sFlag.strip();
2216 #print('debug: %s' % sFlag)
2217 if sFlag not in g_kdCImplFlags:
2218 self.raiseStmtError(sName, 'Unknown flag: %s' % (sFlag, ));
2219 self.dsCImplFlags[sFlag] = True;
2220 for sFlag2 in g_kdCImplFlags[sFlag]:
2221 self.dsCImplFlags[sFlag2] = True;
2222 return None;
2223
2224 @staticmethod
2225 def parseMcCallCImpl(oSelf, sName, asParams):
2226 """ IEM_MC_CALL_CIMPL_0|1|2|3|4|5 """
2227 cArgs = int(sName[-1]);
2228 oSelf.checkStmtParamCount(sName, asParams, 2 + cArgs);
2229 oSelf.parseCImplFlags(sName, asParams[0]);
2230 return McStmtCall(sName, asParams, 1);
2231
2232 @staticmethod
2233 def parseMcDeferToCImpl(oSelf, sName, asParams):
2234 """ IEM_MC_DEFER_TO_CIMPL_[0|1|2|3]_RET """
2235 #print('debug: %s, %s,...' % (sName, asParams[0],));
2236 cArgs = int(sName[-5]);
2237 oSelf.checkStmtParamCount(sName, asParams, 2 + cArgs);
2238 oSelf.parseCImplFlags(sName, asParams[0]);
2239 return McStmtCall(sName, asParams, 1);
2240
2241 @staticmethod
2242 def stripComments(sCode):
2243 """ Returns sCode with comments removed. """
2244 off = 0;
2245 while off < len(sCode):
2246 off = sCode.find('/', off);
2247 if off < 0 or off + 1 >= len(sCode):
2248 break;
2249
2250 if sCode[off + 1] == '/':
2251 # C++ comment.
2252 offEnd = sCode.find('\n', off + 2);
2253 if offEnd < 0:
2254 return sCode[:off].rstrip();
2255 sCode = sCode[ : off] + sCode[offEnd : ];
2256 off += 1;
2257
2258 elif sCode[off + 1] == '*':
2259 # C comment
2260 offEnd = sCode.find('*/', off + 2);
2261 if offEnd < 0:
2262 return sCode[:off].rstrip();
2263 sSep = ' ';
2264 if (off > 0 and sCode[off - 1].isspace()) or (offEnd + 2 < len(sCode) and sCode[offEnd + 2].isspace()):
2265 sSep = '';
2266 sCode = sCode[ : off] + sSep + sCode[offEnd + 2 : ];
2267 off += len(sSep);
2268
2269 else:
2270 # Not a comment.
2271 off += 1;
2272 return sCode;
2273
2274 @staticmethod
2275 def extractParam(sCode, offParam):
2276 """
2277 Extracts the parameter value at offParam in sCode.
2278 Returns stripped value and the end offset of the terminating ',' or ')'.
2279 """
2280 # Extract it.
2281 cNesting = 0;
2282 offStart = offParam;
2283 while offParam < len(sCode):
2284 ch = sCode[offParam];
2285 if ch == '(':
2286 cNesting += 1;
2287 elif ch == ')':
2288 if cNesting == 0:
2289 break;
2290 cNesting -= 1;
2291 elif ch == ',' and cNesting == 0:
2292 break;
2293 offParam += 1;
2294 return (sCode[offStart : offParam].strip(), offParam);
2295
2296 @staticmethod
2297 def extractParams(sCode, offOpenParen):
2298 """
2299 Parses a parameter list.
2300 Returns the list of parameter values and the offset of the closing parentheses.
2301 Returns (None, len(sCode)) on if no closing parentheses was found.
2302 """
2303 assert sCode[offOpenParen] == '(';
2304 asParams = [];
2305 off = offOpenParen + 1;
2306 while off < len(sCode):
2307 ch = sCode[off];
2308 if ch.isspace():
2309 off += 1;
2310 elif ch != ')':
2311 (sParam, off) = McBlock.extractParam(sCode, off);
2312 asParams.append(sParam);
2313 assert off < len(sCode), 'off=%s sCode=%s:"%s"' % (off, len(sCode), sCode,);
2314 if sCode[off] == ',':
2315 off += 1;
2316 else:
2317 return (asParams, off);
2318 return (None, off);
2319
2320 @staticmethod
2321 def findClosingBraces(sCode, off, offStop):
2322 """
2323 Finds the matching '}' for the '{' at off in sCode.
2324 Returns offset of the matching '}' on success, otherwise -1.
2325
2326 Note! Does not take comments into account.
2327 """
2328 cDepth = 1;
2329 off += 1;
2330 while off < offStop:
2331 offClose = sCode.find('}', off, offStop);
2332 if offClose < 0:
2333 break;
2334 cDepth += sCode.count('{', off, offClose);
2335 cDepth -= 1;
2336 if cDepth == 0:
2337 return offClose;
2338 off = offClose + 1;
2339 return -1;
2340
2341 @staticmethod
2342 def countSpacesAt(sCode, off, offStop):
2343 """ Returns the number of space characters at off in sCode. """
2344 offStart = off;
2345 while off < offStop and sCode[off].isspace():
2346 off += 1;
2347 return off - offStart;
2348
2349 @staticmethod
2350 def skipSpacesAt(sCode, off, offStop):
2351 """ Returns first offset at or after off for a non-space character. """
2352 return off + McBlock.countSpacesAt(sCode, off, offStop);
2353
2354 @staticmethod
2355 def isSubstrAt(sStr, off, sSubStr):
2356 """ Returns true of sSubStr is found at off in sStr. """
2357 return sStr[off : off + len(sSubStr)] == sSubStr;
2358
    ## Matches the C/C++ control statement keywords if, else, while, for and do.
    ## Group 1 is the keyword, for if/while/for it includes the opening parenthesis.
    koReCppCtrlStmts = re.compile(r'\b(if\s*[(]|else\b|while\s*[(]|for\s*[(]|do\b)');
    ## Matches 'iem.s.<member>' for the member names listed in the pattern,
    ## group 1 is the member name.
    ## NOTE(review): 'fEvxStuff' looks like it could be a misspelling of
    ##               'fEvexStuff' - confirm against the IEM CPU state structure.
    koReIemDecoderVars = re.compile( r'iem\.s\.(fPrefixes|uRexReg|uRexB|uRexIndex|iEffSeg|offModRm|cbOpcode|offOpcode'
                                     + r'|enmEffOpSize|enmDefOpSize|enmDefAddrMode|enmEffAddrMode|idxPrefix'
                                     + r'|uVex3rdReg|uVexLength|fEvxStuff|uFpuOpcode|abOpcode'
                                     + r')');
2364
2365 def decodeCode(self, sRawCode, off = 0, offStop = -1, iLevel = 0): # pylint: disable=too-many-statements,too-many-branches
2366 """
2367 Decodes sRawCode[off : offStop].
2368
2369 Returns list of McStmt instances.
2370 Raises ParserException on failure.
2371 """
2372 if offStop < 0:
2373 offStop = len(sRawCode);
2374 aoStmts = [];
2375 while off < offStop:
2376 ch = sRawCode[off];
2377
2378 #
2379 # Skip spaces and comments.
2380 #
2381 if ch.isspace():
2382 off += 1;
2383
2384 elif ch == '/':
2385 ch = sRawCode[off + 1];
2386 if ch == '/': # C++ comment.
2387 off = sRawCode.find('\n', off + 2);
2388 if off < 0:
2389 break;
2390 off += 1;
2391 elif ch == '*': # C comment.
2392 off = sRawCode.find('*/', off + 2);
2393 if off < 0:
2394 break;
2395 off += 2;
2396 else:
2397 self.raiseDecodeError(sRawCode, off, 'Unexpected "/"');
2398
2399 #
2400 # Is it a MC statement.
2401 #
2402 elif ch == 'I' and sRawCode[off : off + len('IEM_MC_')] == 'IEM_MC_':
2403 # All MC statements ends with a semicolon, except for conditionals which ends with a '{'.
2404 # Extract it and strip comments from it.
2405 if not self.isSubstrAt(sRawCode, off, 'IEM_MC_IF_'):
2406 offEnd = sRawCode.find(';', off + len('IEM_MC_'));
2407 if offEnd <= off:
2408 self.raiseDecodeError(sRawCode, off, 'MC statement without a ";"');
2409 else:
2410 offEnd = sRawCode.find('{', off + len('IEM_MC_IF_'));
2411 if offEnd <= off:
2412 self.raiseDecodeError(sRawCode, off, 'MC conditional statement without a "{"');
2413 if sRawCode.find(';', off + len('IEM_MC_IF_'), offEnd) > off:
2414 self.raiseDecodeError(sRawCode, off, 'MC conditional statement without an immediate "{"');
2415 offEnd -= 1;
2416 while offEnd > off and sRawCode[offEnd - 1].isspace():
2417 offEnd -= 1;
2418
2419 sRawStmt = self.stripComments(sRawCode[off : offEnd]);
2420
2421 # Isolate the statement name.
2422 offOpenParen = sRawStmt.find('(');
2423 if offOpenParen < 0:
2424 self.raiseDecodeError(sRawCode, off, 'MC statement without a "("');
2425 sName = sRawStmt[: offOpenParen].strip();
2426
2427 # Extract the parameters.
2428 (asParams, offCloseParen) = self.extractParams(sRawStmt, offOpenParen);
2429 if asParams is None:
2430 self.raiseDecodeError(sRawCode, off, 'MC statement without a closing parenthesis');
2431 if offCloseParen + 1 != len(sRawStmt):
2432 self.raiseDecodeError(sRawCode, off,
2433 'Unexpected code following MC statement: %s' % (sRawStmt[offCloseParen + 1:]));
2434
2435 # Hand it to the handler.
2436 fnParser = g_dMcStmtParsers.get(sName)[0];
2437 if not fnParser:
2438 self.raiseDecodeError(sRawCode, off, 'Unknown MC statement: %s' % (sName,));
2439 oStmt = fnParser(self, sName, asParams);
2440 if not isinstance(oStmt, (list, tuple)):
2441 aoStmts.append(oStmt);
2442 else:
2443 aoStmts.extend(oStmt);
2444
2445 #
2446 # If conditional, we need to parse the whole statement.
2447 #
2448 # For reasons of simplicity, we assume the following structure
2449 # and parse each branch in a recursive call:
2450 # IEM_MC_IF_XXX() {
2451 # IEM_MC_WHATEVER();
2452 # } IEM_MC_ELSE() {
2453 # IEM_MC_WHATEVER();
2454 # } IEM_MC_ENDIF();
2455 #
2456 if sName.startswith('IEM_MC_IF_'):
2457 if iLevel > 1:
2458 self.raiseDecodeError(sRawCode, off, 'Too deep nesting of conditionals.');
2459
2460 # Find start of the IF block:
2461 offBlock1 = self.skipSpacesAt(sRawCode, offEnd, offStop);
2462 if sRawCode[offBlock1] != '{':
2463 self.raiseDecodeError(sRawCode, offBlock1, 'Expected "{" following %s' % (sName,));
2464
2465 # Find the end of it.
2466 offBlock1End = self.findClosingBraces(sRawCode, offBlock1, offStop);
2467 if offBlock1End < 0:
2468 self.raiseDecodeError(sRawCode, offBlock1, 'No matching "}" closing IF block of %s' % (sName,));
2469
2470 oStmt.aoIfBranch = self.decodeCode(sRawCode, offBlock1 + 1, offBlock1End, iLevel + 1);
2471
2472 # Is there an else section?
2473 off = self.skipSpacesAt(sRawCode, offBlock1End + 1, offStop);
2474 if self.isSubstrAt(sRawCode, off, 'IEM_MC_ELSE'):
2475 off = self.skipSpacesAt(sRawCode, off + len('IEM_MC_ELSE'), offStop);
2476 if sRawCode[off] != '(':
2477 self.raiseDecodeError(sRawCode, off, 'Expected "(" following IEM_MC_ELSE"');
2478 off = self.skipSpacesAt(sRawCode, off + 1, offStop);
2479 if sRawCode[off] != ')':
2480 self.raiseDecodeError(sRawCode, off, 'Expected ")" following IEM_MC_ELSE("');
2481
2482 # Find start of the ELSE block.
2483 offBlock2 = self.skipSpacesAt(sRawCode, off + 1, offStop);
2484 if sRawCode[offBlock2] != '{':
2485 self.raiseDecodeError(sRawCode, offBlock2, 'Expected "{" following IEM_MC_ELSE()"');
2486
2487 # Find the end of it.
2488 offBlock2End = self.findClosingBraces(sRawCode, offBlock2, offStop);
2489 if offBlock2End < 0:
2490 self.raiseDecodeError(sRawCode, offBlock2, 'No matching "}" closing ELSE block of %s' % (sName,));
2491
2492 oStmt.aoElseBranch = self.decodeCode(sRawCode, offBlock2 + 1, offBlock2End, iLevel + 1);
2493 off = self.skipSpacesAt(sRawCode, offBlock2End + 1, offStop);
2494
2495 # Parse past the endif statement.
2496 if not self.isSubstrAt(sRawCode, off, 'IEM_MC_ENDIF'):
2497 self.raiseDecodeError(sRawCode, off, 'Expected IEM_MC_ENDIF for closing %s' % (sName,));
2498 off = self.skipSpacesAt(sRawCode, off + len('IEM_MC_ENDIF'), offStop);
2499 if sRawCode[off] != '(':
2500 self.raiseDecodeError(sRawCode, off, 'Expected "(" following IEM_MC_ENDIF"');
2501 off = self.skipSpacesAt(sRawCode, off + 1, offStop);
2502 if sRawCode[off] != ')':
2503 self.raiseDecodeError(sRawCode, off, 'Expected ")" following IEM_MC_ENDIF("');
2504 off = self.skipSpacesAt(sRawCode, off + 1, offStop);
2505 if sRawCode[off] != ';':
2506 self.raiseDecodeError(sRawCode, off, 'Expected ";" following IEM_MC_ENDIF()"');
2507 off += 1;
2508
2509 else:
2510 # Advance.
2511 off = offEnd + 1;
2512
2513 #
2514 # Otherwise it must be a C/C++ statement of sorts.
2515 #
2516 else:
2517 # Find the end of the statement. if and else requires special handling.
2518 sCondExpr = None;
2519 oMatch = self.koReCppCtrlStmts.match(sRawCode, off);
2520 if oMatch:
2521 if oMatch.group(1)[-1] == '(':
2522 (sCondExpr, offEnd) = self.extractParam(sRawCode, oMatch.end());
2523 else:
2524 offEnd = oMatch.end();
2525 if not oMatch.group(1).startswith('if') and oMatch.group(1) != 'else':
2526 self.raiseDecodeError(sRawCode, off, 'Only if/else control statements allowed: %s' % (oMatch.group(1),));
2527 elif ch == '#':
2528 offEnd = sRawCode.find('\n', off, offStop);
2529 if offEnd < 0:
2530 offEnd = offStop;
2531 offEnd -= 1;
2532 while offEnd > off and sRawCode[offEnd - 1].isspace():
2533 offEnd -= 1;
2534 else:
2535 offEnd = sRawCode.find(';', off);
2536 if offEnd < 0:
2537 self.raiseDecodeError(sRawCode, off, 'C++ statement without a ";"');
2538
2539 # Check this and the following statement whether it might have
2540 # something to do with decoding. This is a statement filter
2541 # criteria when generating the threaded functions blocks.
2542 offNextEnd = sRawCode.find(';', offEnd + 1);
2543 fDecode = ( sRawCode.find('IEM_OPCODE_', off, max(offEnd, offNextEnd)) >= 0
2544 or sRawCode.find('IEMOP_HLP_DONE_', off, max(offEnd, offNextEnd)) >= 0
2545 or sRawCode.find('IEMOP_HLP_DECODED_', off, offEnd) >= 0
2546 or sRawCode.find('IEMOP_HLP_RAISE_UD_IF_MISSING_GUEST_FEATURE', off, offEnd) >= 0
2547 or sRawCode.find('IEMOP_HLP_VMX_INSTR', off, offEnd) >= 0
2548 or sRawCode.find('IEMOP_HLP_IN_VMX_OPERATION', off, offEnd) >= 0 ## @todo wrong
2549 );
2550
2551 if not oMatch:
2552 if ch != '#':
2553 aoStmts.append(McCppGeneric(sRawCode[off : offEnd + 1], fDecode));
2554 else:
2555 aoStmts.append(McCppPreProc(sRawCode[off : offEnd + 1]));
2556 off = offEnd + 1;
2557 elif oMatch.group(1).startswith('if'):
2558 #
2559 # if () xxx [else yyy] statement.
2560 #
2561 oStmt = McCppCond(sCondExpr, fDecode);
2562 aoStmts.append(oStmt);
2563 off = offEnd + 1;
2564
2565 # Following the if () we can either have a {} containing zero or more statements
2566 # or we have a single statement.
2567 offBlock1 = self.skipSpacesAt(sRawCode, offEnd + 1, offStop);
2568 if sRawCode[offBlock1] == '{':
2569 offBlock1End = self.findClosingBraces(sRawCode, offBlock1, offStop);
2570 if offBlock1End < 0:
2571 self.raiseDecodeError(sRawCode, offBlock1, 'No matching "}" closing if block');
2572 offBlock1 += 1;
2573 else:
2574 offBlock1End = sRawCode.find(';', offBlock1, offStop);
2575 if offBlock1End < 0:
2576 self.raiseDecodeError(sRawCode, off, 'Expected ";" terminating one-line if block"');
2577
2578 oStmt.aoIfBranch = self.decodeCode(sRawCode, offBlock1, offBlock1End, iLevel + 1);
2579
2580 # The else is optional and can likewise be followed by {} or a single statement.
2581 off = self.skipSpacesAt(sRawCode, offBlock1End + 1, offStop);
2582 if self.isSubstrAt(sRawCode, off, 'else') and sRawCode[off + len('else')].isspace():
2583 offBlock2 = self.skipSpacesAt(sRawCode, off + len('else'), offStop);
2584 if sRawCode[offBlock2] == '{':
2585 offBlock2End = self.findClosingBraces(sRawCode, offBlock2, offStop);
2586 if offBlock2End < 0:
2587 self.raiseDecodeError(sRawCode, offBlock2, 'No matching "}" closing else block');
2588 offBlock2 += 1;
2589 else:
2590 offBlock2End = sRawCode.find(';', offBlock2, offStop);
2591 if offBlock2End < 0:
2592 self.raiseDecodeError(sRawCode, off, 'Expected ";" terminating one-line else block"');
2593
2594 oStmt.aoElseBranch = self.decodeCode(sRawCode, offBlock2, offBlock2End, iLevel + 1);
2595 off = offBlock2End + 1;
2596
2597 elif oMatch.group(1) == 'else':
2598 # Problematic 'else' branch, typically involving #ifdefs.
2599 self.raiseDecodeError(sRawCode, off, 'Mixed up else/#ifdef or something confusing us.');
2600
2601 return aoStmts;
2602
def decode(self):
    """
    Returns the statement list of the block, decoding the block on demand.

    A non-empty self.aoStmts is handed back untouched.  Otherwise the lines
    in self.asLines are concatenated, passed to decodeCode() and the result
    is stored in self.aoStmts before it is returned.

    Raises ParserException on failure.
    """
    # Already decoded (and non-empty): reuse the cached list.
    if self.aoStmts:
        return self.aoStmts;

    # Decode the whole block body as a single string and cache the outcome.
    sRawCode = ''.join(self.asLines);
    self.aoStmts = self.decodeCode(sRawCode);
    return self.aoStmts;
2612
2613
def checkForTooEarlyEffSegUse(self, aoStmts):
    """
    Checks that pVCpu->iem.s.iEffSeg is not referenced by any statement
    preceding the first IEM_MC_CALC_RM_EFF_ADDR statement in aoStmts.

    Returns None on success (also when there is no IEM_MC_CALC_RM_EFF_ADDR
    statement at all), error string on failure.

    See r158454 for an example of this issue.
    """

    # Only the top-level statement list is examined.  Conditional branches
    # are not descended into, as we're ASSUMING the address calculation is
    # not preceded by iEffSeg uses hidden inside them.
    idxCalc = next((idxStmt for idxStmt, oCur in enumerate(aoStmts)
                    if oCur.sName == 'IEM_MC_CALC_RM_EFF_ADDR'), None);
    if idxCalc is None:
        return None;

    # Walk backwards from the statement right before the address calculation
    # and report the first (i.e. nearest) one mentioning iEffSeg.
    for idxPrev in range(idxCalc - 1, -1, -1):
        if any('pVCpu->iem.s.iEffSeg' in sArg for sArg in aoStmts[idxPrev].asParams):
            return "statement #%u: pVCpu->iem.s.iEffSeg is used prior to IEM_MC_CALC_RM_EFF_ADDR!" % (idxPrev + 1,);
    return None;
2635
# Captures the first word of a C/C++ statement: optional leading whitespace,
# a run of word characters, followed by a space, '(' or ';'.
koReCppFirstWord = re.compile(r'^\s*(\w+)[ (;]');

# First words of decode-flagged C/C++ statements that checkForDoneDecoding
# tolerates after the IEMOP_HLP_DONE_*DECODING* invocation.  Only key
# membership is tested there; all values are True.
kdDecodeCppStmtOkayAfterDone = dict.fromkeys(
    ('IEMOP_HLP_IN_VMX_OPERATION', 'IEMOP_HLP_VMX_INSTR',), True);
2641
def checkForDoneDecoding(self, aoStmts):
    """
    Checks that the block contains exactly one IEMOP_HLP_DONE_*DECODING*
    macro invocation and that the statements are correctly ordered around it.

    Returns None on success, error string on failure.

    This ensures safe instruction restarting in case the recompiler runs
    out of TB resources during recompilation (e.g. aRanges or aGCPhysPages
    entries).
    """

    # The IEMOP_HLP_DONE_ stuff is not allowed inside conditionals, so only
    # the top-level statement list is walked.
    cDoneSeen = 0;
    for idxStmt, oCur in enumerate(aoStmts):
        #
        # IEM_MC_XXX statements.
        #
        if not oCur.isCppStmt():
            sMcName = oCur.sName;
            if sMcName.startswith('IEM_MC_DEFER_TO_CIMPL_') and idxStmt == 0:
                # Deferring to a C implementation as the very first statement
                # counts as an implicit done-decoding marker.
                cDoneSeen += 1;
            elif cDoneSeen == 0:
                # Entry [1] of the g_dMcStmtParsers tuple is the 'modifies
                # state' flag; unknown statement names count as non-modifying.
                if g_dMcStmtParsers.get(sMcName, (None, False))[1]:
                    return "statement #%u: State modifying MC statement before IEMOP_HLP_DONE_*DECODING*!" % (idxStmt + 1,);
            elif sMcName == 'IEM_MC_CALC_RM_EFF_ADDR':
                return "statement #%u: Decoding statement following IEMOP_HLP_DONE_*DECODING*!" % (idxStmt + 1,);
            continue;

        #
        # C/C++ statements: classify by the first word of the statement text.
        #
        oFirstWord = self.koReCppFirstWord.match(oCur.asParams[0]);
        if not oFirstWord:
            continue;
        sWord = oFirstWord.group(1);
        if sWord.startswith(('IEMOP_HLP_DONE_', 'IEMOP_HLP_DECODED_')):
            cDoneSeen += 1;
        elif cDoneSeen > 0 and oCur.fDecode and sWord not in self.kdDecodeCppStmtOkayAfterDone:
            return "statement #%u: Decoding statement following IEMOP_HLP_DONE_*DECODING*!" % (idxStmt + 1,);

    # Exactly one (explicit or implicit) done-decoding marker is required.
    if cDoneSeen < 1:
        return "Block is missing IEMOP_HLP_DONE_*DECODING* invocation!";
    if cDoneSeen > 1:
        return "Block has more than one IEMOP_HLP_DONE_*DECODING* invocation!";
    return None;
2683
def check(self):
    """
    Runs the sanity checks on the (decoded) block.

    The iEffSeg-before-address-calculation check runs first, then the
    done-decoding check; both always run.

    Returns error string list, empty if all is fine.
    """
    aoStmts = self.decode();

    # Each checker returns None (or an empty string) when satisfied and an
    # error string otherwise; keep only the complaints, in check order.
    atOutcomes = (
        self.checkForTooEarlyEffSegUse(aoStmts),
        self.checkForDoneDecoding(aoStmts),
    );
    return [sProblem for sProblem in atOutcomes if sProblem];
2701
2702
2703
2704## IEM_MC_XXX -> parser + info dictionary.
2705#
2706# The info columns:
2707# - col 0: boolean entry indicating whether the statement modifies state and
2708# must not be used before IEMOP_HLP_DONE_*.
2709# - col 1: boolean entry indicating native recompiler support.
2710#
2711# The raw table was generated via the following command
2712# sed -n -e "s/^# *define *\(IEM_MC_[A-Z_0-9]*\)[ (].*$/ '\1': McBlock.parseMcGeneric,/p" include/IEMMc.h \
2713# | sort | uniq | gawk "{printf """ %%-60s (%%s, True)\n""", $1, $2}"
2714g_dMcStmtParsers = {
2715 'IEM_MC_ACTUALIZE_AVX_STATE_FOR_CHANGE': (McBlock.parseMcGeneric, False, False, ),
2716 'IEM_MC_ACTUALIZE_AVX_STATE_FOR_READ': (McBlock.parseMcGeneric, False, False, ),
2717 'IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE': (McBlock.parseMcGeneric, False, False, ),
2718 'IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ': (McBlock.parseMcGeneric, False, False, ),
2719 'IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE': (McBlock.parseMcGeneric, False, False, ),
2720 'IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ': (McBlock.parseMcGeneric, False, False, ),
2721 'IEM_MC_ADD_GREG_U16': (McBlock.parseMcGeneric, True, False, ),
2722 'IEM_MC_ADD_GREG_U16_TO_LOCAL': (McBlock.parseMcGeneric, False, False, ),
2723 'IEM_MC_ADD_GREG_U32': (McBlock.parseMcGeneric, True, False, ),
2724 'IEM_MC_ADD_GREG_U32_TO_LOCAL': (McBlock.parseMcGeneric, False, False, ),
2725 'IEM_MC_ADD_GREG_U64': (McBlock.parseMcGeneric, True, False, ),
2726 'IEM_MC_ADD_GREG_U64_TO_LOCAL': (McBlock.parseMcGeneric, False, False, ),
2727 'IEM_MC_ADD_GREG_U8_TO_LOCAL': (McBlock.parseMcGeneric, False, False, ),
2728 'IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR': (McBlock.parseMcGeneric, True, False, ),
2729 'IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR': (McBlock.parseMcGeneric, True, False, ),
2730 'IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR': (McBlock.parseMcGeneric, True, False, ),
2731 'IEM_MC_ADVANCE_RIP_AND_FINISH': (McBlock.parseMcGeneric, True, True, ),
2732 'IEM_MC_AND_2LOCS_U32': (McBlock.parseMcGeneric, False, False, ),
2733 'IEM_MC_AND_ARG_U16': (McBlock.parseMcGeneric, False, False, ),
2734 'IEM_MC_AND_ARG_U32': (McBlock.parseMcGeneric, False, False, ),
2735 'IEM_MC_AND_ARG_U64': (McBlock.parseMcGeneric, False, False, ),
2736 'IEM_MC_AND_GREG_U16': (McBlock.parseMcGeneric, True, False, ),
2737 'IEM_MC_AND_GREG_U32': (McBlock.parseMcGeneric, True, False, ),
2738 'IEM_MC_AND_GREG_U64': (McBlock.parseMcGeneric, True, False, ),
2739 'IEM_MC_AND_GREG_U8': (McBlock.parseMcGeneric, True, False, ),
2740 'IEM_MC_AND_LOCAL_U16': (McBlock.parseMcGeneric, False, False, ),
2741 'IEM_MC_AND_LOCAL_U32': (McBlock.parseMcGeneric, False, False, ),
2742 'IEM_MC_AND_LOCAL_U64': (McBlock.parseMcGeneric, False, False, ),
2743 'IEM_MC_AND_LOCAL_U8': (McBlock.parseMcGeneric, False, False, ),
2744 'IEM_MC_ARG': (McBlock.parseMcArg, False, True, ),
2745 'IEM_MC_ARG_CONST': (McBlock.parseMcArgConst, False, True, ),
2746 'IEM_MC_ARG_LOCAL_EFLAGS': (McBlock.parseMcArgLocalEFlags, False, True, ),
2747 'IEM_MC_ARG_LOCAL_REF': (McBlock.parseMcArgLocalRef, False, True, ),
2748 'IEM_MC_ASSIGN': (McBlock.parseMcGeneric, False, False, ),
2749 'IEM_MC_ASSIGN_TO_SMALLER': (McBlock.parseMcGeneric, False, False, ),
2750 'IEM_MC_ASSIGN_U8_SX_U64': (McBlock.parseMcGeneric, False, False, ),
2751 'IEM_MC_ASSIGN_U32_SX_U64': (McBlock.parseMcGeneric, False, False, ),
2752 'IEM_MC_BEGIN': (McBlock.parseMcBegin, False, True, ),
2753 'IEM_MC_BROADCAST_XREG_U16_ZX_VLMAX': (McBlock.parseMcGeneric, True, False, ),
2754 'IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX': (McBlock.parseMcGeneric, True, False, ),
2755 'IEM_MC_BROADCAST_XREG_U64_ZX_VLMAX': (McBlock.parseMcGeneric, True, False, ),
2756 'IEM_MC_BROADCAST_XREG_U8_ZX_VLMAX': (McBlock.parseMcGeneric, True, False, ),
2757 'IEM_MC_BROADCAST_YREG_U128_ZX_VLMAX': (McBlock.parseMcGeneric, True, False, ),
2758 'IEM_MC_BROADCAST_YREG_U16_ZX_VLMAX': (McBlock.parseMcGeneric, True, False, ),
2759 'IEM_MC_BROADCAST_YREG_U32_ZX_VLMAX': (McBlock.parseMcGeneric, True, False, ),
2760 'IEM_MC_BROADCAST_YREG_U64_ZX_VLMAX': (McBlock.parseMcGeneric, True, False, ),
2761 'IEM_MC_BROADCAST_YREG_U8_ZX_VLMAX': (McBlock.parseMcGeneric, True, False, ),
2762 'IEM_MC_BSWAP_LOCAL_U16': (McBlock.parseMcGeneric, False, False, ),
2763 'IEM_MC_BSWAP_LOCAL_U32': (McBlock.parseMcGeneric, False, False, ),
2764 'IEM_MC_BSWAP_LOCAL_U64': (McBlock.parseMcGeneric, False, False, ),
2765 'IEM_MC_CALC_RM_EFF_ADDR': (McBlock.parseMcGeneric, False, False, ),
2766 'IEM_MC_CALL_AIMPL_3': (McBlock.parseMcCallAImpl, True, False, ),
2767 'IEM_MC_CALL_AIMPL_4': (McBlock.parseMcCallAImpl, True, False, ),
2768 'IEM_MC_CALL_AVX_AIMPL_2': (McBlock.parseMcCallAvxAImpl, True, False, ),
2769 'IEM_MC_CALL_AVX_AIMPL_3': (McBlock.parseMcCallAvxAImpl, True, False, ),
2770 'IEM_MC_CALL_CIMPL_0': (McBlock.parseMcCallCImpl, True, False, ),
2771 'IEM_MC_CALL_CIMPL_1': (McBlock.parseMcCallCImpl, True, False, ),
2772 'IEM_MC_CALL_CIMPL_2': (McBlock.parseMcCallCImpl, True, False, ),
2773 'IEM_MC_CALL_CIMPL_3': (McBlock.parseMcCallCImpl, True, False, ),
2774 'IEM_MC_CALL_CIMPL_4': (McBlock.parseMcCallCImpl, True, False, ),
2775 'IEM_MC_CALL_CIMPL_5': (McBlock.parseMcCallCImpl, True, False, ),
2776 'IEM_MC_CALL_FPU_AIMPL_1': (McBlock.parseMcCallFpuAImpl, True, False, ),
2777 'IEM_MC_CALL_FPU_AIMPL_2': (McBlock.parseMcCallFpuAImpl, True, False, ),
2778 'IEM_MC_CALL_FPU_AIMPL_3': (McBlock.parseMcCallFpuAImpl, True, False, ),
2779 'IEM_MC_CALL_MMX_AIMPL_2': (McBlock.parseMcCallMmxAImpl, True, False, ),
2780 'IEM_MC_CALL_MMX_AIMPL_3': (McBlock.parseMcCallMmxAImpl, True, False, ),
2781 'IEM_MC_CALL_SSE_AIMPL_2': (McBlock.parseMcCallSseAImpl, True, False, ),
2782 'IEM_MC_CALL_SSE_AIMPL_3': (McBlock.parseMcCallSseAImpl, True, False, ),
2783 'IEM_MC_CALL_VOID_AIMPL_0': (McBlock.parseMcCallVoidAImpl, True, False, ),
2784 'IEM_MC_CALL_VOID_AIMPL_1': (McBlock.parseMcCallVoidAImpl, True, False, ),
2785 'IEM_MC_CALL_VOID_AIMPL_2': (McBlock.parseMcCallVoidAImpl, True, False, ),
2786 'IEM_MC_CALL_VOID_AIMPL_3': (McBlock.parseMcCallVoidAImpl, True, False, ),
2787 'IEM_MC_CALL_VOID_AIMPL_4': (McBlock.parseMcCallVoidAImpl, True, False, ),
2788 'IEM_MC_CLEAR_EFL_BIT': (McBlock.parseMcGeneric, True, False, ),
2789 'IEM_MC_CLEAR_FSW_EX': (McBlock.parseMcGeneric, True, False, ),
2790 'IEM_MC_CLEAR_HIGH_GREG_U64': (McBlock.parseMcGeneric, True, False, ),
2791 'IEM_MC_CLEAR_XREG_U32_MASK': (McBlock.parseMcGeneric, True, False, ),
2792 'IEM_MC_CLEAR_YREG_128_UP': (McBlock.parseMcGeneric, True, False, ),
2793 'IEM_MC_COMMIT_EFLAGS': (McBlock.parseMcGeneric, True, False, ),
2794 'IEM_MC_COPY_XREG_U128': (McBlock.parseMcGeneric, True, False, ),
2795 'IEM_MC_COPY_YREG_U128_ZX_VLMAX': (McBlock.parseMcGeneric, True, False, ),
2796 'IEM_MC_COPY_YREG_U256_ZX_VLMAX': (McBlock.parseMcGeneric, True, False, ),
2797 'IEM_MC_COPY_YREG_U64_ZX_VLMAX': (McBlock.parseMcGeneric, True, False, ),
2798 'IEM_MC_DEFER_TO_CIMPL_0_RET': (McBlock.parseMcDeferToCImpl, False, False, ),
2799 'IEM_MC_DEFER_TO_CIMPL_1_RET': (McBlock.parseMcDeferToCImpl, False, False, ),
2800 'IEM_MC_DEFER_TO_CIMPL_2_RET': (McBlock.parseMcDeferToCImpl, False, False, ),
2801 'IEM_MC_DEFER_TO_CIMPL_3_RET': (McBlock.parseMcDeferToCImpl, False, False, ),
2802 'IEM_MC_END': (McBlock.parseMcGeneric, True, True, ),
2803 'IEM_MC_FETCH_EFLAGS': (McBlock.parseMcGeneric, False, False, ),
2804 'IEM_MC_FETCH_EFLAGS_U8': (McBlock.parseMcGeneric, False, False, ),
2805 'IEM_MC_FETCH_FCW': (McBlock.parseMcGeneric, False, False, ),
2806 'IEM_MC_FETCH_FSW': (McBlock.parseMcGeneric, False, False, ),
2807 'IEM_MC_FETCH_GREG_U16': (McBlock.parseMcGeneric, False, True, ),
2808 'IEM_MC_FETCH_GREG_U16_SX_U32': (McBlock.parseMcGeneric, False, False, ),
2809 'IEM_MC_FETCH_GREG_U16_SX_U64': (McBlock.parseMcGeneric, False, False, ),
2810 'IEM_MC_FETCH_GREG_U16_ZX_U32': (McBlock.parseMcGeneric, False, False, ),
2811 'IEM_MC_FETCH_GREG_U16_ZX_U64': (McBlock.parseMcGeneric, False, False, ),
2812 'IEM_MC_FETCH_GREG_U32': (McBlock.parseMcGeneric, False, False, ),
2813 'IEM_MC_FETCH_GREG_U32_SX_U64': (McBlock.parseMcGeneric, False, False, ),
2814 'IEM_MC_FETCH_GREG_U32_ZX_U64': (McBlock.parseMcGeneric, False, False, ),
2815 'IEM_MC_FETCH_GREG_U64': (McBlock.parseMcGeneric, False, False, ),
2816 'IEM_MC_FETCH_GREG_U64_ZX_U64': (McBlock.parseMcGeneric, False, False, ),
2817 'IEM_MC_FETCH_GREG_U8': (McBlock.parseMcGeneric, False, False, ),
2818 'IEM_MC_FETCH_GREG_U8_SX_U16': (McBlock.parseMcGeneric, False, False, ),
2819 'IEM_MC_FETCH_GREG_U8_SX_U32': (McBlock.parseMcGeneric, False, False, ),
2820 'IEM_MC_FETCH_GREG_U8_SX_U64': (McBlock.parseMcGeneric, False, False, ),
2821 'IEM_MC_FETCH_GREG_U8_ZX_U16': (McBlock.parseMcGeneric, False, False, ),
2822 'IEM_MC_FETCH_GREG_U8_ZX_U32': (McBlock.parseMcGeneric, False, False, ),
2823 'IEM_MC_FETCH_GREG_U8_ZX_U64': (McBlock.parseMcGeneric, False, False, ),
2824 'IEM_MC_FETCH_MEM_D80': (McBlock.parseMcGeneric, True, False, ),
2825 'IEM_MC_FETCH_MEM_I16': (McBlock.parseMcGeneric, True, False, ),
2826 'IEM_MC_FETCH_MEM_I32': (McBlock.parseMcGeneric, True, False, ),
2827 'IEM_MC_FETCH_MEM_I64': (McBlock.parseMcGeneric, True, False, ),
2828 'IEM_MC_FETCH_MEM_R32': (McBlock.parseMcGeneric, True, False, ),
2829 'IEM_MC_FETCH_MEM_R64': (McBlock.parseMcGeneric, True, False, ),
2830 'IEM_MC_FETCH_MEM_R80': (McBlock.parseMcGeneric, True, False, ),
2831 'IEM_MC_FETCH_MEM_S32_SX_U64': (McBlock.parseMcGeneric, True, False, ),
2832 'IEM_MC_FETCH_MEM_U128': (McBlock.parseMcGeneric, True, False, ),
2833 'IEM_MC_FETCH_MEM_U128_ALIGN_SSE': (McBlock.parseMcGeneric, True, False, ),
2834 'IEM_MC_FETCH_MEM_U128_NO_AC': (McBlock.parseMcGeneric, True, False, ),
2835 'IEM_MC_FETCH_MEM_U16': (McBlock.parseMcGeneric, True, False, ),
2836 'IEM_MC_FETCH_MEM_U16_DISP': (McBlock.parseMcGeneric, True, False, ),
2837 'IEM_MC_FETCH_MEM_U16_SX_U32': (McBlock.parseMcGeneric, True, False, ),
2838 'IEM_MC_FETCH_MEM_U16_SX_U64': (McBlock.parseMcGeneric, True, False, ),
2839 'IEM_MC_FETCH_MEM_U16_ZX_U32': (McBlock.parseMcGeneric, True, False, ),
2840 'IEM_MC_FETCH_MEM_U16_ZX_U64': (McBlock.parseMcGeneric, True, False, ),
2841 'IEM_MC_FETCH_MEM_U256': (McBlock.parseMcGeneric, True, False, ),
2842 'IEM_MC_FETCH_MEM_U256_ALIGN_AVX': (McBlock.parseMcGeneric, True, False, ),
2843 'IEM_MC_FETCH_MEM_U256_NO_AC': (McBlock.parseMcGeneric, True, False, ),
2844 'IEM_MC_FETCH_MEM_U32': (McBlock.parseMcGeneric, True, False, ),
2845 'IEM_MC_FETCH_MEM_U32_DISP': (McBlock.parseMcGeneric, True, False, ),
2846 'IEM_MC_FETCH_MEM_U32_SX_U64': (McBlock.parseMcGeneric, True, False, ),
2847 'IEM_MC_FETCH_MEM_U32_ZX_U64': (McBlock.parseMcGeneric, True, False, ),
2848 'IEM_MC_FETCH_MEM_U64': (McBlock.parseMcGeneric, True, False, ),
2849 'IEM_MC_FETCH_MEM_U64_ALIGN_U128': (McBlock.parseMcGeneric, True, False, ),
2850 'IEM_MC_FETCH_MEM_U64_DISP': (McBlock.parseMcGeneric, True, False, ),
2851 'IEM_MC_FETCH_MEM_U8': (McBlock.parseMcGeneric, True, False, ),
2852 'IEM_MC_FETCH_MEM_U8_SX_U16': (McBlock.parseMcGeneric, True, False, ),
2853 'IEM_MC_FETCH_MEM_U8_SX_U32': (McBlock.parseMcGeneric, True, False, ),
2854 'IEM_MC_FETCH_MEM_U8_SX_U64': (McBlock.parseMcGeneric, True, False, ),
2855 'IEM_MC_FETCH_MEM_U8_ZX_U16': (McBlock.parseMcGeneric, True, False, ),
2856 'IEM_MC_FETCH_MEM_U8_ZX_U32': (McBlock.parseMcGeneric, True, False, ),
2857 'IEM_MC_FETCH_MEM_U8_ZX_U64': (McBlock.parseMcGeneric, True, False, ),
2858 'IEM_MC_FETCH_MEM_XMM': (McBlock.parseMcGeneric, True, False, ),
2859 'IEM_MC_FETCH_MEM_XMM_ALIGN_SSE': (McBlock.parseMcGeneric, True, False, ),
2860 'IEM_MC_FETCH_MEM_XMM_NO_AC': (McBlock.parseMcGeneric, True, False, ),
2861 'IEM_MC_FETCH_MEM_XMM_U32': (McBlock.parseMcGeneric, True, False, ),
2862 'IEM_MC_FETCH_MEM_XMM_U64': (McBlock.parseMcGeneric, True, False, ),
2863 'IEM_MC_FETCH_MEM_YMM': (McBlock.parseMcGeneric, True, False, ),
2864 'IEM_MC_FETCH_MEM_YMM_ALIGN_AVX': (McBlock.parseMcGeneric, True, False, ),
2865 'IEM_MC_FETCH_MEM_YMM_NO_AC': (McBlock.parseMcGeneric, True, False, ),
2866 'IEM_MC_FETCH_MEM16_U8': (McBlock.parseMcGeneric, True, False, ),
2867 'IEM_MC_FETCH_MEM32_U8': (McBlock.parseMcGeneric, True, False, ),
2868 'IEM_MC_FETCH_MREG_U32': (McBlock.parseMcGeneric, False, False, ),
2869 'IEM_MC_FETCH_MREG_U64': (McBlock.parseMcGeneric, False, False, ),
2870 'IEM_MC_FETCH_SREG_BASE_U32': (McBlock.parseMcGeneric, False, False, ),
2871 'IEM_MC_FETCH_SREG_BASE_U64': (McBlock.parseMcGeneric, False, False, ),
2872 'IEM_MC_FETCH_SREG_U16': (McBlock.parseMcGeneric, False, False, ),
2873 'IEM_MC_FETCH_SREG_ZX_U32': (McBlock.parseMcGeneric, False, False, ),
2874 'IEM_MC_FETCH_SREG_ZX_U64': (McBlock.parseMcGeneric, False, False, ),
2875 'IEM_MC_FETCH_XREG_U128': (McBlock.parseMcGeneric, False, False, ),
2876 'IEM_MC_FETCH_XREG_U16': (McBlock.parseMcGeneric, False, False, ),
2877 'IEM_MC_FETCH_XREG_U32': (McBlock.parseMcGeneric, False, False, ),
2878 'IEM_MC_FETCH_XREG_U64': (McBlock.parseMcGeneric, False, False, ),
2879 'IEM_MC_FETCH_XREG_U8': (McBlock.parseMcGeneric, False, False, ),
2880 'IEM_MC_FETCH_XREG_XMM': (McBlock.parseMcGeneric, False, False, ),
2881 'IEM_MC_FETCH_YREG_2ND_U64': (McBlock.parseMcGeneric, False, False, ),
2882 'IEM_MC_FETCH_YREG_U128': (McBlock.parseMcGeneric, False, False, ),
2883 'IEM_MC_FETCH_YREG_U256': (McBlock.parseMcGeneric, False, False, ),
2884 'IEM_MC_FETCH_YREG_U32': (McBlock.parseMcGeneric, False, False, ),
2885 'IEM_MC_FETCH_YREG_U64': (McBlock.parseMcGeneric, False, False, ),
2886 'IEM_MC_FLIP_EFL_BIT': (McBlock.parseMcGeneric, True, False, ),
2887 'IEM_MC_FPU_FROM_MMX_MODE': (McBlock.parseMcGeneric, True, False, ),
2888 'IEM_MC_FPU_STACK_DEC_TOP': (McBlock.parseMcGeneric, True, False, ),
2889 'IEM_MC_FPU_STACK_FREE': (McBlock.parseMcGeneric, True, False, ),
2890 'IEM_MC_FPU_STACK_INC_TOP': (McBlock.parseMcGeneric, True, False, ),
2891 'IEM_MC_FPU_STACK_PUSH_OVERFLOW': (McBlock.parseMcGeneric, True, False, ),
2892 'IEM_MC_FPU_STACK_PUSH_OVERFLOW_MEM_OP': (McBlock.parseMcGeneric, True, False, ),
2893 'IEM_MC_FPU_STACK_PUSH_UNDERFLOW': (McBlock.parseMcGeneric, True, False, ),
2894 'IEM_MC_FPU_STACK_PUSH_UNDERFLOW_TWO': (McBlock.parseMcGeneric, True, False, ),
2895 'IEM_MC_FPU_STACK_UNDERFLOW': (McBlock.parseMcGeneric, True, False, ),
2896 'IEM_MC_FPU_STACK_UNDERFLOW_MEM_OP': (McBlock.parseMcGeneric, True, False, ),
2897 'IEM_MC_FPU_STACK_UNDERFLOW_MEM_OP_THEN_POP': (McBlock.parseMcGeneric, True, False, ),
2898 'IEM_MC_FPU_STACK_UNDERFLOW_THEN_POP': (McBlock.parseMcGeneric, True, False, ),
2899 'IEM_MC_FPU_STACK_UNDERFLOW_THEN_POP_POP': (McBlock.parseMcGeneric, True, False, ),
2900 'IEM_MC_FPU_TO_MMX_MODE': (McBlock.parseMcGeneric, True, False, ),
2901 'IEM_MC_IF_CX_IS_NZ': (McBlock.parseMcGenericCond, True, True, ),
2902 'IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_NOT_SET': (McBlock.parseMcGenericCond, True, True, ),
2903 'IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_SET': (McBlock.parseMcGenericCond, True, True, ),
2904 'IEM_MC_IF_ECX_IS_NZ': (McBlock.parseMcGenericCond, True, True, ),
2905 'IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_NOT_SET': (McBlock.parseMcGenericCond, True, True, ),
2906 'IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_SET': (McBlock.parseMcGenericCond, True, True, ),
2907 'IEM_MC_IF_EFL_ANY_BITS_SET': (McBlock.parseMcGenericCond, True, True, ),
2908 'IEM_MC_IF_EFL_BIT_NOT_SET': (McBlock.parseMcGenericCond, True, True, ),
2909 'IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ': (McBlock.parseMcGenericCond, True, True, ),
2910 'IEM_MC_IF_EFL_BIT_SET': (McBlock.parseMcGenericCond, True, True, ),
2911 'IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE': (McBlock.parseMcGenericCond, True, True, ),
2912 'IEM_MC_IF_EFL_BITS_EQ': (McBlock.parseMcGenericCond, True, True, ),
2913 'IEM_MC_IF_EFL_BITS_NE': (McBlock.parseMcGenericCond, True, True, ),
2914 'IEM_MC_IF_EFL_NO_BITS_SET': (McBlock.parseMcGenericCond, True, True, ),
2915 'IEM_MC_IF_FCW_IM': (McBlock.parseMcGenericCond, True, False, ),
2916 'IEM_MC_IF_FPUREG_IS_EMPTY': (McBlock.parseMcGenericCond, True, False, ),
2917 'IEM_MC_IF_FPUREG_NOT_EMPTY': (McBlock.parseMcGenericCond, True, False, ),
2918 'IEM_MC_IF_FPUREG_NOT_EMPTY_REF_R80': (McBlock.parseMcGenericCond, True, False, ),
2919 'IEM_MC_IF_GREG_BIT_SET': (McBlock.parseMcGenericCond, True, False, ),
2920 'IEM_MC_IF_LOCAL_IS_Z': (McBlock.parseMcGenericCond, True, False, ),
2921 'IEM_MC_IF_MXCSR_XCPT_PENDING': (McBlock.parseMcGenericCond, True, False, ),
2922 'IEM_MC_IF_RCX_IS_NZ': (McBlock.parseMcGenericCond, True, True, ),
2923 'IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_NOT_SET': (McBlock.parseMcGenericCond, True, True, ),
2924 'IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_SET': (McBlock.parseMcGenericCond, True, True, ),
2925 'IEM_MC_IF_TWO_FPUREGS_NOT_EMPTY_REF_R80': (McBlock.parseMcGenericCond, True, False, ),
2926 'IEM_MC_IF_TWO_FPUREGS_NOT_EMPTY_REF_R80_FIRST': (McBlock.parseMcGenericCond, True, False, ),
2927 'IEM_MC_IMPLICIT_AVX_AIMPL_ARGS': (McBlock.parseMcImplicitAvxAArgs, False, False, ),
2928 'IEM_MC_INT_CLEAR_ZMM_256_UP': (McBlock.parseMcGeneric, True, False, ),
2929 'IEM_MC_LOCAL': (McBlock.parseMcLocal, False, True, ),
2930 'IEM_MC_LOCAL_ASSIGN': (McBlock.parseMcLocalAssign, False, True, ),
2931 'IEM_MC_LOCAL_CONST': (McBlock.parseMcLocalConst, False, True, ),
2932 'IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT': (McBlock.parseMcGeneric, True, False, ),
2933 'IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE': (McBlock.parseMcGeneric, True, False, ),
2934 'IEM_MC_MAYBE_RAISE_FPU_XCPT': (McBlock.parseMcGeneric, True, False, ),
2935 'IEM_MC_MAYBE_RAISE_FSGSBASE_XCPT': (McBlock.parseMcGeneric, True, False, ),
2936 'IEM_MC_MAYBE_RAISE_MMX_RELATED_XCPT': (McBlock.parseMcGeneric, True, False, ),
2937 'IEM_MC_MAYBE_RAISE_NON_CANONICAL_ADDR_GP0': (McBlock.parseMcGeneric, True, False, ),
2938 'IEM_MC_MAYBE_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT': (McBlock.parseMcGeneric, True, False, ),
2939 'IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT': (McBlock.parseMcGeneric, True, False, ),
2940 'IEM_MC_MAYBE_RAISE_WAIT_DEVICE_NOT_AVAILABLE': (McBlock.parseMcGeneric, True, False, ),
2941 'IEM_MC_MEM_COMMIT_AND_UNMAP': (McBlock.parseMcGeneric, True, False, ),
2942 'IEM_MC_MEM_COMMIT_AND_UNMAP_RW': (McBlock.parseMcGeneric, True, False, ),
2943 'IEM_MC_MEM_COMMIT_AND_UNMAP_RO': (McBlock.parseMcGeneric, True, False, ),
2944 'IEM_MC_MEM_COMMIT_AND_UNMAP_WO': (McBlock.parseMcGeneric, True, False, ),
2945 'IEM_MC_MEM_COMMIT_AND_UNMAP_FOR_FPU_STORE': (McBlock.parseMcGeneric, True, False, ),
2946 'IEM_MC_MEM_MAP': (McBlock.parseMcGeneric, True, False, ),
2947 'IEM_MC_MEM_MAP_EX': (McBlock.parseMcGeneric, True, False, ),
2948 'IEM_MC_MEM_MAP_U8_RW': (McBlock.parseMcGeneric, True, False, ),
2949 'IEM_MC_MEM_MAP_U8_RO': (McBlock.parseMcGeneric, True, False, ),
2950 'IEM_MC_MEM_MAP_U8_WO': (McBlock.parseMcGeneric, True, False, ),
2951 'IEM_MC_MEM_MAP_U16_RW': (McBlock.parseMcGeneric, True, False, ),
2952 'IEM_MC_MEM_MAP_U16_RO': (McBlock.parseMcGeneric, True, False, ),
2953 'IEM_MC_MEM_MAP_U16_WO': (McBlock.parseMcGeneric, True, False, ),
2954 'IEM_MC_MEM_MAP_U32_RW': (McBlock.parseMcGeneric, True, False, ),
2955 'IEM_MC_MEM_MAP_U32_RO': (McBlock.parseMcGeneric, True, False, ),
2956 'IEM_MC_MEM_MAP_U32_WO': (McBlock.parseMcGeneric, True, False, ),
2957 'IEM_MC_MEM_MAP_U64_RW': (McBlock.parseMcGeneric, True, False, ),
2958 'IEM_MC_MEM_MAP_U64_RO': (McBlock.parseMcGeneric, True, False, ),
2959 'IEM_MC_MEM_MAP_U64_WO': (McBlock.parseMcGeneric, True, False, ),
2960 'IEM_MC_MERGE_YREG_U32_U96_ZX_VLMAX': (McBlock.parseMcGeneric, True, False, ),
2961 'IEM_MC_MERGE_YREG_U64_U64_ZX_VLMAX': (McBlock.parseMcGeneric, True, False, ),
2962 'IEM_MC_MERGE_YREG_U64HI_U64HI_ZX_VLMAX': (McBlock.parseMcGeneric, True, False, ),
2963 'IEM_MC_MERGE_YREG_U64LO_U64LO_ZX_VLMAX': (McBlock.parseMcGeneric, True, False, ),
2964 'IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX': (McBlock.parseMcGeneric, True, False, ),
2965 'IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX': (McBlock.parseMcGeneric, True, False, ),
2966 'IEM_MC_MODIFIED_MREG': (McBlock.parseMcGeneric, True, False, ),
2967 'IEM_MC_MODIFIED_MREG_BY_REF': (McBlock.parseMcGeneric, True, False, ),
2968 'IEM_MC_OR_2LOCS_U32': (McBlock.parseMcGeneric, False, False, ),
2969 'IEM_MC_OR_GREG_U16': (McBlock.parseMcGeneric, True, False, ),
2970 'IEM_MC_OR_GREG_U32': (McBlock.parseMcGeneric, True, False, ),
2971 'IEM_MC_OR_GREG_U64': (McBlock.parseMcGeneric, True, False, ),
2972 'IEM_MC_OR_GREG_U8': (McBlock.parseMcGeneric, True, False, ),
2973 'IEM_MC_OR_LOCAL_U16': (McBlock.parseMcGeneric, False, False, ),
2974 'IEM_MC_OR_LOCAL_U32': (McBlock.parseMcGeneric, False, False, ),
2975 'IEM_MC_OR_LOCAL_U8': (McBlock.parseMcGeneric, False, False, ),
2976 'IEM_MC_POP_U16': (McBlock.parseMcGeneric, True, False, ),
2977 'IEM_MC_POP_U32': (McBlock.parseMcGeneric, True, False, ),
2978 'IEM_MC_POP_U64': (McBlock.parseMcGeneric, True, False, ),
2979 'IEM_MC_PREPARE_AVX_USAGE': (McBlock.parseMcGeneric, False, False, ),
2980 'IEM_MC_PREPARE_FPU_USAGE': (McBlock.parseMcGeneric, False, False, ),
2981 'IEM_MC_PREPARE_SSE_USAGE': (McBlock.parseMcGeneric, False, False, ),
2982 'IEM_MC_PUSH_FPU_RESULT': (McBlock.parseMcGeneric, True, False, ),
2983 'IEM_MC_PUSH_FPU_RESULT_MEM_OP': (McBlock.parseMcGeneric, True, False, ),
2984 'IEM_MC_PUSH_FPU_RESULT_TWO': (McBlock.parseMcGeneric, True, False, ),
2985 'IEM_MC_PUSH_U16': (McBlock.parseMcGeneric, True, False, ),
2986 'IEM_MC_PUSH_U32': (McBlock.parseMcGeneric, True, False, ),
2987 'IEM_MC_PUSH_U32_SREG': (McBlock.parseMcGeneric, True, False, ),
2988 'IEM_MC_PUSH_U64': (McBlock.parseMcGeneric, True, False, ),
2989 'IEM_MC_RAISE_DIVIDE_ERROR': (McBlock.parseMcGeneric, True, False, ),
2990 'IEM_MC_RAISE_GP0_IF_CPL_NOT_ZERO': (McBlock.parseMcGeneric, True, False, ),
2991 'IEM_MC_RAISE_GP0_IF_EFF_ADDR_UNALIGNED': (McBlock.parseMcGeneric, True, False, ),
2992 'IEM_MC_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT': (McBlock.parseMcGeneric, True, False, ),
2993 'IEM_MC_REF_EFLAGS': (McBlock.parseMcGeneric, False, False, ),
2994 'IEM_MC_REF_FPUREG': (McBlock.parseMcGeneric, False, False, ),
2995 'IEM_MC_REF_GREG_I32': (McBlock.parseMcGeneric, False, False, ),
2996 'IEM_MC_REF_GREG_I32_CONST': (McBlock.parseMcGeneric, False, False, ),
2997 'IEM_MC_REF_GREG_I64': (McBlock.parseMcGeneric, False, False, ),
2998 'IEM_MC_REF_GREG_I64_CONST': (McBlock.parseMcGeneric, False, False, ),
2999 'IEM_MC_REF_GREG_U16': (McBlock.parseMcGeneric, False, False, ),
3000 'IEM_MC_REF_GREG_U16_CONST': (McBlock.parseMcGeneric, False, False, ),
3001 'IEM_MC_REF_GREG_U32': (McBlock.parseMcGeneric, False, False, ),
3002 'IEM_MC_REF_GREG_U32_CONST': (McBlock.parseMcGeneric, False, False, ),
3003 'IEM_MC_REF_GREG_U64': (McBlock.parseMcGeneric, False, False, ),
3004 'IEM_MC_REF_GREG_U64_CONST': (McBlock.parseMcGeneric, False, False, ),
3005 'IEM_MC_REF_GREG_U8': (McBlock.parseMcGeneric, False, False, ),
3006 'IEM_MC_REF_GREG_U8_CONST': (McBlock.parseMcGeneric, False, False, ),
3007 'IEM_MC_REF_LOCAL': (McBlock.parseMcGeneric, False, False, ),
3008 'IEM_MC_REF_MREG_U32_CONST': (McBlock.parseMcGeneric, False, False, ),
3009 'IEM_MC_REF_MREG_U64': (McBlock.parseMcGeneric, False, False, ),
3010 'IEM_MC_REF_MREG_U64_CONST': (McBlock.parseMcGeneric, False, False, ),
3011 'IEM_MC_REF_MXCSR': (McBlock.parseMcGeneric, False, False, ),
3012 'IEM_MC_REF_XREG_R32_CONST': (McBlock.parseMcGeneric, False, False, ),
3013 'IEM_MC_REF_XREG_R64_CONST': (McBlock.parseMcGeneric, False, False, ),
3014 'IEM_MC_REF_XREG_U128': (McBlock.parseMcGeneric, False, False, ),
3015 'IEM_MC_REF_XREG_U128_CONST': (McBlock.parseMcGeneric, False, False, ),
3016 'IEM_MC_REF_XREG_U32_CONST': (McBlock.parseMcGeneric, False, False, ),
3017 'IEM_MC_REF_XREG_U64_CONST': (McBlock.parseMcGeneric, False, False, ),
3018 'IEM_MC_REF_XREG_XMM_CONST': (McBlock.parseMcGeneric, False, False, ),
3019 'IEM_MC_REF_YREG_U128': (McBlock.parseMcGeneric, False, False, ),
3020 'IEM_MC_REF_YREG_U128_CONST': (McBlock.parseMcGeneric, False, False, ),
3021 'IEM_MC_REF_YREG_U64_CONST': (McBlock.parseMcGeneric, False, False, ),
3022 'IEM_MC_REL_JMP_S16_AND_FINISH': (McBlock.parseMcGeneric, True, False, ),
3023 'IEM_MC_REL_JMP_S32_AND_FINISH': (McBlock.parseMcGeneric, True, False, ),
3024 'IEM_MC_REL_JMP_S8_AND_FINISH': (McBlock.parseMcGeneric, True, False, ),
3025 'IEM_MC_RETURN_ON_FAILURE': (McBlock.parseMcGeneric, False, False, ),
3026 'IEM_MC_SAR_LOCAL_S16': (McBlock.parseMcGeneric, False, False, ),
3027 'IEM_MC_SAR_LOCAL_S32': (McBlock.parseMcGeneric, False, False, ),
3028 'IEM_MC_SAR_LOCAL_S64': (McBlock.parseMcGeneric, False, False, ),
3029 'IEM_MC_SET_EFL_BIT': (McBlock.parseMcGeneric, True, False, ),
3030 'IEM_MC_SET_FPU_RESULT': (McBlock.parseMcGeneric, True, False, ),
3031 'IEM_MC_SET_RIP_U16_AND_FINISH': (McBlock.parseMcGeneric, True, False, ),
3032 'IEM_MC_SET_RIP_U32_AND_FINISH': (McBlock.parseMcGeneric, True, False, ),
3033 'IEM_MC_SET_RIP_U64_AND_FINISH': (McBlock.parseMcGeneric, True, False, ),
3034 'IEM_MC_SHL_LOCAL_S16': (McBlock.parseMcGeneric, False, False, ),
3035 'IEM_MC_SHL_LOCAL_S32': (McBlock.parseMcGeneric, False, False, ),
3036 'IEM_MC_SHL_LOCAL_S64': (McBlock.parseMcGeneric, False, False, ),
3037 'IEM_MC_SHR_LOCAL_U8': (McBlock.parseMcGeneric, False, False, ),
3038 'IEM_MC_SSE_UPDATE_MXCSR': (McBlock.parseMcGeneric, True, False, ),
3039 'IEM_MC_STORE_FPU_RESULT': (McBlock.parseMcGeneric, True, False, ),
3040 'IEM_MC_STORE_FPU_RESULT_MEM_OP': (McBlock.parseMcGeneric, True, False, ),
3041 'IEM_MC_STORE_FPU_RESULT_THEN_POP': (McBlock.parseMcGeneric, True, False, ),
3042 'IEM_MC_STORE_FPU_RESULT_WITH_MEM_OP_THEN_POP': (McBlock.parseMcGeneric, True, False, ),
3043 'IEM_MC_STORE_FPUREG_R80_SRC_REF': (McBlock.parseMcGeneric, True, False, ),
3044 'IEM_MC_STORE_GREG_I64': (McBlock.parseMcGeneric, True, False, ),
3045 'IEM_MC_STORE_GREG_U16': (McBlock.parseMcGeneric, True, False, ),
3046 'IEM_MC_STORE_GREG_U16_CONST': (McBlock.parseMcGeneric, True, False, ),
3047 'IEM_MC_STORE_GREG_U32': (McBlock.parseMcGeneric, True, False, ),
3048 'IEM_MC_STORE_GREG_U32_CONST': (McBlock.parseMcGeneric, True, False, ),
3049 'IEM_MC_STORE_GREG_U64': (McBlock.parseMcGeneric, True, False, ),
3050 'IEM_MC_STORE_GREG_U64_CONST': (McBlock.parseMcGeneric, True, False, ),
3051 'IEM_MC_STORE_GREG_U8': (McBlock.parseMcGeneric, True, False, ),
3052 'IEM_MC_STORE_GREG_U8_CONST': (McBlock.parseMcGeneric, True, False, ),
3053 'IEM_MC_STORE_MEM_I16_CONST_BY_REF': (McBlock.parseMcGeneric, True, False, ),
3054 'IEM_MC_STORE_MEM_I32_CONST_BY_REF': (McBlock.parseMcGeneric, True, False, ),
3055 'IEM_MC_STORE_MEM_I64_CONST_BY_REF': (McBlock.parseMcGeneric, True, False, ),
3056 'IEM_MC_STORE_MEM_I8_CONST_BY_REF': (McBlock.parseMcGeneric, True, False, ),
3057 'IEM_MC_STORE_MEM_INDEF_D80_BY_REF': (McBlock.parseMcGeneric, True, False, ),
3058 'IEM_MC_STORE_MEM_NEG_QNAN_R32_BY_REF': (McBlock.parseMcGeneric, True, False, ),
3059 'IEM_MC_STORE_MEM_NEG_QNAN_R64_BY_REF': (McBlock.parseMcGeneric, True, False, ),
3060 'IEM_MC_STORE_MEM_NEG_QNAN_R80_BY_REF': (McBlock.parseMcGeneric, True, False, ),
3061 'IEM_MC_STORE_MEM_U128': (McBlock.parseMcGeneric, True, False, ),
3062 'IEM_MC_STORE_MEM_U128_ALIGN_SSE': (McBlock.parseMcGeneric, True, False, ),
3063 'IEM_MC_STORE_MEM_U16': (McBlock.parseMcGeneric, True, False, ),
3064 'IEM_MC_STORE_MEM_U16_CONST': (McBlock.parseMcGeneric, True, False, ),
3065 'IEM_MC_STORE_MEM_U256': (McBlock.parseMcGeneric, True, False, ),
3066 'IEM_MC_STORE_MEM_U256_ALIGN_AVX': (McBlock.parseMcGeneric, True, False, ),
3067 'IEM_MC_STORE_MEM_U32': (McBlock.parseMcGeneric, True, False, ),
3068 'IEM_MC_STORE_MEM_U32_CONST': (McBlock.parseMcGeneric, True, False, ),
3069 'IEM_MC_STORE_MEM_U64': (McBlock.parseMcGeneric, True, False, ),
3070 'IEM_MC_STORE_MEM_U64_CONST': (McBlock.parseMcGeneric, True, False, ),
3071 'IEM_MC_STORE_MEM_U8': (McBlock.parseMcGeneric, True, False, ),
3072 'IEM_MC_STORE_MEM_U8_CONST': (McBlock.parseMcGeneric, True, False, ),
3073 'IEM_MC_STORE_MREG_U32_ZX_U64': (McBlock.parseMcGeneric, True, False, ),
3074 'IEM_MC_STORE_MREG_U64': (McBlock.parseMcGeneric, True, False, ),
3075 'IEM_MC_STORE_SREG_BASE_U32': (McBlock.parseMcGeneric, True, False, ),
3076 'IEM_MC_STORE_SREG_BASE_U64': (McBlock.parseMcGeneric, True, False, ),
3077 'IEM_MC_STORE_SSE_RESULT': (McBlock.parseMcGeneric, True, False, ),
3078 'IEM_MC_STORE_XREG_HI_U64': (McBlock.parseMcGeneric, True, False, ),
3079 'IEM_MC_STORE_XREG_R32': (McBlock.parseMcGeneric, True, False, ),
3080 'IEM_MC_STORE_XREG_R64': (McBlock.parseMcGeneric, True, False, ),
3081 'IEM_MC_STORE_XREG_U128': (McBlock.parseMcGeneric, True, False, ),
3082 'IEM_MC_STORE_XREG_U16': (McBlock.parseMcGeneric, True, False, ),
3083 'IEM_MC_STORE_XREG_U32': (McBlock.parseMcGeneric, True, False, ),
3084 'IEM_MC_STORE_XREG_U32_U128': (McBlock.parseMcGeneric, True, False, ),
3085 'IEM_MC_STORE_XREG_U32_ZX_U128': (McBlock.parseMcGeneric, True, False, ),
3086 'IEM_MC_STORE_XREG_U64': (McBlock.parseMcGeneric, True, False, ),
3087 'IEM_MC_STORE_XREG_U64_ZX_U128': (McBlock.parseMcGeneric, True, False, ),
3088 'IEM_MC_STORE_XREG_U8': (McBlock.parseMcGeneric, True, False, ),
3089 'IEM_MC_STORE_XREG_XMM': (McBlock.parseMcGeneric, True, False, ),
3090 'IEM_MC_STORE_XREG_XMM_U32': (McBlock.parseMcGeneric, True, False, ),
3091 'IEM_MC_STORE_XREG_XMM_U64': (McBlock.parseMcGeneric, True, False, ),
3092 'IEM_MC_STORE_YREG_U128': (McBlock.parseMcGeneric, True, False, ),
3093 'IEM_MC_STORE_YREG_U128_ZX_VLMAX': (McBlock.parseMcGeneric, True, False, ),
3094 'IEM_MC_STORE_YREG_U256_ZX_VLMAX': (McBlock.parseMcGeneric, True, False, ),
3095 'IEM_MC_STORE_YREG_U32_ZX_VLMAX': (McBlock.parseMcGeneric, True, False, ),
3096 'IEM_MC_STORE_YREG_U64_ZX_VLMAX': (McBlock.parseMcGeneric, True, False, ),
3097 'IEM_MC_SUB_GREG_U16': (McBlock.parseMcGeneric, True, True, ),
3098 'IEM_MC_SUB_GREG_U32': (McBlock.parseMcGeneric, True, True, ),
3099 'IEM_MC_SUB_GREG_U64': (McBlock.parseMcGeneric, True, True, ),
3100 'IEM_MC_SUB_LOCAL_U16': (McBlock.parseMcGeneric, False, False, ),
3101 'IEM_MC_UPDATE_FPU_OPCODE_IP': (McBlock.parseMcGeneric, True, False, ),
3102 'IEM_MC_UPDATE_FSW': (McBlock.parseMcGeneric, True, False, ),
3103 'IEM_MC_UPDATE_FSW_CONST': (McBlock.parseMcGeneric, True, False, ),
3104 'IEM_MC_UPDATE_FSW_THEN_POP': (McBlock.parseMcGeneric, True, False, ),
3105 'IEM_MC_UPDATE_FSW_THEN_POP_POP': (McBlock.parseMcGeneric, True, False, ),
3106 'IEM_MC_UPDATE_FSW_WITH_MEM_OP': (McBlock.parseMcGeneric, True, False, ),
3107 'IEM_MC_UPDATE_FSW_WITH_MEM_OP_THEN_POP': (McBlock.parseMcGeneric, True, False, ),
3108 'IEM_MC_NO_NATIVE_RECOMPILE': (McBlock.parseMcGeneric, False, False, ),
3109};
3110
## List of microcode blocks (McBlock instances); empty at module load.
g_aoMcBlocks = [] # type: List[McBlock]
3113
3114
3115
class ParserException(Exception):
    """ Exception type raised by the parser; carries a single message string. """
    def __init__(self, sMessage):
        super(ParserException, self).__init__(sMessage);
3120
3121
3122class SimpleParser(object): # pylint: disable=too-many-instance-attributes
3123 """
3124 Parser of IEMAllInstruction*.cpp.h instruction specifications.
3125 """
3126
3127 ## @name Parser state.
3128 ## @{
3129 kiCode = 0;
3130 kiCommentMulti = 1;
3131 ## @}
3132
class Macro(object):
    """ A macro definition: name, optional parameter list, body text and line number. """
    def __init__(self, sName, asArgs, sBody, iLine):
        self.sName  = sName;    ##< The macro name.
        self.asArgs = asArgs;   ##< None if simple macro, list of parameters otherwise.
        self.sBody  = sBody;    ##< The unexpanded macro body.
        self.iLine  = iLine;    ##< The line number given by the creator.
        ## Matches any parameter name, optionally with '##' on either side; None if no parameters.
        self.oReArgMatch = None;
        if asArgs:
            sAlternatives = '|'.join(asArgs);
            self.oReArgMatch = re.compile(r'(\s*##\s*|\b)(' + sAlternatives + r')(\s*##\s*|\b)');

    @staticmethod
    def _needSpace(ch):
        """ This is just to make the expanded output a bit prettier. """
        if ch == '(':
            return False;
        return ch.isspace();

    def expandMacro(self, oParent, asArgs = None):
        """
        Returns the macro body with each parameter occurrence replaced by the
        corresponding entry of asArgs.  A '##' next to a parameter is dropped
        together with its surrounding whitespace.

        oParent is not used.
        """
        _ = oParent;

        # Simple macro: the body is returned as-is.
        if not self.oReArgMatch:
            assert not asArgs;
            return self.sBody;

        assert len(asArgs) == len(self.asArgs);
        dValues = { self.asArgs[idx]: sArg for idx, sArg in enumerate(asArgs) };

        sText = self.sBody;
        oHit  = self.oReArgMatch.search(sText);
        while oHit:
            offStart     = oHit.start();
            offEnd       = oHit.end();
            sReplacement = dValues[oHit.group(2)];

            sLead = '';
            if not oHit.group(1) and offStart > 0 and self._needSpace(sText[offStart]):
                sLead = ' ';
            sTrail = '';
            if not oHit.group(3) and offEnd < len(sText) and self._needSpace(sText[offEnd]):
                sTrail = ' ';

            sText = sText[:offStart] + sLead + sReplacement + sTrail + sText[offEnd:];
            # Resume after the inserted value so it is not itself re-expanded.
            oHit  = self.oReArgMatch.search(sText, offStart + len(sReplacement));
        return sText;
3174
class PreprocessorConditional(object):
    """
    Preprocessor conditional (#if/#ifdef/#ifndef/#elif/#else/#endif).

    Records the directive type and expression of one conditional together
    with its #elif conditionals and whether an #else has been entered.
    """

    ## Known defines.
    #   - A value of 1 indicates that it's always defined.
    #   - A value of 0 if it's always undefined
    #   - A value of -1 if it's an arch and it depends of script parameters.
    #   - A value of -2 if it's not recognized when filtering MC blocks.
    kdKnownDefines = {
        'IEM_WITH_ONE_BYTE_TABLE':          1,
        'IEM_WITH_TWO_BYTE_TABLE':          1,
        'IEM_WITH_THREE_0F_38':             1,
        'IEM_WITH_THREE_0F_3A':             1,
        'IEM_WITH_THREE_BYTE_TABLES':       1,
        'IEM_WITH_3DNOW':                   1,
        'IEM_WITH_3DNOW_TABLE':             1,
        'IEM_WITH_VEX':                     1,
        'IEM_WITH_VEX_TABLES':              1,
        'VBOX_WITH_NESTED_HWVIRT_VMX':      1,
        'VBOX_WITH_NESTED_HWVIRT_VMX_EPT':  1,
        'VBOX_WITH_NESTED_HWVIRT_SVM':      1,
        'LOG_ENABLED':                      1,
        'RT_WITHOUT_PRAGMA_ONCE':           0,
        'TST_IEM_CHECK_MC':                 0,
        'IEM_WITHOUT_ASSEMBLY':            -2, ##< @todo ??
        'RT_ARCH_AMD64':                   -1,
        'RT_ARCH_ARM64':                   -1,
        'RT_ARCH_ARM32':                   -1,
        'RT_ARCH_X86':                     -1,
        'RT_ARCH_SPARC':                   -1,
        'RT_ARCH_SPARC64':                 -1,
    };
    ## Maps the script's architecture names to the corresponding RT_ARCH_XXX define.
    # NOTE(review): 'sparc32' maps to RT_ARCH_SPARC64 and there is no 'x86' entry - confirm intent.
    kdBuildArchToIprt = {
        'amd64':   'RT_ARCH_AMD64',
        'arm64':   'RT_ARCH_ARM64',
        'sparc32': 'RT_ARCH_SPARC64',
    };
    ## For parsing the next defined(xxxx).
    koMatchDefined = re.compile(r'\s*defined\s*\(\s*([^ \t)]+)\s*\)\s*');

    def __init__(self, sType, sExpr):
        """
        sType is the directive kind ('if' and 'elif' get their expression
        checked; anything else is treated as a single define name), sExpr the
        expression or define name.  Raises Exception if it is not supported.
        """
        self.sType   = sType;
        self.sExpr   = sExpr;   ##< Expression without command and no leading or trailing spaces.
        self.aoElif  = []       # type: List[PreprocessorConditional]
        # NOTE(review): initialised to an empty list but only tested for truthiness in this class.
        self.fInElse = [];
        if sType in ('if', 'elif'):
            self.checkExpression(sExpr);
        else:
            self.checkSupportedDefine(sExpr)

    @staticmethod
    def checkSupportedDefine(sDefine):
        """ Checks that sDefine is one that we support. Raises exception if unsupported. """
        #print('debug: checkSupportedDefine: %s' % (sDefine,), file = sys.stderr);
        if sDefine in SimpleParser.PreprocessorConditional.kdKnownDefines:
            return True;
        if sDefine.startswith('VMM_INCLUDED_') and sDefine.endswith('_h'):
            return True;
        raise Exception('Unsupported define: %s' % (sDefine,));

    @staticmethod
    def checkExpression(sExpr):
        """
        Check that the expression is supported. Raises exception if not.

        Accepted: '0', '1', and defined(XXX) terms (optionally preceded by '!'
        and/or parentheses) combined with '||' and '&&'; a literal '1' is also
        accepted as the final term.  Returns True on success.
        """
        #print('debug: checkExpression: %s' % (sExpr,), file = sys.stderr);
        if sExpr in ('0', '1'):
            return True;

        off    = 0;
        cParan = 0;
        while off < len(sExpr):
            ch = sExpr[off];

            # Unary operator or parentheses:
            if ch in ('(', '!'):
                if ch == '(':
                    cParan += 1;
                off += 1;
            else:
                # defined(xxxx)
                oMatch = SimpleParser.PreprocessorConditional.koMatchDefined.match(sExpr, off);
                if oMatch:
                    SimpleParser.PreprocessorConditional.checkSupportedDefine(oMatch.group(1));
                    off = oMatch.end();
                elif sExpr[off:] == '1':
                    # A trailing literal '1' terminates the expression (there is no match object to consult).
                    off = len(sExpr);
                else:
                    raise Exception('Cannot grok: \'%s\' (at %u in: \'%s\')' % (sExpr[off:off + 10], off + 1, sExpr,));

                # Look for closing parentheses.
                while off < len(sExpr) and sExpr[off].isspace():
                    off += 1;
                if cParan > 0:
                    while off < len(sExpr) and sExpr[off] == ')':
                        if cParan <= 0:
                            raise Exception('Unbalanced parentheses at %u in \'%s\'' % (off + 1, sExpr,));
                        cParan -= 1;
                        off    += 1;
                        while off < len(sExpr) and sExpr[off].isspace():
                            off += 1;

                # Look for binary operator.
                if off >= len(sExpr):
                    break;
                if sExpr[off:off + 2] in ('||', '&&'):
                    off += 2;
                else:
                    raise Exception('Cannot grok operator: \'%s\' (at %u in: \'%s\')' % (sExpr[off:off + 2], off + 1, sExpr,));

                # Skip spaces.
                while off < len(sExpr) and sExpr[off].isspace():
                    off += 1;
        if cParan != 0:
            raise Exception('Unbalanced parentheses at %u in \'%s\'' % (off + 1, sExpr,));
        return True;

    @staticmethod
    def isArchIncludedInExpr(sExpr, sArch):
        """
        Checks if sArch is included in the given expression.

        Returns True as soon as one defined(XXX) term holds for sArch (or a
        literal '1' is reached), False if none does.  Raises Exception for
        anything other than an '||' chain of defined() terms.
        """
        # We only grok defined() [|| defined()...] and [1|0] at the moment.
        if sExpr == '0':
            return False;
        if sExpr == '1':
            return True;
        off = 0;
        while off < len(sExpr):
            # defined(xxxx)
            oMatch = SimpleParser.PreprocessorConditional.koMatchDefined.match(sExpr, off);
            if not oMatch:
                if sExpr[off:] == '1':
                    return True;
                raise Exception('Cannot grok: %s (at %u in: %s)' % (sExpr[off:off + 10], off + 1, sExpr,));
            if SimpleParser.PreprocessorConditional.matchDefined(oMatch.group(1), sArch):
                return True;
            off = oMatch.end();

            # Look for OR operator (at the current offset, not the start of the expression).
            while off < len(sExpr) and sExpr[off].isspace():
                off += 1;
            if off >= len(sExpr):
                break;
            if sExpr.startswith('||', off):
                off += 2;
            else:
                raise Exception('Cannot grok: %s (at %u in: %s)' % (sExpr[off:off + 10], off + 1, sExpr,));

            # Skip spaces after the operator so a following literal '1' is recognized.
            while off < len(sExpr) and sExpr[off].isspace():
                off += 1;

        return False;

    @staticmethod
    def matchArch(sDefine, sArch):
        """
        Compares sDefine (RT_ARCH_XXXX) and sArch (x86, amd64, arm64, ++).
        Raises KeyError if sArch is not a key of kdBuildArchToIprt.
        """
        return SimpleParser.PreprocessorConditional.kdBuildArchToIprt[sArch] == sDefine;

    @staticmethod
    def matchDefined(sExpr, sArch):
        """
        Check the result of an ifdef/ifndef expression, given sArch.

        Defines missing from kdKnownDefines are treated as undefined; a define
        marked -2 raises Exception.
        """
        iDefine = SimpleParser.PreprocessorConditional.kdKnownDefines.get(sExpr, 0);
        if iDefine == -2:
            raise Exception('Unsupported define for MC block filtering: %s' % (sExpr,));
        return iDefine == 1 or (iDefine == -1 and SimpleParser.PreprocessorConditional.matchArch(sExpr, sArch));

    def isArchIncludedInPrimaryBlock(self, sArch):
        """ Checks if sArch is included in the (primary) 'if' block. """
        if self.sType == 'ifdef':
            return self.matchDefined(self.sExpr, sArch);
        if self.sType == 'ifndef':
            return not self.matchDefined(self.sExpr, sArch);
        return self.isArchIncludedInExpr(self.sExpr, sArch);

    @staticmethod
    def isInBlockForArch(aoCppCondStack, sArch, iLine):
        """
        Checks if sArch is included in the current conditional block.

        Every conditional on the stack must place sArch in the branch that is
        currently open: the primary block (no #elif/#else seen yet), the last
        #elif (without #else), or the #else.  iLine is unused.
        """
        _ = iLine;
        #print('debug: isInBlockForArch(%s,%s); line %s' % (len(aoCppCondStack), sArch, iLine), file = sys.stderr);
        for oCond in aoCppCondStack:
            if oCond.isArchIncludedInPrimaryBlock(sArch):
                if oCond.aoElif or oCond.fInElse:
                    #print('debug: isInBlockForArch -> False #1', file = sys.stderr);
                    return False;
                #print('debug: isInBlockForArch(%s,%s): in IF-block' % (len(aoCppCondStack), sArch), file = sys.stderr);
            else:
                fFine = False;
                for oElifCond in oCond.aoElif:
                    if oElifCond.isArchIncludedInPrimaryBlock(sArch):
                        if oElifCond is not oCond.aoElif[-1] or oCond.fInElse:
                            #print('debug: isInBlockForArch -> False #3', file = sys.stderr);
                            return False;
                        fFine = True;
                if not fFine and not oCond.fInElse:
                    #print('debug: isInBlockForArch -> False #4', file = sys.stderr);
                    return False;
        #print('debug: isInBlockForArch -> True', file = sys.stderr);
        return True;
3365
def __init__(self, sSrcFile, asLines, sDefaultMap, sHostArch, oInheritMacrosFrom = None):
    """
    Initializes the parser state for one source file.

    sSrcFile is the file name used in messages, asLines the lines to parse,
    sDefaultMap the name of the instruction map (a key of g_dInstructionMaps)
    used when none is specified, and sHostArch the architecture name stored
    in self.sHostArch.  If oInheritMacrosFrom is given, its macro dictionary
    is copied and its macro matching expression is shared.
    """
    self.sSrcFile       = sSrcFile;
    self.asLines        = asLines;
    self.iLine          = 0;
    self.iState         = self.kiCode;
    self.sComment       = '';
    self.iCommentLine   = 0;
    self.aoCurInstrs    = []    # type: List[Instruction]
    self.oCurFunction   = None  # type: DecoderFunction
    self.iMcBlockInFunc = 0;
    self.oCurMcBlock    = None  # type: McBlock
    self.dMacros        = {}    # type: Dict[str, SimpleParser.Macro]
    self.oReMacros      = None  # type: re      ##< Regular expression matching invocations of anything in self.dMacros.
    if oInheritMacrosFrom:
        self.dMacros    = dict(oInheritMacrosFrom.dMacros);
        self.oReMacros  = oInheritMacrosFrom.oReMacros;
    self.aoCppCondStack = []    # type: List[PreprocessorConditional] ##< Preprocessor conditional stack.
    self.sHostArch      = sHostArch;

    assert sDefaultMap in g_dInstructionMaps;
    self.oDefaultMap    = g_dInstructionMaps[sDefaultMap];

    self.cTotalInstr    = 0;
    self.cTotalStubs    = 0;
    self.cTotalTagged   = 0;
    self.cTotalMcBlocks = 0;

    # Note! All patterns are raw strings so that regex escapes like \[ and \s
    #       are not (invalid) Python string escapes.
    self.oReMacroName   = re.compile(r'^[A-Za-z_][A-Za-z0-9_]*$');
    self.oReMnemonic    = re.compile(r'^[A-Za-z_][A-Za-z0-9_]*$');
    self.oReStatsName   = re.compile(r'^[A-Za-z_][A-Za-z0-9_]*$');
    self.oReFunctionName= re.compile(r'^iemOp_[A-Za-z_][A-Za-z0-9_]*$');
    self.oReGroupName   = re.compile(r'^og_[a-z0-9]+(|_[a-z0-9]+|_[a-z0-9]+_[a-z0-9]+)$');
    self.oReDisEnum     = re.compile(r'^OP_[A-Z0-9_]+$');
    self.oReFunTable    = re.compile(r'^(IEM_STATIC|static) +const +PFNIEMOP +g_apfn[A-Za-z0-9_]+ *\[ *\d* *\] *= *$');
    self.oReComment     = re.compile(r'//.*?$|/\*.*?\*/'); ## Full comments.
    self.oReHashDefine2 = re.compile(r'(?s)\A\s*([A-Za-z_][A-Za-z0-9_]*)\(([^)]*)\)\s*(.*)\Z'); ##< With arguments.
    self.oReHashDefine3 = re.compile(r'(?s)\A\s*([A-Za-z_][A-Za-z0-9_]*)[^(]\s*(.*)\Z');        ##< Simple, no arguments.
    self.oReMcBeginEnd  = re.compile(r'\bIEM_MC_(BEGIN|END|DEFER_TO_CIMPL_[1-5]_RET)\s*\('); ##> Not DEFER_TO_CIMPL_0_RET!
    self.fDebug         = True;
    self.fDebugMc       = False;
    self.fDebugPreproc  = False;

    # Dispatch table mapping comment tags to the methods that parse them.
    self.dTagHandlers   = {
        '@opbrief':     self.parseTagOpBrief,
        '@opdesc':      self.parseTagOpDesc,
        '@opmnemonic':  self.parseTagOpMnemonic,
        '@op1':         self.parseTagOpOperandN,
        '@op2':         self.parseTagOpOperandN,
        '@op3':         self.parseTagOpOperandN,
        '@op4':         self.parseTagOpOperandN,
        '@oppfx':       self.parseTagOpPfx,
        '@opmaps':      self.parseTagOpMaps,
        '@opcode':      self.parseTagOpcode,
        '@opcodesub':   self.parseTagOpcodeSub,
        '@openc':       self.parseTagOpEnc,
        '@opfltest':    self.parseTagOpEFlags,
        '@opflmodify':  self.parseTagOpEFlags,
        '@opflundef':   self.parseTagOpEFlags,
        '@opflset':     self.parseTagOpEFlags,
        '@opflclear':   self.parseTagOpEFlags,
        '@ophints':     self.parseTagOpHints,
        '@opdisenum':   self.parseTagOpDisEnum,
        '@opmincpu':    self.parseTagOpMinCpu,
        '@opcpuid':     self.parseTagOpCpuId,
        '@opgroup':     self.parseTagOpGroup,
        '@opunused':    self.parseTagOpUnusedInvalid,
        '@opinvalid':   self.parseTagOpUnusedInvalid,
        '@opinvlstyle': self.parseTagOpUnusedInvalid,
        '@optest':      self.parseTagOpTest,
        '@optestign':   self.parseTagOpTestIgnore,
        '@optestignore': self.parseTagOpTestIgnore,
        '@opcopytests': self.parseTagOpCopyTests,
        '@oponly':      self.parseTagOpOnlyTest,
        '@oponlytest':  self.parseTagOpOnlyTest,
        '@opxcpttype':  self.parseTagOpXcptType,
        '@opstats':     self.parseTagOpStats,
        '@opfunction':  self.parseTagOpFunction,
        '@opdone':      self.parseTagOpDone,
    };
    # Numbered test tags, both the '@optestN' and the '@optest[N]' spelling.
    for i in range(48):
        self.dTagHandlers['@optest%u' % (i,)]   = self.parseTagOpTestNum;
        self.dTagHandlers['@optest[%u]' % (i,)] = self.parseTagOpTestNum;

    self.asErrors = [];
3450
def raiseError(self, sMessage):
    """
    Raises a ParserException with sMessage prefixed by the source file name
    and the current line number.
    """
    sFullMessage = "%s:%d: error: %s" % (self.sSrcFile, self.iLine, sMessage,);
    raise ParserException(sFullMessage);
3456
def raiseCommentError(self, iLineInComment, sMessage):
    """
    Like raiseError, except that the reported line number is
    self.iCommentLine + iLineInComment.
    """
    iReportLine = self.iCommentLine + iLineInComment;
    raise ParserException("%s:%d: error: %s" % (self.sSrcFile, iReportLine, sMessage,));
3462
def error(self, sMessage):
    """
    Records an error for the current line in self.asErrors.
    Always returns False.
    """
    sRecord = u'%s:%d: error: %s\n' % (self.sSrcFile, self.iLine, sMessage,);
    self.asErrors.append(sRecord);
    return False;
3470
def errorOnLine(self, iLine, sMessage):
    """
    Records an error for the given line number in self.asErrors.
    Always returns False.
    """
    sRecord = u'%s:%d: error: %s\n' % (self.sSrcFile, iLine, sMessage,);
    self.asErrors.append(sRecord);
    return False;
3478
def errorComment(self, iLineInComment, sMessage):
    """
    Records an error for a line inside the current comment; the reported
    line number is self.iCommentLine + iLineInComment.
    Always returns False.
    """
    iReportLine = self.iCommentLine + iLineInComment;
    self.asErrors.append(u'%s:%d: error: %s\n' % (self.sSrcFile, iReportLine, sMessage,));
    return False;
3486
def printErrors(self):
    """
    Writes all recorded errors to stderr in one go.
    Returns the number of recorded errors.
    """
    cErrors = len(self.asErrors);
    if cErrors > 0:
        sys.stderr.write(u''.join(self.asErrors));
    return cErrors;
3495
def debug(self, sMessage):
    """
    Prints a 'debug: ' prefixed message to stderr when self.fDebug is set.
    """
    if not self.fDebug:
        return;
    print('debug: %s' % (sMessage,), file = sys.stderr);
3502
def stripComments(self, sLine):
    """
    Returns sLine with each comment matched by self.oReComment replaced by a
    single space.

    Records an error if a comment start or end marker is left over, since
    that indicates a multi-line comment which is not handled here.
    """
    sStripped = self.oReComment.sub(" ", sLine);
    if '/*' in sStripped or '*/' in sStripped:
        self.error('Unexpected multi-line comment will not be handled correctly. Please simplify.');
    return sStripped;
3513
def parseFunctionTable(self, sLine):
    """
    Parses a PFNIEMOP table, updating/checking the @oppfx value.

    sLine is the line starting the table (name and array size).  The table
    entries are matched against g_dAllInstructionsByFunction and the opcode
    and prefix of the matching instructions are filled in from the entry's
    position in the table.

    Returns True when the table end was found with the expected number of
    entries, otherwise records an error and returns False.

    Note! Updates iLine as it consumes the whole table.
    """

    #
    # Extract the table name.
    #
    sName = re.search(r' *([a-zA-Z_0-9]+) *\[', sLine).group(1);
    oMap  = g_dInstructionMapsByIemName.get(sName);
    if not oMap:
        self.debug('No map for PFNIEMOP table: %s' % (sName,));
        oMap = self.oDefaultMap; # This is wrong wrong wrong.

    #
    # All but the g_apfnOneByteMap & g_apfnEscF1_E0toFF tables uses four
    # entries per byte:
    #   no prefix, 066h prefix, f3h prefix, f2h prefix
    # Those tables has 256 & 32 entries respectively.
    #
    cEntriesPerByte   = 4;
    cValidTableLength = 1024;
    asPrefixes        = ('none', '0x66', '0xf3', '0xf2');

    oEntriesMatch = re.search(r'\[ *(256|32) *\]', sLine);
    if oEntriesMatch:
        cEntriesPerByte   = 1;
        cValidTableLength = int(oEntriesMatch.group(1));
        asPrefixes        = (None,);

    #
    # The next line should be '{' and nothing else.
    #
    if self.iLine >= len(self.asLines) or not re.match('^ *{ *$', self.asLines[self.iLine]):
        return self.errorOnLine(self.iLine + 1, 'Expected lone "{" on line following PFNIEMOP table %s start' % (sName, ));
    self.iLine += 1;

    #
    # Parse till we find the end of the table.
    #
    iEntry = 0;
    while self.iLine < len(self.asLines):
        # Get the next line and strip comments and spaces (assumes no
        # multi-line comments).
        sLine = self.asLines[self.iLine];
        self.iLine += 1;
        sLine = self.stripComments(sLine).strip();

        # Split the line up into entries, expanding IEMOP_X4 usage.
        # Walks backwards so that insertions and deletions don't disturb
        # the indexes still to be visited.
        asEntries = sLine.split(',');
        for i in range(len(asEntries) - 1, -1, -1):
            sEntry = asEntries[i].strip();
            if sEntry.startswith('IEMOP_X4(') and sEntry[-1] == ')':
                sEntry = (sEntry[len('IEMOP_X4('):-1]).strip();
                asEntries.insert(i + 1, sEntry);
                asEntries.insert(i + 1, sEntry);
                asEntries.insert(i + 1, sEntry);
            if sEntry:
                asEntries[i] = sEntry;
            else:
                del asEntries[i];

        # Process the entries.
        for sEntry in asEntries:
            if sEntry in ('};', '}'):
                if iEntry != cValidTableLength:
                    return self.error('Wrong table length for %s: %#x, expected %#x' % (sName, iEntry, cValidTableLength, ));
                return True;
            if sEntry.startswith('iemOp_Invalid'):
                pass; # skip
            else:
                # Look up matching instruction by function.
                sPrefix = asPrefixes[iEntry % cEntriesPerByte];
                sOpcode = '%#04x' % (iEntry // cEntriesPerByte);
                aoInstr = g_dAllInstructionsByFunction.get(sEntry);
                if aoInstr:
                    if not isinstance(aoInstr, list):
                        aoInstr = [aoInstr,];
                    # Use the first instruction that agrees with, or has not yet
                    # been given, this opcode and prefix.
                    oInstr = None;
                    for oCurInstr in aoInstr:
                        if oCurInstr.sOpcode == sOpcode and oCurInstr.sPrefix == sPrefix:
                            pass;
                        elif oCurInstr.sOpcode == sOpcode and oCurInstr.sPrefix is None:
                            oCurInstr.sPrefix = sPrefix;
                        elif oCurInstr.sOpcode is None and oCurInstr.sPrefix is None:
                            oCurInstr.sOpcode = sOpcode;
                            oCurInstr.sPrefix = sPrefix;
                        else:
                            continue;
                        oInstr = oCurInstr;
                        break;
                    # None fits: register a copy of the first one for this table slot.
                    if not oInstr:
                        oInstr = aoInstr[0].copy(oMap = oMap, sOpcode = sOpcode, sPrefix = sPrefix);
                        aoInstr.append(oInstr);
                        g_dAllInstructionsByFunction[sEntry] = aoInstr;
                        g_aoAllInstructions.append(oInstr);
                        oMap.aoInstructions.append(oInstr);
                else:
                    self.debug('Function "%s", entry %#04x / byte %#04x in %s, is not associated with an instruction.'
                               % (sEntry, iEntry, iEntry // cEntriesPerByte, sName,));
            iEntry += 1;

    return self.error('Unexpected end of file in PFNIEMOP table');
3619
def addInstruction(self, iLine = None):
    """
    Creates a new Instruction for this source file, registers it in
    g_aoAllInstructions and in the list of current instructions, and
    returns it.  The current line is used when iLine is None.
    """
    if iLine is None:
        iLine = self.iLine;
    oNewInstr = Instruction(self.sSrcFile, iLine);
    g_aoAllInstructions.append(oNewInstr);
    self.aoCurInstrs.append(oNewInstr);
    return oNewInstr;
3628
def deriveMnemonicAndOperandsFromStats(self, oInstr, sStats):
    """
    Derives the mnemonic and operands from a IEM stats base name like string.

    Does nothing if the instruction already has a mnemonic.  Otherwise the
    first underscore separated word (lower cased) becomes the mnemonic, and
    if the instruction has no operands yet, the remaining words are looked
    up in g_kdOpTypes and added as operands.

    Returns False if an operand word is unknown (operands added before it
    are kept), otherwise True.
    """
    if oInstr.sMnemonic is not None:
        return True;

    asParts          = sStats.split('_');
    oInstr.sMnemonic = asParts[0].lower();

    asOperandTypes = asParts[1:];
    if not asOperandTypes or oInstr.aoOperands:
        return True;

    for sOpType in asOperandTypes:
        if sOpType not in g_kdOpTypes:
            #return self.error('unknown operand type: %s (instruction: %s)' % (sOpType, oInstr))
            return False;
        oInstr.aoOperands.append(Operand(g_kdOpTypes[sOpType][1], sOpType));
    return True;
3644
def doneInstructionOne(self, oInstr, iLine):
    """
    Complete the parsing by processing, validating and expanding raw inputs.

    Marks oInstr as completed on iLine, fills in whichever of mnemonic,
    disassembler enum, stats name, function name, maps and encoding are
    still missing, and registers the instruction with its maps and in the
    stats and function indexed dictionaries.  A duplicate stats name is
    recorded as an error.  Always returns True.
    """
    assert oInstr.iLineCompleted is None;
    oInstr.iLineCompleted = iLine;

    # Note: Nothing is currently done specifically for tagged (cOpTags > 0)
    #       instructions nor for unspecified legacy ones, which typically
    #       only have something like this to go on:
    #           /** Opcode 0x0f 0x00 /0. */
    #           FNIEMOPRM_DEF(iemOp_Grp6_sldt)

    #
    # Common defaults.
    #

    # Guess mnemonic and operands from stats if the former is missing.
    if oInstr.sMnemonic is None:
        sDeriveFrom = None;
        if oInstr.sStats is not None:
            sDeriveFrom = oInstr.sStats;
        elif oInstr.sFunction is not None:
            sDeriveFrom = oInstr.sFunction.replace('iemOp_', '');
        if sDeriveFrom is not None:
            self.deriveMnemonicAndOperandsFromStats(oInstr, sDeriveFrom);

    # Derive the disassembler op enum constant from the mnemonic.
    if oInstr.sDisEnum is None and oInstr.sMnemonic is not None:
        oInstr.sDisEnum = 'OP_' + oInstr.sMnemonic.upper();

    # Derive the IEM statistics base name from mnemonic and operand types.
    if oInstr.sStats is None:
        if oInstr.sFunction is not None:
            oInstr.sStats = oInstr.sFunction.replace('iemOp_', '');
        elif oInstr.sMnemonic is not None:
            oInstr.sStats = oInstr.sMnemonic \
                          + ''.join(['_' + oOperand.sType for oOperand in oInstr.aoOperands if oOperand.sType]);

    # Derive the IEM function name from mnemonic and operand types.
    if oInstr.sFunction is None:
        if oInstr.sMnemonic is not None:
            oInstr.sFunction = 'iemOp_' + oInstr.sMnemonic \
                             + ''.join(['_' + oOperand.sType for oOperand in oInstr.aoOperands if oOperand.sType]);
        elif oInstr.sStats:
            oInstr.sFunction = 'iemOp_' + oInstr.sStats;

    #
    # Apply default map and then add the instruction to all it's groups.
    #
    if not oInstr.aoMaps:
        oInstr.aoMaps = [ self.oDefaultMap, ];
    for oMap in oInstr.aoMaps:
        oMap.aoInstructions.append(oInstr);

    #
    # Derive encoding from operands and maps.
    #
    if oInstr.sEncoding is None:
        if not oInstr.aoOperands:
            if oInstr.fUnused and oInstr.sSubOpcode:
                sPlainEncoding = 'ModR/M';
            else:
                sPlainEncoding = 'fixed';
            oInstr.sEncoding = ('VEX.' + sPlainEncoding) if oInstr.onlyInVexMaps() else sPlainEncoding;
        elif oInstr.aoOperands[0].usesModRM():
            fVex =    (len(oInstr.aoOperands) >= 2 and oInstr.aoOperands[1].sWhere == 'vvvv') \
                   or oInstr.onlyInVexMaps();
            oInstr.sEncoding = 'VEX.ModR/M' if fVex else 'ModR/M';

    #
    # Check the opstat value and add it to the opstat indexed dictionary.
    #
    if oInstr.sStats:
        if oInstr.sStats in g_dAllInstructionsByStat:
            self.error('Duplicate opstat value "%s"\nnew: %s\nold: %s'
                       % (oInstr.sStats, oInstr, g_dAllInstructionsByStat[oInstr.sStats],));
        else:
            g_dAllInstructionsByStat[oInstr.sStats] = oInstr;

    #
    # Add to function indexed dictionary.  We allow multiple instructions per function.
    #
    if oInstr.sFunction:
        if oInstr.sFunction in g_dAllInstructionsByFunction:
            g_dAllInstructionsByFunction[oInstr.sFunction].append(oInstr);
        else:
            g_dAllInstructionsByFunction[oInstr.sFunction] = [oInstr,];

    #self.debug('%d..%d: %s; %d @op tags' % (oInstr.iLineCreated, oInstr.iLineCompleted, oInstr.sFunction, oInstr.cOpTags));
    return True;
3750
def doneInstructions(self, iLineInComment = None, fEndOfFunction = False):
    """
    Completes all current instructions and resets the current instruction
    list and comment.

    The completion line is the current line, or self.iCommentLine +
    iLineInComment when iLineInComment is given.  With fEndOfFunction set,
    the current function (if any) is completed as well and the function
    state is reset.  Always returns True.
    """
    if iLineInComment is None:
        iLineDone = self.iLine;
    else:
        iLineDone = self.iCommentLine + iLineInComment;

    for oInstr in self.aoCurInstrs:
        self.doneInstructionOne(oInstr, iLineDone);
        if oInstr.fStub:
            self.cTotalStubs += 1;
    self.cTotalInstr += len(self.aoCurInstrs);

    self.sComment    = '';
    self.aoCurInstrs = [];

    if not fEndOfFunction:
        return True;

    #self.debug('%s: oCurFunction=None' % (self.iLine, ));
    oFunction = self.oCurFunction;
    if oFunction:
        oFunction.complete(self.iLine, self.asLines[oFunction.iBeginLine - 1 : self.iLine]);
    self.oCurFunction   = None;
    self.iMcBlockInFunc = 0;
    return True;
3771
def setInstrunctionAttrib(self, sAttrib, oValue, fOverwrite = False):
    """
    Sets the sAttrib of all current instruction to oValue.  Unless
    fOverwrite is True, only attributes that are currently None are set.
    """
    for oInstr in self.aoCurInstrs:
        if fOverwrite is True or getattr(oInstr, sAttrib) is None:
            setattr(oInstr, sAttrib, oValue);
3783
def setInstrunctionArrayAttrib(self, sAttrib, iEntry, oValue, fOverwrite = False):
    """
    Sets the iEntry of the array sAttrib of all current instruction to oValue.
    The array is padded with None entries up to iEntry when too short.
    Unless fOverwrite is True, only entries that are currently None are set.
    """
    for oInstr in self.aoCurInstrs:
        aoValues = getattr(oInstr, sAttrib);
        cMissing = iEntry + 1 - len(aoValues);
        if cMissing > 0:
            aoValues.extend([None] * cMissing);
        if fOverwrite is True or aoValues[iEntry] is None:
            aoValues[iEntry] = oValue;
3795
def parseCommentOldOpcode(self, asLines):
    """
    Deals with 'Opcode 0xff /4' like comments.

    If the first comment line starts with the word 'Opcode' followed by a
    hexadecimal value, everything after 'Opcode' is stored as the
    sRawOldOpcodes attribute of the current instructions (unless already
    set), with any '0X' prefix normalized to '0x'.

    Always returns False.
    """
    asWords = asLines[0].split() if asLines else [];
    if     len(asWords) >= 2 \
       and asWords[0] == 'Opcode' \
       and asWords[1].startswith(('0x', '0X')):
        # Drop the leading 'Opcode' word and normalize the hex prefix of the rest.
        asOpcodes = [('0x' + sWord[2:]) if sWord.startswith('0X') else sWord for sWord in asWords[1:]];
        self.setInstrunctionAttrib('sRawOldOpcodes', ' '.join(asOpcodes));

    return False;
3811
def ensureInstructionForOpTag(self, iTagLine):
    """
    Ensure there is an instruction for the op-tag being parsed.

    Adds a new instruction (on line self.iCommentLine + iTagLine) if there
    is no current one, then bumps the tag count of every current
    instruction, counting those tagged for the first time.  Returns the
    last current instruction.
    """
    if not self.aoCurInstrs:
        self.addInstruction(self.iCommentLine + iTagLine);

    for oCurInstr in self.aoCurInstrs:
        fFirstTag = oCurInstr.cOpTags == 0;
        oCurInstr.cOpTags += 1;
        if fFirstTag:
            self.cTotalTagged += 1;

    return self.aoCurInstrs[-1];
3821
@staticmethod
def flattenSections(aasSections):
    """
    Flattens multiline sections into stripped single strings.

    Each non-empty section (list of lines) becomes one string made of its
    stripped lines joined by single spaces; empty sections are dropped.
    Returns list of strings, one section per string.
    """
    return [' '.join([sLine.strip() for sLine in asLines]) for asLines in aasSections if asLines];
3833
@staticmethod
def flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = '\n'):
    """
    Flattens sections into a simple stripped string with newlines as
    section breaks.  The final section does not sport a trailing newline.

    The lines of each section are stripped and joined with sLineSep, the
    sections are joined with sSectionSep.  Empty sections are skipped and
    do not produce a separator, so the result never starts with one.
    """
    # Typical: One section with a single line.
    if len(aasSections) == 1 and len(aasSections[0]) == 1:
        return aasSections[0][0].strip();

    asFlatSections = [sLineSep.join([sLine.strip() for sLine in asLines]) for asLines in aasSections if asLines];
    return sSectionSep.join(asFlatSections);
3851
3852
3853
3854 ## @name Tag parsers
3855 ## @{
3856
def parseTagOpBrief(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opbrief
    Value:  Text description, multiple sections, appended.

    Brief description.  If not given, it's the first sentence from @opdesc.

    The value is flattened and given a trailing full stop if it lacks one.
    It is rejected when empty, longer than 180 characters, holding more than
    one sentence, or when the instruction already has a brief description.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sBrief = self.flattenAllSections(aasSections);
    if not sBrief:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if sBrief[-1] != '.':
        sBrief = sBrief + '.';
    if len(sBrief) > 180:
        return self.errorComment(iTagLine, '%s: value too long (max 180 chars): %s' % (sTag, sBrief));

    # Look for the first dot that is followed by a space, i.e. a sentence
    # break.  Dots inside words/numbers are skipped.  If the dot found isn't
    # the final character, there is more than one sentence.
    offDot = sBrief.find('.');
    while 0 <= offDot < len(sBrief) - 1 and sBrief[offDot + 1] != ' ':
        offDot = sBrief.find('.', offDot + 1);
    if offDot >= 0 and offDot != len(sBrief) - 1:
        return self.errorComment(iTagLine, '%s: only one sentence: %s' % (sTag, sBrief));

    # Update the instruction, refusing to overwrite an existing brief.
    if oInstr.sBrief is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite brief "%s" with "%s"'
                                           % (sTag, oInstr.sBrief, sBrief,));
    oInstr.sBrief = sBrief;

    _ = iEndLine;
    return True;
3886
def parseTagOpDesc(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opdesc
    Value:  Text description, multiple sections, appended.

    Describes the instruction.  Each non-empty section is flattened into a
    single string and appended to the instruction's asDescSections list.

    Always returns True.
    """
    _ = sTag; _ = iEndLine;
    oTarget = self.ensureInstructionForOpTag(iTagLine);
    if aasSections:
        asFlat = self.flattenSections(aasSections);
        oTarget.asDescSections.extend(asFlat);
    return True;
3901
def parseTagOpMnemonic(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opmnemonic
    Value:  mnemonic

    The flattened value must match self.oReMnemonic.  Because of prefixes,
    groups and whatnot, there are times when the mnemonic isn't that of an
    actual assembler mnemonic.

    Returns True when the mnemonic was stored on the instruction, otherwise
    the result of self.errorComment() (invalid name or already set).
    """
    _ = iEndLine;
    oTarget = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value and check it against the mnemonic pattern.
    sName = self.flattenAllSections(aasSections);
    if self.oReMnemonic.match(sName) is None:
        return self.errorComment(iTagLine, '%s: invalid menmonic name: "%s"' % (sTag, sName,));

    # Store it unless one has been given already.
    if oTarget.sMnemonic is None:
        oTarget.sMnemonic = sName;
        return True;
    return self.errorComment(iTagLine, '%s: attempting to overwrite menmonic "%s" with "%s"'
                                       % (sTag, oTarget.sMnemonic, sName,));
3924
def parseTagOpOperandN(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tags:   @op1, @op2, @op3, @op4
    Value:  [where:]type

    The 'where' value indicates where the operand is found, like the 'reg'
    part of the ModR/M encoding.  It must be a key of g_kdOpLocations.  When
    omitted, the default is taken from the g_kdOpTypes entry of the type.

    The 'type' value indicates the operand type and must be a key of
    g_kdOpTypes.

    The operand index is taken from the last character of the tag.  An
    operand slot that is already filled is never overwritten.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    idxOp = int(sTag[-1]) - 1;
    assert 0 <= idxOp < 4;

    # flatten, split up, and validate the "where:type" value.
    sFlattened = self.flattenAllSections(aasSections);
    asSplit = sFlattened.split(':');
    if len(asSplit) == 1:
        sType  = asSplit[0];
        sWhere = None;
    elif len(asSplit) == 2:
        (sWhere, sType) = asSplit;
    else:
        return self.errorComment(iTagLine, 'expected %s value on format "[<where>:]<type>" not "%s"' % (sTag, sFlattened,));

    if sType not in g_kdOpTypes:
        return self.errorComment(iTagLine, '%s: invalid type value "%s", valid: %s'
                                           % (sTag, sType, ', '.join(g_kdOpTypes.keys()),));
    if sWhere is None:
        # No explicit location given, use the default one for the type.
        sWhere = g_kdOpTypes[sType][1];
    elif sWhere not in g_kdOpLocations:
        return self.errorComment(iTagLine, '%s: invalid where value "%s", valid: %s'
                                           % (sTag, sWhere, ', '.join(g_kdOpLocations.keys()),));

    # Insert the operand, refusing to overwrite an existing one.
    while idxOp >= len(oInstr.aoOperands):
        oInstr.aoOperands.append(None);
    if oInstr.aoOperands[idxOp] is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s:%s" with "%s:%s"'
                                           % ( sTag, oInstr.aoOperands[idxOp].sWhere, oInstr.aoOperands[idxOp].sType,
                                               sWhere, sType,));
    oInstr.aoOperands[idxOp] = Operand(sWhere, sType);

    _ = iEndLine;
    return True;
3974
def parseTagOpMaps(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opmaps
    Value:  map[,map2]

    Indicates which maps the instruction is in.  There is a default map
    associated with each input file.

    Every map name must be a key of g_dInstructionMaps.  Whitespace around
    the names and empty list entries are ignored.  Maps the instruction is
    already assigned to are rejected as duplicates.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split up and validate the value.  Note that str.split(',')
    # never returns an empty list, so blank entries are filtered out to make
    # the 'value required' check effective.
    sFlattened = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',');
    asMaps = [sMap.strip() for sMap in sFlattened.split(',')];
    asMaps = [sMap for sMap in asMaps if sMap];
    if not asMaps:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    for sMap in asMaps:
        if sMap not in g_dInstructionMaps:
            return self.errorComment(iTagLine, '%s: invalid map value: %s  (valid values: %s)'
                                               % (sTag, sMap, ', '.join(g_dInstructionMaps.keys()),));

    # Add the maps to the current list.  Throw errors on duplicates.
    for oMap in oInstr.aoMaps:
        if oMap.sName in asMaps:
            return self.errorComment(iTagLine, '%s: duplicate map assignment: %s' % (sTag, oMap.sName));

    for sMap in asMaps:
        oMap = g_dInstructionMaps[sMap];
        if oMap not in oInstr.aoMaps:
            oInstr.aoMaps.append(oMap);
        else:
            # Same map listed twice in this tag: report it, but carry on.
            self.errorComment(iTagLine, '%s: duplicate map assignment (input): %s' % (sTag, sMap));

    _ = iEndLine;
    return True;
4009
def parseTagOpPfx(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @oppfx
    Value:  n/a|none|0x66|0xf3|0xf2

    Required prefix for the instruction.  (In a (E)VEX context this is the
    value of the 'pp' field rather than an actual prefix.)

    The value is lower-cased.  'n/a' is stored as None and 'none' is stored
    as is.  Anything else must pass _isValidOpcodeByte(), with two character
    values getting a '0x' prefix first.  All values other than 'n/a' must be
    in g_kdPrefixes.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sFlattened = self.flattenAllSections(aasSections);
    asPrefixes = sFlattened.split();
    if not asPrefixes:
        # Guard against an empty tag value, asPrefixes[0] would raise below.
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if len(asPrefixes) > 1:
        return self.errorComment(iTagLine, '%s: max one prefix: %s' % (sTag, asPrefixes,));

    sPrefix = asPrefixes[0].lower();
    if sPrefix == 'n/a':
        sPrefix = None;
    elif sPrefix != 'none':
        if len(sPrefix) == 2:
            sPrefix = '0x' + sPrefix;
        if not _isValidOpcodeByte(sPrefix):
            return self.errorComment(iTagLine, '%s: invalid prefix: %s' % (sTag, sPrefix,));

    if sPrefix is not None and sPrefix not in g_kdPrefixes:
        return self.errorComment(iTagLine, '%s: invalid prefix: %s (valid %s)' % (sTag, sPrefix, g_kdPrefixes,));

    # Set it.
    if oInstr.sPrefix is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sPrefix, sPrefix,));
    oInstr.sPrefix = sPrefix;

    _ = iEndLine;
    return True;
4047
def parseTagOpcode(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opcode
    Value:  0x?? | /reg (TODO: | mr/reg | 11 /reg | !11 /reg | 11 mr/reg | !11 mr/reg)

    The opcode byte or sub-byte for the instruction in the context of a map.

    Accepted forms are '/N', '11/N' and '!11/N' with N in the range 0..7
    (the ModR/M reg field is three bits wide), or anything that
    _isValidOpcodeByte() accepts.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sOpcode = self.flattenAllSections(aasSections);
    fIsRegForm = sOpcode[:-1] in ('/', '11/', '!11/') and sOpcode[-1] in '01234567';
    if not fIsRegForm and not _isValidOpcodeByte(sOpcode):
        return self.errorComment(iTagLine, '%s: invalid opcode: %s' % (sTag, sOpcode,));

    # Set it.
    if oInstr.sOpcode is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sOpcode, sOpcode,));
    oInstr.sOpcode = sOpcode;

    _ = iEndLine;
    return True;
4077
def parseTagOpcodeSub(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opcodesub
    Value:  none | 11 mr/reg | !11 mr/reg | rex.w=0 | rex.w=1 | vex.l=0 | vex.l=1
            | 11 mr/reg vex.l=0 | 11 mr/reg vex.l=1 | !11 mr/reg vex.l=0 | !11 mr/reg vex.l=1

    This is a simple way of dealing with encodings where the mod=3 and mod!=3
    represents exactly two different instructions.  The more proper way would
    be to go via maps with two members, but this is faster.

    The flattened value must be a key of g_kdSubOpcodes.  The first element
    of the matching entry is what gets stored on the instruction.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oTarget = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value and translate it via the sub-opcode table.
    sRaw = self.flattenAllSections(aasSections);
    if sRaw not in g_kdSubOpcodes:
        return self.errorComment(iTagLine, '%s: invalid sub opcode: %s  (valid: 11, !11, none)' % (sTag, sRaw,));
    sTranslated = g_kdSubOpcodes[sRaw][0];

    # Store it unless one has been given already.
    if oTarget.sSubOpcode is None:
        oTarget.sSubOpcode = sTranslated;
        return True;
    return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                       % ( sTag, oTarget.sSubOpcode, sTranslated,));
4104
def parseTagOpEnc(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @openc
    Value:  ModR/M|fixed|prefix|<map name>

    The instruction operand encoding style.

    The flattened value is accepted when it is a key of g_kdEncodings, a key
    of g_dInstructionMaps, or passes _isValidOpcodeByte().

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oTarget = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sValue = self.flattenAllSections(aasSections);
    fKnownName = sValue in g_kdEncodings or sValue in g_dInstructionMaps;
    if not fKnownName and not _isValidOpcodeByte(sValue):
        return self.errorComment(iTagLine, '%s: invalid encoding: %s' % (sTag, sValue,));

    # Store it unless one has been given already.
    if oTarget.sEncoding is None:
        oTarget.sEncoding = sValue;
        return True;
    return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                       % ( sTag, oTarget.sEncoding, sValue,));
4131
## EFlags tag to Instruction attribute name.
## parseTagOpEFlags uses the tag as key here and reads/writes the named
## instruction attribute via getattr/setattr.
kdOpFlagToAttr = {
    '@opfltest':    'asFlTest',
    '@opflmodify':  'asFlModify',
    '@opflundef':   'asFlUndefined',
    '@opflset':     'asFlSet',
    '@opflclear':   'asFlClear',
};
4140
def parseTagOpEFlags(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tags:   @opfltest, @opflmodify, @opflundef, @opflset, @opflclear
    Value:  <eflags specifier>

    The value is a comma separated list of flag names from
    g_kdEFlagsMnemonics, or the single word 'none' (any case) for an empty
    list.  The list is stored in the instruction attribute that
    self.kdOpFlagToAttr associates with the tag, unless that attribute has
    been set already.

    Returns True on success, False if any flag is invalid, or the result of
    self.errorComment() on an overwrite attempt.
    """
    _ = iEndLine;
    oTarget = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split up and validate the values.
    sJoined = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',');
    asFlags = sJoined.split(',');
    if len(asFlags) == 1 and asFlags[0].lower() == 'none':
        asFlags = [];
    else:
        fRc = True;
        for idxFlag, sRawFlag in enumerate(asFlags):
            if sRawFlag in g_kdEFlagsMnemonics:
                continue;
            sTrimmed = sRawFlag.strip();
            if sTrimmed in g_kdEFlagsMnemonics:
                asFlags[idxFlag] = sTrimmed;
            else:
                # Keep going so every bad flag gets reported.
                fRc = self.errorComment(iTagLine, '%s: invalid EFLAGS value: %s' % (sTag, sRawFlag,));
        if not fRc:
            return False;

    # Store the list unless the attribute has been set already.
    sAttrib = self.kdOpFlagToAttr[sTag];
    asOld = getattr(oTarget, sAttrib);
    if asOld is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, asOld, asFlags,));
    setattr(oTarget, self.kdOpFlagToAttr[sTag], asFlags);
    return True;
4172
def parseTagOpHints(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @ophints
    Value:  Comma or space separated list of flags and hints.

    This covers the disassembler flags table and more.

    Each hint must be a key of g_kdHints.  The single word 'none' (any case)
    gives an empty list.  Valid hints are added as keys to the instruction's
    dHints dictionary.  Hints already present are reported as duplicates,
    without failing the tag.

    Returns True on success and False if any hint is invalid.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    asHints = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asHints) == 1 and asHints[0].lower() == 'none':
        asHints = [];
    else:
        fRc = True;
        for iHint, sHint in enumerate(asHints):
            if sHint not in g_kdHints:
                if sHint.strip() in g_kdHints:
                    # Replace the list entry; strings are immutable and can't
                    # be assigned into.
                    asHints[iHint] = sHint.strip();
                else:
                    fRc = self.errorComment(iTagLine, '%s: invalid hint value: %s' % (sTag, sHint,));
        if not fRc:
            return False;

    # Append them.
    for sHint in asHints:
        if sHint not in oInstr.dHints:
            oInstr.dHints[sHint] = True; # (dummy value, using dictionary for speed)
        else:
            self.errorComment(iTagLine, '%s: duplicate hint: %s' % ( sTag, sHint,));

    _ = iEndLine;
    return True;
4206
def parseTagOpDisEnum(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opdisenum
    Value:  OP_XXXX

    Selects a specific (legacy) disassembler enum value for the instruction.

    An error is reported unless exactly one word is given.  With no word at
    all False is returned, while with several words parsing carries on using
    the first one.  The word must match self.oReDisEnum.

    Returns True on success, False or the result of self.errorComment() on
    failure.
    """
    _ = iEndLine;
    oTarget = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and split.
    asTokens = self.flattenAllSections(aasSections).split();
    cTokens = len(asTokens);
    if cTokens != 1:
        self.errorComment(iTagLine, '%s: expected exactly one value: %s' % (sTag, asTokens,));
        if cTokens == 0:
            return False;

    sEnumName = asTokens[0];
    if self.oReDisEnum.match(sEnumName) is None:
        return self.errorComment(iTagLine, '%s: invalid disassembler OP_XXXX enum: %s (pattern: %s)'
                                           % (sTag, sEnumName, self.oReDisEnum.pattern));

    # Store it unless one has been given already.
    if oTarget.sDisEnum is None:
        oTarget.sDisEnum = sEnumName;
        return True;
    return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % (sTag, oTarget.sDisEnum, sEnumName,));
4235
def parseTagOpMinCpu(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opmincpu
    Value:  <simple CPU name>

    Indicates when this instruction was introduced.

    The CPU name must be in g_kdCpuNames.  If more than one word is given an
    error is reported and the first word is used.  An existing and different
    minimum CPU on the instruction is not overwritten; an error is reported
    instead.

    Returns True when the value was valid, otherwise the result of
    self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value, split into words, make sure there's just one, valid it.
    asCpus = self.flattenAllSections(aasSections).split();
    if not asCpus:
        # Guard against an empty tag value, asCpus[0] would raise below.
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if len(asCpus) > 1:
        self.errorComment(iTagLine, '%s: exactly one CPU name, please: %s' % (sTag, ' '.join(asCpus),));

    sMinCpu = asCpus[0];
    if sMinCpu not in g_kdCpuNames:
        return self.errorComment(iTagLine, '%s: invalid CPU name: %s  (names: %s)'
                                           % (sTag, sMinCpu, ','.join(sorted(g_kdCpuNames)),));

    # Set it.  The instruction must not be touched before this point, or the
    # overwrite check below can never trigger.
    if oInstr.sMinCpu is None:
        oInstr.sMinCpu = sMinCpu;
    elif oInstr.sMinCpu != sMinCpu:
        self.errorComment(iTagLine, '%s: attemting to overwrite "%s" with "%s"' % (sTag, oInstr.sMinCpu, sMinCpu,));

    _ = iEndLine;
    return True;
4265
def parseTagOpCpuId(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opcpuid
    Value:  none | <CPUID flag specifier>

    CPUID feature bit which is required for the instruction to be present.

    The value is a comma or space separated list where each entry must be a
    key of g_kdCpuIdFlags.  The single word 'none' (any case) gives an empty
    list.  Valid entries are appended to the instruction's asCpuIds list.
    Entries already present are reported as duplicates, without failing the
    tag.

    Returns True on success and False if any entry is invalid.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    asCpuIds = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asCpuIds) == 1 and asCpuIds[0].lower() == 'none':
        asCpuIds = [];
    else:
        fRc = True;
        for iCpuId, sCpuId in enumerate(asCpuIds):
            if sCpuId not in g_kdCpuIdFlags:
                if sCpuId.strip() in g_kdCpuIdFlags:
                    # Replace the list entry; strings are immutable and can't
                    # be assigned into.
                    asCpuIds[iCpuId] = sCpuId.strip();
                else:
                    fRc = self.errorComment(iTagLine, '%s: invalid CPUID value: %s' % (sTag, sCpuId,));
        if not fRc:
            return False;

    # Append them.
    for sCpuId in asCpuIds:
        if sCpuId not in oInstr.asCpuIds:
            oInstr.asCpuIds.append(sCpuId);
        else:
            self.errorComment(iTagLine, '%s: duplicate CPUID: %s' % ( sTag, sCpuId,));

    _ = iEndLine;
    return True;
4299
def parseTagOpGroup(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opgroup
    Value:  op_grp1[_subgrp2[_subsubgrp3]]

    Instruction grouping.

    Exactly one word matching self.oReGroupName is required, and the
    instruction must not have a group already.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oTarget = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split into words and validate.
    asNames = self.flattenAllSections(aasSections).split();
    if len(asNames) != 1:
        return self.errorComment(iTagLine, '%s: exactly one group, please: %s' % (sTag, asNames,));
    sName = asNames[0];
    if self.oReGroupName.match(sName) is None:
        return self.errorComment(iTagLine, '%s: invalid group name: %s (valid: %s)'
                                           % (sTag, sName, self.oReGroupName.pattern));

    # Store it unless one has been given already.
    if oTarget.sGroup is None:
        oTarget.sGroup = sName;
        return True;
    return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oTarget.sGroup, sName,));
4325
def parseTagOpUnusedInvalid(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opunused, @opinvalid, @opinvlstyle
    Value:  <invalid opcode behaviour style>

    The @opunused indicates the specification is for a currently unused
    instruction encoding.

    The @opinvalid indicates the specification is for a currently invalid
    instruction encoding (like UD2).

    The @opinvlstyle just indicates how CPUs decode the instruction when
    not supported (@opcpuid, @opmincpu) or disabled.

    Exactly one style from g_kdInvalidStyles is required and only one of
    these tags may be given per instruction.  Besides storing the style,
    @opunused sets fUnused and @opinvalid sets fInvalid on the instruction.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oTarget = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split into words and validate.
    asWords = self.flattenAllSections(aasSections).split();
    if len(asWords) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid behviour style, please: %s' % (sTag, asWords,));
    sChosen = asWords[0];
    if sChosen not in g_kdInvalidStyles:
        return self.errorComment(iTagLine, '%s: invalid invalid behaviour style: %s (valid: %s)'
                                           % (sTag, sChosen, g_kdInvalidStyles.keys(),));

    # Refuse to overwrite an existing style.
    if oTarget.sInvalidStyle is not None:
        return self.errorComment(iTagLine,
                                 '%s: attempting to overwrite "%s" with "%s" (only one @opunused, @opinvalid, @opinvlstyle)'
                                 % ( sTag, oTarget.sInvalidStyle, sChosen,));

    # Store the style and the tag specific flag, if any.
    oTarget.sInvalidStyle = sChosen;
    if sTag == '@opunused':
        oTarget.fUnused = True;
    elif sTag == '@opinvalid':
        oTarget.fInvalid = True;
    return True;
4363
def parseTagOpTest(self, sTag, aasSections, iTagLine, iEndLine): # pylint: disable=too-many-locals
    """
    Tag:        @optest
    Value:      [<selectors>[ ]?] <inputs> -> <outputs>
    Example:    mode==64bit / in1=0xfffffffe:dw in2=1:dw -> out1=0xffffffff:dw outfl=a?,p?

    The main idea here is to generate basic instruction tests.

    The probably simplest way of handling the diverse input, would be to use
    it to produce size optimized byte code for a simple interpreter that
    modifies the register input and output states.

    An alternative to the interpreter would be creating multiple tables,
    but that becomes rather complicated wrt what goes where and then to use
    them in an efficient manner.

    Each section in aasSections is parsed as one test.  A test is appended
    to the instruction's aoTests list only if all of its selectors, inputs
    and outputs parsed okay; otherwise errors are reported via
    self.errorComment().

    Always returns True.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    #
    # Do it section by section.
    #
    for asSectionLines in aasSections:
        #
        # Sort the input into outputs, inputs and selector conditions.
        #
        sFlatSection = self.flattenAllSections([asSectionLines,]);
        if not sFlatSection:
            self.errorComment(iTagLine, '%s: missing value (dbg: aasSections=%s)' % ( sTag, aasSections));
            continue;
        oTest = InstructionTest(oInstr);

        asSelectors = [];
        asInputs    = [];
        asOutputs   = [];
        asCur   = asOutputs;
        fRc     = True;
        asWords = sFlatSection.split();
        # The words are walked backwards: outputs first, then inputs after
        # '->', and finally selectors after '/'.
        for iWord in range(len(asWords) - 1, -1, -1):
            sWord = asWords[iWord];
            # Check for array switchers.  Identity comparison is required
            # here since two empty lists compare equal.
            if sWord == '->':
                if asCur is not asOutputs:
                    fRc = self.errorComment(iTagLine, '%s: "->" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asInputs;
            elif sWord == '/':
                if asCur is not asInputs:
                    fRc = self.errorComment(iTagLine, '%s: "/" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asSelectors;
            else:
                asCur.insert(0, sWord);

        #
        # Validate and add selectors.
        #
        for sCond in asSelectors:
            # Expand predicate shorthands, if it is one.
            sCondExp = TestSelector.kdPredicates.get(sCond, sCond);
            oSelector = None;
            for sOp in TestSelector.kasCompareOps:
                off = sCondExp.find(sOp);
                if off >= 0:
                    sVariable = sCondExp[:off];
                    sValue    = sCondExp[off + len(sOp):];
                    if sVariable in TestSelector.kdVariables:
                        if sValue in TestSelector.kdVariables[sVariable]:
                            oSelector = TestSelector(sVariable, sOp, sValue);
                        else:
                            self.errorComment(iTagLine, '%s: invalid condition value "%s" in "%s" (valid: %s)'
                                                         % ( sTag, sValue, sCond,
                                                             TestSelector.kdVariables[sVariable].keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid condition variable "%s" in "%s" (valid: %s)'
                                                     % ( sTag, sVariable, sCond, TestSelector.kdVariables.keys(),));
                    # Only the first operator found in the condition is tried.
                    break;
            if oSelector is not None:
                for oExisting in oTest.aoSelectors:
                    if oExisting.sVariable == oSelector.sVariable:
                        self.errorComment(iTagLine, '%s: already have a selector for variable "%s" (existing: %s, new: %s)'
                                                     % ( sTag, oSelector.sVariable, oExisting, oSelector,));
                oTest.aoSelectors.append(oSelector);
            else:
                fRc = self.errorComment(iTagLine, '%s: failed to parse selector: %s' % ( sTag, sCond,));

        #
        # Validate outputs and inputs, adding them to the test as we go along.
        #
        for asItems, sDesc, aoDst in [ (asInputs, 'input', oTest.aoInputs), (asOutputs, 'output', oTest.aoOutputs)]:
            asValidFieldKinds = [ 'both', sDesc, ];
            for sItem in asItems:
                oItem = None;
                for sOp in TestInOut.kasOperators:
                    off = sItem.find(sOp);
                    if off < 0:
                        continue;
                    sField     = sItem[:off];
                    sValueType = sItem[off + len(sOp):];
                    if     sField in TestInOut.kdFields \
                       and TestInOut.kdFields[sField][1] in asValidFieldKinds:
                        # The value may carry an explicit ':type' suffix,
                        # otherwise the field's default type is used.
                        asSplit = sValueType.split(':', 1);
                        sValue  = asSplit[0];
                        sType   = asSplit[1] if len(asSplit) > 1 else TestInOut.kdFields[sField][0];
                        if sType in TestInOut.kdTypes:
                            oValid = TestInOut.kdTypes[sType].validate(sValue);
                            if oValid is True:
                                if not TestInOut.kdTypes[sType].isAndOrPair(sValue) or sOp == '&|=':
                                    oItem = TestInOut(sField, sOp, sValue, sType);
                                else:
                                    self.errorComment(iTagLine, '%s: and-or %s value "%s" can only be used with "&|="'
                                                                % ( sTag, sDesc, sItem, ));
                            else:
                                self.errorComment(iTagLine, '%s: invalid %s value "%s" in "%s" (type: %s): %s'
                                                            % ( sTag, sDesc, sValue, sItem, sType, oValid, ));
                        else:
                            self.errorComment(iTagLine, '%s: invalid %s type "%s" in "%s" (valid types: %s)'
                                                         % ( sTag, sDesc, sType, sItem, TestInOut.kdTypes.keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid %s field "%s" in "%s"\nvalid fields: %s'
                                                     % ( sTag, sDesc, sField, sItem,
                                                         ', '.join([sKey for sKey, asVal in TestInOut.kdFields.items()
                                                                    if asVal[1] in asValidFieldKinds]),));
                    # Only the first operator found in the item is tried.
                    break;
                if oItem is not None:
                    for oExisting in aoDst:
                        if oExisting.sField == oItem.sField and oExisting.sOp == oItem.sOp:
                            self.errorComment(iTagLine,
                                              '%s: already have a "%s" assignment for field "%s" (existing: %s, new: %s)'
                                              % ( sTag, oItem.sOp, oItem.sField, oExisting, oItem,));
                    aoDst.append(oItem);
                else:
                    fRc = self.errorComment(iTagLine, '%s: failed to parse assignment: %s' % ( sTag, sItem,));

        #
        # Add the test if everything parsed okay, otherwise complain.
        #
        if fRc:
            oInstr.aoTests.append(oTest);
        else:
            self.errorComment(iTagLine, '%s: failed to parse test: %s' % (sTag, ' '.join(asWords),));
            self.errorComment(iTagLine, '%s: asSelectors=%s / asInputs=%s -> asOutputs=%s'
                                        % (sTag, asSelectors, asInputs, asOutputs,));

    _ = iEndLine;
    return True;
4508
def parseTagOpTestNum(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Numbered @optest tag.  Either @optest42 or @optest[42].

    The number must equal the count of tests the instruction has so far;
    an error is reported if it doesn't.  Parsing is then handed over to
    self.parseTagOpTest() either way, and its result is returned.
    """
    oTarget = self.ensureInstructionForOpTag(iTagLine);

    # Extract the digits, with or without the enclosing square brackets.
    sDigits = sTag[8:-1] if sTag[-1] == ']' else sTag[7:];
    iTest = int(sDigits);

    cExisting = len(oTarget.aoTests);
    if iTest != cExisting:
        self.errorComment(iTagLine, '%s: incorrect test number: %u, actual %u' % (sTag, iTest, len(oTarget.aoTests),));
    return self.parseTagOpTest(sTag, aasSections, iTagLine, iEndLine);
4524
def parseTagOpTestIgnore(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @optestign | @optestignore
    Value:  <value is ignored>

    A simple trick for ignoring a test while debugging another one.  All
    the arguments are deliberately unused.

    See also @oponlytest.

    Always returns True.
    """
    _ = (sTag, aasSections, iTagLine, iEndLine);
    return True;
4536
def parseTagOpCopyTests(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @opcopytests
    Value:      <opstat | function> [..]
    Example:    @opcopytests add_Eb_Gb

    Trick to avoid duplicating tests for different encodings of the same
    operation.

    Each reference must match self.oReStatsName or self.oReFunctionName and
    is appended to the instruction's asCopyTests list.  Invalid and
    duplicate references are reported without failing the tag.

    Returns True, or the result of self.errorComment() if no reference was
    given at all.
    """
    _ = iEndLine;
    oTarget = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, validate and queue the copy jobs on the instruction.  The
    # list is only recorded here, so forward references are fine.
    asRefs = self.flattenAllSections(aasSections).split();
    if len(asRefs) == 0:
        return self.errorComment(iTagLine, '%s: requires at least on reference value' % (sTag,));

    for sRef in asRefs:
        if sRef in oTarget.asCopyTests:
            self.errorComment(iTagLine, '%s: ignoring duplicate "%s"' % (sTag, sRef,));
            continue;
        if self.oReStatsName.match(sRef) or self.oReFunctionName.match(sRef):
            oTarget.asCopyTests.append(sRef);
            continue;
        self.errorComment(iTagLine, '%s: invalid instruction reference (opstat or function) "%s" (valid: %s or %s)'
                                    % (sTag, sRef, self.oReStatsName.pattern, self.oReFunctionName.pattern));
    return True;
4565
def parseTagOpOnlyTest(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @oponlytest | @oponly
    Value:  none

    Only test instructions with this tag.  This is a trick that is handy
    for singling out one or two new instructions or tests.

    The instruction is added to g_aoOnlyTestInstructions unless it is in
    there already.

    See also @optestignore.

    Returns True on success, or the result of self.errorComment() if a
    value was given.
    """
    _ = iEndLine;
    oTarget = self.ensureInstructionForOpTag(iTagLine);

    # The tag doesn't take any value.
    sGiven = self.flattenAllSections(aasSections).strip();
    if sGiven:
        return self.errorComment(iTagLine, '%s: does not take any value: %s' % (sTag, sGiven));

    # Register the instruction, avoiding duplicate entries.
    if oTarget not in g_aoOnlyTestInstructions:
        g_aoOnlyTestInstructions.append(oTarget);
    return True;
4588
def parseTagOpXcptType(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opxcpttype
    Value:  [none|1|2|3|4|4UA|5|6|7|8|11|12|E1|E1NF|E2|E3|E3NF|E4|E4NF|E5|E5NF|E6|E6NF|E7NF|E9|E9NF|E10|E11|E12|E12NF]

    Sets the SSE or AVX exception type (see SDMv2 2.4, 2.7).

    Exactly one value from g_kdXcptTypes is required and the instruction
    must not have an exception type already.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oTarget = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split into words and validate.
    asWords = self.flattenAllSections(aasSections).split();
    if len(asWords) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid exception type, please: %s' % (sTag, asWords,));
    sChosen = asWords[0];
    if sChosen not in g_kdXcptTypes:
        return self.errorComment(iTagLine, '%s: invalid invalid exception type: %s (valid: %s)'
                                           % (sTag, sChosen, sorted(g_kdXcptTypes.keys()),));

    # Store it unless one has been given already.
    if oTarget.sXcptType is None:
        oTarget.sXcptType = sChosen;
        return True;
    return self.errorComment(iTagLine,
                             '%s: attempting to overwrite "%s" with "%s" (only one @opxcpttype)'
                             % ( sTag, oTarget.sXcptType, sChosen,));
4615
def parseTagOpFunction(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opfunction
    Value:  <VMM function name>

    This is for explicitly setting the IEM function name.  Normally we pick
    this up from the FNIEMOP_XXX macro invocation after the description, or
    generate it from the mnemonic and operands.

    It is thought it may be necessary to set it when specifying instructions
    whose implementation isn't following immediately or aren't implemented yet.

    The flattened value must match self.oReFunctionName and the instruction
    must not have a function name already.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oTarget = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sName = self.flattenAllSections(aasSections);
    if self.oReFunctionName.match(sName) is None:
        return self.errorComment(iTagLine, '%s: invalid VMM function name: "%s" (valid: %s)'
                                           % (sTag, sName, self.oReFunctionName.pattern));

    # Store it unless one has been given already.
    if oTarget.sFunction is None:
        oTarget.sFunction = sName;
        return True;
    return self.errorComment(iTagLine, '%s: attempting to overwrite VMM function name "%s" with "%s"'
                                       % (sTag, oTarget.sFunction, sName,));
4643
def parseTagOpStats(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opstats
    Value:  <VMM statistics base name>

    This is for explicitly setting the statistics name.  Normally we pick
    this up from the IEMOP_MNEMONIC macro invocation, or generate it from
    the mnemonic and operands.

    It is thought it may be necessary to set it when specifying instructions
    whose implementation isn't following immediately or aren't implemented yet.

    The flattened value must match self.oReStatsName and the instruction
    must not have a statistics name already.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oTarget = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sName = self.flattenAllSections(aasSections);
    if self.oReStatsName.match(sName) is None:
        return self.errorComment(iTagLine, '%s: invalid VMM statistics name: "%s" (valid: %s)'
                                           % (sTag, sName, self.oReStatsName.pattern));

    # Store it unless one has been given already.
    if oTarget.sStats is None:
        oTarget.sStats = sName;
        return True;
    return self.errorComment(iTagLine, '%s: attempting to overwrite VMM statistics base name "%s" with "%s"'
                                       % (sTag, oTarget.sStats, sName,));
4671
def parseTagOpDone(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opdone
    Value:  none

    Used to explicitly flush the instructions that have been specified.

    Returns the result of self.doneInstructions(), or that of
    self.errorComment() if a value was given.
    """
    _ = iEndLine;
    sGiven = self.flattenAllSections(aasSections);
    if sGiven == '':
        return self.doneInstructions();
    return self.errorComment(iTagLine, '%s: takes no value, found: "%s"' % (sTag, sGiven,));
4684
4685 ## @}
4686
4687
def parseComment(self):
    """
    Parse the current comment (self.sComment).

    If it's an opcode specifying comment, we reset the macro stuff.

    The comment is split into lines and scanned for '@tag' lines.  The text
    following a tag (up to the next tag) is collected as a list of sections
    (blank lines separate sections) and passed to the matching handler in
    self.dTagHandlers.  Text preceding the first tag is collected under the
    pseudo tag '@default'.

    Returns False if the comment contains neither 'Opcode' nor '@' (nothing
    is parsed in that case), otherwise True.
    """
    #
    # Reject if comment doesn't seem to contain anything interesting.
    #
    if    self.sComment.find('Opcode') < 0 \
      and self.sComment.find('@')      < 0:
        return False;

    #
    # Split the comment into lines, removing leading asterisks and spaces.
    # Also remove leading and trailing empty lines.
    #
    asLines = self.sComment.split('\n');
    for iLine, sLine in enumerate(asLines):
        asLines[iLine] = sLine.lstrip().lstrip('*').lstrip();

    # Each leading empty line dropped advances self.iCommentLine by one.
    while asLines and not asLines[0]:
        self.iCommentLine += 1;
        asLines.pop(0);

    while asLines and not asLines[-1]:
        asLines.pop(len(asLines) - 1);

    #
    # Check for old style: Opcode 0x0f 0x12
    #
    if asLines[0].startswith('Opcode '):
        self.parseCommentOldOpcode(asLines);

    #
    # Look for @op* tagged data.
    #
    cOpTags      = 0;               # Number of tags dispatched to a handler.
    sFlatDefault = None;            # Flattened untagged text, if any.
    sCurTag      = '@default';      # Tag currently being collected.
    iCurTagLine  = 0;               # Index into asLines where sCurTag started.
    asCurSection = [];              # Section currently being filled.
    aasSections  = [ asCurSection, ];
    for iLine, sLine in enumerate(asLines):
        if not sLine.startswith('@'):
            # Plain text: append to the current section; a blank line after
            # some text starts a new section.
            if sLine:
                asCurSection.append(sLine);
            elif asCurSection:
                asCurSection = [];
                aasSections.append(asCurSection);
        else:
            #
            # Process the previous tag.
            #
            # (Drop a trailing empty section left behind by a blank line.)
            if not asCurSection and len(aasSections) > 1:
                aasSections.pop(-1);
            if sCurTag in self.dTagHandlers:
                self.dTagHandlers[sCurTag](sCurTag, aasSections, iCurTagLine, iLine);
                cOpTags += 1;
            elif sCurTag.startswith('@op'):
                self.errorComment(iCurTagLine, 'Unknown tag: %s' % (sCurTag));
            elif sCurTag == '@default':
                sFlatDefault = self.flattenAllSections(aasSections);
            elif '@op' + sCurTag[1:] in self.dTagHandlers:
                self.errorComment(iCurTagLine, 'Did you mean "@op%s" rather than "%s"?' % (sCurTag[1:], sCurTag));
            elif sCurTag in ['@encoding', '@opencoding']:
                self.errorComment(iCurTagLine, 'Did you mean "@openc" rather than "%s"?' % (sCurTag,));

            #
            # New tag.
            #
            # The tag name is lower-cased; whatever follows it on the same
            # line becomes the first line of the first section.
            asSplit = sLine.split(None, 1);
            sCurTag = asSplit[0].lower();
            if len(asSplit) > 1:
                asCurSection = [asSplit[1],];
            else:
                asCurSection = [];
            aasSections = [asCurSection, ];
            iCurTagLine = iLine;

    #
    # Process the final tag.
    #
    # NOTE(review): unlike the in-loop processing above, this pass has no
    #               'Did you mean ...' checks, so a misspelled tag at the
    #               very end of a comment is not reported - confirm whether
    #               that is intentional.
    if not asCurSection and len(aasSections) > 1:
        aasSections.pop(-1);
    if sCurTag in self.dTagHandlers:
        self.dTagHandlers[sCurTag](sCurTag, aasSections, iCurTagLine, iLine);
        cOpTags += 1;
    elif sCurTag.startswith('@op'):
        self.errorComment(iCurTagLine, 'Unknown tag: %s' % (sCurTag));
    elif sCurTag == '@default':
        sFlatDefault = self.flattenAllSections(aasSections);

    #
    # Don't allow default text in blocks containing @op*.
    #
    if cOpTags > 0 and sFlatDefault:
        self.errorComment(0, 'Untagged comment text is not allowed with @op*: %s' % (sFlatDefault,));

    return True;
4788
def parseMacroInvocation(self, sInvocation, offStartInvocation = 0):
    """
    Parses a macro invocation starting at offStartInvocation in sInvocation.

    When the closing parenthesis is not within sInvocation, the following
    lines from self.asLines (starting at index self.iLine) are appended one
    by one until it is found.

    Returns three values:
        1. A list of macro arguments, where the zero'th is the macro name.
        2. The offset following the macro invocation, into sInvocation of
           this is on the same line or into the last line if it is on a
           different line.
        3. Number of additional lines the invocation spans (i.e. zero if
           it is all contained within sInvocation).
    """
    # Isolate and validate the macro name preceding the opening parenthesis.
    offParen = sInvocation.find('(', offStartInvocation);
    if offParen <= offStartInvocation:
        self.raiseError("macro invocation open parenthesis not found");
    sMacroName = sInvocation[offStartInvocation:offParen].strip();
    if not self.oReMacroName.match(sMacroName):
        self.raiseError("invalid macro name '%s'" % (sMacroName,));
    asArgs = [sMacroName, ];

    # Scan the argument list character by character, tracking parenthesis
    # nesting and quoted strings so only top-level commas split arguments.
    iNextLine   = self.iLine;
    cNesting    = 1;
    offCur      = offParen + 1;
    offArgStart = offCur;
    offLineBase = 0;                # Offset in sInvocation where the last appended line starts.
    chInQuote   = None;
    while cNesting > 0:
        if offCur >= len(sInvocation):
            # Ran out of text, pull in the next source line.
            if iNextLine >= len(self.asLines):
                self.error('macro invocation beyond end of file');
                return (asArgs, offCur - offLineBase, iNextLine - self.iLine);
            offLineBase  = offCur;
            sInvocation += self.asLines[iNextLine];
            iNextLine   += 1;
        chCur = sInvocation[offCur];

        if chInQuote:
            if chCur == '\\' and offCur + 1 < len(sInvocation):
                offCur += 1;        # Skip the escaped character.
            elif chCur == chInQuote:
                chInQuote = None;
        elif chCur == '"' or chCur == '\'':
            chInQuote = chCur;
        elif chCur == '(':
            cNesting += 1;
        elif chCur == ',' or chCur == ')':
            if cNesting == 1:
                asArgs.append(sInvocation[offArgStart:offCur].strip());
                offArgStart = offCur + 1;
            if chCur == ')':
                cNesting -= 1;
        offCur += 1;

    return (asArgs, offCur - offLineBase, iNextLine - self.iLine);
4845
def findAndParseMacroInvocationEx(self, sCode, sMacro, offStart = 0):
    """
    Looks for the first occurrence of sMacro in sCode at or after offStart
    and, if the next non-blank character is an opening parenthesis, parses
    it as a macro invocation.

    Only the first occurrence is considered, so a name that is merely a
    prefix of a longer identifier (e.g. 'FOO' in 'FOO_BAR(') is a miss.

    Returns (None, len(sCode), 0) if not found, otherwise the
    parseMacroInvocation() return value.
    """
    offHit = sCode.find(sMacro, offStart);
    # Note! lstrip + startswith rather than strip()[0], as the latter raises
    #       IndexError when nothing but blanks follow the macro name.
    if offHit >= 0 and sCode[offHit + len(sMacro):].lstrip().startswith('('):
        return self.parseMacroInvocation(sCode, offHit);
    return (None, len(sCode), 0);
4855
def findAndParseMacroInvocation(self, sCode, sMacro):
    """
    Convenience wrapper around findAndParseMacroInvocationEx that only
    returns the argument list.

    Returns None if not found, arguments as per parseMacroInvocation if found.
    """
    asArgs, _, _ = self.findAndParseMacroInvocationEx(sCode, sMacro);
    return asArgs;
4861
def findAndParseFirstMacroInvocation(self, sCode, asMacro):
    """
    Tries the macro names in asMacro in order, stopping at the first one
    that findAndParseMacroInvocation finds in sCode.

    Returns same as findAndParseMacroInvocation.
    """
    # The generator is lazy, so no further names are tried after the first hit.
    oResults = (self.findAndParseMacroInvocation(sCode, sMacro) for sMacro in asMacro);
    return next((asArgs for asArgs in oResults if asArgs is not None), None);
4871
def workerIemOpMnemonicEx(self, sMacro, sStats, sAsm, sForm, sUpper, sLower,  # pylint: disable=too-many-arguments
                          sDisHints, sIemHints, asOperands):
    """
    Processes one of the IEMOP_MNEMONIC0EX, IEMOP_MNEMONIC1EX, IEMOP_MNEMONIC2EX,
    IEMOP_MNEMONIC3EX, and IEMOP_MNEMONIC4EX macros.

    The macro parameters are cross-checked against what the @op* comment
    tags specified for the last instruction in self.aoCurInstrs (one is
    added if there are none), and any attribute not yet set (mnemonic,
    operands, encoding, stats name, hints) is taken from the macro.

    sMacro is the macro name (used in error messages), sStats, sForm,
    sUpper, sLower, sDisHints and sIemHints are the corresponding a_Xxx
    macro parameters, and asOperands is the list of a_OpN values.  sAsm is
    not used.

    Problems are reported via self.error().  Returns True.
    """
    #
    # Some invocation checks.
    #
    if sUpper != sUpper.upper():
        self.error('%s: bad a_Upper parameter: %s' % (sMacro, sUpper,));
    if sLower != sLower.lower():
        self.error('%s: bad a_Lower parameter: %s' % (sMacro, sLower,));
    if sUpper.lower() != sLower:
        self.error('%s: a_Upper and a_Lower parameters does not match: %s vs %s' % (sMacro, sUpper, sLower,));
    if not self.oReMnemonic.match(sLower):
        self.error('%s: invalid a_Lower: %s (valid: %s)' % (sMacro, sLower, self.oReMnemonic.pattern,));

    #
    # Check if sIemHints tells us to not consider this macro invocation.
    #
    if sIemHints.find('IEMOPHINT_SKIP_PYTHON') >= 0:
        return True;

    # Apply to the last instruction only for now.
    if not self.aoCurInstrs:
        self.addInstruction();
    oInstr = self.aoCurInstrs[-1];
    if oInstr.iLineMnemonicMacro == -1:
        oInstr.iLineMnemonicMacro = self.iLine;
    else:
        self.error('%s: already saw a IEMOP_MNEMONIC* macro on line %u for this instruction'
                   % (sMacro, oInstr.iLineMnemonicMacro,));

    # Mnemonic
    if oInstr.sMnemonic is None:
        oInstr.sMnemonic = sLower;
    elif oInstr.sMnemonic != sLower:
        self.error('%s: current instruction and a_Lower does not match: %s vs %s' % (sMacro, oInstr.sMnemonic, sLower,));

    # Process operands.
    if len(oInstr.aoOperands) not in [0, len(asOperands)]:
        self.error('%s: number of operands given by @opN does not match macro: %s vs %s'
                   % (sMacro, len(oInstr.aoOperands), len(asOperands),));
    for iOperand, sType in enumerate(asOperands):
        sWhere = g_kdOpTypes.get(sType, [None, None])[1];
        if sWhere is None:
            self.error('%s: unknown a_Op%u value: %s' % (sMacro, iOperand + 1, sType));
            if iOperand < len(oInstr.aoOperands): # error recovery.
                sWhere = oInstr.aoOperands[iOperand].sWhere;
                sType = oInstr.aoOperands[iOperand].sType;
            else:
                sWhere = 'reg';
                sType = 'Gb';
        if iOperand == len(oInstr.aoOperands):
            oInstr.aoOperands.append(Operand(sWhere, sType))
        elif oInstr.aoOperands[iOperand].sWhere != sWhere or oInstr.aoOperands[iOperand].sType != sType:
            self.error('%s: @op%u and a_Op%u mismatch: %s:%s vs %s:%s'
                       % (sMacro, iOperand + 1, iOperand + 1, oInstr.aoOperands[iOperand].sWhere,
                          oInstr.aoOperands[iOperand].sType, sWhere, sType,));

    # Encoding.
    if sForm not in g_kdIemForms:
        self.error('%s: unknown a_Form value: %s' % (sMacro, sForm,));
    else:
        if oInstr.sEncoding is None:
            oInstr.sEncoding = g_kdIemForms[sForm][0];
        elif g_kdIemForms[sForm][0] != oInstr.sEncoding:
            self.error('%s: current instruction @openc and a_Form does not match: %s vs %s (%s)'
                       % (sMacro, oInstr.sEncoding, g_kdIemForms[sForm], sForm));

        # Check the parameter locations for the encoding.
        if g_kdIemForms[sForm][1] is not None:
            if len(g_kdIemForms[sForm][1]) > len(oInstr.aoOperands):
                self.error('%s: The a_Form=%s has a different operand count: %s (form) vs %s'
                           % (sMacro, sForm, len(g_kdIemForms[sForm][1]), len(oInstr.aoOperands) ));
            else:
                for iOperand, sWhere in enumerate(g_kdIemForms[sForm][1]):
                    if oInstr.aoOperands[iOperand].sWhere != sWhere:
                        self.error('%s: current instruction @op%u and a_Form location does not match: %s vs %s (%s)'
                                   % (sMacro, iOperand + 1, oInstr.aoOperands[iOperand].sWhere, sWhere, sForm,));
                    sOpFormMatch = g_kdOpTypes[oInstr.aoOperands[iOperand].sType][4];
                    # Note! The 'V' check uses substring membership: str.find() returns -1
                    #       (which is true in boolean context) on a miss and 0 for a hit at
                    #       the start of the string, so neither a bare find() nor find() > 0
                    #       can tell whether the form name contains 'VEX' or 'XOP'.
                    if    (sOpFormMatch in [ 'REG', 'MEM', ] and sForm.find('_' + sOpFormMatch) < 0) \
                       or (sOpFormMatch in [ 'FIXED', ] and sForm.find(sOpFormMatch) < 0) \
                       or (sOpFormMatch == 'RM' and (sForm.find('_MEM') > 0 or sForm.find('_REG') > 0) ) \
                       or (sOpFormMatch == 'V' and (   not ('VEX' in sForm or 'XOP' in sForm) \
                                                    or sForm.replace('VEX','').find('V') < 0) ):
                        self.error('%s: current instruction @op%u and a_Form type does not match: %s/%s vs %s'
                                   % (sMacro, iOperand + 1, oInstr.aoOperands[iOperand].sType, sOpFormMatch, sForm, ));
                # Any operands beyond those covered by the form must be fixed ones.
                if len(g_kdIemForms[sForm][1]) < len(oInstr.aoOperands):
                    for iOperand in range(len(g_kdIemForms[sForm][1]), len(oInstr.aoOperands)):
                        if    oInstr.aoOperands[iOperand].sType != 'FIXED' \
                          and g_kdOpTypes[oInstr.aoOperands[iOperand].sType][0] != 'IDX_ParseFixedReg':
                            self.error('%s: Expected FIXED type operand #%u following operands given by a_Form=%s: %s (%s)'
                                       % (sMacro, iOperand, sForm, oInstr.aoOperands[iOperand].sType,
                                          oInstr.aoOperands[iOperand].sWhere));


        # Check @opcodesub
        if    oInstr.sSubOpcode \
          and g_kdIemForms[sForm][2] \
          and oInstr.sSubOpcode.find(g_kdIemForms[sForm][2]) < 0:
            self.error('%s: current instruction @opcodesub and a_Form does not match: %s vs %s (%s)'
                       % (sMacro, oInstr.sSubOpcode, g_kdIemForms[sForm][2], sForm,));

    # Stats.
    if not self.oReStatsName.match(sStats):
        self.error('%s: invalid a_Stats value: %s' % (sMacro, sStats,));
    elif oInstr.sStats is None:
        oInstr.sStats = sStats;
    elif oInstr.sStats != sStats:
        self.error('%s: mismatching @opstats and a_Stats value: %s vs %s'
                   % (sMacro, oInstr.sStats, sStats,));

    # Process the hints (simply merge with @ophints w/o checking anything).
    for sHint in sDisHints.split('|'):
        sHint = sHint.strip();
        if sHint.startswith('DISOPTYPE_'):
            sShortHint = sHint[len('DISOPTYPE_'):].lower();
            if sShortHint in g_kdHints:
                oInstr.dHints[sShortHint] = True; # (dummy value, using dictionary for speed)
            else:
                self.error('%s: unknown a_fDisHints value: %s' % (sMacro, sHint,));
        elif sHint != '0':
            self.error('%s: expected a_fDisHints value: %s' % (sMacro, sHint,));

    for sHint in sIemHints.split('|'):
        sHint = sHint.strip();
        if sHint.startswith('IEMOPHINT_'):
            sShortHint = sHint[len('IEMOPHINT_'):].lower();
            if sShortHint in g_kdHints:
                oInstr.dHints[sShortHint] = True; # (dummy value, using dictionary for speed)
            else:
                self.error('%s: unknown a_fIemHints value: %s' % (sMacro, sHint,));
        elif sHint != '0':
            self.error('%s: expected a_fIemHints value: %s' % (sMacro, sHint,));

    _ = sAsm;
    return True;
5011
def workerIemOpMnemonic(self, sMacro, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands):
    """
    Processes one of the IEMOP_MNEMONIC0, IEMOP_MNEMONIC1, IEMOP_MNEMONIC2,
    IEMOP_MNEMONIC3, and IEMOP_MNEMONIC4 macros.

    These lack the a_Stats and a_szMnemonic parameters of the EX variants,
    so both are derived from the lower case mnemonic and the operand list
    before handing everything on to workerIemOpMnemonicEx.
    """
    if asOperands:
        sStats = sLower + '_' + '_'.join(asOperands);
        sAsm   = sLower + ' ' + ','.join(asOperands);
    else:
        sStats = sLower;
        sAsm   = sLower;
    return self.workerIemOpMnemonicEx(sMacro, sStats, sAsm, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands);
5021
def workerIemMcBegin(self, sCode, offBeginStatementInCodeStr, offBeginStatementInLine):
    """
    Process a IEM_MC_BEGIN macro invocation.

    Creates a new McBlock and makes it the current one (self.oCurMcBlock).
    sCode is the code snippet being scanned, offBeginStatementInCodeStr the
    offset of the statement within sCode and offBeginStatementInLine its
    offset within the source line.

    Returns True.
    """
    if self.fDebugMc:
        self.debug('IEM_MC_BEGIN on %s off %s' % (self.iLine, offBeginStatementInLine,));
        #self.debug('%s<eos>' % (sCode,));

    # We need a function to attach the block to, and blocks cannot nest.
    if not self.oCurFunction:
        self.raiseError('IEM_MC_BEGIN w/o current function (%s)' % (sCode,));
    if self.oCurMcBlock:
        self.raiseError('IEM_MC_BEGIN before IEM_MC_END. Previous IEM_MC_BEGIN at line %u' % (self.oCurMcBlock.iBeginLine,));

    # The indent is the distance from the start of the (sub-)line holding the
    # statement; rfind yields -1 when there is no preceding newline, which
    # makes the subtrahend zero.
    cchIndent = offBeginStatementInCodeStr - (sCode.rfind('\n', 0, offBeginStatementInCodeStr) + 1);
    #self.debug('cchIndent=%s sFunc=%s' % (cchIndent, self.oCurFunction.sName));

    # Start a new block.
    self.oCurMcBlock = McBlock(self.sSrcFile, self.iLine, offBeginStatementInLine,
                               self.oCurFunction, self.iMcBlockInFunc, cchIndent);

    # Only add it to the global list when not excluded by the preprocessor
    # conditionals for the host architecture.
    try:
        fExcluded = (    self.aoCppCondStack
                     and self.sHostArch
                     and not self.PreprocessorConditional.isInBlockForArch(self.aoCppCondStack, self.sHostArch,
                                                                           self.iLine));
        if not fExcluded:
            g_aoMcBlocks.append(self.oCurMcBlock);
            self.cTotalMcBlocks += 1;
    except Exception as oXcpt:
        self.raiseError(oXcpt.args[0]);

    self.iMcBlockInFunc += 1;
    return True;
5058
5059 @staticmethod
def extractLinesFromMacroExpansionLine(sRawLine, offBegin, offEnd, sBeginStmt = 'IEM_MC_BEGIN'):
    """
    Helper used by workerIemMcEnd and workerIemMcDeferToCImplXRet for
    extracting a statement block from a string that's the result of macro
    expansion and therefore contains multiple "sub-lines" as it were.

    Returns list of lines covering offBegin thru offEnd in sRawLine.  Each
    entry is newline terminated, so when the extracted text ends with a
    newline the last entry is a lone newline.
    """
    # Chop off everything following the sub-line containing offEnd.
    offNewline = sRawLine.find('\n', offEnd);
    sChunk = sRawLine[:offNewline + 1] if offNewline > 0 else sRawLine;

    # Drop the sub-lines preceding the one containing offBegin, and if the
    # statement isn't the first thing on that sub-line, whatever precedes it.
    offSubLine = sChunk.rfind('\n', 0, offBegin) + 1;
    sChunk = sChunk[offSubLine:];
    if not sChunk.strip().startswith(sBeginStmt):
        sChunk = sChunk[offBegin - offSubLine:];

    asRet = [];
    for sSubLine in sChunk.split('\n'):
        asRet.append(sSubLine + '\n');
    return asRet;
5079
def workerIemMcEnd(self, offEndStatementInLine):
    """
    Process a IEM_MC_END macro invocation.

    Collects the source lines of the current MC block, trims the final
    line right after the 'IEM_MC_END();' statement, completes the block
    and clears self.oCurMcBlock.

    Returns True.
    """
    if self.fDebugMc:
        self.debug('IEM_MC_END on %s off %s' % (self.iLine, offEndStatementInLine,));

    # Check preconditions.
    oBlock = self.oCurMcBlock;
    if not oBlock:
        self.raiseError('IEM_MC_END w/o IEM_MC_BEGIN.');

    #
    # HACK ALERT! For blocks originating from macro expansion the start and
    #             end line will be the same, but the line has multiple
    #             newlines inside it.  So, we have to do some extra tricks
    #             to get the lines out of there. We ASSUME macros aren't
    #             messy, but keep IEM_MC_BEGIN/END on separate lines.
    #
    if self.iLine <= oBlock.iBeginLine:
        # The whole block lives on a single (expanded) line.
        oBlock.iMacroExp = McBlock.kiMacroExp_Entire;
        asLines = self.extractLinesFromMacroExpansionLine(self.asLines[self.iLine - 1],
                                                          oBlock.offBeginLine, offEndStatementInLine);
    else:
        asLines = self.asLines[oBlock.iBeginLine - 1 : self.iLine];
        if not asLines[0].strip().startswith('IEM_MC_BEGIN'):
            self.raiseError('IEM_MC_BEGIN is not the first word on the line');

        # Hack alert! Detect mixed tail/head macros a la cmpxchg16b and split up the lines
        #             so we can deal correctly with IEM_MC_END below and everything else.
        for sLine in asLines:
            cNewLines = sLine.count('\n');
            assert cNewLines > 0;
            if cNewLines > 1:
                # Translate the end offset from the last line to the joined string.
                offEndInJoined = offEndStatementInLine + sum(len(s) for s in asLines) - len(asLines[-1]);
                asLines = self.extractLinesFromMacroExpansionLine(''.join(asLines), oBlock.offBeginLine,
                                                                  offEndInJoined);
                oBlock.iMacroExp = McBlock.kiMacroExp_Partial;
                break;

    #
    # Strip anything following the IEM_MC_END(); statement in the final line,
    # so that we don't carry on any trailing 'break' after macro expansions
    # like for iemOp_movsb_Xb_Yb.
    #
    while asLines[-1].strip() == '':
        asLines.pop();
    sFinal        = asLines[-1];
    offEndInFinal = sFinal.find('IEM_MC_END');
    offFinalEnd   = offEndInFinal;
    if offFinalEnd < 0:
        self.raiseError('bogus IEM_MC_END: Not in final line: %s' % (sFinal,));
    offFinalEnd += len('IEM_MC_END');

    # The name must be followed by '(', ')' and ';', each optionally
    # preceded by whitespace.
    for chExpected, sErrorFmt in ( ('(', 'bogus IEM_MC_END: Expected "(" at %s: %s'),
                                   (')', 'bogus IEM_MC_END: Expected ")" at %s: %s'),
                                   (';', 'bogus IEM_MC_END: Expected ";" at %s: %s'), ):
        while sFinal[offFinalEnd].isspace():
            offFinalEnd += 1;
        if sFinal[offFinalEnd] != chExpected:
            self.raiseError(sErrorFmt % (offFinalEnd, sFinal,));
        offFinalEnd += 1;

    asLines[-1] = sFinal[: offFinalEnd];

    #
    # Complete and discard the current block.
    #
    self.oCurMcBlock.complete(self.iLine, offEndStatementInLine,
                              offEndStatementInLine + offFinalEnd - offEndInFinal, asLines);
    self.oCurMcBlock = None;
    return True;
5158
def workerIemMcDeferToCImplXRet(self, sCode, offBeginStatementInCodeStr, offBeginStatementInLine, cParams):
    """
    Process a IEM_MC_DEFER_TO_CIMPL_[1-5]_RET macro invocation.

    The statement is turned into a complete, single statement McBlock which
    is appended to g_aoMcBlocks.  sCode is the code snippet being scanned,
    offBeginStatementInCodeStr the offset of the statement within sCode,
    offBeginStatementInLine its offset within the source line, and cParams
    the digit from the macro name.

    Advances self.iLine to the line with the closing parenthesis when the
    invocation spans multiple lines.  Returns True.
    """
    sStmt = 'IEM_MC_DEFER_TO_CIMPL_%d_RET' % (cParams,);
    if self.fDebugMc:
        self.debug('%s on %s off %s' % (sStmt, self.iLine, offBeginStatementInLine,));
        #self.debug('%s<eos>' % (sCode,));

    # Check preconditions.
    if not self.oCurFunction:
        self.raiseError('%s w/o current function (%s)' % (sStmt, sCode,));
    if self.oCurMcBlock:
        self.raiseError('%s inside IEM_MC_BEGIN block starting at line %u' % (sStmt, self.oCurMcBlock.iBeginLine,));

    # Figure out the indent level the block starts at, adjusting for expanded multiline macros.
    cchIndent = offBeginStatementInCodeStr;
    offPrevNewline = sCode.rfind('\n', 0, offBeginStatementInCodeStr);
    if offPrevNewline >= 0:
        cchIndent -= offPrevNewline + 1;
    #self.debug('cchIndent=%s offPrevNewline=%s sFunc=%s' % (cchIndent, offPrevNewline, self.oCurFunction.sName));

    # Start a new block.
    oMcBlock = McBlock(self.sSrcFile, self.iLine, offBeginStatementInLine,
                       self.oCurFunction, self.iMcBlockInFunc, cchIndent);

    # Parse the statement.
    asArgs, offAfter, cLines = self.findAndParseMacroInvocationEx(sCode, sStmt, offBeginStatementInCodeStr);
    if asArgs is None:
        self.raiseError('%s: Closing parenthesis not found!' % (sStmt,));
    if len(asArgs) != cParams + 3:
        self.raiseError('%s: findAndParseMacroInvocationEx returns %s args, expected %s!'
                        % (sStmt, len(asArgs), cParams + 3,));

    oMcBlock.aoStmts = [ McBlock.parseMcDeferToCImpl(oMcBlock, asArgs[0], asArgs[1:]), ];

    # These MCs are not typically part of macro expansions, but let's get
    # it out of the way immediately if it's the case.
    if cLines > 0 or self.asLines[oMcBlock.iBeginLine - 1].count('\n') <= 1:
        asLines = self.asLines[self.iLine - 1 : self.iLine - 1 + cLines + 1];
        assert offAfter < len(asLines[-1]) and asLines[-1][offAfter] == ';', \
               'iBeginLine=%d iLine=%d offAfter=%s line: "%s"' % (oMcBlock.iBeginLine, self.iLine, offAfter, asLines[-1],);
        asLines[-1] = asLines[-1][:offAfter + 1];
    else:
        asLines = self.extractLinesFromMacroExpansionLine(self.asLines[self.iLine - 1], offBeginStatementInCodeStr,
                                                          offAfter, sStmt);
        # The helper newline terminates every entry, so text ending with a
        # newline yields a trailing blank entry.  Drop those (like
        # workerIemMcEnd does) so the last entry is the one with the ';'.
        while asLines and not asLines[-1].strip():
            asLines.pop();
        assert asLines[-1].find(';') >= 0;
        asLines[-1] = asLines[-1][:asLines[-1].find(';') + 1];

    assert asLines[0].find(sStmt) >= 0;
    #if not asLines[0].strip().startswith(sStmt):
    #    self.raiseError('%s is not the first word on the line: %s' % (sStmt, asLines[0].strip()));

    # Advance to the line with the closing ')'.
    self.iLine += cLines;

    # Complete the block.
    oMcBlock.complete(self.iLine, 0 if cLines > 0 else offBeginStatementInCodeStr, offAfter + 1, asLines);

    g_aoMcBlocks.append(oMcBlock);
    self.cTotalMcBlocks += 1;
    self.iMcBlockInFunc += 1;

    return True;
5223
def workerStartFunction(self, asArgs):
    """
    Deals with the start of a decoder function.

    These are all defined using one of the FNIEMOP*_DEF* and FNIEMOP_*STUB*
    macros, so we get a argument list for these where the 0th argument is the
    macro name.

    Returns True.
    """
    # The previous function (if any) ends on the line before this one.
    oPrevFunction = self.oCurFunction;
    if oPrevFunction:
        iLastLine = self.iLine - 1;
        oPrevFunction.complete(iLastLine, self.asLines[oPrevFunction.iBeginLine - 1 : iLastLine]);

    # The function name is the first macro argument.
    self.oCurFunction = DecoderFunction(self.sSrcFile, self.iLine, asArgs[1], asArgs);
    return True;
5239
def checkCodeForMacro(self, sCode, offLine):
    """
    Checks code for relevant macro invocation.

    sCode is the code snippet to scan and offLine the offset of the snippet
    within the current source line (used for translating IEM_MC_* match
    offsets into line offsets).

    Returns True if a decoder/worker function definition or any IEM_MC_
    text was found, False if not.
    """
    # Nothing to do unless there is an opening parenthesis past offset zero.
    if sCode.find('(') <= 0:
        return False;

    #
    # Look for instruction decoder function definitions. ASSUME single line.
    #
    asArgs = self.findAndParseFirstMacroInvocation(sCode,
                                                   [ 'FNIEMOP_DEF',
                                                     'FNIEMOPRM_DEF',
                                                     'FNIEMOP_STUB',
                                                     'FNIEMOP_STUB_1',
                                                     'FNIEMOP_UD_STUB',
                                                     'FNIEMOP_UD_STUB_1' ]);
    if asArgs is not None:
        self.workerStartFunction(asArgs);
        #self.debug('%s: oCurFunction=%s' % (self.iLine, self.oCurFunction.sName,));

        if not self.aoCurInstrs:
            self.addInstruction();
        for oInstr in self.aoCurInstrs:
            if oInstr.iLineFnIemOpMacro != -1:
                self.error('%s: already seen a FNIEMOP_XXX macro for %s' % (asArgs[0], oInstr,) );
            else:
                oInstr.iLineFnIemOpMacro = self.iLine;

        fIsStub = asArgs[0].find('STUB') > 0;
        self.setInstrunctionAttrib('sFunction', asArgs[1]);
        self.setInstrunctionAttrib('fStub', fIsStub, fOverwrite = True);
        self.setInstrunctionAttrib('fUdStub', asArgs[0].find('UD_STUB') > 0, fOverwrite = True);
        if fIsStub:
            self.doneInstructions(fEndOfFunction = True);
        return True;

    #
    # Check for worker function definitions, so we can get a context for MC blocks.
    #
    asArgs = self.findAndParseFirstMacroInvocation(sCode,
                                                   [ 'FNIEMOP_DEF_1',
                                                     'FNIEMOP_DEF_2', ]);
    if asArgs is not None:
        self.workerStartFunction(asArgs);
        #self.debug('%s: oCurFunction=%s (%s)' % (self.iLine, self.oCurFunction.sName, asArgs[0]));
        return True;

    #
    # IEMOP_HLP_DONE_VEX_DECODING_*
    #
    asArgs = self.findAndParseFirstMacroInvocation(sCode,
                                                   [ 'IEMOP_HLP_DONE_VEX_DECODING',
                                                     'IEMOP_HLP_DONE_VEX_DECODING_L0',
                                                     'IEMOP_HLP_DONE_VEX_DECODING_NO_VVVV',
                                                     'IEMOP_HLP_DONE_VEX_DECODING_L0_AND_NO_VVVV',
                                                     ]);
    if asArgs is not None:
        sMacro = asArgs[0];
        if sMacro in ('IEMOP_HLP_DONE_VEX_DECODING_L0', 'IEMOP_HLP_DONE_VEX_DECODING_L0_AND_NO_VVVV', ):
            # The L0 variants imply the vex_l_zero hint; complain when the
            # mnemonic macro didn't give it and set it regardless.
            for oInstr in self.aoCurInstrs:
                if 'vex_l_zero' in oInstr.dHints:
                    continue;
                if oInstr.iLineMnemonicMacro >= 0:
                    self.errorOnLine(oInstr.iLineMnemonicMacro,
                                     'Missing IEMOPHINT_VEX_L_ZERO! (%s on line %d)' % (sMacro, self.iLine,));
                oInstr.dHints['vex_l_zero'] = True;

    #
    # IEMOP_MNEMONIC*
    #
    if sCode.find('IEMOP_MNEMONIC') >= 0:
        # IEMOP_MNEMONIC(a_Stats, a_szMnemonic) IEMOP_INC_STATS(a_Stats)
        asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC');
        if asArgs is not None and len(self.aoCurInstrs) == 1:
            oInstr = self.aoCurInstrs[0];
            if oInstr.sStats is None:
                oInstr.sStats = asArgs[1];
            self.deriveMnemonicAndOperandsFromStats(oInstr, asArgs[1]);

        # IEMOP_MNEMONIC<N>EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, [a_Op1, .., a_OpN,]
        #                     a_fDisHints, a_fIemHints)
        # The N operands follow a_Lower, pushing the two hint parameters N slots back.
        for cOperands, sMnemonicMacro in enumerate(( 'IEMOP_MNEMONIC0EX', 'IEMOP_MNEMONIC1EX', 'IEMOP_MNEMONIC2EX',
                                                     'IEMOP_MNEMONIC3EX', 'IEMOP_MNEMONIC4EX', )):
            asArgs = self.findAndParseMacroInvocation(sCode, sMnemonicMacro);
            if asArgs is not None:
                iDisHints = 6 + cOperands;
                self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5],
                                           asArgs[iDisHints], asArgs[iDisHints + 1], asArgs[6:iDisHints]);

        # IEMOP_MNEMONIC<N>(a_Form, a_Upper, a_Lower, [a_Op1, .., a_OpN,] a_fDisHints, a_fIemHints)
        for cOperands, sMnemonicMacro in enumerate(( 'IEMOP_MNEMONIC0', 'IEMOP_MNEMONIC1', 'IEMOP_MNEMONIC2',
                                                     'IEMOP_MNEMONIC3', 'IEMOP_MNEMONIC4', )):
            asArgs = self.findAndParseMacroInvocation(sCode, sMnemonicMacro);
            if asArgs is not None:
                iDisHints = 4 + cOperands;
                self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3],
                                         asArgs[iDisHints], asArgs[iDisHints + 1], asArgs[4:iDisHints]);

    #
    # IEM_MC_BEGIN + IEM_MC_END.
    # We must support multiple instances per code snippet.
    #
    offCode = sCode.find('IEM_MC_');
    if offCode >= 0:
        for oMatch in self.oReMcBeginEnd.finditer(sCode, offCode):
            sWhat = oMatch.group(1);
            if sWhat == 'END':
                self.workerIemMcEnd(offLine + oMatch.start());
            elif sWhat == 'BEGIN':
                self.workerIemMcBegin(sCode, oMatch.start(), offLine + oMatch.start());
            else:
                # The character following the DEFER_TO_CIMPL_ prefix is the digit from the macro name.
                self.workerIemMcDeferToCImplXRet(sCode, oMatch.start(), offLine + oMatch.start(),
                                                 int(sWhat[len('DEFER_TO_CIMPL_')]));
        return True;

    return False;
5383
def workerPreprocessorRecreateMacroRegex(self):
    """
    Recreates self.oReMacros when self.dMacros changes.

    The expression is a single group of alternatives, one per macro in
    self.dMacros:  function-like macros (asArgs is not None) must be
    followed by an opening parenthesis, the others end at a word boundary.
    self.oReMacros is set to None when there are no macros.

    Returns True.
    """
    if not self.dMacros:
        self.oReMacros = None;
        return True;

    # Note! Raw strings are used for the regex fragments since '\s' and '\('
    #       are invalid escape sequences in ordinary string literals.
    asAlternatives = [];
    for sName, oMacro in self.dMacros.items():
        if oMacro.asArgs is not None:
            asAlternatives.append(sName + r'\s*\(');
        else:
            asAlternatives.append(sName + r'\b');
    self.oReMacros = re.compile(r'\b(' + '|'.join(asAlternatives) + ')');
    return True;
5404
def workerPreprocessorDefine(self, sRest):
    """
    Handles a macro #define, the sRest is what follows after the directive word.

    Only macros whose body mentions IEM_MC_BEGIN or IEM_MC_END are recorded
    in self.dMacros, everything else is ignored.  Continuation lines are
    consumed, advancing self.iLine.

    Returns True, or the self.error() result if the definition cannot be
    parsed.
    """
    assert sRest[-1] == '\n';

    #
    # If using line continuation, just concat all the lines together,
    # preserving the newline character but not the escaping.
    #
    iFirstLine = self.iLine;
    while sRest.endswith('\\\n') and self.iLine < len(self.asLines):
        sNextLine = self.asLines[self.iLine];
        self.iLine += 1;
        sRest = sRest[:-2].rstrip() + '\n' + sNextLine;
    #self.debug('workerPreprocessorDefine: sRest=%s<EOS>' % (sRest,));

    #
    # Use regex to split out the name, argument list and body.
    # If this fails, we assume it's a simple macro.
    #
    oMatch = self.oReHashDefine2.match(sRest);
    if not oMatch:
        oMatch = self.oReHashDefine3.match(sRest);
        if not oMatch:
            self.debug('workerPreprocessorDefine: wtf? sRest=%s' % (sRest,));
            return self.error('bogus macro definition: %s' % (sRest,));
        asParams = None;
        sBody    = oMatch.group(2);
    else:
        sParamList = oMatch.group(2).strip();
        asParams   = None;
        if sParamList:
            asParams = [sParam.strip() for sParam in sParamList.split(',')];
        sBody = oMatch.group(3);
    sName = oMatch.group(1);
    assert sName == sName.strip();
    #self.debug('workerPreprocessorDefine: sName=%s asArgs=%s sBody=%s<EOS>' % (sName, asParams, sBody));

    #
    # Is this of any interest to us?  We do NOT support MC blocks within
    # nested macro expansion, just to avoid lots of extra work.
    #
    # There is only limited support for macros expanding to partial MC blocks.
    #
    # Note! IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX and other macros someone making
    #       use of IEMOP_RAISE_INVALID_LOCK_PREFIX_RET() will be ignored here and
    #       dealt with by overriding IEMOP_RAISE_INVALID_LOCK_PREFIX_RET and its
    #       siblings in the recompiler.  This is a lot simpler than nested macro
    #       expansion and lots of heuristics for locating all the relevant macros.
    #       Also, this way we don't produce lots of unnecessary threaded functions.
    #
    if not ("IEM_MC_BEGIN" in sBody or "IEM_MC_END" in sBody):
        #self.debug('workerPreprocessorDefine: irrelevant (%s: %s)' % (sName, sBody));
        return True;

    #
    # Add the macro.
    #
    if self.fDebugPreproc:
        self.debug('#define %s on line %u' % (sName, self.iLine,));
    self.dMacros[sName] = SimpleParser.Macro(sName, asParams, sBody.strip(), iFirstLine);
    return self.workerPreprocessorRecreateMacroRegex();
5465
def workerPreprocessorUndef(self, sRest):
    """
    Handles a macro #undef, the sRest is what follows after the directive word.

    Returns True if the macro isn't one we are tracking, otherwise the
    result of recreating the macro regex after removing it.
    """
    # Quick comment strip (everything from the first slash on) and isolate the name.
    offSlash = sRest.find('/');
    sName = (sRest[:offSlash] if offSlash > 0 else sRest).strip();

    # Nothing to do unless it is a macro we are tracking.
    if sName not in self.dMacros:
        return True;

    if self.fDebugPreproc:
        self.debug('#undef %s on line %u' % (sName, self.iLine,));
    del self.dMacros[sName];
    return self.workerPreprocessorRecreateMacroRegex();
5484
def workerPreprocessorIfOrElif(self, sDirective, sRest):
    """
    Processes an #if, #ifdef, #ifndef or #elif directive.

    sDirective is the directive word without the hash and sRest is the text
    following it.  Continuation lines are consumed from self.asLines,
    advancing self.iLine.

    A new conditional is pushed onto self.aoCppCondStack, except for #elif
    which is appended to the aoElif list of the innermost conditional.

    Returns True.  Problems are reported thru self.raiseError.
    """
    fIsElif = sDirective == 'elif';

    # An #elif needs an open conditional which has not reached its #else yet.
    if fIsElif:
        if not self.aoCppCondStack:
            self.raiseError('#elif without #if');
        if self.aoCppCondStack[-1].fInElse:
            self.raiseError('#elif after #else');

    # Fold continuation lines into one: the trailing backslash + newline go,
    # as do any blanks in front of them, and the next line is joined on with
    # a single space.
    while self.iLine < len(self.asLines) and sRest.endswith('\\\n'):
        sNextLine = self.asLines[self.iLine];
        self.iLine += 1;
        sRest = sRest[:-2].rstrip() + ' ' + sNextLine;

    # Get rid of comments and surrounding blanks.
    sRest = self.stripComments(sRest).strip();

    # Create the conditional, converting any trouble into a parser error.
    try:
        oPreprocCond = self.PreprocessorConditional(sDirective, sRest);
    except Exception as oXcpt:
        self.raiseError(oXcpt.args[0]);

    # Stash it.
    if fIsElif:
        self.aoCppCondStack[-1].aoElif.append(oPreprocCond);
    else:
        self.aoCppCondStack.append(oPreprocCond);
    return True;
5523
def workerPreprocessorElse(self):
    """
    Processes an #else directive.

    Flags the innermost entry of self.aoCppCondStack as being in its else
    branch.  Returns True.  A missing #if or a second #else is reported thru
    self.raiseError.
    """
    aoStack = self.aoCppCondStack;
    if not aoStack:
        self.raiseError('#else without #if');

    oInnermost = aoStack[-1];
    if oInnermost.fInElse:
        self.raiseError('Another #else after #else');

    oInnermost.fInElse = True;
    return True;
5535
def workerPreprocessorEndif(self):
    """
    Processes an #endif directive.

    Drops the innermost entry of self.aoCppCondStack and returns True.  An
    #endif without any open conditional is reported thru self.raiseError.
    """
    aoStack = self.aoCppCondStack;
    if not aoStack:
        self.raiseError('#endif without #if');

    del aoStack[-1];
    return True;
5545
def checkPreprocessorDirective(self, sLine):
    """
    Dispatches a preprocessor directive line to the matching worker.

    sLine must contain a hash character.  Returns whatever the worker
    returns, or False if the directive is not one we handle.
    """
    cchLine = len(sLine);

    # Find the hash and step over any blanks between it and the directive.
    offHash = sLine.find('#');
    assert offHash >= 0;
    offWord = offHash + 1;
    while offWord < cchLine and sLine[offWord].isspace():
        offWord += 1;

    # The directive word runs up to the next blank (or the end of the line).
    offEnd = offWord;
    while offEnd < cchLine and not sLine[offEnd].isspace():
        offEnd += 1;
    sDirective = sLine[offWord:offEnd];
    if self.fDebugPreproc:
        self.debug('line %d: #%s...' % (self.iLine, sDirective));

    # Advance over the blanks following the directive.  Note that we stop on
    # the last blank rather than after it, so the tail handed to the workers
    # starts with one whitespace character when there is any.
    offTail = offEnd;
    while offTail + 1 < cchLine and sLine[offTail + 1].isspace():
        offTail += 1;
    sTail = sLine[offTail:];

    # Conditionals first, then the macro directives.
    if sDirective in ('if', 'ifdef', 'ifndef', 'elif',):
        return self.workerPreprocessorIfOrElif(sDirective, sTail);
    if sDirective == 'else':
        return self.workerPreprocessorElse();
    if sDirective == 'endif':
        return self.workerPreprocessorEndif();
    if sDirective == 'define':
        return self.workerPreprocessorDefine(sTail);
    if sDirective == 'undef':
        return self.workerPreprocessorUndef(sTail);

    # Something we do not care about (#include, #pragma, ...).
    if self.fDebugPreproc:
        self.debug('line %d: Unknown preprocessor directive: %s' % (self.iLine, sDirective));
    return False;
5585
def expandMacros(self, sLine, oMatch):
    """
    Expands the macro invocation that oMatch located in sLine.

    Only a single invocation is handled: the one oMatch matched.  The macro
    is looked up in self.dMacros by the name in the first match group, where
    a trailing '(' indicates an invocation with arguments.

    Returns sLine with the invocation replaced by the macro expansion.
    Invocations not closed on the same line and argument count mismatches
    are reported thru self.raiseError.
    """
    #
    # Work out what is being invoked and how.
    #
    offMatch = oMatch.start();
    offAfterName = oMatch.end();
    sName = oMatch.group(1);
    assert sName == sLine[offMatch:offAfterName];
    fWithArgs = sName.endswith('(');
    if fWithArgs:
        sName = sName[:-1].strip();
    oMacro = self.dMacros[sName] # type: SimpleParser.Macro

    #
    # The simple case is an invocation without any parameter list.
    #
    if not fWithArgs:
        if self.fDebugPreproc:
            self.debug('expanding simple macro %s on line %u' % (sName, self.iLine,));
        return sLine[:offMatch] + oMacro.expandMacro(self) + sLine[offAfterName:];

    #
    # Invocation with parameters.  Scan for the closing parenthesis while
    # splitting the text on top-level commas.  ASSUMES everything is on the
    # same line!
    #
    asArgs = [];
    cDepth = 1;
    offArg = offAfterName;
    for offCur in range(offAfterName, len(sLine)):
        ch = sLine[offCur];
        if ch == '(':
            cDepth += 1;
        elif ch == ')':
            cDepth -= 1;
            if cDepth == 0:
                asArgs.append(sLine[offArg:offCur].strip());
                break;
        elif ch == ',' and cDepth == 1:
            asArgs.append(sLine[offArg:offCur].strip());
            offArg = offCur + 1;
    else:
        # Ran off the end of the line.  (Assumes raiseError does not return.)
        self.raiseError('expandMacros: Invocation of macro %s spans multiple lines!' % (sName,));

    # An empty pair of parentheses scans as one empty argument, which is
    # really no arguments at all for a macro not taking any.
    if len(oMacro.asArgs) == 0 and asArgs == ['']:
        asArgs = [];
    if len(oMacro.asArgs) != len(asArgs):
        self.raiseError('expandMacros: Argument mismatch in %s invocation' % (oMacro.sName,));

    #
    # Splice the expansion in between what comes before the macro name and
    # what follows the closing parenthesis.
    #
    if self.fDebugPreproc:
        self.debug('expanding macro %s on line %u with arguments %s' % (sName, self.iLine, asArgs));
    return sLine[:offMatch] + oMacro.expandMacro(self, asArgs) + sLine[offCur + 1 :];
5644
def parse(self):
    """
    Parses the given file.

    Walks self.asLines from self.iLine onwards, switching between the code
    state (self.kiCode) and the multi-line comment state
    (self.kiCommentMulti) and handing the pieces to the worker methods:
    preprocessor directives, code fragments, completed comments and
    function tables.

    Returns the result of printErrors() (the number of errors).
    Raises exception on fatal trouble.
    """
    #self.debug('Parsing %s' % (self.sSrcFile,));

    #
    # Loop thru the lines.
    #
    # Please mind that self.iLine may be updated by checkCodeForMacro and
    # other worker methods.
    #
    while self.iLine < len(self.asLines):
        # Note! self.iLine is advanced right away, so from here on it is the
        #       one-based number of the current line and the current line
        #       itself is found at index self.iLine - 1.
        sLine = self.asLines[self.iLine];
        self.iLine += 1;
        #self.debug('line %u: %s' % (self.iLine, sLine[:-1]));

        # Expand macros we know about if we're currently in code.
        # The expanded text also replaces the original entry in self.asLines.
        if self.iState == self.kiCode and self.oReMacros:
            oMatch = self.oReMacros.search(sLine);
            if oMatch:
                sLine = self.expandMacros(sLine, oMatch);
                if self.fDebugPreproc:
                    self.debug('line %d: expanded\n%s ==>\n%s' % (self.iLine, self.asLines[self.iLine - 1], sLine[:-1],));
                self.asLines[self.iLine - 1] = sLine;

        # Check for preprocessor directives before comments and other stuff.
        # ASSUMES preprocessor directives don't end with multiline comments.
        if self.iState == self.kiCode and sLine.lstrip().startswith('#'):
            if self.fDebugPreproc:
                self.debug('line %d: preproc' % (self.iLine,));
            self.checkPreprocessorDirective(sLine);
        else:
            # Look for comments.
            offSlash = sLine.find('/');
            if offSlash >= 0:
                # Take the scanning path unless the first slash starts a C++
                # line comment while we are in code.
                if offSlash + 1 >= len(sLine) or sLine[offSlash + 1] != '/' or self.iState != self.kiCode:
                    offLine = 0;
                    while offLine < len(sLine):
                        if self.iState == self.kiCode:
                            # Look for substantial multiline comment so we pass the following MC as a whole line:
                            #   IEM_MC_ARG_CONST(uint8_t, bImmArg, /*=*/ bImm, 2);
                            # Note! We ignore C++ comments here, assuming these aren't used in lines with C-style comments.
                            offHit = sLine.find('/*', offLine);
                            while offHit >= 0:
                                offEnd = sLine.find('*/', offHit + 2);
                                if offEnd < 0 or offEnd - offHit >= 16: # 16 chars is a bit random.
                                    break;
                                offHit = sLine.find('/*', offEnd);

                            if offHit >= 0:
                                # Code up to the comment start is checked, then we switch to comment mode.
                                self.checkCodeForMacro(sLine[offLine:offHit], offLine);
                                self.sComment = '';
                                self.iCommentLine = self.iLine;
                                self.iState = self.kiCommentMulti;
                                offLine = offHit + 2;
                            else:
                                # No (substantial) comment start, the rest of the line is code.
                                self.checkCodeForMacro(sLine[offLine:], offLine);
                                offLine = len(sLine);

                        elif self.iState == self.kiCommentMulti:
                            offHit = sLine.find('*/', offLine);
                            if offHit >= 0:
                                # End of the comment: collect the last bit, go back to code and parse it.
                                self.sComment += sLine[offLine:offHit];
                                self.iState = self.kiCode;
                                offLine = offHit + 2;
                                self.parseComment();
                            else:
                                # The comment continues on the next line.
                                self.sComment += sLine[offLine:];
                                offLine = len(sLine);
                        else:
                            assert False;
                # C++ line comment.  Only what precedes it is checked (nothing if it starts the line).
                elif offSlash > 0:
                    self.checkCodeForMacro(sLine[:offSlash], 0);

            # No slash, but append the line if in multi-line comment.
            elif self.iState == self.kiCommentMulti:
                #self.debug('line %d: multi' % (self.iLine,));
                self.sComment += sLine;

            # No slash, but check code line for relevant macro.
            elif (    self.iState == self.kiCode
                  and (sLine.find('IEMOP_') >= 0 or sLine.find('FNIEMOPRM_DEF') >= 0 or sLine.find('IEM_MC') >= 0)):
                #self.debug('line %d: macro' % (self.iLine,));
                self.checkCodeForMacro(sLine, 0);

            # If the line is a '}' in the first position, complete the instructions.
            elif self.iState == self.kiCode and sLine[0] == '}':
                #self.debug('line %d: }' % (self.iLine,));
                self.doneInstructions(fEndOfFunction = True);

            # Look for instruction table on the form 'IEM_STATIC const PFNIEMOP g_apfnVexMap3'
            # so we can check/add @oppfx info from it.
            elif self.iState == self.kiCode and sLine.find('PFNIEMOP') > 0 and self.oReFunTable.match(sLine):
                self.parseFunctionTable(sLine);

    # End of file: complete whatever is pending and report the statistics.
    # The max() guards the percentage against files without instructions.
    self.doneInstructions(fEndOfFunction = True);
    self.debug('%3s%% / %3s stubs out of %4s instructions and %4s MC blocks in %s'
               % (self.cTotalStubs * 100 // max(self.cTotalInstr, 1), self.cTotalStubs, self.cTotalInstr,
                  self.cTotalMcBlocks, os.path.basename(self.sSrcFile),));
    return self.printErrors();
5750
## The parsed content of IEMAllInstCommonBodyMacros.h.
## Set by the first __parseFileByName call and passed on to the parser of each source file.
g_oParsedCommonBodyMacros = None # type: SimpleParser
5753
def __parseFileByName(sSrcFile, sDefaultMap, sHostArch):
    """
    Parses one source file for instruction specifications.

    On the first call IEMAllInstCommonBodyMacros.h is parsed too (looked for
    next to sSrcFile first, then next to this script) and the resulting
    parser is cached in g_oParsedCommonBodyMacros, from where it is passed
    to the parser of each source file.

    Returns a (cErrors, oParser) tuple, where cErrors is the return value
    of SimpleParser.parse().
    Raises Exception if a file cannot be opened or read.  ParserException
    is printed to stderr and re-raised.
    """
    #
    # Read sSrcFile into a line array.  Opening and reading are kept apart
    # so each failure gets its own message; the with statement closes the
    # file in either case.
    #
    try:
        oFile = open(sSrcFile, "r"); # pylint: disable=consider-using-with,unspecified-encoding
    except Exception as oXcpt:
        raise Exception("failed to open %s for reading: %s" % (sSrcFile, oXcpt,));
    with oFile:
        try:
            asLines = oFile.readlines();
        except Exception as oXcpt:
            raise Exception("failed to read %s: %s" % (sSrcFile, oXcpt,));

    #
    # On the first call, we parse IEMAllInstCommonBodyMacros.h so we
    # can use the macros from it when processing the other files.
    #
    global g_oParsedCommonBodyMacros;
    if g_oParsedCommonBodyMacros is None:
        # Locate the file.
        sCommonBodyMacros = os.path.join(os.path.split(sSrcFile)[0], 'IEMAllInstCommonBodyMacros.h');
        if not os.path.isfile(sCommonBodyMacros):
            sCommonBodyMacros = os.path.join(os.path.split(__file__)[0], 'IEMAllInstCommonBodyMacros.h');

        # Read it.
        try:
            with open(sCommonBodyMacros, "r") as oIncFile: # pylint: disable=unspecified-encoding
                asIncFiles = oIncFile.readlines();
        except Exception as oXcpt:
            raise Exception("failed to open/read %s: %s" % (sCommonBodyMacros, oXcpt,));

        # Parse it.  The file is only supposed to contribute macros, so any
        # instruction, tag, stub or MC block found in it is an error.
        try:
            oParser = SimpleParser(sCommonBodyMacros, asIncFiles, 'one', sHostArch);
            if oParser.parse() != 0:
                raise ParserException('%s: errors: See above' % (sCommonBodyMacros, ));
            if oParser.cTotalInstr != 0 or oParser.cTotalStubs != 0 or oParser.cTotalTagged != 0 or oParser.cTotalMcBlocks != 0:
                # Note! The arguments must follow the order of the message: instr, tags, stubs, MCs.
                raise ParserException('%s: error: Unexpectedly found %u instr, %u tags, %u stubs and %u MCs, expecting zero. %s'
                                      % (sCommonBodyMacros, oParser.cTotalInstr, oParser.cTotalTagged, oParser.cTotalStubs,
                                         oParser.cTotalMcBlocks,
                                         ', '.join(sorted(  [str(oMcBlock.iBeginLine) for oMcBlock in g_aoMcBlocks]
                                                          + [str(oInstr.iLineCreated) for oInstr in g_aoAllInstructions])),));
        except ParserException as oXcpt:
            print(str(oXcpt), file = sys.stderr);
            raise;
        g_oParsedCommonBodyMacros = oParser;

    #
    # Do the parsing.
    #
    try:
        oParser = SimpleParser(sSrcFile, asLines, sDefaultMap, sHostArch, g_oParsedCommonBodyMacros);
        return (oParser.parse(), oParser);
    except ParserException as oXcpt:
        print(str(oXcpt), file = sys.stderr);
        raise;
5815
5816
def __doTestCopying():
    """
    Carries out the asCopyTests requests of all the instructions.

    Each reference is looked up in g_dAllInstructionsByStat first and then
    in g_dAllInstructionsByFunction, and the tests of the instruction(s)
    found are appended to the tests of the requesting instruction.

    Returns the number of errors, after writing them to stderr.
    """
    asErrors = [];
    for oDstInstr in g_aoAllInstructions:
        for sSrcInstr in oDstInstr.asCopyTests or ():
            # Resolve the reference to a list of source instructions.
            oByStat = g_dAllInstructionsByStat.get(sSrcInstr, None);
            if oByStat:
                aoSrcInstrs = [oByStat,];
            else:
                aoSrcInstrs = g_dAllInstructionsByFunction.get(sSrcInstr, []);

            if not aoSrcInstrs:
                asErrors.append('%s:%s: error: @opcopytests reference "%s" not found\n'
                                % ( oDstInstr.sSrcFile, oDstInstr.iLineCreated, sSrcInstr));
                continue;

            # Copy the tests, but an instruction cannot copy from itself.
            for oSrcInstr in aoSrcInstrs:
                if oSrcInstr != oDstInstr:
                    oDstInstr.aoTests.extend(oSrcInstr.aoTests);
                else:
                    asErrors.append('%s:%s: error: @opcopytests reference "%s" matches the destination\n'
                                    % ( oDstInstr.sSrcFile, oDstInstr.iLineCreated, sSrcInstr));

    if asErrors:
        sys.stderr.write(u''.join(asErrors));
    return len(asErrors);
5844
5845
def __applyOnlyTest():
    """
    Restricts testing to the instructions in g_aoOnlyTestInstructions.

    When that list is not empty, aoTests is emptied for all the other
    instructions.  Returns 0 (the error count).
    """
    if not g_aoOnlyTestInstructions:
        return 0;

    for oInstr in g_aoAllInstructions:
        if oInstr.aoTests and oInstr not in g_aoOnlyTestInstructions:
            oInstr.aoTests = [];
    return 0;
5857
## List of all main instruction files, their default maps and file sets (-1 means included in all sets).
## Each entry is a ( filename, default map name, file set number, ) tuple.
g_aaoAllInstrFilesAndDefaultMapAndSet = (
    ( 'IEMAllInstCommon.cpp.h', 'one', -1, ),
    ( 'IEMAllInstOneByte.cpp.h', 'one', 1, ),
    ( 'IEMAllInst3DNow.cpp.h', '3dnow', 2, ),
    ( 'IEMAllInstTwoByte0f.cpp.h', 'two0f', 2, ),
    ( 'IEMAllInstThree0f38.cpp.h', 'three0f38', 3, ),
    ( 'IEMAllInstThree0f3a.cpp.h', 'three0f3a', 3, ),
    ( 'IEMAllInstVexMap1.cpp.h', 'vexmap1', 4, ),
    ( 'IEMAllInstVexMap2.cpp.h', 'vexmap2', 4, ),
    ( 'IEMAllInstVexMap3.cpp.h', 'vexmap3', 4, ),
);
5870
def __parseFilesWorker(asFilesAndDefaultMap, sHostArch):
    """
    Parses the given IEMAllInstruction*.cpp.h files.

    asFilesAndDefaultMap is a sequence of (sFilename, sDefaultMap) pairs.  A
    filename without a directory part that does not exist relative to the
    current directory is looked for in the directory of this script.

    Once all files are parsed, the @opcopytests and only-test processing is
    applied and the overall stub statistics are printed to stderr.

    Returns a list of the parsers on success.
    Raises exception on failure.
    """
    sSrcDir = os.path.dirname(os.path.abspath(__file__));
    cErrors = 0;
    aoParsers = [];
    for sFilename, sDefaultMap in asFilesAndDefaultMap:
        if not os.path.split(sFilename)[0] and not os.path.exists(sFilename):
            sFilename = os.path.join(sSrcDir, sFilename);
        cThisErrors, oParser = __parseFileByName(sFilename, sDefaultMap, sHostArch);
        cErrors += cThisErrors;
        aoParsers.append(oParser);
    cErrors += __doTestCopying();
    cErrors += __applyOnlyTest();

    # Total stub stats.  The max() keeps the percentage calculation from
    # dividing by zero when no instructions were found at all (same guard
    # as for the per-file statistics in SimpleParser.parse).
    cTotalInstr = len(g_aoAllInstructions);
    cTotalStubs = 0;
    for oInstr in g_aoAllInstructions:
        cTotalStubs += oInstr.fStub;
    print('debug: %3s%% / %3s stubs out of %4s instructions and %4s MC blocks in total'
          % (cTotalStubs * 100 // max(cTotalInstr, 1), cTotalStubs, cTotalInstr, len(g_aoMcBlocks),),
          file = sys.stderr);

    if cErrors != 0:
        raise Exception('%d parse errors' % (cErrors,));
    return aoParsers;
5901
5902
def parseFiles(asFiles, sHostArch = None):
    """
    Parses the given selection of IEMAllInstruction*.cpp.h files.

    The default map of each file is taken from the entry in
    g_aaoAllInstrFilesAndDefaultMapAndSet with the same base name, ignoring
    case.

    Returns a list of the parsers on success.
    Raises exception on failure, including when a file is not in the table.
    """
    asFilesAndDefaultMap = [];
    for sFilename in asFiles:
        # The first table entry with a matching name decides the map.
        sName = os.path.basename(sFilename).lower();
        sMap = next((aoInfo[1] for aoInfo in g_aaoAllInstrFilesAndDefaultMapAndSet if aoInfo[0].lower() == sName),
                    None);
        if not sMap:
            raise Exception('Unable to classify file: %s' % (sFilename,));
        asFilesAndDefaultMap.append((sFilename, sMap));

    # The worker does the actual job.
    return __parseFilesWorker(asFilesAndDefaultMap, sHostArch);
5924
5925
def parseAll(sHostArch = None):
    """
    Parses every file listed in g_aaoAllInstrFilesAndDefaultMapAndSet.

    Returns a list of the parsers on success.
    Raises exception on failure.
    """
    # The worker only wants the filename and the default map, not the file set.
    asFilesAndDefaultMap = [];
    for sFilename, sDefaultMap, _ in g_aaoAllInstrFilesAndDefaultMapAndSet:
        asFilesAndDefaultMap.append((sFilename, sDefaultMap));
    return __parseFilesWorker(asFilesAndDefaultMap, sHostArch);
5934
5935
5936#
5937# Generators (may perhaps move later).
5938#
def __formatDisassemblerTableEntry(oInstr):
    """
    Formats the disassembler table entry for an instruction.

    The entry is an OP(...) macro invocation, or OPVEX(...) when there are
    more than three operands, with the columns padded out to fixed offsets.

    Returns the entry as a single line (string).
    """
    aoOperands = oInstr.aoOperands;
    if len(aoOperands) > 3:
        sMacro, cMaxOperands = 'OPVEX', 4;
        assert len(aoOperands) <= cMaxOperands;
    else:
        sMacro, cMaxOperands = 'OP', 3;

    #
    # Format string: the mnemonic followed by the comma separated operand formats.
    #
    asOperandFmts = [];
    for oOperand in aoOperands:
        sOperandFmt = g_kdOpTypes[oOperand.sType][2];
        if sOperandFmt[0] != '%': ## @todo remove upper() later.
            sOperandFmt = sOperandFmt.upper();
        asOperandFmts.append(sOperandFmt);
    sFormat = '%s("%s' % (sMacro, oInstr.sMnemonic,);
    if asOperandFmts:
        sFormat += ' ' + ','.join(asOperandFmts);
    sFormat += '",';

    #
    # Decoders.  The encoding decides what the leading parser column(s) are.
    #
    sEncoding = oInstr.sEncoding;
    asParsers = [];
    if sEncoding is None or sEncoding in ('fixed', 'VEX.fixed'):
        pass;
    elif sEncoding == 'ModR/M':
        # ASSUME the first operand is using the ModR/M encoding
        assert len(aoOperands) >= 1 and aoOperands[0].usesModRM(), "oInstr=%s" % (oInstr,);
        asParsers.append('IDX_ParseModRM,');
    elif sEncoding == 'VEX.ModR/M':
        asParsers.append('IDX_ParseModRM,');
    elif sEncoding == 'prefix':
        asParsers.extend(['0,'] * len(aoOperands));
    elif sEncoding == 'vex2':
        asParsers.append('IDX_ParseVex2b,');
    elif sEncoding == 'vex3':
        asParsers.append('IDX_ParseVex3b,');
    elif sEncoding in g_dInstructionMaps:
        asParsers.append(g_dInstructionMaps[sEncoding].sDisParse + ',');
    else:
        ## @todo Parsers not handled yet:
        #   IDX_ParseTwoByteEsc, IDX_ParseGrp1, IDX_ParseShiftGrp2, IDX_ParseGrp3,
        #   IDX_ParseGrp4, IDX_ParseGrp5, IDX_Parse3DNow, IDX_ParseGrp6,
        #   IDX_ParseGrp7, IDX_ParseGrp8, IDX_ParseGrp9, IDX_ParseGrp10,
        #   IDX_ParseGrp12, IDX_ParseGrp13, IDX_ParseGrp14, IDX_ParseGrp15,
        #   IDX_ParseGrp16, IDX_ParseThreeByteEsc4, IDX_ParseThreeByteEsc5,
        #   IDX_ParseModFence, IDX_ParseEscFP, IDX_ParseNopPause,
        #   IDX_ParseInvOpModRM,
        assert False, str(oInstr);

    # Check for immediates and stuff in the remaining operands, i.e. those
    # not covered by the columns above, then pad with zeros.
    asParsers.extend([g_kdOpTypes[oOperand.sType][0] + ',' for oOperand in aoOperands[len(asParsers):]]);
    asParsers.extend(['0,'] * (cMaxOperands - len(asParsers)));

    #
    # Opcode and operands.
    #
    assert oInstr.sDisEnum, str(oInstr);
    sOpcode = oInstr.sDisEnum + ',';
    asParams = ['OP_PARM_' + g_kdOpTypes[oOperand.sType][3] + ',' for oOperand in aoOperands];
    asParams.extend(['OP_PARM_NONE,'] * (cMaxOperands - len(asParams)));

    #
    # Flags: the DISOPTYPE_ defines of the hints in sorted hint order, or zero.
    #
    asFlags = [];
    for sHint in sorted(oInstr.dHints.keys()):
        sDefine = g_kdHints[sHint];
        if sDefine.startswith('DISOPTYPE_'):
            asFlags.append(sDefine);
    sFlags = (' | '.join(asFlags) or '0') + '),';

    #
    # Format the columns into a line.  Each column starts at its offset, or
    # one space after the preceding column should that one be too wide.
    #
    aoffColumns = [4, 29, 49, 65, 77, 89, 109, 125, 141, 157, 183, 199];
    asColumns = [sFormat,] + asParsers + [sOpcode,] + asParams + [sFlags,];
    sLine = '';
    for iColumn, sColumn in enumerate(asColumns):
        sLine += ' ' * max(aoffColumns[iColumn] - len(sLine), 1);
        sLine += sColumn;

    # OP("psrlw %Vdq,%Wdq", IDX_ParseModRM, IDX_UseModRM, 0, OP_PSRLW, OP_PARM_Vdq, OP_PARM_Wdq, OP_PARM_NONE,
    # DISOPTYPE_HARMLESS),
    # define OP(pszOpcode, idxParse1, idxParse2, idxParse3, opcode, param1, param2, param3, optype) \
    # { pszOpcode, idxParse1, idxParse2, idxParse3, 0, opcode, param1, param2, param3, 0, 0, optype }
    return sLine;
6063
def __checkIfShortTable(aoTableOrdered, oMap):
    """
    Checks whether leading and trailing None entries can be trimmed off the
    table, i.e. whether to produce a short table.

    Returns (iInstr, cInstructions, fShortTable), where iInstr is the index
    of the first entry to output and cInstructions the index following the
    last one.  For a full table this is (0, len(aoTableOrdered), False).
    """
    cEntries = len(aoTableOrdered);

    # Determine how much we can trim off, first at the end and then at the start.
    iEnd = next((idx + 1 for idx in range(cEntries - 1, -1, -1) if aoTableOrdered[idx] is not None), 0);
    iFirst = next((idx for idx in range(iEnd) if aoTableOrdered[idx] is not None), iEnd);

    # We go for the short table version if the number of trimmed entries is
    # at least a 30th of the table size.
    # NOTE(review): The original comment talked about saving more than 30%,
    #               which is not what this condition tests - confirm which
    #               of the two is the intended threshold.
    cTrimmed = iFirst + (cEntries - iEnd);
    if cTrimmed >= cEntries // 30:
        return (iFirst, iEnd, True);
    _ = oMap; # Use this for overriding.

    # Output the full table.
    return (0, cEntries, False);
6085
def generateDisassemblerTables(oDstFile = sys.stdout):
    """
    Generates disassembler tables.

    Parses all the instruction files and writes the generated tables to
    oDstFile, one map at a time.  Only maps with names starting with 'vex'
    are output at the moment.

    Returns exit code: 0 on success, 1 if parsing failed (the failure is
    reported on stderr).
    """

    #
    # Parse all.
    #
    try:
        parseAll();
    except Exception as oXcpt:
        print('error: parseAll failed: %s' % (oXcpt,), file = sys.stderr);
        traceback.print_exc(file = sys.stderr);
        return 1;

    #
    # The disassembler uses a slightly different table layout: maps using
    # the 'byte+pfx' selector are split up into one map for each prefix.
    #
    aoDisasmMaps = [];
    for oMap in sorted(g_dInstructionMaps.values(),
                       key = lambda oSortMap: oSortMap.sEncoding + ''.join(oSortMap.asLeadOpcodes)):
        if oMap.sSelector != 'byte+pfx':
            aoDisasmMaps.append(oMap);
        else:
            # Split the map by prefix.
            aoDisasmMaps.append(oMap.copy(oMap.sName, 'none'));
            aoDisasmMaps.append(oMap.copy(oMap.sName + '_66', '0x66'));
            aoDisasmMaps.append(oMap.copy(oMap.sName + '_F3', '0xf3'));
            aoDisasmMaps.append(oMap.copy(oMap.sName + '_F2', '0xf2'));

    #
    # Dump each map.
    #
    # Note! asHeaderLines collects the extern declarations, but nothing in
    #       this function writes them out yet.
    #
    asHeaderLines = [];
    print("debug: maps=%s\n" % (', '.join([oMap.sName for oMap in aoDisasmMaps]),), file = sys.stderr);
    for oMap in aoDisasmMaps:
        sName = oMap.sName;

        if not sName.startswith("vex"): continue; # only looking at the vex maps at the moment.

        #
        # Get the instructions for the map and see if we can do a short version or not.
        #
        aoTableOrder = oMap.getInstructionsInTableOrder();
        cEntriesPerByte = oMap.getEntriesPerByte();
        (iInstrStart, iInstrEnd, fShortTable) = __checkIfShortTable(aoTableOrder, oMap);

        # Number of table entries in a row of 16 opcode bytes.
        cEntriesPerRow = 0x10 * cEntriesPerByte;

        #
        # Output the table start.
        # Note! Short tables are static and only accessible via the map range record.
        #
        asLines = [];
        asLines.append('/* Generated from: %-11s Selector: %-7s Encoding: %-7s Lead bytes opcodes: %s */'
                       % ( oMap.sName, oMap.sSelector, oMap.sEncoding, ' '.join(oMap.asLeadOpcodes), ));
        if fShortTable:
            asLines.append('static const DISOPCODE %s[] =' % (oMap.getDisasTableName(),));
        else:
            asHeaderLines.append('extern const DISOPCODE %s[%d];' % (oMap.getDisasTableName(), iInstrEnd - iInstrStart,));
            asLines.append( 'const DISOPCODE %s[%d] =' % (oMap.getDisasTableName(), iInstrEnd - iInstrStart,));
        asLines.append('{');

        # A short table not starting on a row boundary gets a note about where it starts.
        if fShortTable and (iInstrStart & (cEntriesPerRow - 1)) != 0:
            asLines.append(' /* %#04x: */' % (iInstrStart,));

        #
        # Output the instructions.
        #
        iInstr = iInstrStart;
        while iInstr < iInstrEnd:
            oInstr = aoTableOrder[iInstr];

            # Start each row with a comment, separating the rows by a blank line.
            if (iInstr & (cEntriesPerRow - 1)) == 0:
                if iInstr != iInstrStart:
                    asLines.append('');
                asLines.append(' /* %x */' % ((iInstr // cEntriesPerByte) >> 4,));

            if oInstr is None:
                # Invalid. Optimize blocks of invalid instructions.
                cInvalidInstrs = 1;
                while iInstr + cInvalidInstrs < len(aoTableOrder) and aoTableOrder[iInstr + cInvalidInstrs] is None:
                    cInvalidInstrs += 1;
                if (iInstr & (cEntriesPerRow - 1)) == 0 and cInvalidInstrs >= cEntriesPerRow:
                    # A whole row.  (The last entry is skipped by the increment at the bottom of the loop.)
                    asLines.append(' INVALID_OPCODE_BLOCK_%u,' % (cEntriesPerRow,));
                    iInstr += cEntriesPerRow - 1;
                elif cEntriesPerByte > 1:
                    if (iInstr & (cEntriesPerByte - 1)) == 0 and cInvalidInstrs >= cEntriesPerByte:
                        # All the entries of one opcode byte.  Skip exactly the entries the block
                        # covers, rather than assuming there are four of them.
                        asLines.append(' INVALID_OPCODE_BLOCK_%u,' % (cEntriesPerByte,));
                        iInstr += cEntriesPerByte - 1;
                    else:
                        asLines.append(' /* %#04x/%d */ INVALID_OPCODE,'
                                       % (iInstr // cEntriesPerByte, iInstr % cEntriesPerByte));
                else:
                    asLines.append(' /* %#04x */ INVALID_OPCODE,' % (iInstr,));
            elif isinstance(oInstr, list):
                if len(oInstr) != 0:
                    asLines.append(' /* %#04x */ ComplicatedListStuffNeedingWrapper, /* \n -- %s */'
                                   % (iInstr, '\n -- '.join([str(oItem) for oItem in oInstr]),));
                else:
                    # NOTE(review): This hands an empty list to the entry formatter - confirm that
                    #               empty lists cannot occur in the table.
                    asLines.append(__formatDisassemblerTableEntry(oInstr));
            else:
                asLines.append(__formatDisassemblerTableEntry(oInstr));

            iInstr += 1;

        # An entirely empty table still needs one entry.
        if iInstrStart >= iInstrEnd:
            asLines.append(' /* dummy */ INVALID_OPCODE');

        asLines.append('};');
        asLines.append('AssertCompile(RT_ELEMENTS(%s) == %s);' % (oMap.getDisasTableName(), iInstrEnd - iInstrStart,));

        #
        # We always emit a map range record, assuming the linker will eliminate the unnecessary ones.
        # The extern declaration uses the very same name as the definition.
        #
        asHeaderLines.append('extern const DISOPMAPDESC %s;' % (oMap.getDisasRangeName(),));
        asLines.append('const DISOPMAPDESC %s = { &%s[0], %#04x, RT_ELEMENTS(%s) };'
                       % (oMap.getDisasRangeName(), oMap.getDisasTableName(), iInstrStart, oMap.getDisasTableName(),));

        #
        # Write out the lines.
        #
        oDstFile.write('\n'.join(asLines));
        oDstFile.write('\n');
        oDstFile.write('\n');
        #break; #for now
    return 0;
6214
# When run as a script, generate the disassembler tables (the default
# destination is stdout) and use the return value as the exit code.
if __name__ == '__main__':
    sys.exit(generateDisassemblerTables());
6217
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette