VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllInstPython.py@ 100759

Last change on this file since 100759 was 100753, checked in by vboxsync, 17 months ago

VMM/IEM: Build fix. bugref:10369

  • Property svn:eol-style set to LF
  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 271.5 KB
Line 
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3# $Id: IEMAllInstPython.py 100753 2023-07-31 13:26:23Z vboxsync $
4
5"""
6IEM instruction extractor.
7
8This script/module parses the IEMAllInstruction*.cpp.h files next to it and
9collects information about the instructions. It can then be used to generate
10disassembler tables and tests.
11"""
12
13from __future__ import print_function;
14
15__copyright__ = \
16"""
17Copyright (C) 2017-2023 Oracle and/or its affiliates.
18
19This file is part of VirtualBox base platform packages, as
20available from https://www.virtualbox.org.
21
22This program is free software; you can redistribute it and/or
23modify it under the terms of the GNU General Public License
24as published by the Free Software Foundation, in version 3 of the
25License.
26
27This program is distributed in the hope that it will be useful, but
28WITHOUT ANY WARRANTY; without even the implied warranty of
29MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
30General Public License for more details.
31
32You should have received a copy of the GNU General Public License
33along with this program; if not, see <https://www.gnu.org/licenses>.
34
35The contents of this file may alternatively be used under the terms
36of the Common Development and Distribution License Version 1.0
37(CDDL), a copy of it is provided in the "COPYING.CDDL" file included
38in the VirtualBox distribution, in which case the provisions of the
39CDDL are applicable instead of those of the GPL.
40
41You may elect to license modified versions of this file under the
42terms and conditions of either the GPL or the CDDL or both.
43
44SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
45"""
46__version__ = "$Revision: 100753 $"
47
48# pylint: disable=anomalous-backslash-in-string,too-many-lines
49
50# Standard python imports.
51import os;
52import re;
53import sys;
54import traceback;
55
56## Only the main script needs to modify the path.
57#g_ksValidationKitDir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),
58# 'ValidationKit');
59#sys.path.append(g_ksValidationKitDir);
60#
61#from common import utils; - Windows build boxes doesn't have pywin32.
62
# Python 3 compatibility: there is no separate 'long' type there, so alias it to 'int'.
if sys.version_info >= (3,):
    long = int; # pylint: disable=redefined-builtin,invalid-name
66
67
68g_kdX86EFlagsConstants = {
69 'X86_EFL_CF': 0x00000001, # RT_BIT_32(0)
70 'X86_EFL_1': 0x00000002, # RT_BIT_32(1)
71 'X86_EFL_PF': 0x00000004, # RT_BIT_32(2)
72 'X86_EFL_AF': 0x00000010, # RT_BIT_32(4)
73 'X86_EFL_ZF': 0x00000040, # RT_BIT_32(6)
74 'X86_EFL_SF': 0x00000080, # RT_BIT_32(7)
75 'X86_EFL_TF': 0x00000100, # RT_BIT_32(8)
76 'X86_EFL_IF': 0x00000200, # RT_BIT_32(9)
77 'X86_EFL_DF': 0x00000400, # RT_BIT_32(10)
78 'X86_EFL_OF': 0x00000800, # RT_BIT_32(11)
79 'X86_EFL_IOPL': 0x00003000, # (RT_BIT_32(12) | RT_BIT_32(13))
80 'X86_EFL_NT': 0x00004000, # RT_BIT_32(14)
81 'X86_EFL_RF': 0x00010000, # RT_BIT_32(16)
82 'X86_EFL_VM': 0x00020000, # RT_BIT_32(17)
83 'X86_EFL_AC': 0x00040000, # RT_BIT_32(18)
84 'X86_EFL_VIF': 0x00080000, # RT_BIT_32(19)
85 'X86_EFL_VIP': 0x00100000, # RT_BIT_32(20)
86 'X86_EFL_ID': 0x00200000, # RT_BIT_32(21)
87 'X86_EFL_LIVE_MASK': 0x003f7fd5, # UINT32_C(0x003f7fd5)
88 'X86_EFL_RA1_MASK': 0x00000002, # RT_BIT_32(1)
89};
90
91## EFlags values allowed in \@opfltest, \@opflmodify, \@opflundef, \@opflset, and \@opflclear.
92g_kdEFlagsMnemonics = {
93 # Debugger flag notation (sorted by value):
94 'cf': 'X86_EFL_CF', ##< Carry Flag.
95 'nc': '!X86_EFL_CF', ##< No Carry.
96
97 'po': 'X86_EFL_PF', ##< Parity Odd.
98 'pe': '!X86_EFL_PF', ##< Parity Even.
99
100 'af': 'X86_EFL_AF', ##< Aux Flag.
101 'na': '!X86_EFL_AF', ##< No Aux.
102
103 'zr': 'X86_EFL_ZF', ##< ZeRo.
104 'nz': '!X86_EFL_ZF', ##< No Zero.
105
106 'ng': 'X86_EFL_SF', ##< NeGative (sign).
107 'pl': '!X86_EFL_SF', ##< PLus (sign).
108
109 'tf': 'X86_EFL_TF', ##< Trap flag.
110
111 'ei': 'X86_EFL_IF', ##< Enabled Interrupts.
112 'di': '!X86_EFL_IF', ##< Disabled Interrupts.
113
114 'dn': 'X86_EFL_DF', ##< DowN (string op direction).
115 'up': '!X86_EFL_DF', ##< UP (string op direction).
116
117 'ov': 'X86_EFL_OF', ##< OVerflow.
118 'nv': '!X86_EFL_OF', ##< No Overflow.
119
120 'nt': 'X86_EFL_NT', ##< Nested Task.
121 'rf': 'X86_EFL_RF', ##< Resume Flag.
122 'vm': 'X86_EFL_VM', ##< Virtual-8086 Mode.
123 'ac': 'X86_EFL_AC', ##< Alignment Check.
124 'vif': 'X86_EFL_VIF', ##< Virtual Interrupt Flag.
125 'vip': 'X86_EFL_VIP', ##< Virtual Interrupt Pending.
126
127 # Reference manual notation not covered above (sorted by value):
128 'pf': 'X86_EFL_PF',
129 'zf': 'X86_EFL_ZF',
130 'sf': 'X86_EFL_SF',
131 'if': 'X86_EFL_IF',
132 'df': 'X86_EFL_DF',
133 'of': 'X86_EFL_OF',
134 'iopl': 'X86_EFL_IOPL',
135 'id': 'X86_EFL_ID',
136};
137
138## Constants and values for CR0.
139g_kdX86Cr0Constants = {
140 'X86_CR0_PE': 0x00000001, # RT_BIT_32(0)
141 'X86_CR0_MP': 0x00000002, # RT_BIT_32(1)
142 'X86_CR0_EM': 0x00000004, # RT_BIT_32(2)
143 'X86_CR0_TS': 0x00000008, # RT_BIT_32(3)
144 'X86_CR0_ET': 0x00000010, # RT_BIT_32(4)
145 'X86_CR0_NE': 0x00000020, # RT_BIT_32(5)
146 'X86_CR0_WP': 0x00010000, # RT_BIT_32(16)
147 'X86_CR0_AM': 0x00040000, # RT_BIT_32(18)
148 'X86_CR0_NW': 0x20000000, # RT_BIT_32(29)
149 'X86_CR0_CD': 0x40000000, # RT_BIT_32(30)
150 'X86_CR0_PG': 0x80000000, # RT_BIT_32(31)
151};
152
153## Constants and values for CR4.
154g_kdX86Cr4Constants = {
155 'X86_CR4_VME': 0x00000001, # RT_BIT_32(0)
156 'X86_CR4_PVI': 0x00000002, # RT_BIT_32(1)
157 'X86_CR4_TSD': 0x00000004, # RT_BIT_32(2)
158 'X86_CR4_DE': 0x00000008, # RT_BIT_32(3)
159 'X86_CR4_PSE': 0x00000010, # RT_BIT_32(4)
160 'X86_CR4_PAE': 0x00000020, # RT_BIT_32(5)
161 'X86_CR4_MCE': 0x00000040, # RT_BIT_32(6)
162 'X86_CR4_PGE': 0x00000080, # RT_BIT_32(7)
163 'X86_CR4_PCE': 0x00000100, # RT_BIT_32(8)
164 'X86_CR4_OSFXSR': 0x00000200, # RT_BIT_32(9)
165 'X86_CR4_OSXMMEEXCPT': 0x00000400, # RT_BIT_32(10)
166 'X86_CR4_VMXE': 0x00002000, # RT_BIT_32(13)
167 'X86_CR4_SMXE': 0x00004000, # RT_BIT_32(14)
168 'X86_CR4_PCIDE': 0x00020000, # RT_BIT_32(17)
169 'X86_CR4_OSXSAVE': 0x00040000, # RT_BIT_32(18)
170 'X86_CR4_SMEP': 0x00100000, # RT_BIT_32(20)
171 'X86_CR4_SMAP': 0x00200000, # RT_BIT_32(21)
172 'X86_CR4_PKE': 0x00400000, # RT_BIT_32(22)
173};
174
175## XSAVE components (XCR0).
176g_kdX86XSaveCConstants = {
177 'XSAVE_C_X87': 0x00000001,
178 'XSAVE_C_SSE': 0x00000002,
179 'XSAVE_C_YMM': 0x00000004,
180 'XSAVE_C_BNDREGS': 0x00000008,
181 'XSAVE_C_BNDCSR': 0x00000010,
182 'XSAVE_C_OPMASK': 0x00000020,
183 'XSAVE_C_ZMM_HI256': 0x00000040,
184 'XSAVE_C_ZMM_16HI': 0x00000080,
185 'XSAVE_C_PKRU': 0x00000200,
186 'XSAVE_C_LWP': 0x4000000000000000,
187 'XSAVE_C_X': 0x8000000000000000,
188 'XSAVE_C_ALL_AVX': 0x000000c4, # For clearing all AVX bits.
189 'XSAVE_C_ALL_AVX_SSE': 0x000000c6, # For clearing all AVX and SSE bits.
190};
191
192
193## \@op[1-4] locations
194g_kdOpLocations = {
195 'reg': [], ## modrm.reg
196 'rm': [], ## modrm.rm
197 'imm': [], ## immediate instruction data
198 'vvvv': [], ## VEX.vvvv
199
200 # fixed registers.
201 'AL': [],
202 'rAX': [],
203 'rDX': [],
204 'rSI': [],
205 'rDI': [],
206 'rFLAGS': [],
207 'CS': [],
208 'DS': [],
209 'ES': [],
210 'FS': [],
211 'GS': [],
212 'SS': [],
213};
214
215## \@op[1-4] types
216##
217## Value fields:
218## - 0: the normal IDX_ParseXXX handler (IDX_UseModRM == IDX_ParseModRM).
219## - 1: the location (g_kdOpLocations).
220## - 2: disassembler format string version of the type.
221## - 3: disassembler OP_PARAM_XXX (XXX only).
222## - 4: IEM form matching instruction.
223##
224## Note! See the A.2.1 in SDM vol 2 for the type names.
225g_kdOpTypes = {
226 # Fixed addresses
227 'Ap': ( 'IDX_ParseImmAddrF', 'imm', '%Ap', 'Ap', 'FIXED', ),
228
229 # ModR/M.rm
230 'Eb': ( 'IDX_UseModRM', 'rm', '%Eb', 'Eb', 'RM', ),
231 'Ed': ( 'IDX_UseModRM', 'rm', '%Ed', 'Ed', 'RM', ),
232 'Ed_WO': ( 'IDX_UseModRM', 'rm', '%Ed', 'Ed', 'RM', ),
233 'Eq': ( 'IDX_UseModRM', 'rm', '%Eq', 'Eq', 'RM', ),
234 'Eq_WO': ( 'IDX_UseModRM', 'rm', '%Eq', 'Eq', 'RM', ),
235 'Ew': ( 'IDX_UseModRM', 'rm', '%Ew', 'Ew', 'RM', ),
236 'Ev': ( 'IDX_UseModRM', 'rm', '%Ev', 'Ev', 'RM', ),
237 'Ey': ( 'IDX_UseModRM', 'rm', '%Ey', 'Ey', 'RM', ),
238 'Qd': ( 'IDX_UseModRM', 'rm', '%Qd', 'Qd', 'RM', ),
239 'Qq': ( 'IDX_UseModRM', 'rm', '%Qq', 'Qq', 'RM', ),
240 'Qq_WO': ( 'IDX_UseModRM', 'rm', '%Qq', 'Qq', 'RM', ),
241 'Wss': ( 'IDX_UseModRM', 'rm', '%Wss', 'Wss', 'RM', ),
242 'Wss_WO': ( 'IDX_UseModRM', 'rm', '%Wss', 'Wss', 'RM', ),
243 'Wsd': ( 'IDX_UseModRM', 'rm', '%Wsd', 'Wsd', 'RM', ),
244 'Wsd_WO': ( 'IDX_UseModRM', 'rm', '%Wsd', 'Wsd', 'RM', ),
245 'Wps': ( 'IDX_UseModRM', 'rm', '%Wps', 'Wps', 'RM', ),
246 'Wps_WO': ( 'IDX_UseModRM', 'rm', '%Wps', 'Wps', 'RM', ),
247 'Wpd': ( 'IDX_UseModRM', 'rm', '%Wpd', 'Wpd', 'RM', ),
248 'Wpd_WO': ( 'IDX_UseModRM', 'rm', '%Wpd', 'Wpd', 'RM', ),
249 'Wdq': ( 'IDX_UseModRM', 'rm', '%Wdq', 'Wdq', 'RM', ),
250 'Wdq_WO': ( 'IDX_UseModRM', 'rm', '%Wdq', 'Wdq', 'RM', ),
251 'Wq': ( 'IDX_UseModRM', 'rm', '%Wq', 'Wq', 'RM', ),
252 'Wq_WO': ( 'IDX_UseModRM', 'rm', '%Wq', 'Wq', 'RM', ),
253 'WqZxReg_WO': ( 'IDX_UseModRM', 'rm', '%Wq', 'Wq', 'RM', ),
254 'Wx': ( 'IDX_UseModRM', 'rm', '%Wx', 'Wx', 'RM', ),
255 'Wx_WO': ( 'IDX_UseModRM', 'rm', '%Wx', 'Wx', 'RM', ),
256
257 # ModR/M.rm - register only.
258 'Uq': ( 'IDX_UseModRM', 'rm', '%Uq', 'Uq', 'REG' ),
259 'UqHi': ( 'IDX_UseModRM', 'rm', '%Uq', 'UqHi', 'REG' ),
260 'Uss': ( 'IDX_UseModRM', 'rm', '%Uss', 'Uss', 'REG' ),
261 'Uss_WO': ( 'IDX_UseModRM', 'rm', '%Uss', 'Uss', 'REG' ),
262 'Usd': ( 'IDX_UseModRM', 'rm', '%Usd', 'Usd', 'REG' ),
263 'Usd_WO': ( 'IDX_UseModRM', 'rm', '%Usd', 'Usd', 'REG' ),
264 'Ux': ( 'IDX_UseModRM', 'rm', '%Ux', 'Ux', 'REG' ),
265 'Nq': ( 'IDX_UseModRM', 'rm', '%Qq', 'Nq', 'REG' ),
266
267 # ModR/M.rm - memory only.
268 'Ma': ( 'IDX_UseModRM', 'rm', '%Ma', 'Ma', 'MEM', ), ##< Only used by BOUND.
269 'Mb_RO': ( 'IDX_UseModRM', 'rm', '%Mb', 'Mb', 'MEM', ),
270 'Md': ( 'IDX_UseModRM', 'rm', '%Md', 'Md', 'MEM', ),
271 'Md_RO': ( 'IDX_UseModRM', 'rm', '%Md', 'Md', 'MEM', ),
272 'Md_WO': ( 'IDX_UseModRM', 'rm', '%Md', 'Md', 'MEM', ),
273 'Mdq': ( 'IDX_UseModRM', 'rm', '%Mdq', 'Mdq', 'MEM', ),
274 'Mdq_WO': ( 'IDX_UseModRM', 'rm', '%Mdq', 'Mdq', 'MEM', ),
275 'Mq': ( 'IDX_UseModRM', 'rm', '%Mq', 'Mq', 'MEM', ),
276 'Mq_WO': ( 'IDX_UseModRM', 'rm', '%Mq', 'Mq', 'MEM', ),
277 'Mps_WO': ( 'IDX_UseModRM', 'rm', '%Mps', 'Mps', 'MEM', ),
278 'Mpd_WO': ( 'IDX_UseModRM', 'rm', '%Mpd', 'Mpd', 'MEM', ),
279 'Mx': ( 'IDX_UseModRM', 'rm', '%Mx', 'Mx', 'MEM', ),
280 'Mx_WO': ( 'IDX_UseModRM', 'rm', '%Mx', 'Mx', 'MEM', ),
281 'M_RO': ( 'IDX_UseModRM', 'rm', '%M', 'M', 'MEM', ),
282 'M_RW': ( 'IDX_UseModRM', 'rm', '%M', 'M', 'MEM', ),
283
284 # ModR/M.reg
285 'Gb': ( 'IDX_UseModRM', 'reg', '%Gb', 'Gb', '', ),
286 'Gw': ( 'IDX_UseModRM', 'reg', '%Gw', 'Gw', '', ),
287 'Gd': ( 'IDX_UseModRM', 'reg', '%Gd', 'Gd', '', ),
288 'Gv': ( 'IDX_UseModRM', 'reg', '%Gv', 'Gv', '', ),
289 'Gv_RO': ( 'IDX_UseModRM', 'reg', '%Gv', 'Gv', '', ),
290 'Gy': ( 'IDX_UseModRM', 'reg', '%Gy', 'Gy', '', ),
291 'Pd': ( 'IDX_UseModRM', 'reg', '%Pd', 'Pd', '', ),
292 'PdZx_WO': ( 'IDX_UseModRM', 'reg', '%Pd', 'PdZx', '', ),
293 'Pq': ( 'IDX_UseModRM', 'reg', '%Pq', 'Pq', '', ),
294 'Pq_WO': ( 'IDX_UseModRM', 'reg', '%Pq', 'Pq', '', ),
295 'Vd': ( 'IDX_UseModRM', 'reg', '%Vd', 'Vd', '', ),
296 'Vd_WO': ( 'IDX_UseModRM', 'reg', '%Vd', 'Vd', '', ),
297 'VdZx_WO': ( 'IDX_UseModRM', 'reg', '%Vd', 'Vd', '', ),
298 'Vdq': ( 'IDX_UseModRM', 'reg', '%Vdq', 'Vdq', '', ),
299 'Vss': ( 'IDX_UseModRM', 'reg', '%Vss', 'Vss', '', ),
300 'Vss_WO': ( 'IDX_UseModRM', 'reg', '%Vss', 'Vss', '', ),
301 'VssZx_WO': ( 'IDX_UseModRM', 'reg', '%Vss', 'Vss', '', ),
302 'Vsd': ( 'IDX_UseModRM', 'reg', '%Vsd', 'Vsd', '', ),
303 'Vsd_WO': ( 'IDX_UseModRM', 'reg', '%Vsd', 'Vsd', '', ),
304 'VsdZx_WO': ( 'IDX_UseModRM', 'reg', '%Vsd', 'Vsd', '', ),
305 'Vps': ( 'IDX_UseModRM', 'reg', '%Vps', 'Vps', '', ),
306 'Vps_WO': ( 'IDX_UseModRM', 'reg', '%Vps', 'Vps', '', ),
307 'Vpd': ( 'IDX_UseModRM', 'reg', '%Vpd', 'Vpd', '', ),
308 'Vpd_WO': ( 'IDX_UseModRM', 'reg', '%Vpd', 'Vpd', '', ),
309 'Vq': ( 'IDX_UseModRM', 'reg', '%Vq', 'Vq', '', ),
310 'Vq_WO': ( 'IDX_UseModRM', 'reg', '%Vq', 'Vq', '', ),
311 'Vdq_WO': ( 'IDX_UseModRM', 'reg', '%Vdq', 'Vdq', '', ),
312 'VqHi': ( 'IDX_UseModRM', 'reg', '%Vdq', 'VdqHi', '', ),
313 'VqHi_WO': ( 'IDX_UseModRM', 'reg', '%Vdq', 'VdqHi', '', ),
314 'VqZx_WO': ( 'IDX_UseModRM', 'reg', '%Vq', 'VqZx', '', ),
315 'Vx': ( 'IDX_UseModRM', 'reg', '%Vx', 'Vx', '', ),
316 'Vx_WO': ( 'IDX_UseModRM', 'reg', '%Vx', 'Vx', '', ),
317
318 # VEX.vvvv
319 'By': ( 'IDX_UseModRM', 'vvvv', '%By', 'By', 'V', ),
320 'Hps': ( 'IDX_UseModRM', 'vvvv', '%Hps', 'Hps', 'V', ),
321 'Hpd': ( 'IDX_UseModRM', 'vvvv', '%Hpd', 'Hpd', 'V', ),
322 'HssHi': ( 'IDX_UseModRM', 'vvvv', '%Hx', 'HssHi', 'V', ),
323 'HsdHi': ( 'IDX_UseModRM', 'vvvv', '%Hx', 'HsdHi', 'V', ),
324 'Hq': ( 'IDX_UseModRM', 'vvvv', '%Hq', 'Hq', 'V', ),
325 'HqHi': ( 'IDX_UseModRM', 'vvvv', '%Hq', 'HqHi', 'V', ),
326 'Hx': ( 'IDX_UseModRM', 'vvvv', '%Hx', 'Hx', 'V', ),
327
328 # Immediate values.
329 'Ib': ( 'IDX_ParseImmByte', 'imm', '%Ib', 'Ib', '', ), ##< NB! Could be IDX_ParseImmByteSX for some instrs.
330 'Iw': ( 'IDX_ParseImmUshort', 'imm', '%Iw', 'Iw', '', ),
331 'Id': ( 'IDX_ParseImmUlong', 'imm', '%Id', 'Id', '', ),
332 'Iq': ( 'IDX_ParseImmQword', 'imm', '%Iq', 'Iq', '', ),
333 'Iv': ( 'IDX_ParseImmV', 'imm', '%Iv', 'Iv', '', ), ##< o16: word, o32: dword, o64: qword
334 'Iz': ( 'IDX_ParseImmZ', 'imm', '%Iz', 'Iz', '', ), ##< o16: word, o32|o64:dword
335
336 # Address operands (no ModR/M).
337 'Ob': ( 'IDX_ParseImmAddr', 'imm', '%Ob', 'Ob', '', ),
338 'Ov': ( 'IDX_ParseImmAddr', 'imm', '%Ov', 'Ov', '', ),
339
340 # Relative jump targets
341 'Jb': ( 'IDX_ParseImmBRel', 'imm', '%Jb', 'Jb', '', ),
342 'Jv': ( 'IDX_ParseImmVRel', 'imm', '%Jv', 'Jv', '', ),
343
344 # DS:rSI
345 'Xb': ( 'IDX_ParseXb', 'rSI', '%eSI', 'Xb', '', ),
346 'Xv': ( 'IDX_ParseXv', 'rSI', '%eSI', 'Xv', '', ),
347 # ES:rDI
348 'Yb': ( 'IDX_ParseYb', 'rDI', '%eDI', 'Yb', '', ),
349 'Yv': ( 'IDX_ParseYv', 'rDI', '%eDI', 'Yv', '', ),
350
351 'Fv': ( 'IDX_ParseFixedReg', 'rFLAGS', '%Fv', 'Fv', '', ),
352
353 # Fixed registers.
354 'AL': ( 'IDX_ParseFixedReg', 'AL', 'al', 'REG_AL', '', ),
355 'rAX': ( 'IDX_ParseFixedReg', 'rAX', '%eAX', 'REG_EAX', '', ),
356 'rDX': ( 'IDX_ParseFixedReg', 'rDX', '%eDX', 'REG_EDX', '', ),
357 'CS': ( 'IDX_ParseFixedReg', 'CS', 'cs', 'REG_CS', '', ), # 8086: push CS
358 'DS': ( 'IDX_ParseFixedReg', 'DS', 'ds', 'REG_DS', '', ),
359 'ES': ( 'IDX_ParseFixedReg', 'ES', 'es', 'REG_ES', '', ),
360 'FS': ( 'IDX_ParseFixedReg', 'FS', 'fs', 'REG_FS', '', ),
361 'GS': ( 'IDX_ParseFixedReg', 'GS', 'gs', 'REG_GS', '', ),
362 'SS': ( 'IDX_ParseFixedReg', 'SS', 'ss', 'REG_SS', '', ),
363};
364
365# IDX_ParseFixedReg
366# IDX_ParseVexDest
367
368
369## IEMFORM_XXX mappings.
370g_kdIemForms = { # sEncoding, [ sWhere1, ... ] opcodesub ),
371 'RM': ( 'ModR/M', [ 'reg', 'rm' ], '', ),
372 'RM_REG': ( 'ModR/M', [ 'reg', 'rm' ], '11 mr/reg', ),
373 'RM_MEM': ( 'ModR/M', [ 'reg', 'rm' ], '!11 mr/reg', ),
374 'RMI': ( 'ModR/M', [ 'reg', 'rm', 'imm' ], '', ),
375 'RMI_REG': ( 'ModR/M', [ 'reg', 'rm', 'imm' ], '11 mr/reg', ),
376 'RMI_MEM': ( 'ModR/M', [ 'reg', 'rm', 'imm' ], '!11 mr/reg', ),
377 'MR': ( 'ModR/M', [ 'rm', 'reg' ], '', ),
378 'MR_REG': ( 'ModR/M', [ 'rm', 'reg' ], '11 mr/reg', ),
379 'MR_MEM': ( 'ModR/M', [ 'rm', 'reg' ], '!11 mr/reg', ),
380 'MRI': ( 'ModR/M', [ 'rm', 'reg', 'imm' ], '', ),
381 'MRI_REG': ( 'ModR/M', [ 'rm', 'reg', 'imm' ], '11 mr/reg', ),
382 'MRI_MEM': ( 'ModR/M', [ 'rm', 'reg', 'imm' ], '!11 mr/reg', ),
383 'M': ( 'ModR/M', [ 'rm', ], '', ),
384 'M_REG': ( 'ModR/M', [ 'rm', ], '', ),
385 'M_MEM': ( 'ModR/M', [ 'rm', ], '', ),
386 'R': ( 'ModR/M', [ 'reg', ], '', ),
387
388 'VEX_RM': ( 'VEX.ModR/M', [ 'reg', 'rm' ], '', ),
389 'VEX_RM_REG': ( 'VEX.ModR/M', [ 'reg', 'rm' ], '11 mr/reg', ),
390 'VEX_RM_MEM': ( 'VEX.ModR/M', [ 'reg', 'rm' ], '!11 mr/reg', ),
391 'VEX_MR': ( 'VEX.ModR/M', [ 'rm', 'reg' ], '', ),
392 'VEX_MR_REG': ( 'VEX.ModR/M', [ 'rm', 'reg' ], '11 mr/reg', ),
393 'VEX_MR_MEM': ( 'VEX.ModR/M', [ 'rm', 'reg' ], '!11 mr/reg', ),
394 'VEX_M': ( 'VEX.ModR/M', [ 'rm', ], '' ),
395 'VEX_M_REG': ( 'VEX.ModR/M', [ 'rm', ], '' ),
396 'VEX_M_MEM': ( 'VEX.ModR/M', [ 'rm', ], '' ),
397 'VEX_R': ( 'VEX.ModR/M', [ 'reg', ], '' ),
398 'VEX_RVM': ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm' ], '', ),
399 'VEX_RVM_REG': ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm' ], '11 mr/reg', ),
400 'VEX_RVM_MEM': ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm' ], '!11 mr/reg', ),
401 'VEX_RMV': ( 'VEX.ModR/M', [ 'reg', 'rm', 'vvvv' ], '', ),
402 'VEX_RMV_REG': ( 'VEX.ModR/M', [ 'reg', 'rm', 'vvvv' ], '11 mr/reg', ),
403 'VEX_RMV_MEM': ( 'VEX.ModR/M', [ 'reg', 'rm', 'vvvv' ], '!11 mr/reg', ),
404 'VEX_RMI': ( 'VEX.ModR/M', [ 'reg', 'rm', 'imm' ], '', ),
405 'VEX_RMI_REG': ( 'VEX.ModR/M', [ 'reg', 'rm', 'imm' ], '11 mr/reg', ),
406 'VEX_RMI_MEM': ( 'VEX.ModR/M', [ 'reg', 'rm', 'imm' ], '!11 mr/reg', ),
407 'VEX_MVR': ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg' ], '', ),
408 'VEX_MVR_REG': ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg' ], '11 mr/reg', ),
409 'VEX_MVR_MEM': ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg' ], '!11 mr/reg', ),
410
411 'VEX_VM': ( 'VEX.ModR/M', [ 'vvvv', 'rm' ], '', ),
412 'VEX_VM_REG': ( 'VEX.ModR/M', [ 'vvvv', 'rm' ], '11 mr/reg', ),
413 'VEX_VM_MEM': ( 'VEX.ModR/M', [ 'vvvv', 'rm' ], '!11 mr/reg', ),
414
415 'FIXED': ( 'fixed', None, '', ),
416};
417
418## \@oppfx values.
419g_kdPrefixes = {
420 'none': [],
421 '0x66': [],
422 '0xf3': [],
423 '0xf2': [],
424};
425
426## Special \@opcode tag values.
427g_kdSpecialOpcodes = {
428 '/reg': [],
429 'mr/reg': [],
430 '11 /reg': [],
431 '!11 /reg': [],
432 '11 mr/reg': [],
433 '!11 mr/reg': [],
434};
435
## Special \@opcodesub tag values.
##
## Value fields:
##    - 0: the real (canonical) value; differs from the key only for the
##         entries marked as aliases.
##    - 1: the value for bs3cg1.
g_kdSubOpcodes = {
    'none':                 [ None,                 '',         ],
    '11 mr/reg':            [ '11 mr/reg',          '',         ],
    '11':                   [ '11 mr/reg',          '',         ], ##< alias
    '!11 mr/reg':           [ '!11 mr/reg',         '',         ],
    '!11':                  [ '!11 mr/reg',         '',         ], ##< alias
    'rex.w=0':              [ 'rex.w=0',            'WZ',       ],
    'w=0':                  [ 'rex.w=0',            '',         ], ##< alias
    'rex.w=1':              [ 'rex.w=1',            'WNZ',      ],
    'w=1':                  [ 'rex.w=1',            '',         ], ##< alias
    'vex.l=0':              [ 'vex.l=0',            'L0',       ],
    # Not an alias: must resolve to itself, not to 'vex.l=0'.
    'vex.l=1':              [ 'vex.l=1',            'L1',       ],
    '11 mr/reg vex.l=0':    [ '11 mr/reg vex.l=0',  'L0',       ],
    '11 mr/reg vex.l=1':    [ '11 mr/reg vex.l=1',  'L1',       ],
    '!11 mr/reg vex.l=0':   [ '!11 mr/reg vex.l=0', 'L0',       ],
    '!11 mr/reg vex.l=1':   [ '!11 mr/reg vex.l=1', 'L1',       ],
};
456
457## Valid values for \@openc
458g_kdEncodings = {
459 'ModR/M': [ 'BS3CG1ENC_MODRM', ], ##< ModR/M
460 'VEX.ModR/M': [ 'BS3CG1ENC_VEX_MODRM', ], ##< VEX...ModR/M
461 'fixed': [ 'BS3CG1ENC_FIXED', ], ##< Fixed encoding (address, registers, unused, etc).
462 'VEX.fixed': [ 'BS3CG1ENC_VEX_FIXED', ], ##< VEX + fixed encoding (address, registers, unused, etc).
463 'prefix': [ None, ], ##< Prefix
464};
465
466## \@opunused, \@opinvalid, \@opinvlstyle
467g_kdInvalidStyles = {
468 'immediate': [], ##< CPU stops decoding immediately after the opcode.
469 'vex.modrm': [], ##< VEX+ModR/M, everyone.
470 'intel-modrm': [], ##< Intel decodes ModR/M.
471 'intel-modrm-imm8': [], ##< Intel decodes ModR/M and an 8-bit immediate.
472 'intel-opcode-modrm': [], ##< Intel decodes another opcode byte followed by ModR/M. (Unused extension tables.)
473 'intel-opcode-modrm-imm8': [], ##< Intel decodes another opcode byte followed by ModR/M and an 8-bit immediate.
474};
475
476g_kdCpuNames = {
477 '8086': (),
478 '80186': (),
479 '80286': (),
480 '80386': (),
481 '80486': (),
482};
483
## \@opcpuid
##
## Maps the \@opcpuid tag value to the name of the X86_CPUID_XXX feature
## bit constant.
g_kdCpuIdFlags = {
    'vme':          'X86_CPUID_FEATURE_EDX_VME',
    'tsc':          'X86_CPUID_FEATURE_EDX_TSC',
    'msr':          'X86_CPUID_FEATURE_EDX_MSR',
    'cx8':          'X86_CPUID_FEATURE_EDX_CX8',
    'sep':          'X86_CPUID_FEATURE_EDX_SEP',
    'cmov':         'X86_CPUID_FEATURE_EDX_CMOV',
    'clfsh':        'X86_CPUID_FEATURE_EDX_CLFSH',
    'clflushopt':   'X86_CPUID_STEXT_FEATURE_EBX_CLFLUSHOPT',
    'mmx':          'X86_CPUID_FEATURE_EDX_MMX',
    'fxsr':         'X86_CPUID_FEATURE_EDX_FXSR',
    'sse':          'X86_CPUID_FEATURE_EDX_SSE',
    'sse2':         'X86_CPUID_FEATURE_EDX_SSE2',
    'sse3':         'X86_CPUID_FEATURE_ECX_SSE3',
    # The next three used to name the neighbouring CPUID.1:ECX bits
    # (DTES64, CPLDS and TM2) rather than the feature given by the key.
    'pclmul':       'X86_CPUID_FEATURE_ECX_PCLMUL',
    'monitor':      'X86_CPUID_FEATURE_ECX_MONITOR',
    'vmx':          'X86_CPUID_FEATURE_ECX_VMX',
    'smx':          'X86_CPUID_FEATURE_ECX_SMX',
    'ssse3':        'X86_CPUID_FEATURE_ECX_SSSE3',
    'fma':          'X86_CPUID_FEATURE_ECX_FMA',
    'cx16':         'X86_CPUID_FEATURE_ECX_CX16',
    'pcid':         'X86_CPUID_FEATURE_ECX_PCID',
    'sse4.1':       'X86_CPUID_FEATURE_ECX_SSE4_1',
    'sse4.2':       'X86_CPUID_FEATURE_ECX_SSE4_2',
    'movbe':        'X86_CPUID_FEATURE_ECX_MOVBE',
    'popcnt':       'X86_CPUID_FEATURE_ECX_POPCNT',
    'aes':          'X86_CPUID_FEATURE_ECX_AES',
    'xsave':        'X86_CPUID_FEATURE_ECX_XSAVE',
    'avx':          'X86_CPUID_FEATURE_ECX_AVX',
    'avx2':         'X86_CPUID_STEXT_FEATURE_EBX_AVX2',
    'f16c':         'X86_CPUID_FEATURE_ECX_F16C',
    'rdrand':       'X86_CPUID_FEATURE_ECX_RDRAND',

    'axmmx':        'X86_CPUID_AMD_FEATURE_EDX_AXMMX',
    '3dnowext':     'X86_CPUID_AMD_FEATURE_EDX_3DNOW_EX',
    '3dnow':        'X86_CPUID_AMD_FEATURE_EDX_3DNOW',
    'svm':          'X86_CPUID_AMD_FEATURE_ECX_SVM',
    'cr8l':         'X86_CPUID_AMD_FEATURE_ECX_CR8L',
    'abm':          'X86_CPUID_AMD_FEATURE_ECX_ABM',
    'sse4a':        'X86_CPUID_AMD_FEATURE_ECX_SSE4A',
    '3dnowprf':     'X86_CPUID_AMD_FEATURE_ECX_3DNOWPRF',
    'xop':          'X86_CPUID_AMD_FEATURE_ECX_XOP',
    'fma4':         'X86_CPUID_AMD_FEATURE_ECX_FMA4',
};
529
530## \@ophints values.
531# pylint: disable=line-too-long
532g_kdHints = {
533 'invalid': 'DISOPTYPE_INVALID', ##<
534 'harmless': 'DISOPTYPE_HARMLESS', ##<
535 'controlflow': 'DISOPTYPE_CONTROLFLOW', ##<
536 'potentially_dangerous': 'DISOPTYPE_POTENTIALLY_DANGEROUS', ##<
537 'dangerous': 'DISOPTYPE_DANGEROUS', ##<
538 'portio': 'DISOPTYPE_PORTIO', ##<
539 'privileged': 'DISOPTYPE_PRIVILEGED', ##<
540 'privileged_notrap': 'DISOPTYPE_PRIVILEGED_NOTRAP', ##<
541 'uncond_controlflow': 'DISOPTYPE_UNCOND_CONTROLFLOW', ##<
542 'relative_controlflow': 'DISOPTYPE_RELATIVE_CONTROLFLOW', ##<
543 'cond_controlflow': 'DISOPTYPE_COND_CONTROLFLOW', ##<
544 'interrupt': 'DISOPTYPE_INTERRUPT', ##<
545 'illegal': 'DISOPTYPE_ILLEGAL', ##<
546 'rrm_dangerous': 'DISOPTYPE_RRM_DANGEROUS', ##< Some additional dangerous ones when recompiling raw r0.
547 'rrm_dangerous_16': 'DISOPTYPE_RRM_DANGEROUS_16', ##< Some additional dangerous ones when recompiling 16-bit raw r0.
548 'inhibit_irqs': 'DISOPTYPE_INHIBIT_IRQS', ##< Will or can inhibit irqs (sti, pop ss, mov ss)
549 'x86_portio_read': 'DISOPTYPE_X86_PORTIO_READ', ##<
550 'x86_portio_write': 'DISOPTYPE_X86_PORTIO_WRITE', ##<
551 'x86_invalid_64': 'DISOPTYPE_X86_INVALID_64', ##< Invalid in 64 bits mode
552 'x86_only_64': 'DISOPTYPE_X86_ONLY_64', ##< Only valid in 64 bits mode
553 'x86_default_64_op_size': 'DISOPTYPE_X86_DEFAULT_64_OP_SIZE', ##< Default 64 bits operand size
554 'x86_forced_64_op_size': 'DISOPTYPE_X86_FORCED_64_OP_SIZE', ##< Forced 64 bits operand size; regardless of prefix bytes
555 'x86_rexb_extends_opreg': 'DISOPTYPE_X86_REXB_EXTENDS_OPREG', ##< REX.B extends the register field in the opcode byte
556 'x86_mod_fixed_11': 'DISOPTYPE_X86_MOD_FIXED_11', ##< modrm.mod is always 11b
557 'x86_forced_32_op_size_x86': 'DISOPTYPE_X86_FORCED_32_OP_SIZE_X86', ##< Forced 32 bits operand size; regardless of prefix bytes
558 ## (only in 16 & 32 bits mode!)
559 'x86_avx': 'DISOPTYPE_X86_AVX', ##< AVX,AVX2,++ instruction. Not implemented yet!
560 'x86_sse': 'DISOPTYPE_X86_SSE', ##< SSE,SSE2,SSE3,++ instruction. Not implemented yet!
561 'x86_mmx': 'DISOPTYPE_X86_MMX', ##< MMX,MMXExt,3DNow,++ instruction. Not implemented yet!
562 'x86_fpu': 'DISOPTYPE_X86_FPU', ##< FPU instruction. Not implemented yet!
563 'ignores_oz_pfx': '', ##< Ignores operand size prefix 66h.
564 'ignores_rexw': '', ##< Ignores REX.W.
565 'ignores_op_sizes': '', ##< Shorthand for "ignores_oz_pfx | ignores_rexw".
566 'vex_l_zero': '', ##< VEX.L must be 0.
567 'vex_l_ignored': '', ##< VEX.L is ignored.
568 'vex_v_zero': '', ##< VEX.V must be 0. (generate sub-table?)
569 'lock_allowed': '', ##< Lock prefix allowed.
570};
571# pylint: enable=line-too-long
572
573## \@opxcpttype values (see SDMv2 2.4, 2.7).
574g_kdXcptTypes = {
575 'none': [],
576 '1': [],
577 '2': [],
578 '3': [],
579 '4': [],
580 '4UA': [],
581 '5': [],
582 '5LZ': [], # LZ = VEX.L must be zero.
583 '6': [],
584 '7': [],
585 '7LZ': [],
586 '8': [],
587 '11': [],
588 '12': [],
589 'E1': [],
590 'E1NF': [],
591 'E2': [],
592 'E3': [],
593 'E3NF': [],
594 'E4': [],
595 'E4NF': [],
596 'E5': [],
597 'E5NF': [],
598 'E6': [],
599 'E6NF': [],
600 'E7NF': [],
601 'E9': [],
602 'E9NF': [],
603 'E10': [],
604 'E11': [],
605 'E12': [],
606 'E12NF': [],
607};
608
609
def _isValidOpcodeByte(sOpcode):
    """
    Tests whether sOpcode is an opcode byte in lower case hex notation, that
    is '0x' followed by exactly two characters out of '0123456789abcdef'.
    Returns True/False.
    """
    sHexDigits = '0123456789abcdef';
    return (    len(sOpcode) == 4
            and sOpcode[:2] == '0x'
            and sOpcode[2] in sHexDigits
            and sOpcode[3] in sHexDigits);
621
622
623class InstructionMap(object):
624 """
625 Instruction map.
626
627 The opcode map provides the lead opcode bytes (empty for the one byte
628 opcode map). An instruction can be member of multiple opcode maps as long
629 as it uses the same opcode value within the map (because of VEX).
630 """
631
632 kdEncodings = {
633 'legacy': [],
634 'vex1': [], ##< VEX or EVEX prefix with vvvvv = 1
635 'vex2': [], ##< VEX or EVEX prefix with vvvvv = 2
636 'vex3': [], ##< VEX or EVEX prefix with vvvvv = 3
637 'xop8': [], ##< XOP prefix with vvvvv = 8
638 'xop9': [], ##< XOP prefix with vvvvv = 9
639 'xop10': [], ##< XOP prefix with vvvvv = 10
640 };
641 ## Selectors.
642 ## 1. The first value is the number of table entries required by a
643 ## decoder or disassembler for this type of selector.
644 ## 2. The second value is how many entries per opcode byte if applicable.
645 kdSelectors = {
646 'byte': [ 256, 1, ], ##< next opcode byte selects the instruction (default).
647 'byte+pfx': [ 1024, 4, ], ##< next opcode byte selects the instruction together with the 0x66, 0xf2 and 0xf3 prefixes.
648 '/r': [ 8, 1, ], ##< modrm.reg selects the instruction.
649 'memreg /r':[ 16, 1, ], ##< modrm.reg and (modrm.mod == 3) selects the instruction.
650 'mod /r': [ 32, 1, ], ##< modrm.reg and modrm.mod selects the instruction.
651 '!11 /r': [ 8, 1, ], ##< modrm.reg selects the instruction with modrm.mod != 0y11.
652 '11 /r': [ 8, 1, ], ##< modrm.reg select the instruction with modrm.mod == 0y11.
653 '11': [ 64, 1, ], ##< modrm.reg and modrm.rm select the instruction with modrm.mod == 0y11.
654 };
655
656 ## Define the subentry number according to the Instruction::sPrefix
657 ## value for 'byte+pfx' selected tables.
658 kiPrefixOrder = {
659 'none': 0,
660 '0x66': 1,
661 '0xf3': 2,
662 '0xf2': 3,
663 };
664
665 def __init__(self, sName, sIemName = None, asLeadOpcodes = None, sSelector = 'byte+pfx',
666 sEncoding = 'legacy', sDisParse = None):
667 assert sSelector in self.kdSelectors;
668 assert sEncoding in self.kdEncodings;
669 if asLeadOpcodes is None:
670 asLeadOpcodes = [];
671 else:
672 for sOpcode in asLeadOpcodes:
673 assert _isValidOpcodeByte(sOpcode);
674 assert sDisParse is None or sDisParse.startswith('IDX_Parse');
675
676 self.sName = sName;
677 self.sIemName = sIemName;
678 self.asLeadOpcodes = asLeadOpcodes; ##< Lead opcode bytes formatted as hex strings like '0x0f'.
679 self.sSelector = sSelector; ##< The member selector, see kdSelectors.
680 self.sEncoding = sEncoding; ##< The encoding, see kdSelectors.
681 self.aoInstructions = [] # type: Instruction
682 self.sDisParse = sDisParse; ##< IDX_ParseXXX.
683
684 def copy(self, sNewName, sPrefixFilter = None):
685 """
686 Copies the table with filtering instruction by sPrefix if not None.
687 """
688 oCopy = InstructionMap(sNewName, sIemName = self.sIemName, asLeadOpcodes = self.asLeadOpcodes,
689 sSelector = 'byte' if sPrefixFilter is not None and self.sSelector == 'byte+pfx'
690 else self.sSelector,
691 sEncoding = self.sEncoding, sDisParse = self.sDisParse);
692 if sPrefixFilter is None:
693 oCopy.aoInstructions = list(self.aoInstructions);
694 else:
695 oCopy.aoInstructions = [oInstr for oInstr in self.aoInstructions if oInstr.sPrefix == sPrefixFilter];
696 return oCopy;
697
698 def getTableSize(self):
699 """
700 Number of table entries. This corresponds directly to the selector.
701 """
702 return self.kdSelectors[self.sSelector][0];
703
704 def getEntriesPerByte(self):
705 """
706 Number of table entries per opcode bytes.
707
708 This only really makes sense for the 'byte' and 'byte+pfx' selectors, for
709 the others it will just return 1.
710 """
711 return self.kdSelectors[self.sSelector][1];
712
713 def getInstructionIndex(self, oInstr):
714 """
715 Returns the table index for the instruction.
716 """
717 bOpcode = oInstr.getOpcodeByte();
718
719 # The byte selectors are simple. We need a full opcode byte and need just return it.
720 if self.sSelector == 'byte':
721 assert oInstr.sOpcode[:2] == '0x' and len(oInstr.sOpcode) == 4, str(oInstr);
722 return bOpcode;
723
724 # The byte + prefix selector is similarly simple, though requires a prefix as well as the full opcode.
725 if self.sSelector == 'byte+pfx':
726 assert oInstr.sOpcode[:2] == '0x' and len(oInstr.sOpcode) == 4, str(oInstr);
727 assert self.kiPrefixOrder.get(oInstr.sPrefix, -16384) >= 0;
728 return bOpcode * 4 + self.kiPrefixOrder.get(oInstr.sPrefix, -16384);
729
730 # The other selectors needs masking and shifting.
731 if self.sSelector == '/r':
732 return (bOpcode >> 3) & 0x7;
733
734 if self.sSelector == 'mod /r':
735 return (bOpcode >> 3) & 0x1f;
736
737 if self.sSelector == 'memreg /r':
738 return ((bOpcode >> 3) & 0x7) | (int((bOpcode >> 6) == 3) << 3);
739
740 if self.sSelector == '!11 /r':
741 assert (bOpcode & 0xc0) != 0xc, str(oInstr);
742 return (bOpcode >> 3) & 0x7;
743
744 if self.sSelector == '11 /r':
745 assert (bOpcode & 0xc0) == 0xc, str(oInstr);
746 return (bOpcode >> 3) & 0x7;
747
748 if self.sSelector == '11':
749 assert (bOpcode & 0xc0) == 0xc, str(oInstr);
750 return bOpcode & 0x3f;
751
752 assert False, self.sSelector;
753 return -1;
754
755 def getInstructionsInTableOrder(self):
756 """
757 Get instructions in table order.
758
759 Returns array of instructions. Normally there is exactly one
760 instruction per entry. However the entry could also be None if
761 not instruction was specified for that opcode value. Or there
762 could be a list of instructions to deal with special encodings
763 where for instance prefix (e.g. REX.W) encodes a different
764 instruction or different CPUs have different instructions or
765 prefixes in the same place.
766 """
767 # Start with empty table.
768 cTable = self.getTableSize();
769 aoTable = [None] * cTable;
770
771 # Insert the instructions.
772 for oInstr in self.aoInstructions:
773 if oInstr.sOpcode:
774 idxOpcode = self.getInstructionIndex(oInstr);
775 assert idxOpcode < cTable, str(idxOpcode);
776
777 oExisting = aoTable[idxOpcode];
778 if oExisting is None:
779 aoTable[idxOpcode] = oInstr;
780 elif not isinstance(oExisting, list):
781 aoTable[idxOpcode] = list([oExisting, oInstr]);
782 else:
783 oExisting.append(oInstr);
784
785 return aoTable;
786
787
def getDisasTableName(self):
    """
    Returns the disassembler table name for this map.

    The name is 'g_aDisas' followed by one piece per underscore separated
    word of self.sName:
        - 'm' / 'r' (modrm.mod==mem / modrm.mod==reg suffixes) and two digit
          lowercase hex words are kept as-is with a leading underscore.
        - Any other word gets 'grp' / 'map' turned into 'Grp' / 'Map' and
          its first character uppercased, and is appended without separator.
    """
    asParts = ['g_aDisas',];
    for sPart in self.sName.split('_'):
        fKeepVerbatim = sPart in ('m', 'r') \
                     or (len(sPart) == 2 and re.match('^[a-f0-9][a-f0-9]$', sPart));
        if fKeepVerbatim:
            asParts.append('_' + sPart);
        else:
            sPart = sPart.replace('grp', 'Grp').replace('map', 'Map');
            asParts.append(sPart[0].upper() + sPart[1:]);
    return ''.join(asParts);
805
def getDisasRangeName(self):
    """
    Returns the disassembler table range name for this map.

    This is the table name from getDisasTableName() with the 'g_aDisas'
    prefix turned into 'g_Disas' and 'Range' appended.
    """
    sTableName = self.getDisasTableName();
    sRangeBase = sTableName.replace('g_aDisas', 'g_Disas');
    return sRangeBase + 'Range';
811
def isVexMap(self):
    """ Returns True if this is a VEX map, i.e. the encoding name starts with 'vex'. """
    fIsVex = self.sEncoding.startswith('vex');
    return fIsVex;
815
816
class TestType(object):
    """
    Test value type.

    This base class deals with integer like values.  The fUnsigned constructor
    parameter indicates the default stance on zero vs sign extending.  It is
    possible to override fUnsigned=True by prefixing the value with '+' or '-'.
    """
    def __init__(self, sName, acbSizes = None, fUnsigned = True):
        self.sName     = sName;
        self.acbSizes  = [1, 2, 4, 8, 16, 32] if acbSizes is None else acbSizes;  # Normal sizes.
        self.fUnsigned = fUnsigned;

    class BadValue(Exception):
        """ Bad value exception. """
        def __init__(self, sMessage):
            Exception.__init__(self, sMessage);
            self.sMessage = sMessage;

    ## For ascii ~ operator.
    kdHexInv = {
        '0': 'f',
        '1': 'e',
        '2': 'd',
        '3': 'c',
        '4': 'b',
        '5': 'a',
        '6': '9',
        '7': '8',
        '8': '7',
        '9': '6',
        'a': '5',
        'b': '4',
        'c': '3',
        'd': '2',
        'e': '1',
        'f': '0',
    };

    def get(self, sValue):
        """
        Get the shortest normal sized byte representation of sValue.

        sValue is a decimal or '0x' prefixed hexadecimal integer string,
        optionally preceded by '+' or '-' (either of which forces sign
        extension).

        Returns ((fSignExtend, bytearray), ) or ((fSignExtend, bytearray), (fSignExtend, bytearray), ).
        The latter form is for AND+OR pairs where the first entry is what to
        AND with the field and the second the one or OR with.  This base
        class always returns the single entry form.  The bytearray holds the
        least significant byte first.

        Raises BadValue if invalid value.
        """
        if not sValue:
            raise TestType.BadValue('empty value');

        # Deal with sign and detect hexadecimal or decimal.
        fSignExtend = not self.fUnsigned;
        if sValue[0] == '-' or sValue[0] == '+':
            fSignExtend = True;
            fHex = len(sValue) > 3 and sValue[1:3].lower() == '0x';
        else:
            fHex = len(sValue) > 2 and sValue[0:2].lower() == '0x';

        # Try convert it to an integer.  int() yields arbitrary precision
        # integers on both python 2 and 3, so no 'long' alias is needed here.
        try:
            iValue = int(sValue, 16 if fHex else 10);
        except Exception as oXcpt:
            raise TestType.BadValue('failed to convert "%s" to integer (%s)' % (sValue, oXcpt));

        # Convert the hex string and pad it to a decent value.  Negative values
        # needs to be manually converted to something non-negative (~-n + 1).
        # Note! Python 2 appends 'L' to hex() of long values; 'L' is not a hex
        #       digit, so stripping it is safe on all versions.
        if iValue >= 0:
            sHex = hex(iValue).rstrip('L');
            assert sHex[:2] == '0x';
            sHex = sHex[2:];
        else:
            sHex = hex(-iValue - 1).rstrip('L');
            assert sHex[:2] == '0x';
            sHex = ''.join([self.kdHexInv[sDigit] for sDigit in sHex[2:]]);
            # Make sure the top bit of the leading digit is set.
            if fSignExtend and sHex[0] not in [ '8', '9', 'a', 'b', 'c', 'd', 'e', 'f']:
                sHex = 'f' + sHex;

        # Figure the natural size: the first normal size it fits in, or else
        # the next multiple of the largest normal size.
        cDigits = len(sHex);
        if cDigits <= self.acbSizes[-1] * 2:
            for cb in self.acbSizes:
                cNaturalDigits = cb * 2;
                if cDigits <= cNaturalDigits:
                    break;
        else:
            cNaturalDigits = self.acbSizes[-1] * 2;
            # Round up using integer division (no float rounding involved).
            cNaturalDigits = ((cDigits + cNaturalDigits - 1) // cNaturalDigits) * cNaturalDigits;

        # Pad to the natural size: zeros for non-negative, all ones for negative.
        if cNaturalDigits != cDigits:
            cNeeded = cNaturalDigits - cDigits;
            if iValue >= 0:
                sHex = ('0' * cNeeded) + sHex;
            else:
                sHex = ('f' * cNeeded) + sHex;

        # Invert and convert to bytearray and return it.
        abValue = bytearray([int(sHex[offHex - 2 : offHex], 16) for offHex in range(len(sHex), 0, -2)]);

        return ((fSignExtend, abValue),);

    def validate(self, sValue):
        """
        Returns True if value is okay, error message on failure.
        """
        try:
            self.get(sValue);
        except TestType.BadValue as oXcpt:
            return oXcpt.sMessage;
        return True;

    def isAndOrPair(self, sValue):
        """
        Checks if sValue is a pair.

        Always False for plain integer values.
        """
        _ = sValue;
        return False;
941
942
class TestTypeEflags(TestType):
    """
    Special value parsing for EFLAGS/RFLAGS/FLAGS.

    Values are comma separated flag mnemonics that are looked up in
    g_kdEFlagsMnemonics.  A mnemonic translating to a constant prefixed by
    '!' clears that flag, any other mnemonic sets it.
    """

    ## Mnemonics that isAndOrPair() takes as an indication of an AND+OR pair.
    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 };

    def __init__(self, sName):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);

    def get(self, sValue):
        """
        Converts the comma separated flag mnemonics in sValue.

        Returns a single entry tuple with the set-mask when no flag is
        cleared, otherwise a (clear-mask, set-mask) pair of entries.
        Raises BadValue on unknown mnemonics.
        """
        # Accumulate the two masks.
        fSetMask   = 0;
        fClearMask = 0;
        for sMnemonic in sValue.split(','):
            sEflConst = g_kdEFlagsMnemonics.get(sMnemonic, None);
            if sEflConst is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sMnemonic, sValue))
            if sEflConst[0] == '!':
                fClearMask |= g_kdX86EFlagsConstants[sEflConst[1:]];
            else:
                fSetMask   |= g_kdX86EFlagsConstants[sEflConst];

        # Convert them using the plain integer code.
        aoSetEntries = TestType.get(self, '0x%x' % (fSetMask,));
        if fClearMask == 0:
            assert self.isAndOrPair(sValue) is False;
            return aoSetEntries;

        aoClearEntries = TestType.get(self, '%#x' % (fClearMask,))
        assert self.isAndOrPair(sValue) is True;
        return (aoClearEntries[0], aoSetEntries[0]);

    def isAndOrPair(self, sValue):
        """
        Checks if sValue is a pair, i.e. whether any of the kdZeroValueFlags
        mnemonics occurs somewhere in it (plain substring search).
        """
        return any(sZeroFlag in sValue for sZeroFlag in self.kdZeroValueFlags);
978
class TestTypeFromDict(TestType):
    """
    Special value parsing for registers whose value is given as a comma
    separated list of flag names (TestInOut uses it for CR0, CR4 and XCR0).

    Each flag name is uppercased, prefixed with sConstantPrefix and looked up
    in the kdConstantsAndValues dictionary; the values found are ORed together.
    """

    ## Not referenced by this class.
    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 };

    def __init__(self, sName, kdConstantsAndValues, sConstantPrefix):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);
        self.kdConstantsAndValues = kdConstantsAndValues;
        self.sConstantPrefix      = sConstantPrefix;

    def get(self, sValue):
        """
        Converts the comma separated flag names in sValue into the byte
        representation of the combined value.

        Raises BadValue on unknown flag names.
        """
        fCombined = 0;
        for sFlagName in sValue.split(','):
            sConstant = self.sConstantPrefix + sFlagName.upper();
            fThisFlag = self.kdConstantsAndValues.get(sConstant, None);
            if fThisFlag is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sFlagName, sValue))
            fCombined |= fThisFlag;
        return TestType.get(self, '0x%x' % (fCombined,));
999
1000
class TestInOut(object):
    """
    One input or output state modifier.

    This should be thought as values to modify BS3REGCTX and extended (needs
    to be structured) state.

    Each instance is a (field, operator, value, type) quadruplet of strings,
    where the field must be a key in kdFields and the operator one of
    kasOperators.
    """
    ## Assigned operators.
    kasOperators = [
        '&|=',  # Special AND(INV)+OR operator for use with EFLAGS.
        '&~=',
        '&=',
        '|=',
        '='
    ];
    ## Types
    # NOTE(review): 'int' is constructed with the default fUnsigned=True and
    #               thus currently parses exactly like 'uint' - confirm
    #               whether that is intentional before changing it.
    kdTypes = {
        'uint':  TestType('uint', fUnsigned = True),
        'int':   TestType('int'),
        'efl':   TestTypeEflags('efl'),
        'cr0':   TestTypeFromDict('cr0', g_kdX86Cr0Constants, 'X86_CR0_'),
        'cr4':   TestTypeFromDict('cr4', g_kdX86Cr4Constants, 'X86_CR4_'),
        'xcr0':  TestTypeFromDict('xcr0', g_kdX86XSaveCConstants, 'XSAVE_C_'),
    };
    ## CPU context fields.
    kdFields = {
        # name:         ( default type, [both|input|output], )
        # Operands.
        'op1':          ( 'uint', 'both',   ), ## \@op1
        'op2':          ( 'uint', 'both',   ), ## \@op2
        'op3':          ( 'uint', 'both',   ), ## \@op3
        'op4':          ( 'uint', 'both',   ), ## \@op4
        # Flags.
        'efl':          ( 'efl',  'both',   ),
        'efl_undef':    ( 'uint', 'output', ),
        # 8-bit GPRs.
        'al':           ( 'uint', 'both',   ),
        'cl':           ( 'uint', 'both',   ),
        'dl':           ( 'uint', 'both',   ),
        'bl':           ( 'uint', 'both',   ),
        'ah':           ( 'uint', 'both',   ),
        'ch':           ( 'uint', 'both',   ),
        'dh':           ( 'uint', 'both',   ),
        'bh':           ( 'uint', 'both',   ),
        'r8l':          ( 'uint', 'both',   ),
        'r9l':          ( 'uint', 'both',   ),
        'r10l':         ( 'uint', 'both',   ),
        'r11l':         ( 'uint', 'both',   ),
        'r12l':         ( 'uint', 'both',   ),
        'r13l':         ( 'uint', 'both',   ),
        'r14l':         ( 'uint', 'both',   ),
        'r15l':         ( 'uint', 'both',   ),
        # 16-bit GPRs.
        'ax':           ( 'uint', 'both',   ),
        'dx':           ( 'uint', 'both',   ),
        'cx':           ( 'uint', 'both',   ),
        'bx':           ( 'uint', 'both',   ),
        'sp':           ( 'uint', 'both',   ),
        'bp':           ( 'uint', 'both',   ),
        'si':           ( 'uint', 'both',   ),
        'di':           ( 'uint', 'both',   ),
        'r8w':          ( 'uint', 'both',   ),
        'r9w':          ( 'uint', 'both',   ),
        'r10w':         ( 'uint', 'both',   ),
        'r11w':         ( 'uint', 'both',   ),
        'r12w':         ( 'uint', 'both',   ),
        'r13w':         ( 'uint', 'both',   ),
        'r14w':         ( 'uint', 'both',   ),
        'r15w':         ( 'uint', 'both',   ),
        # 32-bit GPRs.
        'eax':          ( 'uint', 'both',   ),
        'edx':          ( 'uint', 'both',   ),
        'ecx':          ( 'uint', 'both',   ),
        'ebx':          ( 'uint', 'both',   ),
        'esp':          ( 'uint', 'both',   ),
        'ebp':          ( 'uint', 'both',   ),
        'esi':          ( 'uint', 'both',   ),
        'edi':          ( 'uint', 'both',   ),
        'r8d':          ( 'uint', 'both',   ),
        'r9d':          ( 'uint', 'both',   ),
        'r10d':         ( 'uint', 'both',   ),
        'r11d':         ( 'uint', 'both',   ),
        'r12d':         ( 'uint', 'both',   ),
        'r13d':         ( 'uint', 'both',   ),
        'r14d':         ( 'uint', 'both',   ),
        'r15d':         ( 'uint', 'both',   ),
        # 64-bit GPRs.
        'rax':          ( 'uint', 'both',   ),
        'rdx':          ( 'uint', 'both',   ),
        'rcx':          ( 'uint', 'both',   ),
        'rbx':          ( 'uint', 'both',   ),
        'rsp':          ( 'uint', 'both',   ),
        'rbp':          ( 'uint', 'both',   ),
        'rsi':          ( 'uint', 'both',   ),
        'rdi':          ( 'uint', 'both',   ),
        'r8':           ( 'uint', 'both',   ),
        'r9':           ( 'uint', 'both',   ),
        'r10':          ( 'uint', 'both',   ),
        'r11':          ( 'uint', 'both',   ),
        'r12':          ( 'uint', 'both',   ),
        'r13':          ( 'uint', 'both',   ),
        'r14':          ( 'uint', 'both',   ),
        'r15':          ( 'uint', 'both',   ),
        # 16-bit, 32-bit or 64-bit registers according to operand size.
        'oz.rax':       ( 'uint', 'both',   ),
        'oz.rdx':       ( 'uint', 'both',   ),
        'oz.rcx':       ( 'uint', 'both',   ),
        'oz.rbx':       ( 'uint', 'both',   ),
        'oz.rsp':       ( 'uint', 'both',   ),
        'oz.rbp':       ( 'uint', 'both',   ),
        'oz.rsi':       ( 'uint', 'both',   ),
        'oz.rdi':       ( 'uint', 'both',   ),
        'oz.r8':        ( 'uint', 'both',   ),
        'oz.r9':        ( 'uint', 'both',   ),
        'oz.r10':       ( 'uint', 'both',   ),
        'oz.r11':       ( 'uint', 'both',   ),
        'oz.r12':       ( 'uint', 'both',   ),
        'oz.r13':       ( 'uint', 'both',   ),
        'oz.r14':       ( 'uint', 'both',   ),
        'oz.r15':       ( 'uint', 'both',   ),
        # Control registers.
        'cr0':          ( 'cr0',  'both',   ),
        'cr4':          ( 'cr4',  'both',   ),
        'xcr0':         ( 'xcr0', 'both',   ),
        # FPU Registers
        'fcw':          ( 'uint', 'both',   ),
        'fsw':          ( 'uint', 'both',   ),
        'ftw':          ( 'uint', 'both',   ),
        'fop':          ( 'uint', 'both',   ),
        'fpuip':        ( 'uint', 'both',   ),
        'fpucs':        ( 'uint', 'both',   ),
        'fpudp':        ( 'uint', 'both',   ),
        'fpuds':        ( 'uint', 'both',   ),
        'mxcsr':        ( 'uint', 'both',   ),
        'st0':          ( 'uint', 'both',   ),
        'st1':          ( 'uint', 'both',   ),
        'st2':          ( 'uint', 'both',   ),
        'st3':          ( 'uint', 'both',   ),
        'st4':          ( 'uint', 'both',   ),
        'st5':          ( 'uint', 'both',   ),
        'st6':          ( 'uint', 'both',   ),
        'st7':          ( 'uint', 'both',   ),
        # MMX registers.
        'mm0':          ( 'uint', 'both',   ),
        'mm1':          ( 'uint', 'both',   ),
        'mm2':          ( 'uint', 'both',   ),
        'mm3':          ( 'uint', 'both',   ),
        'mm4':          ( 'uint', 'both',   ),
        'mm5':          ( 'uint', 'both',   ),
        'mm6':          ( 'uint', 'both',   ),
        'mm7':          ( 'uint', 'both',   ),
        # SSE registers.
        'xmm0':         ( 'uint', 'both',   ),
        'xmm1':         ( 'uint', 'both',   ),
        'xmm2':         ( 'uint', 'both',   ),
        'xmm3':         ( 'uint', 'both',   ),
        'xmm4':         ( 'uint', 'both',   ),
        'xmm5':         ( 'uint', 'both',   ),
        'xmm6':         ( 'uint', 'both',   ),
        'xmm7':         ( 'uint', 'both',   ),
        'xmm8':         ( 'uint', 'both',   ),
        'xmm9':         ( 'uint', 'both',   ),
        'xmm10':        ( 'uint', 'both',   ),
        'xmm11':        ( 'uint', 'both',   ),
        'xmm12':        ( 'uint', 'both',   ),
        'xmm13':        ( 'uint', 'both',   ),
        'xmm14':        ( 'uint', 'both',   ),
        'xmm15':        ( 'uint', 'both',   ),
        'xmm0.lo':      ( 'uint', 'both',   ),
        'xmm1.lo':      ( 'uint', 'both',   ),
        'xmm2.lo':      ( 'uint', 'both',   ),
        'xmm3.lo':      ( 'uint', 'both',   ),
        'xmm4.lo':      ( 'uint', 'both',   ),
        'xmm5.lo':      ( 'uint', 'both',   ),
        'xmm6.lo':      ( 'uint', 'both',   ),
        'xmm7.lo':      ( 'uint', 'both',   ),
        'xmm8.lo':      ( 'uint', 'both',   ),
        'xmm9.lo':      ( 'uint', 'both',   ),
        'xmm10.lo':     ( 'uint', 'both',   ),
        'xmm11.lo':     ( 'uint', 'both',   ),
        'xmm12.lo':     ( 'uint', 'both',   ),
        'xmm13.lo':     ( 'uint', 'both',   ),
        'xmm14.lo':     ( 'uint', 'both',   ),
        'xmm15.lo':     ( 'uint', 'both',   ),
        'xmm0.hi':      ( 'uint', 'both',   ),
        'xmm1.hi':      ( 'uint', 'both',   ),
        'xmm2.hi':      ( 'uint', 'both',   ),
        'xmm3.hi':      ( 'uint', 'both',   ),
        'xmm4.hi':      ( 'uint', 'both',   ),
        'xmm5.hi':      ( 'uint', 'both',   ),
        'xmm6.hi':      ( 'uint', 'both',   ),
        'xmm7.hi':      ( 'uint', 'both',   ),
        'xmm8.hi':      ( 'uint', 'both',   ),
        'xmm9.hi':      ( 'uint', 'both',   ),
        'xmm10.hi':     ( 'uint', 'both',   ),
        'xmm11.hi':     ( 'uint', 'both',   ),
        'xmm12.hi':     ( 'uint', 'both',   ),
        'xmm13.hi':     ( 'uint', 'both',   ),
        'xmm14.hi':     ( 'uint', 'both',   ),
        'xmm15.hi':     ( 'uint', 'both',   ),
        'xmm0.lo.zx':   ( 'uint', 'both',   ),
        'xmm1.lo.zx':   ( 'uint', 'both',   ),
        'xmm2.lo.zx':   ( 'uint', 'both',   ),
        'xmm3.lo.zx':   ( 'uint', 'both',   ),
        'xmm4.lo.zx':   ( 'uint', 'both',   ),
        'xmm5.lo.zx':   ( 'uint', 'both',   ),
        'xmm6.lo.zx':   ( 'uint', 'both',   ),
        'xmm7.lo.zx':   ( 'uint', 'both',   ),
        'xmm8.lo.zx':   ( 'uint', 'both',   ),
        'xmm9.lo.zx':   ( 'uint', 'both',   ),
        'xmm10.lo.zx':  ( 'uint', 'both',   ),
        'xmm11.lo.zx':  ( 'uint', 'both',   ),
        'xmm12.lo.zx':  ( 'uint', 'both',   ),
        'xmm13.lo.zx':  ( 'uint', 'both',   ),
        'xmm14.lo.zx':  ( 'uint', 'both',   ),
        'xmm15.lo.zx':  ( 'uint', 'both',   ),
        'xmm0.dw0':     ( 'uint', 'both',   ),
        'xmm1.dw0':     ( 'uint', 'both',   ),
        'xmm2.dw0':     ( 'uint', 'both',   ),
        'xmm3.dw0':     ( 'uint', 'both',   ),
        'xmm4.dw0':     ( 'uint', 'both',   ),
        'xmm5.dw0':     ( 'uint', 'both',   ),
        'xmm6.dw0':     ( 'uint', 'both',   ),
        'xmm7.dw0':     ( 'uint', 'both',   ),
        'xmm8.dw0':     ( 'uint', 'both',   ),
        'xmm9.dw0':     ( 'uint', 'both',   ),
        'xmm10.dw0':    ( 'uint', 'both',   ),
        'xmm11.dw0':    ( 'uint', 'both',   ),
        'xmm12.dw0':    ( 'uint', 'both',   ),
        'xmm13.dw0':    ( 'uint', 'both',   ),
        'xmm14.dw0':    ( 'uint', 'both',   ),
        'xmm15.dw0':    ( 'uint', 'both',   ),
        'xmm15_dw0':    ( 'uint', 'both',   ), # Misspelt legacy name of xmm15.dw0, kept for compatibility.
        # AVX registers.
        'ymm0':         ( 'uint', 'both',   ),
        'ymm1':         ( 'uint', 'both',   ),
        'ymm2':         ( 'uint', 'both',   ),
        'ymm3':         ( 'uint', 'both',   ),
        'ymm4':         ( 'uint', 'both',   ),
        'ymm5':         ( 'uint', 'both',   ),
        'ymm6':         ( 'uint', 'both',   ),
        'ymm7':         ( 'uint', 'both',   ),
        'ymm8':         ( 'uint', 'both',   ),
        'ymm9':         ( 'uint', 'both',   ),
        'ymm10':        ( 'uint', 'both',   ),
        'ymm11':        ( 'uint', 'both',   ),
        'ymm12':        ( 'uint', 'both',   ),
        'ymm13':        ( 'uint', 'both',   ),
        'ymm14':        ( 'uint', 'both',   ),
        'ymm15':        ( 'uint', 'both',   ),

        # Special ones.
        'value.xcpt':   ( 'uint', 'output', ),
    };

    def __init__(self, sField, sOp, sValue, sType):
        """
        sField must be a kdFields key and sOp one of kasOperators; all four
        arguments must be strings.  This is only enforced by assertions.
        """
        assert sField in self.kdFields;
        assert sOp in self.kasOperators;
        self.sField = sField;
        self.sOp    = sOp;
        self.sValue = sValue;
        self.sType  = sType;
        assert isinstance(sField, str);
        assert isinstance(sOp, str);
        assert isinstance(sType, str);
        assert isinstance(sValue, str);
1266
1267
class TestSelector(object):
    """
    One selector for an instruction test.

    A selector is a 'variable op value' triplet, where the variable and its
    value must be found in kdVariables and the operator in kasCompareOps.
    """
    ## Selector compare operators.
    kasCompareOps = [ '==', '!=' ];

    ## Selector variables and their valid values.
    kdVariables = {
        # Operand size.
        'size':     { 'o16': 'size_o16', 'o32': 'size_o32', 'o64': 'size_o64', },
        # VEX.L value.
        'vex.l':    { '0': 'vexl_0', '1': 'vexl_1', },
        # Execution ring.
        'ring':     {
            '0':    'ring_0',
            '1':    'ring_1',
            '2':    'ring_2',
            '3':    'ring_3',
            '0..2': 'ring_0_thru_2',
            '1..3': 'ring_1_thru_3',
        },
        # Basic code mode.
        'codebits': { '64': 'code_64bit', '32': 'code_32bit', '16': 'code_16bit', },
        # cpu modes.
        'mode':     {
            'real': 'mode_real',
            'prot': 'mode_prot',
            'long': 'mode_long',
            'v86':  'mode_v86',
            'smm':  'mode_smm',
            'vmx':  'mode_vmx',
            'svm':  'mode_svm',
        },
        # paging on/off
        'paging':   { 'on': 'paging_on', 'off': 'paging_off', },
        # CPU vendor
        'vendor':   { 'amd': 'vendor_amd', 'intel': 'vendor_intel', 'via': 'vendor_via', },
    };

    ## Selector shorthand predicates.
    ## These translates into variable expressions.
    kdPredicates = {
        # Operand size.
        'o16':          'size==o16',
        'o32':          'size==o32',
        'o64':          'size==o64',
        # Execution ring.
        'ring0':        'ring==0',
        '!ring0':       'ring==1..3',
        'ring1':        'ring==1',
        'ring2':        'ring==2',
        'ring3':        'ring==3',
        'user':         'ring==3',
        'supervisor':   'ring==0..2',
        # Code bitness.
        '16-bit':       'codebits==16',
        '32-bit':       'codebits==32',
        '64-bit':       'codebits==64',
        # CPU mode.
        'real':         'mode==real',
        'prot':         'mode==prot',
        'long':         'mode==long',
        'v86':          'mode==v86',
        'smm':          'mode==smm',
        'vmx':          'mode==vmx',
        'svm':          'mode==svm',
        # Paging.
        'paging':       'paging==on',
        '!paging':      'paging==off',
        # CPU vendor.
        'amd':          'vendor==amd',
        '!amd':         'vendor!=amd',
        'intel':        'vendor==intel',
        '!intel':       'vendor!=intel',
        'via':          'vendor==via',
        '!via':         'vendor!=via',
    };

    def __init__(self, sVariable, sOp, sValue):
        """ The arguments are only sanity checked by assertions. """
        assert sVariable in self.kdVariables;
        assert sOp in self.kasCompareOps;
        assert sValue in self.kdVariables[sVariable];
        self.sVariable, self.sOp, self.sValue = sVariable, sOp, sValue;
1364
1365
1366class InstructionTest(object):
1367 """
1368 Instruction test.
1369 """
1370
1371 def __init__(self, oInstr): # type: (InstructionTest, Instruction)
1372 self.oInstr = oInstr # type: InstructionTest
1373 self.aoInputs = [] # type: list(TestInOut)
1374 self.aoOutputs = [] # type: list(TestInOut)
1375 self.aoSelectors = [] # type: list(TestSelector)
1376
1377 def toString(self, fRepr = False):
1378 """
1379 Converts it to string representation.
1380 """
1381 asWords = [];
1382 if self.aoSelectors:
1383 for oSelector in self.aoSelectors:
1384 asWords.append('%s%s%s' % (oSelector.sVariable, oSelector.sOp, oSelector.sValue,));
1385 asWords.append('/');
1386
1387 for oModifier in self.aoInputs:
1388 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
1389
1390 asWords.append('->');
1391
1392 for oModifier in self.aoOutputs:
1393 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
1394
1395 if fRepr:
1396 return '<' + ' '.join(asWords) + '>';
1397 return ' '.join(asWords);
1398
1399 def __str__(self):
1400 """ Provide string represenation. """
1401 return self.toString(False);
1402
1403 def __repr__(self):
1404 """ Provide unambigious string representation. """
1405 return self.toString(True);
1406
class Operand(object):
    """
    Instruction operand.

    Described by where it is found (a g_kdOpLocations key) and by its type
    (a g_kdOpTypes key).
    """

    def __init__(self, sWhere, sType):
        assert sWhere in g_kdOpLocations, sWhere;
        assert sType  in g_kdOpTypes, sType;
        self.sWhere = sWhere;           ##< g_kdOpLocations
        self.sType  = sType;            ##< g_kdOpTypes

    def usesModRM(self):
        """ Returns True if using some form of ModR/M encoding (type starting with E, G or M). """
        chFirst = self.sType[0];
        return chFirst in ('E', 'G', 'M');
1421
1422
1423
class Instruction(object): # pylint: disable=too-many-instance-attributes
    """
    Instruction.

    Plain data object collecting everything known about one instruction,
    with helpers for formatting, copying, decoding the opcode byte
    specification and converting the EFLAGS lists into masks.
    """

    def __init__(self, sSrcFile, iLine):
        ## @name Core attributes.
        ## @{
        self.oParent        = None      # type: Instruction
        self.sMnemonic      = None;
        self.sBrief         = None;
        self.asDescSections = []        # type: list(str)
        self.aoMaps         = []        # type: list(InstructionMap)
        self.aoOperands     = []        # type: list(Operand)
        self.sPrefix        = None;     ##< Single prefix: None, 'none', 0x66, 0xf3, 0xf2
        self.sOpcode        = None      # type: str
        self.sSubOpcode     = None      # type: str
        self.sEncoding      = None;
        self.asFlTest       = None;
        self.asFlModify     = None;
        self.asFlUndefined  = None;
        self.asFlSet        = None;
        self.asFlClear      = None;
        self.dHints         = {};       ##< Dictionary of instruction hints, flags, whatnot. (Dictionary for speed; dummy value).
        self.sDisEnum       = None;     ##< OP_XXXX value.  Default is based on the uppercased mnemonic.
        self.asCpuIds       = [];       ##< The CPUID feature bit names for this instruction. If multiple, assume AND.
        self.asReqFeatures  = [];       ##< Which features are required to be enabled to run this instruction.
        self.aoTests        = []        # type: list(InstructionTest)
        self.sMinCpu        = None;     ##< Indicates the minimum CPU required for the instruction. Not set when oCpuExpr is.
        self.oCpuExpr       = None;     ##< Some CPU restriction expression...
        self.sGroup         = None;
        self.fUnused        = False;    ##< Unused instruction.
        self.fInvalid       = False;    ##< Invalid instruction (like UD2).
        self.sInvalidStyle  = None;     ##< Invalid behaviour style (g_kdInvalidStyles),
        self.sXcptType      = None;     ##< Exception type (g_kdXcptTypes).
        ## @}

        ## @name Implementation attributes.
        ## @{
        self.sStats         = None;
        self.sFunction      = None;
        self.fStub          = False;
        self.fUdStub        = False;
        ## @}

        ## @name Decoding info
        ## @{
        self.sSrcFile       = sSrcFile;
        self.iLineCreated   = iLine;
        self.iLineCompleted = None;
        self.cOpTags        = 0;
        self.iLineFnIemOpMacro  = -1;
        self.iLineMnemonicMacro = -1;
        ## @}

        ## @name Intermediate input fields.
        ## @{
        self.sRawDisOpNo    = None;
        self.asRawDisParams = [];
        self.sRawIemOpFlags = None;
        self.sRawOldOpcodes = None;
        self.asCopyTests    = [];
        ## @}

    def toString(self, fRepr = False):
        """
        Turn object into a string.

        Produces '; ' separated 'field=value' pairs, leaving out fields that
        are None.  The result is enclosed in angle brackets when fRepr is True.
        """
        aasFields = [];

        aasFields.append(['opcode',    self.sOpcode]);
        if self.sPrefix:
            aasFields.append(['prefix', self.sPrefix]);
        aasFields.append(['mnemonic',  self.sMnemonic]);
        for iOperand, oOperand in enumerate(self.aoOperands):
            aasFields.append(['op%u' % (iOperand + 1,), '%s:%s' % (oOperand.sWhere, oOperand.sType,)]);
        if self.aoMaps:
            aasFields.append(['maps', ','.join([oMap.sName for oMap in self.aoMaps])]);
        aasFields.append(['encoding',  self.sEncoding]);
        if self.dHints:
            aasFields.append(['hints', ','.join(self.dHints.keys())]);
        aasFields.append(['disenum',   self.sDisEnum]);
        if self.asCpuIds:
            aasFields.append(['cpuid', ','.join(self.asCpuIds)]);
        aasFields.append(['group',     self.sGroup]);
        if self.fUnused:
            aasFields.append(['unused', 'True']);
        if self.fInvalid:
            aasFields.append(['invalid', 'True']);
        aasFields.append(['invlstyle', self.sInvalidStyle]);
        aasFields.append(['fltest',    self.asFlTest]);
        aasFields.append(['flmodify',  self.asFlModify]);
        aasFields.append(['flundef',   self.asFlUndefined]);
        aasFields.append(['flset',     self.asFlSet]);
        aasFields.append(['flclear',   self.asFlClear]);
        aasFields.append(['mincpu',    self.sMinCpu]);
        aasFields.append(['stats',     self.sStats]);
        aasFields.append(['sFunction', self.sFunction]);
        if self.fStub:
            aasFields.append(['fStub', 'True']);
        if self.fUdStub:
            aasFields.append(['fUdStub', 'True']);
        if self.cOpTags:
            aasFields.append(['optags', str(self.cOpTags)]);
        if self.iLineFnIemOpMacro  != -1:
            aasFields.append(['FNIEMOP_XXX', str(self.iLineFnIemOpMacro)]);
        if self.iLineMnemonicMacro != -1:
            aasFields.append(['IEMOP_MNEMMONICn', str(self.iLineMnemonicMacro)]);

        sRet = '<' if fRepr else '';
        for sField, sValue in aasFields:
            if sValue is not None:
                # Anything beyond the optional '<' means a field was already added.
                if len(sRet) > 1:
                    sRet += '; ';
                sRet += '%s=%s' % (sField, sValue,);
        if fRepr:
            sRet += '>';

        return sRet;

    def __str__(self):
        """ Provide string representation. """
        return self.toString(False);

    def __repr__(self):
        """ Provide unambiguous string representation. """
        return self.toString(True);

    def copy(self, oMap = None, sOpcode = None, sSubOpcode = None, sPrefix = None):
        """
        Makes a copy of the object for the purpose of putting in a different map
        or a different place in the current map.

        The oMap, sOpcode, sSubOpcode and sPrefix arguments replace the
        corresponding values of this instruction in the copy when given
        (i.e. when not None/empty).  The copy gets this instruction as its
        oParent.

        Note! This is a shallow copy: most list and dictionary attributes are
              duplicated, but the elements are shared and the asFlXxxx lists
              are shared as-is with the original.
        """
        oCopy = Instruction(self.sSrcFile, self.iLineCreated);

        oCopy.oParent           = self;
        oCopy.sMnemonic         = self.sMnemonic;
        oCopy.sBrief            = self.sBrief;
        oCopy.asDescSections    = list(self.asDescSections);
        oCopy.aoMaps            = [oMap,]    if oMap       else list(self.aoMaps);
        oCopy.aoOperands        = list(self.aoOperands); ## Deeper copy?
        oCopy.sPrefix           = sPrefix    if sPrefix    else self.sPrefix;
        oCopy.sOpcode           = sOpcode    if sOpcode    else self.sOpcode;
        oCopy.sSubOpcode        = sSubOpcode if sSubOpcode else self.sSubOpcode;
        oCopy.sEncoding         = self.sEncoding;
        oCopy.asFlTest          = self.asFlTest;
        oCopy.asFlModify        = self.asFlModify;
        oCopy.asFlUndefined     = self.asFlUndefined;
        oCopy.asFlSet           = self.asFlSet;
        oCopy.asFlClear         = self.asFlClear;
        oCopy.dHints            = dict(self.dHints);
        oCopy.sDisEnum          = self.sDisEnum;
        oCopy.asCpuIds          = list(self.asCpuIds);
        oCopy.asReqFeatures     = list(self.asReqFeatures);
        oCopy.aoTests           = list(self.aoTests); ## Deeper copy?
        oCopy.sMinCpu           = self.sMinCpu;
        oCopy.oCpuExpr          = self.oCpuExpr;
        oCopy.sGroup            = self.sGroup;
        oCopy.fUnused           = self.fUnused;
        oCopy.fInvalid          = self.fInvalid;
        oCopy.sInvalidStyle     = self.sInvalidStyle;
        oCopy.sXcptType         = self.sXcptType;

        oCopy.sStats            = self.sStats;
        oCopy.sFunction         = self.sFunction;
        oCopy.fStub             = self.fStub;
        oCopy.fUdStub           = self.fUdStub;

        oCopy.iLineCompleted    = self.iLineCompleted;
        oCopy.cOpTags           = self.cOpTags;
        oCopy.iLineFnIemOpMacro = self.iLineFnIemOpMacro;
        oCopy.iLineMnemonicMacro = self.iLineMnemonicMacro;

        oCopy.sRawDisOpNo       = self.sRawDisOpNo;
        oCopy.asRawDisParams    = list(self.asRawDisParams);
        oCopy.sRawIemOpFlags    = self.sRawIemOpFlags;
        oCopy.sRawOldOpcodes    = self.sRawOldOpcodes;
        oCopy.asCopyTests       = list(self.asCopyTests);

        return oCopy;

    def getOpcodeByte(self):
        """
        Decodes sOpcode into a byte range integer value.

        Supported forms:
            - '0xNN':  The hexadecimal value.
            - '/r':    ModR/M.reg = r, i.e. r << 3.
            - '11/r':  ModR/M.mod = 3 and ModR/M.reg = r.
            - '!11/r': ModR/M.mod = 2 and ModR/M.reg = r.

        The r digit must be in the range 0 thru 7 as the ModR/M.reg field is
        only three bits wide.

        Raises exception if sOpcode is None or invalid.
        """
        if self.sOpcode is None:
            raise Exception('No opcode byte for %s!' % (self,));
        sOpcode = str(self.sOpcode);    # pylint type confusion workaround.

        # Full hex byte form.
        if sOpcode[:2] == '0x':
            return int(sOpcode, 16);

        # The ModR/M forms all end with the reg digit.  Only accept 0..7, as
        # larger digits would spill into the ModR/M.mod bits (and isdigit()
        # also accepts characters which int() cannot convert).
        sRegDigits = '01234567';

        # The /r form:
        if len(sOpcode) == 2 and sOpcode[0] == '/' and sOpcode[1] in sRegDigits:
            return int(sOpcode[1:]) << 3;

        # The 11/r form:
        if len(sOpcode) == 4 and sOpcode.startswith('11/') and sOpcode[-1] in sRegDigits:
            return (int(sOpcode[-1:]) << 3) | 0xc0;

        # The !11/r form (returns mod=2):
        ## @todo this doesn't really work...
        if len(sOpcode) == 5 and sOpcode.startswith('!11/') and sOpcode[-1] in sRegDigits:
            return (int(sOpcode[-1:]) << 3) | 0x80;

        raise Exception('unsupported opcode byte spec "%s" for %s' % (sOpcode, self,));

    @staticmethod
    def _flagsToIntegerMask(asFlags):
        """
        Returns the integer mask value for asFlags.

        asFlags is a list of g_kdEFlagsMnemonics keys, or None/empty in which
        case zero is returned.  Negated ('!' prefixed) constants are not
        expected here.
        """
        uRet = 0;
        if asFlags:
            for sFlag in asFlags:
                sConstant = g_kdEFlagsMnemonics[sFlag];
                assert sConstant[0] != '!', sConstant
                uRet |= g_kdX86EFlagsConstants[sConstant];
        return uRet;

    def getTestedFlagsMask(self):
        """ Returns asFlTest into a integer mask value """
        return self._flagsToIntegerMask(self.asFlTest);

    def getModifiedFlagsMask(self):
        """ Returns asFlModify into a integer mask value """
        return self._flagsToIntegerMask(self.asFlModify);

    def getUndefinedFlagsMask(self):
        """ Returns asFlUndefined into a integer mask value """
        return self._flagsToIntegerMask(self.asFlUndefined);

    def getSetFlagsMask(self):
        """ Returns asFlSet into a integer mask value """
        return self._flagsToIntegerMask(self.asFlSet);

    def getClearedFlagsMask(self):
        """ Returns asFlClear into a integer mask value """
        return self._flagsToIntegerMask(self.asFlClear);

    def onlyInVexMaps(self):
        """ Returns True if only in VEX maps, otherwise False.  (No maps -> False) """
        if not self.aoMaps:
            return False;
        for oMap in self.aoMaps:
            if not oMap.isVexMap():
                return False;
        return True;
1662
1663
1664
1665## All the instructions.
1666g_aoAllInstructions = [] # type: list(Instruction)
1667
1668## All the instructions indexed by statistics name (opstat).
1669g_dAllInstructionsByStat = {} # type: dict(Instruction)
1670
1671## All the instructions indexed by function name (opfunction).
1672g_dAllInstructionsByFunction = {} # type: dict(list(Instruction))
1673
1674## Instructions tagged by oponlytest
1675g_aoOnlyTestInstructions = [] # type: list(Instruction)
1676
1677## Instruction maps.
1678g_aoInstructionMaps = [
1679 InstructionMap('one', 'g_apfnOneByteMap', sSelector = 'byte'),
1680 InstructionMap('grp1_80', asLeadOpcodes = ['0x80',], sSelector = '/r'),
1681 InstructionMap('grp1_81', asLeadOpcodes = ['0x81',], sSelector = '/r'),
1682 InstructionMap('grp1_82', asLeadOpcodes = ['0x82',], sSelector = '/r'),
1683 InstructionMap('grp1_83', asLeadOpcodes = ['0x83',], sSelector = '/r'),
1684 InstructionMap('grp1a', asLeadOpcodes = ['0x8f',], sSelector = '/r'),
1685 InstructionMap('grp2_c0', asLeadOpcodes = ['0xc0',], sSelector = '/r'),
1686 InstructionMap('grp2_c1', asLeadOpcodes = ['0xc1',], sSelector = '/r'),
1687 InstructionMap('grp2_d0', asLeadOpcodes = ['0xd0',], sSelector = '/r'),
1688 InstructionMap('grp2_d1', asLeadOpcodes = ['0xd1',], sSelector = '/r'),
1689 InstructionMap('grp2_d2', asLeadOpcodes = ['0xd2',], sSelector = '/r'),
1690 InstructionMap('grp2_d3', asLeadOpcodes = ['0xd3',], sSelector = '/r'),
1691 ## @todo g_apfnEscF1_E0toFF
1692 InstructionMap('grp3_f6', asLeadOpcodes = ['0xf6',], sSelector = '/r'),
1693 InstructionMap('grp3_f7', asLeadOpcodes = ['0xf7',], sSelector = '/r'),
1694 InstructionMap('grp4', asLeadOpcodes = ['0xfe',], sSelector = '/r'),
1695 InstructionMap('grp5', asLeadOpcodes = ['0xff',], sSelector = '/r'),
1696 InstructionMap('grp11_c6_m', asLeadOpcodes = ['0xc6',], sSelector = '!11 /r'),
1697 InstructionMap('grp11_c6_r', asLeadOpcodes = ['0xc6',], sSelector = '11'), # xabort
1698 InstructionMap('grp11_c7_m', asLeadOpcodes = ['0xc7',], sSelector = '!11 /r'),
1699 InstructionMap('grp11_c7_r', asLeadOpcodes = ['0xc7',], sSelector = '11'), # xbegin
1700
1701 InstructionMap('two0f', 'g_apfnTwoByteMap', asLeadOpcodes = ['0x0f',], sDisParse = 'IDX_ParseTwoByteEsc'),
1702 InstructionMap('grp6', 'g_apfnGroup6', asLeadOpcodes = ['0x0f', '0x00',], sSelector = '/r'),
1703 InstructionMap('grp7_m', 'g_apfnGroup7Mem', asLeadOpcodes = ['0x0f', '0x01',], sSelector = '!11 /r'),
1704 InstructionMap('grp7_r', asLeadOpcodes = ['0x0f', '0x01',], sSelector = '11'),
1705 InstructionMap('grp8', asLeadOpcodes = ['0x0f', '0xba',], sSelector = '/r'),
1706 InstructionMap('grp9', 'g_apfnGroup9RegReg', asLeadOpcodes = ['0x0f', '0xc7',], sSelector = 'mod /r'),
1707 ## @todo What about g_apfnGroup9MemReg?
1708 InstructionMap('grp10', None, asLeadOpcodes = ['0x0f', '0xb9',], sSelector = '/r'), # UD1 /w modr/m
1709 InstructionMap('grp12', 'g_apfnGroup12RegReg', asLeadOpcodes = ['0x0f', '0x71',], sSelector = 'mod /r'),
1710 InstructionMap('grp13', 'g_apfnGroup13RegReg', asLeadOpcodes = ['0x0f', '0x72',], sSelector = 'mod /r'),
1711 InstructionMap('grp14', 'g_apfnGroup14RegReg', asLeadOpcodes = ['0x0f', '0x73',], sSelector = 'mod /r'),
1712 InstructionMap('grp15', 'g_apfnGroup15MemReg', asLeadOpcodes = ['0x0f', '0xae',], sSelector = 'memreg /r'),
1713 ## @todo What about g_apfnGroup15RegReg?
1714 InstructionMap('grp16', asLeadOpcodes = ['0x0f', '0x18',], sSelector = 'mod /r'),
1715 InstructionMap('grpA17', asLeadOpcodes = ['0x0f', '0x78',], sSelector = '/r'), # AMD: EXTRQ weirdness
1716 InstructionMap('grpP', asLeadOpcodes = ['0x0f', '0x0d',], sSelector = '/r'), # AMD: prefetch
1717
1718 InstructionMap('three0f38', 'g_apfnThreeByte0f38', asLeadOpcodes = ['0x0f', '0x38',]),
1719 InstructionMap('three0f3a', 'g_apfnThreeByte0f3a', asLeadOpcodes = ['0x0f', '0x3a',]),
1720
1721 InstructionMap('vexmap1', 'g_apfnVexMap1', sEncoding = 'vex1'),
1722 InstructionMap('vexgrp12', 'g_apfnVexGroup12RegReg', sEncoding = 'vex1', asLeadOpcodes = ['0x71',], sSelector = 'mod /r'),
1723 InstructionMap('vexgrp13', 'g_apfnVexGroup13RegReg', sEncoding = 'vex1', asLeadOpcodes = ['0x72',], sSelector = 'mod /r'),
1724 InstructionMap('vexgrp14', 'g_apfnVexGroup14RegReg', sEncoding = 'vex1', asLeadOpcodes = ['0x73',], sSelector = 'mod /r'),
1725 InstructionMap('vexgrp15', 'g_apfnVexGroup15MemReg', sEncoding = 'vex1', asLeadOpcodes = ['0xae',], sSelector = 'memreg /r'),
1726 InstructionMap('vexgrp17', 'g_apfnVexGroup17_f3', sEncoding = 'vex1', asLeadOpcodes = ['0xf3',], sSelector = '/r'),
1727
1728 InstructionMap('vexmap2', 'g_apfnVexMap2', sEncoding = 'vex2'),
1729 InstructionMap('vexmap3', 'g_apfnVexMap3', sEncoding = 'vex3'),
1730
1731 InstructionMap('3dnow', asLeadOpcodes = ['0x0f', '0x0f',]),
1732 InstructionMap('xopmap8', sEncoding = 'xop8'),
1733 InstructionMap('xopmap9', sEncoding = 'xop9'),
1734 InstructionMap('xopgrp1', sEncoding = 'xop9', asLeadOpcodes = ['0x01'], sSelector = '/r'),
1735 InstructionMap('xopgrp2', sEncoding = 'xop9', asLeadOpcodes = ['0x02'], sSelector = '/r'),
1736 InstructionMap('xopgrp3', sEncoding = 'xop9', asLeadOpcodes = ['0x12'], sSelector = '/r'),
1737 InstructionMap('xopmap10', sEncoding = 'xop10'),
1738 InstructionMap('xopgrp4', sEncoding = 'xop10', asLeadOpcodes = ['0x12'], sSelector = '/r'),
1739];
## Instruction maps indexed by their sName attribute.
g_dInstructionMaps          = dict((oCurMap.sName,    oCurMap) for oCurMap in g_aoInstructionMaps);
## Instruction maps indexed by their sIemName attribute.
g_dInstructionMapsByIemName = dict((oCurMap.sIemName, oCurMap) for oCurMap in g_aoInstructionMaps);
1742
1743
1744#
1745# Decoder functions.
1746#
1747
class DecoderFunction(object):
    """
    Decoder function.

    Mainly used for scoped searches for variables that are used in microcode
    blocks.
    """
    def __init__(self, sSrcFile, iBeginLine, sName, asDefArgs):
        ## The source file the function is defined in.
        self.sSrcFile   = sSrcFile;
        ## The start line.
        self.iBeginLine = iBeginLine;
        ## The line the function (probably) ends on, -1 until complete() is called.
        self.iEndLine   = -1;
        ## The function name.
        self.sName      = sName;
        ## The FNIEMOP*DEF/STUB* macro argument list, 0th element is the macro name.
        self.asDefArgs  = asDefArgs;
        ## The raw lines the function is made up of.
        self.asLines    = [] # type: list(str)

    def complete(self, iEndLine, asLines):
        """
        Completes the function by recording the end line and the raw lines.

        May only be called once, the end line must still be unset (-1).
        """
        assert self.iEndLine == -1;
        self.asLines  = asLines;
        self.iEndLine = iEndLine;
1770
1771
1772#
1773# "Microcode" statements and blocks
1774#
1775
class McStmt(object):
    """
    A single statement in a microcode block.
    """
    def __init__(self, sName, asParams):
        self.sName    = sName;      ##< 'IEM_MC_XXX' or 'C++'.
        self.asParams = asParams;   ##< The statement parameters (strings).
        self.oUser    = None;

    def renderCode(self, cchIndent = 0):
        """
        Returns the statement as a line of code: cchIndent spaces, the name,
        the comma separated parameters in parentheses, a semicolon and a newline.
        """
        sParams = ', '.join(self.asParams);
        return ''.join((' ' * cchIndent, self.sName, '(', sParams, ');\n',));

    @staticmethod
    def renderCodeForList(aoStmts, cchIndent = 0):
        """
        Renders each statement in the list and returns the concatenated result.
        """
        asRendered = [];
        for oStmt in aoStmts:
            asRendered.append(oStmt.renderCode(cchIndent));
        return ''.join(asRendered);

    @staticmethod
    def findStmtByNames(aoStmts, dNames):
        """
        Returns the first statement in the list having one of the given names,
        descending into the branches of conditional statements.  Returns None
        if there is no such statement.

        Note! The names are passed as a dictionary for quick lookup, the value
              does not matter.
        """
        for oStmt in aoStmts:
            if oStmt.sName in dNames:
                return oStmt;
            if not isinstance(oStmt, McStmtCond):
                continue;
            # Conditional: The if-branch is searched before the else-branch.
            oHit = (   McStmt.findStmtByNames(oStmt.aoIfBranch, dNames)
                    or McStmt.findStmtByNames(oStmt.aoElseBranch, dNames));
            if oHit:
                return oHit;
        return None;

    def isCppStmt(self):
        """ Returns True if the statement name starts with 'C++', otherwise False. """
        return self.sName.startswith('C++');
1820
class McStmtCond(McStmt):
    """
    Base class for conditional statements (IEM_MC_IF_XXX).
    """
    def __init__(self, sName, asParams, aoIfBranch = None, aoElseBranch = None):
        McStmt.__init__(self, sName, asParams);
        # The branches are always stored as new lists, never the caller's objects.
        self.aoIfBranch   = list(aoIfBranch)   if aoIfBranch   is not None else [];
        self.aoElseBranch = list(aoElseBranch) if aoElseBranch is not None else [];

    def renderCode(self, cchIndent = 0):
        """
        Renders the conditional with its branches indented four more spaces.
        The IEM_MC_ELSE part is left out when the else-branch is empty.
        """
        sIndent = ' ' * cchIndent;
        asParts = [
            sIndent + self.sName + '(' + ', '.join(self.asParams) + ') {\n',
            self.renderCodeForList(self.aoIfBranch, cchIndent + 4),
        ];
        if self.aoElseBranch:
            asParts.append(sIndent + '} IEM_MC_ELSE() {\n');
            asParts.append(self.renderCodeForList(self.aoElseBranch, cchIndent + 4));
        asParts.append(sIndent + '} IEM_MC_ENDIF();\n');
        return ''.join(asParts);
1838
class McStmtVar(McStmt):
    """ Variable declaration statement: IEM_MC_LOCAL_VAR, IEM_MC_LOCAL_CONST """
    def __init__(self, sName, asParams, sType, sVarName, sConstValue = None):
        McStmt.__init__(self, sName, asParams);
        self.sVarName    = sVarName;        ##< The variable name.
        self.sType       = sType;           ##< The variable type.
        self.sConstValue = sConstValue;     ##< None if not const.
1846
class McStmtArg(McStmtVar):
    """ Argument declaration statement: IEM_MC_ARG, IEM_MC_ARG_CONST, IEM_MC_ARG_LOCAL_REF """
    def __init__(self, sName, asParams, sType, sVarName, iArg, sConstValue = None, sRef = None, sRefType = 'none'):
        # Only these two kinds of references are known.
        assert sRefType in ('none', 'local');
        McStmtVar.__init__(self, sName, asParams, sType, sVarName, sConstValue);
        self.iArg     = iArg;       ##< The argument number.
        self.sRefType = sRefType;   ##< The kind of reference: 'local', 'none'.
        self.sRef     = sRef;       ##< The reference string (local variable, register).
1855
1856
class McStmtCall(McStmt):
    """ Call statement: IEM_MC_CALL_* """
    def __init__(self, sName, asParams, iFnParam, iRcNameParam = -1):
        McStmt.__init__(self, sName, asParams);
        self.idxFn     = iFnParam;              ##< Index into asParams of the function.
        self.idxParams = iFnParam + 1;          ##< Index into asParams of the entry following the function.
        self.sFn       = asParams[iFnParam];    ##< The function (asParams[iFnParam]).
        ## The asParams[iRcNameParam] value, None if iRcNameParam is negative.
        if iRcNameParam >= 0:
            self.iRcName = asParams[iRcNameParam];
        else:
            self.iRcName = None;
1865
class McCppGeneric(McStmt):
    """
    Generic C++/C statement.

    The code is kept as the one and only parameter.
    """
    def __init__(self, sCode, fDecode = True, sName = 'C++', cchIndent = 0):
        McStmt.__init__(self, sName, [sCode,]);
        self.cchIndent = cchIndent;     ##< Indentation added to what renderCode is given.
        self.fDecode   = fDecode;       ##< Selects the '// C++ decode' or '// C++ normal' annotation.

    def renderCode(self, cchIndent = 0):
        """
        Renders the code followed by a newline, with each newline in the result
        being preceded by a '// C++ decode' or '// C++ normal' annotation.
        """
        sAnnotatedEol = ' // C++ decode\n' if self.fDecode else ' // C++ normal\n';
        sCode = ' ' * (cchIndent + self.cchIndent) + self.asParams[0] + '\n';
        return sCode.replace('\n', sAnnotatedEol);
1883
class McCppCall(McCppGeneric):
    """
    A generic C++/C call statement.

    The sName is still 'C++', so the function name is in the first parameter
    and the arguments in the subsequent ones.
    """
    def __init__(self, sFnName, asArgs, fDecode = True, cchIndent = 0):
        McCppGeneric.__init__(self, sFnName, fDecode = fDecode, cchIndent = cchIndent);
        for sArg in asArgs:
            self.asParams.append(sArg);

    def renderCode(self, cchIndent = 0):
        """
        Renders the call as 'function(arg, ...);' followed by the decode/normal
        annotation comment and a newline.
        """
        sCall = ' ' * (cchIndent + self.cchIndent) + self.asParams[0] + '(' + ', '.join(self.asParams[1:]) + ');';
        if self.fDecode:
            return sCall + ' // C++ decode\n';
        return sCall + ' // C++ normal\n';
1903
class McCppCond(McStmtCond):
    """
    C++/C 'if' statement.

    The condition expression is kept as the one and only parameter.
    """
    def __init__(self, sCode, fDecode = True, aoIfBranch = None, aoElseBranch = None, cchIndent = 0):
        McStmtCond.__init__(self, 'C++/if', [sCode,], aoIfBranch, aoElseBranch);
        self.cchIndent = cchIndent;     ##< Indentation added to what renderCode is given.
        self.fDecode   = fDecode;       ##< Selects the '// C++ decode' or '// C++ normal' annotation.

    def renderCode(self, cchIndent = 0):
        """
        Renders the if statement with the branches in curly brackets and
        indented four more spaces.  The else part is left out when the
        else-branch is empty.
        """
        cchTotal    = cchIndent + self.cchIndent;
        sIndent     = ' ' * cchTotal;
        sAnnotation = '// C++ decode' if self.fDecode else '// C++ normal';
        asParts = [
            sIndent + 'if (' + self.asParams[0] + ') ' + sAnnotation + '\n',
            sIndent + '{\n',
            self.renderCodeForList(self.aoIfBranch, cchTotal + 4),
            sIndent + '}\n',
        ];
        if self.aoElseBranch:
            asParts.extend([
                sIndent + 'else ' + sAnnotation + '\n',
                sIndent + '{\n',
                self.renderCodeForList(self.aoElseBranch, cchTotal + 4),
                sIndent + '}\n',
            ]);
        return ''.join(asParts);
1926
class McCppPreProc(McCppGeneric):
    """
    C++/C Preprocessor directive.
    """
    def __init__(self, sCode):
        McCppGeneric.__init__(self, sCode, fDecode = False, sName = 'C++/preproc');

    def renderCode(self, cchIndent = 0):
        """ Renders the directive without any indentation or annotation (cchIndent is ignored). """
        return ''.join((self.asParams[0], '\n',));
1936
1937
1938class McBlock(object):
1939 """
1940 Microcode block (IEM_MC_BEGIN ... IEM_MC_END, IEM_MC_DEFER_TO_CIMPL_x_RET).
1941 """
1942
1943 def __init__(self, sSrcFile, iBeginLine, offBeginLine, oFunction, iInFunction, cchIndent = None):
1944 ## The source file containing the block.
1945 self.sSrcFile = sSrcFile;
1946 ## The line with the IEM_MC_BEGIN/IEM_MC_DEFER_TO_CIMPL_X_RET statement.
1947 self.iBeginLine = iBeginLine;
1948 ## The offset of the IEM_MC_BEGIN/IEM_MC_DEFER_TO_CIMPL_X_RET statement within the line.
1949 self.offBeginLine = offBeginLine;
1950 ## The line with the IEM_MC_END statement / last line of IEM_MC_DEFER_TO_CIMPL_X_RET.
1951 self.iEndLine = -1;
1952 ## The offset of the IEM_MC_END statement within the line / semicolon offset for defer-to.
1953 self.offEndLine = 0;
1954 ## The offset following the IEM_MC_END/IEM_MC_DEFER_TO_CIMPL_X_RET semicolon.
1955 self.offAfterEnd = 0;
1956 ## The function the block resides in.
1957 self.oFunction = oFunction;
1958 ## The name of the function the block resides in. DEPRECATED.
1959 self.sFunction = oFunction.sName;
1960 ## The block number within the function.
1961 self.iInFunction = iInFunction;
1962 self.cchIndent = cchIndent if cchIndent else offBeginLine;
1963 self.asLines = [] # type: list(str) ##< The raw lines the block is made up of.
1964 ## Decoded statements in the block.
1965 self.aoStmts = [] # type: list(McStmt)
1966
1967 def complete(self, iEndLine, offEndLine, offAfterEnd, asLines):
1968 """
1969 Completes the microcode block.
1970 """
1971 assert self.iEndLine == -1;
1972 self.iEndLine = iEndLine;
1973 self.offEndLine = offEndLine;
1974 self.offAfterEnd = offAfterEnd;
1975 self.asLines = asLines;
1976
1977 def raiseDecodeError(self, sRawCode, off, sMessage):
1978 """ Raises a decoding error. """
1979 offStartOfLine = sRawCode.rfind('\n', 0, off) + 1;
1980 iLine = sRawCode.count('\n', 0, off);
1981 raise ParserException('%s:%d:%d: parsing error: %s'
1982 % (self.sSrcFile, self.iBeginLine + iLine, off - offStartOfLine + 1, sMessage,));
1983
1984 def raiseStmtError(self, sName, sMessage):
1985 """ Raises a statement parser error. """
1986 raise ParserException('%s:%d: %s: parsing error: %s' % (self.sSrcFile, self.iBeginLine, sName, sMessage,));
1987
1988 def checkStmtParamCount(self, sName, asParams, cParamsExpected):
1989 """ Check the parameter count, raising an error it doesn't match. """
1990 if len(asParams) != cParamsExpected:
1991 raise ParserException('%s:%d: %s: Expected %s parameters, found %s!'
1992 % (self.sSrcFile, self.iBeginLine, sName, cParamsExpected, len(asParams),));
1993 return True;
1994
1995 @staticmethod
1996 def parseMcGeneric(oSelf, sName, asParams):
1997 """ Generic parser that returns a plain McStmt object. """
1998 _ = oSelf;
1999 return McStmt(sName, asParams);
2000
2001 @staticmethod
2002 def parseMcGenericCond(oSelf, sName, asParams):
2003 """ Generic parser that returns a plain McStmtCond object. """
2004 _ = oSelf;
2005 return McStmtCond(sName, asParams);
2006
2007 @staticmethod
2008 def parseMcBegin(oSelf, sName, asParams):
2009 """ IEM_MC_BEGIN """
2010 oSelf.checkStmtParamCount(sName, asParams, 2);
2011 return McBlock.parseMcGeneric(oSelf, sName, asParams);
2012
2013 @staticmethod
2014 def parseMcArg(oSelf, sName, asParams):
2015 """ IEM_MC_ARG """
2016 oSelf.checkStmtParamCount(sName, asParams, 3);
2017 return McStmtArg(sName, asParams, asParams[0], asParams[1], int(asParams[2]));
2018
2019 @staticmethod
2020 def parseMcArgConst(oSelf, sName, asParams):
2021 """ IEM_MC_ARG_CONST """
2022 oSelf.checkStmtParamCount(sName, asParams, 4);
2023 return McStmtArg(sName, asParams, asParams[0], asParams[1], int(asParams[3]), sConstValue = asParams[2]);
2024
2025 @staticmethod
2026 def parseMcArgLocalRef(oSelf, sName, asParams):
2027 """ IEM_MC_ARG_LOCAL_REF """
2028 oSelf.checkStmtParamCount(sName, asParams, 4);
2029 return McStmtArg(sName, asParams, asParams[0], asParams[1], int(asParams[3]), sRef = asParams[2], sRefType = 'local');
2030
2031 @staticmethod
2032 def parseMcArgLocalEFlags(oSelf, sName, asParams):
2033 """ IEM_MC_ARG_LOCAL_EFLAGS """
2034 oSelf.checkStmtParamCount(sName, asParams, 3);
2035 # Note! We split this one up into IEM_MC_LOCAL_VAR and IEM_MC_ARG_LOCAL_REF.
2036 return (
2037 McStmtVar('IEM_MC_LOCAL', ['uint32_t', asParams[1],], 'uint32_t', asParams[1]),
2038 McStmtArg('IEM_MC_ARG_LOCAL_REF', ['uint32_t *', asParams[0], asParams[1], asParams[2]],
2039 'uint32_t *', asParams[0], int(asParams[2]), sRef = asParams[1], sRefType = 'local'),
2040 );
2041
2042 @staticmethod
2043 def parseMcLocal(oSelf, sName, asParams):
2044 """ IEM_MC_LOCAL """
2045 oSelf.checkStmtParamCount(sName, asParams, 2);
2046 return McStmtVar(sName, asParams, asParams[0], asParams[1]);
2047
2048 @staticmethod
2049 def parseMcLocalConst(oSelf, sName, asParams):
2050 """ IEM_MC_LOCAL_CONST """
2051 oSelf.checkStmtParamCount(sName, asParams, 3);
2052 return McStmtVar(sName, asParams, asParams[0], asParams[1], sConstValue = asParams[2]);
2053
2054 @staticmethod
2055 def parseMcCallAImpl(oSelf, sName, asParams):
2056 """ IEM_MC_CALL_AIMPL_3|4 """
2057 cArgs = int(sName[-1]);
2058 oSelf.checkStmtParamCount(sName, asParams, 2 + cArgs);
2059 return McStmtCall(sName, asParams, 1, 0);
2060
2061 @staticmethod
2062 def parseMcCallVoidAImpl(oSelf, sName, asParams):
2063 """ IEM_MC_CALL_VOID_AIMPL_2|3 """
2064 cArgs = int(sName[-1]);
2065 oSelf.checkStmtParamCount(sName, asParams, 1 + cArgs);
2066 return McStmtCall(sName, asParams, 0);
2067
2068 @staticmethod
2069 def parseMcCallAvxAImpl(oSelf, sName, asParams):
2070 """ IEM_MC_CALL_AVX_AIMPL_2|3 """
2071 cArgs = int(sName[-1]);
2072 oSelf.checkStmtParamCount(sName, asParams, 1 + cArgs);
2073 return McStmtCall(sName, asParams, 0);
2074
2075 @staticmethod
2076 def parseMcCallFpuAImpl(oSelf, sName, asParams):
2077 """ IEM_MC_CALL_FPU_AIMPL_1|2|3 """
2078 cArgs = int(sName[-1]);
2079 oSelf.checkStmtParamCount(sName, asParams, 1 + cArgs);
2080 return McStmtCall(sName, asParams, 0);
2081
2082 @staticmethod
2083 def parseMcCallMmxAImpl(oSelf, sName, asParams):
2084 """ IEM_MC_CALL_MMX_AIMPL_2|3 """
2085 cArgs = int(sName[-1]);
2086 oSelf.checkStmtParamCount(sName, asParams, 1 + cArgs);
2087 return McStmtCall(sName, asParams, 0);
2088
2089 @staticmethod
2090 def parseMcCallSseAImpl(oSelf, sName, asParams):
2091 """ IEM_MC_CALL_SSE_AIMPL_2|3 """
2092 cArgs = int(sName[-1]);
2093 oSelf.checkStmtParamCount(sName, asParams, 1 + cArgs);
2094 return McStmtCall(sName, asParams, 0);
2095
2096 @staticmethod
2097 def parseMcCallCImpl(oSelf, sName, asParams):
2098 """ IEM_MC_CALL_CIMPL_0|1|2|3|4|5 """
2099 cArgs = int(sName[-1]);
2100 oSelf.checkStmtParamCount(sName, asParams, 2 + cArgs);
2101 return McStmtCall(sName, asParams, 1);
2102
2103 @staticmethod
2104 def stripComments(sCode):
2105 """ Returns sCode with comments removed. """
2106 off = 0;
2107 while off < len(sCode):
2108 off = sCode.find('/', off);
2109 if off < 0 or off + 1 >= len(sCode):
2110 break;
2111
2112 if sCode[off + 1] == '/':
2113 # C++ comment.
2114 offEnd = sCode.find('\n', off + 2);
2115 if offEnd < 0:
2116 return sCode[:off].rstrip();
2117 sCode = sCode[ : off] + sCode[offEnd : ];
2118 off += 1;
2119
2120 elif sCode[off + 1] == '*':
2121 # C comment
2122 offEnd = sCode.find('*/', off + 2);
2123 if offEnd < 0:
2124 return sCode[:off].rstrip();
2125 sSep = ' ';
2126 if (off > 0 and sCode[off - 1].isspace()) or (offEnd + 2 < len(sCode) and sCode[offEnd + 2].isspace()):
2127 sSep = '';
2128 sCode = sCode[ : off] + sSep + sCode[offEnd + 2 : ];
2129 off += len(sSep);
2130
2131 else:
2132 # Not a comment.
2133 off += 1;
2134 return sCode;
2135
2136 @staticmethod
2137 def extractParam(sCode, offParam):
2138 """
2139 Extracts the parameter value at offParam in sCode.
2140 Returns stripped value and the end offset of the terminating ',' or ')'.
2141 """
2142 # Extract it.
2143 cNesting = 0;
2144 offStart = offParam;
2145 while offParam < len(sCode):
2146 ch = sCode[offParam];
2147 if ch == '(':
2148 cNesting += 1;
2149 elif ch == ')':
2150 if cNesting == 0:
2151 break;
2152 cNesting -= 1;
2153 elif ch == ',' and cNesting == 0:
2154 break;
2155 offParam += 1;
2156 return (sCode[offStart : offParam].strip(), offParam);
2157
2158 @staticmethod
2159 def extractParams(sCode, offOpenParen):
2160 """
2161 Parses a parameter list.
2162 Returns the list of parameter values and the offset of the closing parentheses.
2163 Returns (None, len(sCode)) on if no closing parentheses was found.
2164 """
2165 assert sCode[offOpenParen] == '(';
2166 asParams = [];
2167 off = offOpenParen + 1;
2168 while off < len(sCode):
2169 ch = sCode[off];
2170 if ch.isspace():
2171 off += 1;
2172 elif ch != ')':
2173 (sParam, off) = McBlock.extractParam(sCode, off);
2174 asParams.append(sParam);
2175 assert off < len(sCode), 'off=%s sCode=%s:"%s"' % (off, len(sCode), sCode,);
2176 if sCode[off] == ',':
2177 off += 1;
2178 else:
2179 return (asParams, off);
2180 return (None, off);
2181
2182 @staticmethod
2183 def findClosingBraces(sCode, off, offStop):
2184 """
2185 Finds the matching '}' for the '{' at off in sCode.
2186 Returns offset of the matching '}' on success, otherwise -1.
2187
2188 Note! Does not take comments into account.
2189 """
2190 cDepth = 1;
2191 off += 1;
2192 while off < offStop:
2193 offClose = sCode.find('}', off, offStop);
2194 if offClose < 0:
2195 break;
2196 cDepth += sCode.count('{', off, offClose);
2197 cDepth -= 1;
2198 if cDepth == 0:
2199 return offClose;
2200 off = offClose + 1;
2201 return -1;
2202
2203 @staticmethod
2204 def countSpacesAt(sCode, off, offStop):
2205 """ Returns the number of space characters at off in sCode. """
2206 offStart = off;
2207 while off < offStop and sCode[off].isspace():
2208 off += 1;
2209 return off - offStart;
2210
2211 @staticmethod
2212 def skipSpacesAt(sCode, off, offStop):
2213 """ Returns first offset at or after off for a non-space character. """
2214 return off + McBlock.countSpacesAt(sCode, off, offStop);
2215
2216 @staticmethod
2217 def isSubstrAt(sStr, off, sSubStr):
2218 """ Returns true of sSubStr is found at off in sStr. """
2219 return sStr[off : off + len(sSubStr)] == sSubStr;
2220
2221 koReCppCtrlStmts = re.compile(r'\b(if\s*[(]|else\b|while\s*[(]|for\s*[(]|do\b)');
2222 koReIemDecoderVars = re.compile( r'iem\.s\.(fPrefixes|uRexReg|uRexB|uRexIndex|iEffSeg|offModRm|cbOpcode|offOpcode'
2223 + r'|enmEffOpSize|enmDefOpSize|enmDefAddrMode|enmEffAddrMode|idxPrefix'
2224 + r'|uVex3rdReg|uVexLength|fEvxStuff|uFpuOpcode|abOpcode'
2225 + r')');
2226
2227 def decodeCode(self, sRawCode, off = 0, offStop = -1, iLevel = 0): # pylint: disable=too-many-statements,too-many-branches
2228 """
2229 Decodes sRawCode[off : offStop].
2230
2231 Returns list of McStmt instances.
2232 Raises ParserException on failure.
2233 """
2234 if offStop < 0:
2235 offStop = len(sRawCode);
2236 aoStmts = [];
2237 while off < offStop:
2238 ch = sRawCode[off];
2239
2240 #
2241 # Skip spaces and comments.
2242 #
2243 if ch.isspace():
2244 off += 1;
2245
2246 elif ch == '/':
2247 ch = sRawCode[off + 1];
2248 if ch == '/': # C++ comment.
2249 off = sRawCode.find('\n', off + 2);
2250 if off < 0:
2251 break;
2252 off += 1;
2253 elif ch == '*': # C comment.
2254 off = sRawCode.find('*/', off + 2);
2255 if off < 0:
2256 break;
2257 off += 2;
2258 else:
2259 self.raiseDecodeError(sRawCode, off, 'Unexpected "/"');
2260
2261 #
2262 # Is it a MC statement.
2263 #
2264 elif ch == 'I' and sRawCode[off : off + len('IEM_MC_')] == 'IEM_MC_':
2265 # All MC statements ends with a semicolon, except for conditionals which ends with a '{'.
2266 # Extract it and strip comments from it.
2267 if not self.isSubstrAt(sRawCode, off, 'IEM_MC_IF_'):
2268 offEnd = sRawCode.find(';', off + len('IEM_MC_'));
2269 if offEnd <= off:
2270 self.raiseDecodeError(sRawCode, off, 'MC statement without a ";"');
2271 else:
2272 offEnd = sRawCode.find('{', off + len('IEM_MC_IF_'));
2273 if offEnd <= off:
2274 self.raiseDecodeError(sRawCode, off, 'MC conditional statement without a "{"');
2275 if sRawCode.find(';', off + len('IEM_MC_IF_'), offEnd) > off:
2276 self.raiseDecodeError(sRawCode, off, 'MC conditional statement without an immediate "{"');
2277 offEnd -= 1;
2278 while offEnd > off and sRawCode[offEnd - 1].isspace():
2279 offEnd -= 1;
2280
2281 sRawStmt = self.stripComments(sRawCode[off : offEnd]);
2282
2283 # Isolate the statement name.
2284 offOpenParen = sRawStmt.find('(');
2285 if offOpenParen < 0:
2286 self.raiseDecodeError(sRawCode, off, 'MC statement without a "("');
2287 sName = sRawStmt[: offOpenParen].strip();
2288
2289 # Extract the parameters.
2290 (asParams, offCloseParen) = self.extractParams(sRawStmt, offOpenParen);
2291 if asParams is None:
2292 self.raiseDecodeError(sRawCode, off, 'MC statement without a closing parenthesis');
2293 if offCloseParen + 1 != len(sRawStmt):
2294 self.raiseDecodeError(sRawCode, off,
2295 'Unexpected code following MC statement: %s' % (sRawStmt[offCloseParen + 1:]));
2296
2297 # Hand it to the handler.
2298 fnParser = g_dMcStmtParsers.get(sName)[0];
2299 if not fnParser:
2300 self.raiseDecodeError(sRawCode, off, 'Unknown MC statement: %s' % (sName,));
2301 oStmt = fnParser(self, sName, asParams);
2302 if not isinstance(oStmt, (list, tuple)):
2303 aoStmts.append(oStmt);
2304 else:
2305 aoStmts.extend(oStmt);
2306
2307 #
2308 # If conditional, we need to parse the whole statement.
2309 #
2310 # For reasons of simplicity, we assume the following structure
2311 # and parse each branch in a recursive call:
2312 # IEM_MC_IF_XXX() {
2313 # IEM_MC_WHATEVER();
2314 # } IEM_MC_ELSE() {
2315 # IEM_MC_WHATEVER();
2316 # } IEM_MC_ENDIF();
2317 #
2318 if sName.startswith('IEM_MC_IF_'):
2319 if iLevel > 1:
2320 self.raiseDecodeError(sRawCode, off, 'Too deep nesting of conditionals.');
2321
2322 # Find start of the IF block:
2323 offBlock1 = self.skipSpacesAt(sRawCode, offEnd, offStop);
2324 if sRawCode[offBlock1] != '{':
2325 self.raiseDecodeError(sRawCode, offBlock1, 'Expected "{" following %s' % (sName,));
2326
2327 # Find the end of it.
2328 offBlock1End = self.findClosingBraces(sRawCode, offBlock1, offStop);
2329 if offBlock1End < 0:
2330 self.raiseDecodeError(sRawCode, offBlock1, 'No matching "}" closing IF block of %s' % (sName,));
2331
2332 oStmt.aoIfBranch = self.decodeCode(sRawCode, offBlock1 + 1, offBlock1End, iLevel + 1);
2333
2334 # Is there an else section?
2335 off = self.skipSpacesAt(sRawCode, offBlock1End + 1, offStop);
2336 if self.isSubstrAt(sRawCode, off, 'IEM_MC_ELSE'):
2337 off = self.skipSpacesAt(sRawCode, off + len('IEM_MC_ELSE'), offStop);
2338 if sRawCode[off] != '(':
2339 self.raiseDecodeError(sRawCode, off, 'Expected "(" following IEM_MC_ELSE"');
2340 off = self.skipSpacesAt(sRawCode, off + 1, offStop);
2341 if sRawCode[off] != ')':
2342 self.raiseDecodeError(sRawCode, off, 'Expected ")" following IEM_MC_ELSE("');
2343
2344 # Find start of the ELSE block.
2345 offBlock2 = self.skipSpacesAt(sRawCode, off + 1, offStop);
2346 if sRawCode[offBlock2] != '{':
2347 self.raiseDecodeError(sRawCode, offBlock2, 'Expected "{" following IEM_MC_ELSE()"');
2348
2349 # Find the end of it.
2350 offBlock2End = self.findClosingBraces(sRawCode, offBlock2, offStop);
2351 if offBlock2End < 0:
2352 self.raiseDecodeError(sRawCode, offBlock2, 'No matching "}" closing ELSE block of %s' % (sName,));
2353
2354 oStmt.aoElseBranch = self.decodeCode(sRawCode, offBlock2 + 1, offBlock2End, iLevel + 1);
2355 off = self.skipSpacesAt(sRawCode, offBlock2End + 1, offStop);
2356
2357 # Parse past the endif statement.
2358 if not self.isSubstrAt(sRawCode, off, 'IEM_MC_ENDIF'):
2359 self.raiseDecodeError(sRawCode, off, 'Expected IEM_MC_ENDIF for closing %s' % (sName,));
2360 off = self.skipSpacesAt(sRawCode, off + len('IEM_MC_ENDIF'), offStop);
2361 if sRawCode[off] != '(':
2362 self.raiseDecodeError(sRawCode, off, 'Expected "(" following IEM_MC_ENDIF"');
2363 off = self.skipSpacesAt(sRawCode, off + 1, offStop);
2364 if sRawCode[off] != ')':
2365 self.raiseDecodeError(sRawCode, off, 'Expected ")" following IEM_MC_ENDIF("');
2366 off = self.skipSpacesAt(sRawCode, off + 1, offStop);
2367 if sRawCode[off] != ';':
2368 self.raiseDecodeError(sRawCode, off, 'Expected ";" following IEM_MC_ENDIF()"');
2369 off += 1;
2370
2371 else:
2372 # Advance.
2373 off = offEnd + 1;
2374
2375 #
2376 # Otherwise it must be a C/C++ statement of sorts.
2377 #
2378 else:
2379 # Find the end of the statement. if and else requires special handling.
2380 sCondExpr = None;
2381 oMatch = self.koReCppCtrlStmts.match(sRawCode, off);
2382 if oMatch:
2383 if oMatch.group(1)[-1] == '(':
2384 (sCondExpr, offEnd) = self.extractParam(sRawCode, oMatch.end());
2385 else:
2386 offEnd = oMatch.end();
2387 if not oMatch.group(1).startswith('if') and oMatch.group(1) != 'else':
2388 self.raiseDecodeError(sRawCode, off, 'Only if/else control statements allowed: %s' % (oMatch.group(1),));
2389 elif ch == '#':
2390 offEnd = sRawCode.find('\n', off, offStop);
2391 if offEnd < 0:
2392 offEnd = offStop;
2393 offEnd -= 1;
2394 while offEnd > off and sRawCode[offEnd - 1].isspace():
2395 offEnd -= 1;
2396 else:
2397 offEnd = sRawCode.find(';', off);
2398 if offEnd < 0:
2399 self.raiseDecodeError(sRawCode, off, 'C++ statement without a ";"');
2400
2401 # Check this and the following statement whether it might have
2402 # something to do with decoding. This is a statement filter
2403 # criteria when generating the threaded functions blocks.
2404 offNextEnd = sRawCode.find(';', offEnd + 1);
2405 fDecode = ( sRawCode.find('IEM_OPCODE_', off, max(offEnd, offNextEnd)) >= 0
2406 or sRawCode.find('IEMOP_HLP_DONE_', off, max(offEnd, offNextEnd)) >= 0
2407 or sRawCode.find('IEMOP_HLP_DECODED_', off, offEnd) >= 0
2408 or sRawCode.find('IEMOP_HLP_RAISE_UD_IF_MISSING_GUEST_FEATURE', off, offEnd) >= 0
2409 or sRawCode.find('IEMOP_HLP_VMX_INSTR', off, offEnd) >= 0
2410 or sRawCode.find('IEMOP_HLP_IN_VMX_OPERATION', off, offEnd) >= 0 ## @todo wrong
2411 );
2412
2413 if not oMatch:
2414 if ch != '#':
2415 aoStmts.append(McCppGeneric(sRawCode[off : offEnd + 1], fDecode));
2416 else:
2417 aoStmts.append(McCppPreProc(sRawCode[off : offEnd + 1]));
2418 off = offEnd + 1;
2419 elif oMatch.group(1).startswith('if'):
2420 #
2421 # if () xxx [else yyy] statement.
2422 #
2423 oStmt = McCppCond(sCondExpr, fDecode);
2424 aoStmts.append(oStmt);
2425 off = offEnd + 1;
2426
2427 # Following the if () we can either have a {} containing zero or more statements
2428 # or we have a single statement.
2429 offBlock1 = self.skipSpacesAt(sRawCode, offEnd + 1, offStop);
2430 if sRawCode[offBlock1] == '{':
2431 offBlock1End = self.findClosingBraces(sRawCode, offBlock1, offStop);
2432 if offBlock1End < 0:
2433 self.raiseDecodeError(sRawCode, offBlock1, 'No matching "}" closing if block');
2434 offBlock1 += 1;
2435 else:
2436 offBlock1End = sRawCode.find(';', offBlock1, offStop);
2437 if offBlock1End < 0:
2438 self.raiseDecodeError(sRawCode, off, 'Expected ";" terminating one-line if block"');
2439
2440 oStmt.aoIfBranch = self.decodeCode(sRawCode, offBlock1, offBlock1End, iLevel + 1);
2441
2442 # The else is optional and can likewise be followed by {} or a single statement.
2443 off = self.skipSpacesAt(sRawCode, offBlock1End + 1, offStop);
2444 if self.isSubstrAt(sRawCode, off, 'else') and sRawCode[off + len('else')].isspace():
2445 offBlock2 = self.skipSpacesAt(sRawCode, off + len('else'), offStop);
2446 if sRawCode[offBlock2] == '{':
2447 offBlock2End = self.findClosingBraces(sRawCode, offBlock2, offStop);
2448 if offBlock2End < 0:
2449 self.raiseDecodeError(sRawCode, offBlock2, 'No matching "}" closing else block');
2450 offBlock2 += 1;
2451 else:
2452 offBlock2End = sRawCode.find(';', offBlock2, offStop);
2453 if offBlock2End < 0:
2454 self.raiseDecodeError(sRawCode, off, 'Expected ";" terminating one-line else block"');
2455
2456 oStmt.aoElseBranch = self.decodeCode(sRawCode, offBlock2, offBlock2End, iLevel + 1);
2457 off = offBlock2End + 1;
2458
2459 elif oMatch.group(1) == 'else':
2460 # Problematic 'else' branch, typically involving #ifdefs.
2461 self.raiseDecodeError(sRawCode, off, 'Mixed up else/#ifdef or something confusing us.');
2462
2463 return aoStmts;
2464
2465 def decode(self):
2466 """
2467 Decodes the block, populating self.aoStmts if necessary.
2468 Returns the statement list.
2469 Raises ParserException on failure.
2470 """
2471 if not self.aoStmts:
2472 self.aoStmts = self.decodeCode(''.join(self.asLines));
2473 return self.aoStmts;
2474
2475
2476 def checkForTooEarlyEffSegUse(self, aoStmts):
2477 """
2478 Checks if iEffSeg is used before the effective address has been decoded.
2479 Returns None on success, error string on failure.
2480
2481 See r158454 for an example of this issue.
2482 """
2483
2484 # Locate the IEM_MC_CALC_RM_EFF_ADDR statement, if found, scan backwards
2485 # for IEMCPU::iEffSeg references. No need to check conditional branches,
2486 # as we're ASSUMING these will not occur before address calculation.
2487 for iStmt, oStmt in enumerate(aoStmts):
2488 if oStmt.sName == 'IEM_MC_CALC_RM_EFF_ADDR':
2489 while iStmt > 0:
2490 iStmt -= 1;
2491 oStmt = aoStmts[iStmt];
2492 for sArg in oStmt.asParams:
2493 if sArg.find('pVCpu->iem.s.iEffSeg') >= 0:
2494 return "statement #%u: pVCpu->iem.s.iEffSeg is used prior to IEM_MC_CALC_RM_EFF_ADDR!" % (iStmt + 1,);
2495 break;
2496 return None;
2497
2498 koReCppFirstWord = re.compile(r'^\s*(\w+)[ (;]');
2499 kdDecodeCppStmtOkayAfterDone = {
2500 'IEMOP_HLP_IN_VMX_OPERATION': True,
2501 'IEMOP_HLP_VMX_INSTR': True,
2502 };
2503
def checkForDoneDecoding(self, aoStmts):
    """
    Verifies that the block has exactly one IEMOP_HLP_DONE_*DECODING* (or
    IEMOP_HLP_DECODED_*) invocation and that nothing is misplaced relative
    to it.

    Returns None when the block is fine, otherwise a string describing the
    first problem encountered.

    The point of the check is to make sure an instruction can safely be
    restarted should the recompiler run out of TB resources (e.g. aRanges
    or aGCPhysPages entries) in the middle of recompiling it.
    """
    asDonePrefixes   = ('IEMOP_HLP_DONE_', 'IEMOP_HLP_DECODED_');
    sDecodeAfterDone = "statement #%u: Decoding statement following IEMOP_HLP_DONE_*DECODING*!";

    # Only the top level statements are visited: the IEMOP_HLP_DONE_ stuff
    # is not allowed inside conditionals, so there is no need to descend.
    cDone = 0;
    for idx, oCur in enumerate(aoStmts):
        if oCur.isCppStmt():
            # Plain C++ statement: classify it by its first word.
            oFirst = self.koReCppFirstWord.match(oCur.asParams[0]);
            if oFirst is None:
                continue;
            sWord = oFirst.group(1);
            if sWord.startswith(asDonePrefixes):
                cDone += 1;
            elif cDone > 0 and oCur.fDecode and sWord not in self.kdDecodeCppStmtOkayAfterDone:
                return sDecodeAfterDone % (idx + 1,);
            continue;

        # Microcode (IEM_MC_XXX) statement.
        if oCur.sName.startswith('IEM_MC_DEFER_TO_CIMPL_') and idx == 0:
            cDone += 1; # Counts as an implicit 'done decoding'.
        elif cDone == 0:
            # Nothing that modifies state may precede the 'done' marker.
            if g_dMcStmtParsers.get(oCur.sName, (None, False))[1]:
                return "statement #%u: State modifying MC statement before IEMOP_HLP_DONE_*DECODING*!" % (idx + 1,);
        elif oCur.sName in ('IEM_MC_CALC_RM_EFF_ADDR',):
            # Effective address calculation is decoding and must come first.
            return sDecodeAfterDone % (idx + 1,);

    if cDone == 0:
        return "Block is missing IEMOP_HLP_DONE_*DECODING* invocation!";
    if cDone > 1:
        return "Block has more than one IEMOP_HLP_DONE_*DECODING* invocation!";
    return None;
2545
def check(self):
    """
    Runs the sanity checks on the decoded block.

    Returns a list with one error string per failed check, in the order the
    checks are run.  The list is empty when nothing was found.
    """
    aoDecoded = self.decode();
    asResults = (
        self.checkForTooEarlyEffSegUse(aoDecoded),
        self.checkForDoneDecoding(aoDecoded),
    );
    # Each checker returns None (or an empty string) when it is satisfied.
    return [sError for sError in asResults if sError];
2563
2564
2565
## IEM_MC_XXX -> parser + info dictionary.
#
# Each value is a (parser method, info) tuple.  The info is currently a
# single boolean entry indicating whether the statement modifies state and
# must not be used before IEMOP_HLP_DONE_*; checkForDoneDecoding() reads it
# via index 1 of the tuple.
#
# The raw table was generated via the following command
# sed -n -e "s/^# *define *\(IEM_MC_[A-Z_0-9]*\)[ (].*$/ '\1': McBlock.parseMcGeneric,/p" include/IEMMc.h \
# | sort | uniq | gawk "{printf """ %%-60s (%%s, True)\n""", $1, $2}"
g_dMcStmtParsers = {
    'IEM_MC_ACTUALIZE_AVX_STATE_FOR_CHANGE': (McBlock.parseMcGeneric, False),
    'IEM_MC_ACTUALIZE_AVX_STATE_FOR_READ': (McBlock.parseMcGeneric, False),
    'IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE': (McBlock.parseMcGeneric, False),
    'IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ': (McBlock.parseMcGeneric, False),
    'IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE': (McBlock.parseMcGeneric, False),
    'IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ': (McBlock.parseMcGeneric, False),
    'IEM_MC_ADD_GREG_U16': (McBlock.parseMcGeneric, True),
    'IEM_MC_ADD_GREG_U16_TO_LOCAL': (McBlock.parseMcGeneric, False),
    'IEM_MC_ADD_GREG_U32': (McBlock.parseMcGeneric, True),
    'IEM_MC_ADD_GREG_U32_TO_LOCAL': (McBlock.parseMcGeneric, False),
    'IEM_MC_ADD_GREG_U64': (McBlock.parseMcGeneric, True),
    'IEM_MC_ADD_GREG_U64_TO_LOCAL': (McBlock.parseMcGeneric, False),
    'IEM_MC_ADD_GREG_U8': (McBlock.parseMcGeneric, True),
    'IEM_MC_ADD_GREG_U8_TO_LOCAL': (McBlock.parseMcGeneric, False),
    'IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR': (McBlock.parseMcGeneric, True),
    'IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR': (McBlock.parseMcGeneric, True),
    'IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR': (McBlock.parseMcGeneric, True),
    'IEM_MC_ADVANCE_RIP_AND_FINISH': (McBlock.parseMcGeneric, True),
    'IEM_MC_AND_2LOCS_U32': (McBlock.parseMcGeneric, False),
    'IEM_MC_AND_ARG_U16': (McBlock.parseMcGeneric, False),
    'IEM_MC_AND_ARG_U32': (McBlock.parseMcGeneric, False),
    'IEM_MC_AND_ARG_U64': (McBlock.parseMcGeneric, False),
    'IEM_MC_AND_GREG_U16': (McBlock.parseMcGeneric, True),
    'IEM_MC_AND_GREG_U32': (McBlock.parseMcGeneric, True),
    'IEM_MC_AND_GREG_U64': (McBlock.parseMcGeneric, True),
    'IEM_MC_AND_GREG_U8': (McBlock.parseMcGeneric, True),
    'IEM_MC_AND_LOCAL_U16': (McBlock.parseMcGeneric, False),
    'IEM_MC_AND_LOCAL_U32': (McBlock.parseMcGeneric, False),
    'IEM_MC_AND_LOCAL_U64': (McBlock.parseMcGeneric, False),
    'IEM_MC_AND_LOCAL_U8': (McBlock.parseMcGeneric, False),
    'IEM_MC_ARG': (McBlock.parseMcArg, False),
    'IEM_MC_ARG_CONST': (McBlock.parseMcArgConst, False),
    'IEM_MC_ARG_LOCAL_EFLAGS': (McBlock.parseMcArgLocalEFlags, False),
    'IEM_MC_ARG_LOCAL_REF': (McBlock.parseMcArgLocalRef, False),
    'IEM_MC_ASSIGN': (McBlock.parseMcGeneric, False),
    'IEM_MC_ASSIGN_TO_SMALLER': (McBlock.parseMcGeneric, False),
    'IEM_MC_ASSIGN_U8_SX_U64': (McBlock.parseMcGeneric, False),
    'IEM_MC_ASSIGN_U32_SX_U64': (McBlock.parseMcGeneric, False),
    'IEM_MC_BEGIN': (McBlock.parseMcGeneric, False),
    'IEM_MC_BROADCAST_XREG_U16_ZX_VLMAX': (McBlock.parseMcGeneric, True),
    'IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX': (McBlock.parseMcGeneric, True),
    'IEM_MC_BROADCAST_XREG_U64_ZX_VLMAX': (McBlock.parseMcGeneric, True),
    'IEM_MC_BROADCAST_XREG_U8_ZX_VLMAX': (McBlock.parseMcGeneric, True),
    'IEM_MC_BROADCAST_YREG_U128_ZX_VLMAX': (McBlock.parseMcGeneric, True),
    'IEM_MC_BROADCAST_YREG_U16_ZX_VLMAX': (McBlock.parseMcGeneric, True),
    'IEM_MC_BROADCAST_YREG_U32_ZX_VLMAX': (McBlock.parseMcGeneric, True),
    'IEM_MC_BROADCAST_YREG_U64_ZX_VLMAX': (McBlock.parseMcGeneric, True),
    'IEM_MC_BROADCAST_YREG_U8_ZX_VLMAX': (McBlock.parseMcGeneric, True),
    'IEM_MC_BSWAP_LOCAL_U16': (McBlock.parseMcGeneric, False),
    'IEM_MC_BSWAP_LOCAL_U32': (McBlock.parseMcGeneric, False),
    'IEM_MC_BSWAP_LOCAL_U64': (McBlock.parseMcGeneric, False),
    'IEM_MC_CALC_RM_EFF_ADDR': (McBlock.parseMcGeneric, False),
    'IEM_MC_CALL_AIMPL_3': (McBlock.parseMcCallAImpl, True),
    'IEM_MC_CALL_AIMPL_4': (McBlock.parseMcCallAImpl, True),
    'IEM_MC_CALL_AVX_AIMPL_2': (McBlock.parseMcCallAvxAImpl, True),
    'IEM_MC_CALL_AVX_AIMPL_3': (McBlock.parseMcCallAvxAImpl, True),
    'IEM_MC_CALL_CIMPL_0': (McBlock.parseMcCallCImpl, True),
    'IEM_MC_CALL_CIMPL_1': (McBlock.parseMcCallCImpl, True),
    'IEM_MC_CALL_CIMPL_2': (McBlock.parseMcCallCImpl, True),
    'IEM_MC_CALL_CIMPL_3': (McBlock.parseMcCallCImpl, True),
    'IEM_MC_CALL_CIMPL_4': (McBlock.parseMcCallCImpl, True),
    'IEM_MC_CALL_CIMPL_5': (McBlock.parseMcCallCImpl, True),
    'IEM_MC_CALL_FPU_AIMPL_1': (McBlock.parseMcCallFpuAImpl, True),
    'IEM_MC_CALL_FPU_AIMPL_2': (McBlock.parseMcCallFpuAImpl, True),
    'IEM_MC_CALL_FPU_AIMPL_3': (McBlock.parseMcCallFpuAImpl, True),
    'IEM_MC_CALL_MMX_AIMPL_2': (McBlock.parseMcCallMmxAImpl, True),
    'IEM_MC_CALL_MMX_AIMPL_3': (McBlock.parseMcCallMmxAImpl, True),
    'IEM_MC_CALL_SSE_AIMPL_2': (McBlock.parseMcCallSseAImpl, True),
    'IEM_MC_CALL_SSE_AIMPL_3': (McBlock.parseMcCallSseAImpl, True),
    'IEM_MC_CALL_VOID_AIMPL_0': (McBlock.parseMcCallVoidAImpl, True),
    'IEM_MC_CALL_VOID_AIMPL_1': (McBlock.parseMcCallVoidAImpl, True),
    'IEM_MC_CALL_VOID_AIMPL_2': (McBlock.parseMcCallVoidAImpl, True),
    'IEM_MC_CALL_VOID_AIMPL_3': (McBlock.parseMcCallVoidAImpl, True),
    'IEM_MC_CALL_VOID_AIMPL_4': (McBlock.parseMcCallVoidAImpl, True),
    'IEM_MC_CLEAR_EFL_BIT': (McBlock.parseMcGeneric, True),
    'IEM_MC_CLEAR_FSW_EX': (McBlock.parseMcGeneric, True),
    'IEM_MC_CLEAR_HIGH_GREG_U64': (McBlock.parseMcGeneric, True),
    'IEM_MC_CLEAR_HIGH_GREG_U64_BY_REF': (McBlock.parseMcGeneric, True),
    'IEM_MC_CLEAR_XREG_U32_MASK': (McBlock.parseMcGeneric, True),
    'IEM_MC_CLEAR_YREG_128_UP': (McBlock.parseMcGeneric, True),
    'IEM_MC_COMMIT_EFLAGS': (McBlock.parseMcGeneric, True),
    'IEM_MC_COPY_XREG_U128': (McBlock.parseMcGeneric, True),
    'IEM_MC_COPY_YREG_U128_ZX_VLMAX': (McBlock.parseMcGeneric, True),
    'IEM_MC_COPY_YREG_U256_ZX_VLMAX': (McBlock.parseMcGeneric, True),
    'IEM_MC_COPY_YREG_U64_ZX_VLMAX': (McBlock.parseMcGeneric, True),
    'IEM_MC_DEFER_TO_CIMPL_0_RET': (McBlock.parseMcGeneric, False),
    'IEM_MC_DEFER_TO_CIMPL_1_RET': (McBlock.parseMcGeneric, False),
    'IEM_MC_DEFER_TO_CIMPL_2_RET': (McBlock.parseMcGeneric, False),
    'IEM_MC_DEFER_TO_CIMPL_3_RET': (McBlock.parseMcGeneric, False),
    'IEM_MC_END': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_EFLAGS': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_EFLAGS_U8': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_FCW': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_FSW': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_GREG_U16': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_GREG_U16_SX_U32': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_GREG_U16_SX_U64': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_GREG_U16_ZX_U32': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_GREG_U16_ZX_U64': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_GREG_U32': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_GREG_U32_SX_U64': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_GREG_U32_ZX_U64': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_GREG_U64': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_GREG_U64_ZX_U64': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_GREG_U8': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_GREG_U8_SX_U16': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_GREG_U8_SX_U32': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_GREG_U8_SX_U64': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_GREG_U8_ZX_U16': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_GREG_U8_ZX_U32': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_GREG_U8_ZX_U64': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_MEM_D80': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_I16': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_I32': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_I64': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_R32': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_R64': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_R80': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_S32_SX_U64': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_U128': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_U128_ALIGN_SSE': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_U128_NO_AC': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_U16': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_U16_DISP': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_U16_SX_U32': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_U16_SX_U64': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_U16_ZX_U32': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_U16_ZX_U64': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_U256': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_U256_ALIGN_AVX': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_U256_NO_AC': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_U32': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_U32_DISP': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_U32_SX_U64': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_U32_ZX_U64': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_U64': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_U64_ALIGN_U128': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_U64_DISP': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_U8': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_U8_SX_U16': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_U8_SX_U32': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_U8_SX_U64': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_U8_ZX_U16': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_U8_ZX_U32': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_U8_ZX_U64': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_XMM': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_XMM_ALIGN_SSE': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_XMM_NO_AC': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_XMM_U32': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_XMM_U64': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_YMM': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_YMM_ALIGN_AVX': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_YMM_NO_AC': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM16_U8': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM32_U8': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MREG_U32': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_MREG_U64': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_SREG_BASE_U32': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_SREG_BASE_U64': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_SREG_U16': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_SREG_ZX_U32': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_SREG_ZX_U64': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_XREG_U128': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_XREG_U16': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_XREG_U32': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_XREG_U64': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_XREG_U8': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_XREG_XMM': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_YREG_2ND_U64': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_YREG_U128': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_YREG_U256': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_YREG_U32': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_YREG_U64': (McBlock.parseMcGeneric, False),
    'IEM_MC_FLIP_EFL_BIT': (McBlock.parseMcGeneric, True),
    'IEM_MC_FPU_FROM_MMX_MODE': (McBlock.parseMcGeneric, True),
    'IEM_MC_FPU_STACK_DEC_TOP': (McBlock.parseMcGeneric, True),
    'IEM_MC_FPU_STACK_FREE': (McBlock.parseMcGeneric, True),
    'IEM_MC_FPU_STACK_INC_TOP': (McBlock.parseMcGeneric, True),
    'IEM_MC_FPU_STACK_PUSH_OVERFLOW': (McBlock.parseMcGeneric, True),
    'IEM_MC_FPU_STACK_PUSH_OVERFLOW_MEM_OP': (McBlock.parseMcGeneric, True),
    'IEM_MC_FPU_STACK_PUSH_UNDERFLOW': (McBlock.parseMcGeneric, True),
    'IEM_MC_FPU_STACK_PUSH_UNDERFLOW_TWO': (McBlock.parseMcGeneric, True),
    'IEM_MC_FPU_STACK_UNDERFLOW': (McBlock.parseMcGeneric, True),
    'IEM_MC_FPU_STACK_UNDERFLOW_MEM_OP': (McBlock.parseMcGeneric, True),
    'IEM_MC_FPU_STACK_UNDERFLOW_MEM_OP_THEN_POP': (McBlock.parseMcGeneric, True),
    'IEM_MC_FPU_STACK_UNDERFLOW_THEN_POP': (McBlock.parseMcGeneric, True),
    'IEM_MC_FPU_STACK_UNDERFLOW_THEN_POP_POP': (McBlock.parseMcGeneric, True),
    'IEM_MC_FPU_TO_MMX_MODE': (McBlock.parseMcGeneric, True),
    'IEM_MC_IF_CX_IS_NZ': (McBlock.parseMcGenericCond, True),
    'IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_NOT_SET': (McBlock.parseMcGenericCond, True),
    'IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_SET': (McBlock.parseMcGenericCond, True),
    'IEM_MC_IF_ECX_IS_NZ': (McBlock.parseMcGenericCond, True),
    'IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_NOT_SET': (McBlock.parseMcGenericCond, True),
    'IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_SET': (McBlock.parseMcGenericCond, True),
    'IEM_MC_IF_EFL_ANY_BITS_SET': (McBlock.parseMcGenericCond, True),
    'IEM_MC_IF_EFL_BIT_NOT_SET': (McBlock.parseMcGenericCond, True),
    'IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ': (McBlock.parseMcGenericCond, True),
    'IEM_MC_IF_EFL_BIT_SET': (McBlock.parseMcGenericCond, True),
    'IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE': (McBlock.parseMcGenericCond, True),
    'IEM_MC_IF_EFL_BITS_EQ': (McBlock.parseMcGenericCond, True),
    'IEM_MC_IF_EFL_BITS_NE': (McBlock.parseMcGenericCond, True),
    'IEM_MC_IF_EFL_NO_BITS_SET': (McBlock.parseMcGenericCond, True),
    'IEM_MC_IF_FCW_IM': (McBlock.parseMcGenericCond, True),
    'IEM_MC_IF_FPUREG_IS_EMPTY': (McBlock.parseMcGenericCond, True),
    'IEM_MC_IF_FPUREG_NOT_EMPTY': (McBlock.parseMcGenericCond, True),
    'IEM_MC_IF_FPUREG_NOT_EMPTY_REF_R80': (McBlock.parseMcGenericCond, True),
    'IEM_MC_IF_GREG_BIT_SET': (McBlock.parseMcGenericCond, True),
    'IEM_MC_IF_LOCAL_IS_Z': (McBlock.parseMcGenericCond, True),
    'IEM_MC_IF_MXCSR_XCPT_PENDING': (McBlock.parseMcGenericCond, True),
    'IEM_MC_IF_RCX_IS_NZ': (McBlock.parseMcGenericCond, True),
    'IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_NOT_SET': (McBlock.parseMcGenericCond, True),
    'IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_SET': (McBlock.parseMcGenericCond, True),
    'IEM_MC_IF_TWO_FPUREGS_NOT_EMPTY_REF_R80': (McBlock.parseMcGenericCond, True),
    'IEM_MC_IF_TWO_FPUREGS_NOT_EMPTY_REF_R80_FIRST': (McBlock.parseMcGenericCond, True),
    'IEM_MC_IMPLICIT_AVX_AIMPL_ARGS': (McBlock.parseMcGeneric, False),
    'IEM_MC_INT_CLEAR_ZMM_256_UP': (McBlock.parseMcGeneric, True),
    'IEM_MC_LOCAL': (McBlock.parseMcLocal, False),
    'IEM_MC_LOCAL_CONST': (McBlock.parseMcLocalConst, False),
    'IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT': (McBlock.parseMcGeneric, True),
    'IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE': (McBlock.parseMcGeneric, True),
    'IEM_MC_MAYBE_RAISE_FPU_XCPT': (McBlock.parseMcGeneric, True),
    'IEM_MC_MAYBE_RAISE_FSGSBASE_XCPT': (McBlock.parseMcGeneric, True),
    'IEM_MC_MAYBE_RAISE_MMX_RELATED_XCPT': (McBlock.parseMcGeneric, True),
    'IEM_MC_MAYBE_RAISE_NON_CANONICAL_ADDR_GP0': (McBlock.parseMcGeneric, True),
    'IEM_MC_MAYBE_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT': (McBlock.parseMcGeneric, True),
    'IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT': (McBlock.parseMcGeneric, True),
    'IEM_MC_MAYBE_RAISE_WAIT_DEVICE_NOT_AVAILABLE': (McBlock.parseMcGeneric, True),
    'IEM_MC_MEM_COMMIT_AND_UNMAP': (McBlock.parseMcGeneric, True),
    'IEM_MC_MEM_COMMIT_AND_UNMAP_FOR_FPU_STORE': (McBlock.parseMcGeneric, True),
    'IEM_MC_MEM_MAP': (McBlock.parseMcGeneric, True),
    'IEM_MC_MEM_MAP_EX': (McBlock.parseMcGeneric, True),
    'IEM_MC_MERGE_YREG_U32_U96_ZX_VLMAX': (McBlock.parseMcGeneric, True),
    'IEM_MC_MERGE_YREG_U64_U64_ZX_VLMAX': (McBlock.parseMcGeneric, True),
    'IEM_MC_MERGE_YREG_U64HI_U64HI_ZX_VLMAX': (McBlock.parseMcGeneric, True),
    'IEM_MC_MERGE_YREG_U64LO_U64LO_ZX_VLMAX': (McBlock.parseMcGeneric, True),
    'IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX': (McBlock.parseMcGeneric, True),
    'IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX': (McBlock.parseMcGeneric, True),
    'IEM_MC_MODIFIED_MREG': (McBlock.parseMcGeneric, True),
    'IEM_MC_MODIFIED_MREG_BY_REF': (McBlock.parseMcGeneric, True),
    'IEM_MC_OR_2LOCS_U32': (McBlock.parseMcGeneric, False),
    'IEM_MC_OR_GREG_U16': (McBlock.parseMcGeneric, True),
    'IEM_MC_OR_GREG_U32': (McBlock.parseMcGeneric, True),
    'IEM_MC_OR_GREG_U64': (McBlock.parseMcGeneric, True),
    'IEM_MC_OR_GREG_U8': (McBlock.parseMcGeneric, True),
    'IEM_MC_OR_LOCAL_U16': (McBlock.parseMcGeneric, False),
    'IEM_MC_OR_LOCAL_U32': (McBlock.parseMcGeneric, False),
    'IEM_MC_OR_LOCAL_U8': (McBlock.parseMcGeneric, False),
    'IEM_MC_POP_U16': (McBlock.parseMcGeneric, True),
    'IEM_MC_POP_U32': (McBlock.parseMcGeneric, True),
    'IEM_MC_POP_U64': (McBlock.parseMcGeneric, True),
    'IEM_MC_PREPARE_AVX_USAGE': (McBlock.parseMcGeneric, False),
    'IEM_MC_PREPARE_FPU_USAGE': (McBlock.parseMcGeneric, False),
    'IEM_MC_PREPARE_SSE_USAGE': (McBlock.parseMcGeneric, False),
    'IEM_MC_PUSH_FPU_RESULT': (McBlock.parseMcGeneric, True),
    'IEM_MC_PUSH_FPU_RESULT_MEM_OP': (McBlock.parseMcGeneric, True),
    'IEM_MC_PUSH_FPU_RESULT_TWO': (McBlock.parseMcGeneric, True),
    'IEM_MC_PUSH_U16': (McBlock.parseMcGeneric, True),
    'IEM_MC_PUSH_U32': (McBlock.parseMcGeneric, True),
    'IEM_MC_PUSH_U32_SREG': (McBlock.parseMcGeneric, True),
    'IEM_MC_PUSH_U64': (McBlock.parseMcGeneric, True),
    'IEM_MC_RAISE_DIVIDE_ERROR': (McBlock.parseMcGeneric, True),
    'IEM_MC_RAISE_GP0_IF_CPL_NOT_ZERO': (McBlock.parseMcGeneric, True),
    'IEM_MC_RAISE_GP0_IF_EFF_ADDR_UNALIGNED': (McBlock.parseMcGeneric, True),
    'IEM_MC_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT': (McBlock.parseMcGeneric, True),
    'IEM_MC_REF_EFLAGS': (McBlock.parseMcGeneric, False),
    'IEM_MC_REF_FPUREG': (McBlock.parseMcGeneric, False),
    'IEM_MC_REF_GREG_I32': (McBlock.parseMcGeneric, False),
    'IEM_MC_REF_GREG_I32_CONST': (McBlock.parseMcGeneric, False),
    'IEM_MC_REF_GREG_I64': (McBlock.parseMcGeneric, False),
    'IEM_MC_REF_GREG_I64_CONST': (McBlock.parseMcGeneric, False),
    'IEM_MC_REF_GREG_U16': (McBlock.parseMcGeneric, False),
    'IEM_MC_REF_GREG_U32': (McBlock.parseMcGeneric, False),
    'IEM_MC_REF_GREG_U64': (McBlock.parseMcGeneric, False),
    'IEM_MC_REF_GREG_U8': (McBlock.parseMcGeneric, False),
    'IEM_MC_REF_LOCAL': (McBlock.parseMcGeneric, False),
    'IEM_MC_REF_MREG_U32_CONST': (McBlock.parseMcGeneric, False),
    'IEM_MC_REF_MREG_U64': (McBlock.parseMcGeneric, False),
    'IEM_MC_REF_MREG_U64_CONST': (McBlock.parseMcGeneric, False),
    'IEM_MC_REF_MXCSR': (McBlock.parseMcGeneric, False),
    'IEM_MC_REF_XREG_R32_CONST': (McBlock.parseMcGeneric, False),
    'IEM_MC_REF_XREG_R64_CONST': (McBlock.parseMcGeneric, False),
    'IEM_MC_REF_XREG_U128': (McBlock.parseMcGeneric, False),
    'IEM_MC_REF_XREG_U128_CONST': (McBlock.parseMcGeneric, False),
    'IEM_MC_REF_XREG_U32_CONST': (McBlock.parseMcGeneric, False),
    'IEM_MC_REF_XREG_U64_CONST': (McBlock.parseMcGeneric, False),
    'IEM_MC_REF_XREG_XMM_CONST': (McBlock.parseMcGeneric, False),
    'IEM_MC_REF_YREG_U128': (McBlock.parseMcGeneric, False),
    'IEM_MC_REF_YREG_U128_CONST': (McBlock.parseMcGeneric, False),
    'IEM_MC_REF_YREG_U64_CONST': (McBlock.parseMcGeneric, False),
    'IEM_MC_REL_JMP_S16_AND_FINISH': (McBlock.parseMcGeneric, True),
    'IEM_MC_REL_JMP_S32_AND_FINISH': (McBlock.parseMcGeneric, True),
    'IEM_MC_REL_JMP_S8_AND_FINISH': (McBlock.parseMcGeneric, True),
    'IEM_MC_RETURN_ON_FAILURE': (McBlock.parseMcGeneric, False),
    'IEM_MC_SAR_LOCAL_S16': (McBlock.parseMcGeneric, False),
    'IEM_MC_SAR_LOCAL_S32': (McBlock.parseMcGeneric, False),
    'IEM_MC_SAR_LOCAL_S64': (McBlock.parseMcGeneric, False),
    'IEM_MC_SET_EFL_BIT': (McBlock.parseMcGeneric, True),
    'IEM_MC_SET_FPU_RESULT': (McBlock.parseMcGeneric, True),
    'IEM_MC_SET_RIP_U16_AND_FINISH': (McBlock.parseMcGeneric, True),
    'IEM_MC_SET_RIP_U32_AND_FINISH': (McBlock.parseMcGeneric, True),
    'IEM_MC_SET_RIP_U64_AND_FINISH': (McBlock.parseMcGeneric, True),
    'IEM_MC_SHL_LOCAL_S16': (McBlock.parseMcGeneric, False),
    'IEM_MC_SHL_LOCAL_S32': (McBlock.parseMcGeneric, False),
    'IEM_MC_SHL_LOCAL_S64': (McBlock.parseMcGeneric, False),
    'IEM_MC_SHR_LOCAL_U8': (McBlock.parseMcGeneric, False),
    'IEM_MC_SSE_UPDATE_MXCSR': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_FPU_RESULT': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_FPU_RESULT_MEM_OP': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_FPU_RESULT_THEN_POP': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_FPU_RESULT_WITH_MEM_OP_THEN_POP': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_FPUREG_R80_SRC_REF': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_GREG_I64': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_GREG_U16': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_GREG_U16_CONST': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_GREG_U32': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_GREG_U32_CONST': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_GREG_U64': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_GREG_U64_CONST': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_GREG_U8': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_GREG_U8_CONST': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_MEM_I16_CONST_BY_REF': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_MEM_I32_CONST_BY_REF': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_MEM_I64_CONST_BY_REF': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_MEM_I8_CONST_BY_REF': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_MEM_INDEF_D80_BY_REF': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_MEM_NEG_QNAN_R32_BY_REF': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_MEM_NEG_QNAN_R64_BY_REF': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_MEM_NEG_QNAN_R80_BY_REF': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_MEM_U128': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_MEM_U128_ALIGN_SSE': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_MEM_U16': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_MEM_U16_CONST': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_MEM_U256': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_MEM_U256_ALIGN_AVX': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_MEM_U32': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_MEM_U32_CONST': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_MEM_U64': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_MEM_U64_CONST': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_MEM_U8': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_MEM_U8_CONST': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_MREG_U32_ZX_U64': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_MREG_U64': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_SREG_BASE_U32': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_SREG_BASE_U64': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_SSE_RESULT': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_XREG_HI_U64': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_XREG_R32': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_XREG_R64': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_XREG_U128': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_XREG_U16': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_XREG_U32': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_XREG_U32_U128': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_XREG_U32_ZX_U128': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_XREG_U64': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_XREG_U64_ZX_U128': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_XREG_U8': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_XREG_XMM': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_XREG_XMM_U32': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_XREG_XMM_U64': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_YREG_U128': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_YREG_U128_ZX_VLMAX': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_YREG_U256_ZX_VLMAX': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_YREG_U32_ZX_VLMAX': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_YREG_U64_ZX_VLMAX': (McBlock.parseMcGeneric, True),
    'IEM_MC_SUB_GREG_U16': (McBlock.parseMcGeneric, True),
    'IEM_MC_SUB_GREG_U32': (McBlock.parseMcGeneric, True),
    'IEM_MC_SUB_GREG_U64': (McBlock.parseMcGeneric, True),
    'IEM_MC_SUB_GREG_U8': (McBlock.parseMcGeneric, True),
    'IEM_MC_SUB_LOCAL_U16': (McBlock.parseMcGeneric, False),
    'IEM_MC_UPDATE_FPU_OPCODE_IP': (McBlock.parseMcGeneric, True),
    'IEM_MC_UPDATE_FSW': (McBlock.parseMcGeneric, True),
    'IEM_MC_UPDATE_FSW_CONST': (McBlock.parseMcGeneric, True),
    'IEM_MC_UPDATE_FSW_THEN_POP': (McBlock.parseMcGeneric, True),
    'IEM_MC_UPDATE_FSW_THEN_POP_POP': (McBlock.parseMcGeneric, True),
    'IEM_MC_UPDATE_FSW_WITH_MEM_OP': (McBlock.parseMcGeneric, True),
    'IEM_MC_UPDATE_FSW_WITH_MEM_OP_THEN_POP': (McBlock.parseMcGeneric, True),
};
2952
## List of microcode blocks.
# Module level list which starts out empty; per the type comment the entries
# are McBlock instances.  (The code filling it in is not part of this section.)
g_aoMcBlocks = [] # type: list(McBlock)
2955
2956
2957
class ParserException(Exception):
    """
    Exception carrying a parser error message.

    SimpleParser.raiseError and SimpleParser.raiseCommentError raise it with
    a 'file:line: error: text' style message.
    """
    def __init__(self, sMessage):
        super(ParserException, self).__init__(sMessage);
2962
2963
2964class SimpleParser(object): # pylint: disable=too-many-instance-attributes
2965 """
2966 Parser of IEMAllInstruction*.cpp.h instruction specifications.
2967 """
2968
## @name Parser state.
## Values for the iState member.
## @{
kiCode = 0;         ##< The state the constructor starts the parser out in.
kiCommentMulti = 1; ##< Presumably inside a multi-line comment - the code using it is not in this section.
## @}
2974
class Macro(object):
    """
    A preprocessor macro definition together with the logic for expanding it.
    """
    def __init__(self, sName, asArgs, sBody, iLine):
        self.sName = sName;   ##< The macro name.
        self.asArgs = asArgs; ##< None if simple macro, list of parameters otherwise.
        self.sBody = sBody;   ##< The macro body, i.e. the text expandMacro works on.
        self.iLine = iLine;   ##< Line number supplied by whoever creates the macro.
        ## Matches a parameter name in the body.  Group 1 and 3 swallow any
        ## adjacent '##' (with surrounding whitespace) so that token pasting
        ## falls out of the substitution; otherwise they are empty \b matches.
        ## None for macros without parameters.
        self.oReArgMatch = re.compile(r'(\s*##\s*|\b)(' + '|'.join(asArgs) + r')(\s*##\s*|\b)') if asArgs else None;

    @staticmethod
    def _needSpace(ch):
        """ This is just to make the expanded output a bit prettier. """
        # NOTE(review): the second clause cannot change the outcome since '('
        # is not whitespace.  Left as-is to keep the expanded output unchanged.
        return ch.isspace() and ch != '(';

    def expandMacro(self, oParent, asArgs = None):
        """
        Expands the macro body with the given arguments.

        oParent is not used.  asArgs is the list of argument strings; it
        must have one entry per macro parameter, and be None or empty for
        a macro without parameters (both are asserted).

        Returns the expanded body as a string.  Argument values are inserted
        verbatim and are not themselves scanned for parameter names.
        """
        _ = oParent;

        if not self.oReArgMatch:
            assert not asArgs;
            return self.sBody;

        assert len(asArgs) == len(self.asArgs);
        dArgs = dict(zip(self.asArgs, asArgs));

        def replaceParam(oMatch):
            """ Returns the replacement text for one parameter reference. """
            sSrc = oMatch.string;
            # NOTE(review): with an empty group 1 the character at start() is
            # the first one of the parameter name, so this padding looks
            # unreachable (start() - 1 may have been intended).  Kept so the
            # output stays the same.
            sPre = '';
            if not oMatch.group(1) and oMatch.start() > 0 and self._needSpace(sSrc[oMatch.start()]):
                sPre = ' ';
            sPost = '';
            if not oMatch.group(3) and oMatch.end() < len(sSrc) and self._needSpace(sSrc[oMatch.end()]):
                sPost = ' ';
            return sPre + dArgs[oMatch.group(2)] + sPost;

        # All matching is done against the original body in a single pass.
        # Searching on in the partially substituted text would miss the second
        # parameter of 'a ## b' whenever the first argument ends with a word
        # character, as the \b anchor then no longer matches in front of 'b'.
        return self.oReArgMatch.sub(replaceParam, self.sBody);
3016
3017
def __init__(self, sSrcFile, asLines, sDefaultMap, oInheritMacrosFrom = None):
    """
    Sets up the parser state, the regular expressions and the tag handler
    table for one source file.

    sSrcFile is the file name used as prefix in the error messages and
    asLines the lines to parse.  sDefaultMap must be a key in
    g_dInstructionMaps (asserted).  oInheritMacrosFrom is optional; when
    given, its dMacros dictionary is copied and its oReMacros is shared as
    the starting point for this parser.
    """
    self.sSrcFile = sSrcFile;
    self.asLines = asLines;
    self.iLine = 0;
    self.iState = self.kiCode;
    self.sComment = '';
    self.iCommentLine = 0;
    self.aoCurInstrs = [] # type: list(Instruction)
    self.oCurFunction = None # type: DecoderFunction
    self.iMcBlockInFunc = 0;
    self.oCurMcBlock = None # type: McBlock
    self.dMacros = {} # type: Dict[str,SimpleParser.Macro]
    self.oReMacros = None # type: re ##< Regular expression matching invocations of anything in self.dMacros.
    if oInheritMacrosFrom:
        self.dMacros = dict(oInheritMacrosFrom.dMacros);
        self.oReMacros = oInheritMacrosFrom.oReMacros;

    assert sDefaultMap in g_dInstructionMaps;
    self.oDefaultMap = g_dInstructionMaps[sDefaultMap];

    self.cTotalInstr = 0;
    self.cTotalStubs = 0;
    self.cTotalTagged = 0;
    self.cTotalMcBlocks = 0;

    # Note! All patterns are raw strings.  Sequences like '\s', '\d' and '\['
    #       are invalid escapes in ordinary string literals and newer python
    #       versions warn about them.  The pattern text itself is unchanged.
    self.oReMacroName = re.compile(r'^[A-Za-z_][A-Za-z0-9_]*$');
    self.oReMnemonic = re.compile(r'^[A-Za-z_][A-Za-z0-9_]*$');
    self.oReStatsName = re.compile(r'^[A-Za-z_][A-Za-z0-9_]*$');
    self.oReFunctionName= re.compile(r'^iemOp_[A-Za-z_][A-Za-z0-9_]*$');
    self.oReGroupName = re.compile(r'^og_[a-z0-9]+(|_[a-z0-9]+|_[a-z0-9]+_[a-z0-9]+)$');
    self.oReDisEnum = re.compile(r'^OP_[A-Z0-9_]+$');
    self.oReFunTable = re.compile(r'^(IEM_STATIC|static) +const +PFNIEMOP +g_apfn[A-Za-z0-9_]+ *\[ *\d* *\] *= *$');
    self.oReComment = re.compile(r'//.*?$|/\*.*?\*/'); ## Full comments.
    self.oReHashDefine = re.compile(r'^\s*#\s*define\s+(.*)$');
    self.oReHashDefine2 = re.compile(r'(?s)\A\s*([A-Za-z_][A-Za-z0-9_]*)\(([^)]*)\)\s*(.*)\Z'); ##< With arguments.
    self.oReHashDefine3 = re.compile(r'(?s)\A\s*([A-Za-z_][A-Za-z0-9_]*)[^(]\s*(.*)\Z'); ##< Simple, no arguments.
    self.oReHashUndef = re.compile(r'^\s*#\s*undef\s+(.*)$');
    self.oReMcBeginEnd = re.compile(r'\bIEM_MC_(BEGIN|END|DEFER_TO_CIMPL_[1-5]_RET)\s*\('); ##< Not DEFER_TO_CIMPL_0_RET!
    self.fDebug = True;
    self.fDebugMc = False;
    self.fDebugPreProc = False;

    # Maps the '@opxxx' tags to the methods handling them.
    self.dTagHandlers = {
        '@opbrief': self.parseTagOpBrief,
        '@opdesc': self.parseTagOpDesc,
        '@opmnemonic': self.parseTagOpMnemonic,
        '@op1': self.parseTagOpOperandN,
        '@op2': self.parseTagOpOperandN,
        '@op3': self.parseTagOpOperandN,
        '@op4': self.parseTagOpOperandN,
        '@oppfx': self.parseTagOpPfx,
        '@opmaps': self.parseTagOpMaps,
        '@opcode': self.parseTagOpcode,
        '@opcodesub': self.parseTagOpcodeSub,
        '@openc': self.parseTagOpEnc,
        '@opfltest': self.parseTagOpEFlags,
        '@opflmodify': self.parseTagOpEFlags,
        '@opflundef': self.parseTagOpEFlags,
        '@opflset': self.parseTagOpEFlags,
        '@opflclear': self.parseTagOpEFlags,
        '@ophints': self.parseTagOpHints,
        '@opdisenum': self.parseTagOpDisEnum,
        '@opmincpu': self.parseTagOpMinCpu,
        '@opcpuid': self.parseTagOpCpuId,
        '@opgroup': self.parseTagOpGroup,
        '@opunused': self.parseTagOpUnusedInvalid,
        '@opinvalid': self.parseTagOpUnusedInvalid,
        '@opinvlstyle': self.parseTagOpUnusedInvalid,
        '@optest': self.parseTagOpTest,
        '@optestign': self.parseTagOpTestIgnore,
        '@optestignore': self.parseTagOpTestIgnore,
        '@opcopytests': self.parseTagOpCopyTests,
        '@oponly': self.parseTagOpOnlyTest,
        '@oponlytest': self.parseTagOpOnlyTest,
        '@opxcpttype': self.parseTagOpXcptType,
        '@opstats': self.parseTagOpStats,
        '@opfunction': self.parseTagOpFunction,
        '@opdone': self.parseTagOpDone,
    };
    # The numbered test tags, both the '@optestN' and the '@optest[N]' form.
    for i in range(48):
        self.dTagHandlers['@optest%u' % (i,)] = self.parseTagOpTestNum;
        self.dTagHandlers['@optest[%u]' % (i,)] = self.parseTagOpTestNum;

    self.asErrors = [];
3102
def raiseError(self, sMessage):
    """
    Aborts by raising a ParserException for the current source position.

    The exception text has the form 'file:line: error: message', built from
    self.sSrcFile, self.iLine and sMessage.
    """
    sFullMsg = "%s:%d: error: %s" % (self.sSrcFile, self.iLine, sMessage,);
    raise ParserException(sFullMsg);
3108
def raiseCommentError(self, iLineInComment, sMessage):
    """
    Raises a ParserException for a line inside the current comment.

    The reported line number is self.iCommentLine + iLineInComment; the
    text otherwise has the same 'file:line: error: message' layout.
    """
    iErrLine = self.iCommentLine + iLineInComment;
    sFullMsg = "%s:%d: error: %s" % (self.sSrcFile, iErrLine, sMessage,);
    raise ParserException(sFullMsg);
3114
def error(self, sMessage):
    """
    Records an error for the current line (self.iLine) in self.asErrors.

    Always returns False, so callers can write 'return self.error(...)'.
    """
    sEntry = u'%s:%d: error: %s\n' % (self.sSrcFile, self.iLine, sMessage,);
    self.asErrors.append(sEntry);
    return False;
3122
def errorOnLine(self, iLine, sMessage):
    """
    Records an error for the explicitly given line number iLine in
    self.asErrors.

    Always returns False, so callers can write 'return self.errorOnLine(...)'.
    """
    sEntry = u'%s:%d: error: %s\n' % (self.sSrcFile, iLine, sMessage,);
    self.asErrors.append(sEntry);
    return False;
3130
def errorComment(self, iLineInComment, sMessage):
    """
    Records an error for a line inside the current comment.

    The reported line number is self.iCommentLine + iLineInComment.
    Always returns False, so callers can write 'return self.errorComment(...)'.
    """
    iErrLine = self.iCommentLine + iLineInComment;
    sEntry   = u'%s:%d: error: %s\n' % (self.sSrcFile, iErrLine, sMessage,);
    self.asErrors.append(sEntry);
    return False;
3138
def printErrors(self):
    """
    Writes all recorded errors (self.asErrors) to stderr in one go.

    Returns the number of recorded errors; nothing is written when there
    are none.
    """
    cErrors = len(self.asErrors);
    if cErrors > 0:
        sys.stderr.write(u''.join(self.asErrors));
    return cErrors;
3147
def debug(self, sMessage):
    """
    Prints sMessage to stderr with a 'debug: ' prefix when self.fDebug is
    set; does nothing otherwise.
    """
    if not self.fDebug:
        return;
    print('debug: %s' % (sMessage,), file = sys.stderr);
3154
def stripComments(self, sLine):
    """
    Returns sLine with every comment matched by self.oReComment replaced
    by a single space.

    If a comment opener or closer is still present afterwards (i.e. the
    line holds part of a multi-line comment), an error is recorded via
    self.error() and the partially stripped line is returned anyway.
    """
    sStripped = self.oReComment.sub(" ", sLine);
    if '/*' in sStripped or '*/' in sStripped:
        self.error('Unexpected multi-line comment will not be handled correctly. Please simplify.');
    return sStripped;
3165
def parseFunctionTable(self, sLine):
    """
    Parses a PFNIEMOP table, updating/checking the @oppfx value.

    sLine is the line holding the table declaration (name and '[size]').
    The table body is read from self.asLines starting at self.iLine.

    Returns True when the closing brace is found and the entry count
    matches the expected table length.  Otherwise an error is recorded
    and False is returned.

    Note! Updates iLine as it consumes the whole table.
    """

    #
    # Extract the table name.
    #
    # Raw string literals: '\[' is not a valid Python string escape.
    sName = re.search(r' *([a-zA-Z_0-9]+) *\[', sLine).group(1);
    oMap  = g_dInstructionMapsByIemName.get(sName);
    if not oMap:
        self.debug('No map for PFNIEMOP table: %s' % (sName,));
        oMap = self.oDefaultMap; # This is wrong wrong wrong.

    #
    # All but the g_apfnOneByteMap & g_apfnEscF1_E0toFF tables uses four
    # entries per byte:
    #   no prefix, 066h prefix, f3h prefix, f2h prefix
    # Those tables has 256 & 32 entries respectively.
    #
    cEntriesPerByte   = 4;
    cValidTableLength = 1024;
    asPrefixes        = ('none', '0x66', '0xf3', '0xf2');

    oEntriesMatch = re.search(r'\[ *(256|32) *\]', sLine);
    if oEntriesMatch:
        cEntriesPerByte   = 1;
        cValidTableLength = int(oEntriesMatch.group(1));
        asPrefixes        = (None,);

    #
    # The next line should be '{' and nothing else.
    #
    if self.iLine >= len(self.asLines) or not re.match('^ *{ *$', self.asLines[self.iLine]):
        return self.errorOnLine(self.iLine + 1, 'Expected lone "{" on line following PFNIEMOP table %s start' % (sName, ));
    self.iLine += 1;

    #
    # Parse till we find the end of the table.
    #
    iEntry = 0;
    while self.iLine < len(self.asLines):
        # Get the next line and strip comments and spaces (assumes no
        # multi-line comments).
        sLine = self.asLines[self.iLine];
        self.iLine += 1;
        sLine = self.stripComments(sLine).strip();

        # Split the line up into entries, expanding IEMOP_X4 usage.
        # Walks backwards so the inserts and deletes do not disturb the
        # indexes still to be visited.
        asEntries = sLine.split(',');
        for i in range(len(asEntries) - 1, -1, -1):
            sEntry = asEntries[i].strip();
            if sEntry.startswith('IEMOP_X4(') and sEntry[-1] == ')':
                sEntry = (sEntry[len('IEMOP_X4('):-1]).strip();
                asEntries.insert(i + 1, sEntry);
                asEntries.insert(i + 1, sEntry);
                asEntries.insert(i + 1, sEntry);
            if sEntry:
                asEntries[i] = sEntry;
            else:
                del asEntries[i];

        # Process the entries.
        for sEntry in asEntries:
            if sEntry in ('};', '}'):
                if iEntry != cValidTableLength:
                    return self.error('Wrong table length for %s: %#x, expected %#x' % (sName, iEntry, cValidTableLength, ));
                return True;
            if sEntry.startswith('iemOp_Invalid'):
                pass; # skip
            else:
                # Look up matching instruction by function.
                sPrefix = asPrefixes[iEntry % cEntriesPerByte];
                sOpcode = '%#04x' % (iEntry // cEntriesPerByte);
                aoInstr = g_dAllInstructionsByFunction.get(sEntry);
                if aoInstr:
                    if not isinstance(aoInstr, list):
                        aoInstr = [aoInstr,];
                    oInstr = None;
                    for oCurInstr in aoInstr:
                        if oCurInstr.sOpcode == sOpcode and oCurInstr.sPrefix == sPrefix:
                            pass;
                        elif oCurInstr.sOpcode == sOpcode and oCurInstr.sPrefix is None:
                            oCurInstr.sPrefix = sPrefix;
                        elif oCurInstr.sOpcode is None and oCurInstr.sPrefix is None:
                            oCurInstr.sOpcode = sOpcode;
                            oCurInstr.sPrefix = sPrefix;
                        else:
                            continue;
                        oInstr = oCurInstr;
                        break;
                    if not oInstr:
                        # No existing entry fits this slot, so clone the
                        # first one for this opcode/prefix combination.
                        oInstr = aoInstr[0].copy(oMap = oMap, sOpcode = sOpcode, sPrefix = sPrefix);
                        aoInstr.append(oInstr);
                        g_dAllInstructionsByFunction[sEntry] = aoInstr;
                        g_aoAllInstructions.append(oInstr);
                        oMap.aoInstructions.append(oInstr);
                else:
                    self.debug('Function "%s", entry %#04x / byte %#04x in %s, is not associated with an instruction.'
                               % (sEntry, iEntry, iEntry // cEntriesPerByte, sName,));
            iEntry += 1;

    return self.error('Unexpected end of file in PFNIEMOP table');
3271
def addInstruction(self, iLine = None):
    """
    Creates a new Instruction for the current source file and registers it.

    iLine is the line number passed to the Instruction constructor;
    self.iLine is used when it is None.  The new object is appended to
    both g_aoAllInstructions and self.aoCurInstrs, and returned.
    """
    if iLine is None:
        iLine = self.iLine;
    oNewInstr = Instruction(self.sSrcFile, iLine);
    g_aoAllInstructions.append(oNewInstr);
    self.aoCurInstrs.append(oNewInstr);
    return oNewInstr;
3280
def deriveMnemonicAndOperandsFromStats(self, oInstr, sStats):
    """
    Fills in the mnemonic, and if missing the operands, of oInstr from an
    IEM stats base name like string ('mnemonic_type1_type2').

    Nothing is done when oInstr already has a mnemonic.  Operands are only
    derived when oInstr has none.

    Returns False if one of the words following the mnemonic is not a key
    of g_kdOpTypes (operands derived before that word are kept), True
    otherwise.
    """
    if oInstr.sMnemonic is not None:
        return True;

    asParts = sStats.split('_');
    oInstr.sMnemonic = asParts[0].lower();
    if oInstr.aoOperands:
        return True;

    for sOpType in asParts[1:]:
        if sOpType not in g_kdOpTypes:
            #return self.error('unknown operand type: %s (instruction: %s)' % (sOpType, oInstr))
            return False;
        oInstr.aoOperands.append(Operand(g_kdOpTypes[sOpType][1], sOpType));
    return True;
3296
def doneInstructionOne(self, oInstr, iLine):
    """
    Completes the parsing of one instruction by filling in defaults and
    derived values, then registering it in the lookup dictionaries.

    iLine is stored as the instruction's completion line.  Steps, in order:
      - mnemonic and operands guessed from sStats or sFunction,
      - sDisEnum from the mnemonic,
      - sStats from sFunction or mnemonic + operand types,
      - sFunction from mnemonic + operand types or sStats,
      - default map assignment and registration with all maps,
      - encoding guessed from operands and maps,
      - registration in g_dAllInstructionsByStat (duplicates are reported
        via self.error) and g_dAllInstructionsByFunction.

    Always returns True.
    """
    assert oInstr.iLineCompleted is None;
    oInstr.iLineCompleted = iLine;

    #
    # Neither specified instructions (cOpTags > 0) nor the unspecified
    # legacy ones get any special treatment at present.  For the legacy
    # stuff we generally only got a few things to go on:
    #   /** Opcode 0x0f 0x00 /0. */
    #   FNIEMOPRM_DEF(iemOp_Grp6_sldt)
    #

    #
    # Common defaults.
    #

    # No mnemonic?  Guess it and the operands from the stats name, or
    # failing that from the function name.
    if oInstr.sMnemonic is None:
        if oInstr.sStats is not None:
            self.deriveMnemonicAndOperandsFromStats(oInstr, oInstr.sStats);
        elif oInstr.sFunction is not None:
            self.deriveMnemonicAndOperandsFromStats(oInstr, oInstr.sFunction.replace('iemOp_', ''));

    # The disassembler op enum constant defaults to OP_<MNEMONIC>.
    if oInstr.sMnemonic is not None and oInstr.sDisEnum is None:
        oInstr.sDisEnum = 'OP_' + oInstr.sMnemonic.upper();

    # The IEM statistics base name defaults to the function name sans
    # prefix, or else to mnemonic + operand types.
    if oInstr.sStats is None:
        if oInstr.sFunction is not None:
            oInstr.sStats = oInstr.sFunction.replace('iemOp_', '');
        elif oInstr.sMnemonic is not None:
            asTypes = [oOperand.sType for oOperand in oInstr.aoOperands if oOperand.sType];
            oInstr.sStats = '_'.join([oInstr.sMnemonic,] + asTypes);

    # The IEM function name defaults to mnemonic + operand types, or else
    # to the stats name.
    if oInstr.sFunction is None:
        if oInstr.sMnemonic is not None:
            asTypes = [oOperand.sType for oOperand in oInstr.aoOperands if oOperand.sType];
            oInstr.sFunction = '_'.join(['iemOp_' + oInstr.sMnemonic,] + asTypes);
        elif oInstr.sStats:
            oInstr.sFunction = 'iemOp_' + oInstr.sStats;

    #
    # Apply default map and then add the instruction to all its maps.
    #
    if not oInstr.aoMaps:
        oInstr.aoMaps = [ self.oDefaultMap, ];
    for oMap in oInstr.aoMaps:
        oMap.aoInstructions.append(oInstr);

    #
    # Derive encoding from operands and maps.
    #
    if oInstr.sEncoding is None:
        if not oInstr.aoOperands:
            fModRm = bool(oInstr.fUnused and oInstr.sSubOpcode);
            if oInstr.onlyInVexMaps():
                oInstr.sEncoding = 'VEX.ModR/M' if fModRm else 'VEX.fixed';
            else:
                oInstr.sEncoding = 'ModR/M' if fModRm else 'fixed';
        elif oInstr.aoOperands[0].usesModRM():
            fVex = (len(oInstr.aoOperands) >= 2 and oInstr.aoOperands[1].sWhere == 'vvvv') \
                or oInstr.onlyInVexMaps();
            oInstr.sEncoding = 'VEX.ModR/M' if fVex else 'ModR/M';

    #
    # Check the opstat value and add it to the opstat indexed dictionary.
    #
    if oInstr.sStats:
        if oInstr.sStats in g_dAllInstructionsByStat:
            self.error('Duplicate opstat value "%s"\nnew: %s\nold: %s'
                       % (oInstr.sStats, oInstr, g_dAllInstructionsByStat[oInstr.sStats],));
        else:
            g_dAllInstructionsByStat[oInstr.sStats] = oInstr;

    #
    # Add to function indexed dictionary.  We allow multiple instructions per function.
    #
    if oInstr.sFunction:
        g_dAllInstructionsByFunction.setdefault(oInstr.sFunction, []).append(oInstr);

    #self.debug('%d..%d: %s; %d @op tags' % (oInstr.iLineCreated, oInstr.iLineCompleted, oInstr.sFunction, oInstr.cOpTags));
    return True;
3402
def doneInstructions(self, iLineInComment = None, fEndOfFunction = False):
    """
    Completes all current instructions (self.aoCurInstrs) and resets the
    current-instruction state.

    The completion line is self.iLine, or self.iCommentLine +
    iLineInComment when iLineInComment is given.  Updates the stub and
    instruction totals, and clears self.sComment and self.aoCurInstrs.

    When fEndOfFunction is set, the current function (if any) is completed
    with its source lines, and the function state is reset.

    Always returns True.
    """
    if iLineInComment is None:
        iDoneLine = self.iLine;
    else:
        iDoneLine = self.iCommentLine + iLineInComment;

    for oInstr in self.aoCurInstrs:
        self.doneInstructionOne(oInstr, iDoneLine);
        if oInstr.fStub:
            self.cTotalStubs += 1;
    self.cTotalInstr += len(self.aoCurInstrs);

    self.sComment    = '';
    self.aoCurInstrs = [];

    if fEndOfFunction:
        #self.debug('%s: oCurFunction=None' % (self.iLine, ));
        oFunction = self.oCurFunction;
        if oFunction:
            oFunction.complete(self.iLine, self.asLines[oFunction.iBeginLine - 1 : self.iLine]);
        self.oCurFunction   = None;
        self.iMcBlockInFunc = 0;
    return True;
3423
def setInstrunctionAttrib(self, sAttrib, oValue, fOverwrite = False):
    """
    Sets attribute sAttrib of all current instructions to oValue.

    Unless fOverwrite is True, only attributes currently holding None are
    replaced; any other existing value (empty strings included) is kept.
    """
    for oInstr in self.aoCurInstrs:
        if fOverwrite is True or getattr(oInstr, sAttrib) is None:
            setattr(oInstr, sAttrib, oValue);
3435
def setInstrunctionArrayAttrib(self, sAttrib, iEntry, oValue, fOverwrite = False):
    """
    Sets entry iEntry of the list attribute sAttrib of all current
    instructions to oValue.

    Lists that are too short are padded with None up to iEntry first.
    Unless fOverwrite is True, only entries currently holding None are
    replaced.
    """
    for oInstr in self.aoCurInstrs:
        aoArray  = getattr(oInstr, sAttrib);
        cMissing = iEntry + 1 - len(aoArray);
        if cMissing > 0:
            aoArray.extend([None,] * cMissing);
        if fOverwrite is True or aoArray[iEntry] is None:
            aoArray[iEntry] = oValue;
3447
def parseCommentOldOpcode(self, asLines):
    """
    Deals with 'Opcode 0xff /4' like comments.

    If the first comment line starts with the word 'Opcode' followed by a
    hex byte ('0x..' or '0X..'), the words after 'Opcode' are stored space
    separated in the sRawOldOpcodes attribute of the current instructions
    (only where it is still None), with any '0X' prefix normalized to '0x'.

    Always returns False.
    """
    asWords = asLines[0].split() if asLines else [];
    if     len(asWords) >= 2 \
       and asWords[0] == 'Opcode' \
       and asWords[1].startswith(('0x', '0X')):
        asWords = asWords[1:];  # Drop the 'Opcode' keyword, keeping the opcode bytes.
        for iWord, sWord in enumerate(asWords):
            if sWord.startswith('0X'):
                asWords[iWord] = '0x' + sWord[2:];
        self.setInstrunctionAttrib('sRawOldOpcodes', ' '.join(asWords));

    return False;
3463
def ensureInstructionForOpTag(self, iTagLine):
    """
    Ensures there is an instruction for the op-tag being parsed.

    A new instruction is added (at self.iCommentLine + iTagLine) when
    there is no current one.  The tag count of every current instruction
    is bumped, and self.cTotalTagged is incremented for each instruction
    receiving its first tag.

    Returns the last of the current instructions.
    """
    if len(self.aoCurInstrs) == 0:
        self.addInstruction(self.iCommentLine + iTagLine);

    for oInstr in self.aoCurInstrs:
        cTagsBefore    = oInstr.cOpTags;
        oInstr.cOpTags = cTagsBefore + 1;
        if cTagsBefore == 0:
            self.cTotalTagged += 1;
    return self.aoCurInstrs[-1];
3473
3474 @staticmethod
3475 def flattenSections(aasSections):
3476 """
3477 Flattens multiline sections into stripped single strings.
3478 Returns list of strings, on section per string.
3479 """
3480 asRet = [];
3481 for asLines in aasSections:
3482 if asLines:
3483 asRet.append(' '.join([sLine.strip() for sLine in asLines]));
3484 return asRet;
3485
3486 @staticmethod
3487 def flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = '\n'):
3488 """
3489 Flattens sections into a simple stripped string with newlines as
3490 section breaks. The final section does not sport a trailing newline.
3491 """
3492 # Typical: One section with a single line.
3493 if len(aasSections) == 1 and len(aasSections[0]) == 1:
3494 return aasSections[0][0].strip();
3495
3496 sRet = '';
3497 for iSection, asLines in enumerate(aasSections):
3498 if asLines:
3499 if iSection > 0:
3500 sRet += sSectionSep;
3501 sRet += sLineSep.join([sLine.strip() for sLine in asLines]);
3502 return sRet;
3503
3504
3505
3506 ## @name Tag parsers
3507 ## @{
3508
def parseTagOpBrief(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opbrief
    Value:  Text description, multiple sections, appended.

    Brief description.  If not given, it's the first sentence from @opdesc.

    The value must be a single sentence of at most 180 characters; a
    trailing full stop is added if missing.  The result is stored in
    oInstr.sBrief, refusing to overwrite an existing brief.

    Returns True on success, False after recording an error.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sBrief = self.flattenAllSections(aasSections);
    if not sBrief:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if sBrief[-1] != '.':
        sBrief = sBrief + '.';
    if len(sBrief) > 180:
        return self.errorComment(iTagLine, '%s: value too long (max 180 chars): %s' % (sTag, sBrief));
    # Look for a sentence ending dot, i.e. one followed by a space or at
    # the very end.  Dots inside words or numbers are skipped.
    offDot = sBrief.find('.');
    while 0 <= offDot < len(sBrief) - 1 and sBrief[offDot + 1] != ' ':
        offDot = sBrief.find('.', offDot + 1);
    if offDot >= 0 and offDot != len(sBrief) - 1:
        return self.errorComment(iTagLine, '%s: only one sentence: %s' % (sTag, sBrief));

    # Update the instruction.
    if oInstr.sBrief is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite brief "%s" with "%s"'
                                           % (sTag, oInstr.sBrief, sBrief,));
    oInstr.sBrief = sBrief;

    _ = iEndLine;
    return True;
3538
def parseTagOpDesc(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opdesc
    Value:  Text description, multiple sections, appended.

    It is used to describe instructions.  Each non-empty section is
    flattened to one string and appended to oInstr.asDescSections.

    Always returns True.
    """
    _ = sTag; _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    if aasSections:
        oInstr.asDescSections.extend(self.flattenSections(aasSections));
    return True;
3553
def parseTagOpMnemonic(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opmnemonic
    Value:  mnemonic

    The 'mnemonic' value must match self.oReMnemonic.  Because of
    prefixes, groups and whatnot, there are times when the mnemonic isn't
    that of an actual assembler mnemonic.

    Refuses to overwrite an already set mnemonic.  Returns True on
    success, False after recording an error.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sValue = self.flattenAllSections(aasSections);
    if self.oReMnemonic.match(sValue) is None:
        return self.errorComment(iTagLine, '%s: invalid menmonic name: "%s"' % (sTag, sValue,));

    # Set it, unless already set.
    sOld = oInstr.sMnemonic;
    if sOld is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite menmonic "%s" with "%s"'
                                           % (sTag, sOld, sValue,));
    oInstr.sMnemonic = sValue;
    return True;
3576
def parseTagOpOperandN(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tags:  \@op1, \@op2, \@op3, \@op4
    Value: [where:]type

    The 'where' value indicates where the operand is found, like the 'reg'
    part of the ModR/M encoding.  It must be a key of g_kdOpLocations.
    When omitted, it is taken from entry 1 of the g_kdOpTypes value for
    the type.

    The 'type' value indicates the operand type.  These follow the types
    given in the opcode tables in the CPU reference manuals.  It must be
    a key of g_kdOpTypes.

    The operand index is taken from the last character of the tag.  An
    already specified operand is never overwritten.

    Returns True on success, False after recording an error.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    idxOp = int(sTag[-1]) - 1;
    assert 0 <= idxOp < 4;

    # flatten, split up, and validate the "where:type" value.
    sFlattened = self.flattenAllSections(aasSections);
    asSplit = sFlattened.split(':');
    if len(asSplit) == 1:
        sType  = asSplit[0];
        sWhere = None;
    elif len(asSplit) == 2:
        (sWhere, sType) = asSplit;
    else:
        return self.errorComment(iTagLine, 'expected %s value on format "[<where>:]<type>" not "%s"' % (sTag, sFlattened,));

    if sType not in g_kdOpTypes:
        return self.errorComment(iTagLine, '%s: invalid type value "%s", valid: %s'
                                           % (sTag, sType, ', '.join(g_kdOpTypes.keys()),));
    if sWhere is None:
        sWhere = g_kdOpTypes[sType][1];
    elif sWhere not in g_kdOpLocations:
        return self.errorComment(iTagLine, '%s: invalid where value "%s", valid: %s'
                                           % (sTag, sWhere, ', '.join(g_kdOpLocations.keys()),));

    # Insert the operand, refusing to overwrite an existing one.
    # Skipped operand slots are padded with None.
    while idxOp >= len(oInstr.aoOperands):
        oInstr.aoOperands.append(None);
    if oInstr.aoOperands[idxOp] is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s:%s" with "%s:%s"'
                                           % ( sTag, oInstr.aoOperands[idxOp].sWhere, oInstr.aoOperands[idxOp].sType,
                                               sWhere, sType,));
    oInstr.aoOperands[idxOp] = Operand(sWhere, sType);

    _ = iEndLine;
    return True;
3626
def parseTagOpMaps(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opmaps
    Value:  map[,map2]

    Indicates which maps the instruction is in.  There is a default map
    associated with each input file.

    Every name must be a key of g_dInstructionMaps and must not already
    be assigned to the instruction.  A name repeated within the value
    itself is reported but does not fail the tag.

    Returns True on success, False after recording a fatal error.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split up and validate the value.
    sValue     = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',');
    asMapNames = sValue.split(',');
    if len(asMapNames) == 0:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    for sMapName in asMapNames:
        if sMapName not in g_dInstructionMaps:
            return self.errorComment(iTagLine, '%s: invalid map value: %s  (valid values: %s)'
                                               % (sTag, sMapName, ', '.join(g_dInstructionMaps.keys()),));

    # Refuse maps the instruction is already assigned to.
    for oExisting in oInstr.aoMaps:
        if oExisting.sName in asMapNames:
            return self.errorComment(iTagLine, '%s: duplicate map assignment: %s' % (sTag, oExisting.sName));

    # Add the new maps, complaining about repetitions in the input.
    for sMapName in asMapNames:
        oNewMap = g_dInstructionMaps[sMapName];
        if oNewMap in oInstr.aoMaps:
            self.errorComment(iTagLine, '%s: duplicate map assignment (input): %s' % (sTag, sMapName));
        else:
            oInstr.aoMaps.append(oNewMap);

    return True;
3661
def parseTagOpPfx(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@oppfx
    Value:  n/a|none|0x66|0xf3|0xf2

    Required prefix for the instruction.  (In a (E)VEX context this is the
    value of the 'pp' field rather than an actual prefix.)

    'n/a' is stored as None, 'none' as the string 'none'.  A two character
    value gets '0x' prepended before validation.  Anything but 'n/a' must
    be a key of g_kdPrefixes.  An already set prefix is never overwritten.

    Returns True on success, False after recording an error.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sFlattened = self.flattenAllSections(aasSections);
    asPrefixes = sFlattened.split();
    if not asPrefixes:
        # Guard against an empty tag value; indexing below would raise.
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if len(asPrefixes) > 1:
        return self.errorComment(iTagLine, '%s: max one prefix: %s' % (sTag, asPrefixes,));

    sPrefix = asPrefixes[0].lower();
    if sPrefix == 'n/a':
        sPrefix = None;
    elif sPrefix != 'none':
        if len(sPrefix) == 2:
            sPrefix = '0x' + sPrefix;
        if not _isValidOpcodeByte(sPrefix):
            return self.errorComment(iTagLine, '%s: invalid prefix: %s' % (sTag, sPrefix,));

    if sPrefix is not None and sPrefix not in g_kdPrefixes:
        return self.errorComment(iTagLine, '%s: invalid prefix: %s (valid %s)' % (sTag, sPrefix, g_kdPrefixes,));

    # Set it.
    if oInstr.sPrefix is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sPrefix, sPrefix,));
    oInstr.sPrefix = sPrefix;

    _ = iEndLine;
    return True;
3699
def parseTagOpcode(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opcode
    Value:  0x?? | /reg (TODO: | mr/reg | 11 /reg | !11 /reg | 11 mr/reg | !11 mr/reg)

    The opcode byte or sub-byte for the instruction in the context of a map.

    Accepted forms are an opcode byte (as judged by _isValidOpcodeByte)
    and the ModR/M reg forms '/N', '11/N' and '!11/N' with N in 0..7.
    An already set opcode is never overwritten.

    Returns True on success, False after recording an error.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sOpcode = self.flattenAllSections(aasSections);
    # The ModR/M reg field is three bits wide, so only /0 thru /7 exist.
    fRegForm = (   (len(sOpcode) == 2 and sOpcode.startswith('/'))
                or (len(sOpcode) == 4 and sOpcode.startswith('11/'))
                or (len(sOpcode) == 5 and sOpcode.startswith('!11/'))) \
           and sOpcode[-1] in '01234567';
    if not fRegForm and not _isValidOpcodeByte(sOpcode):
        return self.errorComment(iTagLine, '%s: invalid opcode: %s' % (sTag, sOpcode,));

    # Set it.
    if oInstr.sOpcode is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sOpcode, sOpcode,));
    oInstr.sOpcode = sOpcode;

    _ = iEndLine;
    return True;
3729
def parseTagOpcodeSub(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opcodesub
    Value:  none | 11 mr/reg | !11 mr/reg | rex.w=0 | rex.w=1 | vex.l=0 | vex.l=1
            | 11 mr/reg vex.l=0 | 11 mr/reg vex.l=1 | !11 mr/reg vex.l=0 | !11 mr/reg vex.l=1

    This is a simple way of dealing with encodings where the mod=3 and mod!=3
    represents exactly two different instructions.  The more proper way would
    be to go via maps with two members, but this is faster.

    The value must be a key of g_kdSubOpcodes; entry 0 of the matching
    value is what gets stored in oInstr.sSubOpcode.  An already set sub
    opcode is never overwritten.

    Returns True on success, False after recording an error.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sValue = self.flattenAllSections(aasSections);
    if sValue not in g_kdSubOpcodes:
        return self.errorComment(iTagLine, '%s: invalid sub opcode: %s  (valid: 11, !11, none)' % (sTag, sValue,));
    sNewSubOpcode = g_kdSubOpcodes[sValue][0];

    # Set it, unless already set.
    sOldSubOpcode = oInstr.sSubOpcode;
    if sOldSubOpcode is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                           % ( sTag, sOldSubOpcode, sNewSubOpcode,));
    oInstr.sSubOpcode = sNewSubOpcode;
    return True;
3756
def parseTagOpEnc(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@openc
    Value:  ModR/M|fixed|prefix|<map name>

    The instruction operand encoding style.

    The value is accepted if it is a key of g_kdEncodings, a key of
    g_dInstructionMaps, or passes _isValidOpcodeByte.  An already set
    encoding is never overwritten.

    Returns True on success, False after recording an error.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sValue = self.flattenAllSections(aasSections);
    fValid = sValue in g_kdEncodings \
          or sValue in g_dInstructionMaps \
          or _isValidOpcodeByte(sValue);
    if not fValid:
        return self.errorComment(iTagLine, '%s: invalid encoding: %s' % (sTag, sValue,));

    # Set it, unless already set.
    sOldEncoding = oInstr.sEncoding;
    if sOldEncoding is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                           % ( sTag, sOldEncoding, sValue,));
    oInstr.sEncoding = sValue;
    return True;
3783
## EFlags tag to Instruction attribute name.
# Maps each of the five EFLAGS op-tags to the name of the instruction
# attribute that holds its flag list; parseTagOpEFlags uses it to
# read and assign that attribute via getattr/setattr.
kdOpFlagToAttr = {
    '@opfltest':    'asFlTest',
    '@opflmodify':  'asFlModify',
    '@opflundef':   'asFlUndefined',
    '@opflset':     'asFlSet',
    '@opflclear':   'asFlClear',
};
3792
def parseTagOpEFlags(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tags:   \@opfltest, \@opflmodify, \@opflundef, \@opflset, \@opflclear
    Value:  <eflags specifier>

    The value is a comma separated list of keys of g_kdEFlagsMnemonics
    (surrounding whitespace is tolerated), or the single word 'none' for
    an empty list.  The resulting list is stored in the instruction
    attribute named by self.kdOpFlagToAttr[sTag], refusing to overwrite
    an already set one.

    Returns True on success, False after recording one or more errors.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split up and validate the values.
    asFlags = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',').split(',');
    if len(asFlags) == 1 and asFlags[0].lower() == 'none':
        asFlags = [];
    else:
        fAllValid = True;
        for iFlag, sFlag in enumerate(asFlags):
            if sFlag in g_kdEFlagsMnemonics:
                continue;
            sTrimmed = sFlag.strip();
            if sTrimmed in g_kdEFlagsMnemonics:
                asFlags[iFlag] = sTrimmed;
            else:
                fAllValid = self.errorComment(iTagLine, '%s: invalid EFLAGS value: %s' % (sTag, sFlag,));
        if not fAllValid:
            return False;

    # Set them, unless already set.
    sAttrib = self.kdOpFlagToAttr[sTag];
    asOld   = getattr(oInstr, sAttrib);
    if asOld is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, asOld, asFlags,));
    setattr(oInstr, sAttrib, asFlags);
    return True;
3824
def parseTagOpHints(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@ophints
    Value:  Comma or space separated list of flags and hints.

    This covers the disassembler flags table and more.

    Every hint must be a key of g_kdHints; the single word 'none' gives
    an empty list.  Valid hints are added to oInstr.dHints.  A hint the
    instruction already has is reported but does not fail the tag.

    Returns True on success, False when one or more hints are invalid.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    asHints = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asHints) == 1 and asHints[0].lower() == 'none':
        asHints = [];
    else:
        fRc = True;
        for iHint, sHint in enumerate(asHints):
            if sHint not in g_kdHints:
                if sHint.strip() in g_kdHints:
                    # Update the list entry; strings do not support item assignment.
                    asHints[iHint] = sHint.strip();
                else:
                    fRc = self.errorComment(iTagLine, '%s: invalid hint value: %s' % (sTag, sHint,));
        if not fRc:
            return False;

    # Append them.
    for sHint in asHints:
        if sHint not in oInstr.dHints:
            oInstr.dHints[sHint] = True; # (dummy value, using dictionary for speed)
        else:
            self.errorComment(iTagLine, '%s: duplicate hint: %s' % ( sTag, sHint,));

    _ = iEndLine;
    return True;
3858
def parseTagOpDisEnum(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opdisenum
    Value:  OP_XXXX

    This is for selecting a specific (legacy) disassembler enum value for
    the instruction.

    The value must match self.oReDisEnum.  When more than one word is
    given, an error is recorded and parsing continues with the first
    word.  An already set enum value is never overwritten.

    Returns True on success, False after recording a fatal error.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and split.
    asWords = self.flattenAllSections(aasSections).split();
    if len(asWords) != 1:
        self.errorComment(iTagLine, '%s: expected exactly one value: %s' % (sTag, asWords,));
        if len(asWords) == 0:
            return False;
    sValue = asWords[0];
    if self.oReDisEnum.match(sValue) is None:
        return self.errorComment(iTagLine, '%s: invalid disassembler OP_XXXX enum: %s (pattern: %s)'
                                           % (sTag, sValue, self.oReDisEnum.pattern));

    # Set it, unless already set.
    sOldDisEnum = oInstr.sDisEnum;
    if sOldDisEnum is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % (sTag, sOldDisEnum, sValue,));
    oInstr.sDisEnum = sValue;
    return True;
3887
def parseTagOpMinCpu(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opmincpu
    Value:  <simple CPU name>

    Indicates when this instruction was introduced.

    The value must be a key of g_kdCpuNames.  When more than one word is
    given, an error is recorded and the first word is used.  Specifying a
    different CPU than the one already set records an error and leaves
    the existing value alone.

    Returns True when a valid CPU name was given, False after recording
    an error for a missing or invalid one.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value, split into words, make sure there's just one, valid it.
    asCpus = self.flattenAllSections(aasSections).split();
    if not asCpus:
        # Guard against an empty tag value; indexing below would raise.
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if len(asCpus) > 1:
        self.errorComment(iTagLine, '%s: exactly one CPU name, please: %s' % (sTag, ' '.join(asCpus),));

    sMinCpu = asCpus[0];
    if sMinCpu not in g_kdCpuNames:
        return self.errorComment(iTagLine, '%s: invalid CPU name: %s  (names: %s)'
                                           % (sTag, sMinCpu, ','.join(sorted(g_kdCpuNames)),));

    # Set it.  (Must not be assigned during validation, as that would
    # defeat the overwrite check.)
    if oInstr.sMinCpu is None:
        oInstr.sMinCpu = sMinCpu;
    elif oInstr.sMinCpu != sMinCpu:
        self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % (sTag, oInstr.sMinCpu, sMinCpu,));

    _ = iEndLine;
    return True;
3917
def parseTagOpCpuId(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opcpuid
    Value:  none | <CPUID flag specifier>

    CPUID feature bit which is required for the instruction to be present.

    The value is a comma or space separated list of keys of
    g_kdCpuIdFlags; the single word 'none' gives an empty list.  Valid
    flags are appended to oInstr.asCpuIds.  A flag the instruction
    already has is reported but does not fail the tag.

    Returns True on success, False when one or more flags are invalid.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    asCpuIds = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asCpuIds) == 1 and asCpuIds[0].lower() == 'none':
        asCpuIds = [];
    else:
        fRc = True;
        for iCpuId, sCpuId in enumerate(asCpuIds):
            if sCpuId not in g_kdCpuIdFlags:
                if sCpuId.strip() in g_kdCpuIdFlags:
                    # Update the list entry; strings do not support item assignment.
                    asCpuIds[iCpuId] = sCpuId.strip();
                else:
                    fRc = self.errorComment(iTagLine, '%s: invalid CPUID value: %s' % (sTag, sCpuId,));
        if not fRc:
            return False;

    # Append them.
    for sCpuId in asCpuIds:
        if sCpuId not in oInstr.asCpuIds:
            oInstr.asCpuIds.append(sCpuId);
        else:
            self.errorComment(iTagLine, '%s: duplicate CPUID: %s' % ( sTag, sCpuId,));

    _ = iEndLine;
    return True;
3951
def parseTagOpGroup(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opgroup
    Value:  og_grp1[_subgrp2[_subsubgrp3]]

    Instruction grouping.

    Exactly one group name matching self.oReGroupName is required.  An
    already set group is never overwritten.

    Returns True on success, False after recording an error.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split into words and validate.
    asWords = self.flattenAllSections(aasSections).split();
    if len(asWords) != 1:
        return self.errorComment(iTagLine, '%s: exactly one group, please: %s' % (sTag, asWords,));
    sValue = asWords[0];
    if self.oReGroupName.match(sValue) is None:
        return self.errorComment(iTagLine, '%s: invalid group name: %s (valid: %s)'
                                           % (sTag, sValue, self.oReGroupName.pattern));

    # Set it, unless already set.
    sOldGroup = oInstr.sGroup;
    if sOldGroup is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, sOldGroup, sValue,));
    oInstr.sGroup = sValue;
    return True;
3977
def parseTagOpUnusedInvalid(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opunused, \@opinvalid, \@opinvlstyle
    Value:  <invalid opcode behaviour style>

    The \@opunused indicates the specification is for a currently unused
    instruction encoding.

    The \@opinvalid indicates the specification is for a currently invalid
    instruction encoding (like UD2).

    The \@opinvlstyle just indicates how CPUs decode the instruction when
    not supported (\@opcpuid, \@opmincpu) or disabled.

    Exactly one style, a key of g_kdInvalidStyles, is required.  It is
    stored in oInstr.sInvalidStyle, which is never overwritten.  In
    addition \@opunused sets oInstr.fUnused and \@opinvalid sets
    oInstr.fInvalid.

    Returns True on success, False after recording an error.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split into words and validate.
    asWords = self.flattenAllSections(aasSections).split();
    if len(asWords) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid behviour style, please: %s' % (sTag, asWords,));
    sNewStyle = asWords[0];
    if sNewStyle not in g_kdInvalidStyles:
        return self.errorComment(iTagLine, '%s: invalid invalid behaviour style: %s (valid: %s)'
                                           % (sTag, sNewStyle, g_kdInvalidStyles.keys(),));

    # Set it, unless already set.
    sOldStyle = oInstr.sInvalidStyle;
    if sOldStyle is not None:
        return self.errorComment(iTagLine,
                                 '%s: attempting to overwrite "%s" with "%s" (only one @opunused, @opinvalid, @opinvlstyle)'
                                 % ( sTag, sOldStyle, sNewStyle,));
    oInstr.sInvalidStyle = sNewStyle;

    # Two of the three tags also flag the instruction.
    if sTag == '@opunused':
        oInstr.fUnused = True;
    elif sTag == '@opinvalid':
        oInstr.fInvalid = True;
    return True;
4015
def parseTagOpTest(self, sTag, aasSections, iTagLine, iEndLine): # pylint: disable=too-many-locals
    r"""
    Tag:        \@optest
    Value:      [<selectors>[ ]?] <inputs> -> <outputs>
    Example:    mode==64bit / in1=0xfffffffe:dw in2=1:dw -> out1=0xffffffff:dw outfl=a?,p?

    The main idea here is to generate basic instruction tests.

    The probably simplest way of handling the diverse input, would be to use
    it to produce size optimized byte code for a simple interpreter that
    modifies the register input and output states.

    An alternative to the interpreter would be creating multiple tables,
    but that becomes rather complicated wrt what goes where and then to use
    them in an efficient manner.

    Each section in aasSections describes one test.  Problems are reported
    via self.errorComment(); a test that fails to parse is not added to
    oInstr.aoTests.  Always returns True.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    #
    # Do it section by section.
    #
    for asSectionLines in aasSections:
        #
        # Sort the input into outputs, inputs and selector conditions.
        #
        sFlatSection = self.flattenAllSections([asSectionLines,]);
        if not sFlatSection:
            self.errorComment(iTagLine, '%s: missing value (dbg: aasSections=%s)' % ( sTag, aasSections));
            continue;
        oTest = InstructionTest(oInstr);

        asSelectors = [];
        asInputs    = [];
        asOutputs   = [];
        asCur       = asOutputs;
        fRc         = True;
        asWords     = sFlatSection.split();
        # The words are walked back to front: outputs first, then inputs
        # (after '->'), then selectors (after '/').
        for sWord in reversed(asWords):
            # Check for array switchers.  Note! These must be identity checks,
            # two different lists with equal content (e.g. both still empty)
            # are not the same list.
            if sWord == '->':
                if asCur is not asOutputs:
                    fRc = self.errorComment(iTagLine, '%s: "->" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asInputs;
            elif sWord == '/':
                if asCur is not asInputs:
                    fRc = self.errorComment(iTagLine, '%s: "/" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asSelectors;
            else:
                asCur.insert(0, sWord);

        #
        # Validate and add selectors.
        #
        for sCond in asSelectors:
            # Predicates are shorthand which expands to a full condition.
            sCondExp = TestSelector.kdPredicates.get(sCond, sCond);
            oSelector = None;
            for sOp in TestSelector.kasCompareOps:
                off = sCondExp.find(sOp);
                if off >= 0:
                    sVariable = sCondExp[:off];
                    sValue    = sCondExp[off + len(sOp):];
                    if sVariable in TestSelector.kdVariables:
                        if sValue in TestSelector.kdVariables[sVariable]:
                            oSelector = TestSelector(sVariable, sOp, sValue);
                        else:
                            self.errorComment(iTagLine, '%s: invalid condition value "%s" in "%s" (valid: %s)'
                                              % ( sTag, sValue, sCond,
                                                  TestSelector.kdVariables[sVariable].keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid condition variable "%s" in "%s" (valid: %s)'
                                          % ( sTag, sVariable, sCond, TestSelector.kdVariables.keys(),));
                    break;  # Only the first operator found in the condition is considered.
            if oSelector is not None:
                for oExisting in oTest.aoSelectors:
                    if oExisting.sVariable == oSelector.sVariable:
                        self.errorComment(iTagLine, '%s: already have a selector for variable "%s" (existing: %s, new: %s)'
                                          % ( sTag, oSelector.sVariable, oExisting, oSelector,));
                oTest.aoSelectors.append(oSelector);
            else:
                fRc = self.errorComment(iTagLine, '%s: failed to parse selector: %s' % ( sTag, sCond,));

        #
        # Validate outputs and inputs, adding them to the test as we go along.
        #
        for asItems, sDesc, aoDst in [ (asInputs, 'input', oTest.aoInputs), (asOutputs, 'output', oTest.aoOutputs)]:
            asValidFieldKinds = [ 'both', sDesc, ];
            for sItem in asItems:
                oItem = None;
                for sOp in TestInOut.kasOperators:
                    off = sItem.find(sOp);
                    if off < 0:
                        continue;
                    sField     = sItem[:off];
                    sValueType = sItem[off + len(sOp):];
                    if     sField in TestInOut.kdFields \
                       and TestInOut.kdFields[sField][1] in asValidFieldKinds:
                        # The value may carry an explicit ':type' suffix, otherwise
                        # the field's default type is used.
                        asSplit = sValueType.split(':', 1);
                        sValue  = asSplit[0];
                        sType   = asSplit[1] if len(asSplit) > 1 else TestInOut.kdFields[sField][0];
                        if sType in TestInOut.kdTypes:
                            oValid = TestInOut.kdTypes[sType].validate(sValue);
                            if oValid is True:
                                if not TestInOut.kdTypes[sType].isAndOrPair(sValue) or sOp == '&|=':
                                    oItem = TestInOut(sField, sOp, sValue, sType);
                                else:
                                    self.errorComment(iTagLine, '%s: and-or %s value "%s" can only be used with "&|="'
                                                      % ( sTag, sDesc, sItem, ));
                            else:
                                self.errorComment(iTagLine, '%s: invalid %s value "%s" in "%s" (type: %s): %s'
                                                  % ( sTag, sDesc, sValue, sItem, sType, oValid, ));
                        else:
                            self.errorComment(iTagLine, '%s: invalid %s type "%s" in "%s" (valid types: %s)'
                                              % ( sTag, sDesc, sType, sItem, TestInOut.kdTypes.keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid %s field "%s" in "%s"\nvalid fields: %s'
                                          % ( sTag, sDesc, sField, sItem,
                                              ', '.join([sKey for sKey, asVal in TestInOut.kdFields.items()
                                                         if asVal[1] in asValidFieldKinds]),));
                    break;  # Only the first operator found in the item is considered.
                if oItem is not None:
                    for oExisting in aoDst:
                        if oExisting.sField == oItem.sField and oExisting.sOp == oItem.sOp:
                            self.errorComment(iTagLine,
                                              '%s: already have a "%s" assignment for field "%s" (existing: %s, new: %s)'
                                              % ( sTag, oItem.sOp, oItem.sField, oExisting, oItem,));
                    aoDst.append(oItem);
                else:
                    fRc = self.errorComment(iTagLine, '%s: failed to parse assignment: %s' % ( sTag, sItem,));

        #
        # Add the test if it parsed fine, otherwise dump what we made of it.
        #
        if fRc:
            oInstr.aoTests.append(oTest);
        else:
            self.errorComment(iTagLine, '%s: failed to parse test: %s' % (sTag, ' '.join(asWords),));
            self.errorComment(iTagLine, '%s: asSelectors=%s / asInputs=%s -> asOutputs=%s'
                              % (sTag, asSelectors, asInputs, asOutputs,));

    _ = iEndLine;
    return True;
4160
def parseTagOpTestNum(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Numbered \@optest tag.  Either \@optest42 or \@optest[42].

    Complains via self.errorComment() if the number in the tag does not
    equal the number of tests the instruction already has, then hands the
    tag on to parseTagOpTest() and returns its result.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The number follows '@optest' (7 chars) or sits inside '@optest[' ... ']'.
    fBracketed = sTag[-1] == ']';
    iTest      = int(sTag[8:-1] if fBracketed else sTag[7:]);

    cTests = len(oInstr.aoTests);
    if iTest != cTests:
        self.errorComment(iTagLine, '%s: incorrect test number: %u, actual %u' % (sTag, iTest, cTests,));
    return self.parseTagOpTest(sTag, aasSections, iTagLine, iEndLine);
4176
def parseTagOpTestIgnore(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@optestign | \@optestignore
    Value:      <value is ignored>

    This is a simple trick to ignore a test while debugging another: the
    tag and its value are accepted and dropped.

    See also \@oponlytest.

    Always returns True.
    """
    _ = (sTag, aasSections, iTagLine, iEndLine);
    return True;
4188
def parseTagOpCopyTests(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opcopytests
    Value:      <opstat | function> [..]
    Example:    \@opcopytests add_Eb_Gb

    Trick to avoid duplicating tests for different encodings of the same
    operation.

    Each reference must match self.oReStatsName or self.oReFunctionName and
    is appended to oInstr.asCopyTests unless already present.  Invalid and
    duplicate references are reported via self.errorComment() and skipped.

    Returns the self.errorComment() result if no reference is given at all,
    otherwise True.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, validate and append the copy job to the instruction.  We execute
    # them after parsing all the input so we can handle forward references.
    asToCopy = self.flattenAllSections(aasSections).split();
    if not asToCopy:
        return self.errorComment(iTagLine, '%s: requires at least one reference value' % (sTag,));
    for sToCopy in asToCopy:
        if sToCopy in oInstr.asCopyTests:
            self.errorComment(iTagLine, '%s: ignoring duplicate "%s"' % (sTag, sToCopy,));
        elif self.oReStatsName.match(sToCopy) or self.oReFunctionName.match(sToCopy):
            oInstr.asCopyTests.append(sToCopy);
        else:
            self.errorComment(iTagLine, '%s: invalid instruction reference (opstat or function) "%s" (valid: %s or %s)'
                              % (sTag, sToCopy, self.oReStatsName.pattern, self.oReFunctionName.pattern));

    _ = iEndLine;
    return True;
4217
def parseTagOpOnlyTest(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@oponlytest | \@oponly
    Value:      none

    Only test instructions with this tag.  This is a trick that is handy
    for singling out one or two new instructions or tests.

    See also \@optestignore.

    Returns True after making sure the instruction is in
    g_aoOnlyTestInstructions, or the result of self.errorComment() if the
    tag was given a value.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The tag takes no value, so anything left after stripping is an error.
    sLeftover = self.flattenAllSections(aasSections).strip();
    if not sLeftover:
        if oInstr not in g_aoOnlyTestInstructions:
            g_aoOnlyTestInstructions.append(oInstr);
        return True;

    return self.errorComment(iTagLine, '%s: does not take any value: %s' % (sTag, sLeftover));
4240
def parseTagOpXcptType(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opxcpttype
    Value:      [none|1|2|3|4|4UA|5|6|7|8|11|12|E1|E1NF|E2|E3|E3NF|E4|E4NF|E5|E5NF|E6|E6NF|E7NF|E9|E9NF|E10|E11|E12|E12NF]

    Sets the SSE or AVX exception type (see SDMv2 2.4, 2.7).

    The value must be exactly one word and a key of g_kdXcptTypes, and the
    tag may only be given once per instruction.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    asTypes = self.flattenAllSections(aasSections).split();
    if len(asTypes) != 1:
        return self.errorComment(iTagLine, '%s: exactly one exception type, please: %s' % (sTag, asTypes,));
    sType = asTypes[0];
    if sType not in g_kdXcptTypes:
        return self.errorComment(iTagLine, '%s: invalid exception type: %s (valid: %s)'
                                 % (sTag, sType, sorted(g_kdXcptTypes.keys()),));

    # Set it.
    if oInstr.sXcptType is not None:
        return self.errorComment(iTagLine,
                                 '%s: attempting to overwrite "%s" with "%s" (only one @opxcpttype)'
                                 % ( sTag, oInstr.sXcptType, sType,));
    oInstr.sXcptType = sType;

    _ = iEndLine;
    return True;
4267
def parseTagOpFunction(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opfunction
    Value:      <VMM function name>

    This is for explicitly setting the IEM function name.  Normally we pick
    this up from the FNIEMOP_XXX macro invocation after the description, or
    generate it from the mnemonic and operands.

    It is thought that it may be necessary to set it when specifying
    instructions whose implementation isn't following immediately or which
    aren't implemented yet.

    The flattened value must match self.oReFunctionName and the instruction
    must not have a function name yet.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value, then either set it or work out what to complain about.
    sName = self.flattenAllSections(aasSections);
    if self.oReFunctionName.match(sName) is None:
        sError = '%s: invalid VMM function name: "%s" (valid: %s)' % (sTag, sName, self.oReFunctionName.pattern);
    elif oInstr.sFunction is not None:
        sError = '%s: attempting to overwrite VMM function name "%s" with "%s"' % (sTag, oInstr.sFunction, sName,);
    else:
        oInstr.sFunction = sName;
        return True;

    return self.errorComment(iTagLine, sError);
4295
def parseTagOpStats(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opstats
    Value:      <VMM statistics base name>

    This is for explicitly setting the statistics name.  Normally we pick
    this up from the IEMOP_MNEMONIC macro invocation, or generate it from
    the mnemonic and operands.

    It is thought that it may be necessary to set it when specifying
    instructions whose implementation isn't following immediately or which
    aren't implemented yet.

    The flattened value must match self.oReStatsName and the instruction
    must not have a statistics name yet.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value, then either set it or work out what to complain about.
    sName = self.flattenAllSections(aasSections);
    if self.oReStatsName.match(sName) is None:
        sError = '%s: invalid VMM statistics name: "%s" (valid: %s)' % (sTag, sName, self.oReStatsName.pattern);
    elif oInstr.sStats is not None:
        sError = '%s: attempting to overwrite VMM statistics base name "%s" with "%s"' % (sTag, oInstr.sStats, sName,);
    else:
        oInstr.sStats = sName;
        return True;

    return self.errorComment(iTagLine, sError);
4323
def parseTagOpDone(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opdone
    Value:      none

    Used to explicitly flush the instructions that have been specified.

    Returns the result of self.doneInstructions(), or the result of
    self.errorComment() if the tag was given a value.
    """
    _ = iEndLine;
    sValue = self.flattenAllSections(aasSections);
    if sValue == '':
        return self.doneInstructions();
    return self.errorComment(iTagLine, '%s: takes no value, found: "%s"' % (sTag, sValue,));
4336
4337 ## @}
4338
4339
def parseComment(self):
    """
    Parse the current comment (self.sComment).

    The comment is split into lines, a leading old style 'Opcode ...' line
    is handed to self.parseCommentOldOpcode(), and the text is then split up
    into tags (lines starting with '@') with their values.  A value consists
    of one or more sections separated by empty lines.  Each tag found in
    self.dTagHandlers is passed to its handler; text before the first tag is
    collected under the pseudo tag '@default'.

    Returns False if the comment contains neither 'Opcode' nor '@' and is
    thus skipped, otherwise True.
    """
    #
    # Reject if comment doesn't seem to contain anything interesting.
    #
    if    self.sComment.find('Opcode') < 0 \
      and self.sComment.find('@') < 0:
        return False;

    #
    # Split the comment into lines, removing leading asterisks and spaces.
    # Also remove leading and trailing empty lines.
    #
    asLines = self.sComment.split('\n');
    for iLine, sLine in enumerate(asLines):
        asLines[iLine] = sLine.lstrip().lstrip('*').lstrip();

    # Dropped leading lines are accounted for in self.iCommentLine.
    while asLines and not asLines[0]:
        self.iCommentLine += 1;
        asLines.pop(0);

    while asLines and not asLines[-1]:
        asLines.pop(len(asLines) - 1);

    #
    # Check for old style: Opcode 0x0f 0x12
    #
    if asLines[0].startswith('Opcode '):
        self.parseCommentOldOpcode(asLines);

    #
    # Look for @op* tagged data.
    #
    cOpTags      = 0;           # Number of tags dispatched to a handler.
    sFlatDefault = None;        # Flattened untagged text, if any.
    sCurTag      = '@default';  # The tag currently being collected.
    iCurTagLine  = 0;           # Index into asLines of the current tag.
    asCurSection = [];          # The section currently being filled.
    aasSections  = [ asCurSection, ];
    for iLine, sLine in enumerate(asLines):
        if not sLine.startswith('@'):
            # Value text: non-empty lines extend the current section, an
            # empty line following a non-empty section starts a new one.
            if sLine:
                asCurSection.append(sLine);
            elif asCurSection:
                asCurSection = [];
                aasSections.append(asCurSection);
        else:
            #
            # Process the previous tag.
            #
            # Drop a trailing empty section, but always keep at least one.
            if not asCurSection and len(aasSections) > 1:
                aasSections.pop(-1);
            if sCurTag in self.dTagHandlers:
                self.dTagHandlers[sCurTag](sCurTag, aasSections, iCurTagLine, iLine);
                cOpTags += 1;
            elif sCurTag.startswith('@op'):
                self.errorComment(iCurTagLine, 'Unknown tag: %s' % (sCurTag));
            elif sCurTag == '@default':
                sFlatDefault = self.flattenAllSections(aasSections);
            elif '@op' + sCurTag[1:] in self.dTagHandlers:
                self.errorComment(iCurTagLine, 'Did you mean "@op%s" rather than "%s"?' % (sCurTag[1:], sCurTag));
            elif sCurTag in ['@encoding', '@opencoding']:
                self.errorComment(iCurTagLine, 'Did you mean "@openc" rather than "%s"?' % (sCurTag,));

            #
            # New tag.
            #
            # The tag is the first word (lower cased), any remaining text on
            # the line starts the first section of its value.
            asSplit = sLine.split(None, 1);
            sCurTag = asSplit[0].lower();
            if len(asSplit) > 1:
                asCurSection = [asSplit[1],];
            else:
                asCurSection = [];
            aasSections = [asCurSection, ];
            iCurTagLine = iLine;

    #
    # Process the final tag.
    #
    # Note! Unlike the in-loop processing above, this does not do the
    #       "Did you mean" checks.  iLine is the index of the last line here.
    if not asCurSection and len(aasSections) > 1:
        aasSections.pop(-1);
    if sCurTag in self.dTagHandlers:
        self.dTagHandlers[sCurTag](sCurTag, aasSections, iCurTagLine, iLine);
        cOpTags += 1;
    elif sCurTag.startswith('@op'):
        self.errorComment(iCurTagLine, 'Unknown tag: %s' % (sCurTag));
    elif sCurTag == '@default':
        sFlatDefault = self.flattenAllSections(aasSections);

    #
    # Don't allow default text in blocks containing @op*.
    #
    if cOpTags > 0 and sFlatDefault:
        self.errorComment(0, 'Untagged comment text is not allowed with @op*: %s' % (sFlatDefault,));

    return True;
4440
def parseMacroInvocation(self, sInvocation, offStartInvocation = 0):
    """
    Parses a macro invocation.

    The macro name is expected at offStartInvocation in sInvocation.  If the
    closing parenthesis isn't found in sInvocation, following lines are
    appended from self.asLines, starting with index self.iLine.

    Returns three values:
        1. A list of macro arguments, where the zero'th is the macro name.
        2. The offset following the macro invocation, into sInvocation of
           this is on the same line or into the last line if it is on a
           different line.
        3. Number of additional lines the invocation spans (i.e. zero if
           it is all contained within sInvocation).

    Calls self.raiseError() if no opening parenthesis follows the name or
    the name doesn't match self.oReMacroName.  Running out of lines is
    reported via self.error() and what was parsed so far is returned.
    """
    # First the name.
    offOpen = sInvocation.find('(', offStartInvocation);
    if offOpen <= offStartInvocation:
        self.raiseError("macro invocation open parenthesis not found");
    sName = sInvocation[offStartInvocation:offOpen].strip();
    if not self.oReMacroName.match(sName):
        self.raiseError("invalid macro name '%s'" % (sName,));
    asRet = [sName, ];

    # Arguments.
    iLine    = self.iLine;
    cDepth   = 1;               # Parenthesis nesting depth.
    off      = offOpen + 1;
    offStart = off;             # Start of the current argument.
    offCurLn = 0;               # Where the most recently appended line starts in sInvocation.
    chQuote  = None;            # The quote character if inside a string/char literal.
    while cDepth > 0:
        # Append following lines when running out of input.  This is a loop
        # because an empty line adds nothing to index into.
        while off >= len(sInvocation):
            if iLine >= len(self.asLines):
                self.error('macro invocation beyond end of file');
                return (asRet, off - offCurLn, iLine - self.iLine);
            offCurLn    = off;
            sInvocation += self.asLines[iLine];
            iLine       += 1;
        ch = sInvocation[off];

        if chQuote:
            # Inside a literal: Skip escaped characters and look for the end quote.
            if ch == '\\' and off + 1 < len(sInvocation):
                off += 1;
            elif ch == chQuote:
                chQuote = None;
        elif ch in ('"', '\'',):
            chQuote = ch;
        elif ch in (',', ')',):
            # Only separators at the top level delimit arguments.
            if cDepth == 1:
                asRet.append(sInvocation[offStart:off].strip());
                offStart = off + 1;
            if ch == ')':
                cDepth -= 1;
        elif ch == '(':
            cDepth += 1;
        off += 1;

    return (asRet, off - offCurLn, iLine - self.iLine);
4497
def findAndParseMacroInvocationEx(self, sCode, sMacro, offStart = 0):
    """
    Looks for sMacro in sCode, starting at offStart, and parses the
    invocation if the first non-blank character following it is an opening
    parenthesis.

    Returns (None, len(sCode), 0) if not found, otherwise the
    parseMacroInvocation() return value.
    """
    offHit = sCode.find(sMacro, offStart);
    # Note! startswith() rather than indexing, as there might not be anything
    #       at all following the macro name.
    if offHit >= 0 and sCode[offHit + len(sMacro):].lstrip().startswith('('):
        return self.parseMacroInvocation(sCode, offHit);
    return (None, len(sCode), 0);
4507
def findAndParseMacroInvocation(self, sCode, sMacro):
    """
    Simplified findAndParseMacroInvocationEx() which only returns the
    argument list.

    Returns None if not found, arguments as per parseMacroInvocation if found.
    """
    tResult = self.findAndParseMacroInvocationEx(sCode, sMacro);
    asArgs  = tResult[0];
    return asArgs;
4513
def findAndParseFirstMacroInvocation(self, sCode, asMacro):
    """
    Tries the macros in asMacro in the given order, stopping at the first
    one findAndParseMacroInvocation() finds in sCode.

    Returns same as findAndParseMacroInvocation.
    """
    # Lazily evaluated, so no further lookups are done after the first hit.
    oLookups = (self.findAndParseMacroInvocation(sCode, sMacro) for sMacro in asMacro);
    return next((asArgs for asArgs in oLookups if asArgs is not None), None);
4523
def workerIemOpMnemonicEx(self, sMacro, sStats, sAsm, sForm, sUpper, sLower, # pylint: disable=too-many-arguments
                          sDisHints, sIemHints, asOperands):
    """
    Processes one of the a IEMOP_MNEMONIC0EX, IEMOP_MNEMONIC1EX, IEMOP_MNEMONIC2EX,
    IEMOP_MNEMONIC3EX, and IEMOP_MNEMONIC4EX macros.

    The macro parameters are cross checked against, and merged into, the
    last instruction in self.aoCurInstrs (one is added if there is none).
    Problems are reported via self.error().

    Parameters:
        sMacro      - The macro name, only used in error messages.
        sStats      - The a_Stats value, checked against self.oReStatsName.
        sAsm        - Not used.
        sForm       - The a_Form value, a key in g_kdIemForms.
        sUpper      - The a_Upper value, the mnemonic in upper case.
        sLower      - The a_Lower value, the mnemonic in lower case.
        sDisHints   - The a_fDisHints value, '|' separated DISOPTYPE_XXX
                      values or '0'.
        sIemHints   - The a_fIemHints value, '|' separated IEMOPHINT_XXX
                      values or '0'.
        asOperands  - List of operand types, each a key in g_kdOpTypes.

    Returns True.
    """
    #
    # Some invocation checks.
    #
    if sUpper != sUpper.upper():
        self.error('%s: bad a_Upper parameter: %s' % (sMacro, sUpper,));
    if sLower != sLower.lower():
        self.error('%s: bad a_Lower parameter: %s' % (sMacro, sLower,));
    if sUpper.lower() != sLower:
        self.error('%s: a_Upper and a_Lower parameters does not match: %s vs %s' % (sMacro, sUpper, sLower,));
    if not self.oReMnemonic.match(sLower):
        self.error('%s: invalid a_Lower: %s (valid: %s)' % (sMacro, sLower, self.oReMnemonic.pattern,));

    #
    # Check if sIemHints tells us to not consider this macro invocation.
    #
    if sIemHints.find('IEMOPHINT_SKIP_PYTHON') >= 0:
        return True;

    # Apply to the last instruction only for now.
    if not self.aoCurInstrs:
        self.addInstruction();
    oInstr = self.aoCurInstrs[-1];
    # An iLineMnemonicMacro value of -1 means no mnemonic macro has been seen yet.
    if oInstr.iLineMnemonicMacro == -1:
        oInstr.iLineMnemonicMacro = self.iLine;
    else:
        self.error('%s: already saw a IEMOP_MNEMONIC* macro on line %u for this instruction'
                   % (sMacro, oInstr.iLineMnemonicMacro,));

    # Mnemonic
    if oInstr.sMnemonic is None:
        oInstr.sMnemonic = sLower;
    elif oInstr.sMnemonic != sLower:
        self.error('%s: current instruction and a_Lower does not match: %s vs %s' % (sMacro, oInstr.sMnemonic, sLower,));

    # Process operands.
    if len(oInstr.aoOperands) not in [0, len(asOperands)]:
        self.error('%s: number of operands given by @opN does not match macro: %s vs %s'
                   % (sMacro, len(oInstr.aoOperands), len(asOperands),));
    for iOperand, sType in enumerate(asOperands):
        # Entry [1] of the g_kdOpTypes value is what is used as Operand.sWhere.
        sWhere = g_kdOpTypes.get(sType, [None, None])[1];
        if sWhere is None:
            self.error('%s: unknown a_Op%u value: %s' % (sMacro, iOperand + 1, sType));
            if iOperand < len(oInstr.aoOperands): # error recovery.
                sWhere = oInstr.aoOperands[iOperand].sWhere;
                sType  = oInstr.aoOperands[iOperand].sType;
            else:
                sWhere = 'reg';
                sType  = 'Gb';
        # Either add the operand, or check that it matches the existing one.
        if iOperand == len(oInstr.aoOperands):
            oInstr.aoOperands.append(Operand(sWhere, sType))
        elif oInstr.aoOperands[iOperand].sWhere != sWhere or oInstr.aoOperands[iOperand].sType != sType:
            self.error('%s: @op%u and a_Op%u mismatch: %s:%s vs %s:%s'
                       % (sMacro, iOperand + 1, iOperand + 1, oInstr.aoOperands[iOperand].sWhere,
                          oInstr.aoOperands[iOperand].sType, sWhere, sType,));

    # Encoding.
    if sForm not in g_kdIemForms:
        self.error('%s: unknown a_Form value: %s' % (sMacro, sForm,));
    else:
        # Entry [0] of the g_kdIemForms value is compared with / used as the encoding.
        if oInstr.sEncoding is None:
            oInstr.sEncoding = g_kdIemForms[sForm][0];
        elif g_kdIemForms[sForm][0] != oInstr.sEncoding:
            self.error('%s: current instruction @openc and a_Form does not match: %s vs %s (%s)'
                       % (sMacro, oInstr.sEncoding, g_kdIemForms[sForm], sForm));

        # Check the parameter locations for the encoding.
        # Entry [1] is either None or a list of the expected operand locations.
        if g_kdIemForms[sForm][1] is not None:
            if len(g_kdIemForms[sForm][1]) > len(oInstr.aoOperands):
                self.error('%s: The a_Form=%s has a different operand count: %s (form) vs %s'
                           % (sMacro, sForm, len(g_kdIemForms[sForm][1]), len(oInstr.aoOperands) ));
            else:
                for iOperand, sWhere in enumerate(g_kdIemForms[sForm][1]):
                    if oInstr.aoOperands[iOperand].sWhere != sWhere:
                        self.error('%s: current instruction @op%u and a_Form location does not match: %s vs %s (%s)'
                                   % (sMacro, iOperand + 1, oInstr.aoOperands[iOperand].sWhere, sWhere, sForm,));
                    # Entry [4] of the g_kdOpTypes value is matched against the form name.
                    sOpFormMatch = g_kdOpTypes[oInstr.aoOperands[iOperand].sType][4];
                    # NOTE(review): sForm.find('XOP') is used as a boolean below, so it is
                    # true when 'XOP' is absent (-1) and false when sForm starts with 'XOP'
                    # (0).  Likewise sForm.find('VEX') > 0 is false when sForm starts with
                    # 'VEX'.  Presumably '>= 0' was intended for both - confirm against the
                    # g_kdIemForms keys before changing anything.
                    if    (sOpFormMatch in [ 'REG', 'MEM', ] and sForm.find('_' + sOpFormMatch) < 0) \
                       or (sOpFormMatch in [ 'FIXED', ]      and sForm.find(sOpFormMatch) < 0) \
                       or (sOpFormMatch == 'RM' and (sForm.find('_MEM') > 0 or sForm.find('_REG') > 0) ) \
                       or (sOpFormMatch == 'V'  and (   not (sForm.find('VEX') > 0 or sForm.find('XOP')) \
                                                     or sForm.replace('VEX','').find('V') < 0) ):
                        self.error('%s: current instruction @op%u and a_Form type does not match: %s/%s vs %s'
                                   % (sMacro, iOperand + 1, oInstr.aoOperands[iOperand].sType, sOpFormMatch, sForm, ));
                # Any operands beyond those given by the form must be fixed ones.
                if len(g_kdIemForms[sForm][1]) < len(oInstr.aoOperands):
                    for iOperand in range(len(g_kdIemForms[sForm][1]), len(oInstr.aoOperands)):
                        if    oInstr.aoOperands[iOperand].sType != 'FIXED' \
                          and g_kdOpTypes[oInstr.aoOperands[iOperand].sType][0] != 'IDX_ParseFixedReg':
                            self.error('%s: Expected FIXED type operand #%u following operands given by a_Form=%s: %s (%s)'
                                       % (sMacro, iOperand, sForm, oInstr.aoOperands[iOperand].sType,
                                          oInstr.aoOperands[iOperand].sWhere));


        # Check @opcodesub
        # Entry [2] of the g_kdIemForms value must, if set, be part of the sub-opcode.
        if    oInstr.sSubOpcode \
          and g_kdIemForms[sForm][2] \
          and oInstr.sSubOpcode.find(g_kdIemForms[sForm][2]) < 0:
            self.error('%s: current instruction @opcodesub and a_Form does not match: %s vs %s (%s)'
                       % (sMacro, oInstr.sSubOpcode, g_kdIemForms[sForm][2], sForm,));

    # Stats.
    if not self.oReStatsName.match(sStats):
        self.error('%s: invalid a_Stats value: %s' % (sMacro, sStats,));
    elif oInstr.sStats is None:
        oInstr.sStats = sStats;
    elif oInstr.sStats != sStats:
        self.error('%s: mismatching @opstats and a_Stats value: %s vs %s'
                   % (sMacro, oInstr.sStats, sStats,));

    # Process the hints (simply merge with @ophints w/o checking anything).
    for sHint in sDisHints.split('|'):
        sHint = sHint.strip();
        if sHint.startswith('DISOPTYPE_'):
            # The hint name is the lower cased value without the prefix.
            sShortHint = sHint[len('DISOPTYPE_'):].lower();
            if sShortHint in g_kdHints:
                oInstr.dHints[sShortHint] = True; # (dummy value, using dictionary for speed)
            else:
                self.error('%s: unknown a_fDisHints value: %s' % (sMacro, sHint,));
        elif sHint != '0':
            self.error('%s: expected a_fDisHints value: %s' % (sMacro, sHint,));

    for sHint in sIemHints.split('|'):
        sHint = sHint.strip();
        if sHint.startswith('IEMOPHINT_'):
            # The hint name is the lower cased value without the prefix.
            sShortHint = sHint[len('IEMOPHINT_'):].lower();
            if sShortHint in g_kdHints:
                oInstr.dHints[sShortHint] = True; # (dummy value, using dictionary for speed)
            else:
                self.error('%s: unknown a_fIemHints value: %s' % (sMacro, sHint,));
        elif sHint != '0':
            self.error('%s: expected a_fIemHints value: %s' % (sMacro, sHint,));

    _ = sAsm;
    return True;
4663
def workerIemOpMnemonic(self, sMacro, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands):
    """
    Processes one of the a IEMOP_MNEMONIC0, IEMOP_MNEMONIC1, IEMOP_MNEMONIC2,
    IEMOP_MNEMONIC3, and IEMOP_MNEMONIC4 macros.

    These macros have no a_Stats and a_Asm parameters, so both are derived
    from the mnemonic and operands before handing the job over to
    workerIemOpMnemonicEx(), the result of which is returned.
    """
    sStats = sLower;
    sAsm   = sLower;
    if asOperands:
        # The stats name is the mnemonic and operands joined by underscores,
        # the assembly form is 'mnemonic op1,op2,...'.
        sStats = '_'.join([sLower,] + list(asOperands));
        sAsm   = '%s %s' % (sLower, ','.join(asOperands),);
    return self.workerIemOpMnemonicEx(sMacro, sStats, sAsm, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands);
4673
def workerIemMcBegin(self, sCode, offBeginStatementInCodeStr, offBeginStatementInLine):
    """
    Process a IEM_MC_BEGIN macro invocation.

    Creates a new McBlock for the current function, makes it the current
    block (self.oCurMcBlock) and appends it to g_aoMcBlocks.

    Calls self.raiseError() if there is no current function or if a block
    is already open.  Returns True.
    """
    if self.fDebugMc:
        self.debug('IEM_MC_BEGIN on %s off %s' % (self.iLine, offBeginStatementInLine,));

    # Check preconditions.
    if not self.oCurFunction:
        self.raiseError('IEM_MC_BEGIN w/o current function (%s)' % (sCode,));
    if self.oCurMcBlock:
        self.raiseError('IEM_MC_BEGIN before IEM_MC_END.  Previous IEM_MC_BEGIN at line %u' % (self.oCurMcBlock.iBeginLine,));

    # The indent level is the distance from the start of the (sub-)line the
    # statement is on.  rfind returns -1 when there is no preceding newline,
    # which makes the start of that line offset zero.
    offLineStart = sCode.rfind('\n', 0, offBeginStatementInCodeStr) + 1;
    cchIndent    = offBeginStatementInCodeStr - offLineStart;

    # Start a new block.
    oMcBlock = McBlock(self.sSrcFile, self.iLine, offBeginStatementInLine,
                       self.oCurFunction, self.iMcBlockInFunc, cchIndent);
    self.oCurMcBlock = oMcBlock;
    g_aoMcBlocks.append(oMcBlock);
    self.cTotalMcBlocks += 1;
    self.iMcBlockInFunc += 1;
    return True;
4702
4703 @staticmethod
4704 def extractLinesFromMacroExpansionLine(sRawLine, offBegin, offEnd, sBeginStmt = 'IEM_MC_BEGIN'):
4705 """
4706 Helper used by workerIemMcEnd and workerIemMcDeferToCImplXRet for
4707 extracting a statement block from a string that's the result of macro
4708 expansion and therefore contains multiple "sub-lines" as it were.
4709
4710 Returns list of lines covering offBegin thru offEnd in sRawLine.
4711 """
4712
4713 off = sRawLine.find('\n', offEnd);
4714 if off > 0:
4715 sRawLine = sRawLine[:off + 1];
4716
4717 off = sRawLine.rfind('\n', 0, offBegin) + 1;
4718 sRawLine = sRawLine[off:];
4719 if not sRawLine.strip().startswith(sBeginStmt):
4720 sRawLine = sRawLine[offBegin - off:]
4721
4722 return [sLine + '\n' for sLine in sRawLine.split('\n')];
4723
def workerIemMcEnd(self, offEndStatementInLine):
    """
    Process a IEM_MC_END macro invocation.

    Collects the source lines of the current block (self.oCurMcBlock),
    strips whatever follows the 'IEM_MC_END();' statement on the final line,
    and completes the block before resetting self.oCurMcBlock to None.

    Calls self.raiseError() if there is no current block or if the block
    lines are not on the expected form (this assumes that raiseError does
    not return).  Returns True.
    """
    if self.fDebugMc:
        self.debug('IEM_MC_END on %s off %s' % (self.iLine, offEndStatementInLine,));

    # Check preconditions.
    if not self.oCurMcBlock:
        self.raiseError('IEM_MC_END w/o IEM_MC_BEGIN.');

    #
    # HACK ALERT! For blocks originating from macro expansion the start and
    #             end line will be the same, but the line has multiple
    #             newlines inside it.  So, we have to do some extra tricks
    #             to get the lines out of there. We ASSUME macros aren't
    #             messy, but keep IEM_MC_BEGIN/END on separate lines.
    #
    if self.iLine > self.oCurMcBlock.iBeginLine:
        asLines = self.asLines[self.oCurMcBlock.iBeginLine - 1 : self.iLine];
        if not asLines[0].strip().startswith('IEM_MC_BEGIN'):
            self.raiseError('IEM_MC_BEGIN is not the first word on the line');
    else:
        asLines = self.extractLinesFromMacroExpansionLine(self.asLines[self.iLine - 1],
                                                          self.oCurMcBlock.offBeginLine, offEndStatementInLine);

    #
    # Strip anything following the IEM_MC_END(); statement in the final line,
    # so that we don't carry on any trailing 'break' after macro expansions
    # like for iemOp_movsb_Xb_Yb.
    #
    while asLines and asLines[-1].strip() == '':
        asLines.pop();
    sFinal = asLines[-1] if asLines else '';
    offEndInFinal = sFinal.find('IEM_MC_END');
    if offEndInFinal < 0:
        self.raiseError('bogus IEM_MC_END: Not in final line: %s' % (sFinal,));
    offFinalEnd = offEndInFinal + len('IEM_MC_END');

    # Expect '(', ')' and ';' in that order, each optionally preceded by
    # blanks.  Running off the end of the line is treated the same way as
    # finding the wrong character.
    for chExpected in ('(', ')', ';',):
        while offFinalEnd < len(sFinal) and sFinal[offFinalEnd].isspace():
            offFinalEnd += 1;
        if offFinalEnd >= len(sFinal) or sFinal[offFinalEnd] != chExpected:
            self.raiseError('bogus IEM_MC_END: Expected "%s" at %s: %s' % (chExpected, offFinalEnd, sFinal,));
        offFinalEnd += 1;

    asLines[-1] = sFinal[: offFinalEnd];

    #
    # Complete and discard the current block.
    #
    self.oCurMcBlock.complete(self.iLine, offEndStatementInLine,
                              offEndStatementInLine + offFinalEnd - offEndInFinal, asLines);
    self.oCurMcBlock = None;
    return True;
4787
def workerIemMcDeferToCImplXRet(self, sCode, offBeginStatementInCodeStr, offBeginStatementInLine, cParams):
    """
    Handles an IEM_MC_DEFER_TO_CIMPL_<cParams>_RET invocation found in sCode.

    The invocation is recorded as a complete McBlock holding a single
    McStmtCall statement.  The block is appended to g_aoMcBlocks and
    self.iLine is advanced by the number of extra lines the invocation spans.

    sCode is the code string being scanned, offBeginStatementInCodeStr the
    offset of the macro name within sCode, offBeginStatementInLine the offset
    recorded as the block start, and cParams the number in the macro name.

    Returns True.  Problems are reported thru self.raiseError.
    """
    sMacroName = 'IEM_MC_DEFER_TO_CIMPL_%d_RET' % (cParams,);
    if self.fDebugMc:
        self.debug('%s on %s off %s' % (sMacroName, self.iLine, offBeginStatementInLine,));

    # We need a current function and must not be inside an open MC block.
    if not self.oCurFunction:
        self.raiseError('%s w/o current function (%s)' % (sMacroName, sCode,));
    if self.oCurMcBlock:
        self.raiseError('%s inside IEM_MC_BEGIN blocki starting at line %u' % (sMacroName, self.oCurMcBlock.iBeginLine,));

    # The indent is the distance from the start of the (sub)line containing the
    # statement; rfind yields -1 when there is no preceding newline, making the
    # line start offset zero.
    offLineStart = sCode.rfind('\n', 0, offBeginStatementInCodeStr) + 1;
    cchIndent = offBeginStatementInCodeStr - offLineStart;

    # Create the block before parsing the invocation.
    oNewBlock = McBlock(self.sSrcFile, self.iLine, offBeginStatementInLine,
                        self.oCurFunction, self.iMcBlockInFunc, cchIndent);

    # Parse the invocation and check the argument count.
    asArgs, offAfter, cLines = self.findAndParseMacroInvocationEx(sCode, sMacroName, offBeginStatementInCodeStr);
    if asArgs is None:
        self.raiseError('%s: Closing parenthesis not found!' % (sMacroName,));
    cExpectedArgs = cParams + 3;
    if len(asArgs) != cExpectedArgs:
        self.raiseError('%s: findAndParseMacroInvocationEx returns %s args, expected %s!'
                        % (sMacroName, len(asArgs), cExpectedArgs,));

    oNewBlock.aoStmts = [McStmtCall(asArgs[0], asArgs[1:], 1),];

    # A single-line invocation sitting in a source line with embedded newlines
    # is taken to come from a macro expansion; these MCs are not typically part
    # of macro expansions, but we deal with it right away if that's the case.
    fInMacroExpansion = cLines <= 0 and self.asLines[oNewBlock.iBeginLine - 1].count('\n') > 1;
    if fInMacroExpansion:
        asLines = self.extractLinesFromMacroExpansionLine(self.asLines[self.iLine - 1], offBeginStatementInCodeStr,
                                                          offAfter, sMacroName);
        offSemicolon = asLines[-1].find(';');
        assert offSemicolon >= 0;
        asLines[-1] = asLines[-1][:offSemicolon + 1];
    else:
        iFirst = self.iLine - 1;
        asLines = self.asLines[iFirst : iFirst + cLines + 1];
        assert offAfter < len(asLines[-1]) and asLines[-1][offAfter] == ';', \
               'iBeginLine=%d iLine=%d offAfter=%s line: "%s"' % (oNewBlock.iBeginLine, self.iLine, offAfter, asLines[-1],);
        asLines[-1] = asLines[-1][:offAfter + 1];

    assert asLines[0].find(sMacroName) >= 0;

    # Move on to the line holding the closing ')'.
    self.iLine += cLines;

    # Complete and register the block.
    offBeginInLastLine = 0 if cLines > 0 else offBeginStatementInCodeStr;
    oNewBlock.complete(self.iLine, offBeginInLastLine, offAfter + 1, asLines);

    g_aoMcBlocks.append(oNewBlock);
    self.cTotalMcBlocks += 1;
    self.iMcBlockInFunc += 1;

    return True;
4852
def workerStartFunction(self, asArgs):
    """
    Handles the start of a decoder function.

    The functions are all defined using one of the FNIEMOP*_DEF* and
    FNIEMOP_*STUB* macros, so asArgs is the parsed macro invocation with the
    macro name as the 0th entry and the function name as the 1st.

    Any function currently being tracked is completed first, using the lines
    from its first line up to the one preceding the current line.

    Returns True.
    """
    oPrevFunction = self.oCurFunction;
    if oPrevFunction:
        iEndLine = self.iLine - 1;
        oPrevFunction.complete(iEndLine, self.asLines[oPrevFunction.iBeginLine - 1 : iEndLine]);

    # Start tracking the new function.
    self.oCurFunction = DecoderFunction(self.sSrcFile, self.iLine, asArgs[1], asArgs);
    return True;
4868
def checkCodeForMacro(self, sCode, offLine):
    """
    Checks code for relevant macro invocation.

    sCode is the code snippet to scan and offLine is the offset that gets
    added to the match offsets handed to the IEM_MC_* workers.

    Nothing is scanned unless sCode contains a '(' somewhere after its
    first character.

    Returns True if a decoder function definition macro was handled or if
    the snippet contains 'IEM_MC_', otherwise False.
    """

    #
    # Scan macro invocations.
    #
    if sCode.find('(') > 0:
        # Look for instruction decoder function definitions. ASSUME single line.
        asArgs = self.findAndParseFirstMacroInvocation(sCode,
                                                       [ 'FNIEMOP_DEF',
                                                         'FNIEMOPRM_DEF',
                                                         'FNIEMOP_STUB',
                                                         'FNIEMOP_STUB_1',
                                                         'FNIEMOP_UD_STUB',
                                                         'FNIEMOP_UD_STUB_1' ]);
        if asArgs is not None:
            self.workerStartFunction(asArgs);
            #self.debug('%s: oCurFunction=%s' % (self.iLine, self.oCurFunction.sName,));

            # Make sure there is an instruction to attach the function to and
            # record where the FNIEMOP_XXX macro was seen (only once per instruction).
            if not self.aoCurInstrs:
                self.addInstruction();
            for oInstr in self.aoCurInstrs:
                if oInstr.iLineFnIemOpMacro == -1:
                    oInstr.iLineFnIemOpMacro = self.iLine;
                else:
                    self.error('%s: already seen a FNIEMOP_XXX macro for %s' % (asArgs[0], oInstr,) );
            self.setInstrunctionAttrib('sFunction', asArgs[1]);
            self.setInstrunctionAttrib('fStub', asArgs[0].find('STUB') > 0, fOverwrite = True);
            self.setInstrunctionAttrib('fUdStub', asArgs[0].find('UD_STUB') > 0, fOverwrite = True);
            # Stub macros have no body to scan, so the instructions are done right away.
            if asArgs[0].find('STUB') > 0:
                self.doneInstructions(fEndOfFunction = True);
            return True;

        # Check for worker function definitions, so we can get a context for MC blocks.
        asArgs = self.findAndParseFirstMacroInvocation(sCode,
                                                       [ 'FNIEMOP_DEF_1',
                                                         'FNIEMOP_DEF_2', ]);
        if asArgs is not None:
            self.workerStartFunction(asArgs);
            #self.debug('%s: oCurFunction=%s (%s)' % (self.iLine, self.oCurFunction.sName, asArgs[0]));
            return True;

        # IEMOP_HLP_DONE_VEX_DECODING_*
        asArgs = self.findAndParseFirstMacroInvocation(sCode,
                                                       [ 'IEMOP_HLP_DONE_VEX_DECODING',
                                                         'IEMOP_HLP_DONE_VEX_DECODING_L0',
                                                         'IEMOP_HLP_DONE_VEX_DECODING_NO_VVVV',
                                                         'IEMOP_HLP_DONE_VEX_DECODING_L0_AND_NO_VVVV',
                                                         ]);
        if asArgs is not None:
            sMacro = asArgs[0];
            # The L0 variants imply the 'vex_l_zero' hint; complain if it is missing
            # (when we know where the mnemonic macro is) and add it.
            if sMacro in ('IEMOP_HLP_DONE_VEX_DECODING_L0', 'IEMOP_HLP_DONE_VEX_DECODING_L0_AND_NO_VVVV', ):
                for oInstr in self.aoCurInstrs:
                    if 'vex_l_zero' not in oInstr.dHints:
                        if oInstr.iLineMnemonicMacro >= 0:
                            self.errorOnLine(oInstr.iLineMnemonicMacro,
                                             'Missing IEMOPHINT_VEX_L_ZERO! (%s on line %d)' % (sMacro, self.iLine,));
                        oInstr.dHints['vex_l_zero'] = True;

        #
        # IEMOP_MNEMONIC*
        #
        # Note! asArgs[0] is the macro name, so the macro parameters listed in
        #       the comments below start at asArgs[1].
        #
        if sCode.find('IEMOP_MNEMONIC') >= 0:
            # IEMOP_MNEMONIC(a_Stats, a_szMnemonic) IEMOP_INC_STATS(a_Stats)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC');
            if asArgs is not None:
                if len(self.aoCurInstrs) == 1:
                    oInstr = self.aoCurInstrs[0];
                    if oInstr.sStats is None:
                        oInstr.sStats = asArgs[1];
                    self.deriveMnemonicAndOperandsFromStats(oInstr, asArgs[1]);

            # IEMOP_MNEMONIC0EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC0EX');
            if asArgs is not None:
                self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[6],
                                           asArgs[7], []);
            # IEMOP_MNEMONIC1EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC1EX');
            if asArgs is not None:
                self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[7],
                                           asArgs[8], [asArgs[6],]);
            # IEMOP_MNEMONIC2EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC2EX');
            if asArgs is not None:
                self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[8],
                                           asArgs[9], [asArgs[6], asArgs[7]]);
            # IEMOP_MNEMONIC3EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_fDisHints,
            #                   a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC3EX');
            if asArgs is not None:
                self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[9],
                                           asArgs[10], [asArgs[6], asArgs[7], asArgs[8],]);
            # IEMOP_MNEMONIC4EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_Op4, a_fDisHints,
            #                   a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC4EX');
            if asArgs is not None:
                self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[10],
                                           asArgs[11], [asArgs[6], asArgs[7], asArgs[8], asArgs[9],]);

            # IEMOP_MNEMONIC0(a_Form, a_Upper, a_Lower, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC0');
            if asArgs is not None:
                self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], []);
            # IEMOP_MNEMONIC1(a_Form, a_Upper, a_Lower, a_Op1, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC1');
            if asArgs is not None:
                self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[5], asArgs[6], [asArgs[4],]);
            # IEMOP_MNEMONIC2(a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC2');
            if asArgs is not None:
                self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[6], asArgs[7],
                                         [asArgs[4], asArgs[5],]);
            # IEMOP_MNEMONIC3(a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC3');
            if asArgs is not None:
                self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[7], asArgs[8],
                                         [asArgs[4], asArgs[5], asArgs[6],]);
            # IEMOP_MNEMONIC4(a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_Op4, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC4');
            if asArgs is not None:
                self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[8], asArgs[9],
                                         [asArgs[4], asArgs[5], asArgs[6], asArgs[7],]);

        #
        # IEM_MC_BEGIN + IEM_MC_END.
        # We must support multiple instances per code snippet.
        #
        offCode = sCode.find('IEM_MC_');
        if offCode >= 0:
            for oMatch in self.oReMcBeginEnd.finditer(sCode, offCode):
                if oMatch.group(1) == 'END':
                    self.workerIemMcEnd(offLine + oMatch.start());
                elif oMatch.group(1) == 'BEGIN':
                    self.workerIemMcBegin(sCode, oMatch.start(), offLine + oMatch.start());
                else:
                    # Anything else is treated as DEFER_TO_CIMPL_<n>...; the character
                    # following the 'DEFER_TO_CIMPL_' prefix is the parameter count.
                    self.workerIemMcDeferToCImplXRet(sCode, oMatch.start(), offLine + oMatch.start(),
                                                     int(oMatch.group(1)[len('DEFER_TO_CIMPL_')]));
            return True;

    return False;
5012
def workerPreProcessRecreateMacroRegex(self):
    """
    Recreates self.oReMacros when self.dMacros changes.

    The regular expression is one capture group with an alternative for each
    macro in self.dMacros.  Macros taking arguments (asArgs is not None) are
    matched as the name followed by optional whitespace and an opening
    parenthesis, the rest as the name followed by a word boundary.  The group
    as a whole is preceded by a word boundary.

    self.oReMacros is set to None when there are no macros.

    Returns True.
    """
    if not self.dMacros:
        self.oReMacros = None;
        return True;

    # Note! Raw strings are used for the regex bits since '\s' and '\(' are
    #       invalid escape sequences in ordinary string literals and trigger
    #       warnings with newer python versions.
    asAlternatives = [];
    for sName, oMacro in self.dMacros.items():
        if oMacro.asArgs is not None:
            asAlternatives.append(sName + r'\s*\(');
        else:
            asAlternatives.append(sName + r'\b');
    self.oReMacros = re.compile(r'\b(' + '|'.join(asAlternatives) + ')');
    return True;
5033
def workerPreProcessDefine(self, sRest):
    """
    Handles a macro #define, where sRest is the text following the
    directive word.

    Only macros with 'IEM_MC_BEGIN' in the body are recorded in
    self.dMacros; all others are ignored.

    Returns True when the macro is ignored, the result of self.error when
    the definition cannot be parsed, and otherwise the result of
    self.workerPreProcessRecreateMacroRegex.
    """
    iFirstLine = self.iLine;

    #
    # Join up continuation lines, keeping the newlines but dropping the
    # escaping backslashes (and any whitespace in front of them).
    #
    while self.iLine < len(self.asLines) and sRest.endswith('\\\n'):
        sRest = sRest[:-2].rstrip() + '\n' + self.asLines[self.iLine];
        self.iLine += 1;

    #
    # Split out name, argument list and body.  The function-like form is
    # tried first, then the simple form.
    #
    # Note! An empty parameter list gives asArgs = None, same as for a
    #       simple macro.
    #
    oMatch = self.oReHashDefine2.match(sRest);
    if oMatch:
        sParamList = oMatch.group(2).strip();
        asArgs = None;
        if sParamList:
            asArgs = [sParam.strip() for sParam in sParamList.split(',')];
        sBody = oMatch.group(3);
    else:
        oMatch = self.oReHashDefine3.match(sRest);
        if not oMatch:
            self.debug('workerPreProcessDefine: wtf? sRest=%s' % (sRest,));
            return self.error('bogus macro definition: %s' % (sRest,));
        asArgs = None;
        sBody = oMatch.group(2);
    sName = oMatch.group(1);
    assert sName == sName.strip();

    #
    # We only care about macros containing MC blocks.  MC blocks within
    # nested macro expansion are NOT supported, to avoid lots of extra work.
    #
    # Note! IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX and other macros making
    #       use of IEMOP_RAISE_INVALID_LOCK_PREFIX_RET() are ignored here and
    #       dealt with by overriding IEMOP_RAISE_INVALID_LOCK_PREFIX_RET and
    #       its siblings in the recompiler.  This is a lot simpler than nested
    #       macro expansion and heuristics for locating the relevant macros,
    #       and it avoids producing lots of unnecessary threaded functions.
    #
    if sBody.find("IEM_MC_BEGIN") < 0:
        return True;

    #
    # Record the macro and refresh the macro matching regex.
    #
    if self.fDebugPreProc:
        self.debug('#define %s on line %u' % (sName, self.iLine,));
    self.dMacros[sName] = SimpleParser.Macro(sName, asArgs, sBody.strip(), iFirstLine);
    return self.workerPreProcessRecreateMacroRegex();
5091
def workerPreProcessUndef(self, sRest):
    """
    Handles a macro #undef, where sRest is the text following the
    directive word.

    Returns True if the macro isn't one we're tracking, otherwise the
    result of self.workerPreProcessRecreateMacroRegex.
    """
    # Drop anything from the first slash on (quick comment strip) to get the name.
    offSlash = sRest.find('/');
    sName = (sRest[:offSlash] if offSlash > 0 else sRest).strip();

    # Nothing to do unless we're tracking the macro.
    if sName not in self.dMacros:
        return True;

    if self.fDebugPreProc:
        self.debug('#undef %s on line %u' % (sName, self.iLine,));
    del self.dMacros[sName];
    return self.workerPreProcessRecreateMacroRegex();
5110
def checkPreProcessorDirectiveForDefineUndef(self, sLine):
    """
    Handles a preprocessor directive line, acting on #define and #undef.

    Returns the result of the define/undef worker if the line matches one
    of the two directives, otherwise False.
    """
    oDefineMatch = self.oReHashDefine.match(sLine);
    if oDefineMatch:
        return self.workerPreProcessDefine(oDefineMatch.group(1) + '\n');

    oUndefMatch = self.oReHashUndef.match(sLine);
    if not oUndefMatch:
        return False;
    return self.workerPreProcessUndef(oUndefMatch.group(1) + '\n');
5123
def expandMacros(self, sLine, oMatch):
    """
    Expands the macro invocation that oMatch matched in sLine.

    Only a single invocation is expanded; we ASSUME there is only one of
    the macros we know about in the line.

    Returns the line with the invocation replaced by the expansion.
    Problems are reported thru self.raiseError.
    """
    #
    # Work out which macro we're dealing with.  The match includes the
    # opening parenthesis when the macro takes arguments.
    #
    offMatch = oMatch.start();
    offMatchEnd = oMatch.end();
    sMatched = oMatch.group(1);
    assert sMatched == sLine[offMatch : offMatchEnd];
    fWithArgs = sMatched.endswith('(');
    sName = sMatched[:-1].strip() if fWithArgs else sMatched;
    oMacro = self.dMacros[sName] # type: SimpleParser.Macro

    #
    # Simple macro invocations w/o parameters are quickly done.
    #
    if not fWithArgs:
        if self.fDebugPreProc:
            self.debug('expanding simple macro %s on line %u' % (sName, self.iLine,));
        return sLine[:offMatch] + oMacro.expandMacro(self) + sLine[offMatchEnd:];

    #
    # Macro with parameters: Split up the arguments at the top level commas
    # while keeping track of the parenthesis nesting.  ASSUMES the whole
    # invocation is on this line!
    #
    asArgs = [];
    cLevel = 1;
    offCurArg = offMatchEnd;
    for offCur in range(offMatchEnd, len(sLine)):
        ch = sLine[offCur];
        if ch == '(':
            cLevel += 1;
        elif ch == ')':
            cLevel -= 1;
            if cLevel == 0:
                asArgs.append(sLine[offCurArg:offCur].strip());
                break;
        elif ch == ',' and cLevel == 1:
            asArgs.append(sLine[offCurArg:offCur].strip());
            offCurArg = offCur + 1;
    else:
        # Ran out of line before finding the closing parenthesis.
        self.raiseError('expandMacros: Invocation of macro %s spans multiple lines!' % (sName,));

    # An empty parameter list parses as a single empty argument.
    if len(oMacro.asArgs) == 0 and len(asArgs) == 1 and asArgs[0] == '':
        asArgs = [];
    if len(oMacro.asArgs) != len(asArgs):
        self.raiseError('expandMacros: Argument mismatch in %s invocation' % (oMacro.sName,));

    #
    # Expand it; offCur is the offset of the closing parenthesis here.
    #
    if self.fDebugPreProc:
        self.debug('expanding macro %s on line %u with arguments %s' % (sName, self.iLine, asArgs));
    return sLine[:offMatch] + oMacro.expandMacro(self, asArgs) + sLine[offCur + 1 :];
5182
def parse(self):
    """
    Parses the given file.

    The lines in self.asLines are processed one by one, tracking whether
    we are in code (kiCode) or inside a multi-line comment (kiCommentMulti).
    Code is handed to checkCodeForMacro, completed multi-line comments to
    parseComment, and preprocessor directives to
    checkPreProcessorDirectiveForDefineUndef.

    Returns number of errors (the return value of self.printErrors()).
    Raises exception on fatal trouble.
    """
    #self.debug('Parsing %s' % (self.sSrcFile,));

    #
    # Loop thru the lines.
    #
    # Please mind that self.iLine may be updated by checkCodeForMacro and
    # other worker methods.
    #
    while self.iLine < len(self.asLines):
        sLine = self.asLines[self.iLine];
        # self.iLine is now the 1-based number of the line held in sLine.
        self.iLine += 1;
        #self.debug('line %u: %s' % (self.iLine, sLine[:-1]));

        # Expand macros we know about if we're currently in code.
        if self.iState == self.kiCode and self.oReMacros:
            oMatch = self.oReMacros.search(sLine);
            if oMatch:
                sLine = self.expandMacros(sLine, oMatch);
                if self.fDebugPreProc:
                    self.debug('line %d: expanded\n%s ==>\n%s' % (self.iLine, self.asLines[self.iLine - 1], sLine[:-1],));
                # Store the expanded text so later users of self.asLines see it too.
                self.asLines[self.iLine - 1] = sLine;

        # Look for comments.
        offSlash = sLine.find('/');
        if offSlash >= 0:
            # Unless the first slash starts a C++ comment while we're in code,
            # we scan the whole line, switching between code and comment state.
            if offSlash + 1 >= len(sLine) or sLine[offSlash + 1] != '/' or self.iState != self.kiCode:
                offLine = 0;
                while offLine < len(sLine):
                    if self.iState == self.kiCode:
                        # Look for substantial multiline comment so we pass the following MC as a whole line:
                        #   IEM_MC_ARG_CONST(uint8_t, bImmArg, /*=*/ bImm, 2);
                        # Note! We ignore C++ comments here, assuming these aren't used in lines with C-style comments.
                        offHit = sLine.find('/*', offLine);
                        while offHit >= 0:
                            offEnd = sLine.find('*/', offHit + 2);
                            if offEnd < 0 or offEnd - offHit >= 16: # 16 chars is a bit random.
                                break;
                            # Short comment closed on this line: skip it and look for the next one.
                            offHit = sLine.find('/*', offEnd);

                        if offHit >= 0:
                            # Check the code in front of the comment, then enter comment state.
                            self.checkCodeForMacro(sLine[offLine:offHit], offLine);
                            self.sComment = '';
                            self.iCommentLine = self.iLine;
                            self.iState = self.kiCommentMulti;
                            offLine = offHit + 2;
                        else:
                            # The rest of the line is code.
                            self.checkCodeForMacro(sLine[offLine:], offLine);
                            offLine = len(sLine);

                    elif self.iState == self.kiCommentMulti:
                        offHit = sLine.find('*/', offLine);
                        if offHit >= 0:
                            # End of the comment: back to code state and parse what we collected.
                            self.sComment += sLine[offLine:offHit];
                            self.iState = self.kiCode;
                            offLine = offHit + 2;
                            self.parseComment();
                        else:
                            # The comment continues on the next line.
                            self.sComment += sLine[offLine:];
                            offLine = len(sLine);
                    else:
                        assert False;
            # C++ line comment.
            elif offSlash > 0:
                self.checkCodeForMacro(sLine[:offSlash], 0);

        # No slash, but append the line if in multi-line comment.
        elif self.iState == self.kiCommentMulti:
            #self.debug('line %d: multi' % (self.iLine,));
            self.sComment += sLine;

        # No slash, but check if this is a macro #define or #undef, since we
        # need to be able to selectively expand the ones containing MC blocks.
        elif self.iState == self.kiCode and sLine.lstrip().startswith('#'):
            if self.fDebugPreProc:
                self.debug('line %d: pre-proc' % (self.iLine,));
            self.checkPreProcessorDirectiveForDefineUndef(sLine);

        # No slash, but check code line for relevant macro.
        elif ( self.iState == self.kiCode
              and (sLine.find('IEMOP_') >= 0 or sLine.find('FNIEMOPRM_DEF') >= 0 or sLine.find('IEM_MC') >= 0)):
            #self.debug('line %d: macro' % (self.iLine,));
            self.checkCodeForMacro(sLine, 0);

        # If the line is a '}' in the first position, complete the instructions.
        elif self.iState == self.kiCode and sLine[0] == '}':
            #self.debug('line %d: }' % (self.iLine,));
            self.doneInstructions(fEndOfFunction = True);

        # Look for instruction table on the form 'IEM_STATIC const PFNIEMOP g_apfnVexMap3'
        # so we can check/add @oppfx info from it.
        elif self.iState == self.kiCode and sLine.find('PFNIEMOP') > 0 and self.oReFunTable.match(sLine):
            self.parseFunctionTable(sLine);

    # End of file: complete any pending instructions and report the statistics
    # (max() avoids dividing by zero when no instructions were found).
    self.doneInstructions(fEndOfFunction = True);
    self.debug('%3s%% / %3s stubs out of %4s instructions and %4s MC blocks in %s'
               % (self.cTotalStubs * 100 // max(self.cTotalInstr, 1), self.cTotalStubs, self.cTotalInstr,
                  self.cTotalMcBlocks, os.path.basename(self.sSrcFile),));
    return self.printErrors();
5288
## The parsed content of IEMAllInstCommonBodyMacros.h.
## Set by __parseFileByName on its first call and then passed on to the
## SimpleParser instance created for each source file.
g_oParsedCommonBodyMacros = None # type: SimpleParser
5291
def __parseFileByName(sSrcFile, sDefaultMap):
    """
    Parses one source file for instruction specifications.

    On the first call IEMAllInstCommonBodyMacros.h is parsed as well and the
    resulting parser is kept in g_oParsedCommonBodyMacros, so the macros from
    it can be used when processing this and subsequent files.

    Returns a tuple with the error count from the parsing and the parser.
    Raises Exception if a file cannot be opened or read, and ParserException
    (after printing it to stderr) on parser trouble.
    """
    #
    # Read sSrcFile into a line array.
    #
    try:
        oFile = open(sSrcFile, "r"); # pylint: disable=consider-using-with,unspecified-encoding
    except Exception as oXcpt:
        raise Exception("failed to open %s for reading: %s" % (sSrcFile, oXcpt,));
    try:
        asLines = oFile.readlines();
    except Exception as oXcpt:
        raise Exception("failed to read %s: %s" % (sSrcFile, oXcpt,));
    finally:
        oFile.close();

    #
    # On the first call, we parse IEMAllInstCommonBodyMacros.h so we
    # can use the macros from it when processing the other files.
    #
    global g_oParsedCommonBodyMacros;
    if g_oParsedCommonBodyMacros is None:
        # Locate the file: Next to the source file first, then next to this script.
        sCommonBodyMacros = os.path.join(os.path.split(sSrcFile)[0], 'IEMAllInstCommonBodyMacros.h');
        if not os.path.isfile(sCommonBodyMacros):
            sCommonBodyMacros = os.path.join(os.path.split(__file__)[0], 'IEMAllInstCommonBodyMacros.h');

        # Read it.
        try:
            with open(sCommonBodyMacros, "r") as oIncFile: # pylint: disable=unspecified-encoding
                asIncFiles = oIncFile.readlines();
        except Exception as oXcpt:
            raise Exception("failed to open/read %s: %s" % (sCommonBodyMacros, oXcpt,));

        # Parse it.  The file must be error free and must not define any
        # instructions, stubs, tags or MC blocks on its own.
        try:
            oParser = SimpleParser(sCommonBodyMacros, asIncFiles, 'one');
            if oParser.parse() != 0:
                raise ParserException('%s: errors: See above' % (sCommonBodyMacros, ));
            if oParser.cTotalInstr != 0 or oParser.cTotalStubs != 0 or oParser.cTotalTagged != 0 or oParser.cTotalMcBlocks != 0:
                # Note! The argument order must match the labels in the message: instr, tags, stubs, MCs.
                raise ParserException('%s: error: Unexpectedly found %u instr, %u tags, %u stubs and %u MCs, expecting zero. %s'
                                      % (sCommonBodyMacros, oParser.cTotalInstr, oParser.cTotalTagged, oParser.cTotalStubs,
                                         oParser.cTotalMcBlocks,
                                         ', '.join(sorted(  [str(oMcBlock.iBeginLine) for oMcBlock in g_aoMcBlocks]
                                                          + [str(oInstr.iLineCreated) for oInstr in g_aoAllInstructions])),));
        except ParserException as oXcpt:
            print(str(oXcpt), file = sys.stderr);
            raise;
        g_oParsedCommonBodyMacros = oParser;

    #
    # Do the parsing.
    #
    try:
        oParser = SimpleParser(sSrcFile, asLines, sDefaultMap, g_oParsedCommonBodyMacros);
        return (oParser.parse(), oParser) ;
    except ParserException as oXcpt:
        print(str(oXcpt), file = sys.stderr);
        raise;
5353
5354
def __doTestCopying():
    """
    Carries out the asCopyTests instructions of all the instructions.

    Each name in asCopyTests is looked up in g_dAllInstructionsByStat first
    and then in g_dAllInstructionsByFunction, and the tests of the
    instructions found are added to the referencing instruction.  References
    which cannot be found or which refer back to the instruction itself are
    reported on stderr.

    Returns the number of errors.
    """
    asErrors = [];
    for oDstInstr in g_aoAllInstructions:
        if not oDstInstr.asCopyTests:
            continue;
        for sSrcInstr in oDstInstr.asCopyTests:
            # Statistics name lookup first, falling back on the function name.
            oByStat = g_dAllInstructionsByStat.get(sSrcInstr, None);
            if oByStat:
                aoSrcInstrs = [oByStat,];
            else:
                aoSrcInstrs = g_dAllInstructionsByFunction.get(sSrcInstr, []);

            if not aoSrcInstrs:
                asErrors.append('%s:%s: error: @opcopytests reference "%s" not found\n'
                                % ( oDstInstr.sSrcFile, oDstInstr.iLineCreated, sSrcInstr));
                continue;

            for oSrcInstr in aoSrcInstrs:
                if oSrcInstr != oDstInstr:
                    oDstInstr.aoTests.extend(oSrcInstr.aoTests);
                else:
                    asErrors.append('%s:%s: error: @opcopytests reference "%s" matches the destination\n'
                                    % ( oDstInstr.sSrcFile, oDstInstr.iLineCreated, sSrcInstr));

    if asErrors:
        sys.stderr.write(u''.join(asErrors));
    return len(asErrors);
5382
5383
def __applyOnlyTest():
    """
    Restricts testing to the instructions in g_aoOnlyTestInstructions.

    If that list contains anything, aoTests is emptied for all other
    instructions which have tests.  Nothing is changed if it is empty.

    Returns 0 (error count).
    """
    if not g_aoOnlyTestInstructions:
        return 0;

    for oInstr in g_aoAllInstructions:
        if oInstr.aoTests and oInstr not in g_aoOnlyTestInstructions:
            oInstr.aoTests = [];
    return 0;
5395
## List of all main instruction files, their default maps and file sets (-1 means included in all sets).
## Each entry is a tuple: ( file name, default map name, file set number ).
g_aaoAllInstrFilesAndDefaultMapAndSet = (
    ( 'IEMAllInstCommon.cpp.h', 'one', -1, ),
    ( 'IEMAllInstOneByte.cpp.h', 'one', 1, ),
    ( 'IEMAllInst3DNow.cpp.h', '3dnow', 2, ),
    ( 'IEMAllInstTwoByte0f.cpp.h', 'two0f', 2, ),
    ( 'IEMAllInstThree0f38.cpp.h', 'three0f38', 3, ),
    ( 'IEMAllInstThree0f3a.cpp.h', 'three0f3a', 3, ),
    ( 'IEMAllInstVexMap1.cpp.h', 'vexmap1', 4, ),
    ( 'IEMAllInstVexMap2.cpp.h', 'vexmap2', 4, ),
    ( 'IEMAllInstVexMap3.cpp.h', 'vexmap3', 4, ),
);
5408
def __parseFilesWorker(asFilesAndDefaultMap):
    """
    Parses all the IEMAllInstruction*.cpp.h files.

    asFilesAndDefaultMap is a sequence of (filename, default map) pairs.
    Filenames without a directory component which don't exist relative to
    the current directory are looked for in the directory of this script.

    After parsing, the test copying and only-test filtering are applied and
    the total stub statistics are printed to stderr.

    Returns a list of the parsers on success.
    Raises exception on failure.
    """
    sSrcDir = os.path.dirname(os.path.abspath(__file__));
    cErrors = 0;
    aoParsers = [];
    for sFilename, sDefaultMap in asFilesAndDefaultMap:
        if not os.path.split(sFilename)[0] and not os.path.exists(sFilename):
            sFilename = os.path.join(sSrcDir, sFilename);
        cThisErrors, oParser = __parseFileByName(sFilename, sDefaultMap);
        cErrors += cThisErrors;
        aoParsers.append(oParser);
    cErrors += __doTestCopying();
    cErrors += __applyOnlyTest();

    # Total stub stats:
    # Note! The divisor is clamped to 1 so an empty instruction list (e.g. no
    #       input files) doesn't cause a ZeroDivisionError, same as the
    #       per-file statistics in SimpleParser.parse.
    cTotalStubs = 0;
    for oInstr in g_aoAllInstructions:
        cTotalStubs += oInstr.fStub;
    cTotalInstr = len(g_aoAllInstructions);
    print('debug: %3s%% / %3s stubs out of %4s instructions and %4s MC blocks in total'
          % (cTotalStubs * 100 // max(cTotalInstr, 1), cTotalStubs, cTotalInstr, len(g_aoMcBlocks),),
          file = sys.stderr);

    if cErrors != 0:
        raise Exception('%d parse errors' % (cErrors,));
    return aoParsers;
5439
5440
def parseFiles(asFiles):
    """
    Parses a selection of IEMAllInstruction*.cpp.h files.

    The default map of each file is taken from the entry in
    g_aaoAllInstrFilesAndDefaultMapAndSet with the same (case-insensitive)
    file name, ignoring any directory in the given name.

    Returns a list of the parsers on success.
    Raises exception on failure, including files we cannot classify.
    """
    asFilesAndDefaultMap = [];
    for sFilename in asFiles:
        sBaseName = os.path.split(sFilename)[1].lower();
        # The first table entry matching the file name supplies the map.
        sMap = next((aoInfo[1] for aoInfo in g_aaoAllInstrFilesAndDefaultMapAndSet
                     if aoInfo[0].lower() == sBaseName), None);
        if not sMap:
            raise Exception('Unable to classify file: %s' % (sFilename,));
        asFilesAndDefaultMap.append((sFilename, sMap));

    # The worker does the actual job.
    return __parseFilesWorker(asFilesAndDefaultMap);
5462
5463
def parseAll():
    """
    Parses all the IEMAllInstruction*.cpp.h files, i.e. every file listed
    in g_aaoAllInstrFilesAndDefaultMapAndSet.

    Returns a list of the parsers on success.
    Raises exception on failure.
    """
    # The worker only wants the file name and default map of each entry.
    asFilesAndDefaultMap = [];
    for aoInfo in g_aaoAllInstrFilesAndDefaultMapAndSet:
        asFilesAndDefaultMap.append(aoInfo[0:2]);
    return __parseFilesWorker(asFilesAndDefaultMap);
5472
5473
5474#
5475# Generators (may perhaps move later).
5476#
def __formatDisassemblerTableEntry(oInstr):
    """
    Formats the disassembler table entry for oInstr.

    The entry is an OP() macro invocation, or OPVEX() if the instruction has
    more than three operands, with the arguments padded out to fixed columns.

    Returns the entry as a single line (no newline).
    """
    cOperands = len(oInstr.aoOperands);
    if cOperands > 3:
        sMacro = 'OPVEX'
        cMaxOperands = 4;
        assert cOperands <= cMaxOperands;
    else:
        sMacro = 'OP';
        cMaxOperands = 3;

    #
    # Format string.
    #
    asOperandFmts = [];
    for oOperand in oInstr.aoOperands:
        sFmt = g_kdOpTypes[oOperand.sType][2];
        if sFmt[0] != '%':                              ## @todo remove upper() later.
            sFmt = sFmt.upper();                        ## @todo remove upper() later.
        asOperandFmts.append(sFmt);
    sFormat = '%s("%s' % (sMacro, oInstr.sMnemonic,);
    if asOperandFmts:
        sFormat += ' ' + ','.join(asOperandFmts);
    sFormat += '",';

    #
    # Decoders.
    #
    sEncoding = oInstr.sEncoding;
    asDecoders = [];
    if sEncoding is None or sEncoding in ( 'fixed', 'VEX.fixed' ):
        pass;
    elif sEncoding == 'ModR/M':
        # ASSUME the first operand is using the ModR/M encoding
        assert cOperands >= 1 and oInstr.aoOperands[0].usesModRM(), "oInstr=%s" % (oInstr,);
        asDecoders.append('IDX_ParseModRM,');
    elif sEncoding == 'prefix':
        asDecoders.extend(['0,'] * cOperands);
    elif sEncoding == 'VEX.ModR/M':
        asDecoders.append('IDX_ParseModRM,');
    elif sEncoding == 'vex2':
        asDecoders.append('IDX_ParseVex2b,')
    elif sEncoding == 'vex3':
        asDecoders.append('IDX_ParseVex3b,')
    elif sEncoding in g_dInstructionMaps:
        asDecoders.append(g_dInstructionMaps[sEncoding].sDisParse + ',');
    else:
        ## @todo
        #IDX_ParseTwoByteEsc,
        #IDX_ParseGrp1,
        #IDX_ParseShiftGrp2,
        #IDX_ParseGrp3,
        #IDX_ParseGrp4,
        #IDX_ParseGrp5,
        #IDX_Parse3DNow,
        #IDX_ParseGrp6,
        #IDX_ParseGrp7,
        #IDX_ParseGrp8,
        #IDX_ParseGrp9,
        #IDX_ParseGrp10,
        #IDX_ParseGrp12,
        #IDX_ParseGrp13,
        #IDX_ParseGrp14,
        #IDX_ParseGrp15,
        #IDX_ParseGrp16,
        #IDX_ParseThreeByteEsc4,
        #IDX_ParseThreeByteEsc5,
        #IDX_ParseModFence,
        #IDX_ParseEscFP,
        #IDX_ParseNopPause,
        #IDX_ParseInvOpModRM,
        assert False, str(oInstr);

    # Check for immediates and stuff in the operands not covered above, then
    # pad the decoder columns with zeros.
    aoRemainingOperands = oInstr.aoOperands[len(asDecoders):];
    for oOperand in aoRemainingOperands:
        asDecoders.append(g_kdOpTypes[oOperand.sType][0] + ',');
    asDecoders.extend(['0,'] * (cMaxOperands - len(asDecoders)));

    #
    # Opcode and operands.
    #
    assert oInstr.sDisEnum, str(oInstr);
    asParams = ['OP_PARM_' + g_kdOpTypes[oOperand.sType][3] + ',' for oOperand in oInstr.aoOperands];
    asParams.extend(['OP_PARM_NONE,'] * (cMaxOperands - len(asParams)));

    #
    # Flags.  Only the DISOPTYPE_ hints are relevant here.
    #
    asFlags = [];
    for sHint in sorted(oInstr.dHints.keys()):
        sDefine = g_kdHints[sHint];
        if sDefine.startswith('DISOPTYPE_'):
            asFlags.append(sDefine);
    sFlags = (' | '.join(asFlags) or '0') + '),';

    #
    # Format the columns into a line.  Each column starts at its designated
    # offset, or one space after the previous one if that ran too long.
    #
    asColumns = [ sFormat, ] + asDecoders + [ oInstr.sDisEnum + ',', ] + asParams + [ sFlags, ];
    aoffColumns = [4, 29, 49, 65, 77, 89, 109, 125, 141, 157, 183, 199];
    sLine = '';
    for iColumn, sColumn in enumerate(asColumns):
        cchPadding = aoffColumns[iColumn] - len(sLine);
        sLine += ' ' * max(cchPadding, 1) + sColumn;

    # OP("psrlw %Vdq,%Wdq", IDX_ParseModRM, IDX_UseModRM, 0, OP_PSRLW, OP_PARM_Vdq, OP_PARM_Wdq, OP_PARM_NONE,
    # DISOPTYPE_HARMLESS),
    # define OP(pszOpcode, idxParse1, idxParse2, idxParse3, opcode, param1, param2, param3, optype) \
    # { pszOpcode, idxParse1, idxParse2, idxParse3, 0, opcode, param1, param2, param3, 0, 0, optype }
    return sLine;
5601
def __checkIfShortTable(aoTableOrdered, oMap):
    """
    Checks whether leading and trailing None entries can be trimmed off the
    table.

    Returns (iInstr, cInstructions, fShortTable), where iInstr is the index
    of the first entry to output and cInstructions the index following the
    last one.  For a full table that is (0, len(aoTableOrdered), False).

    Note! The short table version is chosen when the number of trimmable
          entries is at least 1/30th of the table size (integer division).
          The comment originally accompanying this check said 'more than
          30%', so the intended threshold should be confirmed.
    """
    cEntries = len(aoTableOrdered);

    # Determine how much we can trim off at either end.
    aiUsed = [iEntry for iEntry, oEntry in enumerate(aoTableOrdered) if oEntry is not None];
    if aiUsed:
        iFirst = aiUsed[0];
        iEnd = aiUsed[-1] + 1;
    else:
        iFirst = 0;
        iEnd = 0;

    # Go for the short table version if we save enough.
    cTrimmed = iFirst + cEntries - iEnd;
    if cTrimmed >= cEntries // 30:
        return (iFirst, iEnd, True);
    _ = oMap; # Use this for overriding.

    # Output the full table.
    return (0, cEntries, False);
5623
def generateDisassemblerTables(oDstFile = sys.stdout):
    """
    Generates disassembler tables.

    Parses all the instruction files, then writes one DISOPCODE table and one
    DISOPMAPDESC map range record per instruction map to oDstFile.  Only maps
    whose name starts with "vex" are dumped at the moment.

    Returns exit code: 0 on success, 1 if parsing failed.
    """

    #
    # Parse all.
    #
    try:
        parseAll();
    except Exception as oXcpt:
        print('error: parseAll failed: %s' % (oXcpt,), file = sys.stderr);
        traceback.print_exc(file = sys.stderr);
        return 1;

    #
    # The disassembler uses a slightly different table layout to save space:
    # maps selected by opcode byte + prefix are split up into one map per
    # prefix (none, 0x66, 0xf3 and 0xf2).
    #
    aoDisasmMaps = [];
    for sName, oMap in sorted(iter(g_dInstructionMaps.items()),
                              key = lambda aKV: aKV[1].sEncoding + ''.join(aKV[1].asLeadOpcodes)):
        if oMap.sSelector != 'byte+pfx':
            aoDisasmMaps.append(oMap);
        else:
            # Split the map by prefix.
            aoDisasmMaps.append(oMap.copy(oMap.sName, 'none'));
            aoDisasmMaps.append(oMap.copy(oMap.sName + '_66', '0x66'));
            aoDisasmMaps.append(oMap.copy(oMap.sName + '_F3', '0xf3'));
            aoDisasmMaps.append(oMap.copy(oMap.sName + '_F2', '0xf2'));

    #
    # Dump each map.
    #
    # Note! asHeaderLines collects the matching extern declarations, but this
    #       function does not write them anywhere yet.
    #
    asHeaderLines = [];
    print("debug: maps=%s\n" % (', '.join([oMap.sName for oMap in aoDisasmMaps]),), file = sys.stderr);
    for oMap in aoDisasmMaps:
        sName = oMap.sName;

        if not sName.startswith("vex"): continue; # only looking at the vex maps at the moment.

        #
        # Get the instructions for the map and see if we can do a short version or not.
        #
        aoTableOrder    = oMap.getInstructionsInTableOrder();
        cEntriesPerByte = oMap.getEntriesPerByte();
        cEntriesPerRow  = 0x10 * cEntriesPerByte;   # One output row covers 16 opcode bytes.
        (iInstrStart, iInstrEnd, fShortTable) = __checkIfShortTable(aoTableOrder, oMap);

        # An empty range gets a single dummy entry (see below), so the table
        # is never smaller than one element.
        fEmptyTable   = iInstrStart >= iInstrEnd;
        cTableEntries = 1 if fEmptyTable else iInstrEnd - iInstrStart;

        #
        # Output the table start.
        # Note! Short tables are static and only accessible via the map range record.
        #
        asLines = [];
        asLines.append('/* Generated from: %-11s Selector: %-7s Encoding: %-7s Lead bytes opcodes: %s */'
                       % ( oMap.sName, oMap.sSelector, oMap.sEncoding, ' '.join(oMap.asLeadOpcodes), ));
        if fShortTable:
            asLines.append('static const DISOPCODE %s[] =' % (oMap.getDisasTableName(),));
        else:
            asHeaderLines.append('extern const DISOPCODE %s[%d];' % (oMap.getDisasTableName(), cTableEntries,));
            asLines.append( 'const DISOPCODE %s[%d] =' % (oMap.getDisasTableName(), cTableEntries,));
        asLines.append('{');

        # A short table starting in the middle of a row gets an offset comment
        # instead of the regular row comment.
        if fShortTable and (iInstrStart & (cEntriesPerRow - 1)) != 0:
            asLines.append(' /* %#04x: */' % (iInstrStart,));

        #
        # Output the instructions.
        #
        iInstr = iInstrStart;
        while iInstr < iInstrEnd:
            oInstr = aoTableOrder[iInstr];
            if (iInstr & (cEntriesPerRow - 1)) == 0:
                if iInstr != iInstrStart:
                    asLines.append('');
                asLines.append(' /* %x */' % ((iInstr // cEntriesPerByte) >> 4,));

            if oInstr is None:
                # Invalid.  Optimize blocks of invalid instructions.
                cInvalidInstrs = 1;
                while iInstr + cInvalidInstrs < len(aoTableOrder) and aoTableOrder[iInstr + cInvalidInstrs] is None:
                    cInvalidInstrs += 1;
                if (iInstr & (cEntriesPerRow - 1)) == 0 and cInvalidInstrs >= cEntriesPerRow:
                    # A whole row of invalid entries; skip the rest of it
                    # (the final increment below accounts for the last one).
                    asLines.append(' INVALID_OPCODE_BLOCK_%u,' % (cEntriesPerRow,));
                    iInstr += cEntriesPerRow - 1;
                elif cEntriesPerByte > 1:
                    if (iInstr & (cEntriesPerByte - 1)) == 0 and cInvalidInstrs >= cEntriesPerByte:
                        # All entries of one opcode byte are invalid.  Skip
                        # exactly the entries covered by the block macro
                        # rather than assuming four entries per byte.
                        asLines.append(' INVALID_OPCODE_BLOCK_%u,' % (cEntriesPerByte,));
                        iInstr += cEntriesPerByte - 1;
                    else:
                        asLines.append(' /* %#04x/%d */ INVALID_OPCODE,'
                                       % (iInstr // cEntriesPerByte, iInstr % cEntriesPerByte));
                else:
                    asLines.append(' /* %#04x */ INVALID_OPCODE,' % (iInstr,));
            elif isinstance(oInstr, list):
                if oInstr:
                    asLines.append(' /* %#04x */ ComplicatedListStuffNeedingWrapper, /* \n -- %s */'
                                   % (iInstr, '\n -- '.join([str(oItem) for oItem in oInstr]),));
                else:
                    asLines.append(__formatDisassemblerTableEntry(oInstr));
            else:
                asLines.append(__formatDisassemblerTableEntry(oInstr));

            iInstr += 1;

        # Emit a dummy entry for an empty range so the table has an initializer.
        if fEmptyTable:
            asLines.append(' /* dummy */ INVALID_OPCODE');

        asLines.append('};');
        # The element count includes the dummy entry, otherwise the assertion
        # would claim zero elements for a one element table.
        asLines.append('AssertCompile(RT_ELEMENTS(%s) == %s);' % (oMap.getDisasTableName(), cTableEntries,));

        #
        # We always emit a map range record, assuming the linker will eliminate the unnecessary ones.
        # The extern declaration must use the exact same symbol name as the definition.
        #
        asHeaderLines.append('extern const DISOPMAPDESC %s;' % (oMap.getDisasRangeName(),));
        asLines.append('const DISOPMAPDESC %s = { &%s[0], %#04x, RT_ELEMENTS(%s) };'
                       % (oMap.getDisasRangeName(), oMap.getDisasTableName(), iInstrStart, oMap.getDisasTableName(),));

        #
        # Write out the lines, followed by a blank line separating the maps.
        #
        oDstFile.write('\n'.join(asLines));
        oDstFile.write('\n');
        oDstFile.write('\n');
    return 0;
5752
if __name__ == '__main__':
    # Run as a script: generate the tables and exit with the returned code.
    iExitCode = generateDisassemblerTables();
    sys.exit(iExitCode);
5755
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette