VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllInstPython.py@ 100746

Last change on this file since 100746 was 100743, checked in by vboxsync, 20 months ago

VMM/IEM: Split up IEMAllThrdTables.cpp into four files to speed up compilation, just like done in r158610 for the interpreter. bugref:10369

  • Property svn:eol-style set to LF
  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 270.8 KB
Line 
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3# $Id: IEMAllInstPython.py 100743 2023-07-30 23:17:41Z vboxsync $
4
5"""
6IEM instruction extractor.
7
8This script/module parses the IEMAllInstruction*.cpp.h files next to it and
9collects information about the instructions. It can then be used to generate
10disassembler tables and tests.
11"""
12
13from __future__ import print_function;
14
15__copyright__ = \
16"""
17Copyright (C) 2017-2023 Oracle and/or its affiliates.
18
19This file is part of VirtualBox base platform packages, as
20available from https://www.virtualbox.org.
21
22This program is free software; you can redistribute it and/or
23modify it under the terms of the GNU General Public License
24as published by the Free Software Foundation, in version 3 of the
25License.
26
27This program is distributed in the hope that it will be useful, but
28WITHOUT ANY WARRANTY; without even the implied warranty of
29MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
30General Public License for more details.
31
32You should have received a copy of the GNU General Public License
33along with this program; if not, see <https://www.gnu.org/licenses>.
34
35The contents of this file may alternatively be used under the terms
36of the Common Development and Distribution License Version 1.0
37(CDDL), a copy of it is provided in the "COPYING.CDDL" file included
38in the VirtualBox distribution, in which case the provisions of the
39CDDL are applicable instead of those of the GPL.
40
41You may elect to license modified versions of this file under the
42terms and conditions of either the GPL or the CDDL or both.
43
44SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
45"""
46__version__ = "$Revision: 100743 $"
47
48# pylint: disable=anomalous-backslash-in-string,too-many-lines
49
50# Standard python imports.
51import os;
52import re;
53import sys;
54import traceback;
55
56## Only the main script needs to modify the path.
57#g_ksValidationKitDir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),
58# 'ValidationKit');
59#sys.path.append(g_ksValidationKitDir);
60#
61#from common import utils; - Windows build boxes doesn't have pywin32.
62
63# Python 3 hacks:
# The 'long' builtin is not defined on Python 3, so alias it to 'int' there;
# on Python 2 the native 'long' type is left alone.
if sys.version_info[0] >= 3:
    long = int;     # pylint: disable=redefined-builtin,invalid-name
66
67
## Constants and values for EFLAGS.
## Maps the X86_EFL_XXX constant name to its numeric value; the trailing
## comment on each entry gives the equivalent bit expression.
g_kdX86EFlagsConstants = {
    'X86_EFL_CF': 0x00000001, # RT_BIT_32(0)
    'X86_EFL_1': 0x00000002, # RT_BIT_32(1)
    'X86_EFL_PF': 0x00000004, # RT_BIT_32(2)
    'X86_EFL_AF': 0x00000010, # RT_BIT_32(4)
    'X86_EFL_ZF': 0x00000040, # RT_BIT_32(6)
    'X86_EFL_SF': 0x00000080, # RT_BIT_32(7)
    'X86_EFL_TF': 0x00000100, # RT_BIT_32(8)
    'X86_EFL_IF': 0x00000200, # RT_BIT_32(9)
    'X86_EFL_DF': 0x00000400, # RT_BIT_32(10)
    'X86_EFL_OF': 0x00000800, # RT_BIT_32(11)
    'X86_EFL_IOPL': 0x00003000, # (RT_BIT_32(12) | RT_BIT_32(13))
    'X86_EFL_NT': 0x00004000, # RT_BIT_32(14)
    'X86_EFL_RF': 0x00010000, # RT_BIT_32(16)
    'X86_EFL_VM': 0x00020000, # RT_BIT_32(17)
    'X86_EFL_AC': 0x00040000, # RT_BIT_32(18)
    'X86_EFL_VIF': 0x00080000, # RT_BIT_32(19)
    'X86_EFL_VIP': 0x00100000, # RT_BIT_32(20)
    'X86_EFL_ID': 0x00200000, # RT_BIT_32(21)
    'X86_EFL_LIVE_MASK': 0x003f7fd5, # UINT32_C(0x003f7fd5)
    'X86_EFL_RA1_MASK': 0x00000002, # RT_BIT_32(1)
};
90
## EFlags values allowed in \@opfltest, \@opflmodify, \@opflundef, \@opflset, and \@opflclear.
## Maps the mnemonic to an X86_EFL_XXX constant name; a leading '!' on the
## value marks the mnemonic as the negated ("flag clear") form.
g_kdEFlagsMnemonics = {
    # Debugger flag notation (sorted by value):
    'cf': 'X86_EFL_CF', ##< Carry Flag.
    'nc': '!X86_EFL_CF', ##< No Carry.

    # NOTE(review): 'po' maps to PF set and 'pe' to PF clear here, whereas on
    #               x86 PF=1 indicates even parity - confirm the polarity is
    #               what the users of this table expect.
    'po': 'X86_EFL_PF', ##< Parity Odd.
    'pe': '!X86_EFL_PF', ##< Parity Even.

    'af': 'X86_EFL_AF', ##< Aux Flag.
    'na': '!X86_EFL_AF', ##< No Aux.

    'zr': 'X86_EFL_ZF', ##< ZeRo.
    'nz': '!X86_EFL_ZF', ##< No Zero.

    'ng': 'X86_EFL_SF', ##< NeGative (sign).
    'pl': '!X86_EFL_SF', ##< PLus (sign).

    'tf': 'X86_EFL_TF', ##< Trap flag.

    'ei': 'X86_EFL_IF', ##< Enabled Interrupts.
    'di': '!X86_EFL_IF', ##< Disabled Interrupts.

    'dn': 'X86_EFL_DF', ##< DowN (string op direction).
    'up': '!X86_EFL_DF', ##< UP (string op direction).

    'ov': 'X86_EFL_OF', ##< OVerflow.
    'nv': '!X86_EFL_OF', ##< No Overflow.

    'nt': 'X86_EFL_NT', ##< Nested Task.
    'rf': 'X86_EFL_RF', ##< Resume Flag.
    'vm': 'X86_EFL_VM', ##< Virtual-8086 Mode.
    'ac': 'X86_EFL_AC', ##< Alignment Check.
    'vif': 'X86_EFL_VIF', ##< Virtual Interrupt Flag.
    'vip': 'X86_EFL_VIP', ##< Virtual Interrupt Pending.

    # Reference manual notation not covered above (sorted by value):
    'pf': 'X86_EFL_PF',
    'zf': 'X86_EFL_ZF',
    'sf': 'X86_EFL_SF',
    'if': 'X86_EFL_IF',
    'df': 'X86_EFL_DF',
    'of': 'X86_EFL_OF',
    'iopl': 'X86_EFL_IOPL',
    'id': 'X86_EFL_ID',
};
137
## Constants and values for CR0.
## Maps the X86_CR0_XXX constant name to its numeric value.
g_kdX86Cr0Constants = {
    'X86_CR0_PE': 0x00000001, # RT_BIT_32(0)
    'X86_CR0_MP': 0x00000002, # RT_BIT_32(1)
    'X86_CR0_EM': 0x00000004, # RT_BIT_32(2)
    'X86_CR0_TS': 0x00000008, # RT_BIT_32(3)
    'X86_CR0_ET': 0x00000010, # RT_BIT_32(4)
    'X86_CR0_NE': 0x00000020, # RT_BIT_32(5)
    'X86_CR0_WP': 0x00010000, # RT_BIT_32(16)
    'X86_CR0_AM': 0x00040000, # RT_BIT_32(18)
    'X86_CR0_NW': 0x20000000, # RT_BIT_32(29)
    'X86_CR0_CD': 0x40000000, # RT_BIT_32(30)
    'X86_CR0_PG': 0x80000000, # RT_BIT_32(31)
};
152
## Constants and values for CR4.
## Maps the X86_CR4_XXX constant name to its numeric value.
g_kdX86Cr4Constants = {
    'X86_CR4_VME': 0x00000001, # RT_BIT_32(0)
    'X86_CR4_PVI': 0x00000002, # RT_BIT_32(1)
    'X86_CR4_TSD': 0x00000004, # RT_BIT_32(2)
    'X86_CR4_DE': 0x00000008, # RT_BIT_32(3)
    'X86_CR4_PSE': 0x00000010, # RT_BIT_32(4)
    'X86_CR4_PAE': 0x00000020, # RT_BIT_32(5)
    'X86_CR4_MCE': 0x00000040, # RT_BIT_32(6)
    'X86_CR4_PGE': 0x00000080, # RT_BIT_32(7)
    'X86_CR4_PCE': 0x00000100, # RT_BIT_32(8)
    'X86_CR4_OSFXSR': 0x00000200, # RT_BIT_32(9)
    'X86_CR4_OSXMMEEXCPT': 0x00000400, # RT_BIT_32(10)
    'X86_CR4_VMXE': 0x00002000, # RT_BIT_32(13)
    'X86_CR4_SMXE': 0x00004000, # RT_BIT_32(14)
    'X86_CR4_PCIDE': 0x00020000, # RT_BIT_32(17)
    'X86_CR4_OSXSAVE': 0x00040000, # RT_BIT_32(18)
    'X86_CR4_SMEP': 0x00100000, # RT_BIT_32(20)
    'X86_CR4_SMAP': 0x00200000, # RT_BIT_32(21)
    'X86_CR4_PKE': 0x00400000, # RT_BIT_32(22)
};
174
## XSAVE components (XCR0).
## Maps the XSAVE_C_XXX constant name to its bit mask.  The two XSAVE_C_ALL_XXX
## entries are combined masks rather than single bits.
g_kdX86XSaveCConstants = {
    'XSAVE_C_X87': 0x00000001,
    'XSAVE_C_SSE': 0x00000002,
    'XSAVE_C_YMM': 0x00000004,
    'XSAVE_C_BNDREGS': 0x00000008,
    'XSAVE_C_BNDCSR': 0x00000010,
    'XSAVE_C_OPMASK': 0x00000020,
    'XSAVE_C_ZMM_HI256': 0x00000040,
    'XSAVE_C_ZMM_16HI': 0x00000080,
    'XSAVE_C_PKRU': 0x00000200,
    'XSAVE_C_LWP': 0x4000000000000000,
    'XSAVE_C_X': 0x8000000000000000,
    'XSAVE_C_ALL_AVX': 0x000000c4, # For clearing all AVX bits (YMM | ZMM_HI256 | ZMM_16HI).
    'XSAVE_C_ALL_AVX_SSE': 0x000000c6, # For clearing all AVX and SSE bits.
};
191
192
## \@op[1-4] locations
## The keys are the location names referenced by field 1 of the g_kdOpTypes
## entries; every value is an empty list.
g_kdOpLocations = {
    'reg': [], ## modrm.reg
    'rm': [], ## modrm.rm
    'imm': [], ## immediate instruction data
    'vvvv': [], ## VEX.vvvv

    # fixed registers.
    'AL': [],
    'rAX': [],
    'rDX': [],
    'rSI': [],
    'rDI': [],
    'rFLAGS': [],
    'CS': [],
    'DS': [],
    'ES': [],
    'FS': [],
    'GS': [],
    'SS': [],
};
214
## \@op[1-4] types
##
## Value fields:
## - 0: the normal IDX_ParseXXX handler (IDX_UseModRM == IDX_ParseModRM).
## - 1: the location (g_kdOpLocations).
## - 2: disassembler format string version of the type.
## - 3: disassembler OP_PARAM_XXX (XXX only).
## - 4: IEM form matching instruction.
##
## Note! See the A.2.1 in SDM vol 2 for the type names.
## Note! Entries with a _WO, _RO or _RW suffix carry the same tuple as the
##       unsuffixed type where one exists.  (Presumably the suffix denotes
##       write-only / read-only / read-write operand access - confirm.)
g_kdOpTypes = {
    # Fixed addresses
    'Ap': ( 'IDX_ParseImmAddrF', 'imm', '%Ap', 'Ap', 'FIXED', ),

    # ModR/M.rm
    'Eb': ( 'IDX_UseModRM', 'rm', '%Eb', 'Eb', 'RM', ),
    'Ed': ( 'IDX_UseModRM', 'rm', '%Ed', 'Ed', 'RM', ),
    'Ed_WO': ( 'IDX_UseModRM', 'rm', '%Ed', 'Ed', 'RM', ),
    'Eq': ( 'IDX_UseModRM', 'rm', '%Eq', 'Eq', 'RM', ),
    'Eq_WO': ( 'IDX_UseModRM', 'rm', '%Eq', 'Eq', 'RM', ),
    'Ew': ( 'IDX_UseModRM', 'rm', '%Ew', 'Ew', 'RM', ),
    'Ev': ( 'IDX_UseModRM', 'rm', '%Ev', 'Ev', 'RM', ),
    'Ey': ( 'IDX_UseModRM', 'rm', '%Ey', 'Ey', 'RM', ),
    'Qd': ( 'IDX_UseModRM', 'rm', '%Qd', 'Qd', 'RM', ),
    'Qq': ( 'IDX_UseModRM', 'rm', '%Qq', 'Qq', 'RM', ),
    'Qq_WO': ( 'IDX_UseModRM', 'rm', '%Qq', 'Qq', 'RM', ),
    'Wss': ( 'IDX_UseModRM', 'rm', '%Wss', 'Wss', 'RM', ),
    'Wss_WO': ( 'IDX_UseModRM', 'rm', '%Wss', 'Wss', 'RM', ),
    'Wsd': ( 'IDX_UseModRM', 'rm', '%Wsd', 'Wsd', 'RM', ),
    'Wsd_WO': ( 'IDX_UseModRM', 'rm', '%Wsd', 'Wsd', 'RM', ),
    'Wps': ( 'IDX_UseModRM', 'rm', '%Wps', 'Wps', 'RM', ),
    'Wps_WO': ( 'IDX_UseModRM', 'rm', '%Wps', 'Wps', 'RM', ),
    'Wpd': ( 'IDX_UseModRM', 'rm', '%Wpd', 'Wpd', 'RM', ),
    'Wpd_WO': ( 'IDX_UseModRM', 'rm', '%Wpd', 'Wpd', 'RM', ),
    'Wdq': ( 'IDX_UseModRM', 'rm', '%Wdq', 'Wdq', 'RM', ),
    'Wdq_WO': ( 'IDX_UseModRM', 'rm', '%Wdq', 'Wdq', 'RM', ),
    'Wq': ( 'IDX_UseModRM', 'rm', '%Wq', 'Wq', 'RM', ),
    'Wq_WO': ( 'IDX_UseModRM', 'rm', '%Wq', 'Wq', 'RM', ),
    'WqZxReg_WO': ( 'IDX_UseModRM', 'rm', '%Wq', 'Wq', 'RM', ),
    'Wx': ( 'IDX_UseModRM', 'rm', '%Wx', 'Wx', 'RM', ),
    'Wx_WO': ( 'IDX_UseModRM', 'rm', '%Wx', 'Wx', 'RM', ),

    # ModR/M.rm - register only.
    'Uq': ( 'IDX_UseModRM', 'rm', '%Uq', 'Uq', 'REG' ),
    'UqHi': ( 'IDX_UseModRM', 'rm', '%Uq', 'UqHi', 'REG' ),
    'Uss': ( 'IDX_UseModRM', 'rm', '%Uss', 'Uss', 'REG' ),
    'Uss_WO': ( 'IDX_UseModRM', 'rm', '%Uss', 'Uss', 'REG' ),
    'Usd': ( 'IDX_UseModRM', 'rm', '%Usd', 'Usd', 'REG' ),
    'Usd_WO': ( 'IDX_UseModRM', 'rm', '%Usd', 'Usd', 'REG' ),
    'Ux': ( 'IDX_UseModRM', 'rm', '%Ux', 'Ux', 'REG' ),
    'Nq': ( 'IDX_UseModRM', 'rm', '%Qq', 'Nq', 'REG' ),

    # ModR/M.rm - memory only.
    'Ma': ( 'IDX_UseModRM', 'rm', '%Ma', 'Ma', 'MEM', ), ##< Only used by BOUND.
    'Mb_RO': ( 'IDX_UseModRM', 'rm', '%Mb', 'Mb', 'MEM', ),
    'Md': ( 'IDX_UseModRM', 'rm', '%Md', 'Md', 'MEM', ),
    'Md_RO': ( 'IDX_UseModRM', 'rm', '%Md', 'Md', 'MEM', ),
    'Md_WO': ( 'IDX_UseModRM', 'rm', '%Md', 'Md', 'MEM', ),
    'Mdq': ( 'IDX_UseModRM', 'rm', '%Mdq', 'Mdq', 'MEM', ),
    'Mdq_WO': ( 'IDX_UseModRM', 'rm', '%Mdq', 'Mdq', 'MEM', ),
    'Mq': ( 'IDX_UseModRM', 'rm', '%Mq', 'Mq', 'MEM', ),
    'Mq_WO': ( 'IDX_UseModRM', 'rm', '%Mq', 'Mq', 'MEM', ),
    'Mps_WO': ( 'IDX_UseModRM', 'rm', '%Mps', 'Mps', 'MEM', ),
    'Mpd_WO': ( 'IDX_UseModRM', 'rm', '%Mpd', 'Mpd', 'MEM', ),
    'Mx': ( 'IDX_UseModRM', 'rm', '%Mx', 'Mx', 'MEM', ),
    'Mx_WO': ( 'IDX_UseModRM', 'rm', '%Mx', 'Mx', 'MEM', ),
    'M_RO': ( 'IDX_UseModRM', 'rm', '%M', 'M', 'MEM', ),
    'M_RW': ( 'IDX_UseModRM', 'rm', '%M', 'M', 'MEM', ),

    # ModR/M.reg
    'Gb': ( 'IDX_UseModRM', 'reg', '%Gb', 'Gb', '', ),
    'Gw': ( 'IDX_UseModRM', 'reg', '%Gw', 'Gw', '', ),
    'Gd': ( 'IDX_UseModRM', 'reg', '%Gd', 'Gd', '', ),
    'Gv': ( 'IDX_UseModRM', 'reg', '%Gv', 'Gv', '', ),
    'Gv_RO': ( 'IDX_UseModRM', 'reg', '%Gv', 'Gv', '', ),
    'Gy': ( 'IDX_UseModRM', 'reg', '%Gy', 'Gy', '', ),
    'Pd': ( 'IDX_UseModRM', 'reg', '%Pd', 'Pd', '', ),
    'PdZx_WO': ( 'IDX_UseModRM', 'reg', '%Pd', 'PdZx', '', ),
    'Pq': ( 'IDX_UseModRM', 'reg', '%Pq', 'Pq', '', ),
    'Pq_WO': ( 'IDX_UseModRM', 'reg', '%Pq', 'Pq', '', ),
    'Vd': ( 'IDX_UseModRM', 'reg', '%Vd', 'Vd', '', ),
    'Vd_WO': ( 'IDX_UseModRM', 'reg', '%Vd', 'Vd', '', ),
    'VdZx_WO': ( 'IDX_UseModRM', 'reg', '%Vd', 'Vd', '', ),
    'Vdq': ( 'IDX_UseModRM', 'reg', '%Vdq', 'Vdq', '', ),
    'Vss': ( 'IDX_UseModRM', 'reg', '%Vss', 'Vss', '', ),
    'Vss_WO': ( 'IDX_UseModRM', 'reg', '%Vss', 'Vss', '', ),
    'VssZx_WO': ( 'IDX_UseModRM', 'reg', '%Vss', 'Vss', '', ),
    'Vsd': ( 'IDX_UseModRM', 'reg', '%Vsd', 'Vsd', '', ),
    'Vsd_WO': ( 'IDX_UseModRM', 'reg', '%Vsd', 'Vsd', '', ),
    'VsdZx_WO': ( 'IDX_UseModRM', 'reg', '%Vsd', 'Vsd', '', ),
    'Vps': ( 'IDX_UseModRM', 'reg', '%Vps', 'Vps', '', ),
    'Vps_WO': ( 'IDX_UseModRM', 'reg', '%Vps', 'Vps', '', ),
    'Vpd': ( 'IDX_UseModRM', 'reg', '%Vpd', 'Vpd', '', ),
    'Vpd_WO': ( 'IDX_UseModRM', 'reg', '%Vpd', 'Vpd', '', ),
    'Vq': ( 'IDX_UseModRM', 'reg', '%Vq', 'Vq', '', ),
    'Vq_WO': ( 'IDX_UseModRM', 'reg', '%Vq', 'Vq', '', ),
    'Vdq_WO': ( 'IDX_UseModRM', 'reg', '%Vdq', 'Vdq', '', ),
    'VqHi': ( 'IDX_UseModRM', 'reg', '%Vdq', 'VdqHi', '', ),
    'VqHi_WO': ( 'IDX_UseModRM', 'reg', '%Vdq', 'VdqHi', '', ),
    'VqZx_WO': ( 'IDX_UseModRM', 'reg', '%Vq', 'VqZx', '', ),
    'Vx': ( 'IDX_UseModRM', 'reg', '%Vx', 'Vx', '', ),
    'Vx_WO': ( 'IDX_UseModRM', 'reg', '%Vx', 'Vx', '', ),

    # VEX.vvvv
    'By': ( 'IDX_UseModRM', 'vvvv', '%By', 'By', 'V', ),
    'Hps': ( 'IDX_UseModRM', 'vvvv', '%Hps', 'Hps', 'V', ),
    'Hpd': ( 'IDX_UseModRM', 'vvvv', '%Hpd', 'Hpd', 'V', ),
    'HssHi': ( 'IDX_UseModRM', 'vvvv', '%Hx', 'HssHi', 'V', ),
    'HsdHi': ( 'IDX_UseModRM', 'vvvv', '%Hx', 'HsdHi', 'V', ),
    'Hq': ( 'IDX_UseModRM', 'vvvv', '%Hq', 'Hq', 'V', ),
    'HqHi': ( 'IDX_UseModRM', 'vvvv', '%Hq', 'HqHi', 'V', ),
    'Hx': ( 'IDX_UseModRM', 'vvvv', '%Hx', 'Hx', 'V', ),

    # Immediate values.
    'Ib': ( 'IDX_ParseImmByte', 'imm', '%Ib', 'Ib', '', ), ##< NB! Could be IDX_ParseImmByteSX for some instrs.
    'Iw': ( 'IDX_ParseImmUshort', 'imm', '%Iw', 'Iw', '', ),
    'Id': ( 'IDX_ParseImmUlong', 'imm', '%Id', 'Id', '', ),
    'Iq': ( 'IDX_ParseImmQword', 'imm', '%Iq', 'Iq', '', ),
    'Iv': ( 'IDX_ParseImmV', 'imm', '%Iv', 'Iv', '', ), ##< o16: word, o32: dword, o64: qword
    'Iz': ( 'IDX_ParseImmZ', 'imm', '%Iz', 'Iz', '', ), ##< o16: word, o32|o64:dword

    # Address operands (no ModR/M).
    'Ob': ( 'IDX_ParseImmAddr', 'imm', '%Ob', 'Ob', '', ),
    'Ov': ( 'IDX_ParseImmAddr', 'imm', '%Ov', 'Ov', '', ),

    # Relative jump targets
    'Jb': ( 'IDX_ParseImmBRel', 'imm', '%Jb', 'Jb', '', ),
    'Jv': ( 'IDX_ParseImmVRel', 'imm', '%Jv', 'Jv', '', ),

    # DS:rSI
    'Xb': ( 'IDX_ParseXb', 'rSI', '%eSI', 'Xb', '', ),
    'Xv': ( 'IDX_ParseXv', 'rSI', '%eSI', 'Xv', '', ),
    # ES:rDI
    'Yb': ( 'IDX_ParseYb', 'rDI', '%eDI', 'Yb', '', ),
    'Yv': ( 'IDX_ParseYv', 'rDI', '%eDI', 'Yv', '', ),

    # rFLAGS
    'Fv': ( 'IDX_ParseFixedReg', 'rFLAGS', '%Fv', 'Fv', '', ),

    # Fixed registers.
    'AL': ( 'IDX_ParseFixedReg', 'AL', 'al', 'REG_AL', '', ),
    'rAX': ( 'IDX_ParseFixedReg', 'rAX', '%eAX', 'REG_EAX', '', ),
    'rDX': ( 'IDX_ParseFixedReg', 'rDX', '%eDX', 'REG_EDX', '', ),
    'CS': ( 'IDX_ParseFixedReg', 'CS', 'cs', 'REG_CS', '', ), # 8086: push CS
    'DS': ( 'IDX_ParseFixedReg', 'DS', 'ds', 'REG_DS', '', ),
    'ES': ( 'IDX_ParseFixedReg', 'ES', 'es', 'REG_ES', '', ),
    'FS': ( 'IDX_ParseFixedReg', 'FS', 'fs', 'REG_FS', '', ),
    'GS': ( 'IDX_ParseFixedReg', 'GS', 'gs', 'REG_GS', '', ),
    'SS': ( 'IDX_ParseFixedReg', 'SS', 'ss', 'REG_SS', '', ),
};
364
365# IDX_ParseFixedReg
366# IDX_ParseVexDest
367
368
## IEMFORM_XXX mappings.
##
## Value fields:
## - 0: the encoding (a g_kdEncodings key).
## - 1: list of operand locations in operand order (g_kdOpLocations keys), or
##      None for the FIXED form.
## - 2: the sub-opcode restriction (a g_kdSubOpcodes key), or an empty string
##      when there is none.
g_kdIemForms = { # sEncoding, [ sWhere1, ... ] opcodesub ),
    'RM': ( 'ModR/M', [ 'reg', 'rm' ], '', ),
    'RM_REG': ( 'ModR/M', [ 'reg', 'rm' ], '11 mr/reg', ),
    'RM_MEM': ( 'ModR/M', [ 'reg', 'rm' ], '!11 mr/reg', ),
    'RMI': ( 'ModR/M', [ 'reg', 'rm', 'imm' ], '', ),
    'RMI_REG': ( 'ModR/M', [ 'reg', 'rm', 'imm' ], '11 mr/reg', ),
    'RMI_MEM': ( 'ModR/M', [ 'reg', 'rm', 'imm' ], '!11 mr/reg', ),
    'MR': ( 'ModR/M', [ 'rm', 'reg' ], '', ),
    'MR_REG': ( 'ModR/M', [ 'rm', 'reg' ], '11 mr/reg', ),
    'MR_MEM': ( 'ModR/M', [ 'rm', 'reg' ], '!11 mr/reg', ),
    'MRI': ( 'ModR/M', [ 'rm', 'reg', 'imm' ], '', ),
    'MRI_REG': ( 'ModR/M', [ 'rm', 'reg', 'imm' ], '11 mr/reg', ),
    'MRI_MEM': ( 'ModR/M', [ 'rm', 'reg', 'imm' ], '!11 mr/reg', ),
    # NOTE(review): unlike the other _REG/_MEM forms, the single operand M_REG,
    #               M_MEM, VEX_M_REG and VEX_M_MEM forms below have an empty
    #               sub-opcode field - confirm that this is intentional.
    'M': ( 'ModR/M', [ 'rm', ], '', ),
    'M_REG': ( 'ModR/M', [ 'rm', ], '', ),
    'M_MEM': ( 'ModR/M', [ 'rm', ], '', ),
    'R': ( 'ModR/M', [ 'reg', ], '', ),

    'VEX_RM': ( 'VEX.ModR/M', [ 'reg', 'rm' ], '', ),
    'VEX_RM_REG': ( 'VEX.ModR/M', [ 'reg', 'rm' ], '11 mr/reg', ),
    'VEX_RM_MEM': ( 'VEX.ModR/M', [ 'reg', 'rm' ], '!11 mr/reg', ),
    'VEX_MR': ( 'VEX.ModR/M', [ 'rm', 'reg' ], '', ),
    'VEX_MR_REG': ( 'VEX.ModR/M', [ 'rm', 'reg' ], '11 mr/reg', ),
    'VEX_MR_MEM': ( 'VEX.ModR/M', [ 'rm', 'reg' ], '!11 mr/reg', ),
    'VEX_M': ( 'VEX.ModR/M', [ 'rm', ], '' ),
    'VEX_M_REG': ( 'VEX.ModR/M', [ 'rm', ], '' ),
    'VEX_M_MEM': ( 'VEX.ModR/M', [ 'rm', ], '' ),
    'VEX_R': ( 'VEX.ModR/M', [ 'reg', ], '' ),
    'VEX_RVM': ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm' ], '', ),
    'VEX_RVM_REG': ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm' ], '11 mr/reg', ),
    'VEX_RVM_MEM': ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm' ], '!11 mr/reg', ),
    'VEX_RMV': ( 'VEX.ModR/M', [ 'reg', 'rm', 'vvvv' ], '', ),
    'VEX_RMV_REG': ( 'VEX.ModR/M', [ 'reg', 'rm', 'vvvv' ], '11 mr/reg', ),
    'VEX_RMV_MEM': ( 'VEX.ModR/M', [ 'reg', 'rm', 'vvvv' ], '!11 mr/reg', ),
    'VEX_RMI': ( 'VEX.ModR/M', [ 'reg', 'rm', 'imm' ], '', ),
    'VEX_RMI_REG': ( 'VEX.ModR/M', [ 'reg', 'rm', 'imm' ], '11 mr/reg', ),
    'VEX_RMI_MEM': ( 'VEX.ModR/M', [ 'reg', 'rm', 'imm' ], '!11 mr/reg', ),
    'VEX_MVR': ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg' ], '', ),
    'VEX_MVR_REG': ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg' ], '11 mr/reg', ),
    'VEX_MVR_MEM': ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg' ], '!11 mr/reg', ),

    'VEX_VM': ( 'VEX.ModR/M', [ 'vvvv', 'rm' ], '', ),
    'VEX_VM_REG': ( 'VEX.ModR/M', [ 'vvvv', 'rm' ], '11 mr/reg', ),
    'VEX_VM_MEM': ( 'VEX.ModR/M', [ 'vvvv', 'rm' ], '!11 mr/reg', ),

    'FIXED': ( 'fixed', None, '', ),
};
417
## \@oppfx values.
## The keys are the same ones InstructionMap.kiPrefixOrder uses; every value
## is an empty list.
g_kdPrefixes = {
    'none': [],
    '0x66': [],
    '0xf3': [],
    '0xf2': [],
};
425
## Special \@opcode tag values.
## Every value is an empty list.  (Presumably only key membership is checked -
## confirm against the tag parser.)
g_kdSpecialOpcodes = {
    '/reg': [],
    'mr/reg': [],
    '11 /reg': [],
    '!11 /reg': [],
    '11 mr/reg': [],
    '!11 mr/reg': [],
};
435
## Special \@opcodesub tag values.
## The first value is the real value for aliases.
## The second value is for bs3cg1.
##
## Every entry that is not marked as an alias must resolve to itself, i.e. the
## first value has to equal the key ('none' being the exception, resolving to
## None).
g_kdSubOpcodes = {
    'none': [ None, '', ],
    '11 mr/reg': [ '11 mr/reg', '', ],
    '11': [ '11 mr/reg', '', ], ##< alias
    '!11 mr/reg': [ '!11 mr/reg', '', ],
    '!11': [ '!11 mr/reg', '', ], ##< alias
    'rex.w=0': [ 'rex.w=0', 'WZ', ],
    'w=0': [ 'rex.w=0', '', ], ##< alias
    'rex.w=1': [ 'rex.w=1', 'WNZ', ],
    'w=1': [ 'rex.w=1', '', ], ##< alias
    'vex.l=0': [ 'vex.l=0', 'L0', ],
    'vex.l=1': [ 'vex.l=1', 'L1', ], # Used to resolve to 'vex.l=0' (copy & paste error).
    '11 mr/reg vex.l=0': [ '11 mr/reg vex.l=0', 'L0', ],
    '11 mr/reg vex.l=1': [ '11 mr/reg vex.l=1', 'L1', ],
    '!11 mr/reg vex.l=0': [ '!11 mr/reg vex.l=0', 'L0', ],
    '!11 mr/reg vex.l=1': [ '!11 mr/reg vex.l=1', 'L1', ],
};
456
## Valid values for \@openc
## Each value is a single element list holding the BS3CG1ENC_XXX constant name
## for the encoding, or None where there is none ('prefix').
g_kdEncodings = {
    'ModR/M': [ 'BS3CG1ENC_MODRM', ], ##< ModR/M
    'VEX.ModR/M': [ 'BS3CG1ENC_VEX_MODRM', ], ##< VEX...ModR/M
    'fixed': [ 'BS3CG1ENC_FIXED', ], ##< Fixed encoding (address, registers, unused, etc).
    'VEX.fixed': [ 'BS3CG1ENC_VEX_FIXED', ], ##< VEX + fixed encoding (address, registers, unused, etc).
    'prefix': [ None, ], ##< Prefix
};
465
## \@opunused, \@opinvalid, \@opinvlstyle
## Every value is an empty list.
g_kdInvalidStyles = {
    'immediate': [], ##< CPU stops decoding immediately after the opcode.
    'vex.modrm': [], ##< VEX+ModR/M, everyone.
    'intel-modrm': [], ##< Intel decodes ModR/M.
    'intel-modrm-imm8': [], ##< Intel decodes ModR/M and an 8-bit immediate.
    'intel-opcode-modrm': [], ##< Intel decodes another opcode byte followed by ModR/M. (Unused extension tables.)
    'intel-opcode-modrm-imm8': [], ##< Intel decodes another opcode byte followed by ModR/M and an 8-bit immediate.
};
475
## Known CPU names.
## Every value is an empty tuple.  (Which tag validates against this table is
## not visible from here - see the users of g_kdCpuNames.)
g_kdCpuNames = {
    '8086': (),
    '80186': (),
    '80286': (),
    '80386': (),
    '80486': (),
};
483
## \@opcpuid
## Maps the CPUID feature name accepted by the tag to the corresponding
## X86_CPUID_XXX feature constant name.
##
## Note! 'pclmul', 'monitor' and 'smx' used to map to the constants of the
##       neighbouring (unrelated) CPUID.1:ECX bits: DTES64, CPLDS and TM2.
g_kdCpuIdFlags = {
    'vme': 'X86_CPUID_FEATURE_EDX_VME',
    'tsc': 'X86_CPUID_FEATURE_EDX_TSC',
    'msr': 'X86_CPUID_FEATURE_EDX_MSR',
    'cx8': 'X86_CPUID_FEATURE_EDX_CX8',
    'sep': 'X86_CPUID_FEATURE_EDX_SEP',
    'cmov': 'X86_CPUID_FEATURE_EDX_CMOV',
    'clfsh': 'X86_CPUID_FEATURE_EDX_CLFSH',
    'clflushopt': 'X86_CPUID_STEXT_FEATURE_EBX_CLFLUSHOPT',
    'mmx': 'X86_CPUID_FEATURE_EDX_MMX',
    'fxsr': 'X86_CPUID_FEATURE_EDX_FXSR',
    'sse': 'X86_CPUID_FEATURE_EDX_SSE',
    'sse2': 'X86_CPUID_FEATURE_EDX_SSE2',
    'sse3': 'X86_CPUID_FEATURE_ECX_SSE3',
    'pclmul': 'X86_CPUID_FEATURE_ECX_PCLMUL',
    'monitor': 'X86_CPUID_FEATURE_ECX_MONITOR',
    'vmx': 'X86_CPUID_FEATURE_ECX_VMX',
    'smx': 'X86_CPUID_FEATURE_ECX_SMX',
    'ssse3': 'X86_CPUID_FEATURE_ECX_SSSE3',
    'fma': 'X86_CPUID_FEATURE_ECX_FMA',
    'cx16': 'X86_CPUID_FEATURE_ECX_CX16',
    'pcid': 'X86_CPUID_FEATURE_ECX_PCID',
    'sse4.1': 'X86_CPUID_FEATURE_ECX_SSE4_1',
    'sse4.2': 'X86_CPUID_FEATURE_ECX_SSE4_2',
    'movbe': 'X86_CPUID_FEATURE_ECX_MOVBE',
    'popcnt': 'X86_CPUID_FEATURE_ECX_POPCNT',
    'aes': 'X86_CPUID_FEATURE_ECX_AES',
    'xsave': 'X86_CPUID_FEATURE_ECX_XSAVE',
    'avx': 'X86_CPUID_FEATURE_ECX_AVX',
    'avx2': 'X86_CPUID_STEXT_FEATURE_EBX_AVX2',
    'f16c': 'X86_CPUID_FEATURE_ECX_F16C',
    'rdrand': 'X86_CPUID_FEATURE_ECX_RDRAND',

    'axmmx': 'X86_CPUID_AMD_FEATURE_EDX_AXMMX',
    '3dnowext': 'X86_CPUID_AMD_FEATURE_EDX_3DNOW_EX',
    '3dnow': 'X86_CPUID_AMD_FEATURE_EDX_3DNOW',
    'svm': 'X86_CPUID_AMD_FEATURE_ECX_SVM',
    'cr8l': 'X86_CPUID_AMD_FEATURE_ECX_CR8L',
    'abm': 'X86_CPUID_AMD_FEATURE_ECX_ABM',
    'sse4a': 'X86_CPUID_AMD_FEATURE_ECX_SSE4A',
    '3dnowprf': 'X86_CPUID_AMD_FEATURE_ECX_3DNOWPRF',
    'xop': 'X86_CPUID_AMD_FEATURE_ECX_XOP',
    'fma4': 'X86_CPUID_AMD_FEATURE_ECX_FMA4',
};
529
## \@ophints values.
## Maps the hint name to the DISOPTYPE_XXX constant name; hints without such a
## constant map to an empty string.
# pylint: disable=line-too-long
g_kdHints = {
    'invalid': 'DISOPTYPE_INVALID', ##<
    'harmless': 'DISOPTYPE_HARMLESS', ##<
    'controlflow': 'DISOPTYPE_CONTROLFLOW', ##<
    'potentially_dangerous': 'DISOPTYPE_POTENTIALLY_DANGEROUS', ##<
    'dangerous': 'DISOPTYPE_DANGEROUS', ##<
    'portio': 'DISOPTYPE_PORTIO', ##<
    'privileged': 'DISOPTYPE_PRIVILEGED', ##<
    'privileged_notrap': 'DISOPTYPE_PRIVILEGED_NOTRAP', ##<
    'uncond_controlflow': 'DISOPTYPE_UNCOND_CONTROLFLOW', ##<
    'relative_controlflow': 'DISOPTYPE_RELATIVE_CONTROLFLOW', ##<
    'cond_controlflow': 'DISOPTYPE_COND_CONTROLFLOW', ##<
    'interrupt': 'DISOPTYPE_INTERRUPT', ##<
    'illegal': 'DISOPTYPE_ILLEGAL', ##<
    'rrm_dangerous': 'DISOPTYPE_RRM_DANGEROUS', ##< Some additional dangerous ones when recompiling raw r0.
    'rrm_dangerous_16': 'DISOPTYPE_RRM_DANGEROUS_16', ##< Some additional dangerous ones when recompiling 16-bit raw r0.
    'inhibit_irqs': 'DISOPTYPE_INHIBIT_IRQS', ##< Will or can inhibit irqs (sti, pop ss, mov ss)
    'x86_portio_read': 'DISOPTYPE_X86_PORTIO_READ', ##<
    'x86_portio_write': 'DISOPTYPE_X86_PORTIO_WRITE', ##<
    'x86_invalid_64': 'DISOPTYPE_X86_INVALID_64', ##< Invalid in 64 bits mode
    'x86_only_64': 'DISOPTYPE_X86_ONLY_64', ##< Only valid in 64 bits mode
    'x86_default_64_op_size': 'DISOPTYPE_X86_DEFAULT_64_OP_SIZE', ##< Default 64 bits operand size
    'x86_forced_64_op_size': 'DISOPTYPE_X86_FORCED_64_OP_SIZE', ##< Forced 64 bits operand size; regardless of prefix bytes
    'x86_rexb_extends_opreg': 'DISOPTYPE_X86_REXB_EXTENDS_OPREG', ##< REX.B extends the register field in the opcode byte
    'x86_mod_fixed_11': 'DISOPTYPE_X86_MOD_FIXED_11', ##< modrm.mod is always 11b
    'x86_forced_32_op_size_x86': 'DISOPTYPE_X86_FORCED_32_OP_SIZE_X86', ##< Forced 32 bits operand size; regardless of prefix bytes
                                                                       ## (only in 16 & 32 bits mode!)
    'x86_avx': 'DISOPTYPE_X86_AVX', ##< AVX,AVX2,++ instruction. Not implemented yet!
    'x86_sse': 'DISOPTYPE_X86_SSE', ##< SSE,SSE2,SSE3,++ instruction. Not implemented yet!
    'x86_mmx': 'DISOPTYPE_X86_MMX', ##< MMX,MMXExt,3DNow,++ instruction. Not implemented yet!
    'x86_fpu': 'DISOPTYPE_X86_FPU', ##< FPU instruction. Not implemented yet!
    'ignores_oz_pfx': '', ##< Ignores operand size prefix 66h.
    'ignores_rexw': '', ##< Ignores REX.W.
    'ignores_op_sizes': '', ##< Shorthand for "ignores_oz_pfx | ignores_rexw".
    'vex_l_zero': '', ##< VEX.L must be 0.
    'vex_l_ignored': '', ##< VEX.L is ignored.
    'vex_v_zero': '', ##< VEX.V must be 0. (generate sub-table?)
    'lock_allowed': '', ##< Lock prefix allowed.
};
# pylint: enable=line-too-long
572
## \@opxcpttype values (see SDMv2 2.4, 2.7).
## Every value is an empty list.
g_kdXcptTypes = {
    'none': [],
    '1': [],
    '2': [],
    '3': [],
    '4': [],
    '4UA': [],
    '5': [],
    '5LZ': [], # LZ = VEX.L must be zero.
    '6': [],
    '7': [],
    '7LZ': [],
    '8': [],
    '11': [],
    '12': [],
    'E1': [],
    'E1NF': [],
    'E2': [],
    'E3': [],
    'E3NF': [],
    'E4': [],
    'E4NF': [],
    'E5': [],
    'E5NF': [],
    'E6': [],
    'E6NF': [],
    'E7NF': [],
    'E9': [],
    'E9NF': [],
    'E10': [],
    'E11': [],
    'E12': [],
    'E12NF': [],
};
608
609
610def _isValidOpcodeByte(sOpcode):
611 """
612 Checks if sOpcode is a valid lower case opcode byte.
613 Returns true/false.
614 """
615 if len(sOpcode) == 4:
616 if sOpcode[:2] == '0x':
617 if sOpcode[2] in '0123456789abcdef':
618 if sOpcode[3] in '0123456789abcdef':
619 return True;
620 return False;
621
622
623class InstructionMap(object):
624 """
625 Instruction map.
626
627 The opcode map provides the lead opcode bytes (empty for the one byte
628 opcode map). An instruction can be member of multiple opcode maps as long
629 as it uses the same opcode value within the map (because of VEX).
630 """
631
632 kdEncodings = {
633 'legacy': [],
634 'vex1': [], ##< VEX or EVEX prefix with vvvvv = 1
635 'vex2': [], ##< VEX or EVEX prefix with vvvvv = 2
636 'vex3': [], ##< VEX or EVEX prefix with vvvvv = 3
637 'xop8': [], ##< XOP prefix with vvvvv = 8
638 'xop9': [], ##< XOP prefix with vvvvv = 9
639 'xop10': [], ##< XOP prefix with vvvvv = 10
640 };
641 ## Selectors.
642 ## 1. The first value is the number of table entries required by a
643 ## decoder or disassembler for this type of selector.
644 ## 2. The second value is how many entries per opcode byte if applicable.
645 kdSelectors = {
646 'byte': [ 256, 1, ], ##< next opcode byte selects the instruction (default).
647 'byte+pfx': [ 1024, 4, ], ##< next opcode byte selects the instruction together with the 0x66, 0xf2 and 0xf3 prefixes.
648 '/r': [ 8, 1, ], ##< modrm.reg selects the instruction.
649 'memreg /r':[ 16, 1, ], ##< modrm.reg and (modrm.mod == 3) selects the instruction.
650 'mod /r': [ 32, 1, ], ##< modrm.reg and modrm.mod selects the instruction.
651 '!11 /r': [ 8, 1, ], ##< modrm.reg selects the instruction with modrm.mod != 0y11.
652 '11 /r': [ 8, 1, ], ##< modrm.reg select the instruction with modrm.mod == 0y11.
653 '11': [ 64, 1, ], ##< modrm.reg and modrm.rm select the instruction with modrm.mod == 0y11.
654 };
655
656 ## Define the subentry number according to the Instruction::sPrefix
657 ## value for 'byte+pfx' selected tables.
658 kiPrefixOrder = {
659 'none': 0,
660 '0x66': 1,
661 '0xf3': 2,
662 '0xf2': 3,
663 };
664
665 def __init__(self, sName, sIemName = None, asLeadOpcodes = None, sSelector = 'byte+pfx',
666 sEncoding = 'legacy', sDisParse = None):
667 assert sSelector in self.kdSelectors;
668 assert sEncoding in self.kdEncodings;
669 if asLeadOpcodes is None:
670 asLeadOpcodes = [];
671 else:
672 for sOpcode in asLeadOpcodes:
673 assert _isValidOpcodeByte(sOpcode);
674 assert sDisParse is None or sDisParse.startswith('IDX_Parse');
675
676 self.sName = sName;
677 self.sIemName = sIemName;
678 self.asLeadOpcodes = asLeadOpcodes; ##< Lead opcode bytes formatted as hex strings like '0x0f'.
679 self.sSelector = sSelector; ##< The member selector, see kdSelectors.
680 self.sEncoding = sEncoding; ##< The encoding, see kdSelectors.
681 self.aoInstructions = [] # type: Instruction
682 self.sDisParse = sDisParse; ##< IDX_ParseXXX.
683
684 def copy(self, sNewName, sPrefixFilter = None):
685 """
686 Copies the table with filtering instruction by sPrefix if not None.
687 """
688 oCopy = InstructionMap(sNewName, sIemName = self.sIemName, asLeadOpcodes = self.asLeadOpcodes,
689 sSelector = 'byte' if sPrefixFilter is not None and self.sSelector == 'byte+pfx'
690 else self.sSelector,
691 sEncoding = self.sEncoding, sDisParse = self.sDisParse);
692 if sPrefixFilter is None:
693 oCopy.aoInstructions = list(self.aoInstructions);
694 else:
695 oCopy.aoInstructions = [oInstr for oInstr in self.aoInstructions if oInstr.sPrefix == sPrefixFilter];
696 return oCopy;
697
698 def getTableSize(self):
699 """
700 Number of table entries. This corresponds directly to the selector.
701 """
702 return self.kdSelectors[self.sSelector][0];
703
704 def getEntriesPerByte(self):
705 """
706 Number of table entries per opcode bytes.
707
708 This only really makes sense for the 'byte' and 'byte+pfx' selectors, for
709 the others it will just return 1.
710 """
711 return self.kdSelectors[self.sSelector][1];
712
713 def getInstructionIndex(self, oInstr):
714 """
715 Returns the table index for the instruction.
716 """
717 bOpcode = oInstr.getOpcodeByte();
718
719 # The byte selectors are simple. We need a full opcode byte and need just return it.
720 if self.sSelector == 'byte':
721 assert oInstr.sOpcode[:2] == '0x' and len(oInstr.sOpcode) == 4, str(oInstr);
722 return bOpcode;
723
724 # The byte + prefix selector is similarly simple, though requires a prefix as well as the full opcode.
725 if self.sSelector == 'byte+pfx':
726 assert oInstr.sOpcode[:2] == '0x' and len(oInstr.sOpcode) == 4, str(oInstr);
727 assert self.kiPrefixOrder.get(oInstr.sPrefix, -16384) >= 0;
728 return bOpcode * 4 + self.kiPrefixOrder.get(oInstr.sPrefix, -16384);
729
730 # The other selectors needs masking and shifting.
731 if self.sSelector == '/r':
732 return (bOpcode >> 3) & 0x7;
733
734 if self.sSelector == 'mod /r':
735 return (bOpcode >> 3) & 0x1f;
736
737 if self.sSelector == 'memreg /r':
738 return ((bOpcode >> 3) & 0x7) | (int((bOpcode >> 6) == 3) << 3);
739
740 if self.sSelector == '!11 /r':
741 assert (bOpcode & 0xc0) != 0xc, str(oInstr);
742 return (bOpcode >> 3) & 0x7;
743
744 if self.sSelector == '11 /r':
745 assert (bOpcode & 0xc0) == 0xc, str(oInstr);
746 return (bOpcode >> 3) & 0x7;
747
748 if self.sSelector == '11':
749 assert (bOpcode & 0xc0) == 0xc, str(oInstr);
750 return bOpcode & 0x3f;
751
752 assert False, self.sSelector;
753 return -1;
754
755 def getInstructionsInTableOrder(self):
756 """
757 Get instructions in table order.
758
759 Returns array of instructions. Normally there is exactly one
760 instruction per entry. However the entry could also be None if
761 not instruction was specified for that opcode value. Or there
762 could be a list of instructions to deal with special encodings
763 where for instance prefix (e.g. REX.W) encodes a different
764 instruction or different CPUs have different instructions or
765 prefixes in the same place.
766 """
767 # Start with empty table.
768 cTable = self.getTableSize();
769 aoTable = [None] * cTable;
770
771 # Insert the instructions.
772 for oInstr in self.aoInstructions:
773 if oInstr.sOpcode:
774 idxOpcode = self.getInstructionIndex(oInstr);
775 assert idxOpcode < cTable, str(idxOpcode);
776
777 oExisting = aoTable[idxOpcode];
778 if oExisting is None:
779 aoTable[idxOpcode] = oInstr;
780 elif not isinstance(oExisting, list):
781 aoTable[idxOpcode] = list([oExisting, oInstr]);
782 else:
783 oExisting.append(oInstr);
784
785 return aoTable;
786
787
def getDisasTableName(self):
    """
    Returns the disassembler table name for this map.

    The name is 'g_aDisas' followed by one piece per '_'-separated word of
    self.sName:
        - 'm' and 'r' (modrm.mod==mem / modrm.mod==reg suffixes) and two
          digit lowercase hex words are appended verbatim after a '_';
        - any other word has 'grp'/'map' turned into 'Grp'/'Map' and its
          first character uppercased, and is appended without separator.
    """
    asPieces = ['g_aDisas',];
    for sPart in self.sName.split('_'):
        if sPart in ('m', 'r'):
            # Suffix indicating modrm.mod==mem ('m') or modrm.mod==reg ('r').
            asPieces.append('_' + sPart);
        elif len(sPart) == 2 and re.match('^[a-f0-9][a-f0-9]$', sPart):
            # Opcode byte in hex, kept as is.
            asPieces.append('_' + sPart);
        else:
            sCamel = sPart.replace('grp', 'Grp').replace('map', 'Map');
            asPieces.append(sCamel[0].upper() + sCamel[1:]);
    return ''.join(asPieces);
805
def getDisasRangeName(self):
    """
    Returns the disassembler table range name for this map.

    This is the table name from getDisasTableName() with the 'g_aDisas'
    prefix replaced by 'g_Disas' and 'Range' appended.
    """
    sTableName = self.getDisasTableName();
    sRangeBase = sTableName.replace('g_aDisas', 'g_Disas');
    return sRangeBase + 'Range';
811
def isVexMap(self):
    """ Returns True if this is a VEX map, i.e. the encoding name starts with 'vex'. """
    sEncoding = self.sEncoding;
    return sEncoding.startswith('vex');
815
816
class TestType(object):
    """
    Test value type.

    This base class deals with integer like values.  The fUnsigned constructor
    parameter indicates the default stance on zero vs sign extending.  It is
    possible to override fUnsigned=True by prefixing the value with '+' or '-'.
    """
    def __init__(self, sName, acbSizes = None, fUnsigned = True):
        self.sName     = sName;
        self.acbSizes  = [1, 2, 4, 8, 16, 32] if acbSizes is None else acbSizes;  # Normal sizes (bytes).
        self.fUnsigned = fUnsigned;

    class BadValue(Exception):
        """ Bad value exception. """
        def __init__(self, sMessage):
            Exception.__init__(self, sMessage);
            self.sMessage = sMessage;

    ## For ascii ~ operator.
    kdHexInv = {
        '0': 'f',
        '1': 'e',
        '2': 'd',
        '3': 'c',
        '4': 'b',
        '5': 'a',
        '6': '9',
        '7': '8',
        '8': '7',
        '9': '6',
        'a': '5',
        'b': '4',
        'c': '3',
        'd': '2',
        'e': '1',
        'f': '0',
    };

    def get(self, sValue):
        """
        Get the shortest normal sized byte representation of sValue.

        sValue is a decimal or '0x' prefixed hexadecimal integer string,
        optionally with a leading '+' or '-' which forces sign extension.

        Returns ((fSignExtend, bytearray), ) or ((fSignExtend, bytearray), (fSignExtend, bytearray), ).
        The latter form is for AND+OR pairs where the first entry is what to
        AND with the field and the second the one to OR with.  (This base
        class only ever returns the first form.)  The bytearray holds the
        value least significant byte first.

        Raises BadValue if invalid value.
        """
        if not sValue:
            raise TestType.BadValue('empty value');

        # Deal with sign and detect hexadecimal or decimal.
        fSignExtend = not self.fUnsigned;
        if sValue[0] == '-' or sValue[0] == '+':
            fSignExtend = True;
            fHex = len(sValue) > 3 and sValue[1:3].lower() == '0x';
        else:
            fHex = len(sValue) > 2 and sValue[0:2].lower() == '0x';

        # Try convert it to an integer.  int() yields an arbitrary precision
        # integer on both python 2 and 3, so no 'long' builtin is needed.
        try:
            iValue = int(sValue, 16 if fHex else 10);
        except (TypeError, ValueError) as oXcpt:
            raise TestType.BadValue('failed to convert "%s" to integer (%s)' % (sValue, oXcpt));

        # Convert to a hex digit string.  The '%x' format gives just the
        # lowercase digits (no '0x' prefix, no python 2 'L' suffix).  Negative
        # values need to be manually converted to two's complement: the digits
        # of (-n - 1) are inverted one by one (~(-n - 1) == n).
        if iValue >= 0:
            sHex = '%x' % (iValue,);
        else:
            sHex = ''.join([self.kdHexInv[sDigit] for sDigit in '%x' % (-iValue - 1,)]);
            # Make sure the top bit is set so the padding below extends the sign.
            if fSignExtend and sHex[0] not in [ '8', '9', 'a', 'b', 'c', 'd', 'e', 'f']:
                sHex = 'f' + sHex;

        # Pick the smallest normal size that holds all digits.  Values larger
        # than the biggest normal size are rounded up to a multiple of it.
        cDigits = len(sHex);
        if cDigits <= self.acbSizes[-1] * 2:
            for cb in self.acbSizes:
                cNaturalDigits = cb * 2;
                if cDigits <= cNaturalDigits:
                    break;
        else:
            cMaxDigits     = self.acbSizes[-1] * 2;
            cNaturalDigits = ((cDigits + cMaxDigits - 1) // cMaxDigits) * cMaxDigits;

        # Pad on the left: zeros for non-negative values, 'f' for negative ones.
        if cNaturalDigits != cDigits:
            sHex = sHex.rjust(cNaturalDigits, '0' if iValue >= 0 else 'f');

        # Convert to bytearray, walking the digit pairs from the end so the
        # least significant byte comes first, and return it.
        abValue = bytearray([int(sHex[offHex - 2 : offHex], 16) for offHex in range(len(sHex), 0, -2)]);

        return ((fSignExtend, abValue),);

    def validate(self, sValue):
        """
        Returns True if value is okay, error message on failure.
        """
        try:
            self.get(sValue);
        except TestType.BadValue as oXcpt:
            return oXcpt.sMessage;
        return True;

    def isAndOrPair(self, sValue):
        """
        Checks if sValue is a pair.  Always False for this base class.
        """
        _ = sValue;
        return False;
941
942
class TestTypeEflags(TestType):
    """
    Special value parsing for EFLAGS/RFLAGS/FLAGS.

    Values are comma separated flag mnemonics which are translated via
    g_kdEFlagsMnemonics and g_kdX86EFlagsConstants.
    """

    ## Mnemonics whose presence makes a value an AND+OR pair (see isAndOrPair).
    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 };

    def __init__(self, sName):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);

    def get(self, sValue):
        """
        Converts a comma separated list of flag mnemonics into byte form.

        Mnemonics mapping to a constant name prefixed by '!' contribute to
        the clear mask, all others to the set mask.  Returns the AND+OR pair
        form (clear, set) if anything is to be cleared, otherwise just the
        single entry form with the set mask.

        Raises BadValue on unknown mnemonics.
        """
        fSetMask   = 0;
        fClearMask = 0;
        for sMnemonic in sValue.split(','):
            sConstName = g_kdEFlagsMnemonics.get(sMnemonic, None);
            if sConstName is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sMnemonic, sValue))
            if sConstName[0] != '!':
                fSetMask |= g_kdX86EFlagsConstants[sConstName];
            else:
                fClearMask |= g_kdX86EFlagsConstants[sConstName[1:]];

        aoSetPart = TestType.get(self, '0x%x' % (fSetMask,));
        if fClearMask == 0:
            assert self.isAndOrPair(sValue) is False;
            return aoSetPart;

        aoClearPart = TestType.get(self, '%#x' % (fClearMask,))
        assert self.isAndOrPair(sValue) is True;
        return (aoClearPart[0], aoSetPart[0]);

    def isAndOrPair(self, sValue):
        """
        Returns True if any of the kdZeroValueFlags mnemonics occurs
        (as a substring) in sValue, otherwise False.
        """
        return any(sZeroFlag in sValue for sZeroFlag in self.kdZeroValueFlags);
978
class TestTypeFromDict(TestType):
    """
    Special value parsing for registers whose bits are named by a dictionary
    of constants (name -> value), looked up with a common constant prefix.
    """

    ## Not referenced by the methods of this class.
    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 };

    def __init__(self, sName, kdConstantsAndValues, sConstantPrefix):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);
        self.sConstantPrefix      = sConstantPrefix;
        self.kdConstantsAndValues = kdConstantsAndValues;

    def get(self, sValue):
        """
        Converts a comma separated list of flag names into byte form.

        Each name is uppercased and prefixed with sConstantPrefix before
        being looked up in kdConstantsAndValues; the values are ORed
        together and handed to TestType.get() as a hex string.

        Raises BadValue on unknown names.
        """
        fCombined = 0;
        for sName in sValue.split(','):
            sKey  = self.sConstantPrefix + sName.upper();
            fBits = self.kdConstantsAndValues.get(sKey, None);
            if fBits is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sName, sValue))
            fCombined |= fBits;
        return TestType.get(self, '0x%x' % (fCombined,));
999
1000
class TestInOut(object):
    """
    One input or output state modifier.

    This should be thought as values to modify BS3REGCTX and extended (needs
    to be structured) state.
    """
    ## Assigned operators.
    kasOperators = [
        '&|=',  # Special AND(INV)+OR operator for use with EFLAGS.
        '&~=',
        '&=',
        '|=',
        '='
    ];
    ## Types
    # NOTE(review): 'int' is created with the default fUnsigned=True and thus
    #               behaves exactly like 'uint' - confirm whether
    #               fUnsigned=False was intended.
    kdTypes = {
        'uint':  TestType('uint', fUnsigned = True),
        'int':   TestType('int'),
        'efl':   TestTypeEflags('efl'),
        'cr0':   TestTypeFromDict('cr0', g_kdX86Cr0Constants, 'X86_CR0_'),
        'cr4':   TestTypeFromDict('cr4', g_kdX86Cr4Constants, 'X86_CR4_'),
        'xcr0':  TestTypeFromDict('xcr0', g_kdX86XSaveCConstants, 'XSAVE_C_'),
    };
    ## CPU context fields.
    kdFields = {
        # name:         ( default type, [both|input|output], )
        # Operands.
        'op1':          ( 'uint', 'both',   ), ## \@op1
        'op2':          ( 'uint', 'both',   ), ## \@op2
        'op3':          ( 'uint', 'both',   ), ## \@op3
        'op4':          ( 'uint', 'both',   ), ## \@op4
        # Flags.
        'efl':          ( 'efl',  'both',   ),
        'efl_undef':    ( 'uint', 'output', ),
        # 8-bit GPRs.
        'al':           ( 'uint', 'both',   ),
        'cl':           ( 'uint', 'both',   ),
        'dl':           ( 'uint', 'both',   ),
        'bl':           ( 'uint', 'both',   ),
        'ah':           ( 'uint', 'both',   ),
        'ch':           ( 'uint', 'both',   ),
        'dh':           ( 'uint', 'both',   ),
        'bh':           ( 'uint', 'both',   ),
        'r8l':          ( 'uint', 'both',   ),
        'r9l':          ( 'uint', 'both',   ),
        'r10l':         ( 'uint', 'both',   ),
        'r11l':         ( 'uint', 'both',   ),
        'r12l':         ( 'uint', 'both',   ),
        'r13l':         ( 'uint', 'both',   ),
        'r14l':         ( 'uint', 'both',   ),
        'r15l':         ( 'uint', 'both',   ),
        # 16-bit GPRs.
        'ax':           ( 'uint', 'both',   ),
        'dx':           ( 'uint', 'both',   ),
        'cx':           ( 'uint', 'both',   ),
        'bx':           ( 'uint', 'both',   ),
        'sp':           ( 'uint', 'both',   ),
        'bp':           ( 'uint', 'both',   ),
        'si':           ( 'uint', 'both',   ),
        'di':           ( 'uint', 'both',   ),
        'r8w':          ( 'uint', 'both',   ),
        'r9w':          ( 'uint', 'both',   ),
        'r10w':         ( 'uint', 'both',   ),
        'r11w':         ( 'uint', 'both',   ),
        'r12w':         ( 'uint', 'both',   ),
        'r13w':         ( 'uint', 'both',   ),
        'r14w':         ( 'uint', 'both',   ),
        'r15w':         ( 'uint', 'both',   ),
        # 32-bit GPRs.
        'eax':          ( 'uint', 'both',   ),
        'edx':          ( 'uint', 'both',   ),
        'ecx':          ( 'uint', 'both',   ),
        'ebx':          ( 'uint', 'both',   ),
        'esp':          ( 'uint', 'both',   ),
        'ebp':          ( 'uint', 'both',   ),
        'esi':          ( 'uint', 'both',   ),
        'edi':          ( 'uint', 'both',   ),
        'r8d':          ( 'uint', 'both',   ),
        'r9d':          ( 'uint', 'both',   ),
        'r10d':         ( 'uint', 'both',   ),
        'r11d':         ( 'uint', 'both',   ),
        'r12d':         ( 'uint', 'both',   ),
        'r13d':         ( 'uint', 'both',   ),
        'r14d':         ( 'uint', 'both',   ),
        'r15d':         ( 'uint', 'both',   ),
        # 64-bit GPRs.
        'rax':          ( 'uint', 'both',   ),
        'rdx':          ( 'uint', 'both',   ),
        'rcx':          ( 'uint', 'both',   ),
        'rbx':          ( 'uint', 'both',   ),
        'rsp':          ( 'uint', 'both',   ),
        'rbp':          ( 'uint', 'both',   ),
        'rsi':          ( 'uint', 'both',   ),
        'rdi':          ( 'uint', 'both',   ),
        'r8':           ( 'uint', 'both',   ),
        'r9':           ( 'uint', 'both',   ),
        'r10':          ( 'uint', 'both',   ),
        'r11':          ( 'uint', 'both',   ),
        'r12':          ( 'uint', 'both',   ),
        'r13':          ( 'uint', 'both',   ),
        'r14':          ( 'uint', 'both',   ),
        'r15':          ( 'uint', 'both',   ),
        # 16-bit, 32-bit or 64-bit registers according to operand size.
        'oz.rax':       ( 'uint', 'both',   ),
        'oz.rdx':       ( 'uint', 'both',   ),
        'oz.rcx':       ( 'uint', 'both',   ),
        'oz.rbx':       ( 'uint', 'both',   ),
        'oz.rsp':       ( 'uint', 'both',   ),
        'oz.rbp':       ( 'uint', 'both',   ),
        'oz.rsi':       ( 'uint', 'both',   ),
        'oz.rdi':       ( 'uint', 'both',   ),
        'oz.r8':        ( 'uint', 'both',   ),
        'oz.r9':        ( 'uint', 'both',   ),
        'oz.r10':       ( 'uint', 'both',   ),
        'oz.r11':       ( 'uint', 'both',   ),
        'oz.r12':       ( 'uint', 'both',   ),
        'oz.r13':       ( 'uint', 'both',   ),
        'oz.r14':       ( 'uint', 'both',   ),
        'oz.r15':       ( 'uint', 'both',   ),
        # Control registers.
        'cr0':          ( 'cr0',  'both',   ),
        'cr4':          ( 'cr4',  'both',   ),
        'xcr0':         ( 'xcr0', 'both',   ),
        # FPU Registers
        'fcw':          ( 'uint', 'both',   ),
        'fsw':          ( 'uint', 'both',   ),
        'ftw':          ( 'uint', 'both',   ),
        'fop':          ( 'uint', 'both',   ),
        'fpuip':        ( 'uint', 'both',   ),
        'fpucs':        ( 'uint', 'both',   ),
        'fpudp':        ( 'uint', 'both',   ),
        'fpuds':        ( 'uint', 'both',   ),
        'mxcsr':        ( 'uint', 'both',   ),
        'st0':          ( 'uint', 'both',   ),
        'st1':          ( 'uint', 'both',   ),
        'st2':          ( 'uint', 'both',   ),
        'st3':          ( 'uint', 'both',   ),
        'st4':          ( 'uint', 'both',   ),
        'st5':          ( 'uint', 'both',   ),
        'st6':          ( 'uint', 'both',   ),
        'st7':          ( 'uint', 'both',   ),
        # MMX registers.
        'mm0':          ( 'uint', 'both',   ),
        'mm1':          ( 'uint', 'both',   ),
        'mm2':          ( 'uint', 'both',   ),
        'mm3':          ( 'uint', 'both',   ),
        'mm4':          ( 'uint', 'both',   ),
        'mm5':          ( 'uint', 'both',   ),
        'mm6':          ( 'uint', 'both',   ),
        'mm7':          ( 'uint', 'both',   ),
        # SSE registers.
        'xmm0':         ( 'uint', 'both',   ),
        'xmm1':         ( 'uint', 'both',   ),
        'xmm2':         ( 'uint', 'both',   ),
        'xmm3':         ( 'uint', 'both',   ),
        'xmm4':         ( 'uint', 'both',   ),
        'xmm5':         ( 'uint', 'both',   ),
        'xmm6':         ( 'uint', 'both',   ),
        'xmm7':         ( 'uint', 'both',   ),
        'xmm8':         ( 'uint', 'both',   ),
        'xmm9':         ( 'uint', 'both',   ),
        'xmm10':        ( 'uint', 'both',   ),
        'xmm11':        ( 'uint', 'both',   ),
        'xmm12':        ( 'uint', 'both',   ),
        'xmm13':        ( 'uint', 'both',   ),
        'xmm14':        ( 'uint', 'both',   ),
        'xmm15':        ( 'uint', 'both',   ),
        'xmm0.lo':      ( 'uint', 'both',   ),
        'xmm1.lo':      ( 'uint', 'both',   ),
        'xmm2.lo':      ( 'uint', 'both',   ),
        'xmm3.lo':      ( 'uint', 'both',   ),
        'xmm4.lo':      ( 'uint', 'both',   ),
        'xmm5.lo':      ( 'uint', 'both',   ),
        'xmm6.lo':      ( 'uint', 'both',   ),
        'xmm7.lo':      ( 'uint', 'both',   ),
        'xmm8.lo':      ( 'uint', 'both',   ),
        'xmm9.lo':      ( 'uint', 'both',   ),
        'xmm10.lo':     ( 'uint', 'both',   ),
        'xmm11.lo':     ( 'uint', 'both',   ),
        'xmm12.lo':     ( 'uint', 'both',   ),
        'xmm13.lo':     ( 'uint', 'both',   ),
        'xmm14.lo':     ( 'uint', 'both',   ),
        'xmm15.lo':     ( 'uint', 'both',   ),
        'xmm0.hi':      ( 'uint', 'both',   ),
        'xmm1.hi':      ( 'uint', 'both',   ),
        'xmm2.hi':      ( 'uint', 'both',   ),
        'xmm3.hi':      ( 'uint', 'both',   ),
        'xmm4.hi':      ( 'uint', 'both',   ),
        'xmm5.hi':      ( 'uint', 'both',   ),
        'xmm6.hi':      ( 'uint', 'both',   ),
        'xmm7.hi':      ( 'uint', 'both',   ),
        'xmm8.hi':      ( 'uint', 'both',   ),
        'xmm9.hi':      ( 'uint', 'both',   ),
        'xmm10.hi':     ( 'uint', 'both',   ),
        'xmm11.hi':     ( 'uint', 'both',   ),
        'xmm12.hi':     ( 'uint', 'both',   ),
        'xmm13.hi':     ( 'uint', 'both',   ),
        'xmm14.hi':     ( 'uint', 'both',   ),
        'xmm15.hi':     ( 'uint', 'both',   ),
        'xmm0.lo.zx':   ( 'uint', 'both',   ),
        'xmm1.lo.zx':   ( 'uint', 'both',   ),
        'xmm2.lo.zx':   ( 'uint', 'both',   ),
        'xmm3.lo.zx':   ( 'uint', 'both',   ),
        'xmm4.lo.zx':   ( 'uint', 'both',   ),
        'xmm5.lo.zx':   ( 'uint', 'both',   ),
        'xmm6.lo.zx':   ( 'uint', 'both',   ),
        'xmm7.lo.zx':   ( 'uint', 'both',   ),
        'xmm8.lo.zx':   ( 'uint', 'both',   ),
        'xmm9.lo.zx':   ( 'uint', 'both',   ),
        'xmm10.lo.zx':  ( 'uint', 'both',   ),
        'xmm11.lo.zx':  ( 'uint', 'both',   ),
        'xmm12.lo.zx':  ( 'uint', 'both',   ),
        'xmm13.lo.zx':  ( 'uint', 'both',   ),
        'xmm14.lo.zx':  ( 'uint', 'both',   ),
        'xmm15.lo.zx':  ( 'uint', 'both',   ),
        'xmm0.dw0':     ( 'uint', 'both',   ),
        'xmm1.dw0':     ( 'uint', 'both',   ),
        'xmm2.dw0':     ( 'uint', 'both',   ),
        'xmm3.dw0':     ( 'uint', 'both',   ),
        'xmm4.dw0':     ( 'uint', 'both',   ),
        'xmm5.dw0':     ( 'uint', 'both',   ),
        'xmm6.dw0':     ( 'uint', 'both',   ),
        'xmm7.dw0':     ( 'uint', 'both',   ),
        'xmm8.dw0':     ( 'uint', 'both',   ),
        'xmm9.dw0':     ( 'uint', 'both',   ),
        'xmm10.dw0':    ( 'uint', 'both',   ),
        'xmm11.dw0':    ( 'uint', 'both',   ),
        'xmm12.dw0':    ( 'uint', 'both',   ),
        'xmm13.dw0':    ( 'uint', 'both',   ),
        'xmm14.dw0':    ( 'uint', 'both',   ),
        'xmm15.dw0':    ( 'uint', 'both',   ),
        'xmm15_dw0':    ( 'uint', 'both',   ), # Misspelled legacy name of xmm15.dw0, kept for compatibility.
        # AVX registers.
        'ymm0':         ( 'uint', 'both',   ),
        'ymm1':         ( 'uint', 'both',   ),
        'ymm2':         ( 'uint', 'both',   ),
        'ymm3':         ( 'uint', 'both',   ),
        'ymm4':         ( 'uint', 'both',   ),
        'ymm5':         ( 'uint', 'both',   ),
        'ymm6':         ( 'uint', 'both',   ),
        'ymm7':         ( 'uint', 'both',   ),
        'ymm8':         ( 'uint', 'both',   ),
        'ymm9':         ( 'uint', 'both',   ),
        'ymm10':        ( 'uint', 'both',   ),
        'ymm11':        ( 'uint', 'both',   ),
        'ymm12':        ( 'uint', 'both',   ),
        'ymm13':        ( 'uint', 'both',   ),
        'ymm14':        ( 'uint', 'both',   ),
        'ymm15':        ( 'uint', 'both',   ),

        # Special ones.
        'value.xcpt':   ( 'uint', 'output', ),
    };

    def __init__(self, sField, sOp, sValue, sType):
        """
        sField must be a kdFields key and sOp one of kasOperators; all four
        arguments must be strings (checked by assertions only).
        """
        assert sField in self.kdFields;
        assert sOp in self.kasOperators;
        self.sField = sField;
        self.sOp    = sOp;
        self.sValue = sValue;
        self.sType  = sType;
        assert isinstance(sField, str);
        assert isinstance(sOp, str);
        assert isinstance(sType, str);
        assert isinstance(sValue, str);
1266
1267
class TestSelector(object):
    """
    One selector for an instruction test.

    A selector is a (variable, compare operator, value) triplet, where the
    variable and value must be taken from kdVariables and the operator from
    kasCompareOps.
    """
    ## Selector compare operators.
    kasCompareOps = [ '==', '!=' ];
    ## Selector variables and their valid values.
    kdVariables = {
        # Operand size.
        'size':   {
            'o16':  'size_o16',
            'o32':  'size_o32',
            'o64':  'size_o64',
        },
        # VEX.L value.
        'vex.l':  {
            '0':    'vexl_0',
            '1':    'vexl_1',
        },
        # Execution ring.
        'ring':   {
            '0':    'ring_0',
            '1':    'ring_1',
            '2':    'ring_2',
            '3':    'ring_3',
            '0..2': 'ring_0_thru_2',
            '1..3': 'ring_1_thru_3',
        },
        # Basic code mode.
        'codebits': {
            '64':   'code_64bit',
            '32':   'code_32bit',
            '16':   'code_16bit',
        },
        # cpu modes.
        'mode':     {
            'real': 'mode_real',
            'prot': 'mode_prot',
            'long': 'mode_long',
            'v86':  'mode_v86',
            'smm':  'mode_smm',
            'vmx':  'mode_vmx',
            'svm':  'mode_svm',
        },
        # paging on/off
        'paging':   {
            'on':   'paging_on',
            'off':  'paging_off',
        },
        # CPU vendor
        'vendor':   {
            'amd':   'vendor_amd',
            'intel': 'vendor_intel',
            'via':   'vendor_via',
        },
    };
    ## Selector shorthand predicates.
    ## These translates into variable expressions.
    kdPredicates = {
        'o16':          'size==o16',
        'o32':          'size==o32',
        'o64':          'size==o64',
        'ring0':        'ring==0',
        '!ring0':       'ring==1..3',
        'ring1':        'ring==1',
        'ring2':        'ring==2',
        'ring3':        'ring==3',
        'user':         'ring==3',
        'supervisor':   'ring==0..2',
        '16-bit':       'codebits==16',
        '32-bit':       'codebits==32',
        '64-bit':       'codebits==64',
        'real':         'mode==real',
        'prot':         'mode==prot',
        'long':         'mode==long',
        'v86':          'mode==v86',
        'smm':          'mode==smm',
        'vmx':          'mode==vmx',
        'svm':          'mode==svm',
        'paging':       'paging==on',
        '!paging':      'paging==off',
        'amd':          'vendor==amd',
        '!amd':         'vendor!=amd',
        'intel':        'vendor==intel',
        '!intel':       'vendor!=intel',
        'via':          'vendor==via',
        '!via':         'vendor!=via',
    };

    def __init__(self, sVariable, sOp, sValue):
        """
        Creates the selector; the three parts are sanity checked against
        kdVariables and kasCompareOps using assertions.
        """
        dValidValues = self.kdVariables.get(sVariable);
        assert dValidValues is not None;
        assert sOp in self.kasCompareOps;
        assert sValue in dValidValues;
        self.sVariable, self.sOp, self.sValue = sVariable, sOp, sValue;
1364
1365
1366class InstructionTest(object):
1367 """
1368 Instruction test.
1369 """
1370
1371 def __init__(self, oInstr): # type: (InstructionTest, Instruction)
1372 self.oInstr = oInstr # type: InstructionTest
1373 self.aoInputs = [] # type: list(TestInOut)
1374 self.aoOutputs = [] # type: list(TestInOut)
1375 self.aoSelectors = [] # type: list(TestSelector)
1376
1377 def toString(self, fRepr = False):
1378 """
1379 Converts it to string representation.
1380 """
1381 asWords = [];
1382 if self.aoSelectors:
1383 for oSelector in self.aoSelectors:
1384 asWords.append('%s%s%s' % (oSelector.sVariable, oSelector.sOp, oSelector.sValue,));
1385 asWords.append('/');
1386
1387 for oModifier in self.aoInputs:
1388 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
1389
1390 asWords.append('->');
1391
1392 for oModifier in self.aoOutputs:
1393 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
1394
1395 if fRepr:
1396 return '<' + ' '.join(asWords) + '>';
1397 return ' '.join(asWords);
1398
1399 def __str__(self):
1400 """ Provide string represenation. """
1401 return self.toString(False);
1402
1403 def __repr__(self):
1404 """ Provide unambigious string representation. """
1405 return self.toString(True);
1406
class Operand(object):
    """
    Instruction operand.

    Pairs an operand location (g_kdOpLocations key) with an operand type
    (g_kdOpTypes key).
    """

    def __init__(self, sWhere, sType):
        assert sWhere in g_kdOpLocations, sWhere;
        assert sType  in g_kdOpTypes, sType;
        self.sType  = sType;    ##< g_kdOpTypes
        self.sWhere = sWhere;   ##< g_kdOpLocations

    def usesModRM(self):
        """ Returns True if using some form of ModR/M encoding (type starts with E, G or M). """
        chKind = self.sType[0];
        return chKind == 'E' or chKind == 'G' or chKind == 'M';
1421
1422
1423
class Instruction(object): # pylint: disable=too-many-instance-attributes
    """
    Instruction.

    Collects everything known about one instruction: the core attributes
    (mnemonic, operands, opcode, flags, ...), implementation details, where it
    was found in the sources, and some intermediate input fields.
    """

    def __init__(self, sSrcFile, iLine):
        ## @name Core attributes.
        ## @{
        self.oParent        = None      # type: Instruction
        self.sMnemonic      = None;
        self.sBrief         = None;
        self.asDescSections = []        # type: list(str)
        self.aoMaps         = []        # type: list(InstructionMap)
        self.aoOperands     = []        # type: list(Operand)
        self.sPrefix        = None;     ##< Single prefix: None, 'none', 0x66, 0xf3, 0xf2
        self.sOpcode        = None      # type: str
        self.sSubOpcode     = None      # type: str
        self.sEncoding      = None;
        self.asFlTest       = None;
        self.asFlModify     = None;
        self.asFlUndefined  = None;
        self.asFlSet        = None;
        self.asFlClear      = None;
        self.dHints         = {};       ##< Dictionary of instruction hints, flags, whatnot. (Dictionary for speed; dummy value).
        self.sDisEnum       = None;     ##< OP_XXXX value.  Default is based on the uppercased mnemonic.
        self.asCpuIds       = [];       ##< The CPUID feature bit names for this instruction. If multiple, assume AND.
        self.asReqFeatures  = [];       ##< Which features are required to be enabled to run this instruction.
        self.aoTests        = []        # type: list(InstructionTest)
        self.sMinCpu        = None;     ##< Indicates the minimum CPU required for the instruction. Not set when oCpuExpr is.
        self.oCpuExpr       = None;     ##< Some CPU restriction expression...
        self.sGroup         = None;
        self.fUnused        = False;    ##< Unused instruction.
        self.fInvalid       = False;    ##< Invalid instruction (like UD2).
        self.sInvalidStyle  = None;     ##< Invalid behaviour style (g_kdInvalidStyles),
        self.sXcptType      = None;     ##< Exception type (g_kdXcptTypes).
        ## @}

        ## @name Implementation attributes.
        ## @{
        self.sStats         = None;
        self.sFunction      = None;
        self.fStub          = False;
        self.fUdStub        = False;
        ## @}

        ## @name Decoding info
        ## @{
        self.sSrcFile       = sSrcFile;
        self.iLineCreated   = iLine;
        self.iLineCompleted = None;
        self.cOpTags        = 0;
        self.iLineFnIemOpMacro  = -1;
        self.iLineMnemonicMacro = -1;
        ## @}

        ## @name Intermediate input fields.
        ## @{
        self.sRawDisOpNo    = None;
        self.asRawDisParams = [];
        self.sRawIemOpFlags = None;
        self.sRawOldOpcodes = None;
        self.asCopyTests    = [];
        ## @}

    def toString(self, fRepr = False):
        """
        Turn object into a string.

        The result is a '; ' separated list of 'field=value' items for the
        fields that are set (not None), enclosed in angle brackets when fRepr
        is True.
        """
        aasFields = [];

        aasFields.append(['opcode',    self.sOpcode]);
        if self.sPrefix:
            aasFields.append(['prefix', self.sPrefix]);
        aasFields.append(['mnemonic',  self.sMnemonic]);
        for iOperand, oOperand in enumerate(self.aoOperands):
            aasFields.append(['op%u' % (iOperand + 1,), '%s:%s' % (oOperand.sWhere, oOperand.sType,)]);
        if self.aoMaps:
            aasFields.append(['maps', ','.join([oMap.sName for oMap in self.aoMaps])]);
        aasFields.append(['encoding',  self.sEncoding]);
        if self.dHints:
            aasFields.append(['hints', ','.join(self.dHints.keys())]);
        aasFields.append(['disenum',   self.sDisEnum]);
        if self.asCpuIds:
            aasFields.append(['cpuid', ','.join(self.asCpuIds)]);
        aasFields.append(['group',     self.sGroup]);
        if self.fUnused:
            aasFields.append(['unused', 'True']);
        if self.fInvalid:
            aasFields.append(['invalid', 'True']);
        aasFields.append(['invlstyle', self.sInvalidStyle]);
        aasFields.append(['fltest',    self.asFlTest]);
        aasFields.append(['flmodify',  self.asFlModify]);
        aasFields.append(['flundef',   self.asFlUndefined]);
        aasFields.append(['flset',     self.asFlSet]);
        aasFields.append(['flclear',   self.asFlClear]);
        aasFields.append(['mincpu',    self.sMinCpu]);
        aasFields.append(['stats',     self.sStats]);
        aasFields.append(['sFunction', self.sFunction]);
        if self.fStub:
            aasFields.append(['fStub', 'True']);
        if self.fUdStub:
            aasFields.append(['fUdStub', 'True']);
        if self.cOpTags:
            aasFields.append(['optags', str(self.cOpTags)]);
        if self.iLineFnIemOpMacro  != -1:
            aasFields.append(['FNIEMOP_XXX', str(self.iLineFnIemOpMacro)]);
        if self.iLineMnemonicMacro != -1:
            aasFields.append(['IEMOP_MNEMMONICn', str(self.iLineMnemonicMacro)]);

        sRet = '<' if fRepr else '';
        for sField, sValue in aasFields:
            if sValue is not None:
                # A length of at most one means nothing but the optional
                # opening bracket has been added yet, so no separator.
                if len(sRet) > 1:
                    sRet += '; ';
                sRet += '%s=%s' % (sField, sValue,);
        if fRepr:
            sRet += '>';

        return sRet;

    def __str__(self):
        """ Provide string representation. """
        return self.toString(False);

    def __repr__(self):
        """ Provide unambiguous string representation. """
        return self.toString(True);

    def copy(self, oMap = None, sOpcode = None, sSubOpcode = None, sPrefix = None):
        """
        Makes a copy of the object for the purpose of putting in a different map
        or a different place in the current map.

        The copy gets this object as parent.  Lists and the hints dictionary
        are copied shallowly; the flag lists (asFlXxxx) are shared with the
        original.  The oMap, sOpcode, sSubOpcode and sPrefix arguments
        override the corresponding attributes of the copy when given.
        """
        oCopy = Instruction(self.sSrcFile, self.iLineCreated);

        oCopy.oParent           = self;
        oCopy.sMnemonic         = self.sMnemonic;
        oCopy.sBrief            = self.sBrief;
        oCopy.asDescSections    = list(self.asDescSections);
        oCopy.aoMaps            = [oMap,]    if oMap       else list(self.aoMaps);
        oCopy.aoOperands        = list(self.aoOperands); ## Deeper copy?
        oCopy.sPrefix           = sPrefix    if sPrefix    else self.sPrefix;
        oCopy.sOpcode           = sOpcode    if sOpcode    else self.sOpcode;
        oCopy.sSubOpcode        = sSubOpcode if sSubOpcode else self.sSubOpcode;
        oCopy.sEncoding         = self.sEncoding;
        oCopy.asFlTest          = self.asFlTest;
        oCopy.asFlModify        = self.asFlModify;
        oCopy.asFlUndefined     = self.asFlUndefined;
        oCopy.asFlSet           = self.asFlSet;
        oCopy.asFlClear         = self.asFlClear;
        oCopy.dHints            = dict(self.dHints);
        oCopy.sDisEnum          = self.sDisEnum;
        oCopy.asCpuIds          = list(self.asCpuIds);
        oCopy.asReqFeatures     = list(self.asReqFeatures);
        oCopy.aoTests           = list(self.aoTests); ## Deeper copy?
        oCopy.sMinCpu           = self.sMinCpu;
        oCopy.oCpuExpr          = self.oCpuExpr;
        oCopy.sGroup            = self.sGroup;
        oCopy.fUnused           = self.fUnused;
        oCopy.fInvalid          = self.fInvalid;
        oCopy.sInvalidStyle     = self.sInvalidStyle;
        oCopy.sXcptType         = self.sXcptType;

        oCopy.sStats            = self.sStats;
        oCopy.sFunction         = self.sFunction;
        oCopy.fStub             = self.fStub;
        oCopy.fUdStub           = self.fUdStub;

        oCopy.iLineCompleted    = self.iLineCompleted;
        oCopy.cOpTags           = self.cOpTags;
        oCopy.iLineFnIemOpMacro = self.iLineFnIemOpMacro;
        oCopy.iLineMnemonicMacro = self.iLineMnemonicMacro;

        oCopy.sRawDisOpNo       = self.sRawDisOpNo;
        oCopy.asRawDisParams    = list(self.asRawDisParams);
        oCopy.sRawIemOpFlags    = self.sRawIemOpFlags;
        oCopy.sRawOldOpcodes    = self.sRawOldOpcodes;
        oCopy.asCopyTests       = list(self.asCopyTests);

        return oCopy;

    def getOpcodeByte(self):
        """
        Decodes sOpcode into a byte range integer value.

        Supported forms:
            - '0xNN':  the hex byte value itself.
            - '/r':    r << 3 (mod=0).
            - '11/r':  (r << 3) | 0xc0 (mod=3).
            - '!11/r': (r << 3) | 0x80 (mod=2).
        where r is a single digit in the range 0 thru 7 (the ModR/M reg field
        is three bits wide).

        Raises exception if sOpcode is None or invalid.
        """
        if self.sOpcode is None:
            raise Exception('No opcode byte for %s!' % (self,));
        sOpcode = str(self.sOpcode);    # pylint type confusion workaround.

        # Full hex byte form.
        if sOpcode[:2] == '0x':
            return int(sOpcode, 16);

        # The remaining forms all end with the /r digit.  Anything but 0..7
        # would produce a value outside the byte/reg field range, so such
        # specs are rejected below rather than decoded.
        sReg = sOpcode[-1:];
        if len(sReg) == 1 and '0' <= sReg <= '7':
            iReg = int(sReg);

            # The /r form:
            if len(sOpcode) == 2 and sOpcode[0] == '/':
                return iReg << 3;

            # The 11/r form:
            if len(sOpcode) == 4 and sOpcode.startswith('11/'):
                return (iReg << 3) | 0xc0;

            # The !11/r form (returns mod=2):
            ## @todo this doesn't really work...
            if len(sOpcode) == 5 and sOpcode.startswith('!11/'):
                return (iReg << 3) | 0x80;

        raise Exception('unsupported opcode byte spec "%s" for %s' % (sOpcode, self,));

    @staticmethod
    def _flagsToIntegerMask(asFlags):
        """
        Returns the integer mask value for asFlags.

        Each flag is translated via g_kdEFlagsMnemonics into a constant name
        (which must not be a '!' negated one) and then via
        g_kdX86EFlagsConstants into the value to OR in.  None or an empty
        list gives zero.
        """
        uRet = 0;
        if asFlags:
            for sFlag in asFlags:
                sConstant = g_kdEFlagsMnemonics[sFlag];
                assert sConstant[0] != '!', sConstant
                uRet |= g_kdX86EFlagsConstants[sConstant];
        return uRet;

    def getTestedFlagsMask(self):
        """ Returns asFlTest as an integer mask value """
        return self._flagsToIntegerMask(self.asFlTest);

    def getModifiedFlagsMask(self):
        """ Returns asFlModify as an integer mask value """
        return self._flagsToIntegerMask(self.asFlModify);

    def getUndefinedFlagsMask(self):
        """ Returns asFlUndefined as an integer mask value """
        return self._flagsToIntegerMask(self.asFlUndefined);

    def getSetFlagsMask(self):
        """ Returns asFlSet as an integer mask value """
        return self._flagsToIntegerMask(self.asFlSet);

    def getClearedFlagsMask(self):
        """ Returns asFlClear as an integer mask value """
        return self._flagsToIntegerMask(self.asFlClear);

    def onlyInVexMaps(self):
        """ Returns True if only in VEX maps, otherwise False.  (No maps -> False) """
        if not self.aoMaps:
            return False;
        for oMap in self.aoMaps:
            if not oMap.isVexMap():
                return False;
        return True;
1662
1663
1664
## All the instructions (flat list of Instruction objects).
g_aoAllInstructions = [] # type: list(Instruction)

## All the instructions indexed by statistics name (opstat).
## One instruction per name.
g_dAllInstructionsByStat = {} # type: dict(Instruction)

## All the instructions indexed by function name (opfunction).
## Each name maps to a list of instructions.
g_dAllInstructionsByFunction = {} # type: dict(list(Instruction))

## Instructions tagged by oponlytest
g_aoOnlyTestInstructions = [] # type: list(Instruction)
1676
## Instruction maps.
## Each entry is an InstructionMap given a map name and, depending on the map,
## a second positional argument (presumably the IEM function table name - see
## the InstructionMap constructor), the lead opcode bytes, the selector used
## to index the map, and the encoding.
g_aoInstructionMaps = [
    # The one byte map and the groups hanging off single lead opcode bytes.
    InstructionMap('one', 'g_apfnOneByteMap', sSelector = 'byte'),
    InstructionMap('grp1_80', asLeadOpcodes = ['0x80',], sSelector = '/r'),
    InstructionMap('grp1_81', asLeadOpcodes = ['0x81',], sSelector = '/r'),
    InstructionMap('grp1_82', asLeadOpcodes = ['0x82',], sSelector = '/r'),
    InstructionMap('grp1_83', asLeadOpcodes = ['0x83',], sSelector = '/r'),
    InstructionMap('grp1a', asLeadOpcodes = ['0x8f',], sSelector = '/r'),
    InstructionMap('grp2_c0', asLeadOpcodes = ['0xc0',], sSelector = '/r'),
    InstructionMap('grp2_c1', asLeadOpcodes = ['0xc1',], sSelector = '/r'),
    InstructionMap('grp2_d0', asLeadOpcodes = ['0xd0',], sSelector = '/r'),
    InstructionMap('grp2_d1', asLeadOpcodes = ['0xd1',], sSelector = '/r'),
    InstructionMap('grp2_d2', asLeadOpcodes = ['0xd2',], sSelector = '/r'),
    InstructionMap('grp2_d3', asLeadOpcodes = ['0xd3',], sSelector = '/r'),
    ## @todo g_apfnEscF1_E0toFF
    InstructionMap('grp3_f6', asLeadOpcodes = ['0xf6',], sSelector = '/r'),
    InstructionMap('grp3_f7', asLeadOpcodes = ['0xf7',], sSelector = '/r'),
    InstructionMap('grp4', asLeadOpcodes = ['0xfe',], sSelector = '/r'),
    InstructionMap('grp5', asLeadOpcodes = ['0xff',], sSelector = '/r'),
    # Group 11 is split into separate '_m' and '_r' maps using the '!11 /r' and '11' selectors.
    InstructionMap('grp11_c6_m', asLeadOpcodes = ['0xc6',], sSelector = '!11 /r'),
    InstructionMap('grp11_c6_r', asLeadOpcodes = ['0xc6',], sSelector = '11'), # xabort
    InstructionMap('grp11_c7_m', asLeadOpcodes = ['0xc7',], sSelector = '!11 /r'),
    InstructionMap('grp11_c7_r', asLeadOpcodes = ['0xc7',], sSelector = '11'), # xbegin

    # The map following the 0x0f lead byte and the groups with two lead bytes.
    InstructionMap('two0f', 'g_apfnTwoByteMap', asLeadOpcodes = ['0x0f',], sDisParse = 'IDX_ParseTwoByteEsc'),
    InstructionMap('grp6', 'g_apfnGroup6', asLeadOpcodes = ['0x0f', '0x00',], sSelector = '/r'),
    InstructionMap('grp7_m', 'g_apfnGroup7Mem', asLeadOpcodes = ['0x0f', '0x01',], sSelector = '!11 /r'),
    InstructionMap('grp7_r', asLeadOpcodes = ['0x0f', '0x01',], sSelector = '11'),
    InstructionMap('grp8', asLeadOpcodes = ['0x0f', '0xba',], sSelector = '/r'),
    InstructionMap('grp9', 'g_apfnGroup9RegReg', asLeadOpcodes = ['0x0f', '0xc7',], sSelector = 'mod /r'),
    ## @todo What about g_apfnGroup9MemReg?
    InstructionMap('grp10', None, asLeadOpcodes = ['0x0f', '0xb9',], sSelector = '/r'), # UD1 /w modr/m
    InstructionMap('grp12', 'g_apfnGroup12RegReg', asLeadOpcodes = ['0x0f', '0x71',], sSelector = 'mod /r'),
    InstructionMap('grp13', 'g_apfnGroup13RegReg', asLeadOpcodes = ['0x0f', '0x72',], sSelector = 'mod /r'),
    InstructionMap('grp14', 'g_apfnGroup14RegReg', asLeadOpcodes = ['0x0f', '0x73',], sSelector = 'mod /r'),
    InstructionMap('grp15', 'g_apfnGroup15MemReg', asLeadOpcodes = ['0x0f', '0xae',], sSelector = 'memreg /r'),
    ## @todo What about g_apfnGroup15RegReg?
    InstructionMap('grp16', asLeadOpcodes = ['0x0f', '0x18',], sSelector = 'mod /r'),
    InstructionMap('grpA17', asLeadOpcodes = ['0x0f', '0x78',], sSelector = '/r'), # AMD: EXTRQ weirdness
    InstructionMap('grpP', asLeadOpcodes = ['0x0f', '0x0d',], sSelector = '/r'), # AMD: prefetch

    # The maps following the 0x0f 0x38 and 0x0f 0x3a lead bytes.
    InstructionMap('three0f38', 'g_apfnThreeByte0f38', asLeadOpcodes = ['0x0f', '0x38',]),
    InstructionMap('three0f3a', 'g_apfnThreeByte0f3a', asLeadOpcodes = ['0x0f', '0x3a',]),

    # Maps and groups using the 'vex1' encoding.
    InstructionMap('vexmap1', 'g_apfnVexMap1', sEncoding = 'vex1'),
    InstructionMap('vexgrp12', 'g_apfnVexGroup12RegReg', sEncoding = 'vex1', asLeadOpcodes = ['0x71',], sSelector = 'mod /r'),
    InstructionMap('vexgrp13', 'g_apfnVexGroup13RegReg', sEncoding = 'vex1', asLeadOpcodes = ['0x72',], sSelector = 'mod /r'),
    InstructionMap('vexgrp14', 'g_apfnVexGroup14RegReg', sEncoding = 'vex1', asLeadOpcodes = ['0x73',], sSelector = 'mod /r'),
    InstructionMap('vexgrp15', 'g_apfnVexGroup15MemReg', sEncoding = 'vex1', asLeadOpcodes = ['0xae',], sSelector = 'memreg /r'),
    InstructionMap('vexgrp17', 'g_apfnVexGroup17_f3', sEncoding = 'vex1', asLeadOpcodes = ['0xf3',], sSelector = '/r'),

    # Maps using the 'vex2' and 'vex3' encodings.
    InstructionMap('vexmap2', 'g_apfnVexMap2', sEncoding = 'vex2'),
    InstructionMap('vexmap3', 'g_apfnVexMap3', sEncoding = 'vex3'),

    # 3DNow! (lead bytes 0x0f 0x0f) and the maps and groups using the 'xopN' encodings.
    InstructionMap('3dnow', asLeadOpcodes = ['0x0f', '0x0f',]),
    InstructionMap('xopmap8', sEncoding = 'xop8'),
    InstructionMap('xopmap9', sEncoding = 'xop9'),
    InstructionMap('xopgrp1', sEncoding = 'xop9', asLeadOpcodes = ['0x01'], sSelector = '/r'),
    InstructionMap('xopgrp2', sEncoding = 'xop9', asLeadOpcodes = ['0x02'], sSelector = '/r'),
    InstructionMap('xopgrp3', sEncoding = 'xop9', asLeadOpcodes = ['0x12'], sSelector = '/r'),
    InstructionMap('xopmap10', sEncoding = 'xop10'),
    InstructionMap('xopgrp4', sEncoding = 'xop10', asLeadOpcodes = ['0x12'], sSelector = '/r'),
];
## The entries of g_aoInstructionMaps indexed by their sName attribute.
g_dInstructionMaps          = { oInstrMap.sName:    oInstrMap for oInstrMap in g_aoInstructionMaps };
## The entries of g_aoInstructionMaps indexed by their sIemName attribute.
g_dInstructionMapsByIemName = { oInstrMap.sIemName: oInstrMap for oInstrMap in g_aoInstructionMaps };
1742
1743
1744#
1745# Decoder functions.
1746#
1747
class DecoderFunction(object):
    """
    Decoder function.

    This is mainly used for scoping searches for variables used in
    microcode blocks.
    """
    def __init__(self, sSrcFile, iBeginLine, sName, asDefArgs):
        ## The function name.
        self.sName      = sName;
        ## The FNIEMOP*DEF/STUB* macro argument list, 0th element is the macro name.
        self.asDefArgs  = asDefArgs;
        ## The source file the function is defined in.
        self.sSrcFile   = sSrcFile;
        ## The start line.
        self.iBeginLine = iBeginLine;
        ## The line the function (probably) ends on, -1 until complete() sets it.
        self.iEndLine   = -1;
        ## The raw lines the function is made up of, empty until complete() sets it.
        self.asLines    = [] # type: list(str)

    def complete(self, iEndLine, asLines):
        """
        Completes the function by recording the end line and the raw lines.

        Asserts that the end line has not been set already (is still -1).
        """
        assert self.iEndLine == -1;
        (self.iEndLine, self.asLines) = (iEndLine, asLines);
1770
1771
1772#
1773# "Microcode" statements and blocks
1774#
1775
class McStmt(object):
    """
    Statement in a microcode block.
    """
    def __init__(self, sName, asParams):
        ## 'IEM_MC_XXX' or 'C++'.
        self.sName    = sName;
        ## The statement parameters (strings, joined by ', ' when rendering).
        self.asParams = asParams;
        ## Initialized to None; not used by this class itself.
        self.oUser    = None;

    def renderCode(self, cchIndent = 0):
        """
        Renders the statement as a single 'NAME(param, ...);' line that is
        indented by cchIndent spaces and terminated by a newline.
        """
        return ''.join([' ' * cchIndent, self.sName, '(', ', '.join(self.asParams), ');\n']);

    @staticmethod
    def renderCodeForList(aoStmts, cchIndent = 0):
        """
        Renders each statement in the list with the given indentation and
        returns the concatenated result.
        """
        return ''.join(oCurStmt.renderCode(cchIndent) for oCurStmt in aoStmts);

    @staticmethod
    def findStmtByNames(aoStmts, dNames):
        """
        Returns the first statement in the list having one of the given names,
        None if there is no such statement.

        The branches of conditional statements (McStmtCond) are searched too,
        the IF branch before the ELSE branch.

        Note! The names are passed as a dictionary for quick lookup, the value
              does not matter.
        """
        for oCurStmt in aoStmts:
            if oCurStmt.sName in dNames:
                return oCurStmt;
            if not isinstance(oCurStmt, McStmtCond):
                continue;
            oFound = (   McStmt.findStmtByNames(oCurStmt.aoIfBranch, dNames)
                      or McStmt.findStmtByNames(oCurStmt.aoElseBranch, dNames));
            if oFound:
                return oFound;
        return None;

    def isCppStmt(self):
        """ Checks if this is a C++ statement, i.e. the name starts with 'C++'. """
        return self.sName[:3] == 'C++';
1820
class McStmtCond(McStmt):
    """
    Base class for conditional statements (IEM_MC_IF_XXX).
    """
    def __init__(self, sName, asParams, aoIfBranch = None, aoElseBranch = None):
        McStmt.__init__(self, sName, asParams);
        # The branches are stored as new lists, so the lists passed in are not shared.
        ## The statements of the IF branch.
        self.aoIfBranch   = list(aoIfBranch)   if aoIfBranch   is not None else [];
        ## The statements of the ELSE branch, empty if there is none.
        self.aoElseBranch = list(aoElseBranch) if aoElseBranch is not None else [];

    def renderCode(self, cchIndent = 0):
        """
        Renders the conditional with its branches indented four more spaces.
        The IEM_MC_ELSE() part is only included if the ELSE branch is not empty.
        """
        sIndent = ' ' * cchIndent;
        asRet = [
            sIndent + self.sName + '(' + ', '.join(self.asParams) + ') {\n',
            self.renderCodeForList(self.aoIfBranch, cchIndent + 4),
        ];
        if self.aoElseBranch:
            asRet.append(sIndent + '} IEM_MC_ELSE() {\n');
            asRet.append(self.renderCodeForList(self.aoElseBranch, cchIndent + 4));
        asRet.append(sIndent + '} IEM_MC_ENDIF();\n');
        return ''.join(asRet);
1838
class McStmtVar(McStmt):
    """ IEM_MC_LOCAL_VAR, IEM_MC_LOCAL_CONST """
    def __init__(self, sName, asParams, sType, sVarName, sConstValue = None):
        McStmt.__init__(self, sName, asParams);
        ## The variable type.
        self.sType       = sType;
        ## The variable name.
        self.sVarName    = sVarName;
        ## The constant value, None if not const.
        self.sConstValue = sConstValue;
1846
class McStmtArg(McStmtVar):
    """ IEM_MC_ARG, IEM_MC_ARG_CONST, IEM_MC_ARG_LOCAL_REF """
    def __init__(self, sName, asParams, sType, sVarName, iArg, sConstValue = None, sRef = None, sRefType = 'none'):
        McStmtVar.__init__(self, sName, asParams, sType, sVarName, sConstValue);
        ## The argument number.
        self.iArg     = iArg;
        ## The reference string (local variable, register).
        self.sRef     = sRef;
        ## The kind of reference: 'local', 'none'.
        self.sRefType = sRefType;
        assert sRefType in ('none', 'local');
1855
1856
class McStmtCall(McStmt):
    """ IEM_MC_CALL_* """
    def __init__(self, sName, asParams, iFnParam, iRcNameParam = -1):
        McStmt.__init__(self, sName, asParams);
        ## Index into asParams of the function being called.
        self.idxFn     = iFnParam;
        ## Index of the entry following the function in asParams.
        self.idxParams = iFnParam + 1;
        ## The function being called, i.e. asParams[iFnParam].
        self.sFn       = asParams[iFnParam];
        ## The asParams[iRcNameParam] value, None if iRcNameParam is negative.
        self.iRcName   = asParams[iRcNameParam] if iRcNameParam >= 0 else None;
1865
class McCppGeneric(McStmt):
    """
    Generic C++/C statement.
    """
    def __init__(self, sCode, fDecode = True, sName = 'C++', cchIndent = 0):
        # The raw code is kept as the one and only statement parameter.
        McStmt.__init__(self, sName, [sCode,]);
        ## Selects the '// C++ decode' (True) or '// C++ normal' (False) annotation when rendering.
        self.fDecode   = fDecode;
        ## Indentation added to the one passed to renderCode.
        self.cchIndent = cchIndent;

    def renderCode(self, cchIndent = 0):
        """
        Renders the raw code with the indentation prepended and with a decode
        or normal annotation inserted in front of every newline.
        """
        sAnnotation = ' // C++ decode\n' if self.fDecode else ' // C++ normal\n';
        sCode = ' ' * (cchIndent + self.cchIndent) + self.asParams[0] + '\n';
        return sCode.replace('\n', sAnnotation);
1883
class McCppCond(McStmtCond):
    """
    C++/C 'if' statement.
    """
    def __init__(self, sCode, fDecode = True, aoIfBranch = None, aoElseBranch = None, cchIndent = 0):
        # The condition expression is kept as the one and only statement parameter.
        McStmtCond.__init__(self, 'C++/if', [sCode,], aoIfBranch, aoElseBranch);
        ## Selects the '// C++ decode' (True) or '// C++ normal' (False) annotation when rendering.
        self.fDecode   = fDecode;
        ## Indentation added to the one passed to renderCode.
        self.cchIndent = cchIndent;

    def renderCode(self, cchIndent = 0):
        """
        Renders the 'if (...)' statement with braces on separate lines and the
        branches indented four more spaces.  The else part is only included if
        the else branch is not empty.
        """
        cchOuter    = cchIndent + self.cchIndent;
        sIndent     = ' ' * cchOuter;
        sAnnotation = '// C++ decode' if self.fDecode else '// C++ normal';
        asRet = [
            sIndent + 'if (' + self.asParams[0] + ') ' + sAnnotation + '\n',
            sIndent + '{\n',
            self.renderCodeForList(self.aoIfBranch, cchOuter + 4),
            sIndent + '}\n',
        ];
        if self.aoElseBranch:
            asRet.extend([
                sIndent + 'else ' + sAnnotation + '\n',
                sIndent + '{\n',
                self.renderCodeForList(self.aoElseBranch, cchOuter + 4),
                sIndent + '}\n',
            ]);
        return ''.join(asRet);
1906
class McCppPreProc(McCppGeneric):
    """
    C++/C Preprocessor directive.
    """
    def __init__(self, sCode):
        McCppGeneric.__init__(self, sCode, fDecode = False, sName = 'C++/preproc');

    def renderCode(self, cchIndent = 0):
        """
        Renders the directive followed by a newline.  The indentation is
        ignored, nothing is prepended to the directive text.
        """
        sDirective = self.asParams[0];
        return sDirective + '\n';
1916
1917
class McBlock(object):
    """
    Microcode block (IEM_MC_BEGIN ... IEM_MC_END, IEM_MC_DEFER_TO_CIMPL_x_RET).

    The raw source lines of the block are kept in asLines and are decoded
    into a list of McStmt instances on demand, see decode() and decodeCode().
    """

    def __init__(self, sSrcFile, iBeginLine, offBeginLine, oFunction, iInFunction, cchIndent = None):
        ## The source file containing the block.
        self.sSrcFile     = sSrcFile;
        ## The line with the IEM_MC_BEGIN/IEM_MC_DEFER_TO_CIMPL_X_RET statement.
        self.iBeginLine   = iBeginLine;
        ## The offset of the IEM_MC_BEGIN/IEM_MC_DEFER_TO_CIMPL_X_RET statement within the line.
        self.offBeginLine = offBeginLine;
        ## The line with the IEM_MC_END statement / last line of IEM_MC_DEFER_TO_CIMPL_X_RET.
        self.iEndLine     = -1;
        ## The offset of the IEM_MC_END statement within the line / semicolon offset for defer-to.
        self.offEndLine   = 0;
        ## The offset following the IEM_MC_END/IEM_MC_DEFER_TO_CIMPL_X_RET semicolon.
        self.offAfterEnd  = 0;
        ## The function the block resides in.
        self.oFunction    = oFunction;
        ## The name of the function the block resides in.  DEPRECATED.
        self.sFunction    = oFunction.sName;
        ## The block number within the function.
        self.iInFunction  = iInFunction;
        ## The indentation, offBeginLine is used when cchIndent is None or zero.
        self.cchIndent    = cchIndent if cchIndent else offBeginLine;
        ## The raw lines the block is made up of.
        self.asLines      = [] # type: list(str)
        ## Decoded statements in the block.
        self.aoStmts      = [] # type: list(McStmt)

    def complete(self, iEndLine, offEndLine, offAfterEnd, asLines):
        """
        Completes the microcode block by recording where it ends and the raw
        lines it is made up of.

        Asserts that the end line has not been set already (is still -1).
        """
        assert self.iEndLine == -1;
        self.iEndLine     = iEndLine;
        self.offEndLine   = offEndLine;
        self.offAfterEnd  = offAfterEnd;
        self.asLines      = asLines;

    def raiseDecodeError(self, sRawCode, off, sMessage):
        """
        Raises a decoding error (ParserException).

        The line and column numbers in the message are calculated from the
        offset off into sRawCode, the line being relative to self.iBeginLine.
        """
        offStartOfLine = sRawCode.rfind('\n', 0, off) + 1;
        iLine          = sRawCode.count('\n', 0, off);
        raise ParserException('%s:%d:%d: parsing error: %s'
                              % (self.sSrcFile, self.iBeginLine + iLine, off - offStartOfLine + 1, sMessage,));

    def raiseStmtError(self, sName, sMessage):
        """ Raises a statement parser error (ParserException) for the statement sName. """
        raise ParserException('%s:%d: %s: parsing error: %s' % (self.sSrcFile, self.iBeginLine, sName, sMessage,));

    def checkStmtParamCount(self, sName, asParams, cParamsExpected):
        """
        Checks the parameter count of the statement sName.
        Returns True if it matches, raises ParserException if it doesn't.
        """
        if len(asParams) != cParamsExpected:
            raise ParserException('%s:%d: %s: Expected %s parameters, found %s!'
                                  % (self.sSrcFile, self.iBeginLine, sName, cParamsExpected, len(asParams),));
        return True;

    #
    # Statement parsers.
    #
    # These are static methods taking the McBlock instance as the first
    # parameter (oSelf), so they can be called thru the parser table.
    #

    @staticmethod
    def parseMcGeneric(oSelf, sName, asParams):
        """ Generic parser that returns a plain McStmt object. """
        _ = oSelf;
        return McStmt(sName, asParams);

    @staticmethod
    def parseMcGenericCond(oSelf, sName, asParams):
        """ Generic parser that returns a plain McStmtCond object. """
        _ = oSelf;
        return McStmtCond(sName, asParams);

    @staticmethod
    def parseMcBegin(oSelf, sName, asParams):
        """ IEM_MC_BEGIN - two parameters. """
        oSelf.checkStmtParamCount(sName, asParams, 2);
        return McBlock.parseMcGeneric(oSelf, sName, asParams);

    @staticmethod
    def parseMcArg(oSelf, sName, asParams):
        """ IEM_MC_ARG - parameters: type, name, argument number. """
        oSelf.checkStmtParamCount(sName, asParams, 3);
        return McStmtArg(sName, asParams, asParams[0], asParams[1], int(asParams[2]));

    @staticmethod
    def parseMcArgConst(oSelf, sName, asParams):
        """ IEM_MC_ARG_CONST - parameters: type, name, constant value, argument number. """
        oSelf.checkStmtParamCount(sName, asParams, 4);
        return McStmtArg(sName, asParams, asParams[0], asParams[1], int(asParams[3]), sConstValue = asParams[2]);

    @staticmethod
    def parseMcArgLocalRef(oSelf, sName, asParams):
        """ IEM_MC_ARG_LOCAL_REF - parameters: type, name, referenced local, argument number. """
        oSelf.checkStmtParamCount(sName, asParams, 4);
        return McStmtArg(sName, asParams, asParams[0], asParams[1], int(asParams[3]), sRef = asParams[2], sRefType = 'local');

    @staticmethod
    def parseMcArgLocalEFlags(oSelf, sName, asParams):
        """
        IEM_MC_ARG_LOCAL_EFLAGS - parameters: argument name, local name, argument number.

        Returns a tuple with two statements rather than a single one.
        """
        oSelf.checkStmtParamCount(sName, asParams, 3);
        # Note! We split this one up into IEM_MC_LOCAL and IEM_MC_ARG_LOCAL_REF.
        return (
            McStmtVar('IEM_MC_LOCAL', ['uint32_t', asParams[1],], 'uint32_t', asParams[1]),
            McStmtArg('IEM_MC_ARG_LOCAL_REF', ['uint32_t *', asParams[0], asParams[1], asParams[2]],
                      'uint32_t *', asParams[0], int(asParams[2]), sRef = asParams[1], sRefType = 'local'),
        );

    @staticmethod
    def parseMcLocal(oSelf, sName, asParams):
        """ IEM_MC_LOCAL - parameters: type, name. """
        oSelf.checkStmtParamCount(sName, asParams, 2);
        return McStmtVar(sName, asParams, asParams[0], asParams[1]);

    @staticmethod
    def parseMcLocalConst(oSelf, sName, asParams):
        """ IEM_MC_LOCAL_CONST - parameters: type, name, constant value. """
        oSelf.checkStmtParamCount(sName, asParams, 3);
        return McStmtVar(sName, asParams, asParams[0], asParams[1], sConstValue = asParams[2]);

    @staticmethod
    def parseMcCallAImpl(oSelf, sName, asParams):
        """ IEM_MC_CALL_AIMPL_3|4 - the last character of the name gives the argument count. """
        cArgs = int(sName[-1]);
        oSelf.checkStmtParamCount(sName, asParams, 2 + cArgs);
        return McStmtCall(sName, asParams, 1, 0);

    @staticmethod
    def parseMcCallVoidAImpl(oSelf, sName, asParams):
        """ IEM_MC_CALL_VOID_AIMPL_2|3 - the last character of the name gives the argument count. """
        cArgs = int(sName[-1]);
        oSelf.checkStmtParamCount(sName, asParams, 1 + cArgs);
        return McStmtCall(sName, asParams, 0);

    @staticmethod
    def parseMcCallAvxAImpl(oSelf, sName, asParams):
        """ IEM_MC_CALL_AVX_AIMPL_2|3 - the last character of the name gives the argument count. """
        cArgs = int(sName[-1]);
        oSelf.checkStmtParamCount(sName, asParams, 1 + cArgs);
        return McStmtCall(sName, asParams, 0);

    @staticmethod
    def parseMcCallFpuAImpl(oSelf, sName, asParams):
        """ IEM_MC_CALL_FPU_AIMPL_1|2|3 - the last character of the name gives the argument count. """
        cArgs = int(sName[-1]);
        oSelf.checkStmtParamCount(sName, asParams, 1 + cArgs);
        return McStmtCall(sName, asParams, 0);

    @staticmethod
    def parseMcCallMmxAImpl(oSelf, sName, asParams):
        """ IEM_MC_CALL_MMX_AIMPL_2|3 - the last character of the name gives the argument count. """
        cArgs = int(sName[-1]);
        oSelf.checkStmtParamCount(sName, asParams, 1 + cArgs);
        return McStmtCall(sName, asParams, 0);

    @staticmethod
    def parseMcCallSseAImpl(oSelf, sName, asParams):
        """ IEM_MC_CALL_SSE_AIMPL_2|3 - the last character of the name gives the argument count. """
        cArgs = int(sName[-1]);
        oSelf.checkStmtParamCount(sName, asParams, 1 + cArgs);
        return McStmtCall(sName, asParams, 0);

    @staticmethod
    def parseMcCallCImpl(oSelf, sName, asParams):
        """ IEM_MC_CALL_CIMPL_0|1|2|3|4|5 - the last character of the name gives the argument count. """
        cArgs = int(sName[-1]);
        oSelf.checkStmtParamCount(sName, asParams, 2 + cArgs);
        return McStmtCall(sName, asParams, 1);

    #
    # Text helpers.
    #

    @staticmethod
    def stripComments(sCode):
        """
        Returns sCode with C and C++ comments removed.

        A C++ comment is removed up to but not including the newline.  A C
        comment is replaced by a single space unless there is whitespace right
        before or right after it.  An unterminated comment truncates the code,
        in which case the result is also stripped of trailing whitespace.

        Note! Does not take string literals into account.
        """
        off = 0;
        while off < len(sCode):
            off = sCode.find('/', off);
            if off < 0 or off + 1 >= len(sCode):
                break;

            if sCode[off + 1] == '/':
                # C++ comment.
                offEnd = sCode.find('\n', off + 2);
                if offEnd < 0:
                    return sCode[:off].rstrip();
                sCode = sCode[ : off] + sCode[offEnd : ];
                off += 1;   # Skip the newline that is now at off.

            elif sCode[off + 1] == '*':
                # C comment
                offEnd = sCode.find('*/', off + 2);
                if offEnd < 0:
                    return sCode[:off].rstrip();
                sSep = ' ';
                if (off > 0 and sCode[off - 1].isspace()) or (offEnd + 2 < len(sCode) and sCode[offEnd + 2].isspace()):
                    sSep = '';
                sCode = sCode[ : off] + sSep + sCode[offEnd + 2 : ];
                off += len(sSep);

            else:
                # Not a comment.
                off += 1;
        return sCode;

    @staticmethod
    def extractParam(sCode, offParam):
        """
        Extracts the parameter value at offParam in sCode.

        Returns stripped value and the offset of the terminating ',' or ')'.
        Commas and closing parentheses nested inside parentheses do not
        terminate the value.  The offset is len(sCode) if no terminator is
        found.
        """
        # Extract it.
        cNesting = 0;
        offStart = offParam;
        while offParam < len(sCode):
            ch = sCode[offParam];
            if ch == '(':
                cNesting += 1;
            elif ch == ')':
                if cNesting == 0:
                    break;
                cNesting -= 1;
            elif ch == ',' and cNesting == 0:
                break;
            offParam += 1;
        return (sCode[offStart : offParam].strip(), offParam);

    @staticmethod
    def extractParams(sCode, offOpenParen):
        """
        Parses a parameter list.

        Returns the list of parameter values and the offset of the closing parenthesis.
        Returns (None, len(sCode)) if no closing parenthesis was found.
        """
        assert sCode[offOpenParen] == '(';
        asParams = [];
        off = offOpenParen + 1;
        while off < len(sCode):
            ch = sCode[off];
            if ch.isspace():
                off += 1;
            elif ch != ')':
                (sParam, off) = McBlock.extractParam(sCode, off);
                asParams.append(sParam);
                if off >= len(sCode):
                    # Ran off the end without finding the terminating ',' or ')'.
                    # This is reported the documented way so the caller can raise
                    # a proper parser error for it.
                    break;
                if sCode[off] == ',':
                    off += 1;
            else:
                return (asParams, off);
        return (None, off);

    @staticmethod
    def findClosingBraces(sCode, off, offStop):
        """
        Finds the matching '}' for the '{' at off in sCode, not looking at
        anything at or beyond offStop.
        Returns offset of the matching '}' on success, otherwise -1.

        Note! Does not take comments into account.
        """
        cDepth = 1;
        off += 1;
        while off < offStop:
            offClose = sCode.find('}', off, offStop);
            if offClose < 0:
                break;
            # Account for braces opened since the last position before closing one.
            cDepth += sCode.count('{', off, offClose);
            cDepth -= 1;
            if cDepth == 0:
                return offClose;
            off = offClose + 1;
        return -1;

    @staticmethod
    def countSpacesAt(sCode, off, offStop):
        """ Returns the number of whitespace characters at off in sCode, stopping at offStop. """
        offStart = off;
        while off < offStop and sCode[off].isspace():
            off += 1;
        return off - offStart;

    @staticmethod
    def skipSpacesAt(sCode, off, offStop):
        """ Returns first offset at or after off for a non-space character, offStop at the most. """
        return off + McBlock.countSpacesAt(sCode, off, offStop);

    @staticmethod
    def isSubstrAt(sStr, off, sSubStr):
        """ Returns true if sSubStr is found at off in sStr. """
        return sStr[off : off + len(sSubStr)] == sSubStr;

    ## Matches the C/C++ control statements at the start of a statement.
    koReCppCtrlStmts   = re.compile(r'\b(if\s*[(]|else\b|while\s*[(]|for\s*[(]|do\b)');
    ## Matches references to the iem.s.xxx decoder variables listed.
    koReIemDecoderVars = re.compile(  r'iem\.s\.(fPrefixes|uRexReg|uRexB|uRexIndex|iEffSeg|offModRm|cbOpcode|offOpcode'
                                    + r'|enmEffOpSize|enmDefOpSize|enmDefAddrMode|enmEffAddrMode|idxPrefix'
                                    + r'|uVex3rdReg|uVexLength|fEvxStuff|uFpuOpcode|abOpcode'
                                    + r')');

    def decodeCode(self, sRawCode, off = 0, offStop = -1, iLevel = 0): # pylint: disable=too-many-statements,too-many-branches
        """
        Decodes sRawCode[off : offStop].

        A negative offStop means the end of sRawCode.  The iLevel parameter is
        the nesting level, it is incremented when recursing into the branches
        of conditional statements.

        Returns list of McStmt instances.
        Raises ParserException on failure.

        Note! The searches for the ';' terminating a statement are not limited
              by offStop.  The single statement if/else branch decoding relies
              on this, as it passes the offset of the terminating ';' as
              offStop when recursing.
        """
        if offStop < 0:
            offStop = len(sRawCode);
        aoStmts = [];
        while off < offStop:
            ch = sRawCode[off];

            #
            # Skip spaces and comments.
            #
            if ch.isspace():
                off += 1;

            elif ch == '/':
                # Slicing yields an empty string rather than raising IndexError
                # when the slash is the very last character.
                chNext = sRawCode[off + 1 : off + 2];
                if chNext == '/': # C++ comment.
                    off = sRawCode.find('\n', off + 2);
                    if off < 0:
                        break;
                    off += 1;
                elif chNext == '*': # C comment.
                    off = sRawCode.find('*/', off + 2);
                    if off < 0:
                        break;
                    off += 2;
                else:
                    self.raiseDecodeError(sRawCode, off, 'Unexpected "/"');

            #
            # Is it a MC statement.
            #
            elif ch == 'I' and sRawCode[off : off + len('IEM_MC_')] == 'IEM_MC_':
                # All MC statements ends with a semicolon, except for conditionals which ends with a '{'.
                # Extract it and strip comments from it.
                if not self.isSubstrAt(sRawCode, off, 'IEM_MC_IF_'):
                    offEnd = sRawCode.find(';', off + len('IEM_MC_'));
                    if offEnd <= off:
                        self.raiseDecodeError(sRawCode, off, 'MC statement without a ";"');
                else:
                    offEnd = sRawCode.find('{', off + len('IEM_MC_IF_'));
                    if offEnd <= off:
                        self.raiseDecodeError(sRawCode, off, 'MC conditional statement without a "{"');
                    if sRawCode.find(';', off + len('IEM_MC_IF_'), offEnd) > off:
                        self.raiseDecodeError(sRawCode, off, 'MC conditional statement without an immediate "{"');
                    # Back up over the whitespace preceding the '{'.
                    offEnd -= 1;
                    while offEnd > off and sRawCode[offEnd - 1].isspace():
                        offEnd -= 1;

                sRawStmt = self.stripComments(sRawCode[off : offEnd]);

                # Isolate the statement name.
                offOpenParen = sRawStmt.find('(');
                if offOpenParen < 0:
                    self.raiseDecodeError(sRawCode, off, 'MC statement without a "("');
                sName = sRawStmt[: offOpenParen].strip();

                # Extract the parameters.
                (asParams, offCloseParen) = self.extractParams(sRawStmt, offOpenParen);
                if asParams is None:
                    self.raiseDecodeError(sRawCode, off, 'MC statement without a closing parenthesis');
                if offCloseParen + 1 != len(sRawStmt):
                    self.raiseDecodeError(sRawCode, off,
                                          'Unexpected code following MC statement: %s' % (sRawStmt[offCloseParen + 1:]));

                # Hand it to the handler.  The default makes unknown statements
                # end up with the parser error below rather than a TypeError.
                fnParser = g_dMcStmtParsers.get(sName, (None, False))[0];
                if not fnParser:
                    self.raiseDecodeError(sRawCode, off, 'Unknown MC statement: %s' % (sName,));
                oStmt = fnParser(self, sName, asParams);
                if not isinstance(oStmt, (list, tuple)):
                    aoStmts.append(oStmt);
                else:
                    aoStmts.extend(oStmt);

                #
                # If conditional, we need to parse the whole statement.
                #
                # For reasons of simplicity, we assume the following structure
                # and parse each branch in a recursive call:
                #   IEM_MC_IF_XXX() {
                #        IEM_MC_WHATEVER();
                #   } IEM_MC_ELSE() {
                #        IEM_MC_WHATEVER();
                #   } IEM_MC_ENDIF();
                #
                if sName.startswith('IEM_MC_IF_'):
                    if iLevel > 1:
                        self.raiseDecodeError(sRawCode, off, 'Too deep nesting of conditionals.');

                    # Find start of the IF block:
                    offBlock1 = self.skipSpacesAt(sRawCode, offEnd, offStop);
                    if sRawCode[offBlock1] != '{':
                        self.raiseDecodeError(sRawCode, offBlock1, 'Expected "{" following %s' % (sName,));

                    # Find the end of it.
                    offBlock1End = self.findClosingBraces(sRawCode, offBlock1, offStop);
                    if offBlock1End < 0:
                        self.raiseDecodeError(sRawCode, offBlock1, 'No matching "}" closing IF block of %s' % (sName,));

                    oStmt.aoIfBranch = self.decodeCode(sRawCode, offBlock1 + 1, offBlock1End, iLevel + 1);

                    # Is there an else section?
                    off = self.skipSpacesAt(sRawCode, offBlock1End + 1, offStop);
                    if self.isSubstrAt(sRawCode, off, 'IEM_MC_ELSE'):
                        off = self.skipSpacesAt(sRawCode, off + len('IEM_MC_ELSE'), offStop);
                        if sRawCode[off] != '(':
                            self.raiseDecodeError(sRawCode, off, 'Expected "(" following IEM_MC_ELSE"');
                        off = self.skipSpacesAt(sRawCode, off + 1, offStop);
                        if sRawCode[off] != ')':
                            self.raiseDecodeError(sRawCode, off, 'Expected ")" following IEM_MC_ELSE("');

                        # Find start of the ELSE block.
                        offBlock2 = self.skipSpacesAt(sRawCode, off + 1, offStop);
                        if sRawCode[offBlock2] != '{':
                            self.raiseDecodeError(sRawCode, offBlock2, 'Expected "{" following IEM_MC_ELSE()"');

                        # Find the end of it.
                        offBlock2End = self.findClosingBraces(sRawCode, offBlock2, offStop);
                        if offBlock2End < 0:
                            self.raiseDecodeError(sRawCode, offBlock2, 'No matching "}" closing ELSE block of %s' % (sName,));

                        oStmt.aoElseBranch = self.decodeCode(sRawCode, offBlock2 + 1, offBlock2End, iLevel + 1);
                        off = self.skipSpacesAt(sRawCode, offBlock2End + 1, offStop);

                    # Parse past the endif statement.
                    if not self.isSubstrAt(sRawCode, off, 'IEM_MC_ENDIF'):
                        self.raiseDecodeError(sRawCode, off, 'Expected IEM_MC_ENDIF for closing %s' % (sName,));
                    off = self.skipSpacesAt(sRawCode, off + len('IEM_MC_ENDIF'), offStop);
                    if sRawCode[off] != '(':
                        self.raiseDecodeError(sRawCode, off, 'Expected "(" following IEM_MC_ENDIF"');
                    off = self.skipSpacesAt(sRawCode, off + 1, offStop);
                    if sRawCode[off] != ')':
                        self.raiseDecodeError(sRawCode, off, 'Expected ")" following IEM_MC_ENDIF("');
                    off = self.skipSpacesAt(sRawCode, off + 1, offStop);
                    if sRawCode[off] != ';':
                        self.raiseDecodeError(sRawCode, off, 'Expected ";" following IEM_MC_ENDIF()"');
                    off += 1;

                else:
                    # Advance.
                    off = offEnd + 1;

            #
            # Otherwise it must be a C/C++ statement of sorts.
            #
            else:
                # Find the end of the statement.  if and else requires special handling.
                # Note! Unlike in the MC statement case above, offEnd is the offset
                #       of the last character of the statement here (inclusive).
                sCondExpr = None;
                oMatch = self.koReCppCtrlStmts.match(sRawCode, off);
                if oMatch:
                    if oMatch.group(1)[-1] == '(':
                        (sCondExpr, offEnd) = self.extractParam(sRawCode, oMatch.end());
                    else:
                        offEnd = oMatch.end();
                    if not oMatch.group(1).startswith('if') and oMatch.group(1) != 'else':
                        self.raiseDecodeError(sRawCode, off, 'Only if/else control statements allowed: %s' % (oMatch.group(1),));
                elif ch == '#':
                    # Preprocessor directive, ends with the line.
                    offEnd = sRawCode.find('\n', off, offStop);
                    if offEnd < 0:
                        offEnd = offStop;
                    offEnd -= 1;
                    # Drop trailing whitespace.  Since offEnd is inclusive, it is the
                    # character at offEnd that must be checked and not the one before
                    # it (which would chop off a final one character token as well as
                    # the whitespace in front of it, e.g. the '0' of an '#if 0').
                    while offEnd > off and sRawCode[offEnd].isspace():
                        offEnd -= 1;
                else:
                    offEnd = sRawCode.find(';', off);
                    if offEnd < 0:
                        self.raiseDecodeError(sRawCode, off, 'C++ statement without a ";"');

                # Check this and the following statement whether it might have
                # something to do with decoding.  This is a statement filter
                # criteria when generating the threaded functions blocks.
                offNextEnd = sRawCode.find(';', offEnd + 1);
                fDecode = (   sRawCode.find('IEM_OPCODE_', off, max(offEnd, offNextEnd)) >= 0
                           or sRawCode.find('IEMOP_HLP_DONE_', off, max(offEnd, offNextEnd)) >= 0
                           or sRawCode.find('IEMOP_HLP_DECODED_', off, offEnd) >= 0
                           or sRawCode.find('IEMOP_HLP_RAISE_UD_IF_MISSING_GUEST_FEATURE', off, offEnd) >= 0
                           or sRawCode.find('IEMOP_HLP_VMX_INSTR', off, offEnd) >= 0
                           or sRawCode.find('IEMOP_HLP_IN_VMX_OPERATION', off, offEnd) >= 0 ## @todo wrong
                           );

                if not oMatch:
                    if ch != '#':
                        aoStmts.append(McCppGeneric(sRawCode[off : offEnd + 1], fDecode));
                    else:
                        aoStmts.append(McCppPreProc(sRawCode[off : offEnd + 1]));
                    off = offEnd + 1;
                elif oMatch.group(1).startswith('if'):
                    #
                    # if () xxx [else yyy] statement.
                    #
                    oStmt = McCppCond(sCondExpr, fDecode);
                    aoStmts.append(oStmt);
                    off = offEnd + 1;

                    # Following the if () we can either have a {} containing zero or more statements
                    # or we have a single statement.
                    offBlock1 = self.skipSpacesAt(sRawCode, offEnd + 1, offStop);
                    if sRawCode[offBlock1] == '{':
                        offBlock1End = self.findClosingBraces(sRawCode, offBlock1, offStop);
                        if offBlock1End < 0:
                            self.raiseDecodeError(sRawCode, offBlock1, 'No matching "}" closing if block');
                        offBlock1 += 1;
                    else:
                        offBlock1End = sRawCode.find(';', offBlock1, offStop);
                        if offBlock1End < 0:
                            self.raiseDecodeError(sRawCode, off, 'Expected ";" terminating one-line if block"');

                    oStmt.aoIfBranch = self.decodeCode(sRawCode, offBlock1, offBlock1End, iLevel + 1);

                    # The else is optional and can likewise be followed by {} or a single statement.
                    # (The character following the 'else' is sliced out so we don't raise
                    # IndexError should the 'else' be the very last thing in the code.)
                    off = self.skipSpacesAt(sRawCode, offBlock1End + 1, offStop);
                    if (    self.isSubstrAt(sRawCode, off, 'else')
                        and sRawCode[off + len('else') : off + len('else') + 1].isspace()):
                        offBlock2 = self.skipSpacesAt(sRawCode, off + len('else'), offStop);
                        if sRawCode[offBlock2] == '{':
                            offBlock2End = self.findClosingBraces(sRawCode, offBlock2, offStop);
                            if offBlock2End < 0:
                                self.raiseDecodeError(sRawCode, offBlock2, 'No matching "}" closing else block');
                            offBlock2 += 1;
                        else:
                            offBlock2End = sRawCode.find(';', offBlock2, offStop);
                            if offBlock2End < 0:
                                self.raiseDecodeError(sRawCode, off, 'Expected ";" terminating one-line else block"');

                        oStmt.aoElseBranch = self.decodeCode(sRawCode, offBlock2, offBlock2End, iLevel + 1);
                        off = offBlock2End + 1;

                elif oMatch.group(1) == 'else':
                    # Problematic 'else' branch, typically involving #ifdefs.
                    self.raiseDecodeError(sRawCode, off, 'Mixed up else/#ifdef or something confusing us.');

        return aoStmts;

    def decode(self):
        """
        Decodes the block, populating self.aoStmts if necessary.
        Returns the statement list.
        Raises ParserException on failure.
        """
        if not self.aoStmts:
            self.aoStmts = self.decodeCode(''.join(self.asLines));
        return self.aoStmts;


    def checkForTooEarlyEffSegUse(self, aoStmts):
        """
        Checks if iEffSeg is used before the effective address has been decoded.
        Returns None on success, error string on failure.

        See r158454 for an example of this issue.
        """

        # Locate the IEM_MC_CALC_RM_EFF_ADDR statement, if found, scan backwards
        # for IEMCPU::iEffSeg references.  No need to check conditional branches,
        # as we're ASSUMING these will not occur before address calculation.
        for iStmt, oStmt in enumerate(aoStmts):
            if oStmt.sName == 'IEM_MC_CALC_RM_EFF_ADDR':
                while iStmt > 0:
                    iStmt -= 1;
                    oStmt  = aoStmts[iStmt];
                    for sArg in oStmt.asParams:
                        if sArg.find('pVCpu->iem.s.iEffSeg') >= 0:
                            return "statement #%u: pVCpu->iem.s.iEffSeg is used prior to IEM_MC_CALC_RM_EFF_ADDR!" % (iStmt + 1,);
                # Only the first IEM_MC_CALC_RM_EFF_ADDR statement is considered.
                break;
        return None;

    ## Extracts the first word of a C++ statement.
    koReCppFirstWord = re.compile(r'^\s*(\w+)[ (;]');
    ## Decoding related C++ statements that are okay after IEMOP_HLP_DONE_*DECODING*.
    kdDecodeCppStmtOkayAfterDone = {
        'IEMOP_HLP_IN_VMX_OPERATION': True,
        'IEMOP_HLP_VMX_INSTR':        True,
    };

    def checkForDoneDecoding(self, aoStmts):
        """
        Checks that the block contains a IEMOP_HLP_DONE_*DECODING* macro
        invocation.
        Returns None on success, error string on failure.

        This ensures safe instruction restarting in case the recompiler runs
        out of TB resources during recompilation (e.g. aRanges or aGCPhysPages
        entries).
        """

        # The IEMOP_HLP_DONE_ stuff is not allowed inside conditionals, so we
        # don't need to look.
        cIemOpHlpDone = 0;
        for iStmt, oStmt in enumerate(aoStmts):
            if oStmt.isCppStmt():
                #print('dbg: #%u[%u]: %s %s (%s)'
                #      % (iStmt + 1, cIemOpHlpDone, oStmt.sName, 'd' if oStmt.fDecode else 'r', oStmt.asParams[0],));

                oMatch = self.koReCppFirstWord.match(oStmt.asParams[0]);
                if oMatch:
                    sFirstWord = oMatch.group(1);
                    if (   sFirstWord.startswith('IEMOP_HLP_DONE_')
                        or sFirstWord.startswith('IEMOP_HLP_DECODED_')):
                        cIemOpHlpDone += 1;
                    elif cIemOpHlpDone > 0 and oStmt.fDecode and sFirstWord not in self.kdDecodeCppStmtOkayAfterDone:
                        return "statement #%u: Decoding statement following IEMOP_HLP_DONE_*DECODING*!" % (iStmt + 1,);
                    #else: print('dbg: #%u[%u]: %s' % (iStmt + 1, cIemOpHlpDone, oStmt.asParams[0]));
            else:
                #print('dbg: #%u[%u]: %s' % (iStmt + 1, cIemOpHlpDone, oStmt.sName));
                if oStmt.sName.startswith('IEM_MC_DEFER_TO_CIMPL_') and iStmt == 0: # implicit
                    cIemOpHlpDone += 1;
                elif cIemOpHlpDone == 0 and g_dMcStmtParsers.get(oStmt.sName, (None, False))[1]:
                    return "statement #%u: State modifying MC statement before IEMOP_HLP_DONE_*DECODING*!" % (iStmt + 1,);
                elif cIemOpHlpDone > 0 and oStmt.sName in ('IEM_MC_CALC_RM_EFF_ADDR',):
                    return "statement #%u: Decoding statement following IEMOP_HLP_DONE_*DECODING*!" % (iStmt + 1,);
        if cIemOpHlpDone == 1:
            return None;
        if cIemOpHlpDone > 1:
            return "Block has more than one IEMOP_HLP_DONE_*DECODING* invocation!";
        return "Block is missing IEMOP_HLP_DONE_*DECODING* invocation!";

    def check(self):
        """
        Performs some sanity checks on the block, decoding it first if necessary.
        Returns error string list, empty if all is fine.
        """
        aoStmts = self.decode();
        asRet   = [];

        sRet = self.checkForTooEarlyEffSegUse(aoStmts);
        if sRet:
            asRet.append(sRet);

        sRet = self.checkForDoneDecoding(aoStmts);
        if sRet:
            asRet.append(sRet);

        return asRet;
2543
2544
2545
## IEM_MC_XXX -> parser + info dictionary.
#
# Each value is a (parser method, info) tuple.  The info is currently a single
# boolean entry indicating whether the statement modifies state and must not
# be used before IEMOP_HLP_DONE_*.
#
# The raw table was generated via the following command
# sed -n -e "s/^# *define *\(IEM_MC_[A-Z_0-9]*\)[ (].*$/ '\1': McBlock.parseMcGeneric,/p" include/IEMMc.h \
# | sort | uniq | gawk "{printf """ %%-60s (%%s, True)\n""", $1, $2}"
g_dMcStmtParsers = {
    'IEM_MC_ACTUALIZE_AVX_STATE_FOR_CHANGE': (McBlock.parseMcGeneric, False),
    'IEM_MC_ACTUALIZE_AVX_STATE_FOR_READ': (McBlock.parseMcGeneric, False),
    'IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE': (McBlock.parseMcGeneric, False),
    'IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ': (McBlock.parseMcGeneric, False),
    'IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE': (McBlock.parseMcGeneric, False),
    'IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ': (McBlock.parseMcGeneric, False),
    'IEM_MC_ADD_GREG_U16': (McBlock.parseMcGeneric, True),
    'IEM_MC_ADD_GREG_U16_TO_LOCAL': (McBlock.parseMcGeneric, False),
    'IEM_MC_ADD_GREG_U32': (McBlock.parseMcGeneric, True),
    'IEM_MC_ADD_GREG_U32_TO_LOCAL': (McBlock.parseMcGeneric, False),
    'IEM_MC_ADD_GREG_U64': (McBlock.parseMcGeneric, True),
    'IEM_MC_ADD_GREG_U64_TO_LOCAL': (McBlock.parseMcGeneric, False),
    'IEM_MC_ADD_GREG_U8': (McBlock.parseMcGeneric, True),
    'IEM_MC_ADD_GREG_U8_TO_LOCAL': (McBlock.parseMcGeneric, False),
    'IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR': (McBlock.parseMcGeneric, True),
    'IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR': (McBlock.parseMcGeneric, True),
    'IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR': (McBlock.parseMcGeneric, True),
    'IEM_MC_ADVANCE_RIP_AND_FINISH': (McBlock.parseMcGeneric, True),
    'IEM_MC_AND_2LOCS_U32': (McBlock.parseMcGeneric, False),
    'IEM_MC_AND_ARG_U16': (McBlock.parseMcGeneric, False),
    'IEM_MC_AND_ARG_U32': (McBlock.parseMcGeneric, False),
    'IEM_MC_AND_ARG_U64': (McBlock.parseMcGeneric, False),
    'IEM_MC_AND_GREG_U16': (McBlock.parseMcGeneric, True),
    'IEM_MC_AND_GREG_U32': (McBlock.parseMcGeneric, True),
    'IEM_MC_AND_GREG_U64': (McBlock.parseMcGeneric, True),
    'IEM_MC_AND_GREG_U8': (McBlock.parseMcGeneric, True),
    'IEM_MC_AND_LOCAL_U16': (McBlock.parseMcGeneric, False),
    'IEM_MC_AND_LOCAL_U32': (McBlock.parseMcGeneric, False),
    'IEM_MC_AND_LOCAL_U64': (McBlock.parseMcGeneric, False),
    'IEM_MC_AND_LOCAL_U8': (McBlock.parseMcGeneric, False),
    'IEM_MC_ARG': (McBlock.parseMcArg, False),
    'IEM_MC_ARG_CONST': (McBlock.parseMcArgConst, False),
    'IEM_MC_ARG_LOCAL_EFLAGS': (McBlock.parseMcArgLocalEFlags, False),
    'IEM_MC_ARG_LOCAL_REF': (McBlock.parseMcArgLocalRef, False),
    'IEM_MC_ASSIGN': (McBlock.parseMcGeneric, False),
    'IEM_MC_ASSIGN_TO_SMALLER': (McBlock.parseMcGeneric, False),
    'IEM_MC_ASSIGN_U8_SX_U64': (McBlock.parseMcGeneric, False),
    'IEM_MC_ASSIGN_U32_SX_U64': (McBlock.parseMcGeneric, False),
    'IEM_MC_BEGIN': (McBlock.parseMcGeneric, False),
    'IEM_MC_BROADCAST_XREG_U16_ZX_VLMAX': (McBlock.parseMcGeneric, True),
    'IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX': (McBlock.parseMcGeneric, True),
    'IEM_MC_BROADCAST_XREG_U64_ZX_VLMAX': (McBlock.parseMcGeneric, True),
    'IEM_MC_BROADCAST_XREG_U8_ZX_VLMAX': (McBlock.parseMcGeneric, True),
    'IEM_MC_BROADCAST_YREG_U128_ZX_VLMAX': (McBlock.parseMcGeneric, True),
    'IEM_MC_BROADCAST_YREG_U16_ZX_VLMAX': (McBlock.parseMcGeneric, True),
    'IEM_MC_BROADCAST_YREG_U32_ZX_VLMAX': (McBlock.parseMcGeneric, True),
    'IEM_MC_BROADCAST_YREG_U64_ZX_VLMAX': (McBlock.parseMcGeneric, True),
    'IEM_MC_BROADCAST_YREG_U8_ZX_VLMAX': (McBlock.parseMcGeneric, True),
    'IEM_MC_BSWAP_LOCAL_U16': (McBlock.parseMcGeneric, False),
    'IEM_MC_BSWAP_LOCAL_U32': (McBlock.parseMcGeneric, False),
    'IEM_MC_BSWAP_LOCAL_U64': (McBlock.parseMcGeneric, False),
    'IEM_MC_CALC_RM_EFF_ADDR': (McBlock.parseMcGeneric, False),
    'IEM_MC_CALL_AIMPL_3': (McBlock.parseMcCallAImpl, True),
    'IEM_MC_CALL_AIMPL_4': (McBlock.parseMcCallAImpl, True),
    'IEM_MC_CALL_AVX_AIMPL_2': (McBlock.parseMcCallAvxAImpl, True),
    'IEM_MC_CALL_AVX_AIMPL_3': (McBlock.parseMcCallAvxAImpl, True),
    'IEM_MC_CALL_CIMPL_0': (McBlock.parseMcCallCImpl, True),
    'IEM_MC_CALL_CIMPL_1': (McBlock.parseMcCallCImpl, True),
    'IEM_MC_CALL_CIMPL_2': (McBlock.parseMcCallCImpl, True),
    'IEM_MC_CALL_CIMPL_3': (McBlock.parseMcCallCImpl, True),
    'IEM_MC_CALL_CIMPL_4': (McBlock.parseMcCallCImpl, True),
    'IEM_MC_CALL_CIMPL_5': (McBlock.parseMcCallCImpl, True),
    'IEM_MC_CALL_FPU_AIMPL_1': (McBlock.parseMcCallFpuAImpl, True),
    'IEM_MC_CALL_FPU_AIMPL_2': (McBlock.parseMcCallFpuAImpl, True),
    'IEM_MC_CALL_FPU_AIMPL_3': (McBlock.parseMcCallFpuAImpl, True),
    'IEM_MC_CALL_MMX_AIMPL_2': (McBlock.parseMcCallMmxAImpl, True),
    'IEM_MC_CALL_MMX_AIMPL_3': (McBlock.parseMcCallMmxAImpl, True),
    'IEM_MC_CALL_SSE_AIMPL_2': (McBlock.parseMcCallSseAImpl, True),
    'IEM_MC_CALL_SSE_AIMPL_3': (McBlock.parseMcCallSseAImpl, True),
    'IEM_MC_CALL_VOID_AIMPL_0': (McBlock.parseMcCallVoidAImpl, True),
    'IEM_MC_CALL_VOID_AIMPL_1': (McBlock.parseMcCallVoidAImpl, True),
    'IEM_MC_CALL_VOID_AIMPL_2': (McBlock.parseMcCallVoidAImpl, True),
    'IEM_MC_CALL_VOID_AIMPL_3': (McBlock.parseMcCallVoidAImpl, True),
    'IEM_MC_CALL_VOID_AIMPL_4': (McBlock.parseMcCallVoidAImpl, True),
    'IEM_MC_CLEAR_EFL_BIT': (McBlock.parseMcGeneric, True),
    'IEM_MC_CLEAR_FSW_EX': (McBlock.parseMcGeneric, True),
    'IEM_MC_CLEAR_HIGH_GREG_U64': (McBlock.parseMcGeneric, True),
    'IEM_MC_CLEAR_HIGH_GREG_U64_BY_REF': (McBlock.parseMcGeneric, True),
    'IEM_MC_CLEAR_XREG_U32_MASK': (McBlock.parseMcGeneric, True),
    'IEM_MC_CLEAR_YREG_128_UP': (McBlock.parseMcGeneric, True),
    'IEM_MC_COMMIT_EFLAGS': (McBlock.parseMcGeneric, True),
    'IEM_MC_COPY_XREG_U128': (McBlock.parseMcGeneric, True),
    'IEM_MC_COPY_YREG_U128_ZX_VLMAX': (McBlock.parseMcGeneric, True),
    'IEM_MC_COPY_YREG_U256_ZX_VLMAX': (McBlock.parseMcGeneric, True),
    'IEM_MC_COPY_YREG_U64_ZX_VLMAX': (McBlock.parseMcGeneric, True),
    'IEM_MC_DEFER_TO_CIMPL_0_RET': (McBlock.parseMcGeneric, False),
    'IEM_MC_DEFER_TO_CIMPL_1_RET': (McBlock.parseMcGeneric, False),
    'IEM_MC_DEFER_TO_CIMPL_2_RET': (McBlock.parseMcGeneric, False),
    'IEM_MC_DEFER_TO_CIMPL_3_RET': (McBlock.parseMcGeneric, False),
    'IEM_MC_END': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_EFLAGS': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_EFLAGS_U8': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_FCW': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_FSW': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_GREG_U16': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_GREG_U16_SX_U32': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_GREG_U16_SX_U64': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_GREG_U16_ZX_U32': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_GREG_U16_ZX_U64': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_GREG_U32': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_GREG_U32_SX_U64': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_GREG_U32_ZX_U64': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_GREG_U64': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_GREG_U64_ZX_U64': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_GREG_U8': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_GREG_U8_SX_U16': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_GREG_U8_SX_U32': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_GREG_U8_SX_U64': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_GREG_U8_ZX_U16': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_GREG_U8_ZX_U32': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_GREG_U8_ZX_U64': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_MEM_D80': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_I16': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_I32': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_I64': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_R32': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_R64': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_R80': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_S32_SX_U64': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_U128': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_U128_ALIGN_SSE': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_U128_NO_AC': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_U16': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_U16_DISP': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_U16_SX_U32': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_U16_SX_U64': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_U16_ZX_U32': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_U16_ZX_U64': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_U256': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_U256_ALIGN_AVX': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_U256_NO_AC': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_U32': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_U32_DISP': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_U32_SX_U64': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_U32_ZX_U64': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_U64': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_U64_ALIGN_U128': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_U64_DISP': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_U8': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_U8_SX_U16': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_U8_SX_U32': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_U8_SX_U64': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_U8_ZX_U16': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_U8_ZX_U32': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_U8_ZX_U64': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_XMM': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_XMM_ALIGN_SSE': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_XMM_NO_AC': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_XMM_U32': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_XMM_U64': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_YMM': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_YMM_ALIGN_AVX': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM_YMM_NO_AC': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM16_U8': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MEM32_U8': (McBlock.parseMcGeneric, True),
    'IEM_MC_FETCH_MREG_U32': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_MREG_U64': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_SREG_BASE_U32': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_SREG_BASE_U64': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_SREG_U16': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_SREG_ZX_U32': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_SREG_ZX_U64': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_XREG_U128': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_XREG_U16': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_XREG_U32': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_XREG_U64': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_XREG_U8': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_XREG_XMM': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_YREG_2ND_U64': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_YREG_U128': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_YREG_U256': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_YREG_U32': (McBlock.parseMcGeneric, False),
    'IEM_MC_FETCH_YREG_U64': (McBlock.parseMcGeneric, False),
    'IEM_MC_FLIP_EFL_BIT': (McBlock.parseMcGeneric, True),
    'IEM_MC_FPU_FROM_MMX_MODE': (McBlock.parseMcGeneric, True),
    'IEM_MC_FPU_STACK_DEC_TOP': (McBlock.parseMcGeneric, True),
    'IEM_MC_FPU_STACK_FREE': (McBlock.parseMcGeneric, True),
    'IEM_MC_FPU_STACK_INC_TOP': (McBlock.parseMcGeneric, True),
    'IEM_MC_FPU_STACK_PUSH_OVERFLOW': (McBlock.parseMcGeneric, True),
    'IEM_MC_FPU_STACK_PUSH_OVERFLOW_MEM_OP': (McBlock.parseMcGeneric, True),
    'IEM_MC_FPU_STACK_PUSH_UNDERFLOW': (McBlock.parseMcGeneric, True),
    'IEM_MC_FPU_STACK_PUSH_UNDERFLOW_TWO': (McBlock.parseMcGeneric, True),
    'IEM_MC_FPU_STACK_UNDERFLOW': (McBlock.parseMcGeneric, True),
    'IEM_MC_FPU_STACK_UNDERFLOW_MEM_OP': (McBlock.parseMcGeneric, True),
    'IEM_MC_FPU_STACK_UNDERFLOW_MEM_OP_THEN_POP': (McBlock.parseMcGeneric, True),
    'IEM_MC_FPU_STACK_UNDERFLOW_THEN_POP': (McBlock.parseMcGeneric, True),
    'IEM_MC_FPU_STACK_UNDERFLOW_THEN_POP_POP': (McBlock.parseMcGeneric, True),
    'IEM_MC_FPU_TO_MMX_MODE': (McBlock.parseMcGeneric, True),
    'IEM_MC_IF_CX_IS_NZ': (McBlock.parseMcGenericCond, True),
    'IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_NOT_SET': (McBlock.parseMcGenericCond, True),
    'IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_SET': (McBlock.parseMcGenericCond, True),
    'IEM_MC_IF_ECX_IS_NZ': (McBlock.parseMcGenericCond, True),
    'IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_NOT_SET': (McBlock.parseMcGenericCond, True),
    'IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_SET': (McBlock.parseMcGenericCond, True),
    'IEM_MC_IF_EFL_ANY_BITS_SET': (McBlock.parseMcGenericCond, True),
    'IEM_MC_IF_EFL_BIT_NOT_SET': (McBlock.parseMcGenericCond, True),
    'IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ': (McBlock.parseMcGenericCond, True),
    'IEM_MC_IF_EFL_BIT_SET': (McBlock.parseMcGenericCond, True),
    'IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE': (McBlock.parseMcGenericCond, True),
    'IEM_MC_IF_EFL_BITS_EQ': (McBlock.parseMcGenericCond, True),
    'IEM_MC_IF_EFL_BITS_NE': (McBlock.parseMcGenericCond, True),
    'IEM_MC_IF_EFL_NO_BITS_SET': (McBlock.parseMcGenericCond, True),
    'IEM_MC_IF_FCW_IM': (McBlock.parseMcGenericCond, True),
    'IEM_MC_IF_FPUREG_IS_EMPTY': (McBlock.parseMcGenericCond, True),
    'IEM_MC_IF_FPUREG_NOT_EMPTY': (McBlock.parseMcGenericCond, True),
    'IEM_MC_IF_FPUREG_NOT_EMPTY_REF_R80': (McBlock.parseMcGenericCond, True),
    'IEM_MC_IF_GREG_BIT_SET': (McBlock.parseMcGenericCond, True),
    'IEM_MC_IF_LOCAL_IS_Z': (McBlock.parseMcGenericCond, True),
    'IEM_MC_IF_MXCSR_XCPT_PENDING': (McBlock.parseMcGenericCond, True),
    'IEM_MC_IF_RCX_IS_NZ': (McBlock.parseMcGenericCond, True),
    'IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_NOT_SET': (McBlock.parseMcGenericCond, True),
    'IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_SET': (McBlock.parseMcGenericCond, True),
    'IEM_MC_IF_TWO_FPUREGS_NOT_EMPTY_REF_R80': (McBlock.parseMcGenericCond, True),
    'IEM_MC_IF_TWO_FPUREGS_NOT_EMPTY_REF_R80_FIRST': (McBlock.parseMcGenericCond, True),
    'IEM_MC_IMPLICIT_AVX_AIMPL_ARGS': (McBlock.parseMcGeneric, False),
    'IEM_MC_INT_CLEAR_ZMM_256_UP': (McBlock.parseMcGeneric, True),
    'IEM_MC_LOCAL': (McBlock.parseMcLocal, False),
    'IEM_MC_LOCAL_CONST': (McBlock.parseMcLocalConst, False),
    'IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT': (McBlock.parseMcGeneric, True),
    'IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE': (McBlock.parseMcGeneric, True),
    'IEM_MC_MAYBE_RAISE_FPU_XCPT': (McBlock.parseMcGeneric, True),
    'IEM_MC_MAYBE_RAISE_FSGSBASE_XCPT': (McBlock.parseMcGeneric, True),
    'IEM_MC_MAYBE_RAISE_MMX_RELATED_XCPT': (McBlock.parseMcGeneric, True),
    'IEM_MC_MAYBE_RAISE_NON_CANONICAL_ADDR_GP0': (McBlock.parseMcGeneric, True),
    'IEM_MC_MAYBE_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT': (McBlock.parseMcGeneric, True),
    'IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT': (McBlock.parseMcGeneric, True),
    'IEM_MC_MAYBE_RAISE_WAIT_DEVICE_NOT_AVAILABLE': (McBlock.parseMcGeneric, True),
    'IEM_MC_MEM_COMMIT_AND_UNMAP': (McBlock.parseMcGeneric, True),
    'IEM_MC_MEM_COMMIT_AND_UNMAP_FOR_FPU_STORE': (McBlock.parseMcGeneric, True),
    'IEM_MC_MEM_MAP': (McBlock.parseMcGeneric, True),
    'IEM_MC_MEM_MAP_EX': (McBlock.parseMcGeneric, True),
    'IEM_MC_MERGE_YREG_U32_U96_ZX_VLMAX': (McBlock.parseMcGeneric, True),
    'IEM_MC_MERGE_YREG_U64_U64_ZX_VLMAX': (McBlock.parseMcGeneric, True),
    'IEM_MC_MERGE_YREG_U64HI_U64HI_ZX_VLMAX': (McBlock.parseMcGeneric, True),
    'IEM_MC_MERGE_YREG_U64LO_U64LO_ZX_VLMAX': (McBlock.parseMcGeneric, True),
    'IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX': (McBlock.parseMcGeneric, True),
    'IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX': (McBlock.parseMcGeneric, True),
    'IEM_MC_MODIFIED_MREG': (McBlock.parseMcGeneric, True),
    'IEM_MC_MODIFIED_MREG_BY_REF': (McBlock.parseMcGeneric, True),
    'IEM_MC_OR_2LOCS_U32': (McBlock.parseMcGeneric, False),
    'IEM_MC_OR_GREG_U16': (McBlock.parseMcGeneric, True),
    'IEM_MC_OR_GREG_U32': (McBlock.parseMcGeneric, True),
    'IEM_MC_OR_GREG_U64': (McBlock.parseMcGeneric, True),
    'IEM_MC_OR_GREG_U8': (McBlock.parseMcGeneric, True),
    'IEM_MC_OR_LOCAL_U16': (McBlock.parseMcGeneric, False),
    'IEM_MC_OR_LOCAL_U32': (McBlock.parseMcGeneric, False),
    'IEM_MC_OR_LOCAL_U8': (McBlock.parseMcGeneric, False),
    'IEM_MC_POP_U16': (McBlock.parseMcGeneric, True),
    'IEM_MC_POP_U32': (McBlock.parseMcGeneric, True),
    'IEM_MC_POP_U64': (McBlock.parseMcGeneric, True),
    'IEM_MC_PREPARE_AVX_USAGE': (McBlock.parseMcGeneric, False),
    'IEM_MC_PREPARE_FPU_USAGE': (McBlock.parseMcGeneric, False),
    'IEM_MC_PREPARE_SSE_USAGE': (McBlock.parseMcGeneric, False),
    'IEM_MC_PUSH_FPU_RESULT': (McBlock.parseMcGeneric, True),
    'IEM_MC_PUSH_FPU_RESULT_MEM_OP': (McBlock.parseMcGeneric, True),
    'IEM_MC_PUSH_FPU_RESULT_TWO': (McBlock.parseMcGeneric, True),
    'IEM_MC_PUSH_U16': (McBlock.parseMcGeneric, True),
    'IEM_MC_PUSH_U32': (McBlock.parseMcGeneric, True),
    'IEM_MC_PUSH_U32_SREG': (McBlock.parseMcGeneric, True),
    'IEM_MC_PUSH_U64': (McBlock.parseMcGeneric, True),
    'IEM_MC_RAISE_DIVIDE_ERROR': (McBlock.parseMcGeneric, True),
    'IEM_MC_RAISE_GP0_IF_CPL_NOT_ZERO': (McBlock.parseMcGeneric, True),
    'IEM_MC_RAISE_GP0_IF_EFF_ADDR_UNALIGNED': (McBlock.parseMcGeneric, True),
    'IEM_MC_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT': (McBlock.parseMcGeneric, True),
    'IEM_MC_REF_EFLAGS': (McBlock.parseMcGeneric, False),
    'IEM_MC_REF_FPUREG': (McBlock.parseMcGeneric, False),
    'IEM_MC_REF_GREG_I32': (McBlock.parseMcGeneric, False),
    'IEM_MC_REF_GREG_I32_CONST': (McBlock.parseMcGeneric, False),
    'IEM_MC_REF_GREG_I64': (McBlock.parseMcGeneric, False),
    'IEM_MC_REF_GREG_I64_CONST': (McBlock.parseMcGeneric, False),
    'IEM_MC_REF_GREG_U16': (McBlock.parseMcGeneric, False),
    'IEM_MC_REF_GREG_U32': (McBlock.parseMcGeneric, False),
    'IEM_MC_REF_GREG_U64': (McBlock.parseMcGeneric, False),
    'IEM_MC_REF_GREG_U8': (McBlock.parseMcGeneric, False),
    'IEM_MC_REF_LOCAL': (McBlock.parseMcGeneric, False),
    'IEM_MC_REF_MREG_U32_CONST': (McBlock.parseMcGeneric, False),
    'IEM_MC_REF_MREG_U64': (McBlock.parseMcGeneric, False),
    'IEM_MC_REF_MREG_U64_CONST': (McBlock.parseMcGeneric, False),
    'IEM_MC_REF_MXCSR': (McBlock.parseMcGeneric, False),
    'IEM_MC_REF_XREG_R32_CONST': (McBlock.parseMcGeneric, False),
    'IEM_MC_REF_XREG_R64_CONST': (McBlock.parseMcGeneric, False),
    'IEM_MC_REF_XREG_U128': (McBlock.parseMcGeneric, False),
    'IEM_MC_REF_XREG_U128_CONST': (McBlock.parseMcGeneric, False),
    'IEM_MC_REF_XREG_U32_CONST': (McBlock.parseMcGeneric, False),
    'IEM_MC_REF_XREG_U64_CONST': (McBlock.parseMcGeneric, False),
    'IEM_MC_REF_XREG_XMM_CONST': (McBlock.parseMcGeneric, False),
    'IEM_MC_REF_YREG_U128': (McBlock.parseMcGeneric, False),
    'IEM_MC_REF_YREG_U128_CONST': (McBlock.parseMcGeneric, False),
    'IEM_MC_REF_YREG_U64_CONST': (McBlock.parseMcGeneric, False),
    'IEM_MC_REL_JMP_S16_AND_FINISH': (McBlock.parseMcGeneric, True),
    'IEM_MC_REL_JMP_S32_AND_FINISH': (McBlock.parseMcGeneric, True),
    'IEM_MC_REL_JMP_S8_AND_FINISH': (McBlock.parseMcGeneric, True),
    'IEM_MC_RETURN_ON_FAILURE': (McBlock.parseMcGeneric, False),
    'IEM_MC_SAR_LOCAL_S16': (McBlock.parseMcGeneric, False),
    'IEM_MC_SAR_LOCAL_S32': (McBlock.parseMcGeneric, False),
    'IEM_MC_SAR_LOCAL_S64': (McBlock.parseMcGeneric, False),
    'IEM_MC_SET_EFL_BIT': (McBlock.parseMcGeneric, True),
    'IEM_MC_SET_FPU_RESULT': (McBlock.parseMcGeneric, True),
    'IEM_MC_SET_RIP_U16_AND_FINISH': (McBlock.parseMcGeneric, True),
    'IEM_MC_SET_RIP_U32_AND_FINISH': (McBlock.parseMcGeneric, True),
    'IEM_MC_SET_RIP_U64_AND_FINISH': (McBlock.parseMcGeneric, True),
    'IEM_MC_SHL_LOCAL_S16': (McBlock.parseMcGeneric, False),
    'IEM_MC_SHL_LOCAL_S32': (McBlock.parseMcGeneric, False),
    'IEM_MC_SHL_LOCAL_S64': (McBlock.parseMcGeneric, False),
    'IEM_MC_SHR_LOCAL_U8': (McBlock.parseMcGeneric, False),
    'IEM_MC_SSE_UPDATE_MXCSR': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_FPU_RESULT': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_FPU_RESULT_MEM_OP': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_FPU_RESULT_THEN_POP': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_FPU_RESULT_WITH_MEM_OP_THEN_POP': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_FPUREG_R80_SRC_REF': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_GREG_I64': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_GREG_U16': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_GREG_U16_CONST': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_GREG_U32': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_GREG_U32_CONST': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_GREG_U64': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_GREG_U64_CONST': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_GREG_U8': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_GREG_U8_CONST': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_MEM_I16_CONST_BY_REF': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_MEM_I32_CONST_BY_REF': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_MEM_I64_CONST_BY_REF': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_MEM_I8_CONST_BY_REF': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_MEM_INDEF_D80_BY_REF': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_MEM_NEG_QNAN_R32_BY_REF': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_MEM_NEG_QNAN_R64_BY_REF': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_MEM_NEG_QNAN_R80_BY_REF': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_MEM_U128': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_MEM_U128_ALIGN_SSE': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_MEM_U16': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_MEM_U16_CONST': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_MEM_U256': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_MEM_U256_ALIGN_AVX': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_MEM_U32': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_MEM_U32_CONST': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_MEM_U64': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_MEM_U64_CONST': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_MEM_U8': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_MEM_U8_CONST': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_MREG_U32_ZX_U64': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_MREG_U64': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_SREG_BASE_U32': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_SREG_BASE_U64': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_SSE_RESULT': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_XREG_HI_U64': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_XREG_R32': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_XREG_R64': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_XREG_U128': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_XREG_U16': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_XREG_U32': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_XREG_U32_U128': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_XREG_U32_ZX_U128': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_XREG_U64': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_XREG_U64_ZX_U128': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_XREG_U8': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_XREG_XMM': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_XREG_XMM_U32': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_XREG_XMM_U64': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_YREG_U128': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_YREG_U128_ZX_VLMAX': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_YREG_U256_ZX_VLMAX': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_YREG_U32_ZX_VLMAX': (McBlock.parseMcGeneric, True),
    'IEM_MC_STORE_YREG_U64_ZX_VLMAX': (McBlock.parseMcGeneric, True),
    'IEM_MC_SUB_GREG_U16': (McBlock.parseMcGeneric, True),
    'IEM_MC_SUB_GREG_U32': (McBlock.parseMcGeneric, True),
    'IEM_MC_SUB_GREG_U64': (McBlock.parseMcGeneric, True),
    'IEM_MC_SUB_GREG_U8': (McBlock.parseMcGeneric, True),
    'IEM_MC_SUB_LOCAL_U16': (McBlock.parseMcGeneric, False),
    'IEM_MC_UPDATE_FPU_OPCODE_IP': (McBlock.parseMcGeneric, True),
    'IEM_MC_UPDATE_FSW': (McBlock.parseMcGeneric, True),
    'IEM_MC_UPDATE_FSW_CONST': (McBlock.parseMcGeneric, True),
    'IEM_MC_UPDATE_FSW_THEN_POP': (McBlock.parseMcGeneric, True),
    'IEM_MC_UPDATE_FSW_THEN_POP_POP': (McBlock.parseMcGeneric, True),
    'IEM_MC_UPDATE_FSW_WITH_MEM_OP': (McBlock.parseMcGeneric, True),
    'IEM_MC_UPDATE_FSW_WITH_MEM_OP_THEN_POP': (McBlock.parseMcGeneric, True),
};
2932
## List of microcode blocks.
# Module-level list, empty at import time.
# NOTE(review): presumably appended to as the parser finds microcode blocks - confirm against the parser code.
g_aoMcBlocks = [] # type: list(McBlock)
2935
2936
2937
class ParserException(Exception):
    """
    Exception type raised for fatal parser errors.

    Carries a single, already formatted message string.
    """

    def __init__(self, sMessage):
        super(ParserException, self).__init__(sMessage);
2942
2943
2944class SimpleParser(object): # pylint: disable=too-many-instance-attributes
2945 """
2946 Parser of IEMAllInstruction*.cpp.h instruction specifications.
2947 """
2948
    ## @name Parser state.
    ## Values for self.iState; the constructor starts the parser out in kiCode.
    ## NOTE(review): going by the names, kiCode means ordinary code and kiCommentMulti
    ##               means inside a multi-line comment - confirm against the parsing loop.
    ## @{
    kiCode = 0;
    kiCommentMulti = 1;
    ## @}
2954
2955 class Macro(object):
2956 """ Macro """
2957 def __init__(self, sName, asArgs, sBody, iLine):
2958 self.sName = sName; ##< The macro name.
2959 self.asArgs = asArgs; ##< None if simple macro, list of parameters otherwise.
2960 self.sBody = sBody;
2961 self.iLine = iLine;
2962 self.oReArgMatch = re.compile(r'(\s*##\s*|\b)(' + '|'.join(asArgs) + r')(\s*##\s*|\b)') if asArgs else None;
2963
2964 @staticmethod
2965 def _needSpace(ch):
2966 """ This is just to make the expanded output a bit prettier. """
2967 return ch.isspace() and ch != '(';
2968
2969 def expandMacro(self, oParent, asArgs = None):
2970 """ Expands the macro body with the given arguments. """
2971 _ = oParent;
2972 sBody = self.sBody;
2973
2974 if self.oReArgMatch:
2975 assert len(asArgs) == len(self.asArgs);
2976 #oParent.debug('%s: %s' % (self.sName, self.oReArgMatch.pattern,));
2977
2978 dArgs = { self.asArgs[iArg]: sValue for iArg, sValue in enumerate(asArgs) };
2979 oMatch = self.oReArgMatch.search(sBody);
2980 while oMatch:
2981 sName = oMatch.group(2);
2982 #oParent.debug('%s %s..%s (%s)' % (sName, oMatch.start(), oMatch.end(),oMatch.group()));
2983 sValue = dArgs[sName];
2984 sPre = '';
2985 if not oMatch.group(1) and oMatch.start() > 0 and self._needSpace(sBody[oMatch.start()]):
2986 sPre = ' ';
2987 sPost = '';
2988 if not oMatch.group(3) and oMatch.end() < len(sBody) and self._needSpace(sBody[oMatch.end()]):
2989 sPost = ' ';
2990 sBody = sBody[ : oMatch.start()] + sPre + sValue + sPost + sBody[oMatch.end() : ];
2991 oMatch = self.oReArgMatch.search(sBody, oMatch.start() + len(sValue));
2992 else:
2993 assert not asArgs;
2994
2995 return sBody;
2996
2997
2998 def __init__(self, sSrcFile, asLines, sDefaultMap, oInheritMacrosFrom = None):
2999 self.sSrcFile = sSrcFile;
3000 self.asLines = asLines;
3001 self.iLine = 0;
3002 self.iState = self.kiCode;
3003 self.sComment = '';
3004 self.iCommentLine = 0;
3005 self.aoCurInstrs = [] # type: list(Instruction)
3006 self.oCurFunction = None # type: DecoderFunction
3007 self.iMcBlockInFunc = 0;
3008 self.oCurMcBlock = None # type: McBlock
3009 self.dMacros = {} # type: Dict[str,SimpleParser.Macro]
3010 self.oReMacros = None # type: re ##< Regular expression matching invocations of anything in self.dMacros.
3011 if oInheritMacrosFrom:
3012 self.dMacros = dict(oInheritMacrosFrom.dMacros);
3013 self.oReMacros = oInheritMacrosFrom.oReMacros;
3014
3015 assert sDefaultMap in g_dInstructionMaps;
3016 self.oDefaultMap = g_dInstructionMaps[sDefaultMap];
3017
3018 self.cTotalInstr = 0;
3019 self.cTotalStubs = 0;
3020 self.cTotalTagged = 0;
3021 self.cTotalMcBlocks = 0;
3022
3023 self.oReMacroName = re.compile('^[A-Za-z_][A-Za-z0-9_]*$');
3024 self.oReMnemonic = re.compile('^[A-Za-z_][A-Za-z0-9_]*$');
3025 self.oReStatsName = re.compile('^[A-Za-z_][A-Za-z0-9_]*$');
3026 self.oReFunctionName= re.compile('^iemOp_[A-Za-z_][A-Za-z0-9_]*$');
3027 self.oReGroupName = re.compile('^og_[a-z0-9]+(|_[a-z0-9]+|_[a-z0-9]+_[a-z0-9]+)$');
3028 self.oReDisEnum = re.compile('^OP_[A-Z0-9_]+$');
3029 self.oReFunTable = re.compile('^(IEM_STATIC|static) +const +PFNIEMOP +g_apfn[A-Za-z0-9_]+ *\[ *\d* *\] *= *$');
3030 self.oReComment = re.compile('//.*?$|/\*.*?\*/'); ## Full comments.
3031 self.oReHashDefine = re.compile('^\s*#\s*define\s+(.*)$');
3032 self.oReHashDefine2 = re.compile('(?s)\A\s*([A-Za-z_][A-Za-z0-9_]*)\(([^)]*)\)\s*(.*)\Z'); ##< With arguments.
3033 self.oReHashDefine3 = re.compile('(?s)\A\s*([A-Za-z_][A-Za-z0-9_]*)[^(]\s*(.*)\Z'); ##< Simple, no arguments.
3034 self.oReHashUndef = re.compile('^\s*#\s*undef\s+(.*)$');
3035 self.oReMcBeginEnd = re.compile(r'\bIEM_MC_(BEGIN|END|DEFER_TO_CIMPL_[1-5]_RET)\s*\('); ##> Not DEFER_TO_CIMPL_0_RET!
3036 self.fDebug = True;
3037 self.fDebugMc = False;
3038 self.fDebugPreProc = False;
3039
3040 self.dTagHandlers = {
3041 '@opbrief': self.parseTagOpBrief,
3042 '@opdesc': self.parseTagOpDesc,
3043 '@opmnemonic': self.parseTagOpMnemonic,
3044 '@op1': self.parseTagOpOperandN,
3045 '@op2': self.parseTagOpOperandN,
3046 '@op3': self.parseTagOpOperandN,
3047 '@op4': self.parseTagOpOperandN,
3048 '@oppfx': self.parseTagOpPfx,
3049 '@opmaps': self.parseTagOpMaps,
3050 '@opcode': self.parseTagOpcode,
3051 '@opcodesub': self.parseTagOpcodeSub,
3052 '@openc': self.parseTagOpEnc,
3053 '@opfltest': self.parseTagOpEFlags,
3054 '@opflmodify': self.parseTagOpEFlags,
3055 '@opflundef': self.parseTagOpEFlags,
3056 '@opflset': self.parseTagOpEFlags,
3057 '@opflclear': self.parseTagOpEFlags,
3058 '@ophints': self.parseTagOpHints,
3059 '@opdisenum': self.parseTagOpDisEnum,
3060 '@opmincpu': self.parseTagOpMinCpu,
3061 '@opcpuid': self.parseTagOpCpuId,
3062 '@opgroup': self.parseTagOpGroup,
3063 '@opunused': self.parseTagOpUnusedInvalid,
3064 '@opinvalid': self.parseTagOpUnusedInvalid,
3065 '@opinvlstyle': self.parseTagOpUnusedInvalid,
3066 '@optest': self.parseTagOpTest,
3067 '@optestign': self.parseTagOpTestIgnore,
3068 '@optestignore': self.parseTagOpTestIgnore,
3069 '@opcopytests': self.parseTagOpCopyTests,
3070 '@oponly': self.parseTagOpOnlyTest,
3071 '@oponlytest': self.parseTagOpOnlyTest,
3072 '@opxcpttype': self.parseTagOpXcptType,
3073 '@opstats': self.parseTagOpStats,
3074 '@opfunction': self.parseTagOpFunction,
3075 '@opdone': self.parseTagOpDone,
3076 };
3077 for i in range(48):
3078 self.dTagHandlers['@optest%u' % (i,)] = self.parseTagOpTestNum;
3079 self.dTagHandlers['@optest[%u]' % (i,)] = self.parseTagOpTestNum;
3080
3081 self.asErrors = [];
3082
def raiseError(self, sMessage):
    """
    Raises a ParserException for the current source position.

    The exception text is "<self.sSrcFile>:<self.iLine>: error: <sMessage>".
    """
    sWhere = "%s:%d" % (self.sSrcFile, self.iLine,);
    raise ParserException("%s: error: %s" % (sWhere, sMessage,));
3088
def raiseCommentError(self, iLineInComment, sMessage):
    """
    Raises a ParserException for a line inside the current comment.

    Works like raiseError, except that the reported line number is
    self.iCommentLine + iLineInComment rather than self.iLine.
    """
    iLine = self.iCommentLine + iLineInComment;
    sWhere = "%s:%d" % (self.sSrcFile, iLine,);
    raise ParserException("%s: error: %s" % (sWhere, sMessage,));
3094
def error(self, sMessage):
    """
    Records an error for the current line (self.iLine) in self.asErrors.

    The entry has the form "<source file>:<line>: error: <message>" followed
    by a newline.

    Returns False, so it can be used directly in a return statement.
    """
    sEntry = u'%s:%d: error: %s\n' % (self.sSrcFile, self.iLine, sMessage,);
    self.asErrors.append(sEntry);
    return False;
3102
def errorOnLine(self, iLine, sMessage):
    """
    Records an error for the explicitly given line number in self.asErrors.

    The entry has the form "<source file>:<iLine>: error: <message>" followed
    by a newline.

    Returns False, so it can be used directly in a return statement.
    """
    sEntry = u'%s:%d: error: %s\n' % (self.sSrcFile, iLine, sMessage,);
    self.asErrors.append(sEntry);
    return False;
3110
def errorComment(self, iLineInComment, sMessage):
    """
    Records an error for a line inside the current comment.

    The reported line number is self.iCommentLine + iLineInComment.

    Returns False, so it can be used directly in a return statement.
    """
    iLine = self.iCommentLine + iLineInComment;
    sEntry = u'%s:%d: error: %s\n' % (self.sSrcFile, iLine, sMessage,);
    self.asErrors.append(sEntry);
    return False;
3118
def printErrors(self):
    """
    Writes all recorded errors (self.asErrors) to stderr in a single write.

    Nothing is written when there are no errors.

    Returns the number of recorded errors.
    """
    cErrors = len(self.asErrors);
    if cErrors > 0:
        sys.stderr.write(u''.join(self.asErrors));
    return cErrors;
3127
def debug(self, sMessage):
    """
    Prints sMessage to stderr with a 'debug: ' prefix when self.fDebug is set.
    """
    if not self.fDebug:
        return;
    print('debug: %s' % (sMessage,), file = sys.stderr);
3134
def stripComments(self, sLine):
    """
    Returns sLine with every match of self.oReComment replaced by a space.

    If a comment opener or terminator ('/*' or '*/') is still present
    afterwards, an error is recorded via self.error() since multi-line
    comments are not handled here.  The stripped line is returned regardless.
    """
    sStripped = self.oReComment.sub(" ", sLine);
    if '/*' in sStripped or '*/' in sStripped:
        self.error('Unexpected multi-line comment will not be handled correctly. Please simplify.');
    return sStripped;
3145
def parseFunctionTable(self, sLine):
    """
    Parses a PFNIEMOP table, updating/checking the @oppfx value.

    sLine is the line declaring the table; the table name is the identifier
    preceding the first '['.  The line that follows (as indexed by self.iLine)
    must be a lone '{', after which entries are consumed line by line until a
    '}' or '};' entry is found.

    For each entry that isn't an iemOp_Invalid* function, the instructions
    registered for that function are looked up and the opcode and prefix
    implied by the table position are filled in where missing.  If no
    registered instruction matches, a copy of the first one is added for this
    table position.

    Returns True on success.  On failure an error is recorded and False is
    returned (missing table name, missing '{', wrong table length or end of
    file before the table is closed).

    Note! Updates iLine as it consumes the whole table.
    """

    #
    # Extract the table name.
    #
    oNameMatch = re.search(r' *([a-zA-Z_0-9]+) *\[', sLine);
    if not oNameMatch:
        return self.error('Unable to find the PFNIEMOP table name in: %s' % (sLine.strip(),));
    sName = oNameMatch.group(1);
    oMap  = g_dInstructionMapsByIemName.get(sName);
    if not oMap:
        self.debug('No map for PFNIEMOP table: %s' % (sName,));
        oMap = self.oDefaultMap; # This is wrong wrong wrong.

    #
    # All but the g_apfnOneByteMap & g_apfnEscF1_E0toFF tables uses four
    # entries per byte:
    #   no prefix, 066h prefix, f3h prefix, f2h prefix
    # Those tables has 256 & 32 entries respectively.
    #
    cEntriesPerByte   = 4;
    cValidTableLength = 1024;
    asPrefixes        = ('none', '0x66', '0xf3', '0xf2');

    oEntriesMatch = re.search(r'\[ *(256|32) *\]', sLine);
    if oEntriesMatch:
        cEntriesPerByte   = 1;
        cValidTableLength = int(oEntriesMatch.group(1));
        asPrefixes        = (None,);

    #
    # The next line should be '{' and nothing else.
    #
    if self.iLine >= len(self.asLines) or not re.match(r'^ *{ *$', self.asLines[self.iLine]):
        return self.errorOnLine(self.iLine + 1, 'Expected lone "{" on line following PFNIEMOP table %s start' % (sName, ));
    self.iLine += 1;

    #
    # Parse till we find the end of the table.
    #
    iEntry = 0;
    while self.iLine < len(self.asLines):
        # Get the next line and strip comments and spaces (assumes no
        # multi-line comments).
        sLine = self.asLines[self.iLine];
        self.iLine += 1;
        sLine = self.stripComments(sLine).strip();

        # Split the line up into entries, expanding IEMOP_X4 usage.  Walking
        # backwards keeps the indexes of the not yet visited entries stable
        # while inserting and deleting.
        asEntries = sLine.split(',');
        for i in range(len(asEntries) - 1, -1, -1):
            sEntry = asEntries[i].strip();
            if sEntry.startswith('IEMOP_X4(') and sEntry[-1] == ')':
                sEntry = (sEntry[len('IEMOP_X4('):-1]).strip();
                asEntries.insert(i + 1, sEntry);
                asEntries.insert(i + 1, sEntry);
                asEntries.insert(i + 1, sEntry);
            if sEntry:
                asEntries[i] = sEntry;
            else:
                del asEntries[i];

        # Process the entries.
        for sEntry in asEntries:
            if sEntry in ('};', '}'):
                if iEntry != cValidTableLength:
                    return self.error('Wrong table length for %s: %#x, expected %#x' % (sName, iEntry, cValidTableLength, ));
                return True;
            if sEntry.startswith('iemOp_Invalid'):
                pass; # skip
            else:
                # Look up matching instruction by function.
                sPrefix = asPrefixes[iEntry % cEntriesPerByte];
                sOpcode = '%#04x' % (iEntry // cEntriesPerByte);
                aoInstr = g_dAllInstructionsByFunction.get(sEntry);
                if aoInstr:
                    if not isinstance(aoInstr, list):
                        aoInstr = [aoInstr,];
                    oInstr = None;
                    for oCurInstr in aoInstr:
                        if oCurInstr.sOpcode == sOpcode and oCurInstr.sPrefix == sPrefix:
                            pass;
                        elif oCurInstr.sOpcode == sOpcode and oCurInstr.sPrefix is None:
                            oCurInstr.sPrefix = sPrefix;
                        elif oCurInstr.sOpcode is None and oCurInstr.sPrefix is None:
                            oCurInstr.sOpcode = sOpcode;
                            oCurInstr.sPrefix = sPrefix;
                        else:
                            continue;
                        oInstr = oCurInstr;
                        break;
                    if not oInstr:
                        # None of the known instructions fits this slot, so clone the first one for it.
                        oInstr = aoInstr[0].copy(oMap = oMap, sOpcode = sOpcode, sPrefix = sPrefix);
                        aoInstr.append(oInstr);
                        g_dAllInstructionsByFunction[sEntry] = aoInstr;
                        g_aoAllInstructions.append(oInstr);
                        oMap.aoInstructions.append(oInstr);
                else:
                    self.debug('Function "%s", entry %#04x / byte %#04x in %s, is not associated with an instruction.'
                               % (sEntry, iEntry, iEntry // cEntriesPerByte, sName,));
            # Invalid entries occupy a table slot too, so always advance.
            iEntry += 1;

    return self.error('Unexpected end of file in PFNIEMOP table');
3251
def addInstruction(self, iLine = None):
    """
    Creates a new Instruction for this source file and registers it.

    The instruction is created for line iLine, or for self.iLine when iLine
    is None.  It is appended to both g_aoAllInstructions and self.aoCurInstrs.

    Returns the new instruction.
    """
    if iLine is None:
        iLine = self.iLine;
    oNewInstr = Instruction(self.sSrcFile, iLine);
    g_aoAllInstructions.append(oNewInstr);
    self.aoCurInstrs.append(oNewInstr);
    return oNewInstr;
3260
def deriveMnemonicAndOperandsFromStats(self, oInstr, sStats):
    """
    Derives the mnemonic and operands from a IEM stats base name like string.

    Nothing is done if oInstr already has a mnemonic.  Otherwise sStats is
    split on '_': the first word, lower-cased, becomes the mnemonic, and if
    the instruction has no operands yet, each following word is looked up in
    g_kdOpTypes and appended as an Operand.

    Returns True on success, False if one of the operand words isn't a known
    operand type (operands appended before that point are kept).
    """
    # Nothing to derive when a mnemonic is already present.
    if oInstr.sMnemonic is not None:
        return True;

    asParts = sStats.split('_');
    oInstr.sMnemonic = asParts[0].lower();

    # Only derive operands when there are candidate words and none are set yet.
    if len(asParts) < 2 or oInstr.aoOperands:
        return True;

    for sType in asParts[1:]:
        if sType not in g_kdOpTypes:
            return False; # Not reported as an error.
        oInstr.aoOperands.append(Operand(g_kdOpTypes[sType][1], sType));
    return True;
3276
def doneInstructionOne(self, oInstr, iLine):
    """
    Complete the parsing by processing, validating and expanding raw inputs.

    Steps, in order:
        - Records iLine as the completion line (asserting that the
          instruction wasn't completed before).
        - Derives a missing mnemonic (and operands), sDisEnum, sStats and
          sFunction from whatever of these are available.
        - Applies self.oDefaultMap if no map was given, then adds the
          instruction to each of its maps.
        - Derives the encoding from the operands and maps if not given.
        - Registers the instruction in g_dAllInstructionsByStat (duplicates
          are reported as errors) and g_dAllInstructionsByFunction (several
          instructions may share one function).

    Returns True.
    """
    assert oInstr.iLineCompleted is None;
    oInstr.iLineCompleted = iLine;

    #
    # Note: There is currently no processing that is specific to either
    #       specified (@op tagged) instructions or unspecified legacy ones
    #       (i.e. '/** Opcode 0x0f 0x00 /0. */' + FNIEMOPRM_DEF(iemOp_Grp6_sldt)),
    #       only the common defaults below.
    #

    # Guess mnemonic and operands from the stats name, or failing that, from
    # the function name with the 'iemOp_' part removed.
    if oInstr.sMnemonic is None:
        sDeriveFrom = oInstr.sStats;
        if sDeriveFrom is None and oInstr.sFunction is not None:
            sDeriveFrom = oInstr.sFunction.replace('iemOp_', '');
        if sDeriveFrom is not None:
            self.deriveMnemonicAndOperandsFromStats(oInstr, sDeriveFrom);

    # The disassembler op enum constant defaults to OP_<MNEMONIC>.
    if oInstr.sDisEnum is None and oInstr.sMnemonic is not None:
        oInstr.sDisEnum = 'OP_' + oInstr.sMnemonic.upper();

    # The IEM statistics base name defaults to the function name without
    # 'iemOp_', or else to the mnemonic followed by the operand types.
    if oInstr.sStats is None:
        if oInstr.sFunction is not None:
            oInstr.sStats = oInstr.sFunction.replace('iemOp_', '');
        elif oInstr.sMnemonic is not None:
            asTypes = [oOperand.sType for oOperand in oInstr.aoOperands if oOperand.sType];
            oInstr.sStats = '_'.join([oInstr.sMnemonic,] + asTypes);

    # The IEM function name defaults to 'iemOp_' + mnemonic + operand types,
    # or else to 'iemOp_' + stats name.
    if oInstr.sFunction is None:
        if oInstr.sMnemonic is not None:
            asTypes = [oOperand.sType for oOperand in oInstr.aoOperands if oOperand.sType];
            oInstr.sFunction = '_'.join(['iemOp_' + oInstr.sMnemonic,] + asTypes);
        elif oInstr.sStats:
            oInstr.sFunction = 'iemOp_' + oInstr.sStats;

    #
    # Apply default map and then add the instruction to all its maps.
    #
    if not oInstr.aoMaps:
        oInstr.aoMaps = [ self.oDefaultMap, ];
    for oMap in oInstr.aoMaps:
        oMap.aoInstructions.append(oInstr);

    #
    # Derive encoding from operands and maps.  Note that the encoding is left
    # unset when there are operands and the first one doesn't use ModR/M.
    #
    if oInstr.sEncoding is None:
        if not oInstr.aoOperands:
            if oInstr.fUnused and oInstr.sSubOpcode:
                sBaseEncoding = 'ModR/M';
            else:
                sBaseEncoding = 'fixed';
            if oInstr.onlyInVexMaps():
                oInstr.sEncoding = 'VEX.' + sBaseEncoding;
            else:
                oInstr.sEncoding = sBaseEncoding;
        elif oInstr.aoOperands[0].usesModRM():
            fVex = (len(oInstr.aoOperands) >= 2 and oInstr.aoOperands[1].sWhere == 'vvvv') \
                or oInstr.onlyInVexMaps();
            oInstr.sEncoding = 'VEX.ModR/M' if fVex else 'ModR/M';

    #
    # Check the opstat value and add it to the opstat indexed dictionary.
    #
    if oInstr.sStats:
        if oInstr.sStats in g_dAllInstructionsByStat:
            self.error('Duplicate opstat value "%s"\nnew: %s\nold: %s'
                       % (oInstr.sStats, oInstr, g_dAllInstructionsByStat[oInstr.sStats],));
        else:
            g_dAllInstructionsByStat[oInstr.sStats] = oInstr;

    #
    # Add to function indexed dictionary.  We allow multiple instructions per function.
    #
    if oInstr.sFunction:
        if oInstr.sFunction in g_dAllInstructionsByFunction:
            g_dAllInstructionsByFunction[oInstr.sFunction].append(oInstr);
        else:
            g_dAllInstructionsByFunction[oInstr.sFunction] = [oInstr,];

    return True;
3382
def doneInstructions(self, iLineInComment = None, fEndOfFunction = False):
    """
    Done with current instruction.

    Completes every instruction in self.aoCurInstrs via doneInstructionOne,
    using self.iLine as completion line, or self.iCommentLine + iLineInComment
    when iLineInComment is given.  The stub and instruction totals are
    updated, and the current comment and instruction list are reset.

    When fEndOfFunction is set, the current function (if any) is completed
    with its source lines and the per-function state is reset.

    Returns True.
    """
    if iLineInComment is None:
        iLineDone = self.iLine;
    else:
        iLineDone = self.iCommentLine + iLineInComment;

    for oInstr in self.aoCurInstrs:
        self.doneInstructionOne(oInstr, iLineDone);
        if oInstr.fStub:
            self.cTotalStubs += 1;
    self.cTotalInstr += len(self.aoCurInstrs);

    # Reset the per-instruction state.
    self.sComment    = '';
    self.aoCurInstrs = [];

    if not fEndOfFunction:
        return True;

    # End of function: Hand the function its source lines and reset the function state.
    if self.oCurFunction:
        self.oCurFunction.complete(self.iLine, self.asLines[self.oCurFunction.iBeginLine - 1 : self.iLine]);
    self.oCurFunction   = None;
    self.iMcBlockInFunc = 0;
    return True;
3403
def setInstrunctionAttrib(self, sAttrib, oValue, fOverwrite = False):
    """
    Sets the sAttrib attribute of all current instructions to oValue.

    Unless fOverwrite is True, only attributes currently holding None are
    set; any other existing value (empty strings included) is left as is.
    """
    fForce = fOverwrite is True;
    for oInstr in self.aoCurInstrs:
        if fForce or getattr(oInstr, sAttrib) is None:
            setattr(oInstr, sAttrib, oValue);
3415
def setInstrunctionArrayAttrib(self, sAttrib, iEntry, oValue, fOverwrite = False):
    """
    Sets entry iEntry of the array attribute sAttrib of all current
    instructions to oValue.

    The array is padded with None entries as needed so that iEntry exists.
    Unless fOverwrite is True, only entries currently holding None are set.
    """
    for oInstr in self.aoCurInstrs:
        aoArray  = getattr(oInstr, sAttrib);
        cMissing = iEntry + 1 - len(aoArray);
        if cMissing > 0:
            aoArray.extend([None,] * cMissing);
        if fOverwrite is True or aoArray[iEntry] is None:
            aoArray[iEntry] = oValue;
3427
def parseCommentOldOpcode(self, asLines):
    """
    Deals with 'Opcode 0xff /4' like comments.

    If the first comment line starts with the word 'Opcode' followed by a
    word with a '0x' or '0X' prefix, the words following 'Opcode' are joined
    by single spaces and stored as sRawOldOpcodes via setInstrunctionAttrib
    (so values already set are not overwritten).  Upper case '0X' prefixes
    are normalized to '0x'.

    Always returns False.
    """
    asWords = asLines[0].split() if asLines else [];
    if    len(asWords) >= 2 \
      and asWords[0] == 'Opcode' \
      and asWords[1].startswith(('0x', '0X')):
        # Drop the leading 'Opcode' word and normalize the hex prefix, keeping the digits as they are.
        asOpcodes = [('0x' + sWord[2:]) if sWord.startswith('0X') else sWord for sWord in asWords[1:]];
        self.setInstrunctionAttrib('sRawOldOpcodes', ' '.join(asOpcodes));

    return False;
3443
def ensureInstructionForOpTag(self, iTagLine):
    """
    Ensure there is an instruction for the op-tag being parsed.

    If there are no current instructions, one is added for the tag's line
    (self.iCommentLine + iTagLine).  The tag count of every current
    instruction is incremented, and self.cTotalTagged is bumped for each
    instruction receiving its first tag.

    Returns the last of the current instructions.
    """
    if len(self.aoCurInstrs) == 0:
        self.addInstruction(self.iCommentLine + iTagLine);

    for oCurInstr in self.aoCurInstrs:
        fFirstTag = oCurInstr.cOpTags == 0;
        oCurInstr.cOpTags += 1;
        if fFirstTag:
            self.cTotalTagged += 1;

    return self.aoCurInstrs[-1];
3453
@staticmethod
def flattenSections(aasSections):
    """
    Flattens multiline sections into stripped single strings.

    Each non-empty section (list of lines) becomes one string consisting of
    its stripped lines joined by single spaces.  Empty sections are dropped.

    Returns list of strings, one section per string.
    """
    return [' '.join([sLine.strip() for sLine in asLines]) for asLines in aasSections if asLines];
3465
@staticmethod
def flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = '\n'):
    """
    Flattens sections into a simple stripped string with newlines as
    section breaks.  The final section does not sport a trailing newline.

    The lines of each section are stripped and joined using sLineSep, and the
    resulting section strings are joined using sSectionSep.  Empty sections
    are skipped entirely, so they never produce a leading, trailing or
    doubled section separator.

    Returns the flattened string, empty if there is nothing to flatten.
    """
    asFlatSections = [sLineSep.join([sLine.strip() for sLine in asLines]) for asLines in aasSections if asLines];
    return sSectionSep.join(asFlatSections);
3483
3484
3485
3486 ## @name Tag parsers
3487 ## @{
3488
def parseTagOpBrief(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opbrief
    Value:  Text description, multiple sections, appended.

    Brief description.  If not given, it's the first sentence from @opdesc.

    The value is flattened, a missing final '.' is added, and it is then
    required to be at most 180 characters and to consist of one sentence only
    (no '. ' sequence before the final dot).  The result is stored as the
    sBrief attribute of the instruction, which must not already be set.

    Returns True on success, False (error recorded) on failure.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sBrief = self.flattenAllSections(aasSections);
    if not sBrief:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if sBrief[-1] != '.':
        sBrief = sBrief + '.';
    if len(sBrief) > 180:
        return self.errorComment(iTagLine, '%s: value too long (max 180 chars): %s' % (sTag, sBrief));
    # Find the first dot that is followed by a space (end of a sentence) or
    # that terminates the string; dots inside words and numbers are skipped.
    offDot = sBrief.find('.');
    while 0 <= offDot < len(sBrief) - 1 and sBrief[offDot + 1] != ' ':
        offDot = sBrief.find('.', offDot + 1);
    if offDot >= 0 and offDot != len(sBrief) - 1:
        return self.errorComment(iTagLine, '%s: only one sentence: %s' % (sTag, sBrief));

    # Update the instruction.
    if oInstr.sBrief is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite brief "%s" with "%s"'
                                           % (sTag, oInstr.sBrief, sBrief,));
    oInstr.sBrief = sBrief;

    _ = iEndLine;
    return True;
3518
def parseTagOpDesc(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opdesc
    Value:  Text description, multiple sections, appended.

    It is used to describe instructions.  Each flattened section is appended
    to the asDescSections list of the instruction.

    Always returns True.
    """
    _ = sTag; _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    if aasSections:
        oInstr.asDescSections.extend(self.flattenSections(aasSections));
    return True;
3533
def parseTagOpMnemonic(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opmnemonic
    Value:  mnemonic

    The 'mnemonic' value must match self.oReMnemonic.  Because of prefixes,
    groups and whatnot, there are times when the mnemonic isn't that of an
    actual assembler mnemonic.

    The mnemonic of an instruction can only be set once.

    Returns True on success, False (error recorded) on failure.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sMnemonic = self.flattenAllSections(aasSections);
    if self.oReMnemonic.match(sMnemonic) is None:
        return self.errorComment(iTagLine, '%s: invalid menmonic name: "%s"' % (sTag, sMnemonic,));

    # Refuse to replace an existing mnemonic.
    sOldMnemonic = oInstr.sMnemonic;
    if sOldMnemonic is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite menmonic "%s" with "%s"'
                                           % (sTag, sOldMnemonic, sMnemonic,));

    oInstr.sMnemonic = sMnemonic;
    return True;
3556
def parseTagOpOperandN(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tags:   @op1, @op2, @op3, @op4
    Value:  [where:]type

    The 'where' value indicates where the operand is found, like the 'reg'
    part of the ModR/M encoding.  It must be a key in g_kdOpLocations.  When
    omitted, the default location of the type (from g_kdOpTypes) is used.

    The 'type' value indicates the operand type.  These follow the types
    given in the opcode tables in the CPU reference manuals.  It must be a
    key in g_kdOpTypes.

    The operand index is taken from the last character of the tag.  Operand
    slots before it are padded with None, and an operand that is already set
    is never overwritten.

    Returns True on success, False (error recorded) on failure.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    idxOp = int(sTag[-1]) - 1;
    assert 0 <= idxOp < 4;

    # flatten, split up, and validate the "where:type" value.
    sFlattened = self.flattenAllSections(aasSections);
    asSplit = sFlattened.split(':');
    if len(asSplit) == 1:
        sType  = asSplit[0];
        sWhere = None;
    elif len(asSplit) == 2:
        (sWhere, sType) = asSplit;
    else:
        return self.errorComment(iTagLine, 'expected %s value on format "[<where>:]<type>" not "%s"' % (sTag, sFlattened,));

    if sType not in g_kdOpTypes:
        return self.errorComment(iTagLine, '%s: invalid type value "%s", valid: %s'
                                           % (sTag, sType, ', '.join(g_kdOpTypes.keys()),));
    if sWhere is None:
        sWhere = g_kdOpTypes[sType][1];
    elif sWhere not in g_kdOpLocations:
        return self.errorComment(iTagLine, '%s: invalid where value "%s", valid: %s'
                                           % (sTag, sWhere, ', '.join(g_kdOpLocations.keys()),));

    # Insert the operand, refusing to overwrite an existing one.
    while idxOp >= len(oInstr.aoOperands):
        oInstr.aoOperands.append(None);
    if oInstr.aoOperands[idxOp] is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s:%s" with "%s:%s"'
                                           % ( sTag, oInstr.aoOperands[idxOp].sWhere, oInstr.aoOperands[idxOp].sType,
                                               sWhere, sType,));
    oInstr.aoOperands[idxOp] = Operand(sWhere, sType);

    _ = iEndLine;
    return True;
3606
def parseTagOpMaps(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opmaps
    Value:  map[,map2]

    Indicates which maps the instruction is in.  There is a default map
    associated with each input file.

    Each map name must be a key in g_dInstructionMaps and must not already be
    assigned to the instruction.  A name repeated within the value itself is
    reported as an error too, but doesn't fail the tag.

    Returns True on success, False (error recorded) on failure.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split up and validate the value.
    asMaps = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',').split(',');
    if not asMaps:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    asUnknown = [sMap for sMap in asMaps if sMap not in g_dInstructionMaps];
    if asUnknown:
        return self.errorComment(iTagLine, '%s: invalid map value: %s (valid values: %s)'
                                           % (sTag, asUnknown[0], ', '.join(g_dInstructionMaps.keys()),));

    # Refuse maps that the instruction already has.
    for oOldMap in oInstr.aoMaps:
        if oOldMap.sName in asMaps:
            return self.errorComment(iTagLine, '%s: duplicate map assignment: %s' % (sTag, oOldMap.sName));

    # Add the maps, complaining about duplicates within the value itself.
    for sMap in asMaps:
        oNewMap = g_dInstructionMaps[sMap];
        if oNewMap in oInstr.aoMaps:
            self.errorComment(iTagLine, '%s: duplicate map assignment (input): %s' % (sTag, sMap));
        else:
            oInstr.aoMaps.append(oNewMap);

    return True;
3641
def parseTagOpPfx(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @oppfx
    Value:  n/a|none|0x66|0xf3|0xf2

    Required prefix for the instruction.  (In a (E)VEX context this is the
    value of the 'pp' field rather than an actual prefix.)

    The value is lower-cased.  'n/a' is stored as None and 'none' as the
    string 'none'.  Anything else must be a valid opcode byte (a two
    character value gets '0x' prepended first) that is listed in g_kdPrefixes.
    The prefix of an instruction can only be set once.

    Returns True on success, False (error recorded) on failure.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sFlattened = self.flattenAllSections(aasSections);
    asPrefixes = sFlattened.split();
    if not asPrefixes:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if len(asPrefixes) > 1:
        return self.errorComment(iTagLine, '%s: max one prefix: %s' % (sTag, asPrefixes,));

    sPrefix = asPrefixes[0].lower();
    if sPrefix == 'n/a':
        sPrefix = None;
    elif sPrefix != 'none':
        if len(sPrefix) == 2:
            sPrefix = '0x' + sPrefix;
        if not _isValidOpcodeByte(sPrefix):
            return self.errorComment(iTagLine, '%s: invalid prefix: %s' % (sTag, sPrefix,));

    if sPrefix is not None and sPrefix not in g_kdPrefixes:
        return self.errorComment(iTagLine, '%s: invalid prefix: %s (valid %s)' % (sTag, sPrefix, g_kdPrefixes,));

    # Set it.
    if oInstr.sPrefix is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sPrefix, sPrefix,));
    oInstr.sPrefix = sPrefix;

    _ = iEndLine;
    return True;
3679
def parseTagOpcode(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opcode
    Value:  0x?? | /reg | 11/reg | !11/reg

    The opcode byte or sub-byte for the instruction in the context of a map.

    The value is either a valid opcode byte (per _isValidOpcodeByte) or a
    ModR/M reg value given as '/<reg>', optionally preceded by '11' or '!11'.
    The reg value is a single digit in the range 0 thru 7.  The opcode of an
    instruction can only be set once.

    Returns True on success, False (error recorded) on failure.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sOpcode = self.flattenAllSections(aasSections);
    if _isValidOpcodeByte(sOpcode):
        pass;
    elif sOpcode[:-1] in ('/', '11/', '!11/') and sOpcode[-1] in ('0', '1', '2', '3', '4', '5', '6', '7'):
        pass; # The ModR/M reg field is only three bits wide, so 8 is not a valid value.
    else:
        return self.errorComment(iTagLine, '%s: invalid opcode: %s' % (sTag, sOpcode,));

    # Set it.
    if oInstr.sOpcode is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sOpcode, sOpcode,));
    oInstr.sOpcode = sOpcode;

    _ = iEndLine;
    return True;
3709
def parseTagOpcodeSub(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opcodesub
    Value:  none | 11 mr/reg | !11 mr/reg | rex.w=0 | rex.w=1 | vex.l=0 | vex.l=1
            | 11 mr/reg vex.l=0 | 11 mr/reg vex.l=1 | !11 mr/reg vex.l=0 | !11 mr/reg vex.l=1

    This is a simple way of dealing with encodings where the mod=3 and mod!=3
    represents exactly two different instructions.  The more proper way would
    be to go via maps with two members, but this is faster.

    The value must be a key in g_kdSubOpcodes; what gets stored is the first
    element of the corresponding table entry.  The sub opcode of an
    instruction can only be set once.

    Returns True on success, False (error recorded) on failure.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value, then translate it via the table.
    sValue = self.flattenAllSections(aasSections);
    if sValue not in g_kdSubOpcodes:
        return self.errorComment(iTagLine, '%s: invalid sub opcode: %s  (valid: 11, !11, none)' % (sTag, sValue,));
    sSubOpcode = g_kdSubOpcodes[sValue][0];

    # Refuse to replace an existing value.
    sOldSubOpcode = oInstr.sSubOpcode;
    if sOldSubOpcode is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                           % ( sTag, sOldSubOpcode, sSubOpcode,));

    oInstr.sSubOpcode = sSubOpcode;
    return True;
3736
def parseTagOpEnc(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @openc
    Value:  ModR/M|fixed|prefix|<map name>

    The instruction operand encoding style.

    The value is accepted if it is a key in g_kdEncodings, a key in
    g_dInstructionMaps, or a valid opcode byte (per _isValidOpcodeByte).  The
    encoding of an instruction can only be set once.

    Returns True on success, False (error recorded) on failure.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sEncoding = self.flattenAllSections(aasSections);
    fKnown = sEncoding in g_kdEncodings \
          or sEncoding in g_dInstructionMaps \
          or _isValidOpcodeByte(sEncoding);
    if not fKnown:
        return self.errorComment(iTagLine, '%s: invalid encoding: %s' % (sTag, sEncoding,));

    # Refuse to replace an existing value.
    sOldEncoding = oInstr.sEncoding;
    if sOldEncoding is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                           % ( sTag, sOldEncoding, sEncoding,));

    oInstr.sEncoding = sEncoding;
    return True;
3763
## EFlags tag to Instruction attribute name.
# Used by parseTagOpEFlags to pick the instruction attribute that receives the
# flag list of the tag being parsed.
kdOpFlagToAttr = {
    '@opfltest':    'asFlTest',
    '@opflmodify':  'asFlModify',
    '@opflundef':   'asFlUndefined',
    '@opflset':     'asFlSet',
    '@opflclear':   'asFlClear',
};
3772
def parseTagOpEFlags(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tags:   @opfltest, @opflmodify, @opflundef, @opflset, @opflclear
    Value:  <eflags specifier>

    The value is a comma separated list of flag names from
    g_kdEFlagsMnemonics, or the single word 'none' (any casing) for an empty
    list.  The list is stored in the instruction attribute that
    self.kdOpFlagToAttr gives for the tag, which must not already be set.

    Returns True on success, False (error recorded) on failure.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split up and validate the values.
    sFlattened = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',');
    asFlags    = sFlattened.split(',');
    if len(asFlags) == 1 and asFlags[0].lower() == 'none':
        asFlags = [];
    else:
        # All invalid flags are reported before giving up.
        fRc = True;
        for iFlag, sFlag in enumerate(asFlags):
            if sFlag in g_kdEFlagsMnemonics:
                continue;
            sStripped = sFlag.strip();
            if sStripped in g_kdEFlagsMnemonics:
                asFlags[iFlag] = sStripped;
            else:
                fRc = self.errorComment(iTagLine, '%s: invalid EFLAGS value: %s' % (sTag, sFlag,));
        if not fRc:
            return False;

    # Set them, refusing to replace an existing list.
    sAttrib = self.kdOpFlagToAttr[sTag];
    asOld   = getattr(oInstr, sAttrib);
    if asOld is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, asOld, asFlags,));
    setattr(oInstr, sAttrib, asFlags);
    return True;
3804
def parseTagOpHints(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @ophints
    Value:  Comma or space separated list of flags and hints.

    This covers the disassembler flags table and more.

    Each hint must be a key in g_kdHints.  The single word 'none' (any
    casing) gives an empty list.  Valid hints are added as keys to the dHints
    dictionary of the instruction; a hint that is already present is reported
    as an error, but doesn't fail the tag.

    Returns True on success, False (error recorded) if any hint is invalid.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    # Note: split() without arguments never yields words with surrounding
    #       whitespace, so the words can be checked as they are.
    asHints = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asHints) == 1 and asHints[0].lower() == 'none':
        asHints = [];
    else:
        # All invalid hints are reported before giving up.
        fRc = True;
        for sHint in asHints:
            if sHint not in g_kdHints:
                fRc = self.errorComment(iTagLine, '%s: invalid hint value: %s' % (sTag, sHint,));
        if not fRc:
            return False;

    # Append them.
    for sHint in asHints:
        if sHint not in oInstr.dHints:
            oInstr.dHints[sHint] = True; # (dummy value, using dictionary for speed)
        else:
            self.errorComment(iTagLine, '%s: duplicate hint: %s' % ( sTag, sHint,));

    _ = iEndLine;
    return True;
3838
def parseTagOpDisEnum(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opdisenum
    Value:  OP_XXXX

    This is for selecting a specific (legacy) disassembler enum value for the
    instruction.

    The value should be exactly one word matching self.oReDisEnum.  When more
    than one word is given, an error is recorded and the first word is used.
    The enum value of an instruction can only be set once.

    Returns True on success, False (error recorded) on failure.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and split.
    asWords = self.flattenAllSections(aasSections).split();
    cWords  = len(asWords);
    if cWords != 1:
        self.errorComment(iTagLine, '%s: expected exactly one value: %s' % (sTag, asWords,));
        if cWords == 0:
            return False;

    # Validate the (first) word.
    sDisEnum = asWords[0];
    if self.oReDisEnum.match(sDisEnum) is None:
        return self.errorComment(iTagLine, '%s: invalid disassembler OP_XXXX enum: %s (pattern: %s)'
                                           % (sTag, sDisEnum, self.oReDisEnum.pattern));

    # Refuse to replace an existing value.
    sOldDisEnum = oInstr.sDisEnum;
    if sOldDisEnum is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % (sTag, sOldDisEnum, sDisEnum,));

    oInstr.sDisEnum = sDisEnum;
    return True;
3867
def parseTagOpMinCpu(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opmincpu
    Value:  <simple CPU name>

    Indicates when this instruction was introduced.

    The value must be a name from g_kdCpuNames.  When more than one word is
    given, an error is recorded and the first word is used.  Repeating the
    value already set is fine, while an attempt to change it is reported as
    an error and the existing value is kept.

    Returns False (error recorded) if the value is missing or isn't a valid
    CPU name, otherwise True.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value, split into words, make sure there's just one, valid it.
    asCpus = self.flattenAllSections(aasSections).split();
    if not asCpus:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if len(asCpus) > 1:
        self.errorComment(iTagLine, '%s: exactly one CPU name, please: %s' % (sTag, ' '.join(asCpus),));

    sMinCpu = asCpus[0];
    if sMinCpu not in g_kdCpuNames:
        return self.errorComment(iTagLine, '%s: invalid CPU name: %s  (names: %s)'
                                           % (sTag, sMinCpu, ','.join(sorted(g_kdCpuNames)),));

    # Set it.  (Must not be done before this point, or the overwrite check is pointless.)
    if oInstr.sMinCpu is None:
        oInstr.sMinCpu = sMinCpu;
    elif oInstr.sMinCpu != sMinCpu:
        self.errorComment(iTagLine, '%s: attemting to overwrite "%s" with "%s"' % (sTag, oInstr.sMinCpu, sMinCpu,));

    _ = iEndLine;
    return True;
3897
def parseTagOpCpuId(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opcpuid
    Value:  none | <CPUID flag specifier>

    CPUID feature bit which is required for the instruction to be present.

    The value is a comma or space separated list where each entry must be a
    key in g_kdCpuIdFlags.  The single word 'none' (any casing) gives an empty
    list.  Valid entries are appended to the asCpuIds list of the instruction;
    an entry that is already present is reported as an error, but doesn't
    fail the tag.

    Returns True on success, False (error recorded) if any entry is invalid.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    # Note: split() without arguments never yields words with surrounding
    #       whitespace, so the words can be checked as they are.
    asCpuIds = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asCpuIds) == 1 and asCpuIds[0].lower() == 'none':
        asCpuIds = [];
    else:
        # All invalid values are reported before giving up.
        fRc = True;
        for sCpuId in asCpuIds:
            if sCpuId not in g_kdCpuIdFlags:
                fRc = self.errorComment(iTagLine, '%s: invalid CPUID value: %s' % (sTag, sCpuId,));
        if not fRc:
            return False;

    # Append them.
    for sCpuId in asCpuIds:
        if sCpuId not in oInstr.asCpuIds:
            oInstr.asCpuIds.append(sCpuId);
        else:
            self.errorComment(iTagLine, '%s: duplicate CPUID: %s' % ( sTag, sCpuId,));

    _ = iEndLine;
    return True;
3931
def parseTagOpGroup(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opgroup
    Value:  op_grp1[_subgrp2[_subsubgrp3]]

    Instruction grouping.

    The value must be exactly one word matching self.oReGroupName.  The group
    of an instruction can only be set once.

    Returns True on success, False (error recorded) on failure.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split into words and validate.
    asGroups = self.flattenAllSections(aasSections).split();
    if len(asGroups) != 1:
        return self.errorComment(iTagLine, '%s: exactly one group, please: %s' % (sTag, asGroups,));
    sGroup = asGroups[0];
    if self.oReGroupName.match(sGroup) is None:
        return self.errorComment(iTagLine, '%s: invalid group name: %s (valid: %s)'
                                           % (sTag, sGroup, self.oReGroupName.pattern));

    # Refuse to replace an existing value.
    sOldGroup = oInstr.sGroup;
    if sOldGroup is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, sOldGroup, sGroup,));

    oInstr.sGroup = sGroup;
    return True;
3957
def parseTagOpUnusedInvalid(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opunused, @opinvalid, @opinvlstyle
    Value:  <invalid opcode behaviour style>

    The @opunused indicates the specification is for a currently unused
    instruction encoding.

    The @opinvalid indicates the specification is for a currently invalid
    instruction encoding (like UD2).

    The @opinvlstyle just indicates how CPUs decode the instruction when
    not supported (@opcpuid, @opmincpu) or disabled.

    The value must be exactly one word that is a key in g_kdInvalidStyles.
    Only one of these tags can be given per instruction.  Besides storing the
    style, @opunused sets fUnused and @opinvalid sets fInvalid.

    Returns True on success, False (error recorded) on failure.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split into words and validate.
    asStyles = self.flattenAllSections(aasSections).split();
    if len(asStyles) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid behviour style, please: %s' % (sTag, asStyles,));
    (sStyle,) = asStyles;
    if sStyle not in g_kdInvalidStyles:
        return self.errorComment(iTagLine, '%s: invalid invalid behaviour style: %s (valid: %s)'
                                           % (sTag, sStyle, g_kdInvalidStyles.keys(),));

    # Refuse to replace an existing style.
    if oInstr.sInvalidStyle is not None:
        return self.errorComment(iTagLine,
                                 '%s: attempting to overwrite "%s" with "%s" (only one @opunused, @opinvalid, @opinvlstyle)'
                                 % ( sTag, oInstr.sInvalidStyle, sStyle,));
    oInstr.sInvalidStyle = sStyle;

    # Two of the tags additionally flag the instruction.
    sFlagAttrib = { '@opunused': 'fUnused', '@opinvalid': 'fInvalid', }.get(sTag);
    if sFlagAttrib is not None:
        setattr(oInstr, sFlagAttrib, True);
    return True;
3995
def parseTagOpTest(self, sTag, aasSections, iTagLine, iEndLine): # pylint: disable=too-many-locals
    r"""
    Tag:        \@optest
    Value:      [<selectors>[ ]?] <inputs> -> <outputs>
    Example:    mode==64bit / in1=0xfffffffe:dw in2=1:dw -> out1=0xffffffff:dw outfl=a?,p?

    The main idea here is to generate basic instruction tests.

    The probably simplest way of handling the diverse input, would be to use
    it to produce size optimized byte code for a simple interpreter that
    modifies the register input and output states.

    An alternative to the interpreter would be creating multiple tables,
    but that becomes rather complicated wrt what goes where and then to use
    them in an efficient manner.

    Each section in aasSections is parsed as one test.  Tests that parse
    cleanly are appended to the instruction's aoTests list, problems are
    reported thru self.errorComment.  Always returns True.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    #
    # Do it section by section.
    #
    for asSectionLines in aasSections:
        #
        # Sort the input into outputs, inputs and selector conditions.
        #
        sFlatSection = self.flattenAllSections([asSectionLines,]);
        if not sFlatSection:
            self.errorComment(iTagLine, '%s: missing value (dbg: aasSections=%s)' % ( sTag, aasSections));
            continue;
        oTest = InstructionTest(oInstr);

        asSelectors = [];
        asInputs    = [];
        asOutputs   = [];
        asCur       = asOutputs;
        fRc         = True;
        asWords     = sFlatSection.split();
        # The words are walked back to front: outputs first, then inputs
        # (after '->') and finally the selectors (after '/').
        for sWord in reversed(asWords):
            # Check for array switchers.  Note! The current list must be
            # compared by identity; comparing by value would treat two
            # distinct but equal (e.g. both empty) lists as the same one
            # and let a misplaced or duplicate separator slip thru.
            if sWord == '->':
                if asCur is not asOutputs:
                    fRc = self.errorComment(iTagLine, '%s: "->" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asInputs;
            elif sWord == '/':
                if asCur is not asInputs:
                    fRc = self.errorComment(iTagLine, '%s: "/" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asSelectors;
            else:
                asCur.insert(0, sWord);

        #
        # Validate and add selectors.
        #
        for sCond in asSelectors:
            # Predicates are shorthand that expands to a full condition expression.
            sCondExp = TestSelector.kdPredicates.get(sCond, sCond);
            oSelector = None;
            for sOp in TestSelector.kasCompareOps:
                off = sCondExp.find(sOp);
                if off >= 0:
                    sVariable = sCondExp[:off];
                    sValue    = sCondExp[off + len(sOp):];
                    if sVariable in TestSelector.kdVariables:
                        if sValue in TestSelector.kdVariables[sVariable]:
                            oSelector = TestSelector(sVariable, sOp, sValue);
                        else:
                            self.errorComment(iTagLine, '%s: invalid condition value "%s" in "%s" (valid: %s)'
                                                         % ( sTag, sValue, sCond,
                                                             TestSelector.kdVariables[sVariable].keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid condition variable "%s" in "%s" (valid: %s)'
                                                     % ( sTag, sVariable, sCond, TestSelector.kdVariables.keys(),));
                    break; # Only the first operator found in the expression is considered.
            if oSelector is not None:
                for oExisting in oTest.aoSelectors:
                    if oExisting.sVariable == oSelector.sVariable:
                        self.errorComment(iTagLine, '%s: already have a selector for variable "%s" (existing: %s, new: %s)'
                                                     % ( sTag, oSelector.sVariable, oExisting, oSelector,));
                oTest.aoSelectors.append(oSelector);
            else:
                fRc = self.errorComment(iTagLine, '%s: failed to parse selector: %s' % ( sTag, sCond,));

        #
        # Validate outputs and inputs, adding them to the test as we go along.
        #
        for asItems, sDesc, aoDst in [ (asInputs, 'input', oTest.aoInputs), (asOutputs, 'output', oTest.aoOutputs)]:
            asValidFieldKinds = [ 'both', sDesc, ];
            for sItem in asItems:
                oItem = None;
                for sOp in TestInOut.kasOperators:
                    off = sItem.find(sOp);
                    if off < 0:
                        continue;
                    sField     = sItem[:off];
                    sValueType = sItem[off + len(sOp):];
                    if     sField in TestInOut.kdFields \
                       and TestInOut.kdFields[sField][1] in asValidFieldKinds:
                        # The value may carry an explicit ':type' suffix, otherwise
                        # the default type of the field is used.
                        asSplit = sValueType.split(':', 1);
                        sValue  = asSplit[0];
                        sType   = asSplit[1] if len(asSplit) > 1 else TestInOut.kdFields[sField][0];
                        if sType in TestInOut.kdTypes:
                            oValid = TestInOut.kdTypes[sType].validate(sValue);
                            if oValid is True:
                                if not TestInOut.kdTypes[sType].isAndOrPair(sValue) or sOp == '&|=':
                                    oItem = TestInOut(sField, sOp, sValue, sType);
                                else:
                                    self.errorComment(iTagLine, '%s: and-or %s value "%s" can only be used with "&|="'
                                                                 % ( sTag, sDesc, sItem, ));
                            else:
                                self.errorComment(iTagLine, '%s: invalid %s value "%s" in "%s" (type: %s): %s'
                                                             % ( sTag, sDesc, sValue, sItem, sType, oValid, ));
                        else:
                            self.errorComment(iTagLine, '%s: invalid %s type "%s" in "%s" (valid types: %s)'
                                                         % ( sTag, sDesc, sType, sItem, TestInOut.kdTypes.keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid %s field "%s" in "%s"\nvalid fields: %s'
                                                     % ( sTag, sDesc, sField, sItem,
                                                         ', '.join([sKey for sKey, asVal in TestInOut.kdFields.items()
                                                                    if asVal[1] in asValidFieldKinds]),));
                    break; # Only the first operator found in the item is considered.
                if oItem is not None:
                    for oExisting in aoDst:
                        if oExisting.sField == oItem.sField and oExisting.sOp == oItem.sOp:
                            self.errorComment(iTagLine,
                                              '%s: already have a "%s" assignment for field "%s" (existing: %s, new: %s)'
                                              % ( sTag, oItem.sOp, oItem.sField, oExisting, oItem,));
                    aoDst.append(oItem);
                else:
                    fRc = self.errorComment(iTagLine, '%s: failed to parse assignment: %s' % ( sTag, sItem,));

        #
        # Add the test to the instruction if it parsed okay, otherwise
        # complain and dump what we managed to sort out.
        #
        if fRc:
            oInstr.aoTests.append(oTest);
        else:
            self.errorComment(iTagLine, '%s: failed to parse test: %s' % (sTag, ' '.join(asWords),));
            self.errorComment(iTagLine, '%s: asSelectors=%s / asInputs=%s -> asOutputs=%s'
                                         % (sTag, asSelectors, asInputs, asOutputs,));

    _ = iEndLine;
    return True;
4140
def parseTagOpTestNum(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Numbered \@optest tag.  Either \@optest42 or \@optest[42].

    The number must equal the count of tests the instruction already has,
    a mismatch is reported thru self.errorComment.  The tag is then handed
    on to parseTagOpTest, whose return value is returned.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # '@optest[' is eight characters long and '@optest' seven; in the
    # bracketed form the closing ']' is dropped too.
    sNumber = sTag[8:-1] if sTag[-1] == ']' else sTag[7:];
    iTest   = int(sNumber);

    cTests = len(oInstr.aoTests);
    if iTest != cTests:
        self.errorComment(iTagLine, '%s: incorrect test number: %u, actual %u' % (sTag, iTest, cTests,));
    return self.parseTagOpTest(sTag, aasSections, iTagLine, iEndLine);
4156
def parseTagOpTestIgnore(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@optestign | \@optestignore
    Value:      <value is ignored>

    A simple trick for disabling a test while debugging another one: the
    tag and its value are accepted and thrown away.

    See also \@oponlytest.

    Always returns True.
    """
    # None of the arguments are used, on purpose.
    del sTag, aasSections, iTagLine, iEndLine;
    return True;
4168
def parseTagOpCopyTests(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opcopytests
    Value:      <opstat | function> [..]
    Example:    \@opcopytests add_Eb_Gb

    Lets different encodings of the same operation share tests rather
    than duplicating them.

    Only the references are recorded here (in the instruction's
    asCopyTests list); the copying itself is done after all the input has
    been parsed so that forward references work.

    Returns True, except when no reference is given at all in which case
    the self.errorComment return value is passed on.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    _ = iEndLine;

    asReferences = self.flattenAllSections(aasSections).split();
    if not asReferences:
        return self.errorComment(iTagLine, '%s: requires at least on reference value' % (sTag,));

    for sReference in asReferences:
        # Duplicates are complained about and dropped.
        if sReference in oInstr.asCopyTests:
            self.errorComment(iTagLine, '%s: ignoring duplicate "%s"' % (sTag, sReference,));
            continue;

        # Must look like either a statistics name or a function name.
        if not (self.oReStatsName.match(sReference) or self.oReFunctionName.match(sReference)):
            self.errorComment(iTagLine, '%s: invalid instruction reference (opstat or function) "%s" (valid: %s or %s)'
                                        % (sTag, sReference, self.oReStatsName.pattern, self.oReFunctionName.pattern));
            continue;

        oInstr.asCopyTests.append(sReference);

    return True;
4197
def parseTagOpOnlyTest(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@oponlytest | \@oponly
    Value:      none

    Restricts testing to the instructions carrying this tag, which is
    handy for singling out one or two new instructions or tests.

    See also \@optestignore.

    Returns True on success, otherwise whatever self.errorComment returns.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    _ = iEndLine;

    # The tag takes no value, so anything but whitespace is an error.
    sUnexpected = self.flattenAllSections(aasSections).strip();
    if sUnexpected:
        return self.errorComment(iTagLine, '%s: does not take any value: %s' % (sTag, sUnexpected));

    # Register the instruction in the global only-test list, once.
    if oInstr not in g_aoOnlyTestInstructions:
        g_aoOnlyTestInstructions.append(oInstr);
    return True;
4220
def parseTagOpXcptType(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opxcpttype
    Value:      [none|1|2|3|4|4UA|5|6|7|8|11|12|E1|E1NF|E2|E3|E3NF|E4|E4NF|E5|E5NF|E6|E6NF|E7NF|E9|E9NF|E10|E11|E12|E12NF]

    Sets the SSE or AVX exception type (see SDMv2 2.4, 2.7).

    Returns True on success, otherwise whatever self.errorComment returns.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    _ = iEndLine;

    # Exactly one word is expected once everything is flattened.
    asWords = self.flattenAllSections(aasSections).split();
    if len(asWords) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid exception type, please: %s' % (sTag, asWords,));

    # ... and it must be one of the known exception types.
    sXcptType = asWords[0];
    if sXcptType not in g_kdXcptTypes:
        return self.errorComment(iTagLine, '%s: invalid invalid exception type: %s (valid: %s)'
                                           % (sTag, sXcptType, sorted(g_kdXcptTypes.keys()),));

    # Refuse to overwrite an earlier @opxcpttype.
    sPrevious = oInstr.sXcptType;
    if sPrevious is not None:
        return self.errorComment(iTagLine,
                                 '%s: attempting to overwrite "%s" with "%s" (only one @opxcpttype)'
                                 % ( sTag, sPrevious, sXcptType,));

    oInstr.sXcptType = sXcptType;
    return True;
4247
def parseTagOpFunction(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opfunction
    Value:      <VMM function name>

    Explicitly sets the IEM function name.  Normally this is picked up
    from the FNIEMOP_XXX macro invocation following the description, or
    it is generated from the mnemonic and operands.

    It is thought to be necessary when specifying instructions whose
    implementation isn't following immediately or that aren't implemented
    yet.

    Returns True on success, otherwise whatever self.errorComment returns.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    _ = iEndLine;

    # The flattened value must be a valid function name.
    sName = self.flattenAllSections(aasSections);
    if self.oReFunctionName.match(sName) is None:
        return self.errorComment(iTagLine, '%s: invalid VMM function name: "%s" (valid: %s)'
                                           % (sTag, sName, self.oReFunctionName.pattern));

    # A name that is already set is never replaced.
    sPrevious = oInstr.sFunction;
    if sPrevious is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite VMM function name "%s" with "%s"'
                                 % (sTag, sPrevious, sName,));

    oInstr.sFunction = sName;
    return True;
4275
def parseTagOpStats(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opstats
    Value:      <VMM statistics base name>

    Explicitly sets the statistics name.  Normally this is picked up from
    the IEMOP_MNEMONIC macro invocation, or it is generated from the
    mnemonic and operands.

    It is thought to be necessary when specifying instructions whose
    implementation isn't following immediately or that aren't implemented
    yet.

    Returns True on success, otherwise whatever self.errorComment returns.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    _ = iEndLine;

    # The flattened value must be a valid statistics name.
    sName = self.flattenAllSections(aasSections);
    if self.oReStatsName.match(sName) is None:
        return self.errorComment(iTagLine, '%s: invalid VMM statistics name: "%s" (valid: %s)'
                                           % (sTag, sName, self.oReStatsName.pattern));

    # A name that is already set is never replaced.
    sPrevious = oInstr.sStats;
    if sPrevious is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite VMM statistics base name "%s" with "%s"'
                                 % (sTag, sPrevious, sName,));

    oInstr.sStats = sName;
    return True;
4303
def parseTagOpDone(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opdone
    Value:      none

    Used to explicitly flush the instructions that have been specified.

    Returns what self.doneInstructions returns, or the self.errorComment
    return value if the tag was given a value.
    """
    _ = iEndLine;

    sLeftover = self.flattenAllSections(aasSections);
    if sLeftover == '':
        return self.doneInstructions();
    return self.errorComment(iTagLine, '%s: takes no value, found: "%s"' % (sTag, sLeftover,));
4316
4317 ## @}
4318
4319
def parseComment(self):
    """
    Parse the current comment (self.sComment).

    The comment is split into lines and then into tagged blocks.  A block
    starts at a line beginning with '@' and runs until the next such line;
    blank lines divide a block into sections.  Blocks whose tag has an
    entry in self.dTagHandlers are passed to that handler.  Text preceding
    the first tag is collected under the pseudo tag '@default'.

    Returns False if the comment is not interesting (contains neither
    'Opcode' nor '@'), otherwise True.
    """
    #
    # Bail out early if there is nothing of interest in the comment.
    #
    if 'Opcode' not in self.sComment and '@' not in self.sComment:
        return False;

    #
    # Break it up into lines and strip off the leading whitespace and
    # asterisks.  Empty lines at the start (adjusting the comment line
    # number as we go) and at the end are dropped.
    #
    asLines = [sRaw.lstrip().lstrip('*').lstrip() for sRaw in self.sComment.split('\n')];

    while asLines and not asLines[0]:
        self.iCommentLine += 1;
        del asLines[0];

    while asLines and not asLines[-1]:
        del asLines[-1];

    #
    # Old style comment: Opcode 0x0f 0x12
    #
    if asLines[0].startswith('Opcode '):
        self.parseCommentOldOpcode(asLines);

    #
    # Walk the lines collecting the @op* tagged data.
    #
    cOpTags      = 0;
    sFlatDefault = None;
    sCurTag      = '@default';
    iCurTagLine  = 0;
    asCurSection = [];
    aasSections  = [ asCurSection, ];
    for iLine, sLine in enumerate(asLines):
        if not sLine.startswith('@'):
            # More text for the current tag; an empty line opens a new
            # section unless the current one is still empty.
            if sLine:
                asCurSection.append(sLine);
            elif asCurSection:
                asCurSection = [];
                aasSections.append(asCurSection);
            continue;

        #
        # A new tag starts here, so deal with the previous one first.
        #
        if not asCurSection and len(aasSections) > 1:
            aasSections.pop(-1); # Drop the trailing empty section.
        if sCurTag in self.dTagHandlers:
            self.dTagHandlers[sCurTag](sCurTag, aasSections, iCurTagLine, iLine);
            cOpTags += 1;
        elif sCurTag.startswith('@op'):
            self.errorComment(iCurTagLine, 'Unknown tag: %s' % (sCurTag));
        elif sCurTag == '@default':
            sFlatDefault = self.flattenAllSections(aasSections);
        elif '@op' + sCurTag[1:] in self.dTagHandlers:
            self.errorComment(iCurTagLine, 'Did you mean "@op%s" rather than "%s"?' % (sCurTag[1:], sCurTag));
        elif sCurTag in ['@encoding', '@opencoding']:
            self.errorComment(iCurTagLine, 'Did you mean "@openc" rather than "%s"?' % (sCurTag,));

        #
        # Set up the new tag.  The tag name is lower cased and the rest of
        # the line, if any, becomes the first line of the first section.
        #
        asTagAndRest = sLine.split(None, 1);
        sCurTag      = asTagAndRest[0].lower();
        asCurSection = [asTagAndRest[1],] if len(asTagAndRest) > 1 else [];
        aasSections  = [asCurSection, ];
        iCurTagLine  = iLine;

    #
    # The last tag is still pending, process it.
    #
    if not asCurSection and len(aasSections) > 1:
        aasSections.pop(-1);
    if sCurTag in self.dTagHandlers:
        self.dTagHandlers[sCurTag](sCurTag, aasSections, iCurTagLine, iLine);
        cOpTags += 1;
    elif sCurTag.startswith('@op'):
        self.errorComment(iCurTagLine, 'Unknown tag: %s' % (sCurTag));
    elif sCurTag == '@default':
        sFlatDefault = self.flattenAllSections(aasSections);

    #
    # Untagged text is not permitted in blocks containing @op* tags.
    #
    if cOpTags > 0 and sFlatDefault:
        self.errorComment(0, 'Untagged comment text is not allowed with @op*: %s' % (sFlatDefault,));

    return True;
4420
def parseMacroInvocation(self, sInvocation, offStartInvocation = 0):
    """
    Parses a macro invocation.

    The macro name is expected at offStartInvocation in sInvocation.  If
    the closing parenthesis isn't found in sInvocation, the following
    source lines (self.asLines starting at index self.iLine) are appended
    one by one until it is.

    Returns three values:
        1. A list of macro arguments, where the zero'th is the macro name.
        2. The offset following the macro invocation, into sInvocation of
           this is on the same line or into the last line if it is on a
           different line.
        3. Number of additional lines the invocation spans (i.e. zero if
           it is all contained within sInvocation).
    """
    # The name is whatever is found between the start offset and the '('.
    offParen = sInvocation.find('(', offStartInvocation);
    if offParen <= offStartInvocation:
        self.raiseError("macro invocation open parenthesis not found");
    sMacroName = sInvocation[offStartInvocation:offParen].strip();
    if not self.oReMacroName.match(sMacroName):
        self.raiseError("invalid macro name '%s'" % (sMacroName,));
    asResult = [sMacroName, ];

    # Then the arguments, scanning one character at a time.
    iNextLine   = self.iLine;       # Index into self.asLines of the next line to append.
    cNesting    = 1;                # Parenthesis nesting depth, the macro's own '(' counts as one.
    offCur      = offParen + 1;
    offArgStart = offCur;           # Where the current argument starts.
    offLineBase = 0;                # Offset in sInvocation of the most recently appended line.
    chOpenQuote = None;             # The quote character when inside a string/char literal.
    while cNesting > 0:
        if offCur >= len(sInvocation):
            # Out of text, pull in the next source line if there is one.
            if iNextLine >= len(self.asLines):
                self.error('macro invocation beyond end of file');
                return (asResult, offCur - offLineBase, iNextLine - self.iLine);
            offLineBase  = offCur;
            sInvocation += self.asLines[iNextLine];
            iNextLine   += 1;
        chCur = sInvocation[offCur];

        if chOpenQuote:
            # Inside quotes only escape sequences and the closing quote matter.
            if chCur == '\\' and offCur + 1 < len(sInvocation):
                offCur += 1;
            elif chCur == chOpenQuote:
                chOpenQuote = None;
        elif chCur in ('"', '\'',):
            chOpenQuote = chCur;
        elif chCur == '(':
            cNesting += 1;
        elif chCur in (',', ')',):
            # Only commas and the final ')' at the outermost level delimit arguments.
            if cNesting == 1:
                asResult.append(sInvocation[offArgStart:offCur].strip());
                offArgStart = offCur + 1;
            if chCur == ')':
                cNesting -= 1;
        offCur += 1;

    return (asResult, offCur - offLineBase, iNextLine - self.iLine);
4477
def findAndParseMacroInvocationEx(self, sCode, sMacro, offStart = 0):
    """
    Looks for the first occurrence of sMacro in sCode at or after offStart
    and parses it as a macro invocation if it is followed by an opening
    parenthesis (whitespace in between is fine).

    Returns (None, len(sCode), 0) if not found, otherwise the
    parseMacroInvocation() return value.
    """
    offHit = sCode.find(sMacro, offStart);
    # Note! Must not index into the stripped remainder here, as it is empty
    #       when the macro name is the last thing in sCode.
    if offHit >= 0 and sCode[offHit + len(sMacro):].lstrip().startswith('('):
        return self.parseMacroInvocation(sCode, offHit);
    return (None, len(sCode), 0);
4487
def findAndParseMacroInvocation(self, sCode, sMacro):
    """
    Simplified findAndParseMacroInvocationEx that searches from the start
    of sCode and only returns the argument list.

    Returns None if not found, arguments as per parseMacroInvocation if found.
    """
    tResult = self.findAndParseMacroInvocationEx(sCode, sMacro);
    return tResult[0];
4493
def findAndParseFirstMacroInvocation(self, sCode, asMacro):
    """
    Tries the macro names in asMacro in order, stopping at the first one
    that findAndParseMacroInvocation finds in sCode.

    Returns same as findAndParseMacroInvocation.
    """
    # The generator is lazy, so no more lookups are done after the first hit.
    oLookups = (self.findAndParseMacroInvocation(sCode, sMacro) for sMacro in asMacro);
    return next((asRet for asRet in oLookups if asRet is not None), None);
4503
def workerIemOpMnemonicEx(self, sMacro, sStats, sAsm, sForm, sUpper, sLower, # pylint: disable=too-many-arguments
                          sDisHints, sIemHints, asOperands):
    """
    Processes one of the a IEMOP_MNEMONIC0EX, IEMOP_MNEMONIC1EX, IEMOP_MNEMONIC2EX,
    IEMOP_MNEMONIC3EX, and IEMOP_MNEMONIC4EX macros.

    The macro parameters are checked against each other and against what
    the last current instruction already specifies (creating one if there
    is none); fields the instruction has not set yet are filled in from
    the macro.  Problems are reported thru self.error.

    Nothing is applied to the instruction if sIemHints contains
    IEMOPHINT_SKIP_PYTHON.  The sAsm parameter is currently unused.

    Always returns True.
    """
    #
    # Some invocation checks.
    #
    if sUpper != sUpper.upper():
        self.error('%s: bad a_Upper parameter: %s' % (sMacro, sUpper,));
    if sLower != sLower.lower():
        self.error('%s: bad a_Lower parameter: %s' % (sMacro, sLower,));
    if sUpper.lower() != sLower:
        self.error('%s: a_Upper and a_Lower parameters does not match: %s vs %s' % (sMacro, sUpper, sLower,));
    if not self.oReMnemonic.match(sLower):
        self.error('%s: invalid a_Lower: %s  (valid: %s)' % (sMacro, sLower, self.oReMnemonic.pattern,));

    #
    # Check if sIemHints tells us to not consider this macro invocation.
    #
    if sIemHints.find('IEMOPHINT_SKIP_PYTHON') >= 0:
        return True;

    # Apply to the last instruction only for now.
    if not self.aoCurInstrs:
        self.addInstruction();
    oInstr = self.aoCurInstrs[-1];
    if oInstr.iLineMnemonicMacro == -1:
        oInstr.iLineMnemonicMacro = self.iLine;
    else:
        self.error('%s: already saw a IEMOP_MNEMONIC* macro on line %u for this instruction'
                   % (sMacro, oInstr.iLineMnemonicMacro,));

    # Mnemonic
    if oInstr.sMnemonic is None:
        oInstr.sMnemonic = sLower;
    elif oInstr.sMnemonic != sLower:
        self.error('%s: current instruction and a_Lower does not match: %s vs %s' % (sMacro, oInstr.sMnemonic, sLower,));

    # Process operands.
    if len(oInstr.aoOperands) not in [0, len(asOperands)]:
        self.error('%s: number of operands given by @opN does not match macro: %s vs %s'
                   % (sMacro, len(oInstr.aoOperands), len(asOperands),));
    for iOperand, sType in enumerate(asOperands):
        sWhere = g_kdOpTypes.get(sType, [None, None])[1];
        if sWhere is None:
            self.error('%s: unknown a_Op%u value: %s' % (sMacro, iOperand + 1, sType));
            if iOperand < len(oInstr.aoOperands): # error recovery.
                sWhere = oInstr.aoOperands[iOperand].sWhere;
                sType  = oInstr.aoOperands[iOperand].sType;
            else:
                sWhere = 'reg';
                sType  = 'Gb';
        if iOperand == len(oInstr.aoOperands):
            oInstr.aoOperands.append(Operand(sWhere, sType))
        elif oInstr.aoOperands[iOperand].sWhere != sWhere or oInstr.aoOperands[iOperand].sType != sType:
            self.error('%s: @op%u and a_Op%u mismatch: %s:%s vs %s:%s'
                       % (sMacro, iOperand + 1, iOperand + 1, oInstr.aoOperands[iOperand].sWhere,
                          oInstr.aoOperands[iOperand].sType, sWhere, sType,));

    # Encoding.
    if sForm not in g_kdIemForms:
        self.error('%s: unknown a_Form value: %s' % (sMacro, sForm,));
    else:
        if oInstr.sEncoding is None:
            oInstr.sEncoding = g_kdIemForms[sForm][0];
        elif g_kdIemForms[sForm][0] != oInstr.sEncoding:
            self.error('%s: current instruction @openc and a_Form does not match: %s vs %s (%s)'
                       % (sMacro, oInstr.sEncoding, g_kdIemForms[sForm], sForm));

        # Check the parameter locations for the encoding.
        if g_kdIemForms[sForm][1] is not None:
            if len(g_kdIemForms[sForm][1]) > len(oInstr.aoOperands):
                self.error('%s: The a_Form=%s has a different operand count: %s (form) vs %s'
                           % (sMacro, sForm, len(g_kdIemForms[sForm][1]), len(oInstr.aoOperands) ));
            else:
                for iOperand, sWhere in enumerate(g_kdIemForms[sForm][1]):
                    if oInstr.aoOperands[iOperand].sWhere != sWhere:
                        self.error('%s: current instruction @op%u and a_Form location does not match: %s vs %s (%s)'
                                   % (sMacro, iOperand + 1, oInstr.aoOperands[iOperand].sWhere, sWhere, sForm,));
                    sOpFormMatch = g_kdOpTypes[oInstr.aoOperands[iOperand].sType][4];
                    # Note! For the 'V' case the form must mention VEX or XOP anywhere
                    #       (including at offset zero) and have a 'V' in it besides the
                    #       one in 'VEX'.  A str.find() result must be compared with
                    #       zero and never be used as a boolean, since -1 is true.
                    if    (sOpFormMatch in [ 'REG', 'MEM', ] and sForm.find('_' + sOpFormMatch) < 0) \
                       or (sOpFormMatch in [ 'FIXED', ]      and sForm.find(sOpFormMatch) < 0) \
                       or (sOpFormMatch == 'RM' and (sForm.find('_MEM') > 0 or sForm.find('_REG') > 0) ) \
                       or (sOpFormMatch == 'V'  and (   ('VEX' not in sForm and 'XOP' not in sForm) \
                                                     or sForm.replace('VEX','').find('V') < 0) ):
                        self.error('%s: current instruction @op%u and a_Form type does not match: %s/%s vs %s'
                                   % (sMacro, iOperand + 1, oInstr.aoOperands[iOperand].sType, sOpFormMatch, sForm, ));
                # Any operands beyond those given by the form must be fixed ones.
                if len(g_kdIemForms[sForm][1]) < len(oInstr.aoOperands):
                    for iOperand in range(len(g_kdIemForms[sForm][1]), len(oInstr.aoOperands)):
                        if    oInstr.aoOperands[iOperand].sType != 'FIXED' \
                          and g_kdOpTypes[oInstr.aoOperands[iOperand].sType][0] != 'IDX_ParseFixedReg':
                            self.error('%s: Expected FIXED type operand #%u following operands given by a_Form=%s: %s (%s)'
                                       % (sMacro, iOperand, sForm, oInstr.aoOperands[iOperand].sType,
                                          oInstr.aoOperands[iOperand].sWhere));

        # Check @opcodesub
        if    oInstr.sSubOpcode \
          and g_kdIemForms[sForm][2] \
          and oInstr.sSubOpcode.find(g_kdIemForms[sForm][2]) < 0:
            self.error('%s: current instruction @opcodesub and a_Form does not match: %s vs %s (%s)'
                        % (sMacro, oInstr.sSubOpcode, g_kdIemForms[sForm][2], sForm,));

    # Stats.
    if not self.oReStatsName.match(sStats):
        self.error('%s: invalid a_Stats value: %s' % (sMacro, sStats,));
    elif oInstr.sStats is None:
        oInstr.sStats = sStats;
    elif oInstr.sStats != sStats:
        self.error('%s: mismatching @opstats and a_Stats value: %s vs %s'
                   % (sMacro, oInstr.sStats, sStats,));

    # Process the hints (simply merge with @ophints w/o checking anything).
    for sHint in sDisHints.split('|'):
        sHint = sHint.strip();
        if sHint.startswith('DISOPTYPE_'):
            sShortHint = sHint[len('DISOPTYPE_'):].lower();
            if sShortHint in g_kdHints:
                oInstr.dHints[sShortHint] = True; # (dummy value, using dictionary for speed)
            else:
                self.error('%s: unknown a_fDisHints value: %s' % (sMacro, sHint,));
        elif sHint != '0':
            self.error('%s: expected a_fDisHints value: %s' % (sMacro, sHint,));

    for sHint in sIemHints.split('|'):
        sHint = sHint.strip();
        if sHint.startswith('IEMOPHINT_'):
            sShortHint = sHint[len('IEMOPHINT_'):].lower();
            if sShortHint in g_kdHints:
                oInstr.dHints[sShortHint] = True; # (dummy value, using dictionary for speed)
            else:
                self.error('%s: unknown a_fIemHints value: %s' % (sMacro, sHint,));
        elif sHint != '0':
            self.error('%s: expected a_fIemHints value: %s' % (sMacro, sHint,));

    _ = sAsm;
    return True;
4643
def workerIemOpMnemonic(self, sMacro, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands):
    """
    Processes one of the a IEMOP_MNEMONIC0, IEMOP_MNEMONIC1, IEMOP_MNEMONIC2,
    IEMOP_MNEMONIC3, and IEMOP_MNEMONIC4 macros.

    These don't specify the statistics name nor the assembly string, so
    both are derived from the lower case mnemonic and the operands before
    the work is passed on to workerIemOpMnemonicEx.

    Returns what workerIemOpMnemonicEx returns.
    """
    if asOperands:
        sStats = sLower + '_' + '_'.join(asOperands);
        sAsm   = sLower + ' ' + ','.join(asOperands);
    else:
        # No operands: The mnemonic serves as both.
        sStats = sLower;
        sAsm   = sLower;
    return self.workerIemOpMnemonicEx(sMacro, sStats, sAsm, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands);
4653
def workerIemMcBegin(self, sCode, offBeginStatementInCodeStr, offBeginStatementInLine):
    """
    Process a IEM_MC_BEGIN macro invocation.

    Creates a new McBlock, makes it the current one (self.oCurMcBlock) and
    adds it to g_aoMcBlocks.  Complains (self.raiseError) if there is no
    current function or if the previous block hasn't been ended yet.

    Returns True.
    """
    if self.fDebugMc:
        self.debug('IEM_MC_BEGIN on %s off %s' % (self.iLine, offBeginStatementInLine,));

    # Preconditions: We must be inside a function and not inside another block.
    if not self.oCurFunction:
        self.raiseError('IEM_MC_BEGIN w/o current function (%s)' % (sCode,));
    if self.oCurMcBlock:
        self.raiseError('IEM_MC_BEGIN before IEM_MC_END.  Previous IEM_MC_BEGIN at line %u' % (self.oCurMcBlock.iBeginLine,));

    # The indent is the statement's offset relative to the start of its
    # (sub-)line in sCode; expanded multiline macros contain newlines.
    # Note! rfind returns -1 if there is no newline, making the start zero.
    offLineStart = sCode.rfind('\n', 0, offBeginStatementInCodeStr) + 1;
    cchIndent    = offBeginStatementInCodeStr - offLineStart;

    # Create the block and do the accounting.
    oNewBlock = McBlock(self.sSrcFile, self.iLine, offBeginStatementInLine,
                        self.oCurFunction, self.iMcBlockInFunc, cchIndent);
    self.oCurMcBlock = oNewBlock;
    g_aoMcBlocks.append(oNewBlock);
    self.cTotalMcBlocks += 1;
    self.iMcBlockInFunc += 1;
    return True;
4682
4683 @staticmethod
4684 def extractLinesFromMacroExpansionLine(sRawLine, offBegin, offEnd, sBeginStmt = 'IEM_MC_BEGIN'):
4685 """
4686 Helper used by workerIemMcEnd and workerIemMcDeferToCImplXRet for
4687 extracting a statement block from a string that's the result of macro
4688 expansion and therefore contains multiple "sub-lines" as it were.
4689
4690 Returns list of lines covering offBegin thru offEnd in sRawLine.
4691 """
4692
4693 off = sRawLine.find('\n', offEnd);
4694 if off > 0:
4695 sRawLine = sRawLine[:off + 1];
4696
4697 off = sRawLine.rfind('\n', 0, offBegin) + 1;
4698 sRawLine = sRawLine[off:];
4699 if not sRawLine.strip().startswith(sBeginStmt):
4700 sRawLine = sRawLine[offBegin - off:]
4701
4702 return [sLine + '\n' for sLine in sRawLine.split('\n')];
4703
def workerIemMcEnd(self, offEndStatementInLine):
    """
    Process a IEM_MC_END macro invocation.

    Collects the source lines making up the current block, cuts off
    whatever follows the 'IEM_MC_END();' statement on the last one, and
    then completes and discards the current block (self.oCurMcBlock).

    Complains (self.raiseError) if there is no current block or if the
    IEM_MC_END statement is malformed.  Returns True.
    """
    if self.fDebugMc:
        self.debug('IEM_MC_END on %s off %s' % (self.iLine, offEndStatementInLine,));

    # Check preconditions.
    if not self.oCurMcBlock:
        self.raiseError('IEM_MC_END w/o IEM_MC_BEGIN.');

    #
    # HACK ALERT! For blocks orginating from macro expansion the start and
    #             end line will be the same, but the line has multiple
    #             newlines inside it.  So, we have to do some extra tricks
    #             to get the lines out of there. We ASSUME macros aren't
    #             messy, but keep IEM_MC_BEGIN/END on separate lines.
    #
    if self.iLine > self.oCurMcBlock.iBeginLine:
        asLines = self.asLines[self.oCurMcBlock.iBeginLine - 1 : self.iLine];
        if not asLines[0].strip().startswith('IEM_MC_BEGIN'):
            self.raiseError('IEM_MC_BEGIN is not the first word on the line');
    else:
        asLines = self.extractLinesFromMacroExpansionLine(self.asLines[self.iLine - 1],
                                                          self.oCurMcBlock.offBeginLine, offEndStatementInLine);

    #
    # Strip anything following the IEM_MC_END(); statement in the final line,
    # so that we don't carry on any trailing 'break' after macro expansions
    # like for iemOp_movsb_Xb_Yb.
    #
    while asLines[-1].strip() == '':
        asLines.pop();
    sFinal        = asLines[-1];
    offFinalEnd   = sFinal.find('IEM_MC_END');
    offEndInFinal = offFinalEnd;
    if offFinalEnd < 0: self.raiseError('bogus IEM_MC_END: Not in final line: %s' % (sFinal,));
    offFinalEnd += len('IEM_MC_END');

    # Expect '(', ')' and ';' in that order, each optionally preceded by
    # whitespace.  The offset is bounds checked so that a statement that is
    # cut short gets the proper diagnostic rather than an IndexError.
    for chExpected in ('(', ')', ';'):
        while offFinalEnd < len(sFinal) and sFinal[offFinalEnd].isspace():
            offFinalEnd += 1;
        if offFinalEnd >= len(sFinal) or sFinal[offFinalEnd] != chExpected:
            self.raiseError('bogus IEM_MC_END: Expected "%s" at %s: %s' % (chExpected, offFinalEnd, sFinal,));
        offFinalEnd += 1;

    asLines[-1] = sFinal[: offFinalEnd];

    #
    # Complete and discard the current block.
    #
    self.oCurMcBlock.complete(self.iLine, offEndStatementInLine,
                              offEndStatementInLine + offFinalEnd - offEndInFinal, asLines);
    self.oCurMcBlock = None;
    return True;
4767
def workerIemMcDeferToCImplXRet(self, sCode, offBeginStatementInCodeStr, offBeginStatementInLine, cParams):
    """
    Process a IEM_MC_DEFER_TO_CIMPL_[1-5]_RET macro invocation.

    The statement is turned into a complete McBlock of its own, holding a
    single McStmtCall, which is added to g_aoMcBlocks.  self.iLine is
    advanced if the invocation spans more than one line.

    Complains (self.raiseError) if there is no current function, if we're
    inside an IEM_MC_BEGIN block, or if the invocation can't be parsed or
    has an unexpected argument count.  Returns True.
    """
    sStmt = 'IEM_MC_DEFER_TO_CIMPL_%d_RET' % (cParams,);
    if self.fDebugMc:
        self.debug('%s on %s off %s' % (sStmt, self.iLine, offBeginStatementInLine,));

    # Preconditions: We must be inside a function, but outside any MC block.
    if not self.oCurFunction:
        self.raiseError('%s w/o current function (%s)' % (sStmt, sCode,));
    if self.oCurMcBlock:
        self.raiseError('%s inside IEM_MC_BEGIN blocki starting at line %u' % (sStmt, self.oCurMcBlock.iBeginLine,));

    # The indent is the statement's offset relative to the start of its
    # (sub-)line in sCode; expanded multiline macros contain newlines.
    # Note! rfind returns -1 if there is no newline, making the start zero.
    offLineStart = sCode.rfind('\n', 0, offBeginStatementInCodeStr) + 1;
    cchIndent    = offBeginStatementInCodeStr - offLineStart;

    # Create the block.
    oMcBlock = McBlock(self.sSrcFile, self.iLine, offBeginStatementInLine,
                       self.oCurFunction, self.iMcBlockInFunc, cchIndent);

    # Parse the statement.  Besides the macro name, two arguments more than
    # cParams are expected.
    asArgs, offAfter, cLines = self.findAndParseMacroInvocationEx(sCode, sStmt, offBeginStatementInCodeStr);
    if asArgs is None:
        self.raiseError('%s: Closing parenthesis not found!' % (sStmt,));
    cExpectedArgs = cParams + 3;
    if len(asArgs) != cExpectedArgs:
        self.raiseError('%s: findAndParseMacroInvocationEx returns %s args, expected %s!'
                        % (sStmt, len(asArgs), cExpectedArgs,));

    oMcBlock.aoStmts = [McStmtCall(asArgs[0], asArgs[1:], 1),];

    # These MCs are not typically part of macro expansions, but let's get
    # it out of the way immediately if it's the case.
    if cLines > 0 or self.asLines[oMcBlock.iBeginLine - 1].count('\n') <= 1:
        # Ordinary source line(s); the statement must be followed directly by a semicolon.
        asLines = self.asLines[self.iLine - 1 : self.iLine + cLines];
        assert offAfter < len(asLines[-1]) and asLines[-1][offAfter] == ';', \
               'iBeginLine=%d iLine=%d offAfter=%s line: "%s"' % (oMcBlock.iBeginLine, self.iLine, offAfter, asLines[-1],);
        asLines[-1] = asLines[-1][:offAfter + 1];
    else:
        # Macro expansion with several sub-lines in one line; cut the last one at the first semicolon.
        asLines = self.extractLinesFromMacroExpansionLine(self.asLines[self.iLine - 1], offBeginStatementInCodeStr,
                                                          offAfter, sStmt);
        offSemicolon = asLines[-1].find(';');
        assert offSemicolon >= 0;
        asLines[-1] = asLines[-1][:offSemicolon + 1];

    assert asLines[0].find(sStmt) >= 0;

    # Advance to the line with the closing ')'.
    self.iLine += cLines;

    # Complete the block.
    offEndStatement = 0 if cLines > 0 else offBeginStatementInCodeStr;
    oMcBlock.complete(self.iLine, offEndStatement, offAfter + 1, asLines);

    g_aoMcBlocks.append(oMcBlock);
    self.cTotalMcBlocks += 1;
    self.iMcBlockInFunc += 1;

    return True;
4832
def workerStartFunction(self, asArgs):
    """
    Handles the start of a decoder function.

    Decoder functions are all defined thru one of the FNIEMOP*_DEF* and
    FNIEMOP_*STUB* macros.  asArgs is the parsed invocation, with the macro
    name as entry 0 and the function name taken from entry 1.

    Any function currently being tracked is completed first, using the
    lines from its start up to the line before the current one.

    Returns True.
    """
    oPrevFunction = self.oCurFunction;
    if oPrevFunction:
        iPrevEndLine = self.iLine - 1;
        asPrevLines  = self.asLines[oPrevFunction.iBeginLine - 1 : iPrevEndLine];
        oPrevFunction.complete(iPrevEndLine, asPrevLines);

    # Track the new function from the current line on.
    sFuncName = asArgs[1];
    self.oCurFunction = DecoderFunction(self.sSrcFile, self.iLine, sFuncName, asArgs);
    return True;
4848
def checkCodeForMacro(self, sCode, offLine):
    """
    Checks code for relevant macro invocation.

    sCode is the piece of code to scan.  offLine is added to the match
    offsets that are handed on to the IEM_MC_XXX workers.

    Returns True if a FNIEMOP_XXX function definition macro was handled or
    if sCode contains 'IEM_MC_', otherwise False.
    """

    #
    # Scan macro invocations.
    # (Only bother if there is a '(' somewhere after the first character.)
    #
    if sCode.find('(') > 0:
        # Look for instruction decoder function definitions. ASSUME single line.
        asArgs = self.findAndParseFirstMacroInvocation(sCode,
                                                       [ 'FNIEMOP_DEF',
                                                         'FNIEMOPRM_DEF',
                                                         'FNIEMOP_STUB',
                                                         'FNIEMOP_STUB_1',
                                                         'FNIEMOP_UD_STUB',
                                                         'FNIEMOP_UD_STUB_1' ]);
        if asArgs is not None:
            self.workerStartFunction(asArgs);
            #self.debug('%s: oCurFunction=%s' % (self.iLine, self.oCurFunction.sName,));

            # Make sure there is an instruction to associate the function with
            # and note down where the FNIEMOP_XXX macro was seen.
            if not self.aoCurInstrs:
                self.addInstruction();
            for oInstr in self.aoCurInstrs:
                if oInstr.iLineFnIemOpMacro == -1:
                    oInstr.iLineFnIemOpMacro = self.iLine;
                else:
                    self.error('%s: already seen a FNIEMOP_XXX macro for %s' % (asArgs[0], oInstr,) );
            self.setInstrunctionAttrib('sFunction', asArgs[1]);
            # Note! None of the macro names above start with 'STUB', so '> 0' is sufficient here.
            self.setInstrunctionAttrib('fStub', asArgs[0].find('STUB') > 0, fOverwrite = True);
            self.setInstrunctionAttrib('fUdStub', asArgs[0].find('UD_STUB') > 0, fOverwrite = True);
            if asArgs[0].find('STUB') > 0:
                self.doneInstructions(fEndOfFunction = True);
            return True;

        # Check for worker function definitions, so we can get a context for MC blocks.
        asArgs = self.findAndParseFirstMacroInvocation(sCode,
                                                       [ 'FNIEMOP_DEF_1',
                                                         'FNIEMOP_DEF_2', ]);
        if asArgs is not None:
            self.workerStartFunction(asArgs);
            #self.debug('%s: oCurFunction=%s (%s)' % (self.iLine, self.oCurFunction.sName, asArgs[0]));
            return True;

        # IEMOP_HLP_DONE_VEX_DECODING_*
        # The L0 variants require the 'vex_l_zero' hint; complain if it is missing
        # on an instruction with a mnemonic macro and add it.
        asArgs = self.findAndParseFirstMacroInvocation(sCode,
                                                       [ 'IEMOP_HLP_DONE_VEX_DECODING',
                                                         'IEMOP_HLP_DONE_VEX_DECODING_L0',
                                                         'IEMOP_HLP_DONE_VEX_DECODING_NO_VVVV',
                                                         'IEMOP_HLP_DONE_VEX_DECODING_L0_AND_NO_VVVV',
                                                         ]);
        if asArgs is not None:
            sMacro = asArgs[0];
            if sMacro in ('IEMOP_HLP_DONE_VEX_DECODING_L0', 'IEMOP_HLP_DONE_VEX_DECODING_L0_AND_NO_VVVV', ):
                for oInstr in self.aoCurInstrs:
                    if 'vex_l_zero' not in oInstr.dHints:
                        if oInstr.iLineMnemonicMacro >= 0:
                            self.errorOnLine(oInstr.iLineMnemonicMacro,
                                             'Missing IEMOPHINT_VEX_L_ZERO! (%s on line %d)' % (sMacro, self.iLine,));
                        oInstr.dHints['vex_l_zero'] = True;

        #
        # IEMOP_MNEMONIC*
        #
        if sCode.find('IEMOP_MNEMONIC') >= 0:
            # IEMOP_MNEMONIC(a_Stats, a_szMnemonic) IEMOP_INC_STATS(a_Stats)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC');
            if asArgs is not None:
                if len(self.aoCurInstrs) == 1:
                    oInstr = self.aoCurInstrs[0];
                    if oInstr.sStats is None:
                        oInstr.sStats = asArgs[1];
                    self.deriveMnemonicAndOperandsFromStats(oInstr, asArgs[1]);

            # Note! In the calls below the two hint arguments are passed before the
            #       operand list, i.e. the operands are pulled out of the middle of asArgs.

            # IEMOP_MNEMONIC0EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC0EX');
            if asArgs is not None:
                self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[6],
                                           asArgs[7], []);
            # IEMOP_MNEMONIC1EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC1EX');
            if asArgs is not None:
                self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[7],
                                           asArgs[8], [asArgs[6],]);
            # IEMOP_MNEMONIC2EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC2EX');
            if asArgs is not None:
                self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[8],
                                           asArgs[9], [asArgs[6], asArgs[7]]);
            # IEMOP_MNEMONIC3EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_fDisHints,
            #                   a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC3EX');
            if asArgs is not None:
                self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[9],
                                           asArgs[10], [asArgs[6], asArgs[7], asArgs[8],]);
            # IEMOP_MNEMONIC4EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_Op4, a_fDisHints,
            #                   a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC4EX');
            if asArgs is not None:
                self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[10],
                                           asArgs[11], [asArgs[6], asArgs[7], asArgs[8], asArgs[9],]);

            # IEMOP_MNEMONIC0(a_Form, a_Upper, a_Lower, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC0');
            if asArgs is not None:
                self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], []);
            # IEMOP_MNEMONIC1(a_Form, a_Upper, a_Lower, a_Op1, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC1');
            if asArgs is not None:
                self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[5], asArgs[6], [asArgs[4],]);
            # IEMOP_MNEMONIC2(a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC2');
            if asArgs is not None:
                self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[6], asArgs[7],
                                         [asArgs[4], asArgs[5],]);
            # IEMOP_MNEMONIC3(a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC3');
            if asArgs is not None:
                self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[7], asArgs[8],
                                         [asArgs[4], asArgs[5], asArgs[6],]);
            # IEMOP_MNEMONIC4(a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_Op4, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC4');
            if asArgs is not None:
                self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[8], asArgs[9],
                                         [asArgs[4], asArgs[5], asArgs[6], asArgs[7],]);

        #
        # IEM_MC_BEGIN + IEM_MC_END.
        # We must support multiple instances per code snippet.
        #
        offCode = sCode.find('IEM_MC_');
        if offCode >= 0:
            for oMatch in self.oReMcBeginEnd.finditer(sCode, offCode):
                if oMatch.group(1) == 'END':
                    self.workerIemMcEnd(offLine + oMatch.start());
                elif oMatch.group(1) == 'BEGIN':
                    self.workerIemMcBegin(sCode, oMatch.start(), offLine + oMatch.start());
                else:
                    # Anything else is treated as DEFER_TO_CIMPL_<n>...; the single character
                    # following 'DEFER_TO_CIMPL_' is the parameter count.
                    self.workerIemMcDeferToCImplXRet(sCode, oMatch.start(), offLine + oMatch.start(),
                                                     int(oMatch.group(1)[len('DEFER_TO_CIMPL_')]));
            return True;

    return False;
4992
def workerPreProcessRecreateMacroRegex(self):
    """
    Recreates self.oReMacros when self.dMacros changes.

    The regular expression has a single capture group with one alternative
    per macro in self.dMacros.  A macro with an argument list (asArgs is not
    None) only matches when its name is followed by optional whitespace and
    an opening parenthesis, which is included in the match.  A macro without
    an argument list must end on a word boundary.

    self.oReMacros is set to None when there are no macros.

    Returns True.
    """
    if self.dMacros:
        # Raw strings are used for the regex fragments since '\s' and '\(' are
        # not valid escape sequences in ordinary string literals.
        asAlternatives = [];
        for sName, oMacro in self.dMacros.items():
            if oMacro.asArgs is not None:
                asAlternatives.append(sName + r'\s*\(');
            else:
                asAlternatives.append(sName + r'\b');
        self.oReMacros = re.compile(r'\b(' + '|'.join(asAlternatives) + ')');
    else:
        self.oReMacros = None;
    return True;
5013
def workerPreProcessDefine(self, sRest):
    """
    Handles a macro #define, where sRest is the text following the directive word.

    Continuation lines are pulled in from self.asLines (advancing self.iLine).
    The definition is only recorded in self.dMacros if its body contains
    IEM_MC_BEGIN; all other macros are ignored.

    Returns True when the macro is ignored, the result of
    workerPreProcessRecreateMacroRegex() when it is recorded, and the result
    of self.error() when the definition cannot be parsed.
    """

    #
    # Join up line continuations.  The newline characters are kept, while the
    # escaping backslash and any whitespace in front of it are dropped.
    #
    iLineDefine = self.iLine;
    while sRest.endswith('\\\n') and self.iLine < len(self.asLines):
        sNextLine = self.asLines[self.iLine];
        self.iLine += 1;
        sRest = sRest[:-2].rstrip() + '\n' + sNextLine;
    #self.debug('workerPreProcessDefine: sRest=%s<EOS>' % (sRest,));

    #
    # Split the definition into name, parameter list and body, trying the
    # regex with a parameter list first and falling back on the simple one.
    #
    asArgs = None;
    oMatch = self.oReHashDefine2.match(sRest);
    if oMatch:
        sParams = oMatch.group(2).strip();
        if sParams:
            asArgs = [sParam.strip() for sParam in sParams.split(',')];
        sBody = oMatch.group(3);
    else:
        oMatch = self.oReHashDefine3.match(sRest);
        if not oMatch:
            self.debug('workerPreProcessDefine: wtf? sRest=%s' % (sRest,));
            return self.error('bogus macro definition: %s' % (sRest,));
        sBody = oMatch.group(2);
    sName = oMatch.group(1);
    assert sName == sName.strip();
    #self.debug('workerPreProcessDefine: sName=%s asArgs=%s sBody=%s<EOS>' % (sName, asArgs, sBody));

    #
    # Only macros containing MC blocks are of interest.  MC blocks within
    # nested macro expansions are NOT supported, to avoid lots of extra work.
    #
    # Note! IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX and other macros making
    #       use of IEMOP_RAISE_INVALID_LOCK_PREFIX_RET() are ignored here and
    #       dealt with by overriding IEMOP_RAISE_INVALID_LOCK_PREFIX_RET and
    #       its siblings in the recompiler.  That is a lot simpler than nested
    #       macro expansion and heuristics for locating the relevant macros,
    #       and it avoids producing lots of unnecessary threaded functions.
    #
    if 'IEM_MC_BEGIN' not in sBody:
        #self.debug('workerPreProcessDefine: irrelevant (%s: %s)' % (sName, sBody));
        return True;

    #
    # Record the macro and refresh the regex used for spotting invocations.
    #
    if self.fDebugPreProc:
        self.debug('#define %s on line %u' % (sName, self.iLine,));
    self.dMacros[sName] = SimpleParser.Macro(sName, asArgs, sBody.strip(), iLineDefine);
    return self.workerPreProcessRecreateMacroRegex();
5071
def workerPreProcessUndef(self, sRest):
    """
    Handles a macro #undef, where sRest is the text following the directive word.

    If the named macro is one of those tracked in self.dMacros it is removed
    and the macro regex is recreated.

    Returns True if the macro is not tracked, otherwise the result of
    workerPreProcessRecreateMacroRegex().
    """
    # Isolate the name, doing a quick comment strip by cutting at the first slash.
    offComment = sRest.find('/');
    sMacroName = (sRest[:offComment] if offComment > 0 else sRest).strip();

    # Nothing to do unless we're tracking the macro.
    if sMacroName not in self.dMacros:
        return True;

    if self.fDebugPreProc:
        self.debug('#undef %s on line %u' % (sMacroName, self.iLine,));
    del self.dMacros[sMacroName];
    return self.workerPreProcessRecreateMacroRegex();
5090
def checkPreProcessorDirectiveForDefineUndef(self, sLine):
    """
    Handles a preprocessor directive line, acting on #define and #undef only.

    The text captured by the matching directive regex is passed to the
    corresponding worker with a newline appended.

    Returns the worker's result, or False if sLine is neither directive.
    """
    aoDispatch = (
        (self.oReHashDefine, self.workerPreProcessDefine),
        (self.oReHashUndef,  self.workerPreProcessUndef),
    );
    for oRegex, fnWorker in aoDispatch:
        oMatch = oRegex.match(sLine);
        if oMatch:
            return fnWorker(oMatch.group(1) + '\n');
    return False;
5103
def expandMacros(self, sLine, oMatch):
    """
    Expands the macro invocation that oMatch found in sLine.

    Only one invocation per line is handled, namely the one oMatch matched.
    The arguments of a macro taking parameters must all be on sLine.

    Returns sLine with the invocation replaced by the macro expansion.
    Calls self.raiseError() if the invocation does not end on this line or
    if the argument count does not match the macro definition.
    """
    #
    # Work out which macro we're dealing with.  The match includes the
    # opening parenthesis when the macro takes arguments.
    #
    offStart, offEnd = oMatch.span();
    sMatched = oMatch.group(1);
    assert sMatched == sLine[offStart:offEnd];
    fHasArgs = sMatched.endswith('(');
    sMacroName = sMatched[:-1].strip() if fHasArgs else sMatched;
    oMacro = self.dMacros[sMacroName] # type: SimpleParser.Macro

    #
    # Simple macro without parameters: straight replacement.
    #
    if not fHasArgs:
        if self.fDebugPreProc:
            self.debug('expanding simple macro %s on line %u' % (sMacroName, self.iLine,));
        return sLine[:offStart] + oMacro.expandMacro(self) + sLine[offEnd:];

    #
    # Macro with parameters: Split up the argument list on top-level commas,
    # tracking the parenthesis nesting depth until the closing one is found.
    #
    cDepth = 1;
    offScan = offEnd;
    offArgStart = offEnd;
    asActuals = [];
    while True:
        if offScan >= len(sLine):
            self.raiseError('expandMacros: Invocation of macro %s spans multiple lines!' % (sMacroName,));
        chCur = sLine[offScan];
        if chCur == ',' and cDepth == 1:
            asActuals.append(sLine[offArgStart:offScan].strip());
            offArgStart = offScan + 1;
        elif chCur == '(':
            cDepth += 1;
        elif chCur == ')':
            cDepth -= 1;
            if cDepth == 0:
                asActuals.append(sLine[offArgStart:offScan].strip());
                break;
        offScan += 1;

    # An empty parameter list parses as one empty argument, so fix that up.
    if len(oMacro.asArgs) == 0 and len(asActuals) == 1 and asActuals[0] == '':
        asActuals = [];
    if len(oMacro.asArgs) != len(asActuals):
        self.raiseError('expandMacros: Argument mismatch in %s invocation' % (oMacro.sName,));

    #
    # Replace everything from the macro name up to and including the closing parenthesis.
    #
    if self.fDebugPreProc:
        self.debug('expanding macro %s on line %u with arguments %s' % (sMacroName, self.iLine, asActuals));
    return sLine[:offStart] + oMacro.expandMacro(self, asActuals) + sLine[offScan + 1 :];
5162
def parse(self):
    """
    Parses the given file.

    Walks self.asLines from self.iLine on, switching between code and
    multi-line comment state, and hands code, comments, preprocessor
    directives and function tables to the respective workers.

    Returns number of errors (the return value of self.printErrors()).
    Raises exception on fatal trouble.
    """
    #self.debug('Parsing %s' % (self.sSrcFile,));

    #
    # Loop thru the lines.
    #
    # Please mind that self.iLine may be updated by checkCodeForMacro and
    # other worker methods.
    #
    while self.iLine < len(self.asLines):
        sLine = self.asLines[self.iLine];
        self.iLine += 1;    # From here on self.iLine - 1 is the index of sLine in self.asLines.
        #self.debug('line %u: %s' % (self.iLine, sLine[:-1]));

        # Expand macros we know about if we're currently in code.
        # The expanded text replaces the original line in self.asLines.
        if self.iState == self.kiCode and self.oReMacros:
            oMatch = self.oReMacros.search(sLine);
            if oMatch:
                sLine = self.expandMacros(sLine, oMatch);
                if self.fDebugPreProc:
                    self.debug('line %d: expanded\n%s ==>\n%s' % (self.iLine, self.asLines[self.iLine - 1], sLine[:-1],));
                self.asLines[self.iLine - 1] = sLine;

        # Look for comments.
        offSlash = sLine.find('/');
        if offSlash >= 0:
            # Unless the first slash starts a C++ line comment while we're in code,
            # process the line piece by piece looking for C-style comment markers.
            if offSlash + 1 >= len(sLine) or sLine[offSlash + 1] != '/' or self.iState != self.kiCode:
                offLine = 0;
                while offLine < len(sLine):
                    if self.iState == self.kiCode:
                        # Look for substantial multiline comment so we pass the following MC as a whole line:
                        #   IEM_MC_ARG_CONST(uint8_t, bImmArg, /*=*/ bImm,   2);
                        # Note! We ignore C++ comments here, assuming these aren't used in lines with C-style comments.
                        offHit = sLine.find('/*', offLine);
                        while offHit >= 0:
                            offEnd = sLine.find('*/', offHit + 2);
                            if offEnd < 0 or offEnd - offHit >= 16: # 16 chars is a bit random.
                                break;
                            # Short comment terminated on this line, skip it and look for the next one.
                            offHit = sLine.find('/*', offEnd);

                        if offHit >= 0:
                            # Check the code in front of the comment and switch to comment state.
                            self.checkCodeForMacro(sLine[offLine:offHit], offLine);
                            self.sComment = '';
                            self.iCommentLine = self.iLine;
                            self.iState = self.kiCommentMulti;
                            offLine = offHit + 2;
                        else:
                            self.checkCodeForMacro(sLine[offLine:], offLine);
                            offLine = len(sLine);

                    elif self.iState == self.kiCommentMulti:
                        offHit = sLine.find('*/', offLine);
                        if offHit >= 0:
                            # End of the comment: collect the remainder, parse it and go back to code state.
                            self.sComment += sLine[offLine:offHit];
                            self.iState = self.kiCode;
                            offLine = offHit + 2;
                            self.parseComment();
                        else:
                            self.sComment += sLine[offLine:];
                            offLine = len(sLine);
                    else:
                        assert False;
            # C++ line comment.  Only the code in front of it is checked.
            elif offSlash > 0:
                self.checkCodeForMacro(sLine[:offSlash], 0);

        # No slash, but append the line if in multi-line comment.
        elif self.iState == self.kiCommentMulti:
            #self.debug('line %d: multi' % (self.iLine,));
            self.sComment += sLine;

        # No slash, but check if this is a macro #define or #undef, since we
        # need to be able to selectively expand the ones containing MC blocks.
        elif self.iState == self.kiCode and sLine.lstrip().startswith('#'):
            if self.fDebugPreProc:
                self.debug('line %d: pre-proc' % (self.iLine,));
            self.checkPreProcessorDirectiveForDefineUndef(sLine);

        # No slash, but check code line for relevant macro.
        elif ( self.iState == self.kiCode
              and (sLine.find('IEMOP_') >= 0 or sLine.find('FNIEMOPRM_DEF') >= 0 or sLine.find('IEM_MC') >= 0)):
            #self.debug('line %d: macro' % (self.iLine,));
            self.checkCodeForMacro(sLine, 0);

        # If the line is a '}' in the first position, complete the instructions.
        elif self.iState == self.kiCode and sLine[0] == '}':
            #self.debug('line %d: }' % (self.iLine,));
            self.doneInstructions(fEndOfFunction = True);

        # Look for instruction table on the form 'IEM_STATIC const PFNIEMOP g_apfnVexMap3'
        # so we can check/add @oppfx info from it.
        elif self.iState == self.kiCode and sLine.find('PFNIEMOP') > 0 and self.oReFunTable.match(sLine):
            self.parseFunctionTable(sLine);

    # End of file: complete any pending instructions and print the statistics.
    # (max() guards the percentage calculation against files without any instructions.)
    self.doneInstructions(fEndOfFunction = True);
    self.debug('%3s%% / %3s stubs out of %4s instructions and %4s MC blocks in %s'
               % (self.cTotalStubs * 100 // max(self.cTotalInstr, 1), self.cTotalStubs, self.cTotalInstr,
                  self.cTotalMcBlocks, os.path.basename(self.sSrcFile),));
    return self.printErrors();
5268
## The parsed content of IEMAllInstCommonBodyMacros.h.
## Set by the first __parseFileByName() call and passed to the parser of each source file.
g_oParsedCommonBodyMacros = None # type: SimpleParser
5271
def __parseFileByName(sSrcFile, sDefaultMap):
    """
    Parses one source file for instruction specifications.

    On the first call IEMAllInstCommonBodyMacros.h is parsed too, looking
    for it next to sSrcFile first and then next to this script.  The result
    is kept in g_oParsedCommonBodyMacros and passed to the parser of every
    source file.

    Returns a tuple with the result of SimpleParser.parse() and the parser
    instance.
    Raises Exception if a file cannot be opened or read.  ParserException is
    printed to stderr and re-raised.
    """
    #
    # Read sSrcFile into a line array.
    #
    try:
        oFile = open(sSrcFile, "r"); # pylint: disable=consider-using-with,unspecified-encoding
    except Exception as oXcpt:
        raise Exception("failed to open %s for reading: %s" % (sSrcFile, oXcpt,));
    try:
        asLines = oFile.readlines();
    except Exception as oXcpt:
        raise Exception("failed to read %s: %s" % (sSrcFile, oXcpt,));
    finally:
        oFile.close();

    #
    # On the first call, we parse IEMAllInstCommonBodyMacros.h so we
    # can use the macros from it when processing the other files.
    #
    global g_oParsedCommonBodyMacros;
    if g_oParsedCommonBodyMacros is None:
        # Locate the file.
        sCommonBodyMacros = os.path.join(os.path.split(sSrcFile)[0], 'IEMAllInstCommonBodyMacros.h');
        if not os.path.isfile(sCommonBodyMacros):
            sCommonBodyMacros = os.path.join(os.path.split(__file__)[0], 'IEMAllInstCommonBodyMacros.h');

        # Read it.
        try:
            with open(sCommonBodyMacros, "r") as oIncFile: # pylint: disable=unspecified-encoding
                asIncFiles = oIncFile.readlines();
        except Exception as oXcpt:
            raise Exception("failed to open/read %s: %s" % (sCommonBodyMacros, oXcpt,));

        # Parse it.  The file is only expected to provide macros, so finding
        # any instructions, tags, stubs or MC blocks in it is an error.
        try:
            oParser = SimpleParser(sCommonBodyMacros, asIncFiles, 'one');
            if oParser.parse() != 0:
                raise ParserException('%s: errors: See above' % (sCommonBodyMacros, ));
            if oParser.cTotalInstr != 0 or oParser.cTotalStubs != 0 or oParser.cTotalTagged != 0 or oParser.cTotalMcBlocks != 0:
                # Note! The value order must match the message: instr, tags, stubs, MCs.
                raise ParserException('%s: error: Unexpectedly found %u instr, %u tags, %u stubs and %u MCs, expecting zero. %s'
                                      % (sCommonBodyMacros, oParser.cTotalInstr, oParser.cTotalTagged, oParser.cTotalStubs,
                                         oParser.cTotalMcBlocks,
                                         ', '.join(sorted(  [str(oMcBlock.iBeginLine) for oMcBlock in g_aoMcBlocks]
                                                          + [str(oInstr.iLineCreated) for oInstr in g_aoAllInstructions])),));
        except ParserException as oXcpt:
            print(str(oXcpt), file = sys.stderr);
            raise;
        g_oParsedCommonBodyMacros = oParser;

    #
    # Do the parsing.
    #
    try:
        oParser = SimpleParser(sSrcFile, asLines, sDefaultMap, g_oParsedCommonBodyMacros);
        return (oParser.parse(), oParser) ;
    except ParserException as oXcpt:
        print(str(oXcpt), file = sys.stderr);
        raise;
5333
5334
def __doTestCopying():
    """
    Carries out the asCopyTests instructions of all instructions.

    Each entry is looked up in g_dAllInstructionsByStat first and then in
    g_dAllInstructionsByFunction, and the tests of the instruction(s) found
    are appended to those of the instruction holding the entry.  Entries
    that cannot be found, or that refer to the instruction itself, are
    reported on stderr.

    Returns the number of errors.
    """
    asErrors = [];
    for oDstInstr in g_aoAllInstructions:
        if not oDstInstr.asCopyTests:
            continue;
        for sSrcInstr in oDstInstr.asCopyTests:
            # A stats name identifies a single instruction, a function name may cover several.
            oByStat = g_dAllInstructionsByStat.get(sSrcInstr, None);
            aoSrcInstrs = [oByStat,] if oByStat else g_dAllInstructionsByFunction.get(sSrcInstr, []);
            if not aoSrcInstrs:
                asErrors.append('%s:%s: error: @opcopytests reference "%s" not found\n'
                                % ( oDstInstr.sSrcFile, oDstInstr.iLineCreated, sSrcInstr));
                continue;

            for oSrcInstr in aoSrcInstrs:
                if oSrcInstr != oDstInstr:
                    oDstInstr.aoTests.extend(oSrcInstr.aoTests);
                else:
                    asErrors.append('%s:%s: error: @opcopytests reference "%s" matches the destination\n'
                                    % ( oDstInstr.sSrcFile, oDstInstr.iLineCreated, sSrcInstr));

    cErrors = len(asErrors);
    if cErrors:
        sys.stderr.write(u''.join(asErrors));
    return cErrors;
5362
5363
def __applyOnlyTest():
    """
    Restricts testing to the instructions in g_aoOnlyTestInstructions.

    When that list is not empty, the aoTests of all instructions that are
    not on it are dropped.  Nothing is changed when the list is empty.

    Returns 0.
    """
    if not g_aoOnlyTestInstructions:
        return 0;

    for oInstr in g_aoAllInstructions:
        if oInstr.aoTests and oInstr not in g_aoOnlyTestInstructions:
            oInstr.aoTests = [];
    return 0;
5375
## List of all main instruction files, their default maps and file sets (-1 means included in all sets).
## Each entry is a (filename, default map name, file set number) tuple.
g_aaoAllInstrFilesAndDefaultMapAndSet = (
    ( 'IEMAllInstCommon.cpp.h', 'one', -1, ),
    ( 'IEMAllInstOneByte.cpp.h', 'one', 1, ),
    ( 'IEMAllInst3DNow.cpp.h', '3dnow', 2, ),
    ( 'IEMAllInstTwoByte0f.cpp.h', 'two0f', 2, ),
    ( 'IEMAllInstThree0f38.cpp.h', 'three0f38', 3, ),
    ( 'IEMAllInstThree0f3a.cpp.h', 'three0f3a', 3, ),
    ( 'IEMAllInstVexMap1.cpp.h', 'vexmap1', 4, ),
    ( 'IEMAllInstVexMap2.cpp.h', 'vexmap2', 4, ),
    ( 'IEMAllInstVexMap3.cpp.h', 'vexmap3', 4, ),
);
5388
def __parseFilesWorker(asFilesAndDefaultMap):
    """
    Parses the given IEMAllInstruction*.cpp.h files.

    asFilesAndDefaultMap is a sequence of (filename, default map) pairs.  A
    filename without any directory part that does not exist relative to the
    current directory is looked for in the directory of this script.

    After parsing, the @opcopytests and only-test processing is applied and
    the overall stub statistics are printed to stderr.

    Returns a list of the parsers on success.
    Raises exception on failure.
    """
    sSrcDir = os.path.dirname(os.path.abspath(__file__));
    cErrors = 0;
    aoParsers = [];
    for sFilename, sDefaultMap in asFilesAndDefaultMap:
        if not os.path.split(sFilename)[0] and not os.path.exists(sFilename):
            sFilename = os.path.join(sSrcDir, sFilename);
        cThisErrors, oParser = __parseFileByName(sFilename, sDefaultMap);
        cErrors += cThisErrors;
        aoParsers.append(oParser);
    cErrors += __doTestCopying();
    cErrors += __applyOnlyTest();

    # Total stub stats:
    cTotalStubs = 0;
    for oInstr in g_aoAllInstructions:
        cTotalStubs += oInstr.fStub;
    # Note! max() avoids dividing by zero when no instructions were found at
    #       all, same as for the per-file statistics in SimpleParser.parse().
    print('debug: %3s%% / %3s stubs out of %4s instructions and %4s MC blocks in total'
          % (cTotalStubs * 100 // max(len(g_aoAllInstructions), 1), cTotalStubs, len(g_aoAllInstructions),
             len(g_aoMcBlocks),),
          file = sys.stderr);

    if cErrors != 0:
        raise Exception('%d parse errors' % (cErrors,));
    return aoParsers;
5419
5420
def parseFiles(asFiles):
    """
    Parses a selection of IEMAllInstruction*.cpp.h files.

    The default map of each file is looked up by its base name (ignoring
    case) in g_aaoAllInstrFilesAndDefaultMapAndSet.

    Returns a list of the parsers on success.
    Raises exception on failure, including files that cannot be classified.
    """
    # Pair each file up with its default map and let __parseFilesWorker do the job.
    asFilesAndDefaultMap = [];
    for sFilename in asFiles:
        sBaseName = os.path.basename(sFilename).lower();
        sMap = next((aoInfo[1] for aoInfo in g_aaoAllInstrFilesAndDefaultMapAndSet if aoInfo[0].lower() == sBaseName),
                    None);
        if not sMap:
            raise Exception('Unable to classify file: %s' % (sFilename,));
        asFilesAndDefaultMap.append((sFilename, sMap));

    return __parseFilesWorker(asFilesAndDefaultMap);
5442
5443
def parseAll():
    """
    Parses all the files listed in g_aaoAllInstrFilesAndDefaultMapAndSet.

    Returns a list of the parsers on success.
    Raises exception on failure.
    """
    # Only the filename and default map of each entry are needed by the worker.
    asFilesAndDefaultMap = [aoInfo[:2] for aoInfo in g_aaoAllInstrFilesAndDefaultMapAndSet];
    return __parseFilesWorker(asFilesAndDefaultMap);
5452
5453
5454#
5455# Generators (may perhaps move later).
5456#
def __formatDisassemblerTableEntry(oInstr):
    """
    Formats the disassembler table entry for the given instruction.

    The entry is an OP(...) macro invocation, or OPVEX(...) if the
    instruction has more than three operands, with the columns padded
    out to fixed offsets.

    Returns the formatted line as a string.
    """
    sMacro = 'OP';
    cMaxOperands = 3;
    if len(oInstr.aoOperands) > 3:
        sMacro = 'OPVEX'
        cMaxOperands = 4;
        assert len(oInstr.aoOperands) <= cMaxOperands;

    #
    # Format string.
    # (Mnemonic, then a space and the comma separated operand format specifiers.)
    #
    sTmp = '%s("%s' % (sMacro, oInstr.sMnemonic,);
    for iOperand, oOperand in enumerate(oInstr.aoOperands):
        sTmp += ' ' if iOperand == 0 else ',';
        if g_kdOpTypes[oOperand.sType][2][0] != '%': ## @todo remove upper() later.
            sTmp += g_kdOpTypes[oOperand.sType][2].upper(); ## @todo remove upper() later.
        else:
            sTmp += g_kdOpTypes[oOperand.sType][2];
    sTmp += '",';
    asColumns = [ sTmp, ];

    #
    # Decoders.
    # (iStart marks the first decoder column so we can tell how many have been added.)
    #
    iStart = len(asColumns);
    if oInstr.sEncoding is None:
        pass;
    elif oInstr.sEncoding == 'ModR/M':
        # ASSUME the first operand is using the ModR/M encoding
        assert len(oInstr.aoOperands) >= 1 and oInstr.aoOperands[0].usesModRM(), "oInstr=%s" % (oInstr,);
        asColumns.append('IDX_ParseModRM,');
    elif oInstr.sEncoding in [ 'prefix', ]:
        # One zero decoder column per operand.
        for oOperand in oInstr.aoOperands:
            asColumns.append('0,');
    elif oInstr.sEncoding in [ 'fixed', 'VEX.fixed' ]:
        pass;
    elif oInstr.sEncoding == 'VEX.ModR/M':
        asColumns.append('IDX_ParseModRM,');
    elif oInstr.sEncoding == 'vex2':
        asColumns.append('IDX_ParseVex2b,')
    elif oInstr.sEncoding == 'vex3':
        asColumns.append('IDX_ParseVex3b,')
    elif oInstr.sEncoding in g_dInstructionMaps:
        asColumns.append(g_dInstructionMaps[oInstr.sEncoding].sDisParse + ',');
    else:
        ## @todo
        #IDX_ParseTwoByteEsc,
        #IDX_ParseGrp1,
        #IDX_ParseShiftGrp2,
        #IDX_ParseGrp3,
        #IDX_ParseGrp4,
        #IDX_ParseGrp5,
        #IDX_Parse3DNow,
        #IDX_ParseGrp6,
        #IDX_ParseGrp7,
        #IDX_ParseGrp8,
        #IDX_ParseGrp9,
        #IDX_ParseGrp10,
        #IDX_ParseGrp12,
        #IDX_ParseGrp13,
        #IDX_ParseGrp14,
        #IDX_ParseGrp15,
        #IDX_ParseGrp16,
        #IDX_ParseThreeByteEsc4,
        #IDX_ParseThreeByteEsc5,
        #IDX_ParseModFence,
        #IDX_ParseEscFP,
        #IDX_ParseNopPause,
        #IDX_ParseInvOpModRM,
        assert False, str(oInstr);

    # Check for immediates and stuff in the remaining operands.
    # (Operands already covered by a decoder column above are skipped, and the
    #  decoder columns are then padded with zeros up to cMaxOperands.)
    for oOperand in oInstr.aoOperands[len(asColumns) - iStart:]:
        sIdx = g_kdOpTypes[oOperand.sType][0];
        #if sIdx != 'IDX_UseModRM':
        asColumns.append(sIdx + ',');
    asColumns.extend(['0,'] * (cMaxOperands - (len(asColumns) - iStart)));

    #
    # Opcode and operands.
    # (The operand columns are padded with OP_PARM_NONE up to cMaxOperands.)
    #
    assert oInstr.sDisEnum, str(oInstr);
    asColumns.append(oInstr.sDisEnum + ',');
    iStart = len(asColumns)
    for oOperand in oInstr.aoOperands:
        asColumns.append('OP_PARM_' + g_kdOpTypes[oOperand.sType][3] + ',');
    asColumns.extend(['OP_PARM_NONE,'] * (cMaxOperands - (len(asColumns) - iStart)));

    #
    # Flags.
    # (Only hints mapping to DISOPTYPE_XXX defines are included, zero if there are none.
    #  This column also closes the macro invocation.)
    #
    sTmp = '';
    for sHint in sorted(oInstr.dHints.keys()):
        sDefine = g_kdHints[sHint];
        if sDefine.startswith('DISOPTYPE_'):
            if sTmp:
                sTmp += ' | ' + sDefine;
            else:
                sTmp += sDefine;
    if sTmp:
        sTmp += '),';
    else:
        sTmp += '0),';
    asColumns.append(sTmp);

    #
    # Format the columns into a line.
    # (Each column is padded out to its offset in aoffColumns, or separated from
    #  the previous one by a single space if that has already run past the offset.)
    #
    aoffColumns = [4, 29, 49, 65, 77, 89, 109, 125, 141, 157, 183, 199];
    sLine = '';
    for i, s in enumerate(asColumns):
        if len(sLine) < aoffColumns[i]:
            sLine += ' ' * (aoffColumns[i] - len(sLine));
        else:
            sLine += ' ';
        sLine += s;

    # OP("psrlw %Vdq,%Wdq", IDX_ParseModRM, IDX_UseModRM, 0, OP_PSRLW, OP_PARM_Vdq, OP_PARM_Wdq, OP_PARM_NONE,
    # DISOPTYPE_HARMLESS),
    # define OP(pszOpcode, idxParse1, idxParse2, idxParse3, opcode, param1, param2, param3, optype) \
    # { pszOpcode, idxParse1, idxParse2, idxParse3, 0, opcode, param1, param2, param3, 0, 0, optype }
    return sLine;
5581
def __checkIfShortTable(aoTableOrdered, oMap):
    """
    Checks whether a table can be shortened by trimming leading and trailing
    None entries.

    The short version is chosen when the number of entries that can be
    trimmed is at least one thirtieth of the table size (integer division).
    NOTE(review): the previous comment here talked about saving more than
                  30%, which is not what the check does - confirm the intent.

    Returns (iInstr, cInstructions, fShortTable), where iInstr is the index
    of the first entry to output and cInstructions the index following the
    last one.  For the full table this is (0, len(aoTableOrdered), False).
    """
    cEntries = len(aoTableOrdered);

    # Find the end of the last used entry.
    iEnd = cEntries;
    while iEnd > 0 and aoTableOrdered[iEnd - 1] is None:
        iEnd -= 1;

    # Find the first used entry.
    iFirst = 0;
    while iFirst < iEnd and aoTableOrdered[iFirst] is None:
        iFirst += 1;

    # Not enough to gain?  Then output the full table.
    cTrimmable = iFirst + (cEntries - iEnd);
    if cTrimmable < cEntries // 30:
        _ = oMap; # Use this for overriding.
        return (0, cEntries, False);

    return (iFirst, iEnd, True);
5603
def generateDisassemblerTables(oDstFile = sys.stdout):
    """
    Generates disassembler tables.

    Parses all the instruction sources via parseAll(), splits every map using
    the 'byte+pfx' selector into one map per prefix, and then writes a C
    DISOPCODE table followed by a DISOPMAPDESC range record to oDstFile for
    each map whose name starts with 'vex'.

    oDstFile is the object receiving the generated C source; only its write()
    method is used.

    Returns exit code: 0 on success, 1 if parseAll() raised an exception.
    """

    #
    # Parse all.
    #
    try:
        parseAll();
    except Exception as oXcpt:
        print('error: parseAll failed: %s' % (oXcpt,), file = sys.stderr);
        traceback.print_exc(file = sys.stderr);
        return 1;

    #
    # The disassembler uses a slightly different table layout: maps selected
    # by 'byte+pfx' are split up into one map per prefix (none, 0x66, 0xf3
    # and 0xf2), while all other maps are used as they are.
    #
    aoDisasmMaps = [];
    for oMap in sorted(g_dInstructionMaps.values(),
                       key = lambda oCurMap: oCurMap.sEncoding + ''.join(oCurMap.asLeadOpcodes)):
        if oMap.sSelector != 'byte+pfx':
            aoDisasmMaps.append(oMap);
        else:
            # Split the map by prefix.
            aoDisasmMaps.append(oMap.copy(oMap.sName, 'none'));
            aoDisasmMaps.append(oMap.copy(oMap.sName + '_66', '0x66'));
            aoDisasmMaps.append(oMap.copy(oMap.sName + '_F3', '0xf3'));
            aoDisasmMaps.append(oMap.copy(oMap.sName + '_F2', '0xf2'));

    #
    # Dump each map.
    #
    # Note! asHeaderLines collects the matching extern declarations, but this
    #       function does not write them anywhere.
    #
    asHeaderLines = [];
    print("debug: maps=%s\n" % (', '.join([oMap.sName for oMap in aoDisasmMaps]),), file = sys.stderr);
    for oMap in aoDisasmMaps:
        if not oMap.sName.startswith("vex"):
            continue; # only looking at the vex maps at the moment.

        #
        # Get the instructions for the map and see if we can do a short version or not.
        #
        aoTableOrder    = oMap.getInstructionsInTableOrder();
        cEntriesPerByte = oMap.getEntriesPerByte();
        cEntriesPerRow  = 0x10 * cEntriesPerByte;   # One row covers 16 opcode byte values.
        (iInstrStart, iInstrEnd, fShortTable) = __checkIfShortTable(aoTableOrder, oMap);

        # An empty range still gets a dummy entry below (the C array cannot be
        # empty), so the declared/asserted element count must never be zero.
        cTableEntries = max(iInstrEnd - iInstrStart, 1);

        #
        # Output the table start.
        # Note! Short tables are static and only accessible via the map range record.
        #
        asLines = [];
        asLines.append('/* Generated from: %-11s Selector: %-7s Encoding: %-7s Lead bytes opcodes: %s */'
                       % ( oMap.sName, oMap.sSelector, oMap.sEncoding, ' '.join(oMap.asLeadOpcodes), ));
        if fShortTable:
            asLines.append('static const DISOPCODE %s[] =' % (oMap.getDisasTableName(),));
        else:
            asHeaderLines.append('extern const DISOPCODE %s[%d];' % (oMap.getDisasTableName(), cTableEntries,));
            asLines.append(             'const DISOPCODE %s[%d] =' % (oMap.getDisasTableName(), cTableEntries,));
        asLines.append('{');

        # A short table that does not start on a row boundary gets a comment
        # giving the index of its first entry.
        if fShortTable and (iInstrStart & (cEntriesPerRow - 1)) != 0:
            asLines.append(' /* %#04x: */' % (iInstrStart,));

        #
        # Output the instructions.
        #
        iInstr = iInstrStart;
        while iInstr < iInstrEnd:
            oInstr = aoTableOrder[iInstr];

            # Start of a new row: blank separator line (except first) and a row label.
            if (iInstr & (cEntriesPerRow - 1)) == 0:
                if iInstr != iInstrStart:
                    asLines.append('');
                asLines.append(' /* %x */' % ((iInstr // cEntriesPerByte) >> 4,));

            if oInstr is None:
                # Invalid. Optimize blocks of invalid instructions.
                cInvalidInstrs = 1;
                while iInstr + cInvalidInstrs < len(aoTableOrder) and aoTableOrder[iInstr + cInvalidInstrs] is None:
                    cInvalidInstrs += 1;

                if (iInstr & (cEntriesPerRow - 1)) == 0 and cInvalidInstrs >= cEntriesPerRow:
                    # A whole row is invalid; the loop's own increment accounts for the last entry.
                    asLines.append(' INVALID_OPCODE_BLOCK_%u,' % (cEntriesPerRow,));
                    iInstr += cEntriesPerRow - 1;
                elif cEntriesPerByte > 1:
                    if (iInstr & (cEntriesPerByte - 1)) == 0 and cInvalidInstrs >= cEntriesPerByte:
                        # All entries of one opcode byte are invalid.  Skip exactly the
                        # entries covered by the block macro (was hard-coded to 3, which
                        # is only right for four entries per byte).
                        asLines.append(' INVALID_OPCODE_BLOCK_%u,' % (cEntriesPerByte,));
                        iInstr += cEntriesPerByte - 1;
                    else:
                        asLines.append(' /* %#04x/%d */ INVALID_OPCODE,'
                                       % (iInstr // cEntriesPerByte, iInstr % cEntriesPerByte));
                else:
                    asLines.append(' /* %#04x */ INVALID_OPCODE,' % (iInstr,));
            elif isinstance(oInstr, list):
                if len(oInstr) != 0:
                    asLines.append(' /* %#04x */ ComplicatedListStuffNeedingWrapper, /* \n -- %s */'
                                   % (iInstr, '\n -- '.join([str(oItem) for oItem in oInstr]),));
                else:
                    asLines.append(__formatDisassemblerTableEntry(oInstr));
            else:
                asLines.append(__formatDisassemblerTableEntry(oInstr));

            iInstr += 1;

        if iInstrStart >= iInstrEnd:
            asLines.append(' /* dummy */ INVALID_OPCODE');

        asLines.append('};');
        asLines.append('AssertCompile(RT_ELEMENTS(%s) == %s);' % (oMap.getDisasTableName(), cTableEntries,));

        #
        # We always emit a map range record, assuming the linker will eliminate the unnecessary ones.
        # The extern declaration uses the very same name as the definition.
        #
        asHeaderLines.append('extern const DISOPMAPDESC %s;' % (oMap.getDisasRangeName(),));
        asLines.append('const DISOPMAPDESC %s = { &%s[0], %#04x, RT_ELEMENTS(%s) };'
                       % (oMap.getDisasRangeName(), oMap.getDisasTableName(), iInstrStart, oMap.getDisasTableName(),));

        #
        # Write out the lines, followed by one blank line.
        #
        oDstFile.write('\n'.join(asLines) + '\n\n');
    return 0;
5732
if __name__ == '__main__':
    # Script entry point: generate the tables using the default destination
    # and hand the returned status to the interpreter as the exit code.
    iExitCode = generateDisassemblerTables();
    sys.exit(iExitCode);
5735
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette