VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllInstructionsPython.py@ 99492

Last change on this file since 99492 was 99343, checked in by vboxsync, 21 months ago

VMM/IEM: IEM_MC_MAYBE_RAISE_PCLMUL_RELATED_XCPT -> IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT, since the CPUID check was removed they are identical. bugref:10369

  • Property svn:eol-style set to LF
  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 253.2 KB
Line 
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3# $Id: IEMAllInstructionsPython.py 99343 2023-04-07 12:44:56Z vboxsync $
4
5"""
6IEM instruction extractor.
7
8This script/module parses the IEMAllInstruction*.cpp.h files next to it and
9collects information about the instructions. It can then be used to generate
10disassembler tables and tests.
11"""
12
13from __future__ import print_function;
14
15__copyright__ = \
16"""
17Copyright (C) 2017-2023 Oracle and/or its affiliates.
18
19This file is part of VirtualBox base platform packages, as
20available from https://www.virtualbox.org.
21
22This program is free software; you can redistribute it and/or
23modify it under the terms of the GNU General Public License
24as published by the Free Software Foundation, in version 3 of the
25License.
26
27This program is distributed in the hope that it will be useful, but
28WITHOUT ANY WARRANTY; without even the implied warranty of
29MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
30General Public License for more details.
31
32You should have received a copy of the GNU General Public License
33along with this program; if not, see <https://www.gnu.org/licenses>.
34
35The contents of this file may alternatively be used under the terms
36of the Common Development and Distribution License Version 1.0
37(CDDL), a copy of it is provided in the "COPYING.CDDL" file included
38in the VirtualBox distribution, in which case the provisions of the
39CDDL are applicable instead of those of the GPL.
40
41You may elect to license modified versions of this file under the
42terms and conditions of either the GPL or the CDDL or both.
43
44SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
45"""
46__version__ = "$Revision: 99343 $"
47
48# pylint: disable=anomalous-backslash-in-string,too-many-lines
49
50# Standard python imports.
51import os;
52import re;
53import sys;
54import traceback;
55
56## Only the main script needs to modify the path.
57#g_ksValidationKitDir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),
58# 'ValidationKit');
59#sys.path.append(g_ksValidationKitDir);
60#
61#from common import utils; - Windows build boxes doesn't have pywin32.
62
# Python 3 hacks:
#   Python 3 has no separate 'long' type, so alias the name to 'int' to keep
#   code written against the Python 2 name working on both major versions.
if sys.version_info[0] >= 3:
    long = int;     # pylint: disable=redefined-builtin,invalid-name
66
67
## Constants and values for EFLAGS (X86_EFL_XXX name -> 32-bit mask).
g_kdX86EFlagsConstants = {
    'X86_EFL_CF':          0x00000001, # RT_BIT_32(0)
    'X86_EFL_1':           0x00000002, # RT_BIT_32(1)
    'X86_EFL_PF':          0x00000004, # RT_BIT_32(2)
    'X86_EFL_AF':          0x00000010, # RT_BIT_32(4)
    'X86_EFL_ZF':          0x00000040, # RT_BIT_32(6)
    'X86_EFL_SF':          0x00000080, # RT_BIT_32(7)
    'X86_EFL_TF':          0x00000100, # RT_BIT_32(8)
    'X86_EFL_IF':          0x00000200, # RT_BIT_32(9)
    'X86_EFL_DF':          0x00000400, # RT_BIT_32(10)
    'X86_EFL_OF':          0x00000800, # RT_BIT_32(11)
    'X86_EFL_IOPL':        0x00003000, # (RT_BIT_32(12) | RT_BIT_32(13))
    'X86_EFL_NT':          0x00004000, # RT_BIT_32(14)
    'X86_EFL_RF':          0x00010000, # RT_BIT_32(16)
    'X86_EFL_VM':          0x00020000, # RT_BIT_32(17)
    'X86_EFL_AC':          0x00040000, # RT_BIT_32(18)
    'X86_EFL_VIF':         0x00080000, # RT_BIT_32(19)
    'X86_EFL_VIP':         0x00100000, # RT_BIT_32(20)
    'X86_EFL_ID':          0x00200000, # RT_BIT_32(21)
    'X86_EFL_LIVE_MASK':   0x003f7fd5, # UINT32_C(0x003f7fd5) - every flag above except X86_EFL_1.
    'X86_EFL_RA1_MASK':    0x00000002, # RT_BIT_32(1)
};
90
## EFlags values allowed in \@opfltest, \@opflmodify, \@opflundef, \@opflset, and \@opflclear.
##
## Maps a flag mnemonic to the X86_EFL_XXX constant name.  A leading '!' on
## the constant name is used by the mnemonics that denote the flag being clear.
g_kdEFlagsMnemonics = {
    # Debugger flag notation (sorted by value):
    # NOTE(review): in the classic debugger notation 'pe' (parity even) is PF=1
    #               and 'po' (parity odd) is PF=0, so the two parity entries
    #               below look swapped.  Left unchanged here - confirm against
    #               the users of this table before flipping them.
    'cf':   'X86_EFL_CF',   ##< Carry Flag.
    'nc':  '!X86_EFL_CF',   ##< No Carry.

    'po':   'X86_EFL_PF',   ##< Parity Odd.
    'pe':  '!X86_EFL_PF',   ##< Parity Even.

    'af':   'X86_EFL_AF',   ##< Aux Flag.
    'na':  '!X86_EFL_AF',   ##< No Aux.

    'zr':   'X86_EFL_ZF',   ##< ZeRo.
    'nz':  '!X86_EFL_ZF',   ##< No Zero.

    'ng':   'X86_EFL_SF',   ##< NeGative (sign).
    'pl':  '!X86_EFL_SF',   ##< PLus (sign).

    'tf':   'X86_EFL_TF',   ##< Trap flag.

    'ei':   'X86_EFL_IF',   ##< Enabled Interrupts.
    'di':  '!X86_EFL_IF',   ##< Disabled Interrupts.

    'dn':   'X86_EFL_DF',   ##< DowN (string op direction).
    'up':  '!X86_EFL_DF',   ##< UP (string op direction).

    'ov':   'X86_EFL_OF',   ##< OVerflow.
    'nv':  '!X86_EFL_OF',   ##< No Overflow.

    'nt':   'X86_EFL_NT',   ##< Nested Task.
    'rf':   'X86_EFL_RF',   ##< Resume Flag.
    'vm':   'X86_EFL_VM',   ##< Virtual-8086 Mode.
    'ac':   'X86_EFL_AC',   ##< Alignment Check.
    'vif':  'X86_EFL_VIF',  ##< Virtual Interrupt Flag.
    'vip':  'X86_EFL_VIP',  ##< Virtual Interrupt Pending.

    # Reference manual notation not covered above (sorted by value):
    'pf':   'X86_EFL_PF',
    'zf':   'X86_EFL_ZF',
    'sf':   'X86_EFL_SF',
    'if':   'X86_EFL_IF',
    'df':   'X86_EFL_DF',
    'of':   'X86_EFL_OF',
    'iopl': 'X86_EFL_IOPL',
    'id':   'X86_EFL_ID',
};
137
## Constants and values for CR0 (X86_CR0_XXX name -> 32-bit mask).
g_kdX86Cr0Constants = {
    'X86_CR0_PE':          0x00000001, # RT_BIT_32(0)
    'X86_CR0_MP':          0x00000002, # RT_BIT_32(1)
    'X86_CR0_EM':          0x00000004, # RT_BIT_32(2)
    'X86_CR0_TS':          0x00000008, # RT_BIT_32(3)
    'X86_CR0_ET':          0x00000010, # RT_BIT_32(4)
    'X86_CR0_NE':          0x00000020, # RT_BIT_32(5)
    'X86_CR0_WP':          0x00010000, # RT_BIT_32(16)
    'X86_CR0_AM':          0x00040000, # RT_BIT_32(18)
    'X86_CR0_NW':          0x20000000, # RT_BIT_32(29)
    'X86_CR0_CD':          0x40000000, # RT_BIT_32(30)
    'X86_CR0_PG':          0x80000000, # RT_BIT_32(31)
};
152
## Constants and values for CR4 (X86_CR4_XXX name -> 32-bit mask).
g_kdX86Cr4Constants = {
    'X86_CR4_VME':         0x00000001, # RT_BIT_32(0)
    'X86_CR4_PVI':         0x00000002, # RT_BIT_32(1)
    'X86_CR4_TSD':         0x00000004, # RT_BIT_32(2)
    'X86_CR4_DE':          0x00000008, # RT_BIT_32(3)
    'X86_CR4_PSE':         0x00000010, # RT_BIT_32(4)
    'X86_CR4_PAE':         0x00000020, # RT_BIT_32(5)
    'X86_CR4_MCE':         0x00000040, # RT_BIT_32(6)
    'X86_CR4_PGE':         0x00000080, # RT_BIT_32(7)
    'X86_CR4_PCE':         0x00000100, # RT_BIT_32(8)
    'X86_CR4_OSFXSR':      0x00000200, # RT_BIT_32(9)
    'X86_CR4_OSXMMEEXCPT': 0x00000400, # RT_BIT_32(10)
    'X86_CR4_VMXE':        0x00002000, # RT_BIT_32(13)
    'X86_CR4_SMXE':        0x00004000, # RT_BIT_32(14)
    'X86_CR4_PCIDE':       0x00020000, # RT_BIT_32(17)
    'X86_CR4_OSXSAVE':     0x00040000, # RT_BIT_32(18)
    'X86_CR4_SMEP':        0x00100000, # RT_BIT_32(20)
    'X86_CR4_SMAP':        0x00200000, # RT_BIT_32(21)
    'X86_CR4_PKE':         0x00400000, # RT_BIT_32(22)
};
174
## XSAVE components (XCR0), XSAVE_C_XXX name -> 64-bit mask.
g_kdX86XSaveCConstants = {
    'XSAVE_C_X87':         0x00000001,
    'XSAVE_C_SSE':         0x00000002,
    'XSAVE_C_YMM':         0x00000004,
    'XSAVE_C_BNDREGS':     0x00000008,
    'XSAVE_C_BNDCSR':      0x00000010,
    'XSAVE_C_OPMASK':      0x00000020,
    'XSAVE_C_ZMM_HI256':   0x00000040,
    'XSAVE_C_ZMM_16HI':    0x00000080,
    'XSAVE_C_PKRU':        0x00000200,
    'XSAVE_C_LWP':         0x4000000000000000,
    'XSAVE_C_X':           0x8000000000000000,
    'XSAVE_C_ALL_AVX':     0x000000c4, # For clearing all AVX bits (YMM | ZMM_HI256 | ZMM_16HI).
    'XSAVE_C_ALL_AVX_SSE': 0x000000c6, # For clearing all AVX and SSE bits.
};
191
192
## \@op[1-4] locations
g_kdOpLocations = {
    'reg':      [], ## modrm.reg
    'rm':       [], ## modrm.rm
    'imm':      [], ## immediate instruction data
    'vvvv':     [], ## VEX.vvvv

    # fixed registers.
    'AL':       [],
    'rAX':      [],
    'rDX':      [],
    'rSI':      [],
    'rDI':      [],
    'rFLAGS':   [],
    'CS':       [],
    'DS':       [],
    'ES':       [],
    'FS':       [],
    'GS':       [],
    'SS':       [],
};
214
## \@op[1-4] types
##
## Value fields:
##    - 0: the normal IDX_ParseXXX handler (IDX_UseModRM == IDX_ParseModRM).
##    - 1: the location (g_kdOpLocations).
##    - 2: disassembler format string version of the type.
##    - 3: disassembler OP_PARAM_XXX (XXX only).
##    - 4: IEM form matching instruction.
##
## Note! See the A.2.1 in SDM vol 2 for the type names.
g_kdOpTypes = {
    # Fixed addresses
    'Ap':           ( 'IDX_ParseImmAddrF',  'imm',    '%Ap',  'Ap',      'FIXED', ),

    # ModR/M.rm
    'Eb':           ( 'IDX_UseModRM',       'rm',     '%Eb',  'Eb',      'RM',    ),
    'Ed':           ( 'IDX_UseModRM',       'rm',     '%Ed',  'Ed',      'RM',    ),
    'Ed_WO':        ( 'IDX_UseModRM',       'rm',     '%Ed',  'Ed',      'RM',    ),
    'Eq':           ( 'IDX_UseModRM',       'rm',     '%Eq',  'Eq',      'RM',    ),
    'Eq_WO':        ( 'IDX_UseModRM',       'rm',     '%Eq',  'Eq',      'RM',    ),
    'Ew':           ( 'IDX_UseModRM',       'rm',     '%Ew',  'Ew',      'RM',    ),
    'Ev':           ( 'IDX_UseModRM',       'rm',     '%Ev',  'Ev',      'RM',    ),
    'Ey':           ( 'IDX_UseModRM',       'rm',     '%Ey',  'Ey',      'RM',    ),
    'Qd':           ( 'IDX_UseModRM',       'rm',     '%Qd',  'Qd',      'RM',    ),
    'Qq':           ( 'IDX_UseModRM',       'rm',     '%Qq',  'Qq',      'RM',    ),
    'Qq_WO':        ( 'IDX_UseModRM',       'rm',     '%Qq',  'Qq',      'RM',    ),
    'Wss':          ( 'IDX_UseModRM',       'rm',     '%Wss', 'Wss',     'RM',    ),
    'Wss_WO':       ( 'IDX_UseModRM',       'rm',     '%Wss', 'Wss',     'RM',    ),
    'Wsd':          ( 'IDX_UseModRM',       'rm',     '%Wsd', 'Wsd',     'RM',    ),
    'Wsd_WO':       ( 'IDX_UseModRM',       'rm',     '%Wsd', 'Wsd',     'RM',    ),
    'Wps':          ( 'IDX_UseModRM',       'rm',     '%Wps', 'Wps',     'RM',    ),
    'Wps_WO':       ( 'IDX_UseModRM',       'rm',     '%Wps', 'Wps',     'RM',    ),
    'Wpd':          ( 'IDX_UseModRM',       'rm',     '%Wpd', 'Wpd',     'RM',    ),
    'Wpd_WO':       ( 'IDX_UseModRM',       'rm',     '%Wpd', 'Wpd',     'RM',    ),
    'Wdq':          ( 'IDX_UseModRM',       'rm',     '%Wdq', 'Wdq',     'RM',    ),
    'Wdq_WO':       ( 'IDX_UseModRM',       'rm',     '%Wdq', 'Wdq',     'RM',    ),
    'Wq':           ( 'IDX_UseModRM',       'rm',     '%Wq',  'Wq',      'RM',    ),
    'Wq_WO':        ( 'IDX_UseModRM',       'rm',     '%Wq',  'Wq',      'RM',    ),
    'WqZxReg_WO':   ( 'IDX_UseModRM',       'rm',     '%Wq',  'Wq',      'RM',    ),
    'Wx':           ( 'IDX_UseModRM',       'rm',     '%Wx',  'Wx',      'RM',    ),
    'Wx_WO':        ( 'IDX_UseModRM',       'rm',     '%Wx',  'Wx',      'RM',    ),

    # ModR/M.rm - register only.
    'Uq':           ( 'IDX_UseModRM',       'rm',     '%Uq',  'Uq',      'REG',   ),
    'UqHi':         ( 'IDX_UseModRM',       'rm',     '%Uq',  'UqHi',    'REG',   ),
    'Uss':          ( 'IDX_UseModRM',       'rm',     '%Uss', 'Uss',     'REG',   ),
    'Uss_WO':       ( 'IDX_UseModRM',       'rm',     '%Uss', 'Uss',     'REG',   ),
    'Usd':          ( 'IDX_UseModRM',       'rm',     '%Usd', 'Usd',     'REG',   ),
    'Usd_WO':       ( 'IDX_UseModRM',       'rm',     '%Usd', 'Usd',     'REG',   ),
    'Ux':           ( 'IDX_UseModRM',       'rm',     '%Ux',  'Ux',      'REG',   ),
    'Nq':           ( 'IDX_UseModRM',       'rm',     '%Qq',  'Nq',      'REG',   ),

    # ModR/M.rm - memory only.
    'Ma':           ( 'IDX_UseModRM',       'rm',     '%Ma',  'Ma',      'MEM',   ), ##< Only used by BOUND.
    'Mb_RO':        ( 'IDX_UseModRM',       'rm',     '%Mb',  'Mb',      'MEM',   ),
    'Md':           ( 'IDX_UseModRM',       'rm',     '%Md',  'Md',      'MEM',   ),
    'Md_RO':        ( 'IDX_UseModRM',       'rm',     '%Md',  'Md',      'MEM',   ),
    'Md_WO':        ( 'IDX_UseModRM',       'rm',     '%Md',  'Md',      'MEM',   ),
    'Mdq':          ( 'IDX_UseModRM',       'rm',     '%Mdq', 'Mdq',     'MEM',   ),
    'Mdq_WO':       ( 'IDX_UseModRM',       'rm',     '%Mdq', 'Mdq',     'MEM',   ),
    'Mq':           ( 'IDX_UseModRM',       'rm',     '%Mq',  'Mq',      'MEM',   ),
    'Mq_WO':        ( 'IDX_UseModRM',       'rm',     '%Mq',  'Mq',      'MEM',   ),
    'Mps_WO':       ( 'IDX_UseModRM',       'rm',     '%Mps', 'Mps',     'MEM',   ),
    'Mpd_WO':       ( 'IDX_UseModRM',       'rm',     '%Mpd', 'Mpd',     'MEM',   ),
    'Mx':           ( 'IDX_UseModRM',       'rm',     '%Mx',  'Mx',      'MEM',   ),
    'Mx_WO':        ( 'IDX_UseModRM',       'rm',     '%Mx',  'Mx',      'MEM',   ),
    'M_RO':         ( 'IDX_UseModRM',       'rm',     '%M',   'M',       'MEM',   ),
    'M_RW':         ( 'IDX_UseModRM',       'rm',     '%M',   'M',       'MEM',   ),

    # ModR/M.reg
    'Gb':           ( 'IDX_UseModRM',       'reg',    '%Gb',  'Gb',      '',      ),
    'Gw':           ( 'IDX_UseModRM',       'reg',    '%Gw',  'Gw',      '',      ),
    'Gd':           ( 'IDX_UseModRM',       'reg',    '%Gd',  'Gd',      '',      ),
    'Gv':           ( 'IDX_UseModRM',       'reg',    '%Gv',  'Gv',      '',      ),
    'Gv_RO':        ( 'IDX_UseModRM',       'reg',    '%Gv',  'Gv',      '',      ),
    'Gy':           ( 'IDX_UseModRM',       'reg',    '%Gy',  'Gy',      '',      ),
    'Pd':           ( 'IDX_UseModRM',       'reg',    '%Pd',  'Pd',      '',      ),
    'PdZx_WO':      ( 'IDX_UseModRM',       'reg',    '%Pd',  'PdZx',    '',      ),
    'Pq':           ( 'IDX_UseModRM',       'reg',    '%Pq',  'Pq',      '',      ),
    'Pq_WO':        ( 'IDX_UseModRM',       'reg',    '%Pq',  'Pq',      '',      ),
    'Vd':           ( 'IDX_UseModRM',       'reg',    '%Vd',  'Vd',      '',      ),
    'Vd_WO':        ( 'IDX_UseModRM',       'reg',    '%Vd',  'Vd',      '',      ),
    'VdZx_WO':      ( 'IDX_UseModRM',       'reg',    '%Vd',  'Vd',      '',      ),
    'Vdq':          ( 'IDX_UseModRM',       'reg',    '%Vdq', 'Vdq',     '',      ),
    'Vss':          ( 'IDX_UseModRM',       'reg',    '%Vss', 'Vss',     '',      ),
    'Vss_WO':       ( 'IDX_UseModRM',       'reg',    '%Vss', 'Vss',     '',      ),
    'VssZx_WO':     ( 'IDX_UseModRM',       'reg',    '%Vss', 'Vss',     '',      ),
    'Vsd':          ( 'IDX_UseModRM',       'reg',    '%Vsd', 'Vsd',     '',      ),
    'Vsd_WO':       ( 'IDX_UseModRM',       'reg',    '%Vsd', 'Vsd',     '',      ),
    'VsdZx_WO':     ( 'IDX_UseModRM',       'reg',    '%Vsd', 'Vsd',     '',      ),
    'Vps':          ( 'IDX_UseModRM',       'reg',    '%Vps', 'Vps',     '',      ),
    'Vps_WO':       ( 'IDX_UseModRM',       'reg',    '%Vps', 'Vps',     '',      ),
    'Vpd':          ( 'IDX_UseModRM',       'reg',    '%Vpd', 'Vpd',     '',      ),
    'Vpd_WO':       ( 'IDX_UseModRM',       'reg',    '%Vpd', 'Vpd',     '',      ),
    'Vq':           ( 'IDX_UseModRM',       'reg',    '%Vq',  'Vq',      '',      ),
    'Vq_WO':        ( 'IDX_UseModRM',       'reg',    '%Vq',  'Vq',      '',      ),
    'Vdq_WO':       ( 'IDX_UseModRM',       'reg',    '%Vdq', 'Vdq',     '',      ),
    'VqHi':         ( 'IDX_UseModRM',       'reg',    '%Vdq', 'VdqHi',   '',      ),
    'VqHi_WO':      ( 'IDX_UseModRM',       'reg',    '%Vdq', 'VdqHi',   '',      ),
    'VqZx_WO':      ( 'IDX_UseModRM',       'reg',    '%Vq',  'VqZx',    '',      ),
    'Vx':           ( 'IDX_UseModRM',       'reg',    '%Vx',  'Vx',      '',      ),
    'Vx_WO':        ( 'IDX_UseModRM',       'reg',    '%Vx',  'Vx',      '',      ),

    # VEX.vvvv
    'By':           ( 'IDX_UseModRM',       'vvvv',   '%By',  'By',      'V',     ),
    'Hps':          ( 'IDX_UseModRM',       'vvvv',   '%Hps', 'Hps',     'V',     ),
    'Hpd':          ( 'IDX_UseModRM',       'vvvv',   '%Hpd', 'Hpd',     'V',     ),
    'HssHi':        ( 'IDX_UseModRM',       'vvvv',   '%Hx',  'HssHi',   'V',     ),
    'HsdHi':        ( 'IDX_UseModRM',       'vvvv',   '%Hx',  'HsdHi',   'V',     ),
    'Hq':           ( 'IDX_UseModRM',       'vvvv',   '%Hq',  'Hq',      'V',     ),
    'HqHi':         ( 'IDX_UseModRM',       'vvvv',   '%Hq',  'HqHi',    'V',     ),
    'Hx':           ( 'IDX_UseModRM',       'vvvv',   '%Hx',  'Hx',      'V',     ),

    # Immediate values.
    'Ib':           ( 'IDX_ParseImmByte',   'imm',    '%Ib',  'Ib',      '',      ), ##< NB! Could be IDX_ParseImmByteSX for some instrs.
    'Iw':           ( 'IDX_ParseImmUshort', 'imm',    '%Iw',  'Iw',      '',      ),
    'Id':           ( 'IDX_ParseImmUlong',  'imm',    '%Id',  'Id',      '',      ),
    'Iq':           ( 'IDX_ParseImmQword',  'imm',    '%Iq',  'Iq',      '',      ),
    'Iv':           ( 'IDX_ParseImmV',      'imm',    '%Iv',  'Iv',      '',      ), ##< o16: word, o32: dword, o64: qword
    'Iz':           ( 'IDX_ParseImmZ',      'imm',    '%Iz',  'Iz',      '',      ), ##< o16: word, o32|o64:dword

    # Address operands (no ModR/M).
    'Ob':           ( 'IDX_ParseImmAddr',   'imm',    '%Ob',  'Ob',      '',      ),
    'Ov':           ( 'IDX_ParseImmAddr',   'imm',    '%Ov',  'Ov',      '',      ),

    # Relative jump targets
    'Jb':           ( 'IDX_ParseImmBRel',   'imm',    '%Jb',  'Jb',      '',      ),
    'Jv':           ( 'IDX_ParseImmVRel',   'imm',    '%Jv',  'Jv',      '',      ),

    # DS:rSI
    'Xb':           ( 'IDX_ParseXb',        'rSI',    '%eSI', 'Xb',      '',      ),
    'Xv':           ( 'IDX_ParseXv',        'rSI',    '%eSI', 'Xv',      '',      ),
    # ES:rDI
    'Yb':           ( 'IDX_ParseYb',        'rDI',    '%eDI', 'Yb',      '',      ),
    'Yv':           ( 'IDX_ParseYv',        'rDI',    '%eDI', 'Yv',      '',      ),

    'Fv':           ( 'IDX_ParseFixedReg',  'rFLAGS', '%Fv',  'Fv',      '',      ),

    # Fixed registers.
    'AL':           ( 'IDX_ParseFixedReg',  'AL',     'al',   'REG_AL',  '',      ),
    'rAX':          ( 'IDX_ParseFixedReg',  'rAX',    '%eAX', 'REG_EAX', '',      ),
    'rDX':          ( 'IDX_ParseFixedReg',  'rDX',    '%eDX', 'REG_EDX', '',      ),
    'CS':           ( 'IDX_ParseFixedReg',  'CS',     'cs',   'REG_CS',  '',      ), # 8086: push CS
    'DS':           ( 'IDX_ParseFixedReg',  'DS',     'ds',   'REG_DS',  '',      ),
    'ES':           ( 'IDX_ParseFixedReg',  'ES',     'es',   'REG_ES',  '',      ),
    'FS':           ( 'IDX_ParseFixedReg',  'FS',     'fs',   'REG_FS',  '',      ),
    'GS':           ( 'IDX_ParseFixedReg',  'GS',     'gs',   'REG_GS',  '',      ),
    'SS':           ( 'IDX_ParseFixedReg',  'SS',     'ss',   'REG_SS',  '',      ),
};
364
365# IDX_ParseFixedReg
366# IDX_ParseVexDest
367
368
## IEMFORM_XXX mappings.
##
## Each value is a tuple: ( sEncoding, [ sWhere1, ... ], opcodesub ).
g_kdIemForms = {
    'RM':           ( 'ModR/M',     [ 'reg', 'rm' ],            '',           ),
    'RM_REG':       ( 'ModR/M',     [ 'reg', 'rm' ],            '11 mr/reg',  ),
    'RM_MEM':       ( 'ModR/M',     [ 'reg', 'rm' ],            '!11 mr/reg', ),
    'RMI':          ( 'ModR/M',     [ 'reg', 'rm', 'imm' ],     '',           ),
    'RMI_REG':      ( 'ModR/M',     [ 'reg', 'rm', 'imm' ],     '11 mr/reg',  ),
    'RMI_MEM':      ( 'ModR/M',     [ 'reg', 'rm', 'imm' ],     '!11 mr/reg', ),
    'MR':           ( 'ModR/M',     [ 'rm', 'reg' ],            '',           ),
    'MR_REG':       ( 'ModR/M',     [ 'rm', 'reg' ],            '11 mr/reg',  ),
    'MR_MEM':       ( 'ModR/M',     [ 'rm', 'reg' ],            '!11 mr/reg', ),
    'MRI':          ( 'ModR/M',     [ 'rm', 'reg', 'imm' ],     '',           ),
    'MRI_REG':      ( 'ModR/M',     [ 'rm', 'reg', 'imm' ],     '11 mr/reg',  ),
    'MRI_MEM':      ( 'ModR/M',     [ 'rm', 'reg', 'imm' ],     '!11 mr/reg', ),
    'M':            ( 'ModR/M',     [ 'rm', ],                  '',           ),
    'M_REG':        ( 'ModR/M',     [ 'rm', ],                  '',           ),
    'M_MEM':        ( 'ModR/M',     [ 'rm', ],                  '',           ),
    'R':            ( 'ModR/M',     [ 'reg', ],                 '',           ),

    'VEX_RM':       ( 'VEX.ModR/M', [ 'reg', 'rm' ],            '',           ),
    'VEX_RM_REG':   ( 'VEX.ModR/M', [ 'reg', 'rm' ],            '11 mr/reg',  ),
    'VEX_RM_MEM':   ( 'VEX.ModR/M', [ 'reg', 'rm' ],            '!11 mr/reg', ),
    'VEX_MR':       ( 'VEX.ModR/M', [ 'rm', 'reg' ],            '',           ),
    'VEX_MR_REG':   ( 'VEX.ModR/M', [ 'rm', 'reg' ],            '11 mr/reg',  ),
    'VEX_MR_MEM':   ( 'VEX.ModR/M', [ 'rm', 'reg' ],            '!11 mr/reg', ),
    'VEX_M':        ( 'VEX.ModR/M', [ 'rm', ],                  '',           ),
    'VEX_M_REG':    ( 'VEX.ModR/M', [ 'rm', ],                  '',           ),
    'VEX_M_MEM':    ( 'VEX.ModR/M', [ 'rm', ],                  '',           ),
    'VEX_R':        ( 'VEX.ModR/M', [ 'reg', ],                 '',           ),
    'VEX_RVM':      ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm' ],    '',           ),
    'VEX_RVM_REG':  ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm' ],    '11 mr/reg',  ),
    'VEX_RVM_MEM':  ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm' ],    '!11 mr/reg', ),
    'VEX_RMV':      ( 'VEX.ModR/M', [ 'reg', 'rm', 'vvvv' ],    '',           ),
    'VEX_RMV_REG':  ( 'VEX.ModR/M', [ 'reg', 'rm', 'vvvv' ],    '11 mr/reg',  ),
    'VEX_RMV_MEM':  ( 'VEX.ModR/M', [ 'reg', 'rm', 'vvvv' ],    '!11 mr/reg', ),
    'VEX_RMI':      ( 'VEX.ModR/M', [ 'reg', 'rm', 'imm' ],     '',           ),
    'VEX_RMI_REG':  ( 'VEX.ModR/M', [ 'reg', 'rm', 'imm' ],     '11 mr/reg',  ),
    'VEX_RMI_MEM':  ( 'VEX.ModR/M', [ 'reg', 'rm', 'imm' ],     '!11 mr/reg', ),
    'VEX_MVR':      ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg' ],    '',           ),
    'VEX_MVR_REG':  ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg' ],    '11 mr/reg',  ),
    'VEX_MVR_MEM':  ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg' ],    '!11 mr/reg', ),

    'VEX_VM':       ( 'VEX.ModR/M', [ 'vvvv', 'rm' ],           '',           ),
    'VEX_VM_REG':   ( 'VEX.ModR/M', [ 'vvvv', 'rm' ],           '11 mr/reg',  ),
    'VEX_VM_MEM':   ( 'VEX.ModR/M', [ 'vvvv', 'rm' ],           '!11 mr/reg', ),

    'FIXED':        ( 'fixed',      None,                       '',           ),
};
417
## \@oppfx values.
g_kdPrefixes = {
    'none': [],
    '0x66': [],
    '0xf3': [],
    '0xf2': [],
};
425
## Special \@opcode tag values.
g_kdSpecialOpcodes = {
    '/reg':         [],
    'mr/reg':       [],
    '11 /reg':      [],
    '!11 /reg':     [],
    '11 mr/reg':    [],
    '!11 mr/reg':   [],
};
435
## Special \@opcodesub tag values.
## The first value is the real value for aliases.
## The second value is for bs3cg1.
g_kdSubOpcodes = {
    'none':                 [ None,                 '',     ],
    '11 mr/reg':            [ '11 mr/reg',          '',     ],
    '11':                   [ '11 mr/reg',          '',     ], ##< alias
    '!11 mr/reg':           [ '!11 mr/reg',         '',     ],
    '!11':                  [ '!11 mr/reg',         '',     ], ##< alias
    'rex.w=0':              [ 'rex.w=0',            'WZ',   ],
    'w=0':                  [ 'rex.w=0',            '',     ], ##< alias
    'rex.w=1':              [ 'rex.w=1',            'WNZ',  ],
    'w=1':                  [ 'rex.w=1',            '',     ], ##< alias
    'vex.l=0':              [ 'vex.l=0',            'L0',   ],
    # Not an alias: the real value must be 'vex.l=1' itself (it used to be a
    # copy & paste of the 'vex.l=0' entry, turning L=1 into L=0 when resolved).
    'vex.l=1':              [ 'vex.l=1',            'L1',   ],
    '11 mr/reg vex.l=0':    [ '11 mr/reg vex.l=0',  'L0',   ],
    '11 mr/reg vex.l=1':    [ '11 mr/reg vex.l=1',  'L1',   ],
    '!11 mr/reg vex.l=0':   [ '!11 mr/reg vex.l=0', 'L0',   ],
    '!11 mr/reg vex.l=1':   [ '!11 mr/reg vex.l=1', 'L1',   ],
};
456
## Valid values for \@openc
g_kdEncodings = {
    'ModR/M':       [ 'BS3CG1ENC_MODRM',     ], ##< ModR/M
    'VEX.ModR/M':   [ 'BS3CG1ENC_VEX_MODRM', ], ##< VEX...ModR/M
    'fixed':        [ 'BS3CG1ENC_FIXED',     ], ##< Fixed encoding (address, registers, unused, etc).
    'VEX.fixed':    [ 'BS3CG1ENC_VEX_FIXED', ], ##< VEX + fixed encoding (address, registers, unused, etc).
    'prefix':       [ None,                  ], ##< Prefix
};
465
## \@opunused, \@opinvalid, \@opinvlstyle
g_kdInvalidStyles = {
    'immediate':                [], ##< CPU stops decoding immediately after the opcode.
    'vex.modrm':                [], ##< VEX+ModR/M, everyone.
    'intel-modrm':              [], ##< Intel decodes ModR/M.
    'intel-modrm-imm8':         [], ##< Intel decodes ModR/M and an 8-bit immediate.
    'intel-opcode-modrm':       [], ##< Intel decodes another opcode byte followed by ModR/M. (Unused extension tables.)
    'intel-opcode-modrm-imm8':  [], ##< Intel decodes another opcode byte followed by ModR/M and an 8-bit immediate.
};
475
## Known CPU names.
g_kdCpuNames = {
    '8086':     (),
    '80186':    (),
    '80286':    (),
    '80386':    (),
    '80486':    (),
};
483
## \@opcpuid
##
## Maps the feature name used with the tag to the X86_CPUID_XXX constant name.
g_kdCpuIdFlags = {
    'vme':          'X86_CPUID_FEATURE_EDX_VME',
    'tsc':          'X86_CPUID_FEATURE_EDX_TSC',
    'msr':          'X86_CPUID_FEATURE_EDX_MSR',
    'cx8':          'X86_CPUID_FEATURE_EDX_CX8',
    'sep':          'X86_CPUID_FEATURE_EDX_SEP',
    'cmov':         'X86_CPUID_FEATURE_EDX_CMOV',
    'clfsh':        'X86_CPUID_FEATURE_EDX_CLFSH',
    'clflushopt':   'X86_CPUID_STEXT_FEATURE_EBX_CLFLUSHOPT',
    'mmx':          'X86_CPUID_FEATURE_EDX_MMX',
    'fxsr':         'X86_CPUID_FEATURE_EDX_FXSR',
    'sse':          'X86_CPUID_FEATURE_EDX_SSE',
    'sse2':         'X86_CPUID_FEATURE_EDX_SSE2',
    'sse3':         'X86_CPUID_FEATURE_ECX_SSE3',
    # The next three used to name the neighbouring ECX bits (DTES64, CPLDS and
    # TM2 respectively) instead of the feature the key stands for.
    'pclmul':       'X86_CPUID_FEATURE_ECX_PCLMUL',
    'monitor':      'X86_CPUID_FEATURE_ECX_MONITOR',
    'vmx':          'X86_CPUID_FEATURE_ECX_VMX',
    'smx':          'X86_CPUID_FEATURE_ECX_SMX',
    'ssse3':        'X86_CPUID_FEATURE_ECX_SSSE3',
    'fma':          'X86_CPUID_FEATURE_ECX_FMA',
    'cx16':         'X86_CPUID_FEATURE_ECX_CX16',
    'pcid':         'X86_CPUID_FEATURE_ECX_PCID',
    'sse4.1':       'X86_CPUID_FEATURE_ECX_SSE4_1',
    'sse4.2':       'X86_CPUID_FEATURE_ECX_SSE4_2',
    'movbe':        'X86_CPUID_FEATURE_ECX_MOVBE',
    'popcnt':       'X86_CPUID_FEATURE_ECX_POPCNT',
    'aes':          'X86_CPUID_FEATURE_ECX_AES',
    'xsave':        'X86_CPUID_FEATURE_ECX_XSAVE',
    'avx':          'X86_CPUID_FEATURE_ECX_AVX',
    'avx2':         'X86_CPUID_STEXT_FEATURE_EBX_AVX2',
    'f16c':         'X86_CPUID_FEATURE_ECX_F16C',
    'rdrand':       'X86_CPUID_FEATURE_ECX_RDRAND',

    'axmmx':        'X86_CPUID_AMD_FEATURE_EDX_AXMMX',
    '3dnowext':     'X86_CPUID_AMD_FEATURE_EDX_3DNOW_EX',
    '3dnow':        'X86_CPUID_AMD_FEATURE_EDX_3DNOW',
    'svm':          'X86_CPUID_AMD_FEATURE_ECX_SVM',
    'cr8l':         'X86_CPUID_AMD_FEATURE_ECX_CR8L',
    'abm':          'X86_CPUID_AMD_FEATURE_ECX_ABM',
    'sse4a':        'X86_CPUID_AMD_FEATURE_ECX_SSE4A',
    '3dnowprf':     'X86_CPUID_AMD_FEATURE_ECX_3DNOWPRF',
    'xop':          'X86_CPUID_AMD_FEATURE_ECX_XOP',
    'fma4':         'X86_CPUID_AMD_FEATURE_ECX_FMA4',
};
529
## \@ophints values.
##
## Maps the hint name to the DISOPTYPE_XXX constant name; hints that map to
## an empty string have no constant associated with them in this table.
# pylint: disable=line-too-long
g_kdHints = {
    'invalid':                   'DISOPTYPE_INVALID',                   ##<
    'harmless':                  'DISOPTYPE_HARMLESS',                  ##<
    'controlflow':               'DISOPTYPE_CONTROLFLOW',               ##<
    'potentially_dangerous':     'DISOPTYPE_POTENTIALLY_DANGEROUS',     ##<
    'dangerous':                 'DISOPTYPE_DANGEROUS',                 ##<
    'portio':                    'DISOPTYPE_PORTIO',                    ##<
    'privileged':                'DISOPTYPE_PRIVILEGED',                ##<
    'privileged_notrap':         'DISOPTYPE_PRIVILEGED_NOTRAP',         ##<
    'uncond_controlflow':        'DISOPTYPE_UNCOND_CONTROLFLOW',        ##<
    'relative_controlflow':      'DISOPTYPE_RELATIVE_CONTROLFLOW',      ##<
    'cond_controlflow':          'DISOPTYPE_COND_CONTROLFLOW',          ##<
    'interrupt':                 'DISOPTYPE_INTERRUPT',                 ##<
    'illegal':                   'DISOPTYPE_ILLEGAL',                   ##<
    'rrm_dangerous':             'DISOPTYPE_RRM_DANGEROUS',             ##< Some additional dangerous ones when recompiling raw r0.
    'rrm_dangerous_16':          'DISOPTYPE_RRM_DANGEROUS_16',          ##< Some additional dangerous ones when recompiling 16-bit raw r0.
    'inhibit_irqs':              'DISOPTYPE_INHIBIT_IRQS',              ##< Will or can inhibit irqs (sti, pop ss, mov ss)
    'x86_portio_read':           'DISOPTYPE_X86_PORTIO_READ',           ##<
    'x86_portio_write':          'DISOPTYPE_X86_PORTIO_WRITE',          ##<
    'x86_invalid_64':            'DISOPTYPE_X86_INVALID_64',            ##< Invalid in 64 bits mode
    'x86_only_64':               'DISOPTYPE_X86_ONLY_64',               ##< Only valid in 64 bits mode
    'x86_default_64_op_size':    'DISOPTYPE_X86_DEFAULT_64_OP_SIZE',    ##< Default 64 bits operand size
    'x86_forced_64_op_size':     'DISOPTYPE_X86_FORCED_64_OP_SIZE',     ##< Forced 64 bits operand size; regardless of prefix bytes
    'x86_rexb_extends_opreg':    'DISOPTYPE_X86_REXB_EXTENDS_OPREG',    ##< REX.B extends the register field in the opcode byte
    'x86_mod_fixed_11':          'DISOPTYPE_X86_MOD_FIXED_11',          ##< modrm.mod is always 11b
    'x86_forced_32_op_size_x86': 'DISOPTYPE_X86_FORCED_32_OP_SIZE_X86', ##< Forced 32 bits operand size; regardless of prefix bytes
                                                                        ##  (only in 16 & 32 bits mode!)
    'x86_avx':                   'DISOPTYPE_X86_AVX',                   ##< AVX,AVX2,++ instruction. Not implemented yet!
    'x86_sse':                   'DISOPTYPE_X86_SSE',                   ##< SSE,SSE2,SSE3,++ instruction. Not implemented yet!
    'x86_mmx':                   'DISOPTYPE_X86_MMX',                   ##< MMX,MMXExt,3DNow,++ instruction. Not implemented yet!
    'x86_fpu':                   'DISOPTYPE_X86_FPU',                   ##< FPU instruction. Not implemented yet!
    'ignores_oz_pfx':            '',                                    ##< Ignores operand size prefix 66h.
    'ignores_rexw':              '',                                    ##< Ignores REX.W.
    'ignores_op_sizes':          '',                                    ##< Shorthand for "ignores_oz_pfx | ignores_rexw".
    'vex_l_zero':                '',                                    ##< VEX.L must be 0.
    'vex_l_ignored':             '',                                    ##< VEX.L is ignored.
    'vex_v_zero':                '',                                    ##< VEX.V must be 0. (generate sub-table?)
    'lock_allowed':              '',                                    ##< Lock prefix allowed.
};
# pylint: enable=line-too-long
572
## \@opxcpttype values (see SDMv2 2.4, 2.7).
g_kdXcptTypes = {
    'none':     [],
    '1':        [],
    '2':        [],
    '3':        [],
    '4':        [],
    '4UA':      [],
    '5':        [],
    '5LZ':      [], # LZ = VEX.L must be zero.
    '6':        [],
    '7':        [],
    '7LZ':      [],
    '8':        [],
    '11':       [],
    '12':       [],
    'E1':       [],
    'E1NF':     [],
    'E2':       [],
    'E3':       [],
    'E3NF':     [],
    'E4':       [],
    'E4NF':     [],
    'E5':       [],
    'E5NF':     [],
    'E6':       [],
    'E6NF':     [],
    'E7NF':     [],
    'E9':       [],
    'E9NF':     [],
    'E10':      [],
    'E11':      [],
    'E12':      [],
    'E12NF':    [],
};
608
609
def _isValidOpcodeByte(sOpcode):
    """
    Checks if sOpcode is a valid lower case opcode byte.

    A valid value is exactly four characters long: the '0x' prefix followed
    by two lower case hexadecimal digits (e.g. '0x0f').

    Returns true/false.
    """
    sHexDigits = '0123456789abcdef';
    return (    len(sOpcode) == 4
            and sOpcode[:2] == '0x'
            and sOpcode[2] in sHexDigits
            and sOpcode[3] in sHexDigits);
621
622
623class InstructionMap(object):
624 """
625 Instruction map.
626
627 The opcode map provides the lead opcode bytes (empty for the one byte
628 opcode map). An instruction can be member of multiple opcode maps as long
629 as it uses the same opcode value within the map (because of VEX).
630 """
631
632 kdEncodings = {
633 'legacy': [],
634 'vex1': [], ##< VEX or EVEX prefix with vvvvv = 1
635 'vex2': [], ##< VEX or EVEX prefix with vvvvv = 2
636 'vex3': [], ##< VEX or EVEX prefix with vvvvv = 3
637 'xop8': [], ##< XOP prefix with vvvvv = 8
638 'xop9': [], ##< XOP prefix with vvvvv = 9
639 'xop10': [], ##< XOP prefix with vvvvv = 10
640 };
641 ## Selectors.
642 ## 1. The first value is the number of table entries required by a
643 ## decoder or disassembler for this type of selector.
644 ## 2. The second value is how many entries per opcode byte if applicable.
645 kdSelectors = {
646 'byte': [ 256, 1, ], ##< next opcode byte selects the instruction (default).
647 'byte+pfx': [ 1024, 4, ], ##< next opcode byte selects the instruction together with the 0x66, 0xf2 and 0xf3 prefixes.
648 '/r': [ 8, 1, ], ##< modrm.reg selects the instruction.
649 'memreg /r':[ 16, 1, ], ##< modrm.reg and (modrm.mod == 3) selects the instruction.
650 'mod /r': [ 32, 1, ], ##< modrm.reg and modrm.mod selects the instruction.
651 '!11 /r': [ 8, 1, ], ##< modrm.reg selects the instruction with modrm.mod != 0y11.
652 '11 /r': [ 8, 1, ], ##< modrm.reg select the instruction with modrm.mod == 0y11.
653 '11': [ 64, 1, ], ##< modrm.reg and modrm.rm select the instruction with modrm.mod == 0y11.
654 };
655
656 ## Define the subentry number according to the Instruction::sPrefix
657 ## value for 'byte+pfx' selected tables.
658 kiPrefixOrder = {
659 'none': 0,
660 '0x66': 1,
661 '0xf3': 2,
662 '0xf2': 3,
663 };
664
665 def __init__(self, sName, sIemName = None, asLeadOpcodes = None, sSelector = 'byte+pfx',
666 sEncoding = 'legacy', sDisParse = None):
667 assert sSelector in self.kdSelectors;
668 assert sEncoding in self.kdEncodings;
669 if asLeadOpcodes is None:
670 asLeadOpcodes = [];
671 else:
672 for sOpcode in asLeadOpcodes:
673 assert _isValidOpcodeByte(sOpcode);
674 assert sDisParse is None or sDisParse.startswith('IDX_Parse');
675
676 self.sName = sName;
677 self.sIemName = sIemName;
678 self.asLeadOpcodes = asLeadOpcodes; ##< Lead opcode bytes formatted as hex strings like '0x0f'.
679 self.sSelector = sSelector; ##< The member selector, see kdSelectors.
680 self.sEncoding = sEncoding; ##< The encoding, see kdSelectors.
681 self.aoInstructions = [] # type: Instruction
682 self.sDisParse = sDisParse; ##< IDX_ParseXXX.
683
684 def copy(self, sNewName, sPrefixFilter = None):
685 """
686 Copies the table with filtering instruction by sPrefix if not None.
687 """
688 oCopy = InstructionMap(sNewName, sIemName = self.sIemName, asLeadOpcodes = self.asLeadOpcodes,
689 sSelector = 'byte' if sPrefixFilter is not None and self.sSelector == 'byte+pfx'
690 else self.sSelector,
691 sEncoding = self.sEncoding, sDisParse = self.sDisParse);
692 if sPrefixFilter is None:
693 oCopy.aoInstructions = list(self.aoInstructions);
694 else:
695 oCopy.aoInstructions = [oInstr for oInstr in self.aoInstructions if oInstr.sPrefix == sPrefixFilter];
696 return oCopy;
697
698 def getTableSize(self):
699 """
700 Number of table entries. This corresponds directly to the selector.
701 """
702 return self.kdSelectors[self.sSelector][0];
703
704 def getEntriesPerByte(self):
705 """
706 Number of table entries per opcode bytes.
707
708 This only really makes sense for the 'byte' and 'byte+pfx' selectors, for
709 the others it will just return 1.
710 """
711 return self.kdSelectors[self.sSelector][1];
712
713 def getInstructionIndex(self, oInstr):
714 """
715 Returns the table index for the instruction.
716 """
717 bOpcode = oInstr.getOpcodeByte();
718
719 # The byte selectors are simple. We need a full opcode byte and need just return it.
720 if self.sSelector == 'byte':
721 assert oInstr.sOpcode[:2] == '0x' and len(oInstr.sOpcode) == 4, str(oInstr);
722 return bOpcode;
723
724 # The byte + prefix selector is similarly simple, though requires a prefix as well as the full opcode.
725 if self.sSelector == 'byte+pfx':
726 assert oInstr.sOpcode[:2] == '0x' and len(oInstr.sOpcode) == 4, str(oInstr);
727 assert self.kiPrefixOrder.get(oInstr.sPrefix, -16384) >= 0;
728 return bOpcode * 4 + self.kiPrefixOrder.get(oInstr.sPrefix, -16384);
729
730 # The other selectors needs masking and shifting.
731 if self.sSelector == '/r':
732 return (bOpcode >> 3) & 0x7;
733
734 if self.sSelector == 'mod /r':
735 return (bOpcode >> 3) & 0x1f;
736
737 if self.sSelector == 'memreg /r':
738 return ((bOpcode >> 3) & 0x7) | (int((bOpcode >> 6) == 3) << 3);
739
740 if self.sSelector == '!11 /r':
741 assert (bOpcode & 0xc0) != 0xc, str(oInstr);
742 return (bOpcode >> 3) & 0x7;
743
744 if self.sSelector == '11 /r':
745 assert (bOpcode & 0xc0) == 0xc, str(oInstr);
746 return (bOpcode >> 3) & 0x7;
747
748 if self.sSelector == '11':
749 assert (bOpcode & 0xc0) == 0xc, str(oInstr);
750 return bOpcode & 0x3f;
751
752 assert False, self.sSelector;
753 return -1;
754
755 def getInstructionsInTableOrder(self):
756 """
757 Get instructions in table order.
758
759 Returns array of instructions. Normally there is exactly one
760 instruction per entry. However the entry could also be None if
761 not instruction was specified for that opcode value. Or there
762 could be a list of instructions to deal with special encodings
763 where for instance prefix (e.g. REX.W) encodes a different
764 instruction or different CPUs have different instructions or
765 prefixes in the same place.
766 """
767 # Start with empty table.
768 cTable = self.getTableSize();
769 aoTable = [None] * cTable;
770
771 # Insert the instructions.
772 for oInstr in self.aoInstructions:
773 if oInstr.sOpcode:
774 idxOpcode = self.getInstructionIndex(oInstr);
775 assert idxOpcode < cTable, str(idxOpcode);
776
777 oExisting = aoTable[idxOpcode];
778 if oExisting is None:
779 aoTable[idxOpcode] = oInstr;
780 elif not isinstance(oExisting, list):
781 aoTable[idxOpcode] = list([oExisting, oInstr]);
782 else:
783 oExisting.append(oInstr);
784
785 return aoTable;
786
787
def getDisasTableName(self):
    """
    Derives the disassembler table name from the map name.

    The map name is split on underscores and each word is appended to the
    'g_aDisas' prefix:
        - 'm' / 'r' (suffixes indicating modrm.mod==mem / modrm.mod==reg)
          and two digit lowercase hex words keep their underscore,
        - any other word gets 'grp' / 'map' capitalized, its first
          character uppercased and is appended without a separator.
    """
    asParts = ['g_aDisas',];
    for sWord in self.sName.split('_'):
        fKeepUnderscore = sWord in ('m', 'r') \
                       or (len(sWord) == 2 and re.match('^[a-f0-9][a-f0-9]$', sWord));
        if fKeepUnderscore:
            asParts.append('_' + sWord);
        else:
            sCamel = sWord.replace('grp', 'Grp').replace('map', 'Map');
            asParts.append(sCamel[0].upper() + sCamel[1:]);
    return ''.join(asParts);
805
def getDisasRangeName(self):
    """
    Derives the disassembler table range name from the table name.

    This is getDisasTableName() with 'g_aDisas' replaced by 'g_Disas' and
    'Range' appended.
    """
    sTableName = self.getDisasTableName();
    return sTableName.replace('g_aDisas', 'g_Disas') + 'Range';
811
def isVexMap(self):
    """ Returns True if the encoding of this map starts with 'vex', otherwise False. """
    fIsVex = self.sEncoding.startswith('vex');
    return fIsVex;
815
816
class TestType(object):
    """
    Test value type.

    This base class deals with integer like values.  The fUnsigned constructor
    parameter indicates the default stance on zero vs sign extending.  It is
    possible to override fUnsigned=True by prefixing the value with '+' or '-'.
    """
    def __init__(self, sName, acbSizes = None, fUnsigned = True):
        self.sName     = sName;
        self.acbSizes  = [1, 2, 4, 8, 16, 32] if acbSizes is None else acbSizes;  # Normal sizes.
        self.fUnsigned = fUnsigned;

    class BadValue(Exception):
        """ Bad value exception. """
        def __init__(self, sMessage):
            Exception.__init__(self, sMessage);
            self.sMessage = sMessage;

    ## For ascii ~ operator (maps each hex digit to its bitwise inverse).
    kdHexInv = {
        '0': 'f',
        '1': 'e',
        '2': 'd',
        '3': 'c',
        '4': 'b',
        '5': 'a',
        '6': '9',
        '7': '8',
        '8': '7',
        '9': '6',
        'a': '5',
        'b': '4',
        'c': '3',
        'd': '2',
        'e': '1',
        'f': '0',
    };

    @staticmethod
    def _toHexDigits(uValue):
        """
        Returns the bare lowercase hex digits of the non-negative integer
        uValue, i.e. without the '0x' prefix and without the 'L' suffix that
        Python 2 appends to long integers.
        """
        sHex = hex(uValue);
        assert sHex[:2] == '0x', sHex;
        return sHex[2:].rstrip('L');    # 'L' is not a hex digit, so this only drops the py2 suffix.

    def get(self, sValue):
        """
        Get the shortest normal sized byte representation of sValue.

        Returns ((fSignExtend, bytearray), ) or ((fSignExtend, bytearray), (fSignExtend, bytearray), ).
        The latter form is for AND+OR pairs where the first entry is what to
        AND with the field and the second the one to OR with.  This base
        implementation always returns the single entry form, with the
        bytearray holding the value in little endian byte order.

        Raises BadValue if invalid value.
        """
        if not sValue:
            raise TestType.BadValue('empty value');

        # Deal with sign and detect hexadecimal or decimal.
        fSignExtend = not self.fUnsigned;
        if sValue[0] == '-' or sValue[0] == '+':
            fSignExtend = True;
            fHex = len(sValue) > 3 and sValue[1:3].lower() == '0x';
        else:
            fHex = len(sValue) > 2 and sValue[0:2].lower() == '0x';

        # Try convert it to an integer.  The builtin int() has arbitrary
        # precision on both Python 2 (auto-promotes to long) and Python 3, so
        # no 'long' alias is needed.  Only conversion failures are translated;
        # anything else is a programming error and must not be masked.
        try:
            iValue = int(sValue, 16 if fHex else 10);
        except ValueError as oXcpt:
            raise TestType.BadValue('failed to convert "%s" to integer (%s)' % (sValue, oXcpt));

        # Convert the hex string and pad it to a decent value.  Negative values
        # needs to be manually converted to something non-negative (~-n + 1).
        if iValue >= 0:
            sHex = self._toHexDigits(iValue);
        else:
            # Two's complement: invert the digits of (-n - 1).
            sHex = ''.join([self.kdHexInv[sDigit] for sDigit in self._toHexDigits(-iValue - 1)]);
            # Make sure the top bit is set so the padding below can't change the sign.
            if fSignExtend and sHex[0] not in [ '8', '9', 'a', 'b', 'c', 'd', 'e', 'f']:
                sHex = 'f' + sHex;

        # Figure the natural size (in digits) to pad to.
        cDigits = len(sHex);
        if cDigits <= self.acbSizes[-1] * 2:
            # Smallest normal size that fits.
            for cb in self.acbSizes:
                cNaturalDigits = cb * 2;
                if cDigits <= cNaturalDigits:
                    break;
        else:
            # Too big for any normal size: round up to a multiple of the largest one.
            cMaxDigits     = self.acbSizes[-1] * 2;
            cNaturalDigits = ((cDigits + cMaxDigits - 1) // cMaxDigits) * cMaxDigits;

        if cNaturalDigits != cDigits:
            cNeeded = cNaturalDigits - cDigits;
            if iValue >= 0:
                sHex = ('0' * cNeeded) + sHex;
            else:
                sHex = ('f' * cNeeded) + sHex;

        # Invert and convert to bytearray and return it (least significant byte first).
        abValue = bytearray([int(sHex[offHex - 2 : offHex], 16) for offHex in range(len(sHex), 0, -2)]);

        return ((fSignExtend, abValue),);

    def validate(self, sValue):
        """
        Returns True if value is okay, error message on failure.
        """
        try:
            self.get(sValue);
        except TestType.BadValue as oXcpt:
            return oXcpt.sMessage;
        return True;

    def isAndOrPair(self, sValue):
        """
        Checks if sValue is a pair.

        Always False for this base class; subclasses may override it.
        """
        _ = sValue;
        return False;
941
942
class TestTypeEflags(TestType):
    """
    Special value parsing for EFLAGS/RFLAGS/FLAGS.

    Values are comma separated flag mnemonics which are looked up in
    g_kdEFlagsMnemonics.  A mnemonic mapping to a constant prefixed by '!'
    clears that flag, any other mnemonic sets it.
    """

    ## The mnemonics that stand for a cleared flag.  Only the keys matter.
    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 };

    def __init__(self, sName):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);

    def get(self, sValue):
        """
        Converts the flag mnemonic list into value bytes.

        Returns the single entry form of TestType.get() with the flags to
        set when no flag is to be cleared.  Otherwise returns a pair where
        the first entry holds the flags to clear and the second the flags
        to set.

        Raises BadValue on unknown mnemonics.
        """
        fClear = 0;
        fSet   = 0;
        for sFlag in sValue.split(','):
            sConstant = g_kdEFlagsMnemonics.get(sFlag, None);
            if sConstant is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sFlag, sValue))
            if sConstant[0] == '!':
                fClear |= g_kdX86EFlagsConstants[sConstant[1:]];
            else:
                fSet   |= g_kdX86EFlagsConstants[sConstant];

        aoSet = TestType.get(self, '0x%x' % (fSet,));
        if fClear != 0:
            aoClear = TestType.get(self, '0x%x' % (fClear,));
            assert self.isAndOrPair(sValue) is True;
            return (aoClear[0], aoSet[0]);
        assert self.isAndOrPair(sValue) is False;
        return aoSet;

    def isAndOrPair(self, sValue):
        """
        Checks if sValue contains at least one of the flag clearing mnemonics
        in kdZeroValueFlags.

        The value is split on commas exactly like get() does and whole
        mnemonics are compared, so a mnemonic that merely contains one of the
        two letter codes as a substring is not mistaken for it.
        """
        for sFlag in sValue.split(','):
            if sFlag in self.kdZeroValueFlags:
                return True;
        return False;
978
class TestTypeFromDict(TestType):
    """
    Special value parsing for registers whose bits are named by a dictionary
    of constants (the dictionary and the constant name prefix are given to
    the constructor).
    """

    ## @note Not referenced by anything in this class.
    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 };

    def __init__(self, sName, kdConstantsAndValues, sConstantPrefix):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);
        self.sConstantPrefix      = sConstantPrefix;
        self.kdConstantsAndValues = kdConstantsAndValues;

    def get(self, sValue):
        """
        Converts a comma separated list of flag names into value bytes.

        Each name is uppercased and prefixed with sConstantPrefix before
        being looked up in kdConstantsAndValues; the values found are ORed
        together and handed to TestType.get().

        Raises BadValue on unknown names.
        """
        fCombined = 0;
        for sName in sValue.split(','):
            sKey = self.sConstantPrefix + sName.upper();
            fBit = self.kdConstantsAndValues.get(sKey, None);
            if fBit is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sName, sValue))
            fCombined |= fBit;
        return TestType.get(self, '0x%x' % (fCombined,));
999
1000
class TestInOut(object):
    """
    One input or output state modifier.

    This should be thought as values to modify BS3REGCTX and extended (needs
    to be structured) state.
    """
    ## Assigned operators.
    kasOperators = [
        '&|=',  # Special AND(INV)+OR operator for use with EFLAGS.
        '&~=',
        '&=',
        '|=',
        '='
    ];
    ## Types
    ## @note NOTE(review): 'int' is constructed with the default fUnsigned=True
    ##       and thus behaves exactly like 'uint'.  Left as-is; confirm whether
    ##       fUnsigned=False was intended.
    kdTypes = {
        'uint':  TestType('uint', fUnsigned = True),
        'int':   TestType('int'),
        'efl':   TestTypeEflags('efl'),
        'cr0':   TestTypeFromDict('cr0', g_kdX86Cr0Constants, 'X86_CR0_'),
        'cr4':   TestTypeFromDict('cr4', g_kdX86Cr4Constants, 'X86_CR4_'),
        'xcr0':  TestTypeFromDict('xcr0', g_kdX86XSaveCConstants, 'XSAVE_C_'),
    };
    ## CPU context fields.
    kdFields = {
        # name:         ( default type, [both|input|output], )
        # Operands.
        'op1':          ( 'uint', 'both',   ), ## \@op1
        'op2':          ( 'uint', 'both',   ), ## \@op2
        'op3':          ( 'uint', 'both',   ), ## \@op3
        'op4':          ( 'uint', 'both',   ), ## \@op4
        # Flags.
        'efl':          ( 'efl',  'both',   ),
        'efl_undef':    ( 'uint', 'output', ),
        # 8-bit GPRs.
        'al':           ( 'uint', 'both',   ),
        'cl':           ( 'uint', 'both',   ),
        'dl':           ( 'uint', 'both',   ),
        'bl':           ( 'uint', 'both',   ),
        'ah':           ( 'uint', 'both',   ),
        'ch':           ( 'uint', 'both',   ),
        'dh':           ( 'uint', 'both',   ),
        'bh':           ( 'uint', 'both',   ),
        'r8l':          ( 'uint', 'both',   ),
        'r9l':          ( 'uint', 'both',   ),
        'r10l':         ( 'uint', 'both',   ),
        'r11l':         ( 'uint', 'both',   ),
        'r12l':         ( 'uint', 'both',   ),
        'r13l':         ( 'uint', 'both',   ),
        'r14l':         ( 'uint', 'both',   ),
        'r15l':         ( 'uint', 'both',   ),
        # 16-bit GPRs.
        'ax':           ( 'uint', 'both',   ),
        'dx':           ( 'uint', 'both',   ),
        'cx':           ( 'uint', 'both',   ),
        'bx':           ( 'uint', 'both',   ),
        'sp':           ( 'uint', 'both',   ),
        'bp':           ( 'uint', 'both',   ),
        'si':           ( 'uint', 'both',   ),
        'di':           ( 'uint', 'both',   ),
        'r8w':          ( 'uint', 'both',   ),
        'r9w':          ( 'uint', 'both',   ),
        'r10w':         ( 'uint', 'both',   ),
        'r11w':         ( 'uint', 'both',   ),
        'r12w':         ( 'uint', 'both',   ),
        'r13w':         ( 'uint', 'both',   ),
        'r14w':         ( 'uint', 'both',   ),
        'r15w':         ( 'uint', 'both',   ),
        # 32-bit GPRs.
        'eax':          ( 'uint', 'both',   ),
        'edx':          ( 'uint', 'both',   ),
        'ecx':          ( 'uint', 'both',   ),
        'ebx':          ( 'uint', 'both',   ),
        'esp':          ( 'uint', 'both',   ),
        'ebp':          ( 'uint', 'both',   ),
        'esi':          ( 'uint', 'both',   ),
        'edi':          ( 'uint', 'both',   ),
        'r8d':          ( 'uint', 'both',   ),
        'r9d':          ( 'uint', 'both',   ),
        'r10d':         ( 'uint', 'both',   ),
        'r11d':         ( 'uint', 'both',   ),
        'r12d':         ( 'uint', 'both',   ),
        'r13d':         ( 'uint', 'both',   ),
        'r14d':         ( 'uint', 'both',   ),
        'r15d':         ( 'uint', 'both',   ),
        # 64-bit GPRs.
        'rax':          ( 'uint', 'both',   ),
        'rdx':          ( 'uint', 'both',   ),
        'rcx':          ( 'uint', 'both',   ),
        'rbx':          ( 'uint', 'both',   ),
        'rsp':          ( 'uint', 'both',   ),
        'rbp':          ( 'uint', 'both',   ),
        'rsi':          ( 'uint', 'both',   ),
        'rdi':          ( 'uint', 'both',   ),
        'r8':           ( 'uint', 'both',   ),
        'r9':           ( 'uint', 'both',   ),
        'r10':          ( 'uint', 'both',   ),
        'r11':          ( 'uint', 'both',   ),
        'r12':          ( 'uint', 'both',   ),
        'r13':          ( 'uint', 'both',   ),
        'r14':          ( 'uint', 'both',   ),
        'r15':          ( 'uint', 'both',   ),
        # 16-bit, 32-bit or 64-bit registers according to operand size.
        'oz.rax':       ( 'uint', 'both',   ),
        'oz.rdx':       ( 'uint', 'both',   ),
        'oz.rcx':       ( 'uint', 'both',   ),
        'oz.rbx':       ( 'uint', 'both',   ),
        'oz.rsp':       ( 'uint', 'both',   ),
        'oz.rbp':       ( 'uint', 'both',   ),
        'oz.rsi':       ( 'uint', 'both',   ),
        'oz.rdi':       ( 'uint', 'both',   ),
        'oz.r8':        ( 'uint', 'both',   ),
        'oz.r9':        ( 'uint', 'both',   ),
        'oz.r10':       ( 'uint', 'both',   ),
        'oz.r11':       ( 'uint', 'both',   ),
        'oz.r12':       ( 'uint', 'both',   ),
        'oz.r13':       ( 'uint', 'both',   ),
        'oz.r14':       ( 'uint', 'both',   ),
        'oz.r15':       ( 'uint', 'both',   ),
        # Control registers.
        'cr0':          ( 'cr0',  'both',   ),
        'cr4':          ( 'cr4',  'both',   ),
        'xcr0':         ( 'xcr0', 'both',   ),
        # FPU Registers
        'fcw':          ( 'uint', 'both',   ),
        'fsw':          ( 'uint', 'both',   ),
        'ftw':          ( 'uint', 'both',   ),
        'fop':          ( 'uint', 'both',   ),
        'fpuip':        ( 'uint', 'both',   ),
        'fpucs':        ( 'uint', 'both',   ),
        'fpudp':        ( 'uint', 'both',   ),
        'fpuds':        ( 'uint', 'both',   ),
        'mxcsr':        ( 'uint', 'both',   ),
        'st0':          ( 'uint', 'both',   ),
        'st1':          ( 'uint', 'both',   ),
        'st2':          ( 'uint', 'both',   ),
        'st3':          ( 'uint', 'both',   ),
        'st4':          ( 'uint', 'both',   ),
        'st5':          ( 'uint', 'both',   ),
        'st6':          ( 'uint', 'both',   ),
        'st7':          ( 'uint', 'both',   ),
        # MMX registers.
        'mm0':          ( 'uint', 'both',   ),
        'mm1':          ( 'uint', 'both',   ),
        'mm2':          ( 'uint', 'both',   ),
        'mm3':          ( 'uint', 'both',   ),
        'mm4':          ( 'uint', 'both',   ),
        'mm5':          ( 'uint', 'both',   ),
        'mm6':          ( 'uint', 'both',   ),
        'mm7':          ( 'uint', 'both',   ),
        # SSE registers.
        'xmm0':         ( 'uint', 'both',   ),
        'xmm1':         ( 'uint', 'both',   ),
        'xmm2':         ( 'uint', 'both',   ),
        'xmm3':         ( 'uint', 'both',   ),
        'xmm4':         ( 'uint', 'both',   ),
        'xmm5':         ( 'uint', 'both',   ),
        'xmm6':         ( 'uint', 'both',   ),
        'xmm7':         ( 'uint', 'both',   ),
        'xmm8':         ( 'uint', 'both',   ),
        'xmm9':         ( 'uint', 'both',   ),
        'xmm10':        ( 'uint', 'both',   ),
        'xmm11':        ( 'uint', 'both',   ),
        'xmm12':        ( 'uint', 'both',   ),
        'xmm13':        ( 'uint', 'both',   ),
        'xmm14':        ( 'uint', 'both',   ),
        'xmm15':        ( 'uint', 'both',   ),
        'xmm0.lo':      ( 'uint', 'both',   ),
        'xmm1.lo':      ( 'uint', 'both',   ),
        'xmm2.lo':      ( 'uint', 'both',   ),
        'xmm3.lo':      ( 'uint', 'both',   ),
        'xmm4.lo':      ( 'uint', 'both',   ),
        'xmm5.lo':      ( 'uint', 'both',   ),
        'xmm6.lo':      ( 'uint', 'both',   ),
        'xmm7.lo':      ( 'uint', 'both',   ),
        'xmm8.lo':      ( 'uint', 'both',   ),
        'xmm9.lo':      ( 'uint', 'both',   ),
        'xmm10.lo':     ( 'uint', 'both',   ),
        'xmm11.lo':     ( 'uint', 'both',   ),
        'xmm12.lo':     ( 'uint', 'both',   ),
        'xmm13.lo':     ( 'uint', 'both',   ),
        'xmm14.lo':     ( 'uint', 'both',   ),
        'xmm15.lo':     ( 'uint', 'both',   ),
        'xmm0.hi':      ( 'uint', 'both',   ),
        'xmm1.hi':      ( 'uint', 'both',   ),
        'xmm2.hi':      ( 'uint', 'both',   ),
        'xmm3.hi':      ( 'uint', 'both',   ),
        'xmm4.hi':      ( 'uint', 'both',   ),
        'xmm5.hi':      ( 'uint', 'both',   ),
        'xmm6.hi':      ( 'uint', 'both',   ),
        'xmm7.hi':      ( 'uint', 'both',   ),
        'xmm8.hi':      ( 'uint', 'both',   ),
        'xmm9.hi':      ( 'uint', 'both',   ),
        'xmm10.hi':     ( 'uint', 'both',   ),
        'xmm11.hi':     ( 'uint', 'both',   ),
        'xmm12.hi':     ( 'uint', 'both',   ),
        'xmm13.hi':     ( 'uint', 'both',   ),
        'xmm14.hi':     ( 'uint', 'both',   ),
        'xmm15.hi':     ( 'uint', 'both',   ),
        'xmm0.lo.zx':   ( 'uint', 'both',   ),
        'xmm1.lo.zx':   ( 'uint', 'both',   ),
        'xmm2.lo.zx':   ( 'uint', 'both',   ),
        'xmm3.lo.zx':   ( 'uint', 'both',   ),
        'xmm4.lo.zx':   ( 'uint', 'both',   ),
        'xmm5.lo.zx':   ( 'uint', 'both',   ),
        'xmm6.lo.zx':   ( 'uint', 'both',   ),
        'xmm7.lo.zx':   ( 'uint', 'both',   ),
        'xmm8.lo.zx':   ( 'uint', 'both',   ),
        'xmm9.lo.zx':   ( 'uint', 'both',   ),
        'xmm10.lo.zx':  ( 'uint', 'both',   ),
        'xmm11.lo.zx':  ( 'uint', 'both',   ),
        'xmm12.lo.zx':  ( 'uint', 'both',   ),
        'xmm13.lo.zx':  ( 'uint', 'both',   ),
        'xmm14.lo.zx':  ( 'uint', 'both',   ),
        'xmm15.lo.zx':  ( 'uint', 'both',   ),
        'xmm0.dw0':     ( 'uint', 'both',   ),
        'xmm1.dw0':     ( 'uint', 'both',   ),
        'xmm2.dw0':     ( 'uint', 'both',   ),
        'xmm3.dw0':     ( 'uint', 'both',   ),
        'xmm4.dw0':     ( 'uint', 'both',   ),
        'xmm5.dw0':     ( 'uint', 'both',   ),
        'xmm6.dw0':     ( 'uint', 'both',   ),
        'xmm7.dw0':     ( 'uint', 'both',   ),
        'xmm8.dw0':     ( 'uint', 'both',   ),
        'xmm9.dw0':     ( 'uint', 'both',   ),
        'xmm10.dw0':    ( 'uint', 'both',   ),
        'xmm11.dw0':    ( 'uint', 'both',   ),
        'xmm12.dw0':    ( 'uint', 'both',   ),
        'xmm13.dw0':    ( 'uint', 'both',   ),
        'xmm14.dw0':    ( 'uint', 'both',   ),
        'xmm15.dw0':    ( 'uint', 'both',   ),
        'xmm15_dw0':    ( 'uint', 'both',   ), # Historical misspelling of 'xmm15.dw0', kept as an alias.
        # AVX registers.
        'ymm0':         ( 'uint', 'both',   ),
        'ymm1':         ( 'uint', 'both',   ),
        'ymm2':         ( 'uint', 'both',   ),
        'ymm3':         ( 'uint', 'both',   ),
        'ymm4':         ( 'uint', 'both',   ),
        'ymm5':         ( 'uint', 'both',   ),
        'ymm6':         ( 'uint', 'both',   ),
        'ymm7':         ( 'uint', 'both',   ),
        'ymm8':         ( 'uint', 'both',   ),
        'ymm9':         ( 'uint', 'both',   ),
        'ymm10':        ( 'uint', 'both',   ),
        'ymm11':        ( 'uint', 'both',   ),
        'ymm12':        ( 'uint', 'both',   ),
        'ymm13':        ( 'uint', 'both',   ),
        'ymm14':        ( 'uint', 'both',   ),
        'ymm15':        ( 'uint', 'both',   ),

        # Special ones.
        'value.xcpt':   ( 'uint', 'output', ),
    };

    def __init__(self, sField, sOp, sValue, sType):
        """
        Creates a modifier.

        sField must be a key of kdFields and sOp one of kasOperators; all
        four arguments must be strings.  This is only checked by assertions.
        """
        assert sField in self.kdFields;
        assert sOp in self.kasOperators;
        self.sField = sField;
        self.sOp    = sOp;
        self.sValue = sValue;
        self.sType  = sType;
        assert isinstance(sField, str);
        assert isinstance(sOp, str);
        assert isinstance(sType, str);
        assert isinstance(sValue, str);
1266
1267
class TestSelector(object):
    """
    A single selector expression ('variable' 'op' 'value') restricting when
    an instruction test applies.
    """

    ## The comparison operators a selector may use.
    kasCompareOps = [ '==', '!=' ];

    ## The selector variables, each mapping its valid values to an identifier.
    kdVariables = {
        # Operand size.
        'size':     { 'o16': 'size_o16', 'o32': 'size_o32', 'o64': 'size_o64', },
        # VEX.L value.
        'vex.l':    { '0': 'vexl_0', '1': 'vexl_1', },
        # Execution ring.
        'ring':     {
            '0':    'ring_0',
            '1':    'ring_1',
            '2':    'ring_2',
            '3':    'ring_3',
            '0..2': 'ring_0_thru_2',
            '1..3': 'ring_1_thru_3',
        },
        # Basic code mode.
        'codebits': { '64': 'code_64bit', '32': 'code_32bit', '16': 'code_16bit', },
        # cpu modes.
        'mode':     {
            'real': 'mode_real',
            'prot': 'mode_prot',
            'long': 'mode_long',
            'v86':  'mode_v86',
            'smm':  'mode_smm',
            'vmx':  'mode_vmx',
            'svm':  'mode_svm',
        },
        # paging on/off
        'paging':   { 'on': 'paging_on', 'off': 'paging_off', },
        # CPU vendor
        'vendor':   { 'amd': 'vendor_amd', 'intel': 'vendor_intel', 'via': 'vendor_via', },
    };

    ## Shorthand predicates and the variable expressions they stand for.
    kdPredicates = {
        # Operand size.
        'o16':          'size==o16',
        'o32':          'size==o32',
        'o64':          'size==o64',
        # Execution ring.
        'ring0':        'ring==0',
        '!ring0':       'ring==1..3',
        'ring1':        'ring==1',
        'ring2':        'ring==2',
        'ring3':        'ring==3',
        'user':         'ring==3',
        'supervisor':   'ring==0..2',
        # Code bitness.
        '16-bit':       'codebits==16',
        '32-bit':       'codebits==32',
        '64-bit':       'codebits==64',
        # CPU mode.
        'real':         'mode==real',
        'prot':         'mode==prot',
        'long':         'mode==long',
        'v86':          'mode==v86',
        'smm':          'mode==smm',
        'vmx':          'mode==vmx',
        'svm':          'mode==svm',
        # Paging.
        'paging':       'paging==on',
        '!paging':      'paging==off',
        # CPU vendor.
        'amd':          'vendor==amd',
        '!amd':         'vendor!=amd',
        'intel':        'vendor==intel',
        '!intel':       'vendor!=intel',
        'via':          'vendor==via',
        '!via':         'vendor!=via',
    };

    def __init__(self, sVariable, sOp, sValue):
        # The variable must be known, the operator supported and the value
        # one of those listed for the variable (checked by assertions only).
        assert sVariable in self.kdVariables;
        assert sOp in self.kasCompareOps;
        assert sValue in self.kdVariables[sVariable];
        self.sValue    = sValue;
        self.sOp       = sOp;
        self.sVariable = sVariable;
1364
1365
1366class InstructionTest(object):
1367 """
1368 Instruction test.
1369 """
1370
1371 def __init__(self, oInstr): # type: (InstructionTest, Instruction)
1372 self.oInstr = oInstr # type: InstructionTest
1373 self.aoInputs = [] # type: list(TestInOut)
1374 self.aoOutputs = [] # type: list(TestInOut)
1375 self.aoSelectors = [] # type: list(TestSelector)
1376
1377 def toString(self, fRepr = False):
1378 """
1379 Converts it to string representation.
1380 """
1381 asWords = [];
1382 if self.aoSelectors:
1383 for oSelector in self.aoSelectors:
1384 asWords.append('%s%s%s' % (oSelector.sVariable, oSelector.sOp, oSelector.sValue,));
1385 asWords.append('/');
1386
1387 for oModifier in self.aoInputs:
1388 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
1389
1390 asWords.append('->');
1391
1392 for oModifier in self.aoOutputs:
1393 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
1394
1395 if fRepr:
1396 return '<' + ' '.join(asWords) + '>';
1397 return ' '.join(asWords);
1398
1399 def __str__(self):
1400 """ Provide string represenation. """
1401 return self.toString(False);
1402
1403 def __repr__(self):
1404 """ Provide unambigious string representation. """
1405 return self.toString(True);
1406
class Operand(object):
    """
    An operand of an instruction, described by where it is found and by its
    type.
    """

    def __init__(self, sWhere, sType):
        # Both must be known keys (checked by assertions only).
        assert sWhere in g_kdOpLocations, sWhere;
        assert sType  in g_kdOpTypes, sType;
        self.sType  = sType;        ##< g_kdOpTypes
        self.sWhere = sWhere;       ##< g_kdOpLocations

    def usesModRM(self):
        """ Returns True if the first character of the type is 'E', 'G' or 'M' (some form of ModR/M encoding). """
        chFirst = self.sType[0];
        return chFirst in ['E', 'G', 'M'];
1421
1422
1423
class Instruction(object): # pylint: disable=too-many-instance-attributes
    """
    Instruction.

    Holds everything collected about one instruction: the core attributes
    (mnemonic, operands, opcode, flags, ...), implementation details, where
    it was decoded from and some intermediate raw input fields.
    """

    def __init__(self, sSrcFile, iLine):
        ## @name Core attributes.
        ## @{
        self.oParent        = None      # type: Instruction
        self.sMnemonic      = None;
        self.sBrief         = None;
        self.asDescSections = []        # type: list(str)
        self.aoMaps         = []        # type: list(InstructionMap)
        self.aoOperands     = []        # type: list(Operand)
        self.sPrefix        = None;     ##< Single prefix: None, 'none', 0x66, 0xf3, 0xf2
        self.sOpcode        = None      # type: str
        self.sSubOpcode     = None      # type: str
        self.sEncoding      = None;
        self.asFlTest       = None;
        self.asFlModify     = None;
        self.asFlUndefined  = None;
        self.asFlSet        = None;
        self.asFlClear      = None;
        self.dHints         = {};       ##< Dictionary of instruction hints, flags, whatnot. (Dictionary for speed; dummy value).
        self.sDisEnum       = None;     ##< OP_XXXX value.  Default is based on the uppercased mnemonic.
        self.asCpuIds       = [];       ##< The CPUID feature bit names for this instruction. If multiple, assume AND.
        self.asReqFeatures  = [];       ##< Which features are required to be enabled to run this instruction.
        self.aoTests        = []        # type: list(InstructionTest)
        self.sMinCpu        = None;     ##< Indicates the minimum CPU required for the instruction. Not set when oCpuExpr is.
        self.oCpuExpr       = None;     ##< Some CPU restriction expression...
        self.sGroup         = None;
        self.fUnused        = False;    ##< Unused instruction.
        self.fInvalid       = False;    ##< Invalid instruction (like UD2).
        self.sInvalidStyle  = None;     ##< Invalid behaviour style (g_kdInvalidStyles),
        self.sXcptType      = None;     ##< Exception type (g_kdXcptTypes).
        ## @}

        ## @name Implementation attributes.
        ## @{
        self.sStats         = None;
        self.sFunction      = None;
        self.fStub          = False;
        self.fUdStub        = False;
        ## @}

        ## @name Decoding info
        ## @{
        self.sSrcFile       = sSrcFile;
        self.iLineCreated   = iLine;
        self.iLineCompleted = None;
        self.cOpTags        = 0;
        self.iLineFnIemOpMacro  = -1;
        self.iLineMnemonicMacro = -1;
        ## @}

        ## @name Intermediate input fields.
        ## @{
        self.sRawDisOpNo    = None;
        self.asRawDisParams = [];
        self.sRawIemOpFlags = None;
        self.sRawOldOpcodes = None;
        self.asCopyTests    = [];
        ## @}

    def toString(self, fRepr = False):
        """
        Turn object into a string.

        The result is a '; ' separated list of 'field=value' items where
        fields whose value is None are left out.  It is wrapped in angle
        brackets when fRepr is True.
        """
        aasFields = [];

        aasFields.append(['opcode',    self.sOpcode]);
        if self.sPrefix:
            aasFields.append(['prefix', self.sPrefix]);
        aasFields.append(['mnemonic',  self.sMnemonic]);
        for iOperand, oOperand in enumerate(self.aoOperands):
            aasFields.append(['op%u' % (iOperand + 1,), '%s:%s' % (oOperand.sWhere, oOperand.sType,)]);
        if self.aoMaps:         aasFields.append(['maps', ','.join([oMap.sName for oMap in self.aoMaps])]);
        aasFields.append(['encoding',  self.sEncoding]);
        if self.dHints:         aasFields.append(['hints', ','.join(self.dHints.keys())]);
        aasFields.append(['disenum',   self.sDisEnum]);
        if self.asCpuIds:       aasFields.append(['cpuid', ','.join(self.asCpuIds)]);
        aasFields.append(['group',     self.sGroup]);
        if self.fUnused:        aasFields.append(['unused', 'True']);
        if self.fInvalid:       aasFields.append(['invalid', 'True']);
        aasFields.append(['invlstyle', self.sInvalidStyle]);
        aasFields.append(['fltest',    self.asFlTest]);
        aasFields.append(['flmodify',  self.asFlModify]);
        aasFields.append(['flundef',   self.asFlUndefined]);
        aasFields.append(['flset',     self.asFlSet]);
        aasFields.append(['flclear',   self.asFlClear]);
        aasFields.append(['mincpu',    self.sMinCpu]);
        aasFields.append(['stats',     self.sStats]);
        aasFields.append(['sFunction', self.sFunction]);
        if self.fStub:          aasFields.append(['fStub', 'True']);
        if self.fUdStub:        aasFields.append(['fUdStub', 'True']);
        if self.cOpTags:        aasFields.append(['optags', str(self.cOpTags)]);
        if self.iLineFnIemOpMacro  != -1: aasFields.append(['FNIEMOP_XXX', str(self.iLineFnIemOpMacro)]);
        if self.iLineMnemonicMacro != -1: aasFields.append(['IEMOP_MNEMMONICn', str(self.iLineMnemonicMacro)]);

        sRet = '<' if fRepr else '';
        for sField, sValue in aasFields:
            if sValue is not None:
                if len(sRet) > 1:   # Something besides the optional '<' is there already.
                    sRet += '; ';
                sRet += '%s=%s' % (sField, sValue,);
        if fRepr:
            sRet += '>';

        return sRet;

    def __str__(self):
        """ Provide string representation. """
        return self.toString(False);

    def __repr__(self):
        """ Provide unambiguous string representation. """
        return self.toString(True);

    def copy(self, oMap = None, sOpcode = None, sSubOpcode = None, sPrefix = None):
        """
        Makes a copy of the object for the purpose of putting in a different map
        or a different place in the current map.

        The copy gets this object as parent (oParent).  oMap, sOpcode,
        sSubOpcode and sPrefix replace the corresponding attribute in the
        copy when given (and not empty).  The list and dictionary attributes
        are copied shallowly; the asFlXxx flag lists are shared with the
        original.
        """
        oCopy = Instruction(self.sSrcFile, self.iLineCreated);

        oCopy.oParent           = self;
        oCopy.sMnemonic         = self.sMnemonic;
        oCopy.sBrief            = self.sBrief;
        oCopy.asDescSections    = list(self.asDescSections);
        oCopy.aoMaps            = [oMap,]    if oMap       else list(self.aoMaps);
        oCopy.aoOperands        = list(self.aoOperands); ## Deeper copy?
        oCopy.sPrefix           = sPrefix    if sPrefix    else self.sPrefix;
        oCopy.sOpcode           = sOpcode    if sOpcode    else self.sOpcode;
        oCopy.sSubOpcode        = sSubOpcode if sSubOpcode else self.sSubOpcode;
        oCopy.sEncoding         = self.sEncoding;
        oCopy.asFlTest          = self.asFlTest;
        oCopy.asFlModify        = self.asFlModify;
        oCopy.asFlUndefined     = self.asFlUndefined;
        oCopy.asFlSet           = self.asFlSet;
        oCopy.asFlClear         = self.asFlClear;
        oCopy.dHints            = dict(self.dHints);
        oCopy.sDisEnum          = self.sDisEnum;
        oCopy.asCpuIds          = list(self.asCpuIds);
        oCopy.asReqFeatures     = list(self.asReqFeatures);
        oCopy.aoTests           = list(self.aoTests); ## Deeper copy?
        oCopy.sMinCpu           = self.sMinCpu;
        oCopy.oCpuExpr          = self.oCpuExpr;
        oCopy.sGroup            = self.sGroup;
        oCopy.fUnused           = self.fUnused;
        oCopy.fInvalid          = self.fInvalid;
        oCopy.sInvalidStyle     = self.sInvalidStyle;
        oCopy.sXcptType         = self.sXcptType;

        oCopy.sStats            = self.sStats;
        oCopy.sFunction         = self.sFunction;
        oCopy.fStub             = self.fStub;
        oCopy.fUdStub           = self.fUdStub;

        oCopy.iLineCompleted    = self.iLineCompleted;
        oCopy.cOpTags           = self.cOpTags;
        oCopy.iLineFnIemOpMacro = self.iLineFnIemOpMacro;
        oCopy.iLineMnemonicMacro = self.iLineMnemonicMacro;

        oCopy.sRawDisOpNo       = self.sRawDisOpNo;
        oCopy.asRawDisParams    = list(self.asRawDisParams);
        oCopy.sRawIemOpFlags    = self.sRawIemOpFlags;
        oCopy.sRawOldOpcodes    = self.sRawOldOpcodes;
        oCopy.asCopyTests       = list(self.asCopyTests);

        return oCopy;

    def getOpcodeByte(self):
        """
        Decodes sOpcode into a byte range integer value.

        Supported forms:
            - '0xNN':   the hex value itself.
            - '/r':     r << 3
            - '11/r':   (r << 3) | 0xc0
            - '!11/r':  (r << 3) | 0x80
        where r is a single digit in the range 0 thru 7 (it is placed in
        bits 3 thru 5, so larger digits would not fit).

        Raises exception if sOpcode is None or invalid.
        """
        if self.sOpcode is None:
            raise Exception('No opcode byte for %s!' % (self,));
        sOpcode = str(self.sOpcode);    # pylint type confusion workaround.

        # Full hex byte form.
        if sOpcode[:2] == '0x':
            return int(sOpcode, 16);

        # Only 0..7 fit into the three bits the digit is shifted into.
        sRegDigits = '01234567';

        # The /r form:
        if len(sOpcode) == 2 and sOpcode[0] == '/' and sOpcode[1] in sRegDigits:
            return int(sOpcode[1:]) << 3;

        # The 11/r form:
        if len(sOpcode) == 4 and sOpcode.startswith('11/') and sOpcode[-1] in sRegDigits:
            return (int(sOpcode[-1:]) << 3) | 0xc0;

        # The !11/r form (returns bits 7:6 = 10b, i.e. 0x80):
        ## @todo this doesn't really work...
        if len(sOpcode) == 5 and sOpcode.startswith('!11/') and sOpcode[-1] in sRegDigits:
            return (int(sOpcode[-1:]) << 3) | 0x80;

        raise Exception('unsupported opcode byte spec "%s" for %s' % (sOpcode, self,));

    @staticmethod
    def _flagsToIntegerMask(asFlags):
        """
        Returns the integer mask value for asFlags.

        Each flag is translated via g_kdEFlagsMnemonics into a constant name
        (which must not be one of the '!' prefixed ones) and the values
        g_kdX86EFlagsConstants has for these are ORed together.  Returns 0 if
        asFlags is None or empty.
        """
        uRet = 0;
        if asFlags:
            for sFlag in asFlags:
                sConstant = g_kdEFlagsMnemonics[sFlag];
                assert sConstant[0] != '!', sConstant
                uRet |= g_kdX86EFlagsConstants[sConstant];
        return uRet;

    def getTestedFlagsMask(self):
        """ Returns asFlTest into a integer mask value """
        return self._flagsToIntegerMask(self.asFlTest);

    def getModifiedFlagsMask(self):
        """ Returns asFlModify into a integer mask value """
        return self._flagsToIntegerMask(self.asFlModify);

    def getUndefinedFlagsMask(self):
        """ Returns asFlUndefined into a integer mask value """
        return self._flagsToIntegerMask(self.asFlUndefined);

    def getSetFlagsMask(self):
        """ Returns asFlSet into a integer mask value """
        return self._flagsToIntegerMask(self.asFlSet);

    def getClearedFlagsMask(self):
        """ Returns asFlClear into a integer mask value """
        return self._flagsToIntegerMask(self.asFlClear);

    def onlyInVexMaps(self):
        """ Returns True if only in VEX maps, otherwise False.  (No maps -> False) """
        if not self.aoMaps:
            return False;
        return all(oMap.isVexMap() for oMap in self.aoMaps);
1662
1663
1664
## List of all the instructions.
g_aoAllInstructions = []            # type: list(Instruction)

## Dictionary of all the instructions, keyed by statistics name (opstat).
g_dAllInstructionsByStat = {}       # type: dict(Instruction)

## Dictionary of all the instructions, keyed by function name (opfunction).
## Each value is a list of instructions.
g_dAllInstructionsByFunction = {}   # type: dict(list(Instruction))

## List of the instructions tagged by oponlytest.
g_aoOnlyTestInstructions = []       # type: list(Instruction)
1676
1677## Instruction maps.
1678g_aoInstructionMaps = [
1679 InstructionMap('one', 'g_apfnOneByteMap', sSelector = 'byte'),
1680 InstructionMap('grp1_80', asLeadOpcodes = ['0x80',], sSelector = '/r'),
1681 InstructionMap('grp1_81', asLeadOpcodes = ['0x81',], sSelector = '/r'),
1682 InstructionMap('grp1_82', asLeadOpcodes = ['0x82',], sSelector = '/r'),
1683 InstructionMap('grp1_83', asLeadOpcodes = ['0x83',], sSelector = '/r'),
1684 InstructionMap('grp1a', asLeadOpcodes = ['0x8f',], sSelector = '/r'),
1685 InstructionMap('grp2_c0', asLeadOpcodes = ['0xc0',], sSelector = '/r'),
1686 InstructionMap('grp2_c1', asLeadOpcodes = ['0xc1',], sSelector = '/r'),
1687 InstructionMap('grp2_d0', asLeadOpcodes = ['0xd0',], sSelector = '/r'),
1688 InstructionMap('grp2_d1', asLeadOpcodes = ['0xd1',], sSelector = '/r'),
1689 InstructionMap('grp2_d2', asLeadOpcodes = ['0xd2',], sSelector = '/r'),
1690 InstructionMap('grp2_d3', asLeadOpcodes = ['0xd3',], sSelector = '/r'),
1691 ## @todo g_apfnEscF1_E0toFF
1692 InstructionMap('grp3_f6', asLeadOpcodes = ['0xf6',], sSelector = '/r'),
1693 InstructionMap('grp3_f7', asLeadOpcodes = ['0xf7',], sSelector = '/r'),
1694 InstructionMap('grp4', asLeadOpcodes = ['0xfe',], sSelector = '/r'),
1695 InstructionMap('grp5', asLeadOpcodes = ['0xff',], sSelector = '/r'),
1696 InstructionMap('grp11_c6_m', asLeadOpcodes = ['0xc6',], sSelector = '!11 /r'),
1697 InstructionMap('grp11_c6_r', asLeadOpcodes = ['0xc6',], sSelector = '11'), # xabort
1698 InstructionMap('grp11_c7_m', asLeadOpcodes = ['0xc7',], sSelector = '!11 /r'),
1699 InstructionMap('grp11_c7_r', asLeadOpcodes = ['0xc7',], sSelector = '11'), # xbegin
1700
1701 InstructionMap('two0f', 'g_apfnTwoByteMap', asLeadOpcodes = ['0x0f',], sDisParse = 'IDX_ParseTwoByteEsc'),
1702 InstructionMap('grp6', 'g_apfnGroup6', asLeadOpcodes = ['0x0f', '0x00',], sSelector = '/r'),
1703 InstructionMap('grp7_m', 'g_apfnGroup7Mem', asLeadOpcodes = ['0x0f', '0x01',], sSelector = '!11 /r'),
1704 InstructionMap('grp7_r', asLeadOpcodes = ['0x0f', '0x01',], sSelector = '11'),
1705 InstructionMap('grp8', asLeadOpcodes = ['0x0f', '0xba',], sSelector = '/r'),
1706 InstructionMap('grp9', 'g_apfnGroup9RegReg', asLeadOpcodes = ['0x0f', '0xc7',], sSelector = 'mod /r'),
1707 ## @todo What about g_apfnGroup9MemReg?
1708 InstructionMap('grp10', None, asLeadOpcodes = ['0x0f', '0xb9',], sSelector = '/r'), # UD1 /w modr/m
1709 InstructionMap('grp12', 'g_apfnGroup12RegReg', asLeadOpcodes = ['0x0f', '0x71',], sSelector = 'mod /r'),
1710 InstructionMap('grp13', 'g_apfnGroup13RegReg', asLeadOpcodes = ['0x0f', '0x72',], sSelector = 'mod /r'),
1711 InstructionMap('grp14', 'g_apfnGroup14RegReg', asLeadOpcodes = ['0x0f', '0x73',], sSelector = 'mod /r'),
1712 InstructionMap('grp15', 'g_apfnGroup15MemReg', asLeadOpcodes = ['0x0f', '0xae',], sSelector = 'memreg /r'),
1713 ## @todo What about g_apfnGroup15RegReg?
1714 InstructionMap('grp16', asLeadOpcodes = ['0x0f', '0x18',], sSelector = 'mod /r'),
1715 InstructionMap('grpA17', asLeadOpcodes = ['0x0f', '0x78',], sSelector = '/r'), # AMD: EXTRQ weirdness
1716 InstructionMap('grpP', asLeadOpcodes = ['0x0f', '0x0d',], sSelector = '/r'), # AMD: prefetch
1717
1718 InstructionMap('three0f38', 'g_apfnThreeByte0f38', asLeadOpcodes = ['0x0f', '0x38',]),
1719 InstructionMap('three0f3a', 'g_apfnThreeByte0f3a', asLeadOpcodes = ['0x0f', '0x3a',]),
1720
1721 InstructionMap('vexmap1', 'g_apfnVexMap1', sEncoding = 'vex1'),
1722 InstructionMap('vexgrp12', 'g_apfnVexGroup12RegReg', sEncoding = 'vex1', asLeadOpcodes = ['0x71',], sSelector = 'mod /r'),
1723 InstructionMap('vexgrp13', 'g_apfnVexGroup13RegReg', sEncoding = 'vex1', asLeadOpcodes = ['0x72',], sSelector = 'mod /r'),
1724 InstructionMap('vexgrp14', 'g_apfnVexGroup14RegReg', sEncoding = 'vex1', asLeadOpcodes = ['0x73',], sSelector = 'mod /r'),
1725 InstructionMap('vexgrp15', 'g_apfnVexGroup15MemReg', sEncoding = 'vex1', asLeadOpcodes = ['0xae',], sSelector = 'memreg /r'),
1726 InstructionMap('vexgrp17', 'g_apfnVexGroup17_f3', sEncoding = 'vex1', asLeadOpcodes = ['0xf3',], sSelector = '/r'),
1727
1728 InstructionMap('vexmap2', 'g_apfnVexMap2', sEncoding = 'vex2'),
1729 InstructionMap('vexmap3', 'g_apfnVexMap3', sEncoding = 'vex3'),
1730
1731 InstructionMap('3dnow', asLeadOpcodes = ['0x0f', '0x0f',]),
1732 InstructionMap('xopmap8', sEncoding = 'xop8'),
1733 InstructionMap('xopmap9', sEncoding = 'xop9'),
1734 InstructionMap('xopgrp1', sEncoding = 'xop9', asLeadOpcodes = ['0x01'], sSelector = '/r'),
1735 InstructionMap('xopgrp2', sEncoding = 'xop9', asLeadOpcodes = ['0x02'], sSelector = '/r'),
1736 InstructionMap('xopgrp3', sEncoding = 'xop9', asLeadOpcodes = ['0x12'], sSelector = '/r'),
1737 InstructionMap('xopmap10', sEncoding = 'xop10'),
1738 InstructionMap('xopgrp4', sEncoding = 'xop10', asLeadOpcodes = ['0x12'], sSelector = '/r'),
1739];
## Lookup table: InstructionMap.sName -> InstructionMap instance.
g_dInstructionMaps = { oInstrMap.sName: oInstrMap for oInstrMap in g_aoInstructionMaps };
## Lookup table: InstructionMap.sIemName -> InstructionMap instance.
g_dInstructionMapsByIemName = { oInstrMap.sIemName: oInstrMap for oInstrMap in g_aoInstructionMaps };
1742
1743
1744#
1745# Decoder functions.
1746#
1747
class DecoderFunction(object):
    """
    Book-keeping record for a single decoder function.

    Mainly used when searching the enclosing scope for variables that are
    referenced by microcode blocks.
    """

    def __init__(self, sSrcFile, iBeginLine, sName, asDefArgs):
        ## The source file the function is defined in.
        self.sSrcFile   = sSrcFile;
        ## The line the function starts on.
        self.iBeginLine = iBeginLine;
        ## The line the function (probably) ends on; -1 until complete() is called.
        self.iEndLine   = -1;
        ## The function name.
        self.sName      = sName;
        ## The FNIEMOP*DEF/STUB* macro argument list, 0th element is the macro name.
        self.asDefArgs  = asDefArgs;
        ## The raw lines the function is made up of; empty until complete() is called.
        self.asLines    = [] # type: list(str)

    def complete(self, iEndLine, asLines):
        """
        Records the end line and the raw source lines of the function.

        May only be called once per instance (asserted).
        """
        assert self.iEndLine == -1;
        self.asLines  = asLines;
        self.iEndLine = iEndLine;
1770
1771
1772#
1773# "Microcode" statements and blocks
1774#
1775
class McStmt(object):
    """
    A single statement inside a microcode block.
    """

    def __init__(self, sName, asParams):
        self.sName    = sName;      ##< The statement name: 'IEM_MC_XXX' or 'C++'.
        self.asParams = asParams;   ##< The statement parameters.
        self.oUser    = None;       ##< Initialized to None; not used by this class itself.

    def renderCode(self, cchIndent = 0):
        """
        Returns the statement as one line of code, indented by cchIndent
        spaces and terminated by ';' and a newline.
        """
        sArgs = ', '.join(self.asParams);
        return '%s%s(%s);\n' % (' ' * cchIndent, self.sName, sArgs,);

    @staticmethod
    def renderCodeForList(aoStmts, cchIndent = 0):
        """
        Returns the concatenated renderCode() output of all the statements in
        aoStmts, each rendered with the given indentation.
        """
        return ''.join(oCurStmt.renderCode(cchIndent) for oCurStmt in aoStmts);

    @staticmethod
    def findStmtByNames(aoStmts, dNames):
        """
        Searches aoStmts, descending into the branches of conditional
        statements, for the first statement whose name is a key in dNames.

        Returns the statement found, None if there is no match.

        Note! dNames is a dictionary only to get quick lookups, the values
              are never looked at.
        """
        for oCurStmt in aoStmts:
            if oCurStmt.sName in dNames:
                return oCurStmt;
            if isinstance(oCurStmt, McStmtCond):
                # The if-branch is searched first, the else-branch only when that comes up empty.
                oFound = (   McStmt.findStmtByNames(oCurStmt.aoIfBranch, dNames)
                          or McStmt.findStmtByNames(oCurStmt.aoElseBranch, dNames));
                if oFound:
                    return oFound;
        return None;

    def isCppStmt(self):
        """ Returns True if the statement name starts with 'C++', otherwise False. """
        return self.sName.startswith('C++');
1820
class McStmtCond(McStmt):
    """
    Conditional statement with an if-branch and an optional else-branch.

    This is the base class for the IEM_MC_IF_XXX statements.
    """

    def __init__(self, sName, asParams):
        McStmt.__init__(self, sName, asParams);
        self.aoIfBranch   = [];     ##< Statements in the if-branch.
        self.aoElseBranch = [];     ##< Statements in the else-branch, empty if there is none.

    def renderCode(self, cchIndent = 0):
        """
        Renders the conditional and its branches, the branch bodies being
        indented four additional spaces.  The IEM_MC_ELSE part is only
        emitted when the else-branch is not empty.
        """
        sIndent = ' ' * cchIndent;
        asParts = [
            sIndent + self.sName + '(' + ', '.join(self.asParams) + ') {\n',
            self.renderCodeForList(self.aoIfBranch, cchIndent + 4),
        ];
        if self.aoElseBranch:
            asParts.append(sIndent + '} IEM_MC_ELSE() {\n');
            asParts.append(self.renderCodeForList(self.aoElseBranch, cchIndent + 4));
        asParts.append(sIndent + '} IEM_MC_ENDIF();\n');
        return ''.join(asParts);
1838
class McStmtVar(McStmt):
    """
    Variable declaration statement: IEM_MC_LOCAL_VAR, IEM_MC_LOCAL_CONST.
    """

    def __init__(self, sName, asParams, sType, sVarName, sConstValue = None):
        McStmt.__init__(self, sName, asParams);
        ## The variable name.
        self.sVarName    = sVarName;
        ## The variable type.
        self.sType       = sType;
        ## The constant value, None if not const.
        self.sConstValue = sConstValue;
1846
class McStmtArg(McStmtVar):
    """
    Argument declaration statement: IEM_MC_ARG, IEM_MC_ARG_CONST, IEM_MC_ARG_LOCAL_REF.
    """

    def __init__(self, sName, asParams, sType, sVarName, iArg, sConstValue = None, sRef = None, sRefType = 'none'):
        McStmtVar.__init__(self, sName, asParams, sType, sVarName, sConstValue);
        ## The argument number as given by the caller.
        self.iArg     = iArg;
        ## The kind of reference: 'local', 'none'.
        self.sRefType = sRefType;
        ## The reference string (local variable, register).
        self.sRef     = sRef;
        assert sRefType in ('none', 'local');
1855
1856
class McStmtCall(McStmt):
    """
    Call statement: IEM_MC_CALL_*.
    """

    def __init__(self, sName, asParams, iFnParam, iRcNameParam = -1):
        McStmt.__init__(self, sName, asParams);
        ## Index into asParams of the function being called.
        self.idxFn     = iFnParam;
        ## Index into asParams of the entry following the function.
        self.idxParams = iFnParam + 1;
        ## The function being called (asParams[iFnParam]).
        self.sFn       = asParams[iFnParam];
        ## The asParams[iRcNameParam] entry, None if iRcNameParam is negative.
        if iRcNameParam >= 0:
            self.iRcName = asParams[iRcNameParam];
        else:
            self.iRcName = None;
1865
class McCppGeneric(McStmt):
    """
    Generic C++/C statement.

    The code is kept as the one and only statement parameter.
    """

    def __init__(self, sCode, fDecode, sName = 'C++'):
        McStmt.__init__(self, sName, [sCode,]);
        ## The decode flag given by the caller; selects the annotation used by renderCode.
        self.fDecode = fDecode;

    def renderCode(self, cchIndent = 0):
        """
        Returns the indented code with a '// C++ decode' or '// C++ normal'
        annotation inserted in front of every newline.
        """
        sAnnotation = ' // C++ decode\n' if self.fDecode else ' // C++ normal\n';
        sCode       = ' ' * cchIndent + self.asParams[0] + '\n';
        return sCode.replace('\n', sAnnotation);
1881
class McCppCond(McStmtCond):
    """
    C++/C 'if' statement.

    The condition expression is kept as the one and only statement parameter.
    """

    def __init__(self, sCode, fDecode):
        McStmtCond.__init__(self, 'C++/if', [sCode,]);
        ## The decode flag given by the caller; selects the annotation used by renderCode.
        self.fDecode = fDecode;

    def renderCode(self, cchIndent = 0):
        """
        Renders the if statement with braces around the branch bodies, the
        bodies being indented four additional spaces.  The else part is only
        emitted when the else-branch is not empty.
        """
        sIndent = ' ' * cchIndent;
        if self.fDecode:
            sAnnotation = '// C++ decode';
        else:
            sAnnotation = '// C++ normal';

        asParts = [
            sIndent + 'if (' + self.asParams[0] + ') ' + sAnnotation + '\n',
            sIndent + '{\n',
            self.renderCodeForList(self.aoIfBranch, cchIndent + 4),
            sIndent + '}\n',
        ];
        if self.aoElseBranch:
            asParts.extend([
                sIndent + 'else ' + sAnnotation + '\n',
                sIndent + '{\n',
                self.renderCodeForList(self.aoElseBranch, cchIndent + 4),
                sIndent + '}\n',
            ]);
        return ''.join(asParts);
1902
class McCppPreProc(McCppGeneric):
    """
    C++/C preprocessor directive.
    """

    def __init__(self, sCode):
        McCppGeneric.__init__(self, sCode, fDecode = False, sName = 'C++/preproc');

    def renderCode(self, cchIndent = 0):
        """
        Returns the directive followed by a newline.

        The cchIndent value is ignored, the directive is never indented.
        """
        sDirective = self.asParams[0];
        return sDirective + '\n';
1912
1913
class McBlock(object):
    """
    Microcode block (IEM_MC_BEGIN ... IEM_MC_END).

    Holds the location and raw lines of a block and can decode the raw code
    into a list of McStmt instances (see decode and decodeCode).
    """

    def __init__(self, sSrcFile, iBeginLine, offBeginLine, oFunction, iInFunction, cchIndent = None):
        self.sSrcFile     = sSrcFile;           ##< The source file containing the block.
        self.iBeginLine   = iBeginLine;         ##< The line with the IEM_MC_BEGIN statement.
        self.offBeginLine = offBeginLine;       ##< The offset of the IEM_MC_BEGIN statement within the line.
        self.iEndLine     = -1;                 ##< The line with the IEM_MC_END statement.
        self.offEndLine   = 0;                  ##< The offset of the IEM_MC_END statement within the line.
        self.oFunction    = oFunction;          ##< The function the block resides in.
        self.sFunction    = oFunction.sName;    ##< The name of the function the block resides in. DEPRECATED.
        self.iInFunction  = iInFunction;        ##< The block number within the function.
        ## The block indentation.  Note! Both None and zero select offBeginLine.
        self.cchIndent    = cchIndent if cchIndent else offBeginLine;
        self.asLines      = [] # type: list(str) ##< The raw lines the block is made up of.
        ## Decoded statements in the block.
        self.aoStmts      = [] # type: list(McStmt)

    def complete(self, iEndLine, offEndLine, asLines):
        """
        Completes the microcode block by recording where it ends and the raw
        lines it is made up of.

        May only be called once per instance (asserted).
        """
        assert self.iEndLine == -1;
        self.iEndLine   = iEndLine;
        self.offEndLine = offEndLine;
        self.asLines    = asLines;

    def raiseDecodeError(self, sRawCode, off, sMessage):
        """
        Raises a decoding error (ParserException) for offset off in sRawCode.

        The reported line is the block start line plus the number of newlines
        preceding off, the reported column is one-based.
        """
        offStartOfLine = sRawCode.rfind('\n', 0, off) + 1;
        iLine          = sRawCode.count('\n', 0, off);
        raise ParserException('%s:%d:%d: parsing error: %s'
                              % (self.sSrcFile, self.iBeginLine + iLine, off - offStartOfLine + 1, sMessage,));

    def raiseStmtError(self, sName, sMessage):
        """ Raises a statement parser error (ParserException) citing the block start line. """
        raise ParserException('%s:%d: %s: parsing error: %s' % (self.sSrcFile, self.iBeginLine, sName, sMessage,));

    def checkStmtParamCount(self, sName, asParams, cParamsExpected):
        """
        Checks the parameter count, raising an error (ParserException) if it
        doesn't match.  Returns True otherwise.
        """
        if len(asParams) != cParamsExpected:
            raise ParserException('%s:%d: %s: Expected %s parameters, found %s!'
                                  % (self.sSrcFile, self.iBeginLine, sName, cParamsExpected, len(asParams),));
        return True;

    #
    # Statement parsers.
    #
    # These are static so they can be put directly into a name -> parser
    # dictionary; the McBlock instance is passed explicitly as oSelf.
    #

    @staticmethod
    def parseMcGeneric(oSelf, sName, asParams):
        """ Generic parser that returns a plain McStmt object. """
        _ = oSelf;
        return McStmt(sName, asParams);

    @staticmethod
    def parseMcGenericCond(oSelf, sName, asParams):
        """ Generic parser that returns a plain McStmtCond object. """
        _ = oSelf;
        return McStmtCond(sName, asParams);

    @staticmethod
    def parseMcBegin(oSelf, sName, asParams):
        """ IEM_MC_BEGIN - checks that there are two parameters, otherwise generic. """
        oSelf.checkStmtParamCount(sName, asParams, 2);
        return McBlock.parseMcGeneric(oSelf, sName, asParams);

    @staticmethod
    def parseMcArg(oSelf, sName, asParams):
        """ IEM_MC_ARG - parameters: type, name, argument number. """
        oSelf.checkStmtParamCount(sName, asParams, 3);
        return McStmtArg(sName, asParams, asParams[0], asParams[1], int(asParams[2]));

    @staticmethod
    def parseMcArgConst(oSelf, sName, asParams):
        """ IEM_MC_ARG_CONST - parameters: type, name, constant value, argument number. """
        oSelf.checkStmtParamCount(sName, asParams, 4);
        return McStmtArg(sName, asParams, asParams[0], asParams[1], int(asParams[3]), sConstValue = asParams[2]);

    @staticmethod
    def parseMcArgLocalRef(oSelf, sName, asParams):
        """ IEM_MC_ARG_LOCAL_REF - parameters: type, name, referenced local, argument number. """
        oSelf.checkStmtParamCount(sName, asParams, 4);
        return McStmtArg(sName, asParams, asParams[0], asParams[1], int(asParams[3]), sRef = asParams[2], sRefType = 'local');

    @staticmethod
    def parseMcArgLocalEFlags(oSelf, sName, asParams):
        """
        IEM_MC_ARG_LOCAL_EFLAGS - parameters: argument name, local name, argument number.

        Returns a tuple of two statements rather than a single one.
        """
        oSelf.checkStmtParamCount(sName, asParams, 3);
        # Note! We split this one up into IEM_MC_LOCAL and IEM_MC_ARG_LOCAL_REF.
        return (
            McStmtVar('IEM_MC_LOCAL', ['uint32_t', asParams[1],], 'uint32_t', asParams[1]),
            McStmtArg('IEM_MC_ARG_LOCAL_REF', ['uint32_t *', asParams[0], asParams[1], asParams[2]],
                      'uint32_t *', asParams[0], int(asParams[2]), sRef = asParams[1], sRefType = 'local'),
        );

    @staticmethod
    def parseMcLocal(oSelf, sName, asParams):
        """ IEM_MC_LOCAL - parameters: type, name. """
        oSelf.checkStmtParamCount(sName, asParams, 2);
        return McStmtVar(sName, asParams, asParams[0], asParams[1]);

    @staticmethod
    def parseMcLocalConst(oSelf, sName, asParams):
        """ IEM_MC_LOCAL_CONST - parameters: type, name, constant value. """
        oSelf.checkStmtParamCount(sName, asParams, 3);
        return McStmtVar(sName, asParams, asParams[0], asParams[1], sConstValue = asParams[2]);

    @staticmethod
    def parseMcCallAImpl(oSelf, sName, asParams):
        """
        IEM_MC_CALL_AIMPL_N - the argument count N is the last character of
        the statement name.  Parameter 0 is the return code name, parameter 1
        the function, followed by N arguments.
        """
        cArgs = int(sName[-1]);
        oSelf.checkStmtParamCount(sName, asParams, 2 + cArgs);
        return McStmtCall(sName, asParams, 1, 0);

    @staticmethod
    def parseMcCallVoidAImpl(oSelf, sName, asParams):
        """
        IEM_MC_CALL_VOID_AIMPL_N - the argument count N is the last character
        of the statement name.  Parameter 0 is the function, followed by N arguments.
        """
        cArgs = int(sName[-1]);
        oSelf.checkStmtParamCount(sName, asParams, 1 + cArgs);
        return McStmtCall(sName, asParams, 0);

    @staticmethod
    def parseMcCallAvxAImpl(oSelf, sName, asParams):
        """
        IEM_MC_CALL_AVX_AIMPL_N - the argument count N is the last character
        of the statement name.  Parameter 0 is the function, followed by N arguments.
        """
        cArgs = int(sName[-1]);
        oSelf.checkStmtParamCount(sName, asParams, 1 + cArgs);
        return McStmtCall(sName, asParams, 0);

    @staticmethod
    def parseMcCallFpuAImpl(oSelf, sName, asParams):
        """
        IEM_MC_CALL_FPU_AIMPL_N - the argument count N is the last character
        of the statement name.  Parameter 0 is the function, followed by N arguments.
        """
        cArgs = int(sName[-1]);
        oSelf.checkStmtParamCount(sName, asParams, 1 + cArgs);
        return McStmtCall(sName, asParams, 0);

    @staticmethod
    def parseMcCallMmxAImpl(oSelf, sName, asParams):
        """
        IEM_MC_CALL_MMX_AIMPL_N - the argument count N is the last character
        of the statement name.  Parameter 0 is the function, followed by N arguments.
        """
        cArgs = int(sName[-1]);
        oSelf.checkStmtParamCount(sName, asParams, 1 + cArgs);
        return McStmtCall(sName, asParams, 0);

    @staticmethod
    def parseMcCallSseAImpl(oSelf, sName, asParams):
        """
        IEM_MC_CALL_SSE_AIMPL_N - the argument count N is the last character
        of the statement name.  Parameter 0 is the function, followed by N arguments.
        """
        cArgs = int(sName[-1]);
        oSelf.checkStmtParamCount(sName, asParams, 1 + cArgs);
        return McStmtCall(sName, asParams, 0);

    @staticmethod
    def parseMcCallCImpl(oSelf, sName, asParams):
        """
        IEM_MC_CALL_CIMPL_N - the argument count N is the last character of
        the statement name.  Parameter 0 is the function, followed by N arguments.
        """
        cArgs = int(sName[-1]);
        oSelf.checkStmtParamCount(sName, asParams, 1 + cArgs);
        return McStmtCall(sName, asParams, 0);

    #
    # Low level text helpers.
    #

    @staticmethod
    def stripComments(sCode):
        """
        Returns sCode with C and C++ comments removed.

        A C++ comment is removed up to, but not including, the newline.  A C
        comment is replaced by a single space unless there already is
        whitespace on either side of it.  An unterminated comment drops the
        rest of the code and any whitespace preceding the comment.

        Note! Does not know about string literals.
        """
        off = 0;
        while off < len(sCode):
            off = sCode.find('/', off);
            if off < 0 or off + 1 >= len(sCode):
                break;

            if sCode[off + 1] == '/':
                # C++ comment.
                offEnd = sCode.find('\n', off + 2);
                if offEnd < 0:
                    return sCode[:off].rstrip();
                sCode = sCode[ : off] + sCode[offEnd : ];
                off += 1;   # Skips the newline that terminated the comment.

            elif sCode[off + 1] == '*':
                # C comment
                offEnd = sCode.find('*/', off + 2);
                if offEnd < 0:
                    return sCode[:off].rstrip();
                sSep = ' ';
                if (off > 0 and sCode[off - 1].isspace()) or (offEnd + 2 < len(sCode) and sCode[offEnd + 2].isspace()):
                    sSep = '';
                sCode = sCode[ : off] + sSep + sCode[offEnd + 2 : ];
                off += len(sSep);

            else:
                # Not a comment.
                off += 1;
        return sCode;

    @staticmethod
    def extractParam(sCode, offParam):
        """
        Extracts the parameter value at offParam in sCode.

        Returns stripped value and the offset of the terminating ',' or ')'
        (nested parentheses are skipped).  The offset equals len(sCode) if no
        terminator was found.
        """
        # Extract it.
        cNesting = 0;
        offStart = offParam;
        while offParam < len(sCode):
            ch = sCode[offParam];
            if ch == '(':
                cNesting += 1;
            elif ch == ')':
                if cNesting == 0:
                    break;
                cNesting -= 1;
            elif ch == ',' and cNesting == 0:
                break;
            offParam += 1;
        return (sCode[offStart : offParam].strip(), offParam);

    @staticmethod
    def extractParams(sCode, offOpenParen):
        """
        Parses a parameter list.

        Returns the list of parameter values and the offset of the closing parentheses.
        Returns (None, len(sCode)) if no closing parentheses was found.
        """
        assert sCode[offOpenParen] == '(';
        asParams = [];
        off = offOpenParen + 1;
        while off < len(sCode):
            ch = sCode[off];
            if ch.isspace():
                off += 1;
            elif ch != ')':
                (sParam, off) = McBlock.extractParam(sCode, off);
                asParams.append(sParam);
                if off >= len(sCode):
                    # Ran off the end without finding ',' or ')': report it
                    # via the documented return value rather than asserting.
                    break;
                if sCode[off] == ',':
                    off += 1;
            else:
                return (asParams, off);
        return (None, off);

    @staticmethod
    def findClosingBraces(sCode, off, offStop):
        """
        Finds the matching '}' for the '{' at off in sCode, not looking at or
        beyond offStop.

        Returns offset of the matching '}' on success, otherwise -1.

        Note! Does not take comments into account.
        """
        cDepth = 1;
        off   += 1;
        while off < offStop:
            offClose = sCode.find('}', off, offStop);
            if offClose < 0:
                break;
            # Account for any braces opened since the previous '}' before closing one level.
            cDepth += sCode.count('{', off, offClose);
            cDepth -= 1;
            if cDepth == 0:
                return offClose;
            off = offClose + 1;
        return -1;

    @staticmethod
    def countSpacesAt(sCode, off, offStop):
        """ Returns the number of whitespace characters at off in sCode, stopping at offStop. """
        offStart = off;
        while off < offStop and sCode[off].isspace():
            off += 1;
        return off - offStart;

    @staticmethod
    def skipSpacesAt(sCode, off, offStop):
        """ Returns first offset at or after off for a non-space character (offStop at most). """
        return off + McBlock.countSpacesAt(sCode, off, offStop);

    @staticmethod
    def isSubstrAt(sStr, off, sSubStr):
        """ Returns true if sSubStr is found at off in sStr. """
        return sStr[off : off + len(sSubStr)] == sSubStr;

    ## Matches the start of C/C++ control statements.
    koReCppCtrlStmts   = re.compile(r'\b(if\s*[(]|else\b|while\s*[(]|for\s*[(]|do\b)');
    ## Matches references to the iem.s.xxx members listed in the pattern.
    koReIemDecoderVars = re.compile(  r'iem\.s\.(fPrefixes|uRexReg|uRexB|uRexIndex|iEffSeg|offModRm|cbOpcode|offOpcode'
                                    + r'|enmEffOpSize|enmDefOpSize|enmDefAddrMode|enmEffAddrMode|idxPrefix'
                                    + r'|uVex3rdReg|uVexLength|fEvxStuff|uFpuOpcode|abOpcode'
                                    + r')');

    def decodeCode(self, sRawCode, off = 0, offStop = -1, iLevel = 0): # pylint: disable=too-many-statements,too-many-branches
        """
        Decodes sRawCode[off : offStop].

        A negative offStop means the end of sRawCode.  The iLevel parameter is
        the conditional nesting depth and is used by the recursive calls made
        for the branches of conditional statements.

        Returns list of McStmt instances.
        Raises ParserException on failure.

        Note! The searches for a terminating ';' are deliberately not limited
              by offStop: the body of a brace-less if/else is decoded with
              offStop pointing at its terminating ';'.
        """
        if offStop < 0:
            offStop = len(sRawCode);
        aoStmts = [];
        while off < offStop:
            ch = sRawCode[off];

            #
            # Skip spaces and comments.
            #
            if ch.isspace():
                off += 1;

            elif ch == '/':
                # Slicing yields '' for a '/' at the very end of the code, so
                # that case ends up as a decode error and not an IndexError.
                ch = sRawCode[off + 1 : off + 2];
                if ch == '/':   # C++ comment.
                    off = sRawCode.find('\n', off + 2);
                    if off < 0:
                        break;
                    off += 1;
                elif ch == '*': # C comment.
                    off = sRawCode.find('*/', off + 2);
                    if off < 0:
                        break;
                    off += 2;
                else:
                    self.raiseDecodeError(sRawCode, off, 'Unexpected "/"');

            #
            # Is it a MC statement.
            #
            elif ch == 'I' and sRawCode[off : off + len('IEM_MC_')] == 'IEM_MC_':
                # All MC statements ends with a semicolon, except for conditionals which ends with a '{'.
                # Extract it and strip comments from it.
                if not self.isSubstrAt(sRawCode, off, 'IEM_MC_IF_'):
                    offEnd = sRawCode.find(';', off + len('IEM_MC_'));
                    if offEnd <= off:
                        self.raiseDecodeError(sRawCode, off, 'MC statement without a ";"');
                else:
                    offEnd = sRawCode.find('{', off + len('IEM_MC_IF_'));
                    if offEnd <= off:
                        self.raiseDecodeError(sRawCode, off, 'MC conditional statement without a "{"');
                    if sRawCode.find(';', off + len('IEM_MC_IF_'), offEnd) > off:
                        self.raiseDecodeError(sRawCode, off, 'MC conditional statement without an immediate "{"');
                    # Back up over the whitespace preceding the '{' (if any),
                    # so offEnd is the offset following the statement proper.
                    while offEnd > off and sRawCode[offEnd - 1].isspace():
                        offEnd -= 1;

                sRawStmt = self.stripComments(sRawCode[off : offEnd]);

                # Isolate the statement name.
                offOpenParen = sRawStmt.find('(');
                if offOpenParen < 0:
                    self.raiseDecodeError(sRawCode, off, 'MC statement without a "("');
                sName = sRawStmt[: offOpenParen].strip();

                # Extract the parameters.
                (asParams, offCloseParen) = self.extractParams(sRawStmt, offOpenParen);
                if asParams is None:
                    self.raiseDecodeError(sRawCode, off, 'MC statement without a closing parenthesis');
                if offCloseParen + 1 != len(sRawStmt):
                    self.raiseDecodeError(sRawCode, off,
                                          'Unexpected code following MC statement: %s' % (sRawStmt[offCloseParen + 1:]));

                # Hand it to the handler.
                fnParser = g_dMcStmtParsers.get(sName);
                if not fnParser:
                    self.raiseDecodeError(sRawCode, off, 'Unknown MC statement: %s' % (sName,));
                oStmt = fnParser(self, sName, asParams);
                if not isinstance(oStmt, (list, tuple)):
                    aoStmts.append(oStmt);
                else:
                    aoStmts.extend(oStmt);

                #
                # If conditional, we need to parse the whole statement.
                #
                # For reasons of simplicity, we assume the following structure
                # and parse each branch in a recursive call:
                #   IEM_MC_IF_XXX() {
                #        IEM_MC_WHATEVER();
                #   } IEM_MC_ELSE() {
                #        IEM_MC_WHATEVER();
                #   } IEM_MC_ENDIF();
                #
                if sName.startswith('IEM_MC_IF_'):
                    if iLevel > 1:
                        self.raiseDecodeError(sRawCode, off, 'Too deep nesting of conditionals.');

                    # Find start of the IF block:
                    offBlock1 = self.skipSpacesAt(sRawCode, offEnd, offStop);
                    if sRawCode[offBlock1] != '{':
                        self.raiseDecodeError(sRawCode, offBlock1, 'Expected "{" following %s' % (sName,));

                    # Find the end of it.
                    offBlock1End = self.findClosingBraces(sRawCode, offBlock1, offStop);
                    if offBlock1End < 0:
                        self.raiseDecodeError(sRawCode, offBlock1, 'No matching "}" closing IF block of %s' % (sName,));

                    oStmt.aoIfBranch = self.decodeCode(sRawCode, offBlock1 + 1, offBlock1End, iLevel + 1);

                    # Is there an else section?
                    off = self.skipSpacesAt(sRawCode, offBlock1End + 1, offStop);
                    if self.isSubstrAt(sRawCode, off, 'IEM_MC_ELSE'):
                        off = self.skipSpacesAt(sRawCode, off + len('IEM_MC_ELSE'), offStop);
                        if sRawCode[off] != '(':
                            self.raiseDecodeError(sRawCode, off, 'Expected "(" following IEM_MC_ELSE"');
                        off = self.skipSpacesAt(sRawCode, off + 1, offStop);
                        if sRawCode[off] != ')':
                            self.raiseDecodeError(sRawCode, off, 'Expected ")" following IEM_MC_ELSE("');

                        # Find start of the ELSE block.
                        offBlock2 = self.skipSpacesAt(sRawCode, off + 1, offStop);
                        if sRawCode[offBlock2] != '{':
                            self.raiseDecodeError(sRawCode, offBlock2, 'Expected "{" following IEM_MC_ELSE()"');

                        # Find the end of it.
                        offBlock2End = self.findClosingBraces(sRawCode, offBlock2, offStop);
                        if offBlock2End < 0:
                            self.raiseDecodeError(sRawCode, offBlock2, 'No matching "}" closing ELSE block of %s' % (sName,));

                        oStmt.aoElseBranch = self.decodeCode(sRawCode, offBlock2 + 1, offBlock2End, iLevel + 1);
                        off = self.skipSpacesAt(sRawCode, offBlock2End + 1, offStop);

                    # Parse past the endif statement.
                    if not self.isSubstrAt(sRawCode, off, 'IEM_MC_ENDIF'):
                        self.raiseDecodeError(sRawCode, off, 'Expected IEM_MC_ENDIF for closing %s' % (sName,));
                    off = self.skipSpacesAt(sRawCode, off + len('IEM_MC_ENDIF'), offStop);
                    if sRawCode[off] != '(':
                        self.raiseDecodeError(sRawCode, off, 'Expected "(" following IEM_MC_ENDIF"');
                    off = self.skipSpacesAt(sRawCode, off + 1, offStop);
                    if sRawCode[off] != ')':
                        self.raiseDecodeError(sRawCode, off, 'Expected ")" following IEM_MC_ENDIF("');
                    off = self.skipSpacesAt(sRawCode, off + 1, offStop);
                    if sRawCode[off] != ';':
                        self.raiseDecodeError(sRawCode, off, 'Expected ";" following IEM_MC_ENDIF()"');
                    off += 1;

                else:
                    # Advance.
                    off = offEnd + 1;

            #
            # Otherwise it must be a C/C++ statement of sorts.
            #
            else:
                # Find the end of the statement.  if and else requires special handling.
                sCondExpr = None;
                oMatch = self.koReCppCtrlStmts.match(sRawCode, off);
                if oMatch:
                    if oMatch.group(1)[-1] == '(':
                        (sCondExpr, offEnd) = self.extractParam(sRawCode, oMatch.end());
                    else:
                        offEnd = oMatch.end();
                    if not oMatch.group(1).startswith('if') and oMatch.group(1) != 'else':
                        self.raiseDecodeError(sRawCode, off, 'Only if/else control statements allowed: %s' % (oMatch.group(1),));
                elif ch == '#':
                    offEnd = sRawCode.find('\n', off, offStop);
                    if offEnd < 0:
                        offEnd = offStop;
                    offEnd -= 1;
                    # offEnd is inclusive here (the directive is sRawCode[off : offEnd + 1]),
                    # so it is the character at offEnd that must be tested when trimming
                    # trailing whitespace.
                    while offEnd > off and sRawCode[offEnd].isspace():
                        offEnd -= 1;
                else:
                    offEnd = sRawCode.find(';', off);
                    if offEnd < 0:
                        self.raiseDecodeError(sRawCode, off, 'C++ statement without a ";"');

                # Check this and the following statement whether it might have
                # something to do with decoding.  This is a statement filter
                # criteria when generating the threaded functions blocks.
                offNextEnd = sRawCode.find(';', offEnd + 1);
                fDecode    = (   sRawCode.find('IEM_OPCODE_', off, max(offEnd, offNextEnd)) >= 0
                              or sRawCode.find('IEMOP_HLP_DONE_', off, max(offEnd, offNextEnd)) >= 0
                              or sRawCode.find('IEMOP_HLP_DECODED_', off, offEnd) >= 0
                              or sRawCode.find('IEMOP_HLP_RAISE_UD_IF_MISSING_GUEST_FEATURE', off, offEnd) >= 0
                              );

                if not oMatch:
                    if ch != '#':
                        aoStmts.append(McCppGeneric(sRawCode[off : offEnd + 1], fDecode));
                    else:
                        aoStmts.append(McCppPreProc(sRawCode[off : offEnd + 1]));
                    off = offEnd + 1;
                elif oMatch.group(1).startswith('if'):
                    #
                    # if () xxx [else yyy] statement.
                    #
                    oStmt = McCppCond(sCondExpr, fDecode);
                    aoStmts.append(oStmt);
                    off = offEnd + 1;

                    # Following the if () we can either have a {} containing zero or more statements
                    # or we have a single statement.
                    offBlock1 = self.skipSpacesAt(sRawCode, offEnd + 1, offStop);
                    if sRawCode[offBlock1] == '{':
                        offBlock1End = self.findClosingBraces(sRawCode, offBlock1, offStop);
                        if offBlock1End < 0:
                            self.raiseDecodeError(sRawCode, offBlock1, 'No matching "}" closing if block');
                        offBlock1 += 1;
                    else:
                        offBlock1End = sRawCode.find(';', offBlock1, offStop);
                        if offBlock1End < 0:
                            self.raiseDecodeError(sRawCode, off, 'Expected ";" terminating one-line if block"');

                    oStmt.aoIfBranch = self.decodeCode(sRawCode, offBlock1, offBlock1End, iLevel + 1);

                    # The else is optional and can likewise be followed by {} or a single statement.
                    off = self.skipSpacesAt(sRawCode, offBlock1End + 1, offStop);
                    if self.isSubstrAt(sRawCode, off, 'else') and sRawCode[off + len('else')].isspace():
                        offBlock2 = self.skipSpacesAt(sRawCode, off + len('else'), offStop);
                        if sRawCode[offBlock2] == '{':
                            offBlock2End = self.findClosingBraces(sRawCode, offBlock2, offStop);
                            if offBlock2End < 0:
                                self.raiseDecodeError(sRawCode, offBlock2, 'No matching "}" closing else block');
                            offBlock2 += 1;
                        else:
                            offBlock2End = sRawCode.find(';', offBlock2, offStop);
                            if offBlock2End < 0:
                                self.raiseDecodeError(sRawCode, off, 'Expected ";" terminating one-line else block"');

                        oStmt.aoElseBranch = self.decodeCode(sRawCode, offBlock2, offBlock2End, iLevel + 1);
                        off = offBlock2End + 1;

                elif oMatch.group(1) == 'else':
                    # Problematic 'else' branch, typically involving #ifdefs.
                    self.raiseDecodeError(sRawCode, off, 'Mixed up else/#ifdef or something confusing us.');

        return aoStmts;

    def decode(self):
        """
        Decodes the block (the concatenation of self.asLines), populating self.aoStmts.

        Returns the statement list.
        Raises ParserException on failure.
        """
        self.aoStmts = self.decodeCode(''.join(self.asLines));
        return self.aoStmts;
2436
2437
2438## IEM_MC_XXX -> parser dictionary.
2439# The raw table was generated via the following command
2440# sed -n -e "s/^# *define *\(IEM_MC_[A-Z_0-9]*\)[ (].*$/ '\1': McBlock.parseMcGeneric,/p" include/IEMMc.h \
2441# | sort | uniq | gawk "{printf """ %%-60s %%s\n""", $1, $2}"
2442g_dMcStmtParsers = {
2443 'IEM_MC_ACTUALIZE_AVX_STATE_FOR_CHANGE': McBlock.parseMcGeneric,
2444 'IEM_MC_ACTUALIZE_AVX_STATE_FOR_READ': McBlock.parseMcGeneric,
2445 'IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE': McBlock.parseMcGeneric,
2446 'IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ': McBlock.parseMcGeneric,
2447 'IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE': McBlock.parseMcGeneric,
2448 'IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ': McBlock.parseMcGeneric,
2449 'IEM_MC_ADD_GREG_U16': McBlock.parseMcGeneric,
2450 'IEM_MC_ADD_GREG_U16_TO_LOCAL': McBlock.parseMcGeneric,
2451 'IEM_MC_ADD_GREG_U32': McBlock.parseMcGeneric,
2452 'IEM_MC_ADD_GREG_U32_TO_LOCAL': McBlock.parseMcGeneric,
2453 'IEM_MC_ADD_GREG_U64': McBlock.parseMcGeneric,
2454 'IEM_MC_ADD_GREG_U64_TO_LOCAL': McBlock.parseMcGeneric,
2455 'IEM_MC_ADD_GREG_U8': McBlock.parseMcGeneric,
2456 'IEM_MC_ADD_GREG_U8_TO_LOCAL': McBlock.parseMcGeneric,
2457 'IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR': McBlock.parseMcGeneric,
2458 'IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR': McBlock.parseMcGeneric,
2459 'IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR': McBlock.parseMcGeneric,
2460 'IEM_MC_ADVANCE_RIP_AND_FINISH': McBlock.parseMcGeneric,
2461 'IEM_MC_AND_2LOCS_U32': McBlock.parseMcGeneric,
2462 'IEM_MC_AND_ARG_U16': McBlock.parseMcGeneric,
2463 'IEM_MC_AND_ARG_U32': McBlock.parseMcGeneric,
2464 'IEM_MC_AND_ARG_U64': McBlock.parseMcGeneric,
2465 'IEM_MC_AND_GREG_U16': McBlock.parseMcGeneric,
2466 'IEM_MC_AND_GREG_U32': McBlock.parseMcGeneric,
2467 'IEM_MC_AND_GREG_U64': McBlock.parseMcGeneric,
2468 'IEM_MC_AND_GREG_U8': McBlock.parseMcGeneric,
2469 'IEM_MC_AND_LOCAL_U16': McBlock.parseMcGeneric,
2470 'IEM_MC_AND_LOCAL_U32': McBlock.parseMcGeneric,
2471 'IEM_MC_AND_LOCAL_U64': McBlock.parseMcGeneric,
2472 'IEM_MC_AND_LOCAL_U8': McBlock.parseMcGeneric,
2473 'IEM_MC_ARG': McBlock.parseMcArg,
2474 'IEM_MC_ARG_CONST': McBlock.parseMcArgConst,
2475 'IEM_MC_ARG_LOCAL_EFLAGS': McBlock.parseMcArgLocalEFlags,
2476 'IEM_MC_ARG_LOCAL_REF': McBlock.parseMcArgLocalRef,
2477 'IEM_MC_ASSIGN': McBlock.parseMcGeneric,
2478 'IEM_MC_ASSIGN_TO_SMALLER': McBlock.parseMcGeneric,
2479 'IEM_MC_ASSIGN_U8_SX_U64': McBlock.parseMcGeneric,
2480 'IEM_MC_ASSIGN_U32_SX_U64': McBlock.parseMcGeneric,
2481 'IEM_MC_BEGIN': McBlock.parseMcGeneric,
2482 'IEM_MC_BSWAP_LOCAL_U16': McBlock.parseMcGeneric,
2483 'IEM_MC_BSWAP_LOCAL_U32': McBlock.parseMcGeneric,
2484 'IEM_MC_BSWAP_LOCAL_U64': McBlock.parseMcGeneric,
2485 'IEM_MC_CALC_RM_EFF_ADDR': McBlock.parseMcGeneric,
2486 'IEM_MC_CALL_AIMPL_3': McBlock.parseMcCallAImpl,
2487 'IEM_MC_CALL_AIMPL_4': McBlock.parseMcCallAImpl,
2488 'IEM_MC_CALL_AVX_AIMPL_2': McBlock.parseMcCallAvxAImpl,
2489 'IEM_MC_CALL_AVX_AIMPL_3': McBlock.parseMcCallAvxAImpl,
2490 'IEM_MC_CALL_CIMPL_0': McBlock.parseMcCallCImpl,
2491 'IEM_MC_CALL_CIMPL_1': McBlock.parseMcCallCImpl,
2492 'IEM_MC_CALL_CIMPL_2': McBlock.parseMcCallCImpl,
2493 'IEM_MC_CALL_CIMPL_3': McBlock.parseMcCallCImpl,
2494 'IEM_MC_CALL_CIMPL_4': McBlock.parseMcCallCImpl,
2495 'IEM_MC_CALL_CIMPL_5': McBlock.parseMcCallCImpl,
2496 'IEM_MC_CALL_FPU_AIMPL_1': McBlock.parseMcCallFpuAImpl,
2497 'IEM_MC_CALL_FPU_AIMPL_2': McBlock.parseMcCallFpuAImpl,
2498 'IEM_MC_CALL_FPU_AIMPL_3': McBlock.parseMcCallFpuAImpl,
2499 'IEM_MC_CALL_MMX_AIMPL_2': McBlock.parseMcCallMmxAImpl,
2500 'IEM_MC_CALL_MMX_AIMPL_3': McBlock.parseMcCallMmxAImpl,
2501 'IEM_MC_CALL_SSE_AIMPL_2': McBlock.parseMcCallSseAImpl,
2502 'IEM_MC_CALL_SSE_AIMPL_3': McBlock.parseMcCallSseAImpl,
2503 'IEM_MC_CALL_VOID_AIMPL_0': McBlock.parseMcCallVoidAImpl,
2504 'IEM_MC_CALL_VOID_AIMPL_1': McBlock.parseMcCallVoidAImpl,
2505 'IEM_MC_CALL_VOID_AIMPL_2': McBlock.parseMcCallVoidAImpl,
2506 'IEM_MC_CALL_VOID_AIMPL_3': McBlock.parseMcCallVoidAImpl,
2507 'IEM_MC_CALL_VOID_AIMPL_4': McBlock.parseMcCallVoidAImpl,
2508 'IEM_MC_CLEAR_EFL_BIT': McBlock.parseMcGeneric,
2509 'IEM_MC_CLEAR_FSW_EX': McBlock.parseMcGeneric,
2510 'IEM_MC_CLEAR_HIGH_GREG_U64': McBlock.parseMcGeneric,
2511 'IEM_MC_CLEAR_HIGH_GREG_U64_BY_REF': McBlock.parseMcGeneric,
2512 'IEM_MC_CLEAR_XREG_U32_MASK': McBlock.parseMcGeneric,
2513 'IEM_MC_CLEAR_YREG_128_UP': McBlock.parseMcGeneric,
2514 'IEM_MC_COMMIT_EFLAGS': McBlock.parseMcGeneric,
2515 'IEM_MC_COPY_XREG_U128': McBlock.parseMcGeneric,
2516 'IEM_MC_COPY_YREG_U128_ZX_VLMAX': McBlock.parseMcGeneric,
2517 'IEM_MC_COPY_YREG_U256_ZX_VLMAX': McBlock.parseMcGeneric,
2518 'IEM_MC_COPY_YREG_U64_ZX_VLMAX': McBlock.parseMcGeneric,
2519 'IEM_MC_DEFER_TO_CIMPL_0': McBlock.parseMcGeneric,
2520 'IEM_MC_DEFER_TO_CIMPL_1': McBlock.parseMcGeneric,
2521 'IEM_MC_DEFER_TO_CIMPL_2': McBlock.parseMcGeneric,
2522 'IEM_MC_DEFER_TO_CIMPL_3': McBlock.parseMcGeneric,
2523 'IEM_MC_END': McBlock.parseMcGeneric,
2524 'IEM_MC_FETCH_EFLAGS': McBlock.parseMcGeneric,
2525 'IEM_MC_FETCH_EFLAGS_U8': McBlock.parseMcGeneric,
2526 'IEM_MC_FETCH_FCW': McBlock.parseMcGeneric,
2527 'IEM_MC_FETCH_FSW': McBlock.parseMcGeneric,
2528 'IEM_MC_FETCH_GREG_U16': McBlock.parseMcGeneric,
2529 'IEM_MC_FETCH_GREG_U16_SX_U32': McBlock.parseMcGeneric,
2530 'IEM_MC_FETCH_GREG_U16_SX_U64': McBlock.parseMcGeneric,
2531 'IEM_MC_FETCH_GREG_U16_ZX_U32': McBlock.parseMcGeneric,
2532 'IEM_MC_FETCH_GREG_U16_ZX_U64': McBlock.parseMcGeneric,
2533 'IEM_MC_FETCH_GREG_U32': McBlock.parseMcGeneric,
2534 'IEM_MC_FETCH_GREG_U32_SX_U64': McBlock.parseMcGeneric,
2535 'IEM_MC_FETCH_GREG_U32_ZX_U64': McBlock.parseMcGeneric,
2536 'IEM_MC_FETCH_GREG_U64': McBlock.parseMcGeneric,
2537 'IEM_MC_FETCH_GREG_U64_ZX_U64': McBlock.parseMcGeneric,
2538 'IEM_MC_FETCH_GREG_U8': McBlock.parseMcGeneric,
2539 'IEM_MC_FETCH_GREG_U8_SX_U16': McBlock.parseMcGeneric,
2540 'IEM_MC_FETCH_GREG_U8_SX_U32': McBlock.parseMcGeneric,
2541 'IEM_MC_FETCH_GREG_U8_SX_U64': McBlock.parseMcGeneric,
2542 'IEM_MC_FETCH_GREG_U8_ZX_U16': McBlock.parseMcGeneric,
2543 'IEM_MC_FETCH_GREG_U8_ZX_U32': McBlock.parseMcGeneric,
2544 'IEM_MC_FETCH_GREG_U8_ZX_U64': McBlock.parseMcGeneric,
2545 'IEM_MC_FETCH_MEM_D80': McBlock.parseMcGeneric,
2546 'IEM_MC_FETCH_MEM_I16': McBlock.parseMcGeneric,
2547 'IEM_MC_FETCH_MEM_I32': McBlock.parseMcGeneric,
2548 'IEM_MC_FETCH_MEM_I64': McBlock.parseMcGeneric,
2549 'IEM_MC_FETCH_MEM_R32': McBlock.parseMcGeneric,
2550 'IEM_MC_FETCH_MEM_R64': McBlock.parseMcGeneric,
2551 'IEM_MC_FETCH_MEM_R80': McBlock.parseMcGeneric,
2552 'IEM_MC_FETCH_MEM_S32_SX_U64': McBlock.parseMcGeneric,
2553 'IEM_MC_FETCH_MEM_U128': McBlock.parseMcGeneric,
2554 'IEM_MC_FETCH_MEM_U128_ALIGN_SSE': McBlock.parseMcGeneric,
2555 'IEM_MC_FETCH_MEM_U128_NO_AC': McBlock.parseMcGeneric,
2556 'IEM_MC_FETCH_MEM_U16': McBlock.parseMcGeneric,
2557 'IEM_MC_FETCH_MEM_U16_DISP': McBlock.parseMcGeneric,
2558 'IEM_MC_FETCH_MEM_U16_SX_U32': McBlock.parseMcGeneric,
2559 'IEM_MC_FETCH_MEM_U16_SX_U64': McBlock.parseMcGeneric,
2560 'IEM_MC_FETCH_MEM_U16_ZX_U32': McBlock.parseMcGeneric,
2561 'IEM_MC_FETCH_MEM_U16_ZX_U64': McBlock.parseMcGeneric,
2562 'IEM_MC_FETCH_MEM_U256': McBlock.parseMcGeneric,
2563 'IEM_MC_FETCH_MEM_U256_ALIGN_AVX': McBlock.parseMcGeneric,
2564 'IEM_MC_FETCH_MEM_U256_NO_AC': McBlock.parseMcGeneric,
2565 'IEM_MC_FETCH_MEM_U32': McBlock.parseMcGeneric,
2566 'IEM_MC_FETCH_MEM_U32_DISP': McBlock.parseMcGeneric,
2567 'IEM_MC_FETCH_MEM_U32_SX_U64': McBlock.parseMcGeneric,
2568 'IEM_MC_FETCH_MEM_U32_ZX_U64': McBlock.parseMcGeneric,
2569 'IEM_MC_FETCH_MEM_U64': McBlock.parseMcGeneric,
2570 'IEM_MC_FETCH_MEM_U64_ALIGN_U128': McBlock.parseMcGeneric,
2571 'IEM_MC_FETCH_MEM_U64_DISP': McBlock.parseMcGeneric,
2572 'IEM_MC_FETCH_MEM_U8': McBlock.parseMcGeneric,
2573 'IEM_MC_FETCH_MEM_U8_SX_U16': McBlock.parseMcGeneric,
2574 'IEM_MC_FETCH_MEM_U8_SX_U32': McBlock.parseMcGeneric,
2575 'IEM_MC_FETCH_MEM_U8_SX_U64': McBlock.parseMcGeneric,
2576 'IEM_MC_FETCH_MEM_U8_ZX_U16': McBlock.parseMcGeneric,
2577 'IEM_MC_FETCH_MEM_U8_ZX_U32': McBlock.parseMcGeneric,
2578 'IEM_MC_FETCH_MEM_U8_ZX_U64': McBlock.parseMcGeneric,
2579 'IEM_MC_FETCH_MEM_XMM': McBlock.parseMcGeneric,
2580 'IEM_MC_FETCH_MEM_XMM_ALIGN_SSE': McBlock.parseMcGeneric,
2581 'IEM_MC_FETCH_MEM_XMM_NO_AC': McBlock.parseMcGeneric,
2582 'IEM_MC_FETCH_MEM_XMM_U32': McBlock.parseMcGeneric,
2583 'IEM_MC_FETCH_MEM_XMM_U64': McBlock.parseMcGeneric,
2584 'IEM_MC_FETCH_MEM_YMM': McBlock.parseMcGeneric,
2585 'IEM_MC_FETCH_MEM_YMM_ALIGN_AVX': McBlock.parseMcGeneric,
2586 'IEM_MC_FETCH_MEM_YMM_NO_AC': McBlock.parseMcGeneric,
2587 'IEM_MC_FETCH_MEM16_U8': McBlock.parseMcGeneric,
2588 'IEM_MC_FETCH_MEM32_U8': McBlock.parseMcGeneric,
2589 'IEM_MC_FETCH_MREG_U32': McBlock.parseMcGeneric,
2590 'IEM_MC_FETCH_MREG_U64': McBlock.parseMcGeneric,
2591 'IEM_MC_FETCH_SREG_BASE_U32': McBlock.parseMcGeneric,
2592 'IEM_MC_FETCH_SREG_BASE_U64': McBlock.parseMcGeneric,
2593 'IEM_MC_FETCH_SREG_U16': McBlock.parseMcGeneric,
2594 'IEM_MC_FETCH_SREG_ZX_U32': McBlock.parseMcGeneric,
2595 'IEM_MC_FETCH_SREG_ZX_U64': McBlock.parseMcGeneric,
2596 'IEM_MC_FETCH_XREG_U128': McBlock.parseMcGeneric,
2597 'IEM_MC_FETCH_XREG_U16': McBlock.parseMcGeneric,
2598 'IEM_MC_FETCH_XREG_U32': McBlock.parseMcGeneric,
2599 'IEM_MC_FETCH_XREG_U64': McBlock.parseMcGeneric,
2600 'IEM_MC_FETCH_XREG_U8': McBlock.parseMcGeneric,
2601 'IEM_MC_FETCH_XREG_XMM': McBlock.parseMcGeneric,
2602 'IEM_MC_FETCH_YREG_2ND_U64': McBlock.parseMcGeneric,
2603 'IEM_MC_FETCH_YREG_U128': McBlock.parseMcGeneric,
2604 'IEM_MC_FETCH_YREG_U256': McBlock.parseMcGeneric,
2605 'IEM_MC_FETCH_YREG_U32': McBlock.parseMcGeneric,
2606 'IEM_MC_FETCH_YREG_U64': McBlock.parseMcGeneric,
2607 'IEM_MC_FLIP_EFL_BIT': McBlock.parseMcGeneric,
2608 'IEM_MC_FPU_FROM_MMX_MODE': McBlock.parseMcGeneric,
2609 'IEM_MC_FPU_STACK_DEC_TOP': McBlock.parseMcGeneric,
2610 'IEM_MC_FPU_STACK_FREE': McBlock.parseMcGeneric,
2611 'IEM_MC_FPU_STACK_INC_TOP': McBlock.parseMcGeneric,
2612 'IEM_MC_FPU_STACK_PUSH_OVERFLOW': McBlock.parseMcGeneric,
2613 'IEM_MC_FPU_STACK_PUSH_OVERFLOW_MEM_OP': McBlock.parseMcGeneric,
2614 'IEM_MC_FPU_STACK_PUSH_UNDERFLOW': McBlock.parseMcGeneric,
2615 'IEM_MC_FPU_STACK_PUSH_UNDERFLOW_TWO': McBlock.parseMcGeneric,
2616 'IEM_MC_FPU_STACK_UNDERFLOW': McBlock.parseMcGeneric,
2617 'IEM_MC_FPU_STACK_UNDERFLOW_MEM_OP': McBlock.parseMcGeneric,
2618 'IEM_MC_FPU_STACK_UNDERFLOW_MEM_OP_THEN_POP': McBlock.parseMcGeneric,
2619 'IEM_MC_FPU_STACK_UNDERFLOW_THEN_POP': McBlock.parseMcGeneric,
2620 'IEM_MC_FPU_STACK_UNDERFLOW_THEN_POP_POP': McBlock.parseMcGeneric,
2621 'IEM_MC_FPU_TO_MMX_MODE': McBlock.parseMcGeneric,
2622 'IEM_MC_IF_CX_IS_NZ': McBlock.parseMcGenericCond,
2623 'IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_NOT_SET': McBlock.parseMcGenericCond,
2624 'IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_SET': McBlock.parseMcGenericCond,
2625 'IEM_MC_IF_ECX_IS_NZ': McBlock.parseMcGenericCond,
2626 'IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_NOT_SET': McBlock.parseMcGenericCond,
2627 'IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_SET': McBlock.parseMcGenericCond,
2628 'IEM_MC_IF_EFL_ANY_BITS_SET': McBlock.parseMcGenericCond,
2629 'IEM_MC_IF_EFL_BIT_NOT_SET': McBlock.parseMcGenericCond,
2630 'IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ': McBlock.parseMcGenericCond,
2631 'IEM_MC_IF_EFL_BIT_SET': McBlock.parseMcGenericCond,
2632 'IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE': McBlock.parseMcGenericCond,
2633 'IEM_MC_IF_EFL_BITS_EQ': McBlock.parseMcGenericCond,
2634 'IEM_MC_IF_EFL_BITS_NE': McBlock.parseMcGenericCond,
2635 'IEM_MC_IF_EFL_NO_BITS_SET': McBlock.parseMcGenericCond,
2636 'IEM_MC_IF_FCW_IM': McBlock.parseMcGenericCond,
2637 'IEM_MC_IF_FPUREG_IS_EMPTY': McBlock.parseMcGenericCond,
2638 'IEM_MC_IF_FPUREG_NOT_EMPTY': McBlock.parseMcGenericCond,
2639 'IEM_MC_IF_FPUREG_NOT_EMPTY_REF_R80': McBlock.parseMcGenericCond,
2640 'IEM_MC_IF_GREG_BIT_SET': McBlock.parseMcGenericCond,
2641 'IEM_MC_IF_LOCAL_IS_Z': McBlock.parseMcGenericCond,
2642 'IEM_MC_IF_MXCSR_XCPT_PENDING': McBlock.parseMcGenericCond,
2643 'IEM_MC_IF_RCX_IS_NZ': McBlock.parseMcGenericCond,
2644 'IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_NOT_SET': McBlock.parseMcGenericCond,
2645 'IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_SET': McBlock.parseMcGenericCond,
2646 'IEM_MC_IF_TWO_FPUREGS_NOT_EMPTY_REF_R80': McBlock.parseMcGenericCond,
2647 'IEM_MC_IF_TWO_FPUREGS_NOT_EMPTY_REF_R80_FIRST': McBlock.parseMcGenericCond,
2648 'IEM_MC_IMPLICIT_AVX_AIMPL_ARGS': McBlock.parseMcGeneric,
2649 'IEM_MC_INT_CLEAR_ZMM_256_UP': McBlock.parseMcGeneric,
2650 'IEM_MC_LOCAL': McBlock.parseMcLocal,
2651 'IEM_MC_LOCAL_CONST': McBlock.parseMcLocalConst,
2652 'IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT': McBlock.parseMcGeneric,
2653 'IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE': McBlock.parseMcGeneric,
2654 'IEM_MC_MAYBE_RAISE_FPU_XCPT': McBlock.parseMcGeneric,
2655 'IEM_MC_MAYBE_RAISE_FSGSBASE_XCPT': McBlock.parseMcGeneric,
2656 'IEM_MC_MAYBE_RAISE_MMX_RELATED_XCPT': McBlock.parseMcGeneric,
2657 'IEM_MC_MAYBE_RAISE_NON_CANONICAL_ADDR_GP0': McBlock.parseMcGeneric,
2658 'IEM_MC_MAYBE_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT': McBlock.parseMcGeneric,
2659 'IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT': McBlock.parseMcGeneric,
2660 'IEM_MC_MAYBE_RAISE_WAIT_DEVICE_NOT_AVAILABLE': McBlock.parseMcGeneric,
2661 'IEM_MC_MEM_COMMIT_AND_UNMAP': McBlock.parseMcGeneric,
2662 'IEM_MC_MEM_COMMIT_AND_UNMAP_FOR_FPU_STORE': McBlock.parseMcGeneric,
2663 'IEM_MC_MEM_MAP': McBlock.parseMcGeneric,
2664 'IEM_MC_MEM_MAP_EX': McBlock.parseMcGeneric,
2665 'IEM_MC_MERGE_YREG_U32_U96_ZX_VLMAX': McBlock.parseMcGeneric,
2666 'IEM_MC_MERGE_YREG_U64_U64_ZX_VLMAX': McBlock.parseMcGeneric,
2667 'IEM_MC_MERGE_YREG_U64HI_U64HI_ZX_VLMAX': McBlock.parseMcGeneric,
2668 'IEM_MC_MERGE_YREG_U64LO_U64LO_ZX_VLMAX': McBlock.parseMcGeneric,
2669 'IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX': McBlock.parseMcGeneric,
2670 'IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX': McBlock.parseMcGeneric,
2671 'IEM_MC_MODIFIED_MREG': McBlock.parseMcGeneric,
2672 'IEM_MC_MODIFIED_MREG_BY_REF': McBlock.parseMcGeneric,
2673 'IEM_MC_OR_2LOCS_U32': McBlock.parseMcGeneric,
2674 'IEM_MC_OR_GREG_U16': McBlock.parseMcGeneric,
2675 'IEM_MC_OR_GREG_U32': McBlock.parseMcGeneric,
2676 'IEM_MC_OR_GREG_U64': McBlock.parseMcGeneric,
2677 'IEM_MC_OR_GREG_U8': McBlock.parseMcGeneric,
2678 'IEM_MC_OR_LOCAL_U16': McBlock.parseMcGeneric,
2679 'IEM_MC_OR_LOCAL_U32': McBlock.parseMcGeneric,
2680 'IEM_MC_OR_LOCAL_U8': McBlock.parseMcGeneric,
2681 'IEM_MC_POP_U16': McBlock.parseMcGeneric,
2682 'IEM_MC_POP_U32': McBlock.parseMcGeneric,
2683 'IEM_MC_POP_U64': McBlock.parseMcGeneric,
2684 'IEM_MC_PREPARE_AVX_USAGE': McBlock.parseMcGeneric,
2685 'IEM_MC_PREPARE_FPU_USAGE': McBlock.parseMcGeneric,
2686 'IEM_MC_PREPARE_SSE_USAGE': McBlock.parseMcGeneric,
2687 'IEM_MC_PUSH_FPU_RESULT': McBlock.parseMcGeneric,
2688 'IEM_MC_PUSH_FPU_RESULT_MEM_OP': McBlock.parseMcGeneric,
2689 'IEM_MC_PUSH_FPU_RESULT_TWO': McBlock.parseMcGeneric,
2690 'IEM_MC_PUSH_U16': McBlock.parseMcGeneric,
2691 'IEM_MC_PUSH_U32': McBlock.parseMcGeneric,
2692 'IEM_MC_PUSH_U32_SREG': McBlock.parseMcGeneric,
2693 'IEM_MC_PUSH_U64': McBlock.parseMcGeneric,
2694 'IEM_MC_RAISE_DIVIDE_ERROR': McBlock.parseMcGeneric,
2695 'IEM_MC_RAISE_GP0_IF_CPL_NOT_ZERO': McBlock.parseMcGeneric,
2696 'IEM_MC_RAISE_GP0_IF_EFF_ADDR_UNALIGNED': McBlock.parseMcGeneric,
2697 'IEM_MC_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT': McBlock.parseMcGeneric,
2698 'IEM_MC_REF_EFLAGS': McBlock.parseMcGeneric,
2699 'IEM_MC_REF_FPUREG': McBlock.parseMcGeneric,
2700 'IEM_MC_REF_GREG_I32': McBlock.parseMcGeneric,
2701 'IEM_MC_REF_GREG_I32_CONST': McBlock.parseMcGeneric,
2702 'IEM_MC_REF_GREG_I64': McBlock.parseMcGeneric,
2703 'IEM_MC_REF_GREG_I64_CONST': McBlock.parseMcGeneric,
2704 'IEM_MC_REF_GREG_U16': McBlock.parseMcGeneric,
2705 'IEM_MC_REF_GREG_U32': McBlock.parseMcGeneric,
2706 'IEM_MC_REF_GREG_U64': McBlock.parseMcGeneric,
2707 'IEM_MC_REF_GREG_U8': McBlock.parseMcGeneric,
2708 'IEM_MC_REF_LOCAL': McBlock.parseMcGeneric,
2709 'IEM_MC_REF_MREG_U32_CONST': McBlock.parseMcGeneric,
2710 'IEM_MC_REF_MREG_U64': McBlock.parseMcGeneric,
2711 'IEM_MC_REF_MREG_U64_CONST': McBlock.parseMcGeneric,
2712 'IEM_MC_REF_MXCSR': McBlock.parseMcGeneric,
2713 'IEM_MC_REF_XREG_R32_CONST': McBlock.parseMcGeneric,
2714 'IEM_MC_REF_XREG_R64_CONST': McBlock.parseMcGeneric,
2715 'IEM_MC_REF_XREG_U128': McBlock.parseMcGeneric,
2716 'IEM_MC_REF_XREG_U128_CONST': McBlock.parseMcGeneric,
2717 'IEM_MC_REF_XREG_U32_CONST': McBlock.parseMcGeneric,
2718 'IEM_MC_REF_XREG_U64_CONST': McBlock.parseMcGeneric,
2719 'IEM_MC_REF_XREG_XMM_CONST': McBlock.parseMcGeneric,
2720 'IEM_MC_REF_YREG_U128': McBlock.parseMcGeneric,
2721 'IEM_MC_REF_YREG_U128_CONST': McBlock.parseMcGeneric,
2722 'IEM_MC_REF_YREG_U64_CONST': McBlock.parseMcGeneric,
2723 'IEM_MC_REL_JMP_S16_AND_FINISH': McBlock.parseMcGeneric,
2724 'IEM_MC_REL_JMP_S32_AND_FINISH': McBlock.parseMcGeneric,
2725 'IEM_MC_REL_JMP_S8_AND_FINISH': McBlock.parseMcGeneric,
2726 'IEM_MC_RETURN_ON_FAILURE': McBlock.parseMcGeneric,
2727 'IEM_MC_SAR_LOCAL_S16': McBlock.parseMcGeneric,
2728 'IEM_MC_SAR_LOCAL_S32': McBlock.parseMcGeneric,
2729 'IEM_MC_SAR_LOCAL_S64': McBlock.parseMcGeneric,
2730 'IEM_MC_SET_EFL_BIT': McBlock.parseMcGeneric,
2731 'IEM_MC_SET_FPU_RESULT': McBlock.parseMcGeneric,
2732 'IEM_MC_SET_RIP_U16_AND_FINISH': McBlock.parseMcGeneric,
2733 'IEM_MC_SET_RIP_U32_AND_FINISH': McBlock.parseMcGeneric,
2734 'IEM_MC_SET_RIP_U64_AND_FINISH': McBlock.parseMcGeneric,
2735 'IEM_MC_SHL_LOCAL_S16': McBlock.parseMcGeneric,
2736 'IEM_MC_SHL_LOCAL_S32': McBlock.parseMcGeneric,
2737 'IEM_MC_SHL_LOCAL_S64': McBlock.parseMcGeneric,
2738 'IEM_MC_SHR_LOCAL_U8': McBlock.parseMcGeneric,
2739 'IEM_MC_SSE_UPDATE_MXCSR': McBlock.parseMcGeneric,
2740 'IEM_MC_STORE_FPU_RESULT': McBlock.parseMcGeneric,
2741 'IEM_MC_STORE_FPU_RESULT_MEM_OP': McBlock.parseMcGeneric,
2742 'IEM_MC_STORE_FPU_RESULT_THEN_POP': McBlock.parseMcGeneric,
2743 'IEM_MC_STORE_FPU_RESULT_WITH_MEM_OP_THEN_POP': McBlock.parseMcGeneric,
2744 'IEM_MC_STORE_FPUREG_R80_SRC_REF': McBlock.parseMcGeneric,
2745 'IEM_MC_STORE_GREG_I64': McBlock.parseMcGeneric,
2746 'IEM_MC_STORE_GREG_U16': McBlock.parseMcGeneric,
2747 'IEM_MC_STORE_GREG_U16_CONST': McBlock.parseMcGeneric,
2748 'IEM_MC_STORE_GREG_U32': McBlock.parseMcGeneric,
2749 'IEM_MC_STORE_GREG_U32_CONST': McBlock.parseMcGeneric,
2750 'IEM_MC_STORE_GREG_U64': McBlock.parseMcGeneric,
2751 'IEM_MC_STORE_GREG_U64_CONST': McBlock.parseMcGeneric,
2752 'IEM_MC_STORE_GREG_U8': McBlock.parseMcGeneric,
2753 'IEM_MC_STORE_GREG_U8_CONST': McBlock.parseMcGeneric,
2754 'IEM_MC_STORE_MEM_I16_CONST_BY_REF': McBlock.parseMcGeneric,
2755 'IEM_MC_STORE_MEM_I32_CONST_BY_REF': McBlock.parseMcGeneric,
2756 'IEM_MC_STORE_MEM_I64_CONST_BY_REF': McBlock.parseMcGeneric,
2757 'IEM_MC_STORE_MEM_I8_CONST_BY_REF': McBlock.parseMcGeneric,
2758 'IEM_MC_STORE_MEM_INDEF_D80_BY_REF': McBlock.parseMcGeneric,
2759 'IEM_MC_STORE_MEM_NEG_QNAN_R32_BY_REF': McBlock.parseMcGeneric,
2760 'IEM_MC_STORE_MEM_NEG_QNAN_R64_BY_REF': McBlock.parseMcGeneric,
2761 'IEM_MC_STORE_MEM_NEG_QNAN_R80_BY_REF': McBlock.parseMcGeneric,
2762 'IEM_MC_STORE_MEM_U128': McBlock.parseMcGeneric,
2763 'IEM_MC_STORE_MEM_U128_ALIGN_SSE': McBlock.parseMcGeneric,
2764 'IEM_MC_STORE_MEM_U16': McBlock.parseMcGeneric,
2765 'IEM_MC_STORE_MEM_U16_CONST': McBlock.parseMcGeneric,
2766 'IEM_MC_STORE_MEM_U256': McBlock.parseMcGeneric,
2767 'IEM_MC_STORE_MEM_U256_ALIGN_AVX': McBlock.parseMcGeneric,
2768 'IEM_MC_STORE_MEM_U32': McBlock.parseMcGeneric,
2769 'IEM_MC_STORE_MEM_U32_CONST': McBlock.parseMcGeneric,
2770 'IEM_MC_STORE_MEM_U64': McBlock.parseMcGeneric,
2771 'IEM_MC_STORE_MEM_U64_CONST': McBlock.parseMcGeneric,
2772 'IEM_MC_STORE_MEM_U8': McBlock.parseMcGeneric,
2773 'IEM_MC_STORE_MEM_U8_CONST': McBlock.parseMcGeneric,
2774 'IEM_MC_STORE_MREG_U32_ZX_U64': McBlock.parseMcGeneric,
2775 'IEM_MC_STORE_MREG_U64': McBlock.parseMcGeneric,
2776 'IEM_MC_STORE_SREG_BASE_U32': McBlock.parseMcGeneric,
2777 'IEM_MC_STORE_SREG_BASE_U64': McBlock.parseMcGeneric,
2778 'IEM_MC_STORE_SSE_RESULT': McBlock.parseMcGeneric,
2779 'IEM_MC_STORE_XREG_HI_U64': McBlock.parseMcGeneric,
2780 'IEM_MC_STORE_XREG_R32': McBlock.parseMcGeneric,
2781 'IEM_MC_STORE_XREG_R64': McBlock.parseMcGeneric,
2782 'IEM_MC_STORE_XREG_U128': McBlock.parseMcGeneric,
2783 'IEM_MC_STORE_XREG_U16': McBlock.parseMcGeneric,
2784 'IEM_MC_STORE_XREG_U32': McBlock.parseMcGeneric,
2785 'IEM_MC_STORE_XREG_U32_U128': McBlock.parseMcGeneric,
2786 'IEM_MC_STORE_XREG_U32_ZX_U128': McBlock.parseMcGeneric,
2787 'IEM_MC_STORE_XREG_U64': McBlock.parseMcGeneric,
2788 'IEM_MC_STORE_XREG_U64_ZX_U128': McBlock.parseMcGeneric,
2789 'IEM_MC_STORE_XREG_U8': McBlock.parseMcGeneric,
2790 'IEM_MC_STORE_XREG_XMM': McBlock.parseMcGeneric,
2791 'IEM_MC_STORE_XREG_XMM_U32': McBlock.parseMcGeneric,
2792 'IEM_MC_STORE_XREG_XMM_U64': McBlock.parseMcGeneric,
2793 'IEM_MC_STORE_YREG_U128_ZX_VLMAX': McBlock.parseMcGeneric,
2794 'IEM_MC_STORE_YREG_U256_ZX_VLMAX': McBlock.parseMcGeneric,
2795 'IEM_MC_STORE_YREG_U32_ZX_VLMAX': McBlock.parseMcGeneric,
2796 'IEM_MC_STORE_YREG_U64_ZX_VLMAX': McBlock.parseMcGeneric,
2797 'IEM_MC_SUB_GREG_U16': McBlock.parseMcGeneric,
2798 'IEM_MC_SUB_GREG_U32': McBlock.parseMcGeneric,
2799 'IEM_MC_SUB_GREG_U64': McBlock.parseMcGeneric,
2800 'IEM_MC_SUB_GREG_U8': McBlock.parseMcGeneric,
2801 'IEM_MC_SUB_LOCAL_U16': McBlock.parseMcGeneric,
2802 'IEM_MC_UPDATE_FPU_OPCODE_IP': McBlock.parseMcGeneric,
2803 'IEM_MC_UPDATE_FSW': McBlock.parseMcGeneric,
2804 'IEM_MC_UPDATE_FSW_CONST': McBlock.parseMcGeneric,
2805 'IEM_MC_UPDATE_FSW_THEN_POP': McBlock.parseMcGeneric,
2806 'IEM_MC_UPDATE_FSW_THEN_POP_POP': McBlock.parseMcGeneric,
2807 'IEM_MC_UPDATE_FSW_WITH_MEM_OP': McBlock.parseMcGeneric,
2808 'IEM_MC_UPDATE_FSW_WITH_MEM_OP_THEN_POP': McBlock.parseMcGeneric,
2809};
2810
## List of microcode blocks.
## Module-level list of McBlock instances; it starts out empty.
g_aoMcBlocks = [] # type: list(McBlock)
2813
2814
2815
class ParserException(Exception):
    """
    Parser exception.

    Carries a single, already formatted message string.
    """
    def __init__(self, sMessage):
        super(ParserException, self).__init__(sMessage);
2820
2821
2822class SimpleParser(object): # pylint: disable=too-many-instance-attributes
2823 """
2824 Parser of IEMAllInstruction*.cpp.h instruction specifications.
2825 """
2826
2827 ## @name Parser state.
2828 ## @{
2829 kiCode = 0;
2830 kiCommentMulti = 1;
2831 ## @}
2832
class Macro(object):
    """
    A preprocessor macro definition.

    Holds the macro name, the optional parameter list, the body text and
    the line number it was created with, and knows how to expand the body
    for a given set of argument values.
    """
    def __init__(self, sName, asArgs, sBody, iLine):
        self.sName  = sName;    ##< The macro name.
        self.asArgs = asArgs;   ##< None if simple macro, list of parameters otherwise.
        self.sBody  = sBody;    ##< The macro body text.
        self.iLine  = iLine;    ##< The line number passed in by the creator.
        ## Expression matching any parameter name in the body, optionally
        ## together with an adjacent '##' operator.  None for simple macros.
        if asArgs:
            self.oReArgMatch = re.compile(r'(\s*##\s*|\b)(' + '|'.join(asArgs) + r')(\s*##\s*|\b)');
        else:
            self.oReArgMatch = None;

    @staticmethod
    def _needSpace(ch):
        """ This is just to make the expanded output a bit prettier. """
        # NOTE(review): a whitespace character can never be '(', so this is
        # effectively ch.isspace() - confirm whether 'not ch.isspace()' was
        # the intention.
        return ch != '(' and ch.isspace();

    def expandMacro(self, oParent, asArgs = None):
        """
        Expands the macro body with the given arguments.

        oParent is unused.  asArgs must hold one value per macro parameter
        (asserted); for a simple macro it must be empty or None.

        Returns the expanded body text.
        """
        _ = oParent;

        # Simple macro: the body is the expansion.
        if not self.oReArgMatch:
            assert not asArgs;
            return self.sBody;

        assert len(asArgs) == len(self.asArgs);
        dValues = dict((self.asArgs[iArg], sValue) for iArg, sValue in enumerate(asArgs));

        sExpanded = self.sBody;
        oHit      = self.oReArgMatch.search(sExpanded);
        while oHit:
            offStart = oHit.start();
            offEnd   = oHit.end();
            sValue   = dValues[oHit.group(2)];

            # Cosmetic padding; only considered on a side without a '##'.
            # NOTE(review): sExpanded[offStart] is the first character of
            # the match itself, not the one preceding it - confirm intent.
            fPadBefore = not oHit.group(1) and offStart > 0 and self._needSpace(sExpanded[offStart]);
            sLead      = ' ' if fPadBefore else '';
            fPadAfter  = not oHit.group(3) and offEnd < len(sExpanded) and self._needSpace(sExpanded[offEnd]);
            sTrail     = ' ' if fPadAfter else '';

            sExpanded = ''.join((sExpanded[:offStart], sLead, sValue, sTrail, sExpanded[offEnd:]));

            # Resume after the inserted value so it is not expanded again.
            oHit = self.oReArgMatch.search(sExpanded, offStart + len(sValue));

        return sExpanded;
2874
2875
def __init__(self, sSrcFile, asLines, sDefaultMap, oInheritMacrosFrom = None):
    """
    Initializes the parser state for one source file.

    sSrcFile            Source file name, used as prefix in error messages.
    asLines             The lines of the source file.
    sDefaultMap         Name of the default instruction map.  Must be a key
                        in g_dInstructionMaps (asserted).
    oInheritMacrosFrom  Optional object to inherit macros from: its dMacros
                        dictionary is shallow-copied and its oReMacros
                        expression is shared.
    """
    self.sSrcFile       = sSrcFile;
    self.asLines        = asLines;
    self.iLine          = 0;
    self.iState         = self.kiCode;
    self.sComment       = '';
    self.iCommentLine   = 0;
    self.aoCurInstrs    = []    # type: list(Instruction)
    self.oCurFunction   = None  # type: DecoderFunction
    self.iMcBlockInFunc = 0;
    self.oCurMcBlock    = None  # type: McBlock
    self.dMacros        = {}    # type: Dict[str,SimpleParser.Macro]
    self.oReMacros      = None  # type: re ##< Regular expression matching invocations of anything in self.dMacros.
    if oInheritMacrosFrom:
        # Copy the dictionary so changes made here do not alter the donor's.
        self.dMacros    = dict(oInheritMacrosFrom.dMacros);
        self.oReMacros  = oInheritMacrosFrom.oReMacros;

    assert sDefaultMap in g_dInstructionMaps;
    self.oDefaultMap    = g_dInstructionMaps[sDefaultMap];

    self.cTotalInstr    = 0;
    self.cTotalStubs    = 0;
    self.cTotalTagged   = 0;
    self.cTotalMcBlocks = 0;

    # Note! All patterns are raw strings: they contain regex escapes such
    #       as \s, \d and \[ which are not valid Python string escapes and
    #       trigger invalid escape sequence warnings in ordinary literals.
    self.oReMacroName   = re.compile(r'^[A-Za-z_][A-Za-z0-9_]*$');
    self.oReMnemonic    = re.compile(r'^[A-Za-z_][A-Za-z0-9_]*$');
    self.oReStatsName   = re.compile(r'^[A-Za-z_][A-Za-z0-9_]*$');
    self.oReFunctionName= re.compile(r'^iemOp_[A-Za-z_][A-Za-z0-9_]*$');
    self.oReGroupName   = re.compile(r'^og_[a-z0-9]+(|_[a-z0-9]+|_[a-z0-9]+_[a-z0-9]+)$');
    self.oReDisEnum     = re.compile(r'^OP_[A-Z0-9_]+$');
    self.oReFunTable    = re.compile(r'^(IEM_STATIC|static) +const +PFNIEMOP +g_apfn[A-Za-z0-9_]+ *\[ *\d* *\] *= *$');
    self.oReComment     = re.compile(r'//.*?$|/\*.*?\*/'); ## Full comments.
    self.oReHashDefine  = re.compile(r'^\s*#\s*define\s+(.*)$');
    self.oReHashDefine2 = re.compile(r'(?s)\A\s*([A-Za-z_][A-Za-z0-9_]*)\(([^)]*)\)\s*(.*)\Z'); ##< With arguments.
    self.oReHashDefine3 = re.compile(r'(?s)\A\s*([A-Za-z_][A-Za-z0-9_]*)[^(]\s*(.*)\Z');        ##< Simple, no arguments.
    self.oReHashUndef   = re.compile(r'^\s*#\s*undef\s+(.*)$');
    self.oReMcBeginEnd  = re.compile(r'\bIEM_MC_(BEGIN|END)\s*\(');

    self.fDebug         = True;
    self.fDebugMc       = False;
    self.fDebugPreProc  = False;

    # Maps each comment tag to the method handling it.
    self.dTagHandlers   = {
        '@opbrief':     self.parseTagOpBrief,
        '@opdesc':      self.parseTagOpDesc,
        '@opmnemonic':  self.parseTagOpMnemonic,
        '@op1':         self.parseTagOpOperandN,
        '@op2':         self.parseTagOpOperandN,
        '@op3':         self.parseTagOpOperandN,
        '@op4':         self.parseTagOpOperandN,
        '@oppfx':       self.parseTagOpPfx,
        '@opmaps':      self.parseTagOpMaps,
        '@opcode':      self.parseTagOpcode,
        '@opcodesub':   self.parseTagOpcodeSub,
        '@openc':       self.parseTagOpEnc,
        '@opfltest':    self.parseTagOpEFlags,
        '@opflmodify':  self.parseTagOpEFlags,
        '@opflundef':   self.parseTagOpEFlags,
        '@opflset':     self.parseTagOpEFlags,
        '@opflclear':   self.parseTagOpEFlags,
        '@ophints':     self.parseTagOpHints,
        '@opdisenum':   self.parseTagOpDisEnum,
        '@opmincpu':    self.parseTagOpMinCpu,
        '@opcpuid':     self.parseTagOpCpuId,
        '@opgroup':     self.parseTagOpGroup,
        '@opunused':    self.parseTagOpUnusedInvalid,
        '@opinvalid':   self.parseTagOpUnusedInvalid,
        '@opinvlstyle': self.parseTagOpUnusedInvalid,
        '@optest':      self.parseTagOpTest,
        '@optestign':   self.parseTagOpTestIgnore,
        '@optestignore': self.parseTagOpTestIgnore,
        '@opcopytests': self.parseTagOpCopyTests,
        '@oponly':      self.parseTagOpOnlyTest,
        '@oponlytest':  self.parseTagOpOnlyTest,
        '@opxcpttype':  self.parseTagOpXcptType,
        '@opstats':     self.parseTagOpStats,
        '@opfunction':  self.parseTagOpFunction,
        '@opdone':      self.parseTagOpDone,
    };
    # Numbered test tags, both as '@optestN' and as '@optest[N]'.
    for i in range(48):
        self.dTagHandlers['@optest%u' % (i,)]   = self.parseTagOpTestNum;
        self.dTagHandlers['@optest[%u]' % (i,)] = self.parseTagOpTestNum;

    self.asErrors = [];
2961
def raiseError(self, sMessage):
    """
    Raises a ParserException for the current position.

    The message is prefixed with the source file name and the current
    line number (self.iLine).  Never returns.
    """
    sFullMessage = "%s:%d: error: %s" % (self.sSrcFile, self.iLine, sMessage,);
    raise ParserException(sFullMessage);
2967
def raiseCommentError(self, iLineInComment, sMessage):
    """
    Raises a ParserException for a line inside the current comment.

    Works like raiseError, except that the reported line number is
    self.iCommentLine + iLineInComment.  Never returns.
    """
    sFullMessage = "%s:%d: error: %s" % (self.sSrcFile, self.iCommentLine + iLineInComment, sMessage,);
    raise ParserException(sFullMessage);
2973
def error(self, sMessage):
    """
    Records an error for the current line (self.iLine) without raising.

    The formatted message is appended to self.asErrors.
    Returns False, so callers can do 'return self.error(...)'.
    """
    sEntry = u'%s:%d: error: %s\n' % (self.sSrcFile, self.iLine, sMessage,);
    self.asErrors.append(sEntry);
    return False;
2981
def errorOnLine(self, iLine, sMessage):
    """
    Records an error for the given line number without raising.

    The formatted message is appended to self.asErrors.
    Returns False.
    """
    sEntry = u'%s:%d: error: %s\n' % (self.sSrcFile, iLine, sMessage,);
    self.asErrors.append(sEntry);
    return False;
2989
def errorComment(self, iLineInComment, sMessage):
    """
    Records an error for a line inside the current comment without raising.

    The reported line number is self.iCommentLine + iLineInComment and the
    formatted message is appended to self.asErrors.
    Returns False.
    """
    sEntry = u'%s:%d: error: %s\n' % (self.sSrcFile, self.iCommentLine + iLineInComment, sMessage,);
    self.asErrors.append(sEntry);
    return False;
2997
def printErrors(self):
    """
    Writes all recorded errors to stderr in a single write.

    Nothing is written when there are no errors.
    Returns the number of errors.
    """
    cErrors = len(self.asErrors);
    if cErrors > 0:
        sys.stderr.write(u''.join(self.asErrors));
    return cErrors;
3006
def debug(self, sMessage):
    """
    For debugging.

    Prints the message to stderr with a 'debug: ' prefix, but only when
    self.fDebug is set.
    """
    if not self.fDebug:
        return;
    print('debug: %s' % (sMessage,), file = sys.stderr);
3013
def stripComments(self, sLine):
    """
    Returns sLine with each comment matched by self.oReComment replaced by
    a single space.

    Records an error (via self.error) if a comment start or end marker is
    left behind, i.e. traces of a multi-line comment.
    """
    sStripped = self.oReComment.sub(" ", sLine);
    if '/*' in sStripped or '*/' in sStripped:
        self.error('Unexpected multi-line comment will not be handled correctly. Please simplify.');
    return sStripped;
3024
def parseFunctionTable(self, sLine):
    """
    Parses a PFNIEMOP table, updating/checking the @oppfx value.

    sLine is the line holding the table declaration; the table entries
    are read from self.asLines starting at self.iLine.

    Returns True when the whole table was parsed and has the expected
    length, False (after recording an error) otherwise.

    Note!   Updates iLine as it consumes the whole table.
    """

    #
    # Extract the table name.
    #
    oNameMatch = re.search(r' *([a-zA-Z_0-9]+) *\[', sLine);
    if not oNameMatch:
        # Record an error instead of failing with an AttributeError on None.
        return self.error('Failed to extract the PFNIEMOP table name from: %s' % (sLine.strip(),));
    sName = oNameMatch.group(1);
    oMap  = g_dInstructionMapsByIemName.get(sName);
    if not oMap:
        self.debug('No map for PFNIEMOP table: %s' % (sName,));
        oMap = self.oDefaultMap; # This is wrong wrong wrong.

    #
    # All but the g_apfnOneByteMap & g_apfnEscF1_E0toFF tables uses four
    # entries per byte:
    #       no prefix, 066h prefix, f3h prefix, f2h prefix
    # Those tables has 256 & 32 entries respectively.
    #
    cEntriesPerByte   = 4;
    cValidTableLength = 1024;
    asPrefixes        = ('none', '0x66', '0xf3', '0xf2');

    oEntriesMatch = re.search(r'\[ *(256|32) *\]', sLine);
    if oEntriesMatch:
        cEntriesPerByte   = 1;
        cValidTableLength = int(oEntriesMatch.group(1));
        asPrefixes        = (None,);

    #
    # The next line should be '{' and nothing else.
    #
    if self.iLine >= len(self.asLines) or not re.match(r'^ *{ *$', self.asLines[self.iLine]):
        return self.errorOnLine(self.iLine + 1, 'Expected lone "{" on line following PFNIEMOP table %s start' % (sName, ));
    self.iLine += 1;

    #
    # Parse till we find the end of the table.
    #
    iEntry = 0;
    while self.iLine < len(self.asLines):
        # Get the next line and strip comments and spaces (assumes no
        # multi-line comments).
        sLine = self.asLines[self.iLine];
        self.iLine += 1;
        sLine = self.stripComments(sLine).strip();

        # Split the line up into entries, expanding IEMOP_X4 usage.
        # Walking backwards keeps the not yet visited indexes stable while
        # entries are inserted and deleted.
        asEntries = sLine.split(',');
        for i in range(len(asEntries) - 1, -1, -1):
            sEntry = asEntries[i].strip();
            if sEntry.startswith('IEMOP_X4(') and sEntry[-1] == ')':
                # One IEMOP_X4(x) becomes four x entries: three inserted
                # here plus the one stored at index i below.
                sEntry = (sEntry[len('IEMOP_X4('):-1]).strip();
                asEntries.insert(i + 1, sEntry);
                asEntries.insert(i + 1, sEntry);
                asEntries.insert(i + 1, sEntry);
            if sEntry:
                asEntries[i] = sEntry;
            else:
                del asEntries[i];

        # Process the entries.
        for sEntry in asEntries:
            if sEntry in ('};', '}'):
                if iEntry != cValidTableLength:
                    return self.error('Wrong table length for %s: %#x, expected %#x' % (sName, iEntry, cValidTableLength, ));
                return True;
            if sEntry.startswith('iemOp_Invalid'):
                pass; # skip
            else:
                # Look up matching instruction by function.
                sPrefix = asPrefixes[iEntry % cEntriesPerByte];
                sOpcode = '%#04x' % (iEntry // cEntriesPerByte);
                aoInstr = g_dAllInstructionsByFunction.get(sEntry);
                if aoInstr:
                    if not isinstance(aoInstr, list):
                        aoInstr = [aoInstr,];
                    # Pick the first instruction that matches this opcode
                    # and prefix, filling in values that are still None.
                    oInstr = None;
                    for oCurInstr in aoInstr:
                        if oCurInstr.sOpcode == sOpcode and oCurInstr.sPrefix == sPrefix:
                            pass;
                        elif oCurInstr.sOpcode == sOpcode and oCurInstr.sPrefix is None:
                            oCurInstr.sPrefix = sPrefix;
                        elif oCurInstr.sOpcode is None and oCurInstr.sPrefix is None:
                            oCurInstr.sOpcode = sOpcode;
                            oCurInstr.sPrefix = sPrefix;
                        else:
                            continue;
                        oInstr = oCurInstr;
                        break;
                    if not oInstr:
                        # No match: register a copy of the first instruction
                        # for this opcode/prefix/map combination.
                        oInstr = aoInstr[0].copy(oMap = oMap, sOpcode = sOpcode, sPrefix = sPrefix);
                        aoInstr.append(oInstr);
                        g_dAllInstructionsByFunction[sEntry] = aoInstr;
                        g_aoAllInstructions.append(oInstr);
                        oMap.aoInstructions.append(oInstr);
                else:
                    self.debug('Function "%s", entry %#04x / byte %#04x in %s, is not associated with an instruction.'
                               % (sEntry, iEntry, iEntry // cEntriesPerByte, sName,));
            # Invalid entries count towards the table length too.
            iEntry += 1;

    return self.error('Unexpected end of file in PFNIEMOP table');
3130
def addInstruction(self, iLine = None):
    """
    Creates a new instruction and registers it.

    The instruction is created for self.sSrcFile at iLine, or at the
    current line (self.iLine) when iLine is None.  It is appended to both
    g_aoAllInstructions and self.aoCurInstrs.

    Returns the new instruction.
    """
    if iLine is None:
        iLine = self.iLine;
    oNewInstr = Instruction(self.sSrcFile, iLine);
    g_aoAllInstructions.append(oNewInstr);
    self.aoCurInstrs.append(oNewInstr);
    return oNewInstr;
3139
def deriveMnemonicAndOperandsFromStats(self, oInstr, sStats):
    """
    Derives the mnemonic and operands from a IEM stats base name like string.

    Does nothing if the instruction already has a mnemonic.  Otherwise the
    first underscore separated word, lower-cased, becomes the mnemonic.
    If the instruction has no operands yet, each remaining word is looked
    up in g_kdOpTypes and added as an operand.

    Returns True on success (including when nothing had to be derived),
    False if a word is not a known operand type.  Operands added before
    the unknown word are kept.
    """
    if oInstr.sMnemonic is not None:
        return True;

    asParts = sStats.split('_');
    oInstr.sMnemonic = asParts[0].lower();

    # Only derive operands when there are words for them and none are set.
    if len(asParts) <= 1 or oInstr.aoOperands:
        return True;

    for sOpType in asParts[1:]:
        if sOpType not in g_kdOpTypes:
            #return self.error('unknown operand type: %s (instruction: %s)' % (sOpType, oInstr))
            return False;
        oInstr.aoOperands.append(Operand(g_kdOpTypes[sOpType][1], sOpType));
    return True;
3155
def doneInstructionOne(self, oInstr, iLine):
    """
    Complete the parsing by processing, validating and expanding raw inputs.

    Marks the instruction as completed on iLine, fills in whatever can be
    derived of the mnemonic, disassembler enum, statistics name, function
    name, maps and encoding, and finally registers the instruction in the
    statistics and function indexed dictionaries.

    A duplicate statistics name is recorded as an error via self.error.
    Always returns True.
    """
    def operandTypeSuffix():
        """ Returns '_<type>' for each operand that has a type, concatenated. """
        return ''.join('_' + oOperand.sType for oOperand in oInstr.aoOperands if oOperand.sType);

    assert oInstr.iLineCompleted is None;
    oInstr.iLineCompleted = iLine;

    #
    # Specified instructions (those with @op tags) and unspecified legacy
    # stuff currently get the same treatment.  For the latter we generally
    # only got a few things to go on:
    #   /** Opcode 0x0f 0x00 /0. */
    #   FNIEMOPRM_DEF(iemOp_Grp6_sldt)
    #
    fSpecified = oInstr.cOpTags > 0;
    if not fSpecified:
        #if oInstr.sRawOldOpcodes:
        #
        #if oInstr.sMnemonic:
        pass;

    #
    # Common defaults.
    #

    # Guess mnemonic and operands from stats if the former is missing,
    # falling back on the function name without its 'iemOp_' part.
    if oInstr.sMnemonic is None:
        sGuessFrom = None;
        if oInstr.sStats is not None:
            sGuessFrom = oInstr.sStats;
        elif oInstr.sFunction is not None:
            sGuessFrom = oInstr.sFunction.replace('iemOp_', '');
        if sGuessFrom is not None:
            self.deriveMnemonicAndOperandsFromStats(oInstr, sGuessFrom);

    # Derive the disassembler op enum constant from the mnemonic.
    if oInstr.sMnemonic is not None and oInstr.sDisEnum is None:
        oInstr.sDisEnum = 'OP_' + oInstr.sMnemonic.upper();

    # Derive the IEM statistics base name, preferably from the function
    # name and otherwise from mnemonic and operand types.
    if oInstr.sStats is None:
        if oInstr.sFunction is not None:
            oInstr.sStats = oInstr.sFunction.replace('iemOp_', '');
        elif oInstr.sMnemonic is not None:
            oInstr.sStats = oInstr.sMnemonic + operandTypeSuffix();

    # Derive the IEM function name from mnemonic and operand types, or
    # failing that from the statistics name.
    if oInstr.sFunction is None:
        if oInstr.sMnemonic is not None:
            oInstr.sFunction = 'iemOp_' + oInstr.sMnemonic + operandTypeSuffix();
        elif oInstr.sStats:
            oInstr.sFunction = 'iemOp_' + oInstr.sStats;

    #
    # Apply default map and then add the instruction to all its maps.
    #
    if not oInstr.aoMaps:
        oInstr.aoMaps = [ self.oDefaultMap, ];
    for oMap in oInstr.aoMaps:
        oMap.aoInstructions.append(oInstr);

    #
    # Derive encoding from operands and maps.
    #
    if oInstr.sEncoding is None:
        if not oInstr.aoOperands:
            # (plain, VEX) pair to choose from.
            if oInstr.fUnused and oInstr.sSubOpcode:
                asChoices = ('ModR/M', 'VEX.ModR/M');
            else:
                asChoices = ('fixed', 'VEX.fixed');
            oInstr.sEncoding = asChoices[1] if oInstr.onlyInVexMaps() else asChoices[0];
        elif oInstr.aoOperands[0].usesModRM():
            fVex = (len(oInstr.aoOperands) >= 2 and oInstr.aoOperands[1].sWhere == 'vvvv') \
                or oInstr.onlyInVexMaps();
            oInstr.sEncoding = 'VEX.ModR/M' if fVex else 'ModR/M';

    #
    # Check the opstat value and add it to the opstat indexed dictionary.
    #
    if oInstr.sStats:
        if oInstr.sStats in g_dAllInstructionsByStat:
            self.error('Duplicate opstat value "%s"\nnew: %s\nold: %s'
                       % (oInstr.sStats, oInstr, g_dAllInstructionsByStat[oInstr.sStats],));
        else:
            g_dAllInstructionsByStat[oInstr.sStats] = oInstr;

    #
    # Add to function indexed dictionary.  We allow multiple instructions per function.
    #
    if oInstr.sFunction:
        g_dAllInstructionsByFunction.setdefault(oInstr.sFunction, []).append(oInstr);

    #self.debug('%d..%d: %s; %d @op tags' % (oInstr.iLineCreated, oInstr.iLineCompleted, oInstr.sFunction, oInstr.cOpTags));
    return True;
3261
def doneInstructions(self, iLineInComment = None, fEndOfFunction = False):
    """
    Completes all the current instructions and resets the per-instruction
    parser state.

    Each instruction in self.aoCurInstrs is passed to doneInstructionOne with
    self.iLine as completion line, or self.iCommentLine + iLineInComment when
    iLineInComment is given.  Stub instructions and the instruction total are
    counted, then the comment text and current instruction list are cleared.

    When fEndOfFunction is set, the current function (if any) is completed
    with its source lines and the function state is reset as well.

    Returns True.
    """
    for oInstr in self.aoCurInstrs:
        if iLineInComment is None:
            iLineDone = self.iLine
        else:
            iLineDone = self.iCommentLine + iLineInComment
        self.doneInstructionOne(oInstr, iLineDone)
        if oInstr.fStub:
            self.cTotalStubs += 1
    self.cTotalInstr += len(self.aoCurInstrs)

    self.sComment = ''
    self.aoCurInstrs = []

    if not fEndOfFunction:
        return True

    # End of function: hand it the lines from its first line thru the current one.
    if self.oCurFunction:
        iFirst = self.oCurFunction.iBeginLine - 1
        self.oCurFunction.complete(self.iLine, self.asLines[iFirst : self.iLine])
    self.oCurFunction = None
    self.iMcBlockInFunc = 0
    return True
3282
def setInstrunctionAttrib(self, sAttrib, oValue, fOverwrite = False):
    """
    Assigns oValue to attribute sAttrib on every current instruction.

    Unless fOverwrite is exactly True, only instructions where the attribute
    currently is None are touched; any other existing value (including an
    empty string) is kept.
    """
    for oCurInstr in self.aoCurInstrs:
        if fOverwrite is True or getattr(oCurInstr, sAttrib) is None:
            setattr(oCurInstr, sAttrib, oValue)
3294
def setInstrunctionArrayAttrib(self, sAttrib, iEntry, oValue, fOverwrite = False):
    """
    Assigns oValue to entry iEntry of the list attribute sAttrib on every
    current instruction, padding the list with None entries as needed.

    Unless fOverwrite is exactly True, only entries that currently are None
    get replaced.
    """
    for oCurInstr in self.aoCurInstrs:
        aoEntries = getattr(oCurInstr, sAttrib)
        # Grow the list so that iEntry is a valid index (no-op if it already is).
        aoEntries.extend([None] * (iEntry + 1 - len(aoEntries)))
        if fOverwrite is True or aoEntries[iEntry] is None:
            aoEntries[iEntry] = oValue
3306
def parseCommentOldOpcode(self, asLines):
    """
    Deals with 'Opcode 0xff /4' like comments.

    If the first comment line starts with the word 'Opcode' followed by a
    word with a '0x' or '0X' prefix, the words following 'Opcode' are
    recorded (space separated, with any '0X' prefix normalized to '0x') as
    the sRawOldOpcodes attribute of the current instructions, provided that
    attribute is not already set.

    Always returns False.
    """
    if not asLines:
        return False

    asWords = asLines[0].split()
    if     len(asWords) >= 2 \
       and asWords[0] == 'Opcode' \
       and asWords[1].startswith(('0x', '0X')):
        # Drop the leading 'Opcode' word and keep the actual opcode bytes.
        asWords = asWords[1:]
        for iWord, sWord in enumerate(asWords):
            if sWord.startswith('0X'):
                asWords[iWord] = '0x' + sWord[2:]
        self.setInstrunctionAttrib('sRawOldOpcodes', ' '.join(asWords))

    return False
3322
def ensureInstructionForOpTag(self, iTagLine):
    """
    Makes sure there is a current instruction for the op-tag being parsed.

    Adds a new instruction (at comment line + iTagLine) if there currently
    are none, then bumps the tag count of every current instruction, counting
    each instruction towards self.cTotalTagged on its first tag.

    Returns the last of the current instructions.
    """
    if not self.aoCurInstrs:
        self.addInstruction(self.iCommentLine + iTagLine)

    for oCurInstr in self.aoCurInstrs:
        oCurInstr.cOpTags += 1
        fFirstTag = oCurInstr.cOpTags == 1
        if fFirstTag:
            self.cTotalTagged += 1

    return self.aoCurInstrs[-1]
3332
3333 @staticmethod
3334 def flattenSections(aasSections):
3335 """
3336 Flattens multiline sections into stripped single strings.
3337 Returns list of strings, on section per string.
3338 """
3339 asRet = [];
3340 for asLines in aasSections:
3341 if asLines:
3342 asRet.append(' '.join([sLine.strip() for sLine in asLines]));
3343 return asRet;
3344
3345 @staticmethod
3346 def flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = '\n'):
3347 """
3348 Flattens sections into a simple stripped string with newlines as
3349 section breaks. The final section does not sport a trailing newline.
3350 """
3351 # Typical: One section with a single line.
3352 if len(aasSections) == 1 and len(aasSections[0]) == 1:
3353 return aasSections[0][0].strip();
3354
3355 sRet = '';
3356 for iSection, asLines in enumerate(aasSections):
3357 if asLines:
3358 if iSection > 0:
3359 sRet += sSectionSep;
3360 sRet += sLineSep.join([sLine.strip() for sLine in asLines]);
3361 return sRet;
3362
3363
3364
3365 ## @name Tag parsers
3366 ## @{
3367
def parseTagOpBrief(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opbrief
    Value:  Text description, multiple sections, appended.

    Brief description.  If not given, it's the first sentence from \@opdesc.

    The value is flattened, gets a trailing dot appended if missing, must not
    exceed 180 characters and must consist of a single sentence (no dot
    followed by a space before the final dot).  A validated value is stored
    in the sBrief attribute of the instruction.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine)

    # Flatten and validate the value.
    sBrief = self.flattenAllSections(aasSections)
    if not sBrief:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,))
    if sBrief[-1] != '.':
        sBrief = sBrief + '.'
    if len(sBrief) > 180:
        return self.errorComment(iTagLine, '%s: value too long (max 180 chars): %s' % (sTag, sBrief))

    # Look for the first dot that is followed by a space, skipping dots
    # inside words/numbers.  Anything but the final dot means two sentences.
    offDot = sBrief.find('.')
    while 0 <= offDot < len(sBrief) - 1 and sBrief[offDot + 1] != ' ':
        offDot = sBrief.find('.', offDot + 1)
    if offDot >= 0 and offDot != len(sBrief) - 1:
        return self.errorComment(iTagLine, '%s: only one sentence: %s' % (sTag, sBrief))

    # Update the instruction, refusing to overwrite an existing brief.
    if oInstr.sBrief is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite brief "%s" with "%s"'
                                           % (sTag, oInstr.sBrief, sBrief,))
    oInstr.sBrief = sBrief

    _ = iEndLine
    return True
3397
def parseTagOpDesc(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opdesc
    Value:  Text description, multiple sections, appended.

    Describes the instruction.  Each non-empty section is flattened into one
    string and appended to the asDescSections list of the instruction.

    Returns True.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine)
    if aasSections:
        asNewSections = self.flattenSections(aasSections)
        oInstr.asDescSections.extend(asNewSections)

    _ = sTag
    _ = iEndLine
    return True
3412
def parseTagOpMnemonic(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opmnemonic
    Value:  mnemonic

    The flattened value must match self.oReMnemonic.  Because of prefixes,
    groups and whatnot, there are times when the mnemonic isn't that of an
    actual assembler mnemonic.

    Returns True after setting the sMnemonic attribute of the instruction,
    otherwise the result of self.errorComment() (bad name, or a mnemonic has
    already been set).
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine)
    sNewMnemonic = self.flattenAllSections(aasSections)

    # Figure out what, if anything, is wrong with the value.
    sError = None
    if not self.oReMnemonic.match(sNewMnemonic):
        sError = '%s: invalid menmonic name: "%s"' % (sTag, sNewMnemonic,)
    elif oInstr.sMnemonic is not None:
        sError = '%s: attempting to overwrite menmonic "%s" with "%s"' % (sTag, oInstr.sMnemonic, sNewMnemonic,)
    if sError is not None:
        return self.errorComment(iTagLine, sError)

    oInstr.sMnemonic = sNewMnemonic
    _ = iEndLine
    return True
3435
def parseTagOpOperandN(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tags:  \@op1, \@op2, \@op3, \@op4
    Value: [where:]type

    The 'where' value indicates where the operand is found, like the 'reg'
    part of the ModR/M encoding.  It must be a key in g_kdOpLocations.  When
    omitted, element 1 of the g_kdOpTypes entry for the type is used.

    The 'type' value indicates the operand type and must be a key in
    g_kdOpTypes.  These follow the types given in the opcode tables in the
    CPU reference manuals.

    The operand index is taken from the last character of the tag.  An
    operand that has already been specified is not overwritten.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine)
    idxOp = int(sTag[-1]) - 1
    assert 0 <= idxOp < 4

    # Flatten, split up, and validate the "where:type" value.
    sFlattened = self.flattenAllSections(aasSections)
    asSplit = sFlattened.split(':')
    if len(asSplit) == 1:
        sType  = asSplit[0]
        sWhere = None
    elif len(asSplit) == 2:
        (sWhere, sType) = asSplit
    else:
        return self.errorComment(iTagLine, 'expected %s value on format "[<where>:]<type>" not "%s"' % (sTag, sFlattened,))

    if sType not in g_kdOpTypes:
        # Note: This used to be reported as an invalid 'where' value.
        return self.errorComment(iTagLine, '%s: invalid type value "%s", valid: %s'
                                           % (sTag, sType, ', '.join(g_kdOpTypes.keys()),))
    if sWhere is None:
        sWhere = g_kdOpTypes[sType][1]
    elif sWhere not in g_kdOpLocations:
        return self.errorComment(iTagLine, '%s: invalid where value "%s", valid: %s'
                                           % (sTag, sWhere, ', '.join(g_kdOpLocations.keys()),))

    # Insert the operand, refusing to overwrite an existing one.
    while idxOp >= len(oInstr.aoOperands):
        oInstr.aoOperands.append(None)
    if oInstr.aoOperands[idxOp] is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s:%s" with "%s:%s"'
                                           % ( sTag, oInstr.aoOperands[idxOp].sWhere, oInstr.aoOperands[idxOp].sType,
                                               sWhere, sType,))
    oInstr.aoOperands[idxOp] = Operand(sWhere, sType)

    _ = iEndLine
    return True
3485
def parseTagOpMaps(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opmaps
    Value:  map[,map2]

    Indicates which maps the instruction is in.  There is a default map
    associated with each input file.

    Each map name must be a key in g_dInstructionMaps; whitespace around the
    names is ignored.  Maps the instruction is already in are rejected.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine)

    # Flatten, split up and validate the value.  The emptiness check must be
    # done on the string since split() yields [''] for an empty one.
    sFlattened = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',')
    if not sFlattened.strip():
        return self.errorComment(iTagLine, '%s: value required' % (sTag,))
    asMaps = [sMap.strip() for sMap in sFlattened.split(',')]
    for sMap in asMaps:
        if sMap not in g_dInstructionMaps:
            return self.errorComment(iTagLine, '%s: invalid map value: %s  (valid values: %s)'
                                               % (sTag, sMap, ', '.join(g_dInstructionMaps.keys()),))

    # Add the maps to the current list.  Throw errors on duplicates.
    for oMap in oInstr.aoMaps:
        if oMap.sName in asMaps:
            return self.errorComment(iTagLine, '%s: duplicate map assignment: %s' % (sTag, oMap.sName))

    for sMap in asMaps:
        oMap = g_dInstructionMaps[sMap]
        if oMap not in oInstr.aoMaps:
            oInstr.aoMaps.append(oMap)
        else:
            self.errorComment(iTagLine, '%s: duplicate map assignment (input): %s' % (sTag, sMap))

    _ = iEndLine
    return True
3520
def parseTagOpPfx(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@oppfx
    Value:  n/a|none|0x66|0xf3|0xf2

    Required prefix for the instruction.  (In a (E)VEX context this is the
    value of the 'pp' field rather than an actual prefix.)

    The value is lower-cased.  'n/a' is stored as None, 'none' as is, and
    anything else must be a valid opcode byte (a two character value gets
    '0x' prepended first).  Values other than 'n/a' must also be found in
    g_kdPrefixes.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine)

    # Flatten and validate the value.
    sFlattened = self.flattenAllSections(aasSections)
    asPrefixes = sFlattened.split()
    if not asPrefixes:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,))
    if len(asPrefixes) > 1:
        return self.errorComment(iTagLine, '%s: max one prefix: %s' % (sTag, asPrefixes,))

    sPrefix = asPrefixes[0].lower()
    if sPrefix == 'n/a':
        sPrefix = None
    elif sPrefix != 'none':
        if len(sPrefix) == 2:
            sPrefix = '0x' + sPrefix
        if not _isValidOpcodeByte(sPrefix):
            return self.errorComment(iTagLine, '%s: invalid prefix: %s' % (sTag, sPrefix,))

    if sPrefix is not None and sPrefix not in g_kdPrefixes:
        return self.errorComment(iTagLine, '%s: invalid prefix: %s (valid %s)' % (sTag, sPrefix, g_kdPrefixes,))

    # Set it.
    if oInstr.sPrefix is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sPrefix, sPrefix,))
    oInstr.sPrefix = sPrefix

    _ = iEndLine
    return True
3558
def parseTagOpcode(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opcode
    Value:  0x?? | /reg (TODO: | mr/reg | 11 /reg | !11 /reg | 11 mr/reg | !11 mr/reg)

    The opcode byte or sub-byte for the instruction in the context of a map.

    Accepted is anything _isValidOpcodeByte approves of, as well as '/<reg>',
    '11/<reg>' and '!11/<reg>' where <reg> is a single digit in the range
    0 thru 7 (the ModR/M reg field is three bits wide).

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine)

    # Flatten and validate the value.
    sOpcode = self.flattenAllSections(aasSections)
    if not _isValidOpcodeByte(sOpcode):
        # Not a plain byte, so it must be one of the /reg forms.
        fRegForm = any(    len(sOpcode) == len(sLead) + 1
                       and sOpcode.startswith(sLead)
                       and sOpcode[-1] in '01234567'
                       for sLead in ('/', '11/', '!11/'))
        if not fRegForm:
            return self.errorComment(iTagLine, '%s: invalid opcode: %s' % (sTag, sOpcode,))

    # Set it.
    if oInstr.sOpcode is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sOpcode, sOpcode,))
    oInstr.sOpcode = sOpcode

    _ = iEndLine
    return True
3588
def parseTagOpcodeSub(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opcodesub
    Value:  none | 11 mr/reg | !11 mr/reg | rex.w=0 | rex.w=1 | vex.l=0 | vex.l=1
            | 11 mr/reg vex.l=0 | 11 mr/reg vex.l=1 | !11 mr/reg vex.l=0 | !11 mr/reg vex.l=1

    This is a simple way of dealing with encodings where the mod=3 and mod!=3
    represents exactly two different instructions.  The more proper way would
    be to go via maps with two members, but this is faster.

    The flattened value must be a key in g_kdSubOpcodes; element 0 of the
    matching entry is what gets stored in the sSubOpcode attribute.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine)

    # Translate the raw value via the table of known sub-opcodes.
    sRawValue = self.flattenAllSections(aasSections)
    if sRawValue not in g_kdSubOpcodes:
        return self.errorComment(iTagLine, '%s: invalid sub opcode: %s  (valid: 11, !11, none)' % (sTag, sRawValue,))
    sNewSubOpcode = g_kdSubOpcodes[sRawValue][0]

    # Only one sub-opcode per instruction.
    sOldSubOpcode = oInstr.sSubOpcode
    if sOldSubOpcode is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                           % ( sTag, sOldSubOpcode, sNewSubOpcode,))
    oInstr.sSubOpcode = sNewSubOpcode

    _ = iEndLine
    return True
3615
def parseTagOpEnc(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@openc
    Value:  ModR/M|fixed|prefix|<map name>

    The instruction operand encoding style.

    The flattened value is accepted if it is a key in g_kdEncodings, a key in
    g_dInstructionMaps, or passes _isValidOpcodeByte.

    Returns True after setting the sEncoding attribute of the instruction,
    otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine)

    sNewEncoding = self.flattenAllSections(aasSections)
    fKnown = sNewEncoding in g_kdEncodings \
          or sNewEncoding in g_dInstructionMaps \
          or _isValidOpcodeByte(sNewEncoding)
    if not fKnown:
        return self.errorComment(iTagLine, '%s: invalid encoding: %s' % (sTag, sNewEncoding,))

    # Only one encoding per instruction.
    sOldEncoding = oInstr.sEncoding
    if sOldEncoding is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                           % ( sTag, sOldEncoding, sNewEncoding,))
    oInstr.sEncoding = sNewEncoding

    _ = iEndLine
    return True
3642
## EFlags tag to Instruction attribute name.
# Used by parseTagOpEFlags to look up (by tag) which attribute of the
# instruction to check for an earlier value and to store the flag list in.
kdOpFlagToAttr = {
    '@opfltest':    'asFlTest',
    '@opflmodify':  'asFlModify',
    '@opflundef':   'asFlUndefined',
    '@opflset':     'asFlSet',
    '@opflclear':   'asFlClear',
};
3651
def parseTagOpEFlags(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tags:   \@opfltest, \@opflmodify, \@opflundef, \@opflset, \@opflclear
    Value:  <eflags specifier>

    The value is a comma separated list of flags from g_kdEFlagsMnemonics,
    or the single word 'none' (any case) for an empty list.  The resulting
    list is stored in the instruction attribute that self.kdOpFlagToAttr
    associates with the tag, provided that attribute is still None.

    Returns True on success, False or the result of self.errorComment()
    otherwise.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine)

    # Flatten, split up and validate the values.
    sFlattened = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',')
    asFlags = sFlattened.split(',')
    if len(asFlags) == 1 and asFlags[0].lower() == 'none':
        asFlags = []
    else:
        fAllGood = True
        for idxFlag, sFlag in enumerate(asFlags):
            if sFlag in g_kdEFlagsMnemonics:
                continue
            # Retry with surrounding whitespace removed before giving up on it.
            sStripped = sFlag.strip()
            if sStripped in g_kdEFlagsMnemonics:
                asFlags[idxFlag] = sStripped
            else:
                fAllGood = self.errorComment(iTagLine, '%s: invalid EFLAGS value: %s' % (sTag, sFlag,))
        if not fAllGood:
            return False

    # Set them, unless there is a list there already.
    sAttrib = self.kdOpFlagToAttr[sTag]
    asOld = getattr(oInstr, sAttrib)
    if asOld is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, asOld, asFlags,))
    setattr(oInstr, sAttrib, asFlags)

    _ = iEndLine
    return True
3683
def parseTagOpHints(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@ophints
    Value:  Comma or space separated list of flags and hints.

    This covers the disassembler flags table and more.

    Each hint must be a key in g_kdHints.  The single word 'none' (any case)
    means no hints.  Valid hints are added as keys to the dHints dictionary
    of the instruction; hints already present are reported as duplicates.

    Returns True on success and False if any of the hints are invalid.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine)

    # Flatten as a space separated list, split it up and validate the values.
    asHints = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split()
    if len(asHints) == 1 and asHints[0].lower() == 'none':
        asHints = []
    else:
        fRc = True
        for iHint, sHint in enumerate(asHints):
            if sHint not in g_kdHints:
                if sHint.strip() in g_kdHints:
                    # Replace the list entry; strings cannot be assigned into.
                    asHints[iHint] = sHint.strip()
                else:
                    fRc = self.errorComment(iTagLine, '%s: invalid hint value: %s' % (sTag, sHint,))
        if not fRc:
            return False

    # Append them.
    for sHint in asHints:
        if sHint not in oInstr.dHints:
            oInstr.dHints[sHint] = True # (dummy value, using dictionary for speed)
        else:
            self.errorComment(iTagLine, '%s: duplicate hint: %s' % ( sTag, sHint,))

    _ = iEndLine
    return True
3717
def parseTagOpDisEnum(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opdisenum
    Value:  OP_XXXX

    This is for selecting a specific (legacy) disassembler enum value for
    the instruction.

    More than one word is reported as an error, but the first word is still
    processed.  It must match self.oReDisEnum and is stored in the sDisEnum
    attribute of the instruction unless that has been set already.

    Returns True on success, False or the result of self.errorComment()
    otherwise.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine)

    # Flatten and split; complain unless we get exactly one word.
    asWords = self.flattenAllSections(aasSections).split()
    cWords = len(asWords)
    if cWords != 1:
        self.errorComment(iTagLine, '%s: expected exactly one value: %s' % (sTag, asWords,))
    if cWords == 0:
        return False

    sNewDisEnum = asWords[0]
    if not self.oReDisEnum.match(sNewDisEnum):
        return self.errorComment(iTagLine, '%s: invalid disassembler OP_XXXX enum: %s (pattern: %s)'
                                           % (sTag, sNewDisEnum, self.oReDisEnum.pattern))

    # Set it, refusing to overwrite.
    sOldDisEnum = oInstr.sDisEnum
    if sOldDisEnum is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % (sTag, sOldDisEnum, sNewDisEnum,))
    oInstr.sDisEnum = sNewDisEnum

    _ = iEndLine
    return True
3746
def parseTagOpMinCpu(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opmincpu
    Value:  <simple CPU name>

    Indicates when this instruction was introduced.

    The first word of the value must be a key in g_kdCpuNames; additional
    words are reported as an error.  The name is stored in the sMinCpu
    attribute of the instruction if that is still None.  An attempt to
    replace it with a different name is reported and the earlier value kept.

    Returns True when the CPU name is valid, otherwise the result of
    self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine)

    # Flatten the value, split into words, make sure there's just one, valid it.
    asCpus = self.flattenAllSections(aasSections).split()
    if not asCpus:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,))
    if len(asCpus) > 1:
        self.errorComment(iTagLine, '%s: exactly one CPU name, please: %s' % (sTag, ' '.join(asCpus),))

    sMinCpu = asCpus[0]
    if sMinCpu not in g_kdCpuNames:
        return self.errorComment(iTagLine, '%s: invalid CPU name: %s  (names: %s)'
                                           % (sTag, sMinCpu, ','.join(sorted(g_kdCpuNames)),))

    # Set it.  (Must not be assigned before this check, or conflicting
    # values would go unnoticed.)
    if oInstr.sMinCpu is None:
        oInstr.sMinCpu = sMinCpu
    elif oInstr.sMinCpu != sMinCpu:
        self.errorComment(iTagLine, '%s: attemting to overwrite "%s" with "%s"' % (sTag, oInstr.sMinCpu, sMinCpu,))

    _ = iEndLine
    return True
3776
def parseTagOpCpuId(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opcpuid
    Value:  none | <CPUID flag specifier>

    CPUID feature bit which is required for the instruction to be present.

    The value is a comma or space separated list where each entry must be a
    key in g_kdCpuIdFlags.  The single word 'none' (any case) means no
    flags.  Valid entries are appended to the asCpuIds list of the
    instruction; entries already present are reported as duplicates.

    Returns True on success and False if any of the entries are invalid.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine)

    # Flatten as a space separated list, split it up and validate the values.
    asCpuIds = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split()
    if len(asCpuIds) == 1 and asCpuIds[0].lower() == 'none':
        asCpuIds = []
    else:
        fRc = True
        for iCpuId, sCpuId in enumerate(asCpuIds):
            if sCpuId not in g_kdCpuIdFlags:
                if sCpuId.strip() in g_kdCpuIdFlags:
                    # Replace the list entry; strings cannot be assigned into.
                    asCpuIds[iCpuId] = sCpuId.strip()
                else:
                    fRc = self.errorComment(iTagLine, '%s: invalid CPUID value: %s' % (sTag, sCpuId,))
        if not fRc:
            return False

    # Append them.
    for sCpuId in asCpuIds:
        if sCpuId not in oInstr.asCpuIds:
            oInstr.asCpuIds.append(sCpuId)
        else:
            self.errorComment(iTagLine, '%s: duplicate CPUID: %s' % ( sTag, sCpuId,))

    _ = iEndLine
    return True
3810
def parseTagOpGroup(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opgroup
    Value:  op_grp1[_subgrp2[_subsubgrp3]]

    Instruction grouping.

    The value must be a single word matching self.oReGroupName.  It is
    stored in the sGroup attribute of the instruction unless a group has
    been set already.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine)

    # Exactly one, well-formed, group name is expected.
    asWords = self.flattenAllSections(aasSections).split()
    if len(asWords) != 1:
        return self.errorComment(iTagLine, '%s: exactly one group, please: %s' % (sTag, asWords,))
    (sNewGroup,) = asWords
    if not self.oReGroupName.match(sNewGroup):
        return self.errorComment(iTagLine, '%s: invalid group name: %s (valid: %s)'
                                           % (sTag, sNewGroup, self.oReGroupName.pattern))

    # Only one group per instruction.
    sOldGroup = oInstr.sGroup
    if sOldGroup is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, sOldGroup, sNewGroup,))
    oInstr.sGroup = sNewGroup

    _ = iEndLine
    return True
3836
def parseTagOpUnusedInvalid(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opunused, \@opinvalid, \@opinvlstyle
    Value:  <invalid opcode behaviour style>

    The \@opunused indicates the specification is for a currently unused
    instruction encoding.

    The \@opinvalid indicates the specification is for an invalid currently
    instruction encoding (like UD2).

    The \@opinvlstyle just indicates how CPUs decode the instruction when
    not supported (\@opcpuid, \@opmincpu) or disabled.

    The value must be a single word found in g_kdInvalidStyles and is stored
    in the sInvalidStyle attribute of the instruction, which may only be set
    once.  The \@opunused and \@opinvalid tags additionally set the fUnused
    and fInvalid attributes, respectively.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine)

    # Exactly one known style is expected.
    asWords = self.flattenAllSections(aasSections).split()
    if len(asWords) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid behviour style, please: %s' % (sTag, asWords,))
    (sNewStyle,) = asWords
    if sNewStyle not in g_kdInvalidStyles:
        return self.errorComment(iTagLine, '%s: invalid invalid behaviour style: %s (valid: %s)'
                                           % (sTag, sNewStyle, g_kdInvalidStyles.keys(),))

    # The three tags share the one attribute, so only one of them may be used.
    if oInstr.sInvalidStyle is not None:
        return self.errorComment(iTagLine,
                                 '%s: attempting to overwrite "%s" with "%s" (only one @opunused, @opinvalid, @opinvlstyle)'
                                 % ( sTag, oInstr.sInvalidStyle, sNewStyle,))
    oInstr.sInvalidStyle = sNewStyle

    # Two of the tags imply a flag too.
    sFlagAttrib = { '@opunused': 'fUnused', '@opinvalid': 'fInvalid', }.get(sTag)
    if sFlagAttrib is not None:
        setattr(oInstr, sFlagAttrib, True)

    _ = iEndLine
    return True
3874
def parseTagOpTest(self, sTag, aasSections, iTagLine, iEndLine): # pylint: disable=too-many-locals
    r"""
    Tag:        \@optest
    Value:      [<selectors>[ ]?] <inputs> -> <outputs>
    Example:    mode==64bit / in1=0xfffffffe:dw in2=1:dw -> out1=0xffffffff:dw outfl=a?,p?

    The main idea here is to generate basic instruction tests.

    The probably simplest way of handling the diverse input, would be to use
    it to produce size optimized byte code for a simple interpreter that
    modifies the register input and output states.

    An alternative to the interpreter would be creating multiple tables,
    but that becomes rather complicated wrt what goes where and then to use
    them in an efficient manner.

    Each section of the value is one test.  The words of a section are
    scanned back to front: words go into the output list until '->' is
    seen, then into the input list until '/' is seen, and then into the
    selector list.  Selectors, inputs and outputs are validated against the
    TestSelector and TestInOut tables and added to a new InstructionTest,
    which is appended to the aoTests list of the instruction if the whole
    section parsed fine.

    Returns True; problems are reported via self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine)

    #
    # Do it section by section.
    #
    for asSectionLines in aasSections:
        #
        # Sort the input into outputs, inputs and selector conditions.
        #
        sFlatSection = self.flattenAllSections([asSectionLines,])
        if not sFlatSection:
            self.errorComment(iTagLine, '%s: missing value (dbg: aasSections=%s)' % ( sTag, aasSections))
            continue
        oTest = InstructionTest(oInstr)

        asSelectors = []
        asInputs    = []
        asOutputs   = []
        asCur       = asOutputs
        fRc         = True
        asWords     = sFlatSection.split()
        for iWord in range(len(asWords) - 1, -1, -1):
            sWord = asWords[iWord]
            # Check for array switchers.  Note! The current list must be
            # checked by identity, as the lists may well have equal content
            # (both empty for instance) while being different lists.
            if sWord == '->':
                if asCur is not asOutputs:
                    fRc = self.errorComment(iTagLine, '%s: "->" shall only occure once: %s' % (sTag, sFlatSection,))
                    break
                asCur = asInputs
            elif sWord == '/':
                if asCur is not asInputs:
                    fRc = self.errorComment(iTagLine, '%s: "/" shall only occure once: %s' % (sTag, sFlatSection,))
                    break
                asCur = asSelectors
            else:
                asCur.insert(0, sWord)

        #
        # Validate and add selectors.
        #
        for sCond in asSelectors:
            # Predicates are shorthand that expand to a full condition.
            sCondExp = TestSelector.kdPredicates.get(sCond, sCond)
            oSelector = None
            for sOp in TestSelector.kasCompareOps:
                off = sCondExp.find(sOp)
                if off >= 0:
                    sVariable = sCondExp[:off]
                    sValue    = sCondExp[off + len(sOp):]
                    if sVariable in TestSelector.kdVariables:
                        if sValue in TestSelector.kdVariables[sVariable]:
                            oSelector = TestSelector(sVariable, sOp, sValue)
                        else:
                            self.errorComment(iTagLine, '%s: invalid condition value "%s" in "%s" (valid: %s)'
                                                         % ( sTag, sValue, sCond,
                                                             TestSelector.kdVariables[sVariable].keys(),))
                    else:
                        self.errorComment(iTagLine, '%s: invalid condition variable "%s" in "%s" (valid: %s)'
                                                     % ( sTag, sVariable, sCond, TestSelector.kdVariables.keys(),))
                    break
            if oSelector is not None:
                for oExisting in oTest.aoSelectors:
                    if oExisting.sVariable == oSelector.sVariable:
                        self.errorComment(iTagLine, '%s: already have a selector for variable "%s" (existing: %s, new: %s)'
                                                     % ( sTag, oSelector.sVariable, oExisting, oSelector,))
                oTest.aoSelectors.append(oSelector)
            else:
                fRc = self.errorComment(iTagLine, '%s: failed to parse selector: %s' % ( sTag, sCond,))

        #
        # Validate outputs and inputs, adding them to the test as we go along.
        #
        for asItems, sDesc, aoDst in [ (asInputs, 'input', oTest.aoInputs), (asOutputs, 'output', oTest.aoOutputs)]:
            asValidFieldKinds = [ 'both', sDesc, ]
            for sItem in asItems:
                oItem = None
                for sOp in TestInOut.kasOperators:
                    off = sItem.find(sOp)
                    if off < 0:
                        continue
                    sField     = sItem[:off]
                    sValueType = sItem[off + len(sOp):]
                    if     sField in TestInOut.kdFields \
                       and TestInOut.kdFields[sField][1] in asValidFieldKinds:
                        # The type is optional, the field supplies the default.
                        asSplit = sValueType.split(':', 1)
                        sValue  = asSplit[0]
                        sType   = asSplit[1] if len(asSplit) > 1 else TestInOut.kdFields[sField][0]
                        if sType in TestInOut.kdTypes:
                            oValid = TestInOut.kdTypes[sType].validate(sValue)
                            if oValid is True:
                                if not TestInOut.kdTypes[sType].isAndOrPair(sValue) or sOp == '&|=':
                                    oItem = TestInOut(sField, sOp, sValue, sType)
                                else:
                                    self.errorComment(iTagLine, '%s: and-or %s value "%s" can only be used with "&|="'
                                                                 % ( sTag, sDesc, sItem, ))
                            else:
                                self.errorComment(iTagLine, '%s: invalid %s value "%s" in "%s" (type: %s): %s'
                                                             % ( sTag, sDesc, sValue, sItem, sType, oValid, ))
                        else:
                            self.errorComment(iTagLine, '%s: invalid %s type "%s" in "%s" (valid types: %s)'
                                                         % ( sTag, sDesc, sType, sItem, TestInOut.kdTypes.keys(),))
                    else:
                        self.errorComment(iTagLine, '%s: invalid %s field "%s" in "%s"\nvalid fields: %s'
                                                     % ( sTag, sDesc, sField, sItem,
                                                         ', '.join([sKey for sKey, asVal in TestInOut.kdFields.items()
                                                                    if asVal[1] in asValidFieldKinds]),))
                    break
                if oItem is not None:
                    for oExisting in aoDst:
                        if oExisting.sField == oItem.sField and oExisting.sOp == oItem.sOp:
                            self.errorComment(iTagLine,
                                              '%s: already have a "%s" assignment for field "%s" (existing: %s, new: %s)'
                                              % ( sTag, oItem.sOp, oItem.sField, oExisting, oItem,))
                    aoDst.append(oItem)
                else:
                    fRc = self.errorComment(iTagLine, '%s: failed to parse assignment: %s' % ( sTag, sItem,))

        #
        # Add the test if it parsed fine, otherwise dump what we've got.
        #
        if fRc:
            oInstr.aoTests.append(oTest)
        else:
            self.errorComment(iTagLine, '%s: failed to parse test: %s' % (sTag, ' '.join(asWords),))
            self.errorComment(iTagLine, '%s: asSelectors=%s / asInputs=%s -> asOutputs=%s'
                                        % (sTag, asSelectors, asInputs, asOutputs,))

    _ = iEndLine
    return True
4019
def parseTagOpTestNum(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Numbered \@optest tag.  Either \@optest42 or \@optest[42].

    The number is expected to equal the number of tests the instruction
    already has; a mismatch is reported via self.errorComment().  The actual
    parsing is then left to parseTagOpTest, the result of which is returned.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine)

    # The number follows the 7 character '@optest', optionally in brackets.
    sNumber = sTag[8:-1] if sTag[-1] == ']' else sTag[7:]
    iTest = int(sNumber)

    cTests = len(oInstr.aoTests)
    if iTest != cTests:
        self.errorComment(iTagLine, '%s: incorrect test number: %u, actual %u' % (sTag, iTest, len(oInstr.aoTests),))
    return self.parseTagOpTest(sTag, aasSections, iTagLine, iEndLine)
4035
def parseTagOpTestIgnore(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@optestign | \@optestignore
    Value:  <value is ignored>

    This is a simple trick to ignore a test while debugging another.  The
    arguments are not used and True is returned.

    See also \@oponlytest.
    """
    _ = (sTag, aasSections, iTagLine, iEndLine)
    return True
4047
def parseTagOpCopyTests(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opcopytests
    Value:      <opstat | function> [..]
    Example:    \@opcopytests add_Eb_Gb

    Trick to avoid duplicating tests for different encodings of the same
    operation.

    Each reference must match self.oReStatsName or self.oReFunctionName and
    is appended to the asCopyTests list of the instruction.  Invalid and
    duplicate references are reported and skipped.

    Returns True if any references were given, otherwise the result of
    self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine)

    # The references are only recorded here.  (The original comment notes
    # that the copying is done after all input has been parsed, so forward
    # references can be handled.)
    asRefs = self.flattenAllSections(aasSections).split()
    if not asRefs:
        return self.errorComment(iTagLine, '%s: requires at least on reference value' % (sTag,))

    for sRef in asRefs:
        if sRef in oInstr.asCopyTests:
            self.errorComment(iTagLine, '%s: ignoring duplicate "%s"' % (sTag, sRef,))
            continue
        if self.oReStatsName.match(sRef) or self.oReFunctionName.match(sRef):
            oInstr.asCopyTests.append(sRef)
            continue
        self.errorComment(iTagLine, '%s: invalid instruction reference (opstat or function) "%s" (valid: %s or %s)'
                                    % (sTag, sRef, self.oReStatsName.pattern, self.oReFunctionName.pattern))

    _ = iEndLine
    return True
4076
def parseTagOpOnlyTest(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@oponlytest | \@oponly
    Value:  none

    Only test instructions with this tag.  This is a trick that is handy
    for singling out one or two new instructions or tests.

    The tag takes no value.  The instruction is added to
    g_aoOnlyTestInstructions unless it is there already.

    Returns True on success, otherwise the result of self.errorComment().

    See also \@optestignore.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine)

    # Any value at all is an error.
    sUnexpected = self.flattenAllSections(aasSections).strip()
    if sUnexpected:
        return self.errorComment(iTagLine, '%s: does not take any value: %s' % (sTag, sUnexpected))

    fAlreadyListed = oInstr in g_aoOnlyTestInstructions
    if not fAlreadyListed:
        g_aoOnlyTestInstructions.append(oInstr)

    _ = iEndLine
    return True
4099
def parseTagOpXcptType(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @opxcpttype
    Value:      [none|1|2|3|4|4UA|5|6|7|8|11|12|E1|E1NF|E2|E3|E3NF|E4|E4NF|E5|E5NF|E6|E6NF|E7NF|E9|E9NF|E10|E11|E12|E12NF]

    Sets the SSE or AVX exception type (see SDMv2 2.4, 2.7).

    Exactly one value is accepted and it must be a key of g_kdXcptTypes.  The
    type can only be set once per instruction.

    Returns True on success, otherwise the errorComment() result.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    asTypes = self.flattenAllSections(aasSections).split();
    if len(asTypes) != 1:
        return self.errorComment(iTagLine, '%s: exactly one exception type, please: %s' % (sTag, asTypes,));
    sType = asTypes[0];
    if sType not in g_kdXcptTypes:
        return self.errorComment(iTagLine, '%s: invalid exception type: %s (valid: %s)'
                                           % (sTag, sType, sorted(g_kdXcptTypes.keys()),));
    # Set it, refusing to silently replace an earlier @opxcpttype.
    if oInstr.sXcptType is not None:
        return self.errorComment(iTagLine,
                                 '%s: attempting to overwrite "%s" with "%s" (only one @opxcpttype)'
                                 % ( sTag, oInstr.sXcptType, sType,));
    oInstr.sXcptType = sType;

    _ = iEndLine;
    return True;
4126
def parseTagOpFunction(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @opfunction
    Value:      <VMM function name>

    Explicitly sets the IEM function name of the current instruction.
    Normally the name is picked up from the FNIEMOP_XXX macro invocation after
    the description, or generated from the mnemonic and operands.

    It is thought that this may be necessary when specifying instructions
    whose implementation isn't following immediately or that aren't
    implemented yet.

    Returns True on success.  If the value does not match the function name
    pattern, or a function name has already been set, the errorComment()
    result is returned.
    """
    _ = iEndLine;
    oInstr   = self.ensureInstructionForOpTag(iTagLine);
    sNewName = self.flattenAllSections(aasSections);

    # Validate the flattened value before touching the instruction.
    if self.oReFunctionName.match(sNewName) is None:
        return self.errorComment(iTagLine, '%s: invalid VMM function name: "%s" (valid: %s)'
                                           % (sTag, sNewName, self.oReFunctionName.pattern));

    # Only the first @opfunction sticks, later ones are errors.
    if oInstr.sFunction is None:
        oInstr.sFunction = sNewName;
        return True;
    return self.errorComment(iTagLine, '%s: attempting to overwrite VMM function name "%s" with "%s"'
                                       % (sTag, oInstr.sFunction, sNewName,));
4154
def parseTagOpStats(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @opstats
    Value:      <VMM statistics base name>

    Explicitly sets the statistics name of the current instruction.  Normally
    the name is picked up from the IEMOP_MNEMONIC macro invocation, or
    generated from the mnemonic and operands.

    It is thought that this may be necessary when specifying instructions
    whose implementation isn't following immediately or that aren't
    implemented yet.

    Returns True on success.  If the value does not match the statistics name
    pattern, or a name has already been set, the errorComment() result is
    returned.
    """
    _ = iEndLine;
    oInstr   = self.ensureInstructionForOpTag(iTagLine);
    sNewName = self.flattenAllSections(aasSections);

    # Validate the flattened value before touching the instruction.
    if self.oReStatsName.match(sNewName) is None:
        return self.errorComment(iTagLine, '%s: invalid VMM statistics name: "%s" (valid: %s)'
                                           % (sTag, sNewName, self.oReStatsName.pattern));

    # Only the first @opstats sticks, later ones are errors.
    if oInstr.sStats is None:
        oInstr.sStats = sNewName;
        return True;
    return self.errorComment(iTagLine, '%s: attempting to overwrite VMM statistics base name "%s" with "%s"'
                                       % (sTag, oInstr.sStats, sNewName,));
4182
def parseTagOpDone(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @opdone
    Value:      none

    Used to explicitly flush the instructions that have been specified.

    Returns the doneInstructions() result.  If the tag carries a value, that
    is reported via errorComment() and that call's result is returned instead.
    """
    _ = iEndLine;
    sValue = self.flattenAllSections(aasSections);
    if sValue == '':
        return self.doneInstructions();
    return self.errorComment(iTagLine, '%s: takes no value, found: "%s"' % (sTag, sValue,));
4195
4196 ## @}
4197
4198
def parseComment(self):
    """
    Parse the current comment (self.sComment).

    Comments containing neither 'Opcode' nor '@' are rejected right away and
    False is returned.  Otherwise the comment is split into lines, an old style
    'Opcode ...' first line is handed to parseCommentOldOpcode(), and every
    @-tag is dispatched together with its text sections (paragraphs separated
    by empty lines) to the handler registered in self.dTagHandlers.  Unknown
    @op* tags and untagged text mixed with @op* tags are reported via
    errorComment().  True is returned in this case.
    """
    #
    # Reject if comment doesn't seem to contain anything interesting.
    #
    if 'Opcode' not in self.sComment and '@' not in self.sComment:
        return False;

    #
    # Split the comment into lines, removing leading asterisks and spaces.
    # Leading empty lines are removed too (advancing iCommentLine for each),
    # and so are trailing ones.
    #
    asLines = [sRaw.lstrip().lstrip('*').lstrip() for sRaw in self.sComment.split('\n')];

    while asLines and not asLines[0]:
        self.iCommentLine += 1;
        del asLines[0];

    while asLines and not asLines[-1]:
        asLines.pop();

    #
    # Check for old style: Opcode 0x0f 0x12
    #
    if asLines[0].startswith('Opcode '):
        self.parseCommentOldOpcode(asLines);

    #
    # Look for @op* tagged data.  Text preceding the first tag is collected
    # under the pseudo tag '@default'.
    #
    cOpTags      = 0;
    sFlatDefault = None;
    sCurTag      = '@default';
    iCurTagLine  = 0;
    asCurSection = [];
    aasSections  = [ asCurSection, ];
    for iLine, sLine in enumerate(asLines):
        if not sLine.startswith('@'):
            # Plain text: extends the current section, while an empty line
            # closes a non-empty section and opens a new one.
            if sLine:
                asCurSection.append(sLine);
            elif asCurSection:
                asCurSection = [];
                aasSections.append(asCurSection);
            continue;

        #
        # Process the previous tag (dropping a trailing empty section first).
        #
        if len(aasSections) > 1 and not asCurSection:
            aasSections.pop();
        if sCurTag in self.dTagHandlers:
            self.dTagHandlers[sCurTag](sCurTag, aasSections, iCurTagLine, iLine);
            cOpTags += 1;
        elif sCurTag.startswith('@op'):
            self.errorComment(iCurTagLine, 'Unknown tag: %s' % (sCurTag));
        elif sCurTag == '@default':
            sFlatDefault = self.flattenAllSections(aasSections);
        elif '@op' + sCurTag[1:] in self.dTagHandlers:
            self.errorComment(iCurTagLine, 'Did you mean "@op%s" rather than "%s"?' % (sCurTag[1:], sCurTag));
        elif sCurTag in ['@encoding', '@opencoding']:
            self.errorComment(iCurTagLine, 'Did you mean "@openc" rather than "%s"?' % (sCurTag,));

        #
        # New tag.  Any text following it on the same line starts its first section.
        #
        asSplit      = sLine.split(None, 1);
        sCurTag      = asSplit[0].lower();
        asCurSection = asSplit[1:];
        aasSections  = [ asCurSection, ];
        iCurTagLine  = iLine;

    #
    # Process the final tag.
    #
    if len(aasSections) > 1 and not asCurSection:
        aasSections.pop();
    if sCurTag in self.dTagHandlers:
        self.dTagHandlers[sCurTag](sCurTag, aasSections, iCurTagLine, iLine);
        cOpTags += 1;
    elif sCurTag.startswith('@op'):
        self.errorComment(iCurTagLine, 'Unknown tag: %s' % (sCurTag));
    elif sCurTag == '@default':
        sFlatDefault = self.flattenAllSections(aasSections);

    #
    # Don't allow default text in blocks containing @op*.
    #
    if cOpTags > 0 and sFlatDefault:
        self.errorComment(0, 'Untagged comment text is not allowed with @op*: %s' % (sFlatDefault,));

    return True;
4299
def parseMacroInvocation(self, sInvocation):
    """
    Parses a macro invocation.

    sInvocation must start with the macro name.  If the closing parenthesis
    is not found in it, following lines from self.asLines (starting at index
    self.iLine) are appended until it is.  Commas and parentheses inside
    quoted text or nested parentheses do not separate arguments.

    Returns a tuple, first element is the offset following the macro
    invocation.  The second element is a list of macro arguments, where the
    zero'th is the macro name.

    A missing open parenthesis and an invalid macro name are both reported via
    raiseError(), so callers can rely on always getting a tuple back.
    """
    # First the name.
    offOpen = sInvocation.find('(');
    if offOpen <= 0:
        self.raiseError("macro invocation open parenthesis not found");
    sName = sInvocation[:offOpen].strip();
    if not self.oReMacroName.match(sName):
        # Callers unpack the result, so this cannot be a plain error() return.
        self.raiseError("invalid macro name '%s'" % (sName,));
    asRet = [sName, ];

    # Arguments.
    iLine    = self.iLine;
    cDepth   = 1;
    off      = offOpen + 1;
    offStart = off;
    chQuote  = None;
    while cDepth > 0:
        if off >= len(sInvocation):
            # Ran out of text before the closing parenthesis, pull in the next line.
            if iLine >= len(self.asLines):
                self.error('macro invocation beyond end of file');
                return (off, asRet);
            sInvocation += self.asLines[iLine];
            iLine += 1;
        ch = sInvocation[off];

        if chQuote:
            if ch == '\\' and off + 1 < len(sInvocation):
                off += 1;       # Skip the escaped character.
            elif ch == chQuote:
                chQuote = None;
        elif ch in ('"', '\'',):
            chQuote = ch;
        elif ch in (',', ')',):
            if cDepth == 1:
                asRet.append(sInvocation[offStart:off].strip());
                offStart = off + 1;
            if ch == ')':
                cDepth -= 1;
        elif ch == '(':
            cDepth += 1;
        off += 1;

    return (off, asRet);
4350
def findAndParseMacroInvocationEx(self, sCode, sMacro):
    """
    Returns (len(sCode), None) if not found, parseMacroInvocation result if found.

    Only the first occurrence of sMacro in sCode is considered, and it only
    counts as an invocation when the next non-blank character is an opening
    parenthesis.  When found, the returned offset is relative to the start of
    sCode.
    """
    offHit = sCode.find(sMacro);
    # startswith() rather than [0], so a name at the very end of sCode is just
    # 'not found' instead of an IndexError.
    if offHit >= 0 and sCode[offHit + len(sMacro):].lstrip().startswith('('):
        offAfter, asRet = self.parseMacroInvocation(sCode[offHit:]);
        return (offHit + offAfter, asRet);
    return (len(sCode), None);
4360
def findAndParseMacroInvocation(self, sCode, sMacro):
    """
    Returns None if not found, arguments as per parseMacroInvocation if found.

    Thin wrapper around findAndParseMacroInvocationEx() that drops the offset.
    """
    _, asArgs = self.findAndParseMacroInvocationEx(sCode, sMacro);
    return asArgs;
4366
def findAndParseFirstMacroInvocation(self, sCode, asMacro):
    """
    Returns same as findAndParseMacroInvocation.

    The macro names in asMacro are tried in the given order and the result for
    the first one found in sCode is returned.  None if none of them is found.
    """
    # Lazily evaluated, so no further macros are tried after the first hit.
    oResults = (self.findAndParseMacroInvocation(sCode, sMacro) for sMacro in asMacro);
    return next((asRet for asRet in oResults if asRet is not None), None);
4376
def workerIemOpMnemonicEx(self, sMacro, sStats, sAsm, sForm, sUpper, sLower,  # pylint: disable=too-many-arguments
                          sDisHints, sIemHints, asOperands):
    """
    Processes one of the IEMOP_MNEMONIC0EX, IEMOP_MNEMONIC1EX, IEMOP_MNEMONIC2EX,
    IEMOP_MNEMONIC3EX, and IEMOP_MNEMONIC4EX macros.

    The macro arguments are cross-checked against what has already been
    recorded for the last of the current instructions (mnemonic, operands,
    encoding, sub-opcode and statistics name); whatever isn't set yet is taken
    from the macro.  The a_fDisHints and a_fIemHints values are merged into the
    instruction's dHints dictionary.  Problems are reported via self.error()
    and do not stop the processing.

    An invocation with IEMOPHINT_SKIP_PYTHON among the IEM hints is ignored
    (after the basic a_Upper/a_Lower checks).

    Returns True.
    """
    #
    # Some invocation checks.
    #
    if sUpper != sUpper.upper():
        self.error('%s: bad a_Upper parameter: %s' % (sMacro, sUpper,));
    if sLower != sLower.lower():
        self.error('%s: bad a_Lower parameter: %s' % (sMacro, sLower,));
    if sUpper.lower() != sLower:
        self.error('%s: a_Upper and a_Lower parameters does not match: %s vs %s' % (sMacro, sUpper, sLower,));
    if not self.oReMnemonic.match(sLower):
        self.error('%s: invalid a_Lower: %s  (valid: %s)' % (sMacro, sLower, self.oReMnemonic.pattern,));

    #
    # Check if sIemHints tells us to not consider this macro invocation.
    #
    if sIemHints.find('IEMOPHINT_SKIP_PYTHON') >= 0:
        return True;

    # Apply to the last instruction only for now.
    if not self.aoCurInstrs:
        self.addInstruction();
    oInstr = self.aoCurInstrs[-1];
    if oInstr.iLineMnemonicMacro == -1:
        oInstr.iLineMnemonicMacro = self.iLine;
    else:
        self.error('%s: already saw a IEMOP_MNEMONIC* macro on line %u for this instruction'
                   % (sMacro, oInstr.iLineMnemonicMacro,));

    # Mnemonic
    if oInstr.sMnemonic is None:
        oInstr.sMnemonic = sLower;
    elif oInstr.sMnemonic != sLower:
        self.error('%s: current instruction and a_Lower does not match: %s vs %s' % (sMacro, oInstr.sMnemonic, sLower,));

    # Process operands.
    if len(oInstr.aoOperands) not in [0, len(asOperands)]:
        self.error('%s: number of operands given by @opN does not match macro: %s vs %s'
                   % (sMacro, len(oInstr.aoOperands), len(asOperands),));
    for iOperand, sType in enumerate(asOperands):
        sWhere = g_kdOpTypes.get(sType, [None, None])[1];
        if sWhere is None:
            self.error('%s: unknown a_Op%u value: %s' % (sMacro, iOperand + 1, sType));
            if iOperand < len(oInstr.aoOperands): # error recovery.
                sWhere = oInstr.aoOperands[iOperand].sWhere;
                sType  = oInstr.aoOperands[iOperand].sType;
            else:
                sWhere = 'reg';
                sType  = 'Gb';
        if iOperand == len(oInstr.aoOperands):
            oInstr.aoOperands.append(Operand(sWhere, sType))
        elif oInstr.aoOperands[iOperand].sWhere != sWhere or oInstr.aoOperands[iOperand].sType != sType:
            self.error('%s: @op%u and a_Op%u mismatch: %s:%s vs %s:%s'
                       % (sMacro, iOperand + 1, iOperand + 1, oInstr.aoOperands[iOperand].sWhere,
                          oInstr.aoOperands[iOperand].sType, sWhere, sType,));

    # Encoding.
    if sForm not in g_kdIemForms:
        self.error('%s: unknown a_Form value: %s' % (sMacro, sForm,));
    else:
        if oInstr.sEncoding is None:
            oInstr.sEncoding = g_kdIemForms[sForm][0];
        elif g_kdIemForms[sForm][0] != oInstr.sEncoding:
            self.error('%s: current instruction @openc and a_Form does not match: %s vs %s (%s)'
                       % (sMacro, oInstr.sEncoding, g_kdIemForms[sForm], sForm));

        # Check the parameter locations for the encoding.
        if g_kdIemForms[sForm][1] is not None:
            if len(g_kdIemForms[sForm][1]) > len(oInstr.aoOperands):
                self.error('%s: The a_Form=%s has a different operand count: %s (form) vs %s'
                           % (sMacro, sForm, len(g_kdIemForms[sForm][1]), len(oInstr.aoOperands) ));
            else:
                for iOperand, sWhere in enumerate(g_kdIemForms[sForm][1]):
                    if oInstr.aoOperands[iOperand].sWhere != sWhere:
                        self.error('%s: current instruction @op%u and a_Form location does not match: %s vs %s (%s)'
                                   % (sMacro, iOperand + 1, oInstr.aoOperands[iOperand].sWhere, sWhere, sForm,));
                    sOpFormMatch = g_kdOpTypes[oInstr.aoOperands[iOperand].sType][4];
                    # A 'V' operand needs a VEX or XOP form that has a 'V' in it
                    # besides the one in the 'VEX' prefix itself.  Containment
                    # tests are used since find() yields -1 (truthy) on a miss
                    # and 0 (falsy) for a hit at the very start of the form.
                    if    (sOpFormMatch in [ 'REG', 'MEM', ] and sForm.find('_' + sOpFormMatch) < 0) \
                       or (sOpFormMatch in [ 'FIXED', ]      and sForm.find(sOpFormMatch) < 0) \
                       or (sOpFormMatch == 'RM'              and (sForm.find('_MEM') > 0 or sForm.find('_REG') > 0) ) \
                       or (sOpFormMatch == 'V'               and (   not ('VEX' in sForm or 'XOP' in sForm) \
                                                                  or sForm.replace('VEX','').find('V') < 0) ):
                        self.error('%s: current instruction @op%u and a_Form type does not match: %s/%s vs %s'
                                   % (sMacro, iOperand + 1, oInstr.aoOperands[iOperand].sType, sOpFormMatch, sForm, ));
                if len(g_kdIemForms[sForm][1]) < len(oInstr.aoOperands):
                    # Operands beyond those covered by the form must be fixed ones.
                    for iOperand in range(len(g_kdIemForms[sForm][1]), len(oInstr.aoOperands)):
                        if    oInstr.aoOperands[iOperand].sType != 'FIXED' \
                          and g_kdOpTypes[oInstr.aoOperands[iOperand].sType][0] != 'IDX_ParseFixedReg':
                            self.error('%s: Expected FIXED type operand #%u following operands given by a_Form=%s: %s (%s)'
                                       % (sMacro, iOperand, sForm, oInstr.aoOperands[iOperand].sType,
                                          oInstr.aoOperands[iOperand].sWhere));

        # Check @opcodesub (requires a known form, so it lives in this branch).
        if    oInstr.sSubOpcode \
          and g_kdIemForms[sForm][2] \
          and oInstr.sSubOpcode.find(g_kdIemForms[sForm][2]) < 0:
            self.error('%s: current instruction @opcodesub and a_Form does not match: %s vs %s (%s)'
                       % (sMacro, oInstr.sSubOpcode, g_kdIemForms[sForm][2], sForm,));

    # Stats.
    if not self.oReStatsName.match(sStats):
        self.error('%s: invalid a_Stats value: %s' % (sMacro, sStats,));
    elif oInstr.sStats is None:
        oInstr.sStats = sStats;
    elif oInstr.sStats != sStats:
        self.error('%s: mismatching @opstats and a_Stats value: %s vs %s'
                   % (sMacro, oInstr.sStats, sStats,));

    # Process the hints (simply merge with @ophints w/o checking anything).
    for sHint in sDisHints.split('|'):
        sHint = sHint.strip();
        if sHint.startswith('DISOPTYPE_'):
            sShortHint = sHint[len('DISOPTYPE_'):].lower();
            if sShortHint in g_kdHints:
                oInstr.dHints[sShortHint] = True; # (dummy value, using dictionary for speed)
            else:
                self.error('%s: unknown a_fDisHints value: %s' % (sMacro, sHint,));
        elif sHint != '0':
            self.error('%s: expected a_fDisHints value: %s' % (sMacro, sHint,));

    for sHint in sIemHints.split('|'):
        sHint = sHint.strip();
        if sHint.startswith('IEMOPHINT_'):
            sShortHint = sHint[len('IEMOPHINT_'):].lower();
            if sShortHint in g_kdHints:
                oInstr.dHints[sShortHint] = True; # (dummy value, using dictionary for speed)
            else:
                self.error('%s: unknown a_fIemHints value: %s' % (sMacro, sHint,));
        elif sHint != '0':
            self.error('%s: expected a_fIemHints value: %s' % (sMacro, sHint,));

    _ = sAsm;
    return True;
4516
def workerIemOpMnemonic(self, sMacro, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands):
    """
    Processes one of the IEMOP_MNEMONIC0, IEMOP_MNEMONIC1, IEMOP_MNEMONIC2,
    IEMOP_MNEMONIC3, and IEMOP_MNEMONIC4 macros.

    These macros carry neither a statistics name nor an assembly string, so
    both are derived from the lower case mnemonic and the operands before
    handing over to workerIemOpMnemonicEx(), whose result is returned.
    """
    if asOperands:
        sStats = sLower + '_' + '_'.join(asOperands);
        sAsm   = sLower + ' ' + ','.join(asOperands);
    else:
        # Without operands both names are just the mnemonic.
        sStats = sLower;
        sAsm   = sLower;
    return self.workerIemOpMnemonicEx(sMacro, sStats, sAsm, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands);
4526
def workerIemMcBegin(self, sCode, offBeginStatementInCodeStr, offBeginStatementInLine):
    """
    Process a IEM_MC_BEGIN macro invocation.

    Creates a new McBlock for the current function, makes it the current MC
    block, appends it to g_aoMcBlocks and updates the block counters.

    A IEM_MC_BEGIN outside a function, or before the previous block has been
    ended, is reported via raiseError().  Returns True.
    """
    if self.fDebugMc:
        self.debug('IEM_MC_BEGIN on %s off %s' % (self.iLine, offBeginStatementInLine,));

    # Check preconditions.
    if not self.oCurFunction:
        self.raiseError('IEM_MC_BEGIN w/o current function (%s)' % (sCode,));
    if self.oCurMcBlock:
        self.raiseError('IEM_MC_BEGIN before IEM_MC_END.  Previous IEM_MC_BEGIN at line %u' % (self.oCurMcBlock.iBeginLine,));

    # The indent is the distance from the start of the line holding the
    # statement within sCode (which may contain several lines after macro
    # expansion); rfind yields -1 when there is no preceding newline.
    offLineStart = sCode.rfind('\n', 0, offBeginStatementInCodeStr) + 1;
    cchIndent    = offBeginStatementInCodeStr - offLineStart;

    # Start a new block.
    oBlock = McBlock(self.sSrcFile, self.iLine, offBeginStatementInLine,
                     self.oCurFunction, self.iMcBlockInFunc, cchIndent);
    self.oCurMcBlock = oBlock;
    g_aoMcBlocks.append(oBlock);
    self.cTotalMcBlocks += 1;
    self.iMcBlockInFunc += 1;
    return True;
4555
def workerIemMcEnd(self, offEndStatementInLine):
    """
    Process a IEM_MC_END macro invocation.

    Collects the source lines making up the current MC block
    (self.oCurMcBlock), trims the last of them so it ends right after the
    'IEM_MC_END();' statement, passes them to the block's complete() method
    and finally clears self.oCurMcBlock.

    offEndStatementInLine is passed on to complete().  When the block begins
    and ends on the same line, it is also where the search for the newline
    terminating the IEM_MC_END line starts.

    Returns True.  Problems are reported via raiseError().
    """
    if self.fDebugMc:
        self.debug('IEM_MC_END on %s off %s' % (self.iLine, offEndStatementInLine,));

    # Check preconditions.
    if not self.oCurMcBlock:
        self.raiseError('IEM_MC_END w/o IEM_MC_BEGIN.');

    #
    # HACK ALERT! For blocks originating from macro expansion the start and
    #             end line will be the same, but the line has multiple
    #             newlines inside it.  So, we have to do some extra tricks
    #             to get the lines out of there. We ASSUME macros aren't
    #             messy, but keep IEM_MC_BEGIN/END on separate lines.
    #
    if self.iLine > self.oCurMcBlock.iBeginLine:
        # Regular case: slice the block's lines out of the file.
        asLines = self.asLines[self.oCurMcBlock.iBeginLine - 1 : self.iLine];
        if not asLines[0].strip().startswith('IEM_MC_BEGIN'):
            self.raiseError('IEM_MC_BEGIN is not the first word on the line');
    else:
        sRawLine = self.asLines[self.iLine - 1];

        # Cut off everything following the (embedded) line with IEM_MC_END.
        off = sRawLine.find('\n', offEndStatementInLine);
        if off > 0:
            sRawLine = sRawLine[:off + 1];

        # Cut off everything preceding the (embedded) line with IEM_MC_BEGIN;
        # if that line has something in front of the statement, start at the
        # statement itself.
        off = sRawLine.rfind('\n', 0, self.oCurMcBlock.offBeginLine) + 1;
        sRawLine = sRawLine[off:];
        if not sRawLine.strip().startswith('IEM_MC_BEGIN'):
            sRawLine = sRawLine[self.oCurMcBlock.offBeginLine - off:]

        asLines = [sLine + '\n' for sLine in sRawLine.split('\n')];

    #
    # Strip anything following the IEM_MC_END(); statement in the final line,
    # so that we don't carry on any trailing 'break' after macro expansions
    # like for iemOp_movsb_Xb_Yb.
    #
    # NOTE(review): the whitespace skipping loops below index sFinal without
    #               any bounds check, so a final line ending before the
    #               expected '(', ')' or ';' raises IndexError rather than a
    #               'bogus IEM_MC_END' error.
    #
    while asLines[-1].strip() == '':
        asLines.pop();
    sFinal        = asLines[-1];
    offFinalEnd   = sFinal.find('IEM_MC_END');
    if offFinalEnd < 0: self.raiseError('bogus IEM_MC_END: Not in final line: %s' % (sFinal,));
    offFinalEnd += len('IEM_MC_END');

    while sFinal[offFinalEnd].isspace():
        offFinalEnd += 1;
    if sFinal[offFinalEnd] != '(': self.raiseError('bogus IEM_MC_END: Expected "(" at %s: %s' % (offFinalEnd, sFinal,));
    offFinalEnd += 1;

    while sFinal[offFinalEnd].isspace():
        offFinalEnd += 1;
    if sFinal[offFinalEnd] != ')': self.raiseError('bogus IEM_MC_END: Expected ")" at %s: %s' % (offFinalEnd, sFinal,));
    offFinalEnd += 1;

    while sFinal[offFinalEnd].isspace():
        offFinalEnd += 1;
    if sFinal[offFinalEnd] != ';': self.raiseError('bogus IEM_MC_END: Expected ";" at %s: %s' % (offFinalEnd, sFinal,));
    offFinalEnd += 1;

    # The final line now ends with the semicolon (no trailing newline).
    asLines[-1] = sFinal[: offFinalEnd];

    #
    # Complete and discard the current block.
    #
    self.oCurMcBlock.complete(self.iLine, offEndStatementInLine, asLines);
    self.oCurMcBlock = None;
    return True;
4627
def workerStartFunction(self, asArgs):
    """
    Deals with the start of a decoder function.

    These are all defined using one of the FNIEMOP*_DEF* and FNIEMOP_*STUB*
    macros, so we get an argument list for these where the 0th argument is the
    macro name.  The next argument (asArgs[1]) is passed to DecoderFunction as
    the function name.

    Any function still open is completed with the lines up to, but not
    including, the current one.  Returns True.
    """
    # Complete any existing function.
    oPrevFunction = self.oCurFunction;
    if oPrevFunction:
        iLastLine = self.iLine - 1;
        oPrevFunction.complete(iLastLine, self.asLines[oPrevFunction.iBeginLine - 1 : iLastLine]);

    # Create the new function.
    self.oCurFunction = DecoderFunction(self.sSrcFile, self.iLine, asArgs[1], asArgs);
    return True;
4643
def checkCodeForMacro(self, sCode, offLine):
    """
    Checks code for relevant macro invocation.

    The following are looked for, in this order:
      - Decoder function definitions (FNIEMOP_DEF, FNIEMOP_STUB and friends),
        which start a new function and are recorded on the current
        instructions.
      - Worker function definitions (FNIEMOP_DEF_1, FNIEMOP_DEF_2), which
        only start a new function.
      - IEMOP_HLP_DONE_VEX_DECODING* invocations, the L0 variants of which
        imply the 'vex_l_zero' hint.
      - IEMOP_MNEMONIC* invocations.
      - IEM_MC_BEGIN and IEM_MC_END, where offLine is added to the offset of
        each match to get the offset handed to the MC workers.

    Returns True if a function definition or anything matching IEM_MC_ was
    found, otherwise False.
    """
    #
    # All of the above are macro invocations, so no parenthesis means no work.
    #
    if sCode.find('(') <= 0:
        return False;

    # Look for instruction decoder function definitions. ASSUME single line.
    asArgs = self.findAndParseFirstMacroInvocation(sCode,
                                                   [ 'FNIEMOP_DEF',
                                                     'FNIEMOPRM_DEF',
                                                     'FNIEMOP_STUB',
                                                     'FNIEMOP_STUB_1',
                                                     'FNIEMOP_UD_STUB',
                                                     'FNIEMOP_UD_STUB_1' ]);
    if asArgs is not None:
        self.workerStartFunction(asArgs);

        if not self.aoCurInstrs:
            self.addInstruction();
        for oInstr in self.aoCurInstrs:
            if oInstr.iLineFnIemOpMacro == -1:
                oInstr.iLineFnIemOpMacro = self.iLine;
            else:
                self.error('%s: already seen a FNIEMOP_XXX macro for %s' % (asArgs[0], oInstr,) );
        fIsStub = asArgs[0].find('STUB') > 0;
        self.setInstrunctionAttrib('sFunction', asArgs[1]);
        self.setInstrunctionAttrib('fStub', fIsStub, fOverwrite = True);
        self.setInstrunctionAttrib('fUdStub', asArgs[0].find('UD_STUB') > 0, fOverwrite = True);
        if fIsStub:
            # Stubs have no body, so the instructions are done right here.
            self.doneInstructions(fEndOfFunction = True);
        return True;

    # Check for worker function definitions, so we can get a context for MC blocks.
    asArgs = self.findAndParseFirstMacroInvocation(sCode,
                                                   [ 'FNIEMOP_DEF_1',
                                                     'FNIEMOP_DEF_2', ]);
    if asArgs is not None:
        self.workerStartFunction(asArgs);
        return True;

    # IEMOP_HLP_DONE_VEX_DECODING_*
    asArgs = self.findAndParseFirstMacroInvocation(sCode,
                                                   [ 'IEMOP_HLP_DONE_VEX_DECODING',
                                                     'IEMOP_HLP_DONE_VEX_DECODING_L0',
                                                     'IEMOP_HLP_DONE_VEX_DECODING_NO_VVVV',
                                                     'IEMOP_HLP_DONE_VEX_DECODING_L0_AND_NO_VVVV',
                                                     ]);
    if asArgs is not None:
        sMacro = asArgs[0];
        if sMacro in ('IEMOP_HLP_DONE_VEX_DECODING_L0', 'IEMOP_HLP_DONE_VEX_DECODING_L0_AND_NO_VVVV', ):
            for oInstr in self.aoCurInstrs:
                if 'vex_l_zero' in oInstr.dHints:
                    continue;
                # Only complain when a mnemonic macro has been seen for the instruction.
                if oInstr.iLineMnemonicMacro >= 0:
                    self.errorOnLine(oInstr.iLineMnemonicMacro,
                                     'Missing IEMOPHINT_VEX_L_ZERO! (%s on line %d)' % (sMacro, self.iLine,));
                oInstr.dHints['vex_l_zero'] = True;

    #
    # IEMOP_MNEMONIC*
    #
    if sCode.find('IEMOP_MNEMONIC') >= 0:
        # IEMOP_MNEMONIC(a_Stats, a_szMnemonic) IEMOP_INC_STATS(a_Stats)
        asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC');
        if asArgs is not None:
            if len(self.aoCurInstrs) == 1:
                oInstr = self.aoCurInstrs[0];
                if oInstr.sStats is None:
                    oInstr.sStats = asArgs[1];
                self.deriveMnemonicAndOperandsFromStats(oInstr, asArgs[1]);

        # IEMOP_MNEMONIC<N>EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, [a_Op1, .. a_OpN,]
        #                     a_fDisHints, a_fIemHints)
        # The N operands sit at index 6 and up, pushing the two hints back by N.
        for cOperands in range(5):
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC%uEX' % (cOperands,));
            if asArgs is not None:
                self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5],
                                           asArgs[6 + cOperands], asArgs[7 + cOperands],
                                           asArgs[6 : 6 + cOperands]);

        # IEMOP_MNEMONIC<N>(a_Form, a_Upper, a_Lower, [a_Op1, .. a_OpN,] a_fDisHints, a_fIemHints)
        # The N operands sit at index 4 and up, pushing the two hints back by N.
        for cOperands in range(5):
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC%u' % (cOperands,));
            if asArgs is not None:
                self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3],
                                         asArgs[4 + cOperands], asArgs[5 + cOperands],
                                         asArgs[4 : 4 + cOperands]);

    #
    # IEM_MC_BEGIN + IEM_MC_END.
    # We must support multiple instances per code snippet.
    #
    offCode = sCode.find('IEM_MC_');
    if offCode < 0:
        return False;
    for oMatch in self.oReMcBeginEnd.finditer(sCode, offCode):
        offMatch = oMatch.start();
        if oMatch.group(1) == 'END':
            self.workerIemMcEnd(offLine + offMatch);
        else:
            self.workerIemMcBegin(sCode, offMatch, offLine + offMatch);
    return True;
4784
def workerPreProcessRecreateMacroRegex(self):
    """
    Recreates self.oReMacros when self.dMacros changes.

    The regular expression has a single group with one alternative per macro
    in self.dMacros: the macro name followed by optional blanks and an opening
    parenthesis for macros with an argument list, and the name followed by a
    word boundary for macros without one.  self.oReMacros is set to None when
    there are no macros.

    Returns True.
    """
    if self.dMacros:
        # Note! Escaped backslashes throughout, as '\s' and '\(' are not valid
        #       escape sequences in a regular (non-raw) string literal.
        asAlternatives = [sName + ('\\s*\\(' if oMacro.asArgs is not None else '\\b')
                          for sName, oMacro in self.dMacros.items()];
        self.oReMacros = re.compile('\\b(' + '|'.join(asAlternatives) + ')');
    else:
        self.oReMacros = None;
    return True;
4805
def workerPreProcessDefine(self, sRest):
    """
    Handles a macro #define, the sRest is what follows after the directive word.

    Continuation lines are pulled in from self.asLines, advancing self.iLine.
    Only macros with IEM_MC_BEGIN in their body are recorded (in
    self.dMacros), all other definitions are ignored.

    Returns True for ignored definitions, the self.error() result for
    definitions that cannot be parsed, and otherwise the result of
    workerPreProcessRecreateMacroRegex().
    """
    #
    # If using line continuation, just concat all the lines together,
    # preserving the newline character but not the escaping.
    #
    iFirstLine = self.iLine;
    while sRest.endswith('\\\n') and self.iLine < len(self.asLines):
        sRest = sRest[:-2].rstrip() + '\n' + self.asLines[self.iLine];
        self.iLine += 1;

    #
    # Use regex to split out the name, argument list and body.
    # If this fails, we assume it's a simple macro.
    #
    oMatch = self.oReHashDefine2.match(sRest);
    if oMatch:
        asParams = [sParam.strip() for sParam in oMatch.group(2).split(',')];
        sBody    = oMatch.group(3);
    else:
        oMatch = self.oReHashDefine3.match(sRest);
        if not oMatch:
            self.debug('workerPreProcessDefine: wtf? sRest=%s' % (sRest,));
            return self.error('bogus macro definition: %s' % (sRest,));
        asParams = None;
        sBody    = oMatch.group(2);
    sName = oMatch.group(1);
    assert sName == sName.strip();

    #
    # Is this of any interest to us?  We do NOT support MC blocks within
    # nested macro expansion, just to avoid lots of extra work.
    #
    if "IEM_MC_BEGIN" not in sBody:
        return True;

    #
    # Add the macro.
    #
    if self.fDebugPreProc:
        self.debug('#define %s on line %u' % (sName, self.iLine,));
    self.dMacros[sName] = SimpleParser.Macro(sName, asParams, sBody.strip(), iFirstLine);
    return self.workerPreProcessRecreateMacroRegex();
4855
def workerPreProcessUndef(self, sRest):
    """
    Handles a macro #undef, the sRest is what follows after the directive word.

    If the macro is one of those in self.dMacros it is removed and the result
    of workerPreProcessRecreateMacroRegex() is returned.  Otherwise nothing is
    done and True is returned.
    """
    # Quick comment strip and isolate the name.
    offSlash = sRest.find('/');
    sName = (sRest[:offSlash] if offSlash > 0 else sRest).strip();

    # Nothing to do unless it is a macro we are tracking.
    if sName not in self.dMacros:
        return True;

    if self.fDebugPreProc:
        self.debug('#undef %s on line %u' % (sName, self.iLine,));
    del self.dMacros[sName];
    return self.workerPreProcessRecreateMacroRegex();
4874
def checkPreProcessorDirectiveForDefineUndef(self, sLine):
    """
    Handles a preprocessor directive.

    Lines matching self.oReHashDefine go to workerPreProcessDefine() and lines
    matching self.oReHashUndef go to workerPreProcessUndef(), in both cases
    with the first regex group plus a newline as argument.

    Returns the worker's result, or False if the line is neither.
    """
    oMatch = self.oReHashDefine.match(sLine);
    if not oMatch:
        oMatch = self.oReHashUndef.match(sLine);
        if not oMatch:
            return False;
        return self.workerPreProcessUndef(oMatch.group(1) + '\n');
    return self.workerPreProcessDefine(oMatch.group(1) + '\n');
4887
def expandMacros(self, sLine, oMatch):
    """
    Expands macros we know about in the given line.
    Currently we ASSUME there is only one and that is what oMatch matched.

    oMatch is a match on sLine whose first group is the macro name, including
    the opening parenthesis in case of a macro taking arguments.

    Returns sLine with the invocation replaced by the expansion.  Invocations
    not ending on the line and argument count mismatches are reported via
    raiseError().
    """
    #
    # Get our bearings.
    #
    offMatch = oMatch.start();
    sName    = oMatch.group(1);
    assert sName == sLine[oMatch.start() : oMatch.end()];
    fWithArgs = sName.endswith('(');
    if fWithArgs:
        sName = sName[:-1].strip();
    oMacro = self.dMacros[sName]                    # type: SimpleParser.Macro
    sBefore = sLine[:offMatch];

    #
    # Deal with simple macro invocations w/o parameters.
    #
    if not fWithArgs:
        if self.fDebugPreProc:
            self.debug('expanding simple macro %s on line %u' % (sName, self.iLine,));
        return sBefore + oMacro.expandMacro(self) + sLine[oMatch.end():];

    #
    # Complicated macro with parameters.
    # Start by extracting the parameters. ASSUMES they are all on the same line!
    #
    cDepth   = 1;
    offCur   = oMatch.end();
    offArg   = offCur;
    asArgs   = [];
    while True:
        if offCur >= len(sLine):
            self.raiseError('expandMacros: Invocation of macro %s spans multiple lines!' % (sName,));
        ch = sLine[offCur];
        if ch == ')':
            cDepth -= 1;
            if cDepth == 0:
                # The parenthesis closing the invocation also ends the last argument.
                asArgs.append(sLine[offArg:offCur].strip());
                break;
        elif ch == '(':
            cDepth += 1;
        elif ch == ',' and cDepth == 1:
            asArgs.append(sLine[offArg:offCur].strip());
            offArg = offCur + 1;
        offCur += 1;
    if len(oMacro.asArgs) == 0 and asArgs == ['']: # trick for empty parameter list.
        asArgs = [];
    if len(oMacro.asArgs) != len(asArgs):
        self.raiseError('expandMacros: Argument mismatch in %s invocation' % (oMacro.sName,));

    #
    # Do the expanding.
    #
    if self.fDebugPreProc:
        self.debug('expanding macro %s on line %u with arguments %s' % (sName, self.iLine, asArgs));
    return sBefore + oMacro.expandMacro(self, asArgs) + sLine[offCur + 1 :];
4946
def parse(self):
    """
    Parses the given file.

    Walks self.asLines from self.iLine onwards: known macros are expanded,
    multi-line comments are collected and handed to parseComment(), and code,
    #define/#undef directives and function tables are passed on to their
    respective handlers.

    Returns the result of self.printErrors() (the number of errors).
    Raises exception on fatal trouble.
    """
    while self.iLine < len(self.asLines):
        sLine = self.asLines[self.iLine];
        self.iLine += 1;

        # When in code, expand a macro we know about and store the expanded
        # line back into the line array.
        if self.iState == self.kiCode and self.oReMacros:
            oMatch = self.oReMacros.search(sLine);
            if oMatch:
                sLine = self.expandMacros(sLine, oMatch);
                if self.fDebugPreProc:
                    self.debug('line %d: expanded\n%s ==>\n%s' % (self.iLine, self.asLines[self.iLine - 1], sLine[:-1],));
                self.asLines[self.iLine - 1] = sLine;

        offSlash = sLine.find('/');
        fInCode  = self.iState == self.kiCode;

        # A slash may be the start (or end) of a comment.
        if offSlash >= 0:
            fCppComment = fInCode and offSlash + 1 < len(sLine) and sLine[offSlash + 1] == '/';
            if not fCppComment:
                # Walk the line, switching between code and multi-line comment state.
                offCur = 0;
                while offCur < len(sLine):
                    if self.iState == self.kiCode:
                        # Find the next comment opening that is not a short (< 16 chars)
                        # comment closed on the same line, so that a line like this one
                        # is passed on in one piece:
                        #   IEM_MC_ARG_CONST(uint8_t, bImmArg, /*=*/ bImm, 2);
                        # Note! C++ comments are ignored here, assuming they aren't used
                        #       on lines with C-style comments.
                        offOpen = sLine.find('/*', offCur);
                        while offOpen >= 0:
                            offClose = sLine.find('*/', offOpen + 2);
                            if offClose < 0 or offClose - offOpen >= 16: # 16 chars is a bit random.
                                break;
                            offOpen = sLine.find('/*', offClose);

                        if offOpen < 0:
                            # The rest of the line is code.
                            self.checkCodeForMacro(sLine[offCur:], offCur);
                            offCur = len(sLine);
                        else:
                            # Code up to the comment, then switch to comment state.
                            self.checkCodeForMacro(sLine[offCur:offOpen], offCur);
                            self.sComment     = '';
                            self.iCommentLine = self.iLine;
                            self.iState       = self.kiCommentMulti;
                            offCur = offOpen + 2;

                    elif self.iState == self.kiCommentMulti:
                        offClose = sLine.find('*/', offCur);
                        if offClose < 0:
                            # The comment continues on the next line.
                            self.sComment += sLine[offCur:];
                            offCur = len(sLine);
                        else:
                            # End of the comment, parse it and go back to code state.
                            self.sComment += sLine[offCur:offClose];
                            self.iState    = self.kiCode;
                            offCur = offClose + 2;
                            self.parseComment();
                    else:
                        assert False;
            # C++ line comment, only what precedes it is code.
            elif offSlash > 0:
                self.checkCodeForMacro(sLine[:offSlash], 0);

        # No slash: The whole line belongs to the current multi-line comment.
        elif self.iState == self.kiCommentMulti:
            self.sComment += sLine;

        # No slash: Check for macro #define and #undef, as we need to be able
        # to selectively expand the ones containing MC blocks.
        elif fInCode and sLine.lstrip().startswith('#'):
            if self.fDebugPreProc:
                self.debug('line %d: pre-proc' % (self.iLine,));
            self.checkPreProcessorDirectiveForDefineUndef(sLine);

        # No slash: Code line that may contain a macro of interest.
        elif (    fInCode
              and (sLine.find('IEMOP_') >= 0 or sLine.find('FNIEMOPRM_DEF') >= 0 or sLine.find('IEM_MC') >= 0)):
            self.checkCodeForMacro(sLine, 0);

        # A '}' in the first position ends the function, complete the instructions.
        elif fInCode and sLine[0] == '}':
            self.doneInstructions(fEndOfFunction = True);

        # Instruction table on the form 'IEM_STATIC const PFNIEMOP g_apfnVexMap3',
        # used for checking/adding @oppfx info.
        elif fInCode and sLine.find('PFNIEMOP') > 0 and self.oReFunTable.match(sLine):
            self.parseFunctionTable(sLine);

    self.doneInstructions(fEndOfFunction = True);
    self.debug('%3s%% / %3s stubs out of %4s instructions and %4s MC blocks in %s'
               % (self.cTotalStubs * 100 // max(self.cTotalInstr, 1), self.cTotalStubs, self.cTotalInstr,
                  self.cTotalMcBlocks, os.path.basename(self.sSrcFile),));
    return self.printErrors();
5045
## The parsed content of IEMAllInstructionsCommonBodyMacros.h.
## Set by the first __parseFileByName call and handed to the SimpleParser
## instance of every file parsed after that.
g_oParsedCommonBodyMacros = None # type: SimpleParser

def __parseFileByName(sSrcFile, sDefaultMap):
    """
    Parses one source file for instruction specifications.

    On the first call IEMAllInstructionsCommonBodyMacros.h is parsed as well
    (looked for next to sSrcFile first, then next to this script), and the
    resulting parser is cached in g_oParsedCommonBodyMacros.

    Returns a tuple with the result of SimpleParser.parse() for sSrcFile and
    the parser instance.
    Raises Exception if a file cannot be opened or read, and ParserException
    on parser trouble (the message is also printed to stderr).
    """
    #
    # Read sSrcFile into a line array.
    #
    try:
        oFile = open(sSrcFile, "r"); # pylint: disable=consider-using-with,unspecified-encoding
    except Exception as oXcpt:
        raise Exception("failed to open %s for reading: %s" % (sSrcFile, oXcpt,));
    try:
        asLines = oFile.readlines();
    except Exception as oXcpt:
        raise Exception("failed to read %s: %s" % (sSrcFile, oXcpt,));
    finally:
        oFile.close();

    #
    # On the first call, we parse IEMAllInstructionsCommonBodyMacros.h so we
    # can use the macros from it when processing the other files.
    #
    global g_oParsedCommonBodyMacros;
    if g_oParsedCommonBodyMacros is None:
        # Locate the file.
        sCommonBodyMacros = os.path.join(os.path.split(sSrcFile)[0], 'IEMAllInstructionsCommonBodyMacros.h');
        if not os.path.isfile(sCommonBodyMacros):
            sCommonBodyMacros = os.path.join(os.path.split(__file__)[0], 'IEMAllInstructionsCommonBodyMacros.h');

        # Read it.
        try:
            with open(sCommonBodyMacros, "r") as oIncFile: # pylint: disable=unspecified-encoding
                asIncFiles = oIncFile.readlines();
        except Exception as oXcpt:
            raise Exception("failed to open/read %s: %s" % (sCommonBodyMacros, oXcpt,));

        # Parse it.  The file is only expected to supply macros, so finding
        # any instructions, tags, stubs or MC blocks in it is an error.
        try:
            oParser = SimpleParser(sCommonBodyMacros, asIncFiles, 'one');
            if oParser.parse() != 0:
                raise ParserException('%s: errors: See above' % (sCommonBodyMacros, ));
            if oParser.cTotalInstr != 0 or oParser.cTotalStubs != 0 or oParser.cTotalTagged != 0 or oParser.cTotalMcBlocks != 0:
                # Note! The argument order must follow the message text: instr, tags, stubs, MCs.
                raise ParserException('%s: error: Unexpectedly found %u instr, %u tags, %u stubs and %u MCs, expecting zero. %s'
                                      % (sCommonBodyMacros, oParser.cTotalInstr, oParser.cTotalTagged, oParser.cTotalStubs,
                                         oParser.cTotalMcBlocks,
                                         ', '.join(sorted(  [str(oMcBlock.iBeginLine) for oMcBlock in g_aoMcBlocks]
                                                          + [str(oInstr.iLineCreated) for oInstr in g_aoAllInstructions])),));
        except ParserException as oXcpt:
            print(str(oXcpt), file = sys.stderr);
            raise;
        g_oParsedCommonBodyMacros = oParser;

    #
    # Do the parsing.
    #
    try:
        oParser = SimpleParser(sSrcFile, asLines, sDefaultMap, g_oParsedCommonBodyMacros);
        return (oParser.parse(), oParser);
    except ParserException as oXcpt:
        print(str(oXcpt), file = sys.stderr);
        raise;
5110
5111
def __doTestCopying():
    """
    Executes the asCopyTests instructions.

    Each entry in an instruction's asCopyTests is looked up in
    g_dAllInstructionsByStat first and in g_dAllInstructionsByFunction
    second, and the tests of the instruction(s) found are appended to the
    destination instruction's aoTests.  References that cannot be resolved or
    that resolve to the destination itself are reported on stderr.

    Returns the number of errors.
    """
    asErrors = [];
    for oDstInstr in g_aoAllInstructions:
        for sSrcInstr in (oDstInstr.asCopyTests or ()):
            # Resolve the reference to a list of source instructions.
            oByStat = g_dAllInstructionsByStat.get(sSrcInstr, None);
            aoSrcInstrs = [oByStat,] if oByStat else g_dAllInstructionsByFunction.get(sSrcInstr, []);
            if not aoSrcInstrs:
                asErrors.append('%s:%s: error: @opcopytests reference "%s" not found\n'
                                % ( oDstInstr.sSrcFile, oDstInstr.iLineCreated, sSrcInstr));
                continue;

            # Copy the tests, refusing to copy from the destination itself.
            for oSrcInstr in aoSrcInstrs:
                if oSrcInstr != oDstInstr:
                    oDstInstr.aoTests.extend(oSrcInstr.aoTests);
                else:
                    asErrors.append('%s:%s: error: @opcopytests reference "%s" matches the destination\n'
                                    % ( oDstInstr.sSrcFile, oDstInstr.iLineCreated, sSrcInstr));

    if asErrors:
        sys.stderr.write(u''.join(asErrors));
    return len(asErrors);
5139
5140
def __applyOnlyTest():
    """
    Restricts testing to the instructions in g_aoOnlyTestInstructions.

    If that list contains anything, aoTests is emptied on every instruction
    in g_aoAllInstructions that is not in it.  Otherwise nothing is changed.

    Returns 0 (no errors).
    """
    if not g_aoOnlyTestInstructions:
        return 0;
    for oInstr in g_aoAllInstructions:
        if oInstr.aoTests and oInstr not in g_aoOnlyTestInstructions:
            oInstr.aoTests = [];
    return 0;
5152
## List of all main instruction files and their default maps.
## Each entry is a (filename, default map name) pair; parseFiles() matches the
## filename part case-insensitively.
g_aasAllInstrFilesAndDefaultMap = (
    ( 'IEMAllInstructionsCommon.cpp.h',    'one',       ),
    ( 'IEMAllInstructionsOneByte.cpp.h',   'one',       ),
    ( 'IEMAllInstructionsTwoByte0f.cpp.h', 'two0f',     ),
    ( 'IEMAllInstructionsThree0f38.cpp.h', 'three0f38', ),
    ( 'IEMAllInstructionsThree0f3a.cpp.h', 'three0f3a', ),
    ( 'IEMAllInstructionsVexMap1.cpp.h',   'vexmap1',   ),
    ( 'IEMAllInstructionsVexMap2.cpp.h',   'vexmap2',   ),
    ( 'IEMAllInstructionsVexMap3.cpp.h',   'vexmap3',   ),
    ( 'IEMAllInstructions3DNow.cpp.h',     '3dnow',     ),
);
5165
def __parseFilesWorker(asFilesAndDefaultMap):
    """
    Parses the given IEMAllInstruction*.cpp.h files.

    asFilesAndDefaultMap is a sequence of (filename, default map) pairs.  A
    filename without a directory part that does not exist relative to the
    current directory is looked for in the directory of this script.

    After parsing, the test copying and only-test passes are run and total
    stub statistics are printed to stderr.

    Returns a list of the parsers on success.
    Raises exception on failure.
    """
    sSrcDir   = os.path.dirname(os.path.abspath(__file__));
    cErrors   = 0;
    aoParsers = [];
    for sFilename, sDefaultMap in asFilesAndDefaultMap:
        if not os.path.split(sFilename)[0] and not os.path.exists(sFilename):
            sFilename = os.path.join(sSrcDir, sFilename);
        cThisErrors, oParser = __parseFileByName(sFilename, sDefaultMap);
        cErrors += cThisErrors;
        aoParsers.append(oParser);
    cErrors += __doTestCopying();
    cErrors += __applyOnlyTest();

    # Total stub stats.  The divisor is clamped to one so that an empty
    # instruction list does not cause a division by zero.
    cTotalStubs = sum(oInstr.fStub for oInstr in g_aoAllInstructions);
    print('debug: %3s%% / %3s stubs out of %4s instructions and %4s MC blocks in total'
          % (cTotalStubs * 100 // max(len(g_aoAllInstructions), 1), cTotalStubs, len(g_aoAllInstructions),
             len(g_aoMcBlocks),),
          file = sys.stderr);

    if cErrors != 0:
        raise Exception('%d parse errors' % (cErrors,));
    return aoParsers;
5196
5197
def parseFiles(asFiles):
    """
    Parses a selection of IEMAllInstruction*.cpp.h files.

    The default map of each file is found by matching its base name
    (ignoring case) against g_aasAllInstrFilesAndDefaultMap, after which
    __parseFilesWorker does the actual work.

    Returns a list of the parsers on success.
    Raises exception on failure, including when a file cannot be classified.
    """
    asFilesAndDefaultMap = [];
    for sFilename in asFiles:
        sBaseName = os.path.split(sFilename)[1].lower();
        # First table entry with a matching name wins.
        sMap = next((asCur[1] for asCur in g_aasAllInstrFilesAndDefaultMap if asCur[0].lower() == sBaseName), None);
        if not sMap:
            raise Exception('Unable to classify file: %s' % (sFilename,));
        asFilesAndDefaultMap.append((sFilename, sMap));

    return __parseFilesWorker(asFilesAndDefaultMap);
5219
5220
def parseAll():
    """
    Parses every file listed in g_aasAllInstrFilesAndDefaultMap.

    Returns a list of the parsers on success.
    Raises exception on failure.
    """
    aoParsers = __parseFilesWorker(g_aasAllInstrFilesAndDefaultMap);
    return aoParsers;
5229
5230
5231#
5232# Generators (may perhaps move later).
5233#
def __formatDisassemblerTableEntry(oInstr):
    """
    Formats the disassembler table entry for an instruction.

    The entry is an OP() macro invocation, or OPVEX() when the instruction
    has more than three operands, along these lines:
        OP("psrlw %Vdq,%Wdq", IDX_ParseModRM, IDX_UseModRM, 0, OP_PSRLW, OP_PARM_Vdq, OP_PARM_Wdq, OP_PARM_NONE,
           DISOPTYPE_HARMLESS),

    Returns the entry as a single line with the columns aligned.
    """
    if len(oInstr.aoOperands) > 3:
        sMacro, cMaxOperands = 'OPVEX', 4;
    else:
        sMacro, cMaxOperands = 'OP', 3;
    assert len(oInstr.aoOperands) <= cMaxOperands;

    #
    # The format string column: mnemonic, a space and the comma separated
    # operand formats.
    #
    asOperandFmts = [];
    for oOperand in oInstr.aoOperands:
        sOperandFmt = g_kdOpTypes[oOperand.sType][2];
        if sOperandFmt[0] != '%':                   ## @todo remove upper() later.
            sOperandFmt = sOperandFmt.upper();      ## @todo remove upper() later.
        asOperandFmts.append(sOperandFmt);
    sFormat = '%s("%s' % (sMacro, oInstr.sMnemonic,);
    if asOperandFmts:
        sFormat += ' ' + ','.join(asOperandFmts);
    sFormat += '",';

    #
    # The decoder columns, starting with what the encoding dictates.
    #
    sEncoding  = oInstr.sEncoding;
    asDecoders = [];
    if sEncoding is None or sEncoding in ('fixed', 'VEX.fixed'):
        pass;
    elif sEncoding == 'ModR/M':
        # ASSUME the first operand is using the ModR/M encoding
        assert len(oInstr.aoOperands) >= 1 and oInstr.aoOperands[0].usesModRM(), "oInstr=%s" % (oInstr,);
        asDecoders.append('IDX_ParseModRM,');
    elif sEncoding == 'VEX.ModR/M':
        asDecoders.append('IDX_ParseModRM,');
    elif sEncoding == 'prefix':
        asDecoders.extend(['0,'] * len(oInstr.aoOperands));
    elif sEncoding == 'vex2':
        asDecoders.append('IDX_ParseVex2b,');
    elif sEncoding == 'vex3':
        asDecoders.append('IDX_ParseVex3b,');
    elif sEncoding in g_dInstructionMaps:
        asDecoders.append(g_dInstructionMaps[sEncoding].sDisParse + ',');
    else:
        ## @todo IDX_ParseTwoByteEsc, IDX_ParseGrp1, IDX_ParseShiftGrp2, IDX_ParseGrp3 thru IDX_ParseGrp10,
        #        IDX_ParseGrp12 thru IDX_ParseGrp16, IDX_Parse3DNow, IDX_ParseThreeByteEsc4, IDX_ParseThreeByteEsc5,
        #        IDX_ParseModFence, IDX_ParseEscFP, IDX_ParseNopPause, IDX_ParseInvOpModRM.
        assert False, str(oInstr);

    # Immediates and stuff in the operands not covered above, then padding.
    asDecoders.extend([g_kdOpTypes[oOperand.sType][0] + ',' for oOperand in oInstr.aoOperands[len(asDecoders):]]);
    asDecoders.extend(['0,'] * (cMaxOperands - len(asDecoders)));

    #
    # The opcode and operand parameter columns.
    #
    assert oInstr.sDisEnum, str(oInstr);
    asParams = ['OP_PARM_' + g_kdOpTypes[oOperand.sType][3] + ',' for oOperand in oInstr.aoOperands];
    asParams.extend(['OP_PARM_NONE,'] * (cMaxOperands - len(asParams)));

    #
    # The flags column: The DISOPTYPE_ defines of the hints, sorted by hint name.
    #
    asAllDefines = [g_kdHints[sHint] for sHint in sorted(oInstr.dHints.keys())];
    asFlags      = [sDefine for sDefine in asAllDefines if sDefine.startswith('DISOPTYPE_')];
    sFlags       = (' | '.join(asFlags) if asFlags else '0') + '),';

    #
    # Join the columns into a line, padding each column out to its offset or
    # separating it by a single space if we are already past that.
    #
    asColumns   = [sFormat,] + asDecoders + [oInstr.sDisEnum + ',',] + asParams + [sFlags,];
    aoffColumns = [4, 29, 49, 65, 77, 89, 109, 125, 141, 157, 183, 199];
    sLine = '';
    for iColumn, sColumn in enumerate(asColumns):
        offColumn = aoffColumns[iColumn];
        if len(sLine) < offColumn:
            sLine = sLine.ljust(offColumn);
        else:
            sLine += ' ';
        sLine += sColumn;

    # For reference, the macro looks like this:
    # define OP(pszOpcode, idxParse1, idxParse2, idxParse3, opcode, param1, param2, param3, optype) \
    # { pszOpcode, idxParse1, idxParse2, idxParse3, 0, opcode, param1, param2, param3, 0, 0, optype }
    return sLine;
5358
def __checkIfShortTable(aoTableOrdered, oMap):
    """
    Checks whether the table can be shortened by dropping the None entries at
    either end.

    Returns (iInstr, cInstructions, fShortTable), where iInstr is the index of
    the first entry to output and cInstructions the index following the last
    one.  For the full table this is (0, len(aoTableOrdered), False).
    """
    cEntries = len(aoTableOrdered);

    # Find the end of the used range by skipping the trailing unused entries...
    iEnd = cEntries;
    while iEnd > 0 and aoTableOrdered[iEnd - 1] is None:
        iEnd -= 1;

    # ...and the start of it by skipping the leading ones.
    iFirst = 0;
    while iFirst < iEnd and aoTableOrdered[iFirst] is None:
        iFirst += 1;

    # Go for the short table version if enough entries can be trimmed off.
    # NOTE(review): The threshold is a 30th of the table size (about 3.3%),
    #               whereas the original comment here said 'more than 30%'.
    #               Confirm which of the two is intended.
    cTrimmed = iFirst + cEntries - iEnd;
    if cTrimmed >= cEntries // 30:
        return (iFirst, iEnd, True);
    _ = oMap; # Use this for overriding.

    # Output the full table.
    return (0, cEntries, False);
5380
def generateDisassemblerTables(oDstFile = sys.stdout):
    """
    Generates disassembler tables.

    Parses all the instruction files and writes one DISOPCODE table plus a
    DISOPMAPDESC range record to oDstFile for each map whose name starts with
    'vex' (the other maps are skipped for now).

    Returns exit code: 0 on success, 1 if the parsing failed.
    """

    #
    # Parse all.
    #
    try:
        parseAll();
    except Exception as oXcpt:
        print('error: parseAll failed: %s' % (oXcpt,), file = sys.stderr);
        traceback.print_exc(file = sys.stderr);
        return 1;

    #
    # The disassembler uses a slightly different table layout to save space:
    # Maps using the 'byte+pfx' selector are split up into one map per prefix.
    # NOTE(review): The original comment was cut off mid-sentence ('since
    #               several of the prefix varia'); the above describes what the
    #               code does, not necessarily all of the original rationale.
    #
    aoDisasmMaps = [];
    for oMap in sorted(g_dInstructionMaps.values(),
                       key = lambda oCur: oCur.sEncoding + ''.join(oCur.asLeadOpcodes)):
        if oMap.sSelector != 'byte+pfx':
            aoDisasmMaps.append(oMap);
        else:
            # Split the map by prefix.
            aoDisasmMaps.append(oMap.copy(oMap.sName, 'none'));
            aoDisasmMaps.append(oMap.copy(oMap.sName + '_66', '0x66'));
            aoDisasmMaps.append(oMap.copy(oMap.sName + '_F3', '0xf3'));
            aoDisasmMaps.append(oMap.copy(oMap.sName + '_F2', '0xf2'));

    #
    # Dump each map.
    #
    # NOTE(review): asHeaderLines is collected but never written anywhere in
    #               this function.  Also, the DISOPMAPDESC extern below appends
    #               'Range' to getDisasRangeName() while the definition does
    #               not - confirm against getDisasRangeName before using it.
    asHeaderLines = [];
    print("debug: maps=%s\n" % (', '.join([oMap.sName for oMap in aoDisasmMaps]),), file = sys.stderr);
    for oMap in aoDisasmMaps:
        sName = oMap.sName;

        if not sName.startswith("vex"): continue; # only looking at the vex maps at the moment.

        #
        # Get the instructions for the map and see if we can do a short version or not.
        #
        aoTableOrder    = oMap.getInstructionsInTableOrder();
        cEntriesPerByte = oMap.getEntriesPerByte();
        (iInstrStart, iInstrEnd, fShortTable) = __checkIfShortTable(aoTableOrder, oMap);
        cRowEntries     = 0x10 * cEntriesPerByte; # Number of table entries per row of 16 opcode bytes.

        #
        # Output the table start.
        # Note! Short tables are static and only accessible via the map range record.
        #
        asLines = [];
        asLines.append('/* Generated from: %-11s Selector: %-7s Encoding: %-7s Lead bytes opcodes: %s */'
                       % ( oMap.sName, oMap.sSelector, oMap.sEncoding, ' '.join(oMap.asLeadOpcodes), ));
        if fShortTable:
            asLines.append('static const DISOPCODE %s[] =' % (oMap.getDisasTableName(),));
        else:
            asHeaderLines.append('extern const DISOPCODE %s[%d];' % (oMap.getDisasTableName(), iInstrEnd - iInstrStart,));
            asLines.append('const DISOPCODE %s[%d] =' % (oMap.getDisasTableName(), iInstrEnd - iInstrStart,));
        asLines.append('{');

        if fShortTable and (iInstrStart & (cRowEntries - 1)) != 0:
            asLines.append(' /* %#04x: */' % (iInstrStart,));

        #
        # Output the instructions.
        #
        iInstr = iInstrStart;
        while iInstr < iInstrEnd:
            oInstr = aoTableOrder[iInstr];
            if (iInstr & (cRowEntries - 1)) == 0:
                # Start of a new row: blank line (except before the first one) and a row comment.
                if iInstr != iInstrStart:
                    asLines.append('');
                asLines.append(' /* %x */' % ((iInstr // cEntriesPerByte) >> 4,));

            if oInstr is None:
                # Invalid. Optimize blocks of invalid instructions.
                cInvalidInstrs = 1;
                while iInstr + cInvalidInstrs < len(aoTableOrder) and aoTableOrder[iInstr + cInvalidInstrs] is None:
                    cInvalidInstrs += 1;
                if (iInstr & (cRowEntries - 1)) == 0 and cInvalidInstrs >= cRowEntries:
                    # A whole row of invalid entries.
                    asLines.append(' INVALID_OPCODE_BLOCK_%u,' % (cRowEntries,));
                    iInstr += cRowEntries - 1;
                elif cEntriesPerByte > 1:
                    if (iInstr & (cEntriesPerByte - 1)) == 0 and cInvalidInstrs >= cEntriesPerByte:
                        # All the entries of one opcode byte are invalid.  Skip the
                        # rest of them; the final one is covered by the increment at
                        # the bottom of the loop.
                        asLines.append(' INVALID_OPCODE_BLOCK_%u,' % (cEntriesPerByte,));
                        iInstr += cEntriesPerByte - 1;
                    else:
                        asLines.append(' /* %#04x/%d */ INVALID_OPCODE,'
                                       % (iInstr // cEntriesPerByte, iInstr % cEntriesPerByte));
                else:
                    asLines.append(' /* %#04x */ INVALID_OPCODE,' % (iInstr));
            elif isinstance(oInstr, list):
                if len(oInstr) != 0:
                    asLines.append(' /* %#04x */ ComplicatedListStuffNeedingWrapper, /* \n -- %s */'
                                   % (iInstr, '\n -- '.join([str(oItem) for oItem in oInstr]),));
                else:
                    # NOTE(review): This passes an empty list on as if it were an
                    #               instruction - confirm that this can't happen.
                    asLines.append(__formatDisassemblerTableEntry(oInstr));
            else:
                asLines.append(__formatDisassemblerTableEntry(oInstr));

            iInstr += 1;

        # An empty table still needs one entry to be valid C.
        if iInstrStart >= iInstrEnd:
            asLines.append(' /* dummy */ INVALID_OPCODE');

        asLines.append('};');
        asLines.append('AssertCompile(RT_ELEMENTS(%s) == %s);' % (oMap.getDisasTableName(), iInstrEnd - iInstrStart,));

        #
        # We always emit a map range record, assuming the linker will eliminate the unnecessary ones.
        #
        asHeaderLines.append('extern const DISOPMAPDESC %sRange;' % (oMap.getDisasRangeName()));
        asLines.append('const DISOPMAPDESC %s = { &%s[0], %#04x, RT_ELEMENTS(%s) };'
                       % (oMap.getDisasRangeName(), oMap.getDisasTableName(), iInstrStart, oMap.getDisasTableName(),));

        #
        # Write out the lines, followed by an empty line.
        #
        oDstFile.write('\n'.join(asLines));
        oDstFile.write('\n');
        oDstFile.write('\n');
    return 0;

if __name__ == '__main__':
    sys.exit(generateDisassemblerTables());
5512
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette