VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllInstructionsPython.py@ 99335

Last change on this file since 99335 was 99335, checked in by vboxsync, 23 months ago

VMM/IEM: IEM_MC_MAYBE_RAISE_AVX2_RELATED_XCPT -> IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT, since the CPUID check was removed they are identical. bugref:10369

  • Property svn:eol-style set to LF
  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 253.8 KB
Line 
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3# $Id: IEMAllInstructionsPython.py 99335 2023-04-07 12:24:52Z vboxsync $
4
5"""
6IEM instruction extractor.
7
8This script/module parses the IEMAllInstruction*.cpp.h files next to it and
9collects information about the instructions. It can then be used to generate
10disassembler tables and tests.
11"""
12
13from __future__ import print_function;
14
15__copyright__ = \
16"""
17Copyright (C) 2017-2023 Oracle and/or its affiliates.
18
19This file is part of VirtualBox base platform packages, as
20available from https://www.virtualbox.org.
21
22This program is free software; you can redistribute it and/or
23modify it under the terms of the GNU General Public License
24as published by the Free Software Foundation, in version 3 of the
25License.
26
27This program is distributed in the hope that it will be useful, but
28WITHOUT ANY WARRANTY; without even the implied warranty of
29MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
30General Public License for more details.
31
32You should have received a copy of the GNU General Public License
33along with this program; if not, see <https://www.gnu.org/licenses>.
34
35The contents of this file may alternatively be used under the terms
36of the Common Development and Distribution License Version 1.0
37(CDDL), a copy of it is provided in the "COPYING.CDDL" file included
38in the VirtualBox distribution, in which case the provisions of the
39CDDL are applicable instead of those of the GPL.
40
41You may elect to license modified versions of this file under the
42terms and conditions of either the GPL or the CDDL or both.
43
44SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
45"""
46__version__ = "$Revision: 99335 $"
47
48# pylint: disable=anomalous-backslash-in-string,too-many-lines
49
50# Standard python imports.
51import os;
52import re;
53import sys;
54import traceback;
55
56## Only the main script needs to modify the path.
57#g_ksValidationKitDir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),
58# 'ValidationKit');
59#sys.path.append(g_ksValidationKitDir);
60#
61#from common import utils; - Windows build boxes doesn't have pywin32.
62
# Python 3 compatibility: the 'long' type is gone there, so alias it to 'int'.
if sys.version_info >= (3,):
    long = int; # pylint: disable=redefined-builtin,invalid-name
66
67
## Constants and values for EFLAGS (X86_EFL_XXX name -> integer value).
## Single flags are written as bit shifts.
g_kdX86EFlagsConstants = {
    'X86_EFL_CF':        1 << 0,
    'X86_EFL_1':         1 << 1,
    'X86_EFL_PF':        1 << 2,
    'X86_EFL_AF':        1 << 4,
    'X86_EFL_ZF':        1 << 6,
    'X86_EFL_SF':        1 << 7,
    'X86_EFL_TF':        1 << 8,
    'X86_EFL_IF':        1 << 9,
    'X86_EFL_DF':        1 << 10,
    'X86_EFL_OF':        1 << 11,
    'X86_EFL_IOPL':      (1 << 12) | (1 << 13),
    'X86_EFL_NT':        1 << 14,
    'X86_EFL_RF':        1 << 16,
    'X86_EFL_VM':        1 << 17,
    'X86_EFL_AC':        1 << 18,
    'X86_EFL_VIF':       1 << 19,
    'X86_EFL_VIP':       1 << 20,
    'X86_EFL_ID':        1 << 21,
    'X86_EFL_LIVE_MASK': 0x003f7fd5,
    'X86_EFL_RA1_MASK':  1 << 1,
};
90
## EFlags values allowed in \@opfltest, \@opflmodify, \@opflundef, \@opflset, and \@opflclear.
## Maps each mnemonic to an X86_EFL_XXX constant name; a leading '!' marks
## the mnemonic that stands for the flag being clear.
g_kdEFlagsMnemonics = {
    # Debugger flag notation (sorted by value):
    'cf': 'X86_EFL_CF',     ##< Carry Flag.
    'nc': '!X86_EFL_CF',    ##< No Carry.

    # NOTE(review): PF set normally denotes *even* parity, so 'po' and 'pe'
    #               look swapped here - confirm against the users of this table.
    'po': 'X86_EFL_PF',     ##< Parity Odd.
    'pe': '!X86_EFL_PF',    ##< Parity Even.

    'af': 'X86_EFL_AF',     ##< Aux Flag.
    'na': '!X86_EFL_AF',    ##< No Aux.

    'zr': 'X86_EFL_ZF',     ##< ZeRo.
    'nz': '!X86_EFL_ZF',    ##< No Zero.

    'ng': 'X86_EFL_SF',     ##< NeGative (sign).
    'pl': '!X86_EFL_SF',    ##< PLus (sign).

    'tf': 'X86_EFL_TF',     ##< Trap flag.

    'ei': 'X86_EFL_IF',     ##< Enabled Interrupts.
    'di': '!X86_EFL_IF',    ##< Disabled Interrupts.

    'dn': 'X86_EFL_DF',     ##< DowN (string op direction).
    'up': '!X86_EFL_DF',    ##< UP (string op direction).

    'ov': 'X86_EFL_OF',     ##< OVerflow.
    'nv': '!X86_EFL_OF',    ##< No Overflow.

    'nt': 'X86_EFL_NT',     ##< Nested Task.
    'rf': 'X86_EFL_RF',     ##< Resume Flag.
    'vm': 'X86_EFL_VM',     ##< Virtual-8086 Mode.
    'ac': 'X86_EFL_AC',     ##< Alignment Check.
    'vif': 'X86_EFL_VIF',   ##< Virtual Interrupt Flag.
    'vip': 'X86_EFL_VIP',   ##< Virtual Interrupt Pending.

    # Reference manual notation not covered above (sorted by value):
    'pf': 'X86_EFL_PF',
    'zf': 'X86_EFL_ZF',
    'sf': 'X86_EFL_SF',
    'if': 'X86_EFL_IF',
    'df': 'X86_EFL_DF',
    'of': 'X86_EFL_OF',
    'iopl': 'X86_EFL_IOPL',
    'id': 'X86_EFL_ID',
};
137
## Constants and values for CR0 (X86_CR0_XXX name -> integer value).
g_kdX86Cr0Constants = {
    'X86_CR0_PE': 1 << 0,
    'X86_CR0_MP': 1 << 1,
    'X86_CR0_EM': 1 << 2,
    'X86_CR0_TS': 1 << 3,
    'X86_CR0_ET': 1 << 4,
    'X86_CR0_NE': 1 << 5,
    'X86_CR0_WP': 1 << 16,
    'X86_CR0_AM': 1 << 18,
    'X86_CR0_NW': 1 << 29,
    'X86_CR0_CD': 1 << 30,
    'X86_CR0_PG': 1 << 31,
};
152
## Constants and values for CR4 (X86_CR4_XXX name -> integer value).
g_kdX86Cr4Constants = {
    'X86_CR4_VME':         1 << 0,
    'X86_CR4_PVI':         1 << 1,
    'X86_CR4_TSD':         1 << 2,
    'X86_CR4_DE':          1 << 3,
    'X86_CR4_PSE':         1 << 4,
    'X86_CR4_PAE':         1 << 5,
    'X86_CR4_MCE':         1 << 6,
    'X86_CR4_PGE':         1 << 7,
    'X86_CR4_PCE':         1 << 8,
    'X86_CR4_OSFXSR':      1 << 9,
    'X86_CR4_OSXMMEEXCPT': 1 << 10,
    'X86_CR4_VMXE':        1 << 13,
    'X86_CR4_SMXE':        1 << 14,
    'X86_CR4_PCIDE':       1 << 17,
    'X86_CR4_OSXSAVE':     1 << 18,
    'X86_CR4_SMEP':        1 << 20,
    'X86_CR4_SMAP':        1 << 21,
    'X86_CR4_PKE':         1 << 22,
};
174
## XSAVE components (XCR0), XSAVE_C_XXX name -> integer value.
g_kdX86XSaveCConstants = {
    'XSAVE_C_X87':         1 << 0,
    'XSAVE_C_SSE':         1 << 1,
    'XSAVE_C_YMM':         1 << 2,
    'XSAVE_C_BNDREGS':     1 << 3,
    'XSAVE_C_BNDCSR':      1 << 4,
    'XSAVE_C_OPMASK':      1 << 5,
    'XSAVE_C_ZMM_HI256':   1 << 6,
    'XSAVE_C_ZMM_16HI':    1 << 7,
    'XSAVE_C_PKRU':        1 << 9,
    'XSAVE_C_LWP':         1 << 62,
    'XSAVE_C_X':           1 << 63,
    'XSAVE_C_ALL_AVX':     0x000000c4, # For clearing all AVX bits.
    'XSAVE_C_ALL_AVX_SSE': 0x000000c6, # For clearing all AVX and SSE bits.
};
191
192
## \@op[1-4] locations
## The keys are the valid location names; every value is an empty list.
g_kdOpLocations = {
    'reg':  [], ## modrm.reg
    'rm':   [], ## modrm.rm
    'imm':  [], ## immediate instruction data
    'vvvv': [], ## VEX.vvvv

    # fixed registers.
    'AL':     [],
    'rAX':    [],
    'rDX':    [],
    'rSI':    [],
    'rDI':    [],
    'rFLAGS': [],
    'CS':     [],
    'DS':     [],
    'ES':     [],
    'FS':     [],
    'GS':     [],
    'SS':     [],
};
214
## \@op[1-4] types
##
## Every value is a 5-tuple with these fields:
##      - 0: the normal IDX_ParseXXX handler (IDX_UseModRM == IDX_ParseModRM).
##      - 1: the location (g_kdOpLocations).
##      - 2: disassembler format string version of the type.
##      - 3: disassembler OP_PARAM_XXX (XXX only).
##      - 4: IEM form matching instruction.
##
## Note! See the A.2.1 in SDM vol 2 for the type names.
g_kdOpTypes = {
    # Fixed addresses
    'Ap': ('IDX_ParseImmAddrF', 'imm', '%Ap', 'Ap', 'FIXED'),

    # ModR/M.rm
    'Eb': ('IDX_UseModRM', 'rm', '%Eb', 'Eb', 'RM'),
    'Ed': ('IDX_UseModRM', 'rm', '%Ed', 'Ed', 'RM'),
    'Ed_WO': ('IDX_UseModRM', 'rm', '%Ed', 'Ed', 'RM'),
    'Eq': ('IDX_UseModRM', 'rm', '%Eq', 'Eq', 'RM'),
    'Eq_WO': ('IDX_UseModRM', 'rm', '%Eq', 'Eq', 'RM'),
    'Ew': ('IDX_UseModRM', 'rm', '%Ew', 'Ew', 'RM'),
    'Ev': ('IDX_UseModRM', 'rm', '%Ev', 'Ev', 'RM'),
    'Ey': ('IDX_UseModRM', 'rm', '%Ey', 'Ey', 'RM'),
    'Qd': ('IDX_UseModRM', 'rm', '%Qd', 'Qd', 'RM'),
    'Qq': ('IDX_UseModRM', 'rm', '%Qq', 'Qq', 'RM'),
    'Qq_WO': ('IDX_UseModRM', 'rm', '%Qq', 'Qq', 'RM'),
    'Wss': ('IDX_UseModRM', 'rm', '%Wss', 'Wss', 'RM'),
    'Wss_WO': ('IDX_UseModRM', 'rm', '%Wss', 'Wss', 'RM'),
    'Wsd': ('IDX_UseModRM', 'rm', '%Wsd', 'Wsd', 'RM'),
    'Wsd_WO': ('IDX_UseModRM', 'rm', '%Wsd', 'Wsd', 'RM'),
    'Wps': ('IDX_UseModRM', 'rm', '%Wps', 'Wps', 'RM'),
    'Wps_WO': ('IDX_UseModRM', 'rm', '%Wps', 'Wps', 'RM'),
    'Wpd': ('IDX_UseModRM', 'rm', '%Wpd', 'Wpd', 'RM'),
    'Wpd_WO': ('IDX_UseModRM', 'rm', '%Wpd', 'Wpd', 'RM'),
    'Wdq': ('IDX_UseModRM', 'rm', '%Wdq', 'Wdq', 'RM'),
    'Wdq_WO': ('IDX_UseModRM', 'rm', '%Wdq', 'Wdq', 'RM'),
    'Wq': ('IDX_UseModRM', 'rm', '%Wq', 'Wq', 'RM'),
    'Wq_WO': ('IDX_UseModRM', 'rm', '%Wq', 'Wq', 'RM'),
    'WqZxReg_WO': ('IDX_UseModRM', 'rm', '%Wq', 'Wq', 'RM'),
    'Wx': ('IDX_UseModRM', 'rm', '%Wx', 'Wx', 'RM'),
    'Wx_WO': ('IDX_UseModRM', 'rm', '%Wx', 'Wx', 'RM'),

    # ModR/M.rm - register only.
    'Uq': ('IDX_UseModRM', 'rm', '%Uq', 'Uq', 'REG'),
    'UqHi': ('IDX_UseModRM', 'rm', '%Uq', 'UqHi', 'REG'),
    'Uss': ('IDX_UseModRM', 'rm', '%Uss', 'Uss', 'REG'),
    'Uss_WO': ('IDX_UseModRM', 'rm', '%Uss', 'Uss', 'REG'),
    'Usd': ('IDX_UseModRM', 'rm', '%Usd', 'Usd', 'REG'),
    'Usd_WO': ('IDX_UseModRM', 'rm', '%Usd', 'Usd', 'REG'),
    'Ux': ('IDX_UseModRM', 'rm', '%Ux', 'Ux', 'REG'),
    'Nq': ('IDX_UseModRM', 'rm', '%Qq', 'Nq', 'REG'),

    # ModR/M.rm - memory only.
    'Ma': ('IDX_UseModRM', 'rm', '%Ma', 'Ma', 'MEM'), ##< Only used by BOUND.
    'Mb_RO': ('IDX_UseModRM', 'rm', '%Mb', 'Mb', 'MEM'),
    'Md': ('IDX_UseModRM', 'rm', '%Md', 'Md', 'MEM'),
    'Md_RO': ('IDX_UseModRM', 'rm', '%Md', 'Md', 'MEM'),
    'Md_WO': ('IDX_UseModRM', 'rm', '%Md', 'Md', 'MEM'),
    'Mdq': ('IDX_UseModRM', 'rm', '%Mdq', 'Mdq', 'MEM'),
    'Mdq_WO': ('IDX_UseModRM', 'rm', '%Mdq', 'Mdq', 'MEM'),
    'Mq': ('IDX_UseModRM', 'rm', '%Mq', 'Mq', 'MEM'),
    'Mq_WO': ('IDX_UseModRM', 'rm', '%Mq', 'Mq', 'MEM'),
    'Mps_WO': ('IDX_UseModRM', 'rm', '%Mps', 'Mps', 'MEM'),
    'Mpd_WO': ('IDX_UseModRM', 'rm', '%Mpd', 'Mpd', 'MEM'),
    'Mx': ('IDX_UseModRM', 'rm', '%Mx', 'Mx', 'MEM'),
    'Mx_WO': ('IDX_UseModRM', 'rm', '%Mx', 'Mx', 'MEM'),
    'M_RO': ('IDX_UseModRM', 'rm', '%M', 'M', 'MEM'),
    'M_RW': ('IDX_UseModRM', 'rm', '%M', 'M', 'MEM'),

    # ModR/M.reg
    'Gb': ('IDX_UseModRM', 'reg', '%Gb', 'Gb', ''),
    'Gw': ('IDX_UseModRM', 'reg', '%Gw', 'Gw', ''),
    'Gd': ('IDX_UseModRM', 'reg', '%Gd', 'Gd', ''),
    'Gv': ('IDX_UseModRM', 'reg', '%Gv', 'Gv', ''),
    'Gv_RO': ('IDX_UseModRM', 'reg', '%Gv', 'Gv', ''),
    'Gy': ('IDX_UseModRM', 'reg', '%Gy', 'Gy', ''),
    'Pd': ('IDX_UseModRM', 'reg', '%Pd', 'Pd', ''),
    'PdZx_WO': ('IDX_UseModRM', 'reg', '%Pd', 'PdZx', ''),
    'Pq': ('IDX_UseModRM', 'reg', '%Pq', 'Pq', ''),
    'Pq_WO': ('IDX_UseModRM', 'reg', '%Pq', 'Pq', ''),
    'Vd': ('IDX_UseModRM', 'reg', '%Vd', 'Vd', ''),
    'Vd_WO': ('IDX_UseModRM', 'reg', '%Vd', 'Vd', ''),
    'VdZx_WO': ('IDX_UseModRM', 'reg', '%Vd', 'Vd', ''),
    'Vdq': ('IDX_UseModRM', 'reg', '%Vdq', 'Vdq', ''),
    'Vss': ('IDX_UseModRM', 'reg', '%Vss', 'Vss', ''),
    'Vss_WO': ('IDX_UseModRM', 'reg', '%Vss', 'Vss', ''),
    'VssZx_WO': ('IDX_UseModRM', 'reg', '%Vss', 'Vss', ''),
    'Vsd': ('IDX_UseModRM', 'reg', '%Vsd', 'Vsd', ''),
    'Vsd_WO': ('IDX_UseModRM', 'reg', '%Vsd', 'Vsd', ''),
    'VsdZx_WO': ('IDX_UseModRM', 'reg', '%Vsd', 'Vsd', ''),
    'Vps': ('IDX_UseModRM', 'reg', '%Vps', 'Vps', ''),
    'Vps_WO': ('IDX_UseModRM', 'reg', '%Vps', 'Vps', ''),
    'Vpd': ('IDX_UseModRM', 'reg', '%Vpd', 'Vpd', ''),
    'Vpd_WO': ('IDX_UseModRM', 'reg', '%Vpd', 'Vpd', ''),
    'Vq': ('IDX_UseModRM', 'reg', '%Vq', 'Vq', ''),
    'Vq_WO': ('IDX_UseModRM', 'reg', '%Vq', 'Vq', ''),
    'Vdq_WO': ('IDX_UseModRM', 'reg', '%Vdq', 'Vdq', ''),
    'VqHi': ('IDX_UseModRM', 'reg', '%Vdq', 'VdqHi', ''),
    'VqHi_WO': ('IDX_UseModRM', 'reg', '%Vdq', 'VdqHi', ''),
    'VqZx_WO': ('IDX_UseModRM', 'reg', '%Vq', 'VqZx', ''),
    'Vx': ('IDX_UseModRM', 'reg', '%Vx', 'Vx', ''),
    'Vx_WO': ('IDX_UseModRM', 'reg', '%Vx', 'Vx', ''),

    # VEX.vvvv
    'By': ('IDX_UseModRM', 'vvvv', '%By', 'By', 'V'),
    'Hps': ('IDX_UseModRM', 'vvvv', '%Hps', 'Hps', 'V'),
    'Hpd': ('IDX_UseModRM', 'vvvv', '%Hpd', 'Hpd', 'V'),
    'HssHi': ('IDX_UseModRM', 'vvvv', '%Hx', 'HssHi', 'V'),
    'HsdHi': ('IDX_UseModRM', 'vvvv', '%Hx', 'HsdHi', 'V'),
    'Hq': ('IDX_UseModRM', 'vvvv', '%Hq', 'Hq', 'V'),
    'HqHi': ('IDX_UseModRM', 'vvvv', '%Hq', 'HqHi', 'V'),
    'Hx': ('IDX_UseModRM', 'vvvv', '%Hx', 'Hx', 'V'),

    # Immediate values.
    'Ib': ('IDX_ParseImmByte', 'imm', '%Ib', 'Ib', ''), ##< NB! Could be IDX_ParseImmByteSX for some instrs.
    'Iw': ('IDX_ParseImmUshort', 'imm', '%Iw', 'Iw', ''),
    'Id': ('IDX_ParseImmUlong', 'imm', '%Id', 'Id', ''),
    'Iq': ('IDX_ParseImmQword', 'imm', '%Iq', 'Iq', ''),
    'Iv': ('IDX_ParseImmV', 'imm', '%Iv', 'Iv', ''), ##< o16: word, o32: dword, o64: qword
    'Iz': ('IDX_ParseImmZ', 'imm', '%Iz', 'Iz', ''), ##< o16: word, o32|o64:dword

    # Address operands (no ModR/M).
    'Ob': ('IDX_ParseImmAddr', 'imm', '%Ob', 'Ob', ''),
    'Ov': ('IDX_ParseImmAddr', 'imm', '%Ov', 'Ov', ''),

    # Relative jump targets
    'Jb': ('IDX_ParseImmBRel', 'imm', '%Jb', 'Jb', ''),
    'Jv': ('IDX_ParseImmVRel', 'imm', '%Jv', 'Jv', ''),

    # DS:rSI
    'Xb': ('IDX_ParseXb', 'rSI', '%eSI', 'Xb', ''),
    'Xv': ('IDX_ParseXv', 'rSI', '%eSI', 'Xv', ''),
    # ES:rDI
    'Yb': ('IDX_ParseYb', 'rDI', '%eDI', 'Yb', ''),
    'Yv': ('IDX_ParseYv', 'rDI', '%eDI', 'Yv', ''),

    'Fv': ('IDX_ParseFixedReg', 'rFLAGS', '%Fv', 'Fv', ''),

    # Fixed registers.
    'AL': ('IDX_ParseFixedReg', 'AL', 'al', 'REG_AL', ''),
    'rAX': ('IDX_ParseFixedReg', 'rAX', '%eAX', 'REG_EAX', ''),
    'rDX': ('IDX_ParseFixedReg', 'rDX', '%eDX', 'REG_EDX', ''),
    'CS': ('IDX_ParseFixedReg', 'CS', 'cs', 'REG_CS', ''), # 8086: push CS
    'DS': ('IDX_ParseFixedReg', 'DS', 'ds', 'REG_DS', ''),
    'ES': ('IDX_ParseFixedReg', 'ES', 'es', 'REG_ES', ''),
    'FS': ('IDX_ParseFixedReg', 'FS', 'fs', 'REG_FS', ''),
    'GS': ('IDX_ParseFixedReg', 'GS', 'gs', 'REG_GS', ''),
    'SS': ('IDX_ParseFixedReg', 'SS', 'ss', 'REG_SS', ''),
};
364
365# IDX_ParseFixedReg
366# IDX_ParseVexDest
367
368
## IEMFORM_XXX mappings.
## Each value is the tuple ( sEncoding, [ sWhere1, ... ], opcodesub ).
g_kdIemForms = {
    'RM':      ('ModR/M', ['reg', 'rm'], ''),
    'RM_REG':  ('ModR/M', ['reg', 'rm'], '11 mr/reg'),
    'RM_MEM':  ('ModR/M', ['reg', 'rm'], '!11 mr/reg'),
    'RMI':     ('ModR/M', ['reg', 'rm', 'imm'], ''),
    'RMI_REG': ('ModR/M', ['reg', 'rm', 'imm'], '11 mr/reg'),
    'RMI_MEM': ('ModR/M', ['reg', 'rm', 'imm'], '!11 mr/reg'),
    'MR':      ('ModR/M', ['rm', 'reg'], ''),
    'MR_REG':  ('ModR/M', ['rm', 'reg'], '11 mr/reg'),
    'MR_MEM':  ('ModR/M', ['rm', 'reg'], '!11 mr/reg'),
    'MRI':     ('ModR/M', ['rm', 'reg', 'imm'], ''),
    'MRI_REG': ('ModR/M', ['rm', 'reg', 'imm'], '11 mr/reg'),
    'MRI_MEM': ('ModR/M', ['rm', 'reg', 'imm'], '!11 mr/reg'),
    'M':       ('ModR/M', ['rm'], ''),
    'M_REG':   ('ModR/M', ['rm'], ''),
    'M_MEM':   ('ModR/M', ['rm'], ''),
    'R':       ('ModR/M', ['reg'], ''),

    'VEX_RM':      ('VEX.ModR/M', ['reg', 'rm'], ''),
    'VEX_RM_REG':  ('VEX.ModR/M', ['reg', 'rm'], '11 mr/reg'),
    'VEX_RM_MEM':  ('VEX.ModR/M', ['reg', 'rm'], '!11 mr/reg'),
    'VEX_MR':      ('VEX.ModR/M', ['rm', 'reg'], ''),
    'VEX_MR_REG':  ('VEX.ModR/M', ['rm', 'reg'], '11 mr/reg'),
    'VEX_MR_MEM':  ('VEX.ModR/M', ['rm', 'reg'], '!11 mr/reg'),
    'VEX_M':       ('VEX.ModR/M', ['rm'], ''),
    'VEX_M_REG':   ('VEX.ModR/M', ['rm'], ''),
    'VEX_M_MEM':   ('VEX.ModR/M', ['rm'], ''),
    'VEX_R':       ('VEX.ModR/M', ['reg'], ''),
    'VEX_RVM':     ('VEX.ModR/M', ['reg', 'vvvv', 'rm'], ''),
    'VEX_RVM_REG': ('VEX.ModR/M', ['reg', 'vvvv', 'rm'], '11 mr/reg'),
    'VEX_RVM_MEM': ('VEX.ModR/M', ['reg', 'vvvv', 'rm'], '!11 mr/reg'),
    'VEX_RMV':     ('VEX.ModR/M', ['reg', 'rm', 'vvvv'], ''),
    'VEX_RMV_REG': ('VEX.ModR/M', ['reg', 'rm', 'vvvv'], '11 mr/reg'),
    'VEX_RMV_MEM': ('VEX.ModR/M', ['reg', 'rm', 'vvvv'], '!11 mr/reg'),
    'VEX_RMI':     ('VEX.ModR/M', ['reg', 'rm', 'imm'], ''),
    'VEX_RMI_REG': ('VEX.ModR/M', ['reg', 'rm', 'imm'], '11 mr/reg'),
    'VEX_RMI_MEM': ('VEX.ModR/M', ['reg', 'rm', 'imm'], '!11 mr/reg'),
    'VEX_MVR':     ('VEX.ModR/M', ['rm', 'vvvv', 'reg'], ''),
    'VEX_MVR_REG': ('VEX.ModR/M', ['rm', 'vvvv', 'reg'], '11 mr/reg'),
    'VEX_MVR_MEM': ('VEX.ModR/M', ['rm', 'vvvv', 'reg'], '!11 mr/reg'),

    'VEX_VM':      ('VEX.ModR/M', ['vvvv', 'rm'], ''),
    'VEX_VM_REG':  ('VEX.ModR/M', ['vvvv', 'rm'], '11 mr/reg'),
    'VEX_VM_MEM':  ('VEX.ModR/M', ['vvvv', 'rm'], '!11 mr/reg'),

    'FIXED':       ('fixed', None, ''),
};
417
## \@oppfx values.
## The keys are the accepted prefix names; every value is an empty list.
g_kdPrefixes = {
    'none': [],
    '0x66': [],
    '0xf3': [],
    '0xf2': [],
};
425
## Special \@opcode tag values.
## The keys are the accepted special values; every value is an empty list.
g_kdSpecialOpcodes = {
    '/reg':       [],
    'mr/reg':     [],
    '11 /reg':    [],
    '!11 /reg':   [],
    '11 mr/reg':  [],
    '!11 mr/reg': [],
};
435
## Special \@opcodesub tag values.
## The first value is the real value for aliases.
## The second value is for bs3cg1.
g_kdSubOpcodes = {
    'none':               [ None,                 '',    ],
    '11 mr/reg':          [ '11 mr/reg',          '',    ],
    '11':                 [ '11 mr/reg',          '',    ], ##< alias
    '!11 mr/reg':         [ '!11 mr/reg',         '',    ],
    '!11':                [ '!11 mr/reg',         '',    ], ##< alias
    'rex.w=0':            [ 'rex.w=0',            'WZ',  ],
    'w=0':                [ 'rex.w=0',            '',    ], ##< alias
    'rex.w=1':            [ 'rex.w=1',            'WNZ', ],
    'w=1':                [ 'rex.w=1',            '',    ], ##< alias
    'vex.l=0':            [ 'vex.l=0',            'L0',  ],
    # Not an alias, so the real value is the key itself.  (It used to resolve
    # to 'vex.l=0', which contradicted the 'L1' bs3cg1 value.)
    'vex.l=1':            [ 'vex.l=1',            'L1',  ],
    '11 mr/reg vex.l=0':  [ '11 mr/reg vex.l=0',  'L0',  ],
    '11 mr/reg vex.l=1':  [ '11 mr/reg vex.l=1',  'L1',  ],
    '!11 mr/reg vex.l=0': [ '!11 mr/reg vex.l=0', 'L0',  ],
    '!11 mr/reg vex.l=1': [ '!11 mr/reg vex.l=1', 'L1',  ],
};
456
## Valid values for \@openc
## Each value is a one-element list holding a BS3CG1ENC_XXX name, or None.
g_kdEncodings = {
    'ModR/M':     ['BS3CG1ENC_MODRM'],      ##< ModR/M
    'VEX.ModR/M': ['BS3CG1ENC_VEX_MODRM'],  ##< VEX...ModR/M
    'fixed':      ['BS3CG1ENC_FIXED'],      ##< Fixed encoding (address, registers, unused, etc).
    'VEX.fixed':  ['BS3CG1ENC_VEX_FIXED'],  ##< VEX + fixed encoding (address, registers, unused, etc).
    'prefix':     [None],                   ##< Prefix
};
465
## \@opunused, \@opinvalid, \@opinvlstyle
## The keys are the accepted style names; every value is an empty list.
g_kdInvalidStyles = {
    'immediate':               [], ##< CPU stops decoding immediately after the opcode.
    'vex.modrm':               [], ##< VEX+ModR/M, everyone.
    'intel-modrm':             [], ##< Intel decodes ModR/M.
    'intel-modrm-imm8':        [], ##< Intel decodes ModR/M and an 8-bit immediate.
    'intel-opcode-modrm':      [], ##< Intel decodes another opcode byte followed by ModR/M. (Unused extension tables.)
    'intel-opcode-modrm-imm8': [], ##< Intel decodes another opcode byte followed by ModR/M and an 8-bit immediate.
};
475
## CPU names; every value is an empty tuple.
g_kdCpuNames = {
    '8086':  (),
    '80186': (),
    '80286': (),
    '80386': (),
    '80486': (),
};
483
## \@opcpuid
## Maps the feature name used in the tag to an X86_CPUID_XXX constant name.
g_kdCpuIdFlags = {
    'vme':        'X86_CPUID_FEATURE_EDX_VME',
    'tsc':        'X86_CPUID_FEATURE_EDX_TSC',
    'msr':        'X86_CPUID_FEATURE_EDX_MSR',
    'cx8':        'X86_CPUID_FEATURE_EDX_CX8',
    'sep':        'X86_CPUID_FEATURE_EDX_SEP',
    'cmov':       'X86_CPUID_FEATURE_EDX_CMOV',
    'clfsh':      'X86_CPUID_FEATURE_EDX_CLFSH',
    'clflushopt': 'X86_CPUID_STEXT_FEATURE_EBX_CLFLUSHOPT',
    'mmx':        'X86_CPUID_FEATURE_EDX_MMX',
    'fxsr':       'X86_CPUID_FEATURE_EDX_FXSR',
    'sse':        'X86_CPUID_FEATURE_EDX_SSE',
    'sse2':       'X86_CPUID_FEATURE_EDX_SSE2',
    'sse3':       'X86_CPUID_FEATURE_ECX_SSE3',
    # NOTE(review): the constants for 'pclmul', 'monitor' and 'smx' are named
    #               after neighbouring CPUID bits (DTES64, CPLDS, TM2) - they
    #               look like copy & paste slips, confirm before relying on them.
    'pclmul':     'X86_CPUID_FEATURE_ECX_DTES64',
    'monitor':    'X86_CPUID_FEATURE_ECX_CPLDS',
    'vmx':        'X86_CPUID_FEATURE_ECX_VMX',
    'smx':        'X86_CPUID_FEATURE_ECX_TM2',
    'ssse3':      'X86_CPUID_FEATURE_ECX_SSSE3',
    'fma':        'X86_CPUID_FEATURE_ECX_FMA',
    'cx16':       'X86_CPUID_FEATURE_ECX_CX16',
    'pcid':       'X86_CPUID_FEATURE_ECX_PCID',
    'sse4.1':     'X86_CPUID_FEATURE_ECX_SSE4_1',
    'sse4.2':     'X86_CPUID_FEATURE_ECX_SSE4_2',
    'movbe':      'X86_CPUID_FEATURE_ECX_MOVBE',
    'popcnt':     'X86_CPUID_FEATURE_ECX_POPCNT',
    'aes':        'X86_CPUID_FEATURE_ECX_AES',
    'xsave':      'X86_CPUID_FEATURE_ECX_XSAVE',
    'avx':        'X86_CPUID_FEATURE_ECX_AVX',
    'avx2':       'X86_CPUID_STEXT_FEATURE_EBX_AVX2',
    'f16c':       'X86_CPUID_FEATURE_ECX_F16C',
    'rdrand':     'X86_CPUID_FEATURE_ECX_RDRAND',

    'axmmx':      'X86_CPUID_AMD_FEATURE_EDX_AXMMX',
    '3dnowext':   'X86_CPUID_AMD_FEATURE_EDX_3DNOW_EX',
    '3dnow':      'X86_CPUID_AMD_FEATURE_EDX_3DNOW',
    'svm':        'X86_CPUID_AMD_FEATURE_ECX_SVM',
    'cr8l':       'X86_CPUID_AMD_FEATURE_ECX_CR8L',
    'abm':        'X86_CPUID_AMD_FEATURE_ECX_ABM',
    'sse4a':      'X86_CPUID_AMD_FEATURE_ECX_SSE4A',
    '3dnowprf':   'X86_CPUID_AMD_FEATURE_ECX_3DNOWPRF',
    'xop':        'X86_CPUID_AMD_FEATURE_ECX_XOP',
    'fma4':       'X86_CPUID_AMD_FEATURE_ECX_FMA4',
};
529
## \@ophints values.
## Maps the hint name to a DISOPTYPE_XXX name; the hints at the end map to
## an empty string.
# pylint: disable=line-too-long
g_kdHints = {
    'invalid':                   'DISOPTYPE_INVALID',
    'harmless':                  'DISOPTYPE_HARMLESS',
    'controlflow':               'DISOPTYPE_CONTROLFLOW',
    'potentially_dangerous':     'DISOPTYPE_POTENTIALLY_DANGEROUS',
    'dangerous':                 'DISOPTYPE_DANGEROUS',
    'portio':                    'DISOPTYPE_PORTIO',
    'privileged':                'DISOPTYPE_PRIVILEGED',
    'privileged_notrap':         'DISOPTYPE_PRIVILEGED_NOTRAP',
    'uncond_controlflow':        'DISOPTYPE_UNCOND_CONTROLFLOW',
    'relative_controlflow':      'DISOPTYPE_RELATIVE_CONTROLFLOW',
    'cond_controlflow':          'DISOPTYPE_COND_CONTROLFLOW',
    'interrupt':                 'DISOPTYPE_INTERRUPT',
    'illegal':                   'DISOPTYPE_ILLEGAL',
    'rrm_dangerous':             'DISOPTYPE_RRM_DANGEROUS',             ##< Some additional dangerous ones when recompiling raw r0.
    'rrm_dangerous_16':          'DISOPTYPE_RRM_DANGEROUS_16',          ##< Some additional dangerous ones when recompiling 16-bit raw r0.
    'inhibit_irqs':              'DISOPTYPE_INHIBIT_IRQS',              ##< Will or can inhibit irqs (sti, pop ss, mov ss)
    'x86_portio_read':           'DISOPTYPE_X86_PORTIO_READ',
    'x86_portio_write':          'DISOPTYPE_X86_PORTIO_WRITE',
    'x86_invalid_64':            'DISOPTYPE_X86_INVALID_64',            ##< Invalid in 64 bits mode
    'x86_only_64':               'DISOPTYPE_X86_ONLY_64',               ##< Only valid in 64 bits mode
    'x86_default_64_op_size':    'DISOPTYPE_X86_DEFAULT_64_OP_SIZE',    ##< Default 64 bits operand size
    'x86_forced_64_op_size':     'DISOPTYPE_X86_FORCED_64_OP_SIZE',     ##< Forced 64 bits operand size; regardless of prefix bytes
    'x86_rexb_extends_opreg':    'DISOPTYPE_X86_REXB_EXTENDS_OPREG',    ##< REX.B extends the register field in the opcode byte
    'x86_mod_fixed_11':          'DISOPTYPE_X86_MOD_FIXED_11',          ##< modrm.mod is always 11b
    'x86_forced_32_op_size_x86': 'DISOPTYPE_X86_FORCED_32_OP_SIZE_X86', ##< Forced 32 bits operand size; regardless of prefix bytes
                                                                        ##  (only in 16 & 32 bits mode!)
    'x86_avx':                   'DISOPTYPE_X86_AVX',                   ##< AVX,AVX2,++ instruction. Not implemented yet!
    'x86_sse':                   'DISOPTYPE_X86_SSE',                   ##< SSE,SSE2,SSE3,++ instruction. Not implemented yet!
    'x86_mmx':                   'DISOPTYPE_X86_MMX',                   ##< MMX,MMXExt,3DNow,++ instruction. Not implemented yet!
    'x86_fpu':                   'DISOPTYPE_X86_FPU',                   ##< FPU instruction. Not implemented yet!
    'ignores_oz_pfx':            '',                                    ##< Ignores operand size prefix 66h.
    'ignores_rexw':              '',                                    ##< Ignores REX.W.
    'ignores_op_sizes':          '',                                    ##< Shorthand for "ignores_oz_pfx | ignores_rexw".
    'vex_l_zero':                '',                                    ##< VEX.L must be 0.
    'vex_l_ignored':             '',                                    ##< VEX.L is ignored.
    'vex_v_zero':                '',                                    ##< VEX.V must be 0. (generate sub-table?)
    'lock_allowed':              '',                                    ##< Lock prefix allowed.
};
# pylint: enable=line-too-long
572
## \@opxcpttype values (see SDMv2 2.4, 2.7).
## The keys are the accepted exception type names; every value is an empty list.
g_kdXcptTypes = {
    'none':  [],
    '1':     [],
    '2':     [],
    '3':     [],
    '4':     [],
    '4UA':   [],
    '5':     [],
    '5LZ':   [], # LZ = VEX.L must be zero.
    '6':     [],
    '7':     [],
    '7LZ':   [],
    '8':     [],
    '11':    [],
    '12':    [],
    'E1':    [],
    'E1NF':  [],
    'E2':    [],
    'E3':    [],
    'E3NF':  [],
    'E4':    [],
    'E4NF':  [],
    'E5':    [],
    'E5NF':  [],
    'E6':    [],
    'E6NF':  [],
    'E7NF':  [],
    'E9':    [],
    'E9NF':  [],
    'E10':   [],
    'E11':   [],
    'E12':   [],
    'E12NF': [],
};
608
609
610def _isValidOpcodeByte(sOpcode):
611 """
612 Checks if sOpcode is a valid lower case opcode byte.
613 Returns true/false.
614 """
615 if len(sOpcode) == 4:
616 if sOpcode[:2] == '0x':
617 if sOpcode[2] in '0123456789abcdef':
618 if sOpcode[3] in '0123456789abcdef':
619 return True;
620 return False;
621
622
623class InstructionMap(object):
624 """
625 Instruction map.
626
627 The opcode map provides the lead opcode bytes (empty for the one byte
628 opcode map). An instruction can be member of multiple opcode maps as long
629 as it uses the same opcode value within the map (because of VEX).
630 """
631
632 kdEncodings = {
633 'legacy': [],
634 'vex1': [], ##< VEX or EVEX prefix with vvvvv = 1
635 'vex2': [], ##< VEX or EVEX prefix with vvvvv = 2
636 'vex3': [], ##< VEX or EVEX prefix with vvvvv = 3
637 'xop8': [], ##< XOP prefix with vvvvv = 8
638 'xop9': [], ##< XOP prefix with vvvvv = 9
639 'xop10': [], ##< XOP prefix with vvvvv = 10
640 };
641 ## Selectors.
642 ## 1. The first value is the number of table entries required by a
643 ## decoder or disassembler for this type of selector.
644 ## 2. The second value is how many entries per opcode byte if applicable.
645 kdSelectors = {
646 'byte': [ 256, 1, ], ##< next opcode byte selects the instruction (default).
647 'byte+pfx': [ 1024, 4, ], ##< next opcode byte selects the instruction together with the 0x66, 0xf2 and 0xf3 prefixes.
648 '/r': [ 8, 1, ], ##< modrm.reg selects the instruction.
649 'memreg /r':[ 16, 1, ], ##< modrm.reg and (modrm.mod == 3) selects the instruction.
650 'mod /r': [ 32, 1, ], ##< modrm.reg and modrm.mod selects the instruction.
651 '!11 /r': [ 8, 1, ], ##< modrm.reg selects the instruction with modrm.mod != 0y11.
652 '11 /r': [ 8, 1, ], ##< modrm.reg select the instruction with modrm.mod == 0y11.
653 '11': [ 64, 1, ], ##< modrm.reg and modrm.rm select the instruction with modrm.mod == 0y11.
654 };
655
656 ## Define the subentry number according to the Instruction::sPrefix
657 ## value for 'byte+pfx' selected tables.
658 kiPrefixOrder = {
659 'none': 0,
660 '0x66': 1,
661 '0xf3': 2,
662 '0xf2': 3,
663 };
664
665 def __init__(self, sName, sIemName = None, asLeadOpcodes = None, sSelector = 'byte+pfx',
666 sEncoding = 'legacy', sDisParse = None):
667 assert sSelector in self.kdSelectors;
668 assert sEncoding in self.kdEncodings;
669 if asLeadOpcodes is None:
670 asLeadOpcodes = [];
671 else:
672 for sOpcode in asLeadOpcodes:
673 assert _isValidOpcodeByte(sOpcode);
674 assert sDisParse is None or sDisParse.startswith('IDX_Parse');
675
676 self.sName = sName;
677 self.sIemName = sIemName;
678 self.asLeadOpcodes = asLeadOpcodes; ##< Lead opcode bytes formatted as hex strings like '0x0f'.
679 self.sSelector = sSelector; ##< The member selector, see kdSelectors.
680 self.sEncoding = sEncoding; ##< The encoding, see kdSelectors.
681 self.aoInstructions = [] # type: Instruction
682 self.sDisParse = sDisParse; ##< IDX_ParseXXX.
683
684 def copy(self, sNewName, sPrefixFilter = None):
685 """
686 Copies the table with filtering instruction by sPrefix if not None.
687 """
688 oCopy = InstructionMap(sNewName, sIemName = self.sIemName, asLeadOpcodes = self.asLeadOpcodes,
689 sSelector = 'byte' if sPrefixFilter is not None and self.sSelector == 'byte+pfx'
690 else self.sSelector,
691 sEncoding = self.sEncoding, sDisParse = self.sDisParse);
692 if sPrefixFilter is None:
693 oCopy.aoInstructions = list(self.aoInstructions);
694 else:
695 oCopy.aoInstructions = [oInstr for oInstr in self.aoInstructions if oInstr.sPrefix == sPrefixFilter];
696 return oCopy;
697
698 def getTableSize(self):
699 """
700 Number of table entries. This corresponds directly to the selector.
701 """
702 return self.kdSelectors[self.sSelector][0];
703
704 def getEntriesPerByte(self):
705 """
706 Number of table entries per opcode bytes.
707
708 This only really makes sense for the 'byte' and 'byte+pfx' selectors, for
709 the others it will just return 1.
710 """
711 return self.kdSelectors[self.sSelector][1];
712
713 def getInstructionIndex(self, oInstr):
714 """
715 Returns the table index for the instruction.
716 """
717 bOpcode = oInstr.getOpcodeByte();
718
719 # The byte selectors are simple. We need a full opcode byte and need just return it.
720 if self.sSelector == 'byte':
721 assert oInstr.sOpcode[:2] == '0x' and len(oInstr.sOpcode) == 4, str(oInstr);
722 return bOpcode;
723
724 # The byte + prefix selector is similarly simple, though requires a prefix as well as the full opcode.
725 if self.sSelector == 'byte+pfx':
726 assert oInstr.sOpcode[:2] == '0x' and len(oInstr.sOpcode) == 4, str(oInstr);
727 assert self.kiPrefixOrder.get(oInstr.sPrefix, -16384) >= 0;
728 return bOpcode * 4 + self.kiPrefixOrder.get(oInstr.sPrefix, -16384);
729
730 # The other selectors needs masking and shifting.
731 if self.sSelector == '/r':
732 return (bOpcode >> 3) & 0x7;
733
734 if self.sSelector == 'mod /r':
735 return (bOpcode >> 3) & 0x1f;
736
737 if self.sSelector == 'memreg /r':
738 return ((bOpcode >> 3) & 0x7) | (int((bOpcode >> 6) == 3) << 3);
739
740 if self.sSelector == '!11 /r':
741 assert (bOpcode & 0xc0) != 0xc, str(oInstr);
742 return (bOpcode >> 3) & 0x7;
743
744 if self.sSelector == '11 /r':
745 assert (bOpcode & 0xc0) == 0xc, str(oInstr);
746 return (bOpcode >> 3) & 0x7;
747
748 if self.sSelector == '11':
749 assert (bOpcode & 0xc0) == 0xc, str(oInstr);
750 return bOpcode & 0x3f;
751
752 assert False, self.sSelector;
753 return -1;
754
755 def getInstructionsInTableOrder(self):
756 """
757 Get instructions in table order.
758
759 Returns array of instructions. Normally there is exactly one
760 instruction per entry. However the entry could also be None if
761 not instruction was specified for that opcode value. Or there
762 could be a list of instructions to deal with special encodings
763 where for instance prefix (e.g. REX.W) encodes a different
764 instruction or different CPUs have different instructions or
765 prefixes in the same place.
766 """
767 # Start with empty table.
768 cTable = self.getTableSize();
769 aoTable = [None] * cTable;
770
771 # Insert the instructions.
772 for oInstr in self.aoInstructions:
773 if oInstr.sOpcode:
774 idxOpcode = self.getInstructionIndex(oInstr);
775 assert idxOpcode < cTable, str(idxOpcode);
776
777 oExisting = aoTable[idxOpcode];
778 if oExisting is None:
779 aoTable[idxOpcode] = oInstr;
780 elif not isinstance(oExisting, list):
781 aoTable[idxOpcode] = list([oExisting, oInstr]);
782 else:
783 oExisting.append(oInstr);
784
785 return aoTable;
786
787
def getDisasTableName(self):
    """
    Returns the disassembler table name for this map.

    The name is 'g_aDisas' followed by one piece per '_' separated word of
    self.sName:
        - 'm' and 'r' (suffixes indicating modrm.mod==mem / modrm.mod==reg)
          as well as two digit lowercase hex words are appended as-is with
          a leading underscore;
        - any other word gets 'grp' -> 'Grp' and 'map' -> 'Map' replaced and
          its first character uppercased.
    """
    asPieces = ['g_aDisas',];
    for sPart in self.sName.split('_'):
        fIsModSuffix = sPart in ('m', 'r');
        fIsHexByte   = len(sPart) == 2 and all(ch in '0123456789abcdef' for ch in sPart);
        if fIsModSuffix or fIsHexByte:
            asPieces.append('_' + sPart);
        else:
            sCamel = sPart.replace('grp', 'Grp').replace('map', 'Map');
            asPieces.append(sCamel[0].upper() + sCamel[1:]);
    return ''.join(asPieces);
805
def getDisasRangeName(self):
    """
    Returns the disassembler table range name for this map.

    This is the table name from getDisasTableName() with 'g_aDisas' swapped
    for 'g_Disas' and 'Range' appended.
    """
    sTableName = self.getDisasTableName();
    sRangeBase = sTableName.replace('g_aDisas', 'g_Disas');
    return sRangeBase + 'Range';
811
def isVexMap(self):
    """ Returns True if the map encoding (self.sEncoding) starts with 'vex', i.e. a VEX map. """
    fVexEncoded = self.sEncoding.startswith('vex');
    return fVexEncoded;
815
816
class TestType(object):
    """
    Test value type.

    This base class deals with integer like values.  The fUnsigned constructor
    parameter indicates the default stance on zero vs sign extending.  It is
    possible to override fUnsigned=True by prefixing the value with '+' or '-'.
    """
    def __init__(self, sName, acbSizes = None, fUnsigned = True):
        """
        sName:      The type name.
        acbSizes:   The natural value sizes in bytes, smallest first.  Defaults
                    to [1, 2, 4, 8, 16, 32].
        fUnsigned:  Whether values are zero extended (True) or sign extended
                    (False) unless the value itself carries a sign prefix.
        """
        self.sName = sName;
        self.acbSizes = [1, 2, 4, 8, 16, 32] if acbSizes is None else acbSizes;  # Normal sizes.
        self.fUnsigned = fUnsigned;

    class BadValue(Exception):
        """ Bad value exception. """
        def __init__(self, sMessage):
            Exception.__init__(self, sMessage);
            self.sMessage = sMessage;

    ## For ascii ~ operator (maps each lowercase hex digit to its bitwise inverse).
    kdHexInv = {
        '0': 'f', '1': 'e', '2': 'd', '3': 'c',
        '4': 'b', '5': 'a', '6': '9', '7': '8',
        '8': '7', '9': '6', 'a': '5', 'b': '4',
        'c': '3', 'd': '2', 'e': '1', 'f': '0',
    };

    def get(self, sValue):
        """
        Get the shortest normal sized byte representation of sValue.

        sValue is a decimal or '0x' prefixed hexadecimal integer string,
        optionally with a leading '+' or '-' which forces sign extension.

        Returns ((fSignExtend, bytearray), ) or ((fSignExtend, bytearray), (fSignExtend, bytearray), ).
        The latter form is for AND+OR pairs where the first entry is what to
        AND with the field and the second the one to OR with.  This base
        class only ever returns the single entry form.  The bytearray holds
        the value least significant byte first.

        Raises BadValue if invalid value.
        """
        if not sValue:
            raise TestType.BadValue('empty value');

        # Deal with sign and detect hexadecimal or decimal.
        fSignExtend = not self.fUnsigned;
        if sValue[0] == '-' or sValue[0] == '+':
            fSignExtend = True;
            fHex = len(sValue) > 3 and sValue[1:3].lower() == '0x';
        else:
            fHex = len(sValue) > 2 and sValue[0:2].lower() == '0x';

        # Try convert it to an integer.  int() is arbitrary precision on
        # python 3 and promotes to long by itself on python 2.
        try:
            iValue = int(sValue, 16 if fHex else 10);
        except Exception as oXcpt:
            raise TestType.BadValue('failed to convert "%s" to integer (%s)' % (sValue, oXcpt));

        # Convert to hex digits.  '%x' yields neither the '0x' prefix nor the
        # python 2 'L' suffix.  Negative values needs to be manually converted
        # to something non-negative (~-n + 1), i.e. two's complement: invert
        # the digits of (-n - 1).
        if iValue >= 0:
            sHex = '%x' % (iValue,);
        else:
            sHex = ''.join([self.kdHexInv[sDigit] for sDigit in '%x' % (-iValue - 1,)]);
            # Make sure the top bit is set so the value stays negative when padded.
            if fSignExtend and sHex[0] not in [ '8', '9', 'a', 'b', 'c', 'd', 'e', 'f']:
                sHex = 'f' + sHex;

        # Find the number of digits of the smallest natural size that fits,
        # or round up to a multiple of the largest size if none does.
        cDigits    = len(sHex);
        cMaxDigits = self.acbSizes[-1] * 2;
        if cDigits <= cMaxDigits:
            for cb in self.acbSizes:
                cNaturalDigits = cb * 2;
                if cDigits <= cNaturalDigits:
                    break;
        else:
            cNaturalDigits = ((cDigits + cMaxDigits - 1) // cMaxDigits) * cMaxDigits;

        # Pad to the natural size, extending the sign of the parsed value.
        if cNaturalDigits != cDigits:
            cNeeded = cNaturalDigits - cDigits;
            if iValue >= 0:
                sHex = ('0' * cNeeded) + sHex;
            else:
                sHex = ('f' * cNeeded) + sHex;

        # Walk the digit pairs from the end so the least significant byte comes first.
        abValue = bytearray([int(sHex[offHex - 2 : offHex], 16) for offHex in range(len(sHex), 0, -2)]);

        return ((fSignExtend, abValue),);

    def validate(self, sValue):
        """
        Returns True if value is okay, error message on failure.
        """
        try:
            self.get(sValue);
        except TestType.BadValue as oXcpt:
            return oXcpt.sMessage;
        return True;

    def isAndOrPair(self, sValue):
        """
        Checks if sValue is a pair.

        Always False for this base class.
        """
        _ = sValue;
        return False;
941
942
class TestTypeEflags(TestType):
    """
    Special value parsing for EFLAGS/RFLAGS/FLAGS.

    Values are comma separated flag mnemonics.  Each mnemonic is looked up in
    g_kdEFlagsMnemonics; constants prefixed with '!' there denote flags to
    clear, all others flags to set.
    """

    ## The mnemonics that turn a value into an AND+OR pair, see isAndOrPair().
    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 };

    def __init__(self, sName):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);

    def get(self, sValue):
        """
        Converts the comma separated flag mnemonics in sValue.

        Returns (clear-entry, set-entry) if any flag is to be cleared,
        otherwise the single entry tuple with the flags to set (same entry
        format as TestType.get).

        Raises BadValue on unknown flag mnemonics.
        """
        fClear = 0;
        fSet   = 0;
        for sFlag in sValue.split(','):
            sConstant = g_kdEFlagsMnemonics.get(sFlag, None);
            if sConstant is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sFlag, sValue))
            if sConstant[0] == '!':
                fClear |= g_kdX86EFlagsConstants[sConstant[1:]];
            else:
                fSet   |= g_kdX86EFlagsConstants[sConstant];

        aoSet = TestType.get(self, '0x%x' % (fSet,));
        if fClear != 0:
            aoClear = TestType.get(self, '%#x' % (fClear,))
            assert self.isAndOrPair(sValue) is True;
            return (aoClear[0], aoSet[0]);
        assert self.isAndOrPair(sValue) is False;
        return aoSet;

    def isAndOrPair(self, sValue):
        """
        Checks if sValue is a pair, i.e. if any of its comma separated flag
        mnemonics is one of kdZeroValueFlags.

        The mnemonics are compared as whole tokens, the same way get() splits
        the value, so a longer mnemonic merely containing one of the zero
        value names as a substring does not count.
        """
        for sFlag in sValue.split(','):
            if sFlag in self.kdZeroValueFlags:
                return True;
        return False;
978
class TestTypeFromDict(TestType):
    """
    Special value parsing for values given as comma separated flag names
    which are resolved via a dictionary of constants.

    Each flag name is uppercased and prefixed with sConstantPrefix before it
    is looked up in kdConstantsAndValues.
    """

    # Not referenced by the methods of this class.
    # NOTE(review): looks like a leftover from TestTypeEflags - confirm before removing.
    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 };

    def __init__(self, sName, kdConstantsAndValues, sConstantPrefix):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);
        self.sConstantPrefix      = sConstantPrefix;
        self.kdConstantsAndValues = kdConstantsAndValues;

    def get(self, sValue):
        """
        ORs together the values of the flags named in sValue and returns the
        result in TestType.get format.

        Raises BadValue on unknown flag names.
        """
        fCombined = 0;
        for sName in sValue.split(','):
            sConstant = self.sConstantPrefix + sName.upper();
            fCurrent  = self.kdConstantsAndValues.get(sConstant, None);
            if fCurrent is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sName, sValue))
            fCombined |= fCurrent;
        sHexValue = '0x%x' % (fCombined,);
        return TestType.get(self, sHexValue);
999
1000
class TestInOut(object):
    """
    One input or output state modifier.

    This should be thought as values to modify BS3REGCTX and extended (needs
    to be structured) state.
    """
    ## Assigned operators.
    kasOperators = [
        '&|=',  # Special AND(INV)+OR operator for use with EFLAGS.
        '&~=',
        '&=',
        '|=',
        '='
    ];
    ## Types
    kdTypes = {
        'uint':  TestType('uint', fUnsigned = True),
        # NOTE(review): relies on the fUnsigned=True default of TestType, so this
        #               currently parses exactly like 'uint' - confirm whether
        #               fUnsigned=False was intended.
        'int':   TestType('int'),
        'efl':   TestTypeEflags('efl'),
        'cr0':   TestTypeFromDict('cr0', g_kdX86Cr0Constants, 'X86_CR0_'),
        'cr4':   TestTypeFromDict('cr4', g_kdX86Cr4Constants, 'X86_CR4_'),
        'xcr0':  TestTypeFromDict('xcr0', g_kdX86XSaveCConstants, 'XSAVE_C_'),
    };
    ## CPU context fields.
    kdFields = {
        # name:         ( default type, [both|input|output], )
        # Operands.
        'op1':          ( 'uint', 'both',   ), ## \@op1
        'op2':          ( 'uint', 'both',   ), ## \@op2
        'op3':          ( 'uint', 'both',   ), ## \@op3
        'op4':          ( 'uint', 'both',   ), ## \@op4
        # Flags.
        'efl':          ( 'efl',  'both',   ),
        'efl_undef':    ( 'uint', 'output', ),
        # 8-bit GPRs.
        'al':           ( 'uint', 'both',   ),
        'cl':           ( 'uint', 'both',   ),
        'dl':           ( 'uint', 'both',   ),
        'bl':           ( 'uint', 'both',   ),
        'ah':           ( 'uint', 'both',   ),
        'ch':           ( 'uint', 'both',   ),
        'dh':           ( 'uint', 'both',   ),
        'bh':           ( 'uint', 'both',   ),
        'r8l':          ( 'uint', 'both',   ),
        'r9l':          ( 'uint', 'both',   ),
        'r10l':         ( 'uint', 'both',   ),
        'r11l':         ( 'uint', 'both',   ),
        'r12l':         ( 'uint', 'both',   ),
        'r13l':         ( 'uint', 'both',   ),
        'r14l':         ( 'uint', 'both',   ),
        'r15l':         ( 'uint', 'both',   ),
        # 16-bit GPRs.
        'ax':           ( 'uint', 'both',   ),
        'dx':           ( 'uint', 'both',   ),
        'cx':           ( 'uint', 'both',   ),
        'bx':           ( 'uint', 'both',   ),
        'sp':           ( 'uint', 'both',   ),
        'bp':           ( 'uint', 'both',   ),
        'si':           ( 'uint', 'both',   ),
        'di':           ( 'uint', 'both',   ),
        'r8w':          ( 'uint', 'both',   ),
        'r9w':          ( 'uint', 'both',   ),
        'r10w':         ( 'uint', 'both',   ),
        'r11w':         ( 'uint', 'both',   ),
        'r12w':         ( 'uint', 'both',   ),
        'r13w':         ( 'uint', 'both',   ),
        'r14w':         ( 'uint', 'both',   ),
        'r15w':         ( 'uint', 'both',   ),
        # 32-bit GPRs.
        'eax':          ( 'uint', 'both',   ),
        'edx':          ( 'uint', 'both',   ),
        'ecx':          ( 'uint', 'both',   ),
        'ebx':          ( 'uint', 'both',   ),
        'esp':          ( 'uint', 'both',   ),
        'ebp':          ( 'uint', 'both',   ),
        'esi':          ( 'uint', 'both',   ),
        'edi':          ( 'uint', 'both',   ),
        'r8d':          ( 'uint', 'both',   ),
        'r9d':          ( 'uint', 'both',   ),
        'r10d':         ( 'uint', 'both',   ),
        'r11d':         ( 'uint', 'both',   ),
        'r12d':         ( 'uint', 'both',   ),
        'r13d':         ( 'uint', 'both',   ),
        'r14d':         ( 'uint', 'both',   ),
        'r15d':         ( 'uint', 'both',   ),
        # 64-bit GPRs.
        'rax':          ( 'uint', 'both',   ),
        'rdx':          ( 'uint', 'both',   ),
        'rcx':          ( 'uint', 'both',   ),
        'rbx':          ( 'uint', 'both',   ),
        'rsp':          ( 'uint', 'both',   ),
        'rbp':          ( 'uint', 'both',   ),
        'rsi':          ( 'uint', 'both',   ),
        'rdi':          ( 'uint', 'both',   ),
        'r8':           ( 'uint', 'both',   ),
        'r9':           ( 'uint', 'both',   ),
        'r10':          ( 'uint', 'both',   ),
        'r11':          ( 'uint', 'both',   ),
        'r12':          ( 'uint', 'both',   ),
        'r13':          ( 'uint', 'both',   ),
        'r14':          ( 'uint', 'both',   ),
        'r15':          ( 'uint', 'both',   ),
        # 16-bit, 32-bit or 64-bit registers according to operand size.
        'oz.rax':       ( 'uint', 'both',   ),
        'oz.rdx':       ( 'uint', 'both',   ),
        'oz.rcx':       ( 'uint', 'both',   ),
        'oz.rbx':       ( 'uint', 'both',   ),
        'oz.rsp':       ( 'uint', 'both',   ),
        'oz.rbp':       ( 'uint', 'both',   ),
        'oz.rsi':       ( 'uint', 'both',   ),
        'oz.rdi':       ( 'uint', 'both',   ),
        'oz.r8':        ( 'uint', 'both',   ),
        'oz.r9':        ( 'uint', 'both',   ),
        'oz.r10':       ( 'uint', 'both',   ),
        'oz.r11':       ( 'uint', 'both',   ),
        'oz.r12':       ( 'uint', 'both',   ),
        'oz.r13':       ( 'uint', 'both',   ),
        'oz.r14':       ( 'uint', 'both',   ),
        'oz.r15':       ( 'uint', 'both',   ),
        # Control registers.
        'cr0':          ( 'cr0',  'both',   ),
        'cr4':          ( 'cr4',  'both',   ),
        'xcr0':         ( 'xcr0', 'both',   ),
        # FPU Registers
        'fcw':          ( 'uint', 'both',   ),
        'fsw':          ( 'uint', 'both',   ),
        'ftw':          ( 'uint', 'both',   ),
        'fop':          ( 'uint', 'both',   ),
        'fpuip':        ( 'uint', 'both',   ),
        'fpucs':        ( 'uint', 'both',   ),
        'fpudp':        ( 'uint', 'both',   ),
        'fpuds':        ( 'uint', 'both',   ),
        'mxcsr':        ( 'uint', 'both',   ),
        'st0':          ( 'uint', 'both',   ),
        'st1':          ( 'uint', 'both',   ),
        'st2':          ( 'uint', 'both',   ),
        'st3':          ( 'uint', 'both',   ),
        'st4':          ( 'uint', 'both',   ),
        'st5':          ( 'uint', 'both',   ),
        'st6':          ( 'uint', 'both',   ),
        'st7':          ( 'uint', 'both',   ),
        # MMX registers.
        'mm0':          ( 'uint', 'both',   ),
        'mm1':          ( 'uint', 'both',   ),
        'mm2':          ( 'uint', 'both',   ),
        'mm3':          ( 'uint', 'both',   ),
        'mm4':          ( 'uint', 'both',   ),
        'mm5':          ( 'uint', 'both',   ),
        'mm6':          ( 'uint', 'both',   ),
        'mm7':          ( 'uint', 'both',   ),
        # SSE registers.
        'xmm0':         ( 'uint', 'both',   ),
        'xmm1':         ( 'uint', 'both',   ),
        'xmm2':         ( 'uint', 'both',   ),
        'xmm3':         ( 'uint', 'both',   ),
        'xmm4':         ( 'uint', 'both',   ),
        'xmm5':         ( 'uint', 'both',   ),
        'xmm6':         ( 'uint', 'both',   ),
        'xmm7':         ( 'uint', 'both',   ),
        'xmm8':         ( 'uint', 'both',   ),
        'xmm9':         ( 'uint', 'both',   ),
        'xmm10':        ( 'uint', 'both',   ),
        'xmm11':        ( 'uint', 'both',   ),
        'xmm12':        ( 'uint', 'both',   ),
        'xmm13':        ( 'uint', 'both',   ),
        'xmm14':        ( 'uint', 'both',   ),
        'xmm15':        ( 'uint', 'both',   ),
        'xmm0.lo':      ( 'uint', 'both',   ),
        'xmm1.lo':      ( 'uint', 'both',   ),
        'xmm2.lo':      ( 'uint', 'both',   ),
        'xmm3.lo':      ( 'uint', 'both',   ),
        'xmm4.lo':      ( 'uint', 'both',   ),
        'xmm5.lo':      ( 'uint', 'both',   ),
        'xmm6.lo':      ( 'uint', 'both',   ),
        'xmm7.lo':      ( 'uint', 'both',   ),
        'xmm8.lo':      ( 'uint', 'both',   ),
        'xmm9.lo':      ( 'uint', 'both',   ),
        'xmm10.lo':     ( 'uint', 'both',   ),
        'xmm11.lo':     ( 'uint', 'both',   ),
        'xmm12.lo':     ( 'uint', 'both',   ),
        'xmm13.lo':     ( 'uint', 'both',   ),
        'xmm14.lo':     ( 'uint', 'both',   ),
        'xmm15.lo':     ( 'uint', 'both',   ),
        'xmm0.hi':      ( 'uint', 'both',   ),
        'xmm1.hi':      ( 'uint', 'both',   ),
        'xmm2.hi':      ( 'uint', 'both',   ),
        'xmm3.hi':      ( 'uint', 'both',   ),
        'xmm4.hi':      ( 'uint', 'both',   ),
        'xmm5.hi':      ( 'uint', 'both',   ),
        'xmm6.hi':      ( 'uint', 'both',   ),
        'xmm7.hi':      ( 'uint', 'both',   ),
        'xmm8.hi':      ( 'uint', 'both',   ),
        'xmm9.hi':      ( 'uint', 'both',   ),
        'xmm10.hi':     ( 'uint', 'both',   ),
        'xmm11.hi':     ( 'uint', 'both',   ),
        'xmm12.hi':     ( 'uint', 'both',   ),
        'xmm13.hi':     ( 'uint', 'both',   ),
        'xmm14.hi':     ( 'uint', 'both',   ),
        'xmm15.hi':     ( 'uint', 'both',   ),
        'xmm0.lo.zx':   ( 'uint', 'both',   ),
        'xmm1.lo.zx':   ( 'uint', 'both',   ),
        'xmm2.lo.zx':   ( 'uint', 'both',   ),
        'xmm3.lo.zx':   ( 'uint', 'both',   ),
        'xmm4.lo.zx':   ( 'uint', 'both',   ),
        'xmm5.lo.zx':   ( 'uint', 'both',   ),
        'xmm6.lo.zx':   ( 'uint', 'both',   ),
        'xmm7.lo.zx':   ( 'uint', 'both',   ),
        'xmm8.lo.zx':   ( 'uint', 'both',   ),
        'xmm9.lo.zx':   ( 'uint', 'both',   ),
        'xmm10.lo.zx':  ( 'uint', 'both',   ),
        'xmm11.lo.zx':  ( 'uint', 'both',   ),
        'xmm12.lo.zx':  ( 'uint', 'both',   ),
        'xmm13.lo.zx':  ( 'uint', 'both',   ),
        'xmm14.lo.zx':  ( 'uint', 'both',   ),
        'xmm15.lo.zx':  ( 'uint', 'both',   ),
        'xmm0.dw0':     ( 'uint', 'both',   ),
        'xmm1.dw0':     ( 'uint', 'both',   ),
        'xmm2.dw0':     ( 'uint', 'both',   ),
        'xmm3.dw0':     ( 'uint', 'both',   ),
        'xmm4.dw0':     ( 'uint', 'both',   ),
        'xmm5.dw0':     ( 'uint', 'both',   ),
        'xmm6.dw0':     ( 'uint', 'both',   ),
        'xmm7.dw0':     ( 'uint', 'both',   ),
        'xmm8.dw0':     ( 'uint', 'both',   ),
        'xmm9.dw0':     ( 'uint', 'both',   ),
        'xmm10.dw0':    ( 'uint', 'both',   ),
        'xmm11.dw0':    ( 'uint', 'both',   ),
        'xmm12.dw0':    ( 'uint', 'both',   ),
        'xmm13.dw0':    ( 'uint', 'both',   ),
        'xmm14.dw0':    ( 'uint', 'both',   ),
        'xmm15.dw0':    ( 'uint', 'both',   ),
        'xmm15_dw0':    ( 'uint', 'both',   ), # Legacy misspelling of 'xmm15.dw0', kept for compatibility.
        # AVX registers.
        'ymm0':         ( 'uint', 'both',   ),
        'ymm1':         ( 'uint', 'both',   ),
        'ymm2':         ( 'uint', 'both',   ),
        'ymm3':         ( 'uint', 'both',   ),
        'ymm4':         ( 'uint', 'both',   ),
        'ymm5':         ( 'uint', 'both',   ),
        'ymm6':         ( 'uint', 'both',   ),
        'ymm7':         ( 'uint', 'both',   ),
        'ymm8':         ( 'uint', 'both',   ),
        'ymm9':         ( 'uint', 'both',   ),
        'ymm10':        ( 'uint', 'both',   ),
        'ymm11':        ( 'uint', 'both',   ),
        'ymm12':        ( 'uint', 'both',   ),
        'ymm13':        ( 'uint', 'both',   ),
        'ymm14':        ( 'uint', 'both',   ),
        'ymm15':        ( 'uint', 'both',   ),

        # Special ones.
        'value.xcpt':   ( 'uint', 'output', ),
    };

    def __init__(self, sField, sOp, sValue, sType):
        """
        sField:     The field to modify, must be a kdFields key.
        sOp:        The assignment operator, must be one of kasOperators.
        sValue:     The value as a string.
        sType:      The type name as a string.
        """
        assert sField in self.kdFields;
        assert sOp in self.kasOperators;
        self.sField = sField;
        self.sOp    = sOp;
        self.sValue = sValue;
        self.sType  = sType;
        assert isinstance(sField, str);
        assert isinstance(sOp, str);
        assert isinstance(sType, str);
        assert isinstance(sValue, str);
1266
1267
class TestSelector(object):
    """
    One selector for an instruction test.

    A selector compares a variable (kdVariables key) against one of the
    values valid for that variable using one of the kasCompareOps operators.
    """
    ## Selector compare operators.
    kasCompareOps = [ '==', '!=' ];

    ## Selector variables and their valid values.
    kdVariables = {
        # Operand size.
        'size':     { 'o16': 'size_o16', 'o32': 'size_o32', 'o64': 'size_o64', },
        # VEX.L value.
        'vex.l':    { '0': 'vexl_0', '1': 'vexl_1', },
        # Execution ring.
        'ring':     {
            '0':    'ring_0',
            '1':    'ring_1',
            '2':    'ring_2',
            '3':    'ring_3',
            '0..2': 'ring_0_thru_2',
            '1..3': 'ring_1_thru_3',
        },
        # Basic code mode.
        'codebits': { '64': 'code_64bit', '32': 'code_32bit', '16': 'code_16bit', },
        # cpu modes.
        'mode':     {
            'real': 'mode_real',
            'prot': 'mode_prot',
            'long': 'mode_long',
            'v86':  'mode_v86',
            'smm':  'mode_smm',
            'vmx':  'mode_vmx',
            'svm':  'mode_svm',
        },
        # paging on/off
        'paging':   { 'on': 'paging_on', 'off': 'paging_off', },
        # CPU vendor
        'vendor':   { 'amd': 'vendor_amd', 'intel': 'vendor_intel', 'via': 'vendor_via', },
    };

    ## Selector shorthand predicates.
    ## These translates into variable expressions.
    kdPredicates = {
        # Operand size.
        'o16':          'size==o16',
        'o32':          'size==o32',
        'o64':          'size==o64',
        # Execution ring.
        'ring0':        'ring==0',
        '!ring0':       'ring==1..3',
        'ring1':        'ring==1',
        'ring2':        'ring==2',
        'ring3':        'ring==3',
        'user':         'ring==3',
        'supervisor':   'ring==0..2',
        # Basic code mode.
        '16-bit':       'codebits==16',
        '32-bit':       'codebits==32',
        '64-bit':       'codebits==64',
        # cpu modes.
        'real':         'mode==real',
        'prot':         'mode==prot',
        'long':         'mode==long',
        'v86':          'mode==v86',
        'smm':          'mode==smm',
        'vmx':          'mode==vmx',
        'svm':          'mode==svm',
        # paging on/off
        'paging':       'paging==on',
        '!paging':      'paging==off',
        # CPU vendor
        'amd':          'vendor==amd',
        '!amd':         'vendor!=amd',
        'intel':        'vendor==intel',
        '!intel':       'vendor!=intel',
        'via':          'vendor==via',
        '!via':         'vendor!=via',
    };

    def __init__(self, sVariable, sOp, sValue):
        """
        sVariable:  The variable name, must be a kdVariables key.
        sOp:        The compare operator, must be one of kasCompareOps.
        sValue:     The value, must be valid for sVariable.
        """
        dValidValues = self.kdVariables.get(sVariable);
        assert dValidValues is not None;
        assert sOp in self.kasCompareOps;
        assert sValue in dValidValues;
        self.sValue    = sValue;
        self.sOp       = sOp;
        self.sVariable = sVariable;
1364
1365
1366class InstructionTest(object):
1367 """
1368 Instruction test.
1369 """
1370
1371 def __init__(self, oInstr): # type: (InstructionTest, Instruction)
1372 self.oInstr = oInstr # type: InstructionTest
1373 self.aoInputs = [] # type: list(TestInOut)
1374 self.aoOutputs = [] # type: list(TestInOut)
1375 self.aoSelectors = [] # type: list(TestSelector)
1376
1377 def toString(self, fRepr = False):
1378 """
1379 Converts it to string representation.
1380 """
1381 asWords = [];
1382 if self.aoSelectors:
1383 for oSelector in self.aoSelectors:
1384 asWords.append('%s%s%s' % (oSelector.sVariable, oSelector.sOp, oSelector.sValue,));
1385 asWords.append('/');
1386
1387 for oModifier in self.aoInputs:
1388 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
1389
1390 asWords.append('->');
1391
1392 for oModifier in self.aoOutputs:
1393 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
1394
1395 if fRepr:
1396 return '<' + ' '.join(asWords) + '>';
1397 return ' '.join(asWords);
1398
1399 def __str__(self):
1400 """ Provide string represenation. """
1401 return self.toString(False);
1402
1403 def __repr__(self):
1404 """ Provide unambigious string representation. """
1405 return self.toString(True);
1406
class Operand(object):
    """
    Instruction operand.

    Described by where it is found (a g_kdOpLocations key) and its type
    (a g_kdOpTypes key).
    """

    def __init__(self, sWhere, sType):
        assert sWhere in g_kdOpLocations, sWhere;
        assert sType  in g_kdOpTypes, sType;
        self.sType  = sType;    ##< g_kdOpTypes
        self.sWhere = sWhere;   ##< g_kdOpLocations

    def usesModRM(self):
        """ Returns True if using some form of ModR/M encoding, i.e. the type starts with 'E', 'G' or 'M'. """
        chKind = self.sType[0];
        return chKind in ('E', 'G', 'M');
1421
1422
1423
class Instruction(object): # pylint: disable=too-many-instance-attributes
    """
    Instruction.

    Collects the core description of an instruction (mnemonic, operands,
    opcode, flags, tests and so on) together with implementation details,
    decoding bookkeeping and raw intermediate input.
    """

    def __init__(self, sSrcFile, iLine):
        ## @name Core attributes.
        ## @{
        self.oParent        = None # type: Instruction
        self.sMnemonic      = None;
        self.sBrief         = None;
        self.asDescSections = []   # type: list(str)
        self.aoMaps         = []   # type: list(InstructionMap)
        self.aoOperands     = []   # type: list(Operand)
        self.sPrefix        = None; ##< Single prefix: None, 'none', 0x66, 0xf3, 0xf2
        self.sOpcode        = None # type: str
        self.sSubOpcode     = None # type: str
        self.sEncoding      = None;
        self.asFlTest       = None;
        self.asFlModify     = None;
        self.asFlUndefined  = None;
        self.asFlSet        = None;
        self.asFlClear      = None;
        self.dHints         = {};   ##< Dictionary of instruction hints, flags, whatnot. (Dictionary for speed; dummy value).
        self.sDisEnum       = None; ##< OP_XXXX value.  Default is based on the uppercased mnemonic.
        self.asCpuIds       = [];   ##< The CPUID feature bit names for this instruction. If multiple, assume AND.
        self.asReqFeatures  = [];   ##< Which features are required to be enabled to run this instruction.
        self.aoTests        = []   # type: list(InstructionTest)
        self.sMinCpu        = None; ##< Indicates the minimum CPU required for the instruction. Not set when oCpuExpr is.
        self.oCpuExpr       = None; ##< Some CPU restriction expression...
        self.sGroup         = None;
        self.fUnused        = False; ##< Unused instruction.
        self.fInvalid       = False; ##< Invalid instruction (like UD2).
        self.sInvalidStyle  = None; ##< Invalid behviour style (g_kdInvalidStyles),
        self.sXcptType      = None; ##< Exception type (g_kdXcptTypes).
        ## @}

        ## @name Implementation attributes.
        ## @{
        self.sStats         = None;
        self.sFunction      = None;
        self.fStub          = False;
        self.fUdStub        = False;
        ## @}

        ## @name Decoding info
        ## @{
        self.sSrcFile       = sSrcFile;
        self.iLineCreated   = iLine;
        self.iLineCompleted = None;
        self.cOpTags        = 0;
        self.iLineFnIemOpMacro  = -1;
        self.iLineMnemonicMacro = -1;
        ## @}

        ## @name Intermediate input fields.
        ## @{
        self.sRawDisOpNo    = None;
        self.asRawDisParams = [];
        self.sRawIemOpFlags = None;
        self.sRawOldOpcodes = None;
        self.asCopyTests    = [];
        ## @}

    def toString(self, fRepr = False):
        """
        Turn object into a string.

        The result is a '; ' separated list of 'field=value' pairs where
        fields with a None value are left out.  With fRepr set the whole
        thing is wrapped in '<' and '>'.
        """
        atFields = [ ('opcode', self.sOpcode), ];
        if self.sPrefix:
            atFields.append(('prefix', self.sPrefix));
        atFields.append(('mnemonic', self.sMnemonic));
        atFields.extend([('op%u' % (iOperandNo,), '%s:%s' % (oOperand.sWhere, oOperand.sType,))
                         for iOperandNo, oOperand in enumerate(self.aoOperands, 1)]);
        if self.aoMaps:
            atFields.append(('maps', ','.join([oMap.sName for oMap in self.aoMaps])));
        atFields.append(('encoding', self.sEncoding));
        if self.dHints:
            atFields.append(('hints', ','.join(self.dHints.keys())));
        atFields.append(('disenum', self.sDisEnum));
        if self.asCpuIds:
            atFields.append(('cpuid', ','.join(self.asCpuIds)));
        atFields.append(('group', self.sGroup));
        if self.fUnused:
            atFields.append(('unused', 'True'));
        if self.fInvalid:
            atFields.append(('invalid', 'True'));
        atFields.extend([
            ('invlstyle', self.sInvalidStyle),
            ('fltest',    self.asFlTest),
            ('flmodify',  self.asFlModify),
            ('flundef',   self.asFlUndefined),
            ('flset',     self.asFlSet),
            ('flclear',   self.asFlClear),
            ('mincpu',    self.sMinCpu),
            ('stats',     self.sStats),
            ('sFunction', self.sFunction),
        ]);
        if self.fStub:
            atFields.append(('fStub', 'True'));
        if self.fUdStub:
            atFields.append(('fUdStub', 'True'));
        if self.cOpTags:
            atFields.append(('optags', str(self.cOpTags)));
        if self.iLineFnIemOpMacro != -1:
            atFields.append(('FNIEMOP_XXX', str(self.iLineFnIemOpMacro)));
        if self.iLineMnemonicMacro != -1:
            atFields.append(('IEMOP_MNEMMONICn', str(self.iLineMnemonicMacro)));

        sBody = '; '.join(['%s=%s' % (sField, oValue,) for sField, oValue in atFields if oValue is not None]);
        if fRepr:
            return '<' + sBody + '>';
        return sBody;

    def __str__(self):
        """ Provide string represenation. """
        return self.toString(fRepr = False);

    def __repr__(self):
        """ Provide unambigious string representation. """
        return self.toString(fRepr = True);

    def copy(self, oMap = None, sOpcode = None, sSubOpcode = None, sPrefix = None):
        """
        Makes a copy of the object for the purpose of putting in a different map
        or a different place in the current map.

        The copy gets this object as oParent.  oMap, sOpcode, sSubOpcode and
        sPrefix replace the corresponding values of the original when given
        (truthy).  Lists and the hints dictionary are shallow copied, the
        remaining attributes are shared with the original.
        """
        oCopy = Instruction(self.sSrcFile, self.iLineCreated);
        oCopy.oParent = self;

        # The attributes which can be overridden by the caller.
        oCopy.aoMaps     = [oMap,] if oMap else list(self.aoMaps);
        oCopy.sPrefix    = sPrefix if sPrefix else self.sPrefix;
        oCopy.sOpcode    = sOpcode if sOpcode else self.sOpcode;
        oCopy.sSubOpcode = sSubOpcode if sSubOpcode else self.sSubOpcode;

        # Containers getting a shallow copy of their own.  ## Deeper copy for aoOperands and aoTests?
        oCopy.dHints = dict(self.dHints);
        for sAttr in ('asDescSections', 'aoOperands', 'asCpuIds', 'asReqFeatures', 'aoTests',
                      'asRawDisParams', 'asCopyTests',):
            setattr(oCopy, sAttr, list(getattr(self, sAttr)));

        # Everything else is taken over as-is.
        for sAttr in ('sMnemonic', 'sBrief', 'sEncoding',
                      'asFlTest', 'asFlModify', 'asFlUndefined', 'asFlSet', 'asFlClear',
                      'sDisEnum', 'sMinCpu', 'oCpuExpr', 'sGroup',
                      'fUnused', 'fInvalid', 'sInvalidStyle', 'sXcptType',
                      'sStats', 'sFunction', 'fStub', 'fUdStub',
                      'iLineCompleted', 'cOpTags', 'iLineFnIemOpMacro', 'iLineMnemonicMacro',
                      'sRawDisOpNo', 'sRawIemOpFlags', 'sRawOldOpcodes',):
            setattr(oCopy, sAttr, getattr(self, sAttr));

        return oCopy;

    def getOpcodeByte(self):
        """
        Decodes sOpcode into a byte range integer value.
        Raises exception if sOpcode is None or invalid.
        """
        if self.sOpcode is None:
            raise Exception('No opcode byte for %s!' % (self,));
        sSpec   = str(self.sOpcode);    # pylint type confusion workaround.
        cchSpec = len(sSpec);

        # Full hex byte form.
        if sSpec.startswith('0x'):
            return int(sSpec, 16);

        # The /r form: The digit goes into bits 5:3.
        if cchSpec == 2 and sSpec[0] == '/' and sSpec[1].isdigit():
            return int(sSpec[1]) << 3;

        # The 11/r form: As above, with the two top bits set.
        if cchSpec == 4 and sSpec[:3] == '11/' and sSpec[3].isdigit():
            return (int(sSpec[3]) << 3) | 0xc0;

        # The !11/r form: As above, with 0x80 instead of 0xc0 for the top bits.
        ## @todo this doesn't really work...
        if cchSpec == 5 and sSpec[:4] == '!11/' and sSpec[4].isdigit():
            return (int(sSpec[4]) << 3) | 0x80;

        raise Exception('unsupported opcode byte spec "%s" for %s' % (sSpec, self,));

    @staticmethod
    def _flagsToIntegerMask(asFlags):
        """
        Returns the integer mask value for asFlags.

        Each flag is translated via g_kdEFlagsMnemonics into a constant name
        (which must not be a '!' prefixed one) and the g_kdX86EFlagsConstants
        values of these are ORed together.  None or empty input gives 0.
        """
        if not asFlags:
            return 0;
        uMask = 0;
        for sFlag in asFlags:
            sConstant = g_kdEFlagsMnemonics[sFlag];
            assert sConstant[0] != '!', sConstant
            uMask |= g_kdX86EFlagsConstants[sConstant];
        return uMask;

    def getTestedFlagsMask(self):
        """ Returns asFlTest into a integer mask value """
        return self._flagsToIntegerMask(self.asFlTest);

    def getModifiedFlagsMask(self):
        """ Returns asFlModify into a integer mask value """
        return self._flagsToIntegerMask(self.asFlModify);

    def getUndefinedFlagsMask(self):
        """ Returns asFlUndefined into a integer mask value """
        return self._flagsToIntegerMask(self.asFlUndefined);

    def getSetFlagsMask(self):
        """ Returns asFlSet into a integer mask value """
        return self._flagsToIntegerMask(self.asFlSet);

    def getClearedFlagsMask(self):
        """ Returns asFlClear into a integer mask value """
        return self._flagsToIntegerMask(self.asFlClear);

    def onlyInVexMaps(self):
        """ Returns True if only in VEX maps, otherwise False.  (No maps -> False) """
        if not self.aoMaps:
            return False;
        return all(oMap.isVexMap() for oMap in self.aoMaps);
1662
1663
1664
## All the instructions.
g_aoAllInstructions             = []    # type: list(Instruction)

## All the instructions indexed by statistics name (opstat).
g_dAllInstructionsByStat        = {}    # type: dict(Instruction)

## All the instructions indexed by function name (opfunction).
## Each entry is a list of instructions.
g_dAllInstructionsByFunction    = {}    # type: dict(list(Instruction))

## Instructions tagged by oponlytest
g_aoOnlyTestInstructions        = []    # type: list(Instruction)
1676
1677## Instruction maps.
## Instruction maps.
g_aoInstructionMaps = [
    # The one byte map and the groups selected by a single lead opcode byte.
    InstructionMap('one', 'g_apfnOneByteMap', sSelector = 'byte'),
    InstructionMap('grp1_80', asLeadOpcodes = ['0x80',], sSelector = '/r'),
    InstructionMap('grp1_81', asLeadOpcodes = ['0x81',], sSelector = '/r'),
    InstructionMap('grp1_82', asLeadOpcodes = ['0x82',], sSelector = '/r'),
    InstructionMap('grp1_83', asLeadOpcodes = ['0x83',], sSelector = '/r'),
    InstructionMap('grp1a', asLeadOpcodes = ['0x8f',], sSelector = '/r'),
    InstructionMap('grp2_c0', asLeadOpcodes = ['0xc0',], sSelector = '/r'),
    InstructionMap('grp2_c1', asLeadOpcodes = ['0xc1',], sSelector = '/r'),
    InstructionMap('grp2_d0', asLeadOpcodes = ['0xd0',], sSelector = '/r'),
    InstructionMap('grp2_d1', asLeadOpcodes = ['0xd1',], sSelector = '/r'),
    InstructionMap('grp2_d2', asLeadOpcodes = ['0xd2',], sSelector = '/r'),
    InstructionMap('grp2_d3', asLeadOpcodes = ['0xd3',], sSelector = '/r'),
    ## @todo g_apfnEscF1_E0toFF
    InstructionMap('grp3_f6', asLeadOpcodes = ['0xf6',], sSelector = '/r'),
    InstructionMap('grp3_f7', asLeadOpcodes = ['0xf7',], sSelector = '/r'),
    InstructionMap('grp4', asLeadOpcodes = ['0xfe',], sSelector = '/r'),
    InstructionMap('grp5', asLeadOpcodes = ['0xff',], sSelector = '/r'),
    InstructionMap('grp11_c6_m', asLeadOpcodes = ['0xc6',], sSelector = '!11 /r'),
    InstructionMap('grp11_c6_r', asLeadOpcodes = ['0xc6',], sSelector = '11'),    # xabort
    InstructionMap('grp11_c7_m', asLeadOpcodes = ['0xc7',], sSelector = '!11 /r'),
    InstructionMap('grp11_c7_r', asLeadOpcodes = ['0xc7',], sSelector = '11'),    # xbegin

    # The two byte map (0x0f lead byte) and its groups.
    InstructionMap('two0f', 'g_apfnTwoByteMap', asLeadOpcodes = ['0x0f',], sDisParse = 'IDX_ParseTwoByteEsc'),
    InstructionMap('grp6', 'g_apfnGroup6', asLeadOpcodes = ['0x0f', '0x00',], sSelector = '/r'),
    InstructionMap('grp7_m', 'g_apfnGroup7Mem', asLeadOpcodes = ['0x0f', '0x01',], sSelector = '!11 /r'),
    InstructionMap('grp7_r', asLeadOpcodes = ['0x0f', '0x01',], sSelector = '11'),
    InstructionMap('grp8', asLeadOpcodes = ['0x0f', '0xba',], sSelector = '/r'),
    InstructionMap('grp9', 'g_apfnGroup9RegReg', asLeadOpcodes = ['0x0f', '0xc7',], sSelector = 'mod /r'),
    ## @todo What about g_apfnGroup9MemReg?
    InstructionMap('grp10', None, asLeadOpcodes = ['0x0f', '0xb9',], sSelector = '/r'), # UD1 /w modr/m
    InstructionMap('grp12', 'g_apfnGroup12RegReg', asLeadOpcodes = ['0x0f', '0x71',], sSelector = 'mod /r'),
    InstructionMap('grp13', 'g_apfnGroup13RegReg', asLeadOpcodes = ['0x0f', '0x72',], sSelector = 'mod /r'),
    InstructionMap('grp14', 'g_apfnGroup14RegReg', asLeadOpcodes = ['0x0f', '0x73',], sSelector = 'mod /r'),
    InstructionMap('grp15', 'g_apfnGroup15MemReg', asLeadOpcodes = ['0x0f', '0xae',], sSelector = 'memreg /r'),
    ## @todo What about g_apfnGroup15RegReg?
    InstructionMap('grp16', asLeadOpcodes = ['0x0f', '0x18',], sSelector = 'mod /r'),
    InstructionMap('grpA17', asLeadOpcodes = ['0x0f', '0x78',], sSelector = '/r'), # AMD: EXTRQ weirdness
    InstructionMap('grpP', asLeadOpcodes = ['0x0f', '0x0d',], sSelector = '/r'),   # AMD: prefetch

    # The three byte maps (0x0f 0x38 and 0x0f 0x3a lead bytes).
    InstructionMap('three0f38', 'g_apfnThreeByte0f38', asLeadOpcodes = ['0x0f', '0x38',]),
    InstructionMap('three0f3a', 'g_apfnThreeByte0f3a', asLeadOpcodes = ['0x0f', '0x3a',]),

    # The VEX encoded maps and groups.
    InstructionMap('vexmap1', 'g_apfnVexMap1', sEncoding = 'vex1'),
    InstructionMap('vexgrp12', 'g_apfnVexGroup12RegReg', sEncoding = 'vex1', asLeadOpcodes = ['0x71',], sSelector = 'mod /r'),
    InstructionMap('vexgrp13', 'g_apfnVexGroup13RegReg', sEncoding = 'vex1', asLeadOpcodes = ['0x72',], sSelector = 'mod /r'),
    InstructionMap('vexgrp14', 'g_apfnVexGroup14RegReg', sEncoding = 'vex1', asLeadOpcodes = ['0x73',], sSelector = 'mod /r'),
    InstructionMap('vexgrp15', 'g_apfnVexGroup15MemReg', sEncoding = 'vex1', asLeadOpcodes = ['0xae',], sSelector = 'memreg /r'),
    InstructionMap('vexgrp17', 'g_apfnVexGroup17_f3', sEncoding = 'vex1', asLeadOpcodes = ['0xf3',], sSelector = '/r'),

    InstructionMap('vexmap2', 'g_apfnVexMap2', sEncoding = 'vex2'),
    InstructionMap('vexmap3', 'g_apfnVexMap3', sEncoding = 'vex3'),

    # The 3DNow! map and the XOP encoded maps and groups.
    InstructionMap('3dnow', asLeadOpcodes = ['0x0f', '0x0f',]),
    InstructionMap('xopmap8', sEncoding = 'xop8'),
    InstructionMap('xopmap9', sEncoding = 'xop9'),
    InstructionMap('xopgrp1', sEncoding = 'xop9', asLeadOpcodes = ['0x01'], sSelector = '/r'),
    InstructionMap('xopgrp2', sEncoding = 'xop9', asLeadOpcodes = ['0x02'], sSelector = '/r'),
    InstructionMap('xopgrp3', sEncoding = 'xop9', asLeadOpcodes = ['0x12'], sSelector = '/r'),
    InstructionMap('xopmap10', sEncoding = 'xop10'),
    InstructionMap('xopgrp4', sEncoding = 'xop10', asLeadOpcodes = ['0x12'], sSelector = '/r'),
];
## The instruction maps from g_aoInstructionMaps, keyed by their sName attribute.
g_dInstructionMaps = { oMap.sName: oMap for oMap in g_aoInstructionMaps };
## The same instruction maps, keyed by their sIemName attribute.
g_dInstructionMapsByIemName = { oMap.sIemName: oMap for oMap in g_aoInstructionMaps };
1742
1743
1744#
1745# Decoder functions.
1746#
1747
class DecoderFunction(object):
    """
    Describes one decoder function found in a source file.

    Records the function name, the arguments of the defining macro, the
    source location and, once complete() has been called, the raw source
    lines.  It is mainly used for scoping the search for variables used in
    microcode blocks.
    """

    def __init__(self, sSrcFile, iBeginLine, sName, asDefArgs):
        # Location of the function.
        self.sSrcFile   = sSrcFile;     ##< The source file the function is defined in.
        self.iBeginLine = iBeginLine;   ##< The start line.
        self.iEndLine   = -1;           ##< The line the function (probably) ends on, -1 until complete() is called.
        # Identity of the function.
        self.sName      = sName;        ##< The function name.
        self.asDefArgs  = asDefArgs;    ##< The FNIEMOP*DEF/STUB* macro argument list, 0th element is the macro name.
        # Filled in by complete().
        self.asLines    = []            # type: list(str) ##< The raw lines the function is made up of.

    def complete(self, iEndLine, asLines):
        """
        Completes the function by recording its end line and raw lines.

        May only be called once per instance; this is asserted by checking
        that iEndLine still holds its initial value of -1.
        """
        assert self.iEndLine == -1;
        self.asLines  = asLines;
        self.iEndLine = iEndLine;
1770
1771
1772#
1773# "Microcode" statements and blocks
1774#
1775
class McStmt(object):
    """
    A single statement in a microcode block.

    Holds the statement name and its parameter strings.  Subclasses add
    branches or other details.
    """

    def __init__(self, sName, asParams):
        self.sName    = sName;      ##< 'IEM_MC_XXX' or 'C++'.
        self.asParams = asParams;   ##< The parameter strings of the statement.
        self.oUser    = None;       ##< Initialized to None; not otherwise touched by this class.

    def renderCode(self, cchIndent = 0):
        """
        Renders the statement as 'NAME(param, ...);' followed by a newline,
        indented by cchIndent spaces.
        """
        return '%s%s(%s);\n' % (' ' * cchIndent, self.sName, ', '.join(self.asParams),);

    @staticmethod
    def renderCodeForList(aoStmts, cchIndent = 0):
        """
        Renders each statement in aoStmts and returns the concatenated text.
        """
        asRendered = [];
        for oStmt in aoStmts:
            asRendered.append(oStmt.renderCode(cchIndent));
        return ''.join(asRendered);

    @staticmethod
    def findStmtByNames(aoStmts, dNames):
        """
        Returns the first statement in aoStmts whose name is in dNames,
        descending into the if and else branches of conditional statements.
        Returns None if there is no such statement.

        Note! The names are passed as a dictionary for quick lookup, the value
              does not matter.
        """
        for oStmt in aoStmts:
            if oStmt.sName in dNames:
                return oStmt;
            if not isinstance(oStmt, McStmtCond):
                continue;
            # Conditional: search the if-branch first, then the else-branch.
            oHit = (   McStmt.findStmtByNames(oStmt.aoIfBranch, dNames)
                    or McStmt.findStmtByNames(oStmt.aoElseBranch, dNames));
            if oHit:
                return oHit;
        return None;

    def isCppStmt(self):
        """ Checks if this is a C++ statement, i.e. the name starts with 'C++'. """
        return self.sName[:len('C++')] == 'C++';
1820
class McStmtCond(McStmt):
    """
    Base class for conditional statements (IEM_MC_IF_XXX).

    Adds the statement lists of the if-branch and the else-branch.
    """

    def __init__(self, sName, asParams):
        McStmt.__init__(self, sName, asParams);
        self.aoIfBranch   = [];     ##< Statements in the if-branch.
        self.aoElseBranch = [];     ##< Statements in the else-branch, empty if there is none.

    def renderCode(self, cchIndent = 0):
        """
        Renders the conditional including both branches.  The
        IEM_MC_ELSE() part is only emitted when the else-branch is not empty.
        """
        sIndent = ' ' * cchIndent;
        asParts = [
            sIndent + self.sName + '(' + ', '.join(self.asParams) + ') {\n',
            self.renderCodeForList(self.aoIfBranch, cchIndent + 4),
        ];
        if self.aoElseBranch:
            asParts.append(sIndent + '} IEM_MC_ELSE() {\n');
            asParts.append(self.renderCodeForList(self.aoElseBranch, cchIndent + 4));
        asParts.append(sIndent + '} IEM_MC_ENDIF();\n');
        return ''.join(asParts);
1838
class McStmtVar(McStmt):
    """
    Variable declaration statement: IEM_MC_LOCAL_VAR, IEM_MC_LOCAL_CONST.
    """

    def __init__(self, sName, asParams, sType, sVarName, sConstValue = None):
        McStmt.__init__(self, sName, asParams);
        self.sConstValue = sConstValue;     ##< None if not const.
        self.sVarName    = sVarName;        ##< The variable name.
        self.sType       = sType;           ##< The variable type.
1846
class McStmtArg(McStmtVar):
    """
    Argument declaration statement: IEM_MC_ARG, IEM_MC_ARG_CONST,
    IEM_MC_ARG_LOCAL_REF.
    """

    def __init__(self, sName, asParams, sType, sVarName, iArg, sConstValue = None, sRef = None, sRefType = 'none'):
        McStmtVar.__init__(self, sName, asParams, sType, sVarName, sConstValue);
        self.iArg     = iArg;       ##< The argument number.
        self.sRef     = sRef;       ##< The reference string (local variable, register).
        self.sRefType = sRefType;   ##< The kind of reference: 'local', 'none'.
        # Only the two reference kinds listed above are supported.
        assert sRefType in ('none', 'local');
1855
1856
class McStmtCall(McStmt):
    """
    Call statement: IEM_MC_CALL_*.
    """

    def __init__(self, sName, asParams, iFnParam, iRcNameParam = -1):
        McStmt.__init__(self, sName, asParams);
        self.idxFn     = iFnParam;              ##< Index into asParams of the function being called.
        self.idxParams = iFnParam + 1;          ##< Index into asParams following the function.
        self.sFn       = asParams[iFnParam];    ##< The function being called.
        ## The parameter at index iRcNameParam, None if that index is negative.
        if iRcNameParam < 0:
            self.iRcName = None;
        else:
            self.iRcName = asParams[iRcNameParam];
1865
class McCppGeneric(McStmt):
    """
    Generic C++/C statement.

    The raw code is stored as the one and only parameter.
    """

    def __init__(self, sCode, fDecode, sName = 'C++'):
        McStmt.__init__(self, sName, [sCode,]);
        self.fDecode = fDecode;     ##< Selects the '// C++ decode' annotation over '// C++ normal' when rendering.

    def renderCode(self, cchIndent = 0):
        """
        Renders the raw code indented by cchIndent spaces, annotating the end
        of every line (embedded newlines included) with the statement kind.
        """
        if self.fDecode:
            sAnnotatedEol = ' // C++ decode\n';
        else:
            sAnnotatedEol = ' // C++ normal\n';
        sPlain = ' ' * cchIndent + self.asParams[0] + '\n';
        return sPlain.replace('\n', sAnnotatedEol);
1881
class McCppCond(McStmtCond):
    """
    C++/C 'if' statement.

    The condition expression is stored as the one and only parameter.
    """

    def __init__(self, sCode, fDecode):
        McStmtCond.__init__(self, 'C++/if', [sCode,]);
        self.fDecode = fDecode;     ##< Selects the '// C++ decode' annotation over '// C++ normal' when rendering.

    def renderCode(self, cchIndent = 0):
        """
        Renders the if statement with braces around each branch.  The else
        part is only emitted when the else-branch is not empty.
        """
        sIndent = ' ' * cchIndent;
        if self.fDecode:
            sAnnotation = '// C++ decode';
        else:
            sAnnotation = '// C++ normal';

        asParts = [
            sIndent + 'if (' + self.asParams[0] + ') ' + sAnnotation + '\n',
            sIndent + '{\n',
            self.renderCodeForList(self.aoIfBranch, cchIndent + 4),
            sIndent + '}\n',
        ];
        if self.aoElseBranch:
            asParts.extend([
                sIndent + 'else ' + sAnnotation + '\n',
                sIndent + '{\n',
                self.renderCodeForList(self.aoElseBranch, cchIndent + 4),
                sIndent + '}\n',
            ]);
        return ''.join(asParts);
1902
class McCppPreProc(McCppGeneric):
    """
    C++/C preprocessor directive.
    """

    def __init__(self, sCode):
        McCppGeneric.__init__(self, sCode, False, sName = 'C++/preproc');

    def renderCode(self, cchIndent = 0):
        """
        Renders the directive without any indentation (cchIndent is not
        used) and without annotation.
        """
        sDirective = self.asParams[0];
        return sDirective + '\n';
1912
1913
1914class McBlock(object):
1915 """
1916 Microcode block (IEM_MC_BEGIN ... IEM_MC_END).
1917 """
1918
1919 def __init__(self, sSrcFile, iBeginLine, offBeginLine, oFunction, iInFunction, cchIndent = None):
1920 self.sSrcFile = sSrcFile; ##< The source file containing the block.
1921 self.iBeginLine = iBeginLine; ##< The line with the IEM_MC_BEGIN statement.
1922 self.offBeginLine = offBeginLine; ##< The offset of the IEM_MC_BEGIN statement within the line.
1923 self.iEndLine = -1; ##< The line with the IEM_MC_END statement.
1924 self.offEndLine = 0; ##< The offset of the IEM_MC_END statement within the line.
1925 self.oFunction = oFunction; ##< The function the block resides in.
1926 self.sFunction = oFunction.sName; ##< The name of the function the block resides in. DEPRECATED.
1927 self.iInFunction = iInFunction; ##< The block number wihtin the function.
1928 self.cchIndent = cchIndent if cchIndent else offBeginLine;
1929 self.asLines = [] # type: list(str) ##< The raw lines the block is made up of.
1930 ## Decoded statements in the block.
1931 self.aoStmts = [] # type: list(McStmt)
1932
1933 def complete(self, iEndLine, offEndLine, asLines):
1934 """
1935 Completes the microcode block.
1936 """
1937 assert self.iEndLine == -1;
1938 self.iEndLine = iEndLine;
1939 self.offEndLine = offEndLine;
1940 self.asLines = asLines;
1941
1942 def raiseDecodeError(self, sRawCode, off, sMessage):
1943 """ Raises a decoding error. """
1944 offStartOfLine = sRawCode.rfind('\n', 0, off) + 1;
1945 iLine = sRawCode.count('\n', 0, off);
1946 raise ParserException('%s:%d:%d: parsing error: %s'
1947 % (self.sSrcFile, self.iBeginLine + iLine, off - offStartOfLine + 1, sMessage,));
1948
1949 def raiseStmtError(self, sName, sMessage):
1950 """ Raises a statement parser error. """
1951 raise ParserException('%s:%d: %s: parsing error: %s' % (self.sSrcFile, self.iBeginLine, sName, sMessage,));
1952
1953 def checkStmtParamCount(self, sName, asParams, cParamsExpected):
1954 """ Check the parameter count, raising an error it doesn't match. """
1955 if len(asParams) != cParamsExpected:
1956 raise ParserException('%s:%d: %s: Expected %s parameters, found %s!'
1957 % (self.sSrcFile, self.iBeginLine, sName, cParamsExpected, len(asParams),));
1958 return True;
1959
1960 @staticmethod
1961 def parseMcGeneric(oSelf, sName, asParams):
1962 """ Generic parser that returns a plain McStmt object. """
1963 _ = oSelf;
1964 return McStmt(sName, asParams);
1965
1966 @staticmethod
1967 def parseMcGenericCond(oSelf, sName, asParams):
1968 """ Generic parser that returns a plain McStmtCond object. """
1969 _ = oSelf;
1970 return McStmtCond(sName, asParams);
1971
1972 @staticmethod
1973 def parseMcBegin(oSelf, sName, asParams):
1974 """ IEM_MC_BEGIN """
1975 oSelf.checkStmtParamCount(sName, asParams, 2);
1976 return McBlock.parseMcGeneric(oSelf, sName, asParams);
1977
1978 @staticmethod
1979 def parseMcArg(oSelf, sName, asParams):
1980 """ IEM_MC_ARG """
1981 oSelf.checkStmtParamCount(sName, asParams, 3);
1982 return McStmtArg(sName, asParams, asParams[0], asParams[1], int(asParams[2]));
1983
1984 @staticmethod
1985 def parseMcArgConst(oSelf, sName, asParams):
1986 """ IEM_MC_ARG_CONST """
1987 oSelf.checkStmtParamCount(sName, asParams, 4);
1988 return McStmtArg(sName, asParams, asParams[0], asParams[1], int(asParams[3]), sConstValue = asParams[2]);
1989
1990 @staticmethod
1991 def parseMcArgLocalRef(oSelf, sName, asParams):
1992 """ IEM_MC_ARG_LOCAL_REF """
1993 oSelf.checkStmtParamCount(sName, asParams, 4);
1994 return McStmtArg(sName, asParams, asParams[0], asParams[1], int(asParams[3]), sRef = asParams[2], sRefType = 'local');
1995
1996 @staticmethod
1997 def parseMcArgLocalEFlags(oSelf, sName, asParams):
1998 """ IEM_MC_ARG_LOCAL_EFLAGS """
1999 oSelf.checkStmtParamCount(sName, asParams, 3);
2000 # Note! We split this one up into IEM_MC_LOCAL_VAR and IEM_MC_ARG_LOCAL_REF.
2001 return (
2002 McStmtVar('IEM_MC_LOCAL', ['uint32_t', asParams[1],], 'uint32_t', asParams[1]),
2003 McStmtArg('IEM_MC_ARG_LOCAL_REF', ['uint32_t *', asParams[0], asParams[1], asParams[2]],
2004 'uint32_t *', asParams[0], int(asParams[2]), sRef = asParams[1], sRefType = 'local'),
2005 );
2006
2007 @staticmethod
2008 def parseMcLocal(oSelf, sName, asParams):
2009 """ IEM_MC_LOCAL """
2010 oSelf.checkStmtParamCount(sName, asParams, 2);
2011 return McStmtVar(sName, asParams, asParams[0], asParams[1]);
2012
2013 @staticmethod
2014 def parseMcLocalConst(oSelf, sName, asParams):
2015 """ IEM_MC_LOCAL_CONST """
2016 oSelf.checkStmtParamCount(sName, asParams, 3);
2017 return McStmtVar(sName, asParams, asParams[0], asParams[1], sConstValue = asParams[2]);
2018
2019 @staticmethod
2020 def parseMcCallAImpl(oSelf, sName, asParams):
2021 """ IEM_MC_CALL_AIMPL_3|4 """
2022 cArgs = int(sName[-1]);
2023 oSelf.checkStmtParamCount(sName, asParams, 2 + cArgs);
2024 return McStmtCall(sName, asParams, 1, 0);
2025
2026 @staticmethod
2027 def parseMcCallVoidAImpl(oSelf, sName, asParams):
2028 """ IEM_MC_CALL_VOID_AIMPL_2|3 """
2029 cArgs = int(sName[-1]);
2030 oSelf.checkStmtParamCount(sName, asParams, 1 + cArgs);
2031 return McStmtCall(sName, asParams, 0);
2032
2033 @staticmethod
2034 def parseMcCallAvxAImpl(oSelf, sName, asParams):
2035 """ IEM_MC_CALL_AVX_AIMPL_2|3 """
2036 cArgs = int(sName[-1]);
2037 oSelf.checkStmtParamCount(sName, asParams, 1 + cArgs);
2038 return McStmtCall(sName, asParams, 0);
2039
2040 @staticmethod
2041 def parseMcCallFpuAImpl(oSelf, sName, asParams):
2042 """ IEM_MC_CALL_FPU_AIMPL_1|2|3 """
2043 cArgs = int(sName[-1]);
2044 oSelf.checkStmtParamCount(sName, asParams, 1 + cArgs);
2045 return McStmtCall(sName, asParams, 0);
2046
2047 @staticmethod
2048 def parseMcCallMmxAImpl(oSelf, sName, asParams):
2049 """ IEM_MC_CALL_MMX_AIMPL_2|3 """
2050 cArgs = int(sName[-1]);
2051 oSelf.checkStmtParamCount(sName, asParams, 1 + cArgs);
2052 return McStmtCall(sName, asParams, 0);
2053
2054 @staticmethod
2055 def parseMcCallSseAImpl(oSelf, sName, asParams):
2056 """ IEM_MC_CALL_SSE_AIMPL_2|3 """
2057 cArgs = int(sName[-1]);
2058 oSelf.checkStmtParamCount(sName, asParams, 1 + cArgs);
2059 return McStmtCall(sName, asParams, 0);
2060
2061 @staticmethod
2062 def parseMcCallCImpl(oSelf, sName, asParams):
2063 """ IEM_MC_CALL_CIMPL_0|1|2|3|4|5 """
2064 cArgs = int(sName[-1]);
2065 oSelf.checkStmtParamCount(sName, asParams, 1 + cArgs);
2066 return McStmtCall(sName, asParams, 0);
2067
2068 @staticmethod
2069 def stripComments(sCode):
2070 """ Returns sCode with comments removed. """
2071 off = 0;
2072 while off < len(sCode):
2073 off = sCode.find('/', off);
2074 if off < 0 or off + 1 >= len(sCode):
2075 break;
2076
2077 if sCode[off + 1] == '/':
2078 # C++ comment.
2079 offEnd = sCode.find('\n', off + 2);
2080 if offEnd < 0:
2081 return sCode[:off].rstrip();
2082 sCode = sCode[ : off] + sCode[offEnd : ];
2083 off += 1;
2084
2085 elif sCode[off + 1] == '*':
2086 # C comment
2087 offEnd = sCode.find('*/', off + 2);
2088 if offEnd < 0:
2089 return sCode[:off].rstrip();
2090 sSep = ' ';
2091 if (off > 0 and sCode[off - 1].isspace()) or (offEnd + 2 < len(sCode) and sCode[offEnd + 2].isspace()):
2092 sSep = '';
2093 sCode = sCode[ : off] + sSep + sCode[offEnd + 2 : ];
2094 off += len(sSep);
2095
2096 else:
2097 # Not a comment.
2098 off += 1;
2099 return sCode;
2100
2101 @staticmethod
2102 def extractParam(sCode, offParam):
2103 """
2104 Extracts the parameter value at offParam in sCode.
2105 Returns stripped value and the end offset of the terminating ',' or ')'.
2106 """
2107 # Extract it.
2108 cNesting = 0;
2109 offStart = offParam;
2110 while offParam < len(sCode):
2111 ch = sCode[offParam];
2112 if ch == '(':
2113 cNesting += 1;
2114 elif ch == ')':
2115 if cNesting == 0:
2116 break;
2117 cNesting -= 1;
2118 elif ch == ',' and cNesting == 0:
2119 break;
2120 offParam += 1;
2121 return (sCode[offStart : offParam].strip(), offParam);
2122
2123 @staticmethod
2124 def extractParams(sCode, offOpenParen):
2125 """
2126 Parses a parameter list.
2127 Returns the list of parameter values and the offset of the closing parentheses.
2128 Returns (None, len(sCode)) on if no closing parentheses was found.
2129 """
2130 assert sCode[offOpenParen] == '(';
2131 asParams = [];
2132 off = offOpenParen + 1;
2133 while off < len(sCode):
2134 ch = sCode[off];
2135 if ch.isspace():
2136 off += 1;
2137 elif ch != ')':
2138 (sParam, off) = McBlock.extractParam(sCode, off);
2139 asParams.append(sParam);
2140 assert off < len(sCode), 'off=%s sCode=%s:"%s"' % (off, len(sCode), sCode,);
2141 if sCode[off] == ',':
2142 off += 1;
2143 else:
2144 return (asParams, off);
2145 return (None, off);
2146
2147 @staticmethod
2148 def findClosingBraces(sCode, off, offStop):
2149 """
2150 Finds the matching '}' for the '{' at off in sCode.
2151 Returns offset of the matching '}' on success, otherwise -1.
2152
2153 Note! Does not take comments into account.
2154 """
2155 cDepth = 1;
2156 off += 1;
2157 while off < offStop:
2158 offClose = sCode.find('}', off, offStop);
2159 if offClose < 0:
2160 break;
2161 cDepth += sCode.count('{', off, offClose);
2162 cDepth -= 1;
2163 if cDepth == 0:
2164 return offClose;
2165 off = offClose + 1;
2166 return -1;
2167
2168 @staticmethod
2169 def countSpacesAt(sCode, off, offStop):
2170 """ Returns the number of space characters at off in sCode. """
2171 offStart = off;
2172 while off < offStop and sCode[off].isspace():
2173 off += 1;
2174 return off - offStart;
2175
2176 @staticmethod
2177 def skipSpacesAt(sCode, off, offStop):
2178 """ Returns first offset at or after off for a non-space character. """
2179 return off + McBlock.countSpacesAt(sCode, off, offStop);
2180
2181 @staticmethod
2182 def isSubstrAt(sStr, off, sSubStr):
2183 """ Returns true of sSubStr is found at off in sStr. """
2184 return sStr[off : off + len(sSubStr)] == sSubStr;
2185
    ## Matches the C/C++ control statement keywords (if, else, while, for, do) that decodeCode looks for
    ## at the start of a C/C++ statement.  For if, while and for the match includes the opening parenthesis.
    koReCppCtrlStmts = re.compile(r'\b(if\s*[(]|else\b|while\s*[(]|for\s*[(]|do\b)');
    ## Matches references to a fixed set of iem.s.XXX members.
    ## NOTE(review): 'fEvxStuff' may be a misspelling of 'fEvexStuff' - confirm against the structure definition.
    koReIemDecoderVars = re.compile( r'iem\.s\.(fPrefixes|uRexReg|uRexB|uRexIndex|iEffSeg|offModRm|cbOpcode|offOpcode'
                                    + r'|enmEffOpSize|enmDefOpSize|enmDefAddrMode|enmEffAddrMode|idxPrefix'
                                    + r'|uVex3rdReg|uVexLength|fEvxStuff|uFpuOpcode|abOpcode'
                                    + r')');
2191
    def decodeCode(self, sRawCode, off = 0, offStop = -1, iLevel = 0): # pylint: disable=too-many-statements,too-many-branches
        """
        Decodes sRawCode[off : offStop].

        The code is split into IEM_MC_XXX statements, which are handed to the
        parser registered for them in g_dMcStmtParsers, and C/C++ statements
        and preprocessor directives, which become McCppGeneric, McCppCond and
        McCppPreProc objects.  The branches of conditionals are decoded by
        recursive calls.

        A negative offStop means the end of sRawCode.  iLevel is the current
        recursion depth; IEM_MC_IF_XXX statements are rejected when it is
        greater than one.

        Note! The searches for the ';' terminating a statement are not limited
              by offStop, so a statement starting before offStop is decoded in
              full even if it ends at or after it.

        Returns list of McStmt instances.
        Raises ParserException on failure.
        """
        if offStop < 0:
            offStop = len(sRawCode);
        aoStmts = [];
        while off < offStop:
            ch = sRawCode[off];

            #
            # Skip spaces and comments.
            #
            if ch.isspace():
                off += 1;

            elif ch == '/':
                ch = sRawCode[off + 1];
                if ch == '/': # C++ comment.
                    off = sRawCode.find('\n', off + 2);
                    if off < 0:
                        break;
                    off += 1;
                elif ch == '*': # C comment.
                    off = sRawCode.find('*/', off + 2);
                    if off < 0:
                        break;
                    off += 2;
                else:
                    self.raiseDecodeError(sRawCode, off, 'Unexpected "/"');

            #
            # Is it a MC statement.
            #
            elif ch == 'I' and sRawCode[off : off + len('IEM_MC_')] == 'IEM_MC_':
                # All MC statements ends with a semicolon, except for conditionals which ends with a '{'.
                # Extract it and strip comments from it.
                if not self.isSubstrAt(sRawCode, off, 'IEM_MC_IF_'):
                    offEnd = sRawCode.find(';', off + len('IEM_MC_'));
                    if offEnd <= off:
                        self.raiseDecodeError(sRawCode, off, 'MC statement without a ";"');
                else:
                    offEnd = sRawCode.find('{', off + len('IEM_MC_IF_'));
                    if offEnd <= off:
                        self.raiseDecodeError(sRawCode, off, 'MC conditional statement without a "{"');
                    if sRawCode.find(';', off + len('IEM_MC_IF_'), offEnd) > off:
                        self.raiseDecodeError(sRawCode, off, 'MC conditional statement without an immediate "{"');
                    # Step back from the '{' and over whitespace preceding it, so that offEnd
                    # ends up right after the statement.  Note that the first step is taken
                    # unconditionally, i.e. at least one character is expected between the
                    # closing parenthesis and the '{'.
                    offEnd -= 1;
                    while offEnd > off and sRawCode[offEnd - 1].isspace():
                        offEnd -= 1;

                sRawStmt = self.stripComments(sRawCode[off : offEnd]);

                # Isolate the statement name.
                offOpenParen = sRawStmt.find('(');
                if offOpenParen < 0:
                    self.raiseDecodeError(sRawCode, off, 'MC statement without a "("');
                sName = sRawStmt[: offOpenParen].strip();

                # Extract the parameters.
                (asParams, offCloseParen) = self.extractParams(sRawStmt, offOpenParen);
                if asParams is None:
                    self.raiseDecodeError(sRawCode, off, 'MC statement without a closing parenthesis');
                if offCloseParen + 1 != len(sRawStmt):
                    self.raiseDecodeError(sRawCode, off,
                                          'Unexpected code following MC statement: %s' % (sRawStmt[offCloseParen + 1:]));

                # Hand it to the handler.
                fnParser = g_dMcStmtParsers.get(sName);
                if not fnParser:
                    self.raiseDecodeError(sRawCode, off, 'Unknown MC statement: %s' % (sName,));
                oStmt = fnParser(self, sName, asParams);
                # A parser may return several statements as a list or tuple.
                if not isinstance(oStmt, (list, tuple)):
                    aoStmts.append(oStmt);
                else:
                    aoStmts.extend(oStmt);

                #
                # If conditional, we need to parse the whole statement.
                #
                # For reasons of simplicity, we assume the following structure
                # and parse each branch in a recursive call:
                #   IEM_MC_IF_XXX() {
                #        IEM_MC_WHATEVER();
                #   } IEM_MC_ELSE() {
                #        IEM_MC_WHATEVER();
                #   } IEM_MC_ENDIF();
                #
                if sName.startswith('IEM_MC_IF_'):
                    if iLevel > 1:
                        self.raiseDecodeError(sRawCode, off, 'Too deep nesting of conditionals.');

                    # Find start of the IF block:
                    offBlock1 = self.skipSpacesAt(sRawCode, offEnd, offStop);
                    if sRawCode[offBlock1] != '{':
                        self.raiseDecodeError(sRawCode, offBlock1, 'Expected "{" following %s' % (sName,));

                    # Find the end of it.
                    offBlock1End = self.findClosingBraces(sRawCode, offBlock1, offStop);
                    if offBlock1End < 0:
                        self.raiseDecodeError(sRawCode, offBlock1, 'No matching "}" closing IF block of %s' % (sName,));

                    oStmt.aoIfBranch = self.decodeCode(sRawCode, offBlock1 + 1, offBlock1End, iLevel + 1);

                    # Is there an else section?
                    off = self.skipSpacesAt(sRawCode, offBlock1End + 1, offStop);
                    if self.isSubstrAt(sRawCode, off, 'IEM_MC_ELSE'):
                        off = self.skipSpacesAt(sRawCode, off + len('IEM_MC_ELSE'), offStop);
                        if sRawCode[off] != '(':
                            self.raiseDecodeError(sRawCode, off, 'Expected "(" following IEM_MC_ELSE"');
                        off = self.skipSpacesAt(sRawCode, off + 1, offStop);
                        if sRawCode[off] != ')':
                            self.raiseDecodeError(sRawCode, off, 'Expected ")" following IEM_MC_ELSE("');

                        # Find start of the ELSE block.
                        offBlock2 = self.skipSpacesAt(sRawCode, off + 1, offStop);
                        if sRawCode[offBlock2] != '{':
                            self.raiseDecodeError(sRawCode, offBlock2, 'Expected "{" following IEM_MC_ELSE()"');

                        # Find the end of it.
                        offBlock2End = self.findClosingBraces(sRawCode, offBlock2, offStop);
                        if offBlock2End < 0:
                            self.raiseDecodeError(sRawCode, offBlock2, 'No matching "}" closing ELSE block of %s' % (sName,));

                        oStmt.aoElseBranch = self.decodeCode(sRawCode, offBlock2 + 1, offBlock2End, iLevel + 1);
                        off = self.skipSpacesAt(sRawCode, offBlock2End + 1, offStop);

                    # Parse past the endif statement.
                    if not self.isSubstrAt(sRawCode, off, 'IEM_MC_ENDIF'):
                        self.raiseDecodeError(sRawCode, off, 'Expected IEM_MC_ENDIF for closing %s' % (sName,));
                    off = self.skipSpacesAt(sRawCode, off + len('IEM_MC_ENDIF'), offStop);
                    if sRawCode[off] != '(':
                        self.raiseDecodeError(sRawCode, off, 'Expected "(" following IEM_MC_ENDIF"');
                    off = self.skipSpacesAt(sRawCode, off + 1, offStop);
                    if sRawCode[off] != ')':
                        self.raiseDecodeError(sRawCode, off, 'Expected ")" following IEM_MC_ENDIF("');
                    off = self.skipSpacesAt(sRawCode, off + 1, offStop);
                    if sRawCode[off] != ';':
                        self.raiseDecodeError(sRawCode, off, 'Expected ";" following IEM_MC_ENDIF()"');
                    off += 1;

                else:
                    # Advance.
                    off = offEnd + 1;

            #
            # Otherwise it must be a C/C++ statement of sorts.
            #
            else:
                # Find the end of the statement.  if and else requires special handling.
                sCondExpr = None;
                oMatch = self.koReCppCtrlStmts.match(sRawCode, off);
                if oMatch:
                    if oMatch.group(1)[-1] == '(':
                        # offEnd becomes the offset of the parenthesis closing the condition.
                        (sCondExpr, offEnd) = self.extractParam(sRawCode, oMatch.end());
                    else:
                        offEnd = oMatch.end();
                    if not oMatch.group(1).startswith('if') and oMatch.group(1) != 'else':
                        self.raiseDecodeError(sRawCode, off, 'Only if/else control statements allowed: %s' % (oMatch.group(1),));
                elif ch == '#':
                    # Preprocessor directive: it ends with the line (or at offStop).
                    offEnd = sRawCode.find('\n', off, offStop);
                    if offEnd < 0:
                        offEnd = offStop;
                    offEnd -= 1;
                    while offEnd > off and sRawCode[offEnd - 1].isspace():
                        offEnd -= 1;
                else:
                    offEnd = sRawCode.find(';', off);
                    if offEnd < 0:
                        self.raiseDecodeError(sRawCode, off, 'C++ statement without a ";"');

                # Check this and the following statement whether it might have
                # something to do with decoding.  This is a statement filter
                # criteria when generating the threaded functions blocks.
                offNextEnd = sRawCode.find(';', offEnd + 1);
                fDecode = (   sRawCode.find('IEM_OPCODE_', off, max(offEnd, offNextEnd)) >= 0
                           or sRawCode.find('IEMOP_HLP_DONE_', off, max(offEnd, offNextEnd)) >= 0
                           or sRawCode.find('IEMOP_HLP_DECODED_', off, offEnd) >= 0
                           or sRawCode.find('IEMOP_HLP_RAISE_UD_IF_MISSING_GUEST_FEATURE', off, offEnd) >= 0
                           );

                if not oMatch:
                    if ch != '#':
                        aoStmts.append(McCppGeneric(sRawCode[off : offEnd + 1], fDecode));
                    else:
                        aoStmts.append(McCppPreProc(sRawCode[off : offEnd + 1]));
                    off = offEnd + 1;
                elif oMatch.group(1).startswith('if'):
                    #
                    # if () xxx [else yyy] statement.
                    #
                    oStmt = McCppCond(sCondExpr, fDecode);
                    aoStmts.append(oStmt);
                    off = offEnd + 1;

                    # Following the if () we can either have a {} containing zero or more statements
                    # or we have a single statement.
                    offBlock1 = self.skipSpacesAt(sRawCode, offEnd + 1, offStop);
                    if sRawCode[offBlock1] == '{':
                        offBlock1End = self.findClosingBraces(sRawCode, offBlock1, offStop);
                        if offBlock1End < 0:
                            self.raiseDecodeError(sRawCode, offBlock1, 'No matching "}" closing if block');
                        offBlock1 += 1;
                    else:
                        # The range handed to the recursive call below stops short of the ';',
                        # the statement is still decoded in full since the search for its
                        # terminator is not limited by the stop offset.
                        offBlock1End = sRawCode.find(';', offBlock1, offStop);
                        if offBlock1End < 0:
                            self.raiseDecodeError(sRawCode, off, 'Expected ";" terminating one-line if block"');

                    oStmt.aoIfBranch = self.decodeCode(sRawCode, offBlock1, offBlock1End, iLevel + 1);

                    # The else is optional and can likewise be followed by {} or a single statement.
                    off = self.skipSpacesAt(sRawCode, offBlock1End + 1, offStop);
                    if self.isSubstrAt(sRawCode, off, 'else') and sRawCode[off + len('else')].isspace():
                        offBlock2 = self.skipSpacesAt(sRawCode, off + len('else'), offStop);
                        if sRawCode[offBlock2] == '{':
                            offBlock2End = self.findClosingBraces(sRawCode, offBlock2, offStop);
                            if offBlock2End < 0:
                                self.raiseDecodeError(sRawCode, offBlock2, 'No matching "}" closing else block');
                            offBlock2 += 1;
                        else:
                            offBlock2End = sRawCode.find(';', offBlock2, offStop);
                            if offBlock2End < 0:
                                self.raiseDecodeError(sRawCode, off, 'Expected ";" terminating one-line else block"');

                        oStmt.aoElseBranch = self.decodeCode(sRawCode, offBlock2, offBlock2End, iLevel + 1);
                        off = offBlock2End + 1;

                elif oMatch.group(1) == 'else':
                    # Problematic 'else' branch, typically involving #ifdefs.
                    self.raiseDecodeError(sRawCode, off, 'Mixed up else/#ifdef or something confusing us.');

        return aoStmts;
2427
2428 def decode(self):
2429 """
2430 Decodes the block, populating self.aoStmts.
2431 Returns the statement list.
2432 Raises ParserException on failure.
2433 """
2434 self.aoStmts = self.decodeCode(''.join(self.asLines));
2435 return self.aoStmts;
2436
2437
2438## IEM_MC_XXX -> parser dictionary.
2439# The raw table was generated via the following command
2440# sed -n -e "s/^# *define *\(IEM_MC_[A-Z_0-9]*\)[ (].*$/ '\1': McBlock.parseMcGeneric,/p" include/IEMMc.h \
2441# | sort | uniq | gawk "{printf """ %%-60s %%s\n""", $1, $2}"
2442g_dMcStmtParsers = {
2443 'IEM_MC_ACTUALIZE_AVX_STATE_FOR_CHANGE': McBlock.parseMcGeneric,
2444 'IEM_MC_ACTUALIZE_AVX_STATE_FOR_READ': McBlock.parseMcGeneric,
2445 'IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE': McBlock.parseMcGeneric,
2446 'IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ': McBlock.parseMcGeneric,
2447 'IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE': McBlock.parseMcGeneric,
2448 'IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ': McBlock.parseMcGeneric,
2449 'IEM_MC_ADD_GREG_U16': McBlock.parseMcGeneric,
2450 'IEM_MC_ADD_GREG_U16_TO_LOCAL': McBlock.parseMcGeneric,
2451 'IEM_MC_ADD_GREG_U32': McBlock.parseMcGeneric,
2452 'IEM_MC_ADD_GREG_U32_TO_LOCAL': McBlock.parseMcGeneric,
2453 'IEM_MC_ADD_GREG_U64': McBlock.parseMcGeneric,
2454 'IEM_MC_ADD_GREG_U64_TO_LOCAL': McBlock.parseMcGeneric,
2455 'IEM_MC_ADD_GREG_U8': McBlock.parseMcGeneric,
2456 'IEM_MC_ADD_GREG_U8_TO_LOCAL': McBlock.parseMcGeneric,
2457 'IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR': McBlock.parseMcGeneric,
2458 'IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR': McBlock.parseMcGeneric,
2459 'IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR': McBlock.parseMcGeneric,
2460 'IEM_MC_ADVANCE_RIP_AND_FINISH': McBlock.parseMcGeneric,
2461 'IEM_MC_AND_2LOCS_U32': McBlock.parseMcGeneric,
2462 'IEM_MC_AND_ARG_U16': McBlock.parseMcGeneric,
2463 'IEM_MC_AND_ARG_U32': McBlock.parseMcGeneric,
2464 'IEM_MC_AND_ARG_U64': McBlock.parseMcGeneric,
2465 'IEM_MC_AND_GREG_U16': McBlock.parseMcGeneric,
2466 'IEM_MC_AND_GREG_U32': McBlock.parseMcGeneric,
2467 'IEM_MC_AND_GREG_U64': McBlock.parseMcGeneric,
2468 'IEM_MC_AND_GREG_U8': McBlock.parseMcGeneric,
2469 'IEM_MC_AND_LOCAL_U16': McBlock.parseMcGeneric,
2470 'IEM_MC_AND_LOCAL_U32': McBlock.parseMcGeneric,
2471 'IEM_MC_AND_LOCAL_U64': McBlock.parseMcGeneric,
2472 'IEM_MC_AND_LOCAL_U8': McBlock.parseMcGeneric,
2473 'IEM_MC_ARG': McBlock.parseMcArg,
2474 'IEM_MC_ARG_CONST': McBlock.parseMcArgConst,
2475 'IEM_MC_ARG_LOCAL_EFLAGS': McBlock.parseMcArgLocalEFlags,
2476 'IEM_MC_ARG_LOCAL_REF': McBlock.parseMcArgLocalRef,
2477 'IEM_MC_ASSIGN': McBlock.parseMcGeneric,
2478 'IEM_MC_ASSIGN_TO_SMALLER': McBlock.parseMcGeneric,
2479 'IEM_MC_ASSIGN_U8_SX_U64': McBlock.parseMcGeneric,
2480 'IEM_MC_ASSIGN_U32_SX_U64': McBlock.parseMcGeneric,
2481 'IEM_MC_BEGIN': McBlock.parseMcGeneric,
2482 'IEM_MC_BSWAP_LOCAL_U16': McBlock.parseMcGeneric,
2483 'IEM_MC_BSWAP_LOCAL_U32': McBlock.parseMcGeneric,
2484 'IEM_MC_BSWAP_LOCAL_U64': McBlock.parseMcGeneric,
2485 'IEM_MC_CALC_RM_EFF_ADDR': McBlock.parseMcGeneric,
2486 'IEM_MC_CALL_AIMPL_3': McBlock.parseMcCallAImpl,
2487 'IEM_MC_CALL_AIMPL_4': McBlock.parseMcCallAImpl,
2488 'IEM_MC_CALL_AVX_AIMPL_2': McBlock.parseMcCallAvxAImpl,
2489 'IEM_MC_CALL_AVX_AIMPL_3': McBlock.parseMcCallAvxAImpl,
2490 'IEM_MC_CALL_CIMPL_0': McBlock.parseMcCallCImpl,
2491 'IEM_MC_CALL_CIMPL_1': McBlock.parseMcCallCImpl,
2492 'IEM_MC_CALL_CIMPL_2': McBlock.parseMcCallCImpl,
2493 'IEM_MC_CALL_CIMPL_3': McBlock.parseMcCallCImpl,
2494 'IEM_MC_CALL_CIMPL_4': McBlock.parseMcCallCImpl,
2495 'IEM_MC_CALL_CIMPL_5': McBlock.parseMcCallCImpl,
2496 'IEM_MC_CALL_FPU_AIMPL_1': McBlock.parseMcCallFpuAImpl,
2497 'IEM_MC_CALL_FPU_AIMPL_2': McBlock.parseMcCallFpuAImpl,
2498 'IEM_MC_CALL_FPU_AIMPL_3': McBlock.parseMcCallFpuAImpl,
2499 'IEM_MC_CALL_MMX_AIMPL_2': McBlock.parseMcCallMmxAImpl,
2500 'IEM_MC_CALL_MMX_AIMPL_3': McBlock.parseMcCallMmxAImpl,
2501 'IEM_MC_CALL_SSE_AIMPL_2': McBlock.parseMcCallSseAImpl,
2502 'IEM_MC_CALL_SSE_AIMPL_3': McBlock.parseMcCallSseAImpl,
2503 'IEM_MC_CALL_VOID_AIMPL_0': McBlock.parseMcCallVoidAImpl,
2504 'IEM_MC_CALL_VOID_AIMPL_1': McBlock.parseMcCallVoidAImpl,
2505 'IEM_MC_CALL_VOID_AIMPL_2': McBlock.parseMcCallVoidAImpl,
2506 'IEM_MC_CALL_VOID_AIMPL_3': McBlock.parseMcCallVoidAImpl,
2507 'IEM_MC_CALL_VOID_AIMPL_4': McBlock.parseMcCallVoidAImpl,
2508 'IEM_MC_CLEAR_EFL_BIT': McBlock.parseMcGeneric,
2509 'IEM_MC_CLEAR_FSW_EX': McBlock.parseMcGeneric,
2510 'IEM_MC_CLEAR_HIGH_GREG_U64': McBlock.parseMcGeneric,
2511 'IEM_MC_CLEAR_HIGH_GREG_U64_BY_REF': McBlock.parseMcGeneric,
2512 'IEM_MC_CLEAR_XREG_U32_MASK': McBlock.parseMcGeneric,
2513 'IEM_MC_CLEAR_YREG_128_UP': McBlock.parseMcGeneric,
2514 'IEM_MC_COMMIT_EFLAGS': McBlock.parseMcGeneric,
2515 'IEM_MC_COPY_XREG_U128': McBlock.parseMcGeneric,
2516 'IEM_MC_COPY_YREG_U128_ZX_VLMAX': McBlock.parseMcGeneric,
2517 'IEM_MC_COPY_YREG_U256_ZX_VLMAX': McBlock.parseMcGeneric,
2518 'IEM_MC_COPY_YREG_U64_ZX_VLMAX': McBlock.parseMcGeneric,
2519 'IEM_MC_DEFER_TO_CIMPL_0': McBlock.parseMcGeneric,
2520 'IEM_MC_DEFER_TO_CIMPL_1': McBlock.parseMcGeneric,
2521 'IEM_MC_DEFER_TO_CIMPL_2': McBlock.parseMcGeneric,
2522 'IEM_MC_DEFER_TO_CIMPL_3': McBlock.parseMcGeneric,
2523 'IEM_MC_END': McBlock.parseMcGeneric,
2524 'IEM_MC_FETCH_EFLAGS': McBlock.parseMcGeneric,
2525 'IEM_MC_FETCH_EFLAGS_U8': McBlock.parseMcGeneric,
2526 'IEM_MC_FETCH_FCW': McBlock.parseMcGeneric,
2527 'IEM_MC_FETCH_FSW': McBlock.parseMcGeneric,
2528 'IEM_MC_FETCH_GREG_U16': McBlock.parseMcGeneric,
2529 'IEM_MC_FETCH_GREG_U16_SX_U32': McBlock.parseMcGeneric,
2530 'IEM_MC_FETCH_GREG_U16_SX_U64': McBlock.parseMcGeneric,
2531 'IEM_MC_FETCH_GREG_U16_ZX_U32': McBlock.parseMcGeneric,
2532 'IEM_MC_FETCH_GREG_U16_ZX_U64': McBlock.parseMcGeneric,
2533 'IEM_MC_FETCH_GREG_U32': McBlock.parseMcGeneric,
2534 'IEM_MC_FETCH_GREG_U32_SX_U64': McBlock.parseMcGeneric,
2535 'IEM_MC_FETCH_GREG_U32_ZX_U64': McBlock.parseMcGeneric,
2536 'IEM_MC_FETCH_GREG_U64': McBlock.parseMcGeneric,
2537 'IEM_MC_FETCH_GREG_U64_ZX_U64': McBlock.parseMcGeneric,
2538 'IEM_MC_FETCH_GREG_U8': McBlock.parseMcGeneric,
2539 'IEM_MC_FETCH_GREG_U8_SX_U16': McBlock.parseMcGeneric,
2540 'IEM_MC_FETCH_GREG_U8_SX_U32': McBlock.parseMcGeneric,
2541 'IEM_MC_FETCH_GREG_U8_SX_U64': McBlock.parseMcGeneric,
2542 'IEM_MC_FETCH_GREG_U8_ZX_U16': McBlock.parseMcGeneric,
2543 'IEM_MC_FETCH_GREG_U8_ZX_U32': McBlock.parseMcGeneric,
2544 'IEM_MC_FETCH_GREG_U8_ZX_U64': McBlock.parseMcGeneric,
2545 'IEM_MC_FETCH_MEM_D80': McBlock.parseMcGeneric,
2546 'IEM_MC_FETCH_MEM_I16': McBlock.parseMcGeneric,
2547 'IEM_MC_FETCH_MEM_I32': McBlock.parseMcGeneric,
2548 'IEM_MC_FETCH_MEM_I64': McBlock.parseMcGeneric,
2549 'IEM_MC_FETCH_MEM_R32': McBlock.parseMcGeneric,
2550 'IEM_MC_FETCH_MEM_R64': McBlock.parseMcGeneric,
2551 'IEM_MC_FETCH_MEM_R80': McBlock.parseMcGeneric,
2552 'IEM_MC_FETCH_MEM_S32_SX_U64': McBlock.parseMcGeneric,
2553 'IEM_MC_FETCH_MEM_U128': McBlock.parseMcGeneric,
2554 'IEM_MC_FETCH_MEM_U128_ALIGN_SSE': McBlock.parseMcGeneric,
2555 'IEM_MC_FETCH_MEM_U128_NO_AC': McBlock.parseMcGeneric,
2556 'IEM_MC_FETCH_MEM_U16': McBlock.parseMcGeneric,
2557 'IEM_MC_FETCH_MEM_U16_DISP': McBlock.parseMcGeneric,
2558 'IEM_MC_FETCH_MEM_U16_SX_U32': McBlock.parseMcGeneric,
2559 'IEM_MC_FETCH_MEM_U16_SX_U64': McBlock.parseMcGeneric,
2560 'IEM_MC_FETCH_MEM_U16_ZX_U32': McBlock.parseMcGeneric,
2561 'IEM_MC_FETCH_MEM_U16_ZX_U64': McBlock.parseMcGeneric,
2562 'IEM_MC_FETCH_MEM_U256': McBlock.parseMcGeneric,
2563 'IEM_MC_FETCH_MEM_U256_ALIGN_AVX': McBlock.parseMcGeneric,
2564 'IEM_MC_FETCH_MEM_U256_NO_AC': McBlock.parseMcGeneric,
2565 'IEM_MC_FETCH_MEM_U32': McBlock.parseMcGeneric,
2566 'IEM_MC_FETCH_MEM_U32_DISP': McBlock.parseMcGeneric,
2567 'IEM_MC_FETCH_MEM_U32_SX_U64': McBlock.parseMcGeneric,
2568 'IEM_MC_FETCH_MEM_U32_ZX_U64': McBlock.parseMcGeneric,
2569 'IEM_MC_FETCH_MEM_U64': McBlock.parseMcGeneric,
2570 'IEM_MC_FETCH_MEM_U64_ALIGN_U128': McBlock.parseMcGeneric,
2571 'IEM_MC_FETCH_MEM_U64_DISP': McBlock.parseMcGeneric,
2572 'IEM_MC_FETCH_MEM_U8': McBlock.parseMcGeneric,
2573 'IEM_MC_FETCH_MEM_U8_SX_U16': McBlock.parseMcGeneric,
2574 'IEM_MC_FETCH_MEM_U8_SX_U32': McBlock.parseMcGeneric,
2575 'IEM_MC_FETCH_MEM_U8_SX_U64': McBlock.parseMcGeneric,
2576 'IEM_MC_FETCH_MEM_U8_ZX_U16': McBlock.parseMcGeneric,
2577 'IEM_MC_FETCH_MEM_U8_ZX_U32': McBlock.parseMcGeneric,
2578 'IEM_MC_FETCH_MEM_U8_ZX_U64': McBlock.parseMcGeneric,
2579 'IEM_MC_FETCH_MEM_XMM': McBlock.parseMcGeneric,
2580 'IEM_MC_FETCH_MEM_XMM_ALIGN_SSE': McBlock.parseMcGeneric,
2581 'IEM_MC_FETCH_MEM_XMM_NO_AC': McBlock.parseMcGeneric,
2582 'IEM_MC_FETCH_MEM_XMM_U32': McBlock.parseMcGeneric,
2583 'IEM_MC_FETCH_MEM_XMM_U64': McBlock.parseMcGeneric,
2584 'IEM_MC_FETCH_MEM_YMM': McBlock.parseMcGeneric,
2585 'IEM_MC_FETCH_MEM_YMM_ALIGN_AVX': McBlock.parseMcGeneric,
2586 'IEM_MC_FETCH_MEM_YMM_NO_AC': McBlock.parseMcGeneric,
2587 'IEM_MC_FETCH_MEM16_U8': McBlock.parseMcGeneric,
2588 'IEM_MC_FETCH_MEM32_U8': McBlock.parseMcGeneric,
2589 'IEM_MC_FETCH_MREG_U32': McBlock.parseMcGeneric,
2590 'IEM_MC_FETCH_MREG_U64': McBlock.parseMcGeneric,
2591 'IEM_MC_FETCH_SREG_BASE_U32': McBlock.parseMcGeneric,
2592 'IEM_MC_FETCH_SREG_BASE_U64': McBlock.parseMcGeneric,
2593 'IEM_MC_FETCH_SREG_U16': McBlock.parseMcGeneric,
2594 'IEM_MC_FETCH_SREG_ZX_U32': McBlock.parseMcGeneric,
2595 'IEM_MC_FETCH_SREG_ZX_U64': McBlock.parseMcGeneric,
2596 'IEM_MC_FETCH_XREG_U128': McBlock.parseMcGeneric,
2597 'IEM_MC_FETCH_XREG_U16': McBlock.parseMcGeneric,
2598 'IEM_MC_FETCH_XREG_U32': McBlock.parseMcGeneric,
2599 'IEM_MC_FETCH_XREG_U64': McBlock.parseMcGeneric,
2600 'IEM_MC_FETCH_XREG_U8': McBlock.parseMcGeneric,
2601 'IEM_MC_FETCH_XREG_XMM': McBlock.parseMcGeneric,
2602 'IEM_MC_FETCH_YREG_2ND_U64': McBlock.parseMcGeneric,
2603 'IEM_MC_FETCH_YREG_U128': McBlock.parseMcGeneric,
2604 'IEM_MC_FETCH_YREG_U256': McBlock.parseMcGeneric,
2605 'IEM_MC_FETCH_YREG_U32': McBlock.parseMcGeneric,
2606 'IEM_MC_FETCH_YREG_U64': McBlock.parseMcGeneric,
2607 'IEM_MC_FLIP_EFL_BIT': McBlock.parseMcGeneric,
2608 'IEM_MC_FPU_FROM_MMX_MODE': McBlock.parseMcGeneric,
2609 'IEM_MC_FPU_STACK_DEC_TOP': McBlock.parseMcGeneric,
2610 'IEM_MC_FPU_STACK_FREE': McBlock.parseMcGeneric,
2611 'IEM_MC_FPU_STACK_INC_TOP': McBlock.parseMcGeneric,
2612 'IEM_MC_FPU_STACK_PUSH_OVERFLOW': McBlock.parseMcGeneric,
2613 'IEM_MC_FPU_STACK_PUSH_OVERFLOW_MEM_OP': McBlock.parseMcGeneric,
2614 'IEM_MC_FPU_STACK_PUSH_UNDERFLOW': McBlock.parseMcGeneric,
2615 'IEM_MC_FPU_STACK_PUSH_UNDERFLOW_TWO': McBlock.parseMcGeneric,
2616 'IEM_MC_FPU_STACK_UNDERFLOW': McBlock.parseMcGeneric,
2617 'IEM_MC_FPU_STACK_UNDERFLOW_MEM_OP': McBlock.parseMcGeneric,
2618 'IEM_MC_FPU_STACK_UNDERFLOW_MEM_OP_THEN_POP': McBlock.parseMcGeneric,
2619 'IEM_MC_FPU_STACK_UNDERFLOW_THEN_POP': McBlock.parseMcGeneric,
2620 'IEM_MC_FPU_STACK_UNDERFLOW_THEN_POP_POP': McBlock.parseMcGeneric,
2621 'IEM_MC_FPU_TO_MMX_MODE': McBlock.parseMcGeneric,
2622 'IEM_MC_IF_CX_IS_NZ': McBlock.parseMcGenericCond,
2623 'IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_NOT_SET': McBlock.parseMcGenericCond,
2624 'IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_SET': McBlock.parseMcGenericCond,
2625 'IEM_MC_IF_ECX_IS_NZ': McBlock.parseMcGenericCond,
2626 'IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_NOT_SET': McBlock.parseMcGenericCond,
2627 'IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_SET': McBlock.parseMcGenericCond,
2628 'IEM_MC_IF_EFL_ANY_BITS_SET': McBlock.parseMcGenericCond,
2629 'IEM_MC_IF_EFL_BIT_NOT_SET': McBlock.parseMcGenericCond,
2630 'IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ': McBlock.parseMcGenericCond,
2631 'IEM_MC_IF_EFL_BIT_SET': McBlock.parseMcGenericCond,
2632 'IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE': McBlock.parseMcGenericCond,
2633 'IEM_MC_IF_EFL_BITS_EQ': McBlock.parseMcGenericCond,
2634 'IEM_MC_IF_EFL_BITS_NE': McBlock.parseMcGenericCond,
2635 'IEM_MC_IF_EFL_NO_BITS_SET': McBlock.parseMcGenericCond,
2636 'IEM_MC_IF_FCW_IM': McBlock.parseMcGenericCond,
2637 'IEM_MC_IF_FPUREG_IS_EMPTY': McBlock.parseMcGenericCond,
2638 'IEM_MC_IF_FPUREG_NOT_EMPTY': McBlock.parseMcGenericCond,
2639 'IEM_MC_IF_FPUREG_NOT_EMPTY_REF_R80': McBlock.parseMcGenericCond,
2640 'IEM_MC_IF_GREG_BIT_SET': McBlock.parseMcGenericCond,
2641 'IEM_MC_IF_LOCAL_IS_Z': McBlock.parseMcGenericCond,
2642 'IEM_MC_IF_MXCSR_XCPT_PENDING': McBlock.parseMcGenericCond,
2643 'IEM_MC_IF_RCX_IS_NZ': McBlock.parseMcGenericCond,
2644 'IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_NOT_SET': McBlock.parseMcGenericCond,
2645 'IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_SET': McBlock.parseMcGenericCond,
2646 'IEM_MC_IF_TWO_FPUREGS_NOT_EMPTY_REF_R80': McBlock.parseMcGenericCond,
2647 'IEM_MC_IF_TWO_FPUREGS_NOT_EMPTY_REF_R80_FIRST': McBlock.parseMcGenericCond,
2648 'IEM_MC_IMPLICIT_AVX_AIMPL_ARGS': McBlock.parseMcGeneric,
2649 'IEM_MC_INT_CLEAR_ZMM_256_UP': McBlock.parseMcGeneric,
2650 'IEM_MC_LOCAL': McBlock.parseMcLocal,
2651 'IEM_MC_LOCAL_CONST': McBlock.parseMcLocalConst,
2652 'IEM_MC_MAYBE_RAISE_AESNI_RELATED_XCPT': McBlock.parseMcGeneric,
2653 'IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT': McBlock.parseMcGeneric,
2654 'IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE': McBlock.parseMcGeneric,
2655 'IEM_MC_MAYBE_RAISE_FPU_XCPT': McBlock.parseMcGeneric,
2656 'IEM_MC_MAYBE_RAISE_FSGSBASE_XCPT': McBlock.parseMcGeneric,
2657 'IEM_MC_MAYBE_RAISE_MMX_RELATED_XCPT': McBlock.parseMcGeneric,
2658 'IEM_MC_MAYBE_RAISE_NON_CANONICAL_ADDR_GP0': McBlock.parseMcGeneric,
2659 'IEM_MC_MAYBE_RAISE_PCLMUL_RELATED_XCPT': McBlock.parseMcGeneric,
2660 'IEM_MC_MAYBE_RAISE_SHA_RELATED_XCPT': McBlock.parseMcGeneric,
2661 'IEM_MC_MAYBE_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT': McBlock.parseMcGeneric,
2662 'IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT': McBlock.parseMcGeneric,
2663 'IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT': McBlock.parseMcGeneric,
2664 'IEM_MC_MAYBE_RAISE_SSE3_RELATED_XCPT': McBlock.parseMcGeneric,
2665 'IEM_MC_MAYBE_RAISE_SSE41_RELATED_XCPT': McBlock.parseMcGeneric,
2666 'IEM_MC_MAYBE_RAISE_SSE42_RELATED_XCPT': McBlock.parseMcGeneric,
2667 'IEM_MC_MAYBE_RAISE_SSSE3_RELATED_XCPT': McBlock.parseMcGeneric,
2668 'IEM_MC_MAYBE_RAISE_WAIT_DEVICE_NOT_AVAILABLE': McBlock.parseMcGeneric,
2669 'IEM_MC_MEM_COMMIT_AND_UNMAP': McBlock.parseMcGeneric,
2670 'IEM_MC_MEM_COMMIT_AND_UNMAP_FOR_FPU_STORE': McBlock.parseMcGeneric,
2671 'IEM_MC_MEM_MAP': McBlock.parseMcGeneric,
2672 'IEM_MC_MEM_MAP_EX': McBlock.parseMcGeneric,
2673 'IEM_MC_MERGE_YREG_U32_U96_ZX_VLMAX': McBlock.parseMcGeneric,
2674 'IEM_MC_MERGE_YREG_U64_U64_ZX_VLMAX': McBlock.parseMcGeneric,
2675 'IEM_MC_MERGE_YREG_U64HI_U64HI_ZX_VLMAX': McBlock.parseMcGeneric,
2676 'IEM_MC_MERGE_YREG_U64LO_U64LO_ZX_VLMAX': McBlock.parseMcGeneric,
2677 'IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX': McBlock.parseMcGeneric,
2678 'IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX': McBlock.parseMcGeneric,
2679 'IEM_MC_MODIFIED_MREG': McBlock.parseMcGeneric,
2680 'IEM_MC_MODIFIED_MREG_BY_REF': McBlock.parseMcGeneric,
2681 'IEM_MC_OR_2LOCS_U32': McBlock.parseMcGeneric,
2682 'IEM_MC_OR_GREG_U16': McBlock.parseMcGeneric,
2683 'IEM_MC_OR_GREG_U32': McBlock.parseMcGeneric,
2684 'IEM_MC_OR_GREG_U64': McBlock.parseMcGeneric,
2685 'IEM_MC_OR_GREG_U8': McBlock.parseMcGeneric,
2686 'IEM_MC_OR_LOCAL_U16': McBlock.parseMcGeneric,
2687 'IEM_MC_OR_LOCAL_U32': McBlock.parseMcGeneric,
2688 'IEM_MC_OR_LOCAL_U8': McBlock.parseMcGeneric,
2689 'IEM_MC_POP_U16': McBlock.parseMcGeneric,
2690 'IEM_MC_POP_U32': McBlock.parseMcGeneric,
2691 'IEM_MC_POP_U64': McBlock.parseMcGeneric,
2692 'IEM_MC_PREPARE_AVX_USAGE': McBlock.parseMcGeneric,
2693 'IEM_MC_PREPARE_FPU_USAGE': McBlock.parseMcGeneric,
2694 'IEM_MC_PREPARE_SSE_USAGE': McBlock.parseMcGeneric,
2695 'IEM_MC_PUSH_FPU_RESULT': McBlock.parseMcGeneric,
2696 'IEM_MC_PUSH_FPU_RESULT_MEM_OP': McBlock.parseMcGeneric,
2697 'IEM_MC_PUSH_FPU_RESULT_TWO': McBlock.parseMcGeneric,
2698 'IEM_MC_PUSH_U16': McBlock.parseMcGeneric,
2699 'IEM_MC_PUSH_U32': McBlock.parseMcGeneric,
2700 'IEM_MC_PUSH_U32_SREG': McBlock.parseMcGeneric,
2701 'IEM_MC_PUSH_U64': McBlock.parseMcGeneric,
2702 'IEM_MC_RAISE_DIVIDE_ERROR': McBlock.parseMcGeneric,
2703 'IEM_MC_RAISE_GP0_IF_CPL_NOT_ZERO': McBlock.parseMcGeneric,
2704 'IEM_MC_RAISE_GP0_IF_EFF_ADDR_UNALIGNED': McBlock.parseMcGeneric,
2705 'IEM_MC_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT': McBlock.parseMcGeneric,
2706 'IEM_MC_REF_EFLAGS': McBlock.parseMcGeneric,
2707 'IEM_MC_REF_FPUREG': McBlock.parseMcGeneric,
2708 'IEM_MC_REF_GREG_I32': McBlock.parseMcGeneric,
2709 'IEM_MC_REF_GREG_I32_CONST': McBlock.parseMcGeneric,
2710 'IEM_MC_REF_GREG_I64': McBlock.parseMcGeneric,
2711 'IEM_MC_REF_GREG_I64_CONST': McBlock.parseMcGeneric,
2712 'IEM_MC_REF_GREG_U16': McBlock.parseMcGeneric,
2713 'IEM_MC_REF_GREG_U32': McBlock.parseMcGeneric,
2714 'IEM_MC_REF_GREG_U64': McBlock.parseMcGeneric,
2715 'IEM_MC_REF_GREG_U8': McBlock.parseMcGeneric,
2716 'IEM_MC_REF_LOCAL': McBlock.parseMcGeneric,
2717 'IEM_MC_REF_MREG_U32_CONST': McBlock.parseMcGeneric,
2718 'IEM_MC_REF_MREG_U64': McBlock.parseMcGeneric,
2719 'IEM_MC_REF_MREG_U64_CONST': McBlock.parseMcGeneric,
2720 'IEM_MC_REF_MXCSR': McBlock.parseMcGeneric,
2721 'IEM_MC_REF_XREG_R32_CONST': McBlock.parseMcGeneric,
2722 'IEM_MC_REF_XREG_R64_CONST': McBlock.parseMcGeneric,
2723 'IEM_MC_REF_XREG_U128': McBlock.parseMcGeneric,
2724 'IEM_MC_REF_XREG_U128_CONST': McBlock.parseMcGeneric,
2725 'IEM_MC_REF_XREG_U32_CONST': McBlock.parseMcGeneric,
2726 'IEM_MC_REF_XREG_U64_CONST': McBlock.parseMcGeneric,
2727 'IEM_MC_REF_XREG_XMM_CONST': McBlock.parseMcGeneric,
2728 'IEM_MC_REF_YREG_U128': McBlock.parseMcGeneric,
2729 'IEM_MC_REF_YREG_U128_CONST': McBlock.parseMcGeneric,
2730 'IEM_MC_REF_YREG_U64_CONST': McBlock.parseMcGeneric,
2731 'IEM_MC_REL_JMP_S16_AND_FINISH': McBlock.parseMcGeneric,
2732 'IEM_MC_REL_JMP_S32_AND_FINISH': McBlock.parseMcGeneric,
2733 'IEM_MC_REL_JMP_S8_AND_FINISH': McBlock.parseMcGeneric,
2734 'IEM_MC_RETURN_ON_FAILURE': McBlock.parseMcGeneric,
2735 'IEM_MC_SAR_LOCAL_S16': McBlock.parseMcGeneric,
2736 'IEM_MC_SAR_LOCAL_S32': McBlock.parseMcGeneric,
2737 'IEM_MC_SAR_LOCAL_S64': McBlock.parseMcGeneric,
2738 'IEM_MC_SET_EFL_BIT': McBlock.parseMcGeneric,
2739 'IEM_MC_SET_FPU_RESULT': McBlock.parseMcGeneric,
2740 'IEM_MC_SET_RIP_U16_AND_FINISH': McBlock.parseMcGeneric,
2741 'IEM_MC_SET_RIP_U32_AND_FINISH': McBlock.parseMcGeneric,
2742 'IEM_MC_SET_RIP_U64_AND_FINISH': McBlock.parseMcGeneric,
2743 'IEM_MC_SHL_LOCAL_S16': McBlock.parseMcGeneric,
2744 'IEM_MC_SHL_LOCAL_S32': McBlock.parseMcGeneric,
2745 'IEM_MC_SHL_LOCAL_S64': McBlock.parseMcGeneric,
2746 'IEM_MC_SHR_LOCAL_U8': McBlock.parseMcGeneric,
2747 'IEM_MC_SSE_UPDATE_MXCSR': McBlock.parseMcGeneric,
2748 'IEM_MC_STORE_FPU_RESULT': McBlock.parseMcGeneric,
2749 'IEM_MC_STORE_FPU_RESULT_MEM_OP': McBlock.parseMcGeneric,
2750 'IEM_MC_STORE_FPU_RESULT_THEN_POP': McBlock.parseMcGeneric,
2751 'IEM_MC_STORE_FPU_RESULT_WITH_MEM_OP_THEN_POP': McBlock.parseMcGeneric,
2752 'IEM_MC_STORE_FPUREG_R80_SRC_REF': McBlock.parseMcGeneric,
2753 'IEM_MC_STORE_GREG_I64': McBlock.parseMcGeneric,
2754 'IEM_MC_STORE_GREG_U16': McBlock.parseMcGeneric,
2755 'IEM_MC_STORE_GREG_U16_CONST': McBlock.parseMcGeneric,
2756 'IEM_MC_STORE_GREG_U32': McBlock.parseMcGeneric,
2757 'IEM_MC_STORE_GREG_U32_CONST': McBlock.parseMcGeneric,
2758 'IEM_MC_STORE_GREG_U64': McBlock.parseMcGeneric,
2759 'IEM_MC_STORE_GREG_U64_CONST': McBlock.parseMcGeneric,
2760 'IEM_MC_STORE_GREG_U8': McBlock.parseMcGeneric,
2761 'IEM_MC_STORE_GREG_U8_CONST': McBlock.parseMcGeneric,
2762 'IEM_MC_STORE_MEM_I16_CONST_BY_REF': McBlock.parseMcGeneric,
2763 'IEM_MC_STORE_MEM_I32_CONST_BY_REF': McBlock.parseMcGeneric,
2764 'IEM_MC_STORE_MEM_I64_CONST_BY_REF': McBlock.parseMcGeneric,
2765 'IEM_MC_STORE_MEM_I8_CONST_BY_REF': McBlock.parseMcGeneric,
2766 'IEM_MC_STORE_MEM_INDEF_D80_BY_REF': McBlock.parseMcGeneric,
2767 'IEM_MC_STORE_MEM_NEG_QNAN_R32_BY_REF': McBlock.parseMcGeneric,
2768 'IEM_MC_STORE_MEM_NEG_QNAN_R64_BY_REF': McBlock.parseMcGeneric,
2769 'IEM_MC_STORE_MEM_NEG_QNAN_R80_BY_REF': McBlock.parseMcGeneric,
2770 'IEM_MC_STORE_MEM_U128': McBlock.parseMcGeneric,
2771 'IEM_MC_STORE_MEM_U128_ALIGN_SSE': McBlock.parseMcGeneric,
2772 'IEM_MC_STORE_MEM_U16': McBlock.parseMcGeneric,
2773 'IEM_MC_STORE_MEM_U16_CONST': McBlock.parseMcGeneric,
2774 'IEM_MC_STORE_MEM_U256': McBlock.parseMcGeneric,
2775 'IEM_MC_STORE_MEM_U256_ALIGN_AVX': McBlock.parseMcGeneric,
2776 'IEM_MC_STORE_MEM_U32': McBlock.parseMcGeneric,
2777 'IEM_MC_STORE_MEM_U32_CONST': McBlock.parseMcGeneric,
2778 'IEM_MC_STORE_MEM_U64': McBlock.parseMcGeneric,
2779 'IEM_MC_STORE_MEM_U64_CONST': McBlock.parseMcGeneric,
2780 'IEM_MC_STORE_MEM_U8': McBlock.parseMcGeneric,
2781 'IEM_MC_STORE_MEM_U8_CONST': McBlock.parseMcGeneric,
2782 'IEM_MC_STORE_MREG_U32_ZX_U64': McBlock.parseMcGeneric,
2783 'IEM_MC_STORE_MREG_U64': McBlock.parseMcGeneric,
2784 'IEM_MC_STORE_SREG_BASE_U32': McBlock.parseMcGeneric,
2785 'IEM_MC_STORE_SREG_BASE_U64': McBlock.parseMcGeneric,
2786 'IEM_MC_STORE_SSE_RESULT': McBlock.parseMcGeneric,
2787 'IEM_MC_STORE_XREG_HI_U64': McBlock.parseMcGeneric,
2788 'IEM_MC_STORE_XREG_R32': McBlock.parseMcGeneric,
2789 'IEM_MC_STORE_XREG_R64': McBlock.parseMcGeneric,
2790 'IEM_MC_STORE_XREG_U128': McBlock.parseMcGeneric,
2791 'IEM_MC_STORE_XREG_U16': McBlock.parseMcGeneric,
2792 'IEM_MC_STORE_XREG_U32': McBlock.parseMcGeneric,
2793 'IEM_MC_STORE_XREG_U32_U128': McBlock.parseMcGeneric,
2794 'IEM_MC_STORE_XREG_U32_ZX_U128': McBlock.parseMcGeneric,
2795 'IEM_MC_STORE_XREG_U64': McBlock.parseMcGeneric,
2796 'IEM_MC_STORE_XREG_U64_ZX_U128': McBlock.parseMcGeneric,
2797 'IEM_MC_STORE_XREG_U8': McBlock.parseMcGeneric,
2798 'IEM_MC_STORE_XREG_XMM': McBlock.parseMcGeneric,
2799 'IEM_MC_STORE_XREG_XMM_U32': McBlock.parseMcGeneric,
2800 'IEM_MC_STORE_XREG_XMM_U64': McBlock.parseMcGeneric,
2801 'IEM_MC_STORE_YREG_U128_ZX_VLMAX': McBlock.parseMcGeneric,
2802 'IEM_MC_STORE_YREG_U256_ZX_VLMAX': McBlock.parseMcGeneric,
2803 'IEM_MC_STORE_YREG_U32_ZX_VLMAX': McBlock.parseMcGeneric,
2804 'IEM_MC_STORE_YREG_U64_ZX_VLMAX': McBlock.parseMcGeneric,
2805 'IEM_MC_SUB_GREG_U16': McBlock.parseMcGeneric,
2806 'IEM_MC_SUB_GREG_U32': McBlock.parseMcGeneric,
2807 'IEM_MC_SUB_GREG_U64': McBlock.parseMcGeneric,
2808 'IEM_MC_SUB_GREG_U8': McBlock.parseMcGeneric,
2809 'IEM_MC_SUB_LOCAL_U16': McBlock.parseMcGeneric,
2810 'IEM_MC_UPDATE_FPU_OPCODE_IP': McBlock.parseMcGeneric,
2811 'IEM_MC_UPDATE_FSW': McBlock.parseMcGeneric,
2812 'IEM_MC_UPDATE_FSW_CONST': McBlock.parseMcGeneric,
2813 'IEM_MC_UPDATE_FSW_THEN_POP': McBlock.parseMcGeneric,
2814 'IEM_MC_UPDATE_FSW_THEN_POP_POP': McBlock.parseMcGeneric,
2815 'IEM_MC_UPDATE_FSW_WITH_MEM_OP': McBlock.parseMcGeneric,
2816 'IEM_MC_UPDATE_FSW_WITH_MEM_OP_THEN_POP': McBlock.parseMcGeneric,
2817};
2818
## List of microcode blocks.
g_aoMcBlocks = [] # type: List[McBlock]
2821
2822
2823
class ParserException(Exception):
    """
    Exception type used for parser errors.

    Carries a single, already formatted message string.
    """

    def __init__(self, sMessage):
        super(ParserException, self).__init__(sMessage)
2828
2829
2830class SimpleParser(object): # pylint: disable=too-many-instance-attributes
2831 """
2832 Parser of IEMAllInstruction*.cpp.h instruction specifications.
2833 """
2834
2835 ## @name Parser state.
2836 ## @{
2837 kiCode = 0;
2838 kiCommentMulti = 1;
2839 ## @}
2840
class Macro(object):
    """
    A preprocessor macro: its name, optional parameter list and body text,
    plus the ability to expand the body with a set of argument strings.
    """

    def __init__(self, sName, asArgs, sBody, iLine):
        self.sName       = sName   ##< The macro name.
        self.asArgs      = asArgs  ##< None if simple macro, list of parameters otherwise.
        self.sBody       = sBody   ##< The unexpanded macro body text.
        self.iLine       = iLine   ##< Line number supplied by the creator (presumably the definition line).
        ## Matches one parameter name as a whole word together with any '##'
        #  pasting operator (and blanks around it) on either side.  None for
        #  macros without parameters.
        self.oReArgMatch = re.compile(r'(\s*##\s*|\b)(' + '|'.join(asArgs) + r')(\s*##\s*|\b)') if asArgs else None

    @staticmethod
    def _needSpace(ch):
        """ This is just to make the expanded output a bit prettier. """
        # NOTE(review): when isspace() is true the '(' comparison can never be
        # false, so this is effectively just ch.isspace().  The intent may have
        # been 'not ch.isspace()'; left untouched since the expanded text is
        # consumed elsewhere - confirm before changing.
        return ch.isspace() and ch != '('

    def expandMacro(self, oParent, asArgs = None):
        """
        Expands the macro body with the given arguments.

        oParent     Not used (kept for the callers and for debugging).
        asArgs      List of argument strings, one per macro parameter.  Must
                    be None or empty for a macro without parameters.

        Returns the expanded body string.  Any '##' operator adjacent to a
        parameter is removed together with the blanks around it, so the
        argument is pasted directly onto the neighbouring text.
        """
        _ = oParent

        if not self.oReArgMatch:
            assert not asArgs
            return self.sBody

        assert len(asArgs) == len(self.asArgs)
        dArgs = { self.asArgs[iArg]: sValue for iArg, sValue in enumerate(asArgs) }

        def expandOneParam(oMatch):
            """ Returns the replacement text for one parameter occurrence. """
            sBody = oMatch.string
            sPre  = ''
            if not oMatch.group(1) and oMatch.start() > 0 and self._needSpace(sBody[oMatch.start()]):
                sPre = ' '
            sPost = ''
            if not oMatch.group(3) and oMatch.end() < len(sBody) and self._needSpace(sBody[oMatch.end()]):
                sPost = ' '
            return sPre + dArgs[oMatch.group(2)] + sPost

        # Do all substitutions in a single pass over the *original* body.  The
        # word boundaries are then always evaluated against the original text,
        # so a parameter following a consumed '##' (as in 'a ## b') is still
        # recognized, and substituted argument text is never rescanned.
        return self.oReArgMatch.sub(expandOneParam, self.sBody)
2882
2883
def __init__(self, sSrcFile, asLines, sDefaultMap, oInheritMacrosFrom = None):
    """
    Initializes the parser for one source file.

    sSrcFile            The source file name, used as prefix in error messages.
    asLines             The lines of the source file.
    sDefaultMap         Name of the default instruction map.  Must be a key
                        in g_dInstructionMaps.
    oInheritMacrosFrom  Optional parser to inherit macros from.  Its macro
                        dictionary is shallow copied and its macro matching
                        expression is reused.
    """
    self.sSrcFile       = sSrcFile
    self.asLines        = asLines
    self.iLine          = 0
    self.iState         = self.kiCode       # One of the 'Parser state' constants.
    self.sComment       = ''
    self.iCommentLine   = 0                 # Base line number for comment related errors.
    self.aoCurInstrs    = []                # type: List[Instruction]
    self.oCurFunction   = None              # type: DecoderFunction
    self.iMcBlockInFunc = 0
    self.oCurMcBlock    = None              # type: McBlock
    self.dMacros        = {}                # type: Dict[str, SimpleParser.Macro]
    self.oReMacros      = None              # type: re ##< Regular expression matching invocations of anything in self.dMacros.
    if oInheritMacrosFrom:
        self.dMacros    = dict(oInheritMacrosFrom.dMacros)
        self.oReMacros  = oInheritMacrosFrom.oReMacros

    assert sDefaultMap in g_dInstructionMaps
    self.oDefaultMap    = g_dInstructionMaps[sDefaultMap]

    self.cTotalInstr    = 0
    self.cTotalStubs    = 0
    self.cTotalTagged   = 0
    self.cTotalMcBlocks = 0

    # Note! All patterns are raw strings so that regex escapes such as \s, \d,
    #       \[ and \A are not treated as (invalid) Python string escapes.
    self.oReMacroName   = re.compile(r'^[A-Za-z_][A-Za-z0-9_]*$')
    self.oReMnemonic    = re.compile(r'^[A-Za-z_][A-Za-z0-9_]*$')
    self.oReStatsName   = re.compile(r'^[A-Za-z_][A-Za-z0-9_]*$')
    self.oReFunctionName= re.compile(r'^iemOp_[A-Za-z_][A-Za-z0-9_]*$')
    self.oReGroupName   = re.compile(r'^og_[a-z0-9]+(|_[a-z0-9]+|_[a-z0-9]+_[a-z0-9]+)$')
    self.oReDisEnum     = re.compile(r'^OP_[A-Z0-9_]+$')
    self.oReFunTable    = re.compile(r'^(IEM_STATIC|static) +const +PFNIEMOP +g_apfn[A-Za-z0-9_]+ *\[ *\d* *\] *= *$')
    self.oReComment     = re.compile(r'//.*?$|/\*.*?\*/') ## Full comments.
    self.oReHashDefine  = re.compile(r'^\s*#\s*define\s+(.*)$')
    self.oReHashDefine2 = re.compile(r'(?s)\A\s*([A-Za-z_][A-Za-z0-9_]*)\(([^)]*)\)\s*(.*)\Z') ##< With arguments.
    self.oReHashDefine3 = re.compile(r'(?s)\A\s*([A-Za-z_][A-Za-z0-9_]*)[^(]\s*(.*)\Z')        ##< Simple, no arguments.
    self.oReHashUndef   = re.compile(r'^\s*#\s*undef\s+(.*)$')
    self.oReMcBeginEnd  = re.compile(r'\bIEM_MC_(BEGIN|END)\s*\(')

    self.fDebug         = True
    self.fDebugMc       = False
    self.fDebugPreProc  = False

    # Comment tag -> handler method.
    self.dTagHandlers   = {
        '@opbrief':         self.parseTagOpBrief,
        '@opdesc':          self.parseTagOpDesc,
        '@opmnemonic':      self.parseTagOpMnemonic,
        '@op1':             self.parseTagOpOperandN,
        '@op2':             self.parseTagOpOperandN,
        '@op3':             self.parseTagOpOperandN,
        '@op4':             self.parseTagOpOperandN,
        '@oppfx':           self.parseTagOpPfx,
        '@opmaps':          self.parseTagOpMaps,
        '@opcode':          self.parseTagOpcode,
        '@opcodesub':       self.parseTagOpcodeSub,
        '@openc':           self.parseTagOpEnc,
        '@opfltest':        self.parseTagOpEFlags,
        '@opflmodify':      self.parseTagOpEFlags,
        '@opflundef':       self.parseTagOpEFlags,
        '@opflset':         self.parseTagOpEFlags,
        '@opflclear':       self.parseTagOpEFlags,
        '@ophints':         self.parseTagOpHints,
        '@opdisenum':       self.parseTagOpDisEnum,
        '@opmincpu':        self.parseTagOpMinCpu,
        '@opcpuid':         self.parseTagOpCpuId,
        '@opgroup':         self.parseTagOpGroup,
        '@opunused':        self.parseTagOpUnusedInvalid,
        '@opinvalid':       self.parseTagOpUnusedInvalid,
        '@opinvlstyle':     self.parseTagOpUnusedInvalid,
        '@optest':          self.parseTagOpTest,
        '@optestign':       self.parseTagOpTestIgnore,
        '@optestignore':    self.parseTagOpTestIgnore,
        '@opcopytests':     self.parseTagOpCopyTests,
        '@oponly':          self.parseTagOpOnlyTest,
        '@oponlytest':      self.parseTagOpOnlyTest,
        '@opxcpttype':      self.parseTagOpXcptType,
        '@opstats':         self.parseTagOpStats,
        '@opfunction':      self.parseTagOpFunction,
        '@opdone':          self.parseTagOpDone,
    }
    # Numbered test tags, both the '@optestN' and the '@optest[N]' spelling.
    for i in range(48):
        self.dTagHandlers['@optest%u' % (i,)]   = self.parseTagOpTestNum
        self.dTagHandlers['@optest[%u]' % (i,)] = self.parseTagOpTestNum

    # Messages collected by error(), errorOnLine() and errorComment().
    self.asErrors = []
2969
def raiseError(self, sMessage):
    """
    Raises a ParserException with sMessage prefixed by the source file name
    and the current line number.
    """
    sFullMessage = "%s:%d: error: %s" % (self.sSrcFile, self.iLine, sMessage,)
    raise ParserException(sFullMessage)
2975
def raiseCommentError(self, iLineInComment, sMessage):
    """
    Like raiseError, except that the reported line number is computed as
    self.iCommentLine + iLineInComment.
    """
    iReportLine  = self.iCommentLine + iLineInComment
    sFullMessage = "%s:%d: error: %s" % (self.sSrcFile, iReportLine, sMessage,)
    raise ParserException(sFullMessage)
2981
def error(self, sMessage):
    """
    Records an error for the current line (self.iLine) in self.asErrors.

    Returns False, always.
    """
    sRecord = u'%s:%d: error: %s\n' % (self.sSrcFile, self.iLine, sMessage,)
    self.asErrors.append(sRecord)
    return False
2989
def errorOnLine(self, iLine, sMessage):
    """
    Records an error for the given line number in self.asErrors.

    Returns False, always.
    """
    sRecord = u'%s:%d: error: %s\n' % (self.sSrcFile, iLine, sMessage,)
    self.asErrors.append(sRecord)
    return False
2997
def errorComment(self, iLineInComment, sMessage):
    """
    Records an error for a line inside the current comment; the reported
    line number is self.iCommentLine + iLineInComment.

    Returns False, always.
    """
    iReportLine = self.iCommentLine + iLineInComment
    sRecord     = u'%s:%d: error: %s\n' % (self.sSrcFile, iReportLine, sMessage,)
    self.asErrors.append(sRecord)
    return False
3005
def printErrors(self):
    """
    Writes all the recorded errors to stderr in one go.

    Returns the number of recorded errors.
    """
    cErrors = len(self.asErrors)
    if cErrors > 0:
        sys.stderr.write(u''.join(self.asErrors))
    return cErrors
3014
def debug(self, sMessage):
    """
    Prints a 'debug: ' prefixed message to stderr when self.fDebug is set,
    otherwise does nothing.
    """
    if not self.fDebug:
        return
    print('debug: %s' % (sMessage,), file = sys.stderr)
3021
def stripComments(self, sLine):
    """
    Replaces each comment matched by self.oReComment in sLine with a single
    space and returns the result.

    An error is recorded (via self.error) if a comment start or end marker
    remains afterwards, as that indicates a multi-line comment.
    """
    sStripped = self.oReComment.sub(" ", sLine)
    if '/*' in sStripped or '*/' in sStripped:
        self.error('Unexpected multi-line comment will not be handled correctly. Please simplify.')
    return sStripped
3032
def parseFunctionTable(self, sLine):
    """
    Parses a PFNIEMOP table, updating/checking the @oppfx value.

    sLine is the line declaring the table.  The table body is read from
    self.asLines starting at self.iLine.

    Returns True if the table end was found and the table has the expected
    number of entries.  Otherwise an error is recorded and False returned.

    Note! Updates iLine as it consumes the whole table.
    """

    #
    # Extract the table name.
    #
    # NOTE(review): assumes sLine contains 'name[' (presumably the caller
    # matched it against oReFunTable first); re.search returning None would
    # raise AttributeError here.
    #
    sName = re.search(r' *([a-zA-Z_0-9]+) *\[', sLine).group(1)
    oMap  = g_dInstructionMapsByIemName.get(sName)
    if not oMap:
        self.debug('No map for PFNIEMOP table: %s' % (sName,))
        oMap = self.oDefaultMap # This is wrong wrong wrong.

    #
    # All but the g_apfnOneByteMap & g_apfnEscF1_E0toFF tables use four
    # entries per byte:
    #   no prefix, 066h prefix, f3h prefix, f2h prefix
    # Those two tables have 256 & 32 entries respectively.
    #
    cEntriesPerByte   = 4
    cValidTableLength = 1024
    asPrefixes        = ('none', '0x66', '0xf3', '0xf2')

    oEntriesMatch = re.search(r'\[ *(256|32) *\]', sLine)
    if oEntriesMatch:
        cEntriesPerByte   = 1
        cValidTableLength = int(oEntriesMatch.group(1))
        asPrefixes        = (None,)

    #
    # The next line should be '{' and nothing else.
    #
    if self.iLine >= len(self.asLines) or not re.match(r'^ *{ *$', self.asLines[self.iLine]):
        return self.errorOnLine(self.iLine + 1, 'Expected lone "{" on line following PFNIEMOP table %s start' % (sName, ))
    self.iLine += 1

    #
    # Parse till we find the end of the table.
    #
    iEntry = 0
    while self.iLine < len(self.asLines):
        # Get the next line and strip comments and spaces (assumes no
        # multi-line comments).
        sLine = self.asLines[self.iLine]
        self.iLine += 1
        sLine = self.stripComments(sLine).strip()

        # Split the line up into entries, expanding IEMOP_X4 usage.  Going
        # backwards so that inserting and deleting does not disturb the
        # indexes still to be visited.
        asEntries = sLine.split(',')
        for i in range(len(asEntries) - 1, -1, -1):
            sEntry = asEntries[i].strip()
            if sEntry.startswith('IEMOP_X4(') and sEntry[-1] == ')':
                # One IEMOP_X4(x) stands for four 'x' entries: this one plus three copies.
                sEntry = (sEntry[len('IEMOP_X4('):-1]).strip()
                asEntries.insert(i + 1, sEntry)
                asEntries.insert(i + 1, sEntry)
                asEntries.insert(i + 1, sEntry)
            if sEntry:
                asEntries[i] = sEntry
            else:
                del asEntries[i]

        # Process the entries.
        for sEntry in asEntries:
            if sEntry in ('};', '}'):
                if iEntry != cValidTableLength:
                    return self.error('Wrong table length for %s: %#x, expected %#x' % (sName, iEntry, cValidTableLength, ))
                return True

            # Invalid entries are only counted, everything else is looked up.
            if not sEntry.startswith('iemOp_Invalid'):
                # Look up matching instruction by function.
                sPrefix = asPrefixes[iEntry % cEntriesPerByte]
                sOpcode = '%#04x' % (iEntry // cEntriesPerByte)
                aoInstr = g_dAllInstructionsByFunction.get(sEntry)
                if aoInstr:
                    if not isinstance(aoInstr, list):
                        aoInstr = [aoInstr,]

                    # Pick the first instruction that either already matches
                    # this opcode + prefix or is still missing them (in which
                    # case the missing values are filled in).
                    oInstr = None
                    for oCurInstr in aoInstr:
                        if oCurInstr.sOpcode == sOpcode and oCurInstr.sPrefix == sPrefix:
                            pass
                        elif oCurInstr.sOpcode == sOpcode and oCurInstr.sPrefix is None:
                            oCurInstr.sPrefix = sPrefix
                        elif oCurInstr.sOpcode is None and oCurInstr.sPrefix is None:
                            oCurInstr.sOpcode = sOpcode
                            oCurInstr.sPrefix = sPrefix
                        else:
                            continue
                        oInstr = oCurInstr
                        break

                    # None fits: register a copy of the first one for this
                    # map, opcode and prefix.
                    if not oInstr:
                        oInstr = aoInstr[0].copy(oMap = oMap, sOpcode = sOpcode, sPrefix = sPrefix)
                        aoInstr.append(oInstr)
                        g_dAllInstructionsByFunction[sEntry] = aoInstr
                        g_aoAllInstructions.append(oInstr)
                        oMap.aoInstructions.append(oInstr)
                else:
                    self.debug('Function "%s", entry %#04x / byte %#04x in %s, is not associated with an instruction.'
                               % (sEntry, iEntry, iEntry // cEntriesPerByte, sName,))
            iEntry += 1

    return self.error('Unexpected end of file in PFNIEMOP table')
3138
def addInstruction(self, iLine = None):
    """
    Creates a new instruction for this source file and registers it in both
    g_aoAllInstructions and self.aoCurInstrs.

    iLine   The line number to create it with; the current line (self.iLine)
            is used when None.

    Returns the new instruction.
    """
    if iLine is None:
        iLine = self.iLine
    oNewInstr = Instruction(self.sSrcFile, iLine)
    g_aoAllInstructions.append(oNewInstr)
    self.aoCurInstrs.append(oNewInstr)
    return oNewInstr
3147
def deriveMnemonicAndOperandsFromStats(self, oInstr, sStats):
    """
    Derives the mnemonic and operands from an IEM stats base name like string.

    Nothing is done if the instruction already has a mnemonic.  Otherwise
    the part before the first underscore (lower cased) becomes the mnemonic,
    and, provided the instruction has no operands yet, each of the remaining
    underscore separated parts is looked up in g_kdOpTypes and added as an
    operand.

    Returns False if an unknown operand type is encountered (the mnemonic
    and any operands added before it are kept), True otherwise.
    """
    if oInstr.sMnemonic is not None:
        return True

    asParts = sStats.split('_')
    oInstr.sMnemonic = asParts[0].lower()

    asOpTypes = asParts[1:]
    if asOpTypes and not oInstr.aoOperands:
        for sOpType in asOpTypes:
            if sOpType not in g_kdOpTypes:
                #return self.error('unknown operand type: %s (instruction: %s)' % (sOpType, oInstr))
                return False
            oInstr.aoOperands.append(Operand(g_kdOpTypes[sOpType][1], sOpType))
    return True
3163
def doneInstructionOne(self, oInstr, iLine):
    """
    Completes the parsing of one instruction by deriving whatever was not
    given explicitly and registering the instruction.

    Records iLine as the completion line and then fills in, when missing:
    mnemonic and operands (guessed from the stats or function name), the
    disassembler enum, the stats name, the function name, the maps
    (self.oDefaultMap) and the encoding.  The instruction is appended to each
    of its maps, entered into g_dAllInstructionsByStat (a duplicate stats
    name is reported via self.error) and into g_dAllInstructionsByFunction
    (several instructions may share a function).

    Always returns True.
    """
    assert oInstr.iLineCompleted is None;
    oInstr.iLineCompleted = iLine;

    # Note: Specified instructions (with @op tags) and unspecified legacy
    #       ones, for which there is generally only something like
    #           /** Opcode 0x0f 0x00 /0. */
    #           FNIEMOPRM_DEF(iemOp_Grp6_sldt)
    #       to go on, currently get no special treatment here.

    #
    # Common defaults.
    #

    # No mnemonic?  Guess it and the operands from the stats name, or failing
    # that from the function name with the 'iemOp_' part removed.
    if oInstr.sMnemonic is None:
        sGuessFrom = oInstr.sStats;
        if sGuessFrom is None and oInstr.sFunction is not None:
            sGuessFrom = oInstr.sFunction.replace('iemOp_', '');
        if sGuessFrom is not None:
            self.deriveMnemonicAndOperandsFromStats(oInstr, sGuessFrom);

    # The disassembler op enum constant is 'OP_' + the upper-cased mnemonic.
    if oInstr.sMnemonic is not None and oInstr.sDisEnum is None:
        oInstr.sDisEnum = 'OP_' + oInstr.sMnemonic.upper();

    # The IEM statistics base name: the function name sans prefix, or else the
    # mnemonic followed by one '_<type>' per typed operand.
    if oInstr.sStats is None:
        if oInstr.sFunction is not None:
            oInstr.sStats = oInstr.sFunction.replace('iemOp_', '');
        elif oInstr.sMnemonic is not None:
            oInstr.sStats = oInstr.sMnemonic \
                          + ''.join(['_' + oOperand.sType for oOperand in oInstr.aoOperands if oOperand.sType]);

    # The IEM function name: mnemonic plus operand types, or else the stats name.
    if oInstr.sFunction is None:
        if oInstr.sMnemonic is not None:
            oInstr.sFunction = 'iemOp_' + oInstr.sMnemonic \
                             + ''.join(['_' + oOperand.sType for oOperand in oInstr.aoOperands if oOperand.sType]);
        elif oInstr.sStats:
            oInstr.sFunction = 'iemOp_' + oInstr.sStats;

    #
    # Fall back on the default map, then add the instruction to all its maps.
    #
    if not oInstr.aoMaps:
        oInstr.aoMaps = [ self.oDefaultMap, ];
    for oMap in oInstr.aoMaps:
        oMap.aoInstructions.append(oInstr);

    #
    # Derive the encoding from the operands and maps.
    #
    if oInstr.sEncoding is None:
        if not oInstr.aoOperands:
            sBaseEncoding = 'ModR/M' if oInstr.fUnused and oInstr.sSubOpcode else 'fixed';
            if oInstr.onlyInVexMaps():
                oInstr.sEncoding = 'VEX.' + sBaseEncoding;
            else:
                oInstr.sEncoding = sBaseEncoding;
        elif oInstr.aoOperands[0].usesModRM():
            fVex = (len(oInstr.aoOperands) >= 2 and oInstr.aoOperands[1].sWhere == 'vvvv') \
                or oInstr.onlyInVexMaps();
            oInstr.sEncoding = 'VEX.ModR/M' if fVex else 'ModR/M';

    #
    # Register by opstat value, complaining about duplicates.
    #
    if oInstr.sStats:
        if oInstr.sStats in g_dAllInstructionsByStat:
            self.error('Duplicate opstat value "%s"\nnew: %s\nold: %s'
                       % (oInstr.sStats, oInstr, g_dAllInstructionsByStat[oInstr.sStats],));
        else:
            g_dAllInstructionsByStat[oInstr.sStats] = oInstr;

    #
    # Register by function name.  Multiple instructions per function is fine.
    #
    if oInstr.sFunction:
        g_dAllInstructionsByFunction.setdefault(oInstr.sFunction, []).append(oInstr);

    return True;
3269
def doneInstructions(self, iLineInComment = None, fEndOfFunction = False):
    """
    Completes all the current instructions and resets the current state.

    Each current instruction is passed to doneInstructionOne together with
    the completion line: self.iLine, or self.iCommentLine + iLineInComment
    when iLineInComment is given.  The stub and instruction totals are
    updated and the pending comment and instruction list cleared.

    With fEndOfFunction set, the current function (if any) is additionally
    completed with its source lines and dropped, and the MC block counter
    is reset.

    Always returns True.
    """
    if iLineInComment is None:
        iLineDone = self.iLine;
    else:
        iLineDone = self.iCommentLine + iLineInComment;

    for oInstr in self.aoCurInstrs:
        self.doneInstructionOne(oInstr, iLineDone);
        if oInstr.fStub:
            self.cTotalStubs += 1;
    self.cTotalInstr += len(self.aoCurInstrs);

    # Start afresh for whatever comes next.
    self.aoCurInstrs = [];
    self.sComment    = '';

    if not fEndOfFunction:
        return True;

    oFunction = self.oCurFunction;
    if oFunction:
        oFunction.complete(self.iLine, self.asLines[oFunction.iBeginLine - 1 : self.iLine]);
        self.oCurFunction = None;
    self.iMcBlockInFunc = 0;
    return True;
3290
def setInstrunctionAttrib(self, sAttrib, oValue, fOverwrite = False):
    """
    Assigns oValue to the attribute sAttrib of every current instruction.

    Unless fOverwrite is exactly True, an instruction is only updated when
    its current attribute value is None; any other value (including an
    empty string) is left as it is.
    """
    fForce = fOverwrite is True;
    for oInstr in self.aoCurInstrs:
        if fForce or getattr(oInstr, sAttrib) is None:
            setattr(oInstr, sAttrib, oValue);
3302
def setInstrunctionArrayAttrib(self, sAttrib, iEntry, oValue, fOverwrite = False):
    """
    Assigns oValue to entry iEntry of the list attribute sAttrib of every
    current instruction.

    A list that is too short is first padded with None entries.  Unless
    fOverwrite is exactly True, the entry is only set when it currently is
    None.
    """
    for oInstr in self.aoCurInstrs:
        aoArray  = getattr(oInstr, sAttrib);
        cMissing = iEntry + 1 - len(aoArray);
        if cMissing > 0:
            aoArray.extend([None] * cMissing);
        if aoArray[iEntry] is None or fOverwrite is True:
            aoArray[iEntry] = oValue;
3314
def parseCommentOldOpcode(self, asLines):
    """
    Deals with 'Opcode 0xff /4' like comments.

    When the first line consists of the word 'Opcode' followed by a word
    starting with '0x' or '0X', everything after 'Opcode' is recorded as the
    sRawOldOpcodes attribute of the current instructions (via
    setInstrunctionAttrib, i.e. without overwriting).  Words with an
    upper-case '0X' prefix are normalized to '0x'.

    Always returns False.
    """
    asWords = asLines[0].split() if asLines else [];
    if    len(asWords) >= 2 \
      and asWords[0] == 'Opcode' \
      and asWords[1].startswith(('0x', '0X')):
        # Drop the 'Opcode' word itself and keep the bytes and modifiers.
        asOpcodeWords = [('0x' + sWord[2:]) if sWord.startswith('0X') else sWord for sWord in asWords[1:]];
        self.setInstrunctionAttrib('sRawOldOpcodes', ' '.join(asOpcodeWords));

    return False;
3330
def ensureInstructionForOpTag(self, iTagLine):
    """
    Makes sure there is an instruction for the op-tag being parsed.

    If there are no current instructions, one is added for the line
    self.iCommentLine + iTagLine.  The tag count of every current
    instruction is bumped, and self.cTotalTagged is incremented for each
    instruction receiving its first tag.

    Returns the last of the current instructions.
    """
    if not self.aoCurInstrs:
        self.addInstruction(self.iCommentLine + iTagLine);

    for oCurInstr in self.aoCurInstrs:
        fFirstTag = oCurInstr.cOpTags == 0;
        oCurInstr.cOpTags += 1;
        if fFirstTag:
            self.cTotalTagged += 1;

    return self.aoCurInstrs[-1];
3340
3341 @staticmethod
3342 def flattenSections(aasSections):
3343 """
3344 Flattens multiline sections into stripped single strings.
3345 Returns list of strings, on section per string.
3346 """
3347 asRet = [];
3348 for asLines in aasSections:
3349 if asLines:
3350 asRet.append(' '.join([sLine.strip() for sLine in asLines]));
3351 return asRet;
3352
3353 @staticmethod
3354 def flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = '\n'):
3355 """
3356 Flattens sections into a simple stripped string with newlines as
3357 section breaks. The final section does not sport a trailing newline.
3358 """
3359 # Typical: One section with a single line.
3360 if len(aasSections) == 1 and len(aasSections[0]) == 1:
3361 return aasSections[0][0].strip();
3362
3363 sRet = '';
3364 for iSection, asLines in enumerate(aasSections):
3365 if asLines:
3366 if iSection > 0:
3367 sRet += sSectionSep;
3368 sRet += sLineSep.join([sLine.strip() for sLine in asLines]);
3369 return sRet;
3370
3371
3372
3373 ## @name Tag parsers
3374 ## @{
3375
def parseTagOpBrief(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opbrief
    Value:  Text description, multiple sections, appended.

    Brief description.  If not given, it's the first sentence from @opdesc.

    The value is flattened, terminated with a dot if it lacks one, and must
    be a single sentence of at most 180 characters.  It is stored as the
    sBrief attribute of the instruction unless a brief is already set.

    Returns True on success, otherwise the result of self.errorComment.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sBrief = self.flattenAllSections(aasSections);
    if not sBrief:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if sBrief[-1] != '.':
        sBrief = sBrief + '.';
    if len(sBrief) > 180:
        return self.errorComment(iTagLine, '%s: value too long (max 180 chars): %s' % (sTag, sBrief));

    # Look for the first dot followed by a space (end of a sentence).  Dots
    # followed by anything else, like in 'v1.0', are skipped.
    offDot = sBrief.find('.');
    while 0 <= offDot < len(sBrief) - 1 and sBrief[offDot + 1] != ' ':
        offDot = sBrief.find('.', offDot + 1);
    if offDot >= 0 and offDot != len(sBrief) - 1:
        return self.errorComment(iTagLine, '%s: only one sentence: %s' % (sTag, sBrief));

    # Update the instruction.
    if oInstr.sBrief is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite brief "%s" with "%s"'
                                           % (sTag, oInstr.sBrief, sBrief,));
    oInstr.sBrief = sBrief;

    _ = iEndLine;
    return True;
3405
def parseTagOpDesc(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opdesc
    Value:  Text description, multiple sections, appended.

    It is used to describe instructions.  Every non-empty section is
    flattened into one string and appended to the asDescSections list of
    the instruction.

    Always returns True.
    """
    _ = sTag; _ = iEndLine;

    oInstr = self.ensureInstructionForOpTag(iTagLine);
    if aasSections:
        oInstr.asDescSections.extend(self.flattenSections(aasSections));
    return True;
3420
def parseTagOpMnemonic(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opmnemonic
    Value:  mnemonic

    The 'mnemonic' value must match self.oReMnemonic (a valid C identifier
    string).  Because of prefixes, groups and whatnot, there are times when
    the mnemonic isn't that of an actual assembler mnemonic.

    Returns True after setting the mnemonic, otherwise the result of
    self.errorComment (invalid value, or a mnemonic is already set).
    """
    _ = iEndLine;
    oInstr    = self.ensureInstructionForOpTag(iTagLine);
    sMnemonic = self.flattenAllSections(aasSections);

    # Validate the value and refuse to overwrite an earlier one.
    sError = None;
    if not self.oReMnemonic.match(sMnemonic):
        sError = '%s: invalid menmonic name: "%s"' % (sTag, sMnemonic,);
    elif oInstr.sMnemonic is not None:
        sError = '%s: attempting to overwrite menmonic "%s" with "%s"' % (sTag, oInstr.sMnemonic, sMnemonic,);
    if sError is not None:
        return self.errorComment(iTagLine, sError);

    oInstr.sMnemonic = sMnemonic;
    return True;
3443
def parseTagOpOperandN(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tags:  \@op1, \@op2, \@op3, \@op4
    Value: [where:]type

    The 'where' value indicates where the operand is found, like the 'reg'
    part of the ModR/M encoding.  It must be a key of g_kdOpLocations; when
    omitted the default location of the type (g_kdOpTypes[type][1]) is used.

    The 'type' value indicates the operand type.  These follow the types
    given in the opcode tables in the CPU reference manuals.  It must be a
    key of g_kdOpTypes.

    Returns True after storing the operand in oInstr.aoOperands at the
    index given by the tag's trailing digit, otherwise the result of
    self.errorComment (bad format, bad type, bad location, or the operand
    is already specified).
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    idxOp = int(sTag[-1]) - 1;
    assert 0 <= idxOp < 4;

    # flatten, split up, and validate the "where:type" value.
    sFlattened = self.flattenAllSections(aasSections);
    asSplit = sFlattened.split(':');
    if len(asSplit) == 1:
        sType  = asSplit[0];
        sWhere = None;
    elif len(asSplit) == 2:
        (sWhere, sType) = asSplit;
    else:
        return self.errorComment(iTagLine, 'expected %s value on format "[<where>:]<type>" not "%s"' % (sTag, sFlattened,));

    if sType not in g_kdOpTypes:
        return self.errorComment(iTagLine, '%s: invalid type value "%s", valid: %s'
                                           % (sTag, sType, ', '.join(g_kdOpTypes.keys()),));
    if sWhere is None:
        sWhere = g_kdOpTypes[sType][1];
    elif sWhere not in g_kdOpLocations:
        return self.errorComment(iTagLine, '%s: invalid where value "%s", valid: %s'
                                           % (sTag, sWhere, ', '.join(g_kdOpLocations.keys()),));

    # Insert the operand, refusing to overwrite an existing one.  Preceding
    # operands not specified yet are represented by None placeholders.
    while idxOp >= len(oInstr.aoOperands):
        oInstr.aoOperands.append(None);
    if oInstr.aoOperands[idxOp] is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s:%s" with "%s:%s"'
                                           % ( sTag, oInstr.aoOperands[idxOp].sWhere, oInstr.aoOperands[idxOp].sType,
                                               sWhere, sType,));
    oInstr.aoOperands[idxOp] = Operand(sWhere, sType);

    _ = iEndLine;
    return True;
3493
def parseTagOpMaps(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opmaps
    Value:  map[,map2]

    Indicates which maps the instruction is in.  There is a default map
    associated with each input file.

    The value is a comma separated list of map names, each of which must be
    a key of g_dInstructionMaps.  Whitespace around the names and empty
    list entries are ignored.

    Returns True after adding the maps to oInstr.aoMaps, otherwise the
    result of self.errorComment (no value, unknown map, or a map that is
    already assigned to the instruction).
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split up and validate the value.  Empty entries are dropped
    # here as str.split(',') yields [''] rather than [] for an empty value.
    sFlattened = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',');
    asMaps = [sMap.strip() for sMap in sFlattened.split(',')];
    asMaps = [sMap for sMap in asMaps if sMap];
    if not asMaps:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    for sMap in asMaps:
        if sMap not in g_dInstructionMaps:
            return self.errorComment(iTagLine, '%s: invalid map value: %s  (valid values: %s)'
                                               % (sTag, sMap, ', '.join(g_dInstructionMaps.keys()),));

    # Add the maps to the current list.  Throw errors on duplicates.
    for oMap in oInstr.aoMaps:
        if oMap.sName in asMaps:
            return self.errorComment(iTagLine, '%s: duplicate map assignment: %s' % (sTag, oMap.sName));

    for sMap in asMaps:
        oMap = g_dInstructionMaps[sMap];
        if oMap not in oInstr.aoMaps:
            oInstr.aoMaps.append(oMap);
        else:
            # The same map was listed twice in this tag; report but carry on.
            self.errorComment(iTagLine, '%s: duplicate map assignment (input): %s' % (sTag, sMap));

    _ = iEndLine;
    return True;
3528
def parseTagOpPfx(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@oppfx
    Value:  n/a|none|0x66|0xf3|0xf2

    Required prefix for the instruction.  (In a (E)VEX context this is the
    value of the 'pp' field rather than an actual prefix.)

    The value is lower-cased; a two character value gets '0x' prepended.
    'n/a' translates to None.  Any other value must be a key of
    g_kdPrefixes.

    Returns True after setting oInstr.sPrefix, otherwise the result of
    self.errorComment (missing value, more than one prefix, invalid prefix,
    or a prefix is already set).
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sFlattened = self.flattenAllSections(aasSections);
    asPrefixes = sFlattened.split();
    if not asPrefixes:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if len(asPrefixes) > 1:
        return self.errorComment(iTagLine, '%s: max one prefix: %s' % (sTag, asPrefixes,));

    sPrefix = asPrefixes[0].lower();
    if sPrefix == 'n/a':
        sPrefix = None;
    elif sPrefix != 'none':
        # Allow the byte to be given without the 0x prefix, e.g. '66'.
        if len(sPrefix) == 2:
            sPrefix = '0x' + sPrefix;
        if not _isValidOpcodeByte(sPrefix):
            return self.errorComment(iTagLine, '%s: invalid prefix: %s' % (sTag, sPrefix,));

    if sPrefix is not None and sPrefix not in g_kdPrefixes:
        return self.errorComment(iTagLine, '%s: invalid prefix: %s (valid %s)' % (sTag, sPrefix, g_kdPrefixes,));

    # Set it.
    if oInstr.sPrefix is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sPrefix, sPrefix,));
    oInstr.sPrefix = sPrefix;

    _ = iEndLine;
    return True;
3566
def parseTagOpcode(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opcode
    Value:  0x?? | /reg (TODO: | mr/reg | 11 /reg | !11 /reg | 11 mr/reg | !11 mr/reg)

    The opcode byte or sub-byte for the instruction in the context of a map.

    Accepted are '/<reg>', '11/<reg>' and '!11/<reg>' with <reg> being a
    single digit in the range 0 thru 7 (the ModR/M reg field is three bits
    wide), as well as anything _isValidOpcodeByte accepts.

    Returns True after setting oInstr.sOpcode, otherwise the result of
    self.errorComment (invalid value, or an opcode is already set).
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sOpcode = self.flattenAllSections(aasSections);
    fValid = False;
    for sModPrefix in ('/', '11/', '!11/'):
        if    len(sOpcode) == len(sModPrefix) + 1 \
          and sOpcode.startswith(sModPrefix) \
          and sOpcode[-1] in '01234567':
            fValid = True;
            break;
    if not fValid and not _isValidOpcodeByte(sOpcode):
        return self.errorComment(iTagLine, '%s: invalid opcode: %s' % (sTag, sOpcode,));

    # Set it.
    if oInstr.sOpcode is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sOpcode, sOpcode,));
    oInstr.sOpcode = sOpcode;

    _ = iEndLine;
    return True;
3596
def parseTagOpcodeSub(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opcodesub
    Value:  none | 11 mr/reg | !11 mr/reg | rex.w=0 | rex.w=1 | vex.l=0 | vex.l=1
            | 11 mr/reg vex.l=0 | 11 mr/reg vex.l=1 | !11 mr/reg vex.l=0 | !11 mr/reg vex.l=1

    This is a simple way of dealing with encodings where the mod=3 and mod!=3
    represents exactly two different instructions.  The more proper way would
    be to go via maps with two members, but this is faster.

    The value must be a key of g_kdSubOpcodes; what gets stored in
    oInstr.sSubOpcode is the first element of the corresponding entry.

    Returns True on success, otherwise the result of self.errorComment
    (invalid value, or a sub opcode is already set).
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sSubOpcode = self.flattenAllSections(aasSections);
    if sSubOpcode not in g_kdSubOpcodes:
        return self.errorComment(iTagLine, '%s: invalid sub opcode: %s  (valid: %s)'
                                           % (sTag, sSubOpcode, ', '.join(g_kdSubOpcodes.keys()),));
    sSubOpcode = g_kdSubOpcodes[sSubOpcode][0];

    # Set it.
    if oInstr.sSubOpcode is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                           % ( sTag, oInstr.sSubOpcode, sSubOpcode,));
    oInstr.sSubOpcode = sSubOpcode;

    _ = iEndLine;
    return True;
3623
def parseTagOpEnc(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@openc
    Value:  ModR/M|fixed|prefix|<map name>

    The instruction operand encoding style.  The value is accepted when it
    is a key of g_kdEncodings, a key of g_dInstructionMaps, or passes
    _isValidOpcodeByte.

    Returns True after setting oInstr.sEncoding, otherwise the result of
    self.errorComment (invalid value, or an encoding is already set).
    """
    _ = iEndLine;
    oInstr    = self.ensureInstructionForOpTag(iTagLine);
    sEncoding = self.flattenAllSections(aasSections);

    # Validate the value.
    if     sEncoding not in g_kdEncodings \
       and sEncoding not in g_dInstructionMaps \
       and not _isValidOpcodeByte(sEncoding):
        return self.errorComment(iTagLine, '%s: invalid encoding: %s' % (sTag, sEncoding,));

    # Refuse to replace an earlier value.
    if oInstr.sEncoding is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                           % ( sTag, oInstr.sEncoding, sEncoding,));

    oInstr.sEncoding = sEncoding;
    return True;
3650
## Translates an EFLAGS tag into the name of the Instruction attribute that
## holds the flags given with that tag.
kdOpFlagToAttr = dict([
    ( '@opfltest',   'asFlTest', ),
    ( '@opflmodify', 'asFlModify', ),
    ( '@opflundef',  'asFlUndefined', ),
    ( '@opflset',    'asFlSet', ),
    ( '@opflclear',  'asFlClear', ),
]);
3659
def parseTagOpEFlags(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tags:   \@opfltest, \@opflmodify, \@opflundef, \@opflset, \@opflclear
    Value:  <eflags specifier>

    The value is a comma separated list of flags, each of which must be a
    key of g_kdEFlagsMnemonics (surrounding whitespace is tolerated), or
    the single word 'none' for an empty list.  The list is stored in the
    instruction attribute self.kdOpFlagToAttr associates with the tag.

    Returns True on success, False if any flag is invalid, or the result of
    self.errorComment if the attribute is already set.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and split up the value, then validate the individual flags.
    sFlattened = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',');
    asFlags    = sFlattened.split(',');
    if len(asFlags) == 1 and asFlags[0].lower() == 'none':
        asFlags = [];
    else:
        fAllValid = True;
        for iFlag, sFlag in enumerate(asFlags):
            if sFlag in g_kdEFlagsMnemonics:
                continue;
            sStripped = sFlag.strip();
            if sStripped in g_kdEFlagsMnemonics:
                asFlags[iFlag] = sStripped;
            else:
                fAllValid = self.errorComment(iTagLine, '%s: invalid EFLAGS value: %s' % (sTag, sFlag,));
        if not fAllValid:
            return False;

    # Store the list, unless the attribute has been set already.
    sAttrib = self.kdOpFlagToAttr[sTag];
    asOld   = getattr(oInstr, sAttrib);
    if asOld is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, asOld, asFlags,));
    setattr(oInstr, sAttrib, asFlags);
    return True;
3691
def parseTagOpHints(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@ophints
    Value:  Comma or space separated list of flags and hints.

    This covers the disassembler flags table and more.  Each hint must be a
    key of g_kdHints; the single word 'none' gives an empty list.  Valid
    hints are added as keys to the oInstr.dHints dictionary; hints already
    present there are reported as duplicates.

    Returns True on success (duplicates included) and False if any hint is
    invalid, in which case nothing is added.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    # Note: str.split() never leaves whitespace in the words, so the hints
    #       can be looked up as they are.
    asHints = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asHints) == 1 and asHints[0].lower() == 'none':
        asHints = [];
    else:
        fRc = True;
        for sHint in asHints:
            if sHint not in g_kdHints:
                fRc = self.errorComment(iTagLine, '%s: invalid hint value: %s' % (sTag, sHint,));
        if not fRc:
            return False;

    # Append them.
    for sHint in asHints:
        if sHint not in oInstr.dHints:
            oInstr.dHints[sHint] = True; # (dummy value, using dictionary for speed)
        else:
            self.errorComment(iTagLine, '%s: duplicate hint: %s' % ( sTag, sHint,));

    _ = iEndLine;
    return True;
3725
def parseTagOpDisEnum(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opdisenum
    Value:  OP_XXXX

    This is for selecting a specific (legacy) disassembler enum value for
    the instruction.  The value must match self.oReDisEnum.  When several
    words are given, an error is reported and the first word is used.

    Returns True after setting oInstr.sDisEnum, False if no value was
    given, otherwise the result of self.errorComment (invalid value, or an
    enum value is already set).
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and split; we want exactly one word.
    asWords = self.flattenAllSections(aasSections).split();
    cWords  = len(asWords);
    if cWords != 1:
        self.errorComment(iTagLine, '%s: expected exactly one value: %s' % (sTag, asWords,));
        if cWords == 0:
            return False;

    sDisEnum = asWords[0];
    if not self.oReDisEnum.match(sDisEnum):
        return self.errorComment(iTagLine, '%s: invalid disassembler OP_XXXX enum: %s (pattern: %s)'
                                           % (sTag, sDisEnum, self.oReDisEnum.pattern));

    # Refuse to replace an earlier value.
    if oInstr.sDisEnum is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % (sTag, oInstr.sDisEnum, sDisEnum,));

    oInstr.sDisEnum = sDisEnum;
    return True;
3754
def parseTagOpMinCpu(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opmincpu
    Value:  <simple CPU name>

    Indicates when this instruction was introduced.  The value must be a
    key of g_kdCpuNames.  When several words are given, an error is
    reported and the first word is used.

    Returns True after setting oInstr.sMinCpu, and also when a different
    minimum CPU was already set (that is reported as an error and the
    earlier value is kept).  Otherwise the result of self.errorComment is
    returned (missing or invalid value).
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value, split into words, make sure there's just one, valid it.
    asCpus = self.flattenAllSections(aasSections).split();
    if not asCpus:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if len(asCpus) > 1:
        self.errorComment(iTagLine, '%s: exactly one CPU name, please: %s' % (sTag, ' '.join(asCpus),));

    sMinCpu = asCpus[0];
    if sMinCpu not in g_kdCpuNames:
        return self.errorComment(iTagLine, '%s: invalid CPU name: %s  (names: %s)'
                                           % (sTag, sMinCpu, ','.join(sorted(g_kdCpuNames)),));

    # Set it.  The assignment must not happen any earlier, or the overwrite
    # check below would never trigger.
    if oInstr.sMinCpu is None:
        oInstr.sMinCpu = sMinCpu;
    elif oInstr.sMinCpu != sMinCpu:
        self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % (sTag, oInstr.sMinCpu, sMinCpu,));

    _ = iEndLine;
    return True;
3784
def parseTagOpCpuId(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opcpuid
    Value:  none | <CPUID flag specifier>

    CPUID feature bit which is required for the instruction to be present.
    The value is a comma or space separated list where each entry must be a
    key of g_kdCpuIdFlags; the single word 'none' gives an empty list.
    Valid entries are appended to oInstr.asCpuIds; entries already present
    there are reported as duplicates.

    Returns True on success (duplicates included) and False if any entry is
    invalid, in which case nothing is appended.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    # Note: str.split() never leaves whitespace in the words, so the values
    #       can be looked up as they are.
    asCpuIds = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asCpuIds) == 1 and asCpuIds[0].lower() == 'none':
        asCpuIds = [];
    else:
        fRc = True;
        for sCpuId in asCpuIds:
            if sCpuId not in g_kdCpuIdFlags:
                fRc = self.errorComment(iTagLine, '%s: invalid CPUID value: %s' % (sTag, sCpuId,));
        if not fRc:
            return False;

    # Append them.
    for sCpuId in asCpuIds:
        if sCpuId not in oInstr.asCpuIds:
            oInstr.asCpuIds.append(sCpuId);
        else:
            self.errorComment(iTagLine, '%s: duplicate CPUID: %s' % ( sTag, sCpuId,));

    _ = iEndLine;
    return True;
3818
def parseTagOpGroup(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opgroup
    Value:  op_grp1[_subgrp2[_subsubgrp3]]

    Instruction grouping.  Exactly one group name matching
    self.oReGroupName is expected.

    Returns True after setting oInstr.sGroup, otherwise the result of
    self.errorComment (wrong number of values, invalid name, or a group is
    already set).
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and split the value; there shall be exactly one valid name.
    asGroups = self.flattenAllSections(aasSections).split();
    if len(asGroups) != 1:
        return self.errorComment(iTagLine, '%s: exactly one group, please: %s' % (sTag, asGroups,));
    (sGroup,) = asGroups;
    if not self.oReGroupName.match(sGroup):
        return self.errorComment(iTagLine, '%s: invalid group name: %s  (valid: %s)'
                                           % (sTag, sGroup, self.oReGroupName.pattern));

    # Refuse to replace an earlier value.
    if oInstr.sGroup is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sGroup, sGroup,));

    oInstr.sGroup = sGroup;
    return True;
3844
def parseTagOpUnusedInvalid(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opunused, \@opinvalid, \@opinvlstyle
    Value:  <invalid opcode behaviour style>

    The \@opunused indicates the specification is for a currently unused
    instruction encoding.

    The \@opinvalid indicates the specification is for an invalid currently
    instruction encoding (like UD2).

    The \@opinvlstyle just indicates how CPUs decode the instruction when
    not supported (\@opcpuid, \@opmincpu) or disabled.

    The value must be a single key of g_kdInvalidStyles.  It is stored as
    oInstr.sInvalidStyle; in addition @opunused sets oInstr.fUnused and
    @opinvalid sets oInstr.fInvalid.

    Returns True on success, otherwise the result of self.errorComment
    (wrong number of values, invalid style, or a style is already set).
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and split the value; there shall be exactly one valid style.
    asStyles = self.flattenAllSections(aasSections).split();
    if len(asStyles) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid behviour style, please: %s' % (sTag, asStyles,));
    (sStyle,) = asStyles;
    if sStyle not in g_kdInvalidStyles:
        return self.errorComment(iTagLine, '%s: invalid invalid behaviour style: %s  (valid: %s)'
                                           % (sTag, sStyle, g_kdInvalidStyles.keys(),));

    # Only one of the three tags may be used for an instruction.
    if oInstr.sInvalidStyle is not None:
        return self.errorComment(iTagLine,
                                 '%s: attempting to overwrite "%s" with "%s" (only one @opunused, @opinvalid, @opinvlstyle)'
                                 % ( sTag, oInstr.sInvalidStyle, sStyle,));

    oInstr.sInvalidStyle = sStyle;
    if sTag == '@opinvalid':
        oInstr.fInvalid = True;
    elif sTag == '@opunused':
        oInstr.fUnused = True;
    return True;
3882
def parseTagOpTest(self, sTag, aasSections, iTagLine, iEndLine): # pylint: disable=too-many-locals
    r"""
    Tag:        \@optest
    Value:      [<selectors>[ ]?] <inputs> -> <outputs>
    Example:    mode==64bit / in1=0xfffffffe:dw in2=1:dw -> out1=0xffffffff:dw outfl=a?,p?

    The main idea here is to generate basic instruction tests.

    The probably simplest way of handling the diverse input, would be to use
    it to produce size optimized byte code for a simple interpreter that
    modifies the register input and output states.

    An alternative to the interpreter would be creating multiple tables,
    but that becomes rather complicated wrt what goes where and then to use
    them in an efficient manner.

    Each section is parsed as one test.  Tests parsing cleanly are appended
    to oInstr.aoTests, the others are reported via self.errorComment.

    Always returns True.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    #
    # Do it section by section.
    #
    for asSectionLines in aasSections:
        #
        # Sort the input into outputs, inputs and selector conditions.
        #
        sFlatSection = self.flattenAllSections([asSectionLines,]);
        if not sFlatSection:
            self.errorComment(iTagLine, '%s: missing value (dbg: aasSections=%s)' % ( sTag, aasSections));
            continue;
        oTest = InstructionTest(oInstr);

        asSelectors = [];
        asInputs    = [];
        asOutputs   = [];
        asCur   = asOutputs;
        fRc     = True;
        asWords = sFlatSection.split();
        # The words are processed back to front: outputs first, then the
        # inputs (after '->') and finally the selectors (after '/').
        for iWord in range(len(asWords) - 1, -1, -1):
            sWord = asWords[iWord];
            # Check for array switchers.  Note! The lists must be compared
            # by identity here, as different lists with equal content (two
            # empty ones for instance) compare as equal.
            if sWord == '->':
                if asCur is not asOutputs:
                    fRc = self.errorComment(iTagLine, '%s: "->" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asInputs;
            elif sWord == '/':
                if asCur is not asInputs:
                    fRc = self.errorComment(iTagLine, '%s: "/" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asSelectors;
            else:
                asCur.insert(0, sWord);

        #
        # Validate and add selectors.
        #
        for sCond in asSelectors:
            # Expand predicates (shorthands) into full conditions.
            sCondExp = TestSelector.kdPredicates.get(sCond, sCond);
            oSelector = None;
            for sOp in TestSelector.kasCompareOps:
                off = sCondExp.find(sOp);
                if off >= 0:
                    sVariable = sCondExp[:off];
                    sValue    = sCondExp[off + len(sOp):];
                    if sVariable in TestSelector.kdVariables:
                        if sValue in TestSelector.kdVariables[sVariable]:
                            oSelector = TestSelector(sVariable, sOp, sValue);
                        else:
                            self.errorComment(iTagLine, '%s: invalid condition value "%s" in "%s" (valid: %s)'
                                                         % ( sTag, sValue, sCond,
                                                             TestSelector.kdVariables[sVariable].keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid condition variable "%s" in "%s" (valid: %s)'
                                                     % ( sTag, sVariable, sCond, TestSelector.kdVariables.keys(),));
                    break;
            if oSelector is not None:
                for oExisting in oTest.aoSelectors:
                    if oExisting.sVariable == oSelector.sVariable:
                        self.errorComment(iTagLine, '%s: already have a selector for variable "%s" (existing: %s, new: %s)'
                                                     % ( sTag, oSelector.sVariable, oExisting, oSelector,));
                oTest.aoSelectors.append(oSelector);
            else:
                fRc = self.errorComment(iTagLine, '%s: failed to parse selector: %s' % ( sTag, sCond,));

        #
        # Validate outputs and inputs, adding them to the test as we go along.
        #
        for asItems, sDesc, aoDst in [ (asInputs, 'input', oTest.aoInputs), (asOutputs, 'output', oTest.aoOutputs)]:
            asValidFieldKinds = [ 'both', sDesc, ];
            for sItem in asItems:
                oItem = None;
                # The first operator found in the item decides how it is split up.
                for sOp in TestInOut.kasOperators:
                    off = sItem.find(sOp);
                    if off < 0:
                        continue;
                    sField     = sItem[:off];
                    sValueType = sItem[off + len(sOp):];
                    if     sField in TestInOut.kdFields \
                       and TestInOut.kdFields[sField][1] in asValidFieldKinds:
                        # The type is optional and defaults to that of the field.
                        asSplit = sValueType.split(':', 1);
                        sValue  = asSplit[0];
                        sType   = asSplit[1] if len(asSplit) > 1 else TestInOut.kdFields[sField][0];
                        if sType in TestInOut.kdTypes:
                            oValid = TestInOut.kdTypes[sType].validate(sValue);
                            if oValid is True:
                                if not TestInOut.kdTypes[sType].isAndOrPair(sValue) or sOp == '&|=':
                                    oItem = TestInOut(sField, sOp, sValue, sType);
                                else:
                                    self.errorComment(iTagLine, '%s: and-or %s value "%s" can only be used with "&|="'
                                                                % ( sTag, sDesc, sItem, ));
                            else:
                                self.errorComment(iTagLine, '%s: invalid %s value "%s" in "%s" (type: %s): %s'
                                                            % ( sTag, sDesc, sValue, sItem, sType, oValid, ));
                        else:
                            self.errorComment(iTagLine, '%s: invalid %s type "%s" in "%s" (valid types: %s)'
                                                         % ( sTag, sDesc, sType, sItem, TestInOut.kdTypes.keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid %s field "%s" in "%s"\nvalid fields: %s'
                                                     % ( sTag, sDesc, sField, sItem,
                                                         ', '.join([sKey for sKey, asVal in TestInOut.kdFields.items()
                                                                    if asVal[1] in asValidFieldKinds]),));
                    break;
                if oItem is not None:
                    for oExisting in aoDst:
                        if oExisting.sField == oItem.sField and oExisting.sOp == oItem.sOp:
                            self.errorComment(iTagLine,
                                              '%s: already have a "%s" assignment for field "%s" (existing: %s, new: %s)'
                                              % ( sTag, oItem.sOp, oItem.sField, oExisting, oItem,));
                    aoDst.append(oItem);
                else:
                    fRc = self.errorComment(iTagLine, '%s: failed to parse assignment: %s' % ( sTag, sItem,));

        #
        # Add the test to the instruction if it parsed okay, complain if not.
        #
        if fRc:
            oInstr.aoTests.append(oTest);
        else:
            self.errorComment(iTagLine, '%s: failed to parse test: %s' % (sTag, ' '.join(asWords),));
            self.errorComment(iTagLine, '%s: asSelectors=%s / asInputs=%s -> asOutputs=%s'
                                        % (sTag, asSelectors, asInputs, asOutputs,));

    _ = iEndLine;
    return True;
4027
def parseTagOpTestNum(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Numbered \@optest tag.  Either \@optest42 or \@optest[42].

    The number is checked against the number of tests the instruction
    already has, reporting a mismatch via self.errorComment.  The tag is
    then handed on to parseTagOpTest regardless.

    Returns whatever parseTagOpTest returns.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Extract the number following '@optest', with or without brackets.
    sNumber = sTag[8:-1] if sTag[-1] == ']' else sTag[7:];
    iTest   = int(sNumber);

    cTests = len(oInstr.aoTests);
    if iTest != cTests:
        self.errorComment(iTagLine, '%s: incorrect test number: %u, actual %u' % (sTag, iTest, cTests,));
    return self.parseTagOpTest(sTag, aasSections, iTagLine, iEndLine);
4043
def parseTagOpTestIgnore(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@optestign | \@optestignore
    Value:      <value is ignored>

    This is a simple trick to ignore a test while debugging another.
    All the arguments are ignored.

    See also \@oponlytest.

    Always returns True.
    """
    _ = (sTag, aasSections, iTagLine, iEndLine);
    return True;
4055
def parseTagOpCopyTests(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opcopytests
    Value:      <opstat | function> [..]
    Example:    \@opcopytests add_Eb_Gb

    Trick to avoid duplicating tests for different encodings of the same
    operation.

    Each reference must match self.oReStatsName or self.oReFunctionName and
    is appended to oInstr.asCopyTests.  Invalid and duplicate references
    are reported via self.errorComment and skipped.

    Returns True, except when no reference is given at all in which case
    the result of self.errorComment is returned.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Only the references are recorded here, so that forward references
    # can be handled after all the input has been parsed.
    asToCopy = self.flattenAllSections(aasSections).split();
    if not asToCopy:
        return self.errorComment(iTagLine, '%s: requires at least on reference value' % (sTag,));

    for sToCopy in asToCopy:
        if sToCopy in oInstr.asCopyTests:
            self.errorComment(iTagLine, '%s: ignoring duplicate "%s"' % (sTag, sToCopy,));
        elif self.oReStatsName.match(sToCopy) or self.oReFunctionName.match(sToCopy):
            oInstr.asCopyTests.append(sToCopy);
        else:
            self.errorComment(iTagLine, '%s: invalid instruction reference (opstat or function) "%s" (valid: %s or %s)'
                                        % (sTag, sToCopy, self.oReStatsName.pattern, self.oReFunctionName.pattern));
    return True;
4084
def parseTagOpOnlyTest(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@oponlytest | \@oponly
    Value:  none

    Restricts testing to the instructions carrying this tag, which is useful
    for singling out one or two new instructions or tests.

    See also \@optestignore.

    Returns True after adding the instruction to g_aoOnlyTestInstructions
    (once only), or the errorComment() result if the tag was given a value.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The tag takes no value, so anything left after flattening is an error.
    sUnexpected = self.flattenAllSections(aasSections).strip();
    if not sUnexpected:
        if oInstr not in g_aoOnlyTestInstructions:
            g_aoOnlyTestInstructions.append(oInstr);
        _ = iEndLine;
        return True;

    return self.errorComment(iTagLine, '%s: does not take any value: %s' % (sTag, sUnexpected));
4107
def parseTagOpXcptType(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opxcpttype
    Value:  [none|1|2|3|4|4UA|5|6|7|8|11|12|E1|E1NF|E2|E3|E3NF|E4|E4NF|E5|E5NF|E6|E6NF|E7NF|E9|E9NF|E10|E11|E12|E12NF]

    Sets the SSE or AVX exception type (see SDMv2 2.4, 2.7).

    Exactly one value is accepted and it must be a key of g_kdXcptTypes.  The
    type can only be set once per instruction.

    Returns True on success, otherwise the errorComment() result.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    asTypes = self.flattenAllSections(aasSections).split();
    if len(asTypes) != 1:
        return self.errorComment(iTagLine, '%s: exactly one exception type, please: %s' % (sTag, asTypes,));
    sType = asTypes[0];
    if sType not in g_kdXcptTypes:
        return self.errorComment(iTagLine, '%s: invalid exception type: %s  (valid: %s)'
                                           % (sTag, sType, sorted(g_kdXcptTypes.keys()),));
    # Set it, refusing to overwrite an earlier @opxcpttype.
    if oInstr.sXcptType is not None:
        return self.errorComment(iTagLine,
                                 '%s: attempting to overwrite "%s" with "%s" (only one @opxcpttype)'
                                 % ( sTag, oInstr.sXcptType, sType,));
    oInstr.sXcptType = sType;

    _ = iEndLine;
    return True;
4134
def parseTagOpFunction(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opfunction
    Value:  <VMM function name>

    Explicitly sets the IEM function name.  Normally the name is picked up
    from the FNIEMOP_XXX macro invocation after the description, or generated
    from the mnemonic and operands.

    It is thought it may be necessary to set it when specifying instructions
    whose implementation isn't following immediately or that aren't
    implemented yet.

    Returns True on success, otherwise the errorComment() result (invalid
    name, or a function name was already set).
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The flattened value must look like a function name ...
    sNewName = self.flattenAllSections(aasSections);
    if not self.oReFunctionName.match(sNewName):
        return self.errorComment(iTagLine, '%s: invalid VMM function name: "%s" (valid: %s)'
                                           % (sTag, sNewName, self.oReFunctionName.pattern));

    # ... and must not replace one that is already there.
    sOldName = oInstr.sFunction;
    if sOldName is None:
        oInstr.sFunction = sNewName;
        _ = iEndLine;
        return True;

    return self.errorComment(iTagLine, '%s: attempting to overwrite VMM function name "%s" with "%s"'
                                       % (sTag, sOldName, sNewName,));
4162
def parseTagOpStats(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opstats
    Value:  <VMM statistics base name>

    Explicitly sets the statistics name.  Normally the name is picked up from
    the IEMOP_MNEMONIC macro invocation, or generated from the mnemonic and
    operands.

    It is thought it may be necessary to set it when specifying instructions
    whose implementation isn't following immediately or that aren't
    implemented yet.

    Returns True on success, otherwise the errorComment() result (invalid
    name, or a statistics name was already set).
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The flattened value must look like a statistics name ...
    sNewStats = self.flattenAllSections(aasSections);
    if not self.oReStatsName.match(sNewStats):
        return self.errorComment(iTagLine, '%s: invalid VMM statistics name: "%s" (valid: %s)'
                                           % (sTag, sNewStats, self.oReStatsName.pattern));

    # ... and must not replace one that is already there.
    sOldStats = oInstr.sStats;
    if sOldStats is None:
        oInstr.sStats = sNewStats;
        _ = iEndLine;
        return True;

    return self.errorComment(iTagLine, '%s: attempting to overwrite VMM statistics base name "%s" with "%s"'
                                       % (sTag, sOldStats, sNewStats,));
4190
def parseTagOpDone(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opdone
    Value:  none

    Used to explicitly flush the instructions that have been specified.

    Returns the doneInstructions() result, or the errorComment() result if
    the tag was given a value.
    """
    sFlattened = self.flattenAllSections(aasSections);
    if sFlattened == '':
        _ = iEndLine;
        return self.doneInstructions();

    return self.errorComment(iTagLine, '%s: takes no value, found: "%s"' % (sTag, sFlattened,));
4203
4204 ## @}
4205
4206
def parseComment(self):
    """
    Parse the current comment (self.sComment).

    If it's a opcode specifying comment, we reset the macro stuff.

    The comment is split into lines; if the first line starts with 'Opcode '
    the lines are handed to parseCommentOldOpcode().  Every line starting
    with '@' opens a new tag; the text up to the next tag is collected as a
    list of sections (separated by empty lines) and passed to the handler
    registered for the tag in self.dTagHandlers.

    Returns False if the comment holds neither 'Opcode' nor '@' (nothing is
    parsed then), otherwise True.
    """
    #
    # Reject if comment doesn't seem to contain anything interesting.
    #
    if    self.sComment.find('Opcode') < 0 \
      and self.sComment.find('@') < 0:
        return False;

    #
    # Split the comment into lines, removing leading asterisks and spaces.
    # Also remove leading and trailing empty lines.
    #
    asLines = self.sComment.split('\n');
    for iLine, sLine in enumerate(asLines):
        asLines[iLine] = sLine.lstrip().lstrip('*').lstrip();

    # Each dropped leading line advances self.iCommentLine so that it keeps
    # referring to the first remaining line.
    while asLines and not asLines[0]:
        self.iCommentLine += 1;
        asLines.pop(0);

    while asLines and not asLines[-1]:
        asLines.pop(len(asLines) - 1);

    #
    # Check for old style: Opcode 0x0f 0x12
    #
    # (asLines cannot be empty here: the check at the top found 'Opcode' or
    # '@', and the stripping above removes neither.)
    #
    if asLines[0].startswith('Opcode '):
        self.parseCommentOldOpcode(asLines);

    #
    # Look for @op* tagged data.
    #
    cOpTags      = 0;               # Number of tags that had a handler.
    sFlatDefault = None;            # Flattened text preceding the first tag.
    sCurTag      = '@default';      # Pseudo tag collecting the untagged text.
    iCurTagLine  = 0;
    asCurSection = [];
    aasSections  = [ asCurSection, ];
    for iLine, sLine in enumerate(asLines):
        if not sLine.startswith('@'):
            # Text line: append to the current section; an empty line closes
            # a non-empty section and starts a new one.
            if sLine:
                asCurSection.append(sLine);
            elif asCurSection:
                asCurSection = [];
                aasSections.append(asCurSection);
        else:
            #
            # Process the previous tag.
            #
            # (A trailing empty section is dropped first.)
            if not asCurSection and len(aasSections) > 1:
                aasSections.pop(-1);
            if sCurTag in self.dTagHandlers:
                self.dTagHandlers[sCurTag](sCurTag, aasSections, iCurTagLine, iLine);
                cOpTags += 1;
            elif sCurTag.startswith('@op'):
                self.errorComment(iCurTagLine, 'Unknown tag: %s' % (sCurTag));
            elif sCurTag == '@default':
                sFlatDefault = self.flattenAllSections(aasSections);
            elif '@op' + sCurTag[1:] in self.dTagHandlers:
                self.errorComment(iCurTagLine, 'Did you mean "@op%s" rather than "%s"?' % (sCurTag[1:], sCurTag));
            elif sCurTag in ['@encoding', '@opencoding']:
                self.errorComment(iCurTagLine, 'Did you mean "@openc" rather than "%s"?' % (sCurTag,));

            #
            # New tag.
            #
            # The tag name is lower-cased; any text following it on the same
            # line becomes the first line of its first section.
            asSplit = sLine.split(None, 1);
            sCurTag = asSplit[0].lower();
            if len(asSplit) > 1:
                asCurSection = [asSplit[1],];
            else:
                asCurSection = [];
            aasSections = [asCurSection, ];
            iCurTagLine = iLine;

    #
    # Process the final tag.
    #
    # NOTE(review): unlike the loop above, this does not emit the 'Did you
    #               mean ...' hints for the last tag of a comment - confirm
    #               whether that is intentional.
    #
    if not asCurSection and len(aasSections) > 1:
        aasSections.pop(-1);
    if sCurTag in self.dTagHandlers:
        self.dTagHandlers[sCurTag](sCurTag, aasSections, iCurTagLine, iLine);
        cOpTags += 1;
    elif sCurTag.startswith('@op'):
        self.errorComment(iCurTagLine, 'Unknown tag: %s' % (sCurTag));
    elif sCurTag == '@default':
        sFlatDefault = self.flattenAllSections(aasSections);

    #
    # Don't allow default text in blocks containing @op*.
    #
    if cOpTags > 0 and sFlatDefault:
        self.errorComment(0, 'Untagged comment text is not allowed with @op*: %s' % (sFlatDefault,));

    return True;
4307
def parseMacroInvocation(self, sInvocation):
    """
    Parses a macro invocation.

    sInvocation starts with the macro name; if the closing parenthesis is not
    in it, following lines are taken from self.asLines starting at index
    self.iLine.  Commas and parentheses inside quotes or nested parentheses
    do not split arguments.

    Returns a tuple, first element is the offset following the macro
    invocation.  The second element is a list of macro arguments, where the
    zero'th is the macro name.  An invalid macro name yields the self.error()
    result instead.
    """
    # The name is whatever precedes the first opening parenthesis.
    offParen = sInvocation.find('(');
    if offParen <= 0:
        self.raiseError("macro invocation open parenthesis not found");
    sMacroName = sInvocation[:offParen].strip();
    if not self.oReMacroName.match(sMacroName):
        return self.error("invalid macro name '%s'" % (sMacroName,));
    asResult = [sMacroName, ];

    # Walk the argument list one character at a time.
    iNextLine   = self.iLine;
    cNesting    = 1;
    offCur      = offParen + 1;
    offArgStart = offCur;
    chInQuote   = None;
    while cNesting > 0:
        # Out of text?  Then append the next source line, if there is one.
        if offCur >= len(sInvocation):
            if iNextLine >= len(self.asLines):
                self.error('macro invocation beyond end of file');
                return (offCur, asResult);
            sInvocation += self.asLines[iNextLine];
            iNextLine += 1;
        chCur = sInvocation[offCur];

        if chInQuote:
            # Inside a string or character literal: a backslash skips the
            # following character, the matching quote ends the literal.
            if chCur == '\\' and offCur + 1 < len(sInvocation):
                offCur += 1;
            elif chCur == chInQuote:
                chInQuote = None;
        elif chCur == '"' or chCur == '\'':
            chInQuote = chCur;
        elif chCur == '(':
            cNesting += 1;
        elif chCur == ',' or chCur == ')':
            # Only separators at the outermost level terminate an argument.
            if cNesting == 1:
                asResult.append(sInvocation[offArgStart:offCur].strip());
                offArgStart = offCur + 1;
            if chCur == ')':
                cNesting -= 1;
        offCur += 1;

    return (offCur, asResult);
4358
def findAndParseMacroInvocationEx(self, sCode, sMacro):
    """
    Looks for an invocation of sMacro in sCode and parses it.

    Only the first occurrence of sMacro is considered, and it counts as an
    invocation only if the next non-blank character after it is '('.

    Returns (len(sCode), None) if not found, parseMacroInvocation result if
    found (with the offset adjusted to be relative to the start of sCode).
    """
    offHit = sCode.find(sMacro);
    # Use startswith() rather than indexing: nothing but whitespace may follow
    # the macro name (e.g. at the end of sCode), which must not raise.
    if offHit >= 0 and sCode[offHit + len(sMacro):].lstrip().startswith('('):
        offAfter, asRet = self.parseMacroInvocation(sCode[offHit:])
        return (offHit + offAfter, asRet);
    return (len(sCode), None);
4368
def findAndParseMacroInvocation(self, sCode, sMacro):
    """
    Convenience wrapper around findAndParseMacroInvocationEx() that drops the
    offset.

    Returns None if not found, arguments as per parseMacroInvocation if found.
    """
    tResult = self.findAndParseMacroInvocationEx(sCode, sMacro);
    return tResult[1];
4374
def findAndParseFirstMacroInvocation(self, sCode, asMacro):
    """
    Tries the macro names in asMacro in order and stops at the first one for
    which findAndParseMacroInvocation() finds an invocation in sCode.

    Returns same as findAndParseMacroInvocation.
    """
    # The generator is lazy, so later macro names are not probed after a hit.
    oCandidates = (self.findAndParseMacroInvocation(sCode, sMacro) for sMacro in asMacro);
    return next((asArgs for asArgs in oCandidates if asArgs is not None), None);
4384
def workerIemOpMnemonicEx(self, sMacro, sStats, sAsm, sForm, sUpper, sLower,  # pylint: disable=too-many-arguments
                          sDisHints, sIemHints, asOperands):
    """
    Processes one of the a IEMOP_MNEMONIC0EX, IEMOP_MNEMONIC1EX, IEMOP_MNEMONIC2EX,
    IEMOP_MNEMONIC3EX, and IEMOP_MNEMONIC4EX macros.

    The macro arguments are cross-checked against what the @op* tags already
    set on the last instruction in self.aoCurInstrs (mnemonic, operands,
    encoding, sub-opcode, stats name), and fill in whatever is still unset.
    Mismatches are reported through self.error() without aborting.

    sMacro is only used in messages, sAsm is unused.  Always returns True.
    """
    #
    # Some invocation checks.
    #
    if sUpper != sUpper.upper():
        self.error('%s: bad a_Upper parameter: %s' % (sMacro, sUpper,));
    if sLower != sLower.lower():
        self.error('%s: bad a_Lower parameter: %s' % (sMacro, sLower,));
    if sUpper.lower() != sLower:
        self.error('%s: a_Upper and a_Lower parameters does not match: %s vs %s' % (sMacro, sUpper, sLower,));
    if not self.oReMnemonic.match(sLower):
        self.error('%s: invalid a_Lower: %s  (valid: %s)' % (sMacro, sLower, self.oReMnemonic.pattern,));

    #
    # Check if sIemHints tells us to not consider this macro invocation.
    #
    if sIemHints.find('IEMOPHINT_SKIP_PYTHON') >= 0:
        return True;

    # Apply to the last instruction only for now.
    if not self.aoCurInstrs:
        self.addInstruction();
    oInstr = self.aoCurInstrs[-1];
    # -1 means no mnemonic macro has been recorded for the instruction yet.
    if oInstr.iLineMnemonicMacro == -1:
        oInstr.iLineMnemonicMacro = self.iLine;
    else:
        self.error('%s: already saw a IEMOP_MNEMONIC* macro on line %u for this instruction'
                   % (sMacro, oInstr.iLineMnemonicMacro,));

    # Mnemonic
    if oInstr.sMnemonic is None:
        oInstr.sMnemonic = sLower;
    elif oInstr.sMnemonic != sLower:
        self.error('%s: current instruction and a_Lower does not match: %s vs %s' % (sMacro, oInstr.sMnemonic, sLower,));

    # Process operands.
    if len(oInstr.aoOperands) not in [0, len(asOperands)]:
        self.error('%s: number of operands given by @opN does not match macro: %s vs %s'
                   % (sMacro, len(oInstr.aoOperands), len(asOperands),));
    for iOperand, sType in enumerate(asOperands):
        # Entry [1] of the g_kdOpTypes value is used as the operand location.
        sWhere = g_kdOpTypes.get(sType, [None, None])[1];
        if sWhere is None:
            self.error('%s: unknown a_Op%u value: %s' % (sMacro, iOperand + 1, sType));
            if iOperand < len(oInstr.aoOperands): # error recovery.
                sWhere = oInstr.aoOperands[iOperand].sWhere;
                sType  = oInstr.aoOperands[iOperand].sType;
            else:
                sWhere = 'reg';
                sType  = 'Gb';
        if iOperand == len(oInstr.aoOperands):
            oInstr.aoOperands.append(Operand(sWhere, sType))
        elif oInstr.aoOperands[iOperand].sWhere != sWhere or oInstr.aoOperands[iOperand].sType != sType:
            self.error('%s: @op%u and a_Op%u mismatch: %s:%s vs %s:%s'
                       % (sMacro, iOperand + 1, iOperand + 1, oInstr.aoOperands[iOperand].sWhere,
                          oInstr.aoOperands[iOperand].sType, sWhere, sType,));

    # Encoding.
    if sForm not in g_kdIemForms:
        self.error('%s: unknown a_Form value: %s' % (sMacro, sForm,));
    else:
        # Entry [0] of the form tuple is compared with / stored as the encoding.
        if oInstr.sEncoding is None:
            oInstr.sEncoding = g_kdIemForms[sForm][0];
        elif g_kdIemForms[sForm][0] != oInstr.sEncoding:
            self.error('%s: current instruction @openc and a_Form does not match: %s vs %s (%s)'
                       % (sMacro, oInstr.sEncoding, g_kdIemForms[sForm], sForm));

        # Check the parameter locations for the encoding.
        if g_kdIemForms[sForm][1] is not None:
            if len(g_kdIemForms[sForm][1]) > len(oInstr.aoOperands):
                self.error('%s: The a_Form=%s has a different operand count: %s (form) vs %s'
                           % (sMacro, sForm, len(g_kdIemForms[sForm][1]), len(oInstr.aoOperands) ));
            else:
                for iOperand, sWhere in enumerate(g_kdIemForms[sForm][1]):
                    if oInstr.aoOperands[iOperand].sWhere != sWhere:
                        self.error('%s: current instruction @op%u and a_Form location does not match: %s vs %s (%s)'
                                   % (sMacro, iOperand + 1, oInstr.aoOperands[iOperand].sWhere, sWhere, sForm,));
                    sOpFormMatch = g_kdOpTypes[oInstr.aoOperands[iOperand].sType][4];
                    # NOTE(review): sForm.find('XOP') below is used as a boolean, so it is
                    #               truthy (-1) when 'XOP' is absent and falsy only when sForm
                    #               starts with 'XOP'; likewise find('VEX') > 0 is False when
                    #               sForm starts with 'VEX'.  Looks unintended - confirm.
                    if    (sOpFormMatch in [ 'REG', 'MEM', ] and sForm.find('_' + sOpFormMatch) < 0) \
                       or (sOpFormMatch in [ 'FIXED', ]      and sForm.find(sOpFormMatch) < 0) \
                       or (sOpFormMatch == 'RM'              and (sForm.find('_MEM') > 0 or sForm.find('_REG') > 0) ) \
                       or (sOpFormMatch == 'V'               and (   not (sForm.find('VEX') > 0 or sForm.find('XOP')) \
                                                                  or sForm.replace('VEX','').find('V') < 0) ):
                        self.error('%s: current instruction @op%u and a_Form type does not match: %s/%s vs %s'
                                   % (sMacro, iOperand + 1, oInstr.aoOperands[iOperand].sType, sOpFormMatch, sForm, ));
                # Operands beyond those the form describes must be of the fixed kind.
                if len(g_kdIemForms[sForm][1]) < len(oInstr.aoOperands):
                    for iOperand in range(len(g_kdIemForms[sForm][1]), len(oInstr.aoOperands)):
                        if    oInstr.aoOperands[iOperand].sType != 'FIXED' \
                          and g_kdOpTypes[oInstr.aoOperands[iOperand].sType][0] != 'IDX_ParseFixedReg':
                            self.error('%s: Expected FIXED type operand #%u following operands given by a_Form=%s: %s (%s)'
                                       % (sMacro, iOperand, sForm, oInstr.aoOperands[iOperand].sType,
                                          oInstr.aoOperands[iOperand].sWhere));


        # Check @opcodesub
        if    oInstr.sSubOpcode \
          and g_kdIemForms[sForm][2] \
          and oInstr.sSubOpcode.find(g_kdIemForms[sForm][2]) < 0:
            self.error('%s: current instruction @opcodesub and a_Form does not match: %s vs %s (%s)'
                       % (sMacro, oInstr.sSubOpcode, g_kdIemForms[sForm][2], sForm,));

    # Stats.
    if not self.oReStatsName.match(sStats):
        self.error('%s: invalid a_Stats value: %s' % (sMacro, sStats,));
    elif oInstr.sStats is None:
        oInstr.sStats = sStats;
    elif oInstr.sStats != sStats:
        self.error('%s: mismatching @opstats and a_Stats value: %s vs %s'
                   % (sMacro, oInstr.sStats, sStats,));

    # Process the hints (simply merge with @ophints w/o checking anything).
    # Each '|' separated item must be '0' or a DISOPTYPE_ prefixed name whose
    # lower-cased remainder is a key of g_kdHints.
    for sHint in sDisHints.split('|'):
        sHint = sHint.strip();
        if sHint.startswith('DISOPTYPE_'):
            sShortHint = sHint[len('DISOPTYPE_'):].lower();
            if sShortHint in g_kdHints:
                oInstr.dHints[sShortHint] = True; # (dummy value, using dictionary for speed)
            else:
                self.error('%s: unknown a_fDisHints value: %s' % (sMacro, sHint,));
        elif sHint != '0':
            self.error('%s: expected a_fDisHints value: %s' % (sMacro, sHint,));

    # Same again for the IEMOPHINT_ prefixed hints.
    for sHint in sIemHints.split('|'):
        sHint = sHint.strip();
        if sHint.startswith('IEMOPHINT_'):
            sShortHint = sHint[len('IEMOPHINT_'):].lower();
            if sShortHint in g_kdHints:
                oInstr.dHints[sShortHint] = True; # (dummy value, using dictionary for speed)
            else:
                self.error('%s: unknown a_fIemHints value: %s' % (sMacro, sHint,));
        elif sHint != '0':
            self.error('%s: expected a_fIemHints value: %s' % (sMacro, sHint,));

    _ = sAsm;
    return True;
4524
def workerIemOpMnemonic(self, sMacro, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands):
    """
    Processes one of the a IEMOP_MNEMONIC0, IEMOP_MNEMONIC1, IEMOP_MNEMONIC2,
    IEMOP_MNEMONIC3, and IEMOP_MNEMONIC4 macros.

    These macros carry neither a stats name nor an assembly string, so both
    are derived from the lower-case mnemonic and the operands before handing
    everything to workerIemOpMnemonicEx(), whose result is returned.
    """
    if asOperands:
        sStats = sLower + '_' + '_'.join(asOperands);
        sAsm   = sLower + ' ' + ','.join(asOperands);
    else:
        # No operands: the mnemonic serves as both names.
        sStats = sLower;
        sAsm   = sLower;
    return self.workerIemOpMnemonicEx(sMacro, sStats, sAsm, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands);
4534
def workerIemMcBegin(self, sCode, offBeginStatementInCodeStr, offBeginStatementInLine):
    """
    Process a IEM_MC_BEGIN macro invocation.

    Requires a current function and no MC block in progress (raiseError()
    otherwise).  A new McBlock is created, made the current block and
    appended to g_aoMcBlocks, and the block counters are bumped.

    Returns True.
    """
    if self.fDebugMc:
        self.debug('IEM_MC_BEGIN on %s off %s' % (self.iLine, offBeginStatementInLine,));
        #self.debug('%s<eos>' % (sCode,));

    # Preconditions: inside a function and not nested in another block.
    if not self.oCurFunction:
        self.raiseError('IEM_MC_BEGIN w/o current function (%s)' % (sCode,));
    if self.oCurMcBlock:
        self.raiseError('IEM_MC_BEGIN before IEM_MC_END.  Previous IEM_MC_BEGIN at line %u' % (self.oCurMcBlock.iBeginLine,));

    # The indent is the distance from the start of the (possibly macro
    # expanded, hence multi-line) text line holding the statement.  rfind()
    # yields -1 when there is no preceding newline, making the start 0.
    offLineStart = sCode.rfind('\n', 0, offBeginStatementInCodeStr) + 1;
    cchIndent    = offBeginStatementInCodeStr - offLineStart;

    # Create and register the new block.
    oNewBlock = McBlock(self.sSrcFile, self.iLine, offBeginStatementInLine,
                        self.oCurFunction, self.iMcBlockInFunc, cchIndent);
    self.oCurMcBlock = oNewBlock;
    g_aoMcBlocks.append(oNewBlock);
    self.cTotalMcBlocks += 1;
    self.iMcBlockInFunc += 1;
    return True;
4563
def workerIemMcEnd(self, offEndStatementInLine):
    """
    Process a IEM_MC_END macro invocation.

    Collects the source lines belonging to the current MC block (from its
    IEM_MC_BEGIN up to and including the 'IEM_MC_END();' statement), passes
    them to the block's complete() method and clears self.oCurMcBlock.

    Calls raiseError() if there is no current block or if the final line
    does not hold a well formed 'IEM_MC_END ( ) ;' statement.

    Returns True.
    """
    if self.fDebugMc:
        self.debug('IEM_MC_END on %s off %s' % (self.iLine, offEndStatementInLine,));

    # Check preconditions.
    if not self.oCurMcBlock:
        self.raiseError('IEM_MC_END w/o IEM_MC_BEGIN.');

    #
    # HACK ALERT! For blocks originating from macro expansion the start and
    #             end line will be the same, but the line has multiple
    #             newlines inside it.  So, we have to do some extra tricks
    #             to get the lines out of there. We ASSUME macros aren't
    #             messy, but keep IEM_MC_BEGIN/END on separate lines.
    #
    if self.iLine > self.oCurMcBlock.iBeginLine:
        # Regular case: the block spans several entries of self.asLines
        # (iBeginLine and self.iLine are used as 1-based line numbers here).
        asLines = self.asLines[self.oCurMcBlock.iBeginLine - 1 : self.iLine];
        if not asLines[0].strip().startswith('IEM_MC_BEGIN'):
            self.raiseError('IEM_MC_BEGIN is not the first word on the line');
    else:
        sRawLine = self.asLines[self.iLine - 1];

        # Cut off everything after the text line holding the end statement.
        off = sRawLine.find('\n', offEndStatementInLine);
        if off > 0:
            sRawLine = sRawLine[:off + 1];

        # Cut off everything before the text line holding the begin statement;
        # if that line has something in front of IEM_MC_BEGIN, drop that too.
        off = sRawLine.rfind('\n', 0, self.oCurMcBlock.offBeginLine) + 1;
        sRawLine = sRawLine[off:];
        if not sRawLine.strip().startswith('IEM_MC_BEGIN'):
            sRawLine = sRawLine[self.oCurMcBlock.offBeginLine - off:]

        asLines = [sLine + '\n' for sLine in sRawLine.split('\n')];

    #
    # Strip anything following the IEM_MC_END(); statement in the final line,
    # so that we don't carry on any trailing 'break' after macro expansions
    # like for iemOp_movsb_Xb_Yb.
    #
    # NOTE(review): the whitespace skipping loops below index sFinal without a
    #               bounds check, so a final line ending right after
    #               'IEM_MC_END', '(' or ')' looks like it would raise
    #               IndexError instead of the 'bogus IEM_MC_END' error - confirm.
    #
    while asLines[-1].strip() == '':
        asLines.pop();
    sFinal      = asLines[-1];
    offFinalEnd = sFinal.find('IEM_MC_END');
    if offFinalEnd < 0: self.raiseError('bogus IEM_MC_END: Not in final line: %s' % (sFinal,));
    offFinalEnd += len('IEM_MC_END');

    while sFinal[offFinalEnd].isspace():
        offFinalEnd += 1;
    if sFinal[offFinalEnd] != '(': self.raiseError('bogus IEM_MC_END: Expected "(" at %s: %s' % (offFinalEnd, sFinal,));
    offFinalEnd += 1;

    while sFinal[offFinalEnd].isspace():
        offFinalEnd += 1;
    if sFinal[offFinalEnd] != ')': self.raiseError('bogus IEM_MC_END: Expected ")" at %s: %s' % (offFinalEnd, sFinal,));
    offFinalEnd += 1;

    while sFinal[offFinalEnd].isspace():
        offFinalEnd += 1;
    if sFinal[offFinalEnd] != ';': self.raiseError('bogus IEM_MC_END: Expected ";" at %s: %s' % (offFinalEnd, sFinal,));
    offFinalEnd += 1;

    # The final line now ends right after the semicolon (no trailing newline).
    asLines[-1] = sFinal[: offFinalEnd];

    #
    # Complete and discard the current block.
    #
    self.oCurMcBlock.complete(self.iLine, offEndStatementInLine, asLines);
    self.oCurMcBlock = None;
    return True;
4635
def workerStartFunction(self, asArgs):
    """
    Deals with the start of a decoder function.

    Decoder functions are all defined using one of the FNIEMOP*_DEF* and
    FNIEMOP_*STUB* macros, so asArgs is the argument list of such a macro
    with the macro name as the 0th entry and the function name as the 1st.

    Any function still open is completed with the lines up to the one before
    the current line, then a new DecoderFunction becomes self.oCurFunction.

    Returns True.
    """
    # Close the previous function, which ends on the line before this one.
    oPrevFunction = self.oCurFunction;
    if oPrevFunction:
        iLastLine = self.iLine - 1;
        oPrevFunction.complete(iLastLine, self.asLines[oPrevFunction.iBeginLine - 1 : iLastLine]);

    # Open the new one.
    self.oCurFunction = DecoderFunction(self.sSrcFile, self.iLine, asArgs[1], asArgs);
    return True;
4651
def checkCodeForMacro(self, sCode, offLine):
    """
    Checks code for relevant macro invocation.

    sCode is the code text to scan and offLine is added to match offsets
    before they are handed to the IEM_MC_BEGIN / IEM_MC_END workers.

    Returns True if a function definition macro or an IEM_MC_* statement was
    handled, otherwise False.
    """

    #
    # Scan macro invocations.
    #
    if sCode.find('(') > 0:
        # Look for instruction decoder function definitions. ASSUME single line.
        asArgs = self.findAndParseFirstMacroInvocation(sCode,
                                                       [ 'FNIEMOP_DEF',
                                                         'FNIEMOPRM_DEF',
                                                         'FNIEMOP_STUB',
                                                         'FNIEMOP_STUB_1',
                                                         'FNIEMOP_UD_STUB',
                                                         'FNIEMOP_UD_STUB_1' ]);
        if asArgs is not None:
            self.workerStartFunction(asArgs);
            #self.debug('%s: oCurFunction=%s' % (self.iLine, self.oCurFunction.sName,));

            if not self.aoCurInstrs:
                self.addInstruction();
            # Record the line of the macro on each current instruction; -1
            # means none has been seen for it yet.
            for oInstr in self.aoCurInstrs:
                if oInstr.iLineFnIemOpMacro == -1:
                    oInstr.iLineFnIemOpMacro = self.iLine;
                else:
                    self.error('%s: already seen a FNIEMOP_XXX macro for %s' % (asArgs[0], oInstr,) );
            self.setInstrunctionAttrib('sFunction', asArgs[1]);
            self.setInstrunctionAttrib('fStub', asArgs[0].find('STUB') > 0, fOverwrite = True);
            self.setInstrunctionAttrib('fUdStub', asArgs[0].find('UD_STUB') > 0, fOverwrite = True);
            # A stub macro is the whole function, so the instructions are done.
            if asArgs[0].find('STUB') > 0:
                self.doneInstructions(fEndOfFunction = True);
            return True;

        # Check for worker function definitions, so we can get a context for MC blocks.
        asArgs = self.findAndParseFirstMacroInvocation(sCode,
                                                       [ 'FNIEMOP_DEF_1',
                                                         'FNIEMOP_DEF_2', ]);
        if asArgs is not None:
            self.workerStartFunction(asArgs);
            #self.debug('%s: oCurFunction=%s (%s)' % (self.iLine, self.oCurFunction.sName, asArgs[0]));
            return True;

        # IEMOP_HLP_DONE_VEX_DECODING_*
        asArgs = self.findAndParseFirstMacroInvocation(sCode,
                                                       [ 'IEMOP_HLP_DONE_VEX_DECODING',
                                                         'IEMOP_HLP_DONE_VEX_DECODING_L0',
                                                         'IEMOP_HLP_DONE_VEX_DECODING_NO_VVVV',
                                                         'IEMOP_HLP_DONE_VEX_DECODING_L0_AND_NO_VVVV',
                                                         ]);
        if asArgs is not None:
            sMacro = asArgs[0];
            # The L0 variants imply the 'vex_l_zero' hint; complain if a
            # mnemonic macro was seen without it and set the hint regardless.
            if sMacro in ('IEMOP_HLP_DONE_VEX_DECODING_L0', 'IEMOP_HLP_DONE_VEX_DECODING_L0_AND_NO_VVVV', ):
                for oInstr in self.aoCurInstrs:
                    if 'vex_l_zero' not in oInstr.dHints:
                        if oInstr.iLineMnemonicMacro >= 0:
                            self.errorOnLine(oInstr.iLineMnemonicMacro,
                                             'Missing IEMOPHINT_VEX_L_ZERO! (%s on line %d)' % (sMacro, self.iLine,));
                        oInstr.dHints['vex_l_zero'] = True;

        #
        # IEMOP_MNEMONIC*
        #
        # The trailing argument indexes in the calls below shift with the
        # number of a_OpN parameters each macro variant takes.
        #
        if sCode.find('IEMOP_MNEMONIC') >= 0:
            # IEMOP_MNEMONIC(a_Stats, a_szMnemonic) IEMOP_INC_STATS(a_Stats)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC');
            if asArgs is not None:
                if len(self.aoCurInstrs) == 1:
                    oInstr = self.aoCurInstrs[0];
                    if oInstr.sStats is None:
                        oInstr.sStats = asArgs[1];
                    self.deriveMnemonicAndOperandsFromStats(oInstr, asArgs[1]);

            # IEMOP_MNEMONIC0EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC0EX');
            if asArgs is not None:
                self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[6],
                                           asArgs[7], []);
            # IEMOP_MNEMONIC1EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC1EX');
            if asArgs is not None:
                self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[7],
                                           asArgs[8], [asArgs[6],]);
            # IEMOP_MNEMONIC2EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC2EX');
            if asArgs is not None:
                self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[8],
                                           asArgs[9], [asArgs[6], asArgs[7]]);
            # IEMOP_MNEMONIC3EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_fDisHints,
            #                   a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC3EX');
            if asArgs is not None:
                self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[9],
                                           asArgs[10], [asArgs[6], asArgs[7], asArgs[8],]);
            # IEMOP_MNEMONIC4EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_Op4, a_fDisHints,
            #                   a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC4EX');
            if asArgs is not None:
                self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[10],
                                           asArgs[11], [asArgs[6], asArgs[7], asArgs[8], asArgs[9],]);

            # IEMOP_MNEMONIC0(a_Form, a_Upper, a_Lower, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC0');
            if asArgs is not None:
                self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], []);
            # IEMOP_MNEMONIC1(a_Form, a_Upper, a_Lower, a_Op1, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC1');
            if asArgs is not None:
                self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[5], asArgs[6], [asArgs[4],]);
            # IEMOP_MNEMONIC2(a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC2');
            if asArgs is not None:
                self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[6], asArgs[7],
                                         [asArgs[4], asArgs[5],]);
            # IEMOP_MNEMONIC3(a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC3');
            if asArgs is not None:
                self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[7], asArgs[8],
                                         [asArgs[4], asArgs[5], asArgs[6],]);
            # IEMOP_MNEMONIC4(a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_Op4, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC4');
            if asArgs is not None:
                self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[8], asArgs[9],
                                         [asArgs[4], asArgs[5], asArgs[6], asArgs[7],]);

        #
        # IEM_MC_BEGIN + IEM_MC_END.
        # We must support multiple instances per code snippet.
        #
        # (Group 1 of self.oReMcBeginEnd tells END matches from the rest.)
        #
        offCode = sCode.find('IEM_MC_');
        if offCode >= 0:
            for oMatch in self.oReMcBeginEnd.finditer(sCode, offCode):
                if oMatch.group(1) == 'END':
                    self.workerIemMcEnd(offLine + oMatch.start());
                else:
                    self.workerIemMcBegin(sCode, oMatch.start(), offLine + oMatch.start());
            return True;

    return False;
4792
def workerPreProcessRecreateMacroRegex(self):
    r"""
    Recreates self.oReMacros when self.dMacros changes.

    The resulting expression has one capture group with an alternative per
    macro: function-like macros (asArgs is not None) match the name followed
    by optional whitespace and the opening parenthesis, the others match the
    name followed by a word boundary.  For example: \b(FOO\s*\(|BAR\b)

    self.oReMacros is set to None when there are no macros.  Returns True.
    """
    if self.dMacros:
        # Raw strings keep the regex escapes intact without relying on Python
        # passing unknown escape sequences through.
        asAlternatives = [];
        for sName, oMacro in self.dMacros.items():
            if oMacro.asArgs is not None:
                asAlternatives.append(sName + r'\s*\(');
            else:
                asAlternatives.append(sName + r'\b');
        self.oReMacros = re.compile(r'\b(' + '|'.join(asAlternatives) + ')');
    else:
        self.oReMacros = None;
    return True;
4813
def workerPreProcessDefine(self, sRest):
    """
    Handles a macro #define, the sRest is what follows after the directive word.

    Continuation lines are pulled from self.asLines (advancing self.iLine).
    Only macros whose body contains IEM_MC_BEGIN are recorded in
    self.dMacros; everything else is ignored.

    Returns True when the macro is irrelevant, the self.error() result when
    the definition cannot be parsed, and otherwise the result of
    workerPreProcessRecreateMacroRegex().
    """

    #
    # Join lines using line continuation into one string, keeping the newline
    # characters but dropping the backslashes escaping them.
    #
    iFirstLine = self.iLine;
    while sRest.endswith('\\\n') and self.iLine < len(self.asLines):
        sRest = sRest[:-2].rstrip() + '\n' + self.asLines[self.iLine];
        self.iLine += 1;
    #self.debug('workerPreProcessDefine: sRest=%s<EOS>' % (sRest,));

    #
    # Split out name, parameter list and body.  Try the expression for macros
    # with parameters first, then fall back on the one for simple macros.
    #
    oMatchWithArgs = self.oReHashDefine2.match(sRest);
    if oMatchWithArgs:
        sName    = oMatchWithArgs.group(1);
        asParams = [sParam.strip() for sParam in oMatchWithArgs.group(2).split(',')];
        sBody    = oMatchWithArgs.group(3);
    else:
        oMatchSimple = self.oReHashDefine3.match(sRest);
        if not oMatchSimple:
            self.debug('workerPreProcessDefine: wtf? sRest=%s' % (sRest,));
            return self.error('bogus macro definition: %s' % (sRest,));
        sName    = oMatchSimple.group(1);
        asParams = None;
        sBody    = oMatchSimple.group(2);
    assert sName == sName.strip();
    #self.debug('workerPreProcessDefine: sName=%s asArgs=%s sBody=%s<EOS>' % (sName, asParams, sBody));

    #
    # Only macros producing MC blocks are of interest.  MC blocks within
    # nested macro expansion are NOT supported, to avoid lots of extra work.
    #
    if sBody.find("IEM_MC_BEGIN") < 0:
        #self.debug('workerPreProcessDefine: irrelevant (%s: %s)' % (sName, sBody));
        return True;

    #
    # Record the macro and refresh the lookup expression.
    #
    if self.fDebugPreProc:
        self.debug('#define %s on line %u' % (sName, self.iLine,));
    self.dMacros[sName] = SimpleParser.Macro(sName, asParams, sBody.strip(), iFirstLine);
    return self.workerPreProcessRecreateMacroRegex();
4863
def workerPreProcessUndef(self, sRest):
    """
    Handles a macro #undef, the sRest is what follows after the directive word.

    Returns True if the macro is not one we track, otherwise the result of
    workerPreProcessRecreateMacroRegex() after dropping it from self.dMacros.
    """
    # Crude comment stripping: cut at the first slash (unless it is the very
    # first character), then trim to get the name.
    offSlash = sRest.find('/');
    sName = (sRest[:offSlash] if offSlash > 0 else sRest).strip();

    # Nothing to do unless we are tracking the macro.
    if sName not in self.dMacros:
        return True;

    if self.fDebugPreProc:
        self.debug('#undef %s on line %u' % (sName, self.iLine,));
    del self.dMacros[sName];
    return self.workerPreProcessRecreateMacroRegex();
4882
def checkPreProcessorDirectiveForDefineUndef(self, sLine):
    """
    Hands #define and #undef lines over to their worker methods.

    The line is tried against self.oReHashDefine first and self.oReHashUndef
    second; the first capture group plus a newline is what the worker gets.

    Returns the worker result on a match, False if neither pattern matched.
    """
    oHit = self.oReHashDefine.match(sLine);
    if not oHit:
        oHit = self.oReHashUndef.match(sLine);
        if not oHit:
            return False;
        return self.workerPreProcessUndef(oHit.group(1) + '\n');
    return self.workerPreProcessDefine(oHit.group(1) + '\n');
4895
def expandMacros(self, sLine, oMatch):
    """
    Expands the macro invocation oMatch located in sLine.

    Only the one invocation matched by oMatch is handled, and for macros
    taking arguments the whole argument list must be on this line.

    Returns sLine with the invocation replaced by the macro expansion.
    """
    #
    # Work out which macro this is.  When the matched text ends with an
    # opening parenthesis an argument list follows.
    #
    offStart = oMatch.start();
    offEnd   = oMatch.end();
    sMacro   = oMatch.group(1);
    assert sMacro == sLine[offStart:offEnd];
    fHasArgList = sMacro.endswith('(');
    if fHasArgList:
        sMacro = sMacro[:-1].strip();
    oMacro = self.dMacros[sMacro] # type: SimpleParser.Macro

    if fHasArgList:
        #
        # Collect the arguments by scanning for the closing parenthesis,
        # splitting on commas that are not inside nested parentheses.
        #
        cDepth      = 1;
        offScan     = offEnd;
        offArgStart = offEnd;
        asActual    = [];
        while True:
            if offScan >= len(sLine):
                self.raiseError('expandMacros: Invocation of macro %s spans multiple lines!' % (sMacro,));
            chCur = sLine[offScan];
            if chCur == ',' and cDepth == 1:
                asActual.append(sLine[offArgStart:offScan].strip());
                offArgStart = offScan + 1;
            elif chCur == '(':
                cDepth += 1;
            elif chCur == ')':
                cDepth -= 1;
                if cDepth == 0:
                    break;
            offScan += 1;
        # offScan is now at the closing parenthesis; what precedes it is the last argument.
        asActual.append(sLine[offArgStart:offScan].strip());

        # An empty pair of parentheses yields a single empty argument, which
        # is taken as 'no arguments' when the macro does not declare any.
        cExpected = len(oMacro.asArgs);
        if cExpected == 0 and asActual == ['']:
            asActual = [];
        if cExpected != len(asActual):
            self.raiseError('expandMacros: Argument mismatch in %s invocation' % (oMacro.sName,));

        if self.fDebugPreProc:
            self.debug('expanding macro %s on line %u with arguments %s' % (sMacro, self.iLine, asActual));
        return sLine[:offStart] + oMacro.expandMacro(self, asActual) + sLine[offScan + 1 :];

    #
    # Plain macro without any argument list.
    #
    if self.fDebugPreProc:
        self.debug('expanding simple macro %s on line %u' % (sMacro, self.iLine,));
    return sLine[:offStart] + oMacro.expandMacro(self) + sLine[offEnd:];
4954
def parse(self):
    """
    Parses the given file.

    Walks self.asLines from self.iLine onwards, one line at a time, keeping
    track of whether we are in code (kiCode) or inside a multi-line comment
    (kiCommentMulti).  Completed comments are handed to parseComment() and
    code fragments to checkCodeForMacro().

    Returns number of errors (whatever printErrors() returns).
    Raises exception on fatal trouble.
    """
    #self.debug('Parsing %s' % (self.sSrcFile,));

    while self.iLine < len(self.asLines):
        sLine = self.asLines[self.iLine];
        self.iLine += 1;    # From here on self.iLine is the 1-based number of the current line.
        #self.debug('line %u: %s' % (self.iLine, sLine[:-1]));

        # Expand macros we know about if we're currently in code.
        # The expanded line is written back into self.asLines.
        if self.iState == self.kiCode and self.oReMacros:
            oMatch = self.oReMacros.search(sLine);
            if oMatch:
                sLine = self.expandMacros(sLine, oMatch);
                if self.fDebugPreProc:
                    self.debug('line %d: expanded\n%s ==>\n%s' % (self.iLine, self.asLines[self.iLine - 1], sLine[:-1],));
                self.asLines[self.iLine - 1] = sLine;

        # Look for comments.
        offSlash = sLine.find('/');
        if offSlash >= 0:
            # Take the comment scanning path unless the first slash starts a
            # C++ style '//' comment while we are in code.
            if offSlash + 1 >= len(sLine) or sLine[offSlash + 1] != '/' or self.iState != self.kiCode:
                offLine = 0;
                while offLine < len(sLine):
                    if self.iState == self.kiCode:
                        # Look for substantial multiline comment so we pass the following MC as a whole line:
                        # IEM_MC_ARG_CONST(uint8_t, bImmArg, /*=*/ bImm, 2);
                        # Note! We ignore C++ comments here, assuming these aren't used in lines with C-style comments.
                        # Comments closed on this line within 16 characters of their start are skipped over.
                        offHit = sLine.find('/*', offLine);
                        while offHit >= 0:
                            offEnd = sLine.find('*/', offHit + 2);
                            if offEnd < 0 or offEnd - offHit >= 16: # 16 chars is a bit random.
                                break;
                            offHit = sLine.find('/*', offEnd);

                        if offHit >= 0:
                            # Code up to the comment start, then switch to comment state.
                            self.checkCodeForMacro(sLine[offLine:offHit], offLine);
                            self.sComment = '';
                            self.iCommentLine = self.iLine;
                            self.iState = self.kiCommentMulti;
                            offLine = offHit + 2;
                        else:
                            # The rest of the line is code.
                            self.checkCodeForMacro(sLine[offLine:], offLine);
                            offLine = len(sLine);

                    elif self.iState == self.kiCommentMulti:
                        offHit = sLine.find('*/', offLine);
                        if offHit >= 0:
                            # End of the comment: collect the text, go back to code and parse it.
                            self.sComment += sLine[offLine:offHit];
                            self.iState = self.kiCode;
                            offLine = offHit + 2;
                            self.parseComment();
                        else:
                            # The comment continues on the next line.
                            self.sComment += sLine[offLine:];
                            offLine = len(sLine);
                    else:
                        assert False;
            # C++ line comment.  Only what precedes it is checked; nothing to do if it starts the line.
            elif offSlash > 0:
                self.checkCodeForMacro(sLine[:offSlash], 0);

        # No slash, but append the line if in multi-line comment.
        elif self.iState == self.kiCommentMulti:
            #self.debug('line %d: multi' % (self.iLine,));
            self.sComment += sLine;

        # No slash, but check if this is a macro #define or #undef, since we
        # need to be able to selectively expand the ones containing MC blocks.
        elif self.iState == self.kiCode and sLine.lstrip().startswith('#'):
            if self.fDebugPreProc:
                self.debug('line %d: pre-proc' % (self.iLine,));
            self.checkPreProcessorDirectiveForDefineUndef(sLine);

        # No slash, but check code line for relevant macro.
        elif ( self.iState == self.kiCode
              and (sLine.find('IEMOP_') >= 0 or sLine.find('FNIEMOPRM_DEF') >= 0 or sLine.find('IEM_MC') >= 0)):
            #self.debug('line %d: macro' % (self.iLine,));
            self.checkCodeForMacro(sLine, 0);

        # If the line is a '}' in the first position, complete the instructions.
        elif self.iState == self.kiCode and sLine[0] == '}':
            #self.debug('line %d: }' % (self.iLine,));
            self.doneInstructions(fEndOfFunction = True);

        # Look for instruction table on the form 'IEM_STATIC const PFNIEMOP g_apfnVexMap3'
        # so we can check/add @oppfx info from it.
        elif self.iState == self.kiCode and sLine.find('PFNIEMOP') > 0 and self.oReFunTable.match(sLine):
            self.parseFunctionTable(sLine);

    # End of file: complete whatever is pending and log the stub statistics
    # (max() avoids dividing by zero when there are no instructions).
    self.doneInstructions(fEndOfFunction = True);
    self.debug('%3s%% / %3s stubs out of %4s instructions and %4s MC blocks in %s'
               % (self.cTotalStubs * 100 // max(self.cTotalInstr, 1), self.cTotalStubs, self.cTotalInstr,
                  self.cTotalMcBlocks, os.path.basename(self.sSrcFile),));
    return self.printErrors();
5053
## The parsed content of IEMAllInstructionsCommonBodyMacros.h.
## None until __parseFileByName() has parsed that file on its first call.
g_oParsedCommonBodyMacros = None # type: SimpleParser
5056
def __parseFileByName(sSrcFile, sDefaultMap):
    """
    Parses one source file for instruction specifications.

    On the first call IEMAllInstructionsCommonBodyMacros.h is parsed too
    (looked for next to sSrcFile first and next to this script second), and
    the resulting parser is kept in g_oParsedCommonBodyMacros so it can be
    passed to the parser of this and all following files.

    Returns a tuple with the oParser.parse() result and the parser.
    Raises Exception if a file cannot be opened or read, and re-raises any
    ParserException after printing it to stderr.
    """
    #
    # Read sSrcFile into a line array.
    #
    try:
        oFile = open(sSrcFile, "r"); # pylint: disable=consider-using-with,unspecified-encoding
    except Exception as oXcpt:
        raise Exception("failed to open %s for reading: %s" % (sSrcFile, oXcpt,));
    try:
        asLines = oFile.readlines();
    except Exception as oXcpt:
        raise Exception("failed to read %s: %s" % (sSrcFile, oXcpt,));
    finally:
        oFile.close();

    #
    # On the first call, we parse IEMAllInstructionsCommonBodyMacros.h so we
    # can use the macros from it when processing the other files.
    #
    global g_oParsedCommonBodyMacros;
    if g_oParsedCommonBodyMacros is None:
        # Locate the file.
        sCommonBodyMacros = os.path.join(os.path.split(sSrcFile)[0], 'IEMAllInstructionsCommonBodyMacros.h');
        if not os.path.isfile(sCommonBodyMacros):
            sCommonBodyMacros = os.path.join(os.path.split(__file__)[0], 'IEMAllInstructionsCommonBodyMacros.h');

        # Read it.
        try:
            with open(sCommonBodyMacros, "r") as oIncFile: # pylint: disable=unspecified-encoding
                asIncFiles = oIncFile.readlines();
        except Exception as oXcpt:
            raise Exception("failed to open/read %s: %s" % (sCommonBodyMacros, oXcpt,));

        # Parse it.  The file is only expected to provide macros, so finding
        # any instructions, tags, stubs or MC blocks in it is an error.
        try:
            oParser = SimpleParser(sCommonBodyMacros, asIncFiles, 'one');
            if oParser.parse() != 0:
                raise ParserException('%s: errors: See above' % (sCommonBodyMacros, ));
            if oParser.cTotalInstr != 0 or oParser.cTotalStubs != 0 or oParser.cTotalTagged != 0 or oParser.cTotalMcBlocks != 0:
                # Note! The argument order must follow the message text: instr, tags, stubs, MCs.
                raise ParserException('%s: error: Unexpectedly found %u instr, %u tags, %u stubs and %u MCs, expecting zero. %s'
                                      % (sCommonBodyMacros, oParser.cTotalInstr, oParser.cTotalTagged, oParser.cTotalStubs,
                                         oParser.cTotalMcBlocks,
                                         ', '.join(sorted( [str(oMcBlock.iBeginLine) for oMcBlock in g_aoMcBlocks]
                                                          + [str(oInstr.iLineCreated) for oInstr in g_aoAllInstructions])),));
        except ParserException as oXcpt:
            print(str(oXcpt), file = sys.stderr);
            raise;
        g_oParsedCommonBodyMacros = oParser;

    #
    # Do the parsing.
    #
    try:
        oParser = SimpleParser(sSrcFile, asLines, sDefaultMap, g_oParsedCommonBodyMacros);
        return (oParser.parse(), oParser) ;
    except ParserException as oXcpt:
        print(str(oXcpt), file = sys.stderr);
        raise;
5118
5119
def __doTestCopying():
    """
    Carries out the asCopyTests requests of all instructions.

    Each reference is looked up in g_dAllInstructionsByStat first and in
    g_dAllInstructionsByFunction second.  The tests of every instruction
    found are appended to those of the requesting instruction.  Unresolved
    references and references to the requester itself are reported on stderr.

    Returns the number of errors.
    """
    asProblems = [];
    for oTarget in g_aoAllInstructions:
        if not oTarget.asCopyTests:
            continue;
        for sReference in oTarget.asCopyTests:
            # A hit in the first dictionary is a single instruction, the
            # second dictionary gives a list of them.
            oSingle = g_dAllInstructionsByStat.get(sReference, None);
            aoSources = [oSingle,] if oSingle else g_dAllInstructionsByFunction.get(sReference, []);
            if not aoSources:
                asProblems.append('%s:%s: error: @opcopytests reference "%s" not found\n'
                                  % ( oTarget.sSrcFile, oTarget.iLineCreated, sReference));
                continue;

            for oSource in aoSources:
                if oSource != oTarget:
                    oTarget.aoTests.extend(oSource.aoTests);
                else:
                    asProblems.append('%s:%s: error: @opcopytests reference "%s" matches the destination\n'
                                      % ( oTarget.sSrcFile, oTarget.iLineCreated, sReference));

    if asProblems:
        sys.stderr.write(u''.join(asProblems));
    return len(asProblems);
5147
5148
def __applyOnlyTest():
    """
    Restricts testing to the instructions in g_aoOnlyTestInstructions.

    When that list is not empty, aoTests is reset to an empty list on every
    instruction in g_aoAllInstructions that has tests but is not in it.

    Returns 0 (no errors).
    """
    if not g_aoOnlyTestInstructions:
        return 0;

    for oCandidate in g_aoAllInstructions:
        if oCandidate.aoTests and oCandidate not in g_aoOnlyTestInstructions:
            oCandidate.aoTests = [];
    return 0;
5160
## List of all main instruction files and their default maps.
## Each entry is a (file name, default map name) pair.  parseAll() passes the
## whole table to __parseFilesWorker(), and parseFiles() uses it to look up
## the default map by case-insensitive file name.
g_aasAllInstrFilesAndDefaultMap = (
    ( 'IEMAllInstructionsCommon.cpp.h', 'one', ),
    ( 'IEMAllInstructionsOneByte.cpp.h', 'one', ),
    ( 'IEMAllInstructionsTwoByte0f.cpp.h', 'two0f', ),
    ( 'IEMAllInstructionsThree0f38.cpp.h', 'three0f38', ),
    ( 'IEMAllInstructionsThree0f3a.cpp.h', 'three0f3a', ),
    ( 'IEMAllInstructionsVexMap1.cpp.h', 'vexmap1', ),
    ( 'IEMAllInstructionsVexMap2.cpp.h', 'vexmap2', ),
    ( 'IEMAllInstructionsVexMap3.cpp.h', 'vexmap3', ),
    ( 'IEMAllInstructions3DNow.cpp.h', '3dnow', ),
);
5173
def __parseFilesWorker(asFilesAndDefaultMap):
    """
    Parses the given IEMAllInstruction*.cpp.h files.

    asFilesAndDefaultMap is a sequence of (file name, default map) pairs.  A
    bare file name that does not exist relative to the current directory is
    looked for in the directory of this script instead.

    After parsing, the test copying and only-test filtering are applied and
    the total stub statistics are printed to stderr.

    Returns a list of the parsers on success.
    Raises exception on failure (including when any errors were counted).
    """
    sSrcDir = os.path.dirname(os.path.abspath(__file__));
    cErrors = 0;
    aoParsers = [];
    for sFilename, sDefaultMap in asFilesAndDefaultMap:
        if not os.path.split(sFilename)[0] and not os.path.exists(sFilename):
            sFilename = os.path.join(sSrcDir, sFilename);
        cThisErrors, oParser = __parseFileByName(sFilename, sDefaultMap);
        cErrors += cThisErrors;
        aoParsers.append(oParser);
    cErrors += __doTestCopying();
    cErrors += __applyOnlyTest();

    # Total stub stats:
    # Note! max() keeps the percentage calculation from dividing by zero when
    #       no instructions were collected, same as SimpleParser.parse() does.
    cTotalStubs = 0;
    for oInstr in g_aoAllInstructions:
        cTotalStubs += oInstr.fStub;
    cTotalInstr = len(g_aoAllInstructions);
    print('debug: %3s%% / %3s stubs out of %4s instructions and %4s MC blocks in total'
          % (cTotalStubs * 100 // max(cTotalInstr, 1), cTotalStubs, cTotalInstr, len(g_aoMcBlocks),),
          file = sys.stderr);

    if cErrors != 0:
        raise Exception('%d parse errors' % (cErrors,));
    return aoParsers;
5204
5205
def parseFiles(asFiles):
    """
    Parses a selection of IEMAllInstruction*.cpp.h files.

    The default map of each file is taken from g_aasAllInstrFilesAndDefaultMap,
    matching on the file name only (directory ignored, case-insensitively).

    Returns a list of the parsers on success.
    Raises exception on failure, also when a file is not in the table.
    """
    aoFilesWithMaps = [];
    for sPath in asFiles:
        sLowerName = os.path.basename(sPath).lower();
        # First table entry with this file name wins, None if there is none.
        sDefaultMap = next((asEntry[1] for asEntry in g_aasAllInstrFilesAndDefaultMap
                            if asEntry[0].lower() == sLowerName), None);
        if not sDefaultMap:
            raise Exception('Unable to classify file: %s' % (sPath,));
        aoFilesWithMaps.append((sPath, sDefaultMap));

    # The worker does the actual job.
    return __parseFilesWorker(aoFilesWithMaps);
5227
5228
def parseAll():
    """
    Parses all the IEMAllInstruction*.cpp.h files, that is every file listed
    in g_aasAllInstrFilesAndDefaultMap.

    Returns a list of the parsers on success.
    Raises exception on failure.
    """
    return __parseFilesWorker(g_aasAllInstrFilesAndDefaultMap);
5237
5238
5239#
5240# Generators (may perhaps move later).
5241#
def __formatDisassemblerTableEntry(oInstr):
    """
    Formats the disassembler table entry for one instruction.

    The entry is an OP(...) macro invocation, or OPVEX(...) when the
    instruction has more than three operands.  The columns are: the format
    string (mnemonic and operands), one parser index per operand slot, the
    opcode enum value (oInstr.sDisEnum), one OP_PARM_xxx per operand slot,
    and finally the DISOPTYPE_xxx flags.

    Returns the entry as a single line with the columns padded to fixed offsets.
    """
    sMacro = 'OP';
    cMaxOperands = 3;
    if len(oInstr.aoOperands) > 3:
        sMacro = 'OPVEX'
        cMaxOperands = 4;
    assert len(oInstr.aoOperands) <= cMaxOperands;

    #
    # Format string.
    # The first operand is separated from the mnemonic by a space, the rest by commas.
    #
    sTmp = '%s("%s' % (sMacro, oInstr.sMnemonic,);
    for iOperand, oOperand in enumerate(oInstr.aoOperands):
        sTmp += ' ' if iOperand == 0 else ',';
        if g_kdOpTypes[oOperand.sType][2][0] != '%': ## @todo remove upper() later.
            sTmp += g_kdOpTypes[oOperand.sType][2].upper(); ## @todo remove upper() later.
        else:
            sTmp += g_kdOpTypes[oOperand.sType][2];
    sTmp += '",';
    asColumns = [ sTmp, ];

    #
    # Decoders.
    # iStart marks where the parser columns begin, so that len(asColumns) - iStart
    # is the number of parser columns added so far.
    #
    iStart = len(asColumns);
    if oInstr.sEncoding is None:
        pass;
    elif oInstr.sEncoding == 'ModR/M':
        # ASSUME the first operand is using the ModR/M encoding
        assert len(oInstr.aoOperands) >= 1 and oInstr.aoOperands[0].usesModRM(), "oInstr=%s" % (oInstr,);
        asColumns.append('IDX_ParseModRM,');
    elif oInstr.sEncoding in [ 'prefix', ]:
        # One zero column per operand.
        for oOperand in oInstr.aoOperands:
            asColumns.append('0,');
    elif oInstr.sEncoding in [ 'fixed', 'VEX.fixed' ]:
        pass;
    elif oInstr.sEncoding == 'VEX.ModR/M':
        asColumns.append('IDX_ParseModRM,');
    elif oInstr.sEncoding == 'vex2':
        asColumns.append('IDX_ParseVex2b,')
    elif oInstr.sEncoding == 'vex3':
        asColumns.append('IDX_ParseVex3b,')
    elif oInstr.sEncoding in g_dInstructionMaps:
        asColumns.append(g_dInstructionMaps[oInstr.sEncoding].sDisParse + ',');
    else:
        ## @todo
        #IDX_ParseTwoByteEsc,
        #IDX_ParseGrp1,
        #IDX_ParseShiftGrp2,
        #IDX_ParseGrp3,
        #IDX_ParseGrp4,
        #IDX_ParseGrp5,
        #IDX_Parse3DNow,
        #IDX_ParseGrp6,
        #IDX_ParseGrp7,
        #IDX_ParseGrp8,
        #IDX_ParseGrp9,
        #IDX_ParseGrp10,
        #IDX_ParseGrp12,
        #IDX_ParseGrp13,
        #IDX_ParseGrp14,
        #IDX_ParseGrp15,
        #IDX_ParseGrp16,
        #IDX_ParseThreeByteEsc4,
        #IDX_ParseThreeByteEsc5,
        #IDX_ParseModFence,
        #IDX_ParseEscFP,
        #IDX_ParseNopPause,
        #IDX_ParseInvOpModRM,
        assert False, str(oInstr);

    # Check for immediates and stuff in the remaining operands.
    # Operands already covered by the parser columns above are skipped, and
    # the unused slots are filled with zeros.
    for oOperand in oInstr.aoOperands[len(asColumns) - iStart:]:
        sIdx = g_kdOpTypes[oOperand.sType][0];
        #if sIdx != 'IDX_UseModRM':
        asColumns.append(sIdx + ',');
    asColumns.extend(['0,'] * (cMaxOperands - (len(asColumns) - iStart)));

    #
    # Opcode and operands.
    # Unused operand slots get OP_PARM_NONE.
    #
    assert oInstr.sDisEnum, str(oInstr);
    asColumns.append(oInstr.sDisEnum + ',');
    iStart = len(asColumns)
    for oOperand in oInstr.aoOperands:
        asColumns.append('OP_PARM_' + g_kdOpTypes[oOperand.sType][3] + ',');
    asColumns.extend(['OP_PARM_NONE,'] * (cMaxOperands - (len(asColumns) - iStart)));

    #
    # Flags.
    # Only hints mapping to DISOPTYPE_xxx defines are included, zero if there are none.
    # This column also closes the macro invocation.
    #
    sTmp = '';
    for sHint in sorted(oInstr.dHints.keys()):
        sDefine = g_kdHints[sHint];
        if sDefine.startswith('DISOPTYPE_'):
            if sTmp:
                sTmp += ' | ' + sDefine;
            else:
                sTmp += sDefine;
    if sTmp:
        sTmp += '),';
    else:
        sTmp += '0),';
    asColumns.append(sTmp);

    #
    # Format the columns into a line.
    # Each column is padded out to its offset in aoffColumns; a column that
    # is already past its offset is separated by a single space instead.
    #
    aoffColumns = [4, 29, 49, 65, 77, 89, 109, 125, 141, 157, 183, 199];
    sLine = '';
    for i, s in enumerate(asColumns):
        if len(sLine) < aoffColumns[i]:
            sLine += ' ' * (aoffColumns[i] - len(sLine));
        else:
            sLine += ' ';
        sLine += s;

    # OP("psrlw %Vdq,%Wdq", IDX_ParseModRM, IDX_UseModRM, 0, OP_PSRLW, OP_PARM_Vdq, OP_PARM_Wdq, OP_PARM_NONE,
    # DISOPTYPE_HARMLESS),
    # define OP(pszOpcode, idxParse1, idxParse2, idxParse3, opcode, param1, param2, param3, optype) \
    # { pszOpcode, idxParse1, idxParse2, idxParse3, 0, opcode, param1, param2, param3, 0, 0, optype }
    return sLine;
5366
def __checkIfShortTable(aoTableOrdered, oMap):
    """
    Checks whether the table can be output in a short version, i.e. without
    the unused (None) entries at the start and the end.

    Returns (iInstr, cInstructions, fShortTable), where iInstr is the index of
    the first entry to output and cInstructions the index following the last
    one.  For the full table these are zero and the table length.
    """
    cEntries = len(aoTableOrdered);

    # Determine how much we can trim off, the end first and then the start.
    iEnd = cEntries;
    while iEnd > 0 and aoTableOrdered[iEnd - 1] is None:
        iEnd -= 1;

    iFirst = 0;
    while iFirst < iEnd and aoTableOrdered[iFirst] is None:
        iFirst += 1;

    # The short version is used when the trimmed entries make up at least a
    # 30th of the table (integer division, so any table with less than 30
    # entries qualifies).
    # NOTE(review): This comment used to say 'more than 30%', which is not
    #               what the check does - confirm which one is intended.
    cTrimmed = iFirst + (cEntries - iEnd);
    if cTrimmed < cEntries // 30:
        _ = oMap; # Use this for overriding.
        # Output the full table.
        return (0, cEntries, False);
    return (iFirst, iEnd, True);
5388
def generateDisassemblerTables(oDstFile = sys.stdout):
    """
    Generates disassembler tables.

    All the instruction files are parsed first, then one DISOPCODE table and
    one DISOPMAPDESC range record are written to oDstFile for each map whose
    name starts with 'vex'.

    Returns exit code: 0 on success, 1 if the parsing failed.
    """

    #
    # Parse all.
    #
    try:
        parseAll();
    except Exception as oXcpt:
        print('error: parseAll failed: %s' % (oXcpt,), file = sys.stderr);
        traceback.print_exc(file = sys.stderr);
        return 1;


    #
    # The disassembler uses a slightly different table layout to save space:
    # maps selected by 'byte+pfx' are split into one map per prefix variation
    # (none, 0x66, 0xf3 and 0xf2).
    #
    aoDisasmMaps = [];
    for oMap in sorted(g_dInstructionMaps.values(),
                       key = lambda oCurMap: oCurMap.sEncoding + ''.join(oCurMap.asLeadOpcodes)):
        if oMap.sSelector != 'byte+pfx':
            aoDisasmMaps.append(oMap);
        else:
            # Split the map by prefix.
            aoDisasmMaps.append(oMap.copy(oMap.sName, 'none'));
            aoDisasmMaps.append(oMap.copy(oMap.sName + '_66', '0x66'));
            aoDisasmMaps.append(oMap.copy(oMap.sName + '_F3', '0xf3'));
            aoDisasmMaps.append(oMap.copy(oMap.sName + '_F2', '0xf2'));

    #
    # Dump each map.
    # Note! asHeaderLines collects the matching extern declarations, but
    #       nothing in this function writes them anywhere.
    #
    asHeaderLines = [];
    print("debug: maps=%s\n" % (', '.join([oMap.sName for oMap in aoDisasmMaps]),), file = sys.stderr);
    for oMap in aoDisasmMaps:
        sName = oMap.sName;

        if not sName.startswith("vex"): continue; # only looking at the vex maps at the moment.

        #
        # Get the instructions for the map and see if we can do a short version or not.
        #
        aoTableOrder = oMap.getInstructionsInTableOrder();
        cEntriesPerByte = oMap.getEntriesPerByte();
        (iInstrStart, iInstrEnd, fShortTable) = __checkIfShortTable(aoTableOrder, oMap);
        cTableEntries = iInstrEnd - iInstrStart;

        #
        # Output the table start.
        # Note! Short tables are static and only accessible via the map range record.
        #
        asLines = [];
        asLines.append('/* Generated from: %-11s Selector: %-7s Encoding: %-7s Lead bytes opcodes: %s */'
                       % ( oMap.sName, oMap.sSelector, oMap.sEncoding, ' '.join(oMap.asLeadOpcodes), ));
        if fShortTable:
            asLines.append('%sconst DISOPCODE %s[] =' % ('static ' if fShortTable else '', oMap.getDisasTableName(),));
        else:
            asHeaderLines.append('extern const DISOPCODE %s[%d];' % (oMap.getDisasTableName(), cTableEntries,));
            asLines.append( 'const DISOPCODE %s[%d] =' % (oMap.getDisasTableName(), cTableEntries,));
        asLines.append('{');

        if fShortTable and (iInstrStart & ((0x10 * cEntriesPerByte) - 1)) != 0:
            asLines.append(' /* %#04x: */' % (iInstrStart,));

        #
        # Output the instructions.
        #
        iInstr = iInstrStart;
        while iInstr < iInstrEnd:
            oInstr = aoTableOrder[iInstr];
            # Start of a new row of 16 opcode bytes.
            if (iInstr & ((0x10 * cEntriesPerByte) - 1)) == 0:
                if iInstr != iInstrStart:
                    asLines.append('');
                asLines.append(' /* %x */' % ((iInstr // cEntriesPerByte) >> 4,));

            if oInstr is None:
                # Invalid. Optimize blocks of invalid instructions.
                cInvalidInstrs = 1;
                while iInstr + cInvalidInstrs < len(aoTableOrder) and aoTableOrder[iInstr + cInvalidInstrs] is None:
                    cInvalidInstrs += 1;
                if (iInstr & (0x10 * cEntriesPerByte - 1)) == 0 and cInvalidInstrs >= 0x10 * cEntriesPerByte:
                    # A whole row is invalid.  One less is skipped here since
                    # the loop adds one more below.
                    asLines.append(' INVALID_OPCODE_BLOCK_%u,' % (0x10 * cEntriesPerByte,));
                    iInstr += 0x10 * cEntriesPerByte - 1;
                elif cEntriesPerByte > 1:
                    if (iInstr & (cEntriesPerByte - 1)) == 0 and cInvalidInstrs >= cEntriesPerByte:
                        # All the entries of this opcode byte are invalid.  The
                        # skip follows cEntriesPerByte rather than assuming 4.
                        asLines.append(' INVALID_OPCODE_BLOCK_%u,' % (cEntriesPerByte,));
                        iInstr += cEntriesPerByte - 1;
                    else:
                        asLines.append(' /* %#04x/%d */ INVALID_OPCODE,'
                                       % (iInstr // cEntriesPerByte, iInstr % cEntriesPerByte));
                else:
                    asLines.append(' /* %#04x */ INVALID_OPCODE,' % (iInstr));
            elif isinstance(oInstr, list):
                if len(oInstr) != 0:
                    asLines.append(' /* %#04x */ ComplicatedListStuffNeedingWrapper, /* \n -- %s */'
                                   % (iInstr, '\n -- '.join([str(oItem) for oItem in oInstr]),));
                else:
                    asLines.append(__formatDisassemblerTableEntry(oInstr));
            else:
                asLines.append(__formatDisassemblerTableEntry(oInstr));

            iInstr += 1;

        if iInstrStart >= iInstrEnd:
            # Nothing was output above, so add a dummy entry.  The table then
            # has one element and the assertion below must say so.
            asLines.append(' /* dummy */ INVALID_OPCODE');
            cTableEntries = 1;

        asLines.append('};');
        asLines.append('AssertCompile(RT_ELEMENTS(%s) == %s);' % (oMap.getDisasTableName(), cTableEntries,));

        #
        # We always emit a map range record, assuming the linker will eliminate the unnecessary ones.
        # The declaration uses the same name as the definition.
        #
        asHeaderLines.append('extern const DISOPMAPDESC %s;' % (oMap.getDisasRangeName()));
        asLines.append('const DISOPMAPDESC %s = { &%s[0], %#04x, RT_ELEMENTS(%s) };'
                       % (oMap.getDisasRangeName(), oMap.getDisasTableName(), iInstrStart, oMap.getDisasTableName(),));

        #
        # Write out the lines.
        #
        oDstFile.write('\n'.join(asLines));
        oDstFile.write('\n');
        oDstFile.write('\n');
        #break; #for now
    return 0;
5517
# When run as a script, generate the disassembler tables (to stdout by
# default) and use the return value as the process exit code.
if __name__ == '__main__':
    sys.exit(generateDisassemblerTables());
5520
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette