VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllInstructionsPython.py@ 95394

Last change on this file since 95394 was 95347, checked in by vboxsync, 3 years ago

VMM/IEM: Implemented the MULX instruction. bugref:9898

  • Property svn:eol-style set to LF
  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 167.6 KB
Line 
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3# $Id: IEMAllInstructionsPython.py 95347 2022-06-22 22:14:17Z vboxsync $
4
5"""
6IEM instruction extractor.
7
8This script/module parses the IEMAllInstruction*.cpp.h files next to it and
9collects information about the instructions. It can then be used to generate
10disassembler tables and tests.
11"""
12
13__copyright__ = \
14"""
15Copyright (C) 2017-2022 Oracle Corporation
16
17This file is part of VirtualBox Open Source Edition (OSE), as
18available from http://www.virtualbox.org. This file is free software;
19you can redistribute it and/or modify it under the terms of the GNU
20General Public License (GPL) as published by the Free Software
21Foundation, in version 2 as it comes in the "COPYING" file of the
22VirtualBox OSE distribution. VirtualBox OSE is distributed in the
23hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
24
25The contents of this file may alternatively be used under the terms
26of the Common Development and Distribution License Version 1.0
27(CDDL) only, as it comes in the "COPYING.CDDL" file of the
28VirtualBox OSE distribution, in which case the provisions of the
29CDDL are applicable instead of those of the GPL.
30
31You may elect to license modified versions of this file under the
32terms and conditions of either the GPL or the CDDL or both.
33"""
34__version__ = "$Revision: 95347 $"
35
36# pylint: disable=anomalous-backslash-in-string
37
38# Standard python imports.
39import os
40import re
41import sys
42
43## Only the main script needs to modify the path.
44#g_ksValidationKitDir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),
45# 'ValidationKit');
46#sys.path.append(g_ksValidationKitDir);
47#
48#from common import utils; - Windows build boxes doesn't have pywin32.
49
# Python 3 hacks: the 'long' type is gone there, so alias it to 'int' to keep
# the long(...) conversions further down working on both major versions.
if sys.version_info >= (3,):
    long = int; # pylint: disable=redefined-builtin,invalid-name
53
54
## EFLAGS constants, keyed by the X86_EFL_XXX name.
## Single bit flags are spelled as shifts by their bit number (RT_BIT_32 style).
g_kdX86EFlagsConstants = {
    'X86_EFL_CF':        1 << 0,
    'X86_EFL_1':         1 << 1,
    'X86_EFL_PF':        1 << 2,
    'X86_EFL_AF':        1 << 4,
    'X86_EFL_ZF':        1 << 6,
    'X86_EFL_SF':        1 << 7,
    'X86_EFL_TF':        1 << 8,
    'X86_EFL_IF':        1 << 9,
    'X86_EFL_DF':        1 << 10,
    'X86_EFL_OF':        1 << 11,
    'X86_EFL_IOPL':      (1 << 12) | (1 << 13),
    'X86_EFL_NT':        1 << 14,
    'X86_EFL_RF':        1 << 16,
    'X86_EFL_VM':        1 << 17,
    'X86_EFL_AC':        1 << 18,
    'X86_EFL_VIF':       1 << 19,
    'X86_EFL_VIP':       1 << 20,
    'X86_EFL_ID':        1 << 21,
    'X86_EFL_LIVE_MASK': 0x003f7fd5,    # UINT32_C(0x003f7fd5)
    'X86_EFL_RA1_MASK':  1 << 1,
};
77
## EFlags values allowed in \@opfltest, \@opflmodify, \@opflundef, \@opflset, and \@opflclear.
## Maps the mnemonic to an X86_EFL_XXX constant name; a leading '!' marks the
## mnemonic as standing for the flag being clear.
g_kdEFlagsMnemonics = {
    # Debugger flag notation (sorted by value):
    'cf':   'X86_EFL_CF',       ##< Carry Flag.
    'nc':   '!X86_EFL_CF',      ##< No Carry.

    # NOTE(review): PF set normally means even parity, so 'po'/'pe' look swapped
    #               here.  Left untouched as existing users may rely on it - confirm.
    'po':   'X86_EFL_PF',       ##< Parity Odd.
    'pe':   '!X86_EFL_PF',      ##< Parity Even.

    'af':   'X86_EFL_AF',       ##< Aux Flag.
    'na':   '!X86_EFL_AF',      ##< No Aux.

    'zr':   'X86_EFL_ZF',       ##< ZeRo.
    'nz':   '!X86_EFL_ZF',      ##< No Zero.

    'ng':   'X86_EFL_SF',       ##< NeGative (sign).
    'pl':   '!X86_EFL_SF',      ##< PLus (sign).

    'tf':   'X86_EFL_TF',       ##< Trap flag.

    'ei':   'X86_EFL_IF',       ##< Enabled Interrupts.
    'di':   '!X86_EFL_IF',      ##< Disabled Interrupts.

    'dn':   'X86_EFL_DF',       ##< DowN (string op direction).
    'up':   '!X86_EFL_DF',      ##< UP (string op direction).

    'ov':   'X86_EFL_OF',       ##< OVerflow.
    'nv':   '!X86_EFL_OF',      ##< No Overflow.

    'nt':   'X86_EFL_NT',       ##< Nested Task.
    'rf':   'X86_EFL_RF',       ##< Resume Flag.
    'vm':   'X86_EFL_VM',       ##< Virtual-8086 Mode.
    'ac':   'X86_EFL_AC',       ##< Alignment Check.
    'vif':  'X86_EFL_VIF',      ##< Virtual Interrupt Flag.
    'vip':  'X86_EFL_VIP',      ##< Virtual Interrupt Pending.

    # Reference manual notation not covered above (sorted by value):
    'pf':   'X86_EFL_PF',
    'zf':   'X86_EFL_ZF',
    'sf':   'X86_EFL_SF',
    'if':   'X86_EFL_IF',
    'df':   'X86_EFL_DF',
    'of':   'X86_EFL_OF',
    'iopl': 'X86_EFL_IOPL',
    'id':   'X86_EFL_ID',
};
124
## Constants and values for CR0.
## Each entry is a single bit, written as a shift by its bit number.
g_kdX86Cr0Constants = {
    'X86_CR0_PE': 1 << 0,
    'X86_CR0_MP': 1 << 1,
    'X86_CR0_EM': 1 << 2,
    'X86_CR0_TS': 1 << 3,
    'X86_CR0_ET': 1 << 4,
    'X86_CR0_NE': 1 << 5,
    'X86_CR0_WP': 1 << 16,
    'X86_CR0_AM': 1 << 18,
    'X86_CR0_NW': 1 << 29,
    'X86_CR0_CD': 1 << 30,
    'X86_CR0_PG': 1 << 31,
};
139
## Constants and values for CR4.
## Each entry is a single bit, written as a shift by its bit number.
g_kdX86Cr4Constants = {
    'X86_CR4_VME':         1 << 0,
    'X86_CR4_PVI':         1 << 1,
    'X86_CR4_TSD':         1 << 2,
    'X86_CR4_DE':          1 << 3,
    'X86_CR4_PSE':         1 << 4,
    'X86_CR4_PAE':         1 << 5,
    'X86_CR4_MCE':         1 << 6,
    'X86_CR4_PGE':         1 << 7,
    'X86_CR4_PCE':         1 << 8,
    'X86_CR4_OSFXSR':      1 << 9,
    'X86_CR4_OSXMMEEXCPT': 1 << 10,
    'X86_CR4_VMXE':        1 << 13,
    'X86_CR4_SMXE':        1 << 14,
    'X86_CR4_PCIDE':       1 << 17,
    'X86_CR4_OSXSAVE':     1 << 18,
    'X86_CR4_SMEP':        1 << 20,
    'X86_CR4_SMAP':        1 << 21,
    'X86_CR4_PKE':         1 << 22,
};
161
## XSAVE components (XCR0).
## Individual components are single bits; the XSAVE_C_ALL_XXX entries are masks.
g_kdX86XSaveCConstants = {
    'XSAVE_C_X87':         1 << 0,
    'XSAVE_C_SSE':         1 << 1,
    'XSAVE_C_YMM':         1 << 2,
    'XSAVE_C_BNDREGS':     1 << 3,
    'XSAVE_C_BNDCSR':      1 << 4,
    'XSAVE_C_OPMASK':      1 << 5,
    'XSAVE_C_ZMM_HI256':   1 << 6,
    'XSAVE_C_ZMM_16HI':    1 << 7,
    'XSAVE_C_PKRU':        1 << 9,
    'XSAVE_C_LWP':         1 << 62,
    'XSAVE_C_X':           1 << 63,
    'XSAVE_C_ALL_AVX':     0x000000c4, # For clearing all AVX bits.
    'XSAVE_C_ALL_AVX_SSE': 0x000000c6, # For clearing all AVX and SSE bits.
};
178
179
## \@op[1-4] locations
## Only the keys carry information; every value is its own empty list.
g_kdOpLocations = {
    # Encoded operand locations.
    'reg':    [],   ## modrm.reg
    'rm':     [],   ## modrm.rm
    'imm':    [],   ## immediate instruction data
    'vvvv':   [],   ## VEX.vvvv

    # fixed registers.
    'AL':     [],
    'rAX':    [],
    'rDX':    [],
    'rSI':    [],
    'rDI':    [],
    'rFLAGS': [],
    'CS':     [],
    'DS':     [],
    'ES':     [],
    'FS':     [],
    'GS':     [],
    'SS':     [],
};
201
## \@op[1-4] types
##
## Value fields:
##    - 0: the normal IDX_ParseXXX handler (IDX_UseModRM == IDX_ParseModRM).
##    - 1: the location (g_kdOpLocations).
##    - 2: disassembler format string version of the type.
##    - 3: disassembler OP_PARAM_XXX (XXX only).
##    - 4: IEM form matching instruction.
##
## Note! See the A.2.1 in SDM vol 2 for the type names.
g_kdOpTypes = {
    # Fixed addresses
    'Ap':         ('IDX_ParseImmAddrF', 'imm', '%Ap', 'Ap', 'FIXED'),

    # ModR/M.rm
    'Eb':         ('IDX_UseModRM', 'rm', '%Eb',  'Eb',  'RM'),
    'Ed':         ('IDX_UseModRM', 'rm', '%Ed',  'Ed',  'RM'),
    'Ed_WO':      ('IDX_UseModRM', 'rm', '%Ed',  'Ed',  'RM'),
    'Eq':         ('IDX_UseModRM', 'rm', '%Eq',  'Eq',  'RM'),
    'Eq_WO':      ('IDX_UseModRM', 'rm', '%Eq',  'Eq',  'RM'),
    'Ew':         ('IDX_UseModRM', 'rm', '%Ew',  'Ew',  'RM'),
    'Ev':         ('IDX_UseModRM', 'rm', '%Ev',  'Ev',  'RM'),
    'Ey':         ('IDX_UseModRM', 'rm', '%Ey',  'Ey',  'RM'),
    'Qq':         ('IDX_UseModRM', 'rm', '%Qq',  'Qq',  'RM'),
    'Qq_WO':      ('IDX_UseModRM', 'rm', '%Qq',  'Qq',  'RM'),
    'Wss':        ('IDX_UseModRM', 'rm', '%Wss', 'Wss', 'RM'),
    'Wss_WO':     ('IDX_UseModRM', 'rm', '%Wss', 'Wss', 'RM'),
    'Wsd':        ('IDX_UseModRM', 'rm', '%Wsd', 'Wsd', 'RM'),
    'Wsd_WO':     ('IDX_UseModRM', 'rm', '%Wsd', 'Wsd', 'RM'),
    'Wps':        ('IDX_UseModRM', 'rm', '%Wps', 'Wps', 'RM'),
    'Wps_WO':     ('IDX_UseModRM', 'rm', '%Wps', 'Wps', 'RM'),
    'Wpd':        ('IDX_UseModRM', 'rm', '%Wpd', 'Wpd', 'RM'),
    'Wpd_WO':     ('IDX_UseModRM', 'rm', '%Wpd', 'Wpd', 'RM'),
    'Wdq':        ('IDX_UseModRM', 'rm', '%Wdq', 'Wdq', 'RM'),
    'Wdq_WO':     ('IDX_UseModRM', 'rm', '%Wdq', 'Wdq', 'RM'),
    'Wq':         ('IDX_UseModRM', 'rm', '%Wq',  'Wq',  'RM'),
    'Wq_WO':      ('IDX_UseModRM', 'rm', '%Wq',  'Wq',  'RM'),
    'WqZxReg_WO': ('IDX_UseModRM', 'rm', '%Wq',  'Wq',  'RM'),
    'Wx':         ('IDX_UseModRM', 'rm', '%Wx',  'Wx',  'RM'),
    'Wx_WO':      ('IDX_UseModRM', 'rm', '%Wx',  'Wx',  'RM'),

    # ModR/M.rm - register only.
    'Uq':         ('IDX_UseModRM', 'rm', '%Uq',  'Uq',   'REG'),
    'UqHi':       ('IDX_UseModRM', 'rm', '%Uq',  'UqHi', 'REG'),
    'Uss':        ('IDX_UseModRM', 'rm', '%Uss', 'Uss',  'REG'),
    'Uss_WO':     ('IDX_UseModRM', 'rm', '%Uss', 'Uss',  'REG'),
    'Usd':        ('IDX_UseModRM', 'rm', '%Usd', 'Usd',  'REG'),
    'Usd_WO':     ('IDX_UseModRM', 'rm', '%Usd', 'Usd',  'REG'),
    'Nq':         ('IDX_UseModRM', 'rm', '%Qq',  'Nq',   'REG'),

    # ModR/M.rm - memory only.
    'Ma':         ('IDX_UseModRM', 'rm', '%Ma',  'Ma',  'MEM'), ##< Only used by BOUND.
    'Mb_RO':      ('IDX_UseModRM', 'rm', '%Mb',  'Mb',  'MEM'),
    'Md':         ('IDX_UseModRM', 'rm', '%Md',  'Md',  'MEM'),
    'Md_RO':      ('IDX_UseModRM', 'rm', '%Md',  'Md',  'MEM'),
    'Md_WO':      ('IDX_UseModRM', 'rm', '%Md',  'Md',  'MEM'),
    'Mdq':        ('IDX_UseModRM', 'rm', '%Mdq', 'Mdq', 'MEM'),
    'Mdq_WO':     ('IDX_UseModRM', 'rm', '%Mdq', 'Mdq', 'MEM'),
    'Mq':         ('IDX_UseModRM', 'rm', '%Mq',  'Mq',  'MEM'),
    'Mq_WO':      ('IDX_UseModRM', 'rm', '%Mq',  'Mq',  'MEM'),
    'Mps_WO':     ('IDX_UseModRM', 'rm', '%Mps', 'Mps', 'MEM'),
    'Mpd_WO':     ('IDX_UseModRM', 'rm', '%Mpd', 'Mpd', 'MEM'),
    'Mx':         ('IDX_UseModRM', 'rm', '%Mx',  'Mx',  'MEM'),
    'Mx_WO':      ('IDX_UseModRM', 'rm', '%Mx',  'Mx',  'MEM'),
    'M_RO':       ('IDX_UseModRM', 'rm', '%M',   'M',   'MEM'),
    'M_RW':       ('IDX_UseModRM', 'rm', '%M',   'M',   'MEM'),

    # ModR/M.reg
    'Gb':         ('IDX_UseModRM', 'reg', '%Gb',  'Gb',    ''),
    'Gw':         ('IDX_UseModRM', 'reg', '%Gw',  'Gw',    ''),
    'Gv':         ('IDX_UseModRM', 'reg', '%Gv',  'Gv',    ''),
    'Gv_RO':      ('IDX_UseModRM', 'reg', '%Gv',  'Gv',    ''),
    'Gy':         ('IDX_UseModRM', 'reg', '%Gy',  'Gy',    ''),
    'Pd':         ('IDX_UseModRM', 'reg', '%Pd',  'Pd',    ''),
    'PdZx_WO':    ('IDX_UseModRM', 'reg', '%Pd',  'PdZx',  ''),
    'Pq':         ('IDX_UseModRM', 'reg', '%Pq',  'Pq',    ''),
    'Pq_WO':      ('IDX_UseModRM', 'reg', '%Pq',  'Pq',    ''),
    'Vd':         ('IDX_UseModRM', 'reg', '%Vd',  'Vd',    ''),
    'Vd_WO':      ('IDX_UseModRM', 'reg', '%Vd',  'Vd',    ''),
    'VdZx_WO':    ('IDX_UseModRM', 'reg', '%Vd',  'Vd',    ''),
    'Vdq':        ('IDX_UseModRM', 'reg', '%Vdq', 'Vdq',   ''),
    'Vss':        ('IDX_UseModRM', 'reg', '%Vss', 'Vss',   ''),
    'Vss_WO':     ('IDX_UseModRM', 'reg', '%Vss', 'Vss',   ''),
    'VssZx_WO':   ('IDX_UseModRM', 'reg', '%Vss', 'Vss',   ''),
    'Vsd':        ('IDX_UseModRM', 'reg', '%Vsd', 'Vsd',   ''),
    'Vsd_WO':     ('IDX_UseModRM', 'reg', '%Vsd', 'Vsd',   ''),
    'VsdZx_WO':   ('IDX_UseModRM', 'reg', '%Vsd', 'Vsd',   ''),
    'Vps':        ('IDX_UseModRM', 'reg', '%Vps', 'Vps',   ''),
    'Vps_WO':     ('IDX_UseModRM', 'reg', '%Vps', 'Vps',   ''),
    'Vpd':        ('IDX_UseModRM', 'reg', '%Vpd', 'Vpd',   ''),
    'Vpd_WO':     ('IDX_UseModRM', 'reg', '%Vpd', 'Vpd',   ''),
    'Vq':         ('IDX_UseModRM', 'reg', '%Vq',  'Vq',    ''),
    'Vq_WO':      ('IDX_UseModRM', 'reg', '%Vq',  'Vq',    ''),
    'Vdq_WO':     ('IDX_UseModRM', 'reg', '%Vdq', 'Vdq',   ''),
    'VqHi':       ('IDX_UseModRM', 'reg', '%Vdq', 'VdqHi', ''),
    'VqHi_WO':    ('IDX_UseModRM', 'reg', '%Vdq', 'VdqHi', ''),
    'VqZx_WO':    ('IDX_UseModRM', 'reg', '%Vq',  'VqZx',  ''),
    'Vx':         ('IDX_UseModRM', 'reg', '%Vx',  'Vx',    ''),
    'Vx_WO':      ('IDX_UseModRM', 'reg', '%Vx',  'Vx',    ''),

    # VEX.vvvv
    'By':         ('IDX_UseModRM', 'vvvv', '%By', 'By',    'V'),
    'HssHi':      ('IDX_UseModRM', 'vvvv', '%Hx', 'HssHi', 'V'),
    'HsdHi':      ('IDX_UseModRM', 'vvvv', '%Hx', 'HsdHi', 'V'),
    'HqHi':       ('IDX_UseModRM', 'vvvv', '%Hq', 'HqHi',  'V'),

    # Immediate values.
    'Ib':         ('IDX_ParseImmByte',   'imm', '%Ib', 'Ib', ''), ##< NB! Could be IDX_ParseImmByteSX for some instrs.
    'Iw':         ('IDX_ParseImmUshort', 'imm', '%Iw', 'Iw', ''),
    'Id':         ('IDX_ParseImmUlong',  'imm', '%Id', 'Id', ''),
    'Iq':         ('IDX_ParseImmQword',  'imm', '%Iq', 'Iq', ''),
    'Iv':         ('IDX_ParseImmV',      'imm', '%Iv', 'Iv', ''), ##< o16: word, o32: dword, o64: qword
    'Iz':         ('IDX_ParseImmZ',      'imm', '%Iz', 'Iz', ''), ##< o16: word, o32|o64:dword

    # Address operands (no ModR/M).
    'Ob':         ('IDX_ParseImmAddr', 'imm', '%Ob', 'Ob', ''),
    'Ov':         ('IDX_ParseImmAddr', 'imm', '%Ov', 'Ov', ''),

    # Relative jump targets
    'Jb':         ('IDX_ParseImmBRel', 'imm', '%Jb', 'Jb', ''),
    'Jv':         ('IDX_ParseImmVRel', 'imm', '%Jv', 'Jv', ''),

    # DS:rSI
    'Xb':         ('IDX_ParseXb', 'rSI', '%eSI', 'Xb', ''),
    'Xv':         ('IDX_ParseXv', 'rSI', '%eSI', 'Xv', ''),
    # ES:rDI
    'Yb':         ('IDX_ParseYb', 'rDI', '%eDI', 'Yb', ''),
    'Yv':         ('IDX_ParseYv', 'rDI', '%eDI', 'Yv', ''),

    'Fv':         ('IDX_ParseFixedReg', 'rFLAGS', '%Fv', 'Fv', ''),

    # Fixed registers.
    'AL':         ('IDX_ParseFixedReg', 'AL',  'al',   'REG_AL',  ''),
    'rAX':        ('IDX_ParseFixedReg', 'rAX', '%eAX', 'REG_EAX', ''),
    'rDX':        ('IDX_ParseFixedReg', 'rDX', '%eDX', 'REG_EDX', ''),
    'CS':         ('IDX_ParseFixedReg', 'CS',  'cs',   'REG_CS',  ''), # 8086: push CS
    'DS':         ('IDX_ParseFixedReg', 'DS',  'ds',   'REG_DS',  ''),
    'ES':         ('IDX_ParseFixedReg', 'ES',  'es',   'REG_ES',  ''),
    'FS':         ('IDX_ParseFixedReg', 'FS',  'fs',   'REG_FS',  ''),
    'GS':         ('IDX_ParseFixedReg', 'GS',  'gs',   'REG_GS',  ''),
    'SS':         ('IDX_ParseFixedReg', 'SS',  'ss',   'REG_SS',  ''),
};
344
345# IDX_ParseFixedReg
346# IDX_ParseVexDest
347
348
## IEMFORM_XXX mappings.
## Each value is the tuple ( sEncoding, [ sWhere1, ... ], opcodesub ); the
## operand location list is None for the 'FIXED' form.
g_kdIemForms = {
    'RM':          ('ModR/M', ['reg', 'rm'], ''),
    'RM_REG':      ('ModR/M', ['reg', 'rm'], '11 mr/reg'),
    'RM_MEM':      ('ModR/M', ['reg', 'rm'], '!11 mr/reg'),
    'MR':          ('ModR/M', ['rm', 'reg'], ''),
    'MR_REG':      ('ModR/M', ['rm', 'reg'], '11 mr/reg'),
    'MR_MEM':      ('ModR/M', ['rm', 'reg'], '!11 mr/reg'),
    'M':           ('ModR/M', ['rm'],        ''),
    'M_REG':       ('ModR/M', ['rm'],        ''),
    'M_MEM':       ('ModR/M', ['rm'],        ''),
    'R':           ('ModR/M', ['reg'],       ''),

    'VEX_RM':      ('VEX.ModR/M', ['reg', 'rm'],         ''),
    'VEX_RM_REG':  ('VEX.ModR/M', ['reg', 'rm'],         '11 mr/reg'),
    'VEX_RM_MEM':  ('VEX.ModR/M', ['reg', 'rm'],         '!11 mr/reg'),
    'VEX_MR':      ('VEX.ModR/M', ['rm', 'reg'],         ''),
    'VEX_MR_REG':  ('VEX.ModR/M', ['rm', 'reg'],         '11 mr/reg'),
    'VEX_MR_MEM':  ('VEX.ModR/M', ['rm', 'reg'],         '!11 mr/reg'),
    'VEX_M':       ('VEX.ModR/M', ['rm'],                ''),
    'VEX_M_REG':   ('VEX.ModR/M', ['rm'],                ''),
    'VEX_M_MEM':   ('VEX.ModR/M', ['rm'],                ''),
    'VEX_R':       ('VEX.ModR/M', ['reg'],               ''),
    'VEX_RVM':     ('VEX.ModR/M', ['reg', 'vvvv', 'rm'], ''),
    'VEX_RVM_REG': ('VEX.ModR/M', ['reg', 'vvvv', 'rm'], '11 mr/reg'),
    'VEX_RVM_MEM': ('VEX.ModR/M', ['reg', 'vvvv', 'rm'], '!11 mr/reg'),
    'VEX_RMV':     ('VEX.ModR/M', ['reg', 'rm', 'vvvv'], ''),
    'VEX_RMV_REG': ('VEX.ModR/M', ['reg', 'rm', 'vvvv'], '11 mr/reg'),
    'VEX_RMV_MEM': ('VEX.ModR/M', ['reg', 'rm', 'vvvv'], '!11 mr/reg'),
    'VEX_RMI':     ('VEX.ModR/M', ['reg', 'rm', 'imm'],  ''),
    'VEX_RMI_REG': ('VEX.ModR/M', ['reg', 'rm', 'imm'],  '11 mr/reg'),
    'VEX_RMI_MEM': ('VEX.ModR/M', ['reg', 'rm', 'imm'],  '!11 mr/reg'),
    'VEX_MVR':     ('VEX.ModR/M', ['rm', 'vvvv', 'reg'], ''),
    'VEX_MVR_REG': ('VEX.ModR/M', ['rm', 'vvvv', 'reg'], '11 mr/reg'),
    'VEX_MVR_MEM': ('VEX.ModR/M', ['rm', 'vvvv', 'reg'], '!11 mr/reg'),

    'VEX_VM':      ('VEX.ModR/M', ['vvvv', 'rm'], ''),
    'VEX_VM_REG':  ('VEX.ModR/M', ['vvvv', 'rm'], '11 mr/reg'),
    'VEX_VM_MEM':  ('VEX.ModR/M', ['vvvv', 'rm'], '!11 mr/reg'),

    'FIXED':       ('fixed', None, ''),
};
391
## \@oppfx values.
## Only the keys carry information; every value is its own empty list.
g_kdPrefixes = {
    'none': [],
    '0x66': [],
    '0xf3': [],
    '0xf2': [],
};
399
## Special \@opcode tag values.
## Only the keys carry information; every value is its own empty list.
g_kdSpecialOpcodes = {
    '/reg':       [],
    'mr/reg':     [],
    '11 /reg':    [],
    '!11 /reg':   [],
    '11 mr/reg':  [],
    '!11 mr/reg': [],
};
409
## Special \@opcodesub tag values.
## The first value is the real value for aliases.
## The second value is for bs3cg1.
g_kdSubOpcodes = {
    'none':               [ None,                 '',     ],
    '11 mr/reg':          [ '11 mr/reg',          '',     ],
    '11':                 [ '11 mr/reg',          '',     ], ##< alias
    '!11 mr/reg':         [ '!11 mr/reg',         '',     ],
    '!11':                [ '!11 mr/reg',         '',     ], ##< alias
    'rex.w=0':            [ 'rex.w=0',            'WZ',   ],
    'w=0':                [ 'rex.w=0',            '',     ], ##< alias
    'rex.w=1':            [ 'rex.w=1',            'WNZ',  ],
    'w=1':                [ 'rex.w=1',            '',     ], ##< alias
    'vex.l=0':            [ 'vex.l=0',            'L0',   ],
    # Not an alias, so the real value is the tag itself (was 'vex.l=0' by copy & paste).
    'vex.l=1':            [ 'vex.l=1',            'L1',   ],
    '11 mr/reg vex.l=0':  [ '11 mr/reg vex.l=0',  'L0',   ],
    '11 mr/reg vex.l=1':  [ '11 mr/reg vex.l=1',  'L1',   ],
    '!11 mr/reg vex.l=0': [ '!11 mr/reg vex.l=0', 'L0',   ],
    '!11 mr/reg vex.l=1': [ '!11 mr/reg vex.l=1', 'L1',   ],
};
430
## Valid values for \@openc
## Each value is a one element list holding a BS3CG1ENC_XXX name, or None for 'prefix'.
g_kdEncodings = {
    'ModR/M':     ['BS3CG1ENC_MODRM'],      ##< ModR/M
    'VEX.ModR/M': ['BS3CG1ENC_VEX_MODRM'],  ##< VEX...ModR/M
    'fixed':      ['BS3CG1ENC_FIXED'],      ##< Fixed encoding (address, registers, unused, etc).
    'VEX.fixed':  ['BS3CG1ENC_VEX_FIXED'],  ##< VEX + fixed encoding (address, registers, unused, etc).
    'prefix':     [None],                   ##< Prefix
};
439
## \@opunused, \@opinvalid, \@opinvlstyle
## Only the keys carry information; every value is its own empty list.
g_kdInvalidStyles = {
    'immediate':               [], ##< CPU stops decoding immediately after the opcode.
    'vex.modrm':               [], ##< VEX+ModR/M, everyone.
    'intel-modrm':             [], ##< Intel decodes ModR/M.
    'intel-modrm-imm8':        [], ##< Intel decodes ModR/M and an 8-bit immediate.
    'intel-opcode-modrm':      [], ##< Intel decodes another opcode byte followed by ModR/M. (Unused extension tables.)
    'intel-opcode-modrm-imm8': [], ##< Intel decodes another opcode byte followed by ModR/M and an 8-bit immediate.
};
449
## Known CPU names.  Only the keys carry information; every value is an empty tuple.
g_kdCpuNames = {
    '8086':  (),
    '80186': (),
    '80286': (),
    '80386': (),
    '80486': (),
};
457
## \@opcpuid
## Maps the feature name used in the tag to the corresponding X86_CPUID_XXX
## feature bit constant name.
g_kdCpuIdFlags = {
    'vme':        'X86_CPUID_FEATURE_EDX_VME',
    'tsc':        'X86_CPUID_FEATURE_EDX_TSC',
    'msr':        'X86_CPUID_FEATURE_EDX_MSR',
    'cx8':        'X86_CPUID_FEATURE_EDX_CX8',
    'sep':        'X86_CPUID_FEATURE_EDX_SEP',
    'cmov':       'X86_CPUID_FEATURE_EDX_CMOV',
    'clfsh':      'X86_CPUID_FEATURE_EDX_CLFSH',
    'clflushopt': 'X86_CPUID_STEXT_FEATURE_EBX_CLFLUSHOPT',
    'mmx':        'X86_CPUID_FEATURE_EDX_MMX',
    'fxsr':       'X86_CPUID_FEATURE_EDX_FXSR',
    'sse':        'X86_CPUID_FEATURE_EDX_SSE',
    'sse2':       'X86_CPUID_FEATURE_EDX_SSE2',
    'sse3':       'X86_CPUID_FEATURE_ECX_SSE3',
    # The next three used to name the neighbouring feature bits (DTES64, CPLDS
    # and TM2 respectively) instead of the feature the key stands for.
    # NOTE(review): constant names assumed to match iprt/x86.h - confirm.
    'pclmul':     'X86_CPUID_FEATURE_ECX_PCLMUL',
    'monitor':    'X86_CPUID_FEATURE_ECX_MONITOR',
    'vmx':        'X86_CPUID_FEATURE_ECX_VMX',
    'smx':        'X86_CPUID_FEATURE_ECX_SMX',
    'ssse3':      'X86_CPUID_FEATURE_ECX_SSSE3',
    'fma':        'X86_CPUID_FEATURE_ECX_FMA',
    'cx16':       'X86_CPUID_FEATURE_ECX_CX16',
    'pcid':       'X86_CPUID_FEATURE_ECX_PCID',
    'sse4.1':     'X86_CPUID_FEATURE_ECX_SSE4_1',
    'sse4.2':     'X86_CPUID_FEATURE_ECX_SSE4_2',
    'movbe':      'X86_CPUID_FEATURE_ECX_MOVBE',
    'popcnt':     'X86_CPUID_FEATURE_ECX_POPCNT',
    'aes':        'X86_CPUID_FEATURE_ECX_AES',
    'xsave':      'X86_CPUID_FEATURE_ECX_XSAVE',
    'avx':        'X86_CPUID_FEATURE_ECX_AVX',
    'avx2':       'X86_CPUID_STEXT_FEATURE_EBX_AVX2',
    'f16c':       'X86_CPUID_FEATURE_ECX_F16C',
    'rdrand':     'X86_CPUID_FEATURE_ECX_RDRAND',

    'axmmx':      'X86_CPUID_AMD_FEATURE_EDX_AXMMX',
    '3dnowext':   'X86_CPUID_AMD_FEATURE_EDX_3DNOW_EX',
    '3dnow':      'X86_CPUID_AMD_FEATURE_EDX_3DNOW',
    'svm':        'X86_CPUID_AMD_FEATURE_ECX_SVM',
    'cr8l':       'X86_CPUID_AMD_FEATURE_ECX_CR8L',
    'abm':        'X86_CPUID_AMD_FEATURE_ECX_ABM',
    'sse4a':      'X86_CPUID_AMD_FEATURE_ECX_SSE4A',
    '3dnowprf':   'X86_CPUID_AMD_FEATURE_ECX_3DNOWPRF',
    'xop':        'X86_CPUID_AMD_FEATURE_ECX_XOP',
    'fma4':       'X86_CPUID_AMD_FEATURE_ECX_FMA4',
};
503
## \@ophints values.
## Maps the hint name to a DISOPTYPE_XXX constant name, or to an empty string
## for the hints at the end of the table.
g_kdHints = {
    'invalid':               'DISOPTYPE_INVALID',
    'harmless':              'DISOPTYPE_HARMLESS',
    'controlflow':           'DISOPTYPE_CONTROLFLOW',
    'potentially_dangerous': 'DISOPTYPE_POTENTIALLY_DANGEROUS',
    'dangerous':             'DISOPTYPE_DANGEROUS',
    'portio':                'DISOPTYPE_PORTIO',
    'privileged':            'DISOPTYPE_PRIVILEGED',
    'privileged_notrap':     'DISOPTYPE_PRIVILEGED_NOTRAP',
    'uncond_controlflow':    'DISOPTYPE_UNCOND_CONTROLFLOW',
    'relative_controlflow':  'DISOPTYPE_RELATIVE_CONTROLFLOW',
    'cond_controlflow':      'DISOPTYPE_COND_CONTROLFLOW',
    'interrupt':             'DISOPTYPE_INTERRUPT',
    'illegal':               'DISOPTYPE_ILLEGAL',
    'rrm_dangerous':         'DISOPTYPE_RRM_DANGEROUS',         ##< Some additional dangerous ones when recompiling raw r0.
    'rrm_dangerous_16':      'DISOPTYPE_RRM_DANGEROUS_16',      ##< Some additional dangerous ones when recompiling 16-bit raw r0.
    'inhibit_irqs':          'DISOPTYPE_INHIBIT_IRQS',          ##< Will or can inhibit irqs (sti, pop ss, mov ss)
    'portio_read':           'DISOPTYPE_PORTIO_READ',
    'portio_write':          'DISOPTYPE_PORTIO_WRITE',
    'invalid_64':            'DISOPTYPE_INVALID_64',            ##< Invalid in 64 bits mode
    'only_64':               'DISOPTYPE_ONLY_64',               ##< Only valid in 64 bits mode
    'default_64_op_size':    'DISOPTYPE_DEFAULT_64_OP_SIZE',    ##< Default 64 bits operand size
    'forced_64_op_size':     'DISOPTYPE_FORCED_64_OP_SIZE',     ##< Forced 64 bits operand size; regardless of prefix bytes
    'rexb_extends_opreg':    'DISOPTYPE_REXB_EXTENDS_OPREG',    ##< REX.B extends the register field in the opcode byte
    'mod_fixed_11':          'DISOPTYPE_MOD_FIXED_11',          ##< modrm.mod is always 11b
    'forced_32_op_size_x86': 'DISOPTYPE_FORCED_32_OP_SIZE_X86', ##< Forced 32 bits operand size; regardless of prefix bytes
                                                                ##  (only in 16 & 32 bits mode!)
    'sse':                   'DISOPTYPE_SSE',                   ##< SSE,SSE2,SSE3,AVX,++ instruction. Not implemented yet!
    'mmx':                   'DISOPTYPE_MMX',                   ##< MMX,MMXExt,3DNow,++ instruction. Not implemented yet!
    'fpu':                   'DISOPTYPE_FPU',                   ##< FPU instruction. Not implemented yet!
    'ignores_oz_pfx':        '',                                ##< Ignores operand size prefix 66h.
    'ignores_rexw':          '',                                ##< Ignores REX.W.
    'ignores_op_sizes':      '',                                ##< Shorthand for "ignores_oz_pfx | ignores_rexw".
    'vex_l_zero':            '',                                ##< VEX.L must be 0.
    'vex_l_ignored':         '',                                ##< VEX.L is ignored.
    'vex_v_zero':            '',                                ##< VEX.V must be 0. (generate sub-table?)
    'lock_allowed':          '',                                ##< Lock prefix allowed.
};
543
## \@opxcpttype values (see SDMv2 2.4, 2.7).
## Only the keys carry information; every value is its own empty list.
g_kdXcptTypes = {
    'none':  [],
    # Numbered types; LZ = VEX.L must be zero.
    '1':     [], '2':     [], '3':     [],
    '4':     [], '4UA':   [],
    '5':     [], '5LZ':   [],
    '6':     [],
    '7':     [], '7LZ':   [],
    '8':     [],
    '11':    [], '12':    [],
    # E-prefixed types.
    'E1':    [], 'E1NF':  [],
    'E2':    [],
    'E3':    [], 'E3NF':  [],
    'E4':    [], 'E4NF':  [],
    'E5':    [], 'E5NF':  [],
    'E6':    [], 'E6NF':  [],
    'E7NF':  [],
    'E9':    [], 'E9NF':  [],
    'E10':   [],
    'E11':   [],
    'E12':   [], 'E12NF': [],
};
579
580
def _isValidOpcodeByte(sOpcode):
    """
    Tells whether sOpcode is an opcode byte written as '0x' followed by exactly
    two lower case hex digits (e.g. '0x0f').
    Returns True if it is, False if not.
    """
    sHexDigits = '0123456789abcdef';
    return (    len(sOpcode) == 4
            and sOpcode[:2] == '0x'
            and sOpcode[2] in sHexDigits
            and sOpcode[3] in sHexDigits);
592
593
class InstructionMap(object):
    """
    Instruction map.

    The opcode map provides the lead opcode bytes (empty for the one byte
    opcode map).  An instruction can be member of multiple opcode maps as long
    as it uses the same opcode value within the map (because of VEX).
    """

    ## Valid sEncoding values.
    kdEncodings = {
        'legacy':   [],
        'vex1':     [],     ##< VEX or EVEX prefix with mmmmm (map select) = 1
        'vex2':     [],     ##< VEX or EVEX prefix with mmmmm (map select) = 2
        'vex3':     [],     ##< VEX or EVEX prefix with mmmmm (map select) = 3
        'xop8':     [],     ##< XOP prefix with mmmmm (map select) = 8
        'xop9':     [],     ##< XOP prefix with mmmmm (map select) = 9
        'xop10':    [],     ##< XOP prefix with mmmmm (map select) = 10
    };
    ## Selectors.
    ## 1. The first value is the number of table entries required by a
    ##    decoder or disassembler for this type of selector.
    ## 2. The second value is how many entries per opcode byte if applicable.
    kdSelectors = {
        'byte':      [  256, 1, ],  ##< next opcode byte selects the instruction (default).
        'byte+pfx':  [ 1024, 4, ],  ##< next opcode byte selects the instruction together with the 0x66, 0xf2 and 0xf3 prefixes.
        '/r':        [    8, 1, ],  ##< modrm.reg selects the instruction.
        'memreg /r': [   16, 1, ],  ##< modrm.reg and (modrm.mod == 3) selects the instruction.
        'mod /r':    [   32, 1, ],  ##< modrm.reg and modrm.mod selects the instruction.
        '!11 /r':    [    8, 1, ],  ##< modrm.reg selects the instruction with modrm.mod != 0y11.
        '11 /r':     [    8, 1, ],  ##< modrm.reg select the instruction with modrm.mod == 0y11.
        '11':        [   64, 1, ],  ##< modrm.reg and modrm.rm select the instruction with modrm.mod == 0y11.
    };

    ## Define the subentry number according to the Instruction::sPrefix
    ## value for 'byte+pfx' selected tables.
    kiPrefixOrder = {
        'none': 0,
        '0x66': 1,
        '0xf3': 2,
        '0xf2': 3,
    };

    def __init__(self, sName, sIemName = None, asLeadOpcodes = None, sSelector = 'byte+pfx',
                 sEncoding = 'legacy', sDisParse = None):
        """
        Creates an empty map.

        sSelector must be a kdSelectors key and sEncoding a kdEncodings key.
        Each asLeadOpcodes entry must be a lower case hex byte string like
        '0x0f', and sDisParse, if given, must start with 'IDX_Parse'.  All of
        this is checked with asserts.
        """
        assert sSelector in self.kdSelectors;
        assert sEncoding in self.kdEncodings;
        if asLeadOpcodes is None:
            asLeadOpcodes = [];
        else:
            for sOpcode in asLeadOpcodes:
                assert _isValidOpcodeByte(sOpcode);
        assert sDisParse is None or sDisParse.startswith('IDX_Parse');

        self.sName          = sName;
        self.sIemName       = sIemName;
        self.asLeadOpcodes  = asLeadOpcodes;    ##< Lead opcode bytes formatted as hex strings like '0x0f'.
        self.sSelector      = sSelector;        ##< The member selector, see kdSelectors.
        self.sEncoding      = sEncoding;        ##< The encoding, see kdEncodings.
        self.aoInstructions = []                # type: List[Instruction]
        self.sDisParse      = sDisParse;        ##< IDX_ParseXXX.

    def copy(self, sNewName, sPrefixFilter = None):
        """
        Copies the table with filtering instruction by sPrefix if not None.

        When filtering a 'byte+pfx' table the copy gets the plain 'byte'
        selector, as the prefix no longer takes part in the selection.  The
        lead opcode list is shared with the original, not duplicated.
        """
        oCopy = InstructionMap(sNewName, sIemName = self.sIemName, asLeadOpcodes = self.asLeadOpcodes,
                               sSelector = 'byte' if sPrefixFilter is not None and self.sSelector == 'byte+pfx'
                                           else self.sSelector,
                               sEncoding = self.sEncoding, sDisParse = self.sDisParse);
        if sPrefixFilter is None:
            oCopy.aoInstructions = list(self.aoInstructions);
        else:
            oCopy.aoInstructions = [oInstr for oInstr in self.aoInstructions if oInstr.sPrefix == sPrefixFilter];
        return oCopy;

    def getTableSize(self):
        """
        Number of table entries. This corresponds directly to the selector.
        """
        return self.kdSelectors[self.sSelector][0];

    def getEntriesPerByte(self):
        """
        Number of table entries per opcode bytes.

        This only really makes sense for the 'byte' and 'byte+pfx' selectors, for
        the others it will just return 1.
        """
        return self.kdSelectors[self.sSelector][1];

    def getInstructionIndex(self, oInstr):
        """
        Returns the table index for the instruction.

        For the selectors other than 'byte' and 'byte+pfx', the value returned
        by oInstr.getOpcodeByte() is picked apart using the ModR/M layout:
        mod in bits 7:6, reg in bits 5:3 and rm in bits 2:0.
        """
        bOpcode = oInstr.getOpcodeByte();

        # The byte selectors are simple.  We need a full opcode byte and need just return it.
        if self.sSelector == 'byte':
            assert oInstr.sOpcode[:2] == '0x' and len(oInstr.sOpcode) == 4, str(oInstr);
            return bOpcode;

        # The byte + prefix selector is similarly simple, though requires a prefix as well as the full opcode.
        if self.sSelector == 'byte+pfx':
            assert oInstr.sOpcode[:2] == '0x' and len(oInstr.sOpcode) == 4, str(oInstr);
            assert self.kiPrefixOrder.get(oInstr.sPrefix, -16384) >= 0;
            return bOpcode * 4 + self.kiPrefixOrder.get(oInstr.sPrefix, -16384);

        # The other selectors needs masking and shifting.
        if self.sSelector == '/r':
            return (bOpcode >> 3) & 0x7;

        if self.sSelector == 'mod /r':
            return (bOpcode >> 3) & 0x1f;

        if self.sSelector == 'memreg /r':
            return ((bOpcode >> 3) & 0x7) | (int((bOpcode >> 6) == 3) << 3);

        # Note! The mod field is the two top bits, so the value to compare the
        #       masked byte with is 0xc0.  (Comparing with 0xc can never match.)
        if self.sSelector == '!11 /r':
            assert (bOpcode & 0xc0) != 0xc0, str(oInstr);
            return (bOpcode >> 3) & 0x7;

        if self.sSelector == '11 /r':
            assert (bOpcode & 0xc0) == 0xc0, str(oInstr);
            return (bOpcode >> 3) & 0x7;

        if self.sSelector == '11':
            assert (bOpcode & 0xc0) == 0xc0, str(oInstr);
            return bOpcode & 0x3f;

        assert False, self.sSelector;
        return -1;

    def getInstructionsInTableOrder(self):
        """
        Get instructions in table order.

        Returns array of instructions.  Normally there is exactly one
        instruction per entry.  However the entry could also be None if
        no instruction was specified for that opcode value.  Or there
        could be a list of instructions to deal with special encodings
        where for instance prefix (e.g. REX.W) encodes a different
        instruction or different CPUs have different instructions or
        prefixes in the same place.
        """
        # Start with empty table.
        cTable  = self.getTableSize();
        aoTable = [None] * cTable;

        # Insert the instructions.  Those without an opcode are left out.
        for oInstr in self.aoInstructions:
            if oInstr.sOpcode:
                idxOpcode = self.getInstructionIndex(oInstr);
                assert idxOpcode < cTable, str(idxOpcode);

                oExisting = aoTable[idxOpcode];
                if oExisting is None:
                    aoTable[idxOpcode] = oInstr;
                elif not isinstance(oExisting, list):
                    # Second instruction for this slot, switch to a list.
                    aoTable[idxOpcode] = [oExisting, oInstr];
                else:
                    oExisting.append(oInstr);

        return aoTable;


    def getDisasTableName(self):
        """
        Returns the disassembler table name for this map.

        The name is 'g_aDisas' followed by the '_' separated words of sName:
        'm', 'r' and two digit lower case hex words keep their underscore,
        all other words are capitalized and appended directly.
        """
        sName = 'g_aDisas';
        for sWord in self.sName.split('_'):
            if sWord == 'm':            # suffix indicating modrm.mod==mem
                sName += '_m';
            elif sWord == 'r':          # suffix indicating modrm.mod==reg
                sName += '_r';
            elif len(sWord) == 2 and re.match('^[a-f0-9][a-f0-9]$', sWord):
                sName += '_' + sWord;
            else:
                sWord  = sWord.replace('grp', 'Grp');
                sWord  = sWord.replace('map', 'Map');
                sName += sWord[0].upper() + sWord[1:];
        return sName;

    def getDisasRangeName(self):
        """
        Returns the disassembler table range name for this map.
        """
        return self.getDisasTableName().replace('g_aDisas', 'g_Disas') + 'Range';

    def isVexMap(self):
        """ Returns True if a VEX map. """
        return self.sEncoding.startswith('vex');
787
class TestType(object):
    """
    Test value type.

    This base class deals with integer like values.  The fUnsigned constructor
    parameter indicates the default stance on zero vs sign extending.  It is
    possible to override fUnsigned=True by prefixing the value with '+' or '-'.
    """
    def __init__(self, sName, acbSizes = None, fUnsigned = True):
        self.sName     = sName
        # The "normal" value sizes in bytes that get() pads a value up to.
        self.acbSizes  = [1, 2, 4, 8, 16, 32] if acbSizes is None else acbSizes
        self.fUnsigned = fUnsigned

    class BadValue(Exception):
        """ Bad value exception. """
        def __init__(self, sMessage):
            Exception.__init__(self, sMessage)
            self.sMessage = sMessage

    ## For ascii ~ operator: maps each lowercase hex digit to its 4-bit complement.
    kdHexInv = {
        '0': 'f',
        '1': 'e',
        '2': 'd',
        '3': 'c',
        '4': 'b',
        '5': 'a',
        '6': '9',
        '7': '8',
        '8': '7',
        '9': '6',
        'a': '5',
        'b': '4',
        'c': '3',
        'd': '2',
        'e': '1',
        'f': '0',
    }

    def get(self, sValue):
        """
        Get the shortest normal sized byte representation of sValue.

        sValue is a decimal or '0x' prefixed hexadecimal number, optionally
        preceded by '+' or '-' (either of which requests sign extension).

        Returns ((fSignExtend, bytearray), ) or ((fSignExtend, bytearray), (fSignExtend, bytearray), ).
        The latter form is for AND+OR pairs where the first entry is what to
        AND with the field and the second the one or OR with.  This base
        implementation always returns the single entry form; the bytearray is
        in little endian byte order.

        Raises BadValue if invalid value.
        """
        if not sValue:
            raise TestType.BadValue('empty value')

        # Deal with sign and detect hexadecimal or decimal.
        fSignExtend = not self.fUnsigned
        sMagnitude  = sValue
        if sValue[0] in ('-', '+'):
            fSignExtend = True
            sMagnitude  = sValue[1:]
        fHex = len(sMagnitude) > 2 and sMagnitude[:2].lower() == '0x'

        # Convert it to an integer.  int() has arbitrary precision (it promotes
        # to long by itself on Python 2), so no Python 2 only 'long' is needed.
        try:
            iValue = int(sValue, 16 if fHex else 10)
        except ValueError as oXcpt:
            raise TestType.BadValue('failed to convert "%s" to integer (%s)' % (sValue, oXcpt))

        # Convert to a hex digit string.  '%x' produces neither the '0x' prefix
        # nor the Python 2 'L' suffix.  Negative values are turned into two's
        # complement by inverting the digits of (-n - 1), i.e. ~(-n - 1) == n.
        if iValue >= 0:
            sHex = '%x' % (iValue,)
        else:
            sHex = ''.join([self.kdHexInv[sDigit] for sDigit in '%x' % (-iValue - 1,)])
            if fSignExtend and sHex[0] not in '89abcdef':
                sHex = 'f' + sHex    # Make sure the top bit is set.

        # Figure out how many digits the value should be padded to: the first
        # listed size that fits, or failing that a multiple of the last size.
        cDigits    = len(sHex)
        cMaxDigits = self.acbSizes[-1] * 2
        if cDigits <= cMaxDigits:
            for cb in self.acbSizes:
                cNaturalDigits = cb * 2
                if cDigits <= cNaturalDigits:
                    break
        else:
            cNaturalDigits = ((cDigits + cMaxDigits - 1) // cMaxDigits) * cMaxDigits

        # Zero or sign extend the digit string to that width.
        if cNaturalDigits != cDigits:
            sPadding = '0' if iValue >= 0 else 'f'
            sHex = (sPadding * (cNaturalDigits - cDigits)) + sHex

        # Convert to bytearray, walking the digit pairs from the end so the
        # least significant byte comes first.
        abValue = bytearray([int(sHex[offHex - 2 : offHex], 16) for offHex in range(len(sHex), 0, -2)])

        return ((fSignExtend, abValue),)

    def validate(self, sValue):
        """
        Returns True if value is okay, error message on failure.
        """
        try:
            self.get(sValue)
        except TestType.BadValue as oXcpt:
            return oXcpt.sMessage
        return True

    def isAndOrPair(self, sValue):
        """
        Checks if sValue is a pair.  Never the case for this base class.
        """
        _ = sValue
        return False
912
913
class TestTypeEflags(TestType):
    """
    Special value parsing for EFLAGS/RFLAGS/FLAGS.

    Values are comma separated flag mnemonics which are looked up in
    g_kdEFlagsMnemonics.  A constant name starting with '!' there means the
    flag is to be cleared rather than set.
    """

    ## The mnemonics that stand for a cleared flag (dictionary used as a set).
    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 }

    def __init__(self, sName):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True)

    def get(self, sValue):
        """
        Converts a comma separated list of flag mnemonics.

        Returns the TestType.get() result for the mask of flags to set when no
        flag is to be cleared, otherwise a (clear-mask entry, set-mask entry)
        pair.

        Raises BadValue on unknown flag mnemonics.
        """
        fClear = 0
        fSet   = 0
        for sFlag in sValue.split(','):
            sConstant = g_kdEFlagsMnemonics.get(sFlag, None)
            if sConstant is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sFlag, sValue))
            if sConstant[0] == '!':
                fClear |= g_kdX86EFlagsConstants[sConstant[1:]]
            else:
                fSet   |= g_kdX86EFlagsConstants[sConstant]

        aoSet = TestType.get(self, '0x%x' % (fSet,))
        if fClear != 0:
            aoClear = TestType.get(self, '%#x' % (fClear,))
            assert self.isAndOrPair(sValue) is True
            return (aoClear[0], aoSet[0])
        assert self.isAndOrPair(sValue) is False
        return aoSet

    def isAndOrPair(self, sValue):
        """
        Checks if sValue contains a flag-clearing mnemonic and thus is a pair.

        The value is split on commas just like get() does, so a mnemonic that
        merely contains one of the clearing mnemonics as a substring does not
        count.
        """
        return any(sFlag in self.kdZeroValueFlags for sFlag in sValue.split(','))
949
class TestTypeFromDict(TestType):
    """
    Special value parsing for values given as a comma separated list of flag
    names, where each name is upper-cased, prefixed with a constant prefix and
    looked up in a dictionary mapping constant names to values.
    """

    ## NOTE(review): not referenced by this class; looks like a leftover from TestTypeEflags.
    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 }

    def __init__(self, sName, kdConstantsAndValues, sConstantPrefix):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True)
        self.kdConstantsAndValues = kdConstantsAndValues
        self.sConstantPrefix      = sConstantPrefix

    def get(self, sValue):
        """
        ORs together the values of all the listed flags and returns the
        TestType.get() result for the combined value.

        Raises BadValue on unknown flag names.
        """
        fCombined = 0
        for sFlag in sValue.split(','):
            sConstant = self.sConstantPrefix + sFlag.upper()
            fFlag     = self.kdConstantsAndValues.get(sConstant, None)
            if fFlag is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sFlag, sValue))
            fCombined |= fFlag
        return TestType.get(self, '0x%x' % (fCombined,))
970
971
class TestInOut(object):
    """
    One input or output state modifier.

    This should be thought as values to modify BS3REGCTX and extended (needs
    to be structured) state.
    """
    ## Assigned operators.
    kasOperators = [
        '&|=',  # Special AND(INV)+OR operator for use with EFLAGS.
        '&~=',
        '&=',
        '|=',
        '=',
    ]
    ## Types
    kdTypes = {
        'uint': TestType('uint', fUnsigned = True),
        # NOTE(review): constructed with the default fUnsigned=True, so as
        #               written this behaves exactly like 'uint' - confirm.
        'int':  TestType('int'),
        'efl':  TestTypeEflags('efl'),
        'cr0':  TestTypeFromDict('cr0', g_kdX86Cr0Constants, 'X86_CR0_'),
        'cr4':  TestTypeFromDict('cr4', g_kdX86Cr4Constants, 'X86_CR4_'),
        'xcr0': TestTypeFromDict('xcr0', g_kdX86XSaveCConstants, 'XSAVE_C_'),
    }
    ## CPU context fields.
    kdFields = {
        # name:         ( default type, [both|input|output], )
        # Operands.
        'op1':          ( 'uint', 'both',   ), ## \@op1
        'op2':          ( 'uint', 'both',   ), ## \@op2
        'op3':          ( 'uint', 'both',   ), ## \@op3
        'op4':          ( 'uint', 'both',   ), ## \@op4
        # Flags.
        'efl':          ( 'efl',  'both',   ),
        'efl_undef':    ( 'uint', 'output', ),
        # 8-bit GPRs.
        'al':           ( 'uint', 'both',   ),
        'cl':           ( 'uint', 'both',   ),
        'dl':           ( 'uint', 'both',   ),
        'bl':           ( 'uint', 'both',   ),
        'ah':           ( 'uint', 'both',   ),
        'ch':           ( 'uint', 'both',   ),
        'dh':           ( 'uint', 'both',   ),
        'bh':           ( 'uint', 'both',   ),
        'r8l':          ( 'uint', 'both',   ),
        'r9l':          ( 'uint', 'both',   ),
        'r10l':         ( 'uint', 'both',   ),
        'r11l':         ( 'uint', 'both',   ),
        'r12l':         ( 'uint', 'both',   ),
        'r13l':         ( 'uint', 'both',   ),
        'r14l':         ( 'uint', 'both',   ),
        'r15l':         ( 'uint', 'both',   ),
        # 16-bit GPRs.
        'ax':           ( 'uint', 'both',   ),
        'dx':           ( 'uint', 'both',   ),
        'cx':           ( 'uint', 'both',   ),
        'bx':           ( 'uint', 'both',   ),
        'sp':           ( 'uint', 'both',   ),
        'bp':           ( 'uint', 'both',   ),
        'si':           ( 'uint', 'both',   ),
        'di':           ( 'uint', 'both',   ),
        'r8w':          ( 'uint', 'both',   ),
        'r9w':          ( 'uint', 'both',   ),
        'r10w':         ( 'uint', 'both',   ),
        'r11w':         ( 'uint', 'both',   ),
        'r12w':         ( 'uint', 'both',   ),
        'r13w':         ( 'uint', 'both',   ),
        'r14w':         ( 'uint', 'both',   ),
        'r15w':         ( 'uint', 'both',   ),
        # 32-bit GPRs.
        'eax':          ( 'uint', 'both',   ),
        'edx':          ( 'uint', 'both',   ),
        'ecx':          ( 'uint', 'both',   ),
        'ebx':          ( 'uint', 'both',   ),
        'esp':          ( 'uint', 'both',   ),
        'ebp':          ( 'uint', 'both',   ),
        'esi':          ( 'uint', 'both',   ),
        'edi':          ( 'uint', 'both',   ),
        'r8d':          ( 'uint', 'both',   ),
        'r9d':          ( 'uint', 'both',   ),
        'r10d':         ( 'uint', 'both',   ),
        'r11d':         ( 'uint', 'both',   ),
        'r12d':         ( 'uint', 'both',   ),
        'r13d':         ( 'uint', 'both',   ),
        'r14d':         ( 'uint', 'both',   ),
        'r15d':         ( 'uint', 'both',   ),
        # 64-bit GPRs.
        'rax':          ( 'uint', 'both',   ),
        'rdx':          ( 'uint', 'both',   ),
        'rcx':          ( 'uint', 'both',   ),
        'rbx':          ( 'uint', 'both',   ),
        'rsp':          ( 'uint', 'both',   ),
        'rbp':          ( 'uint', 'both',   ),
        'rsi':          ( 'uint', 'both',   ),
        'rdi':          ( 'uint', 'both',   ),
        'r8':           ( 'uint', 'both',   ),
        'r9':           ( 'uint', 'both',   ),
        'r10':          ( 'uint', 'both',   ),
        'r11':          ( 'uint', 'both',   ),
        'r12':          ( 'uint', 'both',   ),
        'r13':          ( 'uint', 'both',   ),
        'r14':          ( 'uint', 'both',   ),
        'r15':          ( 'uint', 'both',   ),
        # 16-bit, 32-bit or 64-bit registers according to operand size.
        'oz.rax':       ( 'uint', 'both',   ),
        'oz.rdx':       ( 'uint', 'both',   ),
        'oz.rcx':       ( 'uint', 'both',   ),
        'oz.rbx':       ( 'uint', 'both',   ),
        'oz.rsp':       ( 'uint', 'both',   ),
        'oz.rbp':       ( 'uint', 'both',   ),
        'oz.rsi':       ( 'uint', 'both',   ),
        'oz.rdi':       ( 'uint', 'both',   ),
        'oz.r8':        ( 'uint', 'both',   ),
        'oz.r9':        ( 'uint', 'both',   ),
        'oz.r10':       ( 'uint', 'both',   ),
        'oz.r11':       ( 'uint', 'both',   ),
        'oz.r12':       ( 'uint', 'both',   ),
        'oz.r13':       ( 'uint', 'both',   ),
        'oz.r14':       ( 'uint', 'both',   ),
        'oz.r15':       ( 'uint', 'both',   ),
        # Control registers.
        'cr0':          ( 'cr0',  'both',   ),
        'cr4':          ( 'cr4',  'both',   ),
        'xcr0':         ( 'xcr0', 'both',   ),
        # FPU Registers
        'fcw':          ( 'uint', 'both',   ),
        'fsw':          ( 'uint', 'both',   ),
        'ftw':          ( 'uint', 'both',   ),
        'fop':          ( 'uint', 'both',   ),
        'fpuip':        ( 'uint', 'both',   ),
        'fpucs':        ( 'uint', 'both',   ),
        'fpudp':        ( 'uint', 'both',   ),
        'fpuds':        ( 'uint', 'both',   ),
        'mxcsr':        ( 'uint', 'both',   ),
        'st0':          ( 'uint', 'both',   ),
        'st1':          ( 'uint', 'both',   ),
        'st2':          ( 'uint', 'both',   ),
        'st3':          ( 'uint', 'both',   ),
        'st4':          ( 'uint', 'both',   ),
        'st5':          ( 'uint', 'both',   ),
        'st6':          ( 'uint', 'both',   ),
        'st7':          ( 'uint', 'both',   ),
        # MMX registers.
        'mm0':          ( 'uint', 'both',   ),
        'mm1':          ( 'uint', 'both',   ),
        'mm2':          ( 'uint', 'both',   ),
        'mm3':          ( 'uint', 'both',   ),
        'mm4':          ( 'uint', 'both',   ),
        'mm5':          ( 'uint', 'both',   ),
        'mm6':          ( 'uint', 'both',   ),
        'mm7':          ( 'uint', 'both',   ),
        # SSE registers.
        'xmm0':         ( 'uint', 'both',   ),
        'xmm1':         ( 'uint', 'both',   ),
        'xmm2':         ( 'uint', 'both',   ),
        'xmm3':         ( 'uint', 'both',   ),
        'xmm4':         ( 'uint', 'both',   ),
        'xmm5':         ( 'uint', 'both',   ),
        'xmm6':         ( 'uint', 'both',   ),
        'xmm7':         ( 'uint', 'both',   ),
        'xmm8':         ( 'uint', 'both',   ),
        'xmm9':         ( 'uint', 'both',   ),
        'xmm10':        ( 'uint', 'both',   ),
        'xmm11':        ( 'uint', 'both',   ),
        'xmm12':        ( 'uint', 'both',   ),
        'xmm13':        ( 'uint', 'both',   ),
        'xmm14':        ( 'uint', 'both',   ),
        'xmm15':        ( 'uint', 'both',   ),
        'xmm0.lo':      ( 'uint', 'both',   ),
        'xmm1.lo':      ( 'uint', 'both',   ),
        'xmm2.lo':      ( 'uint', 'both',   ),
        'xmm3.lo':      ( 'uint', 'both',   ),
        'xmm4.lo':      ( 'uint', 'both',   ),
        'xmm5.lo':      ( 'uint', 'both',   ),
        'xmm6.lo':      ( 'uint', 'both',   ),
        'xmm7.lo':      ( 'uint', 'both',   ),
        'xmm8.lo':      ( 'uint', 'both',   ),
        'xmm9.lo':      ( 'uint', 'both',   ),
        'xmm10.lo':     ( 'uint', 'both',   ),
        'xmm11.lo':     ( 'uint', 'both',   ),
        'xmm12.lo':     ( 'uint', 'both',   ),
        'xmm13.lo':     ( 'uint', 'both',   ),
        'xmm14.lo':     ( 'uint', 'both',   ),
        'xmm15.lo':     ( 'uint', 'both',   ),
        'xmm0.hi':      ( 'uint', 'both',   ),
        'xmm1.hi':      ( 'uint', 'both',   ),
        'xmm2.hi':      ( 'uint', 'both',   ),
        'xmm3.hi':      ( 'uint', 'both',   ),
        'xmm4.hi':      ( 'uint', 'both',   ),
        'xmm5.hi':      ( 'uint', 'both',   ),
        'xmm6.hi':      ( 'uint', 'both',   ),
        'xmm7.hi':      ( 'uint', 'both',   ),
        'xmm8.hi':      ( 'uint', 'both',   ),
        'xmm9.hi':      ( 'uint', 'both',   ),
        'xmm10.hi':     ( 'uint', 'both',   ),
        'xmm11.hi':     ( 'uint', 'both',   ),
        'xmm12.hi':     ( 'uint', 'both',   ),
        'xmm13.hi':     ( 'uint', 'both',   ),
        'xmm14.hi':     ( 'uint', 'both',   ),
        'xmm15.hi':     ( 'uint', 'both',   ),
        'xmm0.lo.zx':   ( 'uint', 'both',   ),
        'xmm1.lo.zx':   ( 'uint', 'both',   ),
        'xmm2.lo.zx':   ( 'uint', 'both',   ),
        'xmm3.lo.zx':   ( 'uint', 'both',   ),
        'xmm4.lo.zx':   ( 'uint', 'both',   ),
        'xmm5.lo.zx':   ( 'uint', 'both',   ),
        'xmm6.lo.zx':   ( 'uint', 'both',   ),
        'xmm7.lo.zx':   ( 'uint', 'both',   ),
        'xmm8.lo.zx':   ( 'uint', 'both',   ),
        'xmm9.lo.zx':   ( 'uint', 'both',   ),
        'xmm10.lo.zx':  ( 'uint', 'both',   ),
        'xmm11.lo.zx':  ( 'uint', 'both',   ),
        'xmm12.lo.zx':  ( 'uint', 'both',   ),
        'xmm13.lo.zx':  ( 'uint', 'both',   ),
        'xmm14.lo.zx':  ( 'uint', 'both',   ),
        'xmm15.lo.zx':  ( 'uint', 'both',   ),
        'xmm0.dw0':     ( 'uint', 'both',   ),
        'xmm1.dw0':     ( 'uint', 'both',   ),
        'xmm2.dw0':     ( 'uint', 'both',   ),
        'xmm3.dw0':     ( 'uint', 'both',   ),
        'xmm4.dw0':     ( 'uint', 'both',   ),
        'xmm5.dw0':     ( 'uint', 'both',   ),
        'xmm6.dw0':     ( 'uint', 'both',   ),
        'xmm7.dw0':     ( 'uint', 'both',   ),
        'xmm8.dw0':     ( 'uint', 'both',   ),
        'xmm9.dw0':     ( 'uint', 'both',   ),
        'xmm10.dw0':    ( 'uint', 'both',   ),
        'xmm11.dw0':    ( 'uint', 'both',   ),
        'xmm12.dw0':    ( 'uint', 'both',   ),
        'xmm13.dw0':    ( 'uint', 'both',   ),
        'xmm14.dw0':    ( 'uint', 'both',   ),
        'xmm15.dw0':    ( 'uint', 'both',   ),
        'xmm15_dw0':    ( 'uint', 'both',   ), # Misspelled legacy name of 'xmm15.dw0', kept as an alias.
        # AVX registers.
        'ymm0':         ( 'uint', 'both',   ),
        'ymm1':         ( 'uint', 'both',   ),
        'ymm2':         ( 'uint', 'both',   ),
        'ymm3':         ( 'uint', 'both',   ),
        'ymm4':         ( 'uint', 'both',   ),
        'ymm5':         ( 'uint', 'both',   ),
        'ymm6':         ( 'uint', 'both',   ),
        'ymm7':         ( 'uint', 'both',   ),
        'ymm8':         ( 'uint', 'both',   ),
        'ymm9':         ( 'uint', 'both',   ),
        'ymm10':        ( 'uint', 'both',   ),
        'ymm11':        ( 'uint', 'both',   ),
        'ymm12':        ( 'uint', 'both',   ),
        'ymm13':        ( 'uint', 'both',   ),
        'ymm14':        ( 'uint', 'both',   ),
        'ymm15':        ( 'uint', 'both',   ),

        # Special ones.
        'value.xcpt':   ( 'uint', 'output', ),
    }

    def __init__(self, sField, sOp, sValue, sType):
        """
        Creates a modifier applying sValue (a string, interpreted according
        to the type named sType) to the field sField using the operator sOp.

        sField must be a key of kdFields and sOp one of kasOperators.
        """
        assert sField in self.kdFields
        assert sOp in self.kasOperators
        self.sField = sField
        self.sOp    = sOp
        self.sValue = sValue
        self.sType  = sType
        assert isinstance(sField, str)
        assert isinstance(sOp, str)
        assert isinstance(sType, str)
        assert isinstance(sValue, str)
1238
class TestSelector(object):
    """
    One selector for an instruction test: a variable, a compare operator and
    the value the variable is compared against.
    """
    ## Selector compare operators.
    kasCompareOps = [ '==', '!=' ]

    ## Selector variables and their valid values.
    ## Each valid value maps to an identifier string for that variable/value combination.
    kdVariables = {
        # Operand size.
        'size': {
            'o16':   'size_o16',
            'o32':   'size_o32',
            'o64':   'size_o64',
        },
        # VEX.L value.
        'vex.l': {
            '0':     'vexl_0',
            '1':     'vexl_1',
        },
        # Execution ring.
        'ring': {
            '0':     'ring_0',
            '1':     'ring_1',
            '2':     'ring_2',
            '3':     'ring_3',
            '0..2':  'ring_0_thru_2',
            '1..3':  'ring_1_thru_3',
        },
        # Basic code mode.
        'codebits': {
            '64':    'code_64bit',
            '32':    'code_32bit',
            '16':    'code_16bit',
        },
        # cpu modes.
        'mode': {
            'real':  'mode_real',
            'prot':  'mode_prot',
            'long':  'mode_long',
            'v86':   'mode_v86',
            'smm':   'mode_smm',
            'vmx':   'mode_vmx',
            'svm':   'mode_svm',
        },
        # paging on/off
        'paging': {
            'on':    'paging_on',
            'off':   'paging_off',
        },
        # CPU vendor
        'vendor': {
            'amd':   'vendor_amd',
            'intel': 'vendor_intel',
            'via':   'vendor_via',
        },
    }

    ## Selector shorthand predicates.
    ## These translates into variable expressions.
    kdPredicates = {
        'o16':          'size==o16',
        'o32':          'size==o32',
        'o64':          'size==o64',
        'ring0':        'ring==0',
        '!ring0':       'ring==1..3',
        'ring1':        'ring==1',
        'ring2':        'ring==2',
        'ring3':        'ring==3',
        'user':         'ring==3',
        'supervisor':   'ring==0..2',
        '16-bit':       'codebits==16',
        '32-bit':       'codebits==32',
        '64-bit':       'codebits==64',
        'real':         'mode==real',
        'prot':         'mode==prot',
        'long':         'mode==long',
        'v86':          'mode==v86',
        'smm':          'mode==smm',
        'vmx':          'mode==vmx',
        'svm':          'mode==svm',
        'paging':       'paging==on',
        '!paging':      'paging==off',
        'amd':          'vendor==amd',
        '!amd':         'vendor!=amd',
        'intel':        'vendor==intel',
        '!intel':       'vendor!=intel',
        'via':          'vendor==via',
        '!via':         'vendor!=via',
    }

    def __init__(self, sVariable, sOp, sValue):
        """
        sVariable must be a key of kdVariables, sOp one of kasCompareOps and
        sValue one of the values valid for the given variable.
        """
        assert sVariable in self.kdVariables
        assert sOp in self.kasCompareOps
        assert sValue in self.kdVariables[sVariable]
        self.sVariable = sVariable
        self.sOp       = sOp
        self.sValue    = sValue
1335
1336
1337class InstructionTest(object):
1338 """
1339 Instruction test.
1340 """
1341
1342 def __init__(self, oInstr): # type: (InstructionTest, Instruction)
1343 self.oInstr = oInstr # type: InstructionTest
1344 self.aoInputs = [] # type: list(TestInOut)
1345 self.aoOutputs = [] # type: list(TestInOut)
1346 self.aoSelectors = [] # type: list(TestSelector)
1347
1348 def toString(self, fRepr = False):
1349 """
1350 Converts it to string representation.
1351 """
1352 asWords = [];
1353 if self.aoSelectors:
1354 for oSelector in self.aoSelectors:
1355 asWords.append('%s%s%s' % (oSelector.sVariable, oSelector.sOp, oSelector.sValue,));
1356 asWords.append('/');
1357
1358 for oModifier in self.aoInputs:
1359 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
1360
1361 asWords.append('->');
1362
1363 for oModifier in self.aoOutputs:
1364 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
1365
1366 if fRepr:
1367 return '<' + ' '.join(asWords) + '>';
1368 return ' '.join(asWords);
1369
1370 def __str__(self):
1371 """ Provide string represenation. """
1372 return self.toString(False);
1373
1374 def __repr__(self):
1375 """ Provide unambigious string representation. """
1376 return self.toString(True);
1377
class Operand(object):
    """
    Instruction operand: where it is found and what type it is.
    """

    def __init__(self, sWhere, sType):
        """
        sWhere must be a key of g_kdOpLocations and sType a key of g_kdOpTypes.
        """
        assert sWhere in g_kdOpLocations, sWhere
        assert sType in g_kdOpTypes, sType
        self.sWhere = sWhere    ##< g_kdOpLocations
        self.sType  = sType     ##< g_kdOpTypes

    def usesModRM(self):
        """ Returns True if using some form of ModR/M encoding, i.e. the type starts with E, G or M. """
        chKind = self.sType[0]
        return chKind in ('E', 'G', 'M')
1392
1393
1394
class Instruction(object): # pylint: disable=too-many-instance-attributes
    """
    Instruction.

    Holds everything collected about one instruction: its core description,
    details about the implementing code, where it was found in the sources and
    a few raw intermediate input fields.
    """

    def __init__(self, sSrcFile, iLine):
        ## @name Core attributes.
        ## @{
        self.oParent        = None      ##< The Instruction this one was copied from, see copy().
        self.sMnemonic      = None
        self.sBrief         = None
        self.asDescSections = []        ##< List of strings.
        self.aoMaps         = []        ##< List of InstructionMap.
        self.aoOperands     = []        ##< List of Operand.
        self.sPrefix        = None      ##< Single prefix: None, 'none', 0x66, 0xf3, 0xf2
        self.sOpcode        = None      ##< String, see getOpcodeByte() for the supported forms.
        self.sSubOpcode     = None      ##< String.
        self.sEncoding      = None
        self.asFlTest       = None
        self.asFlModify     = None
        self.asFlUndefined  = None
        self.asFlSet        = None
        self.asFlClear      = None
        self.dHints         = {}        ##< Dictionary of instruction hints, flags, whatnot. (Dictionary for speed; dummy value).
        self.sDisEnum       = None      ##< OP_XXXX value.  Default is based on the uppercased mnemonic.
        self.asCpuIds       = []        ##< The CPUID feature bit names for this instruction. If multiple, assume AND.
        self.asReqFeatures  = []        ##< Which features are required to be enabled to run this instruction.
        self.aoTests        = []        ##< List of InstructionTest.
        self.sMinCpu        = None      ##< Indicates the minimum CPU required for the instruction. Not set when oCpuExpr is.
        self.oCpuExpr       = None      ##< Some CPU restriction expression...
        self.sGroup         = None
        self.fUnused        = False     ##< Unused instruction.
        self.fInvalid       = False     ##< Invalid instruction (like UD2).
        self.sInvalidStyle  = None      ##< Invalid behaviour style (g_kdInvalidStyles),
        self.sXcptType      = None      ##< Exception type (g_kdXcptTypes).
        ## @}

        ## @name Implementation attributes.
        ## @{
        self.sStats         = None
        self.sFunction      = None
        self.fStub          = False
        self.fUdStub        = False
        ## @}

        ## @name Decoding info
        ## @{
        self.sSrcFile       = sSrcFile
        self.iLineCreated   = iLine
        self.iLineCompleted = None
        self.cOpTags        = 0
        self.iLineFnIemOpMacro  = -1
        self.iLineMnemonicMacro = -1
        ## @}

        ## @name Intermediate input fields.
        ## @{
        self.sRawDisOpNo    = None
        self.asRawDisParams = []
        self.sRawIemOpFlags = None
        self.sRawOldOpcodes = None
        self.asCopyTests    = []
        ## @}

    def toString(self, fRepr = False):
        """
        Turn object into a string.

        The result is a '; ' separated list of 'name=value' pairs, leaving out
        pairs whose value is None, enclosed in angle brackets when fRepr is True.
        """
        atFields = [('opcode', self.sOpcode)]
        if self.sPrefix:
            atFields.append(('prefix', self.sPrefix))
        atFields.append(('mnemonic', self.sMnemonic))
        for iOperand, oOperand in enumerate(self.aoOperands):
            atFields.append(('op%u' % (iOperand + 1,), '%s:%s' % (oOperand.sWhere, oOperand.sType,)))
        if self.aoMaps:
            atFields.append(('maps', ','.join([oMap.sName for oMap in self.aoMaps])))
        atFields.append(('encoding', self.sEncoding))
        if self.dHints:
            atFields.append(('hints', ','.join(self.dHints.keys())))
        atFields.append(('disenum', self.sDisEnum))
        if self.asCpuIds:
            atFields.append(('cpuid', ','.join(self.asCpuIds)))
        atFields.append(('group', self.sGroup))
        if self.fUnused:
            atFields.append(('unused', 'True'))
        if self.fInvalid:
            atFields.append(('invalid', 'True'))
        atFields.extend([
            ('invlstyle', self.sInvalidStyle),
            ('fltest',    self.asFlTest),
            ('flmodify',  self.asFlModify),
            ('flundef',   self.asFlUndefined),
            ('flset',     self.asFlSet),
            ('flclear',   self.asFlClear),
            ('mincpu',    self.sMinCpu),
            ('stats',     self.sStats),
            ('sFunction', self.sFunction),
        ])
        if self.fStub:
            atFields.append(('fStub', 'True'))
        if self.fUdStub:
            atFields.append(('fUdStub', 'True'))
        if self.cOpTags:
            atFields.append(('optags', str(self.cOpTags)))
        if self.iLineFnIemOpMacro != -1:
            atFields.append(('FNIEMOP_XXX', str(self.iLineFnIemOpMacro)))
        if self.iLineMnemonicMacro != -1:
            atFields.append(('IEMOP_MNEMMONICn', str(self.iLineMnemonicMacro)))

        sRet = '; '.join(['%s=%s' % (sField, oValue,) for sField, oValue in atFields if oValue is not None])
        if fRepr:
            return '<' + sRet + '>'
        return sRet

    def __str__(self):
        """ Provide string representation. """
        return self.toString(False)

    def __repr__(self):
        """ Provide unambiguous string representation. """
        return self.toString(True)

    def copy(self, oMap = None, sOpcode = None, sSubOpcode = None, sPrefix = None):
        """
        Makes a copy of the object for the purpose of putting in a different map
        or a different place in the current map.

        Arguments that are given (evaluate to true) replace the corresponding
        attribute in the copy (oMap becoming the only entry of aoMaps).  The
        copy gets this object as its oParent.  Lists and the hints dictionary
        are copied shallowly; the flag lists (asFlXxx) are shared with the
        original.
        """
        oCopy = Instruction(self.sSrcFile, self.iLineCreated)

        # Core attributes.
        oCopy.oParent           = self
        oCopy.sMnemonic         = self.sMnemonic
        oCopy.sBrief            = self.sBrief
        oCopy.asDescSections    = list(self.asDescSections)
        oCopy.aoMaps            = [oMap,] if oMap else list(self.aoMaps)
        oCopy.aoOperands        = list(self.aoOperands)     ## Deeper copy?
        oCopy.sPrefix           = sPrefix if sPrefix else self.sPrefix
        oCopy.sOpcode           = sOpcode if sOpcode else self.sOpcode
        oCopy.sSubOpcode        = sSubOpcode if sSubOpcode else self.sSubOpcode
        oCopy.sEncoding         = self.sEncoding
        oCopy.asFlTest          = self.asFlTest
        oCopy.asFlModify        = self.asFlModify
        oCopy.asFlUndefined     = self.asFlUndefined
        oCopy.asFlSet           = self.asFlSet
        oCopy.asFlClear         = self.asFlClear
        oCopy.dHints            = dict(self.dHints)
        oCopy.sDisEnum          = self.sDisEnum
        oCopy.asCpuIds          = list(self.asCpuIds)
        oCopy.asReqFeatures     = list(self.asReqFeatures)
        oCopy.aoTests           = list(self.aoTests)        ## Deeper copy?
        oCopy.sMinCpu           = self.sMinCpu
        oCopy.oCpuExpr          = self.oCpuExpr
        oCopy.sGroup            = self.sGroup
        oCopy.fUnused           = self.fUnused
        oCopy.fInvalid          = self.fInvalid
        oCopy.sInvalidStyle     = self.sInvalidStyle
        oCopy.sXcptType         = self.sXcptType

        # Implementation attributes.
        oCopy.sStats            = self.sStats
        oCopy.sFunction         = self.sFunction
        oCopy.fStub             = self.fStub
        oCopy.fUdStub           = self.fUdStub

        # Decoding info (source file and creation line went to the constructor).
        oCopy.iLineCompleted    = self.iLineCompleted
        oCopy.cOpTags           = self.cOpTags
        oCopy.iLineFnIemOpMacro = self.iLineFnIemOpMacro
        oCopy.iLineMnemonicMacro = self.iLineMnemonicMacro

        # Intermediate input fields.
        oCopy.sRawDisOpNo       = self.sRawDisOpNo
        oCopy.asRawDisParams    = list(self.asRawDisParams)
        oCopy.sRawIemOpFlags    = self.sRawIemOpFlags
        oCopy.sRawOldOpcodes    = self.sRawOldOpcodes
        oCopy.asCopyTests       = list(self.asCopyTests)

        return oCopy

    def getOpcodeByte(self):
        """
        Decodes sOpcode into a byte range integer value.

        Supported forms are '0xNN' (the byte itself), '/r' (r shifted into the
        ModR/M reg field), '11/r' (ditto with mod=3) and '!11/r' (ditto with
        mod=2 standing in for the non-register forms).

        Raises exception if sOpcode is None or invalid.
        """
        if self.sOpcode is None:
            raise Exception('No opcode byte for %s!' % (self,))
        sOpcode = str(self.sOpcode)     # pylint type confusion workaround.

        # Full hex byte form.
        if sOpcode[:2] == '0x':
            return int(sOpcode, 16)

        if sOpcode[-1:].isdigit():
            iReg = int(sOpcode[-1:]) << 3

            # The /r form (slash + one digit, i.e. two characters):
            if len(sOpcode) == 2 and sOpcode.startswith('/'):
                return iReg

            # The 11/r form:
            if len(sOpcode) == 4 and sOpcode.startswith('11/'):
                return iReg | 0xc0

            # The !11/r form (returns mod=2):
            ## @todo this doesn't really work...
            if len(sOpcode) == 5 and sOpcode.startswith('!11/'):
                return iReg | 0x80

        raise Exception('unsupported opcode byte spec "%s" for %s' % (sOpcode, self,))

    @staticmethod
    def _flagsToIntegerMask(asFlags):
        """
        Returns the integer mask value for asFlags (0 if None or empty).

        Each flag is translated to a constant name by g_kdEFlagsMnemonics and
        then to a value by g_kdX86EFlagsConstants.
        """
        if not asFlags:
            return 0
        uRet = 0
        for sFlag in asFlags:
            sConstant = g_kdEFlagsMnemonics[sFlag]
            assert sConstant[0] != '!', sConstant
            uRet |= g_kdX86EFlagsConstants[sConstant]
        return uRet

    def getTestedFlagsMask(self):
        """ Returns asFlTest into a integer mask value """
        return self._flagsToIntegerMask(self.asFlTest)

    def getModifiedFlagsMask(self):
        """ Returns asFlModify into a integer mask value """
        return self._flagsToIntegerMask(self.asFlModify)

    def getUndefinedFlagsMask(self):
        """ Returns asFlUndefined into a integer mask value """
        return self._flagsToIntegerMask(self.asFlUndefined)

    def getSetFlagsMask(self):
        """ Returns asFlSet into a integer mask value """
        return self._flagsToIntegerMask(self.asFlSet)

    def getClearedFlagsMask(self):
        """ Returns asFlClear into a integer mask value """
        return self._flagsToIntegerMask(self.asFlClear)

    def onlyInVexMaps(self):
        """ Returns True if only in VEX maps, otherwise False.  (No maps -> False) """
        if not self.aoMaps:
            return False
        return all(oMap.isVexMap() for oMap in self.aoMaps)
1633
1634
1635
## Every instruction (list of Instruction).
g_aoAllInstructions = []

## Every instruction, keyed by statistics name (opstat); values are Instruction objects.
g_dAllInstructionsByStat = {}

## Every instruction, keyed by function name (opfunction); values are lists of Instruction objects.
g_dAllInstructionsByFunction = {}

## The instructions tagged by oponlytest (list of Instruction).
g_aoOnlyTestInstructions = []
1647
## Instruction maps.
g_aoInstructionMaps = [
    # One byte opcode map and the groups hanging off it.
    InstructionMap('one',        'g_apfnOneByteMap',                                         sSelector = 'byte'),
    InstructionMap('grp1_80',    asLeadOpcodes = ['0x80'],                                   sSelector = '/r'),
    InstructionMap('grp1_81',    asLeadOpcodes = ['0x81'],                                   sSelector = '/r'),
    InstructionMap('grp1_82',    asLeadOpcodes = ['0x82'],                                   sSelector = '/r'),
    InstructionMap('grp1_83',    asLeadOpcodes = ['0x83'],                                   sSelector = '/r'),
    InstructionMap('grp1a',      asLeadOpcodes = ['0x8f'],                                   sSelector = '/r'),
    InstructionMap('grp2_c0',    asLeadOpcodes = ['0xc0'],                                   sSelector = '/r'),
    InstructionMap('grp2_c1',    asLeadOpcodes = ['0xc1'],                                   sSelector = '/r'),
    InstructionMap('grp2_d0',    asLeadOpcodes = ['0xd0'],                                   sSelector = '/r'),
    InstructionMap('grp2_d1',    asLeadOpcodes = ['0xd1'],                                   sSelector = '/r'),
    InstructionMap('grp2_d2',    asLeadOpcodes = ['0xd2'],                                   sSelector = '/r'),
    InstructionMap('grp2_d3',    asLeadOpcodes = ['0xd3'],                                   sSelector = '/r'),
    ## @todo g_apfnEscF1_E0toFF
    InstructionMap('grp3_f6',    asLeadOpcodes = ['0xf6'],                                   sSelector = '/r'),
    InstructionMap('grp3_f7',    asLeadOpcodes = ['0xf7'],                                   sSelector = '/r'),
    InstructionMap('grp4',       asLeadOpcodes = ['0xfe'],                                   sSelector = '/r'),
    InstructionMap('grp5',       asLeadOpcodes = ['0xff'],                                   sSelector = '/r'),
    InstructionMap('grp11_c6_m', asLeadOpcodes = ['0xc6'],                                   sSelector = '!11 /r'),
    InstructionMap('grp11_c6_r', asLeadOpcodes = ['0xc6'],                                   sSelector = '11'),     # xabort
    InstructionMap('grp11_c7_m', asLeadOpcodes = ['0xc7'],                                   sSelector = '!11 /r'),
    InstructionMap('grp11_c7_r', asLeadOpcodes = ['0xc7'],                                   sSelector = '11'),     # xbegin

    # Two byte opcode map (0x0f escape) and its groups.
    InstructionMap('two0f',      'g_apfnTwoByteMap',    asLeadOpcodes = ['0x0f'],            sDisParse = 'IDX_ParseTwoByteEsc'),
    InstructionMap('grp6',       'g_apfnGroup6',        asLeadOpcodes = ['0x0f', '0x00'],    sSelector = '/r'),
    InstructionMap('grp7_m',     'g_apfnGroup7Mem',     asLeadOpcodes = ['0x0f', '0x01'],    sSelector = '!11 /r'),
    InstructionMap('grp7_r',                            asLeadOpcodes = ['0x0f', '0x01'],    sSelector = '11'),
    InstructionMap('grp8',                              asLeadOpcodes = ['0x0f', '0xba'],    sSelector = '/r'),
    InstructionMap('grp9',       'g_apfnGroup9RegReg',  asLeadOpcodes = ['0x0f', '0xc7'],    sSelector = 'mod /r'),
    ## @todo What about g_apfnGroup9MemReg?
    InstructionMap('grp10',      None,                  asLeadOpcodes = ['0x0f', '0xb9'],    sSelector = '/r'),     # UD1 /w modr/m
    InstructionMap('grp12',      'g_apfnGroup12RegReg', asLeadOpcodes = ['0x0f', '0x71'],    sSelector = 'mod /r'),
    InstructionMap('grp13',      'g_apfnGroup13RegReg', asLeadOpcodes = ['0x0f', '0x72'],    sSelector = 'mod /r'),
    InstructionMap('grp14',      'g_apfnGroup14RegReg', asLeadOpcodes = ['0x0f', '0x73'],    sSelector = 'mod /r'),
    InstructionMap('grp15',      'g_apfnGroup15MemReg', asLeadOpcodes = ['0x0f', '0xae'],    sSelector = 'memreg /r'),
    ## @todo What about g_apfnGroup15RegReg?
    InstructionMap('grp16',                             asLeadOpcodes = ['0x0f', '0x18'],    sSelector = 'mod /r'),
    InstructionMap('grpA17',                            asLeadOpcodes = ['0x0f', '0x78'],    sSelector = '/r'),     # AMD: EXTRQ weirdness
    InstructionMap('grpP',                              asLeadOpcodes = ['0x0f', '0x0d'],    sSelector = '/r'),     # AMD: prefetch

    # Three byte opcode maps.
    InstructionMap('three0f38',  'g_apfnThreeByte0f38', asLeadOpcodes = ['0x0f', '0x38']),
    InstructionMap('three0f3a',  'g_apfnThreeByte0f3a', asLeadOpcodes = ['0x0f', '0x3a']),

    # VEX maps and groups.
    InstructionMap('vexmap1',    'g_apfnVexMap1',          sEncoding = 'vex1'),
    InstructionMap('vexgrp12',   'g_apfnVexGroup12RegReg', sEncoding = 'vex1', asLeadOpcodes = ['0x71'], sSelector = 'mod /r'),
    InstructionMap('vexgrp13',   'g_apfnVexGroup13RegReg', sEncoding = 'vex1', asLeadOpcodes = ['0x72'], sSelector = 'mod /r'),
    InstructionMap('vexgrp14',   'g_apfnVexGroup14RegReg', sEncoding = 'vex1', asLeadOpcodes = ['0x73'], sSelector = 'mod /r'),
    InstructionMap('vexgrp15',   'g_apfnVexGroup15MemReg', sEncoding = 'vex1', asLeadOpcodes = ['0xae'], sSelector = 'memreg /r'),
    InstructionMap('vexgrp17',   'g_apfnVexGroup17_f3',    sEncoding = 'vex1', asLeadOpcodes = ['0xf3'], sSelector = '/r'),

    InstructionMap('vexmap2',    'g_apfnVexMap2',          sEncoding = 'vex2'),
    InstructionMap('vexmap3',    'g_apfnVexMap3',          sEncoding = 'vex3'),

    # 3DNow! and XOP maps and groups.
    InstructionMap('3dnow',      asLeadOpcodes = ['0x0f', '0x0f']),
    InstructionMap('xopmap8',    sEncoding = 'xop8'),
    InstructionMap('xopmap9',    sEncoding = 'xop9'),
    InstructionMap('xopgrp1',    sEncoding = 'xop9',  asLeadOpcodes = ['0x01'], sSelector = '/r'),
    InstructionMap('xopgrp2',    sEncoding = 'xop9',  asLeadOpcodes = ['0x02'], sSelector = '/r'),
    InstructionMap('xopgrp3',    sEncoding = 'xop9',  asLeadOpcodes = ['0x12'], sSelector = '/r'),
    InstructionMap('xopmap10',   sEncoding = 'xop10'),
    InstructionMap('xopgrp4',    sEncoding = 'xop10', asLeadOpcodes = ['0x12'], sSelector = '/r'),
]

## The instruction maps keyed by map name (sName).
g_dInstructionMaps = dict((oMap.sName, oMap) for oMap in g_aoInstructionMaps)

## The instruction maps keyed by their sIemName attribute.
g_dInstructionMapsByIemName = dict((oMap.sIemName, oMap) for oMap in g_aoInstructionMaps)
1713
1714
1715
class ParserException(Exception):
    """
    Exception type used by the parser; it carries a single message string.
    """

    def __init__(self, sMessage):
        super(ParserException, self).__init__(sMessage);
1720
1721
1722class SimpleParser(object):
1723 """
1724 Parser of IEMAllInstruction*.cpp.h instruction specifications.
1725 """
1726
1727 ## @name Parser state.
1728 ## @{
1729 kiCode = 0;
1730 kiCommentMulti = 1;
1731 ## @}
1732
def __init__(self, sSrcFile, asLines, sDefaultMap):
    """
    Sets up the parser state for one source file.

    sSrcFile is the file name used as prefix in error messages, asLines is
    the list of source lines to parse and sDefaultMap is the name of the
    instruction map (a key in g_dInstructionMaps) to use for instructions
    that do not name a map themselves.
    """
    self.sSrcFile       = sSrcFile;
    self.asLines        = asLines;
    self.iLine          = 0;
    self.iState         = self.kiCode;
    self.sComment       = '';
    self.iCommentLine   = 0;
    self.aoCurInstrs    = [];

    assert sDefaultMap in g_dInstructionMaps;
    self.oDefaultMap    = g_dInstructionMaps[sDefaultMap];

    # Statistics counters.
    self.cTotalInstr    = 0;
    self.cTotalStubs    = 0;
    self.cTotalTagged   = 0;

    # Validation patterns.  Raw string literals are used so that regex
    # escapes such as \[ and \d are not treated as (invalid) string escapes.
    self.oReMacroName   = re.compile(r'^[A-Za-z_][A-Za-z0-9_]*$');
    self.oReMnemonic    = re.compile(r'^[A-Za-z_][A-Za-z0-9_]*$');
    self.oReStatsName   = re.compile(r'^[A-Za-z_][A-Za-z0-9_]*$');
    self.oReFunctionName= re.compile(r'^iemOp_[A-Za-z_][A-Za-z0-9_]*$');
    self.oReGroupName   = re.compile(r'^og_[a-z0-9]+(|_[a-z0-9]+|_[a-z0-9]+_[a-z0-9]+)$');
    self.oReDisEnum     = re.compile(r'^OP_[A-Z0-9_]+$');
    self.oReFunTable    = re.compile(r'^(IEM_STATIC|static) +const +PFNIEMOP +g_apfn[A-Za-z0-9_]+ *\[ *\d* *\] *= *$');
    self.oReComment     = re.compile(r'//.*?$|/\*.*?\*/'); ## Full comments.
    self.fDebug         = True;

    # Maps each comment tag to the method handling it.
    self.dTagHandlers   = {
        '@opbrief':     self.parseTagOpBrief,
        '@opdesc':      self.parseTagOpDesc,
        '@opmnemonic':  self.parseTagOpMnemonic,
        '@op1':         self.parseTagOpOperandN,
        '@op2':         self.parseTagOpOperandN,
        '@op3':         self.parseTagOpOperandN,
        '@op4':         self.parseTagOpOperandN,
        '@oppfx':       self.parseTagOpPfx,
        '@opmaps':      self.parseTagOpMaps,
        '@opcode':      self.parseTagOpcode,
        '@opcodesub':   self.parseTagOpcodeSub,
        '@openc':       self.parseTagOpEnc,
        '@opfltest':    self.parseTagOpEFlags,
        '@opflmodify':  self.parseTagOpEFlags,
        '@opflundef':   self.parseTagOpEFlags,
        '@opflset':     self.parseTagOpEFlags,
        '@opflclear':   self.parseTagOpEFlags,
        '@ophints':     self.parseTagOpHints,
        '@opdisenum':   self.parseTagOpDisEnum,
        '@opmincpu':    self.parseTagOpMinCpu,
        '@opcpuid':     self.parseTagOpCpuId,
        '@opgroup':     self.parseTagOpGroup,
        '@opunused':    self.parseTagOpUnusedInvalid,
        '@opinvalid':   self.parseTagOpUnusedInvalid,
        '@opinvlstyle': self.parseTagOpUnusedInvalid,
        '@optest':      self.parseTagOpTest,
        '@optestign':   self.parseTagOpTestIgnore,
        '@optestignore': self.parseTagOpTestIgnore,
        '@opcopytests': self.parseTagOpCopyTests,
        '@oponly':      self.parseTagOpOnlyTest,
        '@oponlytest':  self.parseTagOpOnlyTest,
        '@opxcpttype':  self.parseTagOpXcptType,
        '@opstats':     self.parseTagOpStats,
        '@opfunction':  self.parseTagOpFunction,
        '@opdone':      self.parseTagOpDone,
    };
    # Numbered test tags, both the '@optestN' and the '@optest[N]' spelling.
    for i in range(48):
        self.dTagHandlers['@optest%u' % (i,)]   = self.parseTagOpTestNum;
        self.dTagHandlers['@optest[%u]' % (i,)] = self.parseTagOpTestNum;

    # Error messages queued by error(), errorOnLine() and errorComment().
    self.asErrors = [];
1801
def raiseError(self, sMessage):
    """
    Raises a ParserException with sMessage prefixed by the source file name
    and the current line number (self.iLine).
    """
    sFullMessage = "%s:%d: error: %s" % (self.sSrcFile, self.iLine, sMessage,);
    raise ParserException(sFullMessage);
1807
def raiseCommentError(self, iLineInComment, sMessage):
    """
    Raises a ParserException like raiseError does, except that the reported
    line number is self.iCommentLine + iLineInComment.
    """
    iReportLine  = self.iCommentLine + iLineInComment;
    sFullMessage = "%s:%d: error: %s" % (self.sSrcFile, iReportLine, sMessage,);
    raise ParserException(sFullMessage);
1813
def error(self, sMessage):
    """
    Queues an error message for the current line (self.iLine).

    Always returns False, which lets callers write 'return self.error(...)'.
    """
    sEntry = u'%s:%d: error: %s\n' % (self.sSrcFile, self.iLine, sMessage,);
    self.asErrors.append(sEntry);
    return False;
1821
def errorOnLine(self, iLine, sMessage):
    """
    Queues an error message for the explicitly given line number.

    Always returns False.
    """
    sEntry = u'%s:%d: error: %s\n' % (self.sSrcFile, iLine, sMessage,);
    self.asErrors.append(sEntry);
    return False;
1829
def errorComment(self, iLineInComment, sMessage):
    """
    Queues an error message for a line inside the current comment; the
    reported line number is self.iCommentLine + iLineInComment.

    Always returns False.
    """
    iReportLine = self.iCommentLine + iLineInComment;
    sEntry = u'%s:%d: error: %s\n' % (self.sSrcFile, iReportLine, sMessage,);
    self.asErrors.append(sEntry);
    return False;
1837
def printErrors(self):
    """
    Writes all queued error messages to stderr in one go.

    Returns the number of queued errors.
    """
    cErrors = len(self.asErrors);
    if cErrors > 0:
        sys.stderr.write(u''.join(self.asErrors));
    return cErrors;
1846
def debug(self, sMessage):
    """
    Prints sMessage with a 'debug: ' prefix, but only when self.fDebug is set.
    """
    if not self.fDebug:
        return;
    print('debug: %s' % (sMessage,));
1853
def stripComments(self, sLine):
    """
    Replaces each comment matched by self.oReComment with a single space and
    returns the resulting line.

    If a comment start or end marker is still present afterwards, an error
    about an unsupported multi-line comment is queued (the line is still
    returned).
    """
    sStripped = self.oReComment.sub(" ", sLine);
    if '/*' in sStripped or '*/' in sStripped:
        self.error('Unexpected multi-line comment will not be handled correctly. Please simplify.');
    return sStripped;
1864
def parseFunctionTable(self, sLine):
    """
    Parses a PFNIEMOP table, updating/checking the @oppfx value.

    sLine is the line holding the table declaration; the table body is read
    from self.asLines starting at self.iLine.

    Returns True when the closing brace is found and the table has the
    expected number of entries.  Otherwise an error is queued and False is
    returned.

    Note! Updates iLine as it consumes the whole table.
    """

    #
    # Extract the table name.
    #
    sName = re.search(r' *([a-zA-Z_0-9]+) *\[', sLine).group(1);
    oMap  = g_dInstructionMapsByIemName.get(sName);
    if not oMap:
        self.debug('No map for PFNIEMOP table: %s' % (sName,));
        oMap = self.oDefaultMap; # This is wrong wrong wrong.

    #
    # All but the g_apfnOneByteMap & g_apfnEscF1_E0toFF tables uses four
    # entries per byte:
    #   no prefix, 066h prefix, f3h prefix, f2h prefix
    # Those tables has 256 & 32 entries respectively.
    #
    cEntriesPerByte   = 4;
    cValidTableLength = 1024;
    asPrefixes        = ('none', '0x66', '0xf3', '0xf2');

    oEntriesMatch = re.search(r'\[ *(256|32) *\]', sLine);
    if oEntriesMatch:
        cEntriesPerByte   = 1;
        cValidTableLength = int(oEntriesMatch.group(1));
        asPrefixes        = (None,);

    #
    # The next line should be '{' and nothing else.
    #
    if self.iLine >= len(self.asLines) or not re.match('^ *{ *$', self.asLines[self.iLine]):
        return self.errorOnLine(self.iLine + 1, 'Expected lone "{" on line following PFNIEMOP table %s start' % (sName, ));
    self.iLine += 1;

    #
    # Parse till we find the end of the table.
    #
    iEntry = 0;
    while self.iLine < len(self.asLines):
        # Get the next line and strip comments and spaces (assumes no
        # multi-line comments).
        sLine = self.asLines[self.iLine];
        self.iLine += 1;
        sLine = self.stripComments(sLine).strip();

        # Split the line up into entries, expanding IEMOP_X4 usage.  The list
        # is walked backwards so that inserting and deleting entries does not
        # disturb the indexes still to be visited.
        asEntries = sLine.split(',');
        for i in range(len(asEntries) - 1, -1, -1):
            sEntry = asEntries[i].strip();
            if sEntry.startswith('IEMOP_X4(') and sEntry[-1] == ')':
                sEntry = (sEntry[len('IEMOP_X4('):-1]).strip();
                asEntries.insert(i + 1, sEntry);
                asEntries.insert(i + 1, sEntry);
                asEntries.insert(i + 1, sEntry);
            if sEntry:
                asEntries[i] = sEntry;
            else:
                del asEntries[i];

        # Process the entries.
        for sEntry in asEntries:
            if sEntry in ('};', '}'):
                if iEntry != cValidTableLength:
                    return self.error('Wrong table length for %s: %#x, expected %#x' % (sName, iEntry, cValidTableLength, ));
                return True;
            if sEntry.startswith('iemOp_Invalid'):
                pass; # skip
            else:
                # Look up matching instruction by function.
                sPrefix = asPrefixes[iEntry % cEntriesPerByte];
                sOpcode = '%#04x' % (iEntry // cEntriesPerByte);
                aoInstr = g_dAllInstructionsByFunction.get(sEntry);
                if aoInstr:
                    if not isinstance(aoInstr, list):
                        aoInstr = [aoInstr,];
                    # Pick the first instruction that either matches this
                    # table slot or still lacks the opcode/prefix details
                    # (which are then filled in from the slot).
                    oInstr = None;
                    for oCurInstr in aoInstr:
                        if oCurInstr.sOpcode == sOpcode and oCurInstr.sPrefix == sPrefix:
                            pass;
                        elif oCurInstr.sOpcode == sOpcode and oCurInstr.sPrefix is None:
                            oCurInstr.sPrefix = sPrefix;
                        elif oCurInstr.sOpcode is None and oCurInstr.sPrefix is None:
                            oCurInstr.sOpcode = sOpcode;
                            oCurInstr.sPrefix = sPrefix;
                        else:
                            continue;
                        oInstr = oCurInstr;
                        break;
                    # No usable candidate: clone the first instruction for
                    # this slot and register the copy everywhere.
                    if not oInstr:
                        oInstr = aoInstr[0].copy(oMap = oMap, sOpcode = sOpcode, sPrefix = sPrefix);
                        aoInstr.append(oInstr);
                        g_dAllInstructionsByFunction[sEntry] = aoInstr;
                        g_aoAllInstructions.append(oInstr);
                        oMap.aoInstructions.append(oInstr);
                else:
                    self.debug('Function "%s", entry %#04x / byte %#04x in %s, is not associated with an instruction.'
                               % (sEntry, iEntry, iEntry // cEntriesPerByte, sName,));
            iEntry += 1;

    return self.error('Unexpected end of file in PFNIEMOP table');
1970
def addInstruction(self, iLine = None):
    """
    Creates a new Instruction for this source file and registers it both in
    the global instruction list and in the list of current instructions.

    The instruction is created for line iLine, or for the current line
    (self.iLine) when iLine is None.  Returns the new instruction.
    """
    iCreatedAt = iLine if iLine is not None else self.iLine;
    oNewInstr  = Instruction(self.sSrcFile, iCreatedAt);
    g_aoAllInstructions.append(oNewInstr);
    self.aoCurInstrs.append(oNewInstr);
    return oNewInstr;
1979
def deriveMnemonicAndOperandsFromStats(self, oInstr, sStats):
    """
    Derives the mnemonic and operands from a IEM stats base name like string
    ('<mnemonic>[_<optype>[_<optype>...]]').

    Nothing is done if the instruction already has a mnemonic.  Operands are
    only derived when the instruction has none yet.  Returns False as soon
    as an operand type is not found in g_kdOpTypes (operands added before
    that point are kept), otherwise True.
    """
    if oInstr.sMnemonic is not None:
        return True;

    asParts = sStats.split('_');
    oInstr.sMnemonic = asParts[0].lower();
    if len(asParts) <= 1 or oInstr.aoOperands:
        return True;

    for sOpType in asParts[1:]:
        if sOpType not in g_kdOpTypes:
            #return self.error('unknown operand type: %s (instruction: %s)' % (sOpType, oInstr))
            return False;
        oInstr.aoOperands.append(Operand(g_kdOpTypes[sOpType][1], sOpType));
    return True;
1995
def doneInstructionOne(self, oInstr, iLine):
    """
    Completes one instruction: records the completion line, fills in
    attributes that were not given explicitly and registers the instruction
    in its maps and in the global lookup dictionaries.

    Returns True.
    """
    assert oInstr.iLineCompleted is None;
    oInstr.iLineCompleted = iLine;

    #
    # Common defaults.
    #

    # No mnemonic given: guess it (and the operands) from the stats name,
    # or failing that from the function name without its 'iemOp_' prefix.
    if oInstr.sMnemonic is None:
        if oInstr.sStats is not None:
            self.deriveMnemonicAndOperandsFromStats(oInstr, oInstr.sStats);
        elif oInstr.sFunction is not None:
            self.deriveMnemonicAndOperandsFromStats(oInstr, oInstr.sFunction.replace('iemOp_', ''));

    # The disassembler enum defaults to OP_<MNEMONIC>.
    if oInstr.sDisEnum is None and oInstr.sMnemonic is not None:
        oInstr.sDisEnum = 'OP_' + oInstr.sMnemonic.upper();

    # The statistics base name defaults to the function name (sans prefix)
    # or else to the mnemonic followed by the operand types.
    if oInstr.sStats is None:
        if oInstr.sFunction is not None:
            oInstr.sStats = oInstr.sFunction.replace('iemOp_', '');
        elif oInstr.sMnemonic is not None:
            oInstr.sStats = oInstr.sMnemonic \
                          + ''.join(['_' + oOperand.sType for oOperand in oInstr.aoOperands if oOperand.sType]);

    # The function name defaults to iemOp_<mnemonic>[_<optype>...] or else
    # to iemOp_<stats name>.
    if oInstr.sFunction is None:
        if oInstr.sMnemonic is not None:
            oInstr.sFunction = 'iemOp_' + oInstr.sMnemonic \
                             + ''.join(['_' + oOperand.sType for oOperand in oInstr.aoOperands if oOperand.sType]);
        elif oInstr.sStats:
            oInstr.sFunction = 'iemOp_' + oInstr.sStats;

    #
    # Apply the default map if none was given, then add the instruction to
    # every map it belongs to.
    #
    if not oInstr.aoMaps:
        oInstr.aoMaps = [ self.oDefaultMap, ];
    for oMap in oInstr.aoMaps:
        oMap.aoInstructions.append(oInstr);

    #
    # Derive the encoding from operands and maps when not given.  Note that
    # nothing is set when the first operand does not use ModR/M.
    #
    if oInstr.sEncoding is None:
        if not oInstr.aoOperands:
            if oInstr.fUnused and oInstr.sSubOpcode:
                sPlain = 'ModR/M';
            else:
                sPlain = 'fixed';
            oInstr.sEncoding = ('VEX.' + sPlain) if oInstr.onlyInVexMaps() else sPlain;
        elif oInstr.aoOperands[0].usesModRM():
            fSecondInVvvv = len(oInstr.aoOperands) >= 2 and oInstr.aoOperands[1].sWhere == 'vvvv';
            if fSecondInVvvv or oInstr.onlyInVexMaps():
                oInstr.sEncoding = 'VEX.ModR/M';
            else:
                oInstr.sEncoding = 'ModR/M';

    #
    # The stats name must be unique; index the instruction by it.
    #
    if oInstr.sStats:
        if oInstr.sStats in g_dAllInstructionsByStat:
            self.error('Duplicate opstat value "%s"\nnew: %s\nold: %s'
                       % (oInstr.sStats, oInstr, g_dAllInstructionsByStat[oInstr.sStats],));
        else:
            g_dAllInstructionsByStat[oInstr.sStats] = oInstr;

    #
    # Index by function name.  Several instructions may share a function.
    #
    if oInstr.sFunction:
        g_dAllInstructionsByFunction.setdefault(oInstr.sFunction, []).append(oInstr);

    return True;
2101
def doneInstructions(self, iLineInComment = None):
    """
    Completes all current instructions and resets the per-instruction state
    (current comment text and current instruction list).

    The completion line is self.iLine, or self.iCommentLine + iLineInComment
    when iLineInComment is given.  Updates the stub and instruction
    counters.  Returns True.
    """
    for oInstr in self.aoCurInstrs:
        if iLineInComment is None:
            iDoneLine = self.iLine;
        else:
            iDoneLine = self.iCommentLine + iLineInComment;
        self.doneInstructionOne(oInstr, iDoneLine);
        if oInstr.fStub:
            self.cTotalStubs += 1;

    self.cTotalInstr += len(self.aoCurInstrs);

    self.sComment    = '';
    self.aoCurInstrs = [];
    return True;
2116
def setInstrunctionAttrib(self, sAttrib, oValue, fOverwrite = False):
    """
    Sets attribute sAttrib of every current instruction to oValue.

    Unless fOverwrite is True, only attributes that are currently None are
    replaced.
    """
    for oInstr in self.aoCurInstrs:
        if fOverwrite is True or getattr(oInstr, sAttrib) is None:
            setattr(oInstr, sAttrib, oValue);
2128
def setInstrunctionArrayAttrib(self, sAttrib, iEntry, oValue, fOverwrite = False):
    """
    Sets entry iEntry of the list attribute sAttrib of every current
    instruction to oValue, padding the list with None entries as needed.

    Unless fOverwrite is True, only entries that are currently None are
    replaced.
    """
    for oInstr in self.aoCurInstrs:
        aoArray  = getattr(oInstr, sAttrib);
        cMissing = iEntry + 1 - len(aoArray);
        if cMissing > 0:
            aoArray.extend([None] * cMissing);
        if fOverwrite is True or aoArray[iEntry] is None:
            aoArray[iEntry] = oValue;
2140
def parseCommentOldOpcode(self, asLines):
    """
    Deals with 'Opcode 0xff /4' like comments.

    If the first comment line starts with the word 'Opcode' followed by a
    word with a 0x/0X prefix, everything after 'Opcode' is stored (space
    separated, with '0X' prefixes normalized to '0x') as the sRawOldOpcodes
    attribute of the current instructions, unless already set.

    Always returns False.
    """
    asWords = asLines[0].split() if asLines else [];
    if     len(asWords) >= 2 \
       and asWords[0] == 'Opcode' \
       and asWords[1].lower().startswith('0x'):
        # Drop the leading 'Opcode' word and keep the opcode bytes and
        # whatever qualifiers follow them.
        asOpcodes = [];
        for sWord in asWords[1:]:
            if sWord.startswith('0X'):
                sWord = '0x' + sWord[2:];
            asOpcodes.append(sWord);
        self.setInstrunctionAttrib('sRawOldOpcodes', ' '.join(asOpcodes));

    return False;
2156
def ensureInstructionForOpTag(self, iTagLine):
    """
    Makes sure there is a current instruction for the op-tag being parsed,
    adding one (at line self.iCommentLine + iTagLine) if there is none.

    Bumps the tag count of every current instruction and counts each
    instruction receiving its first tag in self.cTotalTagged.  Returns the
    last of the current instructions.
    """
    if not self.aoCurInstrs:
        self.addInstruction(self.iCommentLine + iTagLine);
    for oInstr in self.aoCurInstrs:
        fFirstTag = oInstr.cOpTags == 0;
        oInstr.cOpTags += 1;
        if fFirstTag:
            self.cTotalTagged += 1;
    return self.aoCurInstrs[-1];
2166
@staticmethod
def flattenSections(aasSections):
    """
    Turns each non-empty section (a list of lines) into one string made of
    its stripped lines joined by single spaces.

    Returns the list of strings, one per non-empty section.
    """
    return [' '.join([sLine.strip() for sLine in asLines]) for asLines in aasSections if asLines];
2178
@staticmethod
def flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = '\n'):
    """
    Flattens all sections into a single string.

    The lines of a section are stripped and joined with sLineSep.  Empty
    sections are skipped.  Every non-empty section other than the one at
    index zero is preceded by sSectionSep.  There is no trailing separator.
    """
    # Fast path for the typical case of one section holding a single line.
    if len(aasSections) == 1 and len(aasSections[0]) == 1:
        return aasSections[0][0].strip();

    asPieces = [];
    for iSection, asLines in enumerate(aasSections):
        if not asLines:
            continue;
        if iSection > 0:
            asPieces.append(sSectionSep);
        asPieces.append(sLineSep.join([sLine.strip() for sLine in asLines]));
    return ''.join(asPieces);
2196
2197
2198
2199 ## @name Tag parsers
2200 ## @{
2201
def parseTagOpBrief(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opbrief
    Value:  Text description, multiple sections, appended.

    Brief description.  If not given, it's the first sentence from @opdesc.

    The value must be a single sentence of at most 180 characters; a
    trailing period is added when missing.  Returns True after storing the
    text in the instruction's sBrief attribute; on failure an error is
    queued and False is returned.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sBrief = self.flattenAllSections(aasSections);
    if not sBrief:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if sBrief[-1] != '.':
        sBrief = sBrief + '.';
    if len(sBrief) > 180:
        return self.errorComment(iTagLine, '%s: value too long (max 180 chars): %s' % (sTag, sBrief));
    # Find the first period followed by a space (periods followed by other
    # characters are skipped); such a period before the end of the text
    # means there is more than one sentence.
    offDot = sBrief.find('.');
    while 0 <= offDot < len(sBrief) - 1 and sBrief[offDot + 1] != ' ':
        offDot = sBrief.find('.', offDot + 1);
    if offDot >= 0 and offDot != len(sBrief) - 1:
        return self.errorComment(iTagLine, '%s: only one sentence: %s' % (sTag, sBrief));

    # Update the instruction, refusing to overwrite an existing brief.
    if oInstr.sBrief is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite brief "%s" with "%s"'
                                           % (sTag, oInstr.sBrief, sBrief,));
    oInstr.sBrief = sBrief;

    _ = iEndLine;
    return True;
2231
def parseTagOpDesc(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opdesc
    Value:  Text description, multiple sections, appended.

    Appends the flattened sections to the description sections of the
    instruction.  Always returns True.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    if aasSections:
        asFlattened = self.flattenSections(aasSections);
        oInstr.asDescSections.extend(asFlattened);

    _ = sTag; _ = iEndLine;
    return True;
2246
def parseTagOpMnemonic(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opmnemonic
    Value:  mnemonic

    The 'mnemonic' value must be a valid C identifier string.  Because of
    prefixes, groups and whatnot, there are times when the mnemonic isn't
    that of an actual assembler mnemonic.

    Returns True on success.  An invalid value or an attempt to overwrite
    an already set mnemonic queues an error and returns False.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value and check it against the identifier pattern.
    sMnemonic = self.flattenAllSections(aasSections);
    if self.oReMnemonic.match(sMnemonic) is None:
        return self.errorComment(iTagLine, '%s: invalid menmonic name: "%s"' % (sTag, sMnemonic,));

    # Refuse to replace an existing mnemonic.
    sOldMnemonic = oInstr.sMnemonic;
    if sOldMnemonic is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite menmonic "%s" with "%s"'
                                           % (sTag, sOldMnemonic, sMnemonic,));
    oInstr.sMnemonic = sMnemonic;

    _ = iEndLine;
    return True;
2269
def parseTagOpOperandN(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tags:   @op1, @op2, @op3, @op4
    Value:  [where:]type

    The 'where' value indicates where the operand is found, like the 'reg'
    part of the ModR/M encoding.  It must be a key of g_kdOpLocations; when
    omitted it is taken from the g_kdOpTypes entry of the type.

    The 'type' value indicates the operand type and must be a key of
    g_kdOpTypes.

    The operand index comes from the last character of the tag.  Returns
    True on success; otherwise an error is queued and False is returned.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    idxOp = int(sTag[-1]) - 1;
    assert 0 <= idxOp < 4;

    # flatten, split up, and validate the "where:type" value.
    sFlattened = self.flattenAllSections(aasSections);
    asSplit = sFlattened.split(':');
    if len(asSplit) == 1:
        sType  = asSplit[0];
        sWhere = None;
    elif len(asSplit) == 2:
        (sWhere, sType) = asSplit;
    else:
        return self.errorComment(iTagLine, 'expected %s value on format "[<where>:]<type>" not "%s"' % (sTag, sFlattened,));

    if sType not in g_kdOpTypes:
        return self.errorComment(iTagLine, '%s: invalid type value "%s", valid: %s'
                                           % (sTag, sType, ', '.join(g_kdOpTypes.keys()),));
    if sWhere is None:
        sWhere = g_kdOpTypes[sType][1];
    elif sWhere not in g_kdOpLocations:
        return self.errorComment(iTagLine, '%s: invalid where value "%s", valid: %s'
                                           % (sTag, sWhere, ', '.join(g_kdOpLocations.keys()),));

    # Insert the operand, refusing to overwrite an existing one.
    while idxOp >= len(oInstr.aoOperands):
        oInstr.aoOperands.append(None);
    if oInstr.aoOperands[idxOp] is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s:%s" with "%s:%s"'
                                           % ( sTag, oInstr.aoOperands[idxOp].sWhere, oInstr.aoOperands[idxOp].sType,
                                               sWhere, sType,));
    oInstr.aoOperands[idxOp] = Operand(sWhere, sType);

    _ = iEndLine;
    return True;
2319
def parseTagOpMaps(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opmaps
    Value:  map[,map2]

    Indicates which maps the instruction is in.  There is a default map
    associated with each input file.

    Every name must be a key of g_dInstructionMaps and must not already be
    assigned to the instruction.  Returns True on success; otherwise an
    error is queued and False is returned.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split up and validate the value.  The emptiness check is done
    # on the flattened string since str.split(',') never returns an empty list.
    sFlattened = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',');
    if not sFlattened:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    asMaps = sFlattened.split(',');
    for sMap in asMaps:
        if sMap not in g_dInstructionMaps:
            return self.errorComment(iTagLine, '%s: invalid map value: %s  (valid values: %s)'
                                               % (sTag, sMap, ', '.join(g_dInstructionMaps.keys()),));

    # Add the maps to the current list.  Throw errors on duplicates.
    for oMap in oInstr.aoMaps:
        if oMap.sName in asMaps:
            return self.errorComment(iTagLine, '%s: duplicate map assignment: %s' % (sTag, oMap.sName));

    for sMap in asMaps:
        oMap = g_dInstructionMaps[sMap];
        if oMap not in oInstr.aoMaps:
            oInstr.aoMaps.append(oMap);
        else:
            # The same map is named twice in this tag value; report it but
            # carry on with the remaining names.
            self.errorComment(iTagLine, '%s: duplicate map assignment (input): %s' % (sTag, sMap));

    _ = iEndLine;
    return True;
2354
def parseTagOpPfx(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @oppfx
    Value:  n/a|none|0x66|0xf3|0xf2

    Required prefix for the instruction.  (In a (E)VEX context this is the
    value of the 'pp' field rather than an actual prefix.)

    'n/a' is stored as None.  A two character value gets a '0x' prefix
    before validation.  Returns True on success; otherwise an error is
    queued and False is returned.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sFlattened = self.flattenAllSections(aasSections);
    asPrefixes = sFlattened.split();
    if not asPrefixes:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if len(asPrefixes) > 1:
        return self.errorComment(iTagLine, '%s: max one prefix: %s' % (sTag, asPrefixes,));

    sPrefix = asPrefixes[0].lower();
    if sPrefix == 'n/a':
        sPrefix = None;
    elif sPrefix != 'none':
        if len(sPrefix) == 2:
            sPrefix = '0x' + sPrefix;
        if not _isValidOpcodeByte(sPrefix):
            return self.errorComment(iTagLine, '%s: invalid prefix: %s' % (sTag, sPrefix,));

    if sPrefix is not None and sPrefix not in g_kdPrefixes:
        return self.errorComment(iTagLine, '%s: invalid prefix: %s (valid %s)' % (sTag, sPrefix, g_kdPrefixes,));

    # Set it, refusing to overwrite an existing prefix.
    if oInstr.sPrefix is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sPrefix, sPrefix,));
    oInstr.sPrefix = sPrefix;

    _ = iEndLine;
    return True;
2392
def parseTagOpcode(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opcode
    Value:  0x?? | /reg (TODO: | mr/reg | 11 /reg | !11 /reg | 11 mr/reg | !11 mr/reg)

    The opcode byte or sub-byte for the instruction in the context of a map.

    Accepted are values passing _isValidOpcodeByte as well as '/N', '11/N'
    and '!11/N' with N in the range 0 thru 7.  Returns True on success;
    otherwise an error is queued and False is returned.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.  The /reg digit is limited to 0..7
    # since the ModR/M reg field is three bits wide.
    sOpcode = self.flattenAllSections(aasSections);
    if _isValidOpcodeByte(sOpcode):
        pass;
    elif len(sOpcode) == 2 and sOpcode.startswith('/')    and sOpcode[-1] in '01234567':
        pass;
    elif len(sOpcode) == 4 and sOpcode.startswith('11/')  and sOpcode[-1] in '01234567':
        pass;
    elif len(sOpcode) == 5 and sOpcode.startswith('!11/') and sOpcode[-1] in '01234567':
        pass;
    else:
        return self.errorComment(iTagLine, '%s: invalid opcode: %s' % (sTag, sOpcode,));

    # Set it, refusing to overwrite an existing opcode.
    if oInstr.sOpcode is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sOpcode, sOpcode,));
    oInstr.sOpcode = sOpcode;

    _ = iEndLine;
    return True;
2422
def parseTagOpcodeSub(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opcodesub
    Value:  none | 11 mr/reg | !11 mr/reg | rex.w=0 | rex.w=1 | vex.l=0 | vex.l=1
            | 11 mr/reg vex.l=0 | 11 mr/reg vex.l=1 | !11 mr/reg vex.l=0 | !11 mr/reg vex.l=1

    This is a simple way of dealing with encodings where the mod=3 and mod!=3
    represents exactly two different instructions.  The more proper way would
    be to go via maps with two members, but this is faster.

    The value must be a key of g_kdSubOpcodes; what gets stored is the first
    element of the matching entry.  Returns True on success; otherwise an
    error is queued and False is returned.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value and look it up.
    sRawValue = self.flattenAllSections(aasSections);
    if sRawValue not in g_kdSubOpcodes:
        return self.errorComment(iTagLine, '%s: invalid sub opcode: %s  (valid: 11, !11, none)' % (sTag, sRawValue,));
    sNewSubOpcode = g_kdSubOpcodes[sRawValue][0];

    # Store it unless one is already present.
    sOldSubOpcode = oInstr.sSubOpcode;
    if sOldSubOpcode is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                           % ( sTag, sOldSubOpcode, sNewSubOpcode,));
    oInstr.sSubOpcode = sNewSubOpcode;

    _ = iEndLine;
    return True;
2449
def parseTagOpEnc(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @openc
    Value:  ModR/M|fixed|prefix|<map name>

    The instruction operand encoding style.

    The value is accepted if it is a key of g_kdEncodings, a key of
    g_dInstructionMaps or passes _isValidOpcodeByte.  Returns True on
    success; otherwise an error is queued and False is returned.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value and check it against the accepted alternatives.
    sEncoding = self.flattenAllSections(aasSections);
    fAccepted = sEncoding in g_kdEncodings \
             or sEncoding in g_dInstructionMaps \
             or _isValidOpcodeByte(sEncoding);
    if not fAccepted:
        return self.errorComment(iTagLine, '%s: invalid encoding: %s' % (sTag, sEncoding,));

    # Store it unless one is already present.
    sOldEncoding = oInstr.sEncoding;
    if sOldEncoding is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                           % ( sTag, sOldEncoding, sEncoding,));
    oInstr.sEncoding = sEncoding;

    _ = iEndLine;
    return True;
2476
2477 ## EFlags tag to Instruction attribute name.
2478 kdOpFlagToAttr = {
2479 '@opfltest': 'asFlTest',
2480 '@opflmodify': 'asFlModify',
2481 '@opflundef': 'asFlUndefined',
2482 '@opflset': 'asFlSet',
2483 '@opflclear': 'asFlClear',
2484 };
2485
def parseTagOpEFlags(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tags:   @opfltest, @opflmodify, @opflundef, @opflset, @opflclear
    Value:  <eflags specifier>

    The value is a comma separated list of names from g_kdEFlagsMnemonics,
    or the single word 'none' for an empty list.  The list is stored in the
    instruction attribute that self.kdOpFlagToAttr gives for the tag.

    Returns True on success; otherwise an error is queued and False is
    returned.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split up and validate the values.
    sFlattened = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',');
    asFlags    = sFlattened.split(',');
    if len(asFlags) == 1 and asFlags[0].lower() == 'none':
        asFlags = [];
    else:
        # All entries are checked so that every invalid one gets reported.
        fAllValid = True;
        for iFlag, sFlag in enumerate(asFlags):
            if sFlag in g_kdEFlagsMnemonics:
                continue;
            sTrimmed = sFlag.strip();
            if sTrimmed in g_kdEFlagsMnemonics:
                asFlags[iFlag] = sTrimmed;
            else:
                fAllValid = self.errorComment(iTagLine, '%s: invalid EFLAGS value: %s' % (sTag, sFlag,));
        if not fAllValid:
            return False;

    # Store them unless the attribute has already been set.
    sAttrib = self.kdOpFlagToAttr[sTag];
    asOld   = getattr(oInstr, sAttrib);
    if asOld is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, asOld, asFlags,));
    setattr(oInstr, sAttrib, asFlags);

    _ = iEndLine;
    return True;
2517
def parseTagOpHints(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @ophints
    Value:  Comma or space separated list of flags and hints.

    This covers the disassembler flags table and more.

    Each hint must be a key of g_kdHints; the single word 'none' gives an
    empty list.  Valid hints are added to the instruction's dHints
    dictionary; a hint that is already present is reported but does not
    fail the tag.  Returns False if any hint is invalid, otherwise True.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    # Note: split() yields whitespace-free words, so no stripping is needed.
    asHints = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asHints) == 1 and asHints[0].lower() == 'none':
        asHints = [];
    else:
        # All hints are checked so that every invalid one gets reported.
        fRc = True;
        for sHint in asHints:
            if sHint not in g_kdHints:
                fRc = self.errorComment(iTagLine, '%s: invalid hint value: %s' % (sTag, sHint,));
        if not fRc:
            return False;

    # Append them.
    for sHint in asHints:
        if sHint not in oInstr.dHints:
            oInstr.dHints[sHint] = True; # (dummy value, using dictionary for speed)
        else:
            self.errorComment(iTagLine, '%s: duplicate hint: %s' % ( sTag, sHint,));

    _ = iEndLine;
    return True;
2551
def parseTagOpDisEnum(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opdisenum
    Value:  OP_XXXX

    This is for selecting a specific (legacy) disassembler enum value for
    the instruction.

    The value must match self.oReDisEnum.  If more than one word is given,
    an error is queued and the first word is used anyway.  Returns True
    when the value was stored, otherwise False.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and split into words; exactly one is expected.
    asWords = self.flattenAllSections(aasSections).split();
    cWords  = len(asWords);
    if cWords != 1:
        self.errorComment(iTagLine, '%s: expected exactly one value: %s' % (sTag, asWords,));
        if cWords == 0:
            return False;

    sDisEnum = asWords[0];
    if self.oReDisEnum.match(sDisEnum) is None:
        return self.errorComment(iTagLine, '%s: invalid disassembler OP_XXXX enum: %s (pattern: %s)'
                                           % (sTag, sDisEnum, self.oReDisEnum.pattern));

    # Store it unless one is already present.
    sOldDisEnum = oInstr.sDisEnum;
    if sOldDisEnum is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % (sTag, sOldDisEnum, sDisEnum,));
    oInstr.sDisEnum = sDisEnum;

    _ = iEndLine;
    return True;
2580
def parseTagOpMinCpu(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opmincpu
    Value:  <simple CPU name>

    Indicates when this instruction was introduced.

    The name must be a key of g_kdCpuNames.  If more than one word is
    given, an error is queued and the first word is used anyway.  Returns
    False for a missing or invalid name.  An attempt to replace an already
    set, different name is reported as an error and the existing name is
    kept; True is still returned in that case.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value, split into words, make sure there's just one, valid it.
    asCpus = self.flattenAllSections(aasSections).split();
    if not asCpus:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if len(asCpus) > 1:
        self.errorComment(iTagLine, '%s: exactly one CPU name, please: %s' % (sTag, ' '.join(asCpus),));

    sMinCpu = asCpus[0];
    if sMinCpu not in g_kdCpuNames:
        return self.errorComment(iTagLine, '%s: invalid CPU name: %s  (names: %s)'
                                           % (sTag, sMinCpu, ','.join(sorted(g_kdCpuNames)),));

    # Set it.  The value is only stored here, after validation, so that the
    # overwrite check below compares against the previously set name.
    if oInstr.sMinCpu is None:
        oInstr.sMinCpu = sMinCpu;
    elif oInstr.sMinCpu != sMinCpu:
        self.errorComment(iTagLine, '%s: attemting to overwrite "%s" with "%s"' % (sTag, oInstr.sMinCpu, sMinCpu,));

    _ = iEndLine;
    return True;
2610
def parseTagOpCpuId(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@opcpuid
    Value:      none | <CPUID flag specifier>

    CPUID feature bit which is required for the instruction to be present.

    Several flags may be given, separated by whitespace and/or commas.  The
    single word 'none' (any case) means no flags.

    Returns True on success (duplicates are reported but not fatal), False
    if any of the values is not a known CPUID flag.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    asCpuIds = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asCpuIds) == 1 and asCpuIds[0].lower() == 'none':
        asCpuIds = [];
    else:
        # Note: str.split() without arguments never yields words with leading
        #       or trailing whitespace, so no further stripping is needed.
        fRc = True;
        for sCpuId in asCpuIds:
            if sCpuId not in g_kdCpuIdFlags:
                fRc = self.errorComment(iTagLine, '%s: invalid CPUID value: %s' % (sTag, sCpuId,));
        if not fRc:
            return False;

    # Append them.
    for sCpuId in asCpuIds:
        if sCpuId not in oInstr.asCpuIds:
            oInstr.asCpuIds.append(sCpuId);
        else:
            self.errorComment(iTagLine, '%s: duplicate CPUID: %s' % ( sTag, sCpuId,));

    _ = iEndLine;
    return True;
2644
def parseTagOpGroup(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@opgroup
    Value:      op_grp1[_subgrp2[_subsubgrp3]]

    Instruction grouping.

    Returns True on success, otherwise the result of errorComment() (wrong
    number of values, invalid group name, or group already set).
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # There must be exactly one word in the flattened value.
    asCandidates = self.flattenAllSections(aasSections).split();
    if len(asCandidates) != 1:
        return self.errorComment(iTagLine, '%s: exactly one group, please: %s' % (sTag, asCandidates,));

    # It must look like a group name.
    sNewGroup = asCandidates[0];
    if self.oReGroupName.match(sNewGroup) is None:
        return self.errorComment(iTagLine, '%s: invalid group name: %s (valid: %s)'
                                           % (sTag, sNewGroup, self.oReGroupName.pattern));

    # And the instruction must not have a group yet.
    if oInstr.sGroup is None:
        oInstr.sGroup = sNewGroup;
        return True;
    return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sGroup, sNewGroup,));
2670
def parseTagOpUnusedInvalid(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@opunused, \@opinvalid, \@opinvlstyle
    Value:      <invalid opcode behaviour style>

    The \@opunused indicates the specification is for a currently unused
    instruction encoding.

    The \@opinvalid indicates the specification is for an invalid currently
    instruction encoding (like UD2).

    The \@opinvlstyle just indicates how CPUs decode the instruction when
    not supported (\@opcpuid, \@opmincpu) or disabled.

    Only one of the three tags may be used per instruction.  Returns True on
    success, otherwise the result of errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Exactly one word is expected.
    asGiven = self.flattenAllSections(aasSections).split();
    if len(asGiven) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid behviour style, please: %s' % (sTag, asGiven,));

    # It must be one of the known styles.
    sNewStyle = asGiven[0];
    if sNewStyle not in g_kdInvalidStyles:
        return self.errorComment(iTagLine, '%s: invalid invalid behaviour style: %s (valid: %s)'
                                           % (sTag, sNewStyle, g_kdInvalidStyles.keys(),));

    # The style can only be set once.
    if oInstr.sInvalidStyle is not None:
        return self.errorComment(iTagLine,
                                 '%s: attempting to overwrite "%s" with "%s" (only one @opunused, @opinvalid, @opinvlstyle)'
                                 % ( sTag, oInstr.sInvalidStyle, sNewStyle,));
    oInstr.sInvalidStyle = sNewStyle;

    # Two of the tags additionally flag the instruction.
    sFlagAttrib = { '@opunused': 'fUnused', '@opinvalid': 'fInvalid', }.get(sTag);
    if sFlagAttrib is not None:
        setattr(oInstr, sFlagAttrib, True);
    return True;
2708
def parseTagOpTest(self, sTag, aasSections, iTagLine, iEndLine): # pylint: disable=too-many-locals
    """
    Tag:        \@optest
    Value:      [<selectors>[ ]?] <inputs> -> <outputs>
    Example:    mode==64bit / in1=0xfffffffe:dw in2=1:dw -> out1=0xffffffff:dw outfl=a?,p?

    The main idea here is to generate basic instruction tests.

    The probably simplest way of handling the diverse input, would be to use
    it to produce size optimized byte code for a simple interpreter that
    modifies the register input and output states.

    An alternative to the interpreter would be creating multiple tables,
    but that becomes rather complicated wrt what goes where and then to use
    them in an efficient manner.

    Each section of the tag value is parsed as one test.  A test that parses
    cleanly is appended to the instruction's aoTests list; problems are
    reported via errorComment() and the test is dropped.

    Always returns True.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    #
    # Do it section by section.
    #
    for asSectionLines in aasSections:
        #
        # Sort the input into outputs, inputs and selector conditions.
        #
        sFlatSection = self.flattenAllSections([asSectionLines,]);
        if not sFlatSection:
            self.errorComment(iTagLine, '%s: missing value (dbg: aasSections=%s)' % ( sTag, aasSections));
            continue;
        oTest = InstructionTest(oInstr);

        asSelectors = [];
        asInputs    = [];
        asOutputs   = [];
        asCur       = asOutputs;
        fRc         = True;
        asWords     = sFlatSection.split();
        # The words are walked back to front: outputs come first, "->"
        # switches to the inputs and "/" switches to the selectors.
        for sWord in reversed(asWords):
            # Check for array switchers.  The current list is compared by
            # identity, since two different lists with equal content (e.g.
            # both still empty) would compare equal by value and hide a
            # misplaced or repeated switcher.
            if sWord == '->':
                if asCur is not asOutputs:
                    fRc = self.errorComment(iTagLine, '%s: "->" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asInputs;
            elif sWord == '/':
                if asCur is not asInputs:
                    fRc = self.errorComment(iTagLine, '%s: "/" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asSelectors;
            else:
                asCur.insert(0, sWord);

        #
        # Validate and add selectors.
        #
        for sCond in asSelectors:
            sCondExp  = TestSelector.kdPredicates.get(sCond, sCond);
            oSelector = None;
            for sOp in TestSelector.kasCompareOps:
                off = sCondExp.find(sOp);
                if off >= 0:
                    sVariable = sCondExp[:off];
                    sValue    = sCondExp[off + len(sOp):];
                    if sVariable in TestSelector.kdVariables:
                        if sValue in TestSelector.kdVariables[sVariable]:
                            oSelector = TestSelector(sVariable, sOp, sValue);
                        else:
                            self.errorComment(iTagLine, '%s: invalid condition value "%s" in "%s" (valid: %s)'
                                                        % ( sTag, sValue, sCond,
                                                            TestSelector.kdVariables[sVariable].keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid condition variable "%s" in "%s" (valid: %s)'
                                                    % ( sTag, sVariable, sCond, TestSelector.kdVariables.keys(),));
                    break;  # Only the first operator found is considered.
            if oSelector is not None:
                for oExisting in oTest.aoSelectors:
                    if oExisting.sVariable == oSelector.sVariable:
                        self.errorComment(iTagLine, '%s: already have a selector for variable "%s" (existing: %s, new: %s)'
                                                    % ( sTag, oSelector.sVariable, oExisting, oSelector,));
                oTest.aoSelectors.append(oSelector);
            else:
                fRc = self.errorComment(iTagLine, '%s: failed to parse selector: %s' % ( sTag, sCond,));

        #
        # Validate outputs and inputs, adding them to the test as we go along.
        #
        for asItems, sDesc, aoDst in [ (asInputs, 'input', oTest.aoInputs), (asOutputs, 'output', oTest.aoOutputs)]:
            asValidFieldKinds = [ 'both', sDesc, ];
            for sItem in asItems:
                oItem = None;
                for sOp in TestInOut.kasOperators:
                    off = sItem.find(sOp);
                    if off < 0:
                        continue;
                    sField     = sItem[:off];
                    sValueType = sItem[off + len(sOp):];
                    if     sField in TestInOut.kdFields \
                       and TestInOut.kdFields[sField][1] in asValidFieldKinds:
                        # The value may carry an explicit ':type' suffix, otherwise
                        # the type comes from the field table entry.
                        asSplit = sValueType.split(':', 1);
                        sValue  = asSplit[0];
                        sType   = asSplit[1] if len(asSplit) > 1 else TestInOut.kdFields[sField][0];
                        if sType in TestInOut.kdTypes:
                            oValid = TestInOut.kdTypes[sType].validate(sValue);
                            if oValid is True:
                                if not TestInOut.kdTypes[sType].isAndOrPair(sValue) or sOp == '&|=':
                                    oItem = TestInOut(sField, sOp, sValue, sType);
                                else:
                                    self.errorComment(iTagLine, '%s: and-or %s value "%s" can only be used with "&|="'
                                                                % ( sTag, sDesc, sItem, ));
                            else:
                                self.errorComment(iTagLine, '%s: invalid %s value "%s" in "%s" (type: %s): %s'
                                                            % ( sTag, sDesc, sValue, sItem, sType, oValid, ));
                        else:
                            self.errorComment(iTagLine, '%s: invalid %s type "%s" in "%s" (valid types: %s)'
                                                        % ( sTag, sDesc, sType, sItem, TestInOut.kdTypes.keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid %s field "%s" in "%s"\nvalid fields: %s'
                                                    % ( sTag, sDesc, sField, sItem,
                                                        ', '.join([sKey for sKey, asVal in TestInOut.kdFields.items()
                                                                   if asVal[1] in asValidFieldKinds]),));
                    break;  # Only the first operator found is considered.
                if oItem is not None:
                    for oExisting in aoDst:
                        if oExisting.sField == oItem.sField and oExisting.sOp == oItem.sOp:
                            self.errorComment(iTagLine,
                                              '%s: already have a "%s" assignment for field "%s" (existing: %s, new: %s)'
                                              % ( sTag, oItem.sOp, oItem.sField, oExisting, oItem,));
                    aoDst.append(oItem);
                else:
                    fRc = self.errorComment(iTagLine, '%s: failed to parse assignment: %s' % ( sTag, sItem,));

        #
        # Add the test if everything parsed fine, otherwise dump what we got.
        #
        if fRc:
            oInstr.aoTests.append(oTest);
        else:
            self.errorComment(iTagLine, '%s: failed to parse test: %s' % (sTag, ' '.join(asWords),));
            self.errorComment(iTagLine, '%s: asSelectors=%s / asInputs=%s -> asOutputs=%s'
                                        % (sTag, asSelectors, asInputs, asOutputs,));

    _ = iEndLine;
    return True;
2853
def parseTagOpTestNum(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Numbered \@optest tag.  Either \@optest42 or \@optest[42].

    The number must equal the count of tests already collected for the
    instruction; a mismatch is reported.  The value is then handed to
    parseTagOpTest, whose return value is returned.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The digits follow '@optest' (7 chars), optionally wrapped in brackets.
    sNumber = sTag[8:-1] if sTag[-1] == ']' else sTag[7:];
    iTest   = int(sNumber);

    cTests = len(oInstr.aoTests);
    if cTests != iTest:
        self.errorComment(iTagLine, '%s: incorrect test number: %u, actual %u' % (sTag, iTest, len(oInstr.aoTests),));
    return self.parseTagOpTest(sTag, aasSections, iTagLine, iEndLine);
2869
def parseTagOpTestIgnore(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@optestign | \@optestignore
    Value:      <value is ignored>

    This is a simple trick to ignore a test while debugging another.

    See also \@oponlytest.

    Always returns True without looking at any of the arguments.
    """
    _ = (sTag, aasSections, iTagLine, iEndLine,);
    return True;
2881
def parseTagOpCopyTests(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@opcopytests
    Value:      <opstat | function> [..]
    Example:    \@opcopytests add_Eb_Gb

    Trick to avoid duplicating tests for different encodings of the same
    operation.

    Each valid reference is appended to the instruction's asCopyTests list.
    Invalid and duplicate references are reported and skipped.  Returns
    True, or the result of errorComment() if no reference was given.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The copy jobs are only recorded here.  They are executed after all the
    # input has been parsed so forward references can be handled.
    asReferences = self.flattenAllSections(aasSections).split();
    if len(asReferences) == 0:
        return self.errorComment(iTagLine, '%s: requires at least on reference value' % (sTag,));

    for sReference in asReferences:
        if sReference in oInstr.asCopyTests:
            self.errorComment(iTagLine, '%s: ignoring duplicate "%s"' % (sTag, sReference,));
            continue;

        # A reference is either a statistics name or a function name.
        fValid = self.oReStatsName.match(sReference) or self.oReFunctionName.match(sReference);
        if not fValid:
            self.errorComment(iTagLine, '%s: invalid instruction reference (opstat or function) "%s" (valid: %s or %s)'
                                        % (sTag, sReference, self.oReStatsName.pattern, self.oReFunctionName.pattern));
            continue;
        oInstr.asCopyTests.append(sReference);

    return True;
2910
def parseTagOpOnlyTest(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@oponlytest | \@oponly
    Value:      none

    Only test instructions with this tag.  This is a trick that is handy
    for singling out one or two new instructions or tests.

    See also \@optestignore.

    Returns True after making sure the instruction is in the global
    g_aoOnlyTestInstructions list, or the result of errorComment() if the
    tag was given a value.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The tag takes no value.
    sUnexpected = self.flattenAllSections(aasSections).strip();
    if len(sUnexpected) > 0:
        return self.errorComment(iTagLine, '%s: does not take any value: %s' % (sTag, sUnexpected));

    # Add the instruction to the only-test list, unless already there.
    if oInstr in g_aoOnlyTestInstructions:
        return True;
    g_aoOnlyTestInstructions.append(oInstr);
    return True;
2933
def parseTagOpXcptType(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@opxcpttype
    Value:      [none|1|2|3|4|4UA|5|6|7|8|11|12|E1|E1NF|E2|E3|E3NF|E4|E4NF|E5|E5NF|E6|E6NF|E7NF|E9|E9NF|E10|E11|E12|E12NF]

    Sets the SSE or AVX exception type (see SDMv2 2.4, 2.7).

    Returns True on success, otherwise the result of errorComment() (wrong
    number of values, unknown type, or type already set).
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Exactly one word is expected.
    asGiven = self.flattenAllSections(aasSections).split();
    if len(asGiven) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid exception type, please: %s' % (sTag, asGiven,));

    # It must be one of the known exception types.
    sNewType = asGiven[0];
    if sNewType not in g_kdXcptTypes:
        return self.errorComment(iTagLine, '%s: invalid invalid exception type: %s (valid: %s)'
                                           % (sTag, sNewType, sorted(g_kdXcptTypes.keys()),));

    # The type can only be set once.
    if oInstr.sXcptType is None:
        oInstr.sXcptType = sNewType;
        return True;
    return self.errorComment(iTagLine,
                             '%s: attempting to overwrite "%s" with "%s" (only one @opxcpttype)'
                             % ( sTag, oInstr.sXcptType, sNewType,));
2960
def parseTagOpFunction(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@opfunction
    Value:      <VMM function name>

    This is for explicitly setting the IEM function name.  Normally we pick
    this up from the FNIEMOP_XXX macro invocation after the description, or
    generate it from the mnemonic and operands.

    It is thought it may be necessary to set it when specifying instructions
    whose implementation isn't following immediately or aren't implemented yet.

    Returns True on success, otherwise the result of errorComment() (invalid
    name, or name already set).
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The whole flattened value is the name; it must match the name pattern.
    sNewName = self.flattenAllSections(aasSections);
    if self.oReFunctionName.match(sNewName) is None:
        return self.errorComment(iTagLine, '%s: invalid VMM function name: "%s" (valid: %s)'
                                           % (sTag, sNewName, self.oReFunctionName.pattern));

    # The name can only be set once.
    if oInstr.sFunction is None:
        oInstr.sFunction = sNewName;
        return True;
    return self.errorComment(iTagLine, '%s: attempting to overwrite VMM function name "%s" with "%s"'
                                       % (sTag, oInstr.sFunction, sNewName,));
2988
def parseTagOpStats(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@opstats
    Value:      <VMM statistics base name>

    This is for explicitly setting the statistics name.  Normally we pick
    this up from the IEMOP_MNEMONIC macro invocation, or generate it from
    the mnemonic and operands.

    It is thought it may be necessary to set it when specifying instructions
    whose implementation isn't following immediately or aren't implemented yet.

    Returns True on success, otherwise the result of errorComment() (invalid
    name, or name already set).
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The whole flattened value is the name; it must match the name pattern.
    sNewName = self.flattenAllSections(aasSections);
    if self.oReStatsName.match(sNewName) is None:
        return self.errorComment(iTagLine, '%s: invalid VMM statistics name: "%s" (valid: %s)'
                                           % (sTag, sNewName, self.oReStatsName.pattern));

    # The name can only be set once.
    if oInstr.sStats is None:
        oInstr.sStats = sNewName;
        return True;
    return self.errorComment(iTagLine, '%s: attempting to overwrite VMM statistics base name "%s" with "%s"'
                                       % (sTag, oInstr.sStats, sNewName,));
3016
def parseTagOpDone(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@opdone
    Value:      none

    Used to explicitly flush the instructions that have been specified.

    Returns the result of doneInstructions(), or of errorComment() if the
    tag was given a value.
    """
    _ = iEndLine;
    sValue = self.flattenAllSections(aasSections);
    if sValue == '':
        return self.doneInstructions();
    return self.errorComment(iTagLine, '%s: takes no value, found: "%s"' % (sTag, sValue,));
3029
3030 ## @}
3031
3032
def parseCommentTagWorker(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Worker for parseComment that processes one completed tag and its sections.

    Known tags are dispatched to their handler in dTagHandlers.  Unknown
    \@op* tags and likely misspellings of known tags are reported.

    Returns a tuple (fOpTag, sFlatDefault).  fOpTag is True if a tag handler
    was invoked.  sFlatDefault is the flattened untagged text when sTag is
    the '@default' pseudo tag, otherwise None.
    """
    # Drop the trailing section if it is empty (but always keep one section).
    if len(aasSections) > 1 and not aasSections[-1]:
        aasSections.pop(-1);

    if sTag in self.dTagHandlers:
        self.dTagHandlers[sTag](sTag, aasSections, iTagLine, iEndLine);
        return (True, None);

    if sTag.startswith('@op'):
        self.errorComment(iTagLine, 'Unknown tag: %s' % (sTag));
    elif sTag == '@default':
        return (False, self.flattenAllSections(aasSections));
    elif '@op' + sTag[1:] in self.dTagHandlers:
        self.errorComment(iTagLine, 'Did you mean "@op%s" rather than "%s"?' % (sTag[1:], sTag));
    elif sTag in ['@encoding', '@opencoding']:
        self.errorComment(iTagLine, 'Did you mean "@openc" rather than "%s"?' % (sTag,));
    return (False, None);

def parseComment(self):
    """
    Parse the current comment (self.sComment).

    If it's an opcode specifying comment, we reset the macro stuff.

    Returns False if the comment does not contain anything of interest,
    True once it has been processed.
    """
    #
    # Reject if comment doesn't seem to contain anything interesting.
    #
    if    self.sComment.find('Opcode') < 0 \
      and self.sComment.find('@') < 0:
        return False;

    #
    # Split the comment into lines, removing leading asterisks and spaces.
    # Also remove leading and trailing empty lines.
    #
    asLines = [sLine.lstrip().lstrip('*').lstrip() for sLine in self.sComment.split('\n')];

    while asLines and not asLines[0]:
        self.iCommentLine += 1;
        asLines.pop(0);

    while asLines and not asLines[-1]:
        asLines.pop();

    # Paranoia: nothing left to look at (also avoids indexing an empty list below).
    if not asLines:
        return False;

    #
    # Check for old style: Opcode 0x0f 0x12
    #
    if asLines[0].startswith('Opcode '):
        self.parseCommentOldOpcode(asLines);

    #
    # Look for @op* tagged data.  Text before the first tag is collected
    # under the '@default' pseudo tag.
    #
    cOpTags      = 0;
    sFlatDefault = None;
    sCurTag      = '@default';
    iCurTagLine  = 0;
    asCurSection = [];
    aasSections  = [ asCurSection, ];
    iLine        = 0;
    for iLine, sLine in enumerate(asLines):
        if not sLine.startswith('@'):
            # Continuation of the current tag; an empty line starts a new section.
            if sLine:
                asCurSection.append(sLine);
            elif asCurSection:
                asCurSection = [];
                aasSections.append(asCurSection);
        else:
            #
            # Process the previous tag.
            #
            fOpTag, sFlat = self.parseCommentTagWorker(sCurTag, aasSections, iCurTagLine, iLine);
            if fOpTag:
                cOpTags += 1;
            if sFlat is not None:
                sFlatDefault = sFlat;

            #
            # New tag.
            #
            asSplit = sLine.split(None, 1);
            sCurTag = asSplit[0].lower();
            if len(asSplit) > 1:
                asCurSection = [asSplit[1],];
            else:
                asCurSection = [];
            aasSections = [asCurSection, ];
            iCurTagLine = iLine;

    #
    # Process the final tag.  This goes thru the same worker as the tags
    # above so that misspelled tags are reported here too.
    #
    fOpTag, sFlat = self.parseCommentTagWorker(sCurTag, aasSections, iCurTagLine, iLine);
    if fOpTag:
        cOpTags += 1;
    if sFlat is not None:
        sFlatDefault = sFlat;

    #
    # Don't allow default text in blocks containing @op*.
    #
    if cOpTags > 0 and sFlatDefault:
        self.errorComment(0, 'Untagged comment text is not allowed with @op*: %s' % (sFlatDefault,));

    return True;
3133
def parseMacroInvocation(self, sInvocation):
    """
    Parses a macro invocation.

    Returns a tuple, first element is the offset following the macro
    invocation.  The second element is a list of macro arguments, where the
    zero'th is the macro name.

    If the invocation is not closed within sInvocation, lines from
    self.asLines (starting at index self.iLine) are appended until it is.
    Running out of lines is reported via self.error() and what was collected
    so far is returned.
    """
    # First the name, i.e. everything before the opening parenthesis.
    offParen = sInvocation.find('(');
    if offParen <= 0:
        self.raiseError("macro invocation open parenthesis not found");
    sMacroName = sInvocation[:offParen].strip();
    if not self.oReMacroName.match(sMacroName):
        return self.error("invalid macro name '%s'" % (sMacroName,));
    asResult = [sMacroName, ];

    # Then the arguments.  Only commas at nesting level one separate
    # arguments, and nothing inside string or character literals counts.
    iNextLine   = self.iLine;
    cNesting    = 1;
    offCur      = offParen + 1;
    offArgStart = offCur;
    chInQuote   = None;
    while cNesting > 0:
        # Pull in the next source line when running out of text.
        if offCur >= len(sInvocation):
            if iNextLine >= len(self.asLines):
                self.error('macro invocation beyond end of file');
                return (offCur, asResult);
            sInvocation += self.asLines[iNextLine];
            iNextLine   += 1;
        chCur = sInvocation[offCur];

        if chInQuote:
            if chCur == '\\' and offCur + 1 < len(sInvocation):
                offCur += 1;                # Skip the escaped character.
            elif chCur == chInQuote:
                chInQuote = None;
        elif chCur == '(':
            cNesting += 1;
        elif chCur == '"' or chCur == '\'':
            chInQuote = chCur;
        elif chCur == ',' or chCur == ')':
            if cNesting == 1:
                asResult.append(sInvocation[offArgStart:offCur].strip());
                offArgStart = offCur + 1;
            if chCur == ')':
                cNesting -= 1;
        offCur += 1;

    return (offCur, asResult);
3184
def findAndParseMacroInvocationEx(self, sCode, sMacro):
    """
    Returns (len(sCode), None) if not found, parseMacroInvocation result if found.

    Only the first occurrence of sMacro in sCode is considered, and it only
    counts as an invocation if the next non-blank character is an opening
    parenthesis.  On a hit the returned offset is relative to sCode.
    """
    offHit = sCode.find(sMacro);
    # Using startswith here (rather than indexing) so that a macro name at
    # the very end of sCode is simply treated as not being an invocation.
    if offHit >= 0 and sCode[offHit + len(sMacro):].lstrip().startswith('('):
        offAfter, asRet = self.parseMacroInvocation(sCode[offHit:]);
        return (offHit + offAfter, asRet);
    return (len(sCode), None);
3194
def findAndParseMacroInvocation(self, sCode, sMacro):
    """
    Returns None if not found, arguments as per parseMacroInvocation if found.

    This is findAndParseMacroInvocationEx with the offset dropped.
    """
    tResult = self.findAndParseMacroInvocationEx(sCode, sMacro);
    asArgs  = tResult[1];
    return asArgs;
3200
def findAndParseFirstMacroInvocation(self, sCode, asMacro):
    """
    Returns same as findAndParseMacroInvocation.

    The macro names in asMacro are tried in the given order and the result
    for the first one found in sCode is returned.  None if none is found.
    """
    for sCandidate in asMacro:
        asArgs = self.findAndParseMacroInvocation(sCode, sCandidate);
        if asArgs is None:
            continue;
        return asArgs;
    return None;
3210
def workerIemOpMnemonicEx(self, sMacro, sStats, sAsm, sForm, sUpper, sLower,  # pylint: disable=too-many-arguments
                          sDisHints, sIemHints, asOperands):
    """
    Processes one of the a IEMOP_MNEMONIC0EX, IEMOP_MNEMONIC1EX, IEMOP_MNEMONIC2EX,
    IEMOP_MNEMONIC3EX, and IEMOP_MNEMONIC4EX macros.

    The macro arguments are checked for consistency and then merged into the
    last instruction in aoCurInstrs (one is added if the list is empty).
    Values already set for the instruction (by \@op* tags) are compared with
    the macro arguments and mismatches are reported via self.error().

    sMacro is the macro name (for messages), sStats thru sIemHints are the
    macro arguments as strings (sAsm is currently unused), and asOperands is
    the list of operand type arguments.

    Always returns True.
    """
    #
    # Some invocation checks.
    #
    if sUpper != sUpper.upper():
        self.error('%s: bad a_Upper parameter: %s' % (sMacro, sUpper,));
    if sLower != sLower.lower():
        self.error('%s: bad a_Lower parameter: %s' % (sMacro, sLower,));
    if sUpper.lower() != sLower:
        self.error('%s: a_Upper and a_Lower parameters does not match: %s vs %s' % (sMacro, sUpper, sLower,));
    if not self.oReMnemonic.match(sLower):
        self.error('%s: invalid a_Lower: %s (valid: %s)' % (sMacro, sLower, self.oReMnemonic.pattern,));

    #
    # Check if sIemHints tells us to not consider this macro invocation.
    #
    if sIemHints.find('IEMOPHINT_SKIP_PYTHON') >= 0:
        return True;

    # Apply to the last instruction only for now.
    if not self.aoCurInstrs:
        self.addInstruction();
    oInstr = self.aoCurInstrs[-1];
    if oInstr.iLineMnemonicMacro == -1:
        oInstr.iLineMnemonicMacro = self.iLine;
    else:
        self.error('%s: already saw a IEMOP_MNEMONIC* macro on line %u for this instruction'
                   % (sMacro, oInstr.iLineMnemonicMacro,));

    # Mnemonic
    if oInstr.sMnemonic is None:
        oInstr.sMnemonic = sLower;
    elif oInstr.sMnemonic != sLower:
        self.error('%s: current instruction and a_Lower does not match: %s vs %s' % (sMacro, oInstr.sMnemonic, sLower,));

    # Process operands.
    if len(oInstr.aoOperands) not in [0, len(asOperands)]:
        self.error('%s: number of operands given by @opN does not match macro: %s vs %s'
                   % (sMacro, len(oInstr.aoOperands), len(asOperands),));
    for iOperand, sType in enumerate(asOperands):
        sWhere = g_kdOpTypes.get(sType, [None, None])[1];
        if sWhere is None:
            self.error('%s: unknown a_Op%u value: %s' % (sMacro, iOperand + 1, sType));
            if iOperand < len(oInstr.aoOperands): # error recovery.
                sWhere = oInstr.aoOperands[iOperand].sWhere;
                sType  = oInstr.aoOperands[iOperand].sType;
            else:
                sWhere = 'reg';
                sType  = 'Gb';
        if iOperand == len(oInstr.aoOperands):
            oInstr.aoOperands.append(Operand(sWhere, sType))
        elif oInstr.aoOperands[iOperand].sWhere != sWhere or oInstr.aoOperands[iOperand].sType != sType:
            self.error('%s: @op%u and a_Op%u mismatch: %s:%s vs %s:%s'
                       % (sMacro, iOperand + 1, iOperand + 1, oInstr.aoOperands[iOperand].sWhere,
                          oInstr.aoOperands[iOperand].sType, sWhere, sType,));

    # Encoding.
    if sForm not in g_kdIemForms:
        self.error('%s: unknown a_Form value: %s' % (sMacro, sForm,));
    else:
        if oInstr.sEncoding is None:
            oInstr.sEncoding = g_kdIemForms[sForm][0];
        elif g_kdIemForms[sForm][0] != oInstr.sEncoding:
            self.error('%s: current instruction @openc and a_Form does not match: %s vs %s (%s)'
                       % (sMacro, oInstr.sEncoding, g_kdIemForms[sForm], sForm));

        # Check the parameter locations for the encoding.
        if g_kdIemForms[sForm][1] is not None:
            if len(g_kdIemForms[sForm][1]) > len(oInstr.aoOperands):
                self.error('%s: The a_Form=%s has a different operand count: %s (form) vs %s'
                           % (sMacro, sForm, len(g_kdIemForms[sForm][1]), len(oInstr.aoOperands) ));
            else:
                # Note! str.find returns -1 when there is no match and 0 for a
                #       match at the very start, so its result cannot be used as
                #       a boolean.  Plain substring tests are used for VEX/XOP.
                fVexOrXopForm = 'VEX' in sForm or 'XOP' in sForm;
                for iOperand, sWhere in enumerate(g_kdIemForms[sForm][1]):
                    if oInstr.aoOperands[iOperand].sWhere != sWhere:
                        self.error('%s: current instruction @op%u and a_Form location does not match: %s vs %s (%s)'
                                   % (sMacro, iOperand + 1, oInstr.aoOperands[iOperand].sWhere, sWhere, sForm,));
                    sOpFormMatch = g_kdOpTypes[oInstr.aoOperands[iOperand].sType][4];
                    if    (sOpFormMatch in [ 'REG', 'MEM', ] and sForm.find('_' + sOpFormMatch) < 0) \
                       or (sOpFormMatch in [ 'FIXED', ]      and sForm.find(sOpFormMatch) < 0) \
                       or (sOpFormMatch == 'RM' and (sForm.find('_MEM') > 0 or sForm.find('_REG') > 0) ) \
                       or (sOpFormMatch == 'V'  and (   not fVexOrXopForm \
                                                     or sForm.replace('VEX','').find('V') < 0) ):
                        self.error('%s: current instruction @op%u and a_Form type does not match: %s/%s vs %s'
                                   % (sMacro, iOperand + 1, oInstr.aoOperands[iOperand].sType, sOpFormMatch, sForm, ));
                # Operands beyond those given by the form must be fixed ones.
                if len(g_kdIemForms[sForm][1]) < len(oInstr.aoOperands):
                    for iOperand in range(len(g_kdIemForms[sForm][1]), len(oInstr.aoOperands)):
                        if    oInstr.aoOperands[iOperand].sType != 'FIXED' \
                          and g_kdOpTypes[oInstr.aoOperands[iOperand].sType][0] != 'IDX_ParseFixedReg':
                            self.error('%s: Expected FIXED type operand #%u following operands given by a_Form=%s: %s (%s)'
                                       % (sMacro, iOperand, sForm, oInstr.aoOperands[iOperand].sType,
                                          oInstr.aoOperands[iOperand].sWhere));

        # Check @opcodesub
        if    oInstr.sSubOpcode \
          and g_kdIemForms[sForm][2] \
          and oInstr.sSubOpcode.find(g_kdIemForms[sForm][2]) < 0:
            self.error('%s: current instruction @opcodesub and a_Form does not match: %s vs %s (%s)'
                       % (sMacro, oInstr.sSubOpcode, g_kdIemForms[sForm][2], sForm,));

    # Stats.
    if not self.oReStatsName.match(sStats):
        self.error('%s: invalid a_Stats value: %s' % (sMacro, sStats,));
    elif oInstr.sStats is None:
        oInstr.sStats = sStats;
    elif oInstr.sStats != sStats:
        self.error('%s: mismatching @opstats and a_Stats value: %s vs %s'
                   % (sMacro, oInstr.sStats, sStats,));

    # Process the hints (simply merge with @ophints w/o checking anything).
    for sHint in sDisHints.split('|'):
        sHint = sHint.strip();
        if sHint.startswith('DISOPTYPE_'):
            sShortHint = sHint[len('DISOPTYPE_'):].lower();
            if sShortHint in g_kdHints:
                oInstr.dHints[sShortHint] = True; # (dummy value, using dictionary for speed)
            else:
                self.error('%s: unknown a_fDisHints value: %s' % (sMacro, sHint,));
        elif sHint != '0':
            self.error('%s: expected a_fDisHints value: %s' % (sMacro, sHint,));

    for sHint in sIemHints.split('|'):
        sHint = sHint.strip();
        if sHint.startswith('IEMOPHINT_'):
            sShortHint = sHint[len('IEMOPHINT_'):].lower();
            if sShortHint in g_kdHints:
                oInstr.dHints[sShortHint] = True; # (dummy value, using dictionary for speed)
            else:
                self.error('%s: unknown a_fIemHints value: %s' % (sMacro, sHint,));
        elif sHint != '0':
            self.error('%s: expected a_fIemHints value: %s' % (sMacro, sHint,));

    _ = sAsm;
    return True;
3350
def workerIemOpMnemonic(self, sMacro, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands):
    """
    Processes one of the a IEMOP_MNEMONIC0, IEMOP_MNEMONIC1, IEMOP_MNEMONIC2,
    IEMOP_MNEMONIC3, and IEMOP_MNEMONIC4 macros.

    These macros carry neither a statistics name nor an assembly string, so
    both are generated from the lower case mnemonic and the operands before
    handing everything to workerIemOpMnemonicEx, whose result is returned.
    """
    if asOperands:
        sStats = sLower + '_' + '_'.join(asOperands);
        sAsm   = sLower + ' ' + ','.join(asOperands);
    else:
        sStats = sLower;
        sAsm   = sLower;
    return self.workerIemOpMnemonicEx(sMacro, sStats, sAsm, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands);
3360
3361 def checkCodeForMacro(self, sCode):
3362 """
3363 Checks code for relevant macro invocation.
3364 """
3365 #
3366 # Scan macro invocations.
3367 #
3368 if sCode.find('(') > 0:
3369 # Look for instruction decoder function definitions. ASSUME single line.
3370 asArgs = self.findAndParseFirstMacroInvocation(sCode,
3371 [ 'FNIEMOP_DEF',
3372 'FNIEMOP_STUB',
3373 'FNIEMOP_STUB_1',
3374 'FNIEMOP_UD_STUB',
3375 'FNIEMOP_UD_STUB_1' ]);
3376 if asArgs is not None:
3377 sFunction = asArgs[1];
3378
3379 if not self.aoCurInstrs:
3380 self.addInstruction();
3381 for oInstr in self.aoCurInstrs:
3382 if oInstr.iLineFnIemOpMacro == -1:
3383 oInstr.iLineFnIemOpMacro = self.iLine;
3384 else:
3385 self.error('%s: already seen a FNIEMOP_XXX macro for %s' % (asArgs[0], oInstr,) );
3386 self.setInstrunctionAttrib('sFunction', sFunction);
3387 self.setInstrunctionAttrib('fStub', asArgs[0].find('STUB') > 0, fOverwrite = True);
3388 self.setInstrunctionAttrib('fUdStub', asArgs[0].find('UD_STUB') > 0, fOverwrite = True);
3389 if asArgs[0].find('STUB') > 0:
3390 self.doneInstructions();
3391 return True;
3392
3393 # IEMOP_HLP_DONE_VEX_DECODING_*
3394 asArgs = self.findAndParseFirstMacroInvocation(sCode,
3395 [ 'IEMOP_HLP_DONE_VEX_DECODING',
3396 'IEMOP_HLP_DONE_VEX_DECODING_L0',
3397 'IEMOP_HLP_DONE_VEX_DECODING_NO_VVVV',
3398 'IEMOP_HLP_DONE_VEX_DECODING_L0_AND_NO_VVVV',
3399 ]);
3400 if asArgs is not None:
3401 sMacro = asArgs[0];
3402 if sMacro in ('IEMOP_HLP_DONE_VEX_DECODING_L0', 'IEMOP_HLP_DONE_VEX_DECODING_L0_AND_NO_VVVV', ):
3403 for oInstr in self.aoCurInstrs:
3404 if 'vex_l_zero' not in oInstr.dHints:
3405 if oInstr.iLineMnemonicMacro >= 0:
3406 self.errorOnLine(oInstr.iLineMnemonicMacro,
3407 'Missing IEMOPHINT_VEX_L_ZERO! (%s on line %d)' % (sMacro, self.iLine,));
3408 oInstr.dHints['vex_l_zero'] = True;
3409 return True;
3410
3411 #
3412 # IEMOP_MNEMONIC*
3413 #
3414
3415 # IEMOP_MNEMONIC(a_Stats, a_szMnemonic) IEMOP_INC_STATS(a_Stats)
3416 asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC');
3417 if asArgs is not None:
3418 if len(self.aoCurInstrs) == 1:
3419 oInstr = self.aoCurInstrs[0];
3420 if oInstr.sStats is None:
3421 oInstr.sStats = asArgs[1];
3422 self.deriveMnemonicAndOperandsFromStats(oInstr, asArgs[1]);
3423
3424 # IEMOP_MNEMONIC0EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_fDisHints, a_fIemHints)
3425 asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC0EX');
3426 if asArgs is not None:
3427 self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[6], asArgs[7],
3428 []);
3429 # IEMOP_MNEMONIC1EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_fDisHints, a_fIemHints)
3430 asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC1EX');
3431 if asArgs is not None:
3432 self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[7], asArgs[8],
3433 [asArgs[6],]);
3434 # IEMOP_MNEMONIC2EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_fDisHints, a_fIemHints)
3435 asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC2EX');
3436 if asArgs is not None:
3437 self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[8], asArgs[9],
3438 [asArgs[6], asArgs[7]]);
3439 # IEMOP_MNEMONIC3EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_fDisHints, a_fIemHints)
3440 asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC3EX');
3441 if asArgs is not None:
3442 self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[9],
3443 asArgs[10], [asArgs[6], asArgs[7], asArgs[8],]);
3444 # IEMOP_MNEMONIC4EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_Op4, a_fDisHints,
3445 # a_fIemHints)
3446 asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC4EX');
3447 if asArgs is not None:
3448 self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[10],
3449 asArgs[11], [asArgs[6], asArgs[7], asArgs[8], asArgs[9],]);
3450
3451 # IEMOP_MNEMONIC0(a_Form, a_Upper, a_Lower, a_fDisHints, a_fIemHints)
3452 asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC0');
3453 if asArgs is not None:
3454 self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], []);
3455 # IEMOP_MNEMONIC1(a_Form, a_Upper, a_Lower, a_Op1, a_fDisHints, a_fIemHints)
3456 asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC1');
3457 if asArgs is not None:
3458 self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[5], asArgs[6], [asArgs[4],]);
3459 # IEMOP_MNEMONIC2(a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_fDisHints, a_fIemHints)
3460 asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC2');
3461 if asArgs is not None:
3462 self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[6], asArgs[7],
3463 [asArgs[4], asArgs[5],]);
3464 # IEMOP_MNEMONIC3(a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_fDisHints, a_fIemHints)
3465 asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC3');
3466 if asArgs is not None:
3467 self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[7], asArgs[8],
3468 [asArgs[4], asArgs[5], asArgs[6],]);
3469 # IEMOP_MNEMONIC4(a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_Op4, a_fDisHints, a_fIemHints)
3470 asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC4');
3471 if asArgs is not None:
3472 self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[8], asArgs[9],
3473 [asArgs[4], asArgs[5], asArgs[6], asArgs[7],]);
3474
3475 return False;
3476
3477
def parse(self):
    """
    Parses the given file.

    Walks self.asLines starting at self.iLine, tracking whether the current
    position is in code or inside a /* ... */ comment.  Code fragments are
    passed to checkCodeForMacro(), each completed multi-line comment is
    collected in self.sComment and handed to parseComment().

    Returns number of errors (the result of printErrors()).
    Raises exception on fatal trouble.
    """
    #self.debug('Parsing %s' % (self.sSrcFile,));

    while self.iLine < len(self.asLines):
        sLine = self.asLines[self.iLine];
        self.iLine += 1;

        # We only look for comments, so only lines with a slash might possibly
        # influence the parser state.
        offSlash = sLine.find('/');
        if offSlash >= 0:
            # Anything but a '//' seen while in code state needs the full scan below.
            if offSlash + 1 >= len(sLine) or sLine[offSlash + 1] != '/' or self.iState != self.kiCode:
                offLine = 0;
                while offLine < len(sLine):
                    if self.iState == self.kiCode:
                        offHit = sLine.find('/*', offLine); # only multiline comments for now.
                        if offHit >= 0:
                            # Code up to the comment start, then switch to comment state.
                            self.checkCodeForMacro(sLine[offLine:offHit]);
                            self.sComment = '';
                            self.iCommentLine = self.iLine;
                            self.iState = self.kiCommentMulti;
                            offLine = offHit + 2;
                        else:
                            self.checkCodeForMacro(sLine[offLine:]);
                            offLine = len(sLine);

                    elif self.iState == self.kiCommentMulti:
                        offHit = sLine.find('*/', offLine);
                        if offHit >= 0:
                            # Comment complete: back to code state before processing it.
                            self.sComment += sLine[offLine:offHit];
                            self.iState = self.kiCode;
                            offLine = offHit + 2;
                            self.parseComment();
                        else:
                            self.sComment += sLine[offLine:];
                            offLine = len(sLine);
                    else:
                        assert False;
            # C++ line comment.
            elif offSlash > 0:
                self.checkCodeForMacro(sLine[:offSlash]);

        # No slash, but append the line if in multi-line comment.
        elif self.iState == self.kiCommentMulti:
            #self.debug('line %d: multi' % (self.iLine,));
            self.sComment += sLine;

        # No slash, but check code line for relevant macro.
        elif self.iState == self.kiCode and sLine.find('IEMOP_') >= 0:
            #self.debug('line %d: macro' % (self.iLine,));
            self.checkCodeForMacro(sLine);

        # If the line is a '}' in the first position, complete the instructions.
        # startswith() rather than sLine[0] so an empty line entry cannot raise IndexError.
        elif self.iState == self.kiCode and sLine.startswith('}'):
            #self.debug('line %d: }' % (self.iLine,));
            self.doneInstructions();

        # Look for instruction table on the form 'IEM_STATIC const PFNIEMOP g_apfnVexMap3'
        # so we can check/add @oppfx info from it.
        elif self.iState == self.kiCode and sLine.find('PFNIEMOP') > 0 and self.oReFunTable.match(sLine):
            self.parseFunctionTable(sLine);

    self.doneInstructions();
    self.debug('%3s stubs out of %3s instructions in %s'
               % (self.cTotalStubs, self.cTotalInstr, os.path.basename(self.sSrcFile),));
    return self.printErrors();
3549
3550
def __parseFileByName(sSrcFile, sDefaultMap):
    """
    Parses one source file for instruction specifications.

    Returns the error count from SimpleParser.parse().
    Raises Exception if the file cannot be opened or read.  A ParserException
    is printed and then re-raised.
    """
    # Slurp the file as a list of lines, always closing it again.
    try:
        oSrc = open(sSrcFile, "r");     # pylint: disable=consider-using-with
    except Exception as oXcpt:
        raise Exception("failed to open %s for reading: %s" % (sSrcFile, oXcpt,));
    try:
        asSrcLines = oSrc.readlines();
    except Exception as oXcpt:
        raise Exception("failed to read %s: %s" % (sSrcFile, oXcpt,));
    finally:
        oSrc.close();

    # Run the parser over the lines; show parser exceptions before passing them on.
    try:
        return SimpleParser(sSrcFile, asSrcLines, sDefaultMap).parse();
    except ParserException as oXcpt:
        print(str(oXcpt));
        raise;
3579
3580
def __doTestCopying():
    """
    Executes the asCopyTests instructions.

    Each name in an instruction's asCopyTests is looked up in
    g_dAllInstructionsByStat first and, failing that, in
    g_dAllInstructionsByFunction.  The aoTests of every instruction found are
    appended to the aoTests of the instruction holding the reference.

    Returns the number of errors; the error messages are written to stderr.
    """
    asProblems = [];
    for oTarget in g_aoAllInstructions:
        if not oTarget.asCopyTests:
            continue;

        for sRef in oTarget.asCopyTests:
            # The by-stat table yields a single instruction, the by-function one a list.
            oByStat = g_dAllInstructionsByStat.get(sRef, None);
            aoSources = [oByStat,] if oByStat else g_dAllInstructionsByFunction.get(sRef, []);

            if not aoSources:
                asProblems.append('%s:%s: error: @opcopytests reference "%s" not found\n'
                                  % ( oTarget.sSrcFile, oTarget.iLineCreated, sRef));
                continue;

            for oSource in aoSources:
                if oSource != oTarget:
                    oTarget.aoTests.extend(oSource.aoTests);
                else:
                    # Copying an instruction's tests onto itself makes no sense.
                    asProblems.append('%s:%s: error: @opcopytests reference "%s" matches the destination\n'
                                      % ( oTarget.sSrcFile, oTarget.iLineCreated, sRef));

    if asProblems:
        sys.stderr.write(u''.join(asProblems));
    return len(asProblems);
3608
3609
def __applyOnlyTest():
    """
    If g_aoOnlyTestInstructions contains any instructions, drop aoTests from
    all other instructions so that only these get tested.

    Always returns 0, which lets the caller add the result to an error count.
    """
    if not g_aoOnlyTestInstructions:
        return 0;

    for oCandidate in g_aoAllInstructions:
        # Only instructions that actually have tests and are not selected get cleared.
        if oCandidate.aoTests and oCandidate not in g_aoOnlyTestInstructions:
            oCandidate.aoTests = [];
    return 0;
3621
def __parseAll():
    """
    Parses all the IEMAllInstruction*.cpp.h files.

    The files are expected in the same directory as this module.  After
    parsing, the test copying and only-test passes are applied.

    Returns True on success.  Exits the process with status 1 if any errors
    were counted; exceptions from the parsing propagate.
    """
    # (default instruction map, file name) in the order they are parsed.
    atFiles = (
        ( 'one', 'IEMAllInstructionsOneByte.cpp.h'),
        ( 'two0f', 'IEMAllInstructionsTwoByte0f.cpp.h'),
        ( 'three0f38', 'IEMAllInstructionsThree0f38.cpp.h'),
        ( 'three0f3a', 'IEMAllInstructionsThree0f3a.cpp.h'),
        ( 'vexmap1', 'IEMAllInstructionsVexMap1.cpp.h'),
        ( 'vexmap2', 'IEMAllInstructionsVexMap2.cpp.h'),
        ( 'vexmap3', 'IEMAllInstructionsVexMap3.cpp.h'),
        ( '3dnow', 'IEMAllInstructions3DNow.cpp.h'),
    );
    sHere = os.path.dirname(os.path.abspath(__file__));

    cProblems = 0;
    for sMap, sFile in atFiles:
        cProblems += __parseFileByName(os.path.join(sHere, sFile), sMap);
    cProblems += __doTestCopying();
    cProblems += __applyOnlyTest();

    # Errors terminate the process rather than raising an exception.
    if cProblems != 0:
        sys.exit(1);
    return True;
3648
3649
3650
# Parse all the instruction files when the module is loaded.  Note that
# __parseAll() exits the process if any errors were counted.
__parseAll();
3652
3653
3654#
3655# Generators (may perhaps move later).
3656#
def __formatDisassemblerTableEntry(oInstr):
    """
    Formats a single instruction as a disassembler opcode table entry.

    Returns the OP(...) / OPVEX(...) macro invocation as a string, with the
    individual macro arguments padded out to fixed column offsets.
    """
    aoOperands = oInstr.aoOperands;

    # More than three operands requires the OPVEX macro, which takes four.
    if len(aoOperands) > 3:
        sMacro, cMaxOperands = 'OPVEX', 4;
    else:
        sMacro, cMaxOperands = 'OP', 3;
    assert len(aoOperands) <= cMaxOperands;

    #
    # Format string: the mnemonic followed by the comma separated operand formats.
    #
    sFormat = '%s("%s' % (sMacro, oInstr.sMnemonic,);
    asOperandFmts = [];
    for oOperand in aoOperands:
        sFmt = g_kdOpTypes[oOperand.sType][2];
        if sFmt[0] != '%':                      ## @todo remove upper() later.
            sFmt = sFmt.upper();
        asOperandFmts.append(sFmt);
    if asOperandFmts:
        sFormat += ' ' + ','.join(asOperandFmts);
    asColumns = [ sFormat + '",', ];

    #
    # Decoders.
    #
    dSingleDecoder = { 'VEX.ModR/M': 'IDX_ParseModRM,', 'vex2': 'IDX_ParseVex2b,', 'vex3': 'IDX_ParseVex3b,', };
    sEncoding  = oInstr.sEncoding;
    asDecoders = [];
    if sEncoding is None or sEncoding in ( 'fixed', 'VEX.fixed' ):
        pass;
    elif sEncoding == 'ModR/M':
        # ASSUME the first operand is using the ModR/M encoding
        assert len(aoOperands) >= 1 and aoOperands[0].usesModRM();
        asDecoders.append('IDX_ParseModRM,');
    elif sEncoding == 'prefix':
        asDecoders.extend(['0,'] * len(aoOperands));
    elif sEncoding in dSingleDecoder:
        asDecoders.append(dSingleDecoder[sEncoding]);
    elif sEncoding in g_dInstructionMaps:
        asDecoders.append(g_dInstructionMaps[sEncoding].sDisParse + ',');
    else:
        ## @todo Not handled yet:
        #   IDX_ParseTwoByteEsc, IDX_ParseGrp1, IDX_ParseShiftGrp2, IDX_ParseGrp3, IDX_ParseGrp4,
        #   IDX_ParseGrp5, IDX_Parse3DNow, IDX_ParseGrp6, IDX_ParseGrp7, IDX_ParseGrp8,
        #   IDX_ParseGrp9, IDX_ParseGrp10, IDX_ParseGrp12, IDX_ParseGrp13, IDX_ParseGrp14,
        #   IDX_ParseGrp15, IDX_ParseGrp16, IDX_ParseThreeByteEsc4, IDX_ParseThreeByteEsc5,
        #   IDX_ParseModFence, IDX_ParseEscFP, IDX_ParseNopPause, IDX_ParseInvOpModRM,
        assert False, str(oInstr);

    # Check for immediates and stuff in the remaining operands, then pad with zeros.
    asDecoders += [g_kdOpTypes[oOperand.sType][0] + ',' for oOperand in aoOperands[len(asDecoders):]];
    asDecoders += ['0,'] * (cMaxOperands - len(asDecoders));
    asColumns  += asDecoders;

    #
    # Opcode and operands.
    #
    assert oInstr.sDisEnum, str(oInstr);
    asColumns.append(oInstr.sDisEnum + ',');
    asParams = ['OP_PARM_' + g_kdOpTypes[oOperand.sType][3] + ',' for oOperand in aoOperands];
    asParams += ['OP_PARM_NONE,'] * (cMaxOperands - len(asParams));
    asColumns += asParams;

    #
    # Flags: only the hints mapping to DISOPTYPE_ defines are included.
    #
    asFlags = [];
    for sHint in sorted(oInstr.dHints.keys()):
        sDefine = g_kdHints[sHint];
        if sDefine.startswith('DISOPTYPE_'):
            asFlags.append(sDefine);
    asColumns.append((' | '.join(asFlags) if asFlags else '0') + '),');

    #
    # Format the columns into a line.  A column starts at its fixed offset when
    # the line is still shorter than that, otherwise after a single space.
    #
    aoffColumns = [4, 29, 49, 65, 77, 89, 109, 125, 141, 157, 183, 199];
    sLine = '';
    for iColumn, sColumn in enumerate(asColumns):
        offColumn = aoffColumns[iColumn];
        if len(sLine) < offColumn:
            sLine = sLine.ljust(offColumn);
        else:
            sLine += ' ';
        sLine += sColumn;

    # Example output and the macro consuming it:
    #   OP("psrlw %Vdq,%Wdq", IDX_ParseModRM, IDX_UseModRM, 0, OP_PSRLW, OP_PARM_Vdq, OP_PARM_Wdq, OP_PARM_NONE,
    #      DISOPTYPE_HARMLESS),
    #   define OP(pszOpcode, idxParse1, idxParse2, idxParse3, opcode, param1, param2, param3, optype) \
    #      { pszOpcode, idxParse1, idxParse2, idxParse3, 0, opcode, param1, param2, param3, 0, 0, optype }
    return sLine;
3781
def __checkIfShortTable(aoTableOrdered, oMap):
    """
    Checks whether a table should be emitted in the short form, i.e. with the
    leading and trailing unused (None) entries trimmed off.

    Returns (iInstr, cInstructions, fShortTable), where iInstr is the index of
    the first entry to emit and cInstructions the index following the last
    one.  For the full table this is (0, len(aoTableOrdered), False).
    """
    cTotal = len(aoTableOrdered);

    # Determine how much we can trim off at the end and at the start.
    iEnd   = next((i + 1 for i in range(cTotal - 1, -1, -1) if aoTableOrdered[i] is not None), 0);
    iFirst = next((i for i in range(iEnd) if aoTableOrdered[i] is not None), iEnd);

    _ = oMap; # Use this for overriding.

    # Go for the short table version when the number of trimmed entries is at
    # least a 30th of the table size.
    # NOTE(review): this used to be described as "save more than 30%", which
    #               does not match the len // 30 threshold - confirm the intent.
    if iFirst + cTotal - iEnd >= cTotal // 30:
        return (iFirst, iEnd, True);

    # Output the full table.
    return (0, cTotal, False);
3803
def generateDisassemblerTables(oDstFile = sys.stdout):
    """
    Generates disassembler tables.

    For each instruction map whose name starts with 'vex', one DISOPCODE table
    followed by a DISOPMAPDESC range record is written to oDstFile, which can
    be any object with a write() method.  Maps using the 'byte+pfx' selector
    are first split into four maps, one per prefix variation.

    A debug line listing the map names is printed to standard output.
    """

    #
    # The disassembler uses a slightly different table layout: maps selected by
    # 'byte+pfx' are split up by prefix (none, 0x66, 0xf3 and 0xf2).
    #
    aoDisasmMaps = [];
    for oMap in sorted(g_dInstructionMaps.values(),
                       key = lambda oSortMap: oSortMap.sEncoding + ''.join(oSortMap.asLeadOpcodes)):
        if oMap.sSelector != 'byte+pfx':
            aoDisasmMaps.append(oMap);
        else:
            # Split the map by prefix.
            aoDisasmMaps.append(oMap.copy(oMap.sName, 'none'));
            aoDisasmMaps.append(oMap.copy(oMap.sName + '_66', '0x66'));
            aoDisasmMaps.append(oMap.copy(oMap.sName + '_F3', '0xf3'));
            aoDisasmMaps.append(oMap.copy(oMap.sName + '_F2', '0xf2'));

    #
    # Dump each map.
    #
    asHeaderLines = []; # Declarations for a header; collected here but not written out by this function.
    print("debug: maps=%s\n" % (', '.join([oMap.sName for oMap in aoDisasmMaps]),));
    for oMap in aoDisasmMaps:
        sName = oMap.sName;

        if not sName.startswith("vex"):
            continue; # only looking at the vex maps at the moment.

        #
        # Get the instructions for the map and see if we can do a short version or not.
        #
        aoTableOrder = oMap.getInstructionsInTableOrder();
        cEntriesPerByte = oMap.getEntriesPerByte();
        cEntriesPerRow = 0x10 * cEntriesPerByte; # Number of table entries covering 16 opcode bytes.
        (iInstrStart, iInstrEnd, fShortTable) = __checkIfShortTable(aoTableOrder, oMap);

        #
        # Output the table start.
        # Note! Short tables are static and only accessible via the map range record.
        #
        asLines = [];
        asLines.append('/* Generated from: %-11s Selector: %-7s Encoding: %-7s Lead bytes opcodes: %s */'
                       % ( oMap.sName, oMap.sSelector, oMap.sEncoding, ' '.join(oMap.asLeadOpcodes), ));
        if fShortTable:
            asLines.append('static const DISOPCODE %s[] =' % (oMap.getDisasTableName(),));
        else:
            asHeaderLines.append('extern const DISOPCODE %s[%d];' % (oMap.getDisasTableName(), iInstrEnd - iInstrStart,));
            asLines.append( 'const DISOPCODE %s[%d] =' % (oMap.getDisasTableName(), iInstrEnd - iInstrStart,));
        asLines.append('{');

        # A short table not starting on a row boundary gets a comment giving the start index.
        if fShortTable and (iInstrStart & (cEntriesPerRow - 1)) != 0:
            asLines.append(' /* %#04x: */' % (iInstrStart,));

        #
        # Output the instructions.
        #
        iInstr = iInstrStart;
        while iInstr < iInstrEnd:
            oInstr = aoTableOrder[iInstr];

            # Start of a new row of 16 opcode bytes: blank line (except first) and a row comment.
            if (iInstr & (cEntriesPerRow - 1)) == 0:
                if iInstr != iInstrStart:
                    asLines.append('');
                asLines.append(' /* %x */' % ((iInstr // cEntriesPerByte) >> 4,));

            if oInstr is None:
                # Invalid. Optimize blocks of invalid instructions.
                cInvalidInstrs = 1;
                while iInstr + cInvalidInstrs < len(aoTableOrder) and aoTableOrder[iInstr + cInvalidInstrs] is None:
                    cInvalidInstrs += 1;
                if (iInstr & (cEntriesPerRow - 1)) == 0 and cInvalidInstrs >= cEntriesPerRow:
                    # A whole row is invalid; the final increment below skips the last entry of it.
                    asLines.append(' INVALID_OPCODE_BLOCK_%u,' % (cEntriesPerRow,));
                    iInstr += cEntriesPerRow - 1;
                elif cEntriesPerByte > 1:
                    if (iInstr & (cEntriesPerByte - 1)) == 0 and cInvalidInstrs >= cEntriesPerByte:
                        # All entries of one opcode byte are invalid.  Skip exactly that many
                        # entries (this used to be a hard-coded 3, i.e. only right for 4 per byte).
                        asLines.append(' INVALID_OPCODE_BLOCK_%u,' % (cEntriesPerByte,));
                        iInstr += cEntriesPerByte - 1;
                    else:
                        asLines.append(' /* %#04x/%d */ INVALID_OPCODE,'
                                       % (iInstr // cEntriesPerByte, iInstr % cEntriesPerByte));
                else:
                    asLines.append(' /* %#04x */ INVALID_OPCODE,' % (iInstr));
            elif isinstance(oInstr, list):
                if len(oInstr) != 0:
                    asLines.append(' /* %#04x */ ComplicatedListStuffNeedingWrapper, /* \n -- %s */'
                                   % (iInstr, '\n -- '.join([str(oItem) for oItem in oInstr]),));
                else:
                    # NOTE(review): an empty list is passed on as-is - confirm the formatter copes with it.
                    asLines.append(__formatDisassemblerTableEntry(oInstr));
            else:
                asLines.append(__formatDisassemblerTableEntry(oInstr));

            iInstr += 1;

        # An empty table still needs one entry.
        if iInstrStart >= iInstrEnd:
            asLines.append(' /* dummy */ INVALID_OPCODE');

        asLines.append('};');
        asLines.append('AssertCompile(RT_ELEMENTS(%s) == %s);' % (oMap.getDisasTableName(), iInstrEnd - iInstrStart,));

        #
        # We always emit a map range record, assuming the linker will eliminate the unnecessary ones.
        #
        # NOTE(review): the declaration appends 'Range' to getDisasRangeName() while the definition
        #               uses the name as-is - confirm which one is intended.
        asHeaderLines.append('extern const DISOPMAPDESC %sRange;' % (oMap.getDisasRangeName()));
        asLines.append('const DISOPMAPDESC %s = { &%s[0], %#04x, RT_ELEMENTS(%s) };'
                       % (oMap.getDisasRangeName(), oMap.getDisasTableName(), iInstrStart, oMap.getDisasTableName(),));

        #
        # Write out the lines.
        #
        oDstFile.write('\n'.join(asLines));
        oDstFile.write('\n');
        oDstFile.write('\n');
        #break; #for now
3918
# When run as a script, generate the disassembler tables using the default
# destination of generateDisassemblerTables().
if __name__ == '__main__':
    generateDisassemblerTables();
3921
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette