VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllInstructionsPython.py@ 66921

Last change on this file since 66921 was 66921, checked in by vboxsync, 8 years ago

IEM: Implemented movsd Usd,Hsd,Vsd (VEX.F2.0F 11 mod=3) and movsd Mq,Vsd (VEX.F2.0F 11 mod!=3).

  • Property svn:eol-style set to LF
  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 143.3 KB
Line 
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3# $Id: IEMAllInstructionsPython.py 66921 2017-05-16 19:31:36Z vboxsync $
4
5"""
6IEM instruction extractor.
7
8This script/module parses the IEMAllInstruction*.cpp.h files next to it and
9collects information about the instructions. It can then be used to generate
10disassembler tables and tests.
11"""
12
13__copyright__ = \
14"""
15Copyright (C) 2017 Oracle Corporation
16
17This file is part of VirtualBox Open Source Edition (OSE), as
18available from http://www.virtualbox.org. This file is free software;
19you can redistribute it and/or modify it under the terms of the GNU
20General Public License (GPL) as published by the Free Software
21Foundation, in version 2 as it comes in the "COPYING" file of the
22VirtualBox OSE distribution. VirtualBox OSE is distributed in the
23hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
24
25The contents of this file may alternatively be used under the terms
26of the Common Development and Distribution License Version 1.0
27(CDDL) only, as it comes in the "COPYING.CDDL" file of the
28VirtualBox OSE distribution, in which case the provisions of the
29CDDL are applicable instead of those of the GPL.
30
31You may elect to license modified versions of this file under the
32terms and conditions of either the GPL or the CDDL or both.
33"""
34__version__ = "$Revision: 66921 $"
35
36# pylint: disable=anomalous-backslash-in-string
37
38# Standard python imports.
39import os
40import re
41import sys
42
43## Only the main script needs to modify the path.
44#g_ksValidationKitDir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),
45# 'ValidationKit');
46#sys.path.append(g_ksValidationKitDir);
47#
48#from common import utils; - Windows build boxes doesn't have pywin32.
49
# Python 3 hacks:
# Python 3 has no separate 'long' type; alias the name to 'int' so that
# long(...) calls keep working there.
if sys.version_info[0] >= 3:
    long = int;     # pylint: disable=redefined-builtin,invalid-name
53
54
## X86_EFL_XXX constant names and their values.
## Single flags are written as bit shifts, the bit number being the N of the
## corresponding RT_BIT_32(N).
g_kdX86EFlagsConstants = {
    'X86_EFL_CF':           1 << 0,
    'X86_EFL_1':            1 << 1,
    'X86_EFL_PF':           1 << 2,
    'X86_EFL_AF':           1 << 4,
    'X86_EFL_ZF':           1 << 6,
    'X86_EFL_SF':           1 << 7,
    'X86_EFL_TF':           1 << 8,
    'X86_EFL_IF':           1 << 9,
    'X86_EFL_DF':           1 << 10,
    'X86_EFL_OF':           1 << 11,
    'X86_EFL_IOPL':         (1 << 12) | (1 << 13),  # two bit wide field
    'X86_EFL_NT':           1 << 14,
    'X86_EFL_RF':           1 << 16,
    'X86_EFL_VM':           1 << 17,
    'X86_EFL_AC':           1 << 18,
    'X86_EFL_VIF':          1 << 19,
    'X86_EFL_VIP':          1 << 20,
    'X86_EFL_ID':           1 << 21,
    'X86_EFL_LIVE_MASK':    0x003f7fd5,             # UINT32_C(0x003f7fd5)
    'X86_EFL_RA1_MASK':     1 << 1,                 # same bit as X86_EFL_1
};
77
## EFlags values allowed in \@opfltest, \@opflmodify, \@opflundef, \@opflset, and \@opflclear.
##
## Maps a flag mnemonic to the name of a g_kdX86EFlagsConstants entry.  A
## leading '!' on the constant name marks a mnemonic that stands for the flag
## being clear rather than set.
g_kdEFlagsMnemonics = {
    # Debugger flag notation (sorted by value):
    'cf':   'X86_EFL_CF',   ##< Carry Flag.
    'nc':  '!X86_EFL_CF',   ##< No Carry.

    # NOTE(review): debuggers conventionally show 'pe' when PF is set and 'po'
    #               when it is clear, so these two look swapped - confirm before
    #               changing anything, TestTypeEflags.kdZeroValueFlags lists 'pe'.
    'po':   'X86_EFL_PF',   ##< Parity Odd.
    'pe':  '!X86_EFL_PF',   ##< Parity Even.

    'af':   'X86_EFL_AF',   ##< Aux Flag.
    'na':  '!X86_EFL_AF',   ##< No Aux.

    'zr':   'X86_EFL_ZF',   ##< ZeRo.
    'nz':  '!X86_EFL_ZF',   ##< No Zero.

    'ng':   'X86_EFL_SF',   ##< NeGative (sign).
    'pl':  '!X86_EFL_SF',   ##< PLus (sign).

    'tf':   'X86_EFL_TF',   ##< Trap flag.

    'ei':   'X86_EFL_IF',   ##< Enabled Interrupts.
    'di':  '!X86_EFL_IF',   ##< Disabled Interrupts.

    'dn':   'X86_EFL_DF',   ##< DowN (string op direction).
    'up':  '!X86_EFL_DF',   ##< UP (string op direction).

    'ov':   'X86_EFL_OF',   ##< OVerflow.
    'nv':  '!X86_EFL_OF',   ##< No Overflow.

    'nt':   'X86_EFL_NT',   ##< Nested Task.
    'rf':   'X86_EFL_RF',   ##< Resume Flag.
    'vm':   'X86_EFL_VM',   ##< Virtual-8086 Mode.
    'ac':   'X86_EFL_AC',   ##< Alignment Check.
    'vif':  'X86_EFL_VIF',  ##< Virtual Interrupt Flag.
    'vip':  'X86_EFL_VIP',  ##< Virtual Interrupt Pending.

    # Reference manual notation not covered above (sorted by value):
    'pf':   'X86_EFL_PF',
    'zf':   'X86_EFL_ZF',
    'sf':   'X86_EFL_SF',
    'if':   'X86_EFL_IF',
    'df':   'X86_EFL_DF',
    'of':   'X86_EFL_OF',
    'iopl': 'X86_EFL_IOPL',
    'id':   'X86_EFL_ID',
};
124
## Constants and values for CR0.
## Each value is a single bit, written as a shift by the N of RT_BIT_32(N).
g_kdX86Cr0Constants = {
    'X86_CR0_PE':           1 << 0,
    'X86_CR0_MP':           1 << 1,
    'X86_CR0_EM':           1 << 2,
    'X86_CR0_TS':           1 << 3,
    'X86_CR0_ET':           1 << 4,
    'X86_CR0_NE':           1 << 5,
    'X86_CR0_WP':           1 << 16,
    'X86_CR0_AM':           1 << 18,
    'X86_CR0_NW':           1 << 29,
    'X86_CR0_CD':           1 << 30,
    'X86_CR0_PG':           1 << 31,
};
139
## Constants and values for CR4.
## Each value is a single bit, written as a shift by the N of RT_BIT_32(N).
g_kdX86Cr4Constants = {
    'X86_CR4_VME':          1 << 0,
    'X86_CR4_PVI':          1 << 1,
    'X86_CR4_TSD':          1 << 2,
    'X86_CR4_DE':           1 << 3,
    'X86_CR4_PSE':          1 << 4,
    'X86_CR4_PAE':          1 << 5,
    'X86_CR4_MCE':          1 << 6,
    'X86_CR4_PGE':          1 << 7,
    'X86_CR4_PCE':          1 << 8,
    'X86_CR4_OSFXSR':       1 << 9,
    'X86_CR4_OSXMMEEXCPT':  1 << 10,
    'X86_CR4_VMXE':         1 << 13,
    'X86_CR4_SMXE':         1 << 14,
    'X86_CR4_PCIDE':        1 << 17,
    'X86_CR4_OSXSAVE':      1 << 18,
    'X86_CR4_SMEP':         1 << 20,
    'X86_CR4_SMAP':         1 << 21,
    'X86_CR4_PKE':          1 << 22,
};
161
## XSAVE components (XCR0).
## Single components are written as bit shifts; the two XSAVE_C_ALL_XXX
## entries are multi-bit masks and stay as hex literals.
g_kdX86XSaveCConstants = {
    'XSAVE_C_X87':          1 << 0,
    'XSAVE_C_SSE':          1 << 1,
    'XSAVE_C_YMM':          1 << 2,
    'XSAVE_C_BNDREGS':      1 << 3,
    'XSAVE_C_BNDCSR':       1 << 4,
    'XSAVE_C_OPMASK':       1 << 5,
    'XSAVE_C_ZMM_HI256':    1 << 6,
    'XSAVE_C_ZMM_16HI':     1 << 7,
    'XSAVE_C_PKRU':         1 << 9,
    'XSAVE_C_LWP':          1 << 62,
    'XSAVE_C_X':            1 << 63,
    'XSAVE_C_ALL_AVX':      0x000000c4, # For clearing all AVX bits.
    'XSAVE_C_ALL_AVX_SSE':  0x000000c6, # For clearing all AVX and SSE bits.
};
178
179
## \@op[1-4] locations
##
## The keys are the location names referenced by field 1 of the g_kdOpTypes
## entries and by the g_kdIemForms operand lists.  Every value is an empty
## list in this table.
g_kdOpLocations = {
    'reg':      [], ## modrm.reg
    'rm':       [], ## modrm.rm
    'imm':      [], ## immediate instruction data
    'vvvv':     [], ## VEX.vvvv

    # fixed registers.
    'AL':       [],
    'rAX':      [],
    'rSI':      [],
    'rDI':      [],
    'rFLAGS':   [],
    'CS':       [],
    'DS':       [],
    'ES':       [],
    'FS':       [],
    'GS':       [],
    'SS':       [],
};
200
## \@op[1-4] types
##
## Value fields:
##    - 0: the normal IDX_ParseXXX handler (IDX_UseModRM == IDX_ParseModRM).
##    - 1: the location (g_kdOpLocations).
##    - 2: disassembler format string version of the type.
##    - 3: disassembler OP_PARAM_XXX (XXX only).
##
## Note! See the A.2.1 in SDM vol 2 for the type names.
##
## Note! Keys that differ only by a _RO, _WO or _RW suffix carry the same
##       tuple as the unsuffixed key.  The suffix presumably states the access
##       mode (read-only, write-only, read-write) - confirm against the users
##       of this table.
g_kdOpTypes = {
    # Fixed addresses
    'Ap':           ( 'IDX_ParseImmAddrF',  'imm',    '%Ap',  'Ap',      ),

    # ModR/M.rm
    'Eb':           ( 'IDX_UseModRM',       'rm',     '%Eb',  'Eb',      ),
    'Ew':           ( 'IDX_UseModRM',       'rm',     '%Ew',  'Ew',      ),
    'Ev':           ( 'IDX_UseModRM',       'rm',     '%Ev',  'Ev',      ),
    'Wss':          ( 'IDX_UseModRM',       'rm',     '%Wss', 'Wss',     ),
    'Wss_WO':       ( 'IDX_UseModRM',       'rm',     '%Wss', 'Wss',     ),
    'Wsd':          ( 'IDX_UseModRM',       'rm',     '%Wsd', 'Wsd',     ),
    'Wsd_WO':       ( 'IDX_UseModRM',       'rm',     '%Wsd', 'Wsd',     ),
    'Wps':          ( 'IDX_UseModRM',       'rm',     '%Wps', 'Wps',     ),
    'Wps_WO':       ( 'IDX_UseModRM',       'rm',     '%Wps', 'Wps',     ),
    'Wpd':          ( 'IDX_UseModRM',       'rm',     '%Wpd', 'Wpd',     ),
    'Wpd_WO':       ( 'IDX_UseModRM',       'rm',     '%Wpd', 'Wpd',     ),
    'Wdq':          ( 'IDX_UseModRM',       'rm',     '%Wdq', 'Wdq',     ),
    'Wdq_WO':       ( 'IDX_UseModRM',       'rm',     '%Wdq', 'Wdq',     ),
    'Wq':           ( 'IDX_UseModRM',       'rm',     '%Wq',  'Wq',      ),
    'WqZxReg_WO':   ( 'IDX_UseModRM',       'rm',     '%Wq',  'Wq',      ),

    # ModR/M.rm - register only.
    'Uq':           ( 'IDX_UseModRM',       'rm',     '%Uq',  'Uq',      ),
    'UqHi':         ( 'IDX_UseModRM',       'rm',     '%Uq',  'UqHi',    ),
    'Uss':          ( 'IDX_UseModRM',       'rm',     '%Uss', 'Uss',     ),
    'Uss_WO':       ( 'IDX_UseModRM',       'rm',     '%Uss', 'Uss',     ),
    'Usd':          ( 'IDX_UseModRM',       'rm',     '%Usd', 'Usd',     ),
    'Usd_WO':       ( 'IDX_UseModRM',       'rm',     '%Usd', 'Usd',     ),
    'Nq':           ( 'IDX_UseModRM',       'rm',     '%Qq',  'Nq',      ),

    # ModR/M.rm - memory only.
    'Ma':           ( 'IDX_UseModRM',       'rm',     '%Ma',  'Ma',      ), ##< Only used by BOUND.
    'Mb_RO':        ( 'IDX_UseModRM',       'rm',     '%Mb',  'Mb',      ),
    'Md':           ( 'IDX_UseModRM',       'rm',     '%Md',  'Md',      ),
    'Md_RO':        ( 'IDX_UseModRM',       'rm',     '%Md',  'Md',      ),
    'Md_WO':        ( 'IDX_UseModRM',       'rm',     '%Md',  'Md',      ),
    'Mq':           ( 'IDX_UseModRM',       'rm',     '%Mq',  'Mq',      ),
    'Mq_WO':        ( 'IDX_UseModRM',       'rm',     '%Mq',  'Mq',      ),
    'M_RO':         ( 'IDX_UseModRM',       'rm',     '%M',   'M',       ),
    'M_RW':         ( 'IDX_UseModRM',       'rm',     '%M',   'M',       ),

    # ModR/M.reg
    'Gb':           ( 'IDX_UseModRM',       'reg',    '%Gb',  'Gb',      ),
    'Gw':           ( 'IDX_UseModRM',       'reg',    '%Gw',  'Gw',      ),
    'Gv':           ( 'IDX_UseModRM',       'reg',    '%Gv',  'Gv',      ),
    'Gv_RO':        ( 'IDX_UseModRM',       'reg',    '%Gv',  'Gv',      ),
    'Pq_WO':        ( 'IDX_UseModRM',       'reg',    '%Pq',  'Pq',      ),
    'Vss':          ( 'IDX_UseModRM',       'reg',    '%Vss', 'Vss',     ),
    'Vss_WO':       ( 'IDX_UseModRM',       'reg',    '%Vss', 'Vss',     ),
    'VssZx_WO':     ( 'IDX_UseModRM',       'reg',    '%Vss', 'Vss',     ),
    'Vsd':          ( 'IDX_UseModRM',       'reg',    '%Vsd', 'Vsd',     ),
    'Vsd_WO':       ( 'IDX_UseModRM',       'reg',    '%Vsd', 'Vsd',     ),
    'VsdZx_WO':     ( 'IDX_UseModRM',       'reg',    '%Vsd', 'Vsd',     ),
    'Vps':          ( 'IDX_UseModRM',       'reg',    '%Vps', 'Vps',     ),
    'Vps_WO':       ( 'IDX_UseModRM',       'reg',    '%Vps', 'Vps',     ),
    'Vpd':          ( 'IDX_UseModRM',       'reg',    '%Vpd', 'Vpd',     ),
    'Vpd_WO':       ( 'IDX_UseModRM',       'reg',    '%Vpd', 'Vpd',     ),
    'Vq':           ( 'IDX_UseModRM',       'reg',    '%Vq',  'Vq',      ),
    'Vq_WO':        ( 'IDX_UseModRM',       'reg',    '%Vq',  'Vq',      ),
    'Vdq_WO':       ( 'IDX_UseModRM',       'reg',    '%Vdq', 'Vdq',     ),
    'VqHi':         ( 'IDX_UseModRM',       'reg',    '%Vdq', 'VdqHi',   ),
    'VqHi_WO':      ( 'IDX_UseModRM',       'reg',    '%Vdq', 'VdqHi',   ),
    'VqZx_WO':      ( 'IDX_UseModRM',       'reg',    '%Vq',  'VqZx',    ),

    # VEX.vvvv
    'HdqCss':       ( 'IDX_UseModRM',       'vvvv',   '%Hx',  'HdqCss',  ),
    'HdqCsd':       ( 'IDX_UseModRM',       'vvvv',   '%Hx',  'HdqCsd',  ),

    # Immediate values.
    'Ib':           ( 'IDX_ParseImmByte',   'imm',    '%Ib',  'Ib',      ), ##< NB! Could be IDX_ParseImmByteSX for some instrs.
    'Iw':           ( 'IDX_ParseImmUshort', 'imm',    '%Iw',  'Iw',      ),
    'Id':           ( 'IDX_ParseImmUlong',  'imm',    '%Id',  'Id',      ),
    'Iq':           ( 'IDX_ParseImmQword',  'imm',    '%Iq',  'Iq',      ),
    'Iv':           ( 'IDX_ParseImmV',      'imm',    '%Iv',  'Iv',      ), ##< o16: word, o32: dword, o64: qword
    'Iz':           ( 'IDX_ParseImmZ',      'imm',    '%Iz',  'Iz',      ), ##< o16: word, o32|o64:dword

    # Address operands (no ModR/M).
    'Ob':           ( 'IDX_ParseImmAddr',   'imm',    '%Ob',  'Ob',      ),
    'Ov':           ( 'IDX_ParseImmAddr',   'imm',    '%Ov',  'Ov',      ),

    # Relative jump targets
    'Jb':           ( 'IDX_ParseImmBRel',   'imm',    '%Jb',  'Jb',      ),
    'Jv':           ( 'IDX_ParseImmVRel',   'imm',    '%Jv',  'Jv',      ),

    # DS:rSI
    'Xb':           ( 'IDX_ParseXb',        'rSI',    '%eSI', 'Xb',      ),
    'Xv':           ( 'IDX_ParseXv',        'rSI',    '%eSI', 'Xv',      ),
    # ES:rDI
    'Yb':           ( 'IDX_ParseYb',        'rDI',    '%eDI', 'Yb',      ),
    'Yv':           ( 'IDX_ParseYv',        'rDI',    '%eDI', 'Yv',      ),

    # The flags register (location 'rFLAGS').
    'Fv':           ( 'IDX_ParseFixedReg',  'rFLAGS', '%Fv',  'Fv',      ),

    # Fixed registers.
    'AL':           ( 'IDX_ParseFixedReg',  'AL',     'al',   'REG_AL',  ),
    'rAX':          ( 'IDX_ParseFixedReg',  'rAX',    '%eAX', 'REG_EAX', ),
    'CS':           ( 'IDX_ParseFixedReg',  'CS',     'cs',   'REG_CS',  ), # 8086: push CS
    'DS':           ( 'IDX_ParseFixedReg',  'DS',     'ds',   'REG_DS',  ),
    'ES':           ( 'IDX_ParseFixedReg',  'ES',     'es',   'REG_ES',  ),
    'FS':           ( 'IDX_ParseFixedReg',  'FS',     'fs',   'REG_FS',  ),
    'GS':           ( 'IDX_ParseFixedReg',  'GS',     'gs',   'REG_GS',  ),
    'SS':           ( 'IDX_ParseFixedReg',  'SS',     'ss',   'REG_SS',  ),
};
313
314# IDX_ParseFixedReg
315# IDX_ParseVexDest
316
317
## IEMFORM_XXX mappings.
##
## Value fields:
##    - 0: the encoding, a key of g_kdEncodings.
##    - 1: list with the location (g_kdOpLocations key) of each operand in
##         operand order, or None for the 'FIXED' form.
g_kdIemForms = {     # sEncoding,   [ sWhere1, ... ]
    'RM':           ( 'ModR/M',     [ 'reg', 'rm' ], ),
    'RM_REG':       ( 'ModR/M',     [ 'reg', 'rm' ], ),
    'RM_MEM':       ( 'ModR/M',     [ 'reg', 'rm' ], ),
    'MR':           ( 'ModR/M',     [ 'rm', 'reg' ], ),
    'MR_REG':       ( 'ModR/M',     [ 'rm', 'reg' ], ),
    'MR_MEM':       ( 'ModR/M',     [ 'rm', 'reg' ], ),
    'M':            ( 'ModR/M',     [ 'rm', ], ),
    'M_REG':        ( 'ModR/M',     [ 'rm', ], ),
    'M_MEM':        ( 'ModR/M',     [ 'rm', ], ),
    'R':            ( 'ModR/M',     [ 'reg', ], ),

    'VEX_RM':       ( 'VEX.ModR/M', [ 'reg', 'rm' ], ),
    'VEX_RM_REG':   ( 'VEX.ModR/M', [ 'reg', 'rm' ], ),
    'VEX_RM_MEM':   ( 'VEX.ModR/M', [ 'reg', 'rm' ], ),
    'VEX_XM':       ( 'VEX.ModR/M', [ 'reg', 'rm' ], ), # same as VEX_RM_MEM
    'VEX_MR':       ( 'VEX.ModR/M', [ 'rm', 'reg' ], ),
    'VEX_MR_REG':   ( 'VEX.ModR/M', [ 'rm', 'reg' ], ),
    'VEX_MR_MEM':   ( 'VEX.ModR/M', [ 'rm', 'reg' ], ),
    'VEX_M':        ( 'VEX.ModR/M', [ 'rm', ], ),
    'VEX_M_REG':    ( 'VEX.ModR/M', [ 'rm', ], ),
    'VEX_M_MEM':    ( 'VEX.ModR/M', [ 'rm', ], ),
    'VEX_R':        ( 'VEX.ModR/M', [ 'reg', ], ),
    'VEX_RVM':      ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm'], ),
    'VEX_MVR':      ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg'], ),

    'FIXED':        ( 'fixed',      None, )
};
347
## \@oppfx values.
## The keys are the accepted values; every value is an empty list here.
g_kdPrefixes = {
    'none': [],
    '0x66': [],
    '0xf3': [],
    '0xf2': [],
};
355
## Special \@opcode tag values.
## The keys are the accepted values; every value is an empty list here.
g_kdSpecialOpcodes = {
    '/reg':         [],
    'mr/reg':       [],
    '11 /reg':      [],
    '!11 /reg':     [],
    '11 mr/reg':    [],
    '!11 mr/reg':   [],
};
365
## Special \@opcodesub tag values.
## Each value is a one element list.  The aliases '11' and '!11' carry the
## long form of the name, 'none' carries None.
g_kdSubOpcodes = {
    'none':         [ None,         ],
    '11 mr/reg':    [ '11 mr/reg',  ],
    '11':           [ '11 mr/reg',  ], ##< alias
    '!11 mr/reg':   [ '!11 mr/reg', ],
    '!11':          [ '!11 mr/reg', ], ##< alias
};
374
## Valid values for \@openc
## Each value is a one element list holding a BS3CG1ENC_XXX name, or None for
## 'prefix'.
g_kdEncodings = {
    'ModR/M':       [ 'BS3CG1ENC_MODRM', ],     ##< ModR/M
    'VEX.ModR/M':   [ 'BS3CG1ENC_VEX_MODRM', ], ##< VEX...ModR/M
    'fixed':        [ 'BS3CG1ENC_FIXED', ],     ##< Fixed encoding (address, registers, etc).
    'prefix':       [ None, ],                  ##< Prefix
};
382
## \@opunused, \@opinvalid, \@opinvlstyle
## The keys are the accepted values; every value is an empty list here.
g_kdInvalidStyles = {
    'immediate':                [], ##< CPU stops decoding immediately after the opcode.
    'intel-modrm':              [], ##< Intel decodes ModR/M.
    'intel-modrm-imm8':         [], ##< Intel decodes ModR/M and an 8-bit immediate.
    'intel-opcode-modrm':       [], ##< Intel decodes another opcode byte followed by ModR/M. (Unused extension tables.)
    'intel-opcode-modrm-imm8':  [], ##< Intel decodes another opcode byte followed by ModR/M and an 8-bit immediate.
};
391
## CPU names, each mapped to an empty tuple.
## NOTE(review): presumably the set of values accepted by a CPU related tag;
##               the user of this table is not visible here - confirm.
g_kdCpuNames = {
    '8086':     (),
    '80186':    (),
    '80286':    (),
    '80386':    (),
    '80486':    (),
};
399
## \@opcpuid
##
## Maps the lower case feature name to the name of the X86_CPUID_XXX feature
## bit constant.
##
## Note! 'pclmul', 'monitor' and 'smx' used to point at the DTES64, CPLDS and
##       TM2 constants, which are different feature bits; they now name their
##       own X86_CPUID_FEATURE_ECX_XXX constants.
g_kdCpuIdFlags = {
    'vme':          'X86_CPUID_FEATURE_EDX_VME',
    'tsc':          'X86_CPUID_FEATURE_EDX_TSC',
    'msr':          'X86_CPUID_FEATURE_EDX_MSR',
    'cx8':          'X86_CPUID_FEATURE_EDX_CX8',
    'sep':          'X86_CPUID_FEATURE_EDX_SEP',
    'cmov':         'X86_CPUID_FEATURE_EDX_CMOV',
    'clfsh':        'X86_CPUID_FEATURE_EDX_CLFSH',
    'clflushopt':   'X86_CPUID_STEXT_FEATURE_EBX_CLFLUSHOPT',
    'mmx':          'X86_CPUID_FEATURE_EDX_MMX',
    'fxsr':         'X86_CPUID_FEATURE_EDX_FXSR',
    'sse':          'X86_CPUID_FEATURE_EDX_SSE',
    'sse2':         'X86_CPUID_FEATURE_EDX_SSE2',
    'sse3':         'X86_CPUID_FEATURE_ECX_SSE3',
    'pclmul':       'X86_CPUID_FEATURE_ECX_PCLMUL',
    'monitor':      'X86_CPUID_FEATURE_ECX_MONITOR',
    'vmx':          'X86_CPUID_FEATURE_ECX_VMX',
    'smx':          'X86_CPUID_FEATURE_ECX_SMX',
    'ssse3':        'X86_CPUID_FEATURE_ECX_SSSE3',
    'fma':          'X86_CPUID_FEATURE_ECX_FMA',
    'cx16':         'X86_CPUID_FEATURE_ECX_CX16',
    'pcid':         'X86_CPUID_FEATURE_ECX_PCID',
    'sse41':        'X86_CPUID_FEATURE_ECX_SSE4_1',
    'sse42':        'X86_CPUID_FEATURE_ECX_SSE4_2',
    'movbe':        'X86_CPUID_FEATURE_ECX_MOVBE',
    'popcnt':       'X86_CPUID_FEATURE_ECX_POPCNT',
    'aes':          'X86_CPUID_FEATURE_ECX_AES',
    'xsave':        'X86_CPUID_FEATURE_ECX_XSAVE',
    'avx':          'X86_CPUID_FEATURE_ECX_AVX',
    'f16c':         'X86_CPUID_FEATURE_ECX_F16C',
    'rdrand':       'X86_CPUID_FEATURE_ECX_RDRAND',

    'axmmx':        'X86_CPUID_AMD_FEATURE_EDX_AXMMX',
    '3dnowext':     'X86_CPUID_AMD_FEATURE_EDX_3DNOW_EX',
    '3dnow':        'X86_CPUID_AMD_FEATURE_EDX_3DNOW',
    'svm':          'X86_CPUID_AMD_FEATURE_ECX_SVM',
    'cr8l':         'X86_CPUID_AMD_FEATURE_ECX_CR8L',
    'abm':          'X86_CPUID_AMD_FEATURE_ECX_ABM',
    'sse4a':        'X86_CPUID_AMD_FEATURE_ECX_SSE4A',
    '3dnowprf':     'X86_CPUID_AMD_FEATURE_ECX_3DNOWPRF',
    'xop':          'X86_CPUID_AMD_FEATURE_ECX_XOP',
    'fma4':         'X86_CPUID_AMD_FEATURE_ECX_FMA4',
};
444
## \@ophints values.
##
## Maps the hint name to the name of the DISOPTYPE_XXX constant.  The last two
## hints map to an empty string.
g_kdHints = {
    'invalid':               'DISOPTYPE_INVALID',
    'harmless':              'DISOPTYPE_HARMLESS',
    'controlflow':           'DISOPTYPE_CONTROLFLOW',
    'potentially_dangerous': 'DISOPTYPE_POTENTIALLY_DANGEROUS',
    'dangerous':             'DISOPTYPE_DANGEROUS',
    'portio':                'DISOPTYPE_PORTIO',
    'privileged':            'DISOPTYPE_PRIVILEGED',
    'privileged_notrap':     'DISOPTYPE_PRIVILEGED_NOTRAP',
    'uncond_controlflow':    'DISOPTYPE_UNCOND_CONTROLFLOW',
    'relative_controlflow':  'DISOPTYPE_RELATIVE_CONTROLFLOW',
    'cond_controlflow':      'DISOPTYPE_COND_CONTROLFLOW',
    'interrupt':             'DISOPTYPE_INTERRUPT',
    'illegal':               'DISOPTYPE_ILLEGAL',
    'rrm_dangerous':         'DISOPTYPE_RRM_DANGEROUS',         ##< Some additional dangerous ones when recompiling raw r0.
    'rrm_dangerous_16':      'DISOPTYPE_RRM_DANGEROUS_16',      ##< Some additional dangerous ones when recompiling 16-bit raw r0.
    'inhibit_irqs':          'DISOPTYPE_INHIBIT_IRQS',          ##< Will or can inhibit irqs (sti, pop ss, mov ss)
    'portio_read':           'DISOPTYPE_PORTIO_READ',
    'portio_write':          'DISOPTYPE_PORTIO_WRITE',
    'invalid_64':            'DISOPTYPE_INVALID_64',            ##< Invalid in 64 bits mode
    'only_64':               'DISOPTYPE_ONLY_64',               ##< Only valid in 64 bits mode
    'default_64_op_size':    'DISOPTYPE_DEFAULT_64_OP_SIZE',    ##< Default 64 bits operand size
    'forced_64_op_size':     'DISOPTYPE_FORCED_64_OP_SIZE',     ##< Forced 64 bits operand size; regardless of prefix bytes
    'rexb_extends_opreg':    'DISOPTYPE_REXB_EXTENDS_OPREG',    ##< REX.B extends the register field in the opcode byte
    'mod_fixed_11':          'DISOPTYPE_MOD_FIXED_11',          ##< modrm.mod is always 11b
    'forced_32_op_size_x86': 'DISOPTYPE_FORCED_32_OP_SIZE_X86', ##< Forced 32 bits operand size; regardless of prefix bytes
                                                                ##  (only in 16 & 32 bits mode!)
    'sse':                   'DISOPTYPE_SSE',                   ##< SSE,SSE2,SSE3,AVX,++ instruction. Not implemented yet!
    'mmx':                   'DISOPTYPE_MMX',                   ##< MMX,MMXExt,3DNow,++ instruction. Not implemented yet!
    'fpu':                   'DISOPTYPE_FPU',                   ##< FPU instruction. Not implemented yet!
    'ignores_op_size':       '',                                ##< Ignores both operand size prefixes.
    'lock_allowed':          '',                                ##< Lock prefix allowed.
};
479
## \@opxcpttype values (see SDMv2 2.4, 2.7).
## The keys are the accepted values; every value is an empty list here.
g_kdXcptTypes = {
    'none':     [],
    '1':        [],
    '2':        [],
    '3':        [],
    '4':        [],
    '4UA':      [],
    '5':        [],
    '6':        [],
    '7':        [],
    '8':        [],
    '11':       [],
    '12':       [],
    'E1':       [],
    'E1NF':     [],
    'E2':       [],
    'E3':       [],
    'E3NF':     [],
    'E4':       [],
    'E4NF':     [],
    'E5':       [],
    'E5NF':     [],
    'E6':       [],
    'E6NF':     [],
    'E7NF':     [],
    'E9':       [],
    'E9NF':     [],
    'E10':      [],
    'E11':      [],
    'E12':      [],
    'E12NF':    [],
};
513
514
515def _isValidOpcodeByte(sOpcode):
516 """
517 Checks if sOpcode is a valid lower case opcode byte.
518 Returns true/false.
519 """
520 if len(sOpcode) == 4:
521 if sOpcode[:2] == '0x':
522 if sOpcode[2] in '0123456789abcdef':
523 if sOpcode[3] in '0123456789abcdef':
524 return True;
525 return False;
526
527
class InstructionMap(object):
    """
    Instruction map.

    The opcode map provides the lead opcode bytes (empty for the one byte
    opcode map).  An instruction can be member of multiple opcode maps as long
    as it uses the same opcode value within the map (because of VEX).
    """

    ## Valid sEncoding values for the constructor.
    ## NOTE(review): 'vvvvv' in the comments below presumably refers to the map
    ##               select field of the prefix rather than to VEX.vvvv - confirm.
    kdEncodings = {
        'legacy':   [],
        'vex1':     [], ##< VEX or EVEX prefix with vvvvv = 1
        'vex2':     [], ##< VEX or EVEX prefix with vvvvv = 2
        'vex3':     [], ##< VEX or EVEX prefix with vvvvv = 3
        'xop8':     [], ##< XOP prefix with vvvvv = 8
        'xop9':     [], ##< XOP prefix with vvvvv = 9
        'xop10':    [], ##< XOP prefix with vvvvv = 10
    };
    ## Selectors.
    ## The first value is the number of table entries required by a
    ## decoder or disassembler for this type of selector.
    kdSelectors = {
        'byte':     [ 256, ], ##< next opcode byte selects the instruction (default).
        '/r':       [   8, ], ##< modrm.reg selects the instruction.
        'memreg /r':[  16, ], ##< modrm.reg and (modrm.mod == 3) selects the instruction.
        'mod /r':   [  32, ], ##< modrm.reg and modrm.mod selects the instruction.
        '!11 /r':   [   8, ], ##< modrm.reg selects the instruction with modrm.mod != 0y11.
        '11 /r':    [   8, ], ##< modrm.reg select the instruction with modrm.mod == 0y11.
        '11':       [  64, ], ##< modrm.reg and modrm.rm select the instruction with modrm.mod == 0y11.
    };

    def __init__(self, sName, asLeadOpcodes = None, sSelector = 'byte', sEncoding = 'legacy', sDisParse = None):
        """
        Creates an empty map.

        sName is the map name (getDisasTableName derives the table name from
        it), asLeadOpcodes the lead opcode bytes as '0x0f' style strings
        (None means no lead bytes), sSelector a kdSelectors key, sEncoding a
        kdEncodings key and sDisParse either None or a name starting with
        'IDX_Parse'.  Invalid input trips an assertion.
        """
        assert sSelector in self.kdSelectors;
        assert sEncoding in self.kdEncodings;
        if asLeadOpcodes is None:
            asLeadOpcodes = [];
        else:
            for sOpcode in asLeadOpcodes:
                assert _isValidOpcodeByte(sOpcode);
        assert sDisParse is None or sDisParse.startswith('IDX_Parse');

        self.sName          = sName;
        self.asLeadOpcodes  = asLeadOpcodes;    ##< Lead opcode bytes formatted as hex strings like '0x0f'.
        self.sSelector      = sSelector;        ##< The member selector, see kdSelectors.
        self.sEncoding      = sEncoding;        ##< The encoding, see kdEncodings.
        self.aoInstructions = [];               # type: List[Instruction]
        self.sDisParse      = sDisParse;        ##< IDX_ParseXXX.

    def getTableSize(self):
        """
        Number of table entries.  This corresponds directly to the selector.
        """
        return self.kdSelectors[self.sSelector][0];

    def getInstructionIndex(self, oInstr):
        """
        Returns the table index for the instruction.

        The index is derived from oInstr.getOpcodeByte() according to the map
        selector.  For the selectors that fix modrm.mod ('!11 /r', '11 /r' and
        '11') the top two bits of that byte are asserted to agree with the
        selector.
        """
        bOpcode = oInstr.getOpcodeByte();

        # The byte selector is simple.  We need a full opcode byte and need just return it.
        if self.sSelector == 'byte':
            assert oInstr.sOpcode[:2] == '0x' and len(oInstr.sOpcode) == 4, str(oInstr);
            return bOpcode;

        # The other selectors needs masking and shifting.
        if self.sSelector == '/r':
            return (bOpcode >> 3) & 0x7;

        if self.sSelector == 'mod /r':
            return (bOpcode >> 3) & 0x1f;

        if self.sSelector == 'memreg /r':
            return ((bOpcode >> 3) & 0x7) | (int((bOpcode >> 6) == 3) << 3);

        # modrm.mod occupies bits 7:6, so mod == 0y11 means the masked value is
        # 0xc0.  (Comparing against 0xc can never match a value masked by 0xc0.)
        if self.sSelector == '!11 /r':
            assert (bOpcode & 0xc0) != 0xc0, str(oInstr);
            return (bOpcode >> 3) & 0x7;

        if self.sSelector == '11 /r':
            assert (bOpcode & 0xc0) == 0xc0, str(oInstr);
            return (bOpcode >> 3) & 0x7;

        if self.sSelector == '11':
            assert (bOpcode & 0xc0) == 0xc0, str(oInstr);
            return bOpcode & 0x3f;

        assert False, self.sSelector;
        return -1;

    def getInstructionsInTableOrder(self):
        """
        Get instructions in table order.

        Returns array of instructions.  Normally there is exactly one
        instruction per entry.  However the entry could also be None if
        no instruction was specified for that opcode value.  Or there
        could be a list of instructions to deal with special encodings
        where for instance prefix (e.g. REX.W) encodes a different
        instruction or different CPUs have different instructions or
        prefixes in the same place.
        """
        # Start with empty table.
        cTable  = self.getTableSize();
        aoTable = [None] * cTable;

        # Insert the instructions.  Instructions without an opcode are skipped.
        for oInstr in self.aoInstructions:
            if oInstr.sOpcode:
                idxOpcode = self.getInstructionIndex(oInstr);
                assert idxOpcode < cTable, str(idxOpcode);

                oExisting = aoTable[idxOpcode];
                if oExisting is None:
                    aoTable[idxOpcode] = oInstr;
                elif not isinstance(oExisting, list):
                    # Second instruction for this slot, switch to a list.
                    aoTable[idxOpcode] = [oExisting, oInstr];
                else:
                    oExisting.append(oInstr);

        return aoTable;


    def getDisasTableName(self):
        """
        Returns the disassembler table name for this map.

        The name is 'g_aDisas' followed by the '_' separated words of sName:
        'm', 'r' and two digit lower case hex words are appended with their
        underscore, any other word gets 'grp'/'map' and its first letter
        capitalized and is appended without a separator.
        """
        sName = 'g_aDisas';
        for sWord in self.sName.split('_'):
            if sWord == 'm':            # suffix indicating modrm.mod==mem
                sName += '_m';
            elif sWord == 'r':          # suffix indicating modrm.mod==reg
                sName += '_r';
            elif len(sWord) == 2 and re.match('^[a-f0-9][a-f0-9]$', sWord):
                sName += '_' + sWord;
            else:
                sWord  = sWord.replace('grp', 'Grp');
                sWord  = sWord.replace('map', 'Map');
                sName += sWord[0].upper() + sWord[1:];
        return sName;
668
669
class TestType(object):
    """
    Test value type.

    This base class deals with integer like values.  The fUnsigned constructor
    parameter indicates the default stance on zero vs sign extending.  It is
    possible to override fUnsigned=True by prefixing the value with '+' or '-'.
    """
    def __init__(self, sName, acbSizes = None, fUnsigned = True):
        self.sName     = sName;
        self.acbSizes  = [1, 2, 4, 8, 16, 32] if acbSizes is None else acbSizes;  # Normal sizes.
        self.fUnsigned = fUnsigned;

    class BadValue(Exception):
        """ Bad value exception. """
        def __init__(self, sMessage):
            Exception.__init__(self, sMessage);
            self.sMessage = sMessage;

    ## For ascii ~ operator.
    kdHexInv = {
        '0': 'f',
        '1': 'e',
        '2': 'd',
        '3': 'c',
        '4': 'b',
        '5': 'a',
        '6': '9',
        '7': '8',
        '8': '7',
        '9': '6',
        'a': '5',
        'b': '4',
        'c': '3',
        'd': '2',
        'e': '1',
        'f': '0',
    };

    def get(self, sValue):
        """
        Get the shortest normal sized byte representation of sValue.

        sValue is a decimal or '0x' prefixed hexadecimal integer string with
        an optional leading '+' or '-'.

        Returns ((fSignExtend, bytearray), ) or ((fSignExtend, bytearray), (fSignExtend, bytearray), ).
        The latter form is for AND+OR pairs where the first entry is what to
        AND with the field and the second the one or OR with.  This base
        class only produces the first form.  The bytearray holds the least
        significant byte first.

        Raises BadValue if invalid value.
        """
        if not sValue:
            raise TestType.BadValue('empty value');

        # Deal with sign and detect hexadecimal or decimal.
        fSignExtend = not self.fUnsigned;
        if sValue[0] == '-' or sValue[0] == '+':
            fSignExtend = True;
            fHex = len(sValue) > 3 and sValue[1:3].lower() == '0x';
        else:
            fHex = len(sValue) > 2 and sValue[0:2].lower() == '0x';

        # Try convert it to an integer.  int() gives arbitrary precision on both
        # python 2 and 3, so no 'long' alias is needed here.
        try:
            iValue = int(sValue, 16 if fHex else 10);
        except (TypeError, ValueError) as oXcpt:
            raise TestType.BadValue('failed to convert "%s" to integer (%s)' % (sValue, oXcpt));

        # Convert to a hex string ('%x' yields neither '0x' prefix nor 'L'
        # suffix).  Negative values needs to be manually converted to something
        # non-negative (~-n + 1), done here by inverting the digits of -n - 1.
        if iValue >= 0:
            sHex = '%x' % (iValue,);
        else:
            sHex = ''.join([self.kdHexInv[sDigit] for sDigit in '%x' % (-iValue - 1,)]);
            # Make sure the top bit of the leading digit is set.
            if fSignExtend and sHex[0] not in '89abcdef':
                sHex = 'f' + sHex;

        # Pick the smallest normal size that fits, or for oversized values the
        # next multiple of the largest normal size.
        cDigits = len(sHex);
        cMaxDigits = self.acbSizes[-1] * 2;
        if cDigits <= cMaxDigits:
            for cb in self.acbSizes:
                cNaturalDigits = cb * 2;
                if cDigits <= cNaturalDigits:
                    break;
        else:
            cNaturalDigits = ((cDigits + cMaxDigits - 1) // cMaxDigits) * cMaxDigits;

        # Pad it to that size with the digit matching the sign.
        if cNaturalDigits != cDigits:
            cNeeded = cNaturalDigits - cDigits;
            sHex = ('0' if iValue >= 0 else 'f') * cNeeded + sHex;

        # Invert and convert to bytearray and return it.
        abValue = bytearray([int(sHex[offHex - 2 : offHex], 16) for offHex in range(len(sHex), 0, -2)]);

        return ((fSignExtend, abValue),);

    def validate(self, sValue):
        """
        Returns True if value is okay, error message on failure.
        """
        try:
            self.get(sValue);
        except TestType.BadValue as oXcpt:
            return oXcpt.sMessage;
        return True;

    def isAndOrPair(self, sValue):
        """
        Checks if sValue is a pair.
        Always False for this base class.
        """
        _ = sValue;
        return False;
794
795
class TestTypeEflags(TestType):
    """
    Special value parsing for EFLAGS/RFLAGS/FLAGS.

    A value is a comma separated list of g_kdEFlagsMnemonics keys.  Mnemonics
    mapping to a '!' prefixed constant request the flag to be cleared, the
    others request it to be set.
    """

    ## The mnemonics that stand for a cleared flag.
    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 };

    def __init__(self, sName):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);

    def get(self, sValue):
        """
        Converts the flag list into value tuples.

        Returns (clear-mask, set-mask) when at least one flag is to be
        cleared, otherwise just (set-mask,).  Each entry has the
        (fSignExtend, bytearray) form produced by TestType.get.

        Raises BadValue on unknown flag mnemonics.
        """
        fClear = 0;
        fSet   = 0;
        for sFlag in sValue.split(','):
            sConstant = g_kdEFlagsMnemonics.get(sFlag, None);
            if sConstant is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sFlag, sValue))
            if sConstant[0] == '!':
                fClear |= g_kdX86EFlagsConstants[sConstant[1:]];
            else:
                fSet   |= g_kdX86EFlagsConstants[sConstant];

        aoSet = TestType.get(self, '0x%x' % (fSet,));
        if fClear != 0:
            aoClear = TestType.get(self, '%#x' % (fClear,))
            assert self.isAndOrPair(sValue) is True;
            return (aoClear[0], aoSet[0]);
        assert self.isAndOrPair(sValue) is False;
        return aoSet;

    def isAndOrPair(self, sValue):
        """
        Checks if sValue is a pair, i.e. names at least one cleared flag.

        Whole mnemonics are compared.  A substring search would wrongly
        report e.g. 'iopl' as a pair because it contains 'pl'.
        """
        for sFlag in sValue.split(','):
            if sFlag in self.kdZeroValueFlags:
                return True;
        return False;
831
class TestTypeFromDict(TestType):
    """
    Flag list value parsing backed by a dictionary of named constants.

    A value is a comma separated list of flag names.  Each name is upper
    cased, prefixed with sConstantPrefix and looked up in
    kdConstantsAndValues; the values found are ORed together.
    """

    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 };

    def __init__(self, sName, kdConstantsAndValues, sConstantPrefix):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);
        self.sConstantPrefix      = sConstantPrefix;
        self.kdConstantsAndValues = kdConstantsAndValues;

    def get(self, sValue):
        """
        Returns the ORed flag values in the ((fSignExtend, bytearray),) form
        of TestType.get.  Raises BadValue on unknown flag names.
        """
        fCombined = 0;
        for sName in sValue.split(','):
            sConstant = self.sConstantPrefix + sName.upper();
            fBits     = self.kdConstantsAndValues.get(sConstant, None);
            if fBits is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sName, sValue))
            fCombined |= fBits;
        return TestType.get(self, '0x%x' % (fCombined,));
852
853
class TestInOut(object):
    """
    One input or output state modifier.

    This should be thought as values to modify BS3REGCTX and extended (needs
    to be structured) state.

    An instance just records the four strings it is given: the field being
    modified (a key of kdFields), the assignment operator (one of
    kasOperators), the value and the value type name.
    """
    ## Assigned operators.
    kasOperators = [
        '&|=',  # Special AND(INV)+OR operator for use with EFLAGS.
        '&~=',
        '&=',
        '|=',
        '='
    ];
    ## Types
    kdTypes = {
        'uint':  TestType('uint', fUnsigned = True),
        'int':   TestType('int'),
        'efl':   TestTypeEflags('efl'),
        'cr0':   TestTypeFromDict('cr0', g_kdX86Cr0Constants, 'X86_CR0_'),
        'cr4':   TestTypeFromDict('cr4', g_kdX86Cr4Constants, 'X86_CR4_'),
        'xcr0':  TestTypeFromDict('xcr0', g_kdX86XSaveCConstants, 'XSAVE_C_'),
    };
    ## CPU context fields.
    kdFields = {
        # name:         ( default type, [both|input|output], )
        # Operands.
        'op1':          ( 'uint', 'both',   ), ## \@op1
        'op2':          ( 'uint', 'both',   ), ## \@op2
        'op3':          ( 'uint', 'both',   ), ## \@op3
        'op4':          ( 'uint', 'both',   ), ## \@op4
        # Flags.
        'efl':          ( 'efl',  'both',   ),
        'efl_undef':    ( 'uint', 'output', ),
        # 8-bit GPRs.
        'al':           ( 'uint', 'both',   ),
        'cl':           ( 'uint', 'both',   ),
        'dl':           ( 'uint', 'both',   ),
        'bl':           ( 'uint', 'both',   ),
        'ah':           ( 'uint', 'both',   ),
        'ch':           ( 'uint', 'both',   ),
        'dh':           ( 'uint', 'both',   ),
        'bh':           ( 'uint', 'both',   ),
        'r8l':          ( 'uint', 'both',   ),
        'r9l':          ( 'uint', 'both',   ),
        'r10l':         ( 'uint', 'both',   ),
        'r11l':         ( 'uint', 'both',   ),
        'r12l':         ( 'uint', 'both',   ),
        'r13l':         ( 'uint', 'both',   ),
        'r14l':         ( 'uint', 'both',   ),
        'r15l':         ( 'uint', 'both',   ),
        # 16-bit GPRs.
        'ax':           ( 'uint', 'both',   ),
        'dx':           ( 'uint', 'both',   ),
        'cx':           ( 'uint', 'both',   ),
        'bx':           ( 'uint', 'both',   ),
        'sp':           ( 'uint', 'both',   ),
        'bp':           ( 'uint', 'both',   ),
        'si':           ( 'uint', 'both',   ),
        'di':           ( 'uint', 'both',   ),
        'r8w':          ( 'uint', 'both',   ),
        'r9w':          ( 'uint', 'both',   ),
        'r10w':         ( 'uint', 'both',   ),
        'r11w':         ( 'uint', 'both',   ),
        'r12w':         ( 'uint', 'both',   ),
        'r13w':         ( 'uint', 'both',   ),
        'r14w':         ( 'uint', 'both',   ),
        'r15w':         ( 'uint', 'both',   ),
        # 32-bit GPRs.
        'eax':          ( 'uint', 'both',   ),
        'edx':          ( 'uint', 'both',   ),
        'ecx':          ( 'uint', 'both',   ),
        'ebx':          ( 'uint', 'both',   ),
        'esp':          ( 'uint', 'both',   ),
        'ebp':          ( 'uint', 'both',   ),
        'esi':          ( 'uint', 'both',   ),
        'edi':          ( 'uint', 'both',   ),
        'r8d':          ( 'uint', 'both',   ),
        'r9d':          ( 'uint', 'both',   ),
        'r10d':         ( 'uint', 'both',   ),
        'r11d':         ( 'uint', 'both',   ),
        'r12d':         ( 'uint', 'both',   ),
        'r13d':         ( 'uint', 'both',   ),
        'r14d':         ( 'uint', 'both',   ),
        'r15d':         ( 'uint', 'both',   ),
        # 64-bit GPRs.
        'rax':          ( 'uint', 'both',   ),
        'rdx':          ( 'uint', 'both',   ),
        'rcx':          ( 'uint', 'both',   ),
        'rbx':          ( 'uint', 'both',   ),
        'rsp':          ( 'uint', 'both',   ),
        'rbp':          ( 'uint', 'both',   ),
        'rsi':          ( 'uint', 'both',   ),
        'rdi':          ( 'uint', 'both',   ),
        'r8':           ( 'uint', 'both',   ),
        'r9':           ( 'uint', 'both',   ),
        'r10':          ( 'uint', 'both',   ),
        'r11':          ( 'uint', 'both',   ),
        'r12':          ( 'uint', 'both',   ),
        'r13':          ( 'uint', 'both',   ),
        'r14':          ( 'uint', 'both',   ),
        'r15':          ( 'uint', 'both',   ),
        # 16-bit, 32-bit or 64-bit registers according to operand size.
        'oz.rax':       ( 'uint', 'both',   ),
        'oz.rdx':       ( 'uint', 'both',   ),
        'oz.rcx':       ( 'uint', 'both',   ),
        'oz.rbx':       ( 'uint', 'both',   ),
        'oz.rsp':       ( 'uint', 'both',   ),
        'oz.rbp':       ( 'uint', 'both',   ),
        'oz.rsi':       ( 'uint', 'both',   ),
        'oz.rdi':       ( 'uint', 'both',   ),
        'oz.r8':        ( 'uint', 'both',   ),
        'oz.r9':        ( 'uint', 'both',   ),
        'oz.r10':       ( 'uint', 'both',   ),
        'oz.r11':       ( 'uint', 'both',   ),
        'oz.r12':       ( 'uint', 'both',   ),
        'oz.r13':       ( 'uint', 'both',   ),
        'oz.r14':       ( 'uint', 'both',   ),
        'oz.r15':       ( 'uint', 'both',   ),
        # Control registers.
        'cr0':          ( 'cr0',  'both',   ),
        'cr4':          ( 'cr4',  'both',   ),
        'xcr0':         ( 'xcr0', 'both',   ),
        # FPU Registers
        'fcw':          ( 'uint', 'both',   ),
        'fsw':          ( 'uint', 'both',   ),
        'ftw':          ( 'uint', 'both',   ),
        'fop':          ( 'uint', 'both',   ),
        'fpuip':        ( 'uint', 'both',   ),
        'fpucs':        ( 'uint', 'both',   ),
        'fpudp':        ( 'uint', 'both',   ),
        'fpuds':        ( 'uint', 'both',   ),
        'mxcsr':        ( 'uint', 'both',   ),
        'st0':          ( 'uint', 'both',   ),
        'st1':          ( 'uint', 'both',   ),
        'st2':          ( 'uint', 'both',   ),
        'st3':          ( 'uint', 'both',   ),
        'st4':          ( 'uint', 'both',   ),
        'st5':          ( 'uint', 'both',   ),
        'st6':          ( 'uint', 'both',   ),
        'st7':          ( 'uint', 'both',   ),
        # MMX registers.
        'mm0':          ( 'uint', 'both',   ),
        'mm1':          ( 'uint', 'both',   ),
        'mm2':          ( 'uint', 'both',   ),
        'mm3':          ( 'uint', 'both',   ),
        'mm4':          ( 'uint', 'both',   ),
        'mm5':          ( 'uint', 'both',   ),
        'mm6':          ( 'uint', 'both',   ),
        'mm7':          ( 'uint', 'both',   ),
        # SSE registers.
        'xmm0':         ( 'uint', 'both',   ),
        'xmm1':         ( 'uint', 'both',   ),
        'xmm2':         ( 'uint', 'both',   ),
        'xmm3':         ( 'uint', 'both',   ),
        'xmm4':         ( 'uint', 'both',   ),
        'xmm5':         ( 'uint', 'both',   ),
        'xmm6':         ( 'uint', 'both',   ),
        'xmm7':         ( 'uint', 'both',   ),
        'xmm8':         ( 'uint', 'both',   ),
        'xmm9':         ( 'uint', 'both',   ),
        'xmm10':        ( 'uint', 'both',   ),
        'xmm11':        ( 'uint', 'both',   ),
        'xmm12':        ( 'uint', 'both',   ),
        'xmm13':        ( 'uint', 'both',   ),
        'xmm14':        ( 'uint', 'both',   ),
        'xmm15':        ( 'uint', 'both',   ),
        'xmm0.lo':      ( 'uint', 'both',   ),
        'xmm1.lo':      ( 'uint', 'both',   ),
        'xmm2.lo':      ( 'uint', 'both',   ),
        'xmm3.lo':      ( 'uint', 'both',   ),
        'xmm4.lo':      ( 'uint', 'both',   ),
        'xmm5.lo':      ( 'uint', 'both',   ),
        'xmm6.lo':      ( 'uint', 'both',   ),
        'xmm7.lo':      ( 'uint', 'both',   ),
        'xmm8.lo':      ( 'uint', 'both',   ),
        'xmm9.lo':      ( 'uint', 'both',   ),
        'xmm10.lo':     ( 'uint', 'both',   ),
        'xmm11.lo':     ( 'uint', 'both',   ),
        'xmm12.lo':     ( 'uint', 'both',   ),
        'xmm13.lo':     ( 'uint', 'both',   ),
        'xmm14.lo':     ( 'uint', 'both',   ),
        'xmm15.lo':     ( 'uint', 'both',   ),
        'xmm0.hi':      ( 'uint', 'both',   ),
        'xmm1.hi':      ( 'uint', 'both',   ),
        'xmm2.hi':      ( 'uint', 'both',   ),
        'xmm3.hi':      ( 'uint', 'both',   ),
        'xmm4.hi':      ( 'uint', 'both',   ),
        'xmm5.hi':      ( 'uint', 'both',   ),
        'xmm6.hi':      ( 'uint', 'both',   ),
        'xmm7.hi':      ( 'uint', 'both',   ),
        'xmm8.hi':      ( 'uint', 'both',   ),
        'xmm9.hi':      ( 'uint', 'both',   ),
        'xmm10.hi':     ( 'uint', 'both',   ),
        'xmm11.hi':     ( 'uint', 'both',   ),
        'xmm12.hi':     ( 'uint', 'both',   ),
        'xmm13.hi':     ( 'uint', 'both',   ),
        'xmm14.hi':     ( 'uint', 'both',   ),
        'xmm15.hi':     ( 'uint', 'both',   ),
        'xmm0.lo.zx':   ( 'uint', 'both',   ),
        'xmm1.lo.zx':   ( 'uint', 'both',   ),
        'xmm2.lo.zx':   ( 'uint', 'both',   ),
        'xmm3.lo.zx':   ( 'uint', 'both',   ),
        'xmm4.lo.zx':   ( 'uint', 'both',   ),
        'xmm5.lo.zx':   ( 'uint', 'both',   ),
        'xmm6.lo.zx':   ( 'uint', 'both',   ),
        'xmm7.lo.zx':   ( 'uint', 'both',   ),
        'xmm8.lo.zx':   ( 'uint', 'both',   ),
        'xmm9.lo.zx':   ( 'uint', 'both',   ),
        'xmm10.lo.zx':  ( 'uint', 'both',   ),
        'xmm11.lo.zx':  ( 'uint', 'both',   ),
        'xmm12.lo.zx':  ( 'uint', 'both',   ),
        'xmm13.lo.zx':  ( 'uint', 'both',   ),
        'xmm14.lo.zx':  ( 'uint', 'both',   ),
        'xmm15.lo.zx':  ( 'uint', 'both',   ),
        'xmm0.dw0':     ( 'uint', 'both',   ),
        'xmm1.dw0':     ( 'uint', 'both',   ),
        'xmm2.dw0':     ( 'uint', 'both',   ),
        'xmm3.dw0':     ( 'uint', 'both',   ),
        'xmm4.dw0':     ( 'uint', 'both',   ),
        'xmm5.dw0':     ( 'uint', 'both',   ),
        'xmm6.dw0':     ( 'uint', 'both',   ),
        'xmm7.dw0':     ( 'uint', 'both',   ),
        'xmm8.dw0':     ( 'uint', 'both',   ),
        'xmm9.dw0':     ( 'uint', 'both',   ),
        'xmm10.dw0':    ( 'uint', 'both',   ),
        'xmm11.dw0':    ( 'uint', 'both',   ),
        'xmm12.dw0':    ( 'uint', 'both',   ),
        'xmm13.dw0':    ( 'uint', 'both',   ),
        'xmm14.dw0':    ( 'uint', 'both',   ),
        # Was misspelled 'xmm15_dw0', which made 'xmm15.dw0' an unknown field.
        'xmm15.dw0':    ( 'uint', 'both',   ),
        # AVX registers.
        'ymm0':         ( 'uint', 'both',   ),
        'ymm1':         ( 'uint', 'both',   ),
        'ymm2':         ( 'uint', 'both',   ),
        'ymm3':         ( 'uint', 'both',   ),
        'ymm4':         ( 'uint', 'both',   ),
        'ymm5':         ( 'uint', 'both',   ),
        'ymm6':         ( 'uint', 'both',   ),
        'ymm7':         ( 'uint', 'both',   ),
        'ymm8':         ( 'uint', 'both',   ),
        'ymm9':         ( 'uint', 'both',   ),
        'ymm10':        ( 'uint', 'both',   ),
        'ymm11':        ( 'uint', 'both',   ),
        'ymm12':        ( 'uint', 'both',   ),
        'ymm13':        ( 'uint', 'both',   ),
        'ymm14':        ( 'uint', 'both',   ),
        'ymm15':        ( 'uint', 'both',   ),

        # Special ones.
        'value.xcpt':   ( 'uint', 'output', ),
    };

    def __init__(self, sField, sOp, sValue, sType):
        """
        Records one modifier.

        sField must be a key of kdFields and sOp one of kasOperators; all four
        arguments must be plain strings.  Violations trip an assertion.
        """
        assert sField in self.kdFields;
        assert sOp in self.kasOperators;
        self.sField = sField;
        self.sOp    = sOp;
        self.sValue = sValue;
        self.sType  = sType;
        assert isinstance(sField, str);
        assert isinstance(sOp, str);
        assert isinstance(sType, str);
        assert isinstance(sValue, str);
1119
1120
class TestSelector(object):
    """
    One selector for an instruction test.

    A selector is a (variable, compare operator, value) triplet.  The variable
    must be a key of kdVariables, the operator one of kasCompareOps and the
    value a key in the variable's sub-dictionary.
    """
    ## Selector compare operators.
    kasCompareOps = [ '==', '!=' ];

    ## Selector variables and their valid values.
    kdVariables = {
        # Operand size.
        'size':     { 'o16': 'size_o16', 'o32': 'size_o32', 'o64': 'size_o64', },
        # Execution ring.
        'ring':     {
            '0': 'ring_0', '1': 'ring_1', '2': 'ring_2', '3': 'ring_3',
            '0..2': 'ring_0_thru_2',
            '1..3': 'ring_1_thru_3',
        },
        # Basic code mode.
        'codebits': { '64': 'code_64bit', '32': 'code_32bit', '16': 'code_16bit', },
        # cpu modes.
        'mode':     {
            'real': 'mode_real', 'prot': 'mode_prot', 'long': 'mode_long', 'v86': 'mode_v86',
            'smm':  'mode_smm',  'vmx':  'mode_vmx',  'svm':  'mode_svm',
        },
        # paging on/off
        'paging':   { 'on': 'paging_on', 'off': 'paging_off', },
        # CPU vendor
        'vendor':   { 'amd': 'vendor_amd', 'intel': 'vendor_intel', 'via': 'vendor_via', },
    };

    ## Selector shorthand predicates.
    ## Each of these translates into a variable expression.
    kdPredicates = {
        # Operand size.
        'o16': 'size==o16', 'o32': 'size==o32', 'o64': 'size==o64',
        # Rings.
        'ring0': 'ring==0', '!ring0': 'ring==1..3',
        'ring1': 'ring==1', 'ring2': 'ring==2', 'ring3': 'ring==3',
        'user': 'ring==3', 'supervisor': 'ring==0..2',
        # CPU modes.
        'real': 'mode==real', 'prot': 'mode==prot', 'long': 'mode==long', 'v86': 'mode==v86',
        'smm': 'mode==smm', 'vmx': 'mode==vmx', 'svm': 'mode==svm',
        # Paging.
        'paging': 'paging==on', '!paging': 'paging==off',
        # Vendors.
        'amd': 'vendor==amd', '!amd': 'vendor!=amd',
        'intel': 'vendor==intel', '!intel': 'vendor!=intel',
        'via': 'vendor==via', '!via': 'vendor!=via',
    };

    def __init__(self, sVariable, sOp, sValue):
        """ Validates the triplet by means of assertions and stores it. """
        assert sVariable in self.kdVariables;
        assert sOp in self.kasCompareOps;
        assert sValue in self.kdVariables[sVariable];
        self.sVariable, self.sOp, self.sValue = sVariable, sOp, sValue;
1209
1210
1211class InstructionTest(object):
1212 """
1213 Instruction test.
1214 """
1215
1216 def __init__(self, oInstr): # type: (InstructionTest, Instruction)
1217 self.oInstr = oInstr; # type: InstructionTest
1218 self.aoInputs = []; # type: list(TestInOut)
1219 self.aoOutputs = []; # type: list(TestInOut)
1220 self.aoSelectors = []; # type: list(TestSelector)
1221
1222 def toString(self, fRepr = False):
1223 """
1224 Converts it to string representation.
1225 """
1226 asWords = [];
1227 if self.aoSelectors:
1228 for oSelector in self.aoSelectors:
1229 asWords.append('%s%s%s' % (oSelector.sVariable, oSelector.sOp, oSelector.sValue,));
1230 asWords.append('/');
1231
1232 for oModifier in self.aoInputs:
1233 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
1234
1235 asWords.append('->');
1236
1237 for oModifier in self.aoOutputs:
1238 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
1239
1240 if fRepr:
1241 return '<' + ' '.join(asWords) + '>';
1242 return ' '.join(asWords);
1243
1244 def __str__(self):
1245 """ Provide string represenation. """
1246 return self.toString(False);
1247
1248 def __repr__(self):
1249 """ Provide unambigious string representation. """
1250 return self.toString(True);
1251
class Operand(object):
    """
    Instruction operand.

    Pairs a location (key of g_kdOpLocations) with an operand type (key of
    g_kdOpTypes).  Both are checked by assertion on construction.
    """

    def __init__(self, sWhere, sType):
        assert sWhere in g_kdOpLocations, sWhere;
        assert sType  in g_kdOpTypes, sType;
        self.sType  = sType;    ##< g_kdOpTypes
        self.sWhere = sWhere;   ##< g_kdOpLocations

    def usesModRM(self):
        """ Returns True if using some form of ModR/M encoding. """
        # Decided solely by the first character of the operand type.
        chKind = self.sType[0];
        return chKind in ('E', 'G', 'M');
1266
1267
1268
class Instruction(object): # pylint: disable=too-many-instance-attributes
    """
    Instruction.

    Plain data holder for everything collected about one instruction, plus a
    few helpers for rendering it as a string, decoding the opcode byte and
    converting the EFLAGS mnemonic lists into integer masks.
    """

    def __init__(self, sSrcFile, iLine):
        """
        Creates an empty instruction record.

        sSrcFile and iLine are stored as the source file name and the line at
        which the instruction was created; everything else starts out as
        None, False, -1, zero or an empty container.
        """
        ## @name Core attributes.
        ## @{
        self.sMnemonic      = None;
        self.sBrief         = None;
        self.asDescSections = [];       # type: list(str)
        self.aoMaps         = [];       # type: list(InstructionMap)
        self.aoOperands     = [];       # type: list(Operand)
        self.sPrefix        = None;     ##< Single prefix: None, 'none', 0x66, 0xf3, 0xf2
        self.sOpcode        = None;     # type: str
        self.sSubOpcode     = None;     # type: str
        self.sEncoding      = None;
        self.asFlTest       = None;
        self.asFlModify     = None;
        self.asFlUndefined  = None;
        self.asFlSet        = None;
        self.asFlClear      = None;
        self.dHints         = {};       ##< Dictionary of instruction hints, flags, whatnot. (Dictionary for speed; dummy value).
        self.sDisEnum       = None;     ##< OP_XXXX value.  Default is based on the uppercased mnemonic.
        self.asCpuIds       = [];       ##< The CPUID feature bit names for this instruction. If multiple, assume AND.
        self.asReqFeatures  = [];       ##< Which features are required to be enabled to run this instruction.
        self.aoTests        = [];       # type: list(InstructionTest)
        self.sMinCpu        = None;     ##< Indicates the minimum CPU required for the instruction. Not set when oCpuExpr is.
        self.oCpuExpr       = None;     ##< Some CPU restriction expression...
        self.sGroup         = None;
        self.fUnused        = False;    ##< Unused instruction.
        self.fInvalid       = False;    ##< Invalid instruction (like UD2).
        self.sInvalidStyle  = None;     ##< Invalid behaviour style (g_kdInvalidStyles),
        self.sXcptType      = None;     ##< Exception type (g_kdXcptTypes).
        ## @}

        ## @name Implementation attributes.
        ## @{
        self.sStats         = None;
        self.sFunction      = None;
        self.fStub          = False;
        self.fUdStub        = False;
        ## @}

        ## @name Decoding info
        ## @{
        self.sSrcFile       = sSrcFile;
        self.iLineCreated   = iLine;
        self.iLineCompleted = None;
        self.cOpTags        = 0;
        self.iLineFnIemOpMacro  = -1;
        self.iLineMnemonicMacro = -1;
        ## @}

        ## @name Intermediate input fields.
        ## @{
        self.sRawDisOpNo    = None;
        self.asRawDisParams = [];
        self.sRawIemOpFlags = None;
        self.sRawOldOpcodes = None;
        self.asCopyTests    = [];
        ## @}

    def toString(self, fRepr = False):
        """
        Turn object into a string.

        Produces '; ' separated 'name=value' pairs for the fields that are
        set (None values are left out, as are the optional fields guarded
        below).  With fRepr set the result is wrapped in angle brackets.
        """
        aasFields = [];

        aasFields.append(['opcode',    self.sOpcode]);
        aasFields.append(['mnemonic',  self.sMnemonic]);
        for iOperand, oOperand in enumerate(self.aoOperands):
            aasFields.append(['op%u' % (iOperand + 1,), '%s:%s' % (oOperand.sWhere, oOperand.sType,)]);
        if self.aoMaps:         aasFields.append(['maps', ','.join([oMap.sName for oMap in self.aoMaps])]);
        aasFields.append(['encoding',  self.sEncoding]);
        if self.dHints:         aasFields.append(['hints', ','.join(self.dHints.keys())]);
        aasFields.append(['disenum',   self.sDisEnum]);
        if self.asCpuIds:       aasFields.append(['cpuid', ','.join(self.asCpuIds)]);
        aasFields.append(['group',     self.sGroup]);
        if self.fUnused:        aasFields.append(['unused', 'True']);
        if self.fInvalid:       aasFields.append(['invalid', 'True']);
        aasFields.append(['invlstyle', self.sInvalidStyle]);
        aasFields.append(['fltest',    self.asFlTest]);
        aasFields.append(['flmodify',  self.asFlModify]);
        aasFields.append(['flundef',   self.asFlUndefined]);
        aasFields.append(['flset',     self.asFlSet]);
        aasFields.append(['flclear',   self.asFlClear]);
        aasFields.append(['mincpu',    self.sMinCpu]);
        aasFields.append(['stats',     self.sStats]);
        aasFields.append(['sFunction', self.sFunction]);
        if self.fStub:          aasFields.append(['fStub', 'True']);
        if self.fUdStub:        aasFields.append(['fUdStub', 'True']);
        if self.cOpTags:        aasFields.append(['optags', str(self.cOpTags)]);
        if self.iLineFnIemOpMacro  != -1: aasFields.append(['FNIEMOP_XXX', str(self.iLineFnIemOpMacro)]);
        if self.iLineMnemonicMacro != -1: aasFields.append(['IEMOP_MNEMMONICn', str(self.iLineMnemonicMacro)]);

        sRet = '<' if fRepr else '';
        for sField, sValue in aasFields:
            if sValue is not None:
                # The '> 1' makes the lone '<' of the repr form count as "nothing emitted yet".
                if len(sRet) > 1:
                    sRet += '; ';
                sRet += '%s=%s' % (sField, sValue,);
        if fRepr:
            sRet += '>';

        return sRet;

    def __str__(self):
        """ Provide string representation. """
        return self.toString(False);

    def __repr__(self):
        """ Provide unambiguous string representation. """
        return self.toString(True);

    def getOpcodeByte(self):
        """
        Decodes sOpcode into a byte range integer value.
        Raises exception if sOpcode is None or invalid.

        Recognized forms:
            - '0xNN':  the hexadecimal value itself.
            - '/r':    the digit shifted into bits 3 thru 5.
            - '11/r':  as '/r' with 0xc0 OR'ed in.
            - '!11/r': as '/r' with 0x80 OR'ed in.
        """
        if self.sOpcode is None:
            raise Exception('No opcode byte for %s!' % (self,));
        sOpcode = str(self.sOpcode);    # pylint type confusion workaround.

        # Full hex byte form.
        if sOpcode[:2] == '0x':
            return int(sOpcode, 16);

        # The /r form (slash plus one digit, i.e. two characters):
        if len(sOpcode) == 2 and sOpcode.startswith('/') and sOpcode[-1].isdigit():
            return int(sOpcode[-1:]) << 3;

        # The 11/r form:
        if len(sOpcode) == 4 and sOpcode.startswith('11/') and sOpcode[-1].isdigit():
            return (int(sOpcode[-1:]) << 3) | 0xc0;

        # The !11/r form (returns mod=1):
        ## @todo this doesn't really work...
        if len(sOpcode) == 5 and sOpcode.startswith('!11/') and sOpcode[-1].isdigit():
            return (int(sOpcode[-1:]) << 3) | 0x80;

        raise Exception('unsupported opcode byte spec "%s" for %s' % (sOpcode, self,));

    @staticmethod
    def _flagsToIntegerMask(asFlags):
        """
        Returns the integer mask value for asFlags.

        Each flag is translated via g_kdEFlagsMnemonics into a constant name,
        whose value in g_kdX86EFlagsConstants is OR'ed into the result.  A
        None or empty asFlags yields zero.
        """
        uRet = 0;
        if asFlags:
            for sFlag in asFlags:
                sConstant = g_kdEFlagsMnemonics[sFlag];
                assert sConstant[0] != '!', sConstant
                uRet |= g_kdX86EFlagsConstants[sConstant];
        return uRet;

    def getTestedFlagsMask(self):
        """ Returns asFlTest into a integer mask value """
        return self._flagsToIntegerMask(self.asFlTest);

    def getModifiedFlagsMask(self):
        """ Returns asFlModify into a integer mask value """
        return self._flagsToIntegerMask(self.asFlModify);

    def getUndefinedFlagsMask(self):
        """ Returns asFlUndefined into a integer mask value """
        return self._flagsToIntegerMask(self.asFlUndefined);

    def getSetFlagsMask(self):
        """ Returns asFlSet into a integer mask value """
        return self._flagsToIntegerMask(self.asFlSet);

    def getClearedFlagsMask(self):
        """ Returns asFlClear into a integer mask value """
        return self._flagsToIntegerMask(self.asFlClear);
1442
1443
## All the instructions.
## SimpleParser.addInstruction appends every Instruction it creates to this list.
g_aoAllInstructions = []; # type: list(Instruction)

## All the instructions indexed by statistics name (opstat).
## Filled in by SimpleParser.doneInstructionOne, which reports an error rather
## than overwriting when a statistics name is already present.
g_dAllInstructionsByStat = {}; # type: dict(Instruction)

## All the instructions indexed by function name (opfunction).
## Filled in by SimpleParser.doneInstructionOne; each value is a list since
## multiple instructions are allowed to share one function.
g_dAllInstructionsByFunction = {}; # type: dict(list(Instruction))

## Instructions tagged by oponlytest
g_aoOnlyTestInstructions = []; # type: list(Instruction)
1455
1456## Instruction maps.
1457g_dInstructionMaps = {
1458 'one': InstructionMap('one'),
1459 'grp1_80': InstructionMap('grp1_80', asLeadOpcodes = ['0x80',]),
1460 'grp1_81': InstructionMap('grp1_81', asLeadOpcodes = ['0x81',], sSelector = '/r'),
1461 'grp1_82': InstructionMap('grp1_82', asLeadOpcodes = ['0x82',], sSelector = '/r'),
1462 'grp1_83': InstructionMap('grp1_83', asLeadOpcodes = ['0x83',], sSelector = '/r'),
1463 'grp1a': InstructionMap('grp1a', asLeadOpcodes = ['0x8f',], sSelector = '/r'),
1464 'grp2_c0': InstructionMap('grp2_c0', asLeadOpcodes = ['0xc0',], sSelector = '/r'),
1465 'grp2_c1': InstructionMap('grp2_c1', asLeadOpcodes = ['0xc1',], sSelector = '/r'),
1466 'grp2_d0': InstructionMap('grp2_d0', asLeadOpcodes = ['0xd0',], sSelector = '/r'),
1467 'grp2_d1': InstructionMap('grp2_d1', asLeadOpcodes = ['0xd1',], sSelector = '/r'),
1468 'grp2_d2': InstructionMap('grp2_d2', asLeadOpcodes = ['0xd2',], sSelector = '/r'),
1469 'grp2_d3': InstructionMap('grp2_d3', asLeadOpcodes = ['0xd3',], sSelector = '/r'),
1470 'grp3_f6': InstructionMap('grp3_f6', asLeadOpcodes = ['0xf6',], sSelector = '/r'),
1471 'grp3_f7': InstructionMap('grp3_f7', asLeadOpcodes = ['0xf7',], sSelector = '/r'),
1472 'grp4': InstructionMap('grp4', asLeadOpcodes = ['0xfe',], sSelector = '/r'),
1473 'grp5': InstructionMap('grp5', asLeadOpcodes = ['0xff',], sSelector = '/r'),
1474 'grp11_c6_m': InstructionMap('grp11_c6_m',asLeadOpcodes = ['0xc6',], sSelector = '!11 /r'),
1475 'grp11_c6_r': InstructionMap('grp11_c6_r',asLeadOpcodes = ['0xc6',], sSelector = '11'), # xabort
1476 'grp11_c7_m': InstructionMap('grp11_c7_m',asLeadOpcodes = ['0xc7',], sSelector = '!11 /r'),
1477 'grp11_c7_r': InstructionMap('grp11_c7_r',asLeadOpcodes = ['0xc7',], sSelector = '11'), # xbegin
1478
1479 'two0f': InstructionMap('two0f', asLeadOpcodes = ['0x0f',], sDisParse = 'IDX_ParseTwoByteEsc'),
1480 'grp6': InstructionMap('grp6', asLeadOpcodes = ['0x0f', '0x00',], sSelector = '/r'),
1481 'grp7_m': InstructionMap('grp7_m', asLeadOpcodes = ['0x0f', '0x01',], sSelector = '!11 /r'),
1482 'grp7_r': InstructionMap('grp7_r', asLeadOpcodes = ['0x0f', '0x01',], sSelector = '11'),
1483 'grp8': InstructionMap('grp8', asLeadOpcodes = ['0x0f', '0xba',], sSelector = '/r'),
1484 'grp9': InstructionMap('grp9', asLeadOpcodes = ['0x0f', '0xc7',], sSelector = 'mod /r'),
1485 'grp10': InstructionMap('grp10', asLeadOpcodes = ['0x0f', '0xb9',], sSelector = '/r'), # UD1 /w modr/m
1486 'grp12': InstructionMap('grp12', asLeadOpcodes = ['0x0f', '0x71',], sSelector = 'mod /r'),
1487 'grp13': InstructionMap('grp13', asLeadOpcodes = ['0x0f', '0x72',], sSelector = 'mod /r'),
1488 'grp14': InstructionMap('grp14', asLeadOpcodes = ['0x0f', '0x73',], sSelector = 'mod /r'),
1489 'grp15': InstructionMap('grp15', asLeadOpcodes = ['0x0f', '0xae',], sSelector = 'memreg /r'),
1490 'grp16': InstructionMap('grp16', asLeadOpcodes = ['0x0f', '0x18',], sSelector = 'mod /r'),
1491 'grpA17': InstructionMap('grpA17', asLeadOpcodes = ['0x0f', '0x78',], sSelector = '/r'), # AMD: EXTRQ weirdness
1492 'grpP': InstructionMap('grpP', asLeadOpcodes = ['0x0f', '0x0d',], sSelector = '/r'), # AMD: prefetch
1493
1494 'three0f38': InstructionMap('three0f38', asLeadOpcodes = ['0x0f', '0x38',]),
1495 'three0f3a': InstructionMap('three0f3a', asLeadOpcodes = ['0x0f', '0x3a',]),
1496
1497 'vexmap1': InstructionMap('vexmap1', sEncoding = 'vex1'),
1498 'vexgrp12': InstructionMap('vexgrp12', sEncoding = 'vex1', asLeadOpcodes = ['0x71',], sSelector = 'mod /r'),
1499 'vexgrp13': InstructionMap('vexgrp13', sEncoding = 'vex1', asLeadOpcodes = ['0x72',], sSelector = 'mod /r'),
1500 'vexgrp14': InstructionMap('vexgrp14', sEncoding = 'vex1', asLeadOpcodes = ['0x73',], sSelector = 'mod /r'),
1501 'vexgrp15': InstructionMap('vexgrp15', sEncoding = 'vex1', asLeadOpcodes = ['0xae',], sSelector = 'memreg /r'),
1502 'vexgrp17': InstructionMap('vexgrp17', sEncoding = 'vex1', asLeadOpcodes = ['0xf3',], sSelector = '/r'),
1503
1504 'vexmap2': InstructionMap('vexmap2', sEncoding = 'vex2'),
1505 'vexmap3': InstructionMap('vexmap3', sEncoding = 'vex3'),
1506
1507 '3dnow': InstructionMap('3dnow', asLeadOpcodes = ['0x0f', '0x0f',]),
1508 'xopmap8': InstructionMap('xopmap8', sEncoding = 'xop8'),
1509 'xopmap9': InstructionMap('xopmap9', sEncoding = 'xop9'),
1510 'xopgrp1': InstructionMap('xopgrp1', sEncoding = 'xop9', asLeadOpcodes = ['0x01'], sSelector = '/r'),
1511 'xopgrp2': InstructionMap('xopgrp2', sEncoding = 'xop9', asLeadOpcodes = ['0x02'], sSelector = '/r'),
1512 'xopgrp3': InstructionMap('xopgrp3', sEncoding = 'xop9', asLeadOpcodes = ['0x12'], sSelector = '/r'),
1513 'xopmap10': InstructionMap('xopmap10', sEncoding = 'xop10'),
1514 'xopgrp4': InstructionMap('xopgrp4', sEncoding = 'xop10', asLeadOpcodes = ['0x12'], sSelector = '/r'),
1515};
1516
1517
1518
class ParserException(Exception):
    """
    Parser exception.

    Carries nothing beyond the message handed to the constructor.
    """
    def __init__(self, sMessage):
        super(ParserException, self).__init__(sMessage);
1523
1524
1525class SimpleParser(object):
1526 """
1527 Parser of IEMAllInstruction*.cpp.h instruction specifications.
1528 """
1529
## @name Parser state.
## Values for self.iState; __init__ starts the parser out in kiCode.
## @{
kiCode              = 0;
kiCommentMulti      = 1;    # NOTE(review): presumably "inside a multi-line comment" -- confirm against the line loop.
## @}
1535
def __init__(self, sSrcFile, asLines, sDefaultMap):
    """
    Prepares for parsing one source file.

    sSrcFile is the name used when reporting errors, asLines the lines to
    parse, and sDefaultMap the name of the g_dInstructionMaps entry that
    instructions without any explicit map get added to (must exist).
    """
    # Input and parser position/state.
    self.sSrcFile       = sSrcFile;
    self.asLines        = asLines;
    self.iLine          = 0;
    self.iState         = self.kiCode;
    self.sComment       = '';
    self.iCommentLine   = 0;
    self.aoCurInstrs    = [];

    # The default instruction map.
    assert sDefaultMap in g_dInstructionMaps;
    self.oDefaultMap    = g_dInstructionMaps[sDefaultMap];

    # Statistics.
    self.cTotalInstr    = 0;
    self.cTotalStubs    = 0;
    self.cTotalTagged   = 0;

    # Value validation expressions; the first three share the plain identifier pattern.
    sReIdentifier       = '^[A-Za-z_][A-Za-z0-9_]*$';
    self.oReMacroName   = re.compile(sReIdentifier);
    self.oReMnemonic    = re.compile(sReIdentifier);
    self.oReStatsName   = re.compile(sReIdentifier);
    self.oReFunctionName= re.compile('^iemOp_[A-Za-z_][A-Za-z0-9_]*$');
    self.oReGroupName   = re.compile('^og_[a-z0-9]+(|_[a-z0-9]+|_[a-z0-9]+_[a-z0-9]+)$');
    self.oReDisEnum     = re.compile('^OP_[A-Z0-9_]+$');
    self.fDebug         = True;

    # Tag name to handler method mapping.
    self.dTagHandlers   = {
        '@opbrief':     self.parseTagOpBrief,
        '@opdesc':      self.parseTagOpDesc,
        '@opmnemonic':  self.parseTagOpMnemonic,
        '@op1':         self.parseTagOpOperandN,
        '@op2':         self.parseTagOpOperandN,
        '@op3':         self.parseTagOpOperandN,
        '@op4':         self.parseTagOpOperandN,
        '@oppfx':       self.parseTagOpPfx,
        '@opmaps':      self.parseTagOpMaps,
        '@opcode':      self.parseTagOpcode,
        '@opcodesub':   self.parseTagOpcodeSub,
        '@openc':       self.parseTagOpEnc,
        '@opfltest':    self.parseTagOpEFlags,
        '@opflmodify':  self.parseTagOpEFlags,
        '@opflundef':   self.parseTagOpEFlags,
        '@opflset':     self.parseTagOpEFlags,
        '@opflclear':   self.parseTagOpEFlags,
        '@ophints':     self.parseTagOpHints,
        '@opdisenum':   self.parseTagOpDisEnum,
        '@opmincpu':    self.parseTagOpMinCpu,
        '@opcpuid':     self.parseTagOpCpuId,
        '@opgroup':     self.parseTagOpGroup,
        '@opunused':    self.parseTagOpUnusedInvalid,
        '@opinvalid':   self.parseTagOpUnusedInvalid,
        '@opinvlstyle': self.parseTagOpUnusedInvalid,
        '@optest':      self.parseTagOpTest,
        '@optestign':   self.parseTagOpTestIgnore,
        '@optestignore': self.parseTagOpTestIgnore,
        '@opcopytests': self.parseTagOpCopyTests,
        '@oponly':      self.parseTagOpOnlyTest,
        '@oponlytest':  self.parseTagOpOnlyTest,
        '@opxcpttype':  self.parseTagOpXcptType,
        '@opstats':     self.parseTagOpStats,
        '@opfunction':  self.parseTagOpFunction,
        '@opdone':      self.parseTagOpDone,
    };
    # The numbered test tags (0 thru 47) come in a plain and a bracketed flavor.
    for iTest in range(48):
        for sTagFmt in ('@optest%u', '@optest[%u]'):
            self.dTagHandlers[sTagFmt % (iTest,)] = self.parseTagOpTestNum;

    # Errors collected by error() and errorComment().
    self.asErrors = [];
1602
def raiseError(self, sMessage):
    """
    Raises a ParserException whose message is sMessage prefixed with the
    source file name and the current line number (self.iLine).
    """
    sFullMsg = "%s:%d: error: %s" % (self.sSrcFile, self.iLine, sMessage,);
    raise ParserException(sFullMsg);
1608
def raiseCommentError(self, iLineInComment, sMessage):
    """
    Like raiseError, except that the reported line number is
    self.iCommentLine + iLineInComment rather than the current line.
    """
    iLineNo  = self.iCommentLine + iLineInComment;
    sFullMsg = "%s:%d: error: %s" % (self.sSrcFile, iLineNo, sMessage,);
    raise ParserException(sFullMsg);
1614
def error(self, sMessage):
    """
    Records an error for the current line (self.iLine) in self.asErrors.
    Always returns False.
    """
    sEntry = u'%s:%d: error: %s\n' % (self.sSrcFile, self.iLine, sMessage,);
    self.asErrors.append(sEntry);
    return False;
1622
def errorComment(self, iLineInComment, sMessage):
    """
    Records an error for a line inside the current comment, i.e. line
    self.iCommentLine + iLineInComment, in self.asErrors.
    Always returns False.
    """
    iLineNo = self.iCommentLine + iLineInComment;
    sEntry  = u'%s:%d: error: %s\n' % (self.sSrcFile, iLineNo, sMessage,);
    self.asErrors.append(sEntry);
    return False;
1630
def printErrors(self):
    """
    Writes the collected errors, if any, to stderr in one go.
    Returns the number of collected errors.
    """
    cErrors = len(self.asErrors);
    if cErrors > 0:
        sys.stderr.write(u''.join(self.asErrors));
    return cErrors;
1639
def debug(self, sMessage):
    """
    Prints sMessage with a 'debug: ' prefix, provided self.fDebug is set.
    """
    if not self.fDebug:
        return;
    print('debug: %s' % (sMessage,));
1646
1647
def addInstruction(self, iLine = None):
    """
    Creates a new instruction and adds it to both the global list of all
    instructions and the list of current instructions.

    The instruction is stamped with iLine, or with the current line
    (self.iLine) when iLine is None.  Returns the new instruction.
    """
    if iLine is None:
        iLine = self.iLine;
    oNewInstr = Instruction(self.sSrcFile, iLine);
    g_aoAllInstructions.append(oNewInstr);
    self.aoCurInstrs.append(oNewInstr);
    return oNewInstr;
1656
def deriveMnemonicAndOperandsFromStats(self, oInstr, sStats):
    """
    Derives the mnemonic and operands from a IEM stats base name like string.

    Nothing is done when the instruction already has a mnemonic.  Otherwise
    sStats is split on underscores: the first word, lower-cased, becomes the
    mnemonic and, provided the instruction has no operands yet, each of the
    remaining words is turned into an operand.

    Returns False upon hitting a word that is not a known operand type (any
    operands added before that point are kept), True otherwise.
    """
    if oInstr.sMnemonic is not None:
        return True;

    asParts = sStats.split('_');
    oInstr.sMnemonic = asParts[0].lower();
    if len(asParts) <= 1 or oInstr.aoOperands:
        return True;

    for sOpType in asParts[1:]:
        if sOpType not in g_kdOpTypes:
            # Deliberately quiet, no error is recorded for this.
            return False;
        oInstr.aoOperands.append(Operand(g_kdOpTypes[sOpType][1], sOpType));
    return True;
1672
def doneInstructionOne(self, oInstr, iLine):
    """
    Complete the parsing by processing, validating and expanding raw inputs.

    Stamps the instruction with the completion line, fills in whatever can
    be derived of the mnemonic, disassembler enum, statistics name, function
    name and encoding, and then registers the instruction with its maps and
    with the global by-statistics and by-function dictionaries.

    Always returns True; a duplicate statistics name is reported thru
    self.error().
    """
    def appendOperandTypes(sBase):
        """ Returns sBase with '_<type>' added for each operand having a type. """
        return sBase + ''.join(['_' + oCur.sType for oCur in oInstr.aoOperands if oCur.sType]);

    assert oInstr.iLineCompleted is None;
    oInstr.iLineCompleted = iLine;

    if oInstr.cOpTags > 0:
        # Specified instructions.  Nothing special to do for these at present.
        pass;
    else:
        # Unspecified legacy stuff.  We generally only got a few things to go on here.
        #   /** Opcode 0x0f 0x00 /0. */
        #   FNIEMOPRM_DEF(iemOp_Grp6_sldt)
        pass;

    #
    # Common defaults.
    #

    # No mnemonic?  Guess it and the operands from the stats name, or failing
    # that from the function name with the 'iemOp_' bit removed.
    if oInstr.sMnemonic is None:
        sGuessFrom = oInstr.sStats;
        if sGuessFrom is None and oInstr.sFunction is not None:
            sGuessFrom = oInstr.sFunction.replace('iemOp_', '');
        if sGuessFrom is not None:
            self.deriveMnemonicAndOperandsFromStats(oInstr, sGuessFrom);

    # The disassembler op enum constant defaults to the upper-cased mnemonic.
    if oInstr.sMnemonic is not None and oInstr.sDisEnum is None:
        oInstr.sDisEnum = 'OP_' + oInstr.sMnemonic.upper();

    # The IEM statistics base name comes from the function name if there is
    # one, otherwise from the mnemonic and operand types.
    if oInstr.sStats is None:
        if oInstr.sFunction is not None:
            oInstr.sStats = oInstr.sFunction.replace('iemOp_', '');
        elif oInstr.sMnemonic is not None:
            oInstr.sStats = appendOperandTypes(oInstr.sMnemonic);

    # The IEM function name comes from the mnemonic and operand types if
    # there is a mnemonic, otherwise from the statistics name.
    if oInstr.sFunction is None:
        if oInstr.sMnemonic is not None:
            oInstr.sFunction = appendOperandTypes('iemOp_' + oInstr.sMnemonic);
        elif oInstr.sStats:
            oInstr.sFunction = 'iemOp_' + oInstr.sStats;

    # Derive the encoding from the operands.  It is left unset when the
    # first operand does not use ModR/M.
    if oInstr.sEncoding is None:
        if not oInstr.aoOperands:
            fModRm = oInstr.fUnused and oInstr.sSubOpcode;
            oInstr.sEncoding = 'ModR/M' if fModRm else 'fixed';
        elif oInstr.aoOperands[0].usesModRM():
            fVex = len(oInstr.aoOperands) >= 2 and oInstr.aoOperands[1].sWhere == 'vvvv';
            oInstr.sEncoding = 'ModR/M+VEX' if fVex else 'ModR/M';

    #
    # Apply the default map when none was given, then add the instruction to
    # all its maps.
    #
    if not oInstr.aoMaps:
        oInstr.aoMaps = [ self.oDefaultMap, ];
    for oCurMap in oInstr.aoMaps:
        oCurMap.aoInstructions.append(oInstr);

    #
    # The opstat value must be unique; add it to the opstat indexed dictionary.
    #
    if oInstr.sStats:
        if oInstr.sStats in g_dAllInstructionsByStat:
            self.error('Duplicate opstat value "%s"\nnew: %s\nold: %s'
                       % (oInstr.sStats, oInstr, g_dAllInstructionsByStat[oInstr.sStats],));
        else:
            g_dAllInstructionsByStat[oInstr.sStats] = oInstr;

    #
    # Add to the function indexed dictionary.  Multiple instructions per
    # function are allowed.
    #
    if oInstr.sFunction:
        g_dAllInstructionsByFunction.setdefault(oInstr.sFunction, []).append(oInstr);

    return True;
1775
def doneInstructions(self, iLineInComment = None):
    r"""
    Completes all the current instructions and resets the current state.

    Each current instruction is handed to doneInstructionOne() together with
    the completion line: self.iLine when iLineInComment is None, otherwise
    self.iCommentLine + iLineInComment.  The stub and instruction totals are
    updated, and the comment text and current instruction list are cleared.

    Returns True.
    """
    fLineFromComment = iLineInComment is not None;
    for oInstr in self.aoCurInstrs:
        if fLineFromComment:
            iLineDone = self.iCommentLine + iLineInComment;
        else:
            iLineDone = self.iLine;
        self.doneInstructionOne(oInstr, iLineDone);
        if oInstr.fStub:
            self.cTotalStubs += 1;

    self.cTotalInstr += len(self.aoCurInstrs);

    # Start afresh.
    self.sComment    = '';
    self.aoCurInstrs = [];
    return True;
1790
def setInstrunctionAttrib(self, sAttrib, oValue, fOverwrite = False):
    r"""
    Sets the sAttrib attribute of all current instructions to oValue.

    Unless fOverwrite is exactly True, only attributes currently holding
    None are replaced; any other existing value (empty strings included) is
    left as it is.
    """
    fForce = fOverwrite is True;
    for oInstr in self.aoCurInstrs:
        # The old value is only consulted when not forcing the assignment.
        if fForce or getattr(oInstr, sAttrib) is None:
            setattr(oInstr, sAttrib, oValue);
1802
def setInstrunctionArrayAttrib(self, sAttrib, iEntry, oValue, fOverwrite = False):
    r"""
    Sets entry iEntry of the array attribute sAttrib of all current
    instructions to oValue.

    The array is padded with None entries as needed so that iEntry exists.
    Unless fOverwrite is exactly True, only entries currently holding None
    are replaced.
    """
    fForce = fOverwrite is True;
    for oInstr in self.aoCurInstrs:
        aoEntries = getattr(oInstr, sAttrib);

        # Grow the array in place so the index is valid.
        cMissing = iEntry + 1 - len(aoEntries);
        if cMissing > 0:
            aoEntries.extend([None,] * cMissing);

        if fForce or aoEntries[iEntry] is None:
            aoEntries[iEntry] = oValue;
1814
def parseCommentOldOpcode(self, asLines):
    r"""
    Deals with 'Opcode 0xff /4' like comments.

    If the first comment line starts with the word 'Opcode' followed by a
    0x/0X prefixed value, the words following 'Opcode' are stored as the
    sRawOldOpcodes attribute of the current instructions (via
    setInstrunctionAttrib(), i.e. without overwriting), with any upper case
    '0X' prefix rewritten as '0x'.

    Returns False in all cases.
    """
    # An empty comment has no first line to inspect.
    asWords = asLines[0].split() if asLines else [];
    if     len(asWords) >= 2 \
       and asWords[0] == 'Opcode' \
       and asWords[1].startswith(('0x', '0X',)):
        # Drop the leading 'Opcode' word and normalize the hex prefixes.
        asOpcodes = [];
        for sWord in asWords[1:]:
            if sWord.startswith('0X'):
                sWord = '0x' + sWord[2:];
            asOpcodes.append(sWord);
        self.setInstrunctionAttrib('sRawOldOpcodes', ' '.join(asOpcodes));

    return False;
1830
def ensureInstructionForOpTag(self, iTagLine):
    r"""
    Ensures there is an instruction for the op-tag being parsed.

    When there are no current instructions, one is added for the line
    self.iCommentLine + iTagLine.  The op-tag count of every current
    instruction is incremented, and the tagged-instruction total is bumped
    for each instruction receiving its first tag.

    Returns the last of the current instructions.
    """
    if not self.aoCurInstrs:
        self.addInstruction(self.iCommentLine + iTagLine);

    for oInstr in self.aoCurInstrs:
        cTags = oInstr.cOpTags + 1;
        oInstr.cOpTags = cTags;
        if cTags == 1:
            self.cTotalTagged += 1;

    return self.aoCurInstrs[-1];
1840
1841 @staticmethod
1842 def flattenSections(aasSections):
1843 """
1844 Flattens multiline sections into stripped single strings.
1845 Returns list of strings, on section per string.
1846 """
1847 asRet = [];
1848 for asLines in aasSections:
1849 if asLines:
1850 asRet.append(' '.join([sLine.strip() for sLine in asLines]));
1851 return asRet;
1852
1853 @staticmethod
1854 def flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = '\n'):
1855 """
1856 Flattens sections into a simple stripped string with newlines as
1857 section breaks. The final section does not sport a trailing newline.
1858 """
1859 # Typical: One section with a single line.
1860 if len(aasSections) == 1 and len(aasSections[0]) == 1:
1861 return aasSections[0][0].strip();
1862
1863 sRet = '';
1864 for iSection, asLines in enumerate(aasSections):
1865 if asLines:
1866 if iSection > 0:
1867 sRet += sSectionSep;
1868 sRet += sLineSep.join([sLine.strip() for sLine in asLines]);
1869 return sRet;
1870
1871
1872
1873 ## @name Tag parsers
1874 ## @{
1875
def parseTagOpBrief(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opbrief
    Value:  Text description, multiple sections, appended.

    Brief description.  If not given, it's the first sentence from \@opdesc.

    The value gets a terminating dot if it lacks one, may not exceed 180
    characters and must consist of a single sentence.  It is stored in the
    sBrief attribute of the instruction, which may not already be set.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sBrief = self.flattenAllSections(aasSections);
    if not sBrief:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if sBrief[-1] != '.':
        sBrief = sBrief + '.';
    if len(sBrief) > 180:
        return self.errorComment(iTagLine, '%s: value too long (max 180 chars): %s' % (sTag, sBrief));

    # Find the first dot followed by a space (or the final dot).  Dots
    # followed by anything else, like in '1.5', do not end a sentence.
    offDot = sBrief.find('.');
    while 0 <= offDot < len(sBrief) - 1 and sBrief[offDot + 1] != ' ':
        offDot = sBrief.find('.', offDot + 1);
    if offDot >= 0 and offDot != len(sBrief) - 1:
        return self.errorComment(iTagLine, '%s: only one sentence: %s' % (sTag, sBrief));

    # Update the instruction.
    if oInstr.sBrief is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite brief "%s" with "%s"'
                                           % (sTag, oInstr.sBrief, sBrief,));
    oInstr.sBrief = sBrief;

    _ = iEndLine;
    return True;
1905
def parseTagOpDesc(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opdesc
    Value:  Text description, multiple sections, appended.

    It is used to describe instructions.  Each flattened section is
    appended to the description sections of the instruction.

    Returns True.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    _ = sTag; _ = iEndLine;

    if aasSections:
        for sSection in self.flattenSections(aasSections):
            oInstr.asDescSections.append(sSection);
    return True;
1920
def parseTagOpMnemonic(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opmnemonic
    Value:  mnemonic

    The 'mnemonic' value must be a valid C identifier string.  Because of
    prefixes, groups and whatnot, there are times when the mnemonic isn't
    that of an actual assembler mnemonic.

    The value is checked against self.oReMnemonic and may not replace a
    mnemonic which is already set.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    _ = iEndLine;

    # Flatten the value and check that it is well formed.
    sMnemonic = self.flattenAllSections(aasSections);
    if not self.oReMnemonic.match(sMnemonic):
        return self.errorComment(iTagLine, '%s: invalid menmonic name: "%s"' % (sTag, sMnemonic,));

    # Refuse to replace an existing mnemonic.
    sOldMnemonic = oInstr.sMnemonic;
    if sOldMnemonic is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite menmonic "%s" with "%s"'
                                           % (sTag, sOldMnemonic, sMnemonic,));

    oInstr.sMnemonic = sMnemonic;
    return True;
1943
def parseTagOpOperandN(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tags:   \@op1, \@op2, \@op3, \@op4
    Value:  [where:]type

    The 'where' value indicates where the operand is found, like the 'reg'
    part of the ModR/M encoding.  It must be a key of g_kdOpLocations.  When
    omitted, it is taken from the g_kdOpTypes entry of the type.

    The 'type' value indicates the operand type.  These follow the types
    given in the opcode tables in the CPU reference manuals.  It must be a
    key of g_kdOpTypes.

    The operand index is derived from the last character of the tag.  An
    operand which has already been specified cannot be overwritten.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    idxOp = int(sTag[-1]) - 1;
    assert 0 <= idxOp < 4;

    # flatten, split up, and validate the "where:type" value.
    sFlattened = self.flattenAllSections(aasSections);
    asSplit = sFlattened.split(':');
    if len(asSplit) == 1:
        sType  = asSplit[0];
        sWhere = None;
    elif len(asSplit) == 2:
        (sWhere, sType) = asSplit;
    else:
        return self.errorComment(iTagLine, 'expected %s value on format "[<where>:]<type>" not "%s"' % (sTag, sFlattened,));

    if sType not in g_kdOpTypes:
        return self.errorComment(iTagLine, '%s: invalid type value "%s", valid: %s'
                                           % (sTag, sType, ', '.join(g_kdOpTypes.keys()),));
    if sWhere is None:
        sWhere = g_kdOpTypes[sType][1];     # The default location for this type.
    elif sWhere not in g_kdOpLocations:
        return self.errorComment(iTagLine, '%s: invalid where value "%s", valid: %s'
                                           % (sTag, sWhere, ', '.join(g_kdOpLocations.keys()),));

    # Insert the operand, refusing to overwrite an existing one.
    while idxOp >= len(oInstr.aoOperands):
        oInstr.aoOperands.append(None);
    if oInstr.aoOperands[idxOp] is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s:%s" with "%s:%s"'
                                           % ( sTag, oInstr.aoOperands[idxOp].sWhere, oInstr.aoOperands[idxOp].sType,
                                               sWhere, sType,));
    oInstr.aoOperands[idxOp] = Operand(sWhere, sType);

    _ = iEndLine;
    return True;
1993
def parseTagOpMaps(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opmaps
    Value:  map[,map2]

    Indicates which maps the instruction is in.  There is a default map
    associated with each input file.

    The map names are comma separated and must be keys of
    g_dInstructionMaps.  Whitespace around the names and empty entries
    (as produced by a trailing comma on a line) are ignored.  A map already
    assigned to the instruction cannot be assigned again.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split up and validate the value.  Note that str.split(',')
    # returns [''] for an empty string, so empty entries must be weeded out
    # before checking whether any value was given at all.
    sFlattened = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',');
    asMaps = [sMap.strip() for sMap in sFlattened.split(',')];
    asMaps = [sMap for sMap in asMaps if sMap];
    if not asMaps:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    for sMap in asMaps:
        if sMap not in g_dInstructionMaps:
            return self.errorComment(iTagLine, '%s: invalid map value: %s  (valid values: %s)'
                                               % (sTag, sMap, ', '.join(g_dInstructionMaps.keys()),));

    # Add the maps to the current list.  Throw errors on duplicates.
    for oMap in oInstr.aoMaps:
        if oMap.sName in asMaps:
            return self.errorComment(iTagLine, '%s: duplicate map assignment: %s' % (sTag, oMap.sName));

    for sMap in asMaps:
        oMap = g_dInstructionMaps[sMap];
        if oMap not in oInstr.aoMaps:
            oInstr.aoMaps.append(oMap);
        else:
            # The same map was listed twice in this tag value.
            self.errorComment(iTagLine, '%s: duplicate map assignment (input): %s' % (sTag, sMap));

    _ = iEndLine;
    return True;
2028
def parseTagOpPfx(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@oppfx
    Value:  n/a|none|0x66|0xf3|0xf2

    Required prefix for the instruction.  (In a (E)VEX context this is the
    value of the 'pp' field rather than an actual prefix.)

    The value is case insensitive.  'n/a' is stored as None, and a two
    character value gets '0x' prepended before being validated.  Any value
    other than 'n/a' must be a key of g_kdPrefixes.  An already specified
    prefix cannot be overwritten.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sFlattened = self.flattenAllSections(aasSections);
    asPrefixes = sFlattened.split();
    if not asPrefixes:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if len(asPrefixes) > 1:
        return self.errorComment(iTagLine, '%s: max one prefix: %s' % (sTag, asPrefixes,));

    sPrefix = asPrefixes[0].lower();
    if sPrefix == 'n/a':
        sPrefix = None;
    elif sPrefix != 'none':
        # Byte value, the '0x' part is optional.
        if len(sPrefix) == 2:
            sPrefix = '0x' + sPrefix;
        if not _isValidOpcodeByte(sPrefix):
            return self.errorComment(iTagLine, '%s: invalid prefix: %s' % (sTag, sPrefix,));

    if sPrefix is not None and sPrefix not in g_kdPrefixes:
        return self.errorComment(iTagLine, '%s: invalid prefix: %s (valid %s)' % (sTag, sPrefix, g_kdPrefixes,));

    # Set it.
    if oInstr.sPrefix is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sPrefix, sPrefix,));
    oInstr.sPrefix = sPrefix;

    _ = iEndLine;
    return True;
2066
def parseTagOpcode(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opcode
    Value:  0x?? | /reg | 11/reg | !11/reg

    The opcode byte or sub-byte for the instruction in the context of a map.

    The 'reg' part is a single digit in the range 0 thru 7, as the ModR/M
    reg field is three bits wide.  An already specified opcode cannot be
    overwritten.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sOpcode = self.flattenAllSections(aasSections);
    fValid = _isValidOpcodeByte(sOpcode);
    if not fValid:
        # Not a byte value, so it must be one of the /reg forms.
        for sLead in ('/', '11/', '!11/',):
            if     len(sOpcode) == len(sLead) + 1 \
               and sOpcode.startswith(sLead) \
               and sOpcode[-1] in '01234567':
                fValid = True;
                break;
    if not fValid:
        return self.errorComment(iTagLine, '%s: invalid opcode: %s' % (sTag, sOpcode,));

    # Set it.
    if oInstr.sOpcode is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sOpcode, sOpcode,));
    oInstr.sOpcode = sOpcode;

    _ = iEndLine;
    return True;
2096
def parseTagOpcodeSub(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opcodesub
    Value:  none | 11 mr/reg | !11 mr/reg

    This is a simple way of dealing with encodings where the mod=3 and
    mod!=3 represents exactly two different instructions.  The more proper
    way would be to go via maps with two members, but this is faster.

    The value must be a key of g_kdSubOpcodes; what gets stored is the first
    element of the matching entry.  An already specified sub opcode cannot
    be overwritten.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    _ = iEndLine;

    # Flatten the value and translate it via the sub opcode table.
    sRawValue = self.flattenAllSections(aasSections);
    if sRawValue not in g_kdSubOpcodes:
        return self.errorComment(iTagLine, '%s: invalid sub opcode: %s  (valid: 11, !11, none)' % (sTag, sRawValue,));
    sSubOpcode = g_kdSubOpcodes[sRawValue][0];

    # Refuse to replace an existing value.
    sOldSubOpcode = oInstr.sSubOpcode;
    if sOldSubOpcode is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                           % ( sTag, sOldSubOpcode, sSubOpcode,));

    oInstr.sSubOpcode = sSubOpcode;
    return True;
2122
def parseTagOpEnc(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@openc
    Value:  ModR/M|fixed|prefix|<map name>

    The instruction operand encoding style.

    The value is accepted if it is a key of g_kdEncodings, a key of
    g_dInstructionMaps, or passes _isValidOpcodeByte().  An already
    specified encoding cannot be overwritten.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    _ = iEndLine;

    # Flatten and validate the value.
    sEncoding = self.flattenAllSections(aasSections);
    if      sEncoding not in g_kdEncodings \
        and sEncoding not in g_dInstructionMaps \
        and not _isValidOpcodeByte(sEncoding):
        return self.errorComment(iTagLine, '%s: invalid encoding: %s' % (sTag, sEncoding,));

    # Refuse to replace an existing value.
    sOldEncoding = oInstr.sEncoding;
    if sOldEncoding is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                           % ( sTag, sOldEncoding, sEncoding,));

    oInstr.sEncoding = sEncoding;
    return True;
2149
## Translates an EFLAGS op-tag into the name of the Instruction attribute
## which parseTagOpEFlags stores the flag list in.
kdOpFlagToAttr = dict([
    ( '@opfltest',      'asFlTest', ),
    ( '@opflmodify',    'asFlModify', ),
    ( '@opflundef',     'asFlUndefined', ),
    ( '@opflset',       'asFlSet', ),
    ( '@opflclear',     'asFlClear', ),
]);
2158
def parseTagOpEFlags(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tags:   \@opfltest, \@opflmodify, \@opflundef, \@opflset, \@opflclear
    Value:  <eflags specifier>

    The value is a comma separated list of EFLAGS mnemonics (keys of
    g_kdEFlagsMnemonics), or the single word 'none' for an empty list.  The
    list is stored in the instruction attribute self.kdOpFlagToAttr
    associates with the tag, which may not already be set.

    Returns True on success, False if any flag is invalid, and the result
    of self.errorComment() on an overwrite attempt.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    _ = iEndLine;

    # Flatten, split up and validate the values.
    sFlattened = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',');
    asFlags = sFlattened.split(',');
    if len(asFlags) == 1 and asFlags[0].lower() == 'none':
        asFlags = [];
    else:
        fRc = True;
        for iFlag, sFlag in enumerate(asFlags):
            if sFlag in g_kdEFlagsMnemonics:
                continue;
            sStripped = sFlag.strip();
            if sStripped in g_kdEFlagsMnemonics:
                asFlags[iFlag] = sStripped;
            else:
                # Keep going so all the bad flags get reported.
                fRc = self.errorComment(iTagLine, '%s: invalid EFLAGS value: %s' % (sTag, sFlag,));
        if not fRc:
            return False;

    # Set them, unless the attribute has been given a value already.
    sAttrib = self.kdOpFlagToAttr[sTag];
    asOld = getattr(oInstr, sAttrib);
    if asOld is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, asOld, asFlags,));
    setattr(oInstr, sAttrib, asFlags);
    return True;
2190
def parseTagOpHints(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@ophints
    Value:  Comma or space separated list of flags and hints.

    This covers the disassembler flags table and more.

    Each hint must be a key of g_kdHints.  The single word 'none' yields an
    empty list.  Valid hints are added to the dHints dictionary of the
    instruction; a hint which is already present is reported as an error
    but does not fail the tag.

    Returns True on success, False if any hint is invalid.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    asHints = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asHints) == 1 and asHints[0].lower() == 'none':
        asHints = [];
    else:
        # Note! The words produced by the parameterless split() above never
        #       carry surrounding whitespace, so no stripping is needed here.
        fRc = True;
        for sHint in asHints:
            if sHint not in g_kdHints:
                # Keep going so all the bad hints get reported.
                fRc = self.errorComment(iTagLine, '%s: invalid hint value: %s' % (sTag, sHint,));
        if not fRc:
            return False;

    # Append them.
    for sHint in asHints:
        if sHint not in oInstr.dHints:
            oInstr.dHints[sHint] = True; # (dummy value, using dictionary for speed)
        else:
            self.errorComment(iTagLine, '%s: duplicate hint: %s' % ( sTag, sHint,));

    _ = iEndLine;
    return True;
2224
def parseTagOpDisEnum(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opdisenum
    Value:  OP_XXXX

    This is for selecting a specific (legacy) disassembler enum value for
    the instruction.

    Exactly one word is expected.  Extra words are reported as an error,
    after which the first word is processed as usual.  The word must match
    self.oReDisEnum, and an already specified value cannot be overwritten.

    Returns True on success, False when no value is given, and otherwise
    the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    _ = iEndLine;

    # Flatten and split.
    asWords = self.flattenAllSections(aasSections).split();
    cWords = len(asWords);
    if cWords != 1:
        self.errorComment(iTagLine, '%s: expected exactly one value: %s' % (sTag, asWords,));
        if cWords == 0:
            return False;

    sDisEnum = asWords[0];
    if not self.oReDisEnum.match(sDisEnum):
        return self.errorComment(iTagLine, '%s: invalid disassembler OP_XXXX enum: %s (pattern: %s)'
                                           % (sTag, sDisEnum, self.oReDisEnum.pattern));

    # Refuse to replace an existing value.
    sOldDisEnum = oInstr.sDisEnum;
    if sOldDisEnum is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % (sTag, sOldDisEnum, sDisEnum,));

    oInstr.sDisEnum = sDisEnum;
    return True;
2253
def parseTagOpMinCpu(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opmincpu
    Value:  <simple CPU name>

    Indicates when this instruction was introduced.

    Exactly one word is expected.  Extra words are reported as an error,
    after which the first word is processed as usual.  The name must be a
    key of g_kdCpuNames.  Repeating the value already set is fine, while
    trying to change it to a different one is an error.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value, split into words, make sure there's just one, valid it.
    asCpus = self.flattenAllSections(aasSections).split();
    if not asCpus:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if len(asCpus) > 1:
        self.errorComment(iTagLine, '%s: exactly one CPU name, please: %s' % (sTag, ' '.join(asCpus),));

    sMinCpu = asCpus[0];
    if sMinCpu not in g_kdCpuNames:
        return self.errorComment(iTagLine, '%s: invalid CPU name: %s  (names: %s)'
                                           % (sTag, sMinCpu, ','.join(sorted(g_kdCpuNames)),));

    # Set it.  The assignment must not happen before this point, or the
    # overwrite check below can never trigger.
    if oInstr.sMinCpu is None:
        oInstr.sMinCpu = sMinCpu;
    elif oInstr.sMinCpu != sMinCpu:
        return self.errorComment(iTagLine, '%s: attemting to overwrite "%s" with "%s"' % (sTag, oInstr.sMinCpu, sMinCpu,));

    _ = iEndLine;
    return True;
2283
def parseTagOpCpuId(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opcpuid
    Value:  none | <CPUID flag specifier>

    CPUID feature bit which is required for the instruction to be present.

    The value is a comma or space separated list of keys of g_kdCpuIdFlags,
    or the single word 'none' for an empty list.  Valid values are appended
    to the asCpuIds list of the instruction; a value which is already
    present is reported as an error but does not fail the tag.

    Returns True on success, False if any value is invalid.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    asCpuIds = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asCpuIds) == 1 and asCpuIds[0].lower() == 'none':
        asCpuIds = [];
    else:
        # Note! The words produced by the parameterless split() above never
        #       carry surrounding whitespace, so no stripping is needed here.
        fRc = True;
        for sCpuId in asCpuIds:
            if sCpuId not in g_kdCpuIdFlags:
                # Keep going so all the bad values get reported.
                fRc = self.errorComment(iTagLine, '%s: invalid CPUID value: %s' % (sTag, sCpuId,));
        if not fRc:
            return False;

    # Append them.
    for sCpuId in asCpuIds:
        if sCpuId not in oInstr.asCpuIds:
            oInstr.asCpuIds.append(sCpuId);
        else:
            self.errorComment(iTagLine, '%s: duplicate CPUID: %s' % ( sTag, sCpuId,));

    _ = iEndLine;
    return True;
2317
def parseTagOpGroup(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opgroup
    Value:  op_grp1[_subgrp2[_subsubgrp3]]

    Instruction grouping.

    Exactly one group name matching self.oReGroupName is required, and an
    already specified group cannot be overwritten.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    _ = iEndLine;

    # Flatten, split into words and insist on there being a single one.
    asGroups = self.flattenAllSections(aasSections).split();
    if len(asGroups) != 1:
        return self.errorComment(iTagLine, '%s: exactly one group, please: %s' % (sTag, asGroups,));

    (sGroup,) = asGroups;
    if not self.oReGroupName.match(sGroup):
        return self.errorComment(iTagLine, '%s: invalid group name: %s  (valid: %s)'
                                           % (sTag, sGroup, self.oReGroupName.pattern));

    # Refuse to replace an existing value.
    sOldGroup = oInstr.sGroup;
    if sOldGroup is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, sOldGroup, sGroup,));

    oInstr.sGroup = sGroup;
    return True;
2343
def parseTagOpUnusedInvalid(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opunused, \@opinvalid, \@opinvlstyle
    Value:  <invalid opcode behaviour style>

    The \@opunused indicates the specification is for a currently unused
    instruction encoding.

    The \@opinvalid indicates the specification is for a currently invalid
    instruction encoding (like UD2).

    The \@opinvlstyle just indicates how CPUs decode the instruction when
    not supported (\@opcpuid, \@opmincpu) or disabled.

    Exactly one style, which must be a key of g_kdInvalidStyles, is
    required.  Only one of the three tags may be used per instruction.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    _ = iEndLine;

    # Flatten, split into words and insist on there being a single valid one.
    asStyles = self.flattenAllSections(aasSections).split();
    if len(asStyles) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid behviour style, please: %s' % (sTag, asStyles,));
    (sStyle,) = asStyles;
    if sStyle not in g_kdInvalidStyles:
        return self.errorComment(iTagLine, '%s: invalid invalid behaviour style: %s  (valid: %s)'
                                           % (sTag, sStyle, g_kdInvalidStyles.keys(),));

    # Refuse to replace an existing value.
    sOldStyle = oInstr.sInvalidStyle;
    if sOldStyle is not None:
        return self.errorComment(iTagLine,
                                 '%s: attempting to overwrite "%s" with "%s" (only one @opunused, @opinvalid, @opinvlstyle)'
                                 % ( sTag, sOldStyle, sStyle,));
    oInstr.sInvalidStyle = sStyle;

    # Two of the tags additionally flag the instruction.
    sFlagAttrib = { '@opunused': 'fUnused', '@opinvalid': 'fInvalid', }.get(sTag);
    if sFlagAttrib is not None:
        setattr(oInstr, sFlagAttrib, True);
    return True;
2381
def parseTagOpTest(self, sTag, aasSections, iTagLine, iEndLine): # pylint: disable=too-many-locals
    r"""
    Tag:        \@optest
    Value:      [<selectors>[ ]?] <inputs> -> <outputs>
    Example:    mode==64bit / in1=0xfffffffe:dw in2=1:dw -> out1=0xffffffff:dw outfl=a?,p?

    The main idea here is to generate basic instruction tests.

    The probably simplest way of handling the diverse input, would be to use
    it to produce size optimized byte code for a simple interpreter that
    modifies the register input and output states.

    An alternative to the interpreter would be creating multiple tables,
    but that becomes rather complicated wrt what goes where and then to use
    them in an efficient manner.

    Each section of the value is one test.  Tests which parse cleanly are
    appended to the aoTests list of the instruction, the others are
    reported via self.errorComment() and dropped.

    Returns True.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    #
    # Do it section by section.
    #
    for asSectionLines in aasSections:
        #
        # Sort the input into outputs, inputs and selector conditions.
        #
        sFlatSection = self.flattenAllSections([asSectionLines,]);
        if not sFlatSection:
            self.errorComment(iTagLine, '%s: missing value (dbg: aasSections=%s)' % ( sTag, aasSections));
            continue;
        oTest = InstructionTest(oInstr);

        asSelectors = [];
        asInputs    = [];
        asOutputs   = [];
        asCur       = asOutputs;
        fRc         = True;
        asWords     = sFlatSection.split();
        # The words are processed back to front: the outputs come first,
        # then the inputs (after '->') and finally the selectors (after '/').
        for sWord in reversed(asWords):
            # Check for array switchers.  Note! These are identity checks
            # on purpose, as comparing by value cannot tell the lists apart
            # when they happen to have the same content (both empty, say).
            if sWord == '->':
                if asCur is not asOutputs:
                    fRc = self.errorComment(iTagLine, '%s: "->" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asInputs;
            elif sWord == '/':
                if asCur is not asInputs:
                    fRc = self.errorComment(iTagLine, '%s: "/" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asSelectors;
            else:
                asCur.insert(0, sWord);

        #
        # Validate and add selectors.
        #
        for sCond in asSelectors:
            sCondExp = TestSelector.kdPredicates.get(sCond, sCond);
            oSelector = None;
            for sOp in TestSelector.kasCompareOps:
                off = sCondExp.find(sOp);
                if off >= 0:
                    sVariable = sCondExp[:off];
                    sValue    = sCondExp[off + len(sOp):];
                    if sVariable in TestSelector.kdVariables:
                        if sValue in TestSelector.kdVariables[sVariable]:
                            oSelector = TestSelector(sVariable, sOp, sValue);
                        else:
                            self.errorComment(iTagLine, '%s: invalid condition value "%s" in "%s" (valid: %s)'
                                                         % ( sTag, sValue, sCond,
                                                             TestSelector.kdVariables[sVariable].keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid condition variable "%s" in "%s" (valid: %s)'
                                                     % ( sTag, sVariable, sCond, TestSelector.kdVariables.keys(),));
                    break;      # Only the first operator found is considered.
            if oSelector is not None:
                for oExisting in oTest.aoSelectors:
                    if oExisting.sVariable == oSelector.sVariable:
                        self.errorComment(iTagLine, '%s: already have a selector for variable "%s" (existing: %s, new: %s)'
                                                     % ( sTag, oSelector.sVariable, oExisting, oSelector,));
                oTest.aoSelectors.append(oSelector);
            else:
                fRc = self.errorComment(iTagLine, '%s: failed to parse selector: %s' % ( sTag, sCond,));

        #
        # Validate outputs and inputs, adding them to the test as we go along.
        #
        for asItems, sDesc, aoDst in [ (asInputs, 'input', oTest.aoInputs), (asOutputs, 'output', oTest.aoOutputs)]:
            asValidFieldKinds = [ 'both', sDesc, ];
            for sItem in asItems:
                oItem = None;
                for sOp in TestInOut.kasOperators:
                    off = sItem.find(sOp);
                    if off < 0:
                        continue;
                    sField     = sItem[:off];
                    sValueType = sItem[off + len(sOp):];
                    if     sField in TestInOut.kdFields \
                       and TestInOut.kdFields[sField][1] in asValidFieldKinds:
                        # The type is optional, the field supplies the default.
                        asSplit = sValueType.split(':', 1);
                        sValue  = asSplit[0];
                        sType   = asSplit[1] if len(asSplit) > 1 else TestInOut.kdFields[sField][0];
                        if sType in TestInOut.kdTypes:
                            oValid = TestInOut.kdTypes[sType].validate(sValue);
                            if oValid is True:
                                if not TestInOut.kdTypes[sType].isAndOrPair(sValue) or sOp == '&|=':
                                    oItem = TestInOut(sField, sOp, sValue, sType);
                                else:
                                    self.errorComment(iTagLine, '%s: and-or %s value "%s" can only be used with "&|="'
                                                                 % ( sTag, sDesc, sItem, ));
                            else:
                                self.errorComment(iTagLine, '%s: invalid %s value "%s" in "%s" (type: %s): %s'
                                                             % ( sTag, sDesc, sValue, sItem, sType, oValid, ));
                        else:
                            self.errorComment(iTagLine, '%s: invalid %s type "%s" in "%s" (valid types: %s)'
                                                         % ( sTag, sDesc, sType, sItem, TestInOut.kdTypes.keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid %s field "%s" in "%s"\nvalid fields: %s'
                                                     % ( sTag, sDesc, sField, sItem,
                                                         ', '.join([sKey for sKey in TestInOut.kdFields.keys()
                                                                    if TestInOut.kdFields[sKey][1] in asValidFieldKinds]),));
                    break;      # Only the first operator found is considered.
                if oItem is not None:
                    for oExisting in aoDst:
                        if oExisting.sField == oItem.sField and oExisting.sOp == oItem.sOp:
                            self.errorComment(iTagLine,
                                              '%s: already have a "%s" assignment for field "%s" (existing: %s, new: %s)'
                                              % ( sTag, oItem.sOp, oItem.sField, oExisting, oItem,));
                    aoDst.append(oItem);
                else:
                    fRc = self.errorComment(iTagLine, '%s: failed to parse assignment: %s' % ( sTag, sItem,));

        #
        # Add the test to the instruction if everything went fine.
        #
        if fRc:
            oInstr.aoTests.append(oTest);
        else:
            self.errorComment(iTagLine, '%s: failed to parse test: %s' % (sTag, ' '.join(asWords),));
            self.errorComment(iTagLine, '%s: asSelectors=%s / asInputs=%s -> asOutputs=%s'
                                        % (sTag, asSelectors, asInputs, asOutputs,));

    _ = iEndLine;
    return True;
2526
def parseTagOpTestNum(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Numbered \@optest tag.  Either \@optest42 or \@optest[42].

    The number is expected to equal the number of tests the instruction
    already has; a mismatch is reported as an error, but the test is parsed
    regardless.

    Returns what parseTagOpTest() returns.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Extract the number, which follows '@optest' and may be bracketed.
    sNumber = sTag[8:-1] if sTag[-1] == ']' else sTag[7:];
    iTest = int(sNumber);

    cTests = len(oInstr.aoTests);
    if iTest != cTests:
        self.errorComment(iTagLine, '%s: incorrect test number: %u, actual %u' % (sTag, iTest, cTests,));
    return self.parseTagOpTest(sTag, aasSections, iTagLine, iEndLine);
2542
def parseTagOpTestIgnore(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@optestign | \@optestignore
    Value:  <value is ignored>

    This is a simple trick to ignore a test while debugging another.  None
    of the arguments are used.

    See also \@oponlytest.

    Returns True.
    """
    _ = (sTag, aasSections, iTagLine, iEndLine,);
    return True;
2554
def parseTagOpCopyTests(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opcopytests
    Value:      <opstat | function> [..]
    Example:    \@opcopytests add_Eb_Gb

    Trick to avoid duplicating tests for different encodings of the same
    operation.

    Each reference must match self.oReStatsName or self.oReFunctionName.
    Invalid and duplicate references are reported as errors and skipped,
    the remaining ones are appended to the asCopyTests list of the
    instruction.

    Returns True, unless no value is given in which case the result of
    self.errorComment() is returned.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    _ = iEndLine;

    # Flatten and validate.  The copy jobs are only recorded here and are
    # executed after parsing all the input so forward references work.
    asToCopy = self.flattenAllSections(aasSections).split();
    if not asToCopy:
        return self.errorComment(iTagLine, '%s: requires at least on reference value' % (sTag,));

    for sToCopy in asToCopy:
        if sToCopy in oInstr.asCopyTests:
            self.errorComment(iTagLine, '%s: ignoring duplicate "%s"' % (sTag, sToCopy,));
            continue;
        if self.oReStatsName.match(sToCopy) or self.oReFunctionName.match(sToCopy):
            oInstr.asCopyTests.append(sToCopy);
        else:
            self.errorComment(iTagLine, '%s: invalid instruction reference (opstat or function) "%s" (valid: %s or %s)'
                                        % (sTag, sToCopy, self.oReStatsName.pattern, self.oReFunctionName.pattern));
    return True;
2583
def parseTagOpOnlyTest(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@oponlytest | \@oponly
    Value:  none

    Only test instructions with this tag.  This is a trick that is handy
    for singling out one or two new instructions or tests.

    The instruction is added to g_aoOnlyTestInstructions, unless it is
    there already.

    See also \@optestignore.

    Returns True on success, or the result of self.errorComment() if a
    value is given.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    _ = iEndLine;

    # The tag does not take a value.
    sValue = self.flattenAllSections(aasSections).strip();
    if sValue:
        return self.errorComment(iTagLine, '%s: does not take any value: %s' % (sTag, sValue));

    # Add the instruction to the only-test list (once).
    if oInstr not in g_aoOnlyTestInstructions:
        g_aoOnlyTestInstructions.append(oInstr);
    return True;
2606
def parseTagOpXcptType(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opxcpttype
    Value:  [none|1|2|3|4|4UA|5|6|7|8|11|12|E1|E1NF|E2|E3|E3NF|E4|E4NF|E5|E5NF|E6|E6NF|E7NF|E9|E9NF|E10|E11|E12|E12NF]

    Sets the SSE or AVX exception type (see SDMv2 2.4, 2.7).

    Exactly one type, which must be a key of g_kdXcptTypes, is required,
    and an already specified type cannot be overwritten.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    _ = iEndLine;

    # Flatten, split into words and insist on there being a single valid one.
    asTypes = self.flattenAllSections(aasSections).split();
    if len(asTypes) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid exception type, please: %s' % (sTag, asTypes,));
    (sType,) = asTypes;
    if sType not in g_kdXcptTypes:
        return self.errorComment(iTagLine, '%s: invalid invalid exception type: %s  (valid: %s)'
                                           % (sTag, sType, sorted(g_kdXcptTypes.keys()),));

    # Refuse to replace an existing value.
    sOldType = oInstr.sXcptType;
    if sOldType is not None:
        return self.errorComment(iTagLine,
                                 '%s: attempting to overwrite "%s" with "%s" (only one @opxcpttype)'
                                 % ( sTag, sOldType, sType,));

    oInstr.sXcptType = sType;
    return True;
2633
def parseTagOpFunction(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opfunction
    Value:  <VMM function name>

    This is for explicitly setting the IEM function name.  Normally we pick
    this up from the FNIEMOP_XXX macro invocation after the description, or
    generate it from the mnemonic and operands.

    It is thought it may be necessary to set it when specifying instructions
    whose implementation isn't following immediately or aren't implemented
    yet.

    The name must match self.oReFunctionName, and an already specified name
    cannot be overwritten.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    _ = iEndLine;

    # Flatten the value and check that it is well formed.
    sFunction = self.flattenAllSections(aasSections);
    if not self.oReFunctionName.match(sFunction):
        return self.errorComment(iTagLine, '%s: invalid VMM function name: "%s" (valid: %s)'
                                           % (sTag, sFunction, self.oReFunctionName.pattern));

    # Refuse to replace an existing value.
    sOldFunction = oInstr.sFunction;
    if sOldFunction is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite VMM function name "%s" with "%s"'
                                           % (sTag, sOldFunction, sFunction,));

    oInstr.sFunction = sFunction;
    return True;
2661
def parseTagOpStats(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opstats
    Value:      <VMM statistics base name>

    Explicitly sets the statistics name for the instruction.  The name is
    normally picked up from the IEMOP_MNEMONIC macro invocation, or generated
    from the mnemonic and operands, so this tag is meant for instructions
    whose implementation does not follow immediately or does not exist yet.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine
    oTarget = self.ensureInstructionForOpTag(iTagLine)

    # The flattened value must match the statistics name pattern, and a name
    # that is already set may not be replaced.
    sName = self.flattenAllSections(aasSections)
    oPattern = self.oReStatsName
    if oPattern.match(sName) is None:
        sProblem = '%s: invalid VMM statistics name: "%s" (valid: %s)' % (sTag, sName, oPattern.pattern)
    elif oTarget.sStats is not None:
        sProblem = '%s: attempting to overwrite VMM statistics base name "%s" with "%s"' % (sTag, oTarget.sStats, sName,)
    else:
        oTarget.sStats = sName
        return True
    return self.errorComment(iTagLine, sProblem)
2689
def parseTagOpDone(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opdone
    Value:      none

    Used to explicitly flush the instructions that have been specified.

    Returns the result of self.doneInstructions(), or the result of
    self.errorComment() when the tag was given a value.
    """
    _ = iEndLine
    sValue = self.flattenAllSections(aasSections)
    if sValue == '':
        return self.doneInstructions()
    return self.errorComment(iTagLine, '%s: takes no value, found: "%s"' % (sTag, sValue,))
2702
2703 ## @}
2704
2705
def parseComment(self):
    """
    Parse the current comment (self.sComment).

    If it's an opcode specifying comment, we reset the macro stuff.

    The comment text is split into lines which are then grouped by tag:
    everything before the first line starting with '@' belongs to the
    implicit '@default' tag, and each tag owns the text up to the next tag.
    Within a tag, blank lines separate sections.  A tag found in
    self.dTagHandlers is dispatched as
    handler(sTag, aasSections, iTagLine, iEndLine), where the line numbers
    are indexes into the stripped line list.

    Returns False if the comment contains neither 'Opcode' nor '@' (and is
    therefore ignored), True otherwise.
    """
    #
    # Reject if comment doesn't seem to contain anything interesting.
    #
    if self.sComment.find('Opcode') < 0 \
      and self.sComment.find('@') < 0:
        return False;

    #
    # Split the comment into lines, removing leading asterisks and spaces.
    # Also remove leading and trailing empty lines.
    #
    asLines = self.sComment.split('\n');
    for iLine, sLine in enumerate(asLines):
        asLines[iLine] = sLine.lstrip().lstrip('*').lstrip();

    # Each dropped leading line bumps self.iCommentLine; presumably so that
    # it keeps referring to the line of asLines[0] - TODO confirm.
    while asLines and not asLines[0]:
        self.iCommentLine += 1;
        asLines.pop(0);

    while asLines and not asLines[-1]:
        asLines.pop(len(asLines) - 1);

    #
    # Check for old style: Opcode 0x0f 0x12
    #
    # (asLines cannot be empty here: the check at the top guarantees a line
    # containing 'Opcode' or '@', which survives the stripping above.)
    if asLines[0].startswith('Opcode '):
        self.parseCommentOldOpcode(asLines);

    #
    # Look for @op* tagged data.
    #
    cOpTags      = 0;           # Number of tags dispatched to a handler.
    sFlatDefault = None;        # Flattened untagged text, if any was seen.
    sCurTag      = '@default';  # Tag currently being collected.
    iCurTagLine  = 0;           # Index into asLines where sCurTag started.
    asCurSection = [];          # Section currently being filled; always the last entry of aasSections.
    aasSections  = [ asCurSection, ];
    for iLine, sLine in enumerate(asLines):
        if not sLine.startswith('@'):
            if sLine:
                asCurSection.append(sLine);
            elif asCurSection:
                # A blank line after some text starts a new section.
                asCurSection = [];
                aasSections.append(asCurSection);
        else:
            #
            # Process the previous tag.
            #
            # Drop a trailing empty section (left behind by a blank line
            # just before this tag).
            if not asCurSection and len(aasSections) > 1:
                aasSections.pop(-1);
            if sCurTag in self.dTagHandlers:
                self.dTagHandlers[sCurTag](sCurTag, aasSections, iCurTagLine, iLine);
                cOpTags += 1;
            elif sCurTag.startswith('@op'):
                self.errorComment(iCurTagLine, 'Unknown tag: %s' % (sCurTag));
            elif sCurTag == '@default':
                sFlatDefault = self.flattenAllSections(aasSections);
            elif '@op' + sCurTag[1:] in self.dTagHandlers:
                self.errorComment(iCurTagLine, 'Did you mean "@op%s" rather than "%s"?' % (sCurTag[1:], sCurTag));
            elif sCurTag in ['@encoding', '@opencoding']:
                self.errorComment(iCurTagLine, 'Did you mean "@openc" rather than "%s"?' % (sCurTag,));

            #
            # New tag.
            #
            # The tag is the first word (lower-cased); the rest of the
            # line, if any, becomes the first line of its first section.
            asSplit = sLine.split(None, 1);
            sCurTag = asSplit[0].lower();
            if len(asSplit) > 1:
                asCurSection = [asSplit[1],];
            else:
                asCurSection = [];
            aasSections = [asCurSection, ];
            iCurTagLine = iLine;

    #
    # Process the final tag.
    #
    # NOTE(review): unlike the in-loop copy above, this one has no
    # 'Did you mean ...' branches, and it passes the index of the last line
    # (the leftover loop variable) as the end line.
    if not asCurSection and len(aasSections) > 1:
        aasSections.pop(-1);
    if sCurTag in self.dTagHandlers:
        self.dTagHandlers[sCurTag](sCurTag, aasSections, iCurTagLine, iLine);
        cOpTags += 1;
    elif sCurTag.startswith('@op'):
        self.errorComment(iCurTagLine, 'Unknown tag: %s' % (sCurTag));
    elif sCurTag == '@default':
        sFlatDefault = self.flattenAllSections(aasSections);

    #
    # Don't allow default text in blocks containing @op*.
    #
    if cOpTags > 0 and sFlatDefault:
        self.errorComment(0, 'Untagged comment text is not allowed with @op*: %s' % (sFlatDefault,));

    return True;
2806
def parseMacroInvocation(self, sInvocation):
    """
    Parses a macro invocation.

    sInvocation must start with the macro name.  If the closing parenthesis
    is not part of it, following lines are appended from self.asLines
    starting at self.iLine (self.iLine itself is not advanced).

    Returns a tuple, first element is the offset following the macro
    invocation. The second element is a list of macro arguments, where the
    zero'th is the macro name.

    An invalid macro name or an invocation running past the end of the file
    is reported via self.error() and yields (len(sInvocation), None), so the
    result can always be unpacked as a pair.  A missing open parenthesis is
    handed to self.raiseError().
    """
    # First the name.
    offOpen = sInvocation.find('(');
    if offOpen <= 0:
        self.raiseError("macro invocation open parenthesis not found");
    sName = sInvocation[:offOpen].strip();
    if not self.oReMacroName.match(sName):
        self.error("invalid macro name '%s'" % (sName,));
        return (len(sInvocation), None);
    asRet = [sName, ];

    # Arguments.
    iLine    = self.iLine;
    cDepth   = 1;               # Parenthesis nesting depth; arguments are only split at depth 1.
    off      = offOpen + 1;
    offStart = off;             # Start of the argument currently being scanned.
    while cDepth > 0:
        # Out of text: pull in following lines.  This is a loop so that an
        # empty line cannot leave 'off' pointing past the end.
        while off >= len(sInvocation):
            if iLine >= len(self.asLines):
                self.error('macro invocation beyond end of file');
                return (len(sInvocation), None);
            sInvocation += self.asLines[iLine];
            iLine += 1;
        ch = sInvocation[off];

        if ch == ',' or ch == ')':
            if cDepth == 1:
                asRet.append(sInvocation[offStart:off].strip());
                offStart = off + 1;
            if ch == ')':
                cDepth -= 1;
        elif ch == '(':
            cDepth += 1;
        off += 1;

    return (off, asRet);
2848
def findAndParseMacroInvocationEx(self, sCode, sMacro):
    """
    Looks at the first occurrence of sMacro in sCode and, if it is followed
    by an open parenthesis (whitespace in between is ignored), parses the
    invocation.

    Returns (len(sCode), None) if not found, parseMacroInvocation result if found
    (with the offset made relative to the start of sCode).  A failed parse is
    treated as not found.
    """
    offHit = sCode.find(sMacro);
    # lstrip() + startswith() rather than strip()[0]: the macro name may be
    # the last thing in sCode, leaving no character to index.
    if offHit >= 0 and sCode[offHit + len(sMacro):].lstrip().startswith('('):
        oResult = self.parseMacroInvocation(sCode[offHit:]);
        # Only unpack a proper (offset, arguments) pair.
        if isinstance(oResult, tuple) and len(oResult) == 2 and oResult[1] is not None:
            return (offHit + oResult[0], oResult[1]);
    return (len(sCode), None);
2858
def findAndParseMacroInvocation(self, sCode, sMacro):
    """
    Same as findAndParseMacroInvocationEx, but without the offset.

    Returns None if not found, arguments as per parseMacroInvocation if found.
    """
    _, asArgs = self.findAndParseMacroInvocationEx(sCode, sMacro)
    return asArgs
2864
def findAndParseFirstMacroInvocation(self, sCode, asMacro):
    """
    Tries the macro names in asMacro in order and stops at the first one
    that is found in sCode.

    Returns same as findAndParseMacroInvocation.
    """
    # The generator is lazy, so names after the first hit are never tried.
    aoAttempts = (self.findAndParseMacroInvocation(sCode, sName) for sName in asMacro)
    return next((asArgs for asArgs in aoAttempts if asArgs is not None), None)
2874
def workerIemOpMnemonicEx(self, sMacro, sStats, sAsm, sForm, sUpper, sLower, # pylint: disable=too-many-arguments
                          sDisHints, sIemHints, asOperands):
    """
    Processes one of the IEMOP_MNEMONIC0EX, IEMOP_MNEMONIC1EX, IEMOP_MNEMONIC2EX,
    IEMOP_MNEMONIC3EX, and IEMOP_MNEMONIC4EX macros.

    The macro arguments are cross-checked against what is already known
    about the last instruction in self.aoCurInstrs (one is added if there
    is none), and attributes that are still unset are filled in.  Every
    problem is reported via self.error() and processing then continues.

    Parameters:
        sMacro      - The macro name; only used as prefix in error messages.
        sStats      - The a_Stats argument (statistics name).
        sAsm        - The a_szMnemonic argument; not used.
        sForm       - The a_Form argument, a key of g_kdIemForms.
        sUpper      - The a_Upper argument (upper-case mnemonic).
        sLower      - The a_Lower argument (lower-case mnemonic).
        sDisHints   - The a_fDisHints argument: '|' separated DISOPTYPE_XXX values or 0.
        sIemHints   - The a_fIemHints argument: '|' separated IEMOPHINT_XXX values or 0.
        asOperands  - The a_OpN arguments (operand types, keys of g_kdOpTypes).

    Returns True.
    """
    #
    # Some invocation checks.
    #
    if sUpper != sUpper.upper():
        self.error('%s: bad a_Upper parameter: %s' % (sMacro, sUpper,));
    if sLower != sLower.lower():
        self.error('%s: bad a_Lower parameter: %s' % (sMacro, sLower,));
    if sUpper.lower() != sLower:
        self.error('%s: a_Upper and a_Lower parameters does not match: %s vs %s' % (sMacro, sUpper, sLower,));
    if not self.oReMnemonic.match(sLower):
        self.error('%s: invalid a_Lower: %s (valid: %s)' % (sMacro, sLower, self.oReMnemonic.pattern,));

    #
    # Check if sIemHints tells us to not consider this macro invocation.
    #
    if sIemHints.find('IEMOPHINT_SKIP_PYTHON') >= 0:
        return True;

    # Apply to the last instruction only for now.
    if not self.aoCurInstrs:
        self.addInstruction();
    oInstr = self.aoCurInstrs[-1];
    # iLineMnemonicMacro is -1 until a mnemonic macro has been seen for the instruction.
    if oInstr.iLineMnemonicMacro == -1:
        oInstr.iLineMnemonicMacro = self.iLine;
    else:
        self.error('%s: already saw a IEMOP_MNEMONIC* macro on line %u for this instruction'
                   % (sMacro, oInstr.iLineMnemonicMacro,));

    # Mnemonic
    if oInstr.sMnemonic is None:
        oInstr.sMnemonic = sLower;
    elif oInstr.sMnemonic != sLower:
        self.error('%s: current instruction and a_Lower does not match: %s vs %s' % (sMacro, oInstr.sMnemonic, sLower,));

    # Process operands.
    # The instruction must either have no operands yet or the same number as the macro.
    if len(oInstr.aoOperands) not in [0, len(asOperands)]:
        self.error('%s: number of operands given by @opN does not match macro: %s vs %s'
                   % (sMacro, len(oInstr.aoOperands), len(asOperands),));
    for iOperand, sType in enumerate(asOperands):
        sWhere = g_kdOpTypes.get(sType, [None, None])[1];
        if sWhere is None:
            self.error('%s: unknown a_Op%u value: %s' % (sMacro, iOperand + 1, sType));
            if iOperand < len(oInstr.aoOperands): # error recovery.
                sWhere = oInstr.aoOperands[iOperand].sWhere;
                sType = oInstr.aoOperands[iOperand].sType;
            else:
                sWhere = 'reg';
                sType = 'Gb';
        # Append the operand if it is the next missing one, otherwise compare with the existing one.
        if iOperand == len(oInstr.aoOperands):
            oInstr.aoOperands.append(Operand(sWhere, sType))
        elif oInstr.aoOperands[iOperand].sWhere != sWhere or oInstr.aoOperands[iOperand].sType != sType:
            self.error('%s: @op%u and a_Op%u mismatch: %s:%s vs %s:%s'
                       % (sMacro, iOperand + 1, iOperand + 1, oInstr.aoOperands[iOperand].sWhere,
                          oInstr.aoOperands[iOperand].sType, sWhere, sType,));

    # Encoding.
    # g_kdIemForms[sForm][0] is the encoding and [1] the operand locations (or None).
    if sForm not in g_kdIemForms:
        self.error('%s: unknown a_Form value: %s' % (sMacro, sForm,));
    else:
        if oInstr.sEncoding is None:
            oInstr.sEncoding = g_kdIemForms[sForm][0];
        elif g_kdIemForms[sForm][0] != oInstr.sEncoding:
            # NOTE(review): the message formats the whole g_kdIemForms entry rather than just [0].
            self.error('%s: current instruction @openc and a_Form does not match: %s vs %s (%s)'
                       % (sMacro, oInstr.sEncoding, g_kdIemForms[sForm], sForm));

        # Check the parameter locations for the encoding.
        if g_kdIemForms[sForm][1] is not None:
            if len(g_kdIemForms[sForm][1]) != len(oInstr.aoOperands):
                self.error('%s: The a_Form=%s has a different operand count: %s (form) vs %s'
                           % (sMacro, sForm, len(g_kdIemForms[sForm][1]), len(oInstr.aoOperands) ));
            else:
                for iOperand, sWhere in enumerate(g_kdIemForms[sForm][1]):
                    if oInstr.aoOperands[iOperand].sWhere != sWhere:
                        self.error('%s: current instruction @op%u and a_Form location does not match: %s vs %s (%s)'
                                   % (sMacro, iOperand + 1, oInstr.aoOperands[iOperand].sWhere, sWhere, sForm,));

    # Stats.
    if not self.oReStatsName.match(sStats):
        self.error('%s: invalid a_Stats value: %s' % (sMacro, sStats,));
    elif oInstr.sStats is None:
        oInstr.sStats = sStats;
    elif oInstr.sStats != sStats:
        self.error('%s: mismatching @opstats and a_Stats value: %s vs %s'
                   % (sMacro, oInstr.sStats, sStats,));

    # Process the hints (simply merge with @ophints w/o checking anything).
    # The prefix is stripped and the rest lower-cased to get the g_kdHints key; a plain '0' means no hints.
    for sHint in sDisHints.split('|'):
        sHint = sHint.strip();
        if sHint.startswith('DISOPTYPE_'):
            sShortHint = sHint[len('DISOPTYPE_'):].lower();
            if sShortHint in g_kdHints:
                oInstr.dHints[sShortHint] = True; # (dummy value, using dictionary for speed)
            else:
                self.error('%s: unknown a_fDisHints value: %s' % (sMacro, sHint,));
        elif sHint != '0':
            self.error('%s: expected a_fDisHints value: %s' % (sMacro, sHint,));

    for sHint in sIemHints.split('|'):
        sHint = sHint.strip();
        if sHint.startswith('IEMOPHINT_'):
            sShortHint = sHint[len('IEMOPHINT_'):].lower();
            if sShortHint in g_kdHints:
                oInstr.dHints[sShortHint] = True; # (dummy value, using dictionary for speed)
            else:
                self.error('%s: unknown a_fIemHints value: %s' % (sMacro, sHint,));
        elif sHint != '0':
            self.error('%s: expected a_fIemHints value: %s' % (sMacro, sHint,));


    _ = sAsm;
    return True;
2992
def workerIemOpMnemonic(self, sMacro, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands):
    """
    Processes one of the IEMOP_MNEMONIC0, IEMOP_MNEMONIC1, IEMOP_MNEMONIC2,
    IEMOP_MNEMONIC3, and IEMOP_MNEMONIC4 macros.

    These macros carry neither a statistics name nor an assembly string, so
    both are derived from the lower-case mnemonic and the operands before
    handing over to workerIemOpMnemonicEx.

    Returns the result of workerIemOpMnemonicEx.
    """
    if asOperands:
        sStats = '_'.join([sLower,] + list(asOperands))
        sAsm = sLower + ' ' + ','.join(asOperands)
    else:
        sStats = sLower
        sAsm = sLower
    return self.workerIemOpMnemonicEx(sMacro, sStats, sAsm, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands)
3002
def checkCodeForMacro(self, sCode):
    """
    Checks code for relevant macro invocation.

    Two kinds of macros are looked for: the FNIEMOP_XXX function definition
    macros and the IEMOP_MNEMONIC* macros.

    Returns True if a FNIEMOP_XXX macro was found and processed, False
    otherwise (also when IEMOP_MNEMONIC* macros were processed).
    """
    #
    # Scan macro invocations.
    #
    # (Nothing is done unless there is an open parenthesis past column 0.)
    if sCode.find('(') > 0:
        # Look for instruction decoder function definitions. ASSUME single line.
        asArgs = self.findAndParseFirstMacroInvocation(sCode,
                                                       [ 'FNIEMOP_DEF',
                                                         'FNIEMOP_STUB',
                                                         'FNIEMOP_STUB_1',
                                                         'FNIEMOP_UD_STUB',
                                                         'FNIEMOP_UD_STUB_1' ]);
        if asArgs is not None:
            # asArgs[0] is the macro name, asArgs[1] its first argument.
            sFunction = asArgs[1];

            if not self.aoCurInstrs:
                self.addInstruction();
            # iLineFnIemOpMacro is -1 until a FNIEMOP_XXX macro has been seen for the instruction.
            for oInstr in self.aoCurInstrs:
                if oInstr.iLineFnIemOpMacro == -1:
                    oInstr.iLineFnIemOpMacro = self.iLine;
                else:
                    self.error('%s: already seen a FNIEMOP_XXX macro for %s' % (asArgs[0], oInstr,) );
            self.setInstrunctionAttrib('sFunction', sFunction);
            self.setInstrunctionAttrib('fStub', asArgs[0].find('STUB') > 0, fOverwrite = True);
            self.setInstrunctionAttrib('fUdStub', asArgs[0].find('UD_STUB') > 0, fOverwrite = True);
            # The stub macros flush the current instructions right away.
            if asArgs[0].find('STUB') > 0:
                self.doneInstructions();
            return True;

        # IEMOP_MNEMONIC(a_Stats, a_szMnemonic) IEMOP_INC_STATS(a_Stats)
        # Only acted upon when there is exactly one current instruction.
        asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC');
        if asArgs is not None:
            if len(self.aoCurInstrs) == 1:
                oInstr = self.aoCurInstrs[0];
                if oInstr.sStats is None:
                    oInstr.sStats = asArgs[1];
                self.deriveMnemonicAndOperandsFromStats(oInstr, asArgs[1]);

        # For the macros below the operand arguments (a_OpN) sit between a_Lower
        # and a_fDisHints, so the asArgs indexes of the hints move with the
        # operand count.

        # IEMOP_MNEMONIC0EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_fDisHints, a_fIemHints)
        asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC0EX');
        if asArgs is not None:
            self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[6], asArgs[7],
                                       []);
        # IEMOP_MNEMONIC1EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_fDisHints, a_fIemHints)
        asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC1EX');
        if asArgs is not None:
            self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[7], asArgs[8],
                                       [asArgs[6],]);
        # IEMOP_MNEMONIC2EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_fDisHints, a_fIemHints)
        asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC2EX');
        if asArgs is not None:
            self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[8], asArgs[9],
                                       [asArgs[6], asArgs[7]]);
        # IEMOP_MNEMONIC3EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_fDisHints, a_fIemHints)
        asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC3EX');
        if asArgs is not None:
            self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[9],
                                       asArgs[10], [asArgs[6], asArgs[7], asArgs[8],]);
        # IEMOP_MNEMONIC4EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_Op4, a_fDisHints,
        #                   a_fIemHints)
        asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC4EX');
        if asArgs is not None:
            self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[10],
                                       asArgs[11], [asArgs[6], asArgs[7], asArgs[8], asArgs[9],]);

        # IEMOP_MNEMONIC0(a_Form, a_Upper, a_Lower, a_fDisHints, a_fIemHints)
        asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC0');
        if asArgs is not None:
            self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], []);
        # IEMOP_MNEMONIC1(a_Form, a_Upper, a_Lower, a_Op1, a_fDisHints, a_fIemHints)
        asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC1');
        if asArgs is not None:
            self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[5], asArgs[6], [asArgs[4],]);
        # IEMOP_MNEMONIC2(a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_fDisHints, a_fIemHints)
        asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC2');
        if asArgs is not None:
            self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[6], asArgs[7],
                                     [asArgs[4], asArgs[5],]);
        # IEMOP_MNEMONIC3(a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_fDisHints, a_fIemHints)
        asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC3');
        if asArgs is not None:
            self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[7], asArgs[8],
                                     [asArgs[4], asArgs[5], asArgs[6],]);
        # IEMOP_MNEMONIC4(a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_Op4, a_fDisHints, a_fIemHints)
        asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC4');
        if asArgs is not None:
            self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[8], asArgs[9],
                                     [asArgs[4], asArgs[5], asArgs[6], asArgs[7],]);

    return False;
3096
3097
def parse(self):
    """
    Parses the given file.

    Walks self.asLines from self.iLine, switching between the code state
    (self.kiCode) and the multi-line comment state (self.kiCommentMulti).
    Code is handed to checkCodeForMacro() and completed multi-line comments
    to parseComment().

    Returns the result of self.printErrors() (the number of errors).
    Raises exception on fatal trouble.
    """
    #self.debug('Parsing %s' % (self.sSrcFile,));

    while self.iLine < len(self.asLines):
        sLine = self.asLines[self.iLine];
        self.iLine += 1;

        # We only look for comments, so only lines with a slash might possibly
        # influence the parser state.
        offSlash = sLine.find('/');
        if offSlash >= 0:
            # Do the full scan unless the first slash starts a C++ line comment while in code state.
            if offSlash + 1 >= len(sLine) or sLine[offSlash + 1] != '/' or self.iState != self.kiCode:
                offLine = 0;
                while offLine < len(sLine):
                    if self.iState == self.kiCode:
                        offHit = sLine.find('/*', offLine); # only multiline comments for now.
                        if offHit >= 0:
                            # Check the code before the comment, then start collecting the comment.
                            self.checkCodeForMacro(sLine[offLine:offHit]);
                            self.sComment = '';
                            self.iCommentLine = self.iLine;
                            self.iState = self.kiCommentMulti;
                            offLine = offHit + 2;
                        else:
                            self.checkCodeForMacro(sLine[offLine:]);
                            offLine = len(sLine);

                    elif self.iState == self.kiCommentMulti:
                        offHit = sLine.find('*/', offLine);
                        if offHit >= 0:
                            # End of the comment: parse it and go back to scanning code.
                            self.sComment += sLine[offLine:offHit];
                            self.iState = self.kiCode;
                            offLine = offHit + 2;
                            self.parseComment();
                        else:
                            self.sComment += sLine[offLine:];
                            offLine = len(sLine);
                    else:
                        assert False;
            # C++ line comment.
            elif offSlash > 0:
                self.checkCodeForMacro(sLine[:offSlash]);

        # No slash, but append the line if in multi-line comment.
        elif self.iState == self.kiCommentMulti:
            #self.debug('line %d: multi' % (self.iLine,));
            self.sComment += sLine;

        # No slash, but check code line for relevant macro.
        elif self.iState == self.kiCode and sLine.find('IEMOP_') >= 0:
            #self.debug('line %d: macro' % (self.iLine,));
            self.checkCodeForMacro(sLine);

        # If the line is a '}' in the first position, complete the instructions.
        elif self.iState == self.kiCode and sLine[0] == '}':
            #self.debug('line %d: }' % (self.iLine,));
            self.doneInstructions();

    # Flush whatever is still pending at the end of the file.
    self.doneInstructions();
    self.debug('%3s stubs out of %3s instructions in %s' % (self.cTotalStubs, self.cTotalInstr, os.path.basename(self.sSrcFile),));
    return self.printErrors();
3163
3164
def __parseFileByName(sSrcFile, sDefaultMap):
    """
    Parses one source file for instruction specifications.

    Returns the result of SimpleParser.parse() for the file.
    Raises Exception if the file cannot be opened or read.  Exceptions from
    the parser are passed on, a ParserException being printed first.
    """
    #
    # Load the file content as a list of lines.
    #
    try:
        oSrc = open(sSrcFile, "r")
    except Exception as oXcpt:
        raise Exception("failed to open %s for reading: %s" % (sSrcFile, oXcpt,))
    with oSrc:
        try:
            asContent = oSrc.readlines()
        except Exception as oXcpt:
            raise Exception("failed to read %s: %s" % (sSrcFile, oXcpt,))

    #
    # Run the parser over it.
    #
    try:
        return SimpleParser(sSrcFile, asContent, sDefaultMap).parse()
    except ParserException as oXcpt:
        print(str(oXcpt))
        raise
3195
3196
def __doTestCopying():
    """
    Executes the asCopyTests instructions.

    Each reference is looked up by statistics name first and by function
    name second, and the tests of every match are appended to the
    destination instruction.

    Returns the number of errors (references that were not found or that
    match the destination); the messages are written to stderr.
    """
    asProblems = []
    for oDst in g_aoAllInstructions:
        for sRef in oDst.asCopyTests or ():
            oByStat = g_dAllInstructionsByStat.get(sRef, None)
            if oByStat:
                aoSources = [oByStat,]
            else:
                aoSources = g_dAllInstructionsByFunction.get(sRef, [])

            if not aoSources:
                asProblems.append('%s:%s: error: @opcopytests reference "%s" not found\n'
                                  % ( oDst.sSrcFile, oDst.iLineCreated, sRef))
                continue

            for oSrc in aoSources:
                if oSrc != oDst:
                    oDst.aoTests.extend(oSrc.aoTests)
                else:
                    asProblems.append('%s:%s: error: @opcopytests reference "%s" matches the destination\n'
                                      % ( oDst.sSrcFile, oDst.iLineCreated, sRef))

    if asProblems:
        sys.stderr.write(u''.join(asProblems))
    return len(asProblems)
3224
3225
def __applyOnlyTest():
    """
    Restricts testing to the instructions in g_aoOnlyTestInstructions.

    If that list is not empty, aoTests is reset to an empty list for every
    instruction that has tests but is not in the list.

    Returns 0.
    """
    if not g_aoOnlyTestInstructions:
        return 0
    for oInstr in g_aoAllInstructions:
        if oInstr.aoTests and oInstr not in g_aoOnlyTestInstructions:
            oInstr.aoTests = []
    return 0
3237
def __parseAll():
    """
    Parses all the IEMAllInstruction*.cpp.h files.

    The files are expected in the directory of this script.  After parsing,
    the test copying and the only-test filtering are applied.

    Returns True.  Exits the process with status 1 if any errors were
    counted.  Raises exception on failure.
    """
    sHere = os.path.dirname(os.path.abspath(__file__))
    atInputs = (
        ( 'one',        'IEMAllInstructionsOneByte.cpp.h'),
        ( 'two0f',      'IEMAllInstructionsTwoByte0f.cpp.h'),
        ( 'three0f38',  'IEMAllInstructionsThree0f38.cpp.h'),
        ( 'three0f3a',  'IEMAllInstructionsThree0f3a.cpp.h'),
        ( 'vexmap1',    'IEMAllInstructionsVexMap1.cpp.h'),
        ( 'vexmap2',    'IEMAllInstructionsVexMap2.cpp.h'),
        ( 'vexmap3',    'IEMAllInstructionsVexMap3.cpp.h'),
        ( '3dnow',      'IEMAllInstructions3DNow.cpp.h'),
    )

    # Files first (in the order listed), then the two post-processing steps.
    cTotal = 0
    for sMap, sFile in atInputs:
        cTotal += __parseFileByName(os.path.join(sHere, sFile), sMap)
    cTotal += __doTestCopying()
    cTotal += __applyOnlyTest()

    if cTotal == 0:
        return True
    #raise Exception('%d parse errors' % (cErrors,));
    sys.exit(1)
    return True
3264
3265
3266
# Parse all the instruction files as soon as the module is loaded (this also
# happens on import).  __parseAll() exits the process with status 1 if any
# errors were counted.
__parseAll();
3268
3269
3270#
3271# Generators (may perhaps move later).
3272#
def generateDisassemblerTables(oDstFile = sys.stdout):
    """
    Generates disassembler tables.

    The instruction maps in g_dInstructionMaps are sorted by encoding and
    lead opcode bytes, and a DISOPCODE table is written to oDstFile for the
    first one.  Only the first one, because the loop is left after the first
    iteration for now.

    Parameters:
        oDstFile    - File object to write to.  The default is sys.stdout as
                      bound when the function was defined.
    """

    for sName, oMap in sorted(iter(g_dInstructionMaps.items()),
                              key = lambda aKV: aKV[1].sEncoding + ''.join(aKV[1].asLeadOpcodes)):
        assert oMap.sName == sName;
        asLines = [];

        asLines.append('/* Generated from: %-11s Selector: %-7s Encoding: %-7s Lead bytes opcodes: %s */'
                       % ( oMap.sName, oMap.sSelector, oMap.sEncoding, ' '.join(oMap.asLeadOpcodes), ));
        asLines.append('const DISOPCODE %s[] =' % (oMap.getDisasTableName(),));
        asLines.append('{');

        # The column at which each entry of asColumns (below) should start.
        aoffColumns = [4, 29, 49, 65, 77, 89, 109, 125, 141, 157, 183, 199];

        aoTableOrder = oMap.getInstructionsInTableOrder();
        for iInstr, oInstr in enumerate(aoTableOrder):

            # Start every group of 16 entries with a comment giving the high nibble.
            if (iInstr & 0xf) == 0:
                if iInstr != 0:
                    asLines.append('');
                asLines.append(' /* %x */' % (iInstr >> 4,));

            if oInstr is None:
                pass;#asLines.append(' /* %#04x */ None,' % (iInstr));
            elif isinstance(oInstr, list):
                asLines.append(' /* %#04x */ ComplicatedListStuffNeedingWrapper,' % (iInstr));
            else:
                # OP takes up to three operands, OPVEX up to four.
                sMacro = 'OP';
                cMaxOperands = 3;
                if len(oInstr.aoOperands) > 3:
                    sMacro = 'OPVEX'
                    cMaxOperands = 4;
                    assert len(oInstr.aoOperands) <= cMaxOperands;

                #
                # Format string.
                #
                sTmp = '%s("%s' % (sMacro, oInstr.sMnemonic,);
                for iOperand, oOperand in enumerate(oInstr.aoOperands):
                    sTmp += ' ' if iOperand == 0 else ',';
                    if g_kdOpTypes[oOperand.sType][2][0] != '%': ## @todo remove upper() later.
                        sTmp += g_kdOpTypes[oOperand.sType][2].upper(); ## @todo remove upper() later.
                    else:
                        sTmp += g_kdOpTypes[oOperand.sType][2];
                sTmp += '",';
                asColumns = [ sTmp, ];

                #
                # Decoders.
                #
                # iStart marks where the decoder columns begin, so that their number can be worked out below.
                iStart = len(asColumns);
                if oInstr.sEncoding is None:
                    pass;
                elif oInstr.sEncoding == 'ModR/M':
                    # ASSUME the first operand is using the ModR/M encoding
                    assert len(oInstr.aoOperands) >= 1 and oInstr.aoOperands[0].usesModRM();
                    asColumns.append('IDX_ParseModRM,');
                    ## @todo IDX_ParseVexDest
                    # Is second operand using ModR/M too?
                    if len(oInstr.aoOperands) > 1 and oInstr.aoOperands[1].usesModRM():
                        asColumns.append('IDX_UseModRM,')
                elif oInstr.sEncoding in [ 'prefix', ]:
                    for oOperand in oInstr.aoOperands:
                        asColumns.append('0,');
                elif oInstr.sEncoding in [ 'fixed' ]:
                    pass;
                elif oInstr.sEncoding == 'vex2':
                    asColumns.append('IDX_ParseVex2b,')
                elif oInstr.sEncoding == 'vex3':
                    asColumns.append('IDX_ParseVex3b,')
                elif oInstr.sEncoding in g_dInstructionMaps:
                    asColumns.append(g_dInstructionMaps[oInstr.sEncoding].sDisParse + ',');
                else:
                    ## @todo
                    #IDX_ParseTwoByteEsc,
                    #IDX_ParseGrp1,
                    #IDX_ParseShiftGrp2,
                    #IDX_ParseGrp3,
                    #IDX_ParseGrp4,
                    #IDX_ParseGrp5,
                    #IDX_Parse3DNow,
                    #IDX_ParseGrp6,
                    #IDX_ParseGrp7,
                    #IDX_ParseGrp8,
                    #IDX_ParseGrp9,
                    #IDX_ParseGrp10,
                    #IDX_ParseGrp12,
                    #IDX_ParseGrp13,
                    #IDX_ParseGrp14,
                    #IDX_ParseGrp15,
                    #IDX_ParseGrp16,
                    #IDX_ParseThreeByteEsc4,
                    #IDX_ParseThreeByteEsc5,
                    #IDX_ParseModFence,
                    #IDX_ParseEscFP,
                    #IDX_ParseNopPause,
                    #IDX_ParseInvOpModRM,
                    assert False, str(oInstr);

                # Check for immediates and stuff in the remaining operands.
                for oOperand in oInstr.aoOperands[len(asColumns) - iStart:]:
                    sIdx = g_kdOpTypes[oOperand.sType][0];
                    if sIdx != 'IDX_UseModRM':
                        asColumns.append(sIdx + ',');
                # Pad the decoder columns up to cMaxOperands.
                asColumns.extend(['0,'] * (cMaxOperands - (len(asColumns) - iStart)));

                #
                # Opcode and operands.
                #
                assert oInstr.sDisEnum, str(oInstr);
                asColumns.append(oInstr.sDisEnum + ',');
                iStart = len(asColumns)
                for oOperand in oInstr.aoOperands:
                    asColumns.append('OP_PARM_' + g_kdOpTypes[oOperand.sType][3] + ',');
                # Pad the operand columns up to cMaxOperands.
                asColumns.extend(['OP_PARM_NONE,'] * (cMaxOperands - (len(asColumns) - iStart)));

                #
                # Flags.
                #
                # Only hints mapping to a DISOPTYPE_XXX define are included; this column also closes the macro call.
                sTmp = '';
                for sHint in sorted(oInstr.dHints.keys()):
                    sDefine = g_kdHints[sHint];
                    if sDefine.startswith('DISOPTYPE_'):
                        if sTmp:
                            sTmp += ' | ' + sDefine;
                        else:
                            sTmp += sDefine;
                if sTmp:
                    sTmp += '),';
                else:
                    sTmp += '0),';
                asColumns.append(sTmp);

                #
                # Format the columns into a line.
                #
                # Pad to the column offset, or use a single space if the line is already at or past it.
                sLine = '';
                for i, s in enumerate(asColumns):
                    if len(sLine) < aoffColumns[i]:
                        sLine += ' ' * (aoffColumns[i] - len(sLine));
                    else:
                        sLine += ' ';
                    sLine += s;

                # OP("psrlw %Vdq,%Wdq", IDX_ParseModRM, IDX_UseModRM, 0, OP_PSRLW, OP_PARM_Vdq, OP_PARM_Wdq, OP_PARM_NONE,
                # DISOPTYPE_HARMLESS),
                # define OP(pszOpcode, idxParse1, idxParse2, idxParse3, opcode, param1, param2, param3, optype) \
                # { pszOpcode, idxParse1, idxParse2, idxParse3, 0, opcode, param1, param2, param3, 0, 0, optype }

                asLines.append(sLine);

        asLines.append('};');
        asLines.append('AssertCompile(RT_ELEMENTS(%s) == %s);' % (oMap.getDisasTableName(), oMap.getTableSize(),));

        #
        # Write out the lines.
        #
        oDstFile.write('\n'.join(asLines));
        oDstFile.write('\n');
        break; #for now
3436
# When run as a script, write the disassembler tables to the default
# destination of generateDisassemblerTables (sys.stdout).
if __name__ == '__main__':
    generateDisassemblerTables();
3439
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette