VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllInstructionsPython.py@ 66920

Last change on this file since 66920 was 66920, checked in by vboxsync, 8 years ago

IEM: Implemented movss Uss,Hss,Vss (VEX.F3.0F 11 mod=3) and movss Md,Vss (VEX.F3.0F 11 mod!=3).

  • Property svn:eol-style set to LF
  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 143.2 KB
Line 
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3# $Id: IEMAllInstructionsPython.py 66920 2017-05-16 19:21:21Z vboxsync $
4
5"""
6IEM instruction extractor.
7
8This script/module parses the IEMAllInstruction*.cpp.h files next to it and
9collects information about the instructions. It can then be used to generate
10disassembler tables and tests.
11"""
12
13__copyright__ = \
14"""
15Copyright (C) 2017 Oracle Corporation
16
17This file is part of VirtualBox Open Source Edition (OSE), as
18available from http://www.virtualbox.org. This file is free software;
19you can redistribute it and/or modify it under the terms of the GNU
20General Public License (GPL) as published by the Free Software
21Foundation, in version 2 as it comes in the "COPYING" file of the
22VirtualBox OSE distribution. VirtualBox OSE is distributed in the
23hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
24
25The contents of this file may alternatively be used under the terms
26of the Common Development and Distribution License Version 1.0
27(CDDL) only, as it comes in the "COPYING.CDDL" file of the
28VirtualBox OSE distribution, in which case the provisions of the
29CDDL are applicable instead of those of the GPL.
30
31You may elect to license modified versions of this file under the
32terms and conditions of either the GPL or the CDDL or both.
33"""
34__version__ = "$Revision: 66920 $"
35
36# pylint: disable=anomalous-backslash-in-string
37
38# Standard python imports.
39import os
40import re
41import sys
42
43## Only the main script needs to modify the path.
44#g_ksValidationKitDir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),
45# 'ValidationKit');
46#sys.path.append(g_ksValidationKitDir);
47#
48#from common import utils; - Windows build boxes doesn't have pywin32.
49
# Python 3 hacks:
# Python 3 has no separate 'long' type, so alias the name to 'int' for the
# code in this file that converts strings using long().
if sys.version_info[0] >= 3:
    long = int; # pylint: disable=redefined-builtin,invalid-name
53
54
## X86_EFL_XXX constant names mapped to their numeric values.
## The trailing comments give the corresponding C expression.
g_kdX86EFlagsConstants = {
    'X86_EFL_CF': 0x00000001, # RT_BIT_32(0)
    'X86_EFL_1': 0x00000002, # RT_BIT_32(1)
    'X86_EFL_PF': 0x00000004, # RT_BIT_32(2)
    'X86_EFL_AF': 0x00000010, # RT_BIT_32(4)
    'X86_EFL_ZF': 0x00000040, # RT_BIT_32(6)
    'X86_EFL_SF': 0x00000080, # RT_BIT_32(7)
    'X86_EFL_TF': 0x00000100, # RT_BIT_32(8)
    'X86_EFL_IF': 0x00000200, # RT_BIT_32(9)
    'X86_EFL_DF': 0x00000400, # RT_BIT_32(10)
    'X86_EFL_OF': 0x00000800, # RT_BIT_32(11)
    'X86_EFL_IOPL': 0x00003000, # (RT_BIT_32(12) | RT_BIT_32(13))
    'X86_EFL_NT': 0x00004000, # RT_BIT_32(14)
    'X86_EFL_RF': 0x00010000, # RT_BIT_32(16)
    'X86_EFL_VM': 0x00020000, # RT_BIT_32(17)
    'X86_EFL_AC': 0x00040000, # RT_BIT_32(18)
    'X86_EFL_VIF': 0x00080000, # RT_BIT_32(19)
    'X86_EFL_VIP': 0x00100000, # RT_BIT_32(20)
    'X86_EFL_ID': 0x00200000, # RT_BIT_32(21)
    'X86_EFL_LIVE_MASK': 0x003f7fd5, # UINT32_C(0x003f7fd5)
    'X86_EFL_RA1_MASK': 0x00000002, # RT_BIT_32(1)
};
77
## EFlags values allowed in \@opfltest, \@opflmodify, \@opflundef, \@opflset, and \@opflclear.
##
## Maps a flag mnemonic to the name of an X86_EFL_XXX constant.  A leading '!'
## on the constant name marks a mnemonic that stands for the flag being clear.
##
## NOTE(review): DOS DEBUG notation normally uses 'pe' for PF set and 'po' for
## PF clear, which is the opposite of the 'po'/'pe' mapping below.  Confirm
## against the existing test data before changing anything here.
g_kdEFlagsMnemonics = {
    # Debugger flag notation (sorted by value):
    'cf': 'X86_EFL_CF', ##< Carry Flag.
    'nc': '!X86_EFL_CF', ##< No Carry.

    'po': 'X86_EFL_PF', ##< Parity Odd.
    'pe': '!X86_EFL_PF', ##< Parity Even.

    'af': 'X86_EFL_AF', ##< Aux Flag.
    'na': '!X86_EFL_AF', ##< No Aux.

    'zr': 'X86_EFL_ZF', ##< ZeRo.
    'nz': '!X86_EFL_ZF', ##< No Zero.

    'ng': 'X86_EFL_SF', ##< NeGative (sign).
    'pl': '!X86_EFL_SF', ##< PLus (sign).

    'tf': 'X86_EFL_TF', ##< Trap flag.

    'ei': 'X86_EFL_IF', ##< Enabled Interrupts.
    'di': '!X86_EFL_IF', ##< Disabled Interrupts.

    'dn': 'X86_EFL_DF', ##< DowN (string op direction).
    'up': '!X86_EFL_DF', ##< UP (string op direction).

    'ov': 'X86_EFL_OF', ##< OVerflow.
    'nv': '!X86_EFL_OF', ##< No Overflow.

    'nt': 'X86_EFL_NT', ##< Nested Task.
    'rf': 'X86_EFL_RF', ##< Resume Flag.
    'vm': 'X86_EFL_VM', ##< Virtual-8086 Mode.
    'ac': 'X86_EFL_AC', ##< Alignment Check.
    'vif': 'X86_EFL_VIF', ##< Virtual Interrupt Flag.
    'vip': 'X86_EFL_VIP', ##< Virtual Interrupt Pending.

    # Reference manual notation not covered above (sorted by value):
    'pf': 'X86_EFL_PF',
    'zf': 'X86_EFL_ZF',
    'sf': 'X86_EFL_SF',
    'if': 'X86_EFL_IF',
    'df': 'X86_EFL_DF',
    'of': 'X86_EFL_OF',
    'iopl': 'X86_EFL_IOPL',
    'id': 'X86_EFL_ID',
};
124
## Constants and values for CR0.
## Maps X86_CR0_XXX constant names to their numeric values; the trailing
## comments give the corresponding C expression.
g_kdX86Cr0Constants = {
    'X86_CR0_PE': 0x00000001, # RT_BIT_32(0)
    'X86_CR0_MP': 0x00000002, # RT_BIT_32(1)
    'X86_CR0_EM': 0x00000004, # RT_BIT_32(2)
    'X86_CR0_TS': 0x00000008, # RT_BIT_32(3)
    'X86_CR0_ET': 0x00000010, # RT_BIT_32(4)
    'X86_CR0_NE': 0x00000020, # RT_BIT_32(5)
    'X86_CR0_WP': 0x00010000, # RT_BIT_32(16)
    'X86_CR0_AM': 0x00040000, # RT_BIT_32(18)
    'X86_CR0_NW': 0x20000000, # RT_BIT_32(29)
    'X86_CR0_CD': 0x40000000, # RT_BIT_32(30)
    'X86_CR0_PG': 0x80000000, # RT_BIT_32(31)
};
139
## Constants and values for CR4.
## Maps X86_CR4_XXX constant names to their numeric values; the trailing
## comments give the corresponding C expression.
g_kdX86Cr4Constants = {
    'X86_CR4_VME': 0x00000001, # RT_BIT_32(0)
    'X86_CR4_PVI': 0x00000002, # RT_BIT_32(1)
    'X86_CR4_TSD': 0x00000004, # RT_BIT_32(2)
    'X86_CR4_DE': 0x00000008, # RT_BIT_32(3)
    'X86_CR4_PSE': 0x00000010, # RT_BIT_32(4)
    'X86_CR4_PAE': 0x00000020, # RT_BIT_32(5)
    'X86_CR4_MCE': 0x00000040, # RT_BIT_32(6)
    'X86_CR4_PGE': 0x00000080, # RT_BIT_32(7)
    'X86_CR4_PCE': 0x00000100, # RT_BIT_32(8)
    'X86_CR4_OSFXSR': 0x00000200, # RT_BIT_32(9)
    'X86_CR4_OSXMMEEXCPT': 0x00000400, # RT_BIT_32(10)
    'X86_CR4_VMXE': 0x00002000, # RT_BIT_32(13)
    'X86_CR4_SMXE': 0x00004000, # RT_BIT_32(14)
    'X86_CR4_PCIDE': 0x00020000, # RT_BIT_32(17)
    'X86_CR4_OSXSAVE': 0x00040000, # RT_BIT_32(18)
    'X86_CR4_SMEP': 0x00100000, # RT_BIT_32(20)
    'X86_CR4_SMAP': 0x00200000, # RT_BIT_32(21)
    'X86_CR4_PKE': 0x00400000, # RT_BIT_32(22)
};
161
## XSAVE components (XCR0).
## Maps XSAVE_C_XXX constant names to their numeric values.  The last two
## entries are combined masks rather than single component bits.
g_kdX86XSaveCConstants = {
    'XSAVE_C_X87': 0x00000001,
    'XSAVE_C_SSE': 0x00000002,
    'XSAVE_C_YMM': 0x00000004,
    'XSAVE_C_BNDREGS': 0x00000008,
    'XSAVE_C_BNDCSR': 0x00000010,
    'XSAVE_C_OPMASK': 0x00000020,
    'XSAVE_C_ZMM_HI256': 0x00000040,
    'XSAVE_C_ZMM_16HI': 0x00000080,
    'XSAVE_C_PKRU': 0x00000200,
    'XSAVE_C_LWP': 0x4000000000000000,
    'XSAVE_C_X': 0x8000000000000000,
    'XSAVE_C_ALL_AVX': 0x000000c4, # For clearing all AVX bits.
    'XSAVE_C_ALL_AVX_SSE': 0x000000c6, # For clearing all AVX and SSE bits.
};
178
179
## \@op[1-4] locations
## The keys are the location names (they are what the second field of each
## g_kdOpTypes entry refers to); every value in this table is an empty list.
g_kdOpLocations = {
    'reg': [], ## modrm.reg
    'rm': [], ## modrm.rm
    'imm': [], ## immediate instruction data
    'vvvv': [], ## VEX.vvvv

    # fixed registers.
    'AL': [],
    'rAX': [],
    'rSI': [],
    'rDI': [],
    'rFLAGS': [],
    'CS': [],
    'DS': [],
    'ES': [],
    'FS': [],
    'GS': [],
    'SS': [],
};
200
## \@op[1-4] types
##
## Value fields:
## - 0: the normal IDX_ParseXXX handler (IDX_UseModRM == IDX_ParseModRM).
## - 1: the location (g_kdOpLocations).
## - 2: disassembler format string version of the type.
## - 3: disassembler OP_PARAM_XXX (XXX only).
##
## Note! See the A.2.1 in SDM vol 2 for the type names.
##
## Note! Entries with an _RO, _WO or _RW suffix mostly carry the same four
##       fields as the unsuffixed type (presumably the suffix only records
##       read-only / write-only / read-write access - TODO confirm).
g_kdOpTypes = {
    # Fixed addresses
    'Ap': ( 'IDX_ParseImmAddrF', 'imm', '%Ap', 'Ap', ),

    # ModR/M.rm
    'Eb': ( 'IDX_UseModRM', 'rm', '%Eb', 'Eb', ),
    'Ew': ( 'IDX_UseModRM', 'rm', '%Ew', 'Ew', ),
    'Ev': ( 'IDX_UseModRM', 'rm', '%Ev', 'Ev', ),
    'Wss': ( 'IDX_UseModRM', 'rm', '%Wss', 'Wss', ),
    'Wss_WO': ( 'IDX_UseModRM', 'rm', '%Wss', 'Wss', ),
    'Wsd': ( 'IDX_UseModRM', 'rm', '%Wsd', 'Wsd', ),
    'Wsd_WO': ( 'IDX_UseModRM', 'rm', '%Wsd', 'Wsd', ),
    'Wps': ( 'IDX_UseModRM', 'rm', '%Wps', 'Wps', ),
    'Wps_WO': ( 'IDX_UseModRM', 'rm', '%Wps', 'Wps', ),
    'Wpd': ( 'IDX_UseModRM', 'rm', '%Wpd', 'Wpd', ),
    'Wpd_WO': ( 'IDX_UseModRM', 'rm', '%Wpd', 'Wpd', ),
    'Wdq': ( 'IDX_UseModRM', 'rm', '%Wdq', 'Wdq', ),
    'Wdq_WO': ( 'IDX_UseModRM', 'rm', '%Wdq', 'Wdq', ),
    'Wq': ( 'IDX_UseModRM', 'rm', '%Wq', 'Wq', ),
    'WqZxReg_WO': ( 'IDX_UseModRM', 'rm', '%Wq', 'Wq', ),

    # ModR/M.rm - register only.
    'Uq': ( 'IDX_UseModRM', 'rm', '%Uq', 'Uq', ),
    'UqHi': ( 'IDX_UseModRM', 'rm', '%Uq', 'UqHi', ),
    'Uss': ( 'IDX_UseModRM', 'rm', '%Uss', 'Uss', ),
    'Uss_WO': ( 'IDX_UseModRM', 'rm', '%Uss', 'Uss', ),
    'Usd': ( 'IDX_UseModRM', 'rm', '%Usd', 'Usd', ),
    'Nq': ( 'IDX_UseModRM', 'rm', '%Qq', 'Nq', ),

    # ModR/M.rm - memory only.
    'Ma': ( 'IDX_UseModRM', 'rm', '%Ma', 'Ma', ), ##< Only used by BOUND.
    'Mb_RO': ( 'IDX_UseModRM', 'rm', '%Mb', 'Mb', ),
    'Md': ( 'IDX_UseModRM', 'rm', '%Md', 'Md', ),
    'Md_RO': ( 'IDX_UseModRM', 'rm', '%Md', 'Md', ),
    'Md_WO': ( 'IDX_UseModRM', 'rm', '%Md', 'Md', ),
    'Mq': ( 'IDX_UseModRM', 'rm', '%Mq', 'Mq', ),
    'Mq_WO': ( 'IDX_UseModRM', 'rm', '%Mq', 'Mq', ),
    'M_RO': ( 'IDX_UseModRM', 'rm', '%M', 'M', ),
    'M_RW': ( 'IDX_UseModRM', 'rm', '%M', 'M', ),

    # ModR/M.reg
    'Gb': ( 'IDX_UseModRM', 'reg', '%Gb', 'Gb', ),
    'Gw': ( 'IDX_UseModRM', 'reg', '%Gw', 'Gw', ),
    'Gv': ( 'IDX_UseModRM', 'reg', '%Gv', 'Gv', ),
    'Gv_RO': ( 'IDX_UseModRM', 'reg', '%Gv', 'Gv', ),
    'Pq_WO': ( 'IDX_UseModRM', 'reg', '%Pq', 'Pq', ),
    'Vss': ( 'IDX_UseModRM', 'reg', '%Vss', 'Vss', ),
    'Vss_WO': ( 'IDX_UseModRM', 'reg', '%Vss', 'Vss', ),
    'VssZx_WO': ( 'IDX_UseModRM', 'reg', '%Vss', 'Vss', ),
    'Vsd': ( 'IDX_UseModRM', 'reg', '%Vsd', 'Vsd', ),
    'Vsd_WO': ( 'IDX_UseModRM', 'reg', '%Vsd', 'Vsd', ),
    'VsdZx_WO': ( 'IDX_UseModRM', 'reg', '%Vsd', 'Vsd', ),
    'Vps': ( 'IDX_UseModRM', 'reg', '%Vps', 'Vps', ),
    'Vps_WO': ( 'IDX_UseModRM', 'reg', '%Vps', 'Vps', ),
    'Vpd': ( 'IDX_UseModRM', 'reg', '%Vpd', 'Vpd', ),
    'Vpd_WO': ( 'IDX_UseModRM', 'reg', '%Vpd', 'Vpd', ),
    'Vq': ( 'IDX_UseModRM', 'reg', '%Vq', 'Vq', ),
    'Vq_WO': ( 'IDX_UseModRM', 'reg', '%Vq', 'Vq', ),
    'Vdq_WO': ( 'IDX_UseModRM', 'reg', '%Vdq', 'Vdq', ),
    'VqHi': ( 'IDX_UseModRM', 'reg', '%Vdq', 'VdqHi', ),
    'VqHi_WO': ( 'IDX_UseModRM', 'reg', '%Vdq', 'VdqHi', ),
    'VqZx_WO': ( 'IDX_UseModRM', 'reg', '%Vq', 'VqZx', ),

    # VEX.vvvv
    'HdqCss': ( 'IDX_UseModRM', 'vvvv', '%Hx', 'HdqCss', ),
    'HdqCsd': ( 'IDX_UseModRM', 'vvvv', '%Hx', 'HdqCsd', ),

    # Immediate values.
    'Ib': ( 'IDX_ParseImmByte', 'imm', '%Ib', 'Ib', ), ##< NB! Could be IDX_ParseImmByteSX for some instrs.
    'Iw': ( 'IDX_ParseImmUshort', 'imm', '%Iw', 'Iw', ),
    'Id': ( 'IDX_ParseImmUlong', 'imm', '%Id', 'Id', ),
    'Iq': ( 'IDX_ParseImmQword', 'imm', '%Iq', 'Iq', ),
    'Iv': ( 'IDX_ParseImmV', 'imm', '%Iv', 'Iv', ), ##< o16: word, o32: dword, o64: qword
    'Iz': ( 'IDX_ParseImmZ', 'imm', '%Iz', 'Iz', ), ##< o16: word, o32|o64:dword

    # Address operands (no ModR/M).
    'Ob': ( 'IDX_ParseImmAddr', 'imm', '%Ob', 'Ob', ),
    'Ov': ( 'IDX_ParseImmAddr', 'imm', '%Ov', 'Ov', ),

    # Relative jump targets
    'Jb': ( 'IDX_ParseImmBRel', 'imm', '%Jb', 'Jb', ),
    'Jv': ( 'IDX_ParseImmVRel', 'imm', '%Jv', 'Jv', ),

    # DS:rSI
    'Xb': ( 'IDX_ParseXb', 'rSI', '%eSI', 'Xb', ),
    'Xv': ( 'IDX_ParseXv', 'rSI', '%eSI', 'Xv', ),
    # ES:rDI
    'Yb': ( 'IDX_ParseYb', 'rDI', '%eDI', 'Yb', ),
    'Yv': ( 'IDX_ParseYv', 'rDI', '%eDI', 'Yv', ),

    'Fv': ( 'IDX_ParseFixedReg', 'rFLAGS', '%Fv', 'Fv', ),

    # Fixed registers.
    'AL': ( 'IDX_ParseFixedReg', 'AL', 'al', 'REG_AL', ),
    'rAX': ( 'IDX_ParseFixedReg', 'rAX', '%eAX', 'REG_EAX', ),
    'CS': ( 'IDX_ParseFixedReg', 'CS', 'cs', 'REG_CS', ), # 8086: push CS
    'DS': ( 'IDX_ParseFixedReg', 'DS', 'ds', 'REG_DS', ),
    'ES': ( 'IDX_ParseFixedReg', 'ES', 'es', 'REG_ES', ),
    'FS': ( 'IDX_ParseFixedReg', 'FS', 'fs', 'REG_FS', ),
    'GS': ( 'IDX_ParseFixedReg', 'GS', 'gs', 'REG_GS', ),
    'SS': ( 'IDX_ParseFixedReg', 'SS', 'ss', 'REG_SS', ),
};
312
313# IDX_ParseFixedReg
314# IDX_ParseVexDest
315
316
## IEMFORM_XXX mappings.
##
## Value fields:
## - 0: the encoding name (one of the g_kdEncodings keys).
## - 1: list with the location of each operand, in operand order (names from
##      g_kdOpLocations), or None for the 'FIXED' form.
g_kdIemForms = { # sEncoding, [ sWhere1, ... ]
    'RM': ( 'ModR/M', [ 'reg', 'rm' ], ),
    'RM_REG': ( 'ModR/M', [ 'reg', 'rm' ], ),
    'RM_MEM': ( 'ModR/M', [ 'reg', 'rm' ], ),
    'MR': ( 'ModR/M', [ 'rm', 'reg' ], ),
    'MR_REG': ( 'ModR/M', [ 'rm', 'reg' ], ),
    'MR_MEM': ( 'ModR/M', [ 'rm', 'reg' ], ),
    'M': ( 'ModR/M', [ 'rm', ], ),
    'M_REG': ( 'ModR/M', [ 'rm', ], ),
    'M_MEM': ( 'ModR/M', [ 'rm', ], ),
    'R': ( 'ModR/M', [ 'reg', ], ),

    'VEX_RM': ( 'VEX.ModR/M', [ 'reg', 'rm' ], ),
    'VEX_RM_REG': ( 'VEX.ModR/M', [ 'reg', 'rm' ], ),
    'VEX_RM_MEM': ( 'VEX.ModR/M', [ 'reg', 'rm' ], ),
    'VEX_XM': ( 'VEX.ModR/M', [ 'reg', 'rm' ], ), # same as VEX_RM_MEM
    'VEX_MR': ( 'VEX.ModR/M', [ 'rm', 'reg' ], ),
    'VEX_MR_REG': ( 'VEX.ModR/M', [ 'rm', 'reg' ], ),
    'VEX_MR_MEM': ( 'VEX.ModR/M', [ 'rm', 'reg' ], ),
    'VEX_M': ( 'VEX.ModR/M', [ 'rm', ], ),
    'VEX_M_REG': ( 'VEX.ModR/M', [ 'rm', ], ),
    'VEX_M_MEM': ( 'VEX.ModR/M', [ 'rm', ], ),
    'VEX_R': ( 'VEX.ModR/M', [ 'reg', ], ),
    'VEX_RVM': ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm'], ),
    'VEX_MVR': ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg'], ),

    'FIXED': ( 'fixed', None, )
};
346
## \@oppfx values.
## The keys are the accepted values; every value in this table is an empty list.
g_kdPrefixes = {
    'none': [],
    '0x66': [],
    '0xf3': [],
    '0xf2': [],
};
354
## Special \@opcode tag values.
## The keys are the accepted values; every value in this table is an empty list.
g_kdSpecialOpcodes = {
    '/reg': [],
    'mr/reg': [],
    '11 /reg': [],
    '!11 /reg': [],
    '11 mr/reg': [],
    '!11 mr/reg': [],
};
364
## Special \@opcodesub tag values.
## Each value is a one element list holding the canonical spelling of the key
## (None for 'none'), so the short '11' and '!11' aliases resolve to the same
## string as the long forms.
g_kdSubOpcodes = {
    'none': [ None, ],
    '11 mr/reg': [ '11 mr/reg', ],
    '11': [ '11 mr/reg', ], ##< alias
    '!11 mr/reg': [ '!11 mr/reg', ],
    '!11': [ '!11 mr/reg', ], ##< alias
};
373
## Valid values for \@openc
## Each value is a one element list holding a BS3CG1ENC_XXX constant name, or
## None for 'prefix'.
g_kdEncodings = {
    'ModR/M': [ 'BS3CG1ENC_MODRM', ], ##< ModR/M
    'VEX.ModR/M': [ 'BS3CG1ENC_VEX_MODRM', ], ##< VEX...ModR/M
    'fixed': [ 'BS3CG1ENC_FIXED', ], ##< Fixed encoding (address, registers, etc).
    'prefix': [ None, ], ##< Prefix
};
381
## \@opunused, \@opinvalid, \@opinvlstyle
## The keys are the accepted values; every value in this table is an empty list.
g_kdInvalidStyles = {
    'immediate': [], ##< CPU stops decoding immediately after the opcode.
    'intel-modrm': [], ##< Intel decodes ModR/M.
    'intel-modrm-imm8': [], ##< Intel decodes ModR/M and an 8-bit immediate.
    'intel-opcode-modrm': [], ##< Intel decodes another opcode byte followed by ModR/M. (Unused extension tables.)
    'intel-opcode-modrm-imm8': [], ##< Intel decodes another opcode byte followed by ModR/M and an 8-bit immediate.
};
390
## CPU names; every value in this table is an empty tuple.
## NOTE(review): presumably the set of CPU names accepted by a minimum-CPU
##               style tag - confirm against the parser code.
g_kdCpuNames = {
    '8086': (),
    '80186': (),
    '80286': (),
    '80386': (),
    '80486': (),
};
398
## \@opcpuid
##
## Maps the feature name accepted by the tag to the name of the matching
## X86_CPUID_XXX feature bit constant.  The first group is the standard
## feature leaf (plus one structured extended feature), the second group the
## AMD extended feature leaf.
##
## Note! 'pclmul', 'monitor' and 'smx' used to name the constant of the
##       neighbouring ECX bit (DTES64, CPLDS and TM2 respectively).
g_kdCpuIdFlags = {
    'vme': 'X86_CPUID_FEATURE_EDX_VME',
    'tsc': 'X86_CPUID_FEATURE_EDX_TSC',
    'msr': 'X86_CPUID_FEATURE_EDX_MSR',
    'cx8': 'X86_CPUID_FEATURE_EDX_CX8',
    'sep': 'X86_CPUID_FEATURE_EDX_SEP',
    'cmov': 'X86_CPUID_FEATURE_EDX_CMOV',
    'clfsh': 'X86_CPUID_FEATURE_EDX_CLFSH',
    'clflushopt': 'X86_CPUID_STEXT_FEATURE_EBX_CLFLUSHOPT',
    'mmx': 'X86_CPUID_FEATURE_EDX_MMX',
    'fxsr': 'X86_CPUID_FEATURE_EDX_FXSR',
    'sse': 'X86_CPUID_FEATURE_EDX_SSE',
    'sse2': 'X86_CPUID_FEATURE_EDX_SSE2',
    'sse3': 'X86_CPUID_FEATURE_ECX_SSE3',
    'pclmul': 'X86_CPUID_FEATURE_ECX_PCLMUL',
    'monitor': 'X86_CPUID_FEATURE_ECX_MONITOR',
    'vmx': 'X86_CPUID_FEATURE_ECX_VMX',
    'smx': 'X86_CPUID_FEATURE_ECX_SMX',
    'ssse3': 'X86_CPUID_FEATURE_ECX_SSSE3',
    'fma': 'X86_CPUID_FEATURE_ECX_FMA',
    'cx16': 'X86_CPUID_FEATURE_ECX_CX16',
    'pcid': 'X86_CPUID_FEATURE_ECX_PCID',
    'sse41': 'X86_CPUID_FEATURE_ECX_SSE4_1',
    'sse42': 'X86_CPUID_FEATURE_ECX_SSE4_2',
    'movbe': 'X86_CPUID_FEATURE_ECX_MOVBE',
    'popcnt': 'X86_CPUID_FEATURE_ECX_POPCNT',
    'aes': 'X86_CPUID_FEATURE_ECX_AES',
    'xsave': 'X86_CPUID_FEATURE_ECX_XSAVE',
    'avx': 'X86_CPUID_FEATURE_ECX_AVX',
    'f16c': 'X86_CPUID_FEATURE_ECX_F16C',
    'rdrand': 'X86_CPUID_FEATURE_ECX_RDRAND',

    'axmmx': 'X86_CPUID_AMD_FEATURE_EDX_AXMMX',
    '3dnowext': 'X86_CPUID_AMD_FEATURE_EDX_3DNOW_EX',
    '3dnow': 'X86_CPUID_AMD_FEATURE_EDX_3DNOW',
    'svm': 'X86_CPUID_AMD_FEATURE_ECX_SVM',
    'cr8l': 'X86_CPUID_AMD_FEATURE_ECX_CR8L',
    'abm': 'X86_CPUID_AMD_FEATURE_ECX_ABM',
    'sse4a': 'X86_CPUID_AMD_FEATURE_ECX_SSE4A',
    '3dnowprf': 'X86_CPUID_AMD_FEATURE_ECX_3DNOWPRF',
    'xop': 'X86_CPUID_AMD_FEATURE_ECX_XOP',
    'fma4': 'X86_CPUID_AMD_FEATURE_ECX_FMA4',
};
443
## \@ophints values.
## Maps the hint name to the name of a DISOPTYPE_XXX constant.  The last two
## hints map to an empty string instead of a constant name.
g_kdHints = {
    'invalid': 'DISOPTYPE_INVALID', ##<
    'harmless': 'DISOPTYPE_HARMLESS', ##<
    'controlflow': 'DISOPTYPE_CONTROLFLOW', ##<
    'potentially_dangerous': 'DISOPTYPE_POTENTIALLY_DANGEROUS', ##<
    'dangerous': 'DISOPTYPE_DANGEROUS', ##<
    'portio': 'DISOPTYPE_PORTIO', ##<
    'privileged': 'DISOPTYPE_PRIVILEGED', ##<
    'privileged_notrap': 'DISOPTYPE_PRIVILEGED_NOTRAP', ##<
    'uncond_controlflow': 'DISOPTYPE_UNCOND_CONTROLFLOW', ##<
    'relative_controlflow': 'DISOPTYPE_RELATIVE_CONTROLFLOW', ##<
    'cond_controlflow': 'DISOPTYPE_COND_CONTROLFLOW', ##<
    'interrupt': 'DISOPTYPE_INTERRUPT', ##<
    'illegal': 'DISOPTYPE_ILLEGAL', ##<
    'rrm_dangerous': 'DISOPTYPE_RRM_DANGEROUS', ##< Some additional dangerous ones when recompiling raw r0.
    'rrm_dangerous_16': 'DISOPTYPE_RRM_DANGEROUS_16', ##< Some additional dangerous ones when recompiling 16-bit raw r0.
    'inhibit_irqs': 'DISOPTYPE_INHIBIT_IRQS', ##< Will or can inhibit irqs (sti, pop ss, mov ss)
    'portio_read': 'DISOPTYPE_PORTIO_READ', ##<
    'portio_write': 'DISOPTYPE_PORTIO_WRITE', ##<
    'invalid_64': 'DISOPTYPE_INVALID_64', ##< Invalid in 64 bits mode
    'only_64': 'DISOPTYPE_ONLY_64', ##< Only valid in 64 bits mode
    'default_64_op_size': 'DISOPTYPE_DEFAULT_64_OP_SIZE', ##< Default 64 bits operand size
    'forced_64_op_size': 'DISOPTYPE_FORCED_64_OP_SIZE', ##< Forced 64 bits operand size; regardless of prefix bytes
    'rexb_extends_opreg': 'DISOPTYPE_REXB_EXTENDS_OPREG', ##< REX.B extends the register field in the opcode byte
    'mod_fixed_11': 'DISOPTYPE_MOD_FIXED_11', ##< modrm.mod is always 11b
    'forced_32_op_size_x86': 'DISOPTYPE_FORCED_32_OP_SIZE_X86', ##< Forced 32 bits operand size; regardless of prefix bytes
                                                                 ## (only in 16 & 32 bits mode!)
    'sse': 'DISOPTYPE_SSE', ##< SSE,SSE2,SSE3,AVX,++ instruction. Not implemented yet!
    'mmx': 'DISOPTYPE_MMX', ##< MMX,MMXExt,3DNow,++ instruction. Not implemented yet!
    'fpu': 'DISOPTYPE_FPU', ##< FPU instruction. Not implemented yet!
    'ignores_op_size': '', ##< Ignores both operand size prefixes.
    'lock_allowed': '', ##< Lock prefix allowed.
};
478
## \@opxcpttype values (see SDMv2 2.4, 2.7).
## The keys are the accepted values; every value in this table is an empty list.
g_kdXcptTypes = {
    'none': [],
    '1': [],
    '2': [],
    '3': [],
    '4': [],
    '4UA': [],
    '5': [],
    '6': [],
    '7': [],
    '8': [],
    '11': [],
    '12': [],
    'E1': [],
    'E1NF': [],
    'E2': [],
    'E3': [],
    'E3NF': [],
    'E4': [],
    'E4NF': [],
    'E5': [],
    'E5NF': [],
    'E6': [],
    'E6NF': [],
    'E7NF': [],
    'E9': [],
    'E9NF': [],
    'E10': [],
    'E11': [],
    'E12': [],
    'E12NF': [],
};
512
513
514def _isValidOpcodeByte(sOpcode):
515 """
516 Checks if sOpcode is a valid lower case opcode byte.
517 Returns true/false.
518 """
519 if len(sOpcode) == 4:
520 if sOpcode[:2] == '0x':
521 if sOpcode[2] in '0123456789abcdef':
522 if sOpcode[3] in '0123456789abcdef':
523 return True;
524 return False;
525
526
class InstructionMap(object):
    """
    Instruction map.

    The opcode map provides the lead opcode bytes (empty for the one byte
    opcode map).  An instruction can be member of multiple opcode maps as long
    as it uses the same opcode value within the map (because of VEX).
    """

    ## Valid sEncoding values (checked by the constructor).
    kdEncodings = {
        'legacy': [],
        'vex1': [], ##< VEX or EVEX prefix with vvvvv = 1
        'vex2': [], ##< VEX or EVEX prefix with vvvvv = 2
        'vex3': [], ##< VEX or EVEX prefix with vvvvv = 3
        'xop8': [], ##< XOP prefix with vvvvv = 8
        'xop9': [], ##< XOP prefix with vvvvv = 9
        'xop10': [], ##< XOP prefix with vvvvv = 10
    };
    ## Selectors.
    ## The first value is the number of table entries required by a
    ## decoder or disassembler for this type of selector.
    kdSelectors = {
        'byte': [ 256, ], ##< next opcode byte selects the instruction (default).
        '/r': [ 8, ], ##< modrm.reg selects the instruction.
        'memreg /r':[ 16, ], ##< modrm.reg and (modrm.mod == 3) selects the instruction.
        'mod /r': [ 32, ], ##< modrm.reg and modrm.mod selects the instruction.
        '!11 /r': [ 8, ], ##< modrm.reg selects the instruction with modrm.mod != 0y11.
        '11 /r': [ 8, ], ##< modrm.reg select the instruction with modrm.mod == 0y11.
        '11': [ 64, ], ##< modrm.reg and modrm.rm select the instruction with modrm.mod == 0y11.
    };

    def __init__(self, sName, asLeadOpcodes = None, sSelector = 'byte', sEncoding = 'legacy', sDisParse = None):
        """
        Creates an empty map.

        sName is the map name, asLeadOpcodes the list of lead opcode bytes as
        lower case '0x??' strings (None means no lead bytes), sSelector one of
        the kdSelectors keys, sEncoding one of the kdEncodings keys and
        sDisParse either None or a string starting with 'IDX_Parse'.

        The arguments are checked using assertions.
        """
        assert sSelector in self.kdSelectors;
        assert sEncoding in self.kdEncodings;
        if asLeadOpcodes is None:
            asLeadOpcodes = [];
        else:
            for sOpcode in asLeadOpcodes:
                assert _isValidOpcodeByte(sOpcode);
        assert sDisParse is None or sDisParse.startswith('IDX_Parse');

        self.sName = sName;
        self.asLeadOpcodes = asLeadOpcodes; ##< Lead opcode bytes formatted as hex strings like '0x0f'.
        self.sSelector = sSelector; ##< The member selector, see kdSelectors.
        self.sEncoding = sEncoding; ##< The encoding, see kdEncodings.
        self.aoInstructions = []; # type: Instruction
        self.sDisParse = sDisParse; ##< IDX_ParseXXX.

    def getTableSize(self):
        """
        Number of table entries.  This corresponds directly to the selector.
        """
        return self.kdSelectors[self.sSelector][0];

    def getInstructionIndex(self, oInstr):
        """
        Returns the table index for the instruction.

        The index is derived from the value oInstr.getOpcodeByte() returns,
        which for all but the 'byte' selector is treated as a ModR/M byte
        (mod = bits 7:6, reg = bits 5:3, rm = bits 2:0).
        """
        bOpcode = oInstr.getOpcodeByte();

        # The byte selector is simple.  We need a full opcode byte and need just return it.
        if self.sSelector == 'byte':
            assert oInstr.sOpcode[:2] == '0x' and len(oInstr.sOpcode) == 4, str(oInstr);
            return bOpcode;

        # The other selectors needs masking and shifting.
        if self.sSelector == '/r':
            return (bOpcode >> 3) & 0x7;

        if self.sSelector == 'mod /r':
            return (bOpcode >> 3) & 0x1f;

        if self.sSelector == 'memreg /r':
            return ((bOpcode >> 3) & 0x7) | (int((bOpcode >> 6) == 3) << 3);

        # Note! modrm.mod == 0y11 means both of the top two bits are set, so
        #       the masked value must be compared with 0xc0 (not 0xc).
        if self.sSelector == '!11 /r':
            assert (bOpcode & 0xc0) != 0xc0, str(oInstr);
            return (bOpcode >> 3) & 0x7;

        if self.sSelector == '11 /r':
            assert (bOpcode & 0xc0) == 0xc0, str(oInstr);
            return (bOpcode >> 3) & 0x7;

        if self.sSelector == '11':
            assert (bOpcode & 0xc0) == 0xc0, str(oInstr);
            return bOpcode & 0x3f;

        assert False, self.sSelector;
        return -1;

    def getInstructionsInTableOrder(self):
        """
        Get instructions in table order.

        Returns array of instructions.  Normally there is exactly one
        instruction per entry.  However the entry could also be None if
        no instruction was specified for that opcode value.  Or there
        could be a list of instructions to deal with special encodings
        where for instance prefix (e.g. REX.W) encodes a different
        instruction or different CPUs have different instructions or
        prefixes in the same place.
        """
        # Start with empty table.
        cTable = self.getTableSize();
        aoTable = [None] * cTable;

        # Insert the instructions, skipping those without an opcode.
        for oInstr in self.aoInstructions:
            if oInstr.sOpcode:
                idxOpcode = self.getInstructionIndex(oInstr);
                assert idxOpcode < cTable, str(idxOpcode);

                oExisting = aoTable[idxOpcode];
                if oExisting is None:
                    aoTable[idxOpcode] = oInstr;
                elif not isinstance(oExisting, list):
                    # Second instruction for this slot, switch to a list.
                    aoTable[idxOpcode] = [oExisting, oInstr];
                else:
                    oExisting.append(oInstr);

        return aoTable;


    def getDisasTableName(self):
        """
        Returns the disassembler table name for this map.

        The name is 'g_aDisas' followed by the '_' separated words of the map
        name: 'm' and 'r' as well as two digit lower case hex words are
        appended with a leading underscore, all other words are capitalized
        (with 'grp' and 'map' turned into 'Grp' and 'Map') and appended as-is.
        """
        sName = 'g_aDisas';
        for sWord in self.sName.split('_'):
            if sWord == 'm': # suffix indicating modrm.mod==mem
                sName += '_m';
            elif sWord == 'r': # suffix indicating modrm.mod==reg
                sName += '_r';
            elif len(sWord) == 2 and re.match('^[a-f0-9][a-f0-9]$', sWord):
                sName += '_' + sWord;
            else:
                sWord = sWord.replace('grp', 'Grp');
                sWord = sWord.replace('map', 'Map');
                sName += sWord[0].upper() + sWord[1:];
        return sName;
667
668
class TestType(object):
    """
    Test value type.

    This base class deals with integer like values.  The fUnsigned constructor
    parameter indicates the default stance on zero vs sign extending.  It is
    possible to override fUnsigned=True by prefixing the value with '+' or '-'.
    """
    def __init__(self, sName, acbSizes = None, fUnsigned = True):
        """
        sName is the type name, acbSizes the list of natural value sizes in
        bytes in ascending order (defaults to 1 thru 32 bytes) and fUnsigned
        whether values are zero extended by default.
        """
        self.sName = sName;
        self.acbSizes = [1, 2, 4, 8, 16, 32] if acbSizes is None else acbSizes; # Normal sizes.
        self.fUnsigned = fUnsigned;

    class BadValue(Exception):
        """ Bad value exception. """
        def __init__(self, sMessage):
            Exception.__init__(self, sMessage);
            self.sMessage = sMessage;

    ## For ascii ~ operator.
    kdHexInv = {
        '0': 'f',
        '1': 'e',
        '2': 'd',
        '3': 'c',
        '4': 'b',
        '5': 'a',
        '6': '9',
        '7': '8',
        '8': '7',
        '9': '6',
        'a': '5',
        'b': '4',
        'c': '3',
        'd': '2',
        'e': '1',
        'f': '0',
    };

    def get(self, sValue):
        """
        Get the shortest normal sized byte representation of sValue.

        sValue is a decimal or '0x' prefixed hexadecimal number, optionally
        preceded by a '+' or '-' sign.  A sign forces sign extension.

        Returns ((fSignExtend, bytearray), ) or ((fSignExtend, bytearray), (fSignExtend, bytearray), ).
        The latter form is for AND+OR pairs where the first entry is what to
        AND with the field and the second the one to OR with.  This base class
        only returns the first form.  The byte array is in little endian order.

        Raises BadValue if invalid value.
        """
        if not sValue:
            raise TestType.BadValue('empty value');

        # Deal with sign and detect hexadecimal or decimal.
        fSignExtend = not self.fUnsigned;
        if sValue[0] == '-' or sValue[0] == '+':
            fSignExtend = True;
            fHex = len(sValue) > 3 and sValue[1:3].lower() == '0x';
        else:
            fHex = len(sValue) > 2 and sValue[0:2].lower() == '0x';

        # Try convert it to an integer.  int() is used instead of long() as it
        # yields arbitrary precision values on both Python 2 and 3.  Only the
        # conversion errors are translated so that real bugs aren't hidden.
        try:
            iValue = int(sValue, 16 if fHex else 10);
        except (TypeError, ValueError) as oXcpt:
            raise TestType.BadValue('failed to convert "%s" to integer (%s)' % (sValue, oXcpt));

        # Convert to hex digits.  Negative values needs to be manually
        # converted to something non-negative (~-n + 1), which is done by
        # inverting each digit of (-n - 1).  The '%x' format gives neither
        # the '0x' prefix nor the Python 2 'L' suffix.
        if iValue >= 0:
            sHex = '%x' % (iValue,);
        else:
            sHex = ''.join([self.kdHexInv[sDigit] for sDigit in '%x' % (-iValue - 1,)]);
            # Make sure the top bit is set so the value reads as negative.
            if fSignExtend and sHex[0] not in '89abcdef':
                sHex = 'f' + sHex;

        # Figure the natural number of digits: The smallest normal size that
        # fits, or a whole multiple of the largest one if none does.
        cDigits = len(sHex);
        if cDigits <= self.acbSizes[-1] * 2:
            for cb in self.acbSizes:
                cNaturalDigits = cb * 2;
                if cDigits <= cNaturalDigits:
                    break;
        else:
            cNaturalDigits = self.acbSizes[-1] * 2;
            cNaturalDigits = ((cDigits + cNaturalDigits - 1) // cNaturalDigits) * cNaturalDigits;
            assert isinstance(cNaturalDigits, int)

        # Pad to the natural size, using 'f' to sign extend negative values.
        if cNaturalDigits != cDigits:
            cNeeded = cNaturalDigits - cDigits;
            if iValue >= 0:
                sHex = ('0' * cNeeded) + sHex;
            else:
                sHex = ('f' * cNeeded) + sHex;

        # Invert and convert to bytearray and return it.
        abValue = bytearray([int(sHex[offHex - 2 : offHex], 16) for offHex in range(len(sHex), 0, -2)]);

        return ((fSignExtend, abValue),);

    def validate(self, sValue):
        """
        Returns True if value is okay, error message on failure.
        """
        try:
            self.get(sValue);
        except TestType.BadValue as oXcpt:
            return oXcpt.sMessage;
        return True;

    def isAndOrPair(self, sValue):
        """
        Checks if sValue is a pair.

        Always False for this base class.
        """
        _ = sValue;
        return False;
793
794
class TestTypeEflags(TestType):
    """
    Special value parsing for EFLAGS/RFLAGS/FLAGS.

    Values are comma separated lists of the mnemonics found in
    g_kdEFlagsMnemonics.
    """

    ## The mnemonics standing for a cleared flag, i.e. the ones mapped to a
    ## '!' prefixed constant in g_kdEFlagsMnemonics.
    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 };

    def __init__(self, sName):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);

    def get(self, sValue):
        """
        Converts a comma separated list of flag mnemonics.

        Returns a (clear, set) pair of (fSignExtend, bytearray) entries if any
        of the mnemonics stands for a cleared flag, otherwise a tuple with a
        single entry holding the flags to set.

        Raises BadValue on unknown mnemonics.
        """
        fClear = 0;
        fSet = 0;
        for sFlag in sValue.split(','):
            sConstant = g_kdEFlagsMnemonics.get(sFlag, None);
            if sConstant is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sFlag, sValue))
            if sConstant[0] == '!':
                fClear |= g_kdX86EFlagsConstants[sConstant[1:]];
            else:
                fSet |= g_kdX86EFlagsConstants[sConstant];

        aoSet = TestType.get(self, '0x%x' % (fSet,));
        if fClear != 0:
            aoClear = TestType.get(self, '%#x' % (fClear,))
            assert self.isAndOrPair(sValue) is True;
            return (aoClear[0], aoSet[0]);
        assert self.isAndOrPair(sValue) is False;
        return aoSet;

    def isAndOrPair(self, sValue):
        """
        Checks if sValue contains any mnemonic for a cleared flag, in which
        case get() returns an AND+OR pair.

        Whole mnemonics are compared rather than searching for substrings, as
        for instance 'iopl' contains 'pl' without clearing anything.
        """
        for sFlag in sValue.split(','):
            if sFlag in self.kdZeroValueFlags:
                return True;
        return False;
830
class TestTypeFromDict(TestType):
    """
    Value parsing for registers whose bits are named by a constant table.

    The value is a comma separated list of flag names.  Each name is upper
    cased, prefixed with sConstantPrefix and looked up in
    kdConstantsAndValues, and the values found are OR'ed together.
    """

    ## Note! Not referenced by any of the methods in this class.
    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 };

    def __init__(self, sName, kdConstantsAndValues, sConstantPrefix):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);
        self.kdConstantsAndValues = kdConstantsAndValues;
        self.sConstantPrefix = sConstantPrefix;

    def get(self, sValue):
        """
        Converts the flag list to the byte representation of the combined value.

        Raises BadValue on unknown flag names.
        """
        fCombined = 0;
        for sName in sValue.split(','):
            sConstant = self.sConstantPrefix + sName.upper();
            fBits = self.kdConstantsAndValues.get(sConstant, None);
            if fBits is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sName, sValue))
            fCombined = fCombined | fBits;

        sHexValue = '0x%x' % (fCombined,);
        return TestType.get(self, sHexValue);
851
852
class TestInOut(object):
    """
    One input or output state modifier.

    This should be thought of as values to modify BS3REGCTX and extended
    (needs to be structured) state.

    An instance is a plain (field, operator, value, type) record; the
    constructor checks the field against kdFields and the operator against
    kasOperators.
    """
    ## Assigned operators.
    kasOperators = [
        '&|=',  # Special AND(INV)+OR operator for use with EFLAGS.
        '&~=',
        '&=',
        '|=',
        '='
    ]
    ## Types
    kdTypes = {
        'uint':  TestType('uint', fUnsigned = True),
        'int':   TestType('int'),
        'efl':   TestTypeEflags('efl'),
        'cr0':   TestTypeFromDict('cr0', g_kdX86Cr0Constants, 'X86_CR0_'),
        'cr4':   TestTypeFromDict('cr4', g_kdX86Cr4Constants, 'X86_CR4_'),
        'xcr0':  TestTypeFromDict('xcr0', g_kdX86XSaveCConstants, 'XSAVE_C_'),
    }
    ## CPU context fields.
    kdFields = {
        # name:         ( default type, [both|input|output], )
        # Operands.
        'op1':          ( 'uint', 'both',   ), ## \@op1
        'op2':          ( 'uint', 'both',   ), ## \@op2
        'op3':          ( 'uint', 'both',   ), ## \@op3
        'op4':          ( 'uint', 'both',   ), ## \@op4
        # Flags.
        'efl':          ( 'efl',  'both',   ),
        'efl_undef':    ( 'uint', 'output', ),
        # 8-bit GPRs.
        'al':           ( 'uint', 'both',   ),
        'cl':           ( 'uint', 'both',   ),
        'dl':           ( 'uint', 'both',   ),
        'bl':           ( 'uint', 'both',   ),
        'ah':           ( 'uint', 'both',   ),
        'ch':           ( 'uint', 'both',   ),
        'dh':           ( 'uint', 'both',   ),
        'bh':           ( 'uint', 'both',   ),
        'r8l':          ( 'uint', 'both',   ),
        'r9l':          ( 'uint', 'both',   ),
        'r10l':         ( 'uint', 'both',   ),
        'r11l':         ( 'uint', 'both',   ),
        'r12l':         ( 'uint', 'both',   ),
        'r13l':         ( 'uint', 'both',   ),
        'r14l':         ( 'uint', 'both',   ),
        'r15l':         ( 'uint', 'both',   ),
        # 16-bit GPRs.
        'ax':           ( 'uint', 'both',   ),
        'dx':           ( 'uint', 'both',   ),
        'cx':           ( 'uint', 'both',   ),
        'bx':           ( 'uint', 'both',   ),
        'sp':           ( 'uint', 'both',   ),
        'bp':           ( 'uint', 'both',   ),
        'si':           ( 'uint', 'both',   ),
        'di':           ( 'uint', 'both',   ),
        'r8w':          ( 'uint', 'both',   ),
        'r9w':          ( 'uint', 'both',   ),
        'r10w':         ( 'uint', 'both',   ),
        'r11w':         ( 'uint', 'both',   ),
        'r12w':         ( 'uint', 'both',   ),
        'r13w':         ( 'uint', 'both',   ),
        'r14w':         ( 'uint', 'both',   ),
        'r15w':         ( 'uint', 'both',   ),
        # 32-bit GPRs.
        'eax':          ( 'uint', 'both',   ),
        'edx':          ( 'uint', 'both',   ),
        'ecx':          ( 'uint', 'both',   ),
        'ebx':          ( 'uint', 'both',   ),
        'esp':          ( 'uint', 'both',   ),
        'ebp':          ( 'uint', 'both',   ),
        'esi':          ( 'uint', 'both',   ),
        'edi':          ( 'uint', 'both',   ),
        'r8d':          ( 'uint', 'both',   ),
        'r9d':          ( 'uint', 'both',   ),
        'r10d':         ( 'uint', 'both',   ),
        'r11d':         ( 'uint', 'both',   ),
        'r12d':         ( 'uint', 'both',   ),
        'r13d':         ( 'uint', 'both',   ),
        'r14d':         ( 'uint', 'both',   ),
        'r15d':         ( 'uint', 'both',   ),
        # 64-bit GPRs.
        'rax':          ( 'uint', 'both',   ),
        'rdx':          ( 'uint', 'both',   ),
        'rcx':          ( 'uint', 'both',   ),
        'rbx':          ( 'uint', 'both',   ),
        'rsp':          ( 'uint', 'both',   ),
        'rbp':          ( 'uint', 'both',   ),
        'rsi':          ( 'uint', 'both',   ),
        'rdi':          ( 'uint', 'both',   ),
        'r8':           ( 'uint', 'both',   ),
        'r9':           ( 'uint', 'both',   ),
        'r10':          ( 'uint', 'both',   ),
        'r11':          ( 'uint', 'both',   ),
        'r12':          ( 'uint', 'both',   ),
        'r13':          ( 'uint', 'both',   ),
        'r14':          ( 'uint', 'both',   ),
        'r15':          ( 'uint', 'both',   ),
        # 16-bit, 32-bit or 64-bit registers according to operand size.
        'oz.rax':       ( 'uint', 'both',   ),
        'oz.rdx':       ( 'uint', 'both',   ),
        'oz.rcx':       ( 'uint', 'both',   ),
        'oz.rbx':       ( 'uint', 'both',   ),
        'oz.rsp':       ( 'uint', 'both',   ),
        'oz.rbp':       ( 'uint', 'both',   ),
        'oz.rsi':       ( 'uint', 'both',   ),
        'oz.rdi':       ( 'uint', 'both',   ),
        'oz.r8':        ( 'uint', 'both',   ),
        'oz.r9':        ( 'uint', 'both',   ),
        'oz.r10':       ( 'uint', 'both',   ),
        'oz.r11':       ( 'uint', 'both',   ),
        'oz.r12':       ( 'uint', 'both',   ),
        'oz.r13':       ( 'uint', 'both',   ),
        'oz.r14':       ( 'uint', 'both',   ),
        'oz.r15':       ( 'uint', 'both',   ),
        # Control registers.
        'cr0':          ( 'cr0',  'both',   ),
        'cr4':          ( 'cr4',  'both',   ),
        'xcr0':         ( 'xcr0', 'both',   ),
        # FPU Registers
        'fcw':          ( 'uint', 'both',   ),
        'fsw':          ( 'uint', 'both',   ),
        'ftw':          ( 'uint', 'both',   ),
        'fop':          ( 'uint', 'both',   ),
        'fpuip':        ( 'uint', 'both',   ),
        'fpucs':        ( 'uint', 'both',   ),
        'fpudp':        ( 'uint', 'both',   ),
        'fpuds':        ( 'uint', 'both',   ),
        'mxcsr':        ( 'uint', 'both',   ),
        'st0':          ( 'uint', 'both',   ),
        'st1':          ( 'uint', 'both',   ),
        'st2':          ( 'uint', 'both',   ),
        'st3':          ( 'uint', 'both',   ),
        'st4':          ( 'uint', 'both',   ),
        'st5':          ( 'uint', 'both',   ),
        'st6':          ( 'uint', 'both',   ),
        'st7':          ( 'uint', 'both',   ),
        # MMX registers.
        'mm0':          ( 'uint', 'both',   ),
        'mm1':          ( 'uint', 'both',   ),
        'mm2':          ( 'uint', 'both',   ),
        'mm3':          ( 'uint', 'both',   ),
        'mm4':          ( 'uint', 'both',   ),
        'mm5':          ( 'uint', 'both',   ),
        'mm6':          ( 'uint', 'both',   ),
        'mm7':          ( 'uint', 'both',   ),
        # SSE registers.
        'xmm0':         ( 'uint', 'both',   ),
        'xmm1':         ( 'uint', 'both',   ),
        'xmm2':         ( 'uint', 'both',   ),
        'xmm3':         ( 'uint', 'both',   ),
        'xmm4':         ( 'uint', 'both',   ),
        'xmm5':         ( 'uint', 'both',   ),
        'xmm6':         ( 'uint', 'both',   ),
        'xmm7':         ( 'uint', 'both',   ),
        'xmm8':         ( 'uint', 'both',   ),
        'xmm9':         ( 'uint', 'both',   ),
        'xmm10':        ( 'uint', 'both',   ),
        'xmm11':        ( 'uint', 'both',   ),
        'xmm12':        ( 'uint', 'both',   ),
        'xmm13':        ( 'uint', 'both',   ),
        'xmm14':        ( 'uint', 'both',   ),
        'xmm15':        ( 'uint', 'both',   ),
        'xmm0.lo':      ( 'uint', 'both',   ),
        'xmm1.lo':      ( 'uint', 'both',   ),
        'xmm2.lo':      ( 'uint', 'both',   ),
        'xmm3.lo':      ( 'uint', 'both',   ),
        'xmm4.lo':      ( 'uint', 'both',   ),
        'xmm5.lo':      ( 'uint', 'both',   ),
        'xmm6.lo':      ( 'uint', 'both',   ),
        'xmm7.lo':      ( 'uint', 'both',   ),
        'xmm8.lo':      ( 'uint', 'both',   ),
        'xmm9.lo':      ( 'uint', 'both',   ),
        'xmm10.lo':     ( 'uint', 'both',   ),
        'xmm11.lo':     ( 'uint', 'both',   ),
        'xmm12.lo':     ( 'uint', 'both',   ),
        'xmm13.lo':     ( 'uint', 'both',   ),
        'xmm14.lo':     ( 'uint', 'both',   ),
        'xmm15.lo':     ( 'uint', 'both',   ),
        'xmm0.hi':      ( 'uint', 'both',   ),
        'xmm1.hi':      ( 'uint', 'both',   ),
        'xmm2.hi':      ( 'uint', 'both',   ),
        'xmm3.hi':      ( 'uint', 'both',   ),
        'xmm4.hi':      ( 'uint', 'both',   ),
        'xmm5.hi':      ( 'uint', 'both',   ),
        'xmm6.hi':      ( 'uint', 'both',   ),
        'xmm7.hi':      ( 'uint', 'both',   ),
        'xmm8.hi':      ( 'uint', 'both',   ),
        'xmm9.hi':      ( 'uint', 'both',   ),
        'xmm10.hi':     ( 'uint', 'both',   ),
        'xmm11.hi':     ( 'uint', 'both',   ),
        'xmm12.hi':     ( 'uint', 'both',   ),
        'xmm13.hi':     ( 'uint', 'both',   ),
        'xmm14.hi':     ( 'uint', 'both',   ),
        'xmm15.hi':     ( 'uint', 'both',   ),
        'xmm0.lo.zx':   ( 'uint', 'both',   ),
        'xmm1.lo.zx':   ( 'uint', 'both',   ),
        'xmm2.lo.zx':   ( 'uint', 'both',   ),
        'xmm3.lo.zx':   ( 'uint', 'both',   ),
        'xmm4.lo.zx':   ( 'uint', 'both',   ),
        'xmm5.lo.zx':   ( 'uint', 'both',   ),
        'xmm6.lo.zx':   ( 'uint', 'both',   ),
        'xmm7.lo.zx':   ( 'uint', 'both',   ),
        'xmm8.lo.zx':   ( 'uint', 'both',   ),
        'xmm9.lo.zx':   ( 'uint', 'both',   ),
        'xmm10.lo.zx':  ( 'uint', 'both',   ),
        'xmm11.lo.zx':  ( 'uint', 'both',   ),
        'xmm12.lo.zx':  ( 'uint', 'both',   ),
        'xmm13.lo.zx':  ( 'uint', 'both',   ),
        'xmm14.lo.zx':  ( 'uint', 'both',   ),
        'xmm15.lo.zx':  ( 'uint', 'both',   ),
        'xmm0.dw0':     ( 'uint', 'both',   ),
        'xmm1.dw0':     ( 'uint', 'both',   ),
        'xmm2.dw0':     ( 'uint', 'both',   ),
        'xmm3.dw0':     ( 'uint', 'both',   ),
        'xmm4.dw0':     ( 'uint', 'both',   ),
        'xmm5.dw0':     ( 'uint', 'both',   ),
        'xmm6.dw0':     ( 'uint', 'both',   ),
        'xmm7.dw0':     ( 'uint', 'both',   ),
        'xmm8.dw0':     ( 'uint', 'both',   ),
        'xmm9.dw0':     ( 'uint', 'both',   ),
        'xmm10.dw0':    ( 'uint', 'both',   ),
        'xmm11.dw0':    ( 'uint', 'both',   ),
        'xmm12.dw0':    ( 'uint', 'both',   ),
        'xmm13.dw0':    ( 'uint', 'both',   ),
        'xmm14.dw0':    ( 'uint', 'both',   ),
        'xmm15.dw0':    ( 'uint', 'both',   ),  # Was misspelled 'xmm15_dw0', breaking the xmmN.dw0 pattern.
        # AVX registers.
        'ymm0':         ( 'uint', 'both',   ),
        'ymm1':         ( 'uint', 'both',   ),
        'ymm2':         ( 'uint', 'both',   ),
        'ymm3':         ( 'uint', 'both',   ),
        'ymm4':         ( 'uint', 'both',   ),
        'ymm5':         ( 'uint', 'both',   ),
        'ymm6':         ( 'uint', 'both',   ),
        'ymm7':         ( 'uint', 'both',   ),
        'ymm8':         ( 'uint', 'both',   ),
        'ymm9':         ( 'uint', 'both',   ),
        'ymm10':        ( 'uint', 'both',   ),
        'ymm11':        ( 'uint', 'both',   ),
        'ymm12':        ( 'uint', 'both',   ),
        'ymm13':        ( 'uint', 'both',   ),
        'ymm14':        ( 'uint', 'both',   ),
        'ymm15':        ( 'uint', 'both',   ),

        # Special ones.
        'value.xcpt':   ( 'uint', 'output', ),
    }

    def __init__(self, sField, sOp, sValue, sType):
        """
        Creates a modifier record.

        sField must be a key of kdFields and sOp an entry of kasOperators;
        all four arguments must be strings.  The checks are assertions, so
        they only guard against programming errors.
        """
        assert sField in self.kdFields
        assert sOp in self.kasOperators
        self.sField = sField
        self.sOp    = sOp
        self.sValue = sValue
        self.sType  = sType
        assert isinstance(sField, str)
        assert isinstance(sOp, str)
        assert isinstance(sType, str)
        assert isinstance(sValue, str)
1118
1119
class TestSelector(object):
    """
    One selector for an instruction test.

    A selector is a (variable, compare operator, value) triplet.  The
    variable must be a key of kdVariables, the operator an entry of
    kasCompareOps and the value a key of the variable's value dictionary.
    """
    ## Selector compare operators.
    kasCompareOps = [ '==', '!=' ]

    ## Selector variables and their valid values.
    kdVariables = {
        # Operand size.
        'size': {
            'o16': 'size_o16',
            'o32': 'size_o32',
            'o64': 'size_o64',
        },
        # Execution ring.
        'ring': {
            '0': 'ring_0',
            '1': 'ring_1',
            '2': 'ring_2',
            '3': 'ring_3',
            '0..2': 'ring_0_thru_2',
            '1..3': 'ring_1_thru_3',
        },
        # Basic code mode.
        'codebits': {
            '64': 'code_64bit',
            '32': 'code_32bit',
            '16': 'code_16bit',
        },
        # cpu modes.
        'mode': {
            'real': 'mode_real',
            'prot': 'mode_prot',
            'long': 'mode_long',
            'v86': 'mode_v86',
            'smm': 'mode_smm',
            'vmx': 'mode_vmx',
            'svm': 'mode_svm',
        },
        # paging on/off
        'paging': {
            'on': 'paging_on',
            'off': 'paging_off',
        },
        # CPU vendor
        'vendor': {
            'amd': 'vendor_amd',
            'intel': 'vendor_intel',
            'via': 'vendor_via',
        },
    }

    ## Selector shorthand predicates.
    ## These translate into variable expressions.
    kdPredicates = {
        # Operand size.
        'o16': 'size==o16',
        'o32': 'size==o32',
        'o64': 'size==o64',
        # Execution ring.
        'ring0': 'ring==0',
        '!ring0': 'ring==1..3',
        'ring1': 'ring==1',
        'ring2': 'ring==2',
        'ring3': 'ring==3',
        'user': 'ring==3',
        'supervisor': 'ring==0..2',
        # CPU mode.
        'real': 'mode==real',
        'prot': 'mode==prot',
        'long': 'mode==long',
        'v86': 'mode==v86',
        'smm': 'mode==smm',
        'vmx': 'mode==vmx',
        'svm': 'mode==svm',
        # Paging.
        'paging': 'paging==on',
        '!paging': 'paging==off',
        # CPU vendor.
        'amd': 'vendor==amd',
        '!amd': 'vendor!=amd',
        'intel': 'vendor==intel',
        '!intel': 'vendor!=intel',
        'via': 'vendor==via',
        '!via': 'vendor!=via',
    }

    def __init__(self, sVariable, sOp, sValue):
        """
        Creates a selector after asserting that the variable, the operator
        and the value are known (see kdVariables and kasCompareOps).
        """
        assert sVariable in self.kdVariables
        assert sOp in self.kasCompareOps
        dValidValues = self.kdVariables[sVariable]
        assert sValue in dValidValues
        self.sVariable = sVariable
        self.sOp       = sOp
        self.sValue    = sValue
1208
1209
1210class InstructionTest(object):
1211 """
1212 Instruction test.
1213 """
1214
1215 def __init__(self, oInstr): # type: (InstructionTest, Instruction)
1216 self.oInstr = oInstr; # type: InstructionTest
1217 self.aoInputs = []; # type: list(TestInOut)
1218 self.aoOutputs = []; # type: list(TestInOut)
1219 self.aoSelectors = []; # type: list(TestSelector)
1220
1221 def toString(self, fRepr = False):
1222 """
1223 Converts it to string representation.
1224 """
1225 asWords = [];
1226 if self.aoSelectors:
1227 for oSelector in self.aoSelectors:
1228 asWords.append('%s%s%s' % (oSelector.sVariable, oSelector.sOp, oSelector.sValue,));
1229 asWords.append('/');
1230
1231 for oModifier in self.aoInputs:
1232 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
1233
1234 asWords.append('->');
1235
1236 for oModifier in self.aoOutputs:
1237 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
1238
1239 if fRepr:
1240 return '<' + ' '.join(asWords) + '>';
1241 return ' '.join(asWords);
1242
1243 def __str__(self):
1244 """ Provide string represenation. """
1245 return self.toString(False);
1246
1247 def __repr__(self):
1248 """ Provide unambigious string representation. """
1249 return self.toString(True);
1250
class Operand(object):
    """
    Instruction operand.

    Pairs an operand location (key of g_kdOpLocations) with an operand type
    (key of g_kdOpTypes); both are asserted by the constructor.
    """

    def __init__(self, sWhere, sType):
        assert sWhere in g_kdOpLocations, sWhere
        assert sType  in g_kdOpTypes,     sType
        self.sWhere = sWhere    ##< g_kdOpLocations
        self.sType  = sType     ##< g_kdOpTypes

    def usesModRM(self):
        """ Returns True if using some form of ModR/M encoding. """
        # Decided by the first character of the operand type alone.
        chFirst = self.sType[0]
        return chFirst in ('E', 'G', 'M')
1265
1266
1267
class Instruction(object): # pylint: disable=too-many-instance-attributes
    """
    Instruction.

    Collects everything known about one instruction: the specification
    attributes, the implementation attributes, where in the source file it
    was found and the raw intermediate inputs.
    """

    def __init__(self, sSrcFile, iLine):
        ## @name Core attributes.
        ## @{
        self.sMnemonic      = None
        self.sBrief         = None
        self.asDescSections = []        # type: list(str)
        self.aoMaps         = []        # type: list(InstructionMap)
        self.aoOperands     = []        # type: list(Operand)
        self.sPrefix        = None      ##< Single prefix: None, 'none', 0x66, 0xf3, 0xf2
        self.sOpcode        = None      # type: str
        self.sSubOpcode     = None      # type: str
        self.sEncoding      = None
        self.asFlTest       = None
        self.asFlModify     = None
        self.asFlUndefined  = None
        self.asFlSet        = None
        self.asFlClear      = None
        self.dHints         = {}        ##< Dictionary of instruction hints, flags, whatnot. (Dictionary for speed; dummy value).
        self.sDisEnum       = None      ##< OP_XXXX value.  Default is based on the uppercased mnemonic.
        self.asCpuIds       = []        ##< The CPUID feature bit names for this instruction. If multiple, assume AND.
        self.asReqFeatures  = []        ##< Which features are required to be enabled to run this instruction.
        self.aoTests        = []        # type: list(InstructionTest)
        self.sMinCpu        = None      ##< Indicates the minimum CPU required for the instruction. Not set when oCpuExpr is.
        self.oCpuExpr       = None      ##< Some CPU restriction expression...
        self.sGroup         = None
        self.fUnused        = False     ##< Unused instruction.
        self.fInvalid       = False     ##< Invalid instruction (like UD2).
        self.sInvalidStyle  = None      ##< Invalid behaviour style (g_kdInvalidStyles),
        self.sXcptType      = None      ##< Exception type (g_kdXcptTypes).
        ## @}

        ## @name Implementation attributes.
        ## @{
        self.sStats         = None
        self.sFunction      = None
        self.fStub          = False
        self.fUdStub        = False
        ## @}

        ## @name Decoding info
        ## @{
        self.sSrcFile       = sSrcFile
        self.iLineCreated   = iLine
        self.iLineCompleted = None
        self.cOpTags        = 0
        self.iLineFnIemOpMacro  = -1
        self.iLineMnemonicMacro = -1
        ## @}

        ## @name Intermediate input fields.
        ## @{
        self.sRawDisOpNo    = None
        self.asRawDisParams = []
        self.sRawIemOpFlags = None
        self.sRawOldOpcodes = None
        self.asCopyTests    = []
        ## @}

    def toString(self, fRepr = False):
        """
        Turn object into a string.

        Produces 'name=value' pairs separated by '; '.  Fields whose value is
        None are left out.  When fRepr is True the result is wrapped in angle
        brackets.
        """
        aasFields = []

        aasFields.append(['opcode',    self.sOpcode])
        aasFields.append(['mnemonic',  self.sMnemonic])
        for iOperand, oOperand in enumerate(self.aoOperands):
            aasFields.append(['op%u' % (iOperand + 1,), '%s:%s' % (oOperand.sWhere, oOperand.sType,)])
        if self.aoMaps:
            aasFields.append(['maps', ','.join([oMap.sName for oMap in self.aoMaps])])
        aasFields.append(['encoding',  self.sEncoding])
        if self.dHints:
            aasFields.append(['hints', ','.join(self.dHints.keys())])
        aasFields.append(['disenum',   self.sDisEnum])
        if self.asCpuIds:
            aasFields.append(['cpuid', ','.join(self.asCpuIds)])
        aasFields.append(['group',     self.sGroup])
        if self.fUnused:
            aasFields.append(['unused', 'True'])
        if self.fInvalid:
            aasFields.append(['invalid', 'True'])
        aasFields.append(['invlstyle', self.sInvalidStyle])
        aasFields.append(['fltest',    self.asFlTest])
        aasFields.append(['flmodify',  self.asFlModify])
        aasFields.append(['flundef',   self.asFlUndefined])
        aasFields.append(['flset',     self.asFlSet])
        aasFields.append(['flclear',   self.asFlClear])
        aasFields.append(['mincpu',    self.sMinCpu])
        aasFields.append(['stats',     self.sStats])
        aasFields.append(['sFunction', self.sFunction])
        if self.fStub:
            aasFields.append(['fStub', 'True'])
        if self.fUdStub:
            aasFields.append(['fUdStub', 'True'])
        if self.cOpTags:
            aasFields.append(['optags', str(self.cOpTags)])
        if self.iLineFnIemOpMacro != -1:
            aasFields.append(['FNIEMOP_XXX', str(self.iLineFnIemOpMacro)])
        if self.iLineMnemonicMacro != -1:
            aasFields.append(['IEMOP_MNEMMONICn', str(self.iLineMnemonicMacro)])

        sRet = '<' if fRepr else ''
        for sField, sValue in aasFields:
            if sValue is not None:
                # A separator is needed once something follows the optional '<'.
                if len(sRet) > 1:
                    sRet += '; '
                sRet += '%s=%s' % (sField, sValue,)
        if fRepr:
            sRet += '>'

        return sRet

    def __str__(self):
        """ Provide string representation. """
        return self.toString(False)

    def __repr__(self):
        """ Provide unambiguous string representation. """
        return self.toString(True)

    def getOpcodeByte(self):
        """
        Decodes sOpcode into a byte range integer value.

        Recognized forms:
            - '0xNN':  the value of the hex number.
            - '/r':    r << 3.
            - '11/r':  (r << 3) | 0xc0.
            - '!11/r': (r << 3) | 0x80.

        Raises exception if sOpcode is None or invalid.
        """
        if self.sOpcode is None:
            raise Exception('No opcode byte for %s!' % (self,))
        sOpcode = str(self.sOpcode)     # pylint type confusion workaround.

        # Full hex byte form.
        if sOpcode[:2] == '0x':
            return int(sOpcode, 16)

        # The /r form (a slash followed by a single digit, i.e. two characters):
        if len(sOpcode) == 2 and sOpcode.startswith('/') and sOpcode[-1].isdigit():
            return int(sOpcode[-1:]) << 3

        # The 11/r form:
        if len(sOpcode) == 4 and sOpcode.startswith('11/') and sOpcode[-1].isdigit():
            return (int(sOpcode[-1:]) << 3) | 0xc0

        # The !11/r form (returns mod=10b, i.e. 0x80):
        ## @todo this doesn't really work...
        if len(sOpcode) == 5 and sOpcode.startswith('!11/') and sOpcode[-1].isdigit():
            return (int(sOpcode[-1:]) << 3) | 0x80

        raise Exception('unsupported opcode byte spec "%s" for %s' % (sOpcode, self,))

    @staticmethod
    def _flagsToIntegerMask(asFlags):
        """
        Returns the integer mask value for asFlags.

        Each flag is translated via g_kdEFlagsMnemonics into a constant name
        whose value is taken from g_kdX86EFlagsConstants.  An empty list or
        None gives zero.
        """
        uRet = 0
        if asFlags:
            for sFlag in asFlags:
                sConstant = g_kdEFlagsMnemonics[sFlag]
                assert sConstant[0] != '!', sConstant
                uRet |= g_kdX86EFlagsConstants[sConstant]
        return uRet

    def getTestedFlagsMask(self):
        """ Returns asFlTest into a integer mask value """
        return self._flagsToIntegerMask(self.asFlTest)

    def getModifiedFlagsMask(self):
        """ Returns asFlModify into a integer mask value """
        return self._flagsToIntegerMask(self.asFlModify)

    def getUndefinedFlagsMask(self):
        """ Returns asFlUndefined into a integer mask value """
        return self._flagsToIntegerMask(self.asFlUndefined)

    def getSetFlagsMask(self):
        """ Returns asFlSet into a integer mask value """
        return self._flagsToIntegerMask(self.asFlSet)

    def getClearedFlagsMask(self):
        """ Returns asFlClear into a integer mask value """
        return self._flagsToIntegerMask(self.asFlClear)
1441
1442
## All the instructions.
g_aoAllInstructions          = list()   # type: list(Instruction)

## All the instructions indexed by statistics name (opstat).
g_dAllInstructionsByStat     = dict()   # type: dict(Instruction)

## All the instructions indexed by function name (opfunction).
g_dAllInstructionsByFunction = dict()   # type: dict(list(Instruction))

## Instructions tagged by oponlytest
g_aoOnlyTestInstructions     = list()   # type: list(Instruction)
1454
## Instruction maps.
## Keyed by map name; each entry repeats its key as the first InstructionMap argument.
g_dInstructionMaps = {
    # One byte opcode map and its groups.
    # NOTE(review): 'grp1_80' is the only grp* entry without an sSelector argument - confirm this is intended.
    'one':          InstructionMap('one'),
    'grp1_80':      InstructionMap('grp1_80', asLeadOpcodes = ['0x80']),
    'grp1_81':      InstructionMap('grp1_81', asLeadOpcodes = ['0x81'], sSelector = '/r'),
    'grp1_82':      InstructionMap('grp1_82', asLeadOpcodes = ['0x82'], sSelector = '/r'),
    'grp1_83':      InstructionMap('grp1_83', asLeadOpcodes = ['0x83'], sSelector = '/r'),
    'grp1a':        InstructionMap('grp1a', asLeadOpcodes = ['0x8f'], sSelector = '/r'),
    'grp2_c0':      InstructionMap('grp2_c0', asLeadOpcodes = ['0xc0'], sSelector = '/r'),
    'grp2_c1':      InstructionMap('grp2_c1', asLeadOpcodes = ['0xc1'], sSelector = '/r'),
    'grp2_d0':      InstructionMap('grp2_d0', asLeadOpcodes = ['0xd0'], sSelector = '/r'),
    'grp2_d1':      InstructionMap('grp2_d1', asLeadOpcodes = ['0xd1'], sSelector = '/r'),
    'grp2_d2':      InstructionMap('grp2_d2', asLeadOpcodes = ['0xd2'], sSelector = '/r'),
    'grp2_d3':      InstructionMap('grp2_d3', asLeadOpcodes = ['0xd3'], sSelector = '/r'),
    'grp3_f6':      InstructionMap('grp3_f6', asLeadOpcodes = ['0xf6'], sSelector = '/r'),
    'grp3_f7':      InstructionMap('grp3_f7', asLeadOpcodes = ['0xf7'], sSelector = '/r'),
    'grp4':         InstructionMap('grp4', asLeadOpcodes = ['0xfe'], sSelector = '/r'),
    'grp5':         InstructionMap('grp5', asLeadOpcodes = ['0xff'], sSelector = '/r'),
    'grp11_c6_m':   InstructionMap('grp11_c6_m', asLeadOpcodes = ['0xc6'], sSelector = '!11 /r'),
    'grp11_c6_r':   InstructionMap('grp11_c6_r', asLeadOpcodes = ['0xc6'], sSelector = '11'),    # xabort
    'grp11_c7_m':   InstructionMap('grp11_c7_m', asLeadOpcodes = ['0xc7'], sSelector = '!11 /r'),
    'grp11_c7_r':   InstructionMap('grp11_c7_r', asLeadOpcodes = ['0xc7'], sSelector = '11'),    # xbegin

    # Two byte (0x0f) opcode map and its groups.
    'two0f':        InstructionMap('two0f', asLeadOpcodes = ['0x0f'], sDisParse = 'IDX_ParseTwoByteEsc'),
    'grp6':         InstructionMap('grp6', asLeadOpcodes = ['0x0f', '0x00'], sSelector = '/r'),
    'grp7_m':       InstructionMap('grp7_m', asLeadOpcodes = ['0x0f', '0x01'], sSelector = '!11 /r'),
    'grp7_r':       InstructionMap('grp7_r', asLeadOpcodes = ['0x0f', '0x01'], sSelector = '11'),
    'grp8':         InstructionMap('grp8', asLeadOpcodes = ['0x0f', '0xba'], sSelector = '/r'),
    'grp9':         InstructionMap('grp9', asLeadOpcodes = ['0x0f', '0xc7'], sSelector = 'mod /r'),
    'grp10':        InstructionMap('grp10', asLeadOpcodes = ['0x0f', '0xb9'], sSelector = '/r'),  # UD1 /w modr/m
    'grp12':        InstructionMap('grp12', asLeadOpcodes = ['0x0f', '0x71'], sSelector = 'mod /r'),
    'grp13':        InstructionMap('grp13', asLeadOpcodes = ['0x0f', '0x72'], sSelector = 'mod /r'),
    'grp14':        InstructionMap('grp14', asLeadOpcodes = ['0x0f', '0x73'], sSelector = 'mod /r'),
    'grp15':        InstructionMap('grp15', asLeadOpcodes = ['0x0f', '0xae'], sSelector = 'memreg /r'),
    'grp16':        InstructionMap('grp16', asLeadOpcodes = ['0x0f', '0x18'], sSelector = 'mod /r'),
    'grpA17':       InstructionMap('grpA17', asLeadOpcodes = ['0x0f', '0x78'], sSelector = '/r'), # AMD: EXTRQ weirdness
    'grpP':         InstructionMap('grpP', asLeadOpcodes = ['0x0f', '0x0d'], sSelector = '/r'),   # AMD: prefetch

    # Three byte opcode maps.
    'three0f38':    InstructionMap('three0f38', asLeadOpcodes = ['0x0f', '0x38']),
    'three0f3a':    InstructionMap('three0f3a', asLeadOpcodes = ['0x0f', '0x3a']),

    # VEX maps and groups.
    'vexmap1':      InstructionMap('vexmap1', sEncoding = 'vex1'),
    'vexgrp12':     InstructionMap('vexgrp12', sEncoding = 'vex1', asLeadOpcodes = ['0x71'], sSelector = 'mod /r'),
    'vexgrp13':     InstructionMap('vexgrp13', sEncoding = 'vex1', asLeadOpcodes = ['0x72'], sSelector = 'mod /r'),
    'vexgrp14':     InstructionMap('vexgrp14', sEncoding = 'vex1', asLeadOpcodes = ['0x73'], sSelector = 'mod /r'),
    'vexgrp15':     InstructionMap('vexgrp15', sEncoding = 'vex1', asLeadOpcodes = ['0xae'], sSelector = 'memreg /r'),
    'vexgrp17':     InstructionMap('vexgrp17', sEncoding = 'vex1', asLeadOpcodes = ['0xf3'], sSelector = '/r'),

    'vexmap2':      InstructionMap('vexmap2', sEncoding = 'vex2'),
    'vexmap3':      InstructionMap('vexmap3', sEncoding = 'vex3'),

    # 3DNow! and XOP maps and groups.
    '3dnow':        InstructionMap('3dnow', asLeadOpcodes = ['0x0f', '0x0f']),
    'xopmap8':      InstructionMap('xopmap8', sEncoding = 'xop8'),
    'xopmap9':      InstructionMap('xopmap9', sEncoding = 'xop9'),
    'xopgrp1':      InstructionMap('xopgrp1', sEncoding = 'xop9', asLeadOpcodes = ['0x01'], sSelector = '/r'),
    'xopgrp2':      InstructionMap('xopgrp2', sEncoding = 'xop9', asLeadOpcodes = ['0x02'], sSelector = '/r'),
    'xopgrp3':      InstructionMap('xopgrp3', sEncoding = 'xop9', asLeadOpcodes = ['0x12'], sSelector = '/r'),
    'xopmap10':     InstructionMap('xopmap10', sEncoding = 'xop10'),
    'xopgrp4':      InstructionMap('xopgrp4', sEncoding = 'xop10', asLeadOpcodes = ['0x12'], sSelector = '/r'),
}
1515
1516
1517
class ParserException(Exception):
    """
    Parser exception.

    Carries a single message string.
    """

    def __init__(self, sMessage):
        super(ParserException, self).__init__(sMessage)
1522
1523
1524class SimpleParser(object):
1525 """
1526 Parser of IEMAllInstruction*.cpp.h instruction specifications.
1527 """
1528
1529 ## @name Parser state.
1530 ## @{
1531 kiCode = 0;
1532 kiCommentMulti = 1;
1533 ## @}
1534
def __init__(self, sSrcFile, asLines, sDefaultMap):
    """
    Sets up the parser state for one source file.

    sSrcFile is the file name used in messages, asLines the lines to parse
    and sDefaultMap the name of the default instruction map, which must be
    a key of g_dInstructionMaps (asserted).
    """
    # Input and current position / state.
    self.sSrcFile       = sSrcFile
    self.asLines        = asLines
    self.iLine          = 0
    self.iState         = self.kiCode
    self.sComment       = ''
    self.iCommentLine   = 0
    self.aoCurInstrs    = []

    # The map instructions end up in when they do not name one themselves.
    assert sDefaultMap in g_dInstructionMaps
    self.oDefaultMap    = g_dInstructionMaps[sDefaultMap]

    # Statistics.
    self.cTotalInstr    = 0
    self.cTotalStubs    = 0
    self.cTotalTagged   = 0

    # Regular expressions for validating the various kinds of names.
    self.oReMacroName   = re.compile('^[A-Za-z_][A-Za-z0-9_]*$')
    self.oReMnemonic    = re.compile('^[A-Za-z_][A-Za-z0-9_]*$')
    self.oReStatsName   = re.compile('^[A-Za-z_][A-Za-z0-9_]*$')
    self.oReFunctionName= re.compile('^iemOp_[A-Za-z_][A-Za-z0-9_]*$')
    self.oReGroupName   = re.compile('^og_[a-z0-9]+(|_[a-z0-9]+|_[a-z0-9]+_[a-z0-9]+)$')
    self.oReDisEnum     = re.compile('^OP_[A-Z0-9_]+$')
    self.fDebug         = True

    # Tag name to handler method dispatch table.
    self.dTagHandlers   = {
        '@opbrief':     self.parseTagOpBrief,
        '@opdesc':      self.parseTagOpDesc,
        '@opmnemonic':  self.parseTagOpMnemonic,
        '@op1':         self.parseTagOpOperandN,
        '@op2':         self.parseTagOpOperandN,
        '@op3':         self.parseTagOpOperandN,
        '@op4':         self.parseTagOpOperandN,
        '@oppfx':       self.parseTagOpPfx,
        '@opmaps':      self.parseTagOpMaps,
        '@opcode':      self.parseTagOpcode,
        '@opcodesub':   self.parseTagOpcodeSub,
        '@openc':       self.parseTagOpEnc,
        '@opfltest':    self.parseTagOpEFlags,
        '@opflmodify':  self.parseTagOpEFlags,
        '@opflundef':   self.parseTagOpEFlags,
        '@opflset':     self.parseTagOpEFlags,
        '@opflclear':   self.parseTagOpEFlags,
        '@ophints':     self.parseTagOpHints,
        '@opdisenum':   self.parseTagOpDisEnum,
        '@opmincpu':    self.parseTagOpMinCpu,
        '@opcpuid':     self.parseTagOpCpuId,
        '@opgroup':     self.parseTagOpGroup,
        '@opunused':    self.parseTagOpUnusedInvalid,
        '@opinvalid':   self.parseTagOpUnusedInvalid,
        '@opinvlstyle': self.parseTagOpUnusedInvalid,
        '@optest':      self.parseTagOpTest,
        '@optestign':   self.parseTagOpTestIgnore,
        '@optestignore': self.parseTagOpTestIgnore,
        '@opcopytests': self.parseTagOpCopyTests,
        '@oponly':      self.parseTagOpOnlyTest,
        '@oponlytest':  self.parseTagOpOnlyTest,
        '@opxcpttype':  self.parseTagOpXcptType,
        '@opstats':     self.parseTagOpStats,
        '@opfunction':  self.parseTagOpFunction,
        '@opdone':      self.parseTagOpDone,
    }
    # Numbered test tags: @optest0..@optest47 as well as @optest[0]..@optest[47].
    for iTest in range(48):
        for sTagFormat in ('@optest%u', '@optest[%u]'):
            self.dTagHandlers[sTagFormat % (iTest,)] = self.parseTagOpTestNum

    # Messages collected by error() and errorComment().
    self.asErrors = []
1601
def raiseError(self, sMessage):
    """
    Raises a ParserException whose message is sMessage prefixed with the
    source file name and the current line number (self.iLine).
    """
    sFullMessage = "%s:%d: error: %s" % (self.sSrcFile, self.iLine, sMessage,)
    raise ParserException(sFullMessage)
1607
def raiseCommentError(self, iLineInComment, sMessage):
    """
    Raises a ParserException like raiseError does, except that the line
    number reported is self.iCommentLine + iLineInComment.
    """
    iReportedLine = self.iCommentLine + iLineInComment
    sFullMessage  = "%s:%d: error: %s" % (self.sSrcFile, iReportedLine, sMessage,)
    raise ParserException(sFullMessage)
1613
def error(self, sMessage):
    """
    Records an error message for the current line (self.iLine) in
    self.asErrors instead of raising.

    Always returns False.
    """
    sEntry = u'%s:%d: error: %s\n' % (self.sSrcFile, self.iLine, sMessage,)
    self.asErrors.append(sEntry)
    return False
1621
def errorComment(self, iLineInComment, sMessage):
    """
    Records an error message in self.asErrors for a line inside the current
    comment; the line number reported is self.iCommentLine + iLineInComment.

    Always returns False.
    """
    iReportedLine = self.iCommentLine + iLineInComment
    sEntry = u'%s:%d: error: %s\n' % (self.sSrcFile, iReportedLine, sMessage,)
    self.asErrors.append(sEntry)
    return False
1629
def printErrors(self):
    """
    Writes the collected error messages, if any, to stderr in one go.

    Returns number of errors.
    """
    cErrors = len(self.asErrors)
    if self.asErrors:
        sys.stderr.write(u''.join(self.asErrors))
    return cErrors
1638
def debug(self, sMessage):
    """
    For debugging.

    Prints sMessage with a 'debug: ' prefix when self.fDebug is set and
    does nothing otherwise.
    """
    if not self.fDebug:
        return
    print('debug: %s' % (sMessage,))
1645
1646
def addInstruction(self, iLine = None):
    """
    Adds an instruction.

    Creates an Instruction for self.sSrcFile at iLine (self.iLine when iLine
    is None), appends it to both g_aoAllInstructions and self.aoCurInstrs
    and returns it.
    """
    iCreatedAt = self.iLine if iLine is None else iLine
    oNewInstr  = Instruction(self.sSrcFile, iCreatedAt)
    for aoTarget in (g_aoAllInstructions, self.aoCurInstrs):
        aoTarget.append(oNewInstr)
    return oNewInstr
1655
def deriveMnemonicAndOperandsFromStats(self, oInstr, sStats):
    """
    Derives the mnemonic and operands from a IEM stats base name like string.

    Nothing is done when the instruction already has a mnemonic.  Otherwise
    the first underscore separated word, lower-cased, becomes the mnemonic.
    The remaining words are taken as operand types, but only when the
    instruction has no operands yet.

    Returns False when hitting a word that is not in g_kdOpTypes (operands
    added before that point are kept), otherwise True.
    """
    if oInstr.sMnemonic is not None:
        return True

    asParts = sStats.split('_')
    oInstr.sMnemonic = asParts[0].lower()

    if len(asParts) <= 1 or oInstr.aoOperands:
        return True

    for sType in asParts[1:]:
        if sType not in g_kdOpTypes:
            # Unknown operand type; this is not reported via self.error().
            return False
        oInstr.aoOperands.append(Operand(g_kdOpTypes[sType][1], sType))
    return True
1671
def doneInstructionOne(self, oInstr, iLine):
    """
    Complete the parsing by processing, validating and expanding raw inputs.

    Records iLine as the completion line, fills in defaults for attributes
    that are still unset (mnemonic, disassembler enum, stats name, function
    name, encoding and maps) and registers the instruction in its maps and
    in the by-stat and by-function dictionaries.

    Always returns True; a duplicate opstat value is reported via
    self.error().
    """
    assert oInstr.iLineCompleted is None
    oInstr.iLineCompleted = iLine

    def _appendOperandTypes(sBase):
        """ Returns sBase with '_<type>' appended for each operand having a type. """
        for oOperand in oInstr.aoOperands:
            if oOperand.sType:
                sBase += '_' + oOperand.sType
        return sBase

    #
    # Common defaults.  Instructions specified using @op tags and unspecified
    # legacy ones (like '/** Opcode 0x0f 0x00 /0. */' followed by
    # 'FNIEMOPRM_DEF(iemOp_Grp6_sldt)') are treated the same way here.
    #

    # Guess mnemonic and operands from stats if the former is missing.
    if oInstr.sMnemonic is None:
        sBaseName = oInstr.sStats
        if sBaseName is None and oInstr.sFunction is not None:
            sBaseName = oInstr.sFunction.replace('iemOp_', '')
        if sBaseName is not None:
            self.deriveMnemonicAndOperandsFromStats(oInstr, sBaseName)

    # Derive the disassembler op enum constant from the mnemonic.
    if oInstr.sDisEnum is None and oInstr.sMnemonic is not None:
        oInstr.sDisEnum = 'OP_' + oInstr.sMnemonic.upper()

    # Derive the IEM statistics base name from mnemonic and operand types.
    if oInstr.sStats is None:
        if oInstr.sFunction is not None:
            oInstr.sStats = oInstr.sFunction.replace('iemOp_', '')
        elif oInstr.sMnemonic is not None:
            oInstr.sStats = _appendOperandTypes(oInstr.sMnemonic)

    # Derive the IEM function name from mnemonic and operand types.
    if oInstr.sFunction is None:
        if oInstr.sMnemonic is not None:
            oInstr.sFunction = _appendOperandTypes('iemOp_' + oInstr.sMnemonic)
        elif oInstr.sStats:
            oInstr.sFunction = 'iemOp_' + oInstr.sStats

    # Derive encoding from operands.
    if oInstr.sEncoding is None:
        if not oInstr.aoOperands:
            oInstr.sEncoding = 'ModR/M' if oInstr.fUnused and oInstr.sSubOpcode else 'fixed'
        elif oInstr.aoOperands[0].usesModRM():
            fVexOperand = len(oInstr.aoOperands) >= 2 and oInstr.aoOperands[1].sWhere == 'vvvv'
            oInstr.sEncoding = 'ModR/M+VEX' if fVexOperand else 'ModR/M'

    #
    # Apply default map and then add the instruction to all its groups.
    #
    if not oInstr.aoMaps:
        oInstr.aoMaps = [ self.oDefaultMap, ]
    for oMap in oInstr.aoMaps:
        oMap.aoInstructions.append(oInstr)

    #
    # Check the opstat value and add it to the opstat indexed dictionary.
    #
    if oInstr.sStats:
        if oInstr.sStats in g_dAllInstructionsByStat:
            self.error('Duplicate opstat value "%s"\nnew: %s\nold: %s'
                       % (oInstr.sStats, oInstr, g_dAllInstructionsByStat[oInstr.sStats],))
        else:
            g_dAllInstructionsByStat[oInstr.sStats] = oInstr

    #
    # Add to function indexed dictionary.  We allow multiple instructions per function.
    #
    if oInstr.sFunction:
        g_dAllInstructionsByFunction.setdefault(oInstr.sFunction, []).append(oInstr)

    return True
1774
def doneInstructions(self, iLineInComment = None):
    """
    Done with current instruction.

    Completes each of the current instructions via doneInstructionOne(),
    updates the stub and instruction counters and then resets the pending
    comment and the list of current instructions.  The completion line is
    self.iLine, or self.iCommentLine + iLineInComment when iLineInComment
    is given.

    Always returns True.
    """
    for oInstr in self.aoCurInstrs:
        if iLineInComment is None:
            iLineDone = self.iLine
        else:
            iLineDone = self.iCommentLine + iLineInComment
        self.doneInstructionOne(oInstr, iLineDone)
        if oInstr.fStub:
            self.cTotalStubs += 1

    self.cTotalInstr += len(self.aoCurInstrs)

    # Reset the per-instruction parser state.
    self.sComment    = ''
    self.aoCurInstrs = []
    return True
1789
def setInstrunctionAttrib(self, sAttrib, oValue, fOverwrite = False):
    """
    Sets the attribute sAttrib of all the current instructions to oValue.

    Unless fOverwrite is exactly True, an instruction whose attribute
    currently holds anything other than None is left untouched.
    """
    fForce = fOverwrite is True;
    for oCurInstr in self.aoCurInstrs:
        if fForce or getattr(oCurInstr, sAttrib) is None:
            setattr(oCurInstr, sAttrib, oValue);
1801
def setInstrunctionArrayAttrib(self, sAttrib, iEntry, oValue, fOverwrite = False):
    """
    Sets entry iEntry of the array attribute sAttrib of all the current
    instructions to oValue.

    The array is padded with None entries when it is too short.  Unless
    fOverwrite is exactly True, only entries that are None get replaced.
    """
    for oCurInstr in self.aoCurInstrs:
        aoEntries = getattr(oCurInstr, sAttrib);

        # Grow the array so that iEntry becomes a valid index.
        cMissing = iEntry + 1 - len(aoEntries);
        if cMissing > 0:
            aoEntries.extend([None] * cMissing);

        if fOverwrite is True or aoEntries[iEntry] is None:
            aoEntries[iEntry] = oValue;
1813
def parseCommentOldOpcode(self, asLines):
    """
    Deals with 'Opcode 0xff /4' like comments.

    When the first comment line starts with the word 'Opcode' followed by a
    word with a '0x' or '0X' prefix, the words following 'Opcode' are joined
    with single spaces and stored as the 'sRawOldOpcodes' attribute via
    setInstrunctionAttrib (default, non-overwriting mode).  A '0X' prefix is
    normalized to '0x'.

    Always returns False.
    """
    if not asLines:
        return False;

    asWords = asLines[0].split();
    if    len(asWords) >= 2 \
      and asWords[0] == 'Opcode' \
      and asWords[1].startswith(('0x', '0X')):
        # Drop the leading 'Opcode' word, keeping the opcode bytes and any
        # trailing qualifiers (like '/4'), and lower-case the hex prefix.
        asOpcodes = [];
        for sWord in asWords[1:]:
            if sWord.startswith('0X'):
                sWord = '0x' + sWord[2:];
            asOpcodes.append(sWord);
        self.setInstrunctionAttrib('sRawOldOpcodes', ' '.join(asOpcodes));

    return False;
1829
def ensureInstructionForOpTag(self, iTagLine):
    """
    Makes sure there is a current instruction for the op-tag being parsed.

    If there are no current instructions, addInstruction is called with the
    line number self.iCommentLine + iTagLine.  The op-tag count of every
    current instruction is incremented, and self.cTotalTagged is bumped for
    each instruction receiving its first tag.

    Returns the last of the current instructions.
    """
    if not self.aoCurInstrs:
        self.addInstruction(self.iCommentLine + iTagLine);

    aoTagged = self.aoCurInstrs;
    for oTagged in aoTagged:
        oTagged.cOpTags += 1;
        if oTagged.cOpTags == 1:
            self.cTotalTagged += 1;
    return aoTagged[-1];
1839
1840 @staticmethod
1841 def flattenSections(aasSections):
1842 """
1843 Flattens multiline sections into stripped single strings.
1844 Returns list of strings, on section per string.
1845 """
1846 asRet = [];
1847 for asLines in aasSections:
1848 if asLines:
1849 asRet.append(' '.join([sLine.strip() for sLine in asLines]));
1850 return asRet;
1851
1852 @staticmethod
1853 def flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = '\n'):
1854 """
1855 Flattens sections into a simple stripped string with newlines as
1856 section breaks. The final section does not sport a trailing newline.
1857 """
1858 # Typical: One section with a single line.
1859 if len(aasSections) == 1 and len(aasSections[0]) == 1:
1860 return aasSections[0][0].strip();
1861
1862 sRet = '';
1863 for iSection, asLines in enumerate(aasSections):
1864 if asLines:
1865 if iSection > 0:
1866 sRet += sSectionSep;
1867 sRet += sLineSep.join([sLine.strip() for sLine in asLines]);
1868 return sRet;
1869
1870
1871
1872 ## @name Tag parsers
1873 ## @{
1874
def parseTagOpBrief(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \@opbrief
    Value:  Text description, multiple sections, appended.

    Brief description.  If not given, it's the first sentence from @opdesc.

    The flattened value must be non-empty, at most 180 characters long
    (after a terminating '.' has been appended if missing) and consist of
    a single sentence.  On success the text is stored in oInstr.sBrief and
    True is returned.  Otherwise the problem is reported thru errorComment
    and its return value is returned.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sBrief = self.flattenAllSections(aasSections);
    if not sBrief:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if sBrief[-1] != '.':
        sBrief = sBrief + '.';
    if len(sBrief) > 180:
        return self.errorComment(iTagLine, '%s: value too long (max 180 chars): %s' % (sTag, sBrief));

    # Look for a dot followed by a space, i.e. the end of a first sentence
    # that is followed by more text.  Dots inside words/numbers are skipped.
    offDot = sBrief.find('.');
    while offDot >= 0 and offDot < len(sBrief) - 1 and sBrief[offDot + 1] != ' ':
        offDot = sBrief.find('.', offDot + 1);
    if offDot >= 0 and offDot != len(sBrief) - 1:
        return self.errorComment(iTagLine, '%s: only one sentence: %s' % (sTag, sBrief));

    # Update the instruction, refusing to overwrite an existing brief.
    if oInstr.sBrief is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite brief "%s" with "%s"'
                                           % (sTag, oInstr.sBrief, sBrief,));
    oInstr.sBrief = sBrief;

    _ = iEndLine;
    return True;
1904
def parseTagOpDesc(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \@opdesc
    Value:  Text description, multiple sections, appended.

    Describes the instruction.  The flattened sections are appended to the
    asDescSections list of the instruction.  Always returns True.
    """
    _ = sTag; _ = iEndLine;

    oInstr = self.ensureInstructionForOpTag(iTagLine);
    if aasSections:
        oInstr.asDescSections.extend(self.flattenSections(aasSections));
    return True;
1919
def parseTagOpMnemonic(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \@opmnemonic
    Value:  mnemonic

    The 'mnemonic' value must match self.oReMnemonic.  Because of prefixes,
    groups and whatnot, there are times when the mnemonic isn't that of an
    actual assembler mnemonic.

    Returns True after storing the value in oInstr.sMnemonic.  An invalid
    value or an attempt to overwrite an existing mnemonic is reported thru
    errorComment, whose return value is then returned.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sMnemonic = self.flattenAllSections(aasSections);
    if not self.oReMnemonic.match(sMnemonic):
        sError = '%s: invalid menmonic name: "%s"' % (sTag, sMnemonic,);
        return self.errorComment(iTagLine, sError);

    # Refuse to replace an already specified mnemonic.
    if oInstr.sMnemonic is not None:
        sError = '%s: attempting to overwrite menmonic "%s" with "%s"' % (sTag, oInstr.sMnemonic, sMnemonic,);
        return self.errorComment(iTagLine, sError);

    oInstr.sMnemonic = sMnemonic
    return True;
1942
def parseTagOpOperandN(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tags:  \@op1, \@op2, \@op3, \@op4
    Value: [where:]type

    The 'where' value indicates where the operand is found, like the 'reg'
    part of the ModR/M encoding.  It must be a key in g_kdOpLocations.  When
    omitted, it is taken from entry [1] of the g_kdOpTypes value for the
    type.

    The 'type' value indicates the operand type.  These follow the types
    given in the opcode tables in the CPU reference manuals.  It must be a
    key in g_kdOpTypes.

    The zero based operand index is derived from the last character of the
    tag.  Returns True after storing a new Operand in oInstr.aoOperands
    (padding the list with None as needed).  Problems are reported thru
    errorComment, whose return value is then returned.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    idxOp = int(sTag[-1]) - 1;
    assert idxOp >= 0 and idxOp < 4;

    # flatten, split up, and validate the "where:type" value.
    sFlattened = self.flattenAllSections(aasSections);
    asSplit = sFlattened.split(':');
    if len(asSplit) == 1:
        sType  = asSplit[0];
        sWhere = None;
    elif len(asSplit) == 2:
        (sWhere, sType) = asSplit;
    else:
        return self.errorComment(iTagLine, 'expected %s value on format "[<where>:]<type>" not "%s"' % (sTag, sFlattened,));

    if sType not in g_kdOpTypes:
        # Note: This is about the type, not the location.
        return self.errorComment(iTagLine, '%s: invalid type value "%s", valid: %s'
                                           % (sTag, sType, ', '.join(g_kdOpTypes.keys()),));
    if sWhere is None:
        sWhere = g_kdOpTypes[sType][1];
    elif sWhere not in g_kdOpLocations:
        return self.errorComment(iTagLine, '%s: invalid where value "%s", valid: %s'
                                           % (sTag, sWhere, ', '.join(g_kdOpLocations.keys()),));

    # Insert the operand, refusing to overwrite an existing one.
    while idxOp >= len(oInstr.aoOperands):
        oInstr.aoOperands.append(None);
    if oInstr.aoOperands[idxOp] is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s:%s" with "%s:%s"'
                                           % ( sTag, oInstr.aoOperands[idxOp].sWhere, oInstr.aoOperands[idxOp].sType,
                                               sWhere, sType,));
    oInstr.aoOperands[idxOp] = Operand(sWhere, sType);

    _ = iEndLine;
    return True;
1992
def parseTagOpMaps(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \@opmaps
    Value:  map[,map2]

    Indicates which maps the instruction is in.  There is a default map
    associated with each input file.

    Each map name must be a key in g_dInstructionMaps.  Naming a map the
    instruction is already assigned to is an error.  Returns True after
    appending the maps to oInstr.aoMaps.  Validation failures are reported
    thru errorComment, whose return value is then returned.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split up and validate the value.
    asMaps = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',').split(',');
    if not asMaps:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    for sMap in asMaps:
        if sMap not in g_dInstructionMaps:
            sValid = ', '.join(g_dInstructionMaps.keys());
            return self.errorComment(iTagLine, '%s: invalid map value: %s (valid values: %s)' % (sTag, sMap, sValid,));

    # Throw an error if any of the maps are assigned already.
    asAlready = [oMap.sName for oMap in oInstr.aoMaps if oMap.sName in asMaps];
    if asAlready:
        return self.errorComment(iTagLine, '%s: duplicate map assignment: %s' % (sTag, asAlready[0]));

    # Add the maps.  Duplicates within the value itself are only complained about.
    for sMap in asMaps:
        oNewMap = g_dInstructionMaps[sMap];
        if oNewMap in oInstr.aoMaps:
            self.errorComment(iTagLine, '%s: duplicate map assignment (input): %s' % (sTag, sMap));
        else:
            oInstr.aoMaps.append(oNewMap);

    return True;
2027
def parseTagOpPfx(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \@oppfx
    Value:  n/a|none|0x66|0xf3|0xf2

    Required prefix for the instruction.  (In a (E)VEX context this is the
    value of the 'pp' field rather than an actual prefix.)

    The value is lower-cased.  'n/a' is stored as None, and a value of
    exactly two characters gets '0x' prepended before being checked with
    _isValidOpcodeByte.  Anything but 'n/a' must be found in g_kdPrefixes.

    Returns True after storing the value in oInstr.sPrefix.  A missing or
    invalid value, or an attempt to overwrite a previous value, is reported
    thru errorComment, whose return value is then returned.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sFlattened = self.flattenAllSections(aasSections);
    asPrefixes = sFlattened.split();
    if not asPrefixes:
        # Must be caught here, indexing the empty list below would raise IndexError.
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if len(asPrefixes) > 1:
        return self.errorComment(iTagLine, '%s: max one prefix: %s' % (sTag, asPrefixes,));

    sPrefix = asPrefixes[0].lower();
    if sPrefix == 'n/a':
        sPrefix = None;
    elif sPrefix != 'none':
        if len(sPrefix) == 2:
            sPrefix = '0x' + sPrefix;
        if not _isValidOpcodeByte(sPrefix):
            return self.errorComment(iTagLine, '%s: invalid prefix: %s' % (sTag, sPrefix,));

    if sPrefix is not None and sPrefix not in g_kdPrefixes:
        return self.errorComment(iTagLine, '%s: invalid prefix: %s (valid %s)' % (sTag, sPrefix, g_kdPrefixes,));

    # Set it.
    if oInstr.sPrefix is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sPrefix, sPrefix,));
    oInstr.sPrefix = sPrefix;

    _ = iEndLine;
    return True;
2065
def parseTagOpcode(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \@opcode
    Value:  0x?? | /reg (TODO: | mr/reg | 11 /reg | !11 /reg | 11 mr/reg | !11 mr/reg)

    The opcode byte or sub-byte for the instruction in the context of a map.

    Accepted forms are anything _isValidOpcodeByte approves of, as well as
    '/N', '11/N' and '!11/N' where N is a single digit in the range 0 thru 7
    (the ModR/M reg field is three bits wide).

    Returns True after storing the value in oInstr.sOpcode.  An invalid value
    or an attempt to overwrite a previous value is reported thru errorComment,
    whose return value is then returned.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sOpcode = self.flattenAllSections(aasSections);
    if _isValidOpcodeByte(sOpcode):
        pass;
    elif len(sOpcode) == 2 and sOpcode.startswith('/') and sOpcode[-1] in '01234567':
        pass;
    elif len(sOpcode) == 4 and sOpcode.startswith('11/') and sOpcode[-1] in '01234567':
        pass;
    elif len(sOpcode) == 5 and sOpcode.startswith('!11/') and sOpcode[-1] in '01234567':
        pass;
    else:
        return self.errorComment(iTagLine, '%s: invalid opcode: %s' % (sTag, sOpcode,));

    # Set it.
    if oInstr.sOpcode is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sOpcode, sOpcode,));
    oInstr.sOpcode = sOpcode;

    _ = iEndLine;
    return True;
2095
def parseTagOpcodeSub(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \@opcodesub
    Value:  none | 11 mr/reg | !11 mr/reg

    This is a simple way of dealing with encodings where the mod=3 and mod!=3
    represents exactly two different instructions.  The more proper way would
    be to go via maps with two members, but this is faster.

    The flattened value must be a key in g_kdSubOpcodes; entry [0] of the
    associated table value is what gets stored in oInstr.sSubOpcode.  Returns
    True on success.  Problems are reported thru errorComment, whose return
    value is then returned.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value and translate it via the table.
    sRawValue = self.flattenAllSections(aasSections);
    if sRawValue not in g_kdSubOpcodes:
        return self.errorComment(iTagLine, '%s: invalid sub opcode: %s (valid: 11, !11, none)' % (sTag, sRawValue,));
    sSubOpcode = g_kdSubOpcodes[sRawValue][0];

    # Set it unless already set.
    if oInstr.sSubOpcode is None:
        oInstr.sSubOpcode = sSubOpcode;
        return True;
    return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                       % ( sTag, oInstr.sSubOpcode, sSubOpcode,));
2121
def parseTagOpEnc(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \@openc
    Value:  ModR/M|fixed|prefix|<map name>

    The instruction operand encoding style.

    The value is accepted when it is a key in g_kdEncodings, a key in
    g_dInstructionMaps, or passes _isValidOpcodeByte (checked in that
    order).  Returns True after storing it in oInstr.sEncoding.  Problems
    are reported thru errorComment, whose return value is then returned.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sEncoding = self.flattenAllSections(aasSections);
    fAccepted =    sEncoding in g_kdEncodings \
                or sEncoding in g_dInstructionMaps \
                or _isValidOpcodeByte(sEncoding);
    if not fAccepted:
        return self.errorComment(iTagLine, '%s: invalid encoding: %s' % (sTag, sEncoding,));

    # Set it unless already set.
    if oInstr.sEncoding is None:
        oInstr.sEncoding = sEncoding;
        return True;
    return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                       % ( sTag, oInstr.sEncoding, sEncoding,));
2148
## Maps the EFLAGS tags to the name of the Instruction attribute holding the flag list.
kdOpFlagToAttr = {
    '@opfltest':    'asFlTest',
    '@opflmodify':  'asFlModify',
    '@opflundef':   'asFlUndefined',
    '@opflset':     'asFlSet',
    '@opflclear':   'asFlClear',
};

def parseTagOpEFlags(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tags:   \@opfltest, \@opflmodify, \@opflundef, \@opflset, \@opflclear
    Value:  <eflags specifier>

    The value is a comma separated list of names found in
    g_kdEFlagsMnemonics, or the single word 'none' (any case) for an empty
    list.  The resulting list is stored in the instruction attribute that
    kdOpFlagToAttr associates with the tag.

    Returns True on success, False if any of the flag names are invalid
    (each one is reported thru errorComment).  Overwriting an attribute
    that isn't None is reported thru errorComment, whose return value is
    then returned.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split up and validate the values.
    asFlags = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',').split(',');
    if len(asFlags) == 1 and asFlags[0].lower() == 'none':
        asFlags = [];
    else:
        fRc = True;
        for iFlag, sFlag in enumerate(asFlags):
            if sFlag in g_kdEFlagsMnemonics:
                continue;
            # Retry without the surrounding whitespace.
            sStripped = sFlag.strip();
            if sStripped in g_kdEFlagsMnemonics:
                asFlags[iFlag] = sStripped;
            else:
                fRc = self.errorComment(iTagLine, '%s: invalid EFLAGS value: %s' % (sTag, sFlag,));
        if not fRc:
            return False;

    # Set them, unless already set.
    sAttrib = self.kdOpFlagToAttr[sTag];
    asOld   = getattr(oInstr, sAttrib);
    if asOld is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, asOld, asFlags,));
    setattr(oInstr, sAttrib, asFlags);
    return True;
2189
def parseTagOpHints(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \@ophints
    Value:  Comma or space separated list of flags and hints.

    This covers the disassembler flags table and more.

    Each hint must be a key in g_kdHints.  The single word 'none' (any
    case) gives an empty list.  Valid hints are added as keys to the
    oInstr.dHints dictionary.  Hints which are already present are reported
    thru errorComment without failing the tag.

    Returns True on success, False if any of the hints are invalid (each
    one is reported thru errorComment).
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    # Note! split() without arguments never returns words with leading or
    #       trailing whitespace, so there is no need for stripping them.
    asHints = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asHints) == 1 and asHints[0].lower() == 'none':
        asHints = [];
    else:
        fRc = True;
        for sHint in asHints:
            if sHint not in g_kdHints:
                fRc = self.errorComment(iTagLine, '%s: invalid hint value: %s' % (sTag, sHint,));
        if not fRc:
            return False;

    # Append them.
    for sHint in asHints:
        if sHint not in oInstr.dHints:
            oInstr.dHints[sHint] = True; # (dummy value, using dictionary for speed)
        else:
            self.errorComment(iTagLine, '%s: duplicate hint: %s' % ( sTag, sHint,));

    _ = iEndLine;
    return True;
2223
def parseTagOpDisEnum(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \@opdisenum
    Value:  OP_XXXX

    This is for selecting a specific (legacy) disassembler enum value for
    the instruction.

    The value must match self.oReDisEnum.  If more than one word is given,
    this is reported and the first word is used.  Returns True after storing
    the value in oInstr.sDisEnum, False if there is no value at all.  Other
    problems are reported thru errorComment, whose return value is then
    returned.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and split.
    asWords = self.flattenAllSections(aasSections).split();
    cWords  = len(asWords);
    if cWords != 1:
        self.errorComment(iTagLine, '%s: expected exactly one value: %s' % (sTag, asWords,));
        if cWords == 0:
            return False;

    # Validate the first word.
    sDisEnum = asWords[0];
    if not self.oReDisEnum.match(sDisEnum):
        return self.errorComment(iTagLine, '%s: invalid disassembler OP_XXXX enum: %s (pattern: %s)'
                                           % (sTag, sDisEnum, self.oReDisEnum.pattern));

    # Set it unless already set.
    if oInstr.sDisEnum is None:
        oInstr.sDisEnum = sDisEnum;
        return True;
    return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % (sTag, oInstr.sDisEnum, sDisEnum,));
2252
def parseTagOpMinCpu(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \@opmincpu
    Value:  <simple CPU name>

    Indicates when this instruction was introduced.

    The CPU name must be found in g_kdCpuNames.  If more than one word is
    given, this is reported and the first word is used.  A missing or
    invalid value is reported thru errorComment, whose return value is then
    returned.

    The value is stored in oInstr.sMinCpu if that is still None.  If it
    already holds a different value, the conflict is reported thru
    errorComment and the existing value is kept.  Returns True in both of
    these cases.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value, split into words, make sure there's just one, valid it.
    asCpus = self.flattenAllSections(aasSections).split();
    if not asCpus:
        # Must be caught here, indexing the empty list below would raise IndexError.
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if len(asCpus) > 1:
        self.errorComment(iTagLine, '%s: exactly one CPU name, please: %s' % (sTag, ' '.join(asCpus),));

    sMinCpu = asCpus[0];
    if sMinCpu not in g_kdCpuNames:
        return self.errorComment(iTagLine, '%s: invalid CPU name: %s (names: %s)'
                                           % (sTag, sMinCpu, ','.join(sorted(g_kdCpuNames)),));

    # Set it.  (The attribute must not be touched before this point, or the
    # conflict check below can never trigger.)
    if oInstr.sMinCpu is None:
        oInstr.sMinCpu = sMinCpu;
    elif oInstr.sMinCpu != sMinCpu:
        self.errorComment(iTagLine, '%s: attemting to overwrite "%s" with "%s"' % (sTag, oInstr.sMinCpu, sMinCpu,));

    _ = iEndLine;
    return True;
2282
def parseTagOpCpuId(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \@opcpuid
    Value:  none | <CPUID flag specifier>

    CPUID feature bit which is required for the instruction to be present.

    The value is a comma or space separated list of names found in
    g_kdCpuIdFlags, or the single word 'none' (any case) for an empty list.
    Valid names are appended to oInstr.asCpuIds.  Names which are already
    present are reported thru errorComment without failing the tag.

    Returns True on success, False if any of the names are invalid (each
    one is reported thru errorComment).
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    # Note! split() without arguments never returns words with leading or
    #       trailing whitespace, so there is no need for stripping them.
    asCpuIds = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asCpuIds) == 1 and asCpuIds[0].lower() == 'none':
        asCpuIds = [];
    else:
        fRc = True;
        for sCpuId in asCpuIds:
            if sCpuId not in g_kdCpuIdFlags:
                fRc = self.errorComment(iTagLine, '%s: invalid CPUID value: %s' % (sTag, sCpuId,));
        if not fRc:
            return False;

    # Append them.
    for sCpuId in asCpuIds:
        if sCpuId not in oInstr.asCpuIds:
            oInstr.asCpuIds.append(sCpuId);
        else:
            self.errorComment(iTagLine, '%s: duplicate CPUID: %s' % ( sTag, sCpuId,));

    _ = iEndLine;
    return True;
2316
def parseTagOpGroup(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \@opgroup
    Value:  op_grp1[_subgrp2[_subsubgrp3]]

    Instruction grouping.

    Exactly one group name matching self.oReGroupName is required.  Returns
    True after storing it in oInstr.sGroup.  Problems are reported thru
    errorComment, whose return value is then returned.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split into words and validate.
    asGroups = self.flattenAllSections(aasSections).split();
    if len(asGroups) != 1:
        return self.errorComment(iTagLine, '%s: exactly one group, please: %s' % (sTag, asGroups,));
    (sGroup,) = asGroups;
    if not self.oReGroupName.match(sGroup):
        return self.errorComment(iTagLine, '%s: invalid group name: %s (valid: %s)'
                                           % (sTag, sGroup, self.oReGroupName.pattern));

    # Set it unless already set.
    if oInstr.sGroup is None:
        oInstr.sGroup = sGroup;
        return True;
    return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sGroup, sGroup,));
2342
def parseTagOpUnusedInvalid(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \@opunused, \@opinvalid, \@opinvlstyle
    Value:  <invalid opcode behaviour style>

    The \@opunused indicates the specification is for a currently unused
    instruction encoding.

    The \@opinvalid indicates the specification is for a currently invalid
    instruction encoding (like UD2).

    The \@opinvlstyle just indicates how CPUs decode the instruction when
    not supported (\@opcpuid, \@opmincpu) or disabled.

    Exactly one style, which must be a key in g_kdInvalidStyles, is required.
    Returns True after storing it in oInstr.sInvalidStyle and, depending on
    the tag, setting oInstr.fUnused or oInstr.fInvalid.  Problems are
    reported thru errorComment, whose return value is then returned.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split into words and validate.
    asStyles = self.flattenAllSections(aasSections).split();
    if len(asStyles) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid behviour style, please: %s' % (sTag, asStyles,));
    (sStyle,) = asStyles;
    if sStyle not in g_kdInvalidStyles:
        return self.errorComment(iTagLine, '%s: invalid invalid behaviour style: %s (valid: %s)'
                                           % (sTag, sStyle, g_kdInvalidStyles.keys(),));

    # Only one of the three tags may be used per instruction.
    if oInstr.sInvalidStyle is not None:
        return self.errorComment(iTagLine,
                                 '%s: attempting to overwrite "%s" with "%s" (only one @opunused, @opinvalid, @opinvlstyle)'
                                 % ( sTag, oInstr.sInvalidStyle, sStyle,));

    # Set it.
    oInstr.sInvalidStyle = sStyle;
    if sTag == '@opunused':
        oInstr.fUnused = True;
    elif sTag == '@opinvalid':
        oInstr.fInvalid = True;
    return True;
2380
def parseTagOpTest(self, sTag, aasSections, iTagLine, iEndLine): # pylint: disable=too-many-locals
    """
    Tag:        \@optest
    Value:      [<selectors>[ ]?] <inputs> -> <outputs>
    Example:    mode==64bit / in1=0xfffffffe:dw in2=1:dw -> out1=0xffffffff:dw outfl=a?,p?

    The main idea here is to generate basic instruction tests.

    The probably simplest way of handling the diverse input, would be to use
    it to produce size optimized byte code for a simple interpreter that
    modifies the register input and output states.

    An alternative to the interpreter would be creating multiple tables,
    but that becomes rather complicated wrt what goes where and then to use
    them in an efficient manner.

    Each section of the tag value is one test.  The words of a section are
    processed from the end: everything after '->' is an output, everything
    between '/' and '->' is an input, and everything before '/' is a
    selector.  Selectors are checked against the TestSelector tables, and
    inputs/outputs against the TestInOut tables.  A test is appended to
    oInstr.aoTests only when all of it parsed successfully.  Problems are
    reported thru errorComment.

    Always returns True.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    #
    # Do it section by section.
    #
    for asSectionLines in aasSections:
        #
        # Sort the input into outputs, inputs and selector conditions.
        #
        sFlatSection = self.flattenAllSections([asSectionLines,]);
        if not sFlatSection:
            self.errorComment(iTagLine, '%s: missing value (dbg: aasSections=%s)' % ( sTag, aasSections));
            continue;
        oTest = InstructionTest(oInstr);

        asSelectors = [];
        asInputs    = [];
        asOutputs   = [];
        asCur   = asOutputs;
        fRc     = True;
        asWords = sFlatSection.split();
        for iWord in range(len(asWords) - 1, -1, -1):
            sWord = asWords[iWord];
            # Check for array switchers.  The current list must be compared
            # by identity here, as two different lists with the same content
            # (both empty, typically) compare equal by value.
            if sWord == '->':
                if asCur is not asOutputs:
                    fRc = self.errorComment(iTagLine, '%s: "->" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asInputs;
            elif sWord == '/':
                if asCur is not asInputs:
                    fRc = self.errorComment(iTagLine, '%s: "/" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asSelectors;
            else:
                asCur.insert(0, sWord);

        #
        # Validate and add selectors.
        #
        for sCond in asSelectors:
            # Expand predicate shorthands before looking for the operator.
            sCondExp = TestSelector.kdPredicates.get(sCond, sCond);
            oSelector = None;
            for sOp in TestSelector.kasCompareOps:
                off = sCondExp.find(sOp);
                if off >= 0:
                    sVariable = sCondExp[:off];
                    sValue    = sCondExp[off + len(sOp):];
                    if sVariable in TestSelector.kdVariables:
                        if sValue in TestSelector.kdVariables[sVariable]:
                            oSelector = TestSelector(sVariable, sOp, sValue);
                        else:
                            self.errorComment(iTagLine, '%s: invalid condition value "%s" in "%s" (valid: %s)'
                                                         % ( sTag, sValue, sCond,
                                                             TestSelector.kdVariables[sVariable].keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid condition variable "%s" in "%s" (valid: %s)'
                                                     % ( sTag, sVariable, sCond, TestSelector.kdVariables.keys(),));
                    break;
            if oSelector is not None:
                # A second selector for the same variable is complained about but still added.
                for oExisting in oTest.aoSelectors:
                    if oExisting.sVariable == oSelector.sVariable:
                        self.errorComment(iTagLine, '%s: already have a selector for variable "%s" (existing: %s, new: %s)'
                                                     % ( sTag, oSelector.sVariable, oExisting, oSelector,));
                oTest.aoSelectors.append(oSelector);
            else:
                fRc = self.errorComment(iTagLine, '%s: failed to parse selector: %s' % ( sTag, sCond,));

        #
        # Validate outputs and inputs, adding them to the test as we go along.
        #
        for asItems, sDesc, aoDst in [ (asInputs, 'input', oTest.aoInputs), (asOutputs, 'output', oTest.aoOutputs)]:
            asValidFieldKinds = [ 'both', sDesc, ];
            for sItem in asItems:
                oItem = None;
                # Only the first operator found in the item is considered.
                for sOp in TestInOut.kasOperators:
                    off = sItem.find(sOp);
                    if off < 0:
                        continue;
                    sField     = sItem[:off];
                    sValueType = sItem[off + len(sOp):];
                    if     sField in TestInOut.kdFields \
                       and TestInOut.kdFields[sField][1] in asValidFieldKinds:
                        # The type is optional and defaults to the one given in the field table.
                        asSplit = sValueType.split(':', 1);
                        sValue  = asSplit[0];
                        sType   = asSplit[1] if len(asSplit) > 1 else TestInOut.kdFields[sField][0];
                        if sType in TestInOut.kdTypes:
                            oValid = TestInOut.kdTypes[sType].validate(sValue);
                            if oValid is True:
                                if not TestInOut.kdTypes[sType].isAndOrPair(sValue) or sOp == '&|=':
                                    oItem = TestInOut(sField, sOp, sValue, sType);
                                else:
                                    self.errorComment(iTagLine, '%s: and-or %s value "%s" can only be used with "&|="'
                                                                 % ( sTag, sDesc, sItem, ));
                            else:
                                self.errorComment(iTagLine, '%s: invalid %s value "%s" in "%s" (type: %s): %s'
                                                             % ( sTag, sDesc, sValue, sItem, sType, oValid, ));
                        else:
                            self.errorComment(iTagLine, '%s: invalid %s type "%s" in "%s" (valid types: %s)'
                                                         % ( sTag, sDesc, sType, sItem, TestInOut.kdTypes.keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid %s field "%s" in "%s"\nvalid fields: %s'
                                                     % ( sTag, sDesc, sField, sItem,
                                                         ', '.join([sKey for sKey in TestInOut.kdFields.keys()
                                                                    if TestInOut.kdFields[sKey][1] in asValidFieldKinds]),));
                    break;
                if oItem is not None:
                    # A repeated field + operator combo is complained about but still added.
                    for oExisting in aoDst:
                        if oExisting.sField == oItem.sField and oExisting.sOp == oItem.sOp:
                            self.errorComment(iTagLine,
                                              '%s: already have a "%s" assignment for field "%s" (existing: %s, new: %s)'
                                              % ( sTag, oItem.sOp, oItem.sField, oExisting, oItem,));
                    aoDst.append(oItem);
                else:
                    fRc = self.errorComment(iTagLine, '%s: failed to parse assignment: %s' % ( sTag, sItem,));

        #
        # Add the test if it parsed fine, otherwise dump what we got.
        #
        if fRc:
            oInstr.aoTests.append(oTest);
        else:
            self.errorComment(iTagLine, '%s: failed to parse test: %s' % (sTag, ' '.join(asWords),));
            self.errorComment(iTagLine, '%s: asSelectors=%s / asInputs=%s -> asOutputs=%s'
                                         % (sTag, asSelectors, asInputs, asOutputs,));

    _ = iEndLine;
    return True;
2525
def parseTagOpTestNum(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Numbered \@optest tag.  Either \@optest42 or \@optest[42].

    The number is checked against the current number of tests of the
    instruction.  A mismatch is reported thru errorComment, but the value
    is passed on to parseTagOpTest regardless.

    Returns what parseTagOpTest returns.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The number follows the 7 character '@optest' prefix, optionally in brackets.
    sNumber = sTag[8:-1] if sTag[-1] == ']' else sTag[7:];
    iTest   = int(sNumber);

    cTests = len(oInstr.aoTests);
    if iTest != cTests:
        self.errorComment(iTagLine, '%s: incorrect test number: %u, actual %u' % (sTag, iTest, cTests,));
    return self.parseTagOpTest(sTag, aasSections, iTagLine, iEndLine);
2541
def parseTagOpTestIgnore(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \@optestign | \@optestignore
    Value:  <value is ignored>

    This is a simple trick to ignore a test while debugging another.
    Nothing is done with any of the arguments.  Always returns True.

    See also \@oponlytest.
    """
    _ = (sTag, aasSections, iTagLine, iEndLine);
    return True;
2553
def parseTagOpCopyTests(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@opcopytests
    Value:      <opstat | function> [..]
    Example:    \@opcopytests add_Eb_Gb

    Trick to avoid duplicating tests for different encodings of the same
    operation.

    Each reference must match self.oReStatsName or self.oReFunctionName to
    be appended to oInstr.asCopyTests.  Invalid and duplicate references
    are reported thru errorComment and skipped.  Returns True, unless the
    value is empty, in which case the errorComment return value is returned.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, validate and append the copy job to the instruction.  This only
    # records the references, nothing is copied here.
    asToCopy = self.flattenAllSections(aasSections).split();
    if not asToCopy:
        return self.errorComment(iTagLine, '%s: requires at least on reference value' % (sTag,));

    for sToCopy in asToCopy:
        if sToCopy in oInstr.asCopyTests:
            self.errorComment(iTagLine, '%s: ignoring duplicate "%s"' % (sTag, sToCopy,));
            continue;
        if self.oReStatsName.match(sToCopy) or self.oReFunctionName.match(sToCopy):
            oInstr.asCopyTests.append(sToCopy);
        else:
            self.errorComment(iTagLine, '%s: invalid instruction reference (opstat or function) "%s" (valid: %s or %s)'
                                        % (sTag, sToCopy, self.oReStatsName.pattern, self.oReFunctionName.pattern));

    return True;
2582
def parseTagOpOnlyTest(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \@oponlytest | \@oponly
    Value:  none

    Only test instructions with this tag.  This is a trick that is handy
    for singling out one or two new instructions or tests.

    The instruction is added to g_aoOnlyTestInstructions unless it is
    there already.  Returns True on success.  A non-empty value is reported
    thru errorComment, whose return value is then returned.

    See also \@optestignore.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The tag takes no value.
    sValue = self.flattenAllSections(aasSections).strip();
    if sValue:
        return self.errorComment(iTagLine, '%s: does not take any value: %s' % (sTag, sValue));

    # Add the instruction to the only-test list.
    if oInstr not in g_aoOnlyTestInstructions:
        g_aoOnlyTestInstructions.append(oInstr);
    return True;
2605
def parseTagOpXcptType(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \@opxcpttype
    Value:  [none|1|2|3|4|4UA|5|6|7|8|11|12|E1|E1NF|E2|E3|E3NF|E4|E4NF|E5|E5NF|E6|E6NF|E7NF|E9|E9NF|E10|E11|E12|E12NF]

    Sets the SSE or AVX exception type (see SDMv2 2.4, 2.7).

    Exactly one type, which must be a key in g_kdXcptTypes, is required.
    Returns True after storing it in oInstr.sXcptType.  Problems are
    reported thru errorComment, whose return value is then returned.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split into words and validate.
    asTypes = self.flattenAllSections(aasSections).split();
    if len(asTypes) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid exception type, please: %s' % (sTag, asTypes,));
    (sType,) = asTypes;
    if sType not in g_kdXcptTypes:
        return self.errorComment(iTagLine, '%s: invalid invalid exception type: %s (valid: %s)'
                                           % (sTag, sType, sorted(g_kdXcptTypes.keys()),));

    # Set it unless already set.
    if oInstr.sXcptType is None:
        oInstr.sXcptType = sType;
        return True;
    return self.errorComment(iTagLine,
                             '%s: attempting to overwrite "%s" with "%s" (only one @opxcpttype)'
                             % ( sTag, oInstr.sXcptType, sType,));
2632
def parseTagOpFunction(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \@opfunction
    Value:  <VMM function name>

    This is for explicitly setting the IEM function name.  Normally we pick
    this up from the FNIEMOP_XXX macro invocation after the description, or
    generate it from the mnemonic and operands.

    It is thought it may be necessary to set it when specifying instructions
    whose implementation isn't following immediately or aren't implemented yet.

    The name must match self.oReFunctionName.  Returns True after storing it
    in oInstr.sFunction.  Problems are reported thru errorComment, whose
    return value is then returned.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sFunction = self.flattenAllSections(aasSections);
    if not self.oReFunctionName.match(sFunction):
        return self.errorComment(iTagLine, '%s: invalid VMM function name: "%s" (valid: %s)'
                                           % (sTag, sFunction, self.oReFunctionName.pattern));

    # Set it unless already set.
    if oInstr.sFunction is None:
        oInstr.sFunction = sFunction;
        return True;
    return self.errorComment(iTagLine, '%s: attempting to overwrite VMM function name "%s" with "%s"'
                                       % (sTag, oInstr.sFunction, sFunction,));
2660
def parseTagOpStats(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opstats
    Value:      <VMM statistics base name>

    Explicitly sets the statistics name.  Normally the name is picked up
    from the IEMOP_MNEMONIC macro invocation, or it is generated from the
    mnemonic and operands.

    It is thought that this may be necessary when specifying instructions
    whose implementation does not follow immediately or which are not
    implemented yet.

    Returns True on success, otherwise the self.errorComment() result.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The flattened value must match the statistics name pattern.
    sName = self.flattenAllSections(aasSections);
    if self.oReStatsName.match(sName) is None:
        return self.errorComment(iTagLine, '%s: invalid VMM statistics name: "%s" (valid: %s)'
                                           % (sTag, sName, self.oReStatsName.pattern));

    # Only the first @opstats for an instruction is accepted.
    if oInstr.sStats is None:
        oInstr.sStats = sName;
        return True;
    return self.errorComment(iTagLine, '%s: attempting to overwrite VMM statistics base name "%s" with "%s"'
                                       % (sTag, oInstr.sStats, sName,));
2688
def parseTagOpDone(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opdone
    Value:      none

    Used to explicitly flush the instructions that have been specified.

    Returns the self.doneInstructions() result when the tag carries no
    value, otherwise the self.errorComment() result.
    """
    _ = iEndLine;
    sValue = self.flattenAllSections(aasSections);
    if sValue == '':
        return self.doneInstructions();
    return self.errorComment(iTagLine, '%s: takes no value, found: "%s"' % (sTag, sValue,));
2701
2702 ## @}
2703
2704
def parseComment(self):
    """
    Parse the current comment (self.sComment).

    If it's an opcode specifying comment, we reset the macro stuff.

    Returns False if the comment holds neither 'Opcode' nor '@' and is
    therefore ignored, True once the comment has been processed.  Problems
    with individual tags are reported through self.errorComment().
    """
    #
    # Reject if comment doesn't seem to contain anything interesting.
    #
    if self.sComment.find('Opcode') < 0 and self.sComment.find('@') < 0:
        return False;

    #
    # Split the comment into lines, removing leading asterisks and spaces.
    # Also remove leading and trailing empty lines.  (The check above
    # guarantees that at least one non-empty line survives.)
    #
    asLines = [sLine.lstrip().lstrip('*').lstrip() for sLine in self.sComment.split('\n')];

    while asLines and not asLines[0]:
        self.iCommentLine += 1;
        asLines.pop(0);

    while asLines and not asLines[-1]:
        asLines.pop();

    #
    # Check for old style: Opcode 0x0f 0x12
    #
    if asLines[0].startswith('Opcode '):
        self.parseCommentOldOpcode(asLines);

    def processTag(sTag, aasTagSections, iTagLine, iEndLine):
        """
        Dispatches one completed tag.

        Shared by the loop below and the final-tag handling so that both
        produce the same diagnostics.  Returns (fIsOpTag, sFlatDefault)
        where sFlatDefault is None for everything but the '@default' tag.
        """
        if not aasTagSections[-1] and len(aasTagSections) > 1:
            aasTagSections.pop(-1);         # Drop the trailing empty section.
        if sTag in self.dTagHandlers:
            self.dTagHandlers[sTag](sTag, aasTagSections, iTagLine, iEndLine);
            return (True, None);
        if sTag == '@default':
            return (False, self.flattenAllSections(aasTagSections));
        # '@opencoding' must be checked before the generic '@op' prefix
        # test, or the more helpful hint can never be reached.
        if sTag == '@opencoding':
            self.errorComment(iTagLine, 'Did you mean "@openc" rather than "%s"?' % (sTag,));
        elif sTag.startswith('@op'):
            self.errorComment(iTagLine, 'Unknown tag: %s' % (sTag));
        elif '@op' + sTag[1:] in self.dTagHandlers:
            self.errorComment(iTagLine, 'Did you mean "@op%s" rather than "%s"?' % (sTag[1:], sTag));
        elif sTag == '@encoding':
            self.errorComment(iTagLine, 'Did you mean "@openc" rather than "%s"?' % (sTag,));
        return (False, None);

    #
    # Look for @op* tagged data.
    #
    cOpTags      = 0;
    sFlatDefault = None;
    sCurTag      = '@default';
    iCurTagLine  = 0;
    asCurSection = [];
    aasSections  = [ asCurSection, ];
    for iLine, sLine in enumerate(asLines):
        if not sLine.startswith('@'):
            # Text belonging to the current tag; blank lines separate sections.
            if sLine:
                asCurSection.append(sLine);
            elif asCurSection:
                asCurSection = [];
                aasSections.append(asCurSection);
            continue;

        #
        # Process the previous tag.
        #
        fIsOpTag, sFlat = processTag(sCurTag, aasSections, iCurTagLine, iLine);
        if fIsOpTag:
            cOpTags += 1;
        if sFlat is not None:
            sFlatDefault = sFlat;

        #
        # New tag.
        #
        asSplit = sLine.split(None, 1);
        sCurTag = asSplit[0].lower();
        if len(asSplit) > 1:
            asCurSection = [asSplit[1],];
        else:
            asCurSection = [];
        aasSections = [asCurSection, ];
        iCurTagLine = iLine;

    #
    # Process the final tag (the end line is the last comment line).
    #
    fIsOpTag, sFlat = processTag(sCurTag, aasSections, iCurTagLine, len(asLines) - 1);
    if fIsOpTag:
        cOpTags += 1;
    if sFlat is not None:
        sFlatDefault = sFlat;

    #
    # Don't allow default text in blocks containing @op*.
    #
    if cOpTags > 0 and sFlatDefault:
        self.errorComment(0, 'Untagged comment text is not allowed with @op*: %s' % (sFlatDefault,));

    return True;
2805
def parseMacroInvocation(self, sInvocation):
    """
    Parses a macro invocation.

    Returns a tuple: the first element is the offset just past the closing
    parenthesis of the invocation, the second is the list of macro
    arguments with the macro name as element zero.  Arguments are split on
    top-level commas only; nested parentheses stay inside their argument.

    When the invocation runs past the end of sInvocation, following lines
    are pulled from self.asLines starting at self.iLine (self.iLine itself
    is not advanced).

    NOTE(review): the two error paths return whatever self.error() returns
    rather than a tuple - confirm that callers cope with that.
    """
    # The macro name is everything before the first opening parenthesis.
    offOpen = sInvocation.find('(');
    if offOpen <= 0:
        self.raiseError("macro invocation open parenthesis not found");
    sMacro = sInvocation[:offOpen].strip();
    if not self.oReMacroName.match(sMacro):
        return self.error("invalid macro name '%s'" % (sMacro,));
    asArgs = [sMacro, ];

    # Walk the argument list character by character, tracking nesting.
    iNextLine = self.iLine;
    offCur    = offOpen + 1;
    offArg    = offCur;
    cNesting  = 1;
    while cNesting > 0:
        if offCur >= len(sInvocation):
            # Ran out of text, continue on the next source line.
            if iNextLine >= len(self.asLines):
                return self.error('macro invocation beyond end of file');
            sInvocation += self.asLines[iNextLine];
            iNextLine += 1;
        chCur = sInvocation[offCur];

        if chCur == '(':
            cNesting += 1;
        elif chCur in (',', ')'):
            if cNesting == 1:
                asArgs.append(sInvocation[offArg:offCur].strip());
                offArg = offCur + 1;
            if chCur == ')':
                cNesting -= 1;
        offCur += 1;

    return (offCur, asArgs);
2847
def findAndParseMacroInvocationEx(self, sCode, sMacro):
    """
    Looks for the first occurrence of sMacro in sCode and parses it as a
    macro invocation if it is followed (optionally after whitespace) by an
    opening parenthesis.

    Returns (len(sCode), None) if not found, otherwise the
    parseMacroInvocation result with the offset made relative to sCode.
    """
    offHit = sCode.find(sMacro);
    # Use startswith() rather than indexing so a macro name sitting at the
    # very end of the code (nothing but whitespace after it) is simply
    # treated as "not an invocation" instead of raising IndexError.
    if offHit >= 0 and sCode[offHit + len(sMacro):].lstrip().startswith('('):
        offAfter, asRet = self.parseMacroInvocation(sCode[offHit:]);
        return (offHit + offAfter, asRet);
    return (len(sCode), None);
2857
def findAndParseMacroInvocation(self, sCode, sMacro):
    """
    Convenience wrapper around findAndParseMacroInvocationEx that drops the
    offset.

    Returns None if not found, arguments as per parseMacroInvocation if found.
    """
    _, asArgs = self.findAndParseMacroInvocationEx(sCode, sMacro);
    return asArgs;
2863
def findAndParseFirstMacroInvocation(self, sCode, asMacro):
    """
    Tries the macro names in asMacro in order and returns the arguments of
    the first one found in sCode.

    Returns same as findAndParseMacroInvocation, i.e. None if none of the
    macros are found.
    """
    # The generator is lazy, so probing stops at the first hit.
    oHits = (self.findAndParseMacroInvocation(sCode, sCandidate) for sCandidate in asMacro);
    return next((asArgs for asArgs in oHits if asArgs is not None), None);
2873
def workerIemOpMnemonicEx(self, sMacro, sStats, sAsm, sForm, sUpper, sLower, # pylint: disable=too-many-arguments
                          sDisHints, sIemHints, asOperands):
    """
    Processes one of the IEMOP_MNEMONIC0EX, IEMOP_MNEMONIC1EX, IEMOP_MNEMONIC2EX,
    IEMOP_MNEMONIC3EX, and IEMOP_MNEMONIC4EX macros.

    The macro parameters are cross-checked against what the @op* tags have
    already stored on the last current instruction; attributes the tags
    left unset are filled in from the macro.  All problems are reported via
    self.error() and processing continues.  sAsm is currently unused.

    Always returns True.
    """
    _ = sAsm;

    #
    # Some invocation checks.
    #
    if sUpper.upper() != sUpper:
        self.error('%s: bad a_Upper parameter: %s' % (sMacro, sUpper,));
    if sLower.lower() != sLower:
        self.error('%s: bad a_Lower parameter: %s' % (sMacro, sLower,));
    if sLower != sUpper.lower():
        self.error('%s: a_Upper and a_Lower parameters does not match: %s vs %s' % (sMacro, sUpper, sLower,));
    if not self.oReMnemonic.match(sLower):
        self.error('%s: invalid a_Lower: %s (valid: %s)' % (sMacro, sLower, self.oReMnemonic.pattern,));

    #
    # Check if sIemHints tells us to not consider this macro invocation.
    #
    if 'IEMOPHINT_SKIP_PYTHON' in sIemHints:
        return True;

    # Apply to the last instruction only for now.
    if not self.aoCurInstrs:
        self.addInstruction();
    oInstr = self.aoCurInstrs[-1];
    if oInstr.iLineMnemonicMacro != -1:
        self.error('%s: already saw a IEMOP_MNEMONIC* macro on line %u for this instruction'
                   % (sMacro, oInstr.iLineMnemonicMacro,));
    else:
        oInstr.iLineMnemonicMacro = self.iLine;

    # Mnemonic
    if oInstr.sMnemonic is None:
        oInstr.sMnemonic = sLower;
    elif oInstr.sMnemonic != sLower:
        self.error('%s: current instruction and a_Lower does not match: %s vs %s' % (sMacro, oInstr.sMnemonic, sLower,));

    # Process operands.
    aoOperands = oInstr.aoOperands;     # Same list object, appended to below.
    if len(aoOperands) not in [0, len(asOperands)]:
        self.error('%s: number of operands given by @opN does not match macro: %s vs %s'
                   % (sMacro, len(aoOperands), len(asOperands),));
    for iOperand, sType in enumerate(asOperands):
        sWhere = g_kdOpTypes.get(sType, [None, None])[1];
        if sWhere is None:
            self.error('%s: unknown a_Op%u value: %s' % (sMacro, iOperand + 1, sType));
            # Error recovery: reuse what @opN gave us, else pick a default.
            if iOperand < len(aoOperands):
                sWhere = aoOperands[iOperand].sWhere;
                sType  = aoOperands[iOperand].sType;
            else:
                sWhere = 'reg';
                sType  = 'Gb';
        if iOperand == len(aoOperands):
            aoOperands.append(Operand(sWhere, sType));
        else:
            oOperand = aoOperands[iOperand];
            if oOperand.sWhere != sWhere or oOperand.sType != sType:
                self.error('%s: @op%u and a_Op%u mismatch: %s:%s vs %s:%s'
                           % (sMacro, iOperand + 1, iOperand + 1, oOperand.sWhere, oOperand.sType, sWhere, sType,));

    # Encoding.
    if sForm in g_kdIemForms:
        aForm = g_kdIemForms[sForm];
        if oInstr.sEncoding is None:
            oInstr.sEncoding = aForm[0];
        elif aForm[0] != oInstr.sEncoding:
            self.error('%s: current instruction @openc and a_Form does not match: %s vs %s (%s)'
                       % (sMacro, oInstr.sEncoding, aForm, sForm));

        # Check the parameter locations for the encoding.
        asFormWheres = aForm[1];
        if asFormWheres is not None:
            if len(asFormWheres) != len(aoOperands):
                self.error('%s: The a_Form=%s has a different operand count: %s (form) vs %s'
                           % (sMacro, sForm, len(asFormWheres), len(aoOperands) ));
            else:
                for iOperand, sWhere in enumerate(asFormWheres):
                    if aoOperands[iOperand].sWhere != sWhere:
                        self.error('%s: current instruction @op%u and a_Form location does not match: %s vs %s (%s)'
                                   % (sMacro, iOperand + 1, aoOperands[iOperand].sWhere, sWhere, sForm,));
    else:
        self.error('%s: unknown a_Form value: %s' % (sMacro, sForm,));

    # Stats.
    if not self.oReStatsName.match(sStats):
        self.error('%s: invalid a_Stats value: %s' % (sMacro, sStats,));
    elif oInstr.sStats is None:
        oInstr.sStats = sStats;
    elif oInstr.sStats != sStats:
        self.error('%s: mismatching @opstats and a_Stats value: %s vs %s'
                   % (sMacro, oInstr.sStats, sStats,));

    # Process the hints (simply merge with @ophints w/o checking anything).
    # The disassembler hints are handled first, then the IEM ones.
    for sHints, sPrefix, sUnknownFmt, sExpectedFmt in (
            (sDisHints, 'DISOPTYPE_', '%s: unknown a_fDisHints value: %s', '%s: expected a_fDisHints value: %s',),
            (sIemHints, 'IEMOPHINT_', '%s: unknown a_fIemHints value: %s', '%s: expected a_fIemHints value: %s',), ):
        for sHint in sHints.split('|'):
            sHint = sHint.strip();
            if sHint.startswith(sPrefix):
                sShortHint = sHint[len(sPrefix):].lower();
                if sShortHint in g_kdHints:
                    oInstr.dHints[sShortHint] = True; # (dummy value, using dictionary for speed)
                else:
                    self.error(sUnknownFmt % (sMacro, sHint,));
            elif sHint != '0':
                self.error(sExpectedFmt % (sMacro, sHint,));

    return True;
2991
def workerIemOpMnemonic(self, sMacro, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands):
    """
    Processes one of the IEMOP_MNEMONIC0, IEMOP_MNEMONIC1, IEMOP_MNEMONIC2,
    IEMOP_MNEMONIC3, and IEMOP_MNEMONIC4 macros.

    These macros carry neither a statistics name nor an assembly string,
    so both are derived from the lower case mnemonic and the operands
    before handing over to workerIemOpMnemonicEx.

    Returns the workerIemOpMnemonicEx result.
    """
    if asOperands:
        sStats = sLower + '_' + '_'.join(asOperands);
        sAsm   = sLower + ' ' + ','.join(asOperands);
    else:
        sStats = sLower;
        sAsm   = sLower;
    return self.workerIemOpMnemonicEx(sMacro, sStats, sAsm, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands);
3001
def checkCodeForMacro(self, sCode):
    """
    Checks code for relevant macro invocation.

    Returns True if a FNIEMOP_XXX function definition/stub macro was
    found and applied to the current instructions, False otherwise (this
    includes the case where only IEMOP_MNEMONIC* macros were processed).
    """
    # Nothing to scan unless there is a '(' somewhere past the first column.
    if sCode.find('(') <= 0:
        return False;

    # Look for instruction decoder function definitions. ASSUME single line.
    asArgs = self.findAndParseFirstMacroInvocation(sCode,
                                                   [ 'FNIEMOP_DEF',
                                                     'FNIEMOP_STUB',
                                                     'FNIEMOP_STUB_1',
                                                     'FNIEMOP_UD_STUB',
                                                     'FNIEMOP_UD_STUB_1' ]);
    if asArgs is not None:
        sFnMacro  = asArgs[0];
        sFunction = asArgs[1];
        fIsStub   = sFnMacro.find('STUB') > 0;

        if not self.aoCurInstrs:
            self.addInstruction();
        for oInstr in self.aoCurInstrs:
            if oInstr.iLineFnIemOpMacro != -1:
                self.error('%s: already seen a FNIEMOP_XXX macro for %s' % (sFnMacro, oInstr,) );
            else:
                oInstr.iLineFnIemOpMacro = self.iLine;
        self.setInstrunctionAttrib('sFunction', sFunction);
        self.setInstrunctionAttrib('fStub', fIsStub, fOverwrite = True);
        self.setInstrunctionAttrib('fUdStub', sFnMacro.find('UD_STUB') > 0, fOverwrite = True);
        # A stub has no body to wait for, so the instructions are complete.
        if fIsStub:
            self.doneInstructions();
        return True;

    # IEMOP_MNEMONIC(a_Stats, a_szMnemonic) IEMOP_INC_STATS(a_Stats)
    asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC');
    if asArgs is not None and len(self.aoCurInstrs) == 1:
        oInstr = self.aoCurInstrs[0];
        if oInstr.sStats is None:
            oInstr.sStats = asArgs[1];
        self.deriveMnemonicAndOperandsFromStats(oInstr, asArgs[1]);

    # IEMOP_MNEMONIC<N>EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, [a_Op1, .. a_OpN,] a_fDisHints, a_fIemHints)
    # The tuple index equals N, the number of operand parameters.
    for cOperands, sMnemonicMacro in enumerate(( 'IEMOP_MNEMONIC0EX', 'IEMOP_MNEMONIC1EX', 'IEMOP_MNEMONIC2EX',
                                                 'IEMOP_MNEMONIC3EX', 'IEMOP_MNEMONIC4EX', )):
        asArgs = self.findAndParseMacroInvocation(sCode, sMnemonicMacro);
        if asArgs is not None:
            iDisHints = 6 + cOperands;  # The hints follow the operands.
            self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5],
                                       asArgs[iDisHints], asArgs[iDisHints + 1], asArgs[6:iDisHints]);

    # IEMOP_MNEMONIC<N>(a_Form, a_Upper, a_Lower, [a_Op1, .. a_OpN,] a_fDisHints, a_fIemHints)
    for cOperands, sMnemonicMacro in enumerate(( 'IEMOP_MNEMONIC0', 'IEMOP_MNEMONIC1', 'IEMOP_MNEMONIC2',
                                                 'IEMOP_MNEMONIC3', 'IEMOP_MNEMONIC4', )):
        asArgs = self.findAndParseMacroInvocation(sCode, sMnemonicMacro);
        if asArgs is not None:
            iDisHints = 4 + cOperands;  # The hints follow the operands.
            self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3],
                                     asArgs[iDisHints], asArgs[iDisHints + 1], asArgs[4:iDisHints]);

    return False;
3095
3096
def parse(self):
    """
    Parses the given file.

    Walks self.asLines from self.iLine, feeding code fragments to
    checkCodeForMacro and completed multi-line comments to parseComment.

    Returns number of errors (the self.printErrors() result).
    Raises exception on fatal trouble.
    """
    #self.debug('Parsing %s' % (self.sSrcFile,));

    while self.iLine < len(self.asLines):
        sLine       = self.asLines[self.iLine];
        self.iLine += 1;

        # We only look for comments, so only lines with a slash might possibly
        # influence the parser state.
        offSlash = sLine.find('/');
        if offSlash >= 0:
            if offSlash + 1 >= len(sLine) or sLine[offSlash + 1] != '/' or self.iState != self.kiCode:
                offLine = 0;
                while offLine < len(sLine):
                    if self.iState == self.kiCode:
                        offHit = sLine.find('/*', offLine); # only multiline comments for now.
                        if offHit >= 0:
                            self.checkCodeForMacro(sLine[offLine:offHit]);
                            self.sComment     = '';
                            self.iCommentLine = self.iLine;
                            self.iState       = self.kiCommentMulti;
                            offLine = offHit + 2;
                        else:
                            self.checkCodeForMacro(sLine[offLine:]);
                            offLine = len(sLine);

                    elif self.iState == self.kiCommentMulti:
                        offHit = sLine.find('*/', offLine);
                        if offHit >= 0:
                            self.sComment += sLine[offLine:offHit];
                            self.iState    = self.kiCode;
                            offLine = offHit + 2;
                            self.parseComment();
                        else:
                            self.sComment += sLine[offLine:];
                            offLine = len(sLine);
                    else:
                        # Raise explicitly: a plain assert is stripped by -O
                        # and would leave this loop spinning forever.
                        raise AssertionError('unexpected parser state: %s' % (self.iState,));
            # C++ line comment.
            elif offSlash > 0:
                self.checkCodeForMacro(sLine[:offSlash]);

        # No slash, but append the line if in multi-line comment.
        elif self.iState == self.kiCommentMulti:
            #self.debug('line %d: multi' % (self.iLine,));
            self.sComment += sLine;

        # No slash, but check code line for relevant macro.
        elif self.iState == self.kiCode and sLine.find('IEMOP_') >= 0:
            #self.debug('line %d: macro' % (self.iLine,));
            self.checkCodeForMacro(sLine);

        # If the line is a '}' in the first position, complete the instructions.
        # (startswith() rather than sLine[0] so an empty line is harmless.)
        elif self.iState == self.kiCode and sLine.startswith('}'):
            #self.debug('line %d: }' % (self.iLine,));
            self.doneInstructions();

    self.doneInstructions();
    self.debug('%3s stubs out of %3s instructions in %s' % (self.cTotalStubs, self.cTotalInstr, os.path.basename(self.sSrcFile),));
    return self.printErrors();
3162
3163
def __parseFileByName(sSrcFile, sDefaultMap):
    """
    Parses one source file for instruction specifications.

    sSrcFile is read into a list of lines and handed to SimpleParser
    together with sDefaultMap.

    Returns the error count reported by SimpleParser.parse().
    Raises Exception if the file cannot be opened or read; parser
    exceptions are printed and propagated.
    """
    #
    # Read sSrcFile into a line array.
    #
    try:
        oFile = open(sSrcFile, "r");
    except Exception as oXcpt:
        raise Exception("failed to open %s for reading: %s" % (sSrcFile, oXcpt,));
    with oFile:     # Closes the file whether or not the read succeeds.
        try:
            asLines = oFile.readlines();
        except Exception as oXcpt:
            raise Exception("failed to read %s: %s" % (sSrcFile, oXcpt,));

    #
    # Do the parsing.  Any other exception type simply propagates.
    #
    try:
        return SimpleParser(sSrcFile, asLines, sDefaultMap).parse();
    except ParserException as oXcpt:
        print(str(oXcpt));
        raise;
3194
3195
def __doTestCopying():
    """
    Executes the asCopyTests instructions.

    Each asCopyTests entry is looked up first as a statistics name in
    g_dAllInstructionsByStat and then as a function name in
    g_dAllInstructionsByFunction; the tests of every matching source
    instruction are appended to the destination instruction.

    Returns the number of errors, which are written to stderr.
    """
    asErrors = [];
    for oDstInstr in g_aoAllInstructions:
        if not oDstInstr.asCopyTests:
            continue;
        for sSrcInstr in oDstInstr.asCopyTests:
            # Resolve the reference to a list of source instructions.
            oByStat = g_dAllInstructionsByStat.get(sSrcInstr, None);
            if oByStat:
                aoSrcInstrs = [oByStat,];
            else:
                aoSrcInstrs = g_dAllInstructionsByFunction.get(sSrcInstr, []);

            if not aoSrcInstrs:
                asErrors.append('%s:%s: error: @opcopytests reference "%s" not found\n'
                                % ( oDstInstr.sSrcFile, oDstInstr.iLineCreated, sSrcInstr));
                continue;

            for oSrcInstr in aoSrcInstrs:
                if oSrcInstr != oDstInstr:
                    oDstInstr.aoTests.extend(oSrcInstr.aoTests);
                else:
                    # Copying an instruction's tests onto itself is a mistake.
                    asErrors.append('%s:%s: error: @opcopytests reference "%s" matches the destination\n'
                                    % ( oDstInstr.sSrcFile, oDstInstr.iLineCreated, sSrcInstr));

    if asErrors:
        sys.stderr.write(u''.join(asErrors));
    return len(asErrors);
3223
3224
def __applyOnlyTest():
    """
    If g_aoOnlyTestInstructions contains any instructions, drop aoTests from
    all other instructions so that only these get tested.

    Always returns 0 (no errors).
    """
    if not g_aoOnlyTestInstructions:
        return 0;
    for oInstr in g_aoAllInstructions:
        if oInstr.aoTests and oInstr not in g_aoOnlyTestInstructions:
            oInstr.aoTests = [];
    return 0;
3236
def __parseAll():
    """
    Parses all the IEMAllInstruction*.cpp.h files.

    The files are expected next to this script.  After parsing, the
    test copying and only-test post-processing steps are applied.

    Returns True on success.  Exits the process with status 1 if any step
    reported errors; raises exception on fatal failure.
    """
    # (default instruction map, file name) pairs, parsed in this order.
    atSources = (
        ( 'one',        'IEMAllInstructionsOneByte.cpp.h'),
        ( 'two0f',      'IEMAllInstructionsTwoByte0f.cpp.h'),
        ( 'three0f38',  'IEMAllInstructionsThree0f38.cpp.h'),
        ( 'three0f3a',  'IEMAllInstructionsThree0f3a.cpp.h'),
        ( 'vexmap1',    'IEMAllInstructionsVexMap1.cpp.h'),
        ( 'vexmap2',    'IEMAllInstructionsVexMap2.cpp.h'),
        ( 'vexmap3',    'IEMAllInstructionsVexMap3.cpp.h'),
        ( '3dnow',      'IEMAllInstructions3DNow.cpp.h'),
    );
    sSrcDir = os.path.dirname(os.path.abspath(__file__));

    cErrors = 0;
    for sDefaultMap, sName in atSources:
        cErrors += __parseFileByName(os.path.join(sSrcDir, sName), sDefaultMap);
    cErrors += __doTestCopying();
    cErrors += __applyOnlyTest();

    if cErrors == 0:
        return True;
    #raise Exception('%d parse errors' % (cErrors,));
    sys.exit(1);
    return True;



# The instruction database is populated as a side effect of loading this module.
__parseAll();
3267
3268
3269#
3270# Generators (may perhaps move later).
3271#
def generateDisassemblerTables(oDstFile = sys.stdout):
    """
    Generates disassembler tables.

    Formats a C 'const DISOPCODE xxx[]' table from the instruction maps in
    g_dInstructionMaps and writes it to oDstFile (anything with a write()
    method; note that the sys.stdout default is bound when the function is
    defined).  The maps are visited sorted by encoding and lead opcodes, but
    because of the 'break' at the bottom of the loop only the first one is
    currently written.

    Returns nothing.
    """

    for sName, oMap in sorted(iter(g_dInstructionMaps.items()),
                              key = lambda aKV: aKV[1].sEncoding + ''.join(aKV[1].asLeadOpcodes)):
        assert oMap.sName == sName;
        asLines = [];

        asLines.append('/* Generated from: %-11s Selector: %-7s Encoding: %-7s Lead bytes opcodes: %s */'
                       % ( oMap.sName, oMap.sSelector, oMap.sEncoding, ' '.join(oMap.asLeadOpcodes), ));
        asLines.append('const DISOPCODE %s[] =' % (oMap.getDisasTableName(),));
        asLines.append('{');

        # Minimum start offset of each column in a generated table line.
        aoffColumns = [4, 29, 49, 65, 77, 89, 109, 125, 141, 157, 183, 199];

        aoTableOrder = oMap.getInstructionsInTableOrder();
        for iInstr, oInstr in enumerate(aoTableOrder):

            # Emit a separator comment in front of every group of 16 entries.
            if (iInstr & 0xf) == 0:
                if iInstr != 0:
                    asLines.append('');
                asLines.append(' /* %x */' % (iInstr >> 4,));

            if oInstr is None:
                pass;#asLines.append(' /* %#04x */ None,' % (iInstr));
            elif isinstance(oInstr, list):
                asLines.append(' /* %#04x */ ComplicatedListStuffNeedingWrapper,' % (iInstr));
            else:
                # Pick the table entry macro: OP takes up to three operands,
                # OPVEX is used when there are more.
                sMacro = 'OP';
                cMaxOperands = 3;
                if len(oInstr.aoOperands) > 3:
                    sMacro = 'OPVEX'
                    cMaxOperands = 4;
                    assert len(oInstr.aoOperands) <= cMaxOperands;

                #
                # Format string.
                #
                sTmp = '%s("%s' % (sMacro, oInstr.sMnemonic,);
                for iOperand, oOperand in enumerate(oInstr.aoOperands):
                    sTmp += ' ' if iOperand == 0 else ',';
                    if g_kdOpTypes[oOperand.sType][2][0] != '%': ## @todo remove upper() later.
                        sTmp += g_kdOpTypes[oOperand.sType][2].upper(); ## @todo remove upper() later.
                    else:
                        sTmp += g_kdOpTypes[oOperand.sType][2];
                sTmp += '",';
                asColumns = [ sTmp, ];

                #
                # Decoders.
                #
                # iStart marks where the decoder columns begin so that their
                # count can be derived from len(asColumns) below.
                iStart = len(asColumns);
                if oInstr.sEncoding is None:
                    pass;
                elif oInstr.sEncoding == 'ModR/M':
                    # ASSUME the first operand is using the ModR/M encoding
                    assert len(oInstr.aoOperands) >= 1 and oInstr.aoOperands[0].usesModRM();
                    asColumns.append('IDX_ParseModRM,');
                    ## @todo IDX_ParseVexDest
                    # Is second operand using ModR/M too?
                    if len(oInstr.aoOperands) > 1 and oInstr.aoOperands[1].usesModRM():
                        asColumns.append('IDX_UseModRM,')
                elif oInstr.sEncoding in [ 'prefix', ]:
                    for oOperand in oInstr.aoOperands:
                        asColumns.append('0,');
                elif oInstr.sEncoding in [ 'fixed' ]:
                    pass;
                elif oInstr.sEncoding == 'vex2':
                    asColumns.append('IDX_ParseVex2b,')
                elif oInstr.sEncoding == 'vex3':
                    asColumns.append('IDX_ParseVex3b,')
                elif oInstr.sEncoding in g_dInstructionMaps:
                    asColumns.append(g_dInstructionMaps[oInstr.sEncoding].sDisParse + ',');
                else:
                    ## @todo
                    #IDX_ParseTwoByteEsc,
                    #IDX_ParseGrp1,
                    #IDX_ParseShiftGrp2,
                    #IDX_ParseGrp3,
                    #IDX_ParseGrp4,
                    #IDX_ParseGrp5,
                    #IDX_Parse3DNow,
                    #IDX_ParseGrp6,
                    #IDX_ParseGrp7,
                    #IDX_ParseGrp8,
                    #IDX_ParseGrp9,
                    #IDX_ParseGrp10,
                    #IDX_ParseGrp12,
                    #IDX_ParseGrp13,
                    #IDX_ParseGrp14,
                    #IDX_ParseGrp15,
                    #IDX_ParseGrp16,
                    #IDX_ParseThreeByteEsc4,
                    #IDX_ParseThreeByteEsc5,
                    #IDX_ParseModFence,
                    #IDX_ParseEscFP,
                    #IDX_ParseNopPause,
                    #IDX_ParseInvOpModRM,
                    assert False, str(oInstr);

                # Check for immediates and stuff in the remaining operands.
                for oOperand in oInstr.aoOperands[len(asColumns) - iStart:]:
                    sIdx = g_kdOpTypes[oOperand.sType][0];
                    if sIdx != 'IDX_UseModRM':
                        asColumns.append(sIdx + ',');
                # Pad the decoder columns up to cMaxOperands entries.
                asColumns.extend(['0,'] * (cMaxOperands - (len(asColumns) - iStart)));

                #
                # Opcode and operands.
                #
                assert oInstr.sDisEnum, str(oInstr);
                asColumns.append(oInstr.sDisEnum + ',');
                iStart = len(asColumns)
                for oOperand in oInstr.aoOperands:
                    asColumns.append('OP_PARM_' + g_kdOpTypes[oOperand.sType][3] + ',');
                # Pad the parameter columns up to cMaxOperands entries.
                asColumns.extend(['OP_PARM_NONE,'] * (cMaxOperands - (len(asColumns) - iStart)));

                #
                # Flags.
                #
                # Only hints whose define starts with DISOPTYPE_ are emitted,
                # OR'ed together; '0' is used when there are none.
                sTmp = '';
                for sHint in sorted(oInstr.dHints.keys()):
                    sDefine = g_kdHints[sHint];
                    if sDefine.startswith('DISOPTYPE_'):
                        if sTmp:
                            sTmp += ' | ' + sDefine;
                        else:
                            sTmp += sDefine;
                if sTmp:
                    sTmp += '),';
                else:
                    sTmp += '0),';
                asColumns.append(sTmp);

                #
                # Format the columns into a line.
                #
                # Each column is padded out to its offset in aoffColumns, or
                # separated by a single space if the line is already past it.
                sLine = '';
                for i, s in enumerate(asColumns):
                    if len(sLine) < aoffColumns[i]:
                        sLine += ' ' * (aoffColumns[i] - len(sLine));
                    else:
                        sLine += ' ';
                    sLine += s;

                # OP("psrlw %Vdq,%Wdq", IDX_ParseModRM, IDX_UseModRM, 0, OP_PSRLW, OP_PARM_Vdq, OP_PARM_Wdq, OP_PARM_NONE,
                # DISOPTYPE_HARMLESS),
                # define OP(pszOpcode, idxParse1, idxParse2, idxParse3, opcode, param1, param2, param3, optype) \
                # { pszOpcode, idxParse1, idxParse2, idxParse3, 0, opcode, param1, param2, param3, 0, 0, optype }

                asLines.append(sLine);

        asLines.append('};');
        asLines.append('AssertCompile(RT_ELEMENTS(%s) == %s);' % (oMap.getDisasTableName(), oMap.getTableSize(),));

        #
        # Write out the lines.
        #
        oDstFile.write('\n'.join(asLines));
        oDstFile.write('\n');
        break; #for now
3435
# When run as a script, write the disassembler table to the default
# destination of generateDisassemblerTables.
if __name__ == '__main__':
    generateDisassemblerTables();
3438
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette