VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllInstructionsPython.py@ 66812

Last change on this file since 66812 was 66812, checked in by vboxsync, 8 years ago

IEM: clearly mark operands that are written to.

  • Property svn:eol-style set to LF
  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 142.5 KB
Line 
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3# $Id: IEMAllInstructionsPython.py 66812 2017-05-05 18:48:33Z vboxsync $
4
5"""
6IEM instruction extractor.
7
8This script/module parses the IEMAllInstruction*.cpp.h files next to it and
9collects information about the instructions. It can then be used to generate
10disassembler tables and tests.
11"""
12
13__copyright__ = \
14"""
15Copyright (C) 2017 Oracle Corporation
16
17This file is part of VirtualBox Open Source Edition (OSE), as
18available from http://www.virtualbox.org. This file is free software;
19you can redistribute it and/or modify it under the terms of the GNU
20General Public License (GPL) as published by the Free Software
21Foundation, in version 2 as it comes in the "COPYING" file of the
22VirtualBox OSE distribution. VirtualBox OSE is distributed in the
23hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
24
25The contents of this file may alternatively be used under the terms
26of the Common Development and Distribution License Version 1.0
27(CDDL) only, as it comes in the "COPYING.CDDL" file of the
28VirtualBox OSE distribution, in which case the provisions of the
29CDDL are applicable instead of those of the GPL.
30
31You may elect to license modified versions of this file under the
32terms and conditions of either the GPL or the CDDL or both.
33"""
34__version__ = "$Revision: 66812 $"
35
36# pylint: disable=anomalous-backslash-in-string
37
38# Standard python imports.
39import os
40import re
41import sys
42
43## Only the main script needs to modify the path.
44#g_ksValidationKitDir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),
45# 'ValidationKit');
46#sys.path.append(g_ksValidationKitDir);
47#
48#from common import utils; - Windows build boxes doesn't have pywin32.
49
# Python 3 compatibility: there is no separate 'long' type any more, so make
# the name an alias for 'int' for the code below that still refers to it.
if sys.version_info[0] > 2:
    long = int; # pylint: disable=redefined-builtin,invalid-name
53
54
## EFLAGS constant names and their bit mask values.
## The trailing comment on each entry shows the equivalent C-style expression.
g_kdX86EFlagsConstants = {
    'X86_EFL_CF':        0x00000001, # RT_BIT_32(0)
    'X86_EFL_1':         0x00000002, # RT_BIT_32(1)
    'X86_EFL_PF':        0x00000004, # RT_BIT_32(2)
    'X86_EFL_AF':        0x00000010, # RT_BIT_32(4)
    'X86_EFL_ZF':        0x00000040, # RT_BIT_32(6)
    'X86_EFL_SF':        0x00000080, # RT_BIT_32(7)
    'X86_EFL_TF':        0x00000100, # RT_BIT_32(8)
    'X86_EFL_IF':        0x00000200, # RT_BIT_32(9)
    'X86_EFL_DF':        0x00000400, # RT_BIT_32(10)
    'X86_EFL_OF':        0x00000800, # RT_BIT_32(11)
    'X86_EFL_IOPL':      0x00003000, # (RT_BIT_32(12) | RT_BIT_32(13))
    'X86_EFL_NT':        0x00004000, # RT_BIT_32(14)
    'X86_EFL_RF':        0x00010000, # RT_BIT_32(16)
    'X86_EFL_VM':        0x00020000, # RT_BIT_32(17)
    'X86_EFL_AC':        0x00040000, # RT_BIT_32(18)
    'X86_EFL_VIF':       0x00080000, # RT_BIT_32(19)
    'X86_EFL_VIP':       0x00100000, # RT_BIT_32(20)
    'X86_EFL_ID':        0x00200000, # RT_BIT_32(21)
    'X86_EFL_LIVE_MASK': 0x003f7fd5, # UINT32_C(0x003f7fd5)
    'X86_EFL_RA1_MASK':  0x00000002, # RT_BIT_32(1)
};
77
## EFlags values allowed in \@opfltest, \@opflmodify, \@opflundef, \@opflset, and \@opflclear.
##
## Maps a flag mnemonic to the name of an X86_EFL_XXX constant.  A leading '!'
## on the constant name marks a mnemonic that stands for the flag being clear.
g_kdEFlagsMnemonics = {
    # Debugger flag notation (sorted by value):
    'cf':   'X86_EFL_CF',   ##< Carry Flag.
    'nc':   '!X86_EFL_CF',  ##< No Carry.

    # NOTE(review): on x86 PF=1 indicates even parity, so the 'po'/'pe' polarity
    #               below looks inverted - confirm against the existing test data
    #               before changing anything.
    'po':   'X86_EFL_PF',   ##< Parity Odd.
    'pe':   '!X86_EFL_PF',  ##< Parity Even.

    'af':   'X86_EFL_AF',   ##< Aux Flag.
    'na':   '!X86_EFL_AF',  ##< No Aux.

    'zr':   'X86_EFL_ZF',   ##< ZeRo.
    'nz':   '!X86_EFL_ZF',  ##< No Zero.

    'ng':   'X86_EFL_SF',   ##< NeGative (sign).
    'pl':   '!X86_EFL_SF',  ##< PLus (sign).

    'tf':   'X86_EFL_TF',   ##< Trap flag.

    'ei':   'X86_EFL_IF',   ##< Enabled Interrupts.
    'di':   '!X86_EFL_IF',  ##< Disabled Interrupts.

    'dn':   'X86_EFL_DF',   ##< DowN (string op direction).
    'up':   '!X86_EFL_DF',  ##< UP (string op direction).

    'ov':   'X86_EFL_OF',   ##< OVerflow.
    'nv':   '!X86_EFL_OF',  ##< No Overflow.

    'nt':   'X86_EFL_NT',   ##< Nested Task.
    'rf':   'X86_EFL_RF',   ##< Resume Flag.
    'vm':   'X86_EFL_VM',   ##< Virtual-8086 Mode.
    'ac':   'X86_EFL_AC',   ##< Alignment Check.
    'vif':  'X86_EFL_VIF',  ##< Virtual Interrupt Flag.
    'vip':  'X86_EFL_VIP',  ##< Virtual Interrupt Pending.

    # Reference manual notation not covered above (sorted by value):
    'pf':   'X86_EFL_PF',
    'zf':   'X86_EFL_ZF',
    'sf':   'X86_EFL_SF',
    'if':   'X86_EFL_IF',
    'df':   'X86_EFL_DF',
    'of':   'X86_EFL_OF',
    'iopl': 'X86_EFL_IOPL',
    'id':   'X86_EFL_ID',
};
124
## Constants and values for CR0.
## The trailing comment on each entry shows the equivalent C-style expression.
g_kdX86Cr0Constants = {
    'X86_CR0_PE': 0x00000001, # RT_BIT_32(0)
    'X86_CR0_MP': 0x00000002, # RT_BIT_32(1)
    'X86_CR0_EM': 0x00000004, # RT_BIT_32(2)
    'X86_CR0_TS': 0x00000008, # RT_BIT_32(3)
    'X86_CR0_ET': 0x00000010, # RT_BIT_32(4)
    'X86_CR0_NE': 0x00000020, # RT_BIT_32(5)
    'X86_CR0_WP': 0x00010000, # RT_BIT_32(16)
    'X86_CR0_AM': 0x00040000, # RT_BIT_32(18)
    'X86_CR0_NW': 0x20000000, # RT_BIT_32(29)
    'X86_CR0_CD': 0x40000000, # RT_BIT_32(30)
    'X86_CR0_PG': 0x80000000, # RT_BIT_32(31)
};
139
## Constants and values for CR4.
## The trailing comment on each entry shows the equivalent C-style expression.
g_kdX86Cr4Constants = {
    'X86_CR4_VME':         0x00000001, # RT_BIT_32(0)
    'X86_CR4_PVI':         0x00000002, # RT_BIT_32(1)
    'X86_CR4_TSD':         0x00000004, # RT_BIT_32(2)
    'X86_CR4_DE':          0x00000008, # RT_BIT_32(3)
    'X86_CR4_PSE':         0x00000010, # RT_BIT_32(4)
    'X86_CR4_PAE':         0x00000020, # RT_BIT_32(5)
    'X86_CR4_MCE':         0x00000040, # RT_BIT_32(6)
    'X86_CR4_PGE':         0x00000080, # RT_BIT_32(7)
    'X86_CR4_PCE':         0x00000100, # RT_BIT_32(8)
    'X86_CR4_OSFXSR':      0x00000200, # RT_BIT_32(9)
    'X86_CR4_OSXMMEEXCPT': 0x00000400, # RT_BIT_32(10)
    'X86_CR4_VMXE':        0x00002000, # RT_BIT_32(13)
    'X86_CR4_SMXE':        0x00004000, # RT_BIT_32(14)
    'X86_CR4_PCIDE':       0x00020000, # RT_BIT_32(17)
    'X86_CR4_OSXSAVE':     0x00040000, # RT_BIT_32(18)
    'X86_CR4_SMEP':        0x00100000, # RT_BIT_32(20)
    'X86_CR4_SMAP':        0x00200000, # RT_BIT_32(21)
    'X86_CR4_PKE':         0x00400000, # RT_BIT_32(22)
};
161
## XSAVE components (XCR0).
## Maps the XSAVE_C_XXX constant names to their mask values; the last two
## entries are combined masks rather than single component bits.
g_kdX86XSaveCConstants = {
    'XSAVE_C_X87':         0x00000001,
    'XSAVE_C_SSE':         0x00000002,
    'XSAVE_C_YMM':         0x00000004,
    'XSAVE_C_BNDREGS':     0x00000008,
    'XSAVE_C_BNDCSR':      0x00000010,
    'XSAVE_C_OPMASK':      0x00000020,
    'XSAVE_C_ZMM_HI256':   0x00000040,
    'XSAVE_C_ZMM_16HI':    0x00000080,
    'XSAVE_C_PKRU':        0x00000200,
    'XSAVE_C_LWP':         0x4000000000000000,
    'XSAVE_C_X':           0x8000000000000000,
    'XSAVE_C_ALL_AVX':     0x000000c4, # For clearing all AVX bits.
    'XSAVE_C_ALL_AVX_SSE': 0x000000c6, # For clearing all AVX and SSE bits.
};
178
179
## \@op[1-4] locations
##
## The keys are the valid operand location names (these are also the names used
## in field 1 of the g_kdOpTypes entries); the values are empty placeholder lists.
g_kdOpLocations = {
    'reg':    [], ## modrm.reg
    'rm':     [], ## modrm.rm
    'imm':    [], ## immediate instruction data
    'vvvv':   [], ## VEX.vvvv

    # fixed registers.
    'AL':     [],
    'rAX':    [],
    'rSI':    [],
    'rDI':    [],
    'rFLAGS': [],
    'CS':     [],
    'DS':     [],
    'ES':     [],
    'FS':     [],
    'GS':     [],
    'SS':     [],
};
200
## \@op[1-4] types
##
## Value fields:
##    - 0: the normal IDX_ParseXXX handler (IDX_UseModRM == IDX_ParseModRM).
##    - 1: the location (g_kdOpLocations).
##    - 2: disassembler format string version of the type.
##    - 3: disassembler OP_PARAM_XXX (XXX only).
##
## Note! See the A.2.1 in SDM vol 2 for the type names.
## Note! Several keys carry an access suffix (_RO, _WO, _RW) or a ZxReg/Hi
##       qualifier and share fields 2 and 3 with the unsuffixed type.
g_kdOpTypes = {
    # Fixed addresses
    'Ap':           ( 'IDX_ParseImmAddrF',  'imm',    '%Ap',  'Ap',      ),

    # ModR/M.rm
    'Eb':           ( 'IDX_UseModRM',       'rm',     '%Eb',  'Eb',      ),
    'Ew':           ( 'IDX_UseModRM',       'rm',     '%Ew',  'Ew',      ),
    'Ev':           ( 'IDX_UseModRM',       'rm',     '%Ev',  'Ev',      ),
    'Wss':          ( 'IDX_UseModRM',       'rm',     '%Wss', 'Wss',     ),
    'Wss_WO':       ( 'IDX_UseModRM',       'rm',     '%Wss', 'Wss',     ),
    'Wsd':          ( 'IDX_UseModRM',       'rm',     '%Wsd', 'Wsd',     ),
    'Wsd_WO':       ( 'IDX_UseModRM',       'rm',     '%Wsd', 'Wsd',     ),
    'Wps':          ( 'IDX_UseModRM',       'rm',     '%Wps', 'Wps',     ),
    'Wps_WO':       ( 'IDX_UseModRM',       'rm',     '%Wps', 'Wps',     ),
    'Wpd':          ( 'IDX_UseModRM',       'rm',     '%Wpd', 'Wpd',     ),
    'Wpd_WO':       ( 'IDX_UseModRM',       'rm',     '%Wpd', 'Wpd',     ),
    'Wdq':          ( 'IDX_UseModRM',       'rm',     '%Wdq', 'Wdq',     ),
    'Wdq_WO':       ( 'IDX_UseModRM',       'rm',     '%Wdq', 'Wdq',     ),
    'Wq':           ( 'IDX_UseModRM',       'rm',     '%Wq',  'Wq',      ),
    'WqZxReg_WO':   ( 'IDX_UseModRM',       'rm',     '%Wq',  'Wq',      ),

    # ModR/M.rm - register only.
    'Uq':           ( 'IDX_UseModRM',       'rm',     '%Uq',  'Uq',      ),
    'UqHi':         ( 'IDX_UseModRM',       'rm',     '%Uq',  'UqHi',    ),
    'Nq':           ( 'IDX_UseModRM',       'rm',     '%Qq',  'Nq',      ),

    # ModR/M.rm - memory only.
    'Ma':           ( 'IDX_UseModRM',       'rm',     '%Ma',  'Ma',      ), ##< Only used by BOUND.
    'Mb_RO':        ( 'IDX_UseModRM',       'rm',     '%Mb',  'Mb',      ),
    'Md_RO':        ( 'IDX_UseModRM',       'rm',     '%Md',  'Md',      ),
    'Md_WO':        ( 'IDX_UseModRM',       'rm',     '%Md',  'Md',      ),
    'Mq':           ( 'IDX_UseModRM',       'rm',     '%Mq',  'Mq',      ),
    'Mq_WO':        ( 'IDX_UseModRM',       'rm',     '%Mq',  'Mq',      ),
    'M_RO':         ( 'IDX_UseModRM',       'rm',     '%M',   'M',       ),
    'M_RW':         ( 'IDX_UseModRM',       'rm',     '%M',   'M',       ),

    # ModR/M.reg
    'Gb':           ( 'IDX_UseModRM',       'reg',    '%Gb',  'Gb',      ),
    'Gw':           ( 'IDX_UseModRM',       'reg',    '%Gw',  'Gw',      ),
    'Gv':           ( 'IDX_UseModRM',       'reg',    '%Gv',  'Gv',      ),
    'Pq_WO':        ( 'IDX_UseModRM',       'reg',    '%Pq',  'Pq',      ),
    'Vss':          ( 'IDX_UseModRM',       'reg',    '%Vss', 'Vss',     ),
    'VssZxReg_WO':  ( 'IDX_UseModRM',       'reg',    '%Vss', 'Vss',     ),
    'Vsd':          ( 'IDX_UseModRM',       'reg',    '%Vsd', 'Vsd',     ),
    'VsdZxReg_WO':  ( 'IDX_UseModRM',       'reg',    '%Vsd', 'Vsd',     ),
    'Vps':          ( 'IDX_UseModRM',       'reg',    '%Vps', 'Vps',     ),
    'Vps_WO':       ( 'IDX_UseModRM',       'reg',    '%Vps', 'Vps',     ),
    'Vpd':          ( 'IDX_UseModRM',       'reg',    '%Vpd', 'Vpd',     ),
    'Vpd_WO':       ( 'IDX_UseModRM',       'reg',    '%Vpd', 'Vpd',     ),
    'Vq':           ( 'IDX_UseModRM',       'reg',    '%Vq',  'Vq',      ),
    'Vq_WO':        ( 'IDX_UseModRM',       'reg',    '%Vq',  'Vq',      ),
    'Vdq_WO':       ( 'IDX_UseModRM',       'reg',    '%Vdq', 'Vdq',     ),
    'VqHi':         ( 'IDX_UseModRM',       'reg',    '%Vdq', 'VdqHi',   ),
    'VqHi_WO':      ( 'IDX_UseModRM',       'reg',    '%Vdq', 'VdqHi',   ),
    'VqZxReg_WO':   ( 'IDX_UseModRM',       'reg',    '%Vq',  'VqZx',    ),

    # Immediate values.
    'Ib':           ( 'IDX_ParseImmByte',   'imm',    '%Ib',  'Ib',      ), ##< NB! Could be IDX_ParseImmByteSX for some instrs.
    'Iw':           ( 'IDX_ParseImmUshort', 'imm',    '%Iw',  'Iw',      ),
    'Id':           ( 'IDX_ParseImmUlong',  'imm',    '%Id',  'Id',      ),
    'Iq':           ( 'IDX_ParseImmQword',  'imm',    '%Iq',  'Iq',      ),
    'Iv':           ( 'IDX_ParseImmV',      'imm',    '%Iv',  'Iv',      ), ##< o16: word, o32: dword, o64: qword
    'Iz':           ( 'IDX_ParseImmZ',      'imm',    '%Iz',  'Iz',      ), ##< o16: word, o32|o64:dword

    # Address operands (no ModR/M).
    'Ob':           ( 'IDX_ParseImmAddr',   'imm',    '%Ob',  'Ob',      ),
    'Ov':           ( 'IDX_ParseImmAddr',   'imm',    '%Ov',  'Ov',      ),

    # Relative jump targets
    'Jb':           ( 'IDX_ParseImmBRel',   'imm',    '%Jb',  'Jb',      ),
    'Jv':           ( 'IDX_ParseImmVRel',   'imm',    '%Jv',  'Jv',      ),

    # DS:rSI
    'Xb':           ( 'IDX_ParseXb',        'rSI',    '%eSI', 'Xb',      ),
    'Xv':           ( 'IDX_ParseXv',        'rSI',    '%eSI', 'Xv',      ),
    # ES:rDI
    'Yb':           ( 'IDX_ParseYb',        'rDI',    '%eDI', 'Yb',      ),
    'Yv':           ( 'IDX_ParseYv',        'rDI',    '%eDI', 'Yv',      ),

    'Fv':           ( 'IDX_ParseFixedReg',  'rFLAGS', '%Fv',  'Fv',      ),

    # Fixed registers.
    'AL':           ( 'IDX_ParseFixedReg',  'AL',     'al',   'REG_AL',  ),
    'rAX':          ( 'IDX_ParseFixedReg',  'rAX',    '%eAX', 'REG_EAX', ),
    'CS':           ( 'IDX_ParseFixedReg',  'CS',     'cs',   'REG_CS',  ), # 8086: push CS
    'DS':           ( 'IDX_ParseFixedReg',  'DS',     'ds',   'REG_DS',  ),
    'ES':           ( 'IDX_ParseFixedReg',  'ES',     'es',   'REG_ES',  ),
    'FS':           ( 'IDX_ParseFixedReg',  'FS',     'fs',   'REG_FS',  ),
    'GS':           ( 'IDX_ParseFixedReg',  'GS',     'gs',   'REG_GS',  ),
    'SS':           ( 'IDX_ParseFixedReg',  'SS',     'ss',   'REG_SS',  ),
};
301
302# IDX_ParseFixedReg
303# IDX_ParseVexDest
304
305
## IEMFORM_XXX mappings.
##
## Each value is a pair: the encoding name (one of the g_kdEncodings keys) and
## the list of operand locations in operand order (None for the 'FIXED' form).
g_kdIemForms = {     # sEncoding,    [ sWhere1, ... ]
    'RM':           ( 'ModR/M',     [ 'reg', 'rm' ], ),
    'RM_REG':       ( 'ModR/M',     [ 'reg', 'rm' ], ),
    'RM_MEM':       ( 'ModR/M',     [ 'reg', 'rm' ], ),
    'MR':           ( 'ModR/M',     [ 'rm', 'reg' ], ),
    'MR_REG':       ( 'ModR/M',     [ 'rm', 'reg' ], ),
    'MR_MEM':       ( 'ModR/M',     [ 'rm', 'reg' ], ),
    'M':            ( 'ModR/M',     [ 'rm', ], ),
    'M_REG':        ( 'ModR/M',     [ 'rm', ], ),
    'M_MEM':        ( 'ModR/M',     [ 'rm', ], ),
    'R':            ( 'ModR/M',     [ 'reg', ], ),

    'VEX_RM':       ( 'VEX.ModR/M', [ 'reg', 'rm' ], ),
    'VEX_RM_REG':   ( 'VEX.ModR/M', [ 'reg', 'rm' ], ),
    'VEX_RM_MEM':   ( 'VEX.ModR/M', [ 'reg', 'rm' ], ),
    'VEX_MR':       ( 'VEX.ModR/M', [ 'rm', 'reg' ], ),
    'VEX_MR_REG':   ( 'VEX.ModR/M', [ 'rm', 'reg' ], ),
    'VEX_MR_MEM':   ( 'VEX.ModR/M', [ 'rm', 'reg' ], ),
    'VEX_M':        ( 'VEX.ModR/M', [ 'rm', ], ),
    'VEX_M_REG':    ( 'VEX.ModR/M', [ 'rm', ], ),
    'VEX_M_MEM':    ( 'VEX.ModR/M', [ 'rm', ], ),
    'VEX_R':        ( 'VEX.ModR/M', [ 'reg', ], ),
    'VEX_RVM':      ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm'], ),
    'VEX_MVR':      ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg'], ),

    'FIXED':        ( 'fixed',      None, )
};
334
## \@oppfx values.
## Only the keys carry information; the values are empty placeholder lists.
g_kdPrefixes = {
    'none': [],
    '0x66': [],
    '0xf3': [],
    '0xf2': [],
};
342
## Special \@opcode tag values.
## Only the keys carry information; the values are empty placeholder lists.
g_kdSpecialOpcodes = {
    '/reg':       [],
    'mr/reg':     [],
    '11 /reg':    [],
    '!11 /reg':   [],
    '11 mr/reg':  [],
    '!11 mr/reg': [],
};
352
## Special \@opcodesub tag values.
## Each value is a one-element list holding the canonical form of the key
## (None for 'none'); the short '11' and '!11' keys are aliases.
g_kdSubOpcodes = {
    'none':       [ None, ],
    '11 mr/reg':  [ '11 mr/reg', ],
    '11':         [ '11 mr/reg', ],     ##< alias
    '!11 mr/reg': [ '!11 mr/reg', ],
    '!11':        [ '!11 mr/reg', ],    ##< alias
};
361
## Valid values for \@openc
## Each value is a one-element list holding a BS3CG1ENC_XXX constant name
## (None for 'prefix').
g_kdEncodings = {
    'ModR/M':     [ 'BS3CG1ENC_MODRM', ],     ##< ModR/M
    'VEX.ModR/M': [ 'BS3CG1ENC_VEX_MODRM', ], ##< VEX...ModR/M
    'fixed':      [ 'BS3CG1ENC_FIXED', ],     ##< Fixed encoding (address, registers, etc).
    'prefix':     [ None, ],                  ##< Prefix
};
369
## \@opunused, \@opinvalid, \@opinvlstyle
## Only the keys carry information; the values are empty placeholder lists.
g_kdInvalidStyles = {
    'immediate':                [], ##< CPU stops decoding immediately after the opcode.
    'intel-modrm':              [], ##< Intel decodes ModR/M.
    'intel-modrm-imm8':         [], ##< Intel decodes ModR/M and an 8-bit immediate.
    'intel-opcode-modrm':       [], ##< Intel decodes another opcode byte followed by ModR/M. (Unused extension tables.)
    'intel-opcode-modrm-imm8':  [], ##< Intel decodes another opcode byte followed by ModR/M and an 8-bit immediate.
};
378
## Known CPU names.
## Only the keys carry information; the values are empty placeholder tuples.
g_kdCpuNames = {
    '8086':  (),
    '80186': (),
    '80286': (),
    '80386': (),
    '80486': (),
};
386
## \@opcpuid
## Maps the CPUID feature names accepted by the tag to X86_CPUID_XXX constant names.
g_kdCpuIdFlags = {
    'vme':        'X86_CPUID_FEATURE_EDX_VME',
    'tsc':        'X86_CPUID_FEATURE_EDX_TSC',
    'msr':        'X86_CPUID_FEATURE_EDX_MSR',
    'cx8':        'X86_CPUID_FEATURE_EDX_CX8',
    'sep':        'X86_CPUID_FEATURE_EDX_SEP',
    'cmov':       'X86_CPUID_FEATURE_EDX_CMOV',
    'clfsh':      'X86_CPUID_FEATURE_EDX_CLFSH',
    'clflushopt': 'X86_CPUID_STEXT_FEATURE_EBX_CLFLUSHOPT',
    'mmx':        'X86_CPUID_FEATURE_EDX_MMX',
    'fxsr':       'X86_CPUID_FEATURE_EDX_FXSR',
    'sse':        'X86_CPUID_FEATURE_EDX_SSE',
    'sse2':       'X86_CPUID_FEATURE_EDX_SSE2',
    'sse3':       'X86_CPUID_FEATURE_ECX_SSE3',
    # NOTE(review): the constant below is named after DTES64, not PCLMUL - looks
    #               like a neighbouring-bit mix-up; confirm before relying on it.
    'pclmul':     'X86_CPUID_FEATURE_ECX_DTES64',
    # NOTE(review): the constant below is named after CPLDS, not MONITOR - confirm.
    'monitor':    'X86_CPUID_FEATURE_ECX_CPLDS',
    'vmx':        'X86_CPUID_FEATURE_ECX_VMX',
    # NOTE(review): the constant below is named after TM2, not SMX - confirm.
    'smx':        'X86_CPUID_FEATURE_ECX_TM2',
    'ssse3':      'X86_CPUID_FEATURE_ECX_SSSE3',
    'fma':        'X86_CPUID_FEATURE_ECX_FMA',
    'cx16':       'X86_CPUID_FEATURE_ECX_CX16',
    'pcid':       'X86_CPUID_FEATURE_ECX_PCID',
    'sse41':      'X86_CPUID_FEATURE_ECX_SSE4_1',
    'sse42':      'X86_CPUID_FEATURE_ECX_SSE4_2',
    'movbe':      'X86_CPUID_FEATURE_ECX_MOVBE',
    'popcnt':     'X86_CPUID_FEATURE_ECX_POPCNT',
    'aes':        'X86_CPUID_FEATURE_ECX_AES',
    'xsave':      'X86_CPUID_FEATURE_ECX_XSAVE',
    'avx':        'X86_CPUID_FEATURE_ECX_AVX',
    'f16c':       'X86_CPUID_FEATURE_ECX_F16C',
    'rdrand':     'X86_CPUID_FEATURE_ECX_RDRAND',

    'axmmx':      'X86_CPUID_AMD_FEATURE_EDX_AXMMX',
    '3dnowext':   'X86_CPUID_AMD_FEATURE_EDX_3DNOW_EX',
    '3dnow':      'X86_CPUID_AMD_FEATURE_EDX_3DNOW',
    'svm':        'X86_CPUID_AMD_FEATURE_ECX_SVM',
    'cr8l':       'X86_CPUID_AMD_FEATURE_ECX_CR8L',
    'abm':        'X86_CPUID_AMD_FEATURE_ECX_ABM',
    'sse4a':      'X86_CPUID_AMD_FEATURE_ECX_SSE4A',
    '3dnowprf':   'X86_CPUID_AMD_FEATURE_ECX_3DNOWPRF',
    'xop':        'X86_CPUID_AMD_FEATURE_ECX_XOP',
    'fma4':       'X86_CPUID_AMD_FEATURE_ECX_FMA4',
};
431
## \@ophints values.
## Maps each hint name to a DISOPTYPE_XXX constant name; the last two hints map
## to an empty string.
g_kdHints = {
    'invalid':               'DISOPTYPE_INVALID',               ##<
    'harmless':              'DISOPTYPE_HARMLESS',              ##<
    'controlflow':           'DISOPTYPE_CONTROLFLOW',           ##<
    'potentially_dangerous': 'DISOPTYPE_POTENTIALLY_DANGEROUS', ##<
    'dangerous':             'DISOPTYPE_DANGEROUS',             ##<
    'portio':                'DISOPTYPE_PORTIO',                ##<
    'privileged':            'DISOPTYPE_PRIVILEGED',            ##<
    'privileged_notrap':     'DISOPTYPE_PRIVILEGED_NOTRAP',     ##<
    'uncond_controlflow':    'DISOPTYPE_UNCOND_CONTROLFLOW',    ##<
    'relative_controlflow':  'DISOPTYPE_RELATIVE_CONTROLFLOW',  ##<
    'cond_controlflow':      'DISOPTYPE_COND_CONTROLFLOW',      ##<
    'interrupt':             'DISOPTYPE_INTERRUPT',             ##<
    'illegal':               'DISOPTYPE_ILLEGAL',               ##<
    'rrm_dangerous':         'DISOPTYPE_RRM_DANGEROUS',         ##< Some additional dangerous ones when recompiling raw r0.
    'rrm_dangerous_16':      'DISOPTYPE_RRM_DANGEROUS_16',      ##< Some additional dangerous ones when recompiling 16-bit raw r0.
    'inhibit_irqs':          'DISOPTYPE_INHIBIT_IRQS',          ##< Will or can inhibit irqs (sti, pop ss, mov ss)
    'portio_read':           'DISOPTYPE_PORTIO_READ',           ##<
    'portio_write':          'DISOPTYPE_PORTIO_WRITE',          ##<
    'invalid_64':            'DISOPTYPE_INVALID_64',            ##< Invalid in 64 bits mode
    'only_64':               'DISOPTYPE_ONLY_64',               ##< Only valid in 64 bits mode
    'default_64_op_size':    'DISOPTYPE_DEFAULT_64_OP_SIZE',    ##< Default 64 bits operand size
    'forced_64_op_size':     'DISOPTYPE_FORCED_64_OP_SIZE',     ##< Forced 64 bits operand size; regardless of prefix bytes
    'rexb_extends_opreg':    'DISOPTYPE_REXB_EXTENDS_OPREG',    ##< REX.B extends the register field in the opcode byte
    'mod_fixed_11':          'DISOPTYPE_MOD_FIXED_11',          ##< modrm.mod is always 11b
    'forced_32_op_size_x86': 'DISOPTYPE_FORCED_32_OP_SIZE_X86', ##< Forced 32 bits operand size; regardless of prefix bytes
                                                                ##  (only in 16 & 32 bits mode!)
    'sse':                   'DISOPTYPE_SSE',                   ##< SSE,SSE2,SSE3,AVX,++ instruction. Not implemented yet!
    'mmx':                   'DISOPTYPE_MMX',                   ##< MMX,MMXExt,3DNow,++ instruction. Not implemented yet!
    'fpu':                   'DISOPTYPE_FPU',                   ##< FPU instruction. Not implemented yet!
    'ignores_op_size':       '',                                ##< Ignores both operand size prefixes.
    'lock_allowed':          '',                                ##< Lock prefix allowed.
};
466
## \@opxcpttype values (see SDMv2 2.4, 2.7).
## Only the keys carry information; the values are empty placeholder lists.
g_kdXcptTypes = {
    'none':  [],
    '1':     [],
    '2':     [],
    '3':     [],
    '4':     [],
    '4UA':   [],
    '5':     [],
    '6':     [],
    '7':     [],
    '8':     [],
    '11':    [],
    '12':    [],
    'E1':    [],
    'E1NF':  [],
    'E2':    [],
    'E3':    [],
    'E3NF':  [],
    'E4':    [],
    'E4NF':  [],
    'E5':    [],
    'E5NF':  [],
    'E6':    [],
    'E6NF':  [],
    'E7NF':  [],
    'E9':    [],
    'E9NF':  [],
    'E10':   [],
    'E11':   [],
    'E12':   [],
    'E12NF': [],
};
500
501
502def _isValidOpcodeByte(sOpcode):
503 """
504 Checks if sOpcode is a valid lower case opcode byte.
505 Returns true/false.
506 """
507 if len(sOpcode) == 4:
508 if sOpcode[:2] == '0x':
509 if sOpcode[2] in '0123456789abcdef':
510 if sOpcode[3] in '0123456789abcdef':
511 return True;
512 return False;
513
514
class InstructionMap(object):
    """
    Instruction map.

    The opcode map provides the lead opcode bytes (empty for the one byte
    opcode map).  An instruction can be member of multiple opcode maps as long
    as it uses the same opcode value within the map (because of VEX).
    """

    ## Valid sEncoding values.  Only the keys carry information.
    kdEncodings = {
        'legacy':   [],
        'vex1':     [], ##< VEX or EVEX prefix with mmmmm (map select) = 1
        'vex2':     [], ##< VEX or EVEX prefix with mmmmm (map select) = 2
        'vex3':     [], ##< VEX or EVEX prefix with mmmmm (map select) = 3
        'xop8':     [], ##< XOP prefix with mmmmm (map select) = 8
        'xop9':     [], ##< XOP prefix with mmmmm (map select) = 9
        'xop10':    [], ##< XOP prefix with mmmmm (map select) = 10
    };
    ## Selectors.
    ## The first value is the number of table entries required by a
    ## decoder or disassembler for this type of selector.
    kdSelectors = {
        'byte':     [ 256, ], ##< next opcode byte selects the instruction (default).
        '/r':       [   8, ], ##< modrm.reg selects the instruction.
        'memreg /r':[  16, ], ##< modrm.reg and (modrm.mod == 3) selects the instruction.
        'mod /r':   [  32, ], ##< modrm.reg and modrm.mod selects the instruction.
        '!11 /r':   [   8, ], ##< modrm.reg selects the instruction with modrm.mod != 0y11.
        '11 /r':    [   8, ], ##< modrm.reg select the instruction with modrm.mod == 0y11.
        '11':       [  64, ], ##< modrm.reg and modrm.rm select the instruction with modrm.mod == 0y11.
    };

    def __init__(self, sName, asLeadOpcodes = None, sSelector = 'byte', sEncoding = 'legacy', sDisParse = None):
        """
        Creates an empty map.

        sName is the map name (getDisasTableName derives the table name from
        it), asLeadOpcodes the lead opcode bytes as lower case hex strings like
        '0x0f' (None means no lead bytes), sSelector one of the kdSelectors
        keys, sEncoding one of the kdEncodings keys, and sDisParse either None
        or a string starting with 'IDX_Parse'.

        Invalid arguments trigger assertions.
        """
        assert sSelector in self.kdSelectors;
        assert sEncoding in self.kdEncodings;
        if asLeadOpcodes is None:
            asLeadOpcodes = [];
        else:
            for sOpcode in asLeadOpcodes:
                assert _isValidOpcodeByte(sOpcode);
        assert sDisParse is None or sDisParse.startswith('IDX_Parse');

        self.sName          = sName;
        self.asLeadOpcodes  = asLeadOpcodes;    ##< Lead opcode bytes formatted as hex strings like '0x0f'.
        self.sSelector      = sSelector;        ##< The member selector, see kdSelectors.
        self.sEncoding      = sEncoding;        ##< The encoding, see kdEncodings.
        self.aoInstructions = [];               # type: List[Instruction]
        self.sDisParse      = sDisParse;        ##< IDX_ParseXXX.

    def getTableSize(self):
        """
        Number of table entries.  This corresponds directly to the selector.
        """
        return self.kdSelectors[self.sSelector][0];

    def getInstructionIndex(self, oInstr):
        """
        Returns the table index for the instruction.

        The index is derived from oInstr.getOpcodeByte() according to the
        selector of this map.  For the selectors that fix modrm.mod, the two
        top bits of the byte (mask 0xc0) are asserted to match.
        """
        bOpcode = oInstr.getOpcodeByte();

        # The byte selector is simple.  We need a full opcode byte and need just return it.
        if self.sSelector == 'byte':
            assert oInstr.sOpcode[:2] == '0x' and len(oInstr.sOpcode) == 4, str(oInstr);
            return bOpcode;

        # The other selectors needs masking and shifting.
        if self.sSelector == '/r':
            return (bOpcode >> 3) & 0x7;

        if self.sSelector == 'mod /r':
            return (bOpcode >> 3) & 0x1f;

        if self.sSelector == 'memreg /r':
            return ((bOpcode >> 3) & 0x7) | (int((bOpcode >> 6) == 3) << 3);

        # modrm.mod lives in bits 7:6, so mod == 0y11 means the masked value is
        # 0xc0.  (Comparing with 0xc can never match a value masked by 0xc0.)
        if self.sSelector == '!11 /r':
            assert (bOpcode & 0xc0) != 0xc0, str(oInstr);
            return (bOpcode >> 3) & 0x7;

        if self.sSelector == '11 /r':
            assert (bOpcode & 0xc0) == 0xc0, str(oInstr);
            return (bOpcode >> 3) & 0x7;

        if self.sSelector == '11':
            assert (bOpcode & 0xc0) == 0xc0, str(oInstr);
            return bOpcode & 0x3f;

        assert False, self.sSelector;
        return -1;

    def getInstructionsInTableOrder(self):
        """
        Get instructions in table order.

        Returns array of instructions.  Normally there is exactly one
        instruction per entry.  However the entry could also be None if
        no instruction was specified for that opcode value.  Or there
        could be a list of instructions to deal with special encodings
        where for instance prefix (e.g. REX.W) encodes a different
        instruction or different CPUs have different instructions or
        prefixes in the same place.
        """
        # Start with empty table.
        cTable  = self.getTableSize();
        aoTable = [None] * cTable;

        # Insert the instructions.  Instructions without an opcode are skipped.
        for oInstr in self.aoInstructions:
            if oInstr.sOpcode:
                idxOpcode = self.getInstructionIndex(oInstr);
                assert idxOpcode < cTable, str(idxOpcode);

                oExisting = aoTable[idxOpcode];
                if oExisting is None:
                    aoTable[idxOpcode] = oInstr;
                elif not isinstance(oExisting, list):
                    # Second instruction for this slot: switch to a list.
                    aoTable[idxOpcode] = [oExisting, oInstr];
                else:
                    oExisting.append(oInstr);

        return aoTable;


    def getDisasTableName(self):
        """
        Returns the disassembler table name for this map.

        The name is 'g_aDisas' followed by the '_' separated words of sName:
        'm' and 'r' words and two digit lower case hex words are appended with
        a leading underscore, any other word is appended with 'grp'/'map'
        capitalized and its first letter in upper case.
        """
        sName = 'g_aDisas';
        for sWord in self.sName.split('_'):
            if not sWord:           # Empty word (empty name, doubled or leading/trailing '_').
                continue;
            if sWord == 'm':        # suffix indicating modrm.mod==mem
                sName += '_m';
            elif sWord == 'r':      # suffix indicating modrm.mod==reg
                sName += '_r';
            elif len(sWord) == 2 and re.match('^[a-f0-9][a-f0-9]$', sWord):
                sName += '_' + sWord;
            else:
                sWord  = sWord.replace('grp', 'Grp');
                sWord  = sWord.replace('map', 'Map');
                sName += sWord[0].upper() + sWord[1:];
        return sName;
655
656
class TestType(object):
    """
    Test value type.

    This base class deals with integer like values.  The fUnsigned constructor
    parameter indicates the default stance on zero vs sign extending.  It is
    possible to override fUnsigned=True by prefixing the value with '+' or '-'.
    """
    def __init__(self, sName, acbSizes = None, fUnsigned = True):
        """
        sName is the type name, acbSizes the list of normal value sizes in
        bytes (defaults to 1, 2, 4, 8, 16 and 32), and fUnsigned selects zero
        extending (True) or sign extending (False) as the default.
        """
        self.sName = sName;
        self.acbSizes = [1, 2, 4, 8, 16, 32] if acbSizes is None else acbSizes;  # Normal sizes.
        self.fUnsigned = fUnsigned;

    class BadValue(Exception):
        """ Bad value exception. """
        def __init__(self, sMessage):
            Exception.__init__(self, sMessage);
            self.sMessage = sMessage;

    ## For ascii ~ operator.
    kdHexInv = {
        '0': 'f',
        '1': 'e',
        '2': 'd',
        '3': 'c',
        '4': 'b',
        '5': 'a',
        '6': '9',
        '7': '8',
        '8': '7',
        '9': '6',
        'a': '5',
        'b': '4',
        'c': '3',
        'd': '2',
        'e': '1',
        'f': '0',
    };

    @staticmethod
    def _toHexDigits(iValue):
        """
        Returns the bare lower case hex digits of the non-negative integer
        iValue, i.e. hex() output without the '0x' prefix and without the 'L'
        suffix Python 2 appends to long integers.
        """
        sHex = hex(iValue);
        if sHex.endswith('L'):
            sHex = sHex[:-1];
        assert sHex[:2] == '0x';
        return sHex[2:];

    def get(self, sValue):
        """
        Get the shortest normal sized byte representation of sValue.

        Returns ((fSignExtend, bytearray), ) or ((fSignExtend, bytearray), (fSignExtend, bytearray), ).
        The latter form is for AND+OR pairs where the first entry is what to
        AND with the field and the second the one or OR with.  This base
        implementation always returns the former; the bytearray holds the value
        with the least significant byte first.

        Raises BadValue if invalid value.
        """
        if not sValue:
            raise TestType.BadValue('empty value');

        # Deal with sign and detect hexadecimal or decimal.
        fSignExtend = not self.fUnsigned;
        if sValue[0] == '-' or sValue[0] == '+':
            fSignExtend = True;
            fHex = len(sValue) > 3 and sValue[1:3].lower() == '0x';
        else:
            fHex = len(sValue) > 2 and sValue[0:2].lower() == '0x';

        # Try convert it to an integer.  int() is used rather than long() so the
        # method does not depend on a module level alias; it yields arbitrary
        # precision integers on both Python 2 and 3.
        try:
            iValue = int(sValue, 16 if fHex else 10);
        except Exception as oXcpt:
            raise TestType.BadValue('failed to convert "%s" to integer (%s)' % (sValue, oXcpt));

        # Convert the hex string and pad it to a decent value.  Negative values
        # needs to be manually converted to something non-negative (~-n + 1).
        if iValue >= 0:
            sHex = self._toHexDigits(iValue);
        else:
            sHex = ''.join([self.kdHexInv[sDigit] for sDigit in self._toHexDigits(-iValue - 1)]);
            if fSignExtend and sHex[0] not in [ '8', '9', 'a', 'b', 'c', 'd', 'e', 'f']:
                sHex = 'f' + sHex;

        # Pick the first normal size that fits, or, if the value is larger than
        # the biggest one, round up to a whole multiple of the biggest size.
        cDigits    = len(sHex);
        cMaxDigits = self.acbSizes[-1] * 2;
        if cDigits <= cMaxDigits:
            cNaturalDigits = next(cb * 2 for cb in self.acbSizes if cDigits <= cb * 2);
        else:
            cNaturalDigits = ((cDigits + cMaxDigits - 1) // cMaxDigits) * cMaxDigits;

        # Pad with zeros or, for negative values, with 'f' digits.
        if cNaturalDigits != cDigits:
            cNeeded = cNaturalDigits - cDigits;
            if iValue >= 0:
                sHex = ('0' * cNeeded) + sHex;
            else:
                sHex = ('f' * cNeeded) + sHex;

        # Invert and convert to bytearray and return it.
        abValue = bytearray([int(sHex[offHex - 2 : offHex], 16) for offHex in range(len(sHex), 0, -2)]);

        return ((fSignExtend, abValue),);

    def validate(self, sValue):
        """
        Returns True if value is okay, error message on failure.
        """
        try:
            self.get(sValue);
        except TestType.BadValue as oXcpt:
            return oXcpt.sMessage;
        return True;

    def isAndOrPair(self, sValue):
        """
        Checks if sValue is a pair.  Always False for this base type.
        """
        _ = sValue;
        return False;
781
782
class TestTypeEflags(TestType):
    """
    Special value parsing for EFLAGS/RFLAGS/FLAGS.

    Values are comma separated lists of the flag mnemonics found in
    g_kdEFlagsMnemonics.
    """

    ## The mnemonics that stand for a flag being clear.  Only the keys are used.
    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 };

    def __init__(self, sName):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);

    def get(self, sValue):
        """
        Converts a comma separated list of flag mnemonics.

        Returns a (clear-mask, set-mask) pair of (fSignExtend, bytearray)
        entries if any of the mnemonics means 'flag clear', otherwise just the
        one-entry tuple with the set-mask as produced by TestType.get().

        Raises BadValue on unknown mnemonics.
        """
        fClear = 0;
        fSet   = 0;
        for sFlag in sValue.split(','):
            sConstant = g_kdEFlagsMnemonics.get(sFlag, None);
            if sConstant is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sFlag, sValue))
            if sConstant[0] == '!':
                # '!X86_EFL_XXX' - the mnemonic means the flag is clear.
                fClear |= g_kdX86EFlagsConstants[sConstant[1:]];
            else:
                fSet   |= g_kdX86EFlagsConstants[sConstant];

        aoSet = TestType.get(self, '0x%x' % (fSet,));
        if fClear != 0:
            aoClear = TestType.get(self, '%#x' % (fClear,))
            assert self.isAndOrPair(sValue) is True;
            return (aoClear[0], aoSet[0]);
        assert self.isAndOrPair(sValue) is False;
        return aoSet;

    def isAndOrPair(self, sValue):
        """
        Checks if sValue contains any of the 'flag clear' mnemonics, in which
        case get() returns a pair.

        The comparison is done per comma separated mnemonic.  A plain substring
        search would misfire on mnemonics that merely contain one of the short
        names, e.g. 'iopl' contains 'pl'.
        """
        for sFlag in sValue.split(','):
            if sFlag in self.kdZeroValueFlags:
                return True;
        return False;
818
class TestTypeFromDict(TestType):
    """
    Special value parsing for flag lists that are resolved through a
    dictionary of constants (CR0 being one example).

    A value is a comma separated list of flag names.  Each name is upper
    cased, prefixed with sConstantPrefix and looked up in kdConstantsAndValues;
    the values found are OR'ed together.
    """

    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 };

    def __init__(self, sName, kdConstantsAndValues, sConstantPrefix):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);
        self.sConstantPrefix      = sConstantPrefix;
        self.kdConstantsAndValues = kdConstantsAndValues;

    def get(self, sValue):
        """
        Converts the flag list to the combined value and returns what
        TestType.get() makes of it.  Raises BadValue on unknown flag names.
        """
        fCombined = 0;
        for sName in sValue.split(','):
            fBits = self.kdConstantsAndValues.get(self.sConstantPrefix + sName.upper(), None);
            if fBits is not None:
                fCombined |= fBits;
                continue;
            raise self.BadValue('Unknown flag "%s" in "%s"' % (sName, sValue))
        return TestType.get(self, '0x%x' % (fCombined,));
839
840
841class TestInOut(object):
842 """
843 One input or output state modifier.
844
845 This should be thought as values to modify BS3REGCTX and extended (needs
846 to be structured) state.
847 """
848 ## Assigned operators.
849 kasOperators = [
850 '&|=', # Special AND(INV)+OR operator for use with EFLAGS.
851 '&~=',
852 '&=',
853 '|=',
854 '='
855 ];
856 ## Types
857 kdTypes = {
858 'uint': TestType('uint', fUnsigned = True),
859 'int': TestType('int'),
860 'efl': TestTypeEflags('efl'),
861 'cr0': TestTypeFromDict('cr0', g_kdX86Cr0Constants, 'X86_CR0_'),
862 'cr4': TestTypeFromDict('cr4', g_kdX86Cr4Constants, 'X86_CR4_'),
863 'xcr0': TestTypeFromDict('xcr0', g_kdX86XSaveCConstants, 'XSAVE_C_'),
864 };
865 ## CPU context fields.
866 kdFields = {
867 # name: ( default type, [both|input|output], )
868 # Operands.
869 'op1': ( 'uint', 'both', ), ## \@op1
870 'op2': ( 'uint', 'both', ), ## \@op2
871 'op3': ( 'uint', 'both', ), ## \@op3
872 'op4': ( 'uint', 'both', ), ## \@op4
873 # Flags.
874 'efl': ( 'efl', 'both', ),
875 'efl_undef': ( 'uint', 'output', ),
876 # 8-bit GPRs.
877 'al': ( 'uint', 'both', ),
878 'cl': ( 'uint', 'both', ),
879 'dl': ( 'uint', 'both', ),
880 'bl': ( 'uint', 'both', ),
881 'ah': ( 'uint', 'both', ),
882 'ch': ( 'uint', 'both', ),
883 'dh': ( 'uint', 'both', ),
884 'bh': ( 'uint', 'both', ),
885 'r8l': ( 'uint', 'both', ),
886 'r9l': ( 'uint', 'both', ),
887 'r10l': ( 'uint', 'both', ),
888 'r11l': ( 'uint', 'both', ),
889 'r12l': ( 'uint', 'both', ),
890 'r13l': ( 'uint', 'both', ),
891 'r14l': ( 'uint', 'both', ),
892 'r15l': ( 'uint', 'both', ),
893 # 16-bit GPRs.
894 'ax': ( 'uint', 'both', ),
895 'dx': ( 'uint', 'both', ),
896 'cx': ( 'uint', 'both', ),
897 'bx': ( 'uint', 'both', ),
898 'sp': ( 'uint', 'both', ),
899 'bp': ( 'uint', 'both', ),
900 'si': ( 'uint', 'both', ),
901 'di': ( 'uint', 'both', ),
902 'r8w': ( 'uint', 'both', ),
903 'r9w': ( 'uint', 'both', ),
904 'r10w': ( 'uint', 'both', ),
905 'r11w': ( 'uint', 'both', ),
906 'r12w': ( 'uint', 'both', ),
907 'r13w': ( 'uint', 'both', ),
908 'r14w': ( 'uint', 'both', ),
909 'r15w': ( 'uint', 'both', ),
910 # 32-bit GPRs.
911 'eax': ( 'uint', 'both', ),
912 'edx': ( 'uint', 'both', ),
913 'ecx': ( 'uint', 'both', ),
914 'ebx': ( 'uint', 'both', ),
915 'esp': ( 'uint', 'both', ),
916 'ebp': ( 'uint', 'both', ),
917 'esi': ( 'uint', 'both', ),
918 'edi': ( 'uint', 'both', ),
919 'r8d': ( 'uint', 'both', ),
920 'r9d': ( 'uint', 'both', ),
921 'r10d': ( 'uint', 'both', ),
922 'r11d': ( 'uint', 'both', ),
923 'r12d': ( 'uint', 'both', ),
924 'r13d': ( 'uint', 'both', ),
925 'r14d': ( 'uint', 'both', ),
926 'r15d': ( 'uint', 'both', ),
927 # 64-bit GPRs.
928 'rax': ( 'uint', 'both', ),
929 'rdx': ( 'uint', 'both', ),
930 'rcx': ( 'uint', 'both', ),
931 'rbx': ( 'uint', 'both', ),
932 'rsp': ( 'uint', 'both', ),
933 'rbp': ( 'uint', 'both', ),
934 'rsi': ( 'uint', 'both', ),
935 'rdi': ( 'uint', 'both', ),
936 'r8': ( 'uint', 'both', ),
937 'r9': ( 'uint', 'both', ),
938 'r10': ( 'uint', 'both', ),
939 'r11': ( 'uint', 'both', ),
940 'r12': ( 'uint', 'both', ),
941 'r13': ( 'uint', 'both', ),
942 'r14': ( 'uint', 'both', ),
943 'r15': ( 'uint', 'both', ),
944 # 16-bit, 32-bit or 64-bit registers according to operand size.
945 'oz.rax': ( 'uint', 'both', ),
946 'oz.rdx': ( 'uint', 'both', ),
947 'oz.rcx': ( 'uint', 'both', ),
948 'oz.rbx': ( 'uint', 'both', ),
949 'oz.rsp': ( 'uint', 'both', ),
950 'oz.rbp': ( 'uint', 'both', ),
951 'oz.rsi': ( 'uint', 'both', ),
952 'oz.rdi': ( 'uint', 'both', ),
953 'oz.r8': ( 'uint', 'both', ),
954 'oz.r9': ( 'uint', 'both', ),
955 'oz.r10': ( 'uint', 'both', ),
956 'oz.r11': ( 'uint', 'both', ),
957 'oz.r12': ( 'uint', 'both', ),
958 'oz.r13': ( 'uint', 'both', ),
959 'oz.r14': ( 'uint', 'both', ),
960 'oz.r15': ( 'uint', 'both', ),
961 # Control registers.
962 'cr0': ( 'cr0', 'both', ),
963 'cr4': ( 'cr4', 'both', ),
964 'xcr0': ( 'xcr0', 'both', ),
965 # FPU Registers
966 'fcw': ( 'uint', 'both', ),
967 'fsw': ( 'uint', 'both', ),
968 'ftw': ( 'uint', 'both', ),
969 'fop': ( 'uint', 'both', ),
970 'fpuip': ( 'uint', 'both', ),
971 'fpucs': ( 'uint', 'both', ),
972 'fpudp': ( 'uint', 'both', ),
973 'fpuds': ( 'uint', 'both', ),
974 'mxcsr': ( 'uint', 'both', ),
975 'st0': ( 'uint', 'both', ),
976 'st1': ( 'uint', 'both', ),
977 'st2': ( 'uint', 'both', ),
978 'st3': ( 'uint', 'both', ),
979 'st4': ( 'uint', 'both', ),
980 'st5': ( 'uint', 'both', ),
981 'st6': ( 'uint', 'both', ),
982 'st7': ( 'uint', 'both', ),
983 # MMX registers.
984 'mm0': ( 'uint', 'both', ),
985 'mm1': ( 'uint', 'both', ),
986 'mm2': ( 'uint', 'both', ),
987 'mm3': ( 'uint', 'both', ),
988 'mm4': ( 'uint', 'both', ),
989 'mm5': ( 'uint', 'both', ),
990 'mm6': ( 'uint', 'both', ),
991 'mm7': ( 'uint', 'both', ),
992 # SSE registers.
993 'xmm0': ( 'uint', 'both', ),
994 'xmm1': ( 'uint', 'both', ),
995 'xmm2': ( 'uint', 'both', ),
996 'xmm3': ( 'uint', 'both', ),
997 'xmm4': ( 'uint', 'both', ),
998 'xmm5': ( 'uint', 'both', ),
999 'xmm6': ( 'uint', 'both', ),
1000 'xmm7': ( 'uint', 'both', ),
1001 'xmm8': ( 'uint', 'both', ),
1002 'xmm9': ( 'uint', 'both', ),
1003 'xmm10': ( 'uint', 'both', ),
1004 'xmm11': ( 'uint', 'both', ),
1005 'xmm12': ( 'uint', 'both', ),
1006 'xmm13': ( 'uint', 'both', ),
1007 'xmm14': ( 'uint', 'both', ),
1008 'xmm15': ( 'uint', 'both', ),
1009 'xmm0.lo': ( 'uint', 'both', ),
1010 'xmm1.lo': ( 'uint', 'both', ),
1011 'xmm2.lo': ( 'uint', 'both', ),
1012 'xmm3.lo': ( 'uint', 'both', ),
1013 'xmm4.lo': ( 'uint', 'both', ),
1014 'xmm5.lo': ( 'uint', 'both', ),
1015 'xmm6.lo': ( 'uint', 'both', ),
1016 'xmm7.lo': ( 'uint', 'both', ),
1017 'xmm8.lo': ( 'uint', 'both', ),
1018 'xmm9.lo': ( 'uint', 'both', ),
1019 'xmm10.lo': ( 'uint', 'both', ),
1020 'xmm11.lo': ( 'uint', 'both', ),
1021 'xmm12.lo': ( 'uint', 'both', ),
1022 'xmm13.lo': ( 'uint', 'both', ),
1023 'xmm14.lo': ( 'uint', 'both', ),
1024 'xmm15.lo': ( 'uint', 'both', ),
1025 'xmm0.hi': ( 'uint', 'both', ),
1026 'xmm1.hi': ( 'uint', 'both', ),
1027 'xmm2.hi': ( 'uint', 'both', ),
1028 'xmm3.hi': ( 'uint', 'both', ),
1029 'xmm4.hi': ( 'uint', 'both', ),
1030 'xmm5.hi': ( 'uint', 'both', ),
1031 'xmm6.hi': ( 'uint', 'both', ),
1032 'xmm7.hi': ( 'uint', 'both', ),
1033 'xmm8.hi': ( 'uint', 'both', ),
1034 'xmm9.hi': ( 'uint', 'both', ),
1035 'xmm10.hi': ( 'uint', 'both', ),
1036 'xmm11.hi': ( 'uint', 'both', ),
1037 'xmm12.hi': ( 'uint', 'both', ),
1038 'xmm13.hi': ( 'uint', 'both', ),
1039 'xmm14.hi': ( 'uint', 'both', ),
1040 'xmm15.hi': ( 'uint', 'both', ),
1041 'xmm0.lo.zx': ( 'uint', 'both', ),
1042 'xmm1.lo.zx': ( 'uint', 'both', ),
1043 'xmm2.lo.zx': ( 'uint', 'both', ),
1044 'xmm3.lo.zx': ( 'uint', 'both', ),
1045 'xmm4.lo.zx': ( 'uint', 'both', ),
1046 'xmm5.lo.zx': ( 'uint', 'both', ),
1047 'xmm6.lo.zx': ( 'uint', 'both', ),
1048 'xmm7.lo.zx': ( 'uint', 'both', ),
1049 'xmm8.lo.zx': ( 'uint', 'both', ),
1050 'xmm9.lo.zx': ( 'uint', 'both', ),
1051 'xmm10.lo.zx': ( 'uint', 'both', ),
1052 'xmm11.lo.zx': ( 'uint', 'both', ),
1053 'xmm12.lo.zx': ( 'uint', 'both', ),
1054 'xmm13.lo.zx': ( 'uint', 'both', ),
1055 'xmm14.lo.zx': ( 'uint', 'both', ),
1056 'xmm15.lo.zx': ( 'uint', 'both', ),
1057 'xmm0.dw0': ( 'uint', 'both', ),
1058 'xmm1.dw0': ( 'uint', 'both', ),
1059 'xmm2.dw0': ( 'uint', 'both', ),
1060 'xmm3.dw0': ( 'uint', 'both', ),
1061 'xmm4.dw0': ( 'uint', 'both', ),
1062 'xmm5.dw0': ( 'uint', 'both', ),
1063 'xmm6.dw0': ( 'uint', 'both', ),
1064 'xmm7.dw0': ( 'uint', 'both', ),
1065 'xmm8.dw0': ( 'uint', 'both', ),
1066 'xmm9.dw0': ( 'uint', 'both', ),
1067 'xmm10.dw0': ( 'uint', 'both', ),
1068 'xmm11.dw0': ( 'uint', 'both', ),
1069 'xmm12.dw0': ( 'uint', 'both', ),
1070 'xmm13.dw0': ( 'uint', 'both', ),
1071 'xmm14.dw0': ( 'uint', 'both', ),
1072 'xmm15_dw0': ( 'uint', 'both', ),
1073 # AVX registers.
1074 'ymm0': ( 'uint', 'both', ),
1075 'ymm1': ( 'uint', 'both', ),
1076 'ymm2': ( 'uint', 'both', ),
1077 'ymm3': ( 'uint', 'both', ),
1078 'ymm4': ( 'uint', 'both', ),
1079 'ymm5': ( 'uint', 'both', ),
1080 'ymm6': ( 'uint', 'both', ),
1081 'ymm7': ( 'uint', 'both', ),
1082 'ymm8': ( 'uint', 'both', ),
1083 'ymm9': ( 'uint', 'both', ),
1084 'ymm10': ( 'uint', 'both', ),
1085 'ymm11': ( 'uint', 'both', ),
1086 'ymm12': ( 'uint', 'both', ),
1087 'ymm13': ( 'uint', 'both', ),
1088 'ymm14': ( 'uint', 'both', ),
1089 'ymm15': ( 'uint', 'both', ),
1090
1091 # Special ones.
1092 'value.xcpt': ( 'uint', 'output', ),
1093 };
1094
1095 def __init__(self, sField, sOp, sValue, sType):
1096 assert sField in self.kdFields;
1097 assert sOp in self.kasOperators;
1098 self.sField = sField;
1099 self.sOp = sOp;
1100 self.sValue = sValue;
1101 self.sType = sType;
1102 assert isinstance(sField, str);
1103 assert isinstance(sOp, str);
1104 assert isinstance(sType, str);
1105 assert isinstance(sValue, str);
1106
1107
class TestSelector(object):
    """
    One selector for an instruction test.

    A selector is a (variable, compare operator, value) triplet.  The
    variable must be a key in kdVariables and the value one of the keys that
    variable maps to.
    """
    ## Selector compare operators.
    kasCompareOps = [ '==', '!=' ];
    ## Selector variables and their valid values.
    ## The outer key is the variable name, the inner dictionary maps each
    ## valid value string to an identifier-like name.
    kdVariables = {
        # Operand size.
        'size': {
            'o16':  'size_o16',
            'o32':  'size_o32',
            'o64':  'size_o64',
        },
        # Execution ring.
        'ring': {
            '0':    'ring_0',
            '1':    'ring_1',
            '2':    'ring_2',
            '3':    'ring_3',
            '0..2': 'ring_0_thru_2',
            '1..3': 'ring_1_thru_3',
        },
        # Basic code mode.
        'codebits': {
            '64':   'code_64bit',
            '32':   'code_32bit',
            '16':   'code_16bit',
        },
        # cpu modes.
        'mode': {
            'real': 'mode_real',
            'prot': 'mode_prot',
            'long': 'mode_long',
            'v86':  'mode_v86',
            'smm':  'mode_smm',
            'vmx':  'mode_vmx',
            'svm':  'mode_svm',
        },
        # paging on/off
        'paging': {
            'on':   'paging_on',
            'off':  'paging_off',
        },
        # CPU vendor
        'vendor': {
            'amd':   'vendor_amd',
            'intel': 'vendor_intel',
            'via':   'vendor_via',
        },
    };
    ## Selector shorthand predicates.
    ## Each shorthand translates into a 'variable<op>value' expression.
    kdPredicates = {
        'o16':          'size==o16',
        'o32':          'size==o32',
        'o64':          'size==o64',
        'ring0':        'ring==0',
        '!ring0':       'ring==1..3',
        'ring1':        'ring==1',
        'ring2':        'ring==2',
        'ring3':        'ring==3',
        'user':         'ring==3',
        'supervisor':   'ring==0..2',
        'real':         'mode==real',
        'prot':         'mode==prot',
        'long':         'mode==long',
        'v86':          'mode==v86',
        'smm':          'mode==smm',
        'vmx':          'mode==vmx',
        'svm':          'mode==svm',
        'paging':       'paging==on',
        '!paging':      'paging==off',
        'amd':          'vendor==amd',
        '!amd':         'vendor!=amd',
        'intel':        'vendor==intel',
        '!intel':       'vendor!=intel',
        'via':          'vendor==via',
        '!via':         'vendor!=via',
    };

    def __init__(self, sVariable, sOp, sValue):
        """
        Creates a selector after asserting that the variable, operator and
        value are all known (see kdVariables and kasCompareOps).
        """
        assert sVariable in self.kdVariables;
        assert sOp in self.kasCompareOps;
        assert sValue in self.kdVariables[sVariable];
        (self.sVariable, self.sOp, self.sValue) = (sVariable, sOp, sValue);
1196
1197
1198class InstructionTest(object):
1199 """
1200 Instruction test.
1201 """
1202
1203 def __init__(self, oInstr): # type: (InstructionTest, Instruction)
1204 self.oInstr = oInstr; # type: InstructionTest
1205 self.aoInputs = []; # type: list(TestInOut)
1206 self.aoOutputs = []; # type: list(TestInOut)
1207 self.aoSelectors = []; # type: list(TestSelector)
1208
1209 def toString(self, fRepr = False):
1210 """
1211 Converts it to string representation.
1212 """
1213 asWords = [];
1214 if self.aoSelectors:
1215 for oSelector in self.aoSelectors:
1216 asWords.append('%s%s%s' % (oSelector.sVariable, oSelector.sOp, oSelector.sValue,));
1217 asWords.append('/');
1218
1219 for oModifier in self.aoInputs:
1220 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
1221
1222 asWords.append('->');
1223
1224 for oModifier in self.aoOutputs:
1225 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
1226
1227 if fRepr:
1228 return '<' + ' '.join(asWords) + '>';
1229 return ' '.join(asWords);
1230
1231 def __str__(self):
1232 """ Provide string represenation. """
1233 return self.toString(False);
1234
1235 def __repr__(self):
1236 """ Provide unambigious string representation. """
1237 return self.toString(True);
1238
class Operand(object):
    """
    Instruction operand.

    Pairs an operand location (a g_kdOpLocations key) with an operand type
    (a g_kdOpTypes key).
    """

    def __init__(self, sWhere, sType):
        assert sWhere in g_kdOpLocations, sWhere;
        assert sType  in g_kdOpTypes, sType;
        self.sWhere = sWhere;           ##< g_kdOpLocations
        self.sType  = sType;            ##< g_kdOpTypes

    def usesModRM(self):
        """ Returns True if using some form of ModR/M encoding. """
        # Decided solely by the first character of the operand type.
        chFirst = self.sType[0];
        return chFirst == 'E' or chFirst == 'G' or chFirst == 'M';
1253
1254
1255
class Instruction(object): # pylint: disable=too-many-instance-attributes
    """
    Instruction.

    Collects what is known about one instruction.  All attributes start out
    as None, empty or False, except for the source file and creation line
    passed to the constructor.
    """

    def __init__(self, sSrcFile, iLine):
        """
        Creates an empty instruction record.

        sSrcFile is stored as the source file name and iLine as the line the
        instruction was created at (iLineCreated).
        """
        ## @name Core attributes.
        ## @{
        self.sMnemonic      = None;
        self.sBrief         = None;
        self.asDescSections = [];       # type: list(str)
        self.aoMaps         = [];       # type: list(InstructionMap)
        self.aoOperands     = [];       # type: list(Operand)
        self.sPrefix        = None;     ##< Single prefix: None, 'none', 0x66, 0xf3, 0xf2
        self.sOpcode        = None;     # type: str
        self.sSubOpcode     = None;     # type: str
        self.sEncoding      = None;
        self.asFlTest       = None;
        self.asFlModify     = None;
        self.asFlUndefined  = None;
        self.asFlSet        = None;
        self.asFlClear      = None;
        self.dHints         = {};       ##< Dictionary of instruction hints, flags, whatnot. (Dictionary for speed; dummy value).
        self.sDisEnum       = None;     ##< OP_XXXX value.  Default is based on the uppercased mnemonic.
        self.asCpuIds       = [];       ##< The CPUID feature bit names for this instruction. If multiple, assume AND.
        self.asReqFeatures  = [];       ##< Which features are required to be enabled to run this instruction.
        self.aoTests        = [];       # type: list(InstructionTest)
        self.sMinCpu        = None;     ##< Indicates the minimum CPU required for the instruction. Not set when oCpuExpr is.
        self.oCpuExpr       = None;     ##< Some CPU restriction expression...
        self.sGroup         = None;
        self.fUnused        = False;    ##< Unused instruction.
        self.fInvalid       = False;    ##< Invalid instruction (like UD2).
        self.sInvalidStyle  = None;     ##< Invalid behaviour style (g_kdInvalidStyles),
        self.sXcptType      = None;     ##< Exception type (g_kdXcptTypes).
        ## @}

        ## @name Implementation attributes.
        ## @{
        self.sStats         = None;
        self.sFunction      = None;
        self.fStub          = False;
        self.fUdStub        = False;
        ## @}

        ## @name Decoding info
        ## @{
        self.sSrcFile       = sSrcFile;
        self.iLineCreated   = iLine;
        self.iLineCompleted = None;
        self.cOpTags        = 0;
        self.iLineFnIemOpMacro  = -1;
        self.iLineMnemonicMacro = -1;
        ## @}

        ## @name Intermediate input fields.
        ## @{
        self.sRawDisOpNo    = None;
        self.asRawDisParams = [];
        self.sRawIemOpFlags = None;
        self.sRawOldOpcodes = None;
        self.asCopyTests    = [];
        ## @}

    def toString(self, fRepr = False):
        """
        Turn object into a string.

        Produces '; ' separated 'name=value' pairs.  Fields whose value is
        None are left out, and several fields are only included when set
        (maps, hints, cpuid, flags, line numbers).  When fRepr is True the
        result is wrapped in '<' and '>'.
        """
        aasFields = [];

        aasFields.append(['opcode',    self.sOpcode]);
        aasFields.append(['mnemonic',  self.sMnemonic]);
        for iOperand, oOperand in enumerate(self.aoOperands):
            aasFields.append(['op%u' % (iOperand + 1,), '%s:%s' % (oOperand.sWhere, oOperand.sType,)]);
        if self.aoMaps:         aasFields.append(['maps', ','.join([oMap.sName for oMap in self.aoMaps])]);
        aasFields.append(['encoding',  self.sEncoding]);
        if self.dHints:         aasFields.append(['hints', ','.join(self.dHints.keys())]);
        aasFields.append(['disenum',   self.sDisEnum]);
        if self.asCpuIds:       aasFields.append(['cpuid', ','.join(self.asCpuIds)]);
        aasFields.append(['group',     self.sGroup]);
        if self.fUnused:        aasFields.append(['unused', 'True']);
        if self.fInvalid:       aasFields.append(['invalid', 'True']);
        aasFields.append(['invlstyle', self.sInvalidStyle]);
        aasFields.append(['fltest',    self.asFlTest]);
        aasFields.append(['flmodify',  self.asFlModify]);
        aasFields.append(['flundef',   self.asFlUndefined]);
        aasFields.append(['flset',     self.asFlSet]);
        aasFields.append(['flclear',   self.asFlClear]);
        aasFields.append(['mincpu',    self.sMinCpu]);
        aasFields.append(['stats',     self.sStats]);
        aasFields.append(['sFunction', self.sFunction]);
        if self.fStub:          aasFields.append(['fStub', 'True']);
        if self.fUdStub:        aasFields.append(['fUdStub', 'True']);
        if self.cOpTags:        aasFields.append(['optags', str(self.cOpTags)]);
        if self.iLineFnIemOpMacro  != -1: aasFields.append(['FNIEMOP_XXX', str(self.iLineFnIemOpMacro)]);
        if self.iLineMnemonicMacro != -1: aasFields.append(['IEMOP_MNEMMONICn', str(self.iLineMnemonicMacro)]);

        # Identity test against None: an empty-but-set value must still be shown.
        sRet = '; '.join(['%s=%s' % (sField, sValue,) for sField, sValue in aasFields if sValue is not None]);
        if fRepr:
            return '<' + sRet + '>';
        return sRet;

    def __str__(self):
        """ Provide string representation. """
        return self.toString(False);

    def __repr__(self):
        """ Provide unambiguous string representation. """
        return self.toString(True);

    def getOpcodeByte(self):
        """
        Decodes sOpcode into a byte range integer value.
        Raises exception if sOpcode is None or invalid.

        Supported forms:
            - '0x..':   The hex value itself.
            - '/N':     N shifted into bits 3 thru 5.
            - '11/N':   Same as '/N' with 0xc0 ORed in.
            - '!11/N':  Same as '/N' with 0x80 ORed in (see the todo below).
        """
        if self.sOpcode is None:
            raise Exception('No opcode byte for %s!' % (self,));
        sOpcode = str(self.sOpcode);    # pylint type confusion workaround.

        # Full hex byte form.
        if sOpcode[:2] == '0x':
            return int(sOpcode, 16);

        # The /r form.  It is two characters long: the slash and one digit.
        if len(sOpcode) == 2 and sOpcode.startswith('/') and sOpcode[-1].isdigit():
            return int(sOpcode[-1:]) << 3;

        # The 11/r form:
        if len(sOpcode) == 4 and sOpcode.startswith('11/') and sOpcode[-1].isdigit():
            return (int(sOpcode[-1:]) << 3) | 0xc0;

        # The !11/r form (returns mod=1):
        ## @todo this doesn't really work...
        if len(sOpcode) == 5 and sOpcode.startswith('!11/') and sOpcode[-1].isdigit():
            return (int(sOpcode[-1:]) << 3) | 0x80;

        raise Exception('unsupported opcode byte spec "%s" for %s' % (sOpcode, self,));

    @staticmethod
    def _flagsToIntegerMask(asFlags):
        """
        Returns the integer mask value for asFlags.

        Each flag is translated to a constant name via g_kdEFlagsMnemonics
        and then to its value via g_kdX86EFlagsConstants.  An empty list or
        None yields zero.
        """
        uRet = 0;
        if asFlags:
            for sFlag in asFlags:
                sConstant = g_kdEFlagsMnemonics[sFlag];
                assert sConstant[0] != '!', sConstant
                uRet |= g_kdX86EFlagsConstants[sConstant];
        return uRet;

    def getTestedFlagsMask(self):
        """ Returns asFlTest into a integer mask value """
        return self._flagsToIntegerMask(self.asFlTest);

    def getModifiedFlagsMask(self):
        """ Returns asFlModify into a integer mask value """
        return self._flagsToIntegerMask(self.asFlModify);

    def getUndefinedFlagsMask(self):
        """ Returns asFlUndefined into a integer mask value """
        return self._flagsToIntegerMask(self.asFlUndefined);

    def getSetFlagsMask(self):
        """ Returns asFlSet into a integer mask value """
        return self._flagsToIntegerMask(self.asFlSet);

    def getClearedFlagsMask(self):
        """ Returns asFlClear into a integer mask value """
        return self._flagsToIntegerMask(self.asFlClear);
1429
1430
## All the instructions.
g_aoAllInstructions = []; # type: list(Instruction)

## All the instructions indexed by statistics name (opstat).
## Each statistics name maps to a single instruction.
g_dAllInstructionsByStat = {}; # type: dict(Instruction)

## All the instructions indexed by function name (opfunction).
## Each function name maps to a list, as several instructions may share one function.
g_dAllInstructionsByFunction = {}; # type: dict(list(Instruction))

## Instructions tagged by oponlytest
g_aoOnlyTestInstructions = []; # type: list(Instruction)

## Instruction maps.
## Keyed by map name; the same name is passed as first argument to InstructionMap.
## NOTE(review): 'grp1_80' is the only grp1_8x entry without sSelector = '/r' - confirm
##               whether that is intentional.
g_dInstructionMaps = {
    'one':          InstructionMap('one'),
    'grp1_80':      InstructionMap('grp1_80',   asLeadOpcodes = ['0x80',]),
    'grp1_81':      InstructionMap('grp1_81',   asLeadOpcodes = ['0x81',], sSelector = '/r'),
    'grp1_82':      InstructionMap('grp1_82',   asLeadOpcodes = ['0x82',], sSelector = '/r'),
    'grp1_83':      InstructionMap('grp1_83',   asLeadOpcodes = ['0x83',], sSelector = '/r'),
    'grp1a':        InstructionMap('grp1a',     asLeadOpcodes = ['0x8f',], sSelector = '/r'),
    'grp2_c0':      InstructionMap('grp2_c0',   asLeadOpcodes = ['0xc0',], sSelector = '/r'),
    'grp2_c1':      InstructionMap('grp2_c1',   asLeadOpcodes = ['0xc1',], sSelector = '/r'),
    'grp2_d0':      InstructionMap('grp2_d0',   asLeadOpcodes = ['0xd0',], sSelector = '/r'),
    'grp2_d1':      InstructionMap('grp2_d1',   asLeadOpcodes = ['0xd1',], sSelector = '/r'),
    'grp2_d2':      InstructionMap('grp2_d2',   asLeadOpcodes = ['0xd2',], sSelector = '/r'),
    'grp2_d3':      InstructionMap('grp2_d3',   asLeadOpcodes = ['0xd3',], sSelector = '/r'),
    'grp3_f6':      InstructionMap('grp3_f6',   asLeadOpcodes = ['0xf6',], sSelector = '/r'),
    'grp3_f7':      InstructionMap('grp3_f7',   asLeadOpcodes = ['0xf7',], sSelector = '/r'),
    'grp4':         InstructionMap('grp4',      asLeadOpcodes = ['0xfe',], sSelector = '/r'),
    'grp5':         InstructionMap('grp5',      asLeadOpcodes = ['0xff',], sSelector = '/r'),
    'grp11_c6_m':   InstructionMap('grp11_c6_m',asLeadOpcodes = ['0xc6',], sSelector = '!11 /r'),
    'grp11_c6_r':   InstructionMap('grp11_c6_r',asLeadOpcodes = ['0xc6',], sSelector = '11'),    # xabort
    'grp11_c7_m':   InstructionMap('grp11_c7_m',asLeadOpcodes = ['0xc7',], sSelector = '!11 /r'),
    'grp11_c7_r':   InstructionMap('grp11_c7_r',asLeadOpcodes = ['0xc7',], sSelector = '11'),    # xbegin

    'two0f':        InstructionMap('two0f',     asLeadOpcodes = ['0x0f',], sDisParse = 'IDX_ParseTwoByteEsc'),
    'grp6':         InstructionMap('grp6',      asLeadOpcodes = ['0x0f', '0x00',], sSelector = '/r'),
    'grp7_m':       InstructionMap('grp7_m',    asLeadOpcodes = ['0x0f', '0x01',], sSelector = '!11 /r'),
    'grp7_r':       InstructionMap('grp7_r',    asLeadOpcodes = ['0x0f', '0x01',], sSelector = '11'),
    'grp8':         InstructionMap('grp8',      asLeadOpcodes = ['0x0f', '0xba',], sSelector = '/r'),
    'grp9':         InstructionMap('grp9',      asLeadOpcodes = ['0x0f', '0xc7',], sSelector = 'mod /r'),
    'grp10':        InstructionMap('grp10',     asLeadOpcodes = ['0x0f', '0xb9',], sSelector = '/r'), # UD1 /w modr/m
    'grp12':        InstructionMap('grp12',     asLeadOpcodes = ['0x0f', '0x71',], sSelector = 'mod /r'),
    'grp13':        InstructionMap('grp13',     asLeadOpcodes = ['0x0f', '0x72',], sSelector = 'mod /r'),
    'grp14':        InstructionMap('grp14',     asLeadOpcodes = ['0x0f', '0x73',], sSelector = 'mod /r'),
    'grp15':        InstructionMap('grp15',     asLeadOpcodes = ['0x0f', '0xae',], sSelector = 'memreg /r'),
    'grp16':        InstructionMap('grp16',     asLeadOpcodes = ['0x0f', '0x18',], sSelector = 'mod /r'),
    'grpA17':       InstructionMap('grpA17',    asLeadOpcodes = ['0x0f', '0x78',], sSelector = '/r'), # AMD: EXTRQ weirdness
    'grpP':         InstructionMap('grpP',      asLeadOpcodes = ['0x0f', '0x0d',], sSelector = '/r'), # AMD: prefetch

    'three0f38':    InstructionMap('three0f38', asLeadOpcodes = ['0x0f', '0x38',]),
    'three0f3a':    InstructionMap('three0f3a', asLeadOpcodes = ['0x0f', '0x3a',]),

    'vexmap1':      InstructionMap('vexmap1',   sEncoding = 'vex1'),
    'vexgrp12':     InstructionMap('vexgrp12',  sEncoding = 'vex1', asLeadOpcodes = ['0x71',], sSelector = 'mod /r'),
    'vexgrp13':     InstructionMap('vexgrp13',  sEncoding = 'vex1', asLeadOpcodes = ['0x72',], sSelector = 'mod /r'),
    'vexgrp14':     InstructionMap('vexgrp14',  sEncoding = 'vex1', asLeadOpcodes = ['0x73',], sSelector = 'mod /r'),
    'vexgrp15':     InstructionMap('vexgrp15',  sEncoding = 'vex1', asLeadOpcodes = ['0xae',], sSelector = 'memreg /r'),
    'vexgrp17':     InstructionMap('vexgrp17',  sEncoding = 'vex1', asLeadOpcodes = ['0xf3',], sSelector = '/r'),

    'vexmap2':      InstructionMap('vexmap2',   sEncoding = 'vex2'),
    'vexmap3':      InstructionMap('vexmap3',   sEncoding = 'vex3'),

    '3dnow':        InstructionMap('3dnow',     asLeadOpcodes = ['0x0f', '0x0f',]),
    'xopmap8':      InstructionMap('xopmap8',   sEncoding = 'xop8'),
    'xopmap9':      InstructionMap('xopmap9',   sEncoding = 'xop9'),
    'xopgrp1':      InstructionMap('xopgrp1',   sEncoding = 'xop9',  asLeadOpcodes = ['0x01'], sSelector = '/r'),
    'xopgrp2':      InstructionMap('xopgrp2',   sEncoding = 'xop9',  asLeadOpcodes = ['0x02'], sSelector = '/r'),
    'xopgrp3':      InstructionMap('xopgrp3',   sEncoding = 'xop9',  asLeadOpcodes = ['0x12'], sSelector = '/r'),
    'xopmap10':     InstructionMap('xopmap10',  sEncoding = 'xop10'),
    'xopgrp4':      InstructionMap('xopgrp4',   sEncoding = 'xop10', asLeadOpcodes = ['0x12'], sSelector = '/r'),
};
1503
1504
1505
class ParserException(Exception):
    """ Parser exception carrying a single message string. """
    def __init__(self, sMessage):
        super(ParserException, self).__init__(sMessage);
1510
1511
1512class SimpleParser(object):
1513 """
1514 Parser of IEMAllInstruction*.cpp.h instruction specifications.
1515 """
1516
    ## @name Parser state.
    ## Values for the iState attribute; the constructor starts out in kiCode.
    ## NOTE(review): kiCommentMulti presumably means 'inside a multi-line comment' - confirm
    ##               against the state machine, which is outside this part of the file.
    ## @{
    kiCode              = 0;
    kiCommentMulti      = 1;
    ## @}
1522
1523 def __init__(self, sSrcFile, asLines, sDefaultMap):
1524 self.sSrcFile = sSrcFile;
1525 self.asLines = asLines;
1526 self.iLine = 0;
1527 self.iState = self.kiCode;
1528 self.sComment = '';
1529 self.iCommentLine = 0;
1530 self.aoCurInstrs = [];
1531
1532 assert sDefaultMap in g_dInstructionMaps;
1533 self.oDefaultMap = g_dInstructionMaps[sDefaultMap];
1534
1535 self.cTotalInstr = 0;
1536 self.cTotalStubs = 0;
1537 self.cTotalTagged = 0;
1538
1539 self.oReMacroName = re.compile('^[A-Za-z_][A-Za-z0-9_]*$');
1540 self.oReMnemonic = re.compile('^[A-Za-z_][A-Za-z0-9_]*$');
1541 self.oReStatsName = re.compile('^[A-Za-z_][A-Za-z0-9_]*$');
1542 self.oReFunctionName= re.compile('^iemOp_[A-Za-z_][A-Za-z0-9_]*$');
1543 self.oReGroupName = re.compile('^og_[a-z0-9]+(|_[a-z0-9]+|_[a-z0-9]+_[a-z0-9]+)$');
1544 self.oReDisEnum = re.compile('^OP_[A-Z0-9_]+$');
1545 self.fDebug = True;
1546
1547 self.dTagHandlers = {
1548 '@opbrief': self.parseTagOpBrief,
1549 '@opdesc': self.parseTagOpDesc,
1550 '@opmnemonic': self.parseTagOpMnemonic,
1551 '@op1': self.parseTagOpOperandN,
1552 '@op2': self.parseTagOpOperandN,
1553 '@op3': self.parseTagOpOperandN,
1554 '@op4': self.parseTagOpOperandN,
1555 '@oppfx': self.parseTagOpPfx,
1556 '@opmaps': self.parseTagOpMaps,
1557 '@opcode': self.parseTagOpcode,
1558 '@opcodesub': self.parseTagOpcodeSub,
1559 '@openc': self.parseTagOpEnc,
1560 '@opfltest': self.parseTagOpEFlags,
1561 '@opflmodify': self.parseTagOpEFlags,
1562 '@opflundef': self.parseTagOpEFlags,
1563 '@opflset': self.parseTagOpEFlags,
1564 '@opflclear': self.parseTagOpEFlags,
1565 '@ophints': self.parseTagOpHints,
1566 '@opdisenum': self.parseTagOpDisEnum,
1567 '@opmincpu': self.parseTagOpMinCpu,
1568 '@opcpuid': self.parseTagOpCpuId,
1569 '@opgroup': self.parseTagOpGroup,
1570 '@opunused': self.parseTagOpUnusedInvalid,
1571 '@opinvalid': self.parseTagOpUnusedInvalid,
1572 '@opinvlstyle': self.parseTagOpUnusedInvalid,
1573 '@optest': self.parseTagOpTest,
1574 '@optestign': self.parseTagOpTestIgnore,
1575 '@optestignore': self.parseTagOpTestIgnore,
1576 '@opcopytests': self.parseTagOpCopyTests,
1577 '@oponly': self.parseTagOpOnlyTest,
1578 '@oponlytest': self.parseTagOpOnlyTest,
1579 '@opxcpttype': self.parseTagOpXcptType,
1580 '@opstats': self.parseTagOpStats,
1581 '@opfunction': self.parseTagOpFunction,
1582 '@opdone': self.parseTagOpDone,
1583 };
1584 for i in range(48):
1585 self.dTagHandlers['@optest%u' % (i,)] = self.parseTagOpTestNum;
1586 self.dTagHandlers['@optest[%u]' % (i,)] = self.parseTagOpTestNum;
1587
1588 self.asErrors = [];
1589
1590 def raiseError(self, sMessage):
1591 """
1592 Raise error prefixed with the source and line number.
1593 """
1594 raise ParserException("%s:%d: error: %s" % (self.sSrcFile, self.iLine, sMessage,));
1595
1596 def raiseCommentError(self, iLineInComment, sMessage):
1597 """
1598 Similar to raiseError, but the line number is iLineInComment + self.iCommentLine.
1599 """
1600 raise ParserException("%s:%d: error: %s" % (self.sSrcFile, self.iCommentLine + iLineInComment, sMessage,));
1601
1602 def error(self, sMessage):
1603 """
1604 Adds an error.
1605 returns False;
1606 """
1607 self.asErrors.append(u'%s:%d: error: %s\n' % (self.sSrcFile, self.iLine, sMessage,));
1608 return False;
1609
1610 def errorComment(self, iLineInComment, sMessage):
1611 """
1612 Adds a comment error.
1613 returns False;
1614 """
1615 self.asErrors.append(u'%s:%d: error: %s\n' % (self.sSrcFile, self.iCommentLine + iLineInComment, sMessage,));
1616 return False;
1617
1618 def printErrors(self):
1619 """
1620 Print the errors to stderr.
1621 Returns number of errors.
1622 """
1623 if self.asErrors:
1624 sys.stderr.write(u''.join(self.asErrors));
1625 return len(self.asErrors);
1626
1627 def debug(self, sMessage):
1628 """
1629 For debugging.
1630 """
1631 if self.fDebug:
1632 print('debug: %s' % (sMessage,));
1633
1634
1635 def addInstruction(self, iLine = None):
1636 """
1637 Adds an instruction.
1638 """
1639 oInstr = Instruction(self.sSrcFile, self.iLine if iLine is None else iLine);
1640 g_aoAllInstructions.append(oInstr);
1641 self.aoCurInstrs.append(oInstr);
1642 return oInstr;
1643
1644 def deriveMnemonicAndOperandsFromStats(self, oInstr, sStats):
1645 """
1646 Derives the mnemonic and operands from a IEM stats base name like string.
1647 """
1648 if oInstr.sMnemonic is None:
1649 asWords = sStats.split('_');
1650 oInstr.sMnemonic = asWords[0].lower();
1651 if len(asWords) > 1 and not oInstr.aoOperands:
1652 for sType in asWords[1:]:
1653 if sType in g_kdOpTypes:
1654 oInstr.aoOperands.append(Operand(g_kdOpTypes[sType][1], sType));
1655 else:
1656 #return self.error('unknown operand type: %s (instruction: %s)' % (sType, oInstr))
1657 return False;
1658 return True;
1659
1660 def doneInstructionOne(self, oInstr, iLine):
1661 """
1662 Complete the parsing by processing, validating and expanding raw inputs.
1663 """
1664 assert oInstr.iLineCompleted is None;
1665 oInstr.iLineCompleted = iLine;
1666
1667 #
1668 # Specified instructions.
1669 #
1670 if oInstr.cOpTags > 0:
1671 if oInstr.sStats is None:
1672 pass;
1673
1674 #
1675 # Unspecified legacy stuff. We generally only got a few things to go on here.
1676 # /** Opcode 0x0f 0x00 /0. */
1677 # FNIEMOPRM_DEF(iemOp_Grp6_sldt)
1678 #
1679 else:
1680 #if oInstr.sRawOldOpcodes:
1681 #
1682 #if oInstr.sMnemonic:
1683 pass;
1684
1685 #
1686 # Common defaults.
1687 #
1688
1689 # Guess mnemonic and operands from stats if the former is missing.
1690 if oInstr.sMnemonic is None:
1691 if oInstr.sStats is not None:
1692 self.deriveMnemonicAndOperandsFromStats(oInstr, oInstr.sStats);
1693 elif oInstr.sFunction is not None:
1694 self.deriveMnemonicAndOperandsFromStats(oInstr, oInstr.sFunction.replace('iemOp_', ''));
1695
1696 # Derive the disassembler op enum constant from the mnemonic.
1697 if oInstr.sDisEnum is None and oInstr.sMnemonic is not None:
1698 oInstr.sDisEnum = 'OP_' + oInstr.sMnemonic.upper();
1699
1700 # Derive the IEM statistics base name from mnemonic and operand types.
1701 if oInstr.sStats is None:
1702 if oInstr.sFunction is not None:
1703 oInstr.sStats = oInstr.sFunction.replace('iemOp_', '');
1704 elif oInstr.sMnemonic is not None:
1705 oInstr.sStats = oInstr.sMnemonic;
1706 for oOperand in oInstr.aoOperands:
1707 if oOperand.sType:
1708 oInstr.sStats += '_' + oOperand.sType;
1709
1710 # Derive the IEM function name from mnemonic and operand types.
1711 if oInstr.sFunction is None:
1712 if oInstr.sMnemonic is not None:
1713 oInstr.sFunction = 'iemOp_' + oInstr.sMnemonic;
1714 for oOperand in oInstr.aoOperands:
1715 if oOperand.sType:
1716 oInstr.sFunction += '_' + oOperand.sType;
1717 elif oInstr.sStats:
1718 oInstr.sFunction = 'iemOp_' + oInstr.sStats;
1719
1720 # Derive encoding from operands.
1721 if oInstr.sEncoding is None:
1722 if not oInstr.aoOperands:
1723 if oInstr.fUnused and oInstr.sSubOpcode:
1724 oInstr.sEncoding = 'ModR/M';
1725 else:
1726 oInstr.sEncoding = 'fixed';
1727 elif oInstr.aoOperands[0].usesModRM():
1728 if len(oInstr.aoOperands) >= 2 and oInstr.aoOperands[1].sWhere == 'vvvv':
1729 oInstr.sEncoding = 'ModR/M+VEX';
1730 else:
1731 oInstr.sEncoding = 'ModR/M';
1732
1733 #
1734 # Apply default map and then add the instruction to all it's groups.
1735 #
1736 if not oInstr.aoMaps:
1737 oInstr.aoMaps = [ self.oDefaultMap, ];
1738 for oMap in oInstr.aoMaps:
1739 oMap.aoInstructions.append(oInstr);
1740
1741 #
1742 # Check the opstat value and add it to the opstat indexed dictionary.
1743 #
1744 if oInstr.sStats:
1745 if oInstr.sStats not in g_dAllInstructionsByStat:
1746 g_dAllInstructionsByStat[oInstr.sStats] = oInstr;
1747 else:
1748 self.error('Duplicate opstat value "%s"\nnew: %s\nold: %s'
1749 % (oInstr.sStats, oInstr, g_dAllInstructionsByStat[oInstr.sStats],));
1750
1751 #
1752 # Add to function indexed dictionary. We allow multiple instructions per function.
1753 #
1754 if oInstr.sFunction:
1755 if oInstr.sFunction not in g_dAllInstructionsByFunction:
1756 g_dAllInstructionsByFunction[oInstr.sFunction] = [oInstr,];
1757 else:
1758 g_dAllInstructionsByFunction[oInstr.sFunction].append(oInstr);
1759
1760 #self.debug('%d..%d: %s; %d @op tags' % (oInstr.iLineCreated, oInstr.iLineCompleted, oInstr.sFunction, oInstr.cOpTags));
1761 return True;
1762
1763 def doneInstructions(self, iLineInComment = None):
1764 """
1765 Done with current instruction.
1766 """
1767 for oInstr in self.aoCurInstrs:
1768 self.doneInstructionOne(oInstr, self.iLine if iLineInComment is None else self.iCommentLine + iLineInComment);
1769 if oInstr.fStub:
1770 self.cTotalStubs += 1;
1771
1772 self.cTotalInstr += len(self.aoCurInstrs);
1773
1774 self.sComment = '';
1775 self.aoCurInstrs = [];
1776 return True;
1777
def setInstrunctionAttrib(self, sAttrib, oValue, fOverwrite = False):
    """
    Assigns oValue to attribute sAttrib on every current instruction.

    Unless fOverwrite is True, an instruction is only updated when its
    current attribute value is None; any other value is left untouched.
    """
    for oCurInstr in self.aoCurInstrs:
        # Short-circuit so the old value is only looked up when it matters.
        if fOverwrite is True or getattr(oCurInstr, sAttrib) is None:
            setattr(oCurInstr, sAttrib, oValue);
1789
def setInstrunctionArrayAttrib(self, sAttrib, iEntry, oValue, fOverwrite = False):
    """
    Assigns oValue to entry iEntry of the list attribute sAttrib on every
    current instruction.

    The list is padded with None entries until iEntry is a valid index.
    Unless fOverwrite is True, the entry is only written when it is None.
    """
    for oCurInstr in self.aoCurInstrs:
        aoEntries = getattr(oCurInstr, sAttrib);

        # Grow the list in place so that iEntry can be indexed.
        cMissing = iEntry + 1 - len(aoEntries);
        if cMissing > 0:
            aoEntries.extend([None] * cMissing);

        if fOverwrite is True or aoEntries[iEntry] is None:
            aoEntries[iEntry] = oValue;
1801
def parseCommentOldOpcode(self, asLines):
    """
    Deals with 'Opcode 0xff /4' like comments.

    When the first comment line starts with the word 'Opcode' followed by a
    hex byte, everything after 'Opcode' is recorded as the sRawOldOpcodes
    attribute of the current instructions (via setInstrunctionAttrib, i.e.
    without overwriting an existing value).  A '0X' prefix on any word is
    normalized to '0x'.

    Always returns False.
    """
    if not asLines:
        return False;

    asWords = asLines[0].split();
    if    len(asWords) >= 2 \
      and asWords[0] == 'Opcode' \
      and asWords[1][:2] in ('0x', '0X'):
        # Drop the leading 'Opcode' word and lower-case any '0X' prefixes.
        asOpcodeWords = [('0x' + sWord[2:]) if sWord.startswith('0X') else sWord for sWord in asWords[1:]];
        self.setInstrunctionAttrib('sRawOldOpcodes', ' '.join(asOpcodeWords));

    return False;
1817
def ensureInstructionForOpTag(self, iTagLine):
    """
    Makes sure there is a current instruction for the op-tag being parsed.

    If there are no current instructions, one is added for line
    self.iCommentLine + iTagLine.  The tag counter of every current
    instruction is incremented, and cTotalTagged is bumped for each
    instruction receiving its first tag.

    Returns the last of the current instructions.
    """
    if len(self.aoCurInstrs) == 0:
        self.addInstruction(self.iCommentLine + iTagLine);

    for oCurInstr in self.aoCurInstrs:
        oCurInstr.cOpTags = oCurInstr.cOpTags + 1;
        if oCurInstr.cOpTags == 1:
            self.cTotalTagged = self.cTotalTagged + 1;

    return self.aoCurInstrs[-1];
1827
@staticmethod
def flattenSections(aasSections):
    """
    Turns each non-empty section (a list of lines) into one string made of
    its stripped lines joined by single spaces.

    Returns the list of strings, one per non-empty section.
    """
    return [' '.join([sLine.strip() for sLine in asLines]) for asLines in aasSections if asLines];
1839
@staticmethod
def flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = '\n'):
    """
    Flattens sections into a simple stripped string.

    The stripped lines of each section are joined using sLineSep and the
    non-empty sections are joined using sSectionSep (newline by default).
    Empty sections are skipped entirely, so the result never starts or ends
    with a section separator.
    """
    # Typical: One section with a single line.
    if len(aasSections) == 1 and len(aasSections[0]) == 1:
        return aasSections[0][0].strip();

    # Joining only the non-empty sections avoids the leading separator that a
    # manual "add separator when not first index" loop produces when the
    # first section is empty.
    return sSectionSep.join([sLineSep.join([sLine.strip() for sLine in asLines])
                             for asLines in aasSections if asLines]);
1857
1858
1859
1860 ## @name Tag parsers
1861 ## @{
1862
def parseTagOpBrief(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opbrief
    Value:  Text description, multiple sections, appended.

    Brief description.  If not given, it's the first sentence from @opdesc.

    The value is flattened, terminated with a dot if it lacks one, limited
    to 180 characters and must consist of a single sentence.  Returns True on
    success, otherwise the result of errorComment.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sBrief = self.flattenAllSections(aasSections);
    if not sBrief:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if sBrief[-1] != '.':
        sBrief = sBrief + '.';
    if len(sBrief) > 180:
        return self.errorComment(iTagLine, '%s: value too long (max 180 chars): %s' % (sTag, sBrief));

    # Find the first dot that ends a sentence, i.e. one followed by a space
    # or sitting at the very end.  Dots inside words (like "1.2") are skipped.
    offDot = sBrief.find('.');
    while offDot >= 0 and offDot < len(sBrief) - 1 and sBrief[offDot + 1] != ' ':
        offDot = sBrief.find('.', offDot + 1);
    if offDot >= 0 and offDot != len(sBrief) - 1:
        return self.errorComment(iTagLine, '%s: only one sentence: %s' % (sTag, sBrief));

    # Update the instruction, refusing to overwrite an existing brief.
    if oInstr.sBrief is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite brief "%s" with "%s"'
                                           % (sTag, oInstr.sBrief, sBrief,));
    oInstr.sBrief = sBrief;

    _ = iEndLine;
    return True;
1892
def parseTagOpDesc(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opdesc
    Value:  Text description, multiple sections, appended.

    It is used to describe instructions.  Each non-empty section is
    flattened into one string and appended to the instruction's
    asDescSections list.  Always returns True.
    """
    _ = sTag; _ = iEndLine;

    oInstr = self.ensureInstructionForOpTag(iTagLine);
    if aasSections:
        oInstr.asDescSections.extend(self.flattenSections(aasSections));
    return True;
1907
def parseTagOpMnemonic(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opmnemonic
    Value:  mnemonic

    The 'mnemonic' value must match self.oReMnemonic.  Because of prefixes,
    groups and whatnot, there are times when the mnemonic isn't that of an
    actual assembler mnemonic.

    Returns True when the mnemonic was stored, otherwise the result of
    errorComment (invalid name, or the instruction already has a mnemonic).
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    sMnemonic = self.flattenAllSections(aasSections);
    if not self.oReMnemonic.match(sMnemonic):
        fRc = self.errorComment(iTagLine, '%s: invalid menmonic name: "%s"' % (sTag, sMnemonic,));
    elif oInstr.sMnemonic is not None:
        fRc = self.errorComment(iTagLine, '%s: attempting to overwrite menmonic "%s" with "%s"'
                                          % (sTag, oInstr.sMnemonic, sMnemonic,));
    else:
        oInstr.sMnemonic = sMnemonic;
        fRc = True;
    return fRc;
1930
def parseTagOpOperandN(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tags:  \@op1, \@op2, \@op3, \@op4
    Value: [where:]type

    The 'where' value indicates where the operand is found, like the 'reg'
    part of the ModR/M encoding.  It must be a key of g_kdOpLocations; when
    omitted, the default location from the g_kdOpTypes entry is used.

    The 'type' value indicates the operand type and must be a key of
    g_kdOpTypes.  These follow the types given in the opcode tables in the
    CPU reference manuals.

    Returns True when the operand was stored, otherwise the result of
    errorComment.  An existing operand is never overwritten.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The last character of the tag is the one-based operand number.
    idxOp = int(sTag[-1]) - 1;
    assert 0 <= idxOp < 4;

    # flatten, split up, and validate the "where:type" value.
    sFlattened = self.flattenAllSections(aasSections);
    asSplit = sFlattened.split(':');
    if len(asSplit) == 1:
        sType  = asSplit[0];
        sWhere = None;
    elif len(asSplit) == 2:
        (sWhere, sType) = asSplit;
    else:
        return self.errorComment(iTagLine, 'expected %s value on format "[<where>:]<type>" not "%s"' % (sTag, sFlattened,));

    if sType not in g_kdOpTypes:
        # This is the *type* part being wrong, so say so.
        return self.errorComment(iTagLine, '%s: invalid type value "%s", valid: %s'
                                           % (sTag, sType, ', '.join(g_kdOpTypes.keys()),));
    if sWhere is None:
        sWhere = g_kdOpTypes[sType][1];
    elif sWhere not in g_kdOpLocations:
        return self.errorComment(iTagLine, '%s: invalid where value "%s", valid: %s'
                                           % (sTag, sWhere, ', '.join(g_kdOpLocations.keys()),));

    # Insert the operand, refusing to overwrite an existing one.
    while idxOp >= len(oInstr.aoOperands):
        oInstr.aoOperands.append(None);
    if oInstr.aoOperands[idxOp] is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s:%s" with "%s:%s"'
                                           % ( sTag, oInstr.aoOperands[idxOp].sWhere, oInstr.aoOperands[idxOp].sType,
                                               sWhere, sType,));
    oInstr.aoOperands[idxOp] = Operand(sWhere, sType);

    _ = iEndLine;
    return True;
1980
def parseTagOpMaps(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opmaps
    Value:  map[,map2]

    Indicates which maps the instruction is in.  There is a default map
    associated with each input file.

    Every map name must be a key of g_dInstructionMaps.  Whitespace around
    the names is ignored and at least one name is required.  Returns True on
    success, otherwise the result of errorComment.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split up and validate the value.  Note that str.split(',')
    # yields [''] for an empty string, so empty entries must be dropped
    # before testing whether any value was given at all.
    sFlattened = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',');
    asMaps = [sMap.strip() for sMap in sFlattened.split(',')];
    asMaps = [sMap for sMap in asMaps if sMap];
    if not asMaps:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    for sMap in asMaps:
        if sMap not in g_dInstructionMaps:
            return self.errorComment(iTagLine, '%s: invalid map value: %s  (valid values: %s)'
                                               % (sTag, sMap, ', '.join(g_dInstructionMaps.keys()),));

    # Add the maps to the current list.  Throw errors on duplicates.
    for oMap in oInstr.aoMaps:
        if oMap.sName in asMaps:
            return self.errorComment(iTagLine, '%s: duplicate map assignment: %s' % (sTag, oMap.sName));

    for sMap in asMaps:
        oMap = g_dInstructionMaps[sMap];
        if oMap not in oInstr.aoMaps:
            oInstr.aoMaps.append(oMap);
        else:
            # Same map listed twice in this very tag; report but carry on.
            self.errorComment(iTagLine, '%s: duplicate map assignment (input): %s' % (sTag, sMap));

    _ = iEndLine;
    return True;
2015
def parseTagOpPfx(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@oppfx
    Value:  n/a|none|0x66|0xf3|0xf2

    Required prefix for the instruction.  (In a (E)VEX context this is the
    value of the 'pp' field rather than an actual prefix.)

    The value is lower-cased; 'n/a' is stored as None, and a two character
    value gets a '0x' prefix prepended before it is validated.  Returns True
    on success, otherwise the result of errorComment.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sFlattened = self.flattenAllSections(aasSections);
    asPrefixes = sFlattened.split();
    if not asPrefixes:
        # Without this, indexing asPrefixes[0] below would raise IndexError.
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if len(asPrefixes) > 1:
        return self.errorComment(iTagLine, '%s: max one prefix: %s' % (sTag, asPrefixes,));

    sPrefix = asPrefixes[0].lower();
    if sPrefix == 'n/a':
        sPrefix = None;
    elif sPrefix != 'none':
        if len(sPrefix) == 2:
            sPrefix = '0x' + sPrefix;
        if not _isValidOpcodeByte(sPrefix):
            return self.errorComment(iTagLine, '%s: invalid prefix: %s' % (sTag, sPrefix,));

    if sPrefix is not None and sPrefix not in g_kdPrefixes:
        return self.errorComment(iTagLine, '%s: invalid prefix: %s (valid %s)' % (sTag, sPrefix, g_kdPrefixes,));

    # Set it.
    if oInstr.sPrefix is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sPrefix, sPrefix,));
    oInstr.sPrefix = sPrefix;

    _ = iEndLine;
    return True;
2053
def parseTagOpcode(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opcode
    Value:  0x?? | /reg (TODO: | mr/reg | 11 /reg | !11 /reg | 11 mr/reg | !11 mr/reg)

    The opcode byte or sub-byte for the instruction in the context of a map.

    Accepted forms are anything _isValidOpcodeByte approves of, as well as
    '/N', '11/N' and '!11/N' where N is a single digit in the range 0..7.
    Returns True on success, otherwise the result of errorComment.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sOpcode = self.flattenAllSections(aasSections);
    fValid = _isValidOpcodeByte(sOpcode);
    if not fValid:
        # The /reg digit selects a ModR/M reg field value, which is three
        # bits wide, so only 0 thru 7 are acceptable.
        for sLead in ('/', '11/', '!11/'):
            if     len(sOpcode) == len(sLead) + 1 \
               and sOpcode.startswith(sLead) \
               and sOpcode[-1] in '01234567':
                fValid = True;
                break;
    if not fValid:
        return self.errorComment(iTagLine, '%s: invalid opcode: %s' % (sTag, sOpcode,));

    # Set it.
    if oInstr.sOpcode is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sOpcode, sOpcode,));
    oInstr.sOpcode = sOpcode;

    _ = iEndLine;
    return True;
2083
def parseTagOpcodeSub(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opcodesub
    Value:  none | 11 mr/reg | !11 mr/reg

    This is a simple way of dealing with encodings where the mod=3 and mod!=3
    represents exactly two different instructions.  The more proper way would
    be to go via maps with two members, but this is faster.

    The value must be a key of g_kdSubOpcodes; the first element of the
    matching entry is what gets stored.  Returns True on success, otherwise
    the result of errorComment.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    sRawSubOpcode = self.flattenAllSections(aasSections);
    if sRawSubOpcode not in g_kdSubOpcodes:
        return self.errorComment(iTagLine, '%s: invalid sub opcode: %s  (valid: 11, !11, none)' % (sTag, sRawSubOpcode,));
    sSubOpcode = g_kdSubOpcodes[sRawSubOpcode][0];

    if oInstr.sSubOpcode is None:
        oInstr.sSubOpcode = sSubOpcode;
        return True;
    return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                       % ( sTag, oInstr.sSubOpcode, sSubOpcode,));
2109
def parseTagOpEnc(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@openc
    Value:  ModR/M|fixed|prefix|<map name>

    The instruction operand encoding style.

    The value is accepted when it is a key of g_kdEncodings, a key of
    g_dInstructionMaps, or passes _isValidOpcodeByte.  Returns True on
    success, otherwise the result of errorComment.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    sEncoding = self.flattenAllSections(aasSections);
    fAccepted =    sEncoding in g_kdEncodings \
                or sEncoding in g_dInstructionMaps \
                or _isValidOpcodeByte(sEncoding);
    if not fAccepted:
        return self.errorComment(iTagLine, '%s: invalid encoding: %s' % (sTag, sEncoding,));

    if oInstr.sEncoding is None:
        oInstr.sEncoding = sEncoding;
        return True;
    return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                       % ( sTag, oInstr.sEncoding, sEncoding,));
2136
## Maps each EFLAGS tag to the name of the Instruction attribute it sets.
kdOpFlagToAttr = dict([
    ('@opfltest',   'asFlTest'),
    ('@opflmodify', 'asFlModify'),
    ('@opflundef',  'asFlUndefined'),
    ('@opflset',    'asFlSet'),
    ('@opflclear',  'asFlClear'),
]);
2145
def parseTagOpEFlags(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tags:   \@opfltest, \@opflmodify, \@opflundef, \@opflset, \@opflclear
    Value:  <eflags specifier>

    The value is a comma separated list of g_kdEFlagsMnemonics keys, or the
    single word 'none' (any case) for an empty list.  The resulting list is
    stored in the instruction attribute that kdOpFlagToAttr associates with
    the tag, provided that attribute is still None.

    Returns True on success, False or the result of errorComment otherwise.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split up and validate the values.
    sFlattened = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',');
    asFlags    = sFlattened.split(',');
    if len(asFlags) == 1 and asFlags[0].lower() == 'none':
        asFlags = [];
    else:
        fRc = True;
        for iFlag, sFlag in enumerate(asFlags):
            if sFlag in g_kdEFlagsMnemonics:
                continue;
            # Retry without surrounding whitespace before giving up on it.
            sStripped = sFlag.strip();
            if sStripped in g_kdEFlagsMnemonics:
                asFlags[iFlag] = sStripped;
            else:
                fRc = self.errorComment(iTagLine, '%s: invalid EFLAGS value: %s' % (sTag, sFlag,));
        if not fRc:
            return False;

    # Set them, but never replace a list given by an earlier tag.
    sAttrib = self.kdOpFlagToAttr[sTag];
    asOld   = getattr(oInstr, sAttrib);
    if asOld is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, asOld, asFlags,));
    setattr(oInstr, sAttrib, asFlags);
    return True;
2177
def parseTagOpHints(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@ophints
    Value:  Comma or space separated list of flags and hints.

    This covers the disassembler flags table and more.

    Each hint must be a key of g_kdHints; the single word 'none' (any case)
    means no hints.  Valid hints are added to the instruction's dHints
    dictionary, duplicates are reported but otherwise ignored.

    Returns True on success and False if any hint was invalid.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    # Note that whitespace split() never yields tokens with surrounding
    # whitespace, so no additional stripping is needed (the former attempt
    # at that assigned to an index of the string itself, which would raise
    # TypeError).
    asHints = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asHints) == 1 and asHints[0].lower() == 'none':
        asHints = [];
    else:
        fRc = True;
        for sHint in asHints:
            if sHint not in g_kdHints:
                fRc = self.errorComment(iTagLine, '%s: invalid hint value: %s' % (sTag, sHint,));
        if not fRc:
            return False;

    # Append them.
    for sHint in asHints:
        if sHint not in oInstr.dHints:
            oInstr.dHints[sHint] = True; # (dummy value, using dictionary for speed)
        else:
            self.errorComment(iTagLine, '%s: duplicate hint: %s' % ( sTag, sHint,));

    _ = iEndLine;
    return True;
2211
def parseTagOpDisEnum(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opdisenum
    Value:  OP_XXXX

    This is for selecting a specific (legacy) disassembler enum value for the
    instruction.

    An error is reported unless exactly one word is given; when several are
    given, processing continues with the first one.  The word must match
    self.oReDisEnum.  Returns True when the value was stored, False or the
    result of errorComment otherwise.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and split.
    asWords = self.flattenAllSections(aasSections).split();
    if len(asWords) != 1:
        self.errorComment(iTagLine, '%s: expected exactly one value: %s' % (sTag, asWords,));
        if len(asWords) == 0:
            return False;

    sDisEnum = asWords[0];
    if not self.oReDisEnum.match(sDisEnum):
        fRc = self.errorComment(iTagLine, '%s: invalid disassembler OP_XXXX enum: %s (pattern: %s)'
                                          % (sTag, sDisEnum, self.oReDisEnum.pattern));
    elif oInstr.sDisEnum is not None:
        fRc = self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % (sTag, oInstr.sDisEnum, sDisEnum,));
    else:
        oInstr.sDisEnum = sDisEnum;
        fRc = True;
    return fRc;
2240
def parseTagOpMinCpu(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opmincpu
    Value:  <simple CPU name>

    Indicates when this instruction was introduced.

    The name must be a key of g_kdCpuNames.  If more than one word is given
    an error is reported and the first word is used.  An already set, and
    different, minimum CPU is kept and an error is reported.

    Returns True unless the value is missing or not a valid CPU name, in
    which case the result of errorComment is returned.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value, split into words, make sure there's just one, valid it.
    asCpus = self.flattenAllSections(aasSections).split();
    if not asCpus:
        # Without this, indexing asCpus[0] below would raise IndexError.
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if len(asCpus) > 1:
        self.errorComment(iTagLine, '%s: exactly one CPU name, please: %s' % (sTag, ' '.join(asCpus),));

    sMinCpu = asCpus[0];
    if sMinCpu not in g_kdCpuNames:
        return self.errorComment(iTagLine, '%s: invalid CPU name: %s  (names: %s)'
                                           % (sTag, sMinCpu, ','.join(sorted(g_kdCpuNames)),));

    # Set it.  The assignment must not happen before this check, or a
    # conflicting earlier value would go unnoticed.
    if oInstr.sMinCpu is None:
        oInstr.sMinCpu = sMinCpu;
    elif oInstr.sMinCpu != sMinCpu:
        self.errorComment(iTagLine, '%s: attemting to overwrite "%s" with "%s"' % (sTag, oInstr.sMinCpu, sMinCpu,));

    _ = iEndLine;
    return True;
2270
def parseTagOpCpuId(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opcpuid
    Value:  none | <CPUID flag specifier>

    CPUID feature bit which is required for the instruction to be present.

    The value is a comma or space separated list of g_kdCpuIdFlags keys, or
    the single word 'none' (any case).  Valid flags are appended to the
    instruction's asCpuIds list, duplicates are reported but otherwise
    ignored.

    Returns True on success and False if any flag was invalid.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    # Note that whitespace split() never yields tokens with surrounding
    # whitespace, so no additional stripping is needed (the former attempt
    # at that assigned to an index of the string itself, which would raise
    # TypeError).
    asCpuIds = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asCpuIds) == 1 and asCpuIds[0].lower() == 'none':
        asCpuIds = [];
    else:
        fRc = True;
        for sCpuId in asCpuIds:
            if sCpuId not in g_kdCpuIdFlags:
                fRc = self.errorComment(iTagLine, '%s: invalid CPUID value: %s' % (sTag, sCpuId,));
        if not fRc:
            return False;

    # Append them.
    for sCpuId in asCpuIds:
        if sCpuId not in oInstr.asCpuIds:
            oInstr.asCpuIds.append(sCpuId);
        else:
            self.errorComment(iTagLine, '%s: duplicate CPUID: %s' % ( sTag, sCpuId,));

    _ = iEndLine;
    return True;
2304
def parseTagOpGroup(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opgroup
    Value:  op_grp1[_subgrp2[_subsubgrp3]]

    Instruction grouping.

    Exactly one word matching self.oReGroupName is required, and the group
    of an instruction can only be set once.  Returns True when the group was
    stored, otherwise the result of errorComment.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    asGroups = self.flattenAllSections(aasSections).split();
    if len(asGroups) != 1:
        return self.errorComment(iTagLine, '%s: exactly one group, please: %s' % (sTag, asGroups,));

    sGroup = asGroups[0];
    if not self.oReGroupName.match(sGroup):
        fRc = self.errorComment(iTagLine, '%s: invalid group name: %s  (valid: %s)'
                                          % (sTag, sGroup, self.oReGroupName.pattern));
    elif oInstr.sGroup is not None:
        fRc = self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sGroup, sGroup,));
    else:
        oInstr.sGroup = sGroup;
        fRc = True;
    return fRc;
2330
def parseTagOpUnusedInvalid(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opunused, \@opinvalid, \@opinvlstyle
    Value:  <invalid opcode behaviour style>

    The \@opunused indicates the specification is for a currently unused
    instruction encoding.

    The \@opinvalid indicates the specification is for a currently invalid
    instruction encoding (like UD2).

    The \@opinvlstyle just indicates how CPUs decode the instruction when
    not supported (\@opcpuid, \@opmincpu) or disabled.

    Exactly one style, a key of g_kdInvalidStyles, is required and it can
    only be set once per instruction.  Returns True on success, otherwise
    the result of errorComment.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    asStyles = self.flattenAllSections(aasSections).split();
    if len(asStyles) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid behviour style, please: %s' % (sTag, asStyles,));
    sStyle = asStyles[0];
    if sStyle not in g_kdInvalidStyles:
        return self.errorComment(iTagLine, '%s: invalid invalid behaviour style: %s  (valid: %s)'
                                           % (sTag, sStyle, g_kdInvalidStyles.keys(),));
    if oInstr.sInvalidStyle is not None:
        return self.errorComment(iTagLine,
                                 '%s: attempting to overwrite "%s" with "%s" (only one @opunused, @opinvalid, @opinvlstyle)'
                                 % ( sTag, oInstr.sInvalidStyle, sStyle,));

    # Set it, along with the flag implied by the tag (if any).
    oInstr.sInvalidStyle = sStyle;
    sFlagAttrib = { '@opunused': 'fUnused', '@opinvalid': 'fInvalid', }.get(sTag);
    if sFlagAttrib is not None:
        setattr(oInstr, sFlagAttrib, True);
    return True;
2368
def parseTagOpTest(self, sTag, aasSections, iTagLine, iEndLine): # pylint: disable=too-many-locals
    r"""
    Tag:        \@optest
    Value:      [<selectors>[ ]?] <inputs> -> <outputs>
    Example:    mode==64bit / in1=0xfffffffe:dw in2=1:dw -> out1=0xffffffff:dw outfl=a?,p?

    The main idea here is to generate basic instruction tests.

    The probably simplest way of handling the diverse input, would be to use
    it to produce size optimized byte code for a simple interpreter that
    modifies the register input and output states.

    An alternative to the interpreter would be creating multiple tables,
    but that becomes rather complicated wrt what goes where and then to use
    them in an efficient manner.

    Each section is parsed as a separate test.  Tests that parse cleanly are
    appended to the instruction's aoTests list, the others are reported via
    errorComment.  Always returns True.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    #
    # Do it section by section.
    #
    for asSectionLines in aasSections:
        #
        # Sort the input into outputs, inputs and selector conditions.
        #
        sFlatSection = self.flattenAllSections([asSectionLines,]);
        if not sFlatSection:
            self.errorComment(iTagLine, '%s: missing value (dbg: aasSections=%s)' % ( sTag, aasSections));
            continue;
        oTest = InstructionTest(oInstr);

        asSelectors = [];
        asInputs    = [];
        asOutputs   = [];
        asCur       = asOutputs;
        fRc         = True;
        asWords     = sFlatSection.split();

        # Work backwards: outputs come last, then inputs, then selectors.
        # The current-list checks must be identity checks; comparing the
        # lists by value would treat any two lists with equal contents
        # (two empty ones, typically) as the same list and thereby miss
        # repeated or misplaced separators.
        for sWord in reversed(asWords):
            # Check for array switchers.
            if sWord == '->':
                if asCur is not asOutputs:
                    fRc = self.errorComment(iTagLine, '%s: "->" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asInputs;
            elif sWord == '/':
                if asCur is not asInputs:
                    fRc = self.errorComment(iTagLine, '%s: "/" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asSelectors;
            else:
                asCur.insert(0, sWord);

        #
        # Validate and add selectors.
        #
        for sCond in asSelectors:
            # Expand predicate shorthands before looking for the operator.
            sCondExp  = TestSelector.kdPredicates.get(sCond, sCond);
            oSelector = None;
            for sOp in TestSelector.kasCompareOps:
                off = sCondExp.find(sOp);
                if off >= 0:
                    sVariable = sCondExp[:off];
                    sValue    = sCondExp[off + len(sOp):];
                    if sVariable in TestSelector.kdVariables:
                        if sValue in TestSelector.kdVariables[sVariable]:
                            oSelector = TestSelector(sVariable, sOp, sValue);
                        else:
                            self.errorComment(iTagLine, '%s: invalid condition value "%s" in "%s" (valid: %s)'
                                                         % ( sTag, sValue, sCond,
                                                             TestSelector.kdVariables[sVariable].keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid condition variable "%s" in "%s" (valid: %s)'
                                                     % ( sTag, sVariable, sCond, TestSelector.kdVariables.keys(),));
                    break;
            if oSelector is not None:
                for oExisting in oTest.aoSelectors:
                    if oExisting.sVariable == oSelector.sVariable:
                        self.errorComment(iTagLine, '%s: already have a selector for variable "%s" (existing: %s, new: %s)'
                                                     % ( sTag, oSelector.sVariable, oExisting, oSelector,));
                oTest.aoSelectors.append(oSelector);
            else:
                fRc = self.errorComment(iTagLine, '%s: failed to parse selector: %s' % ( sTag, sCond,));

        #
        # Validate outputs and inputs, adding them to the test as we go along.
        #
        for asItems, sDesc, aoDst in [ (asInputs, 'input', oTest.aoInputs), (asOutputs, 'output', oTest.aoOutputs)]:
            asValidFieldKinds = [ 'both', sDesc, ];
            for sItem in asItems:
                oItem = None;
                for sOp in TestInOut.kasOperators:
                    off = sItem.find(sOp);
                    if off < 0:
                        continue;
                    sField     = sItem[:off];
                    sValueType = sItem[off + len(sOp):];
                    if     sField in TestInOut.kdFields \
                       and TestInOut.kdFields[sField][1] in asValidFieldKinds:
                        # The type is optional ("value[:type]") and defaults to that of the field.
                        asSplit = sValueType.split(':', 1);
                        sValue  = asSplit[0];
                        sType   = asSplit[1] if len(asSplit) > 1 else TestInOut.kdFields[sField][0];
                        if sType in TestInOut.kdTypes:
                            oValid = TestInOut.kdTypes[sType].validate(sValue);
                            if oValid is True:
                                if not TestInOut.kdTypes[sType].isAndOrPair(sValue) or sOp == '&|=':
                                    oItem = TestInOut(sField, sOp, sValue, sType);
                                else:
                                    self.errorComment(iTagLine, '%s: and-or %s value "%s" can only be used with "&|="'
                                                                % ( sTag, sDesc, sItem, ));
                            else:
                                self.errorComment(iTagLine, '%s: invalid %s value "%s" in "%s" (type: %s): %s'
                                                            % ( sTag, sDesc, sValue, sItem, sType, oValid, ));
                        else:
                            self.errorComment(iTagLine, '%s: invalid %s type "%s" in "%s" (valid types: %s)'
                                                         % ( sTag, sDesc, sType, sItem, TestInOut.kdTypes.keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid %s field "%s" in "%s"\nvalid fields: %s'
                                                     % ( sTag, sDesc, sField, sItem,
                                                         ', '.join([sKey for sKey in TestInOut.kdFields.keys()
                                                                    if TestInOut.kdFields[sKey][1] in asValidFieldKinds]),));
                    break;
                if oItem is not None:
                    for oExisting in aoDst:
                        if oExisting.sField == oItem.sField and oExisting.sOp == oItem.sOp:
                            self.errorComment(iTagLine,
                                              '%s: already have a "%s" assignment for field "%s" (existing: %s, new: %s)'
                                              % ( sTag, oItem.sOp, oItem.sField, oExisting, oItem,));
                    aoDst.append(oItem);
                else:
                    fRc = self.errorComment(iTagLine, '%s: failed to parse assignment: %s' % ( sTag, sItem,));

        #
        # Only add tests that parsed cleanly; dump the details of the others.
        #
        if fRc:
            oInstr.aoTests.append(oTest);
        else:
            self.errorComment(iTagLine, '%s: failed to parse test: %s' % (sTag, ' '.join(asWords),));
            self.errorComment(iTagLine, '%s: asSelectors=%s / asInputs=%s -> asOutputs=%s'
                                        % (sTag, asSelectors, asInputs, asOutputs,));

    _ = iEndLine;
    return True;
2513
def parseTagOpTestNum(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Numbered \@optest tag.  Either \@optest42 or \@optest[42].

    Reports an error if the number does not equal the number of tests the
    instruction already has, then hands the tag over to parseTagOpTest and
    returns its result.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The number follows '@optest', optionally wrapped in square brackets.
    sNumber = sTag[8:-1] if sTag[-1] == ']' else sTag[7:];
    iTest   = int(sNumber);

    cTests = len(oInstr.aoTests);
    if iTest != cTests:
        self.errorComment(iTagLine, '%s: incorrect test number: %u, actual %u' % (sTag, iTest, cTests,));
    return self.parseTagOpTest(sTag, aasSections, iTagLine, iEndLine);
2529
def parseTagOpTestIgnore(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@optestign | \@optestignore
    Value:      <value is ignored>

    This is a simple trick to ignore a test while debugging another.
    Nothing is parsed or recorded; the method just returns True.

    See also \@oponlytest.
    """
    _ = (sTag, aasSections, iTagLine, iEndLine);
    return True;
2541
def parseTagOpCopyTests(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opcopytests
    Value:      <opstat | function> [..]
    Example:    \@opcopytests add_Eb_Gb

    Trick to avoid duplicating tests for different encodings of the same
    operation.

    Each reference must match self.oReStatsName or self.oReFunctionName and
    is appended to the instruction's asCopyTests list; this method does not
    do any copying itself.  Invalid and duplicate references are reported
    and skipped.  Returns True unless no reference is given at all, in
    which case the result of errorComment is returned.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    asToCopy = self.flattenAllSections(aasSections).split();
    if len(asToCopy) == 0:
        return self.errorComment(iTagLine, '%s: requires at least on reference value' % (sTag,));

    for sReference in asToCopy:
        if sReference in oInstr.asCopyTests:
            self.errorComment(iTagLine, '%s: ignoring duplicate "%s"' % (sTag, sReference,));
            continue;
        if self.oReStatsName.match(sReference) or self.oReFunctionName.match(sReference):
            oInstr.asCopyTests.append(sReference);
        else:
            self.errorComment(iTagLine, '%s: invalid instruction reference (opstat or function) "%s" (valid: %s or %s)'
                                        % (sTag, sReference, self.oReStatsName.pattern, self.oReFunctionName.pattern));
    return True;
2570
def parseTagOpOnlyTest(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@oponlytest | \@oponly
    Value:      none

    Only test instructions with this tag.  This is a trick that is handy
    for singling out one or two new instructions or tests.

    The instruction is added to g_aoOnlyTestInstructions unless it is there
    already.  Returns True on success, or the result of errorComment if a
    value was given.

    See also \@optestignore.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The tag takes no value at all.
    sValue = self.flattenAllSections(aasSections).strip();
    if len(sValue) > 0:
        return self.errorComment(iTagLine, '%s: does not take any value: %s' % (sTag, sValue));

    if oInstr not in g_aoOnlyTestInstructions:
        g_aoOnlyTestInstructions.append(oInstr);
    return True;
2593
def parseTagOpXcptType(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opxcpttype
    Value:  [none|1|2|3|4|4UA|5|6|7|8|11|12|E1|E1NF|E2|E3|E3NF|E4|E4NF|E5|E5NF|E6|E6NF|E7NF|E9|E9NF|E10|E11|E12|E12NF]

    Sets the SSE or AVX exception type (see SDMv2 2.4, 2.7).

    Exactly one type, a key of g_kdXcptTypes, is required and it can only be
    set once per instruction.  Returns True on success, otherwise the result
    of errorComment.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    asTypes = self.flattenAllSections(aasSections).split();
    if len(asTypes) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid exception type, please: %s' % (sTag, asTypes,));

    sType = asTypes[0];
    if sType not in g_kdXcptTypes:
        fRc = self.errorComment(iTagLine, '%s: invalid invalid exception type: %s  (valid: %s)'
                                          % (sTag, sType, sorted(g_kdXcptTypes.keys()),));
    elif oInstr.sXcptType is not None:
        fRc = self.errorComment(iTagLine,
                                '%s: attempting to overwrite "%s" with "%s" (only one @opxcpttype)'
                                % ( sTag, oInstr.sXcptType, sType,));
    else:
        oInstr.sXcptType = sType;
        fRc = True;
    return fRc;
2620
def parseTagOpFunction(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opfunction
    Value:  <VMM function name>

    This is for explicitly setting the IEM function name.  Normally we pick
    this up from the FNIEMOP_XXX macro invocation after the description, or
    generate it from the mnemonic and operands.

    It is thought it may be necessary to set it when specifying instructions
    whose implementation isn't following immediately or aren't implemented
    yet.

    The name must match self.oReFunctionName and can only be set once.
    Returns True on success, otherwise the result of errorComment.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    sFunction = self.flattenAllSections(aasSections);
    if not self.oReFunctionName.match(sFunction):
        fRc = self.errorComment(iTagLine, '%s: invalid VMM function name: "%s" (valid: %s)'
                                          % (sTag, sFunction, self.oReFunctionName.pattern));
    elif oInstr.sFunction is not None:
        fRc = self.errorComment(iTagLine, '%s: attempting to overwrite VMM function name "%s" with "%s"'
                                          % (sTag, oInstr.sFunction, sFunction,));
    else:
        oInstr.sFunction = sFunction;
        fRc = True;
    return fRc;
2648
def parseTagOpStats(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opstats
    Value:      <VMM statistics base name>

    Explicitly sets the statistics name of the current instruction.
    Normally the name is picked up from the IEMOP_MNEMONIC macro invocation,
    or generated from the mnemonic and operands.

    It is thought this may be necessary when specifying instructions whose
    implementation does not follow immediately or is not implemented yet.

    Returns True on success.  On an invalid name, or when a statistics name
    has already been set, the problem is reported via self.errorComment()
    and its return value is passed back.
    """
    _ = iEndLine    # Not needed by this tag.
    oInstr = self.ensureInstructionForOpTag(iTagLine)

    # The value is the flattened text of all sections; it must look like a statistics name.
    sStats = self.flattenAllSections(aasSections)
    if not self.oReStatsName.match(sStats):
        sMsg = '%s: invalid VMM statistics name: "%s" (valid: %s)' % (sTag, sStats, self.oReStatsName.pattern)
        return self.errorComment(iTagLine, sMsg)

    # Only one statistics name per instruction.
    if oInstr.sStats is not None:
        sMsg = '%s: attempting to overwrite VMM statistics base name "%s" with "%s"' % (sTag, oInstr.sStats, sStats,)
        return self.errorComment(iTagLine, sMsg)

    oInstr.sStats = sStats
    return True
2676
def parseTagOpDone(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opdone
    Value:      none

    Used to explicitly flush the instructions that have been specified.

    Returns the result of self.doneInstructions() when the tag has no value;
    otherwise reports the unexpected value via self.errorComment() and
    returns its result.
    """
    _ = iEndLine    # Not needed by this tag.
    sValue = self.flattenAllSections(aasSections)
    if sValue == '':
        return self.doneInstructions()
    return self.errorComment(iTagLine, '%s: takes no value, found: "%s"' % (sTag, sValue,))
2689
2690 ## @}
2691
2692
def parseComment(self):
    """
    Parse the current comment (self.sComment).

    The comment is split into lines, each line is stripped of leading
    whitespace and asterisks, and the text is then divided into '@tag'
    chunks.  Each chunk is passed to the matching handler in
    self.dTagHandlers; unknown or likely misspelled tags are reported via
    self.errorComment().  Text before the first tag is collected under the
    pseudo tag '@default'.

    Returns False if the comment contains neither 'Opcode' nor '@' (nothing
    is parsed then), otherwise True.
    """
    #
    # Reject if comment doesn't seem to contain anything interesting.
    #
    if 'Opcode' not in self.sComment and '@' not in self.sComment:
        return False

    #
    # Split the comment into lines, removing leading asterisks and spaces.
    # Also remove leading and trailing empty lines.  (At least one line
    # survives, since the 'Opcode' or '@' text is never stripped.)
    #
    asLines = [sLine.lstrip().lstrip('*').lstrip() for sLine in self.sComment.split('\n')]

    while asLines and not asLines[0]:
        self.iCommentLine += 1      # Keep the line number in sync with the first remaining line.
        asLines.pop(0)

    while asLines and not asLines[-1]:
        asLines.pop()

    #
    # Check for old style: Opcode 0x0f 0x12
    #
    if asLines[0].startswith('Opcode '):
        self.parseCommentOldOpcode(asLines)

    def processTag(sTag, aasTagSections, iTagLine, iEndLine):
        """
        Handles one collected tag and its sections.

        Returns a (cOpTags, sDefaultText) pair: cOpTags is 1 if a tag
        handler was invoked and 0 otherwise; sDefaultText is the flattened
        '@default' text, or None for any other tag.

        Shared by the in-loop and final-tag processing so that both report
        the same diagnostics.
        """
        # Drop a trailing empty section (left behind by a blank line).
        if not aasTagSections[-1] and len(aasTagSections) > 1:
            aasTagSections.pop(-1)

        if sTag in self.dTagHandlers:
            self.dTagHandlers[sTag](sTag, aasTagSections, iTagLine, iEndLine)
            return (1, None)
        if sTag == '@default':
            return (0, self.flattenAllSections(aasTagSections))

        # This hint must be tested before the generic '@op' check, or
        # '@opencoding' could never reach it.
        if sTag in ['@encoding', '@opencoding']:
            self.errorComment(iTagLine, 'Did you mean "@openc" rather than "%s"?' % (sTag,))
        elif sTag.startswith('@op'):
            self.errorComment(iTagLine, 'Unknown tag: %s' % (sTag))
        elif '@op' + sTag[1:] in self.dTagHandlers:
            self.errorComment(iTagLine, 'Did you mean "@op%s" rather than "%s"?' % (sTag[1:], sTag))
        return (0, None)

    #
    # Look for @op* tagged data.
    #
    cOpTags = 0
    sFlatDefault = None
    sCurTag = '@default'
    iCurTagLine = 0
    asCurSection = []
    aasSections = [asCurSection, ]
    for iLine, sLine in enumerate(asLines):
        if not sLine.startswith('@'):
            # Continuation of the current tag; a blank line starts a new section.
            if sLine:
                asCurSection.append(sLine)
            elif asCurSection:
                asCurSection = []
                aasSections.append(asCurSection)
            continue

        # Process the previous tag.
        cHandled, sDefault = processTag(sCurTag, aasSections, iCurTagLine, iLine)
        cOpTags += cHandled
        if sDefault is not None:
            sFlatDefault = sDefault

        # New tag.
        asSplit = sLine.split(None, 1)
        sCurTag = asSplit[0].lower()
        asCurSection = [asSplit[1], ] if len(asSplit) > 1 else []
        aasSections = [asCurSection, ]
        iCurTagLine = iLine

    #
    # Process the final tag (iLine is the index of the last line here).
    #
    cHandled, sDefault = processTag(sCurTag, aasSections, iCurTagLine, iLine)
    cOpTags += cHandled
    if sDefault is not None:
        sFlatDefault = sDefault

    #
    # Don't allow default text in blocks containing @op*.
    #
    if cOpTags > 0 and sFlatDefault:
        self.errorComment(0, 'Untagged comment text is not allowed with @op*: %s' % (sFlatDefault,))

    return True
2793
def parseMacroInvocation(self, sInvocation):
    """
    Parses a macro invocation.

    sInvocation must start with the macro name followed by '('.  When the
    closing parenthesis is not on this line, following lines are taken from
    self.asLines, starting at self.iLine, until it is found.

    Returns a tuple, first element is the offset following the macro
    invocation.  The second element is a list of macro arguments, where the
    zero'th is the macro name.

    A missing open parenthesis is reported via self.raiseError().  For an
    invalid macro name, or when the file ends inside the invocation, the
    result of self.error() is returned instead of the tuple.
    """
    # First the name.
    offOpen = sInvocation.find('(')
    if offOpen <= 0:
        self.raiseError("macro invocation open parenthesis not found")
    sName = sInvocation[:offOpen].strip()
    if not self.oReMacroName.match(sName):
        return self.error("invalid macro name '%s'" % (sName,))
    asRet = [sName, ]

    # Arguments.  Only commas and the final parenthesis at depth one split
    # arguments; nested parentheses are kept as part of the argument text.
    iLine = self.iLine
    cDepth = 1
    off = offOpen + 1
    offStart = off
    while cDepth > 0:
        # Pull in continuation lines until there is a character at 'off'.
        # This has to loop: an appended line may be empty, and indexing
        # right after a single append would then raise IndexError.
        while off >= len(sInvocation):
            if iLine >= len(self.asLines):
                return self.error('macro invocation beyond end of file')
            sInvocation += self.asLines[iLine]
            iLine += 1
        ch = sInvocation[off]

        if ch in (',', ')'):
            if cDepth == 1:
                asRet.append(sInvocation[offStart:off].strip())
                offStart = off + 1
            if ch == ')':
                cDepth -= 1
        elif ch == '(':
            cDepth += 1
        off += 1

    return (off, asRet)
2835
def findAndParseMacroInvocationEx(self, sCode, sMacro):
    """
    Looks for the first occurrence of sMacro in sCode and parses it as a
    macro invocation if it is followed (optionally after whitespace) by '('.

    Returns (len(sCode), None) if not found, parseMacroInvocation result if
    found (with the offset made relative to the start of sCode).
    """
    offHit = sCode.find(sMacro)
    # startswith() rather than indexing [0]: the macro name may be the very
    # last thing in sCode, leaving nothing to index.
    if offHit >= 0 and sCode[offHit + len(sMacro):].lstrip().startswith('('):
        offAfter, asRet = self.parseMacroInvocation(sCode[offHit:])
        return (offHit + offAfter, asRet)
    return (len(sCode), None)
2845
def findAndParseMacroInvocation(self, sCode, sMacro):
    """
    Convenience wrapper around findAndParseMacroInvocationEx that drops the
    offset.

    Returns None if not found, arguments as per parseMacroInvocation if found.
    """
    tResult = self.findAndParseMacroInvocationEx(sCode, sMacro)
    return tResult[1]
2851
def findAndParseFirstMacroInvocation(self, sCode, asMacro):
    """
    Tries the macro names in asMacro in order and stops at the first one
    found in sCode.

    Returns same as findAndParseMacroInvocation.
    """
    # Lazy generator, so macros after the first hit are never looked up.
    oHits = (self.findAndParseMacroInvocation(sCode, sMacro) for sMacro in asMacro)
    return next((asRet for asRet in oHits if asRet is not None), None)
2861
def workerIemOpMnemonicEx(self, sMacro, sStats, sAsm, sForm, sUpper, sLower,  # pylint: disable=too-many-arguments
                          sDisHints, sIemHints, asOperands):
    """
    Processes one of the IEMOP_MNEMONIC0EX, IEMOP_MNEMONIC1EX, IEMOP_MNEMONIC2EX,
    IEMOP_MNEMONIC3EX, and IEMOP_MNEMONIC4EX macros.

    The macro arguments are checked and merged into the last instruction in
    self.aoCurInstrs (one is added if the list is empty): mnemonic, operands,
    encoding, statistics name and hints.  Values already set on the
    instruction are compared against the macro and mismatches are reported
    via self.error().  sAsm is currently unused.

    Returns True.
    """
    _ = sAsm

    #
    # Some invocation checks.
    #
    if sUpper.upper() != sUpper:
        self.error('%s: bad a_Upper parameter: %s' % (sMacro, sUpper,))
    if sLower.lower() != sLower:
        self.error('%s: bad a_Lower parameter: %s' % (sMacro, sLower,))
    if sLower != sUpper.lower():
        self.error('%s: a_Upper and a_Lower parameters does not match: %s vs %s' % (sMacro, sUpper, sLower,))
    if not self.oReMnemonic.match(sLower):
        self.error('%s: invalid a_Lower: %s (valid: %s)' % (sMacro, sLower, self.oReMnemonic.pattern,))

    #
    # Check if sIemHints tells us to not consider this macro invocation.
    #
    if 'IEMOPHINT_SKIP_PYTHON' in sIemHints:
        return True

    # Apply to the last instruction only for now.
    if not self.aoCurInstrs:
        self.addInstruction()
    oInstr = self.aoCurInstrs[-1]
    if oInstr.iLineMnemonicMacro != -1:
        self.error('%s: already saw a IEMOP_MNEMONIC* macro on line %u for this instruction'
                   % (sMacro, oInstr.iLineMnemonicMacro,))
    else:
        oInstr.iLineMnemonicMacro = self.iLine

    # Mnemonic
    if oInstr.sMnemonic is None:
        oInstr.sMnemonic = sLower
    elif oInstr.sMnemonic != sLower:
        self.error('%s: current instruction and a_Lower does not match: %s vs %s' % (sMacro, oInstr.sMnemonic, sLower,))

    # Process operands.
    if len(oInstr.aoOperands) not in [0, len(asOperands)]:
        self.error('%s: number of operands given by @opN does not match macro: %s vs %s'
                   % (sMacro, len(oInstr.aoOperands), len(asOperands),))
    for iOperand, sType in enumerate(asOperands):
        sWhere = g_kdOpTypes.get(sType, [None, None])[1]
        if sWhere is None:
            self.error('%s: unknown a_Op%u value: %s' % (sMacro, iOperand + 1, sType))
            # Error recovery: fall back on the @opN operand, or on a dummy one.
            if iOperand < len(oInstr.aoOperands):
                sWhere = oInstr.aoOperands[iOperand].sWhere
                sType = oInstr.aoOperands[iOperand].sType
            else:
                sWhere = 'reg'
                sType = 'Gb'

        if iOperand == len(oInstr.aoOperands):
            oInstr.aoOperands.append(Operand(sWhere, sType))
            continue
        oExisting = oInstr.aoOperands[iOperand]
        if oExisting.sWhere != sWhere or oExisting.sType != sType:
            self.error('%s: @op%u and a_Op%u mismatch: %s:%s vs %s:%s'
                       % (sMacro, iOperand + 1, iOperand + 1, oExisting.sWhere, oExisting.sType, sWhere, sType,))

    # Encoding.
    if sForm in g_kdIemForms:
        sFormEncoding = g_kdIemForms[sForm][0]
        if oInstr.sEncoding is None:
            oInstr.sEncoding = sFormEncoding
        elif sFormEncoding != oInstr.sEncoding:
            self.error('%s: current instruction @openc and a_Form does not match: %s vs %s (%s)'
                       % (sMacro, oInstr.sEncoding, g_kdIemForms[sForm], sForm))

        # Check the parameter locations for the encoding.
        asFormWhere = g_kdIemForms[sForm][1]
        if asFormWhere is not None:
            if len(asFormWhere) == len(oInstr.aoOperands):
                for iOperand, sWhere in enumerate(asFormWhere):
                    if oInstr.aoOperands[iOperand].sWhere != sWhere:
                        self.error('%s: current instruction @op%u and a_Form location does not match: %s vs %s (%s)'
                                   % (sMacro, iOperand + 1, oInstr.aoOperands[iOperand].sWhere, sWhere, sForm,))
            else:
                self.error('%s: The a_Form=%s has a different operand count: %s (form) vs %s'
                           % (sMacro, sForm, len(asFormWhere), len(oInstr.aoOperands) ))
    else:
        self.error('%s: unknown a_Form value: %s' % (sMacro, sForm,))

    # Stats.
    if not self.oReStatsName.match(sStats):
        self.error('%s: invalid a_Stats value: %s' % (sMacro, sStats,))
    elif oInstr.sStats is None:
        oInstr.sStats = sStats
    elif oInstr.sStats != sStats:
        self.error('%s: mismatching @opstats and a_Stats value: %s vs %s'
                   % (sMacro, oInstr.sStats, sStats,))

    # Process the hints (simply merge with @ophints w/o checking anything).
    def mergeHints(sHints, sPrefix, sFmtUnknown, sFmtExpected):
        """ Merges the '|' separated hints carrying the given prefix into oInstr.dHints. """
        for sHint in [sPart.strip() for sPart in sHints.split('|')]:
            if not sHint.startswith(sPrefix):
                if sHint != '0':
                    self.error(sFmtExpected % (sMacro, sHint,))
                continue
            sShortHint = sHint[len(sPrefix):].lower()
            if sShortHint in g_kdHints:
                oInstr.dHints[sShortHint] = True    # (dummy value, using dictionary for speed)
            else:
                self.error(sFmtUnknown % (sMacro, sHint,))

    mergeHints(sDisHints, 'DISOPTYPE_', '%s: unknown a_fDisHints value: %s', '%s: expected a_fDisHints value: %s')
    mergeHints(sIemHints, 'IEMOPHINT_', '%s: unknown a_fIemHints value: %s', '%s: expected a_fIemHints value: %s')

    return True
2979
def workerIemOpMnemonic(self, sMacro, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands):
    """
    Processes one of the IEMOP_MNEMONIC0, IEMOP_MNEMONIC1, IEMOP_MNEMONIC2,
    IEMOP_MNEMONIC3, and IEMOP_MNEMONIC4 macros.

    These macros carry no explicit statistics name or assembly string, so
    both are generated from the lower case mnemonic and the operands before
    handing over to workerIemOpMnemonicEx, whose result is returned.
    """
    if asOperands:
        sStats = sLower + '_' + '_'.join(asOperands)
        sAsm = sLower + ' ' + ','.join(asOperands)
    else:
        sStats = sLower
        sAsm = sLower
    return self.workerIemOpMnemonicEx(sMacro, sStats, sAsm, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands)
2989
def checkCodeForMacro(self, sCode):
    """
    Checks code for relevant macro invocation.

    Returns True when a FNIEMOP_XXX function definition/stub macro was found
    and processed.  Returns False otherwise, including when IEMOP_MNEMONIC*
    macros were found and processed.
    """
    # Nothing to do unless there is an opening parenthesis after the first character.
    if sCode.find('(') <= 0:
        return False

    #
    # Look for instruction decoder function definitions. ASSUME single line.
    #
    asArgs = self.findAndParseFirstMacroInvocation(sCode,
                                                   [ 'FNIEMOP_DEF',
                                                     'FNIEMOP_STUB',
                                                     'FNIEMOP_STUB_1',
                                                     'FNIEMOP_UD_STUB',
                                                     'FNIEMOP_UD_STUB_1' ])
    if asArgs is not None:
        sFunction = asArgs[1]

        if not self.aoCurInstrs:
            self.addInstruction()
        for oInstr in self.aoCurInstrs:
            if oInstr.iLineFnIemOpMacro != -1:
                self.error('%s: already seen a FNIEMOP_XXX macro for %s' % (asArgs[0], oInstr,) )
            else:
                oInstr.iLineFnIemOpMacro = self.iLine

        fStub = asArgs[0].find('STUB') > 0
        self.setInstrunctionAttrib('sFunction', sFunction)
        self.setInstrunctionAttrib('fStub', fStub, fOverwrite = True)
        self.setInstrunctionAttrib('fUdStub', asArgs[0].find('UD_STUB') > 0, fOverwrite = True)
        if fStub:
            # A stub has no body to scan, so the instructions are completed right away.
            self.doneInstructions()
        return True

    #
    # IEMOP_MNEMONIC(a_Stats, a_szMnemonic) IEMOP_INC_STATS(a_Stats)
    #
    asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC')
    if asArgs is not None and len(self.aoCurInstrs) == 1:
        oInstr = self.aoCurInstrs[0]
        if oInstr.sStats is None:
            oInstr.sStats = asArgs[1]
        self.deriveMnemonicAndOperandsFromStats(oInstr, asArgs[1])

    #
    # IEMOP_MNEMONIC<N>EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, [a_Op1, .. a_Op<N>,] a_fDisHints, a_fIemHints)
    # The tuple index equals N, the number of operand arguments between a_Lower and the hints.
    #
    for cOperands, sMacro in enumerate(('IEMOP_MNEMONIC0EX', 'IEMOP_MNEMONIC1EX', 'IEMOP_MNEMONIC2EX',
                                        'IEMOP_MNEMONIC3EX', 'IEMOP_MNEMONIC4EX')):
        asArgs = self.findAndParseMacroInvocation(sCode, sMacro)
        if asArgs is not None:
            offHints = 6 + cOperands
            self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5],
                                       asArgs[offHints], asArgs[offHints + 1], asArgs[6:offHints])

    #
    # IEMOP_MNEMONIC<N>(a_Form, a_Upper, a_Lower, [a_Op1, .. a_Op<N>,] a_fDisHints, a_fIemHints)
    #
    for cOperands, sMacro in enumerate(('IEMOP_MNEMONIC0', 'IEMOP_MNEMONIC1', 'IEMOP_MNEMONIC2',
                                        'IEMOP_MNEMONIC3', 'IEMOP_MNEMONIC4')):
        asArgs = self.findAndParseMacroInvocation(sCode, sMacro)
        if asArgs is not None:
            offHints = 4 + cOperands
            self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3],
                                     asArgs[offHints], asArgs[offHints + 1], asArgs[4:offHints])

    return False
3083
3084
def parse(self):
    """
    Parses the given file.

    Walks self.asLines from self.iLine onwards, switching between the code
    and multi-line comment states.  Code fragments are handed to
    checkCodeForMacro(), completed /* */ comments to parseComment(), and a
    '}' in the first column of a code line completes the current
    instructions.

    Returns number of errors (the result of self.printErrors()).
    Raises exception on fatal trouble.
    """
    #self.debug('Parsing %s' % (self.sSrcFile,));

    while self.iLine < len(self.asLines):
        sLine = self.asLines[self.iLine]
        self.iLine += 1

        # We only look for comments, so only lines with a slash might possibly
        # influence the parser state.
        offSlash = sLine.find('/')
        if offSlash >= 0:
            if offSlash + 1 >= len(sLine) or sLine[offSlash + 1] != '/' or self.iState != self.kiCode:
                offLine = 0
                while offLine < len(sLine):
                    if self.iState == self.kiCode:
                        offHit = sLine.find('/*', offLine)     # only multiline comments for now.
                        if offHit >= 0:
                            self.checkCodeForMacro(sLine[offLine:offHit])
                            self.sComment = ''
                            self.iCommentLine = self.iLine
                            self.iState = self.kiCommentMulti
                            offLine = offHit + 2
                        else:
                            self.checkCodeForMacro(sLine[offLine:])
                            offLine = len(sLine)

                    elif self.iState == self.kiCommentMulti:
                        offHit = sLine.find('*/', offLine)
                        if offHit >= 0:
                            self.sComment += sLine[offLine:offHit]
                            self.iState = self.kiCode
                            offLine = offHit + 2
                            self.parseComment()
                        else:
                            self.sComment += sLine[offLine:]
                            offLine = len(sLine)
                    else:
                        assert False
            # C++ line comment: only the code in front of it is of interest.
            elif offSlash > 0:
                self.checkCodeForMacro(sLine[:offSlash])

        # No slash, but append the line if in multi-line comment.
        elif self.iState == self.kiCommentMulti:
            #self.debug('line %d: multi' % (self.iLine,));
            self.sComment += sLine

        # No slash, but check code line for relevant macro.
        elif self.iState == self.kiCode and sLine.find('IEMOP_') >= 0:
            #self.debug('line %d: macro' % (self.iLine,));
            self.checkCodeForMacro(sLine)

        # If the line is a '}' in the first position, complete the instructions.
        # (startswith() rather than [0] so that an empty line cannot raise IndexError.)
        elif self.iState == self.kiCode and sLine.startswith('}'):
            #self.debug('line %d: }' % (self.iLine,));
            self.doneInstructions()

    self.doneInstructions()
    self.debug('%3s stubs out of %3s instructions in %s' % (self.cTotalStubs, self.cTotalInstr, os.path.basename(self.sSrcFile),))
    return self.printErrors()
3150
3151
def __parseFileByName(sSrcFile, sDefaultMap):
    """
    Parses one source file for instruction specifications.

    Reads sSrcFile into a list of lines and runs a SimpleParser over it,
    with sDefaultMap passed on to the parser.

    Returns the value of SimpleParser.parse().
    Raises Exception if the file cannot be opened or read.  A
    ParserException is printed before being re-raised; any other exception
    from the parser propagates unchanged.
    """
    #
    # Read sSrcFile into a line array.
    #
    try:
        oFile = open(sSrcFile, "r")
    except Exception as oXcpt:
        raise Exception("failed to open %s for reading: %s" % (sSrcFile, oXcpt,))
    with oFile:     # Closes the file whether or not reading succeeds.
        try:
            asLines = oFile.readlines()
        except Exception as oXcpt:
            raise Exception("failed to read %s: %s" % (sSrcFile, oXcpt,))

    #
    # Do the parsing.
    #
    try:
        return SimpleParser(sSrcFile, asLines, sDefaultMap).parse()
    except ParserException as oXcpt:
        print(str(oXcpt))
        raise
3182
3183
def __doTestCopying():
    """
    Executes the asCopyTests instructions.

    Each entry in an instruction's asCopyTests is looked up first in
    g_dAllInstructionsByStat and then in g_dAllInstructionsByFunction; the
    tests of every instruction found are appended to the referencing
    instruction.  A reference that is not found, or that resolves to the
    referencing instruction itself, is an error.

    Returns the number of errors, after writing them to stderr.
    """
    asErrors = []
    for oDstInstr in g_aoAllInstructions:
        if not oDstInstr.asCopyTests:
            continue
        for sSrcInstr in oDstInstr.asCopyTests:
            # Statistics name first, then function name (which may yield several instructions).
            oSrcInstr = g_dAllInstructionsByStat.get(sSrcInstr, None)
            if oSrcInstr:
                aoSrcInstrs = [oSrcInstr,]
            else:
                aoSrcInstrs = g_dAllInstructionsByFunction.get(sSrcInstr, [])

            if not aoSrcInstrs:
                asErrors.append('%s:%s: error: @opcopytests reference "%s" not found\n'
                                % ( oDstInstr.sSrcFile, oDstInstr.iLineCreated, sSrcInstr))
                continue

            for oSrcInstr in aoSrcInstrs:
                if oSrcInstr != oDstInstr:
                    oDstInstr.aoTests.extend(oSrcInstr.aoTests)
                else:
                    asErrors.append('%s:%s: error: @opcopytests reference "%s" matches the destination\n'
                                    % ( oDstInstr.sSrcFile, oDstInstr.iLineCreated, sSrcInstr))

    if asErrors:
        sys.stderr.write(u''.join(asErrors))
    return len(asErrors)
3211
3212
def __applyOnlyTest():
    """
    If g_aoOnlyTestInstructions contains any instructions, drop aoTests from
    all other instructions so that only these get tested.

    Always returns 0, so the result can be added to an error count.
    """
    if not g_aoOnlyTestInstructions:
        return 0
    for oInstr in g_aoAllInstructions:
        if oInstr.aoTests and oInstr not in g_aoOnlyTestInstructions:
            oInstr.aoTests = []
    return 0
3224
def __parseAll():
    """
    Parses all the IEMAllInstruction*.cpp.h files.

    The files are expected in the directory of this script.  After parsing,
    the test copying and the only-test filtering are applied.

    Returns True if no errors were counted; exits the process with status 1
    otherwise.  Raises exception on failure.
    """
    # (default instruction map, file name) pairs, in parse order.
    aasInputs = (
        ( 'one',        'IEMAllInstructionsOneByte.cpp.h'),
        ( 'two0f',      'IEMAllInstructionsTwoByte0f.cpp.h'),
        ( 'three0f38',  'IEMAllInstructionsThree0f38.cpp.h'),
        ( 'three0f3a',  'IEMAllInstructionsThree0f3a.cpp.h'),
        ( 'vexmap1',    'IEMAllInstructionsVexMap1.cpp.h'),
        ( 'vexmap2',    'IEMAllInstructionsVexMap2.cpp.h'),
        ( 'vexmap3',    'IEMAllInstructionsVexMap3.cpp.h'),
        ( '3dnow',      'IEMAllInstructions3DNow.cpp.h'),
    )
    sSrcDir = os.path.dirname(os.path.abspath(__file__))

    cErrors = 0
    for sDefaultMap, sName in aasInputs:
        cErrors += __parseFileByName(os.path.join(sSrcDir, sName), sDefaultMap)
    cErrors += __doTestCopying()
    cErrors += __applyOnlyTest()

    if cErrors == 0:
        return True
    #raise Exception('%d parse errors' % (cErrors,));
    sys.exit(1)
    return True
3251
3252
3253
# Parse all the instruction source files as soon as this module is loaded.
__parseAll()
3255
3256
3257#
3258# Generators (may perhaps move later).
3259#
def generateDisassemblerTables(oDstFile = sys.stdout):
    """
    Generates disassembler tables.

    The instruction maps in g_dInstructionMaps are visited sorted by encoding
    and lead opcodes, and a 'const DISOPCODE <table>[]' C array is written to
    oDstFile for the map.  Only the first map is emitted for now (see the
    break at the end of the loop).
    """
    # Fixed decoder column for the VEX encodings.
    dVexDecoders = { 'vex2': 'IDX_ParseVex2b,', 'vex3': 'IDX_ParseVex3b,', }
    # Character offsets at which the successive columns of a table line start.
    aoffColumns = [4, 29, 49, 65, 77, 89, 109, 125, 141, 157, 183, 199]

    aoSortedMaps = sorted(g_dInstructionMaps.items(),
                          key = lambda aKV: aKV[1].sEncoding + ''.join(aKV[1].asLeadOpcodes))
    for sName, oMap in aoSortedMaps:
        assert oMap.sName == sName
        asLines = [
            '/* Generated from: %-11s Selector: %-7s Encoding: %-7s Lead bytes opcodes: %s */'
            % ( oMap.sName, oMap.sSelector, oMap.sEncoding, ' '.join(oMap.asLeadOpcodes), ),
            'const DISOPCODE %s[] =' % (oMap.getDisasTableName(),),
            '{',
        ]

        for iInstr, oInstr in enumerate(oMap.getInstructionsInTableOrder()):

            # A heading comment (and a separating blank line) for every 16 entries.
            if (iInstr & 0xf) == 0:
                if iInstr != 0:
                    asLines.append('')
                asLines.append(' /* %x */' % (iInstr >> 4,))

            if oInstr is None:
                continue    #asLines.append(' /* %#04x */ None,' % (iInstr));
            if isinstance(oInstr, list):
                asLines.append(' /* %#04x */ ComplicatedListStuffNeedingWrapper,' % (iInstr))
                continue

            cMaxOperands = 4 if len(oInstr.aoOperands) > 3 else 3
            sMacro = 'OPVEX' if cMaxOperands == 4 else 'OP'
            assert len(oInstr.aoOperands) <= cMaxOperands

            #
            # Format string.
            #
            sFormat = '%s("%s' % (sMacro, oInstr.sMnemonic,)
            for iOperand, oOperand in enumerate(oInstr.aoOperands):
                sOpFormat = g_kdOpTypes[oOperand.sType][2]
                if sOpFormat[0] != '%':                 ## @todo remove upper() later.
                    sOpFormat = sOpFormat.upper()       ## @todo remove upper() later.
                sFormat += (',' if iOperand else ' ') + sOpFormat
            asColumns = [ sFormat + '",', ]

            #
            # Decoders.
            #
            iStart = len(asColumns)
            sEncoding = oInstr.sEncoding
            if sEncoding is None or sEncoding in [ 'fixed' ]:
                pass
            elif sEncoding == 'ModR/M':
                # ASSUME the first operand is using the ModR/M encoding
                assert len(oInstr.aoOperands) >= 1 and oInstr.aoOperands[0].usesModRM()
                asColumns.append('IDX_ParseModRM,')
                ## @todo IDX_ParseVexDest
                # Is second operand using ModR/M too?
                if len(oInstr.aoOperands) > 1 and oInstr.aoOperands[1].usesModRM():
                    asColumns.append('IDX_UseModRM,')
            elif sEncoding == 'prefix':
                asColumns.extend(['0,'] * len(oInstr.aoOperands))
            elif sEncoding in dVexDecoders:
                asColumns.append(dVexDecoders[sEncoding])
            elif sEncoding in g_dInstructionMaps:
                asColumns.append(g_dInstructionMaps[sEncoding].sDisParse + ',')
            else:
                ## @todo
                #IDX_ParseTwoByteEsc,
                #IDX_ParseGrp1,
                #IDX_ParseShiftGrp2,
                #IDX_ParseGrp3,
                #IDX_ParseGrp4,
                #IDX_ParseGrp5,
                #IDX_Parse3DNow,
                #IDX_ParseGrp6,
                #IDX_ParseGrp7,
                #IDX_ParseGrp8,
                #IDX_ParseGrp9,
                #IDX_ParseGrp10,
                #IDX_ParseGrp12,
                #IDX_ParseGrp13,
                #IDX_ParseGrp14,
                #IDX_ParseGrp15,
                #IDX_ParseGrp16,
                #IDX_ParseThreeByteEsc4,
                #IDX_ParseThreeByteEsc5,
                #IDX_ParseModFence,
                #IDX_ParseEscFP,
                #IDX_ParseNopPause,
                #IDX_ParseInvOpModRM,
                assert False, str(oInstr)

            # Check for immediates and stuff in the remaining operands.
            for oOperand in oInstr.aoOperands[len(asColumns) - iStart:]:
                sIdx = g_kdOpTypes[oOperand.sType][0]
                if sIdx != 'IDX_UseModRM':
                    asColumns.append(sIdx + ',')
            # Pad the decoder columns up to the operand maximum.
            asColumns.extend(['0,'] * (cMaxOperands - (len(asColumns) - iStart)))

            #
            # Opcode and operands.
            #
            assert oInstr.sDisEnum, str(oInstr)
            asColumns.append(oInstr.sDisEnum + ',')
            asParams = ['OP_PARM_' + g_kdOpTypes[oOperand.sType][3] + ',' for oOperand in oInstr.aoOperands]
            asParams.extend(['OP_PARM_NONE,'] * (cMaxOperands - len(asParams)))
            asColumns.extend(asParams)

            #
            # Flags.
            #
            asDefines = [g_kdHints[sHint] for sHint in sorted(oInstr.dHints.keys())]
            sFlags = ' | '.join([sDefine for sDefine in asDefines if sDefine.startswith('DISOPTYPE_')])
            asColumns.append((sFlags or '0') + '),')

            #
            # Format the columns into a line.  Each column is padded out to its
            # offset, or separated by a single space if already past it.
            #
            sLine = ''
            for iColumn, sColumn in enumerate(asColumns):
                sLine += ' ' * max(aoffColumns[iColumn] - len(sLine), 1)
                sLine += sColumn

            # OP("psrlw %Vdq,%Wdq", IDX_ParseModRM, IDX_UseModRM, 0, OP_PSRLW, OP_PARM_Vdq, OP_PARM_Wdq, OP_PARM_NONE,
            # DISOPTYPE_HARMLESS),
            # define OP(pszOpcode, idxParse1, idxParse2, idxParse3, opcode, param1, param2, param3, optype) \
            # { pszOpcode, idxParse1, idxParse2, idxParse3, 0, opcode, param1, param2, param3, 0, 0, optype }

            asLines.append(sLine)

        asLines.append('};')
        asLines.append('AssertCompile(RT_ELEMENTS(%s) == %s);' % (oMap.getDisasTableName(), oMap.getTableSize(),))

        #
        # Write out the lines.
        #
        oDstFile.write('\n'.join(asLines))
        oDstFile.write('\n')
        break #for now
3423
# When run as a script, write the disassembler tables to standard output.
if __name__ == '__main__':
    generateDisassemblerTables()
3426
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette