VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllInstructionsPython.py@ 66976

Last change on this file since 66976 was 66976, checked in by vboxsync, 8 years ago

IEM: Implemented vmovapd Vpd,Wpd (VEX.66.28).

  • Property svn:eol-style set to LF
  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 144.9 KB
Line 
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3# $Id: IEMAllInstructionsPython.py 66976 2017-05-19 12:23:32Z vboxsync $
4
5"""
6IEM instruction extractor.
7
8This script/module parses the IEMAllInstruction*.cpp.h files next to it and
9collects information about the instructions. It can then be used to generate
10disassembler tables and tests.
11"""
12
13__copyright__ = \
14"""
15Copyright (C) 2017 Oracle Corporation
16
17This file is part of VirtualBox Open Source Edition (OSE), as
18available from http://www.virtualbox.org. This file is free software;
19you can redistribute it and/or modify it under the terms of the GNU
20General Public License (GPL) as published by the Free Software
21Foundation, in version 2 as it comes in the "COPYING" file of the
22VirtualBox OSE distribution. VirtualBox OSE is distributed in the
23hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
24
25The contents of this file may alternatively be used under the terms
26of the Common Development and Distribution License Version 1.0
27(CDDL) only, as it comes in the "COPYING.CDDL" file of the
28VirtualBox OSE distribution, in which case the provisions of the
29CDDL are applicable instead of those of the GPL.
30
31You may elect to license modified versions of this file under the
32terms and conditions of either the GPL or the CDDL or both.
33"""
34__version__ = "$Revision: 66976 $"
35
36# pylint: disable=anomalous-backslash-in-string
37
38# Standard python imports.
39import os
40import re
41import sys
42
43## Only the main script needs to modify the path.
44#g_ksValidationKitDir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),
45# 'ValidationKit');
46#sys.path.append(g_ksValidationKitDir);
47#
48#from common import utils; - Windows build boxes doesn't have pywin32.
49
# Python 3 hacks:
# Python 3 has no builtin named 'long', so alias it to 'int'; code further
# down in this module calls long() to convert strings to integers.
if sys.version_info[0] >= 3:
    long = int;     # pylint: disable=redefined-builtin,invalid-name
53
54
## X86_EFL_XXX constant names mapped to their numeric values.
## The trailing comment on each line gives the value as a RT_BIT_32 / UINT32_C
## expression.
g_kdX86EFlagsConstants = {
    'X86_EFL_CF': 0x00000001, # RT_BIT_32(0)
    'X86_EFL_1': 0x00000002, # RT_BIT_32(1)
    'X86_EFL_PF': 0x00000004, # RT_BIT_32(2)
    'X86_EFL_AF': 0x00000010, # RT_BIT_32(4)
    'X86_EFL_ZF': 0x00000040, # RT_BIT_32(6)
    'X86_EFL_SF': 0x00000080, # RT_BIT_32(7)
    'X86_EFL_TF': 0x00000100, # RT_BIT_32(8)
    'X86_EFL_IF': 0x00000200, # RT_BIT_32(9)
    'X86_EFL_DF': 0x00000400, # RT_BIT_32(10)
    'X86_EFL_OF': 0x00000800, # RT_BIT_32(11)
    'X86_EFL_IOPL': 0x00003000, # (RT_BIT_32(12) | RT_BIT_32(13))
    'X86_EFL_NT': 0x00004000, # RT_BIT_32(14)
    'X86_EFL_RF': 0x00010000, # RT_BIT_32(16)
    'X86_EFL_VM': 0x00020000, # RT_BIT_32(17)
    'X86_EFL_AC': 0x00040000, # RT_BIT_32(18)
    'X86_EFL_VIF': 0x00080000, # RT_BIT_32(19)
    'X86_EFL_VIP': 0x00100000, # RT_BIT_32(20)
    'X86_EFL_ID': 0x00200000, # RT_BIT_32(21)
    'X86_EFL_LIVE_MASK': 0x003f7fd5, # UINT32_C(0x003f7fd5)
    'X86_EFL_RA1_MASK': 0x00000002, # RT_BIT_32(1)
};
77
## EFlags values allowed in \@opfltest, \@opflmodify, \@opflundef, \@opflset, and \@opflclear.
##
## Each mnemonic maps to the name of a constant in g_kdX86EFlagsConstants.  A
## leading '!' on the constant name marks the mnemonic as meaning "flag clear".
g_kdEFlagsMnemonics = {
    # Debugger flag notation (sorted by value):
    'cf': 'X86_EFL_CF', ##< Carry Flag.
    'nc': '!X86_EFL_CF', ##< No Carry.

    # NOTE(review): x86 sets PF when the result has even parity, so 'po' (flag
    # set) and 'pe' (flag clear) look swapped relative to the usual debugger
    # notation - confirm before changing, existing test values may rely on this.
    'po': 'X86_EFL_PF', ##< Parity Odd.
    'pe': '!X86_EFL_PF', ##< Parity Even.

    'af': 'X86_EFL_AF', ##< Aux Flag.
    'na': '!X86_EFL_AF', ##< No Aux.

    'zr': 'X86_EFL_ZF', ##< ZeRo.
    'nz': '!X86_EFL_ZF', ##< No Zero.

    'ng': 'X86_EFL_SF', ##< NeGative (sign).
    'pl': '!X86_EFL_SF', ##< PLus (sign).

    'tf': 'X86_EFL_TF', ##< Trap flag.

    'ei': 'X86_EFL_IF', ##< Enabled Interrupts.
    'di': '!X86_EFL_IF', ##< Disabled Interrupts.

    'dn': 'X86_EFL_DF', ##< DowN (string op direction).
    'up': '!X86_EFL_DF', ##< UP (string op direction).

    'ov': 'X86_EFL_OF', ##< OVerflow.
    'nv': '!X86_EFL_OF', ##< No Overflow.

    'nt': 'X86_EFL_NT', ##< Nested Task.
    'rf': 'X86_EFL_RF', ##< Resume Flag.
    'vm': 'X86_EFL_VM', ##< Virtual-8086 Mode.
    'ac': 'X86_EFL_AC', ##< Alignment Check.
    'vif': 'X86_EFL_VIF', ##< Virtual Interrupt Flag.
    'vip': 'X86_EFL_VIP', ##< Virtual Interrupt Pending.

    # Reference manual notation not covered above (sorted by value):
    'pf': 'X86_EFL_PF',
    'zf': 'X86_EFL_ZF',
    'sf': 'X86_EFL_SF',
    'if': 'X86_EFL_IF',
    'df': 'X86_EFL_DF',
    'of': 'X86_EFL_OF',
    'iopl': 'X86_EFL_IOPL',
    'id': 'X86_EFL_ID',
};
124
## Constants and values for CR0.
## Maps X86_CR0_XXX names to their bit masks; the trailing comments give the
## same value as a RT_BIT_32 expression.
g_kdX86Cr0Constants = {
    'X86_CR0_PE': 0x00000001, # RT_BIT_32(0)
    'X86_CR0_MP': 0x00000002, # RT_BIT_32(1)
    'X86_CR0_EM': 0x00000004, # RT_BIT_32(2)
    'X86_CR0_TS': 0x00000008, # RT_BIT_32(3)
    'X86_CR0_ET': 0x00000010, # RT_BIT_32(4)
    'X86_CR0_NE': 0x00000020, # RT_BIT_32(5)
    'X86_CR0_WP': 0x00010000, # RT_BIT_32(16)
    'X86_CR0_AM': 0x00040000, # RT_BIT_32(18)
    'X86_CR0_NW': 0x20000000, # RT_BIT_32(29)
    'X86_CR0_CD': 0x40000000, # RT_BIT_32(30)
    'X86_CR0_PG': 0x80000000, # RT_BIT_32(31)
};
139
## Constants and values for CR4.
## Maps X86_CR4_XXX names to their bit masks; the trailing comments give the
## same value as a RT_BIT_32 expression.
g_kdX86Cr4Constants = {
    'X86_CR4_VME': 0x00000001, # RT_BIT_32(0)
    'X86_CR4_PVI': 0x00000002, # RT_BIT_32(1)
    'X86_CR4_TSD': 0x00000004, # RT_BIT_32(2)
    'X86_CR4_DE': 0x00000008, # RT_BIT_32(3)
    'X86_CR4_PSE': 0x00000010, # RT_BIT_32(4)
    'X86_CR4_PAE': 0x00000020, # RT_BIT_32(5)
    'X86_CR4_MCE': 0x00000040, # RT_BIT_32(6)
    'X86_CR4_PGE': 0x00000080, # RT_BIT_32(7)
    'X86_CR4_PCE': 0x00000100, # RT_BIT_32(8)
    'X86_CR4_OSFXSR': 0x00000200, # RT_BIT_32(9)
    'X86_CR4_OSXMMEEXCPT': 0x00000400, # RT_BIT_32(10)
    'X86_CR4_VMXE': 0x00002000, # RT_BIT_32(13)
    'X86_CR4_SMXE': 0x00004000, # RT_BIT_32(14)
    'X86_CR4_PCIDE': 0x00020000, # RT_BIT_32(17)
    'X86_CR4_OSXSAVE': 0x00040000, # RT_BIT_32(18)
    'X86_CR4_SMEP': 0x00100000, # RT_BIT_32(20)
    'X86_CR4_SMAP': 0x00200000, # RT_BIT_32(21)
    'X86_CR4_PKE': 0x00400000, # RT_BIT_32(22)
};
161
## XSAVE components (XCR0).
## Maps XSAVE_C_XXX names to their bit masks.  The two XSAVE_C_ALL_XXX entries
## are combined masks rather than single bits.
g_kdX86XSaveCConstants = {
    'XSAVE_C_X87': 0x00000001,
    'XSAVE_C_SSE': 0x00000002,
    'XSAVE_C_YMM': 0x00000004,
    'XSAVE_C_BNDREGS': 0x00000008,
    'XSAVE_C_BNDCSR': 0x00000010,
    'XSAVE_C_OPMASK': 0x00000020,
    'XSAVE_C_ZMM_HI256': 0x00000040,
    'XSAVE_C_ZMM_16HI': 0x00000080,
    'XSAVE_C_PKRU': 0x00000200,
    'XSAVE_C_LWP': 0x4000000000000000,
    'XSAVE_C_X': 0x8000000000000000,
    'XSAVE_C_ALL_AVX': 0x000000c4, # For clearing all AVX bits (YMM | ZMM_HI256 | ZMM_16HI).
    'XSAVE_C_ALL_AVX_SSE': 0x000000c6, # For clearing all AVX and SSE bits (the above | SSE).
};
178
179
## \@op[1-4] locations
## Every value is an empty list, so only the keys carry information here.
g_kdOpLocations = {
    'reg': [], ## modrm.reg
    'rm': [], ## modrm.rm
    'imm': [], ## immediate instruction data
    'vvvv': [], ## VEX.vvvv

    # fixed registers.
    'AL': [],
    'rAX': [],
    'rSI': [],
    'rDI': [],
    'rFLAGS': [],
    'CS': [],
    'DS': [],
    'ES': [],
    'FS': [],
    'GS': [],
    'SS': [],
};
200
## \@op[1-4] types
##
## Value fields:
## - 0: the normal IDX_ParseXXX handler (IDX_UseModRM == IDX_ParseModRM).
## - 1: the location (g_kdOpLocations).
## - 2: disassembler format string version of the type.
## - 3: disassembler OP_PARAM_XXX (XXX only).
##
## Note! See the A.2.1 in SDM vol 2 for the type names.
##
## NOTE(review): the _RO / _WO / _RW key suffixes presumably stand for
##               read-only / write-only / read-write operand access - confirm.
## NOTE(review): 'Nq' uses '%Qq' as its format string while its OP_PARAM is
##               'Nq'; verify that this is intentional.
g_kdOpTypes = {
    # Fixed addresses
    'Ap': ( 'IDX_ParseImmAddrF', 'imm', '%Ap', 'Ap', ),

    # ModR/M.rm
    'Eb': ( 'IDX_UseModRM', 'rm', '%Eb', 'Eb', ),
    'Ew': ( 'IDX_UseModRM', 'rm', '%Ew', 'Ew', ),
    'Ev': ( 'IDX_UseModRM', 'rm', '%Ev', 'Ev', ),
    'Wss': ( 'IDX_UseModRM', 'rm', '%Wss', 'Wss', ),
    'Wss_WO': ( 'IDX_UseModRM', 'rm', '%Wss', 'Wss', ),
    'Wsd': ( 'IDX_UseModRM', 'rm', '%Wsd', 'Wsd', ),
    'Wsd_WO': ( 'IDX_UseModRM', 'rm', '%Wsd', 'Wsd', ),
    'Wps': ( 'IDX_UseModRM', 'rm', '%Wps', 'Wps', ),
    'Wps_WO': ( 'IDX_UseModRM', 'rm', '%Wps', 'Wps', ),
    'Wpd': ( 'IDX_UseModRM', 'rm', '%Wpd', 'Wpd', ),
    'Wpd_WO': ( 'IDX_UseModRM', 'rm', '%Wpd', 'Wpd', ),
    'Wdq': ( 'IDX_UseModRM', 'rm', '%Wdq', 'Wdq', ),
    'Wdq_WO': ( 'IDX_UseModRM', 'rm', '%Wdq', 'Wdq', ),
    'Wq': ( 'IDX_UseModRM', 'rm', '%Wq', 'Wq', ),
    'WqZxReg_WO': ( 'IDX_UseModRM', 'rm', '%Wq', 'Wq', ),
    'Wx': ( 'IDX_UseModRM', 'rm', '%Wx', 'Wx', ),

    # ModR/M.rm - register only.
    'Uq': ( 'IDX_UseModRM', 'rm', '%Uq', 'Uq', ),
    'UqHi': ( 'IDX_UseModRM', 'rm', '%Uq', 'UqHi', ),
    'Uss': ( 'IDX_UseModRM', 'rm', '%Uss', 'Uss', ),
    'Uss_WO': ( 'IDX_UseModRM', 'rm', '%Uss', 'Uss', ),
    'Usd': ( 'IDX_UseModRM', 'rm', '%Usd', 'Usd', ),
    'Usd_WO': ( 'IDX_UseModRM', 'rm', '%Usd', 'Usd', ),
    'Nq': ( 'IDX_UseModRM', 'rm', '%Qq', 'Nq', ),

    # ModR/M.rm - memory only.
    'Ma': ( 'IDX_UseModRM', 'rm', '%Ma', 'Ma', ), ##< Only used by BOUND.
    'Mb_RO': ( 'IDX_UseModRM', 'rm', '%Mb', 'Mb', ),
    'Md': ( 'IDX_UseModRM', 'rm', '%Md', 'Md', ),
    'Md_RO': ( 'IDX_UseModRM', 'rm', '%Md', 'Md', ),
    'Md_WO': ( 'IDX_UseModRM', 'rm', '%Md', 'Md', ),
    'Mq': ( 'IDX_UseModRM', 'rm', '%Mq', 'Mq', ),
    'Mq_WO': ( 'IDX_UseModRM', 'rm', '%Mq', 'Mq', ),
    'M_RO': ( 'IDX_UseModRM', 'rm', '%M', 'M', ),
    'M_RW': ( 'IDX_UseModRM', 'rm', '%M', 'M', ),

    # ModR/M.reg
    'Gb': ( 'IDX_UseModRM', 'reg', '%Gb', 'Gb', ),
    'Gw': ( 'IDX_UseModRM', 'reg', '%Gw', 'Gw', ),
    'Gv': ( 'IDX_UseModRM', 'reg', '%Gv', 'Gv', ),
    'Gv_RO': ( 'IDX_UseModRM', 'reg', '%Gv', 'Gv', ),
    'Pq_WO': ( 'IDX_UseModRM', 'reg', '%Pq', 'Pq', ),
    'Vss': ( 'IDX_UseModRM', 'reg', '%Vss', 'Vss', ),
    'Vss_WO': ( 'IDX_UseModRM', 'reg', '%Vss', 'Vss', ),
    'VssZx_WO': ( 'IDX_UseModRM', 'reg', '%Vss', 'Vss', ),
    'Vsd': ( 'IDX_UseModRM', 'reg', '%Vsd', 'Vsd', ),
    'Vsd_WO': ( 'IDX_UseModRM', 'reg', '%Vsd', 'Vsd', ),
    'VsdZx_WO': ( 'IDX_UseModRM', 'reg', '%Vsd', 'Vsd', ),
    'Vps': ( 'IDX_UseModRM', 'reg', '%Vps', 'Vps', ),
    'Vps_WO': ( 'IDX_UseModRM', 'reg', '%Vps', 'Vps', ),
    'Vpd': ( 'IDX_UseModRM', 'reg', '%Vpd', 'Vpd', ),
    'Vpd_WO': ( 'IDX_UseModRM', 'reg', '%Vpd', 'Vpd', ),
    'Vq': ( 'IDX_UseModRM', 'reg', '%Vq', 'Vq', ),
    'Vq_WO': ( 'IDX_UseModRM', 'reg', '%Vq', 'Vq', ),
    'Vdq_WO': ( 'IDX_UseModRM', 'reg', '%Vdq', 'Vdq', ),
    'VqHi': ( 'IDX_UseModRM', 'reg', '%Vdq', 'VdqHi', ),
    'VqHi_WO': ( 'IDX_UseModRM', 'reg', '%Vdq', 'VdqHi', ),
    'VqZx_WO': ( 'IDX_UseModRM', 'reg', '%Vq', 'VqZx', ),
    'Vx_WO': ( 'IDX_UseModRM', 'reg', '%Vx', 'Vx', ),

    # VEX.vvvv
    'HssHi': ( 'IDX_UseModRM', 'vvvv', '%Hx', 'HssHi', ),
    'HsdHi': ( 'IDX_UseModRM', 'vvvv', '%Hx', 'HsdHi', ),
    'HqHi': ( 'IDX_UseModRM', 'vvvv', '%Hq', 'HqHi', ),

    # Immediate values.
    'Ib': ( 'IDX_ParseImmByte', 'imm', '%Ib', 'Ib', ), ##< NB! Could be IDX_ParseImmByteSX for some instrs.
    'Iw': ( 'IDX_ParseImmUshort', 'imm', '%Iw', 'Iw', ),
    'Id': ( 'IDX_ParseImmUlong', 'imm', '%Id', 'Id', ),
    'Iq': ( 'IDX_ParseImmQword', 'imm', '%Iq', 'Iq', ),
    'Iv': ( 'IDX_ParseImmV', 'imm', '%Iv', 'Iv', ), ##< o16: word, o32: dword, o64: qword
    'Iz': ( 'IDX_ParseImmZ', 'imm', '%Iz', 'Iz', ), ##< o16: word, o32|o64:dword

    # Address operands (no ModR/M).
    'Ob': ( 'IDX_ParseImmAddr', 'imm', '%Ob', 'Ob', ),
    'Ov': ( 'IDX_ParseImmAddr', 'imm', '%Ov', 'Ov', ),

    # Relative jump targets
    'Jb': ( 'IDX_ParseImmBRel', 'imm', '%Jb', 'Jb', ),
    'Jv': ( 'IDX_ParseImmVRel', 'imm', '%Jv', 'Jv', ),

    # DS:rSI
    'Xb': ( 'IDX_ParseXb', 'rSI', '%eSI', 'Xb', ),
    'Xv': ( 'IDX_ParseXv', 'rSI', '%eSI', 'Xv', ),
    # ES:rDI
    'Yb': ( 'IDX_ParseYb', 'rDI', '%eDI', 'Yb', ),
    'Yv': ( 'IDX_ParseYv', 'rDI', '%eDI', 'Yv', ),

    # rFLAGS
    'Fv': ( 'IDX_ParseFixedReg', 'rFLAGS', '%Fv', 'Fv', ),

    # Fixed registers.
    'AL': ( 'IDX_ParseFixedReg', 'AL', 'al', 'REG_AL', ),
    'rAX': ( 'IDX_ParseFixedReg', 'rAX', '%eAX', 'REG_EAX', ),
    'CS': ( 'IDX_ParseFixedReg', 'CS', 'cs', 'REG_CS', ), # 8086: push CS
    'DS': ( 'IDX_ParseFixedReg', 'DS', 'ds', 'REG_DS', ),
    'ES': ( 'IDX_ParseFixedReg', 'ES', 'es', 'REG_ES', ),
    'FS': ( 'IDX_ParseFixedReg', 'FS', 'fs', 'REG_FS', ),
    'GS': ( 'IDX_ParseFixedReg', 'GS', 'gs', 'REG_GS', ),
    'SS': ( 'IDX_ParseFixedReg', 'SS', 'ss', 'REG_SS', ),
};
316
317# IDX_ParseFixedReg
318# IDX_ParseVexDest
319
320
## IEMFORM_XXX mappings.
##
## Each value is a tuple of:
## - 0: the encoding, one of the keys in g_kdEncodings.
## - 1: list with the location (g_kdOpLocations key) of each operand in
##      order, or None for the 'FIXED' form.
g_kdIemForms = { # sEncoding, [ sWhere1, ... ]
    'RM': ( 'ModR/M', [ 'reg', 'rm' ], ),
    'RM_REG': ( 'ModR/M', [ 'reg', 'rm' ], ),
    'RM_MEM': ( 'ModR/M', [ 'reg', 'rm' ], ),
    'MR': ( 'ModR/M', [ 'rm', 'reg' ], ),
    'MR_REG': ( 'ModR/M', [ 'rm', 'reg' ], ),
    'MR_MEM': ( 'ModR/M', [ 'rm', 'reg' ], ),
    'M': ( 'ModR/M', [ 'rm', ], ),
    'M_REG': ( 'ModR/M', [ 'rm', ], ),
    'M_MEM': ( 'ModR/M', [ 'rm', ], ),
    'R': ( 'ModR/M', [ 'reg', ], ),

    'VEX_RM': ( 'VEX.ModR/M', [ 'reg', 'rm' ], ),
    'VEX_RM_REG': ( 'VEX.ModR/M', [ 'reg', 'rm' ], ),
    'VEX_RM_MEM': ( 'VEX.ModR/M', [ 'reg', 'rm' ], ),
    'VEX_XM': ( 'VEX.ModR/M', [ 'reg', 'rm' ], ), # same as VEX_RM_MEM, typo?
    'VEX_MR': ( 'VEX.ModR/M', [ 'rm', 'reg' ], ),
    'VEX_MR_REG': ( 'VEX.ModR/M', [ 'rm', 'reg' ], ),
    'VEX_MR_MEM': ( 'VEX.ModR/M', [ 'rm', 'reg' ], ),
    'VEX_M': ( 'VEX.ModR/M', [ 'rm', ], ),
    'VEX_M_REG': ( 'VEX.ModR/M', [ 'rm', ], ),
    'VEX_M_MEM': ( 'VEX.ModR/M', [ 'rm', ], ),
    'VEX_R': ( 'VEX.ModR/M', [ 'reg', ], ),
    'VEX_RVM': ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm'], ),
    'VEX_RVM_REG': ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm'], ),
    'VEX_RVM_MEM': ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm'], ),
    'VEX_MVR': ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg'], ),
    'VEX_MVR_REG': ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg'], ),
    'VEX_MVR_MEM': ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg'], ),

    'FIXED': ( 'fixed', None, )
};
354
## \@oppfx values.
## Every value is an empty list, so only the keys carry information here.
g_kdPrefixes = {
    'none': [],
    '0x66': [],
    '0xf3': [],
    '0xf2': [],
};
362
## Special \@opcode tag values.
## Every value is an empty list, so only the keys carry information here.
g_kdSpecialOpcodes = {
    '/reg': [],
    'mr/reg': [],
    '11 /reg': [],
    '!11 /reg': [],
    '11 mr/reg': [],
    '!11 mr/reg': [],
};
372
## Special \@opcodesub tag values.
## Each value is a one element list holding the canonical form of the tag
## (a key in g_kdSpecialOpcodes), or None for 'none'.
g_kdSubOpcodes = {
    'none': [ None, ],
    '11 mr/reg': [ '11 mr/reg', ],
    '11': [ '11 mr/reg', ], ##< alias
    '!11 mr/reg': [ '!11 mr/reg', ],
    '!11': [ '!11 mr/reg', ], ##< alias
};
381
## Valid values for \@openc
## Each value is a one element list holding a BS3CG1ENC_XXX constant name, or
## None for 'prefix'.
g_kdEncodings = {
    'ModR/M': [ 'BS3CG1ENC_MODRM', ], ##< ModR/M
    'VEX.ModR/M': [ 'BS3CG1ENC_VEX_MODRM', ], ##< VEX...ModR/M
    'fixed': [ 'BS3CG1ENC_FIXED', ], ##< Fixed encoding (address, registers, unused, etc).
    'VEX.fixed': [ 'BS3CG1ENC_VEX_FIXED', ], ##< VEX + fixed encoding (address, registers, unused, etc).
    'prefix': [ None, ], ##< Prefix
};
390
## \@opunused, \@opinvalid, \@opinvlstyle
## Every value is an empty list, so only the keys carry information here.
g_kdInvalidStyles = {
    'immediate': [], ##< CPU stops decoding immediately after the opcode.
    'vex.modrm': [], ##< VEX+ModR/M, everyone.
    'intel-modrm': [], ##< Intel decodes ModR/M.
    'intel-modrm-imm8': [], ##< Intel decodes ModR/M and an 8-bit immediate.
    'intel-opcode-modrm': [], ##< Intel decodes another opcode byte followed by ModR/M. (Unused extension tables.)
    'intel-opcode-modrm-imm8': [], ##< Intel decodes another opcode byte followed by ModR/M and an 8-bit immediate.
};
400
## CPU names.
## Every value is an empty tuple, so only the keys carry information here.
g_kdCpuNames = {
    '8086': (),
    '80186': (),
    '80286': (),
    '80386': (),
    '80486': (),
};
408
## \@opcpuid
## Maps the CPUID feature names to X86_CPUID_XXX constant names.
##
## NOTE(review): 'pclmul', 'monitor' and 'smx' map to constants named after
##               other features (DTES64, CPLDS and TM2).  This looks like a
##               copy & paste slip - confirm against the X86_CPUID_FEATURE_ECX_XXX
##               definitions before relying on these three entries.
g_kdCpuIdFlags = {
    'vme': 'X86_CPUID_FEATURE_EDX_VME',
    'tsc': 'X86_CPUID_FEATURE_EDX_TSC',
    'msr': 'X86_CPUID_FEATURE_EDX_MSR',
    'cx8': 'X86_CPUID_FEATURE_EDX_CX8',
    'sep': 'X86_CPUID_FEATURE_EDX_SEP',
    'cmov': 'X86_CPUID_FEATURE_EDX_CMOV',
    'clfsh': 'X86_CPUID_FEATURE_EDX_CLFSH',
    'clflushopt': 'X86_CPUID_STEXT_FEATURE_EBX_CLFLUSHOPT',
    'mmx': 'X86_CPUID_FEATURE_EDX_MMX',
    'fxsr': 'X86_CPUID_FEATURE_EDX_FXSR',
    'sse': 'X86_CPUID_FEATURE_EDX_SSE',
    'sse2': 'X86_CPUID_FEATURE_EDX_SSE2',
    'sse3': 'X86_CPUID_FEATURE_ECX_SSE3',
    'pclmul': 'X86_CPUID_FEATURE_ECX_DTES64',
    'monitor': 'X86_CPUID_FEATURE_ECX_CPLDS',
    'vmx': 'X86_CPUID_FEATURE_ECX_VMX',
    'smx': 'X86_CPUID_FEATURE_ECX_TM2',
    'ssse3': 'X86_CPUID_FEATURE_ECX_SSSE3',
    'fma': 'X86_CPUID_FEATURE_ECX_FMA',
    'cx16': 'X86_CPUID_FEATURE_ECX_CX16',
    'pcid': 'X86_CPUID_FEATURE_ECX_PCID',
    'sse41': 'X86_CPUID_FEATURE_ECX_SSE4_1',
    'sse42': 'X86_CPUID_FEATURE_ECX_SSE4_2',
    'movbe': 'X86_CPUID_FEATURE_ECX_MOVBE',
    'popcnt': 'X86_CPUID_FEATURE_ECX_POPCNT',
    'aes': 'X86_CPUID_FEATURE_ECX_AES',
    'xsave': 'X86_CPUID_FEATURE_ECX_XSAVE',
    'avx': 'X86_CPUID_FEATURE_ECX_AVX',
    'f16c': 'X86_CPUID_FEATURE_ECX_F16C',
    'rdrand': 'X86_CPUID_FEATURE_ECX_RDRAND',

    'axmmx': 'X86_CPUID_AMD_FEATURE_EDX_AXMMX',
    '3dnowext': 'X86_CPUID_AMD_FEATURE_EDX_3DNOW_EX',
    '3dnow': 'X86_CPUID_AMD_FEATURE_EDX_3DNOW',
    'svm': 'X86_CPUID_AMD_FEATURE_ECX_SVM',
    'cr8l': 'X86_CPUID_AMD_FEATURE_ECX_CR8L',
    'abm': 'X86_CPUID_AMD_FEATURE_ECX_ABM',
    'sse4a': 'X86_CPUID_AMD_FEATURE_ECX_SSE4A',
    '3dnowprf': 'X86_CPUID_AMD_FEATURE_ECX_3DNOWPRF',
    'xop': 'X86_CPUID_AMD_FEATURE_ECX_XOP',
    'fma4': 'X86_CPUID_AMD_FEATURE_ECX_FMA4',
};
453
## \@ophints values.
## Maps each hint to a DISOPTYPE_XXX constant name.  The last four hints map
## to an empty string, i.e. they have no DISOPTYPE_XXX name in this table.
g_kdHints = {
    'invalid': 'DISOPTYPE_INVALID', ##<
    'harmless': 'DISOPTYPE_HARMLESS', ##<
    'controlflow': 'DISOPTYPE_CONTROLFLOW', ##<
    'potentially_dangerous': 'DISOPTYPE_POTENTIALLY_DANGEROUS', ##<
    'dangerous': 'DISOPTYPE_DANGEROUS', ##<
    'portio': 'DISOPTYPE_PORTIO', ##<
    'privileged': 'DISOPTYPE_PRIVILEGED', ##<
    'privileged_notrap': 'DISOPTYPE_PRIVILEGED_NOTRAP', ##<
    'uncond_controlflow': 'DISOPTYPE_UNCOND_CONTROLFLOW', ##<
    'relative_controlflow': 'DISOPTYPE_RELATIVE_CONTROLFLOW', ##<
    'cond_controlflow': 'DISOPTYPE_COND_CONTROLFLOW', ##<
    'interrupt': 'DISOPTYPE_INTERRUPT', ##<
    'illegal': 'DISOPTYPE_ILLEGAL', ##<
    'rrm_dangerous': 'DISOPTYPE_RRM_DANGEROUS', ##< Some additional dangerous ones when recompiling raw r0.
    'rrm_dangerous_16': 'DISOPTYPE_RRM_DANGEROUS_16', ##< Some additional dangerous ones when recompiling 16-bit raw r0.
    'inhibit_irqs': 'DISOPTYPE_INHIBIT_IRQS', ##< Will or can inhibit irqs (sti, pop ss, mov ss)
    'portio_read': 'DISOPTYPE_PORTIO_READ', ##<
    'portio_write': 'DISOPTYPE_PORTIO_WRITE', ##<
    'invalid_64': 'DISOPTYPE_INVALID_64', ##< Invalid in 64 bits mode
    'only_64': 'DISOPTYPE_ONLY_64', ##< Only valid in 64 bits mode
    'default_64_op_size': 'DISOPTYPE_DEFAULT_64_OP_SIZE', ##< Default 64 bits operand size
    'forced_64_op_size': 'DISOPTYPE_FORCED_64_OP_SIZE', ##< Forced 64 bits operand size; regardless of prefix bytes
    'rexb_extends_opreg': 'DISOPTYPE_REXB_EXTENDS_OPREG', ##< REX.B extends the register field in the opcode byte
    'mod_fixed_11': 'DISOPTYPE_MOD_FIXED_11', ##< modrm.mod is always 11b
    'forced_32_op_size_x86': 'DISOPTYPE_FORCED_32_OP_SIZE_X86', ##< Forced 32 bits operand size; regardless of prefix bytes
                                                                ## (only in 16 & 32 bits mode!)
    'sse': 'DISOPTYPE_SSE', ##< SSE,SSE2,SSE3,AVX,++ instruction. Not implemented yet!
    'mmx': 'DISOPTYPE_MMX', ##< MMX,MMXExt,3DNow,++ instruction. Not implemented yet!
    'fpu': 'DISOPTYPE_FPU', ##< FPU instruction. Not implemented yet!
    'ignores_op_size': '', ##< Ignores both operand size prefixes (66h + REX.W).
    'ignores_vex_l': '', ##< Ignores VEX.L.
    'vex_l_zero': '', ##< VEX.L must be 0.
    'lock_allowed': '', ##< Lock prefix allowed.
};
490
## \@opxcpttype values (see SDMv2 2.4, 2.7).
## Every value is an empty list, so only the keys carry information here.
g_kdXcptTypes = {
    'none': [],
    '1': [],
    '2': [],
    '3': [],
    '4': [],
    '4UA': [],
    '5': [],
    '5LZ': [], # LZ = VEX.L must be zero.
    '6': [],
    '7': [],
    '7LZ': [], # LZ as for 5LZ above.
    '8': [],
    '11': [],
    '12': [],
    'E1': [],
    'E1NF': [],
    'E2': [],
    'E3': [],
    'E3NF': [],
    'E4': [],
    'E4NF': [],
    'E5': [],
    'E5NF': [],
    'E6': [],
    'E6NF': [],
    'E7NF': [],
    'E9': [],
    'E9NF': [],
    'E10': [],
    'E11': [],
    'E12': [],
    'E12NF': [],
};
526
527
528def _isValidOpcodeByte(sOpcode):
529 """
530 Checks if sOpcode is a valid lower case opcode byte.
531 Returns true/false.
532 """
533 if len(sOpcode) == 4:
534 if sOpcode[:2] == '0x':
535 if sOpcode[2] in '0123456789abcdef':
536 if sOpcode[3] in '0123456789abcdef':
537 return True;
538 return False;
539
540
class InstructionMap(object):
    """
    Instruction map.

    The opcode map provides the lead opcode bytes (empty for the one byte
    opcode map).  An instruction can be member of multiple opcode maps as long
    as it uses the same opcode value within the map (because of VEX).
    """

    ## Valid sEncoding values.  The values are empty lists, only the keys matter.
    ## NOTE(review): the 'vvvvv' in the comments below presumably refers to the
    ##               map select field (mmmmm) of the prefix - confirm.
    kdEncodings = {
        'legacy':   [],
        'vex1':     [], ##< VEX or EVEX prefix with vvvvv = 1
        'vex2':     [], ##< VEX or EVEX prefix with vvvvv = 2
        'vex3':     [], ##< VEX or EVEX prefix with vvvvv = 3
        'xop8':     [], ##< XOP prefix with vvvvv = 8
        'xop9':     [], ##< XOP prefix with vvvvv = 9
        'xop10':    [], ##< XOP prefix with vvvvv = 10
    };
    ## Selectors.
    ## The first value is the number of table entries required by a
    ## decoder or disassembler for this type of selector.
    kdSelectors = {
        'byte':     [ 256, ], ##< next opcode byte selects the instruction (default).
        '/r':       [   8, ], ##< modrm.reg selects the instruction.
        'memreg /r':[  16, ], ##< modrm.reg and (modrm.mod == 3) selects the instruction.
        'mod /r':   [  32, ], ##< modrm.reg and modrm.mod selects the instruction.
        '!11 /r':   [   8, ], ##< modrm.reg selects the instruction with modrm.mod != 0y11.
        '11 /r':    [   8, ], ##< modrm.reg select the instruction with modrm.mod == 0y11.
        '11':       [  64, ], ##< modrm.reg and modrm.rm select the instruction with modrm.mod == 0y11.
    };

    def __init__(self, sName, asLeadOpcodes = None, sSelector = 'byte', sEncoding = 'legacy', sDisParse = None):
        """
        Creates an empty instruction map.

        sName is the map name, asLeadOpcodes the lead opcode bytes as lower
        case hex strings (None means no lead bytes), sSelector a key in
        kdSelectors, sEncoding a key in kdEncodings, and sDisParse either
        None or a name starting with 'IDX_Parse'.  Invalid arguments trip
        assertions.
        """
        assert sSelector in self.kdSelectors;
        assert sEncoding in self.kdEncodings;
        if asLeadOpcodes is None:
            asLeadOpcodes = [];
        else:
            for sOpcode in asLeadOpcodes:
                assert _isValidOpcodeByte(sOpcode);
        assert sDisParse is None or sDisParse.startswith('IDX_Parse');

        self.sName          = sName;
        self.asLeadOpcodes  = asLeadOpcodes;    ##< Lead opcode bytes formatted as hex strings like '0x0f'.
        self.sSelector      = sSelector;        ##< The member selector, see kdSelectors.
        self.sEncoding      = sEncoding;        ##< The encoding, see kdEncodings.
        self.aoInstructions = [];               ##< The instructions in this map (list of Instruction).
        self.sDisParse      = sDisParse;        ##< IDX_ParseXXX.

    def getTableSize(self):
        """
        Number of table entries.  This corresponds directly to the selector.
        """
        return self.kdSelectors[self.sSelector][0];

    def getInstructionIndex(self, oInstr):
        """
        Returns the table index for the instruction.

        The index is derived from oInstr.getOpcodeByte() according to the
        selector of the map.  For all but the 'byte' selector that value is
        treated like a ModR/M byte: mod in bits 7:6, reg in bits 5:3 and rm
        in bits 2:0.
        """
        bOpcode = oInstr.getOpcodeByte();

        # The byte selector is simple.  We need a full opcode byte and need just return it.
        if self.sSelector == 'byte':
            assert oInstr.sOpcode[:2] == '0x' and len(oInstr.sOpcode) == 4, str(oInstr);
            return bOpcode;

        # The other selectors needs masking and shifting.
        if self.sSelector == '/r':
            return (bOpcode >> 3) & 0x7;

        if self.sSelector == 'mod /r':
            return (bOpcode >> 3) & 0x1f;

        if self.sSelector == 'memreg /r':
            return ((bOpcode >> 3) & 0x7) | (int((bOpcode >> 6) == 3) << 3);

        # Note! The mod field is bits 7:6, so mod == 0y11 means that the
        #       masked value is 0xc0 (comparing with 0xc can never match).
        if self.sSelector == '!11 /r':
            assert (bOpcode & 0xc0) != 0xc0, str(oInstr);
            return (bOpcode >> 3) & 0x7;

        if self.sSelector == '11 /r':
            assert (bOpcode & 0xc0) == 0xc0, str(oInstr);
            return (bOpcode >> 3) & 0x7;

        if self.sSelector == '11':
            assert (bOpcode & 0xc0) == 0xc0, str(oInstr);
            return bOpcode & 0x3f;

        assert False, self.sSelector;
        return -1;

    def getInstructionsInTableOrder(self):
        """
        Get instructions in table order.

        Returns array of instructions.  Normally there is exactly one
        instruction per entry.  However the entry could also be None if
        no instruction was specified for that opcode value.  Or there
        could be a list of instructions to deal with special encodings
        where for instance prefix (e.g. REX.W) encodes a different
        instruction or different CPUs have different instructions or
        prefixes in the same place.
        """
        # Start with empty table.
        cTable  = self.getTableSize();
        aoTable = [None] * cTable;

        # Insert the instructions.  Those without an opcode are left out.
        for oInstr in self.aoInstructions:
            if oInstr.sOpcode:
                idxOpcode = self.getInstructionIndex(oInstr);
                assert idxOpcode < cTable, str(idxOpcode);

                oExisting = aoTable[idxOpcode];
                if oExisting is None:
                    aoTable[idxOpcode] = oInstr;
                elif not isinstance(oExisting, list):
                    # Second instruction for this entry, switch to a list.
                    aoTable[idxOpcode] = [oExisting, oInstr];
                else:
                    oExisting.append(oInstr);

        return aoTable;

    def getDisasTableName(self):
        """
        Returns the disassembler table name for this map.

        The name is 'g_aDisas' followed by the underscore separated words of
        the map name: 'm', 'r' and two digit hex words are appended with an
        underscore in front, all other words are capitalized and appended
        directly.
        """
        sName = 'g_aDisas';
        for sWord in self.sName.split('_'):
            if not sWord:           # Skip empty words (leading, trailing or double underscores).
                continue;
            if sWord == 'm':        # suffix indicating modrm.mod==mem
                sName += '_m';
            elif sWord == 'r':      # suffix indicating modrm.mod==reg
                sName += '_r';
            elif len(sWord) == 2 and re.match('^[a-f0-9][a-f0-9]$', sWord):
                sName += '_' + sWord;
            else:
                sWord  = sWord.replace('grp', 'Grp');
                sWord  = sWord.replace('map', 'Map');
                sName += sWord[0].upper() + sWord[1:];
        return sName;

    def isVexMap(self):
        """ Returns True if a VEX map. """
        return self.sEncoding.startswith('vex');
686
687
class TestType(object):
    """
    Test value type.

    This base class deals with integer like values.  The fUnsigned constructor
    parameter indicates the default stance on zero vs sign extending.  It is
    possible to override fUnsigned=True by prefixing the value with '+' or '-'.
    """
    def __init__(self, sName, acbSizes = None, fUnsigned = True):
        self.sName     = sName;
        self.acbSizes  = [1, 2, 4, 8, 16, 32] if acbSizes is None else acbSizes;  # Normal sizes.
        self.fUnsigned = fUnsigned;

    class BadValue(Exception):
        """ Bad value exception. """
        def __init__(self, sMessage):
            Exception.__init__(self, sMessage);
            self.sMessage = sMessage;

    ## For ascii ~ operator.
    kdHexInv = {
        '0': 'f',
        '1': 'e',
        '2': 'd',
        '3': 'c',
        '4': 'b',
        '5': 'a',
        '6': '9',
        '7': '8',
        '8': '7',
        '9': '6',
        'a': '5',
        'b': '4',
        'c': '3',
        'd': '2',
        'e': '1',
        'f': '0',
    };

    def get(self, sValue):
        """
        Get the shortest normal sized byte representation of sValue.

        Returns ((fSignExtend, bytearray), ) or ((fSignExtend, bytearray), (fSignExtend, bytearray), ).
        The latter form is for AND+OR pairs where the first entry is what to
        AND with the field and the second the one or OR with.  (This base
        class only produces the first form.)  The bytearray holds the value
        with the least significant byte first.

        Raises BadValue if invalid value.
        """
        if not sValue:
            raise TestType.BadValue('empty value');

        # Deal with sign and detect hexadecimal or decimal.
        fSignExtend = not self.fUnsigned;
        offDigits   = 0;
        if sValue[0] in ('-', '+'):
            fSignExtend = True;
            offDigits   = 1;
        fHex = len(sValue) > offDigits + 2 and sValue[offDigits : offDigits + 2].lower() == '0x';

        # Try convert it to an integer.  Only conversion failures are turned
        # into BadValue, so that programming errors aren't hidden.
        try:
            iValue = int(sValue, 16 if fHex else 10);
        except (ValueError, TypeError) as oXcpt:
            raise TestType.BadValue('failed to convert "%s" to integer (%s)' % (sValue, oXcpt));

        # Convert to a hex string without any prefix or suffix.  Negative values
        # are converted to two's complement by inverting the digits of -n - 1.
        if iValue >= 0:
            sHex = '%x' % (iValue,);
        else:
            sHex = ''.join([self.kdHexInv[sDigit] for sDigit in '%x' % (-iValue - 1,)]);
            if fSignExtend and sHex[0] not in '89abcdef':
                sHex = 'f' + sHex;      # Make sure the top bit is set.

        # Figure the natural number of digits: The smallest normal size that
        # fits, or a multiple of the largest one if none does.
        cDigits    = len(sHex);
        cMaxDigits = self.acbSizes[-1] * 2;
        if cDigits <= cMaxDigits:
            for cb in self.acbSizes:
                cNaturalDigits = cb * 2;
                if cDigits <= cNaturalDigits:
                    break;
        else:
            cNaturalDigits = ((cDigits + cMaxDigits - 1) // cMaxDigits) * cMaxDigits;

        # Pad it, zero extending non-negative values and sign extending negative ones.
        if cNaturalDigits != cDigits:
            cNeeded = cNaturalDigits - cDigits;
            if iValue >= 0:
                sHex = ('0' * cNeeded) + sHex;
            else:
                sHex = ('f' * cNeeded) + sHex;

        # Invert and convert to bytearray and return it.
        abValue = bytearray([int(sHex[offHex - 2 : offHex], 16) for offHex in range(len(sHex), 0, -2)]);

        return ((fSignExtend, abValue),);

    def validate(self, sValue):
        """
        Returns True if value is okay, error message on failure.
        """
        try:
            self.get(sValue);
        except TestType.BadValue as oXcpt:
            return oXcpt.sMessage;
        return True;

    def isAndOrPair(self, sValue):
        """
        Checks if sValue is a pair.  Always False for this base class.
        """
        _ = sValue;
        return False;
812
813
class TestTypeEflags(TestType):
    """
    Special value parsing for EFLAGS/RFLAGS/FLAGS.

    The value is a comma separated list of the flag mnemonics found in
    g_kdEFlagsMnemonics.  Mnemonics meaning "flag clear" make the value an
    AND+OR pair.
    """

    ## The mnemonics that mean "flag clear".  Only the keys are used.
    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 };

    def __init__(self, sName):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);

    def get(self, sValue):
        """
        Converts the flag list to byte representation(s), see TestType.get.

        Returns a (clear-mask, set-mask) pair if any of the flags are of the
        "flag clear" kind, otherwise just the set-mask.

        Raises BadValue on unknown flags.
        """
        fClear = 0;
        fSet   = 0;
        for sFlag in sValue.split(','):
            sConstant = g_kdEFlagsMnemonics.get(sFlag, None);
            if sConstant is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sFlag, sValue))
            if sConstant[0] == '!':
                fClear |= g_kdX86EFlagsConstants[sConstant[1:]];
            else:
                fSet   |= g_kdX86EFlagsConstants[sConstant];

        aoSet = TestType.get(self, '0x%x' % (fSet,));
        if fClear != 0:
            aoClear = TestType.get(self, '%#x' % (fClear,))
            assert self.isAndOrPair(sValue) is True;
            return (aoClear[0], aoSet[0]);
        assert self.isAndOrPair(sValue) is False;
        return aoSet;

    def isAndOrPair(self, sValue):
        """
        Checks if sValue is a pair, i.e. contains any "flag clear" mnemonics.

        The flags are matched one by one rather than by substring search, as
        the latter takes 'iopl' for containing the 'pl' flag.
        """
        for sFlag in sValue.split(','):
            if sFlag in self.kdZeroValueFlags:
                return True;
        return False;
849
class TestTypeFromDict(TestType):
    """
    Value parsing for fields given as a comma separated list of flag names
    that are looked up in a dictionary of constants.

    The constant name is formed by putting sConstantPrefix in front of the
    upper cased flag name.
    """

    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 };

    def __init__(self, sName, kdConstantsAndValues, sConstantPrefix):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);
        self.kdConstantsAndValues = kdConstantsAndValues;
        self.sConstantPrefix      = sConstantPrefix;

    def get(self, sValue):
        """
        ORs together the values of the listed flags and returns the byte
        representation of the result, see TestType.get.

        Raises BadValue on unknown flags.
        """
        fCombined = 0;
        for sFlagName in sValue.split(','):
            sConstant = self.sConstantPrefix + sFlagName.upper();
            fFlagBits = self.kdConstantsAndValues.get(sConstant, None);
            if fFlagBits is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sFlagName, sValue))
            fCombined |= fFlagBits;
        sCombined = '0x%x' % (fCombined,);
        return TestType.get(self, sCombined);
870
871
class TestInOut(object):
    """
    One input or output state modifier.

    This should be thought of as values to modify BS3REGCTX and extended
    (needs to be structured) state.

    An instance records which field is modified (sField, a key of kdFields),
    with which assignment operator (sOp, one of kasOperators), the value text
    (sValue) and the value type name (sType).
    """
    ## Assigned operators.
    kasOperators = [
        '&|=',  # Special AND(INV)+OR operator for use with EFLAGS.
        '&~=',
        '&=',
        '|=',
        '='
    ];
    ## Types
    kdTypes = {
        'uint':  TestType('uint', fUnsigned = True),
        'int':   TestType('int'),
        'efl':   TestTypeEflags('efl'),
        'cr0':   TestTypeFromDict('cr0', g_kdX86Cr0Constants, 'X86_CR0_'),
        'cr4':   TestTypeFromDict('cr4', g_kdX86Cr4Constants, 'X86_CR4_'),
        'xcr0':  TestTypeFromDict('xcr0', g_kdX86XSaveCConstants, 'XSAVE_C_'),
    };
    ## CPU context fields.
    kdFields = {
        # name:         ( default type, [both|input|output], )
        # Operands.
        'op1':          ( 'uint', 'both',   ), ## \@op1
        'op2':          ( 'uint', 'both',   ), ## \@op2
        'op3':          ( 'uint', 'both',   ), ## \@op3
        'op4':          ( 'uint', 'both',   ), ## \@op4
        # Flags.
        'efl':          ( 'efl',  'both',   ),
        'efl_undef':    ( 'uint', 'output', ),
        # 8-bit GPRs.
        'al':           ( 'uint', 'both',   ),
        'cl':           ( 'uint', 'both',   ),
        'dl':           ( 'uint', 'both',   ),
        'bl':           ( 'uint', 'both',   ),
        'ah':           ( 'uint', 'both',   ),
        'ch':           ( 'uint', 'both',   ),
        'dh':           ( 'uint', 'both',   ),
        'bh':           ( 'uint', 'both',   ),
        'r8l':          ( 'uint', 'both',   ),
        'r9l':          ( 'uint', 'both',   ),
        'r10l':         ( 'uint', 'both',   ),
        'r11l':         ( 'uint', 'both',   ),
        'r12l':         ( 'uint', 'both',   ),
        'r13l':         ( 'uint', 'both',   ),
        'r14l':         ( 'uint', 'both',   ),
        'r15l':         ( 'uint', 'both',   ),
        # 16-bit GPRs.
        'ax':           ( 'uint', 'both',   ),
        'dx':           ( 'uint', 'both',   ),
        'cx':           ( 'uint', 'both',   ),
        'bx':           ( 'uint', 'both',   ),
        'sp':           ( 'uint', 'both',   ),
        'bp':           ( 'uint', 'both',   ),
        'si':           ( 'uint', 'both',   ),
        'di':           ( 'uint', 'both',   ),
        'r8w':          ( 'uint', 'both',   ),
        'r9w':          ( 'uint', 'both',   ),
        'r10w':         ( 'uint', 'both',   ),
        'r11w':         ( 'uint', 'both',   ),
        'r12w':         ( 'uint', 'both',   ),
        'r13w':         ( 'uint', 'both',   ),
        'r14w':         ( 'uint', 'both',   ),
        'r15w':         ( 'uint', 'both',   ),
        # 32-bit GPRs.
        'eax':          ( 'uint', 'both',   ),
        'edx':          ( 'uint', 'both',   ),
        'ecx':          ( 'uint', 'both',   ),
        'ebx':          ( 'uint', 'both',   ),
        'esp':          ( 'uint', 'both',   ),
        'ebp':          ( 'uint', 'both',   ),
        'esi':          ( 'uint', 'both',   ),
        'edi':          ( 'uint', 'both',   ),
        'r8d':          ( 'uint', 'both',   ),
        'r9d':          ( 'uint', 'both',   ),
        'r10d':         ( 'uint', 'both',   ),
        'r11d':         ( 'uint', 'both',   ),
        'r12d':         ( 'uint', 'both',   ),
        'r13d':         ( 'uint', 'both',   ),
        'r14d':         ( 'uint', 'both',   ),
        'r15d':         ( 'uint', 'both',   ),
        # 64-bit GPRs.
        'rax':          ( 'uint', 'both',   ),
        'rdx':          ( 'uint', 'both',   ),
        'rcx':          ( 'uint', 'both',   ),
        'rbx':          ( 'uint', 'both',   ),
        'rsp':          ( 'uint', 'both',   ),
        'rbp':          ( 'uint', 'both',   ),
        'rsi':          ( 'uint', 'both',   ),
        'rdi':          ( 'uint', 'both',   ),
        'r8':           ( 'uint', 'both',   ),
        'r9':           ( 'uint', 'both',   ),
        'r10':          ( 'uint', 'both',   ),
        'r11':          ( 'uint', 'both',   ),
        'r12':          ( 'uint', 'both',   ),
        'r13':          ( 'uint', 'both',   ),
        'r14':          ( 'uint', 'both',   ),
        'r15':          ( 'uint', 'both',   ),
        # 16-bit, 32-bit or 64-bit registers according to operand size.
        'oz.rax':       ( 'uint', 'both',   ),
        'oz.rdx':       ( 'uint', 'both',   ),
        'oz.rcx':       ( 'uint', 'both',   ),
        'oz.rbx':       ( 'uint', 'both',   ),
        'oz.rsp':       ( 'uint', 'both',   ),
        'oz.rbp':       ( 'uint', 'both',   ),
        'oz.rsi':       ( 'uint', 'both',   ),
        'oz.rdi':       ( 'uint', 'both',   ),
        'oz.r8':        ( 'uint', 'both',   ),
        'oz.r9':        ( 'uint', 'both',   ),
        'oz.r10':       ( 'uint', 'both',   ),
        'oz.r11':       ( 'uint', 'both',   ),
        'oz.r12':       ( 'uint', 'both',   ),
        'oz.r13':       ( 'uint', 'both',   ),
        'oz.r14':       ( 'uint', 'both',   ),
        'oz.r15':       ( 'uint', 'both',   ),
        # Control registers.
        'cr0':          ( 'cr0',  'both',   ),
        'cr4':          ( 'cr4',  'both',   ),
        'xcr0':         ( 'xcr0', 'both',   ),
        # FPU Registers
        'fcw':          ( 'uint', 'both',   ),
        'fsw':          ( 'uint', 'both',   ),
        'ftw':          ( 'uint', 'both',   ),
        'fop':          ( 'uint', 'both',   ),
        'fpuip':        ( 'uint', 'both',   ),
        'fpucs':        ( 'uint', 'both',   ),
        'fpudp':        ( 'uint', 'both',   ),
        'fpuds':        ( 'uint', 'both',   ),
        'mxcsr':        ( 'uint', 'both',   ),
        'st0':          ( 'uint', 'both',   ),
        'st1':          ( 'uint', 'both',   ),
        'st2':          ( 'uint', 'both',   ),
        'st3':          ( 'uint', 'both',   ),
        'st4':          ( 'uint', 'both',   ),
        'st5':          ( 'uint', 'both',   ),
        'st6':          ( 'uint', 'both',   ),
        'st7':          ( 'uint', 'both',   ),
        # MMX registers.
        'mm0':          ( 'uint', 'both',   ),
        'mm1':          ( 'uint', 'both',   ),
        'mm2':          ( 'uint', 'both',   ),
        'mm3':          ( 'uint', 'both',   ),
        'mm4':          ( 'uint', 'both',   ),
        'mm5':          ( 'uint', 'both',   ),
        'mm6':          ( 'uint', 'both',   ),
        'mm7':          ( 'uint', 'both',   ),
        # SSE registers.
        'xmm0':         ( 'uint', 'both',   ),
        'xmm1':         ( 'uint', 'both',   ),
        'xmm2':         ( 'uint', 'both',   ),
        'xmm3':         ( 'uint', 'both',   ),
        'xmm4':         ( 'uint', 'both',   ),
        'xmm5':         ( 'uint', 'both',   ),
        'xmm6':         ( 'uint', 'both',   ),
        'xmm7':         ( 'uint', 'both',   ),
        'xmm8':         ( 'uint', 'both',   ),
        'xmm9':         ( 'uint', 'both',   ),
        'xmm10':        ( 'uint', 'both',   ),
        'xmm11':        ( 'uint', 'both',   ),
        'xmm12':        ( 'uint', 'both',   ),
        'xmm13':        ( 'uint', 'both',   ),
        'xmm14':        ( 'uint', 'both',   ),
        'xmm15':        ( 'uint', 'both',   ),
        'xmm0.lo':      ( 'uint', 'both',   ),
        'xmm1.lo':      ( 'uint', 'both',   ),
        'xmm2.lo':      ( 'uint', 'both',   ),
        'xmm3.lo':      ( 'uint', 'both',   ),
        'xmm4.lo':      ( 'uint', 'both',   ),
        'xmm5.lo':      ( 'uint', 'both',   ),
        'xmm6.lo':      ( 'uint', 'both',   ),
        'xmm7.lo':      ( 'uint', 'both',   ),
        'xmm8.lo':      ( 'uint', 'both',   ),
        'xmm9.lo':      ( 'uint', 'both',   ),
        'xmm10.lo':     ( 'uint', 'both',   ),
        'xmm11.lo':     ( 'uint', 'both',   ),
        'xmm12.lo':     ( 'uint', 'both',   ),
        'xmm13.lo':     ( 'uint', 'both',   ),
        'xmm14.lo':     ( 'uint', 'both',   ),
        'xmm15.lo':     ( 'uint', 'both',   ),
        'xmm0.hi':      ( 'uint', 'both',   ),
        'xmm1.hi':      ( 'uint', 'both',   ),
        'xmm2.hi':      ( 'uint', 'both',   ),
        'xmm3.hi':      ( 'uint', 'both',   ),
        'xmm4.hi':      ( 'uint', 'both',   ),
        'xmm5.hi':      ( 'uint', 'both',   ),
        'xmm6.hi':      ( 'uint', 'both',   ),
        'xmm7.hi':      ( 'uint', 'both',   ),
        'xmm8.hi':      ( 'uint', 'both',   ),
        'xmm9.hi':      ( 'uint', 'both',   ),
        'xmm10.hi':     ( 'uint', 'both',   ),
        'xmm11.hi':     ( 'uint', 'both',   ),
        'xmm12.hi':     ( 'uint', 'both',   ),
        'xmm13.hi':     ( 'uint', 'both',   ),
        'xmm14.hi':     ( 'uint', 'both',   ),
        'xmm15.hi':     ( 'uint', 'both',   ),
        'xmm0.lo.zx':   ( 'uint', 'both',   ),
        'xmm1.lo.zx':   ( 'uint', 'both',   ),
        'xmm2.lo.zx':   ( 'uint', 'both',   ),
        'xmm3.lo.zx':   ( 'uint', 'both',   ),
        'xmm4.lo.zx':   ( 'uint', 'both',   ),
        'xmm5.lo.zx':   ( 'uint', 'both',   ),
        'xmm6.lo.zx':   ( 'uint', 'both',   ),
        'xmm7.lo.zx':   ( 'uint', 'both',   ),
        'xmm8.lo.zx':   ( 'uint', 'both',   ),
        'xmm9.lo.zx':   ( 'uint', 'both',   ),
        'xmm10.lo.zx':  ( 'uint', 'both',   ),
        'xmm11.lo.zx':  ( 'uint', 'both',   ),
        'xmm12.lo.zx':  ( 'uint', 'both',   ),
        'xmm13.lo.zx':  ( 'uint', 'both',   ),
        'xmm14.lo.zx':  ( 'uint', 'both',   ),
        'xmm15.lo.zx':  ( 'uint', 'both',   ),
        'xmm0.dw0':     ( 'uint', 'both',   ),
        'xmm1.dw0':     ( 'uint', 'both',   ),
        'xmm2.dw0':     ( 'uint', 'both',   ),
        'xmm3.dw0':     ( 'uint', 'both',   ),
        'xmm4.dw0':     ( 'uint', 'both',   ),
        'xmm5.dw0':     ( 'uint', 'both',   ),
        'xmm6.dw0':     ( 'uint', 'both',   ),
        'xmm7.dw0':     ( 'uint', 'both',   ),
        'xmm8.dw0':     ( 'uint', 'both',   ),
        'xmm9.dw0':     ( 'uint', 'both',   ),
        'xmm10.dw0':    ( 'uint', 'both',   ),
        'xmm11.dw0':    ( 'uint', 'both',   ),
        'xmm12.dw0':    ( 'uint', 'both',   ),
        'xmm13.dw0':    ( 'uint', 'both',   ),
        'xmm14.dw0':    ( 'uint', 'both',   ),
        'xmm15.dw0':    ( 'uint', 'both',   ), # Was misspelled 'xmm15_dw0', unlike xmm0..xmm14.
        # AVX registers.
        'ymm0':         ( 'uint', 'both',   ),
        'ymm1':         ( 'uint', 'both',   ),
        'ymm2':         ( 'uint', 'both',   ),
        'ymm3':         ( 'uint', 'both',   ),
        'ymm4':         ( 'uint', 'both',   ),
        'ymm5':         ( 'uint', 'both',   ),
        'ymm6':         ( 'uint', 'both',   ),
        'ymm7':         ( 'uint', 'both',   ),
        'ymm8':         ( 'uint', 'both',   ),
        'ymm9':         ( 'uint', 'both',   ),
        'ymm10':        ( 'uint', 'both',   ),
        'ymm11':        ( 'uint', 'both',   ),
        'ymm12':        ( 'uint', 'both',   ),
        'ymm13':        ( 'uint', 'both',   ),
        'ymm14':        ( 'uint', 'both',   ),
        'ymm15':        ( 'uint', 'both',   ),

        # Special ones.
        'value.xcpt':   ( 'uint', 'output', ),
    };

    def __init__(self, sField, sOp, sValue, sType):
        """
        Creates a modifier.

        sField must be a key of kdFields and sOp one of kasOperators; all four
        arguments must be strings.  Violations trip an assertion.
        """
        # Type checks first, so an unhashable argument fails the assertion
        # rather than raising TypeError from the dictionary lookup below.
        assert isinstance(sField, str);
        assert isinstance(sOp, str);
        assert isinstance(sType, str);
        assert isinstance(sValue, str);
        assert sField in self.kdFields;
        assert sOp in self.kasOperators;
        self.sField = sField;
        self.sOp    = sOp;
        self.sValue = sValue;
        self.sType  = sType;
1137
1138
class TestSelector(object):
    """
    One selector for an instruction test.

    A selector compares a variable (key of kdVariables) against one of the
    values valid for that variable, using one of kasCompareOps.
    """
    ## Selector compare operators.
    kasCompareOps = [ '==', '!=' ];
    ## Selector variables and their valid values.
    kdVariables = {
        # Operand size.
        'size':     { 'o16': 'size_o16', 'o32': 'size_o32', 'o64': 'size_o64', },
        # VEX.L value.
        'vex.l':    { '0': 'vexl_0', '1': 'vexl_1', },
        # Execution ring.
        'ring':     {
            '0':     'ring_0',
            '1':     'ring_1',
            '2':     'ring_2',
            '3':     'ring_3',
            '0..2':  'ring_0_thru_2',
            '1..3':  'ring_1_thru_3',
        },
        # Basic code mode.
        'codebits': { '64': 'code_64bit', '32': 'code_32bit', '16': 'code_16bit', },
        # cpu modes.
        'mode':     {
            'real':  'mode_real',
            'prot':  'mode_prot',
            'long':  'mode_long',
            'v86':   'mode_v86',
            'smm':   'mode_smm',
            'vmx':   'mode_vmx',
            'svm':   'mode_svm',
        },
        # paging on/off
        'paging':   { 'on': 'paging_on', 'off': 'paging_off', },
        # CPU vendor
        'vendor':   { 'amd': 'vendor_amd', 'intel': 'vendor_intel', 'via': 'vendor_via', },
    };
    ## Selector shorthand predicates.
    ## These translate into variable expressions.
    kdPredicates = {
        'o16':          'size==o16',
        'o32':          'size==o32',
        'o64':          'size==o64',
        'ring0':        'ring==0',
        '!ring0':       'ring==1..3',
        'ring1':        'ring==1',
        'ring2':        'ring==2',
        'ring3':        'ring==3',
        'user':         'ring==3',
        'supervisor':   'ring==0..2',
        'real':         'mode==real',
        'prot':         'mode==prot',
        'long':         'mode==long',
        'v86':          'mode==v86',
        'smm':          'mode==smm',
        'vmx':          'mode==vmx',
        'svm':          'mode==svm',
        'paging':       'paging==on',
        '!paging':      'paging==off',
        'amd':          'vendor==amd',
        '!amd':         'vendor!=amd',
        'intel':        'vendor==intel',
        '!intel':       'vendor!=intel',
        'via':          'vendor==via',
        '!via':         'vendor!=via',
    };

    def __init__(self, sVariable, sOp, sValue):
        """
        Creates a selector; the arguments are validated by assertions against
        kdVariables and kasCompareOps.
        """
        assert sVariable in self.kdVariables;
        assert sOp in self.kasCompareOps;
        dValidValues = self.kdVariables[sVariable];
        assert sValue in dValidValues;
        self.sVariable, self.sOp, self.sValue = sVariable, sOp, sValue;
1232
1233
1234class InstructionTest(object):
1235 """
1236 Instruction test.
1237 """
1238
1239 def __init__(self, oInstr): # type: (InstructionTest, Instruction)
1240 self.oInstr = oInstr; # type: InstructionTest
1241 self.aoInputs = []; # type: list(TestInOut)
1242 self.aoOutputs = []; # type: list(TestInOut)
1243 self.aoSelectors = []; # type: list(TestSelector)
1244
1245 def toString(self, fRepr = False):
1246 """
1247 Converts it to string representation.
1248 """
1249 asWords = [];
1250 if self.aoSelectors:
1251 for oSelector in self.aoSelectors:
1252 asWords.append('%s%s%s' % (oSelector.sVariable, oSelector.sOp, oSelector.sValue,));
1253 asWords.append('/');
1254
1255 for oModifier in self.aoInputs:
1256 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
1257
1258 asWords.append('->');
1259
1260 for oModifier in self.aoOutputs:
1261 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
1262
1263 if fRepr:
1264 return '<' + ' '.join(asWords) + '>';
1265 return ' '.join(asWords);
1266
1267 def __str__(self):
1268 """ Provide string represenation. """
1269 return self.toString(False);
1270
1271 def __repr__(self):
1272 """ Provide unambigious string representation. """
1273 return self.toString(True);
1274
class Operand(object):
    """
    Instruction operand.

    Pairs an operand location (key of g_kdOpLocations) with an operand type
    (key of g_kdOpTypes).
    """

    ## First letters of the operand types that use ModR/M encoding.
    _kasModRmTypeLetters = ['E', 'G', 'M'];

    def __init__(self, sWhere, sType):
        assert sWhere in g_kdOpLocations, sWhere;
        assert sType  in g_kdOpTypes, sType;
        self.sWhere = sWhere;           ##< g_kdOpLocations
        self.sType  = sType;            ##< g_kdOpTypes

    def usesModRM(self):
        """ Returns True if using some form of ModR/M encoding. """
        sFirstLetter = self.sType[0];
        return sFirstLetter in self._kasModRmTypeLetters;
1289
1290
1291
class Instruction(object): # pylint: disable=too-many-instance-attributes
    """
    Instruction.

    Collects what the parser learns about one instruction: core attributes
    (mnemonic, operands, maps, opcode, flags, tests, ...), implementation
    attributes (stats and function names, stub indicators), where in the
    source it was found, and a few raw intermediate input fields.
    """

    def __init__(self, sSrcFile, iLine):
        ## @name Core attributes.
        ## @{
        self.sMnemonic      = None;
        self.sBrief         = None;
        self.asDescSections = [];       # type: list(str)
        self.aoMaps         = [];       # type: list(InstructionMap)
        self.aoOperands     = [];       # type: list(Operand)
        self.sPrefix        = None;     ##< Single prefix: None, 'none', 0x66, 0xf3, 0xf2
        self.sOpcode        = None;     # type: str
        self.sSubOpcode     = None;     # type: str
        self.sEncoding      = None;
        self.asFlTest       = None;
        self.asFlModify     = None;
        self.asFlUndefined  = None;
        self.asFlSet        = None;
        self.asFlClear      = None;
        self.dHints         = {};       ##< Dictionary of instruction hints, flags, whatnot. (Dictionary for speed; dummy value).
        self.sDisEnum       = None;     ##< OP_XXXX value.  Default is based on the uppercased mnemonic.
        self.asCpuIds       = [];       ##< The CPUID feature bit names for this instruction. If multiple, assume AND.
        self.asReqFeatures  = [];       ##< Which features are required to be enabled to run this instruction.
        self.aoTests        = [];       # type: list(InstructionTest)
        self.sMinCpu        = None;     ##< Indicates the minimum CPU required for the instruction. Not set when oCpuExpr is.
        self.oCpuExpr       = None;     ##< Some CPU restriction expression...
        self.sGroup         = None;
        self.fUnused        = False;    ##< Unused instruction.
        self.fInvalid       = False;    ##< Invalid instruction (like UD2).
        self.sInvalidStyle  = None;     ##< Invalid behaviour style (g_kdInvalidStyles),
        self.sXcptType      = None;     ##< Exception type (g_kdXcptTypes).
        ## @}

        ## @name Implementation attributes.
        ## @{
        self.sStats         = None;
        self.sFunction      = None;
        self.fStub          = False;
        self.fUdStub        = False;
        ## @}

        ## @name Decoding info
        ## @{
        self.sSrcFile       = sSrcFile;
        self.iLineCreated   = iLine;
        self.iLineCompleted = None;
        self.cOpTags        = 0;
        self.iLineFnIemOpMacro  = -1;
        self.iLineMnemonicMacro = -1;
        ## @}

        ## @name Intermediate input fields.
        ## @{
        self.sRawDisOpNo    = None;
        self.asRawDisParams = [];
        self.sRawIemOpFlags = None;
        self.sRawOldOpcodes = None;
        self.asCopyTests    = [];
        ## @}

    def toString(self, fRepr = False):
        """
        Turn object into a string.

        Produces '; ' separated 'name=value' pairs, skipping fields whose
        value is None.  With fRepr set the result is enclosed in angle
        brackets.
        """
        aasFields = [];

        aasFields.append(['opcode',    self.sOpcode]);
        aasFields.append(['mnemonic',  self.sMnemonic]);
        for iOperand, oOperand in enumerate(self.aoOperands):
            aasFields.append(['op%u' % (iOperand + 1,), '%s:%s' % (oOperand.sWhere, oOperand.sType,)]);
        if self.aoMaps:         aasFields.append(['maps', ','.join([oMap.sName for oMap in self.aoMaps])]);
        aasFields.append(['encoding',  self.sEncoding]);
        if self.dHints:         aasFields.append(['hints', ','.join(self.dHints.keys())]);
        aasFields.append(['disenum',   self.sDisEnum]);
        if self.asCpuIds:       aasFields.append(['cpuid', ','.join(self.asCpuIds)]);
        aasFields.append(['group',     self.sGroup]);
        if self.fUnused:        aasFields.append(['unused', 'True']);
        if self.fInvalid:       aasFields.append(['invalid', 'True']);
        aasFields.append(['invlstyle', self.sInvalidStyle]);
        aasFields.append(['fltest',    self.asFlTest]);
        aasFields.append(['flmodify',  self.asFlModify]);
        aasFields.append(['flundef',   self.asFlUndefined]);
        aasFields.append(['flset',     self.asFlSet]);
        aasFields.append(['flclear',   self.asFlClear]);
        aasFields.append(['mincpu',    self.sMinCpu]);
        aasFields.append(['stats',     self.sStats]);
        aasFields.append(['sFunction', self.sFunction]);
        if self.fStub:          aasFields.append(['fStub', 'True']);
        if self.fUdStub:        aasFields.append(['fUdStub', 'True']);
        if self.cOpTags:        aasFields.append(['optags', str(self.cOpTags)]);
        if self.iLineFnIemOpMacro  != -1: aasFields.append(['FNIEMOP_XXX', str(self.iLineFnIemOpMacro)]);
        if self.iLineMnemonicMacro != -1: aasFields.append(['IEMOP_MNEMMONICn', str(self.iLineMnemonicMacro)]);

        sRet = '<' if fRepr else '';
        for sField, sValue in aasFields:
            if sValue is not None:
                # Anything longer than the optional '<' means a field was already emitted.
                if len(sRet) > 1:
                    sRet += '; ';
                sRet += '%s=%s' % (sField, sValue,);
        if fRepr:
            sRet += '>';

        return sRet;

    def __str__(self):
        """ Provide string representation. """
        return self.toString(False);

    def __repr__(self):
        """ Provide unambiguous string representation. """
        return self.toString(True);

    def getOpcodeByte(self):
        """
        Decodes sOpcode into a byte range integer value.
        Raises exception if sOpcode is None or invalid.

        Supported forms: '0xNN' (the byte itself), '/r' (r << 3), '11/r'
        ((r << 3) | 0xc0) and '!11/r' ((r << 3) | 0x80), where r is a single
        digit.
        """
        if self.sOpcode is None:
            raise Exception('No opcode byte for %s!' % (self,));
        sOpcode = str(self.sOpcode);    # pylint type confusion workaround.

        # Full hex byte form.
        if sOpcode[:2] == '0x':
            return int(sOpcode, 16);

        # The /r form.  (A slash plus one digit is two characters; this used to
        # test for four, so the form was never recognized.)
        if len(sOpcode) == 2 and sOpcode.startswith('/') and sOpcode[-1].isdigit():
            return int(sOpcode[-1:]) << 3;

        # The 11/r form:
        if len(sOpcode) == 4 and sOpcode.startswith('11/') and sOpcode[-1].isdigit():
            return (int(sOpcode[-1:]) << 3) | 0xc0;

        # The !11/r form (returns mod=1):
        ## @todo this doesn't really work...
        if len(sOpcode) == 5 and sOpcode.startswith('!11/') and sOpcode[-1].isdigit():
            return (int(sOpcode[-1:]) << 3) | 0x80;

        raise Exception('unsupported opcode byte spec "%s" for %s' % (sOpcode, self,));

    @staticmethod
    def _flagsToIntegerMask(asFlags):
        """
        Returns the integer mask value for asFlags.

        Each flag is translated via g_kdEFlagsMnemonics and then
        g_kdX86EFlagsConstants; None or an empty list yields 0.
        """
        uRet = 0;
        if asFlags:
            for sFlag in asFlags:
                sConstant = g_kdEFlagsMnemonics[sFlag];
                assert sConstant[0] != '!', sConstant
                uRet |= g_kdX86EFlagsConstants[sConstant];
        return uRet;

    def getTestedFlagsMask(self):
        """ Returns asFlTest as an integer mask value. """
        return self._flagsToIntegerMask(self.asFlTest);

    def getModifiedFlagsMask(self):
        """ Returns asFlModify as an integer mask value. """
        return self._flagsToIntegerMask(self.asFlModify);

    def getUndefinedFlagsMask(self):
        """ Returns asFlUndefined as an integer mask value. """
        return self._flagsToIntegerMask(self.asFlUndefined);

    def getSetFlagsMask(self):
        """ Returns asFlSet as an integer mask value. """
        return self._flagsToIntegerMask(self.asFlSet);

    def getClearedFlagsMask(self):
        """ Returns asFlClear as an integer mask value. """
        return self._flagsToIntegerMask(self.asFlClear);

    def onlyInVexMaps(self):
        """ Returns True if only in VEX maps, otherwise False.  (No maps -> False) """
        if not self.aoMaps:
            return False;
        for oMap in self.aoMaps:
            if not oMap.isVexMap():
                return False;
        return True;
1474
1475
1476
## All the instructions.
g_aoAllInstructions = []; # type: list(Instruction)

## All the instructions indexed by statistics name (opstat).
g_dAllInstructionsByStat = {}; # type: dict(Instruction)

## All the instructions indexed by function name (opfunction).
## Each value is a list, as several instructions may share one function.
g_dAllInstructionsByFunction = {}; # type: dict(list(Instruction))

## Instructions tagged by oponlytest
g_aoOnlyTestInstructions = []; # type: list(Instruction)

## Instruction maps.
g_dInstructionMaps = {
    'one':          InstructionMap('one'),
    # grp1_80 lacked the '/r' selector that grp1_81, grp1_82 and grp1_83 have.
    'grp1_80':      InstructionMap('grp1_80',   asLeadOpcodes = ['0x80',], sSelector = '/r'),
    'grp1_81':      InstructionMap('grp1_81',   asLeadOpcodes = ['0x81',], sSelector = '/r'),
    'grp1_82':      InstructionMap('grp1_82',   asLeadOpcodes = ['0x82',], sSelector = '/r'),
    'grp1_83':      InstructionMap('grp1_83',   asLeadOpcodes = ['0x83',], sSelector = '/r'),
    'grp1a':        InstructionMap('grp1a',     asLeadOpcodes = ['0x8f',], sSelector = '/r'),
    'grp2_c0':      InstructionMap('grp2_c0',   asLeadOpcodes = ['0xc0',], sSelector = '/r'),
    'grp2_c1':      InstructionMap('grp2_c1',   asLeadOpcodes = ['0xc1',], sSelector = '/r'),
    'grp2_d0':      InstructionMap('grp2_d0',   asLeadOpcodes = ['0xd0',], sSelector = '/r'),
    'grp2_d1':      InstructionMap('grp2_d1',   asLeadOpcodes = ['0xd1',], sSelector = '/r'),
    'grp2_d2':      InstructionMap('grp2_d2',   asLeadOpcodes = ['0xd2',], sSelector = '/r'),
    'grp2_d3':      InstructionMap('grp2_d3',   asLeadOpcodes = ['0xd3',], sSelector = '/r'),
    'grp3_f6':      InstructionMap('grp3_f6',   asLeadOpcodes = ['0xf6',], sSelector = '/r'),
    'grp3_f7':      InstructionMap('grp3_f7',   asLeadOpcodes = ['0xf7',], sSelector = '/r'),
    'grp4':         InstructionMap('grp4',      asLeadOpcodes = ['0xfe',], sSelector = '/r'),
    'grp5':         InstructionMap('grp5',      asLeadOpcodes = ['0xff',], sSelector = '/r'),
    'grp11_c6_m':   InstructionMap('grp11_c6_m',asLeadOpcodes = ['0xc6',], sSelector = '!11 /r'),
    'grp11_c6_r':   InstructionMap('grp11_c6_r',asLeadOpcodes = ['0xc6',], sSelector = '11'),    # xabort
    'grp11_c7_m':   InstructionMap('grp11_c7_m',asLeadOpcodes = ['0xc7',], sSelector = '!11 /r'),
    'grp11_c7_r':   InstructionMap('grp11_c7_r',asLeadOpcodes = ['0xc7',], sSelector = '11'),    # xbegin

    'two0f':        InstructionMap('two0f',     asLeadOpcodes = ['0x0f',], sDisParse = 'IDX_ParseTwoByteEsc'),
    'grp6':         InstructionMap('grp6',      asLeadOpcodes = ['0x0f', '0x00',], sSelector = '/r'),
    'grp7_m':       InstructionMap('grp7_m',    asLeadOpcodes = ['0x0f', '0x01',], sSelector = '!11 /r'),
    'grp7_r':       InstructionMap('grp7_r',    asLeadOpcodes = ['0x0f', '0x01',], sSelector = '11'),
    'grp8':         InstructionMap('grp8',      asLeadOpcodes = ['0x0f', '0xba',], sSelector = '/r'),
    'grp9':         InstructionMap('grp9',      asLeadOpcodes = ['0x0f', '0xc7',], sSelector = 'mod /r'),
    'grp10':        InstructionMap('grp10',     asLeadOpcodes = ['0x0f', '0xb9',], sSelector = '/r'), # UD1 /w modr/m
    'grp12':        InstructionMap('grp12',     asLeadOpcodes = ['0x0f', '0x71',], sSelector = 'mod /r'),
    'grp13':        InstructionMap('grp13',     asLeadOpcodes = ['0x0f', '0x72',], sSelector = 'mod /r'),
    'grp14':        InstructionMap('grp14',     asLeadOpcodes = ['0x0f', '0x73',], sSelector = 'mod /r'),
    'grp15':        InstructionMap('grp15',     asLeadOpcodes = ['0x0f', '0xae',], sSelector = 'memreg /r'),
    'grp16':        InstructionMap('grp16',     asLeadOpcodes = ['0x0f', '0x18',], sSelector = 'mod /r'),
    'grpA17':       InstructionMap('grpA17',    asLeadOpcodes = ['0x0f', '0x78',], sSelector = '/r'), # AMD: EXTRQ weirdness
    'grpP':         InstructionMap('grpP',      asLeadOpcodes = ['0x0f', '0x0d',], sSelector = '/r'), # AMD: prefetch

    'three0f38':    InstructionMap('three0f38', asLeadOpcodes = ['0x0f', '0x38',]),
    'three0f3a':    InstructionMap('three0f3a', asLeadOpcodes = ['0x0f', '0x3a',]),

    'vexmap1':      InstructionMap('vexmap1',   sEncoding = 'vex1'),
    'vexgrp12':     InstructionMap('vexgrp12',  sEncoding = 'vex1', asLeadOpcodes = ['0x71',], sSelector = 'mod /r'),
    'vexgrp13':     InstructionMap('vexgrp13',  sEncoding = 'vex1', asLeadOpcodes = ['0x72',], sSelector = 'mod /r'),
    'vexgrp14':     InstructionMap('vexgrp14',  sEncoding = 'vex1', asLeadOpcodes = ['0x73',], sSelector = 'mod /r'),
    'vexgrp15':     InstructionMap('vexgrp15',  sEncoding = 'vex1', asLeadOpcodes = ['0xae',], sSelector = 'memreg /r'),
    'vexgrp17':     InstructionMap('vexgrp17',  sEncoding = 'vex1', asLeadOpcodes = ['0xf3',], sSelector = '/r'),

    'vexmap2':      InstructionMap('vexmap2',   sEncoding = 'vex2'),
    'vexmap3':      InstructionMap('vexmap3',   sEncoding = 'vex3'),

    '3dnow':        InstructionMap('3dnow',     asLeadOpcodes = ['0x0f', '0x0f',]),
    'xopmap8':      InstructionMap('xopmap8',   sEncoding = 'xop8'),
    'xopmap9':      InstructionMap('xopmap9',   sEncoding = 'xop9'),
    'xopgrp1':      InstructionMap('xopgrp1',   sEncoding = 'xop9',  asLeadOpcodes = ['0x01'], sSelector = '/r'),
    'xopgrp2':      InstructionMap('xopgrp2',   sEncoding = 'xop9',  asLeadOpcodes = ['0x02'], sSelector = '/r'),
    'xopgrp3':      InstructionMap('xopgrp3',   sEncoding = 'xop9',  asLeadOpcodes = ['0x12'], sSelector = '/r'),
    'xopmap10':     InstructionMap('xopmap10',  sEncoding = 'xop10'),
    'xopgrp4':      InstructionMap('xopgrp4',   sEncoding = 'xop10', asLeadOpcodes = ['0x12'], sSelector = '/r'),
};
1549
1550
1551
class ParserException(Exception):
    """ Parser exception; sMessage becomes the exception message. """
    def __init__(self, sMessage):
        super(ParserException, self).__init__(sMessage);
1556
1557
1558class SimpleParser(object):
1559 """
1560 Parser of IEMAllInstruction*.cpp.h instruction specifications.
1561 """
1562
1563 ## @name Parser state.
1564 ## @{
1565 kiCode = 0;
1566 kiCommentMulti = 1;
1567 ## @}
1568
def __init__(self, sSrcFile, asLines, sDefaultMap):
    """
    Sets up the parser state for one source file.

    sSrcFile is the name used in messages, asLines the lines to parse and
    sDefaultMap the name (a key of g_dInstructionMaps) of the map given to
    instructions that do not specify any map themselves.
    """
    # Input and position.
    self.sSrcFile       = sSrcFile;
    self.asLines        = asLines;
    self.iLine          = 0;
    self.iState         = self.kiCode;
    self.sComment       = '';
    self.iCommentLine   = 0;
    self.aoCurInstrs    = [];

    assert sDefaultMap in g_dInstructionMaps;
    self.oDefaultMap    = g_dInstructionMaps[sDefaultMap];

    # Statistics.
    self.cTotalInstr    = 0;
    self.cTotalStubs    = 0;
    self.cTotalTagged   = 0;

    # Validation expressions.  Macro, mnemonic and stats names share one pattern.
    oReIdentifier        = re.compile('^[A-Za-z_][A-Za-z0-9_]*$');
    self.oReMacroName    = oReIdentifier;
    self.oReMnemonic     = oReIdentifier;
    self.oReStatsName    = oReIdentifier;
    self.oReFunctionName = re.compile('^iemOp_[A-Za-z_][A-Za-z0-9_]*$');
    self.oReGroupName    = re.compile('^og_[a-z0-9]+(|_[a-z0-9]+|_[a-z0-9]+_[a-z0-9]+)$');
    self.oReDisEnum      = re.compile('^OP_[A-Z0-9_]+$');
    self.fDebug          = True;

    # Tag name -> handler method.
    dHandlers = {
        '@opbrief':     self.parseTagOpBrief,
        '@opdesc':      self.parseTagOpDesc,
        '@opmnemonic':  self.parseTagOpMnemonic,
        '@op1':         self.parseTagOpOperandN,
        '@op2':         self.parseTagOpOperandN,
        '@op3':         self.parseTagOpOperandN,
        '@op4':         self.parseTagOpOperandN,
        '@oppfx':       self.parseTagOpPfx,
        '@opmaps':      self.parseTagOpMaps,
        '@opcode':      self.parseTagOpcode,
        '@opcodesub':   self.parseTagOpcodeSub,
        '@openc':       self.parseTagOpEnc,
        '@opfltest':    self.parseTagOpEFlags,
        '@opflmodify':  self.parseTagOpEFlags,
        '@opflundef':   self.parseTagOpEFlags,
        '@opflset':     self.parseTagOpEFlags,
        '@opflclear':   self.parseTagOpEFlags,
        '@ophints':     self.parseTagOpHints,
        '@opdisenum':   self.parseTagOpDisEnum,
        '@opmincpu':    self.parseTagOpMinCpu,
        '@opcpuid':     self.parseTagOpCpuId,
        '@opgroup':     self.parseTagOpGroup,
        '@opunused':    self.parseTagOpUnusedInvalid,
        '@opinvalid':   self.parseTagOpUnusedInvalid,
        '@opinvlstyle': self.parseTagOpUnusedInvalid,
        '@optest':      self.parseTagOpTest,
        '@optestign':   self.parseTagOpTestIgnore,
        '@optestignore': self.parseTagOpTestIgnore,
        '@opcopytests': self.parseTagOpCopyTests,
        '@oponly':      self.parseTagOpOnlyTest,
        '@oponlytest':  self.parseTagOpOnlyTest,
        '@opxcpttype':  self.parseTagOpXcptType,
        '@opstats':     self.parseTagOpStats,
        '@opfunction':  self.parseTagOpFunction,
        '@opdone':      self.parseTagOpDone,
    };
    # Numbered test tags, both as '@optestN' and '@optest[N]', for N = 0..47.
    for iTest in range(48):
        for sTagFmt in ('@optest%u', '@optest[%u]'):
            dHandlers[sTagFmt % (iTest,)] = self.parseTagOpTestNum;
    self.dTagHandlers = dHandlers;

    self.asErrors = [];
1635
def raiseError(self, sMessage):
    """
    Raises a ParserException for the current line; the message is prefixed
    with the source file name and self.iLine.
    """
    sFullMessage = "%s:%d: error: %s" % (self.sSrcFile, self.iLine, sMessage,);
    raise ParserException(sFullMessage);
1641
def raiseCommentError(self, iLineInComment, sMessage):
    """
    Like raiseError, except that the reported line number is
    self.iCommentLine + iLineInComment.
    """
    iErrorLine = self.iCommentLine + iLineInComment;
    raise ParserException("%s:%d: error: %s" % (self.sSrcFile, iErrorLine, sMessage,));
1647
def error(self, sMessage):
    """
    Records an error for the current line (self.iLine) in self.asErrors.
    Always returns False.
    """
    sEntry = u'%s:%d: error: %s\n' % (self.sSrcFile, self.iLine, sMessage,);
    self.asErrors.append(sEntry);
    return False;
1655
def errorComment(self, iLineInComment, sMessage):
    """
    Records an error for a line inside the current comment; the reported
    line number is self.iCommentLine + iLineInComment.
    Always returns False.
    """
    iErrorLine = self.iCommentLine + iLineInComment;
    self.asErrors.append(u'%s:%d: error: %s\n' % (self.sSrcFile, iErrorLine, sMessage,));
    return False;
1663
def printErrors(self):
    """
    Writes the collected errors, if any, to stderr in one go.
    Returns the number of errors.
    """
    cErrors = len(self.asErrors);
    if cErrors > 0:
        sys.stderr.write(u''.join(self.asErrors));
    return cErrors;
1672
def debug(self, sMessage):
    """
    For debugging: prints sMessage with a 'debug: ' prefix when self.fDebug
    is set, otherwise does nothing.
    """
    if not self.fDebug:
        return;
    print('debug: %s' % (sMessage,));
1679
1680
def addInstruction(self, iLine = None):
    """
    Creates a new instruction and adds it to both the global instruction
    list and the list of current instructions.

    The instruction is created for line iLine, or self.iLine when iLine is
    None.  Returns the new instruction.
    """
    iLineCreated = self.iLine if iLine is None else iLine;
    oNewInstr = Instruction(self.sSrcFile, iLineCreated);
    g_aoAllInstructions.append(oNewInstr);
    self.aoCurInstrs.append(oNewInstr);
    return oNewInstr;
1689
def deriveMnemonicAndOperandsFromStats(self, oInstr, sStats):
    """
    Derives the mnemonic and operands from an IEM stats base name like string.

    Does nothing if the instruction already has a mnemonic.  Otherwise the
    lowercased first '_' separated word becomes the mnemonic, and, provided
    the instruction has no operands yet, each further word is looked up in
    g_kdOpTypes and added as an operand.

    Returns False if one of those words is not a known operand type (the
    mnemonic and any operands added before it are kept), otherwise True.
    """
    if oInstr.sMnemonic is not None:
        return True;

    asWords = sStats.split('_');
    oInstr.sMnemonic = asWords[0].lower();

    asOpTypes = asWords[1:];
    if not asOpTypes or oInstr.aoOperands:
        return True;

    for sType in asOpTypes:
        if sType not in g_kdOpTypes:
            # Deliberately silent; no error is recorded for unknown operand types.
            return False;
        oInstr.aoOperands.append(Operand(g_kdOpTypes[sType][1], sType));
    return True;
1705
def doneInstructionOne(self, oInstr, iLine):
    """
    Complete the parsing by processing, validating and expanding raw inputs.

    Records iLine as the completion line, fills in whatever can be derived
    (mnemonic and operands, disassembler enum, stats name, function name,
    maps, encoding) and registers the instruction with its maps and with
    the by-stat and by-function dictionaries.  Returns True.
    """
    assert oInstr.iLineCompleted is None;
    oInstr.iLineCompleted = iLine;

    #
    # Neither specified instructions (those with @op tags) nor unspecified
    # legacy ones get any special treatment here at present.  For the latter
    # we generally only got a few things to go on, like:
    #       /** Opcode 0x0f 0x00 /0. */
    #       FNIEMOPRM_DEF(iemOp_Grp6_sldt)
    #

    #
    # Common defaults.
    #

    # Guess mnemonic and operands from stats (or else the function name) if the former is missing.
    if oInstr.sMnemonic is None:
        sStatsLike = oInstr.sStats;
        if sStatsLike is None and oInstr.sFunction is not None:
            sStatsLike = oInstr.sFunction.replace('iemOp_', '');
        if sStatsLike is not None:
            self.deriveMnemonicAndOperandsFromStats(oInstr, sStatsLike);

    # Derive the disassembler op enum constant from the mnemonic.
    if oInstr.sDisEnum is None and oInstr.sMnemonic is not None:
        oInstr.sDisEnum = 'OP_' + oInstr.sMnemonic.upper();

    # The operand types that go into derived stats and function names.
    asOperandTypes = [oOperand.sType for oOperand in oInstr.aoOperands if oOperand.sType];

    # Derive the IEM statistics base name from the function name, or else mnemonic and operand types.
    if oInstr.sStats is None:
        if oInstr.sFunction is not None:
            oInstr.sStats = oInstr.sFunction.replace('iemOp_', '');
        elif oInstr.sMnemonic is not None:
            oInstr.sStats = '_'.join([oInstr.sMnemonic,] + asOperandTypes);

    # Derive the IEM function name from mnemonic and operand types, or else the stats name.
    if oInstr.sFunction is None:
        if oInstr.sMnemonic is not None:
            oInstr.sFunction = 'iemOp_' + '_'.join([oInstr.sMnemonic,] + asOperandTypes);
        elif oInstr.sStats:
            oInstr.sFunction = 'iemOp_' + oInstr.sStats;

    #
    # Apply default map and then add the instruction to all its groups.
    #
    if not oInstr.aoMaps:
        oInstr.aoMaps = [ self.oDefaultMap, ];
    for oMap in oInstr.aoMaps:
        oMap.aoInstructions.append(oInstr);

    #
    # Derive encoding from operands and maps.  Left unset when the first
    # operand does not use ModR/M.
    #
    if oInstr.sEncoding is None:
        if not oInstr.aoOperands:
            fModRm = bool(oInstr.fUnused and oInstr.sSubOpcode);
            if oInstr.onlyInVexMaps():
                oInstr.sEncoding = 'VEX.ModR/M' if fModRm else 'VEX.fixed';
            else:
                oInstr.sEncoding = 'ModR/M' if fModRm else 'fixed';
        elif oInstr.aoOperands[0].usesModRM():
            fVex = (len(oInstr.aoOperands) >= 2 and oInstr.aoOperands[1].sWhere == 'vvvv') \
                or oInstr.onlyInVexMaps();
            oInstr.sEncoding = 'VEX.ModR/M' if fVex else 'ModR/M';

    #
    # Check the opstat value and add it to the opstat indexed dictionary.
    #
    if oInstr.sStats:
        if oInstr.sStats in g_dAllInstructionsByStat:
            self.error('Duplicate opstat value "%s"\nnew: %s\nold: %s'
                       % (oInstr.sStats, oInstr, g_dAllInstructionsByStat[oInstr.sStats],));
        else:
            g_dAllInstructionsByStat[oInstr.sStats] = oInstr;

    #
    # Add to function indexed dictionary.  We allow multiple instructions per function.
    #
    if oInstr.sFunction:
        g_dAllInstructionsByFunction.setdefault(oInstr.sFunction, []).append(oInstr);

    #self.debug('%d..%d: %s; %d @op tags' % (oInstr.iLineCreated, oInstr.iLineCompleted, oInstr.sFunction, oInstr.cOpTags));
    return True;
1811
def doneInstructions(self, iLineInComment = None):
    """
    Completes all the current instructions and resets the per-instruction
    parser state.

    Each instruction in self.aoCurInstrs is passed to doneInstructionOne
    together with the line it was completed on: self.iLine when
    iLineInComment is None, otherwise self.iCommentLine + iLineInComment.
    The stub and instruction totals are updated, after which the comment
    buffer and the current instruction list are cleared.

    Returns True.
    """
    for oCurInstr in self.aoCurInstrs:
        if iLineInComment is None:
            iLineCompleted = self.iLine;
        else:
            iLineCompleted = self.iCommentLine + iLineInComment;
        self.doneInstructionOne(oCurInstr, iLineCompleted);
        if oCurInstr.fStub:
            self.cTotalStubs += 1;

    self.cTotalInstr += len(self.aoCurInstrs);

    # Start afresh for whatever comes next in the input.
    self.aoCurInstrs = [];
    self.sComment = '';
    return True;
1826
def setInstrunctionAttrib(self, sAttrib, oValue, fOverwrite = False):
    """
    Assigns oValue to the attribute named sAttrib on every current
    instruction.

    Unless fOverwrite is exactly True, an instruction whose attribute is
    already set (i.e. is not None) is left untouched.
    """
    fForce = fOverwrite is True;
    for oCurInstr in self.aoCurInstrs:
        if fForce or getattr(oCurInstr, sAttrib) is None:
            setattr(oCurInstr, sAttrib, oValue);
1838
def setInstrunctionArrayAttrib(self, sAttrib, iEntry, oValue, fOverwrite = False):
    """
    Assigns oValue to entry iEntry of the list attribute sAttrib on every
    current instruction.

    The list is padded with None entries as needed so that iEntry exists.
    Unless fOverwrite is exactly True, an entry that is already set (i.e. is
    not None) is left untouched.
    """
    for oCurInstr in self.aoCurInstrs:
        aoEntries = getattr(oCurInstr, sAttrib);
        cMissing = iEntry + 1 - len(aoEntries);
        if cMissing > 0:
            aoEntries.extend([None,] * cMissing);
        if fOverwrite is True or aoEntries[iEntry] is None:
            aoEntries[iEntry] = oValue;
1850
def parseCommentOldOpcode(self, asLines):
    """
    Deals with 'Opcode 0xff /4' like comments.

    When the first comment line starts with the word 'Opcode' followed by a
    hexadecimal byte (0x.. or 0X..), everything after the 'Opcode' keyword is
    recorded as the sRawOldOpcodes attribute of the current instructions
    (via setInstrunctionAttrib, so existing values are not overwritten).
    An upper case '0X' prefix is normalized to '0x'.

    Always returns False.
    """
    if not asLines:
        return False;

    asWords = asLines[0].split();
    if    len(asWords) >= 2 \
      and asWords[0] == 'Opcode' \
      and (   asWords[1].startswith('0x')
           or asWords[1].startswith('0X')):
        # Skip the 'Opcode' keyword itself, keeping the bytes and modifiers.
        asOpcodes = [];
        for sWord in asWords[1:]:
            if sWord.startswith('0X'):
                sWord = '0x' + sWord[2:];
            asOpcodes.append(sWord);
        self.setInstrunctionAttrib('sRawOldOpcodes', ' '.join(asOpcodes));

    return False;
1866
def ensureInstructionForOpTag(self, iTagLine):
    """
    Makes sure there is a current instruction for the op-tag being parsed.

    When there are no current instructions, one is added for line
    self.iCommentLine + iTagLine.  The op-tag count of every current
    instruction is then incremented, and self.cTotalTagged is bumped for each
    instruction receiving its first tag.

    Returns the last of the current instructions.
    """
    if not self.aoCurInstrs:
        self.addInstruction(self.iCommentLine + iTagLine);

    for oCurInstr in self.aoCurInstrs:
        cTags = oCurInstr.cOpTags + 1;
        oCurInstr.cOpTags = cTags;
        if cTags == 1:
            self.cTotalTagged += 1;

    return self.aoCurInstrs[-1];
1876
@staticmethod
def flattenSections(aasSections):
    """
    Turns each multi-line section into one string by stripping the lines and
    joining them with single spaces.  Empty sections are dropped.

    Returns a list of strings, one per non-empty section.
    """
    return [ ' '.join([sLine.strip() for sLine in asLines])
             for asLines in aasSections
             if asLines ];
1888
@staticmethod
def flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = '\n'):
    """
    Flattens all the sections into a single string.

    The lines of a section are stripped and joined using sLineSep.  Empty
    sections are skipped, and every non-empty section other than the very
    first one in aasSections is preceded by sSectionSep.  There is no
    trailing section separator.
    """
    # The typical case is one section holding a single line.
    if len(aasSections) == 1 and len(aasSections[0]) == 1:
        return aasSections[0][0].strip();

    asPieces = [];
    for iSection, asLines in enumerate(aasSections):
        if not asLines:
            continue;
        if iSection > 0:
            asPieces.append(sSectionSep);
        asPieces.append(sLineSep.join([sLine.strip() for sLine in asLines]));
    return ''.join(asPieces);
1906
1907
1908
1909 ## @name Tag parsers
1910 ## @{
1911
def parseTagOpBrief(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \@opbrief
    Value:  Text description, multiple sections, appended.

    Brief description.  If not given, it's the first sentence from @opdesc.

    The value is flattened into one string and terminated with a full stop if
    it lacks one.  It must be a single sentence of at most 180 characters.
    The result is stored in oInstr.sBrief; a second brief for the same
    instruction is rejected.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sBrief = self.flattenAllSections(aasSections);
    if not sBrief:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if sBrief[-1] != '.':
        sBrief = sBrief + '.';
    if len(sBrief) > 180:
        return self.errorComment(iTagLine, '%s: value too long (max 180 chars): %s' % (sTag, sBrief));

    # Find the first dot that is followed by a space (or ends the string).
    # Anything but the final dot means there is more than one sentence.
    offDot = sBrief.find('.');
    while offDot >= 0 and offDot < len(sBrief) - 1 and sBrief[offDot + 1] != ' ':
        offDot = sBrief.find('.', offDot + 1);
    if offDot >= 0 and offDot != len(sBrief) - 1:
        return self.errorComment(iTagLine, '%s: only one sentence: %s' % (sTag, sBrief));

    # Update the instruction.
    if oInstr.sBrief is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite brief "%s" with "%s"'
                                           % (sTag, oInstr.sBrief, sBrief,));
    oInstr.sBrief = sBrief;

    _ = iEndLine;
    return True;
1941
def parseTagOpDesc(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \@opdesc
    Value:  Text description, multiple sections, appended.

    Describes the instruction.  Each non-empty section is flattened into one
    string and appended to oInstr.asDescSections.

    Always returns True.
    """
    _ = sTag; _ = iEndLine;

    oInstr = self.ensureInstructionForOpTag(iTagLine);
    if not aasSections:
        return True;

    asFlattened = self.flattenSections(aasSections);
    oInstr.asDescSections.extend(asFlattened);
    return True;
1956
def parseTagOpMnemonic(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \@opmnemonic
    Value:  mnemonic

    The 'mnemonic' value must match self.oReMnemonic.  Because of prefixes,
    groups and whatnot, there are times when the mnemonic isn't that of an
    actual assembler mnemonic.

    The value is stored in oInstr.sMnemonic, which may only be set once.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value, then validate it and the instruction state.
    sNewMnemonic = self.flattenAllSections(aasSections);
    if not self.oReMnemonic.match(sNewMnemonic):
        sError = '%s: invalid menmonic name: "%s"' % (sTag, sNewMnemonic,);
    elif oInstr.sMnemonic is not None:
        sError = '%s: attempting to overwrite menmonic "%s" with "%s"' \
               % (sTag, oInstr.sMnemonic, sNewMnemonic,);
    else:
        oInstr.sMnemonic = sNewMnemonic;
        return True;
    return self.errorComment(iTagLine, sError);
1979
def parseTagOpOperandN(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tags:  \@op1, \@op2, \@op3, \@op4
    Value: [where:]type

    The 'where' value indicates where the operand is found, like the 'reg'
    part of the ModR/M encoding.  It must be a key of g_kdOpLocations.  When
    omitted, the default location registered for the type in g_kdOpTypes is
    used.

    The 'type' value indicates the operand type.  These follow the types
    given in the opcode tables in the CPU reference manuals.  It must be a
    key of g_kdOpTypes.

    The operand index is taken from the last character of the tag.  An
    operand that has already been specified cannot be overwritten.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    idxOp = int(sTag[-1]) - 1;
    assert idxOp >= 0 and idxOp < 4;

    # flatten, split up, and validate the "where:type" value.
    sFlattened = self.flattenAllSections(aasSections);
    asSplit = sFlattened.split(':');
    if len(asSplit) == 1:
        sType = asSplit[0];
        sWhere = None;
    elif len(asSplit) == 2:
        (sWhere, sType) = asSplit;
    else:
        return self.errorComment(iTagLine, 'expected %s value on format "[<where>:]<type>" not "%s"' % (sTag, sFlattened,));

    if sType not in g_kdOpTypes:
        return self.errorComment(iTagLine, '%s: invalid type value "%s", valid: %s'
                                           % (sTag, sType, ', '.join(g_kdOpTypes.keys()),));
    if sWhere is None:
        sWhere = g_kdOpTypes[sType][1];
    elif sWhere not in g_kdOpLocations:
        return self.errorComment(iTagLine, '%s: invalid where value "%s", valid: %s'
                                           % (sTag, sWhere, ', '.join(g_kdOpLocations.keys()),));

    # Insert the operand, refusing to overwrite an existing one.
    while idxOp >= len(oInstr.aoOperands):
        oInstr.aoOperands.append(None);
    if oInstr.aoOperands[idxOp] is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s:%s" with "%s:%s"'
                                           % ( sTag, oInstr.aoOperands[idxOp].sWhere, oInstr.aoOperands[idxOp].sType,
                                               sWhere, sType,));
    oInstr.aoOperands[idxOp] = Operand(sWhere, sType);

    _ = iEndLine;
    return True;
2029
def parseTagOpMaps(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \@opmaps
    Value:  map[,map2]

    Indicates which maps the instruction is in.  There is a default map
    associated with each input file.

    Every name must be a key of g_dInstructionMaps and must not already be
    assigned to the instruction.  The maps are appended to oInstr.aoMaps.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten into a comma separated list and split that up.
    asNames = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',').split(',');
    if not asNames:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));

    # Complain about the first unknown map name, if any.
    asUnknown = [sName for sName in asNames if sName not in g_dInstructionMaps];
    if asUnknown:
        return self.errorComment(iTagLine, '%s: invalid map value: %s (valid values: %s)'
                                           % (sTag, asUnknown[0], ', '.join(g_dInstructionMaps.keys()),));

    # Complain about the first already assigned map that is named again.
    asAssigned = [oMap.sName for oMap in oInstr.aoMaps if oMap.sName in asNames];
    if asAssigned:
        return self.errorComment(iTagLine, '%s: duplicate map assignment: %s' % (sTag, asAssigned[0]));

    # Add the maps.  Duplicates within the value itself are reported, but not fatal.
    for sName in asNames:
        oNewMap = g_dInstructionMaps[sName];
        if oNewMap in oInstr.aoMaps:
            self.errorComment(iTagLine, '%s: duplicate map assignment (input): %s' % (sTag, sName));
        else:
            oInstr.aoMaps.append(oNewMap);

    return True;
2064
def parseTagOpPfx(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \@oppfx
    Value:  n/a|none|0x66|0xf3|0xf2

    Required prefix for the instruction.  (In a (E)VEX context this is the
    value of the 'pp' field rather than an actual prefix.)

    The value is lower cased.  'n/a' maps to None, 'none' is kept as is, and
    a two character value gets '0x' prepended before being checked as an
    opcode byte.  Anything but None must also be a key of g_kdPrefixes.
    The result is stored in oInstr.sPrefix, which cannot be overwritten.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sFlattened = self.flattenAllSections(aasSections);
    asPrefixes = sFlattened.split();
    if not asPrefixes:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if len(asPrefixes) > 1:
        return self.errorComment(iTagLine, '%s: max one prefix: %s' % (sTag, asPrefixes,));

    sPrefix = asPrefixes[0].lower();
    if sPrefix == 'n/a':
        sPrefix = None;
    elif sPrefix != 'none':
        if len(sPrefix) == 2:
            sPrefix = '0x' + sPrefix;
        if not _isValidOpcodeByte(sPrefix):
            return self.errorComment(iTagLine, '%s: invalid prefix: %s' % (sTag, sPrefix,));

    if sPrefix is not None and sPrefix not in g_kdPrefixes:
        return self.errorComment(iTagLine, '%s: invalid prefix: %s (valid %s)' % (sTag, sPrefix, g_kdPrefixes,));

    # Set it.
    if oInstr.sPrefix is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sPrefix, sPrefix,));
    oInstr.sPrefix = sPrefix;

    _ = iEndLine;
    return True;
2102
def parseTagOpcode(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \@opcode
    Value:  0x?? | /reg (TODO: | mr/reg | 11 /reg | !11 /reg | 11 mr/reg | !11 mr/reg)

    The opcode byte or sub-byte for the instruction in the context of a map.

    Accepted forms are an opcode byte (as judged by _isValidOpcodeByte),
    '/<reg>', '11/<reg>' and '!11/<reg>', where <reg> is a single digit in
    the range 0 thru 7 (the ModR/M reg field is three bits wide).
    The value is stored in oInstr.sOpcode, which cannot be overwritten.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sOpcode = self.flattenAllSections(aasSections);
    fValid = _isValidOpcodeByte(sOpcode);
    if not fValid:
        for sLead in ('/', '11/', '!11/'):
            if     len(sOpcode) == len(sLead) + 1 \
               and sOpcode.startswith(sLead) \
               and sOpcode[-1] in '01234567':
                fValid = True;
                break;
    if not fValid:
        return self.errorComment(iTagLine, '%s: invalid opcode: %s' % (sTag, sOpcode,));

    # Set it.
    if oInstr.sOpcode is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sOpcode, sOpcode,));
    oInstr.sOpcode = sOpcode;

    _ = iEndLine;
    return True;
2132
def parseTagOpcodeSub(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \@opcodesub
    Value:  none | 11 mr/reg | !11 mr/reg

    This is a simple way of dealing with encodings where the mod=3 and mod!=3
    represents exactly two different instructions.  The more proper way would
    be to go via maps with two members, but this is faster.

    The value must be a key of g_kdSubOpcodes; the first element of the
    corresponding entry is what gets stored in oInstr.sSubOpcode, which
    cannot be overwritten.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value and look it up.
    sRawValue = self.flattenAllSections(aasSections);
    if sRawValue not in g_kdSubOpcodes:
        return self.errorComment(iTagLine, '%s: invalid sub opcode: %s (valid: 11, !11, none)' % (sTag, sRawValue,));
    sNewSubOpcode = g_kdSubOpcodes[sRawValue][0];

    # Refuse to replace an existing value.
    sOldSubOpcode = oInstr.sSubOpcode;
    if sOldSubOpcode is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                           % ( sTag, sOldSubOpcode, sNewSubOpcode,));

    oInstr.sSubOpcode = sNewSubOpcode;
    return True;
2158
def parseTagOpEnc(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \@openc
    Value:  ModR/M|fixed|prefix|<map name>

    The instruction operand encoding style.

    The value is accepted when it is a key of g_kdEncodings, a key of
    g_dInstructionMaps, or passes _isValidOpcodeByte.  It is stored in
    oInstr.sEncoding, which cannot be overwritten.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value, then validate it and the instruction state.
    sNewEncoding = self.flattenAllSections(aasSections);
    fKnown = sNewEncoding in g_kdEncodings \
          or sNewEncoding in g_dInstructionMaps \
          or _isValidOpcodeByte(sNewEncoding);
    if not fKnown:
        sError = '%s: invalid encoding: %s' % (sTag, sNewEncoding,);
    elif oInstr.sEncoding is not None:
        sError = '%s: attempting to overwrite "%s" with "%s"' \
               % ( sTag, oInstr.sEncoding, sNewEncoding,);
    else:
        oInstr.sEncoding = sNewEncoding;
        return True;
    return self.errorComment(iTagLine, sError);
2185
## EFlags tag to Instruction attribute name.
## parseTagOpEFlags indexes this by the tag it is handling to find the name
## of the instruction attribute to read (getattr) and update (setattr).
kdOpFlagToAttr = {
    '@opfltest': 'asFlTest',
    '@opflmodify': 'asFlModify',
    '@opflundef': 'asFlUndefined',
    '@opflset': 'asFlSet',
    '@opflclear': 'asFlClear',
};
2194
def parseTagOpEFlags(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tags:   \@opfltest, \@opflmodify, \@opflundef, \@opflset, \@opflclear
    Value:  <eflags specifier>

    The value is a comma separated list of flags, each of which must be a
    key of g_kdEFlagsMnemonics (surrounding whitespace is tolerated), or the
    single word 'none' (any case) for an empty list.  The list is stored in
    the instruction attribute that self.kdOpFlagToAttr gives for the tag; an
    attribute that is already set cannot be overwritten.

    Returns True on success.  Returns False if any flag is invalid, and the
    result of self.errorComment() on an overwrite attempt.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten into a comma separated list, split it up and validate the flags.
    sFlattened = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',');
    asNewFlags = sFlattened.split(',');
    if len(asNewFlags) == 1 and asNewFlags[0].lower() == 'none':
        asNewFlags = [];
    else:
        fRc = True;
        for iFlag, sFlag in enumerate(asNewFlags):
            if sFlag in g_kdEFlagsMnemonics:
                continue;
            sStripped = sFlag.strip();
            if sStripped in g_kdEFlagsMnemonics:
                asNewFlags[iFlag] = sStripped;
            else:
                fRc = self.errorComment(iTagLine, '%s: invalid EFLAGS value: %s' % (sTag, sFlag,));
        if not fRc:
            return False;

    # Store the list, refusing to replace an existing one.
    sAttrib = self.kdOpFlagToAttr[sTag];
    asOldFlags = getattr(oInstr, sAttrib);
    if asOldFlags is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, asOldFlags, asNewFlags,));
    setattr(oInstr, sAttrib, asNewFlags);
    return True;
2226
def parseTagOpHints(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \@ophints
    Value:  Comma or space separated list of flags and hints.

    This covers the disassembler flags table and more.

    Each hint must be a key of g_kdHints.  The single word 'none' (any case)
    gives an empty list.  Valid hints are added as keys to oInstr.dHints;
    hints that are already present are reported as duplicates without
    failing the tag.

    Returns True on success, False if any of the hints is invalid.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    asHints = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asHints) == 1 and asHints[0].lower() == 'none':
        asHints = [];
    else:
        fRc = True;
        for iHint, sHint in enumerate(asHints):
            if sHint not in g_kdHints:
                if sHint.strip() in g_kdHints:
                    # Replace the list entry (strings cannot be modified in place).
                    asHints[iHint] = sHint.strip();
                else:
                    fRc = self.errorComment(iTagLine, '%s: invalid hint value: %s' % (sTag, sHint,));
        if not fRc:
            return False;

    # Append them.
    for sHint in asHints:
        if sHint not in oInstr.dHints:
            oInstr.dHints[sHint] = True; # (dummy value, using dictionary for speed)
        else:
            self.errorComment(iTagLine, '%s: duplicate hint: %s' % ( sTag, sHint,));

    _ = iEndLine;
    return True;
2260
def parseTagOpDisEnum(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \@opdisenum
    Value:  OP_XXXX

    Selects a specific (legacy) disassembler enum value for the instruction.

    The first word of the value must match self.oReDisEnum.  Extra words are
    reported as an error, but the first one is still processed.  The value is
    stored in oInstr.sDisEnum, which cannot be overwritten.

    Returns True on success, False if there is no value, otherwise the
    result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and split.
    asValues = self.flattenAllSections(aasSections).split();
    if len(asValues) != 1:
        self.errorComment(iTagLine, '%s: expected exactly one value: %s' % (sTag, asValues,));
        if len(asValues) == 0:
            return False;

    # Validate the first word and the instruction state.
    sNewDisEnum = asValues[0];
    if not self.oReDisEnum.match(sNewDisEnum):
        sError = '%s: invalid disassembler OP_XXXX enum: %s (pattern: %s)' \
               % (sTag, sNewDisEnum, self.oReDisEnum.pattern);
    elif oInstr.sDisEnum is not None:
        sError = '%s: attempting to overwrite "%s" with "%s"' % (sTag, oInstr.sDisEnum, sNewDisEnum,);
    else:
        oInstr.sDisEnum = sNewDisEnum;
        return True;
    return self.errorComment(iTagLine, sError);
2289
def parseTagOpMinCpu(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \@opmincpu
    Value:  <simple CPU name>

    Indicates when this instruction was introduced.

    The first word of the value must be a key of g_kdCpuNames.  Extra words
    are reported as an error, but the first one is still processed.  The name
    is stored in oInstr.sMinCpu unless a different value has been set
    already, in which case an error is reported and the existing value is
    kept.

    Returns True when a valid CPU name was given, otherwise the result of
    self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value, split into words, make sure there's just one, valid it.
    asCpus = self.flattenAllSections(aasSections).split();
    if not asCpus:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if len(asCpus) > 1:
        self.errorComment(iTagLine, '%s: exactly one CPU name, please: %s' % (sTag, ' '.join(asCpus),));

    sMinCpu = asCpus[0];
    if sMinCpu not in g_kdCpuNames:
        return self.errorComment(iTagLine, '%s: invalid CPU name: %s (names: %s)'
                                           % (sTag, sMinCpu, ','.join(sorted(g_kdCpuNames)),));

    # Set it.  The instruction must not be touched before this point, or the
    # overwrite check below cannot detect conflicting values.
    if oInstr.sMinCpu is None:
        oInstr.sMinCpu = sMinCpu;
    elif oInstr.sMinCpu != sMinCpu:
        self.errorComment(iTagLine, '%s: attemting to overwrite "%s" with "%s"' % (sTag, oInstr.sMinCpu, sMinCpu,));

    _ = iEndLine;
    return True;
2319
def parseTagOpCpuId(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \@opcpuid
    Value:  none | <CPUID flag specifier>

    CPUID feature bit which is required for the instruction to be present.

    The value is a comma or space separated list where each entry must be a
    key of g_kdCpuIdFlags.  The single word 'none' (any case) gives an empty
    list.  Valid entries are appended to oInstr.asCpuIds; entries already
    present are reported as duplicates without failing the tag.

    Returns True on success, False if any of the entries is invalid.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    asCpuIds = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asCpuIds) == 1 and asCpuIds[0].lower() == 'none':
        asCpuIds = [];
    else:
        fRc = True;
        for iCpuId, sCpuId in enumerate(asCpuIds):
            if sCpuId not in g_kdCpuIdFlags:
                if sCpuId.strip() in g_kdCpuIdFlags:
                    # Replace the list entry (strings cannot be modified in place).
                    asCpuIds[iCpuId] = sCpuId.strip();
                else:
                    fRc = self.errorComment(iTagLine, '%s: invalid CPUID value: %s' % (sTag, sCpuId,));
        if not fRc:
            return False;

    # Append them.
    for sCpuId in asCpuIds:
        if sCpuId not in oInstr.asCpuIds:
            oInstr.asCpuIds.append(sCpuId);
        else:
            self.errorComment(iTagLine, '%s: duplicate CPUID: %s' % ( sTag, sCpuId,));

    _ = iEndLine;
    return True;
2353
def parseTagOpGroup(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \@opgroup
    Value:  op_grp1[_subgrp2[_subsubgrp3]]

    Instruction grouping.

    The value must be exactly one word matching self.oReGroupName.  It is
    stored in oInstr.sGroup, which cannot be overwritten.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and split the value, then validate it and the instruction state.
    asNames = self.flattenAllSections(aasSections).split();
    if len(asNames) != 1:
        sError = '%s: exactly one group, please: %s' % (sTag, asNames,);
    elif not self.oReGroupName.match(asNames[0]):
        sError = '%s: invalid group name: %s (valid: %s)' \
               % (sTag, asNames[0], self.oReGroupName.pattern);
    elif oInstr.sGroup is not None:
        sError = '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sGroup, asNames[0],);
    else:
        oInstr.sGroup = asNames[0];
        return True;
    return self.errorComment(iTagLine, sError);
2379
def parseTagOpUnusedInvalid(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \@opunused, \@opinvalid, \@opinvlstyle
    Value:  <invalid opcode behaviour style>

    The \@opunused indicates the specification is for a currently unused
    instruction encoding.

    The \@opinvalid indicates the specification is for an invalid currently
    instruction encoding (like UD2).

    The \@opinvlstyle just indicates how CPUs decode the instruction when
    not supported (\@opcpuid, \@opmincpu) or disabled.

    The value must be exactly one word that is a key of g_kdInvalidStyles.
    It is stored in oInstr.sInvalidStyle, which cannot be overwritten.  The
    '@opunused' tag additionally sets oInstr.fUnused and the '@opinvalid' tag
    sets oInstr.fInvalid.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and split the value, then validate it and the instruction state.
    asValues = self.flattenAllSections(aasSections).split();
    if len(asValues) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid behviour style, please: %s' % (sTag, asValues,));
    sNewStyle = asValues[0];
    if sNewStyle not in g_kdInvalidStyles:
        return self.errorComment(iTagLine, '%s: invalid invalid behaviour style: %s (valid: %s)'
                                           % (sTag, sNewStyle, g_kdInvalidStyles.keys(),));
    sOldStyle = oInstr.sInvalidStyle;
    if sOldStyle is not None:
        return self.errorComment(iTagLine,
                                 '%s: attempting to overwrite "%s" with "%s" (only one @opunused, @opinvalid, @opinvlstyle)'
                                 % ( sTag, sOldStyle, sNewStyle,));

    # Store the style and flag the instruction according to the tag used.
    oInstr.sInvalidStyle = sNewStyle;
    if sTag == '@opinvalid':
        oInstr.fInvalid = True;
    elif sTag == '@opunused':
        oInstr.fUnused = True;
    return True;
2417
def parseTagOpTest(self, sTag, aasSections, iTagLine, iEndLine): # pylint: disable=too-many-locals
    """
    Tag:        \@optest
    Value:      [<selectors>[ ]?] <inputs> -> <outputs>
    Example:    mode==64bit / in1=0xfffffffe:dw in2=1:dw -> out1=0xffffffff:dw outfl=a?,p?

    The main idea here is to generate basic instruction tests.

    The probably simplest way of handling the diverse input, would be to use
    it to produce size optimized byte code for a simple interpreter that
    modifies the register input and output states.

    An alternative to the interpreter would be creating multiple tables,
    but that becomes rather complicated wrt what goes where and then to use
    them in an efficient manner.

    Each section of the value is parsed as one test.  Tests that parse
    cleanly are appended to oInstr.aoTests; problems are reported thru
    self.errorComment() and the offending test is dropped.

    Always returns True.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    #
    # Do it section by section.
    #
    for asSectionLines in aasSections:
        #
        # Sort the input into outputs, inputs and selector conditions.
        #
        sFlatSection = self.flattenAllSections([asSectionLines,]);
        if not sFlatSection:
            self.errorComment(iTagLine, '%s: missing value (dbg: aasSections=%s)' % ( sTag, aasSections));
            continue;
        oTest = InstructionTest(oInstr);

        asSelectors = [];
        asInputs = [];
        asOutputs = [];
        asCur = asOutputs;
        fRc = True;
        asWords = sFlatSection.split();
        # The words are processed back to front: outputs first, then inputs
        # after the '->', and finally selectors after the '/'.
        for iWord in range(len(asWords) - 1, -1, -1):
            sWord = asWords[iWord];
            # Check for array switchers.  Note! The current list must be
            # compared by identity, as distinct lists may hold equal content.
            if sWord == '->':
                if asCur is not asOutputs:
                    fRc = self.errorComment(iTagLine, '%s: "->" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asInputs;
            elif sWord == '/':
                if asCur is not asInputs:
                    fRc = self.errorComment(iTagLine, '%s: "/" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asSelectors;
            else:
                asCur.insert(0, sWord);

        #
        # Validate and add selectors.
        #
        for sCond in asSelectors:
            # Predicates are shorthand that expand to a full condition.
            sCondExp = TestSelector.kdPredicates.get(sCond, sCond);
            oSelector = None;
            for sOp in TestSelector.kasCompareOps:
                off = sCondExp.find(sOp);
                if off >= 0:
                    sVariable = sCondExp[:off];
                    sValue = sCondExp[off + len(sOp):];
                    if sVariable in TestSelector.kdVariables:
                        if sValue in TestSelector.kdVariables[sVariable]:
                            oSelector = TestSelector(sVariable, sOp, sValue);
                        else:
                            self.errorComment(iTagLine, '%s: invalid condition value "%s" in "%s" (valid: %s)'
                                                        % ( sTag, sValue, sCond,
                                                            TestSelector.kdVariables[sVariable].keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid condition variable "%s" in "%s" (valid: %s)'
                                                    % ( sTag, sVariable, sCond, TestSelector.kdVariables.keys(),));
                    break;
            if oSelector is not None:
                for oExisting in oTest.aoSelectors:
                    if oExisting.sVariable == oSelector.sVariable:
                        self.errorComment(iTagLine, '%s: already have a selector for variable "%s" (existing: %s, new: %s)'
                                                    % ( sTag, oSelector.sVariable, oExisting, oSelector,));
                oTest.aoSelectors.append(oSelector);
            else:
                fRc = self.errorComment(iTagLine, '%s: failed to parse selector: %s' % ( sTag, sCond,));

        #
        # Validate outputs and inputs, adding them to the test as we go along.
        #
        for asItems, sDesc, aoDst in [ (asInputs, 'input', oTest.aoInputs), (asOutputs, 'output', oTest.aoOutputs)]:
            asValidFieldKinds = [ 'both', sDesc, ];
            for sItem in asItems:
                oItem = None;
                # Only the first operator found in the item is considered.
                for sOp in TestInOut.kasOperators:
                    off = sItem.find(sOp);
                    if off < 0:
                        continue;
                    sField = sItem[:off];
                    sValueType = sItem[off + len(sOp):];
                    if     sField in TestInOut.kdFields \
                       and TestInOut.kdFields[sField][1] in asValidFieldKinds:
                        # The value may carry an explicit ':type' suffix,
                        # otherwise the field entry supplies the type.
                        asSplit = sValueType.split(':', 1);
                        sValue = asSplit[0];
                        sType = asSplit[1] if len(asSplit) > 1 else TestInOut.kdFields[sField][0];
                        if sType in TestInOut.kdTypes:
                            oValid = TestInOut.kdTypes[sType].validate(sValue);
                            if oValid is True:
                                if not TestInOut.kdTypes[sType].isAndOrPair(sValue) or sOp == '&|=':
                                    oItem = TestInOut(sField, sOp, sValue, sType);
                                else:
                                    self.errorComment(iTagLine, '%s: and-or %s value "%s" can only be used with "&|="'
                                                                % ( sTag, sDesc, sItem, ));
                            else:
                                self.errorComment(iTagLine, '%s: invalid %s value "%s" in "%s" (type: %s): %s'
                                                            % ( sTag, sDesc, sValue, sItem, sType, oValid, ));
                        else:
                            self.errorComment(iTagLine, '%s: invalid %s type "%s" in "%s" (valid types: %s)'
                                                        % ( sTag, sDesc, sType, sItem, TestInOut.kdTypes.keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid %s field "%s" in "%s"\nvalid fields: %s'
                                                    % ( sTag, sDesc, sField, sItem,
                                                        ', '.join([sKey for sKey in TestInOut.kdFields.keys()
                                                                   if TestInOut.kdFields[sKey][1] in asValidFieldKinds]),));
                    break;
                if oItem is not None:
                    for oExisting in aoDst:
                        if oExisting.sField == oItem.sField and oExisting.sOp == oItem.sOp:
                            self.errorComment(iTagLine,
                                              '%s: already have a "%s" assignment for field "%s" (existing: %s, new: %s)'
                                              % ( sTag, oItem.sOp, oItem.sField, oExisting, oItem,));
                    aoDst.append(oItem);
                else:
                    fRc = self.errorComment(iTagLine, '%s: failed to parse assignment: %s' % ( sTag, sItem,));

        #
        # Add the test to the instruction if it parsed cleanly, otherwise
        # dump what we managed to make of it.
        #
        if fRc:
            oInstr.aoTests.append(oTest);
        else:
            self.errorComment(iTagLine, '%s: failed to parse test: %s' % (sTag, ' '.join(asWords),));
            self.errorComment(iTagLine, '%s: asSelectors=%s / asInputs=%s -> asOutputs=%s'
                                        % (sTag, asSelectors, asInputs, asOutputs,));

    _ = iEndLine;
    return True;
2562
def parseTagOpTestNum(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Numbered \@optest tag.  Either \@optest42 or \@optest[42].

    Reports an error if the number differs from the count of tests the
    instruction already has, then hands the tag over to parseTagOpTest and
    returns its result.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Pick the digits out of the tag, they are bracketed in the second form.
    sNumber = sTag[8:-1] if sTag[-1] == ']' else sTag[7:];
    iTest = int(sNumber);

    cTests = len(oInstr.aoTests);
    if iTest != cTests:
        self.errorComment(iTagLine, '%s: incorrect test number: %u, actual %u' % (sTag, iTest, len(oInstr.aoTests),));
    return self.parseTagOpTest(sTag, aasSections, iTagLine, iEndLine);
2578
def parseTagOpTestIgnore(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \@optestign | \@optestignore
    Value:  <value is ignored>

    This is a simple trick to ignore a test while debugging another.
    Nothing is parsed or recorded.

    See also \@oponlytest.

    Always returns True.
    """
    _ = (sTag, aasSections, iTagLine, iEndLine);
    return True;
2590
def parseTagOpCopyTests(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@opcopytests
    Value:      <opstat | function> [..]
    Example:    \@opcopytests add_Eb_Gb

    Trick to avoid duplicating tests for different encodings of the same
    operation.

    Each reference must match self.oReStatsName or self.oReFunctionName and
    is appended to oInstr.asCopyTests.  Invalid and duplicate references are
    reported, but do not fail the tag.

    Returns True, unless no reference was given at all in which case the
    result of self.errorComment() is returned.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Only the references are recorded here.  The copying is executed after
    # parsing all the input so forward references can be handled.
    asReferences = self.flattenAllSections(aasSections).split();
    if not asReferences:
        return self.errorComment(iTagLine, '%s: requires at least on reference value' % (sTag,));

    for sReference in asReferences:
        if sReference in oInstr.asCopyTests:
            self.errorComment(iTagLine, '%s: ignoring duplicate "%s"' % (sTag, sReference,));
            continue;
        if self.oReStatsName.match(sReference) or self.oReFunctionName.match(sReference):
            oInstr.asCopyTests.append(sReference);
            continue;
        self.errorComment(iTagLine, '%s: invalid instruction reference (opstat or function) "%s" (valid: %s or %s)'
                                    % (sTag, sReference, self.oReStatsName.pattern, self.oReFunctionName.pattern));

    return True;
2619
def parseTagOpOnlyTest(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \@oponlytest | \@oponly
    Value:  none

    Only test instructions with this tag.  This is a trick that is handy
    for singling out one or two new instructions or tests.

    The instruction is added to g_aoOnlyTestInstructions unless it is in
    there already.

    See also \@optestignore.

    Returns True on success, or the result of self.errorComment() if the
    tag was given a value.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The tag does not take any value.
    sUnexpected = self.flattenAllSections(aasSections).strip();
    if sUnexpected:
        return self.errorComment(iTagLine, '%s: does not take any value: %s' % (sTag, sUnexpected));

    # Add the instruction to the only-test list, once.
    if oInstr in g_aoOnlyTestInstructions:
        return True;
    g_aoOnlyTestInstructions.append(oInstr);
    return True;
2642
def parseTagOpXcptType(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \@opxcpttype
    Value:  [none|1|2|3|4|4UA|5|6|7|8|11|12|E1|E1NF|E2|E3|E3NF|E4|E4NF|E5|E5NF|E6|E6NF|E7NF|E9|E9NF|E10|E11|E12|E12NF]

    Sets the SSE or AVX exception type (see SDMv2 2.4, 2.7).

    The value must be exactly one word that is a key of g_kdXcptTypes.  It is
    stored in oInstr.sXcptType, which cannot be overwritten.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and split the value, then validate it and the instruction state.
    asValues = self.flattenAllSections(aasSections).split();
    if len(asValues) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid exception type, please: %s' % (sTag, asValues,));
    sNewType = asValues[0];
    if sNewType not in g_kdXcptTypes:
        return self.errorComment(iTagLine, '%s: invalid invalid exception type: %s (valid: %s)'
                                           % (sTag, sNewType, sorted(g_kdXcptTypes.keys()),));
    sOldType = oInstr.sXcptType;
    if sOldType is not None:
        return self.errorComment(iTagLine,
                                 '%s: attempting to overwrite "%s" with "%s" (only one @opxcpttype)'
                                 % ( sTag, sOldType, sNewType,));

    oInstr.sXcptType = sNewType;
    return True;
2669
def parseTagOpFunction(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opfunction
    Value:      <VMM function name>

    Explicitly sets the IEM function name.  Normally the name is picked up
    from the FNIEMOP_XXX macro invocation after the description, or generated
    from the mnemonic and operands.

    It is thought that this may be necessary when specifying instructions
    whose implementation does not follow immediately or which are not
    implemented yet.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oCurInstr = self.ensureInstructionForOpTag(iTagLine);

    # The flattened value must look like a function name.
    sName = self.flattenAllSections(aasSections);
    if self.oReFunctionName.match(sName) is None:
        return self.errorComment(iTagLine, '%s: invalid VMM function name: "%s" (valid: %s)'
                                 % (sTag, sName, self.oReFunctionName.pattern));

    # Never replace a name that has already been set.
    sPrevious = oCurInstr.sFunction;
    if sPrevious is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite VMM function name "%s" with "%s"'
                                 % (sTag, sPrevious, sName,));

    oCurInstr.sFunction = sName;
    return True;
2697
def parseTagOpStats(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opstats
    Value:      <VMM statistics base name>

    Explicitly sets the statistics name.  Normally the name is picked up from
    the IEMOP_MNEMONIC macro invocation, or generated from the mnemonic and
    operands.

    It is thought that this may be necessary when specifying instructions
    whose implementation does not follow immediately or which are not
    implemented yet.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oCurInstr = self.ensureInstructionForOpTag(iTagLine);

    # The flattened value must look like a statistics name.
    sName = self.flattenAllSections(aasSections);
    if self.oReStatsName.match(sName) is None:
        return self.errorComment(iTagLine, '%s: invalid VMM statistics name: "%s" (valid: %s)'
                                 % (sTag, sName, self.oReStatsName.pattern));

    # Never replace a name that has already been set.
    sPrevious = oCurInstr.sStats;
    if sPrevious is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite VMM statistics base name "%s" with "%s"'
                                 % (sTag, sPrevious, sName,));

    oCurInstr.sStats = sName;
    return True;
2725
def parseTagOpDone(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opdone
    Value:      none

    Used to explicitly flush the instructions that have been specified.

    Returns the result of self.doneInstructions() when the tag has no value,
    otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    sValue = self.flattenAllSections(aasSections);
    if sValue == '':
        return self.doneInstructions();
    return self.errorComment(iTagLine, '%s: takes no value, found: "%s"' % (sTag, sValue,));
2738
2739 ## @}
2740
2741
def parseComment(self):
    """
    Parse the current comment (self.sComment).

    The comment is split into lines, stripped of leading asterisks and
    spaces, and then divided up by '@tag' lines.  Text preceding the first
    tag is collected under the pseudo tag '@default'.  Each tag with an entry
    in self.dTagHandlers is dispatched to its handler as
    handler(sTag, aasSections, iTagLine, iEndLine), where aasSections is a
    list of paragraphs (each a list of lines) and the line numbers are
    indexes into the stripped comment.

    Returns False if the comment contains neither 'Opcode' nor '@', True
    otherwise.
    """
    #
    # Reject if comment doesn't seem to contain anything interesting.
    #
    if self.sComment.find('Opcode') < 0 and self.sComment.find('@') < 0:
        return False;

    #
    # Split the comment into lines, removing leading asterisks and spaces.
    # Also remove leading and trailing empty lines.
    #
    asLines = [sLine.lstrip().lstrip('*').lstrip() for sLine in self.sComment.split('\n')];

    while asLines and not asLines[0]:
        self.iCommentLine += 1;
        asLines.pop(0);

    while asLines and not asLines[-1]:
        asLines.pop();

    #
    # Check for old style: Opcode 0x0f 0x12
    #
    if asLines[0].startswith('Opcode '):
        self.parseCommentOldOpcode(asLines);

    def processTag(sTag, aasTagSections, iTagLine, iEndLine):
        """
        Processes one completed tag.  Used for both the tags terminated by a
        following tag and for the final one, so that both get the same
        diagnostics.

        Returns (cOpTagsHandled, sFlatDefault); the latter is None unless
        the tag is '@default'.
        """
        # Drop a trailing empty paragraph (the current section is always last).
        if not aasTagSections[-1] and len(aasTagSections) > 1:
            aasTagSections.pop(-1);

        if sTag in self.dTagHandlers:
            self.dTagHandlers[sTag](sTag, aasTagSections, iTagLine, iEndLine);
            return (1, None);
        if sTag.startswith('@op'):
            self.errorComment(iTagLine, 'Unknown tag: %s' % (sTag));
        elif sTag == '@default':
            return (0, self.flattenAllSections(aasTagSections));
        elif '@op' + sTag[1:] in self.dTagHandlers:
            self.errorComment(iTagLine, 'Did you mean "@op%s" rather than "%s"?' % (sTag[1:], sTag));
        elif sTag in ['@encoding', '@opencoding']: # ('@opencoding' is already caught by the '@op' check above.)
            self.errorComment(iTagLine, 'Did you mean "@openc" rather than "%s"?' % (sTag,));
        return (0, None);

    #
    # Look for @op* tagged data.
    #
    cOpTags      = 0;
    sFlatDefault = None;
    sCurTag      = '@default';
    iCurTagLine  = 0;
    asCurSection = [];
    aasSections  = [ asCurSection, ];
    for iLine, sLine in enumerate(asLines):
        if not sLine.startswith('@'):
            # Plain text: extend the current paragraph, or start a new one on
            # an empty line (unless the current paragraph is still empty).
            if sLine:
                asCurSection.append(sLine);
            elif asCurSection:
                asCurSection = [];
                aasSections.append(asCurSection);
        else:
            #
            # Process the previous tag.
            #
            cHandled, sNewDefault = processTag(sCurTag, aasSections, iCurTagLine, iLine);
            cOpTags += cHandled;
            if sNewDefault is not None:
                sFlatDefault = sNewDefault;

            #
            # New tag.
            #
            asSplit = sLine.split(None, 1);
            sCurTag = asSplit[0].lower();
            if len(asSplit) > 1:
                asCurSection = [asSplit[1],];
            else:
                asCurSection = [];
            aasSections = [asCurSection, ];
            iCurTagLine = iLine;

    #
    # Process the final tag (iLine is the index of the last comment line).
    #
    cHandled, sNewDefault = processTag(sCurTag, aasSections, iCurTagLine, iLine);
    cOpTags += cHandled;
    if sNewDefault is not None:
        sFlatDefault = sNewDefault;

    #
    # Don't allow default text in blocks containing @op*.
    #
    if cOpTags > 0 and sFlatDefault:
        self.errorComment(0, 'Untagged comment text is not allowed with @op*: %s' % (sFlatDefault,));

    return True;
2842
def parseMacroInvocation(self, sInvocation):
    """
    Parses a macro invocation.

    sInvocation must start with the macro name.  If the invocation is not
    closed within sInvocation, lines from self.asLines (starting at
    self.iLine) are appended until it is.  Commas and parentheses inside
    string or character literals are treated as part of the argument, they
    do not separate or nest.

    Returns a tuple, first element is the offset following the macro
    invocation.  The second element is a list of macro arguments, where the
    zero'th is the macro name.  If the macro name is invalid or the end of
    the file is reached, the problem is reported via self.error() and the
    second element is None.
    """
    # First the name.
    offOpen = sInvocation.find('(');
    if offOpen <= 0:
        self.raiseError("macro invocation open parenthesis not found");
    sName = sInvocation[:offOpen].strip();
    if not self.oReMacroName.match(sName):
        self.error("invalid macro name '%s'" % (sName,));
        return (len(sInvocation), None);
    asRet = [sName, ];

    # Arguments.
    iLine    = self.iLine;
    cDepth   = 1;
    off      = offOpen + 1;
    offStart = off;
    chQuote  = None;                # The quote character when inside a literal, else None.
    while cDepth > 0:
        # Pull in more lines as needed (loop as an escape may skip a char).
        while off >= len(sInvocation):
            if iLine >= len(self.asLines):
                self.error('macro invocation beyond end of file');
                return (len(sInvocation), None);
            sInvocation += self.asLines[iLine];
            iLine += 1;
        ch = sInvocation[off];

        if chQuote is not None:
            if ch == '\\':
                off += 1;           # Skip the escaped character.
            elif ch == chQuote:
                chQuote = None;
        elif ch in ('"', '\'',):
            chQuote = ch;
        elif ch in (',', ')',):
            # Only separators at the top level terminate an argument.
            if cDepth == 1:
                asRet.append(sInvocation[offStart:off].strip());
                offStart = off + 1;
            if ch == ')':
                cDepth -= 1;
        elif ch == '(':
            cDepth += 1;
        off += 1;

    return (off, asRet);
2884
def findAndParseMacroInvocationEx(self, sCode, sMacro):
    """
    Looks for the first occurrence of sMacro in sCode and, if it is followed
    by an opening parenthesis (whitespace permitting), parses the invocation.

    Returns (len(sCode), None) if not found, parseMacroInvocation result if
    found (with the offset adjusted to be relative to the start of sCode).
    """
    offHit = sCode.find(sMacro);
    # Note! Uses startswith() rather than indexing so that a macro name with
    #       nothing but whitespace after it doesn't raise an IndexError.
    if offHit >= 0 and sCode[offHit + len(sMacro):].lstrip().startswith('('):
        offAfter, asRet = self.parseMacroInvocation(sCode[offHit:]);
        return (offHit + offAfter, asRet);
    return (len(sCode), None);
2894
def findAndParseMacroInvocation(self, sCode, sMacro):
    """
    Convenience wrapper around findAndParseMacroInvocationEx that drops the
    offset.

    Returns None if not found, arguments as per parseMacroInvocation if found.
    """
    tResult = self.findAndParseMacroInvocationEx(sCode, sMacro);
    asArgs  = tResult[1];
    return asArgs;
2900
def findAndParseFirstMacroInvocation(self, sCode, asMacro):
    """
    Tries each of the macro names in asMacro, in order, and stops at the
    first one found in sCode.

    Returns same as findAndParseMacroInvocation.
    """
    for sCandidate in asMacro:
        asArgs = self.findAndParseMacroInvocation(sCode, sCandidate);
        if asArgs is None:
            continue;
        return asArgs;
    return None;
2910
def workerIemOpMnemonicEx(self, sMacro, sStats, sAsm, sForm, sUpper, sLower,  # pylint: disable=too-many-arguments
                          sDisHints, sIemHints, asOperands):
    """
    Processes one of the IEMOP_MNEMONIC0EX, IEMOP_MNEMONIC1EX, IEMOP_MNEMONIC2EX,
    IEMOP_MNEMONIC3EX, and IEMOP_MNEMONIC4EX macros.

    The macro parameters are checked against, and merged into, the last
    instruction in self.aoCurInstrs (one is added if there is none).  All
    problems are reported via self.error().  sAsm is currently unused.

    Always returns True.
    """
    _ = sAsm;

    #
    # Some invocation checks.
    #
    if sUpper.upper() != sUpper:
        self.error('%s: bad a_Upper parameter: %s' % (sMacro, sUpper,));
    if sLower.lower() != sLower:
        self.error('%s: bad a_Lower parameter: %s' % (sMacro, sLower,));
    if sLower != sUpper.lower():
        self.error('%s: a_Upper and a_Lower parameters does not match: %s vs %s' % (sMacro, sUpper, sLower,));
    if not self.oReMnemonic.match(sLower):
        self.error('%s: invalid a_Lower: %s (valid: %s)' % (sMacro, sLower, self.oReMnemonic.pattern,));

    #
    # The IEM hints can tell us to leave this macro invocation alone.
    #
    if 'IEMOPHINT_SKIP_PYTHON' in sIemHints:
        return True;

    # Apply to the last instruction only for now.
    if not self.aoCurInstrs:
        self.addInstruction();
    oInstr = self.aoCurInstrs[-1];
    if oInstr.iLineMnemonicMacro != -1:
        self.error('%s: already saw a IEMOP_MNEMONIC* macro on line %u for this instruction'
                   % (sMacro, oInstr.iLineMnemonicMacro,));
    else:
        oInstr.iLineMnemonicMacro = self.iLine;

    # Mnemonic
    if oInstr.sMnemonic is None:
        oInstr.sMnemonic = sLower;
    elif oInstr.sMnemonic != sLower:
        self.error('%s: current instruction and a_Lower does not match: %s vs %s' % (sMacro, oInstr.sMnemonic, sLower,));

    # Process operands.
    cTagOperands = len(oInstr.aoOperands);
    if cTagOperands != 0 and cTagOperands != len(asOperands):
        self.error('%s: number of operands given by @opN does not match macro: %s vs %s'
                   % (sMacro, len(oInstr.aoOperands), len(asOperands),));
    for iOperand, sType in enumerate(asOperands):
        sWhere = g_kdOpTypes[sType][1] if sType in g_kdOpTypes else None;
        if sWhere is None:
            self.error('%s: unknown a_Op%u value: %s' % (sMacro, iOperand + 1, sType));
            # Error recovery: use the @opN value if there is one, else a dummy.
            if iOperand < len(oInstr.aoOperands):
                oExisting = oInstr.aoOperands[iOperand];
                sWhere = oExisting.sWhere;
                sType  = oExisting.sType;
            else:
                sWhere = 'reg';
                sType  = 'Gb';
        if iOperand == len(oInstr.aoOperands):
            oInstr.aoOperands.append(Operand(sWhere, sType));
        elif oInstr.aoOperands[iOperand].sWhere != sWhere or oInstr.aoOperands[iOperand].sType != sType:
            self.error('%s: @op%u and a_Op%u mismatch: %s:%s vs %s:%s'
                       % (sMacro, iOperand + 1, iOperand + 1, oInstr.aoOperands[iOperand].sWhere,
                          oInstr.aoOperands[iOperand].sType, sWhere, sType,));

    # Encoding.
    if sForm in g_kdIemForms:
        oFormEntry = g_kdIemForms[sForm];
        if oInstr.sEncoding is None:
            oInstr.sEncoding = oFormEntry[0];
        elif oFormEntry[0] != oInstr.sEncoding:
            self.error('%s: current instruction @openc and a_Form does not match: %s vs %s (%s)'
                       % (sMacro, oInstr.sEncoding, oFormEntry, sForm));

        # Check the parameter locations for the encoding.
        asFormWheres = oFormEntry[1];
        if asFormWheres is not None:
            if len(asFormWheres) != len(oInstr.aoOperands):
                self.error('%s: The a_Form=%s has a different operand count: %s (form) vs %s'
                           % (sMacro, sForm, len(asFormWheres), len(oInstr.aoOperands) ));
            else:
                for iOperand, sWhere in enumerate(asFormWheres):
                    if oInstr.aoOperands[iOperand].sWhere != sWhere:
                        self.error('%s: current instruction @op%u and a_Form location does not match: %s vs %s (%s)'
                                   % (sMacro, iOperand + 1, oInstr.aoOperands[iOperand].sWhere, sWhere, sForm,));
    else:
        self.error('%s: unknown a_Form value: %s' % (sMacro, sForm,));

    # Stats.
    if not self.oReStatsName.match(sStats):
        self.error('%s: invalid a_Stats value: %s' % (sMacro, sStats,));
    elif oInstr.sStats is None:
        oInstr.sStats = sStats;
    elif oInstr.sStats != sStats:
        self.error('%s: mismatching @opstats and a_Stats value: %s vs %s'
                   % (sMacro, oInstr.sStats, sStats,));

    # Process the hints (simply merge with @ophints w/o checking anything).
    # The disassembler hints are done first, then the IEM ones.
    for sHints, sPrefix, sParam in ( (sDisHints, 'DISOPTYPE_', 'a_fDisHints'),
                                     (sIemHints, 'IEMOPHINT_', 'a_fIemHints'), ):
        for sHint in sHints.split('|'):
            sHint = sHint.strip();
            if sHint.startswith(sPrefix):
                sShortHint = sHint[len(sPrefix):].lower();
                if sShortHint in g_kdHints:
                    oInstr.dHints[sShortHint] = True; # (dummy value, using dictionary for speed)
                else:
                    self.error('%s: unknown %s value: %s' % (sMacro, sParam, sHint,));
            elif sHint != '0':
                self.error('%s: expected %s value: %s' % (sMacro, sParam, sHint,));

    return True;
3028
def workerIemOpMnemonic(self, sMacro, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands):
    """
    Processes one of the IEMOP_MNEMONIC0, IEMOP_MNEMONIC1, IEMOP_MNEMONIC2,
    IEMOP_MNEMONIC3, and IEMOP_MNEMONIC4 macros.

    These macros carry neither a statistics name nor an assembly string, so
    both are composed from the lower case mnemonic and the operands before
    handing the work over to workerIemOpMnemonicEx.
    """
    sStats = sLower;
    sAsm   = sLower;
    if asOperands:
        sStats = sLower + '_' + '_'.join(asOperands);
        sAsm   = sLower + ' ' + ','.join(asOperands);
    return self.workerIemOpMnemonicEx(sMacro, sStats, sAsm, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands);
3038
def checkCodeForMacro(self, sCode):
    """
    Checks code for relevant macro invocation.

    Returns True if one of the FNIEMOP_XXX function definition macros was
    found and processed, otherwise False (also when IEMOP_MNEMONIC* macros
    were processed).
    """
    # No macro invocation without an opening parenthesis after some text.
    if sCode.find('(') <= 0:
        return False;

    #
    # Look for instruction decoder function definitions. ASSUME single line.
    #
    asArgs = self.findAndParseFirstMacroInvocation(sCode,
                                                   [ 'FNIEMOP_DEF',
                                                     'FNIEMOP_STUB',
                                                     'FNIEMOP_STUB_1',
                                                     'FNIEMOP_UD_STUB',
                                                     'FNIEMOP_UD_STUB_1' ]);
    if asArgs is not None:
        sMacro    = asArgs[0];
        sFunction = asArgs[1];

        if not self.aoCurInstrs:
            self.addInstruction();
        for oInstr in self.aoCurInstrs:
            if oInstr.iLineFnIemOpMacro == -1:
                oInstr.iLineFnIemOpMacro = self.iLine;
            else:
                self.error('%s: already seen a FNIEMOP_XXX macro for %s' % (sMacro, oInstr,) );
        fStub = sMacro.find('STUB') > 0;
        self.setInstrunctionAttrib('sFunction', sFunction);
        self.setInstrunctionAttrib('fStub', fStub, fOverwrite = True);
        self.setInstrunctionAttrib('fUdStub', sMacro.find('UD_STUB') > 0, fOverwrite = True);
        if fStub:
            self.doneInstructions();
        return True;

    #
    # IEMOP_MNEMONIC(a_Stats, a_szMnemonic) IEMOP_INC_STATS(a_Stats)
    #
    asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC');
    if asArgs is not None and len(self.aoCurInstrs) == 1:
        oInstr = self.aoCurInstrs[0];
        if oInstr.sStats is None:
            oInstr.sStats = asArgs[1];
            self.deriveMnemonicAndOperandsFromStats(oInstr, asArgs[1]);

    #
    # IEMOP_MNEMONIC<N>EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, [a_Op1, .., a_OpN,] a_fDisHints, a_fIemHints)
    # with N = 0 thru 4.  The operands sit between a_Lower and the hints.
    #
    for cOperands in range(5):
        asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC%uEX' % (cOperands,));
        if asArgs is not None:
            iHints = 6 + cOperands;
            self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5],
                                       asArgs[iHints], asArgs[iHints + 1], asArgs[6:iHints]);

    #
    # IEMOP_MNEMONIC<N>(a_Form, a_Upper, a_Lower, [a_Op1, .., a_OpN,] a_fDisHints, a_fIemHints)
    # with N = 0 thru 4.
    #
    for cOperands in range(5):
        asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC%u' % (cOperands,));
        if asArgs is not None:
            iHints = 4 + cOperands;
            self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3],
                                     asArgs[iHints], asArgs[iHints + 1], asArgs[4:iHints]);

    return False;
3132
3133
def parse(self):
    """
    Parses the given file (self.asLines), continuing from self.iLine.

    Multi-line C comments are collected in self.sComment and handed to
    parseComment(), code is handed to checkCodeForMacro(), and a '}' in the
    first column of a code line completes the current instructions.

    Returns the result of self.printErrors() (the number of errors according
    to the original documentation).
    Raises exception on fatal trouble.
    """
    #self.debug('Parsing %s' % (self.sSrcFile,));

    while self.iLine < len(self.asLines):
        sLine = self.asLines[self.iLine];
        self.iLine += 1;

        # We only look for comments, so only lines with a slash might possibly
        # influence the parser state.
        offSlash = sLine.find('/');
        if offSlash >= 0:
            if offSlash + 1 >= len(sLine) or sLine[offSlash + 1] != '/' or self.iState != self.kiCode:
                # Walk the line, alternating between code and comment pieces.
                offLine = 0;
                while offLine < len(sLine):
                    if self.iState == self.kiCode:
                        offHit = sLine.find('/*', offLine); # only multiline comments for now.
                        if offHit >= 0:
                            self.checkCodeForMacro(sLine[offLine:offHit]);
                            self.sComment     = '';
                            self.iCommentLine = self.iLine;
                            self.iState       = self.kiCommentMulti;
                            offLine = offHit + 2;
                        else:
                            self.checkCodeForMacro(sLine[offLine:]);
                            offLine = len(sLine);

                    elif self.iState == self.kiCommentMulti:
                        offHit = sLine.find('*/', offLine);
                        if offHit >= 0:
                            self.sComment += sLine[offLine:offHit];
                            self.iState    = self.kiCode;
                            offLine = offHit + 2;
                            self.parseComment();
                        else:
                            self.sComment += sLine[offLine:];
                            offLine = len(sLine);
                    else:
                        assert False, 'invalid parser state: %s' % (self.iState,);
            # C++ line comment.
            elif offSlash > 0:
                self.checkCodeForMacro(sLine[:offSlash]);

        # No slash, but append the line if in multi-line comment.
        elif self.iState == self.kiCommentMulti:
            #self.debug('line %d: multi' % (self.iLine,));
            self.sComment += sLine;

        # No slash, but check code line for relevant macro.
        elif self.iState == self.kiCode and sLine.find('IEMOP_') >= 0:
            #self.debug('line %d: macro' % (self.iLine,));
            self.checkCodeForMacro(sLine);

        # If the line is a '}' in the first position, complete the instructions.
        # Note! startswith() rather than sLine[0] so an empty line string is harmless.
        elif self.iState == self.kiCode and sLine.startswith('}'):
            #self.debug('line %d: }' % (self.iLine,));
            self.doneInstructions();

    self.doneInstructions();
    self.debug('%3s stubs out of %3s instructions in %s'
               % (self.cTotalStubs, self.cTotalInstr, os.path.basename(self.sSrcFile),));
    return self.printErrors();
3200
3201
def __parseFileByName(sSrcFile, sDefaultMap):
    """
    Parses one source file for instruction specifications.

    Returns what SimpleParser.parse() returns.
    Raises Exception if the file cannot be opened or read.  A ParserException
    from the parser is printed before being passed on to the caller.
    """
    #
    # Read sSrcFile into a line array.
    #
    try:
        oFile = open(sSrcFile, "r");
    except Exception as oXcpt:
        raise Exception("failed to open %s for reading: %s" % (sSrcFile, oXcpt,));
    with oFile: # closes the file whether or not reading succeeds.
        try:
            asLines = oFile.readlines();
        except Exception as oXcpt:
            raise Exception("failed to read %s: %s" % (sSrcFile, oXcpt,));

    #
    # Do the parsing.  Other exceptions propagate to the caller untouched.
    #
    try:
        return SimpleParser(sSrcFile, asLines, sDefaultMap).parse();
    except ParserException as oXcpt:
        print(str(oXcpt));
        raise;
3232
3233
def __doTestCopying():
    """
    Executes the asCopyTests instructions.

    Each reference is looked up in g_dAllInstructionsByStat first and then in
    g_dAllInstructionsByFunction, and the tests of the instructions found
    are appended to the referencing instruction.  References that cannot be
    resolved, or that resolve to the referencing instruction itself, are
    reported on stderr.

    Returns the number of errors.
    """
    asErrors = [];
    for oDstInstr in g_aoAllInstructions:
        for sSrcInstr in (oDstInstr.asCopyTests or []):
            # Statistics names map to a single instruction, function names to a list.
            oByStat = g_dAllInstructionsByStat.get(sSrcInstr, None);
            if oByStat:
                aoSrcInstrs = [oByStat,];
            else:
                aoSrcInstrs = g_dAllInstructionsByFunction.get(sSrcInstr, []);

            if not aoSrcInstrs:
                asErrors.append('%s:%s: error: @opcopytests reference "%s" not found\n'
                                % ( oDstInstr.sSrcFile, oDstInstr.iLineCreated, sSrcInstr));
                continue;

            for oSrcInstr in aoSrcInstrs:
                if oSrcInstr != oDstInstr:
                    oDstInstr.aoTests.extend(oSrcInstr.aoTests);
                else:
                    asErrors.append('%s:%s: error: @opcopytests reference "%s" matches the destination\n'
                                    % ( oDstInstr.sSrcFile, oDstInstr.iLineCreated, sSrcInstr));

    if asErrors:
        sys.stderr.write(u''.join(asErrors));
    return len(asErrors);
3261
3262
def __applyOnlyTest():
    """
    If g_aoOnlyTestInstructions contains any instructions, drop aoTests from
    all other instructions so that only these get tested.

    Always returns 0.
    """
    if not g_aoOnlyTestInstructions:
        return 0;

    for oInstr in g_aoAllInstructions:
        if oInstr.aoTests and oInstr not in g_aoOnlyTestInstructions:
            oInstr.aoTests = [];
    return 0;
3274
def __parseAll():
    """
    Parses all the IEMAllInstruction*.cpp.h files found next to this file,
    then executes the test copying and applies the only-test filtering.

    Returns True on success.  Exits the process with status 1 if any errors
    were counted.  Exceptions raised by the parsing propagate to the caller.
    """
    # (default map, file name) pairs, in the order they are parsed.
    atInputs = (
        ( 'one',       'IEMAllInstructionsOneByte.cpp.h'),
        ( 'two0f',     'IEMAllInstructionsTwoByte0f.cpp.h'),
        ( 'three0f38', 'IEMAllInstructionsThree0f38.cpp.h'),
        ( 'three0f3a', 'IEMAllInstructionsThree0f3a.cpp.h'),
        ( 'vexmap1',   'IEMAllInstructionsVexMap1.cpp.h'),
        ( 'vexmap2',   'IEMAllInstructionsVexMap2.cpp.h'),
        ( 'vexmap3',   'IEMAllInstructionsVexMap3.cpp.h'),
        ( '3dnow',     'IEMAllInstructions3DNow.cpp.h'),
    );

    sSrcDir = os.path.dirname(os.path.abspath(__file__));
    cErrors = 0;
    for sDefaultMap, sName in atInputs:
        cErrors += __parseFileByName(os.path.join(sSrcDir, sName), sDefaultMap);
    cErrors += __doTestCopying();
    cErrors += __applyOnlyTest();

    if cErrors == 0:
        return True;
    #raise Exception('%d parse errors' % (cErrors,));
    sys.exit(1);
    return True;
3301
3302
3303
# Parse all the instruction files as soon as this module is loaded, i.e. this
# runs on import as well as when the file is executed as a script.
__parseAll();
3305
3306
3307#
3308# Generators (may perhaps move later).
3309#
def generateDisassemblerTables(oDstFile = None):
    """
    Generates disassembler tables.

    The instruction maps in g_dInstructionMaps are sorted by encoding and
    lead opcode bytes, and a DISOPCODE table is written for the first one
    only (see the 'break' at the end of the loop).

    oDstFile is the file object to write to.  When None (the default),
    sys.stdout is looked up at call time so that a redirected stdout is
    honoured.
    """
    if oDstFile is None:
        oDstFile = sys.stdout;

    for sName, oMap in sorted(iter(g_dInstructionMaps.items()),
                              key = lambda aKV: aKV[1].sEncoding + ''.join(aKV[1].asLeadOpcodes)):
        assert oMap.sName == sName;
        asLines = [];

        asLines.append('/* Generated from: %-11s Selector: %-7s Encoding: %-7s Lead bytes opcodes: %s */'
                       % ( oMap.sName, oMap.sSelector, oMap.sEncoding, ' '.join(oMap.asLeadOpcodes), ));
        asLines.append('const DISOPCODE %s[] =' % (oMap.getDisasTableName(),));
        asLines.append('{');

        # The offsets at which the columns of a table entry should start.
        aoffColumns = [4, 29, 49, 65, 77, 89, 109, 125, 141, 157, 183, 199];

        aoTableOrder = oMap.getInstructionsInTableOrder();
        for iInstr, oInstr in enumerate(aoTableOrder):

            # Start each group of 16 entries with a comment giving the high nibble.
            if (iInstr & 0xf) == 0:
                if iInstr != 0:
                    asLines.append('');
                asLines.append(' /* %x */' % (iInstr >> 4,));

            if oInstr is None:
                pass;#asLines.append(' /* %#04x */ None,' % (iInstr));
            elif isinstance(oInstr, list):
                asLines.append(' /* %#04x */ ComplicatedListStuffNeedingWrapper,' % (iInstr));
            else:
                sMacro = 'OP';
                cMaxOperands = 3;
                if len(oInstr.aoOperands) > 3:
                    sMacro = 'OPVEX'
                    cMaxOperands = 4;
                    assert len(oInstr.aoOperands) <= cMaxOperands;

                #
                # Format string.
                #
                sTmp = '%s("%s' % (sMacro, oInstr.sMnemonic,);
                for iOperand, oOperand in enumerate(oInstr.aoOperands):
                    sTmp += ' ' if iOperand == 0 else ',';
                    if g_kdOpTypes[oOperand.sType][2][0] != '%':        ## @todo remove upper() later.
                        sTmp += g_kdOpTypes[oOperand.sType][2].upper(); ## @todo remove upper() later.
                    else:
                        sTmp += g_kdOpTypes[oOperand.sType][2];
                sTmp += '",';
                asColumns = [ sTmp, ];

                #
                # Decoders.
                #
                iStart = len(asColumns);
                if oInstr.sEncoding is None:
                    pass;
                elif oInstr.sEncoding == 'ModR/M':
                    # ASSUME the first operand is using the ModR/M encoding
                    assert len(oInstr.aoOperands) >= 1 and oInstr.aoOperands[0].usesModRM();
                    asColumns.append('IDX_ParseModRM,');
                    ## @todo IDX_ParseVexDest
                    # Is second operand using ModR/M too?
                    if len(oInstr.aoOperands) > 1 and oInstr.aoOperands[1].usesModRM():
                        asColumns.append('IDX_UseModRM,')
                elif oInstr.sEncoding in [ 'prefix', ]:
                    # One zero entry per operand.
                    asColumns.extend(['0,'] * len(oInstr.aoOperands));
                elif oInstr.sEncoding in [ 'fixed' ]:
                    pass;
                elif oInstr.sEncoding == 'vex2':
                    asColumns.append('IDX_ParseVex2b,')
                elif oInstr.sEncoding == 'vex3':
                    asColumns.append('IDX_ParseVex3b,')
                elif oInstr.sEncoding in g_dInstructionMaps:
                    asColumns.append(g_dInstructionMaps[oInstr.sEncoding].sDisParse + ',');
                else:
                    ## @todo
                    #IDX_ParseTwoByteEsc,
                    #IDX_ParseGrp1,
                    #IDX_ParseShiftGrp2,
                    #IDX_ParseGrp3,
                    #IDX_ParseGrp4,
                    #IDX_ParseGrp5,
                    #IDX_Parse3DNow,
                    #IDX_ParseGrp6,
                    #IDX_ParseGrp7,
                    #IDX_ParseGrp8,
                    #IDX_ParseGrp9,
                    #IDX_ParseGrp10,
                    #IDX_ParseGrp12,
                    #IDX_ParseGrp13,
                    #IDX_ParseGrp14,
                    #IDX_ParseGrp15,
                    #IDX_ParseGrp16,
                    #IDX_ParseThreeByteEsc4,
                    #IDX_ParseThreeByteEsc5,
                    #IDX_ParseModFence,
                    #IDX_ParseEscFP,
                    #IDX_ParseNopPause,
                    #IDX_ParseInvOpModRM,
                    assert False, str(oInstr);

                # Check for immediates and stuff in the remaining operands.
                for oOperand in oInstr.aoOperands[len(asColumns) - iStart:]:
                    sIdx = g_kdOpTypes[oOperand.sType][0];
                    if sIdx != 'IDX_UseModRM':
                        asColumns.append(sIdx + ',');
                # Pad the decoder columns up to the operand maximum.
                asColumns.extend(['0,'] * (cMaxOperands - (len(asColumns) - iStart)));

                #
                # Opcode and operands.
                #
                assert oInstr.sDisEnum, str(oInstr);
                asColumns.append(oInstr.sDisEnum + ',');
                iStart = len(asColumns)
                for oOperand in oInstr.aoOperands:
                    asColumns.append('OP_PARM_' + g_kdOpTypes[oOperand.sType][3] + ',');
                asColumns.extend(['OP_PARM_NONE,'] * (cMaxOperands - (len(asColumns) - iStart)));

                #
                # Flags.  Only the hints mapping to DISOPTYPE_XXX defines are
                # included, or-ed together; '0' if there are none.
                #
                asFlags = [];
                for sHint in sorted(oInstr.dHints.keys()):
                    sDefine = g_kdHints[sHint];
                    if sDefine.startswith('DISOPTYPE_'):
                        asFlags.append(sDefine);
                asColumns.append((' | '.join(asFlags) if asFlags else '0') + '),');

                #
                # Format the columns into a line.  Pad up to the column offset,
                # or use a single space if we are already past it.
                #
                sLine = '';
                for i, s in enumerate(asColumns):
                    if len(sLine) < aoffColumns[i]:
                        sLine += ' ' * (aoffColumns[i] - len(sLine));
                    else:
                        sLine += ' ';
                    sLine += s;

                # OP("psrlw %Vdq,%Wdq", IDX_ParseModRM, IDX_UseModRM, 0, OP_PSRLW, OP_PARM_Vdq, OP_PARM_Wdq, OP_PARM_NONE,
                # DISOPTYPE_HARMLESS),
                # define OP(pszOpcode, idxParse1, idxParse2, idxParse3, opcode, param1, param2, param3, optype) \
                # { pszOpcode, idxParse1, idxParse2, idxParse3, 0, opcode, param1, param2, param3, 0, 0, optype }

                asLines.append(sLine);

        asLines.append('};');
        asLines.append('AssertCompile(RT_ELEMENTS(%s) == %s);' % (oMap.getDisasTableName(), oMap.getTableSize(),));

        #
        # Write out the lines.
        #
        oDstFile.write('\n'.join(asLines));
        oDstFile.write('\n');
        break; #for now
3473
# When executed as a script (rather than imported), generate the disassembler
# tables using the default destination of generateDisassemblerTables().
if __name__ == '__main__':
    generateDisassemblerTables();
3476
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette