VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllInstructionsPython.py@ 66950

Last change on this file since 66950 was 66950, checked in by vboxsync, 8 years ago

IEM: Implemented vmovsldup Vx,Wx (VEX.F3.0F 12).

  • Property svn:eol-style set to LF
  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 144.5 KB
Line 
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3# $Id: IEMAllInstructionsPython.py 66950 2017-05-18 14:24:43Z vboxsync $
4
5"""
6IEM instruction extractor.
7
8This script/module parses the IEMAllInstruction*.cpp.h files next to it and
9collects information about the instructions. It can then be used to generate
10disassembler tables and tests.
11"""
12
13__copyright__ = \
14"""
15Copyright (C) 2017 Oracle Corporation
16
17This file is part of VirtualBox Open Source Edition (OSE), as
18available from http://www.virtualbox.org. This file is free software;
19you can redistribute it and/or modify it under the terms of the GNU
20General Public License (GPL) as published by the Free Software
21Foundation, in version 2 as it comes in the "COPYING" file of the
22VirtualBox OSE distribution. VirtualBox OSE is distributed in the
23hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
24
25The contents of this file may alternatively be used under the terms
26of the Common Development and Distribution License Version 1.0
27(CDDL) only, as it comes in the "COPYING.CDDL" file of the
28VirtualBox OSE distribution, in which case the provisions of the
29CDDL are applicable instead of those of the GPL.
30
31You may elect to license modified versions of this file under the
32terms and conditions of either the GPL or the CDDL or both.
33"""
34__version__ = "$Revision: 66950 $"
35
36# pylint: disable=anomalous-backslash-in-string
37
38# Standard python imports.
39import os
40import re
41import sys
42
43## Only the main script needs to modify the path.
44#g_ksValidationKitDir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),
45# 'ValidationKit');
46#sys.path.append(g_ksValidationKitDir);
47#
48#from common import utils; - Windows build boxes doesn't have pywin32.
49
50# Python 3 hacks:
51if sys.version_info[0] >= 3:
52 long = int; # pylint: disable=redefined-builtin,invalid-name
53
54
55g_kdX86EFlagsConstants = {
56 'X86_EFL_CF': 0x00000001, # RT_BIT_32(0)
57 'X86_EFL_1': 0x00000002, # RT_BIT_32(1)
58 'X86_EFL_PF': 0x00000004, # RT_BIT_32(2)
59 'X86_EFL_AF': 0x00000010, # RT_BIT_32(4)
60 'X86_EFL_ZF': 0x00000040, # RT_BIT_32(6)
61 'X86_EFL_SF': 0x00000080, # RT_BIT_32(7)
62 'X86_EFL_TF': 0x00000100, # RT_BIT_32(8)
63 'X86_EFL_IF': 0x00000200, # RT_BIT_32(9)
64 'X86_EFL_DF': 0x00000400, # RT_BIT_32(10)
65 'X86_EFL_OF': 0x00000800, # RT_BIT_32(11)
66 'X86_EFL_IOPL': 0x00003000, # (RT_BIT_32(12) | RT_BIT_32(13))
67 'X86_EFL_NT': 0x00004000, # RT_BIT_32(14)
68 'X86_EFL_RF': 0x00010000, # RT_BIT_32(16)
69 'X86_EFL_VM': 0x00020000, # RT_BIT_32(17)
70 'X86_EFL_AC': 0x00040000, # RT_BIT_32(18)
71 'X86_EFL_VIF': 0x00080000, # RT_BIT_32(19)
72 'X86_EFL_VIP': 0x00100000, # RT_BIT_32(20)
73 'X86_EFL_ID': 0x00200000, # RT_BIT_32(21)
74 'X86_EFL_LIVE_MASK': 0x003f7fd5, # UINT32_C(0x003f7fd5)
75 'X86_EFL_RA1_MASK': 0x00000002, # RT_BIT_32(1)
76};
77
## EFlags values allowed in \@opfltest, \@opflmodify, \@opflundef, \@opflset, and \@opflclear.
##
## Maps a flag mnemonic to the name of an entry in g_kdX86EFlagsConstants.  A
## leading '!' on the constant name marks a mnemonic that stands for the flag
## being clear rather than set.
g_kdEFlagsMnemonics = {
    # Debugger flag notation (sorted by value):
    'cf':   'X86_EFL_CF',   ##< Carry Flag.
    'nc':  '!X86_EFL_CF',   ##< No Carry.

    'po':   'X86_EFL_PF',   ##< Parity Odd.
    'pe':  '!X86_EFL_PF',   ##< Parity Even.

    'af':   'X86_EFL_AF',   ##< Aux Flag.
    'na':  '!X86_EFL_AF',   ##< No Aux.

    'zr':   'X86_EFL_ZF',   ##< ZeRo.
    'nz':  '!X86_EFL_ZF',   ##< No Zero.

    'ng':   'X86_EFL_SF',   ##< NeGative (sign).
    'pl':  '!X86_EFL_SF',   ##< PLus (sign).

    'tf':   'X86_EFL_TF',   ##< Trap flag.

    'ei':   'X86_EFL_IF',   ##< Enabled Interrupts.
    'di':  '!X86_EFL_IF',   ##< Disabled Interrupts.

    'dn':   'X86_EFL_DF',   ##< DowN (string op direction).
    'up':  '!X86_EFL_DF',   ##< UP (string op direction).

    'ov':   'X86_EFL_OF',   ##< OVerflow.
    'nv':  '!X86_EFL_OF',   ##< No Overflow.

    'nt':   'X86_EFL_NT',   ##< Nested Task.
    'rf':   'X86_EFL_RF',   ##< Resume Flag.
    'vm':   'X86_EFL_VM',   ##< Virtual-8086 Mode.
    'ac':   'X86_EFL_AC',   ##< Alignment Check.
    'vif':  'X86_EFL_VIF',  ##< Virtual Interrupt Flag.
    'vip':  'X86_EFL_VIP',  ##< Virtual Interrupt Pending.

    # Reference manual notation not covered above (sorted by value):
    'pf':   'X86_EFL_PF',
    'zf':   'X86_EFL_ZF',
    'sf':   'X86_EFL_SF',
    'if':   'X86_EFL_IF',
    'df':   'X86_EFL_DF',
    'of':   'X86_EFL_OF',
    'iopl': 'X86_EFL_IOPL',
    'id':   'X86_EFL_ID',
};
124
125## Constants and values for CR0.
126g_kdX86Cr0Constants = {
127 'X86_CR0_PE': 0x00000001, # RT_BIT_32(0)
128 'X86_CR0_MP': 0x00000002, # RT_BIT_32(1)
129 'X86_CR0_EM': 0x00000004, # RT_BIT_32(2)
130 'X86_CR0_TS': 0x00000008, # RT_BIT_32(3)
131 'X86_CR0_ET': 0x00000010, # RT_BIT_32(4)
132 'X86_CR0_NE': 0x00000020, # RT_BIT_32(5)
133 'X86_CR0_WP': 0x00010000, # RT_BIT_32(16)
134 'X86_CR0_AM': 0x00040000, # RT_BIT_32(18)
135 'X86_CR0_NW': 0x20000000, # RT_BIT_32(29)
136 'X86_CR0_CD': 0x40000000, # RT_BIT_32(30)
137 'X86_CR0_PG': 0x80000000, # RT_BIT_32(31)
138};
139
140## Constants and values for CR4.
141g_kdX86Cr4Constants = {
142 'X86_CR4_VME': 0x00000001, # RT_BIT_32(0)
143 'X86_CR4_PVI': 0x00000002, # RT_BIT_32(1)
144 'X86_CR4_TSD': 0x00000004, # RT_BIT_32(2)
145 'X86_CR4_DE': 0x00000008, # RT_BIT_32(3)
146 'X86_CR4_PSE': 0x00000010, # RT_BIT_32(4)
147 'X86_CR4_PAE': 0x00000020, # RT_BIT_32(5)
148 'X86_CR4_MCE': 0x00000040, # RT_BIT_32(6)
149 'X86_CR4_PGE': 0x00000080, # RT_BIT_32(7)
150 'X86_CR4_PCE': 0x00000100, # RT_BIT_32(8)
151 'X86_CR4_OSFXSR': 0x00000200, # RT_BIT_32(9)
152 'X86_CR4_OSXMMEEXCPT': 0x00000400, # RT_BIT_32(10)
153 'X86_CR4_VMXE': 0x00002000, # RT_BIT_32(13)
154 'X86_CR4_SMXE': 0x00004000, # RT_BIT_32(14)
155 'X86_CR4_PCIDE': 0x00020000, # RT_BIT_32(17)
156 'X86_CR4_OSXSAVE': 0x00040000, # RT_BIT_32(18)
157 'X86_CR4_SMEP': 0x00100000, # RT_BIT_32(20)
158 'X86_CR4_SMAP': 0x00200000, # RT_BIT_32(21)
159 'X86_CR4_PKE': 0x00400000, # RT_BIT_32(22)
160};
161
162## XSAVE components (XCR0).
163g_kdX86XSaveCConstants = {
164 'XSAVE_C_X87': 0x00000001,
165 'XSAVE_C_SSE': 0x00000002,
166 'XSAVE_C_YMM': 0x00000004,
167 'XSAVE_C_BNDREGS': 0x00000008,
168 'XSAVE_C_BNDCSR': 0x00000010,
169 'XSAVE_C_OPMASK': 0x00000020,
170 'XSAVE_C_ZMM_HI256': 0x00000040,
171 'XSAVE_C_ZMM_16HI': 0x00000080,
172 'XSAVE_C_PKRU': 0x00000200,
173 'XSAVE_C_LWP': 0x4000000000000000,
174 'XSAVE_C_X': 0x8000000000000000,
175 'XSAVE_C_ALL_AVX': 0x000000c4, # For clearing all AVX bits.
176 'XSAVE_C_ALL_AVX_SSE': 0x000000c6, # For clearing all AVX and SSE bits.
177};
178
179
180## \@op[1-4] locations
181g_kdOpLocations = {
182 'reg': [], ## modrm.reg
183 'rm': [], ## modrm.rm
184 'imm': [], ## immediate instruction data
185 'vvvv': [], ## VEX.vvvv
186
187 # fixed registers.
188 'AL': [],
189 'rAX': [],
190 'rSI': [],
191 'rDI': [],
192 'rFLAGS': [],
193 'CS': [],
194 'DS': [],
195 'ES': [],
196 'FS': [],
197 'GS': [],
198 'SS': [],
199};
200
201## \@op[1-4] types
202##
203## Value fields:
204## - 0: the normal IDX_ParseXXX handler (IDX_UseModRM == IDX_ParseModRM).
205## - 1: the location (g_kdOpLocations).
206## - 2: disassembler format string version of the type.
207## - 3: disassembler OP_PARAM_XXX (XXX only).
208##
209## Note! See the A.2.1 in SDM vol 2 for the type names.
210g_kdOpTypes = {
211 # Fixed addresses
212 'Ap': ( 'IDX_ParseImmAddrF', 'imm', '%Ap', 'Ap', ),
213
214 # ModR/M.rm
215 'Eb': ( 'IDX_UseModRM', 'rm', '%Eb', 'Eb', ),
216 'Ew': ( 'IDX_UseModRM', 'rm', '%Ew', 'Ew', ),
217 'Ev': ( 'IDX_UseModRM', 'rm', '%Ev', 'Ev', ),
218 'Wss': ( 'IDX_UseModRM', 'rm', '%Wss', 'Wss', ),
219 'Wss_WO': ( 'IDX_UseModRM', 'rm', '%Wss', 'Wss', ),
220 'Wsd': ( 'IDX_UseModRM', 'rm', '%Wsd', 'Wsd', ),
221 'Wsd_WO': ( 'IDX_UseModRM', 'rm', '%Wsd', 'Wsd', ),
222 'Wps': ( 'IDX_UseModRM', 'rm', '%Wps', 'Wps', ),
223 'Wps_WO': ( 'IDX_UseModRM', 'rm', '%Wps', 'Wps', ),
224 'Wpd': ( 'IDX_UseModRM', 'rm', '%Wpd', 'Wpd', ),
225 'Wpd_WO': ( 'IDX_UseModRM', 'rm', '%Wpd', 'Wpd', ),
226 'Wdq': ( 'IDX_UseModRM', 'rm', '%Wdq', 'Wdq', ),
227 'Wdq_WO': ( 'IDX_UseModRM', 'rm', '%Wdq', 'Wdq', ),
228 'Wq': ( 'IDX_UseModRM', 'rm', '%Wq', 'Wq', ),
229 'WqZxReg_WO': ( 'IDX_UseModRM', 'rm', '%Wq', 'Wq', ),
230 'Wx': ( 'IDX_UseModRM', 'rm', '%Wx', 'Wx', ),
231
232 # ModR/M.rm - register only.
233 'Uq': ( 'IDX_UseModRM', 'rm', '%Uq', 'Uq', ),
234 'UqHi': ( 'IDX_UseModRM', 'rm', '%Uq', 'UqHi', ),
235 'Uss': ( 'IDX_UseModRM', 'rm', '%Uss', 'Uss', ),
236 'Uss_WO': ( 'IDX_UseModRM', 'rm', '%Uss', 'Uss', ),
237 'Usd': ( 'IDX_UseModRM', 'rm', '%Usd', 'Usd', ),
238 'Usd_WO': ( 'IDX_UseModRM', 'rm', '%Usd', 'Usd', ),
239 'Nq': ( 'IDX_UseModRM', 'rm', '%Qq', 'Nq', ),
240
241 # ModR/M.rm - memory only.
242 'Ma': ( 'IDX_UseModRM', 'rm', '%Ma', 'Ma', ), ##< Only used by BOUND.
243 'Mb_RO': ( 'IDX_UseModRM', 'rm', '%Mb', 'Mb', ),
244 'Md': ( 'IDX_UseModRM', 'rm', '%Md', 'Md', ),
245 'Md_RO': ( 'IDX_UseModRM', 'rm', '%Md', 'Md', ),
246 'Md_WO': ( 'IDX_UseModRM', 'rm', '%Md', 'Md', ),
247 'Mq': ( 'IDX_UseModRM', 'rm', '%Mq', 'Mq', ),
248 'Mq_WO': ( 'IDX_UseModRM', 'rm', '%Mq', 'Mq', ),
249 'M_RO': ( 'IDX_UseModRM', 'rm', '%M', 'M', ),
250 'M_RW': ( 'IDX_UseModRM', 'rm', '%M', 'M', ),
251
252 # ModR/M.reg
253 'Gb': ( 'IDX_UseModRM', 'reg', '%Gb', 'Gb', ),
254 'Gw': ( 'IDX_UseModRM', 'reg', '%Gw', 'Gw', ),
255 'Gv': ( 'IDX_UseModRM', 'reg', '%Gv', 'Gv', ),
256 'Gv_RO': ( 'IDX_UseModRM', 'reg', '%Gv', 'Gv', ),
257 'Pq_WO': ( 'IDX_UseModRM', 'reg', '%Pq', 'Pq', ),
258 'Vss': ( 'IDX_UseModRM', 'reg', '%Vss', 'Vss', ),
259 'Vss_WO': ( 'IDX_UseModRM', 'reg', '%Vss', 'Vss', ),
260 'VssZx_WO': ( 'IDX_UseModRM', 'reg', '%Vss', 'Vss', ),
261 'Vsd': ( 'IDX_UseModRM', 'reg', '%Vsd', 'Vsd', ),
262 'Vsd_WO': ( 'IDX_UseModRM', 'reg', '%Vsd', 'Vsd', ),
263 'VsdZx_WO': ( 'IDX_UseModRM', 'reg', '%Vsd', 'Vsd', ),
264 'Vps': ( 'IDX_UseModRM', 'reg', '%Vps', 'Vps', ),
265 'Vps_WO': ( 'IDX_UseModRM', 'reg', '%Vps', 'Vps', ),
266 'Vpd': ( 'IDX_UseModRM', 'reg', '%Vpd', 'Vpd', ),
267 'Vpd_WO': ( 'IDX_UseModRM', 'reg', '%Vpd', 'Vpd', ),
268 'Vq': ( 'IDX_UseModRM', 'reg', '%Vq', 'Vq', ),
269 'Vq_WO': ( 'IDX_UseModRM', 'reg', '%Vq', 'Vq', ),
270 'Vdq_WO': ( 'IDX_UseModRM', 'reg', '%Vdq', 'Vdq', ),
271 'VqHi': ( 'IDX_UseModRM', 'reg', '%Vdq', 'VdqHi', ),
272 'VqHi_WO': ( 'IDX_UseModRM', 'reg', '%Vdq', 'VdqHi', ),
273 'VqZx_WO': ( 'IDX_UseModRM', 'reg', '%Vq', 'VqZx', ),
274 'Vx_WO': ( 'IDX_UseModRM', 'reg', '%Vx', 'Vx', ),
275
276 # VEX.vvvv
277 'HssHi': ( 'IDX_UseModRM', 'vvvv', '%Hx', 'HssHi', ),
278 'HsdHi': ( 'IDX_UseModRM', 'vvvv', '%Hx', 'HsdHi', ),
279 'HqHi': ( 'IDX_UseModRM', 'vvvv', '%Hq', 'HqHi', ),
280
281 # Immediate values.
282 'Ib': ( 'IDX_ParseImmByte', 'imm', '%Ib', 'Ib', ), ##< NB! Could be IDX_ParseImmByteSX for some instrs.
283 'Iw': ( 'IDX_ParseImmUshort', 'imm', '%Iw', 'Iw', ),
284 'Id': ( 'IDX_ParseImmUlong', 'imm', '%Id', 'Id', ),
285 'Iq': ( 'IDX_ParseImmQword', 'imm', '%Iq', 'Iq', ),
286 'Iv': ( 'IDX_ParseImmV', 'imm', '%Iv', 'Iv', ), ##< o16: word, o32: dword, o64: qword
287 'Iz': ( 'IDX_ParseImmZ', 'imm', '%Iz', 'Iz', ), ##< o16: word, o32|o64:dword
288
289 # Address operands (no ModR/M).
290 'Ob': ( 'IDX_ParseImmAddr', 'imm', '%Ob', 'Ob', ),
291 'Ov': ( 'IDX_ParseImmAddr', 'imm', '%Ov', 'Ov', ),
292
293 # Relative jump targets
294 'Jb': ( 'IDX_ParseImmBRel', 'imm', '%Jb', 'Jb', ),
295 'Jv': ( 'IDX_ParseImmVRel', 'imm', '%Jv', 'Jv', ),
296
297 # DS:rSI
298 'Xb': ( 'IDX_ParseXb', 'rSI', '%eSI', 'Xb', ),
299 'Xv': ( 'IDX_ParseXv', 'rSI', '%eSI', 'Xv', ),
300 # ES:rDI
301 'Yb': ( 'IDX_ParseYb', 'rDI', '%eDI', 'Yb', ),
302 'Yv': ( 'IDX_ParseYv', 'rDI', '%eDI', 'Yv', ),
303
304 'Fv': ( 'IDX_ParseFixedReg', 'rFLAGS', '%Fv', 'Fv', ),
305
306 # Fixed registers.
307 'AL': ( 'IDX_ParseFixedReg', 'AL', 'al', 'REG_AL', ),
308 'rAX': ( 'IDX_ParseFixedReg', 'rAX', '%eAX', 'REG_EAX', ),
309 'CS': ( 'IDX_ParseFixedReg', 'CS', 'cs', 'REG_CS', ), # 8086: push CS
310 'DS': ( 'IDX_ParseFixedReg', 'DS', 'ds', 'REG_DS', ),
311 'ES': ( 'IDX_ParseFixedReg', 'ES', 'es', 'REG_ES', ),
312 'FS': ( 'IDX_ParseFixedReg', 'FS', 'fs', 'REG_FS', ),
313 'GS': ( 'IDX_ParseFixedReg', 'GS', 'gs', 'REG_GS', ),
314 'SS': ( 'IDX_ParseFixedReg', 'SS', 'ss', 'REG_SS', ),
315};
316
317# IDX_ParseFixedReg
318# IDX_ParseVexDest
319
320
321## IEMFORM_XXX mappings.
322g_kdIemForms = { # sEncoding, [ sWhere1, ... ]
323 'RM': ( 'ModR/M', [ 'reg', 'rm' ], ),
324 'RM_REG': ( 'ModR/M', [ 'reg', 'rm' ], ),
325 'RM_MEM': ( 'ModR/M', [ 'reg', 'rm' ], ),
326 'MR': ( 'ModR/M', [ 'rm', 'reg' ], ),
327 'MR_REG': ( 'ModR/M', [ 'rm', 'reg' ], ),
328 'MR_MEM': ( 'ModR/M', [ 'rm', 'reg' ], ),
329 'M': ( 'ModR/M', [ 'rm', ], ),
330 'M_REG': ( 'ModR/M', [ 'rm', ], ),
331 'M_MEM': ( 'ModR/M', [ 'rm', ], ),
332 'R': ( 'ModR/M', [ 'reg', ], ),
333
334 'VEX_RM': ( 'VEX.ModR/M', [ 'reg', 'rm' ], ),
335 'VEX_RM_REG': ( 'VEX.ModR/M', [ 'reg', 'rm' ], ),
336 'VEX_RM_MEM': ( 'VEX.ModR/M', [ 'reg', 'rm' ], ),
337 'VEX_XM': ( 'VEX.ModR/M', [ 'reg', 'rm' ], ), # same as VEX_RM_MEM, typo?
338 'VEX_MR': ( 'VEX.ModR/M', [ 'rm', 'reg' ], ),
339 'VEX_MR_REG': ( 'VEX.ModR/M', [ 'rm', 'reg' ], ),
340 'VEX_MR_MEM': ( 'VEX.ModR/M', [ 'rm', 'reg' ], ),
341 'VEX_M': ( 'VEX.ModR/M', [ 'rm', ], ),
342 'VEX_M_REG': ( 'VEX.ModR/M', [ 'rm', ], ),
343 'VEX_M_MEM': ( 'VEX.ModR/M', [ 'rm', ], ),
344 'VEX_R': ( 'VEX.ModR/M', [ 'reg', ], ),
345 'VEX_RVM': ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm'], ),
346 'VEX_RVM_REG': ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm'], ),
347 'VEX_RVM_MEM': ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm'], ),
348 'VEX_MVR': ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg'], ),
349 'VEX_MVR_REG': ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg'], ),
350 'VEX_MVR_MEM': ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg'], ),
351
352 'FIXED': ( 'fixed', None, )
353};
354
355## \@oppfx values.
356g_kdPrefixes = {
357 'none': [],
358 '0x66': [],
359 '0xf3': [],
360 '0xf2': [],
361};
362
363## Special \@opcode tag values.
364g_kdSpecialOpcodes = {
365 '/reg': [],
366 'mr/reg': [],
367 '11 /reg': [],
368 '!11 /reg': [],
369 '11 mr/reg': [],
370 '!11 mr/reg': [],
371};
372
373## Special \@opcodesub tag values.
374g_kdSubOpcodes = {
375 'none': [ None, ],
376 '11 mr/reg': [ '11 mr/reg', ],
377 '11': [ '11 mr/reg', ], ##< alias
378 '!11 mr/reg': [ '!11 mr/reg', ],
379 '!11': [ '!11 mr/reg', ], ##< alias
380};
381
382## Valid values for \@openc
383g_kdEncodings = {
384 'ModR/M': [ 'BS3CG1ENC_MODRM', ], ##< ModR/M
385 'VEX.ModR/M': [ 'BS3CG1ENC_VEX_MODRM', ], ##< VEX...ModR/M
386 'fixed': [ 'BS3CG1ENC_FIXED', ], ##< Fixed encoding (address, registers, etc).
387 'prefix': [ None, ], ##< Prefix
388};
389
## \@opunused, \@opinvalid, \@opinvlstyle
g_kdInvalidStyles = {
    'immediate':                [], ##< CPU stops decoding immediately after the opcode.
    'intel-modrm':              [], ##< Intel decodes ModR/M.
    'intel-modrm-imm8':         [], ##< Intel decodes ModR/M and an 8-bit immediate.
    'intel-opcode-modrm':       [], ##< Intel decodes another opcode byte followed by ModR/M. (Unused extension tables.)
    'intel-opcode-modrm-imm8':  [], ##< Intel decodes another opcode byte followed by ModR/M and an 8-bit immediate.
};
398
399g_kdCpuNames = {
400 '8086': (),
401 '80186': (),
402 '80286': (),
403 '80386': (),
404 '80486': (),
405};
406
## \@opcpuid
##
## Maps the \@opcpuid feature name to the name of the corresponding CPUID
## feature bit constant.
##
## NOTE(review): 'pclmul', 'monitor' and 'smx' used to map to the DTES64,
## CPLDS and TM2 constants respectively, i.e. to a different feature than the
## key names.  They now follow the naming pattern of all the other entries;
## confirm the constant names against the x86 header before relying on them.
g_kdCpuIdFlags = {
    'vme':         'X86_CPUID_FEATURE_EDX_VME',
    'tsc':         'X86_CPUID_FEATURE_EDX_TSC',
    'msr':         'X86_CPUID_FEATURE_EDX_MSR',
    'cx8':         'X86_CPUID_FEATURE_EDX_CX8',
    'sep':         'X86_CPUID_FEATURE_EDX_SEP',
    'cmov':        'X86_CPUID_FEATURE_EDX_CMOV',
    'clfsh':       'X86_CPUID_FEATURE_EDX_CLFSH',
    'clflushopt':  'X86_CPUID_STEXT_FEATURE_EBX_CLFLUSHOPT',
    'mmx':         'X86_CPUID_FEATURE_EDX_MMX',
    'fxsr':        'X86_CPUID_FEATURE_EDX_FXSR',
    'sse':         'X86_CPUID_FEATURE_EDX_SSE',
    'sse2':        'X86_CPUID_FEATURE_EDX_SSE2',
    'sse3':        'X86_CPUID_FEATURE_ECX_SSE3',
    'pclmul':      'X86_CPUID_FEATURE_ECX_PCLMUL',
    'monitor':     'X86_CPUID_FEATURE_ECX_MONITOR',
    'vmx':         'X86_CPUID_FEATURE_ECX_VMX',
    'smx':         'X86_CPUID_FEATURE_ECX_SMX',
    'ssse3':       'X86_CPUID_FEATURE_ECX_SSSE3',
    'fma':         'X86_CPUID_FEATURE_ECX_FMA',
    'cx16':        'X86_CPUID_FEATURE_ECX_CX16',
    'pcid':        'X86_CPUID_FEATURE_ECX_PCID',
    'sse41':       'X86_CPUID_FEATURE_ECX_SSE4_1',
    'sse42':       'X86_CPUID_FEATURE_ECX_SSE4_2',
    'movbe':       'X86_CPUID_FEATURE_ECX_MOVBE',
    'popcnt':      'X86_CPUID_FEATURE_ECX_POPCNT',
    'aes':         'X86_CPUID_FEATURE_ECX_AES',
    'xsave':       'X86_CPUID_FEATURE_ECX_XSAVE',
    'avx':         'X86_CPUID_FEATURE_ECX_AVX',
    'f16c':        'X86_CPUID_FEATURE_ECX_F16C',
    'rdrand':      'X86_CPUID_FEATURE_ECX_RDRAND',

    'axmmx':       'X86_CPUID_AMD_FEATURE_EDX_AXMMX',
    '3dnowext':    'X86_CPUID_AMD_FEATURE_EDX_3DNOW_EX',
    '3dnow':       'X86_CPUID_AMD_FEATURE_EDX_3DNOW',
    'svm':         'X86_CPUID_AMD_FEATURE_ECX_SVM',
    'cr8l':        'X86_CPUID_AMD_FEATURE_ECX_CR8L',
    'abm':         'X86_CPUID_AMD_FEATURE_ECX_ABM',
    'sse4a':       'X86_CPUID_AMD_FEATURE_ECX_SSE4A',
    '3dnowprf':    'X86_CPUID_AMD_FEATURE_ECX_3DNOWPRF',
    'xop':         'X86_CPUID_AMD_FEATURE_ECX_XOP',
    'fma4':        'X86_CPUID_AMD_FEATURE_ECX_FMA4',
};
451
452## \@ophints values.
453g_kdHints = {
454 'invalid': 'DISOPTYPE_INVALID', ##<
455 'harmless': 'DISOPTYPE_HARMLESS', ##<
456 'controlflow': 'DISOPTYPE_CONTROLFLOW', ##<
457 'potentially_dangerous': 'DISOPTYPE_POTENTIALLY_DANGEROUS', ##<
458 'dangerous': 'DISOPTYPE_DANGEROUS', ##<
459 'portio': 'DISOPTYPE_PORTIO', ##<
460 'privileged': 'DISOPTYPE_PRIVILEGED', ##<
461 'privileged_notrap': 'DISOPTYPE_PRIVILEGED_NOTRAP', ##<
462 'uncond_controlflow': 'DISOPTYPE_UNCOND_CONTROLFLOW', ##<
463 'relative_controlflow': 'DISOPTYPE_RELATIVE_CONTROLFLOW', ##<
464 'cond_controlflow': 'DISOPTYPE_COND_CONTROLFLOW', ##<
465 'interrupt': 'DISOPTYPE_INTERRUPT', ##<
466 'illegal': 'DISOPTYPE_ILLEGAL', ##<
467 'rrm_dangerous': 'DISOPTYPE_RRM_DANGEROUS', ##< Some additional dangerous ones when recompiling raw r0.
468 'rrm_dangerous_16': 'DISOPTYPE_RRM_DANGEROUS_16', ##< Some additional dangerous ones when recompiling 16-bit raw r0.
469 'inhibit_irqs': 'DISOPTYPE_INHIBIT_IRQS', ##< Will or can inhibit irqs (sti, pop ss, mov ss) */
470 'portio_read': 'DISOPTYPE_PORTIO_READ', ##<
471 'portio_write': 'DISOPTYPE_PORTIO_WRITE', ##<
472 'invalid_64': 'DISOPTYPE_INVALID_64', ##< Invalid in 64 bits mode
473 'only_64': 'DISOPTYPE_ONLY_64', ##< Only valid in 64 bits mode
474 'default_64_op_size': 'DISOPTYPE_DEFAULT_64_OP_SIZE', ##< Default 64 bits operand size
475 'forced_64_op_size': 'DISOPTYPE_FORCED_64_OP_SIZE', ##< Forced 64 bits operand size; regardless of prefix bytes
476 'rexb_extends_opreg': 'DISOPTYPE_REXB_EXTENDS_OPREG', ##< REX.B extends the register field in the opcode byte
477 'mod_fixed_11': 'DISOPTYPE_MOD_FIXED_11', ##< modrm.mod is always 11b
478 'forced_32_op_size_x86': 'DISOPTYPE_FORCED_32_OP_SIZE_X86', ##< Forced 32 bits operand size; regardless of prefix bytes
479 ## (only in 16 & 32 bits mode!)
480 'sse': 'DISOPTYPE_SSE', ##< SSE,SSE2,SSE3,AVX,++ instruction. Not implemented yet!
481 'mmx': 'DISOPTYPE_MMX', ##< MMX,MMXExt,3DNow,++ instruction. Not implemented yet!
482 'fpu': 'DISOPTYPE_FPU', ##< FPU instruction. Not implemented yet!
483 'ignores_op_size': '', ##< Ignores both operand size prefixes.
484 'lock_allowed': '', ##< Lock prefix allowed.
485};
486
487## \@opxcpttype values (see SDMv2 2.4, 2.7).
488g_kdXcptTypes = {
489 'none': [],
490 '1': [],
491 '2': [],
492 '3': [],
493 '4': [],
494 '4UA': [],
495 '5': [],
496 '5LZ': [], # LZ = VEX.L must be zero.
497 '6': [],
498 '7': [],
499 '7LZ': [],
500 '8': [],
501 '11': [],
502 '12': [],
503 'E1': [],
504 'E1NF': [],
505 'E2': [],
506 'E3': [],
507 'E3NF': [],
508 'E4': [],
509 'E4NF': [],
510 'E5': [],
511 'E5NF': [],
512 'E6': [],
513 'E6NF': [],
514 'E7NF': [],
515 'E9': [],
516 'E9NF': [],
517 'E10': [],
518 'E11': [],
519 'E12': [],
520 'E12NF': [],
521};
522
523
524def _isValidOpcodeByte(sOpcode):
525 """
526 Checks if sOpcode is a valid lower case opcode byte.
527 Returns true/false.
528 """
529 if len(sOpcode) == 4:
530 if sOpcode[:2] == '0x':
531 if sOpcode[2] in '0123456789abcdef':
532 if sOpcode[3] in '0123456789abcdef':
533 return True;
534 return False;
535
536
537class InstructionMap(object):
538 """
539 Instruction map.
540
541 The opcode map provides the lead opcode bytes (empty for the one byte
542 opcode map). An instruction can be member of multiple opcode maps as long
543 as it uses the same opcode value within the map (because of VEX).
544 """
545
546 kdEncodings = {
547 'legacy': [],
548 'vex1': [], ##< VEX or EVEX prefix with vvvvv = 1
549 'vex2': [], ##< VEX or EVEX prefix with vvvvv = 2
550 'vex3': [], ##< VEX or EVEX prefix with vvvvv = 3
551 'xop8': [], ##< XOP prefix with vvvvv = 8
552 'xop9': [], ##< XOP prefix with vvvvv = 9
553 'xop10': [], ##< XOP prefix with vvvvv = 10
554 };
555 ## Selectors.
556 ## The first value is the number of table entries required by a
557 ## decoder or disassembler for this type of selector.
558 kdSelectors = {
559 'byte': [ 256, ], ##< next opcode byte selects the instruction (default).
560 '/r': [ 8, ], ##< modrm.reg selects the instruction.
561 'memreg /r':[ 16, ], ##< modrm.reg and (modrm.mod == 3) selects the instruction.
562 'mod /r': [ 32, ], ##< modrm.reg and modrm.mod selects the instruction.
563 '!11 /r': [ 8, ], ##< modrm.reg selects the instruction with modrm.mod != 0y11.
564 '11 /r': [ 8, ], ##< modrm.reg select the instruction with modrm.mod == 0y11.
565 '11': [ 64, ], ##< modrm.reg and modrm.rm select the instruction with modrm.mod == 0y11.
566 };
567
568 def __init__(self, sName, asLeadOpcodes = None, sSelector = 'byte', sEncoding = 'legacy', sDisParse = None):
569 assert sSelector in self.kdSelectors;
570 assert sEncoding in self.kdEncodings;
571 if asLeadOpcodes is None:
572 asLeadOpcodes = [];
573 else:
574 for sOpcode in asLeadOpcodes:
575 assert _isValidOpcodeByte(sOpcode);
576 assert sDisParse is None or sDisParse.startswith('IDX_Parse');
577
578 self.sName = sName;
579 self.asLeadOpcodes = asLeadOpcodes; ##< Lead opcode bytes formatted as hex strings like '0x0f'.
580 self.sSelector = sSelector; ##< The member selector, see kdSelectors.
581 self.sEncoding = sEncoding; ##< The encoding, see kdSelectors.
582 self.aoInstructions = []; # type: Instruction
583 self.sDisParse = sDisParse; ##< IDX_ParseXXX.
584
585 def getTableSize(self):
586 """
587 Number of table entries. This corresponds directly to the selector.
588 """
589 return self.kdSelectors[self.sSelector][0];
590
591 def getInstructionIndex(self, oInstr):
592 """
593 Returns the table index for the instruction.
594 """
595 bOpcode = oInstr.getOpcodeByte();
596
597 # The byte selector is simple. We need a full opcode byte and need just return it.
598 if self.sSelector == 'byte':
599 assert oInstr.sOpcode[:2] == '0x' and len(oInstr.sOpcode) == 4, str(oInstr);
600 return bOpcode;
601
602 # The other selectors needs masking and shifting.
603 if self.sSelector == '/r':
604 return (bOpcode >> 3) & 0x7;
605
606 if self.sSelector == 'mod /r':
607 return (bOpcode >> 3) & 0x1f;
608
609 if self.sSelector == 'memreg /r':
610 return ((bOpcode >> 3) & 0x7) | (int((bOpcode >> 6) == 3) << 3);
611
612 if self.sSelector == '!11 /r':
613 assert (bOpcode & 0xc0) != 0xc, str(oInstr);
614 return (bOpcode >> 3) & 0x7;
615
616 if self.sSelector == '11 /r':
617 assert (bOpcode & 0xc0) == 0xc, str(oInstr);
618 return (bOpcode >> 3) & 0x7;
619
620 if self.sSelector == '11':
621 assert (bOpcode & 0xc0) == 0xc, str(oInstr);
622 return bOpcode & 0x3f;
623
624 assert False, self.sSelector;
625 return -1;
626
627 def getInstructionsInTableOrder(self):
628 """
629 Get instructions in table order.
630
631 Returns array of instructions. Normally there is exactly one
632 instruction per entry. However the entry could also be None if
633 not instruction was specified for that opcode value. Or there
634 could be a list of instructions to deal with special encodings
635 where for instance prefix (e.g. REX.W) encodes a different
636 instruction or different CPUs have different instructions or
637 prefixes in the same place.
638 """
639 # Start with empty table.
640 cTable = self.getTableSize();
641 aoTable = [None] * cTable;
642
643 # Insert the instructions.
644 for oInstr in self.aoInstructions:
645 if oInstr.sOpcode:
646 idxOpcode = self.getInstructionIndex(oInstr);
647 assert idxOpcode < cTable, str(idxOpcode);
648
649 oExisting = aoTable[idxOpcode];
650 if oExisting is None:
651 aoTable[idxOpcode] = oInstr;
652 elif not isinstance(oExisting, list):
653 aoTable[idxOpcode] = list([oExisting, oInstr]);
654 else:
655 oExisting.append(oInstr);
656
657 return aoTable;
658
659
660 def getDisasTableName(self):
661 """
662 Returns the disassembler table name for this map.
663 """
664 sName = 'g_aDisas';
665 for sWord in self.sName.split('_'):
666 if sWord == 'm': # suffix indicating modrm.mod==mem
667 sName += '_m';
668 elif sWord == 'r': # suffix indicating modrm.mod==reg
669 sName += '_r';
670 elif len(sWord) == 2 and re.match('^[a-f0-9][a-f0-9]$', sWord):
671 sName += '_' + sWord;
672 else:
673 sWord = sWord.replace('grp', 'Grp');
674 sWord = sWord.replace('map', 'Map');
675 sName += sWord[0].upper() + sWord[1:];
676 return sName;
677
678
679 def isVexMap(self):
680 """ Returns True if a VEX map. """
681 return self.sEncoding.startswith('vex');
682
683
class TestType(object):
    """
    Test value type.

    This base class deals with integer like values.  The fUnsigned constructor
    parameter indicates the default stance on zero vs sign extending.  It is
    possible to override fUnsigned=True by prefixing the value with '+' or '-'.
    """
    def __init__(self, sName, acbSizes = None, fUnsigned = True):
        """
        sName is the type name, acbSizes the natural value sizes in bytes in
        ascending order (defaults to 1 thru 32), and fUnsigned whether values
        are zero extended by default.
        """
        self.sName     = sName;
        self.acbSizes  = [1, 2, 4, 8, 16, 32] if acbSizes is None else acbSizes;  # Normal sizes.
        self.fUnsigned = fUnsigned;

    class BadValue(Exception):
        """ Bad value exception. """
        def __init__(self, sMessage):
            Exception.__init__(self, sMessage);
            self.sMessage = sMessage;

    ## For ascii ~ operator.
    kdHexInv = {
        '0': 'f',
        '1': 'e',
        '2': 'd',
        '3': 'c',
        '4': 'b',
        '5': 'a',
        '6': '9',
        '7': '8',
        '8': '7',
        '9': '6',
        'a': '5',
        'b': '4',
        'c': '3',
        'd': '2',
        'e': '1',
        'f': '0',
    };

    def get(self, sValue):
        """
        Get the shortest normal sized byte representation of sValue.

        Returns ((fSignExtend, bytearray), ) or ((fSignExtend, bytearray), (fSignExtend, bytearray), ).
        The latter form is for AND+OR pairs where the first entry is what to
        AND with the field and the second the one to OR with.  This base
        implementation always returns the single entry form, with the bytes
        in little endian order.

        Raises BadValue if invalid value.
        """
        if not sValue:
            raise TestType.BadValue('empty value');

        # Deal with sign and detect hexadecimal or decimal.
        fSignExtend = not self.fUnsigned;
        if sValue[0] == '-' or sValue[0] == '+':
            fSignExtend = True;
            fHex = len(sValue) > 3 and sValue[1:3].lower() == '0x';
        else:
            fHex = len(sValue) > 2 and sValue[0:2].lower() == '0x';

        # Try convert it to an integer.  int() handles arbitrarily large values
        # on both Python 2 and 3, so no 'long' alias is required here.
        try:
            iValue = int(sValue, 16 if fHex else 10);
        except Exception as oXcpt:
            raise TestType.BadValue('failed to convert "%s" to integer (%s)' % (sValue, oXcpt));

        # Convert to a hex string without prefix or suffix.  Negative values
        # need to be manually converted to something non-negative (~-n + 1),
        # which is done by inverting each hex digit of (-n - 1).
        if iValue >= 0:
            sHex = '%x' % (iValue,);
        else:
            sHex = ''.join([self.kdHexInv[sDigit] for sDigit in '%x' % (-iValue - 1,)]);
            if fSignExtend and sHex[0] not in [ '8', '9', 'a', 'b', 'c', 'd', 'e', 'f']:
                sHex = 'f' + sHex;      # Make sure the top bit is set.

        # Pick the smallest natural size that fits, or round up to a multiple
        # of the largest natural size if the value is bigger than that.
        cDigits = len(sHex);
        if cDigits <= self.acbSizes[-1] * 2:
            for cb in self.acbSizes:
                cNaturalDigits = cb * 2;
                if cDigits <= cNaturalDigits:
                    break;
        else:
            cNaturalDigits = self.acbSizes[-1] * 2;
            cNaturalDigits = ((cDigits + cNaturalDigits - 1) // cNaturalDigits) * cNaturalDigits;

        # Pad to the natural size, extending the sign for negative values.
        if cNaturalDigits != cDigits:
            cNeeded = cNaturalDigits - cDigits;
            if iValue >= 0:
                sHex = ('0' * cNeeded) + sHex;
            else:
                sHex = ('f' * cNeeded) + sHex;

        # Invert and convert to bytearray and return it.
        abValue = bytearray([int(sHex[offHex - 2 : offHex], 16) for offHex in range(len(sHex), 0, -2)]);

        return ((fSignExtend, abValue),);

    def validate(self, sValue):
        """
        Returns True if value is okay, error message on failure.
        """
        try:
            self.get(sValue);
        except TestType.BadValue as oXcpt:
            return oXcpt.sMessage;
        return True;

    def isAndOrPair(self, sValue):
        """
        Checks if sValue is a pair.

        Always False for this base class.
        """
        _ = sValue;
        return False;
808
809
class TestTypeEflags(TestType):
    """
    Special value parsing for EFLAGS/RFLAGS/FLAGS.

    A value is a comma separated list of g_kdEFlagsMnemonics keys.  Mnemonics
    mapping to a '!' prefixed constant request the flag to be cleared, the
    others request it to be set.
    """

    ## The mnemonics that stand for a cleared flag.
    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 };

    def __init__(self, sName):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);

    def get(self, sValue):
        """
        Converts the flag list into byte representation(s).

        Returns a (clear-mask, set-mask) pair when at least one flag is to be
        cleared, otherwise the single entry tuple with the set-mask.

        Raises BadValue on unknown flag mnemonics.
        """
        fClear = 0;
        fSet   = 0;
        for sFlag in sValue.split(','):
            sConstant = g_kdEFlagsMnemonics.get(sFlag, None);
            if sConstant is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sFlag, sValue))
            if sConstant[0] == '!':
                fClear |= g_kdX86EFlagsConstants[sConstant[1:]];
            else:
                fSet   |= g_kdX86EFlagsConstants[sConstant];

        aoSet = TestType.get(self, '0x%x' % (fSet,));
        if fClear != 0:
            aoClear = TestType.get(self, '0x%x' % (fClear,))
            assert self.isAndOrPair(sValue) is True;
            return (aoClear[0], aoSet[0]);
        assert self.isAndOrPair(sValue) is False;
        return aoSet;

    def isAndOrPair(self, sValue):
        """
        Checks if sValue is a pair, i.e. contains a flag that is to be cleared.

        Whole comma separated flags are compared.  A substring search would
        mistake 'iopl' for containing the 'pl' mnemonic.
        """
        for sFlag in sValue.split(','):
            if sFlag in self.kdZeroValueFlags:
                return True;
        return False;
845
class TestTypeFromDict(TestType):
    """
    Value parsing for fields whose bits are named by a constants dictionary.

    A value is a comma separated list of flag names.  Each name is upper cased
    and prefixed with sConstantPrefix before being looked up in
    kdConstantsAndValues, and the values found are OR'ed together.
    """

    # Not referenced by any method of this class.
    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 };

    def __init__(self, sName, kdConstantsAndValues, sConstantPrefix):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);
        self.sConstantPrefix      = sConstantPrefix;
        self.kdConstantsAndValues = kdConstantsAndValues;

    def get(self, sValue):
        """
        Converts the flag list into its byte representation.

        Returns what TestType.get returns for the combined flag value.
        Raises BadValue on unknown flag names.
        """
        fCombined = 0;
        for sFlagName in sValue.split(','):
            sConstant = self.sConstantPrefix + sFlagName.upper();
            fBits     = self.kdConstantsAndValues.get(sConstant, None);
            if fBits is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sFlagName, sValue))
            fCombined |= fBits;
        return TestType.get(self, '0x%x' % (fCombined,));
866
867
class TestInOut(object):
    """
    One input or output state modifier.

    This should be thought as values to modify BS3REGCTX and extended (needs
    to be structured) state.
    """
    ## Assigned operators.
    kasOperators = [
        '&|=',  # Special AND(INV)+OR operator for use with EFLAGS.
        '&~=',
        '&=',
        '|=',
        '='
    ];
    ## Types
    kdTypes = {
        'uint':  TestType('uint', fUnsigned = True),
        'int':   TestType('int'),
        'efl':   TestTypeEflags('efl'),
        'cr0':   TestTypeFromDict('cr0', g_kdX86Cr0Constants, 'X86_CR0_'),
        'cr4':   TestTypeFromDict('cr4', g_kdX86Cr4Constants, 'X86_CR4_'),
        'xcr0':  TestTypeFromDict('xcr0', g_kdX86XSaveCConstants, 'XSAVE_C_'),
    };
    ## CPU context fields.
    kdFields = {
        # name:         ( default type, [both|input|output], )
        # Operands.
        'op1':          ( 'uint', 'both',   ), ## \@op1
        'op2':          ( 'uint', 'both',   ), ## \@op2
        'op3':          ( 'uint', 'both',   ), ## \@op3
        'op4':          ( 'uint', 'both',   ), ## \@op4
        # Flags.
        'efl':          ( 'efl',  'both',   ),
        'efl_undef':    ( 'uint', 'output', ),
        # 8-bit GPRs.
        'al':           ( 'uint', 'both',   ),
        'cl':           ( 'uint', 'both',   ),
        'dl':           ( 'uint', 'both',   ),
        'bl':           ( 'uint', 'both',   ),
        'ah':           ( 'uint', 'both',   ),
        'ch':           ( 'uint', 'both',   ),
        'dh':           ( 'uint', 'both',   ),
        'bh':           ( 'uint', 'both',   ),
        'r8l':          ( 'uint', 'both',   ),
        'r9l':          ( 'uint', 'both',   ),
        'r10l':         ( 'uint', 'both',   ),
        'r11l':         ( 'uint', 'both',   ),
        'r12l':         ( 'uint', 'both',   ),
        'r13l':         ( 'uint', 'both',   ),
        'r14l':         ( 'uint', 'both',   ),
        'r15l':         ( 'uint', 'both',   ),
        # 16-bit GPRs.
        'ax':           ( 'uint', 'both',   ),
        'dx':           ( 'uint', 'both',   ),
        'cx':           ( 'uint', 'both',   ),
        'bx':           ( 'uint', 'both',   ),
        'sp':           ( 'uint', 'both',   ),
        'bp':           ( 'uint', 'both',   ),
        'si':           ( 'uint', 'both',   ),
        'di':           ( 'uint', 'both',   ),
        'r8w':          ( 'uint', 'both',   ),
        'r9w':          ( 'uint', 'both',   ),
        'r10w':         ( 'uint', 'both',   ),
        'r11w':         ( 'uint', 'both',   ),
        'r12w':         ( 'uint', 'both',   ),
        'r13w':         ( 'uint', 'both',   ),
        'r14w':         ( 'uint', 'both',   ),
        'r15w':         ( 'uint', 'both',   ),
        # 32-bit GPRs.
        'eax':          ( 'uint', 'both',   ),
        'edx':          ( 'uint', 'both',   ),
        'ecx':          ( 'uint', 'both',   ),
        'ebx':          ( 'uint', 'both',   ),
        'esp':          ( 'uint', 'both',   ),
        'ebp':          ( 'uint', 'both',   ),
        'esi':          ( 'uint', 'both',   ),
        'edi':          ( 'uint', 'both',   ),
        'r8d':          ( 'uint', 'both',   ),
        'r9d':          ( 'uint', 'both',   ),
        'r10d':         ( 'uint', 'both',   ),
        'r11d':         ( 'uint', 'both',   ),
        'r12d':         ( 'uint', 'both',   ),
        'r13d':         ( 'uint', 'both',   ),
        'r14d':         ( 'uint', 'both',   ),
        'r15d':         ( 'uint', 'both',   ),
        # 64-bit GPRs.
        'rax':          ( 'uint', 'both',   ),
        'rdx':          ( 'uint', 'both',   ),
        'rcx':          ( 'uint', 'both',   ),
        'rbx':          ( 'uint', 'both',   ),
        'rsp':          ( 'uint', 'both',   ),
        'rbp':          ( 'uint', 'both',   ),
        'rsi':          ( 'uint', 'both',   ),
        'rdi':          ( 'uint', 'both',   ),
        'r8':           ( 'uint', 'both',   ),
        'r9':           ( 'uint', 'both',   ),
        'r10':          ( 'uint', 'both',   ),
        'r11':          ( 'uint', 'both',   ),
        'r12':          ( 'uint', 'both',   ),
        'r13':          ( 'uint', 'both',   ),
        'r14':          ( 'uint', 'both',   ),
        'r15':          ( 'uint', 'both',   ),
        # 16-bit, 32-bit or 64-bit registers according to operand size.
        'oz.rax':       ( 'uint', 'both',   ),
        'oz.rdx':       ( 'uint', 'both',   ),
        'oz.rcx':       ( 'uint', 'both',   ),
        'oz.rbx':       ( 'uint', 'both',   ),
        'oz.rsp':       ( 'uint', 'both',   ),
        'oz.rbp':       ( 'uint', 'both',   ),
        'oz.rsi':       ( 'uint', 'both',   ),
        'oz.rdi':       ( 'uint', 'both',   ),
        'oz.r8':        ( 'uint', 'both',   ),
        'oz.r9':        ( 'uint', 'both',   ),
        'oz.r10':       ( 'uint', 'both',   ),
        'oz.r11':       ( 'uint', 'both',   ),
        'oz.r12':       ( 'uint', 'both',   ),
        'oz.r13':       ( 'uint', 'both',   ),
        'oz.r14':       ( 'uint', 'both',   ),
        'oz.r15':       ( 'uint', 'both',   ),
        # Control registers.
        'cr0':          ( 'cr0',  'both',   ),
        'cr4':          ( 'cr4',  'both',   ),
        'xcr0':         ( 'xcr0', 'both',   ),
        # FPU Registers
        'fcw':          ( 'uint', 'both',   ),
        'fsw':          ( 'uint', 'both',   ),
        'ftw':          ( 'uint', 'both',   ),
        'fop':          ( 'uint', 'both',   ),
        'fpuip':        ( 'uint', 'both',   ),
        'fpucs':        ( 'uint', 'both',   ),
        'fpudp':        ( 'uint', 'both',   ),
        'fpuds':        ( 'uint', 'both',   ),
        'mxcsr':        ( 'uint', 'both',   ),
        'st0':          ( 'uint', 'both',   ),
        'st1':          ( 'uint', 'both',   ),
        'st2':          ( 'uint', 'both',   ),
        'st3':          ( 'uint', 'both',   ),
        'st4':          ( 'uint', 'both',   ),
        'st5':          ( 'uint', 'both',   ),
        'st6':          ( 'uint', 'both',   ),
        'st7':          ( 'uint', 'both',   ),
        # MMX registers.
        'mm0':          ( 'uint', 'both',   ),
        'mm1':          ( 'uint', 'both',   ),
        'mm2':          ( 'uint', 'both',   ),
        'mm3':          ( 'uint', 'both',   ),
        'mm4':          ( 'uint', 'both',   ),
        'mm5':          ( 'uint', 'both',   ),
        'mm6':          ( 'uint', 'both',   ),
        'mm7':          ( 'uint', 'both',   ),
        # SSE registers.
        'xmm0':         ( 'uint', 'both',   ),
        'xmm1':         ( 'uint', 'both',   ),
        'xmm2':         ( 'uint', 'both',   ),
        'xmm3':         ( 'uint', 'both',   ),
        'xmm4':         ( 'uint', 'both',   ),
        'xmm5':         ( 'uint', 'both',   ),
        'xmm6':         ( 'uint', 'both',   ),
        'xmm7':         ( 'uint', 'both',   ),
        'xmm8':         ( 'uint', 'both',   ),
        'xmm9':         ( 'uint', 'both',   ),
        'xmm10':        ( 'uint', 'both',   ),
        'xmm11':        ( 'uint', 'both',   ),
        'xmm12':        ( 'uint', 'both',   ),
        'xmm13':        ( 'uint', 'both',   ),
        'xmm14':        ( 'uint', 'both',   ),
        'xmm15':        ( 'uint', 'both',   ),
        'xmm0.lo':      ( 'uint', 'both',   ),
        'xmm1.lo':      ( 'uint', 'both',   ),
        'xmm2.lo':      ( 'uint', 'both',   ),
        'xmm3.lo':      ( 'uint', 'both',   ),
        'xmm4.lo':      ( 'uint', 'both',   ),
        'xmm5.lo':      ( 'uint', 'both',   ),
        'xmm6.lo':      ( 'uint', 'both',   ),
        'xmm7.lo':      ( 'uint', 'both',   ),
        'xmm8.lo':      ( 'uint', 'both',   ),
        'xmm9.lo':      ( 'uint', 'both',   ),
        'xmm10.lo':     ( 'uint', 'both',   ),
        'xmm11.lo':     ( 'uint', 'both',   ),
        'xmm12.lo':     ( 'uint', 'both',   ),
        'xmm13.lo':     ( 'uint', 'both',   ),
        'xmm14.lo':     ( 'uint', 'both',   ),
        'xmm15.lo':     ( 'uint', 'both',   ),
        'xmm0.hi':      ( 'uint', 'both',   ),
        'xmm1.hi':      ( 'uint', 'both',   ),
        'xmm2.hi':      ( 'uint', 'both',   ),
        'xmm3.hi':      ( 'uint', 'both',   ),
        'xmm4.hi':      ( 'uint', 'both',   ),
        'xmm5.hi':      ( 'uint', 'both',   ),
        'xmm6.hi':      ( 'uint', 'both',   ),
        'xmm7.hi':      ( 'uint', 'both',   ),
        'xmm8.hi':      ( 'uint', 'both',   ),
        'xmm9.hi':      ( 'uint', 'both',   ),
        'xmm10.hi':     ( 'uint', 'both',   ),
        'xmm11.hi':     ( 'uint', 'both',   ),
        'xmm12.hi':     ( 'uint', 'both',   ),
        'xmm13.hi':     ( 'uint', 'both',   ),
        'xmm14.hi':     ( 'uint', 'both',   ),
        'xmm15.hi':     ( 'uint', 'both',   ),
        'xmm0.lo.zx':   ( 'uint', 'both',   ),
        'xmm1.lo.zx':   ( 'uint', 'both',   ),
        'xmm2.lo.zx':   ( 'uint', 'both',   ),
        'xmm3.lo.zx':   ( 'uint', 'both',   ),
        'xmm4.lo.zx':   ( 'uint', 'both',   ),
        'xmm5.lo.zx':   ( 'uint', 'both',   ),
        'xmm6.lo.zx':   ( 'uint', 'both',   ),
        'xmm7.lo.zx':   ( 'uint', 'both',   ),
        'xmm8.lo.zx':   ( 'uint', 'both',   ),
        'xmm9.lo.zx':   ( 'uint', 'both',   ),
        'xmm10.lo.zx':  ( 'uint', 'both',   ),
        'xmm11.lo.zx':  ( 'uint', 'both',   ),
        'xmm12.lo.zx':  ( 'uint', 'both',   ),
        'xmm13.lo.zx':  ( 'uint', 'both',   ),
        'xmm14.lo.zx':  ( 'uint', 'both',   ),
        'xmm15.lo.zx':  ( 'uint', 'both',   ),
        'xmm0.dw0':     ( 'uint', 'both',   ),
        'xmm1.dw0':     ( 'uint', 'both',   ),
        'xmm2.dw0':     ( 'uint', 'both',   ),
        'xmm3.dw0':     ( 'uint', 'both',   ),
        'xmm4.dw0':     ( 'uint', 'both',   ),
        'xmm5.dw0':     ( 'uint', 'both',   ),
        'xmm6.dw0':     ( 'uint', 'both',   ),
        'xmm7.dw0':     ( 'uint', 'both',   ),
        'xmm8.dw0':     ( 'uint', 'both',   ),
        'xmm9.dw0':     ( 'uint', 'both',   ),
        'xmm10.dw0':    ( 'uint', 'both',   ),
        'xmm11.dw0':    ( 'uint', 'both',   ),
        'xmm12.dw0':    ( 'uint', 'both',   ),
        'xmm13.dw0':    ( 'uint', 'both',   ),
        'xmm14.dw0':    ( 'uint', 'both',   ),
        'xmm15.dw0':    ( 'uint', 'both',   ),
        # Legacy misspelling of 'xmm15.dw0' (underscore instead of dot), kept
        # so anything still using the old key continues to be accepted.
        'xmm15_dw0':    ( 'uint', 'both',   ),
        # AVX registers.
        'ymm0':         ( 'uint', 'both',   ),
        'ymm1':         ( 'uint', 'both',   ),
        'ymm2':         ( 'uint', 'both',   ),
        'ymm3':         ( 'uint', 'both',   ),
        'ymm4':         ( 'uint', 'both',   ),
        'ymm5':         ( 'uint', 'both',   ),
        'ymm6':         ( 'uint', 'both',   ),
        'ymm7':         ( 'uint', 'both',   ),
        'ymm8':         ( 'uint', 'both',   ),
        'ymm9':         ( 'uint', 'both',   ),
        'ymm10':        ( 'uint', 'both',   ),
        'ymm11':        ( 'uint', 'both',   ),
        'ymm12':        ( 'uint', 'both',   ),
        'ymm13':        ( 'uint', 'both',   ),
        'ymm14':        ( 'uint', 'both',   ),
        'ymm15':        ( 'uint', 'both',   ),

        # Special ones.
        'value.xcpt':   ( 'uint', 'output', ),
    };

    def __init__(self, sField, sOp, sValue, sType):
        """
        sField must be a key in kdFields and sOp one of kasOperators; sValue
        and sType are stored as given.  All four must be strings (asserted).
        """
        assert sField in self.kdFields;
        assert sOp in self.kasOperators;
        self.sField = sField;
        self.sOp    = sOp;
        self.sValue = sValue;
        self.sType  = sType;
        assert isinstance(sField, str);
        assert isinstance(sOp, str);
        assert isinstance(sType, str);
        assert isinstance(sValue, str);
1133
1134
class TestSelector(object):
    """
    One selector for an instruction test.

    A selector is a (variable, compare operator, value) triplet restricting
    when a test applies.
    """
    ## Selector compare operators.
    kasCompareOps = [ '==', '!=' ];
    ## Selector variables and their valid values.
    kdVariables = {
        # Operand size.
        'size':     { 'o16': 'size_o16', 'o32': 'size_o32', 'o64': 'size_o64', },
        # VEX.L value.
        'vex.l':    { '0': 'vexl_0', '1': 'vexl_1', },
        # Execution ring.
        'ring':     {
            '0':    'ring_0',
            '1':    'ring_1',
            '2':    'ring_2',
            '3':    'ring_3',
            '0..2': 'ring_0_thru_2',
            '1..3': 'ring_1_thru_3',
        },
        # Basic code mode.
        'codebits': { '64': 'code_64bit', '32': 'code_32bit', '16': 'code_16bit', },
        # cpu modes.
        'mode':     {
            'real': 'mode_real',
            'prot': 'mode_prot',
            'long': 'mode_long',
            'v86':  'mode_v86',
            'smm':  'mode_smm',
            'vmx':  'mode_vmx',
            'svm':  'mode_svm',
        },
        # paging on/off
        'paging':   { 'on': 'paging_on', 'off': 'paging_off', },
        # CPU vendor
        'vendor':   { 'amd': 'vendor_amd', 'intel': 'vendor_intel', 'via': 'vendor_via', },
    };
    ## Selector shorthand predicates.
    ## These translates into variable expressions.
    kdPredicates = {
        # Operand size.
        'o16':          'size==o16',
        'o32':          'size==o32',
        'o64':          'size==o64',
        # Execution ring.
        'ring0':        'ring==0',
        '!ring0':       'ring==1..3',
        'ring1':        'ring==1',
        'ring2':        'ring==2',
        'ring3':        'ring==3',
        'user':         'ring==3',
        'supervisor':   'ring==0..2',
        # CPU mode.
        'real':         'mode==real',
        'prot':         'mode==prot',
        'long':         'mode==long',
        'v86':          'mode==v86',
        'smm':          'mode==smm',
        'vmx':          'mode==vmx',
        'svm':          'mode==svm',
        # Paging.
        'paging':       'paging==on',
        '!paging':      'paging==off',
        # CPU vendor.
        'amd':          'vendor==amd',
        '!amd':         'vendor!=amd',
        'intel':        'vendor==intel',
        '!intel':       'vendor!=intel',
        'via':          'vendor==via',
        '!via':         'vendor!=via',
    };

    def __init__(self, sVariable, sOp, sValue):
        """
        sVariable must be a key in kdVariables, sOp one of kasCompareOps and
        sValue a valid value for the variable (all asserted).
        """
        assert sVariable in self.kdVariables;
        assert sOp in self.kasCompareOps;
        assert sValue in self.kdVariables[sVariable];
        self.sVariable, self.sOp, self.sValue = sVariable, sOp, sValue;
1228
1229
1230class InstructionTest(object):
1231 """
1232 Instruction test.
1233 """
1234
1235 def __init__(self, oInstr): # type: (InstructionTest, Instruction)
1236 self.oInstr = oInstr; # type: InstructionTest
1237 self.aoInputs = []; # type: list(TestInOut)
1238 self.aoOutputs = []; # type: list(TestInOut)
1239 self.aoSelectors = []; # type: list(TestSelector)
1240
1241 def toString(self, fRepr = False):
1242 """
1243 Converts it to string representation.
1244 """
1245 asWords = [];
1246 if self.aoSelectors:
1247 for oSelector in self.aoSelectors:
1248 asWords.append('%s%s%s' % (oSelector.sVariable, oSelector.sOp, oSelector.sValue,));
1249 asWords.append('/');
1250
1251 for oModifier in self.aoInputs:
1252 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
1253
1254 asWords.append('->');
1255
1256 for oModifier in self.aoOutputs:
1257 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
1258
1259 if fRepr:
1260 return '<' + ' '.join(asWords) + '>';
1261 return ' '.join(asWords);
1262
1263 def __str__(self):
1264 """ Provide string represenation. """
1265 return self.toString(False);
1266
1267 def __repr__(self):
1268 """ Provide unambigious string representation. """
1269 return self.toString(True);
1270
class Operand(object):
    """
    Instruction operand.

    Pairs an operand location (key in g_kdOpLocations) with an operand type
    (key in g_kdOpTypes).
    """

    def __init__(self, sWhere, sType):
        assert sWhere in g_kdOpLocations, sWhere;
        assert sType  in g_kdOpTypes, sType;
        self.sWhere = sWhere;           ##< g_kdOpLocations
        self.sType  = sType;            ##< g_kdOpTypes

    def usesModRM(self):
        """ Returns True if using some form of ModR/M encoding. """
        # Decided solely by the first letter of the operand type.
        sFirstChar = self.sType[0];
        return sFirstChar in ('E', 'G', 'M');
1285
1286
1287
class Instruction(object): # pylint: disable=too-many-instance-attributes
    """
    Instruction.

    Collects everything known about one instruction: the core attributes from
    the specification, implementation attributes, where in the source it was
    found, and some intermediate raw input fields.
    """

    def __init__(self, sSrcFile, iLine):
        ## @name Core attributes.
        ## @{
        self.sMnemonic      = None;
        self.sBrief         = None;
        self.asDescSections = [];       # type: list(str)
        self.aoMaps         = [];       # type: list(InstructionMap)
        self.aoOperands     = [];       # type: list(Operand)
        self.sPrefix        = None;     ##< Single prefix: None, 'none', 0x66, 0xf3, 0xf2
        self.sOpcode        = None;     # type: str
        self.sSubOpcode     = None;     # type: str
        self.sEncoding      = None;
        self.asFlTest       = None;
        self.asFlModify     = None;
        self.asFlUndefined  = None;
        self.asFlSet        = None;
        self.asFlClear      = None;
        self.dHints         = {};       ##< Dictionary of instruction hints, flags, whatnot. (Dictionary for speed; dummy value).
        self.sDisEnum       = None;     ##< OP_XXXX value.  Default is based on the uppercased mnemonic.
        self.asCpuIds       = [];       ##< The CPUID feature bit names for this instruction. If multiple, assume AND.
        self.asReqFeatures  = [];       ##< Which features are required to be enabled to run this instruction.
        self.aoTests        = [];       # type: list(InstructionTest)
        self.sMinCpu        = None;     ##< Indicates the minimum CPU required for the instruction. Not set when oCpuExpr is.
        self.oCpuExpr       = None;     ##< Some CPU restriction expression...
        self.sGroup         = None;
        self.fUnused        = False;    ##< Unused instruction.
        self.fInvalid       = False;    ##< Invalid instruction (like UD2).
        self.sInvalidStyle  = None;     ##< Invalid behaviour style (g_kdInvalidStyles),
        self.sXcptType      = None;     ##< Exception type (g_kdXcptTypes).
        ## @}

        ## @name Implementation attributes.
        ## @{
        self.sStats         = None;
        self.sFunction      = None;
        self.fStub          = False;
        self.fUdStub        = False;
        ## @}

        ## @name Decoding info
        ## @{
        self.sSrcFile       = sSrcFile;
        self.iLineCreated   = iLine;
        self.iLineCompleted = None;
        self.cOpTags        = 0;
        self.iLineFnIemOpMacro  = -1;
        self.iLineMnemonicMacro = -1;
        ## @}

        ## @name Intermediate input fields.
        ## @{
        self.sRawDisOpNo    = None;
        self.asRawDisParams = [];
        self.sRawIemOpFlags = None;
        self.sRawOldOpcodes = None;
        self.asCopyTests    = [];
        ## @}

    def toString(self, fRepr = False):
        """
        Turn object into a string.

        Produces 'name=value' pairs separated by '; ', skipping fields whose
        value is None.  With fRepr set the result is wrapped in '<' and '>'.
        """
        aasFields = [];

        aasFields.append(['opcode',    self.sOpcode]);
        aasFields.append(['mnemonic',  self.sMnemonic]);
        for iOperand, oOperand in enumerate(self.aoOperands):
            aasFields.append(['op%u' % (iOperand + 1,), '%s:%s' % (oOperand.sWhere, oOperand.sType,)]);
        if self.aoMaps:
            aasFields.append(['maps', ','.join([oMap.sName for oMap in self.aoMaps])]);
        aasFields.append(['encoding',  self.sEncoding]);
        if self.dHints:
            aasFields.append(['hints', ','.join(self.dHints.keys())]);
        aasFields.append(['disenum',   self.sDisEnum]);
        if self.asCpuIds:
            aasFields.append(['cpuid', ','.join(self.asCpuIds)]);
        aasFields.append(['group',     self.sGroup]);
        if self.fUnused:
            aasFields.append(['unused', 'True']);
        if self.fInvalid:
            aasFields.append(['invalid', 'True']);
        aasFields.append(['invlstyle', self.sInvalidStyle]);
        aasFields.append(['fltest',    self.asFlTest]);
        aasFields.append(['flmodify',  self.asFlModify]);
        aasFields.append(['flundef',   self.asFlUndefined]);
        aasFields.append(['flset',     self.asFlSet]);
        aasFields.append(['flclear',   self.asFlClear]);
        aasFields.append(['mincpu',    self.sMinCpu]);
        aasFields.append(['stats',     self.sStats]);
        aasFields.append(['sFunction', self.sFunction]);
        if self.fStub:
            aasFields.append(['fStub', 'True']);
        if self.fUdStub:
            aasFields.append(['fUdStub', 'True']);
        if self.cOpTags:
            aasFields.append(['optags', str(self.cOpTags)]);
        if self.iLineFnIemOpMacro != -1:
            aasFields.append(['FNIEMOP_XXX', str(self.iLineFnIemOpMacro)]);
        if self.iLineMnemonicMacro != -1:
            aasFields.append(['IEMOP_MNEMMONICn', str(self.iLineMnemonicMacro)]);

        sRet = '<' if fRepr else '';
        for sField, sValue in aasFields:
            if sValue is not None:
                # More than the optional opening '<' present means a previous
                # field has been emitted, so a separator is needed.
                if len(sRet) > 1:
                    sRet += '; ';
                sRet += '%s=%s' % (sField, sValue,);
        if fRepr:
            sRet += '>';

        return sRet;

    def __str__(self):
        """ Provide string representation. """
        return self.toString(False);

    def __repr__(self):
        """ Provide unambiguous string representation. """
        return self.toString(True);

    def getOpcodeByte(self):
        """
        Decodes sOpcode into a byte range integer value.
        Raises exception if sOpcode is None or invalid.

        Supported forms are '0xNN', '/r', '11/r' and '!11/r' where r is a
        single decimal digit that ends up in bits 3 thru 5 of the result.
        """
        if self.sOpcode is None:
            raise Exception('No opcode byte for %s!' % (self,));
        sOpcode = str(self.sOpcode);    # pylint type confusion workaround.

        # Full hex byte form.
        if sOpcode[:2] == '0x':
            return int(sOpcode, 16);

        # The /r form (two characters: the slash and the digit):
        if len(sOpcode) == 2 and sOpcode.startswith('/') and sOpcode[-1].isdigit():
            return int(sOpcode[-1:]) << 3;

        # The 11/r form:
        if len(sOpcode) == 4 and sOpcode.startswith('11/') and sOpcode[-1].isdigit():
            return (int(sOpcode[-1:]) << 3) | 0xc0;

        # The !11/r form (0x80, i.e. the mod bits are set to 10b):
        ## @todo this doesn't really work...
        if len(sOpcode) == 5 and sOpcode.startswith('!11/') and sOpcode[-1].isdigit():
            return (int(sOpcode[-1:]) << 3) | 0x80;

        raise Exception('unsupported opcode byte spec "%s" for %s' % (sOpcode, self,));

    @staticmethod
    def _flagsToIntegerMask(asFlags):
        """
        Returns the integer mask value for asFlags.

        Each flag is translated via g_kdEFlagsMnemonics to a constant name,
        which is then looked up in g_kdX86EFlagsConstants.  None or an empty
        list gives zero.
        """
        uRet = 0;
        if asFlags:
            for sFlag in asFlags:
                sConstant = g_kdEFlagsMnemonics[sFlag];
                assert sConstant[0] != '!', sConstant
                uRet |= g_kdX86EFlagsConstants[sConstant];
        return uRet;

    def getTestedFlagsMask(self):
        """ Returns asFlTest into a integer mask value """
        return self._flagsToIntegerMask(self.asFlTest);

    def getModifiedFlagsMask(self):
        """ Returns asFlModify into a integer mask value """
        return self._flagsToIntegerMask(self.asFlModify);

    def getUndefinedFlagsMask(self):
        """ Returns asFlUndefined into a integer mask value """
        return self._flagsToIntegerMask(self.asFlUndefined);

    def getSetFlagsMask(self):
        """ Returns asFlSet into a integer mask value """
        return self._flagsToIntegerMask(self.asFlSet);

    def getClearedFlagsMask(self):
        """ Returns asFlClear into a integer mask value """
        return self._flagsToIntegerMask(self.asFlClear);

    def onlyInVexMaps(self):
        """ Returns True if only in VEX maps, otherwise False.  (No maps -> False) """
        if not self.aoMaps:
            return False;
        for oMap in self.aoMaps:
            if not oMap.isVexMap():
                return False;
        return True;
1470
1471
1472
## All the instructions (SimpleParser.addInstruction appends each new
## Instruction object here).
g_aoAllInstructions = []; # type: list(Instruction)

## All the instructions indexed by statistics name (opstat).
## Filled in by SimpleParser.doneInstructionOne; the first instruction with a
## given name is kept and later duplicates are reported as errors.
g_dAllInstructionsByStat = {}; # type: dict(Instruction)

## All the instructions indexed by function name (opfunction).
## Each value is a list as several instructions may share one function.
g_dAllInstructionsByFunction = {}; # type: dict(list(Instruction))

## Instructions tagged by oponlytest
g_aoOnlyTestInstructions = []; # type: list(Instruction)
1484
## Instruction maps.
## Keyed by map name; each entry is an InstructionMap constructed with that
## same name, plus the lead opcode bytes, ModR/M selector and encoding where
## they differ from the InstructionMap defaults.
g_dInstructionMaps = {
    'one':          InstructionMap('one'),
    # Group 1 at 0x80 is selected by ModR/M.reg like the other grp1 maps.
    'grp1_80':      InstructionMap('grp1_80',   asLeadOpcodes = ['0x80',], sSelector = '/r'),
    'grp1_81':      InstructionMap('grp1_81',   asLeadOpcodes = ['0x81',], sSelector = '/r'),
    'grp1_82':      InstructionMap('grp1_82',   asLeadOpcodes = ['0x82',], sSelector = '/r'),
    'grp1_83':      InstructionMap('grp1_83',   asLeadOpcodes = ['0x83',], sSelector = '/r'),
    'grp1a':        InstructionMap('grp1a',     asLeadOpcodes = ['0x8f',], sSelector = '/r'),
    'grp2_c0':      InstructionMap('grp2_c0',   asLeadOpcodes = ['0xc0',], sSelector = '/r'),
    'grp2_c1':      InstructionMap('grp2_c1',   asLeadOpcodes = ['0xc1',], sSelector = '/r'),
    'grp2_d0':      InstructionMap('grp2_d0',   asLeadOpcodes = ['0xd0',], sSelector = '/r'),
    'grp2_d1':      InstructionMap('grp2_d1',   asLeadOpcodes = ['0xd1',], sSelector = '/r'),
    'grp2_d2':      InstructionMap('grp2_d2',   asLeadOpcodes = ['0xd2',], sSelector = '/r'),
    'grp2_d3':      InstructionMap('grp2_d3',   asLeadOpcodes = ['0xd3',], sSelector = '/r'),
    'grp3_f6':      InstructionMap('grp3_f6',   asLeadOpcodes = ['0xf6',], sSelector = '/r'),
    'grp3_f7':      InstructionMap('grp3_f7',   asLeadOpcodes = ['0xf7',], sSelector = '/r'),
    'grp4':         InstructionMap('grp4',      asLeadOpcodes = ['0xfe',], sSelector = '/r'),
    'grp5':         InstructionMap('grp5',      asLeadOpcodes = ['0xff',], sSelector = '/r'),
    'grp11_c6_m':   InstructionMap('grp11_c6_m',asLeadOpcodes = ['0xc6',], sSelector = '!11 /r'),
    'grp11_c6_r':   InstructionMap('grp11_c6_r',asLeadOpcodes = ['0xc6',], sSelector = '11'),    # xabort
    'grp11_c7_m':   InstructionMap('grp11_c7_m',asLeadOpcodes = ['0xc7',], sSelector = '!11 /r'),
    'grp11_c7_r':   InstructionMap('grp11_c7_r',asLeadOpcodes = ['0xc7',], sSelector = '11'),    # xbegin

    'two0f':        InstructionMap('two0f',     asLeadOpcodes = ['0x0f',], sDisParse = 'IDX_ParseTwoByteEsc'),
    'grp6':         InstructionMap('grp6',      asLeadOpcodes = ['0x0f', '0x00',], sSelector = '/r'),
    'grp7_m':       InstructionMap('grp7_m',    asLeadOpcodes = ['0x0f', '0x01',], sSelector = '!11 /r'),
    'grp7_r':       InstructionMap('grp7_r',    asLeadOpcodes = ['0x0f', '0x01',], sSelector = '11'),
    'grp8':         InstructionMap('grp8',      asLeadOpcodes = ['0x0f', '0xba',], sSelector = '/r'),
    'grp9':         InstructionMap('grp9',      asLeadOpcodes = ['0x0f', '0xc7',], sSelector = 'mod /r'),
    'grp10':        InstructionMap('grp10',     asLeadOpcodes = ['0x0f', '0xb9',], sSelector = '/r'), # UD1 /w modr/m
    'grp12':        InstructionMap('grp12',     asLeadOpcodes = ['0x0f', '0x71',], sSelector = 'mod /r'),
    'grp13':        InstructionMap('grp13',     asLeadOpcodes = ['0x0f', '0x72',], sSelector = 'mod /r'),
    'grp14':        InstructionMap('grp14',     asLeadOpcodes = ['0x0f', '0x73',], sSelector = 'mod /r'),
    'grp15':        InstructionMap('grp15',     asLeadOpcodes = ['0x0f', '0xae',], sSelector = 'memreg /r'),
    'grp16':        InstructionMap('grp16',     asLeadOpcodes = ['0x0f', '0x18',], sSelector = 'mod /r'),
    'grpA17':       InstructionMap('grpA17',    asLeadOpcodes = ['0x0f', '0x78',], sSelector = '/r'), # AMD: EXTRQ weirdness
    'grpP':         InstructionMap('grpP',      asLeadOpcodes = ['0x0f', '0x0d',], sSelector = '/r'), # AMD: prefetch

    'three0f38':    InstructionMap('three0f38', asLeadOpcodes = ['0x0f', '0x38',]),
    'three0f3a':    InstructionMap('three0f3a', asLeadOpcodes = ['0x0f', '0x3a',]),

    'vexmap1':      InstructionMap('vexmap1',   sEncoding = 'vex1'),
    'vexgrp12':     InstructionMap('vexgrp12',  sEncoding = 'vex1', asLeadOpcodes = ['0x71',], sSelector = 'mod /r'),
    'vexgrp13':     InstructionMap('vexgrp13',  sEncoding = 'vex1', asLeadOpcodes = ['0x72',], sSelector = 'mod /r'),
    'vexgrp14':     InstructionMap('vexgrp14',  sEncoding = 'vex1', asLeadOpcodes = ['0x73',], sSelector = 'mod /r'),
    'vexgrp15':     InstructionMap('vexgrp15',  sEncoding = 'vex1', asLeadOpcodes = ['0xae',], sSelector = 'memreg /r'),
    # Group 17 (BLSR, BLSMSK, BLSI) is VEX.0F38 F3 /r, i.e. it lives in VEX map 2.
    'vexgrp17':     InstructionMap('vexgrp17',  sEncoding = 'vex2', asLeadOpcodes = ['0xf3',], sSelector = '/r'),

    'vexmap2':      InstructionMap('vexmap2',   sEncoding = 'vex2'),
    'vexmap3':      InstructionMap('vexmap3',   sEncoding = 'vex3'),

    '3dnow':        InstructionMap('3dnow',     asLeadOpcodes = ['0x0f', '0x0f',]),
    'xopmap8':      InstructionMap('xopmap8',   sEncoding = 'xop8'),
    'xopmap9':      InstructionMap('xopmap9',   sEncoding = 'xop9'),
    'xopgrp1':      InstructionMap('xopgrp1',   sEncoding = 'xop9',  asLeadOpcodes = ['0x01'], sSelector = '/r'),
    'xopgrp2':      InstructionMap('xopgrp2',   sEncoding = 'xop9',  asLeadOpcodes = ['0x02'], sSelector = '/r'),
    'xopgrp3':      InstructionMap('xopgrp3',   sEncoding = 'xop9',  asLeadOpcodes = ['0x12'], sSelector = '/r'),
    'xopmap10':     InstructionMap('xopmap10',  sEncoding = 'xop10'),
    'xopgrp4':      InstructionMap('xopgrp4',   sEncoding = 'xop10', asLeadOpcodes = ['0x12'], sSelector = '/r'),
};
1545
1546
1547
class ParserException(Exception):
    """ Parser exception carrying a single message string. """
    def __init__(self, sMessage):
        super(ParserException, self).__init__(sMessage);
1552
1553
1554class SimpleParser(object):
1555 """
1556 Parser of IEMAllInstruction*.cpp.h instruction specifications.
1557 """
1558
1559 ## @name Parser state.
1560 ## @{
1561 kiCode = 0;
1562 kiCommentMulti = 1;
1563 ## @}
1564
1565 def __init__(self, sSrcFile, asLines, sDefaultMap):
1566 self.sSrcFile = sSrcFile;
1567 self.asLines = asLines;
1568 self.iLine = 0;
1569 self.iState = self.kiCode;
1570 self.sComment = '';
1571 self.iCommentLine = 0;
1572 self.aoCurInstrs = [];
1573
1574 assert sDefaultMap in g_dInstructionMaps;
1575 self.oDefaultMap = g_dInstructionMaps[sDefaultMap];
1576
1577 self.cTotalInstr = 0;
1578 self.cTotalStubs = 0;
1579 self.cTotalTagged = 0;
1580
1581 self.oReMacroName = re.compile('^[A-Za-z_][A-Za-z0-9_]*$');
1582 self.oReMnemonic = re.compile('^[A-Za-z_][A-Za-z0-9_]*$');
1583 self.oReStatsName = re.compile('^[A-Za-z_][A-Za-z0-9_]*$');
1584 self.oReFunctionName= re.compile('^iemOp_[A-Za-z_][A-Za-z0-9_]*$');
1585 self.oReGroupName = re.compile('^og_[a-z0-9]+(|_[a-z0-9]+|_[a-z0-9]+_[a-z0-9]+)$');
1586 self.oReDisEnum = re.compile('^OP_[A-Z0-9_]+$');
1587 self.fDebug = True;
1588
1589 self.dTagHandlers = {
1590 '@opbrief': self.parseTagOpBrief,
1591 '@opdesc': self.parseTagOpDesc,
1592 '@opmnemonic': self.parseTagOpMnemonic,
1593 '@op1': self.parseTagOpOperandN,
1594 '@op2': self.parseTagOpOperandN,
1595 '@op3': self.parseTagOpOperandN,
1596 '@op4': self.parseTagOpOperandN,
1597 '@oppfx': self.parseTagOpPfx,
1598 '@opmaps': self.parseTagOpMaps,
1599 '@opcode': self.parseTagOpcode,
1600 '@opcodesub': self.parseTagOpcodeSub,
1601 '@openc': self.parseTagOpEnc,
1602 '@opfltest': self.parseTagOpEFlags,
1603 '@opflmodify': self.parseTagOpEFlags,
1604 '@opflundef': self.parseTagOpEFlags,
1605 '@opflset': self.parseTagOpEFlags,
1606 '@opflclear': self.parseTagOpEFlags,
1607 '@ophints': self.parseTagOpHints,
1608 '@opdisenum': self.parseTagOpDisEnum,
1609 '@opmincpu': self.parseTagOpMinCpu,
1610 '@opcpuid': self.parseTagOpCpuId,
1611 '@opgroup': self.parseTagOpGroup,
1612 '@opunused': self.parseTagOpUnusedInvalid,
1613 '@opinvalid': self.parseTagOpUnusedInvalid,
1614 '@opinvlstyle': self.parseTagOpUnusedInvalid,
1615 '@optest': self.parseTagOpTest,
1616 '@optestign': self.parseTagOpTestIgnore,
1617 '@optestignore': self.parseTagOpTestIgnore,
1618 '@opcopytests': self.parseTagOpCopyTests,
1619 '@oponly': self.parseTagOpOnlyTest,
1620 '@oponlytest': self.parseTagOpOnlyTest,
1621 '@opxcpttype': self.parseTagOpXcptType,
1622 '@opstats': self.parseTagOpStats,
1623 '@opfunction': self.parseTagOpFunction,
1624 '@opdone': self.parseTagOpDone,
1625 };
1626 for i in range(48):
1627 self.dTagHandlers['@optest%u' % (i,)] = self.parseTagOpTestNum;
1628 self.dTagHandlers['@optest[%u]' % (i,)] = self.parseTagOpTestNum;
1629
1630 self.asErrors = [];
1631
1632 def raiseError(self, sMessage):
1633 """
1634 Raise error prefixed with the source and line number.
1635 """
1636 raise ParserException("%s:%d: error: %s" % (self.sSrcFile, self.iLine, sMessage,));
1637
1638 def raiseCommentError(self, iLineInComment, sMessage):
1639 """
1640 Similar to raiseError, but the line number is iLineInComment + self.iCommentLine.
1641 """
1642 raise ParserException("%s:%d: error: %s" % (self.sSrcFile, self.iCommentLine + iLineInComment, sMessage,));
1643
1644 def error(self, sMessage):
1645 """
1646 Adds an error.
1647 returns False;
1648 """
1649 self.asErrors.append(u'%s:%d: error: %s\n' % (self.sSrcFile, self.iLine, sMessage,));
1650 return False;
1651
1652 def errorComment(self, iLineInComment, sMessage):
1653 """
1654 Adds a comment error.
1655 returns False;
1656 """
1657 self.asErrors.append(u'%s:%d: error: %s\n' % (self.sSrcFile, self.iCommentLine + iLineInComment, sMessage,));
1658 return False;
1659
1660 def printErrors(self):
1661 """
1662 Print the errors to stderr.
1663 Returns number of errors.
1664 """
1665 if self.asErrors:
1666 sys.stderr.write(u''.join(self.asErrors));
1667 return len(self.asErrors);
1668
1669 def debug(self, sMessage):
1670 """
1671 For debugging.
1672 """
1673 if self.fDebug:
1674 print('debug: %s' % (sMessage,));
1675
1676
1677 def addInstruction(self, iLine = None):
1678 """
1679 Adds an instruction.
1680 """
1681 oInstr = Instruction(self.sSrcFile, self.iLine if iLine is None else iLine);
1682 g_aoAllInstructions.append(oInstr);
1683 self.aoCurInstrs.append(oInstr);
1684 return oInstr;
1685
1686 def deriveMnemonicAndOperandsFromStats(self, oInstr, sStats):
1687 """
1688 Derives the mnemonic and operands from a IEM stats base name like string.
1689 """
1690 if oInstr.sMnemonic is None:
1691 asWords = sStats.split('_');
1692 oInstr.sMnemonic = asWords[0].lower();
1693 if len(asWords) > 1 and not oInstr.aoOperands:
1694 for sType in asWords[1:]:
1695 if sType in g_kdOpTypes:
1696 oInstr.aoOperands.append(Operand(g_kdOpTypes[sType][1], sType));
1697 else:
1698 #return self.error('unknown operand type: %s (instruction: %s)' % (sType, oInstr))
1699 return False;
1700 return True;
1701
1702 def doneInstructionOne(self, oInstr, iLine):
1703 """
1704 Complete the parsing by processing, validating and expanding raw inputs.
1705 """
1706 assert oInstr.iLineCompleted is None;
1707 oInstr.iLineCompleted = iLine;
1708
1709 #
1710 # Specified instructions.
1711 #
1712 if oInstr.cOpTags > 0:
1713 if oInstr.sStats is None:
1714 pass;
1715
1716 #
1717 # Unspecified legacy stuff. We generally only got a few things to go on here.
1718 # /** Opcode 0x0f 0x00 /0. */
1719 # FNIEMOPRM_DEF(iemOp_Grp6_sldt)
1720 #
1721 else:
1722 #if oInstr.sRawOldOpcodes:
1723 #
1724 #if oInstr.sMnemonic:
1725 pass;
1726
1727 #
1728 # Common defaults.
1729 #
1730
1731 # Guess mnemonic and operands from stats if the former is missing.
1732 if oInstr.sMnemonic is None:
1733 if oInstr.sStats is not None:
1734 self.deriveMnemonicAndOperandsFromStats(oInstr, oInstr.sStats);
1735 elif oInstr.sFunction is not None:
1736 self.deriveMnemonicAndOperandsFromStats(oInstr, oInstr.sFunction.replace('iemOp_', ''));
1737
1738 # Derive the disassembler op enum constant from the mnemonic.
1739 if oInstr.sDisEnum is None and oInstr.sMnemonic is not None:
1740 oInstr.sDisEnum = 'OP_' + oInstr.sMnemonic.upper();
1741
1742 # Derive the IEM statistics base name from mnemonic and operand types.
1743 if oInstr.sStats is None:
1744 if oInstr.sFunction is not None:
1745 oInstr.sStats = oInstr.sFunction.replace('iemOp_', '');
1746 elif oInstr.sMnemonic is not None:
1747 oInstr.sStats = oInstr.sMnemonic;
1748 for oOperand in oInstr.aoOperands:
1749 if oOperand.sType:
1750 oInstr.sStats += '_' + oOperand.sType;
1751
1752 # Derive the IEM function name from mnemonic and operand types.
1753 if oInstr.sFunction is None:
1754 if oInstr.sMnemonic is not None:
1755 oInstr.sFunction = 'iemOp_' + oInstr.sMnemonic;
1756 for oOperand in oInstr.aoOperands:
1757 if oOperand.sType:
1758 oInstr.sFunction += '_' + oOperand.sType;
1759 elif oInstr.sStats:
1760 oInstr.sFunction = 'iemOp_' + oInstr.sStats;
1761
1762 #
1763 # Apply default map and then add the instruction to all it's groups.
1764 #
1765 if not oInstr.aoMaps:
1766 oInstr.aoMaps = [ self.oDefaultMap, ];
1767 for oMap in oInstr.aoMaps:
1768 oMap.aoInstructions.append(oInstr);
1769
1770 #
1771 # Derive encoding from operands and maps.
1772 #
1773 if oInstr.sEncoding is None:
1774 if not oInstr.aoOperands:
1775 if oInstr.fUnused and oInstr.sSubOpcode:
1776 oInstr.sEncoding = 'VEX.ModR/M' if oInstr.onlyInVexMaps() else 'ModR/M';
1777 else:
1778 oInstr.sEncoding = 'fixed';
1779 elif oInstr.aoOperands[0].usesModRM():
1780 if (len(oInstr.aoOperands) >= 2 and oInstr.aoOperands[1].sWhere == 'vvvv') \
1781 or oInstr.onlyInVexMaps():
1782 oInstr.sEncoding = 'VEX.ModR/M';
1783 else:
1784 oInstr.sEncoding = 'ModR/M';
1785
1786 #
1787 # Check the opstat value and add it to the opstat indexed dictionary.
1788 #
1789 if oInstr.sStats:
1790 if oInstr.sStats not in g_dAllInstructionsByStat:
1791 g_dAllInstructionsByStat[oInstr.sStats] = oInstr;
1792 else:
1793 self.error('Duplicate opstat value "%s"\nnew: %s\nold: %s'
1794 % (oInstr.sStats, oInstr, g_dAllInstructionsByStat[oInstr.sStats],));
1795
1796 #
1797 # Add to function indexed dictionary. We allow multiple instructions per function.
1798 #
1799 if oInstr.sFunction:
1800 if oInstr.sFunction not in g_dAllInstructionsByFunction:
1801 g_dAllInstructionsByFunction[oInstr.sFunction] = [oInstr,];
1802 else:
1803 g_dAllInstructionsByFunction[oInstr.sFunction].append(oInstr);
1804
1805 #self.debug('%d..%d: %s; %d @op tags' % (oInstr.iLineCreated, oInstr.iLineCompleted, oInstr.sFunction, oInstr.cOpTags));
1806 return True;
1807
def doneInstructions(self, iLineInComment = None):
    """
    Completes all the current instructions and resets the per-comment state.

    Each current instruction is passed to doneInstructionOne() together with
    the completion line: self.iLine when iLineInComment is None, otherwise
    self.iCommentLine + iLineInComment.  The stub and instruction totals are
    updated, after which the collected comment text and the list of current
    instructions are cleared.

    Returns True.
    """
    for oInstr in self.aoCurInstrs:
        if iLineInComment is None:
            iLineCompleted = self.iLine;
        else:
            iLineCompleted = self.iCommentLine + iLineInComment;
        self.doneInstructionOne(oInstr, iLineCompleted);
        if oInstr.fStub:
            self.cTotalStubs += 1;

    self.cTotalInstr += len(self.aoCurInstrs);

    # Start over with a clean slate.
    self.sComment    = '';
    self.aoCurInstrs = [];
    return True;
1822
def setInstrunctionAttrib(self, sAttrib, oValue, fOverwrite = False):
    """
    Assigns oValue to the attribute sAttrib of every current instruction.

    Unless fOverwrite is True, an instruction whose attribute already holds
    something other than None is left untouched.
    """
    for oInstr in self.aoCurInstrs:
        if fOverwrite is True or getattr(oInstr, sAttrib) is None:
            setattr(oInstr, sAttrib, oValue);
1834
def setInstrunctionArrayAttrib(self, sAttrib, iEntry, oValue, fOverwrite = False):
    """
    Assigns oValue to entry iEntry of the list attribute sAttrib of every
    current instruction.

    The list is padded with None entries as needed so that iEntry exists.
    Unless fOverwrite is True, an entry that already holds something other
    than None is left untouched.
    """
    for oInstr in self.aoCurInstrs:
        aoArray  = getattr(oInstr, sAttrib);
        cMissing = iEntry + 1 - len(aoArray);
        if cMissing > 0:
            aoArray.extend([None] * cMissing);
        if fOverwrite is True or aoArray[iEntry] is None:
            aoArray[iEntry] = oValue;
1846
def parseCommentOldOpcode(self, asLines):
    """
    Deals with 'Opcode 0xff /4' like comments.

    When the first line starts with the word 'Opcode' followed by a word with
    a '0x' or '0X' prefix, the words after 'Opcode' are stored (space
    separated) in the sRawOldOpcodes attribute of the current instructions
    via setInstrunctionAttrib(), i.e. without overwriting an existing value.
    Any '0X' prefix is normalized to '0x'.

    Always returns False.
    """
    asWords = asLines[0].split() if asLines else [];
    if     len(asWords) >= 2 \
       and asWords[0] == 'Opcode' \
       and (   asWords[1].startswith('0x')
            or asWords[1].startswith('0X')):
        # Drop the leading 'Opcode' word and normalize the hex prefix of the rest.
        asOpcodes = [];
        for sWord in asWords[1:]:
            if sWord.startswith('0X'):
                sWord = '0x' + sWord[2:];
            asOpcodes.append(sWord);
        self.setInstrunctionAttrib('sRawOldOpcodes', ' '.join(asOpcodes));

    return False;
1862
def ensureInstructionForOpTag(self, iTagLine):
    """
    Makes sure there is a current instruction for the op-tag being parsed.

    If there is no current instruction, one is added via addInstruction()
    for line self.iCommentLine + iTagLine.  The tag counter of every current
    instruction is incremented, and self.cTotalTagged is bumped for each
    instruction receiving its first tag.

    Returns the last of the current instructions.
    """
    if not self.aoCurInstrs:
        self.addInstruction(self.iCommentLine + iTagLine);

    for oCurInstr in self.aoCurInstrs:
        oCurInstr.cOpTags += 1;
        if oCurInstr.cOpTags == 1:
            self.cTotalTagged += 1;

    return self.aoCurInstrs[-1];
1872
1873 @staticmethod
1874 def flattenSections(aasSections):
1875 """
1876 Flattens multiline sections into stripped single strings.
1877 Returns list of strings, on section per string.
1878 """
1879 asRet = [];
1880 for asLines in aasSections:
1881 if asLines:
1882 asRet.append(' '.join([sLine.strip() for sLine in asLines]));
1883 return asRet;
1884
1885 @staticmethod
1886 def flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = '\n'):
1887 """
1888 Flattens sections into a simple stripped string with newlines as
1889 section breaks. The final section does not sport a trailing newline.
1890 """
1891 # Typical: One section with a single line.
1892 if len(aasSections) == 1 and len(aasSections[0]) == 1:
1893 return aasSections[0][0].strip();
1894
1895 sRet = '';
1896 for iSection, asLines in enumerate(aasSections):
1897 if asLines:
1898 if iSection > 0:
1899 sRet += sSectionSep;
1900 sRet += sLineSep.join([sLine.strip() for sLine in asLines]);
1901 return sRet;
1902
1903
1904
1905 ## @name Tag parsers
1906 ## @{
1907
def parseTagOpBrief(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \@opbrief
    Value:  Text description, multiple sections, appended.

    Brief description.  If not given, it's the first sentence from @opdesc.

    The value is flattened, terminated with a full stop if it lacks one, and
    must be a single sentence of at most 180 characters.  It is stored in the
    sBrief attribute of the instruction, which must not already be set.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sBrief = self.flattenAllSections(aasSections);
    if not sBrief:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if sBrief[-1] != '.':
        sBrief = sBrief + '.';
    if len(sBrief) > 180:
        return self.errorComment(iTagLine, '%s: value too long (max 180 chars): %s' % (sTag, sBrief));

    # Find the first full stop that ends a sentence, i.e. one that is followed
    # by a space or is the last character.  It must be the last character.
    offDot = sBrief.find('.');
    while offDot >= 0 and offDot < len(sBrief) - 1 and sBrief[offDot + 1] != ' ':
        offDot = sBrief.find('.', offDot + 1);
    if offDot >= 0 and offDot != len(sBrief) - 1:
        return self.errorComment(iTagLine, '%s: only one sentence: %s' % (sTag, sBrief));

    # Update the instruction.
    if oInstr.sBrief is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite brief "%s" with "%s"'
                                           % (sTag, oInstr.sBrief, sBrief,));
    oInstr.sBrief = sBrief;

    _ = iEndLine;
    return True;
1937
def parseTagOpDesc(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \@opdesc
    Value:  Text description, multiple sections, appended.

    It is used to describe instructions.

    Each section is flattened into one string and appended to the
    asDescSections list of the instruction.  Returns True.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    if aasSections:
        asFlattened = self.flattenSections(aasSections);
        oInstr.asDescSections.extend(asFlattened);

    _ = sTag; _ = iEndLine;
    return True;
1952
def parseTagOpMnemonic(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \@opmnemonic
    Value:  mnemonic

    The 'mnemonic' value must be a valid C identifier string.  Because of
    prefixes, groups and whatnot, there are times when the mnemonic isn't
    that of an actual assembler mnemonic.

    The value must match self.oReMnemonic and the instruction must not have a
    mnemonic yet.  Returns True on success, otherwise the result of
    self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value and check it against the mnemonic pattern.
    sMnemonic = self.flattenAllSections(aasSections);
    if self.oReMnemonic.match(sMnemonic) is None:
        return self.errorComment(iTagLine, '%s: invalid menmonic name: "%s"' % (sTag, sMnemonic,));

    # Refuse to replace an existing mnemonic.
    sOldMnemonic = oInstr.sMnemonic;
    if sOldMnemonic is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite menmonic "%s" with "%s"'
                                           % (sTag, sOldMnemonic, sMnemonic,));
    oInstr.sMnemonic = sMnemonic;

    _ = iEndLine;
    return True;
1975
def parseTagOpOperandN(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tags:   \@op1, \@op2, \@op3, \@op4
    Value:  [where:]type

    The 'where' value indicates where the operand is found, like the 'reg'
    part of the ModR/M encoding.  It must be a key of g_kdOpLocations.  When
    omitted, it is taken from entry 1 of the g_kdOpTypes value for the type.

    The 'type' value indicates the operand type.  These follow the types
    given in the opcode tables in the CPU reference manuals.  It must be a
    key of g_kdOpTypes.

    The operand index is taken from the last character of the tag.  An
    already specified operand is not overwritten.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    idxOp = int(sTag[-1]) - 1;
    assert idxOp >= 0 and idxOp < 4;

    # flatten, split up, and validate the "where:type" value.
    sFlattened = self.flattenAllSections(aasSections);
    asSplit = sFlattened.split(':');
    if len(asSplit) == 1:
        sType  = asSplit[0];
        sWhere = None;
    elif len(asSplit) == 2:
        (sWhere, sType) = asSplit;
    else:
        return self.errorComment(iTagLine, 'expected %s value on format "[<where>:]<type>" not "%s"' % (sTag, sFlattened,));

    if sType not in g_kdOpTypes:
        return self.errorComment(iTagLine, '%s: invalid type value "%s", valid: %s'
                                           % (sTag, sType, ', '.join(g_kdOpTypes.keys()),));
    if sWhere is None:
        sWhere = g_kdOpTypes[sType][1];
    elif sWhere not in g_kdOpLocations:
        return self.errorComment(iTagLine, '%s: invalid where value "%s", valid: %s'
                                           % (sTag, sWhere, ', '.join(g_kdOpLocations.keys()),));

    # Insert the operand, refusing to overwrite an existing one.
    while idxOp >= len(oInstr.aoOperands):
        oInstr.aoOperands.append(None);
    if oInstr.aoOperands[idxOp] is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s:%s" with "%s:%s"'
                                           % ( sTag, oInstr.aoOperands[idxOp].sWhere, oInstr.aoOperands[idxOp].sType,
                                               sWhere, sType,));
    oInstr.aoOperands[idxOp] = Operand(sWhere, sType);

    _ = iEndLine;
    return True;
2025
def parseTagOpMaps(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \@opmaps
    Value:  map[,map2]

    Indicates which maps the instruction is in.  There is a default map
    associated with each input file.

    Every listed name must be a key of g_dInstructionMaps and must not name a
    map the instruction is already in.  The corresponding map objects are
    appended to the aoMaps list of the instruction.  A name repeated within
    the value is reported but does not fail the tag.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split up and validate the value.
    sFlattened = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',');
    if not sFlattened.strip():
        # This must be checked on the string since ''.split(',') yields [''] rather than [].
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    asMaps = sFlattened.split(',');
    for sMap in asMaps:
        if sMap not in g_dInstructionMaps:
            return self.errorComment(iTagLine, '%s: invalid map value: %s  (valid values: %s)'
                                               % (sTag, sMap, ', '.join(g_dInstructionMaps.keys()),));

    # Add the maps to the current list.  Throw errors on duplicates.
    for oMap in oInstr.aoMaps:
        if oMap.sName in asMaps:
            return self.errorComment(iTagLine, '%s: duplicate map assignment: %s' % (sTag, oMap.sName));

    for sMap in asMaps:
        oMap = g_dInstructionMaps[sMap];
        if oMap not in oInstr.aoMaps:
            oInstr.aoMaps.append(oMap);
        else:
            self.errorComment(iTagLine, '%s: duplicate map assignment (input): %s' % (sTag, sMap));

    _ = iEndLine;
    return True;
2060
def parseTagOpPfx(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \@oppfx
    Value:  n/a|none|0x66|0xf3|0xf2

    Required prefix for the instruction.  (In a (E)VEX context this is the
    value of the 'pp' field rather than an actual prefix.)

    The value is lower-cased.  'n/a' is stored as None and 'none' as is.  Any
    other value gets '0x' prepended if it is two characters long and must then
    pass _isValidOpcodeByte().  Everything but None must be in g_kdPrefixes.
    A previously set (not None) prefix is not overwritten.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sFlattened = self.flattenAllSections(aasSections);
    asPrefixes = sFlattened.split();
    if not asPrefixes:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if len(asPrefixes) > 1:
        return self.errorComment(iTagLine, '%s: max one prefix: %s' % (sTag, asPrefixes,));

    sPrefix = asPrefixes[0].lower();
    if sPrefix == 'n/a':
        sPrefix = None;
    elif sPrefix != 'none':
        if len(sPrefix) == 2:
            sPrefix = '0x' + sPrefix;
        if not _isValidOpcodeByte(sPrefix):
            return self.errorComment(iTagLine, '%s: invalid prefix: %s' % (sTag, sPrefix,));

    if sPrefix is not None and sPrefix not in g_kdPrefixes:
        return self.errorComment(iTagLine, '%s: invalid prefix: %s (valid %s)' % (sTag, sPrefix, g_kdPrefixes,));

    # Set it.
    if oInstr.sPrefix is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sPrefix, sPrefix,));
    oInstr.sPrefix = sPrefix;

    _ = iEndLine;
    return True;
2098
def parseTagOpcode(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \@opcode
    Value:  0x?? | /reg | 11/reg | !11/reg
            (TODO: | mr/reg | 11 mr/reg | !11 mr/reg)

    The opcode byte or sub-byte for the instruction in the context of a map.

    The value must either pass _isValidOpcodeByte() or be one of the '/reg',
    '11/reg' and '!11/reg' forms, where 'reg' is a single digit in the range
    0 thru 7 (the ModR/M reg field is three bits wide).  A previously set
    opcode is not overwritten.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sOpcode = self.flattenAllSections(aasSections);
    if _isValidOpcodeByte(sOpcode):
        pass;
    elif len(sOpcode) == 2 and sOpcode.startswith('/') and sOpcode[-1] in '01234567':
        pass;
    elif len(sOpcode) == 4 and sOpcode.startswith('11/') and sOpcode[-1] in '01234567':
        pass;
    elif len(sOpcode) == 5 and sOpcode.startswith('!11/') and sOpcode[-1] in '01234567':
        pass;
    else:
        return self.errorComment(iTagLine, '%s: invalid opcode: %s' % (sTag, sOpcode,));

    # Set it.
    if oInstr.sOpcode is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sOpcode, sOpcode,));
    oInstr.sOpcode = sOpcode;

    _ = iEndLine;
    return True;
2128
def parseTagOpcodeSub(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \@opcodesub
    Value:  none | 11 mr/reg | !11 mr/reg

    This is a simple way of dealing with encodings where the mod=3 and mod!=3
    represents exactly two different instructions.  The more proper way would
    be to go via maps with two members, but this is faster.

    The value must be a key of g_kdSubOpcodes; entry 0 of the associated
    value is what gets stored in the instruction.  A previously set sub-opcode
    is not overwritten.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value and translate it via the sub-opcode table.
    sValue = self.flattenAllSections(aasSections);
    if sValue not in g_kdSubOpcodes:
        return self.errorComment(iTagLine, '%s: invalid sub opcode: %s  (valid: 11, !11, none)' % (sTag, sValue,));
    sSubOpcode = g_kdSubOpcodes[sValue][0];

    # Refuse to replace an existing value.
    sOldSubOpcode = oInstr.sSubOpcode;
    if sOldSubOpcode is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                           % ( sTag, sOldSubOpcode, sSubOpcode,));
    oInstr.sSubOpcode = sSubOpcode;

    _ = iEndLine;
    return True;
2154
def parseTagOpEnc(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \@openc
    Value:  ModR/M|fixed|prefix|<map name>

    The instruction operand encoding style.

    The value is accepted if it is a key of g_kdEncodings or of
    g_dInstructionMaps, or if it passes _isValidOpcodeByte().  A previously
    set encoding is not overwritten.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sEncoding = self.flattenAllSections(aasSections);
    fKnownName = sEncoding in g_kdEncodings or sEncoding in g_dInstructionMaps;
    if not fKnownName and not _isValidOpcodeByte(sEncoding):
        return self.errorComment(iTagLine, '%s: invalid encoding: %s' % (sTag, sEncoding,));

    # Refuse to replace an existing value.
    sOldEncoding = oInstr.sEncoding;
    if sOldEncoding is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                           % ( sTag, sOldEncoding, sEncoding,));
    oInstr.sEncoding = sEncoding;

    _ = iEndLine;
    return True;
2181
## Maps each EFLAGS related tag to the name of the instruction attribute
# holding the flag list for that tag (see parseTagOpEFlags).
kdOpFlagToAttr = {
    '@opfltest':    'asFlTest',         # Flags tested.
    '@opflmodify':  'asFlModify',       # Flags modified.
    '@opflundef':   'asFlUndefined',    # Flags left undefined.
    '@opflset':     'asFlSet',          # Flags set.
    '@opflclear':   'asFlClear',        # Flags cleared.
};
2190
def parseTagOpEFlags(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tags:   \@opfltest, \@opflmodify, \@opflundef, \@opflset, \@opflclear
    Value:  <eflags specifier>

    The value is a comma separated list of names from g_kdEFlagsMnemonics
    (surrounding whitespace is tolerated), or the single word 'none' (any
    case) for an empty list.  The resulting list is stored in the instruction
    attribute that kdOpFlagToAttr associates with the tag, provided that
    attribute is still None.

    Returns True on success, False if a flag name is invalid, and otherwise
    the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split up and validate the values.
    sFlattened = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',');
    asFlags = sFlattened.split(',');
    if len(asFlags) == 1 and asFlags[0].lower() == 'none':
        asFlags = [];
    else:
        fRc = True;
        for iFlag, sFlag in enumerate(asFlags):
            if sFlag in g_kdEFlagsMnemonics:
                continue;
            sStripped = sFlag.strip();
            if sStripped in g_kdEFlagsMnemonics:
                asFlags[iFlag] = sStripped;
            else:
                fRc = self.errorComment(iTagLine, '%s: invalid EFLAGS value: %s' % (sTag, sFlag,));
        if not fRc:
            return False;

    # Store the list, refusing to replace an existing one.
    sAttrib = self.kdOpFlagToAttr[sTag];
    asOld = getattr(oInstr, sAttrib);
    if asOld is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, asOld, asFlags,));
    setattr(oInstr, sAttrib, asFlags);

    _ = iEndLine;
    return True;
2222
def parseTagOpHints(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \@ophints
    Value:  Comma or space separated list of flags and hints.

    This covers the disassembler flags table and more.

    Each hint must be a key of g_kdHints; the single word 'none' (any case)
    means no hints.  Valid hints are added as keys to the dHints dictionary
    of the instruction.  A hint that is already present is reported but does
    not fail the tag.

    Returns True on success and False if a hint is invalid.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    asHints = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asHints) == 1 and asHints[0].lower() == 'none':
        asHints = [];
    else:
        fRc = True;
        for iHint, sHint in enumerate(asHints):
            if sHint not in g_kdHints:
                if sHint.strip() in g_kdHints:
                    # Replace the list entry (a str cannot be assigned into).
                    asHints[iHint] = sHint.strip();
                else:
                    fRc = self.errorComment(iTagLine, '%s: invalid hint value: %s' % (sTag, sHint,));
        if not fRc:
            return False;

    # Append them.
    for sHint in asHints:
        if sHint not in oInstr.dHints:
            oInstr.dHints[sHint] = True; # (dummy value, using dictionary for speed)
        else:
            self.errorComment(iTagLine, '%s: duplicate hint: %s' % ( sTag, sHint,));

    _ = iEndLine;
    return True;
2256
def parseTagOpDisEnum(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \@opdisenum
    Value:  OP_XXXX

    This is for selecting a specific (legacy) disassembler enum value for the
    instruction.

    An error is reported unless the value is exactly one word; when there are
    several, the first one is used.  The word must match self.oReDisEnum, and
    a previously set value is not overwritten.

    Returns True on success, False if the value is empty, and otherwise the
    result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and split.
    asWords = self.flattenAllSections(aasSections).split();
    if len(asWords) != 1:
        self.errorComment(iTagLine, '%s: expected exactly one value: %s' % (sTag, asWords,));
        if not asWords:
            return False;

    # Validate the (first) word.
    sDisEnum = asWords[0];
    if self.oReDisEnum.match(sDisEnum) is None:
        return self.errorComment(iTagLine, '%s: invalid disassembler OP_XXXX enum: %s (pattern: %s)'
                                           % (sTag, sDisEnum, self.oReDisEnum.pattern));

    # Refuse to replace an existing value.
    sOldDisEnum = oInstr.sDisEnum;
    if sOldDisEnum is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % (sTag, sOldDisEnum, sDisEnum,));
    oInstr.sDisEnum = sDisEnum;

    _ = iEndLine;
    return True;
2285
def parseTagOpMinCpu(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \@opmincpu
    Value:  <simple CPU name>

    Indicates when this instruction was introduced.

    The value must be a name found in g_kdCpuNames.  An error is reported if
    more than one word is given, in which case the first one is used.  If the
    instruction already has a different minimum CPU, an error is reported and
    the existing value is kept.

    Returns True if the name was valid, otherwise the result of
    self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value, split into words, make sure there's just one, valid it.
    asCpus = self.flattenAllSections(aasSections).split();
    if not asCpus:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if len(asCpus) > 1:
        self.errorComment(iTagLine, '%s: exactly one CPU name, please: %s' % (sTag, ' '.join(asCpus),));

    sMinCpu = asCpus[0];
    if sMinCpu not in g_kdCpuNames:
        return self.errorComment(iTagLine, '%s: invalid CPU name: %s  (names: %s)'
                                           % (sTag, sMinCpu, ','.join(sorted(g_kdCpuNames)),));

    # Set it.  (Must not be assigned before this check, or a conflicting
    # earlier value would go unnoticed.)
    if oInstr.sMinCpu is None:
        oInstr.sMinCpu = sMinCpu;
    elif oInstr.sMinCpu != sMinCpu:
        self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % (sTag, oInstr.sMinCpu, sMinCpu,));

    _ = iEndLine;
    return True;
2315
def parseTagOpCpuId(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \@opcpuid
    Value:  none | <CPUID flag specifier>

    CPUID feature bit which is required for the instruction to be present.

    The value is a comma or space separated list of names from
    g_kdCpuIdFlags; the single word 'none' (any case) means no flags.  Valid
    names are appended to the asCpuIds list of the instruction.  A name that
    is already present is reported but does not fail the tag.

    Returns True on success and False if a name is invalid.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    asCpuIds = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asCpuIds) == 1 and asCpuIds[0].lower() == 'none':
        asCpuIds = [];
    else:
        fRc = True;
        for iCpuId, sCpuId in enumerate(asCpuIds):
            if sCpuId not in g_kdCpuIdFlags:
                if sCpuId.strip() in g_kdCpuIdFlags:
                    # Replace the list entry (a str cannot be assigned into).
                    asCpuIds[iCpuId] = sCpuId.strip();
                else:
                    fRc = self.errorComment(iTagLine, '%s: invalid CPUID value: %s' % (sTag, sCpuId,));
        if not fRc:
            return False;

    # Append them.
    for sCpuId in asCpuIds:
        if sCpuId not in oInstr.asCpuIds:
            oInstr.asCpuIds.append(sCpuId);
        else:
            self.errorComment(iTagLine, '%s: duplicate CPUID: %s' % ( sTag, sCpuId,));

    _ = iEndLine;
    return True;
2349
def parseTagOpGroup(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \@opgroup
    Value:  op_grp1[_subgrp2[_subsubgrp3]]

    Instruction grouping.

    The value must be exactly one word matching self.oReGroupName.  A
    previously set group is not overwritten.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split into words and make sure there is exactly one valid name.
    asGroups = self.flattenAllSections(aasSections).split();
    if len(asGroups) != 1:
        return self.errorComment(iTagLine, '%s: exactly one group, please: %s' % (sTag, asGroups,));
    (sGroup,) = asGroups;
    if self.oReGroupName.match(sGroup) is None:
        return self.errorComment(iTagLine, '%s: invalid group name: %s (valid: %s)'
                                           % (sTag, sGroup, self.oReGroupName.pattern));

    # Refuse to replace an existing value.
    sOldGroup = oInstr.sGroup;
    if sOldGroup is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, sOldGroup, sGroup,));
    oInstr.sGroup = sGroup;

    _ = iEndLine;
    return True;
2375
def parseTagOpUnusedInvalid(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \@opunused, \@opinvalid, \@opinvlstyle
    Value:  <invalid opcode behaviour style>

    The \@opunused indicates the specification is for a currently unused
    instruction encoding.

    The \@opinvalid indicates the specification is for a currently invalid
    instruction encoding (like UD2).

    The \@opinvlstyle just indicates how CPUs decode the instruction when
    not supported (\@opcpuid, \@opmincpu) or disabled.

    The value must be exactly one word and a key of g_kdInvalidStyles.  Only
    one of these tags may be given per instruction.  Besides storing the
    style, \@opunused sets the fUnused attribute of the instruction and
    \@opinvalid sets fInvalid.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split into words and make sure there is exactly one valid style.
    asStyles = self.flattenAllSections(aasSections).split();
    if len(asStyles) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid behviour style, please: %s' % (sTag, asStyles,));
    (sStyle,) = asStyles;
    if sStyle not in g_kdInvalidStyles:
        return self.errorComment(iTagLine, '%s: invalid invalid behaviour style: %s (valid: %s)'
                                           % (sTag, sStyle, g_kdInvalidStyles.keys(),));

    # Refuse to replace an existing style.
    sOldStyle = oInstr.sInvalidStyle;
    if sOldStyle is not None:
        return self.errorComment(iTagLine,
                                 '%s: attempting to overwrite "%s" with "%s" (only one @opunused, @opinvalid, @opinvlstyle)'
                                 % ( sTag, sOldStyle, sStyle,));

    # Store the style and flag the instruction according to the tag.
    oInstr.sInvalidStyle = sStyle;
    if sTag == '@opunused':
        oInstr.fUnused = True;
    elif sTag == '@opinvalid':
        oInstr.fInvalid = True;

    _ = iEndLine;
    return True;
2413
def parseTagOpTest(self, sTag, aasSections, iTagLine, iEndLine): # pylint: disable=too-many-locals
    """
    Tag:        \@optest
    Value:      [<selectors>[ ]?] <inputs> -> <outputs>
    Example:    mode==64bit / in1=0xfffffffe:dw in2=1:dw -> out1=0xffffffff:dw outfl=a?,p?

    The main idea here is to generate basic instruction tests.

    The probably simplest way of handling the diverse input, would be to use
    it to produce size optimized byte code for a simple interpreter that
    modifies the register input and output states.

    An alternative to the interpreter would be creating multiple tables,
    but that becomes rather complicated wrt what goes where and then to use
    them in an efficient manner.

    Each section of the value is parsed as a separate test.  Tests that parse
    without problems are appended to the aoTests list of the instruction;
    problems are reported via self.errorComment().

    Returns True.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    #
    # Do it section by section.
    #
    for asSectionLines in aasSections:
        #
        # Sort the input into outputs, inputs and selector conditions.
        #
        sFlatSection = self.flattenAllSections([asSectionLines,]);
        if not sFlatSection:
            self.errorComment(iTagLine, '%s: missing value (dbg: aasSections=%s)' % ( sTag, aasSections));
            continue;
        oTest = InstructionTest(oInstr);

        asSelectors = [];
        asInputs    = [];
        asOutputs   = [];
        asCur       = asOutputs;
        fRc         = True;
        asWords     = sFlatSection.split();
        # The words are processed back to front: outputs first, then the
        # inputs (after seeing '->'), and finally the selectors (after '/').
        for iWord in range(len(asWords) - 1, -1, -1):
            sWord = asWords[iWord];
            # Check for array switchers.  The current list is checked by
            # identity, as the lists may well have equal contents.
            if sWord == '->':
                if asCur is not asOutputs:
                    fRc = self.errorComment(iTagLine, '%s: "->" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asInputs;
            elif sWord == '/':
                if asCur is not asInputs:
                    fRc = self.errorComment(iTagLine, '%s: "/" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asSelectors;
            else:
                asCur.insert(0, sWord);

        #
        # Validate and add selectors.
        #
        for sCond in asSelectors:
            # Expand predicate shorthands before looking for the compare operator.
            sCondExp = TestSelector.kdPredicates.get(sCond, sCond);
            oSelector = None;
            for sOp in TestSelector.kasCompareOps:
                off = sCondExp.find(sOp);
                if off >= 0:
                    sVariable = sCondExp[:off];
                    sValue    = sCondExp[off + len(sOp):];
                    if sVariable in TestSelector.kdVariables:
                        if sValue in TestSelector.kdVariables[sVariable]:
                            oSelector = TestSelector(sVariable, sOp, sValue);
                        else:
                            self.errorComment(iTagLine, '%s: invalid condition value "%s" in "%s" (valid: %s)'
                                                         % ( sTag, sValue, sCond,
                                                             TestSelector.kdVariables[sVariable].keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid condition variable "%s" in "%s" (valid: %s)'
                                                     % ( sTag, sVariable, sCond, TestSelector.kdVariables.keys(),));
                    break;
            if oSelector is not None:
                for oExisting in oTest.aoSelectors:
                    if oExisting.sVariable == oSelector.sVariable:
                        self.errorComment(iTagLine, '%s: already have a selector for variable "%s" (existing: %s, new: %s)'
                                                     % ( sTag, oSelector.sVariable, oExisting, oSelector,));
                oTest.aoSelectors.append(oSelector);
            else:
                fRc = self.errorComment(iTagLine, '%s: failed to parse selector: %s' % ( sTag, sCond,));

        #
        # Validate outputs and inputs, adding them to the test as we go along.
        #
        for asItems, sDesc, aoDst in [ (asInputs, 'input', oTest.aoInputs), (asOutputs, 'output', oTest.aoOutputs)]:
            asValidFieldKinds = [ 'both', sDesc, ];
            for sItem in asItems:
                oItem = None;
                # Only the first operator found in the item is considered.
                for sOp in TestInOut.kasOperators:
                    off = sItem.find(sOp);
                    if off < 0:
                        continue;
                    sField     = sItem[:off];
                    sValueType = sItem[off + len(sOp):];
                    if     sField in TestInOut.kdFields \
                       and TestInOut.kdFields[sField][1] in asValidFieldKinds:
                        # Split 'value[:type]', defaulting to the field's type.
                        asSplit = sValueType.split(':', 1);
                        sValue  = asSplit[0];
                        sType   = asSplit[1] if len(asSplit) > 1 else TestInOut.kdFields[sField][0];
                        if sType in TestInOut.kdTypes:
                            oValid = TestInOut.kdTypes[sType].validate(sValue);
                            if oValid is True:
                                if not TestInOut.kdTypes[sType].isAndOrPair(sValue) or sOp == '&|=':
                                    oItem = TestInOut(sField, sOp, sValue, sType);
                                else:
                                    self.errorComment(iTagLine, '%s: and-or %s value "%s" can only be used with "&|="'
                                                                 % ( sTag, sDesc, sItem, ));
                            else:
                                self.errorComment(iTagLine, '%s: invalid %s value "%s" in "%s" (type: %s): %s'
                                                             % ( sTag, sDesc, sValue, sItem, sType, oValid, ));
                        else:
                            self.errorComment(iTagLine, '%s: invalid %s type "%s" in "%s" (valid types: %s)'
                                                         % ( sTag, sDesc, sType, sItem, TestInOut.kdTypes.keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid %s field "%s" in "%s"\nvalid fields: %s'
                                                     % ( sTag, sDesc, sField, sItem,
                                                         ', '.join([sKey for sKey in TestInOut.kdFields.keys()
                                                                    if TestInOut.kdFields[sKey][1] in asValidFieldKinds]),));
                    break;
                if oItem is not None:
                    for oExisting in aoDst:
                        if oExisting.sField == oItem.sField and oExisting.sOp == oItem.sOp:
                            self.errorComment(iTagLine,
                                              '%s: already have a "%s" assignment for field "%s" (existing: %s, new: %s)'
                                              % ( sTag, oItem.sOp, oItem.sField, oExisting, oItem,));
                    aoDst.append(oItem);
                else:
                    fRc = self.errorComment(iTagLine, '%s: failed to parse assignment: %s' % ( sTag, sItem,));

        #
        # Add the test if everything parsed fine, otherwise dump what we got.
        #
        if fRc:
            oInstr.aoTests.append(oTest);
        else:
            self.errorComment(iTagLine, '%s: failed to parse test: %s' % (sTag, ' '.join(asWords),));
            self.errorComment(iTagLine, '%s: asSelectors=%s / asInputs=%s -> asOutputs=%s'
                                        % (sTag, asSelectors, asInputs, asOutputs,));

    _ = iEndLine;
    return True;
2558
def parseTagOpTestNum(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Numbered \@optest tag.  Either \@optest42 or \@optest[42].

    The number is checked against the count of tests the instruction already
    has, reporting a mismatch via self.errorComment().  The tag is then
    handed on to parseTagOpTest() regardless of that check.

    Returns the result of parseTagOpTest().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Extract the number following the seven character '@optest' prefix,
    # dropping the square brackets in the bracketed form.
    sNumber = sTag[8:-1] if sTag[-1] == ']' else sTag[7:];
    iTest = int(sNumber);

    cTests = len(oInstr.aoTests);
    if iTest != cTests:
        self.errorComment(iTagLine, '%s: incorrect test number: %u, actual %u' % (sTag, iTest, cTests,));
    return self.parseTagOpTest(sTag, aasSections, iTagLine, iEndLine);
2574
def parseTagOpTestIgnore(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@optestign | \@optestignore
    Value:      <value is ignored>

    This is a simple trick to ignore a test while debugging another.

    See also \@oponlytest.

    Nothing is done with the tag or its value.  Returns True.
    """
    _ = (sTag, aasSections, iTagLine, iEndLine);
    return True;
2586
def parseTagOpCopyTests(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@opcopytests
    Value:      <opstat | function> [..]
    Example:    \@opcopytests add_Eb_Gb

    Trick to avoid duplicating tests for different encodings of the same
    operation.

    Each reference must match self.oReStatsName or self.oReFunctionName and
    is appended to the asCopyTests list of the instruction.  Invalid and
    duplicate references are reported but do not fail the tag.

    Returns True, or the result of self.errorComment() if the value is empty.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Only the references are recorded here; they are validated against the
    # name patterns but not resolved to instructions.
    asToCopy = self.flattenAllSections(aasSections).split();
    if not asToCopy:
        return self.errorComment(iTagLine, '%s: requires at least on reference value' % (sTag,));

    for sToCopy in asToCopy:
        if sToCopy in oInstr.asCopyTests:
            self.errorComment(iTagLine, '%s: ignoring duplicate "%s"' % (sTag, sToCopy,));
            continue;
        if self.oReStatsName.match(sToCopy) or self.oReFunctionName.match(sToCopy):
            oInstr.asCopyTests.append(sToCopy);
            continue;
        self.errorComment(iTagLine, '%s: invalid instruction reference (opstat or function) "%s" (valid: %s or %s)'
                                    % (sTag, sToCopy, self.oReStatsName.pattern, self.oReFunctionName.pattern));

    _ = iEndLine;
    return True;
2615
def parseTagOpOnlyTest(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@oponlytest | \@oponly
    Value:      none

    Only test instructions with this tag.  This is a trick that is handy
    for singling out one or two new instructions or tests.

    See also \@optestignore.

    The tag takes no value.  The instruction is appended to
    g_aoOnlyTestInstructions unless it is already in that list.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The tag must come without any value.
    sValue = self.flattenAllSections(aasSections).strip();
    if sValue:
        return self.errorComment(iTagLine, '%s: does not take any value: %s' % (sTag, sValue));

    # Register the instruction, avoiding duplicate entries.
    fAlreadyListed = oInstr in g_aoOnlyTestInstructions;
    if not fAlreadyListed:
        g_aoOnlyTestInstructions.append(oInstr);

    _ = iEndLine;
    return True;
2638
def parseTagOpXcptType(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \@opxcpttype
    Value:  [none|1|2|3|4|4UA|5|6|7|8|11|12|E1|E1NF|E2|E3|E3NF|E4|E4NF|E5|E5NF|E6|E6NF|E7NF|E9|E9NF|E10|E11|E12|E12NF]

    Sets the SSE or AVX exception type (see SDMv2 2.4, 2.7).

    The value must be exactly one word and a key of g_kdXcptTypes.  A
    previously set exception type is not overwritten.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split into words and make sure there is exactly one valid type.
    asTypes = self.flattenAllSections(aasSections).split();
    if len(asTypes) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid exception type, please: %s' % (sTag, asTypes,));
    (sType,) = asTypes;
    if sType not in g_kdXcptTypes:
        return self.errorComment(iTagLine, '%s: invalid invalid exception type: %s (valid: %s)'
                                           % (sTag, sType, sorted(g_kdXcptTypes.keys()),));

    # Refuse to replace an existing value.
    sOldType = oInstr.sXcptType;
    if sOldType is not None:
        return self.errorComment(iTagLine,
                                 '%s: attempting to overwrite "%s" with "%s" (only one @opxcpttype)'
                                 % ( sTag, sOldType, sType,));
    oInstr.sXcptType = sType;

    _ = iEndLine;
    return True;
2665
def parseTagOpFunction(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opfunction
    Value:  <VMM function name>

    Explicitly sets the IEM function name.  Normally the name is picked up
    from the FNIEMOP_XXX macro invocation after the description, or it is
    generated from the mnemonic and operands.

    It is thought this may be necessary when specifying instructions whose
    implementation isn't following immediately or isn't implemented yet.

    Returns True on success, otherwise whatever self.errorComment() returns
    (invalid name, or a function name has already been set).
    """
    _ = iEndLine;
    oCurInstr = self.ensureInstructionForOpTag(iTagLine);

    # The flattened tag text must match the function name pattern.
    sName = self.flattenAllSections(aasSections);
    if self.oReFunctionName.match(sName) is None:
        return self.errorComment(iTagLine, '%s: invalid VMM function name: "%s" (valid: %s)'
                                 % (sTag, sName, self.oReFunctionName.pattern));

    # Never replace a name that is already in place.
    sPrevious = oCurInstr.sFunction;
    if sPrevious is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite VMM function name "%s" with "%s"'
                                 % (sTag, sPrevious, sName,));

    oCurInstr.sFunction = sName;
    return True;
2693
def parseTagOpStats(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opstats
    Value:  <VMM statistics base name>

    Explicitly sets the statistics name.  Normally the name is picked up
    from the IEMOP_MNEMONIC macro invocation, or it is generated from the
    mnemonic and operands.

    It is thought this may be necessary when specifying instructions whose
    implementation isn't following immediately or isn't implemented yet.

    Returns True on success, otherwise whatever self.errorComment() returns
    (invalid name, or a statistics name has already been set).
    """
    _ = iEndLine;
    oCurInstr = self.ensureInstructionForOpTag(iTagLine);

    # The flattened tag text must match the statistics name pattern.
    sName = self.flattenAllSections(aasSections);
    if self.oReStatsName.match(sName) is None:
        return self.errorComment(iTagLine, '%s: invalid VMM statistics name: "%s" (valid: %s)'
                                 % (sTag, sName, self.oReStatsName.pattern));

    # Never replace a name that is already in place.
    sPrevious = oCurInstr.sStats;
    if sPrevious is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite VMM statistics base name "%s" with "%s"'
                                 % (sTag, sPrevious, sName,));

    oCurInstr.sStats = sName;
    return True;
2721
def parseTagOpDone(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opdone
    Value:  none

    Explicitly flushes the instructions that have been specified so far.

    Returns the result of self.doneInstructions(), or of self.errorComment()
    if the tag is followed by any text.
    """
    _ = iEndLine;

    sStray = self.flattenAllSections(aasSections);
    if sStray == '':
        return self.doneInstructions();
    return self.errorComment(iTagLine, '%s: takes no value, found: "%s"' % (sTag, sStray,));
2734
2735 ## @}
2736
2737
def parseComment(self):
    """
    Parse the current comment (self.sComment).

    If it's an opcode specifying comment, we reset the macro stuff.

    The comment text is split into lines, stripped of leading whitespace and
    asterisks, and then scanned for lines starting with '@'.  The text that
    follows a tag (up to the next tag) is collected as a list of sections,
    where a section is a list of consecutive non-empty lines, and handed to
    the handler registered for the tag in self.dTagHandlers.  Text preceding
    the first tag is collected under the pseudo tag '@default'.

    Returns False if the comment contains neither 'Opcode' nor '@' (nothing
    is parsed in that case), otherwise True.
    """
    #
    # Reject if comment doesn't seem to contain anything interesting.
    #
    if self.sComment.find('Opcode') < 0 \
      and self.sComment.find('@') < 0:
        return False;

    #
    # Split the comment into lines, removing leading asterisks and spaces.
    # Also remove leading and trailing empty lines.
    #
    asLines = self.sComment.split('\n');
    for iLine, sLine in enumerate(asLines):
        asLines[iLine] = sLine.lstrip().lstrip('*').lstrip();

    # Each leading empty line dropped moves the start line of the comment down.
    while asLines and not asLines[0]:
        self.iCommentLine += 1;
        asLines.pop(0);

    while asLines and not asLines[-1]:
        asLines.pop(len(asLines) - 1);

    #
    # Check for old style: Opcode 0x0f 0x12
    #
    # (asLines cannot be empty here: the stripping above only removes
    # whitespace and asterisks, so the line holding 'Opcode' or '@' survives.)
    if asLines[0].startswith('Opcode '):
        self.parseCommentOldOpcode(asLines);

    #
    # Look for @op* tagged data.
    #
    cOpTags = 0;                    # Number of tags that had a handler.
    sFlatDefault = None;            # Flattened untagged text, if any.
    sCurTag = '@default';           # Tag currently being collected.
    iCurTagLine = 0;                # Index into asLines of the current tag.
    asCurSection = [];              # Always the same list object as aasSections[-1].
    aasSections = [ asCurSection, ];
    for iLine, sLine in enumerate(asLines):
        if not sLine.startswith('@'):
            # Text line: extend the current section.  An empty line ends a
            # non-empty section and opens a new one.
            if sLine:
                asCurSection.append(sLine);
            elif asCurSection:
                asCurSection = [];
                aasSections.append(asCurSection);
        else:
            #
            # Process the previous tag.
            #
            # Drop a trailing empty section first (left behind by an empty line).
            if not asCurSection and len(aasSections) > 1:
                aasSections.pop(-1);
            if sCurTag in self.dTagHandlers:
                self.dTagHandlers[sCurTag](sCurTag, aasSections, iCurTagLine, iLine);
                cOpTags += 1;
            elif sCurTag.startswith('@op'):
                self.errorComment(iCurTagLine, 'Unknown tag: %s' % (sCurTag));
            elif sCurTag == '@default':
                sFlatDefault = self.flattenAllSections(aasSections);
            elif '@op' + sCurTag[1:] in self.dTagHandlers:
                self.errorComment(iCurTagLine, 'Did you mean "@op%s" rather than "%s"?' % (sCurTag[1:], sCurTag));
            elif sCurTag in ['@encoding', '@opencoding']:
                self.errorComment(iCurTagLine, 'Did you mean "@openc" rather than "%s"?' % (sCurTag,));

            #
            # New tag.
            #
            # The tag is the first word (lower-cased), the rest of the line
            # becomes the first line of its first section.
            asSplit = sLine.split(None, 1);
            sCurTag = asSplit[0].lower();
            if len(asSplit) > 1:
                asCurSection = [asSplit[1],];
            else:
                asCurSection = [];
            aasSections = [asCurSection, ];
            iCurTagLine = iLine;

    #
    # Process the final tag.
    #
    # NOTE(review): unlike the in-loop processing above, there are no
    # 'Did you mean ...' hints for the last tag of a comment.
    if not asCurSection and len(aasSections) > 1:
        aasSections.pop(-1);
    if sCurTag in self.dTagHandlers:
        self.dTagHandlers[sCurTag](sCurTag, aasSections, iCurTagLine, iLine);
        cOpTags += 1;
    elif sCurTag.startswith('@op'):
        self.errorComment(iCurTagLine, 'Unknown tag: %s' % (sCurTag));
    elif sCurTag == '@default':
        sFlatDefault = self.flattenAllSections(aasSections);

    #
    # Don't allow default text in blocks containing @op*.
    #
    if cOpTags > 0 and sFlatDefault:
        self.errorComment(0, 'Untagged comment text is not allowed with @op*: %s' % (sFlatDefault,));

    return True;
2838
def parseMacroInvocation(self, sInvocation):
    """
    Parses a macro invocation.

    sInvocation must start with the macro name.  If the invocation isn't
    complete at the end of sInvocation, lines from self.asLines are appended
    (starting at index self.iLine) until the closing parenthesis is found.
    self.iLine itself is not modified.

    Arguments are split on commas at the outermost parenthesis level only
    and stripped of surrounding whitespace; string and character literals
    get no special treatment.

    Returns a tuple, first element is the offset following the macro
    invocation.  The second element is a list of macro arguments, where the
    zero'th is the macro name.

    NOTE(review): on an invalid macro name or when running past the end of
    the file, the result of self.error() is returned instead of a tuple -
    confirm the callers can cope with that.
    """
    # First the name.
    offOpen = sInvocation.find('(');
    if offOpen <= 0:
        self.raiseError("macro invocation open parenthesis not found");
    sName = sInvocation[:offOpen].strip();
    if not self.oReMacroName.match(sName):
        return self.error("invalid macro name '%s'" % (sName,));
    asRet = [sName, ];

    # Arguments.
    iLine = self.iLine;
    cDepth = 1;
    off = offOpen + 1;
    offStart = off;
    while cDepth > 0:
        # Fetch continuation lines until there is a character at 'off'.  This
        # must loop: appending an empty line leaves 'off' out of range.
        while off >= len(sInvocation):
            if iLine >= len(self.asLines):
                return self.error('macro invocation beyond end of file');
            sInvocation += self.asLines[iLine];
            iLine += 1;
        ch = sInvocation[off];

        if ch == ',' or ch == ')':
            # Only separators at the outermost level end an argument.
            if cDepth == 1:
                asRet.append(sInvocation[offStart:off].strip());
                offStart = off + 1;
            if ch == ')':
                cDepth -= 1;
        elif ch == '(':
            cDepth += 1;
        off += 1;

    return (off, asRet);
2880
def findAndParseMacroInvocationEx(self, sCode, sMacro):
    """
    Looks for the first occurrence of sMacro in sCode and, if the next
    non-blank character after it is an opening parenthesis, parses the
    invocation.

    Returns (len(sCode), None) if not found, parseMacroInvocation result if
    found (with the offset made relative to the start of sCode).
    """
    offHit = sCode.find(sMacro);
    # Use startswith() rather than indexing so that a macro name at the very
    # end of sCode (nothing but whitespace following) is simply 'not found'.
    if offHit >= 0 and sCode[offHit + len(sMacro):].lstrip().startswith('('):
        offAfter, asRet = self.parseMacroInvocation(sCode[offHit:])
        return (offHit + offAfter, asRet);
    return (len(sCode), None);
2890
def findAndParseMacroInvocation(self, sCode, sMacro):
    """
    Convenience variant of findAndParseMacroInvocationEx that drops the offset.

    Returns None if not found, arguments as per parseMacroInvocation if found.
    """
    tOffsetAndArgs = self.findAndParseMacroInvocationEx(sCode, sMacro);
    return tOffsetAndArgs[1];
2896
def findAndParseFirstMacroInvocation(self, sCode, asMacro):
    """
    Tries the macros in asMacro in order and stops at the first one that is
    invoked in sCode.

    Returns same as findAndParseMacroInvocation.
    """
    # Both generators are lazy, so no macro after the first hit is tried.
    oAttempts = (self.findAndParseMacroInvocation(sCode, sCandidate) for sCandidate in asMacro);
    return next((asArgs for asArgs in oAttempts if asArgs is not None), None);
2906
def workerIemOpMnemonicEx(self, sMacro, sStats, sAsm, sForm, sUpper, sLower, # pylint: disable=too-many-arguments
                          sDisHints, sIemHints, asOperands):
    """
    Processes one of the IEMOP_MNEMONIC0EX, IEMOP_MNEMONIC1EX, IEMOP_MNEMONIC2EX,
    IEMOP_MNEMONIC3EX, and IEMOP_MNEMONIC4EX macros.

    The macro parameters are cross checked against what the current
    instruction (the last one in self.aoCurInstrs, created if there is none)
    already knows from its @op* tags, and used to fill in whatever is still
    missing: mnemonic, operands, encoding, statistics name and hints.
    Problems are reported thru self.error() and processing continues.

    Parameters:
        sMacro      The macro name, only used in error messages.
        sStats      The a_Stats value (statistics name).
        sAsm        The a_szMnemonic value, currently unused.
        sForm       The a_Form value, looked up in g_kdIemForms.
        sUpper      The a_Upper value (upper cased mnemonic).
        sLower      The a_Lower value (lower cased mnemonic).
        sDisHints   The a_fDisHints value, '|' separated DISOPTYPE_XXX or 0.
        sIemHints   The a_fIemHints value, '|' separated IEMOPHINT_XXX or 0.
        asOperands  List of the a_OpN values, looked up in g_kdOpTypes.

    Returns True.
    """
    #
    # Some invocation checks.
    #
    if sUpper != sUpper.upper():
        self.error('%s: bad a_Upper parameter: %s' % (sMacro, sUpper,));
    if sLower != sLower.lower():
        self.error('%s: bad a_Lower parameter: %s' % (sMacro, sLower,));
    if sUpper.lower() != sLower:
        self.error('%s: a_Upper and a_Lower parameters does not match: %s vs %s' % (sMacro, sUpper, sLower,));
    if not self.oReMnemonic.match(sLower):
        self.error('%s: invalid a_Lower: %s (valid: %s)' % (sMacro, sLower, self.oReMnemonic.pattern,));

    #
    # Check if sIemHints tells us to not consider this macro invocation.
    #
    if sIemHints.find('IEMOPHINT_SKIP_PYTHON') >= 0:
        return True;

    # Apply to the last instruction only for now.
    if not self.aoCurInstrs:
        self.addInstruction();
    oInstr = self.aoCurInstrs[-1];
    # Remember where the macro was seen; -1 means none has been seen yet.
    if oInstr.iLineMnemonicMacro == -1:
        oInstr.iLineMnemonicMacro = self.iLine;
    else:
        self.error('%s: already saw a IEMOP_MNEMONIC* macro on line %u for this instruction'
                   % (sMacro, oInstr.iLineMnemonicMacro,));

    # Mnemonic
    if oInstr.sMnemonic is None:
        oInstr.sMnemonic = sLower;
    elif oInstr.sMnemonic != sLower:
        self.error('%s: current instruction and a_Lower does not match: %s vs %s' % (sMacro, oInstr.sMnemonic, sLower,));

    # Process operands.
    # The instruction either has no operands yet (they get added below) or
    # the same number as the macro (they get compared below).
    if len(oInstr.aoOperands) not in [0, len(asOperands)]:
        self.error('%s: number of operands given by @opN does not match macro: %s vs %s'
                   % (sMacro, len(oInstr.aoOperands), len(asOperands),));
    for iOperand, sType in enumerate(asOperands):
        sWhere = g_kdOpTypes.get(sType, [None, None])[1];
        if sWhere is None:
            self.error('%s: unknown a_Op%u value: %s' % (sMacro, iOperand + 1, sType));
            if iOperand < len(oInstr.aoOperands): # error recovery.
                sWhere = oInstr.aoOperands[iOperand].sWhere;
                sType = oInstr.aoOperands[iOperand].sType;
            else:
                sWhere = 'reg';
                sType = 'Gb';
        if iOperand == len(oInstr.aoOperands):
            oInstr.aoOperands.append(Operand(sWhere, sType))
        elif oInstr.aoOperands[iOperand].sWhere != sWhere or oInstr.aoOperands[iOperand].sType != sType:
            self.error('%s: @op%u and a_Op%u mismatch: %s:%s vs %s:%s'
                       % (sMacro, iOperand + 1, iOperand + 1, oInstr.aoOperands[iOperand].sWhere,
                          oInstr.aoOperands[iOperand].sType, sWhere, sType,));

    # Encoding.
    if sForm not in g_kdIemForms:
        self.error('%s: unknown a_Form value: %s' % (sMacro, sForm,));
    else:
        # Entry 0 of the form is the encoding name.
        if oInstr.sEncoding is None:
            oInstr.sEncoding = g_kdIemForms[sForm][0];
        elif g_kdIemForms[sForm][0] != oInstr.sEncoding:
            self.error('%s: current instruction @openc and a_Form does not match: %s vs %s (%s)'
                       % (sMacro, oInstr.sEncoding, g_kdIemForms[sForm], sForm));

        # Check the parameter locations for the encoding.
        # Entry 1 of the form, when not None, lists the expected operand locations.
        if g_kdIemForms[sForm][1] is not None:
            if len(g_kdIemForms[sForm][1]) != len(oInstr.aoOperands):
                self.error('%s: The a_Form=%s has a different operand count: %s (form) vs %s'
                           % (sMacro, sForm, len(g_kdIemForms[sForm][1]), len(oInstr.aoOperands) ));
            else:
                for iOperand, sWhere in enumerate(g_kdIemForms[sForm][1]):
                    if oInstr.aoOperands[iOperand].sWhere != sWhere:
                        self.error('%s: current instruction @op%u and a_Form location does not match: %s vs %s (%s)'
                                   % (sMacro, iOperand + 1, oInstr.aoOperands[iOperand].sWhere, sWhere, sForm,));

    # Stats.
    if not self.oReStatsName.match(sStats):
        self.error('%s: invalid a_Stats value: %s' % (sMacro, sStats,));
    elif oInstr.sStats is None:
        oInstr.sStats = sStats;
    elif oInstr.sStats != sStats:
        self.error('%s: mismatching @opstats and a_Stats value: %s vs %s'
                   % (sMacro, oInstr.sStats, sStats,));

    # Process the hints (simply merge with @ophints w/o checking anything).
    # The hint name is the lower cased define without its prefix; anything
    # that is neither a prefixed define nor a literal 0 is reported.
    for sHint in sDisHints.split('|'):
        sHint = sHint.strip();
        if sHint.startswith('DISOPTYPE_'):
            sShortHint = sHint[len('DISOPTYPE_'):].lower();
            if sShortHint in g_kdHints:
                oInstr.dHints[sShortHint] = True; # (dummy value, using dictionary for speed)
            else:
                self.error('%s: unknown a_fDisHints value: %s' % (sMacro, sHint,));
        elif sHint != '0':
            self.error('%s: expected a_fDisHints value: %s' % (sMacro, sHint,));

    for sHint in sIemHints.split('|'):
        sHint = sHint.strip();
        if sHint.startswith('IEMOPHINT_'):
            sShortHint = sHint[len('IEMOPHINT_'):].lower();
            if sShortHint in g_kdHints:
                oInstr.dHints[sShortHint] = True; # (dummy value, using dictionary for speed)
            else:
                self.error('%s: unknown a_fIemHints value: %s' % (sMacro, sHint,));
        elif sHint != '0':
            self.error('%s: expected a_fIemHints value: %s' % (sMacro, sHint,));


    _ = sAsm;
    return True;
3024
def workerIemOpMnemonic(self, sMacro, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands):
    """
    Processes one of the IEMOP_MNEMONIC0, IEMOP_MNEMONIC1, IEMOP_MNEMONIC2,
    IEMOP_MNEMONIC3, and IEMOP_MNEMONIC4 macros.

    These macros carry neither statistics name nor assembly string, so both
    are derived from the lower case mnemonic and the operands before handing
    over to workerIemOpMnemonicEx, whose result is returned.
    """
    if asOperands:
        sStats = sLower + '_' + '_'.join(asOperands);
        sAsm = sLower + ' ' + ','.join(asOperands);
    else:
        # No operands: the mnemonic serves as both.
        sStats = sLower;
        sAsm = sLower;
    return self.workerIemOpMnemonicEx(sMacro, sStats, sAsm, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands);
3034
def checkCodeForMacro(self, sCode):
    """
    Checks a piece of code for relevant macro invocations.

    Handles the FNIEMOP_XXX decoder function definition/stub macros and the
    IEMOP_MNEMONIC family of macros.

    Returns True if a FNIEMOP_XXX macro was found and processed, otherwise
    False (also when IEMOP_MNEMONIC* macros were processed).
    """
    # Without an opening parenthesis past the first column there cannot be
    # any macro invocation of interest.
    if sCode.find('(') <= 0:
        return False;

    #
    # Look for instruction decoder function definitions. ASSUME single line.
    #
    asFnArgs = self.findAndParseFirstMacroInvocation(sCode,
                                                     [ 'FNIEMOP_DEF',
                                                       'FNIEMOP_STUB',
                                                       'FNIEMOP_STUB_1',
                                                       'FNIEMOP_UD_STUB',
                                                       'FNIEMOP_UD_STUB_1' ]);
    if asFnArgs is not None:
        sFnMacro = asFnArgs[0];
        sFunction = asFnArgs[1];

        if not self.aoCurInstrs:
            self.addInstruction();
        # Each instruction may only be associated with one such macro.
        for oInstr in self.aoCurInstrs:
            if oInstr.iLineFnIemOpMacro != -1:
                self.error('%s: already seen a FNIEMOP_XXX macro for %s' % (sFnMacro, oInstr,) );
            else:
                oInstr.iLineFnIemOpMacro = self.iLine;

        fStub = sFnMacro.find('STUB') > 0;
        self.setInstrunctionAttrib('sFunction', sFunction);
        self.setInstrunctionAttrib('fStub', fStub, fOverwrite = True);
        self.setInstrunctionAttrib('fUdStub', sFnMacro.find('UD_STUB') > 0, fOverwrite = True);
        # A stub is complete right here, so flush the instructions.
        if fStub:
            self.doneInstructions();
        return True;

    #
    # IEMOP_MNEMONIC(a_Stats, a_szMnemonic) IEMOP_INC_STATS(a_Stats)
    #
    asStatArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC');
    if asStatArgs is not None and len(self.aoCurInstrs) == 1:
        oInstr = self.aoCurInstrs[0];
        if oInstr.sStats is None:
            oInstr.sStats = asStatArgs[1];
        self.deriveMnemonicAndOperandsFromStats(oInstr, asStatArgs[1]);

    #
    # IEMOP_MNEMONIC<N>EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, [a_Op1, .. a_Op<N>,]
    #                     a_fDisHints, a_fIemHints)
    # The position in the tuple equals N, the number of operand parameters.
    #
    for cOperands, sExMacro in enumerate(('IEMOP_MNEMONIC0EX', 'IEMOP_MNEMONIC1EX', 'IEMOP_MNEMONIC2EX',
                                          'IEMOP_MNEMONIC3EX', 'IEMOP_MNEMONIC4EX')):
        asExArgs = self.findAndParseMacroInvocation(sCode, sExMacro);
        if asExArgs is not None:
            iHints = 6 + cOperands; # Index of a_fDisHints, following the operands.
            self.workerIemOpMnemonicEx(asExArgs[0], asExArgs[1], asExArgs[2], asExArgs[3], asExArgs[4], asExArgs[5],
                                       asExArgs[iHints], asExArgs[iHints + 1], list(asExArgs[6:iHints]));

    #
    # IEMOP_MNEMONIC<N>(a_Form, a_Upper, a_Lower, [a_Op1, .. a_Op<N>,] a_fDisHints, a_fIemHints)
    #
    for cOperands, sPlainMacro in enumerate(('IEMOP_MNEMONIC0', 'IEMOP_MNEMONIC1', 'IEMOP_MNEMONIC2',
                                             'IEMOP_MNEMONIC3', 'IEMOP_MNEMONIC4')):
        asPlainArgs = self.findAndParseMacroInvocation(sCode, sPlainMacro);
        if asPlainArgs is not None:
            iHints = 4 + cOperands; # Index of a_fDisHints, following the operands.
            self.workerIemOpMnemonic(asPlainArgs[0], asPlainArgs[1], asPlainArgs[2], asPlainArgs[3],
                                     asPlainArgs[iHints], asPlainArgs[iHints + 1], list(asPlainArgs[4:iHints]));

    return False;
3128
3129
def parse(self):
    """
    Parses the given file.

    Walks self.asLines from self.iLine on, tracking whether we are in code
    or inside a multi-line comment (self.iState).  Code fragments are handed
    to checkCodeForMacro(), each completed multi-line comment is handed to
    parseComment(), and a '}' in the first position of a code line completes
    the current instructions.

    The lines are expected to keep their newline terminators (the comment
    text is assembled by plain concatenation); empty strings are tolerated.

    Returns number of errors (the result of self.printErrors()).
    Raises exception on fatal trouble.
    """
    #self.debug('Parsing %s' % (self.sSrcFile,));

    while self.iLine < len(self.asLines):
        sLine = self.asLines[self.iLine];
        self.iLine += 1;

        # We only look for comments, so only lines with a slash might possibly
        # influence the parser state.
        offSlash = sLine.find('/');
        if offSlash >= 0:
            # Take the scanning path unless the first slash starts a C++ line
            # comment while we're in code.
            if offSlash + 1 >= len(sLine) or sLine[offSlash + 1] != '/' or self.iState != self.kiCode:
                offLine = 0;
                while offLine < len(sLine):
                    if self.iState == self.kiCode:
                        offHit = sLine.find('/*', offLine); # only multiline comments for now.
                        if offHit >= 0:
                            # Code up to the comment start, then switch to comment state.
                            self.checkCodeForMacro(sLine[offLine:offHit]);
                            self.sComment = '';
                            self.iCommentLine = self.iLine;
                            self.iState = self.kiCommentMulti;
                            offLine = offHit + 2;
                        else:
                            self.checkCodeForMacro(sLine[offLine:]);
                            offLine = len(sLine);

                    elif self.iState == self.kiCommentMulti:
                        offHit = sLine.find('*/', offLine);
                        if offHit >= 0:
                            # Comment complete: back to code state and parse it.
                            self.sComment += sLine[offLine:offHit];
                            self.iState = self.kiCode;
                            offLine = offHit + 2;
                            self.parseComment();
                        else:
                            self.sComment += sLine[offLine:];
                            offLine = len(sLine);
                    else:
                        assert False;
            # C++ line comment.
            elif offSlash > 0:
                self.checkCodeForMacro(sLine[:offSlash]);

        # No slash, but append the line if in multi-line comment.
        elif self.iState == self.kiCommentMulti:
            #self.debug('line %d: multi' % (self.iLine,));
            self.sComment += sLine;

        # No slash, but check code line for relevant macro.
        elif self.iState == self.kiCode and sLine.find('IEMOP_') >= 0:
            #self.debug('line %d: macro' % (self.iLine,));
            self.checkCodeForMacro(sLine);

        # If the line is a '}' in the first position, complete the instructions.
        # (startswith() rather than sLine[0] so an empty line string is harmless.)
        elif self.iState == self.kiCode and sLine.startswith('}'):
            #self.debug('line %d: }' % (self.iLine,));
            self.doneInstructions();

    self.doneInstructions();
    self.debug('%3s stubs out of %3s instructions in %s'
               % (self.cTotalStubs, self.cTotalInstr, os.path.basename(self.sSrcFile),));
    return self.printErrors();
3196
3197
def __parseFileByName(sSrcFile, sDefaultMap):
    """
    Parses one source file for instruction specifications.

    Returns what SimpleParser.parse() returns for the file.
    Raises Exception if the file cannot be opened or read.  A ParserException
    from the parser is printed and then passed on; other exceptions are
    passed on as they are.
    """
    #
    # Slurp the whole file as a list of lines.
    #
    try:
        oFile = open(sSrcFile, "r");
    except Exception as oXcpt:
        raise Exception("failed to open %s for reading: %s" % (sSrcFile, oXcpt,));
    with oFile:
        try:
            asLines = oFile.readlines();
        except Exception as oXcpt:
            raise Exception("failed to read %s: %s" % (sSrcFile, oXcpt,));

    #
    # Hand the lines to the parser.
    #
    try:
        return SimpleParser(sSrcFile, asLines, sDefaultMap).parse();
    except ParserException as oXcpt:
        print(str(oXcpt));
        raise;
3228
3229
def __doTestCopying():
    """
    Executes the asCopyTests instructions.

    Each reference is first looked up as a statistics name and, failing
    that, as a function name.  The tests of every matching instruction are
    appended to those of the referencing instruction.  References that
    cannot be resolved, or that resolve to the referencing instruction
    itself, are reported on stderr.

    Returns the number of errors.
    """
    asProblems = [];
    for oTarget in g_aoAllInstructions:
        if not oTarget.asCopyTests:
            continue;
        for sRef in oTarget.asCopyTests:
            # Statistics names take precedence over function names.
            oByStat = g_dAllInstructionsByStat.get(sRef, None);
            if oByStat:
                aoSources = [oByStat,];
            else:
                aoSources = g_dAllInstructionsByFunction.get(sRef, []);

            if not aoSources:
                asProblems.append('%s:%s: error: @opcopytests reference "%s" not found\n'
                                  % ( oTarget.sSrcFile, oTarget.iLineCreated, sRef));
                continue;

            for oSource in aoSources:
                if oSource != oTarget:
                    oTarget.aoTests.extend(oSource.aoTests);
                else:
                    asProblems.append('%s:%s: error: @opcopytests reference "%s" matches the destination\n'
                                      % ( oTarget.sSrcFile, oTarget.iLineCreated, sRef));

    if asProblems:
        sys.stderr.write(u''.join(asProblems));
    return len(asProblems);
3257
3258
def __applyOnlyTest():
    """
    Applies the only-test filter: if g_aoOnlyTestInstructions contains any
    instructions, the aoTests of all other instructions are dropped so that
    only the selected ones get tested.

    Returns 0 (the number of errors).
    """
    if not g_aoOnlyTestInstructions:
        return 0;

    for oInstr in g_aoAllInstructions:
        if oInstr.aoTests and oInstr not in g_aoOnlyTestInstructions:
            oInstr.aoTests = [];
    return 0;
3270
def __parseAll():
    """
    Parses all the IEMAllInstruction*.cpp.h files.

    The files are expected in the same directory as this script.  Once they
    are all parsed, the test copying is carried out and the only-test filter
    applied.

    Returns True.  Exits the process with status 1 if any errors were
    counted.
    Raises exception on failure.
    """
    # (Default instruction map, file name) in processing order.
    atInputs = (
        ( 'one',       'IEMAllInstructionsOneByte.cpp.h'),
        ( 'two0f',     'IEMAllInstructionsTwoByte0f.cpp.h'),
        ( 'three0f38', 'IEMAllInstructionsThree0f38.cpp.h'),
        ( 'three0f3a', 'IEMAllInstructionsThree0f3a.cpp.h'),
        ( 'vexmap1',   'IEMAllInstructionsVexMap1.cpp.h'),
        ( 'vexmap2',   'IEMAllInstructionsVexMap2.cpp.h'),
        ( 'vexmap3',   'IEMAllInstructionsVexMap3.cpp.h'),
        ( '3dnow',     'IEMAllInstructions3DNow.cpp.h'),
    );
    sScriptDir = os.path.dirname(os.path.abspath(__file__));

    cTotalErrors = sum(__parseFileByName(os.path.join(sScriptDir, sFilename), sMapName)
                       for sMapName, sFilename in atInputs);
    cTotalErrors += __doTestCopying();
    cTotalErrors += __applyOnlyTest();

    if cTotalErrors != 0:
        sys.exit(1);
    return True;
3297
3298
3299
# Executed when the module is loaded (imported or run as a script): parses all
# the instruction files and exits the process with status 1 on errors.
__parseAll();
3301
3302
3303#
3304# Generators (may perhaps move later).
3305#
def generateDisassemblerTables(oDstFile = sys.stdout):
    """
    Generates disassembler tables.

    The instruction maps in g_dInstructionMaps are visited in the order of
    their encoding and lead opcodes.  For a map, a C 'const DISOPCODE'
    table with one OP/OPVEX macro line per single instruction entry is
    produced, followed by an AssertCompile on the table size, and written to
    oDstFile.  For now only the first map is processed (see the break at the
    end of the loop).

    oDstFile is anything with a write() method; the default is the
    sys.stdout object at the time this function is defined.
    """

    for sName, oMap in sorted(iter(g_dInstructionMaps.items()),
                              key = lambda aKV: aKV[1].sEncoding + ''.join(aKV[1].asLeadOpcodes)):
        assert oMap.sName == sName;
        asLines = [];

        asLines.append('/* Generated from: %-11s Selector: %-7s Encoding: %-7s Lead bytes opcodes: %s */'
                       % ( oMap.sName, oMap.sSelector, oMap.sEncoding, ' '.join(oMap.asLeadOpcodes), ));
        asLines.append('const DISOPCODE %s[] =' % (oMap.getDisasTableName(),));
        asLines.append('{');

        # Text column each field of a table line is padded out to (see the
        # formatting loop further down).
        aoffColumns = [4, 29, 49, 65, 77, 89, 109, 125, 141, 157, 183, 199];

        aoTableOrder = oMap.getInstructionsInTableOrder();
        for iInstr, oInstr in enumerate(aoTableOrder):

            # Start every group of 16 entries with a comment giving the high
            # nibble, separated from the previous group by an empty line.
            if (iInstr & 0xf) == 0:
                if iInstr != 0:
                    asLines.append('');
                asLines.append(' /* %x */' % (iInstr >> 4,));

            if oInstr is None:
                pass;#asLines.append(' /* %#04x */ None,' % (iInstr));
            elif isinstance(oInstr, list):
                # Several instructions in one table slot, only a placeholder is emitted.
                asLines.append(' /* %#04x */ ComplicatedListStuffNeedingWrapper,' % (iInstr));
            else:
                # The OP macro takes up to three operands, OPVEX up to four.
                sMacro = 'OP';
                cMaxOperands = 3;
                if len(oInstr.aoOperands) > 3:
                    sMacro = 'OPVEX'
                    cMaxOperands = 4;
                assert len(oInstr.aoOperands) <= cMaxOperands;

                #
                # Format string.
                #
                sTmp = '%s("%s' % (sMacro, oInstr.sMnemonic,);
                for iOperand, oOperand in enumerate(oInstr.aoOperands):
                    sTmp += ' ' if iOperand == 0 else ',';
                    if g_kdOpTypes[oOperand.sType][2][0] != '%': ## @todo remove upper() later.
                        sTmp += g_kdOpTypes[oOperand.sType][2].upper(); ## @todo remove upper() later.
                    else:
                        sTmp += g_kdOpTypes[oOperand.sType][2];
                sTmp += '",';
                asColumns = [ sTmp, ];

                #
                # Decoders.
                #
                # iStart marks where the decoder columns begin, so that
                # len(asColumns) - iStart is the number emitted so far.
                iStart = len(asColumns);
                if oInstr.sEncoding is None:
                    pass;
                elif oInstr.sEncoding == 'ModR/M':
                    # ASSUME the first operand is using the ModR/M encoding
                    assert len(oInstr.aoOperands) >= 1 and oInstr.aoOperands[0].usesModRM();
                    asColumns.append('IDX_ParseModRM,');
                    ## @todo IDX_ParseVexDest
                    # Is second operand using ModR/M too?
                    if len(oInstr.aoOperands) > 1 and oInstr.aoOperands[1].usesModRM():
                        asColumns.append('IDX_UseModRM,')
                elif oInstr.sEncoding in [ 'prefix', ]:
                    for oOperand in oInstr.aoOperands:
                        asColumns.append('0,');
                elif oInstr.sEncoding in [ 'fixed' ]:
                    pass;
                elif oInstr.sEncoding == 'vex2':
                    asColumns.append('IDX_ParseVex2b,')
                elif oInstr.sEncoding == 'vex3':
                    asColumns.append('IDX_ParseVex3b,')
                elif oInstr.sEncoding in g_dInstructionMaps:
                    asColumns.append(g_dInstructionMaps[oInstr.sEncoding].sDisParse + ',');
                else:
                    ## @todo
                    #IDX_ParseTwoByteEsc,
                    #IDX_ParseGrp1,
                    #IDX_ParseShiftGrp2,
                    #IDX_ParseGrp3,
                    #IDX_ParseGrp4,
                    #IDX_ParseGrp5,
                    #IDX_Parse3DNow,
                    #IDX_ParseGrp6,
                    #IDX_ParseGrp7,
                    #IDX_ParseGrp8,
                    #IDX_ParseGrp9,
                    #IDX_ParseGrp10,
                    #IDX_ParseGrp12,
                    #IDX_ParseGrp13,
                    #IDX_ParseGrp14,
                    #IDX_ParseGrp15,
                    #IDX_ParseGrp16,
                    #IDX_ParseThreeByteEsc4,
                    #IDX_ParseThreeByteEsc5,
                    #IDX_ParseModFence,
                    #IDX_ParseEscFP,
                    #IDX_ParseNopPause,
                    #IDX_ParseInvOpModRM,
                    assert False, str(oInstr);

                # Check for immediates and stuff in the remaining operands.
                for oOperand in oInstr.aoOperands[len(asColumns) - iStart:]:
                    sIdx = g_kdOpTypes[oOperand.sType][0];
                    if sIdx != 'IDX_UseModRM':
                        asColumns.append(sIdx + ',');
                # Pad the decoder columns up to the operand count of the macro.
                asColumns.extend(['0,'] * (cMaxOperands - (len(asColumns) - iStart)));

                #
                # Opcode and operands.
                #
                assert oInstr.sDisEnum, str(oInstr);
                asColumns.append(oInstr.sDisEnum + ',');
                iStart = len(asColumns)
                for oOperand in oInstr.aoOperands:
                    asColumns.append('OP_PARM_' + g_kdOpTypes[oOperand.sType][3] + ',');
                # Pad the parameter columns up to the operand count of the macro.
                asColumns.extend(['OP_PARM_NONE,'] * (cMaxOperands - (len(asColumns) - iStart)));

                #
                # Flags.
                #
                # Only hints mapping to DISOPTYPE_XXX defines are included,
                # OR'ed together in sorted hint name order; 0 if there are none.
                sTmp = '';
                for sHint in sorted(oInstr.dHints.keys()):
                    sDefine = g_kdHints[sHint];
                    if sDefine.startswith('DISOPTYPE_'):
                        if sTmp:
                            sTmp += ' | ' + sDefine;
                        else:
                            sTmp += sDefine;
                if sTmp:
                    sTmp += '),';
                else:
                    sTmp += '0),';
                asColumns.append(sTmp);

                #
                # Format the columns into a line.
                #
                # Each column is padded out to its offset in aoffColumns, or
                # separated by a single space if the line is already past it.
                sLine = '';
                for i, s in enumerate(asColumns):
                    if len(sLine) < aoffColumns[i]:
                        sLine += ' ' * (aoffColumns[i] - len(sLine));
                    else:
                        sLine += ' ';
                    sLine += s;

                # OP("psrlw %Vdq,%Wdq", IDX_ParseModRM, IDX_UseModRM, 0, OP_PSRLW, OP_PARM_Vdq, OP_PARM_Wdq, OP_PARM_NONE,
                # DISOPTYPE_HARMLESS),
                # define OP(pszOpcode, idxParse1, idxParse2, idxParse3, opcode, param1, param2, param3, optype) \
                # { pszOpcode, idxParse1, idxParse2, idxParse3, 0, opcode, param1, param2, param3, 0, 0, optype }

                asLines.append(sLine);

        asLines.append('};');
        asLines.append('AssertCompile(RT_ELEMENTS(%s) == %s);' % (oMap.getDisasTableName(), oMap.getTableSize(),));

        #
        # Write out the lines.
        #
        oDstFile.write('\n'.join(asLines));
        oDstFile.write('\n');
        break; #for now
3469
# When run as a script rather than imported, write the disassembler tables to
# the default destination (standard output).
if __name__ == '__main__':
    generateDisassemblerTables();
3472
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette