VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllInstructionsPython.py@ 65828

Last change on this file since 65828 was 65828, checked in by vboxsync, 8 years ago

python 3 fixes.

  • Property svn:eol-style set to LF
  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 76.0 KB
Line 
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3# $Id: IEMAllInstructionsPython.py 65828 2017-02-21 09:55:39Z vboxsync $
4
5"""
6IEM instruction extractor.
7
8This script/module parses the IEMAllInstruction*.cpp.h files next to it and
9collects information about the instructions. It can then be used to generate
10disassembler tables and tests.
11"""
12
13__copyright__ = \
14"""
15Copyright (C) 2017 Oracle Corporation
16
17This file is part of VirtualBox Open Source Edition (OSE), as
18available from http://www.virtualbox.org. This file is free software;
19you can redistribute it and/or modify it under the terms of the GNU
20General Public License (GPL) as published by the Free Software
21Foundation, in version 2 as it comes in the "COPYING" file of the
22VirtualBox OSE distribution. VirtualBox OSE is distributed in the
23hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
24
25The contents of this file may alternatively be used under the terms
26of the Common Development and Distribution License Version 1.0
27(CDDL) only, as it comes in the "COPYING.CDDL" file of the
28VirtualBox OSE distribution, in which case the provisions of the
29CDDL are applicable instead of those of the GPL.
30
31You may elect to license modified versions of this file under the
32terms and conditions of either the GPL or the CDDL or both.
33"""
34__version__ = "$Revision: 65828 $"
35
36# Standard python imports.
37import os
38import re
39import sys
40
41# Only the main script needs to modify the path.
42g_ksValidationKitDir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),
43 'ValidationKit');
44sys.path.append(g_ksValidationKitDir);
45
46from common import utils;
47
48# Python 3 hacks:
49if sys.version_info[0] >= 3:
50 long = int; # pylint: disable=redefined-builtin,invalid-name
51
52
53# Annotation example:
54#
55# \@opmnemonic add
56# \@op1 reg:Eb
57# \@op2 rm:Gb
58# \@opmaps onebyte
59# \@oppfx none
60# \@opcode 0x00
61# \@openc ModR/M
62# \@opfltest none
63# \@opflmodify of,sz,zf,af,pf,cf
64# \@opflundef none
65# \@opflset none
66# \@opflclear none
67# \@ophints harmless
68# \@opstats add_Eb_Gb
69# \@opgroup op_gen_arith_bin
70# \@optest in1=1 in2=1 -> out1=2 outfl=a?,p?
71# \@optest oppfx:o32 in1=0xfffffffe:dw in2=1:dw -> out1=0xffffffff:dw outfl=a?,p?
72
73
def _isValidOpcodeByte(sOpcode):
    """
    Checks whether sOpcode is a lower case opcode byte on the form '0xNN'.

    Exactly four characters are required: the '0x' prefix followed by two
    lower case hexadecimal digits.
    Returns True/False.
    """
    ksHexDigits = '0123456789abcdef';
    return len(sOpcode) == 4 \
       and sOpcode[:2] == '0x' \
       and sOpcode[2] in ksHexDigits \
       and sOpcode[3] in ksHexDigits;
85
86
class InstructionMap(object):
    """
    Instruction map.

    The opcode map provides the lead opcode bytes (empty for the one byte
    opcode map).  An instruction can be member of multiple opcode maps as long
    as it uses the same opcode value within the map (because of VEX).
    """

    ## Valid sEncoding values (the dictionary values are unused dummies).
    kdEncodings = {
        'legacy':   [],
        'vex1':     [], ##< VEX or EVEX prefix with vvvvv = 1
        'vex2':     [], ##< VEX or EVEX prefix with vvvvv = 2
        'vex3':     [], ##< VEX or EVEX prefix with vvvvv = 3
        'xop8':     [], ##< XOP prefix with vvvvv = 8
        'xop9':     [], ##< XOP prefix with vvvvv = 9
        'xop10':    [], ##< XOP prefix with vvvvv = 10
    };
    ## Valid sSelector values (the dictionary values are unused dummies).
    kdSelectors = {
        'byte':     [], ##< next opcode byte selects the instruction (default).
        '/r':       [], ##< modrm.reg selects the instruction.
        'mod /r':   [], ##< modrm.reg and modrm.mod selects the instruction.
        '!11 /r':   [], ##< modrm.reg selects the instruction with modrm.mod != 0y11.
        '11 /r':    [], ##< modrm.reg select the instruction with modrm.mod == 0y11.
        '11':       [], ##< modrm.reg and modrm.rm select the instruction with modrm.mod == 0y11.
    };

    def __init__(self, sName, asLeadOpcodes = None, sSelector = 'byte', sEncoding = 'legacy'):
        """
        Creates an empty map.

        sSelector must be a key of kdSelectors, sEncoding a key of kdEncodings
        and each entry of asLeadOpcodes a lower case '0xNN' string; these
        are checked with assertions only.  Omitting asLeadOpcodes yields a
        fresh empty list.
        """
        assert sSelector in self.kdSelectors;
        assert sEncoding in self.kdEncodings;
        if asLeadOpcodes is not None:
            for sLeadByte in asLeadOpcodes:
                assert _isValidOpcodeByte(sLeadByte);
        else:
            asLeadOpcodes = [];

        self.sName          = sName;
        self.asLeadOpcodes  = asLeadOpcodes;    ##< Lead opcode bytes formatted as hex strings like '0x0f'.
        self.sSelector      = sSelector;        ##< The member selector, see kdSelectors.
        self.sEncoding      = sEncoding;        ##< The encoding, see kdEncodings.
        self.aoInstructions = [];               # type: list(Instruction)
128
129
class TestType(object):
    """
    Test value type.

    This base class deals with integer like values.  The fUnsigned constructor
    parameter indicates the default stance on zero vs sign extending.  It is
    possible to override fUnsigned=True by prefixing the value with '+' or '-'.
    """
    def __init__(self, sName, fUnsigned = True):
        self.sName     = sName;
        self.fUnsigned = fUnsigned;

    class BadValue(Exception):
        """ Bad value exception. """
        def __init__(self, sMessage):
            # Note: 'self' must be passed on explicitly when calling the base initializer this way.
            Exception.__init__(self, sMessage);
            self.sMessage = sMessage;

    def get(self, sValue):
        """
        Get the shortest byte representation of sValue.

        Returns (fSignExtend, bytearray), the byte array being little endian.
        Raises BadValue if invalid value.

        The returned byte array is a reasonable size, e.g. for an integer type
        it's for instance 1, 2, 4, or 8 byte in size but never 3, 5 or 7 bytes.
        Values needing more than 8 bytes are padded to a multiple of 8 bytes.
        Negative values are returned in two's complement form.
        """
        if not sValue:
            raise TestType.BadValue('empty value');

        # Deal with sign and detect hexadecimal or decimal.
        fSignExtend = not self.fUnsigned;
        if sValue[0] == '-' or sValue[0] == '+':
            fSignExtend = True;
            fHex = len(sValue) > 3 and sValue[1:3].lower() == '0x';
        else:
            fHex = len(sValue) > 2 and sValue[0:2].lower() == '0x';

        # Try convert it to an integer.  (int() promotes to long by itself on
        # python 2, so no version specific handling is needed.)
        try:
            iValue = int(sValue, 16 if fHex else 10);
        except ValueError:
            raise TestType.BadValue('failed to convert "%s" to integer' % (sValue,));

        # Convert to a hex string of a decent width.
        if iValue >= 0:
            sHex = '%x' % (iValue,);
            cDigits = len(sHex);
            if cDigits <= 2:
                cNaturalDigits = 2;
            elif cDigits <= 4:
                cNaturalDigits = 4;
            elif cDigits <= 8:
                cNaturalDigits = 8;
            else:
                cNaturalDigits = (cDigits + 15) & ~15;
            sHex = '0' * (cNaturalDigits - cDigits) + sHex;
        else:
            # Find the smallest natural width (8, 16, 32, 64, 128, 192, ... bits)
            # that can hold the value and produce its two's complement.  The
            # result always has the top bit set, so no extra padding is needed.
            cBits = 8;
            while iValue < -(1 << (cBits - 1)):
                cBits = cBits * 2 if cBits < 64 else cBits + 64;
            sHex = '%x' % (iValue + (1 << cBits),);

        # Invert (least significant byte first) and convert to bytearray.
        abValue = bytearray([int(sHex[offHex - 2 : offHex], 16) for offHex in range(len(sHex), 0, -2)]);

        return (fSignExtend, abValue);

    def validate(self, sValue):
        """
        Returns True if value is okay, error message on failure.
        """
        try:
            self.get(sValue);
        except TestType.BadValue as oXcpt:
            return oXcpt.sMessage;
        return True;
214
215
216
class TestTypeEflags(TestType):
    """
    Special value parsing for EFLAGS/RFLAGS/FLAGS.

    The parsing itself is still a stub: get() returns None for any input.
    """

    def __init__(self, sName):
        super(TestTypeEflags, self).__init__(sName, fUnsigned = True);

    def get(self, sValue):
        """ Stub: ignores sValue and returns None. """
        _ = sValue;
        return None;
228
229
230
class TestInOut(object):
    """
    One input or output state modifier.

    This should be thought as values to modify BS3REGCTX and extended (needs
    to be structured) state.

    An instance simply records the field name, the assignment operator, the
    raw value string and the type name it was constructed with.
    """
    ## Assigned operators.
    kasOperators = [
        '&~=',
        '&=',
        '|=',
        '='
    ];
    ## Types
    # NOTE(review): 'int' is created with the default fUnsigned=True, i.e. the
    # same as 'uint' - confirm whether fUnsigned=False was intended.
    kdTypes = {
        'uint':  TestType('uint', fUnsigned = True),
        'int':   TestType('int'),
        'efl':   TestTypeEflags('efl'),
    };
    ## CPU context fields.
    kdFields = {
        # name:     ( default type, tbd, )
        # Operands.
        'op1':      ( 'uint', '', ), ## \@op1
        'op2':      ( 'uint', '', ), ## \@op2
        'op3':      ( 'uint', '', ), ## \@op3
        'op4':      ( 'uint', '', ), ## \@op4
        # Flags.
        'efl':      ( 'efl',  '', ),
        # 8-bit GPRs.
        'al':       ( 'uint', '', ),
        'cl':       ( 'uint', '', ),
        'dl':       ( 'uint', '', ),
        'bl':       ( 'uint', '', ),
        'ah':       ( 'uint', '', ),
        'ch':       ( 'uint', '', ),
        'dh':       ( 'uint', '', ),
        'bh':       ( 'uint', '', ),
        'r8l':      ( 'uint', '', ),
        'r9l':      ( 'uint', '', ),
        'r10l':     ( 'uint', '', ),
        'r11l':     ( 'uint', '', ),
        'r12l':     ( 'uint', '', ),
        'r13l':     ( 'uint', '', ),
        'r14l':     ( 'uint', '', ),
        'r15l':     ( 'uint', '', ),
        # 16-bit GPRs.
        'ax':       ( 'uint', '', ),
        'dx':       ( 'uint', '', ),
        'cx':       ( 'uint', '', ),
        'bx':       ( 'uint', '', ),
        'sp':       ( 'uint', '', ),
        'bp':       ( 'uint', '', ),
        'si':       ( 'uint', '', ),
        'di':       ( 'uint', '', ),
        'r8w':      ( 'uint', '', ),
        'r9w':      ( 'uint', '', ),
        'r10w':     ( 'uint', '', ),
        'r11w':     ( 'uint', '', ),
        'r12w':     ( 'uint', '', ),
        'r13w':     ( 'uint', '', ),
        'r14w':     ( 'uint', '', ),
        'r15w':     ( 'uint', '', ),
        # 32-bit GPRs.
        'eax':      ( 'uint', '', ),
        'edx':      ( 'uint', '', ),
        'ecx':      ( 'uint', '', ),
        'ebx':      ( 'uint', '', ),
        'esp':      ( 'uint', '', ),
        'ebp':      ( 'uint', '', ),
        'esi':      ( 'uint', '', ),
        'edi':      ( 'uint', '', ),
        'r8d':      ( 'uint', '', ),
        'r9d':      ( 'uint', '', ),
        'r10d':     ( 'uint', '', ),
        'r11d':     ( 'uint', '', ),
        'r12d':     ( 'uint', '', ),
        'r13d':     ( 'uint', '', ),
        'r14d':     ( 'uint', '', ),
        'r15d':     ( 'uint', '', ),
        # 64-bit GPRs.
        'rax':      ( 'uint', '', ),
        'rdx':      ( 'uint', '', ),
        'rcx':      ( 'uint', '', ),
        'rbx':      ( 'uint', '', ),
        'rsp':      ( 'uint', '', ),
        'rbp':      ( 'uint', '', ),
        'rsi':      ( 'uint', '', ),
        'rdi':      ( 'uint', '', ),
        'r8':       ( 'uint', '', ),
        'r9':       ( 'uint', '', ),
        'r10':      ( 'uint', '', ),
        'r11':      ( 'uint', '', ),
        'r12':      ( 'uint', '', ),
        'r13':      ( 'uint', '', ),
        'r14':      ( 'uint', '', ),
        'r15':      ( 'uint', '', ),
        # 16-bit, 32-bit or 64-bit registers according to operand size.
        'oz.rax':   ( 'uint', '', ),
        'oz.rdx':   ( 'uint', '', ),
        'oz.rcx':   ( 'uint', '', ),
        'oz.rbx':   ( 'uint', '', ),
        'oz.rsp':   ( 'uint', '', ),
        'oz.rbp':   ( 'uint', '', ),
        'oz.rsi':   ( 'uint', '', ),
        'oz.rdi':   ( 'uint', '', ),
        'oz.r8':    ( 'uint', '', ),
        'oz.r9':    ( 'uint', '', ),
        'oz.r10':   ( 'uint', '', ),
        'oz.r11':   ( 'uint', '', ),
        'oz.r12':   ( 'uint', '', ),
        'oz.r13':   ( 'uint', '', ),
        'oz.r14':   ( 'uint', '', ),
        'oz.r15':   ( 'uint', '', ),
    };

    def __init__(self, sField, sOp, sValue, sType):
        # The field and operator are checked by assertion only; sValue and
        # sType are stored without any validation here.
        assert sField in self.kdFields;
        assert sOp in self.kasOperators;
        self.sField = sField;
        self.sOp    = sOp;
        self.sValue = sValue;
        self.sType  = sType;
355
356
class TestSelector(object):
    """
    One selector for an instruction test.

    A selector is a (variable, compare operator, value) triplet, where the
    variable is a key of kdVariables and the value is one of the keys in that
    variable's value dictionary.
    """
    ## Selector compare operators.
    kasCompareOps = [ '==', '!=' ];
    ## Selector variables and their valid values.
    kdVariables = {
        # Operand size.
        'size': {
            'o16':  'size_o16',
            'o32':  'size_o32',
            'o64':  'size_o64',
        },
        # Execution ring.
        'ring': {
            '0':    'ring_0',
            '1':    'ring_1',
            '2':    'ring_2',
            '3':    'ring_3',
            '0..2': 'ring_0_thru_2',
            '1..3': 'ring_1_thru_3',
        },
        # Basic code mode.
        'codebits': {
            '64':   'code_64bit',
            '32':   'code_32bit',
            '16':   'code_16bit',
        },
        # cpu modes.
        'mode': {
            'real': 'mode_real',
            'prot': 'mode_prot',
            'long': 'mode_long',
            'v86':  'mode_v86',
            'smm':  'mode_smm',
            'vmx':  'mode_vmx',
            'svm':  'mode_svm',
        },
        # paging on/off
        'paging': {
            'on':   'paging_on',
            'off':  'paging_off',
        },
    };
    ## Selector shorthand predicates.
    ## These translates into variable expressions.
    kdPredicates = {
        'o16':          'size==o16',
        'o32':          'size==o32',
        'o64':          'size==o64',
        'ring0':        'ring==0',
        '!ring0':       'ring==1..3',
        'ring1':        'ring==1',
        'ring2':        'ring==2',
        'ring3':        'ring==3',
        'user':         'ring==3',
        'supervisor':   'ring==0..2',
        'real':         'mode==real',
        'prot':         'mode==prot',
        'long':         'mode==long',
        'v86':          'mode==v86',
        'smm':          'mode==smm',
        'vmx':          'mode==vmx',
        'svm':          'mode==svm',
        'paging':       'paging==on',
        '!paging':      'paging==off',
    };

    def __init__(self, sVariable, sOp, sValue):
        """
        Creates a selector; the arguments are checked by assertion only.
        """
        assert sVariable in self.kdVariables;
        assert sOp in self.kasCompareOps;
        # The value must be valid for the given variable, so the lookup is
        # by sVariable (indexing by sValue raises KeyError for valid input).
        assert sValue in self.kdVariables[sVariable];
        self.sVariable  = sVariable;
        self.sOp        = sOp;
        self.sValue     = sValue;
433
434
class InstructionTest(object):
    """
    Instruction test.

    Holds a reference to the instruction it was created for together with
    initially empty lists of inputs, outputs and selectors.
    """

    def __init__(self, oInstr): # type: (InstructionTest, Instruction)
        self.aoSelectors = [];      # type: list(TestSelector)
        self.aoOutputs   = [];
        self.aoInputs    = [];
        self.oInstr      = oInstr;  # type: Instruction
445
446
class Operand(object):
    """
    Instruction operand.

    Consists of a location (a kdLocations key) and a type (a kdTypes key).
    """

    ## Valid 'where' values for \@op[1-4].
    kdLocations = {
        'reg':      [], ## modrm.reg
        'rm':       [], ## modrm.rm
    };

    ## Valid 'type' values for \@op[1-4].
    kdTypes = {
        'Eb':       [],
        'Gb':       [],
    };

    def __init__(self, sWhere, sType):
        # Both values are checked by assertion only.
        assert sWhere in self.kdLocations;
        assert sType  in self.kdTypes;
        self.sType  = sType;            ##< kdTypes
        self.sWhere = sWhere;           ##< kdLocations
469
470
class Instruction(object):
    """
    Instruction.

    A plain record; the constructor only stores where the instruction was
    found and initializes everything else to empty/None/False.
    """

    def __init__(self, sSrcFile, iLine):
        ## @name Decoding info
        ## @{
        self.sSrcFile       = sSrcFile;
        self.iLineCreated   = iLine;
        self.iLineCompleted = None;
        self.cOpTags        = 0;
        ## @}

        ## @name Core attributes.
        ## @{
        self.sMnemonic      = None;
        self.sBrief         = None;
        self.asDescSections = [];       # type: list(str)
        self.aoMaps         = [];       # type: list(InstructionMap)
        self.aoOperands     = [];       # type: list(Operand)
        self.sPrefix        = None;     ##< Single prefix: None, 0x66, 0xf3, 0xf2
        self.sOpcode        = None;
        self.sEncoding      = None;
        self.asFlTest       = None;
        self.asFlModify     = None;
        self.asFlUndefined  = None;
        self.asFlSet        = None;
        self.asFlClear      = None;
        self.dHints         = {};       ##< Dictionary of instruction hints, flags, whatnot. (Dictionary for speed; dummy value).
        self.asCpuIds       = [];       ##< The CPUID feature bit names for this instruction. If multiple, assume AND.
        self.asReqFeatures  = [];       ##< Which features are required to be enabled to run this instruction.
        self.aoTests        = [];       # type: list(InstructionTest)
        self.oCpus          = None;     ##< Some CPU restriction expression...
        self.sGroup         = None;
        self.fUnused        = False;    ##< Unused instruction.
        self.fInvalid       = False;    ##< Invalid instruction (like UD2).
        self.sInvalidStyle  = None;     ##< Invalid behaviour style
        ## @}

        ## @name Implementation attributes.
        ## @{
        self.sStats         = None;
        self.sFunction      = None;
        self.fStub          = False;
        self.fUdStub        = False;
        ## @}

        ## @name Intermediate input fields.
        ## @{
        self.sRawDisOpNo    = None;
        self.asRawDisParams = [];
        self.sRawIemOpFlags = None;
        self.sRawOldOpcodes = None;
        ## @}
526
527
## All the instructions.
g_aoAllInstructions = []; # type: list(Instruction)

## Instruction maps, keyed by map name.
## Each key is also passed as the sName of its InstructionMap.
g_dInstructionMaps = {
    'one':          InstructionMap('one'),
    # Group 1 (0x80..0x83) is selected by modrm.reg like the other groups.
    'grp1_80':      InstructionMap('grp1_80',   asLeadOpcodes = ['0x80',], sSelector = '/r'),
    'grp1_81':      InstructionMap('grp1_81',   asLeadOpcodes = ['0x81',], sSelector = '/r'),
    'grp1_82':      InstructionMap('grp1_82',   asLeadOpcodes = ['0x82',], sSelector = '/r'),
    'grp1_83':      InstructionMap('grp1_83',   asLeadOpcodes = ['0x83',], sSelector = '/r'),
    'grp1a':        InstructionMap('grp1a',     asLeadOpcodes = ['0x8f',], sSelector = '/r'),
    'grp2_c0':      InstructionMap('grp2_c0',   asLeadOpcodes = ['0xc0',], sSelector = '/r'),
    'grp2_c1':      InstructionMap('grp2_c1',   asLeadOpcodes = ['0xc1',], sSelector = '/r'),
    'grp2_d0':      InstructionMap('grp2_d0',   asLeadOpcodes = ['0xd0',], sSelector = '/r'),
    'grp2_d1':      InstructionMap('grp2_d1',   asLeadOpcodes = ['0xd1',], sSelector = '/r'),
    'grp2_d2':      InstructionMap('grp2_d2',   asLeadOpcodes = ['0xd2',], sSelector = '/r'),
    'grp2_d3':      InstructionMap('grp2_d3',   asLeadOpcodes = ['0xd3',], sSelector = '/r'),
    'grp3_f6':      InstructionMap('grp3_f6',   asLeadOpcodes = ['0xf6',], sSelector = '/r'),
    'grp3_f7':      InstructionMap('grp3_f7',   asLeadOpcodes = ['0xf7',], sSelector = '/r'),
    'grp4':         InstructionMap('grp4',      asLeadOpcodes = ['0xfe',], sSelector = '/r'),
    'grp5':         InstructionMap('grp5',      asLeadOpcodes = ['0xff',], sSelector = '/r'),
    'grp11_c6_m':   InstructionMap('grp11_c6_m',asLeadOpcodes = ['0xc6',], sSelector = '!11 /r'),
    'grp11_c6_r':   InstructionMap('grp11_c6_r',asLeadOpcodes = ['0xc6',], sSelector = '11'),    # xabort
    'grp11_c7_m':   InstructionMap('grp11_c7_m',asLeadOpcodes = ['0xc7',], sSelector = '!11 /r'),
    'grp11_c7_r':   InstructionMap('grp11_c7_r',asLeadOpcodes = ['0xc7',], sSelector = '11'),    # xbegin

    'two0f':        InstructionMap('two0f',     asLeadOpcodes = ['0x0f',]),
    'grp6':         InstructionMap('grp6',      asLeadOpcodes = ['0x0f', '0x00',], sSelector = '/r'),
    'grp7_m':       InstructionMap('grp7_m',    asLeadOpcodes = ['0x0f', '0x01',], sSelector = '!11 /r'),
    'grp7_r':       InstructionMap('grp7_r',    asLeadOpcodes = ['0x0f', '0x01',], sSelector = '11'),
    'grp8':         InstructionMap('grp8',      asLeadOpcodes = ['0x0f', '0xba',], sSelector = '/r'),
    'grp9':         InstructionMap('grp9',      asLeadOpcodes = ['0x0f', '0xc7',], sSelector = 'mod /r'),
    'grp10':        InstructionMap('grp10',     asLeadOpcodes = ['0x0f', '0xb9',], sSelector = '/r'), # UD1 /w modr/m
    'grp12':        InstructionMap('grp12',     asLeadOpcodes = ['0x0f', '0x71',], sSelector = 'mod /r'),
    'grp13':        InstructionMap('grp13',     asLeadOpcodes = ['0x0f', '0x72',], sSelector = 'mod /r'),
    'grp14':        InstructionMap('grp14',     asLeadOpcodes = ['0x0f', '0x73',], sSelector = 'mod /r'),
    'grp15':        InstructionMap('grp15',     asLeadOpcodes = ['0x0f', '0xae',], sSelector = 'mod /r'),
    'grp16':        InstructionMap('grp16',     asLeadOpcodes = ['0x0f', '0x18',], sSelector = 'mod /r'),
    'grpA17':       InstructionMap('grpA17',    asLeadOpcodes = ['0x0f', '0x78',], sSelector = '/r'), # AMD: EXTRQ weirdness
    'grpP':         InstructionMap('grpP',      asLeadOpcodes = ['0x0f', '0x0d',], sSelector = '/r'), # AMD: prefetch

    # (The 'three0f38' entry used to be listed twice; a duplicate key in a
    # dictionary display just replaces the first value, so one was dropped.)
    'three0f38':    InstructionMap('three0f38', asLeadOpcodes = ['0x0f', '0x38',]),
    'three0f3a':    InstructionMap('three0f3a', asLeadOpcodes = ['0x0f', '0x3a',]),

    'vexmap1':      InstructionMap('vexmap1',   sEncoding = 'vex1'),
    'vexgrp12':     InstructionMap('vexgrp12',  sEncoding = 'vex1', asLeadOpcodes = ['0x71',], sSelector = 'mod /r'),
    'vexgrp13':     InstructionMap('vexgrp13',  sEncoding = 'vex1', asLeadOpcodes = ['0x72',], sSelector = 'mod /r'),
    'vexgrp14':     InstructionMap('vexgrp14',  sEncoding = 'vex1', asLeadOpcodes = ['0x73',], sSelector = 'mod /r'),
    'vexgrp15':     InstructionMap('vexgrp15',  sEncoding = 'vex1', asLeadOpcodes = ['0xae',], sSelector = 'mod /r'),
    'vexgrp17':     InstructionMap('vexgrp17',  sEncoding = 'vex1', asLeadOpcodes = ['0xf3',], sSelector = '/r'),

    'vexmap2':      InstructionMap('vexmap2',   sEncoding = 'vex2'),
    'vexmap3':      InstructionMap('vexmap3',   sEncoding = 'vex3'),

    'xopmap8':      InstructionMap('xopmap8',   sEncoding = 'xop8'),
    'xopmap9':      InstructionMap('xopmap9',   sEncoding = 'xop9'),
    'xopgrp1':      InstructionMap('xopgrp1',   sEncoding = 'xop9',  asLeadOpcodes = ['0x01'], sSelector = '/r'),
    'xopgrp2':      InstructionMap('xopgrp2',   sEncoding = 'xop9',  asLeadOpcodes = ['0x02'], sSelector = '/r'),
    'xopgrp3':      InstructionMap('xopgrp3',   sEncoding = 'xop9',  asLeadOpcodes = ['0x12'], sSelector = '/r'),
    'xopmap10':     InstructionMap('xopmap10',  sEncoding = 'xop10'),
    'xopgrp4':      InstructionMap('xopgrp4',   sEncoding = 'xop10', asLeadOpcodes = ['0x12'], sSelector = '/r'),
};
591
592
593
class ParserException(Exception):
    """ Parser exception; carries the error message as its only argument. """
    def __init__(self, sMessage):
        super(ParserException, self).__init__(sMessage);
598
599
600class SimpleParser(object):
601 """
602 Parser of IEMAllInstruction*.cpp.h instruction specifications.
603 """
604
605 ## @name Parser state.
606 ## @{
607 kiCode = 0;
608 kiCommentMulti = 1;
609 ## @}
610
def __init__(self, sSrcFile, asLines, sDefaultMap):
    """
    Initializes the parser state for one source file.

    sSrcFile is the name used in messages, asLines the lines to parse and
    sDefaultMap the name (a g_dInstructionMaps key, checked by assertion) of
    the map used for instructions which are not assigned to any map.
    """
    assert sDefaultMap in g_dInstructionMaps;
    self.oDefaultMap    = g_dInstructionMaps[sDefaultMap];

    # Input and current position/state.
    self.sSrcFile       = sSrcFile;
    self.asLines        = asLines;
    self.iLine          = 0;
    self.iState         = self.kiCode;
    self.sComment       = '';
    self.iCommentLine   = 0;
    self.asCurInstr     = [];

    # Statistics.
    self.cTotalInstr    = 0;
    self.cTotalStubs    = 0;
    self.cTotalTagged   = 0;

    # Collected error messages and debug output switch.
    self.asErrors       = [];
    self.fDebug         = True;

    # Name validation expressions (macro, mnemonic and stats names share one pattern).
    oReIdentifier = re.compile('^[A-Za-z_][A-Za-z0-9_]*$');
    self.oReMacroName   = oReIdentifier;
    self.oReMnemonic    = oReIdentifier;
    self.oReStatsName   = oReIdentifier;
    self.oReFunctionName= re.compile('^iemOp_[A-Za-z_][A-Za-z0-9_]*$');
    self.oReGroupName   = re.compile('^op_[a-z0-9]+(|_[a-z0-9]+|_[a-z0-9]+_[a-z0-9]+)$');

    # Tag name to handler method.
    self.dTagHandlers   = {
        '@opbrief':     self.parseTagOpBrief,
        '@opdesc':      self.parseTagOpDesc,
        '@opmnemonic':  self.parseTagOpMnemonic,
        '@op1':         self.parseTagOpOperandN,
        '@op2':         self.parseTagOpOperandN,
        '@op3':         self.parseTagOpOperandN,
        '@op4':         self.parseTagOpOperandN,
        '@oppfx':       self.parseTagOpPfx,
        '@opmaps':      self.parseTagOpMaps,
        '@opcode':      self.parseTagOpcode,
        '@openc':       self.parseTagOpEnc,
        '@opfltest':    self.parseTagOpEFlags,
        '@opflmodify':  self.parseTagOpEFlags,
        '@opflundef':   self.parseTagOpEFlags,
        '@opflset':     self.parseTagOpEFlags,
        '@opflclear':   self.parseTagOpEFlags,
        '@ophints':     self.parseTagOpHints,
        '@opcpuid':     self.parseTagOpCpuId,
        '@opgroup':     self.parseTagOpGroup,
        '@opunused':    self.parseTagOpUnusedInvalid,
        '@opinvalid':   self.parseTagOpUnusedInvalid,
        '@opinvlstyle': self.parseTagOpUnusedInvalid,
        '@optest':      self.parseTagOpTest,
        '@opstats':     self.parseTagOpStats,
        '@opfunction':  self.parseTagOpFunction,
        '@opdone':      self.parseTagOpDone,
    };
664
def raiseError(self, sMessage):
    """
    Raises a ParserException with sMessage prefixed by the source file name
    and the current line number.
    """
    sFullMessage = "%s:%d: error: %s" % (self.sSrcFile, self.iLine, sMessage,);
    raise ParserException(sFullMessage);
670
def raiseCommentError(self, iLineInComment, sMessage):
    """
    Like raiseError, except that the reported line number is the sum of
    self.iCommentLine and iLineInComment.
    """
    iErrorLine = self.iCommentLine + iLineInComment;
    raise ParserException("%s:%d: error: %s" % (self.sSrcFile, iErrorLine, sMessage,));
676
677 def error(self, sMessage):
678 """
679 Adds an error.
680 returns False;
681 """
682 self.asErrors.append(u'%s:%d: error: %s\n' % (self.sSrcFile, self.iLine, sMessage,));
683 return False;
684
685 def errorComment(self, iLineInComment, sMessage):
686 """
687 Adds a comment error.
688 returns False;
689 """
690 self.asErrors.append(u'%s:%d: error: %s\n' % (self.sSrcFile, self.iCommentLine + iLineInComment, sMessage,));
691 return False;
692
693 def printErrors(self):
694 """
695 Print the errors to stderr.
696 Returns number of errors.
697 """
698 if len(self.asErrors) > 0:
699 sys.stderr.write(u''.join(self.asErrors));
700 return len(self.asErrors);
701
702 def debug(self, sMessage):
703 """
704 """
705 if self.fDebug:
706 print('debug: %s' % (sMessage,));
707
708
def addInstruction(self, iLine = None):
    """
    Creates a new instruction and appends it to both the global instruction
    list and the list of current instructions.

    The instruction is created for line iLine, or for the current line when
    iLine is None.  Returns the new instruction.
    """
    if iLine is None:
        iLine = self.iLine;
    oInstr = Instruction(self.sSrcFile, iLine);
    g_aoAllInstructions.append(oInstr);
    self.asCurInstr.append(oInstr);
    return oInstr;
717
718 def doneInstructionOne(self, oInstr, iLine):
719 """
720 Complete the parsing by processing, validating and expanding raw inputs.
721 """
722 assert oInstr.iLineCompleted is None;
723 oInstr.iLineCompleted = iLine;
724
725 #
726 # Specified instructions.
727 #
728 if oInstr.cOpTags > 0:
729 if oInstr.sStats is None:
730 pass;
731
732 #
733 # Unspecified legacy stuff. We generally only got a few things to go on here.
734 # /** Opcode 0x0f 0x00 /0. */
735 # FNIEMOPRM_DEF(iemOp_Grp6_sldt)
736 #
737 else:
738 #if oInstr.sRawOldOpcodes:
739 #
740 #if oInstr.sMnemonic:
741 pass;
742
743 #
744 # Apply default map and then add the instruction to all it's groups.
745 #
746 if len(oInstr.aoMaps) == 0:
747 oInstr.aoMaps = [ self.oDefaultMap, ];
748 for oMap in oInstr.aoMaps:
749 oMap.aoInstructions.append(oInstr);
750
751 self.debug('%d..%d: %s; %d @op tags' % (oInstr.iLineCreated, oInstr.iLineCompleted, oInstr.sFunction, oInstr.cOpTags));
752 return True;
753
754 def doneInstructions(self, iLineInComment = None):
755 """
756 Done with current instruction.
757 """
758 for oInstr in self.asCurInstr:
759 self.doneInstructionOne(oInstr, self.iLine if iLineInComment is None else self.iCommentLine + iLineInComment);
760 if oInstr.fStub:
761 self.cTotalStubs += 1;
762
763 self.cTotalInstr += len(self.asCurInstr);
764
765 self.sComment = '';
766 self.asCurInstr = [];
767 return True;
768
769 def setInstrunctionAttrib(self, sAttrib, oValue, fOverwrite = False):
770 """
771 Sets the sAttrib of all current instruction to oValue. If fOverwrite
772 is False, only None values and empty strings are replaced.
773 """
774 for oInstr in self.asCurInstr:
775 if fOverwrite is not True:
776 oOldValue = getattr(oInstr, sAttrib);
777 if oOldValue is not None:
778 continue;
779 setattr(oInstr, sAttrib, oValue);
780
781 def setInstrunctionArrayAttrib(self, sAttrib, iEntry, oValue, fOverwrite = False):
782 """
783 Sets the iEntry of the array sAttrib of all current instruction to oValue.
784 If fOverwrite is False, only None values and empty strings are replaced.
785 """
786 for oInstr in self.asCurInstr:
787 aoArray = getattr(oInstr, sAttrib);
788 while len(aoArray) <= iEntry:
789 aoArray.append(None);
790 if fOverwrite is True or aoArray[iEntry] is None:
791 aoArray[iEntry] = oValue;
792
793 def parseCommentOldOpcode(self, asLines):
794 """ Deals with 'Opcode 0xff /4' like comments """
795 asWords = asLines[0].split();
796 if len(asWords) >= 2 \
797 and asWords[0] == 'Opcode' \
798 and ( asWords[1].startswith('0x')
799 or asWords[1].startswith('0X')):
800 asWords = asWords[:1];
801 for iWord, sWord in enumerate(asWords):
802 if sWord.startswith('0X'):
803 sWord = '0x' + sWord[:2];
804 asWords[iWord] = asWords;
805 self.setInstrunctionAttrib('sRawOldOpcodes', ' '.join(asWords));
806
807 return False;
808
809 def ensureInstructionForOpTag(self, iTagLine):
810 """ Ensure there is an instruction for the op-tag being parsed. """
811 if len(self.asCurInstr) == 0:
812 self.addInstruction(self.iCommentLine + iTagLine);
813 for oInstr in self.asCurInstr:
814 oInstr.cOpTags += 1;
815 if oInstr.cOpTags == 1:
816 self.cTotalTagged += 1;
817 return self.asCurInstr[-1];
818
819 @staticmethod
820 def flattenSections(aasSections):
821 """
822 Flattens multiline sections into stripped single strings.
823 Returns list of strings, on section per string.
824 """
825 asRet = [];
826 for asLines in assSections:
827 if len(asLines) > 0:
828 asRet.append(' '.join([sLine.strip() for sLine in asLines]));
829 return asRet;
830
831 @staticmethod
832 def flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = '\n'):
833 """
834 Flattens sections into a simple stripped string with newlines as
835 section breaks. The final section does not sport a trailing newline.
836 """
837 # Typical: One section with a single line.
838 if len(aasSections) == 1 and len(aasSections[0]) == 1:
839 return aasSections[0][0].strip();
840
841 sRet = '';
842 for iSection, asLines in enumerate(aasSections):
843 if len(asLines) > 0:
844 if iSection > 0:
845 sRet += sSectionSep;
846 sRet += sLineSep.join([sLine.strip() for sLine in asLines]);
847 return sRet;
848
849
850
851 ## @name Tag parsers
852 ## @{
853
854 def parseTagOpBrief(self, sTag, aasSections, iTagLine, iEndLine):
855 """
856 Tag: \@opbrief
857 Value: Text description, multiple sections, appended.
858
859 Brief description. If not given, it's the first sentence from @opdesc.
860 """
861 oInstr = self.ensureInstructionForOpTag(iTagLine);
862
863 # Flatten and validate the value.
864 sBrief = self.flattenAllSections(aasSections);
865 if len(sBrief) == 0:
866 return self.errorComment(iTagLine, '%s: value required' % (sTag,));
867 if sBrief[-1] != '.':
868 sBrief = sBrief + '.';
869 if len(sBrief) > 180:
870 return self.errorComment(iTagLine, '%s: value too long (max 180 chars): %s' % (sTag, sBrief));
871 offDot = sBrief.find('.');
872 while offDot >= 0 and offDot < len(sBrief) - 1 and sBrief[offDot + 1] != ' ':
873 offDot = sBrief.find('.', offDot + 1);
874 if offDot >= 0 and offDot != len(sBrief) - 1:
875 return self.errorComment(iTagLine, '%s: only one sentence: %s' % (sTag, sBrief));
876
877 # Update the instruction.
878 if oInstr.sBrief is not None:
879 return self.errorComment(iTagLine, '%s: attempting to overwrite brief "%s" with "%s"'
880 % (sTag, oInstr.sBrief, sBrief,));
881 _ = iEndLine;
882 return True;
883
884 def parseTagOpDesc(self, sTag, aasSections, iTagLine, iEndLine):
885 """
886 Tag: \@opdesc
887 Value: Text description, multiple sections, appended.
888
889 It is used to describe instructions.
890 """
891 oInstr = self.ensureInstructionForOpTag(iTagLine);
892 if len(self.aoInstructions) > 0 and len(aasSections) > 0:
893 oInstr.asDescSections.extend(self.flattenSections(aasSections));
894 return True;
895
896 _ = sTag; _ = iEndLine;
897 return True;
898
899 def parseTagOpMnemonic(self, sTag, aasSections, iTagLine, iEndLine):
900 """
901 Tag: @opmenmonic
902 Value: mnemonic
903
904 The 'mnemonic' value must be a valid C identifier string. Because of
905 prefixes, groups and whatnot, there times when the mnemonic isn't that
906 of an actual assembler mnemonic.
907 """
908 oInstr = self.ensureInstructionForOpTag(iTagLine);
909
910 # Flatten and validate the value.
911 sMnemonic = self.flattenAllSections(aasSections);
912 if not self.oReMnemonic.match(sMnemonic):
913 return self.errorComment(iTagLine, '%s: invalid menmonic name: "%s"' % (sTag, sMnemonic,));
914 if oInstr.sMnemonic is not None:
915 return self.errorComment(iTagLine, '%s: attempting to overwrite menmonic "%s" with "%s"'
916 % (sTag, oInstr.sMnemonic, sMnemonic,));
917 oInstr.sMnemonic = sMnemonic
918
919 _ = iEndLine;
920 return True;
921
def parseTagOpOperandN(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tags:   @op1, @op2, @op3, @op4
    Value:  where:type

    The 'where' value indicates where the operand is found, like the 'reg'
    part of the ModR/M encoding.  See Operand.kdLocations for a list.

    The 'type' value indicates the operand type.  These follow the types
    given in the opcode tables in the CPU reference manuals.
    See Operand.kdTypes for a list.

    The operand index is taken from the last character of sTag.  Returns
    True after storing the operand.  A malformed value, an unknown where or
    type value, or an attempt to replace an already set operand is recorded
    as an error and yields False.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    idxOp = int(sTag[-1]) - 1;
    assert idxOp >= 0 and idxOp < 4;

    # Flatten, split up, and validate the "where:type" value.
    sFlattened = self.flattenAllSections(aasSections);
    asSplit = sFlattened.split(':');
    if len(asSplit) != 2:
        return self.errorComment(iTagLine, 'expected %s value on format "<where>:<type>" not "%s"' % (sTag, sFlattened,));

    (sWhere, sType) = asSplit;
    if sWhere not in Operand.kdLocations:
        return self.errorComment(iTagLine, '%s: invalid where value "%s", valid: %s'
                                           % (sTag, sWhere, ', '.join(Operand.kdLocations.keys()),));

    if sType not in Operand.kdTypes:
        return self.errorComment(iTagLine, '%s: invalid type value "%s", valid: %s'
                                           % (sTag, sType, ', '.join(Operand.kdTypes.keys()),));

    # Insert the operand, refusing to overwrite an existing one.
    while idxOp >= len(oInstr.aoOperands):
        oInstr.aoOperands.append(None);
    if oInstr.aoOperands[idxOp] is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s:%s" with "%s:%s"'
                                           % ( sTag, oInstr.aoOperands[idxOp].sWhere, oInstr.aoOperands[idxOp].sType,
                                               sWhere, sType,));
    oInstr.aoOperands[idxOp] = Operand(sWhere, sType);

    _ = iEndLine;
    return True;
966
def parseTagOpMaps(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opmaps
    Value:  map[,map2]

    Names the instruction map(s) the instruction belongs to.  Each name
    must be a key of g_dInstructionMaps.  (There is a default map
    associated with each input file.)

    Returns True once the maps have been added; validation failures are
    reported via self.errorComment() and its return value is passed on.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Lines and sections are all joined with commas, giving one flat list.
    asNames = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',').split(',');
    if not asNames:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    for sName in asNames:
        if sName not in g_dInstructionMaps:
            return self.errorComment(iTagLine, '%s: invalid map value: %s (valid values: %s)'
                                               % (sTag, sName, ', '.join(g_dInstructionMaps.keys()),));

    # A map that was already assigned by an earlier tag is a hard error.
    for oAssigned in oInstr.aoMaps:
        if oAssigned.sName in asNames:
            return self.errorComment(iTagLine, '%s: duplicate map assignment: %s' % (sTag, oAssigned.sName));

    # Append the new maps; repeats within this very value are only complained about.
    for sName in asNames:
        oNewMap = g_dInstructionMaps[sName];
        if oNewMap in oInstr.aoMaps:
            self.errorComment(iTagLine, '%s: duplicate map assignment (input): %s' % (sTag, sName));
        else:
            oInstr.aoMaps.append(oNewMap);

    return True;
1001
## \@oppfx values.  Each accepted prefix byte maps to its own (empty) list.
kdPrefixes = dict((sPrefixByte, []) for sPrefixByte in ('0x66', '0xf3', '0xf2',));
1008
def parseTagOpPfx(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@oppfx
    Value:  none|0x66|0xf3|0xf2

    Required prefix for the instruction.  (In a (E)VEX context this is the
    value of the 'pp' field rather than an actual prefix.)

    A two character value is taken to be a hex byte lacking the '0x'
    prefix.  The value 'none' stores None as the instruction prefix.

    Returns True on success.  Bad input is reported via self.errorComment()
    and the return value of that call is handed back to the caller.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sFlattened = self.flattenAllSections(aasSections);
    asPrefixes = sFlattened.split();
    if not asPrefixes:
        # Fixed: an empty value used to raise IndexError below.
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if len(asPrefixes) > 1:
        return self.errorComment(iTagLine, '%s: max one prefix: %s' % (sTag, asPrefixes,));

    sPrefix = asPrefixes[0].lower();
    if sPrefix == 'none':
        sPrefix = None;
    else:
        if len(sPrefix) == 2:
            sPrefix = '0x' + sPrefix;
        # Fixed: the check was inverted, rejecting every well-formed byte value.
        if not _isValidOpcodeByte(sPrefix):
            return self.errorComment(iTagLine, '%s: invalid prefix: %s' % (sTag, sPrefix,));
        if sPrefix not in self.kdPrefixes:
            return self.errorComment(iTagLine, '%s: invalid prefix: %s (valid %s)' % (sTag, sPrefix, self.kdPrefixes,));

    # Set it.
    if oInstr.sPrefix is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sPrefix, sPrefix,));
    oInstr.sPrefix = sPrefix;

    _ = iEndLine;
    return True;
1044
## Special \@opcode tag values, i.e. everything accepted besides a plain opcode byte.
kdSpecialOpcodes = dict((sSpecialForm, []) for sSpecialForm in (
    '/reg',
    'mr/reg',
    '11 /reg',
    '!11 /reg',
    '11 mr/reg',
    '!11 mr/reg',
));
1054
def parseTagOpcode(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opcode
    Value:  0x?? | /reg | mr/reg | 11 /reg | !11 /reg | 11 mr/reg | !11 mr/reg

    The opcode byte or sub-byte for the instruction in the context of a map.

    The value is accepted when it is a key of self.kdSpecialOpcodes or when
    _isValidOpcodeByte() approves of it.  Returns True on success, otherwise
    the return value of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Validate the flattened value.
    sValue = self.flattenAllSections(aasSections);
    if sValue not in self.kdSpecialOpcodes and not _isValidOpcodeByte(sValue):
        return self.errorComment(iTagLine, '%s: invalid opcode: %s' % (sTag, sValue,));

    # Store it unless an opcode was given already.
    if oInstr.sOpcode is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sOpcode, sValue,));
    oInstr.sOpcode = sValue;
    return True;
1078
## Valid values for \@openc.
kdEncodings = dict((sEncodingName, []) for sEncodingName in ('ModR/M',));
1083
def parseTagOpEnc(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@openc
    Value:  ModR/M|TBD

    The instruction operand encoding style.

    Returns True on success, otherwise the return value of
    self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Validate the flattened value.
    # NOTE(review): values passing _isValidOpcodeByte() are accepted as
    #               encodings too - confirm that this is intentional.
    sValue = self.flattenAllSections(aasSections);
    if sValue not in self.kdEncodings and not _isValidOpcodeByte(sValue):
        return self.errorComment(iTagLine, '%s: invalid encoding: %s' % (sTag, sValue,));

    # Store it unless an encoding was given already.
    if oInstr.sEncoding is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                           % ( sTag, oInstr.sEncoding, sValue,));
    oInstr.sEncoding = sValue;
    return True;
1108
## EFlags values allowed in \@opfltest, \@opflmodify, \@opflundef, \@opflset, and \@opflclear.
## The "flag is clear" notations map to the flag constant with a '!' prefix.
kdEFlags = {
    # Debugger flag notation:
    'ov':   'X86_EFL_OF',   ##< OVerflow.
    'nv':   '!X86_EFL_OF',  ##< No Overflow.

    'ng':   'X86_EFL_SF',   ##< NeGative (sign).
    'pl':   '!X86_EFL_SF',  ##< PLus (sign).

    'zr':   'X86_EFL_ZF',   ##< ZeRo.
    'nz':   '!X86_EFL_ZF',  ##< No Zero.

    'af':   'X86_EFL_AF',   ##< Aux Flag.
    'na':   '!X86_EFL_AF',  ##< No Aux.

    # Fixed: these two were swapped.  PF is set when the parity is even.
    'pe':   'X86_EFL_PF',   ##< Parity Even (PF set).
    'po':   '!X86_EFL_PF',  ##< Parity Odd (PF clear).

    'cf':   'X86_EFL_CF',   ##< Carry Flag.
    'nc':   '!X86_EFL_CF',  ##< No Carry.

    'ei':   'X86_EFL_IF',   ##< Enabled Interrupts.
    'di':   '!X86_EFL_IF',  ##< Disabled Interrupts.

    'dn':   'X86_EFL_DF',   ##< DowN (string op direction).
    'up':   '!X86_EFL_DF',  ##< UP (string op direction).

    'vip':  'X86_EFL_VIP',  ##< Virtual Interrupt Pending.
    'vif':  'X86_EFL_VIF',  ##< Virtual Interrupt Flag.
    'ac':   'X86_EFL_AC',   ##< Alignment Check.
    'vm':   'X86_EFL_VM',   ##< Virtual-8086 Mode.
    'rf':   'X86_EFL_RF',   ##< Resume Flag.
    'nt':   'X86_EFL_NT',   ##< Nested Task.
    'tf':   'X86_EFL_TF',   ##< Trap flag.

    # Reference manual notation ('cf' and 'af' are already listed above):
    'of':   'X86_EFL_OF',
    'sf':   'X86_EFL_SF',
    'zf':   'X86_EFL_ZF',
    'pf':   'X86_EFL_PF',
    'if':   'X86_EFL_IF',
    'df':   'X86_EFL_DF',
    'iopl': 'X86_EFL_IOPL',
    'id':   'X86_EFL_ID',
};
1155
## EFlags tag to Instruction attribute name.
kdOpFlagToAttr = dict((
    ('@opfltest',   'asFlTest'),
    ('@opflmodify', 'asFlModify'),
    ('@opflundef',  'asFlUndefined'),
    ('@opflset',    'asFlSet'),
    ('@opflclear',  'asFlClear'),
));
1164
def parseTagOpEFlags(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tags:   \@opfltest, \@opflmodify, \@opflundef, \@opflset, \@opflclear
    Value:  <eflags specifier>

    The value is a comma separated list of self.kdEFlags keys, or the
    single word 'none' for an empty list.  The resulting list is stored in
    the instruction attribute that self.kdOpFlagToAttr names for the tag.

    Returns True on success, False if any flag was invalid, and the return
    value of self.errorComment() if the attribute was set already.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Lines and sections are joined by commas and then split into flags.
    asFlags = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',').split(',');
    if len(asFlags) == 1 and asFlags[0].lower() == 'none':
        asFlags = [];
    else:
        fOkay = True;
        for idxFlag, sRawFlag in enumerate(asFlags):
            if sRawFlag in self.kdEFlags:
                continue;
            # Tolerate whitespace around the flag name, e.g. after a comma.
            sTrimmed = sRawFlag.strip();
            if sTrimmed in self.kdEFlags:
                asFlags[idxFlag] = sTrimmed;
            else:
                fOkay = self.errorComment(iTagLine, '%s: invalid EFLAGS value: %s' % (sTag, sRawFlag,));
        if not fOkay:
            return False;

    # Store the list, refusing to overwrite an earlier one.
    sAttrib = self.kdOpFlagToAttr[sTag];
    asPrevious = getattr(oInstr, sAttrib);
    if asPrevious is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, asPrevious, asFlags,));
    setattr(oInstr, sAttrib, asFlags);
    return True;
1196
## \@ophints values.
## Every hint maps to the constant named 'DISOPTYPE_' + the upper cased hint.
kdHints = dict((sHintName, 'DISOPTYPE_' + sHintName.upper()) for sHintName in (
    'invalid',
    'harmless',
    'controlflow',
    'potentially_dangerous',
    'dangerous',
    'portio',
    'privileged',
    'privileged_notrap',
    'uncond_controlflow',
    'relative_controlflow',
    'cond_controlflow',
    'interrupt',
    'illegal',
    'rrm_dangerous',            # Some additional dangerous ones when recompiling raw r0.
    'rrm_dangerous_16',         # Some additional dangerous ones when recompiling 16-bit raw r0.
    'inhibit_irqs',             # Will or can inhibit irqs (sti, pop ss, mov ss).
    'portio_read',
    'portio_write',
    'invalid_64',               # Invalid in 64 bits mode.
    'only_64',                  # Only valid in 64 bits mode.
    'default_64_op_size',       # Default 64 bits operand size.
    'forced_64_op_size',        # Forced 64 bits operand size; regardless of prefix bytes.
    'rexb_extends_opreg',       # REX.B extends the register field in the opcode byte.
    'mod_fixed_11',             # modrm.mod is always 11b.
    'forced_32_op_size_x86',    # Forced 32 bits operand size; regardless of prefix bytes
                                # (only in 16 & 32 bits mode!).
    'sse',                      # SSE,SSE2,SSE3,AVX,++ instruction. Not implemented yet!
    'mmx',                      # MMX,MMXExt,3DNow,++ instruction. Not implemented yet!
    'fpu',                      # FPU instruction. Not implemented yet!
));
1228
def parseTagOpHints(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@ophints
    Value:  Comma or space separated list of flags and hints.

    This covers the disassembler flags table and more.  Each hint must be
    a key of self.kdHints; the single word 'none' gives an empty list.
    Accepted hints are recorded as keys in the instruction's dHints
    dictionary.

    Returns True on success and False if any hint was invalid.  Duplicate
    hints are reported via self.errorComment() without failing the tag.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    asHints = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asHints) == 1 and asHints[0].lower() == 'none':
        asHints = [];
    else:
        # split() yields whitespace free words, so no further stripping is
        # needed.  (The old stripping fallback assigned into the string
        # instead of the list and has been dropped.)
        fRc = True;
        for sHint in asHints:
            if sHint not in self.kdHints:
                fRc = self.errorComment(iTagLine, '%s: invalid hint value: %s' % (sTag, sHint,));
        if not fRc:
            return False;

    # Append them.
    for sHint in asHints:
        if sHint not in oInstr.dHints:
            oInstr.dHints[sHint] = True; # (dummy value, using dictionary for speed)
        else:
            self.errorComment(iTagLine, '%s: duplicate hint: %s' % ( sTag, sHint,));

    _ = iEndLine;
    return True;
1262
## \@opcpuid values, mapping the feature name to the CPUID feature bit constant.
kdCpuIdFlags = {
    'vme':      'X86_CPUID_FEATURE_EDX_VME',
    'tsc':      'X86_CPUID_FEATURE_EDX_TSC',
    'msr':      'X86_CPUID_FEATURE_EDX_MSR',
    'cx8':      'X86_CPUID_FEATURE_EDX_CX8',
    'sep':      'X86_CPUID_FEATURE_EDX_SEP',
    'cmov':     'X86_CPUID_FEATURE_EDX_CMOV',
    'clfsh':    'X86_CPUID_FEATURE_EDX_CLFSH',
    'mmx':      'X86_CPUID_FEATURE_EDX_MMX',
    'fxsr':     'X86_CPUID_FEATURE_EDX_FXSR',
    'sse':      'X86_CPUID_FEATURE_EDX_SSE',
    'sse2':     'X86_CPUID_FEATURE_EDX_SSE2',
    'sse3':     'X86_CPUID_FEATURE_ECX_SSE3',
    # Fixed: pclmul, monitor and smx used to name the constant of the
    # neighbouring feature bit (DTES64, CPLDS and TM2 respectively).
    # NOTE(review): constant names follow the X86_CPUID_FEATURE_ECX_<feature>
    #               pattern of the other entries - confirm against x86.h.
    'pclmul':   'X86_CPUID_FEATURE_ECX_PCLMUL',
    'monitor':  'X86_CPUID_FEATURE_ECX_MONITOR',
    'vmx':      'X86_CPUID_FEATURE_ECX_VMX',
    'smx':      'X86_CPUID_FEATURE_ECX_SMX',
    'ssse3':    'X86_CPUID_FEATURE_ECX_SSSE3',
    'fma':      'X86_CPUID_FEATURE_ECX_FMA',
    'cx16':     'X86_CPUID_FEATURE_ECX_CX16',
    'pcid':     'X86_CPUID_FEATURE_ECX_PCID',
    'sse41':    'X86_CPUID_FEATURE_ECX_SSE4_1',
    'sse42':    'X86_CPUID_FEATURE_ECX_SSE4_2',
    'movbe':    'X86_CPUID_FEATURE_ECX_MOVBE',
    'popcnt':   'X86_CPUID_FEATURE_ECX_POPCNT',
    'aes':      'X86_CPUID_FEATURE_ECX_AES',
    'xsave':    'X86_CPUID_FEATURE_ECX_XSAVE',
    'avx':      'X86_CPUID_FEATURE_ECX_AVX',
    'f16c':     'X86_CPUID_FEATURE_ECX_F16C',
    'rdrand':   'X86_CPUID_FEATURE_ECX_RDRAND',

    'axmmx':    'X86_CPUID_AMD_FEATURE_EDX_AXMMX',
    '3dnowext': 'X86_CPUID_AMD_FEATURE_EDX_3DNOW_EX',
    '3dnow':    'X86_CPUID_AMD_FEATURE_EDX_3DNOW',
    'svm':      'X86_CPUID_AMD_FEATURE_ECX_SVM',
    'cr8l':     'X86_CPUID_AMD_FEATURE_ECX_CR8L',
    'abm':      'X86_CPUID_AMD_FEATURE_ECX_ABM',
    'sse4a':    'X86_CPUID_AMD_FEATURE_ECX_SSE4A',
    '3dnowprf': 'X86_CPUID_AMD_FEATURE_ECX_3DNOWPRF',
    'xop':      'X86_CPUID_AMD_FEATURE_ECX_XOP',
    'fma4':     'X86_CPUID_AMD_FEATURE_ECX_FMA4',
};
1306
def parseTagOpCpuId(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opcpuid
    Value:  none | <CPUID flag specifier>

    CPUID feature bit which is required for the instruction to be present.
    The value is a comma or space separated list of self.kdCpuIdFlags keys,
    or the single word 'none'.  Accepted names are appended to the
    instruction's asCpuIds list.

    Returns True on success and False if any name was invalid.  Duplicates
    are reported via self.errorComment() without failing the tag.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    asCpuIds = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asCpuIds) == 1 and asCpuIds[0].lower() == 'none':
        asCpuIds = [];
    else:
        # Fixed: the table is called kdCpuIdFlags, not kdCpuIds.  split()
        # yields whitespace free words, so the old stripping fallback (which
        # assigned into the string rather than the list) has been dropped.
        fRc = True;
        for sCpuId in asCpuIds:
            if sCpuId not in self.kdCpuIdFlags:
                fRc = self.errorComment(iTagLine, '%s: invalid CPUID value: %s' % (sTag, sCpuId,));
        if not fRc:
            return False;

    # Append them.
    for sCpuId in asCpuIds:
        if sCpuId not in oInstr.asCpuIds:
            oInstr.asCpuIds.append(sCpuId);
        else:
            self.errorComment(iTagLine, '%s: duplicate CPUID: %s' % ( sTag, sCpuId,));

    _ = iEndLine;
    return True;
1340
def parseTagOpGroup(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opgroup
    Value:  op_grp1[_subgrp2[_subsubgrp3]]

    Instruction grouping.  Exactly one group name matching
    self.oReGroupName must be given.

    Returns True on success, otherwise the return value of
    self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # There must be one single word in the value ...
    asWords = self.flattenAllSections(aasSections).split();
    if len(asWords) != 1:
        return self.errorComment(iTagLine, '%s: exactly one group, please: %s' % (sTag, asWords,));

    # ... and it must look like a group name.
    (sGroupName,) = asWords;
    if not self.oReGroupName.match(sGroupName):
        return self.errorComment(iTagLine, '%s: invalid group name: %s (valid: %s)'
                                           % (sTag, sGroupName, self.oReGroupName.pattern));

    # Store it unless a group was given already.
    if oInstr.sGroup is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sGroup, sGroupName,));
    oInstr.sGroup = sGroupName;
    return True;
1366
## \@opunused, \@opinvalid, \@opinvlstyle values.
kdInvalidStyles = dict((sStyleName, []) for sStyleName in (
    'immediate',                # CPU stops decoding immediately after the opcode.
    'intel-modrm',              # Intel decodes ModR/M.
    'intel-modrm-imm8',         # Intel decodes ModR/M and an imm8 immediate.
    'intel-opcode-modrm',       # Intel decodes another opcode byte followed by ModR/M.
                                # (Unused extension tables.)
    'intel-opcode-modrm-imm8',  # Intel decodes another opcode byte followed by ModR/M
                                # and an imm8 immediate.
));
1375
def parseTagOpUnusedInvalid(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opunused, \@opinvalid, \@opinvlstyle
    Value:  <invalid opcode behaviour style>

    The \@opunused indicates the specification is for a currently unused
    instruction encoding.

    The \@opinvalid indicates the specification is for a currently invalid
    instruction encoding (like UD2).

    The \@opinvlstyle just indicates how CPUs decode the instruction when
    not supported (\@opcpuid, \@opmincpu) or disabled.

    The style must be a key of self.kdInvalidStyles and only one of the
    three tags may be used per instruction.

    Returns True on success, otherwise the return value of
    self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    asStyles = self.flattenAllSections(aasSections).split();
    if len(asStyles) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid behviour style, please: %s' % (sTag, asStyles,));
    sStyle = asStyles[0];
    # Fixed: this used to look up the non-existing attribute 'kdInvalidStyle'.
    if sStyle not in self.kdInvalidStyles:
        return self.errorComment(iTagLine, '%s: invalid invalid behviour style: %s (valid: %s)'
                                           % (sTag, sStyle, self.kdInvalidStyles.keys(),));

    # Set it.
    if oInstr.sInvlStyle is not None:
        return self.errorComment(iTagLine,
                                 '%s: attempting to overwrite "%s" with "%s" (only one @opunused, @opinvalid, @opinvlstyle)'
                                 % ( sTag, oInstr.sInvlStyle, sStyle,));
    oInstr.sInvlStyle = sStyle;
    if sTag == '@opunused':
        oInstr.fUnused = True;
    elif sTag == '@opinvalid':
        oInstr.fInvalid = True;

    _ = iEndLine;
    return True;
1413
def parseTagOpTest(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@optest
    Value:      [<selectors>[ ]?] <inputs> -> <outputs>
    Example:    mode==64bit / in1=0xfffffffe:dw in2=1:dw -> out1=0xffffffff:dw outfl=a?,p?

    The main idea here is to generate basic instruction tests.

    The probably simplest way of handling the diverse input, would be to use
    it to produce size optimized byte code for a simple interpreter that
    modifies the register input and output states.

    An alternative to the interpreter would be creating multiple tables,
    but that becomes rather complicated wrt what goes where and then to use
    them in an efficient manner.

    Each section of the tag value is one test.  Tests that parse cleanly
    are appended to the instruction's aoTests list, the others are reported
    via self.errorComment().  Always returns True.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    #
    # Do it section by section.
    #
    for asSectionLines in aasSections:
        #
        # Sort the input into outputs, inputs and selector conditions.
        #
        sFlatSection = self.flattenAllSections([asSectionLines,]);
        if len(sFlatSection) == 0:
            self.errorComment(iTagLine, '%s: missing value' % ( sTag,));
            continue;
        oTest = InstructionTest(oInstr);

        asSelectors = [];
        asInputs    = [];
        asOutputs   = [];
        asCur       = asOutputs;
        fRc         = True;
        asWords     = sFlatSection.split();
        # The words are walked back to front: outputs come last, then the
        # inputs (before '->') and finally the selectors (before '/').
        for sWord in reversed(asWords):
            # Check for array switchers.  The checks must be identity checks
            # as the lists frequently compare equal (think empty lists).
            if sWord == '->':
                if asCur is not asOutputs:
                    fRc = self.errorComment(iTagLine, '%s: "->" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asInputs;
            elif sWord == '/':
                if asCur is not asInputs:
                    fRc = self.errorComment(iTagLine, '%s: "/" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asSelectors;
            else:
                asCur.insert(0, sWord);

        #
        # Validate and add selectors.
        #
        for sCond in asSelectors:
            # Predicates are shorthands that expand to full conditions.
            sCondExp  = TestSelector.kdPredicates.get(sCond, sCond);
            oSelector = None;
            for sOp in TestSelector.kasCompareOps:
                off = sCondExp.find(sOp);
                if off >= 0:
                    sVariable = sCondExp[:off];
                    sValue    = sCondExp[off + len(sOp):];
                    if sVariable in TestSelector.kdVariables:
                        if sValue in TestSelector.kdVariables[sVariable]:
                            oSelector = TestSelector(sVariable, sOp, sValue);
                        else:
                            self.errorComment(iTagLine, '%s: invalid condition value "%s" in "%s" (valid: %s)'
                                                        % ( sTag, sValue, sCond,
                                                            TestSelector.kdVariables[sVariable].keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid condition variable "%s" in "%s" (valid: %s)'
                                                    % ( sTag, sVariable, sCond, TestSelector.kdVariables.keys(),));
                    break;
            if oSelector is not None:
                for oExisting in oTest.aoSelectors:
                    if oExisting.sVariable == oSelector.sVariable:
                        self.errorComment(iTagLine, '%s: already have a selector for variable "%s" (existing: %s, new: %s)'
                                                    % ( sTag, oSelector.sVariable, oExisting, oSelector,));
                oTest.aoSelectors.append(oSelector);
            else:
                fRc = self.errorComment(iTagLine, '%s: failed to parse selector: %s' % ( sTag, sCond,));

        #
        # Validate outputs and inputs, adding them to the test as we go along.
        #
        for asItems, sDesc, aoDst in [ (asInputs, 'input', oTest.aoInputs), (asOutputs, 'output', oTest.aoOutputs)]:
            for sItem in asItems:
                oItem = None;
                for sOp in TestInOut.kasOperators:
                    off = sItem.find(sOp);
                    if off >= 0:
                        sField     = sItem[:off];
                        sValueType = sItem[off + len(sOp):];
                        if sField in TestInOut.kdFields:
                            # The type is optional, the field supplies the default.
                            asSplit = sValueType.split(':', 1);
                            sValue  = asSplit[0];
                            sType   = asSplit[1] if len(asSplit) > 1 else TestInOut.kdFields[sField][0];
                            if sType in TestInOut.kdTypes:
                                oValid = TestInOut.kdTypes[sType].validate(sValue);
                                if oValid is True:
                                    oItem = TestInOut(sField, sOp, sValue, sType);
                                else:
                                    self.errorComment(iTagLine, '%s: invalid %s value "%s" in "%s" (type: %s)'
                                                                % ( sTag, sDesc, sValue, sItem, sType, ));
                            else:
                                self.errorComment(iTagLine, '%s: invalid %s type "%s" in "%s" (valid types: %s)'
                                                             % ( sTag, sDesc, sType, sItem, TestInOut.kdTypes.keys(),));
                        else:
                            self.errorComment(iTagLine, '%s: invalid %s field "%s" in "%s" (valid fields: %s)'
                                                         % ( sTag, sDesc, sField, sItem, TestInOut.kdFields.keys(),));
                        break;
                if oItem is not None:
                    for oExisting in aoDst:
                        if oExisting.sField == oItem.sField and oExisting.sOp == oItem.sOp:
                            self.errorComment(iTagLine,
                                              '%s: already have a "%s" assignment for field "%s" (existing: %s, new: %s)'
                                              % ( sTag, oItem.sOp, oItem.sField, oExisting, oItem,));
                    aoDst.append(oItem);
                else:
                    # Fixed: the message used to call inputs and outputs 'selector'.
                    fRc = self.errorComment(iTagLine, '%s: failed to parse %s: %s' % ( sTag, sDesc, sItem,));

        #
        # Keep the test if everything parsed fine, otherwise dump what we got.
        #
        if fRc:
            oInstr.aoTests.append(oTest);
        else:
            self.errorComment(iTagLine, '%s: failed to parse test: %s' % (sTag, ' '.join(asWords),));
            self.errorComment(iTagLine, '%s: asSelectors=%s / asInputs=%s -> asOutputs=%s'
                                        % (sTag, asSelectors, asInputs, asOutputs,));

    _ = iEndLine;
    return True;
1548
def parseTagOpFunction(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opfunction
    Value:  <VMM function name>

    This is for explicitly setting the IEM function name.  Normally we pick
    this up from the FNIEMOP_XXX macro invocation after the description, or
    generate it from the mnemonic and operands.

    It is thought it may be necessary to set it when specifying instructions
    whose implementation isn't following immediately or aren't implemented yet.

    The name must match self.oReFunctionName.  Returns True on success,
    otherwise the return value of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sFunction = self.flattenAllSections(aasSections);
    if not self.oReFunctionName.match(sFunction):
        # Fixed: the undefined name 'Name' was used here.
        return self.errorComment(iTagLine, '%s: invalid VMM function name: "%s" (valid: %s)'
                                           % (sTag, sFunction, self.oReFunctionName.pattern));

    if oInstr.sFunction is not None:
        # Fixed: this reported the statistics name (with 'sStats' being undefined here).
        return self.errorComment(iTagLine, '%s: attempting to overwrite VMM function name "%s" with "%s"'
                                           % (sTag, oInstr.sFunction, sFunction,));
    oInstr.sFunction = sFunction;

    _ = iEndLine;
    return True;
1576
def parseTagOpStats(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opstats
    Value:  <VMM statistics base name>

    This is for explicitly setting the statistics name.  Normally we pick
    this up from the IEMOP_MNEMONIC macro invocation, or generate it from
    the mnemonic and operands.

    It is thought it may be necessary to set it when specifying instructions
    whose implementation isn't following immediately or aren't implemented yet.

    The name must match self.oReStatsName.  Returns True on success,
    otherwise the return value of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sStats = self.flattenAllSections(aasSections);
    if not self.oReStatsName.match(sStats):
        # Fixed: the undefined name 'Name' was used here.
        return self.errorComment(iTagLine, '%s: invalid VMM statistics name: "%s" (valid: %s)'
                                           % (sTag, sStats, self.oReStatsName.pattern));

    if oInstr.sStats is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite VMM statistics base name "%s" with "%s"'
                                           % (sTag, oInstr.sStats, sStats,));
    oInstr.sStats = sStats;

    _ = iEndLine;
    return True;
1604
def parseTagOpDone(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opdone
    Value:  none

    Used to explicitly flush the instructions that have been specified.

    Returns the result of self.doneInstructions() when the tag has no
    value, otherwise the return value of self.errorComment().
    """
    _ = iEndLine;
    sValue = self.flattenAllSections(aasSections);
    if sValue == '':
        return self.doneInstructions();
    return self.errorComment(iTagLine, '%s: takes no value, found: "%s"' % (sTag, sValue,));
1617
1618 ## @}
1619
1620
def parseComment(self):
    """
    Parse the current comment (self.sComment).

    The comment is split into lines, the tagged values are collected and
    each tag is then handed to its handler in self.dTagHandlers.  Text
    preceding the first tag is treated as belonging to a '@default' tag.

    Returns False if the comment holds nothing of interest, True otherwise.
    """
    #
    # Reject if comment doesn't seem to contain anything interesting.
    #
    if    self.sComment.find('Opcode') < 0 \
      and self.sComment.find('@') < 0:
        return False;

    #
    # Split the comment into lines, removing leading asterisks and spaces.
    # Also remove leading and trailing empty lines.
    #
    asLines = self.sComment.split('\n');
    for iLine, sLine in enumerate(asLines):
        asLines[iLine] = sLine.lstrip().lstrip('*').lstrip();

    while len(asLines) > 0 and len(asLines[0]) == 0:
        self.iCommentLine += 1;
        asLines.pop(0);

    while len(asLines) > 0 and len(asLines[-1]) == 0:
        asLines.pop(len(asLines) - 1);

    if len(asLines) == 0:       # Paranoia, guards the asLines[0] access below.
        return False;

    #
    # Check for old style: Opcode 0x0f 0x12
    #
    if asLines[0].startswith('Opcode '):
        self.parseCommentOldOpcode(asLines);

    #
    # Look for @op* tagged data, collecting (tag, sections, tag line, end line)
    # tuples.  Fixed: the tags used to be processed only when the next tag
    # was encountered, which meant the last tag of each comment was dropped.
    #
    aoTags       = [];
    sCurTag      = '@default';
    iCurTagLine  = 0;
    asCurSection = [];
    aasSections  = [ asCurSection, ];
    for iLine, sLine in enumerate(asLines):
        if not sLine.startswith('@'):
            if len(sLine) > 0:
                asCurSection.append(sLine);
            elif len(asCurSection) != 0:
                # An empty line following text starts a new section.
                asCurSection = [];
                aasSections.append(asCurSection);
        else:
            # The previous tag is complete.
            aoTags.append((sCurTag, aasSections, iCurTagLine, iLine));

            # New tag.
            asSplit = sLine.split(None, 1);
            sCurTag = asSplit[0].lower();
            if len(asSplit) > 1:
                asCurSection = [asSplit[1],];
            else:
                asCurSection = [];
            aasSections = [asCurSection, ];
            iCurTagLine = iLine;
    aoTags.append((sCurTag, aasSections, iCurTagLine, len(asLines)));

    #
    # Process the tags in the order they appeared.
    #
    cOpTags      = 0;
    sFlatDefault = None;
    for sTag, aasTagSections, iTagLine, iEndLine in aoTags:
        if sTag in self.dTagHandlers:
            self.dTagHandlers[sTag](sTag, aasTagSections, iTagLine, iEndLine);
            cOpTags += 1;
        elif sTag.startswith('@op'):
            self.errorComment(iTagLine, 'Unknown tag: %s' % (sTag));
        elif sTag == '@default':
            sFlatDefault = self.flattenAllSections(aasTagSections);

    #
    # Don't allow default text in blocks containing @op*.
    #
    if cOpTags > 0 and sFlatDefault:
        self.errorComment(0, 'Untagged comment text is not allowed with @op*: %s' % (sFlatDefault,));

    return True;
1702
def parseMacroInvocation(self, sInvocation):
    """
    Parses a macro invocation.

    sInvocation starts with the macro name.  If the closing parenthesis
    isn't found in it, following lines from self.asLines (starting at
    self.iLine) are appended until it is.  self.iLine is not modified.

    Returns a tuple, first element is the offset following the macro
    invocation. The second element is a list of macro arguments, where the
    zero'th is the macro name.  Problems with the macro name or running
    out of input are reported via self.error(), whose return value is
    passed on.
    """
    # First the name.
    offOpen = sInvocation.find('(');
    if offOpen <= 0:
        # NOTE(review): raiseError is not defined in the visible part of the
        #               file - confirm it is a module level function.
        raiseError("macro invocation open parenthesis not found");
    sName = sInvocation[:offOpen].strip();
    if not self.oReMacroName.match(sName):
        return self.error("invalid macro name '%s'" % (sName,));
    asRet = [sName, ];

    # Arguments.  Only commas at nesting depth one separate arguments.
    iLine    = self.iLine;
    cDepth   = 1;
    off      = offOpen + 1;
    offStart = off;
    while cDepth > 0:
        # Fetch more input when running dry.  Fixed: this must be a loop as
        # a following line may be empty and thus add nothing.
        while off >= len(sInvocation):
            if iLine >= len(self.asLines):
                return self.error('macro invocation beyond end of file');
            sInvocation += self.asLines[iLine];
            iLine += 1;
        ch = sInvocation[off];

        if ch == ',' or ch == ')':
            if cDepth == 1:
                asRet.append(sInvocation[offStart:off].strip());
                offStart = off + 1;
            if ch == ')':
                cDepth -= 1;
        elif ch == '(':
            cDepth += 1;
        off += 1;

    return (off, asRet);
1744
def findAndParseMacroInvocation(self, sCode, sMacro):
    """
    Looks for an invocation of sMacro in sCode and parses it.

    Only the first occurrence of the macro name is considered, and it only
    counts as an invocation when an opening parenthesis follows it.

    Returns (len(sCode), None) if not found, parseMacroInvocation result if found.
    """
    offHit = sCode.find(sMacro);
    # Fixed: indexing the stripped remainder raised IndexError when nothing
    # followed the macro name.
    if offHit >= 0 and sCode[offHit + len(sMacro):].lstrip().startswith('('):
        offAfter, asRet = self.parseMacroInvocation(sCode[offHit:])
        return (offHit + offAfter, asRet);
    return (len(sCode), None);
1754
def findAndParseFirstMacroInvocation(self, sCode, asMacro):
    """
    Tries the macro names in asMacro one by one, in the given order, and
    stops at the first one findAndParseMacroInvocation finds in sCode.

    Returns (len(sCode), None) if not found, parseMacroInvocation result if found.
    """
    for sCandidate in asMacro:
        (offEnd, asArgs) = self.findAndParseMacroInvocation(sCode, sCandidate);
        if asArgs is None:
            continue;
        return (offEnd, asArgs);
    return (len(sCode), None);
1764
def checkCodeForMacro(self, sCode):
    """
    Checks code for relevant macro invocation.

    Returns True if sCode holds an instruction decoder function definition
    (one of the FNIEMOP_XXX macros), False if not.  The mnemonic and
    decoder helper macros only update the current instruction.
    """
    # Nothing to do unless there is a parenthesis beyond the first column.
    if sCode.find('(') <= 0:
        return False;

    # Look for instruction decoder function definitions. ASSUME single line.
    asDecoderMacros = [ 'FNIEMOP_DEF',
                        'FNIEMOP_STUB',
                        'FNIEMOP_STUB_1',
                        'FNIEMOP_UD_STUB',
                        'FNIEMOP_UD_STUB_1' ];
    (_, asArgs) = self.findAndParseFirstMacroInvocation(sCode, asDecoderMacros);
    if asArgs is not None:
        sMacroName = asArgs[0];
        sFunction  = asArgs[1];

        # Without a preceding specification, the mnemonic is taken from the function name.
        if len(self.asCurInstr) == 0:
            self.addInstruction().sMnemonic = sFunction.split('_')[1];
        self.setInstrunctionAttrib('sFunction', sFunction);
        fIsStub = sMacroName.find('STUB') > 0;
        self.setInstrunctionAttrib('fStub', fIsStub, fOverwrite = True);
        self.setInstrunctionAttrib('fUdStub', sMacroName.find('UD_STUB') > 0, fOverwrite = True);
        if fIsStub:
            self.doneInstructions();
        return True;

    # IEMOP_MNEMONIC(a_Stats, a_szMnemonic) IEMOP_INC_STATS(a_Stats)
    (_, asArgs) = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC');
    if asArgs is not None and len(self.asCurInstr) == 1:
        self.setInstrunctionAttrib('sStats', asArgs[1]);
        self.setInstrunctionAttrib('sMnemonic', asArgs[1].split('_')[0]);

    # IEMOP_HLP_DECODED_NL_1(a_uDisOpNo, a_fIemOpFlags, a_uDisParam0, a_fDisOpType)
    # IEMOP_HLP_DECODED_NL_2(a_uDisOpNo, a_fIemOpFlags, a_uDisParam0, a_uDisParam1, a_fDisOpType)
    for sHlpMacro, cDisParams in ( ('IEMOP_HLP_DECODED_NL_1', 1), ('IEMOP_HLP_DECODED_NL_2', 2), ):
        (_, asArgs) = self.findAndParseMacroInvocation(sCode, sHlpMacro);
        if asArgs is not None and len(self.asCurInstr) == 1:
            self.setInstrunctionAttrib('sRawDisOpNo', asArgs[1]);
            self.setInstrunctionAttrib('sRawIemOpFlags', asArgs[2]);
            for iDisParam in range(cDisParams):
                self.setInstrunctionArrayAttrib('asRawDisParams', iDisParam, asArgs[3 + iDisParam]);

    return False;
1817
1818
def parse(self):
    """
    Parses the given file.

    Walks self.asLines starting at self.iLine, collecting multi-line
    comments for parseComment() and checking code lines for the macros of
    interest.

    Returns number of errors (the self.printErrors() result).
    Raises exception on fatal trouble.
    """
    self.debug('Parsing %s' % (self.sSrcFile,));

    while self.iLine < len(self.asLines):
        sLine = self.asLines[self.iLine];
        self.iLine  += 1;

        # We only look for comments, so only lines with a slash might possibly
        # influence the parser state.
        if sLine.find('/') >= 0:
            #self.debug('line %d: slash' % (self.iLine,));

            offLine = 0;
            while offLine < len(sLine):
                if self.iState == self.kiCode:
                    offHit = sLine.find('/*', offLine); # only multiline comments for now.
                    if offHit >= 0:
                        self.sComment     = '';
                        self.iCommentLine = self.iLine;
                        self.iState       = self.kiCommentMulti;
                        offLine = offHit + 2;
                    else:
                        offLine = len(sLine);

                elif self.iState == self.kiCommentMulti:
                    offHit = sLine.find('*/', offLine);
                    if offHit >= 0:
                        # End of the comment, parse what we have collected.
                        self.sComment += sLine[offLine:offHit];
                        self.iState    = self.kiCode;
                        offLine = offHit + 2;
                        self.parseComment();
                    else:
                        self.sComment += sLine[offLine:];
                        offLine = len(sLine);
                else:
                    assert False;

        # No slash, but append the line if in multi-line comment.
        elif self.iState == self.kiCommentMulti:
            #self.debug('line %d: multi' % (self.iLine,));
            self.sComment += sLine;

        # No slash, but check code line for relevant macro.
        elif self.iState == self.kiCode and sLine.find('IEMOP_') >= 0:
            #self.debug('line %d: macro' % (self.iLine,));
            self.checkCodeForMacro(sLine);

        # If the line is a '}' in the first position, complete the instructions.
        # (Fixed: indexing the first character raised IndexError on an empty line.)
        elif self.iState == self.kiCode and sLine.startswith('}'):
            #self.debug('line %d: }' % (self.iLine,));
            self.doneInstructions();

    self.doneInstructions();
    self.debug('%s instructions in %s' % (self.cTotalInstr, self.sSrcFile,));
    self.debug('%s instruction stubs' % (self.cTotalStubs,));
    return self.printErrors();
1880
1881
def __parseFileByName(sSrcFile, sDefaultMap):
    """
    Reads one source file and runs SimpleParser over its lines.

    Returns whatever SimpleParser.parse() returns.
    Raises Exception if the file cannot be opened or read.  A ParserException
    from the parser is printed before being passed on; any other exception is
    passed on untouched.
    """
    #
    # Load the whole file as a list of lines.
    #
    try:
        oSrcFile = open(sSrcFile, "r");
    except Exception as oXcpt:
        raise Exception("failed to open %s for reading: %s" % (sSrcFile, oXcpt,));
    with oSrcFile: # closes the file on the way out, also when reading fails.
        try:
            asLines = oSrcFile.readlines();
        except Exception as oXcpt:
            raise Exception("failed to read %s: %s" % (sSrcFile, oXcpt,));

    #
    # Hand the lines to the parser.
    #
    try:
        return SimpleParser(sSrcFile, asLines, sDefaultMap).parse();
    except ParserException as oXcpt:
        print(str(oXcpt));
        raise;
1912
1913
def __parseAll():
    """
    Runs the parser over the instruction source files living next to this
    script.

    Returns True when no parse errors were counted.
    Raises Exception carrying the error count otherwise.
    """
    sScriptDir = os.path.dirname(os.path.abspath(__file__));

    # ( sDefaultMap, file name ) pairs to feed to the parser.
    atInputs = (
        ( 'one',    'IEMAllInstructionsOneByte.cpp.h'),
        #( 'two0f',  'IEMAllInstructionsTwoByte0f.cpp.h'),
    );

    cTotalErrors = sum(__parseFileByName(os.path.join(sScriptDir, sFile), sMap) for sMap, sFile in atInputs);
    if cTotalErrors == 0:
        return True;
    raise Exception('%d parse errors' % (cTotalErrors,));
1931
1932
1933
# Kick off the parsing.  This is a plain module-level call, so it runs both
# when the file is executed as a script and when it is imported; __parseAll
# raises if any parse errors were counted.
__parseAll();
1935
1936
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette