VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllInstructionsPython.py@ 65834

Last change on this file since 65834 was 65834, checked in by vboxsync, 8 years ago

IEMAllInstructionsPython.py: some more tinkering.

  • Property svn:eol-style set to LF
  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 80.1 KB
Line 
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3# $Id: IEMAllInstructionsPython.py 65834 2017-02-21 16:21:36Z vboxsync $
4
5"""
6IEM instruction extractor.
7
8This script/module parses the IEMAllInstruction*.cpp.h files next to it and
9collects information about the instructions. It can then be used to generate
10disassembler tables and tests.
11"""
12
13__copyright__ = \
14"""
15Copyright (C) 2017 Oracle Corporation
16
17This file is part of VirtualBox Open Source Edition (OSE), as
18available from http://www.virtualbox.org. This file is free software;
19you can redistribute it and/or modify it under the terms of the GNU
20General Public License (GPL) as published by the Free Software
21Foundation, in version 2 as it comes in the "COPYING" file of the
22VirtualBox OSE distribution. VirtualBox OSE is distributed in the
23hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
24
25The contents of this file may alternatively be used under the terms
26of the Common Development and Distribution License Version 1.0
27(CDDL) only, as it comes in the "COPYING.CDDL" file of the
28VirtualBox OSE distribution, in which case the provisions of the
29CDDL are applicable instead of those of the GPL.
30
31You may elect to license modified versions of this file under the
32terms and conditions of either the GPL or the CDDL or both.
33"""
34__version__ = "$Revision: 65834 $"
35
36# Standard python imports.
37import os
38import re
39import sys
40
41# Only the main script needs to modify the path.
42g_ksValidationKitDir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),
43 'ValidationKit');
44sys.path.append(g_ksValidationKitDir);
45
46from common import utils;
47
48# Python 3 hacks:
49if sys.version_info[0] >= 3:
50 long = int; # pylint: disable=redefined-builtin,invalid-name
51
52
53# Annotation example:
54#
55# \@opmnemonic add
56# \@op1 reg:Eb
57# \@op2 rm:Gb
58# \@opmaps onebyte
59# \@oppfx none
60# \@opcode 0x00
61# \@openc ModR/M
62# \@opfltest none
63# \@opflmodify of,sz,zf,af,pf,cf
64# \@opflundef none
65# \@opflset none
66# \@opflclear none
67# \@ophints harmless
68# \@opstats add_Eb_Gb
69# \@opgroup op_gen_arith_bin
70# \@optest in1=1 in2=1 -> out1=2 outfl=a?,p?
71# \@optest oppfx:o32 in1=0xfffffffe:dw in2=1:dw -> out1=0xffffffff:dw outfl=a?,p?
72
73
## EFLAGS bit and mask values, keyed by their X86_EFL_XXX constant name.
## Single flags are spelled out as bit shifts so the bit number is visible.
g_kdX86EFlagsConstants = {
    'X86_EFL_CF':           1 << 0,
    'X86_EFL_1':            1 << 1,
    'X86_EFL_PF':           1 << 2,
    'X86_EFL_AF':           1 << 4,
    'X86_EFL_ZF':           1 << 6,
    'X86_EFL_SF':           1 << 7,
    'X86_EFL_TF':           1 << 8,
    'X86_EFL_IF':           1 << 9,
    'X86_EFL_DF':           1 << 10,
    'X86_EFL_OF':           1 << 11,
    'X86_EFL_IOPL':         (1 << 12) | (1 << 13),
    'X86_EFL_NT':           1 << 14,
    'X86_EFL_RF':           1 << 16,
    'X86_EFL_VM':           1 << 17,
    'X86_EFL_AC':           1 << 18,
    'X86_EFL_VIF':          1 << 19,
    'X86_EFL_VIP':          1 << 20,
    'X86_EFL_ID':           1 << 21,
    'X86_EFL_LIVE_MASK':    0x003f7fd5,
    'X86_EFL_RA1_MASK':     1 << 1,
};
96
97
98def _isValidOpcodeByte(sOpcode):
99 """
100 Checks if sOpcode is a valid lower case opcode byte.
101 Returns true/false.
102 """
103 if len(sOpcode) == 4:
104 if sOpcode[:2] == '0x':
105 if sOpcode[2] in '0123456789abcdef':
106 if sOpcode[3] in '0123456789abcdef':
107 return True;
108 return False;
109
110
class InstructionMap(object):
    """
    Instruction map.

    The opcode map provides the lead opcode bytes (empty for the one byte
    opcode map).  An instruction can be member of multiple opcode maps as long
    as it uses the same opcode value within the map (because of VEX).
    """

    ## Valid sEncoding values.
    kdEncodings = {
        'legacy':   [],
        'vex1':     [], ##< VEX or EVEX prefix with vvvvv = 1
        'vex2':     [], ##< VEX or EVEX prefix with vvvvv = 2
        'vex3':     [], ##< VEX or EVEX prefix with vvvvv = 3
        'xop8':     [], ##< XOP prefix with vvvvv = 8
        'xop9':     [], ##< XOP prefix with vvvvv = 9
        'xop10':    [], ##< XOP prefix with vvvvv = 10
    };

    ## Valid sSelector values.
    kdSelectors = {
        'byte':     [], ##< next opcode byte selects the instruction (default).
        '/r':       [], ##< modrm.reg selects the instruction.
        'mod /r':   [], ##< modrm.reg and modrm.mod selects the instruction.
        '!11 /r':   [], ##< modrm.reg selects the instruction with modrm.mod != 0y11.
        '11 /r':    [], ##< modrm.reg select the instruction with modrm.mod == 0y11.
        '11':       [], ##< modrm.reg and modrm.rm select the instruction with modrm.mod == 0y11.
    };

    def __init__(self, sName, asLeadOpcodes = None, sSelector = 'byte', sEncoding = 'legacy'):
        """
        Creates an empty map.

        sName is the map name, asLeadOpcodes the optional list of lead opcode
        bytes ('0xNN' strings), sSelector a kdSelectors key and sEncoding a
        kdEncodings key.  Invalid arguments trip assertions.
        """
        assert sEncoding in self.kdEncodings;
        assert sSelector in self.kdSelectors;
        if asLeadOpcodes is not None:
            assert all(_isValidOpcodeByte(sOpcode) for sOpcode in asLeadOpcodes);
        else:
            asLeadOpcodes = [];

        self.sName          = sName;
        self.asLeadOpcodes  = asLeadOpcodes;    ##< Lead opcode bytes formatted as hex strings like '0x0f'.
        self.sSelector      = sSelector;        ##< The member selector, see kdSelectors.
        self.sEncoding      = sEncoding;        ##< The encoding, see kdEncodings.
        self.aoInstructions = [];               # type: list(Instruction)
152
153
class TestType(object):
    """
    Test value type.

    This base class deals with integer like values.  The fUnsigned constructor
    parameter indicates the default stance on zero vs sign extending.  It is
    possible to override fUnsigned=True by prefixing the value with '+' or '-'.
    """
    def __init__(self, sName, acbSizes = None, fUnsigned = True):
        self.sName     = sName;
        self.acbSizes  = [1, 2, 4, 8, 16, 32] if acbSizes is None else acbSizes;  # Normal sizes.
        self.fUnsigned = fUnsigned;

    class BadValue(Exception):
        """ Bad value exception. """
        def __init__(self, sMessage):
            Exception.__init__(self, sMessage);
            self.sMessage = sMessage;

    @staticmethod
    def _fitsInBytes(iValue, cb):
        """
        Checks whether iValue can be stored in cb bytes: as an unsigned value
        when non-negative, as two's complement when negative.
        """
        if iValue >= 0:
            return iValue < (1 << (cb * 8));
        return iValue >= -(1 << (cb * 8 - 1));

    def get(self, sValue):
        """
        Get the shortest normal sized byte representation of sValue.

        The value is a decimal or '0x' prefixed hexadecimal integer string,
        optionally preceded by a '+' or '-' sign.  The bytes are returned in
        little endian order.  Negative values are encoded as two's complement.
        Values too large for the largest normal size are padded to a whole
        multiple of that size.

        Returns ((fSignExtend, bytearray), ) or ((fSignExtend, bytearray), (fSignExtend, bytearray), ).
        The latter form is for AND+OR pairs where the first entry is what to
        AND with the field and the second the one or OR with.  (This base class
        only produces the first form.)

        Raises BadValue if invalid value.
        """
        if len(sValue) == 0:
            raise TestType.BadValue('empty value');

        # Deal with sign and detect hexadecimal or decimal.
        fSignExtend = not self.fUnsigned;
        if sValue[0] == '-' or sValue[0] == '+':
            fSignExtend = True;
            fHex = len(sValue) > 3 and sValue[1:3].lower() == '0x';
        else:
            fHex = len(sValue) > 2 and sValue[0:2].lower() == '0x';

        # Convert to integer.  int() promotes to long on python 2, so this
        # works without the python 3 'long' alias.
        try:
            iValue = int(sValue, 16 if fHex else 10);
        except (TypeError, ValueError) as oXcpt:
            raise TestType.BadValue('failed to convert "%s" to integer (%s)' % (sValue, oXcpt));

        # Pick the smallest normal size holding the value, falling back on
        # whole multiples of the largest size for oversized values.
        cbValue = 0;
        for cb in self.acbSizes:
            if self._fitsInBytes(iValue, cb):
                cbValue = cb;
                break;
        if cbValue == 0:
            cbValue = self.acbSizes[-1] * 2;
            while not self._fitsInBytes(iValue, cbValue):
                cbValue += self.acbSizes[-1];

        # Masking turns negative values into their two's complement form
        # (hex() would yield '-0x...' for them).  Emit the bytes LSB first.
        uValue  = iValue & ((1 << (cbValue * 8)) - 1);
        abValue = bytearray([(uValue >> (ib * 8)) & 0xff for ib in range(cbValue)]);

        return ((fSignExtend, abValue),);

    def validate(self, sValue):
        """
        Returns True if value is okay, error message on failure.
        """
        try:
            self.get(sValue);
        except TestType.BadValue as oXcpt:
            return oXcpt.sMessage;
        return True;

    def isAndOrPair(self, sValue):
        """
        Checks if sValue is a pair.  Never the case for plain integer values.
        """
        _ = sValue;
        return False;
249
250
class TestTypeEflags(TestType):
    """
    Special value parsing for EFLAGS/RFLAGS/FLAGS.

    Values are comma separated flag mnemonics which are looked up in
    SimpleParser.kdEFlags.  A constant name starting with '!' there means the
    flag is to be cleared rather than set.
    """

    ## Flag mnemonics that isAndOrPair() takes as the sign of an AND+OR pair.
    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 };

    def __init__(self, sName):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);

    def get(self, sValue):
        """
        Converts a comma separated list of flag mnemonics.

        Returns the TestType.get() result for the flags to set when no flag
        is to be cleared, otherwise an (AND-entry, OR-entry) pair where the
        AND value is the inverted mask of the flags to clear.

        Raises BadValue on unknown flag mnemonics.
        """
        fClear = 0;
        fSet   = 0;
        for sFlag in sValue.split(','):
            sConstant = SimpleParser.kdEFlags.get(sFlag, None);
            if sConstant is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sFlag, sValue))
            if sConstant[0] == '!':
                fClear |= g_kdX86EFlagsConstants[sConstant[1:]];
            else:
                fSet   |= g_kdX86EFlagsConstants[sConstant];

        aoSet = TestType.get(self, '0x%x' % (fSet,));
        if fClear != 0:
            # ~fClear is negative, so this formats as a signed hex string ('-0x...').
            aoClear = TestType.get(self, '%#x' % (~fClear))
            assert self.isAndOrPair(sValue) == True;
            return (aoClear[0], aoSet[0]);
        assert self.isAndOrPair(sValue) == False;
        return aoSet;

    def isAndOrPair(self, sValue):
        """
        Checks if sValue is an AND+OR pair, i.e. whether any of the
        kdZeroValueFlags mnemonics occurs in it (substring search).
        """
        for sZeroFlag in self.kdZeroValueFlags:
            if sValue.find(sZeroFlag) >= 0:
                return True;
        return False;
298
299
300
class TestInOut(object):
    """
    One input or output state modifier.

    This should be thought as values to modify BS3REGCTX and extended (needs
    to be structured) state.
    """
    ## Assigned operators.
    kasOperators = [ '&~=', '&=', '|=', '=', ];

    ## Types
    # NOTE(review): 'int' is created without fUnsigned and thus gets the same
    #               default (fUnsigned = True) as 'uint' - confirm this is intended.
    kdTypes = {
        'uint': TestType('uint', fUnsigned = True),
        'int':  TestType('int'),
        'efl':  TestTypeEflags('efl'),
    };

    ## CPU context fields.
    ## Maps the field name to ( default type, tbd, ).  The table is built from
    ## groups of names sharing a default type, in declaration order.
    kdFields = dict(
        (sField, (sType, '',))
        for sType, asFields in (
            # Operands (@op1 thru @op4).
            ('uint', ('op1', 'op2', 'op3', 'op4',)),
            # Flags.
            ('efl',  ('efl',)),
            # 8-bit GPRs.
            ('uint', ('al', 'cl', 'dl', 'bl', 'ah', 'ch', 'dh', 'bh',
                      'r8l', 'r9l', 'r10l', 'r11l', 'r12l', 'r13l', 'r14l', 'r15l',)),
            # 16-bit GPRs.
            ('uint', ('ax', 'dx', 'cx', 'bx', 'sp', 'bp', 'si', 'di',
                      'r8w', 'r9w', 'r10w', 'r11w', 'r12w', 'r13w', 'r14w', 'r15w',)),
            # 32-bit GPRs.
            ('uint', ('eax', 'edx', 'ecx', 'ebx', 'esp', 'ebp', 'esi', 'edi',
                      'r8d', 'r9d', 'r10d', 'r11d', 'r12d', 'r13d', 'r14d', 'r15d',)),
            # 64-bit GPRs.
            ('uint', ('rax', 'rdx', 'rcx', 'rbx', 'rsp', 'rbp', 'rsi', 'rdi',
                      'r8', 'r9', 'r10', 'r11', 'r12', 'r13', 'r14', 'r15',)),
            # 16-bit, 32-bit or 64-bit registers according to operand size.
            ('uint', ('oz.rax', 'oz.rdx', 'oz.rcx', 'oz.rbx', 'oz.rsp', 'oz.rbp', 'oz.rsi', 'oz.rdi',
                      'oz.r8', 'oz.r9', 'oz.r10', 'oz.r11', 'oz.r12', 'oz.r13', 'oz.r14', 'oz.r15',)),
        )
        for sField in asFields
    );

    def __init__(self, sField, sOp, sValue, sType):
        """
        sField must be a kdFields key and sOp one of kasOperators (asserted);
        sValue and sType are stored as given.
        """
        assert sField in self.kdFields;
        assert sOp in self.kasOperators;
        self.sField, self.sOp     = sField, sOp;
        self.sValue, self.sType   = sValue, sType;
425
426
class TestSelector(object):
    """
    One selector for an instruction test.

    A selector is a (variable, compare operator, value) triplet.
    """
    ## Selector compare operators.
    kasCompareOps = [ '==', '!=' ];

    ## Selector variables and their valid values.
    kdVariables = {
        # Operand size.
        'size':     { 'o16': 'size_o16', 'o32': 'size_o32', 'o64': 'size_o64', },
        # Execution ring.
        'ring':     {
            '0':    'ring_0',
            '1':    'ring_1',
            '2':    'ring_2',
            '3':    'ring_3',
            '0..2': 'ring_0_thru_2',
            '1..3': 'ring_1_thru_3',
        },
        # Basic code mode.
        'codebits': { '64': 'code_64bit', '32': 'code_32bit', '16': 'code_16bit', },
        # cpu modes.
        'mode':     {
            'real': 'mode_real',
            'prot': 'mode_prot',
            'long': 'mode_long',
            'v86':  'mode_v86',
            'smm':  'mode_smm',
            'vmx':  'mode_vmx',
            'svm':  'mode_svm',
        },
        # paging on/off
        'paging':   { 'on': 'paging_on', 'off': 'paging_off', },
    };

    ## Selector shorthand predicates.
    ## These translates into variable expressions.
    kdPredicates = {
        # Operand size.
        'o16':          'size==o16',
        'o32':          'size==o32',
        'o64':          'size==o64',
        # Execution ring.
        'ring0':        'ring==0',
        '!ring0':       'ring==1..3',
        'ring1':        'ring==1',
        'ring2':        'ring==2',
        'ring3':        'ring==3',
        'user':         'ring==3',
        'supervisor':   'ring==0..2',
        # cpu modes.
        'real':         'mode==real',
        'prot':         'mode==prot',
        'long':         'mode==long',
        'v86':          'mode==v86',
        'smm':          'mode==smm',
        'vmx':          'mode==vmx',
        'svm':          'mode==svm',
        # paging on/off
        'paging':       'paging==on',
        '!paging':      'paging==off',
    };

    def __init__(self, sVariable, sOp, sValue):
        """
        sVariable must be a kdVariables key, sOp one of kasCompareOps and
        sValue a valid value for the variable (all asserted).
        """
        assert sVariable in self.kdVariables;
        dValidValues = self.kdVariables[sVariable];
        assert sOp in self.kasCompareOps;
        assert sValue in dValidValues;
        self.sVariable, self.sOp, self.sValue = sVariable, sOp, sValue;
503
504
class InstructionTest(object):
    """
    Instruction test.

    Ties an instruction to the lists of inputs, outputs and selectors making
    up one test; the lists start out empty.
    """

    def __init__(self, oInstr): # type: (InstructionTest, Instruction) -> None
        self.aoSelectors = [];      # type: list(TestSelector)
        self.aoOutputs   = [];
        self.aoInputs    = [];
        self.oInstr      = oInstr;  # type: Instruction
515
516
class Operand(object):
    """
    Instruction operand.

    Described by where it is found (kdLocations) and its type (kdTypes).
    """

    ## \@op[1-4] locations.
    kdLocations = {
        'reg':  [], ## modrm.reg
        'rm':   [], ## modrm.rm
    };

    ## \@op[1-4] types.
    kdTypes = {
        'Eb':   [],
        'Gb':   [],
    };

    def __init__(self, sWhere, sType):
        """ sWhere must be a kdLocations key and sType a kdTypes key (asserted). """
        assert sWhere in self.kdLocations;
        assert sType in self.kdTypes;
        self.sWhere, self.sType = sWhere, sType;    ##< kdLocations key, kdTypes key.
539
540
class Instruction(object):
    """
    Instruction.

    Plain attribute container; everything but the source location passed to
    the constructor starts out as None, False, zero or an empty container.
    """

    def __init__(self, sSrcFile, iLine):
        ## @name Decoding info
        ## @{
        self.sSrcFile       = sSrcFile;
        self.iLineCreated   = iLine;
        self.iLineCompleted = None;
        self.cOpTags        = 0;
        ## @}

        ## @name Core attributes.
        ## @{
        self.sMnemonic      = None;
        self.sBrief         = None;
        self.asDescSections = [];       # type: list(str)
        self.aoMaps         = [];       # type: list(InstructionMap)
        self.aoOperands     = [];       # type: list(Operand)
        self.sPrefix        = None;     ##< Single prefix: None, 0x66, 0xf3, 0xf2
        self.sOpcode        = None;
        self.sEncoding      = None;
        self.asFlTest       = None;
        self.asFlModify     = None;
        self.asFlUndefined  = None;
        self.asFlSet        = None;
        self.asFlClear      = None;
        self.dHints         = {};       ##< Dictionary of instruction hints, flags, whatnot. (Dictionary for speed; dummy value).
        self.asCpuIds       = [];       ##< The CPUID feature bit names for this instruction. If multiple, assume AND.
        self.asReqFeatures  = [];       ##< Which features are required to be enabled to run this instruction.
        self.aoTests        = [];       # type: list(InstructionTest)
        self.oCpus          = None;     ##< Some CPU restriction expression...
        self.sGroup         = None;
        self.fUnused        = False;    ##< Unused instruction.
        self.fInvalid       = False;    ##< Invalid instruction (like UD2).
        self.sInvalidStyle  = None;     ##< Invalid behaviour style
        ## @}

        ## @name Implementation attributes.
        ## @{
        self.sStats         = None;
        self.sFunction      = None;
        self.fStub          = False;
        self.fUdStub        = False;
        ## @}

        ## @name Intermediate input fields.
        ## @{
        self.sRawDisOpNo    = None;
        self.asRawDisParams = [];
        self.sRawIemOpFlags = None;
        self.sRawOldOpcodes = None;
        ## @}
596
597
## All the instructions.
g_aoAllInstructions = []; # type: list(Instruction)

## Instruction maps, keyed by map name.
## Each name appears exactly once; a repeated key in a dict display would
## silently replace the earlier entry.
g_dInstructionMaps = {
    'one':          InstructionMap('one'),
    # Group 1 is selected by modrm.reg for all four of its lead opcodes.
    'grp1_80':      InstructionMap('grp1_80',   asLeadOpcodes = ['0x80',], sSelector = '/r'),
    'grp1_81':      InstructionMap('grp1_81',   asLeadOpcodes = ['0x81',], sSelector = '/r'),
    'grp1_82':      InstructionMap('grp1_82',   asLeadOpcodes = ['0x82',], sSelector = '/r'),
    'grp1_83':      InstructionMap('grp1_83',   asLeadOpcodes = ['0x83',], sSelector = '/r'),
    'grp1a':        InstructionMap('grp1a',     asLeadOpcodes = ['0x8f',], sSelector = '/r'),
    'grp2_c0':      InstructionMap('grp2_c0',   asLeadOpcodes = ['0xc0',], sSelector = '/r'),
    'grp2_c1':      InstructionMap('grp2_c1',   asLeadOpcodes = ['0xc1',], sSelector = '/r'),
    'grp2_d0':      InstructionMap('grp2_d0',   asLeadOpcodes = ['0xd0',], sSelector = '/r'),
    'grp2_d1':      InstructionMap('grp2_d1',   asLeadOpcodes = ['0xd1',], sSelector = '/r'),
    'grp2_d2':      InstructionMap('grp2_d2',   asLeadOpcodes = ['0xd2',], sSelector = '/r'),
    'grp2_d3':      InstructionMap('grp2_d3',   asLeadOpcodes = ['0xd3',], sSelector = '/r'),
    'grp3_f6':      InstructionMap('grp3_f6',   asLeadOpcodes = ['0xf6',], sSelector = '/r'),
    'grp3_f7':      InstructionMap('grp3_f7',   asLeadOpcodes = ['0xf7',], sSelector = '/r'),
    'grp4':         InstructionMap('grp4',      asLeadOpcodes = ['0xfe',], sSelector = '/r'),
    'grp5':         InstructionMap('grp5',      asLeadOpcodes = ['0xff',], sSelector = '/r'),
    'grp11_c6_m':   InstructionMap('grp11_c6_m',asLeadOpcodes = ['0xc6',], sSelector = '!11 /r'),
    'grp11_c6_r':   InstructionMap('grp11_c6_r',asLeadOpcodes = ['0xc6',], sSelector = '11'),    # xabort
    'grp11_c7_m':   InstructionMap('grp11_c7_m',asLeadOpcodes = ['0xc7',], sSelector = '!11 /r'),
    'grp11_c7_r':   InstructionMap('grp11_c7_r',asLeadOpcodes = ['0xc7',], sSelector = '11'),    # xbegin

    'two0f':        InstructionMap('two0f',     asLeadOpcodes = ['0x0f',]),
    'grp6':         InstructionMap('grp6',      asLeadOpcodes = ['0x0f', '0x00',], sSelector = '/r'),
    'grp7_m':       InstructionMap('grp7_m',    asLeadOpcodes = ['0x0f', '0x01',], sSelector = '!11 /r'),
    'grp7_r':       InstructionMap('grp7_r',    asLeadOpcodes = ['0x0f', '0x01',], sSelector = '11'),
    'grp8':         InstructionMap('grp8',      asLeadOpcodes = ['0x0f', '0xba',], sSelector = '/r'),
    'grp9':         InstructionMap('grp9',      asLeadOpcodes = ['0x0f', '0xc7',], sSelector = 'mod /r'),
    'grp10':        InstructionMap('grp10',     asLeadOpcodes = ['0x0f', '0xb9',], sSelector = '/r'), # UD1 /w modr/m
    'grp12':        InstructionMap('grp12',     asLeadOpcodes = ['0x0f', '0x71',], sSelector = 'mod /r'),
    'grp13':        InstructionMap('grp13',     asLeadOpcodes = ['0x0f', '0x72',], sSelector = 'mod /r'),
    'grp14':        InstructionMap('grp14',     asLeadOpcodes = ['0x0f', '0x73',], sSelector = 'mod /r'),
    'grp15':        InstructionMap('grp15',     asLeadOpcodes = ['0x0f', '0xae',], sSelector = 'mod /r'),
    'grp16':        InstructionMap('grp16',     asLeadOpcodes = ['0x0f', '0x18',], sSelector = 'mod /r'),
    'grpA17':       InstructionMap('grpA17',    asLeadOpcodes = ['0x0f', '0x78',], sSelector = '/r'), # AMD: EXTRQ weirdness
    'grpP':         InstructionMap('grpP',      asLeadOpcodes = ['0x0f', '0x0d',], sSelector = '/r'), # AMD: prefetch

    'three0f38':    InstructionMap('three0f38', asLeadOpcodes = ['0x0f', '0x38',]),
    'three0f3a':    InstructionMap('three0f3a', asLeadOpcodes = ['0x0f', '0x3a',]),

    'vexmap1':      InstructionMap('vexmap1',   sEncoding = 'vex1'),
    'vexgrp12':     InstructionMap('vexgrp12',  sEncoding = 'vex1', asLeadOpcodes = ['0x71',], sSelector = 'mod /r'),
    'vexgrp13':     InstructionMap('vexgrp13',  sEncoding = 'vex1', asLeadOpcodes = ['0x72',], sSelector = 'mod /r'),
    'vexgrp14':     InstructionMap('vexgrp14',  sEncoding = 'vex1', asLeadOpcodes = ['0x73',], sSelector = 'mod /r'),
    'vexgrp15':     InstructionMap('vexgrp15',  sEncoding = 'vex1', asLeadOpcodes = ['0xae',], sSelector = 'mod /r'),
    'vexgrp17':     InstructionMap('vexgrp17',  sEncoding = 'vex1', asLeadOpcodes = ['0xf3',], sSelector = '/r'),

    'vexmap2':      InstructionMap('vexmap2',   sEncoding = 'vex2'),
    'vexmap3':      InstructionMap('vexmap3',   sEncoding = 'vex3'),

    'xopmap8':      InstructionMap('xopmap8',   sEncoding = 'xop8'),
    'xopmap9':      InstructionMap('xopmap9',   sEncoding = 'xop9'),
    'xopgrp1':      InstructionMap('xopgrp1',   sEncoding = 'xop9',  asLeadOpcodes = ['0x01'], sSelector = '/r'),
    'xopgrp2':      InstructionMap('xopgrp2',   sEncoding = 'xop9',  asLeadOpcodes = ['0x02'], sSelector = '/r'),
    'xopgrp3':      InstructionMap('xopgrp3',   sEncoding = 'xop9',  asLeadOpcodes = ['0x12'], sSelector = '/r'),
    'xopmap10':     InstructionMap('xopmap10',  sEncoding = 'xop10'),
    'xopgrp4':      InstructionMap('xopgrp4',   sEncoding = 'xop10', asLeadOpcodes = ['0x12'], sSelector = '/r'),
};
661
662
663
class ParserException(Exception):
    """ Parser exception; carries the complete error message. """
    def __init__(self, sMessage):
        super(ParserException, self).__init__(sMessage);
668
669
670class SimpleParser(object):
671 """
672 Parser of IEMAllInstruction*.cpp.h instruction specifications.
673 """
674
675 ## @name Parser state.
676 ## @{
677 kiCode = 0;
678 kiCommentMulti = 1;
679 ## @}
680
def __init__(self, sSrcFile, asLines, sDefaultMap):
    """
    Sets up the parser state for one source file.

    sSrcFile is the file name used in messages, asLines the lines of the
    file and sDefaultMap the name of the g_dInstructionMaps entry that
    instructions without any explicit map are added to (asserted valid).
    """
    assert sDefaultMap in g_dInstructionMaps;
    self.oDefaultMap  = g_dInstructionMaps[sDefaultMap];

    # The input and the current position/state.
    self.sSrcFile     = sSrcFile;
    self.asLines      = asLines;
    self.iLine        = 0;
    self.iState       = self.kiCode;
    self.sComment     = '';
    self.iCommentLine = 0;
    self.asCurInstr   = [];

    # Statistics.
    self.cTotalInstr  = 0;
    self.cTotalStubs  = 0;
    self.cTotalTagged = 0;

    # Collected error messages and debug output switch.
    self.asErrors     = [];
    self.fDebug       = True;

    # Name validation expressions.
    self.oReMacroName    = re.compile('^[A-Za-z_][A-Za-z0-9_]*$');
    self.oReMnemonic     = re.compile('^[A-Za-z_][A-Za-z0-9_]*$');
    self.oReStatsName    = re.compile('^[A-Za-z_][A-Za-z0-9_]*$');
    self.oReFunctionName = re.compile('^iemOp_[A-Za-z_][A-Za-z0-9_]*$');
    self.oReGroupName    = re.compile('^op_[a-z0-9]+(|_[a-z0-9]+|_[a-z0-9]+_[a-z0-9]+)$');

    # Tag name -> handler method.  Several tags share a handler.
    self.dTagHandlers = {
        '@opbrief':     self.parseTagOpBrief,
        '@opdesc':      self.parseTagOpDesc,
        '@opmnemonic':  self.parseTagOpMnemonic,
        '@op1':         self.parseTagOpOperandN,
        '@op2':         self.parseTagOpOperandN,
        '@op3':         self.parseTagOpOperandN,
        '@op4':         self.parseTagOpOperandN,
        '@oppfx':       self.parseTagOpPfx,
        '@opmaps':      self.parseTagOpMaps,
        '@opcode':      self.parseTagOpcode,
        '@openc':       self.parseTagOpEnc,
        '@opfltest':    self.parseTagOpEFlags,
        '@opflmodify':  self.parseTagOpEFlags,
        '@opflundef':   self.parseTagOpEFlags,
        '@opflset':     self.parseTagOpEFlags,
        '@opflclear':   self.parseTagOpEFlags,
        '@ophints':     self.parseTagOpHints,
        '@opcpuid':     self.parseTagOpCpuId,
        '@opgroup':     self.parseTagOpGroup,
        '@opunused':    self.parseTagOpUnusedInvalid,
        '@opinvalid':   self.parseTagOpUnusedInvalid,
        '@opinvlstyle': self.parseTagOpUnusedInvalid,
        '@optest':      self.parseTagOpTest,
        '@opstats':     self.parseTagOpStats,
        '@opfunction':  self.parseTagOpFunction,
        '@opdone':      self.parseTagOpDone,
    };
734
def raiseError(self, sMessage):
    """
    Raises a ParserException with sMessage prefixed by the source file name
    and the current line number.
    """
    sFullMessage = "%s:%d: error: %s" % (self.sSrcFile, self.iLine, sMessage,);
    raise ParserException(sFullMessage);
740
def raiseCommentError(self, iLineInComment, sMessage):
    """
    Like raiseError, except that the reported line number is
    self.iCommentLine + iLineInComment.
    """
    iErrorLine = self.iCommentLine + iLineInComment;
    raise ParserException("%s:%d: error: %s" % (self.sSrcFile, iErrorLine, sMessage,));
746
def error(self, sMessage):
    """
    Records an error for the current line in self.asErrors.
    Always returns False, so callers can 'return self.error(...)'.
    """
    sError = u'%s:%d: error: %s\n' % (self.sSrcFile, self.iLine, sMessage,);
    self.asErrors.append(sError);
    return False;
754
def errorComment(self, iLineInComment, sMessage):
    """
    Records an error for line self.iCommentLine + iLineInComment in
    self.asErrors.
    Always returns False.
    """
    iErrorLine = self.iCommentLine + iLineInComment;
    self.asErrors.append(u'%s:%d: error: %s\n' % (self.sSrcFile, iErrorLine, sMessage,));
    return False;
762
def printErrors(self):
    """
    Writes the collected errors (if any) to stderr in one go.
    Returns the number of errors.
    """
    cErrors = len(self.asErrors);
    if cErrors > 0:
        sys.stderr.write(u''.join(self.asErrors));
    return cErrors;
771
def debug(self, sMessage):
    """
    Prints sMessage with a 'debug: ' prefix when self.fDebug is set,
    otherwise does nothing.
    """
    if not self.fDebug:
        return;
    print('debug: %s' % (sMessage,));
777
778
def addInstruction(self, iLine = None):
    """
    Creates a new instruction and appends it to both the global instruction
    list and the list of current instructions.  The creation line is iLine
    if given, otherwise the current line.

    Returns the new instruction.
    """
    if iLine is None:
        iLine = self.iLine;
    oInstr = Instruction(self.sSrcFile, iLine);
    self.asCurInstr.append(oInstr);
    g_aoAllInstructions.append(oInstr);
    return oInstr;
787
def doneInstructionOne(self, oInstr, iLine):
    """
    Complete the parsing by processing, validating and expanding raw inputs.

    Marks oInstr as completed on iLine, applies the default map when the
    instruction has none, and registers it with each of its maps.
    Returns True.
    """
    assert oInstr.iLineCompleted is None;
    oInstr.iLineCompleted = iLine;

    if oInstr.cOpTags <= 0:
        #
        # Unspecified legacy stuff.  We generally only got a few things to go on here.
        #   /** Opcode 0x0f 0x00 /0. */
        #   FNIEMOPRM_DEF(iemOp_Grp6_sldt)
        #
        # Placeholder: nothing is derived from sRawOldOpcodes/sMnemonic yet.
        pass;
    elif oInstr.sStats is None:
        #
        # Specified instruction without a stats name.  Placeholder as well.
        #
        pass;

    #
    # Apply default map and then add the instruction to all it's groups.
    #
    if not oInstr.aoMaps:
        oInstr.aoMaps = [ self.oDefaultMap, ];
    for oMap in oInstr.aoMaps:
        oMap.aoInstructions.append(oInstr);

    sSummary = '%d..%d: %s; %d @op tags' \
             % (oInstr.iLineCreated, oInstr.iLineCompleted, oInstr.sFunction, oInstr.cOpTags);
    self.debug(sSummary);
    return True;
823
def doneInstructions(self, iLineInComment = None):
    """
    Done with current instruction(s).

    Completes every current instruction, using the current line as
    completion line unless iLineInComment is given, in which case it is
    self.iCommentLine + iLineInComment.  Updates the stub and instruction
    totals and resets the comment and current instruction list.
    Returns True.
    """
    cStubs = 0;
    for oInstr in self.asCurInstr:
        if iLineInComment is None:
            iLineDone = self.iLine;
        else:
            iLineDone = self.iCommentLine + iLineInComment;
        self.doneInstructionOne(oInstr, iLineDone);
        if oInstr.fStub:
            cStubs += 1;

    self.cTotalStubs += cStubs;
    self.cTotalInstr += len(self.asCurInstr);

    self.asCurInstr = [];
    self.sComment   = '';
    return True;
838
def setInstrunctionAttrib(self, sAttrib, oValue, fOverwrite = False):
    """
    Sets the sAttrib attribute of all current instructions to oValue.

    Unless fOverwrite is True, only attributes currently holding None are
    replaced; any other existing value is left alone.
    """
    for oInstr in self.asCurInstr:
        if fOverwrite is True or getattr(oInstr, sAttrib) is None:
            setattr(oInstr, sAttrib, oValue);
850
def setInstrunctionArrayAttrib(self, sAttrib, iEntry, oValue, fOverwrite = False):
    """
    Sets entry iEntry of the array attribute sAttrib of all current
    instructions to oValue, growing the array with None entries as needed.

    Unless fOverwrite is True, only entries currently holding None are
    replaced.
    """
    for oInstr in self.asCurInstr:
        aoArray  = getattr(oInstr, sAttrib);
        cMissing = iEntry + 1 - len(aoArray);
        if cMissing > 0:
            aoArray.extend([None,] * cMissing);
        if aoArray[iEntry] is None or fOverwrite is True:
            aoArray[iEntry] = oValue;
862
def parseCommentOldOpcode(self, asLines):
    """
    Deals with 'Opcode 0xff /4' like comments.

    When the first comment line starts with the word 'Opcode' followed by
    a word with a '0x' or '0X' prefix, everything after 'Opcode' is stored
    as the sRawOldOpcodes attribute of the current instructions (if not
    already set), with any '0X' prefixes normalized to '0x'.

    Always returns False.
    """
    asWords = asLines[0].split() if len(asLines) > 0 else [];
    if    len(asWords) >= 2 \
      and asWords[0] == 'Opcode' \
      and (   asWords[1].startswith('0x')
           or asWords[1].startswith('0X')):
        asOpcodeWords = [];
        for sWord in asWords[1:]:           # Skips the 'Opcode' word itself.
            if sWord.startswith('0X'):
                sWord = '0x' + sWord[2:];   # Only the prefix case changes.
            asOpcodeWords.append(sWord);
        self.setInstrunctionAttrib('sRawOldOpcodes', ' '.join(asOpcodeWords));

    return False;
878
def ensureInstructionForOpTag(self, iTagLine):
    """
    Ensure there is an instruction for the op-tag being parsed.

    Adds a new instruction (created at self.iCommentLine + iTagLine) when
    there is no current one, then bumps the tag count of every current
    instruction, counting those that got their first tag in
    self.cTotalTagged.

    Returns the last of the current instructions.
    """
    if not self.asCurInstr:
        self.addInstruction(self.iCommentLine + iTagLine);

    cFirstTimers = 0;
    for oInstr in self.asCurInstr:
        oInstr.cOpTags += 1;
        if oInstr.cOpTags == 1:
            cFirstTimers += 1;
    self.cTotalTagged += cFirstTimers;

    return self.asCurInstr[-1];
888
889 @staticmethod
890 def flattenSections(aasSections):
891 """
892 Flattens multiline sections into stripped single strings.
893 Returns list of strings, on section per string.
894 """
895 asRet = [];
896 for asLines in assSections:
897 if len(asLines) > 0:
898 asRet.append(' '.join([sLine.strip() for sLine in asLines]));
899 return asRet;
900
901 @staticmethod
902 def flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = '\n'):
903 """
904 Flattens sections into a simple stripped string with newlines as
905 section breaks. The final section does not sport a trailing newline.
906 """
907 # Typical: One section with a single line.
908 if len(aasSections) == 1 and len(aasSections[0]) == 1:
909 return aasSections[0][0].strip();
910
911 sRet = '';
912 for iSection, asLines in enumerate(aasSections):
913 if len(asLines) > 0:
914 if iSection > 0:
915 sRet += sSectionSep;
916 sRet += sLineSep.join([sLine.strip() for sLine in asLines]);
917 return sRet;
918
919
920
921 ## @name Tag parsers
922 ## @{
923
def parseTagOpBrief(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    @opbrief
    Value:  Text description, multiple sections, appended.

    Brief description.  If not given, it's the first sentence from @opdesc.

    The value must be a single sentence of at most 180 characters; a
    missing final full stop is added.  An already set brief is not
    overwritten.

    Returns True on success, False after recording an error.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sBrief = self.flattenAllSections(aasSections);
    if len(sBrief) == 0:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if sBrief[-1] != '.':
        sBrief = sBrief + '.';
    if len(sBrief) > 180:
        return self.errorComment(iTagLine, '%s: value too long (max 180 chars): %s' % (sTag, sBrief));

    # Find the first full stop ending a sentence, i.e. one followed by a
    # space or ending the string.  It must be the final character.
    offDot = sBrief.find('.');
    while offDot >= 0 and offDot < len(sBrief) - 1 and sBrief[offDot + 1] != ' ':
        offDot = sBrief.find('.', offDot + 1);
    if offDot >= 0 and offDot != len(sBrief) - 1:
        return self.errorComment(iTagLine, '%s: only one sentence: %s' % (sTag, sBrief));

    # Update the instruction.
    if oInstr.sBrief is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite brief "%s" with "%s"'
                                           % (sTag, oInstr.sBrief, sBrief,));
    oInstr.sBrief = sBrief;

    _ = iEndLine;
    return True;
953
def parseTagOpDesc(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@opdesc
    Value:      Text description, multiple sections, appended.

    Free text describing the instruction.  The sections are flattened
    with self.flattenSections() and appended to the description sections
    of the instruction returned by ensureInstructionForOpTag().

    Always returns True.
    """
    _ = sTag; _ = iEndLine;

    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Nothing to append unless we have both instructions and text.
    fNothingToDo = len(self.aoInstructions) == 0 or len(aasSections) == 0;
    if not fNothingToDo:
        oInstr.asDescSections.extend(self.flattenSections(aasSections));
    return True;
968
def parseTagOpMnemonic(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        the mnemonic tag (passed in as sTag)
    Value:      mnemonic

    The 'mnemonic' value must be a valid C identifier string (it is
    matched against self.oReMnemonic).  Because of prefixes, groups and
    whatnot, there are times when the mnemonic isn't that of an actual
    assembler mnemonic.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value and check it against the mnemonic pattern.
    sMnemonic = self.flattenAllSections(aasSections);
    oMatch = self.oReMnemonic.match(sMnemonic);
    if not oMatch:
        return self.errorComment(iTagLine, '%s: invalid menmonic name: "%s"' % (sTag, sMnemonic,));

    # Only the first mnemonic sticks, a second one is an error.
    sOld = oInstr.sMnemonic;
    if sOld is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite menmonic "%s" with "%s"'
                                           % (sTag, sOld, sMnemonic,));

    oInstr.sMnemonic = sMnemonic;
    return True;
991
def parseTagOpOperandN(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tags:       \@op1, \@op2, \@op3, \@op4
    Value:      where:type

    The 'where' value indicates where the operand is found, like the 'reg'
    part of the ModR/M encoding.  It must be a key of Operand.kdLocations.

    The 'type' value indicates the operand type.  These follow the types
    given in the opcode tables in the CPU reference manuals.  It must be
    a key of Operand.kdTypes.

    The operand index is taken from the last character of the tag.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    idxOp = int(sTag[-1]) - 1;
    assert idxOp >= 0 and idxOp < 4;

    # flatten, split up, and validate the "where:type" value.
    sFlattened = self.flattenAllSections(aasSections);
    asSplit = sFlattened.split(':');
    if len(asSplit) != 2:
        return self.errorComment(iTagLine, 'expected %s value on format "<where>:<type>" not "%s"' % (sTag, sFlattened,));

    (sWhere, sType) = asSplit;
    if sWhere not in Operand.kdLocations:
        # errorComment is called with two arguments everywhere else in this
        # file, the stray third argument has been dropped here.
        return self.errorComment(iTagLine, '%s: invalid where value "%s", valid: %s'
                                           % (sTag, sWhere, ', '.join(Operand.kdLocations.keys()),));

    if sType not in Operand.kdTypes:
        return self.errorComment(iTagLine, '%s: invalid type value "%s", valid: %s'
                                           % (sTag, sType, ', '.join(Operand.kdTypes.keys()),));

    # Insert the operand, refusing to overwrite an existing one.  The
    # list is padded with None so operands may be given in any order.
    while idxOp >= len(oInstr.aoOperands):
        oInstr.aoOperands.append(None);
    if oInstr.aoOperands[idxOp] is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s:%s" with "%s:%s"'
                                           % ( sTag, oInstr.aoOperands[idxOp].sWhere, oInstr.aoOperands[idxOp].sType,
                                               sWhere, sType,));
    oInstr.aoOperands[idxOp] = Operand(sWhere, sType);

    _ = iEndLine;
    return True;
1036
def parseTagOpMaps(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@opmaps
    Value:      map[,map2]

    Indicates which maps the instruction is in.  There is a default map
    associated with each input file.

    Map names are separated by commas (line and section breaks count as
    commas too).  Whitespace around names and empty entries are ignored.
    Each name must be a key of g_dInstructionMaps.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split up and validate the value.  str.split(',') never
    # returns an empty list, so empty entries must be dropped explicitly
    # for the 'value required' check to be able to trigger.
    sFlattened = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',');
    asMaps = [sMap.strip() for sMap in sFlattened.split(',')];
    asMaps = [sMap for sMap in asMaps if len(sMap) > 0];
    if len(asMaps) == 0:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    for sMap in asMaps:
        if sMap not in g_dInstructionMaps:
            return self.errorComment(iTagLine, '%s: invalid map value: %s  (valid values: %s)'
                                               % (sTag, sMap, ', '.join(g_dInstructionMaps.keys()),));

    # Add the maps to the current list.  Throw errors on duplicates.
    for oMap in oInstr.aoMaps:
        if oMap.sName in asMaps:
            return self.errorComment(iTagLine, '%s: duplicate map assignment: %s' % (sTag, oMap.sName));

    for sMap in asMaps:
        oMap = g_dInstructionMaps[sMap];
        if oMap not in oInstr.aoMaps:
            oInstr.aoMaps.append(oMap);
        else:
            # Duplicate within the value itself: complain, but carry on.
            self.errorComment(iTagLine, '%s: duplicate map assignment (input): %s' % (sTag, sMap));

    _ = iEndLine;
    return True;
1071
## \@oppfx values.
## Only the keys are consulted by parseTagOpPfx (membership test); the
## associated lists are empty at present.
kdPrefixes = {
    '0x66': [],
    '0xf3': [],
    '0xf2': [],
};
1078
def parseTagOpPfx(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@oppfx
    Value:      none|0x66|0xf3|0xf2

    Required prefix for the instruction.  (In a (E)VEX context this is the
    value of the 'pp' field rather than an actual prefix.)

    The value is lower-cased, a two character value gets '0x' prepended
    (so '66' equals '0x66').  The value 'none' is stored as None.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sFlattened = self.flattenAllSections(aasSections);
    asPrefixes = sFlattened.split();
    if len(asPrefixes) == 0:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if len(asPrefixes) > 1:
        return self.errorComment(iTagLine, '%s: max one prefix: %s' % (sTag, asPrefixes,));

    sPrefix = asPrefixes[0].lower();
    if sPrefix == 'none':
        sPrefix = None;
    else:
        if len(sPrefix) == 2:
            sPrefix = '0x' + sPrefix;
        # Reject anything that is NOT a valid opcode byte.
        if not _isValidOpcodeByte(sPrefix):
            return self.errorComment(iTagLine, '%s: invalid prefix: %s' % (sTag, sPrefix,));

    if sPrefix is not None and sPrefix not in self.kdPrefixes:
        return self.errorComment(iTagLine, '%s: invalid prefix: %s (valid %s)' % (sTag, sPrefix, self.kdPrefixes,));

    # Set it.
    # NOTE(review): 'none' is stored as None, so an explicit '@oppfx none'
    # cannot be told apart from an unset prefix by the overwrite check.
    if oInstr.sPrefix is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sPrefix, sPrefix,));
    oInstr.sPrefix = sPrefix;

    _ = iEndLine;
    return True;
1114
## Special \@opcode tag values.
## These are the non-byte values parseTagOpcode accepts as-is; only the keys
## are consulted there, the lists are empty at present.
kdSpecialOpcodes = {
    '/reg':          [],
    'mr/reg':        [],
    '11 /reg':       [],
    '!11 /reg':      [],
    '11 mr/reg':     [],
    '!11 mr/reg':    [],
};
1124
def parseTagOpcode(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@opcode
    Value:      0x?? | /reg | mr/reg | 11 /reg | !11 /reg | 11 mr/reg | !11 mr/reg

    The opcode byte or sub-byte for the instruction in the context of a map.

    A value is accepted if it is one of the kdSpecialOpcodes keys or if
    _isValidOpcodeByte() approves of it.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value; special values bypass the opcode byte check.
    sOpcode = self.flattenAllSections(aasSections);
    fSpecial = sOpcode in self.kdSpecialOpcodes;
    if not fSpecial and not _isValidOpcodeByte(sOpcode):
        return self.errorComment(iTagLine, '%s: invalid opcode: %s' % (sTag, sOpcode,));

    # Store it unless the instruction already has an opcode.
    sOld = oInstr.sOpcode;
    if sOld is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, sOld, sOpcode,));

    oInstr.sOpcode = sOpcode;
    return True;
1148
## Valid values for \@openc
## Only the keys are consulted by parseTagOpEnc; the lists are empty at present.
kdEncodings = {
    'ModR/M': [],
};
1153
def parseTagOpEnc(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@openc
    Value:      ModR/M|TBD

    The instruction operand encoding style.  The value must be one of the
    kdEncodings keys.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.  An encoding style is not an opcode
    # byte, so the only valid values are those listed in kdEncodings.
    sEncoding = self.flattenAllSections(aasSections);
    if sEncoding not in self.kdEncodings:
        return self.errorComment(iTagLine, '%s: invalid encoding: %s' % (sTag, sEncoding,));

    # Set it.
    if oInstr.sEncoding is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                           % ( sTag, oInstr.sEncoding, sEncoding,));
    oInstr.sEncoding = sEncoding;

    _ = iEndLine;
    return True;
1178
1179 ## EFlags values allowed in \@opfltest, \@opflmodify, \@opflundef, \@opflset, and \@opflclear.
1180 kdEFlags = {
1181 # Debugger flag notation:
1182 'ov': 'X86_EFL_OF', ##< OVerflow.
1183 'nv': '!X86_EFL_OF', ##< No Overflow.
1184
1185 'ng': 'X86_EFL_SF', ##< NeGative (sign).
1186 'pl': '!X86_EFL_SF', ##< PLuss (sign).
1187
1188 'zr': 'X86_EFL_ZF', ##< ZeRo.
1189 'nz': '!X86_EFL_ZF', ##< No Zero.
1190
1191 'af': 'X86_EFL_AF', ##< Aux Flag.
1192 'na': '!X86_EFL_AF', ##< No Aux.
1193
1194 'po': 'X86_EFL_PF', ##< Parity Pdd.
1195 'pe': '!X86_EFL_PF', ##< Parity Even.
1196
1197 'cf': 'X86_EFL_CF', ##< Carry Flag.
1198 'nc': '!X86_EFL_CF', ##< No Carry.
1199
1200 'ei': 'X86_EFL_IF', ##< Enabled Interrupts.
1201 'di': '!X86_EFL_IF', ##< Disabled Interrupts.
1202
1203 'dn': 'X86_EFL_DF', ##< DowN (string op direction).
1204 'up': '!X86_EFL_DF', ##< UP (string op direction).
1205
1206 'vip': 'X86_EFL_VIP', ##< Virtual Interrupt Pending.
1207 'vif': 'X86_EFL_VIF', ##< Virtual Interrupt Flag.
1208 'ac': 'X86_EFL_AC', ##< Alignment Check.
1209 'vm': 'X86_EFL_VM', ##< Virtual-8086 Mode.
1210 'rf': 'X86_EFL_RF', ##< Resume Flag.
1211 'nt': 'X86_EFL_NT', ##< Nested Task.
1212 'tf': 'X86_EFL_TF', ##< Trap flag.
1213
1214 # Reference manual notation:
1215 'of': 'X86_EFL_OF',
1216 'sf': 'X86_EFL_SF',
1217 'zf': 'X86_EFL_ZF',
1218 'cf': 'X86_EFL_CF',
1219 'pf': 'X86_EFL_PF',
1220 'if': 'X86_EFL_IF',
1221 'df': 'X86_EFL_DF',
1222 'iopl': 'X86_EFL_IOPL',
1223 'id': 'X86_EFL_ID',
1224 };
1225
## EFlags tag to Instruction attribute name.
## parseTagOpEFlags uses this with getattr/setattr to pick the attribute
## that receives the flag list for a given tag.
kdOpFlagToAttr = {
    '@opfltest':    'asFlTest',
    '@opflmodify':  'asFlModify',
    '@opflundef':   'asFlUndefined',
    '@opflset':     'asFlSet',
    '@opflclear':   'asFlClear',
};
1234
def parseTagOpEFlags(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tags:       \@opfltest, \@opflmodify, \@opflundef, \@opflset, \@opflclear
    Value:      <eflags specifier>

    The value is a comma separated list of kdEFlags keys (line and section
    breaks count as commas), or the single word 'none' for an empty list.
    The list is stored in the instruction attribute that kdOpFlagToAttr
    associates with the tag.

    Returns True on success, False if any flag is invalid, otherwise the
    result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and split up the value.
    sFlat = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',');
    asFlags = sFlat.split(',');

    if len(asFlags) == 1 and asFlags[0].lower() == 'none':
        asFlags = [];
    else:
        # Validate every entry, reporting all the bad ones before bailing.
        fAllValid = True;
        for idxFlag, sRaw in enumerate(asFlags):
            if sRaw in self.kdEFlags:
                continue;
            sTrimmed = sRaw.strip();
            if sTrimmed in self.kdEFlags:
                asFlags[idxFlag] = sTrimmed;
            else:
                fAllValid = self.errorComment(iTagLine, '%s: invalid EFLAGS value: %s' % (sTag, sRaw,));
        if not fAllValid:
            return False;

    # Store the list, each flag tag may only be given once.
    sAttrib = self.kdOpFlagToAttr[sTag];
    asOld = getattr(oInstr, sAttrib);
    if asOld is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, asOld, asFlags,));
    setattr(oInstr, self.kdOpFlagToAttr[sTag], asFlags);
    return True;
1266
## \@ophints values.
## Maps the hint name accepted by parseTagOpHints to a DISOPTYPE_XXX
## constant name.
kdHints = {
    'invalid':               'DISOPTYPE_INVALID',
    'harmless':              'DISOPTYPE_HARMLESS',
    'controlflow':           'DISOPTYPE_CONTROLFLOW',
    'potentially_dangerous': 'DISOPTYPE_POTENTIALLY_DANGEROUS',
    'dangerous':             'DISOPTYPE_DANGEROUS',
    'portio':                'DISOPTYPE_PORTIO',
    'privileged':            'DISOPTYPE_PRIVILEGED',
    'privileged_notrap':     'DISOPTYPE_PRIVILEGED_NOTRAP',
    'uncond_controlflow':    'DISOPTYPE_UNCOND_CONTROLFLOW',
    'relative_controlflow':  'DISOPTYPE_RELATIVE_CONTROLFLOW',
    'cond_controlflow':      'DISOPTYPE_COND_CONTROLFLOW',
    'interrupt':             'DISOPTYPE_INTERRUPT',
    'illegal':               'DISOPTYPE_ILLEGAL',
    'rrm_dangerous':         'DISOPTYPE_RRM_DANGEROUS',         ##< Some additional dangerous ones when recompiling raw r0.
    'rrm_dangerous_16':      'DISOPTYPE_RRM_DANGEROUS_16',      ##< Some additional dangerous ones when recompiling 16-bit raw r0.
    'inhibit_irqs':          'DISOPTYPE_INHIBIT_IRQS',          ##< Will or can inhibit irqs (sti, pop ss, mov ss).
    'portio_read':           'DISOPTYPE_PORTIO_READ',
    'portio_write':          'DISOPTYPE_PORTIO_WRITE',
    'invalid_64':            'DISOPTYPE_INVALID_64',            ##< Invalid in 64 bits mode.
    'only_64':               'DISOPTYPE_ONLY_64',               ##< Only valid in 64 bits mode.
    'default_64_op_size':    'DISOPTYPE_DEFAULT_64_OP_SIZE',    ##< Default 64 bits operand size.
    'forced_64_op_size':     'DISOPTYPE_FORCED_64_OP_SIZE',     ##< Forced 64 bits operand size; regardless of prefix bytes.
    'rexb_extends_opreg':    'DISOPTYPE_REXB_EXTENDS_OPREG',    ##< REX.B extends the register field in the opcode byte.
    'mod_fixed_11':          'DISOPTYPE_MOD_FIXED_11',          ##< modrm.mod is always 11b.
    'forced_32_op_size_x86': 'DISOPTYPE_FORCED_32_OP_SIZE_X86', ##< Forced 32 bits operand size; regardless of prefix bytes (only in 16 & 32 bits mode!).
    'sse':                   'DISOPTYPE_SSE',                   ##< SSE,SSE2,SSE3,AVX,++ instruction. Not implemented yet!
    'mmx':                   'DISOPTYPE_MMX',                   ##< MMX,MMXExt,3DNow,++ instruction. Not implemented yet!
    'fpu':                   'DISOPTYPE_FPU',                   ##< FPU instruction. Not implemented yet!
};
1298
def parseTagOpHints(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@ophints
    Value:      Comma or space separated list of flags and hints.

    This covers the disassembler flags table and more.

    Each hint must be a kdHints key; the single word 'none' gives an empty
    list.  Valid hints are added to the instruction's dHints dictionary;
    a hint the instruction already has is reported but does not fail the
    tag.

    Returns True on success, False if any hint is invalid.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    # Note! str.split() without arguments never yields items with leading or
    #       trailing whitespace, so there is no need to strip them again.
    asHints = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asHints) == 1 and asHints[0].lower() == 'none':
        asHints = [];
    else:
        # Report all the invalid hints before failing.
        fRc = True;
        for sHint in asHints:
            if sHint not in self.kdHints:
                fRc = self.errorComment(iTagLine, '%s: invalid hint value: %s' % (sTag, sHint,));
        if not fRc:
            return False;

    # Append them.
    for sHint in asHints:
        if sHint not in oInstr.dHints:
            oInstr.dHints[sHint] = True; # (dummy value, using dictionary for speed)
        else:
            self.errorComment(iTagLine, '%s: duplicate hint: %s' % ( sTag, sHint,));

    _ = iEndLine;
    return True;
1332
## \@opcpuid
## Maps the CPUID feature name used in the tag value to the name of an
## X86_CPUID_XXX feature bit constant.
## NOTE(review): Going by the constant names, three entries look shifted by
##               one bit: 'pclmul' -> ..._ECX_DTES64, 'monitor' -> ..._ECX_CPLDS
##               and 'smx' -> ..._ECX_TM2.  Confirm against the
##               X86_CPUID_FEATURE_ECX_XXX definitions before relying on them.
kdCpuIdFlags = {
    'vme':      'X86_CPUID_FEATURE_EDX_VME',
    'tsc':      'X86_CPUID_FEATURE_EDX_TSC',
    'msr':      'X86_CPUID_FEATURE_EDX_MSR',
    'cx8':      'X86_CPUID_FEATURE_EDX_CX8',
    'sep':      'X86_CPUID_FEATURE_EDX_SEP',
    'cmov':     'X86_CPUID_FEATURE_EDX_CMOV',
    'clfsh':    'X86_CPUID_FEATURE_EDX_CLFSH',
    'mmx':      'X86_CPUID_FEATURE_EDX_MMX',
    'fxsr':     'X86_CPUID_FEATURE_EDX_FXSR',
    'sse':      'X86_CPUID_FEATURE_EDX_SSE',
    'sse2':     'X86_CPUID_FEATURE_EDX_SSE2',
    'sse3':     'X86_CPUID_FEATURE_ECX_SSE3',
    'pclmul':   'X86_CPUID_FEATURE_ECX_DTES64',
    'monitor':  'X86_CPUID_FEATURE_ECX_CPLDS',
    'vmx':      'X86_CPUID_FEATURE_ECX_VMX',
    'smx':      'X86_CPUID_FEATURE_ECX_TM2',
    'ssse3':    'X86_CPUID_FEATURE_ECX_SSSE3',
    'fma':      'X86_CPUID_FEATURE_ECX_FMA',
    'cx16':     'X86_CPUID_FEATURE_ECX_CX16',
    'pcid':     'X86_CPUID_FEATURE_ECX_PCID',
    'sse41':    'X86_CPUID_FEATURE_ECX_SSE4_1',
    'sse42':    'X86_CPUID_FEATURE_ECX_SSE4_2',
    'movbe':    'X86_CPUID_FEATURE_ECX_MOVBE',
    'popcnt':   'X86_CPUID_FEATURE_ECX_POPCNT',
    'aes':      'X86_CPUID_FEATURE_ECX_AES',
    'xsave':    'X86_CPUID_FEATURE_ECX_XSAVE',
    'avx':      'X86_CPUID_FEATURE_ECX_AVX',
    'f16c':     'X86_CPUID_FEATURE_ECX_F16C',
    'rdrand':   'X86_CPUID_FEATURE_ECX_RDRAND',

    # AMD feature bits:
    'axmmx':    'X86_CPUID_AMD_FEATURE_EDX_AXMMX',
    '3dnowext': 'X86_CPUID_AMD_FEATURE_EDX_3DNOW_EX',
    '3dnow':    'X86_CPUID_AMD_FEATURE_EDX_3DNOW',
    'svm':      'X86_CPUID_AMD_FEATURE_ECX_SVM',
    'cr8l':     'X86_CPUID_AMD_FEATURE_ECX_CR8L',
    'abm':      'X86_CPUID_AMD_FEATURE_ECX_ABM',
    'sse4a':    'X86_CPUID_AMD_FEATURE_ECX_SSE4A',
    '3dnowprf': 'X86_CPUID_AMD_FEATURE_ECX_3DNOWPRF',
    'xop':      'X86_CPUID_AMD_FEATURE_ECX_XOP',
    'fma4':     'X86_CPUID_AMD_FEATURE_ECX_FMA4',
};
1376
def parseTagOpCpuId(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@opcpuid
    Value:      none | <CPUID flag specifier>

    CPUID feature bit which is required for the instruction to be present.

    The value is a comma or space separated list of kdCpuIdFlags keys; the
    single word 'none' gives an empty list.  Valid names are appended to
    the instruction's asCpuIds list; a name the instruction already has is
    reported but does not fail the tag.

    Returns True on success, False if any name is invalid.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    # Note! str.split() without arguments never yields items with leading or
    #       trailing whitespace, so there is no need to strip them again.
    asCpuIds = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asCpuIds) == 1 and asCpuIds[0].lower() == 'none':
        asCpuIds = [];
    else:
        # Report all the invalid names before failing.
        fRc = True;
        for sCpuId in asCpuIds:
            if sCpuId not in self.kdCpuIdFlags:
                fRc = self.errorComment(iTagLine, '%s: invalid CPUID value: %s' % (sTag, sCpuId,));
        if not fRc:
            return False;

    # Append them.
    for sCpuId in asCpuIds:
        if sCpuId not in oInstr.asCpuIds:
            oInstr.asCpuIds.append(sCpuId);
        else:
            self.errorComment(iTagLine, '%s: duplicate CPUID: %s' % ( sTag, sCpuId,));

    _ = iEndLine;
    return True;
1410
def parseTagOpGroup(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@opgroup
    Value:      op_grp1[_subgrp2[_subsubgrp3]]

    Instruction grouping.  Exactly one group name is expected and it must
    match self.oReGroupName.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The flattened value must consist of a single word.
    asGroups = self.flattenAllSections(aasSections).split();
    if len(asGroups) != 1:
        return self.errorComment(iTagLine, '%s: exactly one group, please: %s' % (sTag, asGroups,));

    # ... and that word must look like a group name.
    (sGroup,) = asGroups;
    oMatch = self.oReGroupName.match(sGroup);
    if not oMatch:
        return self.errorComment(iTagLine, '%s: invalid group name: %s (valid: %s)'
                                           % (sTag, sGroup, self.oReGroupName.pattern));

    # Store it unless the instruction already belongs to a group.
    sOld = oInstr.sGroup;
    if sOld is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, sOld, sGroup,));

    oInstr.sGroup = sGroup;
    return True;
1436
## \@opunused, \@opinvalid, \@opinvlstyle
## The keys are the invalid opcode behaviour styles; the lists are empty at
## present.
kdInvalidStyles = {
    'immediate':                [], ##< CPU stops decoding immediately after the opcode.
    'intel-modrm':              [], ##< Intel decodes ModR/M.
    'intel-modrm-imm8':         [], ##< Intel decodes ModR/M and an 8-bit immediate.
    'intel-opcode-modrm':       [], ##< Intel decodes another opcode byte followed by ModR/M. (Unused extension tables.)
    'intel-opcode-modrm-imm8':  [], ##< Intel decodes another opcode byte followed by ModR/M and an 8-bit immediate.
};
1445
def parseTagOpUnusedInvalid(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@opunused, \@opinvalid, \@opinvlstyle
    Value:      <invalid opcode behaviour style>

    The \@opunused indicates the specification is for a currently unused
    instruction encoding.

    The \@opinvalid indicates the specification is for a currently invalid
    instruction encoding (like UD2).

    The \@opinvlstyle just indicates how CPUs decode the instruction when
    not supported (\@opcpuid, \@opmincpu) or disabled.

    The value must be exactly one of the kdInvalidStyles keys and only one
    of these three tags may be used per instruction.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    asStyles = self.flattenAllSections(aasSections).split();
    if len(asStyles) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid behviour style, please: %s' % (sTag, asStyles,));
    sStyle = asStyles[0];
    if sStyle not in self.kdInvalidStyles:
        return self.errorComment(iTagLine, '%s: invalid invalid behviour style: %s (valid: %s)'
                                           % (sTag, sStyle, self.kdInvalidStyles.keys(),));
    # Set it.
    if oInstr.sInvlStyle is not None:
        return self.errorComment(iTagLine,
                                 '%s: attempting to overwrite "%s" with "%s" (only one @opunused, @opinvalid, @opinvlstyle)'
                                 % ( sTag, oInstr.sInvlStyle, sStyle,));
    oInstr.sInvlStyle = sStyle;

    # The first two tags additionally flag the instruction.
    if sTag == '@opunused':
        oInstr.fUnused = True;
    elif sTag == '@opinvalid':
        oInstr.fInvalid = True;

    _ = iEndLine;
    return True;
1483
def parseTagOpTest(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@optest
    Value:      [<selectors>[ ]?] <inputs> -> <outputs>
    Example:    mode==64bit / in1=0xfffffffe:dw in2=1:dw -> out1=0xffffffff:dw outfl=a?,p?

    The main idea here is to generate basic instruction tests.

    The probably simplest way of handling the diverse input, would be to use
    it to produce size optimized byte code for a simple interpreter that
    modifies the register input and output states.

    An alternative to the interpreter would be creating multiple tables,
    but that becomes rather complicated wrt what goes where and then to use
    them in an efficient manner.

    Each section of the value describes one test.  Tests which parse
    without errors are appended to the instruction's aoTests list, the
    others are reported via self.errorComment().

    Always returns True.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    #
    # Do it section by section.
    #
    for asSectionLines in aasSections:
        #
        # Sort the input into outputs, inputs and selector conditions.
        #
        sFlatSection = self.flattenAllSections([asSectionLines,]);
        if len(sFlatSection) == 0:
            self.errorComment(iTagLine, '%s: missing value' % ( sTag,));
            continue;
        oTest = InstructionTest(oInstr);

        asSelectors = [];
        asInputs    = [];
        asOutputs   = [];
        asCur       = asOutputs;
        fRc         = True;
        asWords     = sFlatSection.split();
        # The words are processed back to front: outputs first, then the
        # inputs (after '->') and finally the selectors (after '/').
        for iWord in range(len(asWords) - 1, -1, -1):
            sWord = asWords[iWord];
            # Check for array switchers.  The lists must be compared by
            # identity here, as by value two different but equally filled
            # (typically empty) lists would be mistaken for each other.
            if sWord == '->':
                if asCur is not asOutputs:
                    fRc = self.errorComment(iTagLine, '%s: "->" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asInputs;
            elif sWord == '/':
                if asCur is not asInputs:
                    fRc = self.errorComment(iTagLine, '%s: "/" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asSelectors;
            else:
                asCur.insert(0, sWord);

        #
        # Validate and add selectors.
        #
        for sCond in asSelectors:
            # Expand predicate shorthands, then split at the first
            # comparison operator (in kasCompareOps order) found.
            sCondExp  = TestSelector.kdPredicates.get(sCond, sCond);
            oSelector = None;
            for sOp in TestSelector.kasCompareOps:
                off = sCondExp.find(sOp);
                if off >= 0:
                    sVariable = sCondExp[:off];
                    sValue    = sCondExp[off + len(sOp):];
                    if sVariable in TestSelector.kdVariables:
                        if sValue in TestSelector.kdVariables[sVariable]:
                            oSelector = TestSelector(sVariable, sOp, sValue);
                        else:
                            self.errorComment(iTagLine, '%s: invalid condition value "%s" in "%s" (valid: %s)'
                                                         % ( sTag, sValue, sCond,
                                                             TestSelector.kdVariables[sVariable].keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid condition variable "%s" in "%s" (valid: %s)'
                                                     % ( sTag, sVariable, sCond, TestSelector.kdVariables.keys(),));
                    break;
            if oSelector is not None:
                for oExisting in oTest.aoSelectors:
                    if oExisting.sVariable == oSelector.sVariable:
                        self.errorComment(iTagLine, '%s: already have a selector for variable "%s" (existing: %s, new: %s)'
                                                     % ( sTag, oSelector.sVariable, oExisting, oSelector,));
                oTest.aoSelectors.append(oSelector);
            else:
                fRc = self.errorComment(iTagLine, '%s: failed to parse selector: %s' % ( sTag, sCond,));

        #
        # Validate outputs and inputs, adding them to the test as we go along.
        #
        for asItems, sDesc, aoDst in [ (asInputs, 'input', oTest.aoInputs), (asOutputs, 'output', oTest.aoOutputs)]:
            for sItem in asItems:
                oItem = None;
                for sOp in TestInOut.kasOperators:
                    off = sItem.find(sOp);
                    if off >= 0:
                        sField     = sItem[:off];
                        sValueType = sItem[off + len(sOp):];
                        if sField in TestInOut.kdFields:
                            # The type is optional and defaults to the
                            # first entry of the field's kdFields value.
                            asSplit = sValueType.split(':', 1);
                            sValue  = asSplit[0];
                            sType   = asSplit[1] if len(asSplit) > 1 else TestInOut.kdFields[sField][0];
                            if sType in TestInOut.kdTypes:
                                oValid = TestInOut.kdTypes[sType].validate(sValue);
                                if oValid is True:
                                    # NOTE(review): the condition lets and-or pairs through for '!=',
                                    #               while the message below says '='.  One of them is
                                    #               presumably wrong - confirm the intended operator.
                                    if not TestInOut.kdTypes[sType].isAndOrPair(sValue) or sOp == '!=':
                                        oItem = TestInOut(sField, sOp, sValue, sType);
                                    else:
                                        self.errorComment(iTagLine,
                                                          '%s: and-or %s value "%s" can only be used with the "=" (in "%s", type: %s)'
                                                          % ( sTag, sDesc, sValue, sItem, sType, ));
                                else:
                                    self.errorComment(iTagLine, '%s: invalid %s value "%s" in "%s" (type: %s)'
                                                                 % ( sTag, sDesc, sValue, sItem, sType, ));
                            else:
                                self.errorComment(iTagLine, '%s: invalid %s type "%s" in "%s" (valid types: %s)'
                                                             % ( sTag, sDesc, sType, sItem, TestInOut.kdTypes.keys(),));
                        else:
                            self.errorComment(iTagLine, '%s: invalid %s field "%s" in "%s" (valid fields: %s)'
                                                         % ( sTag, sDesc, sField, sItem, TestInOut.kdFields.keys(),));
                        break;
                if oItem is not None:
                    for oExisting in aoDst:
                        if oExisting.sField == oItem.sField and oExisting.sOp == oItem.sOp:
                            self.errorComment(iTagLine,
                                              '%s: already have a "%s" assignment for field "%s" (existing: %s, new: %s)'
                                              % ( sTag, oItem.sOp, oItem.sField, oExisting, oItem,));
                    aoDst.append(oItem);
                else:
                    fRc = self.errorComment(iTagLine, '%s: failed to parse assignment: %s' % ( sTag, sItem,));

        #
        # Add the test if it parsed fine, otherwise dump what we've got.
        #
        if fRc:
            oInstr.aoTests.append(oTest);
        else:
            self.errorComment(iTagLine, '%s: failed to parse test: %s' % (sTag, ' '.join(asWords),));
            self.errorComment(iTagLine, '%s: asSelectors=%s / asInputs=%s -> asOutputs=%s'
                                        % (sTag, asSelectors, asInputs, asOutputs,));

    return True;
1623
def parseTagOpFunction(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@opfunction
    Value:      <VMM function name>

    This is for explicitly setting the IEM function name.  Normally we pick
    this up from the FNIEMOP_XXX macro invocation after the description, or
    generate it from the mnemonic and operands.

    It is thought it may be necessary to set it when specifying instructions
    whose implementation isn't following immediately or aren't implemented yet.

    The value must match self.oReFunctionName.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sFunction = self.flattenAllSections(aasSections);
    if not self.oReFunctionName.match(sFunction):
        return self.errorComment(iTagLine, '%s: invalid VMM function name: "%s" (valid: %s)'
                                           % (sTag, sFunction, self.oReFunctionName.pattern));

    # Set it, refusing to overwrite an existing name.
    if oInstr.sFunction is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite VMM function name "%s" with "%s"'
                                           % (sTag, oInstr.sFunction, sFunction,));
    oInstr.sFunction = sFunction;

    _ = iEndLine;
    return True;
1651
def parseTagOpStats(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@opstats
    Value:      <VMM statistics base name>

    This is for explicitly setting the statistics name.  Normally we pick
    this up from the IEMOP_MNEMONIC macro invocation, or generate it from
    the mnemonic and operands.

    It is thought it may be necessary to set it when specifying instructions
    whose implementation isn't following immediately or aren't implemented yet.

    The value must match self.oReStatsName.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sStats = self.flattenAllSections(aasSections);
    if not self.oReStatsName.match(sStats):
        return self.errorComment(iTagLine, '%s: invalid VMM statistics name: "%s" (valid: %s)'
                                           % (sTag, sStats, self.oReStatsName.pattern));

    # Set it, refusing to overwrite an existing name.
    if oInstr.sStats is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite VMM statistics base name "%s" with "%s"'
                                           % (sTag, oInstr.sStats, sStats,));
    oInstr.sStats = sStats;

    _ = iEndLine;
    return True;
1679
def parseTagOpDone(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@opdone
    Value:      none

    Used to explicitly flush the instructions that have been specified.

    Returns the result of self.doneInstructions() when no value was given,
    otherwise the result of self.errorComment().
    """
    _ = iEndLine;

    # The tag takes no value, so the flattened text must be empty.
    sValue = self.flattenAllSections(aasSections);
    if sValue == '':
        return self.doneInstructions();
    return self.errorComment(iTagLine, '%s: takes no value, found: "%s"' % (sTag, sValue,));
1692
1693 ## @}
1694
1695
def parseComment(self):
    """
    Parse the current comment (self.sComment).

    The comment is split into lines and scanned for '@' tags.  The text
    following a tag, up to the next tag, is split into sections at empty
    lines and handed to the tag's handler in self.dTagHandlers.  Text
    before the first tag belongs to the implicit '@default' tag.

    Returns False if the comment contains neither 'Opcode' nor '@',
    otherwise True.
    """
    #
    # Reject if comment doesn't seem to contain anything interesting.
    #
    sComment = self.sComment;
    if sComment.find('Opcode') < 0 and sComment.find('@') < 0:
        return False;

    #
    # Split the comment into lines, removing leading asterisks and spaces.
    # Also remove leading and trailing empty lines.
    #
    asLines = [sRaw.lstrip().lstrip('*').lstrip() for sRaw in sComment.split('\n')];

    while asLines and not asLines[0]:
        self.iCommentLine += 1; # Keep the line number in sync with what we drop.
        del asLines[0];

    while asLines and not asLines[-1]:
        del asLines[-1];

    #
    # Check for old style: Opcode 0x0f 0x12
    #
    if asLines[0].startswith('Opcode '):
        self.parseCommentOldOpcode(asLines);

    #
    # Look for @op* tagged data.
    #
    cOpTags      = 0;
    sFlatDefault = None;
    sCurTag      = '@default';
    iCurTagLine  = 0;
    asCurSection = [];
    aasSections  = [ asCurSection, ];
    for iLine, sLine in enumerate(asLines):
        if sLine.startswith('@'):
            #
            # A new tag starts here, so process the previous one first.
            #
            if sCurTag in self.dTagHandlers:
                self.dTagHandlers[sCurTag](sCurTag, aasSections, iCurTagLine, iLine);
                cOpTags += 1;
            elif sCurTag.startswith('@op'):
                self.errorComment(iCurTagLine, 'Unknown tag: %s' % (sCurTag));
            elif sCurTag == '@default':
                sFlatDefault = self.flattenAllSections(aasSections);

            #
            # Set up the new tag.  Any text following the tag on the same
            # line becomes the first line of its first section.
            #
            asParts      = sLine.split(None, 1);
            sCurTag      = asParts[0].lower();
            asCurSection = asParts[1:];
            aasSections  = [asCurSection, ];
            iCurTagLine  = iLine;
        elif sLine:
            asCurSection.append(sLine);
        elif asCurSection:
            # An empty line ends a non-empty section.
            asCurSection = [];
            aasSections.append(asCurSection);

    #
    # Process the final tag.
    #
    if sCurTag in self.dTagHandlers:
        self.dTagHandlers[sCurTag](sCurTag, aasSections, iCurTagLine, iLine);
        cOpTags += 1;
    elif sCurTag.startswith('@op'):
        self.errorComment(iCurTagLine, 'Unknown tag: %s' % (sCurTag));
    elif sCurTag == '@default':
        sFlatDefault = self.flattenAllSections(aasSections);

    #
    # Don't allow default text in blocks containing @op*.
    #
    if cOpTags > 0 and len(sFlatDefault) > 0:
        self.errorComment(0, 'Untagged comment text is not allowed with @op*: %s' % (sFlatDefault,));

    return True;
1788
def parseMacroInvocation(self, sInvocation):
    """
    Parses a macro invocation.

    sInvocation must start with the macro name.  If the invocation isn't
    complete, lines from self.asLines are appended to it, starting with
    line self.iLine, until the closing parenthesis is found.

    Returns a tuple, first element is the offset following the macro
    invocation. The second element is a list of macro arguments, where the
    zero'th is the macro name.

    Returns the result of self.error() if the macro name is invalid or the
    end of the file is reached before the invocation is complete.
    """
    # First the name.
    offOpen = sInvocation.find('(');
    if offOpen <= 0:
        # NOTE(review): raiseError is not defined in the visible part of this
        #               file; presumably a helper defined elsewhere - confirm.
        raiseError("macro invocation open parenthesis not found");
    sName = sInvocation[:offOpen].strip();
    if not self.oReMacroName.match(sName):
        return self.error("invalid macro name '%s'" % (sName,));
    asRet = [sName, ];

    # Arguments.
    iLine    = self.iLine;
    cDepth   = 1;
    off      = offOpen + 1;
    offStart = off;
    while cDepth > 0:
        # Pull in more lines when running out of text.  This must be a loop
        # as a continuation line may be empty, in which case there still
        # wouldn't be any character at 'off' to look at.
        while off >= len(sInvocation):
            if iLine >= len(self.asLines):
                return self.error('macro invocation beyond end of file');
            sInvocation += self.asLines[iLine];
            iLine += 1;
        ch = sInvocation[off];

        # Arguments are only split at the outermost parenthesis level.
        if ch == ',' or ch == ')':
            if cDepth == 1:
                asRet.append(sInvocation[offStart:off].strip());
                offStart = off + 1;
            if ch == ')':
                cDepth -= 1;
        elif ch == '(':
            cDepth += 1;
        off += 1;

    return (off, asRet);
1830
def findAndParseMacroInvocation(self, sCode, sMacro):
    """
    Looks for the first occurrence of sMacro in sCode and parses it as a
    macro invocation if the next non-blank character is an opening
    parenthesis.

    Returns (len(sCode), None) if not found.  If found, the
    parseMacroInvocation result with the offset made relative to the start
    of sCode.
    """
    offHit = sCode.find(sMacro);
    if offHit >= 0:
        # startswith() rather than [0]: when nothing but whitespace follows
        # the name this is simply 'not found' instead of an IndexError.
        sFollowing = sCode[offHit + len(sMacro):].lstrip();
        if sFollowing.startswith('('):
            offAfter, asRet = self.parseMacroInvocation(sCode[offHit:]);
            return (offHit + offAfter, asRet);
    return (len(sCode), None);
1840
def findAndParseFirstMacroInvocation(self, sCode, asMacro):
    """
    Tries the macro names in asMacro one by one, in the given order, and
    stops at the first one findAndParseMacroInvocation locates in sCode.

    Returns (len(sCode), None) if none of them is found, otherwise the
    (offset, arguments) pair for the first hit.
    """
    for sCandidate in asMacro:
        offEnd, asArgs = self.findAndParseMacroInvocation(sCode, sCandidate);
        if asArgs is None:
            continue;
        return (offEnd, asArgs);

    # Ran out of candidates.
    return (len(sCode), None);
1850
def checkCodeForMacro(self, sCode):
    """
    Checks a piece of code for the macro invocations we care about and
    records what they tell us on the current instruction(s).

    Returns True if a decoder function definition/stub macro was found,
    otherwise False.
    """
    # Without an opening parenthesis past the first column there cannot be
    # any macro invocation here.
    if sCode.find('(') <= 0:
        return False;

    #
    # Instruction decoder function definitions.  ASSUME single line.
    #
    asDefMacros = [ 'FNIEMOP_DEF',
                    'FNIEMOP_STUB',
                    'FNIEMOP_STUB_1',
                    'FNIEMOP_UD_STUB',
                    'FNIEMOP_UD_STUB_1' ];
    (_, asArgs) = self.findAndParseFirstMacroInvocation(sCode, asDefMacros);
    if asArgs is not None:
        sMacroName = asArgs[0];
        sFunction  = asArgs[1];
        fIsStub    = sMacroName.find('STUB') > 0;
        fIsUdStub  = sMacroName.find('UD_STUB') > 0;

        # No instruction pending: add one, taking the mnemonic from the
        # second underscore separated part of the function name.
        if len(self.asCurInstr) == 0:
            self.addInstruction().sMnemonic = sFunction.split('_')[1];
        self.setInstrunctionAttrib('sFunction', sFunction);
        self.setInstrunctionAttrib('fStub', fIsStub, fOverwrite = True);
        self.setInstrunctionAttrib('fUdStub', fIsUdStub, fOverwrite = True);
        if fIsStub:
            self.doneInstructions();
        return True;

    #
    # IEMOP_MNEMONIC(a_Stats, a_szMnemonic) IEMOP_INC_STATS(a_Stats)
    #
    (_, asArgs) = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC');
    if asArgs is not None and len(self.asCurInstr) == 1:
        self.setInstrunctionAttrib('sStats', asArgs[1]);
        self.setInstrunctionAttrib('sMnemonic', asArgs[1].split('_')[0]);

    #
    # IEMOP_HLP_DECODED_NL_1(a_uDisOpNo, a_fIemOpFlags, a_uDisParam0, a_fDisOpType)
    # IEMOP_HLP_DECODED_NL_2(a_uDisOpNo, a_fIemOpFlags, a_uDisParam0, a_uDisParam1, a_fDisOpType)
    #
    for cDisParams, sHlpMacro in ( (1, 'IEMOP_HLP_DECODED_NL_1'), (2, 'IEMOP_HLP_DECODED_NL_2'), ):
        (_, asArgs) = self.findAndParseMacroInvocation(sCode, sHlpMacro);
        if asArgs is not None and len(self.asCurInstr) == 1:
            self.setInstrunctionAttrib('sRawDisOpNo', asArgs[1]);
            self.setInstrunctionAttrib('sRawIemOpFlags', asArgs[2]);
            for iParam in range(cDisParams):
                self.setInstrunctionArrayAttrib('asRawDisParams', iParam, asArgs[3 + iParam]);

    return False;
1903
1904
def parse(self):
    """
    Parses the given file.

    Walks self.asLines starting at self.iLine while tracking whether we
    are in code or inside a /* */ comment (self.iState):
      - A completed comment is collected in self.sComment and handed to
        self.parseComment().
      - A code line without any slash that contains 'IEMOP_' is passed to
        self.checkCodeForMacro().
      - A code line without any slash that starts with '}' completes the
        current instructions.

    Returns number of errors (the self.printErrors() result).
    Raises exception on fatal trouble.
    """
    self.debug('Parsing %s' % (self.sSrcFile,));

    while self.iLine < len(self.asLines):
        sLine = self.asLines[self.iLine];
        self.iLine += 1;

        # We only look for comments, so only lines with a slash might possibly
        # influence the parser state.
        if sLine.find('/') >= 0:
            #self.debug('line %d: slash' % (self.iLine,));

            offLine = 0;
            while offLine < len(sLine):
                if self.iState == self.kiCode:
                    offHit = sLine.find('/*', offLine); # only multiline comments for now.
                    if offHit >= 0:
                        self.sComment     = '';
                        self.iCommentLine = self.iLine;
                        self.iState       = self.kiCommentMulti;
                        offLine = offHit + 2;
                    else:
                        offLine = len(sLine);

                elif self.iState == self.kiCommentMulti:
                    offHit = sLine.find('*/', offLine);
                    if offHit >= 0:
                        self.sComment += sLine[offLine:offHit];
                        self.iState    = self.kiCode;
                        offLine = offHit + 2;
                        self.parseComment();
                    else:
                        self.sComment += sLine[offLine:];
                        offLine = len(sLine);
                else:
                    # Raise explicitly: a plain assert is removed by -O and
                    # this loop would then never advance offLine.
                    raise AssertionError('unexpected parser state: %s' % (self.iState,));

        # No slash, but append the line if in multi-line comment.
        elif self.iState == self.kiCommentMulti:
            #self.debug('line %d: multi' % (self.iLine,));
            self.sComment += sLine;

        # No slash, but check code line for relevant macro.
        elif self.iState == self.kiCode and sLine.find('IEMOP_') >= 0:
            #self.debug('line %d: macro' % (self.iLine,));
            self.checkCodeForMacro(sLine);

        # If the line is a '}' in the first position, complete the instructions.
        # (startswith copes with an empty line, indexing [0] would not.)
        elif self.iState == self.kiCode and sLine.startswith('}'):
            #self.debug('line %d: }' % (self.iLine,));
            self.doneInstructions();

    self.doneInstructions();
    self.debug('%s instructions in %s' % (self.cTotalInstr, self.sSrcFile,));
    self.debug('%s instruction stubs' % (self.cTotalStubs,));
    return self.printErrors();
1966
1967
def __parseFileByName(sSrcFile, sDefaultMap):
    """
    Parses one source file for instruction specifications.

    Reads sSrcFile into a list of lines and runs a SimpleParser over it,
    passing sDefaultMap along to the parser.

    Returns whatever SimpleParser.parse() returns.
    Raises Exception if the file cannot be opened or read.  Exceptions from
    the parser are passed on to the caller, a ParserException is printed
    first.
    """
    #
    # Slurp the file as an array of lines.
    #
    try:
        oFile = open(sSrcFile, "r");
    except Exception as oXcpt:
        raise Exception("failed to open %s for reading: %s" % (sSrcFile, oXcpt,));
    with oFile: # Closes the file whether or not reading it works out.
        try:
            asLines = oFile.readlines();
        except Exception as oXcpt:
            raise Exception("failed to read %s: %s" % (sSrcFile, oXcpt,));

    #
    # Hand the lines to the parser.
    #
    try:
        return SimpleParser(sSrcFile, asLines, sDefaultMap).parse();
    except ParserException as oXcpt:
        print(str(oXcpt));
        raise;
1998
1999
def __parseAll():
    """
    Parses all the IEMAllInstruction*.cpp.h files.

    The files are expected in the same directory as this script.

    Returns True on success.
    Raises exception on failure, including when the parsing produced errors.
    """
    sSrcDir = os.path.dirname(os.path.abspath(__file__));

    # (default map, file name) pairs.
    atInputs = [
        ( 'one',    'IEMAllInstructionsOneByte.cpp.h'),
        #( 'two0f',  'IEMAllInstructionsTwoByte0f.cpp.h'),
    ];

    # Add up the error counts of the individual files.
    cErrors = sum(__parseFileByName(os.path.join(sSrcDir, sName), sDefaultMap)
                  for (sDefaultMap, sName) in atInputs);
    if cErrors == 0:
        return True;
    raise Exception('%d parse errors' % (cErrors,));
2017
2018
2019
# Module level call: the instruction files are parsed as soon as this file is
# loaded, and a failure surfaces as an exception right here.
__parseAll();
2021
2022
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette