VirtualBox

source: vbox/trunk/src/VBox/Devices/Graphics/DevVGA-SVGA3d-dx-shader.cpp@ 94832

Last change on this file since 94832 was 94832, checked in by vboxsync, 3 years ago

Devices/Graphics: guest shader signatures do not need sorting: bugref:9830

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 106.3 KB
Line 
1/* $Id: DevVGA-SVGA3d-dx-shader.cpp 94832 2022-05-05 06:45:48Z vboxsync $ */
2/** @file
3 * DevVMWare - VMWare SVGA device - VGPU10+ (DX) shader utilities.
4 */
5
6/*
7 * Copyright (C) 2020-2022 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*********************************************************************************************************************************
20* Header Files *
21*********************************************************************************************************************************/
22#define LOG_GROUP LOG_GROUP_DEV_VMSVGA
23#include <VBox/AssertGuest.h>
24#include <VBox/log.h>
25
26#include <iprt/asm.h>
27#include <iprt/md5.h>
28#include <iprt/mem.h>
29#include <iprt/string.h>
30
31#include "DevVGA-SVGA3d-dx-shader.h"
32
33
34/*
35 *
36 * DXBC shader binary format definitions.
37 *
38 */
39
/**
 * DXBC container header.
 */
typedef struct DXBCHeader
{
    /** Container magic: 0x43425844 = 'D', 'X', 'B', 'C'. */
    uint32_t u32DXBC;
    /** Modified MD5 hash. See dxbcHash. */
    uint8_t  au8Hash[16];
    /** Version: 1. */
    uint32_t u32Version;
    /** Total size in bytes, the header included. */
    uint32_t cbTotal;
    /** Number of entries in the aBlobOffset array. */
    uint32_t cBlob;
    /** Blob offsets, each counted from the start of the DXBC header. */
    uint32_t aBlobOffset[1];
} DXBCHeader;
50
/** The 'D', 'X', 'B', 'C' FourCC; presumably the expected DXBCHeader::u32DXBC value. */
#define DXBC_MAGIC                  RT_MAKE_U32_FROM_U8('D', 'X', 'B', 'C')
52
/**
 * DXBC blob header.
 *
 * The blob's data follows immediately after this structure.
 */
typedef struct DXBCBlobHeader
{
    /** FourCC code, one of DXBC_BLOB_TYPE_*. */
    uint32_t u32BlobType;
    /** Size of the blob, not counting this header. 4 bytes aligned. */
    uint32_t cbBlob;
} DXBCBlobHeader;
60
/** @name DXBC blob types (FourCC codes stored in DXBCBlobHeader::u32BlobType).
 * @{ */
#define DXBC_BLOB_TYPE_ISGN         RT_MAKE_U32_FROM_U8('I', 'S', 'G', 'N')
#define DXBC_BLOB_TYPE_OSGN         RT_MAKE_U32_FROM_U8('O', 'S', 'G', 'N')
#define DXBC_BLOB_TYPE_SHDR         RT_MAKE_U32_FROM_U8('S', 'H', 'D', 'R')
/** @todo More... */
/** @} */
66
67/* 'SHDR' blob data format. */
68typedef struct DXBCBlobSHDR
69{
70 VGPU10ProgramToken programToken;
71 uint32_t cToken; /* Number of 32 bit tokens including programToken and cToken. */
72 uint32_t au32Token[1]; /* cToken - 2 number of tokens. */
73} DXBCBlobSHDR;
74
/**
 * Element of an input or output signature.
 */
typedef struct DXBCBlobIOSGNElement
{
    /** Offset of the semantic's name, counted from the start of the blob data. */
    uint32_t offElementName;
    /** Semantic index. */
    uint32_t idxSemantic;
    /** SVGA3dDXSignatureSemanticName */
    uint32_t enmSystemValue;
    /** 1 - unsigned, 2 - integer, 3 - float. */
    uint32_t enmComponentType;
    /** Shader register index. Elements must be sorted by register index. */
    uint32_t idxRegister;
    /** Component masks, accessible either per field or as one dword. */
    union
    {
        struct
        {
            /** Component mask. Lower 4 bits represent X, Y, Z, W channels. */
            uint32_t mask  : 8;
            /** Which components are used in the shader. */
            uint32_t mask2 : 8;
            /** Padding up to 32 bits. */
            uint32_t pad   : 16;
        } m;
        /** All of the above as a single 32-bit value. */
        uint32_t mask;
    } u;
} DXBCBlobIOSGNElement;
94
95/* 'ISGN' and 'OSGN' blob data format. */
96typedef struct DXBCBlobIOSGN
97{
98 uint32_t cElement; /* Number of signature elements. */
99 uint32_t offElement; /* Offset of the first element from the start of the blob. Equals to 8. */
100 DXBCBlobIOSGNElement aElement[1]; /* Signature elements. Size is cElement. */
101 /* Followed by ASCIIZ semantic names. */
102} DXBCBlobIOSGN;
103
104
105/*
106 * VGPU10 shader parser definitions.
107 */
108
/**
 * Parsed info about an operand index.
 */
typedef struct VGPUOperandIndex
{
    /** VGPU10_OPERAND_INDEX_REPRESENTATION */
    uint32_t            indexRepresentation;
    /** Immediate part of the index. Needs up to a qword. */
    uint64_t            iOperandImmediate;
    /** The relative operand, for VGPU10_OPERAND_INDEX_*RELATIVE. */
    struct VGPUOperand *pOperandRelative;
} VGPUOperandIndex;
116
117/* Parsed info about an operand. */
118typedef struct VGPUOperand
119{
120 uint32_t numComponents : 2; /* VGPU10_OPERAND_NUM_COMPONENTS */
121 uint32_t selectionMode : 2; /* VGPU10_OPERAND_4_COMPONENT_SELECTION_MODE */
122 uint32_t mask : 4; /* 4-bits X, Y, Z, W mask for VGPU10_OPERAND_4_COMPONENT_MASK_MODE. */
123 uint32_t operandType : 8; /* VGPU10_OPERAND_TYPE */
124 uint32_t indexDimension : 2; /* VGPU10_OPERAND_INDEX_DIMENSION */
125 VGPUOperandIndex aOperandIndex[VGPU10_OPERAND_INDEX_3D]; /* Up to 3. */
126 uint32_t aImm[4]; /* Immediate values for VGPU10_OPERAND_TYPE_IMMEDIATE* */
127 uint32_t cOperandToken; /* Number of tokens in this operand. */
128 uint32_t const *paOperandToken; /* Pointer to operand tokens in the input buffer. */
129} VGPUOperand;
130
131/* Parsed info about an opcode. */
132typedef struct VGPUOpcode
133{
134 uint32_t cOpcodeToken; /* Number of tokens for this operation. */
135 uint32_t opcodeType; /* VGPU10_OPCODE_* */
136 uint32_t opcodeSubtype; /* For example VGPU10_VMWARE_OPCODE_* */
137 uint32_t semanticName; /* SVGA3dDXSignatureSemanticName for system value declarations. */
138 uint32_t cOperand; /* Number of operands for this instruction. */
139 uint32_t aIdxOperand[8]; /* Indices of the instruction operands in the aValOperand array. */
140 /* 8 should be enough for everyone. */
141 VGPUOperand aValOperand[16]; /* Operands including VGPU10_OPERAND_INDEX_*RELATIVE if they are used: */
142 /* Operand1, VGPU10_OPERAND_INDEX_*RELATIVE for Operand1, ... */
143 /* ... */
144 /* OperandN, VGPU10_OPERAND_INDEX_*RELATIVE for OperandN, ... */
145 /* 16 probably should be enough for everyone. */
146 uint32_t const *paOpcodeToken; /* Pointer to opcode tokens in the input buffer. */
147} VGPUOpcode;
148
/**
 * Static per-opcode information.
 */
typedef struct VGPUOpcodeInfo
{
    /** Number of operands for this opcode. */
    uint32_t cOperand;
} VGPUOpcodeInfo;
153
154static VGPUOpcodeInfo const g_aOpcodeInfo[] =
155{
156 { 3 }, /* VGPU10_OPCODE_ADD */
157 { 3 }, /* VGPU10_OPCODE_AND */
158 { 0 }, /* VGPU10_OPCODE_BREAK */
159 { 1 }, /* VGPU10_OPCODE_BREAKC */
160 { 1 }, /* VGPU10_OPCODE_CALL */
161 { 2 }, /* VGPU10_OPCODE_CALLC */
162 { 1 }, /* VGPU10_OPCODE_CASE */
163 { 0 }, /* VGPU10_OPCODE_CONTINUE */
164 { 1 }, /* VGPU10_OPCODE_CONTINUEC */
165 { 0 }, /* VGPU10_OPCODE_CUT */
166 { 0 }, /* VGPU10_OPCODE_DEFAULT */
167 { 2 }, /* VGPU10_OPCODE_DERIV_RTX */
168 { 2 }, /* VGPU10_OPCODE_DERIV_RTY */
169 { 1 }, /* VGPU10_OPCODE_DISCARD */
170 { 3 }, /* VGPU10_OPCODE_DIV */
171 { 3 }, /* VGPU10_OPCODE_DP2 */
172 { 3 }, /* VGPU10_OPCODE_DP3 */
173 { 3 }, /* VGPU10_OPCODE_DP4 */
174 { 0 }, /* VGPU10_OPCODE_ELSE */
175 { 0 }, /* VGPU10_OPCODE_EMIT */
176 { 0 }, /* VGPU10_OPCODE_EMITTHENCUT */
177 { 0 }, /* VGPU10_OPCODE_ENDIF */
178 { 0 }, /* VGPU10_OPCODE_ENDLOOP */
179 { 0 }, /* VGPU10_OPCODE_ENDSWITCH */
180 { 3 }, /* VGPU10_OPCODE_EQ */
181 { 2 }, /* VGPU10_OPCODE_EXP */
182 { 2 }, /* VGPU10_OPCODE_FRC */
183 { 2 }, /* VGPU10_OPCODE_FTOI */
184 { 2 }, /* VGPU10_OPCODE_FTOU */
185 { 3 }, /* VGPU10_OPCODE_GE */
186 { 3 }, /* VGPU10_OPCODE_IADD */
187 { 1 }, /* VGPU10_OPCODE_IF */
188 { 3 }, /* VGPU10_OPCODE_IEQ */
189 { 3 }, /* VGPU10_OPCODE_IGE */
190 { 3 }, /* VGPU10_OPCODE_ILT */
191 { 4 }, /* VGPU10_OPCODE_IMAD */
192 { 3 }, /* VGPU10_OPCODE_IMAX */
193 { 3 }, /* VGPU10_OPCODE_IMIN */
194 { 4 }, /* VGPU10_OPCODE_IMUL */
195 { 3 }, /* VGPU10_OPCODE_INE */
196 { 2 }, /* VGPU10_OPCODE_INEG */
197 { 3 }, /* VGPU10_OPCODE_ISHL */
198 { 3 }, /* VGPU10_OPCODE_ISHR */
199 { 2 }, /* VGPU10_OPCODE_ITOF */
200 { 1 }, /* VGPU10_OPCODE_LABEL */
201 { 3 }, /* VGPU10_OPCODE_LD */
202 { 4 }, /* VGPU10_OPCODE_LD_MS */
203 { 2 }, /* VGPU10_OPCODE_LOG */
204 { 0 }, /* VGPU10_OPCODE_LOOP */
205 { 3 }, /* VGPU10_OPCODE_LT */
206 { 4 }, /* VGPU10_OPCODE_MAD */
207 { 3 }, /* VGPU10_OPCODE_MIN */
208 { 3 }, /* VGPU10_OPCODE_MAX */
209 { UINT32_MAX }, /* VGPU10_OPCODE_CUSTOMDATA: special opcode */
210 { 2 }, /* VGPU10_OPCODE_MOV */
211 { 4 }, /* VGPU10_OPCODE_MOVC */
212 { 3 }, /* VGPU10_OPCODE_MUL */
213 { 3 }, /* VGPU10_OPCODE_NE */
214 { 0 }, /* VGPU10_OPCODE_NOP */
215 { 2 }, /* VGPU10_OPCODE_NOT */
216 { 3 }, /* VGPU10_OPCODE_OR */
217 { 3 }, /* VGPU10_OPCODE_RESINFO */
218 { 0 }, /* VGPU10_OPCODE_RET */
219 { 1 }, /* VGPU10_OPCODE_RETC */
220 { 2 }, /* VGPU10_OPCODE_ROUND_NE */
221 { 2 }, /* VGPU10_OPCODE_ROUND_NI */
222 { 2 }, /* VGPU10_OPCODE_ROUND_PI */
223 { 2 }, /* VGPU10_OPCODE_ROUND_Z */
224 { 2 }, /* VGPU10_OPCODE_RSQ */
225 { 4 }, /* VGPU10_OPCODE_SAMPLE */
226 { 5 }, /* VGPU10_OPCODE_SAMPLE_C */
227 { 5 }, /* VGPU10_OPCODE_SAMPLE_C_LZ */
228 { 5 }, /* VGPU10_OPCODE_SAMPLE_L */
229 { 6 }, /* VGPU10_OPCODE_SAMPLE_D */
230 { 5 }, /* VGPU10_OPCODE_SAMPLE_B */
231 { 2 }, /* VGPU10_OPCODE_SQRT */
232 { 1 }, /* VGPU10_OPCODE_SWITCH */
233 { 3 }, /* VGPU10_OPCODE_SINCOS */
234 { 4 }, /* VGPU10_OPCODE_UDIV */
235 { 3 }, /* VGPU10_OPCODE_ULT */
236 { 3 }, /* VGPU10_OPCODE_UGE */
237 { 4 }, /* VGPU10_OPCODE_UMUL */
238 { 4 }, /* VGPU10_OPCODE_UMAD */
239 { 3 }, /* VGPU10_OPCODE_UMAX */
240 { 3 }, /* VGPU10_OPCODE_UMIN */
241 { 3 }, /* VGPU10_OPCODE_USHR */
242 { 2 }, /* VGPU10_OPCODE_UTOF */
243 { 3 }, /* VGPU10_OPCODE_XOR */
244 { 1 }, /* VGPU10_OPCODE_DCL_RESOURCE */
245 { 1 }, /* VGPU10_OPCODE_DCL_CONSTANT_BUFFER */
246 { 1 }, /* VGPU10_OPCODE_DCL_SAMPLER */
247 { 1 }, /* VGPU10_OPCODE_DCL_INDEX_RANGE */
248 { 0 }, /* VGPU10_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY */
249 { 0 }, /* VGPU10_OPCODE_DCL_GS_INPUT_PRIMITIVE */
250 { 0 }, /* VGPU10_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT */
251 { 1 }, /* VGPU10_OPCODE_DCL_INPUT */
252 { 1 }, /* VGPU10_OPCODE_DCL_INPUT_SGV */
253 { 1 }, /* VGPU10_OPCODE_DCL_INPUT_SIV */
254 { 1 }, /* VGPU10_OPCODE_DCL_INPUT_PS */
255 { 1 }, /* VGPU10_OPCODE_DCL_INPUT_PS_SGV */
256 { 1 }, /* VGPU10_OPCODE_DCL_INPUT_PS_SIV */
257 { 1 }, /* VGPU10_OPCODE_DCL_OUTPUT */
258 { 1 }, /* VGPU10_OPCODE_DCL_OUTPUT_SGV */
259 { 1 }, /* VGPU10_OPCODE_DCL_OUTPUT_SIV */
260 { 0 }, /* VGPU10_OPCODE_DCL_TEMPS */
261 { 0 }, /* VGPU10_OPCODE_DCL_INDEXABLE_TEMP */
262 { 0 }, /* VGPU10_OPCODE_DCL_GLOBAL_FLAGS */
263 { UINT32_MAX }, /* VGPU10_OPCODE_VMWARE: special opcode */
264 { 4 }, /* VGPU10_OPCODE_LOD */
265 { 4 }, /* VGPU10_OPCODE_GATHER4 */
266 { 3 }, /* VGPU10_OPCODE_SAMPLE_POS */
267 { 2 }, /* VGPU10_OPCODE_SAMPLE_INFO */
268 { UINT32_MAX }, /* VGPU10_OPCODE_RESERVED1: special opcode */
269 { 0 }, /* VGPU10_OPCODE_HS_DECLS */
270 { 0 }, /* VGPU10_OPCODE_HS_CONTROL_POINT_PHASE */
271 { 0 }, /* VGPU10_OPCODE_HS_FORK_PHASE */
272 { 0 }, /* VGPU10_OPCODE_HS_JOIN_PHASE */
273 { 1 }, /* VGPU10_OPCODE_EMIT_STREAM */
274 { 1 }, /* VGPU10_OPCODE_CUT_STREAM */
275 { 1 }, /* VGPU10_OPCODE_EMITTHENCUT_STREAM */
276 { 1 }, /* VGPU10_OPCODE_INTERFACE_CALL */
277 { 2 }, /* VGPU10_OPCODE_BUFINFO */
278 { 2 }, /* VGPU10_OPCODE_DERIV_RTX_COARSE */
279 { 2 }, /* VGPU10_OPCODE_DERIV_RTX_FINE */
280 { 2 }, /* VGPU10_OPCODE_DERIV_RTY_COARSE */
281 { 2 }, /* VGPU10_OPCODE_DERIV_RTY_FINE */
282 { 5 }, /* VGPU10_OPCODE_GATHER4_C */
283 { 5 }, /* VGPU10_OPCODE_GATHER4_PO */
284 { 6 }, /* VGPU10_OPCODE_GATHER4_PO_C */
285 { 2 }, /* VGPU10_OPCODE_RCP */
286 { 2 }, /* VGPU10_OPCODE_F32TOF16 */
287 { 2 }, /* VGPU10_OPCODE_F16TOF32 */
288 { 4 }, /* VGPU10_OPCODE_UADDC */
289 { 4 }, /* VGPU10_OPCODE_USUBB */
290 { 2 }, /* VGPU10_OPCODE_COUNTBITS */
291 { 2 }, /* VGPU10_OPCODE_FIRSTBIT_HI */
292 { 2 }, /* VGPU10_OPCODE_FIRSTBIT_LO */
293 { 2 }, /* VGPU10_OPCODE_FIRSTBIT_SHI */
294 { 4 }, /* VGPU10_OPCODE_UBFE */
295 { 4 }, /* VGPU10_OPCODE_IBFE */
296 { 5 }, /* VGPU10_OPCODE_BFI */
297 { 2 }, /* VGPU10_OPCODE_BFREV */
298 { 5 }, /* VGPU10_OPCODE_SWAPC */
299 { 1 }, /* VGPU10_OPCODE_DCL_STREAM */
300 { 0 }, /* VGPU10_OPCODE_DCL_FUNCTION_BODY */
301 { 0 }, /* VGPU10_OPCODE_DCL_FUNCTION_TABLE */
302 { 0 }, /* VGPU10_OPCODE_DCL_INTERFACE */
303 { 0 }, /* VGPU10_OPCODE_DCL_INPUT_CONTROL_POINT_COUNT */
304 { 0 }, /* VGPU10_OPCODE_DCL_OUTPUT_CONTROL_POINT_COUNT */
305 { 0 }, /* VGPU10_OPCODE_DCL_TESS_DOMAIN */
306 { 0 }, /* VGPU10_OPCODE_DCL_TESS_PARTITIONING */
307 { 0 }, /* VGPU10_OPCODE_DCL_TESS_OUTPUT_PRIMITIVE */
308 { 0 }, /* VGPU10_OPCODE_DCL_HS_MAX_TESSFACTOR */
309 { 0 }, /* VGPU10_OPCODE_DCL_HS_FORK_PHASE_INSTANCE_COUNT */
310 { 0 }, /* VGPU10_OPCODE_DCL_HS_JOIN_PHASE_INSTANCE_COUNT */
311 { 0 }, /* VGPU10_OPCODE_DCL_THREAD_GROUP */
312 { 1 }, /* VGPU10_OPCODE_DCL_UAV_TYPED */
313 { 1 }, /* VGPU10_OPCODE_DCL_UAV_RAW */
314 { 1 }, /* VGPU10_OPCODE_DCL_UAV_STRUCTURED */
315 { 1 }, /* VGPU10_OPCODE_DCL_TGSM_RAW */
316 { 1 }, /* VGPU10_OPCODE_DCL_TGSM_STRUCTURED */
317 { 1 }, /* VGPU10_OPCODE_DCL_RESOURCE_RAW */
318 { 1 }, /* VGPU10_OPCODE_DCL_RESOURCE_STRUCTURED */
319 { 3 }, /* VGPU10_OPCODE_LD_UAV_TYPED */
320 { 3 }, /* VGPU10_OPCODE_STORE_UAV_TYPED */
321 { 3 }, /* VGPU10_OPCODE_LD_RAW */
322 { 3 }, /* VGPU10_OPCODE_STORE_RAW */
323 { 4 }, /* VGPU10_OPCODE_LD_STRUCTURED */
324 { 4 }, /* VGPU10_OPCODE_STORE_STRUCTURED */
325 { 3 }, /* VGPU10_OPCODE_ATOMIC_AND */
326 { 3 }, /* VGPU10_OPCODE_ATOMIC_OR */
327 { 3 }, /* VGPU10_OPCODE_ATOMIC_XOR */
328 { 4 }, /* VGPU10_OPCODE_ATOMIC_CMP_STORE */
329 { 3 }, /* VGPU10_OPCODE_ATOMIC_IADD */
330 { 3 }, /* VGPU10_OPCODE_ATOMIC_IMAX */
331 { 3 }, /* VGPU10_OPCODE_ATOMIC_IMIN */
332 { 3 }, /* VGPU10_OPCODE_ATOMIC_UMAX */
333 { 3 }, /* VGPU10_OPCODE_ATOMIC_UMIN */
334 { 2 }, /* VGPU10_OPCODE_IMM_ATOMIC_ALLOC */
335 { 2 }, /* VGPU10_OPCODE_IMM_ATOMIC_CONSUME */
336 { 4 }, /* VGPU10_OPCODE_IMM_ATOMIC_IADD */
337 { 4 }, /* VGPU10_OPCODE_IMM_ATOMIC_AND */
338 { 4 }, /* VGPU10_OPCODE_IMM_ATOMIC_OR */
339 { 4 }, /* VGPU10_OPCODE_IMM_ATOMIC_XOR */
340 { 4 }, /* VGPU10_OPCODE_IMM_ATOMIC_EXCH */
341 { 5 }, /* VGPU10_OPCODE_IMM_ATOMIC_CMP_EXCH */
342 { 4 }, /* VGPU10_OPCODE_IMM_ATOMIC_IMAX */
343 { 4 }, /* VGPU10_OPCODE_IMM_ATOMIC_IMIN */
344 { 4 }, /* VGPU10_OPCODE_IMM_ATOMIC_UMAX */
345 { 4 }, /* VGPU10_OPCODE_IMM_ATOMIC_UMIN */
346 { 0 }, /* VGPU10_OPCODE_SYNC */
347 { 3 }, /* VGPU10_OPCODE_DADD */
348 { 3 }, /* VGPU10_OPCODE_DMAX */
349 { 3 }, /* VGPU10_OPCODE_DMIN */
350 { 3 }, /* VGPU10_OPCODE_DMUL */
351 { 3 }, /* VGPU10_OPCODE_DEQ */
352 { 3 }, /* VGPU10_OPCODE_DGE */
353 { 3 }, /* VGPU10_OPCODE_DLT */
354 { 3 }, /* VGPU10_OPCODE_DNE */
355 { 2 }, /* VGPU10_OPCODE_DMOV */
356 { 4 }, /* VGPU10_OPCODE_DMOVC */
357 { 2 }, /* VGPU10_OPCODE_DTOF */
358 { 2 }, /* VGPU10_OPCODE_FTOD */
359 { 3 }, /* VGPU10_OPCODE_EVAL_SNAPPED */
360 { 3 }, /* VGPU10_OPCODE_EVAL_SAMPLE_INDEX */
361 { 2 }, /* VGPU10_OPCODE_EVAL_CENTROID */
362 { 0 }, /* VGPU10_OPCODE_DCL_GS_INSTANCE_COUNT */
363 { 0 }, /* VGPU10_OPCODE_ABORT */
364 { 0 }, /* VGPU10_OPCODE_DEBUG_BREAK */
365 { 0 }, /* VGPU10_OPCODE_RESERVED0 */
366 { 3 }, /* VGPU10_OPCODE_DDIV */
367 { 4 }, /* VGPU10_OPCODE_DFMA */
368 { 2 }, /* VGPU10_OPCODE_DRCP */
369 { 4 }, /* VGPU10_OPCODE_MSAD */
370 { 2 }, /* VGPU10_OPCODE_DTOI */
371 { 2 }, /* VGPU10_OPCODE_DTOU */
372 { 2 }, /* VGPU10_OPCODE_ITOD */
373 { 2 }, /* VGPU10_OPCODE_UTOD */
374};
375AssertCompile(RT_ELEMENTS(g_aOpcodeInfo) == VGPU10_NUM_OPCODES);
376
377#ifdef LOG_ENABLED
378/*
379 *
380 * Helpers to translate a VGPU10 shader constant to a string.
381 *
382 */
383
/** Emits a switch case which returns the constant's identifier as a string literal. */
#define SVGA_CASE_ID2STR(a_Id) case a_Id: return #a_Id
385
386static const char *dxbcOpcodeToString(uint32_t opcodeType)
387{
388 VGPU10_OPCODE_TYPE enm = (VGPU10_OPCODE_TYPE)opcodeType;
389 switch (enm)
390 {
391 SVGA_CASE_ID2STR(VGPU10_OPCODE_ADD);
392 SVGA_CASE_ID2STR(VGPU10_OPCODE_AND);
393 SVGA_CASE_ID2STR(VGPU10_OPCODE_BREAK);
394 SVGA_CASE_ID2STR(VGPU10_OPCODE_BREAKC);
395 SVGA_CASE_ID2STR(VGPU10_OPCODE_CALL);
396 SVGA_CASE_ID2STR(VGPU10_OPCODE_CALLC);
397 SVGA_CASE_ID2STR(VGPU10_OPCODE_CASE);
398 SVGA_CASE_ID2STR(VGPU10_OPCODE_CONTINUE);
399 SVGA_CASE_ID2STR(VGPU10_OPCODE_CONTINUEC);
400 SVGA_CASE_ID2STR(VGPU10_OPCODE_CUT);
401 SVGA_CASE_ID2STR(VGPU10_OPCODE_DEFAULT);
402 SVGA_CASE_ID2STR(VGPU10_OPCODE_DERIV_RTX);
403 SVGA_CASE_ID2STR(VGPU10_OPCODE_DERIV_RTY);
404 SVGA_CASE_ID2STR(VGPU10_OPCODE_DISCARD);
405 SVGA_CASE_ID2STR(VGPU10_OPCODE_DIV);
406 SVGA_CASE_ID2STR(VGPU10_OPCODE_DP2);
407 SVGA_CASE_ID2STR(VGPU10_OPCODE_DP3);
408 SVGA_CASE_ID2STR(VGPU10_OPCODE_DP4);
409 SVGA_CASE_ID2STR(VGPU10_OPCODE_ELSE);
410 SVGA_CASE_ID2STR(VGPU10_OPCODE_EMIT);
411 SVGA_CASE_ID2STR(VGPU10_OPCODE_EMITTHENCUT);
412 SVGA_CASE_ID2STR(VGPU10_OPCODE_ENDIF);
413 SVGA_CASE_ID2STR(VGPU10_OPCODE_ENDLOOP);
414 SVGA_CASE_ID2STR(VGPU10_OPCODE_ENDSWITCH);
415 SVGA_CASE_ID2STR(VGPU10_OPCODE_EQ);
416 SVGA_CASE_ID2STR(VGPU10_OPCODE_EXP);
417 SVGA_CASE_ID2STR(VGPU10_OPCODE_FRC);
418 SVGA_CASE_ID2STR(VGPU10_OPCODE_FTOI);
419 SVGA_CASE_ID2STR(VGPU10_OPCODE_FTOU);
420 SVGA_CASE_ID2STR(VGPU10_OPCODE_GE);
421 SVGA_CASE_ID2STR(VGPU10_OPCODE_IADD);
422 SVGA_CASE_ID2STR(VGPU10_OPCODE_IF);
423 SVGA_CASE_ID2STR(VGPU10_OPCODE_IEQ);
424 SVGA_CASE_ID2STR(VGPU10_OPCODE_IGE);
425 SVGA_CASE_ID2STR(VGPU10_OPCODE_ILT);
426 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMAD);
427 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMAX);
428 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMIN);
429 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMUL);
430 SVGA_CASE_ID2STR(VGPU10_OPCODE_INE);
431 SVGA_CASE_ID2STR(VGPU10_OPCODE_INEG);
432 SVGA_CASE_ID2STR(VGPU10_OPCODE_ISHL);
433 SVGA_CASE_ID2STR(VGPU10_OPCODE_ISHR);
434 SVGA_CASE_ID2STR(VGPU10_OPCODE_ITOF);
435 SVGA_CASE_ID2STR(VGPU10_OPCODE_LABEL);
436 SVGA_CASE_ID2STR(VGPU10_OPCODE_LD);
437 SVGA_CASE_ID2STR(VGPU10_OPCODE_LD_MS);
438 SVGA_CASE_ID2STR(VGPU10_OPCODE_LOG);
439 SVGA_CASE_ID2STR(VGPU10_OPCODE_LOOP);
440 SVGA_CASE_ID2STR(VGPU10_OPCODE_LT);
441 SVGA_CASE_ID2STR(VGPU10_OPCODE_MAD);
442 SVGA_CASE_ID2STR(VGPU10_OPCODE_MIN);
443 SVGA_CASE_ID2STR(VGPU10_OPCODE_MAX);
444 SVGA_CASE_ID2STR(VGPU10_OPCODE_CUSTOMDATA);
445 SVGA_CASE_ID2STR(VGPU10_OPCODE_MOV);
446 SVGA_CASE_ID2STR(VGPU10_OPCODE_MOVC);
447 SVGA_CASE_ID2STR(VGPU10_OPCODE_MUL);
448 SVGA_CASE_ID2STR(VGPU10_OPCODE_NE);
449 SVGA_CASE_ID2STR(VGPU10_OPCODE_NOP);
450 SVGA_CASE_ID2STR(VGPU10_OPCODE_NOT);
451 SVGA_CASE_ID2STR(VGPU10_OPCODE_OR);
452 SVGA_CASE_ID2STR(VGPU10_OPCODE_RESINFO);
453 SVGA_CASE_ID2STR(VGPU10_OPCODE_RET);
454 SVGA_CASE_ID2STR(VGPU10_OPCODE_RETC);
455 SVGA_CASE_ID2STR(VGPU10_OPCODE_ROUND_NE);
456 SVGA_CASE_ID2STR(VGPU10_OPCODE_ROUND_NI);
457 SVGA_CASE_ID2STR(VGPU10_OPCODE_ROUND_PI);
458 SVGA_CASE_ID2STR(VGPU10_OPCODE_ROUND_Z);
459 SVGA_CASE_ID2STR(VGPU10_OPCODE_RSQ);
460 SVGA_CASE_ID2STR(VGPU10_OPCODE_SAMPLE);
461 SVGA_CASE_ID2STR(VGPU10_OPCODE_SAMPLE_C);
462 SVGA_CASE_ID2STR(VGPU10_OPCODE_SAMPLE_C_LZ);
463 SVGA_CASE_ID2STR(VGPU10_OPCODE_SAMPLE_L);
464 SVGA_CASE_ID2STR(VGPU10_OPCODE_SAMPLE_D);
465 SVGA_CASE_ID2STR(VGPU10_OPCODE_SAMPLE_B);
466 SVGA_CASE_ID2STR(VGPU10_OPCODE_SQRT);
467 SVGA_CASE_ID2STR(VGPU10_OPCODE_SWITCH);
468 SVGA_CASE_ID2STR(VGPU10_OPCODE_SINCOS);
469 SVGA_CASE_ID2STR(VGPU10_OPCODE_UDIV);
470 SVGA_CASE_ID2STR(VGPU10_OPCODE_ULT);
471 SVGA_CASE_ID2STR(VGPU10_OPCODE_UGE);
472 SVGA_CASE_ID2STR(VGPU10_OPCODE_UMUL);
473 SVGA_CASE_ID2STR(VGPU10_OPCODE_UMAD);
474 SVGA_CASE_ID2STR(VGPU10_OPCODE_UMAX);
475 SVGA_CASE_ID2STR(VGPU10_OPCODE_UMIN);
476 SVGA_CASE_ID2STR(VGPU10_OPCODE_USHR);
477 SVGA_CASE_ID2STR(VGPU10_OPCODE_UTOF);
478 SVGA_CASE_ID2STR(VGPU10_OPCODE_XOR);
479 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_RESOURCE);
480 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_CONSTANT_BUFFER);
481 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_SAMPLER);
482 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_INDEX_RANGE);
483 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY);
484 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_GS_INPUT_PRIMITIVE);
485 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT);
486 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_INPUT);
487 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_INPUT_SGV);
488 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_INPUT_SIV);
489 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_INPUT_PS);
490 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_INPUT_PS_SGV);
491 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_INPUT_PS_SIV);
492 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_OUTPUT);
493 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_OUTPUT_SGV);
494 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_OUTPUT_SIV);
495 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_TEMPS);
496 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_INDEXABLE_TEMP);
497 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_GLOBAL_FLAGS);
498 SVGA_CASE_ID2STR(VGPU10_OPCODE_VMWARE);
499 SVGA_CASE_ID2STR(VGPU10_OPCODE_LOD);
500 SVGA_CASE_ID2STR(VGPU10_OPCODE_GATHER4);
501 SVGA_CASE_ID2STR(VGPU10_OPCODE_SAMPLE_POS);
502 SVGA_CASE_ID2STR(VGPU10_OPCODE_SAMPLE_INFO);
503 SVGA_CASE_ID2STR(VGPU10_OPCODE_RESERVED1);
504 SVGA_CASE_ID2STR(VGPU10_OPCODE_HS_DECLS);
505 SVGA_CASE_ID2STR(VGPU10_OPCODE_HS_CONTROL_POINT_PHASE);
506 SVGA_CASE_ID2STR(VGPU10_OPCODE_HS_FORK_PHASE);
507 SVGA_CASE_ID2STR(VGPU10_OPCODE_HS_JOIN_PHASE);
508 SVGA_CASE_ID2STR(VGPU10_OPCODE_EMIT_STREAM);
509 SVGA_CASE_ID2STR(VGPU10_OPCODE_CUT_STREAM);
510 SVGA_CASE_ID2STR(VGPU10_OPCODE_EMITTHENCUT_STREAM);
511 SVGA_CASE_ID2STR(VGPU10_OPCODE_INTERFACE_CALL);
512 SVGA_CASE_ID2STR(VGPU10_OPCODE_BUFINFO);
513 SVGA_CASE_ID2STR(VGPU10_OPCODE_DERIV_RTX_COARSE);
514 SVGA_CASE_ID2STR(VGPU10_OPCODE_DERIV_RTX_FINE);
515 SVGA_CASE_ID2STR(VGPU10_OPCODE_DERIV_RTY_COARSE);
516 SVGA_CASE_ID2STR(VGPU10_OPCODE_DERIV_RTY_FINE);
517 SVGA_CASE_ID2STR(VGPU10_OPCODE_GATHER4_C);
518 SVGA_CASE_ID2STR(VGPU10_OPCODE_GATHER4_PO);
519 SVGA_CASE_ID2STR(VGPU10_OPCODE_GATHER4_PO_C);
520 SVGA_CASE_ID2STR(VGPU10_OPCODE_RCP);
521 SVGA_CASE_ID2STR(VGPU10_OPCODE_F32TOF16);
522 SVGA_CASE_ID2STR(VGPU10_OPCODE_F16TOF32);
523 SVGA_CASE_ID2STR(VGPU10_OPCODE_UADDC);
524 SVGA_CASE_ID2STR(VGPU10_OPCODE_USUBB);
525 SVGA_CASE_ID2STR(VGPU10_OPCODE_COUNTBITS);
526 SVGA_CASE_ID2STR(VGPU10_OPCODE_FIRSTBIT_HI);
527 SVGA_CASE_ID2STR(VGPU10_OPCODE_FIRSTBIT_LO);
528 SVGA_CASE_ID2STR(VGPU10_OPCODE_FIRSTBIT_SHI);
529 SVGA_CASE_ID2STR(VGPU10_OPCODE_UBFE);
530 SVGA_CASE_ID2STR(VGPU10_OPCODE_IBFE);
531 SVGA_CASE_ID2STR(VGPU10_OPCODE_BFI);
532 SVGA_CASE_ID2STR(VGPU10_OPCODE_BFREV);
533 SVGA_CASE_ID2STR(VGPU10_OPCODE_SWAPC);
534 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_STREAM);
535 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_FUNCTION_BODY);
536 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_FUNCTION_TABLE);
537 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_INTERFACE);
538 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_INPUT_CONTROL_POINT_COUNT);
539 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_OUTPUT_CONTROL_POINT_COUNT);
540 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_TESS_DOMAIN);
541 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_TESS_PARTITIONING);
542 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_TESS_OUTPUT_PRIMITIVE);
543 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_HS_MAX_TESSFACTOR);
544 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_HS_FORK_PHASE_INSTANCE_COUNT);
545 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_HS_JOIN_PHASE_INSTANCE_COUNT);
546 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_THREAD_GROUP);
547 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_UAV_TYPED);
548 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_UAV_RAW);
549 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_UAV_STRUCTURED);
550 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_TGSM_RAW);
551 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_TGSM_STRUCTURED);
552 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_RESOURCE_RAW);
553 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_RESOURCE_STRUCTURED);
554 SVGA_CASE_ID2STR(VGPU10_OPCODE_LD_UAV_TYPED);
555 SVGA_CASE_ID2STR(VGPU10_OPCODE_STORE_UAV_TYPED);
556 SVGA_CASE_ID2STR(VGPU10_OPCODE_LD_RAW);
557 SVGA_CASE_ID2STR(VGPU10_OPCODE_STORE_RAW);
558 SVGA_CASE_ID2STR(VGPU10_OPCODE_LD_STRUCTURED);
559 SVGA_CASE_ID2STR(VGPU10_OPCODE_STORE_STRUCTURED);
560 SVGA_CASE_ID2STR(VGPU10_OPCODE_ATOMIC_AND);
561 SVGA_CASE_ID2STR(VGPU10_OPCODE_ATOMIC_OR);
562 SVGA_CASE_ID2STR(VGPU10_OPCODE_ATOMIC_XOR);
563 SVGA_CASE_ID2STR(VGPU10_OPCODE_ATOMIC_CMP_STORE);
564 SVGA_CASE_ID2STR(VGPU10_OPCODE_ATOMIC_IADD);
565 SVGA_CASE_ID2STR(VGPU10_OPCODE_ATOMIC_IMAX);
566 SVGA_CASE_ID2STR(VGPU10_OPCODE_ATOMIC_IMIN);
567 SVGA_CASE_ID2STR(VGPU10_OPCODE_ATOMIC_UMAX);
568 SVGA_CASE_ID2STR(VGPU10_OPCODE_ATOMIC_UMIN);
569 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMM_ATOMIC_ALLOC);
570 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMM_ATOMIC_CONSUME);
571 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMM_ATOMIC_IADD);
572 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMM_ATOMIC_AND);
573 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMM_ATOMIC_OR);
574 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMM_ATOMIC_XOR);
575 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMM_ATOMIC_EXCH);
576 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMM_ATOMIC_CMP_EXCH);
577 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMM_ATOMIC_IMAX);
578 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMM_ATOMIC_IMIN);
579 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMM_ATOMIC_UMAX);
580 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMM_ATOMIC_UMIN);
581 SVGA_CASE_ID2STR(VGPU10_OPCODE_SYNC);
582 SVGA_CASE_ID2STR(VGPU10_OPCODE_DADD);
583 SVGA_CASE_ID2STR(VGPU10_OPCODE_DMAX);
584 SVGA_CASE_ID2STR(VGPU10_OPCODE_DMIN);
585 SVGA_CASE_ID2STR(VGPU10_OPCODE_DMUL);
586 SVGA_CASE_ID2STR(VGPU10_OPCODE_DEQ);
587 SVGA_CASE_ID2STR(VGPU10_OPCODE_DGE);
588 SVGA_CASE_ID2STR(VGPU10_OPCODE_DLT);
589 SVGA_CASE_ID2STR(VGPU10_OPCODE_DNE);
590 SVGA_CASE_ID2STR(VGPU10_OPCODE_DMOV);
591 SVGA_CASE_ID2STR(VGPU10_OPCODE_DMOVC);
592 SVGA_CASE_ID2STR(VGPU10_OPCODE_DTOF);
593 SVGA_CASE_ID2STR(VGPU10_OPCODE_FTOD);
594 SVGA_CASE_ID2STR(VGPU10_OPCODE_EVAL_SNAPPED);
595 SVGA_CASE_ID2STR(VGPU10_OPCODE_EVAL_SAMPLE_INDEX);
596 SVGA_CASE_ID2STR(VGPU10_OPCODE_EVAL_CENTROID);
597 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_GS_INSTANCE_COUNT);
598 SVGA_CASE_ID2STR(VGPU10_OPCODE_ABORT);
599 SVGA_CASE_ID2STR(VGPU10_OPCODE_DEBUG_BREAK);
600 SVGA_CASE_ID2STR(VGPU10_OPCODE_RESERVED0);
601 SVGA_CASE_ID2STR(VGPU10_OPCODE_DDIV);
602 SVGA_CASE_ID2STR(VGPU10_OPCODE_DFMA);
603 SVGA_CASE_ID2STR(VGPU10_OPCODE_DRCP);
604 SVGA_CASE_ID2STR(VGPU10_OPCODE_MSAD);
605 SVGA_CASE_ID2STR(VGPU10_OPCODE_DTOI);
606 SVGA_CASE_ID2STR(VGPU10_OPCODE_DTOU);
607 SVGA_CASE_ID2STR(VGPU10_OPCODE_ITOD);
608 SVGA_CASE_ID2STR(VGPU10_OPCODE_UTOD);
609 SVGA_CASE_ID2STR(VGPU10_NUM_OPCODES);
610 }
611 return NULL;
612}
613
614
615static const char *dxbcShaderTypeToString(uint32_t value)
616{
617 VGPU10_PROGRAM_TYPE enm = (VGPU10_PROGRAM_TYPE)value;
618 switch (enm)
619 {
620 SVGA_CASE_ID2STR(VGPU10_PIXEL_SHADER);
621 SVGA_CASE_ID2STR(VGPU10_VERTEX_SHADER);
622 SVGA_CASE_ID2STR(VGPU10_GEOMETRY_SHADER);
623 SVGA_CASE_ID2STR(VGPU10_HULL_SHADER);
624 SVGA_CASE_ID2STR(VGPU10_DOMAIN_SHADER);
625 SVGA_CASE_ID2STR(VGPU10_COMPUTE_SHADER);
626 }
627 return NULL;
628}
629
630
631static const char *dxbcCustomDataClassToString(uint32_t value)
632{
633 VGPU10_CUSTOMDATA_CLASS enm = (VGPU10_CUSTOMDATA_CLASS)value;
634 switch (enm)
635 {
636 SVGA_CASE_ID2STR(VGPU10_CUSTOMDATA_COMMENT);
637 SVGA_CASE_ID2STR(VGPU10_CUSTOMDATA_DEBUGINFO);
638 SVGA_CASE_ID2STR(VGPU10_CUSTOMDATA_OPAQUE);
639 SVGA_CASE_ID2STR(VGPU10_CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER);
640 }
641 return NULL;
642}
643
644
645static const char *dxbcSystemNameToString(uint32_t value)
646{
647 VGPU10_SYSTEM_NAME enm = (VGPU10_SYSTEM_NAME)value;
648 switch (enm)
649 {
650 SVGA_CASE_ID2STR(VGPU10_NAME_UNDEFINED);
651 SVGA_CASE_ID2STR(VGPU10_NAME_POSITION);
652 SVGA_CASE_ID2STR(VGPU10_NAME_CLIP_DISTANCE);
653 SVGA_CASE_ID2STR(VGPU10_NAME_CULL_DISTANCE);
654 SVGA_CASE_ID2STR(VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX);
655 SVGA_CASE_ID2STR(VGPU10_NAME_VIEWPORT_ARRAY_INDEX);
656 SVGA_CASE_ID2STR(VGPU10_NAME_VERTEX_ID);
657 SVGA_CASE_ID2STR(VGPU10_NAME_PRIMITIVE_ID);
658 SVGA_CASE_ID2STR(VGPU10_NAME_INSTANCE_ID);
659 SVGA_CASE_ID2STR(VGPU10_NAME_IS_FRONT_FACE);
660 SVGA_CASE_ID2STR(VGPU10_NAME_SAMPLE_INDEX);
661 SVGA_CASE_ID2STR(VGPU10_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR);
662 SVGA_CASE_ID2STR(VGPU10_NAME_FINAL_QUAD_V_EQ_0_EDGE_TESSFACTOR);
663 SVGA_CASE_ID2STR(VGPU10_NAME_FINAL_QUAD_U_EQ_1_EDGE_TESSFACTOR);
664 SVGA_CASE_ID2STR(VGPU10_NAME_FINAL_QUAD_V_EQ_1_EDGE_TESSFACTOR);
665 SVGA_CASE_ID2STR(VGPU10_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR);
666 SVGA_CASE_ID2STR(VGPU10_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR);
667 SVGA_CASE_ID2STR(VGPU10_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR);
668 SVGA_CASE_ID2STR(VGPU10_NAME_FINAL_TRI_V_EQ_0_EDGE_TESSFACTOR);
669 SVGA_CASE_ID2STR(VGPU10_NAME_FINAL_TRI_W_EQ_0_EDGE_TESSFACTOR);
670 SVGA_CASE_ID2STR(VGPU10_NAME_FINAL_TRI_INSIDE_TESSFACTOR);
671 SVGA_CASE_ID2STR(VGPU10_NAME_FINAL_LINE_DETAIL_TESSFACTOR);
672 SVGA_CASE_ID2STR(VGPU10_NAME_FINAL_LINE_DENSITY_TESSFACTOR);
673 }
674 return NULL;
675}
676
677
678static const char *dxbcOperandTypeToString(uint32_t value)
679{
680 VGPU10_OPERAND_TYPE enm = (VGPU10_OPERAND_TYPE)value;
681 switch (enm)
682 {
683 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_TEMP);
684 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INPUT);
685 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_OUTPUT);
686 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INDEXABLE_TEMP);
687 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_IMMEDIATE32);
688 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_IMMEDIATE64);
689 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_SAMPLER);
690 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_RESOURCE);
691 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_CONSTANT_BUFFER);
692 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER);
693 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_LABEL);
694 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID);
695 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_OUTPUT_DEPTH);
696 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_NULL);
697 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_RASTERIZER);
698 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_OUTPUT_COVERAGE_MASK);
699 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_STREAM);
700 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_FUNCTION_BODY);
701 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_FUNCTION_TABLE);
702 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INTERFACE);
703 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_FUNCTION_INPUT);
704 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_FUNCTION_OUTPUT);
705 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID);
706 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INPUT_FORK_INSTANCE_ID);
707 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INPUT_JOIN_INSTANCE_ID);
708 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT);
709 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT);
710 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT);
711 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INPUT_DOMAIN_POINT);
712 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_THIS_POINTER);
713 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_UAV);
714 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY);
715 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INPUT_THREAD_ID);
716 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INPUT_THREAD_GROUP_ID);
717 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP);
718 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INPUT_COVERAGE_MASK);
719 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP_FLATTENED);
720 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID);
721 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_OUTPUT_DEPTH_GREATER_EQUAL);
722 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_OUTPUT_DEPTH_LESS_EQUAL);
723 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_CYCLE_COUNTER);
724 SVGA_CASE_ID2STR(VGPU10_NUM_OPERANDS);
725 }
726 return NULL;
727}
728
729
730static const char *dxbcOperandNumComponentsToString(uint32_t value)
731{
732 VGPU10_OPERAND_NUM_COMPONENTS enm = (VGPU10_OPERAND_NUM_COMPONENTS)value;
733 switch (enm)
734 {
735 SVGA_CASE_ID2STR(VGPU10_OPERAND_0_COMPONENT);
736 SVGA_CASE_ID2STR(VGPU10_OPERAND_1_COMPONENT);
737 SVGA_CASE_ID2STR(VGPU10_OPERAND_4_COMPONENT);
738 SVGA_CASE_ID2STR(VGPU10_OPERAND_N_COMPONENT);
739 }
740 return NULL;
741}
742
743
744static const char *dxbcOperandComponentModeToString(uint32_t value)
745{
746 VGPU10_OPERAND_4_COMPONENT_SELECTION_MODE enm = (VGPU10_OPERAND_4_COMPONENT_SELECTION_MODE)value;
747 switch (enm)
748 {
749 SVGA_CASE_ID2STR(VGPU10_OPERAND_4_COMPONENT_MASK_MODE);
750 SVGA_CASE_ID2STR(VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE);
751 SVGA_CASE_ID2STR(VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE);
752 }
753 return NULL;
754}
755
756
757static const char *dxbcOperandComponentNameToString(uint32_t value)
758{
759 VGPU10_COMPONENT_NAME enm = (VGPU10_COMPONENT_NAME)value;
760 switch (enm)
761 {
762 SVGA_CASE_ID2STR(VGPU10_COMPONENT_X);
763 SVGA_CASE_ID2STR(VGPU10_COMPONENT_Y);
764 SVGA_CASE_ID2STR(VGPU10_COMPONENT_Z);
765 SVGA_CASE_ID2STR(VGPU10_COMPONENT_W);
766 }
767 return NULL;
768}
769
770
771static const char *dxbcOperandIndexDimensionToString(uint32_t value)
772{
773 VGPU10_OPERAND_INDEX_DIMENSION enm = (VGPU10_OPERAND_INDEX_DIMENSION)value;
774 switch (enm)
775 {
776 SVGA_CASE_ID2STR(VGPU10_OPERAND_INDEX_0D);
777 SVGA_CASE_ID2STR(VGPU10_OPERAND_INDEX_1D);
778 SVGA_CASE_ID2STR(VGPU10_OPERAND_INDEX_2D);
779 SVGA_CASE_ID2STR(VGPU10_OPERAND_INDEX_3D);
780 }
781 return NULL;
782}
783
784
785static const char *dxbcOperandIndexRepresentationToString(uint32_t value)
786{
787 VGPU10_OPERAND_INDEX_REPRESENTATION enm = (VGPU10_OPERAND_INDEX_REPRESENTATION)value;
788 switch (enm)
789 {
790 SVGA_CASE_ID2STR(VGPU10_OPERAND_INDEX_IMMEDIATE32);
791 SVGA_CASE_ID2STR(VGPU10_OPERAND_INDEX_IMMEDIATE64);
792 SVGA_CASE_ID2STR(VGPU10_OPERAND_INDEX_RELATIVE);
793 SVGA_CASE_ID2STR(VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE);
794 SVGA_CASE_ID2STR(VGPU10_OPERAND_INDEX_IMMEDIATE64_PLUS_RELATIVE);
795 }
796 return NULL;
797}
798
799
800static const char *dxbcInterpolationModeToString(uint32_t value)
801{
802 VGPU10_INTERPOLATION_MODE enm = (VGPU10_INTERPOLATION_MODE)value;
803 switch (enm)
804 {
805 SVGA_CASE_ID2STR(VGPU10_INTERPOLATION_UNDEFINED);
806 SVGA_CASE_ID2STR(VGPU10_INTERPOLATION_CONSTANT);
807 SVGA_CASE_ID2STR(VGPU10_INTERPOLATION_LINEAR);
808 SVGA_CASE_ID2STR(VGPU10_INTERPOLATION_LINEAR_CENTROID);
809 SVGA_CASE_ID2STR(VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE);
810 SVGA_CASE_ID2STR(VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID);
811 SVGA_CASE_ID2STR(VGPU10_INTERPOLATION_LINEAR_SAMPLE);
812 SVGA_CASE_ID2STR(VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE);
813 }
814 return NULL;
815}
816
817
818static const char *dxbcResourceDimensionToString(uint32_t value)
819{
820 VGPU10_RESOURCE_DIMENSION enm = (VGPU10_RESOURCE_DIMENSION)value;
821 switch (enm)
822 {
823 SVGA_CASE_ID2STR(VGPU10_RESOURCE_DIMENSION_UNKNOWN);
824 SVGA_CASE_ID2STR(VGPU10_RESOURCE_DIMENSION_BUFFER);
825 SVGA_CASE_ID2STR(VGPU10_RESOURCE_DIMENSION_TEXTURE1D);
826 SVGA_CASE_ID2STR(VGPU10_RESOURCE_DIMENSION_TEXTURE2D);
827 SVGA_CASE_ID2STR(VGPU10_RESOURCE_DIMENSION_TEXTURE2DMS);
828 SVGA_CASE_ID2STR(VGPU10_RESOURCE_DIMENSION_TEXTURE3D);
829 SVGA_CASE_ID2STR(VGPU10_RESOURCE_DIMENSION_TEXTURECUBE);
830 SVGA_CASE_ID2STR(VGPU10_RESOURCE_DIMENSION_TEXTURE1DARRAY);
831 SVGA_CASE_ID2STR(VGPU10_RESOURCE_DIMENSION_TEXTURE2DARRAY);
832 SVGA_CASE_ID2STR(VGPU10_RESOURCE_DIMENSION_TEXTURE2DMSARRAY);
833 SVGA_CASE_ID2STR(VGPU10_RESOURCE_DIMENSION_TEXTURECUBEARRAY);
834 }
835 return NULL;
836}
837
838
839static const char *dxbcVmwareOpcodeTypeToString(uint32_t value)
840{
841 VGPU10_VMWARE_OPCODE_TYPE enm = (VGPU10_VMWARE_OPCODE_TYPE)value;
842 switch (enm)
843 {
844 SVGA_CASE_ID2STR(VGPU10_VMWARE_OPCODE_IDIV);
845 SVGA_CASE_ID2STR(VGPU10_VMWARE_OPCODE_DFRC);
846 SVGA_CASE_ID2STR(VGPU10_VMWARE_OPCODE_DRSQ);
847 SVGA_CASE_ID2STR(VGPU10_VMWARE_NUM_OPCODES);
848 }
849 return NULL;
850}
851
852#endif /* LOG_ENABLED */
853
854/*
855 * MD5 from IPRT (alt-md5.cpp) for DXBC hash calculation.
856 * DXBC hash function uses a different padding for the data, see dxbcHash.
857 * Therefore RTMd5Final is not needed. Two functions have been renamed: dxbcRTMd5Update dxbcRTMd5Init.
858 */
859
860
/*
 * The four MD5 core functions. F1 uses the cheaper equivalent of the
 * textbook form (x & y | ~x & z); F2 is F1 with its arguments rotated.
 */
#define F1(x, y, z) ((z) ^ ((x) & ((y) ^ (z))))
#define F2(x, y, z) F1(z, x, y)
#define F3(x, y, z) ((x) ^ (y) ^ (z))
#define F4(x, y, z) ((y) ^ ((x) | ~(z)))


/*
 * The central step of the MD5 algorithm: mix f(x, y, z) and the data word
 * into w, rotate w left by s bits, then add x.
 */
#define MD5STEP(f, w, x, y, z, data, s) \
    ( (w) += f(x, y, z) + (data), (w) = (w) << (s) | (w) >> (32 - (s)), (w) += (x) )
872
873
/**
 * The core of the MD5 algorithm: folds one block of 16 data words into the
 * running hash state.
 *
 * The 64 steps are driven by tables instead of being spelled out one by one;
 * the round functions, data word order, additive constants and rotation
 * counts are the same as in the unrolled form.
 *
 * @param   buf     The four state words; updated in place.
 * @param   in      The 16 data words of the block.
 */
static void rtMd5Transform(uint32_t buf[4], uint32_t const in[16])
{
    /* Additive constant for each of the 64 steps. */
    static uint32_t const s_au32StepConst[64] =
    {
        0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee, 0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501,
        0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be, 0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821,

        0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa, 0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8,
        0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed, 0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a,

        0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c, 0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70,
        0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05, 0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665,

        0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039, 0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1,
        0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1, 0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391,
    };

    /* Left rotation counts: one row per round, cycled through every four steps. */
    static uint8_t const s_acRotate[4][4] =
    {
        { 7, 12, 17, 22 },
        { 5,  9, 14, 20 },
        { 4, 11, 16, 23 },
        { 6, 10, 15, 21 },
    };

    uint32_t a = buf[0];
    uint32_t b = buf[1];
    uint32_t c = buf[2];
    uint32_t d = buf[3];

    for (unsigned iStep = 0; iStep < 64; ++iStep)
    {
        unsigned const iRound = iStep >> 4;
        uint32_t       uMix;
        unsigned       idxData;

        switch (iRound)
        {
            case 0:
                uMix    = d ^ (b & (c ^ d));
                idxData = iStep;
                break;
            case 1:
                uMix    = c ^ (d & (b ^ c));
                idxData = (5 * iStep + 1) & 15;
                break;
            case 2:
                uMix    = b ^ c ^ d;
                idxData = (3 * iStep + 5) & 15;
                break;
            default:
                uMix    = c ^ (b | ~d);
                idxData = (7 * iStep) & 15;
                break;
        }

        unsigned const cRotate = s_acRotate[iRound][iStep & 3];
        uint32_t uSum = a + uMix + in[idxData] + s_au32StepConst[iStep];
        uSum = (uSum << cRotate) | (uSum >> (32 - cRotate));
        uSum += b;

        /* Rotate the working registers for the next step. */
        a = d;
        d = c;
        c = b;
        b = uSum;
    }

    buf[0] += a;
    buf[1] += b;
    buf[2] += c;
    buf[3] += d;
}
962
963
#ifdef RT_BIG_ENDIAN
/*
 * Applies RT_LE2H_U32 to each of the 'longs' words starting at 'buf'.
 * The count must be at least one, the loop tests it only after the first word.
 *
 * Note: this code is harmless on little-endian machines.
 */
static void rtMd5ByteReverse(uint32_t *buf, unsigned int longs)
{
    do
    {
        uint32_t const uWord = *buf;
        *buf++ = RT_LE2H_U32(uWord);
    } while (--longs);
}
#else /* little endian - do nothing */
# define rtMd5ByteReverse(buf, len) do { /* Nothing */ } while (0)
#endif
982
983
984/*
985 * Start MD5 accumulation. Set bit count to 0 and buffer to mysterious
986 * initialization constants.
987 */
988static void dxbcRTMd5Init(PRTMD5CONTEXT pCtx)
989{
990 pCtx->AltPrivate.buf[0] = 0x67452301;
991 pCtx->AltPrivate.buf[1] = 0xefcdab89;
992 pCtx->AltPrivate.buf[2] = 0x98badcfe;
993 pCtx->AltPrivate.buf[3] = 0x10325476;
994
995 pCtx->AltPrivate.bits[0] = 0;
996 pCtx->AltPrivate.bits[1] = 0;
997}
998
999
1000/*
1001 * Update context to reflect the concatenation of another buffer full
1002 * of bytes.
1003 */
1004/** @todo Optimize this, because len is always a multiple of 64. */
1005static void dxbcRTMd5Update(PRTMD5CONTEXT pCtx, const void *pvBuf, size_t len)
1006{
1007 const uint8_t *buf = (const uint8_t *)pvBuf;
1008 uint32_t t;
1009
1010 /* Update bitcount */
1011 t = pCtx->AltPrivate.bits[0];
1012 if ((pCtx->AltPrivate.bits[0] = t + ((uint32_t) len << 3)) < t)
1013 pCtx->AltPrivate.bits[1]++; /* Carry from low to high */
1014 pCtx->AltPrivate.bits[1] += (uint32_t)(len >> 29);
1015
1016 t = (t >> 3) & 0x3f; /* Bytes already in shsInfo->data */
1017
1018 /* Handle any leading odd-sized chunks */
1019 if (t)
1020 {
1021 uint8_t *p = (uint8_t *) pCtx->AltPrivate.in + t;
1022
1023 t = 64 - t;
1024 if (len < t)
1025 {
1026 memcpy(p, buf, len);
1027 return;
1028 }
1029 memcpy(p, buf, t);
1030 rtMd5ByteReverse(pCtx->AltPrivate.in, 16);
1031 rtMd5Transform(pCtx->AltPrivate.buf, pCtx->AltPrivate.in);
1032 buf += t;
1033 len -= t;
1034 }
1035
1036 /* Process data in 64-byte chunks */
1037#ifndef RT_BIG_ENDIAN
1038 if (!((uintptr_t)buf & 0x3))
1039 {
1040 while (len >= 64) {
1041 rtMd5Transform(pCtx->AltPrivate.buf, (uint32_t const *)buf);
1042 buf += 64;
1043 len -= 64;
1044 }
1045 }
1046 else
1047#endif
1048 {
1049 while (len >= 64) {
1050 memcpy(pCtx->AltPrivate.in, buf, 64);
1051 rtMd5ByteReverse(pCtx->AltPrivate.in, 16);
1052 rtMd5Transform(pCtx->AltPrivate.buf, pCtx->AltPrivate.in);
1053 buf += 64;
1054 len -= 64;
1055 }
1056 }
1057
1058 /* Handle any remaining bytes of data */
1059 memcpy(pCtx->AltPrivate.in, buf, len);
1060}
1061
1062
1063static void dxbcHash(void const *pvData, uint32_t cbData, uint8_t pabDigest[RTMD5HASHSIZE])
1064{
1065 size_t const kBlockSize = 64;
1066 uint8_t au8BlockBuffer[kBlockSize];
1067
1068 static uint8_t const s_au8Padding[kBlockSize] =
1069 {
1070 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1071 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1072 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1073 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1074 };
1075
1076 RTMD5CONTEXT Ctx;
1077 PRTMD5CONTEXT const pCtx = &Ctx;
1078 dxbcRTMd5Init(pCtx);
1079
1080 uint8_t const *pu8Data = (uint8_t *)pvData;
1081 size_t cbRemaining = cbData;
1082
1083 size_t const cbCompleteBlocks = cbData & ~ (kBlockSize - 1);
1084 dxbcRTMd5Update(pCtx, pu8Data, cbCompleteBlocks);
1085 pu8Data += cbCompleteBlocks;
1086 cbRemaining -= cbCompleteBlocks;
1087
1088 /* Custom padding. */
1089 if (cbRemaining >= kBlockSize - 2 * sizeof(uint32_t))
1090 {
1091 /* Two additional blocks. */
1092 memcpy(&au8BlockBuffer[0], pu8Data, cbRemaining);
1093 memcpy(&au8BlockBuffer[cbRemaining], s_au8Padding, kBlockSize - cbRemaining);
1094 dxbcRTMd5Update(pCtx, au8BlockBuffer, kBlockSize);
1095
1096 memset(&au8BlockBuffer[sizeof(uint32_t)], 0, kBlockSize - 2 * sizeof(uint32_t));
1097 }
1098 else
1099 {
1100 /* One additional block. */
1101 memcpy(&au8BlockBuffer[sizeof(uint32_t)], pu8Data, cbRemaining);
1102 memcpy(&au8BlockBuffer[sizeof(uint32_t) + cbRemaining], s_au8Padding, kBlockSize - cbRemaining - 2 * sizeof(uint32_t));
1103 }
1104
1105 /* Set the first and last dwords of the last block. */
1106 *(uint32_t *)&au8BlockBuffer[0] = cbData << 3;
1107 *(uint32_t *)&au8BlockBuffer[kBlockSize - sizeof(uint32_t)] = (cbData << 1) | 1;
1108 dxbcRTMd5Update(pCtx, au8BlockBuffer, kBlockSize);
1109
1110 AssertCompile(sizeof(pCtx->AltPrivate.buf) == RTMD5HASHSIZE);
1111 memcpy(pabDigest, pCtx->AltPrivate.buf, RTMD5HASHSIZE);
1112}
1113
1114
1115/*
1116 *
1117 * Shader token reader.
1118 *
1119 */
1120
/** Cursor over an array of 32-bit shader tokens. */
typedef struct DXBCTokenReader
{
    /** Next token to read. */
    uint32_t const *pToken;
    /** How many tokens there are in total. */
    uint32_t        cToken;
    /** How many tokens remain to be read. */
    uint32_t        cRemainingToken;
} DXBCTokenReader;
1127
1128
#ifdef LOG_ENABLED
/** Returns the byte offset of the next token: tokens already read times four. */
DECLINLINE(uint32_t) dxbcTokenReaderByteOffset(DXBCTokenReader *r)
{
    uint32_t const cTokensRead = r->cToken - r->cRemainingToken;
    return cTokensRead * 4;
}
#endif
1135
1136
#if 0 // Unused for now
/** Returns the number of tokens which have not been read yet. */
DECLINLINE(uint32_t) dxbcTokenReaderRemaining(DXBCTokenReader *r)
{
    uint32_t const cLeft = r->cRemainingToken;
    return cLeft;
}
#endif
1143
1144
1145DECLINLINE(uint32_t const *) dxbcTokenReaderPtr(DXBCTokenReader *r)
1146{
1147 return r->pToken;
1148}
1149
1150
1151DECLINLINE(bool) dxbcTokenReaderCanRead(DXBCTokenReader *r, uint32_t cToken)
1152{
1153 return cToken <= r->cRemainingToken;
1154}
1155
1156
1157DECLINLINE(void) dxbcTokenReaderSkip(DXBCTokenReader *r, uint32_t cToken)
1158{
1159 AssertReturnVoid(r->cRemainingToken >= cToken);
1160 r->cRemainingToken -= cToken;
1161 r->pToken += cToken;
1162}
1163
1164
1165DECLINLINE(uint32_t) dxbcTokenReaderRead32(DXBCTokenReader *r)
1166{
1167 AssertReturn(r->cRemainingToken, 0);
1168 --r->cRemainingToken;
1169 return *(r->pToken++);
1170}
1171
1172
1173DECLINLINE(uint64_t) dxbcTokenReaderRead64(DXBCTokenReader *r)
1174{
1175 uint64_t const u64Low = dxbcTokenReaderRead32(r);
1176 uint64_t const u64High = dxbcTokenReaderRead32(r);
1177 return u64Low + (u64High << 32);
1178}
1179
1180
1181/*
1182 *
1183 * Byte writer.
1184 *
1185 */
1186
/** Growable output buffer with a movable write position. */
typedef struct DXBCByteWriter
{
    /** First byte of the buffer. */
    uint8_t *pu8ByteCodeBegin;
    /** Next byte to be written. */
    uint8_t *pu8ByteCodePtr;
    /** How many bytes are allocated for the buffer. */
    uint32_t cbAllocated;
    /** How many bytes remain between the write position and the end of the buffer. */
    uint32_t cbRemaining;
    /** Offset of the first never written byte. Since the writer allows to jump
     * in the buffer, this field tracks the upper boundary of the written data. */
    uint32_t cbWritten;
    /** Status code of the writer; the writer functions check it with RT_FAILURE
     * before doing any work. */
    int32_t  rc;
} DXBCByteWriter;
1199
1200
/** Write position saved by dxbcByteWriterSetOffset for dxbcByteWriterRestore. */
typedef struct DXBCByteWriterState
{
    /** Offset of the next free byte. */
    uint32_t off;
} DXBCByteWriterState;
1205
1206
1207DECLINLINE(void *) dxbcByteWriterPtr(DXBCByteWriter *w)
1208{
1209 return w->pu8ByteCodePtr;
1210}
1211
1212
1213DECLINLINE(uint32_t) dxbcByteWriterSize(DXBCByteWriter *w)
1214{
1215 return (uint32_t)(w->pu8ByteCodePtr - w->pu8ByteCodeBegin);
1216}
1217
1218
1219static bool dxbcByteWriterRealloc(DXBCByteWriter *w, uint32_t cbNew)
1220{
1221 void *pvNew = RTMemAllocZ(cbNew);
1222 if (!pvNew)
1223 {
1224 w->rc = VERR_NO_MEMORY;
1225 return false;
1226 }
1227
1228 uint32_t const cbCurrent = dxbcByteWriterSize(w);
1229 memcpy(pvNew, w->pu8ByteCodeBegin, cbCurrent);
1230 RTMemFree(w->pu8ByteCodeBegin);
1231
1232 w->pu8ByteCodeBegin = (uint8_t *)pvNew;
1233 w->pu8ByteCodePtr = w->pu8ByteCodeBegin + cbCurrent;
1234 w->cbAllocated = cbNew;
1235 w->cbRemaining = cbNew - cbCurrent;
1236 return true;
1237}
1238
1239
1240DECLINLINE(bool) dxbcByteWriterSetOffset(DXBCByteWriter *w, uint32_t off, DXBCByteWriterState *pSavedWriterState)
1241{
1242 if (RT_FAILURE(w->rc))
1243 return false;
1244
1245 uint32_t const cbNew = RT_ALIGN_32(off, 1024);
1246 uint32_t const cbMax = 2 * SVGA3D_MAX_SHADER_MEMORY_BYTES;
1247 AssertReturnStmt(off < cbMax && cbNew < cbMax, w->rc = VERR_INVALID_PARAMETER, false);
1248
1249 if (cbNew > w->cbAllocated)
1250 {
1251 if (!dxbcByteWriterRealloc(w, cbNew))
1252 return false;
1253 }
1254
1255 pSavedWriterState->off = dxbcByteWriterSize(w);
1256
1257 w->pu8ByteCodePtr = w->pu8ByteCodeBegin + off;
1258 w->cbRemaining = w->cbAllocated - off;
1259 return true;
1260}
1261
1262
1263DECLINLINE(void) dxbcByteWriterRestore(DXBCByteWriter *w, DXBCByteWriterState *pSavedWriterState)
1264{
1265 w->pu8ByteCodePtr = w->pu8ByteCodeBegin + pSavedWriterState->off;
1266 w->cbRemaining = w->cbAllocated - pSavedWriterState->off;
1267}
1268
1269
1270DECLINLINE(void) dxbcByteWriterCommit(DXBCByteWriter *w, uint32_t cbCommit)
1271{
1272 if (RT_FAILURE(w->rc))
1273 return;
1274
1275 Assert(cbCommit < w->cbRemaining);
1276 cbCommit = RT_MIN(cbCommit, w->cbRemaining);
1277 w->pu8ByteCodePtr += cbCommit;
1278 w->cbRemaining -= cbCommit;
1279 w->cbWritten = RT_MAX(w->cbWritten, w->cbAllocated - w->cbRemaining);
1280}
1281
1282
1283DECLINLINE(bool) dxbcByteWriterCanWrite(DXBCByteWriter *w, uint32_t cbMore)
1284{
1285 if (RT_FAILURE(w->rc))
1286 return false;
1287
1288 if (cbMore <= w->cbRemaining)
1289 return true;
1290
1291 /* Do not allow to allocate more than 2 * SVGA3D_MAX_SHADER_MEMORY_BYTES */
1292 uint32_t const cbMax = 2 * SVGA3D_MAX_SHADER_MEMORY_BYTES;
1293 AssertReturnStmt(cbMore < cbMax && RT_ALIGN_32(cbMore, 4096) <= cbMax - w->cbAllocated, w->rc = VERR_INVALID_PARAMETER, false);
1294
1295 uint32_t cbNew = w->cbAllocated + RT_ALIGN_32(cbMore, 4096);
1296 return dxbcByteWriterRealloc(w, cbNew);
1297}
1298
1299
1300DECLINLINE(bool) dxbcByteWriterAddTokens(DXBCByteWriter *w, uint32_t const *paToken, uint32_t cToken)
1301{
1302 uint32_t const cbWrite = cToken * sizeof(uint32_t);
1303 if (dxbcByteWriterCanWrite(w, cbWrite))
1304 {
1305 memcpy(dxbcByteWriterPtr(w), paToken, cbWrite);
1306 dxbcByteWriterCommit(w, cbWrite);
1307 return true;
1308 }
1309
1310 AssertFailed();
1311 return false;
1312}
1313
1314
1315DECLINLINE(bool) dxbcByteWriterInit(DXBCByteWriter *w, uint32_t cbInitial)
1316{
1317 RT_ZERO(*w);
1318 return dxbcByteWriterCanWrite(w, cbInitial);
1319}
1320
1321
1322DECLINLINE(void) dxbcByteWriterReset(DXBCByteWriter *w)
1323{
1324 RTMemFree(w->pu8ByteCodeBegin);
1325 RT_ZERO(*w);
1326}
1327
1328
1329DECLINLINE(void) dxbcByteWriterFetchData(DXBCByteWriter *w, void **ppv, uint32_t *pcb)
1330{
1331 *ppv = w->pu8ByteCodeBegin;
1332 *pcb = w->cbWritten;
1333
1334 w->pu8ByteCodeBegin = NULL;
1335 dxbcByteWriterReset(w);
1336}
1337
1338
1339/*
1340 *
1341 * VGPU10 shader parser.
1342 *
1343 */
1344
1345/* Parse an instruction operand. */
1346static int dxbcParseOperand(DXBCTokenReader *r, VGPUOperand *paOperand, uint32_t *pcOperandRemain)
1347{
1348 ASSERT_GUEST_RETURN(*pcOperandRemain > 0, VERR_NOT_SUPPORTED);
1349
1350 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1351
1352 paOperand->paOperandToken = dxbcTokenReaderPtr(r);
1353 paOperand->cOperandToken = 0;
1354
1355 VGPU10OperandToken0 operand0;
1356 operand0.value = dxbcTokenReaderRead32(r);
1357
1358 Log6((" %s(%d) %s(%d) %s(%d) %s(%d)\n",
1359 dxbcOperandNumComponentsToString(operand0.numComponents), operand0.numComponents,
1360 dxbcOperandComponentModeToString(operand0.selectionMode), operand0.selectionMode,
1361 dxbcOperandTypeToString(operand0.operandType), operand0.operandType,
1362 dxbcOperandIndexDimensionToString(operand0.indexDimension), operand0.indexDimension));
1363
1364 ASSERT_GUEST_RETURN(operand0.numComponents <= VGPU10_OPERAND_4_COMPONENT, VERR_INVALID_PARAMETER);
1365 if ( operand0.operandType != VGPU10_OPERAND_TYPE_IMMEDIATE32
1366 && operand0.operandType != VGPU10_OPERAND_TYPE_IMMEDIATE64)
1367 {
1368 if (operand0.numComponents == VGPU10_OPERAND_4_COMPONENT)
1369 {
1370 ASSERT_GUEST_RETURN(operand0.selectionMode <= VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE, VERR_INVALID_PARAMETER);
1371 switch (operand0.selectionMode)
1372 {
1373 case VGPU10_OPERAND_4_COMPONENT_MASK_MODE:
1374 Log6((" Mask %#x\n", operand0.mask));
1375 break;
1376 case VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE:
1377 Log6((" Swizzle %s(%d) %s(%d) %s(%d) %s(%d)\n",
1378 dxbcOperandComponentNameToString(operand0.swizzleX), operand0.swizzleX,
1379 dxbcOperandComponentNameToString(operand0.swizzleY), operand0.swizzleY,
1380 dxbcOperandComponentNameToString(operand0.swizzleZ), operand0.swizzleZ,
1381 dxbcOperandComponentNameToString(operand0.swizzleW), operand0.swizzleW));
1382 break;
1383 case VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE:
1384 Log6((" Select %s(%d)\n",
1385 dxbcOperandComponentNameToString(operand0.selectMask), operand0.selectMask));
1386 break;
1387 default: /* Never happens. */
1388 break;
1389 }
1390 }
1391 }
1392
1393 if (operand0.extended)
1394 {
1395 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1396
1397 VGPU10OperandToken1 operand1;
1398 operand1.value = dxbcTokenReaderRead32(r);
1399 }
1400
1401 ASSERT_GUEST_RETURN(operand0.operandType < VGPU10_NUM_OPERANDS, VERR_INVALID_PARAMETER);
1402
1403 if ( operand0.operandType == VGPU10_OPERAND_TYPE_IMMEDIATE32
1404 || operand0.operandType == VGPU10_OPERAND_TYPE_IMMEDIATE64)
1405 {
1406 uint32_t cComponent = 0;
1407 if (operand0.numComponents == VGPU10_OPERAND_4_COMPONENT)
1408 cComponent = 4;
1409 else if (operand0.numComponents == VGPU10_OPERAND_1_COMPONENT)
1410 cComponent = 1;
1411
1412 for (uint32_t i = 0; i < cComponent; ++i)
1413 {
1414 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1415 paOperand->aImm[i] = dxbcTokenReaderRead32(r);
1416 }
1417 }
1418
1419 paOperand->numComponents = operand0.numComponents;
1420 paOperand->selectionMode = operand0.selectionMode;
1421 paOperand->mask = operand0.mask;
1422 paOperand->operandType = operand0.operandType;
1423 paOperand->indexDimension = operand0.indexDimension;
1424
1425 int rc = VINF_SUCCESS;
1426 /* 'indexDimension' tells the number of indices. 'i' is the array index, i.e. i = 0 for 1D, etc. */
1427 for (uint32_t i = 0; i < operand0.indexDimension; ++i)
1428 {
1429 if (i == 0) /* VGPU10_OPERAND_INDEX_1D */
1430 paOperand->aOperandIndex[i].indexRepresentation = operand0.index0Representation;
1431 else if (i == 1) /* VGPU10_OPERAND_INDEX_2D */
1432 paOperand->aOperandIndex[i].indexRepresentation = operand0.index1Representation;
1433 else /* VGPU10_OPERAND_INDEX_3D */
1434 continue; /* Skip because it is "rarely if ever used" and is not supported by VGPU10. */
1435
1436 uint32_t const indexRepresentation = paOperand->aOperandIndex[i].indexRepresentation;
1437 switch (indexRepresentation)
1438 {
1439 case VGPU10_OPERAND_INDEX_IMMEDIATE32:
1440 {
1441 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1442 paOperand->aOperandIndex[i].iOperandImmediate = dxbcTokenReaderRead32(r);
1443 break;
1444 }
1445 case VGPU10_OPERAND_INDEX_IMMEDIATE64:
1446 {
1447 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 2), VERR_INVALID_PARAMETER);
1448 paOperand->aOperandIndex[i].iOperandImmediate = dxbcTokenReaderRead64(r);
1449 break;
1450 }
1451 case VGPU10_OPERAND_INDEX_RELATIVE:
1452 {
1453 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1454 paOperand->aOperandIndex[i].pOperandRelative = &paOperand[1];
1455 Log6((" [operand index %d] parsing relative\n", i));
1456 rc = dxbcParseOperand(r, &paOperand[1], pcOperandRemain);
1457 break;
1458 }
1459 case VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE:
1460 {
1461 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 2), VERR_INVALID_PARAMETER);
1462 paOperand->aOperandIndex[i].iOperandImmediate = dxbcTokenReaderRead32(r);
1463 paOperand->aOperandIndex[i].pOperandRelative = &paOperand[1];
1464 Log6((" [operand index %d] parsing relative\n", i));
1465 rc = dxbcParseOperand(r, &paOperand[1], pcOperandRemain);
1466 break;
1467 }
1468 case VGPU10_OPERAND_INDEX_IMMEDIATE64_PLUS_RELATIVE:
1469 {
1470 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 3), VERR_INVALID_PARAMETER);
1471 paOperand->aOperandIndex[i].iOperandImmediate = dxbcTokenReaderRead64(r);
1472 paOperand->aOperandIndex[i].pOperandRelative = &paOperand[1];
1473 Log6((" [operand index %d] parsing relative\n", i));
1474 rc = dxbcParseOperand(r, &paOperand[1], pcOperandRemain);
1475 break;
1476 }
1477 default:
1478 ASSERT_GUEST_FAILED_RETURN(VERR_INVALID_PARAMETER);
1479 }
1480 Log6((" [operand index %d] %s(%d): %#llx%s\n",
1481 i, dxbcOperandIndexRepresentationToString(indexRepresentation), indexRepresentation,
1482 paOperand->aOperandIndex[i].iOperandImmediate, paOperand->aOperandIndex[i].pOperandRelative ? " + relative" : ""));
1483 if (RT_FAILURE(rc))
1484 break;
1485 }
1486
1487 paOperand->cOperandToken = dxbcTokenReaderPtr(r) - paOperand->paOperandToken;
1488
1489 *pcOperandRemain -= 1;
1490 return VINF_SUCCESS;
1491}
1492
1493
1494/* Parse an instruction. */
1495static int dxbcParseOpcode(DXBCTokenReader *r, VGPUOpcode *pOpcode)
1496{
1497 RT_ZERO(*pOpcode);
1498 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1499
1500 pOpcode->paOpcodeToken = dxbcTokenReaderPtr(r);
1501
1502 VGPU10OpcodeToken0 opcode;
1503 opcode.value = dxbcTokenReaderRead32(r);
1504
1505 pOpcode->opcodeType = opcode.opcodeType;
1506 ASSERT_GUEST_RETURN(pOpcode->opcodeType < VGPU10_NUM_OPCODES, VERR_INVALID_PARAMETER);
1507
1508 Log6(("[%#x] %s length %d\n",
1509 dxbcTokenReaderByteOffset(r) - 4, dxbcOpcodeToString(pOpcode->opcodeType), opcode.instructionLength));
1510
1511 uint32_t const cOperand = g_aOpcodeInfo[pOpcode->opcodeType].cOperand;
1512 if (cOperand != UINT32_MAX)
1513 {
1514 ASSERT_GUEST_RETURN(cOperand < RT_ELEMENTS(pOpcode->aIdxOperand), VERR_INVALID_PARAMETER);
1515
1516 pOpcode->cOpcodeToken = opcode.instructionLength;
1517 if (opcode.extended)
1518 {
1519 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1520 if ( pOpcode->opcodeType == VGPU10_OPCODE_DCL_FUNCTION_BODY
1521 || pOpcode->opcodeType == VGPU10_OPCODE_DCL_FUNCTION_TABLE
1522 || pOpcode->opcodeType == VGPU10_OPCODE_DCL_INTERFACE
1523 || pOpcode->opcodeType == VGPU10_OPCODE_INTERFACE_CALL
1524 || pOpcode->opcodeType == VGPU10_OPCODE_DCL_THREAD_GROUP)
1525 {
1526 /* "next DWORD contains ... the actual instruction length in DWORD since it may not fit into 7 bits" */
1527 pOpcode->cOpcodeToken = dxbcTokenReaderRead32(r);
1528 }
1529 else
1530 {
1531 VGPU10OpcodeToken1 opcode1;
1532 opcode1.value = dxbcTokenReaderRead32(r);
1533 ASSERT_GUEST(opcode1.opcodeType == VGPU10_EXTENDED_OPCODE_SAMPLE_CONTROLS);
1534 }
1535 }
1536
1537 ASSERT_GUEST_RETURN(pOpcode->cOpcodeToken >= 1 && pOpcode->cOpcodeToken < 256, VERR_INVALID_PARAMETER);
1538 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, pOpcode->cOpcodeToken - 1), VERR_INVALID_PARAMETER);
1539
1540#ifdef LOG_ENABLED
1541 Log6((" %08X", opcode.value));
1542 for (uint32_t i = 1; i < pOpcode->cOpcodeToken; ++i)
1543 Log6((" %08X", r->pToken[i - 1]));
1544 Log6(("\n"));
1545
1546 if (pOpcode->opcodeType == VGPU10_OPCODE_DCL_RESOURCE)
1547 Log6((" %s\n",
1548 dxbcResourceDimensionToString(opcode.resourceDimension)));
1549 else
1550 Log6((" %s\n",
1551 dxbcInterpolationModeToString(opcode.interpolationMode)));
1552#endif
1553 /* Additional tokens before operands. */
1554 switch (pOpcode->opcodeType)
1555 {
1556 case VGPU10_OPCODE_INTERFACE_CALL:
1557 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1558 dxbcTokenReaderSkip(r, 1); /* Function index */
1559 break;
1560
1561 default:
1562 break;
1563 }
1564
1565 /* Operands. */
1566 uint32_t cOperandRemain = RT_ELEMENTS(pOpcode->aValOperand);
1567 for (uint32_t i = 0; i < cOperand; ++i)
1568 {
1569 Log6((" [operand %d]\n", i));
1570 uint32_t const idxOperand = RT_ELEMENTS(pOpcode->aValOperand) - cOperandRemain;
1571 pOpcode->aIdxOperand[i] = idxOperand;
1572 int rc = dxbcParseOperand(r, &pOpcode->aValOperand[idxOperand], &cOperandRemain);
1573 ASSERT_GUEST_RETURN(RT_SUCCESS(rc), VERR_INVALID_PARAMETER);
1574 }
1575
1576 pOpcode->cOperand = cOperand;
1577
1578 /* Additional tokens after operands. */
1579 switch (pOpcode->opcodeType)
1580 {
1581 case VGPU10_OPCODE_DCL_INPUT_SIV:
1582 case VGPU10_OPCODE_DCL_INPUT_SGV:
1583 case VGPU10_OPCODE_DCL_INPUT_PS_SIV:
1584 case VGPU10_OPCODE_DCL_INPUT_PS_SGV:
1585 case VGPU10_OPCODE_DCL_OUTPUT_SIV:
1586 case VGPU10_OPCODE_DCL_OUTPUT_SGV:
1587 {
1588 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1589
1590 VGPU10NameToken name;
1591 name.value = dxbcTokenReaderRead32(r);
1592 Log6((" %s(%d)\n",
1593 dxbcSystemNameToString(name.name), name.name));
1594 pOpcode->semanticName = name.name;
1595 break;
1596 }
1597 case VGPU10_OPCODE_DCL_RESOURCE:
1598 {
1599 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1600 dxbcTokenReaderSkip(r, 1); /* ResourceReturnTypeToken */
1601 break;
1602 }
1603 case VGPU10_OPCODE_DCL_TEMPS:
1604 {
1605 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1606 dxbcTokenReaderSkip(r, 1); /* number of temps */
1607 break;
1608 }
1609 case VGPU10_OPCODE_DCL_INDEXABLE_TEMP:
1610 {
1611 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 3), VERR_INVALID_PARAMETER);
1612 dxbcTokenReaderSkip(r, 3); /* register index; number of registers; number of components */
1613 break;
1614 }
1615 case VGPU10_OPCODE_DCL_INDEX_RANGE:
1616 {
1617 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1618 dxbcTokenReaderSkip(r, 1); /* count of registers */
1619 break;
1620 }
1621 case VGPU10_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT:
1622 {
1623 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1624 dxbcTokenReaderSkip(r, 1); /* maximum number of primitives */
1625 break;
1626 }
1627 case VGPU10_OPCODE_DCL_GS_INSTANCE_COUNT:
1628 {
1629 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1630 dxbcTokenReaderSkip(r, 1); /* number of instances */
1631 break;
1632 }
1633 case VGPU10_OPCODE_DCL_HS_MAX_TESSFACTOR:
1634 {
1635 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1636 dxbcTokenReaderSkip(r, 1); /* maximum TessFactor */
1637 break;
1638 }
1639 case VGPU10_OPCODE_DCL_HS_FORK_PHASE_INSTANCE_COUNT:
1640 case VGPU10_OPCODE_DCL_HS_JOIN_PHASE_INSTANCE_COUNT:
1641 {
1642 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1643 dxbcTokenReaderSkip(r, 1); /* number of instances of the current fork/join phase program to execute */
1644 break;
1645 }
1646 case VGPU10_OPCODE_DCL_THREAD_GROUP:
1647 {
1648 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 3), VERR_INVALID_PARAMETER);
1649 dxbcTokenReaderSkip(r, 3); /* Thread Group dimensions as UINT32: x, y, z */
1650 break;
1651 }
1652 case VGPU10_OPCODE_DCL_UAV_TYPED:
1653 {
1654 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1655 dxbcTokenReaderSkip(r, 1); /* ResourceReturnTypeToken */
1656 break;
1657 }
1658 case VGPU10_OPCODE_DCL_UAV_STRUCTURED:
1659 {
1660 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1661 dxbcTokenReaderSkip(r, 1); /* byte stride */
1662 break;
1663 }
1664 case VGPU10_OPCODE_DCL_TGSM_RAW:
1665 {
1666 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1667 dxbcTokenReaderSkip(r, 1); /* element count */
1668 break;
1669 }
1670 case VGPU10_OPCODE_DCL_TGSM_STRUCTURED:
1671 {
1672 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 2), VERR_INVALID_PARAMETER);
1673 dxbcTokenReaderSkip(r, 2); /* struct byte stride; struct count */
1674 break;
1675 }
1676 case VGPU10_OPCODE_DCL_RESOURCE_STRUCTURED:
1677 {
1678 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1679 dxbcTokenReaderSkip(r, 1); /* struct byte stride */
1680 break;
1681 }
1682 default:
1683 break;
1684 }
1685 }
1686 else
1687 {
1688 /* Special opcodes. */
1689 if (pOpcode->opcodeType == VGPU10_OPCODE_CUSTOMDATA)
1690 {
1691 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1692 pOpcode->cOpcodeToken = dxbcTokenReaderRead32(r);
1693
1694 if (pOpcode->cOpcodeToken < 2)
1695 pOpcode->cOpcodeToken = 2;
1696 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, pOpcode->cOpcodeToken - 2), VERR_INVALID_PARAMETER);
1697
1698#ifdef LOG_ENABLED
1699 Log6((" %08X", opcode.value));
1700 for (uint32_t i = 1; i < pOpcode->cOpcodeToken; ++i)
1701 Log6((" %08X", r->pToken[i - 1]));
1702 Log6(("\n"));
1703
1704 Log6((" %s\n",
1705 dxbcCustomDataClassToString(opcode.customDataClass)));
1706#endif
1707 dxbcTokenReaderSkip(r, pOpcode->cOpcodeToken - 2);
1708 }
1709 else if (pOpcode->opcodeType == VGPU10_OPCODE_VMWARE)
1710 {
1711 pOpcode->cOpcodeToken = opcode.instructionLength;
1712 pOpcode->opcodeSubtype = opcode.vmwareOpcodeType;
1713
1714#ifdef LOG_ENABLED
1715 Log6((" %08X", opcode.value));
1716 for (uint32_t i = 1; i < pOpcode->cOpcodeToken; ++i)
1717 Log6((" %08X", r->pToken[i - 1]));
1718 Log6(("\n"));
1719
1720 Log6((" %s(%d)\n",
1721 dxbcVmwareOpcodeTypeToString(opcode.vmwareOpcodeType), opcode.vmwareOpcodeType));
1722#endif
1723
1724 if (opcode.vmwareOpcodeType == VGPU10_VMWARE_OPCODE_IDIV)
1725 {
1726 /* Integer divide. */
1727 pOpcode->cOperand = 4; /* dstQuit, dstRem, src0, src1. */
1728
1729 /* Operands. */
1730 uint32_t cOperandRemain = RT_ELEMENTS(pOpcode->aValOperand);
1731 for (uint32_t i = 0; i < pOpcode->cOperand; ++i)
1732 {
1733 Log6((" [operand %d]\n", i));
1734 uint32_t const idxOperand = RT_ELEMENTS(pOpcode->aValOperand) - cOperandRemain;
1735 pOpcode->aIdxOperand[i] = idxOperand;
1736 int rc = dxbcParseOperand(r, &pOpcode->aValOperand[idxOperand], &cOperandRemain);
1737 ASSERT_GUEST_RETURN(RT_SUCCESS(rc), VERR_INVALID_PARAMETER);
1738 }
1739 }
1740 //else if (opcode.vmwareOpcodeType == VGPU10_VMWARE_OPCODE_DFRC)
1741 //else if (opcode.vmwareOpcodeType == VGPU10_VMWARE_OPCODE_DRSQ)
1742 else
1743 {
1744 /** @todo implement */
1745 ASSERT_GUEST_FAILED_RETURN(VERR_INVALID_PARAMETER);
1746 }
1747 }
1748 else
1749 ASSERT_GUEST_FAILED_RETURN(VERR_INVALID_PARAMETER);
1750
1751 // pOpcode->cOperand = 0;
1752 }
1753
1754 return VINF_SUCCESS;
1755}
1756
1757
/* State shared by the dxbcOutput* functions while the parsed shader is written back out. */
typedef struct DXBCOUTPUTCTX
{
    VGPU10ProgramToken programToken; /* Copy of the shader's program token (set by dxbcOutputInit). */
    uint32_t cToken; /* Number of tokens in the original shader code. */

    uint32_t offSubroutine; /* Current offset where to write subroutines. In bytes; starts at cToken * 4. */
} DXBCOUTPUTCTX;
1765
1766
1767static void dxbcOutputInit(DXBCOUTPUTCTX *pOutctx, VGPU10ProgramToken const *pProgramToken, uint32_t cToken)
1768{
1769 RT_ZERO(*pOutctx);
1770 pOutctx->programToken = *pProgramToken;
1771 pOutctx->cToken = cToken;
1772
1773 pOutctx->offSubroutine = cToken * 4;
1774}
1775
1776
1777static int dxbcEmitVmwareIDIV(DXBCOUTPUTCTX *pOutctx, DXBCByteWriter *w, VGPUOpcode *pOpcode)
1778{
1779 /* Insert a call and append a subroutne. */
1780 VGPU10OpcodeToken0 opcode;
1781 VGPU10OperandToken0 operand;
1782
1783 uint32_t const label = (pOutctx->offSubroutine - dxbcByteWriterSize(w)) / 4;
1784
1785 /*
1786 * Call
1787 */
1788 opcode.value = 0;
1789 opcode.opcodeType = VGPU10_OPCODE_CALL;
1790 opcode.instructionLength = 3;
1791 dxbcByteWriterAddTokens(w, &opcode.value, 1);
1792
1793 operand.value = 0;
1794 operand.numComponents = VGPU10_OPERAND_1_COMPONENT;
1795 operand.operandType = VGPU10_OPERAND_TYPE_LABEL;
1796 operand.indexDimension = VGPU10_OPERAND_INDEX_1D;
1797 operand.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
1798 dxbcByteWriterAddTokens(w, &operand.value, 1);
1799
1800 dxbcByteWriterAddTokens(w, &label, 1);
1801
1802 opcode.value = 0;
1803 opcode.opcodeType = VGPU10_OPCODE_NOP;
1804 opcode.instructionLength = 1;
1805 for (int i = 0; i < 8 - 3; ++i)
1806 dxbcByteWriterAddTokens(w, &opcode.value, 1);
1807
1808 /*
1809 * Subroutine.
1810 */
1811 DXBCByteWriterState savedWriterState;
1812 if (!dxbcByteWriterSetOffset(w, pOutctx->offSubroutine, &savedWriterState))
1813 return w->rc;
1814
1815 /* label */
1816 opcode.value = 0;
1817 opcode.opcodeType = VGPU10_OPCODE_LABEL;
1818 opcode.instructionLength = 3;
1819 dxbcByteWriterAddTokens(w, &opcode.value, 1);
1820
1821 operand.value = 0;
1822 operand.numComponents = VGPU10_OPERAND_1_COMPONENT;
1823 operand.operandType = VGPU10_OPERAND_TYPE_LABEL;
1824 operand.indexDimension = VGPU10_OPERAND_INDEX_1D;
1825 operand.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
1826 dxbcByteWriterAddTokens(w, &operand.value, 1);
1827 dxbcByteWriterAddTokens(w, &label, 1);
1828
1829 /* Just output UDIV for now. */
1830 opcode.value = 0;
1831 opcode.opcodeType = VGPU10_OPCODE_UDIV;
1832 opcode.instructionLength = pOpcode->cOpcodeToken;
1833 dxbcByteWriterAddTokens(w, &opcode.value, 1);
1834 dxbcByteWriterAddTokens(w, &pOpcode->paOpcodeToken[1], pOpcode->cOpcodeToken - 1);
1835
1836 /* ret */
1837 opcode.value = 0;
1838 opcode.opcodeType = VGPU10_OPCODE_RET;
1839 opcode.instructionLength = 1;
1840 dxbcByteWriterAddTokens(w, &opcode.value, 1);
1841
1842 pOutctx->offSubroutine = dxbcByteWriterSize(w);
1843 dxbcByteWriterRestore(w, &savedWriterState);
1844
1845 return w->rc;
1846}
1847
1848
1849static int dxbcOutputOpcode(DXBCOUTPUTCTX *pOutctx, DXBCByteWriter *w, VGPUOpcode *pOpcode)
1850{
1851#ifdef DEBUG
1852 void *pvBegin = dxbcByteWriterPtr(w);
1853#endif
1854
1855 if ( pOutctx->programToken.programType == VGPU10_PIXEL_SHADER
1856 && pOpcode->opcodeType == VGPU10_OPCODE_DCL_RESOURCE)
1857 {
1858 /** @todo This is a workaround. */
1859 /* Sometimes the guest (Mesa) created a shader with uninitialized resource dimension.
1860 * Use texture 2d because it is what a pixel shader normally uses.
1861 */
1862 ASSERT_GUEST_RETURN(pOpcode->cOpcodeToken == 4, VERR_INVALID_PARAMETER);
1863
1864 VGPU10OpcodeToken0 opcode;
1865 opcode.value = pOpcode->paOpcodeToken[0];
1866 if (opcode.resourceDimension == VGPU10_RESOURCE_DIMENSION_BUFFER)
1867 {
1868 opcode.resourceDimension = VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
1869 dxbcByteWriterAddTokens(w, &opcode.value, 1);
1870 dxbcByteWriterAddTokens(w, &pOpcode->paOpcodeToken[1], 2);
1871 uint32_t const returnType = 0x5555; /* float */
1872 dxbcByteWriterAddTokens(w, &returnType, 1);
1873 return VINF_SUCCESS;
1874 }
1875 }
1876 else if (pOpcode->opcodeType == VGPU10_OPCODE_VMWARE)
1877 {
1878 if (pOpcode->opcodeSubtype == VGPU10_VMWARE_OPCODE_IDIV)
1879 {
1880 return dxbcEmitVmwareIDIV(pOutctx, w, pOpcode);
1881 }
1882
1883 ASSERT_GUEST_FAILED_RETURN(VERR_NOT_SUPPORTED);
1884 }
1885
1886#ifdef DEBUG
1887 /* The code above must emit either nothing or everything. */
1888 Assert((uintptr_t)pvBegin == (uintptr_t)dxbcByteWriterPtr(w));
1889#endif
1890
1891 /* Just emit the unmodified instruction. */
1892 dxbcByteWriterAddTokens(w, pOpcode->paOpcodeToken, pOpcode->cOpcodeToken);
1893 return VINF_SUCCESS;
1894}
1895
1896
1897static int dxbcOutputFinalize(DXBCOUTPUTCTX *pOutctx, DXBCByteWriter *w)
1898{
1899 RT_NOREF(pOutctx, w);
1900 return VINF_SUCCESS;
1901}
1902
1903
1904/*
1905 * Parse and verify the shader byte code. Extract input and output signatures into pInfo.
1906 */
1907int DXShaderParse(void const *pvShaderCode, uint32_t cbShaderCode, DXShaderInfo *pInfo)
1908{
1909 if (pInfo)
1910 RT_ZERO(*pInfo);
1911
1912 ASSERT_GUEST_RETURN(cbShaderCode <= SVGA3D_MAX_SHADER_MEMORY_BYTES, VERR_INVALID_PARAMETER);
1913 ASSERT_GUEST_RETURN((cbShaderCode & 0x3) == 0, VERR_INVALID_PARAMETER); /* Aligned to the token size. */
1914 ASSERT_GUEST_RETURN(cbShaderCode >= 8, VERR_INVALID_PARAMETER); /* At least program and length tokens. */
1915
1916 uint32_t const *paToken = (uint32_t *)pvShaderCode;
1917
1918 VGPU10ProgramToken const *pProgramToken = (VGPU10ProgramToken *)&paToken[0];
1919 ASSERT_GUEST_RETURN( pProgramToken->majorVersion >= 4
1920 && pProgramToken->programType <= VGPU10_COMPUTE_SHADER, VERR_INVALID_PARAMETER);
1921 if (pInfo)
1922 pInfo->enmProgramType = (VGPU10_PROGRAM_TYPE)pProgramToken->programType;
1923
1924 uint32_t const cToken = paToken[1];
1925 Log6(("Shader version %d.%d type %s(%d) Length %d\n",
1926 pProgramToken->majorVersion, pProgramToken->minorVersion, dxbcShaderTypeToString(pProgramToken->programType), pProgramToken->programType, cToken));
1927 ASSERT_GUEST_RETURN(cbShaderCode / 4 >= cToken, VERR_INVALID_PARAMETER); /* Declared length should be less or equal to the actual. */
1928
1929 /* Write the parsed (and possibly modified) shader to a memory buffer. */
1930 DXBCByteWriter dxbcByteWriter;
1931 DXBCByteWriter *w = &dxbcByteWriter;
1932 if (!dxbcByteWriterInit(w, 4096 + cbShaderCode))
1933 return VERR_NO_MEMORY;
1934
1935 dxbcByteWriterAddTokens(w, paToken, 2);
1936
1937 DXBCTokenReader parser;
1938 RT_ZERO(parser);
1939
1940 DXBCTokenReader *r = &parser;
1941 r->pToken = &paToken[2];
1942 r->cToken = r->cRemainingToken = cToken - 2;
1943
1944 DXBCOUTPUTCTX outctx;
1945 dxbcOutputInit(&outctx, pProgramToken, cToken);
1946
1947 int rc = VINF_SUCCESS;
1948 while (dxbcTokenReaderCanRead(r, 1))
1949 {
1950 uint32_t const offOpcode = dxbcByteWriterSize(w);
1951
1952 VGPUOpcode opcode;
1953 rc = dxbcParseOpcode(r, &opcode);
1954 ASSERT_GUEST_STMT_BREAK(RT_SUCCESS(rc), rc = VERR_INVALID_PARAMETER);
1955
1956 rc = dxbcOutputOpcode(&outctx, w, &opcode);
1957 AssertRCBreak(rc);
1958
1959 if (pInfo)
1960 {
1961 /* Remember offsets of DCL_RESOURCE instructions. */
1962 if ( outctx.programToken.programType == VGPU10_PIXEL_SHADER
1963 && opcode.opcodeType == VGPU10_OPCODE_DCL_RESOURCE)
1964 {
1965 ASSERT_GUEST_STMT_BREAK(pInfo->cDclResource <= SVGA3D_DX_MAX_SRVIEWS,
1966 rc = VERR_NOT_SUPPORTED);
1967
1968 pInfo->aOffDclResource[pInfo->cDclResource++] = offOpcode;
1969 }
1970
1971 /* Fetch signatures. */
1972 SVGA3dDXSignatureEntry *pSignatureEntry = NULL;
1973 switch (opcode.opcodeType)
1974 {
1975 case VGPU10_OPCODE_DCL_INPUT:
1976 case VGPU10_OPCODE_DCL_INPUT_PS:
1977 case VGPU10_OPCODE_DCL_INPUT_SIV:
1978 ASSERT_GUEST_STMT_BREAK(pInfo->cInputSignature < RT_ELEMENTS(pInfo->aInputSignature), rc = VERR_INVALID_PARAMETER);
1979 pSignatureEntry = &pInfo->aInputSignature[pInfo->cInputSignature++];
1980 break;
1981 case VGPU10_OPCODE_DCL_OUTPUT:
1982 case VGPU10_OPCODE_DCL_OUTPUT_SIV:
1983 case VGPU10_OPCODE_DCL_OUTPUT_SGV:
1984 ASSERT_GUEST_STMT_BREAK(pInfo->cOutputSignature < RT_ELEMENTS(pInfo->aOutputSignature), rc = VERR_INVALID_PARAMETER);
1985 pSignatureEntry = &pInfo->aOutputSignature[pInfo->cOutputSignature++];
1986 break;
1987 default:
1988 break;
1989 }
1990
1991 if (RT_FAILURE(rc))
1992 break;
1993
1994 if (pSignatureEntry)
1995 {
1996 ASSERT_GUEST_STMT_BREAK( opcode.aValOperand[0].aOperandIndex[0].indexRepresentation == VGPU10_OPERAND_INDEX_IMMEDIATE32
1997 || opcode.aValOperand[0].aOperandIndex[0].indexRepresentation == VGPU10_OPERAND_INDEX_IMMEDIATE64,
1998 rc = VERR_NOT_SUPPORTED);
1999
2000 uint32_t const indexDimension = opcode.aValOperand[0].indexDimension;
2001 if (indexDimension == VGPU10_OPERAND_INDEX_0D)
2002 {
2003 if (opcode.aValOperand[0].operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID)
2004 {
2005 pSignatureEntry->registerIndex = 0;
2006 pSignatureEntry->semanticName = SVGADX_SIGNATURE_SEMANTIC_NAME_PRIMITIVE_ID;
2007 }
2008 else if (opcode.aValOperand[0].operandType == VGPU10_OPERAND_TYPE_OUTPUT_DEPTH)
2009 {
2010 /* oDepth is always last in the signature. Register index is equal to 0xFFFFFFFF. */
2011 pSignatureEntry->registerIndex = 0xFFFFFFFF;
2012 pSignatureEntry->semanticName = SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED;
2013 }
2014 else
2015 ASSERT_GUEST_FAILED_STMT_BREAK(rc = VERR_NOT_SUPPORTED);
2016 }
2017 else
2018 {
2019 ASSERT_GUEST_STMT_BREAK( indexDimension == VGPU10_OPERAND_INDEX_1D
2020 || indexDimension == VGPU10_OPERAND_INDEX_2D
2021 || indexDimension == VGPU10_OPERAND_INDEX_3D,
2022 rc = VERR_NOT_SUPPORTED);
2023 /* The register index seems to be in the highest dimension. */
2024 pSignatureEntry->registerIndex = opcode.aValOperand[0].aOperandIndex[indexDimension - VGPU10_OPERAND_INDEX_1D].iOperandImmediate;
2025 pSignatureEntry->semanticName = opcode.semanticName;
2026 }
2027 pSignatureEntry->mask = opcode.aValOperand[0].mask;
2028 pSignatureEntry->componentType = SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN; /// @todo Proper value? Seems that it is not important.
2029 pSignatureEntry->minPrecision = SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT;
2030 }
2031 }
2032 }
2033
2034 if (RT_FAILURE(rc))
2035 {
2036 return rc;
2037 }
2038
2039 rc = dxbcOutputFinalize(&outctx, w);
2040 if (RT_FAILURE(rc))
2041 {
2042 return rc;
2043 }
2044
2045 dxbcByteWriterFetchData(w, &pInfo->pvBytecode, &pInfo->cbBytecode);
2046 uint32_t *pcOutputToken = (uint32_t *)pInfo->pvBytecode + 1;
2047 *pcOutputToken = pInfo->cbBytecode / 4;
2048
2049#ifdef LOG_ENABLED
2050 if (pInfo->cInputSignature)
2051 {
2052 Log6(("Input signatures:\n"));
2053 for (uint32_t i = 0; i < pInfo->cInputSignature; ++i)
2054 Log6((" [%u]: %u %u 0x%X\n", i, pInfo->aInputSignature[i].registerIndex, pInfo->aInputSignature[i].semanticName, pInfo->aInputSignature[i].mask));
2055 }
2056 if (pInfo->cOutputSignature)
2057 {
2058 Log6(("Output signatures:\n"));
2059 for (uint32_t i = 0; i < pInfo->cOutputSignature; ++i)
2060 Log6((" [%u]: %u %u 0x%X\n", i, pInfo->aOutputSignature[i].registerIndex, pInfo->aOutputSignature[i].semanticName, pInfo->aOutputSignature[i].mask));
2061 }
2062 if (pInfo->cPatchConstantSignature)
2063 {
2064 Log6(("Patch constant signatures:\n"));
2065 for (uint32_t i = 0; i < pInfo->cPatchConstantSignature; ++i)
2066 Log6((" [%u]: %u %u 0x%X\n", i, pInfo->aPatchConstantSignature[i].registerIndex, pInfo->aPatchConstantSignature[i].semanticName, pInfo->aPatchConstantSignature[i].mask));
2067 }
2068#endif
2069
2070 return VINF_SUCCESS;
2071}
2072
2073
2074void DXShaderFree(DXShaderInfo *pInfo)
2075{
2076 RTMemFree(pInfo->pvBytecode);
2077 RT_ZERO(*pInfo);
2078}
2079
2080
#if 0 // Unused. Replaced with dxbcSemanticInfo.
/* Compiled out. Maps a signature semantic name value to a system-value semantic string;
 * any other value yields the generic "ATTRIB" name.
 */
static char const *dxbcSemanticName(SVGA3dDXSignatureSemanticName enmSemanticName)
{
    /* https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/dx-graphics-hlsl-semantics#system-value-semantics */
    switch (enmSemanticName)
    {
        case SVGADX_SIGNATURE_SEMANTIC_NAME_POSITION: return "SV_Position";
        case SVGADX_SIGNATURE_SEMANTIC_NAME_CLIP_DISTANCE: return "SV_ClipDistance";
        case SVGADX_SIGNATURE_SEMANTIC_NAME_CULL_DISTANCE: return "SV_CullDistance";
        case SVGADX_SIGNATURE_SEMANTIC_NAME_RENDER_TARGET_ARRAY_INDEX: return "SV_RenderTargetArrayIndex";
        case SVGADX_SIGNATURE_SEMANTIC_NAME_VIEWPORT_ARRAY_INDEX: return "SV_ViewportArrayIndex";
        case SVGADX_SIGNATURE_SEMANTIC_NAME_VERTEX_ID: return "SV_VertexID";
        case SVGADX_SIGNATURE_SEMANTIC_NAME_PRIMITIVE_ID: return "SV_PrimitiveID";
        case SVGADX_SIGNATURE_SEMANTIC_NAME_INSTANCE_ID: return "SV_InstanceID";
        case SVGADX_SIGNATURE_SEMANTIC_NAME_IS_FRONT_FACE: return "SV_IsFrontFace";
        case SVGADX_SIGNATURE_SEMANTIC_NAME_SAMPLE_INDEX: return "SV_SampleIndex";
        case SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR: return "SV_FinalQuadUeq0EdgeTessFactor";
        case SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_EQ_0_EDGE_TESSFACTOR: return "SV_FinalQuadVeq0EdgeTessFactor";
        case SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_EQ_1_EDGE_TESSFACTOR: return "SV_FinalQuadUeq1EdgeTessFactor";
        case SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_EQ_1_EDGE_TESSFACTOR: return "SV_FinalQuadVeq1EdgeTessFactor";
        case SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR: return "SV_FinalQuadUInsideTessFactor";
        case SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR: return "SV_FinalQuadVInsideTessFactor";
        case SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR: return "SV_FinalTriUeq0EdgeTessFactor";
        case SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_V_EQ_0_EDGE_TESSFACTOR: return "SV_FinalTriVeq0EdgeTessFactor";
        case SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_W_EQ_0_EDGE_TESSFACTOR: return "SV_FinalTriWeq0EdgeTessFactor";
        case SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_INSIDE_TESSFACTOR: return "SV_FinalTriInsideTessFactor";
        case SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_LINE_DETAIL_TESSFACTOR: return "SV_FinalLineDetailTessFactor";
        case SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_LINE_DENSITY_TESSFACTOR: return "SV_FinalLineDensityTessFactor";
        default:
            /* Only the undefined (generic) semantic is expected to end up here. */
            Assert(enmSemanticName == SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
            break;
    }
    /* Generic. Arbitrary name. It does not have any meaning. */
    return "ATTRIB";
}
#endif
2117
2118
/* https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/dx-graphics-hlsl-semantics#system-value-semantics
 * Type:
 * 0 - undefined
 * 1 - unsigned int
 * 2 - signed int
 * 3 - float
 */
typedef struct VGPUSemanticInfo
{
    char const *pszName; /* Semantic name string which is copied into the signature blob. */
    uint32_t u32Type; /* One of the 'Type' values listed above; used when a signature entry has no component type. */
} VGPUSemanticInfo;
2131
2132static VGPUSemanticInfo const g_aSemanticInfo[SVGADX_SIGNATURE_SEMANTIC_NAME_MAX] =
2133{
2134 { "ATTRIB", 0 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED 0
2135 { "SV_Position", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_POSITION 1
2136 { "SV_ClipDistance", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_CLIP_DISTANCE 2
2137 { "SV_CullDistance", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_CULL_DISTANCE 3
2138 { "SV_RenderTargetArrayIndex", 1 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_RENDER_TARGET_ARRAY_INDEX 4
2139 { "SV_ViewportArrayIndex", 1 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_VIEWPORT_ARRAY_INDEX 5
2140 { "SV_VertexID", 1 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_VERTEX_ID 6
2141 { "SV_PrimitiveID", 1 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_PRIMITIVE_ID 7
2142 { "SV_InstanceID", 1 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_INSTANCE_ID 8
2143 { "SV_IsFrontFace", 1 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_IS_FRONT_FACE 9
2144 { "SV_SampleIndex", 1 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_SAMPLE_INDEX 10
2145 /** @todo Is this a correct name for all TessFactors? */
2146 { "SV_TessFactor", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR 11
2147 { "SV_TessFactor", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_EQ_0_EDGE_TESSFACTOR 12
2148 { "SV_TessFactor", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_EQ_1_EDGE_TESSFACTOR 13
2149 { "SV_TessFactor", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_EQ_1_EDGE_TESSFACTOR 14
2150 { "SV_TessFactor", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR 15
2151 { "SV_TessFactor", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR 16
2152 { "SV_TessFactor", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR 17
2153 { "SV_TessFactor", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_V_EQ_0_EDGE_TESSFACTOR 18
2154 { "SV_TessFactor", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_W_EQ_0_EDGE_TESSFACTOR 19
2155 { "SV_TessFactor", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_INSIDE_TESSFACTOR 20
2156 { "SV_TessFactor", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_LINE_DETAIL_TESSFACTOR 21
2157 { "SV_TessFactor", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_LINE_DENSITY_TESSFACTOR 22
2158};
2159
/* Used by dxbcSemanticInfo instead of g_aSemanticInfo[0] for the output signature of a pixel shader. */
static VGPUSemanticInfo const g_SemanticPSOutput =
    { "SV_TARGET", 3 }; // SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED 0
2162
2163
2164static VGPUSemanticInfo const *dxbcSemanticInfo(DXShaderInfo const *pInfo, SVGA3dDXSignatureSemanticName enmSemanticName, uint32_t u32BlobType)
2165{
2166 if (enmSemanticName < RT_ELEMENTS(g_aSemanticInfo))
2167 {
2168 if ( enmSemanticName == 0
2169 && pInfo->enmProgramType == VGPU10_PIXEL_SHADER
2170 && u32BlobType == DXBC_BLOB_TYPE_OSGN)
2171 return &g_SemanticPSOutput;
2172 return &g_aSemanticInfo[enmSemanticName];
2173 }
2174 return &g_aSemanticInfo[0];
2175}
2176
2177
2178static int dxbcCreateIOSGNBlob(DXShaderInfo const *pInfo, DXBCHeader *pHdr, uint32_t u32BlobType,
2179 uint32_t cSignature, SVGA3dDXSignatureEntry const *paSignature, DXBCByteWriter *w)
2180{
2181 uint32_t cbBlob = RT_UOFFSETOF_DYN(DXBCBlobIOSGN, aElement[cSignature])
2182 + cSignature * RT_SIZEOFMEMB(DXBCBlobIOSGN, aElement[0]);
2183 if (!dxbcByteWriterCanWrite(w, sizeof(DXBCBlobHeader) + cbBlob))
2184 return VERR_NO_MEMORY;
2185
2186 DXBCBlobHeader *pHdrBlob = (DXBCBlobHeader *)dxbcByteWriterPtr(w);
2187 pHdrBlob->u32BlobType = u32BlobType;
2188 // pHdrBlob->cbBlob = 0;
2189
2190 DXBCBlobIOSGN *pHdrISGN = (DXBCBlobIOSGN *)&pHdrBlob[1];
2191 pHdrISGN->cElement = cSignature;
2192 pHdrISGN->offElement = RT_UOFFSETOF(DXBCBlobIOSGN, aElement[0]);
2193
2194 if (pInfo->fGuestSignatures)
2195 {
2196 uint32_t aSemanticIdx[SVGADX_SIGNATURE_SEMANTIC_NAME_MAX];
2197 RT_ZERO(aSemanticIdx);
2198 for (uint32_t iSignature = 0; iSignature < cSignature; ++iSignature)
2199 {
2200 SVGA3dDXSignatureEntry const *src = &paSignature[iSignature];
2201 DXBCBlobIOSGNElement *dst = &pHdrISGN->aElement[iSignature];
2202
2203 ASSERT_GUEST_RETURN(src->semanticName < SVGADX_SIGNATURE_SEMANTIC_NAME_MAX, VERR_INVALID_PARAMETER);
2204 VGPUSemanticInfo const *pSemanticInfo = dxbcSemanticInfo(pInfo, src->semanticName, u32BlobType);
2205
2206 dst->offElementName = cbBlob; /* Offset of the semantic's name relative to the start of the blob (without hdr). */
2207 /* Use the register index as the semantic index for generic attributes in order to
2208 * produce compatible semantic names between shaders.
2209 */
2210 dst->idxSemantic = src->semanticName == SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED
2211 ? src->registerIndex
2212 : aSemanticIdx[src->semanticName]++;
2213 dst->enmSystemValue = src->semanticName;
2214 dst->enmComponentType = src->componentType;
2215 dst->idxRegister = src->registerIndex;
2216 dst->u.mask = src->mask;
2217
2218 /* Figure out the semantic name for this element. */
2219 char const * const pszElementName = pSemanticInfo->pszName;
2220 uint32_t const cbElementName = (uint32_t)strlen(pszElementName) + 1;
2221
2222 if (!dxbcByteWriterCanWrite(w, cbBlob + cbElementName))
2223 return VERR_NO_MEMORY;
2224
2225 char *pszElementNameDst = (char *)pHdrISGN + dst->offElementName;
2226 memcpy(pszElementNameDst, pszElementName, cbElementName);
2227
2228 cbBlob += cbElementName;
2229 }
2230 }
2231 else
2232 {
2233 /* If the signature has been created from the shader code, then sort the signature entries
2234 * by the register index in order to write them into the blob sorted.
2235 * This is necessary to match signatures between shader stages.
2236 */
2237 /* aIdxSignature contains signature indices. aIdxSignature[0] = signature index for register 0. */
2238 uint32_t aIdxSignature[32];
2239 memset(aIdxSignature, 0xFF, sizeof(aIdxSignature));
2240 AssertReturn(cSignature <= RT_ELEMENTS(aIdxSignature), VERR_INTERNAL_ERROR);
2241 for (uint32_t i = 0; i < cSignature; ++i)
2242 {
2243 SVGA3dDXSignatureEntry const *src = &paSignature[i];
2244 if (src->registerIndex == 0xFFFFFFFF)
2245 {
2246 /* oDepth for PS output. */
2247 ASSERT_GUEST_RETURN(pInfo->enmProgramType == VGPU10_PIXEL_SHADER, VERR_INVALID_PARAMETER);
2248
2249 /* Must be placed last in the signature. */
2250 ASSERT_GUEST_RETURN(aIdxSignature[cSignature - 1] == 0xFFFFFFFF, VERR_INVALID_PARAMETER);
2251 aIdxSignature[cSignature - 1] = i;
2252 continue;
2253 }
2254
2255 ASSERT_GUEST_RETURN(src->registerIndex < RT_ELEMENTS(aIdxSignature), VERR_INVALID_PARAMETER);
2256 ASSERT_GUEST_RETURN(aIdxSignature[src->registerIndex] == 0xFFFFFFFF, VERR_INVALID_PARAMETER);
2257 aIdxSignature[src->registerIndex] = i;
2258 }
2259
2260 uint32_t aSemanticIdx[SVGADX_SIGNATURE_SEMANTIC_NAME_MAX];
2261 RT_ZERO(aSemanticIdx);
2262 uint32_t iSignature = 0;
2263 for (uint32_t iReg = 0; iReg < RT_ELEMENTS(aIdxSignature); ++iReg)
2264 {
2265 if (aIdxSignature[iReg] == 0xFFFFFFFF) /* This register is unused. */
2266 continue;
2267
2268 AssertReturn(iSignature < cSignature, VERR_INTERNAL_ERROR);
2269
2270 SVGA3dDXSignatureEntry const *src = &paSignature[aIdxSignature[iReg]];
2271 DXBCBlobIOSGNElement *dst = &pHdrISGN->aElement[iSignature];
2272
2273 ASSERT_GUEST_RETURN(src->semanticName < SVGADX_SIGNATURE_SEMANTIC_NAME_MAX, VERR_INVALID_PARAMETER);
2274 VGPUSemanticInfo const *pSemanticInfo = dxbcSemanticInfo(pInfo, src->semanticName, u32BlobType);
2275
2276 dst->offElementName = cbBlob; /* Offset of the semantic's name relative to the start of the blob (without hdr). */
2277 /* Use the register index as the semantic index for generic attributes in order to
2278 * produce compatible semantic names between shaders.
2279 */
2280 dst->idxSemantic = src->semanticName == SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED
2281 ? src->registerIndex
2282 : aSemanticIdx[src->semanticName]++;
2283 dst->enmSystemValue = src->semanticName;
2284 dst->enmComponentType = src->componentType ? src->componentType : pSemanticInfo->u32Type;
2285 dst->idxRegister = src->registerIndex;
2286 dst->u.m.mask = src->mask;
2287 if (u32BlobType == DXBC_BLOB_TYPE_OSGN)
2288 dst->u.m.mask2 = 0;
2289 else
2290 dst->u.m.mask2 = src->mask;
2291
2292 /* Figure out the semantic name for this element. */
2293 char const * const pszElementName = pSemanticInfo->pszName;
2294 uint32_t const cbElementName = (uint32_t)strlen(pszElementName) + 1;
2295
2296 if (!dxbcByteWriterCanWrite(w, cbBlob + cbElementName))
2297 return VERR_NO_MEMORY;
2298
2299 char *pszElementNameDst = (char *)pHdrISGN + dst->offElementName;
2300 memcpy(pszElementNameDst, pszElementName, cbElementName);
2301
2302 cbBlob += cbElementName;
2303 ++iSignature;
2304 }
2305 }
2306
2307 /* Blobs are 4 bytes aligned. Commit the blob data. */
2308 cbBlob = RT_ALIGN_32(cbBlob, 4);
2309 pHdrBlob->cbBlob = cbBlob;
2310 pHdr->cbTotal += cbBlob + sizeof(DXBCBlobHeader);
2311 dxbcByteWriterCommit(w, cbBlob + sizeof(DXBCBlobHeader));
2312 return VINF_SUCCESS;
2313}
2314
2315
2316static int dxbcCreateSHDRBlob(DXBCHeader *pHdr, uint32_t u32BlobType,
2317 void const *pvShader, uint32_t cbShader, DXBCByteWriter *w)
2318{
2319 uint32_t cbBlob = cbShader;
2320 if (!dxbcByteWriterCanWrite(w, sizeof(DXBCBlobHeader) + cbBlob))
2321 return VERR_NO_MEMORY;
2322
2323 DXBCBlobHeader *pHdrBlob = (DXBCBlobHeader *)dxbcByteWriterPtr(w);
2324 pHdrBlob->u32BlobType = u32BlobType;
2325 // pHdrBlob->cbBlob = 0;
2326
2327 memcpy(&pHdrBlob[1], pvShader, cbShader);
2328
2329 /* Blobs are 4 bytes aligned. Commit the blob data. */
2330 cbBlob = RT_ALIGN_32(cbBlob, 4);
2331 pHdrBlob->cbBlob = cbBlob;
2332 pHdr->cbTotal += cbBlob + sizeof(DXBCBlobHeader);
2333 dxbcByteWriterCommit(w, cbBlob + sizeof(DXBCBlobHeader));
2334 return VINF_SUCCESS;
2335}
2336
2337
2338/*
2339 * Create a DXBC container with signature and shader code data blobs.
2340 */
2341static int dxbcCreateFromInfo(DXShaderInfo const *pInfo, void const *pvShader, uint32_t cbShader, DXBCByteWriter *w)
2342{
2343 int rc;
2344
2345 /* Create a DXBC container with ISGN, OSGN and SHDR blobs. */
2346 uint32_t const cBlob = 3;
2347 uint32_t const cbHdr = RT_UOFFSETOF(DXBCHeader, aBlobOffset[cBlob]); /* Header with blob offsets. */
2348 if (!dxbcByteWriterCanWrite(w, cbHdr))
2349 return VERR_NO_MEMORY;
2350
2351 /* Container header. */
2352 DXBCHeader *pHdr = (DXBCHeader *)dxbcByteWriterPtr(w);
2353 pHdr->u32DXBC = DXBC_MAGIC;
2354 // RT_ZERO(pHdr->au8Hash);
2355 pHdr->u32Version = 1;
2356 pHdr->cbTotal = cbHdr;
2357 pHdr->cBlob = cBlob;
2358 //RT_ZERO(pHdr->aBlobOffset);
2359 dxbcByteWriterCommit(w, cbHdr);
2360
2361 /* Blobs. */
2362 uint32_t iBlob = 0;
2363
2364 pHdr->aBlobOffset[iBlob++] = dxbcByteWriterSize(w);
2365 rc = dxbcCreateIOSGNBlob(pInfo, pHdr, DXBC_BLOB_TYPE_ISGN, pInfo->cInputSignature, &pInfo->aInputSignature[0], w);
2366 AssertRCReturn(rc, rc);
2367
2368 pHdr->aBlobOffset[iBlob++] = dxbcByteWriterSize(w);
2369 rc = dxbcCreateIOSGNBlob(pInfo, pHdr, DXBC_BLOB_TYPE_OSGN, pInfo->cOutputSignature, &pInfo->aOutputSignature[0], w);
2370 AssertRCReturn(rc, rc);
2371
2372 pHdr->aBlobOffset[iBlob++] = dxbcByteWriterSize(w);
2373 rc = dxbcCreateSHDRBlob(pHdr, DXBC_BLOB_TYPE_SHDR, pvShader, cbShader, w);
2374 AssertRCReturn(rc, rc);
2375
2376 AssertCompile(RT_UOFFSETOF(DXBCHeader, u32Version) == 0x14);
2377 dxbcHash(&pHdr->u32Version, pHdr->cbTotal - RT_UOFFSETOF(DXBCHeader, u32Version), pHdr->au8Hash);
2378
2379 return VINF_SUCCESS;
2380}
2381
2382
2383int DXShaderCreateDXBC(DXShaderInfo const *pInfo, void **ppvDXBC, uint32_t *pcbDXBC)
2384{
2385 /* Build DXBC container. */
2386 int rc;
2387 DXBCByteWriter dxbcByteWriter;
2388 DXBCByteWriter *w = &dxbcByteWriter;
2389 if (dxbcByteWriterInit(w, 4096 + pInfo->cbBytecode))
2390 {
2391 rc = dxbcCreateFromInfo(pInfo, pInfo->pvBytecode, pInfo->cbBytecode, w);
2392 if (RT_SUCCESS(rc))
2393 dxbcByteWriterFetchData(w, ppvDXBC, pcbDXBC);
2394 }
2395 else
2396 rc = VERR_NO_MEMORY;
2397 return rc;
2398}
2399
2400
2401static char const *dxbcGetOutputSemanticName(DXShaderInfo const *pInfo, uint32_t idxRegister, uint32_t u32BlobType,
2402 uint32_t cSignature, SVGA3dDXSignatureEntry const *paSignature,
2403 SVGA3dDXSignatureSemanticName *pSemanticName)
2404{
2405 for (uint32_t i = 0; i < cSignature; ++i)
2406 {
2407 SVGA3dDXSignatureEntry const *p = &paSignature[i];
2408 if (p->registerIndex == idxRegister)
2409 {
2410 AssertReturn(p->semanticName < SVGADX_SIGNATURE_SEMANTIC_NAME_MAX, NULL);
2411 VGPUSemanticInfo const *pSemanticInfo = dxbcSemanticInfo(pInfo, p->semanticName, u32BlobType);
2412 *pSemanticName = p->semanticName;
2413 return pSemanticInfo->pszName;
2414 }
2415 }
2416 return NULL;
2417}
2418
2419char const *DXShaderGetOutputSemanticName(DXShaderInfo const *pInfo, uint32_t idxRegister, SVGA3dDXSignatureSemanticName *pSemanticName)
2420{
2421 return dxbcGetOutputSemanticName(pInfo, idxRegister, DXBC_BLOB_TYPE_OSGN, pInfo->cOutputSignature, &pInfo->aOutputSignature[0], pSemanticName);
2422}
2423
2424int DXShaderUpdateResourceTypes(DXShaderInfo const *pInfo, VGPU10_RESOURCE_DIMENSION *paResourceType, uint32_t cResourceType)
2425{
2426 for (uint32_t i = 0; i < pInfo->cDclResource; ++i)
2427 {
2428 VGPU10_RESOURCE_DIMENSION const resourceType = i < cResourceType ? paResourceType[i] : VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
2429 AssertContinue(resourceType <= VGPU10_RESOURCE_DIMENSION_TEXTURECUBEARRAY);
2430
2431 uint32_t const offToken = pInfo->aOffDclResource[i];
2432 AssertContinue(offToken < pInfo->cbBytecode);
2433 uint32_t *paToken = (uint32_t *)((uintptr_t)pInfo->pvBytecode + offToken);
2434
2435 VGPU10OpcodeToken0 *pOpcode = (VGPU10OpcodeToken0 *)&paToken[0];
2436 pOpcode->resourceDimension = resourceType;
2437 // paToken[1] unmodified
2438 // paToken[2] unmodified
2439 paToken[3] = 0x5555; /** @todo VGPU10ResourceReturnTypeToken float */
2440 }
2441
2442 return VINF_SUCCESS;
2443}
2444
2445#ifdef DXBC_STANDALONE_TEST
2446static int dxbcCreateFromBytecode(void const *pvShaderCode, uint32_t cbShaderCode, void **ppvDXBC, uint32_t *pcbDXBC)
2447{
2448 /* Parse the shader bytecode and create DXBC container with resource, signature and shader bytecode blobs. */
2449 DXShaderInfo info;
2450 RT_ZERO(info);
2451 int rc = DXShaderParse(pvShaderCode, cbShaderCode, &info);
2452 if (RT_SUCCESS(rc))
2453 rc = DXShaderCreateDXBC(&info, ppvDXBC, pcbDXBC);
2454 return rc;
2455}
2456
2457static int parseShaderVM(void const *pvShaderCode, uint32_t cbShaderCode)
2458{
2459 void *pv = NULL;
2460 uint32_t cb = 0;
2461 int rc = dxbcCreateFromBytecode(pvShaderCode, cbShaderCode, &pv, &cb);
2462 if (RT_SUCCESS(rc))
2463 {
2464 /* Hexdump DXBC */
2465 printf("{\n");
2466 uint8_t *pu8 = (uint8_t *)pv;
2467 for (uint32_t i = 0; i < cb; ++i)
2468 {
2469 if ((i % 16) == 0)
2470 {
2471 if (i > 0)
2472 printf(",\n");
2473
2474 printf(" 0x%02x", pu8[i]);
2475 }
2476 else
2477 {
2478 printf(", 0x%02x", pu8[i]);
2479 }
2480 }
2481 printf("\n");
2482 printf("};\n");
2483
2484 RTMemFree(pv);
2485 }
2486
2487 return rc;
2488}
2489
2490static DXBCBlobHeader *dxbcFindBlob(DXBCHeader *pDXBCHeader, uint32_t u32BlobType)
2491{
2492 uint8_t const *pu8DXBCBegin = (uint8_t *)pDXBCHeader;
2493 for (uint32_t i = 0; i < pDXBCHeader->cBlob; ++i)
2494 {
2495 DXBCBlobHeader *pCurrentBlob = (DXBCBlobHeader *)&pu8DXBCBegin[pDXBCHeader->aBlobOffset[i]];
2496 if (pCurrentBlob->u32BlobType == u32BlobType)
2497 return pCurrentBlob;
2498 }
2499 return NULL;
2500}
2501
2502static int dxbcExtractShaderCode(DXBCHeader *pDXBCHeader, void **ppvCode, uint32_t *pcbCode)
2503{
2504 DXBCBlobHeader *pBlob = dxbcFindBlob(pDXBCHeader, DXBC_BLOB_TYPE_SHDR);
2505 AssertReturn(pBlob, VERR_NOT_IMPLEMENTED);
2506
2507 DXBCBlobSHDR *pSHDR = (DXBCBlobSHDR *)&pBlob[1];
2508 *pcbCode = pSHDR->cToken * 4;
2509 *ppvCode = RTMemAlloc(*pcbCode);
2510 AssertReturn(*ppvCode, VERR_NO_MEMORY);
2511
2512 memcpy(*ppvCode, pSHDR, *pcbCode);
2513 return VINF_SUCCESS;
2514}
2515
2516static int parseShaderDXBC(void const *pvDXBC)
2517{
2518 DXBCHeader *pDXBCHeader = (DXBCHeader *)pvDXBC;
2519 void *pvShaderCode = NULL;
2520 uint32_t cbShaderCode = 0;
2521 int rc = dxbcExtractShaderCode(pDXBCHeader, &pvShaderCode, &cbShaderCode);
2522 if (RT_SUCCESS(rc))
2523 {
2524 rc = parseShaderVM(pvShaderCode, cbShaderCode);
2525 RTMemFree(pvShaderCode);
2526 }
2527 return rc;
2528}
2529#endif /* DXBC_STANDALONE_TEST */
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette