VirtualBox

source: vbox/trunk/src/VBox/Devices/Graphics/DevVGA-SVGA3d-dx-shader.cpp@ 97722

Last change on this file since 97722 was 97722, checked in by vboxsync, 2 years ago

Devices/Graphics: corrected an assert; logging. bugref:9830

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 127.0 KB
Line 
1/* $Id: DevVGA-SVGA3d-dx-shader.cpp 97722 2022-11-30 18:27:08Z vboxsync $ */
2/** @file
3 * DevVMWare - VMWare SVGA device - VGPU10+ (DX) shader utilities.
4 */
5
6/*
7 * Copyright (C) 2020-2022 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28
29/*********************************************************************************************************************************
30* Header Files *
31*********************************************************************************************************************************/
32#define LOG_GROUP LOG_GROUP_DEV_VMSVGA
33#include <VBox/AssertGuest.h>
34#include <VBox/log.h>
35
36#include <iprt/asm.h>
37#include <iprt/md5.h>
38#include <iprt/mem.h>
39#include <iprt/sort.h>
40#include <iprt/string.h>
41
42#include "DevVGA-SVGA3d-dx-shader.h"
43
44#ifdef RT_OS_WINDOWS
45#include <d3d11TokenizedProgramFormat.hpp>
46#else
47#define D3D11_SB_EXTENDED_OPCODE_RESOURCE_DIM 2
48#define D3D11_SB_EXTENDED_OPCODE_RESOURCE_RETURN_TYPE 3
49#endif
50
/*
 *
 * DXBC shader binary format definitions.
 *
 */
56
/*
 * DXBC container header.
 *
 * The fixed part is followed by the blob offset table. aBlobOffset is declared
 * with a single element; cBlob gives the actual number of entries.
 */
typedef struct DXBCHeader
{
    uint32_t u32DXBC;           /* 0x43425844 = 'D', 'X', 'B', 'C' */
    uint8_t  au8Hash[16];       /* Modified MD5 hash. See dxbcHash. */
    uint32_t u32Version;        /* 1 */
    uint32_t cbTotal;           /* Total size in bytes, including the header. */
    uint32_t cBlob;             /* Number of entries in the aBlobOffset array. */
    uint32_t aBlobOffset[1];    /* Offsets of blobs from the start of the DXBC header. */
} DXBCHeader;
67
/* FourCC 'D', 'X', 'B', 'C'; presumably the value expected in DXBCHeader::u32DXBC. */
#define DXBC_MAGIC RT_MAKE_U32_FROM_U8('D', 'X', 'B', 'C')
69
/*
 * DXBC blob header.
 *
 * Each blob starts with this header; the blob's data follows immediately.
 */
typedef struct DXBCBlobHeader
{
    uint32_t u32BlobType;       /* FourCC code. DXBC_BLOB_TYPE_* */
    uint32_t cbBlob;            /* Size of the blob excluding the blob header. 4 bytes aligned. */
    /* Followed by the blob's data. */
} DXBCBlobHeader;
77
/* DXBC blob types: FourCC codes stored in DXBCBlobHeader::u32BlobType. */
#define DXBC_BLOB_TYPE_ISGN RT_MAKE_U32_FROM_U8('I', 'S', 'G', 'N')
#define DXBC_BLOB_TYPE_OSGN RT_MAKE_U32_FROM_U8('O', 'S', 'G', 'N')
#define DXBC_BLOB_TYPE_PCSG RT_MAKE_U32_FROM_U8('P', 'C', 'S', 'G')
#define DXBC_BLOB_TYPE_SHDR RT_MAKE_U32_FROM_U8('S', 'H', 'D', 'R')
/** @todo More... */
84
85/* 'SHDR' blob data format. */
86typedef struct DXBCBlobSHDR
87{
88 VGPU10ProgramToken programToken;
89 uint32_t cToken; /* Number of 32 bit tokens including programToken and cToken. */
90 uint32_t au32Token[1]; /* cToken - 2 number of tokens. */
91} DXBCBlobSHDR;
92
/*
 * Element of an input or output signature.
 *
 * The trailing union gives two views of the same 32-bit word: the individual
 * mask bit-fields (u.m) or the whole word at once (u.mask).
 */
typedef struct DXBCBlobIOSGNElement
{
    uint32_t offElementName;    /* Offset of the semantic's name relative to the start of the blob data. */
    uint32_t idxSemantic;       /* Semantic index. */
    uint32_t enmSystemValue;    /* SVGA3dDXSignatureSemanticName */
    uint32_t enmComponentType;  /* 1 - unsigned, 2 - integer, 3 - float. */
    uint32_t idxRegister;       /* Shader register index. Elements must be sorted by register index. */
    union
    {
        struct
        {
            uint32_t mask  : 8; /* Component mask. Lower 4 bits represent X, Y, Z, W channels. */
            uint32_t mask2 : 8; /* Which components are used in the shader. */
            uint32_t pad   : 16;
        } m;
        uint32_t mask;
    } u;
} DXBCBlobIOSGNElement;
112
113/* 'ISGN' and 'OSGN' blob data format. */
114typedef struct DXBCBlobIOSGN
115{
116 uint32_t cElement; /* Number of signature elements. */
117 uint32_t offElement; /* Offset of the first element from the start of the blob. Equals to 8. */
118 DXBCBlobIOSGNElement aElement[1]; /* Signature elements. Size is cElement. */
119 /* Followed by ASCIIZ semantic names. */
120} DXBCBlobIOSGN;
121
122
/*
 * VGPU10 shader parser definitions.
 */
126
/*
 * Parsed info about an operand index.
 *
 * An index is either an immediate value, a relative operand, or a combination,
 * as selected by indexRepresentation.
 */
typedef struct VGPUOperandIndex
{
    uint32_t            indexRepresentation;    /* VGPU10_OPERAND_INDEX_REPRESENTATION */
    uint64_t            iOperandImmediate;      /* Needs up to a qword. */
    struct VGPUOperand *pOperandRelative;       /* For VGPU10_OPERAND_INDEX_*RELATIVE */
} VGPUOperandIndex;
134
135/* Parsed info about an operand. */
136typedef struct VGPUOperand
137{
138 uint32_t numComponents : 2; /* VGPU10_OPERAND_NUM_COMPONENTS */
139 uint32_t selectionMode : 2; /* VGPU10_OPERAND_4_COMPONENT_SELECTION_MODE */
140 uint32_t mask : 4; /* 4-bits X, Y, Z, W mask for VGPU10_OPERAND_4_COMPONENT_MASK_MODE. */
141 uint32_t operandType : 8; /* VGPU10_OPERAND_TYPE */
142 uint32_t indexDimension : 2; /* VGPU10_OPERAND_INDEX_DIMENSION */
143 VGPUOperandIndex aOperandIndex[VGPU10_OPERAND_INDEX_3D]; /* Up to 3. */
144 uint32_t aImm[4]; /* Immediate values for VGPU10_OPERAND_TYPE_IMMEDIATE* */
145 uint32_t cOperandToken; /* Number of tokens in this operand. */
146 uint32_t const *paOperandToken; /* Pointer to operand tokens in the input buffer. */
147} VGPUOperand;
148
149/* Parsed info about an opcode. */
150typedef struct VGPUOpcode
151{
152 uint32_t cOpcodeToken; /* Number of tokens for this operation. */
153 uint32_t opcodeType; /* VGPU10_OPCODE_* */
154 uint32_t opcodeSubtype; /* For example VGPU10_VMWARE_OPCODE_* */
155 uint32_t semanticName; /* SVGA3dDXSignatureSemanticName for system value declarations. */
156 uint32_t cOperand; /* Number of operands for this instruction. */
157 uint32_t aIdxOperand[8]; /* Indices of the instruction operands in the aValOperand array. */
158 /* 8 should be enough for everyone. */
159 VGPUOperand aValOperand[16]; /* Operands including VGPU10_OPERAND_INDEX_*RELATIVE if they are used: */
160 /* Operand1, VGPU10_OPERAND_INDEX_*RELATIVE for Operand1, ... */
161 /* ... */
162 /* OperandN, VGPU10_OPERAND_INDEX_*RELATIVE for OperandN, ... */
163 /* 16 probably should be enough for everyone. */
164 uint32_t const *paOpcodeToken; /* Pointer to opcode tokens in the input buffer. */
165} VGPUOpcode;
166
/* Static per-opcode information; element type of the g_aOpcodeInfo table. */
typedef struct VGPUOpcodeInfo
{
    uint32_t cOperand;  /* Number of operands for this opcode. */
} VGPUOpcodeInfo;
171
172static VGPUOpcodeInfo const g_aOpcodeInfo[] =
173{
174 { 3 }, /* VGPU10_OPCODE_ADD */
175 { 3 }, /* VGPU10_OPCODE_AND */
176 { 0 }, /* VGPU10_OPCODE_BREAK */
177 { 1 }, /* VGPU10_OPCODE_BREAKC */
178 { 1 }, /* VGPU10_OPCODE_CALL */
179 { 2 }, /* VGPU10_OPCODE_CALLC */
180 { 1 }, /* VGPU10_OPCODE_CASE */
181 { 0 }, /* VGPU10_OPCODE_CONTINUE */
182 { 1 }, /* VGPU10_OPCODE_CONTINUEC */
183 { 0 }, /* VGPU10_OPCODE_CUT */
184 { 0 }, /* VGPU10_OPCODE_DEFAULT */
185 { 2 }, /* VGPU10_OPCODE_DERIV_RTX */
186 { 2 }, /* VGPU10_OPCODE_DERIV_RTY */
187 { 1 }, /* VGPU10_OPCODE_DISCARD */
188 { 3 }, /* VGPU10_OPCODE_DIV */
189 { 3 }, /* VGPU10_OPCODE_DP2 */
190 { 3 }, /* VGPU10_OPCODE_DP3 */
191 { 3 }, /* VGPU10_OPCODE_DP4 */
192 { 0 }, /* VGPU10_OPCODE_ELSE */
193 { 0 }, /* VGPU10_OPCODE_EMIT */
194 { 0 }, /* VGPU10_OPCODE_EMITTHENCUT */
195 { 0 }, /* VGPU10_OPCODE_ENDIF */
196 { 0 }, /* VGPU10_OPCODE_ENDLOOP */
197 { 0 }, /* VGPU10_OPCODE_ENDSWITCH */
198 { 3 }, /* VGPU10_OPCODE_EQ */
199 { 2 }, /* VGPU10_OPCODE_EXP */
200 { 2 }, /* VGPU10_OPCODE_FRC */
201 { 2 }, /* VGPU10_OPCODE_FTOI */
202 { 2 }, /* VGPU10_OPCODE_FTOU */
203 { 3 }, /* VGPU10_OPCODE_GE */
204 { 3 }, /* VGPU10_OPCODE_IADD */
205 { 1 }, /* VGPU10_OPCODE_IF */
206 { 3 }, /* VGPU10_OPCODE_IEQ */
207 { 3 }, /* VGPU10_OPCODE_IGE */
208 { 3 }, /* VGPU10_OPCODE_ILT */
209 { 4 }, /* VGPU10_OPCODE_IMAD */
210 { 3 }, /* VGPU10_OPCODE_IMAX */
211 { 3 }, /* VGPU10_OPCODE_IMIN */
212 { 4 }, /* VGPU10_OPCODE_IMUL */
213 { 3 }, /* VGPU10_OPCODE_INE */
214 { 2 }, /* VGPU10_OPCODE_INEG */
215 { 3 }, /* VGPU10_OPCODE_ISHL */
216 { 3 }, /* VGPU10_OPCODE_ISHR */
217 { 2 }, /* VGPU10_OPCODE_ITOF */
218 { 1 }, /* VGPU10_OPCODE_LABEL */
219 { 3 }, /* VGPU10_OPCODE_LD */
220 { 4 }, /* VGPU10_OPCODE_LD_MS */
221 { 2 }, /* VGPU10_OPCODE_LOG */
222 { 0 }, /* VGPU10_OPCODE_LOOP */
223 { 3 }, /* VGPU10_OPCODE_LT */
224 { 4 }, /* VGPU10_OPCODE_MAD */
225 { 3 }, /* VGPU10_OPCODE_MIN */
226 { 3 }, /* VGPU10_OPCODE_MAX */
227 { UINT32_MAX }, /* VGPU10_OPCODE_CUSTOMDATA: special opcode */
228 { 2 }, /* VGPU10_OPCODE_MOV */
229 { 4 }, /* VGPU10_OPCODE_MOVC */
230 { 3 }, /* VGPU10_OPCODE_MUL */
231 { 3 }, /* VGPU10_OPCODE_NE */
232 { 0 }, /* VGPU10_OPCODE_NOP */
233 { 2 }, /* VGPU10_OPCODE_NOT */
234 { 3 }, /* VGPU10_OPCODE_OR */
235 { 3 }, /* VGPU10_OPCODE_RESINFO */
236 { 0 }, /* VGPU10_OPCODE_RET */
237 { 1 }, /* VGPU10_OPCODE_RETC */
238 { 2 }, /* VGPU10_OPCODE_ROUND_NE */
239 { 2 }, /* VGPU10_OPCODE_ROUND_NI */
240 { 2 }, /* VGPU10_OPCODE_ROUND_PI */
241 { 2 }, /* VGPU10_OPCODE_ROUND_Z */
242 { 2 }, /* VGPU10_OPCODE_RSQ */
243 { 4 }, /* VGPU10_OPCODE_SAMPLE */
244 { 5 }, /* VGPU10_OPCODE_SAMPLE_C */
245 { 5 }, /* VGPU10_OPCODE_SAMPLE_C_LZ */
246 { 5 }, /* VGPU10_OPCODE_SAMPLE_L */
247 { 6 }, /* VGPU10_OPCODE_SAMPLE_D */
248 { 5 }, /* VGPU10_OPCODE_SAMPLE_B */
249 { 2 }, /* VGPU10_OPCODE_SQRT */
250 { 1 }, /* VGPU10_OPCODE_SWITCH */
251 { 3 }, /* VGPU10_OPCODE_SINCOS */
252 { 4 }, /* VGPU10_OPCODE_UDIV */
253 { 3 }, /* VGPU10_OPCODE_ULT */
254 { 3 }, /* VGPU10_OPCODE_UGE */
255 { 4 }, /* VGPU10_OPCODE_UMUL */
256 { 4 }, /* VGPU10_OPCODE_UMAD */
257 { 3 }, /* VGPU10_OPCODE_UMAX */
258 { 3 }, /* VGPU10_OPCODE_UMIN */
259 { 3 }, /* VGPU10_OPCODE_USHR */
260 { 2 }, /* VGPU10_OPCODE_UTOF */
261 { 3 }, /* VGPU10_OPCODE_XOR */
262 { 1 }, /* VGPU10_OPCODE_DCL_RESOURCE */
263 { 1 }, /* VGPU10_OPCODE_DCL_CONSTANT_BUFFER */
264 { 1 }, /* VGPU10_OPCODE_DCL_SAMPLER */
265 { 1 }, /* VGPU10_OPCODE_DCL_INDEX_RANGE */
266 { 0 }, /* VGPU10_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY */
267 { 0 }, /* VGPU10_OPCODE_DCL_GS_INPUT_PRIMITIVE */
268 { 0 }, /* VGPU10_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT */
269 { 1 }, /* VGPU10_OPCODE_DCL_INPUT */
270 { 1 }, /* VGPU10_OPCODE_DCL_INPUT_SGV */
271 { 1 }, /* VGPU10_OPCODE_DCL_INPUT_SIV */
272 { 1 }, /* VGPU10_OPCODE_DCL_INPUT_PS */
273 { 1 }, /* VGPU10_OPCODE_DCL_INPUT_PS_SGV */
274 { 1 }, /* VGPU10_OPCODE_DCL_INPUT_PS_SIV */
275 { 1 }, /* VGPU10_OPCODE_DCL_OUTPUT */
276 { 1 }, /* VGPU10_OPCODE_DCL_OUTPUT_SGV */
277 { 1 }, /* VGPU10_OPCODE_DCL_OUTPUT_SIV */
278 { 0 }, /* VGPU10_OPCODE_DCL_TEMPS */
279 { 0 }, /* VGPU10_OPCODE_DCL_INDEXABLE_TEMP */
280 { 0 }, /* VGPU10_OPCODE_DCL_GLOBAL_FLAGS */
281 { UINT32_MAX }, /* VGPU10_OPCODE_VMWARE: special opcode */
282 { 4 }, /* VGPU10_OPCODE_LOD */
283 { 4 }, /* VGPU10_OPCODE_GATHER4 */
284 { 3 }, /* VGPU10_OPCODE_SAMPLE_POS */
285 { 2 }, /* VGPU10_OPCODE_SAMPLE_INFO */
286 { UINT32_MAX }, /* VGPU10_OPCODE_RESERVED1: special opcode */
287 { 0 }, /* VGPU10_OPCODE_HS_DECLS */
288 { 0 }, /* VGPU10_OPCODE_HS_CONTROL_POINT_PHASE */
289 { 0 }, /* VGPU10_OPCODE_HS_FORK_PHASE */
290 { 0 }, /* VGPU10_OPCODE_HS_JOIN_PHASE */
291 { 1 }, /* VGPU10_OPCODE_EMIT_STREAM */
292 { 1 }, /* VGPU10_OPCODE_CUT_STREAM */
293 { 1 }, /* VGPU10_OPCODE_EMITTHENCUT_STREAM */
294 { 1 }, /* VGPU10_OPCODE_INTERFACE_CALL */
295 { 2 }, /* VGPU10_OPCODE_BUFINFO */
296 { 2 }, /* VGPU10_OPCODE_DERIV_RTX_COARSE */
297 { 2 }, /* VGPU10_OPCODE_DERIV_RTX_FINE */
298 { 2 }, /* VGPU10_OPCODE_DERIV_RTY_COARSE */
299 { 2 }, /* VGPU10_OPCODE_DERIV_RTY_FINE */
300 { 5 }, /* VGPU10_OPCODE_GATHER4_C */
301 { 5 }, /* VGPU10_OPCODE_GATHER4_PO */
302 { 6 }, /* VGPU10_OPCODE_GATHER4_PO_C */
303 { 2 }, /* VGPU10_OPCODE_RCP */
304 { 2 }, /* VGPU10_OPCODE_F32TOF16 */
305 { 2 }, /* VGPU10_OPCODE_F16TOF32 */
306 { 4 }, /* VGPU10_OPCODE_UADDC */
307 { 4 }, /* VGPU10_OPCODE_USUBB */
308 { 2 }, /* VGPU10_OPCODE_COUNTBITS */
309 { 2 }, /* VGPU10_OPCODE_FIRSTBIT_HI */
310 { 2 }, /* VGPU10_OPCODE_FIRSTBIT_LO */
311 { 2 }, /* VGPU10_OPCODE_FIRSTBIT_SHI */
312 { 4 }, /* VGPU10_OPCODE_UBFE */
313 { 4 }, /* VGPU10_OPCODE_IBFE */
314 { 5 }, /* VGPU10_OPCODE_BFI */
315 { 2 }, /* VGPU10_OPCODE_BFREV */
316 { 5 }, /* VGPU10_OPCODE_SWAPC */
317 { 1 }, /* VGPU10_OPCODE_DCL_STREAM */
318 { 0 }, /* VGPU10_OPCODE_DCL_FUNCTION_BODY */
319 { 0 }, /* VGPU10_OPCODE_DCL_FUNCTION_TABLE */
320 { 0 }, /* VGPU10_OPCODE_DCL_INTERFACE */
321 { 0 }, /* VGPU10_OPCODE_DCL_INPUT_CONTROL_POINT_COUNT */
322 { 0 }, /* VGPU10_OPCODE_DCL_OUTPUT_CONTROL_POINT_COUNT */
323 { 0 }, /* VGPU10_OPCODE_DCL_TESS_DOMAIN */
324 { 0 }, /* VGPU10_OPCODE_DCL_TESS_PARTITIONING */
325 { 0 }, /* VGPU10_OPCODE_DCL_TESS_OUTPUT_PRIMITIVE */
326 { 0 }, /* VGPU10_OPCODE_DCL_HS_MAX_TESSFACTOR */
327 { 0 }, /* VGPU10_OPCODE_DCL_HS_FORK_PHASE_INSTANCE_COUNT */
328 { 0 }, /* VGPU10_OPCODE_DCL_HS_JOIN_PHASE_INSTANCE_COUNT */
329 { 0 }, /* VGPU10_OPCODE_DCL_THREAD_GROUP */
330 { 1 }, /* VGPU10_OPCODE_DCL_UAV_TYPED */
331 { 1 }, /* VGPU10_OPCODE_DCL_UAV_RAW */
332 { 1 }, /* VGPU10_OPCODE_DCL_UAV_STRUCTURED */
333 { 1 }, /* VGPU10_OPCODE_DCL_TGSM_RAW */
334 { 1 }, /* VGPU10_OPCODE_DCL_TGSM_STRUCTURED */
335 { 1 }, /* VGPU10_OPCODE_DCL_RESOURCE_RAW */
336 { 1 }, /* VGPU10_OPCODE_DCL_RESOURCE_STRUCTURED */
337 { 3 }, /* VGPU10_OPCODE_LD_UAV_TYPED */
338 { 3 }, /* VGPU10_OPCODE_STORE_UAV_TYPED */
339 { 3 }, /* VGPU10_OPCODE_LD_RAW */
340 { 3 }, /* VGPU10_OPCODE_STORE_RAW */
341 { 4 }, /* VGPU10_OPCODE_LD_STRUCTURED */
342 { 4 }, /* VGPU10_OPCODE_STORE_STRUCTURED */
343 { 3 }, /* VGPU10_OPCODE_ATOMIC_AND */
344 { 3 }, /* VGPU10_OPCODE_ATOMIC_OR */
345 { 3 }, /* VGPU10_OPCODE_ATOMIC_XOR */
346 { 4 }, /* VGPU10_OPCODE_ATOMIC_CMP_STORE */
347 { 3 }, /* VGPU10_OPCODE_ATOMIC_IADD */
348 { 3 }, /* VGPU10_OPCODE_ATOMIC_IMAX */
349 { 3 }, /* VGPU10_OPCODE_ATOMIC_IMIN */
350 { 3 }, /* VGPU10_OPCODE_ATOMIC_UMAX */
351 { 3 }, /* VGPU10_OPCODE_ATOMIC_UMIN */
352 { 2 }, /* VGPU10_OPCODE_IMM_ATOMIC_ALLOC */
353 { 2 }, /* VGPU10_OPCODE_IMM_ATOMIC_CONSUME */
354 { 4 }, /* VGPU10_OPCODE_IMM_ATOMIC_IADD */
355 { 4 }, /* VGPU10_OPCODE_IMM_ATOMIC_AND */
356 { 4 }, /* VGPU10_OPCODE_IMM_ATOMIC_OR */
357 { 4 }, /* VGPU10_OPCODE_IMM_ATOMIC_XOR */
358 { 4 }, /* VGPU10_OPCODE_IMM_ATOMIC_EXCH */
359 { 5 }, /* VGPU10_OPCODE_IMM_ATOMIC_CMP_EXCH */
360 { 4 }, /* VGPU10_OPCODE_IMM_ATOMIC_IMAX */
361 { 4 }, /* VGPU10_OPCODE_IMM_ATOMIC_IMIN */
362 { 4 }, /* VGPU10_OPCODE_IMM_ATOMIC_UMAX */
363 { 4 }, /* VGPU10_OPCODE_IMM_ATOMIC_UMIN */
364 { 0 }, /* VGPU10_OPCODE_SYNC */
365 { 3 }, /* VGPU10_OPCODE_DADD */
366 { 3 }, /* VGPU10_OPCODE_DMAX */
367 { 3 }, /* VGPU10_OPCODE_DMIN */
368 { 3 }, /* VGPU10_OPCODE_DMUL */
369 { 3 }, /* VGPU10_OPCODE_DEQ */
370 { 3 }, /* VGPU10_OPCODE_DGE */
371 { 3 }, /* VGPU10_OPCODE_DLT */
372 { 3 }, /* VGPU10_OPCODE_DNE */
373 { 2 }, /* VGPU10_OPCODE_DMOV */
374 { 4 }, /* VGPU10_OPCODE_DMOVC */
375 { 2 }, /* VGPU10_OPCODE_DTOF */
376 { 2 }, /* VGPU10_OPCODE_FTOD */
377 { 3 }, /* VGPU10_OPCODE_EVAL_SNAPPED */
378 { 3 }, /* VGPU10_OPCODE_EVAL_SAMPLE_INDEX */
379 { 2 }, /* VGPU10_OPCODE_EVAL_CENTROID */
380 { 0 }, /* VGPU10_OPCODE_DCL_GS_INSTANCE_COUNT */
381 { 0 }, /* VGPU10_OPCODE_ABORT */
382 { 0 }, /* VGPU10_OPCODE_DEBUG_BREAK */
383 { 0 }, /* VGPU10_OPCODE_RESERVED0 */
384 { 3 }, /* VGPU10_OPCODE_DDIV */
385 { 4 }, /* VGPU10_OPCODE_DFMA */
386 { 2 }, /* VGPU10_OPCODE_DRCP */
387 { 4 }, /* VGPU10_OPCODE_MSAD */
388 { 2 }, /* VGPU10_OPCODE_DTOI */
389 { 2 }, /* VGPU10_OPCODE_DTOU */
390 { 2 }, /* VGPU10_OPCODE_ITOD */
391 { 2 }, /* VGPU10_OPCODE_UTOD */
392};
393AssertCompile(RT_ELEMENTS(g_aOpcodeInfo) == VGPU10_NUM_OPCODES);
394
395#ifdef LOG_ENABLED
/*
 *
 * Helpers to translate a VGPU10 shader constant to a string.
 *
 */
401
/* Expands to a switch case label which returns the spelling of idx as a string literal. */
#define SVGA_CASE_ID2STR(idx) case idx: return #idx
403
404static const char *dxbcOpcodeToString(uint32_t opcodeType)
405{
406 VGPU10_OPCODE_TYPE enm = (VGPU10_OPCODE_TYPE)opcodeType;
407 switch (enm)
408 {
409 SVGA_CASE_ID2STR(VGPU10_OPCODE_ADD);
410 SVGA_CASE_ID2STR(VGPU10_OPCODE_AND);
411 SVGA_CASE_ID2STR(VGPU10_OPCODE_BREAK);
412 SVGA_CASE_ID2STR(VGPU10_OPCODE_BREAKC);
413 SVGA_CASE_ID2STR(VGPU10_OPCODE_CALL);
414 SVGA_CASE_ID2STR(VGPU10_OPCODE_CALLC);
415 SVGA_CASE_ID2STR(VGPU10_OPCODE_CASE);
416 SVGA_CASE_ID2STR(VGPU10_OPCODE_CONTINUE);
417 SVGA_CASE_ID2STR(VGPU10_OPCODE_CONTINUEC);
418 SVGA_CASE_ID2STR(VGPU10_OPCODE_CUT);
419 SVGA_CASE_ID2STR(VGPU10_OPCODE_DEFAULT);
420 SVGA_CASE_ID2STR(VGPU10_OPCODE_DERIV_RTX);
421 SVGA_CASE_ID2STR(VGPU10_OPCODE_DERIV_RTY);
422 SVGA_CASE_ID2STR(VGPU10_OPCODE_DISCARD);
423 SVGA_CASE_ID2STR(VGPU10_OPCODE_DIV);
424 SVGA_CASE_ID2STR(VGPU10_OPCODE_DP2);
425 SVGA_CASE_ID2STR(VGPU10_OPCODE_DP3);
426 SVGA_CASE_ID2STR(VGPU10_OPCODE_DP4);
427 SVGA_CASE_ID2STR(VGPU10_OPCODE_ELSE);
428 SVGA_CASE_ID2STR(VGPU10_OPCODE_EMIT);
429 SVGA_CASE_ID2STR(VGPU10_OPCODE_EMITTHENCUT);
430 SVGA_CASE_ID2STR(VGPU10_OPCODE_ENDIF);
431 SVGA_CASE_ID2STR(VGPU10_OPCODE_ENDLOOP);
432 SVGA_CASE_ID2STR(VGPU10_OPCODE_ENDSWITCH);
433 SVGA_CASE_ID2STR(VGPU10_OPCODE_EQ);
434 SVGA_CASE_ID2STR(VGPU10_OPCODE_EXP);
435 SVGA_CASE_ID2STR(VGPU10_OPCODE_FRC);
436 SVGA_CASE_ID2STR(VGPU10_OPCODE_FTOI);
437 SVGA_CASE_ID2STR(VGPU10_OPCODE_FTOU);
438 SVGA_CASE_ID2STR(VGPU10_OPCODE_GE);
439 SVGA_CASE_ID2STR(VGPU10_OPCODE_IADD);
440 SVGA_CASE_ID2STR(VGPU10_OPCODE_IF);
441 SVGA_CASE_ID2STR(VGPU10_OPCODE_IEQ);
442 SVGA_CASE_ID2STR(VGPU10_OPCODE_IGE);
443 SVGA_CASE_ID2STR(VGPU10_OPCODE_ILT);
444 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMAD);
445 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMAX);
446 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMIN);
447 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMUL);
448 SVGA_CASE_ID2STR(VGPU10_OPCODE_INE);
449 SVGA_CASE_ID2STR(VGPU10_OPCODE_INEG);
450 SVGA_CASE_ID2STR(VGPU10_OPCODE_ISHL);
451 SVGA_CASE_ID2STR(VGPU10_OPCODE_ISHR);
452 SVGA_CASE_ID2STR(VGPU10_OPCODE_ITOF);
453 SVGA_CASE_ID2STR(VGPU10_OPCODE_LABEL);
454 SVGA_CASE_ID2STR(VGPU10_OPCODE_LD);
455 SVGA_CASE_ID2STR(VGPU10_OPCODE_LD_MS);
456 SVGA_CASE_ID2STR(VGPU10_OPCODE_LOG);
457 SVGA_CASE_ID2STR(VGPU10_OPCODE_LOOP);
458 SVGA_CASE_ID2STR(VGPU10_OPCODE_LT);
459 SVGA_CASE_ID2STR(VGPU10_OPCODE_MAD);
460 SVGA_CASE_ID2STR(VGPU10_OPCODE_MIN);
461 SVGA_CASE_ID2STR(VGPU10_OPCODE_MAX);
462 SVGA_CASE_ID2STR(VGPU10_OPCODE_CUSTOMDATA);
463 SVGA_CASE_ID2STR(VGPU10_OPCODE_MOV);
464 SVGA_CASE_ID2STR(VGPU10_OPCODE_MOVC);
465 SVGA_CASE_ID2STR(VGPU10_OPCODE_MUL);
466 SVGA_CASE_ID2STR(VGPU10_OPCODE_NE);
467 SVGA_CASE_ID2STR(VGPU10_OPCODE_NOP);
468 SVGA_CASE_ID2STR(VGPU10_OPCODE_NOT);
469 SVGA_CASE_ID2STR(VGPU10_OPCODE_OR);
470 SVGA_CASE_ID2STR(VGPU10_OPCODE_RESINFO);
471 SVGA_CASE_ID2STR(VGPU10_OPCODE_RET);
472 SVGA_CASE_ID2STR(VGPU10_OPCODE_RETC);
473 SVGA_CASE_ID2STR(VGPU10_OPCODE_ROUND_NE);
474 SVGA_CASE_ID2STR(VGPU10_OPCODE_ROUND_NI);
475 SVGA_CASE_ID2STR(VGPU10_OPCODE_ROUND_PI);
476 SVGA_CASE_ID2STR(VGPU10_OPCODE_ROUND_Z);
477 SVGA_CASE_ID2STR(VGPU10_OPCODE_RSQ);
478 SVGA_CASE_ID2STR(VGPU10_OPCODE_SAMPLE);
479 SVGA_CASE_ID2STR(VGPU10_OPCODE_SAMPLE_C);
480 SVGA_CASE_ID2STR(VGPU10_OPCODE_SAMPLE_C_LZ);
481 SVGA_CASE_ID2STR(VGPU10_OPCODE_SAMPLE_L);
482 SVGA_CASE_ID2STR(VGPU10_OPCODE_SAMPLE_D);
483 SVGA_CASE_ID2STR(VGPU10_OPCODE_SAMPLE_B);
484 SVGA_CASE_ID2STR(VGPU10_OPCODE_SQRT);
485 SVGA_CASE_ID2STR(VGPU10_OPCODE_SWITCH);
486 SVGA_CASE_ID2STR(VGPU10_OPCODE_SINCOS);
487 SVGA_CASE_ID2STR(VGPU10_OPCODE_UDIV);
488 SVGA_CASE_ID2STR(VGPU10_OPCODE_ULT);
489 SVGA_CASE_ID2STR(VGPU10_OPCODE_UGE);
490 SVGA_CASE_ID2STR(VGPU10_OPCODE_UMUL);
491 SVGA_CASE_ID2STR(VGPU10_OPCODE_UMAD);
492 SVGA_CASE_ID2STR(VGPU10_OPCODE_UMAX);
493 SVGA_CASE_ID2STR(VGPU10_OPCODE_UMIN);
494 SVGA_CASE_ID2STR(VGPU10_OPCODE_USHR);
495 SVGA_CASE_ID2STR(VGPU10_OPCODE_UTOF);
496 SVGA_CASE_ID2STR(VGPU10_OPCODE_XOR);
497 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_RESOURCE);
498 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_CONSTANT_BUFFER);
499 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_SAMPLER);
500 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_INDEX_RANGE);
501 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY);
502 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_GS_INPUT_PRIMITIVE);
503 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT);
504 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_INPUT);
505 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_INPUT_SGV);
506 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_INPUT_SIV);
507 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_INPUT_PS);
508 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_INPUT_PS_SGV);
509 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_INPUT_PS_SIV);
510 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_OUTPUT);
511 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_OUTPUT_SGV);
512 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_OUTPUT_SIV);
513 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_TEMPS);
514 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_INDEXABLE_TEMP);
515 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_GLOBAL_FLAGS);
516 SVGA_CASE_ID2STR(VGPU10_OPCODE_VMWARE);
517 SVGA_CASE_ID2STR(VGPU10_OPCODE_LOD);
518 SVGA_CASE_ID2STR(VGPU10_OPCODE_GATHER4);
519 SVGA_CASE_ID2STR(VGPU10_OPCODE_SAMPLE_POS);
520 SVGA_CASE_ID2STR(VGPU10_OPCODE_SAMPLE_INFO);
521 SVGA_CASE_ID2STR(VGPU10_OPCODE_RESERVED1);
522 SVGA_CASE_ID2STR(VGPU10_OPCODE_HS_DECLS);
523 SVGA_CASE_ID2STR(VGPU10_OPCODE_HS_CONTROL_POINT_PHASE);
524 SVGA_CASE_ID2STR(VGPU10_OPCODE_HS_FORK_PHASE);
525 SVGA_CASE_ID2STR(VGPU10_OPCODE_HS_JOIN_PHASE);
526 SVGA_CASE_ID2STR(VGPU10_OPCODE_EMIT_STREAM);
527 SVGA_CASE_ID2STR(VGPU10_OPCODE_CUT_STREAM);
528 SVGA_CASE_ID2STR(VGPU10_OPCODE_EMITTHENCUT_STREAM);
529 SVGA_CASE_ID2STR(VGPU10_OPCODE_INTERFACE_CALL);
530 SVGA_CASE_ID2STR(VGPU10_OPCODE_BUFINFO);
531 SVGA_CASE_ID2STR(VGPU10_OPCODE_DERIV_RTX_COARSE);
532 SVGA_CASE_ID2STR(VGPU10_OPCODE_DERIV_RTX_FINE);
533 SVGA_CASE_ID2STR(VGPU10_OPCODE_DERIV_RTY_COARSE);
534 SVGA_CASE_ID2STR(VGPU10_OPCODE_DERIV_RTY_FINE);
535 SVGA_CASE_ID2STR(VGPU10_OPCODE_GATHER4_C);
536 SVGA_CASE_ID2STR(VGPU10_OPCODE_GATHER4_PO);
537 SVGA_CASE_ID2STR(VGPU10_OPCODE_GATHER4_PO_C);
538 SVGA_CASE_ID2STR(VGPU10_OPCODE_RCP);
539 SVGA_CASE_ID2STR(VGPU10_OPCODE_F32TOF16);
540 SVGA_CASE_ID2STR(VGPU10_OPCODE_F16TOF32);
541 SVGA_CASE_ID2STR(VGPU10_OPCODE_UADDC);
542 SVGA_CASE_ID2STR(VGPU10_OPCODE_USUBB);
543 SVGA_CASE_ID2STR(VGPU10_OPCODE_COUNTBITS);
544 SVGA_CASE_ID2STR(VGPU10_OPCODE_FIRSTBIT_HI);
545 SVGA_CASE_ID2STR(VGPU10_OPCODE_FIRSTBIT_LO);
546 SVGA_CASE_ID2STR(VGPU10_OPCODE_FIRSTBIT_SHI);
547 SVGA_CASE_ID2STR(VGPU10_OPCODE_UBFE);
548 SVGA_CASE_ID2STR(VGPU10_OPCODE_IBFE);
549 SVGA_CASE_ID2STR(VGPU10_OPCODE_BFI);
550 SVGA_CASE_ID2STR(VGPU10_OPCODE_BFREV);
551 SVGA_CASE_ID2STR(VGPU10_OPCODE_SWAPC);
552 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_STREAM);
553 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_FUNCTION_BODY);
554 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_FUNCTION_TABLE);
555 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_INTERFACE);
556 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_INPUT_CONTROL_POINT_COUNT);
557 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_OUTPUT_CONTROL_POINT_COUNT);
558 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_TESS_DOMAIN);
559 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_TESS_PARTITIONING);
560 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_TESS_OUTPUT_PRIMITIVE);
561 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_HS_MAX_TESSFACTOR);
562 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_HS_FORK_PHASE_INSTANCE_COUNT);
563 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_HS_JOIN_PHASE_INSTANCE_COUNT);
564 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_THREAD_GROUP);
565 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_UAV_TYPED);
566 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_UAV_RAW);
567 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_UAV_STRUCTURED);
568 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_TGSM_RAW);
569 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_TGSM_STRUCTURED);
570 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_RESOURCE_RAW);
571 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_RESOURCE_STRUCTURED);
572 SVGA_CASE_ID2STR(VGPU10_OPCODE_LD_UAV_TYPED);
573 SVGA_CASE_ID2STR(VGPU10_OPCODE_STORE_UAV_TYPED);
574 SVGA_CASE_ID2STR(VGPU10_OPCODE_LD_RAW);
575 SVGA_CASE_ID2STR(VGPU10_OPCODE_STORE_RAW);
576 SVGA_CASE_ID2STR(VGPU10_OPCODE_LD_STRUCTURED);
577 SVGA_CASE_ID2STR(VGPU10_OPCODE_STORE_STRUCTURED);
578 SVGA_CASE_ID2STR(VGPU10_OPCODE_ATOMIC_AND);
579 SVGA_CASE_ID2STR(VGPU10_OPCODE_ATOMIC_OR);
580 SVGA_CASE_ID2STR(VGPU10_OPCODE_ATOMIC_XOR);
581 SVGA_CASE_ID2STR(VGPU10_OPCODE_ATOMIC_CMP_STORE);
582 SVGA_CASE_ID2STR(VGPU10_OPCODE_ATOMIC_IADD);
583 SVGA_CASE_ID2STR(VGPU10_OPCODE_ATOMIC_IMAX);
584 SVGA_CASE_ID2STR(VGPU10_OPCODE_ATOMIC_IMIN);
585 SVGA_CASE_ID2STR(VGPU10_OPCODE_ATOMIC_UMAX);
586 SVGA_CASE_ID2STR(VGPU10_OPCODE_ATOMIC_UMIN);
587 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMM_ATOMIC_ALLOC);
588 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMM_ATOMIC_CONSUME);
589 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMM_ATOMIC_IADD);
590 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMM_ATOMIC_AND);
591 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMM_ATOMIC_OR);
592 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMM_ATOMIC_XOR);
593 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMM_ATOMIC_EXCH);
594 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMM_ATOMIC_CMP_EXCH);
595 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMM_ATOMIC_IMAX);
596 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMM_ATOMIC_IMIN);
597 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMM_ATOMIC_UMAX);
598 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMM_ATOMIC_UMIN);
599 SVGA_CASE_ID2STR(VGPU10_OPCODE_SYNC);
600 SVGA_CASE_ID2STR(VGPU10_OPCODE_DADD);
601 SVGA_CASE_ID2STR(VGPU10_OPCODE_DMAX);
602 SVGA_CASE_ID2STR(VGPU10_OPCODE_DMIN);
603 SVGA_CASE_ID2STR(VGPU10_OPCODE_DMUL);
604 SVGA_CASE_ID2STR(VGPU10_OPCODE_DEQ);
605 SVGA_CASE_ID2STR(VGPU10_OPCODE_DGE);
606 SVGA_CASE_ID2STR(VGPU10_OPCODE_DLT);
607 SVGA_CASE_ID2STR(VGPU10_OPCODE_DNE);
608 SVGA_CASE_ID2STR(VGPU10_OPCODE_DMOV);
609 SVGA_CASE_ID2STR(VGPU10_OPCODE_DMOVC);
610 SVGA_CASE_ID2STR(VGPU10_OPCODE_DTOF);
611 SVGA_CASE_ID2STR(VGPU10_OPCODE_FTOD);
612 SVGA_CASE_ID2STR(VGPU10_OPCODE_EVAL_SNAPPED);
613 SVGA_CASE_ID2STR(VGPU10_OPCODE_EVAL_SAMPLE_INDEX);
614 SVGA_CASE_ID2STR(VGPU10_OPCODE_EVAL_CENTROID);
615 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_GS_INSTANCE_COUNT);
616 SVGA_CASE_ID2STR(VGPU10_OPCODE_ABORT);
617 SVGA_CASE_ID2STR(VGPU10_OPCODE_DEBUG_BREAK);
618 SVGA_CASE_ID2STR(VGPU10_OPCODE_RESERVED0);
619 SVGA_CASE_ID2STR(VGPU10_OPCODE_DDIV);
620 SVGA_CASE_ID2STR(VGPU10_OPCODE_DFMA);
621 SVGA_CASE_ID2STR(VGPU10_OPCODE_DRCP);
622 SVGA_CASE_ID2STR(VGPU10_OPCODE_MSAD);
623 SVGA_CASE_ID2STR(VGPU10_OPCODE_DTOI);
624 SVGA_CASE_ID2STR(VGPU10_OPCODE_DTOU);
625 SVGA_CASE_ID2STR(VGPU10_OPCODE_ITOD);
626 SVGA_CASE_ID2STR(VGPU10_OPCODE_UTOD);
627 SVGA_CASE_ID2STR(VGPU10_NUM_OPCODES);
628 }
629 return NULL;
630}
631
632
633static const char *dxbcShaderTypeToString(uint32_t value)
634{
635 VGPU10_PROGRAM_TYPE enm = (VGPU10_PROGRAM_TYPE)value;
636 switch (enm)
637 {
638 SVGA_CASE_ID2STR(VGPU10_PIXEL_SHADER);
639 SVGA_CASE_ID2STR(VGPU10_VERTEX_SHADER);
640 SVGA_CASE_ID2STR(VGPU10_GEOMETRY_SHADER);
641 SVGA_CASE_ID2STR(VGPU10_HULL_SHADER);
642 SVGA_CASE_ID2STR(VGPU10_DOMAIN_SHADER);
643 SVGA_CASE_ID2STR(VGPU10_COMPUTE_SHADER);
644 }
645 return NULL;
646}
647
648
649static const char *dxbcCustomDataClassToString(uint32_t value)
650{
651 VGPU10_CUSTOMDATA_CLASS enm = (VGPU10_CUSTOMDATA_CLASS)value;
652 switch (enm)
653 {
654 SVGA_CASE_ID2STR(VGPU10_CUSTOMDATA_COMMENT);
655 SVGA_CASE_ID2STR(VGPU10_CUSTOMDATA_DEBUGINFO);
656 SVGA_CASE_ID2STR(VGPU10_CUSTOMDATA_OPAQUE);
657 SVGA_CASE_ID2STR(VGPU10_CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER);
658 }
659 return NULL;
660}
661
662
663static const char *dxbcSystemNameToString(uint32_t value)
664{
665 VGPU10_SYSTEM_NAME enm = (VGPU10_SYSTEM_NAME)value;
666 switch (enm)
667 {
668 SVGA_CASE_ID2STR(VGPU10_NAME_UNDEFINED);
669 SVGA_CASE_ID2STR(VGPU10_NAME_POSITION);
670 SVGA_CASE_ID2STR(VGPU10_NAME_CLIP_DISTANCE);
671 SVGA_CASE_ID2STR(VGPU10_NAME_CULL_DISTANCE);
672 SVGA_CASE_ID2STR(VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX);
673 SVGA_CASE_ID2STR(VGPU10_NAME_VIEWPORT_ARRAY_INDEX);
674 SVGA_CASE_ID2STR(VGPU10_NAME_VERTEX_ID);
675 SVGA_CASE_ID2STR(VGPU10_NAME_PRIMITIVE_ID);
676 SVGA_CASE_ID2STR(VGPU10_NAME_INSTANCE_ID);
677 SVGA_CASE_ID2STR(VGPU10_NAME_IS_FRONT_FACE);
678 SVGA_CASE_ID2STR(VGPU10_NAME_SAMPLE_INDEX);
679 SVGA_CASE_ID2STR(VGPU10_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR);
680 SVGA_CASE_ID2STR(VGPU10_NAME_FINAL_QUAD_V_EQ_0_EDGE_TESSFACTOR);
681 SVGA_CASE_ID2STR(VGPU10_NAME_FINAL_QUAD_U_EQ_1_EDGE_TESSFACTOR);
682 SVGA_CASE_ID2STR(VGPU10_NAME_FINAL_QUAD_V_EQ_1_EDGE_TESSFACTOR);
683 SVGA_CASE_ID2STR(VGPU10_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR);
684 SVGA_CASE_ID2STR(VGPU10_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR);
685 SVGA_CASE_ID2STR(VGPU10_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR);
686 SVGA_CASE_ID2STR(VGPU10_NAME_FINAL_TRI_V_EQ_0_EDGE_TESSFACTOR);
687 SVGA_CASE_ID2STR(VGPU10_NAME_FINAL_TRI_W_EQ_0_EDGE_TESSFACTOR);
688 SVGA_CASE_ID2STR(VGPU10_NAME_FINAL_TRI_INSIDE_TESSFACTOR);
689 SVGA_CASE_ID2STR(VGPU10_NAME_FINAL_LINE_DETAIL_TESSFACTOR);
690 SVGA_CASE_ID2STR(VGPU10_NAME_FINAL_LINE_DENSITY_TESSFACTOR);
691 }
692 return NULL;
693}
694
695
696static const char *dxbcOperandTypeToString(uint32_t value)
697{
698 VGPU10_OPERAND_TYPE enm = (VGPU10_OPERAND_TYPE)value;
699 switch (enm)
700 {
701 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_TEMP);
702 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INPUT);
703 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_OUTPUT);
704 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INDEXABLE_TEMP);
705 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_IMMEDIATE32);
706 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_IMMEDIATE64);
707 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_SAMPLER);
708 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_RESOURCE);
709 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_CONSTANT_BUFFER);
710 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER);
711 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_LABEL);
712 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID);
713 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_OUTPUT_DEPTH);
714 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_NULL);
715 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_RASTERIZER);
716 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_OUTPUT_COVERAGE_MASK);
717 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_STREAM);
718 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_FUNCTION_BODY);
719 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_FUNCTION_TABLE);
720 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INTERFACE);
721 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_FUNCTION_INPUT);
722 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_FUNCTION_OUTPUT);
723 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID);
724 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INPUT_FORK_INSTANCE_ID);
725 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INPUT_JOIN_INSTANCE_ID);
726 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT);
727 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT);
728 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT);
729 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INPUT_DOMAIN_POINT);
730 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_THIS_POINTER);
731 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_UAV);
732 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY);
733 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INPUT_THREAD_ID);
734 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INPUT_THREAD_GROUP_ID);
735 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP);
736 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INPUT_COVERAGE_MASK);
737 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP_FLATTENED);
738 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID);
739 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_OUTPUT_DEPTH_GREATER_EQUAL);
740 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_OUTPUT_DEPTH_LESS_EQUAL);
741 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_CYCLE_COUNTER);
742 SVGA_CASE_ID2STR(VGPU10_NUM_OPERANDS);
743 }
744 return NULL;
745}
746
747
748static const char *dxbcExtendedOperandTypeToString(uint32_t value)
749{
750 VGPU10_EXTENDED_OPERAND_TYPE enm = (VGPU10_EXTENDED_OPERAND_TYPE)value;
751 switch (enm)
752 {
753 SVGA_CASE_ID2STR(VGPU10_EXTENDED_OPERAND_EMPTY);
754 SVGA_CASE_ID2STR(VGPU10_EXTENDED_OPERAND_MODIFIER);
755 }
756 return NULL;
757}
758
759
760static const char *dxbcOperandModifierToString(uint32_t value)
761{
762 VGPU10_OPERAND_MODIFIER enm = (VGPU10_OPERAND_MODIFIER)value;
763 switch (enm)
764 {
765 SVGA_CASE_ID2STR(VGPU10_OPERAND_MODIFIER_NONE);
766 SVGA_CASE_ID2STR(VGPU10_OPERAND_MODIFIER_NEG);
767 SVGA_CASE_ID2STR(VGPU10_OPERAND_MODIFIER_ABS);
768 SVGA_CASE_ID2STR(VGPU10_OPERAND_MODIFIER_ABSNEG);
769 }
770 return NULL;
771}
772
773
774static const char *dxbcOperandNumComponentsToString(uint32_t value)
775{
776 VGPU10_OPERAND_NUM_COMPONENTS enm = (VGPU10_OPERAND_NUM_COMPONENTS)value;
777 switch (enm)
778 {
779 SVGA_CASE_ID2STR(VGPU10_OPERAND_0_COMPONENT);
780 SVGA_CASE_ID2STR(VGPU10_OPERAND_1_COMPONENT);
781 SVGA_CASE_ID2STR(VGPU10_OPERAND_4_COMPONENT);
782 SVGA_CASE_ID2STR(VGPU10_OPERAND_N_COMPONENT);
783 }
784 return NULL;
785}
786
787
788static const char *dxbcOperandComponentModeToString(uint32_t value)
789{
790 VGPU10_OPERAND_4_COMPONENT_SELECTION_MODE enm = (VGPU10_OPERAND_4_COMPONENT_SELECTION_MODE)value;
791 switch (enm)
792 {
793 SVGA_CASE_ID2STR(VGPU10_OPERAND_4_COMPONENT_MASK_MODE);
794 SVGA_CASE_ID2STR(VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE);
795 SVGA_CASE_ID2STR(VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE);
796 }
797 return NULL;
798}
799
800
801static const char *dxbcOperandComponentNameToString(uint32_t value)
802{
803 VGPU10_COMPONENT_NAME enm = (VGPU10_COMPONENT_NAME)value;
804 switch (enm)
805 {
806 SVGA_CASE_ID2STR(VGPU10_COMPONENT_X);
807 SVGA_CASE_ID2STR(VGPU10_COMPONENT_Y);
808 SVGA_CASE_ID2STR(VGPU10_COMPONENT_Z);
809 SVGA_CASE_ID2STR(VGPU10_COMPONENT_W);
810 }
811 return NULL;
812}
813
814
815static const char *dxbcOperandIndexDimensionToString(uint32_t value)
816{
817 VGPU10_OPERAND_INDEX_DIMENSION enm = (VGPU10_OPERAND_INDEX_DIMENSION)value;
818 switch (enm)
819 {
820 SVGA_CASE_ID2STR(VGPU10_OPERAND_INDEX_0D);
821 SVGA_CASE_ID2STR(VGPU10_OPERAND_INDEX_1D);
822 SVGA_CASE_ID2STR(VGPU10_OPERAND_INDEX_2D);
823 SVGA_CASE_ID2STR(VGPU10_OPERAND_INDEX_3D);
824 }
825 return NULL;
826}
827
828
829static const char *dxbcOperandIndexRepresentationToString(uint32_t value)
830{
831 VGPU10_OPERAND_INDEX_REPRESENTATION enm = (VGPU10_OPERAND_INDEX_REPRESENTATION)value;
832 switch (enm)
833 {
834 SVGA_CASE_ID2STR(VGPU10_OPERAND_INDEX_IMMEDIATE32);
835 SVGA_CASE_ID2STR(VGPU10_OPERAND_INDEX_IMMEDIATE64);
836 SVGA_CASE_ID2STR(VGPU10_OPERAND_INDEX_RELATIVE);
837 SVGA_CASE_ID2STR(VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE);
838 SVGA_CASE_ID2STR(VGPU10_OPERAND_INDEX_IMMEDIATE64_PLUS_RELATIVE);
839 }
840 return NULL;
841}
842
843
844static const char *dxbcInterpolationModeToString(uint32_t value)
845{
846 VGPU10_INTERPOLATION_MODE enm = (VGPU10_INTERPOLATION_MODE)value;
847 switch (enm)
848 {
849 SVGA_CASE_ID2STR(VGPU10_INTERPOLATION_UNDEFINED);
850 SVGA_CASE_ID2STR(VGPU10_INTERPOLATION_CONSTANT);
851 SVGA_CASE_ID2STR(VGPU10_INTERPOLATION_LINEAR);
852 SVGA_CASE_ID2STR(VGPU10_INTERPOLATION_LINEAR_CENTROID);
853 SVGA_CASE_ID2STR(VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE);
854 SVGA_CASE_ID2STR(VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID);
855 SVGA_CASE_ID2STR(VGPU10_INTERPOLATION_LINEAR_SAMPLE);
856 SVGA_CASE_ID2STR(VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE);
857 }
858 return NULL;
859}
860
861
862static const char *dxbcResourceDimensionToString(uint32_t value)
863{
864 VGPU10_RESOURCE_DIMENSION enm = (VGPU10_RESOURCE_DIMENSION)value;
865 switch (enm)
866 {
867 SVGA_CASE_ID2STR(VGPU10_RESOURCE_DIMENSION_UNKNOWN);
868 SVGA_CASE_ID2STR(VGPU10_RESOURCE_DIMENSION_BUFFER);
869 SVGA_CASE_ID2STR(VGPU10_RESOURCE_DIMENSION_TEXTURE1D);
870 SVGA_CASE_ID2STR(VGPU10_RESOURCE_DIMENSION_TEXTURE2D);
871 SVGA_CASE_ID2STR(VGPU10_RESOURCE_DIMENSION_TEXTURE2DMS);
872 SVGA_CASE_ID2STR(VGPU10_RESOURCE_DIMENSION_TEXTURE3D);
873 SVGA_CASE_ID2STR(VGPU10_RESOURCE_DIMENSION_TEXTURECUBE);
874 SVGA_CASE_ID2STR(VGPU10_RESOURCE_DIMENSION_TEXTURE1DARRAY);
875 SVGA_CASE_ID2STR(VGPU10_RESOURCE_DIMENSION_TEXTURE2DARRAY);
876 SVGA_CASE_ID2STR(VGPU10_RESOURCE_DIMENSION_TEXTURE2DMSARRAY);
877 SVGA_CASE_ID2STR(VGPU10_RESOURCE_DIMENSION_TEXTURECUBEARRAY);
878 }
879 return NULL;
880}
881
882
883static const char *dxbcVmwareOpcodeTypeToString(uint32_t value)
884{
885 VGPU10_VMWARE_OPCODE_TYPE enm = (VGPU10_VMWARE_OPCODE_TYPE)value;
886 switch (enm)
887 {
888 SVGA_CASE_ID2STR(VGPU10_VMWARE_OPCODE_IDIV);
889 SVGA_CASE_ID2STR(VGPU10_VMWARE_OPCODE_DFRC);
890 SVGA_CASE_ID2STR(VGPU10_VMWARE_OPCODE_DRSQ);
891 SVGA_CASE_ID2STR(VGPU10_VMWARE_NUM_OPCODES);
892 }
893 return NULL;
894}
895
896#endif /* LOG_ENABLED */
897
898/*
899 * MD5 from IPRT (alt-md5.cpp) for DXBC hash calculation.
900 * DXBC hash function uses a different padding for the data, see dxbcHash.
 * Therefore RTMd5Final is not needed. Two functions have been renamed to dxbcRTMd5Update and dxbcRTMd5Init.
902 */
903
904
/*
 * The four core functions. F1 selects y or z bitwise depending on x, i.e. it
 * equals (x & y) | (~x & z), written in a form which needs one operation less.
 */
#define F1(x, y, z) ((z) ^ ((x) & ((y) ^ (z))))
#define F2(x, y, z) F1(z, x, y)
#define F3(x, y, z) ((x) ^ (y) ^ (z))
#define F4(x, y, z) ((y) ^ ((x) | ~(z)))


/* This is the central step in the MD5 algorithm: mix, rotate left by s bits, add x. */
#define MD5STEP(f, w, x, y, z, data, s) \
    ( (w) += f(x, y, z) + (data), (w) = (w) << (s) | (w) >> (32 - (s)), (w) += (x) )


/**
 * The core of the MD5 algorithm: updates an existing MD5 state with one block
 * of 16 32-bit words of new data.
 *
 * The 64 steps are executed in four rounds of 16 steps. Each round uses its own
 * core function, its own set of four rotation counts and its own order of
 * picking the input words.
 *
 * @param   buf     The MD5 state (4 words), updated in place.
 * @param   in      The data block, 16 words in host byte order.
 */
static void rtMd5Transform(uint32_t buf[4], uint32_t const in[16])
{
    /* The additive constant of each step (the 'T' table of the MD5 specification). */
    static uint32_t const s_au32Sine[64] =
    {
        /* Round 1 */
        0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee, 0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501,
        0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be, 0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821,
        /* Round 2 */
        0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa, 0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8,
        0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed, 0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a,
        /* Round 3 */
        0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c, 0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70,
        0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05, 0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665,
        /* Round 4 */
        0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039, 0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1,
        0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1, 0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391
    };

    uint32_t a = buf[0];
    uint32_t b = buf[1];
    uint32_t c = buf[2];
    uint32_t d = buf[3];
    unsigned iStep;

    /* Round 1: input words are taken in natural order. */
    for (iStep = 0; iStep < 16; iStep += 4)
    {
        MD5STEP(F1, a, b, c, d, in[iStep    ] + s_au32Sine[iStep    ],  7);
        MD5STEP(F1, d, a, b, c, in[iStep + 1] + s_au32Sine[iStep + 1], 12);
        MD5STEP(F1, c, d, a, b, in[iStep + 2] + s_au32Sine[iStep + 2], 17);
        MD5STEP(F1, b, c, d, a, in[iStep + 3] + s_au32Sine[iStep + 3], 22);
    }

    /* Round 2: input word index is (5 * step + 1) mod 16. */
    for (iStep = 16; iStep < 32; iStep += 4)
    {
        MD5STEP(F2, a, b, c, d, in[(5 * (iStep    ) + 1) & 15] + s_au32Sine[iStep    ],  5);
        MD5STEP(F2, d, a, b, c, in[(5 * (iStep + 1) + 1) & 15] + s_au32Sine[iStep + 1],  9);
        MD5STEP(F2, c, d, a, b, in[(5 * (iStep + 2) + 1) & 15] + s_au32Sine[iStep + 2], 14);
        MD5STEP(F2, b, c, d, a, in[(5 * (iStep + 3) + 1) & 15] + s_au32Sine[iStep + 3], 20);
    }

    /* Round 3: input word index is (3 * step + 5) mod 16. */
    for (iStep = 32; iStep < 48; iStep += 4)
    {
        MD5STEP(F3, a, b, c, d, in[(3 * (iStep    ) + 5) & 15] + s_au32Sine[iStep    ],  4);
        MD5STEP(F3, d, a, b, c, in[(3 * (iStep + 1) + 5) & 15] + s_au32Sine[iStep + 1], 11);
        MD5STEP(F3, c, d, a, b, in[(3 * (iStep + 2) + 5) & 15] + s_au32Sine[iStep + 2], 16);
        MD5STEP(F3, b, c, d, a, in[(3 * (iStep + 3) + 5) & 15] + s_au32Sine[iStep + 3], 23);
    }

    /* Round 4: input word index is (7 * step) mod 16. */
    for (iStep = 48; iStep < 64; iStep += 4)
    {
        MD5STEP(F4, a, b, c, d, in[(7 * (iStep    )) & 15] + s_au32Sine[iStep    ],  6);
        MD5STEP(F4, d, a, b, c, in[(7 * (iStep + 1)) & 15] + s_au32Sine[iStep + 1], 10);
        MD5STEP(F4, c, d, a, b, in[(7 * (iStep + 2)) & 15] + s_au32Sine[iStep + 2], 15);
        MD5STEP(F4, b, c, d, a, in[(7 * (iStep + 3)) & 15] + s_au32Sine[iStep + 3], 21);
    }

    buf[0] += a;
    buf[1] += b;
    buf[2] += c;
    buf[3] += d;
}
1006
1007
#ifdef RT_BIG_ENDIAN
/**
 * Converts an array of 32-bit words from little endian to host byte order in place.
 *
 * Only compiled for big endian hosts; the code would be harmless on little
 * endian machines too.
 *
 * @param   buf     The words to convert.
 * @param   longs   Number of words. Must not be zero, the first word is
 *                  converted before the counter is checked.
 */
static void rtMd5ByteReverse(uint32_t *buf, unsigned int longs)
{
    do
    {
        *buf = RT_LE2H_U32(*buf);
        ++buf;
        --longs;
    } while (longs);
}
#else /* little endian - do nothing */
# define rtMd5ByteReverse(buf, len) do { /* Nothing */ } while (0)
#endif
1026
1027
1028/*
1029 * Start MD5 accumulation. Set bit count to 0 and buffer to mysterious
1030 * initialization constants.
1031 */
1032static void dxbcRTMd5Init(PRTMD5CONTEXT pCtx)
1033{
1034 pCtx->AltPrivate.buf[0] = 0x67452301;
1035 pCtx->AltPrivate.buf[1] = 0xefcdab89;
1036 pCtx->AltPrivate.buf[2] = 0x98badcfe;
1037 pCtx->AltPrivate.buf[3] = 0x10325476;
1038
1039 pCtx->AltPrivate.bits[0] = 0;
1040 pCtx->AltPrivate.bits[1] = 0;
1041}
1042
1043
1044/*
1045 * Update context to reflect the concatenation of another buffer full
1046 * of bytes.
1047 */
1048/** @todo Optimize this, because len is always a multiple of 64. */
1049static void dxbcRTMd5Update(PRTMD5CONTEXT pCtx, const void *pvBuf, size_t len)
1050{
1051 const uint8_t *buf = (const uint8_t *)pvBuf;
1052 uint32_t t;
1053
1054 /* Update bitcount */
1055 t = pCtx->AltPrivate.bits[0];
1056 if ((pCtx->AltPrivate.bits[0] = t + ((uint32_t) len << 3)) < t)
1057 pCtx->AltPrivate.bits[1]++; /* Carry from low to high */
1058 pCtx->AltPrivate.bits[1] += (uint32_t)(len >> 29);
1059
1060 t = (t >> 3) & 0x3f; /* Bytes already in shsInfo->data */
1061
1062 /* Handle any leading odd-sized chunks */
1063 if (t)
1064 {
1065 uint8_t *p = (uint8_t *) pCtx->AltPrivate.in + t;
1066
1067 t = 64 - t;
1068 if (len < t)
1069 {
1070 memcpy(p, buf, len);
1071 return;
1072 }
1073 memcpy(p, buf, t);
1074 rtMd5ByteReverse(pCtx->AltPrivate.in, 16);
1075 rtMd5Transform(pCtx->AltPrivate.buf, pCtx->AltPrivate.in);
1076 buf += t;
1077 len -= t;
1078 }
1079
1080 /* Process data in 64-byte chunks */
1081#ifndef RT_BIG_ENDIAN
1082 if (!((uintptr_t)buf & 0x3))
1083 {
1084 while (len >= 64) {
1085 rtMd5Transform(pCtx->AltPrivate.buf, (uint32_t const *)buf);
1086 buf += 64;
1087 len -= 64;
1088 }
1089 }
1090 else
1091#endif
1092 {
1093 while (len >= 64) {
1094 memcpy(pCtx->AltPrivate.in, buf, 64);
1095 rtMd5ByteReverse(pCtx->AltPrivate.in, 16);
1096 rtMd5Transform(pCtx->AltPrivate.buf, pCtx->AltPrivate.in);
1097 buf += 64;
1098 len -= 64;
1099 }
1100 }
1101
1102 /* Handle any remaining bytes of data */
1103 memcpy(pCtx->AltPrivate.in, buf, len);
1104}
1105
1106
1107static void dxbcHash(void const *pvData, uint32_t cbData, uint8_t pabDigest[RTMD5HASHSIZE])
1108{
1109 size_t const kBlockSize = 64;
1110 uint8_t au8BlockBuffer[kBlockSize];
1111
1112 static uint8_t const s_au8Padding[kBlockSize] =
1113 {
1114 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1115 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1116 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1117 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1118 };
1119
1120 RTMD5CONTEXT Ctx;
1121 PRTMD5CONTEXT const pCtx = &Ctx;
1122 dxbcRTMd5Init(pCtx);
1123
1124 uint8_t const *pu8Data = (uint8_t *)pvData;
1125 size_t cbRemaining = cbData;
1126
1127 size_t const cbCompleteBlocks = cbData & ~ (kBlockSize - 1);
1128 dxbcRTMd5Update(pCtx, pu8Data, cbCompleteBlocks);
1129 pu8Data += cbCompleteBlocks;
1130 cbRemaining -= cbCompleteBlocks;
1131
1132 /* Custom padding. */
1133 if (cbRemaining >= kBlockSize - 2 * sizeof(uint32_t))
1134 {
1135 /* Two additional blocks. */
1136 memcpy(&au8BlockBuffer[0], pu8Data, cbRemaining);
1137 memcpy(&au8BlockBuffer[cbRemaining], s_au8Padding, kBlockSize - cbRemaining);
1138 dxbcRTMd5Update(pCtx, au8BlockBuffer, kBlockSize);
1139
1140 memset(&au8BlockBuffer[sizeof(uint32_t)], 0, kBlockSize - 2 * sizeof(uint32_t));
1141 }
1142 else
1143 {
1144 /* One additional block. */
1145 memcpy(&au8BlockBuffer[sizeof(uint32_t)], pu8Data, cbRemaining);
1146 memcpy(&au8BlockBuffer[sizeof(uint32_t) + cbRemaining], s_au8Padding, kBlockSize - cbRemaining - 2 * sizeof(uint32_t));
1147 }
1148
1149 /* Set the first and last dwords of the last block. */
1150 *(uint32_t *)&au8BlockBuffer[0] = cbData << 3;
1151 *(uint32_t *)&au8BlockBuffer[kBlockSize - sizeof(uint32_t)] = (cbData << 1) | 1;
1152 dxbcRTMd5Update(pCtx, au8BlockBuffer, kBlockSize);
1153
1154 AssertCompile(sizeof(pCtx->AltPrivate.buf) == RTMD5HASHSIZE);
1155 memcpy(pabDigest, pCtx->AltPrivate.buf, RTMD5HASHSIZE);
1156}
1157
1158
1159/*
1160 *
1161 * Shader token reader.
1162 *
1163 */
1164
/**
 * State of a sequential reader of 32-bit shader tokens.
 *
 * The reader functions keep the invariant that pToken points
 * (cToken - cRemainingToken) tokens past the first token.
 */
typedef struct DXBCTokenReader
{
    uint32_t const *pToken; /* Next token to read. */
    uint32_t cToken; /* How many tokens total. */
    uint32_t cRemainingToken; /* How many tokens remain. */
} DXBCTokenReader;
1171
1172
#ifdef LOG_ENABLED
/**
 * Returns the byte offset of the next token to read, counted from the first token.
 * Only available in logging builds.
 *
 * @returns Number of already consumed tokens multiplied by 4.
 * @param   r   The token reader.
 */
DECLINLINE(uint32_t) dxbcTokenReaderByteOffset(DXBCTokenReader *r)
{
    uint32_t const cTokensConsumed = r->cToken - r->cRemainingToken;
    return cTokensConsumed * 4;
}
#endif
1179
1180
#if 0 // Unused for now
/**
 * Returns how many tokens are left to read.
 *
 * @returns Number of remaining tokens.
 * @param   r   The token reader.
 */
DECLINLINE(uint32_t) dxbcTokenReaderRemaining(DXBCTokenReader *r)
{
    uint32_t const cRemaining = r->cRemainingToken;
    return cRemaining;
}
#endif
1187
1188
1189DECLINLINE(uint32_t const *) dxbcTokenReaderPtr(DXBCTokenReader *r)
1190{
1191 return r->pToken;
1192}
1193
1194
1195DECLINLINE(bool) dxbcTokenReaderCanRead(DXBCTokenReader *r, uint32_t cToken)
1196{
1197 return cToken <= r->cRemainingToken;
1198}
1199
1200
1201DECLINLINE(void) dxbcTokenReaderSkip(DXBCTokenReader *r, uint32_t cToken)
1202{
1203 AssertReturnVoid(r->cRemainingToken >= cToken);
1204 r->cRemainingToken -= cToken;
1205 r->pToken += cToken;
1206}
1207
1208
1209DECLINLINE(uint32_t) dxbcTokenReaderRead32(DXBCTokenReader *r)
1210{
1211 AssertReturn(r->cRemainingToken, 0);
1212 --r->cRemainingToken;
1213 return *(r->pToken++);
1214}
1215
1216
1217DECLINLINE(uint64_t) dxbcTokenReaderRead64(DXBCTokenReader *r)
1218{
1219 uint64_t const u64Low = dxbcTokenReaderRead32(r);
1220 uint64_t const u64High = dxbcTokenReaderRead32(r);
1221 return u64Low + (u64High << 32);
1222}
1223
1224
1225/*
1226 *
1227 * Byte writer.
1228 *
1229 */
1230
/**
 * State of a writer which appends bytes to a growing heap buffer.
 *
 * The write position can be moved temporarily (dxbcByteWriterSetOffset and
 * dxbcByteWriterRestore), therefore the current position and the amount of
 * written data are tracked separately.
 */
typedef struct DXBCByteWriter
{
    uint8_t *pu8ByteCodeBegin; /* First byte of the buffer. */
    uint8_t *pu8ByteCodePtr; /* Next byte to be written. */
    uint32_t cbAllocated; /* How many bytes allocated in the buffer. */
    uint32_t cbRemaining; /* How many bytes remain in the buffer. */
    uint32_t cbWritten; /* Offset of first never written byte.
                         * Since the writer allows to jump in the buffer, this field tracks
                         * the upper boundary of the written data.
                         */
    int32_t rc; /* Status of the writer. Once it is a failure, the writer functions
                 * which check it (set offset, commit, can write) do nothing.
                 */
} DXBCByteWriter;
1243
1244
/**
 * Write position saved by dxbcByteWriterSetOffset, to be passed to
 * dxbcByteWriterRestore later.
 */
typedef struct DXBCByteWriterState
{
    uint32_t off; /* Offset of the next free byte. */
} DXBCByteWriterState;
1249
1250
1251DECLINLINE(void *) dxbcByteWriterPtr(DXBCByteWriter *w)
1252{
1253 return w->pu8ByteCodePtr;
1254}
1255
1256
1257DECLINLINE(uint32_t) dxbcByteWriterSize(DXBCByteWriter *w)
1258{
1259 return (uint32_t)(w->pu8ByteCodePtr - w->pu8ByteCodeBegin);
1260}
1261
1262
1263static bool dxbcByteWriterRealloc(DXBCByteWriter *w, uint32_t cbNew)
1264{
1265 void *pvNew = RTMemAllocZ(cbNew);
1266 if (!pvNew)
1267 {
1268 w->rc = VERR_NO_MEMORY;
1269 return false;
1270 }
1271
1272 uint32_t const cbCurrent = dxbcByteWriterSize(w);
1273 memcpy(pvNew, w->pu8ByteCodeBegin, cbCurrent);
1274 RTMemFree(w->pu8ByteCodeBegin);
1275
1276 w->pu8ByteCodeBegin = (uint8_t *)pvNew;
1277 w->pu8ByteCodePtr = w->pu8ByteCodeBegin + cbCurrent;
1278 w->cbAllocated = cbNew;
1279 w->cbRemaining = cbNew - cbCurrent;
1280 return true;
1281}
1282
1283
1284DECLINLINE(bool) dxbcByteWriterSetOffset(DXBCByteWriter *w, uint32_t off, DXBCByteWriterState *pSavedWriterState)
1285{
1286 if (RT_FAILURE(w->rc))
1287 return false;
1288
1289 uint32_t const cbNew = RT_ALIGN_32(off, 1024);
1290 uint32_t const cbMax = 2 * SVGA3D_MAX_SHADER_MEMORY_BYTES;
1291 AssertReturnStmt(off < cbMax && cbNew < cbMax, w->rc = VERR_INVALID_PARAMETER, false);
1292
1293 if (cbNew > w->cbAllocated)
1294 {
1295 if (!dxbcByteWriterRealloc(w, cbNew))
1296 return false;
1297 }
1298
1299 pSavedWriterState->off = dxbcByteWriterSize(w);
1300
1301 w->pu8ByteCodePtr = w->pu8ByteCodeBegin + off;
1302 w->cbRemaining = w->cbAllocated - off;
1303 return true;
1304}
1305
1306
1307DECLINLINE(void) dxbcByteWriterRestore(DXBCByteWriter *w, DXBCByteWriterState *pSavedWriterState)
1308{
1309 w->pu8ByteCodePtr = w->pu8ByteCodeBegin + pSavedWriterState->off;
1310 w->cbRemaining = w->cbAllocated - pSavedWriterState->off;
1311}
1312
1313
1314DECLINLINE(void) dxbcByteWriterCommit(DXBCByteWriter *w, uint32_t cbCommit)
1315{
1316 if (RT_FAILURE(w->rc))
1317 return;
1318
1319 Assert(cbCommit < w->cbRemaining);
1320 cbCommit = RT_MIN(cbCommit, w->cbRemaining);
1321 w->pu8ByteCodePtr += cbCommit;
1322 w->cbRemaining -= cbCommit;
1323 w->cbWritten = RT_MAX(w->cbWritten, w->cbAllocated - w->cbRemaining);
1324}
1325
1326
1327DECLINLINE(bool) dxbcByteWriterCanWrite(DXBCByteWriter *w, uint32_t cbMore)
1328{
1329 if (RT_FAILURE(w->rc))
1330 return false;
1331
1332 if (cbMore <= w->cbRemaining)
1333 return true;
1334
1335 /* Do not allow to allocate more than 2 * SVGA3D_MAX_SHADER_MEMORY_BYTES */
1336 uint32_t const cbMax = 2 * SVGA3D_MAX_SHADER_MEMORY_BYTES;
1337 AssertReturnStmt(cbMore < cbMax && RT_ALIGN_32(cbMore, 4096) <= cbMax - w->cbAllocated, w->rc = VERR_INVALID_PARAMETER, false);
1338
1339 uint32_t cbNew = w->cbAllocated + RT_ALIGN_32(cbMore, 4096);
1340 return dxbcByteWriterRealloc(w, cbNew);
1341}
1342
1343
1344DECLINLINE(bool) dxbcByteWriterAddTokens(DXBCByteWriter *w, uint32_t const *paToken, uint32_t cToken)
1345{
1346 uint32_t const cbWrite = cToken * sizeof(uint32_t);
1347 if (dxbcByteWriterCanWrite(w, cbWrite))
1348 {
1349 memcpy(dxbcByteWriterPtr(w), paToken, cbWrite);
1350 dxbcByteWriterCommit(w, cbWrite);
1351 return true;
1352 }
1353
1354 AssertFailed();
1355 return false;
1356}
1357
1358
1359DECLINLINE(bool) dxbcByteWriterInit(DXBCByteWriter *w, uint32_t cbInitial)
1360{
1361 RT_ZERO(*w);
1362 return dxbcByteWriterCanWrite(w, cbInitial);
1363}
1364
1365
1366DECLINLINE(void) dxbcByteWriterReset(DXBCByteWriter *w)
1367{
1368 RTMemFree(w->pu8ByteCodeBegin);
1369 RT_ZERO(*w);
1370}
1371
1372
1373DECLINLINE(void) dxbcByteWriterFetchData(DXBCByteWriter *w, void **ppv, uint32_t *pcb)
1374{
1375 *ppv = w->pu8ByteCodeBegin;
1376 *pcb = w->cbWritten;
1377
1378 w->pu8ByteCodeBegin = NULL;
1379 dxbcByteWriterReset(w);
1380}
1381
1382
1383/*
1384 *
1385 * VGPU10 shader parser.
1386 *
1387 */
1388
1389/* Parse an instruction operand. */
1390static int dxbcParseOperand(DXBCTokenReader *r, VGPUOperand *paOperand, uint32_t *pcOperandRemain)
1391{
1392 ASSERT_GUEST_RETURN(*pcOperandRemain > 0, VERR_NOT_SUPPORTED);
1393
1394 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1395
1396 paOperand->paOperandToken = dxbcTokenReaderPtr(r);
1397 paOperand->cOperandToken = 0;
1398
1399 VGPU10OperandToken0 operand0;
1400 operand0.value = dxbcTokenReaderRead32(r);
1401
1402 Log6((" %s(%d) %s(%d) %s(%d) %s(%d)\n",
1403 dxbcOperandNumComponentsToString(operand0.numComponents), operand0.numComponents,
1404 dxbcOperandComponentModeToString(operand0.selectionMode), operand0.selectionMode,
1405 dxbcOperandTypeToString(operand0.operandType), operand0.operandType,
1406 dxbcOperandIndexDimensionToString(operand0.indexDimension), operand0.indexDimension));
1407
1408 ASSERT_GUEST_RETURN(operand0.numComponents <= VGPU10_OPERAND_4_COMPONENT, VERR_INVALID_PARAMETER);
1409 if ( operand0.operandType != VGPU10_OPERAND_TYPE_IMMEDIATE32
1410 && operand0.operandType != VGPU10_OPERAND_TYPE_IMMEDIATE64)
1411 {
1412 if (operand0.numComponents == VGPU10_OPERAND_4_COMPONENT)
1413 {
1414 ASSERT_GUEST_RETURN(operand0.selectionMode <= VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE, VERR_INVALID_PARAMETER);
1415 switch (operand0.selectionMode)
1416 {
1417 case VGPU10_OPERAND_4_COMPONENT_MASK_MODE:
1418 Log6((" Mask %#x\n", operand0.mask));
1419 break;
1420 case VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE:
1421 Log6((" Swizzle %s(%d) %s(%d) %s(%d) %s(%d)\n",
1422 dxbcOperandComponentNameToString(operand0.swizzleX), operand0.swizzleX,
1423 dxbcOperandComponentNameToString(operand0.swizzleY), operand0.swizzleY,
1424 dxbcOperandComponentNameToString(operand0.swizzleZ), operand0.swizzleZ,
1425 dxbcOperandComponentNameToString(operand0.swizzleW), operand0.swizzleW));
1426 break;
1427 case VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE:
1428 Log6((" Select %s(%d)\n",
1429 dxbcOperandComponentNameToString(operand0.selectMask), operand0.selectMask));
1430 break;
1431 default: /* Never happens. */
1432 break;
1433 }
1434 }
1435 }
1436
1437 if (operand0.extended)
1438 {
1439 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1440
1441 VGPU10OperandToken1 operand1;
1442 operand1.value = dxbcTokenReaderRead32(r);
1443
1444 Log6((" %s(%d) %s(%d)\n",
1445 dxbcExtendedOperandTypeToString(operand1.extendedOperandType), operand1.extendedOperandType,
1446 dxbcOperandModifierToString(operand1.operandModifier), operand1.operandModifier));
1447 }
1448
1449 ASSERT_GUEST_RETURN(operand0.operandType < VGPU10_NUM_OPERANDS, VERR_INVALID_PARAMETER);
1450
1451 if ( operand0.operandType == VGPU10_OPERAND_TYPE_IMMEDIATE32
1452 || operand0.operandType == VGPU10_OPERAND_TYPE_IMMEDIATE64)
1453 {
1454 uint32_t cComponent = 0;
1455 if (operand0.numComponents == VGPU10_OPERAND_4_COMPONENT)
1456 cComponent = 4;
1457 else if (operand0.numComponents == VGPU10_OPERAND_1_COMPONENT)
1458 cComponent = 1;
1459
1460 for (uint32_t i = 0; i < cComponent; ++i)
1461 {
1462 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1463 paOperand->aImm[i] = dxbcTokenReaderRead32(r);
1464 }
1465 }
1466
1467 paOperand->numComponents = operand0.numComponents;
1468 paOperand->selectionMode = operand0.selectionMode;
1469 paOperand->mask = operand0.mask;
1470 paOperand->operandType = operand0.operandType;
1471 paOperand->indexDimension = operand0.indexDimension;
1472
1473 int rc = VINF_SUCCESS;
1474 /* 'indexDimension' tells the number of indices. 'i' is the array index, i.e. i = 0 for 1D, etc. */
1475 for (uint32_t i = 0; i < operand0.indexDimension; ++i)
1476 {
1477 if (i == 0) /* VGPU10_OPERAND_INDEX_1D */
1478 paOperand->aOperandIndex[i].indexRepresentation = operand0.index0Representation;
1479 else if (i == 1) /* VGPU10_OPERAND_INDEX_2D */
1480 paOperand->aOperandIndex[i].indexRepresentation = operand0.index1Representation;
1481 else /* VGPU10_OPERAND_INDEX_3D */
1482 continue; /* Skip because it is "rarely if ever used" and is not supported by VGPU10. */
1483
1484 uint32_t const indexRepresentation = paOperand->aOperandIndex[i].indexRepresentation;
1485 switch (indexRepresentation)
1486 {
1487 case VGPU10_OPERAND_INDEX_IMMEDIATE32:
1488 {
1489 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1490 paOperand->aOperandIndex[i].iOperandImmediate = dxbcTokenReaderRead32(r);
1491 break;
1492 }
1493 case VGPU10_OPERAND_INDEX_IMMEDIATE64:
1494 {
1495 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 2), VERR_INVALID_PARAMETER);
1496 paOperand->aOperandIndex[i].iOperandImmediate = dxbcTokenReaderRead64(r);
1497 break;
1498 }
1499 case VGPU10_OPERAND_INDEX_RELATIVE:
1500 {
1501 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1502 paOperand->aOperandIndex[i].pOperandRelative = &paOperand[1];
1503 Log6((" [operand index %d] parsing relative\n", i));
1504 rc = dxbcParseOperand(r, &paOperand[1], pcOperandRemain);
1505 break;
1506 }
1507 case VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE:
1508 {
1509 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 2), VERR_INVALID_PARAMETER);
1510 paOperand->aOperandIndex[i].iOperandImmediate = dxbcTokenReaderRead32(r);
1511 paOperand->aOperandIndex[i].pOperandRelative = &paOperand[1];
1512 Log6((" [operand index %d] parsing relative\n", i));
1513 rc = dxbcParseOperand(r, &paOperand[1], pcOperandRemain);
1514 break;
1515 }
1516 case VGPU10_OPERAND_INDEX_IMMEDIATE64_PLUS_RELATIVE:
1517 {
1518 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 3), VERR_INVALID_PARAMETER);
1519 paOperand->aOperandIndex[i].iOperandImmediate = dxbcTokenReaderRead64(r);
1520 paOperand->aOperandIndex[i].pOperandRelative = &paOperand[1];
1521 Log6((" [operand index %d] parsing relative\n", i));
1522 rc = dxbcParseOperand(r, &paOperand[1], pcOperandRemain);
1523 break;
1524 }
1525 default:
1526 ASSERT_GUEST_FAILED_RETURN(VERR_INVALID_PARAMETER);
1527 }
1528 Log6((" [operand index %d] %s(%d): %#llx%s\n",
1529 i, dxbcOperandIndexRepresentationToString(indexRepresentation), indexRepresentation,
1530 paOperand->aOperandIndex[i].iOperandImmediate, paOperand->aOperandIndex[i].pOperandRelative ? " + relative" : ""));
1531 if (RT_FAILURE(rc))
1532 break;
1533 }
1534
1535 paOperand->cOperandToken = dxbcTokenReaderPtr(r) - paOperand->paOperandToken;
1536
1537 *pcOperandRemain -= 1;
1538 return VINF_SUCCESS;
1539}
1540
1541
1542/* Parse an instruction. */
1543static int dxbcParseOpcode(DXBCTokenReader *r, VGPUOpcode *pOpcode)
1544{
1545 RT_ZERO(*pOpcode);
1546 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1547
1548 pOpcode->paOpcodeToken = dxbcTokenReaderPtr(r);
1549
1550 VGPU10OpcodeToken0 opcode;
1551 opcode.value = dxbcTokenReaderRead32(r);
1552
1553 pOpcode->opcodeType = opcode.opcodeType;
1554 ASSERT_GUEST_RETURN(pOpcode->opcodeType < VGPU10_NUM_OPCODES, VERR_INVALID_PARAMETER);
1555
1556 Log6(("[%#x] %s length %d\n",
1557 dxbcTokenReaderByteOffset(r) - 4, dxbcOpcodeToString(pOpcode->opcodeType), opcode.instructionLength));
1558
1559 uint32_t const cOperand = g_aOpcodeInfo[pOpcode->opcodeType].cOperand;
1560 if (cOperand != UINT32_MAX)
1561 {
1562 ASSERT_GUEST_RETURN(cOperand < RT_ELEMENTS(pOpcode->aIdxOperand), VERR_INVALID_PARAMETER);
1563
1564 pOpcode->cOpcodeToken = opcode.instructionLength;
1565 uint32_t cOpcode = 1; /* Opcode token + extended opcode tokens. */
1566 if (opcode.extended)
1567 {
1568 if ( pOpcode->opcodeType == VGPU10_OPCODE_DCL_FUNCTION_BODY
1569 || pOpcode->opcodeType == VGPU10_OPCODE_DCL_FUNCTION_TABLE
1570 || pOpcode->opcodeType == VGPU10_OPCODE_DCL_INTERFACE
1571 || pOpcode->opcodeType == VGPU10_OPCODE_INTERFACE_CALL
1572 || pOpcode->opcodeType == VGPU10_OPCODE_DCL_THREAD_GROUP)
1573 {
1574 /* "next DWORD contains ... the actual instruction length in DWORD since it may not fit into 7 bits" */
1575 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1576 pOpcode->cOpcodeToken = dxbcTokenReaderRead32(r);
1577 ++cOpcode;
1578 }
1579 else
1580 {
1581 VGPU10OpcodeToken1 opcode1;
1582 do
1583 {
1584 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1585 opcode1.value = dxbcTokenReaderRead32(r);
1586 ++cOpcode;
1587 ASSERT_GUEST( opcode1.opcodeType == VGPU10_EXTENDED_OPCODE_SAMPLE_CONTROLS
1588 || opcode1.opcodeType == D3D11_SB_EXTENDED_OPCODE_RESOURCE_DIM
1589 || opcode1.opcodeType == D3D11_SB_EXTENDED_OPCODE_RESOURCE_RETURN_TYPE);
1590 } while(opcode1.extended);
1591 }
1592 }
1593
1594 ASSERT_GUEST_RETURN(pOpcode->cOpcodeToken >= 1 && pOpcode->cOpcodeToken < 256, VERR_INVALID_PARAMETER);
1595 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, pOpcode->cOpcodeToken - cOpcode), VERR_INVALID_PARAMETER);
1596
1597#ifdef LOG_ENABLED
1598 Log6((" %08X", opcode.value));
1599 for (uint32_t i = 1; i < pOpcode->cOpcodeToken; ++i)
1600 Log6((" %08X", r->pToken[i - 1]));
1601 Log6(("\n"));
1602
1603 if (pOpcode->opcodeType == VGPU10_OPCODE_DCL_RESOURCE)
1604 Log6((" %s\n",
1605 dxbcResourceDimensionToString(opcode.resourceDimension)));
1606 else
1607 Log6((" %s\n",
1608 dxbcInterpolationModeToString(opcode.interpolationMode)));
1609#endif
1610 /* Additional tokens before operands. */
1611 switch (pOpcode->opcodeType)
1612 {
1613 case VGPU10_OPCODE_INTERFACE_CALL:
1614 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1615 dxbcTokenReaderSkip(r, 1); /* Function index */
1616 break;
1617
1618 default:
1619 break;
1620 }
1621
1622 /* Operands. */
1623 uint32_t cOperandRemain = RT_ELEMENTS(pOpcode->aValOperand);
1624 for (uint32_t i = 0; i < cOperand; ++i)
1625 {
1626 Log6((" [operand %d]\n", i));
1627 uint32_t const idxOperand = RT_ELEMENTS(pOpcode->aValOperand) - cOperandRemain;
1628 pOpcode->aIdxOperand[i] = idxOperand;
1629 int rc = dxbcParseOperand(r, &pOpcode->aValOperand[idxOperand], &cOperandRemain);
1630 ASSERT_GUEST_RETURN(RT_SUCCESS(rc), VERR_INVALID_PARAMETER);
1631 }
1632
1633 pOpcode->cOperand = cOperand;
1634
1635 /* Additional tokens after operands. */
1636 switch (pOpcode->opcodeType)
1637 {
1638 case VGPU10_OPCODE_DCL_INPUT_SIV:
1639 case VGPU10_OPCODE_DCL_INPUT_SGV:
1640 case VGPU10_OPCODE_DCL_INPUT_PS_SIV:
1641 case VGPU10_OPCODE_DCL_INPUT_PS_SGV:
1642 case VGPU10_OPCODE_DCL_OUTPUT_SIV:
1643 case VGPU10_OPCODE_DCL_OUTPUT_SGV:
1644 {
1645 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1646
1647 VGPU10NameToken name;
1648 name.value = dxbcTokenReaderRead32(r);
1649 Log6((" %s(%d)\n",
1650 dxbcSystemNameToString(name.name), name.name));
1651 pOpcode->semanticName = name.name;
1652 break;
1653 }
1654 case VGPU10_OPCODE_DCL_RESOURCE:
1655 {
1656 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1657 dxbcTokenReaderSkip(r, 1); /* ResourceReturnTypeToken */
1658 break;
1659 }
1660 case VGPU10_OPCODE_DCL_TEMPS:
1661 {
1662 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1663 dxbcTokenReaderSkip(r, 1); /* number of temps */
1664 break;
1665 }
1666 case VGPU10_OPCODE_DCL_INDEXABLE_TEMP:
1667 {
1668 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 3), VERR_INVALID_PARAMETER);
1669 dxbcTokenReaderSkip(r, 3); /* register index; number of registers; number of components */
1670 break;
1671 }
1672 case VGPU10_OPCODE_DCL_INDEX_RANGE:
1673 {
1674 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1675 dxbcTokenReaderSkip(r, 1); /* count of registers */
1676 break;
1677 }
1678 case VGPU10_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT:
1679 {
1680 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1681 dxbcTokenReaderSkip(r, 1); /* maximum number of primitives */
1682 break;
1683 }
1684 case VGPU10_OPCODE_DCL_GS_INSTANCE_COUNT:
1685 {
1686 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1687 dxbcTokenReaderSkip(r, 1); /* number of instances */
1688 break;
1689 }
1690 case VGPU10_OPCODE_DCL_HS_MAX_TESSFACTOR:
1691 {
1692 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1693 dxbcTokenReaderSkip(r, 1); /* maximum TessFactor */
1694 break;
1695 }
1696 case VGPU10_OPCODE_DCL_HS_FORK_PHASE_INSTANCE_COUNT:
1697 case VGPU10_OPCODE_DCL_HS_JOIN_PHASE_INSTANCE_COUNT:
1698 {
1699 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1700 dxbcTokenReaderSkip(r, 1); /* number of instances of the current fork/join phase program to execute */
1701 break;
1702 }
1703 case VGPU10_OPCODE_DCL_THREAD_GROUP:
1704 {
1705 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 3), VERR_INVALID_PARAMETER);
1706 dxbcTokenReaderSkip(r, 3); /* Thread Group dimensions as UINT32: x, y, z */
1707 break;
1708 }
1709 case VGPU10_OPCODE_DCL_UAV_TYPED:
1710 {
1711 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1712 dxbcTokenReaderSkip(r, 1); /* ResourceReturnTypeToken */
1713 break;
1714 }
1715 case VGPU10_OPCODE_DCL_UAV_STRUCTURED:
1716 {
1717 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1718 dxbcTokenReaderSkip(r, 1); /* byte stride */
1719 break;
1720 }
1721 case VGPU10_OPCODE_DCL_TGSM_RAW:
1722 {
1723 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1724 dxbcTokenReaderSkip(r, 1); /* element count */
1725 break;
1726 }
1727 case VGPU10_OPCODE_DCL_TGSM_STRUCTURED:
1728 {
1729 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 2), VERR_INVALID_PARAMETER);
1730 dxbcTokenReaderSkip(r, 2); /* struct byte stride; struct count */
1731 break;
1732 }
1733 case VGPU10_OPCODE_DCL_RESOURCE_STRUCTURED:
1734 {
1735 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1736 dxbcTokenReaderSkip(r, 1); /* struct byte stride */
1737 break;
1738 }
1739 default:
1740 break;
1741 }
1742 }
1743 else
1744 {
1745 /* Special opcodes. */
1746 if (pOpcode->opcodeType == VGPU10_OPCODE_CUSTOMDATA)
1747 {
1748 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1749 pOpcode->cOpcodeToken = dxbcTokenReaderRead32(r);
1750
1751 if (pOpcode->cOpcodeToken < 2)
1752 pOpcode->cOpcodeToken = 2;
1753 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, pOpcode->cOpcodeToken - 2), VERR_INVALID_PARAMETER);
1754
1755#ifdef LOG_ENABLED
1756 Log6((" %08X", opcode.value));
1757 for (uint32_t i = 1; i < pOpcode->cOpcodeToken; ++i)
1758 Log6((" %08X", r->pToken[i - 1]));
1759 Log6(("\n"));
1760
1761 Log6((" %s\n",
1762 dxbcCustomDataClassToString(opcode.customDataClass)));
1763#endif
1764 dxbcTokenReaderSkip(r, pOpcode->cOpcodeToken - 2);
1765 }
1766 else if (pOpcode->opcodeType == VGPU10_OPCODE_VMWARE)
1767 {
1768 pOpcode->cOpcodeToken = opcode.instructionLength;
1769 pOpcode->opcodeSubtype = opcode.vmwareOpcodeType;
1770
1771#ifdef LOG_ENABLED
1772 Log6((" %08X", opcode.value));
1773 for (uint32_t i = 1; i < pOpcode->cOpcodeToken; ++i)
1774 Log6((" %08X", r->pToken[i - 1]));
1775 Log6(("\n"));
1776
1777 Log6((" %s(%d)\n",
1778 dxbcVmwareOpcodeTypeToString(opcode.vmwareOpcodeType), opcode.vmwareOpcodeType));
1779#endif
1780
1781 if (opcode.vmwareOpcodeType == VGPU10_VMWARE_OPCODE_IDIV)
1782 {
1783 /* Integer divide. */
1784 pOpcode->cOperand = 4; /* dstQuit, dstRem, src0, src1. */
1785
1786 /* Operands. */
1787 uint32_t cOperandRemain = RT_ELEMENTS(pOpcode->aValOperand);
1788 for (uint32_t i = 0; i < pOpcode->cOperand; ++i)
1789 {
1790 Log6((" [operand %d]\n", i));
1791 uint32_t const idxOperand = RT_ELEMENTS(pOpcode->aValOperand) - cOperandRemain;
1792 pOpcode->aIdxOperand[i] = idxOperand;
1793 int rc = dxbcParseOperand(r, &pOpcode->aValOperand[idxOperand], &cOperandRemain);
1794 ASSERT_GUEST_RETURN(RT_SUCCESS(rc), VERR_INVALID_PARAMETER);
1795 }
1796 }
1797 //else if (opcode.vmwareOpcodeType == VGPU10_VMWARE_OPCODE_DFRC)
1798 //else if (opcode.vmwareOpcodeType == VGPU10_VMWARE_OPCODE_DRSQ)
1799 else
1800 {
1801 /** @todo implement */
1802 ASSERT_GUEST_FAILED_RETURN(VERR_INVALID_PARAMETER);
1803 }
1804 }
1805 else
1806 ASSERT_GUEST_FAILED_RETURN(VERR_INVALID_PARAMETER);
1807
1808 // pOpcode->cOperand = 0;
1809 }
1810
1811 return VINF_SUCCESS;
1812}
1813
1814
1815typedef struct DXBCOUTPUTCTX
1816{
1817 VGPU10ProgramToken programToken;
1818 uint32_t cToken; /* Number of tokens in the original shader code. */
1819
1820 uint32_t offSubroutine; /* Current offset where to write subroutines. */
1821} DXBCOUTPUTCTX;
1822
1823
1824static void dxbcOutputInit(DXBCOUTPUTCTX *pOutctx, VGPU10ProgramToken const *pProgramToken, uint32_t cToken)
1825{
1826 RT_ZERO(*pOutctx);
1827 pOutctx->programToken = *pProgramToken;
1828 pOutctx->cToken = cToken;
1829
1830 pOutctx->offSubroutine = cToken * 4;
1831}
1832
1833
1834static int dxbcEmitVmwareIDIV(DXBCOUTPUTCTX *pOutctx, DXBCByteWriter *w, VGPUOpcode *pOpcode)
1835{
1836 /* Insert a call and append a subroutne. */
1837 VGPU10OpcodeToken0 opcode;
1838 VGPU10OperandToken0 operand;
1839
1840 uint32_t const label = (pOutctx->offSubroutine - dxbcByteWriterSize(w)) / 4;
1841
1842 /*
1843 * Call
1844 */
1845 opcode.value = 0;
1846 opcode.opcodeType = VGPU10_OPCODE_CALL;
1847 opcode.instructionLength = 3;
1848 dxbcByteWriterAddTokens(w, &opcode.value, 1);
1849
1850 operand.value = 0;
1851 operand.numComponents = VGPU10_OPERAND_1_COMPONENT;
1852 operand.operandType = VGPU10_OPERAND_TYPE_LABEL;
1853 operand.indexDimension = VGPU10_OPERAND_INDEX_1D;
1854 operand.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
1855 dxbcByteWriterAddTokens(w, &operand.value, 1);
1856
1857 dxbcByteWriterAddTokens(w, &label, 1);
1858
1859 opcode.value = 0;
1860 opcode.opcodeType = VGPU10_OPCODE_NOP;
1861 opcode.instructionLength = 1;
1862 for (unsigned i = 0; i < pOpcode->cOpcodeToken - 3; ++i)
1863 dxbcByteWriterAddTokens(w, &opcode.value, 1);
1864
1865 /*
1866 * Subroutine.
1867 */
1868 DXBCByteWriterState savedWriterState;
1869 if (!dxbcByteWriterSetOffset(w, pOutctx->offSubroutine, &savedWriterState))
1870 return w->rc;
1871
1872 /* label */
1873 opcode.value = 0;
1874 opcode.opcodeType = VGPU10_OPCODE_LABEL;
1875 opcode.instructionLength = 3;
1876 dxbcByteWriterAddTokens(w, &opcode.value, 1);
1877
1878 operand.value = 0;
1879 operand.numComponents = VGPU10_OPERAND_1_COMPONENT;
1880 operand.operandType = VGPU10_OPERAND_TYPE_LABEL;
1881 operand.indexDimension = VGPU10_OPERAND_INDEX_1D;
1882 operand.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
1883 dxbcByteWriterAddTokens(w, &operand.value, 1);
1884 dxbcByteWriterAddTokens(w, &label, 1);
1885
1886 /* Just output UDIV for now. */
1887 opcode.value = 0;
1888 opcode.opcodeType = VGPU10_OPCODE_UDIV;
1889 opcode.instructionLength = pOpcode->cOpcodeToken;
1890 dxbcByteWriterAddTokens(w, &opcode.value, 1);
1891 dxbcByteWriterAddTokens(w, &pOpcode->paOpcodeToken[1], pOpcode->cOpcodeToken - 1);
1892
1893 /* ret */
1894 opcode.value = 0;
1895 opcode.opcodeType = VGPU10_OPCODE_RET;
1896 opcode.instructionLength = 1;
1897 dxbcByteWriterAddTokens(w, &opcode.value, 1);
1898
1899 pOutctx->offSubroutine = dxbcByteWriterSize(w);
1900 dxbcByteWriterRestore(w, &savedWriterState);
1901
1902 return w->rc;
1903}
1904
1905
1906static int dxbcOutputOpcode(DXBCOUTPUTCTX *pOutctx, DXBCByteWriter *w, VGPUOpcode *pOpcode)
1907{
1908#ifdef DEBUG
1909 void *pvBegin = dxbcByteWriterPtr(w);
1910#endif
1911
1912 if ( pOutctx->programToken.programType == VGPU10_PIXEL_SHADER
1913 && pOpcode->opcodeType == VGPU10_OPCODE_DCL_RESOURCE)
1914 {
1915 /** @todo This is a workaround. */
1916 /* Sometimes the guest (Mesa) created a shader with uninitialized resource dimension.
1917 * Use texture 2d because it is what a pixel shader normally uses.
1918 */
1919 ASSERT_GUEST_RETURN(pOpcode->cOpcodeToken == 4, VERR_INVALID_PARAMETER);
1920
1921 VGPU10OpcodeToken0 opcode;
1922 opcode.value = pOpcode->paOpcodeToken[0];
1923 if (opcode.resourceDimension == VGPU10_RESOURCE_DIMENSION_BUFFER)
1924 {
1925 opcode.resourceDimension = VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
1926 dxbcByteWriterAddTokens(w, &opcode.value, 1);
1927 dxbcByteWriterAddTokens(w, &pOpcode->paOpcodeToken[1], 2);
1928 uint32_t const returnType = 0x5555; /* float */
1929 dxbcByteWriterAddTokens(w, &returnType, 1);
1930 return VINF_SUCCESS;
1931 }
1932 }
1933 else if (pOpcode->opcodeType == VGPU10_OPCODE_VMWARE)
1934 {
1935 if (pOpcode->opcodeSubtype == VGPU10_VMWARE_OPCODE_IDIV)
1936 {
1937 return dxbcEmitVmwareIDIV(pOutctx, w, pOpcode);
1938 }
1939
1940 ASSERT_GUEST_FAILED_RETURN(VERR_NOT_SUPPORTED);
1941 }
1942
1943#ifdef DEBUG
1944 /* The code above must emit either nothing or everything. */
1945 Assert((uintptr_t)pvBegin == (uintptr_t)dxbcByteWriterPtr(w));
1946#endif
1947
1948 /* Just emit the unmodified instruction. */
1949 dxbcByteWriterAddTokens(w, pOpcode->paOpcodeToken, pOpcode->cOpcodeToken);
1950 return VINF_SUCCESS;
1951}
1952
1953
1954static int dxbcOutputFinalize(DXBCOUTPUTCTX *pOutctx, DXBCByteWriter *w)
1955{
1956 RT_NOREF(pOutctx, w);
1957 return VINF_SUCCESS;
1958}
1959
1960
1961static DECLCALLBACK(int) signatureEntryCmp(void const *pvElement1, void const *pvElement2, void *pvUser)
1962{
1963 SVGA3dDXSignatureEntry const *e1 = (SVGA3dDXSignatureEntry *)pvElement1;
1964 SVGA3dDXSignatureEntry const *e2 = (SVGA3dDXSignatureEntry *)pvElement2;
1965 RT_NOREF(pvUser);
1966
1967 if (e1->registerIndex < e2->registerIndex)
1968 return -1;
1969 if (e1->registerIndex > e2->registerIndex)
1970 return 1;
1971 if ((e1->mask & 0xf) < (e2->mask & 0xf))
1972 return -1;
1973 if ((e1->mask & 0xf) > (e2->mask & 0xf))
1974 return 1;
1975 return 0;
1976}
1977
1978
1979static void dxbcGenerateSemantics(DXShaderInfo *pInfo, uint32_t cSignature,
1980 SVGA3dDXSignatureEntry const *paSignature,
1981 DXShaderAttributeSemantic *paSemantic,
1982 uint32_t u32BlobType);
1983
1984
1985/*
1986 * Parse and verify the shader byte code. Extract input and output signatures into pInfo.
1987 */
1988int DXShaderParse(void const *pvShaderCode, uint32_t cbShaderCode, DXShaderInfo *pInfo)
1989{
1990 if (pInfo)
1991 RT_ZERO(*pInfo);
1992
1993 ASSERT_GUEST_RETURN(cbShaderCode <= SVGA3D_MAX_SHADER_MEMORY_BYTES, VERR_INVALID_PARAMETER);
1994 ASSERT_GUEST_RETURN((cbShaderCode & 0x3) == 0, VERR_INVALID_PARAMETER); /* Aligned to the token size. */
1995 ASSERT_GUEST_RETURN(cbShaderCode >= 8, VERR_INVALID_PARAMETER); /* At least program and length tokens. */
1996
1997 uint32_t const *paToken = (uint32_t *)pvShaderCode;
1998
1999 VGPU10ProgramToken const *pProgramToken = (VGPU10ProgramToken *)&paToken[0];
2000 ASSERT_GUEST_RETURN( pProgramToken->majorVersion >= 4
2001 && pProgramToken->programType <= VGPU10_COMPUTE_SHADER, VERR_INVALID_PARAMETER);
2002 if (pInfo)
2003 pInfo->enmProgramType = (VGPU10_PROGRAM_TYPE)pProgramToken->programType;
2004
2005 uint32_t const cToken = paToken[1];
2006 Log6(("Shader version %d.%d type %s(%d) Length %d\n",
2007 pProgramToken->majorVersion, pProgramToken->minorVersion, dxbcShaderTypeToString(pProgramToken->programType), pProgramToken->programType, cToken));
2008 ASSERT_GUEST_RETURN(cbShaderCode / 4 >= cToken, VERR_INVALID_PARAMETER); /* Declared length should be less or equal to the actual. */
2009
2010 /* Write the parsed (and possibly modified) shader to a memory buffer. */
2011 DXBCByteWriter dxbcByteWriter;
2012 DXBCByteWriter *w = &dxbcByteWriter;
2013 if (!dxbcByteWriterInit(w, 4096 + cbShaderCode))
2014 return VERR_NO_MEMORY;
2015
2016 dxbcByteWriterAddTokens(w, paToken, 2);
2017
2018 DXBCTokenReader parser;
2019 RT_ZERO(parser);
2020
2021 DXBCTokenReader *r = &parser;
2022 r->pToken = &paToken[2];
2023 r->cToken = r->cRemainingToken = cToken - 2;
2024
2025 DXBCOUTPUTCTX outctx;
2026 dxbcOutputInit(&outctx, pProgramToken, cToken);
2027
2028 int rc = VINF_SUCCESS;
2029 while (dxbcTokenReaderCanRead(r, 1))
2030 {
2031 uint32_t const offOpcode = dxbcByteWriterSize(w);
2032
2033 VGPUOpcode opcode;
2034 rc = dxbcParseOpcode(r, &opcode);
2035 ASSERT_GUEST_STMT_BREAK(RT_SUCCESS(rc), rc = VERR_INVALID_PARAMETER);
2036
2037 rc = dxbcOutputOpcode(&outctx, w, &opcode);
2038 AssertRCBreak(rc);
2039
2040 if (pInfo)
2041 {
2042 /* Remember offsets of DCL_RESOURCE instructions. */
2043 if ( outctx.programToken.programType == VGPU10_PIXEL_SHADER
2044 && opcode.opcodeType == VGPU10_OPCODE_DCL_RESOURCE)
2045 {
2046 if ( opcode.cOperand == 1
2047 && opcode.aValOperand[0].indexDimension == VGPU10_OPERAND_INDEX_1D
2048 && opcode.aValOperand[0].aOperandIndex[0].indexRepresentation == VGPU10_OPERAND_INDEX_IMMEDIATE32)
2049 {
2050 uint32_t const indexResource = opcode.aValOperand[0].aOperandIndex[0].iOperandImmediate;
2051 if (indexResource < SVGA3D_DX_MAX_SRVIEWS)
2052 {
2053 ASSERT_GUEST(pInfo->aOffDclResource[indexResource] == 0);
2054 pInfo->aOffDclResource[indexResource] = offOpcode;
2055 pInfo->cDclResource = RT_MAX(pInfo->cDclResource, indexResource + 1);
2056 }
2057 else
2058 ASSERT_GUEST_FAILED();
2059 }
2060 else
2061 ASSERT_GUEST_FAILED();
2062 }
2063
2064 /* Fetch signatures. */
2065 SVGA3dDXSignatureEntry *pSignatureEntry = NULL;
2066 switch (opcode.opcodeType)
2067 {
2068 case VGPU10_OPCODE_DCL_INPUT:
2069 case VGPU10_OPCODE_DCL_INPUT_SIV:
2070 //case VGPU10_OPCODE_DCL_INPUT_SGV:
2071 case VGPU10_OPCODE_DCL_INPUT_PS:
2072 //case VGPU10_OPCODE_DCL_INPUT_PS_SIV:
2073 //case VGPU10_OPCODE_DCL_INPUT_PS_SGV:
2074 //case VGPU10_OPCODE_DCL_INPUT_CONTROL_POINT_COUNT:
2075 ASSERT_GUEST_STMT_BREAK(pInfo->cInputSignature < RT_ELEMENTS(pInfo->aInputSignature), rc = VERR_INVALID_PARAMETER);
2076 pSignatureEntry = &pInfo->aInputSignature[pInfo->cInputSignature++];
2077 break;
2078 case VGPU10_OPCODE_DCL_OUTPUT:
2079 case VGPU10_OPCODE_DCL_OUTPUT_SIV:
2080 case VGPU10_OPCODE_DCL_OUTPUT_SGV:
2081 //case VGPU10_OPCODE_DCL_OUTPUT_CONTROL_POINT_COUNT:
2082 ASSERT_GUEST_STMT_BREAK(pInfo->cOutputSignature < RT_ELEMENTS(pInfo->aOutputSignature), rc = VERR_INVALID_PARAMETER);
2083 pSignatureEntry = &pInfo->aOutputSignature[pInfo->cOutputSignature++];
2084 break;
2085 default:
2086 break;
2087 }
2088
2089 if (RT_FAILURE(rc))
2090 break;
2091
2092 if (pSignatureEntry)
2093 {
2094 ASSERT_GUEST_STMT_BREAK( opcode.aValOperand[0].aOperandIndex[0].indexRepresentation == VGPU10_OPERAND_INDEX_IMMEDIATE32
2095 || opcode.aValOperand[0].aOperandIndex[0].indexRepresentation == VGPU10_OPERAND_INDEX_IMMEDIATE64,
2096 rc = VERR_NOT_SUPPORTED);
2097
2098 uint32_t const indexDimension = opcode.aValOperand[0].indexDimension;
2099 if (indexDimension == VGPU10_OPERAND_INDEX_0D)
2100 {
2101 if (opcode.aValOperand[0].operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID)
2102 {
2103 pSignatureEntry->registerIndex = 0;
2104 pSignatureEntry->semanticName = SVGADX_SIGNATURE_SEMANTIC_NAME_PRIMITIVE_ID;
2105 }
2106 else if (opcode.aValOperand[0].operandType == VGPU10_OPERAND_TYPE_OUTPUT_DEPTH)
2107 {
2108 /* oDepth is always last in the signature. Register index is equal to 0xFFFFFFFF. */
2109 pSignatureEntry->registerIndex = 0xFFFFFFFF;
2110 pSignatureEntry->semanticName = SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED;
2111 }
2112 else if (opcode.aValOperand[0].operandType <= VGPU10_OPERAND_TYPE_SM50_MAX)
2113 {
2114 pSignatureEntry->registerIndex = 0;
2115 pSignatureEntry->semanticName = opcode.semanticName;
2116 }
2117 else
2118 ASSERT_GUEST_FAILED_STMT_BREAK(rc = VERR_NOT_SUPPORTED);
2119 }
2120 else
2121 {
2122 ASSERT_GUEST_STMT_BREAK( indexDimension == VGPU10_OPERAND_INDEX_1D
2123 || indexDimension == VGPU10_OPERAND_INDEX_2D
2124 || indexDimension == VGPU10_OPERAND_INDEX_3D,
2125 rc = VERR_NOT_SUPPORTED);
2126 /* The register index seems to be in the highest dimension. */
2127 pSignatureEntry->registerIndex = opcode.aValOperand[0].aOperandIndex[indexDimension - VGPU10_OPERAND_INDEX_1D].iOperandImmediate;
2128 pSignatureEntry->semanticName = opcode.semanticName;
2129 }
2130 pSignatureEntry->mask = opcode.aValOperand[0].mask;
2131 pSignatureEntry->componentType = SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN; // Will be updated by vboxDXUpdateVSInputSignature
2132 pSignatureEntry->minPrecision = SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT;
2133 }
2134 }
2135 }
2136
2137 if (RT_FAILURE(rc))
2138 {
2139 return rc;
2140 }
2141
2142 rc = dxbcOutputFinalize(&outctx, w);
2143 if (RT_FAILURE(rc))
2144 {
2145 return rc;
2146 }
2147
2148 dxbcByteWriterFetchData(w, &pInfo->pvBytecode, &pInfo->cbBytecode);
2149 uint32_t *pcOutputToken = (uint32_t *)pInfo->pvBytecode + 1;
2150 *pcOutputToken = pInfo->cbBytecode / 4;
2151
2152 /* Sort signatures by register index and mask because the host API need them to be sorted. */
2153 if (pInfo->cInputSignature)
2154 {
2155 RTSortShell(pInfo->aInputSignature, pInfo->cInputSignature, sizeof(pInfo->aInputSignature[0]),
2156 signatureEntryCmp, NULL);
2157 dxbcGenerateSemantics(pInfo, pInfo->cInputSignature,
2158 pInfo->aInputSignature,
2159 pInfo->aInputSemantic, DXBC_BLOB_TYPE_ISGN);
2160 }
2161 if (pInfo->cOutputSignature)
2162 {
2163 RTSortShell(pInfo->aOutputSignature, pInfo->cOutputSignature, sizeof(pInfo->aOutputSignature[0]),
2164 signatureEntryCmp, NULL);
2165 dxbcGenerateSemantics(pInfo, pInfo->cOutputSignature,
2166 pInfo->aOutputSignature,
2167 pInfo->aOutputSemantic, DXBC_BLOB_TYPE_OSGN);
2168 }
2169 if (pInfo->cPatchConstantSignature)
2170 {
2171 RTSortShell(pInfo->aPatchConstantSignature, pInfo->cPatchConstantSignature, sizeof(pInfo->aPatchConstantSignature[0]),
2172 signatureEntryCmp, NULL);
2173 dxbcGenerateSemantics(pInfo, pInfo->cPatchConstantSignature,
2174 pInfo->aPatchConstantSignature,
2175 pInfo->aPatchConstantSemantic, DXBC_BLOB_TYPE_PCSG);
2176 }
2177
2178#ifdef LOG_ENABLED
2179 if (pInfo->cInputSignature)
2180 {
2181 Log6(("Input signatures:\n"));
2182 for (uint32_t i = 0; i < pInfo->cInputSignature; ++i)
2183 Log6((" [%u]: %u %u 0x%X, %s %d\n",
2184 i, pInfo->aInputSignature[i].registerIndex, pInfo->aInputSignature[i].semanticName, pInfo->aInputSignature[i].mask,
2185 pInfo->aInputSemantic[i].pcszSemanticName, pInfo->aInputSemantic[i].SemanticIndex));
2186 }
2187 if (pInfo->cOutputSignature)
2188 {
2189 Log6(("Output signatures:\n"));
2190 for (uint32_t i = 0; i < pInfo->cOutputSignature; ++i)
2191 Log6((" [%u]: %u %u 0x%X, %s %d\n",
2192 i, pInfo->aOutputSignature[i].registerIndex, pInfo->aOutputSignature[i].semanticName, pInfo->aOutputSignature[i].mask,
2193 pInfo->aOutputSemantic[i].pcszSemanticName, pInfo->aOutputSemantic[i].SemanticIndex));
2194 }
2195 if (pInfo->cPatchConstantSignature)
2196 {
2197 Log6(("Patch constant signatures:\n"));
2198 for (uint32_t i = 0; i < pInfo->cPatchConstantSignature; ++i)
2199 Log6((" [%u]: %u %u 0x%X, %s %d\n",
2200 i, pInfo->aPatchConstantSignature[i].registerIndex, pInfo->aPatchConstantSignature[i].semanticName, pInfo->aPatchConstantSignature[i].mask,
2201 pInfo->aPatchConstantSemantic[i].pcszSemanticName, pInfo->aPatchConstantSemantic[i].SemanticIndex));
2202 }
2203#endif
2204
2205 return VINF_SUCCESS;
2206}
2207
2208void DXShaderGenerateSemantics(DXShaderInfo *pInfo)
2209{
2210 if (pInfo->cInputSignature)
2211 dxbcGenerateSemantics(pInfo, pInfo->cInputSignature,
2212 pInfo->aInputSignature,
2213 pInfo->aInputSemantic, DXBC_BLOB_TYPE_ISGN);
2214 if (pInfo->cOutputSignature)
2215 dxbcGenerateSemantics(pInfo, pInfo->cOutputSignature,
2216 pInfo->aOutputSignature,
2217 pInfo->aOutputSemantic, DXBC_BLOB_TYPE_OSGN);
2218 if (pInfo->cPatchConstantSignature)
2219 dxbcGenerateSemantics(pInfo, pInfo->cPatchConstantSignature,
2220 pInfo->aPatchConstantSignature,
2221 pInfo->aPatchConstantSemantic, DXBC_BLOB_TYPE_PCSG);
2222}
2223
2224void DXShaderFree(DXShaderInfo *pInfo)
2225{
2226 RTMemFree(pInfo->pvBytecode);
2227 RT_ZERO(*pInfo);
2228}
2229
2230
#if 0 // Unused. Replaced with dxbcSemanticInfo.
/**
 * Translates a signature semantic name enum value to a string.
 *
 * @returns The system value semantic name or the generic "ATTRIB" for everything else.
 * @param   enmSemanticName     The semantic name value.
 */
static char const *dxbcSemanticName(SVGA3dDXSignatureSemanticName enmSemanticName)
{
    char const *pszName;

    /* https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/dx-graphics-hlsl-semantics#system-value-semantics */
    switch (enmSemanticName)
    {
        case SVGADX_SIGNATURE_SEMANTIC_NAME_POSITION:                          pszName = "SV_Position"; break;
        case SVGADX_SIGNATURE_SEMANTIC_NAME_CLIP_DISTANCE:                     pszName = "SV_ClipDistance"; break;
        case SVGADX_SIGNATURE_SEMANTIC_NAME_CULL_DISTANCE:                     pszName = "SV_CullDistance"; break;
        case SVGADX_SIGNATURE_SEMANTIC_NAME_RENDER_TARGET_ARRAY_INDEX:         pszName = "SV_RenderTargetArrayIndex"; break;
        case SVGADX_SIGNATURE_SEMANTIC_NAME_VIEWPORT_ARRAY_INDEX:              pszName = "SV_ViewportArrayIndex"; break;
        case SVGADX_SIGNATURE_SEMANTIC_NAME_VERTEX_ID:                         pszName = "SV_VertexID"; break;
        case SVGADX_SIGNATURE_SEMANTIC_NAME_PRIMITIVE_ID:                      pszName = "SV_PrimitiveID"; break;
        case SVGADX_SIGNATURE_SEMANTIC_NAME_INSTANCE_ID:                       pszName = "SV_InstanceID"; break;
        case SVGADX_SIGNATURE_SEMANTIC_NAME_IS_FRONT_FACE:                     pszName = "SV_IsFrontFace"; break;
        case SVGADX_SIGNATURE_SEMANTIC_NAME_SAMPLE_INDEX:                      pszName = "SV_SampleIndex"; break;
        case SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR: pszName = "SV_FinalQuadUeq0EdgeTessFactor"; break;
        case SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_EQ_0_EDGE_TESSFACTOR: pszName = "SV_FinalQuadVeq0EdgeTessFactor"; break;
        case SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_EQ_1_EDGE_TESSFACTOR: pszName = "SV_FinalQuadUeq1EdgeTessFactor"; break;
        case SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_EQ_1_EDGE_TESSFACTOR: pszName = "SV_FinalQuadVeq1EdgeTessFactor"; break;
        case SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR:    pszName = "SV_FinalQuadUInsideTessFactor"; break;
        case SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR:    pszName = "SV_FinalQuadVInsideTessFactor"; break;
        case SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR:  pszName = "SV_FinalTriUeq0EdgeTessFactor"; break;
        case SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_V_EQ_0_EDGE_TESSFACTOR:  pszName = "SV_FinalTriVeq0EdgeTessFactor"; break;
        case SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_W_EQ_0_EDGE_TESSFACTOR:  pszName = "SV_FinalTriWeq0EdgeTessFactor"; break;
        case SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_INSIDE_TESSFACTOR:       pszName = "SV_FinalTriInsideTessFactor"; break;
        case SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_LINE_DETAIL_TESSFACTOR:      pszName = "SV_FinalLineDetailTessFactor"; break;
        case SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_LINE_DENSITY_TESSFACTOR:     pszName = "SV_FinalLineDensityTessFactor"; break;
        default:
            Assert(enmSemanticName == SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
            /* Generic. Arbitrary name. It does not have any meaning. */
            pszName = "ATTRIB";
            break;
    }
    return pszName;
}
#endif
2267
2268
/**
 * Description of a signature semantic.
 *
 * https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/dx-graphics-hlsl-semantics#system-value-semantics
 */
typedef struct VGPUSemanticInfo
{
    /** The semantic name string. */
    char const *pszName;
    /** Type of the value:
     *    0 - undefined
     *    1 - unsigned int
     *    2 - signed int
     *    3 - float
     */
    uint32_t u32Type;
} VGPUSemanticInfo;
2281
2282static VGPUSemanticInfo const g_aSemanticInfo[SVGADX_SIGNATURE_SEMANTIC_NAME_MAX] =
2283{
2284 { "ATTRIB", 0 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED 0
2285 { "SV_Position", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_POSITION 1
2286 { "SV_ClipDistance", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_CLIP_DISTANCE 2
2287 { "SV_CullDistance", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_CULL_DISTANCE 3
2288 { "SV_RenderTargetArrayIndex", 1 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_RENDER_TARGET_ARRAY_INDEX 4
2289 { "SV_ViewportArrayIndex", 1 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_VIEWPORT_ARRAY_INDEX 5
2290 { "SV_VertexID", 1 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_VERTEX_ID 6
2291 { "SV_PrimitiveID", 1 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_PRIMITIVE_ID 7
2292 { "SV_InstanceID", 1 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_INSTANCE_ID 8
2293 { "SV_IsFrontFace", 1 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_IS_FRONT_FACE 9
2294 { "SV_SampleIndex", 1 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_SAMPLE_INDEX 10
2295 /** @todo Is this a correct name for all TessFactors? */
2296 { "SV_TessFactor", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR 11
2297 { "SV_TessFactor", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_EQ_0_EDGE_TESSFACTOR 12
2298 { "SV_TessFactor", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_EQ_1_EDGE_TESSFACTOR 13
2299 { "SV_TessFactor", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_EQ_1_EDGE_TESSFACTOR 14
2300 { "SV_TessFactor", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR 15
2301 { "SV_TessFactor", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR 16
2302 { "SV_TessFactor", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR 17
2303 { "SV_TessFactor", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_V_EQ_0_EDGE_TESSFACTOR 18
2304 { "SV_TessFactor", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_W_EQ_0_EDGE_TESSFACTOR 19
2305 { "SV_TessFactor", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_INSIDE_TESSFACTOR 20
2306 { "SV_TessFactor", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_LINE_DETAIL_TESSFACTOR 21
2307 { "SV_TessFactor", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_LINE_DENSITY_TESSFACTOR 22
2308};
2309
2310static VGPUSemanticInfo const g_SemanticPSOutput =
2311 { "SV_TARGET", 3 }; // SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED 0
2312
2313
2314static VGPUSemanticInfo const *dxbcSemanticInfo(DXShaderInfo const *pInfo, SVGA3dDXSignatureSemanticName enmSemanticName, uint32_t u32BlobType)
2315{
2316 if (enmSemanticName < RT_ELEMENTS(g_aSemanticInfo))
2317 {
2318 if ( enmSemanticName == 0
2319 && pInfo->enmProgramType == VGPU10_PIXEL_SHADER
2320 && u32BlobType == DXBC_BLOB_TYPE_OSGN)
2321 return &g_SemanticPSOutput;
2322 return &g_aSemanticInfo[enmSemanticName];
2323 }
2324 return &g_aSemanticInfo[0];
2325}
2326
2327
2328static void dxbcGenerateSemantics(DXShaderInfo *pInfo, uint32_t cSignature,
2329 SVGA3dDXSignatureEntry const *paSignature,
2330 DXShaderAttributeSemantic *paSemantic,
2331 uint32_t u32BlobType)
2332{
2333 for (uint32_t iSignatureEntry = 0; iSignatureEntry < cSignature; ++iSignatureEntry)
2334 {
2335 SVGA3dDXSignatureEntry const *src = &paSignature[iSignatureEntry];
2336 DXShaderAttributeSemantic *dst = &paSemantic[iSignatureEntry];
2337
2338 ASSERT_GUEST_RETURN_VOID(src->semanticName < SVGADX_SIGNATURE_SEMANTIC_NAME_MAX);
2339
2340 VGPUSemanticInfo const *pSemanticInfo = dxbcSemanticInfo(pInfo, src->semanticName, u32BlobType);
2341 dst->pcszSemanticName = pSemanticInfo->pszName;
2342 dst->SemanticIndex = 0;
2343 for (uint32_t i = 0; i < iSignatureEntry; ++i)
2344 {
2345 DXShaderAttributeSemantic const *pSemantic = &paSemantic[i];
2346 if (RTStrCmp(pSemantic->pcszSemanticName, dst->pcszSemanticName) == 0)
2347 ++dst->SemanticIndex;
2348 }
2349 }
2350}
2351
2352
2353static int dxbcCreateIOSGNBlob(DXShaderInfo const *pInfo, DXBCHeader *pHdr, uint32_t u32BlobType, uint32_t cSignature,
2354 SVGA3dDXSignatureEntry const *paSignature, DXShaderAttributeSemantic const *paSemantic, DXBCByteWriter *w)
2355{
2356 RT_NOREF(pInfo);
2357 AssertReturn(cSignature <= SVGA3D_DX_SM41_MAX_VERTEXINPUTREGISTERS, VERR_INVALID_PARAMETER);
2358
2359 uint32_t cbBlob = RT_UOFFSETOF_DYN(DXBCBlobIOSGN, aElement[cSignature]);
2360 if (!dxbcByteWriterCanWrite(w, sizeof(DXBCBlobHeader) + cbBlob))
2361 return VERR_NO_MEMORY;
2362
2363 DXBCBlobHeader *pHdrBlob = (DXBCBlobHeader *)dxbcByteWriterPtr(w);
2364 pHdrBlob->u32BlobType = u32BlobType;
2365 // pHdrBlob->cbBlob = 0;
2366
2367 DXBCBlobIOSGN *pHdrISGN = (DXBCBlobIOSGN *)&pHdrBlob[1];
2368 pHdrISGN->cElement = cSignature;
2369 pHdrISGN->offElement = RT_UOFFSETOF(DXBCBlobIOSGN, aElement[0]);
2370
2371 for (uint32_t iSignatureEntry = 0; iSignatureEntry < cSignature; ++iSignatureEntry)
2372 {
2373 SVGA3dDXSignatureEntry const *srcEntry = &paSignature[iSignatureEntry];
2374 DXShaderAttributeSemantic const *srcSemantic = &paSemantic[iSignatureEntry];
2375 DXBCBlobIOSGNElement *dst = &pHdrISGN->aElement[iSignatureEntry];
2376
2377 dst->offElementName = 0;
2378 for (uint32_t i = 0; i < iSignatureEntry; ++i)
2379 {
2380 DXBCBlobIOSGNElement const *pElement = &pHdrISGN->aElement[i];
2381 char const *pszElementName = (char *)pHdrISGN + pElement->offElementName;
2382 if (RTStrCmp(pszElementName, srcSemantic->pcszSemanticName) == 0)
2383 {
2384 dst->offElementName = pElement->offElementName;
2385 break;
2386 }
2387 }
2388 dst->idxSemantic = srcSemantic->SemanticIndex;
2389 dst->enmSystemValue = srcEntry->semanticName;
2390 dst->enmComponentType = srcEntry->componentType;
2391 dst->idxRegister = srcEntry->registerIndex;
2392 dst->u.mask = srcEntry->mask;
2393
2394 if (dst->offElementName == 0)
2395 {
2396 /* Store the semantic name for this element. */
2397 dst->offElementName = cbBlob; /* Offset of the semantic's name relative to the start of the blob (without DXBCBlobHeader). */
2398 uint32_t const cbElementName = (uint32_t)strlen(srcSemantic->pcszSemanticName) + 1;
2399 if (!dxbcByteWriterCanWrite(w, sizeof(DXBCBlobHeader) + cbBlob + cbElementName))
2400 return VERR_NO_MEMORY;
2401
2402 memcpy((char *)pHdrISGN + dst->offElementName, srcSemantic->pcszSemanticName, cbElementName);
2403 cbBlob += cbElementName;
2404 }
2405 }
2406
2407 /* Blobs are 4 bytes aligned. Commit the blob data. */
2408 cbBlob = RT_ALIGN_32(cbBlob, 4);
2409 pHdrBlob->cbBlob = cbBlob;
2410 pHdr->cbTotal += cbBlob + sizeof(DXBCBlobHeader);
2411 dxbcByteWriterCommit(w, cbBlob + sizeof(DXBCBlobHeader));
2412 return VINF_SUCCESS;
2413}
2414
2415
2416static int dxbcCreateSHDRBlob(DXBCHeader *pHdr, uint32_t u32BlobType,
2417 void const *pvShader, uint32_t cbShader, DXBCByteWriter *w)
2418{
2419 uint32_t cbBlob = cbShader;
2420 if (!dxbcByteWriterCanWrite(w, sizeof(DXBCBlobHeader) + cbBlob))
2421 return VERR_NO_MEMORY;
2422
2423 DXBCBlobHeader *pHdrBlob = (DXBCBlobHeader *)dxbcByteWriterPtr(w);
2424 pHdrBlob->u32BlobType = u32BlobType;
2425 // pHdrBlob->cbBlob = 0;
2426
2427 memcpy(&pHdrBlob[1], pvShader, cbShader);
2428
2429 /* Blobs are 4 bytes aligned. Commit the blob data. */
2430 cbBlob = RT_ALIGN_32(cbBlob, 4);
2431 pHdrBlob->cbBlob = cbBlob;
2432 pHdr->cbTotal += cbBlob + sizeof(DXBCBlobHeader);
2433 dxbcByteWriterCommit(w, cbBlob + sizeof(DXBCBlobHeader));
2434 return VINF_SUCCESS;
2435}
2436
2437
2438/*
2439 * Create a DXBC container with signature and shader code data blobs.
2440 */
2441static int dxbcCreateFromInfo(DXShaderInfo const *pInfo, void const *pvShader, uint32_t cbShader, DXBCByteWriter *w)
2442{
2443 int rc;
2444
2445 /* Create a DXBC container with ISGN, OSGN and SHDR blobs. */
2446 uint32_t cBlob = 3;
2447 if ( pInfo->enmProgramType == VGPU10_HULL_SHADER
2448 || pInfo->enmProgramType == VGPU10_DOMAIN_SHADER)
2449 ++cBlob;
2450
2451 uint32_t const cbHdr = RT_UOFFSETOF_DYN(DXBCHeader, aBlobOffset[cBlob]); /* Header with blob offsets. */
2452 if (!dxbcByteWriterCanWrite(w, cbHdr))
2453 return VERR_NO_MEMORY;
2454
2455 /* Container header. */
2456 DXBCHeader *pHdr = (DXBCHeader *)dxbcByteWriterPtr(w);
2457 pHdr->u32DXBC = DXBC_MAGIC;
2458 // RT_ZERO(pHdr->au8Hash);
2459 pHdr->u32Version = 1;
2460 pHdr->cbTotal = cbHdr;
2461 pHdr->cBlob = cBlob;
2462 //RT_ZERO(pHdr->aBlobOffset);
2463 dxbcByteWriterCommit(w, cbHdr);
2464
2465#ifdef LOG_ENABLED
2466 if (pInfo->cInputSignature)
2467 {
2468 Log6(("Input signatures:\n"));
2469 for (uint32_t i = 0; i < pInfo->cInputSignature; ++i)
2470 Log6((" [%u]: %u %u 0x%X, %s %d\n",
2471 i, pInfo->aInputSignature[i].registerIndex, pInfo->aInputSignature[i].semanticName, pInfo->aInputSignature[i].mask,
2472 pInfo->aInputSemantic[i].pcszSemanticName, pInfo->aInputSemantic[i].SemanticIndex));
2473 }
2474 if (pInfo->cOutputSignature)
2475 {
2476 Log6(("Output signatures:\n"));
2477 for (uint32_t i = 0; i < pInfo->cOutputSignature; ++i)
2478 Log6((" [%u]: %u %u 0x%X, %s %d\n",
2479 i, pInfo->aOutputSignature[i].registerIndex, pInfo->aOutputSignature[i].semanticName, pInfo->aOutputSignature[i].mask,
2480 pInfo->aOutputSemantic[i].pcszSemanticName, pInfo->aOutputSemantic[i].SemanticIndex));
2481 }
2482 if (pInfo->cPatchConstantSignature)
2483 {
2484 Log6(("Patch constant signatures:\n"));
2485 for (uint32_t i = 0; i < pInfo->cPatchConstantSignature; ++i)
2486 Log6((" [%u]: %u %u 0x%X, %s %d\n",
2487 i, pInfo->aPatchConstantSignature[i].registerIndex, pInfo->aPatchConstantSignature[i].semanticName, pInfo->aPatchConstantSignature[i].mask,
2488 pInfo->aPatchConstantSemantic[i].pcszSemanticName, pInfo->aPatchConstantSemantic[i].SemanticIndex));
2489 }
2490#endif
2491
2492 /* Blobs. */
2493 uint32_t iBlob = 0;
2494
2495 pHdr->aBlobOffset[iBlob++] = dxbcByteWriterSize(w);
2496 rc = dxbcCreateIOSGNBlob(pInfo, pHdr, DXBC_BLOB_TYPE_ISGN, pInfo->cInputSignature, &pInfo->aInputSignature[0], pInfo->aInputSemantic, w);
2497 AssertRCReturn(rc, rc);
2498
2499 pHdr->aBlobOffset[iBlob++] = dxbcByteWriterSize(w);
2500 rc = dxbcCreateIOSGNBlob(pInfo, pHdr, DXBC_BLOB_TYPE_OSGN, pInfo->cOutputSignature, &pInfo->aOutputSignature[0], pInfo->aOutputSemantic, w);
2501 AssertRCReturn(rc, rc);
2502
2503 if ( pInfo->enmProgramType == VGPU10_HULL_SHADER
2504 || pInfo->enmProgramType == VGPU10_DOMAIN_SHADER)
2505 {
2506 pHdr->aBlobOffset[iBlob++] = dxbcByteWriterSize(w);
2507 rc = dxbcCreateIOSGNBlob(pInfo, pHdr, DXBC_BLOB_TYPE_PCSG, pInfo->cPatchConstantSignature, &pInfo->aPatchConstantSignature[0], pInfo->aPatchConstantSemantic, w);
2508 AssertRCReturn(rc, rc);
2509 }
2510
2511 pHdr->aBlobOffset[iBlob++] = dxbcByteWriterSize(w);
2512 rc = dxbcCreateSHDRBlob(pHdr, DXBC_BLOB_TYPE_SHDR, pvShader, cbShader, w);
2513 AssertRCReturn(rc, rc);
2514
2515 Assert(iBlob == cBlob);
2516
2517 AssertCompile(RT_UOFFSETOF(DXBCHeader, u32Version) == 0x14);
2518 dxbcHash(&pHdr->u32Version, pHdr->cbTotal - RT_UOFFSETOF(DXBCHeader, u32Version), pHdr->au8Hash);
2519
2520 return VINF_SUCCESS;
2521}
2522
2523
2524int DXShaderCreateDXBC(DXShaderInfo const *pInfo, void **ppvDXBC, uint32_t *pcbDXBC)
2525{
2526 /* Build DXBC container. */
2527 int rc;
2528 DXBCByteWriter dxbcByteWriter;
2529 DXBCByteWriter *w = &dxbcByteWriter;
2530 if (dxbcByteWriterInit(w, 4096 + pInfo->cbBytecode))
2531 {
2532 rc = dxbcCreateFromInfo(pInfo, pInfo->pvBytecode, pInfo->cbBytecode, w);
2533 if (RT_SUCCESS(rc))
2534 dxbcByteWriterFetchData(w, ppvDXBC, pcbDXBC);
2535 }
2536 else
2537 rc = VERR_NO_MEMORY;
2538 return rc;
2539}
2540
2541
2542static char const *dxbcGetOutputSemanticName(DXShaderInfo const *pInfo, uint32_t idxRegister, uint32_t u32BlobType,
2543 uint32_t cSignature, SVGA3dDXSignatureEntry const *paSignature,
2544 SVGA3dDXSignatureSemanticName *pSemanticName)
2545{
2546 for (uint32_t i = 0; i < cSignature; ++i)
2547 {
2548 SVGA3dDXSignatureEntry const *p = &paSignature[i];
2549 if (p->registerIndex == idxRegister)
2550 {
2551 AssertReturn(p->semanticName < SVGADX_SIGNATURE_SEMANTIC_NAME_MAX, NULL);
2552 VGPUSemanticInfo const *pSemanticInfo = dxbcSemanticInfo(pInfo, p->semanticName, u32BlobType);
2553 *pSemanticName = p->semanticName;
2554 return pSemanticInfo->pszName;
2555 }
2556 }
2557 return NULL;
2558}
2559
2560char const *DXShaderGetOutputSemanticName(DXShaderInfo const *pInfo, uint32_t idxRegister, SVGA3dDXSignatureSemanticName *pSemanticName)
2561{
2562 return dxbcGetOutputSemanticName(pInfo, idxRegister, DXBC_BLOB_TYPE_OSGN, pInfo->cOutputSignature, &pInfo->aOutputSignature[0], pSemanticName);
2563}
2564
2565VGPU10_RESOURCE_RETURN_TYPE DXShaderResourceReturnTypeFromFormat(SVGA3dSurfaceFormat format)
2566{
2567 /** @todo This is auto-generated from format names and needs a review. */
2568 switch (format)
2569 {
2570 case SVGA3D_R32G32B32A32_UINT: return VGPU10_RETURN_TYPE_UINT;
2571 case SVGA3D_R32G32B32A32_SINT: return VGPU10_RETURN_TYPE_SINT;
2572 case SVGA3D_R32G32B32_FLOAT: return VGPU10_RETURN_TYPE_FLOAT;
2573 case SVGA3D_R32G32B32_UINT: return VGPU10_RETURN_TYPE_UINT;
2574 case SVGA3D_R32G32B32_SINT: return VGPU10_RETURN_TYPE_SINT;
2575 case SVGA3D_R16G16B16A16_UINT: return VGPU10_RETURN_TYPE_UINT;
2576 case SVGA3D_R16G16B16A16_SNORM: return VGPU10_RETURN_TYPE_SNORM;
2577 case SVGA3D_R16G16B16A16_SINT: return VGPU10_RETURN_TYPE_SINT;
2578 case SVGA3D_R32G32_UINT: return VGPU10_RETURN_TYPE_UINT;
2579 case SVGA3D_R32G32_SINT: return VGPU10_RETURN_TYPE_SINT;
2580 case SVGA3D_D32_FLOAT_S8X24_UINT: return VGPU10_RETURN_TYPE_UINT;
2581 case SVGA3D_R32_FLOAT_X8X24: return VGPU10_RETURN_TYPE_FLOAT;
2582 case SVGA3D_X32_G8X24_UINT: return VGPU10_RETURN_TYPE_UINT;
2583 case SVGA3D_R10G10B10A2_UINT: return VGPU10_RETURN_TYPE_UINT;
2584 case SVGA3D_R11G11B10_FLOAT: return VGPU10_RETURN_TYPE_FLOAT;
2585 case SVGA3D_R8G8B8A8_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2586 case SVGA3D_R8G8B8A8_UNORM_SRGB: return VGPU10_RETURN_TYPE_UNORM;
2587 case SVGA3D_R8G8B8A8_UINT: return VGPU10_RETURN_TYPE_UINT;
2588 case SVGA3D_R8G8B8A8_SINT: return VGPU10_RETURN_TYPE_SINT;
2589 case SVGA3D_R16G16_UINT: return VGPU10_RETURN_TYPE_UINT;
2590 case SVGA3D_R16G16_SINT: return VGPU10_RETURN_TYPE_SINT;
2591 case SVGA3D_D32_FLOAT: return VGPU10_RETURN_TYPE_FLOAT;
2592 case SVGA3D_R32_UINT: return VGPU10_RETURN_TYPE_UINT;
2593 case SVGA3D_R32_SINT: return VGPU10_RETURN_TYPE_SINT;
2594 case SVGA3D_D24_UNORM_S8_UINT: return VGPU10_RETURN_TYPE_UNORM;
2595 case SVGA3D_R24_UNORM_X8: return VGPU10_RETURN_TYPE_UNORM;
2596 case SVGA3D_X24_G8_UINT: return VGPU10_RETURN_TYPE_UINT;
2597 case SVGA3D_R8G8_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2598 case SVGA3D_R8G8_UINT: return VGPU10_RETURN_TYPE_UINT;
2599 case SVGA3D_R8G8_SINT: return VGPU10_RETURN_TYPE_SINT;
2600 case SVGA3D_R16_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2601 case SVGA3D_R16_UINT: return VGPU10_RETURN_TYPE_UINT;
2602 case SVGA3D_R16_SNORM: return VGPU10_RETURN_TYPE_SNORM;
2603 case SVGA3D_R16_SINT: return VGPU10_RETURN_TYPE_SINT;
2604 case SVGA3D_R8_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2605 case SVGA3D_R8_UINT: return VGPU10_RETURN_TYPE_UINT;
2606 case SVGA3D_R8_SNORM: return VGPU10_RETURN_TYPE_SNORM;
2607 case SVGA3D_R8_SINT: return VGPU10_RETURN_TYPE_SINT;
2608 case SVGA3D_R8G8_B8G8_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2609 case SVGA3D_G8R8_G8B8_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2610 case SVGA3D_BC1_UNORM_SRGB: return VGPU10_RETURN_TYPE_UNORM;
2611 case SVGA3D_BC2_UNORM_SRGB: return VGPU10_RETURN_TYPE_UNORM;
2612 case SVGA3D_BC3_UNORM_SRGB: return VGPU10_RETURN_TYPE_UNORM;
2613 case SVGA3D_BC4_SNORM: return VGPU10_RETURN_TYPE_SNORM;
2614 case SVGA3D_BC5_SNORM: return VGPU10_RETURN_TYPE_SNORM;
2615 case SVGA3D_R10G10B10_XR_BIAS_A2_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2616 case SVGA3D_B8G8R8A8_UNORM_SRGB: return VGPU10_RETURN_TYPE_UNORM;
2617 case SVGA3D_B8G8R8X8_UNORM_SRGB: return VGPU10_RETURN_TYPE_UNORM;
2618 case SVGA3D_R32G32B32A32_FLOAT: return VGPU10_RETURN_TYPE_FLOAT;
2619 case SVGA3D_R16G16B16A16_FLOAT: return VGPU10_RETURN_TYPE_FLOAT;
2620 case SVGA3D_R16G16B16A16_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2621 case SVGA3D_R32G32_FLOAT: return VGPU10_RETURN_TYPE_FLOAT;
2622 case SVGA3D_R10G10B10A2_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2623 case SVGA3D_R8G8B8A8_SNORM: return VGPU10_RETURN_TYPE_SNORM;
2624 case SVGA3D_R16G16_FLOAT: return VGPU10_RETURN_TYPE_FLOAT;
2625 case SVGA3D_R16G16_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2626 case SVGA3D_R16G16_SNORM: return VGPU10_RETURN_TYPE_SNORM;
2627 case SVGA3D_R32_FLOAT: return VGPU10_RETURN_TYPE_FLOAT;
2628 case SVGA3D_R8G8_SNORM: return VGPU10_RETURN_TYPE_SNORM;
2629 case SVGA3D_R16_FLOAT: return VGPU10_RETURN_TYPE_FLOAT;
2630 case SVGA3D_D16_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2631 case SVGA3D_A8_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2632 case SVGA3D_BC1_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2633 case SVGA3D_BC2_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2634 case SVGA3D_BC3_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2635 case SVGA3D_B5G6R5_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2636 case SVGA3D_B5G5R5A1_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2637 case SVGA3D_B8G8R8A8_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2638 case SVGA3D_B8G8R8X8_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2639 case SVGA3D_BC4_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2640 case SVGA3D_BC5_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2641 case SVGA3D_B4G4R4A4_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2642 case SVGA3D_BC7_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2643 case SVGA3D_BC7_UNORM_SRGB: return VGPU10_RETURN_TYPE_UNORM;
2644 case SVGA3D_R9G9B9E5_SHAREDEXP: return VGPU10_RETURN_TYPE_FLOAT;
2645 default:
2646 break;
2647 }
2648 return VGPU10_RETURN_TYPE_UNORM;
2649}
2650
2651
2652SVGA3dDXSignatureRegisterComponentType DXShaderComponentTypeFromFormat(SVGA3dSurfaceFormat format)
2653{
2654 /** @todo This is auto-generated from format names and needs a review. */
2655 switch (format)
2656 {
2657 case SVGA3D_R32G32B32A32_UINT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_UINT32;
2658 case SVGA3D_R32G32B32A32_SINT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_SINT32;
2659 case SVGA3D_R32G32B32_FLOAT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2660 case SVGA3D_R32G32B32_UINT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_UINT32;
2661 case SVGA3D_R32G32B32_SINT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_SINT32;
2662 case SVGA3D_R16G16B16A16_UINT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_UINT32;
2663 case SVGA3D_R16G16B16A16_SNORM: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2664 case SVGA3D_R16G16B16A16_SINT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_SINT32;
2665 case SVGA3D_R32G32_UINT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_UINT32;
2666 case SVGA3D_R32G32_SINT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_SINT32;
2667 case SVGA3D_D32_FLOAT_S8X24_UINT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_UINT32;
2668 case SVGA3D_R32_FLOAT_X8X24: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2669 case SVGA3D_X32_G8X24_UINT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_UINT32;
2670 case SVGA3D_R10G10B10A2_UINT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_UINT32;
2671 case SVGA3D_R11G11B10_FLOAT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2672 case SVGA3D_R8G8B8A8_UNORM: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2673 case SVGA3D_R8G8B8A8_UNORM_SRGB: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2674 case SVGA3D_R8G8B8A8_UINT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_UINT32;
2675 case SVGA3D_R8G8B8A8_SINT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_SINT32;
2676 case SVGA3D_R16G16_UINT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_UINT32;
2677 case SVGA3D_R16G16_SINT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_SINT32;
2678 case SVGA3D_D32_FLOAT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2679 case SVGA3D_R32_UINT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_UINT32;
2680 case SVGA3D_R32_SINT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_SINT32;
2681 case SVGA3D_D24_UNORM_S8_UINT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2682 case SVGA3D_R24_UNORM_X8: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2683 case SVGA3D_X24_G8_UINT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_UINT32;
2684 case SVGA3D_R8G8_UNORM: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2685 case SVGA3D_R8G8_UINT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_UINT32;
2686 case SVGA3D_R8G8_SINT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_SINT32;
2687 case SVGA3D_R16_UNORM: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2688 case SVGA3D_R16_UINT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_UINT32;
2689 case SVGA3D_R16_SNORM: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2690 case SVGA3D_R16_SINT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_SINT32;
2691 case SVGA3D_R8_UNORM: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2692 case SVGA3D_R8_UINT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_UINT32;
2693 case SVGA3D_R8_SNORM: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2694 case SVGA3D_R8_SINT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_SINT32;
2695 case SVGA3D_R8G8_B8G8_UNORM: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2696 case SVGA3D_G8R8_G8B8_UNORM: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2697 case SVGA3D_BC1_UNORM_SRGB: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2698 case SVGA3D_BC2_UNORM_SRGB: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2699 case SVGA3D_BC3_UNORM_SRGB: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2700 case SVGA3D_BC4_SNORM: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2701 case SVGA3D_BC5_SNORM: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2702 case SVGA3D_R10G10B10_XR_BIAS_A2_UNORM: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2703 case SVGA3D_B8G8R8A8_UNORM_SRGB: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2704 case SVGA3D_B8G8R8X8_UNORM_SRGB: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2705 case SVGA3D_R32G32B32A32_FLOAT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2706 case SVGA3D_R16G16B16A16_FLOAT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2707 case SVGA3D_R16G16B16A16_UNORM: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2708 case SVGA3D_R32G32_FLOAT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2709 case SVGA3D_R10G10B10A2_UNORM: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2710 case SVGA3D_R8G8B8A8_SNORM: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2711 case SVGA3D_R16G16_FLOAT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2712 case SVGA3D_R16G16_UNORM: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2713 case SVGA3D_R16G16_SNORM: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2714 case SVGA3D_R32_FLOAT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2715 case SVGA3D_R8G8_SNORM: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2716 case SVGA3D_R16_FLOAT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2717 case SVGA3D_D16_UNORM: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2718 case SVGA3D_A8_UNORM: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2719 case SVGA3D_BC1_UNORM: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2720 case SVGA3D_BC2_UNORM: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2721 case SVGA3D_BC3_UNORM: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2722 case SVGA3D_B5G6R5_UNORM: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2723 case SVGA3D_B5G5R5A1_UNORM: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2724 case SVGA3D_B8G8R8A8_UNORM: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2725 case SVGA3D_B8G8R8X8_UNORM: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2726 case SVGA3D_BC4_UNORM: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2727 case SVGA3D_BC5_UNORM: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2728 case SVGA3D_B4G4R4A4_UNORM: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2729 case SVGA3D_BC7_UNORM: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2730 case SVGA3D_BC7_UNORM_SRGB: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2731 case SVGA3D_R9G9B9E5_SHAREDEXP: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2732 default:
2733 break;
2734 }
2735 return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2736}
2737
2738
2739int DXShaderUpdateResources(DXShaderInfo const *pInfo, VGPU10_RESOURCE_DIMENSION *paResourceDimension,
2740 VGPU10_RESOURCE_RETURN_TYPE *paResourceReturnType, uint32_t cResources)
2741{
2742 for (uint32_t i = 0; i < pInfo->cDclResource; ++i)
2743 {
2744 VGPU10_RESOURCE_DIMENSION const resourceDimension = i < cResources ? paResourceDimension[i] : VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
2745 AssertContinue(resourceDimension <= VGPU10_RESOURCE_DIMENSION_TEXTURECUBEARRAY);
2746
2747 VGPU10_RESOURCE_RETURN_TYPE const resourceReturnType = i < cResources ? paResourceReturnType[i] : VGPU10_RETURN_TYPE_FLOAT;
2748 AssertContinue(resourceReturnType <= VGPU10_RETURN_TYPE_MIXED);
2749
2750 uint32_t const offToken = pInfo->aOffDclResource[i];
2751 AssertContinue(offToken < pInfo->cbBytecode);
2752 if (offToken == 0) /* nothing at this index */
2753 continue;
2754
2755 uint32_t *paToken = (uint32_t *)((uintptr_t)pInfo->pvBytecode + offToken);
2756
2757 VGPU10OpcodeToken0 *pOpcode = (VGPU10OpcodeToken0 *)&paToken[0];
2758 pOpcode->resourceDimension = resourceDimension;
2759 // paToken[1] unmodified
2760 // paToken[2] unmodified
2761 VGPU10ResourceReturnTypeToken *pReturnTypeToken = (VGPU10ResourceReturnTypeToken *)&paToken[3];
2762 pReturnTypeToken->component0 = (uint8_t)resourceReturnType;
2763 pReturnTypeToken->component1 = (uint8_t)resourceReturnType;
2764 pReturnTypeToken->component2 = (uint8_t)resourceReturnType;
2765 pReturnTypeToken->component3 = (uint8_t)resourceReturnType;
2766 }
2767
2768 return VINF_SUCCESS;
2769}
2770
2771#ifdef DXBC_STANDALONE_TEST
2772static int dxbcCreateFromBytecode(void const *pvShaderCode, uint32_t cbShaderCode, void **ppvDXBC, uint32_t *pcbDXBC)
2773{
2774 /* Parse the shader bytecode and create DXBC container with resource, signature and shader bytecode blobs. */
2775 DXShaderInfo info;
2776 RT_ZERO(info);
2777 int rc = DXShaderParse(pvShaderCode, cbShaderCode, &info);
2778 if (RT_SUCCESS(rc))
2779 rc = DXShaderCreateDXBC(&info, ppvDXBC, pcbDXBC);
2780 return rc;
2781}
2782
2783static int parseShaderVM(void const *pvShaderCode, uint32_t cbShaderCode)
2784{
2785 void *pv = NULL;
2786 uint32_t cb = 0;
2787 int rc = dxbcCreateFromBytecode(pvShaderCode, cbShaderCode, &pv, &cb);
2788 if (RT_SUCCESS(rc))
2789 {
2790 /* Hexdump DXBC */
2791 printf("{\n");
2792 uint8_t *pu8 = (uint8_t *)pv;
2793 for (uint32_t i = 0; i < cb; ++i)
2794 {
2795 if ((i % 16) == 0)
2796 {
2797 if (i > 0)
2798 printf(",\n");
2799
2800 printf(" 0x%02x", pu8[i]);
2801 }
2802 else
2803 {
2804 printf(", 0x%02x", pu8[i]);
2805 }
2806 }
2807 printf("\n");
2808 printf("};\n");
2809
2810 RTMemFree(pv);
2811 }
2812
2813 return rc;
2814}
2815
2816static DXBCBlobHeader *dxbcFindBlob(DXBCHeader *pDXBCHeader, uint32_t u32BlobType)
2817{
2818 uint8_t const *pu8DXBCBegin = (uint8_t *)pDXBCHeader;
2819 for (uint32_t i = 0; i < pDXBCHeader->cBlob; ++i)
2820 {
2821 DXBCBlobHeader *pCurrentBlob = (DXBCBlobHeader *)&pu8DXBCBegin[pDXBCHeader->aBlobOffset[i]];
2822 if (pCurrentBlob->u32BlobType == u32BlobType)
2823 return pCurrentBlob;
2824 }
2825 return NULL;
2826}
2827
2828static int dxbcExtractShaderCode(DXBCHeader *pDXBCHeader, void **ppvCode, uint32_t *pcbCode)
2829{
2830 DXBCBlobHeader *pBlob = dxbcFindBlob(pDXBCHeader, DXBC_BLOB_TYPE_SHDR);
2831 AssertReturn(pBlob, VERR_NOT_IMPLEMENTED);
2832
2833 DXBCBlobSHDR *pSHDR = (DXBCBlobSHDR *)&pBlob[1];
2834 *pcbCode = pSHDR->cToken * 4;
2835 *ppvCode = RTMemAlloc(*pcbCode);
2836 AssertReturn(*ppvCode, VERR_NO_MEMORY);
2837
2838 memcpy(*ppvCode, pSHDR, *pcbCode);
2839 return VINF_SUCCESS;
2840}
2841
2842static int parseShaderDXBC(void const *pvDXBC)
2843{
2844 DXBCHeader *pDXBCHeader = (DXBCHeader *)pvDXBC;
2845 void *pvShaderCode = NULL;
2846 uint32_t cbShaderCode = 0;
2847 int rc = dxbcExtractShaderCode(pDXBCHeader, &pvShaderCode, &cbShaderCode);
2848 if (RT_SUCCESS(rc))
2849 {
2850 rc = parseShaderVM(pvShaderCode, cbShaderCode);
2851 RTMemFree(pvShaderCode);
2852 }
2853 return rc;
2854}
2855#endif /* DXBC_STANDALONE_TEST */
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette