VirtualBox

source: vbox/trunk/src/VBox/Devices/Graphics/DevVGA-SVGA3d-dx-shader.cpp@ 100195

Last change on this file since 100195 was 99688, checked in by vboxsync, 21 months ago

Devices/Graphics: common code for 3D backend initialization and termination. bugref:9830

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 131.1 KB
Line 
/* $Id: DevVGA-SVGA3d-dx-shader.cpp 99688 2023-05-09 05:28:22Z vboxsync $ */
/** @file
 * DevVMWare - VMWare SVGA device - VGPU10+ (DX) shader utilities.
 */
5
/*
 * Copyright (C) 2020-2023 Oracle and/or its affiliates.
 *
 * This file is part of VirtualBox base platform packages, as
 * available from https://www.virtualbox.org.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, in version 3 of the
 * License.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <https://www.gnu.org/licenses>.
 *
 * SPDX-License-Identifier: GPL-3.0-only
 */
27
28
/*********************************************************************************************************************************
*   Header Files                                                                                                                 *
*********************************************************************************************************************************/
32#define LOG_GROUP LOG_GROUP_DEV_VMSVGA
33#include <VBox/AssertGuest.h>
34#include <VBox/log.h>
35
36#include <iprt/asm.h>
37#include <iprt/md5.h>
38#include <iprt/mem.h>
39#include <iprt/sort.h>
40#include <iprt/string.h>
41
42#include "DevVGA-SVGA3d-dx-shader.h"
43
/*
 * When RT_OS_WINDOWS is defined the D3D11 tokenized program format header is
 * included; otherwise the two extended opcode constants below are defined
 * locally.
 */
#ifdef RT_OS_WINDOWS
# include <d3d11TokenizedProgramFormat.hpp>
#else
# define D3D11_SB_EXTENDED_OPCODE_RESOURCE_DIM         2
# define D3D11_SB_EXTENDED_OPCODE_RESOURCE_RETURN_TYPE 3
#endif
50
/*
 *
 * DXBC shader binary format definitions.
 *
 */
56
/**
 * DXBC container header.
 *
 * aBlobOffset is declared with a single element; per the field comments the
 * array holds cBlob entries.
 */
typedef struct DXBCHeader
{
    uint32_t u32DXBC;        /**< 0x43425844 = 'D', 'X', 'B', 'C' */
    uint8_t  au8Hash[16];    /**< Modified MD5 hash. See dxbcHash. */
    uint32_t u32Version;     /**< 1 */
    uint32_t cbTotal;        /**< Total size in bytes. Including the header. */
    uint32_t cBlob;          /**< Number of entries in aBlobOffset array. */
    uint32_t aBlobOffset[1]; /**< Offsets of blobs from the start of DXBC header. */
} DXBCHeader;
67
/** DXBC container magic: the characters 'D', 'X', 'B', 'C' combined via RT_MAKE_U32_FROM_U8. */
#define DXBC_MAGIC RT_MAKE_U32_FROM_U8('D', 'X', 'B', 'C')
69
/**
 * DXBC blob header.
 *
 * The blob's data follows immediately after this header.
 */
typedef struct DXBCBlobHeader
{
    uint32_t u32BlobType;    /**< FourCC code. DXBC_BLOB_TYPE_* */
    uint32_t cbBlob;         /**< Size of the blob excluding the blob header. 4 bytes aligned. */
    /* Followed by the blob's data. */
} DXBCBlobHeader;
77
/* DXBC blob types (FourCC codes built from four characters). */
#define DXBC_BLOB_TYPE_ISGN RT_MAKE_U32_FROM_U8('I', 'S', 'G', 'N')
#define DXBC_BLOB_TYPE_OSGN RT_MAKE_U32_FROM_U8('O', 'S', 'G', 'N')
#define DXBC_BLOB_TYPE_PCSG RT_MAKE_U32_FROM_U8('P', 'C', 'S', 'G')
#define DXBC_BLOB_TYPE_SHDR RT_MAKE_U32_FROM_U8('S', 'H', 'D', 'R')
/** @todo More... */
84
85/* 'SHDR' blob data format. */
86typedef struct DXBCBlobSHDR
87{
88 VGPU10ProgramToken programToken;
89 uint32_t cToken; /* Number of 32 bit tokens including programToken and cToken. */
90 uint32_t au32Token[1]; /* cToken - 2 number of tokens. */
91} DXBCBlobSHDR;
92
/**
 * Element of an input or output signature.
 *
 * The component masks can be accessed either as separate bit fields (u.m) or
 * as one 32-bit value (u.mask).
 */
typedef struct DXBCBlobIOSGNElement
{
    uint32_t offElementName;    /**< Offset of the semantic's name relative to the start of the blob data. */
    uint32_t idxSemantic;       /**< Semantic index. */
    uint32_t enmSystemValue;    /**< SVGA3dDXSignatureSemanticName */
    uint32_t enmComponentType;  /**< 1 - unsigned, 2 - integer, 3 - float. */
    uint32_t idxRegister;       /**< Shader register index. Elements must be sorted by register index. */
    union
    {
        struct
        {
            uint32_t mask  : 8;   /**< Component mask. Lower 4 bits represent X, Y, Z, W channels. */
            uint32_t mask2 : 8;   /**< Which components are used in the shader. */
            uint32_t pad   : 16;
        } m;
        uint32_t mask;
    } u;
} DXBCBlobIOSGNElement;
112
113/* 'ISGN' and 'OSGN' blob data format. */
114typedef struct DXBCBlobIOSGN
115{
116 uint32_t cElement; /* Number of signature elements. */
117 uint32_t offElement; /* Offset of the first element from the start of the blob. Equals to 8. */
118 DXBCBlobIOSGNElement aElement[1]; /* Signature elements. Size is cElement. */
119 /* Followed by ASCIIZ semantic names. */
120} DXBCBlobIOSGN;
121
122
/*
 * VGPU10 shader parser definitions.
 */
126
/**
 * Parsed info about an operand index.
 */
typedef struct VGPUOperandIndex
{
    uint32_t            indexRepresentation;    /**< VGPU10_OPERAND_INDEX_REPRESENTATION */
    uint64_t            iOperandImmediate;      /**< Needs up to a qword. */
    struct VGPUOperand *pOperandRelative;       /**< For VGPU10_OPERAND_INDEX_*RELATIVE */
} VGPUOperandIndex;
134
135/* Parsed info about an operand. */
136typedef struct VGPUOperand
137{
138 uint32_t numComponents : 2; /* VGPU10_OPERAND_NUM_COMPONENTS */
139 uint32_t selectionMode : 2; /* VGPU10_OPERAND_4_COMPONENT_SELECTION_MODE */
140 uint32_t mask : 4; /* 4-bits X, Y, Z, W mask for VGPU10_OPERAND_4_COMPONENT_MASK_MODE. */
141 uint32_t operandType : 8; /* VGPU10_OPERAND_TYPE */
142 uint32_t indexDimension : 2; /* VGPU10_OPERAND_INDEX_DIMENSION */
143 VGPUOperandIndex aOperandIndex[VGPU10_OPERAND_INDEX_3D]; /* Up to 3. */
144 uint32_t aImm[4]; /* Immediate values for VGPU10_OPERAND_TYPE_IMMEDIATE* */
145 uint32_t cOperandToken; /* Number of tokens in this operand. */
146 uint32_t const *paOperandToken; /* Pointer to operand tokens in the input buffer. */
147} VGPUOperand;
148
149/* Parsed info about an opcode. */
150typedef struct VGPUOpcode
151{
152 uint32_t cOpcodeToken; /* Number of tokens for this operation. */
153 uint32_t opcodeType; /* VGPU10_OPCODE_* */
154 uint32_t opcodeSubtype; /* For example VGPU10_VMWARE_OPCODE_* */
155 uint32_t semanticName; /* SVGA3dDXSignatureSemanticName for system value declarations. */
156 uint32_t cOperand; /* Number of operands for this instruction. */
157 uint32_t aIdxOperand[8]; /* Indices of the instruction operands in the aValOperand array. */
158 /* 8 should be enough for everyone. */
159 VGPUOperand aValOperand[16]; /* Operands including VGPU10_OPERAND_INDEX_*RELATIVE if they are used: */
160 /* Operand1, VGPU10_OPERAND_INDEX_*RELATIVE for Operand1, ... */
161 /* ... */
162 /* OperandN, VGPU10_OPERAND_INDEX_*RELATIVE for OperandN, ... */
163 /* 16 probably should be enough for everyone. */
164 uint32_t const *paOpcodeToken; /* Pointer to opcode tokens in the input buffer. */
165} VGPUOpcode;
166
/**
 * Static per-opcode information (element type of the g_aOpcodeInfo table).
 */
typedef struct VGPUOpcodeInfo
{
    uint32_t cOperand;  /**< Number of operands for this opcode. */
} VGPUOpcodeInfo;
171
172static VGPUOpcodeInfo const g_aOpcodeInfo[] =
173{
174 { 3 }, /* VGPU10_OPCODE_ADD */
175 { 3 }, /* VGPU10_OPCODE_AND */
176 { 0 }, /* VGPU10_OPCODE_BREAK */
177 { 1 }, /* VGPU10_OPCODE_BREAKC */
178 { 1 }, /* VGPU10_OPCODE_CALL */
179 { 2 }, /* VGPU10_OPCODE_CALLC */
180 { 1 }, /* VGPU10_OPCODE_CASE */
181 { 0 }, /* VGPU10_OPCODE_CONTINUE */
182 { 1 }, /* VGPU10_OPCODE_CONTINUEC */
183 { 0 }, /* VGPU10_OPCODE_CUT */
184 { 0 }, /* VGPU10_OPCODE_DEFAULT */
185 { 2 }, /* VGPU10_OPCODE_DERIV_RTX */
186 { 2 }, /* VGPU10_OPCODE_DERIV_RTY */
187 { 1 }, /* VGPU10_OPCODE_DISCARD */
188 { 3 }, /* VGPU10_OPCODE_DIV */
189 { 3 }, /* VGPU10_OPCODE_DP2 */
190 { 3 }, /* VGPU10_OPCODE_DP3 */
191 { 3 }, /* VGPU10_OPCODE_DP4 */
192 { 0 }, /* VGPU10_OPCODE_ELSE */
193 { 0 }, /* VGPU10_OPCODE_EMIT */
194 { 0 }, /* VGPU10_OPCODE_EMITTHENCUT */
195 { 0 }, /* VGPU10_OPCODE_ENDIF */
196 { 0 }, /* VGPU10_OPCODE_ENDLOOP */
197 { 0 }, /* VGPU10_OPCODE_ENDSWITCH */
198 { 3 }, /* VGPU10_OPCODE_EQ */
199 { 2 }, /* VGPU10_OPCODE_EXP */
200 { 2 }, /* VGPU10_OPCODE_FRC */
201 { 2 }, /* VGPU10_OPCODE_FTOI */
202 { 2 }, /* VGPU10_OPCODE_FTOU */
203 { 3 }, /* VGPU10_OPCODE_GE */
204 { 3 }, /* VGPU10_OPCODE_IADD */
205 { 1 }, /* VGPU10_OPCODE_IF */
206 { 3 }, /* VGPU10_OPCODE_IEQ */
207 { 3 }, /* VGPU10_OPCODE_IGE */
208 { 3 }, /* VGPU10_OPCODE_ILT */
209 { 4 }, /* VGPU10_OPCODE_IMAD */
210 { 3 }, /* VGPU10_OPCODE_IMAX */
211 { 3 }, /* VGPU10_OPCODE_IMIN */
212 { 4 }, /* VGPU10_OPCODE_IMUL */
213 { 3 }, /* VGPU10_OPCODE_INE */
214 { 2 }, /* VGPU10_OPCODE_INEG */
215 { 3 }, /* VGPU10_OPCODE_ISHL */
216 { 3 }, /* VGPU10_OPCODE_ISHR */
217 { 2 }, /* VGPU10_OPCODE_ITOF */
218 { 1 }, /* VGPU10_OPCODE_LABEL */
219 { 3 }, /* VGPU10_OPCODE_LD */
220 { 4 }, /* VGPU10_OPCODE_LD_MS */
221 { 2 }, /* VGPU10_OPCODE_LOG */
222 { 0 }, /* VGPU10_OPCODE_LOOP */
223 { 3 }, /* VGPU10_OPCODE_LT */
224 { 4 }, /* VGPU10_OPCODE_MAD */
225 { 3 }, /* VGPU10_OPCODE_MIN */
226 { 3 }, /* VGPU10_OPCODE_MAX */
227 { UINT32_MAX }, /* VGPU10_OPCODE_CUSTOMDATA: special opcode */
228 { 2 }, /* VGPU10_OPCODE_MOV */
229 { 4 }, /* VGPU10_OPCODE_MOVC */
230 { 3 }, /* VGPU10_OPCODE_MUL */
231 { 3 }, /* VGPU10_OPCODE_NE */
232 { 0 }, /* VGPU10_OPCODE_NOP */
233 { 2 }, /* VGPU10_OPCODE_NOT */
234 { 3 }, /* VGPU10_OPCODE_OR */
235 { 3 }, /* VGPU10_OPCODE_RESINFO */
236 { 0 }, /* VGPU10_OPCODE_RET */
237 { 1 }, /* VGPU10_OPCODE_RETC */
238 { 2 }, /* VGPU10_OPCODE_ROUND_NE */
239 { 2 }, /* VGPU10_OPCODE_ROUND_NI */
240 { 2 }, /* VGPU10_OPCODE_ROUND_PI */
241 { 2 }, /* VGPU10_OPCODE_ROUND_Z */
242 { 2 }, /* VGPU10_OPCODE_RSQ */
243 { 4 }, /* VGPU10_OPCODE_SAMPLE */
244 { 5 }, /* VGPU10_OPCODE_SAMPLE_C */
245 { 5 }, /* VGPU10_OPCODE_SAMPLE_C_LZ */
246 { 5 }, /* VGPU10_OPCODE_SAMPLE_L */
247 { 6 }, /* VGPU10_OPCODE_SAMPLE_D */
248 { 5 }, /* VGPU10_OPCODE_SAMPLE_B */
249 { 2 }, /* VGPU10_OPCODE_SQRT */
250 { 1 }, /* VGPU10_OPCODE_SWITCH */
251 { 3 }, /* VGPU10_OPCODE_SINCOS */
252 { 4 }, /* VGPU10_OPCODE_UDIV */
253 { 3 }, /* VGPU10_OPCODE_ULT */
254 { 3 }, /* VGPU10_OPCODE_UGE */
255 { 4 }, /* VGPU10_OPCODE_UMUL */
256 { 4 }, /* VGPU10_OPCODE_UMAD */
257 { 3 }, /* VGPU10_OPCODE_UMAX */
258 { 3 }, /* VGPU10_OPCODE_UMIN */
259 { 3 }, /* VGPU10_OPCODE_USHR */
260 { 2 }, /* VGPU10_OPCODE_UTOF */
261 { 3 }, /* VGPU10_OPCODE_XOR */
262 { 1 }, /* VGPU10_OPCODE_DCL_RESOURCE */
263 { 1 }, /* VGPU10_OPCODE_DCL_CONSTANT_BUFFER */
264 { 1 }, /* VGPU10_OPCODE_DCL_SAMPLER */
265 { 1 }, /* VGPU10_OPCODE_DCL_INDEX_RANGE */
266 { 0 }, /* VGPU10_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY */
267 { 0 }, /* VGPU10_OPCODE_DCL_GS_INPUT_PRIMITIVE */
268 { 0 }, /* VGPU10_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT */
269 { 1 }, /* VGPU10_OPCODE_DCL_INPUT */
270 { 1 }, /* VGPU10_OPCODE_DCL_INPUT_SGV */
271 { 1 }, /* VGPU10_OPCODE_DCL_INPUT_SIV */
272 { 1 }, /* VGPU10_OPCODE_DCL_INPUT_PS */
273 { 1 }, /* VGPU10_OPCODE_DCL_INPUT_PS_SGV */
274 { 1 }, /* VGPU10_OPCODE_DCL_INPUT_PS_SIV */
275 { 1 }, /* VGPU10_OPCODE_DCL_OUTPUT */
276 { 1 }, /* VGPU10_OPCODE_DCL_OUTPUT_SGV */
277 { 1 }, /* VGPU10_OPCODE_DCL_OUTPUT_SIV */
278 { 0 }, /* VGPU10_OPCODE_DCL_TEMPS */
279 { 0 }, /* VGPU10_OPCODE_DCL_INDEXABLE_TEMP */
280 { 0 }, /* VGPU10_OPCODE_DCL_GLOBAL_FLAGS */
281 { UINT32_MAX }, /* VGPU10_OPCODE_VMWARE: special opcode */
282 { 4 }, /* VGPU10_OPCODE_LOD */
283 { 4 }, /* VGPU10_OPCODE_GATHER4 */
284 { 3 }, /* VGPU10_OPCODE_SAMPLE_POS */
285 { 2 }, /* VGPU10_OPCODE_SAMPLE_INFO */
286 { UINT32_MAX }, /* VGPU10_OPCODE_RESERVED1: special opcode */
287 { 0 }, /* VGPU10_OPCODE_HS_DECLS */
288 { 0 }, /* VGPU10_OPCODE_HS_CONTROL_POINT_PHASE */
289 { 0 }, /* VGPU10_OPCODE_HS_FORK_PHASE */
290 { 0 }, /* VGPU10_OPCODE_HS_JOIN_PHASE */
291 { 1 }, /* VGPU10_OPCODE_EMIT_STREAM */
292 { 1 }, /* VGPU10_OPCODE_CUT_STREAM */
293 { 1 }, /* VGPU10_OPCODE_EMITTHENCUT_STREAM */
294 { 1 }, /* VGPU10_OPCODE_INTERFACE_CALL */
295 { 2 }, /* VGPU10_OPCODE_BUFINFO */
296 { 2 }, /* VGPU10_OPCODE_DERIV_RTX_COARSE */
297 { 2 }, /* VGPU10_OPCODE_DERIV_RTX_FINE */
298 { 2 }, /* VGPU10_OPCODE_DERIV_RTY_COARSE */
299 { 2 }, /* VGPU10_OPCODE_DERIV_RTY_FINE */
300 { 5 }, /* VGPU10_OPCODE_GATHER4_C */
301 { 5 }, /* VGPU10_OPCODE_GATHER4_PO */
302 { 6 }, /* VGPU10_OPCODE_GATHER4_PO_C */
303 { 2 }, /* VGPU10_OPCODE_RCP */
304 { 2 }, /* VGPU10_OPCODE_F32TOF16 */
305 { 2 }, /* VGPU10_OPCODE_F16TOF32 */
306 { 4 }, /* VGPU10_OPCODE_UADDC */
307 { 4 }, /* VGPU10_OPCODE_USUBB */
308 { 2 }, /* VGPU10_OPCODE_COUNTBITS */
309 { 2 }, /* VGPU10_OPCODE_FIRSTBIT_HI */
310 { 2 }, /* VGPU10_OPCODE_FIRSTBIT_LO */
311 { 2 }, /* VGPU10_OPCODE_FIRSTBIT_SHI */
312 { 4 }, /* VGPU10_OPCODE_UBFE */
313 { 4 }, /* VGPU10_OPCODE_IBFE */
314 { 5 }, /* VGPU10_OPCODE_BFI */
315 { 2 }, /* VGPU10_OPCODE_BFREV */
316 { 5 }, /* VGPU10_OPCODE_SWAPC */
317 { 1 }, /* VGPU10_OPCODE_DCL_STREAM */
318 { 0 }, /* VGPU10_OPCODE_DCL_FUNCTION_BODY */
319 { 0 }, /* VGPU10_OPCODE_DCL_FUNCTION_TABLE */
320 { 0 }, /* VGPU10_OPCODE_DCL_INTERFACE */
321 { 0 }, /* VGPU10_OPCODE_DCL_INPUT_CONTROL_POINT_COUNT */
322 { 0 }, /* VGPU10_OPCODE_DCL_OUTPUT_CONTROL_POINT_COUNT */
323 { 0 }, /* VGPU10_OPCODE_DCL_TESS_DOMAIN */
324 { 0 }, /* VGPU10_OPCODE_DCL_TESS_PARTITIONING */
325 { 0 }, /* VGPU10_OPCODE_DCL_TESS_OUTPUT_PRIMITIVE */
326 { 0 }, /* VGPU10_OPCODE_DCL_HS_MAX_TESSFACTOR */
327 { 0 }, /* VGPU10_OPCODE_DCL_HS_FORK_PHASE_INSTANCE_COUNT */
328 { 0 }, /* VGPU10_OPCODE_DCL_HS_JOIN_PHASE_INSTANCE_COUNT */
329 { 0 }, /* VGPU10_OPCODE_DCL_THREAD_GROUP */
330 { 1 }, /* VGPU10_OPCODE_DCL_UAV_TYPED */
331 { 1 }, /* VGPU10_OPCODE_DCL_UAV_RAW */
332 { 1 }, /* VGPU10_OPCODE_DCL_UAV_STRUCTURED */
333 { 1 }, /* VGPU10_OPCODE_DCL_TGSM_RAW */
334 { 1 }, /* VGPU10_OPCODE_DCL_TGSM_STRUCTURED */
335 { 1 }, /* VGPU10_OPCODE_DCL_RESOURCE_RAW */
336 { 1 }, /* VGPU10_OPCODE_DCL_RESOURCE_STRUCTURED */
337 { 3 }, /* VGPU10_OPCODE_LD_UAV_TYPED */
338 { 3 }, /* VGPU10_OPCODE_STORE_UAV_TYPED */
339 { 3 }, /* VGPU10_OPCODE_LD_RAW */
340 { 3 }, /* VGPU10_OPCODE_STORE_RAW */
341 { 4 }, /* VGPU10_OPCODE_LD_STRUCTURED */
342 { 4 }, /* VGPU10_OPCODE_STORE_STRUCTURED */
343 { 3 }, /* VGPU10_OPCODE_ATOMIC_AND */
344 { 3 }, /* VGPU10_OPCODE_ATOMIC_OR */
345 { 3 }, /* VGPU10_OPCODE_ATOMIC_XOR */
346 { 4 }, /* VGPU10_OPCODE_ATOMIC_CMP_STORE */
347 { 3 }, /* VGPU10_OPCODE_ATOMIC_IADD */
348 { 3 }, /* VGPU10_OPCODE_ATOMIC_IMAX */
349 { 3 }, /* VGPU10_OPCODE_ATOMIC_IMIN */
350 { 3 }, /* VGPU10_OPCODE_ATOMIC_UMAX */
351 { 3 }, /* VGPU10_OPCODE_ATOMIC_UMIN */
352 { 2 }, /* VGPU10_OPCODE_IMM_ATOMIC_ALLOC */
353 { 2 }, /* VGPU10_OPCODE_IMM_ATOMIC_CONSUME */
354 { 4 }, /* VGPU10_OPCODE_IMM_ATOMIC_IADD */
355 { 4 }, /* VGPU10_OPCODE_IMM_ATOMIC_AND */
356 { 4 }, /* VGPU10_OPCODE_IMM_ATOMIC_OR */
357 { 4 }, /* VGPU10_OPCODE_IMM_ATOMIC_XOR */
358 { 4 }, /* VGPU10_OPCODE_IMM_ATOMIC_EXCH */
359 { 5 }, /* VGPU10_OPCODE_IMM_ATOMIC_CMP_EXCH */
360 { 4 }, /* VGPU10_OPCODE_IMM_ATOMIC_IMAX */
361 { 4 }, /* VGPU10_OPCODE_IMM_ATOMIC_IMIN */
362 { 4 }, /* VGPU10_OPCODE_IMM_ATOMIC_UMAX */
363 { 4 }, /* VGPU10_OPCODE_IMM_ATOMIC_UMIN */
364 { 0 }, /* VGPU10_OPCODE_SYNC */
365 { 3 }, /* VGPU10_OPCODE_DADD */
366 { 3 }, /* VGPU10_OPCODE_DMAX */
367 { 3 }, /* VGPU10_OPCODE_DMIN */
368 { 3 }, /* VGPU10_OPCODE_DMUL */
369 { 3 }, /* VGPU10_OPCODE_DEQ */
370 { 3 }, /* VGPU10_OPCODE_DGE */
371 { 3 }, /* VGPU10_OPCODE_DLT */
372 { 3 }, /* VGPU10_OPCODE_DNE */
373 { 2 }, /* VGPU10_OPCODE_DMOV */
374 { 4 }, /* VGPU10_OPCODE_DMOVC */
375 { 2 }, /* VGPU10_OPCODE_DTOF */
376 { 2 }, /* VGPU10_OPCODE_FTOD */
377 { 3 }, /* VGPU10_OPCODE_EVAL_SNAPPED */
378 { 3 }, /* VGPU10_OPCODE_EVAL_SAMPLE_INDEX */
379 { 2 }, /* VGPU10_OPCODE_EVAL_CENTROID */
380 { 0 }, /* VGPU10_OPCODE_DCL_GS_INSTANCE_COUNT */
381 { 0 }, /* VGPU10_OPCODE_ABORT */
382 { 0 }, /* VGPU10_OPCODE_DEBUG_BREAK */
383 { 0 }, /* VGPU10_OPCODE_RESERVED0 */
384 { 3 }, /* VGPU10_OPCODE_DDIV */
385 { 4 }, /* VGPU10_OPCODE_DFMA */
386 { 2 }, /* VGPU10_OPCODE_DRCP */
387 { 4 }, /* VGPU10_OPCODE_MSAD */
388 { 2 }, /* VGPU10_OPCODE_DTOI */
389 { 2 }, /* VGPU10_OPCODE_DTOU */
390 { 2 }, /* VGPU10_OPCODE_ITOD */
391 { 2 }, /* VGPU10_OPCODE_UTOD */
392};
393AssertCompile(RT_ELEMENTS(g_aOpcodeInfo) == VGPU10_NUM_OPCODES);
394
395#ifdef LOG_ENABLED
/*
 *
 * Helpers to translate a VGPU10 shader constant to a string.
 *
 */
401
/** Emits a switch case which returns the identifier @a idx as a string literal. */
#define SVGA_CASE_ID2STR(idx) case idx: return #idx
403
404static const char *dxbcOpcodeToString(uint32_t opcodeType)
405{
406 VGPU10_OPCODE_TYPE enm = (VGPU10_OPCODE_TYPE)opcodeType;
407 switch (enm)
408 {
409 SVGA_CASE_ID2STR(VGPU10_OPCODE_ADD);
410 SVGA_CASE_ID2STR(VGPU10_OPCODE_AND);
411 SVGA_CASE_ID2STR(VGPU10_OPCODE_BREAK);
412 SVGA_CASE_ID2STR(VGPU10_OPCODE_BREAKC);
413 SVGA_CASE_ID2STR(VGPU10_OPCODE_CALL);
414 SVGA_CASE_ID2STR(VGPU10_OPCODE_CALLC);
415 SVGA_CASE_ID2STR(VGPU10_OPCODE_CASE);
416 SVGA_CASE_ID2STR(VGPU10_OPCODE_CONTINUE);
417 SVGA_CASE_ID2STR(VGPU10_OPCODE_CONTINUEC);
418 SVGA_CASE_ID2STR(VGPU10_OPCODE_CUT);
419 SVGA_CASE_ID2STR(VGPU10_OPCODE_DEFAULT);
420 SVGA_CASE_ID2STR(VGPU10_OPCODE_DERIV_RTX);
421 SVGA_CASE_ID2STR(VGPU10_OPCODE_DERIV_RTY);
422 SVGA_CASE_ID2STR(VGPU10_OPCODE_DISCARD);
423 SVGA_CASE_ID2STR(VGPU10_OPCODE_DIV);
424 SVGA_CASE_ID2STR(VGPU10_OPCODE_DP2);
425 SVGA_CASE_ID2STR(VGPU10_OPCODE_DP3);
426 SVGA_CASE_ID2STR(VGPU10_OPCODE_DP4);
427 SVGA_CASE_ID2STR(VGPU10_OPCODE_ELSE);
428 SVGA_CASE_ID2STR(VGPU10_OPCODE_EMIT);
429 SVGA_CASE_ID2STR(VGPU10_OPCODE_EMITTHENCUT);
430 SVGA_CASE_ID2STR(VGPU10_OPCODE_ENDIF);
431 SVGA_CASE_ID2STR(VGPU10_OPCODE_ENDLOOP);
432 SVGA_CASE_ID2STR(VGPU10_OPCODE_ENDSWITCH);
433 SVGA_CASE_ID2STR(VGPU10_OPCODE_EQ);
434 SVGA_CASE_ID2STR(VGPU10_OPCODE_EXP);
435 SVGA_CASE_ID2STR(VGPU10_OPCODE_FRC);
436 SVGA_CASE_ID2STR(VGPU10_OPCODE_FTOI);
437 SVGA_CASE_ID2STR(VGPU10_OPCODE_FTOU);
438 SVGA_CASE_ID2STR(VGPU10_OPCODE_GE);
439 SVGA_CASE_ID2STR(VGPU10_OPCODE_IADD);
440 SVGA_CASE_ID2STR(VGPU10_OPCODE_IF);
441 SVGA_CASE_ID2STR(VGPU10_OPCODE_IEQ);
442 SVGA_CASE_ID2STR(VGPU10_OPCODE_IGE);
443 SVGA_CASE_ID2STR(VGPU10_OPCODE_ILT);
444 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMAD);
445 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMAX);
446 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMIN);
447 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMUL);
448 SVGA_CASE_ID2STR(VGPU10_OPCODE_INE);
449 SVGA_CASE_ID2STR(VGPU10_OPCODE_INEG);
450 SVGA_CASE_ID2STR(VGPU10_OPCODE_ISHL);
451 SVGA_CASE_ID2STR(VGPU10_OPCODE_ISHR);
452 SVGA_CASE_ID2STR(VGPU10_OPCODE_ITOF);
453 SVGA_CASE_ID2STR(VGPU10_OPCODE_LABEL);
454 SVGA_CASE_ID2STR(VGPU10_OPCODE_LD);
455 SVGA_CASE_ID2STR(VGPU10_OPCODE_LD_MS);
456 SVGA_CASE_ID2STR(VGPU10_OPCODE_LOG);
457 SVGA_CASE_ID2STR(VGPU10_OPCODE_LOOP);
458 SVGA_CASE_ID2STR(VGPU10_OPCODE_LT);
459 SVGA_CASE_ID2STR(VGPU10_OPCODE_MAD);
460 SVGA_CASE_ID2STR(VGPU10_OPCODE_MIN);
461 SVGA_CASE_ID2STR(VGPU10_OPCODE_MAX);
462 SVGA_CASE_ID2STR(VGPU10_OPCODE_CUSTOMDATA);
463 SVGA_CASE_ID2STR(VGPU10_OPCODE_MOV);
464 SVGA_CASE_ID2STR(VGPU10_OPCODE_MOVC);
465 SVGA_CASE_ID2STR(VGPU10_OPCODE_MUL);
466 SVGA_CASE_ID2STR(VGPU10_OPCODE_NE);
467 SVGA_CASE_ID2STR(VGPU10_OPCODE_NOP);
468 SVGA_CASE_ID2STR(VGPU10_OPCODE_NOT);
469 SVGA_CASE_ID2STR(VGPU10_OPCODE_OR);
470 SVGA_CASE_ID2STR(VGPU10_OPCODE_RESINFO);
471 SVGA_CASE_ID2STR(VGPU10_OPCODE_RET);
472 SVGA_CASE_ID2STR(VGPU10_OPCODE_RETC);
473 SVGA_CASE_ID2STR(VGPU10_OPCODE_ROUND_NE);
474 SVGA_CASE_ID2STR(VGPU10_OPCODE_ROUND_NI);
475 SVGA_CASE_ID2STR(VGPU10_OPCODE_ROUND_PI);
476 SVGA_CASE_ID2STR(VGPU10_OPCODE_ROUND_Z);
477 SVGA_CASE_ID2STR(VGPU10_OPCODE_RSQ);
478 SVGA_CASE_ID2STR(VGPU10_OPCODE_SAMPLE);
479 SVGA_CASE_ID2STR(VGPU10_OPCODE_SAMPLE_C);
480 SVGA_CASE_ID2STR(VGPU10_OPCODE_SAMPLE_C_LZ);
481 SVGA_CASE_ID2STR(VGPU10_OPCODE_SAMPLE_L);
482 SVGA_CASE_ID2STR(VGPU10_OPCODE_SAMPLE_D);
483 SVGA_CASE_ID2STR(VGPU10_OPCODE_SAMPLE_B);
484 SVGA_CASE_ID2STR(VGPU10_OPCODE_SQRT);
485 SVGA_CASE_ID2STR(VGPU10_OPCODE_SWITCH);
486 SVGA_CASE_ID2STR(VGPU10_OPCODE_SINCOS);
487 SVGA_CASE_ID2STR(VGPU10_OPCODE_UDIV);
488 SVGA_CASE_ID2STR(VGPU10_OPCODE_ULT);
489 SVGA_CASE_ID2STR(VGPU10_OPCODE_UGE);
490 SVGA_CASE_ID2STR(VGPU10_OPCODE_UMUL);
491 SVGA_CASE_ID2STR(VGPU10_OPCODE_UMAD);
492 SVGA_CASE_ID2STR(VGPU10_OPCODE_UMAX);
493 SVGA_CASE_ID2STR(VGPU10_OPCODE_UMIN);
494 SVGA_CASE_ID2STR(VGPU10_OPCODE_USHR);
495 SVGA_CASE_ID2STR(VGPU10_OPCODE_UTOF);
496 SVGA_CASE_ID2STR(VGPU10_OPCODE_XOR);
497 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_RESOURCE);
498 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_CONSTANT_BUFFER);
499 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_SAMPLER);
500 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_INDEX_RANGE);
501 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY);
502 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_GS_INPUT_PRIMITIVE);
503 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT);
504 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_INPUT);
505 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_INPUT_SGV);
506 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_INPUT_SIV);
507 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_INPUT_PS);
508 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_INPUT_PS_SGV);
509 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_INPUT_PS_SIV);
510 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_OUTPUT);
511 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_OUTPUT_SGV);
512 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_OUTPUT_SIV);
513 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_TEMPS);
514 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_INDEXABLE_TEMP);
515 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_GLOBAL_FLAGS);
516 SVGA_CASE_ID2STR(VGPU10_OPCODE_VMWARE);
517 SVGA_CASE_ID2STR(VGPU10_OPCODE_LOD);
518 SVGA_CASE_ID2STR(VGPU10_OPCODE_GATHER4);
519 SVGA_CASE_ID2STR(VGPU10_OPCODE_SAMPLE_POS);
520 SVGA_CASE_ID2STR(VGPU10_OPCODE_SAMPLE_INFO);
521 SVGA_CASE_ID2STR(VGPU10_OPCODE_RESERVED1);
522 SVGA_CASE_ID2STR(VGPU10_OPCODE_HS_DECLS);
523 SVGA_CASE_ID2STR(VGPU10_OPCODE_HS_CONTROL_POINT_PHASE);
524 SVGA_CASE_ID2STR(VGPU10_OPCODE_HS_FORK_PHASE);
525 SVGA_CASE_ID2STR(VGPU10_OPCODE_HS_JOIN_PHASE);
526 SVGA_CASE_ID2STR(VGPU10_OPCODE_EMIT_STREAM);
527 SVGA_CASE_ID2STR(VGPU10_OPCODE_CUT_STREAM);
528 SVGA_CASE_ID2STR(VGPU10_OPCODE_EMITTHENCUT_STREAM);
529 SVGA_CASE_ID2STR(VGPU10_OPCODE_INTERFACE_CALL);
530 SVGA_CASE_ID2STR(VGPU10_OPCODE_BUFINFO);
531 SVGA_CASE_ID2STR(VGPU10_OPCODE_DERIV_RTX_COARSE);
532 SVGA_CASE_ID2STR(VGPU10_OPCODE_DERIV_RTX_FINE);
533 SVGA_CASE_ID2STR(VGPU10_OPCODE_DERIV_RTY_COARSE);
534 SVGA_CASE_ID2STR(VGPU10_OPCODE_DERIV_RTY_FINE);
535 SVGA_CASE_ID2STR(VGPU10_OPCODE_GATHER4_C);
536 SVGA_CASE_ID2STR(VGPU10_OPCODE_GATHER4_PO);
537 SVGA_CASE_ID2STR(VGPU10_OPCODE_GATHER4_PO_C);
538 SVGA_CASE_ID2STR(VGPU10_OPCODE_RCP);
539 SVGA_CASE_ID2STR(VGPU10_OPCODE_F32TOF16);
540 SVGA_CASE_ID2STR(VGPU10_OPCODE_F16TOF32);
541 SVGA_CASE_ID2STR(VGPU10_OPCODE_UADDC);
542 SVGA_CASE_ID2STR(VGPU10_OPCODE_USUBB);
543 SVGA_CASE_ID2STR(VGPU10_OPCODE_COUNTBITS);
544 SVGA_CASE_ID2STR(VGPU10_OPCODE_FIRSTBIT_HI);
545 SVGA_CASE_ID2STR(VGPU10_OPCODE_FIRSTBIT_LO);
546 SVGA_CASE_ID2STR(VGPU10_OPCODE_FIRSTBIT_SHI);
547 SVGA_CASE_ID2STR(VGPU10_OPCODE_UBFE);
548 SVGA_CASE_ID2STR(VGPU10_OPCODE_IBFE);
549 SVGA_CASE_ID2STR(VGPU10_OPCODE_BFI);
550 SVGA_CASE_ID2STR(VGPU10_OPCODE_BFREV);
551 SVGA_CASE_ID2STR(VGPU10_OPCODE_SWAPC);
552 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_STREAM);
553 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_FUNCTION_BODY);
554 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_FUNCTION_TABLE);
555 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_INTERFACE);
556 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_INPUT_CONTROL_POINT_COUNT);
557 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_OUTPUT_CONTROL_POINT_COUNT);
558 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_TESS_DOMAIN);
559 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_TESS_PARTITIONING);
560 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_TESS_OUTPUT_PRIMITIVE);
561 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_HS_MAX_TESSFACTOR);
562 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_HS_FORK_PHASE_INSTANCE_COUNT);
563 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_HS_JOIN_PHASE_INSTANCE_COUNT);
564 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_THREAD_GROUP);
565 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_UAV_TYPED);
566 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_UAV_RAW);
567 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_UAV_STRUCTURED);
568 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_TGSM_RAW);
569 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_TGSM_STRUCTURED);
570 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_RESOURCE_RAW);
571 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_RESOURCE_STRUCTURED);
572 SVGA_CASE_ID2STR(VGPU10_OPCODE_LD_UAV_TYPED);
573 SVGA_CASE_ID2STR(VGPU10_OPCODE_STORE_UAV_TYPED);
574 SVGA_CASE_ID2STR(VGPU10_OPCODE_LD_RAW);
575 SVGA_CASE_ID2STR(VGPU10_OPCODE_STORE_RAW);
576 SVGA_CASE_ID2STR(VGPU10_OPCODE_LD_STRUCTURED);
577 SVGA_CASE_ID2STR(VGPU10_OPCODE_STORE_STRUCTURED);
578 SVGA_CASE_ID2STR(VGPU10_OPCODE_ATOMIC_AND);
579 SVGA_CASE_ID2STR(VGPU10_OPCODE_ATOMIC_OR);
580 SVGA_CASE_ID2STR(VGPU10_OPCODE_ATOMIC_XOR);
581 SVGA_CASE_ID2STR(VGPU10_OPCODE_ATOMIC_CMP_STORE);
582 SVGA_CASE_ID2STR(VGPU10_OPCODE_ATOMIC_IADD);
583 SVGA_CASE_ID2STR(VGPU10_OPCODE_ATOMIC_IMAX);
584 SVGA_CASE_ID2STR(VGPU10_OPCODE_ATOMIC_IMIN);
585 SVGA_CASE_ID2STR(VGPU10_OPCODE_ATOMIC_UMAX);
586 SVGA_CASE_ID2STR(VGPU10_OPCODE_ATOMIC_UMIN);
587 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMM_ATOMIC_ALLOC);
588 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMM_ATOMIC_CONSUME);
589 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMM_ATOMIC_IADD);
590 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMM_ATOMIC_AND);
591 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMM_ATOMIC_OR);
592 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMM_ATOMIC_XOR);
593 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMM_ATOMIC_EXCH);
594 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMM_ATOMIC_CMP_EXCH);
595 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMM_ATOMIC_IMAX);
596 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMM_ATOMIC_IMIN);
597 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMM_ATOMIC_UMAX);
598 SVGA_CASE_ID2STR(VGPU10_OPCODE_IMM_ATOMIC_UMIN);
599 SVGA_CASE_ID2STR(VGPU10_OPCODE_SYNC);
600 SVGA_CASE_ID2STR(VGPU10_OPCODE_DADD);
601 SVGA_CASE_ID2STR(VGPU10_OPCODE_DMAX);
602 SVGA_CASE_ID2STR(VGPU10_OPCODE_DMIN);
603 SVGA_CASE_ID2STR(VGPU10_OPCODE_DMUL);
604 SVGA_CASE_ID2STR(VGPU10_OPCODE_DEQ);
605 SVGA_CASE_ID2STR(VGPU10_OPCODE_DGE);
606 SVGA_CASE_ID2STR(VGPU10_OPCODE_DLT);
607 SVGA_CASE_ID2STR(VGPU10_OPCODE_DNE);
608 SVGA_CASE_ID2STR(VGPU10_OPCODE_DMOV);
609 SVGA_CASE_ID2STR(VGPU10_OPCODE_DMOVC);
610 SVGA_CASE_ID2STR(VGPU10_OPCODE_DTOF);
611 SVGA_CASE_ID2STR(VGPU10_OPCODE_FTOD);
612 SVGA_CASE_ID2STR(VGPU10_OPCODE_EVAL_SNAPPED);
613 SVGA_CASE_ID2STR(VGPU10_OPCODE_EVAL_SAMPLE_INDEX);
614 SVGA_CASE_ID2STR(VGPU10_OPCODE_EVAL_CENTROID);
615 SVGA_CASE_ID2STR(VGPU10_OPCODE_DCL_GS_INSTANCE_COUNT);
616 SVGA_CASE_ID2STR(VGPU10_OPCODE_ABORT);
617 SVGA_CASE_ID2STR(VGPU10_OPCODE_DEBUG_BREAK);
618 SVGA_CASE_ID2STR(VGPU10_OPCODE_RESERVED0);
619 SVGA_CASE_ID2STR(VGPU10_OPCODE_DDIV);
620 SVGA_CASE_ID2STR(VGPU10_OPCODE_DFMA);
621 SVGA_CASE_ID2STR(VGPU10_OPCODE_DRCP);
622 SVGA_CASE_ID2STR(VGPU10_OPCODE_MSAD);
623 SVGA_CASE_ID2STR(VGPU10_OPCODE_DTOI);
624 SVGA_CASE_ID2STR(VGPU10_OPCODE_DTOU);
625 SVGA_CASE_ID2STR(VGPU10_OPCODE_ITOD);
626 SVGA_CASE_ID2STR(VGPU10_OPCODE_UTOD);
627 SVGA_CASE_ID2STR(VGPU10_NUM_OPCODES);
628 }
629 return NULL;
630}
631
632
633static const char *dxbcShaderTypeToString(uint32_t value)
634{
635 VGPU10_PROGRAM_TYPE enm = (VGPU10_PROGRAM_TYPE)value;
636 switch (enm)
637 {
638 SVGA_CASE_ID2STR(VGPU10_PIXEL_SHADER);
639 SVGA_CASE_ID2STR(VGPU10_VERTEX_SHADER);
640 SVGA_CASE_ID2STR(VGPU10_GEOMETRY_SHADER);
641 SVGA_CASE_ID2STR(VGPU10_HULL_SHADER);
642 SVGA_CASE_ID2STR(VGPU10_DOMAIN_SHADER);
643 SVGA_CASE_ID2STR(VGPU10_COMPUTE_SHADER);
644 }
645 return NULL;
646}
647
648
649static const char *dxbcCustomDataClassToString(uint32_t value)
650{
651 VGPU10_CUSTOMDATA_CLASS enm = (VGPU10_CUSTOMDATA_CLASS)value;
652 switch (enm)
653 {
654 SVGA_CASE_ID2STR(VGPU10_CUSTOMDATA_COMMENT);
655 SVGA_CASE_ID2STR(VGPU10_CUSTOMDATA_DEBUGINFO);
656 SVGA_CASE_ID2STR(VGPU10_CUSTOMDATA_OPAQUE);
657 SVGA_CASE_ID2STR(VGPU10_CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER);
658 }
659 return NULL;
660}
661
662
663static const char *dxbcSystemNameToString(uint32_t value)
664{
665 VGPU10_SYSTEM_NAME enm = (VGPU10_SYSTEM_NAME)value;
666 switch (enm)
667 {
668 SVGA_CASE_ID2STR(VGPU10_NAME_UNDEFINED);
669 SVGA_CASE_ID2STR(VGPU10_NAME_POSITION);
670 SVGA_CASE_ID2STR(VGPU10_NAME_CLIP_DISTANCE);
671 SVGA_CASE_ID2STR(VGPU10_NAME_CULL_DISTANCE);
672 SVGA_CASE_ID2STR(VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX);
673 SVGA_CASE_ID2STR(VGPU10_NAME_VIEWPORT_ARRAY_INDEX);
674 SVGA_CASE_ID2STR(VGPU10_NAME_VERTEX_ID);
675 SVGA_CASE_ID2STR(VGPU10_NAME_PRIMITIVE_ID);
676 SVGA_CASE_ID2STR(VGPU10_NAME_INSTANCE_ID);
677 SVGA_CASE_ID2STR(VGPU10_NAME_IS_FRONT_FACE);
678 SVGA_CASE_ID2STR(VGPU10_NAME_SAMPLE_INDEX);
679 SVGA_CASE_ID2STR(VGPU10_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR);
680 SVGA_CASE_ID2STR(VGPU10_NAME_FINAL_QUAD_V_EQ_0_EDGE_TESSFACTOR);
681 SVGA_CASE_ID2STR(VGPU10_NAME_FINAL_QUAD_U_EQ_1_EDGE_TESSFACTOR);
682 SVGA_CASE_ID2STR(VGPU10_NAME_FINAL_QUAD_V_EQ_1_EDGE_TESSFACTOR);
683 SVGA_CASE_ID2STR(VGPU10_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR);
684 SVGA_CASE_ID2STR(VGPU10_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR);
685 SVGA_CASE_ID2STR(VGPU10_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR);
686 SVGA_CASE_ID2STR(VGPU10_NAME_FINAL_TRI_V_EQ_0_EDGE_TESSFACTOR);
687 SVGA_CASE_ID2STR(VGPU10_NAME_FINAL_TRI_W_EQ_0_EDGE_TESSFACTOR);
688 SVGA_CASE_ID2STR(VGPU10_NAME_FINAL_TRI_INSIDE_TESSFACTOR);
689 SVGA_CASE_ID2STR(VGPU10_NAME_FINAL_LINE_DETAIL_TESSFACTOR);
690 SVGA_CASE_ID2STR(VGPU10_NAME_FINAL_LINE_DENSITY_TESSFACTOR);
691 }
692 return NULL;
693}
694
695
696static const char *dxbcOperandTypeToString(uint32_t value)
697{
698 VGPU10_OPERAND_TYPE enm = (VGPU10_OPERAND_TYPE)value;
699 switch (enm)
700 {
701 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_TEMP);
702 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INPUT);
703 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_OUTPUT);
704 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INDEXABLE_TEMP);
705 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_IMMEDIATE32);
706 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_IMMEDIATE64);
707 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_SAMPLER);
708 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_RESOURCE);
709 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_CONSTANT_BUFFER);
710 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER);
711 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_LABEL);
712 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID);
713 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_OUTPUT_DEPTH);
714 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_NULL);
715 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_RASTERIZER);
716 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_OUTPUT_COVERAGE_MASK);
717 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_STREAM);
718 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_FUNCTION_BODY);
719 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_FUNCTION_TABLE);
720 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INTERFACE);
721 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_FUNCTION_INPUT);
722 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_FUNCTION_OUTPUT);
723 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID);
724 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INPUT_FORK_INSTANCE_ID);
725 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INPUT_JOIN_INSTANCE_ID);
726 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT);
727 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT);
728 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT);
729 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INPUT_DOMAIN_POINT);
730 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_THIS_POINTER);
731 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_UAV);
732 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY);
733 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INPUT_THREAD_ID);
734 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INPUT_THREAD_GROUP_ID);
735 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP);
736 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INPUT_COVERAGE_MASK);
737 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP_FLATTENED);
738 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID);
739 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_OUTPUT_DEPTH_GREATER_EQUAL);
740 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_OUTPUT_DEPTH_LESS_EQUAL);
741 SVGA_CASE_ID2STR(VGPU10_OPERAND_TYPE_CYCLE_COUNTER);
742 SVGA_CASE_ID2STR(VGPU10_NUM_OPERANDS);
743 }
744 return NULL;
745}
746
747
748static const char *dxbcExtendedOperandTypeToString(uint32_t value)
749{
750 VGPU10_EXTENDED_OPERAND_TYPE enm = (VGPU10_EXTENDED_OPERAND_TYPE)value;
751 switch (enm)
752 {
753 SVGA_CASE_ID2STR(VGPU10_EXTENDED_OPERAND_EMPTY);
754 SVGA_CASE_ID2STR(VGPU10_EXTENDED_OPERAND_MODIFIER);
755 }
756 return NULL;
757}
758
759
760static const char *dxbcOperandModifierToString(uint32_t value)
761{
762 VGPU10_OPERAND_MODIFIER enm = (VGPU10_OPERAND_MODIFIER)value;
763 switch (enm)
764 {
765 SVGA_CASE_ID2STR(VGPU10_OPERAND_MODIFIER_NONE);
766 SVGA_CASE_ID2STR(VGPU10_OPERAND_MODIFIER_NEG);
767 SVGA_CASE_ID2STR(VGPU10_OPERAND_MODIFIER_ABS);
768 SVGA_CASE_ID2STR(VGPU10_OPERAND_MODIFIER_ABSNEG);
769 }
770 return NULL;
771}
772
773
774static const char *dxbcOperandNumComponentsToString(uint32_t value)
775{
776 VGPU10_OPERAND_NUM_COMPONENTS enm = (VGPU10_OPERAND_NUM_COMPONENTS)value;
777 switch (enm)
778 {
779 SVGA_CASE_ID2STR(VGPU10_OPERAND_0_COMPONENT);
780 SVGA_CASE_ID2STR(VGPU10_OPERAND_1_COMPONENT);
781 SVGA_CASE_ID2STR(VGPU10_OPERAND_4_COMPONENT);
782 SVGA_CASE_ID2STR(VGPU10_OPERAND_N_COMPONENT);
783 }
784 return NULL;
785}
786
787
788static const char *dxbcOperandComponentModeToString(uint32_t value)
789{
790 VGPU10_OPERAND_4_COMPONENT_SELECTION_MODE enm = (VGPU10_OPERAND_4_COMPONENT_SELECTION_MODE)value;
791 switch (enm)
792 {
793 SVGA_CASE_ID2STR(VGPU10_OPERAND_4_COMPONENT_MASK_MODE);
794 SVGA_CASE_ID2STR(VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE);
795 SVGA_CASE_ID2STR(VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE);
796 }
797 return NULL;
798}
799
800
801static const char *dxbcOperandComponentNameToString(uint32_t value)
802{
803 VGPU10_COMPONENT_NAME enm = (VGPU10_COMPONENT_NAME)value;
804 switch (enm)
805 {
806 SVGA_CASE_ID2STR(VGPU10_COMPONENT_X);
807 SVGA_CASE_ID2STR(VGPU10_COMPONENT_Y);
808 SVGA_CASE_ID2STR(VGPU10_COMPONENT_Z);
809 SVGA_CASE_ID2STR(VGPU10_COMPONENT_W);
810 }
811 return NULL;
812}
813
814
815static const char *dxbcOperandIndexDimensionToString(uint32_t value)
816{
817 VGPU10_OPERAND_INDEX_DIMENSION enm = (VGPU10_OPERAND_INDEX_DIMENSION)value;
818 switch (enm)
819 {
820 SVGA_CASE_ID2STR(VGPU10_OPERAND_INDEX_0D);
821 SVGA_CASE_ID2STR(VGPU10_OPERAND_INDEX_1D);
822 SVGA_CASE_ID2STR(VGPU10_OPERAND_INDEX_2D);
823 SVGA_CASE_ID2STR(VGPU10_OPERAND_INDEX_3D);
824 }
825 return NULL;
826}
827
828
829static const char *dxbcOperandIndexRepresentationToString(uint32_t value)
830{
831 VGPU10_OPERAND_INDEX_REPRESENTATION enm = (VGPU10_OPERAND_INDEX_REPRESENTATION)value;
832 switch (enm)
833 {
834 SVGA_CASE_ID2STR(VGPU10_OPERAND_INDEX_IMMEDIATE32);
835 SVGA_CASE_ID2STR(VGPU10_OPERAND_INDEX_IMMEDIATE64);
836 SVGA_CASE_ID2STR(VGPU10_OPERAND_INDEX_RELATIVE);
837 SVGA_CASE_ID2STR(VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE);
838 SVGA_CASE_ID2STR(VGPU10_OPERAND_INDEX_IMMEDIATE64_PLUS_RELATIVE);
839 }
840 return NULL;
841}
842
843
844static const char *dxbcInterpolationModeToString(uint32_t value)
845{
846 VGPU10_INTERPOLATION_MODE enm = (VGPU10_INTERPOLATION_MODE)value;
847 switch (enm)
848 {
849 SVGA_CASE_ID2STR(VGPU10_INTERPOLATION_UNDEFINED);
850 SVGA_CASE_ID2STR(VGPU10_INTERPOLATION_CONSTANT);
851 SVGA_CASE_ID2STR(VGPU10_INTERPOLATION_LINEAR);
852 SVGA_CASE_ID2STR(VGPU10_INTERPOLATION_LINEAR_CENTROID);
853 SVGA_CASE_ID2STR(VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE);
854 SVGA_CASE_ID2STR(VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID);
855 SVGA_CASE_ID2STR(VGPU10_INTERPOLATION_LINEAR_SAMPLE);
856 SVGA_CASE_ID2STR(VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE);
857 }
858 return NULL;
859}
860
861
862static const char *dxbcResourceDimensionToString(uint32_t value)
863{
864 VGPU10_RESOURCE_DIMENSION enm = (VGPU10_RESOURCE_DIMENSION)value;
865 switch (enm)
866 {
867 SVGA_CASE_ID2STR(VGPU10_RESOURCE_DIMENSION_UNKNOWN);
868 SVGA_CASE_ID2STR(VGPU10_RESOURCE_DIMENSION_BUFFER);
869 SVGA_CASE_ID2STR(VGPU10_RESOURCE_DIMENSION_TEXTURE1D);
870 SVGA_CASE_ID2STR(VGPU10_RESOURCE_DIMENSION_TEXTURE2D);
871 SVGA_CASE_ID2STR(VGPU10_RESOURCE_DIMENSION_TEXTURE2DMS);
872 SVGA_CASE_ID2STR(VGPU10_RESOURCE_DIMENSION_TEXTURE3D);
873 SVGA_CASE_ID2STR(VGPU10_RESOURCE_DIMENSION_TEXTURECUBE);
874 SVGA_CASE_ID2STR(VGPU10_RESOURCE_DIMENSION_TEXTURE1DARRAY);
875 SVGA_CASE_ID2STR(VGPU10_RESOURCE_DIMENSION_TEXTURE2DARRAY);
876 SVGA_CASE_ID2STR(VGPU10_RESOURCE_DIMENSION_TEXTURE2DMSARRAY);
877 SVGA_CASE_ID2STR(VGPU10_RESOURCE_DIMENSION_TEXTURECUBEARRAY);
878 }
879 return NULL;
880}
881
882
883static const char *dxbcVmwareOpcodeTypeToString(uint32_t value)
884{
885 VGPU10_VMWARE_OPCODE_TYPE enm = (VGPU10_VMWARE_OPCODE_TYPE)value;
886 switch (enm)
887 {
888 SVGA_CASE_ID2STR(VGPU10_VMWARE_OPCODE_IDIV);
889 SVGA_CASE_ID2STR(VGPU10_VMWARE_OPCODE_DFRC);
890 SVGA_CASE_ID2STR(VGPU10_VMWARE_OPCODE_DRSQ);
891 SVGA_CASE_ID2STR(VGPU10_VMWARE_NUM_OPCODES);
892 }
893 return NULL;
894}
895
896#endif /* LOG_ENABLED */
897
898/*
899 * MD5 from IPRT (alt-md5.cpp) for DXBC hash calculation.
900 * DXBC hash function uses a different padding for the data, see dxbcHash.
 * Therefore RTMd5Final is not needed. Two functions have been renamed: dxbcRTMd5Update and dxbcRTMd5Init.
902 */
903
904
/* The four MD5 round functions. F1 is the usual optimized form of (x & y | ~x & z). */
#define F1(x, y, z) ((z) ^ ((x) & ((y) ^ (z))))
#define F2(x, y, z) F1(z, x, y)
#define F3(x, y, z) ((x) ^ (y) ^ (z))
#define F4(x, y, z) ((y) ^ ((x) | ~(z)))


/* One MD5 step: mix 'data' into w using round function f, rotate left by s, add x. */
#define MD5STEP(f, w, x, y, z, data, s) \
    ( w += f(x, y, z) + (data), w = w << (s) | w >> (32 - (s)), w += x )


/**
 * The core of the MD5 algorithm: folds one block of 16 32-bit words into the
 * running hash state.
 *
 * The 64 steps are executed as four rounds of 16 steps. After every step the
 * four working registers are rotated (a,b,c,d) -> (d,a,b,c), which gives the
 * same register assignment as writing out all 64 steps by hand.
 *
 * @param   buf     The four 32-bit state words, updated in place.
 * @param   in      The 16 input words of the block.
 */
static void rtMd5Transform(uint32_t buf[4], uint32_t const in[16])
{
    /* Additive constant of each step, in execution order. */
    static uint32_t const s_au32StepConst[64] =
    {
        /* Round 1 (F1) */
        0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee, 0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501,
        0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be, 0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821,
        /* Round 2 (F2) */
        0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa, 0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8,
        0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed, 0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a,
        /* Round 3 (F3) */
        0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c, 0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70,
        0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05, 0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665,
        /* Round 4 (F4) */
        0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039, 0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1,
        0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1, 0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391
    };
    /* Rotate counts: one row per round, indexed by (step & 3). */
    static uint8_t const s_acRotate[4][4] =
    {
        { 7, 12, 17, 22 },
        { 5,  9, 14, 20 },
        { 4, 11, 16, 23 },
        { 6, 10, 15, 21 }
    };

    uint32_t a = buf[0];
    uint32_t b = buf[1];
    uint32_t c = buf[2];
    uint32_t d = buf[3];
    uint32_t uTmp;
    unsigned iStep;

    /* Round 1: input words are consumed in order. */
    for (iStep = 0; iStep < 16; ++iStep)
    {
        MD5STEP(F1, a, b, c, d, in[iStep] + s_au32StepConst[iStep], s_acRotate[0][iStep & 3]);
        uTmp = d; d = c; c = b; b = a; a = uTmp;
    }

    /* Round 2: input word index is (5 * step + 1) mod 16. */
    for (iStep = 0; iStep < 16; ++iStep)
    {
        MD5STEP(F2, a, b, c, d, in[(5 * iStep + 1) & 15] + s_au32StepConst[16 + iStep], s_acRotate[1][iStep & 3]);
        uTmp = d; d = c; c = b; b = a; a = uTmp;
    }

    /* Round 3: input word index is (3 * step + 5) mod 16. */
    for (iStep = 0; iStep < 16; ++iStep)
    {
        MD5STEP(F3, a, b, c, d, in[(3 * iStep + 5) & 15] + s_au32StepConst[32 + iStep], s_acRotate[2][iStep & 3]);
        uTmp = d; d = c; c = b; b = a; a = uTmp;
    }

    /* Round 4: input word index is (7 * step) mod 16. */
    for (iStep = 0; iStep < 16; ++iStep)
    {
        MD5STEP(F4, a, b, c, d, in[(7 * iStep) & 15] + s_au32StepConst[48 + iStep], s_acRotate[3][iStep & 3]);
        uTmp = d; d = c; c = b; b = a; a = uTmp;
    }

    /* 16 rotations per round bring the registers back to their original roles. */
    buf[0] += a;
    buf[1] += b;
    buf[2] += c;
    buf[3] += d;
}
1006
1007
#ifdef RT_BIG_ENDIAN
/**
 * Converts an array of 32-bit words from little-endian to host byte order, in place.
 *
 * Only compiled on big-endian hosts; on little-endian hosts the name is a
 * no-op macro (see the #else branch).
 *
 * @param   buf     The words to convert.
 * @param   longs   Number of 32-bit words in @a buf. Zero is allowed and does
 *                  nothing (a pre-decrementing do/while would wrap the counter
 *                  and run far past the buffer).
 */
static void rtMd5ByteReverse(uint32_t *buf, unsigned int longs)
{
    for (unsigned int i = 0; i < longs; ++i)
        buf[i] = RT_LE2H_U32(buf[i]);
}
#else /* little endian - do nothing */
# define rtMd5ByteReverse(buf, len) do { /* Nothing */ } while (0)
#endif
1026
1027
1028/*
1029 * Start MD5 accumulation. Set bit count to 0 and buffer to mysterious
1030 * initialization constants.
1031 */
1032static void dxbcRTMd5Init(PRTMD5CONTEXT pCtx)
1033{
1034 pCtx->AltPrivate.buf[0] = 0x67452301;
1035 pCtx->AltPrivate.buf[1] = 0xefcdab89;
1036 pCtx->AltPrivate.buf[2] = 0x98badcfe;
1037 pCtx->AltPrivate.buf[3] = 0x10325476;
1038
1039 pCtx->AltPrivate.bits[0] = 0;
1040 pCtx->AltPrivate.bits[1] = 0;
1041}
1042
1043
1044/*
1045 * Update context to reflect the concatenation of another buffer full
1046 * of bytes.
1047 */
1048/** @todo Optimize this, because len is always a multiple of 64. */
1049static void dxbcRTMd5Update(PRTMD5CONTEXT pCtx, const void *pvBuf, size_t len)
1050{
1051 const uint8_t *buf = (const uint8_t *)pvBuf;
1052 uint32_t t;
1053
1054 /* Update bitcount */
1055 t = pCtx->AltPrivate.bits[0];
1056 if ((pCtx->AltPrivate.bits[0] = t + ((uint32_t) len << 3)) < t)
1057 pCtx->AltPrivate.bits[1]++; /* Carry from low to high */
1058 pCtx->AltPrivate.bits[1] += (uint32_t)(len >> 29);
1059
1060 t = (t >> 3) & 0x3f; /* Bytes already in shsInfo->data */
1061
1062 /* Handle any leading odd-sized chunks */
1063 if (t)
1064 {
1065 uint8_t *p = (uint8_t *) pCtx->AltPrivate.in + t;
1066
1067 t = 64 - t;
1068 if (len < t)
1069 {
1070 memcpy(p, buf, len);
1071 return;
1072 }
1073 memcpy(p, buf, t);
1074 rtMd5ByteReverse(pCtx->AltPrivate.in, 16);
1075 rtMd5Transform(pCtx->AltPrivate.buf, pCtx->AltPrivate.in);
1076 buf += t;
1077 len -= t;
1078 }
1079
1080 /* Process data in 64-byte chunks */
1081#ifndef RT_BIG_ENDIAN
1082 if (!((uintptr_t)buf & 0x3))
1083 {
1084 while (len >= 64) {
1085 rtMd5Transform(pCtx->AltPrivate.buf, (uint32_t const *)buf);
1086 buf += 64;
1087 len -= 64;
1088 }
1089 }
1090 else
1091#endif
1092 {
1093 while (len >= 64) {
1094 memcpy(pCtx->AltPrivate.in, buf, 64);
1095 rtMd5ByteReverse(pCtx->AltPrivate.in, 16);
1096 rtMd5Transform(pCtx->AltPrivate.buf, pCtx->AltPrivate.in);
1097 buf += 64;
1098 len -= 64;
1099 }
1100 }
1101
1102 /* Handle any remaining bytes of data */
1103 memcpy(pCtx->AltPrivate.in, buf, len);
1104}
1105
1106
1107static void dxbcHash(void const *pvData, uint32_t cbData, uint8_t pabDigest[RTMD5HASHSIZE])
1108{
1109 size_t const kBlockSize = 64;
1110 uint8_t au8BlockBuffer[kBlockSize];
1111
1112 static uint8_t const s_au8Padding[kBlockSize] =
1113 {
1114 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1115 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1116 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1117 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1118 };
1119
1120 RTMD5CONTEXT Ctx;
1121 PRTMD5CONTEXT const pCtx = &Ctx;
1122 dxbcRTMd5Init(pCtx);
1123
1124 uint8_t const *pu8Data = (uint8_t *)pvData;
1125 size_t cbRemaining = cbData;
1126
1127 size_t const cbCompleteBlocks = cbData & ~ (kBlockSize - 1);
1128 dxbcRTMd5Update(pCtx, pu8Data, cbCompleteBlocks);
1129 pu8Data += cbCompleteBlocks;
1130 cbRemaining -= cbCompleteBlocks;
1131
1132 /* Custom padding. */
1133 if (cbRemaining >= kBlockSize - 2 * sizeof(uint32_t))
1134 {
1135 /* Two additional blocks. */
1136 memcpy(&au8BlockBuffer[0], pu8Data, cbRemaining);
1137 memcpy(&au8BlockBuffer[cbRemaining], s_au8Padding, kBlockSize - cbRemaining);
1138 dxbcRTMd5Update(pCtx, au8BlockBuffer, kBlockSize);
1139
1140 memset(&au8BlockBuffer[sizeof(uint32_t)], 0, kBlockSize - 2 * sizeof(uint32_t));
1141 }
1142 else
1143 {
1144 /* One additional block. */
1145 memcpy(&au8BlockBuffer[sizeof(uint32_t)], pu8Data, cbRemaining);
1146 memcpy(&au8BlockBuffer[sizeof(uint32_t) + cbRemaining], s_au8Padding, kBlockSize - cbRemaining - 2 * sizeof(uint32_t));
1147 }
1148
1149 /* Set the first and last dwords of the last block. */
1150 *(uint32_t *)&au8BlockBuffer[0] = cbData << 3;
1151 *(uint32_t *)&au8BlockBuffer[kBlockSize - sizeof(uint32_t)] = (cbData << 1) | 1;
1152 dxbcRTMd5Update(pCtx, au8BlockBuffer, kBlockSize);
1153
1154 AssertCompile(sizeof(pCtx->AltPrivate.buf) == RTMD5HASHSIZE);
1155 memcpy(pabDigest, pCtx->AltPrivate.buf, RTMD5HASHSIZE);
1156}
1157
1158
1159/*
1160 *
1161 * Shader token reader.
1162 *
1163 */
1164
/**
 * Cursor over an array of 32-bit shader tokens.
 * The dxbcTokenReader* functions advance pToken and decrease cRemainingToken together.
 */
typedef struct DXBCTokenReader
{
    uint32_t const *pToken; /* Next token to read. */
    uint32_t cToken; /* How many tokens total. */
    uint32_t cRemainingToken; /* How many tokens remain. */
} DXBCTokenReader;
1171
1172
#ifdef LOG_ENABLED
/**
 * Returns the byte offset of the next token to be read, counted from the
 * first token of the reader (4 bytes per consumed token). Logging only.
 *
 * @param   r   The token reader.
 */
DECLINLINE(uint32_t) dxbcTokenReaderByteOffset(DXBCTokenReader *r)
{
    uint32_t const cConsumedTokens = r->cToken - r->cRemainingToken;
    return cConsumedTokens * 4;
}
#endif
1179
1180
#if 0 // Unused for now
/* Returns the number of tokens which have not been read yet. */
DECLINLINE(uint32_t) dxbcTokenReaderRemaining(DXBCTokenReader *r)
{
    return r->cRemainingToken;
}
#endif
1187
1188
1189DECLINLINE(uint32_t const *) dxbcTokenReaderPtr(DXBCTokenReader *r)
1190{
1191 return r->pToken;
1192}
1193
1194
1195DECLINLINE(bool) dxbcTokenReaderCanRead(DXBCTokenReader *r, uint32_t cToken)
1196{
1197 return cToken <= r->cRemainingToken;
1198}
1199
1200
1201DECLINLINE(void) dxbcTokenReaderSkip(DXBCTokenReader *r, uint32_t cToken)
1202{
1203 AssertReturnVoid(r->cRemainingToken >= cToken);
1204 r->cRemainingToken -= cToken;
1205 r->pToken += cToken;
1206}
1207
1208
1209DECLINLINE(uint32_t) dxbcTokenReaderRead32(DXBCTokenReader *r)
1210{
1211 AssertReturn(r->cRemainingToken, 0);
1212 --r->cRemainingToken;
1213 return *(r->pToken++);
1214}
1215
1216
1217DECLINLINE(uint64_t) dxbcTokenReaderRead64(DXBCTokenReader *r)
1218{
1219 uint64_t const u64Low = dxbcTokenReaderRead32(r);
1220 uint64_t const u64High = dxbcTokenReaderRead32(r);
1221 return u64Low + (u64High << 32);
1222}
1223
1224
1225/*
1226 *
1227 * Byte writer.
1228 *
1229 */
1230
/**
 * Growable output buffer with a movable write position.
 * The buffer is (re)allocated by dxbcByteWriterRealloc and handed over to the
 * caller by dxbcByteWriterFetchData.
 */
typedef struct DXBCByteWriter
{
    uint8_t *pu8ByteCodeBegin; /* First byte of the buffer. */
    uint8_t *pu8ByteCodePtr; /* Next byte to be written. */
    uint32_t cbAllocated; /* How many bytes allocated in the buffer. */
    uint32_t cbRemaining; /* How many bytes remain in the buffer. */
    uint32_t cbWritten; /* Offset of first never written byte.
                         * Since the writer allows to jump in the buffer, this field tracks
                         * the upper boundary of the written data.
                         */
    int32_t rc; /* Status of the writer; once it is a failure the writer functions refuse to do any work. */
} DXBCByteWriter;
1243
1244
/**
 * Write position saved by dxbcByteWriterSetOffset and reinstated by dxbcByteWriterRestore.
 */
typedef struct DXBCByteWriterState
{
    uint32_t off; /* Offset of the next free byte. */
} DXBCByteWriterState;
1249
1250
1251DECLINLINE(void *) dxbcByteWriterPtr(DXBCByteWriter *w)
1252{
1253 return w->pu8ByteCodePtr;
1254}
1255
1256
1257DECLINLINE(uint32_t) dxbcByteWriterSize(DXBCByteWriter *w)
1258{
1259 return (uint32_t)(w->pu8ByteCodePtr - w->pu8ByteCodeBegin);
1260}
1261
1262
1263static bool dxbcByteWriterRealloc(DXBCByteWriter *w, uint32_t cbNew)
1264{
1265 void *pvNew = RTMemAllocZ(cbNew);
1266 if (!pvNew)
1267 {
1268 w->rc = VERR_NO_MEMORY;
1269 return false;
1270 }
1271
1272 uint32_t const cbCurrent = dxbcByteWriterSize(w);
1273 if (cbCurrent)
1274 {
1275 memcpy(pvNew, w->pu8ByteCodeBegin, cbCurrent);
1276 RTMemFree(w->pu8ByteCodeBegin);
1277 }
1278 else
1279 Assert(w->pu8ByteCodeBegin == NULL);
1280
1281 w->pu8ByteCodeBegin = (uint8_t *)pvNew;
1282 w->pu8ByteCodePtr = w->pu8ByteCodeBegin + cbCurrent;
1283 w->cbAllocated = cbNew;
1284 w->cbRemaining = cbNew - cbCurrent;
1285 return true;
1286}
1287
1288
1289DECLINLINE(bool) dxbcByteWriterSetOffset(DXBCByteWriter *w, uint32_t off, DXBCByteWriterState *pSavedWriterState)
1290{
1291 if (RT_FAILURE(w->rc))
1292 return false;
1293
1294 uint32_t const cbNew = RT_ALIGN_32(off, 1024);
1295 uint32_t const cbMax = 2 * SVGA3D_MAX_SHADER_MEMORY_BYTES;
1296 AssertReturnStmt(off < cbMax && cbNew < cbMax, w->rc = VERR_INVALID_PARAMETER, false);
1297
1298 if (cbNew > w->cbAllocated)
1299 {
1300 if (!dxbcByteWriterRealloc(w, cbNew))
1301 return false;
1302 }
1303
1304 pSavedWriterState->off = dxbcByteWriterSize(w);
1305
1306 w->pu8ByteCodePtr = w->pu8ByteCodeBegin + off;
1307 w->cbRemaining = w->cbAllocated - off;
1308 return true;
1309}
1310
1311
1312DECLINLINE(void) dxbcByteWriterRestore(DXBCByteWriter *w, DXBCByteWriterState *pSavedWriterState)
1313{
1314 w->pu8ByteCodePtr = w->pu8ByteCodeBegin + pSavedWriterState->off;
1315 w->cbRemaining = w->cbAllocated - pSavedWriterState->off;
1316}
1317
1318
1319DECLINLINE(void) dxbcByteWriterCommit(DXBCByteWriter *w, uint32_t cbCommit)
1320{
1321 if (RT_FAILURE(w->rc))
1322 return;
1323
1324 Assert(cbCommit < w->cbRemaining);
1325 cbCommit = RT_MIN(cbCommit, w->cbRemaining);
1326 w->pu8ByteCodePtr += cbCommit;
1327 w->cbRemaining -= cbCommit;
1328 w->cbWritten = RT_MAX(w->cbWritten, w->cbAllocated - w->cbRemaining);
1329}
1330
1331
1332DECLINLINE(bool) dxbcByteWriterCanWrite(DXBCByteWriter *w, uint32_t cbMore)
1333{
1334 if (RT_FAILURE(w->rc))
1335 return false;
1336
1337 if (cbMore <= w->cbRemaining)
1338 return true;
1339
1340 /* Do not allow to allocate more than 2 * SVGA3D_MAX_SHADER_MEMORY_BYTES */
1341 uint32_t const cbMax = 2 * SVGA3D_MAX_SHADER_MEMORY_BYTES;
1342 AssertReturnStmt(cbMore < cbMax && RT_ALIGN_32(cbMore, 4096) <= cbMax - w->cbAllocated, w->rc = VERR_INVALID_PARAMETER, false);
1343
1344 uint32_t cbNew = w->cbAllocated + RT_ALIGN_32(cbMore, 4096);
1345 return dxbcByteWriterRealloc(w, cbNew);
1346}
1347
1348
1349DECLINLINE(bool) dxbcByteWriterAddTokens(DXBCByteWriter *w, uint32_t const *paToken, uint32_t cToken)
1350{
1351 uint32_t const cbWrite = cToken * sizeof(uint32_t);
1352 if (dxbcByteWriterCanWrite(w, cbWrite))
1353 {
1354 memcpy(dxbcByteWriterPtr(w), paToken, cbWrite);
1355 dxbcByteWriterCommit(w, cbWrite);
1356 return true;
1357 }
1358
1359 AssertFailed();
1360 return false;
1361}
1362
1363
1364DECLINLINE(bool) dxbcByteWriterInit(DXBCByteWriter *w, uint32_t cbInitial)
1365{
1366 RT_ZERO(*w);
1367 return dxbcByteWriterCanWrite(w, cbInitial);
1368}
1369
1370
1371DECLINLINE(void) dxbcByteWriterReset(DXBCByteWriter *w)
1372{
1373 RTMemFree(w->pu8ByteCodeBegin);
1374 RT_ZERO(*w);
1375}
1376
1377
1378DECLINLINE(void) dxbcByteWriterFetchData(DXBCByteWriter *w, void **ppv, uint32_t *pcb)
1379{
1380 *ppv = w->pu8ByteCodeBegin;
1381 *pcb = w->cbWritten;
1382
1383 w->pu8ByteCodeBegin = NULL;
1384 dxbcByteWriterReset(w);
1385}
1386
1387
1388/*
1389 *
1390 * VGPU10 shader parser.
1391 *
1392 */
1393
1394/* Parse an instruction operand. */
1395static int dxbcParseOperand(DXBCTokenReader *r, VGPUOperand *paOperand, uint32_t *pcOperandRemain)
1396{
1397 ASSERT_GUEST_RETURN(*pcOperandRemain > 0, VERR_NOT_SUPPORTED);
1398
1399 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1400
1401 paOperand->paOperandToken = dxbcTokenReaderPtr(r);
1402 paOperand->cOperandToken = 0;
1403
1404 VGPU10OperandToken0 operand0;
1405 operand0.value = dxbcTokenReaderRead32(r);
1406
1407 Log6((" %s(%d) %s(%d) %s(%d) %s(%d)\n",
1408 dxbcOperandNumComponentsToString(operand0.numComponents), operand0.numComponents,
1409 dxbcOperandComponentModeToString(operand0.selectionMode), operand0.selectionMode,
1410 dxbcOperandTypeToString(operand0.operandType), operand0.operandType,
1411 dxbcOperandIndexDimensionToString(operand0.indexDimension), operand0.indexDimension));
1412
1413 ASSERT_GUEST_RETURN(operand0.numComponents <= VGPU10_OPERAND_4_COMPONENT, VERR_INVALID_PARAMETER);
1414 if ( operand0.operandType != VGPU10_OPERAND_TYPE_IMMEDIATE32
1415 && operand0.operandType != VGPU10_OPERAND_TYPE_IMMEDIATE64)
1416 {
1417 if (operand0.numComponents == VGPU10_OPERAND_4_COMPONENT)
1418 {
1419 ASSERT_GUEST_RETURN(operand0.selectionMode <= VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE, VERR_INVALID_PARAMETER);
1420 switch (operand0.selectionMode)
1421 {
1422 case VGPU10_OPERAND_4_COMPONENT_MASK_MODE:
1423 Log6((" Mask %#x\n", operand0.mask));
1424 break;
1425 case VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE:
1426 Log6((" Swizzle %s(%d) %s(%d) %s(%d) %s(%d)\n",
1427 dxbcOperandComponentNameToString(operand0.swizzleX), operand0.swizzleX,
1428 dxbcOperandComponentNameToString(operand0.swizzleY), operand0.swizzleY,
1429 dxbcOperandComponentNameToString(operand0.swizzleZ), operand0.swizzleZ,
1430 dxbcOperandComponentNameToString(operand0.swizzleW), operand0.swizzleW));
1431 break;
1432 case VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE:
1433 Log6((" Select %s(%d)\n",
1434 dxbcOperandComponentNameToString(operand0.selectMask), operand0.selectMask));
1435 break;
1436 default: /* Never happens. */
1437 break;
1438 }
1439 }
1440 }
1441
1442 if (operand0.extended)
1443 {
1444 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1445
1446 VGPU10OperandToken1 operand1;
1447 operand1.value = dxbcTokenReaderRead32(r);
1448
1449 Log6((" %s(%d) %s(%d)\n",
1450 dxbcExtendedOperandTypeToString(operand1.extendedOperandType), operand1.extendedOperandType,
1451 dxbcOperandModifierToString(operand1.operandModifier), operand1.operandModifier));
1452 }
1453
1454 ASSERT_GUEST_RETURN(operand0.operandType < VGPU10_NUM_OPERANDS, VERR_INVALID_PARAMETER);
1455
1456 if ( operand0.operandType == VGPU10_OPERAND_TYPE_IMMEDIATE32
1457 || operand0.operandType == VGPU10_OPERAND_TYPE_IMMEDIATE64)
1458 {
1459 uint32_t cComponent = 0;
1460 if (operand0.numComponents == VGPU10_OPERAND_4_COMPONENT)
1461 cComponent = 4;
1462 else if (operand0.numComponents == VGPU10_OPERAND_1_COMPONENT)
1463 cComponent = 1;
1464
1465 for (uint32_t i = 0; i < cComponent; ++i)
1466 {
1467 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1468 paOperand->aImm[i] = dxbcTokenReaderRead32(r);
1469 }
1470 }
1471
1472 paOperand->numComponents = operand0.numComponents;
1473 paOperand->selectionMode = operand0.selectionMode;
1474 paOperand->mask = operand0.mask;
1475 paOperand->operandType = operand0.operandType;
1476 paOperand->indexDimension = operand0.indexDimension;
1477
1478 int rc = VINF_SUCCESS;
1479 /* 'indexDimension' tells the number of indices. 'i' is the array index, i.e. i = 0 for 1D, etc. */
1480 for (uint32_t i = 0; i < operand0.indexDimension; ++i)
1481 {
1482 if (i == 0) /* VGPU10_OPERAND_INDEX_1D */
1483 paOperand->aOperandIndex[i].indexRepresentation = operand0.index0Representation;
1484 else if (i == 1) /* VGPU10_OPERAND_INDEX_2D */
1485 paOperand->aOperandIndex[i].indexRepresentation = operand0.index1Representation;
1486 else /* VGPU10_OPERAND_INDEX_3D */
1487 continue; /* Skip because it is "rarely if ever used" and is not supported by VGPU10. */
1488
1489 uint32_t const indexRepresentation = paOperand->aOperandIndex[i].indexRepresentation;
1490 switch (indexRepresentation)
1491 {
1492 case VGPU10_OPERAND_INDEX_IMMEDIATE32:
1493 {
1494 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1495 paOperand->aOperandIndex[i].iOperandImmediate = dxbcTokenReaderRead32(r);
1496 break;
1497 }
1498 case VGPU10_OPERAND_INDEX_IMMEDIATE64:
1499 {
1500 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 2), VERR_INVALID_PARAMETER);
1501 paOperand->aOperandIndex[i].iOperandImmediate = dxbcTokenReaderRead64(r);
1502 break;
1503 }
1504 case VGPU10_OPERAND_INDEX_RELATIVE:
1505 {
1506 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1507 paOperand->aOperandIndex[i].pOperandRelative = &paOperand[1];
1508 Log6((" [operand index %d] parsing relative\n", i));
1509 rc = dxbcParseOperand(r, &paOperand[1], pcOperandRemain);
1510 break;
1511 }
1512 case VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE:
1513 {
1514 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 2), VERR_INVALID_PARAMETER);
1515 paOperand->aOperandIndex[i].iOperandImmediate = dxbcTokenReaderRead32(r);
1516 paOperand->aOperandIndex[i].pOperandRelative = &paOperand[1];
1517 Log6((" [operand index %d] parsing relative\n", i));
1518 rc = dxbcParseOperand(r, &paOperand[1], pcOperandRemain);
1519 break;
1520 }
1521 case VGPU10_OPERAND_INDEX_IMMEDIATE64_PLUS_RELATIVE:
1522 {
1523 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 3), VERR_INVALID_PARAMETER);
1524 paOperand->aOperandIndex[i].iOperandImmediate = dxbcTokenReaderRead64(r);
1525 paOperand->aOperandIndex[i].pOperandRelative = &paOperand[1];
1526 Log6((" [operand index %d] parsing relative\n", i));
1527 rc = dxbcParseOperand(r, &paOperand[1], pcOperandRemain);
1528 break;
1529 }
1530 default:
1531 ASSERT_GUEST_FAILED_RETURN(VERR_INVALID_PARAMETER);
1532 }
1533 Log6((" [operand index %d] %s(%d): %#llx%s\n",
1534 i, dxbcOperandIndexRepresentationToString(indexRepresentation), indexRepresentation,
1535 paOperand->aOperandIndex[i].iOperandImmediate, paOperand->aOperandIndex[i].pOperandRelative ? " + relative" : ""));
1536 if (RT_FAILURE(rc))
1537 break;
1538 }
1539
1540 paOperand->cOperandToken = dxbcTokenReaderPtr(r) - paOperand->paOperandToken;
1541
1542 *pcOperandRemain -= 1;
1543 return VINF_SUCCESS;
1544}
1545
1546
1547/* Parse an instruction. */
1548static int dxbcParseOpcode(DXBCTokenReader *r, VGPUOpcode *pOpcode)
1549{
1550 RT_ZERO(*pOpcode);
1551 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1552
1553 pOpcode->paOpcodeToken = dxbcTokenReaderPtr(r);
1554
1555 VGPU10OpcodeToken0 opcode;
1556 opcode.value = dxbcTokenReaderRead32(r);
1557
1558 pOpcode->opcodeType = opcode.opcodeType;
1559 ASSERT_GUEST_RETURN(pOpcode->opcodeType < VGPU10_NUM_OPCODES, VERR_INVALID_PARAMETER);
1560
1561 Log6(("[%#x] %s length %d\n",
1562 dxbcTokenReaderByteOffset(r) - 4, dxbcOpcodeToString(pOpcode->opcodeType), opcode.instructionLength));
1563
1564 uint32_t const cOperand = g_aOpcodeInfo[pOpcode->opcodeType].cOperand;
1565 if (cOperand != UINT32_MAX)
1566 {
1567 ASSERT_GUEST_RETURN(cOperand < RT_ELEMENTS(pOpcode->aIdxOperand), VERR_INVALID_PARAMETER);
1568
1569 pOpcode->cOpcodeToken = opcode.instructionLength;
1570 uint32_t cOpcode = 1; /* Opcode token + extended opcode tokens. */
1571 if (opcode.extended)
1572 {
1573 if ( pOpcode->opcodeType == VGPU10_OPCODE_DCL_FUNCTION_BODY
1574 || pOpcode->opcodeType == VGPU10_OPCODE_DCL_FUNCTION_TABLE
1575 || pOpcode->opcodeType == VGPU10_OPCODE_DCL_INTERFACE
1576 || pOpcode->opcodeType == VGPU10_OPCODE_INTERFACE_CALL
1577 || pOpcode->opcodeType == VGPU10_OPCODE_DCL_THREAD_GROUP)
1578 {
1579 /* "next DWORD contains ... the actual instruction length in DWORD since it may not fit into 7 bits" */
1580 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1581 pOpcode->cOpcodeToken = dxbcTokenReaderRead32(r);
1582 ++cOpcode;
1583 }
1584 else
1585 {
1586 VGPU10OpcodeToken1 opcode1;
1587 do
1588 {
1589 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1590 opcode1.value = dxbcTokenReaderRead32(r);
1591 ++cOpcode;
1592 ASSERT_GUEST( opcode1.opcodeType == VGPU10_EXTENDED_OPCODE_SAMPLE_CONTROLS
1593 || opcode1.opcodeType == D3D11_SB_EXTENDED_OPCODE_RESOURCE_DIM
1594 || opcode1.opcodeType == D3D11_SB_EXTENDED_OPCODE_RESOURCE_RETURN_TYPE);
1595 } while(opcode1.extended);
1596 }
1597 }
1598
1599 ASSERT_GUEST_RETURN(pOpcode->cOpcodeToken >= 1 && pOpcode->cOpcodeToken < 256, VERR_INVALID_PARAMETER);
1600 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, pOpcode->cOpcodeToken - cOpcode), VERR_INVALID_PARAMETER);
1601
1602#ifdef LOG_ENABLED
1603 Log6((" %08X", opcode.value));
1604 for (uint32_t i = 1; i < pOpcode->cOpcodeToken; ++i)
1605 Log6((" %08X", r->pToken[i - 1]));
1606 Log6(("\n"));
1607
1608 if (pOpcode->opcodeType == VGPU10_OPCODE_DCL_RESOURCE)
1609 Log6((" %s\n",
1610 dxbcResourceDimensionToString(opcode.resourceDimension)));
1611 else
1612 Log6((" %s\n",
1613 dxbcInterpolationModeToString(opcode.interpolationMode)));
1614#endif
1615 /* Additional tokens before operands. */
1616 switch (pOpcode->opcodeType)
1617 {
1618 case VGPU10_OPCODE_INTERFACE_CALL:
1619 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1620 dxbcTokenReaderSkip(r, 1); /* Function index */
1621 break;
1622
1623 default:
1624 break;
1625 }
1626
1627 /* Operands. */
1628 uint32_t cOperandRemain = RT_ELEMENTS(pOpcode->aValOperand);
1629 for (uint32_t i = 0; i < cOperand; ++i)
1630 {
1631 Log6((" [operand %d]\n", i));
1632 uint32_t const idxOperand = RT_ELEMENTS(pOpcode->aValOperand) - cOperandRemain;
1633 pOpcode->aIdxOperand[i] = idxOperand;
1634 int rc = dxbcParseOperand(r, &pOpcode->aValOperand[idxOperand], &cOperandRemain);
1635 ASSERT_GUEST_RETURN(RT_SUCCESS(rc), VERR_INVALID_PARAMETER);
1636 }
1637
1638 pOpcode->cOperand = cOperand;
1639
1640 /* Additional tokens after operands. */
1641 switch (pOpcode->opcodeType)
1642 {
1643 case VGPU10_OPCODE_DCL_INPUT_SIV:
1644 case VGPU10_OPCODE_DCL_INPUT_SGV:
1645 case VGPU10_OPCODE_DCL_INPUT_PS_SIV:
1646 case VGPU10_OPCODE_DCL_INPUT_PS_SGV:
1647 case VGPU10_OPCODE_DCL_OUTPUT_SIV:
1648 case VGPU10_OPCODE_DCL_OUTPUT_SGV:
1649 {
1650 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1651
1652 VGPU10NameToken name;
1653 name.value = dxbcTokenReaderRead32(r);
1654 Log6((" %s(%d)\n",
1655 dxbcSystemNameToString(name.name), name.name));
1656 pOpcode->semanticName = name.name;
1657 break;
1658 }
1659 case VGPU10_OPCODE_DCL_RESOURCE:
1660 {
1661 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1662 dxbcTokenReaderSkip(r, 1); /* ResourceReturnTypeToken */
1663 break;
1664 }
1665 case VGPU10_OPCODE_DCL_TEMPS:
1666 {
1667 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1668 dxbcTokenReaderSkip(r, 1); /* number of temps */
1669 break;
1670 }
1671 case VGPU10_OPCODE_DCL_INDEXABLE_TEMP:
1672 {
1673 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 3), VERR_INVALID_PARAMETER);
1674 dxbcTokenReaderSkip(r, 3); /* register index; number of registers; number of components */
1675 break;
1676 }
1677 case VGPU10_OPCODE_DCL_INDEX_RANGE:
1678 {
1679 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1680 dxbcTokenReaderSkip(r, 1); /* count of registers */
1681 break;
1682 }
1683 case VGPU10_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT:
1684 {
1685 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1686 dxbcTokenReaderSkip(r, 1); /* maximum number of primitives */
1687 break;
1688 }
1689 case VGPU10_OPCODE_DCL_GS_INSTANCE_COUNT:
1690 {
1691 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1692 dxbcTokenReaderSkip(r, 1); /* number of instances */
1693 break;
1694 }
1695 case VGPU10_OPCODE_DCL_HS_MAX_TESSFACTOR:
1696 {
1697 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1698 dxbcTokenReaderSkip(r, 1); /* maximum TessFactor */
1699 break;
1700 }
1701 case VGPU10_OPCODE_DCL_HS_FORK_PHASE_INSTANCE_COUNT:
1702 case VGPU10_OPCODE_DCL_HS_JOIN_PHASE_INSTANCE_COUNT:
1703 {
1704 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1705 dxbcTokenReaderSkip(r, 1); /* number of instances of the current fork/join phase program to execute */
1706 break;
1707 }
1708 case VGPU10_OPCODE_DCL_THREAD_GROUP:
1709 {
1710 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 3), VERR_INVALID_PARAMETER);
1711 dxbcTokenReaderSkip(r, 3); /* Thread Group dimensions as UINT32: x, y, z */
1712 break;
1713 }
1714 case VGPU10_OPCODE_DCL_UAV_TYPED:
1715 {
1716 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1717 dxbcTokenReaderSkip(r, 1); /* ResourceReturnTypeToken */
1718 break;
1719 }
1720 case VGPU10_OPCODE_DCL_UAV_STRUCTURED:
1721 {
1722 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1723 dxbcTokenReaderSkip(r, 1); /* byte stride */
1724 break;
1725 }
1726 case VGPU10_OPCODE_DCL_TGSM_RAW:
1727 {
1728 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1729 dxbcTokenReaderSkip(r, 1); /* element count */
1730 break;
1731 }
1732 case VGPU10_OPCODE_DCL_TGSM_STRUCTURED:
1733 {
1734 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 2), VERR_INVALID_PARAMETER);
1735 dxbcTokenReaderSkip(r, 2); /* struct byte stride; struct count */
1736 break;
1737 }
1738 case VGPU10_OPCODE_DCL_RESOURCE_STRUCTURED:
1739 {
1740 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1741 dxbcTokenReaderSkip(r, 1); /* struct byte stride */
1742 break;
1743 }
1744 default:
1745 break;
1746 }
1747 }
1748 else
1749 {
1750 /* Special opcodes. */
1751 if (pOpcode->opcodeType == VGPU10_OPCODE_CUSTOMDATA)
1752 {
1753 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, 1), VERR_INVALID_PARAMETER);
1754 pOpcode->cOpcodeToken = dxbcTokenReaderRead32(r);
1755
1756 if (pOpcode->cOpcodeToken < 2)
1757 pOpcode->cOpcodeToken = 2;
1758 ASSERT_GUEST_RETURN(dxbcTokenReaderCanRead(r, pOpcode->cOpcodeToken - 2), VERR_INVALID_PARAMETER);
1759
1760#ifdef LOG_ENABLED
1761 Log6((" %08X", opcode.value));
1762 for (uint32_t i = 1; i < pOpcode->cOpcodeToken; ++i)
1763 Log6((" %08X", r->pToken[i - 1]));
1764 Log6(("\n"));
1765
1766 Log6((" %s\n",
1767 dxbcCustomDataClassToString(opcode.customDataClass)));
1768#endif
1769 dxbcTokenReaderSkip(r, pOpcode->cOpcodeToken - 2);
1770 }
1771 else if (pOpcode->opcodeType == VGPU10_OPCODE_VMWARE)
1772 {
1773 pOpcode->cOpcodeToken = opcode.instructionLength;
1774 pOpcode->opcodeSubtype = opcode.vmwareOpcodeType;
1775
1776#ifdef LOG_ENABLED
1777 Log6((" %08X", opcode.value));
1778 for (uint32_t i = 1; i < pOpcode->cOpcodeToken; ++i)
1779 Log6((" %08X", r->pToken[i - 1]));
1780 Log6(("\n"));
1781
1782 Log6((" %s(%d)\n",
1783 dxbcVmwareOpcodeTypeToString(opcode.vmwareOpcodeType), opcode.vmwareOpcodeType));
1784#endif
1785
1786 if (opcode.vmwareOpcodeType == VGPU10_VMWARE_OPCODE_IDIV)
1787 {
1788 /* Integer divide. */
1789 pOpcode->cOperand = 4; /* dstQuit, dstRem, src0, src1. */
1790 }
1791 else if (opcode.vmwareOpcodeType == VGPU10_VMWARE_OPCODE_DFRC)
1792 {
1793 /* Double precision fraction. */
1794 pOpcode->cOperand = 2; /* dst, src. */
1795 }
1796 else if (opcode.vmwareOpcodeType == VGPU10_VMWARE_OPCODE_DRSQ)
1797 {
1798 /* Double precision reciprocal square root. */
1799 pOpcode->cOperand = 2; /* dst, src. */
1800 }
1801 else
1802 {
1803 ASSERT_GUEST_FAILED_RETURN(VERR_INVALID_PARAMETER);
1804 }
1805
1806 /* Operands. */
1807 uint32_t cOperandRemain = RT_ELEMENTS(pOpcode->aValOperand);
1808 for (uint32_t i = 0; i < pOpcode->cOperand; ++i)
1809 {
1810 Log6((" [operand %d]\n", i));
1811 uint32_t const idxOperand = RT_ELEMENTS(pOpcode->aValOperand) - cOperandRemain;
1812 pOpcode->aIdxOperand[i] = idxOperand;
1813 int rc = dxbcParseOperand(r, &pOpcode->aValOperand[idxOperand], &cOperandRemain);
1814 ASSERT_GUEST_RETURN(RT_SUCCESS(rc), VERR_INVALID_PARAMETER);
1815 }
1816 }
1817 else
1818 ASSERT_GUEST_FAILED_RETURN(VERR_INVALID_PARAMETER);
1819
1820 // pOpcode->cOperand = 0;
1821 }
1822
1823 return VINF_SUCCESS;
1824}
1825
1826
/**
 * Output context used while the parsed shader is written back out.
 *
 * Initialised by dxbcOutputInit() and passed to the dxbcOutput* and
 * dxbcEmitVmware* functions.
 */
typedef struct DXBCOUTPUTCTX
{
    VGPU10ProgramToken programToken; /* Copy of the program token of the shader being processed. */
    uint32_t cToken; /* Number of tokens in the original shader code. */

    uint32_t offSubroutine; /* Current offset where to write subroutines. Starts at cToken * 4, i.e. a byte offset. */
} DXBCOUTPUTCTX;
1834
1835
1836static void dxbcOutputInit(DXBCOUTPUTCTX *pOutctx, VGPU10ProgramToken const *pProgramToken, uint32_t cToken)
1837{
1838 RT_ZERO(*pOutctx);
1839 pOutctx->programToken = *pProgramToken;
1840 pOutctx->cToken = cToken;
1841
1842 pOutctx->offSubroutine = cToken * 4;
1843}
1844
1845
1846static void dxbcEmitCall(DXBCByteWriter *w, VGPUOpcode const *pOpcode, uint32_t label)
1847{
1848 VGPU10OpcodeToken0 opcode;
1849 VGPU10OperandToken0 operand;
1850
1851 opcode.value = 0;
1852 opcode.opcodeType = VGPU10_OPCODE_CALL;
1853 opcode.instructionLength = 3;
1854 dxbcByteWriterAddTokens(w, &opcode.value, 1);
1855
1856 operand.value = 0;
1857 operand.numComponents = VGPU10_OPERAND_1_COMPONENT;
1858 operand.operandType = VGPU10_OPERAND_TYPE_LABEL;
1859 operand.indexDimension = VGPU10_OPERAND_INDEX_1D;
1860 operand.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
1861 dxbcByteWriterAddTokens(w, &operand.value, 1);
1862
1863 dxbcByteWriterAddTokens(w, &label, 1);
1864
1865 opcode.value = 0;
1866 opcode.opcodeType = VGPU10_OPCODE_NOP;
1867 opcode.instructionLength = 1;
1868 for (unsigned i = 0; i < pOpcode->cOpcodeToken - 3; ++i)
1869 dxbcByteWriterAddTokens(w, &opcode.value, 1);
1870}
1871
1872
1873static void dxbcEmitLabel(DXBCByteWriter *w, uint32_t label)
1874{
1875 VGPU10OpcodeToken0 opcode;
1876 VGPU10OperandToken0 operand;
1877
1878 opcode.value = 0;
1879 opcode.opcodeType = VGPU10_OPCODE_LABEL;
1880 opcode.instructionLength = 3;
1881 dxbcByteWriterAddTokens(w, &opcode.value, 1);
1882
1883 operand.value = 0;
1884 operand.numComponents = VGPU10_OPERAND_1_COMPONENT;
1885 operand.operandType = VGPU10_OPERAND_TYPE_LABEL;
1886 operand.indexDimension = VGPU10_OPERAND_INDEX_1D;
1887 operand.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
1888 dxbcByteWriterAddTokens(w, &operand.value, 1);
1889 dxbcByteWriterAddTokens(w, &label, 1);
1890}
1891
1892
1893static void dxbcEmitRet(DXBCByteWriter *w)
1894{
1895 VGPU10OpcodeToken0 opcode;
1896
1897 opcode.value = 0;
1898 opcode.opcodeType = VGPU10_OPCODE_RET;
1899 opcode.instructionLength = 1;
1900 dxbcByteWriterAddTokens(w, &opcode.value, 1);
1901}
1902
1903
1904static int dxbcEmitVmwareIDIV(DXBCOUTPUTCTX *pOutctx, DXBCByteWriter *w, VGPUOpcode *pOpcode)
1905{
1906 /* Insert a call and append a subroutne. */
1907 VGPU10OpcodeToken0 opcode;
1908
1909 uint32_t const label = (pOutctx->offSubroutine - dxbcByteWriterSize(w)) / 4;
1910
1911 dxbcEmitCall(w, pOpcode, label);
1912
1913 /*
1914 * Subroutine.
1915 */
1916 DXBCByteWriterState savedWriterState;
1917 if (!dxbcByteWriterSetOffset(w, pOutctx->offSubroutine, &savedWriterState))
1918 return w->rc;
1919
1920 dxbcEmitLabel(w, label);
1921
1922 /* Just output UDIV for now. */
1923 opcode.value = 0;
1924 opcode.opcodeType = VGPU10_OPCODE_UDIV;
1925 opcode.instructionLength = pOpcode->cOpcodeToken;
1926 dxbcByteWriterAddTokens(w, &opcode.value, 1);
1927 dxbcByteWriterAddTokens(w, &pOpcode->paOpcodeToken[1], pOpcode->cOpcodeToken - 1);
1928
1929 dxbcEmitRet(w);
1930
1931 pOutctx->offSubroutine = dxbcByteWriterSize(w);
1932 dxbcByteWriterRestore(w, &savedWriterState);
1933
1934 return w->rc;
1935}
1936
1937
1938static int dxbcEmitVmwareDFRC(DXBCOUTPUTCTX *pOutctx, DXBCByteWriter *w, VGPUOpcode *pOpcode)
1939{
1940 /* Insert a call and append a subroutine. */
1941 VGPU10OpcodeToken0 opcode;
1942
1943 uint32_t const label = (pOutctx->offSubroutine - dxbcByteWriterSize(w)) / 4;
1944
1945 dxbcEmitCall(w, pOpcode, label);
1946
1947 /*
1948 * Subroutine.
1949 */
1950 DXBCByteWriterState savedWriterState;
1951 if (!dxbcByteWriterSetOffset(w, pOutctx->offSubroutine, &savedWriterState))
1952 return w->rc;
1953
1954 dxbcEmitLabel(w, label);
1955
1956 /* Just output a MOV for now. */
1957 opcode.value = 0;
1958 opcode.opcodeType = VGPU10_OPCODE_MOV;
1959 opcode.instructionLength = pOpcode->cOpcodeToken;
1960 dxbcByteWriterAddTokens(w, &opcode.value, 1);
1961 dxbcByteWriterAddTokens(w, &pOpcode->paOpcodeToken[1], pOpcode->cOpcodeToken - 1);
1962
1963 dxbcEmitRet(w);
1964
1965 pOutctx->offSubroutine = dxbcByteWriterSize(w);
1966 dxbcByteWriterRestore(w, &savedWriterState);
1967
1968 return w->rc;
1969}
1970
1971
1972static int dxbcEmitVmwareDRSQ(DXBCOUTPUTCTX *pOutctx, DXBCByteWriter *w, VGPUOpcode *pOpcode)
1973{
1974 /* Insert a call and append a subroutine. */
1975 VGPU10OpcodeToken0 opcode;
1976
1977 uint32_t const label = (pOutctx->offSubroutine - dxbcByteWriterSize(w)) / 4;
1978
1979 dxbcEmitCall(w, pOpcode, label);
1980
1981 /*
1982 * Subroutine.
1983 */
1984 DXBCByteWriterState savedWriterState;
1985 if (!dxbcByteWriterSetOffset(w, pOutctx->offSubroutine, &savedWriterState))
1986 return w->rc;
1987
1988 dxbcEmitLabel(w, label);
1989
1990 /* Just output a MOV for now. */
1991 opcode.value = 0;
1992 opcode.opcodeType = VGPU10_OPCODE_MOV;
1993 opcode.instructionLength = pOpcode->cOpcodeToken;
1994 dxbcByteWriterAddTokens(w, &opcode.value, 1);
1995 dxbcByteWriterAddTokens(w, &pOpcode->paOpcodeToken[1], pOpcode->cOpcodeToken - 1);
1996
1997 dxbcEmitRet(w);
1998
1999 pOutctx->offSubroutine = dxbcByteWriterSize(w);
2000 dxbcByteWriterRestore(w, &savedWriterState);
2001
2002 return w->rc;
2003}
2004
2005
2006static int dxbcOutputOpcode(DXBCOUTPUTCTX *pOutctx, DXBCByteWriter *w, VGPUOpcode *pOpcode)
2007{
2008#ifdef DEBUG
2009 void *pvBegin = dxbcByteWriterPtr(w);
2010#endif
2011
2012 if ( pOutctx->programToken.programType == VGPU10_PIXEL_SHADER
2013 && pOpcode->opcodeType == VGPU10_OPCODE_DCL_RESOURCE)
2014 {
2015 /** @todo This is a workaround. */
2016 /* Sometimes the guest (Mesa) created a shader with uninitialized resource dimension.
2017 * Use texture 2d because it is what a pixel shader normally uses.
2018 */
2019 ASSERT_GUEST_RETURN(pOpcode->cOpcodeToken == 4, VERR_INVALID_PARAMETER);
2020
2021 VGPU10OpcodeToken0 opcode;
2022 opcode.value = pOpcode->paOpcodeToken[0];
2023 if (opcode.resourceDimension == VGPU10_RESOURCE_DIMENSION_BUFFER)
2024 {
2025 opcode.resourceDimension = VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
2026 dxbcByteWriterAddTokens(w, &opcode.value, 1);
2027 dxbcByteWriterAddTokens(w, &pOpcode->paOpcodeToken[1], 2);
2028 uint32_t const returnType = 0x5555; /* float */
2029 dxbcByteWriterAddTokens(w, &returnType, 1);
2030 return VINF_SUCCESS;
2031 }
2032 }
2033 else if (pOpcode->opcodeType == VGPU10_OPCODE_VMWARE)
2034 {
2035 if (pOpcode->opcodeSubtype == VGPU10_VMWARE_OPCODE_IDIV)
2036 return dxbcEmitVmwareIDIV(pOutctx, w, pOpcode);
2037 if (pOpcode->opcodeSubtype == VGPU10_VMWARE_OPCODE_DFRC)
2038 return dxbcEmitVmwareDFRC(pOutctx, w, pOpcode);
2039 if (pOpcode->opcodeSubtype == VGPU10_VMWARE_OPCODE_DRSQ)
2040 return dxbcEmitVmwareDRSQ(pOutctx, w, pOpcode);
2041
2042 ASSERT_GUEST_FAILED_RETURN(VERR_NOT_SUPPORTED);
2043 }
2044
2045#ifdef DEBUG
2046 /* The code above must emit either nothing or everything. */
2047 Assert((uintptr_t)pvBegin == (uintptr_t)dxbcByteWriterPtr(w));
2048#endif
2049
2050 /* Just emit the unmodified instruction. */
2051 dxbcByteWriterAddTokens(w, pOpcode->paOpcodeToken, pOpcode->cOpcodeToken);
2052 return VINF_SUCCESS;
2053}
2054
2055
2056static int dxbcOutputFinalize(DXBCOUTPUTCTX *pOutctx, DXBCByteWriter *w)
2057{
2058 RT_NOREF(pOutctx, w);
2059 return VINF_SUCCESS;
2060}
2061
2062
2063static DECLCALLBACK(int) signatureEntryCmp(void const *pvElement1, void const *pvElement2, void *pvUser)
2064{
2065 SVGA3dDXSignatureEntry const *e1 = (SVGA3dDXSignatureEntry *)pvElement1;
2066 SVGA3dDXSignatureEntry const *e2 = (SVGA3dDXSignatureEntry *)pvElement2;
2067 RT_NOREF(pvUser);
2068
2069 if (e1->registerIndex < e2->registerIndex)
2070 return -1;
2071 if (e1->registerIndex > e2->registerIndex)
2072 return 1;
2073 if ((e1->mask & 0xf) < (e2->mask & 0xf))
2074 return -1;
2075 if ((e1->mask & 0xf) > (e2->mask & 0xf))
2076 return 1;
2077 return 0;
2078}
2079
2080
/* Forward declaration. The function is defined further down in this file and is used by DXShaderParse. */
static void dxbcGenerateSemantics(DXShaderInfo *pInfo, uint32_t cSignature,
                                  SVGA3dDXSignatureEntry const *paSignature,
                                  DXShaderAttributeSemantic *paSemantic,
                                  uint32_t u32BlobType);
2085
2086
2087/*
2088 * Parse and verify the shader byte code. Extract input and output signatures into pInfo.
2089 */
2090int DXShaderParse(void const *pvShaderCode, uint32_t cbShaderCode, DXShaderInfo *pInfo)
2091{
2092 if (pInfo)
2093 RT_ZERO(*pInfo);
2094
2095 ASSERT_GUEST_RETURN(cbShaderCode <= SVGA3D_MAX_SHADER_MEMORY_BYTES, VERR_INVALID_PARAMETER);
2096 ASSERT_GUEST_RETURN((cbShaderCode & 0x3) == 0, VERR_INVALID_PARAMETER); /* Aligned to the token size. */
2097 ASSERT_GUEST_RETURN(cbShaderCode >= 8, VERR_INVALID_PARAMETER); /* At least program and length tokens. */
2098
2099 uint32_t const *paToken = (uint32_t *)pvShaderCode;
2100
2101 VGPU10ProgramToken const *pProgramToken = (VGPU10ProgramToken *)&paToken[0];
2102 ASSERT_GUEST_RETURN( pProgramToken->majorVersion >= 4
2103 && pProgramToken->programType <= VGPU10_COMPUTE_SHADER, VERR_INVALID_PARAMETER);
2104 if (pInfo)
2105 pInfo->enmProgramType = (VGPU10_PROGRAM_TYPE)pProgramToken->programType;
2106
2107 uint32_t const cToken = paToken[1];
2108 Log6(("Shader version %d.%d type %s(%d) Length %d\n",
2109 pProgramToken->majorVersion, pProgramToken->minorVersion, dxbcShaderTypeToString(pProgramToken->programType), pProgramToken->programType, cToken));
2110 ASSERT_GUEST_RETURN(cbShaderCode / 4 >= cToken, VERR_INVALID_PARAMETER); /* Declared length should be less or equal to the actual. */
2111
2112 /* Write the parsed (and possibly modified) shader to a memory buffer. */
2113 DXBCByteWriter dxbcByteWriter;
2114 DXBCByteWriter *w = &dxbcByteWriter;
2115 if (!dxbcByteWriterInit(w, 4096 + cbShaderCode))
2116 return VERR_NO_MEMORY;
2117
2118 dxbcByteWriterAddTokens(w, paToken, 2);
2119
2120 DXBCTokenReader parser;
2121 RT_ZERO(parser);
2122
2123 DXBCTokenReader *r = &parser;
2124 r->pToken = &paToken[2];
2125 r->cToken = r->cRemainingToken = cToken - 2;
2126
2127 DXBCOUTPUTCTX outctx;
2128 dxbcOutputInit(&outctx, pProgramToken, cToken);
2129
2130 int rc = VINF_SUCCESS;
2131 while (dxbcTokenReaderCanRead(r, 1))
2132 {
2133 uint32_t const offOpcode = dxbcByteWriterSize(w);
2134
2135 VGPUOpcode opcode;
2136 rc = dxbcParseOpcode(r, &opcode);
2137 ASSERT_GUEST_STMT_BREAK(RT_SUCCESS(rc), rc = VERR_INVALID_PARAMETER);
2138
2139 rc = dxbcOutputOpcode(&outctx, w, &opcode);
2140 AssertRCBreak(rc);
2141
2142 if (pInfo)
2143 {
2144 /* Remember offsets of DCL_RESOURCE instructions. */
2145 if ( outctx.programToken.programType == VGPU10_PIXEL_SHADER
2146 && opcode.opcodeType == VGPU10_OPCODE_DCL_RESOURCE)
2147 {
2148 if ( opcode.cOperand == 1
2149 && opcode.aValOperand[0].indexDimension == VGPU10_OPERAND_INDEX_1D
2150 && opcode.aValOperand[0].aOperandIndex[0].indexRepresentation == VGPU10_OPERAND_INDEX_IMMEDIATE32)
2151 {
2152 uint32_t const indexResource = opcode.aValOperand[0].aOperandIndex[0].iOperandImmediate;
2153 if (indexResource < SVGA3D_DX_MAX_SRVIEWS)
2154 {
2155 ASSERT_GUEST(pInfo->aOffDclResource[indexResource] == 0);
2156 pInfo->aOffDclResource[indexResource] = offOpcode;
2157 pInfo->cDclResource = RT_MAX(pInfo->cDclResource, indexResource + 1);
2158 }
2159 else
2160 ASSERT_GUEST_FAILED();
2161 }
2162 else
2163 ASSERT_GUEST_FAILED();
2164 }
2165
2166 /* Fetch signatures. */
2167 SVGA3dDXSignatureEntry *pSignatureEntry = NULL;
2168 switch (opcode.opcodeType)
2169 {
2170 case VGPU10_OPCODE_DCL_INPUT:
2171 case VGPU10_OPCODE_DCL_INPUT_SIV:
2172 //case VGPU10_OPCODE_DCL_INPUT_SGV:
2173 case VGPU10_OPCODE_DCL_INPUT_PS:
2174 //case VGPU10_OPCODE_DCL_INPUT_PS_SIV:
2175 //case VGPU10_OPCODE_DCL_INPUT_PS_SGV:
2176 //case VGPU10_OPCODE_DCL_INPUT_CONTROL_POINT_COUNT:
2177 ASSERT_GUEST_STMT_BREAK(pInfo->cInputSignature < RT_ELEMENTS(pInfo->aInputSignature), rc = VERR_INVALID_PARAMETER);
2178 pSignatureEntry = &pInfo->aInputSignature[pInfo->cInputSignature++];
2179 break;
2180 case VGPU10_OPCODE_DCL_OUTPUT:
2181 case VGPU10_OPCODE_DCL_OUTPUT_SIV:
2182 case VGPU10_OPCODE_DCL_OUTPUT_SGV:
2183 //case VGPU10_OPCODE_DCL_OUTPUT_CONTROL_POINT_COUNT:
2184 ASSERT_GUEST_STMT_BREAK(pInfo->cOutputSignature < RT_ELEMENTS(pInfo->aOutputSignature), rc = VERR_INVALID_PARAMETER);
2185 pSignatureEntry = &pInfo->aOutputSignature[pInfo->cOutputSignature++];
2186 break;
2187 default:
2188 break;
2189 }
2190
2191 if (RT_FAILURE(rc))
2192 break;
2193
2194 if (pSignatureEntry)
2195 {
2196 ASSERT_GUEST_STMT_BREAK( opcode.aValOperand[0].aOperandIndex[0].indexRepresentation == VGPU10_OPERAND_INDEX_IMMEDIATE32
2197 || opcode.aValOperand[0].aOperandIndex[0].indexRepresentation == VGPU10_OPERAND_INDEX_IMMEDIATE64,
2198 rc = VERR_NOT_SUPPORTED);
2199
2200 uint32_t const indexDimension = opcode.aValOperand[0].indexDimension;
2201 if (indexDimension == VGPU10_OPERAND_INDEX_0D)
2202 {
2203 if (opcode.aValOperand[0].operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID)
2204 {
2205 pSignatureEntry->registerIndex = 0;
2206 pSignatureEntry->semanticName = SVGADX_SIGNATURE_SEMANTIC_NAME_PRIMITIVE_ID;
2207 }
2208 else if (opcode.aValOperand[0].operandType == VGPU10_OPERAND_TYPE_OUTPUT_DEPTH)
2209 {
2210 /* oDepth is always last in the signature. Register index is equal to 0xFFFFFFFF. */
2211 pSignatureEntry->registerIndex = 0xFFFFFFFF;
2212 pSignatureEntry->semanticName = SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED;
2213 }
2214 else if (opcode.aValOperand[0].operandType <= VGPU10_OPERAND_TYPE_SM50_MAX)
2215 {
2216 pSignatureEntry->registerIndex = 0;
2217 pSignatureEntry->semanticName = opcode.semanticName;
2218 }
2219 else
2220 ASSERT_GUEST_FAILED_STMT_BREAK(rc = VERR_NOT_SUPPORTED);
2221 }
2222 else
2223 {
2224 ASSERT_GUEST_STMT_BREAK( indexDimension == VGPU10_OPERAND_INDEX_1D
2225 || indexDimension == VGPU10_OPERAND_INDEX_2D
2226 || indexDimension == VGPU10_OPERAND_INDEX_3D,
2227 rc = VERR_NOT_SUPPORTED);
2228 /* The register index seems to be in the highest dimension. */
2229 pSignatureEntry->registerIndex = opcode.aValOperand[0].aOperandIndex[indexDimension - VGPU10_OPERAND_INDEX_1D].iOperandImmediate;
2230 pSignatureEntry->semanticName = opcode.semanticName;
2231 }
2232 pSignatureEntry->mask = opcode.aValOperand[0].mask;
2233 pSignatureEntry->componentType = SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN; // Will be updated by vboxDXUpdateVSInputSignature
2234 pSignatureEntry->minPrecision = SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT;
2235 }
2236 }
2237 }
2238
2239 if (RT_FAILURE(rc))
2240 {
2241 return rc;
2242 }
2243
2244 rc = dxbcOutputFinalize(&outctx, w);
2245 if (RT_FAILURE(rc))
2246 {
2247 return rc;
2248 }
2249
2250 dxbcByteWriterFetchData(w, &pInfo->pvBytecode, &pInfo->cbBytecode);
2251 uint32_t *pcOutputToken = (uint32_t *)pInfo->pvBytecode + 1;
2252 *pcOutputToken = pInfo->cbBytecode / 4;
2253
2254 /* Sort signatures by register index and mask because the host API need them to be sorted. */
2255 if (pInfo->cInputSignature)
2256 {
2257 RTSortShell(pInfo->aInputSignature, pInfo->cInputSignature, sizeof(pInfo->aInputSignature[0]),
2258 signatureEntryCmp, NULL);
2259 dxbcGenerateSemantics(pInfo, pInfo->cInputSignature,
2260 pInfo->aInputSignature,
2261 pInfo->aInputSemantic, DXBC_BLOB_TYPE_ISGN);
2262 }
2263 if (pInfo->cOutputSignature)
2264 {
2265 RTSortShell(pInfo->aOutputSignature, pInfo->cOutputSignature, sizeof(pInfo->aOutputSignature[0]),
2266 signatureEntryCmp, NULL);
2267 dxbcGenerateSemantics(pInfo, pInfo->cOutputSignature,
2268 pInfo->aOutputSignature,
2269 pInfo->aOutputSemantic, DXBC_BLOB_TYPE_OSGN);
2270 }
2271 if (pInfo->cPatchConstantSignature)
2272 {
2273 RTSortShell(pInfo->aPatchConstantSignature, pInfo->cPatchConstantSignature, sizeof(pInfo->aPatchConstantSignature[0]),
2274 signatureEntryCmp, NULL);
2275 dxbcGenerateSemantics(pInfo, pInfo->cPatchConstantSignature,
2276 pInfo->aPatchConstantSignature,
2277 pInfo->aPatchConstantSemantic, DXBC_BLOB_TYPE_PCSG);
2278 }
2279
2280#ifdef LOG_ENABLED
2281 if (pInfo->cInputSignature)
2282 {
2283 Log6(("Input signatures:\n"));
2284 for (uint32_t i = 0; i < pInfo->cInputSignature; ++i)
2285 Log6((" [%u]: %u %u 0x%X, %s %d\n",
2286 i, pInfo->aInputSignature[i].registerIndex, pInfo->aInputSignature[i].semanticName, pInfo->aInputSignature[i].mask,
2287 pInfo->aInputSemantic[i].pcszSemanticName, pInfo->aInputSemantic[i].SemanticIndex));
2288 }
2289 if (pInfo->cOutputSignature)
2290 {
2291 Log6(("Output signatures:\n"));
2292 for (uint32_t i = 0; i < pInfo->cOutputSignature; ++i)
2293 Log6((" [%u]: %u %u 0x%X, %s %d\n",
2294 i, pInfo->aOutputSignature[i].registerIndex, pInfo->aOutputSignature[i].semanticName, pInfo->aOutputSignature[i].mask,
2295 pInfo->aOutputSemantic[i].pcszSemanticName, pInfo->aOutputSemantic[i].SemanticIndex));
2296 }
2297 if (pInfo->cPatchConstantSignature)
2298 {
2299 Log6(("Patch constant signatures:\n"));
2300 for (uint32_t i = 0; i < pInfo->cPatchConstantSignature; ++i)
2301 Log6((" [%u]: %u %u 0x%X, %s %d\n",
2302 i, pInfo->aPatchConstantSignature[i].registerIndex, pInfo->aPatchConstantSignature[i].semanticName, pInfo->aPatchConstantSignature[i].mask,
2303 pInfo->aPatchConstantSemantic[i].pcszSemanticName, pInfo->aPatchConstantSemantic[i].SemanticIndex));
2304 }
2305#endif
2306
2307 return VINF_SUCCESS;
2308}
2309
2310
2311void DXShaderGenerateSemantics(DXShaderInfo *pInfo)
2312{
2313 if (pInfo->cInputSignature)
2314 dxbcGenerateSemantics(pInfo, pInfo->cInputSignature,
2315 pInfo->aInputSignature,
2316 pInfo->aInputSemantic, DXBC_BLOB_TYPE_ISGN);
2317 if (pInfo->cOutputSignature)
2318 dxbcGenerateSemantics(pInfo, pInfo->cOutputSignature,
2319 pInfo->aOutputSignature,
2320 pInfo->aOutputSemantic, DXBC_BLOB_TYPE_OSGN);
2321 if (pInfo->cPatchConstantSignature)
2322 dxbcGenerateSemantics(pInfo, pInfo->cPatchConstantSignature,
2323 pInfo->aPatchConstantSignature,
2324 pInfo->aPatchConstantSemantic, DXBC_BLOB_TYPE_PCSG);
2325}
2326
2327
2328void DXShaderSortSignatures(DXShaderInfo *pInfo)
2329{
2330 /* Sort signatures by register index and mask because the host API need them to be sorted. */
2331 if (pInfo->cInputSignature)
2332 {
2333 RTSortShell(pInfo->aInputSignature, pInfo->cInputSignature, sizeof(pInfo->aInputSignature[0]),
2334 signatureEntryCmp, NULL);
2335 }
2336 if (pInfo->cOutputSignature)
2337 {
2338 RTSortShell(pInfo->aOutputSignature, pInfo->cOutputSignature, sizeof(pInfo->aOutputSignature[0]),
2339 signatureEntryCmp, NULL);
2340 }
2341 if (pInfo->cPatchConstantSignature)
2342 {
2343 RTSortShell(pInfo->aPatchConstantSignature, pInfo->cPatchConstantSignature, sizeof(pInfo->aPatchConstantSignature[0]),
2344 signatureEntryCmp, NULL);
2345 }
2346}
2347
2348
2349void DXShaderFree(DXShaderInfo *pInfo)
2350{
2351 RTMemFree(pInfo->pvBytecode);
2352 RT_ZERO(*pInfo);
2353}
2354
2355
#if 0 // Unused. Replaced with dxbcSemanticInfo.
/* Disabled code, kept for reference only: maps a signature semantic name to the
 * corresponding system value semantic string, "ATTRIB" for everything else. */
static char const *dxbcSemanticName(SVGA3dDXSignatureSemanticName enmSemanticName)
{
    /* https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/dx-graphics-hlsl-semantics#system-value-semantics */
    switch (enmSemanticName)
    {
        case SVGADX_SIGNATURE_SEMANTIC_NAME_POSITION: return "SV_Position";
        case SVGADX_SIGNATURE_SEMANTIC_NAME_CLIP_DISTANCE: return "SV_ClipDistance";
        case SVGADX_SIGNATURE_SEMANTIC_NAME_CULL_DISTANCE: return "SV_CullDistance";
        case SVGADX_SIGNATURE_SEMANTIC_NAME_RENDER_TARGET_ARRAY_INDEX: return "SV_RenderTargetArrayIndex";
        case SVGADX_SIGNATURE_SEMANTIC_NAME_VIEWPORT_ARRAY_INDEX: return "SV_ViewportArrayIndex";
        case SVGADX_SIGNATURE_SEMANTIC_NAME_VERTEX_ID: return "SV_VertexID";
        case SVGADX_SIGNATURE_SEMANTIC_NAME_PRIMITIVE_ID: return "SV_PrimitiveID";
        case SVGADX_SIGNATURE_SEMANTIC_NAME_INSTANCE_ID: return "SV_InstanceID";
        case SVGADX_SIGNATURE_SEMANTIC_NAME_IS_FRONT_FACE: return "SV_IsFrontFace";
        case SVGADX_SIGNATURE_SEMANTIC_NAME_SAMPLE_INDEX: return "SV_SampleIndex";
        case SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR: return "SV_FinalQuadUeq0EdgeTessFactor";
        case SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_EQ_0_EDGE_TESSFACTOR: return "SV_FinalQuadVeq0EdgeTessFactor";
        case SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_EQ_1_EDGE_TESSFACTOR: return "SV_FinalQuadUeq1EdgeTessFactor";
        case SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_EQ_1_EDGE_TESSFACTOR: return "SV_FinalQuadVeq1EdgeTessFactor";
        case SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR: return "SV_FinalQuadUInsideTessFactor";
        case SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR: return "SV_FinalQuadVInsideTessFactor";
        case SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR: return "SV_FinalTriUeq0EdgeTessFactor";
        case SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_V_EQ_0_EDGE_TESSFACTOR: return "SV_FinalTriVeq0EdgeTessFactor";
        case SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_W_EQ_0_EDGE_TESSFACTOR: return "SV_FinalTriWeq0EdgeTessFactor";
        case SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_INSIDE_TESSFACTOR: return "SV_FinalTriInsideTessFactor";
        case SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_LINE_DETAIL_TESSFACTOR: return "SV_FinalLineDetailTessFactor";
        case SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_LINE_DENSITY_TESSFACTOR: return "SV_FinalLineDensityTessFactor";
        default:
            Assert(enmSemanticName == SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
            break;
    }
    /* Generic. Arbitrary name. It does not have any meaning. */
    return "ATTRIB";
}
#endif
2392
2393
/* https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/dx-graphics-hlsl-semantics#system-value-semantics
 *
 * Describes one semantic: its name string and the type of its value.
 *
 * Type:
 * 0 - undefined
 * 1 - unsigned int
 * 2 - signed int
 * 3 - float
 */
typedef struct VGPUSemanticInfo
{
    char const *pszName; /* The semantic name string, e.g. "SV_Position". */
    uint32_t u32Type;    /* The value type, one of the 'Type' values listed above. */
} VGPUSemanticInfo;
2406
/* Semantic information indexed by SVGA3dDXSignatureSemanticName.
 * The trailing comment of each entry gives the enum value the entry belongs to. */
static VGPUSemanticInfo const g_aSemanticInfo[SVGADX_SIGNATURE_SEMANTIC_NAME_MAX] =
{
    { "ATTRIB", 0 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED 0
    { "SV_Position", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_POSITION 1
    { "SV_ClipDistance", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_CLIP_DISTANCE 2
    { "SV_CullDistance", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_CULL_DISTANCE 3
    { "SV_RenderTargetArrayIndex", 1 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_RENDER_TARGET_ARRAY_INDEX 4
    { "SV_ViewportArrayIndex", 1 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_VIEWPORT_ARRAY_INDEX 5
    { "SV_VertexID", 1 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_VERTEX_ID 6
    { "SV_PrimitiveID", 1 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_PRIMITIVE_ID 7
    { "SV_InstanceID", 1 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_INSTANCE_ID 8
    { "SV_IsFrontFace", 1 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_IS_FRONT_FACE 9
    { "SV_SampleIndex", 1 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_SAMPLE_INDEX 10
    /** @todo Is this a correct name for all TessFactors? */
    { "SV_TessFactor", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR 11
    { "SV_TessFactor", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_EQ_0_EDGE_TESSFACTOR 12
    { "SV_TessFactor", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_EQ_1_EDGE_TESSFACTOR 13
    { "SV_TessFactor", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_EQ_1_EDGE_TESSFACTOR 14
    { "SV_TessFactor", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR 15
    { "SV_TessFactor", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR 16
    { "SV_TessFactor", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR 17
    { "SV_TessFactor", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_V_EQ_0_EDGE_TESSFACTOR 18
    { "SV_TessFactor", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_W_EQ_0_EDGE_TESSFACTOR 19
    { "SV_TessFactor", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_INSIDE_TESSFACTOR 20
    { "SV_TessFactor", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_LINE_DETAIL_TESSFACTOR 21
    { "SV_TessFactor", 3 }, // SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_LINE_DENSITY_TESSFACTOR 22
};
2434
/* Used by dxbcSemanticInfo instead of g_aSemanticInfo[0] for the output signature of a pixel shader. */
static VGPUSemanticInfo const g_SemanticPSOutput =
    { "SV_TARGET", 3 }; // Replaces the SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED (0) entry.
2437
2438
2439static VGPUSemanticInfo const *dxbcSemanticInfo(DXShaderInfo const *pInfo, SVGA3dDXSignatureSemanticName enmSemanticName, uint32_t u32BlobType)
2440{
2441 if (enmSemanticName < RT_ELEMENTS(g_aSemanticInfo))
2442 {
2443 if ( enmSemanticName == 0
2444 && pInfo->enmProgramType == VGPU10_PIXEL_SHADER
2445 && u32BlobType == DXBC_BLOB_TYPE_OSGN)
2446 return &g_SemanticPSOutput;
2447 return &g_aSemanticInfo[enmSemanticName];
2448 }
2449 return &g_aSemanticInfo[0];
2450}
2451
2452
2453static void dxbcGenerateSemantics(DXShaderInfo *pInfo, uint32_t cSignature,
2454 SVGA3dDXSignatureEntry const *paSignature,
2455 DXShaderAttributeSemantic *paSemantic,
2456 uint32_t u32BlobType)
2457{
2458 for (uint32_t iSignatureEntry = 0; iSignatureEntry < cSignature; ++iSignatureEntry)
2459 {
2460 SVGA3dDXSignatureEntry const *src = &paSignature[iSignatureEntry];
2461 DXShaderAttributeSemantic *dst = &paSemantic[iSignatureEntry];
2462
2463 ASSERT_GUEST_RETURN_VOID(src->semanticName < SVGADX_SIGNATURE_SEMANTIC_NAME_MAX);
2464
2465 VGPUSemanticInfo const *pSemanticInfo = dxbcSemanticInfo(pInfo, src->semanticName, u32BlobType);
2466 dst->pcszSemanticName = pSemanticInfo->pszName;
2467 dst->SemanticIndex = 0;
2468 for (uint32_t i = 0; i < iSignatureEntry; ++i)
2469 {
2470 DXShaderAttributeSemantic const *pSemantic = &paSemantic[i];
2471 if (RTStrCmp(pSemantic->pcszSemanticName, dst->pcszSemanticName) == 0)
2472 ++dst->SemanticIndex;
2473 }
2474 }
2475}
2476
2477
2478static int dxbcCreateIOSGNBlob(DXShaderInfo const *pInfo, DXBCHeader *pHdr, uint32_t u32BlobType, uint32_t cSignature,
2479 SVGA3dDXSignatureEntry const *paSignature, DXShaderAttributeSemantic const *paSemantic, DXBCByteWriter *w)
2480{
2481 RT_NOREF(pInfo);
2482 AssertReturn(cSignature <= SVGA3D_DX_SM41_MAX_VERTEXINPUTREGISTERS, VERR_INVALID_PARAMETER);
2483
2484 uint32_t cbBlob = RT_UOFFSETOF_DYN(DXBCBlobIOSGN, aElement[cSignature]);
2485 if (!dxbcByteWriterCanWrite(w, sizeof(DXBCBlobHeader) + cbBlob))
2486 return VERR_NO_MEMORY;
2487
2488 Log6(("Create signature type %c%c%c%c (0x%RX32)\n",
2489 RT_BYTE1(u32BlobType), RT_BYTE2(u32BlobType), RT_BYTE3(u32BlobType), RT_BYTE4(u32BlobType), u32BlobType));
2490
2491 DXBCBlobHeader *pHdrBlob = (DXBCBlobHeader *)dxbcByteWriterPtr(w);
2492 pHdrBlob->u32BlobType = u32BlobType;
2493 // pHdrBlob->cbBlob = 0;
2494
2495 DXBCBlobIOSGN *pHdrISGN = (DXBCBlobIOSGN *)&pHdrBlob[1];
2496 pHdrISGN->cElement = cSignature;
2497 pHdrISGN->offElement = RT_UOFFSETOF(DXBCBlobIOSGN, aElement[0]);
2498
2499#ifdef DEBUG
2500 /* Check that signatures are sorted by register index because the host API need them to be sorted. */
2501 uint32_t idxRegisterLast = 0;
2502#endif
2503
2504 for (uint32_t iSignatureEntry = 0; iSignatureEntry < cSignature; ++iSignatureEntry)
2505 {
2506 SVGA3dDXSignatureEntry const *srcEntry = &paSignature[iSignatureEntry];
2507 DXShaderAttributeSemantic const *srcSemantic = &paSemantic[iSignatureEntry];
2508 DXBCBlobIOSGNElement *dst = &pHdrISGN->aElement[iSignatureEntry];
2509
2510 dst->offElementName = 0;
2511 for (uint32_t i = 0; i < iSignatureEntry; ++i)
2512 {
2513 DXBCBlobIOSGNElement const *pElement = &pHdrISGN->aElement[i];
2514 char const *pszElementName = (char *)pHdrISGN + pElement->offElementName;
2515 if (RTStrCmp(pszElementName, srcSemantic->pcszSemanticName) == 0)
2516 {
2517 dst->offElementName = pElement->offElementName;
2518 break;
2519 }
2520 }
2521 dst->idxSemantic = srcSemantic->SemanticIndex;
2522 dst->enmSystemValue = srcEntry->semanticName;
2523 dst->enmComponentType = srcEntry->componentType;
2524 dst->idxRegister = srcEntry->registerIndex;
2525 dst->u.mask = srcEntry->mask;
2526
2527 Log6((" [%u]: %s[%u] sv %u type %u reg %u mask %X\n",
2528 iSignatureEntry, srcSemantic->pcszSemanticName, dst->idxSemantic,
2529 dst->enmSystemValue, dst->enmComponentType, dst->idxRegister, dst->u.mask));
2530
2531#ifdef DEBUG
2532 Assert(idxRegisterLast <= dst->idxRegister);
2533 idxRegisterLast = dst->idxRegister;
2534#endif
2535
2536 if (dst->offElementName == 0)
2537 {
2538 /* Store the semantic name for this element. */
2539 dst->offElementName = cbBlob; /* Offset of the semantic's name relative to the start of the blob (without DXBCBlobHeader). */
2540 uint32_t const cbElementName = (uint32_t)strlen(srcSemantic->pcszSemanticName) + 1;
2541 if (!dxbcByteWriterCanWrite(w, sizeof(DXBCBlobHeader) + cbBlob + cbElementName))
2542 return VERR_NO_MEMORY;
2543
2544 memcpy((char *)pHdrISGN + dst->offElementName, srcSemantic->pcszSemanticName, cbElementName);
2545 cbBlob += cbElementName;
2546 }
2547 }
2548
2549 /* Blobs are 4 bytes aligned. Commit the blob data. */
2550 cbBlob = RT_ALIGN_32(cbBlob, 4);
2551 pHdrBlob->cbBlob = cbBlob;
2552 pHdr->cbTotal += cbBlob + sizeof(DXBCBlobHeader);
2553 dxbcByteWriterCommit(w, cbBlob + sizeof(DXBCBlobHeader));
2554 return VINF_SUCCESS;
2555}
2556
2557
2558static int dxbcCreateSHDRBlob(DXBCHeader *pHdr, uint32_t u32BlobType,
2559 void const *pvShader, uint32_t cbShader, DXBCByteWriter *w)
2560{
2561 uint32_t cbBlob = cbShader;
2562 if (!dxbcByteWriterCanWrite(w, sizeof(DXBCBlobHeader) + cbBlob))
2563 return VERR_NO_MEMORY;
2564
2565 DXBCBlobHeader *pHdrBlob = (DXBCBlobHeader *)dxbcByteWriterPtr(w);
2566 pHdrBlob->u32BlobType = u32BlobType;
2567 // pHdrBlob->cbBlob = 0;
2568
2569 memcpy(&pHdrBlob[1], pvShader, cbShader);
2570
2571 /* Blobs are 4 bytes aligned. Commit the blob data. */
2572 cbBlob = RT_ALIGN_32(cbBlob, 4);
2573 pHdrBlob->cbBlob = cbBlob;
2574 pHdr->cbTotal += cbBlob + sizeof(DXBCBlobHeader);
2575 dxbcByteWriterCommit(w, cbBlob + sizeof(DXBCBlobHeader));
2576 return VINF_SUCCESS;
2577}
2578
2579
2580/*
2581 * Create a DXBC container with signature and shader code data blobs.
2582 */
2583static int dxbcCreateFromInfo(DXShaderInfo const *pInfo, void const *pvShader, uint32_t cbShader, DXBCByteWriter *w)
2584{
2585 int rc;
2586
2587 /* Create a DXBC container with ISGN, OSGN and SHDR blobs. */
2588 uint32_t cBlob = 3;
2589 if ( pInfo->enmProgramType == VGPU10_HULL_SHADER
2590 || pInfo->enmProgramType == VGPU10_DOMAIN_SHADER)
2591 ++cBlob;
2592
2593 uint32_t const cbHdr = RT_UOFFSETOF_DYN(DXBCHeader, aBlobOffset[cBlob]); /* Header with blob offsets. */
2594 if (!dxbcByteWriterCanWrite(w, cbHdr))
2595 return VERR_NO_MEMORY;
2596
2597 /* Container header. */
2598 DXBCHeader *pHdr = (DXBCHeader *)dxbcByteWriterPtr(w);
2599 pHdr->u32DXBC = DXBC_MAGIC;
2600 // RT_ZERO(pHdr->au8Hash);
2601 pHdr->u32Version = 1;
2602 pHdr->cbTotal = cbHdr;
2603 pHdr->cBlob = cBlob;
2604 //RT_ZERO(pHdr->aBlobOffset);
2605 dxbcByteWriterCommit(w, cbHdr);
2606
2607#ifdef LOG_ENABLED
2608 if (pInfo->cInputSignature)
2609 {
2610 Log6(("Input signatures:\n"));
2611 for (uint32_t i = 0; i < pInfo->cInputSignature; ++i)
2612 Log6((" [%u]: %u %u 0x%X, %s %d\n",
2613 i, pInfo->aInputSignature[i].registerIndex, pInfo->aInputSignature[i].semanticName, pInfo->aInputSignature[i].mask,
2614 pInfo->aInputSemantic[i].pcszSemanticName, pInfo->aInputSemantic[i].SemanticIndex));
2615 }
2616 if (pInfo->cOutputSignature)
2617 {
2618 Log6(("Output signatures:\n"));
2619 for (uint32_t i = 0; i < pInfo->cOutputSignature; ++i)
2620 Log6((" [%u]: %u %u 0x%X, %s %d\n",
2621 i, pInfo->aOutputSignature[i].registerIndex, pInfo->aOutputSignature[i].semanticName, pInfo->aOutputSignature[i].mask,
2622 pInfo->aOutputSemantic[i].pcszSemanticName, pInfo->aOutputSemantic[i].SemanticIndex));
2623 }
2624 if (pInfo->cPatchConstantSignature)
2625 {
2626 Log6(("Patch constant signatures:\n"));
2627 for (uint32_t i = 0; i < pInfo->cPatchConstantSignature; ++i)
2628 Log6((" [%u]: %u %u 0x%X, %s %d\n",
2629 i, pInfo->aPatchConstantSignature[i].registerIndex, pInfo->aPatchConstantSignature[i].semanticName, pInfo->aPatchConstantSignature[i].mask,
2630 pInfo->aPatchConstantSemantic[i].pcszSemanticName, pInfo->aPatchConstantSemantic[i].SemanticIndex));
2631 }
2632#endif
2633
2634 /* Blobs. */
2635 uint32_t iBlob = 0;
2636
2637 pHdr->aBlobOffset[iBlob++] = dxbcByteWriterSize(w);
2638 rc = dxbcCreateIOSGNBlob(pInfo, pHdr, DXBC_BLOB_TYPE_ISGN, pInfo->cInputSignature, &pInfo->aInputSignature[0], pInfo->aInputSemantic, w);
2639 AssertRCReturn(rc, rc);
2640
2641 pHdr->aBlobOffset[iBlob++] = dxbcByteWriterSize(w);
2642 rc = dxbcCreateIOSGNBlob(pInfo, pHdr, DXBC_BLOB_TYPE_OSGN, pInfo->cOutputSignature, &pInfo->aOutputSignature[0], pInfo->aOutputSemantic, w);
2643 AssertRCReturn(rc, rc);
2644
2645 if ( pInfo->enmProgramType == VGPU10_HULL_SHADER
2646 || pInfo->enmProgramType == VGPU10_DOMAIN_SHADER)
2647 {
2648 pHdr->aBlobOffset[iBlob++] = dxbcByteWriterSize(w);
2649 rc = dxbcCreateIOSGNBlob(pInfo, pHdr, DXBC_BLOB_TYPE_PCSG, pInfo->cPatchConstantSignature, &pInfo->aPatchConstantSignature[0], pInfo->aPatchConstantSemantic, w);
2650 AssertRCReturn(rc, rc);
2651 }
2652
2653 pHdr->aBlobOffset[iBlob++] = dxbcByteWriterSize(w);
2654 rc = dxbcCreateSHDRBlob(pHdr, DXBC_BLOB_TYPE_SHDR, pvShader, cbShader, w);
2655 AssertRCReturn(rc, rc);
2656
2657 Assert(iBlob == cBlob);
2658
2659 AssertCompile(RT_UOFFSETOF(DXBCHeader, u32Version) == 0x14);
2660 dxbcHash(&pHdr->u32Version, pHdr->cbTotal - RT_UOFFSETOF(DXBCHeader, u32Version), pHdr->au8Hash);
2661
2662 return VINF_SUCCESS;
2663}
2664
2665
2666int DXShaderCreateDXBC(DXShaderInfo const *pInfo, void **ppvDXBC, uint32_t *pcbDXBC)
2667{
2668 /* Build DXBC container. */
2669 int rc;
2670 DXBCByteWriter dxbcByteWriter;
2671 DXBCByteWriter *w = &dxbcByteWriter;
2672 if (dxbcByteWriterInit(w, 4096 + pInfo->cbBytecode))
2673 {
2674 rc = dxbcCreateFromInfo(pInfo, pInfo->pvBytecode, pInfo->cbBytecode, w);
2675 if (RT_SUCCESS(rc))
2676 dxbcByteWriterFetchData(w, ppvDXBC, pcbDXBC);
2677 }
2678 else
2679 rc = VERR_NO_MEMORY;
2680 return rc;
2681}
2682
2683
2684static char const *dxbcGetOutputSemanticName(DXShaderInfo const *pInfo, uint32_t idxRegister, uint32_t u32BlobType,
2685 uint32_t cSignature, SVGA3dDXSignatureEntry const *paSignature,
2686 SVGA3dDXSignatureSemanticName *pSemanticName)
2687{
2688 for (uint32_t i = 0; i < cSignature; ++i)
2689 {
2690 SVGA3dDXSignatureEntry const *p = &paSignature[i];
2691 if (p->registerIndex == idxRegister)
2692 {
2693 AssertReturn(p->semanticName < SVGADX_SIGNATURE_SEMANTIC_NAME_MAX, NULL);
2694 VGPUSemanticInfo const *pSemanticInfo = dxbcSemanticInfo(pInfo, p->semanticName, u32BlobType);
2695 *pSemanticName = p->semanticName;
2696 return pSemanticInfo->pszName;
2697 }
2698 }
2699 return NULL;
2700}
2701
2702char const *DXShaderGetOutputSemanticName(DXShaderInfo const *pInfo, uint32_t idxRegister, SVGA3dDXSignatureSemanticName *pSemanticName)
2703{
2704 return dxbcGetOutputSemanticName(pInfo, idxRegister, DXBC_BLOB_TYPE_OSGN, pInfo->cOutputSignature, &pInfo->aOutputSignature[0], pSemanticName);
2705}
2706
2707VGPU10_RESOURCE_RETURN_TYPE DXShaderResourceReturnTypeFromFormat(SVGA3dSurfaceFormat format)
2708{
2709 /** @todo This is auto-generated from format names and needs a review. */
2710 switch (format)
2711 {
2712 case SVGA3D_R32G32B32A32_UINT: return VGPU10_RETURN_TYPE_UINT;
2713 case SVGA3D_R32G32B32A32_SINT: return VGPU10_RETURN_TYPE_SINT;
2714 case SVGA3D_R32G32B32_FLOAT: return VGPU10_RETURN_TYPE_FLOAT;
2715 case SVGA3D_R32G32B32_UINT: return VGPU10_RETURN_TYPE_UINT;
2716 case SVGA3D_R32G32B32_SINT: return VGPU10_RETURN_TYPE_SINT;
2717 case SVGA3D_R16G16B16A16_UINT: return VGPU10_RETURN_TYPE_UINT;
2718 case SVGA3D_R16G16B16A16_SNORM: return VGPU10_RETURN_TYPE_SNORM;
2719 case SVGA3D_R16G16B16A16_SINT: return VGPU10_RETURN_TYPE_SINT;
2720 case SVGA3D_R32G32_UINT: return VGPU10_RETURN_TYPE_UINT;
2721 case SVGA3D_R32G32_SINT: return VGPU10_RETURN_TYPE_SINT;
2722 case SVGA3D_D32_FLOAT_S8X24_UINT: return VGPU10_RETURN_TYPE_UINT;
2723 case SVGA3D_R32_FLOAT_X8X24: return VGPU10_RETURN_TYPE_FLOAT;
2724 case SVGA3D_X32_G8X24_UINT: return VGPU10_RETURN_TYPE_UINT;
2725 case SVGA3D_R10G10B10A2_UINT: return VGPU10_RETURN_TYPE_UINT;
2726 case SVGA3D_R11G11B10_FLOAT: return VGPU10_RETURN_TYPE_FLOAT;
2727 case SVGA3D_R8G8B8A8_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2728 case SVGA3D_R8G8B8A8_UNORM_SRGB: return VGPU10_RETURN_TYPE_UNORM;
2729 case SVGA3D_R8G8B8A8_UINT: return VGPU10_RETURN_TYPE_UINT;
2730 case SVGA3D_R8G8B8A8_SINT: return VGPU10_RETURN_TYPE_SINT;
2731 case SVGA3D_R16G16_UINT: return VGPU10_RETURN_TYPE_UINT;
2732 case SVGA3D_R16G16_SINT: return VGPU10_RETURN_TYPE_SINT;
2733 case SVGA3D_D32_FLOAT: return VGPU10_RETURN_TYPE_FLOAT;
2734 case SVGA3D_R32_UINT: return VGPU10_RETURN_TYPE_UINT;
2735 case SVGA3D_R32_SINT: return VGPU10_RETURN_TYPE_SINT;
2736 case SVGA3D_D24_UNORM_S8_UINT: return VGPU10_RETURN_TYPE_UNORM;
2737 case SVGA3D_R24_UNORM_X8: return VGPU10_RETURN_TYPE_UNORM;
2738 case SVGA3D_X24_G8_UINT: return VGPU10_RETURN_TYPE_UINT;
2739 case SVGA3D_R8G8_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2740 case SVGA3D_R8G8_UINT: return VGPU10_RETURN_TYPE_UINT;
2741 case SVGA3D_R8G8_SINT: return VGPU10_RETURN_TYPE_SINT;
2742 case SVGA3D_R16_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2743 case SVGA3D_R16_UINT: return VGPU10_RETURN_TYPE_UINT;
2744 case SVGA3D_R16_SNORM: return VGPU10_RETURN_TYPE_SNORM;
2745 case SVGA3D_R16_SINT: return VGPU10_RETURN_TYPE_SINT;
2746 case SVGA3D_R8_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2747 case SVGA3D_R8_UINT: return VGPU10_RETURN_TYPE_UINT;
2748 case SVGA3D_R8_SNORM: return VGPU10_RETURN_TYPE_SNORM;
2749 case SVGA3D_R8_SINT: return VGPU10_RETURN_TYPE_SINT;
2750 case SVGA3D_R8G8_B8G8_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2751 case SVGA3D_G8R8_G8B8_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2752 case SVGA3D_BC1_UNORM_SRGB: return VGPU10_RETURN_TYPE_UNORM;
2753 case SVGA3D_BC2_UNORM_SRGB: return VGPU10_RETURN_TYPE_UNORM;
2754 case SVGA3D_BC3_UNORM_SRGB: return VGPU10_RETURN_TYPE_UNORM;
2755 case SVGA3D_BC4_SNORM: return VGPU10_RETURN_TYPE_SNORM;
2756 case SVGA3D_BC5_SNORM: return VGPU10_RETURN_TYPE_SNORM;
2757 case SVGA3D_R10G10B10_XR_BIAS_A2_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2758 case SVGA3D_B8G8R8A8_UNORM_SRGB: return VGPU10_RETURN_TYPE_UNORM;
2759 case SVGA3D_B8G8R8X8_UNORM_SRGB: return VGPU10_RETURN_TYPE_UNORM;
2760 case SVGA3D_R32G32B32A32_FLOAT: return VGPU10_RETURN_TYPE_FLOAT;
2761 case SVGA3D_R16G16B16A16_FLOAT: return VGPU10_RETURN_TYPE_FLOAT;
2762 case SVGA3D_R16G16B16A16_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2763 case SVGA3D_R32G32_FLOAT: return VGPU10_RETURN_TYPE_FLOAT;
2764 case SVGA3D_R10G10B10A2_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2765 case SVGA3D_R8G8B8A8_SNORM: return VGPU10_RETURN_TYPE_SNORM;
2766 case SVGA3D_R16G16_FLOAT: return VGPU10_RETURN_TYPE_FLOAT;
2767 case SVGA3D_R16G16_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2768 case SVGA3D_R16G16_SNORM: return VGPU10_RETURN_TYPE_SNORM;
2769 case SVGA3D_R32_FLOAT: return VGPU10_RETURN_TYPE_FLOAT;
2770 case SVGA3D_R8G8_SNORM: return VGPU10_RETURN_TYPE_SNORM;
2771 case SVGA3D_R16_FLOAT: return VGPU10_RETURN_TYPE_FLOAT;
2772 case SVGA3D_D16_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2773 case SVGA3D_A8_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2774 case SVGA3D_BC1_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2775 case SVGA3D_BC2_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2776 case SVGA3D_BC3_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2777 case SVGA3D_B5G6R5_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2778 case SVGA3D_B5G5R5A1_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2779 case SVGA3D_B8G8R8A8_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2780 case SVGA3D_B8G8R8X8_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2781 case SVGA3D_BC4_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2782 case SVGA3D_BC5_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2783 case SVGA3D_B4G4R4A4_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2784 case SVGA3D_BC7_UNORM: return VGPU10_RETURN_TYPE_UNORM;
2785 case SVGA3D_BC7_UNORM_SRGB: return VGPU10_RETURN_TYPE_UNORM;
2786 case SVGA3D_R9G9B9E5_SHAREDEXP: return VGPU10_RETURN_TYPE_FLOAT;
2787 default:
2788 break;
2789 }
2790 return VGPU10_RETURN_TYPE_UNORM;
2791}
2792
2793
2794SVGA3dDXSignatureRegisterComponentType DXShaderComponentTypeFromFormat(SVGA3dSurfaceFormat format)
2795{
2796 /** @todo This is auto-generated from format names and needs a review. */
2797 switch (format)
2798 {
2799 case SVGA3D_R32G32B32A32_UINT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_UINT32;
2800 case SVGA3D_R32G32B32A32_SINT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_SINT32;
2801 case SVGA3D_R32G32B32_FLOAT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2802 case SVGA3D_R32G32B32_UINT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_UINT32;
2803 case SVGA3D_R32G32B32_SINT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_SINT32;
2804 case SVGA3D_R16G16B16A16_UINT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_UINT32;
2805 case SVGA3D_R16G16B16A16_SNORM: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2806 case SVGA3D_R16G16B16A16_SINT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_SINT32;
2807 case SVGA3D_R32G32_UINT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_UINT32;
2808 case SVGA3D_R32G32_SINT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_SINT32;
2809 case SVGA3D_D32_FLOAT_S8X24_UINT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_UINT32;
2810 case SVGA3D_R32_FLOAT_X8X24: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2811 case SVGA3D_X32_G8X24_UINT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_UINT32;
2812 case SVGA3D_R10G10B10A2_UINT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_UINT32;
2813 case SVGA3D_R11G11B10_FLOAT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2814 case SVGA3D_R8G8B8A8_UNORM: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2815 case SVGA3D_R8G8B8A8_UNORM_SRGB: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2816 case SVGA3D_R8G8B8A8_UINT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_UINT32;
2817 case SVGA3D_R8G8B8A8_SINT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_SINT32;
2818 case SVGA3D_R16G16_UINT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_UINT32;
2819 case SVGA3D_R16G16_SINT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_SINT32;
2820 case SVGA3D_D32_FLOAT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2821 case SVGA3D_R32_UINT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_UINT32;
2822 case SVGA3D_R32_SINT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_SINT32;
2823 case SVGA3D_D24_UNORM_S8_UINT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2824 case SVGA3D_R24_UNORM_X8: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2825 case SVGA3D_X24_G8_UINT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_UINT32;
2826 case SVGA3D_R8G8_UNORM: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2827 case SVGA3D_R8G8_UINT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_UINT32;
2828 case SVGA3D_R8G8_SINT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_SINT32;
2829 case SVGA3D_R16_UNORM: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2830 case SVGA3D_R16_UINT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_UINT32;
2831 case SVGA3D_R16_SNORM: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2832 case SVGA3D_R16_SINT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_SINT32;
2833 case SVGA3D_R8_UNORM: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2834 case SVGA3D_R8_UINT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_UINT32;
2835 case SVGA3D_R8_SNORM: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2836 case SVGA3D_R8_SINT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_SINT32;
2837 case SVGA3D_R8G8_B8G8_UNORM: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2838 case SVGA3D_G8R8_G8B8_UNORM: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2839 case SVGA3D_BC1_UNORM_SRGB: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2840 case SVGA3D_BC2_UNORM_SRGB: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2841 case SVGA3D_BC3_UNORM_SRGB: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2842 case SVGA3D_BC4_SNORM: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2843 case SVGA3D_BC5_SNORM: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2844 case SVGA3D_R10G10B10_XR_BIAS_A2_UNORM: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2845 case SVGA3D_B8G8R8A8_UNORM_SRGB: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2846 case SVGA3D_B8G8R8X8_UNORM_SRGB: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2847 case SVGA3D_R32G32B32A32_FLOAT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2848 case SVGA3D_R16G16B16A16_FLOAT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2849 case SVGA3D_R16G16B16A16_UNORM: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2850 case SVGA3D_R32G32_FLOAT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2851 case SVGA3D_R10G10B10A2_UNORM: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2852 case SVGA3D_R8G8B8A8_SNORM: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2853 case SVGA3D_R16G16_FLOAT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2854 case SVGA3D_R16G16_UNORM: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2855 case SVGA3D_R16G16_SNORM: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2856 case SVGA3D_R32_FLOAT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2857 case SVGA3D_R8G8_SNORM: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2858 case SVGA3D_R16_FLOAT: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2859 case SVGA3D_D16_UNORM: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2860 case SVGA3D_A8_UNORM: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2861 case SVGA3D_BC1_UNORM: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2862 case SVGA3D_BC2_UNORM: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2863 case SVGA3D_BC3_UNORM: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2864 case SVGA3D_B5G6R5_UNORM: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2865 case SVGA3D_B5G5R5A1_UNORM: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2866 case SVGA3D_B8G8R8A8_UNORM: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2867 case SVGA3D_B8G8R8X8_UNORM: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2868 case SVGA3D_BC4_UNORM: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2869 case SVGA3D_BC5_UNORM: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2870 case SVGA3D_B4G4R4A4_UNORM: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2871 case SVGA3D_BC7_UNORM: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2872 case SVGA3D_BC7_UNORM_SRGB: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2873 case SVGA3D_R9G9B9E5_SHAREDEXP: return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2874 default:
2875 break;
2876 }
2877 return SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32;
2878}
2879
2880
2881int DXShaderUpdateResources(DXShaderInfo const *pInfo, VGPU10_RESOURCE_DIMENSION *paResourceDimension,
2882 VGPU10_RESOURCE_RETURN_TYPE *paResourceReturnType, uint32_t cResources)
2883{
2884 for (uint32_t i = 0; i < pInfo->cDclResource; ++i)
2885 {
2886 VGPU10_RESOURCE_DIMENSION const resourceDimension = i < cResources ? paResourceDimension[i] : VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
2887 AssertContinue(resourceDimension <= VGPU10_RESOURCE_DIMENSION_TEXTURECUBEARRAY);
2888
2889 VGPU10_RESOURCE_RETURN_TYPE const resourceReturnType = i < cResources ? paResourceReturnType[i] : VGPU10_RETURN_TYPE_FLOAT;
2890 AssertContinue(resourceReturnType <= VGPU10_RETURN_TYPE_MIXED);
2891
2892 uint32_t const offToken = pInfo->aOffDclResource[i];
2893 AssertContinue(offToken < pInfo->cbBytecode);
2894 if (offToken == 0) /* nothing at this index */
2895 continue;
2896
2897 uint32_t *paToken = (uint32_t *)((uintptr_t)pInfo->pvBytecode + offToken);
2898
2899 VGPU10OpcodeToken0 *pOpcode = (VGPU10OpcodeToken0 *)&paToken[0];
2900 pOpcode->resourceDimension = resourceDimension;
2901 // paToken[1] unmodified
2902 // paToken[2] unmodified
2903 VGPU10ResourceReturnTypeToken *pReturnTypeToken = (VGPU10ResourceReturnTypeToken *)&paToken[3];
2904 pReturnTypeToken->component0 = (uint8_t)resourceReturnType;
2905 pReturnTypeToken->component1 = (uint8_t)resourceReturnType;
2906 pReturnTypeToken->component2 = (uint8_t)resourceReturnType;
2907 pReturnTypeToken->component3 = (uint8_t)resourceReturnType;
2908 }
2909
2910 return VINF_SUCCESS;
2911}
2912
2913#ifdef DXBC_STANDALONE_TEST
2914static int dxbcCreateFromBytecode(void const *pvShaderCode, uint32_t cbShaderCode, void **ppvDXBC, uint32_t *pcbDXBC)
2915{
2916 /* Parse the shader bytecode and create DXBC container with resource, signature and shader bytecode blobs. */
2917 DXShaderInfo info;
2918 RT_ZERO(info);
2919 int rc = DXShaderParse(pvShaderCode, cbShaderCode, &info);
2920 if (RT_SUCCESS(rc))
2921 rc = DXShaderCreateDXBC(&info, ppvDXBC, pcbDXBC);
2922 return rc;
2923}
2924
2925static int parseShaderVM(void const *pvShaderCode, uint32_t cbShaderCode)
2926{
2927 void *pv = NULL;
2928 uint32_t cb = 0;
2929 int rc = dxbcCreateFromBytecode(pvShaderCode, cbShaderCode, &pv, &cb);
2930 if (RT_SUCCESS(rc))
2931 {
2932 /* Hexdump DXBC */
2933 printf("{\n");
2934 uint8_t *pu8 = (uint8_t *)pv;
2935 for (uint32_t i = 0; i < cb; ++i)
2936 {
2937 if ((i % 16) == 0)
2938 {
2939 if (i > 0)
2940 printf(",\n");
2941
2942 printf(" 0x%02x", pu8[i]);
2943 }
2944 else
2945 {
2946 printf(", 0x%02x", pu8[i]);
2947 }
2948 }
2949 printf("\n");
2950 printf("};\n");
2951
2952 RTMemFree(pv);
2953 }
2954
2955 return rc;
2956}
2957
2958static DXBCBlobHeader *dxbcFindBlob(DXBCHeader *pDXBCHeader, uint32_t u32BlobType)
2959{
2960 uint8_t const *pu8DXBCBegin = (uint8_t *)pDXBCHeader;
2961 for (uint32_t i = 0; i < pDXBCHeader->cBlob; ++i)
2962 {
2963 DXBCBlobHeader *pCurrentBlob = (DXBCBlobHeader *)&pu8DXBCBegin[pDXBCHeader->aBlobOffset[i]];
2964 if (pCurrentBlob->u32BlobType == u32BlobType)
2965 return pCurrentBlob;
2966 }
2967 return NULL;
2968}
2969
2970static int dxbcExtractShaderCode(DXBCHeader *pDXBCHeader, void **ppvCode, uint32_t *pcbCode)
2971{
2972 DXBCBlobHeader *pBlob = dxbcFindBlob(pDXBCHeader, DXBC_BLOB_TYPE_SHDR);
2973 AssertReturn(pBlob, VERR_NOT_IMPLEMENTED);
2974
2975 DXBCBlobSHDR *pSHDR = (DXBCBlobSHDR *)&pBlob[1];
2976 *pcbCode = pSHDR->cToken * 4;
2977 *ppvCode = RTMemAlloc(*pcbCode);
2978 AssertReturn(*ppvCode, VERR_NO_MEMORY);
2979
2980 memcpy(*ppvCode, pSHDR, *pcbCode);
2981 return VINF_SUCCESS;
2982}
2983
2984static int parseShaderDXBC(void const *pvDXBC)
2985{
2986 DXBCHeader *pDXBCHeader = (DXBCHeader *)pvDXBC;
2987 void *pvShaderCode = NULL;
2988 uint32_t cbShaderCode = 0;
2989 int rc = dxbcExtractShaderCode(pDXBCHeader, &pvShaderCode, &cbShaderCode);
2990 if (RT_SUCCESS(rc))
2991 {
2992 rc = parseShaderVM(pvShaderCode, cbShaderCode);
2993 RTMemFree(pvShaderCode);
2994 }
2995 return rc;
2996}
2997#endif /* DXBC_STANDALONE_TEST */
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette