1 | ;------------------------------------------------------------------------------
|
---|
2 | ;
|
---|
3 | ; Copyright (c) 2022, Intel Corporation. All rights reserved.<BR>
|
---|
4 | ; SPDX-License-Identifier: BSD-2-Clause-Patent
|
---|
5 | ;
|
---|
6 | ; Abstract:
|
---|
7 | ;
|
---|
8 | ; Provide macro for register save/restore using SSE registers
|
---|
9 | ;
|
---|
10 | ;------------------------------------------------------------------------------
|
---|
11 |
|
---|
12 | ;
|
---|
13 | ; Define SSE and AVX instruction set
|
---|
14 | ;
|
---|
;
; SXMMN - store a 64-bit general purpose register into one quadword of an
;         XMM register (SSE 4.1 PINSRQ).
;   arg 1: XMM  destination XMM register; the quadword that is not selected
;               keeps its previous value
;   arg 2: IDX  quadword selector: 0 = bits [0:63], 1 = bits [64:127]
;   arg 3: REG  64-bit source register
;
; PINSRQ addresses only two quadword lanes, so the selector is masked with 1
; (a mask of 3 belongs to the dword form, which has four lanes).
;
%macro SXMMN 3
        pinsrq  %1, %3, (%2 & 1)
%endmacro
|
---|
22 |
|
---|
;
; LXMMN - load one quadword of an XMM register into a 64-bit general purpose
;         register (SSE 4.1 PEXTRQ).
;   arg 1: XMM  source XMM register (not modified)
;   arg 2: REG  64-bit destination register
;   arg 3: IDX  quadword selector: 0 = bits [0:63], 1 = bits [64:127]
;
; PEXTRQ addresses only two quadword lanes, so the selector is masked with 1
; (a mask of 3 belongs to the dword form, which has four lanes).
;
%macro LXMMN 3
        pextrq  %2, %1, (%3 & 1)
%endmacro
|
---|
29 |
|
---|
;
; AVX helper: SYMMN - copy an XMM register into one 128-bit half of a YMM
; register. The YMM register is used as both source and destination of
; VINSERTF128, so the half that is not selected is carried over unchanged.
;   arg 1: YMM  register to update
;   arg 2: IDX  half selector (0 - lower 128 bits, 1 - upper 128 bits)
;   arg 3: XMM  register holding the 128 bits to store
;
%macro SYMMN 3
        vinsertf128     %1, %1, %3, %2
%endmacro
|
---|
38 |
|
---|
;
; AVX helper: LYMMN - copy one 128-bit half of a YMM register into an XMM
; register (VEXTRACTF128). The YMM register is only read.
;   arg 1: YMM  register to read
;   arg 2: XMM  register receiving the selected half
;   arg 3: IDX  half selector (0 - lower 128 bits, 1 - upper 128 bits)
;
%macro LYMMN 3
        vextractf128    %2, %1, %3
%endmacro
|
---|
46 |
|
---|
;
; SAVE_REGS - park RBP, RBX, RSI, RDI and RSP in YMM registers.
;   YMM7[128:191] <- RBP        YMM7[192:255] <- RBX
;   YMM8[128:191] <- RSI        YMM8[192:255] <- RDI
;   RSP is stored last through SAVE_RSP (which updates YMM6).
; The lower 128 bits of YMM7 and YMM8 are left untouched.
; Modified: XMM5, YMM6, YMM7 and YMM8
;
%macro SAVE_REGS 0
        ; Pack {RBP, RBX} into XMM5 and move the pair to the top of YMM7.
        SXMMN   xmm5, 0, rbp
        SXMMN   xmm5, 1, rbx
        SYMMN   ymm7, 1, xmm5

        ; Pack {RSI, RDI} into XMM5 and move the pair to the top of YMM8.
        SXMMN   xmm5, 0, rsi
        SXMMN   xmm5, 1, rdi
        SYMMN   ymm8, 1, xmm5

        SAVE_RSP
%endmacro
|
---|
60 |
|
---|
;
; LOAD_REGS - counterpart of SAVE_REGS: reload RBP, RBX, RSI, RDI and RSP.
;   RBP <- YMM7[128:191]        RBX <- YMM7[192:255]
;   RSI <- YMM8[128:191]        RDI <- YMM8[192:255]
;   RSP is reloaded last through LOAD_RSP.
; Modified: XMM5, RBP, RBX, RSI, RDI and RSP
;
%macro LOAD_REGS 0
        ; Upper half of YMM7 -> XMM5 -> {RBP, RBX}
        LYMMN   ymm7, xmm5, 1
        LXMMN   xmm5, rbp, 0
        LXMMN   xmm5, rbx, 1

        ; Upper half of YMM8 -> XMM5 -> {RSI, RDI}
        LYMMN   ymm8, xmm5, 1
        LXMMN   xmm5, rsi, 0
        LXMMN   xmm5, rdi, 1

        LOAD_RSP
%endmacro
|
---|
;
; LOAD_RBP - reload RBP from YMM7[128:191].
; The upper half of YMM7 is copied to XMM5, whose low quadword is then moved
; to RBP.
; Modified: XMM5 and RBP
;
%macro LOAD_RBP 0
        LYMMN   ymm7, xmm5, 1
        movq    rbp, xmm5               ; low quadword of XMM5
%endmacro
|
---|
82 |
|
---|
;
; LOAD_RBX - reload RBX from YMM7[192:255].
; The upper half of YMM7 is copied to XMM5, whose high quadword is then
; extracted into RBX.
; Modified: XMM5 and RBX
;
%macro LOAD_RBX 0
        LYMMN   ymm7, xmm5, 1
        LXMMN   xmm5, rbx, 1            ; high quadword of XMM5
%endmacro
|
---|
91 |
|
---|
92 | ;
|
---|
93 | ; Upper half of YMM6 to save/restore Time Stamp, RSP
|
---|
94 | ;
|
---|
;
; SAVE_TS - store a time stamp in YMM6[192:255].
;   arg 1: general purpose register holding the time stamp
; Read-modify-write of the upper half of YMM6, so YMM6[128:191] keeps its
; current value.
; Modified: XMM5 and YMM6
;
%macro SAVE_TS 1
        LYMMN   ymm6, xmm5, 1           ; fetch current upper half
        SXMMN   xmm5, 1, %1             ; replace its high quadword
        SYMMN   ymm6, 1, xmm5           ; write the pair back
%endmacro
|
---|
105 |
|
---|
;
; LOAD_TS - fetch the time stamp kept in YMM6[192:255].
;   arg 1: general purpose register that receives the time stamp
; Modified: XMM5 and %1
;
%macro LOAD_TS 1
        LYMMN   ymm6, xmm5, 1           ; upper half of YMM6 -> XMM5
        LXMMN   xmm5, %1, 1             ; high quadword -> %1
%endmacro
|
---|
115 |
|
---|
;
; SAVE_RSP - store RSP in YMM6[128:191].
; Read-modify-write of the upper half of YMM6, so YMM6[192:255] keeps its
; current value.
; Modified: XMM5 and YMM6
;
%macro SAVE_RSP 0
        LYMMN   ymm6, xmm5, 1           ; fetch current upper half
        SXMMN   xmm5, 0, rsp            ; replace its low quadword
        SYMMN   ymm6, 1, xmm5           ; write the pair back
%endmacro
|
---|
125 |
|
---|
;
; LOAD_RSP - reload RSP from YMM6[128:191].
; The upper half of YMM6 is copied to XMM5, whose low quadword is then moved
; to RSP.
; Modified: XMM5 and RSP
;
%macro LOAD_RSP 0
        LYMMN   ymm6, xmm5, 1
        movq    rsp, xmm5               ; low quadword of XMM5
%endmacro
|
---|
134 |
|
---|
135 | ;
|
---|
136 | ; Upper half of YMM9 to save/restore UCODE status, BFV address
|
---|
137 | ;
|
---|
;
; SAVE_UCODE_STATUS - store the uCode status in YMM9[128:191].
;   arg 1: general purpose register which holds uCode status
; The value is inserted at quadword 0 of the upper half of YMM9, i.e. bits
; [128:191]; this is the same quadword LOAD_UCODE_STATUS reads back.
; YMM9[192:255] keeps its current value (read-modify-write of the upper half).
; Modified: XMM5 and YMM9
;
%macro SAVE_UCODE_STATUS 1
        LYMMN   ymm9, xmm5, 1           ; fetch current upper half
        SXMMN   xmm5, 0, %1             ; replace its low quadword
        SYMMN   ymm9, 1, xmm5           ; write the pair back
%endmacro
|
---|
148 |
|
---|
;
; LOAD_UCODE_STATUS - fetch the uCode status kept in YMM9[128:191].
;   arg 1: general purpose register where to save uCode status
; MOVQ reads the low quadword of XMM5, which after the extract holds
; YMM9[128:191]; this is the quadword SAVE_UCODE_STATUS writes.
; Modified: XMM5 and %1
;
%macro LOAD_UCODE_STATUS 1
        LYMMN   ymm9, xmm5, 1           ; upper half of YMM9 -> XMM5
        movq    %1, xmm5                ; low quadword -> %1
%endmacro
|
---|
158 |
|
---|
;
; SAVE_BFV - store the BFV address in YMM9[192:255].
;   arg 1: general purpose register which holds BFV address
; The value is inserted at quadword 1 of the upper half of YMM9, i.e. bits
; [192:255]; this is the same quadword LOAD_BFV reads back.
; YMM9[128:191] keeps its current value (read-modify-write of the upper half).
; Modified: XMM5 and YMM9
;
%macro SAVE_BFV 1
        LYMMN   ymm9, xmm5, 1           ; fetch current upper half
        SXMMN   xmm5, 1, %1             ; replace its high quadword
        SYMMN   ymm9, 1, xmm5           ; write the pair back
%endmacro
|
---|
169 |
|
---|
;
; LOAD_BFV - fetch the BFV address kept in YMM9[192:255].
;   arg 1: general purpose register where to save BFV address
; Quadword 1 of the extracted upper half is read, i.e. YMM9[192:255]; this is
; the quadword SAVE_BFV writes.
; Modified: XMM5 and %1
;
%macro LOAD_BFV 1
        LYMMN   ymm9, xmm5, 1           ; upper half of YMM9 -> XMM5
        LXMMN   xmm5, %1, 1             ; high quadword -> %1
%endmacro
|
---|
179 |
|
---|
180 | ;
|
---|
181 | ; Upper half of YMM10 to save/restore RCX
|
---|
182 | ;
|
---|
;
; SAVE_RCX - store RCX in YMM10[128:191].
; Read-modify-write of the upper half of YMM10, so YMM10[192:255] keeps its
; current value.
; Modified: XMM5 and YMM10
;
%macro SAVE_RCX 0
        LYMMN   ymm10, xmm5, 1          ; fetch current upper half
        SXMMN   xmm5, 0, rcx            ; replace its low quadword
        SYMMN   ymm10, 1, xmm5          ; write the pair back
%endmacro
|
---|
193 |
|
---|
;
; LOAD_RCX - reload RCX from YMM10[128:191].
; The upper half of YMM10 is copied to XMM5, whose low quadword is then moved
; to RCX.
; Modified: XMM5 and RCX
;
%macro LOAD_RCX 0
        LYMMN   ymm10, xmm5, 1
        movq    rcx, xmm5               ; low quadword of XMM5
%endmacro
|
---|
203 |
|
---|
;
; SAVE_TEMPORARY_RAM_SIZE - store TemporaryRamSize in YMM10[192:255].
;   arg 1: general purpose register which holds TemporaryRamSize
; Read-modify-write of the upper half of YMM10, so YMM10[128:191] keeps its
; current value.
; Modified: XMM5 and YMM10[192:255]
;
%macro SAVE_TEMPORARY_RAM_SIZE 1
        LYMMN   ymm10, xmm5, 1          ; fetch current upper half
        SXMMN   xmm5, 1, %1             ; replace its high quadword
        SYMMN   ymm10, 1, xmm5          ; write the pair back
%endmacro
|
---|
214 |
|
---|
;
; LOAD_TEMPORARY_RAM_SIZE - fetch TemporaryRamSize kept in YMM10[192:255].
;   arg 1: general purpose register where to save TemporaryRamSize
; Modified: XMM5 and %1
;
%macro LOAD_TEMPORARY_RAM_SIZE 1
        LYMMN   ymm10, xmm5, 1          ; upper half of YMM10 -> XMM5
        LXMMN   xmm5, %1, 1             ; high quadword -> %1
%endmacro
|
---|
224 |
|
---|
;
; CALL_YMM - stack-less call: record the address of the instruction that
; follows the macro in YMM7[128:191], then jump to the entry point. The
; callee comes back with RET_YMM.
;   arg 1: Entry  jump target (loaded into RSI before the jump)
; YMM7[192:255] and the lower half of YMM7 keep their current values.
; Modified: RSI, XMM5, YMM7
;
; NOTE(review): SAVE_REGS in this file stores RBP in the same YMM7[128:191]
; quadword, so a CALL_YMM issued after SAVE_REGS replaces the saved RBP -
; confirm that callers do not rely on both at once.
;
%macro CALL_YMM 1
        mov     rsi, %%Resume           ; return address
        LYMMN   ymm7, xmm5, 1           ; fetch current upper half of YMM7
        SXMMN   xmm5, 0, rsi            ; low quadword <- return address
        SYMMN   ymm7, 1, xmm5           ; write the pair back
        mov     rsi, %1
        jmp     rsi
%%Resume:
%endmacro
|
---|
;
; RET_YMM - stack-less return: jump to the address held in YMM7[128:191]
; (the quadword CALL_YMM fills in).
; Modified: RSI, XMM5
;
%macro RET_YMM 0
        LYMMN   ymm7, xmm5, 1           ; upper half of YMM7 -> XMM5
        movq    rsi, xmm5               ; low quadword = return address
        jmp     rsi
%endmacro
|
---|
248 |
|
---|
;
; ENABLE_SSE - initialize the x87 FPU, verify SSE and SSE 4.1 support, set
; CR4.OSFXSR / CR4.OSXMMEXCPT and load MXCSR.
;
; If CPUID reports that SSE or SSE 4.1 is missing, execution spins forever in
; the error loop inside this macro.
;
; RBX and RCX are preserved across CPUID through R11 and R10.
; Modified: RAX, RDX, R10, R11, CR4, x87 FPU state, MXCSR and flags
;
; All labels are macro-local (%%) so that the macro can be expanded more than
; once in a translation unit, and the inline constants are addressed
; RIP-relative so that no absolute address is embedded in the code.
;
%macro ENABLE_SSE 0
        ;
        ; Initialize floating point units.
        ; Jump over the inline constants and the error loop first.
        ;
        jmp     %%NextAddress
        align   4
        ;
        ; Float control word initial value:
        ; all exceptions masked, double-precision, round-to-nearest
        ;
%%FpuControlWord:
        DW      027Fh
        ;
        ; MXCSR initial value (32-bit register image):
        ; all exceptions masked, round-to-nearest; 01F80h leaves the
        ; flush-to-zero bit (bit 15) clear.
        ;
%%MmxControlWord:
        DD      01F80h
%%SseError:
        ;
        ; Processor has to support SSE
        ;
        jmp     %%SseError
%%NextAddress:
        finit
        lea     rax, [rel %%FpuControlWord]
        fldcw   [rax]

        ;
        ; Use CpuId instruction (CPUID.01H:EDX.SSE[bit 25] = 1) to test
        ; whether the processor supports SSE instruction.
        ;
        ; Save RBX to R11
        ; Save RCX to R10
        ;
        mov     r11, rbx
        mov     r10, rcx
        mov     eax, 1                  ; 32-bit write zero-extends into RAX
        cpuid
        bt      edx, 25
        jnc     %%SseError

        ;
        ; SSE 4.1 support (CPUID.01H:ECX[bit 19])
        ;
        bt      ecx, 19
        jnc     %%SseError
        ;
        ; Restore RBX from R11
        ; Restore RCX from R10
        ;
        mov     rbx, r11
        mov     rcx, r10

        ;
        ; Set OSFXSR bit (bit #9) & OSXMMEXCPT bit (bit #10)
        ;
        mov     rax, cr4
        or      rax, 00000600h
        mov     cr4, rax

        ;
        ; The processor should support SSE instruction and we can use
        ; ldmxcsr instruction
        ;
        lea     rax, [rel %%MmxControlWord]
        ldmxcsr [rax]
%endmacro
|
---|
315 |
|
---|
;
; ENABLE_AVX - verify AVX support, set CR4.OSXSAVE and enable the SSE and AVX
; state components in XCR0.
;
; If CPUID.01H:ECX.AVX[bit 28] is clear, execution spins forever in the error
; loop inside this macro.
;
; RBX and RCX are preserved across CPUID / XGETBV through R11 and R10.
; Modified: RAX, RDX, R10, R11, CR4, XCR0 and flags
;
; All labels are macro-local (%%) so that the macro can be expanded more than
; once in a translation unit.
;
%macro ENABLE_AVX 0
        ;
        ; Save RBX to R11
        ; Save RCX to R10
        ;
        mov     r11, rbx
        mov     r10, rcx
        mov     eax, 1
        cpuid
        bt      ecx, 28                 ; check AVX feature flag
        jc      %%EnableAvx
%%AvxError:
        ;
        ; Processor has to support AVX
        ;
        jmp     %%AvxError
%%EnableAvx:
        ;
        ; Set OSXSAVE bit (bit #18) to enable xgetbv/xsetbv instruction
        ;
        mov     rax, cr4
        or      rax, 00040000h
        mov     cr4, rax

        xor     ecx, ecx                ; index 0 (XCR0)
        xgetbv                          ; result in edx:eax
        or      eax, 00000006h          ; Set XCR0 bit #1 and bit #2 to enable SSE state and AVX state
        xsetbv
        ;
        ; Restore RBX from R11
        ; Restore RCX from R10
        ;
        mov     rbx, r11
        mov     rcx, r10
%endmacro
|
---|
352 |
|
---|