VirtualBox

source: vbox/trunk/src/VBox/Runtime/common/asm/ASMMemFirstMismatchingU8.asm@ 97793

Last change on this file since 97793 was 96407, checked in by vboxsync, 2 years ago

scm copyright and license note update

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 8.8 KB
Line 
1; $Id: ASMMemFirstMismatchingU8.asm 96407 2022-08-22 17:43:14Z vboxsync $
2;; @file
3; IPRT - ASMMemFirstMismatchingU8().
4;
5
6;
7; Copyright (C) 2006-2022 Oracle and/or its affiliates.
8;
9; This file is part of VirtualBox base platform packages, as
10; available from https://www.virtualbox.org.
11;
12; This program is free software; you can redistribute it and/or
13; modify it under the terms of the GNU General Public License
14; as published by the Free Software Foundation, in version 3 of the
15; License.
16;
17; This program is distributed in the hope that it will be useful, but
18; WITHOUT ANY WARRANTY; without even the implied warranty of
19; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20; General Public License for more details.
21;
22; You should have received a copy of the GNU General Public License
23; along with this program; if not, see <https://www.gnu.org/licenses>.
24;
25; The contents of this file may alternatively be used under the terms
26; of the Common Development and Distribution License Version 1.0
27; (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
28; in the VirtualBox distribution, in which case the provisions of the
29; CDDL are applicable instead of those of the GPL.
30;
31; You may elect to license modified versions of this file under the
32; terms and conditions of either the GPL or the CDDL or both.
33;
34; SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
35;
36
37
38;*******************************************************************************
39;* Header Files *
40;*******************************************************************************
41%define RT_ASM_WITH_SEH64
42%include "iprt/asmdefs.mac"
43
44
45BEGINCODE
46
;;
; Finds the first non-zero byte in a memory block.
;
; This is ASMMemFirstMismatchingU8 with the fill value fixed at zero: it sets
; up the same stack frame / saved registers as the generic function, loads
; the pointer and byte count into the scan registers, zeroes the accumulator
; and then jumps into the generic function's scan code.
;
; @param    msc:rcx gcc:rdi 32-bit:[ebp+08h] 16-bit:[bp+04h] (far)  pv  The memory block.
; @param    msc:rdx gcc:rsi 32-bit:[ebp+0ch] 16-bit:[bp+08h]        cb  Number of bytes in the block.
;
RT_BEGINPROC ASMMemFirstNonZero
        ;
        ; Prologue - must leave the same state behind as the prologue of
        ; ASMMemFirstMismatchingU8, since we return via its epilogues.
        ;
%if ARCH_BITS == 64
 %ifdef ASM_CALL64_MSC
        mov     r9, rdi                 ; rdi is saved in r9 for the MSC convention
 %endif
%else
        push    xBP
        mov     xBP, xSP
        push    xDI
 %if ARCH_BITS == 16
        push    es
 %endif
%endif
SEH64_END_PROLOGUE

        ;
        ; Load the scan registers: xDI=pv, xCX=cb, xAX=0 (the byte pattern).
        ; An empty block (cb == 0) goes straight to the 'all same' return.
        ;
%if ARCH_BITS == 64
 %ifdef ASM_CALL64_MSC
        mov     rdi, rcx                ; pv
        mov     rcx, rdx                ; cb
 %else
        mov     rcx, rsi                ; cb (pv is already in rdi)
 %endif
        jrcxz   RT_CONCAT(NAME(ASMMemFirstMismatchingU8),.return_all_same)
        xor     eax, eax

%elif ARCH_BITS == 32
        mov     ecx, [ebp + 0ch]        ; cb
        jecxz   RT_CONCAT(NAME(ASMMemFirstMismatchingU8),.return_all_same)
        mov     edi, [ebp + 08h]        ; pv
        xor     eax, eax

%elif ARCH_BITS == 16
        mov     cx, [bp + 08h]          ; cb
        jcxz    RT_CONCAT(NAME(ASMMemFirstMismatchingU8),.return16_all_same)
        les     di, [bp + 04h]          ; pv (far)
        xor     ax, ax

%else
 %error "Invalid ARCH_BITS value"
%endif

        ;
        ; Continue in ASMMemFirstMismatchingU8, right after its input normalization.
        ;
        jmp     RT_CONCAT(NAME(ASMMemFirstMismatchingU8),.is_all_zero_joining)
ENDPROC    ASMMemFirstNonZero
103
104
;;
; Inverted memchr.
;
; Scans a memory block for the first byte that differs from @a u8.  The block
; is scanned a machine word at a time where possible; when a word mismatches,
; that word is re-scanned byte by byte to locate the exact byte.
;
; @returns  Pointer to the byte which doesn't equal u8.
;           (16-bit: far pointer in dx:ax.)
; @returns  NULL if all equal to u8 (also when cb is zero).
;
; @param    msc:rcx gcc:rdi 32-bit:[ebp+08h] 16-bit:[bp+04h] (far)  pv  Pointer to the memory block.
; @param    msc:rdx gcc:rsi 32-bit:[ebp+0ch] 16-bit:[bp+08h]        cb  Number of bytes in the block.
;                       The original note says this MUST be aligned on 32-bit.
;                       NOTE(review): the code below scans any unaligned tail
;                       byte by byte, so confirm whether that restriction
;                       still applies.
; @param    msc:r8b gcc:dl  32-bit:[ebp+10h] 16-bit:[bp+0ah]        u8  The value it's supposed to be filled with.
;
; @cproto   DECLINLINE(void *) ASMMemFirstMismatchingU8(void const *pv, size_t cb, uint8_t u8)
;
RT_BEGINPROC ASMMemFirstMismatchingU8
        ;
        ; Prologue.
        ;
%if ARCH_BITS != 64
        push    xBP
        mov     xBP, xSP
        push    xDI
 %if ARCH_BITS == 16
        push    es
 %endif
%elifdef ASM_CALL64_MSC
        mov     r9, rdi                 ; save rdi in r9
%endif
SEH64_END_PROLOGUE

%if ARCH_BITS != 16
        ;
        ; The 32-bit and 64-bit variant of the code.
        ;

        ; Normalize input; xDI=pv, xCX=cb, xAX=u8 replicated into every byte.
 %if ARCH_BITS == 64
  %ifdef ASM_CALL64_MSC
        mov     rdi, rcx
        mov     rcx, rdx
        jrcxz   .return_all_same
        movzx   r8d, r8b
        mov     rax, qword 0101010101010101h
        imul    rax, r8                 ; rax = u8 in all eight bytes
  %else
        mov     rcx, rsi
        jrcxz   .return_all_same
        movzx   edx, dl
        mov     rax, qword 0101010101010101h
        imul    rax, rdx                ; rax = u8 in all eight bytes
  %endif

 %elif ARCH_BITS == 32
        mov     ecx, [ebp + 0ch]
        jecxz   .return_all_same
        mov     edi, [ebp + 08h]
        movzx   eax, byte [ebp + 10h]
        mov     ah, al                  ; ax  = u8:u8
        movzx   edx, ax
        shl     eax, 16
        or      eax, edx                ; eax = u8 in all four bytes
 %else
  %error "Invalid ARCH_BITS value"
 %endif

        ; ASMMemFirstNonZero joins here with xDI=pv, xCX=cb (non-zero), xAX=0.
.is_all_zero_joining:
        cld

        ; Unaligned pointer? Align it (elsewhere).
        test    edi, xCB - 1
        jnz     .unaligned_pv
.aligned_pv:

        ; Do the dword/qword scan.
        mov     edx, xCB - 1
        and     edx, ecx                ; Remaining bytes for tail scan
 %if ARCH_BITS == 64
        shr     xCX, 3                  ; ZF=1 if no whole qwords, so the jne below falls thru
        repe scasq
 %else
        shr     xCX, 2                  ; ZF=1 if no whole dwords, so the jne below falls thru
        repe scasd
 %endif
        jne     .multibyte_mismatch

        ; Prep for tail scan.
        mov     ecx, edx                ; mov leaves ZF=1 intact for the xCX == 0 case

        ;
        ; Byte by byte scan.
        ;
.byte_by_byte:
        repe scasb
        jne     .return_xDI

.return_all_same:
        xor     eax, eax
 %ifdef ASM_CALL64_MSC
        mov     rdi, r9                 ; restore rdi
 %elif ARCH_BITS == 32
        pop     edi
        leave
 %endif
        ret

        ; Return after byte scan mismatch.
.return_xDI:
        lea     xAX, [xDI - 1]          ; scasb advanced xDI past the mismatching byte
 %ifdef ASM_CALL64_MSC
        mov     rdi, r9                 ; restore rdi
 %elif ARCH_BITS == 32
        pop     edi
        leave
 %endif
        ret

        ;
        ; Multibyte mismatch. We rewind and do a byte scan of the remainder.
        ; (can't just search the qword as the buffer must be considered volatile).
        ;
.multibyte_mismatch:
        lea     xDI, [xDI - xCB]        ; back to the start of the mismatching word
        lea     xCX, [xCX * xCB + xCB]  ; bytes in the unscanned words + the rewound one
        ; Add the tail byte count.  This must be a full width operation: a
        ; 32-bit write would zero the upper half of rcx and thereby truncate
        ; byte counts of 4GB and above (rdx holds the zero extended tail count
        ; from the 'mov edx / and edx' sequence at .aligned_pv).
 %if ARCH_BITS == 64
        or      rcx, rdx
 %else
        or      ecx, edx
 %endif
        jmp     .byte_by_byte

        ;
        ; Unaligned pointer. If it's worth it, align the pointer, but if the
        ; memory block is too small do the byte scan variant.
        ;
.unaligned_pv:
        cmp     xCX, 4*xCB              ; 4 steps seems reasonable.
        jbe     .byte_by_byte

        ; Unrolled buffer realignment (at most xCB - 1 single byte steps).
 %if ARCH_BITS == 64
        dec     xCX
        scasb
        jne     .return_xDI
        test    edi, xCB - 1
        jz      .aligned_pv

        dec     xCX
        scasb
        jne     .return_xDI
        test    edi, xCB - 1
        jz      .aligned_pv

        dec     xCX
        scasb
        jne     .return_xDI
        test    edi, xCB - 1
        jz      .aligned_pv

        dec     xCX
        scasb
        jne     .return_xDI
        test    edi, xCB - 1
        jz      .aligned_pv
 %endif

        dec     xCX
        scasb
        jne     .return_xDI
        test    edi, xCB - 1
        jz      .aligned_pv

        dec     xCX
        scasb
        jne     .return_xDI
        test    edi, xCB - 1
        jz      .aligned_pv

        dec     xCX
        scasb
        jne     .return_xDI
        jmp     .aligned_pv             ; last step, the pointer must be aligned now


%else ; ARCH_BITS == 16

        ;
        ; The 16-bit variant of the code is a little simpler since we're
        ; working with two byte words in the 'fast' scan.  We also keep
        ; this separate from the 32-bit/64-bit code because that allows
        ; avoiding a few rex prefixes here and there by using extended
        ; registers (e??) where we don't care about the whole register.
        ;
CPU 8086

        ; Load input parameters.
        mov     cx, [bp + 08h]          ; cb
        jcxz    .return16_all_same
        les     di, [bp + 04h]          ; pv (far)
        mov     al, [bp + 0ah]          ; u8
        mov     ah, al                  ; ax = u8:u8

        ; ASMMemFirstNonZero joins here with es:di=pv, cx=cb (non-zero), ax=0.
.is_all_zero_joining:
        cld

        ; Align the pointer.
        test    di, 1
        jz      .word_scan

        dec     cx
        scasb
        jne     .return16_di
        jcxz    .return16_all_same

        ; Scan word-by-word.
.word_scan:
        mov     dx, cx                  ; dx = bytes remaining at the start of the word scan
        shr     cx, 1                   ; ZF=1 if no whole words, so the jne below falls thru
        repe scasw
        jne     .word_mismatch

        ; do we have a tail byte?
        test    dl, 1
        jz      .return16_all_same
        scasb
        jne     .return16_di

.return16_all_same:
        xor     ax, ax
        xor     dx, dx
.return16:
        pop     es
        pop     di
        pop     bp
        ret

.word_mismatch:
        ; back up a word.
        inc     cx
        sub     di, 2

        ; Do byte-by-byte scanning of the rest of the buffer.
        shl     cx, 1
        ; Add the tail byte, if any.  Take it from dx (bytes remaining when the
        ; word scan started) rather than from the original cb parameter, since
        ; the two have opposite parity when an alignment byte was consumed above.
        and     dl, 1
        or      cl, dl
        repe scasb
        je      .return16_all_same

.return16_di:
        mov     ax, di
        dec     ax                      ; scasb advanced di past the mismatching byte
        mov     dx, es
        jmp     .return16

%endif  ; ARCH_BITS == 16
ENDPROC    ASMMemFirstMismatchingU8
355
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette