VirtualBox

source: vbox/trunk/src/libs/openssl-3.0.3/crypto/poly1305/asm/poly1305-mips.pl@ 95219

Last change on this file since 95219 was 94082, checked in by vboxsync, 3 years ago

libs/openssl-3.0.1: started applying and adjusting our OpenSSL changes to 3.0.1. bugref:10128

  • Property svn:executable set to *
File size: 9.1 KB
Line 
1#! /usr/bin/env perl
2# Copyright 2016-2020 The OpenSSL Project Authors. All Rights Reserved.
3#
4# Licensed under the Apache License 2.0 (the "License"). You may not use
5# this file except in compliance with the License. You can obtain a copy
6# in the file LICENSE in the source distribution or at
7# https://www.openssl.org/source/license.html
8
9
10# ====================================================================
11# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
12# project. The module is, however, dual licensed under OpenSSL and
13# CRYPTOGAMS licenses depending on where you obtain it. For further
14# details see http://www.openssl.org/~appro/cryptogams/.
15# ====================================================================
16
17# Poly1305 hash for MIPS64.
18#
19# May 2016
20#
21# Numbers are cycles per processed byte with poly1305_blocks alone.
22#
23# IALU/gcc
24# R1x000 5.64/+120% (big-endian)
25# Octeon II 3.80/+280% (little-endian)
26
27######################################################################
28# There are a number of MIPS ABIs in use; O32 and N32/64 are the most
29# widely used. Then there is a new contender: NUBI. It appears that if
30# one picks the latter, it's possible to arrange code in an ABI-neutral
31# manner. Therefore let's stick to the NUBI register layout:
32#
# Each variable below is a package global holding the assembler spelling
# of one register (map("\$$_",...) yields strings such as "$4"), so the
# heredocs further down can interpolate symbolic names instead of raw
# register numbers. Note that $t0 is register 2 and $s0..$s11 are
# registers 12..23 in this layout.
33($zero,$at,$t0,$t1,$t2)=map("\$$_",(0..2,24,25));
34($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
35($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7,$s8,$s9,$s10,$s11)=map("\$$_",(12..23));
36($gp,$tp,$sp,$fp,$ra)=map("\$$_",(3,28..31));
37#
38# The return value is placed in $a0. The following coding rules facilitate
39# interoperability:
40#
41# - never ever touch $tp, "thread pointer", former $gp [o32 can be
42# excluded from the rule, because it's specified volatile];
43# - copy return value to $t0, former $v0 [or to $a0 if you're adapting
44# old code];
45# - on O32 populate $a4-$a7 with 'lw $aN,4*N($sp)' if necessary;
46#
47# For reference, here is the register layout for the N32/64 MIPS ABIs:
48#
49# ($zero,$at,$v0,$v1)=map("\$$_",(0..3));
50# ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
51# ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25));
52# ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23));
53# ($gp,$sp,$fp,$ra)=map("\$$_",(28..31));
54#
55# <appro@openssl.org>
56#
57######################################################################
58
# Command-line handling: an optional flavour comes first, an optional
# output file name comes last.
59# $output is the last argument if it looks like a file (it has an extension)
60# $flavour is the first argument if it doesn't look like a file
61$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
62# supported flavours are o32,n32,64,nubi32,nubi64, default is o32
# NOTE: this module is 64-bit only, so the "o32" default (and nubi32) is
# rejected by the die below; callers must pass n32, 64 or nubi64.
63$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : "o32";
64
65die "MIPS64 only" unless ($flavour =~ /64|n32/i);
66
# Return-value register: $a0 for NUBI flavours, otherwise $t0, which is
# register 2 in the layout above (named $v0 in the N32/64 ABIs).
67$v0 = ($flavour =~ /nubi/i) ? $a0 : $t0;
# Operand of the .mask directive emitted for poly1305_blocks_internal:
# 0x0003f000 sets bits 12..17 (registers 12..17, i.e. $s0..$s5 here),
# 0x00030000 sets only bits 16..17 ($s4 and $s5 here).
68$SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? "0x0003f000" : "0x00030000";
69
# Symbolic names shared by all three routines: the incoming arguments,
# two input words, and five scratch registers. $tmp2 is $at, which goes
# together with the ".set noat" emitted in the assembly preamble.
70($ctx,$inp,$len,$padbit) = ($a0,$a1,$a2,$a3);
71($in0,$in1,$tmp0,$tmp1,$tmp2,$tmp3,$tmp4) = ($a4,$a5,$a6,$a7,$at,$t0,$t1);
72
# Assembly preamble plus poly1305_init($ctx, $inp).
#
# Preamble: includes mips_arch.h, defines the MSB/LSB byte offsets used
# with the ldl/ldr pairs (they swap depending on MIPSEB), and selects
# .text with "noat" and "noreorder".
#
# poly1305_init:
#   - zeroes the three 64-bit words at offsets 0, 8 and 16 of $ctx
#     (the words poly1305_blocks later loads as the hash value);
#   - if $inp is zero, skips the key setup and just returns;
#   - otherwise loads 16 key bytes (plain ld on MIPS64R6, ldl/ldr pairs
#     elsewhere) and byte-swaps them on big-endian builds, using
#     dsbh/dshd on MIPS64R2 or a shift-and-mask sequence otherwise;
#   - builds the mask 0x0ffffffc0fffffff with li/dsll/daddiu, applies it
#     to the first key word, then applies 0x0ffffffc0ffffffc to the second;
#   - stores r0 at 24($ctx), r1 at 32($ctx) and s1 = r1 + (r1 >> 2)
#     at 40($ctx);
#   - returns 0 in $v0.
#
# Everything between the heredoc markers is emitted verbatim (after
# variable interpolation), so explanatory notes live out here instead.
73$code.=<<___;
74#include "mips_arch.h"
75
76#ifdef MIPSEB
77# define MSB 0
78# define LSB 7
79#else
80# define MSB 7
81# define LSB 0
82#endif
83
84.text
85.set noat
86.set noreorder
87
88.align 5
89.globl poly1305_init
90.ent poly1305_init
91poly1305_init:
92 .frame $sp,0,$ra
93 .set reorder
94
95 sd $zero,0($ctx)
96 sd $zero,8($ctx)
97 sd $zero,16($ctx)
98
99 beqz $inp,.Lno_key
100
101#if defined(_MIPS_ARCH_MIPS64R6)
102 ld $in0,0($inp)
103 ld $in1,8($inp)
104#else
105 ldl $in0,0+MSB($inp)
106 ldl $in1,8+MSB($inp)
107 ldr $in0,0+LSB($inp)
108 ldr $in1,8+LSB($inp)
109#endif
110#ifdef MIPSEB
111# if defined(_MIPS_ARCH_MIPS64R2)
112 dsbh $in0,$in0 # byte swap
113 dsbh $in1,$in1
114 dshd $in0,$in0
115 dshd $in1,$in1
116# else
117 ori $tmp0,$zero,0xFF
118 dsll $tmp2,$tmp0,32
119 or $tmp0,$tmp2 # 0x000000FF000000FF
120
121 and $tmp1,$in0,$tmp0 # byte swap
122 and $tmp3,$in1,$tmp0
123 dsrl $tmp2,$in0,24
124 dsrl $tmp4,$in1,24
125 dsll $tmp1,24
126 dsll $tmp3,24
127 and $tmp2,$tmp0
128 and $tmp4,$tmp0
129 dsll $tmp0,8 # 0x0000FF000000FF00
130 or $tmp1,$tmp2
131 or $tmp3,$tmp4
132 and $tmp2,$in0,$tmp0
133 and $tmp4,$in1,$tmp0
134 dsrl $in0,8
135 dsrl $in1,8
136 dsll $tmp2,8
137 dsll $tmp4,8
138 and $in0,$tmp0
139 and $in1,$tmp0
140 or $tmp1,$tmp2
141 or $tmp3,$tmp4
142 or $in0,$tmp1
143 or $in1,$tmp3
144 dsrl $tmp1,$in0,32
145 dsrl $tmp3,$in1,32
146 dsll $in0,32
147 dsll $in1,32
148 or $in0,$tmp1
149 or $in1,$tmp3
150# endif
151#endif
152 li $tmp0,1
153 dsll $tmp0,32
154 daddiu $tmp0,-63
155 dsll $tmp0,28
156 daddiu $tmp0,-1 # 0ffffffc0fffffff
157
158 and $in0,$tmp0
159 daddiu $tmp0,-3 # 0ffffffc0ffffffc
160 and $in1,$tmp0
161
162 sd $in0,24($ctx)
163 dsrl $tmp0,$in1,2
164 sd $in1,32($ctx)
165 daddu $tmp0,$in1 # s1 = r1 + (r1 >> 2)
166 sd $tmp0,40($ctx)
167
168.Lno_key:
169 li $v0,0 # return 0
170 jr $ra
171.end poly1305_init
172___
# poly1305_blocks($ctx, $inp, $len, $padbit) and its worker
# poly1305_blocks_internal.
#
# poly1305_blocks shifts $len right by 4 to get the number of complete
# 16-byte blocks and branches to poly1305_blocks_internal when that is
# non-zero; otherwise it returns immediately.
#
# poly1305_blocks_internal:
#   - allocates a 6*8-byte frame and saves $s5/$s4 always, plus $s3..$s0
#     for NUBI flavours only (matching $SAVED_REGS_MASK in .mask);
#   - loads the hash value (offsets 0/8/16 of $ctx) and the key words
#     r0, r1 and s1 (offsets 24/32/40);
#   - per block: loads 16 input bytes (byte-swapped on big-endian builds),
#     decrements $len, advances $inp by 16, adds the input to h0/h1 with
#     carry and $padbit to h2, multiplies by the key via the
#     dmultu/mflo/mfhi macros, and folds the bits above bit 1 of the top
#     limb back into the low limbs ("final reduction");
#   - stores the hash value back, restores the saved registers and
#     returns, releasing the frame in the jr delay slot (.set noreorder).
#
# Local register assignment. The key-derived multiplier is deliberately
# named $rs1, not $s1: a lexical $s1 would shadow the *register* name $s1
# (register 13, used here as $h1) inside the heredocs, and the NUBI
# prologue/epilogue lines "sd/ld $s1,8($sp)" would then save and restore
# register 17 twice while leaving register 13 clobbered.
173{
174my ($h0,$h1,$h2,$r0,$r1,$rs1,$d0,$d1,$d2) =
175 ($s0,$s1,$s2,$s3,$s4,$s5,$in0,$in1,$t2);
176
177$code.=<<___;
178.align 5
179.globl poly1305_blocks
180.ent poly1305_blocks
181poly1305_blocks:
182 .set noreorder
183 dsrl $len,4 # number of complete blocks
184 bnez $len,poly1305_blocks_internal
185 nop
186 jr $ra
187 nop
188.end poly1305_blocks
189
190.align 5
191.ent poly1305_blocks_internal
192poly1305_blocks_internal:
193 .frame $sp,6*8,$ra
194 .mask $SAVED_REGS_MASK,-8
195 .set noreorder
196 dsubu $sp,6*8
197 sd $s5,40($sp)
198 sd $s4,32($sp)
199___
# Registers 12..15 ($s0..$s3 in the layout used here) are only listed in
# the NUBI value of $SAVED_REGS_MASK, so they are saved for NUBI only.
200$code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue
201 sd $s3,24($sp)
202 sd $s2,16($sp)
203 sd $s1,8($sp)
204 sd $s0,0($sp)
205___
206$code.=<<___;
207 .set reorder
208
209 ld $h0,0($ctx) # load hash value
210 ld $h1,8($ctx)
211 ld $h2,16($ctx)
212
213 ld $r0,24($ctx) # load key
214 ld $r1,32($ctx)
215 ld $rs1,40($ctx)
216
217.Loop:
218#if defined(_MIPS_ARCH_MIPS64R6)
219 ld $in0,0($inp) # load input
220 ld $in1,8($inp)
221#else
222 ldl $in0,0+MSB($inp) # load input
223 ldl $in1,8+MSB($inp)
224 ldr $in0,0+LSB($inp)
225 ldr $in1,8+LSB($inp)
226#endif
227 daddiu $len,-1
228 daddiu $inp,16
229#ifdef MIPSEB
230# if defined(_MIPS_ARCH_MIPS64R2)
231 dsbh $in0,$in0 # byte swap
232 dsbh $in1,$in1
233 dshd $in0,$in0
234 dshd $in1,$in1
235# else
236 ori $tmp0,$zero,0xFF
237 dsll $tmp2,$tmp0,32
238 or $tmp0,$tmp2 # 0x000000FF000000FF
239
240 and $tmp1,$in0,$tmp0 # byte swap
241 and $tmp3,$in1,$tmp0
242 dsrl $tmp2,$in0,24
243 dsrl $tmp4,$in1,24
244 dsll $tmp1,24
245 dsll $tmp3,24
246 and $tmp2,$tmp0
247 and $tmp4,$tmp0
248 dsll $tmp0,8 # 0x0000FF000000FF00
249 or $tmp1,$tmp2
250 or $tmp3,$tmp4
251 and $tmp2,$in0,$tmp0
252 and $tmp4,$in1,$tmp0
253 dsrl $in0,8
254 dsrl $in1,8
255 dsll $tmp2,8
256 dsll $tmp4,8
257 and $in0,$tmp0
258 and $in1,$tmp0
259 or $tmp1,$tmp2
260 or $tmp3,$tmp4
261 or $in0,$tmp1
262 or $in1,$tmp3
263 dsrl $tmp1,$in0,32
264 dsrl $tmp3,$in1,32
265 dsll $in0,32
266 dsll $in1,32
267 or $in0,$tmp1
268 or $in1,$tmp3
269# endif
270#endif
271 daddu $h0,$in0 # accumulate input
272 daddu $h1,$in1
273 sltu $tmp0,$h0,$in0
274 sltu $tmp1,$h1,$in1
275 daddu $h1,$tmp0
276
277 dmultu ($r0,$h0) # h0*r0
278 daddu $h2,$padbit
279 sltu $tmp0,$h1,$tmp0
280 mflo ($d0,$r0,$h0)
281 mfhi ($d1,$r0,$h0)
282
283 dmultu ($rs1,$h1) # h1*5*r1
284 daddu $tmp0,$tmp1
285 daddu $h2,$tmp0
286 mflo ($tmp0,$rs1,$h1)
287 mfhi ($tmp1,$rs1,$h1)
288
289 dmultu ($r1,$h0) # h0*r1
290 daddu $d0,$tmp0
291 daddu $d1,$tmp1
292 mflo ($tmp2,$r1,$h0)
293 mfhi ($d2,$r1,$h0)
294 sltu $tmp0,$d0,$tmp0
295 daddu $d1,$tmp0
296
297 dmultu ($r0,$h1) # h1*r0
298 daddu $d1,$tmp2
299 sltu $tmp2,$d1,$tmp2
300 mflo ($tmp0,$r0,$h1)
301 mfhi ($tmp1,$r0,$h1)
302 daddu $d2,$tmp2
303
304 dmultu ($rs1,$h2) # h2*5*r1
305 daddu $d1,$tmp0
306 daddu $d2,$tmp1
307 mflo ($tmp2,$rs1,$h2)
308
309 dmultu ($r0,$h2) # h2*r0
310 sltu $tmp0,$d1,$tmp0
311 daddu $d2,$tmp0
312 mflo ($tmp3,$r0,$h2)
313
314 daddu $d1,$tmp2
315 daddu $d2,$tmp3
316 sltu $tmp2,$d1,$tmp2
317 daddu $d2,$tmp2
318
319 li $tmp0,-4 # final reduction
320 and $tmp0,$d2
321 dsrl $tmp1,$d2,2
322 andi $h2,$d2,3
323 daddu $tmp0,$tmp1
324 daddu $h0,$d0,$tmp0
325 sltu $tmp0,$h0,$tmp0
326 daddu $h1,$d1,$tmp0
327 sltu $tmp0,$h1,$tmp0
328 daddu $h2,$h2,$tmp0
329
330 bnez $len,.Loop
331
332 sd $h0,0($ctx) # store hash value
333 sd $h1,8($ctx)
334 sd $h2,16($ctx)
335
336 .set noreorder
337 ld $s5,40($sp) # epilogue
338 ld $s4,32($sp)
339___
# Mirror of the NUBI-only part of the prologue.
340$code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi epilogue
341 ld $s3,24($sp)
342 ld $s2,16($sp)
343 ld $s1,8($sp)
344 ld $s0,0($sp)
345___
346$code.=<<___;
347 jr $ra
348 daddu $sp,6*8
349.end poly1305_blocks_internal
350___
351}
# poly1305_emit($ctx, $mac, $nonce): finalize the hash and write the
# 16-byte tag.
#
#   - loads the three hash words from offsets 0, 8 and 16 of $ctx;
#   - computes hash + 5 with carry propagation; bit 2 of the resulting
#     top word tells whether that sum overflowed 130 bits;
#   - turns that bit into an all-ones/all-zeros mask (dsubu from $zero)
#     and its complement (nor), and uses the pair to select either the
#     incremented value or the original low 128 bits without branching;
#   - loads four 32-bit words from $nonce with lwu, pairs them into two
#     64-bit values and adds them to the selected value with carry;
#   - stores the 128-bit result to $mac one byte at a time with sb,
#     least-significant byte first.
#
# The nonce words are read with lwu and no byte swap.
# NOTE(review): presumably the caller supplies them as host-order 32-bit
# words - confirm against the C caller.
#
# The trailing .rdata section carries an identification string.
352{
353my ($ctx,$mac,$nonce) = ($a0,$a1,$a2);
354
355$code.=<<___;
356.align 5
357.globl poly1305_emit
358.ent poly1305_emit
359poly1305_emit:
360 .frame $sp,0,$ra
361 .set reorder
362
363 ld $tmp0,0($ctx)
364 ld $tmp1,8($ctx)
365 ld $tmp2,16($ctx)
366
367 daddiu $in0,$tmp0,5 # compare to modulus
368 sltiu $tmp3,$in0,5
369 daddu $in1,$tmp1,$tmp3
370 sltu $tmp3,$in1,$tmp3
371 daddu $tmp2,$tmp2,$tmp3
372
373 dsrl $tmp2,2 # see if it carried/borrowed
374 dsubu $tmp2,$zero,$tmp2
375 nor $tmp3,$zero,$tmp2
376
377 and $in0,$tmp2
378 and $tmp0,$tmp3
379 and $in1,$tmp2
380 and $tmp1,$tmp3
381 or $in0,$tmp0
382 or $in1,$tmp1
383
384 lwu $tmp0,0($nonce) # load nonce
385 lwu $tmp1,4($nonce)
386 lwu $tmp2,8($nonce)
387 lwu $tmp3,12($nonce)
388 dsll $tmp1,32
389 dsll $tmp3,32
390 or $tmp0,$tmp1
391 or $tmp2,$tmp3
392
393 daddu $in0,$tmp0 # accumulate nonce
394 daddu $in1,$tmp2
395 sltu $tmp0,$in0,$tmp0
396 daddu $in1,$tmp0
397
398 dsrl $tmp0,$in0,8 # write mac value
399 dsrl $tmp1,$in0,16
400 dsrl $tmp2,$in0,24
401 sb $in0,0($mac)
402 dsrl $tmp3,$in0,32
403 sb $tmp0,1($mac)
404 dsrl $tmp0,$in0,40
405 sb $tmp1,2($mac)
406 dsrl $tmp1,$in0,48
407 sb $tmp2,3($mac)
408 dsrl $tmp2,$in0,56
409 sb $tmp3,4($mac)
410 dsrl $tmp3,$in1,8
411 sb $tmp0,5($mac)
412 dsrl $tmp0,$in1,16
413 sb $tmp1,6($mac)
414 dsrl $tmp1,$in1,24
415 sb $tmp2,7($mac)
416
417 sb $in1,8($mac)
418 dsrl $tmp2,$in1,32
419 sb $tmp3,9($mac)
420 dsrl $tmp3,$in1,40
421 sb $tmp0,10($mac)
422 dsrl $tmp0,$in1,48
423 sb $tmp1,11($mac)
424 dsrl $tmp1,$in1,56
425 sb $tmp2,12($mac)
426 sb $tmp3,13($mac)
427 sb $tmp0,14($mac)
428 sb $tmp1,15($mac)
429
430 jr $ra
431.end poly1305_emit
432.rdata
433.asciiz "Poly1305 for MIPS64, CRYPTOGAMS by <appro\@openssl.org>"
434.align 2
435___
436}
437
# Write the generated assembly. When an output file was given, STDOUT is
# redirected to it first. The three-argument open keeps the file name from
# being interpreted as part of the mode, and a failed open is fatal so the
# build cannot silently continue with the assembly sent to the wrong place.
# Buffered write errors surface at close, hence the check there as well.
438if ($output) { open(STDOUT, ">", $output) or die "can't open $output: $!"; }
439print $code;
440close STDOUT or die "error closing STDOUT: $!";
441
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette