VirtualBox

source: vbox/trunk/src/libs/openssl-1.1.0g/crypto/poly1305/asm/poly1305-mips.pl@ 69890

Last change on this file since 69890 was 69890, checked in by vboxsync, 7 years ago

Added OpenSSL 1.1.0g with unneeded files removed, otherwise unmodified.
bugref:8070: src/libs maintenance

  • Property svn:eol-style set to LF
  • Property svn:executable set to *
File size: 8.5 KB
Line 
1#! /usr/bin/env perl
2# Copyright 2016 The OpenSSL Project Authors. All Rights Reserved.
3#
4# Licensed under the OpenSSL license (the "License"). You may not use
5# this file except in compliance with the License. You can obtain a copy
6# in the file LICENSE in the source distribution or at
7# https://www.openssl.org/source/license.html
8
9
10# ====================================================================
11# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
12# project. The module is, however, dual licensed under OpenSSL and
13# CRYPTOGAMS licenses depending on where you obtain it. For further
14# details see http://www.openssl.org/~appro/cryptogams/.
15# ====================================================================
16
17# Poly1305 hash for MIPS64.
18#
19# May 2016
20#
21# Numbers are cycles per processed byte with poly1305_blocks alone.
22#
23# IALU/gcc
24# R1x000 5.64/+120% (big-endian)
25# Octeon II 3.80/+280% (little-endian)
26
27######################################################################
28# There are a number of MIPS ABIs in use; O32 and N32/64 are the most
29# widely used. Then there is a new contender: NUBI. It appears that if
30# one picks the latter, it's possible to arrange code in an ABI-neutral
31# manner. Therefore let's stick to the NUBI register layout:
32#
# NUBI register layout: each name below holds the assembler spelling of a
# register, i.e. a "$" followed by the register number.
($zero, $at, $t0, $t1, $t2)
    = map { sprintf '$%d', $_ } 0 .. 2, 24, 25;
($a0, $a1, $a2, $a3, $a4, $a5, $a6, $a7)
    = map { sprintf '$%d', $_ } 4 .. 11;
($s0, $s1, $s2, $s3, $s4, $s5, $s6, $s7, $s8, $s9, $s10, $s11)
    = map { sprintf '$%d', $_ } 12 .. 23;
($gp, $tp, $sp, $fp, $ra)
    = map { sprintf '$%d', $_ } 3, 28 .. 31;
37#
38# The return value is placed in $a0. The following coding rules facilitate
39# interoperability:
40#
41# - never ever touch $tp, "thread pointer", former $gp [o32 can be
42# excluded from the rule, because it's specified volatile];
43# - copy return value to $t0, former $v0 [or to $a0 if you're adapting
44# old code];
45# - on O32 populate $a4-$a7 with 'lw $aN,4*N($sp)' if necessary;
46#
47# For reference here is register layout for N32/64 MIPS ABIs:
48#
49# ($zero,$at,$v0,$v1)=map("\$$_",(0..3));
50# ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
51# ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25));
52# ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23));
53# ($gp,$sp,$fp,$ra)=map("\$$_",(28..31));
54#
55# <appro@openssl.org>
56#
57######################################################################
58
# The first command-line argument names the ABI flavour (o32, n32, 64,
# nubi32 or nubi64).  Only flavours matching /64|n32/ pass the check below;
# the "o32" fallback used when no argument is given does not.
$flavour = shift;
$flavour = "o32" unless $flavour;

if ($flavour !~ /64|n32/i) {
    die "MIPS64 only";
}

# $v0 names the register that receives the return value and
# $SAVED_REGS_MASK is the value handed to the .mask directive; both depend
# on whether a NUBI flavour was requested.
if ($flavour =~ /nubi/i) {
    $v0              = $a0;
    $SAVED_REGS_MASK = "0x0003f000";
}
else {
    $v0              = $t0;
    $SAVED_REGS_MASK = "0x00030000";
}

# Symbolic names for the argument registers ...
$ctx    = $a0;
$inp    = $a1;
$len    = $a2;
$padbit = $a3;

# ... and for the scratch registers shared by the generated routines.
$in0  = $a4;
$in1  = $a5;
$tmp0 = $a6;
$tmp1 = $a7;
$tmp2 = $at;
$tmp3 = $t0;
$tmp4 = $t1;
68
# Emit the MSB/LSB byte-offset macros, the section/assembler directives and
# the poly1305_init routine.
#
# The generated poly1305_init zeroes the three 64-bit hash words at
# 0/8/16($ctx).  When the key pointer ($inp) is zero it skips straight to
# the return; otherwise it loads 16 key bytes with ldl/ldr pairs, byte-swaps
# them on MIPSEB builds (dsbh/dshd on MIPS64R2, a mask-and-shift sequence
# otherwise), masks them with 0x0ffffffc0fffffff and 0x0ffffffc0ffffffc and
# stores the two results at 24($ctx) and 32($ctx), followed by
# s1 = r1 + (r1 >> 2) at 40($ctx).  Either way it returns 0 in $v0.
69$code.=<<___;
70#ifdef MIPSEB
71# define MSB 0
72# define LSB 7
73#else
74# define MSB 7
75# define LSB 0
76#endif
77
78.text
79.set noat
80.set noreorder
81
82.align 5
83.globl poly1305_init
84.ent poly1305_init
85poly1305_init:
86 .frame $sp,0,$ra
87 .set reorder
88
89 sd $zero,0($ctx)
90 sd $zero,8($ctx)
91 sd $zero,16($ctx)
92
93 beqz $inp,.Lno_key
94
95 ldl $in0,0+MSB($inp)
96 ldl $in1,8+MSB($inp)
97 ldr $in0,0+LSB($inp)
98 ldr $in1,8+LSB($inp)
99#ifdef MIPSEB
100# if defined(_MIPS_ARCH_MIPS64R2)
101 dsbh $in0,$in0 # byte swap
102 dsbh $in1,$in1
103 dshd $in0,$in0
104 dshd $in1,$in1
105# else
106 ori $tmp0,$zero,0xFF
107 dsll $tmp2,$tmp0,32
108 or $tmp0,$tmp2 # 0x000000FF000000FF
109
110 and $tmp1,$in0,$tmp0 # byte swap
111 and $tmp3,$in1,$tmp0
112 dsrl $tmp2,$in0,24
113 dsrl $tmp4,$in1,24
114 dsll $tmp1,24
115 dsll $tmp3,24
116 and $tmp2,$tmp0
117 and $tmp4,$tmp0
118 dsll $tmp0,8 # 0x0000FF000000FF00
119 or $tmp1,$tmp2
120 or $tmp3,$tmp4
121 and $tmp2,$in0,$tmp0
122 and $tmp4,$in1,$tmp0
123 dsrl $in0,8
124 dsrl $in1,8
125 dsll $tmp2,8
126 dsll $tmp4,8
127 and $in0,$tmp0
128 and $in1,$tmp0
129 or $tmp1,$tmp2
130 or $tmp3,$tmp4
131 or $in0,$tmp1
132 or $in1,$tmp3
133 dsrl $tmp1,$in0,32
134 dsrl $tmp3,$in1,32
135 dsll $in0,32
136 dsll $in1,32
137 or $in0,$tmp1
138 or $in1,$tmp3
139# endif
140#endif
141 li $tmp0,1
142 dsll $tmp0,32
143 daddiu $tmp0,-63
144 dsll $tmp0,28
145 daddiu $tmp0,-1 # 0ffffffc0fffffff
146
147 and $in0,$tmp0
148 daddiu $tmp0,-3 # 0ffffffc0ffffffc
149 and $in1,$tmp0
150
151 sd $in0,24($ctx)
152 dsrl $tmp0,$in1,2
153 sd $in1,32($ctx)
154 daddu $tmp0,$in1 # s1 = r1 + (r1 >> 2)
155 sd $tmp0,40($ctx)
156
157.Lno_key:
158 li $v0,0 # return 0
159 jr $ra
160.end poly1305_init
161___
{
# Emit poly1305_blocks and poly1305_blocks_internal.
#
# Register roles inside the generated code: $h0-$h2 hold the hash value
# loaded from 0/8/16($ctx), $r0/$r1 the key words loaded from 24/32($ctx),
# $rs1 the precomputed r1 + (r1 >> 2) loaded from 40($ctx), and $d0-$d2
# receive the product before the final reduction.
#
# The precomputed word is deliberately NOT named $s1.  A lexical $s1 would
# shadow the global register name $s1 for the rest of this block, so the
# nubi-only prologue/epilogue lines "sd/ld $s1,8($sp)" would interpolate the
# lexical (register $s5) and the register behind $h1, which the nubi .mask
# value declares as saved, would never be saved or restored.
my ($h0,$h1,$h2,$r0,$r1,$rs1,$d0,$d1,$d2) =
   ($s0,$s1,$s2,$s3,$s4,$s5,$in0,$in1,$t2);

# poly1305_blocks converts the byte length into a block count and returns
# immediately when it is zero; otherwise it branches to the internal
# routine, whose prologue reserves a 6*8-byte frame and saves $s5/$s4.
$code.=<<___;
.align 5
.globl poly1305_blocks
.ent poly1305_blocks
poly1305_blocks:
 .set noreorder
 dsrl $len,4 # number of complete blocks
 bnez $len,poly1305_blocks_internal
 nop
 jr $ra
 nop
.end poly1305_blocks

.align 5
.ent poly1305_blocks_internal
poly1305_blocks_internal:
 .frame $sp,6*8,$ra
 .mask $SAVED_REGS_MASK,-8
 .set noreorder
 dsub $sp,6*8
 sd $s5,40($sp)
 sd $s4,32($sp)
___
# Only nubi flavours also save $s3-$s0; $s1 here is the global register name.
$code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue
 sd $s3,24($sp)
 sd $s2,16($sp)
 sd $s1,8($sp)
 sd $s0,0($sp)
___
# Main loop: each iteration loads one 16-byte block (byte-swapped on MIPSEB
# builds), adds it and $padbit to the hash, multiplies by the key using
# $rs1 for the h1*5*r1 and h2*5*r1 terms, then performs the final reduction.
# The hash is written back to $ctx once $len reaches zero.
$code.=<<___;
 .set reorder

 ld $h0,0($ctx) # load hash value
 ld $h1,8($ctx)
 ld $h2,16($ctx)

 ld $r0,24($ctx) # load key
 ld $r1,32($ctx)
 ld $rs1,40($ctx)

.Loop:
 ldl $in0,0+MSB($inp) # load input
 ldl $in1,8+MSB($inp)
 ldr $in0,0+LSB($inp)
 daddiu $len,-1
 ldr $in1,8+LSB($inp)
 daddiu $inp,16
#ifdef MIPSEB
# if defined(_MIPS_ARCH_MIPS64R2)
 dsbh $in0,$in0 # byte swap
 dsbh $in1,$in1
 dshd $in0,$in0
 dshd $in1,$in1
# else
 ori $tmp0,$zero,0xFF
 dsll $tmp2,$tmp0,32
 or $tmp0,$tmp2 # 0x000000FF000000FF

 and $tmp1,$in0,$tmp0 # byte swap
 and $tmp3,$in1,$tmp0
 dsrl $tmp2,$in0,24
 dsrl $tmp4,$in1,24
 dsll $tmp1,24
 dsll $tmp3,24
 and $tmp2,$tmp0
 and $tmp4,$tmp0
 dsll $tmp0,8 # 0x0000FF000000FF00
 or $tmp1,$tmp2
 or $tmp3,$tmp4
 and $tmp2,$in0,$tmp0
 and $tmp4,$in1,$tmp0
 dsrl $in0,8
 dsrl $in1,8
 dsll $tmp2,8
 dsll $tmp4,8
 and $in0,$tmp0
 and $in1,$tmp0
 or $tmp1,$tmp2
 or $tmp3,$tmp4
 or $in0,$tmp1
 or $in1,$tmp3
 dsrl $tmp1,$in0,32
 dsrl $tmp3,$in1,32
 dsll $in0,32
 dsll $in1,32
 or $in0,$tmp1
 or $in1,$tmp3
# endif
#endif
 daddu $h0,$in0 # accumulate input
 daddu $h1,$in1
 sltu $tmp0,$h0,$in0
 sltu $tmp1,$h1,$in1
 daddu $h1,$tmp0

 dmultu $r0,$h0 # h0*r0
 daddu $h2,$padbit
 sltu $tmp0,$h1,$tmp0
 mflo $d0
 mfhi $d1

 dmultu $rs1,$h1 # h1*5*r1
 daddu $tmp0,$tmp1
 daddu $h2,$tmp0
 mflo $tmp0
 mfhi $tmp1

 dmultu $r1,$h0 # h0*r1
 daddu $d0,$tmp0
 daddu $d1,$tmp1
 mflo $tmp2
 mfhi $d2
 sltu $tmp0,$d0,$tmp0
 daddu $d1,$tmp0

 dmultu $r0,$h1 # h1*r0
 daddu $d1,$tmp2
 sltu $tmp2,$d1,$tmp2
 mflo $tmp0
 mfhi $tmp1
 daddu $d2,$tmp2

 dmultu $rs1,$h2 # h2*5*r1
 daddu $d1,$tmp0
 daddu $d2,$tmp1
 mflo $tmp2

 dmultu $r0,$h2 # h2*r0
 sltu $tmp0,$d1,$tmp0
 daddu $d2,$tmp0
 mflo $tmp3

 daddu $d1,$tmp2
 daddu $d2,$tmp3
 sltu $tmp2,$d1,$tmp2
 daddu $d2,$tmp2

 li $tmp0,-4 # final reduction
 and $tmp0,$d2
 dsrl $tmp1,$d2,2
 andi $h2,$d2,3
 daddu $tmp0,$tmp1
 daddu $h0,$d0,$tmp0
 sltu $tmp0,$h0,$tmp0
 daddu $h1,$d1,$tmp0
 sltu $tmp0,$h1,$tmp0
 daddu $h2,$h2,$tmp0

 bnez $len,.Loop

 sd $h0,0($ctx) # store hash value
 sd $h1,8($ctx)
 sd $h2,16($ctx)

 .set noreorder
 ld $s5,40($sp) # epilogue
 ld $s4,32($sp)
___
# Mirror of the nubi-only prologue: restore $s3-$s0 from the same slots.
$code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi epilogue
 ld $s3,24($sp)
 ld $s2,16($sp)
 ld $s1,8($sp)
 ld $s0,0($sp)
___
# Return; the stack adjustment follows "jr" while .set noreorder is active.
$code.=<<___;
 jr $ra
 dadd $sp,6*8
.end poly1305_blocks_internal
___
}
# Emit poly1305_emit and the trailing identification string.
336{
# Arguments of the generated routine: context, output buffer, nonce.
337my ($ctx,$mac,$nonce) = ($a0,$a1,$a2);
338
# The generated poly1305_emit loads the three hash words from 0/8/16($ctx),
# adds 5 to the low word and propagates the carries.  It then shifts the
# top word right by 2 and negates it to form a mask (with its complement in
# $tmp3) that selects between the incremented and the original low/high
# words -- presumably the shifted value is only ever 0 or 1, TODO confirm.
# The four 32-bit nonce words are loaded with lwu, combined into two 64-bit
# values and added with carry; the 128-bit result is written to $mac one
# byte at a time with sb, least significant byte first.
339$code.=<<___;
340.align 5
341.globl poly1305_emit
342.ent poly1305_emit
343poly1305_emit:
344 .frame $sp,0,$ra
345 .set reorder
346
347 ld $tmp0,0($ctx)
348 ld $tmp1,8($ctx)
349 ld $tmp2,16($ctx)
350
351 daddiu $in0,$tmp0,5 # compare to modulus
352 sltiu $tmp3,$in0,5
353 daddu $in1,$tmp1,$tmp3
354 sltu $tmp3,$in1,$tmp3
355 daddu $tmp2,$tmp2,$tmp3
356
357 dsrl $tmp2,2 # see if it carried/borrowed
358 dsubu $tmp2,$zero,$tmp2
359 nor $tmp3,$zero,$tmp2
360
361 and $in0,$tmp2
362 and $tmp0,$tmp3
363 and $in1,$tmp2
364 and $tmp1,$tmp3
365 or $in0,$tmp0
366 or $in1,$tmp1
367
368 lwu $tmp0,0($nonce) # load nonce
369 lwu $tmp1,4($nonce)
370 lwu $tmp2,8($nonce)
371 lwu $tmp3,12($nonce)
372 dsll $tmp1,32
373 dsll $tmp3,32
374 or $tmp0,$tmp1
375 or $tmp2,$tmp3
376
377 daddu $in0,$tmp0 # accumulate nonce
378 daddu $in1,$tmp2
379 sltu $tmp0,$in0,$tmp0
380 daddu $in1,$tmp0
381
382 dsrl $tmp0,$in0,8 # write mac value
383 dsrl $tmp1,$in0,16
384 dsrl $tmp2,$in0,24
385 sb $in0,0($mac)
386 dsrl $tmp3,$in0,32
387 sb $tmp0,1($mac)
388 dsrl $tmp0,$in0,40
389 sb $tmp1,2($mac)
390 dsrl $tmp1,$in0,48
391 sb $tmp2,3($mac)
392 dsrl $tmp2,$in0,56
393 sb $tmp3,4($mac)
394 dsrl $tmp3,$in1,8
395 sb $tmp0,5($mac)
396 dsrl $tmp0,$in1,16
397 sb $tmp1,6($mac)
398 dsrl $tmp1,$in1,24
399 sb $tmp2,7($mac)
400
401 sb $in1,8($mac)
402 dsrl $tmp2,$in1,32
403 sb $tmp3,9($mac)
404 dsrl $tmp3,$in1,40
405 sb $tmp0,10($mac)
406 dsrl $tmp0,$in1,48
407 sb $tmp1,11($mac)
408 dsrl $tmp1,$in1,56
409 sb $tmp2,12($mac)
410 sb $tmp3,13($mac)
411 sb $tmp0,14($mac)
412 sb $tmp1,15($mac)
413
414 jr $ra
415.end poly1305_emit
416.rdata
417.asciiz "Poly1305 for MIPS64, CRYPTOGAMS by <appro\@openssl.org>"
418.align 2
419___
420}
421
# Write the generated assembly to the file named by the last remaining
# command-line argument, or to the inherited STDOUT when there is none.
$output = pop;
if ($output) {
    # Three-argument open: the file name is never parsed for a mode prefix,
    # and a failure is reported instead of silently writing elsewhere.
    open STDOUT, '>', $output or die "can't open $output: $!";
}
print $code;
# Buffered write errors (e.g. a full disk) only surface when closing.
close STDOUT or die "error closing STDOUT: $!";
425
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette