#! /usr/bin/env perl
# Copyright 2016-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html


# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================

# Poly1305 hash for MIPS64.
#
# May 2016
#
# Numbers are cycles per processed byte with poly1305_blocks alone.
#
#             IALU/gcc
# R1x000      5.64/+120%    (big-endian)
# Octeon II   3.80/+280%    (little-endian)

######################################################################
# There are a number of MIPS ABIs in use; O32 and N32/64 are the most
# widely used. Then there is a new contender: NUBI. It appears that if
# one picks the latter, it's possible to arrange code in an ABI-neutral
# manner. Therefore let's stick to the NUBI register layout:
#
# Each variable below holds the numeric operand string ("\$N") that is
# interpolated into the generated assembly wherever the symbolic NUBI
# register name is written, e.g. $a0 expands to '$4' and $s1 to '$13'.
($zero,$at,$t0,$t1,$t2)=map("\$$_",(0..2,24,25));
($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7,$s8,$s9,$s10,$s11)=map("\$$_",(12..23));
($gp,$tp,$sp,$fp,$ra)=map("\$$_",(3,28..31));
#
# The return value is placed in $a0. Following coding rules facilitate
# interoperability:
#
# - never ever touch $tp, "thread pointer", former $gp [o32 can be
#   excluded from the rule, because it's specified volatile];
# - copy return value to $t0, former $v0 [or to $a0 if you're adapting
#   old code];
# - on O32 populate $a4-$a7 with 'lw $aN,4*N($sp)' if necessary;
#
# For reference here is register layout for N32/64 MIPS ABIs:
#
# ($zero,$at,$v0,$v1)=map("\$$_",(0..3));
# ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
# ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25));
# ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23));
# ($gp,$sp,$fp,$ra)=map("\$$_",(28..31));
#
# <appro@openssl.org>
#
######################################################################

# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
# supported flavours are o32,n32,64,nubi32,nubi64, default is o32
$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : "o32";

# Only flavours whose name contains "64" or "n32" are accepted, so the
# default ("o32") and "nubi32" are rejected here.
die "MIPS64 only" unless ($flavour =~ /64|n32/i);

# Register that receives the return value: $a0 for NUBI, $t0 otherwise.
$v0 = ($flavour =~ /nubi/i) ? $a0 : $t0;
# Operand of the .mask directive in poly1305_blocks_internal: bits 12-17
# for NUBI (all six registers stored by its prologue), bits 16-17 otherwise.
$SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? "0x0003f000" : "0x00030000";

# Argument registers of poly1305_init/poly1305_blocks, and scratch
# registers shared by all three routines.
($ctx,$inp,$len,$padbit) = ($a0,$a1,$a2,$a3);
($in0,$in1,$tmp0,$tmp1,$tmp2,$tmp3,$tmp4) = ($a4,$a5,$a6,$a7,$at,$t0,$t1);

# File prologue and poly1305_init.
#
# The emitted text is meant to be run through the C preprocessor: it
# includes mips_arch.h and defines MSB/LSB byte offsets (depending on
# MIPSEB) used by the ldl/ldr unaligned-load pairs.
#
# poly1305_init, as emitted below:
#   - stores zero to the three 64-bit words at 0, 8 and 16($ctx);
#   - if $inp is zero, skips straight to .Lno_key;
#   - otherwise loads 16 bytes from $inp (plain ld on MIPS64R6, ldl/ldr
#     elsewhere), byte-swaps both words on big-endian builds, masks them
#     with 0x0ffffffc0fffffff / 0x0ffffffc0ffffffc, and stores the two
#     results at 24 and 32($ctx) plus r1 + (r1 >> 2) at 40($ctx);
#   - returns 0 in $v0.
$code.=<<___;
#include "mips_arch.h"

#ifdef MIPSEB
# define MSB 0
# define LSB 7
#else
# define MSB 7
# define LSB 0
#endif

.text
.set	noat
.set	noreorder

.align	5
.globl	poly1305_init
.ent	poly1305_init
poly1305_init:
	.frame	$sp,0,$ra
	.set	reorder

	sd	$zero,0($ctx)
	sd	$zero,8($ctx)
	sd	$zero,16($ctx)

	beqz	$inp,.Lno_key

#if defined(_MIPS_ARCH_MIPS64R6)
	ld	$in0,0($inp)
	ld	$in1,8($inp)
#else
	ldl	$in0,0+MSB($inp)
	ldl	$in1,8+MSB($inp)
	ldr	$in0,0+LSB($inp)
	ldr	$in1,8+LSB($inp)
#endif
#ifdef MIPSEB
# if defined(_MIPS_ARCH_MIPS64R2)
	dsbh	$in0,$in0		# byte swap
	dsbh	$in1,$in1
	dshd	$in0,$in0
	dshd	$in1,$in1
# else
	ori	$tmp0,$zero,0xFF
	dsll	$tmp2,$tmp0,32
	or	$tmp0,$tmp2		# 0x000000FF000000FF

	and	$tmp1,$in0,$tmp0	# byte swap
	and	$tmp3,$in1,$tmp0
	dsrl	$tmp2,$in0,24
	dsrl	$tmp4,$in1,24
	dsll	$tmp1,24
	dsll	$tmp3,24
	and	$tmp2,$tmp0
	and	$tmp4,$tmp0
	dsll	$tmp0,8			# 0x0000FF000000FF00
	or	$tmp1,$tmp2
	or	$tmp3,$tmp4
	and	$tmp2,$in0,$tmp0
	and	$tmp4,$in1,$tmp0
	dsrl	$in0,8
	dsrl	$in1,8
	dsll	$tmp2,8
	dsll	$tmp4,8
	and	$in0,$tmp0
	and	$in1,$tmp0
	or	$tmp1,$tmp2
	or	$tmp3,$tmp4
	or	$in0,$tmp1
	or	$in1,$tmp3
	dsrl	$tmp1,$in0,32
	dsrl	$tmp3,$in1,32
	dsll	$in0,32
	dsll	$in1,32
	or	$in0,$tmp1
	or	$in1,$tmp3
# endif
#endif
	li	$tmp0,1
	dsll	$tmp0,32
	daddiu	$tmp0,-63
	dsll	$tmp0,28
	daddiu	$tmp0,-1		# 0ffffffc0fffffff

	and	$in0,$tmp0
	daddiu	$tmp0,-3		# 0ffffffc0ffffffc
	and	$in1,$tmp0

	sd	$in0,24($ctx)
	dsrl	$tmp0,$in1,2
	sd	$in1,32($ctx)
	daddu	$tmp0,$in1		# s1 = r1 + (r1 >> 2)
	sd	$tmp0,40($ctx)

.Lno_key:
	li	$v0,0			# return 0
	jr	$ra
.end	poly1305_init
___
{
# Register assignment for the block function: hash words h0..h2, key
# words r0/r1, the multiplier loaded from 40($ctx), and the product
# accumulators d0..d2.
#
# The multiplier is deliberately called $rs1, not $s1.  A lexical $s1
# declared here would hide the global register name $s1 for the rest of
# this block, and the NUBI prologue/epilogue heredocs below would then
# expand "sd/ld $s1,8($sp)" to the register held by $s5, storing that
# register twice and never saving or restoring the real $s1 (which is
# used as $h1 and clobbered by the loop).
my ($h0,$h1,$h2,$r0,$r1,$rs1,$d0,$d1,$d2) =
   ($s0,$s1,$s2,$s3,$s4,$s5,$in0,$in1,$t2);

# poly1305_blocks: public entry point.  Shifts $len right by 4 to get
# the number of complete 16-byte blocks; branches to
# poly1305_blocks_internal when that count is non-zero, otherwise
# returns immediately.
#
# poly1305_blocks_internal: allocates a 6*8-byte frame and stores $s5
# and $s4 (plus $s3..$s0 for NUBI flavours), loads the hash value from
# 0/8/16($ctx) and the key material from 24/32/40($ctx), then for each
# block: loads 16 input bytes (byte-swapped on big-endian builds), adds
# them and $padbit into the hash, multiplies by the key and reduces.
# When $len reaches zero the hash is written back, the stored registers
# are reloaded and the frame is released in the delay slot of the return.
$code.=<<___;
.align	5
.globl	poly1305_blocks
.ent	poly1305_blocks
poly1305_blocks:
	.set	noreorder
	dsrl	$len,4			# number of complete blocks
	bnez	$len,poly1305_blocks_internal
	nop
	jr	$ra
	nop
.end	poly1305_blocks

.align	5
.ent	poly1305_blocks_internal
poly1305_blocks_internal:
	.frame	$sp,6*8,$ra
	.mask	$SAVED_REGS_MASK,-8
	.set	noreorder
	dsubu	$sp,6*8
	sd	$s5,40($sp)
	sd	$s4,32($sp)
___
$code.=<<___ if ($flavour =~ /nubi/i);	# optimize non-nubi prologue
	sd	$s3,24($sp)
	sd	$s2,16($sp)
	sd	$s1,8($sp)
	sd	$s0,0($sp)
___
$code.=<<___;
	.set	reorder

	ld	$h0,0($ctx)		# load hash value
	ld	$h1,8($ctx)
	ld	$h2,16($ctx)

	ld	$r0,24($ctx)		# load key
	ld	$r1,32($ctx)
	ld	$rs1,40($ctx)

.Loop:
#if defined(_MIPS_ARCH_MIPS64R6)
	ld	$in0,0($inp)		# load input
	ld	$in1,8($inp)
#else
	ldl	$in0,0+MSB($inp)	# load input
	ldl	$in1,8+MSB($inp)
	ldr	$in0,0+LSB($inp)
	ldr	$in1,8+LSB($inp)
#endif
	daddiu	$len,-1
	daddiu	$inp,16
#ifdef MIPSEB
# if defined(_MIPS_ARCH_MIPS64R2)
	dsbh	$in0,$in0		# byte swap
	dsbh	$in1,$in1
	dshd	$in0,$in0
	dshd	$in1,$in1
# else
	ori	$tmp0,$zero,0xFF
	dsll	$tmp2,$tmp0,32
	or	$tmp0,$tmp2		# 0x000000FF000000FF

	and	$tmp1,$in0,$tmp0	# byte swap
	and	$tmp3,$in1,$tmp0
	dsrl	$tmp2,$in0,24
	dsrl	$tmp4,$in1,24
	dsll	$tmp1,24
	dsll	$tmp3,24
	and	$tmp2,$tmp0
	and	$tmp4,$tmp0
	dsll	$tmp0,8			# 0x0000FF000000FF00
	or	$tmp1,$tmp2
	or	$tmp3,$tmp4
	and	$tmp2,$in0,$tmp0
	and	$tmp4,$in1,$tmp0
	dsrl	$in0,8
	dsrl	$in1,8
	dsll	$tmp2,8
	dsll	$tmp4,8
	and	$in0,$tmp0
	and	$in1,$tmp0
	or	$tmp1,$tmp2
	or	$tmp3,$tmp4
	or	$in0,$tmp1
	or	$in1,$tmp3
	dsrl	$tmp1,$in0,32
	dsrl	$tmp3,$in1,32
	dsll	$in0,32
	dsll	$in1,32
	or	$in0,$tmp1
	or	$in1,$tmp3
# endif
#endif
	daddu	$h0,$in0		# accumulate input
	daddu	$h1,$in1
	sltu	$tmp0,$h0,$in0
	sltu	$tmp1,$h1,$in1
	daddu	$h1,$tmp0

	dmultu	($r0,$h0)		# h0*r0
	daddu	$h2,$padbit
	sltu	$tmp0,$h1,$tmp0
	mflo	($d0,$r0,$h0)
	mfhi	($d1,$r0,$h0)

	dmultu	($rs1,$h1)		# h1*5*r1
	daddu	$tmp0,$tmp1
	daddu	$h2,$tmp0
	mflo	($tmp0,$rs1,$h1)
	mfhi	($tmp1,$rs1,$h1)

	dmultu	($r1,$h0)		# h0*r1
	daddu	$d0,$tmp0
	daddu	$d1,$tmp1
	mflo	($tmp2,$r1,$h0)
	mfhi	($d2,$r1,$h0)
	sltu	$tmp0,$d0,$tmp0
	daddu	$d1,$tmp0

	dmultu	($r0,$h1)		# h1*r0
	daddu	$d1,$tmp2
	sltu	$tmp2,$d1,$tmp2
	mflo	($tmp0,$r0,$h1)
	mfhi	($tmp1,$r0,$h1)
	daddu	$d2,$tmp2

	dmultu	($rs1,$h2)		# h2*5*r1
	daddu	$d1,$tmp0
	daddu	$d2,$tmp1
	mflo	($tmp2,$rs1,$h2)

	dmultu	($r0,$h2)		# h2*r0
	sltu	$tmp0,$d1,$tmp0
	daddu	$d2,$tmp0
	mflo	($tmp3,$r0,$h2)

	daddu	$d1,$tmp2
	daddu	$d2,$tmp3
	sltu	$tmp2,$d1,$tmp2
	daddu	$d2,$tmp2

	li	$tmp0,-4		# final reduction
	and	$tmp0,$d2
	dsrl	$tmp1,$d2,2
	andi	$h2,$d2,3
	daddu	$tmp0,$tmp1
	daddu	$h0,$d0,$tmp0
	sltu	$tmp0,$h0,$tmp0
	daddu	$h1,$d1,$tmp0
	sltu	$tmp0,$h1,$tmp0
	daddu	$h2,$h2,$tmp0

	bnez	$len,.Loop

	sd	$h0,0($ctx)		# store hash value
	sd	$h1,8($ctx)
	sd	$h2,16($ctx)

	.set	noreorder
	ld	$s5,40($sp)		# epilogue
	ld	$s4,32($sp)
___
$code.=<<___ if ($flavour =~ /nubi/i);	# optimize non-nubi epilogue
	ld	$s3,24($sp)
	ld	$s2,16($sp)
	ld	$s1,8($sp)
	ld	$s0,0($sp)
___
$code.=<<___;
	jr	$ra
	daddu	$sp,6*8
.end	poly1305_blocks_internal
___
}
{
# Arguments of poly1305_emit; $ctx is redeclared locally alongside the
# two names that only this routine uses.
my ($ctx,$mac,$nonce) = ($a0,$a1,$a2);

# poly1305_emit, as emitted below:
#   - loads the three hash words from 0/8/16($ctx);
#   - adds 5 with carry propagation into the third word ("compare to
#     modulus"), then turns bit 2 and above of that word into an
#     all-ones/all-zeros mask that selects either the sum or the
#     original low two words, without branching;
#   - loads four 32-bit words from $nonce, pairs them into two 64-bit
#     values and adds them to the selected words, carrying from the low
#     word into the high one;
#   - writes the 16 result bytes to $mac one byte at a time, least
#     significant byte first.
# The heredoc also appends the .rdata identification string that ends
# the generated file.
$code.=<<___;
.align	5
.globl	poly1305_emit
.ent	poly1305_emit
poly1305_emit:
	.frame	$sp,0,$ra
	.set	reorder

	ld	$tmp0,0($ctx)
	ld	$tmp1,8($ctx)
	ld	$tmp2,16($ctx)

	daddiu	$in0,$tmp0,5		# compare to modulus
	sltiu	$tmp3,$in0,5
	daddu	$in1,$tmp1,$tmp3
	sltu	$tmp3,$in1,$tmp3
	daddu	$tmp2,$tmp2,$tmp3

	dsrl	$tmp2,2			# see if it carried/borrowed
	dsubu	$tmp2,$zero,$tmp2
	nor	$tmp3,$zero,$tmp2

	and	$in0,$tmp2
	and	$tmp0,$tmp3
	and	$in1,$tmp2
	and	$tmp1,$tmp3
	or	$in0,$tmp0
	or	$in1,$tmp1

	lwu	$tmp0,0($nonce)		# load nonce
	lwu	$tmp1,4($nonce)
	lwu	$tmp2,8($nonce)
	lwu	$tmp3,12($nonce)
	dsll	$tmp1,32
	dsll	$tmp3,32
	or	$tmp0,$tmp1
	or	$tmp2,$tmp3

	daddu	$in0,$tmp0		# accumulate nonce
	daddu	$in1,$tmp2
	sltu	$tmp0,$in0,$tmp0
	daddu	$in1,$tmp0

	dsrl	$tmp0,$in0,8		# write mac value
	dsrl	$tmp1,$in0,16
	dsrl	$tmp2,$in0,24
	sb	$in0,0($mac)
	dsrl	$tmp3,$in0,32
	sb	$tmp0,1($mac)
	dsrl	$tmp0,$in0,40
	sb	$tmp1,2($mac)
	dsrl	$tmp1,$in0,48
	sb	$tmp2,3($mac)
	dsrl	$tmp2,$in0,56
	sb	$tmp3,4($mac)
	dsrl	$tmp3,$in1,8
	sb	$tmp0,5($mac)
	dsrl	$tmp0,$in1,16
	sb	$tmp1,6($mac)
	dsrl	$tmp1,$in1,24
	sb	$tmp2,7($mac)

	sb	$in1,8($mac)
	dsrl	$tmp2,$in1,32
	sb	$tmp3,9($mac)
	dsrl	$tmp3,$in1,40
	sb	$tmp0,10($mac)
	dsrl	$tmp0,$in1,48
	sb	$tmp1,11($mac)
	dsrl	$tmp1,$in1,56
	sb	$tmp2,12($mac)
	sb	$tmp3,13($mac)
	sb	$tmp0,14($mac)
	sb	$tmp1,15($mac)

	jr	$ra
.end	poly1305_emit
.rdata
.asciiz	"Poly1305 for MIPS64, CRYPTOGAMS by <appro\@openssl.org>"
.align	2
___
}

# Write the accumulated assembly.  When an output file was named on the
# command line STDOUT is redirected to it first; a failed open is fatal
# rather than silently leaving the output on the original STDOUT.  The
# three-argument form keeps the file name from being parsed as a mode.
if ($output) {
    open STDOUT, '>', $output or die "can't open $output: $!";
}
print $code;
# Buffered write errors only surface at close, so check it.
close STDOUT or die "error closing STDOUT: $!";
