#! /usr/bin/env perl
# Copyright 2012-2021 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html

#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# October 2012
#
# The module implements bn_GF2m_mul_2x2 polynomial multiplication used
# in bn_gf2m.c. It's kind of a low-hanging mechanical port from C for
# the time being... Except that it has two code paths: one suitable
# for all SPARCv9 processors and one for VIS3-capable ones. The former
# delivers ~25-45% more performance, more for longer keys, on the
# heaviest DH and DSA verify operations on the venerable UltraSPARC II.
# On T4 the VIS3 code is ~100-230% faster than gcc-generated code and
# ~35-90% faster than the pure SPARCv9 code path.

# The last command-line argument, when present, names the file that
# receives the generated assembly: STDOUT is redirected to it so that the
# final "print $code" lands there.  Without an argument the assembly goes
# to the inherited STDOUT.  A failed open is fatal; otherwise the output
# would silently end up on the wrong stream.
$output = pop;
if ($output) {
	open STDOUT, '>', $output
	    or die "can't open $output: $!";
}

# Size in bytes of the stack scratch area reserved by the "save" in the
# software path: a lookup table of 16 entries, 8 bytes each.
$locals = 16*8;

# Register that holds the base address of that lookup table.
$tab = "%l0";

# Two pairs of scratch registers.  The generator rotates each pair after
# every unrolled step, so index [0]/[1] swap roles from step to step.
@T = ("%g2","%g3");
@i = ("%g4","%g5");

# Registers holding the shifted/combined multiples of the first operand
# while the lookup table is being built (%o0 .. %o5).
($a1,$a2,$a4,$a8,$a12,$a48) = map("%o$_",(0..5));

# $lo/$hi accumulate the low and high 64 bits of the product, $b holds
# the second operand.  Note the deliberate register sharing: $hi reuses
# $a8's register and $a reuses $lo's register.
($lo,$hi,$b) = ("%g1",$a8,"%o7");
$a = $lo;

# ----------------------------------------------------------------------
# Build the assembly text of bn_GF2m_mul_2x2 in $code.
#
# Register usage visible in the generated code: %o0 (%i0 after "save")
# is the address the four 32-bit result words are stored to; %o1:%o2 and
# %o3:%o4 are each combined as (first<<32)|second into the two 64-bit
# operands.
#
# Part 1 emits:
#   * preprocessor boilerplate and the function entry;
#   * a run-time test of OPENSSL_sparcv9cap_P[0] against SPARCV9_VIS3;
#     when the bit is clear control branches to .Lsoftware;
#   * the VIS3 path, which uses xmulx/xmulxhi (emitted as raw .word
#     encodings) and stores the 128-bit product as four 32-bit words;
#   * the start of the software path: a 16-entry table of 64-bit values
#     at $tab, indexed by a 4-bit nibble of b.  The table is built from
#     a with its top three bits masked off, so every entry fits in 64
#     bits.  Bits 63, 62 and 61 of a are handled separately: each is
#     broadcast into a mask, ANDed with b, and folded into $hi:$lo as
#     b<<63, b<<62 and b<<61 respectively.
#   * the lookups for nibbles 0 and 1 of b and the table offset for
#     nibble 2, which prime the unrolled loop that follows.
#
# Text between backticks is arithmetic that is evaluated by the
# generator just before $code is printed.
# ----------------------------------------------------------------------
$code.=<<___;
#ifndef __ASSEMBLER__
# define __ASSEMBLER__ 1
#endif
#include "crypto/sparc_arch.h"

#ifdef __arch64__
.register	%g2,#scratch
.register	%g3,#scratch
#endif

#ifdef __PIC__
SPARC_PIC_THUNK(%g1)
#endif

.globl	bn_GF2m_mul_2x2
.align	16
bn_GF2m_mul_2x2:
	SPARC_LOAD_ADDRESS_LEAF(OPENSSL_sparcv9cap_P,%g1,%g5)
	ld	[%g1+0],%g1			! OPENSSL_sparcv9cap_P[0]

	andcc	%g1, SPARCV9_VIS3, %g0
	bz,pn	%icc,.Lsoftware
	nop

	sllx	%o1, 32, %o1
	sllx	%o3, 32, %o3
	or	%o2, %o1, %o1
	or	%o4, %o3, %o3
	.word	0x95b262ab			! xmulx %o1, %o3, %o2
	.word	0x99b262cb			! xmulxhi %o1, %o3, %o4
	srlx	%o2, 32, %o1			! 13 cycles later
	st	%o2, [%o0+0]
	st	%o1, [%o0+4]
	srlx	%o4, 32, %o3
	st	%o4, [%o0+8]
	retl
	st	%o3, [%o0+12]

.align	16
.Lsoftware:
	save	%sp,-STACK_FRAME-$locals,%sp

	sllx	%i1,32,$a
	mov	-1,$a12
	sllx	%i3,32,$b
	or	%i2,$a,$a
	srlx	$a12,1,$a48			! 0x7fff...
	or	%i4,$b,$b
	srlx	$a12,2,$a12			! 0x3fff...
	add	%sp,STACK_BIAS+STACK_FRAME,$tab

	sllx	$a,2,$a4
	mov	$a,$a1
	sllx	$a,1,$a2

	srax	$a4,63,@i[1]			! broadcast 61st bit
	and	$a48,$a4,$a4			! (a<<2)&0x7fff...
	srlx	$a48,2,$a48
	srax	$a2,63,@i[0]			! broadcast 62nd bit
	and	$a12,$a2,$a2			! (a<<1)&0x3fff...
	srax	$a1,63,$lo			! broadcast 63rd bit
	and	$a48,$a1,$a1			! (a<<0)&0x1fff...

	sllx	$a1,3,$a8
	and	$b,$lo,$lo
	and	$b,@i[0],@i[0]
	and	$b,@i[1],@i[1]

	stx	%g0,[$tab+0*8]			! tab[0]=0
	xor	$a1,$a2,$a12
	stx	$a1,[$tab+1*8]			! tab[1]=a1
	stx	$a2,[$tab+2*8]			! tab[2]=a2
	xor	$a4,$a8,$a48
	stx	$a12,[$tab+3*8]			! tab[3]=a1^a2
	xor	$a4,$a1,$a1

	stx	$a4,[$tab+4*8]			! tab[4]=a4
	xor	$a4,$a2,$a2
	stx	$a1,[$tab+5*8]			! tab[5]=a1^a4
	xor	$a4,$a12,$a12
	stx	$a2,[$tab+6*8]			! tab[6]=a2^a4
	xor	$a48,$a1,$a1
	stx	$a12,[$tab+7*8]			! tab[7]=a1^a2^a4
	xor	$a48,$a2,$a2

	stx	$a8,[$tab+8*8]			! tab[8]=a8
	xor	$a48,$a12,$a12
	stx	$a1,[$tab+9*8]			! tab[9]=a1^a8
	xor	$a4,$a1,$a1
	stx	$a2,[$tab+10*8]			! tab[10]=a2^a8
	xor	$a4,$a2,$a2
	stx	$a12,[$tab+11*8]		! tab[11]=a1^a2^a8

	xor	$a4,$a12,$a12
	stx	$a48,[$tab+12*8]		! tab[12]=a4^a8
	srlx	$lo,1,$hi
	stx	$a1,[$tab+13*8]			! tab[13]=a1^a4^a8
	sllx	$lo,63,$lo
	stx	$a2,[$tab+14*8]			! tab[14]=a2^a4^a8
	srlx	@i[0],2,@T[0]
	stx	$a12,[$tab+15*8]		! tab[15]=a1^a2^a4^a8

	sllx	@i[0],62,$a1
	sllx	$b,3,@i[0]
	srlx	@i[1],3,@T[1]
	and	@i[0],`0xf<<3`,@i[0]
	sllx	@i[1],61,$a2
	ldx	[$tab+@i[0]],@i[0]
	srlx	$b,4-3,@i[1]
	xor	@T[0],$hi,$hi
	and	@i[1],`0xf<<3`,@i[1]
	xor	$a1,$lo,$lo
	ldx	[$tab+@i[1]],@i[1]
	xor	@T[1],$hi,$hi

	xor	@i[0],$lo,$lo
	srlx	$b,8-3,@i[0]
	xor	$a2,$lo,$lo
	and	@i[0],`0xf<<3`,@i[0]
___

# Part 2: unrolled steps for nibbles 1..13 of b.  On entry to step $n,
# @i[1] holds the table entry for nibble $n and @i[0] holds the byte
# offset of the table entry for nibble $n+1.  The step
#   - folds entry $n, shifted left by 4*$n bits, into $hi:$lo,
#   - loads the entry for nibble $n+1,
#   - computes the byte offset (nibble*8, hence the "-3" in the shift
#     and the 0xf<<3 mask) for nibble $n+2.
# $n is intentionally a package variable: Part 3 relies on the value it
# has once the loop is finished (14).
for ($n = 1; $n < 14; $n++) {
$code.=<<___;
	sllx	@i[1],`$n*4`,@T[0]
	ldx	[$tab+@i[0]],@i[0]
	srlx	@i[1],`64-$n*4`,@T[1]
	xor	@T[0],$lo,$lo
	srlx	$b,`($n+2)*4`-3,@i[1]
	xor	@T[1],$hi,$hi
	and	@i[1],`0xf<<3`,@i[1]
___
	# Swap the roles of the two registers in each pair for the next step.
	push(@i,shift(@i));
	push(@T,shift(@T));
}

# Part 3: $n is 14 here.  Fold in the entries for the last two nibbles
# (14 and 15), store $lo and $hi as four 32-bit words at %i0+0 .. %i0+12,
# return, and emit the symbol type/size and the identification string.
$code.=<<___;
	sllx	@i[1],`$n*4`,@T[0]
	ldx	[$tab+@i[0]],@i[0]
	srlx	@i[1],`64-$n*4`,@T[1]
	xor	@T[0],$lo,$lo

	sllx	@i[0],`($n+1)*4`,@T[0]
	xor	@T[1],$hi,$hi
	srlx	@i[0],`64-($n+1)*4`,@T[1]
	xor	@T[0],$lo,$lo
	xor	@T[1],$hi,$hi

	srlx	$lo,32,%i1
	st	$lo,[%i0+0]
	st	%i1,[%i0+4]
	srlx	$hi,32,%i2
	st	$hi,[%i0+8]
	st	%i2,[%i0+12]

	ret
	restore
.type	bn_GF2m_mul_2x2,#function
.size	bn_GF2m_mul_2x2,.-bn_GF2m_mul_2x2
.asciz	"GF(2^m) Multiplication for SPARCv9, CRYPTOGAMS by <appro\@openssl.org>"
.align	4
___

# Replace every `...` span in the generated text with the value of the
# Perl expression it contains (e.g. `0xf<<3` becomes 120).  An expression
# that fails to evaluate aborts generation instead of silently producing
# an empty operand in the assembly.
$code =~ s{`([^`]*)`}{
	my $expr  = $1;
	my $value = eval $expr;
	die "cannot evaluate `$expr`: $@" if $@;
	$value;
}ge;

# Emit the finished assembly.  Buffered write errors only surface when the
# handle is closed, so the close is checked.
print $code;
close STDOUT or die "error closing STDOUT: $!";