#! /usr/bin/env perl
# Copyright 2016-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the OpenSSL license (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html

#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# This module implements Poly1305 hash for s390x.
#
# June 2015
#
# ~6.6/2.3 cpb on z10/z196+, >2x improvement over compiler-generated
# code. For older compilers the improvement coefficient is >3x, because
# then base 2^64 and base 2^32 implementations are compared.
#
# On a side note, z13 enables vector base 2^26 implementation...

use strict;
use warnings;

# Command line: [flavour] [...] [output-file]
#
#   flavour     - first argument; a value matching /3[12]/ selects the
#                 build with 4-byte size_t, anything else (or no
#                 argument at all) selects the 8-byte one.
#   output-file - first remaining argument that looks like "name.ext".
#                 When none is found STDOUT is left untouched.
my $flavour = shift;

# $SIZE_T scales the stack-frame offsets written as `N*$SIZE_T` in the
# templates; $g is the mnemonic suffix spliced into them ("g" yields
# stmg/lmg/clgr/..., the empty string yields stm/lm/clr/...).
my ($SIZE_T, $g);
if (defined $flavour && $flavour =~ /3[12]/) {
    $SIZE_T = 4;
    $g      = "";
} else {
    $SIZE_T = 8;
    $g      = "g";
}

# Consume arguments until one looks like a file name or the list runs out.
my $output;
while (($output = shift) && ($output !~ /\w[\w\-]*\.\w+$/)) {}

# Redirect STDOUT only when a file name was actually found, and fail
# loudly if it cannot be opened instead of silently writing elsewhere.
if ($output) {
    open STDOUT, '>', $output or die "can't open $output: $!";
}

# Accumulates the generated assembly; printed at the end of the script.
my $code = '';

# Stack pointer and the four argument registers shared by the templates.
my $sp = "%r15";

my ($ctx,$inp,$len,$padbit) = map("%r$_",(2..5));

# poly1305_init template.
#
# The emitted routine stores zero to the three 64-bit hash words at
# offsets 0, 8 and 16 of $ctx.  If $inp (the key pointer) compares equal
# to zero it skips straight to the exit.  Otherwise it loads 16 key bytes
# byte-reversed, masks them with 0x0ffffffc0fffffff / 0x0ffffffc0ffffffc
# (both masks are built in %r0/%r1 from an all-ones value) and stores the
# result at offsets 32 and 40 of $ctx.  %r2 is set to 0 before returning.
$code .= <<"___";
.text

.globl poly1305_init
.type poly1305_init,\@function
.align 16
poly1305_init:
lghi %r0,0
lghi %r1,-1
stg %r0,0($ctx) # zero hash value
stg %r0,8($ctx)
stg %r0,16($ctx)

cl${g}r $inp,%r0
je .Lno_key

lrvg %r4,0($inp) # load little-endian key
lrvg %r5,8($inp)

nihl %r1,0xffc0 # 0xffffffc0ffffffff
srlg %r0,%r1,4 # 0x0ffffffc0fffffff
srlg %r1,%r1,4
nill %r1,0xfffc # 0x0ffffffc0ffffffc

ngr %r4,%r0
ngr %r5,%r1

stg %r4,32($ctx)
stg %r5,40($ctx)

.Lno_key:
lghi %r2,0
br %r14
.size poly1305_init,.-poly1305_init
___
{
# poly1305_blocks template.
#
# Register allocation:
#   %r6..%r14 - working set.  Consecutive names form the hi:lo pairs
#               that the template's own comments show receiving the
#               mlgr products ($d0hi:$d0lo, $d1hi:$d1lo, $t0:$h0,
#               $t1:$h1); $h2 takes %r14.
#   %r0..%r2  - key words r0, r1 and s1 = r1 + (r1 >> 2).
#
# $s1 shares %r2 with $ctx, so $ctx is spilled to 2*$SIZE_T($sp) before
# the loop and reloaded after it.  %r6-%r14 are saved to and restored
# from the stack frame around the loop.
#
# The leading "srl${g} $len,4" is written with two operands; in the "g"
# build it depends on the srlg fix-up substitution applied to $code
# before it is printed.
my ($d0hi,$d0lo,$d1hi,$d1lo,$t0,$h0,$t1,$h1,$h2) = map { "%r$_" } 6 .. 14;
my ($r0,$r1,$s1) = map { "%r$_" } 0 .. 2;

$code .= <<"___";
.globl poly1305_blocks
.type poly1305_blocks,\@function
.align 16
poly1305_blocks:
srl${g} $len,4 # fixed-up in 64-bit build
lghi %r0,0
cl${g}r $len,%r0
je .Lno_data

stm${g} %r6,%r14,`6*$SIZE_T`($sp)

llgfr $padbit,$padbit # clear upper half, much needed with
# non-64-bit ABI
lg $r0,32($ctx) # load key
lg $r1,40($ctx)

lg $h0,0($ctx) # load hash value
lg $h1,8($ctx)
lg $h2,16($ctx)

st${g} $ctx,`2*$SIZE_T`($sp) # off-load $ctx
srlg $s1,$r1,2
algr $s1,$r1 # s1 = r1 + r1>>2
j .Loop

.align 16
.Loop:
lrvg $d0lo,0($inp) # load little-endian input
lrvg $d1lo,8($inp)
la $inp,16($inp)

algr $d0lo,$h0 # accumulate input
alcgr $d1lo,$h1

lgr $h0,$d0lo
mlgr $d0hi,$r0 # h0*r0 -> $d0hi:$d0lo
lgr $h1,$d1lo
mlgr $d1hi,$s1 # h1*5*r1 -> $d1hi:$d1lo

mlgr $t0,$r1 # h0*r1 -> $t0:$h0
mlgr $t1,$r0 # h1*r0 -> $t1:$h1
alcgr $h2,$padbit

algr $d0lo,$d1lo
lgr $d1lo,$h2
alcgr $d0hi,$d1hi
lghi $d1hi,0

algr $h1,$h0
alcgr $t1,$t0

msgr $d1lo,$s1 # h2*s1
msgr $h2,$r0 # h2*r0

algr $h1,$d1lo
alcgr $t1,$d1hi # $d1hi is zero

algr $h1,$d0hi
alcgr $h2,$t1

lghi $h0,-4 # final reduction step
ngr $h0,$h2
srlg $t0,$h2,2
algr $h0,$t0
lghi $t1,3
ngr $h2,$t1

algr $h0,$d0lo
alcgr $h1,$d1hi # $d1hi is still zero
alcgr $h2,$d1hi # $d1hi is still zero

brct${g} $len,.Loop

l${g} $ctx,`2*$SIZE_T`($sp) # restore $ctx

stg $h0,0($ctx) # store hash value
stg $h1,8($ctx)
stg $h2,16($ctx)

lm${g} %r6,%r14,`6*$SIZE_T`($sp)
.Lno_data:
br %r14
.size poly1305_blocks,.-poly1305_blocks
___
}
{
# poly1305_emit template.
#
# $mac and $nonce are aliases for the $inp and $len argument registers.
# $h0..$d1 occupy %r5..%r9; %r6-%r9 are saved on entry and restored on
# exit.
#
# The emitted routine loads the three hash words, keeps a copy of the low
# two in $d0/$d1, adds 5 with carry propagation and turns "h2 >> 2" of
# the sum into a mask via 0 - (h2 >> 2).  The mask and its complement
# select between the sum and the saved copy.  The two nonce doublewords
# are loaded (the second one into the $ctx register, which is no longer
# needed at that point), rotated by 32 bits and added with carry, and the
# 16-byte result is stored byte-reversed at $mac.
my ($mac,$nonce) = ($inp,$len);
my ($h0,$h1,$h2,$d0,$d1) = map { "%r$_" } 5 .. 9;

$code .= <<"___";
.globl poly1305_emit
.type poly1305_emit,\@function
.align 16
poly1305_emit:
stm${g} %r6,%r9,`6*$SIZE_T`($sp)

lg $h0,0($ctx)
lg $h1,8($ctx)
lg $h2,16($ctx)

lghi %r0,5
lghi %r1,0
lgr $d0,$h0
lgr $d1,$h1

algr $h0,%r0 # compare to modulus
alcgr $h1,%r1
alcgr $h2,%r1

srlg $h2,$h2,2 # did it borrow/carry?
slgr %r1,$h2 # 0-$h2>>2
lg $h2,0($nonce) # load nonce
lghi %r0,-1
lg $ctx,8($nonce)
xgr %r0,%r1 # ~%r1

ngr $h0,%r1
ngr $d0,%r0
ngr $h1,%r1
ngr $d1,%r0
ogr $h0,$d0
rllg $d0,$h2,32 # flip nonce words
ogr $h1,$d1
rllg $d1,$ctx,32

algr $h0,$d0 # accumulate nonce
alcgr $h1,$d1

strvg $h0,0($mac) # write little-endian result
strvg $h1,8($mac)

lm${g} %r6,%r9,`6*$SIZE_T`($sp)
br %r14
.size poly1305_emit,.-poly1305_emit

.string "Poly1305 for s390x, CRYPTOGAMS by <appro\@openssl.org>"
___
}

# Post-process the accumulated assembly, then write it out.
for ($code) {
    # Replace every `expr` span with the value of expr evaluated as Perl
    # (used for the `N*$SIZE_T` stack offsets).
    s/\`([^\`]*)\`/eval $1/gem;

    # Expand two-operand "srlg %rX,N" into "srlg %rX,%rX,N" by repeating
    # the register operand.
    s/\b(srlg\s+)(%r[0-9]+\s*,)\s*([0-9]+)/$1$2$2$3/gm;
}

print $code;

# Buffered write errors only surface at close, so check it.
close STDOUT or die "error closing STDOUT: $!";