VirtualBox

source: vbox/trunk/src/libs/openssl-1.1.0g/crypto/chacha/asm/chacha-s390x.pl@ 69881

Last change on this file since 69881 was 69881, checked in by vboxsync, 7 years ago

Update OpenSSL to 1.1.0g.
bugref:8070: src/libs maintenance

  • Property svn:eol-style set to LF
  • Property svn:executable set to *
File size: 7.8 KB
Line 
1#! /usr/bin/env perl
2# Copyright 2016 The OpenSSL Project Authors. All Rights Reserved.
3#
4# Licensed under the OpenSSL license (the "License"). You may not use
5# this file except in compliance with the License. You can obtain a copy
6# in the file LICENSE in the source distribution or at
7# https://www.openssl.org/source/license.html
8
9#
10# ====================================================================
11# Written by Andy Polyakov <[email protected]> for the OpenSSL
12# project. The module is, however, dual licensed under OpenSSL and
13# CRYPTOGAMS licenses depending on where you obtain it. For further
14# details see http://www.openssl.org/~appro/cryptogams/.
15# ====================================================================
16#
17# December 2015
18#
19# ChaCha20 for s390x.
20#
21# 3 times faster than compiler-generated code.
22
23$flavour = shift;
24
25if ($flavour =~ /3[12]/) {
26 $SIZE_T=4;
27 $g="";
28} else {
29 $SIZE_T=8;
30 $g="g";
31}
32
33while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}
34open STDOUT,">$output";
35
36sub AUTOLOAD() # thunk [simplified] x86-style perlasm
37{ my $opcode = $AUTOLOAD; $opcode =~ s/.*:://;
38 $code .= "\t$opcode\t".join(',',@_)."\n";
39}
40
41my $sp="%r15";
42
43my $stdframe=16*$SIZE_T+4*8;
44my $frame=$stdframe+4*20;
45
46my ($out,$inp,$len,$key,$counter)=map("%r$_",(2..6));
47
48my @x=map("%r$_",(0..7,"x","x","x","x",(10..13)));
49my @t=map("%r$_",(8,9));
50
51sub ROUND {
52my ($a0,$b0,$c0,$d0)=@_;
53my ($a1,$b1,$c1,$d1)=map(($_&~3)+(($_+1)&3),($a0,$b0,$c0,$d0));
54my ($a2,$b2,$c2,$d2)=map(($_&~3)+(($_+1)&3),($a1,$b1,$c1,$d1));
55my ($a3,$b3,$c3,$d3)=map(($_&~3)+(($_+1)&3),($a2,$b2,$c2,$d2));
56my ($xc,$xc_)=map("\"$_\"",@t);
57my @x=map("\"$_\"",@x);
58
59 # Consider order in which variables are addressed by their
60 # index:
61 #
62 # a b c d
63 #
64 # 0 4 8 12 < even round
65 # 1 5 9 13
66 # 2 6 10 14
67 # 3 7 11 15
68 # 0 5 10 15 < odd round
69 # 1 6 11 12
70 # 2 7 8 13
71 # 3 4 9 14
72 #
73 # 'a', 'b' and 'd's are permanently allocated in registers,
74 # @x[0..7,12..15], while 'c's are maintained in memory. If
75 # you observe 'c' column, you'll notice that pair of 'c's is
76 # invariant between rounds. This means that we have to reload
77 # them once per round, in the middle. This is why you'll see
78 # 'c' stores and loads in the middle, but none in the beginning
79 # or end.
80
81 (
82 "&alr (@x[$a0],@x[$b0])", # Q1
83 "&alr (@x[$a1],@x[$b1])", # Q2
84 "&xr (@x[$d0],@x[$a0])",
85 "&xr (@x[$d1],@x[$a1])",
86 "&rll (@x[$d0],@x[$d0],16)",
87 "&rll (@x[$d1],@x[$d1],16)",
88
89 "&alr ($xc,@x[$d0])",
90 "&alr ($xc_,@x[$d1])",
91 "&xr (@x[$b0],$xc)",
92 "&xr (@x[$b1],$xc_)",
93 "&rll (@x[$b0],@x[$b0],12)",
94 "&rll (@x[$b1],@x[$b1],12)",
95
96 "&alr (@x[$a0],@x[$b0])",
97 "&alr (@x[$a1],@x[$b1])",
98 "&xr (@x[$d0],@x[$a0])",
99 "&xr (@x[$d1],@x[$a1])",
100 "&rll (@x[$d0],@x[$d0],8)",
101 "&rll (@x[$d1],@x[$d1],8)",
102
103 "&alr ($xc,@x[$d0])",
104 "&alr ($xc_,@x[$d1])",
105 "&xr (@x[$b0],$xc)",
106 "&xr (@x[$b1],$xc_)",
107 "&rll (@x[$b0],@x[$b0],7)",
108 "&rll (@x[$b1],@x[$b1],7)",
109
110 "&stm ($xc,$xc_,'$stdframe+4*8+4*$c0($sp)')", # reload pair of 'c's
111 "&lm ($xc,$xc_,'$stdframe+4*8+4*$c2($sp)')",
112
113 "&alr (@x[$a2],@x[$b2])", # Q3
114 "&alr (@x[$a3],@x[$b3])", # Q4
115 "&xr (@x[$d2],@x[$a2])",
116 "&xr (@x[$d3],@x[$a3])",
117 "&rll (@x[$d2],@x[$d2],16)",
118 "&rll (@x[$d3],@x[$d3],16)",
119
120 "&alr ($xc,@x[$d2])",
121 "&alr ($xc_,@x[$d3])",
122 "&xr (@x[$b2],$xc)",
123 "&xr (@x[$b3],$xc_)",
124 "&rll (@x[$b2],@x[$b2],12)",
125 "&rll (@x[$b3],@x[$b3],12)",
126
127 "&alr (@x[$a2],@x[$b2])",
128 "&alr (@x[$a3],@x[$b3])",
129 "&xr (@x[$d2],@x[$a2])",
130 "&xr (@x[$d3],@x[$a3])",
131 "&rll (@x[$d2],@x[$d2],8)",
132 "&rll (@x[$d3],@x[$d3],8)",
133
134 "&alr ($xc,@x[$d2])",
135 "&alr ($xc_,@x[$d3])",
136 "&xr (@x[$b2],$xc)",
137 "&xr (@x[$b3],$xc_)",
138 "&rll (@x[$b2],@x[$b2],7)",
139 "&rll (@x[$b3],@x[$b3],7)"
140 );
141}
142
143$code.=<<___;
144.text
145
146.globl ChaCha20_ctr32
147.type ChaCha20_ctr32,\@function
148.align 32
149ChaCha20_ctr32:
150 lt${g}r $len,$len # $len==0?
151 bzr %r14
152 a${g}hi $len,-64
153 l${g}hi %r1,-$frame
154 stm${g} %r6,%r15,`6*$SIZE_T`($sp)
155 sl${g}r $out,$inp # difference
156 la $len,0($inp,$len) # end of input minus 64
157 larl %r7,.Lsigma
158 lgr %r0,$sp
159 la $sp,0(%r1,$sp)
160 st${g} %r0,0($sp)
161
162 lmg %r8,%r11,0($key) # load key
163 lmg %r12,%r13,0($counter) # load counter
164 lmg %r6,%r7,0(%r7) # load sigma constant
165
166 la %r14,0($inp)
167 st${g} $out,$frame+3*$SIZE_T($sp)
168 st${g} $len,$frame+4*$SIZE_T($sp)
169 stmg %r6,%r13,$stdframe($sp) # copy key schedule to stack
170 srlg @x[12],%r12,32 # 32-bit counter value
171 j .Loop_outer
172
173.align 16
174.Loop_outer:
175 lm @x[0],@x[7],$stdframe+4*0($sp) # load x[0]-x[7]
176 lm @t[0],@t[1],$stdframe+4*10($sp) # load x[10]-x[11]
177 lm @x[13],@x[15],$stdframe+4*13($sp) # load x[13]-x[15]
178 stm @t[0],@t[1],$stdframe+4*8+4*10($sp) # offload x[10]-x[11]
179 lm @t[0],@t[1],$stdframe+4*8($sp) # load x[8]-x[9]
180 st @x[12],$stdframe+4*12($sp) # save counter
181 st${g} %r14,$frame+2*$SIZE_T($sp) # save input pointer
182 lhi %r14,10
183 j .Loop
184
185.align 4
186.Loop:
187___
188 foreach (&ROUND(0, 4, 8,12)) { eval; }
189 foreach (&ROUND(0, 5,10,15)) { eval; }
190$code.=<<___;
191 brct %r14,.Loop
192
193 l${g} %r14,$frame+2*$SIZE_T($sp) # pull input pointer
194 stm @t[0],@t[1],$stdframe+4*8+4*8($sp) # offload x[8]-x[9]
195 lm${g} @t[0],@t[1],$frame+3*$SIZE_T($sp)
196
197 al @x[0],$stdframe+4*0($sp) # accumulate key schedule
198 al @x[1],$stdframe+4*1($sp)
199 al @x[2],$stdframe+4*2($sp)
200 al @x[3],$stdframe+4*3($sp)
201 al @x[4],$stdframe+4*4($sp)
202 al @x[5],$stdframe+4*5($sp)
203 al @x[6],$stdframe+4*6($sp)
204 al @x[7],$stdframe+4*7($sp)
205 lrvr @x[0],@x[0]
206 lrvr @x[1],@x[1]
207 lrvr @x[2],@x[2]
208 lrvr @x[3],@x[3]
209 lrvr @x[4],@x[4]
210 lrvr @x[5],@x[5]
211 lrvr @x[6],@x[6]
212 lrvr @x[7],@x[7]
213 al @x[12],$stdframe+4*12($sp)
214 al @x[13],$stdframe+4*13($sp)
215 al @x[14],$stdframe+4*14($sp)
216 al @x[15],$stdframe+4*15($sp)
217 lrvr @x[12],@x[12]
218 lrvr @x[13],@x[13]
219 lrvr @x[14],@x[14]
220 lrvr @x[15],@x[15]
221
222 la @t[0],0(@t[0],%r14) # reconstruct output pointer
223 cl${g}r %r14,@t[1]
224 jh .Ltail
225
226 x @x[0],4*0(%r14) # xor with input
227 x @x[1],4*1(%r14)
228 st @x[0],4*0(@t[0]) # store output
229 x @x[2],4*2(%r14)
230 st @x[1],4*1(@t[0])
231 x @x[3],4*3(%r14)
232 st @x[2],4*2(@t[0])
233 x @x[4],4*4(%r14)
234 st @x[3],4*3(@t[0])
235 lm @x[0],@x[3],$stdframe+4*8+4*8($sp) # load x[8]-x[11]
236 x @x[5],4*5(%r14)
237 st @x[4],4*4(@t[0])
238 x @x[6],4*6(%r14)
239 al @x[0],$stdframe+4*8($sp)
240 st @x[5],4*5(@t[0])
241 x @x[7],4*7(%r14)
242 al @x[1],$stdframe+4*9($sp)
243 st @x[6],4*6(@t[0])
244 x @x[12],4*12(%r14)
245 al @x[2],$stdframe+4*10($sp)
246 st @x[7],4*7(@t[0])
247 x @x[13],4*13(%r14)
248 al @x[3],$stdframe+4*11($sp)
249 st @x[12],4*12(@t[0])
250 x @x[14],4*14(%r14)
251 st @x[13],4*13(@t[0])
252 x @x[15],4*15(%r14)
253 st @x[14],4*14(@t[0])
254 lrvr @x[0],@x[0]
255 st @x[15],4*15(@t[0])
256 lrvr @x[1],@x[1]
257 lrvr @x[2],@x[2]
258 lrvr @x[3],@x[3]
259 lhi @x[12],1
260 x @x[0],4*8(%r14)
261 al @x[12],$stdframe+4*12($sp) # increment counter
262 x @x[1],4*9(%r14)
263 st @x[0],4*8(@t[0])
264 x @x[2],4*10(%r14)
265 st @x[1],4*9(@t[0])
266 x @x[3],4*11(%r14)
267 st @x[2],4*10(@t[0])
268 st @x[3],4*11(@t[0])
269
270 cl${g}r %r14,@t[1] # done yet?
271 la %r14,64(%r14)
272 jl .Loop_outer
273
274.Ldone:
275 xgr %r0,%r0
276 xgr %r1,%r1
277 xgr %r2,%r2
278 xgr %r3,%r3
279 stmg %r0,%r3,$stdframe+4*4($sp) # wipe key copy
280 stmg %r0,%r3,$stdframe+4*12($sp)
281
282 lm${g} %r6,%r15,`$frame+6*$SIZE_T`($sp)
283 br %r14
284
285.align 16
286.Ltail:
287 la @t[1],64($t[1])
288 stm @x[0],@x[7],$stdframe+4*0($sp)
289 sl${g}r @t[1],%r14
290 lm @x[0],@x[3],$stdframe+4*8+4*8($sp)
291 l${g}hi @x[6],0
292 stm @x[12],@x[15],$stdframe+4*12($sp)
293 al @x[0],$stdframe+4*8($sp)
294 al @x[1],$stdframe+4*9($sp)
295 al @x[2],$stdframe+4*10($sp)
296 al @x[3],$stdframe+4*11($sp)
297 lrvr @x[0],@x[0]
298 lrvr @x[1],@x[1]
299 lrvr @x[2],@x[2]
300 lrvr @x[3],@x[3]
301 stm @x[0],@x[3],$stdframe+4*8($sp)
302
303.Loop_tail:
304 llgc @x[4],0(@x[6],%r14)
305 llgc @x[5],$stdframe(@x[6],$sp)
306 xr @x[5],@x[4]
307 stc @x[5],0(@x[6],@t[0])
308 la @x[6],1(@x[6])
309 brct @t[1],.Loop_tail
310
311 j .Ldone
312.size ChaCha20_ctr32,.-ChaCha20_ctr32
313
314.align 32
315.Lsigma:
316.long 0x61707865,0x3320646e,0x79622d32,0x6b206574 # endian-neutral
317.asciz "ChaCha20 for s390x, CRYPTOGAMS by <appro\@openssl.org>"
318.align 4
319___
320
321foreach (split("\n",$code)) {
322 s/\`([^\`]*)\`/eval $1/ge;
323
324 print $_,"\n";
325}
326close STDOUT;
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette