#! /usr/bin/env perl
# Copyright 2009-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the OpenSSL license (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html

#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# February 2009
#
# Performance is 2x of gcc 3.4.6 on z10. Coding "secret" is to
# "cluster" Address Generation Interlocks, so that one pipeline stall
# resolves several dependencies.

# November 2010.
#
# Adapt for -m31 build. If kernel supports what's called "highgprs"
# feature on Linux [see /proc/cpuinfo], it's possible to use 64-bit
# instructions and achieve "64-bit" performance even in 31-bit legacy
# application context. The feature is not specific to any particular
# processor, as long as it's "z-CPU". The latter implies that the code
# remains z/Architecture specific. On z990 it was measured to perform
# 50% better than code generated by gcc 4.3.

# Command line: the first argument is the build "flavour"; the output file
# is the first later argument that ends in something shaped like
# "name.ext".  Arguments in between are skipped.
$flavour = shift;

# A flavour containing "31" or "32" selects 4-byte save slots and the plain
# stm/lm mnemonics; anything else selects 8-byte slots and the "g"-suffixed
# (stmg/lmg) forms.  $g is spliced into the mnemonics below as stm${g}/lm${g}.
if ($flavour =~ /3[12]/) {
	$SIZE_T=4;
	$g="";
} else {
	$SIZE_T=8;
	$g="g";
}

# Consume arguments until one looks like a file name or the list runs out;
# in the latter case $output is left false.
while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}

# Redirect STDOUT only when an output file was actually named.  The
# three-argument form keeps the name from being parsed for mode characters,
# and a file that cannot be created aborts the run instead of being ignored.
if ($output) {
	open(STDOUT, '>', $output) || die "can't open $output: $!";
}

$rp="%r14";	# register the generated routines branch to on return
$sp="%r15";	# base register of the save area used by stm/lm below
$code=<<___;
.text

___

# void RC4(RC4_KEY *key,size_t len,const void *inp,void *out)
#
# Appends the RC4 routine to $code.  As addressed by the generated code,
# the key structure holds x in byte 0, y in byte 1 and the state table
# from byte 2 onwards.  x is incremented on entry and decremented again
# before it is written back at .Lexit.
#
# Layout of the emitted routine:
#   .Loop8  - runs len>>3 times; each pass is eight generated steps whose
#             key-stream bytes are shifted into $acc, then XORed with eight
#             input bytes and stored as one doubleword.
#   .Loop1  - handles the remaining len&7 bytes one at a time.
#   .Lexit  - stores x and y back, restores %r6-%r11 and returns.
{
    ($acc,$cnt)=("%r0","%r1");		# key-stream accumulator, 8-byte block counter
    ($key,$len,$inp,$out)=("%r2","%r3","%r4","%r5");

    @XX=("%r6","%r7");			# current / next x index
    @TX=("%r8","%r9");			# table entry at current / next x
    ($YY,$TY)=("%r10","%r11");		# y index, table entry at y

    $code.=<<___;
.globl RC4
.type RC4,\@function
.align 64
RC4:
stm${g} %r6,%r11,6*$SIZE_T($sp)
___
    # 31/32-bit flavours emit one extra instruction on $len first
    # (llgfr, a 32->64-bit zero-extending load).
    if ($flavour =~ /3[12]/) {
	$code.=<<___;
llgfr $len,$len
___
    }
    $code.=<<___;
llgc $XX[0],0($key)
llgc $YY,1($key)
la $XX[0],1($XX[0])
nill $XX[0],0xff
srlg $cnt,$len,3
ltgr $cnt,$cnt
llgc $TX[0],2($XX[0],$key)
jz .Lshort
j .Loop8

.align 64
.Loop8:
___
    for ($i=0;$i<8;$i++) {
	# Advance y by the current table entry and compute the next x index.
	$code.=<<___;
la $YY,0($YY,$TX[0]) # $i
nill $YY,255
la $XX[1],1($XX[0])
nill $XX[1],255
___
	# The key-stream byte indexed by the previous step's $TY is fetched
	# here: step 1 loads it into $acc, later steps shift $acc left by
	# eight bits and insert it.  Step 0 has nothing to fetch yet.
	if ($i==1) {
	    $code.=<<___;
llgc $acc,2($TY,$key)
___
	} elsif ($i>1) {
	    $code.=<<___;
sllg $acc,$acc,8
ic $acc,2($TY,$key)
___
	}
	# Swap the entries at x and y, pre-load the entry for the next x
	# (overridden with $TX[0] when the next x equals y), and leave the
	# key-stream index in $TY.
	$code.=<<___;
llgc $TY,2($YY,$key)
stc $TX[0],2($YY,$key)
llgc $TX[1],2($XX[1],$key)
stc $TY,2($XX[0],$key)
cr $XX[1],$YY
jne .Lcmov$i
la $TX[1],0($TX[0])
.Lcmov$i:
la $TY,0($TY,$TX[0])
nill $TY,255
___
	# "rotate" registers: this step's "next" pair is the following
	# step's "current" pair.
	@TX=@TX[1,0]; @XX=@XX[1,0];
    }

    # The eighth key-stream byte is inserted after the loop, then the
    # accumulated doubleword is XORed with the input and stored.
    $code.=<<___;
lg $TX[1],0($inp)
sllg $acc,$acc,8
la $inp,8($inp)
ic $acc,2($TY,$key)
xgr $acc,$TX[1]
stg $acc,0($out)
la $out,8($out)
brctg $cnt,.Loop8

.Lshort:
lghi $acc,7
ngr $len,$acc
jz .Lexit
j .Loop1

.align 16
.Loop1:
la $YY,0($YY,$TX[0])
nill $YY,255
llgc $TY,2($YY,$key)
stc $TX[0],2($YY,$key)
stc $TY,2($XX[0],$key)
ar $TY,$TX[0]
ahi $XX[0],1
nill $TY,255
nill $XX[0],255
llgc $acc,0($inp)
la $inp,1($inp)
llgc $TY,2($TY,$key)
llgc $TX[0],2($XX[0],$key)
xr $acc,$TY
stc $acc,0($out)
la $out,1($out)
brct $len,.Loop1

.Lexit:
ahi $XX[0],-1
stc $XX[0],0($key)
stc $YY,1($key)
lm${g} %r6,%r11,6*$SIZE_T($sp)
br $rp
.size RC4,.-RC4
.string "RC4 for s390x, CRYPTOGAMS by <appro\@openssl.org>"

___
}

# void RC4_set_key(RC4_KEY *key,unsigned int len,const void *inp)
#
# Appends the key-schedule routine to $code.  The emitted code
#   - stores a zero halfword at offset 0 of the key structure (the two
#     bytes RC4 reads as x and y),
#   - .L1stloop: writes the byte values 0..255 to offsets 2..257,
#   - .L2ndloop: walks $ikey from -256 upwards, adding the table entry at
#     2+256+$ikey and the next input byte to $idx (mod 256) and swapping
#     the two table entries.  The input index restarts at 0 every $len
#     bytes, and the loop ends once the low eight bits of $ikey are zero.
# %r6-%r8 are saved on entry and restored before returning through $rp.
{
    ($cnt,$idx)=("%r0","%r1");		# loop counter, running table index
    ($key,$len,$inp)=("%r2","%r3","%r4");
    ($acc,$dat)=("%r5","%r6");		# table entry, input byte / swapped entry
    ($ikey,$iinp)=("%r7","%r8");	# offset into table (from -256), offset into input

    $code.=<<___;
.globl RC4_set_key
.type RC4_set_key,\@function
.align 64
RC4_set_key:
stm${g} %r6,%r8,6*$SIZE_T($sp)
lhi $cnt,256
la $idx,0
sth $idx,0($key)
.align 4
.L1stloop:
stc $idx,2($idx,$key)
la $idx,1($idx)
brct $cnt,.L1stloop

lghi $ikey,-256
lr $cnt,$len
la $iinp,0
la $idx,0
.align 16
.L2ndloop:
llgc $acc,2+256($ikey,$key)
llgc $dat,0($iinp,$inp)
la $idx,0($idx,$acc)
la $ikey,1($ikey)
la $idx,0($idx,$dat)
nill $idx,255
la $iinp,1($iinp)
tml $ikey,255
llgc $dat,2($idx,$key)
stc $dat,2+256-1($ikey,$key)
stc $acc,2($idx,$key)
jz .Ldone
brct $cnt,.L2ndloop
lr $cnt,$len
la $iinp,0
j .L2ndloop
.Ldone:
lm${g} %r6,%r8,6*$SIZE_T($sp)
br $rp
.size RC4_set_key,.-RC4_set_key

___
}

# const char *RC4_options()
#
# Appends a routine that loads the address of the .Loptions string
# ("rc4(8x,char)", placed in .rodata) into %r2 and returns via %r14.
# NOTE(review): the .Loptions label is emitted before ".align 8", so the
# label marks the position ahead of any alignment padding.  Nothing else
# is placed in .rodata by this file, so that looks harmless here - confirm
# before adding more read-only data in front of it.
$code.=<<___;
.globl RC4_options
.type RC4_options,\@function
.align 16
RC4_options:
larl %r2,.Loptions
br %r14
.size RC4_options,.-RC4_options
.section .rodata
.Loptions:
.align 8
.string "rc4(8x,char)"
___

# Emit everything accumulated in $code, then close STDOUT explicitly so
# that a failure while flushing the output is reported.
print $code;
unless (close STDOUT) {	# force flush
    die "error closing STDOUT: $!";
}