VirtualBox

source: vbox/trunk/src/libs/openssl-3.0.7/crypto/sha/asm/sha1-ppc.pl@ 98024

Last change on this file since 98024 was 94082, checked in by vboxsync, 3 years ago

libs/openssl-3.0.1: started applying and adjusting our OpenSSL changes to 3.0.1. bugref:10128

  • Property svn:executable set to *
File size: 8.3 KB
Line 
1#! /usr/bin/env perl
2# Copyright 2006-2020 The OpenSSL Project Authors. All Rights Reserved.
3#
4# Licensed under the Apache License 2.0 (the "License"). You may not use
5# this file except in compliance with the License. You can obtain a copy
6# in the file LICENSE in the source distribution or at
7# https://www.openssl.org/source/license.html
8
9
10# ====================================================================
11# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
12# project. The module is, however, dual licensed under OpenSSL and
13# CRYPTOGAMS licenses depending on where you obtain it. For further
14# details see http://www.openssl.org/~appro/cryptogams/.
15# ====================================================================
16
17# I let hardware handle unaligned input(*), except on page boundaries
18# (see below for details). Otherwise straightforward implementation
19# with X vector in register bank.
20#
21# (*) this means that this module is inappropriate for PPC403? Does
22# anybody know if pre-POWER3 can sustain unaligned load?
23
24#                     -m64    -m32
25# ----------------------------------
26# PPC970,gcc-4.0.0    +76%    +59%
27# Power6,xlc-7        +68%    +33%
28
29# $output is the last argument if it looks like a file (it has an extension)
30# $flavour is the first argument if it doesn't look like a file
# Command-line handling. Both tests are purely syntactic: the last argument
# is popped into $output only when it ends in a dot followed by word
# characters, and the first remaining argument is shifted into $flavour only
# when it contains no dot. Either variable is undef when its test fails.
31$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
32$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;
33
# Select the pointer size and the matching instruction mnemonics. A flavour
# containing "64" is checked first, then one containing "32"; anything else
# aborts. $LRSAVE is the link-register save offset that the prologue and
# epilogue add to $FRAME; $UCMP/$STU/$POP/$PUSH are the unsigned compare,
# store-with-update, load and store mnemonics interpolated into the code.
34if ($flavour =~ /64/) {
35 $SIZE_T =8;
36 $LRSAVE =2*$SIZE_T;
37 $UCMP ="cmpld";
38 $STU ="stdu";
39 $POP ="ld";
40 $PUSH ="std";
41} elsif ($flavour =~ /32/) {
42 $SIZE_T =4;
43 $LRSAVE =$SIZE_T;
44 $UCMP ="cmplw";
45 $STU ="stwu";
46 $POP ="lwz";
47 $PUSH ="stw";
48} else { die "nonsense $flavour"; }
49
50# Define endianness based on flavour
51# i.e.: linux64le
# The value is only tested for truth below: $SIZE_T (non-zero) when the
# flavour ends in "le", 0 otherwise.
52$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;
53
# Locate the ppc-xlate.pl translator: first next to this script, then in
# ../../perlasm relative to it. $dir is the directory part of $0 including
# its trailing slash or backslash.
# NOTE(review): if $0 has no directory component the match fails and $dir is
# taken from whatever $1 held before -- confirm this is acceptable.
54$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
55( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
56( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
57die "can't locate ppc-xlate.pl";
58
# Re-open STDOUT as a pipe into the translator, so everything printed later
# is fed to "$^X $xlate $flavour "$output"".
# NOTE(review): the command is built as one interpolated string; paths or
# arguments containing quotes would be misparsed -- verify callers.
59open STDOUT,"| $^X $xlate $flavour \"$output\""
60 or die "can't call $xlate: $!";
61
# Stack frame geometry, as implied by the offsets used in the generated code:
#   [0, $LOCALS)             lowest part of the frame
#   [$LOCALS, $LOCALS+64)    64-byte buffer filled by the Lmemcpy loop
#   [$FRAME-18*$SIZE_T, $FRAME)  18 pointer-sized save slots ($inp, r15-r31)
62$FRAME=24*$SIZE_T+64;
63$LOCALS=6*$SIZE_T;
64
# Symbolic names for the registers used by the generated assembly.
#   $K    - current round constant, reloaded with lis/ori before each group
#           of 20 rounds
#   $sp   - stack pointer
#   $toc  - not referenced in the code visible here
#   $ctx  - base address of the five 32-bit state words (offsets 0..16)
#   $inp  - input pointer, advanced by 64 bytes per hashed block
#   $num  - number of 64-byte blocks; moved into CTR as the loop count
#   $t0, $t1 - scratch registers for the per-round boolean function
65$K ="r0";
66$sp ="r1";
67$toc="r2";
68$ctx="r3";
69$inp="r4";
70$num="r5";
71$t0 ="r15";
72$t1 ="r6";
73
# Working variables. $T receives the newly computed value in the first round;
# the roles then rotate through @V after every round.
74$A ="r7";
75$B ="r8";
76$C ="r9";
77$D ="r10";
78$E ="r11";
79$T ="r12";
80
# @V is the rotating list of working registers passed to the BODY_* subs.
# @X names the sixteen registers holding the message schedule words; the
# BODY_* subs index it modulo 16.
81@V=($A,$B,$C,$D,$E,$T);
82@X=("r16","r17","r18","r19","r20","r21","r22","r23",
83 "r24","r25","r26","r27","r28","r29","r30","r31");
84
# loadbe($dst, $src, $temp_reg)
# Appends to $code the instructions that load the 32-bit word at memory
# operand $src into register $dst.
#   $dst      - destination register name
#   $src      - memory operand text, e.g. "offset(reg)"
#   $temp_reg - scratch register; only written on little-endian flavours
# When $LITTLE_ENDIAN is false a single lwz is emitted. When it is true the
# word is loaded into $temp_reg and a rotlwi plus two rlwimi instructions
# build the byte-swapped value in $dst.
85sub loadbe {
86my ($dst, $src, $temp_reg) = @_;
# Big-endian flavour: plain load, $temp_reg untouched.
87$code.=<<___ if (!$LITTLE_ENDIAN);
88 lwz $dst,$src
89___
# Little-endian flavour: load, then swap byte order via rotate-and-insert.
90$code.=<<___ if ($LITTLE_ENDIAN);
91 lwz $temp_reg,$src
92 rotlwi $dst,$temp_reg,8
93 rlwimi $dst,$temp_reg,24,0,7
94 rlwimi $dst,$temp_reg,24,16,23
95___
96}
97
# BODY_00_19($i, $a, $b, $c, $d, $e, $f)
# Appends the code for round $i (the caller uses it for rounds 0..19).
#   $i         - round number
#   $a .. $e   - registers holding the five working variables
#   $f         - register that receives the new working value
# Emitted computation:
#   $f = $K + $e + rotl($a,5) + X[$i] + (($c & $b) | ($d & ~$b))
# Side effects in the generated code: $e is overwritten with rotl($a,5),
# $b is rotated left by 30 in place, and $t0/$t1 are clobbered.
98sub BODY_00_19 {
99my ($i,$a,$b,$c,$d,$e,$f)=@_;
# Index of the schedule word prepared during this round for the next one.
100my $j=$i+1;
101
102 # Since the last value of $f is discarded, we can use
103 # it as a temp reg to swap byte-order when needed.
# Round 0 loads X[0] itself; rounds 0..14 each pre-load X[$i+1] from the
# input at byte offset 4*($i+1).
104 loadbe("@X[$i]","`$i*4`($inp)",$f) if ($i==0);
105 loadbe("@X[$j]","`$j*4`($inp)",$f) if ($i<15);
# Rounds 0..14: the round function only; the schedule words come from input.
106$code.=<<___ if ($i<15);
107 add $f,$K,$e
108 rotlwi $e,$a,5
109 add $f,$f,@X[$i]
110 and $t0,$c,$b
111 add $f,$f,$e
112 andc $t1,$d,$b
113 rotlwi $b,$b,30
114 or $t0,$t0,$t1
115 add $f,$f,$t0
116___
# Rounds 15 and up: the same round function interleaved with the schedule
# update X[j] = rotl(X[j] ^ X[j+2] ^ X[j+8] ^ X[j+13], 1), indices mod 16.
117$code.=<<___ if ($i>=15);
118 add $f,$K,$e
119 rotlwi $e,$a,5
120 xor @X[$j%16],@X[$j%16],@X[($j+2)%16]
121 add $f,$f,@X[$i%16]
122 and $t0,$c,$b
123 xor @X[$j%16],@X[$j%16],@X[($j+8)%16]
124 add $f,$f,$e
125 andc $t1,$d,$b
126 rotlwi $b,$b,30
127 or $t0,$t0,$t1
128 xor @X[$j%16],@X[$j%16],@X[($j+13)%16]
129 add $f,$f,$t0
130 rotlwi @X[$j%16],@X[$j%16],1
131___
132}
133
# BODY_20_39($i, $a, $b, $c, $d, $e, $f)
# Appends the code for one round using the xor function. The caller uses it
# for rounds 20..39 and again, with a different $K, for rounds 60..79.
#   $i         - round number
#   $a .. $e   - registers holding the five working variables
#   $f         - register that receives the new working value
# Emitted computation:
#   $f = $K + $e + rotl($a,5) + X[$i] + ($b ^ $c ^ $d)
# Side effects in the generated code: $e is overwritten with rotl($a,5),
# $b is rotated left by 30 in place, and $t0 is clobbered.
134sub BODY_20_39 {
135my ($i,$a,$b,$c,$d,$e,$f)=@_;
# Index of the schedule word prepared during this round for the next one.
136my $j=$i+1;
# All rounds but the last: round function interleaved with the schedule
# update X[j] = rotl(X[j] ^ X[j+2] ^ X[j+8] ^ X[j+13], 1), indices mod 16.
137$code.=<<___ if ($i<79);
138 add $f,$K,$e
139 xor $t0,$b,$d
140 rotlwi $e,$a,5
141 xor @X[$j%16],@X[$j%16],@X[($j+2)%16]
142 add $f,$f,@X[$i%16]
143 xor $t0,$t0,$c
144 xor @X[$j%16],@X[$j%16],@X[($j+8)%16]
145 add $f,$f,$t0
146 rotlwi $b,$b,30
147 xor @X[$j%16],@X[$j%16],@X[($j+13)%16]
148 add $f,$f,$e
149 rotlwi @X[$j%16],@X[$j%16],1
150___
# Round 79: no further schedule word is needed, so the slots are used to
# reload the five state words from $ctx into r16..r20; the caller adds the
# working variables to them after this round.
151$code.=<<___ if ($i==79);
152 add $f,$K,$e
153 xor $t0,$b,$d
154 rotlwi $e,$a,5
155 lwz r16,0($ctx)
156 add $f,$f,@X[$i%16]
157 xor $t0,$t0,$c
158 lwz r17,4($ctx)
159 add $f,$f,$t0
160 rotlwi $b,$b,30
161 lwz r18,8($ctx)
162 lwz r19,12($ctx)
163 add $f,$f,$e
164 lwz r20,16($ctx)
165___
166}
167
# BODY_40_59($i, $a, $b, $c, $d, $e, $f)
# Appends the code for round $i (the caller uses it for rounds 40..59).
#   $i         - round number
#   $a .. $e   - registers holding the five working variables
#   $f         - register that receives the new working value
# Emitted computation:
#   $f = $K + $e + rotl($a,5) + X[$i] + (($b & $c) | (($b | $c) & $d))
# interleaved with the schedule update
#   X[j] = rotl(X[j] ^ X[j+2] ^ X[j+8] ^ X[j+13], 1), j = $i+1, indices mod 16.
# Side effects in the generated code: $e is overwritten with rotl($a,5),
# $b is rotated left by 30 in place, and $t0/$t1 are clobbered.
168sub BODY_40_59 {
169my ($i,$a,$b,$c,$d,$e,$f)=@_;
# Index of the schedule word prepared during this round for the next one.
170my $j=$i+1;
171$code.=<<___;
172 add $f,$K,$e
173 rotlwi $e,$a,5
174 xor @X[$j%16],@X[$j%16],@X[($j+2)%16]
175 add $f,$f,@X[$i%16]
176 and $t0,$b,$c
177 xor @X[$j%16],@X[$j%16],@X[($j+8)%16]
178 add $f,$f,$e
179 or $t1,$b,$c
180 rotlwi $b,$b,30
181 xor @X[$j%16],@X[$j%16],@X[($j+13)%16]
182 and $t1,$t1,$d
183 or $t0,$t0,$t1
184 rotlwi @X[$j%16],@X[$j%16],1
185 add $f,$f,$t0
186___
187}
188
# Start of the generated text: the public entry point .sha1_block_data_order.
# Prologue: allocate $FRAME bytes, save r15-r31 and the link register, and
# load the five 32-bit state words from $ctx into $A..$E.
# Dispatch: when $inp is 4-byte aligned, $num goes into CTR and a single call
# to Lsha1_block_private hashes everything. Otherwise (Lunaligned) the code
# computes how many whole 64-byte blocks fit before the next 4096-byte
# boundary. If $num does not exceed that count, all blocks are hashed in
# place; else that many blocks are hashed in place first. Then (Lcross_page)
# the next 64 input bytes are copied byte by byte into the frame at $LOCALS
# and hashed as one block, with the advanced $inp saved and restored around
# the call; $num is decremented and the procedure repeats while it is
# non-zero.
# Epilogue (Ldone): restore the link register and r15-r31, release the frame
# and return.
189$code=<<___;
190.machine "any"
191.text
192
193.globl .sha1_block_data_order
194.align 4
195.sha1_block_data_order:
196 $STU $sp,-$FRAME($sp)
197 mflr r0
198 $PUSH r15,`$FRAME-$SIZE_T*17`($sp)
199 $PUSH r16,`$FRAME-$SIZE_T*16`($sp)
200 $PUSH r17,`$FRAME-$SIZE_T*15`($sp)
201 $PUSH r18,`$FRAME-$SIZE_T*14`($sp)
202 $PUSH r19,`$FRAME-$SIZE_T*13`($sp)
203 $PUSH r20,`$FRAME-$SIZE_T*12`($sp)
204 $PUSH r21,`$FRAME-$SIZE_T*11`($sp)
205 $PUSH r22,`$FRAME-$SIZE_T*10`($sp)
206 $PUSH r23,`$FRAME-$SIZE_T*9`($sp)
207 $PUSH r24,`$FRAME-$SIZE_T*8`($sp)
208 $PUSH r25,`$FRAME-$SIZE_T*7`($sp)
209 $PUSH r26,`$FRAME-$SIZE_T*6`($sp)
210 $PUSH r27,`$FRAME-$SIZE_T*5`($sp)
211 $PUSH r28,`$FRAME-$SIZE_T*4`($sp)
212 $PUSH r29,`$FRAME-$SIZE_T*3`($sp)
213 $PUSH r30,`$FRAME-$SIZE_T*2`($sp)
214 $PUSH r31,`$FRAME-$SIZE_T*1`($sp)
215 $PUSH r0,`$FRAME+$LRSAVE`($sp)
216 lwz $A,0($ctx)
217 lwz $B,4($ctx)
218 lwz $C,8($ctx)
219 lwz $D,12($ctx)
220 lwz $E,16($ctx)
221 andi. r0,$inp,3
222 bne Lunaligned
223Laligned:
224 mtctr $num
225 bl Lsha1_block_private
226 b Ldone
227
228; PowerPC specification allows an implementation to be ill-behaved
229; upon unaligned access which crosses page boundary. "Better safe
230; than sorry" principle makes me treat it specially. But I don't
231; look for particular offending word, but rather for 64-byte input
232; block which crosses the boundary. Once found that block is aligned
233; and hashed separately...
234.align 4
235Lunaligned:
236 subfic $t1,$inp,4096
237 andi. $t1,$t1,4095 ; distance to closest page boundary
238 srwi. $t1,$t1,6 ; t1/=64
239 beq Lcross_page
240 $UCMP $num,$t1
241 ble Laligned ; didn't cross the page boundary
242 mtctr $t1
243 subfc $num,$t1,$num
244 bl Lsha1_block_private
245Lcross_page:
246 li $t1,16
247 mtctr $t1
248 addi r20,$sp,$LOCALS ; spot within the frame
249Lmemcpy:
250 lbz r16,0($inp)
251 lbz r17,1($inp)
252 lbz r18,2($inp)
253 lbz r19,3($inp)
254 addi $inp,$inp,4
255 stb r16,0(r20)
256 stb r17,1(r20)
257 stb r18,2(r20)
258 stb r19,3(r20)
259 addi r20,r20,4
260 bdnz Lmemcpy
261
262 $PUSH $inp,`$FRAME-$SIZE_T*18`($sp)
263 li $t1,1
264 addi $inp,$sp,$LOCALS
265 mtctr $t1
266 bl Lsha1_block_private
267 $POP $inp,`$FRAME-$SIZE_T*18`($sp)
268 addic. $num,$num,-1
269 bne Lunaligned
270
271Ldone:
272 $POP r0,`$FRAME+$LRSAVE`($sp)
273 $POP r15,`$FRAME-$SIZE_T*17`($sp)
274 $POP r16,`$FRAME-$SIZE_T*16`($sp)
275 $POP r17,`$FRAME-$SIZE_T*15`($sp)
276 $POP r18,`$FRAME-$SIZE_T*14`($sp)
277 $POP r19,`$FRAME-$SIZE_T*13`($sp)
278 $POP r20,`$FRAME-$SIZE_T*12`($sp)
279 $POP r21,`$FRAME-$SIZE_T*11`($sp)
280 $POP r22,`$FRAME-$SIZE_T*10`($sp)
281 $POP r23,`$FRAME-$SIZE_T*9`($sp)
282 $POP r24,`$FRAME-$SIZE_T*8`($sp)
283 $POP r25,`$FRAME-$SIZE_T*7`($sp)
284 $POP r26,`$FRAME-$SIZE_T*6`($sp)
285 $POP r27,`$FRAME-$SIZE_T*5`($sp)
286 $POP r28,`$FRAME-$SIZE_T*4`($sp)
287 $POP r29,`$FRAME-$SIZE_T*3`($sp)
288 $POP r30,`$FRAME-$SIZE_T*2`($sp)
289 $POP r31,`$FRAME-$SIZE_T*1`($sp)
290 mtlr r0
291 addi $sp,$sp,$FRAME
292 blr
293 .long 0
294 .byte 0,12,4,1,0x80,18,3,0
295 .long 0
296___
297
298# This is a private block function which uses a tailored calling
299# interface: upon entry the SHA_CTX state is pre-loaded into the given
300# registers, and the counter register contains the number of chunks
301# to digest...
# Generate Lsha1_block_private: the 80 unrolled rounds for one 64-byte block
# followed by the state update; the emitted code loops on CTR.
302$code.=<<___;
303.align 4
304Lsha1_block_private:
305___
306$code.=<<___; # load K_00_19
307 lis $K,0x5a82
308 ori $K,$K,0x7999
309___
# After each round @V is rotated right by one, so the register that just
# received the new value ($f) is passed as $a to the next round. $i keeps
# running across the four loops below.
310for($i=0;$i<20;$i++) { &BODY_00_19($i,@V); unshift(@V,pop(@V)); }
311$code.=<<___; # load K_20_39
312 lis $K,0x6ed9
313 ori $K,$K,0xeba1
314___
315for(;$i<40;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
316$code.=<<___; # load K_40_59
317 lis $K,0x8f1b
318 ori $K,$K,0xbcdc
319___
320for(;$i<60;$i++) { &BODY_40_59($i,@V); unshift(@V,pop(@V)); }
321$code.=<<___; # load K_60_79
322 lis $K,0xca62
323 ori $K,$K,0xc1d6
324___
# Rounds 60..79 reuse the xor-function body with the fourth constant.
325for(;$i<80;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
# Eighty rotations of the six-element @V leave it rotated by two
# (80 mod 6 == 2), so the final working values a..e sit in $E,$T,$A,$B,$C.
# They are added to the state words that round 79 reloaded into r16..r20,
# stored back to $ctx, and copied into $A..$E for the next block. $inp then
# advances by 64 bytes and bdnz repeats while CTR is non-zero.
326$code.=<<___;
327 add r16,r16,$E
328 add r17,r17,$T
329 add r18,r18,$A
330 add r19,r19,$B
331 add r20,r20,$C
332 stw r16,0($ctx)
333 mr $A,r16
334 stw r17,4($ctx)
335 mr $B,r17
336 stw r18,8($ctx)
337 mr $C,r18
338 stw r19,12($ctx)
339 mr $D,r19
340 stw r20,16($ctx)
341 mr $E,r20
342 addi $inp,$inp,`16*4`
343 bdnz Lsha1_block_private
344 blr
345 .long 0
346 .byte 0,12,0x14,0,0,0,0,0
347.size .sha1_block_data_order,.-.sha1_block_data_order
348___
349$code.=<<___;
350.asciz "SHA1 block transform for PPC, CRYPTOGAMS by <appro\@fy.chalmers.se>"
351___
352
# Replace every `...` span in the generated text with the result of
# evaluating its contents as Perl (this resolves the offset arithmetic),
# then write the text to STDOUT, which is the pipe into the translator.
# The close is checked so that a failure on the pipe aborts with a message.
353$code =~ s/\`([^\`]*)\`/eval $1/gem;
354print $code;
355close STDOUT or die "error closing STDOUT: $!";
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette