1 | #! /usr/bin/env perl
|
---|
2 | # Copyright 2012-2020 The OpenSSL Project Authors. All Rights Reserved.
|
---|
3 | #
|
---|
4 | # Licensed under the OpenSSL license (the "License"). You may not use
|
---|
5 | # this file except in compliance with the License. You can obtain a copy
|
---|
6 | # in the file LICENSE in the source distribution or at
|
---|
7 | # https://www.openssl.org/source/license.html
|
---|
8 |
|
---|
9 | #
|
---|
10 | # ====================================================================
|
---|
11 | # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
|
---|
12 | # project. The module is, however, dual licensed under OpenSSL and
|
---|
13 | # CRYPTOGAMS licenses depending on where you obtain it. For further
|
---|
14 | # details see http://www.openssl.org/~appro/cryptogams/.
|
---|
15 | # ====================================================================
|
---|
16 | #
|
---|
17 | # SHA512 for C64x+.
|
---|
18 | #
|
---|
19 | # January 2012
|
---|
20 | #
|
---|
21 | # Performance is 19 cycles per processed byte. Compared to block
|
---|
22 | # transform function from sha512.c compiled with cl6x with -mv6400+
|
---|
23 | # -o2 -DOPENSSL_SMALL_FOOTPRINT it's almost 7x faster and 2x smaller.
|
---|
24 | # Loop unroll won't make it, this implementation, any faster, because
|
---|
25 | # it's effectively dominated by SHRU||SHL pairs and you can't schedule
|
---|
26 | # more of them.
|
---|
27 | #
|
---|
28 | # !!! Note that this module uses AMR, which means that all interrupt
|
---|
29 | # service routines are expected to preserve it and for own well-being
|
---|
30 | # zero it upon entry.
|
---|
31 |
|
---|
# Pick the output file name from the command line: arguments are consumed
# from the front until one ends in something that looks like "name.ext";
# every argument skipped on the way is discarded.
while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}

# Redirect STDOUT to that file.  Three-argument open keeps characters in the
# name from being interpreted as part of the open mode, and the result is
# checked so that a failure is reported here rather than surfacing later as
# a confusing "error closing STDOUT".  When no argument matched, $output is
# false and the generated assembly is written to the inherited STDOUT.
if ($output) {
    open STDOUT, '>', $output
        or die "can't open $output: $!";
}
34 |
|
---|
# Register allocation for the generated code.  Each 64-bit quantity is kept
# as a pair of 32-bit registers: the "hi" half in an A-side register and the
# "lo" half in a B-side register.

# Incoming arguments and the pointer to the K512 table.
$CTXA = "A4";
$INP  = "B4";
$NUM  = "A6";
$K512 = "A3";

# Working variables a..h interleaved with the per-block saved copies
# (the "ctx" names): A16..A31 carry the high halves, B16..B31 the low ones.
($Ahi, $Actxhi, $Bhi, $Bctxhi, $Chi, $Cctxhi, $Dhi, $Dctxhi,
 $Ehi, $Ectxhi, $Fhi, $Fctxhi, $Ghi, $Gctxhi, $Hhi, $Hctxhi)
    = map { "A$_" } 16 .. 31;
($Alo, $Actxlo, $Blo, $Bctxlo, $Clo, $Cctxlo, $Dlo, $Dctxlo,
 $Elo, $Ectxlo, $Flo, $Fctxlo, $Glo, $Gctxlo, $Hlo, $Hctxlo)
    = map { "B$_" } 16 .. 31;

# Round temporaries: Sigma1, Ch, Sigma0 and a scratch pair in A10..A13 and
# B10..B13.
($S1hi, $CHhi, $S0hi, $t0hi) = map { "A$_" } 10 .. 13;
($S1lo, $CHlo, $S0lo, $t0lo) = map { "B$_" } 10 .. 13;

# T1/T2 accumulators; the low halves have a companion carry register.
# $T1hi is A6, the same register as $NUM (the generated code copies $NUM
# to A0 in its first instruction).
($T1hi, $T2hi)                       = qw(A6 A7);
($T1lo, $T1carry, $T2lo, $T2carry)   = qw(B6 B7 B8 B9);

# Register pair that receives the current K512 word.
($Khi, $Klo) = qw(A9 A8);

# Aliases: Maj shares the T2 pair, the second scratch pair shares $Khi and
# B2, and the B-side copy of the context pointer shares B2 as well.
$MAJhi = $T2hi;
$MAJlo = $T2lo;
$t1hi  = $Khi;
$t1lo  = "B2";
$CTXB  = $t1lo;

# Pointers into the circular/ring buffer holding X[].
($Xihi, $Xilo) = qw(A5 B5);
53 |
|
---|
# Assembly template for TI C64x+.  Perl interpolates the register-name
# variables assigned earlier in this script into the text; "\$" and "\@"
# keep a literal "$" / "@" in the emitted output.  The template produces the
# sha512_block_data_order entry point (arguments arrive in $CTXA, $INP and
# $NUM) followed by the K512 constant table.  Everything up to the closing
# "___" line is emitted verbatim, so only assembler syntax belongs here.
$code.=<<___;
	.text

	.if	.ASSEMBLER_VERSION<7000000
	.asg	0,__TI_EABI__
	.endif
	.if	__TI_EABI__
	.nocmp
	.asg	sha512_block_data_order,_sha512_block_data_order
	.endif

	.asg	B3,RA
	.asg	A15,FP
	.asg	B15,SP

	.if	.BIG_ENDIAN
	.asg	$Khi,KHI
	.asg	$Klo,KLO
	.else
	.asg	$Khi,KLO
	.asg	$Klo,KHI
	.endif

	.global	_sha512_block_data_order
_sha512_block_data_order:
__sha512_block:
	.asmfunc stack_usage(40+128)
	MV	$NUM,A0				; reassign $NUM
||	MVK	-128,B0
  [!A0]	BNOP	RA				; if ($NUM==0) return;
|| [A0]	STW	FP,*SP--(40)			; save frame pointer
|| [A0]	MV	SP,FP
   [A0]	STDW	B13:B12,*SP[4]
|| [A0]	MVK	0x00404,B1
   [A0]	STDW	B11:B10,*SP[3]
|| [A0]	STDW	A13:A12,*FP[-3]
|| [A0]	MVKH	0x60000,B1
   [A0]	STDW	A11:A10,*SP[1]
|| [A0]	MVC	B1,AMR				; setup circular addressing
|| [A0]	ADD	B0,SP,SP			; alloca(128)
	.if	__TI_EABI__
   [A0]	AND	B0,SP,SP			; align stack at 128 bytes
|| [A0]	ADDKPC	__sha512_block,B1
|| [A0]	MVKL	\$PCR_OFFSET(K512,__sha512_block),$K512
   [A0]	MVKH	\$PCR_OFFSET(K512,__sha512_block),$K512
|| [A0]	SUBAW	SP,2,SP				; reserve two words above buffer
	.else
   [A0]	AND	B0,SP,SP			; align stack at 128 bytes
|| [A0]	ADDKPC	__sha512_block,B1
|| [A0]	MVKL	(K512-__sha512_block),$K512
   [A0]	MVKH	(K512-__sha512_block),$K512
|| [A0]	SUBAW	SP,2,SP				; reserve two words above buffer
	.endif
	ADDAW	SP,3,$Xilo
	ADDAW	SP,2,$Xihi

||	MV	$CTXA,$CTXB
	LDW	*${CTXA}[0^.LITTLE_ENDIAN],$Ahi	; load ctx
||	LDW	*${CTXB}[1^.LITTLE_ENDIAN],$Alo
||	ADD	B1,$K512,$K512
	LDW	*${CTXA}[2^.LITTLE_ENDIAN],$Bhi
||	LDW	*${CTXB}[3^.LITTLE_ENDIAN],$Blo
	LDW	*${CTXA}[4^.LITTLE_ENDIAN],$Chi
||	LDW	*${CTXB}[5^.LITTLE_ENDIAN],$Clo
	LDW	*${CTXA}[6^.LITTLE_ENDIAN],$Dhi
||	LDW	*${CTXB}[7^.LITTLE_ENDIAN],$Dlo
	LDW	*${CTXA}[8^.LITTLE_ENDIAN],$Ehi
||	LDW	*${CTXB}[9^.LITTLE_ENDIAN],$Elo
	LDW	*${CTXA}[10^.LITTLE_ENDIAN],$Fhi
||	LDW	*${CTXB}[11^.LITTLE_ENDIAN],$Flo
	LDW	*${CTXA}[12^.LITTLE_ENDIAN],$Ghi
||	LDW	*${CTXB}[13^.LITTLE_ENDIAN],$Glo
	LDW	*${CTXA}[14^.LITTLE_ENDIAN],$Hhi
||	LDW	*${CTXB}[15^.LITTLE_ENDIAN],$Hlo

	LDNDW	*$INP++,B11:B10			; pre-fetch input
	LDDW	*$K512++,$Khi:$Klo		; pre-fetch K512[0]
outerloop?:
	MVK	15,B0				; loop counters
||	MVK	64,B1
||	SUB	A0,1,A0
	MV	$Ahi,$Actxhi
||	MV	$Alo,$Actxlo
||	MV	$Bhi,$Bctxhi
||	MV	$Blo,$Bctxlo
||	MV	$Chi,$Cctxhi
||	MV	$Clo,$Cctxlo
||	MVD	$Dhi,$Dctxhi
||	MVD	$Dlo,$Dctxlo
	MV	$Ehi,$Ectxhi
||	MV	$Elo,$Ectxlo
||	MV	$Fhi,$Fctxhi
||	MV	$Flo,$Fctxlo
||	MV	$Ghi,$Gctxhi
||	MV	$Glo,$Gctxlo
||	MVD	$Hhi,$Hctxhi
||	MVD	$Hlo,$Hctxlo
loop0_15?:
	.if	.BIG_ENDIAN
	MV	B11,$T1hi
||	MV	B10,$T1lo
	.else
	SWAP4	B10,$T1hi
||	SWAP4	B11,$T1lo
	SWAP2	$T1hi,$T1hi
||	SWAP2	$T1lo,$T1lo
	.endif
loop16_79?:
	STW	$T1hi,*$Xihi++[2]
||	STW	$T1lo,*$Xilo++[2]			; X[i] = T1
||	ADD	$Hhi,$T1hi,$T1hi
||	ADDU	$Hlo,$T1lo,$T1carry:$T1lo		; T1 += h
||	SHRU	$Ehi,14,$S1hi
||	SHL	$Ehi,32-14,$S1lo
	XOR	$Fhi,$Ghi,$CHhi
||	XOR	$Flo,$Glo,$CHlo
||	ADD	KHI,$T1hi,$T1hi
||	ADDU	KLO,$T1carry:$T1lo,$T1carry:$T1lo	; T1 += K512[i]
||	SHRU	$Elo,14,$t0lo
||	SHL	$Elo,32-14,$t0hi
	XOR	$t0hi,$S1hi,$S1hi
||	XOR	$t0lo,$S1lo,$S1lo
||	AND	$Ehi,$CHhi,$CHhi
||	AND	$Elo,$CHlo,$CHlo
||	ROTL	$Ghi,0,$Hhi
||	ROTL	$Glo,0,$Hlo				; h = g
||	SHRU	$Ehi,18,$t0hi
||	SHL	$Ehi,32-18,$t0lo
	XOR	$t0hi,$S1hi,$S1hi
||	XOR	$t0lo,$S1lo,$S1lo
||	XOR	$Ghi,$CHhi,$CHhi
||	XOR	$Glo,$CHlo,$CHlo			; Ch(e,f,g) = ((f^g)&e)^g
||	ROTL	$Fhi,0,$Ghi
||	ROTL	$Flo,0,$Glo				; g = f
||	SHRU	$Elo,18,$t0lo
||	SHL	$Elo,32-18,$t0hi
	XOR	$t0hi,$S1hi,$S1hi
||	XOR	$t0lo,$S1lo,$S1lo
||	OR	$Ahi,$Bhi,$MAJhi
||	OR	$Alo,$Blo,$MAJlo
||	ROTL	$Ehi,0,$Fhi
||	ROTL	$Elo,0,$Flo				; f = e
||	SHRU	$Ehi,41-32,$t0lo
||	SHL	$Ehi,64-41,$t0hi
	XOR	$t0hi,$S1hi,$S1hi
||	XOR	$t0lo,$S1lo,$S1lo
||	AND	$Chi,$MAJhi,$MAJhi
||	AND	$Clo,$MAJlo,$MAJlo
||	ROTL	$Dhi,0,$Ehi
||	ROTL	$Dlo,0,$Elo				; e = d
||	SHRU	$Elo,41-32,$t0hi
||	SHL	$Elo,64-41,$t0lo
	XOR	$t0hi,$S1hi,$S1hi
||	XOR	$t0lo,$S1lo,$S1lo			; Sigma1(e)
||	AND	$Ahi,$Bhi,$t1hi
||	AND	$Alo,$Blo,$t1lo
||	ROTL	$Chi,0,$Dhi
||	ROTL	$Clo,0,$Dlo				; d = c
||	SHRU	$Ahi,28,$S0hi
||	SHL	$Ahi,32-28,$S0lo
	OR	$t1hi,$MAJhi,$MAJhi
||	OR	$t1lo,$MAJlo,$MAJlo			; Maj(a,b,c) = ((a|b)&c)|(a&b)
||	ADD	$CHhi,$T1hi,$T1hi
||	ADDU	$CHlo,$T1carry:$T1lo,$T1carry:$T1lo	; T1 += Ch(e,f,g)
||	ROTL	$Bhi,0,$Chi
||	ROTL	$Blo,0,$Clo				; c = b
||	SHRU	$Alo,28,$t0lo
||	SHL	$Alo,32-28,$t0hi
	XOR	$t0hi,$S0hi,$S0hi
||	XOR	$t0lo,$S0lo,$S0lo
||	ADD	$S1hi,$T1hi,$T1hi
||	ADDU	$S1lo,$T1carry:$T1lo,$T1carry:$T1lo	; T1 += Sigma1(e)
||	ROTL	$Ahi,0,$Bhi
||	ROTL	$Alo,0,$Blo				; b = a
||	SHRU	$Ahi,34-32,$t0lo
||	SHL	$Ahi,64-34,$t0hi
	XOR	$t0hi,$S0hi,$S0hi
||	XOR	$t0lo,$S0lo,$S0lo
||	ADD	$MAJhi,$T1hi,$T2hi
||	ADDU	$MAJlo,$T1carry:$T1lo,$T2carry:$T2lo	; T2 = T1+Maj(a,b,c)
||	SHRU	$Alo,34-32,$t0hi
||	SHL	$Alo,64-34,$t0lo
	XOR	$t0hi,$S0hi,$S0hi
||	XOR	$t0lo,$S0lo,$S0lo
||	ADD	$Ehi,$T1hi,$T1hi
||	ADDU	$Elo,$T1carry:$T1lo,$T1carry:$T1lo	; T1 += e
|| [B0]	BNOP	loop0_15?
||	SHRU	$Ahi,39-32,$t0lo
||	SHL	$Ahi,64-39,$t0hi
	XOR	$t0hi,$S0hi,$S0hi
||	XOR	$t0lo,$S0lo,$S0lo
|| [B0]	LDNDW	*$INP++,B11:B10			; pre-fetch input
||[!B1]	BNOP	break?
||	SHRU	$Alo,39-32,$t0hi
||	SHL	$Alo,64-39,$t0lo
	XOR	$t0hi,$S0hi,$S0hi
||	XOR	$t0lo,$S0lo,$S0lo			; Sigma0(a)
||	ADD	$T1carry,$T1hi,$Ehi
||	MV	$T1lo,$Elo				; e = T1
||[!B0]	LDW	*${Xihi}[28],$T1hi
||[!B0]	LDW	*${Xilo}[28],$T1lo			; X[i+14]
	ADD	$S0hi,$T2hi,$T2hi
||	ADDU	$S0lo,$T2carry:$T2lo,$T2carry:$T2lo	; T2 += Sigma0(a)
|| [B1]	LDDW	*$K512++,$Khi:$Klo		; pre-fetch K512[i]
	NOP						; avoid cross-path stall
	ADD	$T2carry,$T2hi,$Ahi
||	MV	$T2lo,$Alo				; a = T2
|| [B0]	SUB	B0,1,B0
;;===== branch to loop0_15? is taken here
	NOP
;;===== branch to break? is taken here
	LDW	*${Xihi}[2],$T2hi
||	LDW	*${Xilo}[2],$T2lo			; X[i+1]
||	SHRU	$T1hi,19,$S1hi
||	SHL	$T1hi,32-19,$S1lo
	SHRU	$T1lo,19,$t0lo
||	SHL	$T1lo,32-19,$t0hi
	XOR	$t0hi,$S1hi,$S1hi
||	XOR	$t0lo,$S1lo,$S1lo
||	SHRU	$T1hi,61-32,$t0lo
||	SHL	$T1hi,64-61,$t0hi
	XOR	$t0hi,$S1hi,$S1hi
||	XOR	$t0lo,$S1lo,$S1lo
||	SHRU	$T1lo,61-32,$t0hi
||	SHL	$T1lo,64-61,$t0lo
	XOR	$t0hi,$S1hi,$S1hi
||	XOR	$t0lo,$S1lo,$S1lo
||	SHRU	$T1hi,6,$t0hi
||	SHL	$T1hi,32-6,$t0lo
	XOR	$t0hi,$S1hi,$S1hi
||	XOR	$t0lo,$S1lo,$S1lo
||	SHRU	$T1lo,6,$t0lo
||	LDW	*${Xihi}[18],$T1hi
||	LDW	*${Xilo}[18],$T1lo			; X[i+9]
	XOR	$t0lo,$S1lo,$S1lo			; sigma1(Xi[i+14])

||	LDW	*${Xihi}[0],$CHhi
||	LDW	*${Xilo}[0],$CHlo			; X[i]
||	SHRU	$T2hi,1,$S0hi
||	SHL	$T2hi,32-1,$S0lo
	SHRU	$T2lo,1,$t0lo
||	SHL	$T2lo,32-1,$t0hi
	XOR	$t0hi,$S0hi,$S0hi
||	XOR	$t0lo,$S0lo,$S0lo
||	SHRU	$T2hi,8,$t0hi
||	SHL	$T2hi,32-8,$t0lo
	XOR	$t0hi,$S0hi,$S0hi
||	XOR	$t0lo,$S0lo,$S0lo
||	SHRU	$T2lo,8,$t0lo
||	SHL	$T2lo,32-8,$t0hi
	XOR	$t0hi,$S0hi,$S0hi
||	XOR	$t0lo,$S0lo,$S0lo
||	ADD	$S1hi,$T1hi,$T1hi
||	ADDU	$S1lo,$T1lo,$T1carry:$T1lo		; T1 = X[i+9]+sigma1()
|| [B1]	BNOP	loop16_79?
||	SHRU	$T2hi,7,$t0hi
||	SHL	$T2hi,32-7,$t0lo
	XOR	$t0hi,$S0hi,$S0hi
||	XOR	$t0lo,$S0lo,$S0lo
||	ADD	$CHhi,$T1hi,$T1hi
||	ADDU	$CHlo,$T1carry:$T1lo,$T1carry:$T1lo	; T1 += X[i]
||	SHRU	$T2lo,7,$t0lo
	XOR	$t0lo,$S0lo,$S0lo			; sigma0(Xi[i+1])

	ADD	$S0hi,$T1hi,$T1hi
||	ADDU	$S0lo,$T1carry:$T1lo,$T1carry:$T1lo	; T1 += sigma0()
|| [B1]	SUB	B1,1,B1
	NOP						; avoid cross-path stall
	ADD	$T1carry,$T1hi,$T1hi
;;===== branch to loop16_79? is taken here

break?:
	ADD	$Ahi,$Actxhi,$Ahi		; accumulate ctx
||	ADDU	$Alo,$Actxlo,$Actxlo:$Alo
|| [A0]	LDNDW	*$INP++,B11:B10			; pre-fetch input
|| [A0]	ADDK	-640,$K512			; rewind pointer to K512
	ADD	$Bhi,$Bctxhi,$Bhi
||	ADDU	$Blo,$Bctxlo,$Bctxlo:$Blo
|| [A0]	LDDW	*$K512++,$Khi:$Klo		; pre-fetch K512[0]
	ADD	$Chi,$Cctxhi,$Chi
||	ADDU	$Clo,$Cctxlo,$Cctxlo:$Clo
||	ADD	$Actxlo,$Ahi,$Ahi
||[!A0]	MV	$CTXA,$CTXB
	ADD	$Dhi,$Dctxhi,$Dhi
||	ADDU	$Dlo,$Dctxlo,$Dctxlo:$Dlo
||	ADD	$Bctxlo,$Bhi,$Bhi
||[!A0]	STW	$Ahi,*${CTXA}[0^.LITTLE_ENDIAN]	; save ctx
||[!A0]	STW	$Alo,*${CTXB}[1^.LITTLE_ENDIAN]
	ADD	$Ehi,$Ectxhi,$Ehi
||	ADDU	$Elo,$Ectxlo,$Ectxlo:$Elo
||	ADD	$Cctxlo,$Chi,$Chi
|| [A0]	BNOP	outerloop?
||[!A0]	STW	$Bhi,*${CTXA}[2^.LITTLE_ENDIAN]
||[!A0]	STW	$Blo,*${CTXB}[3^.LITTLE_ENDIAN]
	ADD	$Fhi,$Fctxhi,$Fhi
||	ADDU	$Flo,$Fctxlo,$Fctxlo:$Flo
||	ADD	$Dctxlo,$Dhi,$Dhi
||[!A0]	STW	$Chi,*${CTXA}[4^.LITTLE_ENDIAN]
||[!A0]	STW	$Clo,*${CTXB}[5^.LITTLE_ENDIAN]
	ADD	$Ghi,$Gctxhi,$Ghi
||	ADDU	$Glo,$Gctxlo,$Gctxlo:$Glo
||	ADD	$Ectxlo,$Ehi,$Ehi
||[!A0]	STW	$Dhi,*${CTXA}[6^.LITTLE_ENDIAN]
||[!A0]	STW	$Dlo,*${CTXB}[7^.LITTLE_ENDIAN]
	ADD	$Hhi,$Hctxhi,$Hhi
||	ADDU	$Hlo,$Hctxlo,$Hctxlo:$Hlo
||	ADD	$Fctxlo,$Fhi,$Fhi
||[!A0]	STW	$Ehi,*${CTXA}[8^.LITTLE_ENDIAN]
||[!A0]	STW	$Elo,*${CTXB}[9^.LITTLE_ENDIAN]
	ADD	$Gctxlo,$Ghi,$Ghi
||[!A0]	STW	$Fhi,*${CTXA}[10^.LITTLE_ENDIAN]
||[!A0]	STW	$Flo,*${CTXB}[11^.LITTLE_ENDIAN]
	ADD	$Hctxlo,$Hhi,$Hhi
||[!A0]	STW	$Ghi,*${CTXA}[12^.LITTLE_ENDIAN]
||[!A0]	STW	$Glo,*${CTXB}[13^.LITTLE_ENDIAN]
;;===== branch to outerloop? is taken here

	STW	$Hhi,*${CTXA}[14^.LITTLE_ENDIAN]
||	STW	$Hlo,*${CTXB}[15^.LITTLE_ENDIAN]
||	MVK	-40,B0
	ADD	FP,B0,SP			; destroy circular buffer
||	LDDW	*FP[-4],A11:A10
	LDDW	*SP[2],A13:A12
||	LDDW	*FP[-2],B11:B10
	LDDW	*SP[4],B13:B12
||	BNOP	RA
	LDW	*++SP(40),FP			; restore frame pointer
	MVK	0,B0
	MVC	B0,AMR				; clear AMR
	NOP	2				; wait till FP is committed
	.endasmfunc

	.if	__TI_EABI__
	.sect	".text:sha_asm.const"
	.else
	.sect	".const:sha_asm"
	.endif
	.align	128
K512:
	.uword	0x428a2f98,0xd728ae22, 0x71374491,0x23ef65cd
	.uword	0xb5c0fbcf,0xec4d3b2f, 0xe9b5dba5,0x8189dbbc
	.uword	0x3956c25b,0xf348b538, 0x59f111f1,0xb605d019
	.uword	0x923f82a4,0xaf194f9b, 0xab1c5ed5,0xda6d8118
	.uword	0xd807aa98,0xa3030242, 0x12835b01,0x45706fbe
	.uword	0x243185be,0x4ee4b28c, 0x550c7dc3,0xd5ffb4e2
	.uword	0x72be5d74,0xf27b896f, 0x80deb1fe,0x3b1696b1
	.uword	0x9bdc06a7,0x25c71235, 0xc19bf174,0xcf692694
	.uword	0xe49b69c1,0x9ef14ad2, 0xefbe4786,0x384f25e3
	.uword	0x0fc19dc6,0x8b8cd5b5, 0x240ca1cc,0x77ac9c65
	.uword	0x2de92c6f,0x592b0275, 0x4a7484aa,0x6ea6e483
	.uword	0x5cb0a9dc,0xbd41fbd4, 0x76f988da,0x831153b5
	.uword	0x983e5152,0xee66dfab, 0xa831c66d,0x2db43210
	.uword	0xb00327c8,0x98fb213f, 0xbf597fc7,0xbeef0ee4
	.uword	0xc6e00bf3,0x3da88fc2, 0xd5a79147,0x930aa725
	.uword	0x06ca6351,0xe003826f, 0x14292967,0x0a0e6e70
	.uword	0x27b70a85,0x46d22ffc, 0x2e1b2138,0x5c26c926
	.uword	0x4d2c6dfc,0x5ac42aed, 0x53380d13,0x9d95b3df
	.uword	0x650a7354,0x8baf63de, 0x766a0abb,0x3c77b2a8
	.uword	0x81c2c92e,0x47edaee6, 0x92722c85,0x1482353b
	.uword	0xa2bfe8a1,0x4cf10364, 0xa81a664b,0xbc423001
	.uword	0xc24b8b70,0xd0f89791, 0xc76c51a3,0x0654be30
	.uword	0xd192e819,0xd6ef5218, 0xd6990624,0x5565a910
	.uword	0xf40e3585,0x5771202a, 0x106aa070,0x32bbd1b8
	.uword	0x19a4c116,0xb8d2d0c8, 0x1e376c08,0x5141ab53
	.uword	0x2748774c,0xdf8eeb99, 0x34b0bcb5,0xe19b48a8
	.uword	0x391c0cb3,0xc5c95a63, 0x4ed8aa4a,0xe3418acb
	.uword	0x5b9cca4f,0x7763e373, 0x682e6ff3,0xd6b2b8a3
	.uword	0x748f82ee,0x5defb2fc, 0x78a5636f,0x43172f60
	.uword	0x84c87814,0xa1f0ab72, 0x8cc70208,0x1a6439ec
	.uword	0x90befffa,0x23631e28, 0xa4506ceb,0xde82bde9
	.uword	0xbef9a3f7,0xb2c67915, 0xc67178f2,0xe372532b
	.uword	0xca273ece,0xea26619c, 0xd186b8c7,0x21c0c207
	.uword	0xeada7dd6,0xcde0eb1e, 0xf57d4f7f,0xee6ed178
	.uword	0x06f067aa,0x72176fba, 0x0a637dc5,0xa2c898a6
	.uword	0x113f9804,0xbef90dae, 0x1b710b35,0x131c471b
	.uword	0x28db77f5,0x23047d84, 0x32caab7b,0x40c72493
	.uword	0x3c9ebe0a,0x15c9bebc, 0x431d67c4,0x9c100d4c
	.uword	0x4cc5d4be,0xcb3e42b6, 0x597f299c,0xfc657e2a
	.uword	0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817
	.cstring "SHA512 block transform for C64x+, CRYPTOGAMS by <appro\@openssl.org>"
	.align	4
___
436 |
|
---|
# Write out the accumulated assembly text.
print $code;

# Close STDOUT explicitly and treat a failed close as fatal, so that a write
# error which only shows up when the buffer is flushed is not lost.
unless (close STDOUT) {
    die "error closing STDOUT: $!";
}