#! /usr/bin/env perl
# Copyright 2012-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html

#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# February 2012
#
# The module implements bn_GF2m_mul_2x2 polynomial multiplication
# used in bn_gf2m.c. It's kind of low-hanging mechanical port from
# C for the time being... The subroutine runs in 37 cycles, which is
# 4.5x faster than compiler-generated code. Though comparison is
# totally unfair, because this module utilizes Galois Field Multiply
# instruction.

# The optional last command-line argument names the output file.  Without
# it the generated assembly is written to the inherited STDOUT.
$output = pop;
if ($output) {
    # Three-argument open: characters in the file name can no longer be
    # mistaken for part of the open mode, and a failure to create the file
    # is fatal instead of silently leaving the output on the old STDOUT.
    open STDOUT, '>', $output or die "can't open $output: $!";
}

# Registers in which the generated function receives its arguments:
# result pointer followed by the four 32-bit input words.
($rp,$a1,$a0,$b1,$b0)=("A4","B4","A6","B6","A8");	# argument vector

# Scratch registers shared by the mul_1x1_* generators.
#   $Alo / $Ahi       low and high halfword of the current A operand
#   $Alox<i>/$Ahix<i> product of $Alo / $Ahi with byte <i> of the B operand
#   $B_0 .. $B_3      the four bytes of the current B operand
($Alo,$Alox0,$Alox1,$Alox2,$Alox3)=map("A$_",(16..20));
($Ahi,$Ahix0,$Ahix1,$Ahix2,$Ahix3)=map("B$_",(16..20));
($B_0,$B_1,$B_2,$B_3)=("B5","A5","A7","B7");

# The merged generator takes its operands in place: $A shares a register
# with $Alo and $B shares one with $B_1.
($A,$B)=($Alo,$B_1);

# Register that the generated code loads with the 0xFF byte mask.
$xFF="B1";

# mul_1x1_upper(A, B)
#
# Appends to $code the opening half of one 32x32-bit carry-less
# multiplication.  A and B are the names of the registers holding the two
# operands.
#
# The emitted code
#   * splits B into four bytes, $B_0..$B_3 (AND with the $xFF mask, SHRU
#     by 24, and two EXTU extractions),
#   * splits A into two halfwords, $Ahi (SHRU by 16) and $Alo (EXTU),
#   * issues the eight 16x8-bit XORMPY partial products into
#     $Alox0..$Alox3 and $Ahix0..$Ahix3.
#
# Nothing is recombined here; the partial products are left in those
# registers for a following mul_1x1_merged or mul_1x1_lower.  The last
# execute packet is deliberately left open so that the caller may append
# further "||" instructions to it.
#
# Layout note: apart from "||" continuation lines, every instruction is
# emitted with leading whitespace, because the TI assembler treats
# anything starting in column 1 as a label.
sub mul_1x1_upper {
    my ($A,$B)=@_;
    $code.=<<___;
	EXTU	$B,8,24,$B_2		; smash $B to 4 bytes
||	AND	$B,$xFF,$B_0
||	SHRU	$B,24,$B_3
	SHRU	$A,16,   $Ahi		; smash $A to two halfwords
||	EXTU	$A,16,16,$Alo

	XORMPY	$Alo,$B_2,$Alox2	; 16x8 bits multiplication
||	XORMPY	$Ahi,$B_2,$Ahix2
||	EXTU	$B,16,24,$B_1
	XORMPY	$Alo,$B_0,$Alox0
||	XORMPY	$Ahi,$B_0,$Ahix0
	XORMPY	$Alo,$B_3,$Alox3
||	XORMPY	$Ahi,$B_3,$Ahix3
	XORMPY	$Alo,$B_1,$Alox1
||	XORMPY	$Ahi,$B_1,$Ahix1
___
}
# mul_1x1_merged(OUTlo, OUThi, A, B)
#
# Appends to $code a sequence that finishes the PREVIOUS 32x32-bit
# carry-less multiplication while starting the NEXT one.
#
#   OUTlo, OUThi  registers receiving the low and high word of the
#                 previous product
#   A, B          registers holding the operands of the next product
#
# Recombination of the previous partial products, as emitted:
#   OUTlo = Alox0 ^ (Alox1 << 8) ^ ((Ahix0 ^ Alox2) << 16)
#                 ^ ((Ahix1 ^ Alox3) << 24)
#   OUThi = Ahix2 ^ (Ahix3 << 8) ^ ((Ahix0 ^ Alox2) >> 16)
#                 ^ ((Ahix1 ^ Alox3) >> 8)
#
# The new operands are split and multiplied exactly as in mul_1x1_upper,
# with the XORMPY instructions interleaved among the XOR/SHL/SHRU above.
# The new $Alo x $B_0 product is first written to A1 and only moved into
# $Alox0 by the final MV, because the previous $Alox0 is still read by the
# recombination in between.
#
# As with mul_1x1_upper, the last execute packet is left open for the
# caller to extend with "||" lines, and every non-"||" instruction is
# emitted with leading whitespace so the TI assembler does not take the
# mnemonic for a label.
#
# NOTE(review): the interleaving presumably relies on XORMPY result
# latency so that old partial products are consumed before new ones land;
# confirm against the C64x+ instruction reference before reordering.
sub mul_1x1_merged {
    my ($OUTlo,$OUThi,$A,$B)=@_;
    $code.=<<___;
	EXTU	$B,8,24,$B_2		; smash $B to 4 bytes
||	AND	$B,$xFF,$B_0
||	SHRU	$B,24,$B_3
	SHRU	$A,16,   $Ahi		; smash $A to two halfwords
||	EXTU	$A,16,16,$Alo

	XOR	$Ahix0,$Alox2,$Ahix0
||	MV	$Ahix2,$OUThi
||	XORMPY	$Alo,$B_2,$Alox2
	XORMPY	$Ahi,$B_2,$Ahix2
||	EXTU	$B,16,24,$B_1
||	XORMPY	$Alo,$B_0,A1		; $Alox0
	XOR	$Ahix1,$Alox3,$Ahix1
||	SHL	$Ahix0,16,$OUTlo
||	SHRU	$Ahix0,16,$Ahix0
	XOR	$Alox0,$OUTlo,$OUTlo
||	XOR	$Ahix0,$OUThi,$OUThi
||	XORMPY	$Ahi,$B_0,$Ahix0
||	XORMPY	$Alo,$B_3,$Alox3
||	SHL	$Alox1,8,$Alox1
||	SHL	$Ahix3,8,$Ahix3
	XOR	$Alox1,$OUTlo,$OUTlo
||	XOR	$Ahix3,$OUThi,$OUThi
||	XORMPY	$Ahi,$B_3,$Ahix3
||	SHL	$Ahix1,24,$Alox1
||	SHRU	$Ahix1,8, $Ahix1
	XOR	$Alox1,$OUTlo,$OUTlo
||	XOR	$Ahix1,$OUThi,$OUThi
||	XORMPY	$Alo,$B_1,$Alox1
||	XORMPY	$Ahi,$B_1,$Ahix1
||	MV	A1,$Alox0
___
}
# mul_1x1_lower(OUTlo, OUThi)
#
# Appends to $code the closing half of a 32x32-bit carry-less
# multiplication: the partial products left in $Alox0..$Alox3 and
# $Ahix0..$Ahix3 by the preceding multiply are recombined into the
# registers named OUTlo (low word) and OUThi (high word):
#
#   OUTlo = Alox0 ^ (Alox1 << 8) ^ ((Ahix0 ^ Alox2) << 16)
#                 ^ ((Ahix1 ^ Alox3) << 24)
#   OUThi = Ahix2 ^ (Ahix3 << 8) ^ ((Ahix0 ^ Alox2) >> 16)
#                 ^ ((Ahix1 ^ Alox3) >> 8)
#
# This is the same XOR/SHL/SHRU sequence as in mul_1x1_merged with the
# instructions for a following multiply removed; a NOP stands in for one
# of the vacated packets and the first one is kept only as a comment
# (";NOP").
# NOTE(review): the NOP presumably pads for XORMPY latency - confirm
# before removing it.
#
# The last execute packet is left open for the caller to extend with "||"
# lines.  Every non-"||" instruction is emitted with leading whitespace so
# the TI assembler does not take the mnemonic for a label.
sub mul_1x1_lower {
    my ($OUTlo,$OUThi)=@_;
    $code.=<<___;
	;NOP
	XOR	$Ahix0,$Alox2,$Ahix0
||	MV	$Ahix2,$OUThi
	NOP
	XOR	$Ahix1,$Alox3,$Ahix1
||	SHL	$Ahix0,16,$OUTlo
||	SHRU	$Ahix0,16,$Ahix0
	XOR	$Alox0,$OUTlo,$OUTlo
||	XOR	$Ahix0,$OUThi,$OUThi
||	SHL	$Alox1,8,$Alox1
||	SHL	$Ahix3,8,$Ahix3
	XOR	$Alox1,$OUTlo,$OUTlo
||	XOR	$Ahix3,$OUThi,$OUThi
||	SHL	$Ahix1,24,$Alox1
||	SHRU	$Ahix1,8, $Ahix1
	XOR	$Alox1,$OUTlo,$OUTlo
||	XOR	$Ahix1,$OUThi,$OUThi
___
}
# ---------------------------------------------------------------------
# Body of the generated function.
#
# The 64x64-bit carry-less product is assembled Karatsuba-style from three
# 32x32-bit products, each collected into an A:B register pair (low:high):
#
#   a0*b0             -> A28:B28
#   a1*b1             -> A31:B31
#   (a0+a1)*(b0+b1)   -> A30:B30
#
# The helpers leave their last execute packet open, so each heredoc below
# that begins with "||" continues the packet emitted by the helper call
# just before it.  The order of the statements is therefore significant.
#
# Directives and instructions are emitted with leading whitespace; only
# the entry label starts in column 1, which is where the TI assembler
# expects labels and nothing else.
# ---------------------------------------------------------------------

# Prologue.  Assemblers older than 7.0 get __TI_EABI__ defined as 0; when
# __TI_EABI__ is set, the underscore-prefixed name used below is
# substituted with the plain symbol name.  Then load the 0xFF byte mask.
$code.=<<___;
	.text

	.if	.ASSEMBLER_VERSION<7000000
	.asg	0,__TI_EABI__
	.endif
	.if	__TI_EABI__
	.asg	bn_GF2m_mul_2x2,_bn_GF2m_mul_2x2
	.endif

	.global	_bn_GF2m_mul_2x2
_bn_GF2m_mul_2x2:
	.asmfunc
	MVK	0xFF,$xFF
___
mul_1x1_upper($a0,$b0);			# a0·b0

# Move a1/b1 into the in-place operand registers for the next multiply.
$code.=<<___;
||	MV	$b1,$B
	MV	$a1,$A
___
mul_1x1_merged("A28","B28",$A,$B);	# a0·b0/a1·b1

# Form the operands of the middle product.
$code.=<<___;
||	XOR	$b0,$b1,$B
	XOR	$a0,$a1,$A
___
mul_1x1_merged("A31","B31",$A,$B);	# a1·b1/(a0+a1)·(b0+b1)
$code.=<<___;
	XOR	A28,A31,A29
||	XOR	B28,B31,B29			; a0·b0+a1·b1
___
mul_1x1_lower("A30","B30");		# (a0+a1)·(b0+b1)

# Epilogue.  The return branch is issued first and the remaining packets
# follow it: fold the middle term into the two inner result words and
# store the four result words through the result pointer.
$code.=<<___;
||	BNOP	B3
	XOR	A29,A30,A30
||	XOR	B29,B30,B30			; (a0+a1)·(b0+b1)-a0·b0-a1·b1
	XOR	B28,A30,A30
||	STW	A28,*${rp}[0]
	XOR	B30,A31,A31
||	STW	A30,*${rp}[1]
	STW	A31,*${rp}[2]
	STW	B31,*${rp}[3]
	.endasmfunc
___

print $code;
# Buffered write errors only surface when the handle is closed.
close STDOUT or die "error closing STDOUT: $!";