VirtualBox

source: vbox/trunk/src/libs/openssl-3.1.2/crypto/bn/asm/c64xplus-gf2m.pl@ 101021

Last change on this file since 101021 was 101021, checked in by vboxsync, 15 months ago

openssl-3.1.2: Applied and adjusted our OpenSSL changes to 3.1.0. bugref:10519

File size: 4.0 KB
Line 
1#! /usr/bin/env perl
2# Copyright 2012-2020 The OpenSSL Project Authors. All Rights Reserved.
3#
4# Licensed under the Apache License 2.0 (the "License"). You may not use
5# this file except in compliance with the License. You can obtain a copy
6# in the file LICENSE in the source distribution or at
7# https://www.openssl.org/source/license.html
8
9#
10# ====================================================================
11# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
12# project. The module is, however, dual licensed under OpenSSL and
13# CRYPTOGAMS licenses depending on where you obtain it. For further
14# details see http://www.openssl.org/~appro/cryptogams/.
15# ====================================================================
16#
17# February 2012
18#
19# The module implements bn_GF2m_mul_2x2 polynomial multiplication
20# used in bn_gf2m.c. It's kind of a low-hanging mechanical port from
21# C for the time being... The subroutine runs in 37 cycles, which is
22# 4.5x faster than compiler-generated code. Though the comparison is
23# totally unfair, because this module utilizes Galois Field Multiply
24# instruction.
25
# Take the last command-line argument as the output file name and, if it
# is a true value, redirect STDOUT to that file. With no argument the
# generated assembly goes to the inherited STDOUT.
# NOTE(review): this is a two-argument open with the file name
# interpolated into the mode string, and its result is not checked.
26$output = pop and open STDOUT,">$output";
27
# Registers holding the five arguments of the generated function, in the
# order (rp, a1, a0, b1, b0).
28($rp,$a1,$a0,$b1,$b0)=("A4","B4","A6","B6","A8"); # argument vector
29
# Working registers shared by the three code-emitting subs below:
#   $Alo / $Ahi       - the two 16-bit halves of the current multiplicand
#   $Alox0..$Alox3    - products of $Alo with bytes 0..3 of the multiplier
#   $Ahix0..$Ahix3    - products of $Ahi with bytes 0..3 of the multiplier
# The "lo" set is A16..A20 and the "hi" set is B16..B20.
30($Alo,$Alox0,$Alox1,$Alox2,$Alox3)=map("A$_",(16..20));
31($Ahi,$Ahix0,$Ahix1,$Ahix2,$Ahix3)=map("B$_",(16..20));
# $B_0..$B_3 receive the four individual bytes of the multiplier.
32($B_0,$B_1,$B_2,$B_3)=("B5","A5","A7","B7");
# File-level $A/$B name the registers used to stage the operands of the
# second and third multiplications; they alias $Alo (A16) and $B_1 (A5).
33($A,$B)=($Alo,$B_1);
# Register that the generated function loads with the byte mask 0xFF.
34$xFF="B1";
35
# mul_1x1_upper($A,$B)
#
# Appends to the global $code the first half of a 32x32-bit polynomial
# multiplication of the registers named by $A and $B: the multiplier is
# split into four bytes, the multiplicand into two halfwords, and the
# eight 16x8 XORMPY partial products are issued into $Alox0..3 and
# $Ahix0..3. The code that combines those partial products into a result
# is emitted separately by mul_1x1_merged or mul_1x1_lower.
#
#   $A, $B - register names of the two operands. These lexicals shadow
#            the file-level $A/$B; every other register name used in the
#            template is a file-level global.
36sub mul_1x1_upper {
37my ($A,$B)=@_;
38$code.=<<___;
39 EXTU $B,8,24,$B_2 ; smash $B to 4 bytes
40|| AND $B,$xFF,$B_0
41|| SHRU $B,24,$B_3
42 SHRU $A,16, $Ahi ; smash $A to two halfwords
43|| EXTU $A,16,16,$Alo
44
45 XORMPY $Alo,$B_2,$Alox2 ; 16x8 bits multiplication
46|| XORMPY $Ahi,$B_2,$Ahix2
47|| EXTU $B,16,24,$B_1
48 XORMPY $Alo,$B_0,$Alox0
49|| XORMPY $Ahi,$B_0,$Ahix0
50 XORMPY $Alo,$B_3,$Alox3
51|| XORMPY $Ahi,$B_3,$Ahix3
52 XORMPY $Alo,$B_1,$Alox1
53|| XORMPY $Ahi,$B_1,$Ahix1
54___
55}
# mul_1x1_merged($OUTlo,$OUThi,$A,$B)
#
# Appends to the global $code a sequence that interleaves two jobs:
#   * combining the partial products of the PREVIOUS multiplication
#     (still sitting in $Alox0..3 / $Ahix0..3) into the 64-bit result
#     $OUThi:$OUTlo, using the same XOR/SHL/SHRU steps that
#     mul_1x1_lower emits on their own;
#   * splitting the NEW operands $A and $B and issuing their eight
#     XORMPY partial products, as mul_1x1_upper does.
#
#   $OUTlo, $OUThi - register names receiving the low and high words of
#                    the previous product.
#   $A, $B         - register names of the new operands (lexicals that
#                    shadow the file-level $A/$B).
#
# The new $Alo x $B_0 product is written to A1 and only moved into
# $Alox0 by the final MV, because the template still reads the previous
# $Alox0 in an XOR after that XORMPY has been issued.
# NOTE(review): correctness depends on the exact instruction order and
# execute-packet grouping; do not reorder lines inside the template.
56sub mul_1x1_merged {
57my ($OUTlo,$OUThi,$A,$B)=@_;
58$code.=<<___;
59 EXTU $B,8,24,$B_2 ; smash $B to 4 bytes
60|| AND $B,$xFF,$B_0
61|| SHRU $B,24,$B_3
62 SHRU $A,16, $Ahi ; smash $A to two halfwords
63|| EXTU $A,16,16,$Alo
64
65 XOR $Ahix0,$Alox2,$Ahix0
66|| MV $Ahix2,$OUThi
67|| XORMPY $Alo,$B_2,$Alox2
68 XORMPY $Ahi,$B_2,$Ahix2
69|| EXTU $B,16,24,$B_1
70|| XORMPY $Alo,$B_0,A1 ; $Alox0
71 XOR $Ahix1,$Alox3,$Ahix1
72|| SHL $Ahix0,16,$OUTlo
73|| SHRU $Ahix0,16,$Ahix0
74 XOR $Alox0,$OUTlo,$OUTlo
75|| XOR $Ahix0,$OUThi,$OUThi
76|| XORMPY $Ahi,$B_0,$Ahix0
77|| XORMPY $Alo,$B_3,$Alox3
78|| SHL $Alox1,8,$Alox1
79|| SHL $Ahix3,8,$Ahix3
80 XOR $Alox1,$OUTlo,$OUTlo
81|| XOR $Ahix3,$OUThi,$OUThi
82|| XORMPY $Ahi,$B_3,$Ahix3
83|| SHL $Ahix1,24,$Alox1
84|| SHRU $Ahix1,8, $Ahix1
85 XOR $Alox1,$OUTlo,$OUTlo
86|| XOR $Ahix1,$OUThi,$OUThi
87|| XORMPY $Alo,$B_1,$Alox1
88|| XORMPY $Ahi,$B_1,$Ahix1
89|| MV A1,$Alox0
90___
91}
# mul_1x1_lower($OUTlo,$OUThi)
#
# Appends to the global $code the second half of a 32x32-bit polynomial
# multiplication: the eight partial products left in $Alox0..3 and
# $Ahix0..3 by an earlier mul_1x1_upper/mul_1x1_merged are shifted into
# position and XORed together into the 64-bit result $OUThi:$OUTlo.
# No new multiplications are issued.
#
#   $OUTlo, $OUThi - register names receiving the low and high words of
#                    the product.
#
# The template contains one live NOP and one commented-out ";NOP".
# NOTE(review): presumably these pad for XORMPY result latency - confirm
# against the C64x+ instruction timing before touching them.
92sub mul_1x1_lower {
93my ($OUTlo,$OUThi)=@_;
94$code.=<<___;
95 ;NOP
96 XOR $Ahix0,$Alox2,$Ahix0
97|| MV $Ahix2,$OUThi
98 NOP
99 XOR $Ahix1,$Alox3,$Ahix1
100|| SHL $Ahix0,16,$OUTlo
101|| SHRU $Ahix0,16,$Ahix0
102 XOR $Alox0,$OUTlo,$OUTlo
103|| XOR $Ahix0,$OUThi,$OUThi
104|| SHL $Alox1,8,$Alox1
105|| SHL $Ahix3,8,$Ahix3
106 XOR $Alox1,$OUTlo,$OUTlo
107|| XOR $Ahix3,$OUThi,$OUThi
108|| SHL $Ahix1,24,$Alox1
109|| SHRU $Ahix1,8, $Ahix1
110 XOR $Alox1,$OUTlo,$OUTlo
111|| XOR $Ahix1,$OUThi,$OUThi
112___
113}
# Assemble the body of bn_GF2m_mul_2x2. The fragments appended below are
# order-sensitive: several heredocs begin with "||", which joins their
# first instruction to the execute packet that ends the fragment emitted
# just before them.
#
# Prologue: section directive, symbol-name handling and function entry.
# __TI_EABI__ is defined as 0 when .ASSEMBLER_VERSION is below 7000000,
# and when __TI_EABI__ is non-zero the underscore-prefixed label is
# substituted by the unprefixed name. The first instruction loads the
# 0xFF byte mask into $xFF.
114$code.=<<___;
115 .text
116
117 .if .ASSEMBLER_VERSION<7000000
118 .asg 0,__TI_EABI__
119 .endif
120 .if __TI_EABI__
121 .asg bn_GF2m_mul_2x2,_bn_GF2m_mul_2x2
122 .endif
123
124 .global _bn_GF2m_mul_2x2
125_bn_GF2m_mul_2x2:
126 .asmfunc
127 MVK 0xFF,$xFF
128___
# First multiplication: issue the partial products of a0 and b0.
129 &mul_1x1_upper($a0,$b0); # a0·b0
# Stage a1 and b1 in the working registers $A/$B for the next product.
130$code.=<<___;
131|| MV $b1,$B
132 MV $a1,$A
133___
# Collect a0·b0 into B28:A28 while issuing the partial products of a1·b1.
134 &mul_1x1_merged("A28","B28",$A,$B); # a0·b0/a1·b1
# Stage a0^a1 and b0^b1 in $A/$B for the third product.
135$code.=<<___;
136|| XOR $b0,$b1,$B
137 XOR $a0,$a1,$A
138___
# Collect a1·b1 into B31:A31 while issuing (a0+a1)·(b0+b1).
139 &mul_1x1_merged("A31","B31",$A,$B); # a1·b1/(a0+a1)·(b0+b1)
# B29:A29 = a0·b0 + a1·b1, needed to extract the middle term below.
140$code.=<<___;
141 XOR A28,A31,A29
142|| XOR B28,B31,B29 ; a0·b0+a1·b1
143___
# Collect (a0+a1)·(b0+b1) into B30:A30; nothing further is issued.
144 &mul_1x1_lower("A30","B30"); # (a0+a1)·(b0+b1)
# Epilogue: reduce B30:A30 to the middle term, fold it into the two inner
# result words and store the four result words to rp[0..3].
# NOTE(review): BNOP B3 is presumably the return branch, with the
# following instructions executing before it takes effect - confirm
# against the C64x+ branch timing before changing this tail.
145$code.=<<___;
146|| BNOP B3
147 XOR A29,A30,A30
148|| XOR B29,B30,B30 ; (a0+a1)·(b0+b1)-a0·b0-a1·b1
149 XOR B28,A30,A30
150|| STW A28,*${rp}[0]
151 XOR B30,A31,A31
152|| STW A30,*${rp}[1]
153 STW A31,*${rp}[2]
154 STW B31,*${rp}[3]
155 .endasmfunc
156___
157
# Write the accumulated assembly to STDOUT (redirected to the output file
# when one was given) and die if closing the handle reports an error.
158print $code;
159close STDOUT or die "error closing STDOUT: $!";
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette