/*
 * Alpha optimized DSP utils
 * Copyright (c) 2002 Falk Hueffner <[email protected]>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "regdef.h"

/*
 * Some nicer register names.
 *
 * ta..tc extend the t0..t9 temporaries.  td is the assembler temporary AT,
 * which may only be named explicitly while ".set noat" (below) is in effect.
 */
#define ta t10
#define tb t11
#define tc t12
#define td AT
/*
 * Danger: these overlap with the argument list and the return value.
 * In this file te and tf are used as scratch by the aligned loop of
 * pix_abs16x16_mvi_asm, which takes only three arguments (a0..a2).
 * tg and th are not referenced below; the registers they name (a3, v0)
 * are used directly there as the row counter and the running sum.
 */
#define te a5
#define tf a4
#define tg a3
#define th v0

        /* AT is used as an ordinary temporary (td), so the assembler must
           not use it on its own.  */
        .set noat
        /* Keep the instruction order exactly as written below.  */
        .set noreorder
        /* Target CPU named by the routine header; perr is an MVI
           instruction.  */
        .arch pca56
        .text

/*****************************************************************************
 * int pix_abs16x16_mvi_asm(uint8_t *pix1, uint8_t *pix2, int line_size)
 *
 * Sums, over 16 rows of 16 bytes each, the absolute differences between
 * the bytes at pix1 and the bytes at pix2.  Each row is handled as two
 * quadwords ("left" and "right"); perr produces the sum of absolute byte
 * differences of one quadword pair and the results are accumulated in v0.
 *
 * In:     a0 = pix1 ("ref" in the comments below).  Always read with ldq,
 *              so it must be 8-byte aligned.
 *         a1 = pix2.  Any alignment; (pix2 & 7) selects the loop:
 *              zero -> $aligned, non-zero -> $unaligned.
 *         a2 = line_size, the byte distance from one row to the next.
 *              It should be a multiple of 8: every row of pix1 (and of
 *              pix2 in the aligned loop) is read with ldq, and the
 *              unaligned loop takes its byte shift from a1 only after a1
 *              has been advanced by two rows.
 * Out:    v0 = accumulated sum.
 * Writes: t0-t9, ta, tb, tc, td (= AT), te (= a5), tf (= a4),
 *         a0, a1, a3, v0.  a2 is only read.
 * Stack:  none (zero-size frame, return address stays in ra).
 *
 * This code is written with a pca56 in mind. For ev6, one should
 * really take the increased latency of 3 cycles for MVI instructions
 * into account.
 *
 * It is important to keep the loading and first use of a register as
 * far apart as possible, because if a register is accessed before it
 * has been fetched from memory, the CPU will stall.
 */
        .align 4
        .globl pix_abs16x16_mvi_asm
        .ent pix_abs16x16_mvi_asm
pix_abs16x16_mvi_asm:
        .frame sp, 0, ra, 0
        .prologue 0

#ifdef HAVE_GPROF
        /* Profiling hook: jump to _mcount with AT holding first the target
           address and then the return address.  */
        lda     AT, _mcount
        jsr     AT, (AT), _mcount
#endif

        and     a1, 7, t0               # t0 = byte misalignment of pix2
        clr     v0                      # running sum = 0
        lda     a3, 16                  # a3 = rows left to process
        beq     t0, $aligned            # pix2 is 8-byte aligned
        .align 4
$unaligned:
        /* Two rows per iteration.

           Each 16-byte row of pix2 straddles three aligned quadwords,
           fetched with ldq_u (left_u, mid, right_u).  extql moves the
           wanted bytes of the lower quadword down, extqh moves the wanted
           bytes of the upper quadword up, and OR-ing both gives the
           unaligned quadword.  Both ext* instructions use only the low
           three bits of a1 as the byte offset.

           Registers:
           line 0:
           t0:  left_u -> left lo -> left
           t1:  mid
           t2:  right_u -> right hi -> right
           t3:  ref left
           t4:  ref right
           line 1:
           t5:  left_u -> left lo -> left
           t6:  mid
           t7:  right_u -> right hi -> right
           t8:  ref left
           t9:  ref right
           temp:
           ta:  left hi
           tb:  right lo
           tc:  error left
           td:  error right  */

        /* load line 0 */
        ldq_u   t0, 0(a1)               # left_u
        ldq_u   t1, 8(a1)               # mid
        ldq_u   t2, 16(a1)              # right_u
        ldq     t3, 0(a0)               # ref left
        ldq     t4, 8(a0)               # ref right
        addq    a0, a2, a0              # pix1 += line_size
        addq    a1, a2, a1              # pix2 += line_size
        /* load line 1 */
        ldq_u   t5, 0(a1)               # left_u
        ldq_u   t6, 8(a1)               # mid
        ldq_u   t7, 16(a1)              # right_u
        ldq     t8, 0(a0)               # ref left
        ldq     t9, 8(a0)               # ref right
        addq    a0, a2, a0              # pix1 += line_size
        addq    a1, a2, a1              # pix2 += line_size
        /* calc line 0 (a1 already points two rows further; only its low
           three bits matter here) */
        extql   t0, a1, t0              # left lo
        extqh   t1, a1, ta              # left hi
        extql   t1, a1, tb              # right lo
        or      t0, ta, t0              # left
        extqh   t2, a1, t2              # right hi
        perr    t3, t0, tc              # error left
        or      t2, tb, t2              # right
        perr    t4, t2, td              # error right
        addq    v0, tc, v0              # add error left
        addq    v0, td, v0              # add error right
        /* calc line 1 */
        extql   t5, a1, t5              # left lo
        extqh   t6, a1, ta              # left hi
        extql   t6, a1, tb              # right lo
        or      t5, ta, t5              # left
        extqh   t7, a1, t7              # right hi
        perr    t8, t5, tc              # error left
        or      t7, tb, t7              # right
        perr    t9, t7, td              # error right
        addq    v0, tc, v0              # add error left
        addq    v0, td, v0              # add error right
        /* loop */
        subq    a3, 2, a3               # h -= 2
        bne     a3, $unaligned
        ret

        .align 4
$aligned:
        /* Four rows per iteration.  All eight pix2 quadwords and all eight
           pix1 quadwords are loaded before the first perr, so every
           temporary (including te = a5 and tf = a4) is live at once.  */

        /* load line 0 */
        ldq     t0, 0(a1)               # left
        ldq     t1, 8(a1)               # right
        addq    a1, a2, a1              # pix2 += line_size
        ldq     t2, 0(a0)               # ref left
        ldq     t3, 8(a0)               # ref right
        addq    a0, a2, a0              # pix1 += line_size
        /* load line 1 */
        ldq     t4, 0(a1)               # left
        ldq     t5, 8(a1)               # right
        addq    a1, a2, a1              # pix2 += line_size
        ldq     t6, 0(a0)               # ref left
        ldq     t7, 8(a0)               # ref right
        addq    a0, a2, a0              # pix1 += line_size
        /* load line 2 */
        ldq     t8, 0(a1)               # left
        ldq     t9, 8(a1)               # right
        addq    a1, a2, a1              # pix2 += line_size
        ldq     ta, 0(a0)               # ref left
        ldq     tb, 8(a0)               # ref right
        addq    a0, a2, a0              # pix1 += line_size
        /* load line 3 */
        ldq     tc, 0(a1)               # left
        ldq     td, 8(a1)               # right
        addq    a1, a2, a1              # pix2 += line_size
        ldq     te, 0(a0)               # ref left
        ldq     tf, 8(a0)               # ref right
        /* calc line 0 */
        perr    t0, t2, t0              # error left
        addq    a0, a2, a0              # pix1 += line_size (for line 3)
        perr    t1, t3, t1              # error right
        addq    v0, t0, v0              # add error left
        /* calc line 1 (t0 and t1 are reused as the error temporaries) */
        perr    t4, t6, t0              # error left
        addq    v0, t1, v0              # add error right
        perr    t5, t7, t1              # error right
        addq    v0, t0, v0              # add error left
        /* calc line 2 */
        perr    t8, ta, t0              # error left
        addq    v0, t1, v0              # add error right
        perr    t9, tb, t1              # error right
        addq    v0, t0, v0              # add error left
        /* calc line 3 */
        perr    tc, te, t0              # error left
        addq    v0, t1, v0              # add error right
        perr    td, tf, t1              # error right
        addq    v0, t0, v0              # add error left
        addq    v0, t1, v0              # add error right
        /* loop */
        subq    a3, 4, a3               # h -= 4
        bne     a3, $aligned
        ret
        .end pix_abs16x16_mvi_asm