VirtualBox

source: vbox/trunk/src/libs/ffmpeg-20060710/libavcodec/alpha/dsputil_alpha_asm.S@ 5776

Last change on this file since 5776 was 5776, checked in by vboxsync, 17 years ago

ffmpeg: exported to OSE

File size: 7.2 KB
Line 
1/*
2 * Alpha optimized DSP utils
3 * Copyright (c) 2002 Falk Hueffner <falk@debian.org>
4 *
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
8 * version 2 of the License, or (at your option) any later version.
9 *
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
14 *
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19
20/*
21 * These functions are scheduled for pca56. They should work
22 * reasonably on ev6, though.
23 */
24
25#include "regdef.h"
26
27/* Some nicer register names: extra temporaries on top of t0-t9. */
28#define ta t10
29#define tb t11
30#define tc t12
31#define td AT /* AT is named explicitly by this file, see ".set noat" */
32/* Danger: these overlap with the argument list and the return value */
33#define te a5 /* free here: the functions below take at most four arguments */
34#define tf a4
35#define tg a3 /* a3 carries h in put_pixels_axp_asm, which does not use tg */
36#define th v0 /* all functions in this file are declared void */
37
38 .set noat # this file names AT itself (td alias, gprof stubs)
39 .set noreorder # NOTE(review): presumably keeps the hand scheduling below intact
40 .arch pca56 # scheduling target named above; NOTE(review): presumably also enables the maxsw4/minsw4/pkwb/unpkbw ops
41 .text
42
43/************************************************************************
44 * void put_pixels_axp_asm(uint8_t *block, const uint8_t *pixels,
45 * int line_size, int h)
 *
 * Copies h rows of 8 bytes from pixels to block, advancing both
 * pointers by line_size after every row.  Four rows are handled per
 * loop iteration and the loop only exits when the counter reaches
 * exactly zero, so h has to be a positive multiple of 4.
 *
 * Register use: a0 = block, a1 = pixels, a2 = line_size, a3 = h.
 * The source alignment is tested once on entry: an 8-byte aligned
 * source takes the plain ldq path, anything else takes the
 * ldq_u/extql/extqh path.  Destination rows are always written with stq.
 *
 * NOTE(review): the unaligned path feeds a1 to extql/extqh after a1 has
 * already been advanced by line_size, and the aligned path keeps using
 * ldq on the following rows, so both paths appear to rely on line_size
 * being a multiple of 8 -- confirm against the callers.
46 */
47 .align 6
48 .globl put_pixels_axp_asm
49 .ent put_pixels_axp_asm
50put_pixels_axp_asm:
51 .frame sp, 0, ra
52 .prologue 0
53
54#ifdef HAVE_GPROF
55 lda AT, _mcount # profiling build only: call _mcount through AT
56 jsr AT, (AT), _mcount
57#endif
58
59 and a1, 7, t0 # t0 = low three bits of the source pointer
60 beq t0, $aligned # zero: source is 8-byte aligned, use the simple loop
61
62 .align 4
63$unaligned:
64 ldq_u t0, 0(a1) # row 0: the two quadwords that straddle the source bytes
65 ldq_u t1, 8(a1)
66 addq a1, a2, a1 # pixels += line_size
67 nop # filler, NOTE(review): presumably for issue scheduling
68
69 ldq_u t2, 0(a1) # row 1
70 ldq_u t3, 8(a1)
71 addq a1, a2, a1 # pixels += line_size
72 nop # filler
73
74 ldq_u t4, 0(a1) # row 2
75 ldq_u t5, 8(a1)
76 addq a1, a2, a1 # pixels += line_size
77 nop # filler
78
79 ldq_u t6, 0(a1) # row 3
80 ldq_u t7, 8(a1)
81 extql t0, a1, t0 # row 0, low part (a1 is already three rows ahead here)
82 addq a1, a2, a1 # pixels += line_size, now at the next group of four rows
83
84 extqh t1, a1, t1 # row 0, high part
85 addq a0, a2, t8 # t8 = destination of row 1
86 extql t2, a1, t2 # row 1, low part
87 addq t8, a2, t9 # t9 = destination of row 2
88
89 extqh t3, a1, t3 # row 1, high part
90 addq t9, a2, ta # ta = destination of row 3
91 extql t4, a1, t4 # row 2, low part
92 or t0, t1, t0 # row 0 assembled
93
94 extqh t5, a1, t5 # row 2, high part
95 or t2, t3, t2 # row 1 assembled
96 extql t6, a1, t6 # row 3, low part
97 or t4, t5, t4 # row 2 assembled
98
99 extqh t7, a1, t7 # row 3, high part
100 or t6, t7, t6 # row 3 assembled
101 stq t0, 0(a0) # store row 0
102 stq t2, 0(t8) # store row 1
103
104 stq t4, 0(t9) # store row 2
105 subq a3, 4, a3 # h -= 4
106 stq t6, 0(ta) # store row 3
107 addq ta, a2, a0 # block = row 3 destination + line_size
108
109 bne a3, $unaligned # repeat until h reaches zero
110 ret
111
112 .align 4
113$aligned:
114 ldq t0, 0(a1) # row 0
115 addq a1, a2, a1 # pixels += line_size
116 ldq t1, 0(a1) # row 1
117 addq a1, a2, a1 # pixels += line_size
118
119 ldq t2, 0(a1) # row 2
120 addq a1, a2, a1 # pixels += line_size
121 ldq t3, 0(a1) # row 3
122
123 addq a0, a2, t4 # t4 = destination of row 1
124 addq a1, a2, a1 # pixels += line_size, now at the next group of four rows
125 addq t4, a2, t5 # t5 = destination of row 2
126 subq a3, 4, a3 # h -= 4
127
128 stq t0, 0(a0) # store row 0
129 addq t5, a2, t6 # t6 = destination of row 3
130 stq t1, 0(t4) # store row 1
131 addq t6, a2, a0 # block = row 3 destination + line_size
132
133 stq t2, 0(t5) # store row 2
134 stq t3, 0(t6) # store row 3
135
136 bne a3, $aligned # repeat until h reaches zero
137 ret
138 .end put_pixels_axp_asm
139
140/************************************************************************
141 * void put_pixels_clamped_mvi_asm(const DCTELEM *block, uint8_t *pixels,
142 * int line_size)
 *
 * Clamps the values in block to the range 0..255 and stores them as
 * bytes to pixels.  Each loop iteration reads 32 bytes of block (four
 * quadwords, handled as four signed 16-bit lanes each) and writes two
 * rows of 8 bytes.  The counter starts at 8 and drops by 2 per
 * iteration, so 8 rows (128 bytes of block) are processed in total.
 *
 * Register use: a0 = block, a1 = pixels, a2 = line_size,
 * t8 = 0x00ff00ff00ff00ff (upper clamp bound in every lane),
 * t9 = rows remaining, ta = address of the second row of the pair.
143 */
144 .align 6
145 .globl put_pixels_clamped_mvi_asm
146 .ent put_pixels_clamped_mvi_asm
147put_pixels_clamped_mvi_asm:
148 .frame sp, 0, ra
149 .prologue 0
150
151#ifdef HAVE_GPROF
152 lda AT, _mcount # profiling build only: call _mcount through AT
153 jsr AT, (AT), _mcount
154#endif
155
156 lda t8, -1 # all ones, masked down to the clamp constant below
157 lda t9, 8 # loop counter
158 zap t8, 0xaa, t8 # 00ff00ff00ff00ff
159
160 .align 4
1611: ldq t0, 0(a0) # first row, values 0-3
162 ldq t1, 8(a0) # first row, values 4-7
163 ldq t2, 16(a0) # second row, values 0-3
164 ldq t3, 24(a0) # second row, values 4-7
165
166 maxsw4 t0, zero, t0 # lower clamp: negative lanes become 0
167 subq t9, 2, t9 # two rows handled per iteration
168 maxsw4 t1, zero, t1
169 lda a0, 32(a0) # block advances by 32 bytes
170
171 maxsw4 t2, zero, t2
172 addq a1, a2, ta # ta = pixels + line_size (second row)
173 maxsw4 t3, zero, t3
174 minsw4 t0, t8, t0 # upper clamp: lanes above 255 become 255
175
176 minsw4 t1, t8, t1
177 minsw4 t2, t8, t2
178 minsw4 t3, t8, t3
179 pkwb t0, t0 # pack the four lanes into four bytes
180
181 pkwb t1, t1
182 pkwb t2, t2
183 pkwb t3, t3
184 stl t0, 0(a1) # first row, bytes 0-3
185
186 stl t1, 4(a1) # first row, bytes 4-7
187 addq ta, a2, a1 # pixels = second row + line_size
188 stl t2, 0(ta) # second row, bytes 0-3
189 stl t3, 4(ta) # second row, bytes 4-7
190
191 bne t9, 1b # repeat until the counter reaches zero
192 ret
193 .end put_pixels_clamped_mvi_asm
194
195/************************************************************************
196 * void add_pixels_clamped_mvi_asm(const DCTELEM *block, uint8_t *pixels,
197 * int line_size)
 *
 * Adds the values in block to the bytes already stored at pixels,
 * clamps every sum to 0..255 and writes the result back to pixels.
 * Each loop iteration handles two rows of 8 pixels (32 bytes of block);
 * the counter starts at 8 and drops by 2, so 8 rows are processed.
 *
 * Register use: a0 = block, a1 = pixels, a2 = line_size,
 * th = rows remaining, te = address of the second row of the pair,
 * tg = 0x8000800080008000 (bit 15 of every 16-bit lane),
 * tf = 0x00ff00ff00ff00ff (upper clamp bound in every lane).
 *
 * The work is split into four "quarters" (0/1 = left/right half of the
 * first row, 2/3 = left/right half of the second row) whose steps are
 * interleaved.  The trailing "q n" comments give quarter q, step n:
 *   0  unpkbw  expand four pixel bytes to four 16-bit lanes
 *   1  and     save bit 15 of every lane of the block quadword
 *   2  bic     clear bit 15 of every lane of the block quadword
 *   3  addq    add the pixel lanes; with bit 15 cleared a lane sum
 *              cannot carry into the neighbouring lane
 *   4  xor     fold the saved bit 15 back in
 *   5  maxsw4  clamp below at 0
 *   6  minsw4  clamp above at 255
 *   7  pkwb    pack the four lanes into four bytes
 *   8  stl     store the four bytes
198 */
199 .align 6
200 .globl add_pixels_clamped_mvi_asm
201 .ent add_pixels_clamped_mvi_asm
202add_pixels_clamped_mvi_asm:
203 .frame sp, 0, ra
204 .prologue 0
205
206#ifdef HAVE_GPROF
207 lda AT, _mcount # profiling build only: call _mcount through AT
208 jsr AT, (AT), _mcount
209#endif
210
211 lda t1, -1 # all ones, source for both masks (t1 is reused in the loop)
212 lda th, 8 # loop counter: rows remaining
213 zap t1, 0x33, tg # 0xffff0000ffff0000
214 nop # filler, NOTE(review): presumably for issue scheduling
215
216 srl tg, 1, t0 # 0x7fff80007fff8000
217 xor tg, t0, tg # 0x8000800080008000
218 zap t1, 0xaa, tf # 0x00ff00ff00ff00ff
219
220 .align 4
2211: ldl t1, 0(a1) # pix0 (try to hit cache line soon)
222 ldl t4, 4(a1) # pix1
223 addq a1, a2, te # pixels += line_size
224 ldq t0, 0(a0) # shorts0
225
226 ldl t7, 0(te) # pix2 (try to hit cache line soon)
227 ldl ta, 4(te) # pix3
228 ldq t3, 8(a0) # shorts1
229 ldq t6, 16(a0) # shorts2
230
231 ldq t9, 24(a0) # shorts3
232 unpkbw t1, t1 # 0 0 (quarter/op no.)
233 and t0, tg, t2 # 0 1
234 unpkbw t4, t4 # 1 0
235
236 bic t0, tg, t0 # 0 2
237 unpkbw t7, t7 # 2 0
238 and t3, tg, t5 # 1 1
239 addq t0, t1, t0 # 0 3
240
241 xor t0, t2, t0 # 0 4
242 unpkbw ta, ta # 3 0
243 and t6, tg, t8 # 2 1
244 maxsw4 t0, zero, t0 # 0 5
245
246 bic t3, tg, t3 # 1 2
247 bic t6, tg, t6 # 2 2
248 minsw4 t0, tf, t0 # 0 6
249 addq t3, t4, t3 # 1 3
250
251 pkwb t0, t0 # 0 7
252 xor t3, t5, t3 # 1 4
253 maxsw4 t3, zero, t3 # 1 5
254 addq t6, t7, t6 # 2 3
255
256 xor t6, t8, t6 # 2 4
257 and t9, tg, tb # 3 1
258 minsw4 t3, tf, t3 # 1 6
259 bic t9, tg, t9 # 3 2
260
261 maxsw4 t6, zero, t6 # 2 5
262 addq t9, ta, t9 # 3 3
263 stl t0, 0(a1) # 0 8
264 minsw4 t6, tf, t6 # 2 6
265
266 xor t9, tb, t9 # 3 4
267 maxsw4 t9, zero, t9 # 3 5
268 lda a0, 32(a0) # block += 16;
269 pkwb t3, t3 # 1 7
270
271 minsw4 t9, tf, t9 # 3 6
272 subq th, 2, th # two rows handled per iteration
273 pkwb t6, t6 # 2 7
274 pkwb t9, t9 # 3 7
275
276 stl t3, 4(a1) # 1 8
277 addq te, a2, a1 # pixels += line_size
278 stl t6, 0(te) # 2 8
279 stl t9, 4(te) # 3 8
280
281 bne th, 1b # repeat until the counter reaches zero
282 ret
283 .end add_pixels_clamped_mvi_asm
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette