1 | /*
|
---|
2 | * MMI optimized DSP utils
|
---|
3 | * Copyright (c) 2000, 2001 Fabrice Bellard.
|
---|
4 | *
|
---|
5 | * This library is free software; you can redistribute it and/or
|
---|
6 | * modify it under the terms of the GNU Lesser General Public
|
---|
7 | * License as published by the Free Software Foundation; either
|
---|
8 | * version 2 of the License, or (at your option) any later version.
|
---|
9 | *
|
---|
10 | * This library is distributed in the hope that it will be useful,
|
---|
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
---|
13 | * Lesser General Public License for more details.
|
---|
14 | *
|
---|
15 | * You should have received a copy of the GNU Lesser General Public
|
---|
16 | * License along with this library; if not, write to the Free Software
|
---|
17 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
---|
18 | *
|
---|
19 | * MMI optimization by Leon van Stuivenberg
|
---|
20 | * clear_blocks_mmi() by BroadQ
|
---|
21 | */
|
---|
22 |
|
---|
23 | #include "../dsputil.h"
|
---|
24 | #include "mmi.h"
|
---|
25 |
|
---|
/*
 * IDCT entry points with external linkage; their definitions are not part of
 * this section of the file. dsputil_init_mmi() stores them in the DSPContext
 * when the requested IDCT algorithm is FF_IDCT_AUTO or FF_IDCT_PS2.
 */
void ff_mmi_idct_put(uint8_t *dest, int line_size, DCTELEM *block);
void ff_mmi_idct_add(uint8_t *dest, int line_size, DCTELEM *block);
void ff_mmi_idct(DCTELEM *block);
|
---|
29 |
|
---|
30 | static void clear_blocks_mmi(DCTELEM * blocks)
|
---|
31 | {
|
---|
32 | asm volatile(
|
---|
33 | ".set noreorder \n"
|
---|
34 | "addiu $9, %0, 768 \n"
|
---|
35 | "nop \n"
|
---|
36 | "1: \n"
|
---|
37 | "sq $0, 0(%0) \n"
|
---|
38 | "move $8, %0 \n"
|
---|
39 | "addi %0, %0, 64 \n"
|
---|
40 | "sq $0, 16($8) \n"
|
---|
41 | "slt $10, %0, $9 \n"
|
---|
42 | "sq $0, 32($8) \n"
|
---|
43 | "bnez $10, 1b \n"
|
---|
44 | "sq $0, 48($8) \n"
|
---|
45 | ".set reorder \n"
|
---|
46 | : "+r" (blocks) :: "$8", "$9", "memory" );
|
---|
47 | }
|
---|
48 |
|
---|
49 |
|
---|
50 | static void get_pixels_mmi(DCTELEM *block, const uint8_t *pixels, int line_size)
|
---|
51 | {
|
---|
52 | asm volatile(
|
---|
53 | ".set push \n\t"
|
---|
54 | ".set mips3 \n\t"
|
---|
55 | "ld $8, 0(%0) \n\t"
|
---|
56 | "add %0, %0, %2 \n\t"
|
---|
57 | "ld $9, 0(%0) \n\t"
|
---|
58 | "add %0, %0, %2 \n\t"
|
---|
59 | "ld $10, 0(%0) \n\t"
|
---|
60 | "pextlb $8, $0, $8 \n\t"
|
---|
61 | "sq $8, 0(%1) \n\t"
|
---|
62 | "add %0, %0, %2 \n\t"
|
---|
63 | "ld $8, 0(%0) \n\t"
|
---|
64 | "pextlb $9, $0, $9 \n\t"
|
---|
65 | "sq $9, 16(%1) \n\t"
|
---|
66 | "add %0, %0, %2 \n\t"
|
---|
67 | "ld $9, 0(%0) \n\t"
|
---|
68 | "pextlb $10, $0, $10 \n\t"
|
---|
69 | "sq $10, 32(%1) \n\t"
|
---|
70 | "add %0, %0, %2 \n\t"
|
---|
71 | "ld $10, 0(%0) \n\t"
|
---|
72 | "pextlb $8, $0, $8 \n\t"
|
---|
73 | "sq $8, 48(%1) \n\t"
|
---|
74 | "add %0, %0, %2 \n\t"
|
---|
75 | "ld $8, 0(%0) \n\t"
|
---|
76 | "pextlb $9, $0, $9 \n\t"
|
---|
77 | "sq $9, 64(%1) \n\t"
|
---|
78 | "add %0, %0, %2 \n\t"
|
---|
79 | "ld $9, 0(%0) \n\t"
|
---|
80 | "pextlb $10, $0, $10 \n\t"
|
---|
81 | "sq $10, 80(%1) \n\t"
|
---|
82 | "pextlb $8, $0, $8 \n\t"
|
---|
83 | "sq $8, 96(%1) \n\t"
|
---|
84 | "pextlb $9, $0, $9 \n\t"
|
---|
85 | "sq $9, 112(%1) \n\t"
|
---|
86 | ".set pop \n\t"
|
---|
87 | : "+r" (pixels) : "r" (block), "r" (line_size) : "$8", "$9", "$10", "memory" );
|
---|
88 | }
|
---|
89 |
|
---|
90 |
|
---|
91 | static void put_pixels8_mmi(uint8_t *block, const uint8_t *pixels, int line_size, int h)
|
---|
92 | {
|
---|
93 | asm volatile(
|
---|
94 | ".set push \n\t"
|
---|
95 | ".set mips3 \n\t"
|
---|
96 | "1: \n\t"
|
---|
97 | "ldr $8, 0(%1) \n\t"
|
---|
98 | "addiu %2, %2, -1 \n\t"
|
---|
99 | "ldl $8, 7(%1) \n\t"
|
---|
100 | "add %1, %1, %3 \n\t"
|
---|
101 | "sd $8, 0(%0) \n\t"
|
---|
102 | "add %0, %0, %3 \n\t"
|
---|
103 | "bgtz %2, 1b \n\t"
|
---|
104 | ".set pop \n\t"
|
---|
105 | : "+r" (block), "+r" (pixels), "+r" (h) : "r" (line_size)
|
---|
106 | : "$8", "memory" );
|
---|
107 | }
|
---|
108 |
|
---|
109 |
|
---|
110 | static void put_pixels16_mmi(uint8_t *block, const uint8_t *pixels, int line_size, int h)
|
---|
111 | {
|
---|
112 | asm volatile (
|
---|
113 | ".set push \n\t"
|
---|
114 | ".set mips3 \n\t"
|
---|
115 | "1: \n\t"
|
---|
116 | "ldr $8, 0(%1) \n\t"
|
---|
117 | "add $11, %1, %3 \n\t"
|
---|
118 | "ldl $8, 7(%1) \n\t"
|
---|
119 | "add $10, %0, %3 \n\t"
|
---|
120 | "ldr $9, 8(%1) \n\t"
|
---|
121 | "ldl $9, 15(%1) \n\t"
|
---|
122 | "ldr $12, 0($11) \n\t"
|
---|
123 | "add %1, $11, %3 \n\t"
|
---|
124 | "ldl $12, 7($11) \n\t"
|
---|
125 | "pcpyld $8, $9, $8 \n\t"
|
---|
126 | "sq $8, 0(%0) \n\t"
|
---|
127 | "ldr $13, 8($11) \n\t"
|
---|
128 | "addiu %2, %2, -2 \n\t"
|
---|
129 | "ldl $13, 15($11) \n\t"
|
---|
130 | "add %0, $10, %3 \n\t"
|
---|
131 | "pcpyld $12, $13, $12 \n\t"
|
---|
132 | "sq $12, 0($10) \n\t"
|
---|
133 | "bgtz %2, 1b \n\t"
|
---|
134 | ".set pop \n\t"
|
---|
135 | : "+r" (block), "+r" (pixels), "+r" (h) : "r" (line_size)
|
---|
136 | : "$8", "$9", "$10", "$11", "$12", "$13", "memory" );
|
---|
137 | }
|
---|
138 |
|
---|
139 |
|
---|
140 | void dsputil_init_mmi(DSPContext* c, AVCodecContext *avctx)
|
---|
141 | {
|
---|
142 | const int idct_algo= avctx->idct_algo;
|
---|
143 |
|
---|
144 | c->clear_blocks = clear_blocks_mmi;
|
---|
145 |
|
---|
146 | c->put_pixels_tab[1][0] = put_pixels8_mmi;
|
---|
147 | c->put_no_rnd_pixels_tab[1][0] = put_pixels8_mmi;
|
---|
148 |
|
---|
149 | c->put_pixels_tab[0][0] = put_pixels16_mmi;
|
---|
150 | c->put_no_rnd_pixels_tab[0][0] = put_pixels16_mmi;
|
---|
151 |
|
---|
152 | c->get_pixels = get_pixels_mmi;
|
---|
153 |
|
---|
154 | if(idct_algo==FF_IDCT_AUTO || idct_algo==FF_IDCT_PS2){
|
---|
155 | c->idct_put= ff_mmi_idct_put;
|
---|
156 | c->idct_add= ff_mmi_idct_add;
|
---|
157 | c->idct = ff_mmi_idct;
|
---|
158 | c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
|
---|
159 | }
|
---|
160 | }
|
---|
161 |
|
---|