1 | #include "../dsputil.h"
|
---|
2 | #include "../mpegvideo.h"
|
---|
3 | #include "../avcodec.h"
|
---|
4 |
|
---|
5 | static void dct_unquantize_h263_intra_iwmmxt(MpegEncContext *s,
|
---|
6 | DCTELEM *block, int n, int qscale)
|
---|
7 | {
|
---|
8 | int level, qmul, qadd;
|
---|
9 | int nCoeffs;
|
---|
10 | DCTELEM *block_orig = block;
|
---|
11 |
|
---|
12 | assert(s->block_last_index[n]>=0);
|
---|
13 |
|
---|
14 | qmul = qscale << 1;
|
---|
15 |
|
---|
16 | if (!s->h263_aic) {
|
---|
17 | if (n < 4)
|
---|
18 | level = block[0] * s->y_dc_scale;
|
---|
19 | else
|
---|
20 | level = block[0] * s->c_dc_scale;
|
---|
21 | qadd = (qscale - 1) | 1;
|
---|
22 | }else{
|
---|
23 | qadd = 0;
|
---|
24 | level = block[0];
|
---|
25 | }
|
---|
26 | if(s->ac_pred)
|
---|
27 | nCoeffs=63;
|
---|
28 | else
|
---|
29 | nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
|
---|
30 |
|
---|
31 | __asm__ __volatile__ (
|
---|
32 | /* "movd %1, %%mm6 \n\t" //qmul */
|
---|
33 | /* "packssdw %%mm6, %%mm6 \n\t" */
|
---|
34 | /* "packssdw %%mm6, %%mm6 \n\t" */
|
---|
35 | "tbcsth wr6, %[qmul] \n\t"
|
---|
36 | /* "movd %2, %%mm5 \n\t" //qadd */
|
---|
37 | /* "packssdw %%mm5, %%mm5 \n\t" */
|
---|
38 | /* "packssdw %%mm5, %%mm5 \n\t" */
|
---|
39 | "tbcsth wr5, %[qadd] \n\t"
|
---|
40 | "wzero wr7 \n\t" /* "pxor %%mm7, %%mm7 \n\t" */
|
---|
41 | "wzero wr4 \n\t" /* "pxor %%mm4, %%mm4 \n\t" */
|
---|
42 | "wsubh wr7, wr5, wr7 \n\t" /* "psubw %%mm5, %%mm7 \n\t" */
|
---|
43 | "1: \n\t"
|
---|
44 | "wldrd wr2, [%[block]] \n\t" /* "movq (%0, %3), %%mm0 \n\t" */
|
---|
45 | "wldrd wr3, [%[block], #8] \n\t" /* "movq 8(%0, %3), %%mm1 \n\t" */
|
---|
46 | "wmulsl wr0, wr6, wr2 \n\t" /* "pmullw %%mm6, %%mm0 \n\t" */
|
---|
47 | "wmulsl wr1, wr6, wr3 \n\t" /* "pmullw %%mm6, %%mm1 \n\t" */
|
---|
48 | /* "movq (%0, %3), %%mm2 \n\t" */
|
---|
49 | /* "movq 8(%0, %3), %%mm3 \n\t" */
|
---|
50 | "wcmpgtsh wr2, wr4, wr2 \n\t" /* "pcmpgtw %%mm4, %%mm2 \n\t" // block[i] < 0 ? -1 : 0 */
|
---|
51 | "wcmpgtsh wr3, wr4, wr2 \n\t" /* "pcmpgtw %%mm4, %%mm3 \n\t" // block[i] < 0 ? -1 : 0 */
|
---|
52 | "wxor wr0, wr2, wr0 \n\t" /* "pxor %%mm2, %%mm0 \n\t" */
|
---|
53 | "wxor wr1, wr3, wr1 \n\t" /* "pxor %%mm3, %%mm1 \n\t" */
|
---|
54 | "waddh wr0, wr7, wr0 \n\t" /* "paddw %%mm7, %%mm0 \n\t" */
|
---|
55 | "waddh wr1, wr7, wr1 \n\t" /* "paddw %%mm7, %%mm1 \n\t" */
|
---|
56 | "wxor wr2, wr0, wr2 \n\t" /* "pxor %%mm0, %%mm2 \n\t" */
|
---|
57 | "wxor wr3, wr1, wr3 \n\t" /* "pxor %%mm1, %%mm3 \n\t" */
|
---|
58 | "wcmpeqh wr0, wr7, wr0 \n\t" /* "pcmpeqw %%mm7, %%mm0 \n\t" // block[i] == 0 ? -1 : 0 */
|
---|
59 | "wcmpeqh wr1, wr7, wr1 \n\t" /* "pcmpeqw %%mm7, %%mm1 \n\t" // block[i] == 0 ? -1 : 0 */
|
---|
60 | "wandn wr0, wr2, wr0 \n\t" /* "pandn %%mm2, %%mm0 \n\t" */
|
---|
61 | "wandn wr1, wr3, wr1 \n\t" /* "pandn %%mm3, %%mm1 \n\t" */
|
---|
62 | "wstrd wr0, [%[block]] \n\t" /* "movq %%mm0, (%0, %3) \n\t" */
|
---|
63 | "wstrd wr1, [%[block], #8] \n\t" /* "movq %%mm1, 8(%0, %3) \n\t" */
|
---|
64 | "add %[block], %[block], #16 \n\t" /* "addl $16, %3 \n\t" */
|
---|
65 | "subs %[i], %[i], #1 \n\t"
|
---|
66 | "bne 1b \n\t" /* "jng 1b \n\t" */
|
---|
67 | :[block]"+r"(block)
|
---|
68 | :[i]"r"((nCoeffs + 8) / 8), [qmul]"r"(qmul), [qadd]"r"(qadd)
|
---|
69 | :"memory");
|
---|
70 |
|
---|
71 | block_orig[0] = level;
|
---|
72 | }
|
---|
73 |
|
---|
#if 0
/*
 * Disabled variant for inter blocks: works out how many coefficients are in
 * use and hands the block to ippiQuantInvInter_Compact_H263_16s_I().
 * Compiled out by the surrounding #if 0.
 */
static void dct_unquantize_h263_inter_iwmmxt(MpegEncContext *s,
                                             DCTELEM *block, int n, int qscale)
{
    int nCoeffs;

    assert(s->block_last_index[n]>=0);

    /* Index of the last coefficient to process: 63 when s->ac_pred is set,
     * otherwise looked up from the scan table. */
    nCoeffs = s->ac_pred
            ? 63
            : s->inter_scantable.raster_end[ s->block_last_index[n] ];

    ippiQuantInvInter_Compact_H263_16s_I(block, nCoeffs+1, qscale);
}
#endif
90 |
|
---|
91 | void MPV_common_init_iwmmxt(MpegEncContext *s)
|
---|
92 | {
|
---|
93 | if (!(mm_flags & MM_IWMMXT)) return;
|
---|
94 |
|
---|
95 | s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_iwmmxt;
|
---|
96 | #if 0
|
---|
97 | s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_iwmmxt;
|
---|
98 | #endif
|
---|
99 | }
|
---|