VirtualBox

source: vbox/trunk/src/recompiler_new/target-i386/ops_sse.h@ 13312

Last change on this file since 13312 was 11982, checked in by vboxsync, 16 years ago

All: license header changes for 2.0 (OSE headers, add Sun GPL/LGPL disclaimer)

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 38.0 KB
Line 
1/*
2 * MMX/SSE/SSE2/PNI support
3 *
4 * Copyright (c) 2005 Fabrice Bellard
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 */
20
21/*
22 * Sun LGPL Disclaimer: For the avoidance of doubt, except that if any license choice
23 * other than GPL or LGPL is available it will apply instead, Sun elects to use only
24 * the Lesser General Public License version 2.1 (LGPLv2) at this time for any software where
25 * a choice of LGPL license versions is made available with the language indicating
26 * that LGPLv2 or any later version may be used, or where a choice of which version
27 * of the LGPL is applied is otherwise unspecified.
28 */
29#if SHIFT == 0
30#define Reg MMXReg
31#define XMM_ONLY(x...)
32#define B(n) MMX_B(n)
33#define W(n) MMX_W(n)
34#define L(n) MMX_L(n)
35#define Q(n) q
36#define SUFFIX _mmx
37#else
38#define Reg XMMReg
39#define XMM_ONLY(x...) x
40#define B(n) XMM_B(n)
41#define W(n) XMM_W(n)
42#define L(n) XMM_L(n)
43#define Q(n) XMM_Q(n)
44#define SUFFIX _xmm
45#endif
46
47void OPPROTO glue(op_psrlw, SUFFIX)(void)
48{
49 Reg *d, *s;
50 int shift;
51
52 d = (Reg *)((char *)env + PARAM1);
53 s = (Reg *)((char *)env + PARAM2);
54
55 if (s->Q(0) > 15) {
56 d->Q(0) = 0;
57#if SHIFT == 1
58 d->Q(1) = 0;
59#endif
60 } else {
61 shift = s->B(0);
62 d->W(0) >>= shift;
63 d->W(1) >>= shift;
64 d->W(2) >>= shift;
65 d->W(3) >>= shift;
66#if SHIFT == 1
67 d->W(4) >>= shift;
68 d->W(5) >>= shift;
69 d->W(6) >>= shift;
70 d->W(7) >>= shift;
71#endif
72 }
73 FORCE_RET();
74}
75
76void OPPROTO glue(op_psraw, SUFFIX)(void)
77{
78 Reg *d, *s;
79 int shift;
80
81 d = (Reg *)((char *)env + PARAM1);
82 s = (Reg *)((char *)env + PARAM2);
83
84 if (s->Q(0) > 15) {
85 shift = 15;
86 } else {
87 shift = s->B(0);
88 }
89 d->W(0) = (int16_t)d->W(0) >> shift;
90 d->W(1) = (int16_t)d->W(1) >> shift;
91 d->W(2) = (int16_t)d->W(2) >> shift;
92 d->W(3) = (int16_t)d->W(3) >> shift;
93#if SHIFT == 1
94 d->W(4) = (int16_t)d->W(4) >> shift;
95 d->W(5) = (int16_t)d->W(5) >> shift;
96 d->W(6) = (int16_t)d->W(6) >> shift;
97 d->W(7) = (int16_t)d->W(7) >> shift;
98#endif
99}
100
101void OPPROTO glue(op_psllw, SUFFIX)(void)
102{
103 Reg *d, *s;
104 int shift;
105
106 d = (Reg *)((char *)env + PARAM1);
107 s = (Reg *)((char *)env + PARAM2);
108
109 if (s->Q(0) > 15) {
110 d->Q(0) = 0;
111#if SHIFT == 1
112 d->Q(1) = 0;
113#endif
114 } else {
115 shift = s->B(0);
116 d->W(0) <<= shift;
117 d->W(1) <<= shift;
118 d->W(2) <<= shift;
119 d->W(3) <<= shift;
120#if SHIFT == 1
121 d->W(4) <<= shift;
122 d->W(5) <<= shift;
123 d->W(6) <<= shift;
124 d->W(7) <<= shift;
125#endif
126 }
127 FORCE_RET();
128}
129
130void OPPROTO glue(op_psrld, SUFFIX)(void)
131{
132 Reg *d, *s;
133 int shift;
134
135 d = (Reg *)((char *)env + PARAM1);
136 s = (Reg *)((char *)env + PARAM2);
137
138 if (s->Q(0) > 31) {
139 d->Q(0) = 0;
140#if SHIFT == 1
141 d->Q(1) = 0;
142#endif
143 } else {
144 shift = s->B(0);
145 d->L(0) >>= shift;
146 d->L(1) >>= shift;
147#if SHIFT == 1
148 d->L(2) >>= shift;
149 d->L(3) >>= shift;
150#endif
151 }
152 FORCE_RET();
153}
154
155void OPPROTO glue(op_psrad, SUFFIX)(void)
156{
157 Reg *d, *s;
158 int shift;
159
160 d = (Reg *)((char *)env + PARAM1);
161 s = (Reg *)((char *)env + PARAM2);
162
163 if (s->Q(0) > 31) {
164 shift = 31;
165 } else {
166 shift = s->B(0);
167 }
168 d->L(0) = (int32_t)d->L(0) >> shift;
169 d->L(1) = (int32_t)d->L(1) >> shift;
170#if SHIFT == 1
171 d->L(2) = (int32_t)d->L(2) >> shift;
172 d->L(3) = (int32_t)d->L(3) >> shift;
173#endif
174}
175
176void OPPROTO glue(op_pslld, SUFFIX)(void)
177{
178 Reg *d, *s;
179 int shift;
180
181 d = (Reg *)((char *)env + PARAM1);
182 s = (Reg *)((char *)env + PARAM2);
183
184 if (s->Q(0) > 31) {
185 d->Q(0) = 0;
186#if SHIFT == 1
187 d->Q(1) = 0;
188#endif
189 } else {
190 shift = s->B(0);
191 d->L(0) <<= shift;
192 d->L(1) <<= shift;
193#if SHIFT == 1
194 d->L(2) <<= shift;
195 d->L(3) <<= shift;
196#endif
197 }
198 FORCE_RET();
199}
200
201void OPPROTO glue(op_psrlq, SUFFIX)(void)
202{
203 Reg *d, *s;
204 int shift;
205
206 d = (Reg *)((char *)env + PARAM1);
207 s = (Reg *)((char *)env + PARAM2);
208
209 if (s->Q(0) > 63) {
210 d->Q(0) = 0;
211#if SHIFT == 1
212 d->Q(1) = 0;
213#endif
214 } else {
215 shift = s->B(0);
216 d->Q(0) >>= shift;
217#if SHIFT == 1
218 d->Q(1) >>= shift;
219#endif
220 }
221 FORCE_RET();
222}
223
224void OPPROTO glue(op_psllq, SUFFIX)(void)
225{
226 Reg *d, *s;
227 int shift;
228
229 d = (Reg *)((char *)env + PARAM1);
230 s = (Reg *)((char *)env + PARAM2);
231
232 if (s->Q(0) > 63) {
233 d->Q(0) = 0;
234#if SHIFT == 1
235 d->Q(1) = 0;
236#endif
237 } else {
238 shift = s->B(0);
239 d->Q(0) <<= shift;
240#if SHIFT == 1
241 d->Q(1) <<= shift;
242#endif
243 }
244 FORCE_RET();
245}
246
247#if SHIFT == 1
248void OPPROTO glue(op_psrldq, SUFFIX)(void)
249{
250 Reg *d, *s;
251 int shift, i;
252
253 d = (Reg *)((char *)env + PARAM1);
254 s = (Reg *)((char *)env + PARAM2);
255 shift = s->L(0);
256 if (shift > 16)
257 shift = 16;
258 for(i = 0; i < 16 - shift; i++)
259 d->B(i) = d->B(i + shift);
260 for(i = 16 - shift; i < 16; i++)
261 d->B(i) = 0;
262 FORCE_RET();
263}
264
265void OPPROTO glue(op_pslldq, SUFFIX)(void)
266{
267 Reg *d, *s;
268 int shift, i;
269
270 d = (Reg *)((char *)env + PARAM1);
271 s = (Reg *)((char *)env + PARAM2);
272 shift = s->L(0);
273 if (shift > 16)
274 shift = 16;
275 for(i = 15; i >= shift; i--)
276 d->B(i) = d->B(i - shift);
277 for(i = 0; i < shift; i++)
278 d->B(i) = 0;
279 FORCE_RET();
280}
281#endif
282
/*
 * Generate op <name><SUFFIX>: apply the two-argument element macro F to each
 * pair of B lanes of the destination (env + PARAM1) and the source
 * (env + PARAM2), storing the result in the destination lane.  Lanes 8..15
 * are only emitted when XMM_ONLY() keeps its argument.
 */
#define SSE_OP_B(name, F)\
void OPPROTO glue(name, SUFFIX) (void)\
{\
    Reg *dst = (Reg *)((char *)env + PARAM1);\
    Reg *src = (Reg *)((char *)env + PARAM2);\
    dst->B(0) = F(dst->B(0), src->B(0));\
    dst->B(1) = F(dst->B(1), src->B(1));\
    dst->B(2) = F(dst->B(2), src->B(2));\
    dst->B(3) = F(dst->B(3), src->B(3));\
    dst->B(4) = F(dst->B(4), src->B(4));\
    dst->B(5) = F(dst->B(5), src->B(5));\
    dst->B(6) = F(dst->B(6), src->B(6));\
    dst->B(7) = F(dst->B(7), src->B(7));\
    XMM_ONLY(\
    dst->B(8) = F(dst->B(8), src->B(8));\
    dst->B(9) = F(dst->B(9), src->B(9));\
    dst->B(10) = F(dst->B(10), src->B(10));\
    dst->B(11) = F(dst->B(11), src->B(11));\
    dst->B(12) = F(dst->B(12), src->B(12));\
    dst->B(13) = F(dst->B(13), src->B(13));\
    dst->B(14) = F(dst->B(14), src->B(14));\
    dst->B(15) = F(dst->B(15), src->B(15));\
    )\
}
308
/*
 * Generate op <name><SUFFIX>: apply the element macro F to each pair of W
 * lanes of the destination (env + PARAM1) and the source (env + PARAM2),
 * storing the result in the destination lane.  Lanes 4..7 are only emitted
 * when XMM_ONLY() keeps its argument.
 */
#define SSE_OP_W(name, F)\
void OPPROTO glue(name, SUFFIX) (void)\
{\
    Reg *dst = (Reg *)((char *)env + PARAM1);\
    Reg *src = (Reg *)((char *)env + PARAM2);\
    dst->W(0) = F(dst->W(0), src->W(0));\
    dst->W(1) = F(dst->W(1), src->W(1));\
    dst->W(2) = F(dst->W(2), src->W(2));\
    dst->W(3) = F(dst->W(3), src->W(3));\
    XMM_ONLY(\
    dst->W(4) = F(dst->W(4), src->W(4));\
    dst->W(5) = F(dst->W(5), src->W(5));\
    dst->W(6) = F(dst->W(6), src->W(6));\
    dst->W(7) = F(dst->W(7), src->W(7));\
    )\
}
326
/*
 * Generate op <name><SUFFIX>: apply the element macro F to each pair of L
 * lanes of the destination (env + PARAM1) and the source (env + PARAM2),
 * storing the result in the destination lane.  Lanes 2..3 are only emitted
 * when XMM_ONLY() keeps its argument.
 */
#define SSE_OP_L(name, F)\
void OPPROTO glue(name, SUFFIX) (void)\
{\
    Reg *dst = (Reg *)((char *)env + PARAM1);\
    Reg *src = (Reg *)((char *)env + PARAM2);\
    dst->L(0) = F(dst->L(0), src->L(0));\
    dst->L(1) = F(dst->L(1), src->L(1));\
    XMM_ONLY(\
    dst->L(2) = F(dst->L(2), src->L(2));\
    dst->L(3) = F(dst->L(3), src->L(3));\
    )\
}
340
/*
 * Generate op <name><SUFFIX>: apply the element macro F to the Q lanes of
 * the destination (env + PARAM1) and the source (env + PARAM2), storing the
 * result in the destination.  Lane 1 is only emitted when XMM_ONLY() keeps
 * its argument.
 */
#define SSE_OP_Q(name, F)\
void OPPROTO glue(name, SUFFIX) (void)\
{\
    Reg *dst = (Reg *)((char *)env + PARAM1);\
    Reg *src = (Reg *)((char *)env + PARAM2);\
    dst->Q(0) = F(dst->Q(0), src->Q(0));\
    XMM_ONLY(\
    dst->Q(1) = F(dst->Q(1), src->Q(1));\
    )\
}
352
353#if SHIFT == 0
/* Clamp x to the unsigned 8-bit range [0, 255]. */
static inline int satub(int x)
{
    return (x < 0) ? 0 : ((x > 255) ? 255 : x);
}
363
/* Clamp x to the unsigned 16-bit range [0, 65535]. */
static inline int satuw(int x)
{
    return (x < 0) ? 0 : ((x > 65535) ? 65535 : x);
}
373
/* Clamp x to the signed 8-bit range [-128, 127]. */
static inline int satsb(int x)
{
    return (x < -128) ? -128 : ((x > 127) ? 127 : x);
}
383
/* Clamp x to the signed 16-bit range [-32768, 32767]. */
static inline int satsw(int x)
{
    return (x < -32768) ? -32768 : ((x > 32767) ? 32767 : x);
}
393
/*
 * Element-wise primitives handed to the SSE_OP_B/W/L/Q generators.
 *
 * Every expansion is fully parenthesized so it behaves as one value in any
 * expression context, not only as the right-hand side of an assignment.
 * The 16-bit multiplies are done on uint32_t: two uint16_t operands would
 * otherwise promote to signed int, where 0xFFFF * 0xFFFF overflows.
 */
#define FADD(a, b) ((a) + (b))
#define FADDUB(a, b) satub((a) + (b))
#define FADDUW(a, b) satuw((a) + (b))
#define FADDSB(a, b) satsb((int8_t)(a) + (int8_t)(b))
#define FADDSW(a, b) satsw((int16_t)(a) + (int16_t)(b))

#define FSUB(a, b) ((a) - (b))
#define FSUBUB(a, b) satub((a) - (b))
#define FSUBUW(a, b) satuw((a) - (b))
#define FSUBSB(a, b) satsb((int8_t)(a) - (int8_t)(b))
#define FSUBSW(a, b) satsw((int16_t)(a) - (int16_t)(b))
#define FMINUB(a, b) (((a) < (b)) ? (a) : (b))
#define FMINSW(a, b) (((int16_t)(a) < (int16_t)(b)) ? (a) : (b))
#define FMAXUB(a, b) (((a) > (b)) ? (a) : (b))
#define FMAXSW(a, b) (((int16_t)(a) > (int16_t)(b)) ? (a) : (b))

#define FAND(a, b) ((a) & (b))
#define FANDN(a, b) ((~(a)) & (b))
#define FOR(a, b) ((a) | (b))
#define FXOR(a, b) ((a) ^ (b))

/* Comparisons yield -1 (all ones once stored in the lane) or 0. */
#define FCMPGTB(a, b) (((int8_t)(a) > (int8_t)(b)) ? -1 : 0)
#define FCMPGTW(a, b) (((int16_t)(a) > (int16_t)(b)) ? -1 : 0)
#define FCMPGTL(a, b) (((int32_t)(a) > (int32_t)(b)) ? -1 : 0)
#define FCMPEQ(a, b) (((a) == (b)) ? -1 : 0)

/* Low 16 bits, unsigned high 16 bits and signed high 16 bits of a product. */
#define FMULLW(a, b) ((uint32_t)(a) * (uint32_t)(b))
#define FMULHUW(a, b) (((uint32_t)(a) * (uint32_t)(b)) >> 16)
#define FMULHW(a, b) (((int16_t)(a) * (int16_t)(b)) >> 16)

/* Average rounded upwards. */
#define FAVG(a, b) (((a) + (b) + 1) >> 1)
425#endif
426
427SSE_OP_B(op_paddb, FADD)
428SSE_OP_W(op_paddw, FADD)
429SSE_OP_L(op_paddl, FADD)
430SSE_OP_Q(op_paddq, FADD)
431
432SSE_OP_B(op_psubb, FSUB)
433SSE_OP_W(op_psubw, FSUB)
434SSE_OP_L(op_psubl, FSUB)
435SSE_OP_Q(op_psubq, FSUB)
436
437SSE_OP_B(op_paddusb, FADDUB)
438SSE_OP_B(op_paddsb, FADDSB)
439SSE_OP_B(op_psubusb, FSUBUB)
440SSE_OP_B(op_psubsb, FSUBSB)
441
442SSE_OP_W(op_paddusw, FADDUW)
443SSE_OP_W(op_paddsw, FADDSW)
444SSE_OP_W(op_psubusw, FSUBUW)
445SSE_OP_W(op_psubsw, FSUBSW)
446
447SSE_OP_B(op_pminub, FMINUB)
448SSE_OP_B(op_pmaxub, FMAXUB)
449
450SSE_OP_W(op_pminsw, FMINSW)
451SSE_OP_W(op_pmaxsw, FMAXSW)
452
453SSE_OP_Q(op_pand, FAND)
454SSE_OP_Q(op_pandn, FANDN)
455SSE_OP_Q(op_por, FOR)
456SSE_OP_Q(op_pxor, FXOR)
457
458SSE_OP_B(op_pcmpgtb, FCMPGTB)
459SSE_OP_W(op_pcmpgtw, FCMPGTW)
460SSE_OP_L(op_pcmpgtl, FCMPGTL)
461
462SSE_OP_B(op_pcmpeqb, FCMPEQ)
463SSE_OP_W(op_pcmpeqw, FCMPEQ)
464SSE_OP_L(op_pcmpeql, FCMPEQ)
465
466SSE_OP_W(op_pmullw, FMULLW)
467SSE_OP_W(op_pmulhuw, FMULHUW)
468SSE_OP_W(op_pmulhw, FMULHW)
469
470SSE_OP_B(op_pavgb, FAVG)
471SSE_OP_W(op_pavgw, FAVG)
472
473void OPPROTO glue(op_pmuludq, SUFFIX) (void)
474{
475 Reg *d, *s;
476 d = (Reg *)((char *)env + PARAM1);
477 s = (Reg *)((char *)env + PARAM2);
478
479 d->Q(0) = (uint64_t)s->L(0) * (uint64_t)d->L(0);
480#if SHIFT == 1
481 d->Q(1) = (uint64_t)s->L(2) * (uint64_t)d->L(2);
482#endif
483}
484
485void OPPROTO glue(op_pmaddwd, SUFFIX) (void)
486{
487 int i;
488 Reg *d, *s;
489 d = (Reg *)((char *)env + PARAM1);
490 s = (Reg *)((char *)env + PARAM2);
491
492 for(i = 0; i < (2 << SHIFT); i++) {
493 d->L(i) = (int16_t)s->W(2*i) * (int16_t)d->W(2*i) +
494 (int16_t)s->W(2*i+1) * (int16_t)d->W(2*i+1);
495 }
496 FORCE_RET();
497}
498
499#if SHIFT == 0
/* Absolute value of a (callers here pass differences of byte lanes). */
static inline int abs1(int a)
{
    return (a < 0) ? -a : a;
}
507#endif
508void OPPROTO glue(op_psadbw, SUFFIX) (void)
509{
510 unsigned int val;
511 Reg *d, *s;
512 d = (Reg *)((char *)env + PARAM1);
513 s = (Reg *)((char *)env + PARAM2);
514
515 val = 0;
516 val += abs1(d->B(0) - s->B(0));
517 val += abs1(d->B(1) - s->B(1));
518 val += abs1(d->B(2) - s->B(2));
519 val += abs1(d->B(3) - s->B(3));
520 val += abs1(d->B(4) - s->B(4));
521 val += abs1(d->B(5) - s->B(5));
522 val += abs1(d->B(6) - s->B(6));
523 val += abs1(d->B(7) - s->B(7));
524 d->Q(0) = val;
525#if SHIFT == 1
526 val = 0;
527 val += abs1(d->B(8) - s->B(8));
528 val += abs1(d->B(9) - s->B(9));
529 val += abs1(d->B(10) - s->B(10));
530 val += abs1(d->B(11) - s->B(11));
531 val += abs1(d->B(12) - s->B(12));
532 val += abs1(d->B(13) - s->B(13));
533 val += abs1(d->B(14) - s->B(14));
534 val += abs1(d->B(15) - s->B(15));
535 d->Q(1) = val;
536#endif
537}
538
539void OPPROTO glue(op_maskmov, SUFFIX) (void)
540{
541 int i;
542 Reg *d, *s;
543 d = (Reg *)((char *)env + PARAM1);
544 s = (Reg *)((char *)env + PARAM2);
545 for(i = 0; i < (8 << SHIFT); i++) {
546 if (s->B(i) & 0x80)
547 stb(A0 + i, d->B(i));
548 }
549 FORCE_RET();
550}
551
552void OPPROTO glue(op_movl_mm_T0, SUFFIX) (void)
553{
554 Reg *d;
555 d = (Reg *)((char *)env + PARAM1);
556 d->L(0) = T0;
557 d->L(1) = 0;
558#if SHIFT == 1
559 d->Q(1) = 0;
560#endif
561}
562
563void OPPROTO glue(op_movl_T0_mm, SUFFIX) (void)
564{
565 Reg *s;
566 s = (Reg *)((char *)env + PARAM1);
567 T0 = s->L(0);
568}
569
570#ifdef TARGET_X86_64
571void OPPROTO glue(op_movq_mm_T0, SUFFIX) (void)
572{
573 Reg *d;
574 d = (Reg *)((char *)env + PARAM1);
575 d->Q(0) = T0;
576#if SHIFT == 1
577 d->Q(1) = 0;
578#endif
579}
580
581void OPPROTO glue(op_movq_T0_mm, SUFFIX) (void)
582{
583 Reg *s;
584 s = (Reg *)((char *)env + PARAM1);
585 T0 = s->Q(0);
586}
587#endif
588
589#if SHIFT == 0
590void OPPROTO glue(op_pshufw, SUFFIX) (void)
591{
592#if __GCC__ == 3 || defined(RT_ARCH_AMD64) /* VBOX hack in #else */
593 Reg r, *d, *s;
594 int order;
595 d = (Reg *)((char *)env + PARAM1);
596 s = (Reg *)((char *)env + PARAM2);
597 order = PARAM3;
598 r.W(0) = s->W(order & 3);
599 r.W(1) = s->W((order >> 2) & 3);
600 r.W(2) = s->W((order >> 4) & 3);
601 r.W(3) = s->W((order >> 6) & 3);
602 *d = r;
603#else
604 Reg *s;
605 int order;
606 uint32_t l0, l1;
607 s = (Reg *)((char *)env + PARAM2);
608 order = PARAM3;
609 l0 = s->W(order & 3);
610 l0 |= (uint32_t)s->W((order >> 2) & 3) << 16;
611 l1 = s->W((order >> 4) & 3);
612 l1 |= (uint32_t)s->W((order >> 6) & 3) << 16;
613
614 s = (Reg *)((char *)env + PARAM1);
615 s->_l[0] = l0;
616 s->_l[1] = l1;
617#endif
618}
619#else
620void OPPROTO op_shufps(void)
621{
622 Reg r, *d, *s;
623 int order;
624 d = (Reg *)((char *)env + PARAM1);
625 s = (Reg *)((char *)env + PARAM2);
626 order = PARAM3;
627 r.L(0) = d->L(order & 3);
628 r.L(1) = d->L((order >> 2) & 3);
629 r.L(2) = s->L((order >> 4) & 3);
630 r.L(3) = s->L((order >> 6) & 3);
631 *d = r;
632}
633
634void OPPROTO op_shufpd(void)
635{
636 Reg r, *d, *s;
637 int order;
638 d = (Reg *)((char *)env + PARAM1);
639 s = (Reg *)((char *)env + PARAM2);
640 order = PARAM3;
641 r.Q(0) = d->Q(order & 1);
642 r.Q(1) = s->Q((order >> 1) & 1);
643 *d = r;
644}
645
646void OPPROTO glue(op_pshufd, SUFFIX) (void)
647{
648 Reg r, *d, *s;
649 int order;
650 d = (Reg *)((char *)env + PARAM1);
651 s = (Reg *)((char *)env + PARAM2);
652 order = PARAM3;
653 r.L(0) = s->L(order & 3);
654 r.L(1) = s->L((order >> 2) & 3);
655 r.L(2) = s->L((order >> 4) & 3);
656 r.L(3) = s->L((order >> 6) & 3);
657 *d = r;
658}
659
660void OPPROTO glue(op_pshuflw, SUFFIX) (void)
661{
662 Reg r, *d, *s;
663 int order;
664 d = (Reg *)((char *)env + PARAM1);
665 s = (Reg *)((char *)env + PARAM2);
666 order = PARAM3;
667 r.W(0) = s->W(order & 3);
668 r.W(1) = s->W((order >> 2) & 3);
669 r.W(2) = s->W((order >> 4) & 3);
670 r.W(3) = s->W((order >> 6) & 3);
671 r.Q(1) = s->Q(1);
672 *d = r;
673}
674
675void OPPROTO glue(op_pshufhw, SUFFIX) (void)
676{
677 Reg r, *d, *s;
678 int order;
679 d = (Reg *)((char *)env + PARAM1);
680 s = (Reg *)((char *)env + PARAM2);
681 order = PARAM3;
682 r.Q(0) = s->Q(0);
683 r.W(4) = s->W(4 + (order & 3));
684 r.W(5) = s->W(4 + ((order >> 2) & 3));
685 r.W(6) = s->W(4 + ((order >> 4) & 3));
686 r.W(7) = s->W(4 + ((order >> 6) & 3));
687 *d = r;
688}
689#endif
690
691#if SHIFT == 1
692/* FPU ops */
693/* XXX: not accurate */
694
/*
 * Generate the four floating-point ops op_<name>ps, op_<name>ss,
 * op_<name>pd and op_<name>sd.  F(size, a, b) is invoked with size 32 on
 * XMM_S lanes and size 64 on XMM_D lanes; the "p" forms process every lane,
 * the "s" forms only lane 0.  Operands are destination (env + PARAM1) and
 * source (env + PARAM2).
 */
#define SSE_OP_S(name, F)\
void OPPROTO op_ ## name ## ps (void)\
{\
    Reg *dst = (Reg *)((char *)env + PARAM1);\
    Reg *src = (Reg *)((char *)env + PARAM2);\
    dst->XMM_S(0) = F(32, dst->XMM_S(0), src->XMM_S(0));\
    dst->XMM_S(1) = F(32, dst->XMM_S(1), src->XMM_S(1));\
    dst->XMM_S(2) = F(32, dst->XMM_S(2), src->XMM_S(2));\
    dst->XMM_S(3) = F(32, dst->XMM_S(3), src->XMM_S(3));\
}\
\
void OPPROTO op_ ## name ## ss (void)\
{\
    Reg *dst = (Reg *)((char *)env + PARAM1);\
    Reg *src = (Reg *)((char *)env + PARAM2);\
    dst->XMM_S(0) = F(32, dst->XMM_S(0), src->XMM_S(0));\
}\
void OPPROTO op_ ## name ## pd (void)\
{\
    Reg *dst = (Reg *)((char *)env + PARAM1);\
    Reg *src = (Reg *)((char *)env + PARAM2);\
    dst->XMM_D(0) = F(64, dst->XMM_D(0), src->XMM_D(0));\
    dst->XMM_D(1) = F(64, dst->XMM_D(1), src->XMM_D(1));\
}\
\
void OPPROTO op_ ## name ## sd (void)\
{\
    Reg *dst = (Reg *)((char *)env + PARAM1);\
    Reg *src = (Reg *)((char *)env + PARAM2);\
    dst->XMM_D(0) = F(64, dst->XMM_D(0), src->XMM_D(0));\
}
730
/*
 * Lane primitives for SSE_OP_S.  "size" is 32 or 64 and is pasted into the
 * float<size>_* helper name; all of them report through env->sse_status.
 *
 * MIN and MAX go through float<size>_lt like the compare ops instead of
 * applying C's "<" / ">" directly to float32/float64 values, and fall back
 * to the second operand whenever the comparison is false.  The expansions
 * are parenthesized so they act as a single value.
 */
#define FPU_ADD(size, a, b) float ## size ## _add(a, b, &env->sse_status)
#define FPU_SUB(size, a, b) float ## size ## _sub(a, b, &env->sse_status)
#define FPU_MUL(size, a, b) float ## size ## _mul(a, b, &env->sse_status)
#define FPU_DIV(size, a, b) float ## size ## _div(a, b, &env->sse_status)
#define FPU_MIN(size, a, b) (float ## size ## _lt(a, b, &env->sse_status) ? (a) : (b))
#define FPU_MAX(size, a, b) (float ## size ## _lt(b, a, &env->sse_status) ? (a) : (b))
/* Unary: the first operand is ignored. */
#define FPU_SQRT(size, a, b) float ## size ## _sqrt(b, &env->sse_status)
738
739SSE_OP_S(add, FPU_ADD)
740SSE_OP_S(sub, FPU_SUB)
741SSE_OP_S(mul, FPU_MUL)
742SSE_OP_S(div, FPU_DIV)
743SSE_OP_S(min, FPU_MIN)
744SSE_OP_S(max, FPU_MAX)
745SSE_OP_S(sqrt, FPU_SQRT)
746
747
748/* float to float conversions */
749void OPPROTO op_cvtps2pd(void)
750{
751 float32 s0, s1;
752 Reg *d, *s;
753 d = (Reg *)((char *)env + PARAM1);
754 s = (Reg *)((char *)env + PARAM2);
755 s0 = s->XMM_S(0);
756 s1 = s->XMM_S(1);
757 d->XMM_D(0) = float32_to_float64(s0, &env->sse_status);
758 d->XMM_D(1) = float32_to_float64(s1, &env->sse_status);
759}
760
761void OPPROTO op_cvtpd2ps(void)
762{
763 Reg *d, *s;
764 d = (Reg *)((char *)env + PARAM1);
765 s = (Reg *)((char *)env + PARAM2);
766 d->XMM_S(0) = float64_to_float32(s->XMM_D(0), &env->sse_status);
767 d->XMM_S(1) = float64_to_float32(s->XMM_D(1), &env->sse_status);
768 d->Q(1) = 0;
769}
770
771void OPPROTO op_cvtss2sd(void)
772{
773 Reg *d, *s;
774 d = (Reg *)((char *)env + PARAM1);
775 s = (Reg *)((char *)env + PARAM2);
776 d->XMM_D(0) = float32_to_float64(s->XMM_S(0), &env->sse_status);
777}
778
779void OPPROTO op_cvtsd2ss(void)
780{
781 Reg *d, *s;
782 d = (Reg *)((char *)env + PARAM1);
783 s = (Reg *)((char *)env + PARAM2);
784 d->XMM_S(0) = float64_to_float32(s->XMM_D(0), &env->sse_status);
785}
786
787/* integer to float */
788void OPPROTO op_cvtdq2ps(void)
789{
790 XMMReg *d = (XMMReg *)((char *)env + PARAM1);
791 XMMReg *s = (XMMReg *)((char *)env + PARAM2);
792 d->XMM_S(0) = int32_to_float32(s->XMM_L(0), &env->sse_status);
793 d->XMM_S(1) = int32_to_float32(s->XMM_L(1), &env->sse_status);
794 d->XMM_S(2) = int32_to_float32(s->XMM_L(2), &env->sse_status);
795 d->XMM_S(3) = int32_to_float32(s->XMM_L(3), &env->sse_status);
796}
797
798void OPPROTO op_cvtdq2pd(void)
799{
800 XMMReg *d = (XMMReg *)((char *)env + PARAM1);
801 XMMReg *s = (XMMReg *)((char *)env + PARAM2);
802 int32_t l0, l1;
803 l0 = (int32_t)s->XMM_L(0);
804 l1 = (int32_t)s->XMM_L(1);
805 d->XMM_D(0) = int32_to_float64(l0, &env->sse_status);
806 d->XMM_D(1) = int32_to_float64(l1, &env->sse_status);
807}
808
809void OPPROTO op_cvtpi2ps(void)
810{
811 XMMReg *d = (Reg *)((char *)env + PARAM1);
812 MMXReg *s = (MMXReg *)((char *)env + PARAM2);
813 d->XMM_S(0) = int32_to_float32(s->MMX_L(0), &env->sse_status);
814 d->XMM_S(1) = int32_to_float32(s->MMX_L(1), &env->sse_status);
815}
816
817void OPPROTO op_cvtpi2pd(void)
818{
819 XMMReg *d = (Reg *)((char *)env + PARAM1);
820 MMXReg *s = (MMXReg *)((char *)env + PARAM2);
821 d->XMM_D(0) = int32_to_float64(s->MMX_L(0), &env->sse_status);
822 d->XMM_D(1) = int32_to_float64(s->MMX_L(1), &env->sse_status);
823}
824
825void OPPROTO op_cvtsi2ss(void)
826{
827 XMMReg *d = (Reg *)((char *)env + PARAM1);
828 d->XMM_S(0) = int32_to_float32(T0, &env->sse_status);
829}
830
831void OPPROTO op_cvtsi2sd(void)
832{
833 XMMReg *d = (Reg *)((char *)env + PARAM1);
834 d->XMM_D(0) = int32_to_float64(T0, &env->sse_status);
835}
836
837#ifdef TARGET_X86_64
838void OPPROTO op_cvtsq2ss(void)
839{
840 XMMReg *d = (Reg *)((char *)env + PARAM1);
841 d->XMM_S(0) = int64_to_float32(T0, &env->sse_status);
842}
843
844void OPPROTO op_cvtsq2sd(void)
845{
846 XMMReg *d = (Reg *)((char *)env + PARAM1);
847 d->XMM_D(0) = int64_to_float64(T0, &env->sse_status);
848}
849#endif
850
851/* float to integer */
852void OPPROTO op_cvtps2dq(void)
853{
854 XMMReg *d = (XMMReg *)((char *)env + PARAM1);
855 XMMReg *s = (XMMReg *)((char *)env + PARAM2);
856 d->XMM_L(0) = float32_to_int32(s->XMM_S(0), &env->sse_status);
857 d->XMM_L(1) = float32_to_int32(s->XMM_S(1), &env->sse_status);
858 d->XMM_L(2) = float32_to_int32(s->XMM_S(2), &env->sse_status);
859 d->XMM_L(3) = float32_to_int32(s->XMM_S(3), &env->sse_status);
860}
861
862void OPPROTO op_cvtpd2dq(void)
863{
864 XMMReg *d = (XMMReg *)((char *)env + PARAM1);
865 XMMReg *s = (XMMReg *)((char *)env + PARAM2);
866 d->XMM_L(0) = float64_to_int32(s->XMM_D(0), &env->sse_status);
867 d->XMM_L(1) = float64_to_int32(s->XMM_D(1), &env->sse_status);
868 d->XMM_Q(1) = 0;
869}
870
871void OPPROTO op_cvtps2pi(void)
872{
873 MMXReg *d = (MMXReg *)((char *)env + PARAM1);
874 XMMReg *s = (XMMReg *)((char *)env + PARAM2);
875 d->MMX_L(0) = float32_to_int32(s->XMM_S(0), &env->sse_status);
876 d->MMX_L(1) = float32_to_int32(s->XMM_S(1), &env->sse_status);
877}
878
879void OPPROTO op_cvtpd2pi(void)
880{
881 MMXReg *d = (MMXReg *)((char *)env + PARAM1);
882 XMMReg *s = (XMMReg *)((char *)env + PARAM2);
883 d->MMX_L(0) = float64_to_int32(s->XMM_D(0), &env->sse_status);
884 d->MMX_L(1) = float64_to_int32(s->XMM_D(1), &env->sse_status);
885}
886
887void OPPROTO op_cvtss2si(void)
888{
889 XMMReg *s = (XMMReg *)((char *)env + PARAM1);
890 T0 = float32_to_int32(s->XMM_S(0), &env->sse_status);
891}
892
893void OPPROTO op_cvtsd2si(void)
894{
895 XMMReg *s = (XMMReg *)((char *)env + PARAM1);
896 T0 = float64_to_int32(s->XMM_D(0), &env->sse_status);
897}
898
899#ifdef TARGET_X86_64
900void OPPROTO op_cvtss2sq(void)
901{
902 XMMReg *s = (XMMReg *)((char *)env + PARAM1);
903 T0 = float32_to_int64(s->XMM_S(0), &env->sse_status);
904}
905
906void OPPROTO op_cvtsd2sq(void)
907{
908 XMMReg *s = (XMMReg *)((char *)env + PARAM1);
909 T0 = float64_to_int64(s->XMM_D(0), &env->sse_status);
910}
911#endif
912
913/* float to integer truncated */
914void OPPROTO op_cvttps2dq(void)
915{
916 XMMReg *d = (XMMReg *)((char *)env + PARAM1);
917 XMMReg *s = (XMMReg *)((char *)env + PARAM2);
918 d->XMM_L(0) = float32_to_int32_round_to_zero(s->XMM_S(0), &env->sse_status);
919 d->XMM_L(1) = float32_to_int32_round_to_zero(s->XMM_S(1), &env->sse_status);
920 d->XMM_L(2) = float32_to_int32_round_to_zero(s->XMM_S(2), &env->sse_status);
921 d->XMM_L(3) = float32_to_int32_round_to_zero(s->XMM_S(3), &env->sse_status);
922}
923
924void OPPROTO op_cvttpd2dq(void)
925{
926 XMMReg *d = (XMMReg *)((char *)env + PARAM1);
927 XMMReg *s = (XMMReg *)((char *)env + PARAM2);
928 d->XMM_L(0) = float64_to_int32_round_to_zero(s->XMM_D(0), &env->sse_status);
929 d->XMM_L(1) = float64_to_int32_round_to_zero(s->XMM_D(1), &env->sse_status);
930 d->XMM_Q(1) = 0;
931}
932
933void OPPROTO op_cvttps2pi(void)
934{
935 MMXReg *d = (MMXReg *)((char *)env + PARAM1);
936 XMMReg *s = (XMMReg *)((char *)env + PARAM2);
937 d->MMX_L(0) = float32_to_int32_round_to_zero(s->XMM_S(0), &env->sse_status);
938 d->MMX_L(1) = float32_to_int32_round_to_zero(s->XMM_S(1), &env->sse_status);
939}
940
941void OPPROTO op_cvttpd2pi(void)
942{
943 MMXReg *d = (MMXReg *)((char *)env + PARAM1);
944 XMMReg *s = (XMMReg *)((char *)env + PARAM2);
945 d->MMX_L(0) = float64_to_int32_round_to_zero(s->XMM_D(0), &env->sse_status);
946 d->MMX_L(1) = float64_to_int32_round_to_zero(s->XMM_D(1), &env->sse_status);
947}
948
949void OPPROTO op_cvttss2si(void)
950{
951 XMMReg *s = (XMMReg *)((char *)env + PARAM1);
952 T0 = float32_to_int32_round_to_zero(s->XMM_S(0), &env->sse_status);
953}
954
955void OPPROTO op_cvttsd2si(void)
956{
957 XMMReg *s = (XMMReg *)((char *)env + PARAM1);
958 T0 = float64_to_int32_round_to_zero(s->XMM_D(0), &env->sse_status);
959}
960
961#ifdef TARGET_X86_64
962void OPPROTO op_cvttss2sq(void)
963{
964 XMMReg *s = (XMMReg *)((char *)env + PARAM1);
965 T0 = float32_to_int64_round_to_zero(s->XMM_S(0), &env->sse_status);
966}
967
968void OPPROTO op_cvttsd2sq(void)
969{
970 XMMReg *s = (XMMReg *)((char *)env + PARAM1);
971 T0 = float64_to_int64_round_to_zero(s->XMM_D(0), &env->sse_status);
972}
973#endif
974
975void OPPROTO op_rsqrtps(void)
976{
977 XMMReg *d = (XMMReg *)((char *)env + PARAM1);
978 XMMReg *s = (XMMReg *)((char *)env + PARAM2);
979 d->XMM_S(0) = approx_rsqrt(s->XMM_S(0));
980 d->XMM_S(1) = approx_rsqrt(s->XMM_S(1));
981 d->XMM_S(2) = approx_rsqrt(s->XMM_S(2));
982 d->XMM_S(3) = approx_rsqrt(s->XMM_S(3));
983}
984
985void OPPROTO op_rsqrtss(void)
986{
987 XMMReg *d = (XMMReg *)((char *)env + PARAM1);
988 XMMReg *s = (XMMReg *)((char *)env + PARAM2);
989 d->XMM_S(0) = approx_rsqrt(s->XMM_S(0));
990}
991
992void OPPROTO op_rcpps(void)
993{
994 XMMReg *d = (XMMReg *)((char *)env + PARAM1);
995 XMMReg *s = (XMMReg *)((char *)env + PARAM2);
996 d->XMM_S(0) = approx_rcp(s->XMM_S(0));
997 d->XMM_S(1) = approx_rcp(s->XMM_S(1));
998 d->XMM_S(2) = approx_rcp(s->XMM_S(2));
999 d->XMM_S(3) = approx_rcp(s->XMM_S(3));
1000}
1001
1002void OPPROTO op_rcpss(void)
1003{
1004 XMMReg *d = (XMMReg *)((char *)env + PARAM1);
1005 XMMReg *s = (XMMReg *)((char *)env + PARAM2);
1006 d->XMM_S(0) = approx_rcp(s->XMM_S(0));
1007}
1008
1009void OPPROTO op_haddps(void)
1010{
1011 XMMReg *d = (XMMReg *)((char *)env + PARAM1);
1012 XMMReg *s = (XMMReg *)((char *)env + PARAM2);
1013 XMMReg r;
1014 r.XMM_S(0) = d->XMM_S(0) + d->XMM_S(1);
1015 r.XMM_S(1) = d->XMM_S(2) + d->XMM_S(3);
1016 r.XMM_S(2) = s->XMM_S(0) + s->XMM_S(1);
1017 r.XMM_S(3) = s->XMM_S(2) + s->XMM_S(3);
1018 *d = r;
1019}
1020
1021void OPPROTO op_haddpd(void)
1022{
1023 XMMReg *d = (XMMReg *)((char *)env + PARAM1);
1024 XMMReg *s = (XMMReg *)((char *)env + PARAM2);
1025 XMMReg r;
1026 r.XMM_D(0) = d->XMM_D(0) + d->XMM_D(1);
1027 r.XMM_D(1) = s->XMM_D(0) + s->XMM_D(1);
1028 *d = r;
1029}
1030
1031void OPPROTO op_hsubps(void)
1032{
1033 XMMReg *d = (XMMReg *)((char *)env + PARAM1);
1034 XMMReg *s = (XMMReg *)((char *)env + PARAM2);
1035 XMMReg r;
1036 r.XMM_S(0) = d->XMM_S(0) - d->XMM_S(1);
1037 r.XMM_S(1) = d->XMM_S(2) - d->XMM_S(3);
1038 r.XMM_S(2) = s->XMM_S(0) - s->XMM_S(1);
1039 r.XMM_S(3) = s->XMM_S(2) - s->XMM_S(3);
1040 *d = r;
1041}
1042
1043void OPPROTO op_hsubpd(void)
1044{
1045 XMMReg *d = (XMMReg *)((char *)env + PARAM1);
1046 XMMReg *s = (XMMReg *)((char *)env + PARAM2);
1047 XMMReg r;
1048 r.XMM_D(0) = d->XMM_D(0) - d->XMM_D(1);
1049 r.XMM_D(1) = s->XMM_D(0) - s->XMM_D(1);
1050 *d = r;
1051}
1052
1053void OPPROTO op_addsubps(void)
1054{
1055 XMMReg *d = (XMMReg *)((char *)env + PARAM1);
1056 XMMReg *s = (XMMReg *)((char *)env + PARAM2);
1057 d->XMM_S(0) = d->XMM_S(0) - s->XMM_S(0);
1058 d->XMM_S(1) = d->XMM_S(1) + s->XMM_S(1);
1059 d->XMM_S(2) = d->XMM_S(2) - s->XMM_S(2);
1060 d->XMM_S(3) = d->XMM_S(3) + s->XMM_S(3);
1061}
1062
1063void OPPROTO op_addsubpd(void)
1064{
1065 XMMReg *d = (XMMReg *)((char *)env + PARAM1);
1066 XMMReg *s = (XMMReg *)((char *)env + PARAM2);
1067 d->XMM_D(0) = d->XMM_D(0) - s->XMM_D(0);
1068 d->XMM_D(1) = d->XMM_D(1) + s->XMM_D(1);
1069}
1070
1071/* XXX: unordered */
/*
 * Generate the four compare ops op_<name>ps, op_<name>ss, op_<name>pd and
 * op_<name>sd.  F(size, a, b) is evaluated on XMM_S (size 32) or XMM_D
 * (size 64) lanes of destination (env + PARAM1) and source (env + PARAM2);
 * its integer result is stored in the matching XMM_L or XMM_Q lane of the
 * destination.  The "p" forms cover every lane, the "s" forms only lane 0.
 */
#define SSE_OP_CMP(name, F)\
void OPPROTO op_ ## name ## ps (void)\
{\
    Reg *dst = (Reg *)((char *)env + PARAM1);\
    Reg *src = (Reg *)((char *)env + PARAM2);\
    dst->XMM_L(0) = F(32, dst->XMM_S(0), src->XMM_S(0));\
    dst->XMM_L(1) = F(32, dst->XMM_S(1), src->XMM_S(1));\
    dst->XMM_L(2) = F(32, dst->XMM_S(2), src->XMM_S(2));\
    dst->XMM_L(3) = F(32, dst->XMM_S(3), src->XMM_S(3));\
}\
\
void OPPROTO op_ ## name ## ss (void)\
{\
    Reg *dst = (Reg *)((char *)env + PARAM1);\
    Reg *src = (Reg *)((char *)env + PARAM2);\
    dst->XMM_L(0) = F(32, dst->XMM_S(0), src->XMM_S(0));\
}\
void OPPROTO op_ ## name ## pd (void)\
{\
    Reg *dst = (Reg *)((char *)env + PARAM1);\
    Reg *src = (Reg *)((char *)env + PARAM2);\
    dst->XMM_Q(0) = F(64, dst->XMM_D(0), src->XMM_D(0));\
    dst->XMM_Q(1) = F(64, dst->XMM_D(1), src->XMM_D(1));\
}\
\
void OPPROTO op_ ## name ## sd (void)\
{\
    Reg *dst = (Reg *)((char *)env + PARAM1);\
    Reg *src = (Reg *)((char *)env + PARAM2);\
    dst->XMM_Q(0) = F(64, dst->XMM_D(0), src->XMM_D(0));\
}
1107
/*
 * Lane predicates for SSE_OP_CMP: each yields -1 when the predicate holds
 * and 0 otherwise.  The N-variants and ORD invert the result of the same
 * float<size>_* helper.  Every expansion is wrapped in parentheses so the
 * conditional cannot absorb neighbouring operators at the use site.
 */
#define FPU_CMPEQ(size, a, b) (float ## size ## _eq(a, b, &env->sse_status) ? -1 : 0)
#define FPU_CMPLT(size, a, b) (float ## size ## _lt(a, b, &env->sse_status) ? -1 : 0)
#define FPU_CMPLE(size, a, b) (float ## size ## _le(a, b, &env->sse_status) ? -1 : 0)
#define FPU_CMPUNORD(size, a, b) (float ## size ## _unordered(a, b, &env->sse_status) ? -1 : 0)
#define FPU_CMPNEQ(size, a, b) (float ## size ## _eq(a, b, &env->sse_status) ? 0 : -1)
#define FPU_CMPNLT(size, a, b) (float ## size ## _lt(a, b, &env->sse_status) ? 0 : -1)
#define FPU_CMPNLE(size, a, b) (float ## size ## _le(a, b, &env->sse_status) ? 0 : -1)
#define FPU_CMPORD(size, a, b) (float ## size ## _unordered(a, b, &env->sse_status) ? 0 : -1)
1116
1117SSE_OP_CMP(cmpeq, FPU_CMPEQ)
1118SSE_OP_CMP(cmplt, FPU_CMPLT)
1119SSE_OP_CMP(cmple, FPU_CMPLE)
1120SSE_OP_CMP(cmpunord, FPU_CMPUNORD)
1121SSE_OP_CMP(cmpneq, FPU_CMPNEQ)
1122SSE_OP_CMP(cmpnlt, FPU_CMPNLT)
1123SSE_OP_CMP(cmpnle, FPU_CMPNLE)
1124SSE_OP_CMP(cmpord, FPU_CMPORD)
1125
1126const int comis_eflags[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C};
1127
1128void OPPROTO op_ucomiss(void)
1129{
1130 int ret;
1131 float32 s0, s1;
1132 Reg *d, *s;
1133 d = (Reg *)((char *)env + PARAM1);
1134 s = (Reg *)((char *)env + PARAM2);
1135
1136 s0 = d->XMM_S(0);
1137 s1 = s->XMM_S(0);
1138 ret = float32_compare_quiet(s0, s1, &env->sse_status);
1139 CC_SRC = comis_eflags[ret + 1];
1140 FORCE_RET();
1141}
1142
1143void OPPROTO op_comiss(void)
1144{
1145 int ret;
1146 float32 s0, s1;
1147 Reg *d, *s;
1148 d = (Reg *)((char *)env + PARAM1);
1149 s = (Reg *)((char *)env + PARAM2);
1150
1151 s0 = d->XMM_S(0);
1152 s1 = s->XMM_S(0);
1153 ret = float32_compare(s0, s1, &env->sse_status);
1154 CC_SRC = comis_eflags[ret + 1];
1155 FORCE_RET();
1156}
1157
1158void OPPROTO op_ucomisd(void)
1159{
1160 int ret;
1161 float64 d0, d1;
1162 Reg *d, *s;
1163 d = (Reg *)((char *)env + PARAM1);
1164 s = (Reg *)((char *)env + PARAM2);
1165
1166 d0 = d->XMM_D(0);
1167 d1 = s->XMM_D(0);
1168 ret = float64_compare_quiet(d0, d1, &env->sse_status);
1169 CC_SRC = comis_eflags[ret + 1];
1170 FORCE_RET();
1171}
1172
1173void OPPROTO op_comisd(void)
1174{
1175 int ret;
1176 float64 d0, d1;
1177 Reg *d, *s;
1178 d = (Reg *)((char *)env + PARAM1);
1179 s = (Reg *)((char *)env + PARAM2);
1180
1181 d0 = d->XMM_D(0);
1182 d1 = s->XMM_D(0);
1183 ret = float64_compare(d0, d1, &env->sse_status);
1184 CC_SRC = comis_eflags[ret + 1];
1185 FORCE_RET();
1186}
1187
1188void OPPROTO op_movmskps(void)
1189{
1190 int b0, b1, b2, b3;
1191 Reg *s;
1192 s = (Reg *)((char *)env + PARAM1);
1193 b0 = s->XMM_L(0) >> 31;
1194 b1 = s->XMM_L(1) >> 31;
1195 b2 = s->XMM_L(2) >> 31;
1196 b3 = s->XMM_L(3) >> 31;
1197 T0 = b0 | (b1 << 1) | (b2 << 2) | (b3 << 3);
1198}
1199
1200void OPPROTO op_movmskpd(void)
1201{
1202 int b0, b1;
1203 Reg *s;
1204 s = (Reg *)((char *)env + PARAM1);
1205 b0 = s->XMM_L(1) >> 31;
1206 b1 = s->XMM_L(3) >> 31;
1207 T0 = b0 | (b1 << 1);
1208}
1209
1210#endif
1211
1212void OPPROTO glue(op_pmovmskb, SUFFIX)(void)
1213{
1214 Reg *s;
1215 s = (Reg *)((char *)env + PARAM1);
1216 T0 = 0;
1217 T0 |= (s->XMM_B(0) >> 7);
1218 T0 |= (s->XMM_B(1) >> 6) & 0x02;
1219 T0 |= (s->XMM_B(2) >> 5) & 0x04;
1220 T0 |= (s->XMM_B(3) >> 4) & 0x08;
1221 T0 |= (s->XMM_B(4) >> 3) & 0x10;
1222 T0 |= (s->XMM_B(5) >> 2) & 0x20;
1223 T0 |= (s->XMM_B(6) >> 1) & 0x40;
1224 T0 |= (s->XMM_B(7)) & 0x80;
1225#if SHIFT == 1
1226 T0 |= (s->XMM_B(8) << 1) & 0x0100;
1227 T0 |= (s->XMM_B(9) << 2) & 0x0200;
1228 T0 |= (s->XMM_B(10) << 3) & 0x0400;
1229 T0 |= (s->XMM_B(11) << 4) & 0x0800;
1230 T0 |= (s->XMM_B(12) << 5) & 0x1000;
1231 T0 |= (s->XMM_B(13) << 6) & 0x2000;
1232 T0 |= (s->XMM_B(14) << 7) & 0x4000;
1233 T0 |= (s->XMM_B(15) << 8) & 0x8000;
1234#endif
1235}
1236
1237void OPPROTO glue(op_pinsrw, SUFFIX) (void)
1238{
1239 Reg *d = (Reg *)((char *)env + PARAM1);
1240 int pos = PARAM2;
1241
1242 d->W(pos) = T0;
1243}
1244
1245void OPPROTO glue(op_pextrw, SUFFIX) (void)
1246{
1247 Reg *s = (Reg *)((char *)env + PARAM1);
1248 int pos = PARAM2;
1249
1250 T0 = s->W(pos);
1251}
1252
1253void OPPROTO glue(op_packsswb, SUFFIX) (void)
1254{
1255 Reg r, *d, *s;
1256 d = (Reg *)((char *)env + PARAM1);
1257 s = (Reg *)((char *)env + PARAM2);
1258
1259 r.B(0) = satsb((int16_t)d->W(0));
1260 r.B(1) = satsb((int16_t)d->W(1));
1261 r.B(2) = satsb((int16_t)d->W(2));
1262 r.B(3) = satsb((int16_t)d->W(3));
1263#if SHIFT == 1
1264 r.B(4) = satsb((int16_t)d->W(4));
1265 r.B(5) = satsb((int16_t)d->W(5));
1266 r.B(6) = satsb((int16_t)d->W(6));
1267 r.B(7) = satsb((int16_t)d->W(7));
1268#endif
1269 r.B((4 << SHIFT) + 0) = satsb((int16_t)s->W(0));
1270 r.B((4 << SHIFT) + 1) = satsb((int16_t)s->W(1));
1271 r.B((4 << SHIFT) + 2) = satsb((int16_t)s->W(2));
1272 r.B((4 << SHIFT) + 3) = satsb((int16_t)s->W(3));
1273#if SHIFT == 1
1274 r.B(12) = satsb((int16_t)s->W(4));
1275 r.B(13) = satsb((int16_t)s->W(5));
1276 r.B(14) = satsb((int16_t)s->W(6));
1277 r.B(15) = satsb((int16_t)s->W(7));
1278#endif
1279 *d = r;
1280}
1281
1282void OPPROTO glue(op_packuswb, SUFFIX) (void)
1283{
1284 Reg r, *d, *s;
1285 d = (Reg *)((char *)env + PARAM1);
1286 s = (Reg *)((char *)env + PARAM2);
1287
1288 r.B(0) = satub((int16_t)d->W(0));
1289 r.B(1) = satub((int16_t)d->W(1));
1290 r.B(2) = satub((int16_t)d->W(2));
1291 r.B(3) = satub((int16_t)d->W(3));
1292#if SHIFT == 1
1293 r.B(4) = satub((int16_t)d->W(4));
1294 r.B(5) = satub((int16_t)d->W(5));
1295 r.B(6) = satub((int16_t)d->W(6));
1296 r.B(7) = satub((int16_t)d->W(7));
1297#endif
1298 r.B((4 << SHIFT) + 0) = satub((int16_t)s->W(0));
1299 r.B((4 << SHIFT) + 1) = satub((int16_t)s->W(1));
1300 r.B((4 << SHIFT) + 2) = satub((int16_t)s->W(2));
1301 r.B((4 << SHIFT) + 3) = satub((int16_t)s->W(3));
1302#if SHIFT == 1
1303 r.B(12) = satub((int16_t)s->W(4));
1304 r.B(13) = satub((int16_t)s->W(5));
1305 r.B(14) = satub((int16_t)s->W(6));
1306 r.B(15) = satub((int16_t)s->W(7));
1307#endif
1308 *d = r;
1309}
1310
1311void OPPROTO glue(op_packssdw, SUFFIX) (void)
1312{
1313 Reg r, *d, *s;
1314 d = (Reg *)((char *)env + PARAM1);
1315 s = (Reg *)((char *)env + PARAM2);
1316
1317 r.W(0) = satsw(d->L(0));
1318 r.W(1) = satsw(d->L(1));
1319#if SHIFT == 1
1320 r.W(2) = satsw(d->L(2));
1321 r.W(3) = satsw(d->L(3));
1322#endif
1323 r.W((2 << SHIFT) + 0) = satsw(s->L(0));
1324 r.W((2 << SHIFT) + 1) = satsw(s->L(1));
1325#if SHIFT == 1
1326 r.W(6) = satsw(s->L(2));
1327 r.W(7) = satsw(s->L(3));
1328#endif
1329 *d = r;
1330}
1331
1332#define UNPCK_OP(base_name, base) \
1333 \
1334void OPPROTO glue(op_punpck ## base_name ## bw, SUFFIX) (void) \
1335{ \
1336 Reg r, *d, *s; \
1337 d = (Reg *)((char *)env + PARAM1); \
1338 s = (Reg *)((char *)env + PARAM2); \
1339 \
1340 r.B(0) = d->B((base << (SHIFT + 2)) + 0); \
1341 r.B(1) = s->B((base << (SHIFT + 2)) + 0); \
1342 r.B(2) = d->B((base << (SHIFT + 2)) + 1); \
1343 r.B(3) = s->B((base << (SHIFT + 2)) + 1); \
1344 r.B(4) = d->B((base << (SHIFT + 2)) + 2); \
1345 r.B(5) = s->B((base << (SHIFT + 2)) + 2); \
1346 r.B(6) = d->B((base << (SHIFT + 2)) + 3); \
1347 r.B(7) = s->B((base << (SHIFT + 2)) + 3); \
1348XMM_ONLY( \
1349 r.B(8) = d->B((base << (SHIFT + 2)) + 4); \
1350 r.B(9) = s->B((base << (SHIFT + 2)) + 4); \
1351 r.B(10) = d->B((base << (SHIFT + 2)) + 5); \
1352 r.B(11) = s->B((base << (SHIFT + 2)) + 5); \
1353 r.B(12) = d->B((base << (SHIFT + 2)) + 6); \
1354 r.B(13) = s->B((base << (SHIFT + 2)) + 6); \
1355 r.B(14) = d->B((base << (SHIFT + 2)) + 7); \
1356 r.B(15) = s->B((base << (SHIFT + 2)) + 7); \
1357) \
1358 *d = r; \
1359} \
1360 \
1361void OPPROTO glue(op_punpck ## base_name ## wd, SUFFIX) (void) \
1362{ \
1363 Reg r, *d, *s; \
1364 d = (Reg *)((char *)env + PARAM1); \
1365 s = (Reg *)((char *)env + PARAM2); \
1366 \
1367 r.W(0) = d->W((base << (SHIFT + 1)) + 0); \
1368 r.W(1) = s->W((base << (SHIFT + 1)) + 0); \
1369 r.W(2) = d->W((base << (SHIFT + 1)) + 1); \
1370 r.W(3) = s->W((base << (SHIFT + 1)) + 1); \
1371XMM_ONLY( \
1372 r.W(4) = d->W((base << (SHIFT + 1)) + 2); \
1373 r.W(5) = s->W((base << (SHIFT + 1)) + 2); \
1374 r.W(6) = d->W((base << (SHIFT + 1)) + 3); \
1375 r.W(7) = s->W((base << (SHIFT + 1)) + 3); \
1376) \
1377 *d = r; \
1378} \
1379 \
1380void OPPROTO glue(op_punpck ## base_name ## dq, SUFFIX) (void) \
1381{ \
1382 Reg r, *d, *s; \
1383 d = (Reg *)((char *)env + PARAM1); \
1384 s = (Reg *)((char *)env + PARAM2); \
1385 \
1386 r.L(0) = d->L((base << SHIFT) + 0); \
1387 r.L(1) = s->L((base << SHIFT) + 0); \
1388XMM_ONLY( \
1389 r.L(2) = d->L((base << SHIFT) + 1); \
1390 r.L(3) = s->L((base << SHIFT) + 1); \
1391) \
1392 *d = r; \
1393} \
1394 \
1395XMM_ONLY( \
1396void OPPROTO glue(op_punpck ## base_name ## qdq, SUFFIX) (void) \
1397{ \
1398 Reg r, *d, *s; \
1399 d = (Reg *)((char *)env + PARAM1); \
1400 s = (Reg *)((char *)env + PARAM2); \
1401 \
1402 r.Q(0) = d->Q(base); \
1403 r.Q(1) = s->Q(base); \
1404 *d = r; \
1405} \
1406)
1407
1408UNPCK_OP(l, 0)
1409UNPCK_OP(h, 1)
1410
/*
 * Drop the per-inclusion helper macros set up at the top of this file
 * (presumably so it can be included again with a different SHIFT).
 */
#undef Reg
#undef XMM_ONLY
#undef B
#undef W
#undef L
#undef Q
#undef SUFFIX
#undef SHIFT
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette