Changeset 13337 in vbox for trunk/src/recompiler_new/target-i386
- Timestamp:
- Oct 16, 2008 11:59:21 AM (16 years ago)
- Location:
- trunk/src/recompiler_new/target-i386
- Files:
-
- 3 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/src/recompiler_new/target-i386/cpu.h
r13230 r13337 267 267 #define MSR_MCG_CTL 0x17b 268 268 269 #define MSR_IA32_PERF_STATUS 0x198 270 269 271 #define MSR_PAT 0x277 270 272 … … 494 496 uint16_t _w[2]; 495 497 uint32_t _l[1]; 498 float32 _s[2]; 496 499 uint64_t q; 497 500 } MMXReg; … … 508 511 #define MMX_W(n) _w[3 - (n)] 509 512 #define MMX_L(n) _l[1 - (n)] 513 #define MMX_S(n) _s[1 - (n)] 510 514 #else 511 515 #define XMM_B(n) _b[n] … … 519 523 #define MMX_W(n) _w[n] 520 524 #define MMX_L(n) _l[n] 525 #define MMX_S(n) _s[n] 521 526 #endif 522 527 #define MMX_Q(n) q … … 555 560 556 561 target_ulong cr[5]; /* NOTE: cr1 is unused */ 557 uint 32_t a20_mask;562 uint64_t a20_mask; 558 563 559 564 /* FPU state */ … … 657 662 uint32_t cpuid_ext2_features; 658 663 uint32_t cpuid_ext3_features; 664 uint32_t cpuid_apic_id; 659 665 660 666 #ifndef VBOX -
trunk/src/recompiler_new/target-i386/helper.c
r13117 r13337 30 30 # include <VBox/err.h> 31 31 #endif 32 #include "exec.h" 32 33 #ifndef VBOX 34 #include <stdarg.h> 35 #include <stdlib.h> 36 #include <stdio.h> 37 #include <string.h> 38 #include <inttypes.h> 39 #include <signal.h> 40 #include <assert.h> 41 #endif 42 43 #include "cpu.h" 44 #include "exec-all.h" 45 #include "svm.h" 46 #include "qemu-common.h" 33 47 34 48 //#define DEBUG_PCALL -
trunk/src/recompiler_new/target-i386/ops_sse.h
r11982 r13337 1 1 /* 2 * MMX/ SSE/SSE2/PNI support3 * 2 * MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4/PNI support 3 * 4 4 * Copyright (c) 2005 Fabrice Bellard 5 * Copyright (c) 2008 Intel Corporation <[email protected]> 5 6 * 6 7 * This library is free software; you can redistribute it and/or … … 18 19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 19 20 */ 20 21 21 /* 22 22 * Sun LGPL Disclaimer: For the avoidance of doubt, except that if any license choice … … 27 27 * of the LGPL is applied is otherwise unspecified. 28 28 */ 29 29 30 #if SHIFT == 0 30 31 #define Reg MMXReg … … 45 46 #endif 46 47 47 void OPPROTO glue(op_psrlw, SUFFIX)(void) 48 { 49 Reg *d, *s; 48 void glue(helper_psrlw, SUFFIX)(Reg *d, Reg *s) 49 { 50 50 int shift; 51 52 d = (Reg *)((char *)env + PARAM1);53 s = (Reg *)((char *)env + PARAM2);54 51 55 52 if (s->Q(0) > 15) { … … 74 71 } 75 72 76 void OPPROTO glue(op_psraw, SUFFIX)(void) 77 { 78 Reg *d, *s; 73 void glue(helper_psraw, SUFFIX)(Reg *d, Reg *s) 74 { 79 75 int shift; 80 81 d = (Reg *)((char *)env + PARAM1);82 s = (Reg *)((char *)env + PARAM2);83 76 84 77 if (s->Q(0) > 15) { … … 99 92 } 100 93 101 void OPPROTO glue(op_psllw, SUFFIX)(void) 102 { 103 Reg *d, *s; 94 void glue(helper_psllw, SUFFIX)(Reg *d, Reg *s) 95 { 104 96 int shift; 105 106 d = (Reg *)((char *)env + PARAM1);107 s = (Reg *)((char *)env + PARAM2);108 97 109 98 if (s->Q(0) > 15) { … … 128 117 } 129 118 130 void OPPROTO glue(op_psrld, SUFFIX)(void) 131 { 132 Reg *d, *s; 119 void glue(helper_psrld, SUFFIX)(Reg *d, Reg *s) 120 { 133 121 int shift; 134 135 d = (Reg *)((char *)env + PARAM1);136 s = (Reg *)((char *)env + PARAM2);137 122 138 123 if (s->Q(0) > 31) { … … 153 138 } 154 139 155 void OPPROTO glue(op_psrad, SUFFIX)(void) 156 { 157 Reg *d, *s; 140 void glue(helper_psrad, SUFFIX)(Reg *d, Reg *s) 141 { 158 142 int shift; 159 160 d = (Reg *)((char *)env + PARAM1);161 s = (Reg *)((char *)env + PARAM2);162 143 163 144 if (s->Q(0) > 31) { … … 174 155 } 175 156 
176 void OPPROTO glue(op_pslld, SUFFIX)(void) 177 { 178 Reg *d, *s; 157 void glue(helper_pslld, SUFFIX)(Reg *d, Reg *s) 158 { 179 159 int shift; 180 181 d = (Reg *)((char *)env + PARAM1);182 s = (Reg *)((char *)env + PARAM2);183 160 184 161 if (s->Q(0) > 31) { … … 199 176 } 200 177 201 void OPPROTO glue(op_psrlq, SUFFIX)(void) 202 { 203 Reg *d, *s; 178 void glue(helper_psrlq, SUFFIX)(Reg *d, Reg *s) 179 { 204 180 int shift; 205 206 d = (Reg *)((char *)env + PARAM1);207 s = (Reg *)((char *)env + PARAM2);208 181 209 182 if (s->Q(0) > 63) { … … 222 195 } 223 196 224 void OPPROTO glue(op_psllq, SUFFIX)(void) 225 { 226 Reg *d, *s; 197 void glue(helper_psllq, SUFFIX)(Reg *d, Reg *s) 198 { 227 199 int shift; 228 229 d = (Reg *)((char *)env + PARAM1);230 s = (Reg *)((char *)env + PARAM2);231 200 232 201 if (s->Q(0) > 63) { … … 246 215 247 216 #if SHIFT == 1 248 void OPPROTO glue(op_psrldq, SUFFIX)(void) 249 { 250 Reg *d, *s; 217 void glue(helper_psrldq, SUFFIX)(Reg *d, Reg *s) 218 { 251 219 int shift, i; 252 220 253 d = (Reg *)((char *)env + PARAM1);254 s = (Reg *)((char *)env + PARAM2);255 221 shift = s->L(0); 256 222 if (shift > 16) … … 263 229 } 264 230 265 void OPPROTO glue(op_pslldq, SUFFIX)(void) 266 { 267 Reg *d, *s; 231 void glue(helper_pslldq, SUFFIX)(Reg *d, Reg *s) 232 { 268 233 int shift, i; 269 234 270 d = (Reg *)((char *)env + PARAM1);271 s = (Reg *)((char *)env + PARAM2);272 235 shift = s->L(0); 273 236 if (shift > 16) … … 281 244 #endif 282 245 283 #define SSE_ OP_B(name, F)\284 void OPPROTO glue(name, SUFFIX) (void)\246 #define SSE_HELPER_B(name, F)\ 247 void glue(name, SUFFIX) (Reg *d, Reg *s)\ 285 248 {\ 286 Reg *d, *s;\287 d = (Reg *)((char *)env + PARAM1);\288 s = (Reg *)((char *)env + PARAM2);\289 249 d->B(0) = F(d->B(0), s->B(0));\ 290 250 d->B(1) = F(d->B(1), s->B(1));\ … … 307 267 } 308 268 309 #define SSE_ OP_W(name, F)\310 void OPPROTO glue(name, SUFFIX) (void)\269 #define SSE_HELPER_W(name, F)\ 270 void glue(name, SUFFIX) (Reg *d, Reg *s)\ 311 
271 {\ 312 Reg *d, *s;\313 d = (Reg *)((char *)env + PARAM1);\314 s = (Reg *)((char *)env + PARAM2);\315 272 d->W(0) = F(d->W(0), s->W(0));\ 316 273 d->W(1) = F(d->W(1), s->W(1));\ … … 325 282 } 326 283 327 #define SSE_ OP_L(name, F)\328 void OPPROTO glue(name, SUFFIX) (void)\284 #define SSE_HELPER_L(name, F)\ 285 void glue(name, SUFFIX) (Reg *d, Reg *s)\ 329 286 {\ 330 Reg *d, *s;\331 d = (Reg *)((char *)env + PARAM1);\332 s = (Reg *)((char *)env + PARAM2);\333 287 d->L(0) = F(d->L(0), s->L(0));\ 334 288 d->L(1) = F(d->L(1), s->L(1));\ … … 339 293 } 340 294 341 #define SSE_ OP_Q(name, F)\342 void OPPROTO glue(name, SUFFIX) (void)\295 #define SSE_HELPER_Q(name, F)\ 296 void glue(name, SUFFIX) (Reg *d, Reg *s)\ 343 297 {\ 344 Reg *d, *s;\345 d = (Reg *)((char *)env + PARAM1);\346 s = (Reg *)((char *)env + PARAM2);\347 298 d->Q(0) = F(d->Q(0), s->Q(0));\ 348 299 XMM_ONLY(\ … … 419 370 420 371 #define FMULLW(a, b) (a) * (b) 372 #define FMULHRW(a, b) ((int16_t)(a) * (int16_t)(b) + 0x8000) >> 16 421 373 #define FMULHUW(a, b) (a) * (b) >> 16 422 374 #define FMULHW(a, b) (int16_t)(a) * (int16_t)(b) >> 16 … … 425 377 #endif 426 378 427 SSE_OP_B(op_paddb, FADD) 428 SSE_OP_W(op_paddw, FADD) 429 SSE_OP_L(op_paddl, FADD) 430 SSE_OP_Q(op_paddq, FADD) 431 432 SSE_OP_B(op_psubb, FSUB) 433 SSE_OP_W(op_psubw, FSUB) 434 SSE_OP_L(op_psubl, FSUB) 435 SSE_OP_Q(op_psubq, FSUB) 436 437 SSE_OP_B(op_paddusb, FADDUB) 438 SSE_OP_B(op_paddsb, FADDSB) 439 SSE_OP_B(op_psubusb, FSUBUB) 440 SSE_OP_B(op_psubsb, FSUBSB) 441 442 SSE_OP_W(op_paddusw, FADDUW) 443 SSE_OP_W(op_paddsw, FADDSW) 444 SSE_OP_W(op_psubusw, FSUBUW) 445 SSE_OP_W(op_psubsw, FSUBSW) 446 447 SSE_OP_B(op_pminub, FMINUB) 448 SSE_OP_B(op_pmaxub, FMAXUB) 449 450 SSE_OP_W(op_pminsw, FMINSW) 451 SSE_OP_W(op_pmaxsw, FMAXSW) 452 453 SSE_OP_Q(op_pand, FAND) 454 SSE_OP_Q(op_pandn, FANDN) 455 SSE_OP_Q(op_por, FOR) 456 SSE_OP_Q(op_pxor, FXOR) 457 458 SSE_OP_B(op_pcmpgtb, FCMPGTB) 459 SSE_OP_W(op_pcmpgtw, FCMPGTW) 460 SSE_OP_L(op_pcmpgtl, 
FCMPGTL) 461 462 SSE_OP_B(op_pcmpeqb, FCMPEQ) 463 SSE_OP_W(op_pcmpeqw, FCMPEQ) 464 SSE_OP_L(op_pcmpeql, FCMPEQ) 465 466 SSE_OP_W(op_pmullw, FMULLW) 467 SSE_OP_W(op_pmulhuw, FMULHUW) 468 SSE_OP_W(op_pmulhw, FMULHW) 469 470 SSE_OP_B(op_pavgb, FAVG) 471 SSE_OP_W(op_pavgw, FAVG) 472 473 void OPPROTO glue(op_pmuludq, SUFFIX) (void) 474 { 475 Reg *d, *s; 476 d = (Reg *)((char *)env + PARAM1); 477 s = (Reg *)((char *)env + PARAM2); 478 379 SSE_HELPER_B(helper_paddb, FADD) 380 SSE_HELPER_W(helper_paddw, FADD) 381 SSE_HELPER_L(helper_paddl, FADD) 382 SSE_HELPER_Q(helper_paddq, FADD) 383 384 SSE_HELPER_B(helper_psubb, FSUB) 385 SSE_HELPER_W(helper_psubw, FSUB) 386 SSE_HELPER_L(helper_psubl, FSUB) 387 SSE_HELPER_Q(helper_psubq, FSUB) 388 389 SSE_HELPER_B(helper_paddusb, FADDUB) 390 SSE_HELPER_B(helper_paddsb, FADDSB) 391 SSE_HELPER_B(helper_psubusb, FSUBUB) 392 SSE_HELPER_B(helper_psubsb, FSUBSB) 393 394 SSE_HELPER_W(helper_paddusw, FADDUW) 395 SSE_HELPER_W(helper_paddsw, FADDSW) 396 SSE_HELPER_W(helper_psubusw, FSUBUW) 397 SSE_HELPER_W(helper_psubsw, FSUBSW) 398 399 SSE_HELPER_B(helper_pminub, FMINUB) 400 SSE_HELPER_B(helper_pmaxub, FMAXUB) 401 402 SSE_HELPER_W(helper_pminsw, FMINSW) 403 SSE_HELPER_W(helper_pmaxsw, FMAXSW) 404 405 SSE_HELPER_Q(helper_pand, FAND) 406 SSE_HELPER_Q(helper_pandn, FANDN) 407 SSE_HELPER_Q(helper_por, FOR) 408 SSE_HELPER_Q(helper_pxor, FXOR) 409 410 SSE_HELPER_B(helper_pcmpgtb, FCMPGTB) 411 SSE_HELPER_W(helper_pcmpgtw, FCMPGTW) 412 SSE_HELPER_L(helper_pcmpgtl, FCMPGTL) 413 414 SSE_HELPER_B(helper_pcmpeqb, FCMPEQ) 415 SSE_HELPER_W(helper_pcmpeqw, FCMPEQ) 416 SSE_HELPER_L(helper_pcmpeql, FCMPEQ) 417 418 SSE_HELPER_W(helper_pmullw, FMULLW) 419 #if SHIFT == 0 420 SSE_HELPER_W(helper_pmulhrw, FMULHRW) 421 #endif 422 SSE_HELPER_W(helper_pmulhuw, FMULHUW) 423 SSE_HELPER_W(helper_pmulhw, FMULHW) 424 425 SSE_HELPER_B(helper_pavgb, FAVG) 426 SSE_HELPER_W(helper_pavgw, FAVG) 427 428 void glue(helper_pmuludq, SUFFIX) (Reg *d, Reg *s) 429 { 479 430 d->Q(0) = 
(uint64_t)s->L(0) * (uint64_t)d->L(0); 480 431 #if SHIFT == 1 … … 483 434 } 484 435 485 void OPPROTO glue(op_pmaddwd, SUFFIX) (void)436 void glue(helper_pmaddwd, SUFFIX) (Reg *d, Reg *s) 486 437 { 487 438 int i; 488 Reg *d, *s;489 d = (Reg *)((char *)env + PARAM1);490 s = (Reg *)((char *)env + PARAM2);491 439 492 440 for(i = 0; i < (2 << SHIFT); i++) { … … 506 454 } 507 455 #endif 508 void OPPROTO glue(op_psadbw, SUFFIX) (void)456 void glue(helper_psadbw, SUFFIX) (Reg *d, Reg *s) 509 457 { 510 458 unsigned int val; 511 Reg *d, *s;512 d = (Reg *)((char *)env + PARAM1);513 s = (Reg *)((char *)env + PARAM2);514 459 515 460 val = 0; … … 537 482 } 538 483 539 void OPPROTO glue(op_maskmov, SUFFIX) (void)484 void glue(helper_maskmov, SUFFIX) (Reg *d, Reg *s, target_ulong a0) 540 485 { 541 486 int i; 542 Reg *d, *s;543 d = (Reg *)((char *)env + PARAM1);544 s = (Reg *)((char *)env + PARAM2);545 487 for(i = 0; i < (8 << SHIFT); i++) { 546 488 if (s->B(i) & 0x80) 547 stb( A0 + i, d->B(i));489 stb(a0 + i, d->B(i)); 548 490 } 549 491 FORCE_RET(); 550 492 } 551 493 552 void OPPROTO glue(op_movl_mm_T0, SUFFIX) (void) 553 { 554 Reg *d; 555 d = (Reg *)((char *)env + PARAM1); 556 d->L(0) = T0; 494 void glue(helper_movl_mm_T0, SUFFIX) (Reg *d, uint32_t val) 495 { 496 d->L(0) = val; 557 497 d->L(1) = 0; 558 498 #if SHIFT == 1 … … 561 501 } 562 502 563 void OPPROTO glue(op_movl_T0_mm, SUFFIX) (void)564 {565 Reg *s;566 s = (Reg *)((char *)env + PARAM1);567 T0 = s->L(0);568 }569 570 503 #ifdef TARGET_X86_64 571 void OPPROTO glue(op_movq_mm_T0, SUFFIX) (void) 572 { 573 Reg *d; 574 d = (Reg *)((char *)env + PARAM1); 575 d->Q(0) = T0; 504 void glue(helper_movq_mm_T0, SUFFIX) (Reg *d, uint64_t val) 505 { 506 d->Q(0) = val; 576 507 #if SHIFT == 1 577 508 d->Q(1) = 0; 578 509 #endif 579 510 } 580 581 void OPPROTO glue(op_movq_T0_mm, SUFFIX) (void)582 {583 Reg *s;584 s = (Reg *)((char *)env + PARAM1);585 T0 = s->Q(0);586 }587 511 #endif 588 512 589 513 #if SHIFT == 0 590 void OPPROTO 
glue(op_pshufw, SUFFIX) (void) 591 { 592 #if __GCC__ == 3 || defined(RT_ARCH_AMD64) /* VBOX hack in #else */ 593 Reg r, *d, *s; 594 int order; 595 d = (Reg *)((char *)env + PARAM1); 596 s = (Reg *)((char *)env + PARAM2); 597 order = PARAM3; 514 void glue(helper_pshufw, SUFFIX) (Reg *d, Reg *s, int order) 515 { 516 Reg r; 598 517 r.W(0) = s->W(order & 3); 599 518 r.W(1) = s->W((order >> 2) & 3); … … 601 520 r.W(3) = s->W((order >> 6) & 3); 602 521 *d = r; 522 } 603 523 #else 604 Reg *s; 605 int order; 606 uint32_t l0, l1; 607 s = (Reg *)((char *)env + PARAM2); 608 order = PARAM3; 609 l0 = s->W(order & 3); 610 l0 |= (uint32_t)s->W((order >> 2) & 3) << 16; 611 l1 = s->W((order >> 4) & 3); 612 l1 |= (uint32_t)s->W((order >> 6) & 3) << 16; 613 614 s = (Reg *)((char *)env + PARAM1); 615 s->_l[0] = l0; 616 s->_l[1] = l1; 617 #endif 618 } 619 #else 620 void OPPROTO op_shufps(void) 621 { 622 Reg r, *d, *s; 623 int order; 624 d = (Reg *)((char *)env + PARAM1); 625 s = (Reg *)((char *)env + PARAM2); 626 order = PARAM3; 524 void helper_shufps(Reg *d, Reg *s, int order) 525 { 526 Reg r; 627 527 r.L(0) = d->L(order & 3); 628 528 r.L(1) = d->L((order >> 2) & 3); … … 632 532 } 633 533 634 void OPPROTO op_shufpd(void) 635 { 636 Reg r, *d, *s; 637 int order; 638 d = (Reg *)((char *)env + PARAM1); 639 s = (Reg *)((char *)env + PARAM2); 640 order = PARAM3; 534 void helper_shufpd(Reg *d, Reg *s, int order) 535 { 536 Reg r; 641 537 r.Q(0) = d->Q(order & 1); 642 538 r.Q(1) = s->Q((order >> 1) & 1); … … 644 540 } 645 541 646 void OPPROTO glue(op_pshufd, SUFFIX) (void) 647 { 648 Reg r, *d, *s; 649 int order; 650 d = (Reg *)((char *)env + PARAM1); 651 s = (Reg *)((char *)env + PARAM2); 652 order = PARAM3; 542 void glue(helper_pshufd, SUFFIX) (Reg *d, Reg *s, int order) 543 { 544 Reg r; 653 545 r.L(0) = s->L(order & 3); 654 546 r.L(1) = s->L((order >> 2) & 3); … … 658 550 } 659 551 660 void OPPROTO glue(op_pshuflw, SUFFIX) (void) 661 { 662 Reg r, *d, *s; 663 int order; 664 d = (Reg *)((char 
*)env + PARAM1); 665 s = (Reg *)((char *)env + PARAM2); 666 order = PARAM3; 552 void glue(helper_pshuflw, SUFFIX) (Reg *d, Reg *s, int order) 553 { 554 Reg r; 667 555 r.W(0) = s->W(order & 3); 668 556 r.W(1) = s->W((order >> 2) & 3); … … 673 561 } 674 562 675 void OPPROTO glue(op_pshufhw, SUFFIX) (void) 676 { 677 Reg r, *d, *s; 678 int order; 679 d = (Reg *)((char *)env + PARAM1); 680 s = (Reg *)((char *)env + PARAM2); 681 order = PARAM3; 563 void glue(helper_pshufhw, SUFFIX) (Reg *d, Reg *s, int order) 564 { 565 Reg r; 682 566 r.Q(0) = s->Q(0); 683 567 r.W(4) = s->W(4 + (order & 3)); … … 693 577 /* XXX: not accurate */ 694 578 695 #define SSE_ OP_S(name, F)\696 void OPPROTO op_ ## name ## ps (void)\579 #define SSE_HELPER_S(name, F)\ 580 void helper_ ## name ## ps (Reg *d, Reg *s)\ 697 581 {\ 698 Reg *d, *s;\699 d = (Reg *)((char *)env + PARAM1);\700 s = (Reg *)((char *)env + PARAM2);\701 582 d->XMM_S(0) = F(32, d->XMM_S(0), s->XMM_S(0));\ 702 583 d->XMM_S(1) = F(32, d->XMM_S(1), s->XMM_S(1));\ … … 705 586 }\ 706 587 \ 707 void OPPROTO op_ ## name ## ss (void)\588 void helper_ ## name ## ss (Reg *d, Reg *s)\ 708 589 {\ 709 Reg *d, *s;\710 d = (Reg *)((char *)env + PARAM1);\711 s = (Reg *)((char *)env + PARAM2);\712 590 d->XMM_S(0) = F(32, d->XMM_S(0), s->XMM_S(0));\ 713 591 }\ 714 void OPPROTO op_ ## name ## pd (void)\592 void helper_ ## name ## pd (Reg *d, Reg *s)\ 715 593 {\ 716 Reg *d, *s;\717 d = (Reg *)((char *)env + PARAM1);\718 s = (Reg *)((char *)env + PARAM2);\719 594 d->XMM_D(0) = F(64, d->XMM_D(0), s->XMM_D(0));\ 720 595 d->XMM_D(1) = F(64, d->XMM_D(1), s->XMM_D(1));\ 721 596 }\ 722 597 \ 723 void OPPROTO op_ ## name ## sd (void)\598 void helper_ ## name ## sd (Reg *d, Reg *s)\ 724 599 {\ 725 Reg *d, *s;\726 d = (Reg *)((char *)env + PARAM1);\727 s = (Reg *)((char *)env + PARAM2);\728 600 d->XMM_D(0) = F(64, d->XMM_D(0), s->XMM_D(0));\ 729 601 } … … 737 609 #define FPU_SQRT(size, a, b) float ## size ## _sqrt(b, &env->sse_status) 738 610 739 SSE_ 
OP_S(add, FPU_ADD)740 SSE_ OP_S(sub, FPU_SUB)741 SSE_ OP_S(mul, FPU_MUL)742 SSE_ OP_S(div, FPU_DIV)743 SSE_ OP_S(min, FPU_MIN)744 SSE_ OP_S(max, FPU_MAX)745 SSE_ OP_S(sqrt, FPU_SQRT)611 SSE_HELPER_S(add, FPU_ADD) 612 SSE_HELPER_S(sub, FPU_SUB) 613 SSE_HELPER_S(mul, FPU_MUL) 614 SSE_HELPER_S(div, FPU_DIV) 615 SSE_HELPER_S(min, FPU_MIN) 616 SSE_HELPER_S(max, FPU_MAX) 617 SSE_HELPER_S(sqrt, FPU_SQRT) 746 618 747 619 748 620 /* float to float conversions */ 749 void OPPROTO op_cvtps2pd(void)621 void helper_cvtps2pd(Reg *d, Reg *s) 750 622 { 751 623 float32 s0, s1; 752 Reg *d, *s;753 d = (Reg *)((char *)env + PARAM1);754 s = (Reg *)((char *)env + PARAM2);755 624 s0 = s->XMM_S(0); 756 625 s1 = s->XMM_S(1); … … 759 628 } 760 629 761 void OPPROTO op_cvtpd2ps(void) 762 { 763 Reg *d, *s; 764 d = (Reg *)((char *)env + PARAM1); 765 s = (Reg *)((char *)env + PARAM2); 630 void helper_cvtpd2ps(Reg *d, Reg *s) 631 { 766 632 d->XMM_S(0) = float64_to_float32(s->XMM_D(0), &env->sse_status); 767 633 d->XMM_S(1) = float64_to_float32(s->XMM_D(1), &env->sse_status); … … 769 635 } 770 636 771 void OPPROTO op_cvtss2sd(void) 772 { 773 Reg *d, *s; 774 d = (Reg *)((char *)env + PARAM1); 775 s = (Reg *)((char *)env + PARAM2); 637 void helper_cvtss2sd(Reg *d, Reg *s) 638 { 776 639 d->XMM_D(0) = float32_to_float64(s->XMM_S(0), &env->sse_status); 777 640 } 778 641 779 void OPPROTO op_cvtsd2ss(void) 780 { 781 Reg *d, *s; 782 d = (Reg *)((char *)env + PARAM1); 783 s = (Reg *)((char *)env + PARAM2); 642 void helper_cvtsd2ss(Reg *d, Reg *s) 643 { 784 644 d->XMM_S(0) = float64_to_float32(s->XMM_D(0), &env->sse_status); 785 645 } 786 646 787 647 /* integer to float */ 788 void OPPROTO op_cvtdq2ps(void) 789 { 790 XMMReg *d = (XMMReg *)((char *)env + PARAM1); 791 XMMReg *s = (XMMReg *)((char *)env + PARAM2); 648 void helper_cvtdq2ps(Reg *d, Reg *s) 649 { 792 650 d->XMM_S(0) = int32_to_float32(s->XMM_L(0), &env->sse_status); 793 651 d->XMM_S(1) = int32_to_float32(s->XMM_L(1), &env->sse_status); … … 796 
654 } 797 655 798 void OPPROTO op_cvtdq2pd(void) 799 { 800 XMMReg *d = (XMMReg *)((char *)env + PARAM1); 801 XMMReg *s = (XMMReg *)((char *)env + PARAM2); 656 void helper_cvtdq2pd(Reg *d, Reg *s) 657 { 802 658 int32_t l0, l1; 803 659 l0 = (int32_t)s->XMM_L(0); … … 807 663 } 808 664 809 void OPPROTO op_cvtpi2ps(void) 810 { 811 XMMReg *d = (Reg *)((char *)env + PARAM1); 812 MMXReg *s = (MMXReg *)((char *)env + PARAM2); 665 void helper_cvtpi2ps(XMMReg *d, MMXReg *s) 666 { 813 667 d->XMM_S(0) = int32_to_float32(s->MMX_L(0), &env->sse_status); 814 668 d->XMM_S(1) = int32_to_float32(s->MMX_L(1), &env->sse_status); 815 669 } 816 670 817 void OPPROTO op_cvtpi2pd(void) 818 { 819 XMMReg *d = (Reg *)((char *)env + PARAM1); 820 MMXReg *s = (MMXReg *)((char *)env + PARAM2); 671 void helper_cvtpi2pd(XMMReg *d, MMXReg *s) 672 { 821 673 d->XMM_D(0) = int32_to_float64(s->MMX_L(0), &env->sse_status); 822 674 d->XMM_D(1) = int32_to_float64(s->MMX_L(1), &env->sse_status); 823 675 } 824 676 825 void OPPROTO op_cvtsi2ss(void) 826 { 827 XMMReg *d = (Reg *)((char *)env + PARAM1); 828 d->XMM_S(0) = int32_to_float32(T0, &env->sse_status); 829 } 830 831 void OPPROTO op_cvtsi2sd(void) 832 { 833 XMMReg *d = (Reg *)((char *)env + PARAM1); 834 d->XMM_D(0) = int32_to_float64(T0, &env->sse_status); 677 void helper_cvtsi2ss(XMMReg *d, uint32_t val) 678 { 679 d->XMM_S(0) = int32_to_float32(val, &env->sse_status); 680 } 681 682 void helper_cvtsi2sd(XMMReg *d, uint32_t val) 683 { 684 d->XMM_D(0) = int32_to_float64(val, &env->sse_status); 835 685 } 836 686 837 687 #ifdef TARGET_X86_64 838 void OPPROTO op_cvtsq2ss(void) 839 { 840 XMMReg *d = (Reg *)((char *)env + PARAM1); 841 d->XMM_S(0) = int64_to_float32(T0, &env->sse_status); 842 } 843 844 void OPPROTO op_cvtsq2sd(void) 845 { 846 XMMReg *d = (Reg *)((char *)env + PARAM1); 847 d->XMM_D(0) = int64_to_float64(T0, &env->sse_status); 688 void helper_cvtsq2ss(XMMReg *d, uint64_t val) 689 { 690 d->XMM_S(0) = int64_to_float32(val, &env->sse_status); 691 } 
692 693 void helper_cvtsq2sd(XMMReg *d, uint64_t val) 694 { 695 d->XMM_D(0) = int64_to_float64(val, &env->sse_status); 848 696 } 849 697 #endif 850 698 851 699 /* float to integer */ 852 void OPPROTO op_cvtps2dq(void) 853 { 854 XMMReg *d = (XMMReg *)((char *)env + PARAM1); 855 XMMReg *s = (XMMReg *)((char *)env + PARAM2); 700 void helper_cvtps2dq(XMMReg *d, XMMReg *s) 701 { 856 702 d->XMM_L(0) = float32_to_int32(s->XMM_S(0), &env->sse_status); 857 703 d->XMM_L(1) = float32_to_int32(s->XMM_S(1), &env->sse_status); … … 860 706 } 861 707 862 void OPPROTO op_cvtpd2dq(void) 863 { 864 XMMReg *d = (XMMReg *)((char *)env + PARAM1); 865 XMMReg *s = (XMMReg *)((char *)env + PARAM2); 708 void helper_cvtpd2dq(XMMReg *d, XMMReg *s) 709 { 866 710 d->XMM_L(0) = float64_to_int32(s->XMM_D(0), &env->sse_status); 867 711 d->XMM_L(1) = float64_to_int32(s->XMM_D(1), &env->sse_status); … … 869 713 } 870 714 871 void OPPROTO op_cvtps2pi(void) 872 { 873 MMXReg *d = (MMXReg *)((char *)env + PARAM1); 874 XMMReg *s = (XMMReg *)((char *)env + PARAM2); 715 void helper_cvtps2pi(MMXReg *d, XMMReg *s) 716 { 875 717 d->MMX_L(0) = float32_to_int32(s->XMM_S(0), &env->sse_status); 876 718 d->MMX_L(1) = float32_to_int32(s->XMM_S(1), &env->sse_status); 877 719 } 878 720 879 void OPPROTO op_cvtpd2pi(void) 880 { 881 MMXReg *d = (MMXReg *)((char *)env + PARAM1); 882 XMMReg *s = (XMMReg *)((char *)env + PARAM2); 721 void helper_cvtpd2pi(MMXReg *d, XMMReg *s) 722 { 883 723 d->MMX_L(0) = float64_to_int32(s->XMM_D(0), &env->sse_status); 884 724 d->MMX_L(1) = float64_to_int32(s->XMM_D(1), &env->sse_status); 885 725 } 886 726 887 void OPPROTO op_cvtss2si(void) 888 { 889 XMMReg *s = (XMMReg *)((char *)env + PARAM1); 890 T0 = float32_to_int32(s->XMM_S(0), &env->sse_status); 891 } 892 893 void OPPROTO op_cvtsd2si(void) 894 { 895 XMMReg *s = (XMMReg *)((char *)env + PARAM1); 896 T0 = float64_to_int32(s->XMM_D(0), &env->sse_status); 727 int32_t helper_cvtss2si(XMMReg *s) 728 { 729 return 
float32_to_int32(s->XMM_S(0), &env->sse_status); 730 } 731 732 int32_t helper_cvtsd2si(XMMReg *s) 733 { 734 return float64_to_int32(s->XMM_D(0), &env->sse_status); 897 735 } 898 736 899 737 #ifdef TARGET_X86_64 900 void OPPROTO op_cvtss2sq(void) 901 { 902 XMMReg *s = (XMMReg *)((char *)env + PARAM1); 903 T0 = float32_to_int64(s->XMM_S(0), &env->sse_status); 904 } 905 906 void OPPROTO op_cvtsd2sq(void) 907 { 908 XMMReg *s = (XMMReg *)((char *)env + PARAM1); 909 T0 = float64_to_int64(s->XMM_D(0), &env->sse_status); 738 int64_t helper_cvtss2sq(XMMReg *s) 739 { 740 return float32_to_int64(s->XMM_S(0), &env->sse_status); 741 } 742 743 int64_t helper_cvtsd2sq(XMMReg *s) 744 { 745 return float64_to_int64(s->XMM_D(0), &env->sse_status); 910 746 } 911 747 #endif 912 748 913 749 /* float to integer truncated */ 914 void OPPROTO op_cvttps2dq(void) 915 { 916 XMMReg *d = (XMMReg *)((char *)env + PARAM1); 917 XMMReg *s = (XMMReg *)((char *)env + PARAM2); 750 void helper_cvttps2dq(XMMReg *d, XMMReg *s) 751 { 918 752 d->XMM_L(0) = float32_to_int32_round_to_zero(s->XMM_S(0), &env->sse_status); 919 753 d->XMM_L(1) = float32_to_int32_round_to_zero(s->XMM_S(1), &env->sse_status); … … 922 756 } 923 757 924 void OPPROTO op_cvttpd2dq(void) 925 { 926 XMMReg *d = (XMMReg *)((char *)env + PARAM1); 927 XMMReg *s = (XMMReg *)((char *)env + PARAM2); 758 void helper_cvttpd2dq(XMMReg *d, XMMReg *s) 759 { 928 760 d->XMM_L(0) = float64_to_int32_round_to_zero(s->XMM_D(0), &env->sse_status); 929 761 d->XMM_L(1) = float64_to_int32_round_to_zero(s->XMM_D(1), &env->sse_status); … … 931 763 } 932 764 933 void OPPROTO op_cvttps2pi(void) 934 { 935 MMXReg *d = (MMXReg *)((char *)env + PARAM1); 936 XMMReg *s = (XMMReg *)((char *)env + PARAM2); 765 void helper_cvttps2pi(MMXReg *d, XMMReg *s) 766 { 937 767 d->MMX_L(0) = float32_to_int32_round_to_zero(s->XMM_S(0), &env->sse_status); 938 768 d->MMX_L(1) = float32_to_int32_round_to_zero(s->XMM_S(1), &env->sse_status); 939 769 } 940 770 941 void OPPROTO 
op_cvttpd2pi(void) 942 { 943 MMXReg *d = (MMXReg *)((char *)env + PARAM1); 944 XMMReg *s = (XMMReg *)((char *)env + PARAM2); 771 void helper_cvttpd2pi(MMXReg *d, XMMReg *s) 772 { 945 773 d->MMX_L(0) = float64_to_int32_round_to_zero(s->XMM_D(0), &env->sse_status); 946 774 d->MMX_L(1) = float64_to_int32_round_to_zero(s->XMM_D(1), &env->sse_status); 947 775 } 948 776 949 void OPPROTO op_cvttss2si(void) 950 { 951 XMMReg *s = (XMMReg *)((char *)env + PARAM1); 952 T0 = float32_to_int32_round_to_zero(s->XMM_S(0), &env->sse_status); 953 } 954 955 void OPPROTO op_cvttsd2si(void) 956 { 957 XMMReg *s = (XMMReg *)((char *)env + PARAM1); 958 T0 = float64_to_int32_round_to_zero(s->XMM_D(0), &env->sse_status); 777 int32_t helper_cvttss2si(XMMReg *s) 778 { 779 return float32_to_int32_round_to_zero(s->XMM_S(0), &env->sse_status); 780 } 781 782 int32_t helper_cvttsd2si(XMMReg *s) 783 { 784 return float64_to_int32_round_to_zero(s->XMM_D(0), &env->sse_status); 959 785 } 960 786 961 787 #ifdef TARGET_X86_64 962 void OPPROTO op_cvttss2sq(void) 963 { 964 XMMReg *s = (XMMReg *)((char *)env + PARAM1); 965 T0 = float32_to_int64_round_to_zero(s->XMM_S(0), &env->sse_status); 966 } 967 968 void OPPROTO op_cvttsd2sq(void) 969 { 970 XMMReg *s = (XMMReg *)((char *)env + PARAM1); 971 T0 = float64_to_int64_round_to_zero(s->XMM_D(0), &env->sse_status); 972 } 973 #endif 974 975 void OPPROTO op_rsqrtps(void) 976 { 977 XMMReg *d = (XMMReg *)((char *)env + PARAM1); 978 XMMReg *s = (XMMReg *)((char *)env + PARAM2); 788 int64_t helper_cvttss2sq(XMMReg *s) 789 { 790 return float32_to_int64_round_to_zero(s->XMM_S(0), &env->sse_status); 791 } 792 793 int64_t helper_cvttsd2sq(XMMReg *s) 794 { 795 return float64_to_int64_round_to_zero(s->XMM_D(0), &env->sse_status); 796 } 797 #endif 798 799 void helper_rsqrtps(XMMReg *d, XMMReg *s) 800 { 979 801 d->XMM_S(0) = approx_rsqrt(s->XMM_S(0)); 980 802 d->XMM_S(1) = approx_rsqrt(s->XMM_S(1)); … … 983 805 } 984 806 985 void OPPROTO op_rsqrtss(void) 986 { 987 XMMReg *d = 
(XMMReg *)((char *)env + PARAM1); 988 XMMReg *s = (XMMReg *)((char *)env + PARAM2); 807 void helper_rsqrtss(XMMReg *d, XMMReg *s) 808 { 989 809 d->XMM_S(0) = approx_rsqrt(s->XMM_S(0)); 990 810 } 991 811 992 void OPPROTO op_rcpps(void) 993 { 994 XMMReg *d = (XMMReg *)((char *)env + PARAM1); 995 XMMReg *s = (XMMReg *)((char *)env + PARAM2); 812 void helper_rcpps(XMMReg *d, XMMReg *s) 813 { 996 814 d->XMM_S(0) = approx_rcp(s->XMM_S(0)); 997 815 d->XMM_S(1) = approx_rcp(s->XMM_S(1)); … … 1000 818 } 1001 819 1002 void OPPROTO op_rcpss(void) 1003 { 1004 XMMReg *d = (XMMReg *)((char *)env + PARAM1); 1005 XMMReg *s = (XMMReg *)((char *)env + PARAM2); 820 void helper_rcpss(XMMReg *d, XMMReg *s) 821 { 1006 822 d->XMM_S(0) = approx_rcp(s->XMM_S(0)); 1007 823 } 1008 824 1009 void OPPROTO op_haddps(void) 1010 { 1011 XMMReg *d = (XMMReg *)((char *)env + PARAM1); 1012 XMMReg *s = (XMMReg *)((char *)env + PARAM2); 825 void helper_haddps(XMMReg *d, XMMReg *s) 826 { 1013 827 XMMReg r; 1014 828 r.XMM_S(0) = d->XMM_S(0) + d->XMM_S(1); … … 1019 833 } 1020 834 1021 void OPPROTO op_haddpd(void) 1022 { 1023 XMMReg *d = (XMMReg *)((char *)env + PARAM1); 1024 XMMReg *s = (XMMReg *)((char *)env + PARAM2); 835 void helper_haddpd(XMMReg *d, XMMReg *s) 836 { 1025 837 XMMReg r; 1026 838 r.XMM_D(0) = d->XMM_D(0) + d->XMM_D(1); … … 1029 841 } 1030 842 1031 void OPPROTO op_hsubps(void) 1032 { 1033 XMMReg *d = (XMMReg *)((char *)env + PARAM1); 1034 XMMReg *s = (XMMReg *)((char *)env + PARAM2); 843 void helper_hsubps(XMMReg *d, XMMReg *s) 844 { 1035 845 XMMReg r; 1036 846 r.XMM_S(0) = d->XMM_S(0) - d->XMM_S(1); … … 1041 851 } 1042 852 1043 void OPPROTO op_hsubpd(void) 1044 { 1045 XMMReg *d = (XMMReg *)((char *)env + PARAM1); 1046 XMMReg *s = (XMMReg *)((char *)env + PARAM2); 853 void helper_hsubpd(XMMReg *d, XMMReg *s) 854 { 1047 855 XMMReg r; 1048 856 r.XMM_D(0) = d->XMM_D(0) - d->XMM_D(1); … … 1051 859 } 1052 860 1053 void OPPROTO op_addsubps(void) 1054 { 1055 XMMReg *d = (XMMReg *)((char *)env + 
PARAM1); 1056 XMMReg *s = (XMMReg *)((char *)env + PARAM2); 861 void helper_addsubps(XMMReg *d, XMMReg *s) 862 { 1057 863 d->XMM_S(0) = d->XMM_S(0) - s->XMM_S(0); 1058 864 d->XMM_S(1) = d->XMM_S(1) + s->XMM_S(1); … … 1061 867 } 1062 868 1063 void OPPROTO op_addsubpd(void) 1064 { 1065 XMMReg *d = (XMMReg *)((char *)env + PARAM1); 1066 XMMReg *s = (XMMReg *)((char *)env + PARAM2); 869 void helper_addsubpd(XMMReg *d, XMMReg *s) 870 { 1067 871 d->XMM_D(0) = d->XMM_D(0) - s->XMM_D(0); 1068 872 d->XMM_D(1) = d->XMM_D(1) + s->XMM_D(1); … … 1070 874 1071 875 /* XXX: unordered */ 1072 #define SSE_ OP_CMP(name, F)\1073 void OPPROTO op_ ## name ## ps (void)\876 #define SSE_HELPER_CMP(name, F)\ 877 void helper_ ## name ## ps (Reg *d, Reg *s)\ 1074 878 {\ 1075 Reg *d, *s;\1076 d = (Reg *)((char *)env + PARAM1);\1077 s = (Reg *)((char *)env + PARAM2);\1078 879 d->XMM_L(0) = F(32, d->XMM_S(0), s->XMM_S(0));\ 1079 880 d->XMM_L(1) = F(32, d->XMM_S(1), s->XMM_S(1));\ … … 1082 883 }\ 1083 884 \ 1084 void OPPROTO op_ ## name ## ss (void)\885 void helper_ ## name ## ss (Reg *d, Reg *s)\ 1085 886 {\ 1086 Reg *d, *s;\1087 d = (Reg *)((char *)env + PARAM1);\1088 s = (Reg *)((char *)env + PARAM2);\1089 887 d->XMM_L(0) = F(32, d->XMM_S(0), s->XMM_S(0));\ 1090 888 }\ 1091 void OPPROTO op_ ## name ## pd (void)\889 void helper_ ## name ## pd (Reg *d, Reg *s)\ 1092 890 {\ 1093 Reg *d, *s;\1094 d = (Reg *)((char *)env + PARAM1);\1095 s = (Reg *)((char *)env + PARAM2);\1096 891 d->XMM_Q(0) = F(64, d->XMM_D(0), s->XMM_D(0));\ 1097 892 d->XMM_Q(1) = F(64, d->XMM_D(1), s->XMM_D(1));\ 1098 893 }\ 1099 894 \ 1100 void OPPROTO op_ ## name ## sd (void)\895 void helper_ ## name ## sd (Reg *d, Reg *s)\ 1101 896 {\ 1102 Reg *d, *s;\1103 d = (Reg *)((char *)env + PARAM1);\1104 s = (Reg *)((char *)env + PARAM2);\1105 897 d->XMM_Q(0) = F(64, d->XMM_D(0), s->XMM_D(0));\ 1106 898 } … … 1115 907 #define FPU_CMPORD(size, a, b) float ## size ## _unordered(a, b, &env->sse_status) ? 
0 : -1 1116 908 1117 SSE_ OP_CMP(cmpeq, FPU_CMPEQ)1118 SSE_ OP_CMP(cmplt, FPU_CMPLT)1119 SSE_ OP_CMP(cmple, FPU_CMPLE)1120 SSE_ OP_CMP(cmpunord, FPU_CMPUNORD)1121 SSE_ OP_CMP(cmpneq, FPU_CMPNEQ)1122 SSE_ OP_CMP(cmpnlt, FPU_CMPNLT)1123 SSE_ OP_CMP(cmpnle, FPU_CMPNLE)1124 SSE_ OP_CMP(cmpord, FPU_CMPORD)909 SSE_HELPER_CMP(cmpeq, FPU_CMPEQ) 910 SSE_HELPER_CMP(cmplt, FPU_CMPLT) 911 SSE_HELPER_CMP(cmple, FPU_CMPLE) 912 SSE_HELPER_CMP(cmpunord, FPU_CMPUNORD) 913 SSE_HELPER_CMP(cmpneq, FPU_CMPNEQ) 914 SSE_HELPER_CMP(cmpnlt, FPU_CMPNLT) 915 SSE_HELPER_CMP(cmpnle, FPU_CMPNLE) 916 SSE_HELPER_CMP(cmpord, FPU_CMPORD) 1125 917 1126 918 const int comis_eflags[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C}; 1127 919 1128 void OPPROTO op_ucomiss(void)920 void helper_ucomiss(Reg *d, Reg *s) 1129 921 { 1130 922 int ret; 1131 923 float32 s0, s1; 1132 Reg *d, *s;1133 d = (Reg *)((char *)env + PARAM1);1134 s = (Reg *)((char *)env + PARAM2);1135 924 1136 925 s0 = d->XMM_S(0); … … 1141 930 } 1142 931 1143 void OPPROTO op_comiss(void)932 void helper_comiss(Reg *d, Reg *s) 1144 933 { 1145 934 int ret; 1146 935 float32 s0, s1; 1147 Reg *d, *s;1148 d = (Reg *)((char *)env + PARAM1);1149 s = (Reg *)((char *)env + PARAM2);1150 936 1151 937 s0 = d->XMM_S(0); … … 1156 942 } 1157 943 1158 void OPPROTO op_ucomisd(void)944 void helper_ucomisd(Reg *d, Reg *s) 1159 945 { 1160 946 int ret; 1161 947 float64 d0, d1; 1162 Reg *d, *s;1163 d = (Reg *)((char *)env + PARAM1);1164 s = (Reg *)((char *)env + PARAM2);1165 948 1166 949 d0 = d->XMM_D(0); … … 1171 954 } 1172 955 1173 void OPPROTO op_comisd(void)956 void helper_comisd(Reg *d, Reg *s) 1174 957 { 1175 958 int ret; 1176 959 float64 d0, d1; 1177 Reg *d, *s;1178 d = (Reg *)((char *)env + PARAM1);1179 s = (Reg *)((char *)env + PARAM2);1180 960 1181 961 d0 = d->XMM_D(0); … … 1186 966 } 1187 967 1188 void OPPROTO op_movmskps(void)968 uint32_t helper_movmskps(Reg *s) 1189 969 { 1190 970 int b0, b1, b2, b3; 1191 Reg *s;1192 s = (Reg *)((char *)env + PARAM1);1193 971 
b0 = s->XMM_L(0) >> 31; 1194 972 b1 = s->XMM_L(1) >> 31; 1195 973 b2 = s->XMM_L(2) >> 31; 1196 974 b3 = s->XMM_L(3) >> 31; 1197 T0 =b0 | (b1 << 1) | (b2 << 2) | (b3 << 3);1198 } 1199 1200 void OPPROTO op_movmskpd(void)975 return b0 | (b1 << 1) | (b2 << 2) | (b3 << 3); 976 } 977 978 uint32_t helper_movmskpd(Reg *s) 1201 979 { 1202 980 int b0, b1; 1203 Reg *s;1204 s = (Reg *)((char *)env + PARAM1);1205 981 b0 = s->XMM_L(1) >> 31; 1206 982 b1 = s->XMM_L(3) >> 31; 1207 T0 = b0 | (b1 << 1); 1208 } 1209 1210 #endif 1211 1212 void OPPROTO glue(op_pmovmskb, SUFFIX)(void) 1213 { 1214 Reg *s; 1215 s = (Reg *)((char *)env + PARAM1); 1216 T0 = 0; 1217 T0 |= (s->XMM_B(0) >> 7); 1218 T0 |= (s->XMM_B(1) >> 6) & 0x02; 1219 T0 |= (s->XMM_B(2) >> 5) & 0x04; 1220 T0 |= (s->XMM_B(3) >> 4) & 0x08; 1221 T0 |= (s->XMM_B(4) >> 3) & 0x10; 1222 T0 |= (s->XMM_B(5) >> 2) & 0x20; 1223 T0 |= (s->XMM_B(6) >> 1) & 0x40; 1224 T0 |= (s->XMM_B(7)) & 0x80; 1225 #if SHIFT == 1 1226 T0 |= (s->XMM_B(8) << 1) & 0x0100; 1227 T0 |= (s->XMM_B(9) << 2) & 0x0200; 1228 T0 |= (s->XMM_B(10) << 3) & 0x0400; 1229 T0 |= (s->XMM_B(11) << 4) & 0x0800; 1230 T0 |= (s->XMM_B(12) << 5) & 0x1000; 1231 T0 |= (s->XMM_B(13) << 6) & 0x2000; 1232 T0 |= (s->XMM_B(14) << 7) & 0x4000; 1233 T0 |= (s->XMM_B(15) << 8) & 0x8000; 1234 #endif 1235 } 1236 1237 void OPPROTO glue(op_pinsrw, SUFFIX) (void) 1238 { 1239 Reg *d = (Reg *)((char *)env + PARAM1); 1240 int pos = PARAM2; 1241 1242 d->W(pos) = T0; 1243 } 1244 1245 void OPPROTO glue(op_pextrw, SUFFIX) (void) 1246 { 1247 Reg *s = (Reg *)((char *)env + PARAM1); 1248 int pos = PARAM2; 1249 1250 T0 = s->W(pos); 1251 } 1252 1253 void OPPROTO glue(op_packsswb, SUFFIX) (void) 1254 { 1255 Reg r, *d, *s; 1256 d = (Reg *)((char *)env + PARAM1); 1257 s = (Reg *)((char *)env + PARAM2); 983 return b0 | (b1 << 1); 984 } 985 986 #endif 987 988 uint32_t glue(helper_pmovmskb, SUFFIX)(Reg *s) 989 { 990 uint32_t val; 991 val = 0; 992 val |= (s->XMM_B(0) >> 7); 993 val |= (s->XMM_B(1) >> 6) & 0x02; 994 
val |= (s->XMM_B(2) >> 5) & 0x04; 995 val |= (s->XMM_B(3) >> 4) & 0x08; 996 val |= (s->XMM_B(4) >> 3) & 0x10; 997 val |= (s->XMM_B(5) >> 2) & 0x20; 998 val |= (s->XMM_B(6) >> 1) & 0x40; 999 val |= (s->XMM_B(7)) & 0x80; 1000 #if SHIFT == 1 1001 val |= (s->XMM_B(8) << 1) & 0x0100; 1002 val |= (s->XMM_B(9) << 2) & 0x0200; 1003 val |= (s->XMM_B(10) << 3) & 0x0400; 1004 val |= (s->XMM_B(11) << 4) & 0x0800; 1005 val |= (s->XMM_B(12) << 5) & 0x1000; 1006 val |= (s->XMM_B(13) << 6) & 0x2000; 1007 val |= (s->XMM_B(14) << 7) & 0x4000; 1008 val |= (s->XMM_B(15) << 8) & 0x8000; 1009 #endif 1010 return val; 1011 } 1012 1013 void glue(helper_packsswb, SUFFIX) (Reg *d, Reg *s) 1014 { 1015 Reg r; 1258 1016 1259 1017 r.B(0) = satsb((int16_t)d->W(0)); … … 1280 1038 } 1281 1039 1282 void OPPROTO glue(op_packuswb, SUFFIX) (void) 1283 { 1284 Reg r, *d, *s; 1285 d = (Reg *)((char *)env + PARAM1); 1286 s = (Reg *)((char *)env + PARAM2); 1040 void glue(helper_packuswb, SUFFIX) (Reg *d, Reg *s) 1041 { 1042 Reg r; 1287 1043 1288 1044 r.B(0) = satub((int16_t)d->W(0)); … … 1309 1065 } 1310 1066 1311 void OPPROTO glue(op_packssdw, SUFFIX) (void) 1312 { 1313 Reg r, *d, *s; 1314 d = (Reg *)((char *)env + PARAM1); 1315 s = (Reg *)((char *)env + PARAM2); 1067 void glue(helper_packssdw, SUFFIX) (Reg *d, Reg *s) 1068 { 1069 Reg r; 1316 1070 1317 1071 r.W(0) = satsw(d->L(0)); … … 1332 1086 #define UNPCK_OP(base_name, base) \ 1333 1087 \ 1334 void OPPROTO glue(op_punpck ## base_name ## bw, SUFFIX) (void) \1088 void glue(helper_punpck ## base_name ## bw, SUFFIX) (Reg *d, Reg *s) \ 1335 1089 { \ 1336 Reg r, *d, *s; \ 1337 d = (Reg *)((char *)env + PARAM1); \ 1338 s = (Reg *)((char *)env + PARAM2); \ 1090 Reg r; \ 1339 1091 \ 1340 1092 r.B(0) = d->B((base << (SHIFT + 2)) + 0); \ … … 1359 1111 } \ 1360 1112 \ 1361 void OPPROTO glue(op_punpck ## base_name ## wd, SUFFIX) (void) \1113 void glue(helper_punpck ## base_name ## wd, SUFFIX) (Reg *d, Reg *s) \ 1362 1114 { \ 1363 Reg r, *d, *s; \ 1364 d = (Reg 
*)((char *)env + PARAM1); \ 1365 s = (Reg *)((char *)env + PARAM2); \ 1115 Reg r; \ 1366 1116 \ 1367 1117 r.W(0) = d->W((base << (SHIFT + 1)) + 0); \ … … 1378 1128 } \ 1379 1129 \ 1380 void OPPROTO glue(op_punpck ## base_name ## dq, SUFFIX) (void) \1130 void glue(helper_punpck ## base_name ## dq, SUFFIX) (Reg *d, Reg *s) \ 1381 1131 { \ 1382 Reg r, *d, *s; \ 1383 d = (Reg *)((char *)env + PARAM1); \ 1384 s = (Reg *)((char *)env + PARAM2); \ 1132 Reg r; \ 1385 1133 \ 1386 1134 r.L(0) = d->L((base << SHIFT) + 0); \ … … 1394 1142 \ 1395 1143 XMM_ONLY( \ 1396 void OPPROTO glue(op_punpck ## base_name ## qdq, SUFFIX) (void) \1144 void glue(helper_punpck ## base_name ## qdq, SUFFIX) (Reg *d, Reg *s) \ 1397 1145 { \ 1398 Reg r, *d, *s; \ 1399 d = (Reg *)((char *)env + PARAM1); \ 1400 s = (Reg *)((char *)env + PARAM2); \ 1146 Reg r; \ 1401 1147 \ 1402 1148 r.Q(0) = d->Q(base); \ … … 1408 1154 UNPCK_OP(l, 0) 1409 1155 UNPCK_OP(h, 1) 1156 1157 /* 3DNow! float ops */ 1158 #if SHIFT == 0 1159 void helper_pi2fd(MMXReg *d, MMXReg *s) 1160 { 1161 d->MMX_S(0) = int32_to_float32(s->MMX_L(0), &env->mmx_status); 1162 d->MMX_S(1) = int32_to_float32(s->MMX_L(1), &env->mmx_status); 1163 } 1164 1165 void helper_pi2fw(MMXReg *d, MMXReg *s) 1166 { 1167 d->MMX_S(0) = int32_to_float32((int16_t)s->MMX_W(0), &env->mmx_status); 1168 d->MMX_S(1) = int32_to_float32((int16_t)s->MMX_W(2), &env->mmx_status); 1169 } 1170 1171 void helper_pf2id(MMXReg *d, MMXReg *s) 1172 { 1173 d->MMX_L(0) = float32_to_int32_round_to_zero(s->MMX_S(0), &env->mmx_status); 1174 d->MMX_L(1) = float32_to_int32_round_to_zero(s->MMX_S(1), &env->mmx_status); 1175 } 1176 1177 void helper_pf2iw(MMXReg *d, MMXReg *s) 1178 { 1179 d->MMX_L(0) = satsw(float32_to_int32_round_to_zero(s->MMX_S(0), &env->mmx_status)); 1180 d->MMX_L(1) = satsw(float32_to_int32_round_to_zero(s->MMX_S(1), &env->mmx_status)); 1181 } 1182 1183 void helper_pfacc(MMXReg *d, MMXReg *s) 1184 { 1185 MMXReg r; 1186 r.MMX_S(0) = float32_add(d->MMX_S(0), 
d->MMX_S(1), &env->mmx_status); 1187 r.MMX_S(1) = float32_add(s->MMX_S(0), s->MMX_S(1), &env->mmx_status); 1188 *d = r; 1189 } 1190 1191 void helper_pfadd(MMXReg *d, MMXReg *s) 1192 { 1193 d->MMX_S(0) = float32_add(d->MMX_S(0), s->MMX_S(0), &env->mmx_status); 1194 d->MMX_S(1) = float32_add(d->MMX_S(1), s->MMX_S(1), &env->mmx_status); 1195 } 1196 1197 void helper_pfcmpeq(MMXReg *d, MMXReg *s) 1198 { 1199 d->MMX_L(0) = float32_eq(d->MMX_S(0), s->MMX_S(0), &env->mmx_status) ? -1 : 0; 1200 d->MMX_L(1) = float32_eq(d->MMX_S(1), s->MMX_S(1), &env->mmx_status) ? -1 : 0; 1201 } 1202 1203 void helper_pfcmpge(MMXReg *d, MMXReg *s) 1204 { 1205 d->MMX_L(0) = float32_le(s->MMX_S(0), d->MMX_S(0), &env->mmx_status) ? -1 : 0; 1206 d->MMX_L(1) = float32_le(s->MMX_S(1), d->MMX_S(1), &env->mmx_status) ? -1 : 0; 1207 } 1208 1209 void helper_pfcmpgt(MMXReg *d, MMXReg *s) 1210 { 1211 d->MMX_L(0) = float32_lt(s->MMX_S(0), d->MMX_S(0), &env->mmx_status) ? -1 : 0; 1212 d->MMX_L(1) = float32_lt(s->MMX_S(1), d->MMX_S(1), &env->mmx_status) ? 
-1 : 0; 1213 } 1214 1215 void helper_pfmax(MMXReg *d, MMXReg *s) 1216 { 1217 if (float32_lt(d->MMX_S(0), s->MMX_S(0), &env->mmx_status)) 1218 d->MMX_S(0) = s->MMX_S(0); 1219 if (float32_lt(d->MMX_S(1), s->MMX_S(1), &env->mmx_status)) 1220 d->MMX_S(1) = s->MMX_S(1); 1221 } 1222 1223 void helper_pfmin(MMXReg *d, MMXReg *s) 1224 { 1225 if (float32_lt(s->MMX_S(0), d->MMX_S(0), &env->mmx_status)) 1226 d->MMX_S(0) = s->MMX_S(0); 1227 if (float32_lt(s->MMX_S(1), d->MMX_S(1), &env->mmx_status)) 1228 d->MMX_S(1) = s->MMX_S(1); 1229 } 1230 1231 void helper_pfmul(MMXReg *d, MMXReg *s) 1232 { 1233 d->MMX_S(0) = float32_mul(d->MMX_S(0), s->MMX_S(0), &env->mmx_status); 1234 d->MMX_S(1) = float32_mul(d->MMX_S(1), s->MMX_S(1), &env->mmx_status); 1235 } 1236 1237 void helper_pfnacc(MMXReg *d, MMXReg *s) 1238 { 1239 MMXReg r; 1240 r.MMX_S(0) = float32_sub(d->MMX_S(0), d->MMX_S(1), &env->mmx_status); 1241 r.MMX_S(1) = float32_sub(s->MMX_S(0), s->MMX_S(1), &env->mmx_status); 1242 *d = r; 1243 } 1244 1245 void helper_pfpnacc(MMXReg *d, MMXReg *s) 1246 { 1247 MMXReg r; 1248 r.MMX_S(0) = float32_sub(d->MMX_S(0), d->MMX_S(1), &env->mmx_status); 1249 r.MMX_S(1) = float32_add(s->MMX_S(0), s->MMX_S(1), &env->mmx_status); 1250 *d = r; 1251 } 1252 1253 void helper_pfrcp(MMXReg *d, MMXReg *s) 1254 { 1255 d->MMX_S(0) = approx_rcp(s->MMX_S(0)); 1256 d->MMX_S(1) = d->MMX_S(0); 1257 } 1258 1259 void helper_pfrsqrt(MMXReg *d, MMXReg *s) 1260 { 1261 d->MMX_L(1) = s->MMX_L(0) & 0x7fffffff; 1262 d->MMX_S(1) = approx_rsqrt(d->MMX_S(1)); 1263 d->MMX_L(1) |= s->MMX_L(0) & 0x80000000; 1264 d->MMX_L(0) = d->MMX_L(1); 1265 } 1266 1267 void helper_pfsub(MMXReg *d, MMXReg *s) 1268 { 1269 d->MMX_S(0) = float32_sub(d->MMX_S(0), s->MMX_S(0), &env->mmx_status); 1270 d->MMX_S(1) = float32_sub(d->MMX_S(1), s->MMX_S(1), &env->mmx_status); 1271 } 1272 1273 void helper_pfsubr(MMXReg *d, MMXReg *s) 1274 { 1275 d->MMX_S(0) = float32_sub(s->MMX_S(0), d->MMX_S(0), &env->mmx_status); 1276 d->MMX_S(1) = 
float32_sub(s->MMX_S(1), d->MMX_S(1), &env->mmx_status); 1277 } 1278 1279 void helper_pswapd(MMXReg *d, MMXReg *s) 1280 { 1281 MMXReg r; 1282 r.MMX_L(0) = s->MMX_L(1); 1283 r.MMX_L(1) = s->MMX_L(0); 1284 *d = r; 1285 } 1286 #endif 1287 1288 /* SSSE3 op helpers */ 1289 void glue(helper_pshufb, SUFFIX) (Reg *d, Reg *s) 1290 { 1291 int i; 1292 Reg r; 1293 1294 for (i = 0; i < (8 << SHIFT); i++) 1295 r.B(i) = (s->B(i) & 0x80) ? 0 : (d->B(s->B(i) & ((8 << SHIFT) - 1))); 1296 1297 *d = r; 1298 } 1299 1300 void glue(helper_phaddw, SUFFIX) (Reg *d, Reg *s) 1301 { 1302 d->W(0) = (int16_t)d->W(0) + (int16_t)d->W(1); 1303 d->W(1) = (int16_t)d->W(2) + (int16_t)d->W(3); 1304 XMM_ONLY(d->W(2) = (int16_t)d->W(4) + (int16_t)d->W(5)); 1305 XMM_ONLY(d->W(3) = (int16_t)d->W(6) + (int16_t)d->W(7)); 1306 d->W((2 << SHIFT) + 0) = (int16_t)s->W(0) + (int16_t)s->W(1); 1307 d->W((2 << SHIFT) + 1) = (int16_t)s->W(2) + (int16_t)s->W(3); 1308 XMM_ONLY(d->W(6) = (int16_t)s->W(4) + (int16_t)s->W(5)); 1309 XMM_ONLY(d->W(7) = (int16_t)s->W(6) + (int16_t)s->W(7)); 1310 } 1311 1312 void glue(helper_phaddd, SUFFIX) (Reg *d, Reg *s) 1313 { 1314 d->L(0) = (int32_t)d->L(0) + (int32_t)d->L(1); 1315 XMM_ONLY(d->L(1) = (int32_t)d->L(2) + (int32_t)d->L(3)); 1316 d->L((1 << SHIFT) + 0) = (int32_t)s->L(0) + (int32_t)s->L(1); 1317 XMM_ONLY(d->L(3) = (int32_t)s->L(2) + (int32_t)s->L(3)); 1318 } 1319 1320 void glue(helper_phaddsw, SUFFIX) (Reg *d, Reg *s) 1321 { 1322 d->W(0) = satsw((int16_t)d->W(0) + (int16_t)d->W(1)); 1323 d->W(1) = satsw((int16_t)d->W(2) + (int16_t)d->W(3)); 1324 XMM_ONLY(d->W(2) = satsw((int16_t)d->W(4) + (int16_t)d->W(5))); 1325 XMM_ONLY(d->W(3) = satsw((int16_t)d->W(6) + (int16_t)d->W(7))); 1326 d->W((2 << SHIFT) + 0) = satsw((int16_t)s->W(0) + (int16_t)s->W(1)); 1327 d->W((2 << SHIFT) + 1) = satsw((int16_t)s->W(2) + (int16_t)s->W(3)); 1328 XMM_ONLY(d->W(6) = satsw((int16_t)s->W(4) + (int16_t)s->W(5))); 1329 XMM_ONLY(d->W(7) = satsw((int16_t)s->W(6) + (int16_t)s->W(7))); 1330 } 1331 1332 
void glue(helper_pmaddubsw, SUFFIX) (Reg *d, Reg *s) 1333 { 1334 d->W(0) = satsw((int8_t)s->B( 0) * (uint8_t)d->B( 0) + 1335 (int8_t)s->B( 1) * (uint8_t)d->B( 1)); 1336 d->W(1) = satsw((int8_t)s->B( 2) * (uint8_t)d->B( 2) + 1337 (int8_t)s->B( 3) * (uint8_t)d->B( 3)); 1338 d->W(2) = satsw((int8_t)s->B( 4) * (uint8_t)d->B( 4) + 1339 (int8_t)s->B( 5) * (uint8_t)d->B( 5)); 1340 d->W(3) = satsw((int8_t)s->B( 6) * (uint8_t)d->B( 6) + 1341 (int8_t)s->B( 7) * (uint8_t)d->B( 7)); 1342 #if SHIFT == 1 1343 d->W(4) = satsw((int8_t)s->B( 8) * (uint8_t)d->B( 8) + 1344 (int8_t)s->B( 9) * (uint8_t)d->B( 9)); 1345 d->W(5) = satsw((int8_t)s->B(10) * (uint8_t)d->B(10) + 1346 (int8_t)s->B(11) * (uint8_t)d->B(11)); 1347 d->W(6) = satsw((int8_t)s->B(12) * (uint8_t)d->B(12) + 1348 (int8_t)s->B(13) * (uint8_t)d->B(13)); 1349 d->W(7) = satsw((int8_t)s->B(14) * (uint8_t)d->B(14) + 1350 (int8_t)s->B(15) * (uint8_t)d->B(15)); 1351 #endif 1352 } 1353 1354 void glue(helper_phsubw, SUFFIX) (Reg *d, Reg *s) 1355 { 1356 d->W(0) = (int16_t)d->W(0) - (int16_t)d->W(1); 1357 d->W(1) = (int16_t)d->W(2) - (int16_t)d->W(3); 1358 XMM_ONLY(d->W(2) = (int16_t)d->W(4) - (int16_t)d->W(5)); 1359 XMM_ONLY(d->W(3) = (int16_t)d->W(6) - (int16_t)d->W(7)); 1360 d->W((2 << SHIFT) + 0) = (int16_t)s->W(0) - (int16_t)s->W(1); 1361 d->W((2 << SHIFT) + 1) = (int16_t)s->W(2) - (int16_t)s->W(3); 1362 XMM_ONLY(d->W(6) = (int16_t)s->W(4) - (int16_t)s->W(5)); 1363 XMM_ONLY(d->W(7) = (int16_t)s->W(6) - (int16_t)s->W(7)); 1364 } 1365 1366 void glue(helper_phsubd, SUFFIX) (Reg *d, Reg *s) 1367 { 1368 d->L(0) = (int32_t)d->L(0) - (int32_t)d->L(1); 1369 XMM_ONLY(d->L(1) = (int32_t)d->L(2) - (int32_t)d->L(3)); 1370 d->L((1 << SHIFT) + 0) = (int32_t)s->L(0) - (int32_t)s->L(1); 1371 XMM_ONLY(d->L(3) = (int32_t)s->L(2) - (int32_t)s->L(3)); 1372 } 1373 1374 void glue(helper_phsubsw, SUFFIX) (Reg *d, Reg *s) 1375 { 1376 d->W(0) = satsw((int16_t)d->W(0) - (int16_t)d->W(1)); 1377 d->W(1) = satsw((int16_t)d->W(2) - (int16_t)d->W(3)); 1378 
XMM_ONLY(d->W(2) = satsw((int16_t)d->W(4) - (int16_t)d->W(5))); 1379 XMM_ONLY(d->W(3) = satsw((int16_t)d->W(6) - (int16_t)d->W(7))); 1380 d->W((2 << SHIFT) + 0) = satsw((int16_t)s->W(0) - (int16_t)s->W(1)); 1381 d->W((2 << SHIFT) + 1) = satsw((int16_t)s->W(2) - (int16_t)s->W(3)); 1382 XMM_ONLY(d->W(6) = satsw((int16_t)s->W(4) - (int16_t)s->W(5))); 1383 XMM_ONLY(d->W(7) = satsw((int16_t)s->W(6) - (int16_t)s->W(7))); 1384 } 1385 1386 #define FABSB(_, x) x > INT8_MAX ? -(int8_t ) x : x 1387 #define FABSW(_, x) x > INT16_MAX ? -(int16_t) x : x 1388 #define FABSL(_, x) x > INT32_MAX ? -(int32_t) x : x 1389 SSE_HELPER_B(helper_pabsb, FABSB) 1390 SSE_HELPER_W(helper_pabsw, FABSW) 1391 SSE_HELPER_L(helper_pabsd, FABSL) 1392 1393 #define FMULHRSW(d, s) ((int16_t) d * (int16_t) s + 0x4000) >> 15 1394 SSE_HELPER_W(helper_pmulhrsw, FMULHRSW) 1395 1396 #define FSIGNB(d, s) s <= INT8_MAX ? s ? d : 0 : -(int8_t ) d 1397 #define FSIGNW(d, s) s <= INT16_MAX ? s ? d : 0 : -(int16_t) d 1398 #define FSIGNL(d, s) s <= INT32_MAX ? s ? d : 0 : -(int32_t) d 1399 SSE_HELPER_B(helper_psignb, FSIGNB) 1400 SSE_HELPER_W(helper_psignw, FSIGNW) 1401 SSE_HELPER_L(helper_psignd, FSIGNL) 1402 1403 void glue(helper_palignr, SUFFIX) (Reg *d, Reg *s, int32_t shift) 1404 { 1405 Reg r; 1406 1407 /* XXX could be checked during translation */ 1408 if (shift >= (16 << SHIFT)) { 1409 r.Q(0) = 0; 1410 XMM_ONLY(r.Q(1) = 0); 1411 } else { 1412 shift <<= 3; 1413 #define SHR(v, i) (i < 64 && i > -64 ? i > 0 ? 
v >> (i) : (v << -(i)) : 0) 1414 #if SHIFT == 0 1415 r.Q(0) = SHR(s->Q(0), shift - 0) | 1416 SHR(d->Q(0), shift - 64); 1417 #else 1418 r.Q(0) = SHR(s->Q(0), shift - 0) | 1419 SHR(s->Q(1), shift - 64) | 1420 SHR(d->Q(0), shift - 128) | 1421 SHR(d->Q(1), shift - 192); 1422 r.Q(1) = SHR(s->Q(0), shift + 64) | 1423 SHR(s->Q(1), shift - 0) | 1424 SHR(d->Q(0), shift - 64) | 1425 SHR(d->Q(1), shift - 128); 1426 #endif 1427 #undef SHR 1428 } 1429 1430 *d = r; 1431 } 1432 1433 #define XMM0 env->xmm_regs[0] 1434 1435 #if SHIFT == 1 1436 #define SSE_HELPER_V(name, elem, num, F)\ 1437 void glue(name, SUFFIX) (Reg *d, Reg *s)\ 1438 {\ 1439 d->elem(0) = F(d->elem(0), s->elem(0), XMM0.elem(0));\ 1440 d->elem(1) = F(d->elem(1), s->elem(1), XMM0.elem(1));\ 1441 if (num > 2) {\ 1442 d->elem(2) = F(d->elem(2), s->elem(2), XMM0.elem(2));\ 1443 d->elem(3) = F(d->elem(3), s->elem(3), XMM0.elem(3));\ 1444 if (num > 4) {\ 1445 d->elem(4) = F(d->elem(4), s->elem(4), XMM0.elem(4));\ 1446 d->elem(5) = F(d->elem(5), s->elem(5), XMM0.elem(5));\ 1447 d->elem(6) = F(d->elem(6), s->elem(6), XMM0.elem(6));\ 1448 d->elem(7) = F(d->elem(7), s->elem(7), XMM0.elem(7));\ 1449 if (num > 8) {\ 1450 d->elem(8) = F(d->elem(8), s->elem(8), XMM0.elem(8));\ 1451 d->elem(9) = F(d->elem(9), s->elem(9), XMM0.elem(9));\ 1452 d->elem(10) = F(d->elem(10), s->elem(10), XMM0.elem(10));\ 1453 d->elem(11) = F(d->elem(11), s->elem(11), XMM0.elem(11));\ 1454 d->elem(12) = F(d->elem(12), s->elem(12), XMM0.elem(12));\ 1455 d->elem(13) = F(d->elem(13), s->elem(13), XMM0.elem(13));\ 1456 d->elem(14) = F(d->elem(14), s->elem(14), XMM0.elem(14));\ 1457 d->elem(15) = F(d->elem(15), s->elem(15), XMM0.elem(15));\ 1458 }\ 1459 }\ 1460 }\ 1461 } 1462 1463 #define SSE_HELPER_I(name, elem, num, F)\ 1464 void glue(name, SUFFIX) (Reg *d, Reg *s, uint32_t imm)\ 1465 {\ 1466 d->elem(0) = F(d->elem(0), s->elem(0), ((imm >> 0) & 1));\ 1467 d->elem(1) = F(d->elem(1), s->elem(1), ((imm >> 1) & 1));\ 1468 if (num > 2) {\ 1469 d->elem(2) = 
F(d->elem(2), s->elem(2), ((imm >> 2) & 1));\ 1470 d->elem(3) = F(d->elem(3), s->elem(3), ((imm >> 3) & 1));\ 1471 if (num > 4) {\ 1472 d->elem(4) = F(d->elem(4), s->elem(4), ((imm >> 4) & 1));\ 1473 d->elem(5) = F(d->elem(5), s->elem(5), ((imm >> 5) & 1));\ 1474 d->elem(6) = F(d->elem(6), s->elem(6), ((imm >> 6) & 1));\ 1475 d->elem(7) = F(d->elem(7), s->elem(7), ((imm >> 7) & 1));\ 1476 if (num > 8) {\ 1477 d->elem(8) = F(d->elem(8), s->elem(8), ((imm >> 8) & 1));\ 1478 d->elem(9) = F(d->elem(9), s->elem(9), ((imm >> 9) & 1));\ 1479 d->elem(10) = F(d->elem(10), s->elem(10), ((imm >> 10) & 1));\ 1480 d->elem(11) = F(d->elem(11), s->elem(11), ((imm >> 11) & 1));\ 1481 d->elem(12) = F(d->elem(12), s->elem(12), ((imm >> 12) & 1));\ 1482 d->elem(13) = F(d->elem(13), s->elem(13), ((imm >> 13) & 1));\ 1483 d->elem(14) = F(d->elem(14), s->elem(14), ((imm >> 14) & 1));\ 1484 d->elem(15) = F(d->elem(15), s->elem(15), ((imm >> 15) & 1));\ 1485 }\ 1486 }\ 1487 }\ 1488 } 1489 1490 /* SSE4.1 op helpers */ 1491 #define FBLENDVB(d, s, m) (m & 0x80) ? s : d 1492 #define FBLENDVPS(d, s, m) (m & 0x80000000) ? s : d 1493 #define FBLENDVPD(d, s, m) (m & 0x8000000000000000LL) ? s : d 1494 SSE_HELPER_V(helper_pblendvb, B, 16, FBLENDVB) 1495 SSE_HELPER_V(helper_blendvps, L, 4, FBLENDVPS) 1496 SSE_HELPER_V(helper_blendvpd, Q, 2, FBLENDVPD) 1497 1498 void glue(helper_ptest, SUFFIX) (Reg *d, Reg *s) 1499 { 1500 uint64_t zf = (s->Q(0) & d->Q(0)) | (s->Q(1) & d->Q(1)); 1501 uint64_t cf = (s->Q(0) & ~d->Q(0)) | (s->Q(1) & ~d->Q(1)); 1502 1503 CC_SRC = (zf ? 0 : CC_Z) | (cf ? 
0 : CC_C); 1504 } 1505 1506 #define SSE_HELPER_F(name, elem, num, F)\ 1507 void glue(name, SUFFIX) (Reg *d, Reg *s)\ 1508 {\ 1509 d->elem(0) = F(0);\ 1510 d->elem(1) = F(1);\ 1511 d->elem(2) = F(2);\ 1512 d->elem(3) = F(3);\ 1513 if (num > 3) {\ 1514 d->elem(4) = F(4);\ 1515 d->elem(5) = F(5);\ 1516 if (num > 5) {\ 1517 d->elem(6) = F(6);\ 1518 d->elem(7) = F(7);\ 1519 }\ 1520 }\ 1521 } 1522 1523 SSE_HELPER_F(helper_pmovsxbw, W, 8, (int8_t) s->B) 1524 SSE_HELPER_F(helper_pmovsxbd, L, 4, (int8_t) s->B) 1525 SSE_HELPER_F(helper_pmovsxbq, Q, 2, (int8_t) s->B) 1526 SSE_HELPER_F(helper_pmovsxwd, L, 4, (int16_t) s->W) 1527 SSE_HELPER_F(helper_pmovsxwq, Q, 2, (int16_t) s->W) 1528 SSE_HELPER_F(helper_pmovsxdq, Q, 2, (int32_t) s->L) 1529 SSE_HELPER_F(helper_pmovzxbw, W, 8, s->B) 1530 SSE_HELPER_F(helper_pmovzxbd, L, 4, s->B) 1531 SSE_HELPER_F(helper_pmovzxbq, Q, 2, s->B) 1532 SSE_HELPER_F(helper_pmovzxwd, L, 4, s->W) 1533 SSE_HELPER_F(helper_pmovzxwq, Q, 2, s->W) 1534 SSE_HELPER_F(helper_pmovzxdq, Q, 2, s->L) 1535 1536 void glue(helper_pmuldq, SUFFIX) (Reg *d, Reg *s) 1537 { 1538 d->Q(0) = (int64_t) (int32_t) d->L(0) * (int32_t) s->L(0); 1539 d->Q(1) = (int64_t) (int32_t) d->L(2) * (int32_t) s->L(2); 1540 } 1541 1542 #define FCMPEQQ(d, s) d == s ? 
-1 : 0 1543 SSE_HELPER_Q(helper_pcmpeqq, FCMPEQQ) 1544 1545 void glue(helper_packusdw, SUFFIX) (Reg *d, Reg *s) 1546 { 1547 d->W(0) = satuw((int32_t) d->L(0)); 1548 d->W(1) = satuw((int32_t) d->L(1)); 1549 d->W(2) = satuw((int32_t) d->L(2)); 1550 d->W(3) = satuw((int32_t) d->L(3)); 1551 d->W(4) = satuw((int32_t) s->L(0)); 1552 d->W(5) = satuw((int32_t) s->L(1)); 1553 d->W(6) = satuw((int32_t) s->L(2)); 1554 d->W(7) = satuw((int32_t) s->L(3)); 1555 } 1556 1557 #define FMINSB(d, s) MIN((int8_t) d, (int8_t) s) 1558 #define FMINSD(d, s) MIN((int32_t) d, (int32_t) s) 1559 #define FMAXSB(d, s) MAX((int8_t) d, (int8_t) s) 1560 #define FMAXSD(d, s) MAX((int32_t) d, (int32_t) s) 1561 SSE_HELPER_B(helper_pminsb, FMINSB) 1562 SSE_HELPER_L(helper_pminsd, FMINSD) 1563 SSE_HELPER_W(helper_pminuw, MIN) 1564 SSE_HELPER_L(helper_pminud, MIN) 1565 SSE_HELPER_B(helper_pmaxsb, FMAXSB) 1566 SSE_HELPER_L(helper_pmaxsd, FMAXSD) 1567 SSE_HELPER_W(helper_pmaxuw, MAX) 1568 SSE_HELPER_L(helper_pmaxud, MAX) 1569 1570 #define FMULLD(d, s) (int32_t) d * (int32_t) s 1571 SSE_HELPER_L(helper_pmulld, FMULLD) 1572 1573 void glue(helper_phminposuw, SUFFIX) (Reg *d, Reg *s) 1574 { 1575 int idx = 0; 1576 1577 if (s->W(1) < s->W(idx)) 1578 idx = 1; 1579 if (s->W(2) < s->W(idx)) 1580 idx = 2; 1581 if (s->W(3) < s->W(idx)) 1582 idx = 3; 1583 if (s->W(4) < s->W(idx)) 1584 idx = 4; 1585 if (s->W(5) < s->W(idx)) 1586 idx = 5; 1587 if (s->W(6) < s->W(idx)) 1588 idx = 6; 1589 if (s->W(7) < s->W(idx)) 1590 idx = 7; 1591 1592 d->Q(1) = 0; 1593 d->L(1) = 0; 1594 d->W(1) = idx; 1595 d->W(0) = s->W(idx); 1596 } 1597 1598 void glue(helper_roundps, SUFFIX) (Reg *d, Reg *s, uint32_t mode) 1599 { 1600 signed char prev_rounding_mode; 1601 1602 prev_rounding_mode = env->sse_status.float_rounding_mode; 1603 if (!(mode & (1 << 2))) 1604 switch (mode & 3) { 1605 case 0: 1606 set_float_rounding_mode(float_round_nearest_even, &env->sse_status); 1607 break; 1608 case 1: 1609 set_float_rounding_mode(float_round_down, 
&env->sse_status); 1610 break; 1611 case 2: 1612 set_float_rounding_mode(float_round_up, &env->sse_status); 1613 break; 1614 case 3: 1615 set_float_rounding_mode(float_round_to_zero, &env->sse_status); 1616 break; 1617 } 1618 1619 d->L(0) = float64_round_to_int(s->L(0), &env->sse_status); 1620 d->L(1) = float64_round_to_int(s->L(1), &env->sse_status); 1621 d->L(2) = float64_round_to_int(s->L(2), &env->sse_status); 1622 d->L(3) = float64_round_to_int(s->L(3), &env->sse_status); 1623 1624 #if 0 /* TODO */ 1625 if (mode & (1 << 3)) 1626 set_float_exception_flags( 1627 get_float_exception_flags(&env->sse_status) & 1628 ~float_flag_inexact, 1629 &env->sse_status); 1630 #endif 1631 env->sse_status.float_rounding_mode = prev_rounding_mode; 1632 } 1633 1634 void glue(helper_roundpd, SUFFIX) (Reg *d, Reg *s, uint32_t mode) 1635 { 1636 signed char prev_rounding_mode; 1637 1638 prev_rounding_mode = env->sse_status.float_rounding_mode; 1639 if (!(mode & (1 << 2))) 1640 switch (mode & 3) { 1641 case 0: 1642 set_float_rounding_mode(float_round_nearest_even, &env->sse_status); 1643 break; 1644 case 1: 1645 set_float_rounding_mode(float_round_down, &env->sse_status); 1646 break; 1647 case 2: 1648 set_float_rounding_mode(float_round_up, &env->sse_status); 1649 break; 1650 case 3: 1651 set_float_rounding_mode(float_round_to_zero, &env->sse_status); 1652 break; 1653 } 1654 1655 d->Q(0) = float64_round_to_int(s->Q(0), &env->sse_status); 1656 d->Q(1) = float64_round_to_int(s->Q(1), &env->sse_status); 1657 1658 #if 0 /* TODO */ 1659 if (mode & (1 << 3)) 1660 set_float_exception_flags( 1661 get_float_exception_flags(&env->sse_status) & 1662 ~float_flag_inexact, 1663 &env->sse_status); 1664 #endif 1665 env->sse_status.float_rounding_mode = prev_rounding_mode; 1666 } 1667 1668 void glue(helper_roundss, SUFFIX) (Reg *d, Reg *s, uint32_t mode) 1669 { 1670 signed char prev_rounding_mode; 1671 1672 prev_rounding_mode = env->sse_status.float_rounding_mode; 1673 if (!(mode & (1 << 2))) 1674 
switch (mode & 3) { 1675 case 0: 1676 set_float_rounding_mode(float_round_nearest_even, &env->sse_status); 1677 break; 1678 case 1: 1679 set_float_rounding_mode(float_round_down, &env->sse_status); 1680 break; 1681 case 2: 1682 set_float_rounding_mode(float_round_up, &env->sse_status); 1683 break; 1684 case 3: 1685 set_float_rounding_mode(float_round_to_zero, &env->sse_status); 1686 break; 1687 } 1688 1689 d->L(0) = float64_round_to_int(s->L(0), &env->sse_status); 1690 1691 #if 0 /* TODO */ 1692 if (mode & (1 << 3)) 1693 set_float_exception_flags( 1694 get_float_exception_flags(&env->sse_status) & 1695 ~float_flag_inexact, 1696 &env->sse_status); 1697 #endif 1698 env->sse_status.float_rounding_mode = prev_rounding_mode; 1699 } 1700 1701 void glue(helper_roundsd, SUFFIX) (Reg *d, Reg *s, uint32_t mode) 1702 { 1703 signed char prev_rounding_mode; 1704 1705 prev_rounding_mode = env->sse_status.float_rounding_mode; 1706 if (!(mode & (1 << 2))) 1707 switch (mode & 3) { 1708 case 0: 1709 set_float_rounding_mode(float_round_nearest_even, &env->sse_status); 1710 break; 1711 case 1: 1712 set_float_rounding_mode(float_round_down, &env->sse_status); 1713 break; 1714 case 2: 1715 set_float_rounding_mode(float_round_up, &env->sse_status); 1716 break; 1717 case 3: 1718 set_float_rounding_mode(float_round_to_zero, &env->sse_status); 1719 break; 1720 } 1721 1722 d->Q(0) = float64_round_to_int(s->Q(0), &env->sse_status); 1723 1724 #if 0 /* TODO */ 1725 if (mode & (1 << 3)) 1726 set_float_exception_flags( 1727 get_float_exception_flags(&env->sse_status) & 1728 ~float_flag_inexact, 1729 &env->sse_status); 1730 #endif 1731 env->sse_status.float_rounding_mode = prev_rounding_mode; 1732 } 1733 1734 #define FBLENDP(d, s, m) m ? 
s : d 1735 SSE_HELPER_I(helper_blendps, L, 4, FBLENDP) 1736 SSE_HELPER_I(helper_blendpd, Q, 2, FBLENDP) 1737 SSE_HELPER_I(helper_pblendw, W, 8, FBLENDP) 1738 1739 void glue(helper_dpps, SUFFIX) (Reg *d, Reg *s, uint32_t mask) 1740 { 1741 float32 iresult = 0 /*float32_zero*/; 1742 1743 if (mask & (1 << 4)) 1744 iresult = float32_add(iresult, 1745 float32_mul(d->L(0), s->L(0), &env->sse_status), 1746 &env->sse_status); 1747 if (mask & (1 << 5)) 1748 iresult = float32_add(iresult, 1749 float32_mul(d->L(1), s->L(1), &env->sse_status), 1750 &env->sse_status); 1751 if (mask & (1 << 6)) 1752 iresult = float32_add(iresult, 1753 float32_mul(d->L(2), s->L(2), &env->sse_status), 1754 &env->sse_status); 1755 if (mask & (1 << 7)) 1756 iresult = float32_add(iresult, 1757 float32_mul(d->L(3), s->L(3), &env->sse_status), 1758 &env->sse_status); 1759 d->L(0) = (mask & (1 << 0)) ? iresult : 0 /*float32_zero*/; 1760 d->L(1) = (mask & (1 << 1)) ? iresult : 0 /*float32_zero*/; 1761 d->L(2) = (mask & (1 << 2)) ? iresult : 0 /*float32_zero*/; 1762 d->L(3) = (mask & (1 << 3)) ? iresult : 0 /*float32_zero*/; 1763 } 1764 1765 void glue(helper_dppd, SUFFIX) (Reg *d, Reg *s, uint32_t mask) 1766 { 1767 float64 iresult = 0 /*float64_zero*/; 1768 1769 if (mask & (1 << 4)) 1770 iresult = float64_add(iresult, 1771 float64_mul(d->Q(0), s->Q(0), &env->sse_status), 1772 &env->sse_status); 1773 if (mask & (1 << 5)) 1774 iresult = float64_add(iresult, 1775 float64_mul(d->Q(1), s->Q(1), &env->sse_status), 1776 &env->sse_status); 1777 d->Q(0) = (mask & (1 << 0)) ? iresult : 0 /*float64_zero*/; 1778 d->Q(1) = (mask & (1 << 1)) ? 
iresult : 0 /*float64_zero*/; 1779 } 1780 1781 void glue(helper_mpsadbw, SUFFIX) (Reg *d, Reg *s, uint32_t offset) 1782 { 1783 int s0 = (offset & 3) << 2; 1784 int d0 = (offset & 4) << 0; 1785 int i; 1786 Reg r; 1787 1788 for (i = 0; i < 8; i++, d0++) { 1789 r.W(i) = 0; 1790 r.W(i) += abs1(d->B(d0 + 0) - s->B(s0 + 0)); 1791 r.W(i) += abs1(d->B(d0 + 1) - s->B(s0 + 1)); 1792 r.W(i) += abs1(d->B(d0 + 2) - s->B(s0 + 2)); 1793 r.W(i) += abs1(d->B(d0 + 3) - s->B(s0 + 3)); 1794 } 1795 1796 *d = r; 1797 } 1798 1799 /* SSE4.2 op helpers */ 1800 /* it's unclear whether signed or unsigned */ 1801 #define FCMPGTQ(d, s) d > s ? -1 : 0 1802 SSE_HELPER_Q(helper_pcmpgtq, FCMPGTQ) 1803 1804 static inline int pcmp_elen(int reg, uint32_t ctrl) 1805 { 1806 int val; 1807 1808 /* Presence of REX.W is indicated by a bit higher than 7 set */ 1809 if (ctrl >> 8) 1810 val = abs1((int64_t) env->regs[reg]); 1811 else 1812 val = abs1((int32_t) env->regs[reg]); 1813 1814 if (ctrl & 1) { 1815 if (val > 8) 1816 return 8; 1817 } else 1818 if (val > 16) 1819 return 16; 1820 1821 return val; 1822 } 1823 1824 static inline int pcmp_ilen(Reg *r, uint8_t ctrl) 1825 { 1826 int val = 0; 1827 1828 if (ctrl & 1) { 1829 while (val < 8 && r->W(val)) 1830 val++; 1831 } else 1832 while (val < 16 && r->B(val)) 1833 val++; 1834 1835 return val; 1836 } 1837 1838 static inline int pcmp_val(Reg *r, uint8_t ctrl, int i) 1839 { 1840 switch ((ctrl >> 0) & 3) { 1841 case 0: 1842 return r->B(i); 1843 case 1: 1844 return r->W(i); 1845 case 2: 1846 return (int8_t) r->B(i); 1847 case 3: 1848 default: 1849 return (int16_t) r->W(i); 1850 } 1851 } 1852 1853 static inline unsigned pcmpxstrx(Reg *d, Reg *s, 1854 int8_t ctrl, int valids, int validd) 1855 { 1856 unsigned int res = 0; 1857 int v; 1858 int j, i; 1859 int upper = (ctrl & 1) ? 7 : 15; 1860 1861 valids--; 1862 validd--; 1863 1864 CC_SRC = (valids < upper ? CC_Z : 0) | (validd < upper ? 
CC_S : 0); 1865 1866 switch ((ctrl >> 2) & 3) { 1867 case 0: 1868 for (j = valids; j >= 0; j--) { 1869 res <<= 1; 1870 v = pcmp_val(s, ctrl, j); 1871 for (i = validd; i >= 0; i--) 1872 res |= (v == pcmp_val(d, ctrl, i)); 1873 } 1874 break; 1875 case 1: 1876 for (j = valids; j >= 0; j--) { 1877 res <<= 1; 1878 v = pcmp_val(s, ctrl, j); 1879 for (i = ((validd - 1) | 1); i >= 0; i -= 2) 1880 res |= (pcmp_val(d, ctrl, i - 0) <= v && 1881 pcmp_val(d, ctrl, i - 1) >= v); 1882 } 1883 break; 1884 case 2: 1885 res = (2 << (upper - MAX(valids, validd))) - 1; 1886 res <<= MAX(valids, validd) - MIN(valids, validd); 1887 for (i = MIN(valids, validd); i >= 0; i--) { 1888 res <<= 1; 1889 v = pcmp_val(s, ctrl, i); 1890 res |= (v == pcmp_val(d, ctrl, i)); 1891 } 1892 break; 1893 case 3: 1894 for (j = valids - validd; j >= 0; j--) { 1895 res <<= 1; 1896 res |= 1; 1897 for (i = MIN(upper - j, validd); i >= 0; i--) 1898 res &= (pcmp_val(s, ctrl, i + j) == pcmp_val(d, ctrl, i)); 1899 } 1900 break; 1901 } 1902 1903 switch ((ctrl >> 4) & 3) { 1904 case 1: 1905 res ^= (2 << upper) - 1; 1906 break; 1907 case 3: 1908 res ^= (2 << valids) - 1; 1909 break; 1910 } 1911 1912 if (res) 1913 CC_SRC |= CC_C; 1914 if (res & 1) 1915 CC_SRC |= CC_O; 1916 1917 return res; 1918 } 1919 1920 static inline int rffs1(unsigned int val) 1921 { 1922 int ret = 1, hi; 1923 1924 for (hi = sizeof(val) * 4; hi; hi /= 2) 1925 if (val >> hi) { 1926 val >>= hi; 1927 ret += hi; 1928 } 1929 1930 return ret; 1931 } 1932 1933 static inline int ffs1(unsigned int val) 1934 { 1935 int ret = 1, hi; 1936 1937 for (hi = sizeof(val) * 4; hi; hi /= 2) 1938 if (val << hi) { 1939 val <<= hi; 1940 ret += hi; 1941 } 1942 1943 return ret; 1944 } 1945 1946 void glue(helper_pcmpestri, SUFFIX) (Reg *d, Reg *s, uint32_t ctrl) 1947 { 1948 unsigned int res = pcmpxstrx(d, s, ctrl, 1949 pcmp_elen(R_EDX, ctrl), 1950 pcmp_elen(R_EAX, ctrl)); 1951 1952 if (res) 1953 env->regs[R_ECX] = ((ctrl & (1 << 6)) ? 
rffs1 : ffs1)(res) - 1; 1954 else 1955 env->regs[R_ECX] = 16 >> (ctrl & (1 << 0)); 1956 } 1957 1958 void glue(helper_pcmpestrm, SUFFIX) (Reg *d, Reg *s, uint32_t ctrl) 1959 { 1960 int i; 1961 unsigned int res = pcmpxstrx(d, s, ctrl, 1962 pcmp_elen(R_EDX, ctrl), 1963 pcmp_elen(R_EAX, ctrl)); 1964 1965 if ((ctrl >> 6) & 1) { 1966 if (ctrl & 1) 1967 for (i = 0; i <= 8; i--, res >>= 1) 1968 d->W(i) = (res & 1) ? ~0 : 0; 1969 else 1970 for (i = 0; i <= 16; i--, res >>= 1) 1971 d->B(i) = (res & 1) ? ~0 : 0; 1972 } else { 1973 d->Q(1) = 0; 1974 d->Q(0) = res; 1975 } 1976 } 1977 1978 void glue(helper_pcmpistri, SUFFIX) (Reg *d, Reg *s, uint32_t ctrl) 1979 { 1980 unsigned int res = pcmpxstrx(d, s, ctrl, 1981 pcmp_ilen(s, ctrl), 1982 pcmp_ilen(d, ctrl)); 1983 1984 if (res) 1985 env->regs[R_ECX] = ((ctrl & (1 << 6)) ? rffs1 : ffs1)(res) - 1; 1986 else 1987 env->regs[R_ECX] = 16 >> (ctrl & (1 << 0)); 1988 } 1989 1990 void glue(helper_pcmpistrm, SUFFIX) (Reg *d, Reg *s, uint32_t ctrl) 1991 { 1992 int i; 1993 unsigned int res = pcmpxstrx(d, s, ctrl, 1994 pcmp_ilen(s, ctrl), 1995 pcmp_ilen(d, ctrl)); 1996 1997 if ((ctrl >> 6) & 1) { 1998 if (ctrl & 1) 1999 for (i = 0; i <= 8; i--, res >>= 1) 2000 d->W(i) = (res & 1) ? ~0 : 0; 2001 else 2002 for (i = 0; i <= 16; i--, res >>= 1) 2003 d->B(i) = (res & 1) ? ~0 : 0; 2004 } else { 2005 d->Q(1) = 0; 2006 d->Q(0) = res; 2007 } 2008 } 2009 2010 #define CRCPOLY 0x1edc6f41 2011 #define CRCPOLY_BITREV 0x82f63b78 2012 target_ulong helper_crc32(uint32_t crc1, target_ulong msg, uint32_t len) 2013 { 2014 target_ulong crc = (msg & ((target_ulong) -1 >> 2015 (TARGET_LONG_BITS - len))) ^ crc1; 2016 2017 while (len--) 2018 crc = (crc >> 1) ^ ((crc & 1) ? CRCPOLY_BITREV : 0); 2019 2020 return crc; 2021 } 2022 2023 #define POPMASK(i) ((target_ulong) -1 / ((1LL << (1 << i)) + 1)) 2024 #define POPCOUNT(n, i) (n & POPMASK(i)) + ((n >> (1 << i)) & POPMASK(i)) 2025 target_ulong helper_popcnt(target_ulong n, uint32_t type) 2026 { 2027 CC_SRC = n ? 
0 : CC_Z; 2028 2029 n = POPCOUNT(n, 0); 2030 n = POPCOUNT(n, 1); 2031 n = POPCOUNT(n, 2); 2032 n = POPCOUNT(n, 3); 2033 if (type == 1) 2034 return n & 0xff; 2035 2036 n = POPCOUNT(n, 4); 2037 #ifndef TARGET_X86_64 2038 return n; 2039 #else 2040 if (type == 2) 2041 return n & 0xff; 2042 2043 return POPCOUNT(n, 5); 2044 #endif 2045 } 2046 #endif 1410 2047 1411 2048 #undef SHIFT
Note: See TracChangeset for help on using the changeset viewer.