/* Common infrastructure for SSE4 tests (both x86 and amd64) */

#ifndef __SSE4_COMMON_H
#define __SSE4_COMMON_H

#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include "tests/malloc.h"
#include <string.h>

typedef unsigned char V128[16];
typedef unsigned int UInt;
typedef signed int Int;
typedef unsigned char UChar;
typedef unsigned long long int ULong;

typedef unsigned char Bool;
#define False ((Bool)0)
#define True  ((Bool)1)

typedef
   struct {
      V128 arg1;
      V128 arg2;
      V128 res;
   }
   RRArgs;

typedef
   struct {
      V128 arg1;
      V128 res;
   }
   RMArgs;

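/* Deterministic pseudo-random byte generator (the classic ANSI C LCG
   constants), so test output is reproducible from run to run. */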
static UChar randUChar ( void )
{
   static UInt seed = 80021;
   seed = 1103515245 * seed + 12345;
   return (seed >> 17) & 0xFF;
}

static ULong randULong ( void )
{
   Int i;
   ULong r = 0;
   for (i = 0; i < 8; i++) {
      r = (r << 8) | (ULong)(0xFF & randUChar());
   }
   return r;
}

static void randV128 ( V128* v )
{
   Int i;
   for (i = 0; i < 16; i++)
      (*v)[i] = randUChar();
}

static void showV128 ( V128* v )
{
   Int i;
   for (i = 15; i >= 0; i--)
      printf("%02x", (Int)(*v)[i]);
}

static void showMaskedV128 ( V128* v, V128* mask )
{
   Int i;
   for (i = 15; i >= 0; i--)
      printf("%02x", (Int)( ((*v)[i]) & ((*mask)[i]) ));
}

static void do64HLtoV128 ( /*OUT*/V128* res, ULong wHi, ULong wLo )
{
   // try to sidestep strict-aliasing snafus by memcpying explicitly
   UChar* p = (UChar*)res;
   memcpy(&p[8], (UChar*)&wHi, 8);
   memcpy(&p[0], (UChar*)&wLo, 8);
}

static void showIGVV( char* rOrM, char* op, Int imm,
                      ULong src64, V128* dst, V128* res )
{
   printf("%s %10s $%d ", rOrM, op, imm);
   printf("%016llx", src64);
   printf(" ");
   showV128(dst);
   printf(" ");
   showV128(res);
   printf("\n");
}

static void showIAG ( char* rOrM, char* op, Int imm,
                      V128* argL, ULong argR, ULong res )
{
   printf("%s %10s $%d ", rOrM, op, imm);
   showV128(argL);
   printf(" ");
   printf("%016llx", argR);
   printf(" ");
   printf("%016llx", res);
   printf("\n");
}

static void showIAA ( char* rOrM, char* op, Int imm, RRArgs* rra, V128* rmask )
{
   printf("%s %10s $%d ", rOrM, op, imm);
   showV128(&rra->arg1);
   printf(" ");
   showV128(&rra->arg2);
   printf(" ");
   showMaskedV128(&rra->res, rmask);
   printf("\n");
}

static void showAA ( char* rOrM, char* op, RRArgs* rra, V128* rmask )
{
   printf("%s %10s ", rOrM, op);
   showV128(&rra->arg1);
   printf(" ");
   showV128(&rra->arg2);
   printf(" ");
   showMaskedV128(&rra->res, rmask);
   printf("\n");
}

/* Note: these are little endian.  Hence first byte is the least
   significant byte of lane zero. */

/* Mask for insns where all result bits are non-approximated. */
static V128 AllMask  = { 0xFF,0xFF,0xFF,0xFF, 0xFF,0xFF,0xFF,0xFF,
                         0xFF,0xFF,0xFF,0xFF, 0xFF,0xFF,0xFF,0xFF };

/* Mask for insns which produce approximated packed single-precision
   results: only the sign and exponent bits of each 32-bit lane are
   compared. */
__attribute__((unused))
static V128 ApproxPS = { 0x00,0x00,0x80,0xFF, 0x00,0x00,0x80,0xFF,
                         0x00,0x00,0x80,0xFF, 0x00,0x00,0x80,0xFF };

/* Mask for insns which produce an approximated scalar single-precision
   result in the lowest lane only. */
__attribute__((unused))
static V128 ApproxSS = { 0x00,0x00,0x80,0xFF, 0xFF,0xFF,0xFF,0xFF,
                         0xFF,0xFF,0xFF,0xFF, 0xFF,0xFF,0xFF,0xFF };

static V128 fives    = { 0x55,0x55,0x55,0x55, 0x55,0x55,0x55,0x55,
                         0x55,0x55,0x55,0x55, 0x55,0x55,0x55,0x55 };

static V128 zeroes   = { 0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,
                         0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00 };

/* Helper functions for creating special float values */
static inline double mkPosInf ( void ) { return  1.0 / 0.0; }
static inline double mkNegInf ( void ) { return -mkPosInf(); }
static inline double mkPosNan ( void ) { return  0.0 / 0.0; }
static inline double mkNegNan ( void ) { return -mkPosNan(); }

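/* Illustrative sketch (not part of the original header): the helpers
   above combine with do64HLtoV128 to build a V128 holding special
   doubles, e.g. +Inf in the upper 64-bit lane and a NaN in the lower
   lane.  The function name is hypothetical. */
__attribute__((unused))
static void exampleSpecialV128 ( /*OUT*/V128* v )
{
   ULong hi, lo;
   double d;
   d = mkPosInf(); memcpy(&hi, &d, 8);   /* bit pattern of +Inf */
   d = mkPosNan(); memcpy(&lo, &d, 8);   /* bit pattern of a NaN */
   do64HLtoV128(v, hi, lo);
}
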
/* Macros for testing XMM register-to-register and memory-to-register
   operations */

/* Use xmm7 for 32-bit x86, xmm11 for amd64 (xmm8-xmm15 don't exist in
   32-bit mode). */
#ifdef __x86_64__
#define XMMREG_DST "xmm11"
#else
#define XMMREG_DST "xmm7"
#endif

#define DO_imm_r_r(_opname, _imm, _src, _dst)   \
   {  \
      V128 _tmp;  \
      __asm__ __volatile__(  \
         "movupd (%0), %%xmm2"           "\n\t"  \
         "movupd (%1), %%" XMMREG_DST    "\n\t"  \
         _opname " $" #_imm ", %%xmm2, %%" XMMREG_DST "\n\t"  \
         "movupd %%" XMMREG_DST ", (%2)" "\n"  \
         : /*out*/ : /*in*/ "r"(&(_src)), "r"(&(_dst)), "r"(&(_tmp))  \
         : "cc", "memory", "xmm2", XMMREG_DST  \
      );  \
      RRArgs rra;  \
      memcpy(&rra.arg1, &(_src), sizeof(V128));  \
      memcpy(&rra.arg2, &(_dst), sizeof(V128));  \
      memcpy(&rra.res,  &(_tmp), sizeof(V128));  \
      showIAA("r", (_opname), (_imm), &rra, &AllMask);  \
   }

#define DO_imm_m_r(_opname, _imm, _src, _dst)   \
   {  \
      V128 _tmp;  \
      V128* _srcM = memalign16(sizeof(V128));  \
      memcpy(_srcM, &(_src), sizeof(V128));  \
      __asm__ __volatile__(  \
         "movupd (%1), %%" XMMREG_DST    "\n\t"  \
         _opname " $" #_imm ", (%0), %%" XMMREG_DST "\n\t"  \
         "movupd %%" XMMREG_DST ", (%2)" "\n"  \
         : /*out*/ : /*in*/ "r"(_srcM), "r"(&(_dst)), "r"(&(_tmp))  \
         : "cc", "memory", XMMREG_DST  \
      );  \
      RRArgs rra;  \
      memcpy(&rra.arg1, &(_src), sizeof(V128));  \
      memcpy(&rra.arg2, &(_dst), sizeof(V128));  \
      memcpy(&rra.res,  &(_tmp), sizeof(V128));  \
      showIAA("m", (_opname), (_imm), &rra, &AllMask);  \
      free(_srcM);  \
   }

#define DO_imm_mandr_r(_opname, _imm, _src, _dst)  \
   DO_imm_r_r( _opname, _imm, _src, _dst )  \
   DO_imm_m_r( _opname, _imm, _src, _dst )

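/* Illustrative sketch (not in the original header): an immediate-form
   test follows the same shape as the test functions below, passing a
   literal immediate so that #_imm stringises correctly.  PBLENDW is
   used purely as an example opcode; this function is hypothetical. */
__attribute__((unused))
static inline void example_PBLENDW ( void )
{
   V128 src, dst;
   Int i;
   for (i = 0; i < 10; i++) {
      randV128(&src);
      randV128(&dst);
      DO_imm_mandr_r("pblendw", 0x55, src, dst);
   }
}
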
#define DO_r_r(_opname, _src, _dst)  \
   {  \
      V128 _tmp;  \
      __asm__ __volatile__(  \
         "movupd (%0), %%xmm2"           "\n\t"  \
         "movupd (%1), %%" XMMREG_DST    "\n\t"  \
         _opname " %%xmm2, %%" XMMREG_DST "\n\t"  \
         "movupd %%" XMMREG_DST ", (%2)" "\n"  \
         : /*out*/ : /*in*/ "r"(&(_src)), "r"(&(_dst)), "r"(&(_tmp))  \
         : "cc", "memory", "xmm2", XMMREG_DST  \
      );  \
      RRArgs rra;  \
      memcpy(&rra.arg1, &(_src), sizeof(V128));  \
      memcpy(&rra.arg2, &(_dst), sizeof(V128));  \
      memcpy(&rra.res,  &(_tmp), sizeof(V128));  \
      showAA("r", (_opname), &rra, &AllMask);  \
   }

#define DO_m_r(_opname, _src, _dst)  \
   {  \
      V128 _tmp;  \
      V128* _srcM = memalign16(sizeof(V128));  \
      memcpy(_srcM, &(_src), sizeof(V128));  \
      __asm__ __volatile__(  \
         "movupd (%1), %%" XMMREG_DST    "\n\t"  \
         _opname " (%0), %%" XMMREG_DST  "\n\t"  \
         "movupd %%" XMMREG_DST ", (%2)" "\n"  \
         : /*out*/ : /*in*/ "r"(_srcM), "r"(&(_dst)), "r"(&(_tmp))  \
         : "cc", "memory", XMMREG_DST  \
      );  \
      RRArgs rra;  \
      memcpy(&rra.arg1, &(_src), sizeof(V128));  \
      memcpy(&rra.arg2, &(_dst), sizeof(V128));  \
      memcpy(&rra.res,  &(_tmp), sizeof(V128));  \
      showAA("m", (_opname), &rra, &AllMask);  \
      free(_srcM);  \
   }

#define DO_mandr_r(_opname, _src, _dst)  \
   DO_r_r(_opname, _src, _dst)  \
   DO_m_r(_opname, _src, _dst)

/* Common test functions */

static inline void test_PMAXSD ( void )
{
   V128 src, dst;
   Int i;
   for (i = 0; i < 10; i++) {
      randV128(&src);
      randV128(&dst);
      DO_mandr_r("pmaxsd", src, dst);
   }
}

static inline void test_PMINSD ( void )
{
   V128 src, dst;
   Int i;
   for (i = 0; i < 10; i++) {
      randV128(&src);
      randV128(&dst);
      DO_mandr_r("pminsd", src, dst);
   }
}

static inline void test_PMAXSB ( void )
{
   V128 src, dst;
   Int i;
   for (i = 0; i < 10; i++) {
      randV128(&src);
      randV128(&dst);
      DO_mandr_r("pmaxsb", src, dst);
   }
}

static inline void test_PMAXUD ( void )
{
   V128 src, dst;
   Int i;
   for (i = 0; i < 10; i++) {
      randV128(&src);
      randV128(&dst);
      DO_mandr_r("pmaxud", src, dst);
   }
}

static inline void test_PMAXUW ( void )
{
   V128 src, dst;
   Int i;
   for (i = 0; i < 10; i++) {
      randV128(&src);
      randV128(&dst);
      DO_mandr_r("pmaxuw", src, dst);
   }
}

static inline void test_PMINSB ( void )
{
   V128 src, dst;
   Int i;
   for (i = 0; i < 10; i++) {
      randV128(&src);
      randV128(&dst);
      DO_mandr_r("pminsb", src, dst);
   }
}

static inline void test_PMINUD ( void )
{
   V128 src, dst;
   Int i;
   for (i = 0; i < 10; i++) {
      randV128(&src);
      randV128(&dst);
      DO_mandr_r("pminud", src, dst);
   }
}

static inline void test_PMINUW ( void )
{
   V128 src, dst;
   Int i;
   for (i = 0; i < 10; i++) {
      randV128(&src);
      randV128(&dst);
      DO_mandr_r("pminuw", src, dst);
   }
}

static inline void test_PMULLD ( void )
{
   V128 src, dst;
   Int i;
   for (i = 0; i < 10; i++) {
      randV128(&src);
      randV128(&dst);
      DO_mandr_r("pmulld", src, dst);
   }
}

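/* Illustrative sketch (not part of this header): a driver such as
   none/tests/x86/sse4-x86.c includes this file and simply calls the
   test functions it needs, e.g.

      int main ( void )
      {
         test_PMAXSD();
         test_PMINSD();
         test_PMULLD();
         return 0;
      }
*/
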
#endif /* __SSE4_COMMON_H */