valgrind/none/tests/sse4-common.h
Commit c3339bae0e by Alexandra Hájková: Add SSE4.1 PMULLD instruction for x86 32 bit
Support the pmulld instruction (packed multiply of 32-bit doubleword
integers) in guest_x86_toIR.c and host_x86_isel.c. Add a test function to
sse4-common.h and update none/tests/x86/sse4-x86.c to exercise the
instruction.

BZ: https://bugs.kde.org/show_bug.cgi?id=513475
2025-12-26 09:17:48 +01:00
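For reference, pmulld multiplies each pair of corresponding 32-bit lanes and
writes back the low 32 bits of each product (the low half is the same whether
the operands are treated as signed or unsigned). A minimal scalar sketch of
the lane semantics; ref_pmulld is a hypothetical helper, not part of the file
below:

/* Lane-wise model of pmulld: keep the low 32 bits of each 32x32 product.
   Unsigned arithmetic is used so the wraparound is well defined in C. */
static void ref_pmulld ( unsigned int res[4],
                         const unsigned int a[4],
                         const unsigned int b[4] )
{
   int i;
   for (i = 0; i < 4; i++)
      res[i] = a[i] * b[i];   /* wraps mod 2^32 == low 32 bits of product */
}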

/* Common infrastructure for SSE4 tests (both x86 and amd64) */
#ifndef __SSE4_COMMON_H
#define __SSE4_COMMON_H
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include "tests/malloc.h"
#include <string.h>
typedef unsigned char V128[16];
typedef unsigned int UInt;
typedef signed int Int;
typedef unsigned char UChar;
typedef unsigned long long int ULong;
typedef unsigned char Bool;
#define False ((Bool)0)
#define True ((Bool)1)
typedef
   struct {
      V128 arg1;
      V128 arg2;
      V128 res;
   }
   RRArgs;

typedef
   struct {
      V128 arg1;
      V128 res;
   }
   RMArgs;
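/* Deterministic pseudo-random generator (the classic ANSI C LCG constants
   1103515245 and 12345), so the test output is reproducible run to run. */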
static UChar randUChar ( void )
{
   static UInt seed = 80021;
   seed = 1103515245 * seed + 12345;
   return (seed >> 17) & 0xFF;
}

static ULong randULong ( void )
{
   Int i;
   ULong r = 0;
   for (i = 0; i < 8; i++) {
      r = (r << 8) | (ULong)(0xFF & randUChar());
   }
   return r;
}

static void randV128 ( V128* v )
{
   Int i;
   for (i = 0; i < 16; i++)
      (*v)[i] = randUChar();
}

static void showV128 ( V128* v )
{
   Int i;
   for (i = 15; i >= 0; i--)
      printf("%02x", (Int)(*v)[i]);
}

static void showMaskedV128 ( V128* v, V128* mask )
{
   Int i;
   for (i = 15; i >= 0; i--)
      printf("%02x", (Int)( ((*v)[i]) & ((*mask)[i]) ));
}

static void do64HLtoV128 ( /*OUT*/V128* res, ULong wHi, ULong wLo )
{
   // try to sidestep strict-aliasing snafus by memcpying explicitly
   UChar* p = (UChar*)res;
   memcpy(&p[8], (UChar*)&wHi, 8);
   memcpy(&p[0], (UChar*)&wLo, 8);
}
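/* Illustrative usage (not part of the original file):
      V128 v;
      do64HLtoV128(&v, 0x0123456789abcdefULL, 0xfedcba9876543210ULL);
   On this little-endian target v[0] == 0x10 and v[15] == 0x01, so
   showV128(&v) prints 0123456789abcdeffedcba9876543210. */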
static void showIGVV ( char* rOrM, char* op, Int imm,
                       ULong src64, V128* dst, V128* res )
{
   printf("%s %10s $%d ", rOrM, op, imm);
   printf("%016llx", src64);
   printf(" ");
   showV128(dst);
   printf(" ");
   showV128(res);
   printf("\n");
}

static void showIAG ( char* rOrM, char* op, Int imm,
                      V128* argL, ULong argR, ULong res )
{
   printf("%s %10s $%d ", rOrM, op, imm);
   showV128(argL);
   printf(" ");
   printf("%016llx", argR);
   printf(" ");
   printf("%016llx", res);
   printf("\n");
}

static void showIAA ( char* rOrM, char* op, Int imm, RRArgs* rra, V128* rmask )
{
   printf("%s %10s $%d ", rOrM, op, imm);
   showV128(&rra->arg1);
   printf(" ");
   showV128(&rra->arg2);
   printf(" ");
   showMaskedV128(&rra->res, rmask);
   printf("\n");
}

static void showAA ( char* rOrM, char* op, RRArgs* rra, V128* rmask )
{
   printf("%s %10s ", rOrM, op);
   showV128(&rra->arg1);
   printf(" ");
   showV128(&rra->arg2);
   printf(" ");
   showMaskedV128(&rra->res, rmask);
   printf("\n");
}
/* Note: these are little endian.  Hence the first byte is the least
   significant byte of lane zero. */

/* Mask for insns where all result bits are non-approximated. */
static V128 AllMask  = { 0xFF,0xFF,0xFF,0xFF, 0xFF,0xFF,0xFF,0xFF,
                         0xFF,0xFF,0xFF,0xFF, 0xFF,0xFF,0xFF,0xFF };

/* Mask for insns which produce approximated packed-single results. */
__attribute__((unused))
static V128 ApproxPS = { 0x00,0x00,0x80,0xFF, 0x00,0x00,0x80,0xFF,
                         0x00,0x00,0x80,0xFF, 0x00,0x00,0x80,0xFF };

/* Mask for insns which produce an approximated scalar-single result. */
__attribute__((unused))
static V128 ApproxSS = { 0x00,0x00,0x80,0xFF, 0xFF,0xFF,0xFF,0xFF,
                         0xFF,0xFF,0xFF,0xFF, 0xFF,0xFF,0xFF,0xFF };
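/* Each masked lane { 0x00,0x00,0x80,0xFF } reads as the 32-bit value
   0xFF800000, i.e. the sign and exponent bits of an IEEE-754 single.  The
   mantissa bytes are ignored, which is the right check for instructions
   that only approximate their result (rcpps/rsqrtps-style insns); which
   insns use these masks is decided by the individual tests, not here. */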
static V128 fives    = { 0x55,0x55,0x55,0x55, 0x55,0x55,0x55,0x55,
                         0x55,0x55,0x55,0x55, 0x55,0x55,0x55,0x55 };

static V128 zeroes   = { 0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,
                         0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00 };
/* Helper functions for creating special float values */
static inline double mkPosInf ( void ) { return 1.0 / 0.0; }
static inline double mkNegInf ( void ) { return -mkPosInf(); }
static inline double mkPosNan ( void ) { return 0.0 / 0.0; }
static inline double mkNegNan ( void ) { return -mkPosNan(); }
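/* Example: double d = mkNegInf(); d then compares less than every finite
   double, and mkPosNan() yields a quiet NaN that compares unequal to
   everything, including itself. */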
/* Macros for testing XMM register to register and memory to register operations */
/* Use xmm7 for 32-bit x86, xmm11 for amd64 (xmm8-15 don't exist in 32-bit mode) */
#ifdef __x86_64__
#define XMMREG_DST "xmm11"
#else
#define XMMREG_DST "xmm7"
#endif
#define DO_imm_r_r(_opname, _imm, _src, _dst) \
   { \
      V128 _tmp; \
      __asm__ __volatile__( \
         "movupd (%0), %%xmm2" "\n\t" \
         "movupd (%1), %%" XMMREG_DST "\n\t" \
         _opname " $" #_imm ", %%xmm2, %%" XMMREG_DST "\n\t" \
         "movupd %%" XMMREG_DST ", (%2)" "\n" \
         : /*out*/ : /*in*/ "r"(&(_src)), "r"(&(_dst)), "r"(&(_tmp)) \
         : "cc", "memory", "xmm2", XMMREG_DST \
      ); \
      RRArgs rra; \
      memcpy(&rra.arg1, &(_src), sizeof(V128)); \
      memcpy(&rra.arg2, &(_dst), sizeof(V128)); \
      memcpy(&rra.res, &(_tmp), sizeof(V128)); \
      showIAA("r", (_opname), (_imm), &rra, &AllMask); \
   }

#define DO_imm_m_r(_opname, _imm, _src, _dst) \
   { \
      V128 _tmp; \
      V128* _srcM = memalign16(sizeof(V128)); \
      memcpy(_srcM, &(_src), sizeof(V128)); \
      __asm__ __volatile__( \
         "movupd (%1), %%" XMMREG_DST "\n\t" \
         _opname " $" #_imm ", (%0), %%" XMMREG_DST "\n\t" \
         "movupd %%" XMMREG_DST ", (%2)" "\n" \
         : /*out*/ : /*in*/ "r"(_srcM), "r"(&(_dst)), "r"(&(_tmp)) \
         : "cc", "memory", XMMREG_DST \
      ); \
      RRArgs rra; \
      memcpy(&rra.arg1, &(_src), sizeof(V128)); \
      memcpy(&rra.arg2, &(_dst), sizeof(V128)); \
      memcpy(&rra.res, &(_tmp), sizeof(V128)); \
      showIAA("m", (_opname), (_imm), &rra, &AllMask); \
      free(_srcM); \
   }

#define DO_imm_mandr_r(_opname, _imm, _src, _dst) \
   DO_imm_r_r( _opname, _imm, _src, _dst ) \
   DO_imm_m_r( _opname, _imm, _src, _dst )
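/* Hypothetical usage (no imm-form test is defined in this header):
      V128 src, dst;
      randV128(&src); randV128(&dst);
      DO_imm_mandr_r("pblendw", 3, src, dst);
   runs "pblendw $3, ..." once register-to-register and once
   memory-to-register, printing both results. */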
#define DO_r_r(_opname, _src, _dst) \
   { \
      V128 _tmp; \
      __asm__ __volatile__( \
         "movupd (%0), %%xmm2" "\n\t" \
         "movupd (%1), %%" XMMREG_DST "\n\t" \
         _opname " %%xmm2, %%" XMMREG_DST "\n\t" \
         "movupd %%" XMMREG_DST ", (%2)" "\n" \
         : /*out*/ : /*in*/ "r"(&(_src)), "r"(&(_dst)), "r"(&(_tmp)) \
         : "cc", "memory", "xmm2", XMMREG_DST \
      ); \
      RRArgs rra; \
      memcpy(&rra.arg1, &(_src), sizeof(V128)); \
      memcpy(&rra.arg2, &(_dst), sizeof(V128)); \
      memcpy(&rra.res, &(_tmp), sizeof(V128)); \
      showAA("r", (_opname), &rra, &AllMask); \
   }

#define DO_m_r(_opname, _src, _dst) \
   { \
      V128 _tmp; \
      V128* _srcM = memalign16(sizeof(V128)); \
      memcpy(_srcM, &(_src), sizeof(V128)); \
      __asm__ __volatile__( \
         "movupd (%1), %%" XMMREG_DST "\n\t" \
         _opname " (%0), %%" XMMREG_DST "\n\t" \
         "movupd %%" XMMREG_DST ", (%2)" "\n" \
         : /*out*/ : /*in*/ "r"(_srcM), "r"(&(_dst)), "r"(&(_tmp)) \
         : "cc", "memory", XMMREG_DST \
      ); \
      RRArgs rra; \
      memcpy(&rra.arg1, &(_src), sizeof(V128)); \
      memcpy(&rra.arg2, &(_dst), sizeof(V128)); \
      memcpy(&rra.res, &(_tmp), sizeof(V128)); \
      showAA("m", (_opname), &rra, &AllMask); \
      free(_srcM); \
   }

#define DO_mandr_r(_opname, _src, _dst) \
   DO_r_r(_opname, _src, _dst) \
   DO_m_r(_opname, _src, _dst)
/* Common test functions */
static inline void test_PMAXSD ( void )
{
   V128 src, dst;
   Int i;
   for (i = 0; i < 10; i++) {
      randV128(&src);
      randV128(&dst);
      DO_mandr_r("pmaxsd", src, dst);
   }
}

static inline void test_PMINSD ( void )
{
   V128 src, dst;
   Int i;
   for (i = 0; i < 10; i++) {
      randV128(&src);
      randV128(&dst);
      DO_mandr_r("pminsd", src, dst);
   }
}

static inline void test_PMAXSB ( void )
{
   V128 src, dst;
   Int i;
   for (i = 0; i < 10; i++) {
      randV128(&src);
      randV128(&dst);
      DO_mandr_r("pmaxsb", src, dst);
   }
}

static inline void test_PMAXUD ( void )
{
   V128 src, dst;
   Int i;
   for (i = 0; i < 10; i++) {
      randV128(&src);
      randV128(&dst);
      DO_mandr_r("pmaxud", src, dst);
   }
}

static inline void test_PMAXUW ( void )
{
   V128 src, dst;
   Int i;
   for (i = 0; i < 10; i++) {
      randV128(&src);
      randV128(&dst);
      DO_mandr_r("pmaxuw", src, dst);
   }
}

static inline void test_PMINSB ( void )
{
   V128 src, dst;
   Int i;
   for (i = 0; i < 10; i++) {
      randV128(&src);
      randV128(&dst);
      DO_mandr_r("pminsb", src, dst);
   }
}

static inline void test_PMINUD ( void )
{
   V128 src, dst;
   Int i;
   for (i = 0; i < 10; i++) {
      randV128(&src);
      randV128(&dst);
      DO_mandr_r("pminud", src, dst);
   }
}

static inline void test_PMINUW ( void )
{
   V128 src, dst;
   Int i;
   for (i = 0; i < 10; i++) {
      randV128(&src);
      randV128(&dst);
      DO_mandr_r("pminuw", src, dst);
   }
}

static inline void test_PMULLD ( void )
{
   V128 src, dst;
   Int i;
   for (i = 0; i < 10; i++) {
      randV128(&src);
      randV128(&dst);
      DO_mandr_r("pmulld", src, dst);
   }
}
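/* A caller (e.g. none/tests/x86/sse4-x86.c, per the commit message above)
   is expected to invoke these in sequence; a minimal sketch, assuming the
   caller tests only pmulld:
      int main ( void )
      {
         test_PMULLD();
         return 0;
      }
*/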
#endif /* __SSE4_COMMON_H */