Rewrite the byte swapping macros.

The clever pointer tricks were not actually working, so in general we
were still doing byte-by-byte moves.  By just doing a memcpy and the
obvious byte swap code, we end up generating actual byte swap
instructions, thanks to optimizing compilers.

         text	   data	    bss	    dec	    hex	filename
before: 2240807	  51552	 132016	2424375	 24fe37	hw/xfree86/Xorg
after:  2215167	  51552	 132016	2398735	 249a0f	hw/xfree86/Xorg

Signed-off-by: Eric Anholt <eric@anholt.net>
Reviewed-by: Keith Packard <keithp@keithp.com>
This commit is contained in:
Eric Anholt 2017-03-27 14:59:06 -07:00
parent 5ef4e78513
commit 563b6ee873
4 changed files with 37 additions and 87 deletions

View File

@ -600,7 +600,7 @@ are: REQUEST, REQUEST_SIZE_MATCH, REQUEST_AT_LEAST_SIZE,
REQUEST_FIXED_SIZE, LEGAL_NEW_RESOURCE, and REQUEST_FIXED_SIZE, LEGAL_NEW_RESOURCE, and
VALIDATE_DRAWABLE_AND_GC. Useful byte swapping macros can be found VALIDATE_DRAWABLE_AND_GC. Useful byte swapping macros can be found
in Xserver/include/dix.h: WriteReplyToClient and WriteSwappedDataToClient; and in Xserver/include/dix.h: WriteReplyToClient and WriteSwappedDataToClient; and
in Xserver/include/misc.h: lswapl, lswaps, LengthRestB, LengthRestS, in Xserver/include/misc.h: bswap_64, bswap_32, bswap_16, LengthRestB, LengthRestS,
LengthRestL, SwapRestS, SwapRestL, swapl, swaps, cpswapl, and cpswaps.</para> LengthRestL, SwapRestS, SwapRestL, swapl, swaps, cpswapl, and cpswaps.</para>
</section> </section>
</section> </section>

View File

@ -37,25 +37,4 @@
#include "misc.h" #include "misc.h"
/*
 * Return a byte-swapped copy of a 16-bit value.
 *
 * val is received by value, so the swap is applied to this function's
 * private copy and the caller's object is never modified.  The swap itself
 * is delegated to swap_uint16(), which operates in place through a pointer
 * (presumably the helper provided by misc.h -- confirm against that header).
 */
static inline uint16_t
bswap_16(uint16_t val)
{
swap_uint16(&val);
return val;
}
/*
 * Return a byte-swapped copy of a 32-bit value.
 *
 * val is received by value, so the swap is applied to this function's
 * private copy and the caller's object is never modified.  The swap itself
 * is delegated to swap_uint32(), which operates in place through a pointer
 * (presumably the helper provided by misc.h -- confirm against that header).
 */
static inline uint32_t
bswap_32(uint32_t val)
{
swap_uint32(&val);
return val;
}
/*
 * Return a byte-swapped copy of a 64-bit value.
 *
 * val is received by value, so the swap is applied to this function's
 * private copy and the caller's object is never modified.  The swap itself
 * is delegated to swap_uint64(), which operates in place through a pointer
 * (presumably the helper provided by misc.h -- confirm against that header).
 */
static inline uint64_t
bswap_64(uint64_t val)
{
swap_uint64(&val);
return val;
}
#endif /* !defined(__GLXBYTEORDER_H__) */ #endif /* !defined(__GLXBYTEORDER_H__) */

View File

@ -128,21 +128,6 @@ typedef struct _xReq *xReqPtr;
#define USE_BACKGROUND_PIXEL 3 #define USE_BACKGROUND_PIXEL 3
#define USE_BORDER_PIXEL 3 #define USE_BORDER_PIXEL 3
/*
 * Reverse the byte order of a 32-bit value passed by value.
 *
 * Each of the four bytes is isolated first and then placed at the mirrored
 * position, so the least significant byte of x becomes the most significant
 * byte of the result and vice versa.
 */
static inline uint32_t
lswapl(uint32_t x)
{
    const uint32_t byte0 = x & 0xff;          /* bits  7..0  */
    const uint32_t byte1 = (x >> 8) & 0xff;   /* bits 15..8  */
    const uint32_t byte2 = (x >> 16) & 0xff;  /* bits 23..16 */
    const uint32_t byte3 = (x >> 24) & 0xff;  /* bits 31..24 */

    return (byte0 << 24) | (byte1 << 16) | (byte2 << 8) | byte3;
}
/*
 * Exchange the two bytes of a 16-bit value passed by value.
 *
 * The low and high bytes are pulled out separately and recombined in the
 * opposite order; the result is narrowed back to 16 bits on return.
 */
static inline uint16_t
lswaps(uint16_t x)
{
    const unsigned int low = x & 0xffu;
    const unsigned int high = (x >> 8) & 0xffu;

    return (uint16_t) ((low << 8) | high);
}
#undef min #undef min
#undef max #undef max
@ -311,88 +296,74 @@ __builtin_constant_p(int x)
} }
#endif #endif
/* byte swap a 64-bit value */ static inline uint64_t
static inline void bswap_64(uint64_t x)
swap_uint64(uint64_t *x)
{ {
char n; return (((x & 0xFF00000000000000ull) >> 56) |
((x & 0x00FF000000000000ull) >> 40) |
n = ((char *) x)[0]; ((x & 0x0000FF0000000000ull) >> 24) |
((char *) x)[0] = ((char *) x)[7]; ((x & 0x000000FF00000000ull) >> 8) |
((char *) x)[7] = n; ((x & 0x00000000FF000000ull) << 8) |
((x & 0x0000000000FF0000ull) << 24) |
n = ((char *) x)[1]; ((x & 0x000000000000FF00ull) << 40) |
((char *) x)[1] = ((char *) x)[6]; ((x & 0x00000000000000FFull) << 56));
((char *) x)[6] = n;
n = ((char *) x)[2];
((char *) x)[2] = ((char *) x)[5];
((char *) x)[5] = n;
n = ((char *) x)[3];
((char *) x)[3] = ((char *) x)[4];
((char *) x)[4] = n;
} }
#define swapll(x) do { \ #define swapll(x) do { \
uint64_t temp; \
if (sizeof(*(x)) != 8) \ if (sizeof(*(x)) != 8) \
wrong_size(); \ wrong_size(); \
swap_uint64((uint64_t *)(x)); \ memcpy(&temp, x, 8); \
temp = bswap_64(temp); \
memcpy(x, &temp, 8); \
} while (0) } while (0)
/* byte swap a 32-bit value */ static inline uint32_t
static inline void bswap_32(uint32_t x)
swap_uint32(uint32_t * x)
{ {
char n = ((char *) x)[0]; return (((x & 0xFF000000) >> 24) |
((x & 0x00FF0000) >> 8) |
((char *) x)[0] = ((char *) x)[3]; ((x & 0x0000FF00) << 8) |
((char *) x)[3] = n; ((x & 0x000000FF) << 24));
n = ((char *) x)[1];
((char *) x)[1] = ((char *) x)[2];
((char *) x)[2] = n;
} }
#define swapl(x) do { \ #define swapl(x) do { \
uint32_t temp; \
if (sizeof(*(x)) != 4) \ if (sizeof(*(x)) != 4) \
wrong_size(); \ wrong_size(); \
if (__builtin_constant_p((uintptr_t)(x) & 3) && ((uintptr_t)(x) & 3) == 0) \ memcpy(&temp, x, 4); \
*(x) = lswapl(*(x)); \ temp = bswap_32(temp); \
else \ memcpy(x, &temp, 4); \
swap_uint32((uint32_t *)(x)); \
} while (0) } while (0)
/* byte swap a 16-bit value */ static inline uint16_t
static inline void bswap_16(uint16_t x)
swap_uint16(uint16_t * x)
{ {
char n = ((char *) x)[0]; return (((x & 0xFF00) >> 8) |
((x & 0x00FF) << 8));
((char *) x)[0] = ((char *) x)[1];
((char *) x)[1] = n;
} }
#define swaps(x) do { \ #define swaps(x) do { \
uint16_t temp; \
if (sizeof(*(x)) != 2) \ if (sizeof(*(x)) != 2) \
wrong_size(); \ wrong_size(); \
if (__builtin_constant_p((uintptr_t)(x) & 1) && ((uintptr_t)(x) & 1) == 0) \ memcpy(&temp, x, 2); \
*(x) = lswaps(*(x)); \ temp = bswap_16(temp); \
else \ memcpy(x, &temp, 2); \
swap_uint16((uint16_t *)(x)); \
} while (0) } while (0)
/* copy 32-bit value from src to dst byteswapping on the way */ /* copy 32-bit value from src to dst byteswapping on the way */
#define cpswapl(src, dst) do { \ #define cpswapl(src, dst) do { \
if (sizeof((src)) != 4 || sizeof((dst)) != 4) \ if (sizeof((src)) != 4 || sizeof((dst)) != 4) \
wrong_size(); \ wrong_size(); \
(dst) = lswapl((src)); \ (dst) = bswap_32((src)); \
} while (0) } while (0)
/* copy short from src to dst byteswapping on the way */ /* copy short from src to dst byteswapping on the way */
#define cpswaps(src, dst) do { \ #define cpswaps(src, dst) do { \
if (sizeof((src)) != 2 || sizeof((dst)) != 2) \ if (sizeof((src)) != 2 || sizeof((dst)) != 2) \
wrong_size(); \ wrong_size(); \
(dst) = lswaps((src)); \ (dst) = bswap_16((src)); \
} while (0) } while (0)
extern _X_EXPORT void SwapLongs(CARD32 *list, unsigned long count); extern _X_EXPORT void SwapLongs(CARD32 *list, unsigned long count);

View File

@ -108,12 +108,12 @@ static ConnectionOutputPtr FreeOutputs = (ConnectionOutputPtr) NULL;
static OsCommPtr AvailableInput = (OsCommPtr) NULL; static OsCommPtr AvailableInput = (OsCommPtr) NULL;
#define get_req_len(req,cli) ((cli)->swapped ? \ #define get_req_len(req,cli) ((cli)->swapped ? \
lswaps((req)->length) : (req)->length) bswap_16((req)->length) : (req)->length)
#include <X11/extensions/bigreqsproto.h> #include <X11/extensions/bigreqsproto.h>
#define get_big_req_len(req,cli) ((cli)->swapped ? \ #define get_big_req_len(req,cli) ((cli)->swapped ? \
lswapl(((xBigReq *)(req))->length) : \ bswap_32(((xBigReq *)(req))->length) : \
((xBigReq *)(req))->length) ((xBigReq *)(req))->length)
#define BUFSIZE 16384 #define BUFSIZE 16384