diff options
author    | Jussi Kivilinna <jussi.kivilinna@iki.fi> | 2015-03-21 13:01:38 +0200
committer | Jussi Kivilinna <jussi.kivilinna@iki.fi> | 2015-03-21 13:01:38 +0200
commit | 92fa5f16d69707e302c0f85b2e5e80af8dc037f1 (patch) | |
tree | 4bd77b591c6053ba673d437a95b85b754069059f | |
parent | aa234561d00c3fb15fe501df4bf58f3db7c7c06b (diff) | |
download | libgcrypt-92fa5f16d69707e302c0f85b2e5e80af8dc037f1.tar.gz |
bufhelp: use one-byte aligned type for unaligned memory accesses
* cipher/bufhelp.h (BUFHELP_FAST_UNALIGNED_ACCESS): Enable only when
HAVE_GCC_ATTRIBUTE_PACKED and HAVE_GCC_ATTRIBUTE_ALIGNED are defined.
(bufhelp_int_t): New type.
(buf_cpy, buf_xor, buf_xor_1, buf_xor_2dst, buf_xor_n_copy_2): Use
'bufhelp_int_t'.
[BUFHELP_FAST_UNALIGNED_ACCESS] (bufhelp_u32_t, bufhelp_u64_t): New.
[BUFHELP_FAST_UNALIGNED_ACCESS] (buf_get_be32, buf_get_le32)
(buf_put_be32, buf_put_le32, buf_get_be64, buf_get_le64)
(buf_put_be64, buf_put_le64): Use 'bufhelp_uXX_t'.
* configure.ac (gcry_cv_gcc_attribute_packed): New.
--
Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
-rw-r--r-- | cipher/bufhelp.h | 147
-rw-r--r-- | configure.ac     |  18
2 files changed, 108 insertions, 57 deletions
diff --git a/cipher/bufhelp.h b/cipher/bufhelp.h index c6bedc51..fb87939b 100644 --- a/cipher/bufhelp.h +++ b/cipher/bufhelp.h @@ -23,10 +23,13 @@ #include "bithelp.h" -#if defined(__i386__) || defined(__x86_64__) || \ - defined(__powerpc__) || defined(__powerpc64__) || \ - (defined(__arm__) && defined(__ARM_FEATURE_UNALIGNED)) || \ - defined(__aarch64__) +#undef BUFHELP_FAST_UNALIGNED_ACCESS +#if defined(HAVE_GCC_ATTRIBUTE_PACKED) && \ + defined(HAVE_GCC_ATTRIBUTE_ALIGNED) && \ + (defined(__i386__) || defined(__x86_64__) || \ + defined(__powerpc__) || defined(__powerpc64__) || \ + (defined(__arm__) && defined(__ARM_FEATURE_UNALIGNED)) || \ + defined(__aarch64__)) /* These architectures are able of unaligned memory accesses and can handle those fast. */ @@ -34,6 +37,25 @@ #endif +#ifdef BUFHELP_FAST_UNALIGNED_ACCESS +/* Define type with one-byte alignment on architectures with fast unaligned + memory accesses. + */ +typedef struct bufhelp_int_s +{ + uintptr_t a; +} __attribute__((packed, aligned(1))) bufhelp_int_t; +#else +/* Define type with default alignment for other architectures (unaligned + accessed handled in per byte loops). + */ +typedef struct bufhelp_int_s +{ + uintptr_t a; +} bufhelp_int_t; +#endif + + /* Optimized function for small buffer copying */ static inline void buf_cpy(void *_dst, const void *_src, size_t len) @@ -44,21 +66,21 @@ buf_cpy(void *_dst, const void *_src, size_t len) #else byte *dst = _dst; const byte *src = _src; - uintptr_t *ldst; - const uintptr_t *lsrc; + bufhelp_int_t *ldst; + const bufhelp_int_t *lsrc; #ifndef BUFHELP_FAST_UNALIGNED_ACCESS - const unsigned int longmask = sizeof(uintptr_t) - 1; + const unsigned int longmask = sizeof(bufhelp_int_t) - 1; /* Skip fast processing if buffers are unaligned. 
*/ if (((uintptr_t)dst | (uintptr_t)src) & longmask) goto do_bytes; #endif - ldst = (uintptr_t *)(void *)dst; - lsrc = (const uintptr_t *)(const void *)src; + ldst = (bufhelp_int_t *)(void *)dst; + lsrc = (const bufhelp_int_t *)(const void *)src; - for (; len >= sizeof(uintptr_t); len -= sizeof(uintptr_t)) - *ldst++ = *lsrc++; + for (; len >= sizeof(bufhelp_int_t); len -= sizeof(bufhelp_int_t)) + (ldst++)->a = (lsrc++)->a; dst = (byte *)ldst; src = (const byte *)lsrc; @@ -80,22 +102,22 @@ buf_xor(void *_dst, const void *_src1, const void *_src2, size_t len) byte *dst = _dst; const byte *src1 = _src1; const byte *src2 = _src2; - uintptr_t *ldst; - const uintptr_t *lsrc1, *lsrc2; + bufhelp_int_t *ldst; + const bufhelp_int_t *lsrc1, *lsrc2; #ifndef BUFHELP_FAST_UNALIGNED_ACCESS - const unsigned int longmask = sizeof(uintptr_t) - 1; + const unsigned int longmask = sizeof(bufhelp_int_t) - 1; /* Skip fast processing if buffers are unaligned. */ if (((uintptr_t)dst | (uintptr_t)src1 | (uintptr_t)src2) & longmask) goto do_bytes; #endif - ldst = (uintptr_t *)(void *)dst; - lsrc1 = (const uintptr_t *)(const void *)src1; - lsrc2 = (const uintptr_t *)(const void *)src2; + ldst = (bufhelp_int_t *)(void *)dst; + lsrc1 = (const bufhelp_int_t *)(const void *)src1; + lsrc2 = (const bufhelp_int_t *)(const void *)src2; - for (; len >= sizeof(uintptr_t); len -= sizeof(uintptr_t)) - *ldst++ = *lsrc1++ ^ *lsrc2++; + for (; len >= sizeof(bufhelp_int_t); len -= sizeof(bufhelp_int_t)) + (ldst++)->a = (lsrc1++)->a ^ (lsrc2++)->a; dst = (byte *)ldst; src1 = (const byte *)lsrc1; @@ -116,21 +138,21 @@ buf_xor_1(void *_dst, const void *_src, size_t len) { byte *dst = _dst; const byte *src = _src; - uintptr_t *ldst; - const uintptr_t *lsrc; + bufhelp_int_t *ldst; + const bufhelp_int_t *lsrc; #ifndef BUFHELP_FAST_UNALIGNED_ACCESS - const unsigned int longmask = sizeof(uintptr_t) - 1; + const unsigned int longmask = sizeof(bufhelp_int_t) - 1; /* Skip fast processing if buffers are unaligned. 
*/ if (((uintptr_t)dst | (uintptr_t)src) & longmask) goto do_bytes; #endif - ldst = (uintptr_t *)(void *)dst; - lsrc = (const uintptr_t *)(const void *)src; + ldst = (bufhelp_int_t *)(void *)dst; + lsrc = (const bufhelp_int_t *)(const void *)src; - for (; len >= sizeof(uintptr_t); len -= sizeof(uintptr_t)) - *ldst++ ^= *lsrc++; + for (; len >= sizeof(bufhelp_int_t); len -= sizeof(bufhelp_int_t)) + (ldst++)->a ^= (lsrc++)->a; dst = (byte *)ldst; src = (const byte *)lsrc; @@ -152,22 +174,22 @@ buf_xor_2dst(void *_dst1, void *_dst2, const void *_src, size_t len) byte *dst1 = _dst1; byte *dst2 = _dst2; const byte *src = _src; - uintptr_t *ldst1, *ldst2; - const uintptr_t *lsrc; + bufhelp_int_t *ldst1, *ldst2; + const bufhelp_int_t *lsrc; #ifndef BUFHELP_FAST_UNALIGNED_ACCESS - const unsigned int longmask = sizeof(uintptr_t) - 1; + const unsigned int longmask = sizeof(bufhelp_int_t) - 1; /* Skip fast processing if buffers are unaligned. */ if (((uintptr_t)src | (uintptr_t)dst1 | (uintptr_t)dst2) & longmask) goto do_bytes; #endif - ldst1 = (uintptr_t *)(void *)dst1; - ldst2 = (uintptr_t *)(void *)dst2; - lsrc = (const uintptr_t *)(const void *)src; + ldst1 = (bufhelp_int_t *)(void *)dst1; + ldst2 = (bufhelp_int_t *)(void *)dst2; + lsrc = (const bufhelp_int_t *)(const void *)src; - for (; len >= sizeof(uintptr_t); len -= sizeof(uintptr_t)) - *ldst1++ = (*ldst2++ ^= *lsrc++); + for (; len >= sizeof(bufhelp_int_t); len -= sizeof(bufhelp_int_t)) + (ldst1++)->a = ((ldst2++)->a ^= (lsrc++)->a); dst1 = (byte *)ldst1; dst2 = (byte *)ldst2; @@ -193,11 +215,11 @@ buf_xor_n_copy_2(void *_dst_xor, const void *_src_xor, void *_srcdst_cpy, const byte *src_xor = _src_xor; const byte *src_cpy = _src_cpy; byte temp; - uintptr_t *ldst_xor, *lsrcdst_cpy; - const uintptr_t *lsrc_cpy, *lsrc_xor; + bufhelp_int_t *ldst_xor, *lsrcdst_cpy; + const bufhelp_int_t *lsrc_cpy, *lsrc_xor; uintptr_t ltemp; #ifndef BUFHELP_FAST_UNALIGNED_ACCESS - const unsigned int longmask = sizeof(uintptr_t) - 1; + 
const unsigned int longmask = sizeof(bufhelp_int_t) - 1; /* Skip fast processing if buffers are unaligned. */ if (((uintptr_t)src_cpy | (uintptr_t)src_xor | (uintptr_t)dst_xor | @@ -205,16 +227,16 @@ buf_xor_n_copy_2(void *_dst_xor, const void *_src_xor, void *_srcdst_cpy, goto do_bytes; #endif - ldst_xor = (uintptr_t *)(void *)dst_xor; - lsrc_xor = (const uintptr_t *)(void *)src_xor; - lsrcdst_cpy = (uintptr_t *)(void *)srcdst_cpy; - lsrc_cpy = (const uintptr_t *)(const void *)src_cpy; + ldst_xor = (bufhelp_int_t *)(void *)dst_xor; + lsrc_xor = (const bufhelp_int_t *)(void *)src_xor; + lsrcdst_cpy = (bufhelp_int_t *)(void *)srcdst_cpy; + lsrc_cpy = (const bufhelp_int_t *)(const void *)src_cpy; - for (; len >= sizeof(uintptr_t); len -= sizeof(uintptr_t)) + for (; len >= sizeof(bufhelp_int_t); len -= sizeof(bufhelp_int_t)) { - ltemp = *lsrc_cpy++; - *ldst_xor++ = *lsrcdst_cpy ^ *lsrc_xor++; - *lsrcdst_cpy++ = ltemp; + ltemp = (lsrc_cpy++)->a; + (ldst_xor++)->a = (lsrcdst_cpy)->a ^ (lsrc_xor++)->a; + (lsrcdst_cpy++)->a = ltemp; } dst_xor = (byte *)ldst_xor; @@ -347,53 +369,64 @@ static inline void buf_put_le64(void *_buf, u64 val) #else /*BUFHELP_FAST_UNALIGNED_ACCESS*/ +typedef struct bufhelp_u32_s +{ + u32 a; +} __attribute__((packed, aligned(1))) bufhelp_u32_t; + /* Functions for loading and storing unaligned u32 values of different endianness. 
*/ static inline u32 buf_get_be32(const void *_buf) { - return be_bswap32(*(const u32 *)_buf); + return be_bswap32(((const bufhelp_u32_t *)_buf)->a); } static inline u32 buf_get_le32(const void *_buf) { - return le_bswap32(*(const u32 *)_buf); + return le_bswap32(((const bufhelp_u32_t *)_buf)->a); } static inline void buf_put_be32(void *_buf, u32 val) { - u32 *out = _buf; - *out = be_bswap32(val); + bufhelp_u32_t *out = _buf; + out->a = be_bswap32(val); } static inline void buf_put_le32(void *_buf, u32 val) { - u32 *out = _buf; - *out = le_bswap32(val); + bufhelp_u32_t *out = _buf; + out->a = le_bswap32(val); } #ifdef HAVE_U64_TYPEDEF + +typedef struct bufhelp_u64_s +{ + u64 a; +} __attribute__((packed, aligned(1))) bufhelp_u64_t; + /* Functions for loading and storing unaligned u64 values of different endianness. */ static inline u64 buf_get_be64(const void *_buf) { - return be_bswap64(*(const u64 *)_buf); + return be_bswap64(((const bufhelp_u64_t *)_buf)->a); } static inline u64 buf_get_le64(const void *_buf) { - return le_bswap64(*(const u64 *)_buf); + return le_bswap64(((const bufhelp_u64_t *)_buf)->a); } static inline void buf_put_be64(void *_buf, u64 val) { - u64 *out = _buf; - *out = be_bswap64(val); + bufhelp_u64_t *out = _buf; + out->a = be_bswap64(val); } static inline void buf_put_le64(void *_buf, u64 val) { - u64 *out = _buf; - *out = le_bswap64(val); + bufhelp_u64_t *out = _buf; + out->a = le_bswap64(val); } #endif /*HAVE_U64_TYPEDEF*/ diff --git a/configure.ac b/configure.ac index 4bbd686c..16f6a21b 100644 --- a/configure.ac +++ b/configure.ac @@ -958,6 +958,24 @@ fi # +# Check whether the compiler supports the GCC style packed attribute +# +AC_CACHE_CHECK([whether the GCC style packed attribute is supported], + [gcry_cv_gcc_attribute_packed], + [gcry_cv_gcc_attribute_packed=no + AC_COMPILE_IFELSE([AC_LANG_SOURCE( + [[struct foo_s { char a; long b; } __attribute__ ((packed)); + enum bar { + FOO = 1 / (sizeof(struct foo_s) == (sizeof(char) + 
sizeof(long))), + };]])], + [gcry_cv_gcc_attribute_packed=yes])]) +if test "$gcry_cv_gcc_attribute_packed" = "yes" ; then + AC_DEFINE(HAVE_GCC_ATTRIBUTE_PACKED,1, + [Defined if a GCC style "__attribute__ ((packed))" is supported]) +fi + + +# # Check whether the compiler supports 'asm' or '__asm__' keyword for # assembler blocks. # |