#ifndef COMPAT_BSWAP_H
#define COMPAT_BSWAP_H

/*
 * Let's make sure we always have a sane definition for ntohl()/htonl().
 * Some libraries define those as a function call, just to perform byte
 * shifting, bringing significant overhead to what should be a simple
 * operation.
 */

/*
 * Default version that the compiler ought to optimize properly with
 * constant values.
 */
static inline uint32_t default_swab32(uint32_t val)
{
	return (((val & 0xff000000) >> 24) |
		((val & 0x00ff0000) >> 8) |
		((val & 0x0000ff00) << 8) |
		((val & 0x000000ff) << 24));
}
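
/*
 * Illustrative worked example: default_swab32(0x12345678) yields
 * 0x78563412, i.e. the four bytes 12 34 56 78 come back in the
 * order 78 56 34 12.
 */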

static inline uint64_t default_bswap64(uint64_t val)
{
	return (((val & (uint64_t)0x00000000000000ffULL) << 56) |
		((val & (uint64_t)0x000000000000ff00ULL) << 40) |
		((val & (uint64_t)0x0000000000ff0000ULL) << 24) |
		((val & (uint64_t)0x00000000ff000000ULL) << 8) |
		((val & (uint64_t)0x000000ff00000000ULL) >> 8) |
		((val & (uint64_t)0x0000ff0000000000ULL) >> 24) |
		((val & (uint64_t)0x00ff000000000000ULL) >> 40) |
		((val & (uint64_t)0xff00000000000000ULL) >> 56));
}
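
/*
 * Illustrative worked example:
 * default_bswap64(0x0102030405060708ULL) yields 0x0807060504030201ULL.
 */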

#undef bswap32
#undef bswap64

#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))

#define bswap32 git_bswap32
static inline uint32_t git_bswap32(uint32_t x)
{
	uint32_t result;
	if (__builtin_constant_p(x))
		result = default_swab32(x);
	else
		__asm__("bswap %0" : "=r" (result) : "0" (x));
	return result;
}
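
/*
 * The __builtin_constant_p() check keeps compile-time constants on the
 * plain C path, where the compiler can fold them, and sends only
 * runtime values through the single "bswap" instruction; the "0"
 * constraint ties the input to the same register as output %0.
 */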

#define bswap64 git_bswap64
#if defined(__x86_64__)
static inline uint64_t git_bswap64(uint64_t x)
{
	uint64_t result;
	if (__builtin_constant_p(x))
		result = default_bswap64(x);
	else
		__asm__("bswap %q0" : "=r" (result) : "0" (x));
	return result;
}
#else
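/*
 * 32-bit x86 has no 64-bit register to "bswap", so split the value
 * into two 32-bit halves, byte-swap each, and store them back in the
 * opposite order.
 */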
static inline uint64_t git_bswap64(uint64_t x)
{
	union { uint64_t i64; uint32_t i32[2]; } tmp, result;
	if (__builtin_constant_p(x))
		result.i64 = default_bswap64(x);
	else {
		tmp.i64 = x;
		result.i32[0] = git_bswap32(tmp.i32[1]);
		result.i32[1] = git_bswap32(tmp.i32[0]);
	}
	return result.i64;
}
#endif

#elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))

#include <stdlib.h>

#define bswap32(x) _byteswap_ulong(x)
#define bswap64(x) _byteswap_uint64(x)

#endif

#if defined(bswap32)

#undef ntohl
#undef htonl
#define ntohl(x) bswap32(x)
#define htonl(x) bswap32(x)

#endif
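
/*
 * Note that both bswap32 definitions above are only reachable on
 * little-endian x86 hosts, where ntohl()/htonl() reduce to a plain
 * byte swap, e.g. ntohl(0x0a0b0c0d) == 0x0d0c0b0a.
 */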

#if defined(bswap64)

#undef ntohll
#undef htonll
#define ntohll(x) bswap64(x)
#define htonll(x) bswap64(x)

#else

#undef ntohll
#undef htonll

#if defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && defined(__BIG_ENDIAN)

# define GIT_BYTE_ORDER __BYTE_ORDER
# define GIT_LITTLE_ENDIAN __LITTLE_ENDIAN
# define GIT_BIG_ENDIAN __BIG_ENDIAN

#elif defined(BYTE_ORDER) && defined(LITTLE_ENDIAN) && defined(BIG_ENDIAN)

# define GIT_BYTE_ORDER BYTE_ORDER
# define GIT_LITTLE_ENDIAN LITTLE_ENDIAN
# define GIT_BIG_ENDIAN BIG_ENDIAN

#else
# define GIT_BIG_ENDIAN 4321
# define GIT_LITTLE_ENDIAN 1234

# if defined(_BIG_ENDIAN) && !defined(_LITTLE_ENDIAN)
# define GIT_BYTE_ORDER GIT_BIG_ENDIAN
# elif defined(_LITTLE_ENDIAN) && !defined(_BIG_ENDIAN)
# define GIT_BYTE_ORDER GIT_LITTLE_ENDIAN
# elif defined(__THW_BIG_ENDIAN__) && !defined(__THW_LITTLE_ENDIAN__)
# define GIT_BYTE_ORDER GIT_BIG_ENDIAN
# elif defined(__THW_LITTLE_ENDIAN__) && !defined(__THW_BIG_ENDIAN__)
# define GIT_BYTE_ORDER GIT_LITTLE_ENDIAN
# else
# error "Cannot determine endianness"
# endif

#endif

#if GIT_BYTE_ORDER == GIT_BIG_ENDIAN
# define ntohll(n) (n)
# define htonll(n) (n)
#else
# define ntohll(n) default_bswap64(n)
# define htonll(n) default_bswap64(n)
#endif

#endif

/*
 * Performance might be improved if the CPU architecture is OK with
 * unaligned 32-bit loads and a fast ntohl() is available.
 * Otherwise fall back to byte loads and shifts which is portable,
 * and is faster on architectures with memory alignment issues.
 */

#if !defined(NO_UNALIGNED_LOADS) && ( \
    defined(__i386__) || defined(__x86_64__) || \
    defined(_M_IX86) || defined(_M_X64) || \
    defined(__ppc__) || defined(__ppc64__) || \
    defined(__powerpc__) || defined(__powerpc64__) || \
    defined(__s390__) || defined(__s390x__))

#define get_be16(p) ntohs(*(unsigned short *)(p))
#define get_be32(p) ntohl(*(unsigned int *)(p))
#define get_be64(p) ntohll(*(uint64_t *)(p))
#define put_be32(p, v) do { *(unsigned int *)(p) = htonl(v); } while (0)
#define put_be64(p, v) do { *(uint64_t *)(p) = htonll(v); } while (0)

#else

static inline uint16_t get_be16(const void *ptr)
{
	const unsigned char *p = ptr;
	return (uint16_t)p[0] << 8 |
	       (uint16_t)p[1] << 0;
}

static inline uint32_t get_be32(const void *ptr)
{
	const unsigned char *p = ptr;
	return (uint32_t)p[0] << 24 |
	       (uint32_t)p[1] << 16 |
	       (uint32_t)p[2] << 8 |
	       (uint32_t)p[3] << 0;
}

static inline uint64_t get_be64(const void *ptr)
{
	const unsigned char *p = ptr;
	return (uint64_t)get_be32(&p[0]) << 32 |
	       (uint64_t)get_be32(&p[4]) << 0;
}

static inline void put_be32(void *ptr, uint32_t value)
{
	unsigned char *p = ptr;
	p[0] = value >> 24;
	p[1] = value >> 16;
	p[2] = value >> 8;
	p[3] = value >> 0;
}

static inline void put_be64(void *ptr, uint64_t value)
{
	unsigned char *p = ptr;
	p[0] = value >> 56;
	p[1] = value >> 48;
	p[2] = value >> 40;
	p[3] = value >> 32;
	p[4] = value >> 24;
	p[5] = value >> 16;
	p[6] = value >> 8;
	p[7] = value >> 0;
}

#endif
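
/*
 * Illustrative usage (the buffer here is hypothetical): read and write
 * big-endian fields from an unsigned char buffer `buf` holding at
 * least 12 valid bytes:
 *
 *	uint32_t len = get_be32(buf);
 *	put_be64(buf + 4, (uint64_t)len * 2);
 *
 * Both the macro and the inline-function implementations above produce
 * the same results; they differ only in how the bytes are loaded and
 * stored.
 */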

#endif /* COMPAT_BSWAP_H */