Skip to content

Commit bbd60e2

Browse files
addaleax authored and danielleadams committed
deps: fix zlib compilation for CPUs without SIMD features
Fix the compile flags so that zlib can run on CPUs that do not have SSSE3/SSE4.2/etc. Do not compile zlib with flags that indicate that those features are available, and instead enable them selectively for functions that use them. There are probably better ways to do this, e.g. through gyp file modifications as suggested in the issue. However, this patch should do just fine until that happens. Fixes: #32553 PR-URL: #32627 Reviewed-By: Gireesh Punathil <[email protected]> Reviewed-By: Ben Noordhuis <[email protected]> Reviewed-By: Tobias Nießen <[email protected]> Reviewed-By: James M Snell <[email protected]> PR-URL: #45387 Reviewed-By: Rafael Gonzaga <[email protected]>
1 parent 17893de commit bbd60e2

File tree

3 files changed

+20
-0
lines changed

3 files changed

+20
-0
lines changed

‎deps/zlib/adler32_simd.c‎

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,9 +50,13 @@
5050
#defineNMAX 5552
5151

5252
#if defined(ADLER32_SIMD_SSSE3)
53+
#ifndef__GNUC__
54+
#define__attribute__()
55+
#endif
5356

5457
#include<tmmintrin.h>
5558

59+
__attribute__((target("ssse3")))
5660
uint32_tZLIB_INTERNALadler32_simd_( /* SSSE3 */
5761
uint32_tadler,
5862
constunsigned char*buf,

‎deps/zlib/crc32_simd.c‎

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,9 @@
88
#include"crc32_simd.h"
99

1010
#if defined(CRC32_SIMD_SSE42_PCLMUL)
11+
#ifndef__GNUC__
12+
#define__attribute__()
13+
#endif
1114

1215
/*
1316
* crc32_sse42_simd_(): compute the crc32 of the buffer, where the buffer
@@ -21,6 +24,7 @@
2124
#include<smmintrin.h>
2225
#include<wmmintrin.h>
2326

27+
__attribute__((target("sse4.2,pclmul")))
2428
uint32_tZLIB_INTERNALcrc32_sse42_simd_( /* SSE4.2+PCLMUL */
2529
constunsigned char*buf,
2630
z_size_tlen,

‎deps/zlib/crc_folding.c‎

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,10 @@
2525
#include<immintrin.h>
2626
#include<wmmintrin.h>
2727

28+
#ifndef__GNUC__
29+
#define__attribute__()
30+
#endif
31+
2832
#defineCRC_LOAD(s) \
2933
do{\
3034
__m128i xmm_crc0 = _mm_loadu_si128((__m128i *)s->crc0 + 0);\
@@ -41,6 +45,7 @@
4145
_mm_storeu_si128((__m128i *)s->crc0 + 4, xmm_crc_part);\
4246
} while (0);
4347

48+
__attribute__((target("sse4.2,pclmul")))
4449
ZLIB_INTERNALvoidcrc_fold_init(deflate_state*consts)
4550
{
4651
CRC_LOAD(s)
@@ -55,6 +60,7 @@ ZLIB_INTERNAL void crc_fold_init(deflate_state *const s)
5560
s->strm->adler=0;
5661
}
5762

63+
__attribute__((target("sse4.2,pclmul")))
5864
localvoidfold_1(deflate_state*consts,
5965
__m128i*xmm_crc0, __m128i*xmm_crc1,
6066
__m128i*xmm_crc2, __m128i*xmm_crc3)
@@ -81,6 +87,7 @@ local void fold_1(deflate_state *const s,
8187
*xmm_crc3=_mm_castps_si128(ps_res);
8288
}
8389

90+
__attribute__((target("sse4.2,pclmul")))
8491
localvoidfold_2(deflate_state*consts,
8592
__m128i*xmm_crc0, __m128i*xmm_crc1,
8693
__m128i*xmm_crc2, __m128i*xmm_crc3)
@@ -115,6 +122,7 @@ local void fold_2(deflate_state *const s,
115122
*xmm_crc3=_mm_castps_si128(ps_res31);
116123
}
117124

125+
__attribute__((target("sse4.2,pclmul")))
118126
localvoidfold_3(deflate_state*consts,
119127
__m128i*xmm_crc0, __m128i*xmm_crc1,
120128
__m128i*xmm_crc2, __m128i*xmm_crc3)
@@ -155,6 +163,7 @@ local void fold_3(deflate_state *const s,
155163
*xmm_crc3=_mm_castps_si128(ps_res32);
156164
}
157165

166+
__attribute__((target("sse4.2,pclmul")))
158167
localvoidfold_4(deflate_state*consts,
159168
__m128i*xmm_crc0, __m128i*xmm_crc1,
160169
__m128i*xmm_crc2, __m128i*xmm_crc3)
@@ -221,6 +230,7 @@ local const unsigned zalign(32) pshufb_shf_table[60] ={
221230
0x0201008f,0x06050403,0x0a090807,0x0e0d0c0b/* shl 1 (16 -15)/shr15*/
222231
};
223232

233+
__attribute__((target("sse4.2,pclmul")))
224234
localvoidpartial_fold(deflate_state*consts, constsize_tlen,
225235
__m128i*xmm_crc0, __m128i*xmm_crc1,
226236
__m128i*xmm_crc2, __m128i*xmm_crc3,
@@ -271,6 +281,7 @@ local void partial_fold(deflate_state *const s, const size_t len,
271281
*xmm_crc3=_mm_castps_si128(ps_res);
272282
}
273283

284+
__attribute__((target("sse4.2,pclmul")))
274285
ZLIB_INTERNALvoidcrc_fold_copy(deflate_state*consts,
275286
unsigned char*dst, constunsigned char*src, longlen)
276287
{
@@ -427,6 +438,7 @@ local const unsigned zalign(16) crc_mask2[4] ={
427438
0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF
428439
};
429440

441+
__attribute__((target("sse4.2,pclmul")))
430442
unsigned ZLIB_INTERNALcrc_fold_512to32(deflate_state*consts)
431443
{
432444
const__m128ixmm_mask=_mm_load_si128((__m128i*)crc_mask);

0 commit comments

Comments
(0)