Skip to content

Commit 901405e

Browse files
Add more generic AVX feature detection as well as flags for if avx512… (#1041)
1 parent 6ee34b3 commit 901405e

File tree

9 files changed

+60
-32
lines changed

9 files changed

+60
-32
lines changed

CMakeLists.txt

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@ include(AwsFeatureTests)
2424
include(AwsSanitizers)
2525
include(AwsThreadAffinity)
2626
include(AwsThreadName)
27-
include(AwsSIMD)
2827
include(CTest)
2928

3029
set(GENERATED_ROOT_DIR "${CMAKE_CURRENT_BINARY_DIR}/generated")
@@ -224,12 +223,9 @@ target_include_directories(${PROJECT_NAME} PUBLIC
224223

225224
target_compile_definitions(${PROJECT_NAME} PRIVATE -DCJSON_HIDE_SYMBOLS)
226225

227-
# Enable SIMD encoder if the compiler supports the right features
228-
simd_add_definitions(${PROJECT_NAME})
229-
230-
if (HAVE_AVX2_INTRINSICS)
226+
if (AWS_HAVE_AVX2_INTRINSICS)
231227
target_compile_definitions(${PROJECT_NAME} PRIVATE -DUSE_SIMD_ENCODING)
232-
simd_add_source_avx2(${PROJECT_NAME} "source/arch/intel/encoding_avx2.c")
228+
simd_add_source_avx(${PROJECT_NAME} "source/arch/intel/encoding_avx2.c")
233229
message(STATUS "Building SIMD base64 decoder")
234230
endif()
235231

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,7 @@ Example:
158158
* Avoid C99 features in header files. Some types, such as bool, uint32_t, etc., are defined if not available for the language
159159
standard being used in `aws/common/common.h`, so feel free to use them.
160160
* For C++ compatibility, don't put const members in structs.
161-
* Avoid C++ style comments e.g. `//`.
161+
* Avoid C++ style comments (e.g. `//`) in header files, and prefer block style (`/* */`) for long blocks of text. C++ style comments are fine in C files.
162162
* All public API functions need C++ guards and Windows dll semantics.
163163
* Use Unix line endings.
164164
* Where implementation hiding is desired for either ABI or runtime polymorphism reasons, use the `void *impl` pattern. v-tables

cmake/AwsFeatureTests.cmake

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,3 +117,7 @@ if(MSVC)
117117
return 0;
118118
}" AWS_HAVE_MSVC_INTRINSICS_X64)
119119
endif()
120+
121+
# Detecting which SIMD intrinsics are available takes a lot of work and has to set cflags to do so.
122+
# Keep that logic in its own file so it is easier to manage.
123+
include(AwsSIMD)

cmake/AwsSIMD.cmake

Lines changed: 34 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -8,18 +8,31 @@ if (USE_CPU_EXTENSIONS)
88
if (MSVC)
99
check_c_compiler_flag("/arch:AVX2" HAVE_M_AVX2_FLAG)
1010
if (HAVE_M_AVX2_FLAG)
11-
set(AVX2_CFLAGS "/arch:AVX2")
11+
set(AVX_CFLAGS "/arch:AVX2")
1212
endif()
1313
else()
1414
check_c_compiler_flag(-mavx2 HAVE_M_AVX2_FLAG)
1515
if (HAVE_M_AVX2_FLAG)
16-
set(AVX2_CFLAGS "-mavx -mavx2")
16+
set(AVX_CFLAGS "-mavx -mavx2")
1717
endif()
1818
endif()
1919

20+
if (MSVC)
21+
check_c_compiler_flag("/arch:AVX512" HAVE_M_AVX512_FLAG)
22+
if (HAVE_M_AVX512_FLAG)
23+
# The docs imply /arch:AVX512 also brings in AVX2. Without /arch:AVX2 it will compile, but it will break at runtime on
24+
# instructions such as _mm256_load_si256(), so keep /arch:AVX2 on as well.
25+
set(AVX_CFLAGS "/arch:AVX512 /arch:AVX2")
26+
endif()
27+
else()
28+
check_c_compiler_flag("-mavx512f -mvpclmulqdq" HAVE_M_AVX512_FLAG)
29+
if (HAVE_M_AVX512_FLAG)
30+
set(AVX_CFLAGS "-mavx512f -mvpclmulqdq -mpclmul -mavx -mavx2 -msse4.2")
31+
endif()
32+
endif()
2033

2134
set(old_flags "${CMAKE_REQUIRED_FLAGS}")
22-
set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} ${AVX2_CFLAGS}")
35+
set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} ${AVX_CFLAGS}")
2336

2437
check_c_source_compiles("
2538
#include <immintrin.h>
@@ -35,7 +48,15 @@ if (USE_CPU_EXTENSIONS)
3548
_mm256_permutevar8x32_epi32(vec, vec);
3649
3750
return 0;
38-
}" HAVE_AVX2_INTRINSICS)
51+
}" AWS_HAVE_AVX2_INTRINSICS)
52+
53+
check_c_source_compiles("
54+
#include <immintrin.h>
55+
56+
int main() {
57+
__m512 a = _mm512_setzero_ps();
58+
return 0;
59+
}" AWS_HAVE_AVX512_INTRINSICS)
3960

4061
check_c_source_compiles("
4162
#include <immintrin.h>
@@ -45,30 +66,20 @@ if (USE_CPU_EXTENSIONS)
4566
__m256i vec;
4667
memset(&vec, 0, sizeof(vec));
4768
return (int)_mm256_extract_epi64(vec, 2);
48-
}" HAVE_MM256_EXTRACT_EPI64)
69+
}" AWS_HAVE_MM256_EXTRACT_EPI64)
4970

5071
set(CMAKE_REQUIRED_FLAGS "${old_flags}")
5172
endif() # USE_CPU_EXTENSIONS
5273

53-
macro(simd_add_definition_if target definition)
54-
if(${definition})
55-
target_compile_definitions(${target} PRIVATE -D${definition})
56-
endif(${definition})
57-
endmacro(simd_add_definition_if)
58-
59-
# Configure private preprocessor definitions for SIMD-related features
60-
# Does not set any processor feature codegen flags
61-
function(simd_add_definitions target)
62-
simd_add_definition_if(${target} HAVE_AVX2_INTRINSICS)
63-
simd_add_definition_if(${target} HAVE_MM256_EXTRACT_EPI64)
64-
endfunction(simd_add_definitions)
74+
# The SIMD feature definitions are no longer added to the compiler flags; they have been moved to config.h.in.
75+
# See the git history for more details.
6576

66-
# Adds source files only if AVX2 is supported. These files will be built with
67-
# avx2 intrinsics enabled.
68-
# Usage: simd_add_source_avx2(target file1.c file2.c ...)
69-
function(simd_add_source_avx2 target)
77+
# Adds source files to the target along with the supported AVX flags, if any. These files will be built with
78+
# the available AVX intrinsics enabled.
79+
# Usage: simd_add_source_avx(target file1.c file2.c ...)
80+
function(simd_add_source_avx target)
7081
foreach(file ${ARGN})
7182
target_sources(${target} PRIVATE ${file})
72-
set_source_files_properties(${file} PROPERTIES COMPILE_FLAGS "${AVX2_CFLAGS}")
83+
set_source_files_properties(${file} PROPERTIES COMPILE_FLAGS "${AVX_CFLAGS}")
7384
endforeach()
74-
endfunction(simd_add_source_avx2)
85+
endfunction(simd_add_source_avx)

include/aws/common/config.h.in

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,5 +19,8 @@
1919
#cmakedefine AWS_HAVE_EXECINFO
2020
#cmakedefine AWS_HAVE_WINAPI_DESKTOP
2121
#cmakedefine AWS_HAVE_LINUX_IF_LINK_H
22+
#cmakedefine AWS_HAVE_AVX2_INTRINSICS
23+
#cmakedefine AWS_HAVE_AVX512_INTRINSICS
24+
#cmakedefine AWS_HAVE_MM256_EXTRACT_EPI64
2225

2326
#endif

include/aws/common/cpuid.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ enum aws_cpu_feature_name {
1717
AWS_CPU_FEATURE_AVX512,
1818
AWS_CPU_FEATURE_ARM_CRC,
1919
AWS_CPU_FEATURE_BMI2,
20+
AWS_CPU_FEATURE_VPCLMULQDQ,
2021
AWS_CPU_FEATURE_COUNT,
2122
};
2223

source/arch/intel/cpuid.c

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,13 +113,26 @@ static bool s_has_bmi2(void) {
113113
return true;
114114
}
115115

116+
static bool s_has_vpclmulqdq(void) {
117+
uint32_t abcd[4];
118+
/* Check VPCLMULQDQ:
119+
* CPUID.(EAX=07H, ECX=0H):ECX.VPCLMULQDQ[bit 20]==1 */
120+
uint32_t vpclmulqdq_mask = (1 << 20);
121+
aws_run_cpuid(7, 0, abcd);
122+
if ((abcd[2] & vpclmulqdq_mask) != vpclmulqdq_mask) {
123+
return false;
124+
}
125+
return true;
126+
}
127+
116128
has_feature_fn *s_check_cpu_feature[AWS_CPU_FEATURE_COUNT] = {
117129
[AWS_CPU_FEATURE_CLMUL] = s_has_clmul,
118130
[AWS_CPU_FEATURE_SSE_4_1] = s_has_sse41,
119131
[AWS_CPU_FEATURE_SSE_4_2] = s_has_sse42,
120132
[AWS_CPU_FEATURE_AVX2] = s_has_avx2,
121133
[AWS_CPU_FEATURE_AVX512] = s_has_avx512,
122134
[AWS_CPU_FEATURE_BMI2] = s_has_bmi2,
135+
[AWS_CPU_FEATURE_VPCLMULQDQ] = s_has_vpclmulqdq,
123136
};
124137

125138
bool aws_cpu_has_feature(enum aws_cpu_feature_name feature_name) {

source/arch/intel/encoding_avx2.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -179,7 +179,7 @@ static inline bool decode(const unsigned char *in, unsigned char *out) {
179179
* so we'll just copy right out of the vector as a fallback
180180
*/
181181

182-
#ifdef HAVE_MM256_EXTRACT_EPI64
182+
#ifdef AWS_HAVE_MM256_EXTRACT_EPI64
183183
uint64_t hi = _mm256_extract_epi64(vec, 2);
184184
const uint64_t *p_hi = &hi;
185185
#else

tests/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -295,7 +295,7 @@ add_test_case(test_byte_cursor_utf8_parse_u64_hex)
295295

296296
add_test_case(byte_swap_test)
297297

298-
if(HAVE_AVX2_INTRINSICS)
298+
if(AWS_HAVE_AVX2_INTRINSICS)
299299
add_test_case(alignment32_test)
300300
else()
301301
add_test_case(alignment16_test)

0 commit comments

Comments
 (0)