diff --git a/src/amd/common/ac_gpu_info.c b/src/amd/common/ac_gpu_info.c index c63c392c0b9b49f712c042640aeb26f8fde766dc..a327766c9c509a6e5c58b269c2043d59f321cb44 100644 --- a/src/amd/common/ac_gpu_info.c +++ b/src/amd/common/ac_gpu_info.c @@ -701,8 +701,6 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info, /* Add some margin of error, though this shouldn't be needed in theory. */ info->all_vram_visible = info->vram_size * 0.9 < info->vram_vis_size; - util_cpu_detect(); - /* Set chip identification. */ info->pci_id = amdinfo->asic_id; /* TODO: is this correct? */ info->pci_rev_id = amdinfo->pci_rev_id; diff --git a/src/amd/compiler/tests/main.cpp b/src/amd/compiler/tests/main.cpp index e0abf63b525be857f61b7981e2d33cca9683fece..8f5e8ea914b8325120e296757b66742f7abbceec 100644 --- a/src/amd/compiler/tests/main.cpp +++ b/src/amd/compiler/tests/main.cpp @@ -34,8 +34,6 @@ #include "aco_ir.h" #include "framework.h" -#include "util/u_cpu_detect.h" - static const char *help_message = "Usage: %s [-h] [-l --list] [--no-check] [TEST [TEST ...]]\n" "\n" @@ -243,8 +241,6 @@ int main(int argc, char **argv) return 99; } - util_cpu_detect(); - if (do_list) { for (auto test : tests) printf("%s\n", test.first.c_str()); diff --git a/src/broadcom/vulkan/v3dv_device.c b/src/broadcom/vulkan/v3dv_device.c index 02392bf546f4de4b3e4d700309964ab293f7e83a..937d85fad92345fb5dd9878e8badd862a41257f4 100644 --- a/src/broadcom/vulkan/v3dv_device.c +++ b/src/broadcom/vulkan/v3dv_device.c @@ -51,7 +51,6 @@ #include "util/build_id.h" #include "util/debug.h" -#include "util/u_cpu_detect.h" #ifdef VK_USE_PLATFORM_XCB_KHR #include @@ -253,8 +252,6 @@ v3dv_CreateInstance(const VkInstanceCreateInfo *pCreateInfo, } } - util_cpu_detect(); - VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false)); *pInstance = v3dv_instance_to_handle(instance); diff --git a/src/compiler/glsl_types.cpp b/src/compiler/glsl_types.cpp index 
e0de4fef2efa0e32218b874cfa2628af4fd084c7..538ff49765ebe5c81bd0232423b14644c29f93e2 100644 --- a/src/compiler/glsl_types.cpp +++ b/src/compiler/glsl_types.cpp @@ -26,7 +26,6 @@ #include "compiler/glsl/glsl_parser_extras.h" #include "glsl_types.h" #include "util/hash_table.h" -#include "util/u_cpu_detect.h" #include "util/u_string.h" @@ -520,11 +519,6 @@ hash_free_type_function(struct hash_entry *entry) void glsl_type_singleton_init_or_ref() { - /* This is required for _mesa_half_to_float() which is - * required for constant-folding 16-bit float ops. - */ - util_cpu_detect(); - mtx_lock(&glsl_type::hash_mutex); glsl_type_users++; mtx_unlock(&glsl_type::hash_mutex); diff --git a/src/compiler/isaspec/decode.c b/src/compiler/isaspec/decode.c index 2dc4969ae445cee6700dc4707865d36e03363a2a..127773d6641082642b4c5b4167c0030a37767402 100644 --- a/src/compiler/isaspec/decode.c +++ b/src/compiler/isaspec/decode.c @@ -773,8 +773,6 @@ isa_decode(void *bin, int sz, FILE *out, const struct isa_decode_options *option if (!options) options = &default_options; - util_cpu_detect(); /* needed for _mesa_half_to_float() */ - state = rzalloc_size(NULL, sizeof(*state)); state->options = options; state->num_instr = sz / (BITMASK_WORDS * sizeof(BITSET_WORD)); diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index 39d3a61b9cf413aec9ca30eb845c28ff4259ee1b..a8630e2715b56e7aadbbb3c6fca6deb30c7ead54 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -34,7 +34,6 @@ #include "pipe/p_context.h" #include "util/u_memory.h" #include "util/u_math.h" -#include "util/u_cpu_detect.h" #include "util/u_inlines.h" #include "util/u_helpers.h" #include "util/u_prim.h" @@ -85,9 +84,6 @@ draw_create_context(struct pipe_context *pipe, void *context, if (!draw) goto err_out; - /* we need correct cpu caps for disabling denorms in draw_vbo() */ - util_cpu_detect(); - #ifdef DRAW_LLVM_AVAILABLE if (try_llvm && 
draw_get_option_use_llvm()) { draw->llvm = draw_llvm_create(draw, (LLVMContextRef)context); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.c b/src/gallium/auxiliary/gallivm/lp_bld_init.c index 3f040ac21d75946d6012996f9cb49a64569fb7fd..19b079b12759850a99e4cf8d9d5f5b9b26659280 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_init.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_init.c @@ -430,8 +430,6 @@ lp_build_init(void) lp_set_target_options(); - util_cpu_detect(); - /* For simulating less capable machines */ #ifdef DEBUG if (debug_get_bool_option("LP_FORCE_SSE2", FALSE)) { diff --git a/src/gallium/auxiliary/pipe-loader/pipe_loader.c b/src/gallium/auxiliary/pipe-loader/pipe_loader.c index 6d5204d629f3e971b54936ab0c23ba0b02a20bf2..1c58eaefd04f0050aaec2c59e973ab5923578bcc 100644 --- a/src/gallium/auxiliary/pipe-loader/pipe_loader.c +++ b/src/gallium/auxiliary/pipe-loader/pipe_loader.c @@ -27,7 +27,6 @@ #include "pipe_loader_priv.h" -#include "util/u_cpu_detect.h" #include "util/u_inlines.h" #include "util/u_memory.h" #include "util/u_string.h" @@ -165,7 +164,6 @@ pipe_loader_create_screen_vk(struct pipe_loader_device *dev, bool sw_vk) { struct pipe_screen_config config; - util_cpu_detect(); pipe_loader_load_options(dev); config.options_info = &dev->option_info; config.options = &dev->option_cache; diff --git a/src/gallium/auxiliary/rtasm/rtasm_cpu.c b/src/gallium/auxiliary/rtasm/rtasm_cpu.c index 03b11f914b47c8a7520313112b575a8fa85a0f40..272650314121ef08650a4f7f99e7bb58773da1af 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_cpu.c +++ b/src/gallium/auxiliary/rtasm/rtasm_cpu.c @@ -37,7 +37,6 @@ DEBUG_GET_ONCE_BOOL_OPTION(nosse, "GALLIUM_NOSSE", false); static const struct util_cpu_caps_t *get_cpu_caps(void) { - util_cpu_detect(); return util_get_cpu_caps(); } diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c index dbdc737dfd938a2c1e687495d88ad50da10b2be2..9fc3a712244d8393e4c0fe721c822a3c6ced10bf 100644 --- 
a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c @@ -2159,7 +2159,6 @@ struct x86_reg x86_fn_arg( struct x86_function *p, static void x86_init_func_common( struct x86_function *p ) { - util_cpu_detect(); p->caps = 0; if(util_get_cpu_caps()->has_mmx) p->caps |= X86_MMX; diff --git a/src/gallium/auxiliary/util/u_threaded_context.c b/src/gallium/auxiliary/util/u_threaded_context.c index de1ead7eabdb60e29da4e5c5b6bb06a293c2d58b..c7a5857be49c5ce00ab2d265b59c064ff9178853 100644 --- a/src/gallium/auxiliary/util/u_threaded_context.c +++ b/src/gallium/auxiliary/util/u_threaded_context.c @@ -4299,8 +4299,6 @@ threaded_context_create(struct pipe_context *pipe, if (!pipe) return NULL; - util_cpu_detect(); - if (!debug_get_bool_option("GALLIUM_THREAD", util_get_cpu_caps()->nr_cpus > 1)) return pipe; diff --git a/src/gallium/drivers/lima/standalone/lima_disasm.c b/src/gallium/drivers/lima/standalone/lima_disasm.c index 82dcddc69beadbf9970ee37c6cb75a16bab73d43..9c8278cddd923b9cc2e03dadfd0e7c38eacde2f0 100644 --- a/src/gallium/drivers/lima/standalone/lima_disasm.c +++ b/src/gallium/drivers/lima/standalone/lima_disasm.c @@ -23,7 +23,6 @@ */ #include "util/ralloc.h" -#include "util/u_cpu_detect.h" #include #include @@ -176,9 +175,6 @@ main(int argc, char **argv) return -1; } - /* Needed by _mesa_half_to_float() */ - util_cpu_detect(); - if (is_frag) { assert((size & 0x3) == 0); size >>= 2; diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c index 359c200279f70266f7286bc87cb5c1b5ea439561..7ee715bf380f9112fa7804cae1005736c6fb1e4f 100644 --- a/src/gallium/drivers/llvmpipe/lp_screen.c +++ b/src/gallium/drivers/llvmpipe/lp_screen.c @@ -921,8 +921,8 @@ static void update_cache_sha1_cpu(struct mesa_sha1 *ctx) * Don't need the cpu cache affinity stuff. The rest - * is contained in first 5 dwords. + * is contained in first 6 dwords. 
*/ - STATIC_ASSERT(offsetof(struct util_cpu_caps_t, num_L3_caches) == 5 * sizeof(uint32_t)); - _mesa_sha1_update(ctx, cpu_caps, 5 * sizeof(uint32_t)); + STATIC_ASSERT(offsetof(struct util_cpu_caps_t, num_L3_caches) == 6 * sizeof(uint32_t)); + _mesa_sha1_update(ctx, cpu_caps, 6 * sizeof(uint32_t)); } static void lp_disk_cache_create(struct llvmpipe_screen *screen) @@ -1024,8 +1024,6 @@ llvmpipe_create_screen(struct sw_winsys *winsys) { struct llvmpipe_screen *screen; - util_cpu_detect(); - glsl_type_singleton_init_or_ref(); #ifdef DEBUG diff --git a/src/gallium/drivers/llvmpipe/lp_test_main.c b/src/gallium/drivers/llvmpipe/lp_test_main.c index 5ec0dd347bd3edb1938718ea674ad5866dbf449e..aee03dafe73fe73857164cad21e6398607a4b090 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_main.c +++ b/src/gallium/drivers/llvmpipe/lp_test_main.c @@ -34,7 +34,6 @@ */ -#include "util/u_cpu_detect.h" #include "util/u_math.h" #include "gallivm/lp_bld_const.h" @@ -381,7 +380,6 @@ int main(int argc, char **argv) boolean single = FALSE; unsigned fpstate; - util_cpu_detect(); fpstate = util_fpstate_get(); util_fpstate_set_denorms_to_zero(fpstate); diff --git a/src/gallium/drivers/radeonsi/si_get.c b/src/gallium/drivers/radeonsi/si_get.c index 5ab278969f1e4f382b7a6d46c8a268cea0258b1c..49a4eccd36cb8df33c675b5338533127e58df0eb 100644 --- a/src/gallium/drivers/radeonsi/si_get.c +++ b/src/gallium/drivers/radeonsi/si_get.c @@ -994,8 +994,6 @@ static void si_init_renderer_string(struct si_screen *sscreen) void si_init_screen_get_functions(struct si_screen *sscreen) { - util_cpu_detect(); - sscreen->b.get_name = si_get_name; sscreen->b.get_vendor = si_get_vendor; sscreen->b.get_device_vendor = si_get_device_vendor; diff --git a/src/gallium/drivers/vc4/vc4_screen.c b/src/gallium/drivers/vc4/vc4_screen.c index b604cd5545c7641c6ff3ad3bc92be48611554503..0116ef2bdc83ec122d909b03b562d31d29c6f362 100644 --- a/src/gallium/drivers/vc4/vc4_screen.c +++ b/src/gallium/drivers/vc4/vc4_screen.c @@ -27,7 +27,6 
@@ #include "pipe/p_screen.h" #include "pipe/p_state.h" -#include "util/u_cpu_detect.h" #include "util/u_debug.h" #include "util/u_memory.h" #include "util/format/u_format.h" @@ -591,8 +590,6 @@ vc4_screen_create(int fd, struct renderonly *ro) if (!vc4_get_chip_info(screen)) goto fail; - util_cpu_detect(); - slab_create_parent(&screen->transfer_pool, sizeof(struct vc4_transfer), 16); vc4_fence_screen_init(screen); diff --git a/src/gallium/drivers/zink/zink_screen.c b/src/gallium/drivers/zink/zink_screen.c index 2abbef81df619a8866b2a8f3f8e956820126304e..366a9a586e865028e7ba29633b8453758a775217 100644 --- a/src/gallium/drivers/zink/zink_screen.c +++ b/src/gallium/drivers/zink/zink_screen.c @@ -2035,7 +2035,6 @@ zink_internal_create_screen(const struct pipe_screen_config *config) if (!screen) return NULL; - util_cpu_detect(); screen->threaded = util_get_cpu_caps()->nr_cpus > 1 && debug_get_bool_option("GALLIUM_THREAD", util_get_cpu_caps()->nr_cpus > 1); zink_debug = debug_get_option_zink_debug(); diff --git a/src/gallium/tests/unit/translate_test.c b/src/gallium/tests/unit/translate_test.c index 782f16e7f7837efb91ce07f588e57b6b22aed674..16e55b231caf3e8cdabf9fab2eb01d19955998de 100644 --- a/src/gallium/tests/unit/translate_test.c +++ b/src/gallium/tests/unit/translate_test.c @@ -69,8 +69,6 @@ int main(int argc, char** argv) create_fn = 0; - util_cpu_detect(); - if (argc <= 1 || !strcmp(argv[1], "default") ) create_fn = translate_create; diff --git a/src/gallium/tests/unit/u_half_test.c b/src/gallium/tests/unit/u_half_test.c index 4474cfb82b0497a220abef73ac2c29f04af2996a..36422afea2f40e9a97328d261f0620117e468368 100644 --- a/src/gallium/tests/unit/u_half_test.c +++ b/src/gallium/tests/unit/u_half_test.c @@ -36,7 +36,6 @@ test(void) int main(int argc, char **argv) { - util_cpu_detect(); test(); /* Test non-f16c. 
*/ diff --git a/src/mesa/main/tests/mesa_formats.cpp b/src/mesa/main/tests/mesa_formats.cpp index 7ca3a2698965691354a4917a95c1c0dc145597e5..916f7378d389702455fb91812cf9645852fdc4f5 100644 --- a/src/mesa/main/tests/mesa_formats.cpp +++ b/src/mesa/main/tests/mesa_formats.cpp @@ -35,15 +35,9 @@ #include "main/glformats.h" #include "main/format_unpack.h" #include "main/format_pack.h" -#include "util/u_cpu_detect.h" // Test fixture for Format tests. -// Currently just ensures that util_cpu_detect() has been called class MesaFormatsTest : public ::testing::Test { - protected: - MesaFormatsTest() { - util_cpu_detect(); - } }; /** diff --git a/src/mesa/state_tracker/st_context.c b/src/mesa/state_tracker/st_context.c index 88286f44634af8b811fea2192836537028942712..b3a12d4a77e12123be3ef493f64c7b4247c19ea2 100644 --- a/src/mesa/state_tracker/st_context.c +++ b/src/mesa/state_tracker/st_context.c @@ -480,8 +480,6 @@ st_create_context_priv(struct gl_context *ctx, struct pipe_context *pipe, uint i; struct st_context *st = CALLOC_STRUCT( st_context); - util_cpu_detect(); - st->options = *options; ctx->st_opts = &st->options; @@ -862,8 +860,6 @@ st_create_context(gl_api api, struct pipe_context *pipe, struct dd_function_table funcs; struct st_context *st; - util_cpu_detect(); - memset(&funcs, 0, sizeof(funcs)); st_init_driver_functions(pipe->screen, &funcs, has_egl_image_validate); diff --git a/src/util/tests/format/u_format_test.c b/src/util/tests/format/u_format_test.c index 1ce93b97cc09aecb6050beb02fc3eb3d75e8ca4b..0b852ace540f7f4d5ab0955e28cdeeaca49d24d1 100644 --- a/src/util/tests/format/u_format_test.c +++ b/src/util/tests/format/u_format_test.c @@ -869,8 +869,6 @@ int main(int argc, char **argv) { boolean success; - util_cpu_detect(); - success = test_all(); return success ? 
0 : 1; diff --git a/src/util/u_cpu_detect.c b/src/util/u_cpu_detect.c index 6e8743e254f9b1342e85bc319c729d0849da83e8..fb96e1042bd78b199b1f7f0dff614677fb36fe0f 100644 --- a/src/util/u_cpu_detect.c +++ b/src/util/u_cpu_detect.c @@ -860,6 +860,9 @@ util_cpu_detect_once(void) printf("util_cpu_caps.num_L3_caches = %u\n", util_cpu_caps.num_L3_caches); printf("util_cpu_caps.num_cpu_mask_bits = %u\n", util_cpu_caps.num_cpu_mask_bits); } + + /* This must happen at the end as it's used to guard everything else */ + p_atomic_set(&util_cpu_caps.detect_done, 1); } static once_flag cpu_once_flag = ONCE_FLAG_INIT; diff --git a/src/util/u_cpu_detect.h b/src/util/u_cpu_detect.h index a1416d5a3f5cf4fcb1325d9c74f8b807b5b3a314..8e35b3640a16767e7a8c1371578926f2bbf2145f 100644 --- a/src/util/u_cpu_detect.h +++ b/src/util/u_cpu_detect.h @@ -38,6 +38,7 @@ #include #include "pipe/p_config.h" +#include "util/u_atomic.h" #include "util/u_thread.h" @@ -60,6 +61,12 @@ enum cpu_family { typedef uint32_t util_affinity_mask[UTIL_MAX_CPUS / 32]; struct util_cpu_caps_t { + /** + * Initialized to 0 and set to non-zero with an atomic after the entire + * struct has been initialized. + */ + uint32_t detect_done; + /** * Number of CPUs available to the process. * @@ -127,22 +134,42 @@ struct util_cpu_caps_t { #define U_CPU_INVALID_L3 0xffff -static inline const struct util_cpu_caps_t * +void util_cpu_detect(void); + +static inline ATTRIBUTE_CONST const struct util_cpu_caps_t * util_get_cpu_caps(void) { - extern struct util_cpu_caps_t util_cpu_caps; + extern struct util_cpu_caps_t util_cpu_caps; - /* If you hit this assert, it means that something is using the - * cpu-caps without having first called util_cpu_detect() - */ - assert(util_cpu_caps.nr_cpus >= 1); + /* On most CPU architectures, an atomic read is simply a regular memory + * load instruction with some extra compiler magic to prevent code + * re-ordering around it. The perf impact of doing this check should be + * negligible in most cases. 
+ * + * Also, even though it looks like a bit of a lie, we've declared this + * function with ATTRIBUTE_CONST. The GCC docs say: + * + * "Calls to functions whose return value is not affected by changes to + * the observable state of the program and that have no observable + * effects on such state other than to return a value may lend + * themselves to optimizations such as common subexpression elimination. + * Declaring such functions with the const attribute allows GCC to avoid + * emitting some calls in repeated invocations of the function with the + * same argument values." + * + * The word "observable" is important here. With the exception of a + * llvmpipe debug flag behind an environment variable and a few unit tests, + * all of which emulate worse CPUs, this function neither affects nor is + * affected by any "observable" state. It has its own internal state for + * sure, but that state is such that it appears to return exactly the same + * value with the same internal data every time. + */ + if (unlikely(!p_atomic_read(&util_cpu_caps.detect_done))) + util_cpu_detect(); - return &util_cpu_caps; + return &util_cpu_caps; } -void util_cpu_detect(void); - - #ifdef __cplusplus } #endif diff --git a/src/util/u_queue.c b/src/util/u_queue.c index d35b8f2f29b5537eee07e09d27cadf1c4cdd013b..413f8aa019e124d3a03a0eaeaf2e7a12e11838e5 100644 --- a/src/util/u_queue.c +++ b/src/util/u_queue.c @@ -263,9 +263,6 @@ util_queue_thread_func(void *input) memset(mask, 0xff, sizeof(mask)); - /* Ensure util_cpu_caps.num_cpu_mask_bits is initialized: */ - util_cpu_detect(); - util_set_current_thread_affinity(mask, NULL, util_get_cpu_caps()->num_cpu_mask_bits); }