#include "config.h" #include "cpu_caps.h" #if defined(_WIN32) && (defined(_M_ARM) || defined(_M_ARM64)) #define WIN32_LEAN_AND_MEAN #include #ifndef PF_ARM_NEON_INSTRUCTIONS_AVAILABLE #define PF_ARM_NEON_INSTRUCTIONS_AVAILABLE 19 #endif #endif #ifdef HAVE_INTRIN_H #include #endif #ifdef HAVE_CPUID_H #include #endif #include #include #include "alfstream.h" #include "logging.h" int CPUCapFlags{0}; namespace { #if defined(HAVE_GCC_GET_CPUID) \ && (defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64)) using reg_type = unsigned int; inline void get_cpuid(unsigned int f, reg_type *regs) { __get_cpuid(f, ®s[0], ®s[1], ®s[2], ®s[3]); } #define CAN_GET_CPUID #elif defined(HAVE_CPUID_INTRINSIC) \ && (defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64)) using reg_type = int; inline void get_cpuid(unsigned int f, reg_type *regs) { (__cpuid)(regs, f); } #define CAN_GET_CPUID #endif } // namespace void FillCPUCaps(int capfilter) { int caps{0}; /* FIXME: We really should get this for all available CPUs in case different * CPUs have different caps (is that possible on one machine?). */ #ifdef CAN_GET_CPUID union { reg_type regs[4]; char str[sizeof(reg_type[4])]; } cpuinf[3]{}; get_cpuid(0, cpuinf[0].regs); if(cpuinf[0].regs[0] == 0) ERR("Failed to get CPUID\n"); else { const reg_type maxfunc{cpuinf[0].regs[0]}; get_cpuid(0x80000000, cpuinf[0].regs); const reg_type maxextfunc{cpuinf[0].regs[0]}; TRACE("Detected max CPUID function: 0x%x (ext. 0x%x)\n", maxfunc, maxextfunc); TRACE("Vendor ID: \"%.4s%.4s%.4s\"\n", cpuinf[0].str+4, cpuinf[0].str+12, cpuinf[0].str+8); if(maxextfunc >= 0x80000004) { get_cpuid(0x80000002, cpuinf[0].regs); get_cpuid(0x80000003, cpuinf[1].regs); get_cpuid(0x80000004, cpuinf[2].regs); TRACE("Name: \"%.16s%.16s%.16s\"\n", cpuinf[0].str, cpuinf[1].str, cpuinf[2].str); } if(maxfunc >= 1) { get_cpuid(1, cpuinf[0].regs); if((cpuinf[0].regs[3]&(1<<25))) caps |= CPU_CAP_SSE; if((caps&CPU_CAP_SSE) && (cpuinf[0].regs[3]&(1<<26))) caps |= CPU_CAP_SSE2; if((caps&CPU_CAP_SSE2) && (cpuinf[0].regs[2]&(1<<0))) caps |= CPU_CAP_SSE3; if((caps&CPU_CAP_SSE3) && (cpuinf[0].regs[2]&(1<<19))) caps |= CPU_CAP_SSE4_1; } } #else /* Assume support for whatever's supported if we can't check for it */ #if defined(HAVE_SSE4_1) #warning "Assuming SSE 4.1 run-time support!" caps |= CPU_CAP_SSE | CPU_CAP_SSE2 | CPU_CAP_SSE3 | CPU_CAP_SSE4_1; #elif defined(HAVE_SSE3) #warning "Assuming SSE 3 run-time support!" caps |= CPU_CAP_SSE | CPU_CAP_SSE2 | CPU_CAP_SSE3; #elif defined(HAVE_SSE2) #warning "Assuming SSE 2 run-time support!" caps |= CPU_CAP_SSE | CPU_CAP_SSE2; #elif defined(HAVE_SSE) #warning "Assuming SSE run-time support!" caps |= CPU_CAP_SSE; #endif #endif #ifdef HAVE_NEON #ifdef __ARM_NEON caps |= CPU_CAP_NEON; #elif defined(_WIN32) && (defined(_M_ARM) || defined(_M_ARM64)) if(IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE)) caps |= CPU_CAP_NEON; #else al::ifstream file{"/proc/cpuinfo"}; if(!file.is_open()) ERR("Failed to open /proc/cpuinfo, cannot check for NEON support\n"); else { std::string features; auto getline = [](std::istream &f, std::string &output) -> bool { while(f.good() && f.peek() == '\n') f.ignore(); return std::getline(f, output) && !output.empty(); }; while(getline(file, features)) { if(features.compare(0, 10, "Features\t:", 10) == 0) break; } file.close(); size_t extpos{9}; while((extpos=features.find("neon", extpos+1)) != std::string::npos) { if(std::isspace(features[extpos-1]) && (extpos+4 == features.length() || std::isspace(features[extpos+4]))) { caps |= CPU_CAP_NEON; break; } } if(!(caps&CPU_CAP_NEON)) { extpos = 9; while((extpos=features.find("asimd", extpos+1)) != std::string::npos) { if(std::isspace(features[extpos-1]) && (extpos+5 == features.length() || std::isspace(features[extpos+5]))) { caps |= CPU_CAP_NEON; break; } } } } #endif #endif TRACE("Extensions:%s%s%s%s%s%s\n", ((capfilter&CPU_CAP_SSE) ? ((caps&CPU_CAP_SSE) ? " +SSE" : " -SSE") : ""), ((capfilter&CPU_CAP_SSE2) ? ((caps&CPU_CAP_SSE2) ? " +SSE2" : " -SSE2") : ""), ((capfilter&CPU_CAP_SSE3) ? ((caps&CPU_CAP_SSE3) ? " +SSE3" : " -SSE3") : ""), ((capfilter&CPU_CAP_SSE4_1) ? ((caps&CPU_CAP_SSE4_1) ? " +SSE4.1" : " -SSE4.1") : ""), ((capfilter&CPU_CAP_NEON) ? ((caps&CPU_CAP_NEON) ? " +NEON" : " -NEON") : ""), ((!capfilter) ? " -none-" : "") ); CPUCapFlags = caps & capfilter; }