summaryrefslogtreecommitdiff
path: root/sysdeps/x86/cacheinfo.c
diff options
context:
space:
mode:
authorSamuel Thibault <samuel.thibault@ens-lyon.org>2018-12-27 14:12:05 +0000
committerSamuel Thibault <samuel.thibault@ens-lyon.org>2018-12-27 14:12:05 +0000
commit963c37d5c0eb62b38f8764b23931c0dcdd497a13 (patch)
tree12a521ddf17b3e1bb26594656bbb05903c54afd0 /sysdeps/x86/cacheinfo.c
parent7bb5f8a836b916d6ebf7b6921b136e99cea2442d (diff)
parent3c03baca37fdcb52c3881e653ca392bba7a99c2b (diff)
Merge tag 'glibc-2.28' into baseline-2.28baseline
The GNU C Library ================= The GNU C Library version 2.28 is now available. The GNU C Library is used as *the* C library in the GNU system and in GNU/Linux systems, as well as many other systems that use Linux as the kernel. The GNU C Library is primarily designed to be a portable and high performance C library. It follows all relevant standards including ISO C11 and POSIX.1-2008. It is also internationalized and has one of the most complete internationalization interfaces known. The GNU C Library webpage is at http://www.gnu.org/software/libc/ Packages for the 2.28 release may be downloaded from: http://ftpmirror.gnu.org/libc/ http://ftp.gnu.org/gnu/libc/ The mirror list is at http://www.gnu.org/order/ftp.html NEWS for version 2.28 ===================== Major new features: * The localization data for ISO 14651 is updated to match the 2016 Edition 4 release of the standard, this matches data provided by Unicode 9.0.0. This update introduces significant improvements to the collation of Unicode characters. This release deviates slightly from the standard in that the collation element ordering for lowercase and uppercase LATIN script characters is adjusted to ensure that regular expressions with ranges like [a-z] and [A-Z] don't interleave e.g. A is not matched by [a-z]. With the update many locales have been updated to take advantage of the new collation information. The new collation information has increased the size of the compiled locale archive or binary locales. * The GNU C Library can now be compiled with support for Intel CET, AKA Intel Control-flow Enforcement Technology. When the library is built with --enable-cet, the resulting glibc is protected with indirect branch tracking (IBT) and shadow stack (SHSTK). CET-enabled glibc is compatible with all existing executables and shared libraries. This feature is currently supported on i386, x86_64 and x32 with GCC 8 and binutils 2.29 or later. Note that CET-enabled glibc requires CPUs capable of multi-byte NOPs, like x86-64 processors as well as Intel Pentium Pro or newer. NOTE: --enable-cet has been tested for i686, x86_64 and x32 on non-CET processors. --enable-cet has been tested for x86_64 and x32 on CET SDVs, but Intel CET support hasn't been validated for i686. * The GNU C Library now has correct support for ABSOLUTE symbols (SHN_ABS-relative symbols). Previously such ABSOLUTE symbols were relocated incorrectly or in some cases discarded. The GNU linker can make use of the newer semantics, but it must communicate it to the dynamic loader by setting the ELF file's identification (EI_ABIVERSION field) to indicate such support is required. * Unicode 11.0.0 Support: Character encoding, character type info, and transliteration tables are all updated to Unicode 11.0.0, using generator scripts contributed by Mike FABIAN (Red Hat). * <math.h> functions that round their results to a narrower type are added from TS 18661-1:2014 and TS 18661-3:2015: - fadd, faddl, daddl and corresponding fMaddfN, fMaddfNx, fMxaddfN and fMxaddfNx functions. - fsub, fsubl, dsubl and corresponding fMsubfN, fMsubfNx, fMxsubfN and fMxsubfNx functions. - fmul, fmull, dmull and corresponding fMmulfN, fMmulfNx, fMxmulfN and fMxmulfNx functions. - fdiv, fdivl, ddivl and corresponding fMdivfN, fMdivfNx, fMxdivfN and fMxdivfNx functions. * Two grammatical forms of month names are now supported for the following languages: Armenian, Asturian, Catalan, Czech, Kashubian, Occitan, Ossetian, Scottish Gaelic, Upper Sorbian, and Walloon. The following languages now support two grammatical forms in abbreviated month names: Catalan, Greek, and Kashubian. * Newly added locales: Lower Sorbian (dsb_DE) and Yakut (sah_RU) also include the support for two grammatical forms of month names. * Building and running on GNU/Hurd systems now works without out-of-tree patches. * The renameat2 function has been added, a variant of the renameat function which has a flags argument. If the flags are zero, the renameat2 function acts like renameat. If the flag is not zero and there is no kernel support for renameat2, the function will fail with an errno value of EINVAL. This is different from the existing gnulib function renameatu, which performs a plain rename operation in case of a RENAME_NOREPLACE flags and a non-existing destination (and therefore has a race condition that can clobber the destination inadvertently). * The statx function has been added, a variant of the fstatat64 function with an additional flags argument. If there is no direct kernel support for statx, glibc provides basic stat support based on the fstatat64 function. * IDN domain names in getaddrinfo and getnameinfo now use the system libidn2 library if installed. libidn2 version 2.0.5 or later is recommended. If libidn2 is not available, internationalized domain names are not encoded or decoded even if the AI_IDN or NI_IDN flags are passed to getaddrinfo or getnameinfo. (getaddrinfo calls with non-ASCII names and AI_IDN will fail with an encoding error.) Flags which used to change the IDN encoding and decoding behavior (AI_IDN_ALLOW_UNASSIGNED, AI_IDN_USE_STD3_ASCII_RULES, NI_IDN_ALLOW_UNASSIGNED, NI_IDN_USE_STD3_ASCII_RULES) have been deprecated. They no longer have any effect. * Parsing of dynamic string tokens in DT_RPATH, DT_RUNPATH, DT_NEEDED, DT_AUXILIARY, and DT_FILTER has been expanded to support the full range of ELF gABI expressions including such constructs as '$ORIGIN$ORIGIN' (if valid). For SUID/GUID applications the rules have been further restricted, and where in the past a dynamic string token sequence may have been interpreted as a literal string it will now cause a load failure. These load failures were always considered unspecified behaviour from the perspective of the dynamic loader, and for safety are now load errors e.g. /foo/${ORIGIN}.so in DT_NEEDED results in a load failure now. * Support for ISO C threads (ISO/IEC 9899:2011) has been added. The implementation includes all the standard functions provided by <threads.h>: - thrd_current, thrd_equal, thrd_sleep, thrd_yield, thrd_create, thrd_detach, thrd_exit, and thrd_join for thread management. - mtx_init, mtx_lock, mtx_timedlock, mtx_trylock, mtx_unlock, and mtx_destroy for mutual exclusion. - call_once for function call synchronization. - cnd_broadcast, cnd_destroy, cnd_init, cnd_signal, cnd_timedwait, and cnd_wait for conditional variables. - tss_create, tss_delete, tss_get, and tss_set for thread-local storage. Application developers must link against libpthread to use ISO C threads. Deprecated and removed features, and other changes affecting compatibility: * The nonstandard header files <libio.h> and <_G_config.h> are no longer installed. Software that was using either header should be updated to use standard <stdio.h> interfaces instead. * The stdio functions 'getc' and 'putc' are no longer defined as macros. This was never required by the C standard, and the macros just expanded to call alternative names for the same functions. If you hoped getc and putc would provide performance improvements over fgetc and fputc, instead investigate using (f)getc_unlocked and (f)putc_unlocked, and, if necessary, flockfile and funlockfile. * All stdio functions now treat end-of-file as a sticky condition. If you read from a file until EOF, and then the file is enlarged by another process, you must call clearerr or another function with the same effect (e.g. fseek, rewind) before you can read the additional data. This corrects a longstanding C99 conformance bug. It is most likely to affect programs that use stdio to read interactive input from a terminal. (Bug #1190.) * The macros 'major', 'minor', and 'makedev' are now only available from the header <sys/sysmacros.h>; not from <sys/types.h> or various other headers that happen to include <sys/types.h>. These macros are rarely used, not part of POSIX nor XSI, and their names frequently collide with user code; see https://sourceware.org/bugzilla/show_bug.cgi?id=19239 for further explanation. <sys/sysmacros.h> is a GNU extension. Portable programs that require these macros should first include <sys/types.h>, and then include <sys/sysmacros.h> if __GNU_LIBRARY__ is defined. * The tilegx*-*-linux-gnu configurations are no longer supported. * The obsolete function ustat is no longer available to newly linked binaries; the headers <ustat.h> and <sys/ustat.h> have been removed. This function has been deprecated in favor of fstatfs and statfs. * The obsolete function nfsservctl is no longer available to newly linked binaries. This function was specific to systems using the Linux kernel and could not usefully be used with the GNU C Library on systems with version 3.1 or later of the Linux kernel. * The obsolete function name llseek is no longer available to newly linked binaries. This function was specific to systems using the Linux kernel and was not declared in a header. Programs should use the lseek64 name for this function instead. * The AI_IDN_ALLOW_UNASSIGNED and NI_IDN_ALLOW_UNASSIGNED flags for the getaddrinfo and getnameinfo functions have been deprecated. The behavior previously selected by them is now always enabled. * The AI_IDN_USE_STD3_ASCII_RULES and NI_IDN_USE_STD3_ASCII_RULES flags for the getaddrinfo and getnameinfo functions have been deprecated. The STD3 restriction (rejecting '_' in host names, among other things) has been removed, for increased compatibility with non-IDN name resolution. * The fcntl function now have a Long File Support variant named fcntl64. It is added to fix some Linux Open File Description (OFD) locks usage on non LFS mode. As for others *64 functions, fcntl64 semantics are analogous with fcntl and LFS support is handled transparently. Also for Linux, the OFD locks act as a cancellation entrypoint. * The obsolete functions encrypt, encrypt_r, setkey, setkey_r, cbc_crypt, ecb_crypt, and des_setparity are no longer available to newly linked binaries, and the headers <rpc/des_crypt.h> and <rpc/rpc_des.h> are no longer installed. These functions encrypted and decrypted data with the DES block cipher, which is no longer considered secure. Software that still uses these functions should switch to a modern cryptography library, such as libgcrypt. * Reflecting the removal of the encrypt and setkey functions above, the macro _XOPEN_CRYPT is no longer defined. As a consequence, the crypt function is no longer declared unless _DEFAULT_SOURCE or _GNU_SOURCE is enabled. * The obsolete function fcrypt is no longer available to newly linked binaries. It was just another name for the standard function crypt, and it has not appeared in any header file in many years. * We have tentative plans to hand off maintenance of the passphrase-hashing library, libcrypt, to a separate development project that will, we hope, keep up better with new passphrase-hashing algorithms. We will continue to declare 'crypt' in <unistd.h>, and programs that use 'crypt' or 'crypt_r' should not need to change at all; however, distributions will need to install <crypt.h> and libcrypt from a separate project. In this release, if the configure option --disable-crypt is used, glibc will not install <crypt.h> or libcrypt, making room for the separate project's versions of these files. The plan is to make this the default behavior in a future release. Changes to build and runtime requirements: GNU make 4.0 or later is now required to build glibc. Security related changes: CVE-2016-6261, CVE-2016-6263, CVE-2017-14062: Various vulnerabilities have been fixed by removing the glibc-internal IDNA implementation and using the system-provided libidn2 library instead. Originally reported by Hanno Böck and Christian Weisgerber. CVE-2017-18269: An SSE2-based memmove implementation for the i386 architecture could corrupt memory. Reported by Max Horn. CVE-2018-11236: Very long pathname arguments to realpath function could result in an integer overflow and buffer overflow. Reported by Alexey Izbyshev. CVE-2018-11237: The mempcpy implementation for the Intel Xeon Phi architecture could write beyond the target buffer, resulting in a buffer overflow. Reported by Andreas Schwab. The following bugs are resolved with this release: [1190] stdio: fgetc()/fread() behaviour is not POSIX compliant [6889] manual: 'PWD' mentioned but not specified [13575] libc: SSIZE_MAX defined as LONG_MAX is inconsistent with ssize_t, when __WORDSIZE != 64 [13762] regex: re_search etc. should return -2 on memory exhaustion [13888] build: /tmp usage during testing [13932] math: dbl-64 pow unexpectedly slow for some inputs [14092] nptl: Support C11 threads [14095] localedata: Review / update collation data from Unicode / ISO 14651 [14508] libc: -Wformat warnings [14553] libc: Namespace pollution loff_t in sys/types.h [14890] libc: Make NT_PRFPREG canonical. [15105] libc: Extra PLT references with -Os [15512] libc: __bswap_constant_16 not compiled when -Werror -Wsign- conversion is given [16335] manual: Feature test macro documentation incomplete and out of date [16552] libc: Unify umount implementations in terms of umount2 [17082] libc: htons et al.: statement-expressions prevent use on global scope with -O1 and higher [17343] libc: Signed integer overflow in /stdlib/random_r.c [17438] localedata: pt_BR: wrong d_fmt delimiter [17662] libc: please implement binding for the new renameat2 syscall [17721] libc: __restrict defined as /* Ignore */ even in c11 [17979] libc: inconsistency between uchar.h and stdint.h [18018] dynamic-link: Additional $ORIGIN handling issues (CVE-2011-0536) [18023] libc: extend_alloca is broken (questionable pointer comparison, horrible machine code) [18124] libc: hppa: setcontext erroneously returns -1 as exit code for last constant. [18471] libc: llseek should be a compat symbol [18473] soft-fp: [powerpc-nofpu] __sqrtsf2, __sqrtdf2 should be compat symbols [18991] nss: nss_files skips large entry in database [19239] libc: Including stdlib.h ends up with macros major and minor being defined [19463] libc: linknamespace failures when compiled with -Os [19485] localedata: csb_PL: Update month translations + add yesstr/nostr [19527] locale: Normalized charset name not recognized by setlocale [19667] string: Missing Sanity Check for malloc calls in file 'testcopy.c' [19668] libc: Missing Sanity Check for malloc() in file 'tst-setcontext- fpscr.c' [19728] network: out of bounds stack read in libidn function idna_to_ascii_4i (CVE-2016-6261) [19729] network: out of bounds heap read on invalid utf-8 inputs in stringprep_utf8_nfkc_normalize (CVE-2016-6263) [19818] dynamic-link: Absolute (SHN_ABS) symbols incorrectly relocated by the base address [20079] libc: Add SHT_X86_64_UNWIND to elf.h [20251] libc: 32bit programs pass garbage in struct flock for OFD locks [20419] dynamic-link: files with large allocated notes crash in open_verify [20530] libc: bswap_16 should use __builtin_bswap16() when available [20890] dynamic-link: ldconfig: fsync the files before atomic rename [20980] manual: CFLAGS environment variable replaces vital options [21163] regex: Assertion failure in pop_fail_stack when executing a malformed regexp (CVE-2015-8985) [21234] manual: use of CFLAGS makes glibc detect no optimization [21269] dynamic-link: i386 sigaction sa_restorer handling is wrong [21313] build: Compile Error GCC 5.4.0 MIPS with -0S [21314] build: Compile Error GCC 5.2.0 MIPS with -0s [21508] locale: intl/tst-gettext failure with latest msgfmt [21547] localedata: Tibetan script collation broken (Dzongkha and Tibetan) [21812] network: getifaddrs() returns entries with ifa_name == NULL [21895] libc: ppc64 setjmp/longjmp not fully interoperable with static dlopen [21942] dynamic-link: _dl_dst_substitute incorrectly handles $ORIGIN: with AT_SECURE=1 [22241] localedata: New locale: Yakut (Sakha) locale for Russia (sah_RU) [22247] network: Integer overflow in the decode_digit function in puny_decode.c in libidn (CVE-2017-14062) [22342] nscd: NSCD not properly caching netgroup [22391] nptl: Signal function clear NPTL internal symbols inconsistently [22550] localedata: es_ES locale (and other es_* locales): collation should treat ñ as a primary different character, sync the collation for Spanish with CLDR [22638] dynamic-link: sparc: static binaries are broken if glibc is built by gcc configured with --enable-default-pie [22639] time: year 2039 bug for localtime etc. on 64-bit platforms [22644] string: memmove-sse2-unaligned on 32bit x86 produces garbage when crossing 2GB threshold (CVE-2017-18269) [22646] localedata: redundant data (LC_TIME) for es_CL, es_CU, es_EC and es_BO [22735] time: Misleading typo in time.h source comment regarding CLOCKS_PER_SECOND [22753] libc: preadv2/pwritev2 fallback code should handle offset=-1 [22761] libc: No trailing `%n' conversion specifier in FMT passed from `__assert_perror_fail ()' to `__assert_fail_base ()' [22766] libc: all glibc internal dlopen should use RTLD_NOW for robust dlopen failures [22786] libc: Stack buffer overflow in realpath() if input size is close to SSIZE_MAX (CVE-2018-11236) [22787] dynamic-link: _dl_check_caller returns false when libc is linked through an absolute DT_NEEDED path [22792] build: tcb-offsets.h dependency dropped [22797] libc: pkey_get() uses non-reserved name of argument [22807] libc: PTRACE_* constants missing for powerpc [22818] glob: posix/tst-glob_lstat_compat failure on alpha [22827] dynamic-link: RISC-V ELF64 parser mis-reads flag in ldconfig [22830] malloc: malloc_stats doesn't restore cancellation state on stderr [22848] localedata: ca_ES: update date definitions from CLDR [22862] build: _DEFAULT_SOURCE is defined even when _ISOC11_SOURCE is [22884] math: RISCV fmax/fmin handle signalling NANs incorrectly [22896] localedata: Update locale data for an_ES [22902] math: float128 test failures with GCC 8 [22918] libc: multiple common of `__nss_shadow_database' [22919] libc: sparc32: backtrace yields infinite backtrace with makecontext [22926] libc: FTBFS on powerpcspe [22932] localedata: lt_LT: Update of abbreviated month names from CLDR required [22937] localedata: Greek (el_GR, el_CY) locales actually need ab_alt_mon [22947] libc: FAIL: misc/tst-preadvwritev2 [22963] localedata: cs_CZ: Add alternative month names [22987] math: [powerpc/sparc] fdim inlines errno, exceptions handling [22996] localedata: change LC_PAPER to en_US in es_BO locale [22998] dynamic-link: execstack tests are disabled when SELinux is disabled [23005] network: Crash in __res_context_send after memory allocation failure [23007] math: strtod cannot handle -nan [23024] nss: getlogin_r is performing NSS lookups when loginid isn't set [23036] regex: regex equivalence class regression [23037] libc: initialize msg_flags to zero for sendmmsg() calls [23069] libc: sigaction broken on riscv64-linux-gnu [23094] localedata: hr_HR: wrong thousands_sep and mon_thousands_sep [23102] dynamic-link: Incorrect parsing of multiple consecutive $variable patterns in runpath entries (e.g. $ORIGIN$ORIGIN) [23137] nptl: s390: pthread_join sometimes block indefinitely (on 31bit and libc build with -Os) [23140] localedata: More languages need two forms of month names [23145] libc: _init/_fini aren't marked as hidden [23152] localedata: gd_GB: Fix typo in "May" (abbreviated) [23171] math: C++ iseqsig for long double converts arguments to double [23178] nscd: sudo will fail when it is run in concurrent with commands that changes /etc/passwd [23196] string: __mempcpy_avx512_no_vzeroupper mishandles large copies (CVE-2018-11237) [23206] dynamic-link: static-pie + dlopen breaks debugger interaction [23208] localedata: New locale - Lower Sorbian (dsb) [23233] regex: Memory leak in build_charclass_op function in file posix/regcomp.c [23236] stdio: Harden function pointers in _IO_str_fields [23250] nptl: Offset of __private_ss differs from GCC [23253] math: tgamma test suite failures on i686 with -march=x86-64 -mtune=generic -mfpmath=sse [23259] dynamic-link: Unsubstituted ${ORIGIN} remains in DT_NEEDED for AT_SECURE [23264] libc: posix_spawnp wrongly executes ENOEXEC in non compat mode [23266] nis: stringop-truncation warning with new gcc8.1 in nisplus- parser.c [23272] math: fma(INFINITY,INFIITY,0.0) should be INFINITY [23277] math: nan function should not have const attribute [23279] math: scanf and strtod wrong for some hex floating-point [23280] math: wscanf rounds wrong; wcstod is ok for negative numbers and directed rounding [23290] localedata: IBM273 is not equivalent to ISO-8859-1 [23303] build: undefined reference to symbol '__parse_hwcap_and_convert_at_platform@@GLIBC_2.23' [23307] dynamic-link: Absolute symbols whose value is zero ignored in lookup [23313] stdio: libio vtables validation and standard file object interposition [23329] libc: The __libc_freeres infrastructure is not properly run across DSO boundaries. [23349] libc: Various glibc headers no longer compatible with <linux/time.h> [23351] malloc: Remove unused code related to heap dumps and malloc checking [23363] stdio: stdio-common/tst-printf.c has non-free license [23396] regex: Regex equivalence regression in single-byte locales [23422] localedata: oc_FR: More updates of locale data [23442] build: New warning with GCC 8 [23448] libc: Out of bounds access in IBM-1390 converter [23456] libc: Wrong index_cpu_LZCNT [23458] build: tst-get-cpu-features-static isn't added to tests [23459] libc: COMMON_CPUID_INDEX_80000001 isn't populated for Intel processors [23467] dynamic-link: x86/CET: A property note parser bug Release Notes ============= https://sourceware.org/glibc/wiki/Release/2.28 Contributors ============ This release was made possible by the contributions of many people. The maintainers are grateful to everyone who has contributed changes or bug reports. These include: Adhemerval Zanella Agustina Arzille Alan Modra Alexandre Oliva Amit Pawar Andreas Schwab Andrew Senkevich Andrew Waterman Aurelien Jarno Carlos O'Donell Chung-Lin Tang DJ Delorie Daniel Alvarez David Michael Dmitry V. Levin Dragan Stanojevic - Nevidljivi Florian Weimer Flávio Cruz Francois Goichon Gabriel F. T. Gomes H.J. Lu Herman ten Brugge Hongbo Zhang Igor Gnatenko Jesse Hathaway John David Anglin Joseph Myers Leonardo Sandoval Maciej W. Rozycki Mark Wielaard Martin Sebor Michael Wolf Mike FABIAN Patrick McGehearty Patsy Franklin Paul Pluzhnikov Quentin PAGÈS Rafal Luzynski Rajalakshmi Srinivasaraghavan Raymond Nicholson Rical Jasan Richard Braun Robert Buj Rogerio Alves Samuel Thibault Sean McKean Siddhesh Poyarekar Stefan Liebler Steve Ellcey Sylvain Lesage Szabolcs Nagy Thomas Schwinge Tulio Magno Quites Machado Filho Valery Timiriliyev Vincent Chen Wilco Dijkstra Zack Weinberg Zong Li
Diffstat (limited to 'sysdeps/x86/cacheinfo.c')
-rw-r--r--sysdeps/x86/cacheinfo.c791
1 files changed, 791 insertions, 0 deletions
diff --git a/sysdeps/x86/cacheinfo.c b/sysdeps/x86/cacheinfo.c
new file mode 100644
index 0000000000..b9444ddd52
--- /dev/null
+++ b/sysdeps/x86/cacheinfo.c
@@ -0,0 +1,791 @@
+/* x86_64 cache info.
+ Copyright (C) 2003-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if IS_IN (libc)
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <cpuid.h>
+#include <init-arch.h>
+
+static const struct intel_02_cache_info
+{
+ unsigned char idx;
+ unsigned char assoc;
+ unsigned char linesize;
+ unsigned char rel_name;
+ unsigned int size;
+} intel_02_known [] =
+ {
+#define M(sc) ((sc) - _SC_LEVEL1_ICACHE_SIZE)
+ { 0x06, 4, 32, M(_SC_LEVEL1_ICACHE_SIZE), 8192 },
+ { 0x08, 4, 32, M(_SC_LEVEL1_ICACHE_SIZE), 16384 },
+ { 0x09, 4, 32, M(_SC_LEVEL1_ICACHE_SIZE), 32768 },
+ { 0x0a, 2, 32, M(_SC_LEVEL1_DCACHE_SIZE), 8192 },
+ { 0x0c, 4, 32, M(_SC_LEVEL1_DCACHE_SIZE), 16384 },
+ { 0x0d, 4, 64, M(_SC_LEVEL1_DCACHE_SIZE), 16384 },
+ { 0x0e, 6, 64, M(_SC_LEVEL1_DCACHE_SIZE), 24576 },
+ { 0x21, 8, 64, M(_SC_LEVEL2_CACHE_SIZE), 262144 },
+ { 0x22, 4, 64, M(_SC_LEVEL3_CACHE_SIZE), 524288 },
+ { 0x23, 8, 64, M(_SC_LEVEL3_CACHE_SIZE), 1048576 },
+ { 0x25, 8, 64, M(_SC_LEVEL3_CACHE_SIZE), 2097152 },
+ { 0x29, 8, 64, M(_SC_LEVEL3_CACHE_SIZE), 4194304 },
+ { 0x2c, 8, 64, M(_SC_LEVEL1_DCACHE_SIZE), 32768 },
+ { 0x30, 8, 64, M(_SC_LEVEL1_ICACHE_SIZE), 32768 },
+ { 0x39, 4, 64, M(_SC_LEVEL2_CACHE_SIZE), 131072 },
+ { 0x3a, 6, 64, M(_SC_LEVEL2_CACHE_SIZE), 196608 },
+ { 0x3b, 2, 64, M(_SC_LEVEL2_CACHE_SIZE), 131072 },
+ { 0x3c, 4, 64, M(_SC_LEVEL2_CACHE_SIZE), 262144 },
+ { 0x3d, 6, 64, M(_SC_LEVEL2_CACHE_SIZE), 393216 },
+ { 0x3e, 4, 64, M(_SC_LEVEL2_CACHE_SIZE), 524288 },
+ { 0x3f, 2, 64, M(_SC_LEVEL2_CACHE_SIZE), 262144 },
+ { 0x41, 4, 32, M(_SC_LEVEL2_CACHE_SIZE), 131072 },
+ { 0x42, 4, 32, M(_SC_LEVEL2_CACHE_SIZE), 262144 },
+ { 0x43, 4, 32, M(_SC_LEVEL2_CACHE_SIZE), 524288 },
+ { 0x44, 4, 32, M(_SC_LEVEL2_CACHE_SIZE), 1048576 },
+ { 0x45, 4, 32, M(_SC_LEVEL2_CACHE_SIZE), 2097152 },
+ { 0x46, 4, 64, M(_SC_LEVEL3_CACHE_SIZE), 4194304 },
+ { 0x47, 8, 64, M(_SC_LEVEL3_CACHE_SIZE), 8388608 },
+ { 0x48, 12, 64, M(_SC_LEVEL2_CACHE_SIZE), 3145728 },
+ { 0x49, 16, 64, M(_SC_LEVEL2_CACHE_SIZE), 4194304 },
+ { 0x4a, 12, 64, M(_SC_LEVEL3_CACHE_SIZE), 6291456 },
+ { 0x4b, 16, 64, M(_SC_LEVEL3_CACHE_SIZE), 8388608 },
+ { 0x4c, 12, 64, M(_SC_LEVEL3_CACHE_SIZE), 12582912 },
+ { 0x4d, 16, 64, M(_SC_LEVEL3_CACHE_SIZE), 16777216 },
+ { 0x4e, 24, 64, M(_SC_LEVEL2_CACHE_SIZE), 6291456 },
+ { 0x60, 8, 64, M(_SC_LEVEL1_DCACHE_SIZE), 16384 },
+ { 0x66, 4, 64, M(_SC_LEVEL1_DCACHE_SIZE), 8192 },
+ { 0x67, 4, 64, M(_SC_LEVEL1_DCACHE_SIZE), 16384 },
+ { 0x68, 4, 64, M(_SC_LEVEL1_DCACHE_SIZE), 32768 },
+ { 0x78, 8, 64, M(_SC_LEVEL2_CACHE_SIZE), 1048576 },
+ { 0x79, 8, 64, M(_SC_LEVEL2_CACHE_SIZE), 131072 },
+ { 0x7a, 8, 64, M(_SC_LEVEL2_CACHE_SIZE), 262144 },
+ { 0x7b, 8, 64, M(_SC_LEVEL2_CACHE_SIZE), 524288 },
+ { 0x7c, 8, 64, M(_SC_LEVEL2_CACHE_SIZE), 1048576 },
+ { 0x7d, 8, 64, M(_SC_LEVEL2_CACHE_SIZE), 2097152 },
+ { 0x7f, 2, 64, M(_SC_LEVEL2_CACHE_SIZE), 524288 },
+ { 0x80, 8, 64, M(_SC_LEVEL2_CACHE_SIZE), 524288 },
+ { 0x82, 8, 32, M(_SC_LEVEL2_CACHE_SIZE), 262144 },
+ { 0x83, 8, 32, M(_SC_LEVEL2_CACHE_SIZE), 524288 },
+ { 0x84, 8, 32, M(_SC_LEVEL2_CACHE_SIZE), 1048576 },
+ { 0x85, 8, 32, M(_SC_LEVEL2_CACHE_SIZE), 2097152 },
+ { 0x86, 4, 64, M(_SC_LEVEL2_CACHE_SIZE), 524288 },
+ { 0x87, 8, 64, M(_SC_LEVEL2_CACHE_SIZE), 1048576 },
+ { 0xd0, 4, 64, M(_SC_LEVEL3_CACHE_SIZE), 524288 },
+ { 0xd1, 4, 64, M(_SC_LEVEL3_CACHE_SIZE), 1048576 },
+ { 0xd2, 4, 64, M(_SC_LEVEL3_CACHE_SIZE), 2097152 },
+ { 0xd6, 8, 64, M(_SC_LEVEL3_CACHE_SIZE), 1048576 },
+ { 0xd7, 8, 64, M(_SC_LEVEL3_CACHE_SIZE), 2097152 },
+ { 0xd8, 8, 64, M(_SC_LEVEL3_CACHE_SIZE), 4194304 },
+ { 0xdc, 12, 64, M(_SC_LEVEL3_CACHE_SIZE), 2097152 },
+ { 0xdd, 12, 64, M(_SC_LEVEL3_CACHE_SIZE), 4194304 },
+ { 0xde, 12, 64, M(_SC_LEVEL3_CACHE_SIZE), 8388608 },
+ { 0xe2, 16, 64, M(_SC_LEVEL3_CACHE_SIZE), 2097152 },
+ { 0xe3, 16, 64, M(_SC_LEVEL3_CACHE_SIZE), 4194304 },
+ { 0xe4, 16, 64, M(_SC_LEVEL3_CACHE_SIZE), 8388608 },
+ { 0xea, 24, 64, M(_SC_LEVEL3_CACHE_SIZE), 12582912 },
+ { 0xeb, 24, 64, M(_SC_LEVEL3_CACHE_SIZE), 18874368 },
+ { 0xec, 24, 64, M(_SC_LEVEL3_CACHE_SIZE), 25165824 },
+ };
+
+#define nintel_02_known (sizeof (intel_02_known) / sizeof (intel_02_known [0]))
+
+static int
+intel_02_known_compare (const void *p1, const void *p2)
+{
+ const struct intel_02_cache_info *i1;
+ const struct intel_02_cache_info *i2;
+
+ i1 = (const struct intel_02_cache_info *) p1;
+ i2 = (const struct intel_02_cache_info *) p2;
+
+ if (i1->idx == i2->idx)
+ return 0;
+
+ return i1->idx < i2->idx ? -1 : 1;
+}
+
+
+static long int
+__attribute__ ((noinline))
+intel_check_word (int name, unsigned int value, bool *has_level_2,
+ bool *no_level_2_or_3,
+ const struct cpu_features *cpu_features)
+{
+ if ((value & 0x80000000) != 0)
+ /* The register value is reserved. */
+ return 0;
+
+ /* Fold the name. The _SC_ constants are always in the order SIZE,
+ ASSOC, LINESIZE. */
+ int folded_rel_name = (M(name) / 3) * 3;
+
+ while (value != 0)
+ {
+ unsigned int byte = value & 0xff;
+
+ if (byte == 0x40)
+ {
+ *no_level_2_or_3 = true;
+
+ if (folded_rel_name == M(_SC_LEVEL3_CACHE_SIZE))
+ /* No need to look further. */
+ break;
+ }
+ else if (byte == 0xff)
+ {
+ /* CPUID leaf 0x4 contains all the information. We need to
+ iterate over it. */
+ unsigned int eax;
+ unsigned int ebx;
+ unsigned int ecx;
+ unsigned int edx;
+
+ unsigned int round = 0;
+ while (1)
+ {
+ __cpuid_count (4, round, eax, ebx, ecx, edx);
+
+ enum { null = 0, data = 1, inst = 2, uni = 3 } type = eax & 0x1f;
+ if (type == null)
+ /* That was the end. */
+ break;
+
+ unsigned int level = (eax >> 5) & 0x7;
+
+ if ((level == 1 && type == data
+ && folded_rel_name == M(_SC_LEVEL1_DCACHE_SIZE))
+ || (level == 1 && type == inst
+ && folded_rel_name == M(_SC_LEVEL1_ICACHE_SIZE))
+ || (level == 2 && folded_rel_name == M(_SC_LEVEL2_CACHE_SIZE))
+ || (level == 3 && folded_rel_name == M(_SC_LEVEL3_CACHE_SIZE))
+ || (level == 4 && folded_rel_name == M(_SC_LEVEL4_CACHE_SIZE)))
+ {
+ unsigned int offset = M(name) - folded_rel_name;
+
+ if (offset == 0)
+ /* Cache size. */
+ return (((ebx >> 22) + 1)
+ * (((ebx >> 12) & 0x3ff) + 1)
+ * ((ebx & 0xfff) + 1)
+ * (ecx + 1));
+ if (offset == 1)
+ return (ebx >> 22) + 1;
+
+ assert (offset == 2);
+ return (ebx & 0xfff) + 1;
+ }
+
+ ++round;
+ }
+ /* There is no other cache information anywhere else. */
+ break;
+ }
+ else
+ {
+ if (byte == 0x49 && folded_rel_name == M(_SC_LEVEL3_CACHE_SIZE))
+ {
+ /* Intel reused this value. For family 15, model 6 it
+ specifies the 3rd level cache. Otherwise the 2nd
+ level cache. */
+ unsigned int family = cpu_features->family;
+ unsigned int model = cpu_features->model;
+
+ if (family == 15 && model == 6)
+ {
+ /* The level 3 cache is encoded for this model like
+ the level 2 cache is for other models. Pretend
+ the caller asked for the level 2 cache. */
+ name = (_SC_LEVEL2_CACHE_SIZE
+ + (name - _SC_LEVEL3_CACHE_SIZE));
+ folded_rel_name = M(_SC_LEVEL2_CACHE_SIZE);
+ }
+ }
+
+ struct intel_02_cache_info *found;
+ struct intel_02_cache_info search;
+
+ search.idx = byte;
+ found = bsearch (&search, intel_02_known, nintel_02_known,
+ sizeof (intel_02_known[0]), intel_02_known_compare);
+ if (found != NULL)
+ {
+ if (found->rel_name == folded_rel_name)
+ {
+ unsigned int offset = M(name) - folded_rel_name;
+
+ if (offset == 0)
+ /* Cache size. */
+ return found->size;
+ if (offset == 1)
+ return found->assoc;
+
+ assert (offset == 2);
+ return found->linesize;
+ }
+
+ if (found->rel_name == M(_SC_LEVEL2_CACHE_SIZE))
+ *has_level_2 = true;
+ }
+ }
+
+ /* Next byte for the next round. */
+ value >>= 8;
+ }
+
+ /* Nothing found. */
+ return 0;
+}
+
+
+static long int __attribute__ ((noinline))
+handle_intel (int name, const struct cpu_features *cpu_features)
+{
+ unsigned int maxidx = cpu_features->max_cpuid;
+
+ /* Return -1 for older CPUs. */
+ if (maxidx < 2)
+ return -1;
+
+ /* OK, we can use the CPUID instruction to get all info about the
+ caches. */
+ unsigned int cnt = 0;
+ unsigned int max = 1;
+ long int result = 0;
+ bool no_level_2_or_3 = false;
+ bool has_level_2 = false;
+
+ while (cnt++ < max)
+ {
+ unsigned int eax;
+ unsigned int ebx;
+ unsigned int ecx;
+ unsigned int edx;
+ __cpuid (2, eax, ebx, ecx, edx);
+
+ /* The low byte of EAX in the first round contain the number of
+ rounds we have to make. At least one, the one we are already
+ doing. */
+ if (cnt == 1)
+ {
+ max = eax & 0xff;
+ eax &= 0xffffff00;
+ }
+
+ /* Process the individual registers' value. */
+ result = intel_check_word (name, eax, &has_level_2,
+ &no_level_2_or_3, cpu_features);
+ if (result != 0)
+ return result;
+
+ result = intel_check_word (name, ebx, &has_level_2,
+ &no_level_2_or_3, cpu_features);
+ if (result != 0)
+ return result;
+
+ result = intel_check_word (name, ecx, &has_level_2,
+ &no_level_2_or_3, cpu_features);
+ if (result != 0)
+ return result;
+
+ result = intel_check_word (name, edx, &has_level_2,
+ &no_level_2_or_3, cpu_features);
+ if (result != 0)
+ return result;
+ }
+
+ if (name >= _SC_LEVEL2_CACHE_SIZE && name <= _SC_LEVEL3_CACHE_LINESIZE
+ && no_level_2_or_3)
+ return -1;
+
+ return 0;
+}
+
+
+static long int __attribute__ ((noinline))
+handle_amd (int name)
+{
+ unsigned int eax;
+ unsigned int ebx;
+ unsigned int ecx;
+ unsigned int edx;
+ __cpuid (0x80000000, eax, ebx, ecx, edx);
+
+ /* No level 4 cache (yet). */
+ if (name > _SC_LEVEL3_CACHE_LINESIZE)
+ return 0;
+
+ unsigned int fn = 0x80000005 + (name >= _SC_LEVEL2_CACHE_SIZE);
+ if (eax < fn)
+ return 0;
+
+ __cpuid (fn, eax, ebx, ecx, edx);
+
+ if (name < _SC_LEVEL1_DCACHE_SIZE)
+ {
+ name += _SC_LEVEL1_DCACHE_SIZE - _SC_LEVEL1_ICACHE_SIZE;
+ ecx = edx;
+ }
+
+ switch (name)
+ {
+ case _SC_LEVEL1_DCACHE_SIZE:
+ return (ecx >> 14) & 0x3fc00;
+
+ case _SC_LEVEL1_DCACHE_ASSOC:
+ ecx >>= 16;
+ if ((ecx & 0xff) == 0xff)
+ /* Fully associative. */
+ return (ecx << 2) & 0x3fc00;
+ return ecx & 0xff;
+
+ case _SC_LEVEL1_DCACHE_LINESIZE:
+ return ecx & 0xff;
+
+ case _SC_LEVEL2_CACHE_SIZE:
+ return (ecx & 0xf000) == 0 ? 0 : (ecx >> 6) & 0x3fffc00;
+
+ case _SC_LEVEL2_CACHE_ASSOC:
+ switch ((ecx >> 12) & 0xf)
+ {
+ case 0:
+ case 1:
+ case 2:
+ case 4:
+ return (ecx >> 12) & 0xf;
+ case 6:
+ return 8;
+ case 8:
+ return 16;
+ case 10:
+ return 32;
+ case 11:
+ return 48;
+ case 12:
+ return 64;
+ case 13:
+ return 96;
+ case 14:
+ return 128;
+ case 15:
+ return ((ecx >> 6) & 0x3fffc00) / (ecx & 0xff);
+ default:
+ return 0;
+ }
+ /* NOTREACHED */
+
+ case _SC_LEVEL2_CACHE_LINESIZE:
+ return (ecx & 0xf000) == 0 ? 0 : ecx & 0xff;
+
+ case _SC_LEVEL3_CACHE_SIZE:
+ return (edx & 0xf000) == 0 ? 0 : (edx & 0x3ffc0000) << 1;
+
+ case _SC_LEVEL3_CACHE_ASSOC:
+ switch ((edx >> 12) & 0xf)
+ {
+ case 0:
+ case 1:
+ case 2:
+ case 4:
+ return (edx >> 12) & 0xf;
+ case 6:
+ return 8;
+ case 8:
+ return 16;
+ case 10:
+ return 32;
+ case 11:
+ return 48;
+ case 12:
+ return 64;
+ case 13:
+ return 96;
+ case 14:
+ return 128;
+ case 15:
+ return ((edx & 0x3ffc0000) << 1) / (edx & 0xff);
+ default:
+ return 0;
+ }
+ /* NOTREACHED */
+
+ case _SC_LEVEL3_CACHE_LINESIZE:
+ return (edx & 0xf000) == 0 ? 0 : edx & 0xff;
+
+ default:
+ assert (! "cannot happen");
+ }
+ return -1;
+}
+
+
+/* Get the value of the system variable NAME. */
+long int
+attribute_hidden
+__cache_sysconf (int name)
+{
+ const struct cpu_features *cpu_features = __get_cpu_features ();
+
+ if (cpu_features->kind == arch_kind_intel)
+ return handle_intel (name, cpu_features);
+
+ if (cpu_features->kind == arch_kind_amd)
+ return handle_amd (name);
+
+ // XXX Fill in more vendors.
+
+ /* CPU not known, we have no information. */
+ return 0;
+}
+
+
+/* Data cache size for use in memory and string routines, typically
+ L1 size, rounded to multiple of 256 bytes. */
+long int __x86_data_cache_size_half attribute_hidden = 32 * 1024 / 2;
+long int __x86_data_cache_size attribute_hidden = 32 * 1024;
+/* Similar to __x86_data_cache_size_half, but not rounded. */
+long int __x86_raw_data_cache_size_half attribute_hidden = 32 * 1024 / 2;
+/* Similar to __x86_data_cache_size, but not rounded. */
+long int __x86_raw_data_cache_size attribute_hidden = 32 * 1024;
+/* Shared cache size for use in memory and string routines, typically
+ L2 or L3 size, rounded to multiple of 256 bytes. */
+long int __x86_shared_cache_size_half attribute_hidden = 1024 * 1024 / 2;
+long int __x86_shared_cache_size attribute_hidden = 1024 * 1024;
+/* Similar to __x86_shared_cache_size_half, but not rounded. */
+long int __x86_raw_shared_cache_size_half attribute_hidden = 1024 * 1024 / 2;
+/* Similar to __x86_shared_cache_size, but not rounded. */
+long int __x86_raw_shared_cache_size attribute_hidden = 1024 * 1024;
+
+/* Threshold to use non temporal store. */
+long int __x86_shared_non_temporal_threshold attribute_hidden;
+
+#ifndef DISABLE_PREFETCHW
+/* PREFETCHW support flag for use in memory and string routines. */
+int __x86_prefetchw attribute_hidden;
+#endif
+
+
+static void
+__attribute__((constructor))
+init_cacheinfo (void)
+{
+ /* Find out what brand of processor. */
+ unsigned int eax;
+ unsigned int ebx;
+ unsigned int ecx;
+ unsigned int edx;
+ int max_cpuid_ex;
+ long int data = -1;
+ long int shared = -1;
+ unsigned int level;
+ unsigned int threads = 0;
+ const struct cpu_features *cpu_features = __get_cpu_features ();
+ int max_cpuid = cpu_features->max_cpuid;
+
+ if (cpu_features->kind == arch_kind_intel)
+ {
+ data = handle_intel (_SC_LEVEL1_DCACHE_SIZE, cpu_features);
+
+ long int core = handle_intel (_SC_LEVEL2_CACHE_SIZE, cpu_features);
+ bool inclusive_cache = true;
+
+ /* Try L3 first. */
+ level = 3;
+ shared = handle_intel (_SC_LEVEL3_CACHE_SIZE, cpu_features);
+
+ /* Number of logical processors sharing L2 cache. */
+ int threads_l2;
+
+ /* Number of logical processors sharing L3 cache. */
+ int threads_l3;
+
+ if (shared <= 0)
+ {
+ /* Try L2 otherwise. */
+ level = 2;
+ shared = core;
+ threads_l2 = 0;
+ threads_l3 = -1;
+ }
+ else
+ {
+ threads_l2 = 0;
+ threads_l3 = 0;
+ }
+
+ /* A value of 0 for the HTT bit indicates there is only a single
+ logical processor. */
+ if (HAS_CPU_FEATURE (HTT))
+ {
+ /* Figure out the number of logical threads that share the
+ highest cache level. */
+ if (max_cpuid >= 4)
+ {
+ unsigned int family = cpu_features->family;
+ unsigned int model = cpu_features->model;
+
+ int i = 0;
+
+ /* Query until cache level 2 and 3 are enumerated. */
+ int check = 0x1 | (threads_l3 == 0) << 1;
+ do
+ {
+ __cpuid_count (4, i++, eax, ebx, ecx, edx);
+
+ /* There seems to be a bug in at least some Pentium Ds
+ which sometimes fail to iterate all cache parameters.
+ Do not loop indefinitely here, stop in this case and
+ assume there is no such information. */
+ if ((eax & 0x1f) == 0)
+ goto intel_bug_no_cache_info;
+
+ switch ((eax >> 5) & 0x7)
+ {
+ default:
+ break;
+ case 2:
+ if ((check & 0x1))
+ {
+ /* Get maximum number of logical processors
+ sharing L2 cache. */
+ threads_l2 = (eax >> 14) & 0x3ff;
+ check &= ~0x1;
+ }
+ break;
+ case 3:
+ if ((check & (0x1 << 1)))
+ {
+ /* Get maximum number of logical processors
+ sharing L3 cache. */
+ threads_l3 = (eax >> 14) & 0x3ff;
+
+ /* Check if L2 and L3 caches are inclusive. */
+ inclusive_cache = (edx & 0x2) != 0;
+ check &= ~(0x1 << 1);
+ }
+ break;
+ }
+ }
+ while (check);
+
+ /* If max_cpuid >= 11, THREADS_L2/THREADS_L3 are the maximum
+ numbers of addressable IDs for logical processors sharing
+ the cache, instead of the maximum number of threads
+ sharing the cache. */
+ if (max_cpuid >= 11)
+ {
+ /* Find the number of logical processors shipped in
+ one core and apply count mask. */
+ i = 0;
+
+ /* Count SMT only if there is L3 cache. Always count
+ core if there is no L3 cache. */
+ int count = ((threads_l2 > 0 && level == 3)
+ | ((threads_l3 > 0
+ || (threads_l2 > 0 && level == 2)) << 1));
+
+ while (count)
+ {
+ __cpuid_count (11, i++, eax, ebx, ecx, edx);
+
+ int shipped = ebx & 0xff;
+ int type = ecx & 0xff00;
+ if (shipped == 0 || type == 0)
+ break;
+ else if (type == 0x100)
+ {
+ /* Count SMT. */
+ if ((count & 0x1))
+ {
+ int count_mask;
+
+ /* Compute count mask. */
+ asm ("bsr %1, %0"
+ : "=r" (count_mask) : "g" (threads_l2));
+ count_mask = ~(-1 << (count_mask + 1));
+ threads_l2 = (shipped - 1) & count_mask;
+ count &= ~0x1;
+ }
+ }
+ else if (type == 0x200)
+ {
+ /* Count core. */
+ if ((count & (0x1 << 1)))
+ {
+ int count_mask;
+ int threads_core
+ = (level == 2 ? threads_l2 : threads_l3);
+
+ /* Compute count mask. */
+ asm ("bsr %1, %0"
+ : "=r" (count_mask) : "g" (threads_core));
+ count_mask = ~(-1 << (count_mask + 1));
+ threads_core = (shipped - 1) & count_mask;
+ if (level == 2)
+ threads_l2 = threads_core;
+ else
+ threads_l3 = threads_core;
+ count &= ~(0x1 << 1);
+ }
+ }
+ }
+ }
+ if (threads_l2 > 0)
+ threads_l2 += 1;
+ if (threads_l3 > 0)
+ threads_l3 += 1;
+ if (level == 2)
+ {
+ if (threads_l2)
+ {
+ threads = threads_l2;
+ if (threads > 2 && family == 6)
+ switch (model)
+ {
+ case 0x37:
+ case 0x4a:
+ case 0x4d:
+ case 0x5a:
+ case 0x5d:
+ /* Silvermont has L2 cache shared by 2 cores. */
+ threads = 2;
+ break;
+ default:
+ break;
+ }
+ }
+ }
+ else if (threads_l3)
+ threads = threads_l3;
+ }
+ else
+ {
+intel_bug_no_cache_info:
+ /* Assume that all logical threads share the highest cache
+ level. */
+
+ threads
+ = ((cpu_features->cpuid[COMMON_CPUID_INDEX_1].ebx
+ >> 16) & 0xff);
+ }
+
+ /* Cap usage of highest cache level to the number of supported
+ threads. */
+ if (shared > 0 && threads > 0)
+ shared /= threads;
+ }
+
+ /* Account for non-inclusive L2 and L3 caches. */
+ if (!inclusive_cache)
+ {
+ if (threads_l2 > 0)
+ core /= threads_l2;
+ shared += core;
+ }
+ }
+ else if (cpu_features->kind == arch_kind_amd)
+ {
+ data = handle_amd (_SC_LEVEL1_DCACHE_SIZE);
+ long int core = handle_amd (_SC_LEVEL2_CACHE_SIZE);
+ shared = handle_amd (_SC_LEVEL3_CACHE_SIZE);
+
+ /* Get maximum extended function. */
+ __cpuid (0x80000000, max_cpuid_ex, ebx, ecx, edx);
+
+ if (shared <= 0)
+ /* No shared L3 cache. All we have is the L2 cache. */
+ shared = core;
+ else
+ {
+ /* Figure out the number of logical threads that share L3. */
+ if (max_cpuid_ex >= 0x80000008)
+ {
+ /* Get width of APIC ID. */
+ __cpuid (0x80000008, max_cpuid_ex, ebx, ecx, edx);
+ threads = 1 << ((ecx >> 12) & 0x0f);
+ }
+
+ if (threads == 0)
+ {
+ /* If APIC ID width is not available, use logical
+ processor count. */
+ __cpuid (0x00000001, max_cpuid_ex, ebx, ecx, edx);
+
+ if ((edx & (1 << 28)) != 0)
+ threads = (ebx >> 16) & 0xff;
+ }
+
+ /* Cap usage of highest cache level to the number of
+ supported threads. */
+ if (threads > 0)
+ shared /= threads;
+
+ /* Account for exclusive L2 and L3 caches. */
+ shared += core;
+ }
+
+#ifndef DISABLE_PREFETCHW
+ if (max_cpuid_ex >= 0x80000001)
+ {
+ __cpuid (0x80000001, eax, ebx, ecx, edx);
+ /* PREFETCHW || 3DNow! */
+ if ((ecx & 0x100) || (edx & 0x80000000))
+ __x86_prefetchw = -1;
+ }
+#endif
+ }
+
+ if (cpu_features->data_cache_size != 0)
+ data = cpu_features->data_cache_size;
+
+ if (data > 0)
+ {
+ __x86_raw_data_cache_size_half = data / 2;
+ __x86_raw_data_cache_size = data;
+ /* Round data cache size to multiple of 256 bytes. */
+ data = data & ~255L;
+ __x86_data_cache_size_half = data / 2;
+ __x86_data_cache_size = data;
+ }
+
+ if (cpu_features->shared_cache_size != 0)
+ shared = cpu_features->shared_cache_size;
+
+ if (shared > 0)
+ {
+ __x86_raw_shared_cache_size_half = shared / 2;
+ __x86_raw_shared_cache_size = shared;
+ /* Round shared cache size to multiple of 256 bytes. */
+ shared = shared & ~255L;
+ __x86_shared_cache_size_half = shared / 2;
+ __x86_shared_cache_size = shared;
+ }
+
+ /* The large memcpy micro benchmark in glibc shows that 6 times of
+ shared cache size is the approximate value above which non-temporal
+ store becomes faster on a 8-core processor. This is the 3/4 of the
+ total shared cache size. */
+ __x86_shared_non_temporal_threshold
+ = (cpu_features->non_temporal_threshold != 0
+ ? cpu_features->non_temporal_threshold
+ : __x86_shared_cache_size * threads * 3 / 4);
+}
+
+#endif