summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Liubov Dmitrieva <liubov.dmitrieva@intel.com> 2013-06-28 15:28:50 -0700
committer H.J. Lu <hjl.tools@gmail.com> 2013-06-28 15:31:40 -0700
commit 6308fd9a46a2f4aa550886e6f58190fb209ef027 (patch)
tree 668039e1091165b38354a1ffebf35b0058de3eed
parent 89cd956937f46e8f4a0374994965f991642dd408 (diff)
Skip SSE4.2 versions on Intel Silvermont
SSE2/SSSE3 versions are faster than SSE4.2 versions on Intel Silvermont.
-rw-r--r-- ChangeLog 14
-rw-r--r-- sysdeps/x86_64/multiarch/init-arch.c 10
-rw-r--r-- sysdeps/x86_64/multiarch/init-arch.h 4
-rw-r--r-- sysdeps/x86_64/multiarch/strchr.S 2
-rw-r--r-- sysdeps/x86_64/multiarch/strcmp.S 34
-rw-r--r-- sysdeps/x86_64/multiarch/strrchr.S 2
6 files changed, 51 insertions, 15 deletions
diff --git a/ChangeLog b/ChangeLog
index c5551b805e..406ca28bd2 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,17 @@
+2013-06-28 Liubov Dmitrieva <liubov.dmitrieva@intel.com>
+
+ * sysdeps/x86_64/multiarch/init-arch.c (__init_cpu_features): Set
+ bit_Slow_SSE4_2 and bit_Prefer_PMINUB_for_stringop for Intel
+ Silvermont.
+ * sysdeps/x86_64/multiarch/init-arch.h (bit_Slow_SSE4_2): New
+ macro.
+ (index_Slow_SSE4_2): Likewise.
+ (index_Prefer_PMINUB_for_stringop): Likewise.
+ * sysdeps/x86_64/multiarch/strchr.S: Skip SSE4.2 version if
+ bit_Slow_SSE4_2 is set.
+ * sysdeps/x86_64/multiarch/strcmp.S: Likewise.
+ * sysdeps/x86_64/multiarch/strrchr.S: Likewise.
+
2013-06-28 Ryan S. Arnold <rsa@linux.vnet.ibm.com>
* sysdeps/powerpc/Makefile: Add comment about generating an offset to
diff --git a/sysdeps/x86_64/multiarch/init-arch.c b/sysdeps/x86_64/multiarch/init-arch.c
index 9524aeea18..55839610e2 100644
--- a/sysdeps/x86_64/multiarch/init-arch.c
+++ b/sysdeps/x86_64/multiarch/init-arch.c
@@ -81,8 +81,16 @@ __init_cpu_features (void)
case 0x37:
/* Unaligned load versions are faster than SSSE3
on Silvermont. */
+#if index_Fast_Unaligned_Load != index_Prefer_PMINUB_for_stringop
+# error index_Fast_Unaligned_Load != index_Prefer_PMINUB_for_stringop
+#endif
+#if index_Fast_Unaligned_Load != index_Slow_SSE4_2
+# error index_Fast_Unaligned_Load != index_Slow_SSE4_2
+#endif
__cpu_features.feature[index_Fast_Unaligned_Load]
- |= bit_Fast_Unaligned_Load;
+ |= (bit_Fast_Unaligned_Load
+ | bit_Prefer_PMINUB_for_stringop
+ | bit_Slow_SSE4_2);
break;
default:
diff --git a/sysdeps/x86_64/multiarch/init-arch.h b/sysdeps/x86_64/multiarch/init-arch.h
index 28edbf7d07..0cb5f5bc30 100644
--- a/sysdeps/x86_64/multiarch/init-arch.h
+++ b/sysdeps/x86_64/multiarch/init-arch.h
@@ -23,6 +23,7 @@
#define bit_AVX_Usable (1 << 6)
#define bit_FMA_Usable (1 << 7)
#define bit_FMA4_Usable (1 << 8)
+#define bit_Slow_SSE4_2 (1 << 9)
/* CPUID Feature flags. */
@@ -62,6 +63,7 @@
# define index_AVX_Usable FEATURE_INDEX_1*FEATURE_SIZE
# define index_FMA_Usable FEATURE_INDEX_1*FEATURE_SIZE
# define index_FMA4_Usable FEATURE_INDEX_1*FEATURE_SIZE
+# define index_Slow_SSE4_2 FEATURE_INDEX_1*FEATURE_SIZE
#else /* __ASSEMBLER__ */
@@ -156,9 +158,11 @@ extern const struct cpu_features *__get_cpu_features (void)
# define index_Fast_Copy_Backward FEATURE_INDEX_1
# define index_Slow_BSF FEATURE_INDEX_1
# define index_Fast_Unaligned_Load FEATURE_INDEX_1
+# define index_Prefer_PMINUB_for_stringop FEATURE_INDEX_1
# define index_AVX_Usable FEATURE_INDEX_1
# define index_FMA_Usable FEATURE_INDEX_1
# define index_FMA4_Usable FEATURE_INDEX_1
+# define index_Slow_SSE4_2 FEATURE_INDEX_1
# define HAS_ARCH_FEATURE(name) \
((__get_cpu_features ()->feature[index_##name] & (bit_##name)) != 0)
diff --git a/sysdeps/x86_64/multiarch/strchr.S b/sysdeps/x86_64/multiarch/strchr.S
index 6860329449..f170238b55 100644
--- a/sysdeps/x86_64/multiarch/strchr.S
+++ b/sysdeps/x86_64/multiarch/strchr.S
@@ -29,6 +29,8 @@ ENTRY(strchr)
jne 1f
call __init_cpu_features
1: leaq __strchr_sse2(%rip), %rax
+ testl $bit_Slow_SSE4_2, __cpu_features+CPUID_OFFSET+index_Slow_SSE4_2(%rip)
+ jnz 2f
testl $bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip)
jz 2f
leaq __strchr_sse42(%rip), %rax
diff --git a/sysdeps/x86_64/multiarch/strcmp.S b/sysdeps/x86_64/multiarch/strcmp.S
index f69aaf42b3..1d4d711838 100644
--- a/sysdeps/x86_64/multiarch/strcmp.S
+++ b/sysdeps/x86_64/multiarch/strcmp.S
@@ -88,14 +88,16 @@ ENTRY(STRCMP)
jne 1f
call __init_cpu_features
1:
+ testl $bit_Slow_SSE4_2, __cpu_features+CPUID_OFFSET+index_Slow_SSE4_2(%rip)
+ jnz 2f
leaq STRCMP_SSE42(%rip), %rax
testl $bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip)
- jnz 2f
- leaq STRCMP_SSSE3(%rip), %rax
+ jnz 3f
+2: leaq STRCMP_SSSE3(%rip), %rax
testl $bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip)
- jnz 2f
+ jnz 3f
leaq STRCMP_SSE2(%rip), %rax
-2: ret
+3: ret
END(STRCMP)
# ifdef USE_AS_STRCASECMP_L
@@ -109,16 +111,18 @@ ENTRY(__strcasecmp)
# ifdef HAVE_AVX_SUPPORT
leaq __strcasecmp_avx(%rip), %rax
testl $bit_AVX_Usable, __cpu_features+FEATURE_OFFSET+index_AVX_Usable(%rip)
- jnz 2f
+ jnz 3f
# endif
+ testl $bit_Slow_SSE4_2, __cpu_features+CPUID_OFFSET+index_Slow_SSE4_2(%rip)
+ jnz 2f
leaq __strcasecmp_sse42(%rip), %rax
testl $bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip)
- jnz 2f
- leaq __strcasecmp_ssse3(%rip), %rax
+ jnz 3f
+2: leaq __strcasecmp_ssse3(%rip), %rax
testl $bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip)
- jnz 2f
+ jnz 3f
leaq __strcasecmp_sse2(%rip), %rax
-2: ret
+3: ret
END(__strcasecmp)
weak_alias (__strcasecmp, strcasecmp)
# endif
@@ -133,16 +137,18 @@ ENTRY(__strncasecmp)
# ifdef HAVE_AVX_SUPPORT
leaq __strncasecmp_avx(%rip), %rax
testl $bit_AVX_Usable, __cpu_features+FEATURE_OFFSET+index_AVX_Usable(%rip)
- jnz 2f
+ jnz 3f
# endif
+ testl $bit_Slow_SSE4_2, __cpu_features+CPUID_OFFSET+index_Slow_SSE4_2(%rip)
+ jnz 2f
leaq __strncasecmp_sse42(%rip), %rax
testl $bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip)
- jnz 2f
- leaq __strncasecmp_ssse3(%rip), %rax
+ jnz 3f
+2: leaq __strncasecmp_ssse3(%rip), %rax
testl $bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip)
- jnz 2f
+ jnz 3f
leaq __strncasecmp_sse2(%rip), %rax
-2: ret
+3: ret
END(__strncasecmp)
weak_alias (__strncasecmp, strncasecmp)
# endif
diff --git a/sysdeps/x86_64/multiarch/strrchr.S b/sysdeps/x86_64/multiarch/strrchr.S
index ee6af6e9dd..3f92a41ef9 100644
--- a/sysdeps/x86_64/multiarch/strrchr.S
+++ b/sysdeps/x86_64/multiarch/strrchr.S
@@ -32,6 +32,8 @@ ENTRY(strrchr)
jne 1f
call __init_cpu_features
1: leaq __strrchr_sse2(%rip), %rax
+ testl $bit_Slow_SSE4_2, __cpu_features+CPUID_OFFSET+index_Slow_SSE4_2(%rip)
+ jnz 2f
testl $bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip)
jz 2f
leaq __strrchr_sse42(%rip), %rax