summary refs log tree commit diff
path: root/sysdeps/powerpc/powerpc64/power8/strncpy.S
diff options
context:
space:
mode:
Diffstat (limited to 'sysdeps/powerpc/powerpc64/power8/strncpy.S')
-rw-r--r-- sysdeps/powerpc/powerpc64/power8/strncpy.S 54
1 files changed, 45 insertions, 9 deletions
diff --git a/sysdeps/powerpc/powerpc64/power8/strncpy.S b/sysdeps/powerpc/powerpc64/power8/strncpy.S
index 5fda953526..17c3afb5fe 100644
--- a/sysdeps/powerpc/powerpc64/power8/strncpy.S
+++ b/sysdeps/powerpc/powerpc64/power8/strncpy.S
@@ -1,5 +1,5 @@
/* Optimized strncpy/stpncpy implementation for PowerPC64/POWER8.
- Copyright (C) 2015 Free Software Foundation, Inc.
+ Copyright (C) 2015-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -64,7 +64,7 @@ EALIGN (FUNC_NAME, 4, 0)
std r28,-32(r1)
std r29,-24(r1)
- cmpld r7,r9,r8
+ cmpld cr7,r9,r8
std r30,-16(r1)
std r31,-8(r1)
@@ -82,44 +82,80 @@ EALIGN (FUNC_NAME, 4, 0)
L(short_path):
mr r9,r3
L(short_path_1):
+ /* Return if there are no more bytes to be written. */
cmpdi cr7,r5,0
beq cr7,L(short_path_loop_end_1)
L(short_path_2):
+ /* Copy one char from src (r4) and write it to dest (r9). If it is the
+ end-of-string, start the null padding. Continue, otherwise. */
lbz r10,0(r4)
cmpdi cr7,r10,0
stb r10,0(r9)
beq cr7,L(zero_pad_start_1)
+ /* If there are no more bytes to be written, return. */
cmpdi cr0,r5,1
addi r8,r9,1
addi r6,r5,-1
beq cr0,L(short_path_loop_end_0)
+ /* Copy another char from src (r4) to dest (r9). Check again if it is
+ the end-of-string. If so, start the null padding. */
lbz r10,1(r4)
cmpdi cr7,r10,0
stb r10,1(r9)
beq cr7,L(zero_pad_start_prepare_1)
+ /* Eagerly set r10 to r5 minus 3, which accounts for the two bytes already
+ written, plus one write that will be performed later on. */
addi r10,r5,-3
b L(short_path_loop_1)
.align 4
L(short_path_loop):
+ /* At this point, the induction variable, r5, as well as the pointers
+ to dest and src (r9 and r4, respectively) have been updated.
+
+ Note: The registers r7 and r10 are induction variables derived from
+ r5. They are used to determine if the total number of writes has
+ been reached at every other write.
+
+ Copy one char from src (r4) and write it to dest (r9). If it is the
+ end-of-string, start the null padding. Continue, otherwise. */
lbz r8,0(r4)
addi r7,r10,-2
cmpdi cr5,r8,0
stb r8,0(r9)
beq cr5,L(zero_pad_start_1)
- beq r7,L(short_path_loop_end_0)
+ beq cr7,L(short_path_loop_end_0)
+ /* Copy another char from src (r4) to dest (r9). Check again if it is
+ the end-of-string. If so, start the null padding. */
lbz r8,1(r4)
cmpdi cr7,r8,0
stb r8,1(r9)
beq cr7,L(zero_pad_start)
mr r10,r7
L(short_path_loop_1):
+ /* This block is reached after two chars have been already written to
+ dest. Nevertheless, r5 (the induction variable), r9 (the pointer to
+ dest), and r4 (the pointer to src) have not yet been updated.
+
+ At this point:
+ r5 holds the count of bytes yet to be written plus 2.
+ r9 points to the last two chars that were already written to dest.
+ r4 points to the last two chars that were already copied from src.
+
+ The algorithm continues by decrementing r5, the induction variable,
+ so that it reflects the last two writes. The pointers to dest (r9)
+ and to src (r4) are incremented by two, for the same reason.
+
+ Note: Register r10 is another induction variable, derived from r5,
+ which determines if the total number of writes has been reached. */
addic. r5,r5,-2
addi r9,r9,2
- cmpdi cr7,r10,0
+ cmpdi cr7,r10,0 /* Eagerly check if the next write is the last. */
addi r4,r4,2
addi r6,r9,1
- bne cr0,L(short_path_loop)
+ bne cr0,L(short_path_loop) /* Check if the total number of writes
+ has been reached at every other
+ write. */
#ifdef USE_AS_STPNCPY
mr r3,r9
b L(short_path_loop_end)
@@ -234,7 +270,7 @@ L(unaligned_lt_16):
bne cr7,L(short_path_prepare_2)
addi r6,r5,-8
std r7,0(r3)
- addi r9,r3,r8
+ addi r9,r3,8
cmpldi cr7,r6,7
addi r7,r4,8
ble cr7,L(short_path_prepare_1_1)
@@ -288,11 +324,11 @@ L(pagecross):
cmpdi cr7,r9,0
bne cr7,L(short_path_prepare_2)
addi r8,r8,-16
- cmpldi r7,r8,8
+ cmpldi cr7,r8,8
ble cr7,L(short_path_prepare_2)
ld r8,24(r11)
cmpb r9,r8,r9
- cmpdi r7,r9,0
+ cmpdi cr7,r9,0
bne cr7,L(short_path_prepare_2)
/* No null byte found in the 32 bytes read and length not reached,
@@ -367,7 +403,7 @@ L(loop_16b):
cmpb r7,r0,r30
or. r7,r8,r7
addi r12,r12,-32
- cmpldi r7,r12,15
+ cmpldi cr7,r12,15
addi r11,r11,32
bne cr0,L(short_path_2)
std r10,16(r6)