summaryrefslogtreecommitdiff
path: root/sysdeps/alpha/alphaev5/add_n.s
diff options
context:
space:
mode:
Diffstat (limited to 'sysdeps/alpha/alphaev5/add_n.s')
-rw-r--r--sysdeps/alpha/alphaev5/add_n.s175
1 files changed, 102 insertions, 73 deletions
diff --git a/sysdeps/alpha/alphaev5/add_n.s b/sysdeps/alpha/alphaev5/add_n.s
index 2aaf041774..66cf82b3c3 100644
--- a/sysdeps/alpha/alphaev5/add_n.s
+++ b/sysdeps/alpha/alphaev5/add_n.s
@@ -35,84 +35,113 @@
__mpn_add_n:
.frame $30,0,$26,0
- ldq $3,0($17)
- ldq $4,0($18)
-
- subq $19,1,$19
- and $19,4-1,$2 # number of limbs in first loop
- bis $31,$31,$0
- beq $2,.L0 # if multiple of 4 limbs, skip first loop
-
- subq $19,$2,$19
-
-.Loop0: subq $2,1,$2
+ or $31,$31,$25 # clear cy
+ subq $19,4,$19 # decr loop cnt
+ blt $19,.Lend2 # if less than 4 limbs, goto 2nd loop
+ # Start software pipeline for 1st loop
+ ldq $0,0($18)
+ ldq $1,8($18)
+ ldq $4,0($17)
ldq $5,8($17)
- addq $4,$0,$4
- ldq $6,8($18)
- cmpult $4,$0,$1
- addq $3,$4,$4
- cmpult $4,$3,$0
- stq $4,0($16)
- or $0,$1,$0
-
- addq $17,8,$17
- addq $18,8,$18
- bis $5,$5,$3
- bis $6,$6,$4
- addq $16,8,$16
- bne $2,.Loop0
-
-.L0: beq $19,.Lend
-
+ addq $17,32,$17 # update s1_ptr
+ ldq $2,16($18)
+ addq $0,$4,$20 # 1st main add
+ ldq $3,24($18)
+ subq $19,4,$19 # decr loop cnt
+ ldq $6,-16($17)
+ cmpult $20,$0,$25 # compute cy from last add
+ ldq $7,-8($17)
+ addq $1,$25,$28 # cy add
+ addq $18,32,$18 # update s2_ptr
+ addq $5,$28,$21 # 2nd main add
+ cmpult $28,$25,$8 # compute cy from last add
+ blt $19,.Lend1 # if less than 4 limbs remain, jump
+ # 1st loop handles groups of 4 limbs in a software pipeline
.align 4
-.Loop: subq $19,4,$19
- unop
-
- ldq $6,8($18)
- addq $4,$0,$0
+.Loop: cmpult $21,$28,$25 # compute cy from last add
+ ldq $0,0($18)
+ or $8,$25,$25 # combine cy from the two adds
+ ldq $1,8($18)
+ addq $2,$25,$28 # cy add
+ ldq $4,0($17)
+ addq $28,$6,$22 # 3rd main add
ldq $5,8($17)
- cmpult $0,$4,$1
- ldq $4,16($18)
- addq $3,$0,$20
- cmpult $20,$3,$0
- ldq $3,16($17)
- or $0,$1,$0
- addq $6,$0,$0
- cmpult $0,$6,$1
- ldq $6,24($18)
- addq $5,$0,$21
- cmpult $21,$5,$0
- ldq $5,24($17)
- or $0,$1,$0
- addq $4,$0,$0
- cmpult $0,$4,$1
- ldq $4,32($18)
- addq $3,$0,$22
- cmpult $22,$3,$0
- ldq $3,32($17)
- or $0,$1,$0
- addq $6,$0,$0
- cmpult $0,$6,$1
- addq $5,$0,$23
- cmpult $23,$5,$0
- or $0,$1,$0
-
+ cmpult $28,$25,$8 # compute cy from last add
+ cmpult $22,$28,$25 # compute cy from last add
stq $20,0($16)
+ or $8,$25,$25 # combine cy from the two adds
stq $21,8($16)
- stq $22,16($16)
- stq $23,24($16)
-
- addq $17,32,$17
- addq $18,32,$18
- addq $16,32,$16
- bne $19,.Loop
+ addq $3,$25,$28 # cy add
+ addq $28,$7,$23 # 4th main add
+ cmpult $28,$25,$8 # compute cy from last add
+ cmpult $23,$28,$25 # compute cy from last add
+ addq $17,32,$17 # update s1_ptr
+ or $8,$25,$25 # combine cy from the two adds
+ addq $16,32,$16 # update res_ptr
+ addq $0,$25,$28 # cy add
+ ldq $2,16($18)
+ addq $4,$28,$20 # 1st main add
+ ldq $3,24($18)
+ cmpult $28,$25,$8 # compute cy from last add
+ ldq $6,-16($17)
+ cmpult $20,$28,$25 # compute cy from last add
+ ldq $7,-8($17)
+ or $8,$25,$25 # combine cy from the two adds
+ subq $19,4,$19 # decr loop cnt
+ stq $22,-16($16)
+ addq $1,$25,$28 # cy add
+ stq $23,-8($16)
+ addq $5,$28,$21 # 2nd main add
+ addq $18,32,$18 # update s2_ptr
+ cmpult $28,$25,$8 # compute cy from last add
+ bge $19,.Loop
+ # Finish software pipeline for 1st loop
+.Lend1: cmpult $21,$28,$25 # compute cy from last add
+ or $8,$25,$25 # combine cy from the two adds
+ addq $2,$25,$28 # cy add
+ addq $28,$6,$22 # 3rd main add
+ cmpult $28,$25,$8 # compute cy from last add
+ cmpult $22,$28,$25 # compute cy from last add
+ stq $20,0($16)
+ or $8,$25,$25 # combine cy from the two adds
+ stq $21,8($16)
+ addq $3,$25,$28 # cy add
+ addq $28,$7,$23 # 4th main add
+ cmpult $28,$25,$8 # compute cy from last add
+ cmpult $23,$28,$25 # compute cy from last add
+ or $8,$25,$25 # combine cy from the two adds
+ addq $16,32,$16 # update res_ptr
+ stq $22,-16($16)
+ stq $23,-8($16)
+.Lend2: addq $19,4,$19 # restore loop cnt
+ beq $19,.Lret
+ # Start software pipeline for 2nd loop
+ ldq $0,0($18)
+ ldq $4,0($17)
+ subq $19,1,$19
+ beq $19,.Lend0
+ # 2nd loop handles remaining 1-3 limbs
+ .align 4
+.Loop0: addq $0,$25,$28 # cy add
+ ldq $0,8($18)
+ addq $4,$28,$20 # main add
+ ldq $4,8($17)
+ addq $18,8,$18
+ cmpult $28,$25,$8 # compute cy from last add
+ addq $17,8,$17
+ stq $20,0($16)
+ cmpult $20,$28,$25 # compute cy from last add
+ subq $19,1,$19 # decr loop cnt
+ or $8,$25,$25 # combine cy from the two adds
+ addq $16,8,$16
+ bne $19,.Loop0
+.Lend0: addq $0,$25,$28 # cy add
+ addq $4,$28,$20 # main add
+ cmpult $28,$25,$8 # compute cy from last add
+ cmpult $20,$28,$25 # compute cy from last add
+ stq $20,0($16)
+ or $8,$25,$25 # combine cy from the two adds
-.Lend: addq $4,$0,$4
- cmpult $4,$0,$1
- addq $3,$4,$4
- cmpult $4,$3,$0
- stq $4,0($16)
- or $0,$1,$0
+.Lret: or $25,$31,$0 # return cy
ret $31,($26),1
-
.end __mpn_add_n