195 lines
6.0 KiB
ArmAsm
195 lines
6.0 KiB
ArmAsm
|
/*
|
||
|
* DEC Alpha 64-bit math primitives. These use 64-bit words
|
||
|
* unless otherwise noted.
|
||
|
*
|
||
|
* Copyright (c) 1995, Colin Plumb.
|
||
|
* For licensing and other legal details, see the file legal.c.
|
||
|
*
|
||
|
* The DEC assembler apparently does some instruction scheduling,
|
||
|
* but I tried to do some of my own, and tries to spread things
|
||
|
* out over the register file to give the assembler more room
|
||
|
* to schedule things.
|
||
|
*
|
||
|
* Alpha OSF/1 register usage conventions:
|
||
|
* r0 - v0 - Temp, holds integer return value
|
||
|
* r1..r8 - t0..t7 - Temp, trashed by procedure call
|
||
|
* r9..r14 - s0..s5 - Saved across procedure calls
|
||
|
* r15 - s6/FP - Frame pointer, saved across procedure calls
|
||
|
* r16..r21 - a0..a5 - Argument registers, all trashed by procedure call
|
||
|
* r22..r25 - t8..t11 - Temp, trashed by procedure call
|
||
|
* r26 - ra - Return address
|
||
|
* r27 - t12/pv - Procedure value, trashed by procedure call
|
||
|
* r28 - at - Assembler temp, trashed by procedure call
|
||
|
* r29 - gp - Global pointer
|
||
|
* r30 - sp - Stack pointer
|
||
|
* r31 - zero - hardwired to zero
|
||
|
*/
|
||
|
.text
|
||
|
.align 4
|
||
|
.globl lbnMulN1_64
|
||
|
/* I have no idea what the '2' at the end of the .ent line means. */
|
||
|
.ent lbnMulN1_64 2
|
||
|
/*
|
||
|
* Arguments: $16 = out, $17 = in, $18 = len<32>, $19 = k
|
||
|
* Other registers: $0 = carry word, $1 = product low,
|
||
|
* $2 = product high, $3 = input word
|
||
|
*/
|
||
|
lbnMulN1_64:
|
||
|
ldq $3,0($17) /* Load first word of input */
|
||
|
subl $18,1,$18
|
||
|
mulq $3,$19,$1 /* Do low half of first multiply */
|
||
|
umulh $3,$19,$0 /* Do second half of first multiply */
|
||
|
stq $1,0($16)
|
||
|
beq $18,m64_done
|
||
|
m64_loop:
|
||
|
ldq $3,8($17)
|
||
|
addq $17,8,$17
|
||
|
mulq $3,$19,$1 /* Do bottom half of multiply */
|
||
|
subl $18,1,$18
|
||
|
umulh $3,$19,$2 /* Do top half of multiply */
|
||
|
addq $0,$1,$1 /* Add carry word from previous multiply */
|
||
|
stq $1,8($16)
|
||
|
cmpult $1,$0,$0 /* Compute carry bit from add */
|
||
|
addq $16,8,$16
|
||
|
addq $2,$0,$0 /* Add carry bit to carry word */
|
||
|
|
||
|
beq $18,m64_done
|
||
|
|
||
|
ldq $3,8($17)
|
||
|
addq $17,8,$17
|
||
|
mulq $3,$19,$1 /* Do bottom half of multiply */
|
||
|
subl $18,1,$18
|
||
|
umulh $3,$19,$2 /* Do top half of multiply */
|
||
|
addq $0,$1,$1 /* Add carry word from previous multiply */
|
||
|
stq $1,8($16)
|
||
|
cmpult $1,$0,$0 /* Compute carry bit from add */
|
||
|
addq $16,8,$16
|
||
|
addq $2,$0,$0 /* Add carry bit to carry word */
|
||
|
|
||
|
bne $18,m64_loop
|
||
|
m64_done:
|
||
|
stq $0,8($16) /* Store last word of result */
|
||
|
ret $31,($26),1
|
||
|
/* The '1' in the hint field means procedure return - software convention */
|
||
|
.end lbnMulN1_64
|
||
|
|
||
|
|
||
|
.text
|
||
|
.align 4
|
||
|
.globl lbnMulAdd1_64
|
||
|
.ent lbnMulAdd1_64 2
|
||
|
/*
|
||
|
* Arguments: $16 = out, $17 = in, $18 = len<32>, $19 = k
|
||
|
* Other registers: $0 = product high, $1 = product low,
|
||
|
* $2 = product high temp, $3 = input word, $4 = output word
|
||
|
* $5 = carry bit from add to out
|
||
|
*/
|
||
|
lbnMulAdd1_64:
|
||
|
ldq $3,0($17) /* Load first word of input */
|
||
|
subl $18,1,$18
|
||
|
mulq $3,$19,$1 /* Do low half of first multiply */
|
||
|
ldq $4,0($16) /* Load first word of output */
|
||
|
umulh $3,$19,$2 /* Do second half of first multiply */
|
||
|
addq $4,$1,$4
|
||
|
cmpult $4,$1,$5 /* Compute borrow bit from subtract */
|
||
|
stq $4,0($16)
|
||
|
addq $5,$2,$0 /* Add carry bit to high word */
|
||
|
beq $18,ma64_done
|
||
|
ma64_loop:
|
||
|
ldq $3,8($17) /* Load next word of input */
|
||
|
addq $17,8,$17
|
||
|
ldq $4,8($16) /* Load next word of output */
|
||
|
mulq $3,$19,$1 /* Do bottom half of multiply */
|
||
|
subl $18,1,$18
|
||
|
addq $0,$1,$1 /* Add carry word from previous multiply */
|
||
|
umulh $3,$19,$2 /* Do top half of multiply */
|
||
|
cmpult $1,$0,$0 /* Compute carry bit from add */
|
||
|
addq $4,$1,$4 /* Add product to loaded word */
|
||
|
cmpult $4,$1,$5 /* Compute carry bit from add */
|
||
|
stq $4,8($16)
|
||
|
addq $5,$0,$5 /* Add carry bits together */
|
||
|
addq $16,8,$16
|
||
|
addq $5,$2,$0 /* Add carry bits to carry word */
|
||
|
|
||
|
beq $18,ma64_done
|
||
|
|
||
|
ldq $3,8($17) /* Load next word of input */
|
||
|
addq $17,8,$17
|
||
|
ldq $4,8($16) /* Load next word of output */
|
||
|
mulq $3,$19,$1 /* Do bottom half of multiply */
|
||
|
subl $18,1,$18
|
||
|
addq $0,$1,$1 /* Add carry word from previous multiply */
|
||
|
umulh $3,$19,$2 /* Do top half of multiply */
|
||
|
cmpult $1,$0,$0 /* Compute carry bit from add */
|
||
|
addq $4,$1,$4 /* Add product to loaded word */
|
||
|
cmpult $4,$1,$5 /* Compute carry bit from add */
|
||
|
stq $4,8($16)
|
||
|
addq $5,$0,$5 /* Add carry bits together */
|
||
|
addq $16,8,$16
|
||
|
addq $5,$2,$0 /* Add carry bits to carry word */
|
||
|
|
||
|
bne $18,ma64_loop
|
||
|
ma64_done:
|
||
|
ret $31,($26),1
|
||
|
.end lbnMulAdd1_64
|
||
|
|
||
|
|
||
|
.text
|
||
|
.align 4
|
||
|
.globl lbnMulSub1_64
|
||
|
.ent lbnMulSub1_64 2
|
||
|
/*
|
||
|
* Arguments: $16 = out, $17 = in, $18 = len<32>, $19 = k
|
||
|
* Other registers: $0 = carry word, $1 = product low,
|
||
|
* $2 = product high temp, $3 = input word, $4 = output word
|
||
|
* $5 = borrow bit from subtract
|
||
|
*/
|
||
|
lbnMulSub1_64:
|
||
|
ldq $3,0($17) /* Load first word of input */
|
||
|
subl $18,1,$18
|
||
|
mulq $3,$19,$1 /* Do low half of first multiply */
|
||
|
ldq $4,0($16) /* Load first word of output */
|
||
|
umulh $3,$19,$2 /* Do second half of first multiply */
|
||
|
cmpult $4,$1,$5 /* Compute borrow bit from subtract */
|
||
|
subq $4,$1,$4
|
||
|
addq $5,$2,$0 /* Add carry bit to high word */
|
||
|
stq $4,0($16)
|
||
|
beq $18,ms64_done
|
||
|
ms64_loop:
|
||
|
ldq $3,8($17) /* Load next word of input */
|
||
|
addq $17,8,$17
|
||
|
ldq $4,8($16) /* Load next word of output */
|
||
|
mulq $3,$19,$1 /* Do bottom half of multiply */
|
||
|
subl $18,1,$18
|
||
|
addq $0,$1,$1 /* Add carry word from previous multiply */
|
||
|
umulh $3,$19,$2 /* Do top half of multiply */
|
||
|
cmpult $1,$0,$0 /* Compute carry bit from add */
|
||
|
cmpult $4,$1,$5 /* Compute borrow bit from subtract */
|
||
|
subq $4,$1,$4
|
||
|
addq $5,$0,$5 /* Add carry bits together */
|
||
|
stq $4,8($16)
|
||
|
addq $5,$2,$0 /* Add carry bits to carry word */
|
||
|
addq $16,8,$16
|
||
|
|
||
|
beq $18,ms64_done
|
||
|
|
||
|
ldq $3,8($17) /* Load next word of input */
|
||
|
addq $17,8,$17
|
||
|
ldq $4,8($16) /* Load next word of output */
|
||
|
mulq $3,$19,$1 /* Do bottom half of multiply */
|
||
|
subl $18,1,$18
|
||
|
addq $0,$1,$1 /* Add carry word from previous multiply */
|
||
|
umulh $3,$19,$2 /* Do top half of multiply */
|
||
|
cmpult $1,$0,$0 /* Compute carry bit from add */
|
||
|
cmpult $4,$1,$5 /* Compute borrow bit from subtract */
|
||
|
subq $4,$1,$4
|
||
|
addq $5,$0,$5 /* Add carry bits together */
|
||
|
stq $4,8($16)
|
||
|
addq $5,$2,$0 /* Add carry bits to carry word */
|
||
|
addq $16,8,$16
|
||
|
|
||
|
bne $18,ms64_loop
|
||
|
ms64_done:
|
||
|
ret $31,($26),1
|
||
|
.end lbnMulSub1_64
|