Andy David says:
Here's my 32-bit routine as written for the PIC 17c43 [ed: and converted to the Scenix SX], taken from a mail I sent Scott just after I wrote it, hence the comments about the implementations I used. It looks a lot like Scott's original 16-bit sqrt. As the root is going to be a 16-bit number the last subtract is awkward, so the 24-bit sqrt method wasn't appropriate. I did actually write this myself rather than automatically converting Scott's code to 32-bit. I DID, however, consciously and unashamedly steal two parts - how to carry out the final 17-bit subtraction and how to count iterations - the extra 'counting' bit in the mask was quite a devious idea. This one took a little longer to write than the 24-bit, probably because it needs to iterate more times...
Standard disclaimer applies
;=========================================================================
; brSQRT32
;
; Calculates the integer square root of a thirty-two bit unsigned number
; using the binary restoring method.
;
; Result in ACCaHI:ACCaLO   (16-bit root, rounded down)
; Mask   in ACCbHI:ACCbLO   (scratch: two adjacent '1' bits; the lower
;                            one doubles as the loop counter)
; Input  in ACCcHI:ACCcLO:ACCdHI:ACCdLO
;                           (DESTROYED - it is used as the remainder)
;
; Also clobbers W and the status flags.
;
; NOTE(review): carry/borrow between bytes is propagated by hand with
; "sb C / movsz W,++fr", so this assumes add/sub do NOT take carry-in
; (i.e. the CARRYX option is off) - confirm against the device setup.
;
; Takes between ... and ... cycles (incl. call and return).
; Uses 68 words ROM, 8 bytes RAM including 4 holding the input.
;
;-------------------------------------------------------------------------
brSQRT32:
        mov     W, #$40         ; Initial value for Result is...
        mov     ACCaHI, W       ; ... 01000000 00000000
        clr     ACCaLO          ;
        mov     W, #$C0         ; Initial value for mask is...
        mov     ACCbHI, W       ; ... 11000000 00000000
        clr     ACCbLO          ; (second '1' is loop counter).

; --- Trial subtraction: remainder(high 16 bits) -= root-so-far:'01' -----
Sub_Cmp:
        mov     W, ACCaLO       ; Compare root-so-far with current
        sub     ACCcLO, W       ; ... remainder.
        mov     W, ACCaHI       ;
        sb      C               ; Low byte borrowed (C = 0)?
        movsz   W, ++ACCaHI     ; ... then subtract ACCaHI+1 instead; if that
                                ; wraps to 0 the sub is skipped, which leaves
                                ; the byte and the borrow (C = 0) as required.
        sub     ACCcHI, W       ;
        sb      C               ; C = 1 -> no borrow, root bit is '1'.
        jmp     brstr           ; (result is negative, need to restore).

In1:    mov     W, ACCbLO       ; Set the current bit in the result.
        or      ACCaLO, W       ;
        mov     W, ACCbHI       ;
        or      ACCaHI, W       ;

; --- Shift remainder left one bit, mask right one bit -------------------
; rl rotates the current C into ACCdLO.0.  That stray bit is harmless:
; only 15 shifts are made, so it never gets as far as ACCdHI.7, which is
; the only bit of ACCd that is ever examined or moved into ACCc.
ShftUp: rl      ACCdLO          ;
        rl      ACCdHI          ;
        rl      ACCcLO          ;
        rl      ACCcHI          ; C = bit shifted out of the remainder.
        rr      ACCbHI          ; Shift mask right for next bit, whilst
        rr      ACCbLO          ; ... shifting IN MSB from remainder.
                                ; C = mask bit shifted out (loop counter).
        snb     ACCbHI.7        ; If MSB is set, unconditionally set the
        jmp     USet1           ; ... next bit.
        mov     W, ACCbLO       ; Append '01' to root-so-far
        xor     ACCaLO, W       ; (mov and xor leave C untouched).
        mov     W, ACCbHI       ;
        xor     ACCaHI, W       ;
        sb      C               ; If second '1' in mask is shifted out,
        jmp     Sub_Cmp         ; ... then that was the last normal iteration.

; --- Last bit generation ------------------------------------------------
; The final subtract is 17-bit (root-so-far plus a trailing '1') against
; ACCcHI:ACCcLO:ACCdHI.7.  Subtract the top 16 bits first:
;   borrow              -> last bit is 0
;   no borrow, diff > 0 -> last bit is 1
;   no borrow, diff = 0 -> last bit equals ACCdHI.7
        mov     W, ACCaLO       ;
        sub     ACCcLO, W       ;
        mov     W, ACCaHI       ;
        sb      C               ;
        movsz   W, ++ACCaHI     ;
        sub     ACCcHI, W       ; If this generates a borrow (C = 0),
        sb      C               ; ... last bit is 0.
        ret

        mov     W, ACCcLO       ; Z from the sub above covers only the high
        or      W, ACCcHI       ; ... byte, so test all 16 bits here:
                                ; Z = 1 only if the whole difference is zero.
        mov     W, #1           ; (Loading a literal leaves Z intact.)
        snb     Z               ; If result is 0 AND msb of ACCdHI is '0',
        snb     ACCdHI.7        ; ... result bit is 0, otherwise '1'.
        xor     ACCaLO, W       ;
        ret

; --- Remainder overflowed into a 17th bit: next root bit must be '1' ----
USet1:  snb     C               ; If mask has shifted out, leave: bit 0 of
        ret                     ; ... the root is already '1' at this point.
        clrb    ACCbHI.7        ; Clear bit shifted in from the remainder.
        mov     W, ACCbLO       ; Append '01' to root-so-far
        xor     ACCaLO, W       ;
        mov     W, ACCbHI       ;
        xor     ACCaHI, W       ;
        mov     W, ACCaLO       ; With the 17th bit taken into account this
        sub     ACCcLO, W       ; ... subtraction cannot go negative, so
        mov     W, ACCaHI       ; ... subtract and jump back to insert a
        sb      C               ; ... '1' in the root.
        movsz   W, ++ACCaHI     ;
        sub     ACCcHI, W       ;
        jmp     In1             ;

; --- Restore: undo a trial subtraction that went negative ---------------
brstr:  mov     W, ACCaLO       ; A subtract above at Sub_Cmp was -ve, so
        add     ACCcLO, W       ; ... restore the remainder by adding.
        mov     W, ACCaHI       ; The current bit of the root is zero.
        snb     C               ; Carry out of low byte?
        movsz   W, ++ACCaHI     ; ... then add ACCaHI+1 (skip the add if it
                                ; wraps to 0; carry stays set).
        add     ACCcHI, W       ;
        jmp     ShftUp          ;