Nikolai Golovchenko says:
Final release! :)
;**********************************************************************
;By Nikolai Golovchenko
;24 BIT FLOATING POINT SQARE ROOT (MICROCHIP format, see AN575)
;
;Input:
; AARGB0 - high byte with sign bit
; AARGB1 - low byte
; AEXP - exponent 2^(AEXP - 127)
;Output:
; AARGB0-1, AEXP - 24 bit floating point
;Temporary:
; BARGB0-1, BEXP - temporary for result
; TEMPB0-1
; LOOPCOUNT - counter
;
;ROM: 85 instructions
;RAM: 9 bytes
;
;Timing (includes call and return)
;8 cycles best case
;3+3+18+8*28-2+8*23-2+7+3= 438 cycles worst case
;**********************************************************************
;NOTES: 1)Square root is taken on absolute value of the number
; (sign bit is ignored)
; 2)Rounding not implemented yet
;**********************************************************************
FPSQRT24
;Normalize input
;1)Check for zero input - if zero then exit
;2)Change sign to 1 and use sign bit as explicit MSB
;3)Divide BEXP = BEXP / 2
;4)If AEXP can be divided by 2 (AEXP<0>=1) then
;BARGB1 = 1, AARGB<0-1> << 1 and find 15 more bits
;5)Else find 16 more bits
mov W, AEXP ;if zero input then return
snb $03.2
retw $00
clr BARGB0 ;set up all used
clr BARGB1 ;temporary registers
clr TEMPB0 ;
clr TEMPB1 ;
setb AARGB0.7 ;make MSB explicit and ignore mantissa sign
;sqrt(2^E*M)=2^(E/2)*sqrt(M)
;we will align mantissa point above MSb. This is
;equivalent to division by 2. Or,
;sqrt(2^E*M)=sqrt(M/2)*2^((E+1)/2)
;new exponent is (E+1)/2
;new mantissa is M/2 (not changed, just new point position)
;divide (bexp+1) by 2. bexp is (eb + 127), where eb=-126.128!
;eb/2 is rounded to minus infinity!
mov BEXP, W ;copy aexp to bexp
dec BEXP ;ensure required round mode
clrb $03.0 ;divide by 2
rr BEXP ;
mov W, #$40 ;correct bias after division (J: was 64)
add BEXP, W ;
;if (E+1)/2 result has a remainder, then multiply mantissa by 2
;(left shift)
sb AEXP.0
jmp FPSQRT24a
rl AARGB1 ;(carry was zero)
rl AARGB0
setb BARGB1.0 ;set first bit of current result
;and discard MSb of mantissa (we
;used it already by setting first bit)
FPSQRT24a
;First find 8 bits of result. This will shift AARGB0 - AARGB1 to TEMPB1
;Then only zeros will be fed instead of AARGB0
mov W, #$08 ;loop counter (J: was 8)
mov BARGB2, W ;
FPSQRT24b
mov W, #$40 ;substract test bit from
sub AARGB0, W ;current lowest byte.
;it works also exactly
;like $C0 addition for the
;addition branch.
mov W, BARGB1 ;load accumulator with
;current result LSB
snb TEMPB0.7
jmp FPSQRT24b_add
sb $03.0
movsz W, ++BARGB1
sub TEMPB1, W
mov W, #$01 ;(J: was 1)
sb $03.0
sub TEMPB0, W
jmp FPSQRT24b_next
FPSQRT24b_add
snb $03.0
movsz W, ++BARGB1
add TEMPB1, W
mov W, #$01 ;(J: was 1)
snb $03.0
add TEMPB0, W
FPSQRT24b_next
rl BARGB1 ;shift result into result bytes
rl BARGB0
rl AARGB1 ;Shift out next two bits of input
rl AARGB0 ;
rl TEMPB1 ;
rl TEMPB0 ;
rl AARGB1 ;
rl AARGB0 ;
rl TEMPB1 ;
rl TEMPB0 ;
decsz BARGB2 ;repeat untill 8 bits will be found
jmp FPSQRT24b
;Find other 7 or 8 bits. Only zeros are fed instead of AARGB0
;Repeat untill MSb of result gets set
FPSQRT24d
snb BARGB1.0 ;if Temp sign is positive, than jump to subtraction
jmp FPSQRT24d_sub ;(previous result bit is inverted sign bit,
;Tempb07 can not be used instead, because
;it may overflow)
;after LSBs addition ($00 + $C0 = $C0) C=0,
;so we just continue adding higher bytes,
;keeping in mind that LSB=$C0 and C=0
mov W, BARGB1
add TEMPB1, W
mov W, BARGB0
snb $03.0
movsz W, ++BARGB0
add TEMPB0, W
jmp FPSQRT24d_next
FPSQRT24d_sub
clrb $03.0 ;simulate borrow ($00 - $40 = $C0, C=0)
movsz W, ++BARGB1
sub TEMPB1, W
mov W, BARGB0
sb $03.0
movsz W, ++BARGB0
sub TEMPB0, W
FPSQRT24d_next
rl BARGB1 ;shift result into result bytes
rl BARGB0
setb $03.0 ;Shift out next two bits of input
rl TEMPB1 ;(set carry before each shift to
rl TEMPB0 ;simulate $C0 value)
setb $03.0 ;
rl TEMPB1 ;
rl TEMPB0 ;
sb BARGB0.7 ;repeat untill all 16 bits will be found
jmp FPSQRT24d
;flag C, TEMPB1 - TEMPB0 contain current input that may be used to find 17th bit for rounding
;Copy BARG to AARG
mov W, BEXP
mov AEXP, W
mov W, BARGB0
mov AARGB0, W
mov W, BARGB1
mov AARGB1, W
clrb AARGB0.7 ;clear sign bit (overwrites explicit MSB, which is always one)
retw $00
;**********************************************************************
;Last updated 21Nov00
Interested: