;24 Bit Floating Point Square Root
;**********************************************************************
;By Nikolai Golovchenko
;24 BIT FLOATING POINT SQUARE ROOT
;
;Input:
; AARGB0 - high mantissa byte; bit 7 is the sign bit
; AARGB1 - low mantissa byte
; AEXP   - biased exponent, scale is 2^(AEXP - 127); AEXP = 0 is
;          treated as a zero input
; FPFLAGS, RND - rounding/truncating (not used)
;Result:
; AARGB0-1, AEXP - 24 bit floating point square root, sign bit cleared
; W = 0 on return
; A zero input (AEXP = 0) returns immediately with AARG unchanged.
;Used:
; BARGB0-1, BEXP - temporary for result
; TEMPB0-1       - partial remainder
; LOOPCOUNT      - counter
;Memory used
; 86 instruction words
; 9 RAM bytes
;Timing
; The original listing quoted 6 cycles best case and
; 5+21+8*25-1+4+8*26-1+12 = 445 cycles worst case (call and return
; included). Those figures have NOT been re-measured for this revision:
; the first loop is three instructions longer and the exponent set-up
; is shorter than in the listing they were derived from.
;**********************************************************************
;NOTES: 1)Square root is taken on the absolute value of the number
;         (sign bit is ignored)
;       2)Rounding is not implemented yet (result is truncated)
;       3)NOTE(review): the arithmetic relies on ADD/SUB ignoring the
;         incoming carry - confirm the device is not built with the
;         CARRYX option.
;       4)$03.0 is the STATUS carry flag, $03.2 the STATUS zero flag.
;**********************************************************************
FPSQRT24
;Normalize input
;1)Check for zero input - if zero then exit
;2)Set the sign bit to 1 and use it as the explicit mantissa MSB
;3)If the true exponent (AEXP - 127) is even (AEXP<0> = 1):
;    result exponent = (AEXP - 127) / 2, the first result bit is known
;    to be 1 (BARGB1<0> = 1), AARGB<0-1> << 1 and find 15 more bits
;4)Else result exponent = (AEXP - 127 - 1) / 2, find 16 bits
;5)BEXP = result exponent + 127
        mov     W, AEXP                 ;if zero input then return
        snb     $03.2                   ;Z was set by the mov above
        retw    #$00

        clr     BARGB0                  ;result accumulator
        clr     BARGB1                  ;
        clr     TEMPB0                  ;partial remainder
        clr     TEMPB1                  ;
        setb    AARGB0.7                ;make MSB explicit and ignore mantissa sign

;Steps 3-5 for the exponent are done in one go on the biased value:
;  AEXP odd : BEXP = (AEXP - 127)/2 + 127 = (AEXP + 127) / 2
;  AEXP even: BEXP = (AEXP - 128)/2 + 127 = (AEXP + 126) / 2
;The sum is kept as 9 bits (carry:W), so AEXP = 255 cannot overflow
;into the sign as it would with an 8-bit AEXP - 127.
;A skip instruction skips one instruction word only, so the skipped
;instruction must be a single word (mov W,#lit) - never a multi-word
;expansion.
        mov     W, #127
        sb      AEXP.0
        mov     W, #126
        add     W, AEXP                 ;9-bit sum in C:W (always even)
        mov     BEXP, W                 ;carry is still the 9th bit
        rr      BEXP                    ;halve: carry becomes bit 7

        sb      AEXP.0                  ;shift input left if true exponent is even
        jmp     FPSQRT24a
        clrb    $03.0
        rl      AARGB1                  ;explicit MSB is shifted out...
        rl      AARGB0
        setb    BARGB1.0                ;...and recorded as the first result bit
FPSQRT24a

;First loop: 8 passes, one result bit per pass. Each pass compares
;TEMPB1:AARGB0<7:6> (remainder:next two input bits) against
;BARGB1:01 (current result:test bit), then shifts two more input bits
;from AARGB0-AARGB1 into TEMPB1-TEMPB0. After 8 passes all 16 input
;bits are consumed and only zeros are fed from then on.
        mov     W, #8                   ;loop counter
        mov     LOOPCOUNT, W            ;
FPSQRT24b
        mov     W, #$40                 ;subtract test bit from
        sub     AARGB0, W               ;current lowest byte (C = 0 on borrow)
        mov     W, BARGB1               ;and subtract current result
        sb      $03.0                   ;(plus the borrow, if any)
        movsz   W, ++BARGB1             ;if result+borrow wraps to 0, skip the
        mov     W, TEMPB1-w             ;subtraction: C stays 0 = borrow
;When the first result bit was preset, the remainder can need 9 bits
;before the last pass (it may be up to twice the 8-bit partial result).
;That 9th bit is TEMPB0<0>. If it is set the trial subtraction always
;succeeds: the bit absorbs the borrow and the difference fits in 8 bits.
        snb     TEMPB0.0
        setb    $03.0
        clrb    TEMPB0.0                ;(setb/clrb leave W and C alone)
        snb     $03.0                   ;if result bit = 1 then
        mov     TEMPB1, W               ;store subtraction result
        rl      BARGB1                  ;shift result bit into result bytes
        rl      BARGB0                  ;
        mov     W, #$40                 ;
        sb      BARGB1.0                ;if result bit = 1 then skip restoration
        add     AARGB0, W               ;
        clrb    $03.0
        rl      AARGB1                  ;shift out next two bits of input
        rl      AARGB0                  ;
        rl      TEMPB1                  ;
        rl      TEMPB0                  ;
        rl      AARGB1                  ;
        rl      AARGB0                  ;
        rl      TEMPB1                  ;
        rl      TEMPB0                  ;
        decsz   LOOPCOUNT               ;repeat until all 8 passes are done
        jmp     FPSQRT24b

;Second loop: find the other 7 bits (first bit was preset) or 8 bits.
;Only zeros are fed instead of AARGB0; AARGB1 is reused to save TEMPB1.
;The comparison is TEMPB0:TEMPB1:00 against BARGB0:BARGB1:01.
        mov     W, #7                   ;loop counter
        sb      AEXP.0
        mov     W, #8
        mov     LOOPCOUNT, W            ;
FPSQRT24d
        mov     W, TEMPB1               ;save low remainder byte for restoration
        mov     AARGB1, W
        clrb    $03.0                   ;simulate borrow (0x00 - 0x40 = 0xC0)
        movsz   W, ++BARGB1             ;low result byte + borrow; on wrap to 0
        sub     TEMPB1, W               ;skip the subtraction, C stays 0
        mov     W, BARGB0
        sb      $03.0
        movsz   W, ++BARGB0
        mov     W, TEMPB0-w
        snb     $03.0                   ;if result bit = 1 then
        mov     TEMPB0, W               ;store subtraction result
        rl      BARGB1                  ;shift result bit into result bytes
        rl      BARGB0
        snb     BARGB1.0                ;if result bit = 1 then skip restoration
        jmp     FPSQRT24e
        mov     W, AARGB1               ;restore previous data
        mov     TEMPB1, W               ;
FPSQRT24e
;The two bits shifted in are both equal to the result bit: the virtual
;low byte is 0xC0 after a successful subtraction and 0x00 after a
;restored one.
        mov     W, >>BARGB1             ;C = result bit
        rl      TEMPB1                  ;
        rl      TEMPB0                  ;
        mov     W, >>BARGB1             ;C = result bit again
        rl      TEMPB1                  ;
        rl      TEMPB0                  ;
        decsz   LOOPCOUNT               ;repeat until all 7 or 8 bits are found
        jmp     FPSQRT24d

;flag C, TEMPB0 - TEMPB1 contain the current remainder that may be used
;to find the 17th bit for rounding
;Copy BARG to AARG
        mov     W, BEXP
        mov     AEXP, W
        mov     W, BARGB0
        mov     AARGB0, W
        mov     W, BARGB1
        mov     AARGB1, W
        clrb    AARGB0.7                ;clear sign bit (overwrites explicit MSB, which is always one)
        retw    #$00
;**********************************************************************
;Last updated 02Jan00