Nikolai Golovchenko says:
Final release! :);********************************************************************** ;By Nikolai Golovchenko ;24 BIT FLOATING POINT SQARE ROOT (MICROCHIP format, see AN575) ; ;Input: ; AARGB0 - high byte with sign bit ; AARGB1 - low byte ; AEXP - exponent 2^(AEXP - 127) ;Output: ; AARGB0-1, AEXP - 24 bit floating point ;Temporary: ; BARGB0-1, BEXP - temporary for result ; TEMPB0-1 ; LOOPCOUNT - counter ; ;ROM: 85 instructions ;RAM: 9 bytes ; ;Timing (includes call and return) ;8 cycles best case ;3+3+18+8*28-2+8*23-2+7+3= 438 cycles worst case ;********************************************************************** ;NOTES: 1)Square root is taken on absolute value of the number ; (sign bit is ignored) ; 2)Rounding not implemented yet ;********************************************************************** FPSQRT24 ;Normalize input ;1)Check for zero input - if zero then exit ;2)Change sign to 1 and use sign bit as explicit MSB ;3)Divide BEXP = BEXP / 2 ;4)If AEXP can be divided by 2 (AEXP<0>=1) then ;BARGB1 = 1, AARGB<0-1> << 1 and find 15 more bits ;5)Else find 16 more bits mov W, AEXP ;if zero input then return snb $03.2 retw $00 clr BARGB0 ;set up all used clr BARGB1 ;temporary registers clr TEMPB0 ; clr TEMPB1 ; setb AARGB0.7 ;make MSB explicit and ignore mantissa sign ;sqrt(2^E*M)=2^(E/2)*sqrt(M) ;we will align mantissa point above MSb. This is ;equivalent to division by 2. Or, ;sqrt(2^E*M)=sqrt(M/2)*2^((E+1)/2) ;new exponent is (E+1)/2 ;new mantissa is M/2 (not changed, just new point position) ;divide (bexp+1) by 2. bexp is (eb + 127), where eb=-126.128! ;eb/2 is rounded to minus infinity! mov BEXP, W ;copy aexp to bexp dec BEXP ;ensure required round mode clrb $03.0 ;divide by 2 rr BEXP ; mov W, #$40 ;correct bias after division (J: was 64) add BEXP, W ; ;if (E+1)/2 result has a remainder, then multiply mantissa by 2 ;(left shift) sb AEXP.0 jmp FPSQRT24a rl AARGB1 ;(carry was zero) rl AARGB0 setb BARGB1.0 ;set first bit of current result ;and discard MSb of mantissa (we ;used it already by setting first bit) FPSQRT24a ;First find 8 bits of result. This will shift AARGB0 - AARGB1 to TEMPB1 ;Then only zeros will be fed instead of AARGB0 mov W, #$08 ;loop counter (J: was 8) mov BARGB2, W ; FPSQRT24b mov W, #$40 ;substract test bit from sub AARGB0, W ;current lowest byte. ;it works also exactly ;like $C0 addition for the ;addition branch. mov W, BARGB1 ;load accumulator with ;current result LSB snb TEMPB0.7 jmp FPSQRT24b_add sb $03.0 movsz W, ++BARGB1 sub TEMPB1, W mov W, #$01 ;(J: was 1) sb $03.0 sub TEMPB0, W jmp FPSQRT24b_next FPSQRT24b_add snb $03.0 movsz W, ++BARGB1 add TEMPB1, W mov W, #$01 ;(J: was 1) snb $03.0 add TEMPB0, W FPSQRT24b_next rl BARGB1 ;shift result into result bytes rl BARGB0 rl AARGB1 ;Shift out next two bits of input rl AARGB0 ; rl TEMPB1 ; rl TEMPB0 ; rl AARGB1 ; rl AARGB0 ; rl TEMPB1 ; rl TEMPB0 ; decsz BARGB2 ;repeat untill 8 bits will be found jmp FPSQRT24b ;Find other 7 or 8 bits. Only zeros are fed instead of AARGB0 ;Repeat untill MSb of result gets set FPSQRT24d snb BARGB1.0 ;if Temp sign is positive, than jump to subtraction jmp FPSQRT24d_sub ;(previous result bit is inverted sign bit, ;Tempb07 can not be used instead, because ;it may overflow) ;after LSBs addition ($00 + $C0 = $C0) C=0, ;so we just continue adding higher bytes, ;keeping in mind that LSB=$C0 and C=0 mov W, BARGB1 add TEMPB1, W mov W, BARGB0 snb $03.0 movsz W, ++BARGB0 add TEMPB0, W jmp FPSQRT24d_next FPSQRT24d_sub clrb $03.0 ;simulate borrow ($00 - $40 = $C0, C=0) movsz W, ++BARGB1 sub TEMPB1, W mov W, BARGB0 sb $03.0 movsz W, ++BARGB0 sub TEMPB0, W FPSQRT24d_next rl BARGB1 ;shift result into result bytes rl BARGB0 setb $03.0 ;Shift out next two bits of input rl TEMPB1 ;(set carry before each shift to rl TEMPB0 ;simulate $C0 value) setb $03.0 ; rl TEMPB1 ; rl TEMPB0 ; sb BARGB0.7 ;repeat untill all 16 bits will be found jmp FPSQRT24d ;flag C, TEMPB1 - TEMPB0 contain current input that may be used to find 17th bit for rounding ;Copy BARG to AARG mov W, BEXP mov AEXP, W mov W, BARGB0 mov AARGB0, W mov W, BARGB1 mov AARGB1, W clrb AARGB0.7 ;clear sign bit (overwrites explicit MSB, which is always one) retw $00 ;********************************************************************** ;Last updated 21Nov00
Interested: