Here is a debugged version. It is slightly smaller and faster. Thanks to James Hillman [james at ind-interface.co.uk] for testing!Final release! :)
;********************************************************************** ;By Nikolai Golovchenko ;24 BIT FLOATING POINT SQARE ROOT ; ;Input: ; AARGB0 - high byte with sign bit ; AARGB1 - low byte ; AEXP - exponent 2^(AEXP - 127) ;Output: ; AARGB0-1, AEXP - 24 bit floating point ;Temporary: ; BARGB0-1, BEXP - temporary for result ; TEMPB0-1 ; LOOPCOUNT - counter ; ;ROM: 85 instructions ;RAM: 9 bytes ; ;Timing (includes call and return) ;6 cycles best case ;2+3+18+8*26-1+8*21-1+7+2= 406 cycles worst case ;********************************************************************** ;NOTES: 1)Square root is taken on absolute value of the number ; (sign bit is ignored) ; 2)Rounding not implemented yet ;********************************************************************** FPSQRT24 ;Normalize input ;1)Check for zero input - if zero then exit ;2)Change sign to 1 and use sign bit as explicit MSB ;3)Divide BEXP = BEXP / 2 ;4)If AEXP can be divided by 2 (AEXP<0>=1) then ;BARGB1 = 1, AARGB<0-1> << 1 and find 15 more bits ;5)Else find 16 more bits movf AEXP, w ;if zero input then return btfsc 0x03, 2 retlw 0x00 clrf BARGB0 ;set up all used clrf BARGB1 ;temporary registers clrf TEMPB0 ; clrf TEMPB1 ; bsf AARGB0, 7 ;make MSB explicit and ignore mantissa sign ;sqrt(2^E*M)=2^(E/2)*sqrt(M) ;we will align mantissa point above MSb. This is ;equivalent to division by 2. Or, ;sqrt(2^E*M)=sqrt(M/2)*2^((E+1)/2) ;new exponent is (E+1)/2 ;new mantissa is M/2 (not changed, just new point position) ;divide (bexp+1) by 2. bexp is (eb + 127), where eb=-126..128! ;eb/2 is rounded to minus infinity! movwf BEXP ;copy aexp to bexp decf BEXP, f ;ensure required round mode bcf 0x03, 0 ;divide by 2 rrf BEXP, f ; movlw 0x40 ;correct bias after division addwf BEXP, f ; ;if (E+1)/2 result has a remainder, then multiply mantissa by 2 ;(left shift) btfss AEXP, 0 goto FPSQRT24a rlf AARGB1, f ;(carry was zero) rlf AARGB0, f bsf BARGB1, 0 ;set first bit of current result ;and discard MSb of mantissa (we ;used it already by setting first bit) FPSQRT24a ;First find 8 bits of result. This will shift AARGB0 - AARGB1 to TEMPB1 ;Then only zeros will be fed instead of AARGB0 movlw 0x08 ;loop counter movwf LOOPCOUNT ; FPSQRT24b movlw 0x40 ;substract test bit from subwf AARGB0, f ;current lowest byte. ;it works also exactly ;like 0xC0 addition for the ;addition branch. movf BARGB1, w ;load accumulator with ;current result LSB btfsc TEMPB0, 7 goto FPSQRT24b_add btfss 0x03, 0 incfsz BARGB1, w subwf TEMPB1, f movlw 0x01 btfss 0x03, 0 subwf TEMPB0, f goto FPSQRT24b_next FPSQRT24b_add btfsc 0x03, 0 incfsz BARGB1, w addwf TEMPB1, f movlw 0x01 btfsc 0x03, 0 addwf TEMPB0, f FPSQRT24b_next rlf BARGB1, f ;shift result into result bytes rlf BARGB0, f rlf AARGB1, f ;Shift out next two bits of input rlf AARGB0, f ; rlf TEMPB1, f ; rlf TEMPB0, f ; rlf AARGB1, f ; rlf AARGB0, f ; rlf TEMPB1, f ; rlf TEMPB0, f ; decfsz LOOPCOUNT, f ;repeat untill 8 bits will be found goto FPSQRT24b ;Find other 7 or 8 bits. Only zeros are fed instead of AARGB0 ;Repeat untill MSb of result gets set FPSQRT24d btfsc BARGB1, 0 ;if Temp sign is positive, than jump to subtraction goto FPSQRT24d_sub ;(previous result bit is inverted sign bit, ;Tempb0.7 can not be used instead, because ;it may overflow) ;after LSBs addition (0x00 + 0xC0 = 0xC0) C=0, ;so we just continue adding higher bytes, ;keeping in mind that LSB=0xC0 and C=0 movf BARGB1, w addwf TEMPB1, f movf BARGB0, w btfsc 0x03, 0 incfsz BARGB0, w addwf TEMPB0, f goto FPSQRT24d_next FPSQRT24d_sub bcf 0x03, 0 ;simulate borrow (0x00 - 0x40 = 0xC0, C=0) incfsz BARGB1, w subwf TEMPB1, f movf BARGB0, w btfss 0x03, 0 incfsz BARGB0, w subwf TEMPB0, f FPSQRT24d_next rlf BARGB1, f ;shift result into result bytes rlf BARGB0, f bsf 0x03, 0 ;Shift out next two bits of input rlf TEMPB1, f ;(set carry before each shift to rlf TEMPB0, f ;simulate 0xC0 value) bsf 0x03, 0 ; rlf TEMPB1, f ; rlf TEMPB0, f ; btfss BARGB0, 7 ;repeat untill all 16 bits will be found goto FPSQRT24d ;flag C, TEMPB1 - TEMPB0 contain current input that may be used to find 17th bit for rounding ;Copy BARG to AARG movf BEXP, w movwf AEXP movf BARGB0, w movwf AARGB0 movf BARGB1, w movwf AARGB1 bcf AARGB0, 7 ;clear sign bit (overwrites explicit MSB, which is always one) retlw 0x00 ;********************************************************************** ;Last updated 21Nov00
Comments: