SX Microcontroller Math Method

24 Bit Floating Point Square Root

by Nikolai Golovchenko

;**********************************************************************
;By Nikolai Golovchenko
;24 BIT FLOATING POINT SQARE ROOT (MICROCHIP format, see AN575)
;
;Input:
; AARGB0 - high byte with sign bit
; AARGB1 - low byte
; AEXP - exponent 2^(AEXP - 127)
;Output:
; AARGB0-1, AEXP - 24 bit floating point
;Temporary:
; BARGB0-1, BEXP - temporary for result
; TEMPB0-1
; LOOPCOUNT - counter
;
;ROM: 85 instructions
;RAM: 9 bytes
;
;Timing (includes call and return)
;8 cycles best case
;3+3+18+8*28-2+8*23-2+7+3= 438 cycles worst case
;**********************************************************************
;NOTES: 1)Square root is taken on absolute value of the number
;       (sign bit is ignored)
;       2)Rounding not implemented yet
;**********************************************************************
FPSQRT24
;Normalize input
;1)Check for zero input - if zero then exit
;2)Change sign to 1 and use sign bit as explicit MSB
;3)Divide BEXP = BEXP / 2
;4)If AEXP can be divided by 2 (AEXP<0>=1) then
;BARGB1 = 1, AARGB<0-1> << 1 and find 15 more bits
;5)Else find 16 more bits

        mov     W, AEXP ;if zero input then return
        snb     $03.2
        retw    $00

        clr     BARGB0  ;set up all used
        clr     BARGB1  ;temporary registers
        clr     TEMPB0  ;
        clr     TEMPB1  ;

        setb    AARGB0.7        ;make MSB explicit and ignore mantissa sign

;sqrt(2^E*M)=2^(E/2)*sqrt(M)
;we will align mantissa point above MSb. This is
;equivalent to division by 2. Or,
;sqrt(2^E*M)=sqrt(M/2)*2^((E+1)/2)
;new exponent is (E+1)/2
;new mantissa is M/2 (not changed, just new point position)

;divide (bexp+1) by 2. bexp is (eb + 127), where eb=-126.128!
;eb/2 is rounded to minus infinity!
        mov     BEXP, W ;copy aexp to bexp
        dec     BEXP    ;ensure required round mode
        clrb    $03.0   ;divide by 2
        rr      BEXP    ;
        mov     W, #$40 ;correct bias after division (J: was 64)
        add     BEXP, W ;

;if (E+1)/2 result has a remainder, then multiply mantissa by 2
;(left shift)
        sb      AEXP.0
        jmp     FPSQRT24a
        rl      AARGB1  ;(carry was zero)
        rl      AARGB0
        setb    BARGB1.0        ;set first bit of current result
                        ;and discard MSb of mantissa (we
                        ;used it already by setting first bit)
FPSQRT24a

;First find 8 bits of result. This will shift AARGB0 - AARGB1 to TEMPB1
;Then only zeros will be fed instead of AARGB0
        mov     W, #$08 ;loop counter  (J: was 8)
        mov     BARGB2, W       ;
FPSQRT24b
        mov     W, #$40 ;substract test bit from
        sub     AARGB0, W       ;current lowest byte.
                        ;it works also exactly
                        ;like $C0 addition for the
                        ;addition branch.
        mov     W, BARGB1       ;load accumulator with
                        ;current result LSB

        snb     TEMPB0.7
        jmp     FPSQRT24b_add

        sb      $03.0
        movsz   W, ++BARGB1
        sub     TEMPB1, W
        mov     W, #$01 ;(J: was 1)
        sb      $03.0
        sub     TEMPB0, W
        jmp     FPSQRT24b_next

FPSQRT24b_add
        snb     $03.0
        movsz   W, ++BARGB1
        add     TEMPB1, W
        mov     W, #$01 ;(J: was 1)
        snb     $03.0
        add     TEMPB0, W

FPSQRT24b_next
        rl      BARGB1  ;shift result into result bytes
        rl      BARGB0

        rl      AARGB1  ;Shift out next two bits of input
        rl      AARGB0  ;
        rl      TEMPB1  ;
        rl      TEMPB0  ;
        rl      AARGB1  ;
        rl      AARGB0  ;
        rl      TEMPB1  ;
        rl      TEMPB0  ;

        decsz   BARGB2  ;repeat untill 8 bits will be found
        jmp     FPSQRT24b

;Find other 7 or 8 bits. Only zeros are fed instead of AARGB0
;Repeat untill MSb of result gets set
FPSQRT24d
        snb     BARGB1.0        ;if Temp sign is positive, than jump to subtraction
        jmp     FPSQRT24d_sub   ;(previous result bit is inverted sign bit,
                                ;Tempb07 can not be used instead, because
                                ;it may overflow)

        ;after LSBs addition ($00 + $C0 = $C0) C=0,
        ;so we just continue adding higher bytes,
        ;keeping in mind that LSB=$C0 and C=0
        mov     W, BARGB1
        add     TEMPB1, W
        mov     W, BARGB0
        snb     $03.0
        movsz   W, ++BARGB0
        add     TEMPB0, W
        jmp     FPSQRT24d_next

FPSQRT24d_sub

        clrb    $03.0   ;simulate borrow ($00 - $40 = $C0, C=0)
        movsz   W, ++BARGB1
        sub     TEMPB1, W
        mov     W, BARGB0
        sb      $03.0
        movsz   W, ++BARGB0
        sub     TEMPB0, W

FPSQRT24d_next
        rl      BARGB1  ;shift result into result bytes
        rl      BARGB0

        setb    $03.0   ;Shift out next two bits of input
        rl      TEMPB1  ;(set carry before each shift to
        rl      TEMPB0  ;simulate $C0 value)
        setb    $03.0   ;
        rl      TEMPB1  ;
        rl      TEMPB0  ;

        sb      BARGB0.7        ;repeat untill all 16 bits will be found
        jmp     FPSQRT24d

;flag C, TEMPB1 - TEMPB0 contain current input that may be used to find 17th bit for rounding

;Copy BARG to AARG
        mov     W, BEXP
        mov     AEXP, W
        mov     W, BARGB0
        mov     AARGB0, W
        mov     W, BARGB1
        mov     AARGB1, W
        clrb    AARGB0.7        ;clear sign bit (overwrites explicit MSB, which is always one)

        retw    $00

;**********************************************************************
;Last updated 21Nov00
Interested: