;***********************************************
;
; Fast 16 bit Square Root
;
; Input: xh:xl (is modified)
; Output: y
; Temporary: Temp
;
; Instructions: 90
; Execution time (including return):
; 92 cycles
;
; Description:
;
; Given y = sqrt(x),
;
; x = y * y
;
; Result is found by error and trial method, one bit at a
; time. This is accomplished by using the difference between
; squared current answer and the squared current answer with
; set next bit:
;
; Delta = (y + N)^2 - y^2 = 2*y*N + N^2
;
; N can be conviniently a power of two. N = 2^i (i=0..7)
;
; To check if the current bit is set, we try to substract Delta
; from remainder (initially remainder is equal to x) and if remainder
; is not negative that bit is set, and if remainder becomes negative
; the bit is left clear and remainder is restored.
;
; For example:
; x = 169, y=sqrt(x) ? (assume x is 8 bit, therefore y is 4 bit)
;
; Bit y<3>
; N=8? Delta = 2*0*8 + 8^2 = 64
; x = x - Delta = 169 - 64 = 105
; result positive, y<3> = 1, (y=8)
; Bit y<2>
; N=4? Delta = 2*8*4 + 4^2 = 80
; x = x - Delta = 105 - 80 = 25
; result positive, y<2> = 1, (y=8+4=12)
; Bit y<1>
; N=2? Delta = 2*12*2 + 2^2 = 52
; x = x - Delta = 25 - 52 = -27
; result negative, y<1> = 0, restore x (x=25), (y=12)
; Bit y<0>
; N=1? Delta = 2*12*1 + 1^2 = 25
; x = x - Delta = 25 - 25 = 0
; result positive, y<0> = 1, (y=12+1=13)
;
; As can be seen y=13 is a correct result.
;
; Because N is a power of two, Delta can be calculated without multiplication,
; just using shifts and adds. For 16 bit square root result is 8 bit. Therefore
; N accepts 8 possible values: 128, 64, 32, .. 1. In binary Delta can be
; represented by the following chart. Bits of x are x15, x14, .. x0 and
; bits of y are y7, y6, ...y0.
;
; x15 x12 x8 x4 x0
; -
; y7 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
; y6 0 y7 0 1 0 0 0 0 0 0 0 0 0 0 0 0
; y5 0 0 y7 y6 0 1 0 0 0 0 0 0 0 0 0 0
; y4 0 0 0 y7 y6 y5 0 1 0 0 0 0 0 0 0 0
; y3 0 0 0 0 y7 y6 y5 y4 0 1 0 0 0 0 0 0
; y2 0 0 0 0 0 y7 y6 y5 y4 y3 0 1 0 0 0 0
; y1 0 0 0 0 0 0 y7 y6 y5 y4 y3 y2 0 1 0 0
; y0 0 0 0 0 0 0 0 y7 y6 y5 y4 y3 y2 y1 0 1
;
; It can be seen that multiplication is not required to find Delta. Having this
; chart in mind will help follow the square root routine below.
;
; August 14, 2000 by Nikolai Golovchenko
;
;***********************************************
sqrt16fast
clr y ;clear result
;find bit 7 of result
mov W, #$40 ;substract 128^2 from x
sub xh, W
rl y ;shift bit 7 of result to y
sb y.0 ;restore xh if borrow
add xh, W
;find bit 6 of result
or W, #$10 ;substract (128+64)^2 or 64^2 from x
sb y.0
xor W, #$40
sub xh, W
rl y ;shift bit 6 of result to y
sb y.0 ;restore xh if borrow
add xh, W
;find bit 5 of result
mov W, <>y ;substract y*16+32^2 from x
or W, #$04 ;
sub xh, W ;
rl y ;shift bit 5 of result to y
sb y.0 ;restore xh if borrow
add xh, W
;find bit 4 of result
xor W, #$0E ;substract y*16+(32+16)^2
sb y.0 ;or y*16+16^2 from x
xor W, #$08 ;
rl xl ; shift left the remainder (carry ignored)
rl xh ;
sub xh, W ;
rl y ;shift bit 4 of result to y
sb y.0 ;restore xh if borrow
add xh, W
;find bit 3 of result
mov W, y ;store y in Temp
mov Temp, W
mov W, #$80 ;substract 2*Temp:0x80 from xh:xl (x -= 2*(8*p+2))
sub xl, W
mov W, <<Temp
xor W, #$01
mov W, xh-w
rl y ;shift bit 3 of result to y
snb y.0
mov xh, W ;store xh if no borrow
mov W, #$80 ;restore xl if borrow
sb y.0
add xl, W
;find bit 2 of result
mov W, #$A0 ;substract Temp:0x20 from xh:xl (x -= 1*(8*p+1))
sb y.0
mov W, #$20
sub xl, W
mov W, Temp
sb C
mov W, ++Temp
mov W, xh-w
rl y
snb y.0
mov xh, W ;store xh
mov W, #$A0
sb y.1
mov W, #$20
sb y.0
add xl, W ;restore xl
;find bit 1 of result
clrb C
rr xh
rr xl
mov W, xl
mov Temp, W
mov W, <>y
and W, #$F0
or W, #$04
sub xl, W
;substract y<5:4>+carry from xh
swap y
mov W, y
sb C
mov W, ++y
swap y
and W, #$0F
mov W, xh-W
rl y ;carry reset
snb y.0
mov xh, W ;store xh
mov W, Temp
sb y.0
mov xl, W ;restore xl
;find bit 0 of result
rl y ;carry reset
mov W, <<y ;carry = result<7>
or W, #$01
sub xl, W
sb C
dec xh
snb y.7
dec xh
sb xh.7
setb y.0
ret
;***********************************************
Comments: