;*********************************************** ; ; Fast 16 bit Square Root ; ; Input: xh:xl (is modified) ; Output: y ; Temporary: Temp ; ; Instructions: 90 ; Execution time (including return): ; 92 cycles ; ; Description: ; ; Given y = sqrt(x), ; ; x = y * y ; ; Result is found by error and trial method, one bit at a ; time. This is accomplished by using the difference between ; squared current answer and the squared current answer with ; set next bit: ; ; Delta = (y + N)^2 - y^2 = 2*y*N + N^2 ; ; N can be conviniently a power of two. N = 2^i (i=0..7) ; ; To check if the current bit is set, we try to substract Delta ; from remainder (initially remainder is equal to x) and if remainder ; is not negative that bit is set, and if remainder becomes negative ; the bit is left clear and remainder is restored. ; ; For example: ; x = 169, y=sqrt(x) ? (assume x is 8 bit, therefore y is 4 bit) ; ; Bit y<3> ; N=8? Delta = 2*0*8 + 8^2 = 64 ; x = x - Delta = 169 - 64 = 105 ; result positive, y<3> = 1, (y=8) ; Bit y<2> ; N=4? Delta = 2*8*4 + 4^2 = 80 ; x = x - Delta = 105 - 80 = 25 ; result positive, y<2> = 1, (y=8+4=12) ; Bit y<1> ; N=2? Delta = 2*12*2 + 2^2 = 52 ; x = x - Delta = 25 - 52 = -27 ; result negative, y<1> = 0, restore x (x=25), (y=12) ; Bit y<0> ; N=1? Delta = 2*12*1 + 1^2 = 25 ; x = x - Delta = 25 - 25 = 0 ; result positive, y<0> = 1, (y=12+1=13) ; ; As can be seen y=13 is a correct result. ; ; Because N is a power of two, Delta can be calculated without multiplication, ; just using shifts and adds. For 16 bit square root result is 8 bit. Therefore ; N accepts 8 possible values: 128, 64, 32, .. 1. In binary Delta can be ; represented by the following chart. Bits of x are x15, x14, .. x0 and ; bits of y are y7, y6, ...y0. ; ; x15 x12 x8 x4 x0 ; - ; y7 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ; y6 0 y7 0 1 0 0 0 0 0 0 0 0 0 0 0 0 ; y5 0 0 y7 y6 0 1 0 0 0 0 0 0 0 0 0 0 ; y4 0 0 0 y7 y6 y5 0 1 0 0 0 0 0 0 0 0 ; y3 0 0 0 0 y7 y6 y5 y4 0 1 0 0 0 0 0 0 ; y2 0 0 0 0 0 y7 y6 y5 y4 y3 0 1 0 0 0 0 ; y1 0 0 0 0 0 0 y7 y6 y5 y4 y3 y2 0 1 0 0 ; y0 0 0 0 0 0 0 0 y7 y6 y5 y4 y3 y2 y1 0 1 ; ; It can be seen that multiplication is not required to find Delta. Having this ; chart in mind will help follow the square root routine below. ; ; August 14, 2000 by Nikolai Golovchenko ; ;*********************************************** sqrt16fast clr y ;clear result ;find bit 7 of result mov W, #$40 ;substract 128^2 from x sub xh, W rl y ;shift bit 7 of result to y sb y.0 ;restore xh if borrow add xh, W ;find bit 6 of result or W, #$10 ;substract (128+64)^2 or 64^2 from x sb y.0 xor W, #$40 sub xh, W rl y ;shift bit 6 of result to y sb y.0 ;restore xh if borrow add xh, W ;find bit 5 of result mov W, <>y ;substract y*16+32^2 from x or W, #$04 ; sub xh, W ; rl y ;shift bit 5 of result to y sb y.0 ;restore xh if borrow add xh, W ;find bit 4 of result xor W, #$0E ;substract y*16+(32+16)^2 sb y.0 ;or y*16+16^2 from x xor W, #$08 ; rl xl ; shift left the remainder (carry ignored) rl xh ; sub xh, W ; rl y ;shift bit 4 of result to y sb y.0 ;restore xh if borrow add xh, W ;find bit 3 of result mov W, y ;store y in Temp mov Temp, W mov W, #$80 ;substract 2*Temp:0x80 from xh:xl (x -= 2*(8*p+2)) sub xl, W mov W, <<Temp xor W, #$01 mov W, xh-w rl y ;shift bit 3 of result to y snb y.0 mov xh, W ;store xh if no borrow mov W, #$80 ;restore xl if borrow sb y.0 add xl, W ;find bit 2 of result mov W, #$A0 ;substract Temp:0x20 from xh:xl (x -= 1*(8*p+1)) sb y.0 mov W, #$20 sub xl, W mov W, Temp sb C mov W, ++Temp mov W, xh-w rl y snb y.0 mov xh, W ;store xh mov W, #$A0 sb y.1 mov W, #$20 sb y.0 add xl, W ;restore xl ;find bit 1 of result clrb C rr xh rr xl mov W, xl mov Temp, W mov W, <>y and W, #$F0 or W, #$04 sub xl, W ;substract y<5:4>+carry from xh swap y mov W, y sb C mov W, ++y swap y and W, #$0F mov W, xh-W rl y ;carry reset snb y.0 mov xh, W ;store xh mov W, Temp sb y.0 mov xl, W ;restore xl ;find bit 0 of result rl y ;carry reset mov W, <<y ;carry = result<7> or W, #$01 sub xl, W sb C dec xh snb y.7 dec xh sb xh.7 setb y.0 ret ;***********************************************
Comments: