PIC Microcontroller Math Method

Square 10 bits

Nikolai Golovchenko says:

[The] debugged version: {ed: with a correction from Vladyslav Borodavka. Thank you!}


;***********************************************
; Square 10 bits
;
; 6 Aug 2000 by Nikolai Golovchenko
; Based on the original version of John Payson
;
; Input:
;  SrcH:SrcL - unsigned value to square. Only SrcH<0> and
;              SrcH<1> are tested as data bits 8 and 9, and
;              SrcH is also used directly as the index into
;              the 16-entry Sqr4 table, so the routine is
;              written for SrcH = 0..3.
; Output:
;  DstH:DstM:DstL - Src^2
;  W = 0 (the routine ends with retlw 0)
;
; SrcH and SrcL are only read: every instruction that
; touches them uses W as its destination.
; Sqr4 is called three times (for al, ah and b).
;
; Instructions: 68 (51 in Sqr10 plus the 17-word Sqr4 table)
; Execution time(including return): 51+5*3+1=67
;
; Description:
;  The goal is calculation of Dst = Src^2
;
;  Src may be rewritten as:
;  	Src = SrcH*256 + SrcL
;  Let's introduce variables a, ah, al, and b:
;  	a = ah*16 + al = SrcL
;  	b = SrcH
;  Then
;	Src^2 = (256*b+a)^2=65536*b^2+512*b*a+a^2
;
;  Sqr4 routine calculates squares for 4 bit data
;  (b, al, and ah) using look-up table.
;
;  Now we can find b^2 by Sqr4 and multiply it by 65536
;  (i.e. add it to DstH), and calculate 512*b*a.
;
;  To calculate a^2 let's expand it in byte halves:
;  a^2=(16*ah+al)^2=256*ah^2+32*ah*al+al^2
;
;  So,
;	Src^2=65536*b^2+512*b*a+256*ah^2+32*ah*al+al^2
;
;  This is the algorithm of how to square a 10 bit number
;  using Sqr4 look-up table.
;
;***********************************************
Sqr10
	clrf DstH	;clear result
	clrf DstM
	clrf DstL
	clrc		;carry must be 0 for the first rrf when the add is skipped
;find 32*ah*al
;Shift-and-add multiply: al is added into DstM once per set bit of ah,
;and DstM:DstL is shifted right between bits. Each rrf DstL shifts out
;a zero (DstL starts cleared and gets only three shifts), so carry is
;still 0 whenever an add is skipped.
	movf SrcL, w	;w = SrcL
	andlw 0x0F	;w = al (w stays al until the first call Sqr4)
	btfsc SrcL, 4	;ah<0> set?
	 addwf DstM, f	;yes: DstM += al
	rrf DstM, f	;use carry after addition
	rrf DstL, f
;DstM:DstL=ah<0>*al*256/2
	btfsc SrcL, 5	;ah<1> set?
	 addwf DstM, f
	rrf DstM, f
	rrf DstL, f
;DstM:DstL=(ah<0>*al*256/2 + ah<1>*al*256)/2=64*al(ah<0>+2*ah<1>)
	btfsc SrcL, 6	;ah<2> set?
	 addwf DstM, f
	rrf DstM, f
	rrf DstL, f
;DstM:DstL=32*al(ah<0>+2*ah<1>+4*ah<2>)
	btfsc SrcL, 7	;ah<3> set?
	 addwf DstM, f
;DstM:DstL=32*al(ah<0>+2*ah<1>+4*ah<2>+8*ah<3>)=32*ah*al
;(maximum value = 0x1C20)

;Now add squared al and ah
	call Sqr4	;w still holds al here; returns w = al^2
	addwf DstL, f	;add al^2
	skpnc
	 incf DstM, f	;propagate carry to DstM

	swapf SrcL, w
	andlw 0x0F	;w = ah
	call Sqr4	;w = ah^2
	addwf DstM, f	;add 256*ah^2; no carry out because a^2 <= 255^2 = 0xFE01

;At this point DstM:DstL contains a^2

;Add 512*b*a to Dst
;512*b*a=512*(2*b<1>+b<0>)*(128*a<7>+a<0:6>)=
;=65536*b*a<7>+256*(2*a<0:6>*b<0>+2*a<0:6>*b<1>+2*a<0:6>*b<1>)

	movf SrcH, w	;Dst += 512*b*(128*a<7>) = 65536*b*a<7>
	btfsc SrcL, 7
	 addwf DstH, f  ;carry is reset (DstH was 0, so this cannot overflow)

;Carry is 0 here whether or not the add above ran, so the rlf below
;shifts a 0 into bit 0 of w.
	rlf SrcL, w	;w = a<0:6>*2 (a<7> is already used)
	clrc		;discard a<7>, which rlf moved into carry
	btfsc SrcH, 0
	 addwf DstM, f	;Dst += 512*b<0>*a<0:6>
	skpnc
	 incf DstH, f	;propagate carry of the add (carry is 0 if the add was skipped)

;The b<1> term has weight 1024 = 2*512, so the same add is done twice.
;w is unchanged by addwf ..., f and incf ..., f, so it is still 2*a<0:6>.
	clrc		;Dst += 512*b<1>*a<0:6>
	btfsc SrcH, 1
	 addwf DstM, f
	skpnc
	 incf DstH, f

	clrc		;Dst += 512*b<1>*a<0:6> (second time, for 1024*b<1>*a<0:6> in total)
	btfsc SrcH, 1
	 addwf DstM, f
	skpnc
	 incf DstH, f

;Add 65536*b^2
	movf SrcH, w	;w = b, used as the Sqr4 table index
	call Sqr4	;w = b^2
	addwf DstH, f
	retlw 0		; All done!
;Sqr4 - square of a 4-bit value by table look-up.
; Input:  W = index; the table below has 16 entries (0..15)
; Output: W = index^2 (0..225), as used by the callers after each call
;The index is added to PCL, so execution continues at the selected
;table entry.
;NOTE(review): DT is presumably the assembler directive that emits one
; retlw per listed value - confirm for the assembler in use.
;NOTE(review): a computed jump through PCL normally needs PCLATH to
; match the table's page and the table not to cross a 256-word
; boundary; nothing here sets PCLATH - confirm for the target device.
Sqr4:			;Look-up table for 4 bit squares
	addwf PCL, f	;jump forward by W entries
	DT 0,1,4,9,16,25,36,49,64,81,100,121,144,169,196,225

;***********************************************

The original version:

	; Original listing that the debugged Sqr10 above is based on,
	; kept here for reference. It is written for the same purpose:
	; DstH:DstM:DstL = (SrcH:SrcL)^2 for a 10-bit input.
	; NOTE(review): the lines marked below differ from the debugged
	; version; use the debugged routine for real work.
	clrf DstH
	clrf DstM
	clrf DstL
	; NOTE(review): carry is not cleared before the first rrf here;
	; the debugged version adds a clrc after the three clrf's.
	movf SrcL,w
	andlw $0F	; w = low nibble of SrcL
	; NOTE(review): the four tests below use btfss (add when the bit is
	; CLEAR) and the symbol Src; the debugged version uses
	; btfsc SrcL, n (add when the bit is SET).
	; NOTE(review): addwf/rrf/incf are written without a destination
	; operand; presumably the assembler defaults to the file register
	; - confirm.
	btfss Src,4
	addwf DstM
	rrf DstM
	rrf DstL
	btfss Src,5
	addwf DstM
	rrf DstM
	rrf DstL
	btfss Src,6
	addwf DstM
	rrf DstM
	rrf DstL
	btfss Src,7
	addwf DstM
	call Sqr4	; look up the square of the value in w
	; NOTE(review): no carry propagation from DstL into DstM after this
	; add; the debugged version follows it with skpnc / incf DstM, f.
	addwf DstL
	; NOTE(review): swapf has no ",w" here, so presumably SrcL itself is
	; swapped and w is left unchanged; the debugged version uses
	; swapf SrcL, w - confirm.
	swapf SrcL
	andlw $0F
	call Sqr4
	addwf DstM
; At this point, 16-bit result is in DstM:DstL
; 25 words of code prior to this point (plus a
; 17-word table-lookup). Total execution time:
; 35 cycles up to this point.
	btfss SrcH,0	; bit 8 of the input set?
	goto NoBit8	; no: skip the bit-8 terms
	movf SrcL,w
	btfsc C		; presumably C names the carry flag - confirm
	incf DstH
	; NOTE(review): SrcL is added to DstM once here; the debugged
	; version adds 2*a<0:6> to DstM and b*a<7> to DstH for this term.
	addwf DstM
	btfsc C
	incf DstH	; propagate carry from the add into DstH
	incf DstH	; unconditional +1 in DstH (65536 * 1^2 for bit 8)
; Another 9 words for bit 8; 3 or 9 cycles to exec.
NoBit8:
	btfss SrcH,1	; bit 9 of the input set?
	goto NoBit9	; no: done
	; NOTE(review): as written, w = 4 when SrcH<0> is set and 8 when it
	; is clear; with bit 8 already contributing 1, b = 3 needs 8 more
	; (9 = 1 + 8) and b = 2 needs 4, so this test looks inverted - confirm.
	movlw 4
	btfss SrcH,0
	movlw 8
	addwf DstH
	rlf SrcL,w	; w = SrcL shifted left; old SrcL<7> goes to carry
	btfsc C		; old SrcL<7> set: add 2 to DstH
	incf DstH
	btfsc C
	incf DstH
	addwf DstM	; add the shifted SrcL to DstM twice,
	btfsc C		; propagating each carry into DstH
	incf DstH
	addwf DstM
	btfsc C
	incf DstH
; Another 17 words for bit 9; 3 or 17 cycles to execute
; Total worst-case time: 35+26 = 61 cycles.
NoBit9:
	retlw 0 ; All done!
; Look-up table of squares for a 4-bit index in w (16 entries, 0..15),
; as used by the original listing above.
; NOTE(review): this uses "addwf PC" and "db" where the debugged
; version uses "addwf PCL, f" and "DT"; presumably this is another
; assembler's syntax for the same add-to-program-counter table of
; return-with-literal entries - confirm before assembling.
Sqr4:
	addwf PC
	db 0,1,4,9,16,25,36,49,64,81,100,121,144,169,196,225