Code:
; Square 10bit unsigned
; by Martin Sturm 2010
; Tested over full 10bit input range
;
; a*a --> r
;   a = aH:aL     (10bit, right justified) (not modified)
;   r = rH:rM:rL  (20bit result)
;
; algorithm
;   r = 2^16*(aH*aH) + 2^8*(2*aH*aL) + aL*aL
;              2x2           2x8        8x8
;   all multiplications are unrolled
;
; 62 instructions, 49-62 cycles, 56 avg
;   NOTE(review): with the optional ANDLW left commented out, the sections
;   below total 36 + 7 + 19 = 62 instructions, and their cycle counts sum to
;   36 + 7 + (7..19) = 50..62 (avg 56). The "49" minimum above looks
;   off by one - TODO confirm against a simulator.
;
; incorrect result if A has non-zero bits above the 10th
;   use optional ANDLW 0x03 to correct for this if necessary
;
; Usage notes (derived from the code below):
;   - W and the carry flag are overwritten.
;   - rM and rL are cleared before aL is read, and rH before aH is read, so
;     the result registers must not overlap aH/aL.
;   - The macros contain no bank-selection instructions; presumably all five
;     registers must be reachable in the currently selected bank - verify
;     against the target device.
;   - Uses the assembler pseudo-instructions MOVFW, CLRC and SKPNC.
;
; helper macro
;   One shift-and-add step of an unrolled multiply.
;     A,bit  : multiplier register and the bit of it examined in this step
;     uH:uL  : 16bit accumulator, shifted right by one on every step
;   On entry W must hold the multiplicand. Carry must be clear unless the
;   ADDWF executes, because RRF uH rotates carry into bit 7 of uH.
mmac MACRO A,bit, uH,uL
        BTFSC   A,bit           ; multiplier bit clear -> skip the add
        ADDWF   uH,F            ; uH += W, carry = 9th bit of the sum
        RRF     uH,F            ; shift carry:uH right ...
        RRF     uL,F            ; ... into uL; carry = bit shifted out of uL
        ENDM

; SQR_10  aH,aL, rH,rM,rL
;   Computes rH:rM:rL = (aH:aL)^2 for a 10bit unsigned aH:aL.
;     aH, aL      : input registers (read only)
;     rH, rM, rL  : result registers, high / middle / low byte
SQR_10 MACRO aH,aL, rH,rM,rL
        LOCAL   g1, g2

        ; rM:rL = aL*aL  [8b x 8b mult]  (36 instr, 36 cyc)
        CLRF    rM
        CLRF    rL
        CLRC                    ; first mmac step needs carry clear
        MOVFW   aL              ; multiplicand in W for all eight steps
        ; rL starts at zero, so each of the eight RRF rL shifts out a zero:
        ; carry is clear between steps and after the last one.
        mmac    aL,0, rM,rL
        mmac    aL,1, rM,rL
        mmac    aL,2, rM,rL
        mmac    aL,3, rM,rL
        mmac    aL,4, rM,rL
        mmac    aL,5, rM,rL
        mmac    aL,6, rM,rL
        mmac    aL,7, rM,rL

        ; rH = aH*aH  [2b x 2b square]  (8 instr, 8 cyc)
        ;   NOTE(review): 8 counts the optional ANDLW; 7 as assembled here.
        CLRF    rH
        MOVFW   aH              ; multiplicand in W
;       ANDLW   0x03            ; prevent errors if aH non-zero above 10th bit
        BTFSC   aH,1
        ADDWF   rH,F            ; never sets carry
        RLF     rH,F            ; rH = 2*aH if aH bit 1 set, else 0 (carry in is 0)
        BTFSC   aH,0
        ADDWF   rH,F            ; never sets carry

        ; rH:rM += 2*aH*aL  [2b x 8b mult]  (19 instr, 7-19 cyc, avg. 13)
        RLF     aL,W            ; W = 2*aL (carry is always clear before here)
                                ; carry = bit 7 of aL = upper bit of 2*aL
        BTFSS   aH,0            ; aH bit 0 set -> add 2*aL once
        GOTO    g1
        SKPNC
        INCF    rH,F            ; add upper bit of 2*aL
        ADDWF   rM,F            ; add lower byte of 2*aL
        SKPNC
        INCF    rH,F            ; propagate carry out of rM
g1
        BTFSS   aH,1            ; aH bit 1 set -> add 2*aL twice more
        GOTO    g2
        ; W still holds (2*aL & 0xFF)
        ADDWF   rM,F            ; add W to rM (twice)
        SKPNC
        INCF    rH,F            ; propagate carry of the first add
        ADDWF   rM,F            ;
        SKPNC
        INCF    rH,F            ; propagate carry of the second add
        MOVLW   0x02
        BTFSC   aL,7            ; bit 7 of aL is the upper bit of 2*aL
        ADDWF   rH,F            ; add twice the upper bit of 2*aL
g2
        ENDM