Code:
; 12bit by 12bit unsigned multiply
; by Martin Sturm 2010
; tested over all 12bit input combinations
;
; a (12bit) = aH:aL   (modified)
; b (12bit) = bH:bL   (not modified)
; r3:r2:r1 is 24-bit result
;
; ignores non-zero bits above 12th for b,
; incorrect result if a has non-zero bits above the 12th
; unless the optional masking instruction is used
; (the header originally called it ANDLW; the optional instruction that is
;  actually present, commented out, in the body is "ANDWF aH,F")
;
; r = a*b = r3:r2:r1
; r = 2^12*((aH:aL >> 4) * bH) + 2^8*((aL & 0x0F) * bH) + (aH:aL * bL)
;
; 92 instructions, 92 cycles
; (enabling the optional ANDWF adds one instruction to that count)
;
; On exit: aH holds its two nibbles swapped, aL holds (aL & 0x0F) * (bH & 0x0F),
; and W holds that same 4x4 product.
;

; helper macro
;
; One step of a shift-and-add multiply.
;   A,bit  - multiplier register and the bit of it to test
;   u2     - upper accumulator byte; W is added here when A<bit> is set
;   u1     - byte that receives the bit rotated out of u2
; W must already hold the multiplicand. The carry produced by ADDWF (or the
; carry left over from the previous step when the add is skipped) is rotated
; into the top of u2, so the macro depends on the carry flag on entry.
mmac MACRO A,bit, u2,u1
    BTFSC A,bit         ; skip the add when the multiplier bit is 0
    ADDWF u2,F          ; u2 += W, carry = overflow out of u2
    RRF u2,F            ; carry -> u2<7>, u2<0> -> carry
    RRF u1,F            ; carry -> u1<7>, u1<0> -> carry
    ENDM

; MULT_12x12_FAST aH,aL, bH,bL, r3,r2,r1
;   aH,aL     - 12-bit multiplicand a (both registers are overwritten)
;   bH,bL     - 12-bit multiplier b (only read; only bits 0-3 of bH are tested)
;   r3,r2,r1  - 24-bit product, r3 = most significant byte
MULT_12x12_FAST MACRO aH,aL, bH,bL, r3,r2,r1
    LOCAL g1, g2        ; NOTE(review): g1/g2 are declared but never referenced in this macro body

    ; r3:r2:r1 = aH:aL * bL
    CLRF r3
    CLRF r2
    CLRF r1
    CLRC                ; first mmac step must see carry = 0
    MOVFW bL            ; W = bL, the value added in every step below
    ; 8 steps over aL: partial product is shifted down through r3 into r1.
    ; r1 was cleared, so every bit rotated out of it into carry is 0.
    mmac aL,0, r3,r1
    mmac aL,1, r3,r1
    mmac aL,2, r3,r1
    mmac aL,3, r3,r1
    mmac aL,4, r3,r1
    mmac aL,5, r3,r1
    mmac aL,6, r3,r1
    mmac aL,7, r3,r1
    ; 4 steps over the low nibble of aH: the shift now continues into r2
    mmac aH,0, r3,r2
    mmac aH,1, r3,r2
    mmac aH,2, r3,r2
    mmac aH,3, r3,r2
    ; r3:r2 still needs 4 more rotates to finish

    ; Rearrange the nibbles of a. With a = 0 b c d (one letter per nibble):
                        ;  aH    aL    W
    MOVLW 0x0F          ; 0 b   c d   0x0F
;   ANDWF aH,F          ; 0 b                 (optional: clears bits above the 12th of a)
    SWAPF aH,F          ; b 0
    SWAPF aL,F          ;       d c
    ANDWF aL,W          ;             0 c
    XORWF aL,F          ;       d 0
    IORWF aH,W          ;             b c
    ; W now holds (aH:aL >> 4)
    ; aL now holds ((aL & 0x0F) << 4)

    ; Perform 4 more rotates to complete 12x8 mult above
    ; while at the same time doing the 4x8 mult bH*(aH:aL & 0x0FF0)
    ; carry always clear by here
    ; (the last RRF r2 rotated out one of r2's original zero bits, and the
    ;  MOVLW/SWAPF/ANDWF/XORWF/IORWF sequence above leaves carry untouched)
    mmac bH,0, r3,r2
    mmac bH,1, r3,r2
    mmac bH,2, r3,r2
    mmac bH,3, r3,r2

    ; 4x4 multiply bH*(aL & 0x0F) to finish bH*aH:aL (17 cycles)
    ; aL = bH*(aL & 0x0F)
    MOVFW aL            ; loads W with ((aL & 0x0F) << 4)
    ; carry always clear by here
    ; (r2 has now been rotated 8 times in total, all of them shifting out
    ;  bits that came from the initial CLRF r2)
    ; First step: the accumulator would start at 0, and aL already equals W,
    ; so "add W" means keep aL and "do not add" means clear aL.
    BTFSS bH,0
    CLRF aL
    RRF aL,F
    ; Remaining three steps: conditional add of W, then rotate right
    BTFSC bH,1
    ADDWF aL,F
    RRF aL,F
    BTFSC bH,2
    ADDWF aL,F
    RRF aL,F
    BTFSC bH,3
    ADDWF aL,F
    RRF aL,F

    ; r3:r2 += aL
    MOVFW aL
    ADDWF r2,F
    SKPNC               ; propagate the carry out of r2 into r3
    INCF r3,F
    ENDM