;------------------------------------------------------------------
;      Tutorial    Mult 24 x 24  out 48 (max)
;                                           Fred Maher 3rd Nov 2002
;------------------------------------------------------------------


	LIST P=16F628 ; 16F628 Runs at 20 MHz
	INCLUDE "p16F628.inc"

	__CONFIG _CP_OFF & _WDT_OFF & _HS_OSC & _PWRTE_ON & _LVP_OFF & _BODEN_OFF

	ERRORLEVEL -224 ; suppress annoying message because of tris
	ERRORLEVEL -302 ; suppress message because of page change

	CBLOCK 20H

	top1
	top2
	top3
	top4
	top5
	top6
	bot1
	bot2
	bot3
	res1
	res2
	res3
	res4
	res5
	res6
	count
	temp
      c1
      c2
 	ENDC

;    Top and bottom registers are not symetrical.   The bottom registers will never need to 
;    handle more than 24 bits. The top registers can, when the numbers are near the maximum,
;    have to handle 48 bits due to left shifting. Now we can reuse registers but it makes 
;    following these "tutor"  multiply routines  harder to follow. 

;There is no ORG, ISR,  as this is a sub routine of a main program. However we 
;want it to behave like a program to debug and emulate it running with real number inputs. 
;When it is all working we remove ;the header and  place the variables with the rest of 
;the variables of the existing main ;program code. 
;So we can start by  putting  values into the top and bottom, values that will not cause 
;overflow in the result.

;Example numbers :top 00FFFF bot 000024 txb = 23FFDC(inside 24 bit limit)decimal = 2,359,260

Testnums:
	movlw  0x0B
	movwf top1
	movlw  0x00
	movwf top2
	movlw  0x00
	movwf top3
	movlw  0x00
	movwf top4
	movlw  0x00
	movwf top5
	movlw  0x00
	movwf top6

	movlw  0xBA
	movwf bot1
	movlw  0xDC
	movwf bot2
	movlw  0xFE
	movwf bot3
	clrf res1
	clrf res2
	clrf res3
	clrf res4
	clrf res5
	clrf res6

; In example top x bot = B044 F100
;  we will use top as multiplicand and bot as the multiplier Note:- the smaller number is as we
; discussed in the multiplier, bottom.

; A word about the code we are going to write. The shifts and the checks produce a lot of trees 
;making it easy to lose sight of the main actions. These are. 
; THE MULTIPLY SEQUENCE
;----------------------

;Step 1. Use bot bit 0 to decide if we add tops to reses or to skip directly to 4.
; 
;Step  2. Add the rotated left  3top to 3res  if bot bit not 0,  else skip add.
; Please note, the First Time as there is no shifting,
; add directly all the top bits to result. If the first ( rightmost digit of the  
;bot..  multiplier )  had been zero and not four then we skip the add and move on 
;to the next action. ( bot bit tells us when to skip the add)
;Step 2 actions:. The addition of res  and top
; top1  add to res1,  carry flag? yes, add 1 to res2 , clear carry flag
; top2  add to res2,  carry flag? yes, add 1 to res3 , clear carry flag
; top3  add to res3 , carry flag? yes,   
; 
;Step 3. Rotate left  the three top registers  and check overflows  in  this sequence...
; top1  rotate left,  carry flag? yes, add 1 to top2 , clear carry flag
; top2  rotate left,  carry flag? yes, add 1 to top3 , clear carry flag
; top3 rotate left, carry flag? yes, 
; not this time                  ---> "add 1 to top4, clear carry flag"<--
;
;Step 4.  Bot,bit0( value 1 or 0 )decides if we add or don't add  so  we need to 
; right shift the three bots  or read a bit in the corresponding register
; using the loop counter count. This  counter would loop from 1 to 24  and 
;needs checks ;to move from bot1 reg to  bot2 reg then to bot3 reg. 
;This first attempt  will use the same actions as 3. and 4.  So we have after 1st read
;bot1 rotate right, Carry flag? yes                                   , clear carry flag
;bot2 rotate right, Carry flag? yes add b'1000000' = hex80    to bot1 , clear carry flag
;bot3 rotate right, Carry flag? yes add b'1000000' = hex80    to bot2 , clear carry flag

; These are the four steps  explained in words....

; The execution time of the multiplication can  be quickened considerably if
; we know when the multiplier has finished. 
; e.g. look at bot3, bot2, bot1  00 00 24  = 00000000 00000000 0010 0100
; now after five shifts right what remains are zeros. 
;  So calculation finished at  count 6, in 243 cycles; yet we continue
; until we reach count 24,  which takes 836 cycles
; What we need to add is a top bottom swap, and detect bot_first_1 for  count needed
; routine, to  optimize the 24x24 multiply  a little more.

;-----------------------------------------------------------------------
;       Mult24 is the main mult routine start point
;------------------------------------------------------------------------

Mult24:  ; 
	  	 
        bcf   STATUS,DC
        call  Regcomp    ; Puts smaller number into multiplier and finds count minimum.
        nop             
        incf  count,f
        clrf res1	 ; may have been used for storage in swap	 
        clrf res2
        clrf res3
        call  X24  ; main 24 x 24 mult  


Mult24end:   ; here back to principal program
        goto Mult24end ; dummy loop for testing
	 ; return   ; back to main program
;--------------------------------------------------------------------------
;    End of multiply 24 x 24 ; out 48,  routine
;--------------------------------------------------------------------------

Regcomp: ; compare top and bot regs 3 2 1  to decide if we need to swap.
 
	bcf STATUS,2
	bcf STATUS,1
	bcf STATUS,0

      ; These blocks test for:- "0 "... "="...  ">" ... using STATUS flags

Test33:   
 	movf  bot3,w
      subwf top3,w
	btfsc STATUS,C   ;
      goto $+ 3        ; top is  equal or bigger 
      call Swap
	goto  Leftbit    ; find leftmost 1 for count.
                       ; end case top>bot

; Think carefully about this bit. We get here because bot is 
; equal or less than top. So  we test top for zero, then if zero
; bot is also zero because we have no negative numbers 
; (e.g. bot can't be -7)
; if  STATUS not zero bot is less than top, then find
; leftmost "1" for count. 
 
	clrw             ; test top=0 (if top=0 then bot=0 (can't be minus))
      xorwf top3,w
  	btfsc STATUS,Z  
	goto  Test22     ;  both zero, continue test with top2,bot2
      goto  Leftbit
Test22:  
 	movf  bot2,w
      subwf top2,w
	btfsc STATUS,C   ;
      goto $+ 3        ; top is  equal or bigger 
      call Swap
	goto  Leftbit    ; find leftmost 1 for count.
                       ; end case top>bot
 	clrw             ; test top=0 (if top=0 then bot=0 (can't be minus))
      xorwf top2,w
  	btfsc STATUS,Z  
	goto  Test11     ;  both zero, continue test with top2,bot2
      goto  Leftbit     
Test11:
 	movf  bot1,w
      subwf top1,w
	btfsc STATUS,C   ;
      goto $+ 3        ; top is  equal or bigger 
      call Swap
	goto  Leftbit    ; find leftmost 1 for count.
                       ; end case top>bot
 	clrw             ; test top=0 (if top=0 then bot=0 (can't be minus))
      xorwf top1,w
  	btfsc STATUS,Z  
	goto  Test11     ;  both zero, continue test with top2,bot2
      movf bot1,w      ;put bot in temp   
	movwf temp 
	movlw d'08'      ;  bot is smaller so set count  to 24  
      movwf count      ;  do Leftbit
      goto  Leftbit
Swap:
	movf top3,w
      movwf res3
      movf  bot3,w
      movwf top3
      movf  res3,w
      movwf bot3
      movf top2,w
      movwf res2
      movf  bot2,w
      movwf top2
      movf  res2,w
      movwf bot2
      movf  top1,w
      movwf res1
      movf  bot1,w
      movwf top1
      movf  res1,w
      movwf bot1
	return

Leftbit:
          ;Now find in   bot3,2,1 that  which is not zero, find leftmost ONE
          ; We leave the routine with  count holding the minimum necessary number 
          ;for the multiplier adds and with multiplier as bot, the smaller number
     clrw 
     xorwf bot3,w
     btfsc STATUS, Z
     goto $+6
     movf bot3,w
     movwf temp 
     movlw d'24'
     movwf count
     goto Oneloop	

     clrw 
     xorwf bot2,w
     btfsc STATUS, Z
     goto $+6
     movf bot2,w
     movwf temp 
     movlw d'16'
     movwf count
     goto Oneloop	

     clrw 
     xorwf bot1,w
     btfsc STATUS, Z
     goto $+4
     movf bot1,w
     movwf temp 
     movlw d'08'
     movwf count
     goto Oneloop	
 
Oneloop:  ; We move through the 8 bits of temp from 8 to 1  
    btfsc temp,7
    return
    decf count,f
    rlf  temp,f 
    goto Oneloop
    return ; to main mult
 
 
;----------------------------------------------------------------
;    End of the compare / count routine
;----------------------------------------------------------------  
        

X24: ; mult loop proper starts here with entered values

Step1:   ; To add or not to add? that is the question.

	btfss bot1,0
      goto Step3      ; 0... don't add top to res
	goto Step2      ; 1... add top to res

Step2:  ; Adding top to res

     movfw       top1           
     addwf       res1,f         

     movfw       top2              
     skpnc
     incfsz      top2,w
     addwf       res2,f          

     movfw       top3              
     skpnc
     incfsz     top3,w
     addwf      res3,f          

     movfw       top4              
     skpnc
     incfsz     top4,w
     addwf      res4,f          

     movfw       top5              
     skpnc
     incfsz     top5,w
     addwf      res5,f          

     movfw       top6              
     skpnc
     incfsz     top6,w
     addwf      res6,f          

Step3: ; Left shifting top
       ; This looks the same as an add but it is different, when we shift
       ; we never sum, but rather drop the carried 1 into a space vacated 
       ; So we start with top6, check C,; top5  check C,.....; top1 check C   


	bcf   STATUS,C    
      rlf   top6,f
       	btfsc STATUS,C
		nop			;incf  top4,f

	bcf   STATUS,C
        rlf   top5,f
        btfsc STATUS,C
	incf  top6,f

	bcf   STATUS,C	
        rlf   top4,f
        btfsc STATUS,C
        incf  top5,f
 
	bcf   STATUS,C    
        rlf   top3,f
       	btfsc STATUS,C
	incf  top4,f
 
	bcf   STATUS,C
        rlf   top2,f
        btfsc STATUS,C
        incf  top3,f
 
	bcf   STATUS,C	
        rlf   top1,f
        btfsc STATUS,C
        incf  top2,f
	bcf  STATUS,C
Stepend:
      nop	

Step4: ;Right shifting bottom and terminate loop for count =   MAX, here 24.

      decf   count,f  ; we start with count =24 OR speeded up count 
 
      clrw
      xorwf count,w
      btfsc STATUS,Z
      RETURN            ; count finished
                        ; count not finished, continue Step4 
      
      rrf   bot1,f
      bcf   STATUS,C

      rrf   bot2,f
      btfss STATUS,C
      goto  $ + 4     ;  there was no carry, jump  forward 4
      movlw 0x80
      addwf bot1,f
      bcf STATUS,C

      rrf   bot3,f
      btfss STATUS,C
      goto  $ + 4     ;  there was no carry, jump  forward 4
      movlw 0x80
      addwf bot2,f
      bcf STATUS,C

      goto  Step1
;---------------------------------------------------------------------
;                     End of reg multiplication
;---------------------------------------------------------------------

 


      end ;not used when this is a subroutine
;------------------------------------------------------------------
;                        End Mult 24 x 24
;------------------------------------------------------------------