PIC Microcontroller Delay Method

Delay X cycles (n-x) exclusive

;-----------------------------------------------------------
;Cycle-counted software delay for a PIC, built from nested
;decfsz loops that re-enter a short run of nops.
;
;d0, d1, d2, ...dn      - counters
;d0 = 0..3              - least significant 2 bits
;d1,...dn = 0..255      - more significant 8, 16, 24, ... bits
;
;Intended Total Delay =
;= d0+4*d1+4*256*d2+4*256*256*d3...+4*256^(n-1)*dn + overhead
;
;Overhead depends on the number of counters, see below.
;
;Cycle costs used by the accounting in this listing: every
;instruction is 1 cycle, except goto, addwf PCL and a decfsz
;that skips, which are 2 cycles each.
;
;WARNING: tracing the code shows that only the d0, d1 and d2
;stages match the formula. The d3 and d4 stages are miscounted
;(details at each stage), so the delay is exact only when
;d3 = d4 = 0.
;
;The counters are modified by the routine; they do not hold
;their original values afterwards.

;First preincrement all counters except d0. decfsz decrements
;before it tests, so a counter holding k+1 gives k passes
;through its loop and a counter holding 1 gives none.
;overhead = n cycles
        incf d1, f
        incf d2, f
        ...
        incf dn, f
;2 bit delay (d0 - 1 cycle resolution)
;W = (~d0) & 3 = 3 - d0 is added to PCL, which jumps over
;3 - d0 of the three nops below, so exactly d0 nops execute.
;overhead += 4 cycles (comf 1 + andlw 1 + addwf PCL 2)
;NOTE(review): addwf PCL is a computed jump; presumably PCLATH
;must match this code page and the nops must not straddle a
;256-word boundary - confirm against the device datasheet.
        comf d0, w
        andlw 0x03
        addwf PCL, f
;Re-entry points for the outer loops. Delay64Mx and Delay256Kx
;are the same address (2 nops before Delay4x); Delay1Kx has
;1 nop before Delay4x.
Delay64Mx
Delay256Kx
        nop
Delay1Kx
        nop
Delay4x
        nop
;8 bit delay (d1 - 4 cycle resolution)
;each pass = decfsz 1 + goto 2 + nop at Delay4x 1 = 4 cycles
;overhead += 2 cycles (the final decfsz, which skips the goto)
        decfsz d1, f
         goto Delay4x

;8 bit delay (d2 - 1024 cycle resolution)
;overhead += 3 cycles (decf 1 + skipping decfsz 2)
        decf d1, f
        ;d1 is 0 here; change it to 255, so the previous loop
        ;(from Delay4x) will take
        ;255*4-1=1019 cycles
        ;we need to add 5 more cycles to get 1024.
        ;4 cycles are in this loop (decf, decfsz, goto) and
        ;another 1 cycle is the nop at Delay1Kx

        decfsz d2, f
         goto Delay1Kx

;8 bit delay (d3 - intended 262144 cycle resolution)
;overhead += 3 cycles (decf 1 + skipping decfsz 2)
        decf d2, f
        ;BUG: this stage does not deliver 262144 cycles per count.
        ;d2 is 0 here and becomes 255, so the d2 loop makes only
        ;254 full 1024-cycle passes before it exits. From Delay4x
        ;the two inner loops therefore take
        ;1019+254*1024+3=261118 cycles
        ;(the figure originally given here, 1019+255*1024-1=262138,
        ;counts one d2 pass too many).
        ;Adding the 4 cycles in this loop and the 2 nops reached
        ;from Delay256Kx gives 261124 cycles per d3 count,
        ;1020 cycles short of 262144.

        decfsz d3, f
         goto Delay256Kx

;8 bit delay (d4 - intended 262144*256 cycle resolution)
;overhead += 4 cycles (decf 1 + nop 1 + skipping decfsz 2)
        decf d3, f
        ;BUG: this stage repeats the miscounted d3 stage and makes
        ;the same assumption about d3 (255 passes, where setting d3
        ;to 255 yields 254), so one d4 count is about 66.6 million
        ;cycles rather than 4*256^3 = 67108864.
        ;5 cycles are in this loop (decf, nop, decfsz, goto) and
        ;another 2 cycles are the two nops reached from Delay64Mx
        nop
        decfsz d4, f
         goto Delay64Mx

;at this point we have a 34 bit (2+4*8 counter bits) delay with
;one cycle resolution - exact only while d3 = d4 = 0, see above.
;Total Delay = overhead + 0..1.7e10 cycles
;overhead = 4+4+2+3+3+4 = 20 cycles
;(preincrement of 4 counters, then the d0, d1, d2, d3, d4 stages)
;-----------------------------------------------------------

And the same for an SX chip:

;-----------------------------------------------------------
;Cycle-counted software delay for an SX chip, built from nested
;decsz loops that re-enter a short run of nops.
;
;d0, d1, d2, ...dn      - counters
;d0 = 0..3              - least significant 2 bits
;d1,...dn = 0..255      - more significant 8, 16, 24, ... bits
;
;Intended Total Delay =
;= d0+4*d1+4*256*d2+4*256*256*d3...+4*256^(n-1)*dn + overhead
;
;Overhead depends on the number of counters, see below.
;
;Cycle costs implied by the figures in this listing: jmp and
;add PC,w are 3 cycles, a decsz that skips is 2, everything
;else is 1.
;NOTE(review): presumably SX turbo-mode timings - confirm
;against the SX datasheet.
;
;WARNING: with those costs, only the d0, d1 and d2 stages match
;the formula. The d3 and d4 stages are miscounted (details at
;each stage), so the delay is exact only when d3 = d4 = 0.
;
;The counters are modified by the routine; they do not hold
;their original values afterwards.

;First preincrement all counters except d0. decsz decrements
;before it tests, so a counter holding k+1 gives k passes
;through its loop and a counter holding 1 gives none.
;overhead = n cycles
        inc d1
        inc d2
        ...
        inc dn
;2 bit delay (d0 - 1 cycle resolution)
;W = (~d0) & 3 = 3 - d0 is added to PC, which jumps over
;3 - d0 of the three nops below, so exactly d0 nops execute.
;overhead += 5 cycles (mov 1 + and 1 + add PC,w 3)
        mov w, /d0
        and #$03
        add PC, w
;Re-entry points for the outer loops: Delay256Kx has 3 nops
;before Delay4x, Delay64Mx has 2, Delay1Kx has 1.
Delay256Kx
        nop
Delay64Mx
        nop
Delay1Kx
        nop
;8 bit delay (d1 - 4 cycle resolution)
;each pass = decsz 1 + jmp 3 = 4 cycles
;overhead += 2 cycles (the final decsz, which skips the jmp)
Delay4x
        decsz d1
         jmp Delay4x

;8 bit delay (d2 - 1024 cycle resolution)
;overhead += 3 cycles (dec 1 + skipping decsz 2)
        dec d1
        ;d1 is 0 here; change it to 255, so the previous loop
        ;(from Delay4x) will take
        ;255*4-2=1018 cycles
        ;we need to add 6 more cycles to get 1024.
        ;5 cycles are in this loop (dec, decsz, jmp) and
        ;another 1 cycle is the nop at Delay1Kx

        decsz d2
         jmp Delay1Kx

;8 bit delay (d3 - intended 262144 cycle resolution)
;overhead += 3 cycles (dec 1 + skipping decsz 2)
        dec d2
        ;BUG: this stage does not deliver 262144 cycles per count.
        ;d2 is 0 here and becomes 255, so the d2 loop makes only
        ;254 full 1024-cycle passes before it exits. From Delay4x
        ;the two inner loops therefore take
        ;1018+254*1024+3=261117 cycles
        ;(the figure originally given here, 1018+255*1024-2=262136,
        ;counts one d2 pass too many).
        ;Adding the 5 cycles in this loop and the 3 nops reached
        ;from Delay256Kx gives 261125 cycles per d3 count,
        ;1019 cycles short of 262144.

        decsz d3
         jmp Delay256Kx

;8 bit delay (d4 - intended 262144*256 cycle resolution)
;overhead += 6 cycles (dec 1 + jmp $+1 3 + skipping decsz 2)
        dec d3
        ;BUG: this stage repeats the miscounted d3 stage and makes
        ;the same assumption about d3 (255 passes, where setting d3
        ;to 255 yields 254), so it does not deliver 4*256^3 cycles
        ;per count either.
        ;8 cycles are in this loop (dec, jmp $+1, decsz, jmp) and
        ;another 2 cycles are the two nops reached from Delay64Mx

        jmp $+1
        decsz d4
         jmp Delay64Mx

;at this point we have a 34 bit (2+4*8 counter bits) delay with
;one cycle resolution - exact only while d3 = d4 = 0, see above.
;Total Delay = overhead + 0..1.7e10 cycles
;overhead = 4+5+2+3+3+6 = 23 cycles
;(preincrement of 4 counters, then the d0, d1, d2, d3, d4 stages)
;-----------------------------------------------------------

This is probably too small for SX :)

Interested:

Code:

Nikolai Golovchenko shares this code:

The PIC delay routine above works correctly only if d3 and d4 are zero (the same is probably true for the SX version as well). The problem lies in how the routine is extended by adding another loop. Basically, every time a loop is appended, it adds at least 2 cycles, which must be subtracted. Sometimes, it can be done by moving the jump target by 2 nops down, but when there are no more nops, it can be done by subtracting one from d1, which subtracts 4 cycles. So d1 is decremented, a new loop adds 3 cycles and subtracts 4, which is -1 overall. So the jump should include an additional nop. 

The corrected PIC version is below:

; Cycle-exact PIC delay using counters d0 (0..3) and d1..d4 (0..255).
; The running "Delay = ..." comments count cycles from just after the
; preincrements (which themselves cost 1 cycle per counter).
; Cycle costs assumed throughout: 1 per instruction, 2 for goto,
; addwf PCL and a decfsz that skips.
; The counters are modified; they do not hold their original values
; afterwards.

; preincrement counters except d0
; (decfsz decrements before testing, so a counter holding k+1 gives
; k passes through its loop)
                                incf    d1, f
                                incf    d2, f
                                incf    d3, f
                                incf    d4, f

; Let Delay = 0 at this point
; W = (~d0) & 3 = 3 - d0 is added to PCL, jumping over 3 - d0 of the
; three nops below, so exactly d0 nops execute.
; NOTE(review): addwf PCL is a computed jump; presumably PCLATH must
; match this code page and the nops must not straddle a 256-word
; boundary - confirm against the device datasheet.
                                comf    d0, w
                                andlw   0x03
                                addwf   PCL, f

; Delay2/Delay1/Delay0 are the loop re-entry points, costing 2, 1 and
; 0 nops respectively before the d1 loop.
                                nop
Delay2                          nop
Delay1                          nop

; Delay = 4 + d0

; d1 loop: each pass = decfsz 1 + goto 2 + nop at Delay1 1 = 4 cycles;
; the final, skipping decfsz costs 2.
Delay0                          decfsz  d1, f
                                 goto   Delay1

; Delay = 6 + d0 + 4 * d1

; d2 loop: d1 is 0 here and decf makes it 255, so the d1 loop entered
; at Delay0 runs 254*4+2 = 1018 cycles. With decf + decfsz + goto (4)
; and the two nops from Delay2 (2), one pass is exactly 1024 cycles.
                                decf    d1, f
                                decfsz  d2, f
                                 goto   Delay2

; Delay = 9 + d0 + 4 * (d1 + 256 * d2)

; d3 loop: the d2 loop exits with d1 = 255 and d2 = 0, so re-entering
; at Delay0 takes 1018 + 255*1024 + 3 = 262141 cycles. With this
; loop's decfsz + goto (3), one pass is exactly 262144 = 4*256^2
; cycles, so no decf is needed here.

                                decfsz  d3, f
                                 goto   Delay0

; Delay = 11 + d0 + 4 * (d1 + 256 * d2 + 256^2 * d3)

; d4 loop: decf takes d1 from 255 to 254, shortening the first d1 run
; by 4 cycles. That absorbs this loop's decf + decfsz + goto (4) and
; the nop at Delay1 (1), so one pass is exactly 4*256^3 cycles.
                                decf    d1, f
                                decfsz  d4, f
                                 goto    Delay1

; Delay = 14 + d0 + 4 * (d1 + 256 * d2 + 256^2 * d3 + 256^3 * d4)


; This can be extended like this (remember to preincrement each
; added counter as well). The stage shape repeats every three
; counters, starting from d2:
;   decf d1 + goto Delay2, then no decf + goto Delay0,
;   then decf d1 + goto Delay1.
;
;   decf d1, f
;   decfsz d5, f
;    goto Delay2
;
;   decfsz d6, f
;    goto Delay0
;
;   decf d1, f
;   decfsz d7, f
;    goto Delay1
;
;   decf d1, f
;   decfsz d8, f
;    goto Delay2
;
;   decfsz d9, f
;    goto Delay0