Share the low byte of the new/old pointers, and compute directly from WRAM without first loading cells into HRAM

This commit is contained in:
2019-01-03 00:56:04 +01:00
parent bdaa79be08
commit c21a794db9

View File

@@ -29,17 +29,56 @@ MemorySet:
jr nz, MemorySet jr nz, MemorySet
ret ret
; AddConstantToHL: add an assembly-time constant to HL.
; \1 = signed constant offset (may be an expression, e.g. "\4 - \2")
;
; The emitted sequence depends on the offset:
;   0            -> nothing is emitted
;   +1 / -1      -> inc hl / dec hl
;   1..255       -> 8-bit add on L, H incremented on carry
;   -255..-1     -> 8-bit sub on L, H decremented on borrow
;   otherwise    -> 16-bit add/sub carried through A
;
; destroys A and flags (nothing is destroyed for offsets 0, +1 and -1)
; does not touch B, C, D, E
AddConstantToHL: MACRO
    IF (\1) == 1
        inc hl
    ELIF (\1) == -1
        dec hl
    ELIF (\1) > 0 && (\1) <= 255
        ; low byte first, then propagate the carry into H
        ld a, l
        add a, (\1)
        ld l, a
        jr nc, .nocarry1\@
        inc h
.nocarry1\@
    ELIF (\1) >= -255 && (\1) < 0
        ; subtract the magnitude from L, then propagate the borrow into H
        ; (L is already stored before the branch, no need to store it twice)
        ld a, l
        sub a, -(\1)
        ld l, a
        jr nc, .nocarry2\@
        dec h
.nocarry2\@
    ELIF (\1) > 255
        ; full 16-bit addition: low byte with add, high byte with adc
        ld a, l
        add a, LOW(\1)
        ld l, a
        ld a, h
        adc a, HIGH(\1)
        ld h, a
    ELIF (\1) < -255
        ; full 16-bit subtraction of the magnitude: sub then sbc
        ld a, l
        sub a, LOW(-(\1))
        ld l, a
        ld a, h
        sbc a, HIGH(-(\1))
        ld h, a
    ENDC
    ; offset 0 matches no branch: HL is left untouched and no code is emitted
ENDM
AddLiveNeighbors: MACRO AddLiveNeighbors: MACRO
; \1 is offset in Cells table in HRAM ; \1 offset
; \2 is mask for 2x2 cell ; \2 is mask for 2x2 cell
; D must be high byte of a BitsSet table (for 0..15) ; D must be high byte of a BitsSet table (for 0..15)
; H register will be incremented with number of alive neighbors ; C register will be incremented with number of alive neighbors
; destroys A, C, E ; moves HL with given offset
; does not touch B, L ; destroys A, E
; does not touch B
; load current 2x2 cell and mask out bits that are not neighbors ; load current 2x2 cell and mask out bits that are not neighbors
ld c, LOW(Cells + \1) AddConstantToHL \1
ld a, [$FF00+c] ld a, [hl]
and a, \2 and a, \2
; count bits set ; count bits set
@@ -47,34 +86,29 @@ AddLiveNeighbors: MACRO
ld a, [de] ld a, [de]
; add to alive ; add to alive
add a, h add a, c
ld h, a ld c, a
ENDM ENDM
Conway: MACRO Conway: MACRO
; \1 = bit of target cell in 2x2 group ; \1 = bit of target cell in 2x2 group
; \2 = first useful neighbor 2x2 cell ; (\2, \3), (\4, \5), (\6, \7) = (offset to neighbor, neighbor mask)
; \3 = mask for first useful neighbor 2x2 cell
; \4 = mask for second useful neighbor 2x2 cell
; \5 = mask for third useful neighbor 2x2 cell
; ;
; B will be updated with cell result ; B will be updated with cell result
; destroys all other registers ; destroys all other registers
; reset alive counter ; reset alive counter
ld h, 0 ld c, 0
; set high byte of DE to BitsSet high address
ld d, HIGH(BitsSet)
; Check all neighbors ; Check all neighbors
AddLiveNeighbors 0, (~(1 << \1)) & $F AddLiveNeighbors 0, (~(1 << \1)) & $F
AddLiveNeighbors (1 + (\2 + 0) % 8), \3 AddLiveNeighbors \2, \3
AddLiveNeighbors (1 + (\2 + 1) % 8), \4 AddLiveNeighbors \4 - \2, \5
AddLiveNeighbors (1 + (\2 + 2) % 8), \5 AddLiveNeighbors \6 - \4, \7
AddConstantToHL -(\6)
; if there are 3 neighbors, it's always alive ; if there are 3 neighbors, it's always alive
ld a, h ld a, c
cp a, 3 cp a, 3
jr z, .alive\@ jr z, .alive\@
@@ -83,7 +117,7 @@ Conway: MACRO
jr nz, .dead\@ jr nz, .dead\@
; load current old cell and test if alive ; load current old cell and test if alive
ldh a, [Cells] ld a, [hl]
bit \1, a bit \1, a
jr z, .dead\@ jr z, .dead\@
@@ -93,81 +127,32 @@ Conway: MACRO
.dead\@ .dead\@
ENDM ENDM
LoadCellToHRAM: MACRO
; Copies the byte at [HL] to the high-memory address $FF00+C, then advances C.
; The body does not use any macro argument.
; expects HL = address of the byte to copy, C = low byte of the $FFxx destination
; destroys A
; increments C
; does not touch B, D, E, H, L
; load the byte HL points to
ld a, [hl]
; store it at $FF00+C
ld [$FF00+c], a
; advance the destination to the next byte
inc c
ENDM
; IncHL: move HL by an assembly-time constant offset.
; \1 = signed constant offset (may be an expression, e.g. "\2 - \1")
;
;   0        -> nothing is emitted
;   +1 / -1  -> inc hl / dec hl (no register or flag is disturbed)
;   other    -> offset loaded in DE and added to HL
;
; destroys DE and the flags changed by "add hl, de", only in the "other" case
IncHL: MACRO
    IF (\1) == 1
        inc hl
    ELIF (\1) == -1
        dec hl
    ELIF (\1) != 0
        ; generic case: 16-bit add through DE
        ld de, (\1)
        add hl, de
    ENDC
ENDM
SECTION "Load cell group and 8 neighbors to HRAM, then compute", ROM0 SECTION "Load cell group and 8 neighbors to HRAM, then compute", ROM0
; \1..\8 offset to neighbors ; \1..\8 offset to neighbors
; destroys all registers ; destroys all registers
ConwayGroup: MACRO ConwayGroup: MACRO
; pointer to HRAM ld d, HIGH(BitsSet)
ld c, LOW(Cells)
; load old pointer into hl ; load old pointer into hl
ld hl, Old ldh a, [Old]
ld a, [hl+] ld h, a
ld h, [hl] ldh a, [Progress]
ld l, a ld l, a
; load current 2x2 cell then neighbor 2x2 cells to HRAM from old buffer
LoadCellToHRAM
IncHL \1
LoadCellToHRAM
IncHL \2 - \1
LoadCellToHRAM
IncHL \3 - \2
LoadCellToHRAM
IncHL \4 - \3
LoadCellToHRAM
IncHL \5 - \4
LoadCellToHRAM
IncHL \6 - \5
LoadCellToHRAM
IncHL \7 - \6
LoadCellToHRAM
IncHL \8 - \7
LoadCellToHRAM
; reset result ; reset result
xor a xor a
ld b, a ld b, a
; compute all 4 cells in current 2x2 cell ; compute all 4 cells in current 2x2 cell group
Conway 0, 4, 10, 8, 12 Conway 0, \5, 10, \6, 8, \7, 12
Conway 1, 6, 12, 4, 5 Conway 1, \1, 5, \7, 12, \8, 4
Conway 2, 2, 3, 2, 10 Conway 2, \5, 10, \4, 2, \3, 3
Conway 3, 0, 5, 1, 3 Conway 3, \1, 5, \3, 3, \2, 1
; load new pointer ; load new pointer
ld hl, New ldh a, [New]
ld a, [hl+] ld h, a
ld h, [hl]
ld l, a
; save result to new buffer ; save result to new buffer
ld [hl], b ld [hl], b
@@ -200,28 +185,17 @@ Start:
; set old and rendered pointers to buffer0 ; set old and rendered pointers to buffer0
ld a, HIGH(Buffer0) ld a, HIGH(Buffer0)
ldh [Old + 1], a ldh [Old], a
ldh [Rendered + 1], a ldh [Rendered + 1], a
; set new pointer to buffer1 ; set new pointer to buffer1
ld a, HIGH(Buffer1) ld a, HIGH(Buffer1)
ldh [New + 1], a ldh [New], a
; set video pointer to first tilemap ; set video pointer to first tilemap
ld a, HIGH(_SCRN0) ld a, HIGH(_SCRN0)
ldh [Video + 1], a ldh [Video + 1], a
; reset low bytes of pointers (all buffers are aligned)
xor a
ldh [New], a
ldh [Old], a
ldh [Rendered], a
ldh [Video], a
; set lines and tiles left to 0 to avoid rendering in interrupts
ldh [LinesLeft], a
ldh [TilesLeft], a
; enable v-blank interrupt ; enable v-blank interrupt
ld a, IEF_VBLANK ld a, IEF_VBLANK
ld [rIE], a ld [rIE], a
@@ -260,24 +234,12 @@ Start:
ld bc, 32 * 32 * 2 ld bc, 32 * 32 * 2
call MemorySet call MemorySet
; clear screen (both buffers)
ld hl, _SCRN1
ld d, 0 ; empty tile
ld bc, 32 * 32
call MemorySet
; init buffer 0 ; init buffer 0
ld hl, Buffer0 ld hl, Buffer0
ld de, DefaultMap ld de, DefaultMap
ld bc, 20 * 18 ld bc, 20 * 18
call MemoryCopy call MemoryCopy
; set total to render to start rendering
ld a, 18
ldh [LinesLeft], a
ld a, 20
ldh [TilesLeft], a
; enable h-blank interrupt in lcd stat ; enable h-blank interrupt in lcd stat
ld a, STATF_MODE00 ld a, STATF_MODE00
ld [rSTAT], a ld [rSTAT], a
@@ -287,6 +249,12 @@ Start:
ld [rLCDC], a ld [rLCDC], a
.mainloop .mainloop
; reset low bytes of pointers
xor a
ldh [Progress], a
ldh [Rendered], a
ldh [Video], a
; start rendering ; start rendering
ld a, 20 ld a, 20
ldh [TilesLeft], a ldh [TilesLeft], a
@@ -306,9 +274,7 @@ Start:
ConwayGroup 1, 21, 20, 39, 19, 359, 340, 341 ConwayGroup 1, 21, 20, 39, 19, 359, 340, 341
; advance to next cell in top row ; advance to next cell in top row
ld hl, New ld hl, Progress
inc [hl]
ld hl, Old
inc [hl] inc [hl]
; handle all cells in top row except corners ; handle all cells in top row except corners
@@ -320,9 +286,7 @@ Start:
ConwayGroup 1, 21, 20, 19, -1, 339, 340, 341 ConwayGroup 1, 21, 20, 19, -1, 339, 340, 341
; advance to next cell in top row ; advance to next cell in top row
ld hl, New ld hl, Progress
inc [hl]
ld hl, Old
inc [hl] inc [hl]
; loop horizontally ; loop horizontally
@@ -335,9 +299,7 @@ Start:
ConwayGroup -19, 1, 20, 19, -1, 339, 340, 321 ConwayGroup -19, 1, 20, 19, -1, 339, 340, 321
; advance pointers to next row ; advance pointers to next row
ld hl, New ld hl, Progress
inc [hl]
ld hl, Old
inc [hl] inc [hl]
ld a, 16 ld a, 16
@@ -348,9 +310,7 @@ Start:
ConwayGroup 1, 21, 20, 39, 19, -1, -20, -19 ConwayGroup 1, 21, 20, 39, 19, -1, -20, -19
; advance to next cell ; advance to next cell
ld hl, New ld hl, Progress
inc [hl]
ld hl, Old
inc [hl] inc [hl]
ld a, 18 ld a, 18
@@ -361,14 +321,12 @@ Start:
ConwayGroup 1, 21, 20, 19, -1, -21, -20, -19 ConwayGroup 1, 21, 20, 19, -1, -21, -20, -19
; advance to next cell ; advance to next cell
ld hl, New ld hl, Progress
inc [hl]
ld hl, Old
inc [hl] inc [hl]
jr nz, .noCarry jr nz, .noCarry
inc hl ; old+1 ld hl, Old
inc [hl] inc [hl]
ld hl, New + 1 ld hl, New
inc [hl] inc [hl]
.noCarry .noCarry
@@ -382,9 +340,7 @@ Start:
ConwayGroup -19, 1, 20, 19, -1, -21, -20, -39 ConwayGroup -19, 1, 20, 19, -1, -21, -20, -39
; advance to next row ; advance to next row
ld hl, New ld hl, Progress
inc [hl]
ld hl, Old
inc [hl] inc [hl]
; loop vertically ; loop vertically
@@ -397,9 +353,7 @@ Start:
ConwayGroup 1, -339, -340, -321, 19, -1, -20, -19 ConwayGroup 1, -339, -340, -321, 19, -1, -20, -19
; advance to next cell in bottom row ; advance to next cell in bottom row
ld hl, New ld hl, Progress
inc [hl]
ld hl, Old
inc [hl] inc [hl]
; handle all cells in bottom row except corners ; handle all cells in bottom row except corners
@@ -411,9 +365,7 @@ Start:
ConwayGroup 1, -339, -340, -341, -1, -21, -20, -19 ConwayGroup 1, -339, -340, -341, -1, -21, -20, -19
; advance to next cell in top row ; advance to next cell in top row
ld hl, New ld hl, Progress
inc [hl]
ld hl, Old
inc [hl] inc [hl]
; loop horizontally ; loop horizontally
@@ -425,10 +377,6 @@ Start:
.bottomright .bottomright
ConwayGroup -19, -359, -340, -341, -1, -21, -20, -39 ConwayGroup -19, -359, -340, -341, -1, -21, -20, -39
; increment old pointer to first byte after buffer
ld hl, Old
inc [hl]
; wait end of rendering (not necessary, update is way slower than rendering...) ; wait end of rendering (not necessary, update is way slower than rendering...)
.waitRender .waitRender
ldh a, [LinesLeft] ldh a, [LinesLeft]
@@ -451,19 +399,19 @@ Start:
halt halt
; check which buffer has just been rendered ; check which buffer has just been rendered
ldh a, [Old + 1] ldh a, [Old]
cp a, HIGH(Buffer1) cp a, HIGH(Buffer1)
jr nc, .newToBuffer1 jr nc, .newToBuffer1
.newToBuffer0 .newToBuffer0
; set old and rendered pointers to buffer1 ; set old and rendered pointers to buffer1
ld a, HIGH(Buffer1) ld a, HIGH(Buffer1)
ldh [Old + 1], a ldh [Old], a
ldh [Rendered + 1], a ldh [Rendered + 1], a
; set new pointer to buffer0 ; set new pointer to buffer0
ld a, HIGH(Buffer0) ld a, HIGH(Buffer0)
ldh [New + 1], a ldh [New], a
; set video pointer to first tilemap ; set video pointer to first tilemap
ld a, HIGH(_SCRN1) ld a, HIGH(_SCRN1)
@@ -478,12 +426,12 @@ Start:
.newToBuffer1 .newToBuffer1
; set old and rendered pointers to buffer0 ; set old and rendered pointers to buffer0
ld a, HIGH(Buffer0) ld a, HIGH(Buffer0)
ldh [Old + 1], a ldh [Old], a
ldh [Rendered + 1], a ldh [Rendered + 1], a
; set new pointer to buffer1 ; set new pointer to buffer1
ld a, HIGH(Buffer1) ld a, HIGH(Buffer1)
ldh [New + 1], a ldh [New], a
; set video pointer to second tilemap ; set video pointer to second tilemap
ld a, HIGH(_SCRN0) ld a, HIGH(_SCRN0)
@@ -494,12 +442,6 @@ Start:
ld [rLCDC], a ld [rLCDC], a
.resetLowBytes .resetLowBytes
; reset low bytes of pointers
xor a
ldh [New], a
ldh [Old], a
ldh [Rendered], a
ldh [Video], a
jp .mainloop jp .mainloop
@@ -624,11 +566,11 @@ SECTION "Automata buffer 1", WRAM0, ALIGN[9]
Buffer1: ds 20 * 18 Buffer1: ds 20 * 18
SECTION "Compute Memory", HRAM SECTION "Compute Memory", HRAM
Old: ds 2 ; pointer to bufferX New: ds 1 ; high byte of pointer to bufferX
New: ds 2 ; pointer to bufferX Old: ds 1 ; high byte of pointer to bufferX
Progress: ds 1; low byte of pointer to bufferX, common to new and old
XLoop: ds 1 XLoop: ds 1
YLoop: ds 1 YLoop: ds 1
Cells: ds 9 ; cells loaded from old buffer, order is: self then right, clockwise
SECTION "Render Memory", HRAM SECTION "Render Memory", HRAM
LinesLeft: ds 1 ; number of lines left to render LinesLeft: ds 1 ; number of lines left to render