The most significant bit is left unchanged (only the least significant seven bits increment), and I believe it's incremented once every M-cycle. driesguldolf wrote: (Just for clarity: R increases on every instruction, right?)
[TI ASM] Useful routines.
Moderator: MaxCoderz Staff
-
- MCF Legend
- Posts: 1601
- Joined: Mon 20 Dec, 2004 8:45 am
- Location: Budapest, Absurdistan
- Contact:
I simply iterated over some area within page 0 to create the fire effect in the menu of Acelgoyobis (basically scrolling up a line, ANDing the screen with the TIOS code and redrawing text with an OR sprite routine in each frame), and it looks pretty good. Here’s the code: qarnos wrote: I'll play around with some other RNGs.
Code: Select all
; FireEffect - one pass of the fire effect over a screen buffer.
; Every byte is replaced by the byte 12 positions further on, ANDed with a
; byte read through a moving pattern pointer (IX).
;
; IN:        HL = screen address, BC = screen size in bytes
; DESTROYED: AF, BC, DE, HL, IX
; NOTES:     * Self-modifying: the final IX is stored back into the operand of
;              the "ld ix" instruction below, so this code must be located in
;              writable memory.
;            * Reads BC bytes starting at HL+12, i.e. up to 12 bytes beyond HL+BC.
;            * BC is decremented before it is tested, so BC = 0 on entry means
;              65536 iterations.
FireEffect: ; BC - screen size in bytes; HL - screen address
push hl
ld de,12 ; source runs 12 bytes ahead of the destination (presumably one screen row - confirm)
add hl,de
ex de,hl ; DE = source (HL + 12)
pop hl ; HL = destination
FE_CHack .equ $+2 ; address of the 16-bit operand of the ld ix below (just past its 2 opcode bytes)
ld ix,1000 ; pattern pointer; 1000 is only the assembled-in starting value, patched on every call
FE_Clear:
ld a,(de)
and (ix) ; mask the source byte with the pattern byte
ld (hl),a
inc de
inc hl
inc ix
dec bc
ld a,b ; 16-bit zero test: dec bc does not affect the flags
or c
jp nz,FE_Clear
ld a,ixh
and 37 ; NOTE(review): 37 is decimal (%00100101), keeping only bits 0, 2 and 5 of IXH - confirm this is the intended wrap mask
ld ixh,a
ld (FE_CHack),ix ; the next call resumes from this pattern pointer
ret
- driesguldolf
- Extreme Poster
- Posts: 395
- Joined: Thu 17 May, 2007 4:49 pm
- Location: $4080
- Contact:
As far as I know there are 2 random routines: one based on R, and another that calculates the value (the slow one). Liazon wrote: I thought irandom already uses r. driesguldolf wrote: Couldn't you simply use the refresh register? qarnos wrote: I found the reason why my routine is slow and the CoBB version is jerky - irandom is slow, slow, slooooooowww.
I'll play around with some other RNGs.
16-bit square -> 32-bit result
This routine will take a 16-bit unsigned value in BC and produce the 32-bit square of that number in HL:DE.
I don't know if this is the most efficient way to do it - I couldn't find any useful information on the interwebs so I had to come up with it myself. The algorithm used is as follows:
And the Z80 version. A bit large (~150 bytes) since it breaks it up into 8, 16 and 32 bit loops so it only uses 32-bit arithmetic when it needs to.
I don't know if this is the most efficient way to do it - I couldn't find any useful information on the interwebs so I had to come up with it myself. The algorithm used is as follows:
Code: Select all
/*
 * Square an unsigned value using only shifts and additions.
 *
 * The bits of x are consumed from least to most significant. Writing the
 * already-consumed low part as a, adding bit i uses the identity
 *
 *     (a + 2^i)^2 = a^2 + 2^(i+1) * a + 4^i
 *
 * so two running terms are kept alongside the accumulated square:
 *   weight = 4^i            (the square of the current bit)
 *   cross  = 2^(i+1) * a    (the cross term for the current bit)
 *
 * All arithmetic is unsigned and therefore wraps modulo UINT_MAX + 1.
 */
unsigned square(unsigned x)
{
    unsigned cross = 0;
    unsigned weight = 1;
    unsigned acc = 0;

    while (x != 0) {
        if ((x & 1u) != 0) {
            acc += weight + cross;
            cross += weight << 1;   /* fold this bit into the cross term */
        }
        cross <<= 1;                /* advance both terms to bit i + 1 */
        weight <<= 2;
        x >>= 1;
    }
    return acc;
}
Code: Select all
;-------------------------------------------------------------------------------
; SquareBC - square a 16-bit unsigned value, giving a 32-bit result.
;
; The input is consumed one bit at a time, lowest bit first, in three stages
; of increasing width (8-, 16- and 32-bit arithmetic), so wide arithmetic is
; only used once the intermediate values need it. Each stage returns early
; when no input bits remain.
;
; INPUTS:
;
; * BC - Number to square
;
; OUTPUTS:
;
; * HL - High word of square(BC)
; * DE - Low word of square(BC)
;
; DESTROYED:
;
; * AF
;
; BC and IX are saved on the stack and restored before returning.
;-------------------------------------------------------------------------------
SquareBC:
;-------------------------------------------------------------------
; Process the low 4 bits of BC using 8-bit arithmetic.
;
; A = sqrsum
; D = sqrbit
; E = result
;-------------------------------------------------------------------
push bc ; [11] preserve the caller's BC
xor a ; [4] sqrsum = 0
ld de, $0100 ; [10] sqrbit = 1, result = 0
_sqrLoop8: add a, a ; [4] sqrsum * 2
; sra leaves bit 7 of C in place rather than clearing it; after four
; shifts the low nibble of C still holds input bits 4-7 for the next stage.
sra c ; [8] next input bit -> carry
jr nc, _nextBit8 ; [12/7]
;-------------------------------------------------------------------
; When the next bit of BC is set...
;-------------------------------------------------------------------
ld l, a ; [4] save sqrsum
add a, e ; [4] += result
add a, d ; [4] += sqrbit
ld e, a ; [4] save result
ld a, l ; [4] restore sqrsum
sla d ; [8] sqrbit * 2
add a, d ; [4] sqrsum + sqrbit * 2
sla d ; [8] sqrbit * 4 in total; carry out after the 4th bit
jp nc, _sqrLoop8 ; [10]
jp _sqrDone8 ; [10]
;-------------------------------------------------------------------
; When the next bit of BC is reset...
;-------------------------------------------------------------------
_nextBit8: sla d ; [8] sqrbit * 4
sla d ; [8] carry out after the 4th bit
jp nc, _sqrLoop8 ; [10]
;-------------------------------------------------------------------
; Clean up from 8-bit mode and check if there is more to do
;-------------------------------------------------------------------
_sqrDone8: ld h, d ; [4] move sqrsum into HL (D is zero).
ld l, a ; [4]
ld a, c ; [4] check if there is anything left
or b ; [4]
jp nz, _sqrGo16 ; [10]
ld l, d ; [4] zero HL (H is already zero) and return with DE = result
pop bc ; [10]
ret ; [10]
;-------------------------------------------------------------------
; Process the next 4 bits with 16-bit arithmetic
;
; A = remaining bits of the low input byte
; HL = sqrsum
; DE = result
; BC = sqrbit
;-------------------------------------------------------------------
_sqrGo16: ld a, c ; [4] input bits 4-7, now in the low nibble
push bc ; [11] keep B (high input byte); popped into AF at _sqrDone16
ld b, 1 ; [7] sqrbit = $0100
ld c, d ; [4] (D is zero)
_sqrLoop16: add hl, hl ; [11] sqrsum *= 2
rra ; [4] shift next bit into carry
jr nc, _nextBit16 ; [12/7]
;-------------------------------------------------------------------
; When the next bit of BC is set...
;-------------------------------------------------------------------
ex de, hl ; [4] HL = result, DE = sqrsum
add hl, de ; [11] result += sqrsum
add hl, bc ; [11] result += sqrbit
ex de, hl ; [4] DE = result, HL = sqrsum
sla b ; [8] sqrbit * 2 (C is zero, so only B needs shifting)
add hl, bc ; [11] sqrsum += sqrbit * 2
sla b ; [8] sqrbit * 4 in total; carry out after the 4th bit
jp nc, _sqrLoop16 ; [10]
jp _sqrDone16 ; [10]
;-------------------------------------------------------------------
; When the next bit of BC is reset...
;-------------------------------------------------------------------
_nextBit16: sla b ; [8] sqrbit * 4
sla b ; [8] carry out after the 4th bit
jp nc, _sqrLoop16 ; [10]
;-------------------------------------------------------------------
; Check if there are more bits to process before continuing.
;-------------------------------------------------------------------
_sqrDone16: pop af ; [10] A = high byte of initial BC (F is overwritten by the OR)
or a ; [4]
jp nz, _sqrGo32 ; [10]
ld h, b ; [4] B is zero
ld l, b ; [4] HL = 0, DE = result
pop bc ; [10]
ret ; [10]
;-------------------------------------------------------------------
; Process the remaining 8 input bits with 32-bit arithmetic
;
; A = high byte of the input
; DE:HL = sqrsum
; (SP):IX = result
; BC = sqrbit (upper word only; the lower word of sqrbit is zero here)
;-------------------------------------------------------------------
_sqrGo32: push ix ; [15] preserve IX
ld ixh, d ; [8] move resultl into IX
ld ixl, e ; [8]
ld d, b ; [4] zero sqrsumh
ld e, b ; [4]
push de ; [11] initial resulth = 0
ld c, $01 ; [7] initial sqrbit: BC = $0001 (B is zero)
_sqrLoop32: add hl, hl ; [11] sqrsum * 2
rl e ; [8]
rl d ; [8]
; sra keeps bit 7 of A. If that bit is set A never reaches zero, so
; the loop then ends via the sqrbit carry after all 8 bits.
sra a ; [8] next bit -> carry; Z set when nothing is left in A
jr nc, _nextBit32 ; [12/7]
;-------------------------------------------------------------------
; When the next bit of BC is set...
;-------------------------------------------------------------------
ex de, hl ; [4] DE = sqrsuml, HL = sqrsumh
add ix, de ; [15] resultl += sqrsuml
ex de, hl ; [4] DE = sqrsumh, HL = sqrsuml (carry is untouched)
ex (sp), hl ; [19] HL = resulth, (SP) = sqrsuml
adc hl, de ; [15] resulth += sqrsumh + carry from the low word
add hl, bc ; [11] resulth += sqrbit
ex (sp), hl ; [19] (SP) = resulth, HL = sqrsuml
ex de, hl ; [4] DE = sqrsuml, HL = sqrsumh
sla c ; [8] sqrbit *= 2
rl b ; [8]
add hl, bc ; [11] sqrsumh += sqrbit * 2
ex de, hl ; [4] DE = sqrsumh, HL = sqrsuml
sla c ; [8] sqrbit *= 2 again (* 4 in total)
rl b ; [8] carry out after the 8th bit
jp nc, _sqrLoop32 ; [10]
jp _sqrDone32 ; [10]
;-------------------------------------------------------------------
; When the next bit of BC is reset...
;-------------------------------------------------------------------
_nextBit32: jr z, _sqrDone32 ; [12/7] Z is still from the sra: no bits left, finish early
sla c ; [8] sqrbit * 4
rl b ; [8]
sla c ; [8]
rl b ; [8] carry out after the 8th bit
jp nc, _sqrLoop32 ; [10]
_sqrDone32: ld d, ixh ; [8] move result into HL:DE
ld e, ixl ; [8]
pop hl ; [10] pop high result
pop ix ; [14] restore the caller's IX
pop bc ; [10] restore the caller's BC
ret ; [10]
"I don't know why a refrigerator is now involved, but put that aside for now". - Jim e on unitedti.org
avatar courtesy of driesguldolf.
avatar courtesy of driesguldolf.
32-bit RNG
Here's a slow 32-bit pseudo-random number generator. It's useful if you want an RNG with a long (4 294 967 296) period, but I wouldn't be using it in any inner loops. You can eliminate 270 T-States by inlining the CALLs.
It's based on this algorithm attributed to D McDonnell from the SAS Institute C compiler:
This Z80 version drops the 0x7fffffff mask, since its only point seems to be to eliminate the sign bit (since it returns a signed int). All it really does is cut the period of the RNG in half. If you want it in there, uncomment the "res 7, h" line.
It's based on this algorithm attributed to D McDonnell from the SAS Institute C compiler:
Code: Select all
/*
 * Return the next pseudo-random number in the range 0 .. 0x7fffffff and
 * advance the generator state held in lastrand.
 *
 * The shift/add chain below multiplies the state by 3677, so
 *
 *     r = (3677 * lastrand + 0xe60) & 0x7fffffff
 *
 * The arithmetic is carried out on an unsigned type: left-shifting a
 * negative value or overflowing a signed int is undefined behaviour in C,
 * and lastrand is -1 whenever the previous result was 0. Unsigned
 * arithmetic wraps, and the final mask keeps only the low 31 bits, so the
 * value fits in an int.
 */
int rand(void)
{
    unsigned long seed = (unsigned long)lastrand;
    unsigned long acc;
    int r;

    acc = (seed << 3) - seed;   /*    7 * seed */
    acc = (acc << 3) + seed;    /*   57 * seed */
    acc = (acc << 1) + seed;    /*  115 * seed */
    acc = (acc << 4) - seed;    /* 1839 * seed */
    acc = (acc << 1) - seed;    /* 3677 * seed */
    acc += 0xe60UL;

    r = (int)(acc & 0x7fffffffUL);
    lastrand = r - 1;
    return r;
}
Code: Select all
;-------------------------------------------------------------------------------
; Rand32 - 32-bit pseudo-random number generator.
;
; Computes 3677 * seed + $0e60 with shifts, additions and subtractions,
; returns that value and stores (value - 1) as the new seed.
;
; INPUTS:
;
; MEMORY
; * (SRand32) - 32-bit seed value, low word first (label defined elsewhere)
;
; OUTPUTS:
;
; REGISTERS
; * HL - High word of pseudo-random number
; * DE - Low word of pseudo-random number
;
; MEMORY
; * (SRand32) - new 32-bit seed value
;
; DESTROYED:
;
; REGISTERS
; * F, BC
;
;
; TIMINGS: (Not including CALL)
;
; * 1230 T-States
;   NOTE(review): the section totals below ([1028] + [50] + [134]) add up
;   to 1212 - confirm which figure is right.
;
;-------------------------------------------------------------------------------
Rand32:
;-------------------------------------------------------------------
; Call a bunch of subroutines [1028]
;
; Throughout the subroutines the running value is held in DE:HL
; (DE = high word, HL = low word).
;-------------------------------------------------------------------
ld hl, (SRand32 + $00) ; [20] low word
ld de, (SRand32 + $02) ; [20] high word
call _shl3_32 ; [108]    8 * seed
call _sub_32 ; [109]    7 * seed
call _shl3_32 ; [108]   56 * seed
call _add_32 ; [101]   57 * seed
call _shl1_32 ; [54]  114 * seed
call _add_32 ; [101]  115 * seed
call _shl4_32 ; [135] 1840 * seed
call _sub_32 ; [109] 1839 * seed
call _shl1_32 ; [54] 3678 * seed
call _sub_32 ; [109] 3677 * seed
;-------------------------------------------------------------------
; Add $0e60 to the running value [50]
;-------------------------------------------------------------------
ld bc, $0e60 ; [10]
add hl, bc ; [11] low word += $0e60
ld bc, $0000 ; [10] ld does not disturb the carry
ex de, hl ; [4] low <-> high: now HL = high word, DE = low word
adc hl, bc ; [15] propagate the carry into the high word
;res 7, h ; [8] uncomment this if desired
;-------------------------------------------------------------------
; Store rand - 1 in SRand32 and return [134]
;-------------------------------------------------------------------
push hl ; [11] keep the result (high word) for the caller
push de ; [11] keep the result (low word) for the caller
scf ; [4] carry set and BC = 0, so the next sbc subtracts 1
ex de, hl ; [4] HL = low word
sbc hl, bc ; [15] low word - 1
ex de, hl ; [4] HL = high word, DE = low word - 1
sbc hl, bc ; [15] propagate the borrow into the high word
ld (SRand32 + $00), de ; [20]
ld (SRand32 + $02), hl ; [20]
pop de ; [10] DE = low word of the result
pop hl ; [10] HL = high word of the result
ret ; [10]
;-------------------------------------------------------------------
; Subtract original seed from current value [109]
;-------------------------------------------------------------------
_sub_32: or a ; [4] clear carry for the first sbc
ld bc, (SRand32 + $00) ; [20]
sbc hl, bc ; [15] low words
ex de, hl ; [4]
ld bc, (SRand32 + $02) ; [20]
sbc hl, bc ; [15] high words, with the borrow from the low words
ex de, hl ; [4] back to DE = high, HL = low
ret ; [10]
;-------------------------------------------------------------------
; Add original seed to current value [101]
;-------------------------------------------------------------------
_add_32: ld bc, (SRand32 + $00) ; [20]
add hl, bc ; [11] low words
ex de, hl ; [4]
ld bc, (SRand32 + $02) ; [20]
adc hl, bc ; [15] high words, with the carry from the low words
ex de, hl ; [4] back to DE = high, HL = low
ret ; [10]
;-------------------------------------------------------------------
; Left shift DE:HL four places [135]
; (one shift here, then falls through into _shl3_32)
;-------------------------------------------------------------------
_shl4_32: add hl, hl ; [11]
rl e ; [8]
rl d ; [8]
;-------------------------------------------------------------------
; Left shift DE:HL three places [108]
; (two shifts here, then falls through into _shl1_32)
;-------------------------------------------------------------------
_shl3_32: add hl, hl ; [11]
rl e ; [8]
rl d ; [8]
add hl, hl ; [11]
rl e ; [8]
rl d ; [8]
;-------------------------------------------------------------------
; Left shift DE:HL one place [54]
;-------------------------------------------------------------------
_shl1_32: add hl, hl ; [11] carry out of HL ...
rl e ; [8] ... rotates into E, then into D
rl d ; [8]
ret ; [10]
;-------------------------------------------------------------------------------
; End of Rand32
;-------------------------------------------------------------------------------
"I don't know why a refrigerator is now involved, but put that aside for now". - Jim e on unitedti.org
avatar courtesy of driesguldolf.
avatar courtesy of driesguldolf.