And it even works* (didn't expect that, huh?). I'm sure it can be optimized, though, so please give me some clues as to where it can be optimized.
*: Well, in most cases anyway.
Code: Select all
;-----------------------------------------------------------------------
; DivOp1by2 - halve, in place, the 8 packed-BCD bytes starting at op1+2.
;
; Every byte holds two decimal digits (high nibble, low nibble). One
; logical right shift halves both digits at once, but the bit that
; leaves a digit is worth 5 in the next lower digit:
;   * odd high digit -> its bit 0 lands in bit 3 of the same byte,
;                       where it weighs 8 instead of 5 (fixed by -3);
;   * odd low digit  -> its bit 0 falls into the carry flag and has to
;                       be added as $50 to the following byte.
;
; In:    8 bytes at op1+2 (assumed valid BCD: every nibble is 0-9)
; Out:   those 8 bytes hold the halved digits; a remainder shifted out
;        of the very last digit is dropped (truncation)
; Clobb: a, b, c, d, hl, flags
; Notes: - Only the digit bytes are rewritten. Nothing below op1+2 is
;          adjusted, and a leading zero digit produced by the halving
;          (leading digit 1 -> 0) is left in place.
;          NOTE(review): callers presumably need to renormalise the
;          number in that case - confirm against the FP format in use.
;        - No memory scratch is used; (iy+asm_flag1) is left untouched.
;-----------------------------------------------------------------------
DivOp1by2:
    ld   hl,op1+2           ; hl -> first digit byte
    ld   b,8                ; number of digit bytes to process
    ld   c,0                ; c = correction owed to the current byte (0 or $50)
-:  ld   a,(hl)             ; fetch two digits
    srl  a                  ; halve both; CF = low digit was odd, bit 7 = 0
    ld   d,0                ; d = correction for the NEXT byte (ld keeps CF intact)
    jr   nc,{+}
    ld   d,$50              ; odd low digit: next high digit gains 5
+:  add  a,c                ; apply the correction left by the previous byte;
                            ; high nibble was <= 4, so it ends up <= 9 (bit 7
                            ; may now be legitimately set and must be kept)
    ld   c,d                ; hand the new correction to the next iteration
    bit  3,a                ; was the high digit odd?
    jr   z,{+}              ; no: low digit is already correct
    sub  3                  ; yes: turn the stray 8 into the 5 it stands for
                            ; (low nibble is 8..12 here, so no borrow occurs)
+:  ld   (hl),a             ; store the halved digit pair
    inc  hl                 ; advance to the next byte
    djnz {-}                ; repeat until all bytes are done
    ret