This time no GDI / GDI+ stuff. ^^
Code: Select all
'Simple Smoke Simulation v0.15
'coded by UEZ build 2018-11-02 / thanks to Eukalyptus for the assembler functions
'x86 only!
#Include "fbgfx.bi"
Using FB
'Forward declarations of the helper functions that are defined after the main loop.
Declare Function RandomRange(fStart As Single, fEnd As Single) As Single
Declare Function _ASM_ImageBlur(pImage As Any Ptr, iRadius As Long, iExpandEdge As Long = 0) As Any Ptr
Declare Function _ASM_Cos6th(fX As Double) As Double
Declare Function _ASM_Sin6th(fX As Double) As Double

'PokePixel(_x, _y, _color): writes one 32-bit pixel straight into the image buffer.
'  Relies on two names being in scope at the point of use:
'    imgData - address of the first pixel of the target image
'    pitch   - number of bytes per image row
'  _x / _y may be integer or floating point expressions; CInt() rounds them to
'  whole pixel coordinates before the byte offset is computed (the previous
'  version achieved this implicitly through "Shl", which also meant that a
'  compound argument such as "x + 1" was shifted incorrectly).
'  No bounds checking is performed - the caller must keep 0 <= _x < width and
'  0 <= _y < height.
#Define PokePixel(_x, _y, _color) *Cptr(Ulong Ptr, imgData + CInt(_y) * pitch + (CInt(_x) Shl 2)) = (_color)
'--- Window / screen -----------------------------------------------------------
Dim Shared As Ulong iW, iH, iW2
iW = 450
iH = 700
iW2 = iW \ 2  'horizontal centre, used as the emitter position
If Screenres(iW, iH, 32) <> 0 Then
	Print "Unable to open a " & iW & "x" & iH & " 32-bit graphics window."
	End 1
End If
Dim As String sTitle = "FB Smoke / FPS: "
Windowtitle sTitle
Dim evt As Event
Dim As Ulong iFPS = 0, i, x, y

'--- Off-screen frame buffer the particles are plotted into ---------------------
Dim As Any Ptr pImage = Imagecreate(iW, iH, 0, 32), pImageBlurred
Dim As Integer pitch
Dim As Any Pointer imgData
If pImage = 0 Then
	Print "Unable to allocate the frame buffer image."
	End 1
End If
'pitch (bytes per row) and imgData (first pixel) are what PokePixel uses
Imageinfo(pImage, , , , pitch, imgData)

'--- Particles -----------------------------------------------------------------
Dim As Ulong iAmount = 75000
Type tParticles
	x As Single    'position in pixels
	y As Single
	vx As Single   'velocity in pixels per frame
	vy As Single
	'Brightness (1..255). This has to be a floating point field: the per-frame
	'fade step fCol is smaller than 1, so with an integer field
	'"col -= fCol" rounds back to the old value and the smoke never fades.
	col As Single
End Type
'fade step per frame: a full &hFF brightness range spread over iH frames
Dim As Single fCol = &hFF / iH, f = 0
'NOTE: the bounds are 0..iAmount inclusive, i.e. iAmount + 1 elements
Dim As tParticles aParticles(0 To iAmount)
Randomize
'Initial state: a narrow column around the centre, spread over the full height
For i = 0 To iAmount
	With aParticles(i)
		.x = iW2 + RandomRange(-10.5, 10.5)
		.y = Rnd() * iH
		.vx = RandomRange(-1, 1)
		.vy = 2 + Rnd() * 4
		.col = &h80 + Rnd() * &h7F
	End With
Next
'time stamp of the last FPS title update
Dim fTimer As Double
fTimer = Timer
'Main loop: clear the frame buffer, advance and plot all particles, blur the
'result onto the screen, update the FPS title and handle window events.
Dim As Long bQuit = 0
Dim As Ulong iBackColor = Rgba(&h80, &h80, &hE0, &hB0)
Dim As Double fSway, fLift
Do
	'fill the off-screen image with the background colour
	For y = 0 To iH - 1
		For x = 0 To iW - 1
			PokePixel(x, y, iBackColor)
		Next
	Next

	'these two terms only depend on the frame time f, so evaluate them once
	'per frame instead of once per particle
	fSway = _ASM_Cos6th(f / 10)
	fLift = Atn(-f / 100) * _ASM_Sin6th(f / 10) * 5

	For i = 0 To iAmount
		With aParticles(i)
			.x += .vx + Atn(fSway * 1 / .y * .col) * 2 '+ _ASM_Cos6th(f + 6 * .x / (2 * .y)) * -2 * Atn(10 * _ASM_Cos6th(f / 500))
			.y -= .vy - fLift
			'apply the per-frame fade step, never dropping below 1
			.col -= fCol '+ Atn(_ASM_Sin6th(f / 200) * 4)
			If .col < 1 Then .col = 1
			If .x < 0 Or .x > iW - 1 Or .y < 0 Or .y > iH - 1 Then
				'particle left the image: respawn it near the bottom centre
				.x = iW2 + RandomRange(-40, 40)
				.y = iH - RandomRange(1, 10)
				.vx = RandomRange(-0.5, 0.5)
				.vy = 2 + Rnd() * 3.5
				.col = &h80 + Rnd() * (&h7F * _ASM_Cos6th(-f / 4))
			Else
				'still inside: plot it as a grey pixel
				PokePixel(.x, .y, Rgba(.col, .col, .col, &hF0))
			End If
		End With
	Next
	f += 0.075

	'blur the frame and show it; fall back to the unblurred frame if the
	'blur could not allocate its working images
	pImageBlurred = _ASM_ImageBlur(pImage, 9)
	If pImageBlurred <> 0 Then
		Put (0, 0), pImageBlurred, Trans
		Imagedestroy(pImageBlurred)
	Else
		Put (0, 0), pImage, Trans
	End If

	'refresh the FPS counter in the window title about once per second
	iFPS += 1
	If Timer - fTimer > 0.99 Then
		Windowtitle sTitle & iFPS & " / @" & iAmount & " particles"
		iFPS = 0
		fTimer = Timer
	End If

	'Drain the whole event queue each frame. evt.Type holds the kind of event;
	'the key itself is in evt.scancode. (Comparing evt.Type with SC_ESCAPE is
	'wrong: SC_ESCAPE has the same numeric value as EVENT_KEY_PRESS, so every
	'key press used to terminate the program.)
	While Screenevent(@evt)
		Select Case evt.Type
			Case EVENT_KEY_PRESS
				If evt.scancode = SC_ESCAPE Then bQuit = 1
			Case EVENT_WINDOW_CLOSE
				bQuit = 1
		End Select
	Wend
	'Sleep(1)
Loop Until bQuit
Imagedestroy(pImage)
'Returns a pseudo-random number between fStart and fEnd.
'  fStart - value produced when Rnd() yields 0
'  fEnd   - bound approached as Rnd() approaches 1
'Uses the global Rnd() generator, so the sequence depends on Randomize.
Function RandomRange(fStart As Single, fEnd As Single) As Single
	'scale the unit random value to the width of the interval, then shift it
	Function = fStart + (fEnd - fStart) * Rnd()
End Function
Function _ASM_ImageBlur(pImage As Any Ptr, iRadius As Long, iExpandEdge As Long = 0) As Any Ptr
	'By Eukalyptus
	'
	'Returns a new, blurred copy of a 32-bit image (x86 / SSE2 inline assembly).
	'
	'  pImage      - source image (must be 4 bytes per pixel); it is not modified
	'  iRadius     - blur radius, clamped to 0..127. When iExpandEdge is 0 it is
	'                additionally clamped so that the kernel (2 * (iRadius + 1)
	'                pixels plus one) fits into the smaller image side - the
	'                assembly loops would otherwise run past the pixel buffers.
	'  iExpandEdge - if non-zero the result is enlarged by iRadius pixels on every
	'                side and the source is placed in the centre
	'
	'  Returns the new image (the caller must release it with ImageDestroy), or 0
	'  if pImage is invalid / not 32-bit or the working images cannot be created.
	'  A source with a side shorter than 3 pixels cannot be processed by the
	'  assembly loops and is returned as an unblurred copy.
	'
	'  The blur runs in two passes. Pass 1 walks every row of the result image
	'  and writes the filtered pixels transposed into a temporary image; pass 2
	'  walks that temporary data and writes it back, transposed again, into the
	'  result image. Each pass keeps running "in"/"out" sums per channel, so the
	'  cost per pixel does not depend on the radius.
	Dim As Integer iWidth, iHeight, iPX, iPitch, iPitchBlur
	Dim As Any Ptr pData, pDataBlur, pDataTmp
	Dim As Any Ptr pImgBlur, pImgTmp
	Dim As Integer iMinSide

	If pImage = 0 Then Return 0
	If Imageinfo(pImage, iWidth, iHeight, iPX, iPitch, pData) <> 0 Then Return 0
	If iPX <> 4 Then Return 0

	If iRadius < 0 Then
		iRadius = 0
	Elseif iRadius > 127 Then
		iRadius = 127
	Endif

	'The three inner loops of each pass handle (iRadius + 1), at least one, and
	'(iRadius + 1) pixels per line, so a line must hold 2 * (iRadius + 1) + 1
	'pixels. With iExpandEdge the added border always provides that room as long
	'as the source side itself is at least 3 pixels.
	iMinSide = IIf(iWidth < iHeight, iWidth, iHeight)
	If iExpandEdge = 0 And iMinSide >= 3 Then
		If iRadius > (iMinSide - 3) \ 2 Then iRadius = (iMinSide - 3) \ 2
	End If

	If iExpandEdge <> 0 Then
		iWidth += iRadius * 2
		iHeight += iRadius * 2
	Endif

	pImgBlur = Imagecreate(iWidth, iHeight, 0, 32)
	pImgTmp = Imagecreate(iWidth, iHeight, 0, 32)
	'validate the handles before querying them
	If pImgBlur = 0 Or pImgTmp = 0 Then
		If pImgBlur <> 0 Then Imagedestroy(pImgBlur)
		If pImgTmp <> 0 Then Imagedestroy(pImgTmp)
		Return 0
	Endif
	Imageinfo(pImgBlur, , , , iPitchBlur, pDataBlur)
	Imageinfo(pImgTmp, , , , , pDataTmp)

	'copy the source into the result image (centred when the edge is expanded)
	If iExpandEdge <> 0 Then
		Put pImgBlur, (iRadius, iRadius), pImage, Alpha
	Else
		Put pImgBlur, (0, 0), pImage, Alpha
	Endif

	'too small for the assembly loops: hand back the unblurred copy
	If iMinSide < 3 Then
		Imagedestroy(pImgTmp)
		Return pImgBlur
	End If

	Asm
		'all BASIC locals are fetched before ebp is repurposed below
		mov ecx, [iWidth]
		mov ebx, [iHeight]
		mov edx, [iPitchBlur]
		mov edi, [pDataTmp]
		mov esi, [pDataBlur]
		mov eax, [iRadius]
		inc eax
		push ebp
		mov ebp, eax
		Sub esp, 64
		mov [esp+8], ecx
		mov [esp+12], ebx
		mov [esp+16], edx
		mov [esp+20], ebp
		mov [esp+24], edi
		mov [esp+32], edi
		mov [esp+28], esi
		mov [esp+36], esi
		' 0 4 8 12 16 20 24 28 32 36
		'esp = [X] [Y] [W] [H] [Stride] [R] [pDst] [pSrc] [pDstO] [pSrcO]
		'R is iRadius + 1
		mov eax, 0x47000000 'ByteToFloat MSK (bit pattern of the float 32768.0)
		movd xmm7, eax
		pshufd xmm7, xmm7, 0
		' ####################################################
		' # W-Loop
		' ####################################################
		mov ebx, [esp+12]
		mov [esp+4], ebx
		_Blur_LoopW:
		mov edi, [esp+24]
		mov esi, [esp+28]
		mov edx, [esp+16] 'Stride
		Add dword Ptr[esp+24], 4 'Next RowCol(Transform vertical<->horizontal)
		Add [esp+28], edx 'Next Row
		mov edx, [esp+12] 'Y-Stride
		Shl edx, 2
		pxor xmm6, xmm6 'Reset In-Out
		pxor xmm5, xmm5 'Reset Sum
		/'
		xmm7 = Msk
		xmm6 = [AO][RO][GO][BO][AI][RI][GI][BI]
		xmm5 = [As][RS][GS][BS]
		eax = (SumDiv)
		ebx = (DivInc)
		ecx = X
		edx = Stride
		esi = Src
		edi = Dst
		ebp = R
		'/
		pxor xmm4, xmm4 'UnPack
		mov eax, 0 'Reset SumDiv
		mov ebx, 0 'Reset DivInc
		' ----------------------------------------------------
		' | X-In += Next
		' ----------------------------------------------------
		mov ebp, 0 'Offset
		mov ecx, [esp+20] 'iR
		_Blur_LoopX_In:
		movd xmm0, [esi+ebp]
		punpcklbw xmm0, xmm4 '[ ][ ][ ][ ][An][Rn][Gn][Bn] Next
		paddw xmm6, xmm0 'IN+=Next
		movdqa xmm0, xmm6
		punpcklwd xmm0, xmm4 '[AI][RI][GI][BI]
		paddd xmm5, xmm0 'Stack += IN
		Add ebx, 1 'SumDivInc += 1
		Add eax, ebx 'SumDiv += Inc
		Add ebp, 4
		Sub ecx, 1
		jg _Blur_LoopX_In
		' ----------------------------------------------------
		' | XIn += Next / XIn -= Mid / XOut += Mid
		' ----------------------------------------------------
		mov ecx, [esp+20] 'iR
		_Blur_LoopX_InOut:
		cvtsi2ss xmm3, eax
		rcpss xmm3, xmm3
		pshufd xmm3, xmm3, 0 'SumDiv
		movdqa xmm0, xmm5
		paddd xmm0, xmm7 ' Ubyte -> Float
		subps xmm0, xmm7 '/
		mulps xmm0, xmm3
		addps xmm0, xmm7 ' Float -> Ubyte
		psubd xmm0, xmm7 '/
		packssdw xmm0, xmm0 '[A][R][G][B][A][R][G][B]
		packuswb xmm0, xmm0 '[ARGB][ARGB][ARGB][ARGB]
		movd [edi], xmm0
		movd xmm0, [esi+ebp]
		movd xmm1, [esi]
		punpcklbw xmm0, xmm4 '[ ][ ][ ][ ][An][Rn][Gn][Bn] Next
		punpcklbw xmm1, xmm4 '[ ][ ][ ][ ][Am][Rm][Gm][Bm] Mid
		movlhps xmm0, xmm1 '[Am][Rm][Gm][Bm][An][Rn][Gn][Bn] = [Mid][Next]
		paddw xmm6, xmm0 'Out+=Mid / IN+=Next
		psubw xmm6, xmm1 '(Out-=Last) / IN-=Mid
		movdqa xmm1, xmm6
		movdqa xmm0, xmm6
		punpckhwd xmm1, xmm4 '[AO][RO][GO][BO]
		punpcklwd xmm0, xmm4 '[AI][RI][GI][BI]
		psubd xmm5, xmm1 'Stack -= Out
		paddd xmm5, xmm0 'Stack += IN
		Sub ebx, 1 'SumDivInc -= 1
		Add eax, ebx 'SumDiv += Inc
		Add esi, 4
		Add edi, edx
		Sub ecx, 1
		jg _Blur_LoopX_InOut
		'divisor stays constant for the middle section, so compute it once
		cvtsi2ss xmm3, eax
		rcpss xmm3, xmm3
		pshufd xmm3, xmm3, 0 'SumDiv
		mov ebx, ebp
		neg ebx 'Last Index
		' ----------------------------------------------------
		' | XIn += Next / XIn -= Mid / XOut += Mid / XOut -= Last
		' ----------------------------------------------------
		mov ecx, [esp+8] 'iWidth
		Sub ecx, [esp+20]
		Sub ecx, [esp+20]
		_Blur_LoopX:
		movdqa xmm0, xmm5
		paddd xmm0, xmm7 ' Ubyte -> Float
		subps xmm0, xmm7 '/
		mulps xmm0, xmm3
		addps xmm0, xmm7 ' Float -> Ubyte
		psubd xmm0, xmm7 '/
		packssdw xmm0, xmm0 '[A][R][G][B][A][R][G][B]
		packuswb xmm0, xmm0 '[ARGB][ARGB][ARGB][ARGB]
		movd [edi], xmm0
		movd xmm0, [esi+ebp]
		movd xmm1, [esi]
		movd xmm2, [esi+ebx]
		punpcklbw xmm0, xmm4 '[ ][ ][ ][ ][An][Rn][Gn][Bn] Next
		punpcklbw xmm1, xmm4 '[ ][ ][ ][ ][Am][Rm][Gm][Bm] Mid
		punpcklbw xmm2, xmm4 '[ ][ ][ ][ ][Al][Rl][Gl][Bl] Last
		movlhps xmm0, xmm1 '[Am][Rm][Gm][Bm][An][Rn][Gn][Bn] = [Mid][Next]
		movlhps xmm1, xmm2 '[Al][Rl][Gl][Bl][Ao][Ro][Go][Bo] = [Last][Mid]
		paddw xmm6, xmm0 'Out+=Mid / IN+=Next
		psubw xmm6, xmm1 'Out-=Last / IN-=Mid
		movdqa xmm1, xmm6
		movdqa xmm0, xmm6
		punpckhwd xmm1, xmm4 '[AO][RO][GO][BO]
		punpcklwd xmm0, xmm4 '[AI][RI][GI][BI]
		psubd xmm5, xmm1 'Stack -= Out
		paddd xmm5, xmm0 'Stack += IN
		Add esi, 4
		Add edi, edx
		Sub ecx, 1
		jg _Blur_LoopX
		' ----------------------------------------------------
		' | XIn -= Mid / XOut += Mid / XOut -= Last
		' ----------------------------------------------------
		mov ebp, 0 'DivInc
		mov ecx, [esp+20] 'iR
		_Blur_LoopX_Out:
		cvtsi2ss xmm3, eax
		rcpss xmm3, xmm3
		pshufd xmm3, xmm3, 0 'SumDiv
		movdqa xmm0, xmm5
		paddd xmm0, xmm7 ' Ubyte -> Float
		subps xmm0, xmm7 '/
		mulps xmm0, xmm3
		addps xmm0, xmm7 ' Float -> Ubyte
		psubd xmm0, xmm7 '/
		packssdw xmm0, xmm0 '[A][R][G][B][A][R][G][B]
		packuswb xmm0, xmm0 '[ARGB][ARGB][ARGB][ARGB]
		movd [edi], xmm0
		movd xmm0, [esi]
		movd xmm1, [esi+ebx]
		punpcklbw xmm0, xmm4 '[ ][ ][ ][ ][Am][Rm][Gm][Bm] Mid
		punpcklbw xmm1, xmm4 '[ ][ ][ ][ ][Al][Rl][Gl][Bl] Last
		movlhps xmm0, xmm1 '[Al][Rl][Gl][Bl][Am][Rm][Gm][Bm] = [Last][Mid]
		psubw xmm6, xmm0 'Out-=Last / IN-=Mid
		pslldq xmm0, 8
		paddw xmm6, xmm0 'Out+=Mid / (IN+=Next)
		movdqa xmm1, xmm6
		movdqa xmm0, xmm6
		punpckhwd xmm1, xmm4 '[AO][RO][GO][BO]
		punpcklwd xmm0, xmm4 '[AI][RI][GI][BI]
		psubd xmm5, xmm1 'Stack -= Out
		paddd xmm5, xmm0 'Stack += IN
		Add ebp, 1
		Sub eax, ebp
		Add esi, 4
		Add edi, edx
		Sub ecx, 1
		jg _Blur_LoopX_Out
		Sub dword Ptr[esp+4], 1
		jg _Blur_LoopW
		' ####################################################
		' # H-Loop
		' ####################################################
		'swap the roles: the temporary data is now the source, the result image the destination
		mov edi, [esp+36]
		mov esi, [esp+32]
		mov [esp+24], edi
		mov [esp+28], esi
		mov ebx, [esp+8]
		mov [esp], ebx
		_Blur_LoopH:
		mov edi, [esp+24]
		mov esi, [esp+28]
		mov edx, [esp+12]
		Shl edx, 2
		Add dword Ptr[esp+24], 4 'Next Col
		Add [esp+28], edx 'Next ColRow
		mov edx, [esp+16] 'Stride
		pxor xmm6, xmm6 'Reset In-Out
		pxor xmm5, xmm5 'Reset Sum
		/'
		xmm7 = Msk
		xmm6 = [AO][RO][GO][BO][AI][RI][GI][BI]
		xmm5 = [As][RS][GS][BS]
		eax = (SumDiv)
		ebx = (DivInc)
		ecx = X
		edx = Stride
		esi = Src
		edi = Dst
		ebp = R
		'/
		pxor xmm4, xmm4 'UnPack
		mov eax, 0 'Reset SumDiv
		mov ebx, 0 'Reset DivInc
		' ----------------------------------------------------
		' | X-In += Next
		' ----------------------------------------------------
		mov ebp, 0 'Offset
		mov ecx, [esp+20] 'iR
		_Blur_LoopY_In:
		movd xmm0, [esi+ebp]
		punpcklbw xmm0, xmm4 '[ ][ ][ ][ ][An][Rn][Gn][Bn] Next
		paddw xmm6, xmm0 'IN+=Next
		movdqa xmm0, xmm6
		punpcklwd xmm0, xmm4 '[AI][RI][GI][BI]
		paddd xmm5, xmm0 'Stack += IN
		Add ebx, 1 'SumDivInc += 1
		Add eax, ebx 'SumDiv += Inc
		Add ebp, 4
		Sub ecx, 1
		jg _Blur_LoopY_In
		' ----------------------------------------------------
		' | XIn += Next / XIn -= Mid / XOut += Mid
		' ----------------------------------------------------
		mov ecx, [esp+20] 'iR
		_Blur_LoopY_InOut:
		cvtsi2ss xmm3, eax
		rcpss xmm3, xmm3
		pshufd xmm3, xmm3, 0 'SumDiv
		movdqa xmm0, xmm5
		paddd xmm0, xmm7 ' Ubyte -> Float
		subps xmm0, xmm7 '/
		mulps xmm0, xmm3
		addps xmm0, xmm7 ' Float -> Ubyte
		psubd xmm0, xmm7 '/
		packssdw xmm0, xmm0 '[A][R][G][B][A][R][G][B]
		packuswb xmm0, xmm0 '[ARGB][ARGB][ARGB][ARGB]
		movd [edi], xmm0
		movd xmm0, [esi+ebp]
		movd xmm1, [esi]
		punpcklbw xmm0, xmm4 '[ ][ ][ ][ ][An][Rn][Gn][Bn] Next
		punpcklbw xmm1, xmm4 '[ ][ ][ ][ ][Am][Rm][Gm][Bm] Mid
		movlhps xmm0, xmm1 '[Am][Rm][Gm][Bm][An][Rn][Gn][Bn] = [Mid][Next]
		paddw xmm6, xmm0 'Out+=Mid / IN+=Next
		psubw xmm6, xmm1 '(Out-=Last) / IN-=Mid
		movdqa xmm1, xmm6
		movdqa xmm0, xmm6
		punpckhwd xmm1, xmm4 '[AO][RO][GO][BO]
		punpcklwd xmm0, xmm4 '[AI][RI][GI][BI]
		psubd xmm5, xmm1 'Stack -= Out
		paddd xmm5, xmm0 'Stack += IN
		Sub ebx, 1 'SumDivInc -= 1
		Add eax, ebx 'SumDiv += Inc
		Add esi, 4
		Add edi, edx
		Sub ecx, 1
		jg _Blur_LoopY_InOut
		'divisor stays constant for the middle section, so compute it once
		cvtsi2ss xmm3, eax
		rcpss xmm3, xmm3
		pshufd xmm3, xmm3, 0 'SumDiv
		mov ebx, ebp
		neg ebx 'Last Index
		' ----------------------------------------------------
		' | XIn += Next / XIn -= Mid / XOut += Mid / XOut -= Last
		' ----------------------------------------------------
		mov ecx, [esp+12] 'iHeight
		Sub ecx, [esp+20]
		Sub ecx, [esp+20]
		_Blur_LoopY:
		movdqa xmm0, xmm5
		paddd xmm0, xmm7 ' Ubyte -> Float
		subps xmm0, xmm7 '/
		mulps xmm0, xmm3
		addps xmm0, xmm7 ' Float -> Ubyte
		psubd xmm0, xmm7 '/
		packssdw xmm0, xmm0 '[A][R][G][B][A][R][G][B]
		packuswb xmm0, xmm0 '[ARGB][ARGB][ARGB][ARGB]
		movd [edi], xmm0
		movd xmm0, [esi+ebp]
		movd xmm1, [esi]
		movd xmm2, [esi+ebx]
		punpcklbw xmm0, xmm4 '[ ][ ][ ][ ][An][Rn][Gn][Bn] Next
		punpcklbw xmm1, xmm4 '[ ][ ][ ][ ][Am][Rm][Gm][Bm] Mid
		punpcklbw xmm2, xmm4 '[ ][ ][ ][ ][Al][Rl][Gl][Bl] Last
		movlhps xmm0, xmm1 '[Am][Rm][Gm][Bm][An][Rn][Gn][Bn] = [Mid][Next]
		movlhps xmm1, xmm2 '[Al][Rl][Gl][Bl][Ao][Ro][Go][Bo] = [Last][Mid]
		paddw xmm6, xmm0 'Out+=Mid / IN+=Next
		psubw xmm6, xmm1 'Out-=Last / IN-=Mid
		movdqa xmm1, xmm6
		movdqa xmm0, xmm6
		punpckhwd xmm1, xmm4 '[AO][RO][GO][BO]
		punpcklwd xmm0, xmm4 '[AI][RI][GI][BI]
		psubd xmm5, xmm1 'Stack -= Out
		paddd xmm5, xmm0 'Stack += IN
		Add esi, 4
		Add edi, edx
		Sub ecx, 1
		jg _Blur_LoopY
		' ----------------------------------------------------
		' | XIn -= Mid / XOut += Mid / XOut -= Last
		' ----------------------------------------------------
		mov ebp, 0 'DivInc
		mov ecx, [esp+20] 'iR
		_Blur_LoopY_Out:
		cvtsi2ss xmm3, eax
		rcpss xmm3, xmm3
		pshufd xmm3, xmm3, 0 'SumDiv
		movdqa xmm0, xmm5
		paddd xmm0, xmm7 ' Ubyte -> Float
		subps xmm0, xmm7 '/
		mulps xmm0, xmm3
		addps xmm0, xmm7 ' Float -> Ubyte
		psubd xmm0, xmm7 '/
		packssdw xmm0, xmm0 '[A][R][G][B][A][R][G][B]
		packuswb xmm0, xmm0 '[ARGB][ARGB][ARGB][ARGB]
		movd [edi], xmm0
		movd xmm0, [esi]
		movd xmm1, [esi+ebx]
		punpcklbw xmm0, xmm4 '[ ][ ][ ][ ][Am][Rm][Gm][Bm] Mid
		punpcklbw xmm1, xmm4 '[ ][ ][ ][ ][Al][Rl][Gl][Bl] Last
		movlhps xmm0, xmm1 '[Al][Rl][Gl][Bl][Am][Rm][Gm][Bm] = [Last][Mid]
		psubw xmm6, xmm0 'Out-=Last / IN-=Mid
		pslldq xmm0, 8
		paddw xmm6, xmm0 'Out+=Mid / (IN+=Next)
		movdqa xmm1, xmm6
		movdqa xmm0, xmm6
		punpckhwd xmm1, xmm4 '[AO][RO][GO][BO]
		punpcklwd xmm0, xmm4 '[AI][RI][GI][BI]
		psubd xmm5, xmm1 'Stack -= Out
		paddd xmm5, xmm0 'Stack += IN
		Add ebp, 1
		Sub eax, ebp
		Add esi, 4
		Add edi, edx
		Sub ecx, 1
		jg _Blur_LoopY_Out
		Sub dword Ptr[esp], 1
		jg _Blur_LoopH
		'release the scratch area and restore the frame pointer
		Add esp, 64
		pop ebp
	End Asm

	Imagedestroy(pImgTmp)
	Return pImgBlur
End Function
'Sine approximation in x86 inline assembly, evaluated with 32-bit integer arithmetic.
'  fX      - angle in radians
'  Returns the approximated sine of fX as a Double.
'  NOTE(review): the name suggests a 6th-order polynomial; the order and the
'  accuracy have not been verified here.
Function _ASM_Sin6th(fX As Double) As Double
'By Eukalyptus
Asm
'skip over the constants that are embedded in the code stream
jmp 0f
'1: 2^32 / (2 * Pi) - scales radians so that one full turn spans 2^32
1: .Double 683565275.57643158
'2: scale factor applied to the integer result at the end
'   (presumably maps it back to -1..1 - TODO confirm)
2: .Double -0.0000000061763971109087229
'3: 1.5 * 2^52 - adding it leaves the rounded integer value of the sum in the
'   low 32 bits of the double
3: .Double 6755399441055744.0
0:
movq xmm0, [fX]
mulsd xmm0, [1b]
addsd xmm0, [3b]
'ebx = low 32 bits = the angle as a wrapping 32-bit integer
movd ebx, xmm0
lea eax, [ebx*2+0x80000000]
sar eax, 2
'one-operand imul: edx:eax = eax * operand; only the high half (edx) is used afterwards
imul eax
'ebx becomes 0 or -1 depending on the sign bit of the angle
sar ebx, 31
lea eax, [edx*2-0x70000000]
lea ecx, [edx*8+edx-0x24000000]
imul edx
'bitwise-invert ecx when ebx is -1
Xor ecx, ebx
lea eax, [edx*8+edx+0x44A00000]
imul ecx
'convert the high half of the last product to double and scale it
cvtsi2sd xmm0, edx
mulsd xmm0, [2b]
movq [Function], xmm0
End Asm
End Function
'Cosine approximation in x86 inline assembly, evaluated with 32-bit integer arithmetic.
'Same instruction sequence as _ASM_Sin6th, except that 0x40000000 (a quarter of
'the 2^32 full-turn range) is added to the integer angle first.
'  fX      - angle in radians
'  Returns the approximated cosine of fX as a Double.
'  NOTE(review): the name suggests a 6th-order polynomial; the order and the
'  accuracy have not been verified here.
Function _ASM_Cos6th(fX As Double) As Double
'By Eukalyptus
Asm
'skip over the constants that are embedded in the code stream
jmp 0f
'1: 2^32 / (2 * Pi) - scales radians so that one full turn spans 2^32
1: .Double 683565275.57643158
'2: scale factor applied to the integer result at the end
'   (presumably maps it back to -1..1 - TODO confirm)
2: .Double -0.0000000061763971109087229
'3: 1.5 * 2^52 - adding it leaves the rounded integer value of the sum in the
'   low 32 bits of the double
3: .Double 6755399441055744.0
0:
movq xmm0, [fX]
mulsd xmm0, [1b]
addsd xmm0, [3b]
'ebx = low 32 bits = the angle as a wrapping 32-bit integer
movd ebx, xmm0
Add ebx, 0x40000000 'SinToCos
lea eax, [ebx*2+0x80000000]
sar eax, 2
'one-operand imul: edx:eax = eax * operand; only the high half (edx) is used afterwards
imul eax
'ebx becomes 0 or -1 depending on the sign bit of the shifted angle
sar ebx, 31
lea eax, [edx*2-0x70000000]
lea ecx, [edx*8+edx-0x24000000]
imul edx
'bitwise-invert ecx when ebx is -1
Xor ecx, ebx
lea eax, [edx*8+edx+0x44A00000]
imul ecx
'convert the high half of the last product to double and scale it
cvtsi2sd xmm0, edx
mulsd xmm0, [2b]
movq [Function], xmm0
End Asm
End Function
Edit1: removed #include "Windows.bi" as it is not needed this time
Edit2: small modifications
Edit3: more small modifications