Simple Smoke Simulation build 2018-11-02

Post your FreeBASIC tips and tricks here. Please don’t post your code without including an explanation.
UEZ
Posts: 318
Joined: May 05, 2017 19:59
Location: Germany

Simple Smoke Simulation build 2018-11-02

Postby UEZ » Oct 25, 2018 18:20

Here a try to simulate some smoke particles. x86 only for now as the assembler functions are designed for x86!

This time no GDI / GDI+ stuff. ^^

Code: Select all

'Simple Smoke Simulation v0.15
'coded by UEZ build 2018-11-02 / thanks to Eukalyptus for the assembler functions
'x86 only!

#Include "fbgfx.bi"
Using FB

Declare Function RandomRange(fStart As Single, fEnd As Single) As Single
Declare Function _ASM_ImageBlur(pImage As Any Ptr, iRadius As Long, iExpandEdge As Long = 0) As Any Ptr
Declare Function _ASM_Cos6th(fX As Double) As Double
Declare Function _ASM_Sin6th(fX As Double) As Double

#Define PokePixel(_x, _y, _color)  *Cptr(Ulong Ptr, imgData + (_y Shl 0) * pitch + _x Shl 2) = (_color)

Dim Shared As Ulong iW, iH, iW2

iW = 450
iH = 700
iW2 = iW \ 2

Screenres iW, iH, 32

Dim As String sTitle = "FB Smoke / FPS: "
Windowtitle sTitle


Dim evt As Event
Dim As Ulong iFPS = 0, i, x, y

Dim As Any Ptr pImage = Imagecreate(iW, iH, 0, 32), pImageBlurred
Dim As Integer pitch
Dim As Any Pointer imgData
Imageinfo(pImage, , , , pitch, imgData)

Dim As Ulong iAmount = 75000
Type tParticles
   x As Single
   y As Single
   vx As Single
   vy As Single
   col As Ubyte
End Type

Dim As Single fCol = &hFF / iH, f = 0
Dim As tParticles aParticles(0 To iAmount)
Randomize
For i = 0 To iAmount
   aParticles(i).x = iW2 + RandomRange(-10.5, 10.5)
   aParticles(i).y = Rnd() * iH
   aParticles(i).vx = RandomRange(-1, 1)
   aParticles(i).vy = 2 + Rnd() * 4
   aParticles(i).col = &h80 + Rnd() * &h7F
Next

Dim fTimer As Double

fTimer = Timer

Do
   For y = 0 To iH - 1
      For x = 0 To iW - 1
         PokePixel(x, y, Rgba(&h80, &h80, &hE0, &hB0))
      Next
   Next
   For i = 0 To iAmount
      aParticles(i).x += aParticles(i).vx + Atn(_ASM_Cos6th(f / 10) * 1 / aParticles(i).y * aParticles(i).col) * 2 '+ _ASM_Cos6th(f + 6 * aParticles(i).x / (2 * aParticles(i).y)) * -2 * Atn(10 * _ASM_Cos6th(f / 500))
      aParticles(i).y -= aParticles(i).vy - Atn(-f / 100) * _ASM_Sin6th(f / 10) * 5
      aParticles(i).col -= fCol '+ Atn(_ASM_Sin6th(f / 200) * 4)
      If aParticles(i).col < 1 Then aParticles(i).col = 1
      If aParticles(i).x < 0 Or aParticles(i).x > iW - 1 Or aParticles(i).y < 0 Or aParticles(i).y > iH - 1 Then
         aParticles(i).x = iW2 + RandomRange(-40, 40)
         aParticles(i).y = iH - RandomRange(1, 10)
         aParticles(i).vx = RandomRange(-0.5, 0.5)
         aParticles(i).vy = 2 + Rnd() * 3.5
         aParticles(i).col = &h80 + Rnd() * (&h7F * _ASM_Cos6th(-f / 4))
      Else
         PokePixel(aParticles(i).x, aParticles(i).y, Rgba(aParticles(i).col, aParticles(i).col, aParticles(i).col, &hF0))
      End If
   Next
   f += 0.075
   pImageBlurred = _ASM_ImageBlur(pImage, 9)
   Put (0, 0), pImageBlurred, Trans
   Imagedestroy(pImageBlurred)
   
   iFPS += 1
   
   If Timer - fTimer > 0.99 Then
      Windowtitle sTitle & iFPS & " / @" & iAmount & " particles"
      iFPS = 0
      fTimer = Timer
   Endif
   
   If (Screenevent(@evt)) Then
      Select Case evt.Type
         Case SC_ESCAPE, EVENT_WINDOW_CLOSE
            Imagedestroy(pImage)
            Exit Do
      End Select
   Endif
   'Sleep(1)
Loop

Function RandomRange(fStart As Single, fEnd As Single) As Single
   Return Rnd() * (fEnd - fStart) + fStart
End Function

Function _ASM_ImageBlur(pImage As Any Ptr, iRadius As Long, iExpandEdge As Long = 0) As Any Ptr
   'By Eukalyptus
   Dim As Integer iWidth, iHeight, iPX, iPitch, iPitchBlur
   Dim As Any Ptr pData, pDataBlur, pDataTmp
   
   If Imageinfo(pImage, iWidth, iHeight, iPX, iPitch, pData) <> 0 Then Return 0
   If iPX <> 4 Then Return 0
   
   If iRadius < 0 Then
      iRadius = 0
   Elseif iRadius > 127 Then
      iRadius = 127
   Endif
   
   Dim As Any Ptr pImgBlur, pImgTmp
   If iExpandEdge <> 0 Then
      iWidth += iRadius * 2
      iHeight += iRadius * 2
   Endif
   
   pImgBlur = Imagecreate(iWidth, iHeight, 0, 32)
   pImgTmp = Imagecreate(iWidth, iHeight, 0, 32)
   
   Imageinfo(pImgBlur, , , , iPitchBlur, pDataBlur)
   Imageinfo(pImgTmp, , , , , pDataTmp)
   If pImgBlur = 0 Or pImgTmp = 0 Then
      Imagedestroy(pImgBlur)
      Imagedestroy(pImgTmp)
      Return 0
   Endif
   
   If iExpandEdge <> 0 Then
      Put pImgBlur, (iRadius, iRadius), pImage, Alpha
   Else
      Put pImgBlur, (0, 0), pImage, Alpha
   Endif
   

   Asm
      mov ecx, [iWidth]
      mov ebx, [iHeight]
      mov edx, [iPitchBlur]

      mov edi, [pDataTmp]
      mov esi, [pDataBlur]
       
      mov eax, [iRadius]
      inc eax
      push ebp
      mov ebp, eax
       
      Sub esp, 64

      mov [esp+8], ecx
      mov [esp+12], ebx
      mov [esp+16], edx
      mov [esp+20], ebp
      mov [esp+24], edi
      mov [esp+32], edi
      mov [esp+28], esi
      mov [esp+36], esi

      '       0   4   8   12  16       20  24     28     32      36
      'esp = [X] [Y] [W] [H] [Stride] [R] [pDst] [pSrc] [pDstO] [pSrcO]

      mov eax, 0x47000000 'ByteToFloat MSK
      movd xmm7, eax
      pshufd xmm7, xmm7, 0

      ' ####################################################
      ' # W-Loop
      ' ####################################################

      mov ebx, [esp+12]
      mov [esp+4], ebx
      _Blur_LoopW:
         mov edi, [esp+24]
         mov esi, [esp+28]
         mov edx, [esp+16] 'Stride
         Add dword Ptr[esp+24], 4 'Next RowCol(Transform vertical<->horizontal)
         Add [esp+28], edx 'Next Row

         mov edx, [esp+12] 'Y-Stride
         Shl edx, 2

         pxor xmm6, xmm6 'Reset In-Out
         pxor xmm5, xmm5 'Reset Sum
         /'
         xmm7 = Msk
         xmm6 = [AO][RO][GO][BO][AI][RI][GI][BI]
         xmm5 = [As][RS][GS][BS]

         eax = (SumDiv)
         ebx = (DivInc)
         ecx = X
         edx = Stride
         esi = Src
         edi = Dst
         ebp = R
         '/

         pxor xmm4, xmm4 'UnPack

         mov eax, 0 'Reset SumDiv
         mov ebx, 0 'Reset DivInc

         ' ----------------------------------------------------
         ' | X-In += Next
         ' ----------------------------------------------------
         mov ebp, 0 'Offset
         mov ecx, [esp+20] 'iR
         _Blur_LoopX_In:
            movd xmm0, [esi+ebp]
            punpcklbw xmm0, xmm4 '[ ][ ][ ][ ][An][Rn][Gn][Bn] Next
            paddw xmm6, xmm0 'IN+=Next
            movdqa xmm0, xmm6
            punpcklwd xmm0, xmm4 '[AI][RI][GI][BI]
            paddd xmm5, xmm0 'Stack += IN

            Add ebx, 1 'SumDivInc += 1
            Add eax, ebx 'SumDiv += Inc

            Add ebp, 4
            Sub ecx, 1
            jg _Blur_LoopX_In


         ' ----------------------------------------------------
         ' | XIn += Next / XIn -= Mid / XOut += Mid
         ' ----------------------------------------------------
         mov ecx, [esp+20] 'iR
         _Blur_LoopX_InOut:
            cvtsi2ss xmm3, eax
            rcpss xmm3, xmm3
            pshufd xmm3, xmm3, 0 'SumDiv

            movdqa xmm0, xmm5
            paddd xmm0, xmm7 ' Ubyte -> Float
            subps xmm0, xmm7 '/
            mulps xmm0, xmm3
            addps xmm0, xmm7 ' Float -> Ubyte
            psubd xmm0, xmm7 '/
            packssdw xmm0, xmm0 '[A][R][G][B][A][R][G][B]
            packuswb xmm0, xmm0 '[ARGB][ARGB][ARGB][ARGB]
            movd [edi], xmm0

            movd xmm0, [esi+ebp]
            movd xmm1, [esi]
            punpcklbw xmm0, xmm4 '[ ][ ][ ][ ][An][Rn][Gn][Bn] Next
            punpcklbw xmm1, xmm4 '[ ][ ][ ][ ][Am][Rm][Gm][Bm] Mid
            movlhps xmm0, xmm1 '[Am][Rm][Gm][Bm][An][Rn][Gn][Bn] = [Mid][Next]
            paddw xmm6, xmm0 'Out+=Mid / IN+=Next
            psubw xmm6, xmm1 '(Out-=Last) / IN-=Mid
            movdqa xmm1, xmm6
            movdqa xmm0, xmm6
            punpckhwd xmm1, xmm4 '[AO][RO][GO][BO]
            punpcklwd xmm0, xmm4 '[AI][RI][GI][BI]
            psubd xmm5, xmm1 'Stack -= Out
            paddd xmm5, xmm0 'Stack += IN

            Sub ebx, 1 'SumDivInc += 1
            Add eax, ebx 'SumDiv += Inc

            Add esi, 4
            Add edi, edx
            Sub ecx, 1
            jg _Blur_LoopX_InOut


         cvtsi2ss xmm3, eax
         rcpss xmm3, xmm3
         pshufd xmm3, xmm3, 0 'SumDiv

         mov ebx, ebp
         neg ebx 'Last Index


         ' ----------------------------------------------------
         ' | XIn += Next / XIn -= Mid / XOut += Mid / XOut -= Last
         ' ----------------------------------------------------
         mov ecx, [esp+8] 'iWidth
         Sub ecx, [esp+20]
         Sub ecx, [esp+20]
         _Blur_LoopX:
            movdqa xmm0, xmm5
            paddd xmm0, xmm7 ' Ubyte -> Float
            subps xmm0, xmm7 '/
            mulps xmm0, xmm3
            addps xmm0, xmm7 ' Float -> Ubyte
            psubd xmm0, xmm7 '/
            packssdw xmm0, xmm0 '[A][R][G][B][A][R][G][B]
            packuswb xmm0, xmm0 '[ARGB][ARGB][ARGB][ARGB]
            movd [edi], xmm0

            movd xmm0, [esi+ebp]
            movd xmm1, [esi]
            movd xmm2, [esi+ebx]
            punpcklbw xmm0, xmm4 '[ ][ ][ ][ ][An][Rn][Gn][Bn] Next
            punpcklbw xmm1, xmm4 '[ ][ ][ ][ ][Am][Rm][Gm][Bm] Mid
            punpcklbw xmm2, xmm4 '[ ][ ][ ][ ][Al][Rl][Gl][Bl] Last
            movlhps xmm0, xmm1 '[Am][Rm][Gm][Bm][An][Rn][Gn][Bn] = [Mid][Next]
            movlhps xmm1, xmm2 '[Al][Rl][Gl][Bl][Ao][Ro][Go][Bo] = [Last][Mid]
            paddw xmm6, xmm0 'Out+=Mid / IN+=Next
            psubw xmm6, xmm1 'Out-=Last / IN-=Mid
            movdqa xmm1, xmm6
            movdqa xmm0, xmm6
            punpckhwd xmm1, xmm4 '[AO][RO][GO][BO]
            punpcklwd xmm0, xmm4 '[AI][RI][GI][BI]
            psubd xmm5, xmm1 'Stack -= Out
            paddd xmm5, xmm0 'Stack += IN

            Add esi, 4
            Add edi, edx
            Sub ecx, 1
            jg _Blur_LoopX


         ' ----------------------------------------------------
         ' | XIn -= Mid / XOut += Mid / XOut -= Last
         ' ----------------------------------------------------
         mov ebp, 0 'DivInc
         mov ecx, [esp+20] 'iR
         _Blur_LoopX_Out:
            cvtsi2ss xmm3, eax
            rcpss xmm3, xmm3
            pshufd xmm3, xmm3, 0 'SumDiv

            movdqa xmm0, xmm5
            paddd xmm0, xmm7 ' Ubyte -> Float
            subps xmm0, xmm7 '/
            mulps xmm0, xmm3
            addps xmm0, xmm7 ' Float -> Ubyte
            psubd xmm0, xmm7 '/
            packssdw xmm0, xmm0 '[A][R][G][B][A][R][G][B]
            packuswb xmm0, xmm0 '[ARGB][ARGB][ARGB][ARGB]
            movd [edi], xmm0

            movd xmm0, [esi]
            movd xmm1, [esi+ebx]
            punpcklbw xmm0, xmm4 '[ ][ ][ ][ ][Am][Rm][Gm][Bm] Mid
            punpcklbw xmm1, xmm4 '[ ][ ][ ][ ][Al][Rl][Gl][Bl] Last
            movlhps xmm0, xmm1 '[Al][Rl][Gl][Bl][Am][Rm][Gm][Bm] = [Last][Mid]
            psubw xmm6, xmm0 'Out-=Last / IN-=Mid
            pslldq xmm0, 8
            paddw xmm6, xmm0 'Out+=Mid / (IN+=Next)
            movdqa xmm1, xmm6
            movdqa xmm0, xmm6
            punpckhwd xmm1, xmm4 '[AO][RO][GO][BO]
            punpcklwd xmm0, xmm4 '[AI][RI][GI][BI]
            psubd xmm5, xmm1 'Stack -= Out
            paddd xmm5, xmm0 'Stack += IN

            Add ebp, 1
            Sub eax, ebp

            Add esi, 4
            Add edi, edx
            Sub ecx, 1
            jg _Blur_LoopX_Out

         Sub dword Ptr[esp+4], 1
         jg _Blur_LoopW



      ' ####################################################
      ' # H-Loop
      ' ####################################################


      mov edi, [esp+36]
      mov esi, [esp+32]
      mov [esp+24], edi
      mov [esp+28], esi

      mov ebx, [esp+8]
      mov [esp], ebx
      _Blur_LoopH:
         mov edi, [esp+24]
         mov esi, [esp+28]
         mov edx, [esp+12]
         Shl edx, 2
         Add dword Ptr[esp+24], 4 'Next Col
         Add [esp+28], edx 'Next ColRow

         mov edx, [esp+16] 'Stride

         pxor xmm6, xmm6 'Reset In-Out
         pxor xmm5, xmm5 'Reset Sum

         /'
         xmm7 = Msk
         xmm6 = [AO][RO][GO][BO][AI][RI][GI][BI]
         xmm5 = [As][RS][GS][BS]

         eax = (SumDiv)
         ebx = (DivInc)
         ecx = X
         edx = Stride
         esi = Src
         edi = Dst
         ebp = R
         '/

         pxor xmm4, xmm4 'UnPack

         mov eax, 0 'Reset SumDiv
         mov ebx, 0 'Reset DivInc

         ' ----------------------------------------------------
         ' | X-In += Next
         ' ----------------------------------------------------
         mov ebp, 0 'Offset
         mov ecx, [esp+20] 'iR
         _Blur_LoopY_In:
            movd xmm0, [esi+ebp]
            punpcklbw xmm0, xmm4 '[ ][ ][ ][ ][An][Rn][Gn][Bn] Next
            paddw xmm6, xmm0 'IN+=Next
            movdqa xmm0, xmm6
            punpcklwd xmm0, xmm4 '[AI][RI][GI][BI]
            paddd xmm5, xmm0 'Stack += IN

            Add ebx, 1 'SumDivInc += 1
            Add eax, ebx 'SumDiv += Inc

            Add ebp, 4
            Sub ecx, 1
            jg _Blur_LoopY_In


         ' ----------------------------------------------------
         ' | XIn += Next / XIn -= Mid / XOut += Mid
         ' ----------------------------------------------------
         mov ecx, [esp+20] 'iR
         _Blur_LoopY_InOut:
            cvtsi2ss xmm3, eax
            rcpss xmm3, xmm3
            pshufd xmm3, xmm3, 0 'SumDiv

            movdqa xmm0, xmm5
            paddd xmm0, xmm7 ' Ubyte -> Float
            subps xmm0, xmm7 '/
            mulps xmm0, xmm3
            addps xmm0, xmm7 ' Float -> Ubyte
            psubd xmm0, xmm7 '/
            packssdw xmm0, xmm0 '[A][R][G][B][A][R][G][B]
            packuswb xmm0, xmm0 '[ARGB][ARGB][ARGB][ARGB]
            movd [edi], xmm0

            movd xmm0, [esi+ebp]
            movd xmm1, [esi]
            punpcklbw xmm0, xmm4 '[ ][ ][ ][ ][An][Rn][Gn][Bn] Next
            punpcklbw xmm1, xmm4 '[ ][ ][ ][ ][Am][Rm][Gm][Bm] Mid
            movlhps xmm0, xmm1 '[Am][Rm][Gm][Bm][An][Rn][Gn][Bn] = [Mid][Next]
            paddw xmm6, xmm0 'Out+=Mid / IN+=Next
            psubw xmm6, xmm1 '(Out-=Last) / IN-=Mid
            movdqa xmm1, xmm6
            movdqa xmm0, xmm6
            punpckhwd xmm1, xmm4 '[AO][RO][GO][BO]
            punpcklwd xmm0, xmm4 '[AI][RI][GI][BI]
            psubd xmm5, xmm1 'Stack -= Out
            paddd xmm5, xmm0 'Stack += IN

            Sub ebx, 1 'SumDivInc += 1
            Add eax, ebx 'SumDiv += Inc

            Add esi, 4
            Add edi, edx
            Sub ecx, 1
            jg _Blur_LoopY_InOut


         cvtsi2ss xmm3, eax
         rcpss xmm3, xmm3
         pshufd xmm3, xmm3, 0 'SumDiv

         mov ebx, ebp
         neg ebx 'Last Index


         ' ----------------------------------------------------
         ' | XIn += Next / XIn -= Mid / XOut += Mid / XOut -= Last
         ' ----------------------------------------------------
         mov ecx, [esp+12] 'iHeight
         Sub ecx, [esp+20]
         Sub ecx, [esp+20]
         _Blur_LoopY:
            movdqa xmm0, xmm5
            paddd xmm0, xmm7 ' Ubyte -> Float
            subps xmm0, xmm7 '/
            mulps xmm0, xmm3
            addps xmm0, xmm7 ' Float -> Ubyte
            psubd xmm0, xmm7 '/
            packssdw xmm0, xmm0 '[A][R][G][B][A][R][G][B]
            packuswb xmm0, xmm0 '[ARGB][ARGB][ARGB][ARGB]
            movd [edi], xmm0

            movd xmm0, [esi+ebp]
            movd xmm1, [esi]
            movd xmm2, [esi+ebx]
            punpcklbw xmm0, xmm4 '[ ][ ][ ][ ][An][Rn][Gn][Bn] Next
            punpcklbw xmm1, xmm4 '[ ][ ][ ][ ][Am][Rm][Gm][Bm] Mid
            punpcklbw xmm2, xmm4 '[ ][ ][ ][ ][Al][Rl][Gl][Bl] Last
            movlhps xmm0, xmm1 '[Am][Rm][Gm][Bm][An][Rn][Gn][Bn] = [Mid][Next]
            movlhps xmm1, xmm2 '[Al][Rl][Gl][Bl][Ao][Ro][Go][Bo] = [Last][Mid]
            paddw xmm6, xmm0 'Out+=Mid / IN+=Next
            psubw xmm6, xmm1 'Out-=Last / IN-=Mid
            movdqa xmm1, xmm6
            movdqa xmm0, xmm6
            punpckhwd xmm1, xmm4 '[AO][RO][GO][BO]
            punpcklwd xmm0, xmm4 '[AI][RI][GI][BI]
            psubd xmm5, xmm1 'Stack -= Out
            paddd xmm5, xmm0 'Stack += IN

            Add esi, 4
            Add edi, edx
            Sub ecx, 1
            jg _Blur_LoopY


         ' ----------------------------------------------------
         ' | XIn -= Mid / XOut += Mid / XOut -= Last
         ' ----------------------------------------------------
         mov ebp, 0 'DivInc
         mov ecx, [esp+20] 'iR
         _Blur_LoopY_Out:
            cvtsi2ss xmm3, eax
            rcpss xmm3, xmm3
            pshufd xmm3, xmm3, 0 'SumDiv

            movdqa xmm0, xmm5
            paddd xmm0, xmm7 ' Ubyte -> Float
            subps xmm0, xmm7 '/
            mulps xmm0, xmm3
            addps xmm0, xmm7 ' Float -> Ubyte
            psubd xmm0, xmm7 '/
            packssdw xmm0, xmm0 '[A][R][G][B][A][R][G][B]
            packuswb xmm0, xmm0 '[ARGB][ARGB][ARGB][ARGB]
            movd [edi], xmm0

            movd xmm0, [esi]
            movd xmm1, [esi+ebx]
            punpcklbw xmm0, xmm4 '[ ][ ][ ][ ][Am][Rm][Gm][Bm] Mid
            punpcklbw xmm1, xmm4 '[ ][ ][ ][ ][Al][Rl][Gl][Bl] Last
            movlhps xmm0, xmm1 '[Al][Rl][Gl][Bl][Am][Rm][Gm][Bm] = [Last][Mid]
            psubw xmm6, xmm0 'Out-=Last / IN-=Mid
            pslldq xmm0, 8
            paddw xmm6, xmm0 'Out+=Mid / (IN+=Next)
            movdqa xmm1, xmm6
            movdqa xmm0, xmm6
            punpckhwd xmm1, xmm4 '[AO][RO][GO][BO]
            punpcklwd xmm0, xmm4 '[AI][RI][GI][BI]
            psubd xmm5, xmm1 'Stack -= Out
            paddd xmm5, xmm0 'Stack += IN

            Add ebp, 1
            Sub eax, ebp

            Add esi, 4
            Add edi, edx
            Sub ecx, 1
            jg _Blur_LoopY_Out


         Sub dword Ptr[esp], 1
         jg _Blur_LoopH

      Add esp, 64
       
      pop ebp
   End Asm
   

   Imagedestroy(pImgTmp)
   Return pImgBlur
End Function

Function _ASM_Sin6th(fX As Double) As Double
   'By Eukalyptus
   Asm
      jmp 0f
      1: .Double 683565275.57643158
      2: .Double -0.0000000061763971109087229
      3: .Double 6755399441055744.0
         
      0:
         movq xmm0, [fX]
         mulsd xmm0, [1b]
         addsd xmm0, [3b]
         movd ebx, xmm0

         lea  eax, [ebx*2+0x80000000]
         sar  eax, 2
         imul eax
         sar  ebx, 31
         lea  eax, [edx*2-0x70000000]
         lea  ecx, [edx*8+edx-0x24000000]
         imul edx
         Xor  ecx, ebx
         lea  eax, [edx*8+edx+0x44A00000]
         imul ecx

         cvtsi2sd xmm0, edx
         mulsd xmm0, [2b]
         movq [Function], xmm0
   End Asm
End Function

Function _ASM_Cos6th(fX As Double) As Double
   'By Eukalyptus
   Asm
      jmp 0f
         1: .Double 683565275.57643158
         2: .Double -0.0000000061763971109087229
         3: .Double 6755399441055744.0
       
      0:
         movq xmm0, [fX]
         mulsd xmm0, [1b]
         addsd xmm0, [3b]
         movd ebx, xmm0
         
         Add ebx, 0x40000000 'SinToCos
   
         lea  eax, [ebx*2+0x80000000]
         sar  eax, 2
         imul eax
         sar  ebx, 31
         lea  eax, [edx*2-0x70000000]
         lea  ecx, [edx*8+edx-0x24000000]
         imul edx
         Xor  ecx, ebx
         lea  eax, [edx*8+edx+0x44A00000]
         imul ecx
         
         cvtsi2sd xmm0, edx
         mulsd xmm0, [2b]
         movq [Function], xmm0
   End Asm
End Function


I don't know whether the code runs also on Linux OS. -> seems to be running

Edit1: removed #include "Windows.bi" as it is not needed this time
Edit2: small modifications
Edit3: another small modifications
Last edited by UEZ on Nov 02, 2018 14:17, edited 3 times in total.
badidea
Posts: 1368
Joined: May 24, 2007 22:10
Location: The Netherlands

Re: Simple Smoke Simulation build 2018-10-28

Postby badidea » Oct 25, 2018 18:34

UEZ wrote:I don't know whether the code runs also on Linux OS.

Nope:

Code: Select all

fbc32 -w all -exx -mt -maxerr 3 "test.bas" (in directory: /home/badidea/Desktop)
ld: cannot find -lkernel32
ld: cannot find -lgdi32
ld: cannot find -lmsimg32
ld: cannot find -luser32
ld: cannot find -lversion
ld: cannot find -ladvapi32
ld: cannot find -limm32
Compilation failed.

Or (64-bit):

Code: Select all

fbc64 -w all -exx  -maxerr 3 "test.bas" (in directory: /home/badidea/Desktop)
test.c: Assembler messages:
test.c:360: Error: incorrect register `ecx' used with `q' suffix
test.c:361: Error: incorrect register `ebx' used with `q' suffix
test.c:362: Error: incorrect register `edx' used with `q' suffix
test.c:363: Error: incorrect register `edi' used with `q' suffix
test.c:364: Error: incorrect register `esi' used with `q' suffix
test.c:367: Error: operand type mismatch for `push'
test.c:660: Error: operand type mismatch for `pop'
Compilation failed.

However, if I delete '#Include "windows.bi", it runs fine with fbc 32-bit. 19 to 21 FPS.
Not sure what the problem is with 64-bit.
Last edited by badidea on Oct 25, 2018 18:56, edited 1 time in total.
UEZ
Posts: 318
Joined: May 05, 2017 19:59
Location: Germany

Re: Simple Smoke Simulation build 2018-10-28

Postby UEZ » Oct 25, 2018 18:49

badidea wrote:However, if I delete '#Include "windows.bi", it runs fine with fbc 32-bit.
Not sure what the problem is with 64-bit.


Thanks, indeed windows.bi is not needed. The assembler code is designed for x86 only (I guess). I will look for a fast x64 version...
dafhi
Posts: 1238
Joined: Jun 04, 2005 9:51

Re: Simple Smoke Simulation build 2018-10-26

Postby dafhi » Oct 26, 2018 21:33

one cannot deny the benefit of knowing asm. thanks for sharing
deltarho[1859]
Posts: 1765
Joined: Jan 02, 2017 0:34
Location: UK

Re: Simple Smoke Simulation build 2018-10-26

Postby deltarho[1859] » Oct 27, 2018 8:36

As is the code will not compile with gcc. We need to replace the assembler labels with numeric labels as Eukalyptus has done.

For example, changing

Code: Select all

_Blur_LoopW:
...
...
jg _Blur_LoopW

to

Code: Select all

0:
...
...
jg 0b

8 label changes were needed.

I was then able to compile with FBC 1.06/gcc 8.1 32-bit.

I then replaced RND with PCG32II.

On running the gas version next to the edited code I was not able to tell any difference.

The exe of the edited code was 15KB larger, 3.5KB for PCG32II and 11.5KB for the different compilers.

A worthwhile exercise as it showed that for some applications gas + RND are well up to the job at hand.
UEZ
Posts: 318
Joined: May 05, 2017 19:59
Location: Germany

Re: Simple Smoke Simulation build 2018-10-26

Postby UEZ » Oct 27, 2018 12:10

Using v0.15 build 2018-10-26 with 75.000 particles and changing the labels in the asm blur function I get following results:

-s gui -fpu SSE -vec 2 -O max -> ~32 fps (131072 bytes)
-s gui -> ~34 fps (130048 bytes)
-gen gcc -Wc -O2 -> ~25 fps (130560 bytes)
-gen gcc -O 3 -> ~27 fps (133120 bytes)
-gen gas -> ~33 fps (130048 bytes)

Test environment:
Win10 x64
Intel(R) Core(TM) i5-4300U CPU @ 1.90GHz
8 GB memory
dodicat
Posts: 5772
Joined: Jan 10, 2006 20:30
Location: Scotland

Re: Simple Smoke Simulation build 2018-10-26

Postby dodicat » Oct 27, 2018 13:19

That's a really fast blur UEZ.
Anyway, an old slogger: The new recycling plant and incinerator on the edge of town.
Fully compliant with EEC carbon emissions.
Green energy for the future bla bla bla ...

Code: Select all

Dim Shared As Integer xres,yres
Screen 19,32,,64
Screeninfo xres,yres
Dim Shared As Ulong Pointer im
im=Imagecreate(xres,yres)
Type v2
    As Single x,y
End Type

Dim Shared As Single k=4    'drag coefficient
Dim Shared As Single g=9.81  'gravity coefficient
Dim Shared As Single m=5    ' mass coefficient
Redim Shared As v2 position(),startpos()
Redim Shared As Single t(),vel(),ang(),wind(),rad()
Dim As Single w
Dim Shared As Single pi=4*Atn(1)
Dim As Long temp
#define Intrange(f,l) int(Rnd*((l+1)-(f))+(f))
#define r(f,l) Rnd * ((l) - (f)) + (f)
#define map(a,b,x,c,d) ((d)-(c))*((x)-(a))/((b)-(a))+(c)
#define onscreen(_x,_y) ((_x)>=0)*((_x)<xres)*((_y)>=0)*((_y)<yres)

Sub cloud(x As Integer, y As Integer,length As Integer=100,Alpha As Integer=105, Zoom As Single = 0,im As Any Pointer=0)
    Dim As Integer rr=255
    Dim As Integer bb=255
    Dim As Integer gg=255
    Dim As Double pi=3.14159
    #define mp(a,b,x,c,d) ((d)-(c))*((x)-(a))/((b)-(a))+(c)
    If Length<=1 Or Alpha<=1 Then Exit Sub
    Dim As Single rnded = -pi+Rnd*1*pi/2
    Dim As Single rnded2 = -pi+Rnd*-3*pi
    If Alpha<25 Then
        For i As Integer = 0 To 255-Alpha Step 100
            Var c=mp((0),(500),y,0,100)
            Line im,(-(Zoom/2)+x-length/2,y)-((Zoom/2)+x+Length/6+length*Cos(-pi/2+rnded*PI/3),y+Length/6+length*Sin(-pi/2+rnded*PI/3)),Rgba(Rr-c,Gg-c,Bb-c,Alpha)
            Line im,(-(Zoom/2)+x-length/2,y)-((Zoom/2)+x+Length/6+length*Cos(-pi/2+rnded2*PI/3),y+Length/6+length*Sin(pi/2+rnded2*PI/3)),Rgba(Rr-c,Gg-c,Bb-c,Alpha)
        Next
    End If
    cloud(-(Zoom/2)+x+length*Cos(-pi/2+rnded*PI/3),(Zoom/2)+y+length*Sin(-pi/2+rnded*PI/3),length/1.4,Alpha/1.2,Zoom,im)
    cloud(-(Zoom/2)+x+length*Cos(-pi/2+rnded2*PI/3),(Zoom/2)+y+length*Sin(pi/2+rnded2*PI/3),length/1.4,Alpha/1.2,Zoom,im)
    cloud(-(Zoom/2)+x+length*Cos(pi/3+rnded2*PI/3),(Zoom/2)+y+length*Sin(pi+rnded2*PI/3),length/1.4,Alpha/2,Zoom,im)
End Sub

Sub trees()
    Dim As Integer rotx,roty
    #define rr(first,last) Rnd * (last - first) + first
    #macro rotate(pivotx,pivoty,px,py,a,scale)
    rotx=scale*(Cos(a*.0174533)*(px-pivotx)-Sin(a*.0174533)*(py-pivoty))+pivotx
    roty=scale*(Sin(a*.0174533)*(px-pivotx)+Cos(a*.0174533)*(py-pivoty))+pivoty
    #endmacro
    #macro turnline(piv,p1,p2,ang,col,d)
    Scope
        rotate(piv.x,piv.y,p1.x,p1.y,ang,d)
        Var rot1=Type<v2>(rotx,roty)
        rotate(piv.x,piv.y,p2.x,p2.y,ang,d)
        Var rot2=Type<v2>(rotx,roty)
        Line im,(rot1.x,rot1.y)-(rot2.x,rot2.y),col
    End Scope
    #endmacro
    Dim As v2 v1,v2,piv
    Dim As Uinteger treecol
    Dim As Double pivx,pivy,pivz,l,k,d
    Dim As Integer rd,g,b
    For m As Double=0 To 50 Step 5
        Randomize m
        For n As Double=200-(m+rr(2,20)) To 990+m Step rr(3,9)
            Randomize n^2
            l=rr(2,11)
            k=rr(1,5)
            piv=Type(n,.8*yres+20*(1-Sin(.01*(n-m*5-k+40-200))))
            Line im,(piv.x,piv.y)-(piv.x+rr(-2,5),piv.y+8),Rgb((100),(35),37)
            Var cc=rr(1,40)
            For a As Double=90 To 450 Step 7
                Randomize a
                Var shader=rr(1,6)
                rd=20+shader+cc
                g=150+shader:If g>40 Then g=g-40
                b=20+shader:If b>20 Then b=b-20
                treecol=Rgb(rd/2,g/2,b/2)
                For a2 As Double=0 To l Step .3
                    If a>270 Then shader=-shader
                    treecol=Rgb(rd/2,(g-a2*shader)/2,b/2)
                    v1=Type(piv.x-a2,piv.y)
                    v2=Type(piv.x-l,piv.y)
                    turnline(piv,v1,v2,a,treecol,1)
                Next a2
            Next a
        Next n
       
    Next m
End Sub

Sub backdrop() 'hills/trees
    #macro paintsketch(_function,minx,maxx,miny,maxy,r,g,b,alp)
    For x As Double=minx To maxx Step (maxx-minx)/10000
        Var x1=(xres)*(x-minx)/(maxx-minx)
        Var y1=(yres)*(_function-maxy)/(miny-maxy)
        gr=(lasty-y1)*1000
        lasty=y1
        If gr>g Then gr=g
        Line im,(x1,yres)-(x1,y1),Rgba(r,g-gr,b,alp)
    Next x
    #endmacro
    Dim As Double lasty,gr
    paintsketch(.8*yres+20*Sin(.01*(x-200)),xres,0,yres,0,50,100,0,255)
    trees()
End Sub

Sub background
    For y As Long=0 To yres
        Var rd=map(0,yres,y,0,200)
        Var bl= map(0,yres,y,250,200)
        Var gr=map(0,yres,y,0,200)
        Line im,(0,y)-(xres,y),Rgb(rd,gr,bl)
    Next y
    cloud(100,340,150/2,250,1,im)
    cloud(300,340,150/2,250,1,im)
    cloud(600,340,150/2,250,1,im)
    backdrop
    Dim As Long counter=30
    For n As Single=300 To 310 Step .2
        counter=counter-1
        Var b=map(300,310,n,(305-20),(305+20))
        Line im,(n,390)-(b,800),Rgb(200-2*counter,100-2*counter,0)
    Next n
    counter=30
    For n As Single=502 To 508 Step .2
        counter=counter-1
        Var b=map(502,508,n,(505-15),(505+15))
        Line im,(n,490)-(b,800),Rgb(.6*(200-2*counter),.6*(100-2*counter),0)
    Next n
End Sub

Sub setup(num As Long)
    Redim position(1 To num)
    Redim startpos(1 To num)
    Redim vel(1 To num)
    Redim t(1 To num)
    Redim ang(1 To num)
    Redim wind(1 To num)
    Redim rad(1 To num)
    For n As Long=1 To num
        startpos(n)=Type<v2>(r(300,310),r(380,390))
        vel(n)=r(-220,-180)
        ang(n)=r(pi/2-.2,pi/2+.2)
        wind(n)=r(-10,10)
        rad(n)=r(3,5)
    Next n
End Sub
Sub source(n As Long,num As Long)
    If num=1 Then
        startpos(n)=Type<v2>(r(302,308),r(380,390))
        rad(n)=r(3,7)
    Else
        startpos(n)=Type<v2>(r(502,508),r(480,490))
        rad(n)=r(2,5)
    End If
    vel(n)=r(-220,-180)
    ang(n)=r(pi/2-.2,pi/2+.2)
    wind(n)=r(-10,10)
    t(n)=0
End Sub

Function Regulate(Byval MyFps As Long,Byref fps As Long) As Long
    Static As Double timervalue,_lastsleeptime,t3,frames
    frames+=1
    If (Timer-t3)>=1 Then t3=Timer:fps=frames:frames=0
    Var sleeptime=_lastsleeptime+((1/myfps)-Timer+timervalue)*1000
    If sleeptime<1 Then sleeptime=1
    _lastsleeptime=sleeptime
    timervalue=Timer
    Return sleeptime
End Function


'============================================ 
setup(15000)
background
Dim As String info="Starting Production"
Dim As Double frames=62,stabilizer=1
Dim As Long fps
Do
   
    Windowtitle "fps " &fps
    temp=temp+5
    If temp>Ubound(position)-5 Then temp=Ubound(position):info="Full Production"
    Screenlock
    Cls
    Put(0,0),im,Pset
    For n As Long=1 To temp
        w=30+10*Sin(position(n).y/10)+wind(n)
        t(n)=t(n)+.005
        position(n).x=startpos(n).x+(m/k)*(1-Exp(-(k/m)*t(n)))*(Vel(n)*Cos(ang(n))-w)+w*t(n)
        position(n).y=startpos(n).y+(m/k)*(1-Exp(-(k/m)*t(n)))*(Vel(n)*Sin(ang(n))+g*m/k)-(g*m/k)*t(n)
        Var two=Intrange(1,2)
        If onscreen(position(n).x,position(n).y)=0 Then source(n,two)
        Var cc=map(390,0,position(n).y,150,0)
        Var _alpha=map(390,0,position(n).y,50,5)
        Circle (position(n).x,position(n).y),rad(n),Rgba(cc,cc,cc,_alpha),,,.5,f
    Next n
    Draw String(1,1),info
    Screenunlock
    Sleep regulate(60,fps),1
Loop Until Len(Inkey)
Imagedestroy im 
badidea
Posts: 1368
Joined: May 24, 2007 22:10
Location: The Netherlands

Re: Simple Smoke Simulation build 2018-10-26

Postby badidea » Oct 27, 2018 16:42

dodicat wrote:Anyway, an old slogger: The new recycling plant and incinerator on the edge of town.
Fully compliant with EEC carbon emissions.
Green energy for the future bla bla bla ...

Looks peaceful, some acid rain and grey sheep walking around would make the picture complete.
dafhi
Posts: 1238
Joined: Jun 04, 2005 9:51

Re: Simple Smoke Simulation build 2018-10-26

Postby dafhi » Oct 27, 2018 19:26

those clouds.

[update] totally forgot. happy birthday :-)
Last edited by dafhi on Oct 27, 2018 21:19, edited 1 time in total.
MrSwiss
Posts: 3083
Joined: Jun 02, 2013 9:27
Location: Switzerland

Re: Simple Smoke Simulation build 2018-10-26

Postby MrSwiss » Oct 27, 2018 20:29

@dodicat, looks cool. Congrats to your birthday (yesterday), many happy returns.
Better late, than never ...

Also proof, that ASM makes little sense, these days (compatibility issues among
many others).
All the ASM dependent BASIC-Compilers, are on the way out ... (PB, e.t.c.)
because, nobody wants to re-write them, in 64 bit ASM!
Thanks, for FBC x64 (and, GCC to keep FBC alive & kicking)!
UEZ
Posts: 318
Joined: May 05, 2017 19:59
Location: Germany

Re: Simple Smoke Simulation build 2018-10-26

Postby UEZ » Oct 28, 2018 10:09

@dodicat: Belated happy birthday! Never expected that you are 70! For me 24 years to go to be at same age...
Btw, did you have that nice code above newly coded or is it something within your library?

Return to “Tips and Tricks”

Who is online

Users browsing this forum: Majestic-12 [Bot] and 2 guests