Simple Smoke Simulation build 2018-11-02

Post your FreeBASIC source, examples, tips and tricks here. Please don’t post code without including an explanation.
Post Reply
UEZ
Posts: 988
Joined: May 05, 2017 19:59
Location: Germany

Simple Smoke Simulation build 2018-11-02

Post by UEZ »

Here a try to simulate some smoke particles. x86 only for now as the assembler functions are designed for x86!

This time no GDI / GDI+ stuff. ^^

Code: Select all

'Simple Smoke Simulation v0.15
'coded by UEZ build 2018-11-02 / thanks to Eukalyptus for the assembler functions
'x86 only!

#Include "fbgfx.bi"
Using FB

Declare Function RandomRange(fStart As Single, fEnd As Single) As Single
Declare Function _ASM_ImageBlur(pImage As Any Ptr, iRadius As Long, iExpandEdge As Long = 0) As Any Ptr
Declare Function _ASM_Cos6th(fX As Double) As Double
Declare Function _ASM_Sin6th(fX As Double) As Double

#Define PokePixel(_x, _y, _color)  *Cptr(Ulong Ptr, imgData + (_y Shl 0) * pitch + _x Shl 2) = (_color)

Dim Shared As Ulong iW, iH, iW2

iW = 450
iH = 700
iW2 = iW \ 2

Screenres iW, iH, 32

Dim As String sTitle = "FB Smoke / FPS: "
Windowtitle sTitle


Dim evt As Event
Dim As Ulong iFPS = 0, i, x, y

Dim As Any Ptr pImage = Imagecreate(iW, iH, 0, 32), pImageBlurred
Dim As Integer pitch
Dim As Any Pointer imgData
Imageinfo(pImage, , , , pitch, imgData)

Dim As Ulong iAmount = 75000
Type tParticles
	x As Single
	y As Single
	vx As Single
	vy As Single
	col As Ubyte
End Type

Dim As Single fCol = &hFF / iH, f = 0
Dim As tParticles aParticles(0 To iAmount)
Randomize
For i = 0 To iAmount
	aParticles(i).x = iW2 + RandomRange(-10.5, 10.5)
	aParticles(i).y = Rnd() * iH
	aParticles(i).vx = RandomRange(-1, 1)
	aParticles(i).vy = 2 + Rnd() * 4
	aParticles(i).col = &h80 + Rnd() * &h7F
Next

Dim fTimer As Double

fTimer = Timer

Do
	For y = 0 To iH - 1
		For x = 0 To iW - 1
			PokePixel(x, y, Rgba(&h80, &h80, &hE0, &hB0))
		Next
	Next
	For i = 0 To iAmount
		aParticles(i).x += aParticles(i).vx + Atn(_ASM_Cos6th(f / 10) * 1 / aParticles(i).y * aParticles(i).col) * 2 '+ _ASM_Cos6th(f + 6 * aParticles(i).x / (2 * aParticles(i).y)) * -2 * Atn(10 * _ASM_Cos6th(f / 500))
		aParticles(i).y -= aParticles(i).vy - Atn(-f / 100) * _ASM_Sin6th(f / 10) * 5
		aParticles(i).col -= fCol '+ Atn(_ASM_Sin6th(f / 200) * 4)
		If aParticles(i).col < 1 Then aParticles(i).col = 1
		If aParticles(i).x < 0 Or aParticles(i).x > iW - 1 Or aParticles(i).y < 0 Or aParticles(i).y > iH - 1 Then
			aParticles(i).x = iW2 + RandomRange(-40, 40)
			aParticles(i).y = iH - RandomRange(1, 10)
			aParticles(i).vx = RandomRange(-0.5, 0.5)
			aParticles(i).vy = 2 + Rnd() * 3.5
			aParticles(i).col = &h80 + Rnd() * (&h7F * _ASM_Cos6th(-f / 4))
		Else
			PokePixel(aParticles(i).x, aParticles(i).y, Rgba(aParticles(i).col, aParticles(i).col, aParticles(i).col, &hF0))
		End If
	Next
	f += 0.075
	pImageBlurred = _ASM_ImageBlur(pImage, 9)
	Put (0, 0), pImageBlurred, Trans
	Imagedestroy(pImageBlurred)
	
	iFPS += 1
	
	If Timer - fTimer > 0.99 Then
		Windowtitle sTitle & iFPS & " / @" & iAmount & " particles"
		iFPS = 0
		fTimer = Timer
	Endif
	
	If (Screenevent(@evt)) Then
		Select Case evt.Type 
			Case SC_ESCAPE, EVENT_WINDOW_CLOSE
				Imagedestroy(pImage)
				Exit Do
		End Select
	Endif
	'Sleep(1)
Loop 

Function RandomRange(fStart As Single, fEnd As Single) As Single
   Return Rnd() * (fEnd - fStart) + fStart
End Function

Function _ASM_ImageBlur(pImage As Any Ptr, iRadius As Long, iExpandEdge As Long = 0) As Any Ptr 
   'By Eukalyptus 
   Dim As Integer iWidth, iHeight, iPX, iPitch, iPitchBlur 
   Dim As Any Ptr pData, pDataBlur, pDataTmp 
    
   If Imageinfo(pImage, iWidth, iHeight, iPX, iPitch, pData) <> 0 Then Return 0 
   If iPX <> 4 Then Return 0 
    
   If iRadius < 0 Then 
      iRadius = 0 
   Elseif iRadius > 127 Then 
      iRadius = 127 
   Endif 
    
   Dim As Any Ptr pImgBlur, pImgTmp 
   If iExpandEdge <> 0 Then 
      iWidth += iRadius * 2 
      iHeight += iRadius * 2 
   Endif 
    
   pImgBlur = Imagecreate(iWidth, iHeight, 0, 32) 
   pImgTmp = Imagecreate(iWidth, iHeight, 0, 32) 
    
   Imageinfo(pImgBlur, , , , iPitchBlur, pDataBlur) 
   Imageinfo(pImgTmp, , , , , pDataTmp) 
   If pImgBlur = 0 Or pImgTmp = 0 Then 
      Imagedestroy(pImgBlur) 
      Imagedestroy(pImgTmp) 
      Return 0 
   Endif 
    
   If iExpandEdge <> 0 Then 
      Put pImgBlur, (iRadius, iRadius), pImage, Alpha 
   Else 
      Put pImgBlur, (0, 0), pImage, Alpha 
   Endif 
    

   Asm 
      mov ecx, [iWidth] 
      mov ebx, [iHeight] 
      mov edx, [iPitchBlur] 

      mov edi, [pDataTmp] 
      mov esi, [pDataBlur] 
       
      mov eax, [iRadius] 
      inc eax 
      push ebp 
      mov ebp, eax 
       
      Sub esp, 64 

      mov [esp+8], ecx 
      mov [esp+12], ebx 
      mov [esp+16], edx 
      mov [esp+20], ebp 
      mov [esp+24], edi 
      mov [esp+32], edi 
      mov [esp+28], esi 
      mov [esp+36], esi 

      '       0   4   8   12  16       20  24     28     32      36 
      'esp = [X] [Y] [W] [H] [Stride] [R] [pDst] [pSrc] [pDstO] [pSrcO] 

      mov eax, 0x47000000 'ByteToFloat MSK 
      movd xmm7, eax 
      pshufd xmm7, xmm7, 0 

      ' #################################################### 
      ' # W-Loop 
      ' #################################################### 

      mov ebx, [esp+12] 
      mov [esp+4], ebx 
      _Blur_LoopW: 
         mov edi, [esp+24] 
         mov esi, [esp+28] 
         mov edx, [esp+16] 'Stride 
         Add dword Ptr[esp+24], 4 'Next RowCol(Transform vertical<->horizontal) 
         Add [esp+28], edx 'Next Row 

         mov edx, [esp+12] 'Y-Stride 
         Shl edx, 2 

         pxor xmm6, xmm6 'Reset In-Out 
         pxor xmm5, xmm5 'Reset Sum 
         /' 
         xmm7 = Msk 
         xmm6 = [AO][RO][GO][BO][AI][RI][GI][BI] 
         xmm5 = [As][RS][GS][BS] 

         eax = (SumDiv) 
         ebx = (DivInc) 
         ecx = X 
         edx = Stride 
         esi = Src 
         edi = Dst 
         ebp = R 
         '/ 

         pxor xmm4, xmm4 'UnPack 

         mov eax, 0 'Reset SumDiv 
         mov ebx, 0 'Reset DivInc 

         ' ---------------------------------------------------- 
         ' | X-In += Next 
         ' ---------------------------------------------------- 
         mov ebp, 0 'Offset 
         mov ecx, [esp+20] 'iR 
         _Blur_LoopX_In: 
            movd xmm0, [esi+ebp] 
            punpcklbw xmm0, xmm4 '[ ][ ][ ][ ][An][Rn][Gn][Bn] Next 
            paddw xmm6, xmm0 'IN+=Next 
            movdqa xmm0, xmm6 
            punpcklwd xmm0, xmm4 '[AI][RI][GI][BI] 
            paddd xmm5, xmm0 'Stack += IN 

            Add ebx, 1 'SumDivInc += 1 
            Add eax, ebx 'SumDiv += Inc 

            Add ebp, 4 
            Sub ecx, 1 
            jg _Blur_LoopX_In 


         ' ---------------------------------------------------- 
         ' | XIn += Next / XIn -= Mid / XOut += Mid 
         ' ---------------------------------------------------- 
         mov ecx, [esp+20] 'iR 
         _Blur_LoopX_InOut: 
            cvtsi2ss xmm3, eax 
            rcpss xmm3, xmm3 
            pshufd xmm3, xmm3, 0 'SumDiv 

            movdqa xmm0, xmm5 
            paddd xmm0, xmm7 ' Ubyte -> Float 
            subps xmm0, xmm7 '/ 
            mulps xmm0, xmm3 
            addps xmm0, xmm7 ' Float -> Ubyte 
            psubd xmm0, xmm7 '/ 
            packssdw xmm0, xmm0 '[A][R][G][B][A][R][G][B] 
            packuswb xmm0, xmm0 '[ARGB][ARGB][ARGB][ARGB] 
            movd [edi], xmm0 

            movd xmm0, [esi+ebp] 
            movd xmm1, [esi] 
            punpcklbw xmm0, xmm4 '[ ][ ][ ][ ][An][Rn][Gn][Bn] Next 
            punpcklbw xmm1, xmm4 '[ ][ ][ ][ ][Am][Rm][Gm][Bm] Mid 
            movlhps xmm0, xmm1 '[Am][Rm][Gm][Bm][An][Rn][Gn][Bn] = [Mid][Next] 
            paddw xmm6, xmm0 'Out+=Mid / IN+=Next 
            psubw xmm6, xmm1 '(Out-=Last) / IN-=Mid 
            movdqa xmm1, xmm6 
            movdqa xmm0, xmm6 
            punpckhwd xmm1, xmm4 '[AO][RO][GO][BO] 
            punpcklwd xmm0, xmm4 '[AI][RI][GI][BI] 
            psubd xmm5, xmm1 'Stack -= Out 
            paddd xmm5, xmm0 'Stack += IN 

            Sub ebx, 1 'SumDivInc += 1 
            Add eax, ebx 'SumDiv += Inc 

            Add esi, 4 
            Add edi, edx 
            Sub ecx, 1 
            jg _Blur_LoopX_InOut 


         cvtsi2ss xmm3, eax 
         rcpss xmm3, xmm3 
         pshufd xmm3, xmm3, 0 'SumDiv 

         mov ebx, ebp 
         neg ebx 'Last Index 


         ' ---------------------------------------------------- 
         ' | XIn += Next / XIn -= Mid / XOut += Mid / XOut -= Last 
         ' ---------------------------------------------------- 
         mov ecx, [esp+8] 'iWidth 
         Sub ecx, [esp+20] 
         Sub ecx, [esp+20] 
         _Blur_LoopX: 
            movdqa xmm0, xmm5 
            paddd xmm0, xmm7 ' Ubyte -> Float 
            subps xmm0, xmm7 '/ 
            mulps xmm0, xmm3 
            addps xmm0, xmm7 ' Float -> Ubyte 
            psubd xmm0, xmm7 '/ 
            packssdw xmm0, xmm0 '[A][R][G][B][A][R][G][B] 
            packuswb xmm0, xmm0 '[ARGB][ARGB][ARGB][ARGB] 
            movd [edi], xmm0 

            movd xmm0, [esi+ebp] 
            movd xmm1, [esi] 
            movd xmm2, [esi+ebx] 
            punpcklbw xmm0, xmm4 '[ ][ ][ ][ ][An][Rn][Gn][Bn] Next 
            punpcklbw xmm1, xmm4 '[ ][ ][ ][ ][Am][Rm][Gm][Bm] Mid 
            punpcklbw xmm2, xmm4 '[ ][ ][ ][ ][Al][Rl][Gl][Bl] Last 
            movlhps xmm0, xmm1 '[Am][Rm][Gm][Bm][An][Rn][Gn][Bn] = [Mid][Next] 
            movlhps xmm1, xmm2 '[Al][Rl][Gl][Bl][Ao][Ro][Go][Bo] = [Last][Mid] 
            paddw xmm6, xmm0 'Out+=Mid / IN+=Next 
            psubw xmm6, xmm1 'Out-=Last / IN-=Mid 
            movdqa xmm1, xmm6 
            movdqa xmm0, xmm6 
            punpckhwd xmm1, xmm4 '[AO][RO][GO][BO] 
            punpcklwd xmm0, xmm4 '[AI][RI][GI][BI] 
            psubd xmm5, xmm1 'Stack -= Out 
            paddd xmm5, xmm0 'Stack += IN 

            Add esi, 4 
            Add edi, edx 
            Sub ecx, 1 
            jg _Blur_LoopX 


         ' ---------------------------------------------------- 
         ' | XIn -= Mid / XOut += Mid / XOut -= Last 
         ' ---------------------------------------------------- 
         mov ebp, 0 'DivInc 
         mov ecx, [esp+20] 'iR 
         _Blur_LoopX_Out: 
            cvtsi2ss xmm3, eax 
            rcpss xmm3, xmm3 
            pshufd xmm3, xmm3, 0 'SumDiv 

            movdqa xmm0, xmm5 
            paddd xmm0, xmm7 ' Ubyte -> Float 
            subps xmm0, xmm7 '/ 
            mulps xmm0, xmm3 
            addps xmm0, xmm7 ' Float -> Ubyte 
            psubd xmm0, xmm7 '/ 
            packssdw xmm0, xmm0 '[A][R][G][B][A][R][G][B] 
            packuswb xmm0, xmm0 '[ARGB][ARGB][ARGB][ARGB] 
            movd [edi], xmm0 

            movd xmm0, [esi] 
            movd xmm1, [esi+ebx] 
            punpcklbw xmm0, xmm4 '[ ][ ][ ][ ][Am][Rm][Gm][Bm] Mid 
            punpcklbw xmm1, xmm4 '[ ][ ][ ][ ][Al][Rl][Gl][Bl] Last 
            movlhps xmm0, xmm1 '[Al][Rl][Gl][Bl][Am][Rm][Gm][Bm] = [Last][Mid] 
            psubw xmm6, xmm0 'Out-=Last / IN-=Mid 
            pslldq xmm0, 8 
            paddw xmm6, xmm0 'Out+=Mid / (IN+=Next) 
            movdqa xmm1, xmm6 
            movdqa xmm0, xmm6 
            punpckhwd xmm1, xmm4 '[AO][RO][GO][BO] 
            punpcklwd xmm0, xmm4 '[AI][RI][GI][BI] 
            psubd xmm5, xmm1 'Stack -= Out 
            paddd xmm5, xmm0 'Stack += IN 

            Add ebp, 1 
            Sub eax, ebp 

            Add esi, 4 
            Add edi, edx 
            Sub ecx, 1 
            jg _Blur_LoopX_Out 

         Sub dword Ptr[esp+4], 1 
         jg _Blur_LoopW 



      ' #################################################### 
      ' # H-Loop 
      ' #################################################### 


      mov edi, [esp+36] 
      mov esi, [esp+32] 
      mov [esp+24], edi 
      mov [esp+28], esi 

      mov ebx, [esp+8] 
      mov [esp], ebx 
      _Blur_LoopH: 
         mov edi, [esp+24] 
         mov esi, [esp+28] 
         mov edx, [esp+12] 
         Shl edx, 2 
         Add dword Ptr[esp+24], 4 'Next Col 
         Add [esp+28], edx 'Next ColRow 

         mov edx, [esp+16] 'Stride 

         pxor xmm6, xmm6 'Reset In-Out 
         pxor xmm5, xmm5 'Reset Sum 

         /' 
         xmm7 = Msk 
         xmm6 = [AO][RO][GO][BO][AI][RI][GI][BI] 
         xmm5 = [As][RS][GS][BS] 

         eax = (SumDiv) 
         ebx = (DivInc) 
         ecx = X 
         edx = Stride 
         esi = Src 
         edi = Dst 
         ebp = R 
         '/ 

         pxor xmm4, xmm4 'UnPack 

         mov eax, 0 'Reset SumDiv 
         mov ebx, 0 'Reset DivInc 

         ' ---------------------------------------------------- 
         ' | X-In += Next 
         ' ---------------------------------------------------- 
         mov ebp, 0 'Offset 
         mov ecx, [esp+20] 'iR 
         _Blur_LoopY_In: 
            movd xmm0, [esi+ebp] 
            punpcklbw xmm0, xmm4 '[ ][ ][ ][ ][An][Rn][Gn][Bn] Next 
            paddw xmm6, xmm0 'IN+=Next 
            movdqa xmm0, xmm6 
            punpcklwd xmm0, xmm4 '[AI][RI][GI][BI] 
            paddd xmm5, xmm0 'Stack += IN 

            Add ebx, 1 'SumDivInc += 1 
            Add eax, ebx 'SumDiv += Inc 

            Add ebp, 4 
            Sub ecx, 1 
            jg _Blur_LoopY_In 


         ' ---------------------------------------------------- 
         ' | XIn += Next / XIn -= Mid / XOut += Mid 
         ' ---------------------------------------------------- 
         mov ecx, [esp+20] 'iR 
         _Blur_LoopY_InOut: 
            cvtsi2ss xmm3, eax 
            rcpss xmm3, xmm3 
            pshufd xmm3, xmm3, 0 'SumDiv 

            movdqa xmm0, xmm5 
            paddd xmm0, xmm7 ' Ubyte -> Float 
            subps xmm0, xmm7 '/ 
            mulps xmm0, xmm3 
            addps xmm0, xmm7 ' Float -> Ubyte 
            psubd xmm0, xmm7 '/ 
            packssdw xmm0, xmm0 '[A][R][G][B][A][R][G][B] 
            packuswb xmm0, xmm0 '[ARGB][ARGB][ARGB][ARGB] 
            movd [edi], xmm0 

            movd xmm0, [esi+ebp] 
            movd xmm1, [esi] 
            punpcklbw xmm0, xmm4 '[ ][ ][ ][ ][An][Rn][Gn][Bn] Next 
            punpcklbw xmm1, xmm4 '[ ][ ][ ][ ][Am][Rm][Gm][Bm] Mid 
            movlhps xmm0, xmm1 '[Am][Rm][Gm][Bm][An][Rn][Gn][Bn] = [Mid][Next] 
            paddw xmm6, xmm0 'Out+=Mid / IN+=Next 
            psubw xmm6, xmm1 '(Out-=Last) / IN-=Mid 
            movdqa xmm1, xmm6 
            movdqa xmm0, xmm6 
            punpckhwd xmm1, xmm4 '[AO][RO][GO][BO] 
            punpcklwd xmm0, xmm4 '[AI][RI][GI][BI] 
            psubd xmm5, xmm1 'Stack -= Out 
            paddd xmm5, xmm0 'Stack += IN 

            Sub ebx, 1 'SumDivInc += 1 
            Add eax, ebx 'SumDiv += Inc 

            Add esi, 4 
            Add edi, edx 
            Sub ecx, 1 
            jg _Blur_LoopY_InOut 


         cvtsi2ss xmm3, eax 
         rcpss xmm3, xmm3 
         pshufd xmm3, xmm3, 0 'SumDiv 

         mov ebx, ebp 
         neg ebx 'Last Index 


         ' ---------------------------------------------------- 
         ' | XIn += Next / XIn -= Mid / XOut += Mid / XOut -= Last 
         ' ---------------------------------------------------- 
         mov ecx, [esp+12] 'iHeight 
         Sub ecx, [esp+20] 
         Sub ecx, [esp+20] 
         _Blur_LoopY: 
            movdqa xmm0, xmm5 
            paddd xmm0, xmm7 ' Ubyte -> Float 
            subps xmm0, xmm7 '/ 
            mulps xmm0, xmm3 
            addps xmm0, xmm7 ' Float -> Ubyte 
            psubd xmm0, xmm7 '/ 
            packssdw xmm0, xmm0 '[A][R][G][B][A][R][G][B] 
            packuswb xmm0, xmm0 '[ARGB][ARGB][ARGB][ARGB] 
            movd [edi], xmm0 

            movd xmm0, [esi+ebp] 
            movd xmm1, [esi] 
            movd xmm2, [esi+ebx] 
            punpcklbw xmm0, xmm4 '[ ][ ][ ][ ][An][Rn][Gn][Bn] Next 
            punpcklbw xmm1, xmm4 '[ ][ ][ ][ ][Am][Rm][Gm][Bm] Mid 
            punpcklbw xmm2, xmm4 '[ ][ ][ ][ ][Al][Rl][Gl][Bl] Last 
            movlhps xmm0, xmm1 '[Am][Rm][Gm][Bm][An][Rn][Gn][Bn] = [Mid][Next] 
            movlhps xmm1, xmm2 '[Al][Rl][Gl][Bl][Ao][Ro][Go][Bo] = [Last][Mid] 
            paddw xmm6, xmm0 'Out+=Mid / IN+=Next 
            psubw xmm6, xmm1 'Out-=Last / IN-=Mid 
            movdqa xmm1, xmm6 
            movdqa xmm0, xmm6 
            punpckhwd xmm1, xmm4 '[AO][RO][GO][BO] 
            punpcklwd xmm0, xmm4 '[AI][RI][GI][BI] 
            psubd xmm5, xmm1 'Stack -= Out 
            paddd xmm5, xmm0 'Stack += IN 

            Add esi, 4 
            Add edi, edx 
            Sub ecx, 1 
            jg _Blur_LoopY 


         ' ---------------------------------------------------- 
         ' | XIn -= Mid / XOut += Mid / XOut -= Last 
         ' ---------------------------------------------------- 
         mov ebp, 0 'DivInc 
         mov ecx, [esp+20] 'iR 
         _Blur_LoopY_Out: 
            cvtsi2ss xmm3, eax 
            rcpss xmm3, xmm3 
            pshufd xmm3, xmm3, 0 'SumDiv 

            movdqa xmm0, xmm5 
            paddd xmm0, xmm7 ' Ubyte -> Float 
            subps xmm0, xmm7 '/ 
            mulps xmm0, xmm3 
            addps xmm0, xmm7 ' Float -> Ubyte 
            psubd xmm0, xmm7 '/ 
            packssdw xmm0, xmm0 '[A][R][G][B][A][R][G][B] 
            packuswb xmm0, xmm0 '[ARGB][ARGB][ARGB][ARGB] 
            movd [edi], xmm0 

            movd xmm0, [esi] 
            movd xmm1, [esi+ebx] 
            punpcklbw xmm0, xmm4 '[ ][ ][ ][ ][Am][Rm][Gm][Bm] Mid 
            punpcklbw xmm1, xmm4 '[ ][ ][ ][ ][Al][Rl][Gl][Bl] Last 
            movlhps xmm0, xmm1 '[Al][Rl][Gl][Bl][Am][Rm][Gm][Bm] = [Last][Mid] 
            psubw xmm6, xmm0 'Out-=Last / IN-=Mid 
            pslldq xmm0, 8 
            paddw xmm6, xmm0 'Out+=Mid / (IN+=Next) 
            movdqa xmm1, xmm6 
            movdqa xmm0, xmm6 
            punpckhwd xmm1, xmm4 '[AO][RO][GO][BO] 
            punpcklwd xmm0, xmm4 '[AI][RI][GI][BI] 
            psubd xmm5, xmm1 'Stack -= Out 
            paddd xmm5, xmm0 'Stack += IN 

            Add ebp, 1 
            Sub eax, ebp 

            Add esi, 4 
            Add edi, edx 
            Sub ecx, 1 
            jg _Blur_LoopY_Out 


         Sub dword Ptr[esp], 1 
         jg _Blur_LoopH 

      Add esp, 64 
       
      pop ebp 
   End Asm 
    

   Imagedestroy(pImgTmp) 
   Return pImgBlur 
End Function 

Function _ASM_Sin6th(fX As Double) As Double 
	'By Eukalyptus 
   Asm
      jmp 0f
      1: .Double 683565275.57643158 
      2: .Double -0.0000000061763971109087229 
      3: .Double 6755399441055744.0 
          
      0: 
         movq xmm0, [fX] 
         mulsd xmm0, [1b] 
         addsd xmm0, [3b] 
         movd ebx, xmm0 

         lea  eax, [ebx*2+0x80000000] 
         sar  eax, 2 
         imul eax 
         sar  ebx, 31 
         lea  eax, [edx*2-0x70000000] 
         lea  ecx, [edx*8+edx-0x24000000] 
         imul edx 
         Xor  ecx, ebx 
         lea  eax, [edx*8+edx+0x44A00000]
         imul ecx 

         cvtsi2sd xmm0, edx 
         mulsd xmm0, [2b] 
         movq [Function], xmm0 
   End Asm 
End Function

Function _ASM_Cos6th(fX As Double) As Double 
   'By Eukalyptus 
   Asm 
      jmp 0f 
         1: .Double 683565275.57643158 
         2: .Double -0.0000000061763971109087229 
         3: .Double 6755399441055744.0 
       
      0: 
         movq xmm0, [fX] 
         mulsd xmm0, [1b] 
         addsd xmm0, [3b] 
         movd ebx, xmm0 
          
         Add ebx, 0x40000000 'SinToCos 
    
         lea  eax, [ebx*2+0x80000000] 
         sar  eax, 2 
         imul eax 
         sar  ebx, 31 
         lea  eax, [edx*2-0x70000000] 
         lea  ecx, [edx*8+edx-0x24000000] 
         imul edx 
         Xor  ecx, ebx 
         lea  eax, [edx*8+edx+0x44A00000] 
         imul ecx 
          
         cvtsi2sd xmm0, edx 
         mulsd xmm0, [2b] 
         movq [Function], xmm0 
   End Asm 
End Function
I don't know whether the code runs also on Linux OS. -> seems to be running

Edit1: removed #include "Windows.bi" as it is not needed this time
Edit2: small modifications
Edit3: another small modifications
Last edited by UEZ on Nov 02, 2018 14:17, edited 3 times in total.
badidea
Posts: 2591
Joined: May 24, 2007 22:10
Location: The Netherlands

Re: Simple Smoke Simulation build 2018-10-28

Post by badidea »

UEZ wrote:I don't know whether the code runs also on Linux OS.
Nope:

Code: Select all

fbc32 -w all -exx -mt -maxerr 3 "test.bas" (in directory: /home/badidea/Desktop)
ld: cannot find -lkernel32
ld: cannot find -lgdi32
ld: cannot find -lmsimg32
ld: cannot find -luser32
ld: cannot find -lversion
ld: cannot find -ladvapi32
ld: cannot find -limm32
Compilation failed.
Or (64-bit):

Code: Select all

fbc64 -w all -exx  -maxerr 3 "test.bas" (in directory: /home/badidea/Desktop)
test.c: Assembler messages:
test.c:360: Error: incorrect register `ecx' used with `q' suffix
test.c:361: Error: incorrect register `ebx' used with `q' suffix
test.c:362: Error: incorrect register `edx' used with `q' suffix
test.c:363: Error: incorrect register `edi' used with `q' suffix
test.c:364: Error: incorrect register `esi' used with `q' suffix
test.c:367: Error: operand type mismatch for `push'
test.c:660: Error: operand type mismatch for `pop'
Compilation failed.
However, if I delete '#Include "windows.bi", it runs fine with fbc 32-bit. 19 to 21 FPS.
Not sure what the problem is with 64-bit.
Last edited by badidea on Oct 25, 2018 18:56, edited 1 time in total.
UEZ
Posts: 988
Joined: May 05, 2017 19:59
Location: Germany

Re: Simple Smoke Simulation build 2018-10-28

Post by UEZ »

badidea wrote: However, if I delete '#Include "windows.bi", it runs fine with fbc 32-bit.
Not sure what the problem is with 64-bit.
Thanks, indeed windows.bi is not needed. The assembler code is designed for x86 only (I guess). I will look for a fast x64 version...
dafhi
Posts: 1641
Joined: Jun 04, 2005 9:51

Re: Simple Smoke Simulation build 2018-10-26

Post by dafhi »

one cannot deny the benefit of knowing asm. thanks for sharing
deltarho[1859]
Posts: 4310
Joined: Jan 02, 2017 0:34
Location: UK
Contact:

Re: Simple Smoke Simulation build 2018-10-26

Post by deltarho[1859] »

As is the code will not compile with gcc. We need to replace the assembler labels with numeric labels as Eukalyptus has done.

For example, changing

Code: Select all

_Blur_LoopW:
...
...
jg _Blur_LoopW
to

Code: Select all

0:
...
...
jg 0b
8 label changes were needed.

I was then able to compile with FBC 1.06/gcc 8.1 32-bit.

I then replaced RND with PCG32II.

On running the gas version next to the edited code I was not able to tell any difference.

The exe of the edited code was 15KB larger, 3.5KB for PCG32II and 11.5KB for the different compilers.

A worthwhile exercise as it showed that for some applications gas + RND are well up to the job at hand.
UEZ
Posts: 988
Joined: May 05, 2017 19:59
Location: Germany

Re: Simple Smoke Simulation build 2018-10-26

Post by UEZ »

Using v0.15 build 2018-10-26 with 75.000 particles and changing the labels in the asm blur function I get following results:

-s gui -fpu SSE -vec 2 -O max -> ~32 fps (131072 bytes)
-s gui -> ~34 fps (130048 bytes)
-gen gcc -Wc -O2 -> ~25 fps (130560 bytes)
-gen gcc -O 3 -> ~27 fps (133120 bytes)
-gen gas -> ~33 fps (130048 bytes)

Test environment:
Win10 x64
Intel(R) Core(TM) i5-4300U CPU @ 1.90GHz
8 GB memory
dodicat
Posts: 7983
Joined: Jan 10, 2006 20:30
Location: Scotland

Re: Simple Smoke Simulation build 2018-10-26

Post by dodicat »

That's a really fast blur UEZ.
Anyway, an old slogger: The new recycling plant and incinerator on the edge of town.
Fully compliant with EEC carbon emissions.
Green energy for the future bla bla bla ...

Code: Select all

Dim Shared As Integer xres,yres
Screen 19,32,,64
Screeninfo xres,yres
Dim Shared As Ulong Pointer im
im=Imagecreate(xres,yres)
Type v2
    As Single x,y
End Type

Dim Shared As Single k=4    'drag coefficient
Dim Shared As Single g=9.81  'gravity coefficient
Dim Shared As Single m=5    ' mass coefficient
Redim Shared As v2 position(),startpos()
Redim Shared As Single t(),vel(),ang(),wind(),rad()
Dim As Single w
Dim Shared As Single pi=4*Atn(1)
Dim As Long temp
#define Intrange(f,l) int(Rnd*((l+1)-(f))+(f))
#define r(f,l) Rnd * ((l) - (f)) + (f)
#define map(a,b,x,c,d) ((d)-(c))*((x)-(a))/((b)-(a))+(c)
#define onscreen(_x,_y) ((_x)>=0)*((_x)<xres)*((_y)>=0)*((_y)<yres)

Sub cloud(x As Integer, y As Integer,length As Integer=100,Alpha As Integer=105, Zoom As Single = 0,im As Any Pointer=0)
    Dim As Integer rr=255
    Dim As Integer bb=255
    Dim As Integer gg=255
    Dim As Double pi=3.14159
    #define mp(a,b,x,c,d) ((d)-(c))*((x)-(a))/((b)-(a))+(c)
    If Length<=1 Or Alpha<=1 Then Exit Sub
    Dim As Single rnded = -pi+Rnd*1*pi/2
    Dim As Single rnded2 = -pi+Rnd*-3*pi
    If Alpha<25 Then
        For i As Integer = 0 To 255-Alpha Step 100
            Var c=mp((0),(500),y,0,100)
            Line im,(-(Zoom/2)+x-length/2,y)-((Zoom/2)+x+Length/6+length*Cos(-pi/2+rnded*PI/3),y+Length/6+length*Sin(-pi/2+rnded*PI/3)),Rgba(Rr-c,Gg-c,Bb-c,Alpha)
            Line im,(-(Zoom/2)+x-length/2,y)-((Zoom/2)+x+Length/6+length*Cos(-pi/2+rnded2*PI/3),y+Length/6+length*Sin(pi/2+rnded2*PI/3)),Rgba(Rr-c,Gg-c,Bb-c,Alpha)
        Next
    End If
    cloud(-(Zoom/2)+x+length*Cos(-pi/2+rnded*PI/3),(Zoom/2)+y+length*Sin(-pi/2+rnded*PI/3),length/1.4,Alpha/1.2,Zoom,im)
    cloud(-(Zoom/2)+x+length*Cos(-pi/2+rnded2*PI/3),(Zoom/2)+y+length*Sin(pi/2+rnded2*PI/3),length/1.4,Alpha/1.2,Zoom,im)
    cloud(-(Zoom/2)+x+length*Cos(pi/3+rnded2*PI/3),(Zoom/2)+y+length*Sin(pi+rnded2*PI/3),length/1.4,Alpha/2,Zoom,im)
End Sub

Sub trees()
    Dim As Integer rotx,roty
    #define rr(first,last) Rnd * (last - first) + first
    #macro rotate(pivotx,pivoty,px,py,a,scale)
    rotx=scale*(Cos(a*.0174533)*(px-pivotx)-Sin(a*.0174533)*(py-pivoty))+pivotx
    roty=scale*(Sin(a*.0174533)*(px-pivotx)+Cos(a*.0174533)*(py-pivoty))+pivoty
    #endmacro
    #macro turnline(piv,p1,p2,ang,col,d)
    Scope
        rotate(piv.x,piv.y,p1.x,p1.y,ang,d)
        Var rot1=Type<v2>(rotx,roty)
        rotate(piv.x,piv.y,p2.x,p2.y,ang,d)
        Var rot2=Type<v2>(rotx,roty)
        Line im,(rot1.x,rot1.y)-(rot2.x,rot2.y),col
    End Scope
    #endmacro
    Dim As v2 v1,v2,piv
    Dim As Uinteger treecol
    Dim As Double pivx,pivy,pivz,l,k,d
    Dim As Integer rd,g,b
    For m As Double=0 To 50 Step 5
        Randomize m
        For n As Double=200-(m+rr(2,20)) To 990+m Step rr(3,9)
            Randomize n^2
            l=rr(2,11)
            k=rr(1,5)
            piv=Type(n,.8*yres+20*(1-Sin(.01*(n-m*5-k+40-200))))
            Line im,(piv.x,piv.y)-(piv.x+rr(-2,5),piv.y+8),Rgb((100),(35),37)
            Var cc=rr(1,40)
            For a As Double=90 To 450 Step 7
                Randomize a
                Var shader=rr(1,6)
                rd=20+shader+cc
                g=150+shader:If g>40 Then g=g-40
                b=20+shader:If b>20 Then b=b-20
                treecol=Rgb(rd/2,g/2,b/2)
                For a2 As Double=0 To l Step .3
                    If a>270 Then shader=-shader
                    treecol=Rgb(rd/2,(g-a2*shader)/2,b/2)
                    v1=Type(piv.x-a2,piv.y)
                    v2=Type(piv.x-l,piv.y)
                    turnline(piv,v1,v2,a,treecol,1)
                Next a2
            Next a
        Next n
        
    Next m
End Sub

Sub backdrop() 'hills/trees
    #macro paintsketch(_function,minx,maxx,miny,maxy,r,g,b,alp)
    For x As Double=minx To maxx Step (maxx-minx)/10000
        Var x1=(xres)*(x-minx)/(maxx-minx)
        Var y1=(yres)*(_function-maxy)/(miny-maxy)
        gr=(lasty-y1)*1000
        lasty=y1
        If gr>g Then gr=g
        Line im,(x1,yres)-(x1,y1),Rgba(r,g-gr,b,alp)
    Next x
    #endmacro
    Dim As Double lasty,gr
    paintsketch(.8*yres+20*Sin(.01*(x-200)),xres,0,yres,0,50,100,0,255)
    trees()
End Sub

Sub background
    For y As Long=0 To yres
        Var rd=map(0,yres,y,0,200)
        Var bl= map(0,yres,y,250,200)
        Var gr=map(0,yres,y,0,200)
        Line im,(0,y)-(xres,y),Rgb(rd,gr,bl)
    Next y
    cloud(100,340,150/2,250,1,im)
    cloud(300,340,150/2,250,1,im)
    cloud(600,340,150/2,250,1,im)
    backdrop
    Dim As Long counter=30
    For n As Single=300 To 310 Step .2
        counter=counter-1
        Var b=map(300,310,n,(305-20),(305+20))
        Line im,(n,390)-(b,800),Rgb(200-2*counter,100-2*counter,0)
    Next n
    counter=30
    For n As Single=502 To 508 Step .2
        counter=counter-1
        Var b=map(502,508,n,(505-15),(505+15))
        Line im,(n,490)-(b,800),Rgb(.6*(200-2*counter),.6*(100-2*counter),0)
    Next n
End Sub

Sub setup(num As Long)
    Redim position(1 To num)
    Redim startpos(1 To num)
    Redim vel(1 To num)
    Redim t(1 To num)
    Redim ang(1 To num)
    Redim wind(1 To num)
    Redim rad(1 To num)
    For n As Long=1 To num
        startpos(n)=Type<v2>(r(300,310),r(380,390))
        vel(n)=r(-220,-180)
        ang(n)=r(pi/2-.2,pi/2+.2)
        wind(n)=r(-10,10)
        rad(n)=r(3,5)
    Next n
End Sub
Sub source(n As Long,num As Long)
    If num=1 Then 
        startpos(n)=Type<v2>(r(302,308),r(380,390))
        rad(n)=r(3,7)
    Else
        startpos(n)=Type<v2>(r(502,508),r(480,490))
        rad(n)=r(2,5)
    End If
    vel(n)=r(-220,-180)
    ang(n)=r(pi/2-.2,pi/2+.2)
    wind(n)=r(-10,10)
    t(n)=0
End Sub

Function Regulate(Byval MyFps As Long,Byref fps As Long) As Long
    Static As Double timervalue,_lastsleeptime,t3,frames
    frames+=1
    If (Timer-t3)>=1 Then t3=Timer:fps=frames:frames=0
    Var sleeptime=_lastsleeptime+((1/myfps)-Timer+timervalue)*1000
    If sleeptime<1 Then sleeptime=1
    _lastsleeptime=sleeptime
    timervalue=Timer
    Return sleeptime
End Function


'============================================  
setup(15000)
background
Dim As String info="Starting Production"
Dim As Double frames=62,stabilizer=1
Dim As Long fps
Do
    
    Windowtitle "fps " &fps 
    temp=temp+5
    If temp>Ubound(position)-5 Then temp=Ubound(position):info="Full Production"
    Screenlock
    Cls
    Put(0,0),im,Pset
    For n As Long=1 To temp
        w=30+10*Sin(position(n).y/10)+wind(n)
        t(n)=t(n)+.005
        position(n).x=startpos(n).x+(m/k)*(1-Exp(-(k/m)*t(n)))*(Vel(n)*Cos(ang(n))-w)+w*t(n)
        position(n).y=startpos(n).y+(m/k)*(1-Exp(-(k/m)*t(n)))*(Vel(n)*Sin(ang(n))+g*m/k)-(g*m/k)*t(n)
        Var two=Intrange(1,2)
        If onscreen(position(n).x,position(n).y)=0 Then source(n,two)
        Var cc=map(390,0,position(n).y,150,0)
        Var _alpha=map(390,0,position(n).y,50,5)
        Circle (position(n).x,position(n).y),rad(n),Rgba(cc,cc,cc,_alpha),,,.5,f
    Next n
    Draw String(1,1),info 
    Screenunlock
    Sleep regulate(60,fps),1
Loop Until Len(Inkey)
Imagedestroy im  
badidea
Posts: 2591
Joined: May 24, 2007 22:10
Location: The Netherlands

Re: Simple Smoke Simulation build 2018-10-26

Post by badidea »

dodicat wrote:Anyway, an old slogger: The new recycling plant and incinerator on the edge of town.
Fully compliant with EEC carbon emissions.
Green energy for the future bla bla bla ...
Looks peaceful, some acid rain and grey sheep walking around would make the picture complete.
dafhi
Posts: 1641
Joined: Jun 04, 2005 9:51

Re: Simple Smoke Simulation build 2018-10-26

Post by dafhi »

those clouds.

[update] totally forgot. happy birthday :-)
Last edited by dafhi on Oct 27, 2018 21:19, edited 1 time in total.
MrSwiss
Posts: 3910
Joined: Jun 02, 2013 9:27
Location: Switzerland

Re: Simple Smoke Simulation build 2018-10-26

Post by MrSwiss »

@dodicat, looks cool. Congrats to your birthday (yesterday), many happy returns.
Better late, than never ...

Also proof, that ASM makes little sense, these days (compatibility issues among
many others).
All the ASM dependent BASIC-Compilers, are on the way out ... (PB, e.t.c.)
because, nobody wants to re-write them, in 64 bit ASM!
Thanks, for FBC x64 (and, GCC to keep FBC alive & kicking)!
UEZ
Posts: 988
Joined: May 05, 2017 19:59
Location: Germany

Re: Simple Smoke Simulation build 2018-10-26

Post by UEZ »

@dodicat: Belated happy birthday! Never expected that you are 70! For me 24 years to go to be at same age...
Btw, did you have that nice code above newly coded or is it something within your library?
Post Reply