Here is how to use the same assembler code for both x86 and x86_64.
I only changed the LONG arguments to INTEGER, so that they are 32-bit or 64-bit depending on the target.
I also redefined the register names (e.g. REG_SP becomes ESP or RSP)
and used #define for the local variables, e.g. mov reg, X_OFF instead of mov reg, [esp+0].
Code: Select all
'Snowfall v0.6 coded by UEZ Using classes (my 1st attempt^^)
#Include "fbgfx.bi"
Using FB
' Forward declaration: the blur routine is defined further down in this file but is called from Snowflakes.Draw.
Declare Function _ASM_ImageBlur(pImage As Any Ptr, iRadius As integer, iExpandEdge As integer = 0) As Any Ptr 'function by Eukalyptus
' Screen size in pixels; passed to Screenres and used as the bounds of every Snowflake.
Const scrw = 1200, scrh = 800
' Returns a pseudo-random Single between fStart and fEnd, built from a single Rnd() call.
Function RandomRange(fStart As Single, fEnd As Single) As Single
	' Scale the unit-interval random number to the requested span, then shift it to fStart.
	Function = fStart + (fEnd - fStart) * Rnd()
End Function
' One snowflake: a filled circle that drifts across a w x h area and is
' re-spawned just above the top edge once it has left the visible area.
Type Snowflake
	Public:
	Declare Constructor()
	Declare Destructor()
	Declare Sub Init()
	Declare Sub Reset()
	Declare Sub update()
	As Ushort w, h                          ' size of the area the flake lives in (pixels)
	As Single x, y, vx, vy, radius, Alpha   ' centre, per-update velocity, circle radius, opacity factor
	Private:
	Declare Sub Spawn(iAboveTop As Integer)
End Type

' Shared body of Init and Reset: picks a new radius, position, velocity and opacity.
' iAboveTop = 0  -> y is chosen anywhere inside the area (initial scatter)
' iAboveTop <> 0 -> y is chosen in (-radius, 0], i.e. at/just above the top edge
' The order of the random draws is the same as in the original Init/Reset code.
Sub Snowflake.Spawn(iAboveTop As Integer)
	This.radius = RandomRange(1, 3)
	This.x = Rnd() * (This.w - This.radius)
	If iAboveTop <> 0 Then
		This.y = Rnd() * -This.radius
	Else
		This.y = Rnd() * (This.h - This.radius)
	End If
	This.vx = RandomRange(-3, 3)
	This.vy = RandomRange(1, 4)
	This.Alpha = RandomRange(0.25, 0.95)
End Sub

' Places the flake at a random position anywhere inside the area.
Sub Snowflake.init()
	This.Spawn(0)
End Sub

' Re-spawns the flake at the top edge with fresh random parameters.
Sub Snowflake.Reset()
	This.Spawn(1)
End Sub

' Advances the flake by one step and recycles it once it is completely outside
' the area. All three edges now use the same "centre is more than one radius
' outside" rule; the right edge previously triggered on x > w, which removed
' flakes while half of the circle was still visible.
Sub Snowflake.Update()
	This.x += This.vx
	This.y += This.vy
	If (This.y > This.h + This.radius) Or (This.x < -This.radius) Or (This.x > This.w + This.radius) Then This.Reset()
End Sub

' Binds the flake to the global screen size and scatters it.
Constructor Snowflake()
	This.w = scrw
	This.h = scrh
	This.Init
End Constructor

' Nothing to release: the type owns no dynamic resources.
Destructor Snowflake()
End Destructor
' A collection of Snowflake objects plus the off-screen images used to render them.
Type Snowflakes
	Declare Constructor(n As Ushort = 10000)
	Declare Destructor()
	Declare Sub Draw()
	Private:
	As integer w, h, amount                             ' render size and number of flakes
	As Snowflake Ptr pBuffer                            ' array of 'amount' flakes (New[]/Delete[])
	As fb.Image Ptr Img_Empty, Img_Snowfall, Img_Blur   ' clear template, frame buffer, per-frame blur result
End Type

' Renders one frame: clears the frame image, draws and advances every flake,
' blurs the result and puts it on the screen.
Sub Snowflakes.Draw()
	' Nothing can be drawn if an allocation in the constructor failed.
	If This.Img_Empty = 0 OrElse This.Img_Snowfall = 0 Then Exit Sub

	' Clear the frame image by overwriting it with the empty template.
	Put This.Img_Snowfall, (0, 0), This.Img_Empty, Pset

	If This.pBuffer <> 0 Then
		For i As integer = 0 To This.amount - 1
			Circle This.Img_Snowfall, (pBuffer[i].x, pBuffer[i].y), pBuffer[i].radius, Rgba(255, 255, 255, 255 * pBuffer[i].Alpha),,,,F
			pBuffer[i].update
		Next
	End If

	' _ASM_ImageBlur returns 0 on failure; fall back to the unblurred frame
	' instead of handing a null image to Put.
	This.Img_Blur = _ASM_ImageBlur(This.Img_Snowfall, 2)
	If This.Img_Blur <> 0 Then
		Put (0, 0), This.Img_Blur, Trans
		Imagedestroy This.Img_Blur
		This.Img_Blur = 0
	Else
		Put (0, 0), This.Img_Snowfall, Trans
	End If
End Sub

' Allocates the images and n flakes. A graphics mode must already be set,
' because the image depth is not passed explicitly.
Constructor Snowflakes(n As Ushort)
	amount = n
	w = scrw
	h = scrh
	pBuffer = 0
	Img_Blur = 0
	' NOTE(review): the third ImageCreate argument is the fill colour, not the
	' depth, so these images are filled with colour value 32 at the current
	' screen depth. Possibly (w, h, 0, 32) was intended - confirm before changing,
	' since it affects the rendered output.
	Img_Empty = Imagecreate(This.w, This.h, 32)
	Img_Snowfall = Imagecreate(This.w, This.h, 32)
	If amount > 0 Then pBuffer = New Snowflake[amount]
End Constructor

' Releases the flake array and both persistent images.
Destructor Snowflakes()
	If pBuffer <> 0 Then
		Delete [] pBuffer
		pBuffer = 0
	End If
	If This.Img_Empty <> 0 Then
		Imagedestroy This.Img_Empty
		This.Img_Empty = 0
	End If
	If This.Img_Snowfall <> 0 Then
		Imagedestroy This.Img_Snowfall
		This.Img_Snowfall = 0
	End If
End Destructor
' ---- Program entry: open the window, then render until Esc or window close ----
Screenres (scrw, scrh, 32, 1, GFX_ALPHA_PRIMITIVES Or GFX_HIGH_PRIORITY Or GFX_NO_SWITCH)
'#Ifdef __Fb_win32__
' #Include "windows.bi"
' Dim As Integer iDW, iDH
' Screencontrol GET_DESKTOP_SIZE, iDW, iDH
' Dim tWorkingArea As RECT
' SystemParametersInfo(SPI_GETWORKAREA, null, @tWorkingArea, null)
' Screencontrol SET_WINDOW_POS, (iDW - scrw) \ 2, _
' ((tWorkingArea.Bottom - scrh) - (iDH - tWorkingArea.Bottom)) \ 2
'#Endif
Windowtitle "Simple Snowfall"
Dim As Snowflakes Snowfall
Dim As integer i, iFPS = 0, iFPS_current = 0
Dim As Double fTimer = Timer
Dim As String sKey
Do
	Screenlock
	Snowfall.Draw
	Draw String(0, 0), iFPS_current & " fps", Rgb(&hFF, &h00, &h00)
	Screenunlock

	' Count every rendered frame (the original skipped the frame on which the
	' counter was published) and publish the total once per full second.
	iFPS += 1
	If Timer - fTimer >= 1.0 Then
		iFPS_current = iFPS
		iFPS = 0
		fTimer = Timer
	End If

	Sleep 10
	sKey = Inkey
	' Leave on Esc or when the window's close button is pressed
	' (Inkey reports the close button as Chr(255) & "k").
Loop Until sKey = Chr(27) OrElse sKey = Chr(255) & "k"
Function _ASM_ImageBlur(pImage As Any Ptr, iRadius As integer, iExpandEdge As integer = 0) As Any Ptr
	'By Eukalyptus
	'
	' Returns a newly created, blurred copy of a 32-bit image.
	'
	' pImage      : source image buffer; must use 4 bytes per pixel.
	' iRadius     : blur radius, clamped to 0..127 and additionally to the largest
	'               value the (possibly expanded) image can hold.
	' iExpandEdge : when non-zero the result is enlarged by iRadius pixels on every
	'               side and the source is placed in the middle.
	' Returns     : the new image (the caller must ImageDestroy it), or 0 when
	'               pImage is null/invalid, is not 32-bit, or an allocation fails.
	'               Images smaller than 3 pixels in either direction are returned
	'               as an unblurred copy.
	'
	' The inline assembler makes two passes. The first walks every row of the copy
	' and writes the result transposed into a temporary buffer; the second walks
	' the rows of that buffer and writes back transposed, so both directions are
	' blurred. The same source assembles for x86 and x86_64 through the REG_* macros.
	Dim As Integer iWidth, iHeight, iPX, iPitch, iPitchBlur
	Dim As Any Ptr pData, pDataBlur, pDataTmp

	If pImage = 0 Then Return 0
	If ImageInfo(pImage, iWidth, iHeight, iPX, iPitch, pData) <> 0 Then Return 0
	If iPX <> 4 Then Return 0

	If iRadius < 0 Then
		iRadius = 0
	ElseIf iRadius > 127 Then
		iRadius = 127
	EndIf

	Dim As Any Ptr pImgBlur, pImgTmp
	If iExpandEdge <> 0 Then
		iWidth += iRadius * 2
		iHeight += iRadius * 2
	EndIf

	pImgBlur = ImageCreate(iWidth, iHeight, 0, 32)
	pImgTmp = ImageCreate(iWidth, iHeight, 0, 32)
	' Check the allocations before the images are queried (the original called
	' ImageInfo on them first).
	If pImgBlur = 0 Orelse pImgTmp = 0 Then
		If pImgBlur <> 0 Then ImageDestroy(pImgBlur)
		If pImgTmp <> 0 Then ImageDestroy(pImgTmp)
		Return 0
	End If
	ImageInfo(pImgBlur, , , , iPitchBlur, pDataBlur)
	ImageInfo(pImgTmp, , , , , pDataTmp)

	If iExpandEdge <> 0 Then
		Put pImgBlur, (iRadius, iRadius), pImage, Alpha
	Else
		Put pImgBlur, (0, 0), pImage, Alpha
	End If

	' Each line is processed as R lead-in pixels, a middle loop that always runs
	' at least once, and R lead-out pixels, with R = iRadius + 1. A line therefore
	' needs at least 2 * R + 1 pixels, otherwise the loops read and write outside
	' the pixel buffers. Limit the radius accordingly and skip the blur when not
	' even radius 0 fits.
	Dim As Integer iMinDim = IIf(iWidth < iHeight, iWidth, iHeight)
	If iMinDim < 3 Then
		ImageDestroy(pImgTmp)
		Return pImgBlur
	End If
	If iRadius > (iMinDim - 3) \ 2 Then iRadius = (iMinDim - 3) \ 2

	' Register names and operand size for the current target.
	#ifndef __FB_64BIT__
		#define REG_SIZE 4
		#define REG_ACCESS DWORD
		#define REG_AX eax
		#define REG_BX ebx
		#define REG_CX ecx
		#define REG_DX edx
		#define REG_DI edi
		#define REG_SI esi
		#define REG_SP esp
		#define REG_BP ebp
	#else
		#define REG_SIZE 8
		#define REG_ACCESS QWORD
		#define REG_AX rax
		#define REG_BX rbx
		#define REG_CX rcx
		#define REG_DX rdx
		#define REG_DI rdi
		#define REG_SI rsi
		#define REG_SP rsp
		#define REG_BP rbp
	#endif
	' Scratch area reserved on the stack for the assembler's own variables.
	#define LOCAL_VAR_SPACE 16*REG_SIZE
	'esp/rsp = [X] [Y] [W] [H] [Stride] [R] [pDst] [pSrc] [pDstO] [pSrcO]
	#define X_OFF [REG_SP]
	#define Y_OFF [REG_SP+1*REG_SIZE]
	#define W_OFF [REG_SP+2*REG_SIZE]
	#define H_OFF [REG_SP+3*REG_SIZE]
	#define S_OFF [REG_SP+4*REG_SIZE]
	#define R_OFF [REG_SP+5*REG_SIZE]
	#define DST_OFF [REG_SP+6*REG_SIZE]
	#define SRC_OFF [REG_SP+7*REG_SIZE]
	#define DSTO_OFF [REG_SP+8*REG_SIZE]
	#define SRCO_OFF [REG_SP+9*REG_SIZE]
	Asm
		' All BASIC variables are loaded into registers before the frame/stack
		' pointers are modified; afterwards only the *_OFF slots are used.
		mov REG_CX, [iWidth]
		mov REG_BX, [iHeight]
		mov REG_DX, [iPitchBlur]
		mov REG_DI, [pDataTmp]
		mov REG_SI, [pDataBlur]
		mov REG_AX, [iRadius]
		inc REG_AX
		push REG_BP
		mov REG_BP, REG_AX
		sub REG_SP, LOCAL_VAR_SPACE
		mov W_OFF, REG_CX
		mov H_OFF, REG_BX
		mov S_OFF, REG_DX
		mov R_OFF, REG_BP
		mov DST_OFF, REG_DI
		mov DSTO_OFF, REG_DI
		mov SRC_OFF, REG_SI
		mov SRCO_OFF, REG_SI
		mov REG_AX, 0x47000000 'ByteToFloat MSK
		movd xmm7, REG_AX
		pshufd xmm7, xmm7, 0
		' ####################################################
		' # W-Loop
		' ####################################################
		mov REG_BX, H_OFF
		mov Y_OFF, REG_BX
		_Blur_LoopW:
		mov REG_DI, DST_OFF
		mov REG_SI, SRC_OFF
		mov REG_DX, S_OFF 'Stride
		add REG_ACCESS ptr DST_OFF, 4 'next RowCol(Transform vertical<->horizontal)
		add SRC_OFF, REG_DX 'next Row
		mov REG_DX, H_OFF 'Y-Stride
		shl REG_DX, 2
		pxor xmm6, xmm6 'Reset In-Out
		pxor xmm5, xmm5 'Reset Sum
		pxor xmm4, xmm4 'UnPack
		mov REG_AX, 0 'Reset SumDiv
		mov REG_BX, 0 'Reset DivInc
		' ----------------------------------------------------
		' | X-In += Next
		' ----------------------------------------------------
		mov REG_BP, 0 'Offset
		mov REG_CX, R_OFF 'iR
		_Blur_LoopX_In:
		movd xmm0, [REG_SI+REG_BP]
		punpcklbw xmm0, xmm4 '[ ][ ][ ][ ][An][Rn][Gn][Bn] Next
		paddw xmm6, xmm0 'IN+=Next
		movdqa xmm0, xmm6
		punpcklwd xmm0, xmm4 '[AI][RI][GI][BI]
		paddd xmm5, xmm0 'Stack += IN
		add REG_BX, 1 'SumDivInc += 1
		add REG_AX, REG_BX 'SumDiv += Inc
		add REG_BP, 4
		sub REG_CX, 1
		jg _Blur_LoopX_In
		' ----------------------------------------------------
		' | XIn += Next / XIn -= Mid / XOut += Mid
		' ----------------------------------------------------
		mov REG_CX, R_OFF 'iR
		_Blur_LoopX_InOut:
		cvtsi2ss xmm3, REG_AX
		rcpss xmm3, xmm3
		pshufd xmm3, xmm3, 0 'SumDiv
		movdqa xmm0, xmm5
		paddd xmm0, xmm7 ' UByte -> Float
		subps xmm0, xmm7 ' /
		mulps xmm0, xmm3
		addps xmm0, xmm7 ' Float -> UByte
		psubd xmm0, xmm7 ' /
		packssdw xmm0, xmm0 '[A][R][G][B][A][R][G][B]
		packuswb xmm0, xmm0 '[ARGB][ARGB][ARGB][ARGB]
		movd [REG_DI], xmm0
		movd xmm0, [REG_SI+REG_BP]
		movd xmm1, [REG_SI]
		punpcklbw xmm0, xmm4 '[ ][ ][ ][ ][An][Rn][Gn][Bn] Next
		punpcklbw xmm1, xmm4 '[ ][ ][ ][ ][Am][Rm][Gm][Bm] Mid
		movlhps xmm0, xmm1 '[Am][Rm][Gm][Bm][An][Rn][Gn][Bn] = [Mid][Next]
		paddw xmm6, xmm0 'OUT+=Mid / IN+=Next
		psubw xmm6, xmm1 '(OUT-=Last) / IN-=Mid
		movdqa xmm1, xmm6
		movdqa xmm0, xmm6
		punpckhwd xmm1, xmm4 '[AO][RO][GO][BO]
		punpcklwd xmm0, xmm4 '[AI][RI][GI][BI]
		psubd xmm5, xmm1 'Stack -= OUT
		paddd xmm5, xmm0 'Stack += IN
		sub REG_BX, 1 'SumDivInc -= 1
		add REG_AX, REG_BX 'SumDiv += Inc
		add REG_SI, 4
		add REG_DI, REG_DX
		sub REG_CX, 1
		jg _Blur_LoopX_InOut
		cvtsi2ss xmm3, REG_AX
		rcpss xmm3, xmm3
		pshufd xmm3, xmm3, 0 'SumDiv
		mov REG_BX, REG_BP
		neg REG_BX 'Last Index
		' ----------------------------------------------------
		' | XIn += Next / XIn -= Mid / XOut += Mid / XOut -= Last
		' ----------------------------------------------------
		mov REG_CX, W_OFF 'iWidth
		sub REG_CX, R_OFF
		sub REG_CX, R_OFF
		_Blur_LoopX:
		movdqa xmm0, xmm5
		paddd xmm0, xmm7 ' UByte -> Float
		subps xmm0, xmm7 ' /
		mulps xmm0, xmm3
		addps xmm0, xmm7 ' Float -> UByte
		psubd xmm0, xmm7 ' /
		packssdw xmm0, xmm0 '[A][R][G][B][A][R][G][B]
		packuswb xmm0, xmm0 '[ARGB][ARGB][ARGB][ARGB]
		movd [REG_DI], xmm0
		movd xmm0,[REG_SI+REG_BP]
		movd xmm1,[REG_SI]
		movd xmm2,[REG_SI+REG_BX]
		punpcklbw xmm0, xmm4 '[ ][ ][ ][ ][An][Rn][Gn][Bn] Next
		punpcklbw xmm1, xmm4 '[ ][ ][ ][ ][Am][Rm][Gm][Bm] Mid
		punpcklbw xmm2, xmm4 '[ ][ ][ ][ ][Al][Rl][Gl][Bl] Last
		movlhps xmm0, xmm1 '[Am][Rm][Gm][Bm][An][Rn][Gn][Bn] = [Mid][Next]
		movlhps xmm1, xmm2 '[Al][Rl][Gl][Bl][Ao][Ro][Go][Bo] = [Last][Mid]
		paddw xmm6, xmm0 'OUT+=Mid / IN+=Next
		psubw xmm6, xmm1 'OUT-=Last / IN-=Mid
		movdqa xmm1, xmm6
		movdqa xmm0, xmm6
		punpckhwd xmm1, xmm4 '[AO][RO][GO][BO]
		punpcklwd xmm0, xmm4 '[AI][RI][GI][BI]
		psubd xmm5, xmm1 'Stack -= OUT
		paddd xmm5, xmm0 'Stack += IN
		add REG_SI, 4
		add REG_DI, REG_DX
		sub REG_CX, 1
		jg _Blur_LoopX
		' ----------------------------------------------------
		' | XIn -= Mid / XOut += Mid / XOut -= Last
		' ----------------------------------------------------
		mov REG_BP, 0 'DivInc
		mov REG_CX, R_OFF 'iR
		_Blur_LoopX_Out:
		cvtsi2ss xmm3, REG_AX
		rcpss xmm3, xmm3
		pshufd xmm3, xmm3, 0 'SumDiv
		movdqa xmm0, xmm5
		paddd xmm0, xmm7 ' UByte -> Float
		subps xmm0, xmm7 ' /
		mulps xmm0, xmm3
		addps xmm0, xmm7 ' Float -> UByte
		psubd xmm0, xmm7 ' /
		packssdw xmm0, xmm0 '[A][R][G][B][A][R][G][B]
		packuswb xmm0, xmm0 '[ARGB][ARGB][ARGB][ARGB]
		movd [REG_DI], xmm0
		movd xmm0, [REG_SI]
		movd xmm1, [REG_SI+REG_BX]
		punpcklbw xmm0, xmm4 '[ ][ ][ ][ ][Am][Rm][Gm][Bm] Mid
		punpcklbw xmm1, xmm4 '[ ][ ][ ][ ][Al][Rl][Gl][Bl] Last
		movlhps xmm0, xmm1 '[Al][Rl][Gl][Bl][Am][Rm][Gm][Bm] = [Last][Mid]
		psubw xmm6, xmm0 'OUT-=Last / IN-=Mid
		pslldq xmm0, 8
		paddw xmm6, xmm0 'OUT+=Mid / (IN+=Next)
		movdqa xmm1, xmm6
		movdqa xmm0, xmm6
		punpckhwd xmm1, xmm4 '[AO][RO][GO][BO]
		punpcklwd xmm0, xmm4 '[AI][RI][GI][BI]
		psubd xmm5, xmm1 'Stack -= OUT
		paddd xmm5, xmm0 'Stack += IN
		add REG_BP, 1
		sub REG_AX, REG_BP
		add REG_SI, 4
		add REG_DI, REG_DX
		sub REG_CX, 1
		jg _Blur_LoopX_Out
		sub REG_ACCESS ptr Y_OFF, 1
		jg _Blur_LoopW
		' ####################################################
		' # H-Loop
		' ####################################################
		' Source and destination swap roles: read the temporary buffer,
		' write back into the result image.
		mov REG_DI, SRCO_OFF
		mov REG_SI, DSTO_OFF
		mov DST_OFF, REG_DI
		mov SRC_OFF, REG_SI
		mov REG_BX, W_OFF
		mov X_OFF, REG_BX
		_Blur_LoopH:
		mov REG_DI, DST_OFF
		mov REG_SI, SRC_OFF
		mov REG_DX, H_OFF
		Shl REG_DX, 2
		Add REG_ACCESS ptr DST_OFF, 4 'next Col
		Add SRC_OFF, REG_DX 'next ColRow
		mov REG_DX, S_OFF 'Stride
		pxor xmm6, xmm6 'Reset In-Out
		pxor xmm5, xmm5 'Reset Sum
		pxor xmm4, xmm4 'UnPack
		mov REG_AX, 0 'Reset SumDiv
		mov REG_BX, 0 'Reset DivInc
		' ----------------------------------------------------
		' | X-In += Next
		' ----------------------------------------------------
		mov REG_BP, 0 'Offset
		mov REG_CX, R_OFF 'iR
		_Blur_LoopY_In:
		movd xmm0, [REG_SI+REG_BP]
		punpcklbw xmm0, xmm4 '[ ][ ][ ][ ][An][Rn][Gn][Bn] Next
		paddw xmm6, xmm0 'IN+=Next
		movdqa xmm0, xmm6
		punpcklwd xmm0, xmm4 '[AI][RI][GI][BI]
		paddd xmm5, xmm0 'Stack += IN
		Add REG_BX, 1 'SumDivInc += 1
		Add REG_AX, REG_BX 'SumDiv += Inc
		Add REG_BP, 4
		Sub REG_CX, 1
		jg _Blur_LoopY_In
		' ----------------------------------------------------
		' | XIn += Next / XIn -= Mid / XOut += Mid
		' ----------------------------------------------------
		mov REG_CX, R_OFF 'iR
		_Blur_LoopY_InOut:
		cvtsi2ss xmm3, REG_AX
		rcpss xmm3, xmm3
		pshufd xmm3, xmm3, 0 'SumDiv
		movdqa xmm0, xmm5
		paddd xmm0, xmm7 ' UByte -> Float
		subps xmm0, xmm7 '/
		mulps xmm0, xmm3
		addps xmm0, xmm7 ' Float -> UByte
		psubd xmm0, xmm7 '/
		packssdw xmm0, xmm0 '[A][R][G][B][A][R][G][B]
		packuswb xmm0, xmm0 '[ARGB][ARGB][ARGB][ARGB]
		movd [REG_DI], xmm0
		movd xmm0, [REG_SI+REG_BP]
		movd xmm1, [REG_SI]
		punpcklbw xmm0, xmm4 '[ ][ ][ ][ ][An][Rn][Gn][Bn] Next
		punpcklbw xmm1, xmm4 '[ ][ ][ ][ ][Am][Rm][Gm][Bm] Mid
		movlhps xmm0, xmm1 '[Am][Rm][Gm][Bm][An][Rn][Gn][Bn] = [Mid][Next]
		paddw xmm6, xmm0 'OUT+=Mid / IN+=Next
		psubw xmm6, xmm1 '(OUT-=Last) / IN-=Mid
		movdqa xmm1, xmm6
		movdqa xmm0, xmm6
		punpckhwd xmm1, xmm4 '[AO][RO][GO][BO]
		punpcklwd xmm0, xmm4 '[AI][RI][GI][BI]
		psubd xmm5, xmm1 'Stack -= OUT
		paddd xmm5, xmm0 'Stack += IN
		Sub REG_BX, 1 'SumDivInc -= 1
		Add REG_AX, REG_BX 'SumDiv += Inc
		Add REG_SI, 4
		Add REG_DI, REG_DX
		Sub REG_CX, 1
		jg _Blur_LoopY_InOut
		cvtsi2ss xmm3, REG_AX
		rcpss xmm3, xmm3
		pshufd xmm3, xmm3, 0 'SumDiv
		mov REG_BX, REG_BP
		neg REG_BX 'Last Index
		' ----------------------------------------------------
		' | XIn += Next / XIn -= Mid / XOut += Mid / XOut -= Last
		' ----------------------------------------------------
		mov REG_CX, H_OFF 'iHeight
		Sub REG_CX, R_OFF
		Sub REG_CX, R_OFF
		_Blur_LoopY:
		movdqa xmm0, xmm5
		paddd xmm0, xmm7 ' UByte -> Float
		subps xmm0, xmm7 '/
		mulps xmm0, xmm3
		addps xmm0, xmm7 ' Float -> UByte
		psubd xmm0, xmm7 '/
		packssdw xmm0, xmm0 '[A][R][G][B][A][R][G][B]
		packuswb xmm0, xmm0 '[ARGB][ARGB][ARGB][ARGB]
		movd [REG_DI], xmm0
		movd xmm0, [REG_SI+REG_BP]
		movd xmm1, [REG_SI]
		movd xmm2, [REG_SI+REG_BX]
		punpcklbw xmm0, xmm4 '[ ][ ][ ][ ][An][Rn][Gn][Bn] Next
		punpcklbw xmm1, xmm4 '[ ][ ][ ][ ][Am][Rm][Gm][Bm] Mid
		punpcklbw xmm2, xmm4 '[ ][ ][ ][ ][Al][Rl][Gl][Bl] Last
		movlhps xmm0, xmm1 '[Am][Rm][Gm][Bm][An][Rn][Gn][Bn] = [Mid][Next]
		movlhps xmm1, xmm2 '[Al][Rl][Gl][Bl][Ao][Ro][Go][Bo] = [Last][Mid]
		paddw xmm6, xmm0 'OUT+=Mid / IN+=Next
		psubw xmm6, xmm1 'OUT-=Last / IN-=Mid
		movdqa xmm1, xmm6
		movdqa xmm0, xmm6
		punpckhwd xmm1, xmm4 '[AO][RO][GO][BO]
		punpcklwd xmm0, xmm4 '[AI][RI][GI][BI]
		psubd xmm5, xmm1 'Stack -= OUT
		paddd xmm5, xmm0 'Stack += IN
		Add REG_SI, 4
		Add REG_DI, REG_DX
		Sub REG_CX, 1
		jg _Blur_LoopY
		' ----------------------------------------------------
		' | XIn -= Mid / XOut += Mid / XOut -= Last
		' ----------------------------------------------------
		mov REG_BP, 0 'DivInc
		mov REG_CX, R_OFF 'iR
		_Blur_LoopY_Out:
		cvtsi2ss xmm3, REG_AX
		rcpss xmm3, xmm3
		pshufd xmm3, xmm3, 0 'SumDiv
		movdqa xmm0, xmm5
		paddd xmm0, xmm7 ' UByte -> Float
		subps xmm0, xmm7 '/
		mulps xmm0, xmm3
		addps xmm0, xmm7 ' Float -> UByte
		psubd xmm0, xmm7 '/
		packssdw xmm0, xmm0 '[A][R][G][B][A][R][G][B]
		packuswb xmm0, xmm0 '[ARGB][ARGB][ARGB][ARGB]
		movd [REG_DI], xmm0
		movd xmm0, [REG_SI]
		movd xmm1, [REG_SI+REG_BX]
		punpcklbw xmm0, xmm4 '[ ][ ][ ][ ][Am][Rm][Gm][Bm] Mid
		punpcklbw xmm1, xmm4 '[ ][ ][ ][ ][Al][Rl][Gl][Bl] Last
		movlhps xmm0, xmm1 '[Al][Rl][Gl][Bl][Am][Rm][Gm][Bm] = [Last][Mid]
		psubw xmm6, xmm0 'OUT-=Last / IN-=Mid
		pslldq xmm0, 8
		paddw xmm6, xmm0 'OUT+=Mid / (IN+=Next)
		movdqa xmm1, xmm6
		movdqa xmm0, xmm6
		punpckhwd xmm1, xmm4 '[AO][RO][GO][BO]
		punpcklwd xmm0, xmm4 '[AI][RI][GI][BI]
		psubd xmm5, xmm1 'Stack -= OUT
		paddd xmm5, xmm0 'Stack += IN
		Add REG_BP, 1
		Sub REG_AX, REG_BP
		Add REG_SI, 4
		Add REG_DI, REG_DX
		Sub REG_CX, 1
		jg _Blur_LoopY_Out
		Sub REG_ACCESS Ptr X_OFF, 1
		jg _Blur_LoopH
		' Release the scratch area and restore the saved frame pointer.
		add REG_SP, LOCAL_VAR_SPACE
		pop REG_BP
	End Asm
	ImageDestroy(pImgTmp)
	Return pImgBlur
End Function