Squares
Re: Squares
@srvaldez
your optimized am code:
function md naked cdecl (byval a as ulongint , byval b as ulongint ) as ulongint
asm
mov r9, rdx
mov rax, rcx
cqo
idiv r9
mov rax, rdx
ret
end asm
end function
when you call the function , your
moving r9 with rdx , does rdx hold a or b?
moving rax with rcx , does rcx hold a or b?
then your moving rax with rdx , so rdx holds the answer...
Just wondering what regs are loaded with what values when you call the function...
your optimized am code:
function md naked cdecl (byval a as ulongint , byval b as ulongint ) as ulongint
asm
mov r9, rdx
mov rax, rcx
cqo
idiv r9
mov rax, rdx
ret
end asm
end function
when you call the function , your
moving r9 with rdx , does rdx hold a or b?
moving rax with rcx , does rcx hold a or b?
then your moving rax with rdx , so rdx holds the answer...
Just wondering what regs are loaded with what values when you call the function...
Re: Squares
albert, rdx holds b but since the cqo instruction will wipeout rdx it's saved to the register R9
the register RCX holds a, but a needs to be in RAX to perform the division
after the instruction idiv R9, RAX holds the quotient and RDX the remainder
moving RDX to RAX is because RAX returns the function result, it's like mov [Function],RDX
the register RCX holds a, but a needs to be in RAX to perform the division
after the instruction idiv R9, RAX holds the quotient and RDX the remainder
moving RDX to RAX is because RAX returns the function result, it's like mov [Function],RDX
Last edited by srvaldez on Dec 20, 2018 1:42, edited 1 time in total.
Re: Squares
@srvaldez
Okay , so when you call the function
rdx = b and rcx = a
Thank you for explaining it to me!
Okay , so when you call the function
rdx = b and rcx = a
Thank you for explaining it to me!
Re: Squares
@albert
I made a big mistake, I used idiv when I should have used div, idiv is for signed integers and div for unsigned integers
here's the corrected code
I made a big mistake, I used idiv when I should have used div, idiv is for signed integers and div for unsigned integers
here's the corrected code
Code: Select all
'' first integer argument RCX
'' second integer argument RDX
'' third integer argument R8
'' fourth integer argument R9
function md naked cdecl (byval a as ulongint , byval b as ulongint ) as ulongint
asm
mov r9, rdx ''save value of b, because cqo will clobber it
mov rax, rcx''move value of a into rax
xor edx, edx
div r9 ''div a,b
mov rax, rdx''move remainder into rax
ret
end asm
end function
function qr naked cdecl (byval a as ulongint , byval b as ulongint, byref r as ulongint ) as ulongint
asm
mov r9, rdx ''save value of b, because cqo will clobber it
mov rax, rcx''move value of a into rax
xor edx, edx
div r9 ''div a,b
mov [r8], rdx''move remainder into r
ret '' quotient in rax
end asm
end function
#macro qr_macro(a , b, q, r )
asm
mov rcx, [b]''move value of b into rcx
mov rax, [a]''move value of a into rax
xor edx, edx
div rcx ''div a,b
mov [r], rdx''move remainder into r
mov [q], rax''move quotient into q
end asm
#endmacro
dim as ulongint a, b, q, r
a=18446744073709551615
b=4
qr_macro(a,b,q,r)
print q,r
q=qr(a,b,r)
print q,r
q=qr(18446744073709551615,4,r)
print q,r
print a\b,a mod b
Re: Squares
@srvalddez , thanks!
@Dodicat
I did a comparison code.. with Geany IDE -gen GCC -w all -O 3 ...
Here's the results....
I left out srvaldez's code because it was running way slower , he needs to make it inline ASM....
My formula a - ( ( a / b ) * b ) is still the fastest...
@Dodicat
I did a comparison code.. with Geany IDE -gen GCC -w all -O 3 ...
Here's the results....
I left out srvaldez's code because it was running way slower , he needs to make it inline ASM....
My formula a - ( ( a / b ) * b ) is still the fastest...
Code: Select all
'====================================================
'Code designed for FreeBASIC compiler , FreeBASIC.net
'IDE , number 1 IDE , for FreeBASIC , FBIDE
'====================================================
'My formula is faster using vars instead of real digits.. faster than MOD...
'In some cases i'm getting e-010 , compared to the MOD function e-009 so its faster than the MOD function..
'Here's code doing 1,000,000 loops for each type of MOD , and printing out the timings...Mine is the fastest...
screen 19
'srvaldez's formula in assembly
'function md naked cdecl (byval a as ulongint , byval b as ulongint ) as ulongint
' asm
' mov r9, rdx
' mov rax, rcx
' cqo
' idiv r9
' mov rax, rdx
' ret
' end asm
'end function
randomize
dim as double time1 , time2 , time3 , time4 , time5 , time6 , time7 , time8
do
dim as ulongint a = rnd * 1e10
dim as ulongint b = rnd * 1e10
if b > a then swap b , a
dim as ulongint mod1
time1 = timer
for x as longint = 1 to 1000000
mod1 = a mod b
next
time2 = timer
dim as ulongint mod_Albert_asm
time3 = timer
for x as longint = 1 to 1000000
'Albert's formula written in assembly code...
asm
mov rdx , 0
mov rax , [a]
mov rbx , [b]
mov rcx , [a]
idiv rbx
imul rbx
mov rbx , rax
mov rax , rcx
sub rax , rbx
mov [mod_Albert_asm], rax
end asm
next
time4 = timer
' alberts formula
dim as ulongint mod_Albert
time5 = timer
for x as longint = 1 to 1000000
mod_Albert = a - ( ( a \ b ) *b )
next
time6 = timer
dim as ulongint mod_Dodicat_asm
time7 = timer
for x as longint = 1 to 1000000
asm
mov rdx, 0
mov rax, [a]
mov rbx, [b]
div rbx
mov [mod_Dodicat_asm], rdx
end asm
next
time8 = timer
print a; " mod " ;b
print "Built in MOD " ; mod1 ; " " ; time2-time1 , (time2 - time1) / 10000000
print "Dodicat ASM " ; mod_dodicat_asm ; " " ; time8-time7 , (time8 - time7) / 10000000
print "Albert ASM " ; mod_Albert_asm ; " " ; time4-time3 , (time4 - time3) / 10000000
print "Albert MOD " ; mod_Albert ; " " ; time6-time5 , (time6 - time5) / 10000000
print
sleep
loop until multikey(1)
sleep
end
Re: Squares
@albert
the reason your formula a - ( ( a / b ) * b ) performs as the native mod function is because the compiler optimizes your code and transforms it into the same code as the mod function
try this slightly modified test of yours
the reason your formula a - ( ( a / b ) * b ) performs as the native mod function is because the compiler optimizes your code and transforms it into the same code as the mod function
try this slightly modified test of yours
Code: Select all
'====================================================
'Code designed for FreeBASIC compiler , FreeBASIC.net
'IDE , number 1 IDE , for FreeBASIC , FBIDE
'====================================================
'My formula is faster using vars instead of real digits.. faster than MOD...
'In some cases i'm getting e-010 , compared to the MOD function e-009 so its faster than the MOD function..
'Here's code doing 1,000,000 loops for each type of MOD , and printing out the timings...Mine is the fastest...
screen 19
'srvaldez's formula in assembly
'function md naked cdecl (byval a as ulongint , byval b as ulongint ) as ulongint
' asm nop
' asm
' mov r9, rdx
' mov rax, rcx
' xor edx, edx
' div r9
' mov rax, rdx
' ret
' end asm
'end function
'srvaldez's formula macro in assembly
#macro md_macro (a , b, r )
asm
mov rcx, [b]''move value of b into rcx
mov rax, [a]''move value of a into rax
xor edx, edx
div rcx ''div a,b
mov [r], rdx''move remainder into r
end asm
#endmacro
randomize
dim as double time1 , time2 , time3 , time4 , time5 , time6 , time7 , time8, time9 , time10
do
dim as ulongint a = rnd * 1e10
dim as ulongint b = rnd * 1e10
if b > a then swap b , a
dim as ulongint mod1
time1 = timer
for x as longint = 1 to 10000000
asm nop
mod1 = a mod b
next
time2 = timer
dim as ulongint mod_Albert_asm
time3 = timer
for x as longint = 1 to 10000000
'Albert's formula written in assembly code...
asm nop
asm
mov rdx , 0
mov rax , [a]
mov rbx , [b]
mov rcx , [a]
idiv rbx
imul rbx
mov rbx , rax
mov rax , rcx
sub rax , rbx
mov [mod_Albert_asm], rax
end asm
next
time4 = timer
' alberts formula
dim as ulongint mod_Albert
time5 = timer
for x as longint = 1 to 10000000
asm nop
mod_Albert = a - ( ( a \ b ) *b )
next
time6 = timer
dim as ulongint mod_Dodicat_asm
time7 = timer
for x as longint = 1 to 10000000
asm nop
asm
mov rdx, 0
mov rax, [a]
mov rbx, [b]
div rbx
mov [mod_Dodicat_asm], rdx
end asm
next
time8 = timer
dim as ulongint mod_srvaldez_asm
time9 = timer
for x as longint = 1 to 10000000
asm nop
md_macro (a , b, mod_srvaldez_asm )
next
time10 = timer
print a; " mod " ;b
print "Built in MOD " ; mod1 ; " " ; time2-time1 , (time2 - time1) / 1000000
print "Dodicat ASM " ; mod_dodicat_asm ; " " ; time8-time7 , (time8 - time7) / 1000000
print "Albert ASM " ; mod_Albert_asm ; " " ; time4-time3 , (time4 - time3) / 1000000
print "Albert MOD " ; mod_Albert ; " " ; time6-time5 , (time6 - time5) / 1000000
print "srvaldez ASM " ; mod_srvaldez_asm ; " " ; time10-time9 , (time10 - time9) / 1000000
print
sleep
loop until multikey(1)
sleep
end
Re: Squares
The multiply of 1 million digit numbers in ASCII strings is really never going to be used because humans will never read all those digits. The time taken to code in ASM for a little used program is therefore wasted. Some internal binary format that avoids MOD will be faster because it does not need all the pack and unpack base changes for every step in a programmed calculator.
I agree that without ASM, FB does lack the ability to compute ( a ÷ b ) and get the result (a \ b) and the remainder ( a MOD b ) in separate registers. But that will be the same for all our algorithms. Separate DIV and MOD are not that slow, if you can reduce their use you should do so, but you don't need ASM.
Once you commit to using ASM you reduce portability, and then have to re-code it every time you make a slight change to an algorithm. Optimisation and ASM are distractions that do not change the algorithmic efficiency. So I don't think assembly code should be used when you are optimising and comparing different algorithms.
You should restrict your code to FB only without ASM inserts, or you may as well use GMP.
I agree that without ASM, FB does lack the ability to compute ( a ÷ b ) and get the result (a \ b) and the remainder ( a MOD b ) in separate registers. But that will be the same for all our algorithms. Separate DIV and MOD are not that slow, if you can reduce their use you should do so, but you don't need ASM.
Once you commit to using ASM you reduce portability, and then have to re-code it every time you make a slight change to an algorithm. Optimisation and ASM are distractions that do not change the algorithmic efficiency. So I don't think assembly code should be used when you are optimising and comparing different algorithms.
You should restrict your code to FB only without ASM inserts, or you may as well use GMP.
Re: Squares
Here is another moderator -- oops--- sorry, mod, extendable to double.
Code: Select all
Function dmod(x As Double,y As Double, Byref d As Integer=0) As Double
Dim As Double f=x/y
Function=y*Frac(f)
d=Fix(f)
End Function
Dim As Integer i,d
For z As Long=1 To 10000000
Dim As Integer n1=Rnd*1000000-Rnd*1000000
Dim As Integer n2=Rnd*10000-Rnd*10000
If n2<>0 Then
i=dmod(n1,n2,d)
If d<>n1\n2 Then Print n1,n2,d,n1\n2
If i<> n1 Mod n2 Then Print n1,n2,i,n1 Mod n2
End If
Next
Dim As Double pi=4*Atn(1)
Var z=dmod(100.01*pi,pi)
Print z
Print .01*pi
print "done"
Sleep
Re: Squares
@Dodicat
I came up with faster way to make the numbers n equal length...
It should speed the multiplier up few ticks...
I'm gonna go thru all the computer logic function and try to write faster formulas..
I came up with faster way to make the numbers n equal length...
It should speed the multiplier up few ticks...
I'm gonna go thru all the computer logic function and try to write faster formulas..
Code: Select all
screen 19
dim as double time1 , time2 , time3 , time4
do
dim as longint size1 = int( rnd * 1000 )
dim as string num1
for a as longint = 1 to size1 step 1
num1+=str(int(rnd*10))
next
if left(num1,1) = "0" then mid(num1,1,1) = str(int(rnd*9)+1)
'================================================
time1 = timer
'make equal multiple of 7
dim as string number1 = num1
dim as string str1
dim as longint dec1
do
str1 = str( len(number1) / 7 )
dec1 = instr( 1 , str1 , "." )
if dec1 > 0 then number1 = "0" + number1
loop until dec1 = 0
time2 = timer
'================================================
'================================================
time3 = timer
'(FASTER) make equal multiple of 7
dim as string test = num1
if len(num1) mod 7 > 0 then test = string( ( 7 - len(num1) mod 7 ) , "0") + test
time4 = timer
'================================================
print
print len(num1) , len(num1) / 7
print
print len(test ) , len(test ) / 7 , time2-time1
print len(number1) , len(number1) / 7 , time4-time3
sleep
loop until inkey = chr(27)
sleep
end
Re: Squares
@Dodicat
On the MOD timings?
How do you create a percentage?
To see what percentage faster or slower a particular formula is ?
On the MOD timings?
How do you create a percentage?
To see what percentage faster or slower a particular formula is ?
Re: Squares
The easiest way I suppose would be
print (time2-time1)/(time4-time3);" times the speed"
print (time2-time1)/(time4-time3);" times the speed"
Re: Squares
@Dodicat
I started with "XOR" .... Here's n1 XOR 1 ... i got it working with xor 1 , now to speed it up and do n1 xor 2
I started with "XOR" .... Here's n1 XOR 1 ... i got it working with xor 1 , now to speed it up and do n1 xor 2
Code: Select all
screen 19
do
dim as longint builtin_xor
dim as longint Albert_xor
for a as longint = 0 to 1000
dim as longint value = valulng( "1" + string(len(str(a)),"0") )
dim as longint add : if a mod 2 = 0 then add = 1 else add=0
builtin_xor = a xor 1
Albert_xor = a - (( value - a) mod 2) + add
'print a , a xor 1 , a - (( value - a) mod 2) + add
print a , builtin_xor , Albert_xor
if builtin_xor <> Albert_xor then
print
print "!!~~ERROR~~!!" : sleep
end if
if inkey = chr(27) then exit for : exit do
next
sleep
loop until inkey = chr(27)
print "press a key to exit"
sleep
end
Re: Squares
In this case you can now speed up mod
anything mod 2 =anything and 1
anything mod 2 =anything and 1
Code: Select all
for n as long=1 to 50
dim as integer t=rnd*5
print t;" ";t mod 2;t and 1
next
print
sleep
Re: Squares
@Dodicat
I'm trying to write the logic functions without using logic..
To write a faster XOR , AND , OR , etc...
I'm trying to write the logic functions without using logic..
To write a faster XOR , AND , OR , etc...
Re: Squares
You have all the means Albert.
Change your big strings to binary and compare the zeros and ones.
Change your big strings to binary and compare the zeros and ones.