UEZ's sin6th, cos6th

General FreeBASIC programming questions.
Post Reply
srvaldez
Posts: 3373
Joined: Sep 25, 2005 21:54

UEZ's sin6th, cos6th

Post by srvaldez »

I thought that I might be able to speed up UEZ's Rotating Earth build 2019-04-14 (viewtopic.php?f=7&t=27543) by tweaking his trig functions a tiny bit, but no luck.
However, if you use -gen gcc they may be a bit faster, though not necessarily; it depends on the -O level. By the way, I think that if they were written in FB rather than asm, gcc could optimize them even more.
Note: the first two functions are the originals, the bottom two are the tweaked ones, and there is a benchmark at the end.

Code: Select all

' Original (non-naked) integer/fixed-point sine approximation.
' Parameter: fX - input angle as a Double (scaled by constant 1 below;
'            presumably radians, since 683565275.576... ~= 2^32 / (2*pi) - confirm).
' Returns:   the approximation as a Double, written to the Function result slot.
' NOTE(review): ebx, ecx, edx and eax are modified inside the Asm block without
' an explicit save here; this relies on the compiler/backend preserving what it
' needs around Asm blocks - confirm for the backend in use.
Function _ASM_Sin6th(fX As Double) As Double 
	'By Eukalyptus 
	Asm
		' Skip over the constants that are embedded in the instruction stream.
		jmp 0f
		' 1: input scale factor, 2: output scale factor (negative),
		' 3: 1.5 * 2^52 - adding it leaves the rounded integer value of the
		'    product in the low bits of the double (read back with movd below).
		1: .Double 683565275.57643158 
		2: .Double -0.0000000061763971109087229 
		3: .Double 6755399441055744.0 
		  
		0: 
			' Scale the argument, add the magic constant, and take the low
			' 32 bits of the resulting double as an integer in ebx.
			movq xmm0, [fX] 
			mulsd xmm0, [1b] 
			addsd xmm0, [3b] 
			movd ebx, xmm0 

			' Integer polynomial evaluation. Each one-operand imul leaves the
			' signed 64-bit product in edx:eax; only the high half (edx) is
			' used by the following instruction.
			lea  eax, [ebx*2+0x80000000] 
			sar  eax, 2 
			imul eax 
			' ebx becomes 0 or -1 depending on the sign bit of the scaled angle.
			sar  ebx, 31 
			lea  eax, [edx*2-0x70000000] 
			lea  ecx, [edx*8+edx-0x24000000] 
			imul edx 
			' Conditionally complement ecx using the sign mask in ebx.
			Xor  ecx, ebx 
			lea  eax, [edx*8+edx+0x44A00000]
			imul ecx 

			' Convert the high 32 bits of the final product to double and
			' scale by constant 2 to produce the result.
			cvtsi2sd xmm0, edx 
			mulsd xmm0, [2b] 
			movq [Function], xmm0 
	End Asm 
End Function

' Original (non-naked) integer/fixed-point cosine approximation.
' Uses the same instruction sequence as the sine version, with 0x40000000
' added to the scaled integer angle first (the 'SinToCos step below).
' Parameter: fX - input angle as a Double (scaled by constant 1 below;
'            presumably radians, since 683565275.576... ~= 2^32 / (2*pi) - confirm).
' Returns:   the approximation as a Double, written to the Function result slot.
' NOTE(review): ebx, ecx, edx and eax are modified inside the Asm block without
' an explicit save here; this relies on the compiler/backend preserving what it
' needs around Asm blocks - confirm for the backend in use.
Function _ASM_Cos6th(fX As Double) As Double 
	'By Eukalyptus 
	Asm 
		' Skip over the constants that are embedded in the instruction stream.
		jmp 0f 
		' 1: input scale factor, 2: output scale factor (negative),
		' 3: 1.5 * 2^52 - adding it leaves the rounded integer value of the
		'    product in the low bits of the double (read back with movd below).
		1: .Double 683565275.57643158 
		2: .Double -0.0000000061763971109087229 
		3: .Double 6755399441055744.0 

		0: 
			' Scale the argument, add the magic constant, and take the low
			' 32 bits of the resulting double as an integer in ebx.
			movq xmm0, [fX] 
			mulsd xmm0, [1b] 
			addsd xmm0, [3b] 
			movd ebx, xmm0 

			' Offset the integer angle by 0x40000000 (a quarter of the 32-bit
			' range) so the sine sequence below yields the cosine.
			Add ebx, 0x40000000 'SinToCos 

			' Integer polynomial evaluation. Each one-operand imul leaves the
			' signed 64-bit product in edx:eax; only the high half (edx) is
			' used by the following instruction.
			lea  eax, [ebx*2+0x80000000] 
			sar  eax, 2 
			imul eax 
			' ebx becomes 0 or -1 depending on the sign bit of the offset angle.
			sar  ebx, 31 
			lea  eax, [edx*2-0x70000000] 
			lea  ecx, [edx*8+edx-0x24000000] 
			imul edx 
			' Conditionally complement ecx using the sign mask in ebx.
			Xor  ecx, ebx 
			lea  eax, [edx*8+edx+0x44A00000] 
			imul ecx 

			' Convert the high 32 bits of the final product to double and
			' scale by constant 2 to produce the result.
			cvtsi2sd xmm0, edx 
			mulsd xmm0, [2b] 
			movq [Function], xmm0 
	End Asm 
End Function

' Naked (no compiler prologue/epilogue) integer/fixed-point sine approximation.
' Parameter: fX - input angle as a Double (scaled by constant 1 below;
'            presumably radians, since 683565275.576... ~= 2^32 / (2*pi) - confirm).
' Returns:   the approximation as a Double - in st(0) on 32-bit targets,
'            in xmm0 on 64-bit targets.
' Clobbers eax and edx; ebx and ecx are saved and restored.
Function ASM_Sin6th naked cdecl(byval fX As Double) As Double 
	'By Eukalyptus 
	Asm
	' if FB-32-bit, then load fx from stack, else it's already in xmm0
	' ebx/rbx needs to be preserved; ecx/rcx is saved as a precaution
		#ifndef __FB_64BIT__
			' Capture the argument address before the pushes move esp.
			lea eax, [esp+4]
			push ebx
			push ecx
			movq xmm0, [eax]
		#else
			push rbx
			push rcx
		#endif
			' Scale the argument, add 1.5 * 2^52 so the rounded integer value
			' lands in the low bits of the double, and read it into ebx.
			mulsd xmm0, [1f] 
			addsd xmm0, [3f] 
			movd ebx, xmm0 

			' Integer polynomial evaluation. Each one-operand imul leaves the
			' signed 64-bit product in edx:eax; only the high half (edx) is
			' used by the following instruction.
			lea  eax, [ebx*2+0x80000000] 
			sar  eax, 2 
			imul eax 
			' ebx becomes 0 or -1 depending on the sign bit of the scaled angle.
			sar  ebx, 31 
			lea  eax, [edx*2-0x70000000] 
			lea  ecx, [edx*8+edx-0x24000000] 
			imul edx 
			' Conditionally complement ecx using the sign mask in ebx.
			Xor  ecx, ebx 
			lea  eax, [edx*8+edx+0x44A00000]
			imul ecx 

			' Convert the high 32 bits of the final product to double and
			' scale by constant 2 to produce the result.
			cvtsi2sd xmm0, edx 
			mulsd xmm0, [2f]
		' if FB-32-bit, then transfer xmm0 into fpu, else we are done
		' restore saved registers
		#ifndef __FB_64BIT__
			pop ecx
			pop ebx
			' After the pops esp points at the return address, so [esp+4] is
			' the 8-byte fX argument slot. A cdecl callee may overwrite its own
			' arguments, so spill the result there instead of below esp, where
			' nothing protects the data from asynchronous stack use.
			movq [esp+4], xmm0 
			fld qword ptr [esp+4]
		#else
			pop rcx
			pop rbx
		#endif
			ret
		' Constants placed after ret so they are never executed:
		' 1: input scale factor, 2: output scale factor, 3: 1.5 * 2^52.
		1: .Double 683565275.57643158 
		2: .Double -0.0000000061763971109087229 
		3: .Double 6755399441055744.0 
	End Asm 
End Function

' Naked (no compiler prologue/epilogue) integer/fixed-point cosine approximation.
' Uses the sine instruction sequence after adding 0x40000000 to the scaled
' integer angle (the 'SinToCos step below).
' Parameter: fX - input angle as a Double (scaled by constant 1 below;
'            presumably radians, since 683565275.576... ~= 2^32 / (2*pi) - confirm).
' Returns:   the approximation as a Double - in st(0) on 32-bit targets,
'            in xmm0 on 64-bit targets.
' Clobbers eax and edx; ebx and ecx are saved and restored.
Function ASM_Cos6th naked cdecl(byval fX As Double) As Double
	'By Eukalyptus 
	Asm 
	' if FB-32-bit, then load fx from stack, else it's already in xmm0
	' ebx/rbx needs to be preserved; ecx/rcx is saved as a precaution
		#ifndef __FB_64BIT__
			' Capture the argument address before the pushes move esp.
			lea eax, [esp+4]
			push ebx
			push ecx
			movq xmm0, [eax]
		#else
			push rbx
			push rcx
		#endif
			' Scale the argument, add 1.5 * 2^52 so the rounded integer value
			' lands in the low bits of the double, and read it into ebx.
			mulsd xmm0, [1f] 
			addsd xmm0, [3f] 
			movd ebx, xmm0 

			' Offset the integer angle by 0x40000000 (a quarter of the 32-bit
			' range) so the sine sequence below yields the cosine.
			Add ebx, 0x40000000 'SinToCos 

			' Integer polynomial evaluation. Each one-operand imul leaves the
			' signed 64-bit product in edx:eax; only the high half (edx) is
			' used by the following instruction.
			lea  eax, [ebx*2+0x80000000] 
			sar  eax, 2 
			imul eax 
			' ebx becomes 0 or -1 depending on the sign bit of the offset angle.
			sar  ebx, 31 
			lea  eax, [edx*2-0x70000000] 
			lea  ecx, [edx*8+edx-0x24000000] 
			imul edx 
			' Conditionally complement ecx using the sign mask in ebx.
			Xor  ecx, ebx 
			lea  eax, [edx*8+edx+0x44A00000] 
			imul ecx 

			' Convert the high 32 bits of the final product to double and
			' scale by constant 2 to produce the result.
			cvtsi2sd xmm0, edx 
			mulsd xmm0, [2f] 
		' if FB-32-bit, then transfer xmm0 into fpu, else we are done
		' restore saved registers
		#ifndef __FB_64BIT__
			pop ecx
			pop ebx
			' After the pops esp points at the return address, so [esp+4] is
			' the 8-byte fX argument slot. A cdecl callee may overwrite its own
			' arguments, so spill the result there instead of below esp, where
			' nothing protects the data from asynchronous stack use.
			movq [esp+4], xmm0 
			fld qword ptr [esp+4]
		#else
			pop rcx
			pop rbx
		#endif
			ret
		' Constants placed after ret so they are never executed:
		' 1: input scale factor, 2: output scale factor, 3: 1.5 * 2^52.
		1: .Double 683565275.57643158 
		2: .Double -0.0000000061763971109087229 
		3: .Double 6755399441055744.0
	End Asm 
End Function


' Benchmark driver: for each of the four trig routines, sum the function over
' the integers 1..100000000 and print elapsed seconds, the computed sum and
' the reference sum (as text) for visual comparison.
Print "just now starting"

Dim As Double total, started, elapsed
Dim As Double angle
Dim As Integer i

' Expands to one timed accumulation loop. A macro (not a function pointer)
' is used so every expansion calls the routine directly.
#macro RunBenchmark(trigFn, expectedSum)
	total = 0
	started = Timer
	For i = 1 To 100000000
		angle = i
		total += trigFn(angle)
	Next
	elapsed = Timer - started
	Print elapsed, total, expectedSum
#endmacro

' Tweaked (naked) versions first ...
RunBenchmark(ASM_Sin6th, "29.19713395039346")

RunBenchmark(ASM_Cos6th, "9.347239003491984")
Print "------------------------"
' ... then the original versions.
RunBenchmark(_ASM_Sin6th, "29.19713395039346")

RunBenchmark(_ASM_Cos6th, "9.347239003491984")
Post Reply