Find (At the assembler inserts)

Post your FreeBASIC source, examples, tips and tricks here. Please don’t post code without including an explanation.
Post Reply
VANYA
Posts: 1834
Joined: Oct 24, 2010 15:16
Location: Ярославль
Contact:

Find (At the assembler inserts)

Post by VANYA »

Decided to try to implement functions like Instr in assembler inserts, but the results disappointed me. Search speed get slow, but perhaps that's understandable ... Well post the code can find someone interested and optimizes it.

Code: Select all

Function FindString(ByVal dest As String, ByVal search As String, ByVal pos_ As Integer) As Integer Export
	Dim As Integer return__
	Dim As Integer ptr return_=@return__,add_=@pos_
	
	Asm
		push ebp 
		mov edi,[add_] 
		push edi	
		mov edi,[return_] 
		push edi 
		mov edx,[dest]
		Add edx,[pos_]
		dec edx 
		mov ebx,[search]
		Xor edi,edi 
		Xor esi,esi 
		Xor ebp,ebp 
		mov ecx,-1 
		main:
		mov al,[ebx+esi] 
		mov ah,[edx+edi] 
		cmp al,0 
		jz finish 
		cmp ah,0   
		jz finish
		cmp al,ah 
		jz Yes 
		cmp ebp,0 
		jz ret__
		mov edi,ebp		
		ret__:
		Xor ebp,ebp 
		Xor esi,esi 
		inc edi 
		jmp No_ 
		Yes: 
		cmp ebp,0 
		jnz EBPNONULL 
		mov ebp,edi 
		inc ebp
		EBPNONULL:
		inc edi
		inc esi
		No_:
		loop main 
		finish: 
		cmp ebp,0
		jz Err
		Not ecx	
		jmp Valid
		Err:
		pop eax
		pop ebx
		mov [eax],ebp	
		jmp END_
		Valid:		
		pop eax
		pop ecx
		Add ebp,[ecx]
		dec ebp
		mov [eax],ebp
		END_:
		pop ebp
	End Asm
	Return return__
End Function
Print FindString("FreeBasic","Bas",1):Sleep
AGS
Posts: 1284
Joined: Sep 25, 2007 0:26
Location: the Netherlands

Post by AGS »

You will get better performance by implementing the same search method as implemented by instr. The method used by instr is called the Boyer Moore string searching algorithm

http://en.wikipedia.org/wiki/Boyer-Moor ... _algorithm

I've implemented the Horspool variant (in BASIC) of the Boyer Moore algorithm http://en.wikipedia.org/wiki/Boyer-Moor ... _algorithm

and got good results using it (performance was close to instr performance).
VANYA
Posts: 1834
Joined: Oct 24, 2010 15:16
Location: Ярославль
Contact:

Post by VANYA »

I read, thanks.
MichaelW
Posts: 3500
Joined: May 16, 2006 22:34
Location: USA

Post by MichaelW »

The problem with Boyer-Moore and similar is that the preprocessing takes a significant amount of time. For short strings, up to perhaps several hundred characters, a simple byte-scanner is generally faster.

Code: Select all

'==============================================================================
#include "windows.bi"
#include "counter.bas"
'==============================================================================
'' Counter.bas is available here:
''
'' http://www.freebasic.net/forum/viewtopic.php?t=4221
'==============================================================================

Function FindString(ByVal dest As String,ByVal search As String,ByVal pos_ As Integer) As Integer

        Dim As Integer return__
        Dim As Integer ptr return_=@return__,add_=@pos_

        Asm
                push ebp
                mov edi,[add_]
                push edi
                mov edi,[return_]
                push edi
                mov edx,[dest]
                Add edx,[pos_]
                dec edx
                mov ebx,[search]
                Xor edi,edi
                Xor esi,esi
                Xor ebp,ebp
                mov ecx,-1
              main:
                mov al,[ebx+esi]
                mov ah,[edx+edi]
                cmp al,0
                jz finish
                cmp ah,0
                jz finish
                cmp al,ah
                jz Yes
                cmp ebp,0
                jz ret__
                mov edi,ebp
              ret__:
                Xor ebp,ebp
                Xor esi,esi
                inc edi
                jmp No_
              Yes:
                cmp ebp,0
                jnz EBPNONULL
                mov ebp,edi
                inc ebp
              EBPNONULL:
                inc edi
                inc esi
              No_:
                loop main
              finish:
                cmp ebp,0
                jz Err
                Not ecx
                jmp Valid
              Err:
                pop eax
                pop ebx
                mov [eax],ebp
                jmp END_
              Valid:
                pop eax
                pop ecx
                Add ebp,[ecx]
                dec ebp
                mov [eax],ebp
              END_:
                pop ebp
        End Asm
        Return return__
End Function

'==============================================================================

dim as string s1,ss1,s2,ss2,s3,ss3

s1 = "FreeBASIC"
ss1 = "IC"
s2 = "FreeBASIC is a free 32-bit compiler for the BASIC language."
ss2 = "language"
s3 = s2 & "It is open source and licensed under the GPL."
s3 &= "It is designed to be syntax compatible with QuickBASIC,"
s3 &= "while expanding on the language and capabilities."
ss3 = "language and capabilities"
print s1
print
print s2
print
print s3
print

print instr( s1 , ss1 ),
print FindString( s1, ss1, 1 )
print instr( s2 , ss2 ),
print FindString( s2, ss2, 1 )
print instr( s3 , ss3 ),
print FindString( s3, ss3, 1 )
print

sleep 3000

counter_begin( 1000, HIGH_PRIORITY_CLASS )
counter_end
print counter_cycles

counter_begin( 1000, HIGH_PRIORITY_CLASS )
    instr( 1, s1, ss1 )
counter_end
print counter_cycles, "cycles, Instr s1"

counter_begin( 1000, HIGH_PRIORITY_CLASS )
   FindString( s1, ss1, 1 )
counter_end
print counter_cycles, "cycles, FindString s1"


counter_begin( 1000, HIGH_PRIORITY_CLASS )
    instr( 1, s2, ss2 )
counter_end
print counter_cycles, "cycles, Instr s2"

counter_begin( 1000, HIGH_PRIORITY_CLASS )
   FindString( s2, ss2, 1 )
counter_end
print counter_cycles, "cycles, FindString s2"


counter_begin( 1000, HIGH_PRIORITY_CLASS )
    instr( 1, s3, ss3 )
counter_end
print counter_cycles, "cycles, Instr s3"

counter_begin( 1000, HIGH_PRIORITY_CLASS )
   FindString( s3, ss3, 1 )
counter_end
print counter_cycles, "cycles, FindString s3"

sleep

Running on a P3:

Code: Select all

1716          cycles, Instr s1
180           cycles, FindString s1
2223          cycles, Instr s2
834           cycles, FindString s2
2940          cycles, Instr s3
2867          cycles, FindString s3
AGS
Posts: 1284
Joined: Sep 25, 2007 0:26
Location: the Netherlands

Post by AGS »

My results (using the code by MichaelW on a Pentium I7):
2977 cycles, Instr s1
165 cycles, FindString s1
3269 cycles, Instr s2
1121 cycles, FindString s2
4156 cycles, Instr s3
3946 cycles, FindString s3
The timing is almost the same every time I run the benchmark. Almost no difference between different runs. Very accurate.

I know the rtlib uses strchr for patterns no longer than 1 char (the rtlib uses Boyer Moore otherwise). So I tried a case where the pattern length equals 1.
I added

Code: Select all

dim ss4 as string
ss4 = "4"
to the code and put the character 4 before "on" in s3 (so s3 now reads "while expanding 4on"). Tested using instr and FindString.
counter_begin( 1000, HIGH_PRIORITY_CLASS )
instr( 1, s3, ss4 )
counter_end
print counter_cycles, "cycles, Instr ss4"

counter_begin( 1000, HIGH_PRIORITY_CLASS )
FindString( s3, ss4,1 )
counter_end
print counter_cycles, "cycles, FindString ss4"
Results:
722 cycles, Instr ss4
2711 cycles, FindString ss4
So what happens when using another function from the c rtlib, strstr, for string matching?

Benchmark using strstr, instr and FindString.

Code: Select all

dim as string s1,ss1,s2,ss2,s3,ss3,ss4
dim as zstring ptr z

s1 = "FreeBASIC"
ss1 = "IC"
s2 = "FreeBASIC is a free 32-bit compiler for the BASIC language."
ss2 = "language"
s3 = s2 & "It is open source and licensed under the GPL."
s3 &= "It is designed to be syntax compatible with QuickBASIC,"
s3 &= "while expanding 4on the language and capabilities."
ss3 = "language and capabilities"
ss4 = "4"
print s1
print
print s2
print
print s3
print

print instr( s1 , ss1 ),
print FindString( s1, ss1, 1 )
z = strstr( strptr(s1), strptr(ss1))
print (z - strptr(s1)) + 1
print instr( s2 , ss2 ),
print FindString( s2, ss2, 1 )
z = strstr( strptr(s2), strptr(ss2))
print (z - strptr(s2)) + 1
print instr( s3 , ss3 ),
print FindString( s3, ss3, 1 )
z = strstr( strptr(s3), strptr(ss3))
print (z - strptr(s3)) + 1
print instr( s3,ss4 ),
print FindString( s3,ss4, 1)
z = strstr( strptr(s3), strptr(ss4))
print (z - strptr(s3)) + 1
print

sleep 3000

counter_begin( 1000, HIGH_PRIORITY_CLASS )
counter_end
print counter_cycles

counter_begin( 1000, HIGH_PRIORITY_CLASS )
   z = strstr( strptr(s1), strptr(ss1))   
counter_end
print counter_cycles, "cycles, strstr s1"


counter_begin( 1000, HIGH_PRIORITY_CLASS )
    instr( 1, s1, ss1 )
counter_end
print counter_cycles, "cycles, Instr s1"

counter_begin( 1000, HIGH_PRIORITY_CLASS )
   FindString( s1, ss1, 1 )
counter_end
print counter_cycles, "cycles, FindString s1"

counter_begin( 1000, HIGH_PRIORITY_CLASS )
   strstr( strptr(s2), strptr(ss2))
counter_end
print counter_cycles, "cycles, strstr s2"

counter_begin( 1000, HIGH_PRIORITY_CLASS )
    instr( 1, s2, ss2 )
counter_end
print counter_cycles, "cycles, Instr s2"

counter_begin( 1000, HIGH_PRIORITY_CLASS )
   FindString( s2, ss2, 1 )
counter_end
print counter_cycles, "cycles, FindString s2"

counter_begin( 1000, HIGH_PRIORITY_CLASS )
   strstr( s3, ss3)
counter_end
print counter_cycles, "cycles, strstr s3"


counter_begin( 1000, HIGH_PRIORITY_CLASS )
    instr( 1, s3, ss3 )
counter_end
print counter_cycles, "cycles, Instr s3"

counter_begin( 1000, HIGH_PRIORITY_CLASS )
   FindString( s3, ss3, 1 )
counter_end
print counter_cycles, "cycles, FindString s3"


counter_begin( 1000, HIGH_PRIORITY_CLASS )
   strstr( s3, ss4)
counter_end
print counter_cycles, "cycles, strstr ss4"

counter_begin( 1000, HIGH_PRIORITY_CLASS )
    instr( 1, s3, ss4 )
counter_end
print counter_cycles, "cycles, Instr ss4"

counter_begin( 1000, HIGH_PRIORITY_CLASS )
    FindString( s3, ss4,1 )
counter_end
print counter_cycles, "cycles, FindString ss4"

sleep
results
8 8
8
51 51
51
184 184
184
176 176
176

0
51 cycles, strstr s1
2971 cycles, Instr s1
166 cycles, FindString s1
349 cycles, strstr s2
3275 cycles, Instr s2
1115 cycles, FindString s2
1109 cycles, strstr s3
4180 cycles, Instr s3
3927 cycles, FindString s3
431 cycles, strstr ss4
727 cycles, Instr ss4
2711 cycles, FindString ss4
Of course strstr is a 'raw' strstr. If any string (either pattern or haystack) equals 0 then strstr crashes. Which is not so good (makes you wonder why the msvcrt does not catch those errors?) And starting the matching at a different position is only possible by doing some pointer arithmetic which means more checking to guard against strstr crashing.

I'm not quite sure how to interpret the results using strstr. Perhaps I'm doing something wrong here. The results of the last testcase seems about right (instr using strchr at 727 and strstr at 431). But the other results are 'suspicious'.
MichaelW
Posts: 3500
Joined: May 16, 2006 22:34
Location: USA

Post by MichaelW »

I posted code that adapts strstr to behave more or less like Instr here and here.
The timing is almost the same every time I run the benchmark. Almost no difference between different runs. Very accurate.
It is for most processors, but not for a Pentium 4.
exagonx
Posts: 314
Joined: Mar 20, 2009 17:03
Location: Italy
Contact:

Re: Find (At the assembler inserts)

Post by exagonx »

VANYA wrote: Apr 19, 2011 16:51 Decided to try to implement functions like Instr
Hello, I dont know if exist a function for do the same thing that because I made this little function for Find a string inside a string.

Code: Select all

'dim MyString as string = "Hello my World"
'dim Position as integer = 0
'
'Position = FindString(MyString, "my")
'
'if Position > 0 then 
'	print "String found at position:" & str(Position)
'else
'	print "String not Found"
'end if
'
' Syntax: as integer = FindString(MainString, SearchString, CaseSensitive = 1)
'
' MainString = Are the String with the all words with the one you need
' SearchString = Are the one word that you want know if exist and where are
' CaseSensitive = Default are 1 (True) if you want ignore the case put 0 and will find the word in upper case or lower
'

Declare Function FindString(ByVal MainString as String = "", ByVal FindStr as String, ByVal CaseSensitive as Integer = 1)as Integer


Function FindString(ByVal MainString as String = "", ByVal FindStr as String, ByVal CaseSensitive as Integer = 1)as Integer export
	Dim as integer MainLen, FindLen, CicleStep = 0, OffSet, Found = 0
	Dim as string ExtractFString, CompareFString
	
	MainLen = len(MainString)
	FindLen = len(FindStr)
	
	if CaseSensitive > 0 then 
			CompareFString = FindStr
		else 
			CompareFString = UCASE(FindStr)
		End if
		
	if FindLen < MainLen then
		Do until(Found > (MainLen - FindLen))
			CicleStep += 1
			if CaseSensitive > 0 then 
				ExtractFString = mid(MainString,CicleStep, FindLen)
			else 
				ExtractFString = UCASE(mid(MainString,CicleStep, FindLen))
			End if
		
			'MID(String, Position, Lenght'
			if  ExtractFString = CompareFString then 
				Found = 1
				OffSet = CicleStep
				Found = MainLen
			end if
	
	
		Loop
	else
		OffSet = -1
	end if
	
	Return OffSet
End Function
Post Reply