This latest version of InstrMulti fixes two (minor) bugs: duplicate occurrence detections due to unmanaged 250/500 overlaps, and the untested Start parameter.
The performance degradation issue on long strings has been addressed in several ways: a Borg collective submission mentions that ByPass would be much simpler and at least as efficient as a queue handling Aside/Recover indices. The minor drawback is that the strings to be detected must not exceed 250 characters, and in any case, lzle is not intended to handle excessively long keys.
str_tmp_1/Mid is replaced by pointers; the difference is imperceptible and rather in favor of the old code using Mid. However, there is no need to copy Str_Input into a variable, which is or
should be cleaner and more memory-efficient. A change and new feature concerns the uBStopFirstFound parameter, which acts as a limit on the number of detected parameters, thus allowing for optimal detection. If this parameter is set to 255, there is no identification in the subset of the string concerned by the detection (faster), and the returned value is the position of the found occurrence, as with Instr, to meet the objective of a textual join.
In other cases, the returned value is the logical bound for the modified values in the return result Arrays.
The performance is virtually unchanged; the result finally seems suitable for professional use. Thanks, Seven.
Code: Select all
#Include once "D:\Basic\LZLE_.bi"
' WITH CONST MAX_ASIDE=500 & change Byte to Integer in .Aside, .Recover & .AsideReset properties
Function InstrMulti(Str_Input As String, ListeQC As List, Str_Results() As String, uIntIndice() As uInteger, uBStopFirstFound As uByte=0, uIntStart As uInteger=0) As uInteger
' Dim As String str_tmp_1
Dim as zString Ptr z_Input
Dim As uInteger uInt_InputLen_0=Len(Str_Input)-uIntStart, uInt_InputLen, i, u, t, IndiceRes
Dim As uByte ubRevocated(500), ByPass=1
ListeQC.RootPrivate : ListeQC.SeekMethod(0) : Str_Results(0)="" : uInt_InputLen=500
For t=1 to uInt_InputLen_0 Step 250
If ByPass Then
'Using Mid
' str_tmp_1= Mid(Str_Input, t, 500)
' uInt_InputLen=Len(str_tmp_1)
' z_Input= StrPtr(str_tmp_1) : z_Input+=uIntStart
'Using Ptr
If uInt_InputLen_0-t <250 Then : uInt_InputLen=uInt_InputLen_0-t+1 : End If
z_Input= StrPtr(Str_Input) : z_Input+=t-1+uIntStart
For i=1 to uInt_InputLen
ubRevocated(i)=0 : ListeQC.AsideReset(i)
For u=1 to i
If ubRevocated(u)=0 Then
ListeQC.Recover(u)
If ListeQC.HasTag(Chr((*z_Input)[0])) Then
ListeQC.SeekMethod(1)
If ListeQC.Check Then
If uBStopFirstFound=255 Then : Return u : End If
If IndiceRes>=Ubound(Str_Results) Then : Redim Preserve Str_Results(Ubound(Str_Results)+100) : End If
If IndiceRes>=Ubound(uIntIndice) Then : Redim Preserve uIntIndice(Ubound(uIntIndice)+100) : End If
If uBStopFirstFound<>0 AndAlso uBStopFirstFound<=IndiceRes Then : Return IndiceRes-1 : End If
Str_Results(IndiceRes)=ListeQC.HashTag : uIntIndice(IndiceRes)=u+t-1 : IndiceRes+=1
If ListeQC.Down Then : ListeQC.Aside(u) : Else : ubRevocated(u)=1 : ListeQC.SeekMethod(0) : End If
ListeQC.RootPrivate : ListeQC.SeekMethod(0)
Else
If ListeQC.Down Then : ListeQC.Aside(u) : ubRevocated(u)=0 : Else : ubRevocated(u)=1 : ListeQC.SeekMethod(0) : End If
ListeQC.RootPrivate : ListeQC.SeekMethod(0)
End If
Else : ubRevocated(u)=1
ListeQC.RootPrivate : ListeQC.Aside(u) : ListeQC.SeekMethod(0)
End If
End If
Next u
z_Input+=1
Next i
End If
If ByPass=0 Then : ByPass=1 : Else : ByPass=0 : End If
Next t
Return IndiceRes-1
End Function
Dim ListeQC As List
Dim Str_Results() As string
Dim uIntIndice() As uInteger
Dim uIntIndiceClassic(100) As uInteger
Redim Str_Results(10) : Redim uIntIndice(100)
'Dim As string Str_Input = "78poissonchatTGHVBNI98virgulepointavionordinateurchienboite7654titiimprimantesourisAzertyuiopqsdfghjklmwxcvbn" 'Len Max=200
Dim As string Str_Input = "*NePYT98qsazerfdcxwAzertyuiopAzertylmwxcvbnAzertyuiopqsdfghjklmwxcvbnpoissonchatvirgulepointavionordinateurchienboite7654titiZimprimantesouris"
'Dim As string Str_Input = "GTCCYCTRCGTCCYCTRCTTXTXTRCTTXTXT7654EXTRRCKYNRCTTXTXTEXTRRCKYNRCTTXGTCCYCTRCTTXTXTRCTTXTXT7654EXTRRCKYNRCTTXTXTEXTRRCKYNRCTTXGTCCYCTRCTTXTXTRCTTXTXT7654EXTRRCKYNRCTTXTXTEXTRRCKYNRCTTXTTXTXTRCTTXTXT7654EXTRRCKYNRCTTXTXTEXTRRCKYNRCTTXTXTEXTRRCKYNRCTTXTXTEXTRRCKYNEXTRRCKYNI?INYDRESXeYTYOPNFGHFsourisAzertyREZAWTYTYUIOPMLPOUNBGNOFDH*NedfghjklmwxcvqsazerfdcxwAzertyuiopAzertylmwxcvbnAzertyuiopqsdfghjklmwxcvbnpoissonchat"
Dim As string Str_Res_1, Str_Res_2
Dim As uByte uBStopFirstFound=0
Dim as double T1, T2
Dim As Integer i, w, x, IndiceInstrMulti, TestMaxi = 100000
' TestMaxi = 350
ListeQC.HashTag("virgule")
ListeQC.HashTag("poissonchatTGHVBNI")
ListeQC.HashTag("chienboite")
ListeQC.HashTag("Bar")
ListeQC.HashTag("ordi")
ListeQC.HashTag("poiss")
ListeQC.HashTag("isso")
ListeQC.HashTag("imprimante")
ListeQC.HashTag("oisson")
ListeQC.HashTag("oissonch")
ListeQC.HashTag("titi")
? "Loading values to FT search"
For i=5 to TestMaxi
ListeQC.HashTag(str(i) )
Next i
While ListeQC.Up : Wend : ListeQC.FastAside(1)
? "Loaded =" & ListeQC.NodeCount
dim as double t=timer
IndiceInstrMulti = InstrMulti(Str_Input, ListeQC, Str_Results(), uIntIndice(), uBStopFirstFound )
For i=0 To IndiceInstrMulti
Str_Res_1 += Str_Results(i)+";"
Next i
T1= timer-t :
x=0
t=timer
For i=5 to TestMaxi
w=Instr(Str_Input, str(i))
If w<>0 Then
Str_Res_2+=str(i)+";"
uIntIndiceClassic(x)=w : x+=1
End If
Next i
T2= timer-t
ListeQC.Root
While ListeQC.KeyStep
If Cint(ListeQC.Tag)=0 Then
w=Instr(Str_Input, ListeQC.HashTag) ':
If w<>0 Then
Str_Res_2+=ListeQC.HashTag+ ";"
uIntIndiceClassic(x)=w : x+=1
End If
End If
Wend
?
? "Using InstrMulti :"
? "Result=" & Str_Res_1
Str_Res_1=""
For i=0 to IndiceInstrMulti
Str_Res_1+= Str(uIntIndice(i)) + ";"
Next i
? "Positions=" & Str_Res_1
?
? "Using Instr inside a loop :"
? "Result=" & Str_Res_2
Str_Res_2=""
For i=0 to IndiceInstrMulti
Str_Res_2+= Str(uIntIndiceClassic(i)) + ";"
Next i
? "Positions=" & Str_Res_2
?
? T1
? T2
? "InstrMulti Warp Factor=" & Cint(Cdbl(T2/T1)*100)/100
?
' Just to check list's inegrity
ListeQC.Recycle
? "Garbage=" & ListeQC.GarbageCount
ListeQC.HashTag("virgule")
ListeQC.HashTag("poissonchatTGHVBNI")
ListeQC.HashTag("chienboite")
ListeQC.HashTag("Bar")
ListeQC.HashTag("ordi")
ListeQC.HashTag("poiss")
ListeQC.HashTag("isso")
ListeQC.HashTag("imprimante")
ListeQC.HashTag("oisson")
ListeQC.HashTag("oissonch")
ListeQC.HashTag("titi")
ListeQC.Root
'? "Loading values to FT search"
For i=5 to TestMaxi
ListeQC.HashTag(str(i) )
Next i
ListeQC.Destroy : gCollector.Destroy
Print "??=" & AllocateDeallocateCounter & " nodecount=" & ListeQC.NodeCount
sleep : system
Edited : issue on uInt_InputLen=>500
To make it a bit seriously, a short documentation
in the taste of FB.
InstrMulti
Locates every occurrence of a substring or character
of a set (List) within a string
Or
Locates 'uBStopFirstFound' first occurrences of a substring or character
of a set (List) within a string
Or
Locates first/if occurrence of a substring or character
of a set (List) within a string (without identify the hit in the set uBStopFirstFound=255)
Syntax
Function InStrMulti ( ByRef str As Const String, ByRef substring set As List, ByRef varlen StringArray() as String, ByRef varlen uIntegerArray() As uInteger, [ByRef SearchMode as uByte,] [ByRef uIntStart As uInteger ] ) As uInteger
Usage
LogicalUbound = InStrMulti( str, List, StrArray(), uIntegerArray(), [ Mode (<>255) ], [ start, ] )
Or
First = InStrMulti( str, List, StrArray(), uIntegerArray(), 255, [ start, ] )
Parameters
str : string to search
List : list of strings to search for in str
StrArray() : string array passed byref for results
uIntegerArray() : uInteger array passed byref for results positions
Mode : 0=All hits, 1-254=max hits threashold, 255=Just locate first hit
Start : starting position
Return Value
Mode 0-254 : LogicalUbound for reading values in StrArray and uIntegerArray
ByRef : StrArray() and uIntegerArray()
Mode 255 : the position of the first occurrence of one of the substring of the substring list in str
Limitations
- Substrings size in list =< 250 characters per substring
- Number of substrings to search for should be >500 (vs Instr in a loop)
- Not deeply tested - As is
- Usually not efficient for one-off operations
Addendum :
In lzle .DropAll property, please replace :
If this.IsDestroyed=1 Then : Return 0 : End If : this.NodeRecycle : this.NodeRecycle2
By
If this.IsDestroyed=1 Then : Return 0 : End If : this.NodeRecycle : this.NodeRecycle2 : this.TrackCompute : this.Recycle ' PATCHED
hope it will works better