Find all keywords within a string

General FreeBASIC programming questions.
badidea
Posts: 1175
Joined: May 24, 2007 22:10
Location: The Netherlands

Find all keywords within a string

Postby badidea » Jan 04, 2019 17:00

Hello all, I made the following program which searches for all occurrences of a list of keywords within another string.
It seems to work fine, but I ask for suggestions to make it faster (without using assembly).

Code: Select all

const as integer MAX_KEYWORD = 400
dim as string keyword(MAX_KEYWORD-1)
dim as integer numKeywords = 0
dim as string dataStr

' Fill keyword() from the DATA statements. The loop stops as soon as READ
' hands back an empty string (there is no explicit terminator in the DATA
' list, so this relies on READ yielding "" once the list is exhausted) or
' when the array is full.
do while numKeywords < MAX_KEYWORD
   read dataStr
   if len(dataStr) = 0 then exit do
   keyword(numKeywords) = dataStr
   numKeywords += 1
loop
print "numKeywords: " & str(numKeywords)

' FreeBASIC keyword list consumed by the READ loop above. The list is split
' over two DATA statements; each statement must start with DATA and must not
' end with a dangling comma.
data "Abs","Abstract","Access","Acos","Add","Alias","Allocate","Alpha","And","AndAlso","Append","As","Assert","AssertWarn","Asc","Asin","Asm","Atan2","Atn","Base","Beep","Bin","Binary","Bit","BitReset","BitSet","BLoad","BSave","ByRef","ByVal","Call","CAllocate","Case","Cast","CByte","CDbl","cdecl","Chain","ChDir","Chr","CInt","Circle","Class","Clear","CLng","CLngInt","Close","Cls","Color","Command","Common","CondBroadcast","CondCreate","CondDestroy","CondSignal","CondWait","Const","Constructor","Continue","Cos","CPtr","CShort","CSign","CSng","CsrLin","CUByte","CUInt","CULng","CULngInt","CUnsg","CurDir","CUShort","Custom","CVD","CVI","CVL","CVLongInt","CVS","CVShort","Data","Date","DateAdd","DateDiff","DatePart","DateSerial","DateValue","Day","Deallocate","Declare","DefByte","DefDbl","defined","DefInt","DefLng","DefLongInt","DefShort","DefSng","DefStr","DefUByte","DefUInt","DefULongInt","DefUShort","Delete","Destructor","Dim","Dir","Do","Draw","DyLibFree","DyLibLoad","DyLibSymbol","Else","ElseIf","Encoding","End","EndIf","Enum","Environ","EOF","Eqv","Erase","Erfn","Erl","Ermn","Err","Error","Event","Exec","ExePath","Exit","Exp","Export","Extends","Extern","False","Field","FileAttr","FileCopy","FileDateTime","FileExists","FileLen","Fix","Flip","For","Format","Frac","Fre","FreeFile","Function","Get","GetJoystick","GetKey","GetMouse","GoSub","Goto","Hex","HiByte","HiWord","Hour","If","IIf","ImageConvertRow","ImageCreate","ImageDestroy","ImageInfo","Imp","Implements","Import","Inkey","Inp","Input","InStr","InStrRev","Int","Is","IsDate","IsRedirected","Kill","LBound","LCase","Left","Len","Let","Lib","Line","LoByte","Loc","Local","Locate","Lock","LOF","Log","Loop","LoWord","LPOS","LPrint","LSet","LTrim","Mid","Minute","MKD","MkDir","MKI","MKL","MKLongInt","Mks","MkShort","Mod","Month","MonthName","MultiKey","MutexCreate","MutexDestroy","MutexLock","MutexUnlock","Naked","Name","Namespace","Next","New","Not","Now","Oct","OffsetOf","On","Once","Open","Operator","Option","Or"
data "OrElse","Out","Output","Overload","Override","Paint","Palette","pascal","PCopy","Peek","PMap","Point","PointCoord","Poke","Pos","Preserve","PReset","Print","Private","ProcPtr","Property","Protected","PSet","Public","Put","Random","Randomize","Read","Reallocate","ReDim","Rem","Reset","Restore","Resume","Return","RGB","RGBA","Right","RmDir","Rnd","RSet","RTrim","Run","SAdd","Scope","Screen","ScreenCopy","ScreenControl","ScreenEvent","ScreenInfo","ScreenGLProc","ScreenList","ScreenLock","ScreenPtr","ScreenRes","ScreenSet","ScreenSync","ScreenUnlock","Second","Seek","Select","SetDate","SetEnviron","SetMouse","SetTime","Sgn","Shared","Shell","Shl","Shr","Sin","SizeOf","Sleep","Space","Spc","Sqr","Static","stdcall","Step","Stick","Stop","Str","Strig","StrPtr","Sub","Swap","System","Tab","Tan","Then","This","ThreadCall","ThreadCreate","ThreadDetach","ThreadWait","Time","TimeSerial","TimeValue","Timer","To","Trans","Trim","Type","Typeof","UBound","UCase","Union","Unlock","Until","Using","va_arg","va_first","va_next","Val","ValLng","ValInt","ValUInt","ValULng","Var","VarPtr","View","Virtual","Wait","WBin","WChr","WeekDay","WeekDayName","Wend","While","WHex","Width","Window","WindowTitle","WInput","With","WOct","Write","WSpace","WStr","Xor","Year"

const as integer NUM_LOOPS = 10000
dim as string searchText = "BlablaxxxThreadCreateyyyAbsxxxYearyyyMoretextAbs!?randomtext!absABS"
searchText = searchText & searchText 'double the text length
dim as integer searchPos

dim as double t = timer
' The whole search is repeated NUM_LOOPS times so the average is meaningful.
for pass as integer = 0 to NUM_LOOPS - 1
   ' Report every (possibly overlapping) occurrence of every keyword.
   for k as integer = 0 to numKeywords - 1
      searchPos = instr(1, searchText, keyword(k))
      do until searchPos = 0
         ' Printing happens on the first pass only, to keep timing clean.
         if pass = 0 then print keyword(k) & " @ " & str(searchPos)
         ' Resume one character after the last hit.
         searchPos = instr(searchPos + 1, searchText, keyword(k))
      loop
   next
next
print "Average loop duration [us]: "; (timer-t) * 1e6 / NUM_LOOPS

Output:

numKeywords: 371
Abs @ 25
Abs @ 46
Abs @ 92
Abs @ 113
ThreadCreate @ 10
ThreadCreate @ 77
Year @ 31
Year @ 98
Average loop duration [us]: 121.9028945546597


BTW, I stole the keyword list from this project: viewtopic.php?f=8&t=27284
Last edited by badidea on Jan 05, 2019 11:36, edited 1 time in total.
D.J.Peters
Posts: 7659
Joined: May 28, 2005 3:28

Re: Find on keywords within a string

Postby D.J.Peters » Jan 04, 2019 23:36

Here it is in BASIC and assembler; it's ultra fast!
How long does it take to search for 10,000 keywords on your box?

I could make it 3 times faster with a binary tree, but why? :-)

Joshy

Code: Select all

' 256-entry lookup table used by CRC32(): one precomputed 32-bit value per
' possible byte value (the CRC routines index it with a value masked by &HFF).
' NOTE(review): the entries look like the common reflected CRC-32 table
' (polynomial &HEDB88320, which appears at index 128) - not re-derived here.
dim shared as ulong CRC32Table(255) => { _
&H00000000, &H77073096, &HEE0E612C, &H990951BA, _
&H076DC419, &H706AF48F, &HE963A535, &H9E6495A3, _
&H0EDB8832, &H79DCB8A4, &HE0D5E91E, &H97D2D988, _
&H09B64C2B, &H7EB17CBD, &HE7B82D07, &H90BF1D91, _
&H1DB71064, &H6AB020F2, &HF3B97148, &H84BE41DE, _
&H1ADAD47D, &H6DDDE4EB, &HF4D4B551, &H83D385C7, _
&H136C9856, &H646BA8C0, &HFD62F97A, &H8A65C9EC, _
&H14015C4F, &H63066CD9, &HFA0F3D63, &H8D080DF5, _
&H3B6E20C8, &H4C69105E, &HD56041E4, &HA2677172, _
&H3C03E4D1, &H4B04D447, &HD20D85FD, &HA50AB56B, _
&H35B5A8FA, &H42B2986C, &HDBBBC9D6, &HACBCF940, _
&H32D86CE3, &H45DF5C75, &HDCD60DCF, &HABD13D59, _
&H26D930AC, &H51DE003A, &HC8D75180, &HBFD06116, _
&H21B4F4B5, &H56B3C423, &HCFBA9599, &HB8BDA50F, _
&H2802B89E, &H5F058808, &HC60CD9B2, &HB10BE924, _
&H2F6F7C87, &H58684C11, &HC1611DAB, &HB6662D3D, _
&H76DC4190, &H01DB7106, &H98D220BC, &HEFD5102A, _
&H71B18589, &H06B6B51F, &H9FBFE4A5, &HE8B8D433, _
&H7807C9A2, &H0F00F934, &H9609A88E, &HE10E9818, _
&H7F6A0DBB, &H086D3D2D, &H91646C97, &HE6635C01, _
&H6B6B51F4, &H1C6C6162, &H856530D8, &HF262004E, _
&H6C0695ED, &H1B01A57B, &H8208F4C1, &HF50FC457, _
&H65B0D9C6, &H12B7E950, &H8BBEB8EA, &HFCB9887C, _
&H62DD1DDF, &H15DA2D49, &H8CD37CF3, &HFBD44C65, _
&H4DB26158, &H3AB551CE, &HA3BC0074, &HD4BB30E2, _
&H4ADFA541, &H3DD895D7, &HA4D1C46D, &HD3D6F4FB, _
&H4369E96A, &H346ED9FC, &HAD678846, &HDA60B8D0, _
&H44042D73, &H33031DE5, &HAA0A4C5F, &HDD0D7CC9, _
&H5005713C, &H270241AA, &HBE0B1010, &HC90C2086, _
&H5768B525, &H206F85B3, &HB966D409, &HCE61E49F, _
&H5EDEF90E, &H29D9C998, &HB0D09822, &HC7D7A8B4, _
&H59B33D17, &H2EB40D81, &HB7BD5C3B, &HC0BA6CAD, _
&HEDB88320, &H9ABFB3B6, &H03B6E20C, &H74B1D29A, _
&HEAD54739, &H9DD277AF, &H04DB2615, &H73DC1683, _
&HE3630B12, &H94643B84, &H0D6D6A3E, &H7A6A5AA8, _
&HE40ECF0B, &H9309FF9D, &H0A00AE27, &H7D079EB1, _
&HF00F9344, &H8708A3D2, &H1E01F268, &H6906C2FE, _
&HF762575D, &H806567CB, &H196C3671, &H6E6B06E7, _
&HFED41B76, &H89D32BE0, &H10DA7A5A, &H67DD4ACC, _
&HF9B9DF6F, &H8EBEEFF9, &H17B7BE43, &H60B08ED5, _
&HD6D6A3E8, &HA1D1937E, &H38D8C2C4, &H4FDFF252, _
&HD1BB67F1, &HA6BC5767, &H3FB506DD, &H48B2364B, _
&HD80D2BDA, &HAF0A1B4C, &H36034AF6, &H41047A60, _
&HDF60EFC3, &HA867DF55, &H316E8EEF, &H4669BE79, _
&HCB61B38C, &HBC66831A, &H256FD2A0, &H5268E236, _
&HCC0C7795, &HBB0B4703, &H220216B9, &H5505262F, _
&HC5BA3BBE, &HB2BD0B28, &H2BB45A92, &H5CB36A04, _
&HC2D7FFA7, &HB5D0CF31, &H2CD99E8B, &H5BDEAE1D, _
&H9B64C2B0, &HEC63F226, &H756AA39C, &H026D930A, _
&H9C0906A9, &HEB0E363F, &H72076785, &H05005713, _
&H95BF4A82, &HE2B87A14, &H7BB12BAE, &H0CB61B38, _
&H92D28E9B, &HE5D5BE0D, &H7CDCEFB7, &H0BDBDF21, _
&H86D3D2D4, &HF1D4E242, &H68DDB3F8, &H1FDA836E, _
&H81BE16CD, &HF6B9265B, &H6FB077E1, &H18B74777, _
&H88085AE6, &HFF0F6A70, &H66063BCA, &H11010B5C, _
&H8F659EFF, &HF862AE69, &H616BFFD3, &H166CCF45, _
&HA00AE278, &HD70DD2EE, &H4E048354, &H3903B3C2, _
&HA7672661, &HD06016F7, &H4969474D, &H3E6E77DB, _
&HAED16A4A, &HD9D65ADC, &H40DF0B66, &H37D83BF0, _
&HA9BCAE53, &HDEBB9EC5, &H47B2CF7F, &H30B5FFE9, _
&HBDBDF21C, &HCABAC28A, &H53B39330, &H24B4A3A6, _
&HBAD03605, &HCDD70693, &H54DE5729, &H23D967BF, _
&HB3667A2E, &HC4614AB8, &H5D681B02, &H2A6F2B94, _
&HB40BBE37, &HC30C8EA1, &H5A05DF1B, &H2D02EF8D }

' Choose between the two CRC32 implementations that follow.
' Unless the user already defined NO_ASM, define it automatically when the
' target is Darwin or ARM, or when the compiler's assembler syntax setting
' (__FB_ASM__) is "att"; the inline assembly below is written in Intel syntax
' for x86 / x86-64. With NO_ASM defined the plain-BASIC CRC32 is compiled.
#ifndef NO_ASM
 #if defined(__FB_DARWIN__) or defined(__FB_ARM__) or (__FB_ASM__ = "att")
  #define NO_ASM
 #endif
#endif

#ifdef NO_ASM
' Table-driven CRC-32 of nBytes bytes starting at buffer (plain BASIC).
'   buffer : address of the first byte to hash
'   nBytes : number of bytes to process; zero or negative processes nothing
' Returns the final checksum (start value &HFFFFFFFF, inverted at the end),
' which is 0 when no bytes are processed.
function CRC32(byval buffer as any ptr, byval nBytes as long) as ulong
  dim as ulong checksum = &HFFFFFFFF
  dim as ubyte ptr cursor = buffer
  dim as long remaining = nBytes
  do while remaining > 0
    ' Low byte of (checksum xor data byte) selects the table entry.
    checksum = CRC32Table((checksum xor *cursor) and &HFF) xor (checksum shr 8)
    cursor += 1
    remaining -= 1
  loop
  return checksum xor &HFFFFFFFF
end function
#else
' Table-driven CRC-32 of a memory buffer, inline-assembly version
' (32-bit x86 or x86-64, Intel syntax).
'   pAnyBuffer : address of the first byte to hash
'   BufferSize : number of bytes to process
' Returns the checksum, or 0 when pAnyBuffer is null or BufferSize < 1.
' NOTE(review): ebx/rbx, esi/rsi and edi/rdi are used as scratch without
' being saved here - confirm the compiler back end in use preserves them
' around Asm blocks.
function CRC32(byval pAnyBuffer as const any ptr, byval BufferSize as integer) as ulong
  dim as ulong ptr pTable = @CRC32Table(0)
  dim as ubyte ptr pBuffer = cptr(ubyte ptr,pAnyBuffer)
  ' The asm loops test the counter only after processing a byte, so an empty
  ' or missing buffer must be rejected before entering them.
  if pBuffer = 0 then return 0
  if BufferSize < 1 then Return 0
 #ifndef __FB_64BIT__ 
  asm
    ' All locals are loaded before ebp is repurposed as the &HFF mask.
    mov edi,[pBuffer]
    mov esi,[pTable]
    mov ecx,[BufferSize]
    mov eax,&HFFFFFFFF
    mov edx,&H000000FF
    push ebp
    mov ebp,edx
    Xor edx,edx
    loop_it:
      mov dl,[edi]
      mov ebx,eax
      Xor eax,edx
      And eax,ebp
      Shr ebx,8
      mov eax,[esi+eax*4]
      inc edi
      Xor eax,ebx
      dec ecx
    jnz loop_it
    pop ebp
    Xor eax,&HFFFFFFFF
    mov [function],eax
  End asm
 #else
  asm
    ' All locals are loaded before rbp is repurposed as the &HFF mask.
    mov rdi,[pBuffer]
    mov rsi,[pTable]
    mov rcx,[BufferSize]
    xor rax,rax          ' clear high part
    mov eax,&HFFFFFFFF   ' load low part
    mov edx,&H000000FF
    push rbp
    mov ebp,edx
    Xor edx,edx
    loop_it:
      mov dl,[rdi]
      mov ebx,eax
      Xor eax,edx
      And eax,ebp
      Shr ebx,8
      mov eax,[rsi+rax*4]
      ' Advance the full 64-bit pointer. Writing the 32-bit half (inc edi)
      ' would zero the upper 32 bits of rdi and corrupt the address.
      inc rdi
      Xor eax,ebx
      dec rcx
    jnz loop_it
    pop rbp
    Xor eax,&HFFFFFFFF
    mov [function],eax
  End asm
 #endif
End Function
#endif
' Look up KeyWord in a table of precomputed keyword checksums.
'   KeyWord     : the word to look for
'   CheckSums() : CRC32 of every known keyword, in keyword-array order
' Returns the index of the first entry whose checksum equals CRC32(KeyWord),
' or -1 when KeyWord is empty, the table is empty, or nothing matches.
' Only checksums are compared: a non-keyword whose CRC32 happens to equal a
' keyword's is reported as found, so callers needing certainty should
' compare the strings as well.
function FindKeyword(byref KeyWord as string, CheckSums() as ulong) as integer
  var FirstIndex = lbound(CheckSums), LastIndex = ubound(CheckSums)
  var nChars = len(KeyWord)
  ' Empty table: ubound is below lbound. A one-element table is valid.
  if LastIndex < FirstIndex then return -1
  if nChars < 1 then return -1
  var CheckSum = CRC32(strptr(KeyWord),nChars)
  for i as integer = FirstIndex to LastIndex
    if CheckSums(i) = CheckSum then return i
  next
  return -1
end function 
 


' FreeBASIC keyword list. The initializer continues on a second line, so the
' first line must end with the "_" continuation character.
dim   as string KeyWords(...) =>{"Abs","Abstract","Access","Acos","Add","Alias","Allocate","Alpha","And","AndAlso","Append","As","Assert","AssertWarn","Asc","Asin","Asm","Atan2","Atn","Base","Beep","Bin","Binary","Bit","BitReset","BitSet","BLoad","BSave","ByRef","ByVal","Call","CAllocate","Case","Cast","CByte","CDbl","cdecl","Chain","ChDir","Chr","CInt","Circle","Class","Clear","CLng","CLngInt","Close","Cls","Color","Command","Common","CondBroadcast","CondCreate","CondDestroy","CondSignal","CondWait","Const","Constructor","Continue","Cos","CPtr","CShort","CSign","CSng","CsrLin","CUByte","CUInt","CULng","CULngInt","CUnsg","CurDir","CUShort","Custom","CVD","CVI","CVL","CVLongInt","CVS","CVShort","Data","Date","DateAdd","DateDiff","DatePart","DateSerial","DateValue","Day","Deallocate","Declare","DefByte","DefDbl","defined","DefInt","DefLng","DefLongInt","DefShort","DefSng","DefStr","DefUByte","DefUInt","DefULongInt","DefUShort","Delete","Destructor","Dim","Dir","Do","Draw","DyLibFree","DyLibLoad","DyLibSymbol","Else","ElseIf","Encoding","End","EndIf","Enum","Environ","EOF","Eqv","Erase","Erfn","Erl","Ermn","Err","Error","Event","Exec","ExePath","Exit","Exp","Export","Extends","Extern","False","Field","FileAttr","FileCopy","FileDateTime","FileExists","FileLen","Fix","Flip","For","Format","Frac","Fre","FreeFile","Function","Get","GetJoystick","GetKey","GetMouse","GoSub","Goto","Hex","HiByte","HiWord","Hour","If","IIf","ImageConvertRow","ImageCreate","ImageDestroy","ImageInfo","Imp","Implements","Import","Inkey","Inp","Input","InStr","InStrRev","Int","Is","IsDate","IsRedirected","Kill","LBound","LCase","Left","Len","Let","Lib","Line","LoByte","Loc","Local","Locate","Lock","LOF","Log","Loop","LoWord","LPOS","LPrint","LSet","LTrim","Mid","Minute","MKD","MkDir","MKI","MKL","MKLongInt","Mks","MkShort","Mod","Month","MonthName","MultiKey","MutexCreate","MutexDestroy","MutexLock","MutexUnlock","Naked","Name","Namespace","Next","New","Not","Now","Oct","OffsetOf","On","Once","Open", _
"Operator","Option","Or","OrElse","Out","Output","Overload","Override","Paint","Palette","pascal","PCopy","Peek","PMap","Point","PointCoord","Poke","Pos","Preserve","PReset","Print","Private","ProcPtr","Property","Protected","PSet","Public","Put","Random","Randomize","Read","Reallocate","ReDim","Rem","Reset","Restore","Resume","Return","RGB","RGBA","Right","RmDir","Rnd","RSet","RTrim","Run","SAdd","Scope","Screen","ScreenCopy","ScreenControl","ScreenEvent","ScreenInfo","ScreenGLProc","ScreenList","ScreenLock","ScreenPtr","ScreenRes","ScreenSet","ScreenSync","ScreenUnlock","Second","Seek","Select","SetDate","SetEnviron","SetMouse","SetTime","Sgn","Shared","Shell","Shl","Shr","Sin","SizeOf","Sleep","Space","Spc","Sqr","Static","stdcall","Step","Stick","Stop","Str","Strig","StrPtr","Sub","Swap","System","Tab","Tan","Then","This","ThreadCall","ThreadCreate","ThreadDetach","ThreadWait","Time","TimeSerial","TimeValue","Timer","To","Trans","Trim","Type","Typeof","UBound","UCase","Union","Unlock","Until","Using","va_arg","va_first","va_next","Val","ValLng","ValInt","ValUInt","ValULng","Var","VarPtr","View","Virtual","Wait","WBin","WChr","WeekDay","WeekDayName","Wend","While","WHex","Width","Window","WindowTitle","WInput","With","WOct","Write","WSpace","WStr","Xor","Year"}
' Build one CRC32 checksum per keyword, stored at the keyword's own index.
var LastKeyWord = ubound(KeyWords)
var nKeywords = LastKeyWord + 1
redim as ulong CheckSums(LastKeyWord)

for slot as integer = 0 to LastKeyWord
  var crc = CRC32(strptr(KeyWords(slot)), len(KeyWords(slot)))
  CheckSums(slot) = crc
  print "crc32: 0x" & hex(crc, 8) & " of " & KeyWords(slot)
next

' Benchmark: look up nSearch randomly chosen keywords and time the whole run.
var nSearch = 10000
print "search started"

var tStart = Timer()
for i as integer = 1 to nSearch
  ' rnd() is below 1, so scaling by the number of keywords (LastKeyWord + 1)
  ' yields indices 0 .. LastKeyWord; scaling by LastKeyWord alone would
  ' never select the last keyword.
  var search  = int(rnd()*(LastKeyWord + 1))
  var keyword = KeyWords(search)
  ' print "search: " & keyword;
  var index   = FindKeyWord(keyword,CheckSums())
  ' Optional self-check, left disabled so it does not distort the timing:
  'if index>-1 then
  '  print " found at " & index
  'else
  '  print " not found must be an error !"
  '  beep : sleep : end 1
  'end if 
next
var tEnd = Timer()-tStart
print "searched " & nSearch & " keywords in " & tEnd & " seconds !"
sleep


D.J.Peters
Posts: 7659
Joined: May 28, 2005 3:28

Re: Find on keywords within a string

Postby D.J.Peters » Jan 04, 2019 23:41

On an old laptop, searching 1,000,000 keywords takes under one second (0.9204567447304726 s)!

Or maybe I measured it wrong; I'll think about it ...

However it's fast enough to do syntax highlighting in real time.
In other words, no one can type words faster than that :lol:

#define NO_ASM
on top of the file to enable BASIC only.

Joshy
D.J.Peters
Posts: 7659
Joined: May 28, 2005 3:28

Re: Find on keywords within a string

Postby D.J.Peters » Jan 04, 2019 23:53

Here is the BASIC-only version; it's ultra fast too :-)

I added one more test to make sure that all CRC32 checksums really are unique!

Joshy

Code: Select all

' 256-entry lookup table used by CRC32(): one precomputed 32-bit value per
' possible byte value (the CRC routine indexes it with a value masked by &HFF).
' NOTE(review): the entries look like the common reflected CRC-32 table
' (polynomial &HEDB88320, which appears at index 128) - not re-derived here.
dim shared as ulong CRC32Table(255) => { _
&H00000000, &H77073096, &HEE0E612C, &H990951BA, _
&H076DC419, &H706AF48F, &HE963A535, &H9E6495A3, _
&H0EDB8832, &H79DCB8A4, &HE0D5E91E, &H97D2D988, _
&H09B64C2B, &H7EB17CBD, &HE7B82D07, &H90BF1D91, _
&H1DB71064, &H6AB020F2, &HF3B97148, &H84BE41DE, _
&H1ADAD47D, &H6DDDE4EB, &HF4D4B551, &H83D385C7, _
&H136C9856, &H646BA8C0, &HFD62F97A, &H8A65C9EC, _
&H14015C4F, &H63066CD9, &HFA0F3D63, &H8D080DF5, _
&H3B6E20C8, &H4C69105E, &HD56041E4, &HA2677172, _
&H3C03E4D1, &H4B04D447, &HD20D85FD, &HA50AB56B, _
&H35B5A8FA, &H42B2986C, &HDBBBC9D6, &HACBCF940, _
&H32D86CE3, &H45DF5C75, &HDCD60DCF, &HABD13D59, _
&H26D930AC, &H51DE003A, &HC8D75180, &HBFD06116, _
&H21B4F4B5, &H56B3C423, &HCFBA9599, &HB8BDA50F, _
&H2802B89E, &H5F058808, &HC60CD9B2, &HB10BE924, _
&H2F6F7C87, &H58684C11, &HC1611DAB, &HB6662D3D, _
&H76DC4190, &H01DB7106, &H98D220BC, &HEFD5102A, _
&H71B18589, &H06B6B51F, &H9FBFE4A5, &HE8B8D433, _
&H7807C9A2, &H0F00F934, &H9609A88E, &HE10E9818, _
&H7F6A0DBB, &H086D3D2D, &H91646C97, &HE6635C01, _
&H6B6B51F4, &H1C6C6162, &H856530D8, &HF262004E, _
&H6C0695ED, &H1B01A57B, &H8208F4C1, &HF50FC457, _
&H65B0D9C6, &H12B7E950, &H8BBEB8EA, &HFCB9887C, _
&H62DD1DDF, &H15DA2D49, &H8CD37CF3, &HFBD44C65, _
&H4DB26158, &H3AB551CE, &HA3BC0074, &HD4BB30E2, _
&H4ADFA541, &H3DD895D7, &HA4D1C46D, &HD3D6F4FB, _
&H4369E96A, &H346ED9FC, &HAD678846, &HDA60B8D0, _
&H44042D73, &H33031DE5, &HAA0A4C5F, &HDD0D7CC9, _
&H5005713C, &H270241AA, &HBE0B1010, &HC90C2086, _
&H5768B525, &H206F85B3, &HB966D409, &HCE61E49F, _
&H5EDEF90E, &H29D9C998, &HB0D09822, &HC7D7A8B4, _
&H59B33D17, &H2EB40D81, &HB7BD5C3B, &HC0BA6CAD, _
&HEDB88320, &H9ABFB3B6, &H03B6E20C, &H74B1D29A, _
&HEAD54739, &H9DD277AF, &H04DB2615, &H73DC1683, _
&HE3630B12, &H94643B84, &H0D6D6A3E, &H7A6A5AA8, _
&HE40ECF0B, &H9309FF9D, &H0A00AE27, &H7D079EB1, _
&HF00F9344, &H8708A3D2, &H1E01F268, &H6906C2FE, _
&HF762575D, &H806567CB, &H196C3671, &H6E6B06E7, _
&HFED41B76, &H89D32BE0, &H10DA7A5A, &H67DD4ACC, _
&HF9B9DF6F, &H8EBEEFF9, &H17B7BE43, &H60B08ED5, _
&HD6D6A3E8, &HA1D1937E, &H38D8C2C4, &H4FDFF252, _
&HD1BB67F1, &HA6BC5767, &H3FB506DD, &H48B2364B, _
&HD80D2BDA, &HAF0A1B4C, &H36034AF6, &H41047A60, _
&HDF60EFC3, &HA867DF55, &H316E8EEF, &H4669BE79, _
&HCB61B38C, &HBC66831A, &H256FD2A0, &H5268E236, _
&HCC0C7795, &HBB0B4703, &H220216B9, &H5505262F, _
&HC5BA3BBE, &HB2BD0B28, &H2BB45A92, &H5CB36A04, _
&HC2D7FFA7, &HB5D0CF31, &H2CD99E8B, &H5BDEAE1D, _
&H9B64C2B0, &HEC63F226, &H756AA39C, &H026D930A, _
&H9C0906A9, &HEB0E363F, &H72076785, &H05005713, _
&H95BF4A82, &HE2B87A14, &H7BB12BAE, &H0CB61B38, _
&H92D28E9B, &HE5D5BE0D, &H7CDCEFB7, &H0BDBDF21, _
&H86D3D2D4, &HF1D4E242, &H68DDB3F8, &H1FDA836E, _
&H81BE16CD, &HF6B9265B, &H6FB077E1, &H18B74777, _
&H88085AE6, &HFF0F6A70, &H66063BCA, &H11010B5C, _
&H8F659EFF, &HF862AE69, &H616BFFD3, &H166CCF45, _
&HA00AE278, &HD70DD2EE, &H4E048354, &H3903B3C2, _
&HA7672661, &HD06016F7, &H4969474D, &H3E6E77DB, _
&HAED16A4A, &HD9D65ADC, &H40DF0B66, &H37D83BF0, _
&HA9BCAE53, &HDEBB9EC5, &H47B2CF7F, &H30B5FFE9, _
&HBDBDF21C, &HCABAC28A, &H53B39330, &H24B4A3A6, _
&HBAD03605, &HCDD70693, &H54DE5729, &H23D967BF, _
&HB3667A2E, &HC4614AB8, &H5D681B02, &H2A6F2B94, _
&HB40BBE37, &HC30C8EA1, &H5A05DF1B, &H2D02EF8D }

' Table-driven CRC-32 of nBytes bytes starting at buffer.
'   buffer : address of the first byte to hash
'   nBytes : number of bytes to process; zero or negative processes nothing
' Returns the final checksum (start value &HFFFFFFFF, inverted at the end),
' which is 0 when no bytes are processed.
function CRC32(byval buffer as any ptr, byval nBytes as long) as ulong
  dim as ulong checksum = &HFFFFFFFF
  dim as ubyte ptr cursor = buffer
  dim as long remaining = nBytes
  do while remaining > 0
    ' Low byte of (checksum xor data byte) selects the table entry.
    checksum = CRC32Table((checksum xor *cursor) and &HFF) xor (checksum shr 8)
    cursor += 1
    remaining -= 1
  loop
  return checksum xor &HFFFFFFFF
end function

' Look up KeyWord in a table of precomputed keyword checksums.
'   KeyWord     : the word to look for
'   CheckSums() : CRC32 of every known keyword, in keyword-array order
' Returns the index of the first entry whose checksum equals CRC32(KeyWord),
' or -1 when KeyWord is empty, the table is empty, or nothing matches.
' Only checksums are compared: a non-keyword whose CRC32 happens to equal a
' keyword's is reported as found, so callers needing certainty should
' compare the strings as well.
function FindKeyword(byref KeyWord as string, CheckSums() as ulong) as integer
  var FirstIndex = lbound(CheckSums), LastIndex = ubound(CheckSums)
  var nChars = len(KeyWord)
  ' Empty table: ubound is below lbound. A one-element table is valid.
  if LastIndex < FirstIndex then return -1
  if nChars < 1 then return -1
  var CheckSum = CRC32(strptr(KeyWord),nChars)
  for i as integer = FirstIndex to LastIndex
    if CheckSums(i) = CheckSum then return i
  next
  return -1
end function 

' FreeBASIC keyword list. The initializer continues on a second line, so the
' first line must end with the "_" continuation character.
dim   as string KeyWords(...) =>{"Abs","Abstract","Access","Acos","Add","Alias","Allocate","Alpha","And","AndAlso","Append","As","Assert","AssertWarn","Asc","Asin","Asm","Atan2","Atn","Base","Beep","Bin","Binary","Bit","BitReset","BitSet","BLoad","BSave","ByRef","ByVal","Call","CAllocate","Case","Cast","CByte","CDbl","cdecl","Chain","ChDir","Chr","CInt","Circle","Class","Clear","CLng","CLngInt","Close","Cls","Color","Command","Common","CondBroadcast","CondCreate","CondDestroy","CondSignal","CondWait","Const","Constructor","Continue","Cos","CPtr","CShort","CSign","CSng","CsrLin","CUByte","CUInt","CULng","CULngInt","CUnsg","CurDir","CUShort","Custom","CVD","CVI","CVL","CVLongInt","CVS","CVShort","Data","Date","DateAdd","DateDiff","DatePart","DateSerial","DateValue","Day","Deallocate","Declare","DefByte","DefDbl","defined","DefInt","DefLng","DefLongInt","DefShort","DefSng","DefStr","DefUByte","DefUInt","DefULongInt","DefUShort","Delete","Destructor","Dim","Dir","Do","Draw","DyLibFree","DyLibLoad","DyLibSymbol","Else","ElseIf","Encoding","End","EndIf","Enum","Environ","EOF","Eqv","Erase","Erfn","Erl","Ermn","Err","Error","Event","Exec","ExePath","Exit","Exp","Export","Extends","Extern","False","Field","FileAttr","FileCopy","FileDateTime","FileExists","FileLen","Fix","Flip","For","Format","Frac","Fre","FreeFile","Function","Get","GetJoystick","GetKey","GetMouse","GoSub","Goto","Hex","HiByte","HiWord","Hour","If","IIf","ImageConvertRow","ImageCreate","ImageDestroy","ImageInfo","Imp","Implements","Import","Inkey","Inp","Input","InStr","InStrRev","Int","Is","IsDate","IsRedirected","Kill","LBound","LCase","Left","Len","Let","Lib","Line","LoByte","Loc","Local","Locate","Lock","LOF","Log","Loop","LoWord","LPOS","LPrint","LSet","LTrim","Mid","Minute","MKD","MkDir","MKI","MKL","MKLongInt","Mks","MkShort","Mod","Month","MonthName","MultiKey","MutexCreate","MutexDestroy","MutexLock","MutexUnlock","Naked","Name","Namespace","Next","New","Not","Now","Oct","OffsetOf","On","Once","Open", _
"Operator","Option","Or","OrElse","Out","Output","Overload","Override","Paint","Palette","pascal","PCopy","Peek","PMap","Point","PointCoord","Poke","Pos","Preserve","PReset","Print","Private","ProcPtr","Property","Protected","PSet","Public","Put","Random","Randomize","Read","Reallocate","ReDim","Rem","Reset","Restore","Resume","Return","RGB","RGBA","Right","RmDir","Rnd","RSet","RTrim","Run","SAdd","Scope","Screen","ScreenCopy","ScreenControl","ScreenEvent","ScreenInfo","ScreenGLProc","ScreenList","ScreenLock","ScreenPtr","ScreenRes","ScreenSet","ScreenSync","ScreenUnlock","Second","Seek","Select","SetDate","SetEnviron","SetMouse","SetTime","Sgn","Shared","Shell","Shl","Shr","Sin","SizeOf","Sleep","Space","Spc","Sqr","Static","stdcall","Step","Stick","Stop","Str","Strig","StrPtr","Sub","Swap","System","Tab","Tan","Then","This","ThreadCall","ThreadCreate","ThreadDetach","ThreadWait","Time","TimeSerial","TimeValue","Timer","To","Trans","Trim","Type","Typeof","UBound","UCase","Union","Unlock","Until","Using","va_arg","va_first","va_next","Val","ValLng","ValInt","ValUInt","ValULng","Var","VarPtr","View","Virtual","Wait","WBin","WChr","WeekDay","WeekDayName","Wend","While","WHex","Width","Window","WindowTitle","WInput","With","WOct","Write","WSpace","WStr","Xor","Year"}
' Build one CRC32 checksum per keyword, stored at the keyword's own index.
var LastKeyWord = ubound(KeyWords)
var nKeywords = LastKeyWord + 1
redim as ulong CheckSums(LastKeyWord)

print "build CRCtable for " & nKeywords & " !"
for slot as integer = 0 to LastKeyWord
  var crc = CRC32(strptr(KeyWords(slot)), len(KeyWords(slot)))
  CheckSums(slot) = crc
  print "crc32: 0x" & hex(crc, 8) & " of " & KeyWords(slot)
next
print "done ..."
print

' Sanity check: FindKeyword compares checksums only, so every keyword must
' have a distinct CRC32. Each unordered pair (i, j) with i < j is compared
' exactly once, so a collision is reported once rather than once per order.
print "be sure no two or more diffrent keywords share the same CRC sum !"
var flag = false
for i as integer = 0 to LastKeyWord - 1
  for j as integer = i + 1 to LastKeyWord
    if CheckSums(i)=CheckSums(j) then
      flag = true
      print "oh no the keywords " & KeyWords(i) & " " & KeyWords(j) & " shares the same CRC " & CheckSums(i)
      sleep 1000
    end if 
  next
next 

if flag then
  ' At least one collision: the checksum-only lookup cannot be trusted.
  print "sorry CRC32 isn't the right solution !"
  beep :sleep :end
else
  print "looks fine all CRC checksums are unique !"
end if 

' Benchmark: look up nSearch randomly chosen keywords and time the whole run.
var nSearch = 1000000
print "search started"

var tStart = Timer()
for i as integer = 1 to nSearch
  ' rnd() is below 1, so scaling by the number of keywords (LastKeyWord + 1)
  ' yields indices 0 .. LastKeyWord; scaling by LastKeyWord alone would
  ' never select the last keyword.
  var search  = int(rnd()*(LastKeyWord + 1))
  var keyword = KeyWords(search)
  ' print "search: " & keyword;
  var index   = FindKeyWord(keyword,CheckSums())
  ' Optional self-check, left disabled so it does not distort the timing:
  'if index>-1 then
  '  print " found at " & index
  'else
  '  print " not found must be an error !"
  '  beep : sleep : end 1
  'end if 
next
var tEnd = Timer()-tStart
print "searched " & nSearch & " keywords in " & tEnd & " seconds !"
sleep


badidea
Posts: 1175
Joined: May 24, 2007 22:10
Location: The Netherlands

Re: Find on keywords within a string

Postby badidea » Jan 05, 2019 0:43

I don't understand. Which is the text to be searched?
In my example, this thing:
searchText = "BlablaxxxThreadCreateyyyAbsxxxYearyyyMoretextAbs!?randomtext!absABS"
dodicat
Posts: 5545
Joined: Jan 10, 2006 20:30
Location: Scotland

Re: Find on keywords within a string

Postby dodicat » Jan 05, 2019 1:35

I made the search text about a million characters long by repeatedly doubling it.
The number of occurrences of the three matching keywords is shown; press a key to see the positions (they are all there if you look).

Code: Select all

 


const as integer MAX_KEYWORD = 400
dim as string keyword(MAX_KEYWORD-1)
dim as integer numKeywords = 0
dim as string dataStr

' Fill keyword() from the DATA statements. The loop stops as soon as READ
' hands back an empty string (there is no explicit terminator in the DATA
' list, so this relies on READ yielding "" once the list is exhausted) or
' when the array is full.
do while numKeywords < MAX_KEYWORD
   read dataStr
   if len(dataStr) = 0 then exit do
   keyword(numKeywords) = dataStr
   numKeywords += 1
loop
print "numKeywords: " & str(numKeywords)

' FreeBASIC keyword list consumed by the READ loop above. The list is split
' over two DATA statements; each statement must start with DATA and must not
' end with a dangling comma.
data "Abs","Abstract","Access","Acos","Add","Alias","Allocate","Alpha","And","AndAlso","Append","As","Assert","AssertWarn","Asc","Asin","Asm","Atan2","Atn","Base","Beep","Bin","Binary","Bit","BitReset","BitSet","BLoad","BSave","ByRef","ByVal","Call","CAllocate","Case","Cast","CByte","CDbl","cdecl","Chain","ChDir","Chr","CInt","Circle","Class","Clear","CLng","CLngInt","Close","Cls","Color","Command","Common","CondBroadcast","CondCreate","CondDestroy","CondSignal","CondWait","Const","Constructor","Continue","Cos","CPtr","CShort","CSign","CSng","CsrLin","CUByte","CUInt","CULng","CULngInt","CUnsg","CurDir","CUShort","Custom","CVD","CVI","CVL","CVLongInt","CVS","CVShort","Data","Date","DateAdd","DateDiff","DatePart","DateSerial","DateValue","Day","Deallocate","Declare","DefByte","DefDbl","defined","DefInt","DefLng","DefLongInt","DefShort","DefSng","DefStr","DefUByte","DefUInt","DefULongInt","DefUShort","Delete","Destructor","Dim","Dir","Do","Draw","DyLibFree","DyLibLoad","DyLibSymbol","Else","ElseIf","Encoding","End","EndIf","Enum","Environ","EOF","Eqv","Erase","Erfn","Erl","Ermn","Err","Error","Event","Exec","ExePath","Exit","Exp","Export","Extends","Extern","False","Field","FileAttr","FileCopy","FileDateTime","FileExists","FileLen","Fix","Flip","For","Format","Frac","Fre","FreeFile","Function","Get","GetJoystick","GetKey","GetMouse","GoSub","Goto","Hex","HiByte","HiWord","Hour","If","IIf","ImageConvertRow","ImageCreate","ImageDestroy","ImageInfo","Imp","Implements","Import","Inkey","Inp","Input","InStr","InStrRev","Int","Is","IsDate","IsRedirected","Kill","LBound","LCase","Left","Len","Let","Lib","Line","LoByte","Loc","Local","Locate","Lock","LOF","Log","Loop","LoWord","LPOS","LPrint","LSet","LTrim","Mid","Minute","MKD","MkDir","MKI","MKL","MKLongInt","Mks","MkShort","Mod","Month","MonthName","MultiKey","MutexCreate","MutexDestroy","MutexLock","MutexUnlock","Naked","Name","Namespace","Next","New","Not","Now","Oct","OffsetOf","On","Once","Open","Operator","Option","Or"
data "OrElse","Out","Output","Overload","Override","Paint","Palette","pascal","PCopy","Peek","PMap","Point","PointCoord","Poke","Pos","Preserve","PReset","Print","Private","ProcPtr","Property","Protected","PSet","Public","Put","Random","Randomize","Read","Reallocate","ReDim","Rem","Reset","Restore","Resume","Return","RGB","RGBA","Right","RmDir","Rnd","RSet","RTrim","Run","SAdd","Scope","Screen","ScreenCopy","ScreenControl","ScreenEvent","ScreenInfo","ScreenGLProc","ScreenList","ScreenLock","ScreenPtr","ScreenRes","ScreenSet","ScreenSync","ScreenUnlock","Second","Seek","Select","SetDate","SetEnviron","SetMouse","SetTime","Sgn","Shared","Shell","Shl","Shr","Sin","SizeOf","Sleep","Space","Spc","Sqr","Static","stdcall","Step","Stick","Stop","Str","Strig","StrPtr","Sub","Swap","System","Tab","Tan","Then","This","ThreadCall","ThreadCreate","ThreadDetach","ThreadWait","Time","TimeSerial","TimeValue","Timer","To","Trans","Trim","Type","Typeof","UBound","UCase","Union","Unlock","Until","Using","va_arg","va_first","va_next","Val","ValLng","ValInt","ValUInt","ValULng","Var","VarPtr","View","Virtual","Wait","WBin","WChr","WeekDay","WeekDayName","Wend","While","WHex","Width","Window","WindowTitle","WInput","With","WOct","Write","WSpace","WStr","Xor","Year"

' Start from the sample text and double it 14 times (length * 2^14).
dim as string searchText = "BlablaxxxThreadCreateyyyAbsxxxYearyyyMoretextAbs!?randomtext!absABS"
for doubling as long = 1 to 14
    searchText = searchText & searchText
next doubling
' Find every non-overlapping occurrence of PartString inside SomeString.
'   SomeString : text to search
'   PartString : text to look for
'   a()        : dynamic array that receives the 1-based match positions;
'                on success it is resized to exactly (1 To <match count>)
' Returns the number of matches. When there is no match the function
' returns 0 and leaves a() untouched.
' The text is scanned once; a() grows geometrically while matches are
' collected and is trimmed to the exact size at the end.
Function TALLY(SomeString As String,PartString As String,a() as long) As Long
    Dim As Long LenP = Len(PartString)
    Dim As Long position = Instr(SomeString, PartString)
    If position = 0 Then Return 0

    Dim As Long count = 0, capacity = 16
    Redim a(1 To capacity)
    While position > 0
        count += 1
        If count > capacity Then
            capacity *= 2
            Redim Preserve a(1 To capacity)
        End If
        a(count) = position
        ' Continue after the end of this match, so matches never overlap.
        position = Instr(position + LenP, SomeString, PartString)
    Wend
    Redim Preserve a(1 To count)
    Return count
End Function

'======================
' Count every keyword in searchText, print the per-keyword totals at once
' and collect the match positions in acc for display after a key press.
redim as long p()
dim as string acc
' keyword() holds valid entries at indices 0 .. numKeywords - 1 only, so the
' loop must stop one short of numKeywords.
for n as long=lbound(keyword) to numKeywords - 1
    var t= tally(searchtext,keyword(n),p())
    if t then
        print keyword(n);t
        acc+=chr(10)+keyword(n)+chr(10) 'gather the positions in string acc
        acc+=chr(10)
        for m as long=lbound(p) to ubound(p)
            acc+= " @"+str(p(m))
        next m
    end if
next n
print "done, length of searchtext = ";len(searchtext)
print "Press a key for the positions in the string"
sleep
print acc
sleep
   
 
Last edited by dodicat on Jan 06, 2019 14:57, edited 1 time in total.
D.J.Peters
Posts: 7659
Joined: May 28, 2005 3:28

Re: Find on keywords within a string

Postby D.J.Peters » Jan 05, 2019 1:47

You wrote "find keywords", so that is what I wrote.
But you mean InStr() - what is wrong with InStr()?

"BlablaxxxThreadCreateyyyAbsxxxYearyyyMoretextAbs!?randomtext!absABS"

What is the sense of this input string ?

I mean a programming language is a collection of tokens and tokens are separated by delimiters.

With other words what you are doing or what is the source of your blabla text without delimiters ?

Joshy
D.J.Peters
Posts: 7659
Joined: May 28, 2005 3:28

Re: Find on keywords within a string

Postby D.J.Peters » Jan 05, 2019 2:48

Why do you search PartString in SomeString two times in function TALLY ?

Joshy

Code: Select all

' Collect the positions of all non-overlapping occurrences of PartString in
' SomeString using a single scan.
'   a() : dynamic array receiving the 1-based positions from index 0 upward;
'         it is grown on demand and never shrunk, so only the first
'         <return value> entries are meaningful.
' Returns the number of occurrences found.
function TALLY(SomeString As String,PartString As String,a() as integer) As integer
  dim as integer found = 0
  dim as integer stepLen = len(PartString)
  dim as integer at = instr(SomeString, PartString)
  do until at <= 0
    ' Grow the array (roughly doubling) when the next slot does not exist.
    if found > ubound(a) then redim preserve a(found*2+1)
    a(found) = at
    found += 1
    ' Resume right behind the match just recorded.
    at = instr(at + stepLen, SomeString, PartString)
  loop
  return found
end function
' FreeBASIC keyword list. The initializer continues on a second line, so the
' first line must end with the "_" continuation character.
dim as string Keywords(...)=>{"Abs","Abstract","Access","Acos","Add","Alias","Allocate","Alpha","And","AndAlso","Append","As","Assert","AssertWarn","Asc","Asin","Asm","Atan2","Atn","Base","Beep","Bin","Binary","Bit","BitReset","BitSet","BLoad","BSave","ByRef","ByVal","Call","CAllocate","Case","Cast","CByte","CDbl","cdecl","Chain","ChDir","Chr","CInt","Circle","Class","Clear","CLng","CLngInt","Close","Cls","Color","Command","Common","CondBroadcast","CondCreate","CondDestroy","CondSignal","CondWait","Const","Constructor","Continue","Cos","CPtr","CShort","CSign","CSng","CsrLin","CUByte","CUInt","CULng","CULngInt","CUnsg","CurDir","CUShort","Custom","CVD","CVI","CVL","CVLongInt","CVS","CVShort","Data","Date","DateAdd","DateDiff","DatePart","DateSerial","DateValue","Day","Deallocate","Declare","DefByte","DefDbl","defined","DefInt","DefLng","DefLongInt","DefShort","DefSng","DefStr","DefUByte","DefUInt","DefULongInt","DefUShort","Delete","Destructor","Dim","Dir","Do","Draw","DyLibFree","DyLibLoad","DyLibSymbol","Else","ElseIf","Encoding","End","EndIf","Enum","Environ","EOF","Eqv","Erase","Erfn","Erl","Ermn","Err","Error","Event","Exec","ExePath","Exit","Exp","Export","Extends","Extern","False","Field","FileAttr","FileCopy","FileDateTime","FileExists","FileLen","Fix","Flip","For","Format","Frac","Fre","FreeFile","Function","Get","GetJoystick","GetKey","GetMouse","GoSub","Goto","Hex","HiByte","HiWord","Hour","If","IIf","ImageConvertRow","ImageCreate","ImageDestroy","ImageInfo","Imp","Implements","Import","Inkey","Inp","Input","InStr","InStrRev","Int","Is","IsDate","IsRedirected","Kill","LBound","LCase","Left","Len","Let","Lib","Line","LoByte","Loc","Local","Locate","Lock","LOF","Log","Loop","LoWord","LPOS","LPrint","LSet","LTrim","Mid","Minute","MKD","MkDir","MKI","MKL","MKLongInt","Mks","MkShort","Mod","Month","MonthName","MultiKey","MutexCreate","MutexDestroy","MutexLock","MutexUnlock","Naked","Name","Namespace","Next","New","Not","Now","Oct","OffsetOf","On","Once","Open", _
"Operator","Option","Or","OrElse","Out","Output","Overload","Override","Paint","Palette","pascal","PCopy","Peek","PMap","Point","PointCoord","Poke","Pos","Preserve","PReset","Print","Private","ProcPtr","Property","Protected","PSet","Public","Put","Random","Randomize","Read","Reallocate","ReDim","Rem","Reset","Restore","Resume","Return","RGB","RGBA","Right","RmDir","Rnd","RSet","RTrim","Run","SAdd","Scope","Screen","ScreenCopy","ScreenControl","ScreenEvent","ScreenInfo","ScreenGLProc","ScreenList","ScreenLock","ScreenPtr","ScreenRes","ScreenSet","ScreenSync","ScreenUnlock","Second","Seek","Select","SetDate","SetEnviron","SetMouse","SetTime","Sgn","Shared","Shell","Shl","Shr","Sin","SizeOf","Sleep","Space","Spc","Sqr","Static","stdcall","Step","Stick","Stop","Str","Strig","StrPtr","Sub","Swap","System","Tab","Tan","Then","This","ThreadCall","ThreadCreate","ThreadDetach","ThreadWait","Time","TimeSerial","TimeValue","Timer","To","Trans","Trim","Type","Typeof","UBound","UCase","Union","Unlock","Until","Using","va_arg","va_first","va_next","Val","ValLng","ValInt","ValUInt","ValULng","Var","VarPtr","View","Virtual","Wait","WBin","WChr","WeekDay","WeekDayName","Wend","While","WHex","Width","Window","WindowTitle","WInput","With","WOct","Write","WSpace","WStr","Xor","Year"}
' Driver: build a large search text, count every keyword in it, print the
' totals and show the collected match positions after a key press.
var nKeywords=ubound(Keywords)+1
print "numKeywords: " & nKeywords

dim as string SearchText = "BlablaxxxThreadCreateyyyAbsxxxYearyyyMoretextAbs!?randomtext!absABS"
' Double the text 14 times.
for doubling as integer = 1 to 14
  SearchText = SearchText & SearchText
next

redim as integer p()
dim as string acc
for k as integer = 0 to nKeywords-1
  var word = Keywords(k)
  var hits = tally(SearchText,word,p())
  ' Keywords that never occur produce no output at all.
  if hits <= 0 then continue for
  print "found " & word & " " & hits & " times"
  ' Positions are gathered in acc: keyword on its own line, a blank line,
  ' then the " @position" entries.
  acc &= chr(10) & word & chr(10) & chr(10)
  for m as integer = 0 to hits-1
    acc &= " @" & str(p(m))
  next
next
print "done, length of searchtext = ";len(SearchText)
print "Press a key for the positions in the string"
sleep
print acc
sleep
   
 
dodicat
Posts: 5545
Joined: Jan 10, 2006 20:30
Location: Scotland

Re: Find on keywords within a string

Postby dodicat » Jan 05, 2019 3:09

Twice in tally, to get the array dimension first time and fill the array with the positions the second time.
Both methods (your tweak and the original) take about the same time (~.45 seconds)
Last edited by dodicat on Jan 05, 2019 3:18, edited 1 time in total.
D.J.Peters
Posts: 7659
Joined: May 28, 2005 3:28

Re: Find on keywords within a string

Postby D.J.Peters » Jan 05, 2019 3:12

But that is not necessary and slower !
D.J.Peters wrote:In other words, what are you doing, and what is the source of your blabla text without delimiters?
dodicat
Posts: 5545
Joined: Jan 10, 2006 20:30
Location: Scotland

Re: Find on keywords within a string

Postby dodicat » Jan 05, 2019 3:24

I am just using Badidea's string doubled up a few times to give it some teeth.
I suppose the project idea is to count the number of each of the keywords in some .bas file.
I'll try a speed test on 32 bit Linux tomorrow, I think Badidea is a Linux user.
D.J.Peters
Posts: 7659
Joined: May 28, 2005 3:28

Re: Find on keywords within a string

Postby D.J.Peters » Jan 05, 2019 4:03

dodicat wrote:I suppose the project idea is to count the number of each of the keywords in some .bas file.
Sure, but you have to parse the *.bas or *.bi files and split them into tokens.

Also ignore keywords that appear inside comments and strings,
and most importantly you have to convert all tokens and keywords with lcase() or ucase().
FreeBASIC isn't case sensitive "PRINT" "print" "Print" "pRiNt" and "?" are the same.

If he splits it into tokens, my posted CRC32 string compare is a really fast solution.

bla bla bla :-)

Joshy
Last edited by D.J.Peters on Jan 05, 2019 4:24, edited 1 time in total.
badidea
Posts: 1175
Joined: May 24, 2007 22:10
Location: The Netherlands

Re: Find all keywords within a string

Postby badidea » Jan 05, 2019 11:41

Don't you guys sleep?
It is for the game I am working on. It uses a 2D block grid with letters (characters) on it. The player needs to move the blocks around to create words (FreeBASIC keywords). I will use caps only.
Image
I should use a letter distribution function (https://en.wikipedia.org/wiki/Letter_frequency) or it will be difficult.
"Atan2", "va_arg", "va_first" and "va_next" are a bit annoying. I hope no one will miss them.
Last edited by badidea on Jan 05, 2019 12:44, edited 1 time in total.
jj2007
Posts: 1056
Joined: Oct 23, 2016 15:28
Location: Roma, Italia
Contact:

Re: Find on keywords within a string

Postby jj2007 » Jan 05, 2019 12:28

D.J.Peters wrote:FreeBASIC isn't case sensitive "PRINT" "print" "Print" "pRiNt" and "?" are the same.
If he split it in tokens my posted CRC32 string compare are a really fast solution.
You suggest hashing, right? Could indeed be much faster.
badidea
Posts: 1175
Joined: May 24, 2007 22:10
Location: The Netherlands

Re: Find all keywords within a string

Postby badidea » Jan 05, 2019 12:34

It does not need to be super-fast. My own version (with instr) is probably sufficient, not sure yet.

Distribution of letters in freebasic keywords (ignoring "Atan2", "va_arg", "va_first", "va_next"):

Code: Select all

'Load the keyword list from the DATA statements into keyword(), upper-cased.
'Reading stops at the first empty string or once MAX_KEYWORD entries are stored.
const as integer MAX_KEYWORD = 400
dim as string keyword(MAX_KEYWORD-1)
dim as integer numKeywords = 0
dim as string dataStr

do while numKeywords < MAX_KEYWORD
   read dataStr
   if len(dataStr) = 0 then exit do   'an empty item ends the list
   keyword(numKeywords) = ucase(dataStr)
   numKeywords += 1
loop
print "numKeywords: " & str(numKeywords)

'data "Abs","Abstract","Access","Acos","Add","Alias","Allocate","Alpha","And","AndAlso","Append","As","Assert","AssertWarn","Asc","Asin","Asm","Atan2","Atn","Base","Beep","Bin","Binary","Bit","BitReset","BitSet","BLoad","BSave","ByRef","ByVal","Call","CAllocate","Case","Cast","CByte","CDbl","cdecl","Chain","ChDir","Chr","CInt","Circle","Class","Clear","CLng","CLngInt","Close","Cls","Color","Command","Common","CondBroadcast","CondCreate","CondDestroy","CondSignal","CondWait","Const","Constructor","Continue","Cos","CPtr","CShort","CSign","CSng","CsrLin","CUByte","CUInt","CULng","CULngInt","CUnsg","CurDir","CUShort","Custom","CVD","CVI","CVL","CVLongInt","CVS","CVShort","Data","Date","DateAdd","DateDiff","DatePart","DateSerial","DateValue","Day","Deallocate","Declare","DefByte","DefDbl","defined","DefInt","DefLng","DefLongInt","DefShort","DefSng","DefStr","DefUByte","DefUInt","DefULongInt","DefUShort","Delete","Destructor","Dim","Dir","Do","Draw","DyLibFree","DyLibLoad","DyLibSymbol","Else","ElseIf","Encoding","End","EndIf","Enum","Environ","EOF","Eqv","Erase","Erfn","Erl","Ermn","Err","Error","Event","Exec","ExePath","Exit","Exp","Export","Extends","Extern","False","Field","FileAttr","FileCopy","FileDateTime","FileExists","FileLen","Fix","Flip","For","Format","Frac","Fre","FreeFile","Function","Get","GetJoystick","GetKey","GetMouse","GoSub","Goto","Hex","HiByte","HiWord","Hour","If","IIf","ImageConvertRow","ImageCreate","ImageDestroy","ImageInfo","Imp","Implements","Import","Inkey","Inp","Input","InStr","InStrRev","Int","Is","IsDate","IsRedirected","Kill","LBound","LCase","Left","Len","Let","Lib","Line","LoByte","Loc","Local","Locate","Lock","LOF","Log","Loop","LoWord","LPOS","LPrint","LSet","LTrim","Mid","Minute","MKD","MkDir","MKI","MKL","MKLongInt","Mks","MkShort","Mod","Month","MonthName","MultiKey","MutexCreate","MutexDestroy","MutexLock","MutexUnlock","Naked","Name","Namespace","Next","New","Not","Now","Oct","OffsetOf","On","Once","Open","Operator","Option","Or"
,"OrElse","Out","Output","Overload","Override","Paint","Palette","pascal","PCopy","Peek","PMap","Point","PointCoord","Poke","Pos","Preserve","PReset","Print","Private","ProcPtr","Property","Protected","PSet","Public","Put","Random","Randomize","Read","Reallocate","ReDim","Rem","Reset","Restore","Resume","Return","RGB","RGBA","Right","RmDir","Rnd","RSet","RTrim","Run","SAdd","Scope","Screen","ScreenCopy","ScreenControl","ScreenEvent","ScreenInfo","ScreenGLProc","ScreenList","ScreenLock","ScreenPtr","ScreenRes","ScreenSet","ScreenSync","ScreenUnlock","Second","Seek","Select","SetDate","SetEnviron","SetMouse","SetTime","Sgn","Shared","Shell","Shl","Shr","Sin","SizeOf","Sleep","Space","Spc","Sqr","Static","stdcall","Step","Stick","Stop","Str","Strig","StrPtr","Sub","Swap","System","Tab","Tan","Then","This","ThreadCall","ThreadCreate","ThreadDetach","ThreadWait","Time","TimeSerial","TimeValue","Timer","To","Trans","Trim","Type","Typeof","UBound","UCase","Union","Unlock","Until","Using","va_arg","va_first","va_next","Val","ValLng","ValInt","ValUInt","ValULng","Var","VarPtr","View","Virtual","Wait","WBin","WChr","WeekDay","WeekDayName","Wend","While","WHex","Width","Window","WindowTitle","WInput","With","WOct","Write","WSpace","WStr","Xor","Year"
'Keyword list consumed by the READ loop above ("Atan2", "va_arg", "va_first" and
'"va_next" are left out on purpose because they contain non-letter characters).
'Consecutive DATA statements form one chained list, so splitting the list by
'initial letter does not change the order in which the items are read.
data "Abs","Abstract","Access","Acos","Add","Alias","Allocate","Alpha","And","AndAlso","Append","As","Assert","AssertWarn","Asc","Asin","Asm","Atn"
data "Base","Beep","Bin","Binary","Bit","BitReset","BitSet","BLoad","BSave","ByRef","ByVal"
data "Call","CAllocate","Case","Cast","CByte","CDbl","cdecl","Chain","ChDir","Chr","CInt","Circle","Class","Clear","CLng","CLngInt","Close","Cls","Color","Command","Common","CondBroadcast","CondCreate","CondDestroy","CondSignal","CondWait","Const","Constructor","Continue","Cos","CPtr","CShort","CSign","CSng","CsrLin","CUByte","CUInt","CULng","CULngInt","CUnsg","CurDir","CUShort","Custom","CVD","CVI","CVL","CVLongInt","CVS","CVShort"
data "Data","Date","DateAdd","DateDiff","DatePart","DateSerial","DateValue","Day","Deallocate","Declare","DefByte","DefDbl","defined","DefInt","DefLng","DefLongInt","DefShort","DefSng","DefStr","DefUByte","DefUInt","DefULongInt","DefUShort","Delete","Destructor","Dim","Dir","Do","Draw","DyLibFree","DyLibLoad","DyLibSymbol"
data "Else","ElseIf","Encoding","End","EndIf","Enum","Environ","EOF","Eqv","Erase","Erfn","Erl","Ermn","Err","Error","Event","Exec","ExePath","Exit","Exp","Export","Extends","Extern"
data "False","Field","FileAttr","FileCopy","FileDateTime","FileExists","FileLen","Fix","Flip","For","Format","Frac","Fre","FreeFile","Function"
data "Get","GetJoystick","GetKey","GetMouse","GoSub","Goto"
data "Hex","HiByte","HiWord","Hour"
data "If","IIf","ImageConvertRow","ImageCreate","ImageDestroy","ImageInfo","Imp","Implements","Import","Inkey","Inp","Input","InStr","InStrRev","Int","Is","IsDate","IsRedirected"
data "Kill"
data "LBound","LCase","Left","Len","Let","Lib","Line","LoByte","Loc","Local","Locate","Lock","LOF","Log","Loop","LoWord","LPOS","LPrint","LSet","LTrim"
data "Mid","Minute","MKD","MkDir","MKI","MKL","MKLongInt","Mks","MkShort","Mod","Month","MonthName","MultiKey","MutexCreate","MutexDestroy","MutexLock","MutexUnlock"
data "Naked","Name","Namespace","Next","New","Not","Now"
data "Oct","OffsetOf","On","Once","Open","Operator","Option","Or","OrElse","Out","Output","Overload","Override"
data "Paint","Palette","pascal","PCopy","Peek","PMap","Point","PointCoord","Poke","Pos","Preserve","PReset","Print","Private","ProcPtr","Property","Protected","PSet","Public","Put"
data "Random","Randomize","Read","Reallocate","ReDim","Rem","Reset","Restore","Resume","Return","RGB","RGBA","Right","RmDir","Rnd","RSet","RTrim","Run"
data "SAdd","Scope","Screen","ScreenCopy","ScreenControl","ScreenEvent","ScreenInfo","ScreenGLProc","ScreenList","ScreenLock","ScreenPtr","ScreenRes","ScreenSet","ScreenSync","ScreenUnlock","Second","Seek","Select","SetDate","SetEnviron","SetMouse","SetTime","Sgn","Shared","Shell","Shl","Shr","Sin","SizeOf","Sleep","Space","Spc","Sqr","Static","stdcall","Step","Stick","Stop","Str","Strig","StrPtr","Sub","Swap","System"
data "Tab","Tan","Then","This","ThreadCall","ThreadCreate","ThreadDetach","ThreadWait","Time","TimeSerial","TimeValue","Timer","To","Trans","Trim","Type","Typeof"
data "UBound","UCase","Union","Unlock","Until","Using"
data "Val","ValLng","ValInt","ValUInt","ValULng","Var","VarPtr","View","Virtual"
data "Wait","WBin","WChr","WeekDay","WeekDayName","Wend","While","WHex","Width","Window","WindowTitle","WInput","With","WOct","Write","WSpace","WStr"
data "Xor"
data "Year"
'End-of-list sentinel: the reader stops at the first empty string. Without it
'the loop would READ past the last DATA element, which is undefined behaviour.
data ""

'Letter histogram: counts how often each letter A..Z occurs in
'keyword(0 .. numKeywords-1) and prints one row per letter with its count and
'a bar of dots scaled so that the most frequent letter gets BAR_WIDTH dots.
'Characters outside A..Z are not counted; they are echoed to the screen instead.
const as integer FIRST_LETTER = 65   'ASCII code of "A"
const as integer NUM_LETTERS = 26
const as integer BAR_WIDTH = 80

dim as integer charNum
dim as integer charCount(NUM_LETTERS-1)
for iKeyword as integer = 0 to numKeywords - 1
   for iChar as integer = 0 to len(keyword(iKeyword))-1
      charNum = keyword(iKeyword)[iChar]   'byte value of the character at this index
      if charNum >= FIRST_LETTER andalso charNum < FIRST_LETTER + NUM_LETTERS then
         charCount(charNum - FIRST_LETTER) += 1
      else
         print chr(charNum); 'bad char
      end if
   next
next
print

'find the largest count; it defines the full bar width
dim as integer maxCount = 0
for iChar as integer = 0 to NUM_LETTERS-1
   if charCount(iChar) > maxCount then maxCount = charCount(iChar)
next
for iChar as integer = 0 to NUM_LETTERS-1
   'guard: with no counted letters maxCount is 0 and the ratio would be 0/0
   dim as integer barLen = 0
   if maxCount > 0 then barLen = (charCount(iChar) / maxCount) * BAR_WIDTH
   print chr(iChar + FIRST_LETTER), charCount(iChar), string(barLen, ".")
next

Return to “General”

Who is online

Users browsing this forum: No registered users and 4 guests