What would be the best (fastest) way to do this.

General FreeBASIC programming questions.
Post Reply
phishguy
Posts: 1201
Joined: May 05, 2006 16:12
Location: West Richland, Wa

What would be the best (fastest) way to do this.

Post by phishguy »

Using Freebasic, I need to search a file containing 15 digit numbers (unsigned long integer) for a specific number. If the number doesn't exist, I need to add it to the file. This file will eventually contain hundreds of thousands of numbers, so some sort of intelligent search/sort will be necessary for speed. Would a database program like Sqlite be my best option? If so, does anyone have an example to do what I want? I looked at the examples on the forum and they don't seem to have exactly what I want. I haven't dealt much with searching and sorting routines and I'm not sure what approach would be best.
cha0s
Site Admin
Posts: 5319
Joined: May 27, 2005 6:42
Location: USA
Contact:

Post by cha0s »

A database wouldn't necessarily be bad, but if you keep the numbers in order, you can do binary search... which will give you extremely fast results. (O(log n)) http://en.wikipedia.org/wiki/Binary_search
phishguy
Posts: 1201
Joined: May 05, 2006 16:12
Location: West Richland, Wa

Post by phishguy »

Thanks Chaos, I had already thought of using a binary search. However, I wasn't sure, and still am not sure how to write it in Freebasic. The link you gave me is very C-ish, and I'm not quite sure how to change that to workable code in Freebasic.
Antoni
Posts: 1393
Joined: May 27, 2005 15:40
Location: Barcelona, Spain

Post by Antoni »

Perhaps this Arne Anderson (balanced binary) Tree I have been playing with may help

Code: Select all

'------------------------------------------------------------
' Arne Andersson balanced tree data structure 
'adapted  3/2008 to FreeBASIC by Antoni Gual from the tutorials
'Eternally Confuzzled  by Julienne Walker
'http://www.eternallyconfuzzled.com
'-------------------------------------------------------------
'                  add     trav       kill 
'2560000 values   14 sec   0,12 sec   0,55

enum
 d_right=-1
 d_left=0
End enum
#define  NULL 0


Type aa_node
  As Integer level
  As LongInt value
  As aa_node Ptr link(d_right To d_left) 'these are pointers to left and right. The array allows to use 
  Declare Constructor(value As Longint,level As Integer,nil As aa_node ptr)
  Declare Destructor() 
End Type


Type aa_tree
  
  Declare Constructor()
  Declare Destructor
  Declare Property getdepth() As integer
  Declare Property getcount() As integer 
  Declare Property getfirst() As aa_node ptr
  Declare property getlast() As aa_node Ptr
      
  'insert element
  Declare sub insert(_data As Integer)
  'find element
  declare function find(_data As Integer) As aa_node Ptr
  'remove element 
  Declare sub remove(_data As Integer)
  'empty the tree
  Declare sub blank
  
  'traversals, require the address of a working sub as a parameter
  Declare Sub inOrder(As sub( As aa_node Ptr))
  Declare Sub preOrder(As sub( As aa_node Ptr))
  Declare Sub postOrder(As sub( As aa_node Ptr))
  'declAre Function Prevelement(As Integer) As aa_treenode
  'declare Function Nextelement(As Integer) As aa_treenode
  
  Private:
  As Integer count         'nr of elements in the tree    
  As Integer thedata       'static var for some recursive member functions 
  As aa_node Ptr root      'ptr to the root element 
  As aa_node Ptr nil       'pointer to the sentinel element    
  As aa_node Ptr item,heir 'static elements used by remove_r  
  As sub(As aa_node Ptr)  f 'ptr to the function used in traversals
  
  'recursive functions
  Declare Sub inOrder_r(As aa_node Ptr)
  Declare Sub preOrder_r(As aa_node Ptr)
  Declare Sub postOrder_r(As aa_node Ptr)
  'Declare function remove_r( As aa_node Ptr)As aa_node Ptr
  Declare function insert_r( As aa_node ptr)As aa_node Ptr
End Type

'nil could be a member of aa_tree? in this case add node and remove node should be members too

Property aa_tree.getcount As integer:Return count: End Property
Property aa_tree.getdepth As integer:Return root->level:End property

Constructor aa_node(_data As LongInt, lev As Integer,nil As aa_node ptr)
   value=_data
   level=lev
   link(d_left)=nil
   link(d_right)=nil
End Constructor

Destructor aa_node():End destructor

Sub fkill(n As aa_node Ptr)
   Delete n
End Sub

Sub aa_tree.blank()
  this.postOrder(@fkill)
  root=nil
  count=0
End Sub 

Destructor aa_tree()
  blank
  
End Destructor

Constructor aa_tree()
  'create an empty sentinel node
  ' node constructor will give link ptrs  an undefined value
  nil=New aa_node(&h8000000,0,nil)
  'reset linka  ptrs of nil to the true value of nil, after giving it a value
  nil->link(d_left)=nil
  nil->link(d_right)=nil
  root=nil
  count=0
  item=0
  heir=0
End Constructor 

'skew and split are the housekeeping functions the Andersson tree uses to keep itself balanced

Function skew(root As aa_node Ptr) As aa_node Ptr
  'recursive
  Dim As aa_node Ptr temp  
  If root->level<>0 then
    If root->link(d_left)->level=root->level then
      temp =root
      root=root->link(d_left)
      temp->link(d_left)=root->link(d_right)
      root->link(d_right)=temp
    End If
    root->link(d_right)=skew(root->link(d_right))
  End if 
  Return root 
End Function

Function split(root As aa_node Ptr) As aa_node Ptr
   'recursive
   Dim As aa_node Ptr temp  
   If root->level<>0 Then
     if root->link(d_right)->link(d_right)->level = root->level Then
       temp=root
       root=root->link(d_right)
       temp->link(d_right)=root->link(d_left)
       root->link(d_left)=temp
       root->level+=1
       root->link(d_right)=split(root->link(d_right))
     End if
   EndIf
  Return root
End Function

'auxiliar functions for traversing
Sub fnothing(n As aa_node Ptr):End sub


Sub fprint(n As aa_node Ptr)
  Print Using "###############     ";n->value;  
End Sub

'traversing
Sub aa_tree.inorder_r(n As aa_node Ptr)
  'recursive
  If n<>nil then
   inorder_r(n->link(d_left))
   f(n)
   inorder_r(n->link(d_right))
  End if
End Sub

Sub aa_tree.inorder(fun As sub( As aa_node Ptr)): f=fun:  inorder_r(root): End Sub
  
'traversing
Sub aa_tree.postorder_r(n As aa_node Ptr)
  'recursive
  If n<>nil then
   postorder_r(n->link(d_left))
   postorder_r(n->link(d_right))
   f(n)
  End if
End Sub

Sub aa_tree.postorder(fun As sub( As aa_node Ptr)): f=fun: postorder_r(root): End Sub  
  
'traversing
Sub aa_tree.preorder_r(n As aa_node Ptr )
  'recursive
  If n<>nil Then
   f(n) 
   preorder_r(n->link(d_left))
   preorder_r(n->link(d_right))
   
  End if
End Sub

Sub aa_tree.preorder(fun As sub( As aa_node Ptr)):f=fun: preorder_r(root): End Sub  
  

Function aa_tree.insert_r(r As aa_node Ptr)As aa_node Ptr
  'recursive 
  If r=nil Then
    r=New aa_node(thedata,1,nil)
    if r=NULL Then 
      ? "Could not add a new node":Sleep :End
    Else
      count+=1
    End if   
  Else
    Dim As integer di=(r->value<thedata)
    r->link(di)=insert_r(r->link(di))
    r=skew(r)
    r=split(r)  
  EndIf
  Return r
End function

Sub aa_tree.insert(_data As Integer):thedata=_data: root=insert_r(root):End sub

property aa_tree.getfirst() As aa_node Ptr
  Dim As aa_node Ptr temp=root,t1
  do Until temp=nil
   t1=temp 
   temp=temp->link(d_left)
  Loop
  If temp=root Then return NULL Else Return t1
End property

Property aa_tree.getlast() As aa_node Ptr
  Dim As aa_node Ptr temp=root,t1
  do Until  temp=nil
    t1=temp
    temp=temp->link(d_right)
  Loop
  If temp=root Then return NULL Else Return t1
End Property

function aa_tree.find(_data As Integer) As aa_node ptr
  Dim As aa_node Ptr temp=root
  do
  If temp=nil Then 
     return NULL
  ElseIf _data=temp->value then
     Return temp   
  Else
    temp=temp->link((temp->value<_data))
  End  if
  loop
End function

/'
simple recursive deletion: fails if the value to delete does not exist and is bigger than the biggest element

Function  aa_tree.remove_r(r As aa_node Ptr) As aa_node ptr
  'recursive
  If r<>nil Then
    heir=r
    Dim As Integer di=(r->value<thedata)
    If di=d_left Then item=r
    r->link(di)=remove_r(r->link(di))
  
   'found
    If r=heir Then
      ' beep
     'at the bottom, just remove
      If (item=nil)or (item->value<>thedata) Then Return nil
      item->value=r->value
      item=nil
      r=r->link(d_right)
      Delete heir :count-=1 
    
    Else
      If r->link(d_left)->level<r->level-1 Or r->link(d_right)->level<r->level-1 Then
        r->level -=1
        If r->link(d_right)->level >r->level Then  r->link(d_right)->level = r->level
        r=skew(r)
        r=split(r)
      End if 
    End If
  End if
  Return r
End function

Sub aa_tree.remove(_data As Integer):thedata=_data:root=remove_r(root):End sub
'/

sub aa_tree.remove(_data As Integer)
  If root<>nil Then
    Dim As Integer top=0,di=0
    Dim As aa_node Ptr up(50)=any,it=root
     'find the element to remove
     do 
       up(top)=it:top+=1   
       If it=nil Then 
         Exit Sub   'not found
       ElseIf _data=it->value Then
         Exit Do
       EndIf
       di = (it->value < _Data)
       it = it->link(di)
     loop 
     ' remove it
     If it->link(d_left)=nil Or it->link(d_right)=nil Then
       
       'single child case
       Dim As Integer di2
       di2=(it->link(di)=nil)
       top-=1
       If top<>0 Then
         up(top-1)->link(di)=it->link(di2)
       Else
         root=it->link(d_right)
       EndIf
       DeAllocate (it):count-=1
     Else
     
       'two child case
       Dim As aa_node Ptr heir,prev
       heir=it->link(d_right)
       prev=it
       While heir->link(d_left) <>nil
           up(top)=heir:top+=1:prev=heir
           heir = heir->link(d_left)
       Wend
       it->value = heir->value
      prev->link((prev = it)) = heir->link(d_right)
      DeAllocate (heir):count-=1
     EndIf
     'rebalance up
     top-=1
     while top >= 0 
       if  top <> 0 Then  di = (up(top - 1)->link(d_right) = up(top))
 
       if  (up(top)->link(d_left)->level < up(top)->level - 1) or (up(top)->link(d_right)->level < up(top)->level - 1) then
       
         up(top)->level-=1
         if  up(top)->link(d_right)->level > up(top)->level Then  up(top)->link(d_right)->level = up(top)->level
 
         up(top) = skew ( up(top) )
         
         up(top) = split ( up(top) )
         
       End if
 
       if  top <> 0 then 
         up(top - 1)->link(Di) = up(top)
       else
        root = up(top)
       End if 
      top-=1  
   wend
     
  EndIf
End Sub


'---------------------
'Test

Const numel=200000
Dim t As Single
? SizeOf(aa_tree)

  
t=Timer
Dim As  aa_tree ptr a= New aa_tree()
'Dim As  aa_tree ptr b= New aa_tree()


For i As Integer = 1 To numel
  
  a->insert 10000000LL*rnd
  'a->remove 2
  'beep
  If i And 1 Then a->remove numel -I/2 
 ' b->insert -i
  'b->inorder(@fprint)
  'b->remove 0
 ' If  (i And 1) Then b->remove -(i\2) 

Next



Print a->getdepth, a->getcount
'Print b->getdepth, b->getcount
Print 
Print a->getfirst->value, a->getlast->value
'Print b->getfirst->value, b->getlast->value


a->inorder(@fprint)
'b->inorder(@fprint)

a->blank
Print a->getdepth, a->getcount
'Print b->getdepth, b->getcount
Delete a
'Delete b
?"ended"
sleep
phishguy
Posts: 1201
Joined: May 05, 2006 16:12
Location: West Richland, Wa

Post by phishguy »

I think that I've got the binary tree figured out. It seems to work OK. Please review the following code and let me know if there are any probems of suggestions.

Code: Select all

Dim n As Integer
Dim low As Integer
Dim high As Integer
Dim middle As Integer
Dim found As Integer
Dim value As Integer

n = 5

Dim a(n) As Ulongint
a(0) = 5
a(1) = 100
a(2) = 500
a(3) = 555
a(4) = 556
value = 500

low = 0
high = N
While (low < high)
    
    middle = (low + high)/2
    If A(middle) < value Then
        low = middle + 1
    Else
        high = middle
    End If
Wend
If ((low < N) Or (A(low) = value)) Then 
    found = low 
Else
    found = -1 
End If

Print found,a(found)
Sleep
End

cha0s
Site Admin
Posts: 5319
Joined: May 27, 2005 6:42
Location: USA
Contact:

Post by cha0s »

It looks ok at first glance, just don't ever actually access that array with -1... I thought I had posted some bsearch code before but I guess not. I also have found through searching forum for 'bsearch' that the CRT implements this function, although I haven't ever used it.

Seriously a bsearch written in brainf*ck will be fast... hehe.
phishguy
Posts: 1201
Joined: May 05, 2006 16:12
Location: West Richland, Wa

Post by phishguy »

Ooops, it doesn't work if the value is the last element in the list. I should start at a(1).
Zippy
Posts: 1295
Joined: Feb 10, 2006 18:05

Post by Zippy »

@phish

Something for you to compare with.. You've already got it figured out.

Code: Select all

'sort integer array, + binary search
' fb-updated version of Ethan Winer's Quick Sort, ca 1992
declare sub intQsort(myarray() as uinteger,startEl as integer,numEls as integer)
declare function BinarySearch(myarray() as uinteger,target as uinteger) as integer
'
dim shared as uinteger myarray(500000)
dim as integer c=0,i,res,target
dim as double bt,et
'
randomize timer
i=500000
For c = 0 To 499999
  myarray(c) = i
  i-=1
Next
'
bt=timer
intQsort(myarray(),0,500000)
et=timer
print using "#.########";et-bt
print "Sorts per second: " & ( 1/(et-bt) ) * 100000
'
print
for i=499990 to 499999
    print myarray(i),
next
'
'
target=500000
print
print "Search for: ";target
'
res=BinarySearch(myarray(),target)
'
if res>-1 then
    print target; " found at element: ";res
else
    print target; " not found in array"
end if
print
print "Done, Sleeping to Exit.."
sleep
end
'
sub intQsort(myarray() as integer,startEl as integer,numEls as integer)
'
    dim as integer qstack(NumEls\5+10)
    dim as integer i,j,first,last,stackptr
    dim as integer temp
    '
    first = startEl
    last  = StartEl + NumEls - 1
    do
        do
            temp=myarray((last + first)\2)
            i=first:j=last
            do  'reverse both < and > below to sort descending
                while myarray(i)<temp:i+=1:wend '> here
                while myarray(j)>temp:j-=1:wend '< here
                if i>j then exit do
                if i<j then swap myarray(i),myarray(j)
                i+=1:j-=1
            loop until i > j
            if i < last then
                qstack(stackptr)=i
                qstack(stackptr+1)=last
                stackptr+=2
            end if
            last=j
        loop until first>=last
        if stackptr=0 then
            exit do
        end if
        stackptr-=2
        first=qstack(stackptr)
        last=qstack(stackptr+1)
    loop
'
end sub
'
function BinarySearch(myarray() as uinteger,target as uinteger) as integer
'
    dim as integer amin = lbound(myarray)
    dim as integer amax = ubound(myarray)
    dim as integer atry
    '
    do
        atry = (amax + amin) \ 2         'start testing in middle
        '
        if myarray(atry) = target then   'found it!
            return atry                     
        end if
        '
        if myarray(atry) > target then   'too high, cut in half
            amax = atry - 1
        else
            amin = atry + 1               'too low, cut other way
        end if
        '
    loop while amax >= amin
    '
    return -1
'
'http://www.petesqbsite.com/sections/zines/qbcm/issues/2-3/default.html
end function
In the past sorting/searching "several hundred thousand" anythings was slow and disk i/o bound. Now it is nearly instant and fits nicely in RAM.
phishguy
Posts: 1201
Joined: May 05, 2006 16:12
Location: West Richland, Wa

Post by phishguy »

Thanks, I did find some issues with my code and it is now fixed. I have integrated it into a function to use with a file search.

Code: Select all

Function search( value As Ulongint) As Integer
    Dim upper As Integer
    Dim middle As Integer
    Dim lowvalue As Ulongint
    Dim highvalue As Ulongint
    Dim middlevalue As Ulongint
    Dim ret As Integer
    Dim lower As Integer = 2
    Get #1,1,upper
    upper +=1
    
    ret = -1
    Get #1,lower, lowvalue
    Get #1,upper, highvalue
    If ( lowvalue < value) And  (value < highvalue)  Then 
        
        While ( upper > lower + 1 ) 
            middle = ( upper + lower ) / 2
            Get #1,middle,middlevalue
            If  middlevalue = value Then ret = middle
            If middlevalue < value Then
                lower = middle
            Else 
                upper = middle
            End If
        Wend
        
    End If
    Get #1,lower,lowvalue
    If  lowvalue = value Then ret =  lower
    Get #1,upper,highvalue
    If  highvalue = value Then ret = upper 
    Return ret
End Function

Dim position As Integer
Dim value As Ulongint = 985000000000000
Dim flen As Ulongint
flen = 1000000
Dim t As Single

Randomize Timer

Open "tagnumbers.dat" For Random As #1 Len = 8
Put #1,1,flen
For x As Integer = 2 To flen + 1
    value = value + Int(Rnd * 10) + 1
    Put #1,x,value
Next x

Get #1,1,value
Print "number of element = ";value
Get #1,2,value
Print "print first value = ";value
Get #1,flen + 1,value
Print "print last value = ";value
Get #1,50000,value
t = Timer
Print "value read at element 50000 = ";value
position = search(value)
Print value; " is found at ";position
Print Timer - t
Close #1
Print "done"

Sleep
End
Nexinarus
Posts: 146
Joined: May 28, 2005 6:08
Location: Everywhere
Contact:

Post by Nexinarus »

Well if there will only be around 100,000 numbers you should be able to store it all in memory - or load it once at the start of the program or something.

If so then the fastest method without a doubt would be a trie, which is a tree except that at each node you store 1 digit of the number. for example if you
had three numbers in it: 123, 100, 050 and 129 then the trie would look like:

Code: Select all

0 - 5 - 0
1 - 0 - 0
  - 2 - 3
      - 9
However if you never want to store the numbers in memory and just add / search in the file a more specific algorithm will be needed.
Post Reply