What would be the best (fastest) way to do this.
What would be the best (fastest) way to do this.
Using Freebasic, I need to search a file containing 15 digit numbers (unsigned long integer) for a specific number. If the number doesn't exist, I need to add it to the file. This file will eventually contain hundreds of thousands of numbers, so some sort of intelligent search/sort will be necessary for speed. Would a database program like Sqlite be my best option? If so, does anyone have an example to do what I want? I looked at the examples on the forum and they don't seem to have exactly what I want. I haven't dealt much with searching and sorting routines and I'm not sure what approach would be best.
A database wouldn't necessarily be bad, but if you keep the numbers in order, you can do binary search... which will give you extremely fast results. (O(log n)) http://en.wikipedia.org/wiki/Binary_search
Perhaps this Arne Anderson (balanced binary) Tree I have been playing with may help
Code: Select all
'------------------------------------------------------------
' Arne Andersson balanced tree data structure
'adapted 3/2008 to FreeBASIC by Antoni Gual from the tutorials
'Eternally Confuzzled by Julienne Walker
'http://www.eternallyconfuzzled.com
'-------------------------------------------------------------
' add trav kill
'2560000 values 14 sec 0,12 sec 0,55
enum
d_right=-1
d_left=0
End enum
#define NULL 0
Type aa_node
As Integer level
As LongInt value
As aa_node Ptr link(d_right To d_left) 'these are pointers to left and right. The array allows to use
Declare Constructor(value As Longint,level As Integer,nil As aa_node ptr)
Declare Destructor()
End Type
Type aa_tree
Declare Constructor()
Declare Destructor
Declare Property getdepth() As integer
Declare Property getcount() As integer
Declare Property getfirst() As aa_node ptr
Declare property getlast() As aa_node Ptr
'insert element
Declare sub insert(_data As Integer)
'find element
declare function find(_data As Integer) As aa_node Ptr
'remove element
Declare sub remove(_data As Integer)
'empty the tree
Declare sub blank
'traversals, require the address of a working sub as a parameter
Declare Sub inOrder(As sub( As aa_node Ptr))
Declare Sub preOrder(As sub( As aa_node Ptr))
Declare Sub postOrder(As sub( As aa_node Ptr))
'declAre Function Prevelement(As Integer) As aa_treenode
'declare Function Nextelement(As Integer) As aa_treenode
Private:
As Integer count 'nr of elements in the tree
As Integer thedata 'static var for some recursive member functions
As aa_node Ptr root 'ptr to the root element
As aa_node Ptr nil 'pointer to the sentinel element
As aa_node Ptr item,heir 'static elements used by remove_r
As sub(As aa_node Ptr) f 'ptr to the function used in traversals
'recursive functions
Declare Sub inOrder_r(As aa_node Ptr)
Declare Sub preOrder_r(As aa_node Ptr)
Declare Sub postOrder_r(As aa_node Ptr)
'Declare function remove_r( As aa_node Ptr)As aa_node Ptr
Declare function insert_r( As aa_node ptr)As aa_node Ptr
End Type
'nil could be a member of aa_tree? in this case add node and remove node should be members too
Property aa_tree.getcount As integer:Return count: End Property
Property aa_tree.getdepth As integer:Return root->level:End property
Constructor aa_node(_data As LongInt, lev As Integer,nil As aa_node ptr)
value=_data
level=lev
link(d_left)=nil
link(d_right)=nil
End Constructor
Destructor aa_node():End destructor
Sub fkill(n As aa_node Ptr)
Delete n
End Sub
Sub aa_tree.blank()
this.postOrder(@fkill)
root=nil
count=0
End Sub
Destructor aa_tree()
blank
End Destructor
Constructor aa_tree()
'create an empty sentinel node
' node constructor will give link ptrs an undefined value
nil=New aa_node(&h8000000,0,nil)
'reset linka ptrs of nil to the true value of nil, after giving it a value
nil->link(d_left)=nil
nil->link(d_right)=nil
root=nil
count=0
item=0
heir=0
End Constructor
'skew and split are the housekeeping functions the Andersson tree uses to keep itself balanced
Function skew(root As aa_node Ptr) As aa_node Ptr
'recursive
Dim As aa_node Ptr temp
If root->level<>0 then
If root->link(d_left)->level=root->level then
temp =root
root=root->link(d_left)
temp->link(d_left)=root->link(d_right)
root->link(d_right)=temp
End If
root->link(d_right)=skew(root->link(d_right))
End if
Return root
End Function
Function split(root As aa_node Ptr) As aa_node Ptr
'recursive
Dim As aa_node Ptr temp
If root->level<>0 Then
if root->link(d_right)->link(d_right)->level = root->level Then
temp=root
root=root->link(d_right)
temp->link(d_right)=root->link(d_left)
root->link(d_left)=temp
root->level+=1
root->link(d_right)=split(root->link(d_right))
End if
EndIf
Return root
End Function
'auxiliar functions for traversing
Sub fnothing(n As aa_node Ptr):End sub
Sub fprint(n As aa_node Ptr)
Print Using "############### ";n->value;
End Sub
'traversing
Sub aa_tree.inorder_r(n As aa_node Ptr)
'recursive
If n<>nil then
inorder_r(n->link(d_left))
f(n)
inorder_r(n->link(d_right))
End if
End Sub
Sub aa_tree.inorder(fun As sub( As aa_node Ptr)): f=fun: inorder_r(root): End Sub
'traversing
Sub aa_tree.postorder_r(n As aa_node Ptr)
'recursive
If n<>nil then
postorder_r(n->link(d_left))
postorder_r(n->link(d_right))
f(n)
End if
End Sub
Sub aa_tree.postorder(fun As sub( As aa_node Ptr)): f=fun: postorder_r(root): End Sub
'traversing
Sub aa_tree.preorder_r(n As aa_node Ptr )
'recursive
If n<>nil Then
f(n)
preorder_r(n->link(d_left))
preorder_r(n->link(d_right))
End if
End Sub
Sub aa_tree.preorder(fun As sub( As aa_node Ptr)):f=fun: preorder_r(root): End Sub
Function aa_tree.insert_r(r As aa_node Ptr)As aa_node Ptr
'recursive
If r=nil Then
r=New aa_node(thedata,1,nil)
if r=NULL Then
? "Could not add a new node":Sleep :End
Else
count+=1
End if
Else
Dim As integer di=(r->value<thedata)
r->link(di)=insert_r(r->link(di))
r=skew(r)
r=split(r)
EndIf
Return r
End function
Sub aa_tree.insert(_data As Integer):thedata=_data: root=insert_r(root):End sub
property aa_tree.getfirst() As aa_node Ptr
Dim As aa_node Ptr temp=root,t1
do Until temp=nil
t1=temp
temp=temp->link(d_left)
Loop
If temp=root Then return NULL Else Return t1
End property
Property aa_tree.getlast() As aa_node Ptr
Dim As aa_node Ptr temp=root,t1
do Until temp=nil
t1=temp
temp=temp->link(d_right)
Loop
If temp=root Then return NULL Else Return t1
End Property
function aa_tree.find(_data As Integer) As aa_node ptr
Dim As aa_node Ptr temp=root
do
If temp=nil Then
return NULL
ElseIf _data=temp->value then
Return temp
Else
temp=temp->link((temp->value<_data))
End if
loop
End function
/'
simple recursive deletion: fails if the value to delete does not exist and is bigger than the biggest element
Function aa_tree.remove_r(r As aa_node Ptr) As aa_node ptr
'recursive
If r<>nil Then
heir=r
Dim As Integer di=(r->value<thedata)
If di=d_left Then item=r
r->link(di)=remove_r(r->link(di))
'found
If r=heir Then
' beep
'at the bottom, just remove
If (item=nil)or (item->value<>thedata) Then Return nil
item->value=r->value
item=nil
r=r->link(d_right)
Delete heir :count-=1
Else
If r->link(d_left)->level<r->level-1 Or r->link(d_right)->level<r->level-1 Then
r->level -=1
If r->link(d_right)->level >r->level Then r->link(d_right)->level = r->level
r=skew(r)
r=split(r)
End if
End If
End if
Return r
End function
Sub aa_tree.remove(_data As Integer):thedata=_data:root=remove_r(root):End sub
'/
sub aa_tree.remove(_data As Integer)
If root<>nil Then
Dim As Integer top=0,di=0
Dim As aa_node Ptr up(50)=any,it=root
'find the element to remove
do
up(top)=it:top+=1
If it=nil Then
Exit Sub 'not found
ElseIf _data=it->value Then
Exit Do
EndIf
di = (it->value < _Data)
it = it->link(di)
loop
' remove it
If it->link(d_left)=nil Or it->link(d_right)=nil Then
'single child case
Dim As Integer di2
di2=(it->link(di)=nil)
top-=1
If top<>0 Then
up(top-1)->link(di)=it->link(di2)
Else
root=it->link(d_right)
EndIf
DeAllocate (it):count-=1
Else
'two child case
Dim As aa_node Ptr heir,prev
heir=it->link(d_right)
prev=it
While heir->link(d_left) <>nil
up(top)=heir:top+=1:prev=heir
heir = heir->link(d_left)
Wend
it->value = heir->value
prev->link((prev = it)) = heir->link(d_right)
DeAllocate (heir):count-=1
EndIf
'rebalance up
top-=1
while top >= 0
if top <> 0 Then di = (up(top - 1)->link(d_right) = up(top))
if (up(top)->link(d_left)->level < up(top)->level - 1) or (up(top)->link(d_right)->level < up(top)->level - 1) then
up(top)->level-=1
if up(top)->link(d_right)->level > up(top)->level Then up(top)->link(d_right)->level = up(top)->level
up(top) = skew ( up(top) )
up(top) = split ( up(top) )
End if
if top <> 0 then
up(top - 1)->link(Di) = up(top)
else
root = up(top)
End if
top-=1
wend
EndIf
End Sub
'---------------------
'Test
Const numel=200000
Dim t As Single
? SizeOf(aa_tree)
t=Timer
Dim As aa_tree ptr a= New aa_tree()
'Dim As aa_tree ptr b= New aa_tree()
For i As Integer = 1 To numel
a->insert 10000000LL*rnd
'a->remove 2
'beep
If i And 1 Then a->remove numel -I/2
' b->insert -i
'b->inorder(@fprint)
'b->remove 0
' If (i And 1) Then b->remove -(i\2)
Next
Print a->getdepth, a->getcount
'Print b->getdepth, b->getcount
Print
Print a->getfirst->value, a->getlast->value
'Print b->getfirst->value, b->getlast->value
a->inorder(@fprint)
'b->inorder(@fprint)
a->blank
Print a->getdepth, a->getcount
'Print b->getdepth, b->getcount
Delete a
'Delete b
?"ended"
sleep
I think that I've got the binary tree figured out. It seems to work OK. Please review the following code and let me know if there are any probems of suggestions.
Code: Select all
Dim n As Integer
Dim low As Integer
Dim high As Integer
Dim middle As Integer
Dim found As Integer
Dim value As Integer
n = 5
Dim a(n) As Ulongint
a(0) = 5
a(1) = 100
a(2) = 500
a(3) = 555
a(4) = 556
value = 500
low = 0
high = N
While (low < high)
middle = (low + high)/2
If A(middle) < value Then
low = middle + 1
Else
high = middle
End If
Wend
If ((low < N) Or (A(low) = value)) Then
found = low
Else
found = -1
End If
Print found,a(found)
Sleep
End
It looks ok at first glance, just don't ever actually access that array with -1... I thought I had posted some bsearch code before but I guess not. I also have found through searching forum for 'bsearch' that the CRT implements this function, although I haven't ever used it.
Seriously a bsearch written in brainf*ck will be fast... hehe.
Seriously a bsearch written in brainf*ck will be fast... hehe.
@phish
Something for you to compare with.. You've already got it figured out.
In the past sorting/searching "several hundred thousand" anythings was slow and disk i/o bound. Now it is nearly instant and fits nicely in RAM.
Something for you to compare with.. You've already got it figured out.
Code: Select all
'sort integer array, + binary search
' fb-updated version of Ethan Winer's Quick Sort, ca 1992
declare sub intQsort(myarray() as uinteger,startEl as integer,numEls as integer)
declare function BinarySearch(myarray() as uinteger,target as uinteger) as integer
'
dim shared as uinteger myarray(500000)
dim as integer c=0,i,res,target
dim as double bt,et
'
randomize timer
i=500000
For c = 0 To 499999
myarray(c) = i
i-=1
Next
'
bt=timer
intQsort(myarray(),0,500000)
et=timer
print using "#.########";et-bt
print "Sorts per second: " & ( 1/(et-bt) ) * 100000
'
print
for i=499990 to 499999
print myarray(i),
next
'
'
target=500000
print
print "Search for: ";target
'
res=BinarySearch(myarray(),target)
'
if res>-1 then
print target; " found at element: ";res
else
print target; " not found in array"
end if
print
print "Done, Sleeping to Exit.."
sleep
end
'
sub intQsort(myarray() as integer,startEl as integer,numEls as integer)
'
dim as integer qstack(NumEls\5+10)
dim as integer i,j,first,last,stackptr
dim as integer temp
'
first = startEl
last = StartEl + NumEls - 1
do
do
temp=myarray((last + first)\2)
i=first:j=last
do 'reverse both < and > below to sort descending
while myarray(i)<temp:i+=1:wend '> here
while myarray(j)>temp:j-=1:wend '< here
if i>j then exit do
if i<j then swap myarray(i),myarray(j)
i+=1:j-=1
loop until i > j
if i < last then
qstack(stackptr)=i
qstack(stackptr+1)=last
stackptr+=2
end if
last=j
loop until first>=last
if stackptr=0 then
exit do
end if
stackptr-=2
first=qstack(stackptr)
last=qstack(stackptr+1)
loop
'
end sub
'
function BinarySearch(myarray() as uinteger,target as uinteger) as integer
'
dim as integer amin = lbound(myarray)
dim as integer amax = ubound(myarray)
dim as integer atry
'
do
atry = (amax + amin) \ 2 'start testing in middle
'
if myarray(atry) = target then 'found it!
return atry
end if
'
if myarray(atry) > target then 'too high, cut in half
amax = atry - 1
else
amin = atry + 1 'too low, cut other way
end if
'
loop while amax >= amin
'
return -1
'
'http://www.petesqbsite.com/sections/zines/qbcm/issues/2-3/default.html
end function
Thanks, I did find some issues with my code and it is now fixed. I have integrated it into a function to use with a file search.
Code: Select all
Function search( value As Ulongint) As Integer
Dim upper As Integer
Dim middle As Integer
Dim lowvalue As Ulongint
Dim highvalue As Ulongint
Dim middlevalue As Ulongint
Dim ret As Integer
Dim lower As Integer = 2
Get #1,1,upper
upper +=1
ret = -1
Get #1,lower, lowvalue
Get #1,upper, highvalue
If ( lowvalue < value) And (value < highvalue) Then
While ( upper > lower + 1 )
middle = ( upper + lower ) / 2
Get #1,middle,middlevalue
If middlevalue = value Then ret = middle
If middlevalue < value Then
lower = middle
Else
upper = middle
End If
Wend
End If
Get #1,lower,lowvalue
If lowvalue = value Then ret = lower
Get #1,upper,highvalue
If highvalue = value Then ret = upper
Return ret
End Function
Dim position As Integer
Dim value As Ulongint = 985000000000000
Dim flen As Ulongint
flen = 1000000
Dim t As Single
Randomize Timer
Open "tagnumbers.dat" For Random As #1 Len = 8
Put #1,1,flen
For x As Integer = 2 To flen + 1
value = value + Int(Rnd * 10) + 1
Put #1,x,value
Next x
Get #1,1,value
Print "number of element = ";value
Get #1,2,value
Print "print first value = ";value
Get #1,flen + 1,value
Print "print last value = ";value
Get #1,50000,value
t = Timer
Print "value read at element 50000 = ";value
position = search(value)
Print value; " is found at ";position
Print Timer - t
Close #1
Print "done"
Sleep
End
Well if there will only be around 100,000 numbers you should be able to store it all in memory - or load it once at the start of the program or something.
If so then the fastest method without a doubt would be a trie, which is a tree except that at each node you store 1 digit of the number. for example if you
had three numbers in it: 123, 100, 050 and 129 then the trie would look like:
However if you never want to store the numbers in memory and just add / search in the file a more specific algorithm will be needed.
If so then the fastest method without a doubt would be a trie, which is a tree except that at each node you store 1 digit of the number. for example if you
had three numbers in it: 123, 100, 050 and 129 then the trie would look like:
Code: Select all
0 - 5 - 0
1 - 0 - 0
- 2 - 3
- 9