PHP-like associative arrays

Post your FreeBASIC tips and tricks here. Please don’t post your code without including an explanation.
krcko
Posts: 163
Joined: Jul 30, 2006 0:34
Location: Serbia
Contact:

PHP-like associative arrays

Postby krcko » May 24, 2008 17:33

Today i've tryed to implement hash table datatype for the first time. And i was suprised when i saw how easy hash tables are to implement.
So i wrote associative array datatype, this implementation is like PHP's arrays - you can have strings or numbers as keys.

here's the code for assocarray.bi:

Code: Select all


#Define ASSOC_ARRAY_LOG_COLLISIONS
   
'
'  Paul Hsieh's SuperFastHash
'  http://www.azillionmonkeys.com/qed/hash.html
'   
Function SuperFastHash(Key As Const String) As UInteger

    #Define get16bits(d) ((Cast(UInteger, Cast(UByte Ptr, d)[1]) Shl 8) + Cast(UInteger, Cast(UByte Ptr, d)[0]))
   
    Dim As UInteger length = Len(Key), hash = length, tmp
    Dim As Integer r
    Dim As ZString Ptr chars = StrPtr(Key)
   
    If length = 0 Then Return 0
   
    r = length And 3
    length Shr= 2
   
    Do While length > 0
       
        hash  += get16bits(chars)
        tmp    = (get16bits(chars + 2) Shl 11) Xor hash
        hash   = (hash Shl 16) Xor tmp
        chars += 2 * SizeOf(UShort)
        hash  += hash Shr 11
       
        length -= 1
    Loop
   
    Select Case r
       
        Case 3
            hash += get16bits(chars)
            hash Xor= hash Shr 16
            hash Xor= chars[SizeOf(UShort)] Shl 18
            hash += hash Shr 11
           
        Case 2
            hash += get16bits(chars)
            hash Xor= hash Shl 11
            hash += hash Shr 17
           
        Case 1
            hash += *Cast(UByte Ptr, chars)
            hash Xor= hash Shl 10
            hash += hash Shr 1
           
    End Select
   
    hash Xor= hash Shl 3
     hash +=   hash Shr 5
     hash Xor= hash Shl 4
     hash +=   hash Shr 17
     hash Xor= hash Shl 25
     hash +=   hash Shr 6
   
    Return hash
   
    #Undef get16bits
       
End Function

Type HashFunc As Function(Key As Const String) As UInteger

Enum KeyType
    Undefined
    IntegerKey
    StringKey
End Enum

#Macro DefineAssocArrayType(_TYPE_)
   
    #Ifndef __ASSOC_ARRAY_TYPE_##_TYPE_
    #Define __ASSOC_ARRAY_TYPE_##_TYPE_
   
    Type _TYPE_##ArrayItem
        Value        As _TYPE_
        Key         As KeyType
        Union
            iKey    As Integer Ptr
            sKey    As String Ptr
        End Union
        Declare Sub Clear
        Declare Destructor
    End Type
   
    Sub _TYPE_##ArrayItem.Clear
        Select Case Key
            Case IntegerKey:     DeAllocate iKey
            Case StringKey:     DeAllocate sKey
        End Select
    End Sub
   
    Destructor _TYPE_##ArrayItem
        This.Clear
    End Destructor

    Type _TYPE_##Array
       
        Public:
       
            Declare Property Item(ByVal Key As Integer) As _TYPE_
            Declare Property Item(ByVal Key As Integer, Value As _TYPE_)
           
            Declare Property Item(ByVal Key As String) As _TYPE_
            Declare Property Item(ByVal Key As String, Value As _TYPE_)
           
            Declare Constructor(numBuckets As Integer = 10007, numItems As Integer = 31, hashfunc As HashFunc = ProcPtr(SuperFastHash))
            Declare Destructor
       
        Private:
           
            hash                As HashFunc
            table                As _TYPE_##ArrayItem Pointer Pointer
            numBuckets        As Integer
            itemsPerBucket    As Integer
       
    End Type

    Constructor _TYPE_##Array(numBuckets As Integer = 10007, numItems As Integer = 31, hashfunc As HashFunc = ProcPtr(SuperFastHash))
       
        This.numBuckets = numBuckets
        itemsPerBucket = numItems
        hash = hashfunc
       
        table = New _TYPE_##ArrayItem Pointer[numBuckets]
       
        For i As Integer = 0 To numBuckets - 1
            table[i] = New _TYPE_##ArrayItem[itemsPerBucket]
        Next
       
    End Constructor

    Destructor _TYPE_##Array
        For i As Integer = 0 To numBuckets - 1
            Delete[] table[i]
        Next
        Delete[] table
    End Destructor

    Property _TYPE_##Array.Item(ByVal Key As Integer) As _TYPE_
        Dim As UInteger keyHash = hash(Chr(1) + Str(Key))
        Return table[keyHash Mod numBuckets][keyHash Mod itemsPerBucket].Value
    End Property
   
    Property _TYPE_##Array.Item(ByVal Key As Integer, Value As _TYPE_)
        Dim As UInteger keyHash = hash(Chr(1) + Str(Key))
        With table[keyHash Mod numBuckets][keyHash Mod itemsPerBucket]
            If .Key <> Undefined Then ' collision - DAMMIT!
                If Not (.Key = IntegerKey And *.iKey = Key) Then
                    .Clear
                    #Ifdef ASSOC_ARRAY_LOG_COLLISIONS
                        Print "collision: (integer)"; Key; " with ";
                        Select Case .Key
                            Case IntegerKey:     Print " (integer) "; .iKey
                            Case StringKey:     Print " (string) ";     .sKey
                        End Select
                    #EndIf
                EndIf
            EndIf
            .Value = Value
            .Key = IntegerKey
            .iKey = Allocate(SizeOf(Integer))
            *.iKey = Key
        End With
    End Property
   
    Property _TYPE_##Array.Item(ByVal Key As String) As _TYPE_
        Dim As UInteger keyHash = hash(Key)
        Return table[keyHash Mod numBuckets][keyHash Mod itemsPerBucket].Value
    End Property
   
    Property _TYPE_##Array.Item(ByVal Key As String, Value As _TYPE_)
        Dim As UInteger keyHash = hash(Key)
        With table[keyHash Mod numBuckets][keyHash Mod itemsPerBucket]
            If .Key <> Undefined Then ' collision - DAMMIT!
                If Not (.Key = StringKey And *.sKey = Key) Then
                    .Clear
                    #Ifdef ASSOC_ARRAY_LOG_COLLISIONS
                        Print "collision: (string)"; Key; " with ";
                        Select Case .Key
                            Case IntegerKey:     Print " (integer) "; .iKey
                            Case StringKey:     Print " (string) ";     .sKey
                        End Select
                    #EndIf
                EndIf
            EndIf
            .Value = Value
            .Key = StringKey
            .sKey = Callocate(SizeOf(String))
            *.sKey = Key
        End With
    End Property

    #EndIf
#EndMacro

and here's example of using it:

Code: Select all

#include "assocarray.bi"

DefineAssocArrayType( Integer )
DefineAssocArrayType( String )

Dim As IntegerArray array
Dim As StringArray s

array.Item("bla") = 123
array.Item(123) = 124
array.Item(123) = 124324

Print array.Item("bla"), array.Item(123)

s.item("color") = "red"
s.item("fruit") = "oranges"
s.item(1) = "one"

Print s.item("color"), s.item("fruit"), s.item(1)

Sleep

you can use some "variant" types like Mixed together with DefineAssocArrayType to create more PHPish arrays:

Code: Select all

#include "assocarray.bi"
#include "mixed.bi"

DefineAssocArrayType( Mixed )

Dim array As MixedArray

array.Item("color name") = "red"
array.Item("color value") = &HFF0000
array.Item(10) = "Ten"
array.Item(100) = 100
v1ctor
Site Admin
Posts: 3801
Joined: May 27, 2005 8:08
Location: SP / Bra[s]il
Contact:

Postby v1ctor » May 25, 2008 1:15

I wonder how much faster could the FB lexer get if we start using that hash function, iterating just 1/4 of the symbol could for sure make things a bit more speedy. Gotta try it some day.

Great implementation, if functors were supported (overloading the () operator), the access to arrays would look exactly like in PHP.
krcko
Posts: 163
Joined: Jul 30, 2006 0:34
Location: Serbia
Contact:

Postby krcko » May 25, 2008 1:19

yeah, this is some really fast (and really good) hash function (even people from apple are using it, and i also saw that this hash function is used in open office products)

Great implementation, if functors were supported (overloading the () operator), the access to arrays would look exactly like in PHP.

this is excatly what i posted as feature request (http://sourceforge.net/tracker/index.ph ... tid=693199) few mins after i created this tread
AGS
Posts: 1284
Joined: Sep 25, 2007 0:26
Location: the Netherlands

Re: PHP-like associative arrays

Postby AGS » May 25, 2008 9:08

krcko wrote:Today i've tryed to implement hash table datatype for the first time. And i was suprised when i saw how easy hash tables are to implement.
So i wrote associative array datatype, this implementation is like PHP's arrays - you can have strings or numbers as keys.

here's the code for assocarray.bi:


Thank you VERY much, krcko. I had a bit of a problem translating a program written in C#. You see, .NET comes with a hashtable that was being used by a C# program I am translating. The program used a hashtable with numbers as key/data. I was in need of a (fast) implementation of a hashtable that has integers for keys/data. And there you are publishing just the kind of thing I need. Great!
bradlis7
Posts: 12
Joined: May 29, 2008 19:20
Location: Texas
Contact:

Postby bradlis7 » May 29, 2008 19:22

This is great. The only thing I notice is that you can't use array as values, but it turns out for my problem, I didn't need it anyways.

I don't suppose there's a way to iterate over each item that is in one of the arrays is there? Similar to a foreach loop (I realize FB doesn't have foreach).

--EDIT--
From what I see, you'd have to overload for, next, and step, but looking at how this is actually implemented, it's going to be hard since you have buckets, and items in the buckets. I'm actually starting to understand a little of what's going on (scary), and I'm going to try to try some stuff out to see what I can come up with.

--EDIT2--
Nope, don't think it's possible within a reasonable amount of processing. You'd have to loop through up to 300K items with the default settings. Someone else may know a better solution than brute force testing though...
krcko
Posts: 163
Joined: Jul 30, 2006 0:34
Location: Serbia
Contact:

Postby krcko » May 30, 2008 0:43

well i haven't tought about foreach-like behavior... it is hard to implement that behavior with this code... becouse as you said it's needed to iterate over 300K items, but that may not be too slow...
AGS
Posts: 1284
Joined: Sep 25, 2007 0:26
Location: the Netherlands

Postby AGS » Jun 07, 2008 6:36

I wanted to see how fast the associatieve array is so I compared it with my http://www.freebasic.net/forum/viewtopic.php?t=11588 keyword recognizer.

To be honest, the keyword recognizer is just a C --> FreeBASIC translation of the output of a tool used by the gcc developers (gperf). gperf is used for recognizing keywords and it yields a perfect hashfunction for a given list of words (no collisions). The keyword recognizer is more of a multiset then an associative array. Using it you can check if a certain key is inside a set of words, there is nothing 'associated' to the key.

But the associative array can be used as a multiset. So I just dropped 367 FreeBASIC keywords in the associative array (mapping the keywords to the number 1) and measured how fast it would return answers to the simple question 'Is this keyword in the array'?

And the associative array initially outperformed the key recognizer! And that was not what I was expecting. The associative array produced results 40% faster than the keyword recognizer and that's a lot.

I had to adjust the code of the recognizer a bit or rather, let gperf produce somewhat different code before the keyword recognizer caught up with the associative array (after some tweaking the recognizer ran as fast as the associative array).

I also checked how the associative array behaved when being fed 'keywords' that were not in the array (I had put together a list of 160 countries). Finding out that a keyword isn't in the array runs a lot faster then finding out that a keyword is in the array (factor 2). And that's a good thing.

All in all I think the performance of your implementation of an associative array works quite well as a possible implementation of a multiset/keyword recognizer. I think I could have tweaked the key recognizer some more to make it go even faster but the recognizers code just doesn't look as clean as that of this associative array.
krcko
Posts: 163
Joined: Jul 30, 2006 0:34
Location: Serbia
Contact:

Postby krcko » Jun 07, 2008 9:24

hey thanks for testing :)

i dunno about gperf, what hash function does it use? i use SuperFastHash algorithm which is very fast and maybe that is the reason why is this implementation faster than gperf

but again, this was just my first try in implementing a hash table, there is no logic for handling collisions, or expanding table if needed, i yet have much to learn about hash tables (they are interesting so i will make a little research in that field :p)
AGS
Posts: 1284
Joined: Sep 25, 2007 0:26
Location: the Netherlands

Postby AGS » Jun 11, 2008 21:55

krcko wrote:hey thanks for testing :)

i dunno about gperf, what hash function does it use?


Gperf builds a hash function for a given set of keywords. It builds a 'perfect' hash function for a given set of keywords (usually the keywords from a programming language). This is what the builders of gperf state:

The perfect hash function generator gperf reads a set of “keywords” from an input file (or from the standard input by default). It attempts to derive a perfect hashing function that recognizes a member of the static keyword set with at most a single probe into the lookup table. If gperf succeeds in generating such a function it produces a pair of C source code routines that perform hashing and table lookup recognition


IBM has a nice article on gperf that explains the internal workings of gperf in some detail. gperf is weird but it works very well.

http://www.ibm.com/developerworks/linux ... gperf.html

krcko wrote:i use SuperFastHash algorithm which is very fast and maybe that is the reason why is this implementation faster than gperf


As fast as gperf, krcko, as fast as gperf :-D Let's not get carried away :-D
You can tweak the way gperf builds the source files (hash function/lookup function) and I could get my keyword recognizer even faster then it is now. But I'd much rather use your associative array because it's more versatile. It can do more then 'just' look up some keywords.

gperf can do more as well but your program looks better, 'feels' better. And it doesn't contain a 'black magic' table like gperf produces. (the table is used in the produced hashfunction).

krcko wrote:but again, this was just my first try in implementing a hash table, there is no logic for handling collisions, or expanding table if needed, i yet have much to learn about hash tables (they are interesting so i will make a little research in that field :p)


Another way of implementing associative arrays is through the use of a red and black tree. It doesn't use a hashfunction but rather an intricate way to keep the tree balanced. Lookups are quite fast (O log(n)). I've seen a version in the bsd repository where it has been implemented using macros (it's in C).

http://www.freebsd.org/cgi/cvsweb.cgi/s ... web-markup

Hashtables are used for mappings/associative arrays etc... and so are red and black trees.

And a nice FreeBASIC implementation of hashtables/red-and-black-trees/other data structures are always welcome (FreeBASIC could have it's own 'collections' package http://msdn.microsoft.com/en-us/library/ybcx56wz(VS.80).aspx :) ).
krcko
Posts: 163
Joined: Jul 30, 2006 0:34
Location: Serbia
Contact:

Postby krcko » Jun 12, 2008 11:45

As fast as gperf, krcko, as fast as gperf :-D Let's not get carried away :-D


i've only read this part:
The associative array produced results 40% faster than the keyword recognizer and that's a lot.

;)

and haven't noticed that part about tweaking gperf...

but anyway i wanted to say that if associative array is faster than gperf than it's not my 'fault' but Paul Hseih's :)


this black-red trees looks interesting... i've did some research and i'll try to make another implementation (that solves collisions and auto-expands) as soon as i find some spare time...
cha0s
Site Admin
Posts: 5317
Joined: May 27, 2005 6:42
Location: Illinois
Contact:

Postby cha0s » Jun 12, 2008 13:53

I'm thinking that fbext has an implementation of RB trees, although I might be getting that confused with an earlier effort by stylin to recreate the STL in FB...
sir_mud
Posts: 1401
Joined: Jul 29, 2006 3:00
Location: US
Contact:

Postby sir_mud » Jun 13, 2008 2:05

We don't have any tree in the Extended library yet, but we do want to add one. I've been planning on implementing an Andersson tree at some point when I have some free time, but if someone would like to contribute another variant or an Andersson they're more than welcome.
AGS
Posts: 1284
Joined: Sep 25, 2007 0:26
Location: the Netherlands

Postby AGS » Jun 14, 2008 5:55

sir_mud wrote:We don't have any tree in the Extended library yet, but we do want to add one. I've been planning on implementing an Andersson tree at some point when I have some free time, but if someone would like to contribute another variant or an Andersson they're more than welcome.


Help me out here, sir_mud, 'cause I'm trying to benchmark the hashtable that's inside the extended library with the same benchmarking code I used to benchmark my keyword recognizer and the associative array but I cannot get a good benchmark.

For starters, the hashtable from the extended library writes a lot of strings to the screen. For a proper benchmark this writing to the screen should be turned of but I do not know how (fiddling around with the debug macro did not work).

Before I forget: I am using the fb ext binaries from the fb ext google site (libs have been compiled on 09-05-2008).

When I try to find a key that, on insertion, collided with some other key I get output on the screen stating that

"<keyword> was not found on try #<some_number)".

When I use an iterator with ForEach, using the code as given in the hashtable example that comes with fb ext, I do get to see all the keys printed out (including the ones that collided with some other key) so inserting the keys works (regardless of the messages hashtable.find outputs to the screen).

Perhaps this discussion on the fb ext hashtable should be continued in the fb ext thread?
Makoto WATANABE
Posts: 193
Joined: Apr 10, 2010 11:41
Location: Japan
Contact:

Re: PHP-like associative arrays

Postby Makoto WATANABE » Jun 16, 2020 13:06

Dear All!

I compiled this example with FreeBASIC 1.07.1.
Compilation failed with the following error.
assocarray.bi(18) error 181: Invalid assignment/conversion in 'Dim As ZString Ptr chars = StrPtr(Key)'

By the way, "rosettacode" has a topic of "Associative arrays".
http://rosettacode.org/wiki/Associative_array/Creation
There seems to be no FreeBASIC example registered for this topic.
I would appreciate it if you could register a code suitable for FreeBASIC.
srvaldez
Posts: 2511
Joined: Sep 25, 2005 21:54

Re: PHP-like associative arrays

Postby srvaldez » Jun 16, 2020 14:31

hello Makoto WATANABE
to fix the error change the line Dim As ZString Ptr chars = StrPtr(Key) to Dim As ZString Ptr chars = cast(zstring ptr,StrPtr(Key))

Return to “Tips and Tricks”

Who is online

Users browsing this forum: No registered users and 2 guests