String Hash Table

Started by Theo Gottwald, August 23, 2023, 06:36:58 PM

Previous topic - Next topic

0 Members and 1 Guest are viewing this topic.

Theo Gottwald

Reasonably fast.
add 1,000,000 key/value items to empty hash table = 0.488 seconds
use key to get 1,000,000 values in hash table of 1,000,000 key/value items = 0.249 seconds
Can be stored/restored to/from String/File.

Added note: the key's associated value can be a UDT.
Key and value may contain nulls, any kind of binary data.

Source: Powerbasic Forum


'Public domain, use at own risk. SDurham

#If Not %Def(%Memory230424)
    %Memory230424 = 1
    Declare Function GlobalAlloc Lib "Kernel32.dll" Alias "GlobalAlloc" (ByVal uFlags As Dword, ByVal dwBytes As Dword) As Dword
    Declare Function GlobalReAlloc Lib "Kernel32.dll" Alias "GlobalReAlloc" (ByVal hMem As Dword, ByVal dwBytes As Dword, ByVal uFlags As Dword) As Dword
    Declare Function GlobalFree Lib "Kernel32.dll" Alias "GlobalFree" (ByVal hMem As Dword) As Dword
    %GMEM_FIXED_         = &H0000
    %GMEM_MOVEABLE_      = &H0002
    %GMEM_NOCOMPACT_     = &H0010
    %GMEM_NODISCARD_     = &H0020
    %GMEM_ZEROINIT_      = &H0040
    %GMEM_MODIFY_        = &H0080
    %GMEM_DISCARDABLE_   = &H0100
    %GMEM_NOT_BANKED_    = &H1000
    %GMEM_SHARE_         = &H2000
    %GMEM_DDESHARE_      = &H2000
    %GMEM_NOTIFY_        = &H4000
    %GMEM_LOWER_         = %GMEM_NOT_BANKED_
    %GMEM_VALID_FLAGS_   = &H7F72
    %GMEM_INVALID_HANDLE_ = &H08000
    %GHND_               = (%GMEM_ZEROINIT_ Or %GMEM_MOVEABLE_)
    %GPTR_               = (%GMEM_ZEROINIT_ Or %GMEM_FIXED_)
    Function MemAllocate(ByVal bytes As Long) ThreadSafe As Long
        If bytes Then Function = GlobalAlloc(ByVal %GPTR_, ByVal bytes)
    End Function
    Function MemReAllocate(ByVal h As Long, ByVal bytes As Long) ThreadSafe As Long
        If h And bytes Then
            Function = GlobalReAlloc(ByVal h, ByVal bytes, ByVal %GMEM_MOVEABLE_ Or %GMEM_ZEROINIT_)
        ElseIf bytes Then
            Function = GlobalAlloc(ByVal %GPTR_, ByVal bytes)
        ElseIf h Then
            Function = GlobalFree(ByVal h)
        End If
    End Function
    Function MemFree(ByVal h As Long) ThreadSafe As Long
        If h Then GlobalFree(ByVal h)
    End Function
#EndIf '%Memory230424

#If Not %Def(%Str230424)
    %Str230424 = 1
    ' String container that can be stored in UDT.
    ' public domain, use at own risk
    ' SDurham
    %StrItemSize = 1
    Type StrT
        count As Long
        mem As Long
    End Type
    Sub StrFinal(t As StrT) ThreadSafe
        ' must call before variable goes out of scope
        StrClear t
    End Sub
    Sub StrClear(t As StrT) ThreadSafe
        ' empty container
        t.count = 0
        t.mem = MemFree(t.mem)
    End Sub
    Function StrCount(t As StrT) ThreadSafe As Long
        ' get character count
        Function = t.count
    End Function
    Sub StrSet(t As StrT, ByVal value As String) ThreadSafe
        ' set string
        Local lenValue As Long : lenValue = Len(value)
        t.count = 0
        If t.mem Then t.mem = MemFree(t.mem)
        If lenValue Then
            t.mem = MemAllocate(lenValue * %StrItemSize)
            If t.mem = 0 Then Exit Sub
            t.count = lenValue
            Memory Copy StrPtr(value), t.mem, lenValue * %StrItemSize
        End If
    End Sub
    Function StrGet(t As StrT) ThreadSafe As String
        ' get string
        If t.count Then Function = Peek$(t.mem, t.count)
    End Function
    Function StrCompareCallback(ByVal a As String, ByVal b As String) ThreadSafe As Long
        ' string container case sensitive compare callback
        Function = Switch&(a < b, -1, a > b, 1) 'else zero, match
    End Function
    Function StrCompareUCaseCallback(ByVal a As String, ByVal b As String) ThreadSafe As Long
        ' string container ignore calse compare callback
        Local sa As String : sa = UCase$(a)
        Local sb As String : sb = UCase$(b)
        Function = Switch&(sa < sb, -1, sa > sb, 1) 'else zero, match
    End Function
#EndIf '%Str230424

#If Not %Def(%StrBld230424)
    %StrBld230424 = 1
    ' String Builder : all memory freed when StrBldGet() called
    ' add 1,000,000 one-character Strings =  0.094 seconds
    ' public domain, use at own risk
    ' SDurham
    %StrBldItemsSize = 1
    %StrBldBuffer = 100000
    Type StrBldT
        mem As Long
        count As Long
        max As Long
    End Type
    Function StrBldCount(t As StrBldT) ThreadSafe As Long
        ' get character count
        Function = t.count
    End Function
    Sub StrBldAdd(t As StrBldT, ByRef value As String) ThreadSafe
        ' append string
        Local currentCount, currentMax, newMax As Long
        Local lenValue As Long : lenValue = Len(value)
        If lenValue Then
            If lenValue > t.max - t.count Then
                currentCount = t.count
                currentMax = t.max
                t.count = 0
                t.max = 0
                newMax = currentCount + lenValue + %StrBldBuffer
                t.mem = MemReAllocate(t.mem, newMax * %StrBldItemsSize)
                If t.mem = 0 Then Exit Sub
                t.count = currentCount
                t.max = newMax
            End If
            Memory Copy StrPtr(value), t.mem + (t.count * %StrBldItemsSize), lenValue * %StrBldItemsSize
            t.count += lenValue
        End If
    End Sub
    Function StrBldGet(t As StrBldT) ThreadSafe As String
        ' get complete string and free all memory
        If t.count Then Function = Peek$(t.mem, t.count)
        t.mem = MemFree(t.mem)
        t.count = 0
        t.max = 0
    End Function
#EndIf '%StrBld230424

#If Not %Def(%FileUtilities230424)
    %FileUtilities230424 = 1
    ' File Utilities
    ' public domain, use at own risk
    ' SDurham
    Sub StrToFile(ByVal file As WString, ByVal s As String) ThreadSafe
        ' store string to File
        Local f As Long
        If Len(file) = 0 Then Exit Sub
        f = FreeFile
        Open file For Binary As f
        SetEof f
        Put$ f, s
        Close f
    End Sub
    Function StrFromFile(ByVal file As WString) ThreadSafe As String
        ' get file contents as string
        Local f As Long
        Local s As String
        If IsFalse IsFile(file) Then Exit Function
        f = FreeFile
        Open file For Binary As f
        Get$ f, Lof(f), s
        Function = s
        Close f
    End Function
    Sub WStrToFile(ByVal file As WString, ByVal s As WString) ThreadSafe
        ' store string to File
        Local f As Long
        If Len(file) = 0 Then Exit Sub
        f = FreeFile
        Open file For Binary As f
        SetEof f
        Put$$ f, s
        Close f
    End Sub
    Function WStrFromFile(ByVal file As WString) ThreadSafe As WString
        ' get file contents as string
        Local f As Long
        Local s As WString
        If IsFalse IsFile(file) Then Exit Function
        f = FreeFile
        Open file For Binary As f
        Get$$ f, Lof(f), s
        Function = s
        Close f
    End Function
    Sub WStrToTextFile(ByVal file As WString, ByVal s As WString) ThreadSafe
        ' store string converted to UTF8 to File
        StrToFile file, ChrToUtf8$(s)
    End Sub
    Function WStrFromTextFile(ByVal file As WString) As WString
        ' get file contents converted from UTF8
        Function = Utf8ToChr$(StrFromFile(file))
    End Function
    Function WStrFromTextFileFixed(ByVal file As WString) ThreadSafe As WString
        ' get file contents converted from UTF8 fixing Unix line endings if any
        Local s As WString
        s = Utf8ToChr$(StrFromFile(file))
        Replace $CrLf With $Lf In s
        Replace $CrLf With $Lf In s
        Replace $Cr With $Lf In s
        Replace $Cr With $Lf In s
        Replace $Lf With $CrLf In s
        Function = s
    End Function
#EndIf '%FileUtilities230424

#If Not %Def(%StrHash230424)
    %StrHash230424 = 1
    ' String/String Key/Value Hash Table
    ' Value stored and retrieved using unique key.
    ' Key case sensitive.
    ' Key and value may contain nulls, any kind of binary data.
    '
    ' add 1,000,000 key/value items to empty hash table = 0.488 seconds
    ' use key to get 1,000,000 values in hash table of 1,000,000 key/value items = 0.249 seconds
    Type StrHashRecordT
        key As StrT
        value As StrT
        next As StrHashRecordT Ptr
    End Type
    Type StrHashT
        count As Long
        capacity As Long
        arr As Long Ptr
        cursorIndex As Long
        cursorRecord As StrHashRecordT Ptr
    End Type
    Sub StrHashCapacity(t As StrHashT, ByVal capacity As Long) ThreadSafe
        ' capacity must be set before use : about number of expected items
        If capacity < 1 Then Exit Sub
        t.arr = MemAllocate(capacity * 4)
        If t.arr = 0 Then Exit Sub
        t.capacity = capacity
    End Sub
    Sub StrHashFinal(t As StrHashT) ThreadSafe
        ' must call before variable goes out of scope to free memory
        StrHashClear t
        t.arr = MemFree(t.arr)
    End Sub
    Sub StrHashClear(t As StrHashT) ThreadSafe
        ' empty container
        Register i As Long
        Local record As StrHashRecordT Ptr
        For i = 0 To t.capacity - 1
            While t.@arr[i]
                record = t.@arr[i]
                t.@arr[i] = @record.next
                StrFinal @record.key
                StrFinal @record.value
                record = MemFree(record)
            Wend
            t.count = 0
            t.cursorIndex = -1
            t.cursorRecord = 0
        Next
    End Sub
    Function StrHashCoumt(t As StrHashT) ThreadSafe As Long
        ' get item count
        Function = t.count
    End Function
    Function StrHashAdd(t As StrHashT, ByRef key As String, ByVal value As String) ThreadSafe As Byte
        ' add key and associated value to hash table : value may be UDT : fail if key already in table
        Local keyIndex As Long
        Local record As StrHashRecordT Ptr
        If t.capacity = 0 Then Exit Function
        keyIndex = StrHashKeyIndex(key, t.capacity)
        record = t.@arr[keyIndex]
        While record
            If Peek$(@record.key.mem, @record.key.count) = key Then Exit Function
            record = @record.next
        Wend
        record = MemAllocate(SizeOf(@record))
        If record = 0 Then Exit Function
        StrSet @record.key, key
        StrSet @record.value, value
        @record.next = t.@arr[keyIndex]
        t.@arr[keyIndex] = record
        Incr t.count
        Function = 1
    End Function
    Function StrHashContains(t As StrHashT, ByRef key As String) ThreadSafe As Byte
        ' true/false key in hash table
        Local keyIndex As Long
        Local record As StrHashRecordT Ptr
        If t.capacity = 0 Then Exit Function
        keyIndex = StrHashKeyIndex(key, t.capacity)
        record = t.@arr[keyIndex]
        While record
            If Peek$(@record.key.mem, @record.key.count) = key Then
                Function = 1 : Exit Function
            End If
            record = @record.next
        Wend
    End Function
    Function StrHashSet(t As StrHashT, ByRef key As String, ByVal value As String) ThreadSafe As Byte
        ' update key's associated value : fail if key not in hash table
        Local keyIndex As Long
        Local record As StrHashRecordT Ptr
        If t.capacity = 0 Then Exit Function
        keyIndex = StrHashKeyIndex(key, t.capacity)
        record = t.@arr[keyIndex]
        While record
            If Peek$(@record.key.mem, @record.key.count) = key Then
                StrSet @record.value, value
                Function = 1 : Exit Function
            End If
            record = @record.next
        Wend
    End Function
    Function StrHashGet(t As StrHashT, ByRef key As String) ThreadSafe As String
        ' get key's associated value : null if key not in hash table
        Local keyIndex As Long
        Local record As StrHashRecordT Ptr
        If t.capacity = 0 Then Exit Function
        keyIndex = StrHashKeyIndex(key, t.capacity)
        record = t.@arr[keyIndex]
        While record
            If Peek$(@record.key.mem, @record.key.count) = key Then
                Function = Peek$(@record.value.mem, @record.value.count)
                Exit Function
            End If
            record = @record.next
        Wend
    End Function
    Function StrHashGetPtr(t As StrHashT, ByRef key As String) ThreadSafe As Long
        ' if key's value is a UDT, return UDT pointer : use pointer to modify stored UDT : fail if key not in hash table or value null
        Local keyIndex As Long
        Local record As StrHashRecordT Ptr
        If t.capacity = 0 Then Exit Function
        keyIndex = StrHashKeyIndex(key, t.capacity)
        record = t.@arr[keyIndex]
        While record
            If Peek$(@record.key.mem, @record.key.count) = key Then
                Function = @record.value.mem
                Exit Function
            End If
            record = @record.next
        Wend
    End Function
    Function StrHashDelete(t As StrHashT, ByRef key As String) ThreadSafe As Byte
        ' delete key and associated value : fail if key not in hash table
        Local keyIndex As Long
        Local record, previousRecord As StrHashRecordT Ptr
        If t.capacity = 0 Then Exit Function
        keyIndex = StrHashKeyIndex(key, t.capacity)
        record = t.@arr[keyIndex]
        While record
            If Peek$(@record.key.mem, @record.key.count) = key Then
                StrFinal @record.key
                StrFinal @record.value
                If previousRecord Then @previousRecord.next = @record.next Else  t.@arr[keyIndex] = @record.next
                record = MemFree(record)
                Decr t.count
                Function = 1
                Exit Function
            End If
            previousRecord = record
            record = @record.next
        Wend
    End Function
    Function StrHashFirst(t As StrHashT) ThreadSafe As Byte
        ' move cursor to first key in hash table : true/false success
        Register i As Long
        t.cursorIndex = -1
        t.cursorRecord = 0
        For i = 0 To t.capacity - 1
            If t.@arr[i] Then
                t.cursorIndex = i
                t.cursorRecord = t.@arr[i]
                Function = 1
                Exit Function
            End If
        Next i
    End Function
    Function StrHashNext(t As StrHashT) ThreadSafe As Byte
        ' move cursor to next key in hash table : true/false success
        Register i As Long
        If t.cursorIndex > -1 And t.cursorIndex < t.capacity And t.cursorRecord Then
            If t.@cursorRecord.next Then
                t.cursorRecord = t.@cursorRecord.next
                Function = 1
                Exit Function
            Else
                t.cursorRecord = 0
                For i = t.cursorIndex + 1 To t.capacity - 1
                    If t.@arr[i] Then
                        t.cursorIndex = i
                        t.cursorRecord = t.@arr[i]
                        Function = 1
                        Exit Function
                    End If
                Next i
            End If
        End If
    End Function
    Function StrHashKey(t As StrHashT) ThreadSafe As String
        ' get key at cursor position : null if cursor invalid
        If t.cursorIndex > -1 And t.cursorIndex < t.capacity And t.cursorRecord Then
            Function = StrGet(t.@cursorRecord.key)
        End If
    End Function
    Function StrHashValue(t As StrHashT) ThreadSafe As String
        ' get value at cursor position : null if cursor invalid
        If t.cursorIndex > -1 And t.cursorIndex < t.capacity And t.cursorRecord Then
            Function = StrGet(t.@cursorRecord.value)
        End If
    End Function
    Function StrHashValuePtr(t As StrHashT) ThreadSafe As Long
        ' if value is as stored UDT, get pointer to stored UDT : null if cursor invalid or value null
        If t.cursorIndex > -1 And t.cursorIndex < t.capacity And t.cursorRecord Then
            Function = t.@cursorRecord.value.mem
        End If
    End Function
    Function StrHashStore(t As StrHashT) ThreadSafe As String
        ' store hash table to String
        Local sb As StrBldT
        Local more As Byte
        Local key, value As String
        StrBldAdd sb, Mkl$(t.capacity)
        StrBldAdd sb, Mkl$(t.count)
        more = StrHashFirst(t)
        While IsTrue more
            key = StrHashKey(t)
            value = StrHashValue(t)
            StrBldAdd sb, Mkl$(Len(key))
            StrBldAdd sb, key
            StrBldAdd sb, Mkl$(Len(value))
            StrBldAdd sb, value
            more = StrHashNext(t)
        Wend
        Function = StrBldGet(sb)
    End Function
    Sub StrHashRestore(t As StrHashT, ByVal stored As String) ThreadSafe
        ' restore hash table from String
        Register i As Long
        Local capacity, items, characters As Long
        Local p As Long Ptr
        Local key, value As String
        StrHashClear t
        If Len(stored) Then
            p = StrPtr(stored)
            capacity = @p : Incr p
            items = @p : Incr p
            If capacity And items Then
                StrHashFinal t
                StrHashCapacity t, capacity
                For i = 1 To items
                    characters = @p : Incr p
                    key = Peek$(p, characters) : p += characters
                    characters = @p : Incr p
                    value = Peek$(p, characters) : p += characters
                    StrHashAdd(t, key, value)
                Next i
            End If
        End If
    End Sub
    Sub StrHashFileStore(t As StrHashT, ByVal file As WString) ThreadSafe
        ' store hash table to File
        StrToFile file, StrHashStore(t)
    End Sub
    Sub StrHashFileRestore(t As StrHashT, ByVal file As WString) ThreadSafe
        ' restore hash table to File
        StrHashRestore t, StrFromFile(file)
    End Sub
    Function StrHashKeyIndex(ByRef key As String, ByVal capacity As Long) Private ThreadSafe As Long
        ' get key's hash code index
        Register i As Long
        Register total As Long
        Local result As Long
        Local p As Byte Ptr
        p = StrPtr(key)
        total = 123456
        For i = 0 To Len(key) - 1
            result = @p
            result += total
            Shift Left total, 4
            total += result
            Incr p
        Next i
        Function = Abs(total Mod capacity)
    End Function
#EndIf '%StrHash230424�