Pattern-Matcher for Wildcards

Started by Theo Gottwald, April 02, 2024, 08:16:11 AM

Previous topic - Next topic

0 Members and 1 Guest are viewing this topic.

Theo Gottwald

Version 1: using Glushkov's algorithm implementation in PowerBasic that handles the wildcard characters and the cases where a character class is empty and a wildcard character needs to be escaped.

' Global variables for the DFA
Dim dfaStates() As DFAState
Dim dfaInitialState As Long
Dim dfaAcceptingStates As String ' Bitset representing the accepting states

Type DFAState
    isAccepting As Boolean
    transitions(0 To 255) As Long ' Assuming ASCII characters
End Type

Function PatternMatch(ByVal text As String, ByVal pattern As String, ByVal occurrence As Long, ByRef position As Long, ByRef length As Long) As Long
    ' Preprocess the pattern to build a DFA
    BuildDFA(pattern)

    ' Match the text against the DFA
    Dim state As Long
    state = dfaInitialState
    Dim found As Long
    found = 0
    Dim i As Long
    i = 1
    Do While i <= Len(text)
        state = DFATransition(state, Mid$(text, i, 1))
        If DFAIsAccepting(state) Then
            found = found + 1
            If found = occurrence Then
                position = i - Len(pattern) + 1
                length = Len(pattern)
                PatternMatch = -1
                Exit Function
            End If
        End If
        i = i + 1
    Loop

    PatternMatch = 0
End Function

Sub BuildDFA(ByVal pattern As String)
    ' Implement Glushkov's algorithm to build a DFA from the pattern

    Dim numStates As Long
    numStates = Len(pattern) + 1
    ReDim dfaStates(0 To numStates - 1)

    Dim i As Long
    For i = 0 To numStates - 1
        Dim state As DFAState
        state.isAccepting = (i = numStates - 1)
        For j = 0 To 255
            state.transitions(j) = -1 ' -1 represents an invalid transition
        Next j
        dfaStates(i) = state
    Next i

    Dim currentState As Long
    currentState = 0
    For i = 1 To Len(pattern)
        Dim c As String
        c = Mid$(pattern, i, 1)
        Dim charCode As Long
        charCode = Asc(c)
        Select Case c
            Case "+"
                ' Handle the "+" wildcard character
                ' This is equivalent to the previous character being optional
                ' We add a transition from the previous state to the current state for the previous character
                Dim prevCharCode As Long
                prevCharCode = Asc(Mid$(pattern, i - 1, 1))
                dfaStates(currentState - 1).transitions(prevCharCode) = currentState

            Case "*"
                ' Handle the "*" wildcard character
                ' This is equivalent to the previous character being repeated zero or more times
                ' We add a transition from the previous state to itself for the previous character
                ' We also add a transition from the previous state to the current state for any character
                prevCharCode = Asc(Mid$(pattern, i - 1, 1))
                dfaStates(currentState - 1).transitions(prevCharCode) = currentState - 1
                For j = 0 To 255
                    dfaStates(currentState - 1).transitions(j) = currentState
                Next j

            Case "?"
                ' Handle the "?" wildcard character
                ' This is equivalent to the previous character being optional
                ' We add a transition from the previous state to the current state for any character
                For j = 0 To 255
                    dfaStates(currentState - 1).transitions(j) = currentState
                Next j

            Case "["
                ' Handle the "[" wildcard character
                ' We start a new character class
                ' We also handle the case where the character class is empty
                Dim charClassStart As Long
                charClassStart = i
                If i = Len(pattern) Or Mid$(pattern, i + 1, 1) = "]" Then
                    ' The character class is empty, so we add a transition for any character
                    For j = 0 To 255
                        dfaStates(currentState - 1).transitions(j) = currentState
                    Next j
                End If

            Case "]"
                ' Handle the "]" wildcard character
                ' We end a character class
                ' We add a transition from the previous state to the current state for any character in the class
                Dim charClassEnd As Long
                charClassEnd = i
                For j = charClassStart + 1 To charClassEnd - 1
                    Dim charClassChar As String
                    charClassChar = Mid$(pattern, j, 1)
                    Dim charClassCharCode As Long
                    charClassCharCode = Asc(charClassChar)
                    dfaStates(currentState - 1).transitions(charClassCharCode) = currentState
                Next j

            Case Else
                ' Handle a regular character or an escaped wildcard character
                If c = "\" Then
                    ' The next character is escaped
                    Dim nextChar As String
                    nextChar = Mid$(pattern, i + 1, 1)
                    Dim nextCharCode As Long
                    nextCharCode = Asc(nextChar)
                    dfaStates(currentState).transitions(nextCharCode) = currentState + 1
                    currentState = currentState + 1
                    i = i + 1 ' Skip the next character
                Else
                    dfaStates(currentState).transitions(charCode) = currentState + 1
                    currentState = currentState + 1
                End If
        End Select
    Next i

    ' Set the accepting states
    dfaAcceptingStates = Str$(2 ^ (numStates - 1)) ' Only the last state is accepting
    dfaInitialState = 0
End Sub

Function DFAIsAccepting(ByVal state As Long) As Boolean
    Dim acceptingStates As String
    acceptingStates = Str$(dfaAcceptingStates)
    Dim stateBit As Long
    stateBit = (state Mod 10)
    Return (stateBit = 1)
End Function

Function DFATransition(ByVal state As Long, ByVal c As String) As Long
    Dim charCode As Long
    charCode = Asc(c)
    Dim transition As Long
    transition = dfaStates(state).transitions(charCode)
    If transition = -1 Then
        ' If there's no valid transition, stay in the same state
        transition = state
    End If
    Return transition
End Function
' using MISTRAL LARGE



Theo Gottwald

Patternmatcher Simple Variant.
This function takes the following parameters:
' Input:
- `text`: The string to search for patterns.
- `pattern`: The pattern to match against the text.
- `occurrence`: The x'th occurrence of the pattern to find (1-based index).
Output:
- `position`: ByRef parameter to store the starting position of the found pattern.
- `length`: ByRef

Function PatternMatch(ByVal text As String, ByVal pattern As String, ByVal occurrence As Long, ByRef position As Long, ByRef length As Long) As Long
    Dim i As Long, j As Long, k As Long
    Dim found As Long, match As Long
    Dim patternLength As Long, textLength As Long
   
    patternLength = Len(pattern)
    textLength = Len(text)
   
    found = 0
    i = 1
    j = 1
   
    Do While i <= textLength
        Select Case Mid$(pattern, j, 1)
            Case "+"
                If j = patternLength Then
                    found = found + 1
                    If found = occurrence Then
                        position = i - patternLength + 1
                        length = patternLength
                        PatternMatch = -1
                        Exit Function
                    End If
                    j = 1
                Else
                    j = j + 1
                End If
                i = i + 1
           
            Case "*"
                If j = patternLength Then
                    found = found + 1
                    If found = occurrence Then
                        position = i - patternLength + 1
                        length = textLength - i + 1
                        PatternMatch = -1
                        Exit Function
                    End If
                    j = 1
                Else
                    j = j + 1
                End If
           
            Case "?"
                If j = patternLength Then
                    found = found + 1
                    If found = occurrence Then
                        position = i - patternLength + 1
                        length = patternLength
                        PatternMatch = -1
                        Exit Function
                    End If
                    j = 1
                Else
                    j = j + 1
                End If
                i = i + 1
           
            Case "["
                match = 0
                j = j + 1
                Do While Mid$(pattern, j, 1) <> "]"
                    If Mid$(pattern, j, 1) = Mid$(text, i, 1) Then
                        match = -1
                        Exit Do
                    End If
                    j = j + 1
                Loop
                If match Then
                    If j = patternLength Then
                        found = found + 1
                        If found = occurrence Then
                            position = i - patternLength + 1
                            length = patternLength
                            PatternMatch = -1
                            Exit Function
                        End If
                        j = 1
                    Else
                        j = j + 1
                    End If
                    i = i + 1
                Else
                    i = i + 1
                    j = 1
                End If
           
            Case Else
                If Mid$(pattern, j, 1) = Mid$(text, i, 1) Then
                    If j = patternLength Then
                        found = found + 1
                        If found = occurrence Then
                            position = i - patternLength + 1
                            length = patternLength
                            PatternMatch = -1
                            Exit Function
                        End If
                        j = 1
                    Else
                        j = j + 1
                    End If
                    i = i + 1
                Else
                    i = i + 1
                    j = 1
                End If
        End Select
    Loop
   
    PatternMatch = 0
End Function
' Coded by MISTRAL LARGE