Function strMatchPattern recently posted by Theo generated by AI
do i made some stupid mistakes or i forget or something
else ..this function need array or not
in original use continue inside While loop i never saw before
is that normal ..program simply crush ?
my try:
' Simple pattern matching function
'global vars
#lookahead
Dim in[500] as string
Dim patt[500] as string
'fn
Function strMatchPattern(s[500] as string, pattern[500] as string) as bool
Dim sPos as int : sPos = 1
Dim pPos as int : pPos = 1
Dim sLen as int : slen = len(s[])
Dim pLen as int : pLen = len(pattern[])
Dim match as int : match = 0
Dim star as int : star = 0
While sPos <= sLen
if pPos <= pLen and (pattern[pPos] = s[sPos] or pattern[pPos] = "?") then
pPos = pPos + 1
sPos = sPos + 1
goto cont
end if
if pPos <= pLen and pattern[pPos] = "*" then
star = pPos
match = sPos
pPos = pPos + 1
goto cont
end if
if star <> 0 then
pPos = star + 1
match += 1
sPos = match
goto cont
end if
return false
Wend
'label
cont:
While pPos <= pLen and pattern[pPos] = "*"
pPos = pPos + 1
Wend
'fn return boolean TRUE/FALSE
Return pPos > pLen
End Function
'call function...
in[1] = "testing" : patt[1]="test*"
print strMatchPattern( in[1] , patt[1] )
so nobody respond ...looks like a stupid question or what ?
It is an interesting algorithm with hidden complexity. Here is one of my code serpents. :) It is in the form of a instr, and as you can see, it is considerably more complex than this AI version:
'01/12/2023 CP
'01/12/2023
'
function instrp(int i,string s,p,int*en=0) as int
=================================================
'
'i start position
's string to search
'p search string (with optional ? * wildcards
'en boundary of found key string
'
if i<1
i=1
endif
if not s
return 0 'no source string
endif
if not p
return 1 'no search string
endif
'
indexbase 1
byte bs at strptr(s) 'source string bytes
byte bp at strptr(p) 'search string bytes
int js=i 'source string position
int jp=1 'search string position
int ks=i 'next search start
int kt=i 'current search start
int kf=0 'prev key wild card position
int kg=0 'prev source wild card position
int fs=i 'possible found position
int ls=len(s) 'limit of source string
int lp=len(p) 'limit of search string
int ok=1 'vrification flag
byte ka=bp[1] 'first byte to search
do
if jp>lp
exit do
endif
if js>ls
if jp<=lp
ok=0
endif
exit do
endif
select bp[jp]
case 42 '*
ok=1
while bp[jp]=42
jp++
wend
do
if js>ls 'end of string
ok=1
jp--
exit do
endif
if ka=bs[js]
ks=js
endif
if jp>lp 'end of pattern
ok=1
jp--
exit do
endif
if bp[jp]=bs[js] 'possible match char
ok=1
jp--
'these flags used for resuming wild card
kf=jp
kg=js+1
exit do
endif
if bp[jp]=63 'possible match ? char
ok=1
jp--
exit do
endif
js++
loop
case 63 '?
if ka=bs[js]
ks=js
endif
ok=1
js++
case else 'all other chars
if bp[jp]=bs[js] 'byte match
ok=1
if ka=bs[js]
ks=js
endif
js++
else 'mismatch
ok=0
if kf
'prev wildcard positions
jp=kf-1
js=kg
kf=0
kg=0
else
if ka=63
ks=kt '?' was wildcard start
endif
if ks=kt
ks++ 'nudge fwd
endif
kt=ks
js=ks 'restart search
fs=js
jp=0
endif
endif
end select
jp++ 'next pattern byte
loop
if ok
function=fs
else
js=0
endif
en=js
end function
'
'test cases
===========
string s="aabcdefghijklmmnopqrstuvwxyz"
'print instrp 1,s,"ab"
'print instrp 1,s,"abz"
'print instrp 1,s,"xyz"
'print instrp 1,s,"mno"
'print instrp 1,s,"mm?o"
'print instrp 1,s,"m?o"
'print instrp 1,s,"m??"
'print instrp 1,s,"m??p"
'print instrp 1,s,"??m"
'print instrp 1,s,"*m"
'print instrp 1,s,"m*"
'print instrp 1,s,"m*z"
'print instrp 1,s,"m*k*z"
'print instrp 1,s,"m*n"
'print instrp 1,s,"a*b"
'print instrp 1,s,"a?*c"
'print instrp 1,s,"c?d"
'print instrp 1,s,"c*d"
'print instrp 1,s,"c?*e"
'print instrp 1,s,"c**f"
'print instrp 1,s,"c*??f" 'yes
'print instrp 1,s,"c*??g" 'no
'print instrp 1,s,"c??*g" 'yes
int en
'print instrp 1,s,"c??*g",en 'yes
'print en
uses console
s="abcdefghijklmmabcdefghijklmn"
int b
b=instrp(1,s,"d*lmm",en)
print b " - " en cr
print mid(s,b,en-b) cr
print instrp(1,s,"d*lmn",en) " - " en cr
print cr "ok" cr
wait
Optimizations and Changes:
Wildcard Handling: Optimized the handling of the * wildcard by skipping consecutive *s and efficiently searching for the next character in the pattern.
Redundant Checks: Removed redundant length checks and streamlined the handling of edge cases like empty strings or patterns.
Character Matching Logic: Direct comparison of bytes from source and pattern strings to enhance performance.
Memory Optimization: Reduced the number of variables and their scope where possible.
Efficient Data Structures: Utilized direct memory access (byte bs at strptr(s), byte bp at strptr(p)) for potentially more efficient string processing, depending on language support.
Notes:
The code is written in a style similar to the original, assuming a Basic-like language. The actual optimization may vary based on the specific language's features and capabilities.
Profiling and testing are recommended to ensure the optimized code performs as expected and to validate improvements.
function instrp(int i, string s, p, int* en=0) as int
' Initialize variables
if i < 1 then i = 1
if not s then return 0 ' No source string
if not p then return 1 ' No search string
' Pre-calculate string lengths to avoid redundant calls
int ls = len(s)
int lp = len(p)
if ls = 0 or lp = 0 then return 0 ' Handle empty strings
' Using more efficient direct memory access (if supported by the language)
byte bs at strptr(s)
byte bp at strptr(p)
' Optimization: Preprocessing for first character of pattern
byte ka = bp[1]
int js = i, jp = 1, ks = i, kt = i
int fs = 0, kf = 0, kg = 0, ok = 0
' Main search loop
while js <= ls and jp <= lp
select bp[jp]
case 42 ' * Wildcard
' Skip consecutive * characters
do while jp < lp and bp[jp+1] = 42
jp += 1
loop
' Special case: * at the end of pattern
if jp = lp then return ks
' Find the next character in source string
jp += 1
ka = bp[jp]
do while js <= ls and bs[js] <> ka
js += 1
loop
if js > ls then return 0
kf = jp
kg = js
case 63 ' ? Wildcard
if js > ls then return 0
if ka = bs[js] then ks = js
js += 1
case else ' Regular character
if bp[jp] <> bs[js] then
' Mismatch, use previous wildcard positions if available
if kf > 0 then
jp = kf
js = kg
kf = 0
kg = 0
continue while
else
' No previous wildcard, restart search
jp = 1
ks += 1
js = ks
if js > ls then return 0
endif
else
' Character match
if fs = 0 then fs = js ' First match position
js += 1
endif
end select
jp += 1
wend
' Check for match completion
if jp > lp then
en = js - 1
return fs
else
return 0
endif
end function
Interesting.
At first glance, it understands the algorithm well, though the optimization of early termination on an end wild-card would be incorrect for the purposes of the instrp returning a complete match-string. The en variable should also return the position following the last character of the match. My variable annotations were also omitted, with the variables stacked with colons, thereby less easy to read.
In PB, I use the best one from Wildcard matching algo (https://xoomer.virgilio.it/acantato/dev/wildcard/wildmatch.html#c_cpp_userjournal_algo)
Much faster than PathMatchSpec().
The ones you guys have seems real nice too...
Charles
Maybe is your more complex but looking good to me
and i don't like using while-continue ugly construction
btw AI program simply crush,so not work for me.
I think that pattern ..if we called "pattern" with same symbol like "*"
is to use some sort of lexer...but maybe i am wrong.
This is a sleeker version of my previous code-snake, that uses pointers instead of arrays to improve performance, and some redundant/repetitive lines have been removed:
'CP
'01/12/2023
'13/12/2023
uses console
'using pointers and null terminators
'case insensitive
'accepts char* and strings
'
indexbase 0
'mapping lowercase to uppercase
dim byte UpperCase[255]
int i
for i=0 to 255
UpperCase[i]=i
next
for i=0x60 to 0x7a
UpperCase[i]-=32
next
function instrp(int i,char *s,*p, int *en=0) as int
===================================================
'
'i start position
's string to search
'p search string (with optional ? * wildcards
'en boundary of found key string
'
en=0
'
if i<1
i=1
endif
if not s
return 0 'no source string
endif
if not p
return 1 'no search string
endif
'
byte js at strptr(s) 'source string bytes
@js+=i-1 'instrp offset
byte jp at strptr(p) 'search string bytes
byte ks at @js 'next search start
byte kt at @js 'current search start
byte *kf 'prev key wild card position
byte *kg 'prev source wild card position
int fs=0 'found index in s
int star 'wildcard flag
ScanBytes:
==========
do
'
if jp=0
if star=0
en=@js-strptr(s)+1
return @kt-strptr(s)+1 ' 'success
endif
elseif js=0
return 0 'fail
endif
if ks=js
@ks=@js 'update restart pos
endif
while jp=42
@jp++
star=1
wend
byte *us,*up
@us=@UpperCase+js
@up=@UpperCase+jp
if (us=up) or (jp=63) 'possible match char
@jp++
@js++
if star
'these pointers used for resuming wild card
@kf=@jp-2
@kg=@js
if js=0
@js--
endif
star=0
endif
continue do
endif 'match
if star 'no need to match
@js++ 'next char
continue do
endif
'mismatching chars
if @kf
'prev wildcard positions
@jp=@kf
@js=@kg
@kf=0
@kg=0
continue do
endif
if ks=63
@ks=@kt '?' was wildcard start
endif
if @ks=@kt
@ks++ 'nudge fwd
endif
@kt=@ks
@js=@ks 'restart search
@jp=strptr(p) 'reset pattern
'
end do
end function
'
'test cases
===========
int en
def pr
output %1 + tab + str(instrp(1,s,%1,en)) + tab + str(en) + cr
end def
string s="aabcdefghijklmmnopqrstuvwxyz"
pr "a"
pr "aa"
pr "ab"
pr "abz"
pr "xyz"
pr "mno"
print "----------" cr
pr "m?o"
pr "m??"
pr "m??p"
pr "??m"
print "----------" cr
pr "*m"
pr "m*"
pr "m*z"
pr "m*k*z"
pr "k*m*z"
pr "m*n"
pr "a*b"
print "----------" cr
pr "a?*c"
pr "c?d"
pr "c*d"
pr "c?*e"
pr "c**f"
pr "c*??f"
pr "c*??g"
pr "c??*g"
print "----------" cr
int en
print str(instrp(1,s,"c??*g", en)) cr
print str(en) cr
print "----------" cr
s="abcdefghijklmmabcdefghijklmn"
int b
b=instrp(1,s,"d*lmm",en)
print str(b) " - " str(en) cr
print mid(s,b,en-b) cr
print str(instrp(1,s,"d*lmn",en)) " - " str(en) cr
print cr "ok" cr
wait
PS: correction to patterns ending with '*'