PCRE1 RegExp - String functions & Regular Expressions

User projects written in or related to FreeBASIC.
Post Reply
Iczer
Posts: 99
Joined: Jul 04, 2017 18:09

PCRE1 RegExp - String functions & Regular Expressions

Post by Iczer »

I was not satisfied with the functionality of preg_class.bi, and it gave me some GPF errors, so I looked into PCRE1 to build a good RegExp library for my needs.
The result is PCRE1_RegExpClass, together with examples and a speed test. I hope it will be useful for others as well...

Project uses pcre1 library built with : SUPPORT_PCRE8, SUPPORT_JIT, SUPPORT_UTF, SUPPORT_UCP
PCRE1_RegExpClass.bi

Code: Select all

#Pragma Once
#Include Once "windows.bi"
#Include Once "crt/string.bi"

' Define PCRE_STATIC before pcre.bi is included, unless the build has already
' defined it. The guard must test PCRE_STATIC itself; the previous guard tested
' the placeholder name "symbol" and therefore never protected the definition.
' NOTE(review): presumably pcre.bi uses PCRE_STATIC to select static linking - confirm.
#Ifndef PCRE_STATIC
	#Define PCRE_STATIC
#EndIf

#include Once "pcre.bi"

' Copies the NUL-terminated byte string at sZStringPTRin into sZStringPTRout.
' sZStringPTRout is resized with ReAllocate to hold the text plus its
' terminator, so it must be 0 or a pointer previously obtained from
' Allocate/Callocate/ReAllocate. The length is measured in bytes with strlen,
' so multi-byte UTF-8 sequences are copied unchanged.
#Macro CopyUTF8String_ZStringPTRToZStringPTR(sZStringPTRin,sZStringPTRout)
	Scope
		Dim As Integer iStrLen = strlen(sZStringPTRin)
		sZStringPTRout = ReAllocate(sZStringPTRout, iStrLen + 1)
		memcpy(sZStringPTRout, sZStringPTRin, iStrLen)
		Clear(sZStringPTRout[iStrLen], 0, 1)
	End Scope
#EndMacro

' A replacement template that has been pre-split into literal segments and
' "$n" capture-group references, as consumed by the compiled-handle overload
' of PCRE1_RegExpClass.StringRegExpReplace.
Type tagReplacementPattern
	' Heap copy of the replacement text; allocated by the constructor and
	' released by the destructor.
	Dim sReplacementPattern					As ZString Ptr
	' Number of group references stored in the arrays below.
	Dim iReplacementGroupCount				As UInteger = 0
	' Current capacity of the three arrays below.
	Dim iReplacementGroupSize				As UInteger = 8
	' Not referenced by the code visible in this file section.
	Dim iReplacementGroupMaxNum			As UInteger = 0
	' iPos1(i): byte offset in sReplacementPattern where the literal text that
	' precedes group reference i starts; entry (iReplacementGroupCount + 1) is
	' read as the start of the trailing literal text.
	ReDim iPos1(1 To 8)						As UInteger
	' iPos2(i): the replace routine copies (iPos2(i) - iPos1(i) - 1) literal
	' bytes before inserting group reference i.
	ReDim iPos2(1 To 8)						As UInteger
	' iReplacementGroupNum(i): capture group number inserted for reference i.
	ReDim iReplacementGroupNum(1 To 8)	As Long
	Declare Constructor ()
	Declare Destructor ()
End Type

' Gives a new template an 8-byte zero-filled text buffer and records the
' initial capacity (8) of its position arrays.
Constructor tagReplacementPattern
	sReplacementPattern   = Callocate(8, 1)
	iReplacementGroupSize = 8
End Constructor

' Releases the heap buffer that holds the replacement text.
Destructor tagReplacementPattern
	DeAllocate(sReplacementPattern)
End Destructor

' Wrapper around the PCRE1 C API offering match test, replace and
' "collect capture groups into an array" operations. Each operation exists in
' two forms: one taking the pattern as text (compiled on every call) and one
' taking an integer handle returned by StringRegExpCompile /
' StringRegExpCompileReplacement (compiled once, executed via pcre_jit_exec).
' The public fields below are overwritten by every call and describe its outcome.
type PCRE1_RegExpClass
	iMatchLimit				As Long			' filled by the constructor from pcre_config(PCRE_CONFIG_MATCH_LIMIT); used in this file as an upper bound when sizing the offsets vector
	iCount					As Long			' result -> number of array elements (StringRegExpArray1d1Based) or number of capture groups in the pattern (StringRegExpReplace)
	iRows						As Long			' result -> number of whole-pattern matches (StringRegExpArray1d1Based only)
	iColumns					As Long			' result -> number of capture groups per match (StringRegExpArray1d1Based only)
	iReplacementCount		As Long			' result -> number of successful replacements (StringRegExpReplace only)
	IsMatch					As Boolean		' result -> TRUE when the pattern matched
	pcreErrorCode			As Long			' result -> error code (0 on success, otherwise a PCRE code or a class-specific value)
	pcreErrorStr			As ZString Ptr	' result -> error message text
	pcreErrorOffset		As Long			' result -> offset in the pattern where a compile error was found
	UseAutoErrPrint		As Boolean		' when TRUE, TranslateErrorMessage() is called automatically on errors
	Private:

		' 0 until StringRegExpReplaceXMLSpecials has compiled its entity patterns.
		iXMLSpInitDone		As Long = 0

		' JIT stack shared by all compiled patterns; allocated on first use.
		pcreComJitStack As pcre_jit_stack Ptr = 0

		' Number of compiled patterns / compiled replacement templates in use.
		pcreComCount As Long
		pcreRepCount As Long
		' Parallel arrays indexed by the handle returned from StringRegExpCompile.
		ReDim pcreComPattern(1 To 8) As pcre Ptr
		ReDim pcreComExtra(1 To 8) As pcre_extra Ptr
		ReDim pcreComCaptureGroupCount(1 To 8) As Long
		' Indexed by the handle returned from StringRegExpCompileReplacement.
		ReDim pcreComReplacement(1 To 8) As tagReplacementPattern
		' Current capacities of the pattern arrays and of the replacement array.
		pcreComSize As Long = 8
		pcreRepSize As Long = 8
	Public:
		Declare Sub TranslateErrorMessage()
		' Overloads taking the pattern (and replacement) as text.
		Declare Function StringRegExpBoolean(ByRef sSubjectString As ZString Ptr, ByRef sPattern As ZString Ptr, ByVal manualOpt as long = 0, ByVal pcreOffset as long = 0) As Boolean
		Declare Function StringRegExpReplace(ByRef sSubjectString As ZString Ptr, ByRef sPattern As ZString Ptr, ByRef sReplacement As ZString Ptr, ByRef sOutputString As ZString Ptr, ByVal manualOpt as long = 0, ByVal pcreOffset as long = 0) As Long
		Declare Function StringRegExpArray1d1Based(ByRef sSubjectString As ZString Ptr,ByRef sPattern As ZString Ptr, aRezult() As String, ByVal manualOpt as long = 0, ByVal pcreOffset as long = 0) As Long
		' Overloads taking handles to a precompiled pattern (and replacement).
		Declare Function StringRegExpBoolean(ByRef sSubjectString As ZString Ptr, ByRef iPattern As Long, ByVal pcreOffset as long = 0) As Boolean
		Declare Function StringRegExpReplace(ByRef sSubjectString As ZString Ptr, ByRef iPattern As Long, ByRef iReplacement As Long, ByRef sOutputString As ZString Ptr, ByVal manualOpt as long = 0, ByVal pcreOffset as long = 0) As Long
		Declare Function StringRegExpArray1d1Based(ByRef sSubjectString As ZString Ptr,ByRef iPattern As Long, aRezult() As String, ByVal pcreOffset as long = 0) As Long
		' Replaces XML character entities in sSubjectString in place.
		Declare Function StringRegExpReplaceXMLSpecials(ByRef sSubjectString As ZString Ptr) As Long
		' Precompile a pattern / a replacement template and return its handle.
		Declare Function StringRegExpCompile(ByRef sPattern As ZString Ptr, ByVal manualOpt as long = 0) As Long
		Declare Function StringRegExpCompileReplacement(ByRef sReplacement As ZString Ptr) As Long
		Declare Constructor ()
		Declare Destructor ()
End Type

' Initialises a fresh object: reads the library's match limit, clears all
' result fields, allocates the 512-byte error text buffer and enables
' automatic error printing.
Constructor PCRE1_RegExpClass()
	' Ask the linked PCRE library for its configured match limit.
	pcre_config(PCRE_CONFIG_MATCH_LIMIT, @This.iMatchLimit)

	' Result fields start out as "nothing matched, no error".
	iCount            = 0
	iRows             = 0
	iColumns          = 0
	iReplacementCount = 0
	IsMatch           = FALSE
	pcreErrorCode     = 0
	pcreErrorOffset   = 0
	pcreErrorStr      = Callocate(512,1)

	' Behaviour switches and bookkeeping for precompiled patterns.
	UseAutoErrPrint   = TRUE
	pcreComCount      = 0
	pcreComSize       = 8
	iXMLSpInitDone    = 0
End Constructor

' Releases the error text buffer, the shared JIT stack and every compiled
' pattern slot together with its study data.
' NOTE(review): assumes pcreErrorStr still points at heap memory owned by this
' object when the destructor runs - confirm.
Destructor PCRE1_RegExpClass()
	DeAllocate(pcreErrorStr)

	If pcreComJitStack <> NULL Then
		pcre_jit_stack_free(pcreComJitStack)
	End If

	' Walk all slots up to the current capacity, not only the used ones.
	Dim As Integer iSlot = 1
	While iSlot <= pcreComSize
		pcre_free_study(pcreComExtra(iSlot))
		pcre_free(pcreComPattern(iSlot))
		iSlot += 1
	Wend

End Destructor

' Tests whether sPattern matches sSubjectString. The pattern is compiled,
' executed once and freed again.
'
' Parameters:
'   sSubjectString - NUL-terminated subject text
'   sPattern       - NUL-terminated pattern text
'   manualOpt      - option bits passed to pcre_compile2
'   pcreOffset     - byte offset in the subject at which matching starts
'
' Results: IsMatch, pcreErrorCode, pcreErrorStr and pcreErrorOffset are updated.
'
' NOTE(review): the declared return type is Boolean but the value returned is
' pcreErrorCode converted to Boolean, i.e. FALSE when the pattern matched and
' TRUE on "no match" or error. Kept unchanged for compatibility; read IsMatch
' for the match state.
Function PCRE1_RegExpClass.StringRegExpBoolean(ByRef sSubjectString As ZString Ptr, ByRef sPattern As ZString Ptr, ByVal manualOpt as long = 0, ByVal pcreOffset as long = 0) As Boolean

	Dim As pcre Ptr reCompiled
	Dim As Const ZString Ptr pcreMsg = NULL	' receives PCRE's static message text
	Dim As Long pcreExecRet, iSubjectStringLen
	Dim As Long subStrVec(0 To 5)					' room for the whole match plus one group

	This.iCount					= 0
	This.iRows					= 0
	This.iColumns				= 0
	This.iReplacementCount	= 0
	This.IsMatch				= FALSE
	This.pcreErrorCode		= 0
	This.pcreErrorOffset		= 0
	' Re-create the message buffer if it has been lost (same size as in the constructor).
	If This.pcreErrorStr = NULL Then This.pcreErrorStr = Callocate(512, 1)
	*This.pcreErrorStr		= ""

	' PCRE stores a pointer to a static library string (or NULL) in the variable
	' passed as errptr, so it must not be given the address of the owned buffer.
	reCompiled = pcre_compile2(sPattern, manualOpt, @This.pcreErrorCode, @pcreMsg, @This.pcreErrorOffset, NULL)
	If reCompiled = NULL Then
		' Copy the message text into the owned buffer (assumed to be 512 bytes, as allocated by the constructor).
		If (This.pcreErrorStr <> NULL) AndAlso (pcreMsg <> NULL) Then
			strncpy(This.pcreErrorStr, pcreMsg, 511)
			Clear(This.pcreErrorStr[511], 0, 1)
		End If
		If This.UseAutoErrPrint Then This.TranslateErrorMessage()
		Print "PCRE1_RegExpClass.StringRegExpBoolean :: Error : Could not compile Pattern, pcreErrorStr = ";*This.pcreErrorStr;" sPattern = ";*sPattern
		Return This.pcreErrorCode
	EndIf

	iSubjectStringLen = strlen(sSubjectString)

	pcreExecRet = pcre_exec(reCompiled, _			'	compiled pattern
									0, _						'	no study data
									sSubjectString, _		'	subject string
									iSubjectStringLen, _	'	subject length in bytes
									pcreOffset, _			'	start offset in bytes (must be a character boundary in UTF-8 mode)
									0, _						'	option bits
									@subStrVec(0), _		'	vector for result offsets
									6)							'	number of elements in the vector (a multiple of 3)

	' A return of 0 only means the vector was too small for all groups; the
	' pattern still matched.
	If pcreExecRet < 0 Then
		This.IsMatch = FALSE
		This.pcreErrorCode = pcreExecRet
		If This.UseAutoErrPrint Then This.TranslateErrorMessage()
	Else
		This.IsMatch = TRUE
	EndIf

	pcre_free(reCompiled)
	Function = This.pcreErrorCode
End Function

' Replaces every match of sPattern in sSubjectString with sReplacement and
' stores the result in sOutputString (resized with ReAllocate).
'
' In sReplacement, "$n" (n = one or more digits) inserts capture group n,
' "$0" inserts the whole match and a "$" not followed by a digit is copied
' literally. Groups that did not take part in the match insert nothing.
'
' Parameters:
'   sSubjectString - NUL-terminated subject text
'   sPattern       - NUL-terminated pattern text, compiled for this call only
'   sReplacement   - NUL-terminated replacement template
'   sOutputString  - receives the result; must be 0 or a heap pointer
'   manualOpt      - option bits passed to pcre_compile2
'   pcreOffset     - byte offset in the subject at which matching starts
'
' Returns pcreErrorCode: 0 when at least one replacement was made, otherwise
' the PCRE error code (PCRE_ERROR_NOMATCH when nothing matched) or -200 when
' the template refers to a group the pattern does not have. On any non-zero
' result sOutputString receives an unchanged copy of the subject.
Function PCRE1_RegExpClass.StringRegExpReplace(ByRef sSubjectString As ZString Ptr, ByRef sPattern As ZString Ptr, ByRef sReplacement As ZString Ptr, ByRef sOutputString As ZString Ptr, ByVal manualOpt as long = 0, ByVal pcreOffset as long = 0) As Long

	Dim As pcre Ptr reCompiled
	Dim As Const ZString Ptr pcreMsg = NULL	' receives PCRE's static message text
	Dim As Long pcreExecRet, iSubjectStringLen, subStrVecCount, pcreRetValue
	Dim As Long iAppendPos = 0, itmpStrSize = 0, iExecOpt = 0, iChunk
	Dim As Long iReplLen, iLitStart, iScan, iDigitEnd, iCapturingGroupNum, iGroupStart
	Dim As Long Ptr subStrVec
	Dim As ZString Ptr pstmpString
	Dim As UByte Ptr pRepl = CPtr(UByte Ptr, sReplacement)

	This.iCount					= 0
	This.iRows					= 0
	This.iColumns				= 0
	This.iReplacementCount	= 0
	This.IsMatch				= FALSE
	This.pcreErrorCode		= 0
	This.pcreErrorOffset		= 0
	' Re-create the message buffer if it has been lost (same size as in the constructor).
	If This.pcreErrorStr = NULL Then This.pcreErrorStr = Callocate(512, 1)
	*This.pcreErrorStr		= ""

	' PCRE stores a pointer to a static library string (or NULL) in the variable
	' passed as errptr, so it must not be given the address of the owned buffer.
	reCompiled = pcre_compile2(sPattern, manualOpt, @This.pcreErrorCode, @pcreMsg, @This.pcreErrorOffset, NULL)
	If reCompiled = NULL Then
		If (This.pcreErrorStr <> NULL) AndAlso (pcreMsg <> NULL) Then
			strncpy(This.pcreErrorStr, pcreMsg, 511)
			Clear(This.pcreErrorStr[511], 0, 1)
		End If
		If This.UseAutoErrPrint Then This.TranslateErrorMessage()
		Print "PCRE1_RegExpClass.StringRegExpReplace :: Error : Could not compile Pattern, pcreErrorStr = ";*This.pcreErrorStr;" sPattern = ";*sPattern
		CopyUTF8String_ZStringPTRToZStringPTR(sSubjectString,sOutputString)
		Return This.pcreErrorCode
	EndIf

	' Any non-zero result of pcre_fullinfo is an error; release the pattern before leaving.
	pcreRetValue = pcre_fullinfo(reCompiled, 0, PCRE_INFO_CAPTURECOUNT, @This.iCount)
	If pcreRetValue <> 0 Then
		Print "PCRE1_RegExpClass.StringRegExpReplace :: Error : Could not get info, pcreErrorStr = ";*This.pcreErrorStr;" sPattern = ";*sPattern
		This.iCount = 0
		This.pcreErrorCode = pcreRetValue
		pcre_free(reCompiled)
		CopyUTF8String_ZStringPTRToZStringPTR(sSubjectString,sOutputString)
		Return This.pcreErrorCode
	EndIf

	iSubjectStringLen = strlen(sSubjectString)
	iReplLen = strlen(sReplacement)
	' One offset triple per capture group plus one for the whole match.
	subStrVecCount = (This.iCount + 1) * 3
	subStrVec = New Long[subStrVecCount]
	itmpStrSize = iSubjectStringLen * 2 + 16
	pstmpString = Callocate(itmpStrSize)

	#Macro PCRE1_SRR_APPEND(pSrc, nBytes)
		If (nBytes) > 0 Then
			If iAppendPos + (nBytes) + 1 > itmpStrSize Then
				itmpStrSize = (iAppendPos + (nBytes) + 1) * 2
				pstmpString = ReAllocate(pstmpString, itmpStrSize)
			End If
			memcpy(@pstmpString[iAppendPos], pSrc, (nBytes))
			iAppendPos += (nBytes)
		End If
	#EndMacro

	Do
		pcreExecRet = pcre_exec(reCompiled, _			'	compiled pattern
										0, _						'	no study data
										sSubjectString, _		'	subject string
										iSubjectStringLen, _	'	subject length in bytes
										pcreOffset, _			'	start offset in bytes (must be a character boundary in UTF-8 mode)
										iExecOpt, _				'	0, or PCRE_NOTEMPTY_ATSTART after an empty match
										subStrVec, _			'	vector for result offsets
										subStrVecCount)		'	number of elements in the vector (a multiple of 3)

		If pcreExecRet < 0 Then
			This.pcreErrorCode = pcreExecRet
			Exit Do
		End If

		' Unmatched text between the previous match and this one.
		iChunk = subStrVec[0] - pcreOffset
		PCRE1_SRR_APPEND(@sSubjectString[pcreOffset], iChunk)

		' Expand the replacement template for this match.
		iLitStart = 0
		iScan = 0
		While iScan < iReplLen
			If pRepl[iScan] <> Asc("$") Then
				iScan += 1
				Continue While
			End If

			' Collect the digits that follow the "$".
			iDigitEnd = iScan + 1
			iCapturingGroupNum = 0
			While (pRepl[iDigitEnd] >= Asc("0")) AndAlso (pRepl[iDigitEnd] <= Asc("9"))
				' Stop accumulating once the value exceeds any possible group number.
				If iCapturingGroupNum <= 65535 Then iCapturingGroupNum = iCapturingGroupNum * 10 + (pRepl[iDigitEnd] - Asc("0"))
				iDigitEnd += 1
			Wend

			If iDigitEnd = iScan + 1 Then
				' "$" without digits stays part of the literal text.
				iScan += 1
				Continue While
			End If

			If iCapturingGroupNum > This.iCount Then
				' The pattern has no such group.
				This.pcreErrorCode = -200
				Exit Do
			End If

			' Literal text in front of the reference, then the group itself.
			iChunk = iScan - iLitStart
			PCRE1_SRR_APPEND(@sReplacement[iLitStart], iChunk)
			iGroupStart = subStrVec[2 * iCapturingGroupNum]
			If iGroupStart > -1 Then
				iChunk = subStrVec[2 * iCapturingGroupNum + 1] - iGroupStart
				PCRE1_SRR_APPEND(@sSubjectString[iGroupStart], iChunk)
			End If

			iLitStart = iDigitEnd
			iScan = iDigitEnd
		Wend
		iChunk = iReplLen - iLitStart
		PCRE1_SRR_APPEND(@sReplacement[iLitStart], iChunk)

		This.iReplacementCount += 1
		pcreOffset = subStrVec[1]

		' After an empty match, forbid another empty match at the same offset;
		' otherwise the loop would never advance.
		If subStrVec[0] = subStrVec[1] Then
			iExecOpt = PCRE_NOTEMPTY_ATSTART
		Else
			iExecOpt = 0
		End If
	Loop

	If (This.pcreErrorCode = PCRE_ERROR_NOMATCH) AndAlso (This.iReplacementCount > 0) Then
		' Running out of matches after at least one replacement is the normal end.
		This.IsMatch = TRUE
		This.pcreErrorCode = 0
		iChunk = iSubjectStringLen - pcreOffset
		PCRE1_SRR_APPEND(@sSubjectString[pcreOffset], iChunk)
		' The result length is tracked explicitly; the buffer is not NUL-terminated.
		sOutputString = ReAllocate(sOutputString, iAppendPos + 1)
		memcpy(sOutputString, pstmpString, iAppendPos)
		Clear(sOutputString[iAppendPos], 0, 1)
	Else
		This.iCount = 0
		This.iReplacementCount = 0
		This.IsMatch = FALSE
		If This.UseAutoErrPrint Then This.TranslateErrorMessage()
		CopyUTF8String_ZStringPTRToZStringPTR(sSubjectString,sOutputString)
	EndIf

	#Undef PCRE1_SRR_APPEND

	Delete[] subStrVec
	DeAllocate(pstmpString)
	pcre_free(reCompiled)
	Function = This.pcreErrorCode
End Function

' Finds all matches of sPattern in sSubjectString and stores the text of every
' capture group of every match in aRezult (1-based, row after row). The text
' of the whole match (group 0) is not stored.
'
' Parameters:
'   sSubjectString - NUL-terminated subject text
'   sPattern       - NUL-terminated pattern text, compiled for this call only
'   aRezult()      - dynamic String array that receives the group texts
'   manualOpt      - option bits passed to pcre_compile2
'   pcreOffset     - byte offset in the subject at which matching starts
'
' Results: iRows = number of matches, iColumns = capture groups in the pattern,
' iCount = iRows * iColumns. Groups that did not take part in a match are
' stored as empty strings so every row has the same width.
'
' Returns pcreErrorCode: 0 on success; otherwise aRezult is reset to a single
' empty element and the counters are cleared (this includes the case of a
' pattern without capture groups, where nothing can be stored).
Function PCRE1_RegExpClass.StringRegExpArray1d1Based(ByRef sSubjectString As ZString Ptr,ByRef sPattern As ZString Ptr, aRezult() As String, ByVal manualOpt as long = 0, ByVal pcreOffset as long = 0) As Long

	Dim As pcre Ptr reCompiled
	Dim As Const ZString Ptr pcreMsg = NULL	' receives PCRE's static message text
	Dim As Long pcreExecRet, iSubjectStringLen, subStrVecCount, iSize = 0, iGroups = 0, iExecOpt = 0
	Dim As Long Ptr subStrVec
	Dim As Const ZString Ptr Ptr ppsubStrMatches = NULL

	This.iCount					= 0
	This.iRows					= 0
	This.iColumns				= 0
	This.iReplacementCount	= 0
	This.IsMatch				= FALSE
	This.pcreErrorCode		= 0
	This.pcreErrorOffset		= 0
	' Re-create the message buffer if it has been lost (same size as in the constructor).
	If This.pcreErrorStr = NULL Then This.pcreErrorStr = Callocate(512, 1)
	*This.pcreErrorStr		= ""

	' PCRE stores a pointer to a static library string (or NULL) in the variable
	' passed as errptr, so it must not be given the address of the owned buffer.
	reCompiled = pcre_compile2(sPattern, manualOpt, @This.pcreErrorCode, @pcreMsg, @This.pcreErrorOffset, NULL)
	If reCompiled = NULL Then
		If (This.pcreErrorStr <> NULL) AndAlso (pcreMsg <> NULL) Then
			strncpy(This.pcreErrorStr, pcreMsg, 511)
			Clear(This.pcreErrorStr[511], 0, 1)
		End If
		If This.UseAutoErrPrint Then This.TranslateErrorMessage()
		Print "PCRE1_RegExpClass.StringRegExpArray1d1Based :: Error : Could not compile Pattern, pcreErrorStr = ";*This.pcreErrorStr;" sPattern = ";*sPattern
		Return This.pcreErrorCode
	EndIf

	' The offsets vector is sized from the pattern's capture count.
	This.pcreErrorCode = pcre_fullinfo(reCompiled, 0, PCRE_INFO_CAPTURECOUNT, @iGroups)
	If This.pcreErrorCode <> 0 Then
		If This.UseAutoErrPrint Then This.TranslateErrorMessage()
		pcre_free(reCompiled)
		Return This.pcreErrorCode
	End If

	iSubjectStringLen = strlen(sSubjectString)
	subStrVecCount = (iGroups + 1) * 3
	subStrVec = New Long[subStrVecCount]

	Do
		pcreExecRet = pcre_exec(reCompiled, _			'	compiled pattern
										0, _						'	no study data
										sSubjectString, _		'	subject string
										iSubjectStringLen, _	'	subject length in bytes
										pcreOffset, _			'	start offset in bytes (must be a character boundary in UTF-8 mode)
										iExecOpt, _				'	0, or PCRE_NOTEMPTY_ATSTART after an empty match
										subStrVec, _			'	vector for result offsets
										subStrVecCount)		'	number of elements in the vector (a multiple of 3)

		If pcreExecRet < 0 Then
			This.pcreErrorCode = pcreExecRet
			Exit Do
		End If

		' Always extract all groups of the pattern; unset groups yield empty strings.
		' pcre_get_substring_list allocates the list itself.
		This.pcreErrorCode = pcre_get_substring_list(sSubjectString, subStrVec, iGroups + 1, @ppsubStrMatches)
		If This.pcreErrorCode <> 0 Then Exit Do

		If iSize < (This.iCount + iGroups + 1) Then
			iSize += 88 + iGroups + 1
			ReDim Preserve aRezult(1 To iSize) As String
		EndIf
		This.IsMatch = TRUE
		This.iColumns = iGroups
		This.iRows += 1
		For i As Long = 1 To iGroups
			This.iCount += 1
			aRezult(This.iCount) = *ppsubStrMatches[i]
		Next
		' The list is a single block and must be released as a list.
		pcre_free_substring_list(ppsubStrMatches)
		ppsubStrMatches = NULL

		pcreOffset = subStrVec[1]
		' After an empty match, forbid another empty match at the same offset;
		' otherwise the loop would never advance.
		If subStrVec[0] = subStrVec[1] Then
			iExecOpt = PCRE_NOTEMPTY_ATSTART
		Else
			iExecOpt = 0
		End If
	Loop

	If (This.pcreErrorCode = PCRE_ERROR_NOMATCH) And (This.iCount > 0) Then
		' Running out of matches after collecting results is the normal end.
		ReDim Preserve aRezult(1 To This.iCount) As String
		This.pcreErrorCode = 0
	Else
		ReDim aRezult(1 To 1) As String
		This.IsMatch	= FALSE
		This.iCount		= 0
		This.iRows		= 0
		This.iColumns	= 0
		If This.UseAutoErrPrint Then This.TranslateErrorMessage()
	EndIf

	Delete[] subStrVec
	pcre_free(reCompiled)
	Function = This.pcreErrorCode
End Function

' Tests whether the precompiled pattern iPattern matches sSubjectString.
'
' Parameters:
'   sSubjectString - NUL-terminated subject text
'   iPattern       - handle returned by StringRegExpCompile
'   pcreOffset     - byte offset in the subject at which matching starts
'
' Results: IsMatch and pcreErrorCode are updated. An invalid handle (for
' example the 0 returned by a failed StringRegExpCompile) is reported as
' PCRE_ERROR_NULL instead of indexing outside the pattern arrays.
'
' NOTE(review): the declared return type is Boolean but the value returned is
' pcreErrorCode converted to Boolean, i.e. FALSE when the pattern matched and
' TRUE on "no match" or error. Kept unchanged for compatibility; read IsMatch
' for the match state.
Function PCRE1_RegExpClass.StringRegExpBoolean(ByRef sSubjectString As ZString Ptr, ByRef iPattern As Long, ByVal pcreOffset as long = 0) As Boolean

	Dim As Long pcreExecRet, iSubjectStringLen, subStrVecCount
	Dim As Long Ptr subStrVec

	This.iCount					= 0
	This.iRows					= 0
	This.iColumns				= 0
	This.iReplacementCount	= 0
	This.IsMatch				= FALSE
	This.pcreErrorCode		= 0
	This.pcreErrorOffset		= 0
	*This.pcreErrorStr		= ""

	' Reject handles that do not refer to a compiled pattern.
	If (iPattern < 1) OrElse (iPattern > This.pcreComCount) OrElse (iPattern > UBound(This.pcreComCaptureGroupCount)) OrElse (This.pcreComPattern(iPattern) = NULL) Then
		This.pcreErrorCode = PCRE_ERROR_NULL
		If This.UseAutoErrPrint Then This.TranslateErrorMessage()
		Return This.pcreErrorCode
	End If

	iSubjectStringLen = strlen(sSubjectString)
	subStrVecCount = This.pcreComCaptureGroupCount(iPattern) * 3 + 3
	subStrVec = New Long[subStrVecCount]

	If This.pcreComExtra(iPattern) <> NULL Then
		pcreExecRet = pcre_jit_exec(This.pcreComPattern(iPattern), _'	compiled pattern
											 This.pcreComExtra(iPattern), _	'	study data of the compiled pattern
											 sSubjectString, _					'	subject string
											 iSubjectStringLen, _				'	subject length in bytes
											 pcreOffset, _							'	start offset in bytes (must be a character boundary in UTF-8 mode)
											 0, _										'	option bits
											 subStrVec, _							'	vector for result offsets
											 subStrVecCount, _					'	number of elements in the vector (a multiple of 3)
											 This.pcreComJitStack)				'	JIT stack
	Else
		' StringRegExpCompile accepts patterns for which pcre_study produced no
		' data; those cannot go through pcre_jit_exec.
		pcreExecRet = pcre_exec(This.pcreComPattern(iPattern), 0, sSubjectString, iSubjectStringLen, pcreOffset, 0, subStrVec, subStrVecCount)
	End If

	If (pcreExecRet > 0)  Then
		This.IsMatch = TRUE
	Else
		This.IsMatch = FALSE
		This.pcreErrorCode = pcreExecRet
		If This.UseAutoErrPrint Then This.TranslateErrorMessage()
	EndIf

	Delete[] subStrVec

	Function = This.pcreErrorCode
End Function

' Replaces every match of the precompiled pattern iPattern in sSubjectString
' using the precompiled replacement template iReplacement and stores the
' result in sOutputString (resized with ReAllocate).
'
' Parameters:
'   sSubjectString - NUL-terminated subject text
'   iPattern       - handle returned by StringRegExpCompile
'   iReplacement   - handle returned by StringRegExpCompileReplacement
'   sOutputString  - receives the result; must be 0 or a heap pointer
'   manualOpt      - not used by this overload (the pattern is already compiled)
'   pcreOffset     - byte offset in the subject at which matching starts
'
' Results: iCount = capture groups in the pattern, iReplacementCount = number
' of replacements, IsMatch, pcreErrorCode.
'
' Returns pcreErrorCode: 0 when at least one replacement was made, otherwise
' the PCRE error code (PCRE_ERROR_NOMATCH when nothing matched),
' PCRE_ERROR_NULL for an invalid handle, or -200 when the template refers to a
' group the pattern does not have. On any non-zero result sOutputString
' receives an unchanged copy of the subject.
Function PCRE1_RegExpClass.StringRegExpReplace(ByRef sSubjectString As ZString Ptr, ByRef iPattern As Long, ByRef iReplacement As Long, ByRef sOutputString As ZString Ptr, ByVal manualOpt as long = 0, ByVal pcreOffset as long = 0) As Long

	Dim As Long pcreExecRet, iSubjectStringLen, subStrVecCount, iAppendPos = 0, itmpStrSize = 0
	Dim As Long iExecOpt = 0, iChunk, iSrcOff, iGroupNum, iGroupStart, iRefCount
	Dim As Long Ptr subStrVec
	Dim As ZString Ptr pstmpString, pReplText
	Dim As tagReplacementPattern Ptr pTemplate

	This.iCount					= 0
	This.iRows					= 0
	This.iColumns				= 0
	This.iReplacementCount	= 0
	This.IsMatch				= FALSE
	This.pcreErrorCode		= 0
	This.pcreErrorOffset		= 0
	*This.pcreErrorStr		= ""

	' Reject handles that do not refer to a compiled pattern / template.
	If (iPattern < 1) OrElse (iPattern > This.pcreComCount) OrElse (iPattern > UBound(This.pcreComCaptureGroupCount)) OrElse (This.pcreComPattern(iPattern) = NULL) _
		OrElse (iReplacement < 1) OrElse (iReplacement > This.pcreRepCount) Then
		This.pcreErrorCode = PCRE_ERROR_NULL
		If This.UseAutoErrPrint Then This.TranslateErrorMessage()
		CopyUTF8String_ZStringPTRToZStringPTR(sSubjectString,sOutputString)
		Return This.pcreErrorCode
	End If

	pTemplate  = @This.pcreComReplacement(iReplacement)
	pReplText  = pTemplate->sReplacementPattern
	iRefCount  = pTemplate->iReplacementGroupCount
	This.iCount = This.pcreComCaptureGroupCount(iPattern)

	iSubjectStringLen = strlen(sSubjectString)
	' One offset triple per capture group plus one for the whole match.
	subStrVecCount = (This.iCount + 1) * 3
	subStrVec = New Long[subStrVecCount]
	itmpStrSize = iSubjectStringLen * 2 + 16
	pstmpString = Callocate(itmpStrSize)

	#Macro PCRE1_SRRC_APPEND(pSrc, nBytes)
		If (nBytes) > 0 Then
			If iAppendPos + (nBytes) + 1 > itmpStrSize Then
				itmpStrSize = (iAppendPos + (nBytes) + 1) * 2
				pstmpString = ReAllocate(pstmpString, itmpStrSize)
			End If
			memcpy(@pstmpString[iAppendPos], pSrc, (nBytes))
			iAppendPos += (nBytes)
		End If
	#EndMacro

	Do
		If This.pcreComExtra(iPattern) <> NULL Then
			pcreExecRet = pcre_jit_exec(This.pcreComPattern(iPattern), _'	compiled pattern
												 This.pcreComExtra(iPattern), _	'	study data of the compiled pattern
												 sSubjectString, _					'	subject string
												 iSubjectStringLen, _				'	subject length in bytes
												 pcreOffset, _							'	start offset in bytes (must be a character boundary in UTF-8 mode)
												 iExecOpt, _							'	0, or PCRE_NOTEMPTY_ATSTART after an empty match
												 subStrVec, _							'	vector for result offsets
												 subStrVecCount, _					'	number of elements in the vector (a multiple of 3)
												 This.pcreComJitStack)				'	JIT stack
		Else
			' Patterns without study data cannot go through pcre_jit_exec.
			pcreExecRet = pcre_exec(This.pcreComPattern(iPattern), 0, sSubjectString, iSubjectStringLen, pcreOffset, iExecOpt, subStrVec, subStrVecCount)
		End If

		If pcreExecRet <= 0 Then
			This.pcreErrorCode = pcreExecRet
			Exit Do
		End If

		' Unmatched text between the previous match and this one.
		iChunk = subStrVec[0] - pcreOffset
		PCRE1_SRRC_APPEND(@sSubjectString[pcreOffset], iChunk)

		' Literal segment i followed by group reference i.
		For i As Long = 1 To iRefCount
			iSrcOff = pTemplate->iPos1(i)
			iChunk = pTemplate->iPos2(i) - pTemplate->iPos1(i) - 1
			PCRE1_SRRC_APPEND(@pReplText[iSrcOff], iChunk)

			iGroupNum = pTemplate->iReplacementGroupNum(i)
			If (iGroupNum < 0) OrElse (iGroupNum > This.iCount) Then
				' The pattern has no such group; its offsets are not in the vector.
				This.pcreErrorCode = -200
				Exit Do
			End If
			iGroupStart = subStrVec[2 * iGroupNum]
			If iGroupStart > -1 Then
				iChunk = subStrVec[2 * iGroupNum + 1] - iGroupStart
				PCRE1_SRRC_APPEND(@sSubjectString[iGroupStart], iChunk)
			End If
		Next

		' Literal text after the last group reference.
		iSrcOff = pTemplate->iPos1(iRefCount + 1)
		iChunk = strlen(@pReplText[iSrcOff])
		PCRE1_SRRC_APPEND(@pReplText[iSrcOff], iChunk)

		This.iReplacementCount += 1
		pcreOffset = subStrVec[1]

		' After an empty match, forbid another empty match at the same offset;
		' otherwise the loop would never advance.
		If subStrVec[0] = subStrVec[1] Then
			iExecOpt = PCRE_NOTEMPTY_ATSTART
		Else
			iExecOpt = 0
		End If
	Loop

	If (This.pcreErrorCode = PCRE_ERROR_NOMATCH) AndAlso (This.iReplacementCount > 0) Then
		' Running out of matches after at least one replacement is the normal end.
		This.IsMatch = TRUE
		This.pcreErrorCode = 0
		iChunk = iSubjectStringLen - pcreOffset
		PCRE1_SRRC_APPEND(@sSubjectString[pcreOffset], iChunk)
		' The result length is tracked explicitly; the buffer is not NUL-terminated.
		sOutputString = ReAllocate(sOutputString, iAppendPos + 1)
		memcpy(sOutputString, pstmpString, iAppendPos)
		Clear(sOutputString[iAppendPos], 0, 1)
	Else
		This.iCount = 0
		This.iReplacementCount = 0
		This.IsMatch = FALSE
		If This.UseAutoErrPrint Then This.TranslateErrorMessage()
		CopyUTF8String_ZStringPTRToZStringPTR(sSubjectString,sOutputString)
	EndIf

	#Undef PCRE1_SRRC_APPEND

	Delete[] subStrVec
	DeAllocate(pstmpString)

	Function = This.pcreErrorCode
End Function

' Finds all matches of the precompiled pattern iPattern in sSubjectString and
' stores the text of every capture group of every match in aRezult (1-based,
' row after row). The text of the whole match (group 0) is not stored.
'
' Parameters:
'   sSubjectString - NUL-terminated subject text
'   iPattern       - handle returned by StringRegExpCompile
'   aRezult()      - dynamic String array that receives the group texts
'   pcreOffset     - byte offset in the subject at which matching starts
'
' Results: iRows = number of matches, iColumns = capture groups in the pattern,
' iCount = iRows * iColumns. Groups that did not take part in a match are
' stored as empty strings so every row has the same width.
'
' Returns pcreErrorCode: 0 on success; otherwise (including an invalid handle,
' reported as PCRE_ERROR_NULL) aRezult is reset to a single empty element and
' the counters are cleared.
Function PCRE1_RegExpClass.StringRegExpArray1d1Based(ByRef sSubjectString As ZString Ptr,ByRef iPattern As Long, aRezult() As String, ByVal pcreOffset as long = 0) As Long

	Dim As Long pcreExecRet, iSubjectStringLen, subStrVecCount, iSize = 0, iGroups = 0, iExecOpt = 0
	Dim As Long Ptr subStrVec
	Dim As Const ZString Ptr Ptr ppsubStrMatches = NULL

	This.iCount					= 0
	This.iRows					= 0
	This.iColumns				= 0
	This.iReplacementCount	= 0
	This.IsMatch				= FALSE
	This.pcreErrorCode		= 0
	This.pcreErrorOffset		= 0
	*This.pcreErrorStr		= ""

	' Reject handles that do not refer to a compiled pattern.
	If (iPattern < 1) OrElse (iPattern > This.pcreComCount) OrElse (iPattern > UBound(This.pcreComCaptureGroupCount)) OrElse (This.pcreComPattern(iPattern) = NULL) Then
		This.pcreErrorCode = PCRE_ERROR_NULL
		ReDim aRezult(1 To 1) As String
		If This.UseAutoErrPrint Then This.TranslateErrorMessage()
		Return This.pcreErrorCode
	End If

	iGroups = This.pcreComCaptureGroupCount(iPattern)
	iSubjectStringLen = strlen(sSubjectString)
	' One offset triple per capture group plus one for the whole match.
	subStrVecCount = (iGroups + 1) * 3
	subStrVec = New Long[subStrVecCount]

	Do
		If This.pcreComExtra(iPattern) <> NULL Then
			pcreExecRet = pcre_jit_exec(This.pcreComPattern(iPattern), _'	compiled pattern
												 This.pcreComExtra(iPattern), _	'	study data of the compiled pattern
												 sSubjectString, _					'	subject string
												 iSubjectStringLen, _				'	subject length in bytes
												 pcreOffset, _							'	start offset in bytes (must be a character boundary in UTF-8 mode)
												 iExecOpt, _							'	0, or PCRE_NOTEMPTY_ATSTART after an empty match
												 subStrVec, _							'	vector for result offsets
												 subStrVecCount, _					'	number of elements in the vector (a multiple of 3)
												 This.pcreComJitStack)				'	JIT stack
		Else
			' Patterns without study data cannot go through pcre_jit_exec.
			pcreExecRet = pcre_exec(This.pcreComPattern(iPattern), 0, sSubjectString, iSubjectStringLen, pcreOffset, iExecOpt, subStrVec, subStrVecCount)
		End If

		If pcreExecRet < 0 Then
			This.pcreErrorCode = pcreExecRet
			Exit Do
		End If

		' Always extract all groups of the pattern; unset groups yield empty strings.
		' pcre_get_substring_list allocates the list itself.
		This.pcreErrorCode = pcre_get_substring_list(sSubjectString, subStrVec, iGroups + 1, @ppsubStrMatches)
		If This.pcreErrorCode <> 0 Then Exit Do

		If iSize < (This.iCount + iGroups + 1) Then
			iSize += 88 + iGroups + 1
			ReDim Preserve aRezult(1 To iSize) As String
		EndIf
		This.IsMatch = TRUE
		This.iColumns = iGroups
		This.iRows += 1
		For i As Long = 1 To iGroups
			This.iCount += 1
			aRezult(This.iCount) = *ppsubStrMatches[i]
		Next
		' The list is a single block and must be released as a list.
		pcre_free_substring_list(ppsubStrMatches)
		ppsubStrMatches = NULL

		pcreOffset = subStrVec[1]
		' After an empty match, forbid another empty match at the same offset;
		' otherwise the loop would never advance.
		If subStrVec[0] = subStrVec[1] Then
			iExecOpt = PCRE_NOTEMPTY_ATSTART
		Else
			iExecOpt = 0
		End If
	Loop

	If (This.pcreErrorCode = PCRE_ERROR_NOMATCH) And (This.iCount > 0) Then
		' Running out of matches after collecting results is the normal end.
		ReDim Preserve aRezult(1 To This.iCount) As String
		This.pcreErrorCode = 0
	Else
		ReDim aRezult(1 To 1) As String
		This.IsMatch	= FALSE
		This.iCount		= 0
		This.iRows		= 0
		This.iColumns	= 0
		If This.UseAutoErrPrint Then This.TranslateErrorMessage()
	EndIf

	Delete[] subStrVec
	Function = This.pcreErrorCode
End Function

' Replaces XML character entities in sSubjectString IN PLACE with the
' characters they stand for. Six passes are made, one per entry of the
' pattern table below; every replacement is shorter than the text it replaces,
' so the subject buffer never has to grow.
'
' On first use the six patterns are compiled, studied and stored in six
' consecutive slots of this object's compiled-pattern arrays.
'
' Parameters:
'   sSubjectString - NUL-terminated, writable subject text
'
' Results: iReplacementCount = total number of replacements, IsMatch = TRUE
' when at least one replacement was made.
'
' Returns pcreErrorCode: 0 when at least one replacement was made,
' PCRE_ERROR_NOMATCH when the subject contained none of the entities, or the
' PCRE error code that stopped processing.
Function PCRE1_RegExpClass.StringRegExpReplaceXMLSpecials(ByRef sSubjectString As ZString Ptr) As Long

	' Per-call state. These must not be Static: Static locals of a member
	' procedure are shared by all objects of the class.
	Dim As Long pcreExecRet, iSubjectStringLen, pcreOffset, iPatternNum, iAppendPos, iChunk, iPassHits
	Dim As Long subStrVec(0 To 2)
	Dim As ZString Ptr pstmpString
	Dim As Const ZString Ptr pcreMsg = NULL	' receives PCRE's static message text

	' Read-only tables: exec options, replacement character and pattern per pass.
	Static As Long pcreOption(1 To 6) => {0,PCRE_NO_UTF8_CHECK,PCRE_NO_UTF8_CHECK,PCRE_NO_UTF8_CHECK,PCRE_NO_UTF8_CHECK,PCRE_NO_UTF8_CHECK}
	Static As zString * 2 aXMLSpecials(1 To 6) => {"&", "&", "<", ">", "'", Chr(34)}
	Static As ZString * 10 aXMLPatterns(1 To 6) => {"\x26amp;", "\x26amp;", "\x26lt;", "\x26gt;", "\x26apos;", "\x26quot;"}

	If This.iXMLSpInitDone = 0 Then
		iPatternNum = This.pcreComCount
		This.pcreComCount += 6
		If This.pcreComCount >= This.pcreComSize Then This.pcreComSize += 8
		' Keep all three per-pattern arrays at the same capacity.
		If UBound(This.pcreComPattern) < This.pcreComSize Then ReDim Preserve This.pcreComPattern(1 To This.pcreComSize)
		If UBound(This.pcreComExtra) < This.pcreComSize Then ReDim Preserve This.pcreComExtra(1 To This.pcreComSize)
		If UBound(This.pcreComCaptureGroupCount) < This.pcreComSize Then ReDim Preserve This.pcreComCaptureGroupCount(1 To This.pcreComSize)
		If This.pcreComJitStack = 0 Then This.pcreComJitStack = pcre_jit_stack_alloc(32*1024, 512*1024)

		For k As Long = 1 To 6
			' PCRE stores a pointer to a static library string (or NULL) in the
			' variable passed as errptr, so the owned message buffer is not passed.
			This.pcreComPattern(iPatternNum + k) = pcre_compile2(@aXMLPatterns(k), PCRE_UTF8, @This.pcreErrorCode, @pcreMsg, @This.pcreErrorOffset, NULL)
			If This.pcreComPattern(iPatternNum + k) = NULL Then
				' Undo the partial initialisation so a later call starts cleanly.
				For j As Long = 1 To k - 1
					pcre_free_study(This.pcreComExtra(iPatternNum + j))
					pcre_free(This.pcreComPattern(iPatternNum + j))
					This.pcreComExtra(iPatternNum + j) = NULL
					This.pcreComPattern(iPatternNum + j) = NULL
				Next
				This.pcreComCount -= 6
				If This.UseAutoErrPrint Then This.TranslateErrorMessage()
				Return This.pcreErrorCode
			End If
			This.pcreComExtra(iPatternNum + k) = pcre_study(This.pcreComPattern(iPatternNum + k), PCRE_STUDY_JIT_COMPILE, @pcreMsg)
			This.pcreComCaptureGroupCount(iPatternNum + k) = 0
			pcre_assign_jit_stack(This.pcreComExtra(iPatternNum + k), NULL, This.pcreComJitStack)
		Next

		' Remember the first slot per object: the flag holds (base index + 1),
		' which is never 0.
		This.iXMLSpInitDone = iPatternNum + 1
	Else
		iPatternNum = This.iXMLSpInitDone - 1
	EndIf

	This.IsMatch				= FALSE
	This.iCount					= 0
	This.iReplacementCount	= 0
	This.iRows					= 0
	This.iColumns				= 0
	This.pcreErrorCode		= 0
	iSubjectStringLen = strlen(sSubjectString)
	pstmpString = Callocate(iSubjectStringLen + 1)

	For i As Long = 1 To 6

		pcreOffset = 0
		iAppendPos = 0
		iPassHits  = 0

		Do
			If This.pcreComExtra(iPatternNum + i) <> NULL Then
				pcreExecRet = pcre_jit_exec(This.pcreComPattern(iPatternNum + i), _	'	compiled pattern
													 This.pcreComExtra(iPatternNum + i), _		'	study data of the compiled pattern
													 sSubjectString, _								'	subject string
													 iSubjectStringLen, _							'	subject length in bytes
													 pcreOffset, _										'	start offset in bytes
													 pcreOption(i), _									'	option bits
													 @subStrVec(0), _									'	vector for result offsets
													 3, _													'	number of elements in the vector (a multiple of 3)
													 This.pcreComJitStack)							'	JIT stack
			Else
				' Patterns without study data cannot go through pcre_jit_exec.
				pcreExecRet = pcre_exec(This.pcreComPattern(iPatternNum + i), 0, sSubjectString, iSubjectStringLen, pcreOffset, pcreOption(i), @subStrVec(0), 3)
			End If
			If pcreExecRet <= 0 Then Exit Do

			' Text in front of the entity, then the single replacement character.
			iChunk = subStrVec[0] - pcreOffset
			memcpy(@pstmpString[iAppendPos], @sSubjectString[pcreOffset], iChunk)
			iAppendPos += iChunk
			memcpy(@pstmpString[iAppendPos], @aXMLSpecials(i), 1)
			iAppendPos += 1
			pcreOffset = subStrVec[1]
			iPassHits += 1
			This.iReplacementCount += 1
		Loop

		If pcreExecRet = PCRE_ERROR_NOMATCH Then
			' Normal end of a pass; write the result back only if it changed.
			If iPassHits > 0 Then
				iChunk = iSubjectStringLen - pcreOffset
				memcpy(@pstmpString[iAppendPos], @sSubjectString[pcreOffset], iChunk)
				iAppendPos += iChunk
				Clear(pstmpString[iAppendPos], 0, 1)
				memcpy(sSubjectString, pstmpString, iAppendPos + 1)
				iSubjectStringLen = iAppendPos
			End If
		Else
			This.pcreErrorCode = pcreExecRet
			If This.UseAutoErrPrint Then This.TranslateErrorMessage()
			Exit For
		EndIf

	Next i

	' All passes finished without an error: report the overall outcome.
	If pcreExecRet = PCRE_ERROR_NOMATCH Then
		If This.iReplacementCount > 0 Then
			This.IsMatch = TRUE
			This.pcreErrorCode = 0
		Else
			This.pcreErrorCode = PCRE_ERROR_NOMATCH
		End If
	End If

	DeAllocate(pstmpString)

	Function = This.pcreErrorCode
End Function

' Compiles and studies (with JIT) sPattern once and stores it in this object
' so it can be executed many times through the handle-based overloads.
'
' Parameters:
'   sPattern  - NUL-terminated pattern text
'   manualOpt - option bits passed to pcre_compile2
'
' Returns the handle (>= 1) of the stored pattern, or 0 on failure. On failure
' pcreErrorCode / pcreErrorStr describe the problem, nothing is stored and
' everything allocated for the attempt is released.
Function PCRE1_RegExpClass.StringRegExpCompile(ByRef sPattern As ZString Ptr, ByVal manualOpt as long = 0) As Long

	Dim As Const ZString Ptr pcreMsg = NULL	' receives PCRE's static message text
	Dim As pcre Ptr reCompiled
	Dim As pcre_extra Ptr reExtra
	Dim As Long iGroups = 0, iSlot

	' PCRE stores a pointer to a static library string (or NULL) in the variable
	' passed as errptr, so it must not be given the address of the owned buffer.
	reCompiled = pcre_compile2(sPattern, manualOpt, @This.pcreErrorCode, @pcreMsg, @This.pcreErrorOffset, NULL)
	If reCompiled = NULL Then
		' Copy the message into the owned buffer (512 bytes, as in the constructor).
		If This.pcreErrorStr = NULL Then This.pcreErrorStr = Callocate(512, 1)
		If (This.pcreErrorStr <> NULL) AndAlso (pcreMsg <> NULL) Then
			strncpy(This.pcreErrorStr, pcreMsg, 511)
			Clear(This.pcreErrorStr[511], 0, 1)
		End If
		Print "PCRE1_RegExpClass.StringRegExpCompile :: Error : Could not compile Pattern, pcreErrorStr = ";*This.pcreErrorStr;" sPattern = ";*sPattern
		Return 0
	EndIf

	' pcre_study may legitimately return NULL; it failed only if it also set a message.
	reExtra = pcre_study(reCompiled, PCRE_STUDY_JIT_COMPILE, @pcreMsg)
	If (reExtra = NULL) AndAlso (pcreMsg <> NULL) Then
		If This.pcreErrorStr = NULL Then This.pcreErrorStr = Callocate(512, 1)
		If This.pcreErrorStr <> NULL Then
			strncpy(This.pcreErrorStr, pcreMsg, 511)
			Clear(This.pcreErrorStr[511], 0, 1)
		End If
		If This.UseAutoErrPrint Then This.TranslateErrorMessage()
		Print "PCRE1_RegExpClass.StringRegExpCompile :: Error : Could not study Pattern, pcreErrorStr = ";*This.pcreErrorStr;" sPattern = ";*sPattern
		This.pcreErrorCode = 100
		pcre_free(reCompiled)
		Return 0
	EndIf

	This.pcreErrorCode = pcre_fullinfo(reCompiled, reExtra, PCRE_INFO_CAPTURECOUNT, @iGroups)
	If This.pcreErrorCode <> 0 Then
		If This.UseAutoErrPrint Then This.TranslateErrorMessage()
		Print "PCRE1_RegExpClass.StringRegExpCompile :: Error : Could not get PCRE_INFO_CAPTURECOUNT, pcreErrorStr = ";*This.pcreErrorStr
		pcre_free_study(reExtra)
		pcre_free(reCompiled)
		Return 0
	EndIf

	' Everything succeeded: claim the next slot, growing the arrays when needed.
	iSlot = This.pcreComCount + 1
	If iSlot >= This.pcreComSize Then This.pcreComSize += 8
	' Each array is checked on its own because they may differ in length.
	If UBound(This.pcreComPattern) < This.pcreComSize Then ReDim Preserve This.pcreComPattern(1 To This.pcreComSize) As pcre Ptr
	If UBound(This.pcreComExtra) < This.pcreComSize Then ReDim Preserve This.pcreComExtra(1 To This.pcreComSize) As pcre_extra Ptr
	If UBound(This.pcreComCaptureGroupCount) < This.pcreComSize Then ReDim Preserve This.pcreComCaptureGroupCount(1 To This.pcreComSize) As Long

	This.pcreComPattern(iSlot) = reCompiled
	This.pcreComExtra(iSlot) = reExtra
	This.pcreComCaptureGroupCount(iSlot) = iGroups

	' All compiled patterns share one JIT stack, created on first use.
	If This.pcreComJitStack = 0 Then This.pcreComJitStack = pcre_jit_stack_alloc(32*1024, 512*1024)
	pcre_assign_jit_stack(reExtra, NULL, This.pcreComJitStack)

	This.pcreComCount = iSlot
	Function = iSlot
End Function

Function PCRE1_RegExpClass.StringRegExpCompileReplacement(ByRef sReplacement As ZString Ptr) As Long
	' Pre-parses a replacement string containing "$<digits>" group references
	' and stores the result in the next free slot of This.pcreComReplacement().
	'
	' For the k-th "$" found (1-based):
	'   iPos1(k)                - 0-based offset where the scan for this "$" started
	'   iPos2(k)                - 1-based position of the "$", i.e. the 0-based
	'                             offset of the first character after it
	'   iReplacementGroupNum(k) - number following the "$" (0 when no digits follow)
	' Entry (count + 1) is a terminator: its iPos1 is the offset of the text
	' after the last reference and its iPos2 is 0.
	'
	' Returns the 1-based slot index of the compiled replacement.

	This.pcreRepCount += 1
	If This.pcreRepCount >= This.pcreRepSize Then
		This.pcreRepSize += 8
		ReDim Preserve This.pcreComReplacement(1 To This.pcreRepSize) As tagReplacementPattern
	EndIf

	Dim As UInteger iReplStrLen = strlen(sReplacement)
	' Scratch tables. A string of length n holds at most n "$" characters and the
	' loop below always touches one extra terminator entry, so the highest index
	' used is n + 1. (Sizing them with n overflowed for "" and for all-"$" strings.)
	Dim As Integer iPos1(iReplStrLen + 1),iPos2(iReplStrLen + 1),iNumOfDigits(iReplStrLen + 1),iCapturingGroupNum(iReplStrLen + 1), iNum = 0
	Do
		iNum += 1
		' search for the next "$" in the tail that starts at offset iPos1(iNum)
		iPos2(iNum) = InStr(sReplacement[iPos1(iNum)], "$")
		If iPos2(iNum) = 0 Then
			iNum -= 1
			Exit Do
		EndIf
		' make the position relative to the start of the whole string
		iPos2(iNum) += iPos1(iNum)
		' count the digits that directly follow the "$"
		While isdigit(Asc(Left(sReplacement[iPos2(iNum) + iNumOfDigits(iNum)], 1)))
			iNumOfDigits(iNum) += 1
		Wend
		iCapturingGroupNum(iNum) = ValInt(Left(sReplacement[iPos2(iNum)], iNumOfDigits(iNum)))
		' the next scan resumes right after the digits
		iPos1(iNum + 1) = iPos2(iNum) + iNumOfDigits(iNum)
	Loop

	If This.pcreComReplacement(This.pcreRepCount).iReplacementGroupSize < (iNum + 1) Then
		This.pcreComReplacement(This.pcreRepCount).iReplacementGroupSize = iNum + 1
		ReDim (This.pcreComReplacement(This.pcreRepCount).iPos1)(1 To iNum + 1) As UInteger
		ReDim (This.pcreComReplacement(This.pcreRepCount).iPos2)(1 To iNum + 1) As UInteger
		' element type must match the declaration in tagReplacementPattern (As Long)
		ReDim (This.pcreComReplacement(This.pcreRepCount).iReplacementGroupNum)(1 To iNum + 1) As Long
	EndIf
	This.pcreComReplacement(This.pcreRepCount).iReplacementGroupCount = iNum

	' keep a private, zero-terminated copy of the replacement text
	This.pcreComReplacement(This.pcreRepCount).sReplacementPattern = ReAllocate(This.pcreComReplacement(This.pcreRepCount).sReplacementPattern,iReplStrLen + 1)
	Clear(*This.pcreComReplacement(This.pcreRepCount).sReplacementPattern,0,iReplStrLen + 1)
	memcpy(This.pcreComReplacement(This.pcreRepCount).sReplacementPattern,sReplacement,iReplStrLen)

	' publish the scratch tables, including the terminator entry iNum + 1
	For i As UInteger = 1 To (iNum + 1)
		This.pcreComReplacement(This.pcreRepCount).iPos1(i) = iPos1(i)
		This.pcreComReplacement(This.pcreRepCount).iPos2(i) = iPos2(i)
		This.pcreComReplacement(This.pcreRepCount).iReplacementGroupNum(i) = iCapturingGroupNum(i)
		' remember the highest group number that is referenced
		If iCapturingGroupNum(i) > This.pcreComReplacement(This.pcreRepCount).iReplacementGroupMaxNum Then
			This.pcreComReplacement(This.pcreRepCount).iReplacementGroupMaxNum = iCapturingGroupNum(i)
		EndIf
	Next

	Function = This.pcreRepCount
End Function

Sub PCRE1_RegExpClass.TranslateErrorMessage()
	' Stores a human readable text for This.pcreErrorCode in *This.pcreErrorStr
	' and prints code and text.
	'
	' Code ranges handled:
	'   0 .. 85  - compilation error codes
	'   100      - pcre_study() failure: pcreErrorStr already holds the message,
	'              so the string is neither reallocated nor overwritten
	'   negative - PCRE_ERROR_xxx values from pcre_exec() / pcre_fullinfo()
	'   -200     - replacement pattern references a group that does not exist
	' For every code except 100 the message buffer is (re)allocated to 512 bytes
	' before the text is written.
	If This.pcreErrorCode <> 100 Then
		This.pcreErrorStr = ReAllocate(This.pcreErrorStr,512)
		Clear(*This.pcreErrorStr,32,511)
	EndIf
	Select Case This.pcreErrorCode

		'		COMPILATION ERROR CODES

		Case 0
			*This.pcreErrorStr = "no error"
		Case 1
			*This.pcreErrorStr = "\ at end of pattern"
		Case 2
			*This.pcreErrorStr = "\c at end of pattern"
		Case 3
			*This.pcreErrorStr = "unrecognized character follows \"
		Case 4
			*This.pcreErrorStr = "numbers out of order in {} quantifier"
		Case 5
			*This.pcreErrorStr = "number too big in {} quantifier"
		Case 6
			*This.pcreErrorStr = "missing terminating ] for character class"
		Case 7
			*This.pcreErrorStr = "invalid escape sequence in character class"
		Case 8
			*This.pcreErrorStr = "range out of order in character class"
		Case 9
			*This.pcreErrorStr = "nothing to repeat"
		Case 10
			*This.pcreErrorStr = "[this code is not in use]"
		Case 11
			*This.pcreErrorStr = "internal error: unexpected repeat"
		Case 12
			*This.pcreErrorStr = "unrecognized character after (? or (?-"
		Case 13
			*This.pcreErrorStr = "POSIX named classes are supported only within a class"
		Case 14
			*This.pcreErrorStr = "missing )"
		Case 15
			*This.pcreErrorStr = "reference to non-existent subpattern"
		Case 16
			*This.pcreErrorStr = "erroffset passed as NULL"
		Case 17
			*This.pcreErrorStr = "unknown option bit(s) set"
		Case 18
			*This.pcreErrorStr = "missing ) after comment"
		Case 19
			*This.pcreErrorStr = "[this code is not in use]"
		Case 20
			*This.pcreErrorStr = "regular expression is too large"
		Case 21
			*This.pcreErrorStr = "failed to get memory"
		Case 22
			*This.pcreErrorStr = "unmatched parentheses"
		Case 23
			*This.pcreErrorStr = "internal error: code overflow"
		Case 24
			*This.pcreErrorStr = "unrecognized character after (?<"
		Case 25
			*This.pcreErrorStr = "lookbehind assertion is not fixed length"
		Case 26
			*This.pcreErrorStr = "malformed number or name after (?("
		Case 27
			*This.pcreErrorStr = "conditional group contains more than two branches"
		Case 28
			*This.pcreErrorStr = "assertion expected after (?("
		Case 29
			*This.pcreErrorStr = "(?R or (?[+-]digits must be followed by )"
		Case 30
			*This.pcreErrorStr = "unknown POSIX class name"
		Case 31
			*This.pcreErrorStr = "POSIX collating elements are not supported"
		Case 32
			*This.pcreErrorStr = "this version of PCRE is compiled without UTF support"
		Case 33
			*This.pcreErrorStr = "[this code is not in use]"
		Case 34
			*This.pcreErrorStr = "character value in \x{} or \o{} is too large"
		Case 35
			*This.pcreErrorStr = "invalid condition (?(0)"
		Case 36
			*This.pcreErrorStr = "\C not allowed in lookbehind assertion"
		Case 37
			*This.pcreErrorStr = "PCRE does not support \L, \l, \N{name}, \U, or \u"
		Case 38
			*This.pcreErrorStr = "number after (?C is > 255"
		Case 39
			*This.pcreErrorStr = "closing ) for (?C expected"
		Case 40
			*This.pcreErrorStr = "recursive call could loop indefinitely"
		Case 41
			*This.pcreErrorStr = "unrecognized character after (?P"
		Case 42
			*This.pcreErrorStr = "syntax error in subpattern name (missing terminator)"
		Case 43
			*This.pcreErrorStr = "two named subpatterns have the same name"
		Case 44
			*This.pcreErrorStr = "invalid UTF-8 string (specifically UTF-8)"
		Case 45
			*This.pcreErrorStr = "support for \P, \p, and \X has not been compiled"
		Case 46
			*This.pcreErrorStr = "malformed \P or \p sequence"
		Case 47
			*This.pcreErrorStr = "unknown property name after \P or \p"
		Case 48
			*This.pcreErrorStr = "subpattern name is too long (maximum 32 characters)"
		Case 49
			*This.pcreErrorStr = "too many named subpatterns (maximum 10000)"
		Case 50
			*This.pcreErrorStr = "[this code is not in use]"
		Case 51
			*This.pcreErrorStr = "octal value is greater than \377 in 8-bit non-UTF-8 mode"
		Case 52
			*This.pcreErrorStr = "internal error: overran compiling workspace"
		Case 53
			*This.pcreErrorStr = "internal error: previously-checked referenced subpattern not found"
		Case 54
			*This.pcreErrorStr = "DEFINE group contains more than one branch"
		Case 55
			*This.pcreErrorStr = "repeating a DEFINE group is not allowed"
		Case 56
			*This.pcreErrorStr = "inconsistent NEWLINE options"
		Case 57
			*This.pcreErrorStr = "\g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number"
		Case 58
			*This.pcreErrorStr = "a numbered reference must not be zero"
		Case 59
			*This.pcreErrorStr = "an argument is not allowed for (*ACCEPT), (*FAIL), or (*COMMIT)"
		Case 60
			*This.pcreErrorStr = "(*VERB) not recognized or malformed"
		Case 61
			*This.pcreErrorStr = "number is too big"
		Case 62
			*This.pcreErrorStr = "subpattern name expected"
		Case 63
			*This.pcreErrorStr = "digit expected after (?+"
		Case 64
			*This.pcreErrorStr = "] is an invalid data character in JavaScript compatibility mode"
		Case 65
			*This.pcreErrorStr = "different names for subpatterns of the same number are not allowed"
		Case 66
			*This.pcreErrorStr = "(*MARK) must have an argument"
		Case 67
			*This.pcreErrorStr = "this version of PCRE is not compiled with Unicode property support"
		Case 68
			*This.pcreErrorStr = "\c must be followed by an ASCII character"
		Case 69
			*This.pcreErrorStr = "\k is not followed by a braced, angle-bracketed, or quoted name"
		Case 70
			*This.pcreErrorStr = "internal error: unknown opcode in find_fixedlength()"
		Case 71
			*This.pcreErrorStr = "\N is not supported in a class"
		Case 72
			*This.pcreErrorStr = "too many forward references"
		Case 73
			*This.pcreErrorStr = "disallowed Unicode code point (>= 0xd800 && <= 0xdfff)"
		Case 74
			*This.pcreErrorStr = "invalid UTF-16 string (specifically UTF-16)"
		Case 75
			*This.pcreErrorStr = "name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)"
		Case 76
			*This.pcreErrorStr = "character value in \u.... sequence is too large"
		Case 77
			*This.pcreErrorStr = "invalid UTF-32 string (specifically UTF-32)"
		Case 78
			*This.pcreErrorStr = "setting UTF is disabled by the application"
		Case 79
			*This.pcreErrorStr = "non-hex character in \x{} (closing brace missing?)"
		Case 80
			*This.pcreErrorStr = "non-octal character in \o{} (closing brace missing?)"
		Case 81
			*This.pcreErrorStr = "missing opening brace after \o"
		Case 82
			*This.pcreErrorStr = "parentheses are too deeply nested"
		Case 83
			*This.pcreErrorStr = "invalid range in character class"
		Case 84
			*This.pcreErrorStr = "group name must start with a non-digit"
		Case 85
			*This.pcreErrorStr = "parentheses are too deeply nested (stack check)"

		'		Error return values from pcre_study()

		Case 100
			' error string already passed to *This.pcreErrorStr - nothing to translate.
			' This branch must exist: without it code 100 reaches Case Else, which
			' writes into a string that was not reallocated above.

		'		Error return values from pcre_exec() and pcre_fullinfo()

		Case PCRE_ERROR_NOMATCH
			*This.pcreErrorStr = "The subject string did not match the pattern."
		Case PCRE_ERROR_NULL
			*This.pcreErrorStr = "Either code or subject was passed as NULL, or ovector was NULL and ovecsize was not zero."
		Case PCRE_ERROR_BADOPTION
			*This.pcreErrorStr = "An unrecognized bit was set in the options argument."
		Case PCRE_ERROR_BADMAGIC
			*This.pcreErrorStr = "PCRE was passed a junk pointer or a pattern that was compiled in an environment of one endianness is run in an environment with the other endianness."
		Case PCRE_ERROR_UNKNOWN_OPCODE
			*This.pcreErrorStr = "While running the pattern match, an unknown item was encountered in the compiled pattern. This error could be caused by a bug in PCRE or by overwriting of the compiled pattern."
		Case PCRE_ERROR_NOMEMORY
			*This.pcreErrorStr = "If a pattern contains back references, but the ovector that is passed to pcre_exec() is not big enough to remember the referenced substrings. This can happen only when PCRE has been compiled with --disable-stack-for-recursion."
		Case PCRE_ERROR_NOSUBSTRING
			*This.pcreErrorStr = "This error is used by the pcre_copy_substring(), pcre_get_substring(), and pcre_get_substring_list() functions (see below). It is never returned by pcre_exec()."
		Case PCRE_ERROR_MATCHLIMIT
			*This.pcreErrorStr = "The backtracking limit, as specified by the match_limit field in a pcre_extra structure (or defaulted) was reached. See the description above."
		Case PCRE_ERROR_CALLOUT
			*This.pcreErrorStr = "This error is never generated by pcre_exec() itself. It is provided for use by callout functions that want to yield a distinctive error code."
		Case PCRE_ERROR_BADUTF8
			*This.pcreErrorStr = "A string that contains an invalid UTF-8 byte sequence was passed as a subject, and the PCRE_NO_UTF8_CHECK option was not set."
		Case PCRE_ERROR_BADUTF8_OFFSET
			*This.pcreErrorStr = "The UTF-8 byte sequence that was passed as a subject was checked and found to be valid (the PCRE_NO_UTF8_CHECK option was not set), but the value of startoffset did not point to the beginning of a UTF-8 character or the end of the subject."
		Case PCRE_ERROR_PARTIAL
			*This.pcreErrorStr = "The subject string did not match, but it did match partially."
		Case PCRE_ERROR_BADPARTIAL
			*This.pcreErrorStr = "This code is no longer in use since From release 8.00 onwards, now there are no restrictions on partial matching."
		Case PCRE_ERROR_INTERNAL
			*This.pcreErrorStr = "An unexpected internal error has occurred. This error could be caused by a bug in PCRE or by overwriting of the compiled pattern."
		Case PCRE_ERROR_BADCOUNT
			*This.pcreErrorStr = "This error is given if the value of the ovecsize argument is negative."
		Case PCRE_ERROR_RECURSIONLIMIT
			*This.pcreErrorStr = "The internal recursion limit, as specified by the match_limit_recursion field in a pcre_extra structure (or defaulted) was reached. See the description above."
		Case PCRE_ERROR_BADNEWLINE
			*This.pcreErrorStr = "An invalid combination of PCRE_NEWLINE_xxx options was given."
		Case PCRE_ERROR_BADOFFSET
			*This.pcreErrorStr = "The value of startoffset was negative or greater than the length of the subject, that is, the value in length."
		Case PCRE_ERROR_SHORTUTF8
			*This.pcreErrorStr = "This error is returned instead of PCRE_ERROR_BADUTF8 when the subject string ends with a truncated UTF-8 character and the PCRE_PARTIAL_HARD option is set."
		Case PCRE_ERROR_RECURSELOOP
			*This.pcreErrorStr = "This error is returned when pcre_exec() detects a recursion loop within the pattern. Specifically, it means that either the whole pattern or a subpattern has been called recursively for the second time at the same position in the subject string."
		Case PCRE_ERROR_JIT_STACKLIMIT
			*This.pcreErrorStr = "This error is returned when a pattern that was successfully studied using a JIT compile option is being matched, but the memory available for the just-in-time processing stack is not large enough."
		Case PCRE_ERROR_BADMODE
			*This.pcreErrorStr = "This error is given if a pattern that was compiled by the 8-bit library is passed to a 16-bit or 32-bit library function, or vice versa."
		Case PCRE_ERROR_BADENDIANNESS
			*This.pcreErrorStr = "This error is given if a pattern that was compiled and saved is reloaded on a host with different endianness. The utility function pcre_pattern_to_host_byte_order() can be used to convert such a pattern so that it runs on the new host."
		Case PCRE_ERROR_JIT_BADOPTION
			*This.pcreErrorStr = "This error is returned when a pattern that was successfully studied using a JIT compile option is being matched, but the matching mode (partial or complete match) does not correspond to any JIT compilation mode. When the JIT fast path function is used, this error may be also given for invalid options."
		Case PCRE_ERROR_BADLENGTH
			*This.pcreErrorStr = "This error is given if pcre_exec() is called with a negative value for the length argument."

		Case PCRE_ERROR_UNSET
			*This.pcreErrorStr = "The requested field is not set."

		'		Error return values from pattern replacement algorithm

		Case -200
			*This.pcreErrorStr = "Requested matching group number not exist in actual pattern."

		'		Unknown Errors

		Case Else
			*This.pcreErrorStr		= "Unknown error"
	End Select
	Print "PCRE1_RegExpClass.StringRegExp ... :: Error : Code = ";This.pcreErrorCode;"   Str = '";*This.pcreErrorStr;"'"
End Sub
Test :

Code: Select all

#Pragma Once
#Include Once "string.bi"
#Include Once "inc\PCRE1_RegExpClass.bi"


Function PCRE1_StringRegExp_Test(ByRef sSubjectString As ZString Ptr,ByRef sPatternString As ZString Ptr,ByRef sReplasementString As ZString Ptr) As Long
	' Speed and consistency test for PCRE1_RegExpClass.
	'
	' Each section runs the same operation with an on-the-fly ("Original")
	' pattern and with a pre-compiled ("Compiled") pattern, prints the elapsed
	' time of both and whether they produced the same result:
	'   1. XML entity replacement (text patterns, compiled patterns,
	'      StringRegExpReplaceXMLSpecials)
	'   2. StringRegExpReplace with sPatternString / sReplasementString
	'   3. StringRegExpBoolean, repeated 1000 times
	'   4. StringRegExpArray1d1Based
	'
	' sSubjectString     - text to search in
	' sPatternString     - regular expression used in sections 2 - 4
	' sReplasementString - replacement pattern used in section 2
	'
	' Always returns 0.

	Dim As PCRE1_RegExpClass pcreNew

	pcreNew.UseAutoErrPrint = FALSE

	Dim As Double TimeOrig, TimeOrigStart, TimeCompiled,TimeCompiledStart, TimeCompiled2,TimeCompiled2Start
	Dim As Long iLim = 1,iReplacementCountOrig,iReplacementCountCompiled,iReplacementCountCompiled2

	' output buffers; they are resized as needed and released at the end of the test
	Dim As ZString Ptr sOutputString1 = Callocate(64), sOutputString2 = Callocate(64), sOutputString3 = Callocate(64)

	Dim As Long iTestParrern = pcreNew.StringRegExpCompile(sPatternString)
	Dim As Long iReplasementPattern = pcreNew.StringRegExpCompileReplacement(sReplasementString)

	Dim As Long iPattern1 = pcreNew.StringRegExpCompile(StrPtr("\x26amp;"))
	Dim As Long iPattern2 = pcreNew.StringRegExpCompile(StrPtr("\x26lt;"))
	Dim As Long iPattern3 = pcreNew.StringRegExpCompile(StrPtr("\x26gt;"))
	Dim As Long iPattern4 = pcreNew.StringRegExpCompile(StrPtr("\x26apos;"))
	Dim As Long iPattern5 = pcreNew.StringRegExpCompile(StrPtr("\x26quot;"))

	Dim As Long iReplasementPattern1 = pcreNew.StringRegExpCompileReplacement(StrPtr("&"))
	Dim As Long iReplasementPattern2 = pcreNew.StringRegExpCompileReplacement(StrPtr("<"))
	Dim As Long iReplasementPattern3 = pcreNew.StringRegExpCompileReplacement(StrPtr(">"))
	Dim As Long iReplasementPattern4 = pcreNew.StringRegExpCompileReplacement(StrPtr("'"))
	Dim As Long iReplasementPattern5 = pcreNew.StringRegExpCompileReplacement(StrPtr(Chr(34)))

	' every variant works on its own copy of the subject
	CopyUTF8String_ZStringPTRToZStringPTR(sSubjectString,sOutputString1)
	CopyUTF8String_ZStringPTRToZStringPTR(sSubjectString,sOutputString2)
	CopyUTF8String_ZStringPTRToZStringPTR(sSubjectString,sOutputString3)

	Print String(64,"#")
	Print "StringRegExpReplaceXMLSpecials :"
	Print String(64,".")

	TimeOrigStart = Timer()
	'For i  As ULong = 1 To iLim
		' repeat while matches are found, so nested "&amp;amp;" is reduced completely
		Do
			pcreNew.StringRegExpReplace(sOutputString1, StrPtr("\x26amp;"), StrPtr("&"), sOutputString1)
		Loop While pcreNew.IsMatch

		pcreNew.StringRegExpReplace(sOutputString1, StrPtr("\x26lt;"), StrPtr("<"), sOutputString1)
		pcreNew.StringRegExpReplace(sOutputString1, StrPtr("\x26gt;"), StrPtr(">"), sOutputString1)
		pcreNew.StringRegExpReplace(sOutputString1, StrPtr("\x26apos;"), StrPtr("'"), sOutputString1)
		pcreNew.StringRegExpReplace(sOutputString1, StrPtr("\x26quot;"), StrPtr(Chr(34)), sOutputString1)
	'Next
	TimeOrig = Timer() - TimeOrigStart

	TimeCompiledStart = Timer()
	'For i  As ULong = 1 To iLim
		Do
			pcreNew.StringRegExpReplace(sOutputString2, iPattern1, iReplasementPattern1, sOutputString2)
		Loop While pcreNew.IsMatch

		pcreNew.StringRegExpReplace(sOutputString2, iPattern2, iReplasementPattern2, sOutputString2)
		pcreNew.StringRegExpReplace(sOutputString2, iPattern3, iReplasementPattern3, sOutputString2)
		pcreNew.StringRegExpReplace(sOutputString2, iPattern4, iReplasementPattern4, sOutputString2)
		pcreNew.StringRegExpReplace(sOutputString2, iPattern5, iReplasementPattern5, sOutputString2)
	'Next
	TimeCompiled = Timer() - TimeCompiledStart

	TimeCompiled2Start = Timer()
	'For i  As ULong = 1 To iLim
		pcreNew.StringRegExpReplaceXMLSpecials(sOutputString3)
	'Next
	TimeCompiled2 = Timer() - TimeCompiled2Start
	iReplacementCountCompiled2 = pcreNew.iReplacementCount

	' the step-by-step variants are not counted separately here;
	' the total reported by StringRegExpReplaceXMLSpecials is reused for all three
	iReplacementCountOrig = iReplacementCountCompiled2
	iReplacementCountCompiled = iReplacementCountCompiled2

	Print
	Print "Original  Time........................ = ";Format (TimeOrig * 1000,"#0.000000");" milli seconds"
	Print "Original  iReplacementCount........... = ";iReplacementCountOrig;"/";Format (TimeOrig * 1000000/iReplacementCountOrig,"#0.000000");" micro seconds per match"
	Print
	Print "Compiled  Time ....................... = ";Format (TimeCompiled * 1000,"#0.000000");" milli seconds"
	Print "Compiled  iReplacementCount........... = ";iReplacementCountCompiled;"/";Format (TimeCompiled * 1000000/iReplacementCountCompiled,"#0.000000");" micro seconds per match"
	Print "Compiled  Replacement Equal........... = ";IIf(*sOutputString1 = *sOutputString2,"true","false")
	Print
	Print "Compiled+ Time ....................... = ";Format (TimeCompiled2 * 1000,"#0.000000");" milli seconds"
	Print "Compiled+ iReplacementCount........... = ";iReplacementCountCompiled2;"/";Format (TimeCompiled2 * 1000000/iReplacementCountCompiled2,"#0.000000");" micro seconds per match"
	Print "Compiled+ Replacement Equal........... = ";IIf(*sOutputString1 = *sOutputString3,"true","false")
	Print

	' reset the output buffers before the next section
	sOutputString1 = ReAllocate(sOutputString1,64)
	Clear(*sOutputString1,0,64)
	sOutputString2 = ReAllocate(sOutputString2,64)
	Clear(*sOutputString2,0,64)
	sOutputString3 = ReAllocate(sOutputString3,64)
	Clear(*sOutputString3,0,64)

	Print String(64,"#")
	Print "StringRegExpReplace :"
	Print String(64,".")

	TimeOrigStart = Timer()
	'For i  As ULong = 1 To iLim
		pcreNew.StringRegExpReplace(sSubjectString, sPatternString, sReplasementString, sOutputString1)
	'Next
	TimeOrig = Timer() - TimeOrigStart
	iReplacementCountOrig = pcreNew.iReplacementCount

	TimeCompiledStart = Timer()
	'For i  As ULong = 1 To iLim
		pcreNew.StringRegExpReplace(sSubjectString, iTestParrern, iReplasementPattern, sOutputString2)
	'Next
	TimeCompiled = Timer() - TimeCompiledStart
	iReplacementCountCompiled = pcreNew.iReplacementCount

	Print
	Print "Original  Time........................ = ";Format (TimeOrig * 1000,"#0.000000");" milli seconds"
	Print "Original  iReplacementCount........... = ";iReplacementCountOrig;"/";Format (TimeOrig * 1000000/iReplacementCountOrig,"#0.000000");" micro seconds per match"
	Print
	Print "Compiled  Time ....................... = ";Format (TimeCompiled * 1000,"#0.000000");" milli seconds"
	Print "Compiled  iReplacementCount........... = ";iReplacementCountCompiled;"/";Format (TimeCompiled * 1000000/iReplacementCountCompiled,"#0.000000");" micro seconds per match"
	Print "Compiled  Replacement Equal........... = ";IIf(*sOutputString1 = *sOutputString2,"true","false")
	Print

	Print String(64,"#")
	Print "StringRegExpBoolean :"
	Print String(64,".")

	iLim = 1000

	TimeOrigStart = Timer()
	For i  As ULong = 1 To iLim
		pcreNew.StringRegExpBoolean(sSubjectString, sPatternString)
	Next
	TimeOrig = Timer() - TimeOrigStart
	' the counters are reused here to hold the IsMatch flag of each variant
	iReplacementCountOrig = pcreNew.IsMatch

	TimeCompiledStart = Timer()
	For i  As ULong = 1 To iLim
		pcreNew.StringRegExpBoolean(sSubjectString, iTestParrern)
	Next
	TimeCompiled = Timer() - TimeCompiledStart
	iReplacementCountCompiled = pcreNew.IsMatch

	Print
	Print "Original  Time *";iLim;".............. = ";Format (TimeOrig * 1000,"#0.000000");" milli seconds per ";iLim
	Print "Original  IsMatch..................... = ";iReplacementCountOrig
	Print
	Print "Compiled  Time *";iLim;".............. = ";Format (TimeCompiled * 1000,"#0.000000");" milli seconds per ";iLim
	Print "Compiled  IsMatch..................... = ";iReplacementCountCompiled
	Print "Compiled  Replacement Equal........... = ";IIf(iReplacementCountOrig = iReplacementCountCompiled,"true","false")
	Print

	Dim As String aRezult(), aRezult1()

	ReDim aRezult(1 To 8), aRezult1(1 To 8)
	Dim As Long iRows,iColumns,iElements, iRows1,iColumns1,iElements1, iRezult = 0

	Print String(64,"#")
	Print "StringRegExpArray1d1Based :"
	Print String(64,".")

	TimeOrigStart = Timer()
	'For i  As ULong = 1 To iLim
		pcreNew.StringRegExpArray1d1Based(sSubjectString, sPatternString, aRezult())
	'Next
	TimeOrig = Timer() - TimeOrigStart

	iRows = pcreNew.iRows
	iColumns = pcreNew.iColumns
	iElements = pcreNew.iCount

	TimeCompiledStart = Timer()
	'For i  As ULong = 1 To iLim
		pcreNew.StringRegExpArray1d1Based(sSubjectString, iTestParrern, aRezult1())
	'Next
	TimeCompiled = Timer() - TimeCompiledStart

	iRows1 = pcreNew.iRows
	iColumns1 = pcreNew.iColumns
	iElements1 = pcreNew.iCount

	Print
	Print "Original  Time................................... = ";Format (TimeOrig * 1000,"#0.000000");" milli seconds"
	Print "Original  1d Array Total Elements (Rows/Columns). = ";iElements;" (";iRows;"/";iColumns;")"
	Print
	Print "Compiled  Time .................................. = ";Format (TimeCompiled * 1000,"#0.000000");" milli seconds"
	' report the counters of the compiled run, not those of the original run
	Print "Compiled  1d Array Total Elements (Rows/Columns). = ";iElements1;" (";iRows1;"/";iColumns1;")"
	Print "Compiled  1d Array Equal......................... = ";

	If iRows = iRows1 And iColumns = iColumns1 And iElements = iElements1 Then
		iRezult = 1
		' the result array holds iElements (= .iCount) entries, so compare all of
		' them; stopping at iRows would skip most captured groups
		For i As Integer = 1 To iElements
			If aRezult(i) <> aRezult1(i) Then

				Print i
				Print aRezult(i)
				Print aRezult1(i)

				iRezult = 0
				Exit For
			EndIf
		Next
	EndIf

	Print IIf(iRezult = 1,"true","false")

	Print String(64,"#")
	Print

	' release the work buffers allocated at the top of the test
	DeAllocate(sOutputString1)
	DeAllocate(sOutputString2)
	DeAllocate(sOutputString3)

	Function = 0
End Function

End Extern



' Test driver: builds a large subject string and runs the speed test on it.
'
' NOTE(review): the subject literal was reconstructed. The posted text had its
' XML entities decoded by the forum, which left bare double quotes inside the
' literal (not valid syntax). The entity-encoded form below is an assumption
' chosen to be consistent with the posted results (11 entity replacements and
' one pattern match per copy) - confirm against the author's original file.
Dim As String sSubjectString = "aaa&amp;amp;bbb&lt;ccc&gt;ddd&apos;eee&quot;fffggg&amp;&lt;&gt;&apos;&quot;hhh"

' Double the subject 11 times (2 ^ 11 = 2048 copies separated by " | ").
For iDoubling As Integer = 1 To 11
	sSubjectString &= " | " & sSubjectString
Next

' Three capture groups: a run of "aaa", of "bbb" and of "ccc".
Dim As String sPattern = "[^a]?(a{3})[^a].+?[^b](b{3})[^b].+?[^c](c{3})[^c]"
' The replacement references every group twice, in mixed order.
Dim As String sReplacement = " [$1 - $3]/[$2 - $2]/[$3 - $1] found "


PCRE1_StringRegExp_Test( sSubjectString, sPattern, sReplacement)
Formatting lines were deleted to reduce the size of the post, so the code is not easy to read.

edit: correct rediming of compilation functions
Last edited by Iczer on Apr 10, 2018 14:04, edited 1 time in total.
Iczer
Posts: 99
Joined: Jul 04, 2017 18:09

Re: PCRE1 RegExp - String functions & Regular Expressions

Post by Iczer »

My test times:

Code: Select all

################################################################
StringRegExpReplaceXMLSpecials :
................................................................

Original  Time........................ = 10,516700 milli seconds
Original  iReplacementCount........... =  22528/0,466828 micro seconds per match

Compiled  Time ....................... = 3,878705 milli seconds
Compiled  iReplacementCount........... =  22528/0,172173 micro seconds per match
Compiled  Replacement Equal........... = true

Compiled+ Time ....................... = 2,032940 milli seconds
Compiled+ iReplacementCount........... =  22528/0,090241 micro seconds per match
Compiled+ Replacement Equal........... = true

################################################################
StringRegExpReplace :
................................................................

Original  Time........................ = 15,721831 milli seconds
Original  iReplacementCount........... =  2048/7,676675 micro seconds per match

Compiled  Time ....................... = 2,076242 milli seconds
Compiled  iReplacementCount........... =  2048/1,013790 micro seconds per match
Compiled  Replacement Equal........... = true

################################################################
StringRegExpBoolean :
................................................................

Original  Time * 1000.............. = 17,075634 milli seconds per  1000
Original  IsMatch..................... = -1

Compiled  Time * 1000.............. = 13,358681 milli seconds per  1000
Compiled  IsMatch..................... = -1
Compiled  Replacement Equal........... = true

################################################################
StringRegExpArray1d1Based :
................................................................

Original  Time................................... = 10,741030 milli seconds
Original  1d Array Total Elements (Rows/Columns). =  6144 ( 2048/ 3)

Compiled  Time .................................. = 2,084902 milli seconds
Compiled  1d Array Total Elements (Rows/Columns). =  6144 ( 2048/ 3)
Compiled  1d Array Equal......................... = true
################################################################
I think these are pretty decent times compared to the previously used "string &= *zstring", but I'm sure perfection is not reached yet ^_^
Roland Chastain
Posts: 1003
Joined: Nov 24, 2011 19:49
Location: France
Contact:

Re: PCRE1 RegExp - String functions & Regular Expressions

Post by Roland Chastain »

Hello! Sounds interesting. Thank you for sharing.

For me, I would need some simpler usage examples. The example you provided is a bit too much complicated for me.

If you have found errors in preg_class.bas, maybe could you share a corrected version, or at least tell us where the errors are?
Iczer
Posts: 99
Joined: Jul 04, 2017 18:09

Re: PCRE1 RegExp - String functions & Regular Expressions

Post by Iczer »

Errors in preg_class.bas: its replace function cannot handle replacement patterns in which group references directly follow one another, like "$1$2". For example:

sSubject = "aaa *** bbb ### ccc"
sPattern = "(\w+)\W+(\w+)\W+(\w+)"
sReplacementPattern = "$1$2$3"

won't work.
Also, the replace function sometimes crashes in the pcre_malloc()/clean_up() functions.

Quick-reference syntax summary can be found at - https://www.pcre.org/original/doc/html/pcresyntax.html
The syntax and semantics of the regular expressions that are supported by PCRE are described in - https://www.pcre.org/original/doc/html/pcrepattern.html

Usage of my PCRE1_RegExpClass:

Code: Select all

Dim As PCRE1_RegExpClass RegExpClass
Static As PCRE1_RegExpClass RegExpClass 
The StringRegExpBoolean() function can be used if you need to confirm the existence of a pattern in the subject string.
Function have two arguments - sSubjectString (string, to search in - as ZString Ptr)
sPattern (the regular expression to compare - as ZString Ptr)

Result - .IsMatch variable - As Boolean, indicates whether a pattern match was found in the subject string - true/false

usage :

Code: Select all

RegExpClass.StringRegExpBoolean(sSubjectString , sPattern)

If RegExpClass.IsMatch Then
	'do something or not
Else
	'do something else
EndIf
StringRegExpReplace() function can be used if you need replace or rebuild subject string according to pattern
Function have four arguments - sSubjectString (string, to search in - as ZString Ptr)
sPattern (the regular expression to compare - as ZString Ptr)
sReplacement (the text to replace the regular expression matching text with. To insert matched group text, $0 - $65536 can be used as back-references- as ZString Ptr)
sOutputString (an updated sSubjectString based on regular expressions - as ZString Ptr)

Rezult - .IsMatch variable - As Boolean, indicate that Pattern match was found subject string - true/false
.iReplacementCount - As Long - count of whole pattern matches found in subject string
.iCount - As Long - number of capture groups in pattern

usage :

Code: Select all

RegExpClass.StringRegExpReplace(sSubjectString , sPattern, sReplacement, sOutputString)

If RegExpClass.iReplacementCount = 0 Then
	'do something or not
Else
	'do something else using sOutputString
EndIf
Or :

Code: Select all

RegExpClass.StringRegExpReplace(sSubjectString, sPattern, sReplacement, sOutputString)
RegExpClass.StringRegExpReplace(sOutputString, sPattern, sReplacement, sOutputString)
RegExpClass.StringRegExpReplace(sOutputString, sPattern, sReplacement, sOutputString)
StringRegExpArray1d1Based() function can be used if you need to split subject string according to pattern, it return array of global matches.
Function have 3 arguments - sSubjectString (string, to search in - as ZString Ptr)
sPattern (the regular expression to match - as ZString Ptr)
aRezult() (rezulting array of matches - As String)

Rezult - .IsMatch variable - As Boolean, indicate that Pattern match was found subject string - true/false
.iCount - As Long - array elements count
.iRows - entire pattern match count
.iColumns - number of capture groups in pattern

Rezulting array aRezult() have format aRezult(1 To .iCount) - it 1d 1Based array.
If number of capture groups in pattern is more than 1, then you can transform aRezult() in to 2darray : aRezult2d(1 to .iRows, 1 to .iColumns) if you need

usage :

Code: Select all

RegExpClass.StringRegExpArray1d1Based( SiteURL, StrPtr("^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?"), aRezult())

If RegExpClass.IsMatch Then
	sBaseSiteURL = aRezult(1) & aRezult(3)
Else
	sBaseSiteURL = ""
EndIf
also with all those functions you can use compiled patterns - it would be even faster

StringRegExpReplaceXMLSpecials()
StringRegExpReplaceHTMLSpecials()

Those functions as their names say - replace special characters in UTF8 string - https://en.wikipedia.org/wiki/List_of_X ... references

Rezult - .IsMatch variable - As Boolean, indicate that specials was found in subject string - true/false
.iReplacementCount - As Long - count of specials found in subject string
Roland Chastain
Posts: 1003
Joined: Nov 24, 2011 19:49
Location: France
Contact:

Re: PCRE1 RegExp - String functions & Regular Expressions

Post by Roland Chastain »

Hello! Thank you for the examples.

Unfortunately I can't compile them. FB 1.05.0, Windows 10.
usage1.o:fake:(.text+0x565): undefined reference to `pcre_jit_stack_free'
usage1.o:fake:(.text+0x5a0): undefined reference to `pcre_free_study'
usage1.o:fake:(.text+0x1494): undefined reference to `pcre_jit_exec'
usage1.o:fake:(.text+0x16ad): undefined reference to `pcre_jit_exec'
usage1.o:fake:(.text+0x1c70): undefined reference to `pcre_jit_exec'
usage1.o:fake:(.text+0x1f20): undefined reference to `pcre_jit_stack_alloc'
usage1.o:fake:(.text+0x1fc7): undefined reference to `pcre_assign_jit_stack'
usage1.o:fake:(.text+0x2068): undefined reference to `pcre_assign_jit_stack'
usage1.o:fake:(.text+0x2109): undefined reference to `pcre_assign_jit_stack'
usage1.o:fake:(.text+0x21aa): undefined reference to `pcre_assign_jit_stack'
usage1.o:fake:(.text+0x224b): undefined reference to `pcre_assign_jit_stack'
usage1.o:fake:(.text+0x22ec): more undefined references to `pcre_assign_jit_stack' follow
usage1.o:fake:(.text+0x23f0): undefined reference to `pcre_jit_exec'
usage1.o:fake:(.text+0x28be): undefined reference to `pcre_jit_stack_alloc'
usage1.o:fake:(.text+0x28e9): undefined reference to `pcre_assign_jit_stack'
It seems that my libpcre.a file is different from the one you use.
Iczer
Posts: 99
Joined: Jul 04, 2017 18:09

Re: PCRE1 RegExp - String functions & Regular Expressions

Post by Iczer »

This shows that the pcre library you are using was compiled without JIT support.
The CMake config should look like this:

Code: Select all

/* config.h for CMake builds */

#define HAVE_DIRENT_H 1
#define HAVE_SYS_STAT_H 1
#define HAVE_SYS_TYPES_H 1
#define HAVE_UNISTD_H 1
#define HAVE_WINDOWS_H 1
#define HAVE_STDINT_H 1                                                   
#define HAVE_INTTYPES_H 1    

/* #undef HAVE_TYPE_TRAITS_H */
/* #undef HAVE_BITS_TYPE_TRAITS_H */

/* #undef HAVE_BCOPY */
#define HAVE_MEMMOVE 1
#define HAVE_STRERROR 1
#define HAVE_STRTOLL 1
/* #undef HAVE_STRTOQ */
#define HAVE__STRTOI64 1

#define PCRE_STATIC 1

#define SUPPORT_PCRE8 1
/* #undef SUPPORT_PCRE16 */
/* #undef SUPPORT_PCRE32 */
#define SUPPORT_JIT 1
#define SUPPORT_PCREGREP_JIT 1
#define SUPPORT_UTF 1
#define SUPPORT_UCP 1
/* #undef EBCDIC */
/* #undef EBCDIC_NL25 */
#define BSR_ANYCRLF 1
/* #undef NO_RECURSE */

#define HAVE_LONG_LONG 1
#define HAVE_UNSIGNED_LONG_LONG 1

/* #undef SUPPORT_LIBBZ2 */
/* #undef SUPPORT_LIBZ */
/* #undef SUPPORT_LIBEDIT */
/* #undef SUPPORT_LIBREADLINE */

/* #undef SUPPORT_VALGRIND */
/* #undef SUPPORT_GCOV */

#define NEWLINE			-2
#define POSIX_MALLOC_THRESHOLD	10
#define LINK_SIZE		2
#define PARENS_NEST_LIMIT       250
#define MATCH_LIMIT		10000000
#define MATCH_LIMIT_RECURSION	MATCH_LIMIT
#define PCREGREP_BUFSIZE        20480

#define MAX_NAME_SIZE	32
#define MAX_NAME_COUNT	10000

/* end config.h for CMake builds */
UEZ
Posts: 988
Joined: May 05, 2017 19:59
Location: Germany

Re: PCRE1 RegExp - String functions & Regular Expressions

Post by UEZ »

Hi Iczer,

In which folder do I have to put config.h / pcre.h?

I get the same error message: fake:(.text+0x1494): undefined reference to `pcre_jit_exec'
MrSwiss
Posts: 3910
Joined: Jun 02, 2013 9:27
Location: Switzerland

Re: PCRE1 RegExp - String functions & Regular Expressions

Post by MrSwiss »

UEZ wrote:I get the same em: fake:(.text+0x1494): undefined reference to `pcre_jit_exec'
As Iczer clearly stated: you will have to build the library first (from source),
with the help of the supplied CMake configuration file.
The library has to support "JIT" (aka "just-in-time" compilation).
(The one you currently seem to use apparently doesn't support "JIT".)
Post Reply