Text replacement function for natural language sentence

Post your FreeBASIC tips and tricks here. Please don’t post your code without including an explanation.
Tourist Trap
Posts: 2817
Joined: Jun 02, 2015 16:24

Text replacement function for natural language sentence

Postby Tourist Trap » Jan 02, 2017 13:32

Hi,

That's a function that is able to replace text in a string, but with options allowing to take care of keeping the sentence correctly shaped from a human point of view. It can be done of course in many steps with simpler functions, and it would probably result in a faster execution but I haven't found how (maybe a word should be considered as a character series + one space before any replacement, removal).

Anyway slowly but surely it seems to behave as expected unless maybe some hidden cases remain, as always.

Code: Select all

 'function ReplaceSubString1BySubString2InMainString
'makes some replacements/clean-up of a human language sentence
'
'INFO UDT is for debug purpose initially,
'can be removed with minor changes to the function
'
'no optimization is done, so a rather slow function, but seems to do the job
'tested OK on XP32 fb1.05


type INFO
    declare operator cast() as string
    declare property EllapsedTimeInfo() as double
    declare sub StartInfoChronometer()
    declare sub EndInfoChronometer()
    declare sub AddInfo(byref InfoText as const string)
        as double   _startTime
        as double   _endTime
        as double   _lastTotalDuration
        as string   _otherInfo(any)
end type
operator INFO.cast() as string
    var consoleFileNum  => freeFile()
    print #consoleFileNum, iif(screenPtr<>0, "CONSOLE SECONDARY OUTPUT", "")
    open cons for output as #consoleFileNum
        for index as integer =  lBound(THIS._otherInfo) to _
                                uBound(THIS._otherInfo) - 1
            print #consoleFileNum, THIS._otherInfo(index)
        next index
    print #consoleFileNum, iif(screenPtr<>0, chr(13)&chr(10), "")
    close #consoleFileNum
    '
    return THIS._otherInfo(uBound(THIS._otherInfo))
end operator
property INFO.EllapsedTimeInfo() as double
    if THIS._endTime>-1 then
        return  THIS._lastTotalDuration
    else
        return TIMER - THIS._startTime
    end if
end property
sub INFO.StartInfoChronometer()
    THIS._startTime = TIMER
    THIS._endTime   = -1
end sub
sub INFO.EndInfoChronometer()
    THIS._endTime   = TIMER
    THIS._lastTotalDuration = THIS._endTime - THIS._startTime
end sub
sub INFO.AddInfo(byref InfoText as const string)
    redim preserve THIS._otherInfo(uBound(THIS._otherInfo) + 1)
    THIS._otherInfo(uBound(THIS._otherInfo)) = InfoText
end sub


function ReplaceSubString1BySubString2InMainString(byref SubString1 as const string="", _
                                                   byref SubString2 as const string="", _
                                                   byref MainString as const string, _
                                                   byval MutilateWords as boolean=TRUE, _
                                                   byval WorkCaseSensitively as boolean=TRUE, _
                                                   byval ShrinkSpaceBloc as boolean=TRUE) _
                                                   as INFO
    dim as INFO resultInfo
    resultInfo.AddInfo("init="& MainString)
    resultInfo.AddInfo("word_mutilation="& str(MutilateWords))
    resultInfo.AddInfo("case_sensitivity="& str(WorkCaseSensitively))
    resultInfo.AddInfo("space_shrinking="& str(ShrinkSpaceBloc))
    resultInfo.StartInfoChronometer()
    '
    if MainString="" then
        resultInfo.EndInfoChronometer()
        resultInfo.AddInfo("time="& str(resultInfo.EllapsedTimeInfo))
        resultInfo.AddInfo(MainString)
        '
        return resultInfo
    end if
    '
    var notConstantString   => MainString &""
    '
    select case WorkCaseSensitively
    case TRUE
        'case sensitive
        var separatorMotive     => iif(SubString2="", asc(space(1)), SubString2[0])
        var sentence            => trim(notConstantString, any chr(separatorMotive))
        if len(notConstantString)>3 andAlso _
           SubString1="" then
            var sentenceIndex   => 1
            while sentenceIndex<=(len(sentence) - 3)
                if sentence[sentenceIndex]=separatorMotive then
                    if sentenceIndex=1 andAlso sentence[2]=separatorMotive then
                        sentence =  left(sentence, sentenceIndex + 1) & _
                                    right(sentence, len(sentence) - sentenceIndex - 2)
                        sentenceIndex -= 1
                    elseIf sentence[sentenceIndex + 1]=separatorMotive then
                        sentence =  left(sentence, sentenceIndex + 1) & _
                                    right(sentence, len(sentence) - sentenceIndex - 2)
                        sentenceIndex -= 1
                    else
                        '
                    end if
                end if
                sentenceIndex += 1
            wend
            notConstantString = sentence
        end if
    case else
        'case insensitive
        var separatorMotive => iif(SubString2="", asc(space(1)), asc(lCase(chr(SubString2[0]))) )
        var sentence        => lTrim(lCase(notConstantString), any chr(separatorMotive))
        var caseConservativeSentence    => right(notConstantString, _
                                                    len(notConstantString) - _
                                                    (   len(notConstantString) - _
                                                        len(sentence)   ) _
                                                    )
        sentence                    = rTrim(lCase(  caseConservativeSentence), _
                                                    any chr(separatorMotive)    )
        caseConservativeSentence    = left(caseConservativeSentence, len(sentence))
        if len(notConstantString)>3 andAlso _
           SubString1="" then
            var sentenceIndex   => 1
            while sentenceIndex<=(len(sentence) - 3)
                if sentence[sentenceIndex]=separatorMotive then
                    if sentenceIndex=1 andAlso sentence[2]=separatorMotive then
                        sentence =  left(sentence, sentenceIndex + 1) & _
                                    right(sentence, len(sentence) - sentenceIndex - 2)
                        caseConservativeSentence =  left(   caseConservativeSentence, _
                                                            sentenceIndex + 1   ) & _
                                                        right(  caseConservativeSentence, _
                                                                len(caseConservativeSentence) - _
                                                                sentenceIndex - 2   )
                        sentenceIndex -= 1
                    elseIf sentence[sentenceIndex + 1]=separatorMotive then
                        sentence =  left(sentence, sentenceIndex + 1) & _
                                    right(sentence, len(sentence) - sentenceIndex - 2)
                        caseConservativeSentence =  left(   caseConservativeSentence, _
                                                            sentenceIndex + 1   ) & _
                                                        right(  caseConservativeSentence, _
                                                                len(caseConservativeSentence) - _
                                                                sentenceIndex - 2   )
                        sentenceIndex -= 1
                    else
                        '
                    end if
                end if
                sentenceIndex += 1
            wend
            notConstantString = caseConservativeSentence
        end if
    end select
    '
    var subStringPosition   => iif( _
                                    WorkCaseSensitively, _
                                    inStr(1, notConstantString, SubString1), _
                                    inStr(1, lCase(notConstantString), lCase(SubString1)) _
                                    )
    #macro _TEMPMACROSTANDINGFORREPLACE()
            notConstantString = left(notConstantString, subStringPosition - 1) & _
                                SubString2 & _
                                right( _
                                        notConstantString,  _
                                        len(notConstantString) - _
                                        len(SubString1) + 1 - _
                                        subStringPosition _
                                        )
    #endMacro
    while subStringPosition>0
        select case MutilateWords
        'mutilate words
        case TRUE
            _TEMPMACROSTANDINGFORREPLACE()
        case else
        'don't mutilate words
            if len(SubString1)=len(notConstantString) then
                notConstantString = SubString2
            elseIf  subStringPosition=1 andAlso _
                    notConstantString[subStringPosition + len(SubString1) - 1]=asc(space(1)) then
                _TEMPMACROSTANDINGFORREPLACE()
            elseIf  subStringPosition=(len(notConstantString) - len(SubString1)+ 1) andAlso _
                    notConstantString[subStringPosition - 2]=asc(space(1)) then
                _TEMPMACROSTANDINGFORREPLACE()
            elseIf  subStringPosition>1 andAlso _
                    subStringPosition<(len(notConstantString) - len(SubString1)+ 1) andAlso _
                    notConstantString[subStringPosition - 2]=asc(space(1)) andAlso _
                    notConstantString[subStringPosition + len(SubString1) - 1]=asc(space(1)) then
                _TEMPMACROSTANDINGFORREPLACE()
            else
                'if any, a replacement here would mutilate a word
            end if
        end select
        '
        subStringPosition = iif( _
                                WorkCaseSensitively, _
                                inStr(subStringPosition + 1, notConstantString, SubString1), _
                                inStr(subStringPosition + 1, lCase(notConstantString), lCase(SubString1)) _
                                )
    wend
    #undef _TEMPMACROSTANDINGFORREPLACE
    '
    if ShrinkSpaceBloc then
        notConstantString => _
            ReplaceSubString1BySubString2InMainString(  "", _
                                                        space(1), _
                                                        notConstantString, _
                                                        , _
                                                        , _
                                                        FALSE)
    end if
    '
    resultInfo.EndInfoChronometer()
    resultInfo.AddInfo("time="& str(resultInfo.EllapsedTimeInfo))
    resultInfo.AddInfo(notConstantString)
    return resultInfo
end function


''===========================================================
''======================================================DEMO=
screenRes 725, 380, 8
Width 725\8, 380\16

var sentence    => _
"   AaAaAaH! The RAaaaaaBBIT eats aaAaall the CAAAArRots     from aaAAAaaa  gaaaaarden of     mine.     "

'makes clean-up or/and replacements

'clean-up
? "clean-up"
? "1::"; ReplaceSubString1BySubString2InMainString(,"to_clean_char_1_only","")
? "2::"; ReplaceSubString1BySubString2InMainString(, "a", sentence)
? "3::"; ReplaceSubString1BySubString2InMainString("", "a", sentence, TRUE)
? "4::"; ReplaceSubString1BySubString2InMainString("", "a", sentence, FALSE, TRUE)
? "5::"; ReplaceSubString1BySubString2InMainString("", "a", sentence, TRUE, FALSE)
? "6::"; ReplaceSubString1BySubString2InMainString("", "A", sentence, FALSE, TRUE, TRUE)
? "7::"; ReplaceSubString1BySubString2InMainString("", "a", sentence, TRUE, FALSE, FALSE)
'full clean-up version (also slowest) in its simplest form may in general be:
? "8::"; ReplaceSubString1BySubString2InMainString(, "a", sentence, TRUE, FALSE, TRUE)

'replacement uses the first argument
? : ? "replacement"
? "1::"; ReplaceSubString1BySubString2InMainString("to_replace","replacer","")
? "2::"; ReplaceSubString1BySubString2InMainString("a", "x", sentence, TRUE, FALSE, TRUE)


''===========================================================
getKey()
'(eof)

Return to “Tips and Tricks”

Who is online

Users browsing this forum: No registered users and 1 guest