Page 1 of 1

[fixed in trunk]string::split

Posted: Tue Sep 27, 2016 6:43 am
by manni63
The method string::split doesn't work correctly if the flag ignoreEmptyTokens is set to false and there are consecutive separators in a string.

Code: Select all

 
    u32 split(container& ret, const T* const c, u32 count=1, bool ignoreEmptyTokens=true, bool keepSeparators=false) const
    {
        if (!c)
            return 0;
 
        const u32 oldSize=ret.size();
        u32 lastpos = 0;
        bool lastWasSeparator = false;
        for (u32 i=0; i<used; ++i)
        {
            bool foundSeparator = false;
            for (u32 j=0; j<count; ++j)
            {
                if (array[i] == c[j])
                {
                   if ((!ignoreEmptyTokens || i - lastpos != 0) && !lastWasSeparator)
                        ret.push_back(string<T,TAlloc>(&array[lastpos], i - lastpos));
                    foundSeparator = true;
                    lastpos = (keepSeparators ? i : i + 1);
                    break;
                }
            }
            lastWasSeparator = foundSeparator;
        }
        if ((used - 1) > lastpos)
            ret.push_back(string<T,TAlloc>(&array[lastpos], (used - 1) - lastpos));
        return ret.size()-oldSize;
    }
 
The reason is the flag lastWasSeparator, that should be removed from the if-statement:

Code: Select all

 
u32 split(container& ret, const T* const c, u32 count=1, bool ignoreEmptyTokens=true, bool keepSeparators=false) const
    {
        if (!c)
            return 0;
 
        const u32 oldSize=ret.size();
        u32 lastpos = 0;
        bool lastWasSeparator = false;
        for (u32 i=0; i<used; ++i)
        {
            bool foundSeparator = false;
            for (u32 j=0; j<count; ++j)
            {
                if (array[i] == c[j])
                {
                   if (!ignoreEmptyTokens || i - lastpos != 0)
                        ret.push_back(string<T,TAlloc>(&array[lastpos], i - lastpos));
                    foundSeparator = true;
                    lastpos = (keepSeparators ? i : i + 1);
                    break;
                }
            }
            lastWasSeparator = foundSeparator;
        }
        if ((used - 1) > lastpos)
            ret.push_back(string<T,TAlloc>(&array[lastpos], (used - 1) - lastpos));
        return ret.size()-oldSize;
    }
 

Re: string::split

Posted: Tue Sep 27, 2016 9:10 am
by CuteAlien
Thanks for the report. I'll check it out.

Re: string::split

Posted: Tue Sep 27, 2016 7:49 pm
by CuteAlien
Strange stuff, it's a lot simpler now (could remove lastWasSeparator and foundSeparator variables completely).
I'm also confused in the way it adds separators (adding them to the string behind the separator instead to the previous one - I guess both correct in some way - just not what expected for some reason).

I changed it only in Irrlicht trunk. Not sure if I should backport it as it changes behaviour (even if old one was wrong). Pretty sure it was a bug - but I'm still irritated because that code looked so deliberate... basically bugfix was deleting all special code. So I'm really wondering if I missed anything.

Anyway - my test-code says new code is correct:

Code: Select all

 
#include <irrlicht.h>
#include <iostream>
 
using namespace irr;
using namespace core;
using namespace scene;
using namespace video;
using namespace io;
using namespace gui;
 
#ifdef _MSC_VER
#pragma comment(lib, "Irrlicht.lib")
#endif
 
void printResult(const array<stringc>& arr, const stringc& testName)
{
    std::cout << "test: " << testName.c_str() << std::endl;
    for ( u32 i=0; i<arr.size(); ++i )
    {
        std::cout << i << ")" << arr[i].c_str() << std::endl;
    }
    std::cout << std::endl;
}
 
int main()
{
    stringc splitTest1(".foo.bar..foofoo.");
    stringc splitTest2(".,.,");
    stringc splitTest3("");
 
    array<stringc> result;
    
    // ... bool ignoreEmptyTokens, bool keepSeparators
    result.clear();
    splitTest1.split(result, ".", 1, true, true);
    printResult(result, splitTest1);
    result.clear();
    splitTest1.split(result, ".", 1, true, false);
    printResult(result, splitTest1);
    result.clear();
    splitTest1.split(result, ".", 1, false, false);
    printResult(result, splitTest1);
    result.clear();
    splitTest1.split(result, ".", 1, false, true);
    printResult(result, splitTest1);
    
    
    result.clear();
    splitTest2.split(result, ".,", 2, true, true);
    printResult(result, splitTest2);
    result.clear();
    splitTest2.split(result, ".,", 2, true, false);
    printResult(result, splitTest2);
    result.clear();
    splitTest2.split(result, ".,", 2, false, false);
    printResult(result, splitTest2);
    result.clear();
    splitTest2.split(result, ".,", 2, false, true);
    printResult(result, splitTest2);    
    
    result.clear();
    splitTest3.split(result, ".", 1, true, true);
    printResult(result, splitTest3);
    result.clear();
    splitTest3.split(result, ".", 1, true, false);
    printResult(result, splitTest3);
    result.clear();
    splitTest3.split(result, ".", 1, false, false);
    printResult(result, splitTest3);
    result.clear();
    splitTest3.split(result, ".", 1, false, true);
    printResult(result, splitTest3);        
 
    return 0;
}
 

Re: [fixed in trunk]string::split

Posted: Tue Oct 04, 2016 6:35 pm
by chronologicaldot
Maybe it was one of those "I'll get back to it" kind of bugs... or an "I forgot why I wanted this" bug.