Here are the modifications I have made in the CXMLReaderImpl::parseText() method:
Code: Select all
// Trims leading and trailing whitespace from the text range [start, end).
// Returns false when the range is empty or contains only whitespace.
// NOTE: this is an excerpt; the remainder of the function body is not shown.
bool setText(char_type* start, char_type* end)
{
    // The check is always done at the beginning, without the former limit of
    // 3 characters, so text composed only of whitespace is squeezed out.
    // With a configuration flag, the trimming becomes optional:
#ifdef WITH_WHITESPACE_COMPRESSION
    if (m_bWithWhitespaceCompression)
#endif
    {
        // This part of the code is unchanged: skip the leading whitespace.
        char_type* p = start;
        for (; p != end; ++p)
            if (!isWhiteSpace(*p))
                break;

        // Nothing but whitespace (or an empty range).
        if (p == end)
            return false;

        // There is at least one non-whitespace character, and the text
        // begins at its position.
        start = p;

        // Now drop the whitespace at the end of the text. The scan stops at
        // `start` at the latest, which is already known to be non-whitespace.
        for (p = end - 1; p != start; --p)
        {
            if (!isWhiteSpace(*p))
                break;
        }

        // p points at the last non-whitespace character. p == start is a
        // legitimate outcome (the trimmed text is a single character), so it
        // must not be treated as an error.
        end = p + 1;
        // end of modification
    }
I have added a piece of code that tests a whitespace-compression configuration flag. Note that I have removed the 3-character length test. I think it is simpler for users if the parser has a binary behavior:
- you want a total whitespace compression or
- you want no whitespace compression at all.
For the moment, I have not implemented whitespace compression inside the text (I have not run into that problem yet).
The code should look something like this (totally untested):
Code: Select all
// Returns a copy of origstr in which every run of two or more consecutive
// whitespace characters (as judged by isWhiteSpace) is collapsed:
//   - a run followed by more text is replaced by a single space;
//   - a run that reaches the end of the string is dropped entirely;
//   - a lone whitespace character is copied through unchanged.
// An empty input yields an empty string.
core::string<char_type> compressMultipleWhitespace(
    const core::string<char_type>& origstr)
{
    const int total_len = static_cast<int>(origstr.size());

    core::string<char_type> newstr;
    // The result is never longer than the input; reserving up front avoids
    // growing the buffer on each single-character append.
    newstr.reserve(total_len + 1);

    int pos = 0;
    while (pos < total_len)
    {
        if (!isWhiteSpace(origstr[pos]))
        {
            newstr.append(origstr[pos]);
            ++pos;
            continue;
        }

        // origstr[pos] is whitespace: find the end of this whitespace run.
        int run_end = pos + 1;
        while (run_end < total_len && isWhiteSpace(origstr[run_end]))
        {
            ++run_end;
        }

        if (run_end - pos == 1)
        {
            // A single whitespace character is kept as it is.
            newstr.append(origstr[pos]);
        }
        else if (run_end < total_len)
        {
            // Multiple whitespace characters followed by text: one space.
            newstr.append(static_cast<char_type>(' '));
        }
        // Otherwise the run reaches the end of the string: emit nothing.

        pos = run_end;
    }

    return newstr;
}
Voila. Hope this can help...