[mojosetup] utf8codepoint() must advance the pointer even when encountering an invalid character
Francois Gouget
fgouget at codeweavers.com
Mon May 23 18:08:52 EDT 2011
# HG changeset patch
# User Francois Gouget <fgouget at codeweavers.com>
# Date 1306186314 -7200
# Node ID 446e28688755f9297d0b1a7e073f7c5bfc75147e
# Parent bc35a9e23f6d798f5da5ba91bbca1692d617e82c
utf8codepoint() must advance the pointer even when encountering an invalid character.
Otherwise utf8len() gets into an infinite loop if the string is not valid UTF-8.
diff -r bc35a9e23f6d -r 446e28688755 mojosetup.c
--- a/mojosetup.c Mon May 23 23:32:09 2011 +0200
+++ b/mojosetup.c Mon May 23 23:31:54 2011 +0200
@@ -667,12 +667,13 @@
else if (octet < 224) // two octets
{
+ (*_str)++; // advance at least one byte in case of an error
octet -= (128+64);
octet2 = (uint32) ((uint8) *(++str));
if ((octet2 & (128+64)) != 128) // Format isn't 10xxxxxx?
return UNICODE_BOGUS_CHAR_VALUE;
- *_str += 2; // skip to next possible start of codepoint.
+ *_str += 1; // skip to next possible start of codepoint.
retval = ((octet << 6) | (octet2 - 128));
if ((retval >= 0x80) && (retval <= 0x7FF))
return retval;
@@ -680,6 +681,7 @@
else if (octet < 240) // three octets
{
+ (*_str)++; // advance at least one byte in case of an error
octet -= (128+64+32);
octet2 = (uint32) ((uint8) *(++str));
if ((octet2 & (128+64)) != 128) // Format isn't 10xxxxxx?
@@ -689,7 +691,7 @@
if ((octet3 & (128+64)) != 128) // Format isn't 10xxxxxx?
return UNICODE_BOGUS_CHAR_VALUE;
- *_str += 3; // skip to next possible start of codepoint.
+ *_str += 2; // skip to next possible start of codepoint.
retval = ( ((octet << 12)) | ((octet2-128) << 6) | ((octet3-128)) );
// There are seven "UTF-16 surrogates" that are illegal in UTF-8.
@@ -712,6 +714,7 @@
else if (octet < 248) // four octets
{
+ (*_str)++; // advance at least one byte in case of an error
octet -= (128+64+32+16);
octet2 = (uint32) ((uint8) *(++str));
if ((octet2 & (128+64)) != 128) // Format isn't 10xxxxxx?
@@ -725,7 +728,7 @@
if ((octet4 & (128+64)) != 128) // Format isn't 10xxxxxx?
return UNICODE_BOGUS_CHAR_VALUE;
- *_str += 4; // skip to next possible start of codepoint.
+ *_str += 3; // skip to next possible start of codepoint.
retval = ( ((octet << 18)) | ((octet2 - 128) << 12) |
((octet3 - 128) << 6) | ((octet4 - 128)) );
if ((retval >= 0x10000) && (retval <= 0x10FFFF))
@@ -738,6 +741,7 @@
else if (octet < 252) // five octets
{
+ (*_str)++; // advance at least one byte in case of an error
octet = (uint32) ((uint8) *(++str));
if ((octet & (128+64)) != 128) // Format isn't 10xxxxxx?
return UNICODE_BOGUS_CHAR_VALUE;
@@ -754,7 +758,7 @@
if ((octet & (128+64)) != 128) // Format isn't 10xxxxxx?
return UNICODE_BOGUS_CHAR_VALUE;
- *_str += 5; // skip to next possible start of codepoint.
+ *_str += 4; // skip to next possible start of codepoint.
return UNICODE_BOGUS_CHAR_VALUE;
} // else if
More information about the mojosetup mailing list