WordPress Version: 6.4
/**
 * Heuristically determines whether a string looks like UTF-8.
 *
 * The string is walked byte by byte. Each lead byte announces how many
 * continuation bytes (10bbbbbb) have to follow it; the check fails as soon
 * as a lead byte matches no known pattern or an announced continuation
 * byte is missing or malformed.
 *
 * NOTE: Only the bit patterns of the bytes are examined. Lead bytes that
 * announce 5- and 6-byte sequences are accepted as well, even though
 * UTF-8 byte sequences have a maximum length of 4.
 *
 * @author bmorel at ssi dot fr (modified)
 * @since 1.2.1
 *
 * @param string $str The string to be checked
 * @return bool True if $str fits a UTF-8 model, false otherwise.
 */
function seems_utf8($str)
{
    // The length is taken between the binary-safe encoding switch and its
    // reset (presumably so that strlen() counts bytes, not characters).
    mbstring_binary_safe_encoding();
    $byte_count = strlen($str);
    reset_mbstring_encoding();

    // Lead-byte models, tried in order: array( mask, expected bits, continuation bytes ).
    $lead_models = array(
        array(0x80, 0x00, 0), // 0bbbbbbb
        array(0xe0, 0xc0, 1), // 110bbbbb
        array(0xf0, 0xe0, 2), // 1110bbbb
        array(0xf8, 0xf0, 3), // 11110bbb
        array(0xfc, 0xf8, 4), // 111110bb
        array(0xfe, 0xfc, 5), // 1111110b
    );

    $pos = 0;
    while ($pos < $byte_count) {
        $lead = ord($str[$pos]);
        $pos++;

        // Find the first model the lead byte fits; -1 means "none".
        $pending = -1;
        foreach ($lead_models as $model) {
            if (($lead & $model[0]) === $model[1]) {
                $pending = $model[2];
                break;
            }
        }

        if ($pending < 0) {
            // Does not match any model.
            return false;
        }

        // Every announced continuation byte must exist and look like 10bbbbbb.
        while ($pending > 0) {
            if ($pos >= $byte_count) {
                // The string ends in the middle of a sequence.
                return false;
            }
            if ((ord($str[$pos]) & 0xc0) !== 0x80) {
                return false;
            }
            $pos++;
            $pending--;
        }
    }

    return true;
}