seems_utf8

The timeline below shows how the WordPress function seems_utf8 has changed across WordPress versions. If a version is not listed, refer to the next available version listed below it.

WordPress Version: 6.4

/**
 * Checks whether a string appears to be UTF-8 encoded.
 *
 * Every lead byte has to match one of the UTF-8 lead-byte bit patterns and
 * has to be followed by as many continuation bytes (10bbbbbb) as that
 * pattern announces.
 *
 * NOTE: Lead bytes announcing 5- and 6-byte sequences (111110bb and
 *       1111110b) are accepted as well, although UTF8 has Bytes
 *       Sequences with a maximum length of 4.
 *
 * @author bmorel at ssi dot fr (modified)
 * @since 1.2.1
 *
 * @param string $str The string to be checked
 * @return bool True if $str fits a UTF-8 model, false otherwise.
 */
function seems_utf8($str)
{
    // Lead-byte models: bit mask, expected masked value, continuation bytes.
    $models = [
        [0x80, 0x00, 0], // 0bbbbbbb
        [0xe0, 0xc0, 1], // 110bbbbb
        [0xf0, 0xe0, 2], // 1110bbbb
        [0xf8, 0xf0, 3], // 11110bbb
        [0xfc, 0xf8, 4], // 111110bb
        [0xfe, 0xfc, 5], // 1111110b
    ];

    // The length is taken between these two helper calls, which are defined
    // outside this snippet (presumably so that strlen() counts bytes).
    mbstring_binary_safe_encoding();
    $total = strlen($str);
    reset_mbstring_encoding();

    $offset = 0;
    while ($offset < $total) {
        $lead = ord($str[$offset]);

        // Find the model this lead byte belongs to, if any.
        $expected = null;
        foreach ($models as $model) {
            if (($lead & $model[0]) === $model[1]) {
                $expected = $model[2];
                break;
            }
        }

        if (null === $expected) {
            // Does not match any model.
            return false;
        }

        // The announced number of bytes matching 10bbbbbb must follow.
        while ($expected > 0) {
            $offset++;
            if ($offset === $total) {
                // The string ends in the middle of a sequence.
                return false;
            }
            if ((ord($str[$offset]) & 0xc0) !== 0x80) {
                return false;
            }
            $expected--;
        }

        $offset++;
    }

    return true;
}

WordPress Version: 6.3

/**
 * Tests whether a string fits the UTF-8 byte layout.
 *
 * The string is walked one sequence at a time: the lead byte decides how
 * many continuation bytes (10bbbbbb) must follow, and each of them is
 * verified in turn.
 *
 * NOTE: Lead bytes for 5- and 6-byte sequences are also accepted, although
 *       UTF8 has Bytes Sequences with a maximum length of 4.
 *
 * @author bmorel at ssi dot fr (modified)
 * @since 1.2.1
 *
 * @param string $str The string to be checked
 * @return bool True if $str fits a UTF-8 model, false otherwise.
 */
function seems_utf8($str)
{
    // Helpers defined outside this snippet wrap the length lookup
    // (presumably to make strlen() report bytes rather than characters).
    mbstring_binary_safe_encoding();
    $byte_count = strlen($str);
    reset_mbstring_encoding();

    $pos = 0;
    while ($pos < $byte_count) {
        $lead = ord($str[$pos]);

        if ($lead < 0x80) {
            // 0bbbbbbb
            $trailing = 0;
        } elseif (0xc0 == ($lead & 0xe0)) {
            // 110bbbbb
            $trailing = 1;
        } elseif (0xe0 == ($lead & 0xf0)) {
            // 1110bbbb
            $trailing = 2;
        } elseif (0xf0 == ($lead & 0xf8)) {
            // 11110bbb
            $trailing = 3;
        } elseif (0xf8 == ($lead & 0xfc)) {
            // 111110bb
            $trailing = 4;
        } elseif (0xfc == ($lead & 0xfe)) {
            // 1111110b
            $trailing = 5;
        } else {
            // Does not match any model.
            return false;
        }

        // Each announced trailing byte must exist and match 10bbbbbb.
        for ($remaining = $trailing; $remaining > 0; $remaining--) {
            $pos++;

            if ($pos === $byte_count) {
                // Ran out of bytes in the middle of a sequence.
                return false;
            }

            if (0x80 != (ord($str[$pos]) & 0xc0)) {
                return false;
            }
        }

        $pos++;
    }

    return true;
}

WordPress Version: 5.4

/**
 * Reports whether a string looks like it is UTF-8 encoded.
 *
 * Each lead byte is classified by its high bits, which fixes the number of
 * continuation bytes (10bbbbbb) required after it; the check fails as soon
 * as a byte breaks that layout.
 *
 * NOTE: Lead bytes of 5- and 6-byte sequences pass this check too, although
 *       UTF8 has Bytes Sequences with a maximum length of 4.
 *
 * @author bmorel at ssi dot fr (modified)
 * @since 1.2.1
 *
 * @param string $str The string to be checked
 * @return bool True if $str fits a UTF-8 model, false otherwise.
 */
function seems_utf8($str)
{
    // The two helpers around strlen() are defined outside this snippet
    // (presumably they ensure the length is measured in bytes).
    mbstring_binary_safe_encoding();
    $size = strlen($str);
    reset_mbstring_encoding();

    for ($index = 0; $index < $size; $index++) {
        $byte = ord($str[$index]);

        // Pick the number of continuation bytes from the lead-byte pattern.
        switch (true) {
            case $byte < 0x80:
                // 0bbbbbbb
                $follow = 0;
                break;
            case ($byte & 0xe0) == 0xc0:
                // 110bbbbb
                $follow = 1;
                break;
            case ($byte & 0xf0) == 0xe0:
                // 1110bbbb
                $follow = 2;
                break;
            case ($byte & 0xf8) == 0xf0:
                // 11110bbb
                $follow = 3;
                break;
            case ($byte & 0xfc) == 0xf8:
                // 111110bb
                $follow = 4;
                break;
            case ($byte & 0xfe) == 0xfc:
                // 1111110b
                $follow = 5;
                break;
            default:
                // Does not match any model.
                return false;
        }

        // $follow bytes matching 10bbbbbb have to come next.
        while ($follow > 0) {
            $index++;
            if ($index == $size) {
                // Sequence is cut off by the end of the string.
                return false;
            }
            if ((ord($str[$index]) & 0xc0) != 0x80) {
                return false;
            }
            $follow--;
        }
    }

    return true;
}

WordPress Version: 5.1

/**
 * Checks if a string has the byte structure of UTF-8 text.
 *
 * The number of 1-bits that lead a byte identifies its model: none for a
 * single-byte character (0bbbbbbb), two to six for the lead byte of a
 * multi-byte sequence (110bbbbb ... 1111110b). A lead byte must be followed
 * by one continuation byte (10bbbbbb) fewer than its count of leading
 * 1-bits.
 *
 * NOTE: Lead bytes of 5- and 6-byte sequences are accepted, although UTF8
 *       has Bytes Sequences with a maximum length of 4.
 *
 * @author bmorel at ssi dot fr (modified)
 * @since 1.2.1
 *
 * @param string $str The string to be checked
 * @return bool True if $str fits a UTF-8 model, false otherwise.
 */
function seems_utf8($str)
{
    // strlen() is wrapped by two helpers defined outside this snippet
    // (presumably to obtain the length in bytes).
    mbstring_binary_safe_encoding();
    $end = strlen($str);
    reset_mbstring_encoding();

    $cursor = 0;
    while ($cursor < $end) {
        $value = ord($str[$cursor]);

        // Count the 1-bits at the top of the byte, stopping at the first 0-bit.
        $ones = 0;
        for ($bit = 0x80; $bit > 0 && ($value & $bit) !== 0; $bit >>= 1) {
            $ones++;
        }

        if (0 === $ones) {
            // 0bbbbbbb
            $needed = 0;
        } elseif ($ones >= 2 && $ones <= 6) {
            // 110bbbbb, 1110bbbb, 11110bbb, 111110bb or 1111110b
            $needed = $ones - 1;
        } else {
            // Does not match any model (10bbbbbb, 11111110 or 11111111).
            return false;
        }

        // $needed bytes matching 10bbbbbb must follow.
        for ($seen = 0; $seen < $needed; $seen++) {
            $cursor++;
            if ($cursor == $end) {
                // The string stops before the sequence is complete.
                return false;
            }
            if ((ord($str[$cursor]) & 0xc0) != 0x80) {
                return false;
            }
        }

        $cursor++;
    }

    return true;
}

WordPress Version: 4.2

/**
 * Determines whether a string is laid out like UTF-8.
 *
 * The bytes are scanned once while tracking how many continuation bytes
 * (10bbbbbb) the current sequence still requires. A lead byte that matches
 * no model, a wrong continuation byte, or a sequence left unfinished at the
 * end of the string makes the check fail.
 *
 * NOTE: Lead bytes of 5- and 6-byte sequences are accepted, although UTF8
 *       has Bytes Sequences with a maximum length of 4.
 *
 * @author bmorel at ssi dot fr (modified)
 * @since 1.2.1
 *
 * @param string $str The string to be checked
 * @return bool True if $str fits a UTF-8 model, false otherwise.
 */
function seems_utf8($str)
{
    // The helpers around strlen() are defined outside this snippet
    // (presumably they make the length a byte count).
    mbstring_binary_safe_encoding();
    $length = strlen($str);
    reset_mbstring_encoding();

    // Continuation bytes still owed by the sequence being read.
    $pending = 0;

    for ($index = 0; $index < $length; $index++) {
        $byte = ord($str[$index]);

        if ($pending > 0) {
            // Inside a sequence: the byte must match 10bbbbbb.
            if (($byte & 0xc0) != 0x80) {
                return false;
            }
            $pending--;
            continue;
        }

        // At a sequence start: the lead byte sets the number of followers.
        if ($byte < 0x80) {
            $pending = 0;
        } elseif (($byte & 0xe0) == 0xc0) {
            $pending = 1;
        } elseif (($byte & 0xf0) == 0xe0) {
            $pending = 2;
        } elseif (($byte & 0xf8) == 0xf0) {
            $pending = 3;
        } elseif (($byte & 0xfc) == 0xf8) {
            $pending = 4;
        } elseif (($byte & 0xfe) == 0xfc) {
            $pending = 5;
        } else {
            // Does not match any model
            return false;
        }
    }

    // A string that ends in the middle of a sequence is rejected.
    return 0 === $pending;
}

WordPress Version: 4.0

/**
 * Checks whether the bytes of a string follow the UTF-8 layout.
 *
 * Each lead byte is matched against a table of lead-byte models; the
 * matching model says how many continuation bytes (10bbbbbb) must follow.
 *
 * NOTE: The table includes lead bytes for 5- and 6-byte sequences, although
 *       UTF8 has Bytes Sequences with a maximum length of 4.
 *
 * @author bmorel at ssi dot fr (modified)
 * @since 1.2.1
 *
 * @param string $str The string to be checked
 * @return bool True if $str fits a UTF-8 model, false otherwise.
 */
function seems_utf8($str)
{
    # Lead-byte models keyed by bit mask: expected masked value, continuation bytes.
    $models = array(
        0x80 => array(0x00, 0), # 0bbbbbbb
        0xe0 => array(0xc0, 1), # 110bbbbb
        0xf0 => array(0xe0, 2), # 1110bbbb
        0xf8 => array(0xf0, 3), # 11110bbb
        0xfc => array(0xf8, 4), # 111110bb
        0xfe => array(0xfc, 5), # 1111110b
    );

    # The helpers around strlen() are defined outside this snippet
    # (presumably they make the length a byte count).
    mbstring_binary_safe_encoding();
    $limit = strlen($str);
    reset_mbstring_encoding();

    for ($at = 0; $at < $limit; $at++) {
        $lead = ord($str[$at]);

        $tail = -1;
        foreach ($models as $mask => $model) {
            if (($lead & $mask) == $model[0]) {
                $tail = $model[1];
                break;
            }
        }

        if ($tail < 0) {
            # Does not match any model
            return false;
        }

        # $tail bytes matching 10bbbbbb must follow.
        while ($tail-- > 0) {
            if (++$at == $limit) {
                return false;
            }
            if ((ord($str[$at]) & 0xc0) != 0x80) {
                return false;
            }
        }
    }

    return true;
}

WordPress Version: 3.7

/**
 * Checks whether a string seems to be UTF-8 encoded.
 *
 * Single-byte characters (0bbbbbbb) are skipped; any other byte must be a
 * lead byte matching one of the multi-byte models and must be followed by
 * the number of continuation bytes (10bbbbbb) that the model announces.
 *
 * NOTE: Lead bytes of 5- and 6-byte sequences are accepted, although UTF8
 *       has Bytes Sequences with a maximum length of 4.
 *
 * @author bmorel at ssi dot fr (modified)
 * @since 1.2.1
 *
 * @param string $str The string to be checked
 * @return bool True if $str fits a UTF-8 model, false otherwise.
 */
function seems_utf8($str)
{
    $size = strlen($str);

    for ($at = 0; $at < $size; $at++) {
        $first = ord($str[$at]);

        # 0bbbbbbb stands alone, nothing has to follow it.
        if ($first < 0x80) {
            continue;
        }

        if (0xc0 == ($first & 0xe0)) {
            $extra = 1; # 110bbbbb
        } elseif (0xe0 == ($first & 0xf0)) {
            $extra = 2; # 1110bbbb
        } elseif (0xf0 == ($first & 0xf8)) {
            $extra = 3; # 11110bbb
        } elseif (0xf8 == ($first & 0xfc)) {
            $extra = 4; # 111110bb
        } elseif (0xfc == ($first & 0xfe)) {
            $extra = 5; # 1111110b
        } else {
            # Does not match any model
            return false;
        }

        # $extra (at least one) bytes matching 10bbbbbb must follow.
        do {
            $at++;
            if ($at == $size || 0x80 != (ord($str[$at]) & 0xc0)) {
                return false;
            }
        } while (--$extra > 0);
    }

    return true;
}