+ /*
+ * Decode a UTF-8 character. Check RFC 3629
+ * (http://www.ietf.org/rfc/rfc3629.txt) for more details.
+ *
+ * The following table (taken from the RFC) is used for
+ * decoding.
+ *
+ * Char. number range | UTF-8 octet sequence
+ * (hexadecimal) | (binary)
+ * --------------------+------------------------------------
+ * 0000 0000-0000 007F | 0xxxxxxx
+ * 0000 0080-0000 07FF | 110xxxxx 10xxxxxx
+ * 0000 0800-0000 FFFF | 1110xxxx 10xxxxxx 10xxxxxx
+ * 0001 0000-0010 FFFF | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+ *
+ * Additionally, the characters in the range 0xD800-0xDFFF
+ * are prohibited as they are reserved for use with UTF-16
+ * surrogate pairs.
+ */
+
+ /* Determine the length of the UTF-8 sequence. */
+
+ octet = parser->raw_buffer.pointer[0];
+ width = (octet & 0x80) == 0x00 ? 1 :
+ (octet & 0xE0) == 0xC0 ? 2 :
+ (octet & 0xF0) == 0xE0 ? 3 :
+ (octet & 0xF8) == 0xF0 ? 4 : 0;
+
+ /* Check if the leading octet is valid. */
+
+ if (!width)
+ return yaml_parser_set_reader_error(parser,
+ "Invalid leading UTF-8 octet",
+ parser->offset, octet);