diff --git a/CMakeLists.txt b/CMakeLists.txt index 5523a5fb..ac8ba7f1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,7 +4,7 @@ cmake_minimum_required(VERSION 2.8) project(oniguruma C) set(PACKAGE onig) -set(PACKAGE_VERSION "6.1.1") +set(PACKAGE_VERSION "6.1.2") set(USE_COMBINATION_EXPLOSION_CHECK 0) set(USE_CRNL_AS_LINE_TERMINATOR 0) diff --git a/HISTORY b/HISTORY index 21038f16..c59fe4b6 100644 --- a/HISTORY +++ b/HISTORY @@ -1,5 +1,13 @@ History +2016/11/07: Version 6.1.2 + +2016/10/25: allow word bound, word begin and word end in look-behind. +2016/10/19: add ONIG_OPTION_CHECK_VALIDITY_OF_STRING option. +2016/10/16: fix use after free node. +2016/10/10: fix memory leaks after parsing regexp error. +2016/09/22: implement many of is_valid_mbc_string(). + 2016/09/02: Version 6.1.1 2016/08/31: fix segfault /W.?{888}{888}{888}\x00/ (found by libfuzzer) @@ -1721,7 +1729,7 @@ History 2003/03/12: [spec] change named backref and subexp call format. backref: \k<name>, call: \g<name> (thanks akr) 2003/03/11: [inst] add regparse.[ch] in win32/Makefile. -2003/03/11: [bug] if UNALIGNED_WORD_ACCESS isn't set +2003/03/11: [bug] if UNALIGNED_WORD_ACCESS isn't set, then compile error in unset_addr_list_fix(). (thanks knu) 2003/03/10: [impl] divide regcomp.c to regcomp.c, regparse.c and regparse.h. 2003/03/10: [bug] should handle multi-byte code name in fetch_name(). diff --git a/README.md b/README.md index a2c49cd6..bfb41c74 100644 --- a/README.md +++ b/README.md @@ -20,6 +20,12 @@ Supported character encodings: * CP1251: contributed by Byte +New feature of version 6.1.2 +-------------------------- + +* allow word bound, word begin and word end in look-behind. +* NEW option: ONIG_OPTION_CHECK_VALIDITY_OF_STRING + New feature of version 6.1 -------------------------- diff --git a/configure.ac b/configure.ac index e7d84592..beeaf5a1 100644 --- a/configure.ac +++ b/configure.ac @@ -1,5 +1,5 @@ dnl Process this file with autoconf to produce a configure script. 
-AC_INIT(onig, 6.1.1) +AC_INIT(onig, 6.1.2) AC_CONFIG_MACRO_DIR([m4]) diff --git a/dist.info b/dist.info index 6ecfe658..8e8d1aa9 100644 --- a/dist.info +++ b/dist.info @@ -1,7 +1,7 @@ --- This file is part of LuaDist project name = "onig" -version = "6.1.1" +version = "6.1.2" desc = "Oniguruma is a regular expressions library." author = "K.Kosako" diff --git a/index.html b/index.html index 159d6875..cf9177c0 100644 --- a/index.html +++ b/index.html @@ -8,7 +8,7 @@

Oniguruma

(Japanese)

-(c) K.Kosako, updated at: 2016/08/31 +(c) K.Kosako, updated at: 2016/11/07

@@ -16,9 +16,9 @@

Oniguruma

(Japanese)
What's new
    +
  • 2016/11/07: Version 6.1.2 released.
  • 2016/09/02: Version 6.1.1 released.
  • 2016/08/29: Version 6.1.0 released.
  • -
  • 2016/05/09: Version 6.0.0 released.
  • 2014/12/12: Version 5.9.6 released.
diff --git a/index_ja.html b/index_ja.html index 0918897b..e11e0f56 100644 --- a/index_ja.html +++ b/index_ja.html @@ -8,7 +8,7 @@

鬼車

-(c) K.Kosako, 最終更新: 2016/08/31 +(c) K.Kosako, 最終更新: 2016/11/07

@@ -16,9 +16,9 @@

鬼車

更新情報
    +
  • 2016/11/07: Version 6.1.2 リリース
  • 2016/09/02: Version 6.1.1 リリース
  • 2016/08/29: Version 6.1.0 リリース
  • -
  • 2016/05/09: Version 6.0.0 リリース
  • 2014/12/12: Version 5.9.6 リリース
diff --git a/src/big5.c b/src/big5.c index 3d449759..bc713abd 100644 --- a/src/big5.c +++ b/src/big5.c @@ -55,9 +55,28 @@ big5_mbc_enc_len(const UChar* p) } static int -is_valid_mbc_string(const UChar* s, const UChar* end) +is_valid_mbc_string(const UChar* p, const UChar* end) { - return onigenc_length_check_is_valid_mbc_string(ONIG_ENCODING_BIG5, s, end); + while (p < end) { + if (*p < 0x80) { + p++; + } + else if (*p < 0xa1) { + return FALSE; + } + else if (*p < 0xff) { + p++; + if (p >= end) return FALSE; + if (*p < 0x40) return FALSE; + if (*p > 0x7e && *p < 0xa1) return FALSE; + if (*p == 0xff) return FALSE; + p++; + } + else + return FALSE; + } + + return TRUE; } static OnigCodePoint diff --git a/src/euc_jp.c b/src/euc_jp.c index 19422ce7..3b54e958 100644 --- a/src/euc_jp.c +++ b/src/euc_jp.c @@ -57,9 +57,39 @@ mbc_enc_len(const UChar* p) } static int -is_valid_mbc_string(const UChar* s, const UChar* end) +is_valid_mbc_string(const UChar* p, const UChar* end) { - return onigenc_length_check_is_valid_mbc_string(ONIG_ENCODING_EUC_JP, s, end); + while (p < end) { + if (*p < 0x80) { + p++; + } + else if (*p > 0xa0) { + if (*p == 0xff) return FALSE; + p++; + if (p >= end) return FALSE; + if (*p < 0xa1 || *p == 0xff) return FALSE; + p++; + } + else if (*p == 0x8e) { + p++; + if (p >= end) return FALSE; + if (*p < 0xa1 || *p > 0xdf) return FALSE; + p++; + } + else if (*p == 0x8f) { + p++; + if (p >= end) return FALSE; + if (*p < 0xa1 || *p == 0xff) return FALSE; + p++; + if (p >= end) return FALSE; + if (*p < 0xa1 || *p == 0xff) return FALSE; + p++; + } + else + return FALSE; + } + + return TRUE; } static OnigCodePoint diff --git a/src/euc_kr.c b/src/euc_kr.c index 12803cdb..450caf14 100644 --- a/src/euc_kr.c +++ b/src/euc_kr.c @@ -55,9 +55,27 @@ euckr_mbc_enc_len(const UChar* p) } static int -is_valid_mbc_string(const UChar* s, const UChar* end) +is_valid_mbc_string(const UChar* p, const UChar* end) { - return 
onigenc_length_check_is_valid_mbc_string(ONIG_ENCODING_EUC_KR, s, end); + while (p < end) { + if (*p < 0x80) { + p++; + } + else if (*p < 0xa1) { + return FALSE; + } + else if (*p < 0xff) { + p++; + if (p >= end) return FALSE; + if (*p < 0xa1 || *p == 0xff) + return FALSE; + p++; + } + else + return FALSE; + } + + return TRUE; } static OnigCodePoint diff --git a/src/euc_tw.c b/src/euc_tw.c index 4e07567a..b3ee6286 100644 --- a/src/euc_tw.c +++ b/src/euc_tw.c @@ -55,9 +55,42 @@ euctw_mbc_enc_len(const UChar* p) } static int -is_valid_mbc_string(const UChar* s, const UChar* end) +is_valid_mbc_string(const UChar* p, const UChar* end) { - return onigenc_length_check_is_valid_mbc_string(ONIG_ENCODING_EUC_TW, s, end); + while (p < end) { + if (*p < 0x80) { + p++; + } + else if (*p < 0xa1) { + if (*p == 0x8e) { + p++; + if (p >= end) return FALSE; + if (*p < 0xa1 || *p > 0xb0) return FALSE; + p++; + if (p >= end) return FALSE; + if (*p < 0xa1 || *p == 0xff) + return FALSE; + p++; + if (p >= end) return FALSE; + if (*p < 0xa1 || *p == 0xff) + return FALSE; + p++; + } + else + return FALSE; + } + else if (*p < 0xff) { + p++; + if (p >= end) return FALSE; + if (*p < 0xa1 || *p == 0xff) + return FALSE; + p++; + } + else + return FALSE; + } + + return TRUE; } static OnigCodePoint diff --git a/src/gb18030.c b/src/gb18030.c index 36fc3de1..c8b58650 100644 --- a/src/gb18030.c +++ b/src/gb18030.c @@ -76,9 +76,43 @@ gb18030_mbc_enc_len(const UChar* p) } static int -is_valid_mbc_string(const UChar* s, const UChar* end) +is_valid_mbc_string(const UChar* p, const UChar* end) { - return onigenc_length_check_is_valid_mbc_string(ONIG_ENCODING_GB18030, s, end); + while (p < end) { + if (*p < 0x80) { + p++; + } + else if (*p == 0x80 || *p == 0xff) { + return FALSE; + } + else { + p++; + if (p >= end) return FALSE; + if (*p < 0x40) { + if (*p < 0x30 || *p > 0x39) + return FALSE; + + p++; + if (p >= end) return FALSE; + if (*p < 0x81 || *p == 0xff) return FALSE; + + p++; + if (p >= end) 
return FALSE; + if (*p < 0x30 || *p > 0x39) + return FALSE; + + p++; + } + else if (*p == 0x7f || *p == 0xff) { + return FALSE; + } + else { + p++; + } + } + } + + return TRUE; } static OnigCodePoint diff --git a/src/oniguruma.h b/src/oniguruma.h index 5aa49f6b..6090165b 100644 --- a/src/oniguruma.h +++ b/src/oniguruma.h @@ -36,7 +36,7 @@ extern "C" { #define ONIGURUMA #define ONIGURUMA_VERSION_MAJOR 6 #define ONIGURUMA_VERSION_MINOR 1 -#define ONIGURUMA_VERSION_TEENY 1 +#define ONIGURUMA_VERSION_TEENY 2 #ifdef __cplusplus # ifndef HAVE_PROTOTYPES @@ -364,7 +364,7 @@ int onigenc_strlen_null P_((OnigEncoding enc, const OnigUChar* p)); ONIG_EXTERN int onigenc_str_bytelen_null P_((OnigEncoding enc, const OnigUChar* p)); ONIG_EXTERN -int onigenc_is_valid_mbc_string P_((OnigEncoding enc, const UChar* s, const UChar* end)); +int onigenc_is_valid_mbc_string P_((OnigEncoding enc, const OnigUChar* s, const OnigUChar* end)); @@ -398,7 +398,8 @@ typedef unsigned int OnigOptionType; #define ONIG_OPTION_NOTBOL (ONIG_OPTION_CAPTURE_GROUP << 1) #define ONIG_OPTION_NOTEOL (ONIG_OPTION_NOTBOL << 1) #define ONIG_OPTION_POSIX_REGION (ONIG_OPTION_NOTEOL << 1) -#define ONIG_OPTION_MAXBIT ONIG_OPTION_POSIX_REGION /* limit */ +#define ONIG_OPTION_CHECK_VALIDITY_OF_STRING (ONIG_OPTION_POSIX_REGION << 1) +#define ONIG_OPTION_MAXBIT ONIG_OPTION_CHECK_VALIDITY_OF_STRING /* limit */ #define ONIG_OPTION_ON(options,regopt) ((options) |= (regopt)) #define ONIG_OPTION_OFF(options,regopt) ((options) &= ~(regopt)) @@ -742,7 +743,7 @@ void onig_free P_((OnigRegex)); ONIG_EXTERN void onig_free_body P_((OnigRegex)); ONIG_EXTERN -int onig_scan(regex_t* reg, const UChar* str, const UChar* end, OnigRegion* region, OnigOptionType option, int (*scan_callback)(int, int, OnigRegion*, void*), void* callback_arg); +int onig_scan(regex_t* reg, const OnigUChar* str, const OnigUChar* end, OnigRegion* region, OnigOptionType option, int (*scan_callback)(int, int, OnigRegion*, void*), void* callback_arg); 
ONIG_EXTERN int onig_search P_((OnigRegex, const OnigUChar* str, const OnigUChar* end, const OnigUChar* start, const OnigUChar* range, OnigRegion* region, OnigOptionType option)); ONIG_EXTERN diff --git a/src/regcomp.c b/src/regcomp.c index fb3de21e..11ba1e75 100644 --- a/src/regcomp.c +++ b/src/regcomp.c @@ -1795,6 +1795,11 @@ noname_disable_map(Node** plink, GroupNumRemap* map, int* counter) } break; + case NT_ANCHOR: + if (NANCHOR(node)->target) + r = noname_disable_map(&(NANCHOR(node)->target), map, counter); + break; + default: break; } @@ -1853,6 +1858,11 @@ renumber_by_map(Node* node, GroupNumRemap* map) r = renumber_node_backref(node, map); break; + case NT_ANCHOR: + if (NANCHOR(node)->target) + r = renumber_by_map(NANCHOR(node)->target, map); + break; + default: break; } @@ -1884,6 +1894,11 @@ numbered_ref_check(Node* node) return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED; break; + case NT_ANCHOR: + if (NANCHOR(node)->target) + r = numbered_ref_check(NANCHOR(node)->target); + break; + default: break; } @@ -3875,9 +3890,10 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) #define ALLOWED_ENCLOSE_IN_LB_NOT ENCLOSE_OPTION #define ALLOWED_ANCHOR_IN_LB \ -( ANCHOR_LOOK_BEHIND | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION ) +( ANCHOR_LOOK_BEHIND | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_WORD_BOUND | ANCHOR_NOT_WORD_BOUND | ANCHOR_WORD_BEGIN | ANCHOR_WORD_END ) + #define ALLOWED_ANCHOR_IN_LB_NOT \ -( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION ) +( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_WORD_BOUND | ANCHOR_NOT_WORD_BOUND | ANCHOR_WORD_BEGIN | ANCHOR_WORD_END ) case ANCHOR_LOOK_BEHIND: { @@ -4712,6 +4728,8 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) case ANCHOR_END_BUF: 
case ANCHOR_SEMI_END_BUF: case ANCHOR_END_LINE: + case ANCHOR_PREC_READ_NOT: + case ANCHOR_LOOK_BEHIND: add_opt_anc_info(&opt->anc, NANCHOR(node)->type); break; @@ -4734,8 +4752,6 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) } break; - case ANCHOR_PREC_READ_NOT: - case ANCHOR_LOOK_BEHIND: /* Sorry, I can't make use of it. */ case ANCHOR_LOOK_BEHIND_NOT: break; } @@ -4989,6 +5005,9 @@ set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env) reg->anchor = opt.anc.left_anchor & (ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML); + if ((opt.anc.left_anchor & (ANCHOR_LOOK_BEHIND | ANCHOR_PREC_READ_NOT)) != 0) + reg->anchor &= ~ANCHOR_ANYCHAR_STAR_ML; + reg->anchor |= opt.anc.right_anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF); if (reg->anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)) { @@ -5252,6 +5271,7 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, UnsetAddrList uslist; #endif + root = 0; if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL; #ifdef ONIG_DEBUG diff --git a/src/regexec.c b/src/regexec.c index 3599b213..7e8d3d15 100644 --- a/src/regexec.c +++ b/src/regexec.c @@ -3111,6 +3111,13 @@ onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, On r = 0; if (r == 0) { + if (ONIG_IS_OPTION_ON(option, ONIG_OPTION_CHECK_VALIDITY_OF_STRING)) { + if (! 
ONIGENC_IS_VALID_MBC_STRING(reg->enc, str, end)) { + r = ONIGERR_INVALID_WIDE_CHAR_VALUE; + goto end; + } + } + prev = (UChar* )onigenc_get_prev_char_head(reg->enc, str, at); r = match_at(reg, str, end, #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE @@ -3119,6 +3126,7 @@ onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, On at, prev, &msa); } + end: MATCH_ARG_FREE(msa); return r; } @@ -3391,6 +3399,13 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, if (start > end || start < str) goto mismatch_no_msa; + if (ONIG_IS_OPTION_ON(option, ONIG_OPTION_CHECK_VALIDITY_OF_STRING)) { + if (! ONIGENC_IS_VALID_MBC_STRING(reg->enc, str, end)) { + r = ONIGERR_INVALID_WIDE_CHAR_VALUE; + goto finish_no_msa; + } + } + #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE @@ -3747,6 +3762,13 @@ onig_scan(regex_t* reg, const UChar* str, const UChar* end, int rs; const UChar* start; + if (ONIG_IS_OPTION_ON(option, ONIG_OPTION_CHECK_VALIDITY_OF_STRING)) { + if (! 
ONIGENC_IS_VALID_MBC_STRING(reg->enc, str, end)) + return ONIGERR_INVALID_WIDE_CHAR_VALUE; + + ONIG_OPTION_OFF(option, ONIG_OPTION_CHECK_VALIDITY_OF_STRING); + } + n = 0; start = str; while (1) { diff --git a/src/regparse.c b/src/regparse.c index 1106c0b3..8f1d1cbf 100644 --- a/src/regparse.c +++ b/src/regparse.c @@ -29,6 +29,10 @@ #include "regparse.h" #include "st.h" +#ifdef DEBUG_NODE_FREE +#include <stdio.h> +#endif + #define WARN_BUFSIZE 256 #define CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS @@ -1003,13 +1007,16 @@ scan_env_set_mem_node(ScanEnv* env, int num, Node* node) return 0; } - extern void onig_node_free(Node* node) { start: if (IS_NULL(node)) return ; +#ifdef DEBUG_NODE_FREE + fprintf(stderr, "onig_node_free: %p\n", node); +#endif + switch (NTYPE(node)) { case NT_STR: if (NSTR(node)->capa != 0 && @@ -1071,6 +1078,9 @@ node_new(void) node = (Node* )xmalloc(sizeof(Node)); /* xmemset(node, 0, sizeof(Node)); */ +#ifdef DEBUG_NODE_FREE + fprintf(stderr, "node_new: %p\n", node); +#endif return node; } @@ -4318,7 +4328,10 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, CClassNode* acc; r = parse_char_class(&anode, tok, &p, end, env); - if (r != 0) goto cc_open_err; + if (r != 0) { + onig_node_free(anode); + goto cc_open_err; + } acc = NCCLASS(anode); r = or_cclass(cc, acc, env->enc); @@ -4412,7 +4425,6 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, err: if (cc != NCCLASS(*np)) bbuf_free(cc->mbuf); - onig_node_free(*np); return r; } @@ -4542,11 +4554,9 @@ parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, CHECK_NULL_RETURN_MEMERR(*np); num = scan_env_add_mem_entry(env); if (num < 0) { - onig_node_free(*np); return num; } else if (num >= (int )BIT_STATUS_BITS_NUM) { - onig_node_free(*np); return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY; } NENCLOSE(*np)->regnum = num; @@ -4614,7 +4624,10 @@ parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, if (r < 0) return r; r = 
parse_subexp(&target, tok, term, &p, end, env); env->option = prev; - if (r < 0) return r; + if (r < 0) { + onig_node_free(target); + return r; + } *np = node_new_option(option); CHECK_NULL_RETURN_MEMERR(*np); NENCLOSE(*np)->target = target; @@ -4647,7 +4660,10 @@ parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, r = fetch_token(tok, &p, end, env); if (r < 0) return r; r = parse_subexp(&target, tok, term, &p, end, env); - if (r < 0) return r; + if (r < 0) { + onig_node_free(target); + return r; + } if (NTYPE(*np) == NT_ANCHOR) NANCHOR(*np)->target = target; @@ -4908,7 +4924,10 @@ parse_exp(Node** np, OnigToken* tok, int term, if (r < 0) return r; r = parse_subexp(&target, tok, term, src, end, env); env->option = prev; - if (r < 0) return r; + if (r < 0) { + onig_node_free(target); + return r; + } NENCLOSE(*np)->target = target; return tok->type; } @@ -5220,7 +5239,10 @@ parse_branch(Node** top, OnigToken* tok, int term, *top = NULL; r = parse_exp(&node, tok, term, src, end, env); - if (r < 0) return r; + if (r < 0) { + onig_node_free(node); + return r; + } if (r == TK_EOT || r == term || r == TK_ALT) { *top = node; @@ -5230,7 +5252,10 @@ parse_branch(Node** top, OnigToken* tok, int term, headp = &(NCDR(*top)); while (r != TK_EOT && r != term && r != TK_ALT) { r = parse_exp(&node, tok, term, src, end, env); - if (r < 0) return r; + if (r < 0) { + onig_node_free(node); + return r; + } if (NTYPE(node) == NT_LIST) { *headp = node; @@ -5272,8 +5297,10 @@ parse_subexp(Node** top, OnigToken* tok, int term, r = fetch_token(tok, src, end, env); if (r < 0) return r; r = parse_branch(&node, tok, term, src, end, env); - if (r < 0) return r; - + if (r < 0) { + onig_node_free(node); + return r; + } *headp = onig_node_new_alt(node, NULL); headp = &(NCDR(*headp)); } @@ -5282,8 +5309,8 @@ parse_subexp(Node** top, OnigToken* tok, int term, goto err; } else { - err: onig_node_free(node); + err: if (term == TK_SUBEXP_CLOSE) return 
ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS; else diff --git a/src/sjis.c b/src/sjis.c index a607b3d7..33784748 100644 --- a/src/sjis.c +++ b/src/sjis.c @@ -77,9 +77,36 @@ mbc_enc_len(const UChar* p) } static int -is_valid_mbc_string(const UChar* s, const UChar* end) +is_valid_mbc_string(const UChar* p, const UChar* end) { - return onigenc_length_check_is_valid_mbc_string(ONIG_ENCODING_SJIS, s, end); + while (p < end) { + if (*p < 0x80) { + p++; + } + else if (*p < 0xa1) { + if (*p == 0xa0 || *p == 0x80) + return FALSE; + p++; + if (p >= end) return FALSE; + if (*p < 0x40 || *p > 0xfc || *p == 0x7f) + return FALSE; + p++; + } + else if (*p < 0xe0) { + p++; + } + else if (*p < 0xfd) { + p++; + if (p >= end) return FALSE; + if (*p < 0x40 || *p > 0xfc || *p == 0x7f) + return FALSE; + p++; + } + else + return FALSE; + } + + return TRUE; } static int