diff options
Diffstat (limited to 'src/Common/libzip/zip_utf-8.c')
-rw-r--r-- | src/Common/libzip/zip_utf-8.c | 118 |
1 files changed, 59 insertions, 59 deletions
diff --git a/src/Common/libzip/zip_utf-8.c b/src/Common/libzip/zip_utf-8.c index 8f02f88a..678912f6 100644 --- a/src/Common/libzip/zip_utf-8.c +++ b/src/Common/libzip/zip_utf-8.c @@ -1,9 +1,9 @@ /* zip_utf-8.c -- UTF-8 support functions for libzip - Copyright (C) 2011-2014 Dieter Baron and Thomas Klausner + Copyright (C) 2011-2021 Dieter Baron and Thomas Klausner This file is part of libzip, a library to manipulate ZIP archives. - The authors can be contacted at <libzip@nih.at> + The authors can be contacted at <info@libzip.org> Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions @@ -39,7 +39,7 @@ static const zip_uint16_t _cp437_to_unicode[256] = { /* 0x00 - 0x0F */ - 0x2007, 0x263A, 0x263B, 0x2665, 0x2666, 0x2663, 0x2660, 0x2022, 0x25D8, 0x25CB, 0x25D9, 0x2642, 0x2640, 0x266A, 0x266B, 0x263C, + 0x0000, 0x263A, 0x263B, 0x2665, 0x2666, 0x2663, 0x2660, 0x2022, 0x25D8, 0x25CB, 0x25D9, 0x2642, 0x2640, 0x266A, 0x266B, 0x263C, /* 0x10 - 0x1F */ 0x25BA, 0x25C4, 0x2195, 0x203C, 0x00B6, 0x00A7, 0x25AC, 0x21A8, 0x2191, 0x2193, 0x2192, 0x2190, 0x221F, 0x2194, 0x25B2, 0x25BC, @@ -103,54 +103,54 @@ _zip_guess_encoding(zip_string_t *str, zip_encoding_type_t expected_encoding) { zip_uint32_t i, j, ulen; if (str == NULL) - return ZIP_ENCODING_ASCII; + return ZIP_ENCODING_ASCII; name = str->raw; if (str->encoding != ZIP_ENCODING_UNKNOWN) - enc = str->encoding; + enc = str->encoding; else { - enc = ZIP_ENCODING_ASCII; - for (i = 0; i < str->length; i++) { - if ((name[i] > 31 && name[i] < 128) || name[i] == '\r' || name[i] == '\n' || name[i] == '\t') - continue; - - enc = ZIP_ENCODING_UTF8_GUESSED; - if ((name[i] & UTF_8_LEN_2_MASK) == UTF_8_LEN_2_MATCH) - ulen = 1; - else if ((name[i] & UTF_8_LEN_3_MASK) == UTF_8_LEN_3_MATCH) - ulen = 2; - else if ((name[i] & UTF_8_LEN_4_MASK) == UTF_8_LEN_4_MATCH) - ulen = 3; - else { - enc = ZIP_ENCODING_CP437; - break; - } - - if (i + ulen >= str->length) { - enc = ZIP_ENCODING_CP437; - break; - } - - for (j = 1; j <= ulen; j++) { - if ((name[i + j] & UTF_8_CONTINUE_MASK) != UTF_8_CONTINUE_MATCH) { - enc = ZIP_ENCODING_CP437; - goto done; - } - } - i += ulen; - } + enc = ZIP_ENCODING_ASCII; + for (i = 0; i < str->length; i++) { + if ((name[i] > 31 && name[i] < 128) || name[i] == '\r' || name[i] == '\n' || name[i] == '\t') + continue; + + enc = ZIP_ENCODING_UTF8_GUESSED; + if ((name[i] & UTF_8_LEN_2_MASK) == UTF_8_LEN_2_MATCH) + ulen = 1; + else if ((name[i] & UTF_8_LEN_3_MASK) == UTF_8_LEN_3_MATCH) + ulen = 2; + else if ((name[i] & UTF_8_LEN_4_MASK) == UTF_8_LEN_4_MATCH) + ulen = 3; + else { + enc = ZIP_ENCODING_CP437; + break; + } + + if (i + ulen >= str->length) { + enc = ZIP_ENCODING_CP437; + break; + } + + for (j = 1; j <= ulen; j++) { + if ((name[i + j] & UTF_8_CONTINUE_MASK) != UTF_8_CONTINUE_MATCH) { + enc = ZIP_ENCODING_CP437; + goto done; + } + } + i += ulen; + } } done: str->encoding = enc; if (expected_encoding != ZIP_ENCODING_UNKNOWN) { - if (expected_encoding == ZIP_ENCODING_UTF8_KNOWN && enc == ZIP_ENCODING_UTF8_GUESSED) - str->encoding = enc = ZIP_ENCODING_UTF8_KNOWN; + if (expected_encoding == ZIP_ENCODING_UTF8_KNOWN && enc == ZIP_ENCODING_UTF8_GUESSED) + str->encoding = enc = ZIP_ENCODING_UTF8_KNOWN; - if (expected_encoding != enc && enc != ZIP_ENCODING_ASCII) - return ZIP_ENCODING_ERROR; + if (expected_encoding != enc && enc != ZIP_ENCODING_ASCII) + return ZIP_ENCODING_ERROR; } return enc; @@ -160,11 +160,11 @@ done: static zip_uint32_t _zip_unicode_to_utf8_len(zip_uint32_t codepoint) { if (codepoint < 0x0080) - return 1; + return 1; if (codepoint < 0x0800) - return 2; + return 2; if (codepoint < 0x10000) - return 3; + return 3; return 4; } @@ -172,19 +172,19 @@ _zip_unicode_to_utf8_len(zip_uint32_t codepoint) { static zip_uint32_t _zip_unicode_to_utf8(zip_uint32_t codepoint, zip_uint8_t *buf) { if (codepoint < 0x0080) { - buf[0] = codepoint & 0xff; - return 1; + buf[0] = codepoint & 0xff; + return 1; } if (codepoint < 0x0800) { - buf[0] = (zip_uint8_t)(UTF_8_LEN_2_MATCH | ((codepoint >> 6) & 0x1f)); - buf[1] = (zip_uint8_t)(UTF_8_CONTINUE_MATCH | (codepoint & 0x3f)); - return 2; + buf[0] = (zip_uint8_t)(UTF_8_LEN_2_MATCH | ((codepoint >> 6) & 0x1f)); + buf[1] = (zip_uint8_t)(UTF_8_CONTINUE_MATCH | (codepoint & 0x3f)); + return 2; } if (codepoint < 0x10000) { - buf[0] = (zip_uint8_t)(UTF_8_LEN_3_MATCH | ((codepoint >> 12) & 0x0f)); - buf[1] = (zip_uint8_t)(UTF_8_CONTINUE_MATCH | ((codepoint >> 6) & 0x3f)); - buf[2] = (zip_uint8_t)(UTF_8_CONTINUE_MATCH | (codepoint & 0x3f)); - return 3; + buf[0] = (zip_uint8_t)(UTF_8_LEN_3_MATCH | ((codepoint >> 12) & 0x0f)); + buf[1] = (zip_uint8_t)(UTF_8_CONTINUE_MATCH | ((codepoint >> 6) & 0x3f)); + buf[2] = (zip_uint8_t)(UTF_8_CONTINUE_MATCH | (codepoint & 0x3f)); + return 3; } buf[0] = (zip_uint8_t)(UTF_8_LEN_4_MATCH | ((codepoint >> 18) & 0x07)); buf[1] = (zip_uint8_t)(UTF_8_CONTINUE_MATCH | ((codepoint >> 12) & 0x3f)); @@ -201,26 +201,26 @@ _zip_cp437_to_utf8(const zip_uint8_t *const _cp437buf, zip_uint32_t len, zip_uin zip_uint32_t buflen, i, offset; if (len == 0) { - if (utf8_lenp) - *utf8_lenp = 0; - return NULL; + if (utf8_lenp) + *utf8_lenp = 0; + return NULL; } buflen = 1; for (i = 0; i < len; i++) - buflen += _zip_unicode_to_utf8_len(_cp437_to_unicode[cp437buf[i]]); + buflen += _zip_unicode_to_utf8_len(_cp437_to_unicode[cp437buf[i]]); if ((utf8buf = (zip_uint8_t *)malloc(buflen)) == NULL) { - zip_error_set(error, ZIP_ER_MEMORY, 0); - return NULL; + zip_error_set(error, ZIP_ER_MEMORY, 0); + return NULL; } offset = 0; for (i = 0; i < len; i++) - offset += _zip_unicode_to_utf8(_cp437_to_unicode[cp437buf[i]], utf8buf + offset); + offset += _zip_unicode_to_utf8(_cp437_to_unicode[cp437buf[i]], utf8buf + offset); utf8buf[buflen - 1] = 0; if (utf8_lenp) - *utf8_lenp = buflen - 1; + *utf8_lenp = buflen - 1; return utf8buf; } |