Index: apps/app_voicemail.c
===================================================================
--- apps/app_voicemail.c	(revision 376198)
+++ apps/app_voicemail.c	(working copy)
@@ -4484,34 +4484,49 @@
  */
 static const char *ast_str_encode_mime(struct ast_str **end, ssize_t maxlen, const char *start, size_t preamble, size_t postamble)
 {
-	struct ast_str *tmp = ast_str_alloca(80);
+	struct ast_str *buffer = ast_str_alloca(80);
 	int first_section = 1;
+	char qhead[80];
+	char qtail[] = "?=";
+	snprintf(qhead, sizeof(qhead), "=?%s?Q?", charset);
 
 	ast_str_reset(*end);
-	ast_str_set(&tmp, -1, "=?%s?Q?", charset);
+	ast_str_set(&buffer, -1, "%s", qhead);
+
+	/* Lots of nice info in:
+	 * http://tools.ietf.org/html/rfc5322#section-3.2.2 and
+	 * http://tools.ietf.org/html/rfc5322#section-2.2.3 */
 	for (; *start; start++) {
-		int need_encoding = 0;
-		if (*start < 33 || *start > 126 || strchr("()<>@,:;/\"[]?.=_", *start)) {
-			need_encoding = 1;
+		int need_encoding = (*start < 32 || *start > 126 || strchr("()<>@,:;/\"[]?.=_", *start));
+		int left_on_line = 76 - ast_str_strlen(buffer);
+		if (first_section) {
+			left_on_line -= preamble;
 		}
-		if ((first_section && need_encoding && preamble + ast_str_strlen(tmp) > 70) ||
-			(first_section && !need_encoding && preamble + ast_str_strlen(tmp) > 72) ||
-			(!first_section && need_encoding && ast_str_strlen(tmp) > 70) ||
-			(!first_section && !need_encoding && ast_str_strlen(tmp) > 72)) {
-			/* Start new line */
-			ast_str_append(end, maxlen, "%s%s?=", first_section ? "" : " ", ast_str_buffer(tmp));
-			ast_str_set(&tmp, -1, "=?%s?Q?", charset);
+		/* We break early on space, because:
+		 * (a) If we get multibyte input, we might break it in the middle
+		 *     of a character. If we're dealing with UTF-8, breaking on any
+		 *     byte < 127 is fine. If you're dealing with some other
+		 *     multibyte encoding, you're out of luck.
+		 * (b) According to RFC, the space used for folding should be
+		 *     included back into the unfolded element. Unfortunately the
+		 *     implementations vary. We add the trailing space in the
+		 *     q-encoded bit too. Two spaces is better than none.
+		 */
+		if (*start == ' ' || (need_encoding && left_on_line < 6) || (!need_encoding && left_on_line < 4)) {
+			/* Start new line (mark it as such by a space) */
+			ast_str_append(end, maxlen, "%s%s%s%s", first_section ? "" : " ", ast_str_buffer(buffer), *start == ' ' ? "_" : "", qtail);
+			ast_str_set(&buffer, -1, "%s", qhead);
 			first_section = 0;
 		}
-		if (need_encoding && *start == ' ') {
-			ast_str_append(&tmp, -1, "_");
-		} else if (need_encoding) {
-			ast_str_append(&tmp, -1, "=%hhX", *start);
-		} else {
-			ast_str_append(&tmp, -1, "%c", *start);
+		if (*start != ' ') {
+			if (need_encoding) {
+				ast_str_append(&buffer, -1, "=%02hhX", *start); /* Q-encoding needs exactly two hex digits, also for bytes < 0x10 */
+			} else {
+				ast_str_append(&buffer, -1, "%c", *start);
+			}
 		}
 	}
-	ast_str_append(end, maxlen, "%s%s?=%s", first_section ? "" : " ", ast_str_buffer(tmp), ast_str_strlen(tmp) + postamble > 74 ? " " : "");
+	ast_str_append(end, maxlen, "%s%s%s%s", first_section ? "" : " ", ast_str_buffer(buffer), qtail, ast_str_strlen(buffer) + postamble > 74 ? " " : "");
 	return ast_str_buffer(*end);
 }
 
@@ -4598,6 +4613,8 @@
 					fprintf(p, "%s %s" ENDL, first_line ? "From:" : "", ast_str_buffer(str2));
 					first_line = 0;
 					/* Substring is smaller, so this will never grow */
+					/* But we're assuming that the internal strcpy will work
+					 * with overlapping mem! FIXME */
 					ast_str_set(&str2, 0, "%s", ptr + 1);
 				}
 				fprintf(p, "%s %s <%s>" ENDL, first_line ? "From:" : "", ast_str_buffer(str2), who);
@@ -13093,6 +13110,64 @@
 	return res;
 }
 
+AST_TEST_DEFINE(test_q_encoding)
+{
+	int res = AST_TEST_FAIL;
+	char old_charset[32];
+
+	switch (cmd) {
+	case TEST_INIT:
+		info->name = "q_encoding";
+		info->category = "/apps/app_voicemail/";
+		info->summary = "Test Q-encoding";
+		info->description =
+			"ASTERISK-20167";
+		return AST_TEST_NOT_RUN;
+	case TEST_EXECUTE:
+		break;
+	}
+
+	/* Switch to UTF-8 */
+	ast_copy_string(old_charset, charset, sizeof(old_charset));
+	ast_copy_string(charset, "UTF-8", sizeof(charset));
+
+	/* Break out of the loop on failure */
+	do {
+		struct ast_str *output = ast_str_alloca(1024);
+		const char *input;
+		const char *expected;
+		const char *ret;
+
+		/* This breaks often, but it's better than breaking mid-utf-8 */
+		input = "Сообщение от \"anonymous\" <anonymous> в Monday, July 23, 2012 at 11:45:46 PM";
+		expected = "=?UTF-8?Q?=D0=A1=D0=BE=D0=BE=D0=B1=D1=89=D0=B5=D0=BD=D0=B8=D0=B5_?= "
+			"=?UTF-8?Q?=D0=BE=D1=82_?= =?UTF-8?Q?=22anonymous=22_?= "
+			"=?UTF-8?Q?=3Canonymous=3E_?= =?UTF-8?Q?=D0=B2_?= =?UTF-8?Q?Monday=2C_?= "
+			"=?UTF-8?Q?July_?= =?UTF-8?Q?23=2C_?= =?UTF-8?Q?2012_?= =?UTF-8?Q?at_?= "
+			"=?UTF-8?Q?11=3A45=3A46_?= =?UTF-8?Q?PM?=";
+		ret = ast_str_encode_mime(&output, 0, input, 0, 0);
+		if (strcmp(ret, expected)) {
+			ast_test_status_update(test, "Failed, expected:\n \"\"\"%s\"\"\"\ngot:\n \"\"\"%s\"\"\"\n", expected, ret);
+			break;
+		}
+
+		/* This breaks on 76 chars, but unfortunately mid-utf-8 */
+		input = "€€€€€€€€";
+		expected = "=?UTF-8?Q?=E2=82=AC=E2=82=AC=E2=82=AC=E2=82=AC=E2=82=AC=E2=82=AC=E2=82?= "
+			"=?UTF-8?Q?=AC=E2=82=AC?="; /* <-- this break mid utf-8 is not cool, but it is the current implementation */
+		ret = ast_str_encode_mime(&output, 0, input, 2, 0); /* <-- preamble 2 to break mid-utf8 */
+		if (strcmp(ret, expected)) {
+			ast_test_status_update(test, "Failed, expected:\n \"\"\"%s\"\"\"\ngot:\n \"\"\"%s\"\"\"\n", expected, ret);
+			break;
+		}
+
+		res = AST_TEST_PASS;
+	} while(0);
+
+	ast_copy_string(charset, old_charset, sizeof(charset));
+	return res;
+}
+
 #endif /* defined(TEST_FRAMEWORK) */
 
 static int reload(void)
@@ -13118,6 +13193,7 @@
 	res |= AST_TEST_UNREGISTER(test_voicemail_vmuser);
 	res |= AST_TEST_UNREGISTER(test_voicemail_notify_endl);
 	res |= AST_TEST_UNREGISTER(test_voicemail_load_config);
+	res |= AST_TEST_UNREGISTER(test_q_encoding);
 #endif
 	ast_cli_unregister_multiple(cli_voicemail, ARRAY_LEN(cli_voicemail));
 	ast_uninstall_vm_functions();
@@ -13168,6 +13244,7 @@
 	res |= AST_TEST_REGISTER(test_voicemail_vmuser);
 	res |= AST_TEST_REGISTER(test_voicemail_notify_endl);
 	res |= AST_TEST_REGISTER(test_voicemail_load_config);
+	res |= AST_TEST_REGISTER(test_q_encoding);
 #endif
 
 	if (res)