From 564567d389a07a45233cb61ecef18b887e61228ca2db5b5fdad293622ff6372d Mon Sep 17 00:00:00 2001 From: ADAM David Alan Martin Date: Fri, 11 Apr 2025 19:49:07 -0400 Subject: [PATCH] Charset now an optional string. --- src/cache.cc | 11 ++++++----- src/form.cc | 8 ++++---- src/html.cc | 12 +++++------- src/html_common.hh | 9 +++++++-- src/misc.cc | 27 +++++++++++++-------------- src/misc.hh | 4 ++-- src/uicmd.cc | 2 +- 7 files changed, 38 insertions(+), 35 deletions(-) diff --git a/src/cache.cc b/src/cache.cc index 53cde43..59ff2a2 100644 --- a/src/cache.cc +++ b/src/cache.cc @@ -493,7 +493,8 @@ const char *a_Cache_set_content_type(const DilloUrl *url, const char *ctype, const char *from) { const char *curr; - char *major, *minor, *charset; + char *major, *minor; + std::optional< std::string > charset; CacheEntry_t *entry = Cache_entry_search(url); dReturn_val_if_fail (entry != NULL, NULL); @@ -514,7 +515,7 @@ const char *a_Cache_set_content_type(const DilloUrl *url, const char *ctype, } if (a_Misc_content_type_cmp(curr, ctype)) { /* ctype gives one different from current */ - a_Misc_parse_content_type(ctype, &major, &minor, &charset); + charset= a_Misc_parse_content_type(ctype, &major, &minor); if (*from == 'm' && charset && ((!major || !*major) && (!minor || !*minor))) { /* META only gives charset; use detected MIME type too */ @@ -524,7 +525,7 @@ const char *a_Cache_set_content_type(const DilloUrl *url, const char *ctype, /* WORKAROUND: doxygen uses "text/xhtml" in META */ if (charset) { entry->TypeNorm = dStrconcat("application/xhtml+xml", - "; charset=", charset, NULL); + "; charset=", charset.value().c_str(), NULL); } else { entry->TypeNorm = dStrdup("application/xhtml+xml"); } @@ -532,14 +533,14 @@ const char *a_Cache_set_content_type(const DilloUrl *url, const char *ctype, if (charset) { if (entry->CharsetDecoder) a_Decode_free(entry->CharsetDecoder); - entry->CharsetDecoder = a_Decode_charset_init(charset); + entry->CharsetDecoder = 
a_Decode_charset_init(charset.value().c_str()); curr = Cache_current_content_type(entry); /* Invalidate UTF8Data */ dStr_free(entry->UTF8Data, 1); entry->UTF8Data = NULL; } - dFree(major); dFree(minor); dFree(charset); + dFree(major); dFree(minor); } } return curr; diff --git a/src/form.cc b/src/form.cc index aa08aed..c7b4a63 100644 --- a/src/form.cc +++ b/src/form.cc @@ -341,7 +341,8 @@ void Html_tag_open_form(DilloHtml *html, const char *tag, int tagsize) DilloUrl *action; DilloHtmlMethod method; DilloHtmlEnc content_type; - char *charset, *first; + char *first; + std::optional< std::string > charset; const char *attrbuf; HT2TB(html)->addParbreak (9, html->wordStyle ()); @@ -376,7 +377,6 @@ void Html_tag_open_form(DilloHtml *html, const char *tag, int tagsize) if (!dStrAsciiCasecmp(attrbuf, "multipart/form-data")) content_type = DILLO_HTML_ENC_MULTIPART; } - charset = NULL; first = NULL; if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "accept-charset"))) { /* a list of acceptable charsets, separated by commas or spaces */ @@ -395,7 +395,7 @@ void Html_tag_open_form(DilloHtml *html, const char *tag, int tagsize) } if (!charset) charset = html->charset; - html->formNew(method, action, content_type, charset); + html->formNew(method, action, content_type, charset.has_value() ? charset.value().c_str() : nullptr ); dFree(first); a_Url_free(action); } @@ -604,7 +604,7 @@ void Html_tag_open_isindex(DilloHtml *html, const char *tag, int tagsize) action = a_Url_dup(html->base_url); html->formNew(DILLO_HTML_METHOD_GET, action, DILLO_HTML_ENC_URLENCODED, - html->charset); + html->charset.has_value() ? 
html->charset.value().c_str() : nullptr); html->InFlags |= IN_FORM; HT2TB(html)->addParbreak (9, html->wordStyle ()); diff --git a/src/html.cc b/src/html.cc index 9813e18..a7d0b2e 100644 --- a/src/html.cc +++ b/src/html.cc @@ -431,7 +431,7 @@ static bool Html_must_add_breaks(DilloHtml *html) * Create and initialize a new DilloHtml class */ DilloHtml::DilloHtml(BrowserWindow *p_bw, const DilloUrl *url, - const char *content_type) + const std::string_view content_type) { /* Init main variables */ bw = p_bw; @@ -449,11 +449,11 @@ DilloHtml::DilloHtml(BrowserWindow *p_bw, const DilloUrl *url, Start_Buf = NULL; Start_Ofs = 0; - _MSG("DilloHtml(): content type: %s\n", content_type); - this->content_type = dStrdup(content_type); + this->content_type = content_type; + _MSG("DilloHtml(): content type: %s\n", this->content_type.c_str()); /* get charset */ - a_Misc_parse_content_type(content_type, NULL, NULL, &charset); + charset= a_Misc_parse_content_type(this->content_type.c_str(), NULL, NULL); stop_parser = false; @@ -631,8 +631,6 @@ void DilloHtml::freeParseData() dStr_free(Stash, TRUE); dStr_free(attr_data, TRUE); - dFree(content_type); - dFree(charset); } /** @@ -3096,7 +3094,7 @@ static void Html_update_content_type(DilloHtml *html, const char *content) /* Cannot ask cache whether the content type was changed, as * this code in another bw might have already changed it for us. 
*/ - if (a_Misc_content_type_cmp(html->content_type, new_content)) { + if (a_Misc_content_type_cmp(html->content_type.data(), new_content)) { html->stop_parser = true; /* The cache buffer is no longer valid */ a_UIcmd_repush(html->bw); } diff --git a/src/html_common.hh b/src/html_common.hh index bfbf857..b74efc2 100644 --- a/src/html_common.hh +++ b/src/html_common.hh @@ -31,6 +31,10 @@ #include "styleengine.hh" +#include <optional> +#include <string> +#include <string_view> + /* * Macros */ @@ -170,7 +174,8 @@ public: //BUG: for now everything is public /* -------------------------------------------------------------------*/ char *Start_Buf; int Start_Ofs; - char *content_type, *charset; + std::string content_type; + std::optional< std::string > charset; bool stop_parser; size_t CurrOfs, OldOfs, OldLine; @@ -224,7 +229,7 @@ private: void initDw(); /* Used by the constructor */ public: - DilloHtml(BrowserWindow *bw, const DilloUrl *url, const char *content_type); + DilloHtml(BrowserWindow *bw, const DilloUrl *url, std::string_view content_type); ~DilloHtml(); void bugMessage(const char *format, ... ); void connectSignals(dw::core::Widget *dw); diff --git a/src/misc.cc b/src/misc.cc index 92b228b..6f751b8 100644 --- a/src/misc.cc +++ b/src/misc.cc @@ -213,8 +213,7 @@ int a_Misc_get_content_type_from_data(void *Data, size_t Size, const char **PT) * Parse Content-Type string, e.g., "text/html; charset=utf-8". * Content-Type is defined in RFC 2045 section 5.1. 
*/ -void a_Misc_parse_content_type(const char *type, char **major, char **minor, - char **charset) +std::optional<std::string> a_Misc_parse_content_type(const char *type, char **major, char **minor) { static const char tspecials_space[] = "()<>@,;:\\\"/[]?= "; const char *str, *s; @@ -223,10 +222,8 @@ void a_Misc_parse_content_type(const char *type, char **major, char **minor, *major = NULL; if (minor) *minor = NULL; - if (charset) - *charset = NULL; if (!(str = type)) - return; + return std::nullopt; for (s = str; *s && d_isascii((uchar_t)*s) && !iscntrl((uchar_t)*s) && !strchr(tspecials_space, *s); s++) ; @@ -239,7 +236,7 @@ void a_Misc_parse_content_type(const char *type, char **major, char **minor, if (minor) *minor = dStrndup(str, s - str); } - if (charset && *s && + if ( *s && (dStrnAsciiCasecmp(type, "text/", 5) == 0 || dStrnAsciiCasecmp(type, "application/xhtml+xml", 21) == 0)) { /* "charset" parameter defined for text media type in RFC 2046, @@ -266,11 +263,12 @@ void a_Misc_parse_content_type(const char *type, char **major, char **minor, s++; len -= 2; } - *charset = dStrndup(s, len); + return std::string{ s, len }; } } } } + return std::nullopt; } /** @@ -279,7 +277,9 @@ void a_Misc_parse_content_type(const char *type, char **major, char **minor, */ int a_Misc_content_type_cmp(const char *ct1, const char *ct2) { - char *major1, *major2, *minor1, *minor2, *charset1, *charset2; + char *major1, *major2, *minor1, *minor2; + std::optional< std::string > charset1; + std::optional< std::string > charset2; int ret; if ((!ct1 || !*ct1) && (!ct2 || !*ct2)) @@ -287,22 +287,21 @@ int a_Misc_content_type_cmp(const char *ct1, const char *ct2) if ((!ct1 || !*ct1) || (!ct2 || !*ct2)) return 1; - a_Misc_parse_content_type(ct1, &major1, &minor1, &charset1); - a_Misc_parse_content_type(ct2, &major2, &minor2, &charset2); + charset1= a_Misc_parse_content_type(ct1, &major1, &minor1); + charset2= a_Misc_parse_content_type(ct2, &major2, &minor2); if (major1 && major2 && 
!dStrAsciiCasecmp(major1, major2) && minor1 && minor2 && !dStrAsciiCasecmp(minor1, minor2) && ((!charset1 && !charset2) || - (charset1 && charset2 && !dStrAsciiCasecmp(charset1, charset2)) || - (!charset1 && charset2 && !dStrAsciiCasecmp(charset2, "UTF-8")) || - (charset1 && !charset2 && !dStrAsciiCasecmp(charset1, "UTF-8")))) { + (charset1 && charset2 && !dStrAsciiCasecmp(charset1.value().c_str(), charset2.value().c_str())) || + (!charset1 && charset2 && !dStrAsciiCasecmp(charset2.value().c_str(), "UTF-8")) || + (charset1 && !charset2 && !dStrAsciiCasecmp(charset1.value().c_str(), "UTF-8")))) { ret = 0; } else { ret = 1; } dFree(major1); dFree(major2); dFree(minor1); dFree(minor2); - dFree(charset1); dFree(charset2); return ret; } diff --git a/src/misc.hh b/src/misc.hh index 4cf853a..c7b3a74 100644 --- a/src/misc.hh +++ b/src/misc.hh @@ -6,15 +6,15 @@ #ifdef __cplusplus #include <string> +#include <optional> #include <string_view> std::string a_Misc_escape_chars(std::string_view str, std::string_view esc_set); +std::optional< std::string > a_Misc_parse_content_type(const char *str, char **major, char **minor); #endif int a_Misc_expand_tabs(char **start, char *end, char *buf, int buflen); int a_Misc_get_content_type_from_data(void *Data, size_t Size,const char **PT); int a_Misc_content_type_check(const char *EntryType, const char *DetectedType); -void a_Misc_parse_content_type(const char *str, char **major, char **minor, - char **charset); int a_Misc_content_type_cmp(const char* ct1, const char *ct2); int a_Misc_parse_geometry(char *geom, int *x, int *y, int *w, int *h); int a_Misc_parse_search_url(char *source, char **label, char **urlstr); diff --git a/src/uicmd.cc b/src/uicmd.cc index b5a3936..e9c6e09 100644 --- a/src/uicmd.cc +++ b/src/uicmd.cc @@ -1320,7 +1320,7 @@ void a_UIcmd_view_page_source(BrowserWindow *bw, const DilloUrl *url) char tag[16]; const char *content_type = a_Nav_get_content_type(url); - a_Misc_parse_content_type(content_type, &major, NULL, NULL); + 
a_Misc_parse_content_type(content_type, &major, NULL); if (major && dStrAsciiCasecmp(major, "image") && a_Nav_get_buf(url, &buf, &buf_size)) {