From 564567d389a07a45233cb61ecef18b887e61228ca2db5b5fdad293622ff6372d Mon Sep 17 00:00:00 2001
From: ADAM David Alan Martin <adam@recursive.engineer>
Date: Fri, 11 Apr 2025 19:49:07 -0400
Subject: [PATCH] Charset is now a std::optional<std::string>.

---
 src/cache.cc       | 11 ++++++-----
 src/form.cc        |  8 ++++----
 src/html.cc        | 12 +++++-------
 src/html_common.hh |  9 +++++++--
 src/misc.cc        | 27 +++++++++++++--------------
 src/misc.hh        |  4 ++--
 src/uicmd.cc       |  2 +-
 7 files changed, 38 insertions(+), 35 deletions(-)

diff --git a/src/cache.cc b/src/cache.cc
index 53cde43..59ff2a2 100644
--- a/src/cache.cc
+++ b/src/cache.cc
@@ -493,7 +493,8 @@ const char *a_Cache_set_content_type(const DilloUrl *url, const char *ctype,
                                      const char *from)
 {
    const char *curr;
-   char *major, *minor, *charset;
+   char *major, *minor;
+   std::optional< std::string > charset;
    CacheEntry_t *entry = Cache_entry_search(url);
 
    dReturn_val_if_fail (entry != NULL, NULL);
@@ -514,7 +515,7 @@ const char *a_Cache_set_content_type(const DilloUrl *url, const char *ctype,
       }
       if (a_Misc_content_type_cmp(curr, ctype)) {
          /* ctype gives one different from current */
-         a_Misc_parse_content_type(ctype, &major, &minor, &charset);
+         charset= a_Misc_parse_content_type(ctype, &major, &minor);
          if (*from == 'm' && charset &&
              ((!major || !*major) && (!minor || !*minor))) {
             /* META only gives charset; use detected MIME type too */
@@ -524,7 +525,7 @@ const char *a_Cache_set_content_type(const DilloUrl *url, const char *ctype,
             /* WORKAROUND: doxygen uses "text/xhtml" in META */
             if (charset) {
                entry->TypeNorm = dStrconcat("application/xhtml+xml",
-                        "; charset=", charset, NULL);
+                        "; charset=", charset.value().c_str(), NULL);
             } else {
                entry->TypeNorm = dStrdup("application/xhtml+xml");
             }
@@ -532,14 +533,14 @@ const char *a_Cache_set_content_type(const DilloUrl *url, const char *ctype,
          if (charset) {
             if (entry->CharsetDecoder)
                a_Decode_free(entry->CharsetDecoder);
-            entry->CharsetDecoder = a_Decode_charset_init(charset);
+            entry->CharsetDecoder = a_Decode_charset_init(charset.value().c_str());
             curr = Cache_current_content_type(entry);
 
             /* Invalidate UTF8Data */
             dStr_free(entry->UTF8Data, 1);
             entry->UTF8Data = NULL;
          }
-         dFree(major); dFree(minor); dFree(charset);
+         dFree(major); dFree(minor);
       }
    }
    return curr;
diff --git a/src/form.cc b/src/form.cc
index aa08aed..c7b4a63 100644
--- a/src/form.cc
+++ b/src/form.cc
@@ -341,7 +341,8 @@ void Html_tag_open_form(DilloHtml *html, const char *tag, int tagsize)
    DilloUrl *action;
    DilloHtmlMethod method;
    DilloHtmlEnc content_type;
-   char *charset, *first;
+   char *first;
+   std::optional< std::string > charset;
    const char *attrbuf;
 
    HT2TB(html)->addParbreak (9, html->wordStyle ());
@@ -376,7 +377,6 @@ void Html_tag_open_form(DilloHtml *html, const char *tag, int tagsize)
       if (!dStrAsciiCasecmp(attrbuf, "multipart/form-data"))
          content_type = DILLO_HTML_ENC_MULTIPART;
    }
-   charset = NULL;
    first = NULL;
    if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "accept-charset"))) {
       /* a list of acceptable charsets, separated by commas or spaces */
@@ -395,7 +395,7 @@ void Html_tag_open_form(DilloHtml *html, const char *tag, int tagsize)
    }
    if (!charset)
       charset = html->charset;
-   html->formNew(method, action, content_type, charset);
+   html->formNew(method, action, content_type, charset.has_value() ? charset.value().c_str() : nullptr );
    dFree(first);
    a_Url_free(action);
 }
@@ -604,7 +604,7 @@ void Html_tag_open_isindex(DilloHtml *html, const char *tag, int tagsize)
       action = a_Url_dup(html->base_url);
 
    html->formNew(DILLO_HTML_METHOD_GET, action, DILLO_HTML_ENC_URLENCODED,
-                 html->charset);
+                 html->charset.has_value() ? html->charset.value().c_str() : nullptr);
    html->InFlags |= IN_FORM;
 
    HT2TB(html)->addParbreak (9, html->wordStyle ());
diff --git a/src/html.cc b/src/html.cc
index 9813e18..a7d0b2e 100644
--- a/src/html.cc
+++ b/src/html.cc
@@ -431,7 +431,7 @@ static bool Html_must_add_breaks(DilloHtml *html)
  * Create and initialize a new DilloHtml class
  */
 DilloHtml::DilloHtml(BrowserWindow *p_bw, const DilloUrl *url,
-                     const char *content_type)
+                     const std::string_view content_type)
 {
    /* Init main variables */
    bw = p_bw;
@@ -449,11 +449,11 @@ DilloHtml::DilloHtml(BrowserWindow *p_bw, const DilloUrl *url,
    Start_Buf = NULL;
    Start_Ofs = 0;
 
-   _MSG("DilloHtml(): content type: %s\n", content_type);
-   this->content_type = dStrdup(content_type);
+   this->content_type = content_type;
+   _MSG("DilloHtml(): content type: %s\n", this->content_type.c_str());
 
    /* get charset */
-   a_Misc_parse_content_type(content_type, NULL, NULL, &charset);
+   charset= a_Misc_parse_content_type(this->content_type.c_str(), NULL, NULL);
 
    stop_parser = false;
 
@@ -631,8 +631,6 @@ void DilloHtml::freeParseData()
 
    dStr_free(Stash, TRUE);
    dStr_free(attr_data, TRUE);
-   dFree(content_type);
-   dFree(charset);
 }
 
 /**
@@ -3096,7 +3094,7 @@ static void Html_update_content_type(DilloHtml *html, const char *content)
    /* Cannot ask cache whether the content type was changed, as
     * this code in another bw might have already changed it for us.
     */
-   if (a_Misc_content_type_cmp(html->content_type, new_content)) {
+   if (a_Misc_content_type_cmp(html->content_type.data(), new_content)) {
       html->stop_parser = true; /* The cache buffer is no longer valid */
       a_UIcmd_repush(html->bw);
    }
diff --git a/src/html_common.hh b/src/html_common.hh
index bfbf857..b74efc2 100644
--- a/src/html_common.hh
+++ b/src/html_common.hh
@@ -31,6 +31,10 @@
 
 #include "styleengine.hh"
 
+#include <string>
+#include <string_view>
+#include <optional>
+
 /*
  * Macros
  */
@@ -170,7 +174,8 @@ public:  //BUG: for now everything is public
    /* -------------------------------------------------------------------*/
    char *Start_Buf;
    int Start_Ofs;
-   char *content_type, *charset;
+   std::string content_type;
+   std::optional< std::string > charset;
    bool stop_parser;
 
    size_t CurrOfs, OldOfs, OldLine;
@@ -224,7 +229,7 @@ private:
    void initDw();  /* Used by the constructor */
 
 public:
-   DilloHtml(BrowserWindow *bw, const DilloUrl *url, const char *content_type);
+   DilloHtml(BrowserWindow *bw, const DilloUrl *url, std::string_view content_type);
    ~DilloHtml();
    void bugMessage(const char *format, ... );
    void connectSignals(dw::core::Widget *dw);
diff --git a/src/misc.cc b/src/misc.cc
index 92b228b..6f751b8 100644
--- a/src/misc.cc
+++ b/src/misc.cc
@@ -213,8 +213,7 @@ int a_Misc_get_content_type_from_data(void *Data, size_t Size, const char **PT)
  * Parse Content-Type string, e.g., "text/html; charset=utf-8".
  * Content-Type is defined in RFC 2045 section 5.1.
  */
-void a_Misc_parse_content_type(const char *type, char **major, char **minor,
-                               char **charset)
+std::optional<std::string> a_Misc_parse_content_type(const char *type, char **major, char **minor)
 {
    static const char tspecials_space[] = "()<>@,;:\\\"/[]?= ";
    const char *str, *s;
@@ -223,10 +222,8 @@ void a_Misc_parse_content_type(const char *type, char **major, char **minor,
       *major = NULL;
    if (minor)
       *minor = NULL;
-   if (charset)
-      *charset = NULL;
    if (!(str = type))
-      return;
+      return std::nullopt;
 
    for (s = str; *s && d_isascii((uchar_t)*s) && !iscntrl((uchar_t)*s) &&
         !strchr(tspecials_space, *s); s++) ;
@@ -239,7 +236,7 @@ void a_Misc_parse_content_type(const char *type, char **major, char **minor,
       if (minor)
          *minor = dStrndup(str, s - str);
    }
-   if (charset && *s &&
+   if ( *s &&
        (dStrnAsciiCasecmp(type, "text/", 5) == 0 ||
         dStrnAsciiCasecmp(type, "application/xhtml+xml", 21) == 0)) {
       /* "charset" parameter defined for text media type in RFC 2046,
@@ -266,11 +263,12 @@ void a_Misc_parse_content_type(const char *type, char **major, char **minor,
                  s++;
                  len -= 2;
                }
-               *charset = dStrndup(s, len);
+               return std::string{ s, len };
             }
          }
       }
    }
+   return std::nullopt;
 }
 
 /**
@@ -279,7 +277,9 @@ void a_Misc_parse_content_type(const char *type, char **major, char **minor,
  */
 int a_Misc_content_type_cmp(const char *ct1, const char *ct2)
 {
-   char *major1, *major2, *minor1, *minor2, *charset1, *charset2;
+   char *major1, *major2, *minor1, *minor2;
+   std::optional< std::string > charset1;
+   std::optional< std::string > charset2;
    int ret;
 
    if ((!ct1 || !*ct1) && (!ct2 || !*ct2))
@@ -287,22 +287,21 @@ int a_Misc_content_type_cmp(const char *ct1, const char *ct2)
    if ((!ct1 || !*ct1) || (!ct2 || !*ct2))
       return 1;
 
-   a_Misc_parse_content_type(ct1, &major1, &minor1, &charset1);
-   a_Misc_parse_content_type(ct2, &major2, &minor2, &charset2);
+   charset1= a_Misc_parse_content_type(ct1, &major1, &minor1);
+   charset2= a_Misc_parse_content_type(ct2, &major2, &minor2);
 
    if (major1 && major2 && !dStrAsciiCasecmp(major1, major2) &&
        minor1 && minor2 && !dStrAsciiCasecmp(minor1, minor2) &&
        ((!charset1 && !charset2) ||
-        (charset1 && charset2 && !dStrAsciiCasecmp(charset1, charset2)) ||
-        (!charset1 && charset2 && !dStrAsciiCasecmp(charset2, "UTF-8")) ||
-        (charset1 && !charset2 && !dStrAsciiCasecmp(charset1, "UTF-8")))) {
+        (charset1 && charset2 && !dStrAsciiCasecmp(charset1.value().c_str(), charset2.value().c_str())) ||
+        (!charset1 && charset2 && !dStrAsciiCasecmp(charset2.value().c_str(), "UTF-8")) ||
+        (charset1 && !charset2 && !dStrAsciiCasecmp(charset1.value().c_str(), "UTF-8")))) {
       ret = 0;
    } else {
       ret = 1;
    }
    dFree(major1); dFree(major2);
    dFree(minor1); dFree(minor2);
-   dFree(charset1); dFree(charset2);
 
    return ret;
 }
diff --git a/src/misc.hh b/src/misc.hh
index 4cf853a..c7b3a74 100644
--- a/src/misc.hh
+++ b/src/misc.hh
@@ -6,15 +6,15 @@
 
 #ifdef __cplusplus
 #include <string>
+#include <optional>
 #include <string_view>
 std::string a_Misc_escape_chars(std::string_view str, std::string_view esc_set);
+std::optional< std::string > a_Misc_parse_content_type(const char *str, char **major, char **minor);
 #endif
 
 int a_Misc_expand_tabs(char **start, char *end, char *buf, int buflen);
 int a_Misc_get_content_type_from_data(void *Data, size_t Size,const char **PT);
 int a_Misc_content_type_check(const char *EntryType, const char *DetectedType);
-void a_Misc_parse_content_type(const char *str, char **major, char **minor,
-                               char **charset);
 int a_Misc_content_type_cmp(const char* ct1, const char *ct2);
 int a_Misc_parse_geometry(char *geom, int *x, int *y, int *w, int *h);
 int a_Misc_parse_search_url(char *source, char **label, char **urlstr);
diff --git a/src/uicmd.cc b/src/uicmd.cc
index b5a3936..e9c6e09 100644
--- a/src/uicmd.cc
+++ b/src/uicmd.cc
@@ -1320,7 +1320,7 @@ void a_UIcmd_view_page_source(BrowserWindow *bw, const DilloUrl *url)
    char tag[16];
    const char *content_type = a_Nav_get_content_type(url);
 
-   a_Misc_parse_content_type(content_type, &major, NULL, NULL);
+   a_Misc_parse_content_type(content_type, &major, NULL);
 
    if (major && dStrAsciiCasecmp(major, "image") &&
        a_Nav_get_buf(url, &buf, &buf_size)) {