From 0932c217ddff0860c0c1b5614d158868f3874e88 Mon Sep 17 00:00:00 2001
From: Maximilian Heise <7600641+maxheise@users.noreply.github.com>
Date: Fri, 29 May 2026 10:28:32 +0200
Subject: [PATCH] epub: free libxml2 strings with xmlFree instead of g_free

backend/epub/epub-document.c reads many strings out of the parsed
container.xml and OPF package document through the helper
xml_get_data_from_node(), which returns an xmlChar* allocated by
libxml2 (it calls xmlGetProp or xmlNodeListGetString internally). A
string obtained from libxml2 must be released with xmlFree, because
libxml2 allocates it through its own allocator (xmlMalloc). The code
released most of these strings with GLib's g_free, and in a few places
stored a libxml2 string into a struct field that is later freed with
g_free, or never freed it at all. Mixing the two allocators is wrong:
it only happens to work while libxml2 and GLib share the system
malloc, and breaks if libxml2 is built with a custom allocator
(xmlMemSetup) or where the two libraries do not share one heap. A few
sites also leaked the libxml2 string outright.

This change makes every libxml2 string in the file use the matching
allocator, using one of three shapes depending on who owns the string:

1. Where the code owns the free site and the freed pointer is the
   libxml2 string itself, switch g_free to xmlFree. This covers the
   relativepath in get_uri_to_content() and setup_document_content_list()
   (the OPF full-path and item href), the newnode->key error paths and
   free_tree_nodes() (the spine idref), free_link_nodes() linktext, the
   href in get_child_list(), the class attribute in
   change_to_night_sheet(), the tocfilename in setup_document_index()
   (returned by get_toc_file_name()/epub_document_get_nav_file()), the
   stylesheet name in epub_document_check_add_night_sheet() (returned by
   epub_document_get_alternate_stylesheet()), and mathml plus href in
   epub_document_add_mathJax(). The companion g_free calls in those
   functions that release g_filename_to_uri / g_strdup / g_strdup_printf
   results (for example dataptr->value, dataptr->pagelink, the filepath
   and filename locals) are GLib strings and are left as g_free.

2. Where a libxml2 string was copied and then leaked, free it. The
   "toc" attribute ncx in get_toc_file_name() is used only to look up
   the manifest item and is now xmlFree'd after that lookup. The
   "version" attribute in epub_document_get_info() is copied by
   g_string_new() and is now captured in a local, passed to
   g_string_new(), then xmlFree'd.

3. Where a libxml2 string is stored into a struct whose destructor
   frees it with g_free and cannot be changed, store a GLib copy and
   free the libxml2 original. This applies to the EvDocumentInfo fields
   title, author, subject and creator (ev_document_info_free() frees
   them with g_free), and to EpubDocument.docTitle (freed with g_free
   in the finalize handler), which is assigned both in
   setup_document_index() and through epub_document_set_document_title().
   Each of these now does epubinfo->field = g_strdup((const char*)s)
   followed by xmlFree(s). The NULL / "unknown" fallback branches were
   already g_strdup'd and are unchanged. docTitle keeps its existing
   g_free at finalize, which is now correct because it always holds a
   g_strdup'd copy. g_strdup(NULL) returns NULL and xmlFree(NULL) is a
   no-op, so the empty-metadata cases behave exactly as before; the
   docTitle while-loop in setup_document_index() also keeps its
   original termination behaviour.

For valid documents the visible result is unchanged; only the allocator
used to free each string is corrected, and the previously leaked toc and
version strings are now released.
---
 backend/epub/epub-document.c | 63 ++++++++++++++++++++++++------------
 1 file changed, 43 insertions(+), 20 deletions(-)

diff --git a/backend/epub/epub-document.c b/backend/epub/epub-document.c
index 57d626c4..433eeff0 100644
--- a/backend/epub/epub-document.c
+++ b/backend/epub/epub-document.c
@@ -943,7 +943,7 @@ get_uri_to_content(const gchar* uri,GError ** error,EpubDocument *epub_document)
 
 	GString *absolutepath = g_string_new(tmp_archive_dir);
     g_string_append_printf(absolutepath,"/%s",relativepath);
-    g_free (relativepath);
+    xmlFree (relativepath);
 
     gchar *content_uri = g_filename_to_uri(absolutepath->str,NULL,&err);
     g_string_free(absolutepath,TRUE);
@@ -1072,7 +1072,7 @@ setup_document_content_list(const gchar* content_uri, GError** error,gchar *docu
             }
             else
             {
-                g_free (newnode->key);
+                xmlFree (newnode->key);
                 g_free (newnode);
                 errorflag = TRUE;
                 break;
@@ -1086,14 +1086,14 @@ setup_document_content_list(const gchar* content_uri, GError** error,gchar *docu
             // Handle the html extension (IssueID #266)
            if (g_strrstr(relativepath, ".html") != NULL)
                 g_string_insert_c (absolutepath, absolutepath->len-4, 'x');
-            g_free (relativepath);
+            xmlFree (relativepath);
             
             newnode->value = g_filename_to_uri(absolutepath->str,NULL,&err);
             g_string_free(absolutepath, TRUE);
 
             if ( newnode->value == NULL )
             {
-                g_free (newnode->key);
+                xmlFree (newnode->key);
                 g_free (newnode);
                 errorflag = TRUE;
                 break;
@@ -1136,7 +1136,7 @@ free_tree_nodes(gpointer data)
 {
     contentListNode* dataptr = data ;
     g_free(dataptr->value);
-    g_free(dataptr->key);
+    xmlFree(dataptr->key);
     g_free(dataptr);
 }
 
@@ -1145,7 +1145,7 @@ free_link_nodes(gpointer data)
 {
     linknode* dataptr = data ;
     g_free(dataptr->pagelink);
-    g_free(dataptr->linktext);
+    xmlFree(dataptr->linktext);
 
 	if (dataptr->children) {
 		g_list_free_full(dataptr->children,(GDestroyNotify)free_link_nodes);
@@ -1174,6 +1174,7 @@ get_toc_file_name(gchar *containeruri)
 
 	xmlretval = NULL;
 	xml_parse_children_of_node(manifest,(xmlChar*)"item",(xmlChar*)"id",ncx);
+	xmlFree (ncx);
 
 	gchar* tocfilename = (gchar*)xml_get_data_from_node(xmlretval,XML_ATTRIBUTE,(xmlChar*)"href");
 	xml_free_doc();
@@ -1215,7 +1216,7 @@ get_child_list(xmlNodePtr ol,gchar* documentdir)
                 gchar* filename = (gchar*)xml_get_data_from_node(children,XML_ATTRIBUTE,(xmlChar*)"href");
 				gchar *filepath = g_strdup_printf("%s/%s",documentdir,filename);
 				newlinknode->pagelink = g_filename_to_uri(filepath,NULL,NULL);
-				g_free(filename);
+				xmlFree(filename);
 				g_free(filepath);
                 newlinknode->children = NULL;
                 childlist = g_list_prepend(childlist,newlinknode);
@@ -1361,12 +1362,12 @@ setup_document_index(EpubDocument *epub_document,gchar *containeruri)
         g_string_append_printf (tocpath,"/%s",tocfilename);
         index = setup_index_from_navfile(tocpath->str);
         g_string_free(tocpath,TRUE);
-        g_free (tocfilename);
+        xmlFree (tocfilename);
         return index;
     }
 
     g_string_append_printf (tocpath,"/%s",tocfilename);
-    g_free (tocfilename);
+    xmlFree (tocfilename);
 
     open_xml_document(tocpath->str);
     g_string_free(tocpath,TRUE);
@@ -1377,7 +1378,9 @@ setup_document_index(EpubDocument *epub_document,gchar *containeruri)
 	xml_parse_children_of_node(docTitle,(xmlChar*)"text",NULL,NULL);
 
 	while (epub_document->docTitle == NULL && xmlretval != NULL) {
-		epub_document->docTitle = (gchar*)xml_get_data_from_node(xmlretval,XML_KEYWORD,NULL);
+		xmlChar *titledata = xml_get_data_from_node(xmlretval,XML_KEYWORD,NULL);
+		epub_document->docTitle = g_strdup((const gchar*)titledata);
+		xmlFree(titledata);
 		xmlretval = xmlretval->next;
 	}
     xmlNodePtr navMap = xml_get_pointer_to_node((xmlChar*)"navMap",NULL,NULL);
@@ -1441,21 +1444,35 @@ epub_document_get_info(EvDocument *document)
 	if ( metanode == NULL )
 	  epubinfo->title = NULL ;
 	else
-	  epubinfo->title = (char*)xml_get_data_from_node(metanode,XML_KEYWORD,NULL);
+	{
+	  xmlChar *title = xml_get_data_from_node(metanode,XML_KEYWORD,NULL);
+	  epubinfo->title = g_strdup((const char*)title);
+	  xmlFree(title);
+	}
 
 	metanode = xml_get_pointer_to_node((xmlChar*)"creator",NULL,NULL);
 	if ( metanode == NULL )
 	  epubinfo->author = g_strdup("unknown");
 	else
-	  epubinfo->author = (char*)xml_get_data_from_node(metanode,XML_KEYWORD,NULL);
+	{
+	  xmlChar *author = xml_get_data_from_node(metanode,XML_KEYWORD,NULL);
+	  epubinfo->author = g_strdup((const char*)author);
+	  xmlFree(author);
+	}
 
 	metanode = xml_get_pointer_to_node((xmlChar*)"subject",NULL,NULL);
 	if ( metanode == NULL )
 	   epubinfo->subject = g_strdup("unknown");
 	else
-	   epubinfo->subject = (char*)xml_get_data_from_node(metanode,XML_KEYWORD,NULL);
+	{
+	   xmlChar *subject = xml_get_data_from_node(metanode,XML_KEYWORD,NULL);
+	   epubinfo->subject = g_strdup((const char*)subject);
+	   xmlFree(subject);
+	}
 
-	buffer = g_string_new((gchar*)xml_get_data_from_node (xmlroot,XML_ATTRIBUTE,(xmlChar*)"version"));
+	xmlChar *version = xml_get_data_from_node (xmlroot,XML_ATTRIBUTE,(xmlChar*)"version");
+	buffer = g_string_new((gchar*)version);
+	xmlFree (version);
 	g_string_prepend(buffer,"epub ");
 	epubinfo->format = g_string_free(buffer,FALSE);
 
@@ -1467,7 +1484,11 @@ epub_document_get_info(EvDocument *document)
 	if ( metanode == NULL )
 	   epubinfo->creator = g_strdup("unknown");
 	else
-	   epubinfo->creator = (char*)xml_get_data_from_node(metanode,XML_KEYWORD,NULL);
+	{
+	   xmlChar *creator = xml_get_data_from_node(metanode,XML_KEYWORD,NULL);
+	   epubinfo->creator = g_strdup((const char*)creator);
+	   xmlFree(creator);
+	}
 
 	/* number of pages */
 	epubinfo->n_pages = epub_document_get_n_pages(document);
@@ -1508,7 +1529,7 @@ change_to_night_sheet(contentListNode *nodedata,gpointer user_data)
 	if ( (class = (gchar*)xml_get_data_from_node(day,XML_ATTRIBUTE,(xmlChar*)"class")) == NULL) {
 		xmlSetProp(day,(xmlChar*)"class",(xmlChar*)"day");
 	}
-	g_free(class);
+	xmlFree(class);
     xmlSetProp(day,(xmlChar*)"rel",(xmlChar*)"alternate stylesheet");
     xmlretval = NULL;
     xml_parse_children_of_node(head,(xmlChar*)"link",(xmlChar*)"class",(xmlChar*)"night");
@@ -1621,7 +1642,7 @@ epub_document_check_add_night_sheet(EvDocument *document)
         g_list_foreach(epub_document->contentList,(GFunc)add_night_sheet,csspath);
         g_free(csspath);
     }
-    g_free(stylesheetfilename);
+    xmlFree(stylesheetfilename);
 }
 
 static void
@@ -1645,7 +1666,9 @@ epub_document_set_document_title(gchar *containeruri)
 
 	xmlNodePtr title = xml_get_pointer_to_node((xmlChar*)"title",NULL,NULL);
 
-	doctitle = (gchar*)xml_get_data_from_node(title, XML_KEYWORD, NULL);
+	xmlChar *titledata = xml_get_data_from_node(title, XML_KEYWORD, NULL);
+	doctitle = g_strdup((const gchar*)titledata);
+	xmlFree(titledata);
 	xml_free_doc();
 
 	return doctitle;
@@ -1741,10 +1764,10 @@ epub_document_add_mathJax(gchar* containeruri,gchar* documentdir)
 			gchar *filename = g_strdup_printf("%s/%s",documentdir,href);
 
 			add_mathjax_script_node_to_file(filename,nodedata);
-			g_free(href);
+			xmlFree(href);
 			g_free(filename);
 		}
-		g_free(mathml);
+		xmlFree(mathml);
 		item = item->next;
 	}
 	xml_free_doc();